
Commit

Processor accepts any kwargs (huggingface#31889)
* accept kwargs in processors

* return unused kwargs

* fix tests

* typo

* update the other way
zucchini-nlp committed Jul 24, 2024
1 parent b6449bf commit 0ae50d3
Showing 14 changed files with 65 additions and 22 deletions.
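
In practice, the interesting plumbing is in `processing_utils.py`: config entries that a processor does not declare are now collected instead of crashing `__init__`, and they are merged into the kwargs handed back when `return_unused_kwargs=True`. A minimal sketch of the new behavior, assuming a public BLIP checkpoint and network access:

from transformers import BlipProcessor

# `return_unused_kwargs=True` returns a (processor, unused_kwargs) tuple;
# after this commit, config keys that match neither an init argument nor an
# existing attribute end up in `unused_kwargs` instead of raising a TypeError.
processor, unused = BlipProcessor.from_pretrained(
    "Salesforce/blip-image-captioning-base", return_unused_kwargs=True
)
print(unused)  # {} for a clean checkpoint; stray config keys would appear here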
3 changes: 2 additions & 1 deletion src/transformers/models/blip/processing_blip.py
@@ -39,10 +39,11 @@ class BlipProcessor(ProcessorMixin):
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = []
     image_processor_class = "BlipImageProcessor"
     tokenizer_class = ("BertTokenizer", "BertTokenizerFast")

-    def __init__(self, image_processor, tokenizer):
+    def __init__(self, image_processor, tokenizer, **kwargs):
         tokenizer.return_token_type_ids = False
         super().__init__(image_processor, tokenizer)
         self.current_processor = self.image_processor
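
Accepting `**kwargs` in `__init__` is what makes the loading path tolerant: a processor config written by a different library version can carry keys the class does not know about without breaking construction. A sketch with a hypothetical stray keyword (`future_option` is made up for illustration):

from transformers import AutoTokenizer, BlipImageProcessor, BlipProcessor

image_processor = BlipImageProcessor()
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Previously this raised "TypeError: __init__() got an unexpected keyword
# argument"; now the stray key is absorbed by **kwargs and ignored.
processor = BlipProcessor(image_processor, tokenizer, future_option=True)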
3 changes: 2 additions & 1 deletion src/transformers/models/blip_2/processing_blip_2.py
@@ -39,11 +39,12 @@ class Blip2Processor(ProcessorMixin):
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = []
     image_processor_class = "BlipImageProcessor"
     tokenizer_class = "AutoTokenizer"

     # Copied from transformers.models.blip.processing_blip.BlipProcessor.__init__
-    def __init__(self, image_processor, tokenizer):
+    def __init__(self, image_processor, tokenizer, **kwargs):
         tokenizer.return_token_type_ids = False
         super().__init__(image_processor, tokenizer)
         self.current_processor = self.image_processor
3 changes: 2 additions & 1 deletion src/transformers/models/fuyu/processing_fuyu.py
@@ -322,10 +322,11 @@ class FuyuProcessor(ProcessorMixin):
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = []
     image_processor_class = "FuyuImageProcessor"
     tokenizer_class = "AutoTokenizer"

-    def __init__(self, image_processor, tokenizer):
+    def __init__(self, image_processor, tokenizer, **kwargs):
         super().__init__(image_processor=image_processor, tokenizer=tokenizer)
         self.image_processor = image_processor
         self.tokenizer = tokenizer
1 change: 1 addition & 0 deletions src/transformers/models/idefics/processing_idefics.py
@@ -173,6 +173,7 @@ class IdeficsProcessor(ProcessorMixin):
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = ["image_size", "add_end_of_utterance_token"]
     image_processor_class = "IdeficsImageProcessor"
     tokenizer_class = "LlamaTokenizerFast"
1 change: 1 addition & 0 deletions src/transformers/models/idefics2/processing_idefics2.py
@@ -61,6 +61,7 @@ class Idefics2Processor(ProcessorMixin):
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = ["image_seq_len", "chat_template"]
     image_processor_class = "Idefics2ImageProcessor"
     tokenizer_class = "AutoTokenizer"
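
`valid_kwargs` is the allow-list the new validation checks config keys against, so for Idefics2 an `image_seq_len` or `chat_template` entry in `processor_config.json` loads silently while any other key triggers the "unused kwargs" warning. A sketch against the public Idefics2 checkpoint (assumes network access):

from transformers import Idefics2Processor

processor = Idefics2Processor.from_pretrained("HuggingFaceM4/idefics2-8b")
# `image_seq_len` is declared in `valid_kwargs`, so loading it from the
# saved config produces no warning.
print(processor.image_seq_len)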
15 changes: 10 additions & 5 deletions src/transformers/models/instructblip/processing_instructblip.py
@@ -40,15 +40,16 @@ class InstructBlipProcessor(ProcessorMixin):
             An instance of [`BlipImageProcessor`]. The image processor is a required input.
         tokenizer (`AutoTokenizer`):
             An instance of [`PreTrainedTokenizer`]. The tokenizer is a required input.
-        qformer_tokenizer (`AutoTokenizer`):
+        qformer_tokenizer (`AutoTokenizer`, *optional*):
             An instance of [`PreTrainedTokenizer`]. The Q-Former tokenizer is a required input.
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = []
     image_processor_class = "BlipImageProcessor"
     tokenizer_class = "AutoTokenizer"

-    def __init__(self, image_processor, tokenizer, qformer_tokenizer):
+    def __init__(self, image_processor, tokenizer, qformer_tokenizer=None, **kwargs):
         super().__init__(image_processor, tokenizer)

         # add QFormer tokenizer
@@ -167,7 +168,11 @@ def save_pretrained(self, save_directory, **kwargs):
     # overwrite to load the Q-Former tokenizer from a separate folder
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
+        processor = super().from_pretrained(pretrained_model_name_or_path, **kwargs)
+
+        # if `return_unused_kwargs` is True, a tuple is returned where the second element is the unused kwargs
+        if isinstance(processor, tuple):
+            processor = processor[0]
         qformer_tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="qformer_tokenizer")
-        args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
-        args.append(qformer_tokenizer)
-        return cls(*args)
+        processor.qformer_tokenizer = qformer_tokenizer
+        return processor
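
The `isinstance` check is needed because the override forwards `return_unused_kwargs` to the parent: when it is set, `super().from_pretrained` returns a `(processor, unused_kwargs)` tuple that must be unwrapped before the Q-Former tokenizer can be attached. Note that the override then returns a bare processor either way, dropping the unused kwargs. A sketch against a public InstructBLIP checkpoint:

from transformers import InstructBlipProcessor

result = InstructBlipProcessor.from_pretrained(
    "Salesforce/instructblip-vicuna-7b", return_unused_kwargs=True
)
# The overridden `from_pretrained` unwraps the tuple internally, so the caller
# always gets a plain processor with `qformer_tokenizer` attached.
assert not isinstance(result, tuple)
print(type(result.qformer_tokenizer).__name__)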
15 changes: 10 additions & 5 deletions src/transformers/models/instructblipvideo/processing_instructblipvideo.py
@@ -40,15 +40,16 @@ class InstructBlipVideoProcessor(ProcessorMixin):
             An instance of [`InstructBlipVideoImageProcessor`]. The image processor is a required input.
         tokenizer (`AutoTokenizer`):
             An instance of [`PreTrainedTokenizer`]. The tokenizer is a required input.
-        qformer_tokenizer (`AutoTokenizer`):
+        qformer_tokenizer (`AutoTokenizer`, *optional*):
             An instance of [`PreTrainedTokenizer`]. The Q-Former tokenizer is a required input.
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = []
     image_processor_class = "InstructBlipVideoImageProcessor"
     tokenizer_class = "AutoTokenizer"

-    def __init__(self, image_processor, tokenizer, qformer_tokenizer):
+    def __init__(self, image_processor, tokenizer, qformer_tokenizer=None, **kwargs):
         super().__init__(image_processor, tokenizer)

         # add QFormer tokenizer
@@ -164,7 +165,11 @@ def save_pretrained(self, save_directory, **kwargs):
     # overwrite to load the Q-Former tokenizer from a separate folder
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
+        processor = super().from_pretrained(pretrained_model_name_or_path, **kwargs)
+
+        # if `return_unused_kwargs` is True, a tuple is returned where the second element is the unused kwargs
+        if isinstance(processor, tuple):
+            processor = processor[0]
         qformer_tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="qformer_tokenizer")
-        args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
-        args.append(qformer_tokenizer)
-        return cls(*args)
+        processor.qformer_tokenizer = qformer_tokenizer
+        return processor
3 changes: 2 additions & 1 deletion src/transformers/models/kosmos2/processing_kosmos2.py
@@ -54,10 +54,11 @@ class Kosmos2Processor(ProcessorMixin):
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = ["num_patch_index_tokens"]
     image_processor_class = "CLIPImageProcessor"
     tokenizer_class = ("XLMRobertaTokenizer", "XLMRobertaTokenizerFast")

-    def __init__(self, image_processor, tokenizer, num_patch_index_tokens=1024):
+    def __init__(self, image_processor, tokenizer, num_patch_index_tokens=1024, **kwargs):
         tokenizer.return_token_type_ids = False

         self.eod_token = "</doc>"
3 changes: 2 additions & 1 deletion src/transformers/models/llava/processing_llava.py
@@ -42,10 +42,11 @@ class LlavaProcessor(ProcessorMixin):
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = ["chat_template"]
     image_processor_class = "AutoImageProcessor"
     tokenizer_class = "AutoTokenizer"

-    def __init__(self, image_processor=None, tokenizer=None, chat_template=None):
+    def __init__(self, image_processor=None, tokenizer=None, chat_template=None, **kwargs):
         super().__init__(image_processor, tokenizer, chat_template=chat_template)

     def __call__(
3 changes: 2 additions & 1 deletion src/transformers/models/llava_next/processing_llava_next.py
@@ -42,10 +42,11 @@ class LlavaNextProcessor(ProcessorMixin):
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = ["chat_template"]
     image_processor_class = "AutoImageProcessor"
     tokenizer_class = "AutoTokenizer"

-    def __init__(self, image_processor=None, tokenizer=None, chat_template=None):
+    def __init__(self, image_processor=None, tokenizer=None, chat_template=None, **kwargs):
         super().__init__(image_processor, tokenizer, chat_template=chat_template)

     def __call__(
3 changes: 2 additions & 1 deletion src/transformers/models/llava_next_video/processing_llava_next_video.py
@@ -53,11 +53,12 @@ class LlavaNextVideoProcessor(ProcessorMixin):
     # video and image processor share same args, but have different processing logic
     # only image processor config is saved in the hub
     attributes = ["video_processor", "image_processor", "tokenizer"]
+    valid_kwargs = ["chat_template"]
     image_processor_class = "LlavaNextImageProcessor"
     video_processor_class = "LlavaNextVideoImageProcessor"
     tokenizer_class = ("LlamaTokenizer", "LlamaTokenizerFast")

-    def __init__(self, video_processor=None, image_processor=None, tokenizer=None, chat_template=None):
+    def __init__(self, video_processor=None, image_processor=None, tokenizer=None, chat_template=None, **kwargs):
         super().__init__(video_processor, image_processor, tokenizer, chat_template=chat_template)

     def __call__(
7 changes: 6 additions & 1 deletion src/transformers/models/paligemma/processing_paligemma.py
@@ -85,16 +85,21 @@ class PaliGemmaProcessor(ProcessorMixin):
             The image processor is a required input.
         tokenizer ([`LlamaTokenizerFast`], *optional*):
             The tokenizer is a required input.
+        chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
+            in a chat into a tokenizable string.
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = ["chat_template"]
     image_processor_class = "SiglipImageProcessor"
     tokenizer_class = ("GemmaTokenizer", "GemmaTokenizerFast")

     def __init__(
         self,
         image_processor=None,
         tokenizer=None,
+        chat_template=None,
+        **kwargs,
     ):
         if image_processor is None:
             raise ValueError("You need to specify an `image_processor`.")
@@ -113,7 +118,7 @@ def __init__(
         tokenizer.add_bos_token = False
         tokenizer.add_eos_token = False

-        super().__init__(image_processor, tokenizer)
+        super().__init__(image_processor, tokenizer, chat_template=chat_template)

     def __call__(
         self,
7 changes: 5 additions & 2 deletions src/transformers/models/video_llava/processing_video_llava.py
@@ -37,14 +37,17 @@ class VideoLlavaProcessor(ProcessorMixin):
             The image processor is a required input.
         tokenizer ([`LlamaTokenizerFast`], *optional*):
             The tokenizer is a required input.
+        chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
+            in a chat into a tokenizable string.
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = ["chat_template"]
     image_processor_class = "VideoLlavaImageProcessor"
     tokenizer_class = "AutoTokenizer"

-    def __init__(self, image_processor=None, tokenizer=None):
-        super().__init__(image_processor, tokenizer)
+    def __init__(self, image_processor=None, tokenizer=None, chat_template=None, **kwargs):
+        super().__init__(image_processor, tokenizer, chat_template=chat_template)

     def __call__(
         self,
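
Forwarding `chat_template` to `super().__init__` means the template is stored on the processor and round-trips through `save_pretrained`/`from_pretrained` instead of being silently dropped. A sketch with a deliberately trivial placeholder template (not the real Video-LLaVA one):

from transformers import AutoTokenizer, VideoLlavaImageProcessor, VideoLlavaProcessor

processor = VideoLlavaProcessor(
    image_processor=VideoLlavaImageProcessor(),
    tokenizer=AutoTokenizer.from_pretrained("LanguageBind/Video-LLaVA-7B-hf"),
    chat_template="{% for m in messages %}{{ m['content'] }}{% endfor %}",
)
print(processor.chat_template)  # stored, and usable by apply_chat_template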
20 changes: 18 additions & 2 deletions src/transformers/processing_utils.py
@@ -320,6 +320,7 @@ class ProcessorMixin(PushToHubMixin):
     feature_extractor_class = None
     tokenizer_class = None
     _auto_class = None
+    valid_kwargs: List[str] = []

     # args have to match the attributes class attribute
     def __init__(self, *args, **kwargs):
@@ -648,21 +649,23 @@ def from_args_and_dict(cls, args, processor_dict: Dict[str, Any], **kwargs):
         processor_dict = processor_dict.copy()
         return_unused_kwargs = kwargs.pop("return_unused_kwargs", False)

-        # Unlike image processors or feature extractors whose `__init__` accept `kwargs`, processor don't have `kwargs`.
-        # We have to pop up some unused (but specific) arguments to make it work.
+        # We have to pop some specific (but unused) kwargs before validating that the config contains no other unused kwargs.
+        # If we don't pop them, these specific kwargs will raise a warning.
         if "processor_class" in processor_dict:
             del processor_dict["processor_class"]

         if "auto_map" in processor_dict:
             del processor_dict["auto_map"]

+        unused_kwargs = cls.validate_init_kwargs(processor_config=processor_dict, valid_kwargs=cls.valid_kwargs)
         processor = cls(*args, **processor_dict)

         # Update processor with kwargs if needed
         for key in set(kwargs.keys()):
             if hasattr(processor, key):
                 setattr(processor, key, kwargs.pop(key))

+        kwargs.update(unused_kwargs)
         logger.info(f"Processor {processor}")
         if return_unused_kwargs:
             return processor, kwargs
@@ -887,6 +890,19 @@ def model_input_names(self):
         first_attribute = getattr(self, self.attributes[0])
         return getattr(first_attribute, "model_input_names", None)

+    @staticmethod
+    def validate_init_kwargs(processor_config, valid_kwargs):
+        kwargs_from_config = processor_config.keys()
+        unused_kwargs = {}
+        unused_keys = set(kwargs_from_config) - set(valid_kwargs)
+        if unused_keys:
+            unused_key_str = ", ".join(unused_keys)
+            logger.warning(
+                f"Some kwargs in processor config are unused and will not have any effect: {unused_key_str}. "
+            )
+            unused_kwargs = {k: processor_config[k] for k in unused_keys}
+        return unused_kwargs
+
     def apply_chat_template(
         self,
         conversation: Union[List[Dict[str, str]]],
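
Because `validate_init_kwargs` is a plain static method, its contract is easy to exercise in isolation: it returns the config entries not covered by `valid_kwargs` and logs a warning naming them. A sketch with made-up config keys:

from transformers.processing_utils import ProcessorMixin

config = {"chat_template": "...", "my_experimental_flag": True}  # hypothetical keys
unused = ProcessorMixin.validate_init_kwargs(
    processor_config=config, valid_kwargs=["chat_template"]
)
print(unused)  # {'my_experimental_flag': True}; also reported via logger.warning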
