Processor accepts any kwargs #31889

Merged 5 commits on Jul 11, 2024
3 changes: 2 additions & 1 deletion src/transformers/models/blip/processing_blip.py
@@ -39,10 +39,11 @@ class BlipProcessor(ProcessorMixin):
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = []
     image_processor_class = "BlipImageProcessor"
     tokenizer_class = ("BertTokenizer", "BertTokenizerFast")

-    def __init__(self, image_processor, tokenizer):
+    def __init__(self, image_processor, tokenizer, **kwargs):
         tokenizer.return_token_type_ids = False
         super().__init__(image_processor, tokenizer)
         self.current_processor = self.image_processor
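For context, a minimal sketch of what the added **kwargs buys (the checkpoint name and the stray keyword below are illustrative assumptions, not part of this diff): stray entries in a saved processor_config.json, or stray keyword arguments, are now absorbed instead of crashing __init__ with a TypeError.

    from transformers import AutoTokenizer, BlipImageProcessor, BlipProcessor

    image_processor = BlipImageProcessor()
    tokenizer = AutoTokenizer.from_pretrained("Salesforce/blip-image-captioning-base")

    # Before this change: TypeError: __init__() got an unexpected keyword argument
    # After it: the unknown keyword is swallowed by **kwargs
    processor = BlipProcessor(image_processor, tokenizer, some_legacy_key=True)  # hypothetical key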
3 changes: 2 additions & 1 deletion src/transformers/models/blip_2/processing_blip_2.py
@@ -39,11 +39,12 @@ class Blip2Processor(ProcessorMixin):
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = []
     image_processor_class = "BlipImageProcessor"
     tokenizer_class = "AutoTokenizer"

     # Copied from transformers.models.blip.processing_blip.BlipProcessor.__init__
-    def __init__(self, image_processor, tokenizer):
+    def __init__(self, image_processor, tokenizer, **kwargs):
         tokenizer.return_token_type_ids = False
         super().__init__(image_processor, tokenizer)
         self.current_processor = self.image_processor
3 changes: 2 additions & 1 deletion src/transformers/models/fuyu/processing_fuyu.py
@@ -322,10 +322,11 @@ class FuyuProcessor(ProcessorMixin):
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = []
     image_processor_class = "FuyuImageProcessor"
     tokenizer_class = "AutoTokenizer"

-    def __init__(self, image_processor, tokenizer):
+    def __init__(self, image_processor, tokenizer, **kwargs):
         super().__init__(image_processor=image_processor, tokenizer=tokenizer)
         self.image_processor = image_processor
         self.tokenizer = tokenizer
1 change: 1 addition & 0 deletions src/transformers/models/idefics/processing_idefics.py
@@ -173,6 +173,7 @@ class IdeficsProcessor(ProcessorMixin):
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = ["image_size", "add_end_of_utterance_token"]
     image_processor_class = "IdeficsImageProcessor"
     tokenizer_class = "LlamaTokenizerFast"

1 change: 1 addition & 0 deletions src/transformers/models/idefics2/processing_idefics2.py
@@ -61,6 +61,7 @@ class Idefics2Processor(ProcessorMixin):
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = ["image_seq_len", "chat_template"]
     image_processor_class = "Idefics2ImageProcessor"
     tokenizer_class = "AutoTokenizer"

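valid_kwargs names the keys a processor legitimately stores in its config; anything else found in processor_config.json is flagged by the validate_init_kwargs helper added further down in this PR. A hedged sketch, assuming the public idefics2 checkpoint:

    from transformers import Idefics2Processor

    # image_seq_len is whitelisted in valid_kwargs: no "unused kwargs" warning,
    # and the value lands on the processor instance
    processor = Idefics2Processor.from_pretrained("HuggingFaceM4/idefics2-8b", image_seq_len=64)
    assert processor.image_seq_len == 64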
15 changes: 10 additions & 5 deletions src/transformers/models/instructblip/processing_instructblip.py
@@ -40,15 +40,16 @@ class InstructBlipProcessor(ProcessorMixin):
             An instance of [`BlipImageProcessor`]. The image processor is a required input.
         tokenizer (`AutoTokenizer`):
             An instance of [`PreTrainedTokenizer`]. The tokenizer is a required input.
-        qformer_tokenizer (`AutoTokenizer`):
+        qformer_tokenizer (`AutoTokenizer`, *optional*):
             An instance of [`PreTrainedTokenizer`]. The Q-Former tokenizer is a required input.
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = []
     image_processor_class = "BlipImageProcessor"
     tokenizer_class = "AutoTokenizer"

-    def __init__(self, image_processor, tokenizer, qformer_tokenizer):
+    def __init__(self, image_processor, tokenizer, qformer_tokenizer=None, **kwargs):
         super().__init__(image_processor, tokenizer)

         # add QFormer tokenizer
@@ -167,7 +168,11 @@ def save_pretrained(self, save_directory, **kwargs):
     # overwrite to load the Q-Former tokenizer from a separate folder
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
+        processor = super().from_pretrained(pretrained_model_name_or_path, **kwargs)
+
+        # if return_unused_kwargs a tuple is returned where the second element is 'unused_kwargs'
+        if isinstance(processor, tuple):
+            processor = processor[0]
         qformer_tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="qformer_tokenizer")
-        args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
-        args.append(qformer_tokenizer)
-        return cls(*args)
+        processor.qformer_tokenizer = qformer_tokenizer
+        return processor
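The isinstance(processor, tuple) guard is needed because the base from_pretrained returns a pair when return_unused_kwargs=True. A minimal sketch of that base-class contract (SomeProcessor and the repo id are placeholders):

    # Base ProcessorMixin behavior the override has to account for:
    result = SomeProcessor.from_pretrained("org/repo", return_unused_kwargs=True)
    processor, unused_kwargs = result  # (processor, dict of kwargs nothing consumed)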
15 changes: 10 additions & 5 deletions src/transformers/models/instructblipvideo/processing_instructblipvideo.py
@@ -40,15 +40,16 @@ class InstructBlipVideoProcessor(ProcessorMixin):
             An instance of [`InstructBlipVideoImageProcessor`]. The image processor is a required input.
         tokenizer (`AutoTokenizer`):
             An instance of [`PreTrainedTokenizer`]. The tokenizer is a required input.
-        qformer_tokenizer (`AutoTokenizer`):
+        qformer_tokenizer (`AutoTokenizer`, *optional*):
             An instance of [`PreTrainedTokenizer`]. The Q-Former tokenizer is a required input.
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = []
     image_processor_class = "InstructBlipVideoImageProcessor"
     tokenizer_class = "AutoTokenizer"

-    def __init__(self, image_processor, tokenizer, qformer_tokenizer):
+    def __init__(self, image_processor, tokenizer, qformer_tokenizer=None, **kwargs):
         super().__init__(image_processor, tokenizer)

         # add QFormer tokenizer
@@ -164,7 +165,11 @@ def save_pretrained(self, save_directory, **kwargs):
     # overwrite to load the Q-Former tokenizer from a separate folder
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
+        processor = super().from_pretrained(pretrained_model_name_or_path, **kwargs)
+
+        # if return_unused_kwargs a tuple is returned where the second element is 'unused_kwargs'
+        if isinstance(processor, tuple):
+            processor = processor[0]
         qformer_tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="qformer_tokenizer")
-        args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
-        args.append(qformer_tokenizer)
-        return cls(*args)
+        processor.qformer_tokenizer = qformer_tokenizer
+        return processor
3 changes: 2 additions & 1 deletion src/transformers/models/kosmos2/processing_kosmos2.py
@@ -54,10 +54,11 @@ class Kosmos2Processor(ProcessorMixin):
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = ["num_patch_index_tokens"]
     image_processor_class = "CLIPImageProcessor"
     tokenizer_class = ("XLMRobertaTokenizer", "XLMRobertaTokenizerFast")

-    def __init__(self, image_processor, tokenizer, num_patch_index_tokens=1024):
+    def __init__(self, image_processor, tokenizer, num_patch_index_tokens=1024, **kwargs):
         tokenizer.return_token_type_ids = False

         self.eod_token = "</doc>"
3 changes: 2 additions & 1 deletion src/transformers/models/llava/processing_llava.py
@@ -42,10 +42,11 @@ class LlavaProcessor(ProcessorMixin):
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = ["chat_template"]
     image_processor_class = "AutoImageProcessor"
     tokenizer_class = "AutoTokenizer"

-    def __init__(self, image_processor=None, tokenizer=None, chat_template=None):
+    def __init__(self, image_processor=None, tokenizer=None, chat_template=None, **kwargs):
         super().__init__(image_processor, tokenizer, chat_template=chat_template)

     def __call__(
3 changes: 2 additions & 1 deletion src/transformers/models/llava_next/processing_llava_next.py
@@ -42,10 +42,11 @@ class LlavaNextProcessor(ProcessorMixin):
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = ["chat_template"]
     image_processor_class = "AutoImageProcessor"
     tokenizer_class = "AutoTokenizer"

-    def __init__(self, image_processor=None, tokenizer=None, chat_template=None):
+    def __init__(self, image_processor=None, tokenizer=None, chat_template=None, **kwargs):
         super().__init__(image_processor, tokenizer, chat_template=chat_template)

     def __call__(
3 changes: 2 additions & 1 deletion src/transformers/models/llava_next_video/processing_llava_next_video.py
@@ -53,11 +53,12 @@ class LlavaNextVideoProcessor(ProcessorMixin):
     # video and image processor share same args, but have different processing logic
     # only image processor config is saved in the hub
     attributes = ["video_processor", "image_processor", "tokenizer"]
+    valid_kwargs = ["chat_template"]
     image_processor_class = "LlavaNextImageProcessor"
     video_processor_class = "LlavaNextVideoImageProcessor"
     tokenizer_class = ("LlamaTokenizer", "LlamaTokenizerFast")

-    def __init__(self, video_processor=None, image_processor=None, tokenizer=None, chat_template=None):
+    def __init__(self, video_processor=None, image_processor=None, tokenizer=None, chat_template=None, **kwargs):
         super().__init__(video_processor, image_processor, tokenizer, chat_template=chat_template)

     def __call__(
7 changes: 6 additions & 1 deletion src/transformers/models/paligemma/processing_paligemma.py
@@ -85,16 +85,21 @@ class PaliGemmaProcessor(ProcessorMixin):
             The image processor is a required input.
         tokenizer ([`LlamaTokenizerFast`], *optional*):
             The tokenizer is a required input.
+        chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
+            in a chat into a tokenizable string.
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = ["chat_template"]
     image_processor_class = "SiglipImageProcessor"
     tokenizer_class = ("GemmaTokenizer", "GemmaTokenizerFast")

     def __init__(
         self,
         image_processor=None,
         tokenizer=None,
+        chat_template=None,
+        **kwargs,
     ):
         if image_processor is None:
             raise ValueError("You need to specify an `image_processor`.")
@@ -113,7 +118,7 @@ def __init__(
         tokenizer.add_bos_token = False
         tokenizer.add_eos_token = False

-        super().__init__(image_processor, tokenizer)
+        super().__init__(image_processor, tokenizer, chat_template=chat_template)

     def __call__(
         self,
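Since chat_template is now forwarded to ProcessorMixin, the template travels with the processor and apply_chat_template can pick it up. A hedged construction sketch (the component instances and the one-line Jinja template are assumptions):

    processor = PaliGemmaProcessor(
        image_processor=image_processor,  # a SiglipImageProcessor instance
        tokenizer=tokenizer,              # a GemmaTokenizerFast instance
        chat_template="{% for m in messages %}{{ m['content'] }}{% endfor %}",
    )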
7 changes: 5 additions & 2 deletions src/transformers/models/video_llava/processing_video_llava.py
@@ -37,14 +37,17 @@ class VideoLlavaProcessor(ProcessorMixin):
             The image processor is a required input.
         tokenizer ([`LlamaTokenizerFast`], *optional*):
             The tokenizer is a required input.
+        chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
+            in a chat into a tokenizable string.
     """

     attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = ["chat_template"]
     image_processor_class = "VideoLlavaImageProcessor"
     tokenizer_class = "AutoTokenizer"

-    def __init__(self, image_processor=None, tokenizer=None):
-        super().__init__(image_processor, tokenizer)
+    def __init__(self, image_processor=None, tokenizer=None, chat_template=None, **kwargs):
+        super().__init__(image_processor, tokenizer, chat_template=chat_template)

     def __call__(
         self,
20 changes: 18 additions & 2 deletions src/transformers/processing_utils.py
@@ -320,6 +320,7 @@ class ProcessorMixin(PushToHubMixin):
     feature_extractor_class = None
     tokenizer_class = None
     _auto_class = None
+    valid_kwargs: List[str] = []

     # args have to match the attributes class attribute
     def __init__(self, *args, **kwargs):
@@ -648,21 +649,23 @@ def from_args_and_dict(cls, args, processor_dict: Dict[str, Any], **kwargs):
         processor_dict = processor_dict.copy()
         return_unused_kwargs = kwargs.pop("return_unused_kwargs", False)

-        # Unlike image processors or feature extractors whose `__init__` accept `kwargs`, processor don't have `kwargs`.
-        # We have to pop up some unused (but specific) arguments to make it work.
+        # We have to pop up some unused (but specific) kwargs and then validate that it doesn't contain unused kwargs
+        # If we don't pop, some specific kwargs will raise a warning
         if "processor_class" in processor_dict:
             del processor_dict["processor_class"]

         if "auto_map" in processor_dict:
             del processor_dict["auto_map"]

+        unused_kwargs = cls.validate_init_kwargs(processor_config=processor_dict, valid_kwargs=cls.valid_kwargs)
         processor = cls(*args, **processor_dict)

         # Update processor with kwargs if needed
         for key in set(kwargs.keys()):
             if hasattr(processor, key):
                 setattr(processor, key, kwargs.pop(key))

+        kwargs.update(unused_kwargs)
         logger.info(f"Processor {processor}")
         if return_unused_kwargs:
             return processor, kwargs
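Taken together, the two added lines route config keys that fail validation into the return_unused_kwargs channel instead of dropping them. A hedged end-to-end sketch (processor class, repo id, and the stale key are placeholders):

    # Suppose processor_config.json in "org/repo" contains {"stale_key": 1, ...}:
    processor, unused = SomeProcessor.from_pretrained("org/repo", return_unused_kwargs=True)
    # logs "Some kwargs in processor config are unused and will not have any effect: stale_key."
    # and unused now includes {"stale_key": 1}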
@@ -887,6 +890,19 @@ def model_input_names(self):
         first_attribute = getattr(self, self.attributes[0])
         return getattr(first_attribute, "model_input_names", None)

+    @staticmethod
+    def validate_init_kwargs(processor_config, valid_kwargs):
+        kwargs_from_config = processor_config.keys()
+        unused_kwargs = {}
+        unused_keys = set(kwargs_from_config) - set(valid_kwargs)
+        if unused_keys:
+            unused_key_str = ", ".join(unused_keys)
+            logger.warning(
+                f"Some kwargs in processor config are unused and will not have any effect: {unused_key_str}. "
+            )
+            unused_kwargs = {k: processor_config[k] for k in unused_keys}
+        return unused_kwargs
+
     def apply_chat_template(
         self,
         conversation: Union[List[Dict[str, str]]],
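validate_init_kwargs is a standalone static method over the config dict, so its behavior is easy to check in isolation. A hedged sketch (the config content is made up):

    from transformers import ProcessorMixin

    config = {"chat_template": "{{ messages }}", "legacy_size": 224}
    unused = ProcessorMixin.validate_init_kwargs(processor_config=config, valid_kwargs=["chat_template"])
    # warns about legacy_size and hands it back:
    assert unused == {"legacy_size": 224}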