From cae68e5e4eb3436cbcb640d61a540660242e671a Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Thu, 12 Sep 2024 16:49:28 +0200
Subject: [PATCH 01/22] transformers v4.45 support

---
 .github/workflows/test_onnxruntime.yml | 1 +
 setup.py                               | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml
index 291a3b0833..c4157797e7 100644
--- a/.github/workflows/test_onnxruntime.yml
+++ b/.github/workflows/test_onnxruntime.yml
@@ -42,6 +42,7 @@ jobs:
         run: |
           pip install --upgrade pip
           pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+          pip install git+https://github.com/huggingface/transformers
           pip install .[tests,onnxruntime]

       - name: Test with pytest (in series)
diff --git a/setup.py b/setup.py
index ac5db71a74..231dc9110e 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@
 REQUIRED_PKGS = [
     "coloredlogs",
     "sympy",
-    "transformers[sentencepiece]>=4.29,<4.45.0",
+    "transformers[sentencepiece]>=4.29,<4.46.0",
     "torch>=1.11",
     "packaging",
     "numpy<2.0",  # transformers requires numpy<2.0 https://github.com/huggingface/transformers/pull/31569

From 69a1d2e48ecb5a8d5042cd95ac77e9933dd2998d Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Fri, 13 Sep 2024 14:11:59 +0200
Subject: [PATCH 02/22] fix transformers v4.45 compatibility

---
 optimum/exporters/onnx/convert.py       | 13 +++++++++++++
 optimum/exporters/onnx/model_configs.py |  8 ++++----
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/optimum/exporters/onnx/convert.py b/optimum/exporters/onnx/convert.py
index 63a9067b90..e5a912b18e 100644
--- a/optimum/exporters/onnx/convert.py
+++ b/optimum/exporters/onnx/convert.py
@@ -38,6 +38,7 @@
     is_torch_onnx_support_available,
     logging,
     require_numpy_strictly_lower,
+    check_if_transformers_greater,
 )
 from ...utils.modeling_utils import MODEL_TO_PATCH_FOR_PAST
 from ...utils.save_utils import maybe_save_preprocessors
@@ -1120,6 +1121,18 @@ def onnx_export_from_model(
     if isinstance(atol, dict):
         atol = atol[task.replace("-with-past", "")]

+    if check_if_transformers_greater("4.44.99"):
+        misplaced_generation_parameters = model.config._get_non_default_generation_parameters()
+        if model.can_generate() and len(misplaced_generation_parameters) > 0:
+            logger.warning(
+                "Moving the following attributes in the config to the generation config: "
+                f"{misplaced_generation_parameters}. You are seeing this warning because you've set "
+                "generation parameters in the model config, as opposed to in the generation config.",
+            )
+            for param_name, param_value in misplaced_generation_parameters.items():
+                setattr(model.generation_config, param_name, param_value)
+                setattr(model.config, param_name, None)
+
     # Saving the model config and preprocessor as this is needed sometimes.
     model.config.save_pretrained(output)
     generation_config = getattr(model, "generation_config", None)
diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index d4b15b2968..e927d5dfe9 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -119,7 +119,7 @@ def inputs(self) -> Dict[str, Dict[int, str]]:

 class AlbertOnnxConfig(BertOnnxConfig):
-    DEFAULT_ONNX_OPSET = 11
+    DEFAULT_ONNX_OPSET = 14  # now uses F.scaled_dot_product_attention by default for torch>=2.1.1.
 class ConvBertOnnxConfig(BertOnnxConfig):
@@ -171,11 +171,11 @@ class MPNetOnnxConfig(DistilBertOnnxConfig):

 class RobertaOnnxConfig(DistilBertOnnxConfig):
-    pass
+    DEFAULT_ONNX_OPSET = 14  # now uses F.scaled_dot_product_attention by default for torch>=2.1.1.

 class CamembertOnnxConfig(DistilBertOnnxConfig):
-    pass
+    DEFAULT_ONNX_OPSET = 14  # now uses F.scaled_dot_product_attention by default for torch>=2.1.1.

 class FlaubertOnnxConfig(BertOnnxConfig):
@@ -187,7 +187,7 @@ class IBertOnnxConfig(DistilBertOnnxConfig):

 class XLMRobertaOnnxConfig(DistilBertOnnxConfig):
-    pass
+    DEFAULT_ONNX_OPSET = 14  # now uses F.scaled_dot_product_attention by default for torch>=2.1.1.

 class DebertaOnnxConfig(BertOnnxConfig):

From 450a5a45dd0a2d31807b3f1f0d4139a5e4e515d5 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Fri, 13 Sep 2024 15:58:19 +0200
Subject: [PATCH 03/22] update opset

---
 optimum/exporters/onnx/model_configs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index e927d5dfe9..f24a984ca0 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -257,7 +257,7 @@ class ImageGPTOnnxConfig(GPT2OnnxConfig):

 class GPTNeoOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):
-    DEFAULT_ONNX_OPSET = 13
+    DEFAULT_ONNX_OPSET = 14
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(num_attention_heads="num_heads")

From e29f4666551f23fe37b06eaf1b2aa1ba6e681f5f Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Fri, 13 Sep 2024 16:35:50 +0200
Subject: [PATCH 04/22] fix config

---
 optimum/modeling_base.py                |  3 ++
 optimum/onnxruntime/modeling_decoder.py | 53 ++++++++++++---------
 optimum/onnxruntime/modeling_ort.py     |  3 --
 optimum/onnxruntime/modeling_seq2seq.py | 63 +++++++++++--------------
 4 files changed, 62 insertions(+), 60 deletions(-)

diff --git a/optimum/modeling_base.py b/optimum/modeling_base.py
index 5bab0622de..cfef0bac8e 100644
--- a/optimum/modeling_base.py
+++ b/optimum/modeling_base.py
@@ -372,6 +372,9 @@ def from_pretrained(
             export = from_transformers

         if len(model_id.split("@")) == 2:
+            logger.warning(
+                f"Specifying the `revision` as @{model_id.split('@')[1]} is deprecated and will be removed in v1.23, please use the `revision` argument instead."
+            )
             if revision is not None:
                 logger.warning(
                     f"The argument `revision` was set to {revision} but will be ignored for {model_id.split('@')[1]}"
diff --git a/optimum/onnxruntime/modeling_decoder.py b/optimum/onnxruntime/modeling_decoder.py
index f6d4b7e20a..35417ba60f 100644
--- a/optimum/onnxruntime/modeling_decoder.py
+++ b/optimum/onnxruntime/modeling_decoder.py
@@ -149,6 +149,19 @@ def __init__(
             generation_config = GenerationConfig.from_model_config(config)

         self.generation_config = generation_config
+
+        if check_if_transformers_greater("4.44.99"):
+            misplaced_generation_parameters = self.config._get_non_default_generation_parameters()
+            if len(misplaced_generation_parameters) > 0:
+                logger.warning(
+                    "Moving the following attributes in the config to the generation config: "
+                    f"{misplaced_generation_parameters}. You are seeing this warning because you've set "
+                    "generation parameters in the model config, as opposed to in the generation config.",
+                )
+                for param_name, param_value in misplaced_generation_parameters.items():
+                    setattr(self.generation_config, param_name, param_value)
+                    setattr(self.config, param_name, None)
+
         self.onnx_paths = [self.model_path]
         self.use_merged = "use_cache_branch" in self.input_names
         self.model_type = self.config.model_type
@@ -393,7 +406,6 @@ def _from_pretrained(
         cls,
         model_id: Union[str, Path],
         config: "PretrainedConfig",
-        use_auth_token: Optional[Union[bool, str]] = None,
         token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
@@ -410,15 +422,7 @@ def _from_pretrained(
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
         **kwargs,
     ) -> "ORTModelForCausalLM":
-        if use_auth_token is not None:
-            warnings.warn(
-                "The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.",
-                FutureWarning,
-            )
-            if token is not None:
-                raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
-            token = use_auth_token
-
+        generation_config = kwargs.pop("generation_config", None)
         model_path = Path(model_id)

         # We do not implement the logic for use_cache=False, use_merged=True
@@ -586,6 +590,22 @@ def _from_pretrained(
         else:
             init_cls = ORTModelForCausalLM

+        if generation_config is None:
+            try:
+                generation_config = GenerationConfig.from_pretrained(
+                    model_id,
+                    cache_dir=cache_dir,
+                    force_download=force_download,
+                    local_files_only=local_files_only,
+                    token=token,
+                    revision=revision,
+                    subfolder=subfolder,
+                )
+            except OSError:
+                logger.info(
+                    "Generation config file not found, using a generation config created from the model config."
+                )
+
         return init_cls(
             model=model,
             config=config,
@@ -593,6 +613,7 @@ def _from_pretrained(
             model_save_dir=model_save_dir,
             preprocessors=preprocessors,
             use_cache=use_cache,
+            generation_config=generation_config,
         )

     @classmethod
@@ -600,7 +621,6 @@ def _from_transformers(
         cls,
         model_id: str,
         config: "PretrainedConfig",
-        use_auth_token: Optional[Union[bool, str]] = None,
         token: Optional[Union[bool, str]] = None,
         revision: str = "main",
         force_download: bool = True,
@@ -616,15 +636,6 @@ def _from_transformers(
         use_io_binding: Optional[bool] = None,
         task: Optional[str] = None,
     ) -> "ORTModelForCausalLM":
-        if use_auth_token is not None:
-            warnings.warn(
-                "The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.",
-                FutureWarning,
-            )
-            if token is not None:
-                raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
-            token = use_auth_token
-
         file_name = ONNX_WEIGHTS_NAME

         if use_merged:
@@ -655,8 +666,6 @@ def _from_transformers(
             force_download=force_download,
             trust_remote_code=trust_remote_code,
         )
-
-        config.save_pretrained(save_dir_path)
         maybe_save_preprocessors(model_id, save_dir_path, src_subfolder=subfolder)

         return cls._from_pretrained(
diff --git a/optimum/onnxruntime/modeling_ort.py b/optimum/onnxruntime/modeling_ort.py
index 254b771e33..0485b9fb27 100644
--- a/optimum/onnxruntime/modeling_ort.py
+++ b/optimum/onnxruntime/modeling_ort.py
@@ -661,8 +661,6 @@ def _export(
             force_download=force_download,
             trust_remote_code=trust_remote_code,
         )
-
-        config.save_pretrained(save_dir_path)
         maybe_save_preprocessors(model_id, save_dir_path, src_subfolder=subfolder)

         return cls._from_pretrained(
@@ -1169,7 +1167,6 @@ def _export(
             library_name="transformers",
         )

-        config.save_pretrained(save_dir_path)
         maybe_save_preprocessors(model_id, save_dir_path, src_subfolder=subfolder)

         return cls._from_pretrained(
diff --git a/optimum/onnxruntime/modeling_seq2seq.py b/optimum/onnxruntime/modeling_seq2seq.py
index 4ce3e4707e..46a6609b81 100644
--- a/optimum/onnxruntime/modeling_seq2seq.py
+++ b/optimum/onnxruntime/modeling_seq2seq.py
@@ -717,6 +717,18 @@ def show_deprecated_argument(arg_name):
             generation_config = GenerationConfig.from_model_config(config)
         self.generation_config = generation_config

+        if check_if_transformers_greater("4.44.99"):
+            misplaced_generation_parameters = self.config._get_non_default_generation_parameters()
+            if len(misplaced_generation_parameters) > 0:
+                logger.warning(
+                    "Moving the following attributes in the config to the generation config: "
+                    f"{misplaced_generation_parameters}. You are seeing this warning because you've set "
+                    "generation parameters in the model config, as opposed to in the generation config.",
+                )
+                for param_name, param_value in misplaced_generation_parameters.items():
+                    setattr(self.generation_config, param_name, param_value)
+                    setattr(self.config, param_name, None)
+
     @abstractmethod
     def _initialize_encoder(self, session: ort.InferenceSession) -> ORTEncoder:
         pass
@@ -791,7 +803,6 @@ def _from_pretrained(
         cls,
         model_id: Union[str, Path],
         config: "PretrainedConfig",
-        use_auth_token: Optional[Union[bool, str]] = None,
         token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
@@ -810,15 +821,7 @@ def _from_pretrained(
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
         **kwargs,
     ):
-        if use_auth_token is not None:
-            warnings.warn(
-                "The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.",
-                FutureWarning,
-            )
-            if token is not None:
-                raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
-            token = use_auth_token
-
+        generation_config = kwargs.pop("generation_config", None)
         model_path = Path(model_id)

         # We do not implement the logic for use_cache=False, use_merged=True
@@ -1007,19 +1010,21 @@ def _from_pretrained(
         if model_save_dir is None:
             model_save_dir = new_model_save_dir

-        generation_config = None
-        try:
-            generation_config = GenerationConfig.from_pretrained(
-                model_id,
-                cache_dir=cache_dir,
-                force_download=force_download,
-                local_files_only=local_files_only,
-                token=token,
-                revision=revision,
-                subfolder=subfolder,
-            )
-        except OSError:
-            logger.info("Generation config file not found, using a generation config created from the model config.")
+        if generation_config is None:
+            try:
+                generation_config = GenerationConfig.from_pretrained(
+                    model_id,
+                    cache_dir=cache_dir,
+                    force_download=force_download,
+                    local_files_only=local_files_only,
+                    token=token,
+                    revision=revision,
+                    subfolder=subfolder,
+                )
+            except OSError:
+                logger.info(
+                    "Generation config file not found, using a generation config created from the model config."
+                )

         onnx_paths = [encoder_path]
         if use_merged is False:
@@ -1046,7 +1051,6 @@ def _from_transformers(
         cls,
         model_id: str,
         config: "PretrainedConfig",
-        use_auth_token: Optional[Union[bool, str]] = None,
         token: Optional[Union[bool, str]] = None,
         revision: str = "main",
         force_download: bool = True,
@@ -1062,15 +1066,6 @@ def _from_transformers(
         use_io_binding: Optional[bool] = None,
         task: Optional[str] = None,
     ) -> "ORTModelForConditionalGeneration":
-        if use_auth_token is not None:
-            warnings.warn(
-                "The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.",
-                FutureWarning,
-            )
-            if token is not None:
-                raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
-            token = use_auth_token
-
         if use_cache is False and use_merged is True:
             raise ValueError(
                 "The incompatible arguments use_cache=False, use_merged=True were passed to"
@@ -1102,8 +1097,6 @@ def _from_transformers(
             force_download=force_download,
             trust_remote_code=trust_remote_code,
         )
-
-        config.save_pretrained(save_dir_path)
         maybe_save_preprocessors(model_id, save_dir_path, src_subfolder=subfolder)

         return cls._from_pretrained(
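A note on the pattern introduced in PATCH 02 and PATCH 04 above: when running against transformers >= 4.45, generation defaults that were stored on the model config are moved onto the generation config before the config is used or saved. The standalone sketch below is not part of the patch series; GPT2Config and the max_length value are arbitrary placeholders, and the exact set of parameters reported by _get_non_default_generation_parameters depends on the installed transformers version.

    from transformers import GenerationConfig, GPT2Config

    config = GPT2Config()
    config.max_length = 64  # a generation parameter placed on the model config

    generation_config = GenerationConfig.from_model_config(config)
    # same migration as in the hunks above: move the misplaced values, then clear them
    for name, value in config._get_non_default_generation_parameters().items():
        setattr(generation_config, name, value)
        setattr(config, name, None)

    print(generation_config.max_length)  # 64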
From e69658c21b705e27a099304f2884df5fc00f4eb7 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Fri, 13 Sep 2024 16:46:09 +0200
Subject: [PATCH 05/22] fix

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index e2c2126303..824ef3d0cf 100644
--- a/Makefile
+++ b/Makefile
@@ -23,11 +23,11 @@ REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL))
 # Run code quality checks
 style_check:
 	black --check .
-	ruff .
+	ruff check .

 style:
 	black .
-	ruff . --fix
+	ruff check . --fix

 # Run tests for the library
 test:

From 0cc167d7eacd615a9a60b35bed9cd202d75f17eb Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Fri, 13 Sep 2024 16:46:18 +0200
Subject: [PATCH 06/22] style

---
 optimum/exporters/onnx/convert.py       | 2 +-
 optimum/onnxruntime/modeling_decoder.py | 1 -
 optimum/onnxruntime/modeling_seq2seq.py | 1 -
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/optimum/exporters/onnx/convert.py b/optimum/exporters/onnx/convert.py
index e5a912b18e..7142d5be4a 100644
--- a/optimum/exporters/onnx/convert.py
+++ b/optimum/exporters/onnx/convert.py
@@ -34,11 +34,11 @@
     DEFAULT_DUMMY_SHAPES,
     ONNX_WEIGHTS_NAME,
     TORCH_MINIMUM_VERSION,
+    check_if_transformers_greater,
     is_diffusers_available,
     is_torch_onnx_support_available,
     logging,
     require_numpy_strictly_lower,
-    check_if_transformers_greater,
 )
 from ...utils.modeling_utils import MODEL_TO_PATCH_FOR_PAST
 from ...utils.save_utils import maybe_save_preprocessors
diff --git a/optimum/onnxruntime/modeling_decoder.py b/optimum/onnxruntime/modeling_decoder.py
index 35417ba60f..239eaee5f3 100644
--- a/optimum/onnxruntime/modeling_decoder.py
+++ b/optimum/onnxruntime/modeling_decoder.py
@@ -14,7 +14,6 @@
 """Classes handling causal-lm related architectures in ONNX Runtime."""

 import logging
-import warnings
 from pathlib import Path
 from tempfile import TemporaryDirectory
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
diff --git a/optimum/onnxruntime/modeling_seq2seq.py b/optimum/onnxruntime/modeling_seq2seq.py
index 46a6609b81..3c0a7f4b06 100644
--- a/optimum/onnxruntime/modeling_seq2seq.py
+++ b/optimum/onnxruntime/modeling_seq2seq.py
@@ -18,7 +18,6 @@

 import logging
 import shutil
-import warnings
 from abc import ABC, abstractmethod
 from pathlib import Path
 from tempfile import TemporaryDirectory

From c98d5d6fb48bc31f0425fa732909f53c4c026f31 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 16 Sep 2024 11:15:44 +0200
Subject: [PATCH 07/22] fix

---
 optimum/onnxruntime/optimization.py          | 12 ++++++++----
 tests/onnxruntime/test_modeling.py           |  2 +-
 tests/onnxruntime/utils_onnxruntime_tests.py |  2 +-
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/optimum/onnxruntime/optimization.py b/optimum/onnxruntime/optimization.py
index 9e62a3f324..fd6958bba7 100644
--- a/optimum/onnxruntime/optimization.py
+++ b/optimum/onnxruntime/optimization.py
@@ -20,6 +20,7 @@

 import onnx
 from onnx import load_model
+from transformers import GenerationConfig
 from transformers.models.auto.configuration_auto import AutoConfig

 from onnxruntime.transformers.onnx_model_bert import BertOnnxModel
@@ -152,10 +153,6 @@ def optimize(
         save_dir = Path(save_dir)
         save_dir.mkdir(parents=True, exist_ok=True)
         ORTConfigManager.check_optimization_supported_model(self.model_type, optimization_config)
-
-        self.config.save_pretrained(save_dir)
-        maybe_save_preprocessors(self.onnx_model_path[0].parent, save_dir)
-
         model_type = ORTConfigManager.get_model_ort_type(self.config.model_type)
         optimization_options = optimization_config.create_fusion_options(model_type)
@@ -236,6 +233,13 @@ def optimize(
         # Save the model configuration
         self.config.save_pretrained(save_dir)
         ort_config.save_pretrained(save_dir)
+        maybe_save_preprocessors(self.onnx_model_path[0].parent, save_dir)
+
+        try:
+            generation_config = GenerationConfig.from_pretrained(self.onnx_model_path[0].parent)
+            generation_config.save_pretrained(save_dir)
+        except Exception:
+            pass

         logger.info(
             f"Optimized model saved at: {save_dir} (external data format: "
diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py
index 4b44acb38a..a19dbcd1a8 100644
--- a/tests/onnxruntime/test_modeling.py
+++ b/tests/onnxruntime/test_modeling.py
@@ -3147,7 +3147,7 @@ class ORTModelForAudioClassificationIntegrationTest(ORTModelTestMixin):
         "wavlm",
         "wav2vec2",
         "wav2vec2-conformer",
-        "whisper",
+        # "whisper",  saving of the model's config is broken in transformers v4.45.0
     ]

     FULL_GRID = {"model_arch": SUPPORTED_ARCHITECTURES}
diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py
index bb6935461d..8d25d88df5 100644
--- a/tests/onnxruntime/utils_onnxruntime_tests.py
+++ b/tests/onnxruntime/utils_onnxruntime_tests.py
@@ -114,7 +114,7 @@
     "longt5": "hf-internal-testing/tiny-random-LongT5Model",
     "llama": "fxmarty/tiny-llama-fast-tokenizer",
     "m2m_100": "hf-internal-testing/tiny-random-m2m_100",
-    "marian": "sshleifer/tiny-marian-en-de",  # hf-internal-testing ones are broken
+    "marian": "fxmarty/tiny-marian",  # hf-internal-testing ones are broken
     "mbart": "hf-internal-testing/tiny-random-mbart",
     "mistral": "echarlaix/tiny-random-mistral",
     "mobilebert": "hf-internal-testing/tiny-random-MobileBertModel",

From 9a6f601536a4623a7cf1ec58dc7b32e220e309c2 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 16 Sep 2024 15:28:07 +0200
Subject: [PATCH 08/22] update model

---
 tests/onnxruntime/utils_onnxruntime_tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py
index 8d25d88df5..804aa00f04 100644
--- a/tests/onnxruntime/utils_onnxruntime_tests.py
+++ b/tests/onnxruntime/utils_onnxruntime_tests.py
@@ -114,7 +114,7 @@
     "longt5": "hf-internal-testing/tiny-random-LongT5Model",
     "llama": "fxmarty/tiny-llama-fast-tokenizer",
     "m2m_100": "hf-internal-testing/tiny-random-m2m_100",
-    "marian": "fxmarty/tiny-marian",  # hf-internal-testing ones are broken
+    "marian": "echarlaix/tiny-random-marian",
     "mbart": "hf-internal-testing/tiny-random-mbart",
     "mistral": "echarlaix/tiny-random-mistral",
     "mobilebert": "hf-internal-testing/tiny-random-MobileBertModel",

From fadadc9a441734696f8d26dc974eb80c0e33b247 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 16 Sep 2024 18:11:00 +0200
Subject: [PATCH 09/22] Add generation config saving

---
 optimum/onnxruntime/modeling_decoder.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/optimum/onnxruntime/modeling_decoder.py b/optimum/onnxruntime/modeling_decoder.py
index 239eaee5f3..378fdbb6db 100644
--- a/optimum/onnxruntime/modeling_decoder.py
+++ b/optimum/onnxruntime/modeling_decoder.py
@@ -720,6 +720,9 @@ def _reorder_cache(past: Tuple[Tuple[torch.Tensor]], beam_idx: torch.Tensor) ->
             for layer_past in past
         )

+    def _save_pretrained(self, save_directory: Union[str, Path]):
+        super()._save_pretrained(save_directory)
+        self.generation_config.save_pretrained(save_directory)

 class ORTGPTBigCodeForCausalLM(ORTModelForCausalLM):
     # Adapted from transformers.models.gpt_bigcode.modeling_gpt_bigcode.GPTBigCodeForCausalLM.prepare_inputs_for_generation

From 9fa9e9fc3ebd4ca719ac88aa06942cf6ae1da2d8 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 25 Sep 2024 19:01:54 +0200
Subject: [PATCH 10/22] fix codegen

---
 optimum/bettertransformer/models/attention.py | 20 ++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/optimum/bettertransformer/models/attention.py b/optimum/bettertransformer/models/attention.py
index 9dfa57844d..53e6a676e6 100644
--- a/optimum/bettertransformer/models/attention.py
+++ b/optimum/bettertransformer/models/attention.py
@@ -195,7 +195,7 @@ def codegen_wrapped_scaled_dot_product(
                 query, key, value, attn_mask=None, dropout_p=dropout_p, is_causal=True
             )
         else:
-            # in this case, which is the later decoding steps, the `causal_mask`` in
+            # in this case, which is the later decoding steps, the `causal_mask` in
             # https://github.com/huggingface/transformers/blob/ae54e3c3b18bac0832ad62ea9b896dfd52a09850/src/transformers/models/gpt2/modeling_gpt2.py#L195
             # is [True, ..., True] so actually not causal
             sdpa_result = torch.nn.functional.scaled_dot_product_attention(
@@ -207,15 +207,20 @@ def codegen_wrapped_scaled_dot_product(
         # causal_mask is always [True, ..., True] otherwise, so executing this
         # is unnecessary
         if query_length > 1:
-            causal_mask = self.causal_mask[:, :, key_length - query_length : key_length, :key_length].to(torch.bool)
-            causal_mask = torch.where(causal_mask, 0, mask_value)
+            if not check_if_transformers_greater("4.44.99"):
+                causal_mask = self.causal_mask[:, :, key_length - query_length : key_length, :key_length].to(torch.bool)

-            # torch.Tensor.expand does no memory copy
-            causal_mask = causal_mask.expand(batch_size, -1, -1, -1)
+                causal_mask = torch.where(causal_mask, 0, mask_value)

-            # we use torch.min to avoid having tensor(-inf)
-            attention_mask = torch.min(causal_mask, attention_mask)
+                # torch.Tensor.expand does no memory copy
+                causal_mask = causal_mask.expand(batch_size, -1, -1, -1)
+
+                # we use torch.min to avoid having tensor(-inf)
+                attention_mask = torch.min(causal_mask, attention_mask)
+            else:
+
+                attention_mask = attention_mask[:, :, :, : key.shape[-2]]

         sdpa_result = torch.nn.functional.scaled_dot_product_attention(
             query, key, value, attn_mask=attention_mask, dropout_p=dropout_p, is_causal=False
@@ -224,6 +229,7 @@ def codegen_wrapped_scaled_dot_product(

     return sdpa_result, None

+
 # Adapted from transformers.models.opt.modeling_opt.OPTAttention.forward
 def opt_forward(
     self,
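The `else` branch added in PATCH 10 above hands masking over to torch's scaled_dot_product_attention: instead of rebuilding a boolean causal mask, the 4D additive mask prepared by transformers (which can extend past the current key length) is sliced to the cached key length. A self-contained sketch of that call with toy shapes follows; none of these tensor names or sizes come from the patch itself.

    import torch

    batch, heads, kv_len, dim = 2, 4, 10, 8
    query = torch.randn(batch, heads, 1, dim)        # a single decoding step
    key = torch.randn(batch, heads, kv_len, dim)     # cached keys
    value = torch.randn(batch, heads, kv_len, dim)   # cached values
    attention_mask = torch.zeros(batch, 1, 1, kv_len + 2)  # additive mask, longer than the cache

    out = torch.nn.functional.scaled_dot_product_attention(
        query,
        key,
        value,
        attn_mask=attention_mask[:, :, :, : key.shape[-2]],  # slice to the key length
        dropout_p=0.0,
        is_causal=False,  # one query token attends to every cached position
    )
    print(out.shape)  # torch.Size([2, 4, 1, 8])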
From bf913c285b3ef149070572a8e509d0ac7b874ceb Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Wed, 25 Sep 2024 19:04:53 +0200
Subject: [PATCH 11/22] udpate setup

---
 .github/workflows/test_onnxruntime.yml        | 1 -
 optimum/bettertransformer/models/attention.py | 7 +++----
 optimum/onnxruntime/modeling_decoder.py       | 1 +
 setup.py                                      | 3 ++-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml
index c4157797e7..291a3b0833 100644
--- a/.github/workflows/test_onnxruntime.yml
+++ b/.github/workflows/test_onnxruntime.yml
@@ -42,7 +42,6 @@ jobs:
         run: |
           pip install --upgrade pip
           pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-          pip install git+https://github.com/huggingface/transformers
           pip install .[tests,onnxruntime]

       - name: Test with pytest (in series)
diff --git a/optimum/bettertransformer/models/attention.py b/optimum/bettertransformer/models/attention.py
index 53e6a676e6..63f0275464 100644
--- a/optimum/bettertransformer/models/attention.py
+++ b/optimum/bettertransformer/models/attention.py
@@ -207,9 +207,10 @@ def codegen_wrapped_scaled_dot_product(
         # causal_mask is always [True, ..., True] otherwise, so executing this
         # is unnecessary
         if query_length > 1:
-            if not check_if_transformers_greater("4.44.99"):
-                causal_mask = self.causal_mask[:, :, key_length - query_length : key_length, :key_length].to(torch.bool)
+            if not check_if_transformers_greater("4.44.99"):
+                causal_mask = self.causal_mask[:, :, key_length - query_length : key_length, :key_length].to(
+                    torch.bool
+                )

                 causal_mask = torch.where(causal_mask, 0, mask_value)
@@ -219,7 +220,6 @@ def codegen_wrapped_scaled_dot_product(
                 # we use torch.min to avoid having tensor(-inf)
                 attention_mask = torch.min(causal_mask, attention_mask)
             else:
-
                 attention_mask = attention_mask[:, :, :, : key.shape[-2]]

         sdpa_result = torch.nn.functional.scaled_dot_product_attention(
@@ -229,7 +229,6 @@ def codegen_wrapped_scaled_dot_product(

     return sdpa_result, None

-
 # Adapted from transformers.models.opt.modeling_opt.OPTAttention.forward
 def opt_forward(
     self,
diff --git a/optimum/onnxruntime/modeling_decoder.py b/optimum/onnxruntime/modeling_decoder.py
index 378fdbb6db..bda3ec98d9 100644
--- a/optimum/onnxruntime/modeling_decoder.py
+++ b/optimum/onnxruntime/modeling_decoder.py
@@ -724,6 +724,7 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
         super()._save_pretrained(save_directory)
         self.generation_config.save_pretrained(save_directory)

+
 class ORTGPTBigCodeForCausalLM(ORTModelForCausalLM):
     # Adapted from transformers.models.gpt_bigcode.modeling_gpt_bigcode.GPTBigCodeForCausalLM.prepare_inputs_for_generation
     def prepare_inputs_for_generation(self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs):
diff --git a/setup.py b/setup.py
index 231dc9110e..c961cf973c 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,8 @@
 REQUIRED_PKGS = [
     "coloredlogs",
     "sympy",
-    "transformers[sentencepiece]>=4.29,<4.46.0",
+    "transformers @ git+https://github.com/huggingface/transformers.git",
+    # "transformers[sentencepiece]>=4.29,<4.46.0",
     "torch>=1.11",
     "packaging",
     "numpy<2.0",  # transformers requires numpy<2.0 https://github.com/huggingface/transformers/pull/31569

From 94dee276bbb1c8b1227ca2b131b1566dd6d81b79 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Thu, 26 Sep 2024 10:52:41 +0200
Subject: [PATCH 12/22] update setup

---
 setup.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index c961cf973c..231dc9110e 100644
--- a/setup.py
+++ b/setup.py
@@ -15,8 +15,7 @@
 REQUIRED_PKGS = [
     "coloredlogs",
     "sympy",
-    "transformers @ git+https://github.com/huggingface/transformers.git",
-    # "transformers[sentencepiece]>=4.29,<4.46.0",
+    "transformers[sentencepiece]>=4.29,<4.46.0",
     "torch>=1.11",
     "packaging",
     "numpy<2.0",  # transformers requires numpy<2.0 https://github.com/huggingface/transformers/pull/31569

From 3bfa30e9c98b59399eea0c5b0b49954a63a9c6d1 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Thu, 26 Sep 2024 11:01:47 +0200
Subject: [PATCH 13/22] bump default opset m2m100

---
 optimum/exporters/onnx/model_configs.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index f24a984ca0..36963a986d 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -564,6 +564,7 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int

 class M2M100OnnxConfig(TextSeq2SeqOnnxConfig):
+    DEFAULT_ONNX_OPSET = 14  # now uses F.scaled_dot_product_attention by default for torch>=2.1.1.
     NORMALIZED_CONFIG_CLASS = NormalizedSeq2SeqConfig.with_args(
         encoder_num_layers="encoder_layers",
         decoder_num_layers="decoder_layers",

From 7bf1d305ea7dd1110c849dbbb25593186817651b Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Thu, 26 Sep 2024 18:36:45 +0200
Subject: [PATCH 14/22] fix codegen

---
 optimum/bettertransformer/models/decoder_models.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/optimum/bettertransformer/models/decoder_models.py b/optimum/bettertransformer/models/decoder_models.py
index b64b7f5a1e..95ebcdd25f 100644
--- a/optimum/bettertransformer/models/decoder_models.py
+++ b/optimum/bettertransformer/models/decoder_models.py
@@ -238,12 +238,16 @@ def __init__(self, layer: "nn.Module", config: "PretrainedConfig"):
         super(BetterTransformerBaseLayer, self).__init__(config)

         self.module_mapping = None
-        submodules = ["attn_dropout", "resid_dropout", "qkv_proj", "out_proj", "causal_mask", "scale_attn"]
+        submodules = ["attn_dropout", "resid_dropout", "qkv_proj", "out_proj", "scale_attn"]

         # Attribute only for transformers>=4.28
         if hasattr(layer, "embed_positions"):
             submodules.append("embed_positions")

+        # Attribute only for transformers<4.45
+        if hasattr(layer, "causal_mask"):
+            submodules.append("causal_mask")
+
         for attr in submodules:
             setattr(self, attr, getattr(layer, attr))

From f01cccf799d754736d54f381be4b01eec966aa89 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Thu, 26 Sep 2024 18:45:22 +0200
Subject: [PATCH 15/22] fix

---
 optimum/bettertransformer/models/decoder_models.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/optimum/bettertransformer/models/decoder_models.py b/optimum/bettertransformer/models/decoder_models.py
index 95ebcdd25f..8393c92b8b 100644
--- a/optimum/bettertransformer/models/decoder_models.py
+++ b/optimum/bettertransformer/models/decoder_models.py
@@ -96,14 +96,19 @@ def __init__(self, layer: "nn.Module", config: "PretrainedConfig"):
             "out_proj",
             "attn_dropout",
             "resid_dropout",
-            "bias",
             "scale_attn",
-            "masked_bias",
         ]

         # Attribute only for transformers>=4.28
         if hasattr(layer, "embed_positions"):
             submodules.append("embed_positions")

+        # Attribute only for transformers<4.45
+        if hasattr(layer, "bias"):
+            submodules.append("bias")
+        if hasattr(layer, "masked_bias"):
+            submodules.append("masked_bias")
+
         for attr in submodules:
             setattr(self, attr, getattr(layer, attr))

From e206d44e7d9a09699c735f8aa48c5daa250d81f9 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Fri, 27 Sep 2024 14:54:36 +0200
Subject: [PATCH 16/22] fix bettertransformers

---
 optimum/bettertransformer/models/attention.py      | 66 +++++++++++++++++++
 optimum/bettertransformer/models/decoder_models.py | 20 +++++-
 2 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/optimum/bettertransformer/models/attention.py b/optimum/bettertransformer/models/attention.py
index 63f0275464..053f283f18 100644
--- a/optimum/bettertransformer/models/attention.py
+++ b/optimum/bettertransformer/models/attention.py
@@ -91,6 +91,72 @@ def gpt2_wrapped_scaled_dot_product(
     return sdpa_result, None

+# Adapted from transformers.models.gptj.modeling_gptj.GPTJAttention._attn
+def gptj_wrapped_scaled_dot_product(
+    self,
+    query: torch.Tensor,
+    key: torch.Tensor,
+    value: torch.Tensor,
+    attention_mask: Optional[torch.Tensor] = None,
+    head_mask: Optional[torch.Tensor] = None,
+):
+    raise_on_head_mask(head_mask)
+    batch_size = query.shape[0]
+
+    mask_value = torch.finfo(value.dtype).min
+    mask_value = torch.full([], mask_value, dtype=value.dtype)
+
+    # in gpt-neo-x and gpt-j the query and keys are always in fp32
+    # thus we need to cast them to the value dtype
+    if self.downcast_qk:
+        query = query.to(value.dtype)
+        key = key.to(value.dtype)
+
+    if batch_size == 1 and attention_mask is not None and attention_mask[0, 0, -1, -1] < -1:
+        raise ValueError("BetterTransformer does not support padding='max_length' with a batch size of 1.")
+
+    dropout_p = self.dropout_prob_attn if self.training else 0.0
+    if batch_size == 1 or self.training:
+        if query.shape[2] > 1:
+            sdpa_result = torch.nn.functional.scaled_dot_product_attention(
+                query, key, value, attn_mask=None, dropout_p=dropout_p, is_causal=True
+            )
+        else:
+            sdpa_result = torch.nn.functional.scaled_dot_product_attention(
+                query, key, value, attn_mask=None, dropout_p=dropout_p, is_causal=False
+            )
+    else:
+        query_length, key_length = query.size(-2), key.size(-2)
+
+        # causal_mask is always [True, ..., True] otherwise, so executing this
+        # is unnecessary
+        if query_length > 1:
+
+            if not check_if_transformers_greater("4.44.99"):
+
+                causal_mask = self.bias[:, :, key_length - query_length : key_length, :key_length].to(torch.bool)
+
+                causal_mask = torch.where(causal_mask, 0, mask_value)
+
+                # torch.Tensor.expand does no memory copy
+                causal_mask = causal_mask.expand(batch_size, -1, -1, -1)
+                if attention_mask is not None:
+                    attention_mask = causal_mask + attention_mask
+
+            else:
+                attention_mask = attention_mask[:, :, :, : key.shape[-2]]
+
+        sdpa_result = torch.nn.functional.scaled_dot_product_attention(
+            query, key, value, attn_mask=attention_mask, dropout_p=dropout_p, is_causal=False
+        )
+
+        # in gpt-neo-x and gpt-j the query and keys are always in fp32
+        # thus we need to cast them to the value dtype
+        if self.downcast_qk:
+            sdpa_result = sdpa_result.to(value.dtype)
+
+    return sdpa_result, None
+

 # Adapted from transformers.models.bark.modeling_bark.BarkSelfAttention._attn
 def bark_wrapped_scaled_dot_product(
diff --git a/optimum/bettertransformer/models/decoder_models.py b/optimum/bettertransformer/models/decoder_models.py
index 8393c92b8b..11616162f3 100644
--- a/optimum/bettertransformer/models/decoder_models.py
+++ b/optimum/bettertransformer/models/decoder_models.py
@@ -43,6 +43,7 @@
     bloom_forward,
     codegen_wrapped_scaled_dot_product,
     gpt2_wrapped_scaled_dot_product,
+    gptj_wrapped_scaled_dot_product,
     gpt_neo_wrapped_scaled_dot_product,
     opt_forward,
     t5_forward,
 )
@@ -82,7 +83,7 @@ def forward(self, *args, **kwargs):

 class GPTJAttentionLayerBetterTransformer(BetterTransformerBaseLayer, GPTJAttention, nn.Module):
-    _attn = gpt2_wrapped_scaled_dot_product
+    _attn = gptj_wrapped_scaled_dot_product

     def __init__(self, layer: "nn.Module", config: "PretrainedConfig"):
         super().__init__(config)
@@ -108,6 +109,9 @@ def __init__(self, layer: "nn.Module", config: "PretrainedConfig"):
         if hasattr(layer, "masked_bias"):
             submodules.append("masked_bias")

+        # Attribute only for transformers>=4.45
+        if hasattr(layer, "layer_idx"):
+            submodules.append("layer_idx")

         for attr in submodules:
             setattr(self, attr, getattr(layer, attr))
@@ -132,6 +136,11 @@ def __init__(self, layer: "nn.Module", config: "PretrainedConfig"):
         self.module_mapping = None
         submodules = ["rotary_emb", "query_key_value", "dense", "bias", "masked_bias", "norm_factor"]
+
+        # Attribute only for transformers>=4.45
+        if hasattr(layer, "layer_idx"):
+            submodules.append("layer_idx")
+
         for attr in submodules:
             setattr(self, attr, getattr(layer, attr))
@@ -160,6 +169,11 @@ def __init__(self, layer: "nn.Module", config: "PretrainedConfig"):
         self.module_mapping = None
         submodules = ["attn_dropout", "resid_dropout", "k_proj", "v_proj", "q_proj", "out_proj", "bias", "masked_bias"]
+
+        # Attribute only for transformers>=4.45
+        if hasattr(layer, "layer_idx"):
+            submodules.append("layer_idx")
+
         for attr in submodules:
             setattr(self, attr, getattr(layer, attr))
@@ -253,6 +267,10 @@ def __init__(self, layer: "nn.Module", config: "PretrainedConfig"):
         if hasattr(layer, "causal_mask"):
             submodules.append("causal_mask")

+        # Attribute only for transformers>=4.45
+        if hasattr(layer, "layer_idx"):
+            submodules.append("layer_idx")
+
         for attr in submodules:
             setattr(self, attr, getattr(layer, attr))

From 3572a0b4d6f96019b82101f23ba2f80b19791500 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Fri, 27 Sep 2024 14:59:39 +0200
Subject: [PATCH 17/22] format

---
 optimum/bettertransformer/models/attention.py      | 3 +--
 optimum/bettertransformer/models/decoder_models.py | 6 +++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/optimum/bettertransformer/models/attention.py b/optimum/bettertransformer/models/attention.py
index 053f283f18..22b8faf1c2 100644
--- a/optimum/bettertransformer/models/attention.py
+++ b/optimum/bettertransformer/models/attention.py
@@ -91,6 +91,7 @@ def gpt2_wrapped_scaled_dot_product(
     return sdpa_result, None

+
 # Adapted from transformers.models.gptj.modeling_gptj.GPTJAttention._attn
 def gptj_wrapped_scaled_dot_product(
     self,
@@ -131,9 +132,7 @@ def gptj_wrapped_scaled_dot_product(
         # causal_mask is always [True, ..., True] otherwise, so executing this
         # is unnecessary
         if query_length > 1:
-
             if not check_if_transformers_greater("4.44.99"):
-
                 causal_mask = self.bias[:, :, key_length - query_length : key_length, :key_length].to(torch.bool)

                 causal_mask = torch.where(causal_mask, 0, mask_value)
diff --git a/optimum/bettertransformer/models/decoder_models.py b/optimum/bettertransformer/models/decoder_models.py
index 11616162f3..1fb7fe6fbf 100644
--- a/optimum/bettertransformer/models/decoder_models.py
+++ b/optimum/bettertransformer/models/decoder_models.py
@@ -43,8 +43,8 @@
     bloom_forward,
     codegen_wrapped_scaled_dot_product,
     gpt2_wrapped_scaled_dot_product,
-    gptj_wrapped_scaled_dot_product,
     gpt_neo_wrapped_scaled_dot_product,
+    gptj_wrapped_scaled_dot_product,
     opt_forward,
     t5_forward,
 )
@@ -169,7 +169,7 @@ def __init__(self, layer: "nn.Module", config: "PretrainedConfig"):
         self.module_mapping = None
         submodules = ["attn_dropout", "resid_dropout", "k_proj", "v_proj", "q_proj", "out_proj", "bias", "masked_bias"]
-
+
         # Attribute only for transformers>=4.45
         if hasattr(layer, "layer_idx"):
             submodules.append("layer_idx")
@@ -270,7 +270,7 @@ def __init__(self, layer: "nn.Module", config: "PretrainedConfig"):
         # Attribute only for transformers>=4.45
         if hasattr(layer, "layer_idx"):
             submodules.append("layer_idx")
-
+
         for attr in submodules:
             setattr(self, attr, getattr(layer, attr))

From d6e97cf224e2f65ea4c7d02c85934c381ee7b51c Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Fri, 27 Sep 2024 15:02:58 +0200
Subject: [PATCH 18/22] add warnign deprecation bettertransformer

---
 optimum/bettertransformer/transformation.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/optimum/bettertransformer/transformation.py b/optimum/bettertransformer/transformation.py
index 2105e19987..a101757b6f 100644
--- a/optimum/bettertransformer/transformation.py
+++ b/optimum/bettertransformer/transformation.py
@@ -206,6 +206,10 @@ def transform(
                 The converted model if the conversion has been successful.
         """

+        logger.warning(
+            "The class `optimum.bettertransformers.transformation.BetterTransformer` is deprecated and will be removed in a future release."
+        )
+
         hf_config = model.config
         if hf_config.model_type in ["falcon", "gpt_bigcode", "llama", "whisper"]:
             raise ValueError(

From 0e2ed872a457bbb6d32af28e5636f3b35a6d8044 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Fri, 27 Sep 2024 15:49:36 +0200
Subject: [PATCH 19/22] bettertransformers fixes

---
 optimum/bettertransformer/models/decoder_models.py | 4 ++--
 tests/bettertransformer/testing_utils.py           | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/optimum/bettertransformer/models/decoder_models.py b/optimum/bettertransformer/models/decoder_models.py
index 1fb7fe6fbf..52d28d076d 100644
--- a/optimum/bettertransformer/models/decoder_models.py
+++ b/optimum/bettertransformer/models/decoder_models.py
@@ -171,8 +171,8 @@ def __init__(self, layer: "nn.Module", config: "PretrainedConfig"):
         submodules = ["attn_dropout", "resid_dropout", "k_proj", "v_proj", "q_proj", "out_proj", "bias", "masked_bias"]

         # Attribute only for transformers>=4.45
-        if hasattr(layer, "layer_idx"):
-            submodules.append("layer_idx")
+        if hasattr(layer, "layer_id"):
+            submodules.append("layer_id")

         for attr in submodules:
             setattr(self, attr, getattr(layer, attr))
diff --git a/tests/bettertransformer/testing_utils.py b/tests/bettertransformer/testing_utils.py
index e9e2edd979..098882180a 100644
--- a/tests/bettertransformer/testing_utils.py
+++ b/tests/bettertransformer/testing_utils.py
@@ -59,12 +59,12 @@
     # "llama": "fxmarty/tiny-llama-fast-tokenizer",
     # "llama-gqa": "noamwies/llama-test-gqa-with-better-transformer",
     "m2m_100": "hf-internal-testing/tiny-random-nllb",
-    "marian": "fxmarty/tiny-marian",  # the other tiny ones have a too small max_position_embeddings
+    "marian": "optimum-internal-testing/tiny-random-marian",  # the other tiny ones have a too small max_position_embeddings
     "markuplm": "hf-internal-testing/tiny-random-MarkupLMModel",
     "mbart": "hf-internal-testing/tiny-random-mbart",
     "opt": "hf-internal-testing/tiny-random-OPTModel",
     "pegasus": "hf-internal-testing/tiny-random-PegasusModel",
-    "prophetnet": "hirotasoshu/tiny-random-prophetnet",  # the other tiny ones have a too small max_position_embeddings
+    "prophetnet": "optimum-internal-testing/tiny-random-prophetnet",  # the other tiny ones have a too small max_position_embeddings
     "rembert": "hf-internal-testing/tiny-random-RemBertModel",
     "roberta": "hf-internal-testing/tiny-random-RobertaModel",
     "rocbert": "hf-internal-testing/tiny-random-RoCBertModel",

From bc28f035b78a1fd2aa8f4e25022def6a81347d7b Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 30 Sep 2024 14:19:11 +0200
Subject: [PATCH 20/22] disable transformers 4.45 for onnx export

---
 optimum/exporters/onnx/convert.py | 5 +++++
 setup.py                          | 7 +++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/optimum/exporters/onnx/convert.py b/optimum/exporters/onnx/convert.py
index 7142d5be4a..f2bf95f3e3 100644
--- a/optimum/exporters/onnx/convert.py
+++ b/optimum/exporters/onnx/convert.py
@@ -26,6 +26,7 @@

 import numpy as np
 import onnx
+import transformers
 from transformers.modeling_utils import get_parameter_dtype
 from transformers.utils import is_tf_available, is_torch_available

@@ -1000,6 +1001,10 @@ def onnx_export_from_model(
     >>> onnx_export_from_model(model, output="gpt2_onnx/")
     ```
     """
+    if check_if_transformers_greater("4.44.99"):
+        raise ImportError(
+            f"ONNX conversion disabled for now for transformers version greater than v4.45, found {transformers.__version__}"
+        )

     TasksManager.standardize_model_attributes(model)
diff --git a/setup.py b/setup.py
index 231dc9110e..24c1ae1cd4 100644
--- a/setup.py
+++ b/setup.py
@@ -54,6 +54,7 @@
         "datasets>=1.2.1",
         "evaluate",
         "protobuf>=3.20.1",
+        "transformers<4.45.0",
     ],
     "onnxruntime-gpu": [
         "onnx",
@@ -62,9 +63,10 @@
         "evaluate",
         "protobuf>=3.20.1",
         "accelerate",  # ORTTrainer requires it.
+        "transformers<4.45.0",
     ],
-    "exporters": ["onnx", "onnxruntime", "timm"],
-    "exporters-gpu": ["onnx", "onnxruntime-gpu", "timm"],
+    "exporters": ["onnx", "onnxruntime", "timm", "transformers<4.45.0"],
+    "exporters-gpu": ["onnx", "onnxruntime-gpu", "timm", "transformers<4.45.0"],
     "exporters-tf": [
         "tensorflow>=2.4,<=2.12.1",
         "tf2onnx",
@@ -75,6 +77,7 @@
         "numpy<1.24.0",
         "datasets<=2.16",
         "transformers[sentencepiece]>=4.26,<4.38",
+        "transformers<4.45.0",
     ],
     "diffusers": ["diffusers"],
     "intel": "optimum-intel>=1.18.0",

From e7d3ba42cdc1b97a33dc0843417f9224442c6b45 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 30 Sep 2024 15:28:18 +0200
Subject: [PATCH 21/22] update model ID

---
 tests/onnxruntime/utils_onnxruntime_tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py
index 804aa00f04..3a6af30df1 100644
--- a/tests/onnxruntime/utils_onnxruntime_tests.py
+++ b/tests/onnxruntime/utils_onnxruntime_tests.py
@@ -112,7 +112,7 @@
     "layoutlm": "hf-internal-testing/tiny-random-LayoutLMModel",
     "layoutlmv3": "hf-internal-testing/tiny-random-LayoutLMv3Model",
     "longt5": "hf-internal-testing/tiny-random-LongT5Model",
-    "llama": "fxmarty/tiny-llama-fast-tokenizer",
+    "llama": "optimum-internal-testing/tiny-random-llama",
     "m2m_100": "hf-internal-testing/tiny-random-m2m_100",
     "marian": "echarlaix/tiny-random-marian",
     "mbart": "hf-internal-testing/tiny-random-mbart",

From e146328029bbdc7320a7dbccf95c2a93ddf5dd89 Mon Sep 17 00:00:00 2001
From: Ella Charlaix
Date: Mon, 30 Sep 2024 15:57:05 +0200
Subject: [PATCH 22/22] udpate model id

---
 tests/onnxruntime/test_modeling.py           | 2 +-
 tests/onnxruntime/utils_onnxruntime_tests.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py
index a19dbcd1a8..4b44acb38a 100644
--- a/tests/onnxruntime/test_modeling.py
+++ b/tests/onnxruntime/test_modeling.py
@@ -3147,7 +3147,7 @@ class ORTModelForAudioClassificationIntegrationTest(ORTModelTestMixin):
         "wavlm",
         "wav2vec2",
         "wav2vec2-conformer",
-        # "whisper",  saving of the model's config is broken in transformers v4.45.0
+        "whisper",
     ]

     FULL_GRID = {"model_arch": SUPPORTED_ARCHITECTURES}
diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py
index 3a6af30df1..a51249781c 100644
--- a/tests/onnxruntime/utils_onnxruntime_tests.py
+++ b/tests/onnxruntime/utils_onnxruntime_tests.py
@@ -151,7 +151,7 @@
     "unispeech_sat": "hf-internal-testing/tiny-random-UnispeechSatModel",
     "vision-encoder-decoder": "hf-internal-testing/tiny-random-VisionEncoderDecoderModel-vit-gpt2",
     "vit": "hf-internal-testing/tiny-random-vit",
-    "whisper": "openai/whisper-tiny.en",  # hf-internal-testing ones are broken
+    "whisper": "optimum-internal-testing/tiny-random-whisper",
     "wav2vec2": "hf-internal-testing/tiny-random-Wav2Vec2Model",
     "wav2vec2-conformer": "hf-internal-testing/tiny-random-wav2vec2-conformer",
     "wavlm": "hf-internal-testing/tiny-random-WavlmModel",
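A recurring pattern across this series is gating on the installed transformers version, either explicitly through check_if_transformers_greater or implicitly by probing for attributes that only some versions define (causal_mask, bias, masked_bias, layer_idx). The sketch below illustrates both forms; the layer object and the submodule names are placeholders, not optimum classes.

    from optimum.utils import check_if_transformers_greater

    def collect_submodules(layer):
        submodules = ["q_proj", "k_proj", "v_proj", "out_proj"]  # assumed to always exist here
        # attributes that only exist on some transformers versions are probed with hasattr
        for optional in ("causal_mask", "bias", "masked_bias", "layer_idx"):
            if hasattr(layer, optional):
                submodules.append(optional)
        return submodules

    if check_if_transformers_greater("4.44.99"):
        ...  # transformers >= 4.45 code path
    else:
        ...  # legacy code path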