
[Pipeline Refactor][Text-Generation] Refactor transformers helpers functions #1394

Merged — 24 commits, Nov 20, 2023. Changes shown from all commits.

Commits:
f18d5f3
add split/join functionality
dsikka Nov 3, 2023
2c4d231
update router to include split/join in parent class, refactor pipelin…
dsikka Nov 7, 2023
672ca20
process multiple generations
dsikka Nov 7, 2023
304eb35
initial commit
dbogunowicz Nov 8, 2023
71515ac
fix error
dbogunowicz Nov 8, 2023
6f1b175
Merge remote-tracking branch 'origin/features/v2/run_multiple' into f…
dbogunowicz Nov 9, 2023
a508342
unit testing for text generation operators
dsikka Nov 6, 2023
cbb0e86
additional changes
dsikka Nov 7, 2023
2541581
unit testing completion
dsikka Nov 8, 2023
8c8989d
remove debug
dsikka Nov 8, 2023
f8d75e3
fix
dsikka Nov 8, 2023
fd1e466
add todo
dsikka Nov 8, 2023
64c0552
more clean-up
dsikka Nov 8, 2023
913665a
fix test
dsikka Nov 8, 2023
e15521f
add docstrings/comments
dsikka Nov 8, 2023
379481e
break out tests to individual unit test files; add conftest and make …
dsikka Nov 9, 2023
a90a20a
Merge remote-tracking branch 'origin/features/v2/unit_testing' into f…
dbogunowicz Nov 10, 2023
c0c4240
Merge branch 'v2' into feature/damian/v2/factor_out_transformation_utils
dbogunowicz Nov 10, 2023
4f248dd
Delete tests/deepsparse/v2/unit/text_generation/test_msic.py
dbogunowicz Nov 13, 2023
98f7a6d
Merge branch 'v2' into feature/damian/v2/factor_out_transformation_utils
dbogunowicz Nov 14, 2023
d1683b4
Merge branch 'v2' into feature/damian/v2/factor_out_transformation_utils
dbogunowicz Nov 14, 2023
51c4ee6
pipeline runs, but incorrectly
dbogunowicz Nov 17, 2023
a4f6f19
Revert "pipeline runs, but incorrectly"
dbogunowicz Nov 17, 2023
71f4c6d
PR review comments
dbogunowicz Nov 20, 2023
114 changes: 103 additions & 11 deletions src/deepsparse/transformers/helpers.py
@@ -17,24 +17,26 @@
"""


import logging
import os
import re
from pathlib import Path
from tempfile import NamedTemporaryFile
-from typing import List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union

import numpy
import onnx
import transformers
from onnx import ModelProto

from deepsparse.log import get_main_logger
-from deepsparse.utils.onnx import _MODEL_DIR_ONNX_NAME, truncate_onnx_model
+from deepsparse.utils.onnx import MODEL_ONNX_NAME, truncate_onnx_model
from sparsezoo import Model
from sparsezoo.utils import save_onnx


__all__ = [
"get_deployment_path",
"setup_transformers_pipeline",
"overwrite_transformer_onnx_model_inputs",
"fix_numpy_types",
"get_transformer_layer_init_names",
@@ -44,7 +46,94 @@
_LOGGER = get_main_logger()


-def get_deployment_path(model_path: str) -> Tuple[str, str]:
+def setup_transformers_pipeline(
[Review comment — Contributor]: Could we also get the v1 pipeline to use these? Fear is that we'll have two places to update going forward.
[Reply — Author]: Sure, good idea.
model_path: str,
sequence_length: int,
tokenizer_padding_side: str = "left",
engine_kwargs: Optional[Dict] = None,
onnx_model_name: Optional[str] = None,
) -> Tuple[
str, transformers.PretrainedConfig, transformers.PreTrainedTokenizer, Dict[str, Any]
]:
"""
A helper function that sets up the model path, config, tokenizer,
and engine kwargs for a transformers model.
:param model_path: The path to the model to load
:param sequence_length: The sequence length to use for the model
:param tokenizer_padding_side: The side to pad on for the tokenizer,
either "left" or "right"
:param engine_kwargs: The kwargs to pass to the engine
:param onnx_model_name: The name of the onnx model to be loaded.
If not specified, defaults are used (see setup_onnx_file_path)
    :return: The model path, config, tokenizer, and engine kwargs
"""
model_path, config, tokenizer = setup_onnx_file_path(
model_path, sequence_length, onnx_model_name
)

tokenizer.padding_side = tokenizer_padding_side
if not tokenizer.pad_token:
tokenizer.pad_token = tokenizer.eos_token

engine_kwargs = engine_kwargs or {}
if engine_kwargs.get("model_path"):
raise ValueError(
"The engine kwargs already specify "
f"a model path: {engine_kwargs['model_path']}, "
f"but a model path was also provided: {model_path}. "
"Please only provide one."
)
engine_kwargs["model_path"] = model_path
return model_path, config, tokenizer, engine_kwargs
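
For reference, a minimal usage sketch of the new helper; the local
"./deployment" directory and the engine kwargs below are illustrative
assumptions, not taken from this PR:

```python
from deepsparse.transformers.helpers import setup_transformers_pipeline

model_path, config, tokenizer, engine_kwargs = setup_transformers_pipeline(
    model_path="./deployment",  # directory holding model.onnx + HF config files
    sequence_length=512,
    tokenizer_padding_side="left",
    engine_kwargs={"engine_type": "onnxruntime"},
)
# the returned model_path is the resolved ONNX file path, and engine_kwargs
# now also carries a "model_path" entry; pre-populating "model_path" in
# engine_kwargs raises a ValueError instead.
```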


def setup_onnx_file_path(
model_path: str,
sequence_length: int,
onnx_model_name: Optional[str] = None,
task: Optional[str] = None,
) -> Tuple[str, transformers.PretrainedConfig, transformers.PreTrainedTokenizer]:
"""
Parses ONNX model from the `model_path` provided. It additionally
creates config and tokenizer objects from the `deployment path`,
derived from the `model_path` provided.
:param model_path: path to the model to be parsed
:param sequence_length: maximum sequence length of the model
:param onnx_model_name: optionally, the precise name of the ONNX model
of interest may be specified. If not specified, the default ONNX model
name will be used (refer to `get_deployment_path` for details)
    :param task: optional task name, passed through to the transformers
        config as `finetuning_task`
    :return: path to the ONNX model file, the model config, and the tokenizer
"""
deployment_path, onnx_path = get_deployment_path(model_path, onnx_model_name)

    # temporarily set the transformers logger to ERROR to avoid
    # printing misleading warnings while loading the config
    hf_logger = logging.getLogger("transformers")
    hf_logger_level = hf_logger.level
    hf_logger.setLevel(logging.ERROR)

config = transformers.PretrainedConfig.from_pretrained(
deployment_path, finetuning_task=task
)
hf_logger.setLevel(hf_logger_level)

trust_remote_code = False
tokenizer = transformers.AutoTokenizer.from_pretrained(
deployment_path,
trust_remote_code=trust_remote_code,
model_max_length=sequence_length,
)

if not config or not tokenizer:
raise RuntimeError(
"Invalid config or tokenizer provided. Please provide "
"paths to the files or ensure they exist in the `model_path` provided. "
"See `tokenizer` and `config` arguments for details."
)
return onnx_path, config, tokenizer
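
The config load above temporarily silences the `transformers` logger and then
restores its previous level. A reusable context-manager form of the same
pattern (a hypothetical helper, not part of this PR) could look like:

```python
import logging
from contextlib import contextmanager


@contextmanager
def quiet_logger(name: str, level: int = logging.ERROR):
    # hypothetical sketch: temporarily raise a logger's threshold and
    # restore the previous level afterwards, even if an error is raised
    logger = logging.getLogger(name)
    previous_level = logger.level
    logger.setLevel(level)
    try:
        yield
    finally:
        logger.setLevel(previous_level)


# usage mirroring the suppression in setup_onnx_file_path:
# with quiet_logger("transformers"):
#     config = transformers.PretrainedConfig.from_pretrained(deployment_path)
```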


def get_deployment_path(
[Review comment — Contributor]: We talked about a bug/fix for this in stand-up: #1396. Have we considered this fix as part of this function?
[Reply — Author]: Let's not make this PR take on too many hats. The problem that @bfineran was fixing was particular to the few transformer tasks. More of those are bound to happen once we swap v1 transformers pipelines in favor of v2 transformers pipelines. We will deal with them in their own time; this PR seems fairly orthogonal.

model_path: str, onnx_model_name: Optional[str] = None
) -> Tuple[str, str]:
"""
Returns the path to the deployment directory
for the given model path and the path to the mandatory
@@ -53,36 +142,39 @@ def get_deployment_path(model_path: str) -> Tuple[str, str]:
for running the transformers model in the deepsparse pipeline

:param model_path: path to model directory, sparsezoo stub, or ONNX file
:param onnx_model_name: name of the ONNX file to look for in the deployment
directory. Defaults to MODEL_ONNX_NAME
:return: path to the deployment directory and path to the ONNX file inside
the deployment directory
"""
onnx_model_name = onnx_model_name or MODEL_ONNX_NAME
if os.path.isfile(model_path):
# return the parent directory of the ONNX file
return os.path.dirname(model_path), model_path

if os.path.isdir(model_path):
model_files = os.listdir(model_path)

-        if _MODEL_DIR_ONNX_NAME not in model_files:
+        if onnx_model_name not in model_files:
             raise ValueError(
-                f"{_MODEL_DIR_ONNX_NAME} not found in transformers model directory "
+                f"{onnx_model_name} not found in transformers model directory "
                 f"{model_path}. Be sure that an export of the model is written to "
-                f"{os.path.join(model_path, _MODEL_DIR_ONNX_NAME)}"
+                f"{os.path.join(model_path, onnx_model_name)}"
             )
-        return model_path, os.path.join(model_path, _MODEL_DIR_ONNX_NAME)
+        return model_path, os.path.join(model_path, onnx_model_name)

elif model_path.startswith("zoo:"):
zoo_model = Model(model_path)
deployment_path = zoo_model.deployment_directory_path
-        return deployment_path, os.path.join(deployment_path, _MODEL_DIR_ONNX_NAME)
+        return deployment_path, os.path.join(deployment_path, onnx_model_name)
     elif model_path.startswith("hf:"):
         from huggingface_hub import snapshot_download

         deployment_path = snapshot_download(repo_id=model_path.replace("hf:", "", 1))
-        onnx_path = os.path.join(deployment_path, _MODEL_DIR_ONNX_NAME)
+        onnx_path = os.path.join(deployment_path, onnx_model_name)
         if not os.path.isfile(onnx_path):
             raise ValueError(
-                f"{_MODEL_DIR_ONNX_NAME} not found in transformers model directory "
+                f"{onnx_model_name} not found in transformers model directory "
f"{deployment_path}. Be sure that an export of the model is written to "
f"{onnx_path}"
)
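
Summarizing the resolution rules above as a sketch (the local paths and the
`model_quantized.onnx` name are made-up examples):

```python
from deepsparse.transformers.helpers import get_deployment_path

# an ONNX file path resolves to (parent directory, the file itself)
deployment_dir, onnx_path = get_deployment_path("./deployment/model.onnx")

# a local directory resolves to (directory, directory/model.onnx),
# raising a ValueError if the expected file is missing
deployment_dir, onnx_path = get_deployment_path("./deployment")

# the new onnx_model_name argument overrides the default file name
deployment_dir, onnx_path = get_deployment_path(
    "./deployment", onnx_model_name="model_quantized.onnx"
)

# "zoo:" and "hf:" prefixed stubs are downloaded first, then resolved the
# same way against the downloaded deployment directory
```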
38 changes: 11 additions & 27 deletions src/deepsparse/transformers/pipelines/pipeline.py
@@ -16,19 +16,18 @@
Base Pipeline class for transformers inference pipeline
"""

-import logging

import warnings
from pathlib import Path
from typing import Any, Dict, List, Mapping, Optional, Union

import numpy
import transformers
from transformers.models.auto import AutoTokenizer

from deepsparse import Bucketable, Pipeline
-from deepsparse.transformers.helpers import overwrite_transformer_onnx_model_inputs
+from deepsparse.transformers.helpers import (
+    get_deployment_path,
+    overwrite_transformer_onnx_model_inputs,
+    setup_onnx_file_path as setup_onnx_file_path_v2,
+)


@@ -124,24 +123,15 @@ def setup_onnx_file_path(self) -> str:

:return: file path to the processed ONNX file for the engine to compile
"""
-        deployment_path, onnx_path = get_deployment_path(self.model_path)
-
-        # temporarily set transformers logger to ERROR to avoid
-        # printing misleading warnings
-        hf_logger = logging.getLogger("transformers")
-        hf_logger_level = hf_logger.level
-        hf_logger.setLevel(logging.ERROR)
-        self.config = transformers.PretrainedConfig.from_pretrained(
-            deployment_path,
-            finetuning_task=self.task if hasattr(self, "task") else None,
-        )
-        hf_logger.setLevel(hf_logger_level)
-
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            deployment_path,
-            trust_remote_code=self._trust_remote_code,
-            model_max_length=self.sequence_length,
-        )
+        # we will be soon retiring V1 pipelines. This is why I am deciding
+        # to reuse the functions from V2 pipelines in the (soon) legacy pipelines
+        onnx_path, config, tokenizer = setup_onnx_file_path_v2(
+            model_path=self.model_path,
+            sequence_length=self.sequence_length,
+            task=self.task if hasattr(self, "task") else None,
+        )
+        self.config = config
+        self.tokenizer = tokenizer

if not self._delay_overwriting_inputs:
# overwrite onnx graph to given required input shape
@@ -153,12 +143,6 @@
onnx_path, max_length=self.sequence_length
)

-        if not self.config or not self.tokenizer:
-            raise RuntimeError(
-                "Invalid config or tokenizer provided. Please provide "
-                "paths to the files or ensure they exist in the `model_path` provided. "
-                "See `tokenizer` and `config` arguments for details."
-            )
return onnx_path

def tokens_to_engine_input(
8 changes: 4 additions & 4 deletions src/deepsparse/utils/onnx.py
@@ -56,12 +56,12 @@
"has_model_kv_cache",
"CACHE_INPUT_PREFIX",
"CACHE_OUTPUT_PREFIX",
"_MODEL_DIR_ONNX_NAME",
"MODEL_ONNX_NAME",
]

_LOGGER = logging.getLogger(__name__)

-_MODEL_DIR_ONNX_NAME = "model.onnx"
+MODEL_ONNX_NAME = "model.onnx"
CACHE_INPUT_PREFIX = "past_key_values"
CACHE_OUTPUT_PREFIX = "present"

@@ -132,7 +132,7 @@ def model_to_path(model: Union[str, Model, File]) -> str:
model.deployment_directory_path

# default to the main onnx file for the model
-        model = model.deployment.get_file(_MODEL_DIR_ONNX_NAME).path
+        model = model.deployment.get_file(MODEL_ONNX_NAME).path

elif File is not object and isinstance(model, File):
# get the downloaded_path -- will auto download if not on local system
@@ -146,7 +146,7 @@

model_path = Path(model)
if model_path.is_dir():
-        return str(model_path / _MODEL_DIR_ONNX_NAME)
+        return str(model_path / MODEL_ONNX_NAME)

return model

68 changes: 12 additions & 56 deletions src/deepsparse/v2/text_generation/pipeline.py
@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-from typing import Dict
+from typing import Dict, Optional

+from deepsparse.transformers.helpers import setup_transformers_pipeline
from deepsparse.transformers.utils.helpers import process_generation_config
from deepsparse.utils import split_engine_inputs
from deepsparse.v2.pipeline import Pipeline
@@ -47,23 +48,20 @@ def __init__(
internal_kv_cache: bool = True,
force_max_tokens: bool = False,
generation_config=None,
-        engine_kwargs: Dict = None,
+        engine_kwargs: Optional[Dict] = None,
):
+        (
+            self.model_path,
+            self.config,
+            self.tokenizer,
+            engine_kwargs,
+        ) = setup_transformers_pipeline(
+            model_path, sequence_length, engine_kwargs=engine_kwargs
+        )

pipeline_state = PipelineState()
pipeline_state_vals = {}

-        # TODO: The code below will be replaced with a transformers set-up Operator.
-        self.tokenizer = None
-        model_path = self.setup_onnx_file_path(model_path, sequence_length)
-        self.tokenizer.padding_side = "left"
-        if not self.tokenizer.pad_token:
-            self.tokenizer.pad_token = self.tokenizer.eos_token
-
-        if not engine_kwargs:
-            engine_kwargs = {}
-        engine_kwargs["model_path"] = model_path

if internal_kv_cache and engine_kwargs.get("engine_type") == "onnxruntime":
internal_kv_cache = False

@@ -82,7 +80,7 @@
)

# NOTE: Currently using pipeline state. Can swap to simply pass in the
-        # attributes to the specific Operator that neeed them, as class attributes.
+        # attributes to the specific Operator that need them, as class attributes.
pipeline_state_vals[
"onnx_input_names_no_cache"
] = single_engine_operator.onnx_input_names_no_cache
@@ -196,45 +194,3 @@ def expand_inputs(self, items, batch_size):

def condense_inputs(self, *args, **kwargs):
return args[0], kwargs

-    # TODO: Move to be part of a generic transformers set-up Operator.
-    def setup_onnx_file_path(self, model_path, sequence_length) -> str:
-        import logging
-
-        import transformers
-        from transformers import AutoTokenizer
-
-        from deepsparse.transformers.helpers import get_deployment_path
-
-        """
-        Parses ONNX model from the `model_path` provided. It additionally
-        creates config and tokenizer objects from the `deployment path`,
-        derived from the `model_path` provided.
-
-        :return: file path to the processed ONNX file for the engine to compile
-        """
-        deployment_path, onnx_path = get_deployment_path(model_path)
-
-        hf_logger = logging.getLogger("transformers")
-        hf_logger_level = hf_logger.level
-        hf_logger.setLevel(logging.ERROR)
-        self.config = transformers.PretrainedConfig.from_pretrained(
-            deployment_path,
-            finetuning_task=self.task if hasattr(self, "task") else None,
-        )
-        hf_logger.setLevel(hf_logger_level)
-
-        self._trust_remote_code = False
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            deployment_path,
-            trust_remote_code=self._trust_remote_code,
-            model_max_length=sequence_length,
-        )
-
-        if not self.config or not self.tokenizer:
-            raise RuntimeError(
-                "Invalid config or tokenizer provided. Please provide "
-                "paths to the files or ensure they exist in the `model_path` provided. "
-                "See `tokenizer` and `config` arguments for details."
-            )
-        return onnx_path
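
Sketch of the refactored construction flow; the class name, import path, and
argument values below are assumptions for illustration, not taken from this
diff:

```python
from deepsparse.v2.text_generation.pipeline import TextGenerationPipeline

pipeline = TextGenerationPipeline(
    model_path="./deployment",  # or a "zoo:"/"hf:" stub
    sequence_length=1024,
    engine_kwargs={"engine_type": "onnxruntime"},  # forces internal_kv_cache off
)
# setup_transformers_pipeline now populates pipeline.model_path, .config, and
# .tokenizer, and injects "model_path" into engine_kwargs before the engine
# operators are built.
```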