initial commit
dbogunowicz committed Dec 18, 2023
1 parent 5ee36d6 commit 7f3eb12
Showing 3 changed files with 36 additions and 78 deletions.
38 changes: 20 additions & 18 deletions src/deepsparse/transformers/helpers.py
@@ -30,6 +30,8 @@
from onnx import ModelProto

from deepsparse.log import get_main_logger
+ from deepsparse.utils.onnx import MODEL_ONNX_NAME, truncate_onnx_model
+ from sparsezoo import Model
from deepsparse.utils.onnx import (
_MODEL_DIR_ONNX_NAME,
model_to_path,
@@ -39,6 +41,7 @@


__all__ = [
"setup_transformers_pipeline",
"get_deployment_path",
"setup_transformers_pipeline",
"overwrite_transformer_onnx_model_inputs",
@@ -55,6 +58,7 @@ def setup_transformers_pipeline(
sequence_length: int,
tokenizer_padding_side: str = "left",
engine_kwargs: Optional[Dict] = None,
+ onnx_model_name: Optional[str] = None,
) -> Tuple[
str, transformers.PretrainedConfig, transformers.PreTrainedTokenizer, Dict[str, Any]
]:
@@ -66,48 +70,46 @@ def setup_transformers_pipeline(
:param tokenizer_padding_side: The side to pad on for the tokenizer,
either "left" or "right"
:param engine_kwargs: The kwargs to pass to the engine
+ :param onnx_model_name: The name of the onnx model to be loaded.
+ If not specified, defaults are used (see setup_onnx_file_path)
:return The model path, config, tokenizer, and engine kwargs
"""
- model_path, config, tokenizer = fetch_onnx_file_path(model_path, sequence_length)
+ model_path, config, tokenizer = setup_onnx_file_path(
+ model_path, sequence_length, onnx_model_name
+ )

tokenizer.padding_side = tokenizer_padding_side
if not tokenizer.pad_token:
tokenizer.pad_token = tokenizer.eos_token

engine_kwargs = engine_kwargs or {}
if engine_kwargs.get("model_path"):
raise ValueError(
"The engine kwargs already specify "
f"a model path: {engine_kwargs['model_path']}, "
f"but a model path was also provided: {model_path}. "
"Please only provide one."
)
engine_kwargs["model_path"] = model_path
return model_path, config, tokenizer, engine_kwargs
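For orientation, a minimal usage sketch of the helper above (hedged: the deployment directory is hypothetical and is assumed to contain model.onnx together with config and tokenizer files):

    from deepsparse.transformers.helpers import setup_transformers_pipeline

    # Resolves the ONNX path and loads config/tokenizer in one call.
    model_path, config, tokenizer, engine_kwargs = setup_transformers_pipeline(
        "./my-deployment-dir",  # hypothetical local transformers export
        sequence_length=2048,
    )
    # The returned engine kwargs carry the resolved model path for the engine.
    assert engine_kwargs["model_path"] == model_path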


- def fetch_onnx_file_path(
+ def setup_onnx_file_path(
model_path: str,
sequence_length: int,
- task: Optional[str] = None,
+ onnx_model_name: Optional[str] = None,
) -> Tuple[str, transformers.PretrainedConfig, transformers.PreTrainedTokenizer]:
"""
Parses ONNX model from the `model_path` provided. It additionally
creates config and tokenizer objects from the `deployment path`,
derived from the `model_path` provided.
:param model_path: path to the model to be parsed
:param sequence_length: maximum sequence length of the model
+ :param onnx_model_name: optionally, the precise name of the ONNX model
+ of interest may be specified. If not specified, the default ONNX model
+ name will be used (refer to `get_deployment_path` for details)
:return: file path to the processed ONNX file for the engine to compile
"""
- deployment_path, onnx_path = get_deployment_path(model_path)
+ deployment_path, onnx_path = get_deployment_path(model_path, onnx_model_name)

hf_logger = logging.getLogger("transformers")
hf_logger_level = hf_logger.level
hf_logger.setLevel(logging.ERROR)

- config = transformers.PretrainedConfig.from_pretrained(
- deployment_path, finetuning_task=task
- )
+ config = transformers.PretrainedConfig.from_pretrained(deployment_path)
hf_logger.setLevel(hf_logger_level)

trust_remote_code = False
@@ -145,13 +147,13 @@ def get_deployment_path(model_path: str) -> Tuple[str, str]:
if os.path.isdir(model_path):
model_files = os.listdir(model_path)

- if _MODEL_DIR_ONNX_NAME not in model_files:
+ if MODEL_ONNX_NAME not in model_files:
raise ValueError(
- f"{_MODEL_DIR_ONNX_NAME} not found in transformers model directory "
+ f"{MODEL_ONNX_NAME} not found in transformers model directory "
f"{model_path}. Be sure that an export of the model is written to "
- f"{os.path.join(model_path, _MODEL_DIR_ONNX_NAME)}"
+ f"{os.path.join(model_path, MODEL_ONNX_NAME)}"
)
- return model_path, os.path.join(model_path, _MODEL_DIR_ONNX_NAME)
+ return model_path, os.path.join(model_path, MODEL_ONNX_NAME)

elif model_path.startswith("zoo:") or model_path.startswith("hf:"):
onnx_model_path = model_to_path(model_path)
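The new onnx_model_name argument threads from setup_transformers_pipeline through setup_onnx_file_path down to get_deployment_path. A sketch of the override path (directory and file names are hypothetical; the directory must contain the named export plus config and tokenizer files):

    from deepsparse.transformers.helpers import setup_onnx_file_path

    # Load a non-default ONNX file from the deployment directory.
    model_path, config, tokenizer = setup_onnx_file_path(
        "./my-deployment-dir",                   # hypothetical directory
        sequence_length=512,
        onnx_model_name="model_quantized.onnx",  # hypothetical file name
    )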
8 changes: 4 additions & 4 deletions src/deepsparse/utils/onnx.py
@@ -56,12 +56,12 @@
"has_model_kv_cache",
"CACHE_INPUT_PREFIX",
"CACHE_OUTPUT_PREFIX",
"_MODEL_DIR_ONNX_NAME",
"MODEL_ONNX_NAME",
]

_LOGGER = logging.getLogger(__name__)

- _MODEL_DIR_ONNX_NAME = "model.onnx"
+ MODEL_ONNX_NAME = "model.onnx"
CACHE_INPUT_PREFIX = "past_key_values"
CACHE_OUTPUT_PREFIX = "present"

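For context, these prefixes correspond to the KV-cache tensor names in a decoder ONNX export; the per-layer naming shown below follows the usual transformers export convention and is an illustrative assumption, not something this diff states:

    from deepsparse.utils.onnx import CACHE_INPUT_PREFIX, CACHE_OUTPUT_PREFIX

    # Typical KV-cache tensor names for layer 0 (illustrative only).
    layer0_key_in = f"{CACHE_INPUT_PREFIX}.0.key"    # "past_key_values.0.key"
    layer0_key_out = f"{CACHE_OUTPUT_PREFIX}.0.key"  # "present.0.key"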
@@ -132,7 +132,7 @@ def model_to_path(model: Union[str, Model, File]) -> str:
model.deployment.path

# default to the main onnx file for the model
- model = model.deployment.get_file(_MODEL_DIR_ONNX_NAME).path
+ model = model.deployment.get_file(MODEL_ONNX_NAME).path

elif File is not object and isinstance(model, File):
# get the downloaded_path -- will auto download if not on local system
@@ -161,7 +161,7 @@ def model_to_path(model: Union[str, Model, File]) -> str:

model_path = Path(model)
if model_path.is_dir():
- return str(model_path / _MODEL_DIR_ONNX_NAME)
+ return str(model_path / MODEL_ONNX_NAME)

return model

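As a quick illustration of the renamed constant: for a local directory, model_to_path appends MODEL_ONNX_NAME ("model.onnx"). A minimal sketch, assuming the directory exists so the is_dir() branch fires (the directory name is hypothetical):

    import os
    from pathlib import Path

    from deepsparse.utils.onnx import MODEL_ONNX_NAME, model_to_path

    os.makedirs("my-deployment-dir", exist_ok=True)  # sketch only
    resolved = model_to_path("my-deployment-dir")
    assert resolved == str(Path("my-deployment-dir") / MODEL_ONNX_NAME)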
68 changes: 12 additions & 56 deletions src/deepsparse/v2/text_generation/pipeline.py
@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

- from typing import Dict
+ from typing import Dict, Optional

+ from deepsparse.transformers.helpers import setup_transformers_pipeline
from deepsparse.transformers.utils.helpers import process_generation_config
from deepsparse.v2.pipeline import Pipeline
from deepsparse.v2.routers import GraphRouter
@@ -45,23 +46,20 @@ def __init__(
internal_kv_cache: bool = True,
force_max_tokens: bool = False,
generation_config=None,
- engine_kwargs: Dict = None,
+ engine_kwargs: Optional[Dict] = None,
):
+ (
+ self.model_path,
+ self.config,
+ self.tokenizer,
+ engine_kwargs,
+ ) = setup_transformers_pipeline(
+ model_path, sequence_length, engine_kwargs=engine_kwargs
+ )

pipeline_state = PipelineState()
pipeline_state_vals = {}

- # TODO: The code below will be replaced with a transformers set-up Operator.
- self.tokenizer = None
- model_path = self.setup_onnx_file_path(model_path, sequence_length)
- self.tokenizer.padding_side = "left"
- if not self.tokenizer.pad_token:
- self.tokenizer.pad_token = self.tokenizer.eos_token
-
- if not engine_kwargs:
- engine_kwargs = {}
- engine_kwargs["model_path"] = model_path

if internal_kv_cache and engine_kwargs.get("engine_type") == "onnxruntime":
internal_kv_cache = False

@@ -80,7 +78,7 @@ def __init__(
)

# NOTE: Currently using pipeline state. Can swap to simply pass in the
- # attributes to the specific Operator that neeed them, as class attributes.
+ # attributes to the specific Operator that need them, as class attributes.
pipeline_state_vals[
"onnx_input_names_no_cache"
] = single_engine_operator.onnx_input_names_no_cache
@@ -180,45 +178,3 @@ def __init__(
super().__init__(
ops=ops, router=router, schedulers=scheduler, pipeline_state=pipeline_state
)

- # TODO: Move to be part of a generic transformers set-up Operator.
- def setup_onnx_file_path(self, model_path, sequence_length) -> str:
- import logging
-
- import transformers
- from transformers import AutoTokenizer
-
- from deepsparse.transformers.helpers import get_deployment_path
-
- """
- Parses ONNX model from the `model_path` provided. It additionally
- creates config and tokenizer objects from the `deployment path`,
- derived from the `model_path` provided.
- :return: file path to the processed ONNX file for the engine to compile
- """
- deployment_path, onnx_path = get_deployment_path(model_path)
-
- hf_logger = logging.getLogger("transformers")
- hf_logger_level = hf_logger.level
- hf_logger.setLevel(logging.ERROR)
- self.config = transformers.PretrainedConfig.from_pretrained(
- deployment_path,
- finetuning_task=self.task if hasattr(self, "task") else None,
- )
- hf_logger.setLevel(hf_logger_level)
-
- self._trust_remote_code = False
- self.tokenizer = AutoTokenizer.from_pretrained(
- deployment_path,
- trust_remote_code=self._trust_remote_code,
- model_max_length=sequence_length,
- )
-
- if not self.config or not self.tokenizer:
- raise RuntimeError(
- "Invalid config or tokenizer provided. Please provide "
- "paths to the files or ensure they exist in the `model_path` provided. "
- "See `tokenizer` and `config` arguments for details."
- )
- return onnx_path

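Net effect of this file's change: the v2 text-generation constructor delegates model, config, and tokenizer setup to the shared setup_transformers_pipeline helper instead of carrying a private copy. A construction sketch, assuming the enclosing class is TextGenerationPipeline and that model_path/sequence_length are constructor arguments (neither the class name nor those parameters are visible in this hunk, and the path is hypothetical):

    from deepsparse.v2.text_generation.pipeline import TextGenerationPipeline  # assumed name

    pipeline = TextGenerationPipeline(
        model_path="./my-deployment-dir",  # hypothetical local export
        sequence_length=1024,
        internal_kv_cache=True,  # disabled above when engine_type is "onnxruntime"
    )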