initial commit
dbogunowicz committed Dec 18, 2023
1 parent 5ee36d6 commit 7f3eb12
Showing 3 changed files with 36 additions and 78 deletions.
38 changes: 20 additions & 18 deletions src/deepsparse/transformers/helpers.py
@@ -30,6 +30,8 @@
from onnx import ModelProto

from deepsparse.log import get_main_logger
+ from deepsparse.utils.onnx import MODEL_ONNX_NAME, truncate_onnx_model
+ from sparsezoo import Model
from deepsparse.utils.onnx import (
_MODEL_DIR_ONNX_NAME,
model_to_path,
@@ -39,6 +41,7 @@


__all__ = [
"setup_transformers_pipeline",
"get_deployment_path",
"setup_transformers_pipeline",
"overwrite_transformer_onnx_model_inputs",
@@ -55,6 +58,7 @@ def setup_transformers_pipeline(
sequence_length: int,
tokenizer_padding_side: str = "left",
engine_kwargs: Optional[Dict] = None,
+ onnx_model_name: Optional[str] = None,
) -> Tuple[
str, transformers.PretrainedConfig, transformers.PreTrainedTokenizer, Dict[str, Any]
]:
@@ -66,48 +70,46 @@ def setup_transformers_pipeline(
:param tokenizer_padding_side: The side to pad on for the tokenizer,
either "left" or "right"
:param engine_kwargs: The kwargs to pass to the engine
+ :param onnx_model_name: The name of the onnx model to be loaded.
+ If not specified, defaults are used (see setup_onnx_file_path)
:return The model path, config, tokenizer, and engine kwargs
"""
- model_path, config, tokenizer = fetch_onnx_file_path(model_path, sequence_length)
+ model_path, config, tokenizer = setup_onnx_file_path(
+ model_path, sequence_length, onnx_model_name
+ )

tokenizer.padding_side = tokenizer_padding_side
if not tokenizer.pad_token:
tokenizer.pad_token = tokenizer.eos_token

engine_kwargs = engine_kwargs or {}
if engine_kwargs.get("model_path"):
raise ValueError(
"The engine kwargs already specify "
f"a model path: {engine_kwargs['model_path']}, "
f"but a model path was also provided: {model_path}. "
"Please only provide one."
)
engine_kwargs["model_path"] = model_path
return model_path, config, tokenizer, engine_kwargs
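For orientation, a minimal usage sketch of the helper above (hedged: the deployment directory is hypothetical and is assumed to contain model.onnx together with config and tokenizer files):

    from deepsparse.transformers.helpers import setup_transformers_pipeline

    # Resolves the ONNX path and loads config/tokenizer in one call.
    model_path, config, tokenizer, engine_kwargs = setup_transformers_pipeline(
        "./my-deployment-dir",  # hypothetical local transformers export
        sequence_length=2048,
    )
    # The returned engine kwargs carry the resolved model path for the engine.
    assert engine_kwargs["model_path"] == model_path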


- def fetch_onnx_file_path(
+ def setup_onnx_file_path(
model_path: str,
sequence_length: int,
- task: Optional[str] = None,
+ onnx_model_name: Optional[str] = None,
) -> Tuple[str, transformers.PretrainedConfig, transformers.PreTrainedTokenizer]:
"""
Parses ONNX model from the `model_path` provided. It additionally
creates config and tokenizer objects from the `deployment path`,
derived from the `model_path` provided.
:param model_path: path to the model to be parsed
:param sequence_length: maximum sequence length of the model
+ :param onnx_model_name: optionally, the precise name of the ONNX model
+ of interest may be specified. If not specified, the default ONNX model
+ name will be used (refer to `get_deployment_path` for details)
:return: file path to the processed ONNX file for the engine to compile
"""
- deployment_path, onnx_path = get_deployment_path(model_path)
+ deployment_path, onnx_path = get_deployment_path(model_path, onnx_model_name)

hf_logger = logging.getLogger("transformers")
hf_logger_level = hf_logger.level
hf_logger.setLevel(logging.ERROR)

- config = transformers.PretrainedConfig.from_pretrained(
- deployment_path, finetuning_task=task
- )
+ config = transformers.PretrainedConfig.from_pretrained(deployment_path)
hf_logger.setLevel(hf_logger_level)

trust_remote_code = False
@@ -145,13 +147,13 @@ def get_deployment_path(model_path: str) -> Tuple[str, str]:
if os.path.isdir(model_path):
model_files = os.listdir(model_path)

- if _MODEL_DIR_ONNX_NAME not in model_files:
+ if MODEL_ONNX_NAME not in model_files:
raise ValueError(
- f"{_MODEL_DIR_ONNX_NAME} not found in transformers model directory "
+ f"{MODEL_ONNX_NAME} not found in transformers model directory "
f"{model_path}. Be sure that an export of the model is written to "
- f"{os.path.join(model_path, _MODEL_DIR_ONNX_NAME)}"
+ f"{os.path.join(model_path, MODEL_ONNX_NAME)}"
)
- return model_path, os.path.join(model_path, _MODEL_DIR_ONNX_NAME)
+ return model_path, os.path.join(model_path, MODEL_ONNX_NAME)

elif model_path.startswith("zoo:") or model_path.startswith("hf:"):
onnx_model_path = model_to_path(model_path)
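The new onnx_model_name argument threads from setup_transformers_pipeline through setup_onnx_file_path down to get_deployment_path. A sketch of the override path (directory and file names are hypothetical; the directory must contain the named export plus config and tokenizer files):

    from deepsparse.transformers.helpers import setup_onnx_file_path

    # Load a non-default ONNX file from the deployment directory.
    model_path, config, tokenizer = setup_onnx_file_path(
        "./my-deployment-dir",                   # hypothetical directory
        sequence_length=512,
        onnx_model_name="model_quantized.onnx",  # hypothetical file name
    )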
8 changes: 4 additions & 4 deletions src/deepsparse/utils/onnx.py
@@ -56,12 +56,12 @@
"has_model_kv_cache",
"CACHE_INPUT_PREFIX",
"CACHE_OUTPUT_PREFIX",
"_MODEL_DIR_ONNX_NAME",
"MODEL_ONNX_NAME",
]

_LOGGER = logging.getLogger(__name__)

- _MODEL_DIR_ONNX_NAME = "model.onnx"
+ MODEL_ONNX_NAME = "model.onnx"
CACHE_INPUT_PREFIX = "past_key_values"
CACHE_OUTPUT_PREFIX = "present"

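For context, these prefixes correspond to the KV-cache tensor names in a decoder ONNX export; the per-layer naming shown below follows the usual transformers export convention and is an illustrative assumption, not something this diff states:

    from deepsparse.utils.onnx import CACHE_INPUT_PREFIX, CACHE_OUTPUT_PREFIX

    # Typical KV-cache tensor names for layer 0 (illustrative only).
    layer0_key_in = f"{CACHE_INPUT_PREFIX}.0.key"    # "past_key_values.0.key"
    layer0_key_out = f"{CACHE_OUTPUT_PREFIX}.0.key"  # "present.0.key"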
@@ -132,7 +132,7 @@ def model_to_path(model: Union[str, Model, File]) -> str:
model.deployment.path

# default to the main onnx file for the model
- model = model.deployment.get_file(_MODEL_DIR_ONNX_NAME).path
+ model = model.deployment.get_file(MODEL_ONNX_NAME).path

elif File is not object and isinstance(model, File):
# get the downloaded_path -- will auto download if not on local system
@@ -161,7 +161,7 @@ def model_to_path(model: Union[str, Model, File]) -> str:

model_path = Path(model)
if model_path.is_dir():
- return str(model_path / _MODEL_DIR_ONNX_NAME)
+ return str(model_path / MODEL_ONNX_NAME)

return model

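As a quick illustration of the renamed constant: for a local directory, model_to_path appends MODEL_ONNX_NAME ("model.onnx"). A minimal sketch, assuming the directory exists so the is_dir() branch fires (the directory name is hypothetical):

    import os
    from pathlib import Path

    from deepsparse.utils.onnx import MODEL_ONNX_NAME, model_to_path

    os.makedirs("my-deployment-dir", exist_ok=True)  # sketch only
    resolved = model_to_path("my-deployment-dir")
    assert resolved == str(Path("my-deployment-dir") / MODEL_ONNX_NAME)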
68 changes: 12 additions & 56 deletions src/deepsparse/v2/text_generation/pipeline.py
@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

- from typing import Dict
+ from typing import Dict, Optional

+ from deepsparse.transformers.helpers import setup_transformers_pipeline
from deepsparse.transformers.utils.helpers import process_generation_config
from deepsparse.v2.pipeline import Pipeline
from deepsparse.v2.routers import GraphRouter
@@ -45,23 +46,20 @@ def __init__(
internal_kv_cache: bool = True,
force_max_tokens: bool = False,
generation_config=None,
- engine_kwargs: Dict = None,
+ engine_kwargs: Optional[Dict] = None,
):
+ (
+ self.model_path,
+ self.config,
+ self.tokenizer,
+ engine_kwargs,
+ ) = setup_transformers_pipeline(
+ model_path, sequence_length, engine_kwargs=engine_kwargs
+ )

pipeline_state = PipelineState()
pipeline_state_vals = {}

- # TODO: The code below will be replaced with a transformers set-up Operator.
- self.tokenizer = None
- model_path = self.setup_onnx_file_path(model_path, sequence_length)
- self.tokenizer.padding_side = "left"
- if not self.tokenizer.pad_token:
- self.tokenizer.pad_token = self.tokenizer.eos_token
-
- if not engine_kwargs:
- engine_kwargs = {}
- engine_kwargs["model_path"] = model_path

if internal_kv_cache and engine_kwargs.get("engine_type") == "onnxruntime":
internal_kv_cache = False

@@ -80,7 +78,7 @@ def __init__(
)

# NOTE: Currently using pipeline state. Can swap to simply pass in the
- # attributes to the specific Operator that neeed them, as class attributes.
+ # attributes to the specific Operator that need them, as class attributes.
pipeline_state_vals[
"onnx_input_names_no_cache"
] = single_engine_operator.onnx_input_names_no_cache
@@ -180,45 +178,3 @@ def __init__(
super().__init__(
ops=ops, router=router, schedulers=scheduler, pipeline_state=pipeline_state
)

- # TODO: Move to be part of a generic transformers set-up Operator.
- def setup_onnx_file_path(self, model_path, sequence_length) -> str:
- import logging
-
- import transformers
- from transformers import AutoTokenizer
-
- from deepsparse.transformers.helpers import get_deployment_path
-
- """
- Parses ONNX model from the `model_path` provided. It additionally
- creates config and tokenizer objects from the `deployment path`,
- derived from the `model_path` provided.
- :return: file path to the processed ONNX file for the engine to compile
- """
- deployment_path, onnx_path = get_deployment_path(model_path)
-
- hf_logger = logging.getLogger("transformers")
- hf_logger_level = hf_logger.level
- hf_logger.setLevel(logging.ERROR)
- self.config = transformers.PretrainedConfig.from_pretrained(
- deployment_path,
- finetuning_task=self.task if hasattr(self, "task") else None,
- )
- hf_logger.setLevel(hf_logger_level)
-
- self._trust_remote_code = False
- self.tokenizer = AutoTokenizer.from_pretrained(
- deployment_path,
- trust_remote_code=self._trust_remote_code,
- model_max_length=sequence_length,
- )
-
- if not self.config or not self.tokenizer:
- raise RuntimeError(
- "Invalid config or tokenizer provided. Please provide "
- "paths to the files or ensure they exist in the `model_path` provided. "
- "See `tokenizer` and `config` arguments for details."
- )
- return onnx_path

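Net effect of this file's change: the v2 text-generation constructor delegates model, config, and tokenizer setup to the shared setup_transformers_pipeline helper instead of carrying a private copy. A construction sketch, assuming the enclosing class is TextGenerationPipeline and that model_path/sequence_length are constructor arguments (neither the class name nor those parameters are visible in this hunk, and the path is hypothetical):

    from deepsparse.v2.text_generation.pipeline import TextGenerationPipeline  # assumed name

    pipeline = TextGenerationPipeline(
        model_path="./my-deployment-dir",  # hypothetical local export
        sequence_length=1024,
        internal_kv_cache=True,  # disabled above when engine_type is "onnxruntime"
    )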