
[Pipeline Refactor][Text-Generation] Refactor transformers helpers functions #1394

Merged — 24 commits, Nov 20, 2023. Changes shown from all commits.

Commits:
f18d5f3
add split/join functionality
dsikka Nov 3, 2023
2c4d231
update router to include split/join in parent class, refactor pipelin…
dsikka Nov 7, 2023
672ca20
process multiple generations
dsikka Nov 7, 2023
304eb35
initial commit
dbogunowicz Nov 8, 2023
71515ac
fix error
dbogunowicz Nov 8, 2023
6f1b175
Merge remote-tracking branch 'origin/features/v2/run_multiple' into f…
dbogunowicz Nov 9, 2023
a508342
unit testing for text generation operators
dsikka Nov 6, 2023
cbb0e86
additional changes
dsikka Nov 7, 2023
2541581
unit testing completion
dsikka Nov 8, 2023
8c8989d
remove debug
dsikka Nov 8, 2023
f8d75e3
fix
dsikka Nov 8, 2023
fd1e466
add todo
dsikka Nov 8, 2023
64c0552
more clean-up
dsikka Nov 8, 2023
913665a
fix test
dsikka Nov 8, 2023
e15521f
add docstrings/comments
dsikka Nov 8, 2023
379481e
break out tests to individual unit test files; add conftest and make …
dsikka Nov 9, 2023
a90a20a
Merge remote-tracking branch 'origin/features/v2/unit_testing' into f…
dbogunowicz Nov 10, 2023
c0c4240
Merge branch 'v2' into feature/damian/v2/factor_out_transformation_utils
dbogunowicz Nov 10, 2023
4f248dd
Delete tests/deepsparse/v2/unit/text_generation/test_msic.py
dbogunowicz Nov 13, 2023
98f7a6d
Merge branch 'v2' into feature/damian/v2/factor_out_transformation_utils
dbogunowicz Nov 14, 2023
d1683b4
Merge branch 'v2' into feature/damian/v2/factor_out_transformation_utils
dbogunowicz Nov 14, 2023
51c4ee6
pipeline runs, but incorrectly
dbogunowicz Nov 17, 2023
a4f6f19
Revert "pipeline runs, but incorrectly"
dbogunowicz Nov 17, 2023
71f4c6d
PR review comments
dbogunowicz Nov 20, 2023
114 changes: 103 additions & 11 deletions src/deepsparse/transformers/helpers.py
@@ -17,24 +17,26 @@
"""


import logging
import os
import re
from pathlib import Path
from tempfile import NamedTemporaryFile
-from typing import List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union

import numpy
import onnx
import transformers
from onnx import ModelProto

from deepsparse.log import get_main_logger
-from deepsparse.utils.onnx import _MODEL_DIR_ONNX_NAME, truncate_onnx_model
+from deepsparse.utils.onnx import MODEL_ONNX_NAME, truncate_onnx_model
from sparsezoo import Model
from sparsezoo.utils import save_onnx


__all__ = [
"get_deployment_path",
"setup_transformers_pipeline",
"overwrite_transformer_onnx_model_inputs",
"fix_numpy_types",
"get_transformer_layer_init_names",
@@ -44,7 +46,94 @@
_LOGGER = get_main_logger()


-def get_deployment_path(model_path: str) -> Tuple[str, str]:
+def setup_transformers_pipeline(
[Review comment — Contributor]: Could we also get the v1 pipeline to use these? Fear is that we'll have two places to update going forward.
[Reply — Author]: Sure, good idea.
model_path: str,
sequence_length: int,
tokenizer_padding_side: str = "left",
engine_kwargs: Optional[Dict] = None,
onnx_model_name: Optional[str] = None,
) -> Tuple[
str, transformers.PretrainedConfig, transformers.PreTrainedTokenizer, Dict[str, Any]
]:
"""
A helper function that sets up the model path, config, tokenizer,
and engine kwargs for a transformers model.
:param model_path: The path to the model to load
:param sequence_length: The sequence length to use for the model
:param tokenizer_padding_side: The side to pad on for the tokenizer,
either "left" or "right"
:param engine_kwargs: The kwargs to pass to the engine
:param onnx_model_name: The name of the onnx model to be loaded.
If not specified, defaults are used (see setup_onnx_file_path)
    :return: The model path, config, tokenizer, and engine kwargs
"""
model_path, config, tokenizer = setup_onnx_file_path(
model_path, sequence_length, onnx_model_name
)

tokenizer.padding_side = tokenizer_padding_side
if not tokenizer.pad_token:
tokenizer.pad_token = tokenizer.eos_token

engine_kwargs = engine_kwargs or {}
if engine_kwargs.get("model_path"):
raise ValueError(
"The engine kwargs already specify "
f"a model path: {engine_kwargs['model_path']}, "
f"but a model path was also provided: {model_path}. "
"Please only provide one."
)
engine_kwargs["model_path"] = model_path
return model_path, config, tokenizer, engine_kwargs
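
For reference, a minimal usage sketch of the new helper; the local
"./deployment" directory and the engine kwargs below are illustrative
assumptions, not taken from this PR:

```python
from deepsparse.transformers.helpers import setup_transformers_pipeline

model_path, config, tokenizer, engine_kwargs = setup_transformers_pipeline(
    model_path="./deployment",  # directory holding model.onnx + HF config files
    sequence_length=512,
    tokenizer_padding_side="left",
    engine_kwargs={"engine_type": "onnxruntime"},
)
# the returned model_path is the resolved ONNX file path, and engine_kwargs
# now also carries a "model_path" entry; pre-populating "model_path" in
# engine_kwargs raises a ValueError instead.
```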


def setup_onnx_file_path(
model_path: str,
sequence_length: int,
onnx_model_name: Optional[str] = None,
task: Optional[str] = None,
) -> Tuple[str, transformers.PretrainedConfig, transformers.PreTrainedTokenizer]:
"""
Parses ONNX model from the `model_path` provided. It additionally
creates config and tokenizer objects from the `deployment path`,
derived from the `model_path` provided.
:param model_path: path to the model to be parsed
:param sequence_length: maximum sequence length of the model
:param onnx_model_name: optionally, the precise name of the ONNX model
of interest may be specified. If not specified, the default ONNX model
name will be used (refer to `get_deployment_path` for details)
    :param task: optional task name, passed through to the transformers
        config as `finetuning_task`
    :return: path to the ONNX model file, the model config, and the tokenizer
"""
deployment_path, onnx_path = get_deployment_path(model_path, onnx_model_name)

    # temporarily set the transformers logger to ERROR to avoid
    # printing misleading warnings while loading the config
    hf_logger = logging.getLogger("transformers")
    hf_logger_level = hf_logger.level
    hf_logger.setLevel(logging.ERROR)

config = transformers.PretrainedConfig.from_pretrained(
deployment_path, finetuning_task=task
)
hf_logger.setLevel(hf_logger_level)

trust_remote_code = False
tokenizer = transformers.AutoTokenizer.from_pretrained(
deployment_path,
trust_remote_code=trust_remote_code,
model_max_length=sequence_length,
)

if not config or not tokenizer:
raise RuntimeError(
"Invalid config or tokenizer provided. Please provide "
"paths to the files or ensure they exist in the `model_path` provided. "
"See `tokenizer` and `config` arguments for details."
)
return onnx_path, config, tokenizer
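
The config load above temporarily silences the `transformers` logger and then
restores its previous level. A reusable context-manager form of the same
pattern (a hypothetical helper, not part of this PR) could look like:

```python
import logging
from contextlib import contextmanager


@contextmanager
def quiet_logger(name: str, level: int = logging.ERROR):
    # hypothetical sketch: temporarily raise a logger's threshold and
    # restore the previous level afterwards, even if an error is raised
    logger = logging.getLogger(name)
    previous_level = logger.level
    logger.setLevel(level)
    try:
        yield
    finally:
        logger.setLevel(previous_level)


# usage mirroring the suppression in setup_onnx_file_path:
# with quiet_logger("transformers"):
#     config = transformers.PretrainedConfig.from_pretrained(deployment_path)
```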


def get_deployment_path(
[Review comment — Contributor]: We talked about a bug/fix for this in stand-up: #1396. Have we considered this fix as part of this function?
[Reply — Author]: Let's not make this PR take on too many hats. The problem that @bfineran was fixing was particular to the few transformer tasks. More of those are bound to happen once we swap v1 transformers pipelines in favor of v2 transformers pipelines. We will deal with them in their own time; this PR seems fairly orthogonal.

model_path: str, onnx_model_name: Optional[str] = None
) -> Tuple[str, str]:
"""
Returns the path to the deployment directory
for the given model path and the path to the mandatory
@@ -53,36 +142,39 @@ def get_deployment_path(model_path: str) -> Tuple[str, str]:
for running the transformers model in the deepsparse pipeline

:param model_path: path to model directory, sparsezoo stub, or ONNX file
:param onnx_model_name: name of the ONNX file to look for in the deployment
directory. Defaults to MODEL_ONNX_NAME
:return: path to the deployment directory and path to the ONNX file inside
the deployment directory
"""
onnx_model_name = onnx_model_name or MODEL_ONNX_NAME
if os.path.isfile(model_path):
# return the parent directory of the ONNX file
return os.path.dirname(model_path), model_path

if os.path.isdir(model_path):
model_files = os.listdir(model_path)

-        if _MODEL_DIR_ONNX_NAME not in model_files:
+        if onnx_model_name not in model_files:
             raise ValueError(
-                f"{_MODEL_DIR_ONNX_NAME} not found in transformers model directory "
+                f"{onnx_model_name} not found in transformers model directory "
                 f"{model_path}. Be sure that an export of the model is written to "
-                f"{os.path.join(model_path, _MODEL_DIR_ONNX_NAME)}"
+                f"{os.path.join(model_path, onnx_model_name)}"
             )
-        return model_path, os.path.join(model_path, _MODEL_DIR_ONNX_NAME)
+        return model_path, os.path.join(model_path, onnx_model_name)

elif model_path.startswith("zoo:"):
zoo_model = Model(model_path)
deployment_path = zoo_model.deployment_directory_path
-        return deployment_path, os.path.join(deployment_path, _MODEL_DIR_ONNX_NAME)
+        return deployment_path, os.path.join(deployment_path, onnx_model_name)
     elif model_path.startswith("hf:"):
         from huggingface_hub import snapshot_download

         deployment_path = snapshot_download(repo_id=model_path.replace("hf:", "", 1))
-        onnx_path = os.path.join(deployment_path, _MODEL_DIR_ONNX_NAME)
+        onnx_path = os.path.join(deployment_path, onnx_model_name)
         if not os.path.isfile(onnx_path):
             raise ValueError(
-                f"{_MODEL_DIR_ONNX_NAME} not found in transformers model directory "
+                f"{onnx_model_name} not found in transformers model directory "
f"{deployment_path}. Be sure that an export of the model is written to "
f"{onnx_path}"
)
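
Summarizing the resolution rules above as a sketch (the local paths and the
`model_quantized.onnx` name are made-up examples):

```python
from deepsparse.transformers.helpers import get_deployment_path

# an ONNX file path resolves to (parent directory, the file itself)
deployment_dir, onnx_path = get_deployment_path("./deployment/model.onnx")

# a local directory resolves to (directory, directory/model.onnx),
# raising a ValueError if the expected file is missing
deployment_dir, onnx_path = get_deployment_path("./deployment")

# the new onnx_model_name argument overrides the default file name
deployment_dir, onnx_path = get_deployment_path(
    "./deployment", onnx_model_name="model_quantized.onnx"
)

# "zoo:" and "hf:" prefixed stubs are downloaded first, then resolved the
# same way against the downloaded deployment directory
```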
38 changes: 11 additions & 27 deletions src/deepsparse/transformers/pipelines/pipeline.py
@@ -16,19 +16,18 @@
Base Pipeline class for transformers inference pipeline
"""

-import logging

import warnings
from pathlib import Path
from typing import Any, Dict, List, Mapping, Optional, Union

import numpy
import transformers
from transformers.models.auto import AutoTokenizer

from deepsparse import Bucketable, Pipeline
-from deepsparse.transformers.helpers import overwrite_transformer_onnx_model_inputs
+from deepsparse.transformers.helpers import (
+    get_deployment_path,
+    overwrite_transformer_onnx_model_inputs,
+    setup_onnx_file_path as setup_onnx_file_path_v2,
+)


@@ -124,24 +123,15 @@ def setup_onnx_file_path(self) -> str:

:return: file path to the processed ONNX file for the engine to compile
"""
-        deployment_path, onnx_path = get_deployment_path(self.model_path)
-
-        # temporarily set transformers logger to ERROR to avoid
-        # printing misleading warnings
-        hf_logger = logging.getLogger("transformers")
-        hf_logger_level = hf_logger.level
-        hf_logger.setLevel(logging.ERROR)
-        self.config = transformers.PretrainedConfig.from_pretrained(
-            deployment_path,
-            finetuning_task=self.task if hasattr(self, "task") else None,
-        )
-        hf_logger.setLevel(hf_logger_level)
-
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            deployment_path,
-            trust_remote_code=self._trust_remote_code,
-            model_max_length=self.sequence_length,
-        )
+        # we will be soon retiring V1 pipelines. This is why I am deciding
+        # to reuse the functions from V2 pipelines in the (soon) legacy pipelines
+        onnx_path, config, tokenizer = setup_onnx_file_path_v2(
+            model_path=self.model_path,
+            sequence_length=self.sequence_length,
+            task=self.task if hasattr(self, "task") else None,
+        )
+        self.config = config
+        self.tokenizer = tokenizer

if not self._delay_overwriting_inputs:
# overwrite onnx graph to given required input shape
@@ -153,12 +143,6 @@
onnx_path, max_length=self.sequence_length
)

-        if not self.config or not self.tokenizer:
-            raise RuntimeError(
-                "Invalid config or tokenizer provided. Please provide "
-                "paths to the files or ensure they exist in the `model_path` provided. "
-                "See `tokenizer` and `config` arguments for details."
-            )
return onnx_path

def tokens_to_engine_input(
8 changes: 4 additions & 4 deletions src/deepsparse/utils/onnx.py
@@ -56,12 +56,12 @@
"has_model_kv_cache",
"CACHE_INPUT_PREFIX",
"CACHE_OUTPUT_PREFIX",
"_MODEL_DIR_ONNX_NAME",
"MODEL_ONNX_NAME",
]

_LOGGER = logging.getLogger(__name__)

-_MODEL_DIR_ONNX_NAME = "model.onnx"
+MODEL_ONNX_NAME = "model.onnx"
CACHE_INPUT_PREFIX = "past_key_values"
CACHE_OUTPUT_PREFIX = "present"

@@ -132,7 +132,7 @@ def model_to_path(model: Union[str, Model, File]) -> str:
model.deployment_directory_path

# default to the main onnx file for the model
-        model = model.deployment.get_file(_MODEL_DIR_ONNX_NAME).path
+        model = model.deployment.get_file(MODEL_ONNX_NAME).path

elif File is not object and isinstance(model, File):
# get the downloaded_path -- will auto download if not on local system
@@ -146,7 +146,7 @@

model_path = Path(model)
if model_path.is_dir():
-        return str(model_path / _MODEL_DIR_ONNX_NAME)
+        return str(model_path / MODEL_ONNX_NAME)

return model

68 changes: 12 additions & 56 deletions src/deepsparse/v2/text_generation/pipeline.py
@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-from typing import Dict
+from typing import Dict, Optional

+from deepsparse.transformers.helpers import setup_transformers_pipeline
from deepsparse.transformers.utils.helpers import process_generation_config
from deepsparse.utils import split_engine_inputs
from deepsparse.v2.pipeline import Pipeline
@@ -47,23 +48,20 @@ def __init__(
internal_kv_cache: bool = True,
force_max_tokens: bool = False,
generation_config=None,
-        engine_kwargs: Dict = None,
+        engine_kwargs: Optional[Dict] = None,
):
+        (
+            self.model_path,
+            self.config,
+            self.tokenizer,
+            engine_kwargs,
+        ) = setup_transformers_pipeline(
+            model_path, sequence_length, engine_kwargs=engine_kwargs
+        )

pipeline_state = PipelineState()
pipeline_state_vals = {}

-        # TODO: The code below will be replaced with a transformers set-up Operator.
-        self.tokenizer = None
-        model_path = self.setup_onnx_file_path(model_path, sequence_length)
-        self.tokenizer.padding_side = "left"
-        if not self.tokenizer.pad_token:
-            self.tokenizer.pad_token = self.tokenizer.eos_token
-
-        if not engine_kwargs:
-            engine_kwargs = {}
-        engine_kwargs["model_path"] = model_path

if internal_kv_cache and engine_kwargs.get("engine_type") == "onnxruntime":
internal_kv_cache = False

@@ -82,7 +80,7 @@
)

# NOTE: Currently using pipeline state. Can swap to simply pass in the
-        # attributes to the specific Operator that neeed them, as class attributes.
+        # attributes to the specific Operator that need them, as class attributes.
pipeline_state_vals[
"onnx_input_names_no_cache"
] = single_engine_operator.onnx_input_names_no_cache
@@ -196,45 +194,3 @@ def expand_inputs(self, items, batch_size):

def condense_inputs(self, *args, **kwargs):
return args[0], kwargs

-    # TODO: Move to be part of a generic transformers set-up Operator.
-    def setup_onnx_file_path(self, model_path, sequence_length) -> str:
-        import logging
-
-        import transformers
-        from transformers import AutoTokenizer
-
-        from deepsparse.transformers.helpers import get_deployment_path
-
-        """
-        Parses ONNX model from the `model_path` provided. It additionally
-        creates config and tokenizer objects from the `deployment path`,
-        derived from the `model_path` provided.
-
-        :return: file path to the processed ONNX file for the engine to compile
-        """
-        deployment_path, onnx_path = get_deployment_path(model_path)
-
-        hf_logger = logging.getLogger("transformers")
-        hf_logger_level = hf_logger.level
-        hf_logger.setLevel(logging.ERROR)
-        self.config = transformers.PretrainedConfig.from_pretrained(
-            deployment_path,
-            finetuning_task=self.task if hasattr(self, "task") else None,
-        )
-        hf_logger.setLevel(hf_logger_level)
-
-        self._trust_remote_code = False
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            deployment_path,
-            trust_remote_code=self._trust_remote_code,
-            model_max_length=sequence_length,
-        )
-
-        if not self.config or not self.tokenizer:
-            raise RuntimeError(
-                "Invalid config or tokenizer provided. Please provide "
-                "paths to the files or ensure they exist in the `model_path` provided. "
-                "See `tokenizer` and `config` arguments for details."
-            )
-        return onnx_path
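
Sketch of the refactored construction flow; the class name, import path, and
argument values below are assumptions for illustration, not taken from this
diff:

```python
from deepsparse.v2.text_generation.pipeline import TextGenerationPipeline

pipeline = TextGenerationPipeline(
    model_path="./deployment",  # or a "zoo:"/"hf:" stub
    sequence_length=1024,
    engine_kwargs={"engine_type": "onnxruntime"},  # forces internal_kv_cache off
)
# setup_transformers_pipeline now populates pipeline.model_path, .config, and
# .tokenizer, and injects "model_path" into engine_kwargs before the engine
# operators are built.
```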