remove files to make review easier
dbogunowicz committed Jun 22, 2023
1 parent 950c653 commit ea82e99
Showing 5 changed files with 28 additions and 463 deletions.
54 changes: 21 additions & 33 deletions src/deepsparse/pipeline.py
@@ -263,7 +263,7 @@ def __call__(self, *args, **kwargs) -> BaseModel:
batches = self.split_engine_inputs(engine_inputs, self._batch_size)

# submit split batches to engine threadpool
batch_outputs = [self.engine_forward(x) for x in batches]
batch_outputs = list(self.executor.map(self.engine_forward, batches))

# join together the batches of size `self._batch_size`
engine_outputs = self.join_engine_outputs(batch_outputs)
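
For readers skimming the hunk above: the restored line fans the split batches out to the pipeline's executor instead of running them one by one. Below is a minimal, self-contained sketch of that pattern, assuming a plain `concurrent.futures.ThreadPoolExecutor` and a stand-in `engine_forward`; it illustrates the idiom, not the pipeline's actual implementation.

```python
from concurrent.futures import ThreadPoolExecutor

import numpy


def engine_forward(batch):
    # stand-in for a single engine invocation on one batch of inputs
    return [array * 2 for array in batch]


# engine inputs already split into batch_size-sized chunks
batches = [[numpy.ones((1, 3))], [numpy.full((1, 3), 2.0)]]

with ThreadPoolExecutor(max_workers=2) as executor:
    # map() preserves input order, so the per-batch outputs
    # can later be joined back into a single result
    batch_outputs = list(executor.map(engine_forward, batches))

print(len(batch_outputs))  # 2
```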
@@ -567,34 +567,6 @@ def _register_pipeline_tasks_decorator(pipeline_class: Pipeline):

return _register_pipeline_tasks_decorator

@staticmethod
def create_engine(
onnx_file_path: str,
engine_type: str,
engine_args: Dict,
context: Optional[Context] = None,
) -> Union[Engine, MultiModelEngine, ORTEngine]:
engine_type = engine_type.lower()

if engine_type == DEEPSPARSE_ENGINE:
if context is not None and isinstance(context, Context):
engine_args.pop("num_cores", None)
engine_args.pop("scheduler", None)
engine_args["context"] = context
return MultiModelEngine(
model=onnx_file_path,
**engine_args,
)
return Engine(onnx_file_path, **engine_args)

if engine_type == ORT_ENGINE:
return ORTEngine(onnx_file_path, **engine_args)

raise ValueError(
f"Unknown engine_type {engine_type}. Supported values include: "
f"{SUPPORTED_PIPELINE_ENGINES}"
)

@classmethod
def from_config(
cls,
@@ -819,10 +791,26 @@ def engine_forward(self, engine_inputs: List[numpy.ndarray]) -> List[numpy.ndarr
"""
return self.engine(engine_inputs)

def _initialize_engine(self) -> Union[Engine, MultiModelEngine, ORTEngine]:
return Pipeline.create_engine(
self.onnx_file_path, self.engine_type, self._engine_args, self.context
)
def _initialize_engine(self) -> Union[Engine, ORTEngine]:
engine_type = self.engine_type.lower()

if engine_type == DEEPSPARSE_ENGINE:
if self.context is not None and isinstance(self.context, Context):
self._engine_args.pop("num_cores", None)
self._engine_args.pop("scheduler", None)
self._engine_args["context"] = self.context
return MultiModelEngine(
model=self.onnx_file_path,
**self._engine_args,
)
return Engine(self.onnx_file_path, **self._engine_args)
elif engine_type == ORT_ENGINE:
return ORTEngine(self.onnx_file_path, **self._engine_args)
else:
raise ValueError(
f"Unknown engine_type {self.engine_type}. Supported values include: "
f"{SUPPORTED_PIPELINE_ENGINES}"
)
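
As a side note on the restored branch above: when a `Context` is supplied, the per-engine `num_cores` and `scheduler` arguments are dropped because the shared context already owns those settings. A self-contained sketch of that argument-reconciliation pattern follows; all names in it (`SchedulingContext`, `reconcile_engine_args`) are stand-ins for illustration, not the deepsparse API.

```python
from typing import Any, Dict, Optional


class SchedulingContext:
    """Stand-in for a shared scheduling context that owns core/scheduler settings."""

    def __init__(self, num_cores: int):
        self.num_cores = num_cores


def reconcile_engine_args(
    engine_args: Dict[str, Any], context: Optional[SchedulingContext]
) -> Dict[str, Any]:
    # copy so the caller's dict is not mutated
    args = dict(engine_args)
    if context is not None:
        # the context already decides core count and scheduling,
        # so per-engine overrides would conflict and are removed
        args.pop("num_cores", None)
        args.pop("scheduler", None)
        args["context"] = context
    return args


print(reconcile_engine_args({"num_cores": 4, "batch_size": 1}, SchedulingContext(8)))
# -> {'batch_size': 1, 'context': <__main__.SchedulingContext object at ...>}
```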

def _identifier(self):
# get pipeline identifier; used in the context of logging
23 changes: 0 additions & 23 deletions src/deepsparse/tasks.py
@@ -95,12 +95,6 @@ class SupportedTasks:
),
)

text_generation = namedtuple("text_generation", ["opt", "codegen", "bloom"])(
codegen=AliasedTask("codegen", []),
opt=AliasedTask("opt", []),
bloom=AliasedTask("bloom", []),
)

image_classification = namedtuple("image_classification", ["image_classification"])(
image_classification=AliasedTask(
"image_classification",
@@ -156,9 +150,6 @@ def check_register_task(
# custom task, register the CustomPipeline
import deepsparse.pipelines.custom_pipeline # noqa: F401

elif cls.is_text_generation(task):
import deepsparse.transformers.pipelines.text_generation # noqa: F401

elif cls.is_nlp(task):
# trigger transformers pipelines to register with Pipeline.register
import deepsparse.transformers.pipelines # noqa: F401
@@ -202,20 +193,6 @@ def check_register_task(
f"{list(all_tasks)}"
)

@classmethod
def is_text_generation(cls, task: str) -> bool:
"""
:param task: the name of the task to check whether it is a text generation task
such as codegen
:return: True if it is a text generation task, False otherwise
"""
return any(
[
text_generation_task.matches(task)
for text_generation_task in cls.text_generation
]
)

@classmethod
def is_nlp(cls, task: str) -> bool:
"""
50 changes: 3 additions & 47 deletions src/deepsparse/transformers/README.md
@@ -10,7 +10,6 @@ methods such as [pruning](https://neuralmagic.com/blog/pruning-overview/) and [q
These techniques result in significantly more performant and smaller models with limited to no effect on the baseline metrics.

This integration currently supports several fundamental NLP tasks:
- **Text Generation** - given the input prompt, generate an output text sequence (e.g. to fill in incomplete text or paraphrase part of the prompt)
- **Question Answering** - posing questions about a document
- **Sentiment Analysis** - assigning a sentiment to a piece of text
- **Text Classification** - assigning a label or class to a piece of text (e.g. duplicate question pairing)
@@ -31,12 +30,10 @@ compatible with our [hardware requirements](https://docs.neuralmagic.com/deepspa
By default, to deploy a transformer using the DeepSparse Engine, you need to supply the model in the ONNX format along with the HuggingFace supporting files.
This grants the engine the flexibility to serve any model in a framework-agnostic environment.

In general, the DeepSparse pipelines require the following files within a folder on the local server to properly load a Transformers model:
The DeepSparse pipelines require the following files within a folder on the local server to properly load a Transformers model:
- `model.onnx`: The exported Transformers model in the [ONNX format](https://github.com/onnx/onnx).
- `model_kvcache.onnx` (optional): the ONNX model with KV Cache support (akin to the Transformers model with `use_cache = True`). Specific to the `text-generation` integration.
- `tokenizer.json`: The [HuggingFace compatible tokenizer configuration](https://huggingface.co/docs/transformers/fast_tokenizers) used with the model.
- `config.json`: The [HuggingFace compatible configuration file](https://huggingface.co/docs/transformers/main_classes/configuration) used with the model.
- `tokenizer_config.json`: The [HuggingFace compatible tokenizer configuration](https://huggingface.co/docs/transformers/fast_tokenizers) used with the model.
- `tokenizer.json`, `special_tokens_map.json`, `vocab.json`, `merges.txt` (optional): Other files that may be required by a tokenizer

Below we describe two possibilities to obtain the required structure.

Expand All @@ -51,7 +48,7 @@ sparseml.transformers.export_onnx --task question-answering --model_path model_p
```

This creates a `model.onnx` file in the directory of your `model_path` (e.g. `/trained_model/model.onnx`).
Any additional required files, such as `tokenizer.json` or `config.json`, are stored under the `model_path` folder as well, so a DeepSparse pipeline can be directly instantiated by using that folder after export (e.g. `/trained_model/`).
The `tokenizer.json` and `config.json` are stored under the `model_path` folder as well, so a DeepSparse pipeline can be directly instantiated by using that folder after export (e.g. `/trained_model/`).

#### SparseZoo Stub
Alternatively, you can skip the process of the ONNX model export by using Neural Magic's [SparseZoo](https://sparsezoo.neuralmagic.com/). The SparseZoo contains pre-sparsified models and SparseZoo stubs enable you to reference any model on the SparseZoo in a convenient and predictable way.
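
A hedged sketch of referencing a SparseZoo stub directly when creating a pipeline; the stub string below is illustrative only and should be replaced with an actual stub copied from the SparseZoo.

```python
from deepsparse import Pipeline

# illustrative stub; copy a real one from sparsezoo.neuralmagic.com
stub = "zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned80_quant-none"

qa_pipeline = Pipeline.create(task="question-answering", model_path=stub)
```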
@@ -140,47 +137,6 @@ response.text

>> '{"score":0.9534820914268494,"start":8,"end":14,"answer":"batman"}'
```
### Text Generation
The text generation task generates a sequence of words given a prompt. Popular text generation LLMs (Large Language Models) are used
for chat (the instruction-tuned models), code generation, text summarization, and filling in missing text. The following example uses a sparsified text generation
OPT model to complete the prompt.

[List of available SparseZoo Text Generation Models](https://sparsezoo.neuralmagic.com/?useCase=text_generation)

#### Python Pipeline
```python
from deepsparse import Pipeline

opt_pipeline = Pipeline.create(task="opt")

inference = opt_pipeline("Who is the president of the United States?")

>> 'The president of the United States is the head of the executive branch of government...'
```

#### HTTP Server
Spinning up:
```bash
deepsparse.server \
task text-generation \
--model_path # TODO: Pending until text generation models get uploaded to SparseZoo
```

Making a request:
```python
import requests

url = "http://localhost:5543/predict" # Server's port defaults to 5543

obj = {"sequence": "Who is the president of the United States?"}

response = requests.post(url, json=obj)
response.text

>> 'The president of the United States is the head of the executive branch of government...'
```

### Sentiment Analysis
The sentiment analysis task takes in a sentence and classifies its sentiment. The following example
12 changes: 4 additions & 8 deletions src/deepsparse/transformers/pipelines/pipeline.py
@@ -126,23 +126,19 @@ def setup_onnx_file_path(self) -> str:
return onnx_path

def tokens_to_engine_input(
self,
tokens: Mapping[Any, numpy.ndarray],
onnx_input_names: Optional[List[str]] = None,
self, tokens: Mapping[Any, numpy.ndarray]
) -> List[numpy.ndarray]:
"""
:param tokens: outputs of the pipeline tokenizer
:return: list of numpy arrays in expected order for model input
"""
if onnx_input_names is None:
onnx_input_names = self.onnx_input_names
if not all(name in tokens for name in onnx_input_names):
if not all(name in tokens for name in self.onnx_input_names):
raise ValueError(
f"pipeline expected arrays with names {onnx_input_names}, "
f"pipeline expected arrays with names {self.onnx_input_names}, "
f"received inputs: {list(tokens.keys())}"
)

return [tokens[name] for name in onnx_input_names]
return [tokens[name] for name in self.onnx_input_names]
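
To make the restored behavior concrete, here is a small usage sketch of ordering tokenizer output by the model's input names; the token arrays and input names below are fabricated placeholders, not taken from this diff.

```python
import numpy

# pretend these came from a HuggingFace tokenizer with return_tensors="np"
tokens = {
    "attention_mask": numpy.ones((1, 128), dtype=numpy.int64),
    "input_ids": numpy.zeros((1, 128), dtype=numpy.int64),
}

# the ONNX model fixes the order in which inputs must be fed to the engine
onnx_input_names = ["input_ids", "attention_mask"]

engine_inputs = [tokens[name] for name in onnx_input_names]
print([array.shape for array in engine_inputs])  # [(1, 128), (1, 128)]
```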

@staticmethod
def should_bucket(*args, **kwargs) -> bool:
