Skip to content

Commit

Permalink
Pipeline runs, but produces incorrect results
Browse files Browse the repository at this point in the history
  • Loading branch information
dbogunowicz committed Nov 17, 2023
1 parent d1683b4 commit 51c4ee6
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 15 deletions.
10 changes: 5 additions & 5 deletions src/deepsparse/transformers/utils/token_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,16 +77,16 @@ def generate(self, logits: numpy.ndarray) -> numpy.ndarray:
:param logits: the logits from the model with shape (vocab_size,)
:return: the sampled token
"""
if self.top_k:
logits = self.apply_top_k(logits)
if self.top_p:
logits = self.apply_top_p(logits)

if self.deterministic:
token = numpy.argmax(logits)
self.tokens.append(token)
return token

if self.top_k:
logits = self.apply_top_k(logits)
if self.top_p:
logits = self.apply_top_p(logits)

if self.sampling_temperature != 1.0:
logits /= self.sampling_temperature

Expand Down
3 changes: 3 additions & 0 deletions src/deepsparse/v2/text_generation/join_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ def __init__(self, tokenizer):
self.tokenizer = tokenizer

def run(self, inp: List[CompileGenerationsOutput], **kwargs):

if not isinstance(inp, list):
inp = [[inp]]
batch_outputs = [x for x in inp[0]]
generated_tokens = [x.generated_tokens for x in batch_outputs]
generated_logits = [x.generated_logits for x in batch_outputs]
Expand Down
45 changes: 43 additions & 2 deletions src/deepsparse/v2/text_generation/nl_engine_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from pydantic import BaseModel, Field

from deepsparse.transformers.helpers import overwrite_transformer_onnx_model_inputs
from deepsparse.utils.onnx import (
CACHE_INPUT_PREFIX,
overwrite_onnx_model_inputs_for_kv_cache_models,
Expand All @@ -29,7 +30,12 @@
)


__all__ = ["NLEngineOperator", "NlEngineInput"]
__all__ = [
"NlEngineOperator",
"NlEngineOperatorNoCache",
"NlEngineInputNoCache",
"NlEngineInput",
]


class NlEngineInput(BaseModel):
Expand All @@ -39,7 +45,12 @@ class NlEngineInput(BaseModel):
in_generation: bool = Field(description="in_generation", default=None)


class NLEngineOperator(EngineOperator):
class NlEngineInputNoCache(BaseModel):
    """Input schema for the NL engine operator that runs without a KV cache."""

    # typed as Any to bypass pydantic validation; presumably numpy arrays
    # fed directly to the engine — TODO confirm against the operator's run()
    input_ids: Any
    attention_mask: Any


class NlEngineOperator(EngineOperator):

"""
Operator for the NL Decoder Engine. This Operator inherits from the EngineOperator.
Expand Down Expand Up @@ -195,3 +206,33 @@ def output_names(self) -> List[str]:
:return: The output names for the onnx model
"""
return self.engine.output_names


class NlEngineOperatorNoCache(EngineOperator):
    """
    Engine operator for decoder models that have no KV cache
    inputs/outputs. The ONNX model inputs are overwritten to a fixed
    sequence length before engine compilation, and the whole sequence
    is run through the engine in a single pass.
    """

    input_schema = NlEngineInputNoCache
    output_schema = None

    def __init__(self, sequence_length: int, **kwargs):
        """
        :param sequence_length: fixed sequence length to overwrite the
            ONNX model inputs with
        :param kwargs: kwargs forwarded to EngineOperator; must contain
            ``model_path`` and may contain ``batch_size``
        """
        model_path, *_ = overwrite_transformer_onnx_model_inputs(
            path=kwargs.get("model_path"),
            max_length=sequence_length,
            batch_size=kwargs.get("batch_size", 1),
        )
        # BUGFIX: compile the engine from the overwritten model
        # (fixed input shapes). Previously the returned model_path was
        # discarded and super().__init__ compiled the original model,
        # so the pipeline ran but produced incorrect results.
        kwargs["model_path"] = model_path
        super().__init__(**kwargs)

    def run(self, inp: NlEngineInputNoCache, **kwargs) -> Any:
        """
        Run the engine over the full input sequence in one pass.

        :param inp: input_ids and attention_mask for the engine
        :return: tuple of (state dict expected by downstream operators,
            inference-state dict carrying the prompt logits)
        """
        engine_inputs = [inp.input_ids, inp.attention_mask]
        logits = (
            super()
            .run(EngineOperatorInputs(engine_inputs=engine_inputs), **kwargs)
            .get("engine_outputs")
        )
        # downstream operators expect all of these keys to be present;
        # only the logits are meaningful on the no-kv-cache path
        return {
            "logits": logits,
            "logits_shape": None,
            "deterministic": None,
            "kv_cache": None,
            "tokens": None,
            "sampling_temperature": None,
        }, {"prompt_logits": logits}
85 changes: 79 additions & 6 deletions src/deepsparse/v2/text_generation/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
from deepsparse.transformers.helpers import setup_transformers_pipeline
from deepsparse.transformers.utils.helpers import process_generation_config
from deepsparse.utils import split_engine_inputs
from deepsparse.utils.onnx import default_cached_outputs
from deepsparse.v2.pipeline import Pipeline
from deepsparse.v2.routers import GraphRouter
from deepsparse.v2.routers import GraphRouter, LinearRouter
from deepsparse.v2.schedulers import OperatorScheduler
from deepsparse.v2.text_generation import (
AutoRegressiveOperatorPreprocess,
Expand All @@ -29,7 +30,8 @@
JoinOutput,
KVCacheCreator,
MultiEnginePrefill,
NLEngineOperator,
NlEngineOperator,
NlEngineOperatorNoCache,
PrepareforPrefill,
PrepareGeneration,
ProcessInputsTextGeneration,
Expand All @@ -39,6 +41,79 @@
from deepsparse.v2.utils import PipelineState


class TextGenerationPipelineNoCache(Pipeline):
    """
    Text generation pipeline for ONNX decoder models that do NOT have
    KV cache inputs/outputs.

    The operators are wired into a strictly linear graph (LinearRouter),
    so the full fixed-length sequence is re-run through the engine for
    each generation step — there is no single/multi-engine prefill split
    as in TextGenerationPipeline.
    """

    def __init__(
        self,
        model_path: str,
        sequence_length: int = 1024,
        engine_kwargs: Optional[Dict] = None,
        onnx_model_name: Optional[str] = None,
        generation_config=None,  # TODO: Typing here
        **kwargs,
    ):
        """
        :param model_path: path to the transformers model directory / ONNX model
        :param sequence_length: fixed sequence length the model inputs are
            padded to
        :param engine_kwargs: optional kwargs forwarded to the engine operator
        :param onnx_model_name: optional override for the ONNX file name
        :param generation_config: generation config (dict/file/object) passed
            through process_generation_config
        """
        # resolves model_path/config/tokenizer and normalizes engine_kwargs
        (
            self.model_path,
            self.config,
            self.tokenizer,
            engine_kwargs,
        ) = setup_transformers_pipeline(
            model_path,
            sequence_length,
            onnx_model_name=onnx_model_name,
            engine_kwargs=engine_kwargs,
        )
        # fail fast: this pipeline only supports models without a KV cache
        self.verify_no_kv_cache_present()

        token_generator = TokenGeneratorOperator()

        # NOTE: order matters — LinearRouter executes these sequentially
        ops = [
            ProcessInputsTextGeneration(
                generation_config=process_generation_config(generation_config),
                sequence_length=sequence_length,
                tokenizer=self.tokenizer,
            ),
            NlEngineOperatorNoCache(sequence_length=sequence_length, **engine_kwargs),
            PrepareGeneration(
                sequence_length=sequence_length,
                # no-cache path processes the prompt in a single pass
                prompt_sequence_length=1,
                token_generator=token_generator,
            ),
            GenerateNewTokenOperator(tokenizer=self.tokenizer, force_max_tokens=True),
            CompileGeneratedTokens(),
            CompileGenerations(),
            JoinOutput(tokenizer=self.tokenizer),
            ProcessOutputs(tokenizer=self.tokenizer),
        ]
        router = LinearRouter(end_route=len(ops))
        scheduler = [OperatorScheduler()]
        super().__init__(
            ops=ops,
            router=router,
            schedulers=scheduler,
        )

    def run(self, *args, **kwargs):
        # we need to set the fixed_sequences_length flag to True
        # for the non-kv cache pipeline, so inputs are padded to
        # the full sequence_length on every run
        kwargs.update(dict(fixed_sequences_length=True))
        return super().run(*args, **kwargs)

    def verify_no_kv_cache_present(self) -> bool:
        """
        Verifies that the ONNX model does not have
        KV cache present (checked via its cached outputs;
        see default_cached_outputs).
        :return: True if compatible (always True when the method
            returns, since an incompatible model raises)
        :raises ValueError: if the model has KV cache inputs/outputs
        """
        is_kv_cache_present = any(default_cached_outputs(self.model_path))
        if is_kv_cache_present:
            raise ValueError(
                f"The model: {self.model_path} has KV cache inputs/outputs present. "
                "Please use the TextGenerationPipeline instead."
            )
        return not is_kv_cache_present


class TextGenerationPipeline(Pipeline):
def __init__(
self,
Expand All @@ -65,14 +140,14 @@ def __init__(
if internal_kv_cache and engine_kwargs.get("engine_type") == "onnxruntime":
internal_kv_cache = False

single_engine_operator = NLEngineOperator(
single_engine_operator = NlEngineOperator(
sequence_length=sequence_length,
internal_kv_cache=internal_kv_cache,
input_ids_length=1,
**engine_kwargs,
)

multi_engine_operator = NLEngineOperator(
multi_engine_operator = NlEngineOperator(
sequence_length=sequence_length,
internal_kv_cache=internal_kv_cache,
input_ids_length=prompt_sequence_length,
Expand Down Expand Up @@ -194,5 +269,3 @@ def expand_inputs(self, items, batch_size):

def condense_inputs(self, *args, **kwargs):
return args[0], kwargs


1 change: 1 addition & 0 deletions src/deepsparse/v2/text_generation/prep_for_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def run(
"token_generator": token_generator,
}
output = {
"logits": prompt_logits,
"tokens": token_generator.tokens,
"kv_cache": kv_cache,
"in_generation": True,
Expand Down
4 changes: 2 additions & 2 deletions tests/deepsparse/v2/unit/text_generation/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from deepsparse.v2 import InferenceState, PipelineState
from deepsparse.v2.text_generation import (
GenerationDefaults,
NLEngineOperator,
NlEngineOperator,
TokenGeneratorOperator,
)

Expand Down Expand Up @@ -61,7 +61,7 @@ def single_token_engine_no_internal_cache(text_generation_attributes, model_attr
seq_length, _ = text_generation_attributes
_, model_path = model_attributes

nl_engine_operator = NLEngineOperator(
nl_engine_operator = NlEngineOperator(
sequence_length=seq_length, input_ids_length=1, model_path=model_path
)
return nl_engine_operator
Expand Down
1 change: 1 addition & 0 deletions tests/testdata/gsm8k-v0-greedy_until
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3b4bf5c7d1504339aa06bcb50212dba05ff761d30de6faf720fdc818b16316ad
1 change: 1 addition & 0 deletions tests/testdata/gsm8k-v0-res.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"results": {"gsm8k": {"acc": 0.0, "acc_stderr": 0.0}}, "versions": {"gsm8k": 0}}

0 comments on commit 51c4ee6

Please sign in to comment.