Skip to content

Commit

Permalink
[TextGeneration] Fix initialization; don't try v1 init for text gen (#…
Browse files Browse the repository at this point in the history
…1571)

* only check the capacity condition during prefill; generation already has its own check

* don't try v1 if running text gen; just raise the error
  • Loading branch information
dsikka committed Jan 31, 2024
1 parent 29a826d commit 2f6cd9d
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 2 deletions.
6 changes: 5 additions & 1 deletion src/deepsparse/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
SchedulerGroup,
)
from deepsparse.subgraph_execute import SubGraphExecutor
from deepsparse.tasks import SupportedTasks
from deepsparse.utils import InferenceState, PipelineState
from deepsparse.utils.subgraph import SubGraph
from deepsparse.utils.time import TIMER_KEY, InferenceStages, TimerManager
Expand Down Expand Up @@ -139,7 +140,10 @@ def create(cls, task: str, **kwargs) -> "Pipeline":
"Pipeline was not created for the given task. The "
"provided task should be registered using the OperatorRegistry"
)
except Exception:
except Exception as e:
if SupportedTasks.is_text_generation(task):
raise e

_LOGGER.warning(f"Could not create v2 '{task}' pipeline, trying legacy")
from deepsparse.legacy import Pipeline

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,10 @@ def can_operate(self, inp: Any) -> bool:
if inp.get("in_generation"):
return True

if kv_cache.total_num_processed_tokens >= kv_cache.capacity:
if (
kv_cache.total_num_processed_tokens >= kv_cache.capacity
and inp.get("in_generation") is None
):
raise RuntimeError(
"Not enough kv_cache capacity to run generation. Please use a larger "
"sequence_length or a shorter prompt"
Expand Down

0 comments on commit 2f6cd9d

Please sign in to comment.