diff --git a/src/deepsparse/transformers/engines/nl_decoder_engine.py b/src/deepsparse/transformers/engines/nl_decoder_engine.py index 2670a315e9..99ab552660 100644 --- a/src/deepsparse/transformers/engines/nl_decoder_engine.py +++ b/src/deepsparse/transformers/engines/nl_decoder_engine.py @@ -175,7 +175,7 @@ def run( else: # run the engine assuming external kv cache # management. - return self.engine.run(inputs, val_inp, kv_cache) + return self.engine.run(inputs, val_inp) def __call__( self,