[KV Cache Injection] Causal Mask implementation for OPT and CodeGen #1677

Merged · 12 commits · Jul 27, 2023
src/sparseml/exporters/kv_cache_injector.py (5 additions, 7 deletions)
@@ -60,10 +60,8 @@ def __init__(

     This transformation not only injects the kv cache
     inputs/outputs, but also adjusts the original ONNX graph to
-    account for the necessary changes. This involves e.g. adding
-    the 'position' input to the model, so that the positional
-    embeddings of the new model are compatible with the past kv
-    cache information.
+    account for the necessary changes. This is done by the
+    optional `additional_transforms` variable.

     Usage:
     ```python
     […]
     ```

@@ -133,7 +131,7 @@ def export(self, pre_transforms_model: onnx.ModelProto, file_path: str):

     @staticmethod
     def _get_transforms_from_config(config: KeyValueCacheConfig) -> List[OnnxTransform]:
-        positions_adjustment = config.positions_adjustment_transform
+        additional_transforms = config.additional_transforms

         transforms = [
             CacheKeysAndValues(
[…]
@@ -144,8 +142,8 @@ def _get_transforms_from_config(config: KeyValueCacheConfig) -> List[OnnxTransform]:
                 transpose_key_input=config.transpose_key_input,
             )
         ]
-        if positions_adjustment is not None:
-            transforms += [positions_adjustment()]
+        if additional_transforms is not None:
+            transforms += [additional_transforms()]

         return transforms

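The docstring's usage example is collapsed in this diff view. Based on the `export` signature shown above, driving the injector plausibly looks like the sketch below; the `KeyValueCacheInjector` class name matches the file, while the `model_path` constructor argument and the file paths are assumptions.

```python
import onnx

from sparseml.exporters.kv_cache_injector import KeyValueCacheInjector

# Hypothetical paths; `model_path` as a constructor argument is an
# assumption (the injector needs the transformer's config.json to select
# the matching KeyValueCacheConfig, e.g. OPT_CONFIG vs. CODEGEN_CONFIG).
onnx_model = onnx.load("deployment/model.onnx")
injector = KeyValueCacheInjector(model_path="deployment/")

# `export(pre_transforms_model, file_path)` matches the signature in the
# hunk above.
injector.export(onnx_model, "deployment/model_kv_cache.onnx")
```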
src/sparseml/exporters/transforms/kv_cache/__init__.py (3 additions, 3 deletions)
@@ -19,7 +19,7 @@
 # isort:skip_file

 from .cache_keys_and_values import *
-from .positions_adjustment_base import *
-from .positions_adjustment_opt import *
-from .positions_adjustment_codegen import *
+from .transforms_base import *
+from .transforms_opt import *
+from .transforms_codegen import *
 from .configs import *
src/sparseml/exporters/transforms/kv_cache/configs.py (10 additions, 13 deletions)
@@ -19,11 +19,11 @@

 from pydantic import BaseModel, Field

-from sparseml.exporters.transforms.kv_cache.positions_adjustment_codegen import (
-    PositionsAdjustmentCodeGen,
+from sparseml.exporters.transforms.kv_cache.transforms_codegen import (
+    AdditionalTransformsCodeGen,
 )
-from sparseml.exporters.transforms.kv_cache.positions_adjustment_opt import (
-    PositionsAdjustmentOPT,
+from sparseml.exporters.transforms.kv_cache.transforms_opt import (
+    AdditionalTransformsOPT,
 )


@@ -37,12 +37,9 @@ class KeyValueCacheConfig(BaseModel):
         description="The name of the model type. This should correspond to "
         "the `model_type` field in the transformer's `config.json` file."
     )
-    positions_adjustment_transform: Any = Field(
-        description="The class to use to transform the positional embeddings. "
-        "This should be a subclass of `PositionsAdjustmentBase`. Note: In the "
-        "future, when we encounter models that are more complex than just "
-        "editing the positions in the model, we can make this transformation more "
-        "general."
+    additional_transforms: Any = Field(
+        description="A transform class to use for additional transforms "
+        "to the model required for finalizing the kv cache injection."
     )
     key_num_attention_heads: str = Field(
         description="The key to use to get the number of attention heads from the "
@@ -84,7 +81,7 @@ class Config:

 OPT_CONFIG = KeyValueCacheConfig(
     model_name="opt",
-    positions_adjustment_transform=PositionsAdjustmentOPT,
+    additional_transforms=AdditionalTransformsOPT,
     key_num_attention_heads="num_attention_heads",
     key_num_embedding_hidden_size="hidden_size",
     transpose_value_input=None,

A Member commented on the `additional_transforms=AdditionalTransformsOPT,` line:

> Would be great to support a list here eventually, instead of a single class, in case we don't want to squash many steps into a single transform, or to enable more code sharing between model-specific additional transforms outside of inheritance.
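The list support suggested above could be handled at the end of `_get_transforms_from_config` roughly as follows (a sketch of the reviewer's idea, not code from this PR):

```python
# Sketch of the reviewer's suggestion: let `additional_transforms` be
# either a single transform class or a list of them.
if additional_transforms is not None:
    if not isinstance(additional_transforms, (list, tuple)):
        additional_transforms = [additional_transforms]
    transforms += [transform_cls() for transform_cls in additional_transforms]
```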
@@ -94,7 +91,7 @@ class Config:

 CODEGEN_CONFIG = KeyValueCacheConfig(
     model_name="codegen",
-    positions_adjustment_transform=PositionsAdjustmentCodeGen,
+    additional_transforms=AdditionalTransformsCodeGen,
     key_num_attention_heads="n_head",
     key_num_embedding_hidden_size="n_embd",
     transpose_value_input=(0, 2, 1, 3),
@@ -104,7 +101,7 @@ class Config:

 BLOOM_CONFIG = KeyValueCacheConfig(
     model_name="bloom",
-    positional_embedding_transform=None,
+    additional_transforms=None,
     key_num_attention_heads="num_attention_heads",
     key_num_embedding_hidden_size="n_embed",
     transpose_value_input=None,
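The `key_*` fields differ per model because each architecture names these values differently in its Hugging Face `config.json`; a sketch of how they are presumably consumed (the path below is hypothetical):

```python
import json

from sparseml.exporters.transforms.kv_cache.configs import CODEGEN_CONFIG

# CodeGen's config.json calls these "n_head" and "n_embd", which is why
# CODEGEN_CONFIG carries different keys than OPT_CONFIG.
with open("deployment/config.json") as f:
    hf_config = json.load(f)

num_attention_heads = hf_config[CODEGEN_CONFIG.key_num_attention_heads]
hidden_size = hf_config[CODEGEN_CONFIG.key_num_embedding_hidden_size]
```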

This file was deleted.

This file was deleted.
