neuralmagic · Satrat · Jan 9, 2024 · Dec 13, 2023 · Dec 13, 2023 · Jan 8, 2024
diff --git a/src/sparseml/core/recipe/modifier.py b/src/sparseml/core/recipe/modifier.py
@@ -113,4 +113,4 @@ def dict(self, *args, **kwargs) -> Dict[str, Any]:
         """
         :return: the dictionary representation of the modifier
         """
-        return {self.type: self.args, "group": f"{self.group}_modifiers"}
+        return {self.type: self.args_evaluated, "group": f"{self.group}_modifiers"}
diff --git a/src/sparseml/transformers/finetune/text_generation.py b/src/sparseml/transformers/finetune/text_generation.py
@@ -104,6 +104,13 @@ def parse_args(**kwargs):
     else:
         model_args, data_args, training_args = parser.parse_dict(kwargs)
 
+    if training_args.recipe_args is not None:
+        arg_dict = {}
+        for recipe_arg in training_args.recipe_args:
+            key, value = recipe_arg.split("=")
+            arg_dict[key] = value
+        training_args.recipe_args = arg_dict
+
     return model_args, data_args, training_args
 
 
@@ -166,6 +173,10 @@ def main(
         revision=model_args.model_revision,
         use_auth_token=True if model_args.use_auth_token else None,
     )
+    teacher_config = AutoConfig.from_pretrained(
+        training_args.distill_teacher,
+        use_auth_token=True if model_args.use_auth_token else None,
+    )
 
     model_kwargs = {
         "config": config,
@@ -175,6 +186,7 @@ def main(
         "torch_dtype": parse_dtype(model_args.precision),
     }
     teacher_kwargs = {
+        "config": teacher_config,
         "cache_dir": model_args.cache_dir,
         "use_auth_token": True if model_args.use_auth_token else None,
     }

diff --git a/src/sparseml/transformers/finetune/training_args.py b/src/sparseml/transformers/finetune/training_args.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from dataclasses import dataclass, field
-from typing import Optional
+from typing import List, Optional
 
 from transformers import TrainingArguments as HFTrainingArgs
 
@@ -51,9 +51,14 @@ class TrainingArguments(HFTrainingArgs):
             ),
         },
     )
-    recipe_args: Optional[str] = field(
+    recipe_args: Optional[List[str]] = field(
         default=None,
-        metadata={"help": "Recipe arguments to be overwritten"},
+        metadata={
+            "help": (
+                "List of recipe arguments to evaluate, of the format key1=value1 "
+                "key2=value2"
+            )
+        },
     )
     do_oneshot: Optional[bool] = field(
         default=False,

diff --git a/src/sparseml/transformers/sparsification/obcq/obcq.py b/src/sparseml/transformers/sparsification/obcq/obcq.py
@@ -16,7 +16,7 @@
 import logging
 import os
 from pathlib import Path
-from typing import Optional
+from typing import Dict, Optional
 
 from torch.nn import Module
 from transformers import AutoConfig
@@ -56,6 +56,7 @@ def one_shot(
     deploy_dir: Optional[str] = ".",
     recipe_file: Optional[str] = None,
     precision: str = "auto",
+    recipe_args: Optional[Dict] = None,
     eval_data: Optional[str] = None,
     do_save: Optional[bool] = False,
 ) -> Module:
@@ -70,6 +71,7 @@ def one_shot(
     :param deploy_dir: The output directory to save the model to
     :param recipe_file: recipe containing SparseGPT configuration
     :param precision: precision to load model as, either auto, half or full
+    :param recipe_args: additional arguments to use for recipe evaluation
     :param eval_data: dataset to use for perplexity evalaution, or none to skip
     :param do_save: whether to save the output model to disk
 
@@ -144,6 +146,7 @@ def one_shot(
         start=-1,
         device=device,
         copy_data=False,
+        recipe_args=recipe_args,
     )
 
     if do_save:
@@ -166,6 +169,15 @@ def one_shot(
     return model
 
 
+class KeyValue(argparse.Action):
+    """Argparse action that parses ``key=value`` tokens into a dict."""
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        # split on the first "=" only, so a value may itself contain "="
+        parsed = dict(token.split("=", 1) for token in values)
+        setattr(namespace, self.dest, parsed)
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
 
@@ -195,6 +207,12 @@ def one_shot(
         default="auto",
         help="Precision to cast model weights to, default to auto",
     )
+    parser.add_argument(
+        "--recipe_args",
+        nargs="*",
+        action=KeyValue,
+        help="Recipe arguments to evaluate, of the format key1=value1 key2=value2",
+    )
     parser.add_argument(
         "--eval", type=str, default=None, help="Optional dataset for perplexity eval"
     )
@@ -213,6 +231,7 @@ def one_shot(
         device=args.device,
         recipe_file=args.recipe,
         precision=args.precision,
+        recipe_args=args.recipe_args,
         eval_data=args.eval,
         do_save=args.save,
     )