MVP for Alternating Flow #1912

Merged Jan 9, 2024

Commits (44)
d5abe8e - initial recipe re-loading (Satrat, Nov 16, 2023)
ec0e180 - Merge branch 'main' into sparse_auto_recipe (Satrat, Dec 5, 2023)
2d7b5b7 - loading for input recipe (Satrat, Dec 7, 2023)
2cc9e16 - Merge branch 'main' into sparse_auto_recipe (Satrat, Dec 7, 2023)
356bd81 - persist structure across recipe loads (Satrat, Dec 7, 2023)
1b67b6f - clean up fn names (Satrat, Dec 7, 2023)
f06ed8a - Merge branch 'main' into sparse_auto_recipe (Satrat, Dec 12, 2023)
ab5a464 - clean up duplicated code (Satrat, Dec 12, 2023)
11f4efe - delete extra file (Satrat, Dec 12, 2023)
7e960a3 - unit tests (Satrat, Dec 12, 2023)
ebb5407 - fix failing test (Satrat, Dec 12, 2023)
6a394d7 - quantization edge cases (Satrat, Dec 12, 2023)
d7974bf - quant tests (Satrat, Dec 13, 2023)
4b9014d - Merge branch 'main' into sparse_auto_recipe (Satrat, Dec 13, 2023)
701ab2c - fixes for stage name clashes (Satrat, Dec 13, 2023)
5812488 - Merge branch 'sparse_auto_recipe' of github.com:neuralmagic/sparseml … (Satrat, Dec 13, 2023)
21473aa - clean up documentation (Satrat, Dec 13, 2023)
485501b - setup StageRunner class (Satrat, Dec 13, 2023)
2d536a3 - running one_shot from text_gen script (Satrat, Dec 13, 2023)
a4406ae - cleanup helper fns (Satrat, Dec 14, 2023)
4576a80 - precision support (Satrat, Dec 14, 2023)
27467e3 - formatting (Satrat, Dec 14, 2023)
10a0fed - Merge branch 'main' into alternate_flows (Satrat, Dec 14, 2023)
7c754e0 - WIP for alternating (Satrat, Dec 15, 2023)
0eb06bf - fixing device issue (Satrat, Dec 15, 2023)
f45326d - Merge branch 'sparse_auto_recipe' into alternating_flow_pt2 (Satrat, Dec 15, 2023)
e46dd96 - Merge branch 'main' into sparse_auto_recipe (Satrat, Dec 15, 2023)
d308987 - MVP for alternating flows (Satrat, Dec 15, 2023)
fe9af83 - add apply flag during finalization as well (Satrat, Dec 15, 2023)
5f6e854 - clarity comments (Satrat, Dec 15, 2023)
4588eb2 - Merge branch 'sparse_auto_recipe' of github.com:neuralmagic/sparseml … (Satrat, Dec 15, 2023)
391350d - clean up docstrings (Satrat, Dec 15, 2023)
f7fb65a - fix unit test (Satrat, Dec 15, 2023)
e429929 - Merge branch 'main' into alternate_flows (Satrat, Dec 15, 2023)
7336453 - Merge branch 'sparse_auto_recipe' into alternating_flow_pt2 (Satrat, Dec 15, 2023)
2968171 - Merge branch 'alternate_flows' into alternating_flow_pt2 (Satrat, Dec 15, 2023)
ee1ee2d - add finetuning README (Satrat, Dec 20, 2023)
9004da6 - Merge branch 'main' of github.com:neuralmagic/sparseml (Satrat, Dec 21, 2023)
180a24d - Merge branch 'main' into alternating_flow_pt2 (Satrat, Dec 21, 2023)
a8760eb - cleaning up stage logic (Satrat, Dec 21, 2023)
8eba7dd - Merge branch 'main' into alternating_flow_pt2 (Satrat, Jan 2, 2024)
9ef0d4c - quality (Satrat, Jan 2, 2024)
c4562c0 - Merge branch 'main' into alternating_flow_pt2 (Satrat, Jan 8, 2024)
797413a - Merge branch 'main' into alternating_flow_pt2 (Satrat, Jan 9, 2024)
9 changes: 9 additions & 0 deletions src/sparseml/core/lifecycle/session.py
@@ -78,7 +78,11 @@ def pre_initialize_structure(
if data is not None:
mod_data.append(data)

# mark which modifiers have already had their structures initialized
# so when we consolidate the next recipe this info isn't lost
self.initialized_structure = True
applied_stage_names = [mod.unique_id for mod in self.modifiers if mod.applied]
self.recipe_container.update_applied_stages(applied_stage_names)

return mod_data

@@ -113,6 +117,8 @@ def finalize(self, **kwargs) -> List[Any]:
mod_data.append(data)

self.finalized = True
applied_stage_names = [mod.unique_id for mod in self.modifiers if mod.applied]
self.recipe_container.update_applied_stages(applied_stage_names)

return mod_data

@@ -169,6 +175,9 @@ def _check_compile_recipe(self):
self.modifiers = self.recipe_container.compiled_recipe.create_modifier(
self.state.framework
)
for mod in self.modifiers:
if mod.unique_id in self.recipe_container.applied_stages:
mod.applied = True

def _check_setup_event_lifecycle(self, event_type: EventType):
if self.event_lifecycle is not None:
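These hunks are the heart of the alternating flow: stage IDs that have been applied are recorded on the recipe container during structure pre-initialization and finalization, then restored onto the modifiers created from the next compiled recipe. A minimal, self-contained sketch of that round trip, using hypothetical stub classes in place of the real SparseSession, StageModifiers, and RecipeContainer:

```python
from dataclasses import dataclass, field
from typing import List


@dataclass
class StubStage:
    """Stand-in for StageModifiers: only the fields the bookkeeping touches."""

    group: str
    index: int
    applied: bool = False

    @property
    def unique_id(self) -> str:
        return self.group + "_" + str(self.index)


@dataclass
class StubContainer:
    """Stand-in for RecipeContainer's applied_stages / update_applied_stages."""

    applied_stages: List[str] = field(default_factory=list)

    def update_applied_stages(self, new_stages: List[str]):
        for stage in new_stages:
            if stage not in self.applied_stages:
                self.applied_stages.append(stage)


container = StubContainer()
stages = [StubStage("oneshot", 0, applied=True), StubStage("finetune", 1)]

# pre_initialize_structure / finalize: record the stages that have been applied
container.update_applied_stages([s.unique_id for s in stages if s.applied])

# _check_compile_recipe: the modifier list is rebuilt from the newly compiled
# recipe, so the applied flag is restored from the container's record
recompiled = [StubStage("oneshot", 0), StubStage("finetune", 1)]
for stage in recompiled:
    if stage.unique_id in container.applied_stages:
        stage.applied = True

assert recompiled[0].applied and not recompiled[1].applied
```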
30 changes: 28 additions & 2 deletions src/sparseml/core/modifier/stage.py
@@ -36,11 +36,14 @@ class StageModifiers(ModifierInterface, BaseModel):
:param modifiers: The modifiers to apply as a stage
:param index: The index of the stage, if applicable
:param group: The group name of the stage, if applicable
:param applied: Flag for indicating if this stage has already been
applied to the model, through structure initialization or finalization
"""

modifiers: List["Modifier"] = Field(default_factory=list)
index: Optional[int] = None
group: Optional[str] = None
applied: bool = False

@property
def initialized_structure(self) -> bool:
@@ -66,6 +69,13 @@ def finalized(self) -> bool:
"""
return all(mod.finalized for mod in self.modifiers)

@property
def unique_id(self) -> str:
"""
:return: ID for stage containing the name and index
"""
return self.group + "_" + str(self.index)

def check_initialized(self):
"""
Check if all of the stage modifiers have been initialized, and log a warning
@@ -103,7 +113,7 @@ def calculate_end(self) -> float:

def pre_initialize_structure(self, state: "State", **kwargs):
"""
Pre initialize the structure for all stage modifiers
Pre initialize the structure for all stage modifiers and mark the stage as applied

:param state: The current state of the training
:param kwargs: Additional kwargs to pass to the modifier(s)
@@ -112,6 +122,8 @@ def pre_initialize_structure(self, state: "State", **kwargs):
for modifier in self.modifiers:
modifier.pre_initialize_structure(state, **kwargs)

self.applied = True

def initialize(self, state: "State", **kwargs):
"""
Initialize all the stage modifiers
@@ -120,20 +132,30 @@ def initialize(self, state: "State", **kwargs):
:param kwargs: Additional kwargs to pass to the modifier(s)
initialize method
"""

if self.applied:
return

for modifier in self.modifiers:
modifier.initialize(state, **kwargs)

def finalize(self, state: "State", **kwargs):
"""
Finalize all the stage modifiers
Finalize all the stage modifiers and mark the stage as applied

:param state: The state of current session
:param kwargs: Additional kwargs to pass to the modifier(s)
finalize method
"""

if self.applied:
return

for modifier in self.modifiers:
modifier.finalize(state, **kwargs)

self.applied = True

def update_event(self, state: "State", event: "Event", **kwargs):
"""
Propagate the event to all the stage modifiers
@@ -143,5 +165,9 @@ def update_event(self, state: "State", event: "Event", **kwargs):
:param kwargs: Additional kwargs to pass to the modifier(s)
update_event method
"""

if self.applied:
return

for modifier in self.modifiers:
modifier.update_event(state, event, **kwargs)
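A hedged usage sketch of the new `applied` flag and `unique_id` property. The import path is inferred from the file location above, and the stage group name and the empty child-modifier list are purely illustrative:

```python
from sparseml.core.modifier.stage import StageModifiers

# a stage with no child modifiers, just to show the new fields
stage = StageModifiers(group="quantization", index=0)

print(stage.unique_id)  # "quantization_0", the key stored in applied_stages
print(stage.applied)    # False until pre_initialize_structure or finalize runs

# once applied is True, initialize / finalize / update_event become no-ops,
# which is what lets a later recipe skip stages that already ran
stage.applied = True
```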
13 changes: 13 additions & 0 deletions src/sparseml/core/recipe/container.py
@@ -44,10 +44,12 @@ class RecipeContainer:

:param compiled_recipe: the compiled recipe from the recipes list
:param recipes: the list of RecipeTuple instances to be compiled
:param applied_stages: list of recipe stages that have already been applied
"""

compiled_recipe: Optional[Recipe] = None
recipes: List[RecipeTuple] = field(default_factory=list)
applied_stages: List[str] = field(default_factory=list)

def update(
self,
@@ -118,6 +120,17 @@ def update(

return kwargs

def update_applied_stages(self, new_stages: List[str]):
"""
Updates the applied_stages list with new stages, indicating their structure
has already been applied

:param new_stages: new stage names to add
"""
for stage in new_stages:
if stage not in self.applied_stages:
self.applied_stages.append(stage)

def check_compile_recipe(self) -> bool:
"""
Check if the recipes need to be compiled into a single recipe and
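A small sketch of the new tracking list on `RecipeContainer`. The import path follows the file location above, the other fields keep their defaults, and the stage names are hypothetical:

```python
from sparseml.core.recipe.container import RecipeContainer

container = RecipeContainer()

# record stages whose structure has already been applied; duplicates are ignored
container.update_applied_stages(["oneshot_stage_0", "finetune_stage_1"])
container.update_applied_stages(["oneshot_stage_0"])

print(container.applied_stages)  # ["oneshot_stage_0", "finetune_stage_1"]
```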
22 changes: 12 additions & 10 deletions src/sparseml/core/recipe/recipe.py
@@ -484,20 +484,22 @@ def _modifier_group_to_dict(modifier_group: List[Dict[str, Any]]):
for key, value in modifier.items()
}

def _stage_to_dict(stage: List[Dict[str, Any]]):
# convert a list of stages to a dict of modifiers
def _stage_to_dict(stage: Dict[str, Any]):
# convert a stage to a dict of modifiers
return {
modifier_group_name: _modifier_group_to_dict(modifier_group)
for stage_modifiers in stage
for modifier_group_name, modifier_group in stage_modifiers[
"modifiers"
].items()
for modifier_group_name, modifier_group in stage["modifiers"].items()
}

return {
stage_name: _stage_to_dict(stage=stage)
for stage_name, stage in self.dict()["stages"].items()
}
final_dict = {}
for stage_name, stages in self.dict()["stages"].items():
if len(stages) == 1:
final_dict[stage_name] = _stage_to_dict(stages[0])
else:
for idx, stage in enumerate(stages):
final_dict[stage_name + "_" + str(idx)] = _stage_to_dict(stage)

return final_dict


@dataclass
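The refactor above changes how serialized stages are keyed: a stage name with a single entry keeps its name, while repeated entries get an index suffix. A hypothetical sketch of the resulting shape, with stage, modifier-group, and modifier names invented for illustration:

```python
# hypothetical output of the stage-to-dict conversion after this change
serialized = {
    # "sparsity_stage" appears once in the recipe, so its name is kept as-is
    "sparsity_stage": {"pruning_modifiers": {"SparseGPTModifier": {"sparsity": 0.5}}},
    # "quant_stage" appears twice, so each occurrence is suffixed with its index
    "quant_stage_0": {"quantization_modifiers": {"QuantizationModifier": {}}},
    "quant_stage_1": {"quantization_modifiers": {"QuantizationModifier": {}}},
}
```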
43 changes: 43 additions & 0 deletions src/sparseml/modifiers/quantization/utils/quantize.py
@@ -52,6 +52,8 @@
"add_input_activation_quant_wrappers",
"add_output_activation_observers",
"raise_if_torch_quantization_not_available",
"raise_if_already_quantized",
"is_module_quantized",
]


@@ -148,6 +150,18 @@ def set_quantization_schemes(
# submodule type or graph section set to ignore, skip
continue

if isinstance(submodule, torch_quantization.QuantWrapper):
# special case to catch QuantizableMatMul children
if ignore and _match_submodule_name_or_type(
submodule.module, submodule_name, ignore
):
continue

if is_qat_helper_module(submodule):
# ignore children of an already quantized module, if there is a clash it
# will have been caught in the parent
continue

# override default scheme if necessary
override_key = _match_submodule_name_or_type(
submodule, submodule_name, scheme_overrides
@@ -162,6 +176,7 @@
wrap_qat_targets[submodule_name] = submodule_scheme
elif is_module_type_override or is_quantizable_module(submodule):
# is base quantizable module or user specifically targeted module type
raise_if_already_quantized(submodule_name, submodule)
submodule.quantization_scheme = submodule_scheme

# inject any targeted QATWrappers
@@ -351,6 +366,34 @@ def raise_if_torch_quantization_not_available():
)


def raise_if_already_quantized(module_name: str, module: Module):
"""
:param module_name: name of module to check for quantization
:param module: module to check for quantization
:raises: RuntimeError if the module is already quantized; it cannot be re-quantized
"""
if is_module_quantized(module):
raise RuntimeError(
f"Unable to quantize module {module_name}, as it has already been "
"quantized. Ensure your input recipe does not contain multiple "
"QuantizationModifiers that act on the same module. "
)


def is_module_quantized(module: Module) -> bool:
"""
:param module: module to check for quantization
:return: True if the module is quantized, False otherwise
"""
if hasattr(module, "quantization_scheme") and isinstance(
module.quantization_scheme, QuantizationScheme
):
return True
if isinstance(module, torch_quantization.QuantWrapper):
return True
return False


def _match_submodule_name_or_type(
submodule: Module, submodule_name: str, names_or_types: List[str]
) -> Optional[str]:
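A hedged sketch of the two new helpers. The sparseml import path is taken from the file above; the `torch.ao.quantization` location of `QuantWrapper` is an assumption that depends on the installed torch version:

```python
import torch
from torch.ao.quantization import QuantWrapper  # assumed torch namespace

from sparseml.modifiers.quantization.utils.quantize import (
    is_module_quantized,
    raise_if_already_quantized,
)

linear = torch.nn.Linear(8, 8)
print(is_module_quantized(linear))  # False: no scheme attached, not wrapped

wrapped = QuantWrapper(linear)
print(is_module_quantized(wrapped))  # True: a QuantWrapper counts as quantized

try:
    raise_if_already_quantized("model.linear", wrapped)
except RuntimeError as err:
    print(err)  # points at the double-quantization in the input recipe
```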
78 changes: 78 additions & 0 deletions src/sparseml/pytorch/model_load/helpers.py
@@ -32,6 +32,10 @@
"apply_recipe_structure_to_model",
"reload_model_state",
"reload_model_from_checkpoint",
"save_model_and_recipe",
"fallback_to_cpu",
"parse_dtype",
"get_session_model",
]

_LOGGER = logging.getLogger(__name__)
@@ -57,6 +61,10 @@ def apply_recipe_structure_to_model(model: Module, recipe_path: str, model_path:
model=model, recipe=recipe_path, framework=Framework.pytorch
)

# no need to reload if no recipe was applied
if recipe_path is None:
return

session = session_manager.active_session()
num_stages = len(session.lifecycle.recipe_container.compiled_recipe.stages)
msg = (
@@ -173,3 +181,73 @@ def reload_model_from_checkpoint(model: Module, checkpoint: Optional[str] = None
# reload the state dict for the model from the checkpoint
if reload_model_state(model, checkpoint, orig_state_dict):
_LOGGER.info(f"Reloaded model state from checkpoint {checkpoint}")


def save_model_and_recipe(
model: Module,
save_path: str,
tokenizer: Optional[Any] = None,
):
"""
Save a model, tokenizer and the currently loaded recipe to file

:param model: pytorch model to save
:param save_path: path to save output to
:param tokenizer: model tokenizer to save
"""
model.save_pretrained(save_path)
if tokenizer is not None:
tokenizer.save_pretrained(save_path)

_LOGGER.info("Saving output to {}".format(os.path.abspath(save_path)))

recipe_path = os.path.join(save_path, RECIPE_FILE_NAME)
session = session_manager.active_session()
recipe_yaml_str = session.get_serialized_recipe()
with open(recipe_path, "w") as fp:
fp.write(recipe_yaml_str)


def fallback_to_cpu(device: str) -> str:
"""
Takes in a device string and forces it to cpu if cuda is not available

:param device: device id to check
:return: device modified for CUDA status
"""
if "cuda" in device and not torch.cuda.is_available():
_LOGGER.warning(
f"Requested {device} but CUDA is not available, falling back to CPU"
)
return "cpu"

return device


def parse_dtype(dtype_arg: str) -> torch.dtype:
"""
:param dtype_arg: dtype string to parse
:return: torch.dtype parsed from input string
"""
dtype = "auto" # get precision from model by default
if dtype_arg == "half" or dtype_arg == "float16":
dtype = torch.float16
elif dtype_arg == "bfloat16":
dtype = torch.bfloat16
elif dtype_arg == "full" or dtype_arg == "float32":
dtype = torch.float32

return dtype


def get_session_model() -> Module:
"""
:return: pytorch module stored by the active SparseSession, or None if no session
is active
"""
session = session_manager.active_session()
if not session:
return None

active_model = session.state.model.model
return active_model
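A quick usage sketch for the newly exported helpers, with signatures as shown in the diff and the import path following the file location above:

```python
from sparseml.pytorch.model_load.helpers import (
    fallback_to_cpu,
    get_session_model,
    parse_dtype,
)

device = fallback_to_cpu("cuda:0")  # "cuda:0", or "cpu" with a warning if CUDA is absent
dtype = parse_dtype("bfloat16")     # torch.bfloat16
default = parse_dtype("auto")       # "auto": defer to the model's own precision

# get_session_model() returns the module held by the active SparseSession;
# shown for reference only, since it requires an initialized session:
# model = get_session_model()
```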
4 changes: 4 additions & 0 deletions src/sparseml/transformers/finetune/data/data_args.py
@@ -58,6 +58,10 @@ class DataTrainingArguments:
default=None,
metadata={"help": "Optional percentages of each split to download"},
)
num_calibration_samples: Optional[int] = field(
default=512,
metadata={"help": "Number of samples to use for one-shot calibration"},
)
overwrite_cache: bool = field(
default=False,
metadata={"help": "Overwrite the cached preprocessed datasets or not."},
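A minimal sketch of the new argument, assuming the dataclass's other fields keep their defaults (any required fields not visible in this hunk would still need to be supplied):

```python
from sparseml.transformers.finetune.data.data_args import DataTrainingArguments

# cap one-shot calibration at 256 samples instead of the default 512
data_args = DataTrainingArguments(num_calibration_samples=256)
print(data_args.num_calibration_samples)
```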