Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support exporting > 2Gb transformer models #1514

Merged
merged 14 commits into from
May 11, 2023
18 changes: 17 additions & 1 deletion src/sparseml/pytorch/utils/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,23 @@ def export_onnx(

# clean up graph from any injected / wrapped operations
_delete_trivial_onnx_adds(onnx_model)
onnx.save(onnx_model, file_path)
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved

try:
# alternatively we can just check the size of the onnx file on disk
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved
onnx.save(onnx_model, file_path)

except Exception as e:
_LOGGER.info(
"Attempted to save ONNX model without external data."
f"Results in error: {e}. Saving with external data instead."
)
onnx.save(
onnx_model,
file_path,
location=os.path.basename(file_path).replace("onnx", "data"),
save_as_external_data=True,
all_tensors_to_one_file=True,
)

if convert_qat and is_quant_module:
# overwrite exported model with fully quantized version
Expand Down
26 changes: 22 additions & 4 deletions src/sparseml/transformers/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,14 @@
__all__ = ["export_transformer_to_onnx", "load_task_model"]

MODEL_ONNX_NAME = "model.onnx"
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved
DEPLOYMENT_FILES: List[str] = [
EXTERNAL_ONNX_DATA_NAME = "model.data"
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved
MANDATORY_DEPLOYMENT_FILES: List[str] = [
MODEL_ONNX_NAME,
"tokenizer.json",
EXTERNAL_ONNX_DATA_NAME,
"tokenizer_config.json",
"config.json",
]
OPTIONAL_DEPLOYMENT_FILES: List[str] = [EXTERNAL_ONNX_DATA_NAME, "tokenizer.json"]

_LOGGER = logging.getLogger(__name__)

Expand All @@ -113,7 +115,12 @@ def load_task_model(task: str, model_path: str, config: Any) -> Module:
return SparseAutoModel.question_answering_from_pretrained(
model_name_or_path=model_path,
config=config,
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved
model_type="model",
)
if task == "text-generation" or task == "codegen" or task == "opt":
if task == "codegen":
raise NotImplementedError()
return SparseAutoModel.text_generation_from_pretrained(
model_name_or_path=model_path, config=config
)

if (
Expand Down Expand Up @@ -403,7 +410,9 @@ def create_deployment_folder(

if deployment_files is None:
# set deployment files to default values
deployment_files = copy.deepcopy(DEPLOYMENT_FILES)
deployment_files = copy.deepcopy(
MANDATORY_DEPLOYMENT_FILES + OPTIONAL_DEPLOYMENT_FILES
)
if onnx_file_name != MODEL_ONNX_NAME:
# replace the default onnx model name with the custom one
deployment_files[deployment_files.index(MODEL_ONNX_NAME)] = onnx_file_name
Expand All @@ -418,6 +427,12 @@ def create_deployment_folder(
expected_file_path = os.path.join(training_directory, file_name)
deployment_file_path = os.path.join(deployment_folder_dir, file_name)
if not os.path.exists(expected_file_path):
if file_name in OPTIONAL_DEPLOYMENT_FILES:
_LOGGER.warning(
f"Optional file {file_name} not found in {training_directory}. "
f"Skipping copying to deployment folder."
)
continue
raise ValueError(
f"Attempting to copy {file_name} file from {expected_file_path},"
f"but the file does not exits. Make sure that {training_directory} "
Expand All @@ -426,6 +441,9 @@ def create_deployment_folder(
if file_name == MODEL_ONNX_NAME:
# moving onnx file from training to deployment directory
shutil.move(expected_file_path, deployment_file_path)
elif file_name == EXTERNAL_ONNX_DATA_NAME:
# moving external onnx tensors from training to deployment directory
shutil.move(expected_file_path, deployment_file_path)
else:
# copying remaining `deployment_files` from training to deployment directory
shutil.copyfile(expected_file_path, deployment_file_path)
Expand Down
13 changes: 13 additions & 0 deletions src/sparseml/transformers/utils/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import torch
from torch.nn import Module
from transformers import (
AutoModelForCausalLM,
AutoModelForMaskedLM,
AutoModelForQuestionAnswering,
AutoModelForSequenceClassification,
Expand Down Expand Up @@ -111,6 +112,18 @@ def masked_language_modeling_from_pretrained_distil(

return model, teacher

@staticmethod
def text_generation_from_pretrained(
    model_name_or_path: str,
    config: Optional[Any] = None,
    **kwargs,
) -> Module:
    """
    Load a causal language modeling (text generation) model from a local
    path or a HuggingFace Hub model id.

    :param model_name_or_path: local directory or HuggingFace Hub id of
        the pretrained model to load
    :param config: optional transformers config object to load the
        model with
    :param kwargs: additional keyword arguments forwarded to
        ``AutoModelForCausalLM.from_pretrained``
    :return: the loaded torch ``Module``
    """
    return AutoModelForCausalLM.from_pretrained(
        model_name_or_path, config=config, **kwargs
    )

@staticmethod
def question_answering_from_pretrained(
model_name_or_path: str,
Expand Down
68 changes: 68 additions & 0 deletions src/sparseml/yolov8/modules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch.nn as nn

from ultralytics.nn import modules as ulm


class Conv(nn.Module):
    """
    Drop-in replacement for the ultralytics Conv block that owns a
    distinct SiLU instance per module, so each activation has its own
    name addressable from a SparseML recipe.
    """

    def __init__(self, layer: ulm.Conv):
        super().__init__()
        self.conv = layer.conv
        self.bn = layer.bn
        # replace a (possibly shared) SiLU with a per-instance one;
        # any other activation type is kept as-is
        self.act = nn.SiLU() if isinstance(layer.act, nn.SiLU) else layer.act

    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        return self.act(out)

    def forward_fuse(self, x):
        # used after batchnorm has been folded into the conv weights
        return self.act(self.conv(x))


class AddInput(nn.Module):
    """
    Identity pass-through module; marks an input of a residual add so
    quantization tooling can attach observers to it.
    """

    def forward(self, x):
        # deliberate no-op: return the input unchanged
        return x


class Bottleneck(nn.Module):
    """
    Re-wrapped ultralytics Bottleneck whose residual-add operands are
    routed through AddInput markers so they can be picked up for
    quantization.
    """

    def __init__(self, layer: ulm.Bottleneck):
        super().__init__()
        self.cv1 = layer.cv1
        self.cv2 = layer.cv2
        self.add = layer.add
        # identity markers on both operands of the residual add
        self.add_input_0 = AddInput()
        self.add_input_1 = AddInput()

    def forward(self, x):
        out = self.cv2(self.cv1(x))
        if not self.add:
            return out
        return self.add_input_0(x) + self.add_input_1(out)
24 changes: 23 additions & 1 deletion src/sparseml/yolov8/trainers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from copy import copy, deepcopy
from datetime import datetime
from pathlib import Path
from typing import Optional
from typing import List, Optional

import onnx
import torch
Expand All @@ -31,6 +31,7 @@
from sparseml.pytorch.utils import ModuleExporter
from sparseml.pytorch.utils.helpers import download_framework_model_by_recipe_type
from sparseml.pytorch.utils.logger import LoggerManager, PythonLogger, WANDBLogger
from sparseml.yolov8.modules import Bottleneck, Conv
from sparseml.yolov8.utils.export_samples import export_sample_inputs_outputs
from sparseml.yolov8.validators import (
SparseClassificationValidator,
Expand Down Expand Up @@ -200,11 +201,22 @@ def setup_model(self):
LOGGER.info("Loaded previous weights from sparseml checkpoint")
return ckpt

def _modify_arch_for_quantization(self):
    """
    Swap ultralytics ``Bottleneck`` and ``Conv`` modules inside
    ``self.model`` for their SparseML quantization-friendly
    counterparts, replacing them in place on their parent modules.
    """
    replacements = {"Bottleneck": Bottleneck, "Conv": Conv}
    for module_name, module in self.model.named_modules():
        replacement_cls = replacements.get(module.__class__.__name__)
        if replacement_cls is None:
            continue
        *parent_path, attribute_name = module_name.split(".")
        parent = _get_submodule(self.model, parent_path)
        setattr(parent, attribute_name, replacement_cls(module))

def _build_managers(self, ckpt: Optional[dict]):
if self.args.recipe is not None:
self.manager = ScheduledModifierManager.from_yaml(
self.args.recipe, recipe_variables=self.args.recipe_args
)
if self.manager.quantization_modifiers:
self._modify_arch_for_quantization()

if ckpt is None:
return
Expand All @@ -218,6 +230,8 @@ def _build_managers(self, ckpt: Optional[dict]):
f"at epoch {ckpt['epoch']}"
)
self.checkpoint_manager = ScheduledModifierManager.from_yaml(ckpt["recipe"])
if self.checkpoint_manager.quantization_modifiers:
self._modify_arch_for_quantization()
self.checkpoint_manager.apply_structure(self.model, epoch=float("inf"))

else:
Expand All @@ -228,6 +242,8 @@ def _build_managers(self, ckpt: Optional[dict]):
"Applying structure from un-finished recipe in checkpoint "
f"at epoch {ckpt['epoch']}"
)
if self.manager.quantization_modifiers:
self._modify_arch_for_quantization()
self.manager.apply_structure(self.model, epoch=ckpt["epoch"])

def resume_training(self, ckpt):
Expand Down Expand Up @@ -730,3 +746,9 @@ def generate_ddp_file(trainer):
) as file:
file.write(content)
return file.name


def _get_submodule(module: torch.nn.Module, path: List[str]) -> torch.nn.Module:
if not path:
return module
return _get_submodule(getattr(module, path[0]), path[1:])