Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Support exporting > 2Gb transformer models #1514

Merged
merged 14 commits into from
May 11, 2023
18 changes: 17 additions & 1 deletion src/sparseml/pytorch/utils/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,23 @@ def export_onnx(

# clean up graph from any injected / wrapped operations
_delete_trivial_onnx_adds(onnx_model)
onnx.save(onnx_model, file_path)
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved

try:
# alternatively we can just check the size of the onnx file on disk
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved
onnx.save(onnx_model, file_path)

except Exception as e:
_LOGGER.info(
"Attempted to save ONNX model without external data."
f"Results in error: {e}. Saving with external data instead."
)
onnx.save(
onnx_model,
file_path,
location=os.path.basename(file_path).replace("onnx", "data"),
save_as_external_data=True,
all_tensors_to_one_file=True,
)

if convert_qat and is_quant_module:
# overwrite exported model with fully quantized version
Expand Down
25 changes: 21 additions & 4 deletions src/sparseml/transformers/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,13 @@
__all__ = ["export_transformer_to_onnx", "load_task_model"]

MODEL_ONNX_NAME = "model.onnx"
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved
DEPLOYMENT_FILES: List[str] = [
EXTERNAL_ONNX_DATA_NAME = "model.data"
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved
MANDATORY_DEPLOYMENT_FILES: List[str] = [
MODEL_ONNX_NAME,
"tokenizer.json",
"tokenizer_config.json",
"config.json",
]
OPTIONAL_DEPLOYMENT_FILES: List[str] = [EXTERNAL_ONNX_DATA_NAME, "tokenizer.json"]

_LOGGER = logging.getLogger(__name__)

Expand All @@ -113,7 +114,12 @@ def load_task_model(task: str, model_path: str, config: Any) -> Module:
return SparseAutoModel.question_answering_from_pretrained(
model_name_or_path=model_path,
config=config,
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved
model_type="model",
)
if task == "text-generation" or task == "codegen" or task == "opt" or task=="bloom":
if task not in ["text-generation", "opt"]:
raise NotImplementedError()
return SparseAutoModel.text_generation_from_pretrained(
model_name_or_path=model_path, config=config
)

if (
Expand Down Expand Up @@ -403,7 +409,9 @@ def create_deployment_folder(

if deployment_files is None:
# set deployment files to default values
deployment_files = copy.deepcopy(DEPLOYMENT_FILES)
deployment_files = copy.deepcopy(
MANDATORY_DEPLOYMENT_FILES + OPTIONAL_DEPLOYMENT_FILES
)
if onnx_file_name != MODEL_ONNX_NAME:
# replace the default onnx model name with the custom one
deployment_files[deployment_files.index(MODEL_ONNX_NAME)] = onnx_file_name
Expand All @@ -418,6 +426,12 @@ def create_deployment_folder(
expected_file_path = os.path.join(training_directory, file_name)
deployment_file_path = os.path.join(deployment_folder_dir, file_name)
if not os.path.exists(expected_file_path):
if file_name in OPTIONAL_DEPLOYMENT_FILES:
_LOGGER.warning(
f"Optional file {file_name} not found in {training_directory}. "
f"Skipping copying to deployment folder."
)
continue
raise ValueError(
f"Attempting to copy {file_name} file from {expected_file_path},"
f"but the file does not exits. Make sure that {training_directory} "
Expand All @@ -426,6 +440,9 @@ def create_deployment_folder(
if file_name == MODEL_ONNX_NAME:
# moving onnx file from training to deployment directory
shutil.move(expected_file_path, deployment_file_path)
elif file_name == EXTERNAL_ONNX_DATA_NAME:
# moving external onnx tensors from training to deployment directory
shutil.move(expected_file_path, deployment_file_path)
else:
# copying remaining `deployment_files` from training to deployment directory
shutil.copyfile(expected_file_path, deployment_file_path)
Expand Down
13 changes: 13 additions & 0 deletions src/sparseml/transformers/utils/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import torch
from torch.nn import Module
from transformers import (
AutoModelForCausalLM,
AutoModelForMaskedLM,
AutoModelForQuestionAnswering,
AutoModelForSequenceClassification,
Expand Down Expand Up @@ -111,6 +112,18 @@ def masked_language_modeling_from_pretrained_distil(

return model, teacher

@staticmethod
def text_generation_from_pretrained(
    model_name_or_path: str,
    config: Optional[Any] = None,
    **kwargs,
) -> Module:
    """
    Load a pretrained causal language model for text-generation tasks.

    :param model_name_or_path: model identifier or local path forwarded to
        ``AutoModelForCausalLM.from_pretrained``
    :param config: optional model configuration object passed through to
        ``from_pretrained``
    :param kwargs: any additional keyword arguments forwarded verbatim to
        ``from_pretrained``
    :return: the loaded causal LM as a ``torch.nn.Module``
    """
    # single pass-through to the transformers auto-class; no post-processing
    return AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        config=config,
        **kwargs,
    )

@staticmethod
def question_answering_from_pretrained(
model_name_or_path: str,
Expand Down