[Export Refactor] End to end testing (#1898)

* initial commit * looking good, time to cleanup * Delete src/sparseml/export/helpers.py * Delete tests/sparseml/export/test_helpers.py * ready for review * improve design * tests pass * reuse _validate_dataset_num_classes * initial commit * Update src/sparseml/pytorch/image_classification/integration_helper_functions.py * Update src/sparseml/pytorch/image_classification/integration_helper_functions.py * ready for review * Update src/sparseml/export/export.py * Update src/sparseml/integration_helper_functions.py * initial commit * fixes * ready for review * nit * add return * initial commit * initial commit * initial commit * initial commit * Delete tests/sparseml/test_integration_helper_functions.py * ready to merge * add structure validator * ready for review * Delete tests/sparseml/export/model.onnx * Delete tests/sparseml/export/image_classification/model.onnx * Delete tests/sparseml/export/image_classification/conftest.py * PR comments * remove onnx
neuralmagic · Dec 14, 2023 · fcd361b · fcd361b
1 parent d8f1174
commit fcd361b
Show file tree

Hide file tree

Showing 12 changed files with 613 additions and 227 deletions.
diff --git a/src/sparseml/export/export.py b/src/sparseml/export/export.py
@@ -16,19 +16,21 @@
 from pathlib import Path
 from typing import Any, List, Optional, Union
 
+from sparseml.export.export_data import export_data_samples
 from sparseml.export.helpers import (
     AVAILABLE_DEPLOYMENT_TARGETS,
     ONNX_MODEL_NAME,
     apply_optimizations,
     create_deployment_folder,
 )
-from sparseml.export.validate_correctness import validate_correctness
-from sparseml.export_data import export_data_samples
-from sparseml.integration_helper_functions import (
+from sparseml.export.validators import validate_correctness as validate_correctness_
+from sparseml.export.validators import validate_structure as validate_structure_
+from sparseml.pytorch.opset import TORCH_DEFAULT_ONNX_OPSET
+from sparseml.pytorch.utils.helpers import default_device
+from src.sparseml.integration_helper_functions import (
     IntegrationHelperFunctions,
-    infer_integration,
+    resolve_integration,
 )
-from sparseml.pytorch.opset import TORCH_DEFAULT_ONNX_OPSET
 
 
 _LOGGER = logging.getLogger(__name__)
@@ -37,135 +39,195 @@
 def export(
     source_path: Union[Path, str],
     target_path: Union[Path, str],
-    model_onnx_name: str = ONNX_MODEL_NAME,
+    onnx_model_name: str = ONNX_MODEL_NAME,
     deployment_target: str = "deepsparse",
-    integration: Optional[str] = None,
-    sample_data: Optional[Any] = None,
     opset: int = TORCH_DEFAULT_ONNX_OPSET,
-    batch_size: Optional[int] = None,
     single_graph_file: bool = True,
-    graph_optimizations: Union[str, List[str], None] = "all",
-    validate_model_correctness: bool = False,
     num_export_samples: int = 0,
     deployment_directory_name: str = "deployment",
     device: str = "auto",
+    graph_optimizations: Union[str, List[str], None] = "all",
+    validate_correctness: bool = False,
+    validate_structure: bool = True,
+    integration: Optional[str] = None,
+    sample_data: Optional[Any] = None,
+    batch_size: Optional[int] = None,
+    **kwargs,
 ):
     """
-    Export a PyTorch model to a deployment target specified by the `deployment_target`.
-
-    The functionality follows a set of steps:
-    1. Create a PyTorch model from the file located in source_path.
-    2. Create model dummy input.
-    3. Export the model to the format specified by the `deployment_target`.
-    4. (Optional) Apply optimizations to the exported model.
-    5. Export sample inputs and outputs for the exported model (optional).
-    6. Create a deployment folder for the exported model with the appropriate structure.
-    7. Validate the correctness of the exported model (optional).
+    Export a PyTorch model located in source_path, to target_path.
+    The deployment files will be located at target_path/deployment_directory_name
+
+    The exporting logic consists of the following steps:
+    1. Create the model and validation dataloader (if needed) using the
+        integration-specific `create_model` function.
+    2. Export the model to ONNX using the integration-specific `export` function.
+    3. Apply the graph optimizations to the exported model.
+    4. Create the deployment folder at target_path/deployment_directory_name
+        using the integration-specific `create_deployment_folder` function.
+    5. Optionally, export samples using the integration-specific
+        `create_data_samples` function.
+    6. Optionally, validate the correctness of the exported model using
+        the integration-specific `validate_correctness` function.
+    7. Optionally, validate the structure of the exported model using
+        the integration-specific `validate_structure` function.
 
     :param source_path: The path to the PyTorch model to export.
     :param target_path: The path to save the exported model to.
-    :param model_onnx_name: The name of the exported model.
+    :param onnx_model_name: The name of the exported model.
         Defaults to ONNX_MODEL_NAME.
     :param deployment_target: The deployment target to export
         the model to. Defaults to 'deepsparse'.
-    :param integration: The name of the integration to use for
-        exporting the model.Defaults to None, which will infer
-        the integration from the source_path.
-    :param sample_data: Optional sample data to use for exporting
-        the model. If not provided, a dummy input will be created
-        for the model. Defaults to None.
     :param opset: The ONNX opset to use for exporting the model.
         Defaults to the latest supported opset.
-    :param batch_size: The batch size to use for exporting the model.
-        Defaults to None.
     :param single_graph_file: Whether to save the model as a single
-        file (that contains both the model graph and model weights).
-        Defaults to True.
-    :param graph_optimizations: The graph optimizations to apply
-        to the exported model. Defaults to 'all'.
-    :param validate_model_correctness: Whether to validate the correctness
-        of the exported model. Defaults to False.
-    :param num_export_samples: The number of samples to export for
+        file. Defaults to True.
+    :param num_export_samples: The number of samples to create for
         the exported model. Defaults to 0.
     :param deployment_directory_name: The name of the deployment
         directory to create for the exported model. Thus, the exported
         model will be saved to `target_path/deployment_directory_name`.
         Defaults to 'deployment'.
     :param device: The device to use for exporting the model.
         Defaults to 'auto'.
+    :param graph_optimizations: The graph optimizations to apply
+        to the exported model. Defaults to 'all'.
+    :param validate_correctness: Whether to validate the correctness
+        of the exported model. Defaults to False.
+    :param validate_structure: Whether to validate the structure
+        of the exporter model (contents of the target_path).
+    :param integration: The name of the integration to use for
+        exporting the model.Defaults to None, which will infer
+        the integration from the source_path.
+    :param sample_data: Optional sample data to use for exporting
+        the model. If not provided, a dummy input will be created
+        for the model. Defaults to None.
+    :param batch_size: The batch size to use for exporting the data.
+        Defaults to None.
     """
 
+    # create the target path if it doesn't exist
+    if not Path(target_path).exists():
+        Path(target_path).mkdir(parents=True, exist_ok=True)
+
+    # choose the appropriate device
+    device = default_device() if device == "auto" else device
+
+    # assert the valid deployment target
     if deployment_target not in AVAILABLE_DEPLOYMENT_TARGETS:
         raise ValueError(
             "Argument: deployment_target must be "
             f"one of {AVAILABLE_DEPLOYMENT_TARGETS}. "
             f"Got {deployment_target} instead."
         )
 
-    integration = integration or infer_integration(source_path)
+    integration = resolve_integration(source_path, integration)
+
+    _LOGGER.info(f"Starting export for {integration} model...")
+
     helper_functions: IntegrationHelperFunctions = (
         IntegrationHelperFunctions.load_from_registry(integration)
     )
 
-    # for now, this code is not runnable, serves as a blueprint
-    model, auxiliary_items = helper_functions.create_model(
-        source_path, **kwargs  # noqa: F821
+    _LOGGER.info("Creating model for the export...")
+    model, validation_dataloader = helper_functions.create_model(
+        source_path, batch_size, device, **kwargs
     )
+
+    if validation_dataloader:
+        _LOGGER.info("Created validation dataloader for the export")
+    else:
+        _LOGGER.warning(
+            "Failed to create validation dataloader for the export. "
+            "Will be using the dummy (or user-provided) data instead "
+            "and will be not able to export samples or validate the model "
+            "correctness."
+        )
+
     sample_data = (
-        helper_functions.create_dummy_input(**auxiliary_items)
+        helper_functions.create_dummy_input(
+            validation_dataloader=validation_dataloader, **kwargs
+        )
         if sample_data is None
         else sample_data
     )
-    onnx_file_path = helper_functions.export_model(
-        model, sample_data, target_path, deployment_target, opset
+
+    _LOGGER.info(f"Exporting {onnx_model_name} to {target_path}...")
+    onnx_file_path = helper_functions.export(
+        model=model,
+        sample_data=sample_data,
+        target_path=target_path,
+        onnx_model_name=onnx_model_name,
+        deployment_target=deployment_target,
+        opset=opset,
     )
+    _LOGGER.info(f"Successfully exported {onnx_model_name} to {target_path}...")
 
+    _LOGGER.info(
+        f"Applying optimizations: {graph_optimizations} to the exported model..."
+    )
     apply_optimizations(
         onnx_file_path=onnx_file_path,
-        graph_optimizations=graph_optimizations,
-        available_graph_optimizations=helper_functions.graph_optimizations,
+        target_optimizations=graph_optimizations,
+        available_optimizations=helper_functions.graph_optimizations,
         single_graph_file=single_graph_file,
     )
 
     if num_export_samples:
-        data_loader = auxiliary_items.get("validation_loader")
-        if data_loader is None:
+        _LOGGER.info(f"Exporting {num_export_samples} samples...")
+        if not validation_dataloader:
             raise ValueError(
-                "To export sample inputs/outputs a data loader is needed."
-                "To enable the export, provide a `validation_loader` "
-                "as a part of `auxiliary_items` output of the `create_model` function."
+                "To export sample inputs/outputs a data loader is needed. "
+                "To return a data loader provide the appropriate, integration-specific "
+                "arguments to `create_model` function"
             )
         (
             input_samples,
             output_samples,
             label_samples,
         ) = helper_functions.create_data_samples(
-            num_samples=num_export_samples, data_loader=data_loader, model=model
+            num_samples=num_export_samples,
+            data_loader=validation_dataloader,
+            model=model,
         )
         export_data_samples(
             input_samples=input_samples,
             output_samples=output_samples,
             label_samples=label_samples,
             target_path=target_path,
-            as_tar=True,
+            as_tar=False,
         )
 
+    _LOGGER.info(
+        f"Creating deployment folder {deployment_directory_name} "
+        f"at directory: {target_path}..."
+    )
+
     deployment_path = create_deployment_folder(
         source_path=source_path,
         target_path=target_path,
         deployment_directory_name=deployment_directory_name,
         deployment_directory_files=helper_functions.deployment_directory_structure,
-        onnx_model_name=model_onnx_name,
+        onnx_model_name=onnx_model_name,
     )
+    if validate_structure:
+        _LOGGER.info("Validating model structure...")
+        validate_structure_(
+            target_path=target_path,
+            deployment_directory_name=deployment_directory_name,
+            onnx_model_name=onnx_model_name,
+            deployment_directory_files=helper_functions.deployment_directory_structure,
+        )
 
-    if validate_model_correctness:
+    if validate_correctness:
+        _LOGGER.info("Validating model correctness...")
         if not num_export_samples:
             raise ValueError(
                 "To validate correctness sample inputs/outputs are needed."
                 "To enable the validation, set `num_export_samples`"
                 "to True"
             )
-        validate_correctness(deployment_path, model_onnx_name)
+        validate_correctness_(target_path, deployment_path, onnx_model_name)
 
     _LOGGER.info(
         f"Successfully exported model from:\n{target_path}"

diff --git a/src/sparseml/export/export_data.py b/src/sparseml/export/export_data.py
@@ -20,10 +20,13 @@
 from pathlib import Path
 from typing import List, Optional, Tuple, Union
 
+import torch
 from tqdm import tqdm
 
+from sparseml.pytorch.utils.helpers import tensors_export, tensors_to_device
 
-__all__ = ["create_data_samples"]
+
+__all__ = ["create_data_samples", "export_data_samples"]
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -44,30 +47,37 @@ class InputsNames(Enum):
 
 
 def create_data_samples(
-    data_loader: "torch.utils.data.DataLoader",  # noqa F821
-    model: Optional["torch.nn.Module"] = None,  # noqa F821
+    data_loader: torch.utils.data.DataLoader,
+    model: Optional[torch.nn.Module] = None,
     num_samples: int = 1,
-) -> Tuple[
-    List["torch.Tensor"],  # noqa F821
-    Optional[List["torch.Tensor"]],  # noqa F821
-    List["torch.Tensor"],  # noqa F821
-]:
+) -> Tuple[List[torch.Tensor], List[torch.Tensor], List[torch.Tensor]]:
     """
     Fetch a batch of samples from the data loader and return the inputs and outputs
 
     :param data_loader: The data loader to get a batch of inputs/outputs from.
+    :param model: The model to run the inputs through to get the outputs.
+        If None, the outputs will be an empty list.
     :param num_samples: The number of samples to generate. Defaults to 1
     :return: The inputs and outputs as lists of torch tensors
     """
     inputs, outputs, labels = [], [], []
+    if model is None:
+        _LOGGER.warning("The model is None. The list of outputs will be empty")
     for batch_num, (inputs_, labels_) in tqdm(enumerate(data_loader)):
         if batch_num == num_samples:
             break
         if model:
             outputs_ = model(inputs_)
+            if isinstance(outputs_, tuple):
+                # outputs_ contains (logits, softmax)
+                outputs_ = outputs_[0]
             outputs.append(outputs_)
         inputs.append(inputs_)
-        labels.append(labels_)
+        labels.append(
+            torch.IntTensor([labels_])
+            if not isinstance(labels_, torch.Tensor)
+            else labels_
+        )
 
     return inputs, outputs, labels
 
@@ -115,14 +125,16 @@ def export_data_samples(
         [InputsNames, OutputsNames, LabelNames],
     ):
         if samples is not None:
-            _LOGGER.info(f"Exporting {names.basename.value} to {target_path}")
+            _LOGGER.info(f"Exporting {names.basename.value} to {target_path}...")
             export_data_sample(samples, names, target_path, as_tar)
+            _LOGGER.info(
+                f"Successfully exported {names.basename.value} to {target_path}!"
+            )
 
 
 def export_data_sample(
     samples, names: Enum, target_path: Union[Path, str], as_tar: bool = False
 ):
-    from sparseml.pytorch.utils.helpers import tensors_export, tensors_to_device
 
     samples = tensors_to_device(samples, "cpu")
 

diff --git a/src/sparseml/export/export_torch_model.py b/src/sparseml/export/export_torch_model.py
@@ -16,6 +16,7 @@
 from pathlib import Path
 from typing import Union
 
+import onnx
 import torch
 
 from sparseml.exporters import ExportTargets
@@ -50,15 +51,15 @@ def export_model(
     """
 
     model.eval()
-
+    path_to_exported_model = os.path.join(target_path, onnx_model_name)
     exporter = TorchToONNX(sample_batch=sample_data, opset=opset, **kwargs)
-    exporter.export(model, os.path.join(target_path, onnx_model_name))
+    exporter.export(model, path_to_exported_model)
     if deployment_target == ExportTargets.deepsparse.value:
         exporter = ONNXToDeepsparse()
-        model = exporter.load_model(os.path.join(target_path, onnx_model_name))
-        exporter.export(model, os.path.join(target_path, onnx_model_name))
+        model = onnx.load(path_to_exported_model)
+        exporter.export(model, path_to_exported_model)
+        return path_to_exported_model
     if deployment_target == ExportTargets.onnx.value:
-        pass
+        return path_to_exported_model
     else:
         raise ValueError(f"Unsupported deployment target: {deployment_target}")
-    return os.path.join(target_path, onnx_model_name)