Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Export Refactor][Image Classification] export_sample_inputs_outputs function #1888

Merged
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
6d8991c
initial commit
dbogunowicz Dec 5, 2023
059ceab
looking good, time to cleanup
dbogunowicz Dec 6, 2023
b390c2e
Delete src/sparseml/export/helpers.py
dbogunowicz Dec 6, 2023
c2c8444
Delete tests/sparseml/export/test_helpers.py
dbogunowicz Dec 6, 2023
6ce6ba5
ready for review
dbogunowicz Dec 6, 2023
6f3e5e7
Merge branch 'feature/damian/create_model_ic' of github.com:neuralmag…
dbogunowicz Dec 6, 2023
5dfbdcd
improve design
dbogunowicz Dec 6, 2023
042c193
tests pass
dbogunowicz Dec 6, 2023
29cfa1d
reuse _validate_dataset_num_classes
dbogunowicz Dec 6, 2023
ab73aec
initial commit
dbogunowicz Dec 6, 2023
f628532
Update src/sparseml/pytorch/image_classification/integration_helper_f…
dbogunowicz Dec 6, 2023
b93b634
Update src/sparseml/pytorch/image_classification/integration_helper_f…
dbogunowicz Dec 6, 2023
e7606cd
ready for review
dbogunowicz Dec 7, 2023
ea9cb61
Update src/sparseml/export/export.py
dbogunowicz Dec 7, 2023
9572e0b
Update src/sparseml/integration_helper_functions.py
dbogunowicz Dec 7, 2023
0229deb
initial commit
dbogunowicz Dec 7, 2023
a8c1b68
fixes
dbogunowicz Dec 7, 2023
2379354
ready for review
dbogunowicz Dec 7, 2023
741fb12
nit
dbogunowicz Dec 7, 2023
ebdeb9f
add return
dbogunowicz Dec 7, 2023
8b2fca0
initial commit
dbogunowicz Dec 7, 2023
ff52598
initial commit
dbogunowicz Dec 8, 2023
50ca948
PR comments
dbogunowicz Dec 11, 2023
2f71f7b
beautification
dbogunowicz Dec 11, 2023
cabc17e
Merge remote-tracking branch 'origin/feature/damian/feature_branch_ex…
dbogunowicz Dec 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/sparseml/core/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def pre_initialize_structure(
This will run the pre-initialize structure method for each modifier in the
session's lifecycle. This will also set the session's state to the
pre-initialized state. Takes care of cases when the model(s) structure
has been previosuly modified by a modifier.
has been previously modified by a modifier.

:param model: the model to pre-initialize the structure for
:param recipe: the recipe to use for the sparsification, can be a path to a
Expand Down
13 changes: 13 additions & 0 deletions src/sparseml/export/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
118 changes: 49 additions & 69 deletions src/sparseml/export.py → src/sparseml/export/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,56 +14,26 @@

import logging
from pathlib import Path
from typing import Any, Callable, List, Optional, Union

from pydantic import BaseModel, Field

from typing import Any, List, Optional, Union

from sparseml.export.helpers import apply_optimizations, export_sample_inputs_outputs
from sparseml.exporters import ExportTargets
from sparseml.integration_helper_functions import (
IntegrationHelperFunctions,
infer_integration,
)
from sparseml.pytorch.opset import TORCH_DEFAULT_ONNX_OPSET
from sparsezoo.utils.registry import RegistryMixin


_LOGGER = logging.getLogger(__name__)
AVAILABLE_DEPLOYMENT_TARGETS = ["deepsparse", "onnxruntime"]


class IntegrationHelperFunctions(BaseModel, RegistryMixin):
"""
Registry that maps integration names to helper functions
for creation/export/manipulation of models for a specific
integration.
"""

create_model: Optional[Callable] = Field(
description="A function that creates a (sparse) "
"PyTorch model from a source path."
)
create_dummy_input: Optional[Callable] = Field(
description="A function that creates a dummy input "
"given a (sparse) PyTorch model."
)
export_model: Optional[Callable] = Field(
description="A function that exports a (sparse) PyTorch "
"model to an ONNX format appropriate for a "
"deployment target."
)
apply_optimizations: Optional[Callable] = Field(
description="A function that takes a set of "
"optimizations and applies them to an ONNX model."
)
export_sample_inputs_outputs: Optional[Callable] = Field(
description="A function that exports input/output samples given "
"a (sparse) PyTorch model."
)
create_deployment_folder: Optional[Callable] = Field(
description="A function that creates a "
"deployment folder for the exporter ONNX model"
"with the appropriate structure."
)
AVAILABLE_DEPLOYMENT_TARGETS = [target.value for target in ExportTargets]
ONNX_MODEL_NAME = "model.onnx"


def export(
source_path: Union[Path, str],
target_path: Union[Path, str],
model_onnx_name: str = ONNX_MODEL_NAME,
deployment_target: str = "deepsparse",
integration: Optional[str] = None,
sample_data: Optional[Any] = None,
Expand All @@ -72,25 +42,26 @@ def export(
single_graph_file: bool = True,
graph_optimizations: Union[str, List[str], None] = "all",
validate_correctness: bool = False,
export_sample_inputs_outputs: bool = False,
num_export_samples: int = 0,
deployment_directory_name: str = "deployment",
device: str = "auto",
):
"""
Export a PyTorch model to a deployment target specified by the `deployment_target`.

The functionality follows a set of steps:
1. Create a PyTorch model from the source_path.
2. Create a dummy input for the model.
3. Export the model, using the precomputed dummy input, to an
ONNX format appropriate for the deployment target.
4. Apply optimizations to the exported model (optional).
1. Create a PyTorch model from the file located in source_path.
2. Create model dummy input.
3. Export the model to the format specified by the `deployment_target`.
4. (Optional) Apply optimizations to the exported model.
5. Export sample inputs and outputs for the exported model (optional).
6. Create a deployment folder for the exported model with the appropriate structure.
7. Validate the correctness of the exported model (optional).

:param source_path: The path to the PyTorch model to export.
:param target_path: The path to save the exported model to.
:param model_onnx_name: The name of the exported model.
Defaults to ONNX_MODEL_NAME.
:param deployment_target: The deployment target to export
the model to. Defaults to 'deepsparse'.
:param integration: The name of the integration to use for
Expand All @@ -110,8 +81,8 @@ def export(
to the exported model. Defaults to 'all'.
:param validate_correctness: Whether to validate the correctness
of the exported model. Defaults to False.
:param export_sample_inputs_outputs: Whether to export sample
inputs and outputs for the exported model.Defaults to False.
:param num_export_samples: The number of samples to export for
the exported model. Defaults to 0.
:param deployment_directory_name: The name of the deployment
directory to create for the exported model. Thus, the exported
model will be saved to `target_path/deployment_directory_name`.
Expand All @@ -132,20 +103,43 @@ def export(
IntegrationHelperFunctions.load_from_registry(integration)
)

model = helper_functions.create_model(source_path, device)
# for now, this code is not runnable, serves as a blueprint
model, auxiliary_items = helper_functions.create_model(
source_path, **kwargs # noqa: F821
)
sample_data = (
helper_functions.create_dummy_input(model, batch_size)
helper_functions.create_dummy_input(**auxiliary_items)
if sample_data is None
else sample_data
)
onnx_file_path = helper_functions.export_model(
model, sample_data, target_path, deployment_target, opset, single_graph_file
model, sample_data, target_path, deployment_target, opset
)

helper_functions.apply_optimizations(onnx_file_path, graph_optimizations)
apply_optimizations(
onnx_file_path=onnx_file_path,
graph_optimizations=graph_optimizations,
available_graph_optimizations=helper_functions.graph_optimizations,
single_graph_file=single_graph_file,
)

if export_sample_inputs_outputs:
helper_functions.export_sample_inputs_outputs(model, target_path)
if num_export_samples:
data_loader = auxiliary_items.get("validation_loader")
if data_loader is None:
raise ValueError(
"To export sample inputs/outputs a data loader is needed."
"To enable the export, provide a `validatation_loader` "
"as a part of `auxiliary_items` output of the `create_model` function."
)
input_samples, output_samples = helper_functions.create_sample_inputs_outputs(
num_samples=num_export_samples, data_loader=data_loader
)
export_sample_inputs_outputs(
input_samples=input_samples,
output_samples=output_samples,
target_path=target_path,
as_tar=True,
)

deployment_path = helper_functions.create_deployment_folder(
source_path, target_path, deployment_directory_name
Expand All @@ -166,20 +160,6 @@ def export(
)


def infer_integration(source_path: Union[Path, str]) -> str:
"""
Infer the integration to use for exporting the model from the source_path.
For example:
- for transformers model the integration
can be inferred from `config.json`
- for computer vision, the integration
can be inferred from the model architecture (`arch_key`)
:param source_path: The path to the PyTorch model to export.
:return: The name of the integration to use for exporting the model.
"""
raise NotImplementedError


def validate_correctness(deployment_path: Union[Path, str]):
"""
Validate the correctness of the exported model.
Expand Down
172 changes: 172 additions & 0 deletions src/sparseml/export/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import shutil
import tarfile
from collections import OrderedDict
from enum import Enum
from pathlib import Path
from typing import Callable, List, Union

import onnx

from sparsezoo.utils.onnx import save_onnx


__all__ = ["apply_optimizations", "export_sample_inputs_outputs"]


class GraphOptimizationOptions(Enum):
    """
    Holds the string names of the graph optimization options.
    """

    none = "none"  # apply no graph optimizations
    all = "all"  # apply every available graph optimization


class OutputsNames(Enum):
    """
    Naming constants for exported sample outputs:
    the directory basename and the per-file prefix.
    """

    basename = "sample-outputs"  # directory name, e.g. .../sample-outputs/
    filename = "out"  # file prefix, e.g. out_0001.npz


class InputsNames(Enum):
    """
    Naming constants for exported sample inputs:
    the directory basename and the per-file prefix.
    """

    basename = "sample-inputs"  # directory name, e.g. .../sample-inputs/
    filename = "inp"  # file prefix, e.g. inp_0001.npz


def export_sample_inputs_outputs(
    input_samples: List["torch.Tensor"],  # noqa F821
    output_samples: List["torch.Tensor"],  # noqa F821
    target_path: Union[Path, str],
    as_tar: bool = False,
):
    """
    Save the given input and output samples under the target path.

    Input samples land in:
        .../sample-inputs/inp_0001.npz, inp_0002.npz, ...
    Output samples land in:
        .../sample-outputs/out_0001.npz, out_0002.npz, ...

    When ``as_tar`` is True, each folder is additionally packed into
    ``.../sample-inputs.tar.gz`` / ``.../sample-outputs.tar.gz`` and the
    unpacked folder is deleted afterwards.

    :param input_samples: the input samples to save
    :param output_samples: the output samples to save
    :param target_path: the directory to save the samples under
    :param as_tar: whether to pack each sample folder into a tar.gz archive
    """
    # imported lazily so this module does not require torch at import time
    from sparseml.pytorch.utils.helpers import tensors_export, tensors_to_device

    # move tensors to CPU before serializing them to disk
    samples_with_names = [
        (tensors_to_device(input_samples, "cpu"), InputsNames),
        (tensors_to_device(output_samples, "cpu"), OutputsNames),
    ]

    for sample_tensors, names in samples_with_names:
        tensors_export(
            tensors=sample_tensors,
            export_dir=os.path.join(target_path, names.basename.value),
            name_prefix=names.filename.value,
        )

    if as_tar:
        for names in (InputsNames, OutputsNames):
            _tar_and_remove_folder(os.path.join(target_path, names.basename.value))


def _tar_and_remove_folder(folder_path: str):
    """Pack ``folder_path`` into ``<folder_path>.tar.gz``, then delete the folder."""
    with tarfile.open(folder_path + ".tar.gz", "w:gz") as tar:
        tar.add(folder_path, arcname=os.path.basename(folder_path))
    shutil.rmtree(folder_path)


def apply_optimizations(
    onnx_file_path: Union[str, Path],
    available_optimizations: OrderedDict[str, Callable],
    target_optimizations: Union[str, List[str]] = GraphOptimizationOptions.all.value,
    single_graph_file: bool = True,
):
    """
    Apply graph optimizations to an ONNX model, overwriting it in place.

    NOTE(review): the call site in export.py passes keyword arguments
    `graph_optimizations=` / `available_graph_optimizations=` that do not
    match this signature (`target_optimizations` / `available_optimizations`)
    — confirm and align one of the two.

    :param onnx_file_path: The path to the ONNX model file.
    :param available_optimizations: The graph optimizations available
        for the model. It is an ordered mapping from the string names
        to functions that alter the model
    :param target_optimizations: The name(s) of optimizations to apply.
        Either a list of string names or a single string option that
        selects a set of optimizations; for the string form refer to the
        `GraphOptimizationOptions` enum for the available options.
    :param single_graph_file: Whether to save the optimized graph to a single
        file or split it into multiple files. By default, it is True.
    """
    to_apply: List[Callable] = resolve_graph_optimizations(
        available_optimizations=available_optimizations,
        optimizations=target_optimizations,
    )

    model = onnx.load(onnx_file_path)
    for optimization_fn in to_apply:
        model = optimization_fn(model)

    if single_graph_file:
        save_onnx(model, onnx_file_path)
    else:
        # TODO: multi-file saving is not implemented yet
        save_onnx_multiple_files(model)


def resolve_graph_optimizations(
    available_optimizations: OrderedDict[str, Callable],
    optimizations: Union[str, List[str]],
) -> List[Callable]:
    """
    Get the optimization functions to apply to the onnx model.

    :param available_optimizations: The graph optimizations available
        for the model. It is an ordered mapping from the string names
        to functions that alter the model
    :param optimizations: The name(s) of optimizations to apply.
        Either a list of string names, or a single string option that
        selects a set of optimizations (see the `GraphOptimizationOptions`
        enum for the available options).
    :return: The list of optimization functions to apply.
    """
    # explicit list of names: look each one up (KeyError on unknown names)
    if isinstance(optimizations, list):
        return [available_optimizations[name] for name in optimizations]

    # single string option: "none" or "all"
    if isinstance(optimizations, str):
        if optimizations == GraphOptimizationOptions.none.value:
            return []
        if optimizations == GraphOptimizationOptions.all.value:
            return list(available_optimizations.values())

    # unknown string option or unsupported type
    raise KeyError(f"Unknown graph optimization option: {optimizations}")


# TODO: To discuss with @bfineran
def save_onnx_multiple_files(*args, **kwargs):
    """Save an ONNX model split across multiple files. Not implemented yet (see TODO above)."""
    raise NotImplementedError
11 changes: 11 additions & 0 deletions src/sparseml/exporters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,14 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from enum import Enum


class ExportTargets(Enum):
    """
    Holds the names of the supported export targets
    """

    # the string values feed AVAILABLE_DEPLOYMENT_TARGETS in export.py
    deepsparse = "deepsparse"
    onnx = "onnx"
Loading