Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support exporting > 2Gb transformer models #1514

Merged
merged 14 commits into from
May 11, 2023
18 changes: 17 additions & 1 deletion src/sparseml/pytorch/utils/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,23 @@ def export_onnx(

# clean up graph from any injected / wrapped operations
_delete_trivial_onnx_adds(onnx_model)
onnx.save(onnx_model, file_path)
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved

try:
# alternatively we can just check the size of the onnx file on disk
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved
onnx.save(onnx_model, file_path)

except Exception as e:
_LOGGER.info(
"Attempted to save ONNX model without external data."
f"Results in error: {e}. Saving with external data instead."
)
onnx.save(
onnx_model,
file_path,
location=os.path.basename(file_path).replace("onnx", "data"),
save_as_external_data=True,
all_tensors_to_one_file=True,
)

if convert_qat and is_quant_module:
# overwrite exported model with fully quantized version
Expand Down
26 changes: 22 additions & 4 deletions src/sparseml/transformers/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,14 @@
__all__ = ["export_transformer_to_onnx", "load_task_model"]

MODEL_ONNX_NAME = "model.onnx"
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved
DEPLOYMENT_FILES: List[str] = [
EXTERNAL_ONNX_DATA_NAME = "model.data"
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved
MANDATORY_DEPLOYMENT_FILES: List[str] = [
MODEL_ONNX_NAME,
"tokenizer.json",
EXTERNAL_ONNX_DATA_NAME,
"tokenizer_config.json",
"config.json",
]
OPTIONAL_DEPLOYMENT_FILES: List[str] = [EXTERNAL_ONNX_DATA_NAME, "tokenizer.json"]

_LOGGER = logging.getLogger(__name__)

Expand All @@ -113,7 +115,12 @@ def load_task_model(task: str, model_path: str, config: Any) -> Module:
return SparseAutoModel.question_answering_from_pretrained(
model_name_or_path=model_path,
config=config,
dbogunowicz marked this conversation as resolved.
Show resolved Hide resolved
model_type="model",
)
if task == "text-generation" or task == "codegen" or task == "opt":
if task == "codegen":
raise NotImplementedError()
return SparseAutoModel.text_generation_from_pretrained(
model_name_or_path=model_path, config=config
)

if (
Expand Down Expand Up @@ -403,7 +410,9 @@ def create_deployment_folder(

if deployment_files is None:
# set deployment files to default values
deployment_files = copy.deepcopy(DEPLOYMENT_FILES)
deployment_files = copy.deepcopy(
MANDATORY_DEPLOYMENT_FILES + OPTIONAL_DEPLOYMENT_FILES
)
if onnx_file_name != MODEL_ONNX_NAME:
# replace the default onnx model name with the custom one
deployment_files[deployment_files.index(MODEL_ONNX_NAME)] = onnx_file_name
Expand All @@ -418,6 +427,12 @@ def create_deployment_folder(
expected_file_path = os.path.join(training_directory, file_name)
deployment_file_path = os.path.join(deployment_folder_dir, file_name)
if not os.path.exists(expected_file_path):
if file_name in OPTIONAL_DEPLOYMENT_FILES:
_LOGGER.warning(
f"Optional file {file_name} not found in {training_directory}. "
f"Skipping copying to deployment folder."
)
continue
raise ValueError(
f"Attempting to copy {file_name} file from {expected_file_path},"
f"but the file does not exits. Make sure that {training_directory} "
Expand All @@ -426,6 +441,9 @@ def create_deployment_folder(
if file_name == MODEL_ONNX_NAME:
# moving onnx file from training to deployment directory
shutil.move(expected_file_path, deployment_file_path)
elif file_name == EXTERNAL_ONNX_DATA_NAME:
# moving external onnx tensors from training to deployment directory
shutil.move(expected_file_path, deployment_file_path)
else:
# copying remaining `deployment_files` from training to deployment directory
shutil.copyfile(expected_file_path, deployment_file_path)
Expand Down
13 changes: 13 additions & 0 deletions src/sparseml/transformers/utils/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import torch
from torch.nn import Module
from transformers import (
AutoModelForCausalLM,
AutoModelForMaskedLM,
AutoModelForQuestionAnswering,
AutoModelForSequenceClassification,
Expand Down Expand Up @@ -111,6 +112,18 @@ def masked_language_modeling_from_pretrained_distil(

return model, teacher

@staticmethod
def text_generation_from_pretrained(
    model_name_or_path: str,
    config: Optional[Any] = None,
    **kwargs,
) -> Module:
    """
    Load a causal language modeling (text generation) model from a local
    path or a HuggingFace Hub model id.

    :param model_name_or_path: local directory or HuggingFace Hub id of
        the pretrained model to load
    :param config: optional transformers config object to load the
        model with
    :param kwargs: additional keyword arguments forwarded to
        ``AutoModelForCausalLM.from_pretrained``
    :return: the loaded torch ``Module``
    """
    return AutoModelForCausalLM.from_pretrained(
        model_name_or_path, config=config, **kwargs
    )

@staticmethod
def question_answering_from_pretrained(
model_name_or_path: str,
Expand Down
68 changes: 68 additions & 0 deletions src/sparseml/yolov8/modules.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch.nn as nn

from ultralytics.nn import modules as ulm


class Conv(nn.Module):
    """
    Drop-in replacement for the ultralytics Conv block that owns a
    distinct SiLU instance per module, so each activation has its own
    name addressable from a SparseML recipe.
    """

    def __init__(self, layer: ulm.Conv):
        super().__init__()
        self.conv = layer.conv
        self.bn = layer.bn
        # replace a (possibly shared) SiLU with a per-instance one;
        # any other activation type is kept as-is
        self.act = nn.SiLU() if isinstance(layer.act, nn.SiLU) else layer.act

    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        return self.act(out)

    def forward_fuse(self, x):
        # used after batchnorm has been folded into the conv weights
        return self.act(self.conv(x))


class AddInput(nn.Module):
    """
    Identity pass-through module; marks an input of a residual add so
    quantization tooling can attach observers to it.
    """

    def forward(self, x):
        # deliberate no-op: return the input unchanged
        return x


class Bottleneck(nn.Module):
    """
    Re-wrapped ultralytics Bottleneck whose residual-add operands are
    routed through AddInput markers so they can be picked up for
    quantization.
    """

    def __init__(self, layer: ulm.Bottleneck):
        super().__init__()
        self.cv1 = layer.cv1
        self.cv2 = layer.cv2
        self.add = layer.add
        # identity markers on both operands of the residual add
        self.add_input_0 = AddInput()
        self.add_input_1 = AddInput()

    def forward(self, x):
        out = self.cv2(self.cv1(x))
        if not self.add:
            return out
        return self.add_input_0(x) + self.add_input_1(out)
24 changes: 23 additions & 1 deletion src/sparseml/yolov8/trainers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from copy import copy, deepcopy
from datetime import datetime
from pathlib import Path
from typing import Optional
from typing import List, Optional

import onnx
import torch
Expand All @@ -31,6 +31,7 @@
from sparseml.pytorch.utils import ModuleExporter
from sparseml.pytorch.utils.helpers import download_framework_model_by_recipe_type
from sparseml.pytorch.utils.logger import LoggerManager, PythonLogger, WANDBLogger
from sparseml.yolov8.modules import Bottleneck, Conv
from sparseml.yolov8.utils.export_samples import export_sample_inputs_outputs
from sparseml.yolov8.validators import (
SparseClassificationValidator,
Expand Down Expand Up @@ -200,11 +201,22 @@ def setup_model(self):
LOGGER.info("Loaded previous weights from sparseml checkpoint")
return ckpt

def _modify_arch_for_quantization(self):
    """
    Swap ultralytics ``Bottleneck`` and ``Conv`` modules inside
    ``self.model`` for their SparseML quantization-friendly
    counterparts, replacing them in place on their parent modules.
    """
    replacements = {"Bottleneck": Bottleneck, "Conv": Conv}
    for module_name, module in self.model.named_modules():
        replacement_cls = replacements.get(module.__class__.__name__)
        if replacement_cls is None:
            continue
        *parent_path, attribute_name = module_name.split(".")
        parent = _get_submodule(self.model, parent_path)
        setattr(parent, attribute_name, replacement_cls(module))

def _build_managers(self, ckpt: Optional[dict]):
if self.args.recipe is not None:
self.manager = ScheduledModifierManager.from_yaml(
self.args.recipe, recipe_variables=self.args.recipe_args
)
if self.manager.quantization_modifiers:
self._modify_arch_for_quantization()

if ckpt is None:
return
Expand All @@ -218,6 +230,8 @@ def _build_managers(self, ckpt: Optional[dict]):
f"at epoch {ckpt['epoch']}"
)
self.checkpoint_manager = ScheduledModifierManager.from_yaml(ckpt["recipe"])
if self.checkpoint_manager.quantization_modifiers:
self._modify_arch_for_quantization()
self.checkpoint_manager.apply_structure(self.model, epoch=float("inf"))

else:
Expand All @@ -228,6 +242,8 @@ def _build_managers(self, ckpt: Optional[dict]):
"Applying structure from un-finished recipe in checkpoint "
f"at epoch {ckpt['epoch']}"
)
if self.manager.quantization_modifiers:
self._modify_arch_for_quantization()
self.manager.apply_structure(self.model, epoch=ckpt["epoch"])

def resume_training(self, ckpt):
Expand Down Expand Up @@ -730,3 +746,9 @@ def generate_ddp_file(trainer):
) as file:
file.write(content)
return file.name


def _get_submodule(module: torch.nn.Module, path: List[str]) -> torch.nn.Module:
if not path:
return module
return _get_submodule(getattr(module, path[0]), path[1:])