Enable SD XL ONNX export and ONNX Runtime inference #1168

Merged
40 commits merged on Jul 17, 2023

Changes from 29 commits

Commits
691cee3
add stable diffusion XL export
echarlaix Jul 6, 2023
b2b8d73
fix style
echarlaix Jul 6, 2023
86bd2aa
fix test model name
echarlaix Jul 6, 2023
9930adb
merge main in branch
echarlaix Jul 6, 2023
d98e859
Fix issues related to merging with main
echarlaix Jul 6, 2023
8f8f595
fix
echarlaix Jul 6, 2023
ef7d65b
fix style
echarlaix Jul 6, 2023
4d4eded
remove clip with projection from test
echarlaix Jul 7, 2023
d813195
change model name
echarlaix Jul 7, 2023
e3b81d0
fix style
echarlaix Jul 7, 2023
4905eab
remove need create pretrainedconfig
echarlaix Jul 7, 2023
a6a68d0
fix style
echarlaix Jul 7, 2023
10497b3
fix dummy input generation
echarlaix Jul 7, 2023
89eb71c
fix style
echarlaix Jul 7, 2023
99755f2
fix style
echarlaix Jul 7, 2023
1777870
add saving second tokenzier when exporting a SD XL model
echarlaix Jul 7, 2023
4aab693
fix style
echarlaix Jul 7, 2023
044a1fa
add SD XL pipeline
echarlaix Jul 10, 2023
99c9736
fix style
echarlaix Jul 10, 2023
45d94c1
add test
echarlaix Jul 10, 2023
ffb337d
add watermarker
echarlaix Jul 10, 2023
3ad4355
fix style
echarlaix Jul 10, 2023
b53e3c3
fix style
echarlaix Jul 10, 2023
cd437ff
fix style
echarlaix Jul 10, 2023
77aaebf
add watermark
echarlaix Jul 11, 2023
40a62e7
add test
echarlaix Jul 11, 2023
02e0304
set default height width stable diffusion pipeline
echarlaix Jul 11, 2023
46d9d1d
add img2img
echarlaix Jul 11, 2023
080f2ba
disable img2img until added
echarlaix Jul 11, 2023
b5c8909
remove img2img
echarlaix Jul 11, 2023
e5d0bb4
add img2img
echarlaix Jul 11, 2023
74457ee
remove redundant
echarlaix Jul 11, 2023
bac78eb
fix style
echarlaix Jul 11, 2023
00d26ba
fix style
echarlaix Jul 11, 2023
fc48e94
enable to only have the second tokenizer and text encoder
echarlaix Jul 11, 2023
6c12200
add test
echarlaix Jul 12, 2023
c522840
minor
echarlaix Jul 12, 2023
13bdbe3
fix cli export
echarlaix Jul 13, 2023
a0d5592
Merge branch 'main' into sd-XL
echarlaix Jul 13, 2023
0426fa4
test for batch size > 1
echarlaix Jul 17, 2023
46 changes: 18 additions & 28 deletions optimum/exporters/onnx/__main__.py
@@ -23,15 +23,7 @@
from transformers.utils import is_torch_available

from ...commands.export.onnx import parse_args_onnx
from ...utils import (
DEFAULT_DUMMY_SHAPES,
DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER,
DIFFUSION_MODEL_UNET_SUBFOLDER,
DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER,
DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER,
ONNX_WEIGHTS_NAME,
logging,
)
from ...utils import DEFAULT_DUMMY_SHAPES, ONNX_WEIGHTS_NAME, logging
from ...utils.save_utils import maybe_save_preprocessors
from ..error_utils import AtolError, OutputMatchError, ShapeError
from ..tasks import TasksManager
@@ -71,8 +63,9 @@ def _get_submodels_and_onnx_configs(
custom_architecture: bool,
fn_get_submodels: Optional[Callable] = None,
):
is_stable_diffusion = "stable-diffusion" in task
if not custom_architecture:
if task == "stable-diffusion":
if is_stable_diffusion:
onnx_config = None
models_and_onnx_configs = get_stable_diffusion_models_for_export(model)
else:
@@ -104,7 +97,7 @@ def _get_submodels_and_onnx_configs(
if fn_get_submodels is not None:
submodels_for_export = fn_get_submodels(model)
else:
if task == "stable-diffusion":
if is_stable_diffusion:
submodels_for_export = _get_submodels_for_export_stable_diffusion(model)
elif (
model.config.is_encoder_decoder
@@ -312,7 +305,9 @@ def main_export(
)

custom_architecture = False
if task != "stable-diffusion" and model.config.model_type.replace(
is_stable_diffusion = "stable-diffusion" in task

if not is_stable_diffusion and model.config.model_type.replace(
"-", "_"
) not in TasksManager.get_supported_model_type_for_task(task, exporter="onnx"):
custom_architecture = True
@@ -330,7 +325,7 @@

if (
not custom_architecture
and task != "stable-diffusion"
and not is_stable_diffusion
and task + "-with-past"
in TasksManager.get_supported_tasks_for_model_type(model.config.model_type.replace("_", "-"), "onnx")
):
@@ -367,7 +362,7 @@
fn_get_submodels=fn_get_submodels,
)

if task != "stable-diffusion":
if not is_stable_diffusion:
needs_pad_token_id = (
isinstance(onnx_config, OnnxConfigWithPast)
and getattr(model.config, "pad_token_id", None) is None
@@ -415,28 +410,23 @@

onnx_files_subpaths = None
else:
onnx_files_subpaths = [
DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER,
DIFFUSION_MODEL_UNET_SUBFOLDER,
DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER,
DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER,
]

# save the subcomponent configuration
for model_name, name_dir in zip(models_and_onnx_configs, onnx_files_subpaths):
for model_name in models_and_onnx_configs:
subcomponent = models_and_onnx_configs[model_name][0]
if hasattr(subcomponent, "save_config"):
subcomponent.save_config(output / name_dir)
subcomponent.save_config(output / model_name)
elif hasattr(subcomponent, "config") and hasattr(subcomponent.config, "save_pretrained"):
subcomponent.config.save_pretrained(output / name_dir)
subcomponent.config.save_pretrained(output / model_name)

onnx_files_subpaths = [os.path.join(path, ONNX_WEIGHTS_NAME) for path in onnx_files_subpaths]
onnx_files_subpaths = [os.path.join(name_dir, ONNX_WEIGHTS_NAME) for name_dir in models_and_onnx_configs]

# Saving the additional components needed to perform inference.
model.tokenizer.save_pretrained(output.joinpath("tokenizer"))
model.scheduler.save_pretrained(output.joinpath("scheduler"))
if model.feature_extractor is not None:
if getattr(model, "feature_extractor", None) is not None:
model.feature_extractor.save_pretrained(output.joinpath("feature_extractor"))
if getattr(model, "tokenizer_2", None) is not None:
model.tokenizer_2.save_pretrained(output.joinpath("tokenizer_2"))
model.save_config(output)

_, onnx_outputs = export_models(
@@ -464,7 +454,7 @@

# Optionally post process the obtained ONNX file(s), for example to merge the decoder / decoder with past if any
# TODO: treating stable diffusion separately is quite ugly
if not no_post_process and task != "stable-diffusion":
if not no_post_process and not is_stable_diffusion:
try:
logger.info("Post-processing the exported models...")
models_and_onnx_configs, onnx_files_subpaths = onnx_config.post_process_exported_models(
@@ -475,7 +465,7 @@
f"The post-processing of the ONNX export failed. The export can still be performed by passing the option --no-post-process. Detailed error: {e}"
)

if task == "stable-diffusion":
if is_stable_diffusion:
use_subprocess = (
False # TODO: fix Can't pickle local object 'get_stable_diffusion_models_for_export.<locals>.<lambda>'
)
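
Taken together, the changes above let a single main_export call handle an SD XL checkpoint: the stable-diffusion branch now keys the exported subfolders off the submodel names and also saves the second tokenizer. The following is a hedged sketch of the intended invocation, not part of the diff; the model id, output path, and the exact directory layout are assumptions based on the saving logic in this hunk.

# Hedged sketch (not part of this PR): exporting an SD XL checkpoint with main_export.
# The model id and output directory are placeholders.
from optimum.exporters.onnx import main_export

main_export(
    model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0",
    output="sdxl_onnx",
    task="stable-diffusion-xl",
)

# Based on the saving logic above, the output directory should roughly contain:
#   text_encoder/, text_encoder_2/, unet/, vae_encoder/, vae_decoder/   (model.onnx + config)
#   tokenizer/, tokenizer_2/, scheduler/                                (preprocessor files)
#   plus the pipeline configuration saved by model.save_config(output)
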
2 changes: 2 additions & 0 deletions optimum/exporters/onnx/convert.py
@@ -369,6 +369,8 @@ def _run_validation(
if isinstance(value, (list, tuple)):
value = config.flatten_output_collection_property(name, value)
onnx_inputs.update({tensor_name: pt_tensor.cpu().numpy() for tensor_name, pt_tensor in value.items()})
elif isinstance(value, (dict)):
onnx_inputs.update({tensor_name: pt_tensor.cpu().numpy() for tensor_name, pt_tensor in value.items()})
else:
onnx_inputs[name] = value.cpu().numpy()

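
The new elif branch feeds dict-valued entries (for example the UNet's added_cond_kwargs) into the ONNX Runtime input dictionary during validation. Below is a hedged, self-contained illustration of that flattening with made-up tensors; the shapes follow the usual SD XL convention and are not taken from the diff.

# Hedged illustration: flattening a nested dict of tensors into named ONNX inputs,
# mirroring the dict branch added above.
import torch

value = {
    "text_embeds": torch.randn(2, 1280),  # pooled prompt embedding (assumed size)
    "time_ids": torch.zeros(2, 6),        # original_size + crop coords + target_size
}
onnx_inputs = {}
onnx_inputs.update({tensor_name: pt_tensor.cpu().numpy() for tensor_name, pt_tensor in value.items()})
print(list(onnx_inputs))  # ['text_embeds', 'time_ids']
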
44 changes: 41 additions & 3 deletions optimum/exporters/onnx/model_configs.py
@@ -658,14 +658,15 @@ def outputs(self) -> Dict[str, Dict[int, str]]:
}


class CLIPTextOnnxConfig(TextEncoderOnnxConfig):
class CLIPTextWithProjectionOnnxConfig(TextEncoderOnnxConfig):
ATOL_FOR_VALIDATION = 1e-3
# The ONNX export of this architecture needs the Trilu operator support, available since opset 14
DEFAULT_ONNX_OPSET = 14

NORMALIZED_CONFIG_CLASS = NormalizedConfig.with_args(
vocab_size="vocab_size",
sequence_length="max_position_embeddings",
num_layers="num_hidden_layers",
allow_new=True,
)

@@ -677,13 +678,33 @@ def inputs(self) -> Dict[str, Dict[int, str]]:

@property
def outputs(self) -> Dict[str, Dict[int, str]]:
return {
outputs = {
"text_embeds": {0: "batch_size", 1: "sequence_length"},
"last_hidden_state": {0: "batch_size", 1: "sequence_length"},
}
if self._normalized_config.output_hidden_states:
for i in range(self._normalized_config.num_layers + 1):
outputs[f"hidden_states.{i}"] = {0: "batch_size", 1: "sequence_length"}

return outputs


class CLIPTextOnnxConfig(CLIPTextWithProjectionOnnxConfig):
@property
def outputs(self) -> Dict[str, Dict[int, str]]:
outputs = {
"last_hidden_state": {0: "batch_size", 1: "sequence_length"},
"pooler_output": {0: "batch_size"},
}
if self._normalized_config.output_hidden_states:
for i in range(self._normalized_config.num_layers + 1):
outputs[f"hidden_states.{i}"] = {0: "batch_size", 1: "sequence_length"}

return outputs

def generate_dummy_inputs(self, framework: str = "pt", **kwargs):
dummy_inputs = super().generate_dummy_inputs(framework=framework, **kwargs)

if framework == "pt":
import torch

@@ -713,12 +734,19 @@ class UNetOnnxConfig(VisionOnnxConfig):

@property
def inputs(self) -> Dict[str, Dict[int, str]]:
return {
inputs = {
"sample": {0: "batch_size", 1: "num_channels", 2: "height", 3: "width"},
"timestep": {0: "steps"},
"encoder_hidden_states": {0: "batch_size", 1: "sequence_length"},
}

# TODO : add text_image, image and image_embeds
if getattr(self._normalized_config, "addition_embed_type", None) == "text_time":
inputs["text_embeds"] = {0: "batch_size"}
inputs["time_ids"] = {0: "batch_size"}

return inputs

@property
def outputs(self) -> Dict[str, Dict[int, str]]:
return {
@@ -734,8 +762,18 @@ def torch_to_onnx_output_map(self) -> Dict[str, str]:
def generate_dummy_inputs(self, framework: str = "pt", **kwargs):
dummy_inputs = super().generate_dummy_inputs(framework=framework, **kwargs)
dummy_inputs["encoder_hidden_states"] = dummy_inputs["encoder_hidden_states"][0]

if getattr(self._normalized_config, "addition_embed_type", None) == "text_time":
dummy_inputs["added_cond_kwargs"] = {
"text_embeds": dummy_inputs.pop("text_embeds"),
"time_ids": dummy_inputs.pop("time_ids"),
}

return dummy_inputs

def ordered_inputs(self, model) -> Dict[str, Dict[int, str]]:
return self.inputs


class VaeEncoderOnnxConfig(VisionOnnxConfig):
ATOL_FOR_VALIDATION = 1e-2
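
For SD XL UNets (addition_embed_type == "text_time"), UNetOnnxConfig now declares two extra flat inputs, text_embeds and time_ids, and generate_dummy_inputs regroups them into added_cond_kwargs for the PyTorch forward pass. A hedged sketch of inspecting the resulting config follows; it reuses the constructor pattern from the export utilities, and the model id is a placeholder.

# Hedged sketch (not part of the diff): inspecting the UNet ONNX inputs for an SD XL model.
# The task/model_type pair mirrors what the stable-diffusion export utilities use.
from diffusers import StableDiffusionXLPipeline
from optimum.exporters.tasks import TasksManager

pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
unet_config_constructor = TasksManager.get_exporter_config_constructor(
    model=pipe.unet, exporter="onnx", task="semantic-segmentation", model_type="unet"
)
unet_onnx_config = unet_config_constructor(pipe.unet.config)
print(unet_onnx_config.inputs)
# Expected to include, besides sample / timestep / encoder_hidden_states:
#   "text_embeds": {0: "batch_size"} and "time_ids": {0: "batch_size"}
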
23 changes: 23 additions & 0 deletions optimum/exporters/onnx/utils.py
@@ -100,14 +100,23 @@ def _get_submodels_for_export_stable_diffusion(
"""
Returns the components of a Stable Diffusion model.
"""
from diffusers import StableDiffusionXLPipeline

models_for_export = {}

if isinstance(pipeline, StableDiffusionXLPipeline):
pipeline.text_encoder.config.output_hidden_states = True
projection_dim = pipeline.text_encoder_2.config.projection_dim
else:
projection_dim = pipeline.text_encoder.config.projection_dim

# Text encoder
models_for_export["text_encoder"] = pipeline.text_encoder

# U-NET
# PyTorch does not support the ONNX export of torch.nn.functional.scaled_dot_product_attention
pipeline.unet.set_attn_processor(AttnProcessor())
pipeline.unet.config.text_encoder_projection_dim = projection_dim
models_for_export["unet"] = pipeline.unet

# VAE Encoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565
@@ -124,6 +133,10 @@
vae_decoder.forward = lambda latent_sample: vae_decoder.decode(z=latent_sample)
models_for_export["vae_decoder"] = vae_decoder

if getattr(pipeline, "text_encoder_2", None) is not None:
pipeline.text_encoder_2.config.output_hidden_states = True
models_for_export["text_encoder_2"] = pipeline.text_encoder_2

return models_for_export


@@ -278,6 +291,16 @@ def get_stable_diffusion_models_for_export(
vae_onnx_config = vae_config_constructor(vae_decoder.config)
models_for_export["vae_decoder"] = (vae_decoder, vae_onnx_config)

if "text_encoder_2" in models_for_export:
onnx_config_constructor = TasksManager.get_exporter_config_constructor(
model=pipeline.text_encoder_2,
exporter="onnx",
task="feature-extraction",
model_type="clip-text-with-projection",
)
onnx_config = onnx_config_constructor(pipeline.text_encoder_2.config)
models_for_export["text_encoder_2"] = (models_for_export["text_encoder_2"], onnx_config)

return models_for_export


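
With these additions, the export utilities also collect text_encoder_2 and force both text encoders to return hidden states, while the UNet config gains the text_encoder_projection_dim it needs for the "text_time" embeddings. A hedged sketch of calling the updated helper directly; the model id is a placeholder and the expected keys follow from the code above.

# Hedged sketch (not part of the diff): collecting SD XL submodels and their ONNX configs.
from diffusers import StableDiffusionXLPipeline
from optimum.exporters.onnx.utils import get_stable_diffusion_models_for_export

pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
models_and_onnx_configs = get_stable_diffusion_models_for_export(pipe)
print(sorted(models_and_onnx_configs))
# Expected: ['text_encoder', 'text_encoder_2', 'unet', 'vae_decoder', 'vae_encoder'],
# each mapped to a (submodel, OnnxConfig) pair.
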
23 changes: 19 additions & 4 deletions optimum/exporters/tasks.py
@@ -171,6 +171,7 @@ class TasksManager:
"audio-xvector": "AutoModelForAudioXVector",
"image-to-text": "AutoModelForVision2Seq",
"stable-diffusion": "StableDiffusionPipeline",
"stable-diffusion-xl": "StableDiffusionXLPipeline",
"zero-shot-image-classification": "AutoModelForZeroShotImageClassification",
"zero-shot-object-detection": "AutoModelForZeroShotObjectDetection",
}
@@ -267,6 +268,7 @@ class TasksManager:
"image-to-text": "transformers",
"sentence-similarity": "transformers",
"stable-diffusion": "diffusers",
"stable-diffusion-xl": "diffusers",
"summarization": "transformers",
"visual-question-answering": "transformers",
"zero-shot-classification": "transformers",
@@ -390,6 +392,10 @@ class TasksManager:
"feature-extraction",
onnx="CLIPTextOnnxConfig",
),
"clip-text-with-projection": supported_tasks_mapping(
"feature-extraction",
onnx="CLIPTextWithProjectionOnnxConfig",
),
"codegen": supported_tasks_mapping(
"feature-extraction",
"feature-extraction-with-past",
@@ -931,7 +937,14 @@ class TasksManager:
onnx="YolosOnnxConfig",
),
}
_UNSUPPORTED_CLI_MODEL_TYPE = {"unet", "vae-encoder", "vae-decoder", "clip-text-model", "trocr"}
_UNSUPPORTED_CLI_MODEL_TYPE = {
"unet",
"vae-encoder",
"vae-decoder",
"clip-text-model",
"clip-text-with-projection",
"trocr",
}
_SUPPORTED_CLI_MODEL_TYPE = set(_SUPPORTED_MODEL_TYPE.keys()) - _UNSUPPORTED_CLI_MODEL_TYPE

@classmethod
@@ -1271,7 +1284,7 @@ def _infer_task_from_model_or_model_class(
(
target_name.startswith("Auto"),
target_name.startswith("TFAuto"),
target_name == "StableDiffusionPipeline",
"StableDiffusion" in target_name,
)
):
if target_name == auto_cls_name:
@@ -1314,8 +1327,10 @@ def _infer_task_from_model_name_or_path(
model_info = huggingface_hub.model_info(model_name_or_path, revision=revision)
if model_info.library_name == "diffusers":
# TODO : getattr(model_info, "model_index") defining auto_model_class_name currently set to None
if "stable-diffusion" in model_info.tags:
inferred_task_name = "stable-diffusion"
for task in ("stable-diffusion-xl", "stable-diffusion"):
if task in model_info.tags:
inferred_task_name = task
break
else:
pipeline_tag = getattr(model_info, "pipeline_tag", None)
# conversational is not a supported task per se, just an alias that may map to
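
Because the tag loop checks "stable-diffusion-xl" before "stable-diffusion", task inference from Hub tags now resolves SD XL checkpoints to the new task. A hedged sketch, assuming the usual TasksManager entry point and a placeholder model id:

# Hedged sketch (not part of the diff): inferring the task from a Hub model's tags.
from optimum.exporters.tasks import TasksManager

task = TasksManager.infer_task_from_model("stabilityai/stable-diffusion-xl-base-1.0")
print(task)  # expected to resolve to "stable-diffusion-xl" for an SD XL checkpoint
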
4 changes: 4 additions & 0 deletions optimum/onnxruntime/__init__.py
@@ -71,12 +71,14 @@
"ORTStableDiffusionPipeline",
"ORTStableDiffusionImg2ImgPipeline",
"ORTStableDiffusionInpaintPipeline",
"ORTStableDiffusionXLPipeline",
]
else:
_import_structure["modeling_diffusion"] = [
"ORTStableDiffusionPipeline",
"ORTStableDiffusionImg2ImgPipeline",
"ORTStableDiffusionInpaintPipeline",
"ORTStableDiffusionXLPipeline",
]


@@ -124,12 +126,14 @@
ORTStableDiffusionImg2ImgPipeline,
ORTStableDiffusionInpaintPipeline,
ORTStableDiffusionPipeline,
ORTStableDiffusionXLPipeline,
)
else:
from .modeling_diffusion import (
ORTStableDiffusionImg2ImgPipeline,
ORTStableDiffusionInpaintPipeline,
ORTStableDiffusionPipeline,
ORTStableDiffusionXLPipeline,
)
else:
import sys
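
The re-exported symbol is meant to be used like the existing ORT diffusion pipelines. A hedged usage sketch; the model id, prompt, output file name, and the export=True convenience flag are assumptions following the established ORTStableDiffusionPipeline API rather than something shown in this diff.

# Hedged usage sketch (not part of the diff): running SD XL inference with ONNX Runtime.
from optimum.onnxruntime import ORTStableDiffusionXLPipeline

pipeline = ORTStableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", export=True
)
image = pipeline(prompt="sailing ship in storm by Leonardo da Vinci").images[0]
image.save("sdxl_ship.png")
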