Skip to content

Commit

Permalink
Improved OpenVINO quantization code (#12614)
Browse files Browse the repository at this point in the history
* Improved OpenVINO quantization code

* Auto-format by Ultralytics actions

---------

Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
  • Loading branch information
adrianboguszewski and UltralyticsAssistant committed Jan 12, 2024
1 parent 9abbef5 commit de64179
Showing 1 changed file with 11 additions and 20 deletions.
31 changes: 11 additions & 20 deletions export.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,28 +226,19 @@ def export_openvino(file, metadata, half, int8, data, prefix=colorstr("OpenVINO:
from openvino.tools import mo # noqa

LOGGER.info(f"\n{prefix} starting export with openvino {ov.__version__}...")
f = str(file).replace(file.suffix, f"_openvino_model{os.sep}")
f = str(file).replace(file.suffix, f"_{'int8_' if int8 else ''}openvino_model{os.sep}")
f_onnx = file.with_suffix(".onnx")
f_ov = str(Path(f) / file.with_suffix(".xml").name)

ov_model = mo.convert_model(f_onnx, model_name=file.stem, framework="onnx", compress_to_fp16=half) # export

if int8:
check_requirements("nncf>=2.4.0") # requires at least version 2.4.0 to use the post-training quantization
check_requirements("nncf>=2.5.0") # requires at least version 2.5.0 to use the post-training quantization
import nncf
import numpy as np
from openvino.runtime import Core

from utils.dataloaders import create_dataloader

core = Core()
onnx_model = core.read_model(f_onnx) # export

def prepare_input_tensor(image: np.ndarray):
input_tensor = image.astype(np.float32) # uint8 to fp16/32
input_tensor /= 255.0 # 0 - 255 to 0.0 - 1.0

if input_tensor.ndim == 3:
input_tensor = np.expand_dims(input_tensor, 0)
return input_tensor

def gen_dataloader(yaml_path, task="train", imgsz=640, workers=4):
data_yaml = check_yaml(yaml_path)
data = check_dataset(data_yaml)
Expand All @@ -268,15 +259,15 @@ def transform_fn(data_item):
Returns:
input_tensor: Input data for quantization
"""
img = data_item[0].numpy()
input_tensor = prepare_input_tensor(img)
return input_tensor
assert data_item[0].dtype == torch.uint8, "input image must be uint8 for the quantization preprocessing"

img = data_item[0].numpy().astype(np.float32) # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
return np.expand_dims(img, 0) if img.ndim == 3 else img

ds = gen_dataloader(data)
quantization_dataset = nncf.Dataset(ds, transform_fn)
ov_model = nncf.quantize(onnx_model, quantization_dataset, preset=nncf.QuantizationPreset.MIXED)
else:
ov_model = mo.convert_model(f_onnx, model_name=file.stem, framework="onnx", compress_to_fp16=half) # export
ov_model = nncf.quantize(ov_model, quantization_dataset, preset=nncf.QuantizationPreset.MIXED)

ov.serialize(ov_model, f_ov) # save
yaml_save(Path(f) / file.with_suffix(".yaml").name, metadata) # add metadata.yaml
Expand Down

1 comment on commit de64179

@zhengstake
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found that with export.py under this repo, the exported OpenVINO model is not as clean as the export flow under the new https://github.com/ultralytics/ultralytics.git repo. Specifically for yolov5m6, there are extra normalization nodes and fakequantize nodes.

yolov5m6

Per the code in https://github.com/ultralytics/ultralytics.git, could we also add an ignored_scope for Multiply, Subtract and Sigmoid as shown below:
ds = gen_dataloader(data, imgsz=imgsz[0])
quantization_dataset = nncf.Dataset(ds, transform_fn)
ignored_scope = nncf.IgnoredScope(types=['Multiply', 'Subtract', 'Sigmoid']) # ignore operation
ov_model = nncf.quantize(onnx_model, quantization_dataset, preset=nncf.QuantizationPreset.MIXED, ignored_scope=ignored_scope)

Further, the export.py flow currently hard-codes imgsz to 640. This can be enhanced by passing down the imgsz input to the gen_dataloader API.

Please sign in to comment.