From b5546b60c6f5494670731af16c2dd3620ad5a91a Mon Sep 17 00:00:00 2001
From: Jiacong Fang
Date: Thu, 16 Sep 2021 21:27:22 +0800
Subject: [PATCH] Multiple TF export improvements (#4824)

* Add fused conv support

* Set all saved_model values to non trainable

* Fix TFLite fp16 model export

* Fix int8 TFLite conversion
---
 export.py    | 7 +++++--
 models/tf.py | 5 +++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/export.py b/export.py
index a5139c0a965e..dd7eefc51702 100644
--- a/export.py
+++ b/export.py
@@ -145,6 +145,7 @@ def export_saved_model(model, im, file, dynamic,
         inputs = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
         outputs = tf_model.predict(inputs, tf_nms, agnostic_nms, topk_per_class, topk_all, iou_thres, conf_thres)
         keras_model = keras.Model(inputs=inputs, outputs=outputs)
+        keras_model.trainable = False
         keras_model.summary()
         keras_model.save(f, save_format='tf')
 
@@ -183,15 +184,17 @@ def export_tflite(keras_model, im, file, int8, data, ncalib, prefix=colorstr('Te
         print(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
         batch_size, ch, *imgsz = list(im.shape)  # BCHW
-        f = file.with_suffix('.tflite')
+        f = str(file).replace('.pt', '-fp16.tflite')
 
         converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
         converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
+        converter.target_spec.supported_types = [tf.float16]
         converter.optimizations = [tf.lite.Optimize.DEFAULT]
         if int8:
             dataset = LoadImages(check_dataset(data)['train'], img_size=imgsz, auto=False)  # representative data
             converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib)
             converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+            converter.target_spec.supported_types = []
             converter.inference_input_type = tf.uint8  # or tf.int8
             converter.inference_output_type = tf.uint8  # or tf.int8
             converter.experimental_new_quantizer = False
@@ -249,7 +252,7 @@ def run(data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
     # Load PyTorch model
     device = select_device(device)
     assert not (device.type == 'cpu' and half), '--half only compatible with GPU export, i.e. use --device 0'
-    model = attempt_load(weights, map_location=device, inplace=True, fuse=not any(tf_exports))  # load FP32 model
+    model = attempt_load(weights, map_location=device, inplace=True, fuse=True)  # load FP32 model
     nc, names = model.nc, model.names  # number of classes, class names
 
     # Input
diff --git a/models/tf.py b/models/tf.py
index 621236240f10..5d7153f246eb 100644
--- a/models/tf.py
+++ b/models/tf.py
@@ -70,8 +70,9 @@ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
         # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
         conv = keras.layers.Conv2D(
-            c2, k, s, 'SAME' if s == 1 else 'VALID', use_bias=False,
-            kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()))
+            c2, k, s, 'SAME' if s == 1 else 'VALID', use_bias=False if hasattr(w, 'bn') else True,
+            kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
+            bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
         self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
         self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
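
Note on the int8 path: converter.representative_dataset above points at a
representative_dataset_gen helper that is not part of this diff. A minimal
sketch of such a calibration generator, assuming LoadImages yields CHW uint8
numpy arrays in a (path, img, im0s, vid_cap) tuple (the tuple layout and the
/255 scaling are assumptions, not taken from this patch):

    import numpy as np

    def representative_dataset_gen(dataset, ncalib=100):
        # Yield float32 NHWC batches in [0, 1] for the quantization calibrator.
        for n, (path, img, im0s, vid_cap) in enumerate(dataset):
            im = np.transpose(img, [1, 2, 0])  # CHW -> HWC (assumed layout)
            im = np.expand_dims(im, 0).astype(np.float32) / 255.0
            yield [im]  # TFLite expects a list with one array per model input
            if n >= ncalib:
                break  # stop after ncalib calibration images

With the fp16 changes, a default export now writes a *-fp16.tflite file whose
weights are stored as float16 via supported_types, while the int8 branch
clears supported_types again so full-integer quantization can take over.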