Fixes to load pre-trained model w/ channel-wise quantization
anmarques committed Oct 2, 2023
1 parent 779af69 · commit 7cfad32
Showing 1 changed file with 3 additions and 3 deletions.
src/sparseml/pytorch/sparsification/quantization/helpers.py (6 changes: 3 additions & 3 deletions)
@@ -725,7 +725,7 @@ def initialize_channel_wise_scale_zp(module: Module):
     for name, submodule in module.named_modules():
         weight_fake_quant = getattr(submodule, "weight_fake_quant", None)
         if not weight_fake_quant or (
-            getattr(weight_fake_quant, "qscheme", None) is not torch.per_channel_affine
+            getattr(weight_fake_quant, "qscheme", None) not in [torch.per_channel_affine, torch.per_channel_symmetric]
         ):
             # only consider modules with channel-wise quantized weights
             continue
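
Note on the first change: PyTorch's stock per-channel weight fake-quantizers report torch.per_channel_symmetric as their qscheme, so the previous check (which only matched torch.per_channel_affine) skipped them entirely and their scale/zero_point buffers were never resized. A minimal sketch of that behavior, assuming a torch build that exports the default constructor from torch.ao.quantization:

    import torch
    from torch.ao.quantization import default_per_channel_weight_fake_quant

    # Default per-channel weight fake-quantizer used by common QAT qconfigs
    wfq = default_per_channel_weight_fake_quant()

    # Prints torch.per_channel_symmetric, which the old
    # `is not torch.per_channel_affine` check did not treat as channel-wise
    print(wfq.qscheme)
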
@@ -743,11 +743,11 @@ def initialize_channel_wise_scale_zp(module: Module):
         # update scale and zero point if they are initialized to a size of 1
         scale = weight_fake_quant.scale
         if scale.numel() == 1:
-            weight_fake_quant.scale = scale.reshape(-1).expand(num_channels)
+            weight_fake_quant.scale = torch.ones(num_channels, dtype=scale.dtype)
 
         zero_point = weight_fake_quant.zero_point
         if zero_point.numel() == 1:
-            weight_fake_quant.zero_point = zero_point.reshape(-1).expand(num_channels)
+            weight_fake_quant.zero_point = torch.ones(num_channels, dtype=zero_point.dtype)
 
         # update the observer min and max vals
         if weight_fake_quant.activation_post_process.min_val.numel() == 0:
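
These re-initializations matter when reloading a channel-wise quantized checkpoint: a freshly QAT-prepared module carries scale/zero_point buffers of shape (1,), while the saved state dict holds per-channel tensors of shape (num_channels,), so load_state_dict fails with a size mismatch unless the buffers are expanded first. A rough usage sketch under those assumptions; the "fbgemm" qconfig and the checkpoint path are illustrative placeholders, not part of this commit:

    import torch
    from torch import nn
    from torch.ao.quantization import get_default_qat_qconfig, prepare_qat

    from sparseml.pytorch.sparsification.quantization.helpers import (
        initialize_channel_wise_scale_zp,
    )

    # Tiny QAT-prepared model; "fbgemm" defaults to per-channel symmetric weight fake-quant
    model = nn.Sequential(nn.Conv2d(3, 8, kernel_size=3)).train()
    model.qconfig = get_default_qat_qconfig("fbgemm")
    prepare_qat(model, inplace=True)

    wfq = model[0].weight_fake_quant
    print(wfq.scale.shape)  # torch.Size([1]) before any calibration

    # Expand scale/zero_point to (out_channels,) so per-channel tensors from a
    # trained checkpoint can load without a shape-mismatch error
    initialize_channel_wise_scale_zp(model)
    print(wfq.scale.shape)  # torch.Size([8])

    # model.load_state_dict(torch.load("checkpoint.pth"))  # hypothetical checkpoint path
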
