Revert "[Cherry-Pick] Fix CacheKV Quant Bug (PaddlePaddle#61966)"
This reverts commit 04ac1c0.
hanhaowen-mt committed May 13, 2024
1 parent e627c5b commit 6320702
Showing 2 changed files with 1 addition and 48 deletions.
python/paddle/quantization/quantize.py (1 addition, 6 deletions)
@@ -74,12 +74,7 @@ def convert(self, model: Layer, inplace=False, remain_weight=False):
         for name, child in _model.named_children():
             quant_dequant = None
             if isinstance(child, ConvertibleQuantedLayer):
-                if child.converted:
-                    continue
-                if (
-                    child.weight_quanter is None
-                    or child.weight_quanter.scales() is None
-                ):
+                if child.weight_quanter.scales() is None:
                     continue
                 child._convert(remain_weight=remain_weight)
             elif isinstance(child, BaseQuanter):
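For context, the guards that this revert removes are sketched below. This is a minimal, self-contained illustration — DemoQuanter, DemoLayer, and convert_child are hypothetical stand-ins rather than Paddle classes — of why the guarded convert() tolerates already-converted layers and layers without a weight quanter, while the reverted single check does not.

# Hypothetical stand-ins (not Paddle code) illustrating the removed guards.
class DemoQuanter:
    def scales(self):
        return [0.5]

class DemoLayer:
    def __init__(self, weight_quanter=None):
        self.weight_quanter = weight_quanter  # may be None for some layers
        self.converted = False

    def _convert(self):
        self.converted = True

def convert_child(child):
    if child.converted:  # guard removed by the revert: skip repeat conversion
        return
    if (
        child.weight_quanter is None              # guard removed by the revert:
        or child.weight_quanter.scales() is None  # None.scales() would raise
    ):
        return
    child._convert()

layer = DemoLayer(DemoQuanter())
convert_child(layer)        # converts once
convert_child(layer)        # second call is a no-op thanks to the converted flag
convert_child(DemoLayer())  # layer without a weight quanter is skipped, not an error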
test/quantization/test_ptq.py (0 additions, 42 deletions)
@@ -128,48 +128,6 @@ def test_convert(self):
             self.assertIsNotNone(results)
         paddle.disable_static()

-    def test_convert_2times(self):
-        quant_model, ptq = self._get_model_for_ptq()
-
-        image = paddle.rand([1, 1, 32, 32], dtype="float32")
-        converted_model = ptq.convert(quant_model)
-        converted_model = ptq.convert(converted_model)
-        out = converted_model(image)
-        self.assertIsNotNone(out)
-
-        observer_count = self._count_layers(
-            converted_model, AbsmaxObserverLayer
-        )
-        quanter_count = self._count_layers(converted_model, LinearQuanter)
-        dequanter_count = self._count_layers(converted_model, LinearDequanter)
-        self.assertEqual(observer_count, 0)
-        self.assertEqual(dequanter_count, 14)
-        self.assertEqual(quanter_count, 9)
-
-        save_path = os.path.join(self.temp_dir.name, 'int8_infer')
-        paddle.jit.save(converted_model, save_path, [image])
-
-        paddle.enable_static()
-        exe = paddle.static.Executor(paddle.CPUPlace())
-        main_program = paddle.static.Program()
-        startup_program = paddle.static.Program()
-        with paddle.static.program_guard(main_program, startup_program):
-            [
-                inference_program,
-                feed_target_names,
-                fetch_targets,
-            ] = paddle.static.load_inference_model(save_path, exe)
-            tensor_img = np.array(
-                np.random.random((1, 1, 32, 32)), dtype=np.float32
-            )
-            results = exe.run(
-                inference_program,
-                feed={feed_target_names[0]: tensor_img},
-                fetch_list=fetch_targets,
-            )
-            self.assertIsNotNone(results)
-        paddle.disable_static()
-

 if __name__ == '__main__':
     unittest.main()
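The deleted test counts quantization layers through a _count_layers helper that is defined elsewhere in the test class and not shown in this diff. A hypothetical sketch of such a helper (an assumption — the real implementation may differ), using paddle.nn.Layer.sublayers():

import paddle

# Assumed sketch of a _count_layers-style helper; not the code from test_ptq.py.
def count_layers(model: paddle.nn.Layer, layer_type) -> int:
    # Walk every sublayer and count instances of layer_type,
    # e.g. LinearQuanter or LinearDequanter after ptq.convert().
    return sum(1 for sub in model.sublayers() if isinstance(sub, layer_type))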
