From 472390826bc2642648de4b3fd20e82710d97ace9 Mon Sep 17 00:00:00 2001
From: Marc Sun
Date: Mon, 22 Jul 2024 15:10:34 +0200
Subject: [PATCH] code review

---
 src/transformers/integrations/fbgemm_fp8.py      |  3 +--
 tests/quantization/fbgemm_fp8/test_fbgemm_fp8.py | 15 ++++++++-------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/transformers/integrations/fbgemm_fp8.py b/src/transformers/integrations/fbgemm_fp8.py
index d2356c7c40a26b..a0f5b2b76089b9 100644
--- a/src/transformers/integrations/fbgemm_fp8.py
+++ b/src/transformers/integrations/fbgemm_fp8.py
@@ -1,5 +1,4 @@
-# coding=utf-8
-# Copyright 2024 NetEase, Inc. and the HuggingFace Inc. team. All rights reserved.
+# Copyright 2024 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/tests/quantization/fbgemm_fp8/test_fbgemm_fp8.py b/tests/quantization/fbgemm_fp8/test_fbgemm_fp8.py
index 8a06c1a0f6ad75..61a1eecba8d3df 100644
--- a/tests/quantization/fbgemm_fp8/test_fbgemm_fp8.py
+++ b/tests/quantization/fbgemm_fp8/test_fbgemm_fp8.py
@@ -21,6 +21,7 @@
 from transformers.testing_utils import (
     require_accelerate,
     require_fbgemm_gpu,
+    require_read_token,
     require_torch_gpu,
     require_torch_multi_gpu,
     slow,
@@ -63,6 +64,7 @@ def test_from_dict(self):
 @require_torch_gpu
 @require_fbgemm_gpu
 @require_accelerate
+@require_read_token
 class FbgemmFp8Test(unittest.TestCase):
     model_name = "meta-llama/Meta-Llama-3-8B"
 
@@ -148,24 +150,23 @@ def test_quantized_model_conversion(self):
                 nb_linears += 1
 
         model = replace_with_fbgemm_fp8_linear(model, quantization_config=quantization_config)
-        nb_eetq_linear = 0
+        nb_fbgemm_linear = 0
         for module in model.modules():
             if isinstance(module, FbgemmFp8Linear):
-                nb_eetq_linear += 1
+                nb_fbgemm_linear += 1
 
-        self.assertEqual(nb_linears - 1, nb_eetq_linear)
+        self.assertEqual(nb_linears - 1, nb_fbgemm_linear)
 
-        # Try with `linear_weights_not_to_quantize`
         with init_empty_weights():
             model = OPTForCausalLM(config)
         quantization_config = FbgemmFp8Config(modules_to_not_convert=["fc1"])
         model = replace_with_fbgemm_fp8_linear(model, quantization_config=quantization_config)
-        nb_eetq_linear = 0
+        nb_fbgemm_linear = 0
         for module in model.modules():
             if isinstance(module, FbgemmFp8Linear):
-                nb_eetq_linear += 1
+                nb_fbgemm_linear += 1
 
-        self.assertEqual(nb_linears - 25, nb_eetq_linear)
+        self.assertEqual(nb_linears - 25, nb_fbgemm_linear)
 
     def test_quantized_model(self):
         """