fixed example of cpu woq accuracy issue (#1183)
* fixed example of cpu woq accuracy issue

Signed-off-by: Cheng, Penghui <penghui.cheng@intel.com>
PenghuiCheng authored Jan 25, 2024
1 parent 76b865c commit e530f74
Showing 3 changed files with 137 additions and 285 deletions.
4 changes: 2 additions & 2 deletions intel_extension_for_transformers/llm/quantization/utils.py
@@ -114,7 +114,7 @@ def _replace_linear(
             with init_empty_weights():
                 in_features = module.in_features
                 out_features = module.out_features
-                if device == "cpu" or device == torch.device("cpu"):
+                if device == "cpu" or device == torch.device("cpu") or device == "auto":
                     from .nn.modules import (
                         QuantizedLinearQBits,
                     ) # TODO: QuantizedLinearINT4, QuantizedLinearINT8

@@ -171,7 +171,7 @@ def _replace_linear(
             model._modules[name].source_cls = type(module)
             # Force requires grad to False to avoid unexpected errors
             model._modules[name].requires_grad_(False)
-            if device == "cpu" or device == torch.device("cpu"):
+            if device == "cpu" or device == torch.device("cpu") or device == "auto":
                 if not empty_weights:
                     if quantization_config.algorithm == "GPTQ":
                         p_func = None

(Diffs for the remaining two changed files were not loaded on this page.)
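
For context, the sketch below (not the library's code; the helper name is hypothetical) restates the device check that both hunks extend. Before this change, only "cpu" (as a string or torch.device) selected the CPU weight-only-quantization path, so a caller passing device="auto" did not take it, which is a plausible source of the CPU WOQ accuracy issue named in the commit title; the fix treats "auto" the same as "cpu".

    # Minimal, standalone sketch of the updated predicate (hypothetical helper name;
    # the real logic lives inline in _replace_linear).
    import torch

    def use_cpu_qbits_path(device) -> bool:
        # After this commit, "auto" is routed to the same CPU QBits path as "cpu".
        return device == "cpu" or device == torch.device("cpu") or device == "auto"

    assert use_cpu_qbits_path("cpu")
    assert use_cpu_qbits_path(torch.device("cpu"))
    assert use_cpu_qbits_path("auto")       # newly accepted by this commit
    assert not use_cpu_qbits_path("cuda")   # other devices still take the other branch

When this condition matches, the CPU branch imports QuantizedLinearQBits for the module replacement (first hunk) and performs the CPU-side weight handling guarded by the same condition (second hunk), so models loaded with the default "auto" device now get the intended quantized layers on CPU.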
