Commit

Fix code styles

natuan committed Sep 15, 2023
1 parent dff49e3 commit c63001d
Showing 7 changed files with 18 additions and 18 deletions.
2 changes: 1 addition & 1 deletion src/sparseml/experimental/sparsegpt/dispatch.py
@@ -71,6 +71,6 @@ def _get_model_key(args):
             break
     if key is None:
         raise ValueError(
-            f"Model {model} is not supported. Supported models: {SUPPORTED_MODELS.keys()}"
+            f"Model {args.model} is not supported. Supported: {SUPPORTED_MODELS.keys()}"
         )
     return key
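
The fix matters beyond style: the old message interpolated `model`, a name not defined in `_get_model_key`'s scope (the function receives `args`), so reaching the error path would have raised a `NameError` instead of the intended `ValueError`. A minimal sketch of the lookup pattern, with a made-up `SUPPORTED_MODELS` table standing in for the repository's real one:

```python
import argparse

# Hypothetical table; the real SUPPORTED_MODELS mapping lives in dispatch.py.
SUPPORTED_MODELS = {"opt": "opt", "llama": "llama", "mpt": "mpt"}


def _get_model_key(args):
    key = None
    for candidate in SUPPORTED_MODELS:
        # args.model is typically a model id such as "facebook/opt-1.3b"
        if candidate in args.model.lower():
            key = candidate
            break
    if key is None:
        # args.model (not the undefined name `model`) is what the caller passed
        raise ValueError(
            f"Model {args.model} is not supported. Supported: {SUPPORTED_MODELS.keys()}"
        )
    return key


args = argparse.Namespace(model="facebook/opt-1.3b")
print(_get_model_key(args))  # "opt"
```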
4 changes: 0 additions & 4 deletions src/sparseml/experimental/sparsegpt/llama.py
@@ -201,8 +201,6 @@ def forward_with_quantized_bmms(
         key_states = repeat_kv(key_states, self.num_key_value_groups)
         value_states = repeat_kv(value_states, self.num_key_value_groups)

-        # ELDAR disabled for quantized version
-        # attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)
         attn_weights = attn_weights_matmul(
             query_states, key_states.transpose(2, 3)
         ) / math.sqrt(self.head_dim)
@@ -226,8 +224,6 @@ def forward_with_quantized_bmms(
         attn_weights = nn.functional.softmax(
             attn_weights, dim=-1, dtype=torch.float32
         ).to(query_states.dtype)
-        # ELDAR disabled for quantized version
-        # attn_output = torch.matmul(attn_weights, value_states)
         attn_output = attn_output_matmul(attn_weights, value_states)

         if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
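
The deleted comments were leftovers from replacing the raw `torch.matmul` calls with `attn_weights_matmul` and `attn_output_matmul`. A plausible reason for that swap is that module-based matmuls give quantization observers and qconfigs an op to attach to; the sketch below illustrates the idea under that assumption. `QuantizableMatMul` is a name invented here, not the wrapper llama.py actually uses:

```python
import math

import torch
from torch import nn


class QuantizableMatMul(nn.Module):
    """Module wrapper around torch.matmul so quantization machinery can
    target the op; a sketch, not the repository's implementation."""

    def forward(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
        return torch.matmul(a, b)


attn_weights_matmul = QuantizableMatMul()
attn_output_matmul = QuantizableMatMul()

bsz, num_heads, q_len, head_dim = 1, 8, 16, 64
query_states = torch.randn(bsz, num_heads, q_len, head_dim)
key_states = torch.randn(bsz, num_heads, q_len, head_dim)
value_states = torch.randn(bsz, num_heads, q_len, head_dim)

attn_weights = attn_weights_matmul(
    query_states, key_states.transpose(2, 3)
) / math.sqrt(head_dim)
attn_weights = nn.functional.softmax(attn_weights, dim=-1)
attn_output = attn_output_matmul(attn_weights, value_states)
assert attn_output.size() == (bsz, num_heads, q_len, head_dim)
```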
2 changes: 1 addition & 1 deletion src/sparseml/experimental/sparsegpt/main.py
@@ -25,7 +25,7 @@
     import wandb

     has_wandb = True
-except:
+except Exception:
     has_wandb = False


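The bare `except:` also caught `KeyboardInterrupt` and `SystemExit`, which subclass `BaseException` rather than `Exception`; `except Exception:` keeps the optional-dependency guard without swallowing those. (`except ImportError:` would be narrower still.) A sketch of the pattern, with `log_metrics` as a hypothetical consumer:

```python
try:
    import wandb

    has_wandb = True
except Exception:
    # Import failed (or wandb raised while importing); run without logging.
    has_wandb = False


def log_metrics(metrics: dict) -> None:
    # Hypothetical helper: only touch wandb when the import succeeded.
    if has_wandb:
        wandb.log(metrics)
```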
5 changes: 3 additions & 2 deletions src/sparseml/experimental/sparsegpt/model_preprocessor.py
@@ -68,7 +68,8 @@ def _initialize_scales_from_batches(self, dev):
         # Tuan: If the model does not fit into the device,
         # we need a different version of this func to forward
         # the batches through the model layer by layer
-        # See: https://github.com/neuralmagic/neuralmagicml/blob/tuan-falcon/research/sparsegpt/falcon/FalconPress-main/modelutils.py
+        # See: https://github.com/neuralmagic/neuralmagicml/blob/
+        # tuan-falcon/research/sparsegpt/falcon/FalconPress-main/modelutils.py
         self.model.to(dev)
         num_batches = self.observer_batches
         with torch.no_grad():
@@ -89,7 +90,7 @@ def _initialize_scales_from_batches(self, dev):
                     raise ValueError(
                         f"Dont know how to process given batch type: {type(batch)}"
                     )
-                res = self.model(inp)
+                self.model(inp)
                 del inp
                 batches += 1
         self.model.apply(torch.quantization.disable_observer)
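
Dropping the unused `res =` binding makes explicit that the forward passes here run only for their side effect: quantization observers record activation ranges as batches flow through. A hedged sketch of that calibration loop, with `model`, `batches`, and `observer_batches` as stand-ins for the preprocessor's attributes:

```python
import torch


def initialize_scales_from_batches(model, batches, dev, observer_batches=16):
    """Forward a few batches so quantization observers record activation
    ranges, then freeze the observers. Sketch only; the repository's
    _initialize_scales_from_batches has more batch-unpacking logic."""
    model.to(dev)
    model.apply(torch.quantization.enable_observer)
    seen = 0
    with torch.no_grad():
        for batch in batches:
            if seen >= observer_batches:
                break
            inp = batch[0] if isinstance(batch, (list, tuple)) else batch
            model(inp.to(dev))  # output discarded; the observers did the work
            seen += 1
    model.apply(torch.quantization.disable_observer)
```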
4 changes: 3 additions & 1 deletion src/sparseml/experimental/sparsegpt/mpt.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 import contextlib
+import math
 import warnings
 from typing import Dict, Tuple

@@ -254,7 +255,8 @@ def quantized_scaled_multihead_dot_product_attention(
             or (attn_bias.size(-2) != 1 and attn_bias.size(-2) != s_q)
         ):
             raise RuntimeError(
-                f"attn_bias (shape: {attn_bias.shape}) is expected to broadcast to shape: {attn_weight.shape}."
+                f"attn_bias (shape: {attn_bias.shape}) is expected to "
+                f"broadcast to shape: {attn_weight.shape}."
             )
         attn_weight = attn_weight + attn_bias
         min_val = torch.finfo(q.dtype).min
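
Note that when splitting an f-string across lines, each continuation that interpolates values needs its own `f` prefix, or `{attn_weight.shape}` would be emitted literally. Separately, the `min_val = torch.finfo(q.dtype).min` line visible in the context is a useful idiom: the dtype's own most-negative finite value stays representable in fp16/bf16, where a hard-coded `-1e9` overflows to `-inf`. A sketch of that masking pattern (shapes and the causal mask are invented for illustration):

```python
import torch

q = torch.randn(2, 4, 8, 64, dtype=torch.float16)
attn_weight = torch.randn(2, 4, 8, 8, dtype=torch.float16)

# Mask disallowed positions with the dtype's own minimum, not -1e9.
causal_mask = torch.tril(torch.ones(8, 8)).bool()
min_val = torch.finfo(q.dtype).min
attn_weight = attn_weight.masked_fill(~causal_mask, min_val)

# Softmax in fp32 for stability, then cast back, as attention code commonly does.
probs = torch.softmax(attn_weight.float(), dim=-1).to(q.dtype)
```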
17 changes: 9 additions & 8 deletions src/sparseml/experimental/sparsegpt/opt.py
@@ -350,14 +350,15 @@ def forward(self, inp, **kwargs):
         print(i)
         layer = layers[i].to(dev)

-        if args.gmp:
-            subset = find_layers(layer)
-            for name in subset:
-                W = subset[name].weight.data
-                thresh = torch.sort(torch.abs(W.flatten()))[0][
-                    int(W.numel() * args.sparsity)
-                ]
-                W.data[torch.abs(W.data) <= thresh] = 0
+        # Todo: Check and clean up the commented block below
+        # if args.gmp:
+        #     subset = find_layers(layer)
+        #     for name in subset:
+        #         W = subset[name].weight.data
+        #         thresh = torch.sort(torch.abs(W.flatten()))[0][
+        #             int(W.numel() * args.sparsity)
+        #         ]
+        #         W.data[torch.abs(W.data) <= thresh] = 0

         for j in range(nsamples):
             outs[j] = layer(inps[j].unsqueeze(0), attention_mask=attention_mask)[0]
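
For reference, the block being commented out implements one-shot magnitude pruning: sort the absolute weights, read off the value at the target-sparsity quantile as a threshold, and zero everything at or below it. A standalone rewrite of that logic, not the code path the file keeps:

```python
import torch


def magnitude_prune_(weight: torch.Tensor, sparsity: float) -> torch.Tensor:
    """Zero the smallest-magnitude `sparsity` fraction of entries in place,
    mirroring the commented-out GMP block."""
    k = int(weight.numel() * sparsity)
    if k == 0:
        return weight
    # The k-th smallest absolute value becomes the pruning threshold.
    thresh = torch.sort(weight.abs().flatten())[0][k]
    weight[weight.abs() <= thresh] = 0
    return weight


w = torch.randn(256, 256)
magnitude_prune_(w, sparsity=0.5)
print((w == 0).float().mean().item())  # ~0.5
```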
2 changes: 1 addition & 1 deletion src/sparseml/experimental/sparsegpt/sequential.py
@@ -46,7 +46,7 @@ def compressible_layers(self):
         """
         try:
             return self.model.model.decoders.layers
-        except:
+        except Exception:
            raise RuntimeError(
                 "Derived class should override to provide list of compressible layers"
             )
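
One further tightening the hunk stops short of: chaining the re-raise with `from err` preserves the original `AttributeError` in the traceback. A sketch of the override contract, with the class name invented here rather than taken from sequential.py:

```python
from torch import nn


class SequentialCompressor:
    """Illustrative base class; derived classes override
    compressible_layers() for their model architecture."""

    def __init__(self, model: nn.Module):
        self.model = model

    def compressible_layers(self) -> nn.ModuleList:
        try:
            return self.model.model.decoders.layers
        except Exception as err:
            # `from err` keeps the original AttributeError in the traceback.
            raise RuntimeError(
                "Derived class should override to provide list of "
                "compressible layers"
            ) from err
```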
