Commit

Fix code styles

natuan committed Sep 15, 2023
1 parent dff49e3 commit c63001d
Showing 7 changed files with 18 additions and 18 deletions.
2 changes: 1 addition & 1 deletion src/sparseml/experimental/sparsegpt/dispatch.py
@@ -71,6 +71,6 @@ def _get_model_key(args):
             break
     if key is None:
         raise ValueError(
-            f"Model {model} is not supported. Supported models: {SUPPORTED_MODELS.keys()}"
+            f"Model {args.model} is not supported. Supported: {SUPPORTED_MODELS.keys()}"
         )
     return key
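
The fix matters beyond style: the old message interpolated `model`, a name not defined in `_get_model_key`'s scope (the function receives `args`), so reaching the error path would have raised a `NameError` instead of the intended `ValueError`. A minimal sketch of the lookup pattern, with a made-up `SUPPORTED_MODELS` table standing in for the repository's real one:

```python
import argparse

# Hypothetical table; the real SUPPORTED_MODELS mapping lives in dispatch.py.
SUPPORTED_MODELS = {"opt": "opt", "llama": "llama", "mpt": "mpt"}


def _get_model_key(args):
    key = None
    for candidate in SUPPORTED_MODELS:
        # args.model is typically a model id such as "facebook/opt-1.3b"
        if candidate in args.model.lower():
            key = candidate
            break
    if key is None:
        # args.model (not the undefined name `model`) is what the caller passed
        raise ValueError(
            f"Model {args.model} is not supported. Supported: {SUPPORTED_MODELS.keys()}"
        )
    return key


args = argparse.Namespace(model="facebook/opt-1.3b")
print(_get_model_key(args))  # "opt"
```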
4 changes: 0 additions & 4 deletions src/sparseml/experimental/sparsegpt/llama.py
@@ -201,8 +201,6 @@ def forward_with_quantized_bmms(
         key_states = repeat_kv(key_states, self.num_key_value_groups)
         value_states = repeat_kv(value_states, self.num_key_value_groups)

-        # ELDAR disabled for quantized version
-        # attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)
         attn_weights = attn_weights_matmul(
             query_states, key_states.transpose(2, 3)
         ) / math.sqrt(self.head_dim)
@@ -226,8 +224,6 @@ def forward_with_quantized_bmms(
         attn_weights = nn.functional.softmax(
             attn_weights, dim=-1, dtype=torch.float32
         ).to(query_states.dtype)
-        # ELDAR disabled for quantized version
-        # attn_output = torch.matmul(attn_weights, value_states)
         attn_output = attn_output_matmul(attn_weights, value_states)

         if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
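
The deleted comments were leftovers from replacing the raw `torch.matmul` calls with `attn_weights_matmul` and `attn_output_matmul`. A plausible reason for that swap is that module-based matmuls give quantization observers and qconfigs an op to attach to; the sketch below illustrates the idea under that assumption. `QuantizableMatMul` is a name invented here, not the wrapper llama.py actually uses:

```python
import math

import torch
from torch import nn


class QuantizableMatMul(nn.Module):
    """Module wrapper around torch.matmul so quantization machinery can
    target the op; a sketch, not the repository's implementation."""

    def forward(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
        return torch.matmul(a, b)


attn_weights_matmul = QuantizableMatMul()
attn_output_matmul = QuantizableMatMul()

bsz, num_heads, q_len, head_dim = 1, 8, 16, 64
query_states = torch.randn(bsz, num_heads, q_len, head_dim)
key_states = torch.randn(bsz, num_heads, q_len, head_dim)
value_states = torch.randn(bsz, num_heads, q_len, head_dim)

attn_weights = attn_weights_matmul(
    query_states, key_states.transpose(2, 3)
) / math.sqrt(head_dim)
attn_weights = nn.functional.softmax(attn_weights, dim=-1)
attn_output = attn_output_matmul(attn_weights, value_states)
assert attn_output.size() == (bsz, num_heads, q_len, head_dim)
```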
2 changes: 1 addition & 1 deletion src/sparseml/experimental/sparsegpt/main.py
@@ -25,7 +25,7 @@
     import wandb

     has_wandb = True
-except:
+except Exception:
     has_wandb = False


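The bare `except:` also caught `KeyboardInterrupt` and `SystemExit`, which subclass `BaseException` rather than `Exception`; `except Exception:` keeps the optional-dependency guard without swallowing those. (`except ImportError:` would be narrower still.) A sketch of the pattern, with `log_metrics` as a hypothetical consumer:

```python
try:
    import wandb

    has_wandb = True
except Exception:
    # Import failed (or wandb raised while importing); run without logging.
    has_wandb = False


def log_metrics(metrics: dict) -> None:
    # Hypothetical helper: only touch wandb when the import succeeded.
    if has_wandb:
        wandb.log(metrics)
```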
5 changes: 3 additions & 2 deletions src/sparseml/experimental/sparsegpt/model_preprocessor.py
@@ -68,7 +68,8 @@ def _initialize_scales_from_batches(self, dev):
         # Tuan: If the model does not fit into the device,
         # we need a different version of this func to forward
         # the batches through the model layer by layer
-        # See: https://github.com/neuralmagic/neuralmagicml/blob/tuan-falcon/research/sparsegpt/falcon/FalconPress-main/modelutils.py
+        # See: https://github.com/neuralmagic/neuralmagicml/blob/
+        # tuan-falcon/research/sparsegpt/falcon/FalconPress-main/modelutils.py
         self.model.to(dev)
         num_batches = self.observer_batches
         with torch.no_grad():
@@ -89,7 +90,7 @@ def _initialize_scales_from_batches(self, dev):
                     raise ValueError(
                         f"Dont know how to process given batch type: {type(batch)}"
                     )
-                res = self.model(inp)
+                self.model(inp)
                 del inp
                 batches += 1
         self.model.apply(torch.quantization.disable_observer)
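
Dropping the unused `res =` binding makes explicit that the forward passes here run only for their side effect: quantization observers record activation ranges as batches flow through. A hedged sketch of that calibration loop, with `model`, `batches`, and `observer_batches` as stand-ins for the preprocessor's attributes:

```python
import torch


def initialize_scales_from_batches(model, batches, dev, observer_batches=16):
    """Forward a few batches so quantization observers record activation
    ranges, then freeze the observers. Sketch only; the repository's
    _initialize_scales_from_batches has more batch-unpacking logic."""
    model.to(dev)
    model.apply(torch.quantization.enable_observer)
    seen = 0
    with torch.no_grad():
        for batch in batches:
            if seen >= observer_batches:
                break
            inp = batch[0] if isinstance(batch, (list, tuple)) else batch
            model(inp.to(dev))  # output discarded; the observers did the work
            seen += 1
    model.apply(torch.quantization.disable_observer)
```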
4 changes: 3 additions & 1 deletion src/sparseml/experimental/sparsegpt/mpt.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 import contextlib
+import math
 import warnings
 from typing import Dict, Tuple

@@ -254,7 +255,8 @@ def quantized_scaled_multihead_dot_product_attention(
             or (attn_bias.size(-2) != 1 and attn_bias.size(-2) != s_q)
         ):
             raise RuntimeError(
-                f"attn_bias (shape: {attn_bias.shape}) is expected to broadcast to shape: {attn_weight.shape}."
+                f"attn_bias (shape: {attn_bias.shape}) is expected to "
+                f"broadcast to shape: {attn_weight.shape}."
             )
         attn_weight = attn_weight + attn_bias
         min_val = torch.finfo(q.dtype).min
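
Note that when splitting an f-string across lines, each continuation that interpolates values needs its own `f` prefix, or `{attn_weight.shape}` would be emitted literally. Separately, the `min_val = torch.finfo(q.dtype).min` line visible in the context is a useful idiom: the dtype's own most-negative finite value stays representable in fp16/bf16, where a hard-coded `-1e9` overflows to `-inf`. A sketch of that masking pattern (shapes and the causal mask are invented for illustration):

```python
import torch

q = torch.randn(2, 4, 8, 64, dtype=torch.float16)
attn_weight = torch.randn(2, 4, 8, 8, dtype=torch.float16)

# Mask disallowed positions with the dtype's own minimum, not -1e9.
causal_mask = torch.tril(torch.ones(8, 8)).bool()
min_val = torch.finfo(q.dtype).min
attn_weight = attn_weight.masked_fill(~causal_mask, min_val)

# Softmax in fp32 for stability, then cast back, as attention code commonly does.
probs = torch.softmax(attn_weight.float(), dim=-1).to(q.dtype)
```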
17 changes: 9 additions & 8 deletions src/sparseml/experimental/sparsegpt/opt.py
@@ -350,14 +350,15 @@ def forward(self, inp, **kwargs):
         print(i)
         layer = layers[i].to(dev)

-        if args.gmp:
-            subset = find_layers(layer)
-            for name in subset:
-                W = subset[name].weight.data
-                thresh = torch.sort(torch.abs(W.flatten()))[0][
-                    int(W.numel() * args.sparsity)
-                ]
-                W.data[torch.abs(W.data) <= thresh] = 0
+        # Todo: Check and clean up the commented block below
+        # if args.gmp:
+        #     subset = find_layers(layer)
+        #     for name in subset:
+        #         W = subset[name].weight.data
+        #         thresh = torch.sort(torch.abs(W.flatten()))[0][
+        #             int(W.numel() * args.sparsity)
+        #         ]
+        #         W.data[torch.abs(W.data) <= thresh] = 0

         for j in range(nsamples):
             outs[j] = layer(inps[j].unsqueeze(0), attention_mask=attention_mask)[0]
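
For reference, the block being commented out implements one-shot magnitude pruning: sort the absolute weights, read off the value at the target-sparsity quantile as a threshold, and zero everything at or below it. A standalone rewrite of that logic, not the code path the file keeps:

```python
import torch


def magnitude_prune_(weight: torch.Tensor, sparsity: float) -> torch.Tensor:
    """Zero the smallest-magnitude `sparsity` fraction of entries in place,
    mirroring the commented-out GMP block."""
    k = int(weight.numel() * sparsity)
    if k == 0:
        return weight
    # The k-th smallest absolute value becomes the pruning threshold.
    thresh = torch.sort(weight.abs().flatten())[0][k]
    weight[weight.abs() <= thresh] = 0
    return weight


w = torch.randn(256, 256)
magnitude_prune_(w, sparsity=0.5)
print((w == 0).float().mean().item())  # ~0.5
```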
2 changes: 1 addition & 1 deletion src/sparseml/experimental/sparsegpt/sequential.py
@@ -46,7 +46,7 @@ def compressible_layers(self):
         """
         try:
             return self.model.model.decoders.layers
-        except:
+        except Exception:
            raise RuntimeError(
                 "Derived class should override to provide list of compressible layers"
             )
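
One further tightening the hunk stops short of: chaining the re-raise with `from err` preserves the original `AttributeError` in the traceback. A sketch of the override contract, with the class name invented here rather than taken from sequential.py:

```python
from torch import nn


class SequentialCompressor:
    """Illustrative base class; derived classes override
    compressible_layers() for their model architecture."""

    def __init__(self, model: nn.Module):
        self.model = model

    def compressible_layers(self) -> nn.ModuleList:
        try:
            return self.model.model.decoders.layers
        except Exception as err:
            # `from err` keeps the original AttributeError in the traceback.
            raise RuntimeError(
                "Derived class should override to provide list of "
                "compressible layers"
            ) from err
```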
