[OBCQ Bug Fix] Fallback to CPU if no CUDA (#1828)
* fallback to cpu if no cuda

* warning instead of info

* add log for device
Satrat authored and bfineran committed Nov 16, 2023
1 parent bd02486 commit 5e76668
Showing 4 changed files with 21 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/sparseml/modifiers/obcq/utils/sparsegpt.py
@@ -199,7 +199,8 @@ def fasterprune(
         _LOGGER.debug(torch.sum((self.layer(self._inp1) - self.out1) ** 2))
         _LOGGER.debug(torch.sum(Losses))

-        torch.cuda.synchronize()
+        if torch.cuda.is_available():
+            torch.cuda.synchronize()
         _LOGGER.info("time %.2f" % (time.time() - tick))
         _LOGGER.info("error %.2f" % torch.sum(Losses).item())

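The guard keeps the timing log accurate on GPU runs while no longer crashing on machines without CUDA, since torch.cuda.synchronize() raises when no usable CUDA device is present. A minimal standalone sketch of the same pattern (illustrative only, not part of the commit):

import time

import torch

tick = time.time()
# ... run the work being timed; CUDA kernels execute asynchronously ...
if torch.cuda.is_available():
    # wait for queued GPU kernels to finish so the elapsed time is meaningful
    torch.cuda.synchronize()
print("time %.2f" % (time.time() - tick))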
14 changes: 14 additions & 0 deletions src/sparseml/transformers/sparsification/obcq/obcq.py
@@ -75,6 +75,10 @@ def one_shot(
     if deploy_dir.exists():
         raise RuntimeError(f"deploy_dir={deploy_dir} already exists")

+    # fallback to cpu if cuda not available
+    device = _fallback_to_cpu(device)
+    _LOGGER.info(f"Running one_shot on device {device}")
+
     # Load the configuration from the model path
     config = AutoConfig.from_pretrained(model_path)
     model_type = config.model_type.lower()
@@ -164,6 +168,16 @@ def _save(model, tokenizer, save_path, recipe_path):
         fp.write(load_recipe_yaml_str(recipe_path))


+def _fallback_to_cpu(device):
+    if "cuda" in device and not torch.cuda.is_available():
+        _LOGGER.warning(
+            f"Requested {device} but CUDA is not available, falling back to CPU"
+        )
+        return "cpu"
+
+    return device
+
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
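_fallback_to_cpu returns the requested device unchanged whenever it can be honored and only rewrites CUDA requests on hosts without CUDA, logging a warning so the silent switch is visible. A small usage sketch (illustrative; assumes plain device strings such as "cuda:0" or "cpu"):

import torch

requested = "cuda:0"
device = _fallback_to_cpu(requested)

# with CUDA available the request passes through; otherwise a warning is
# logged and the run proceeds on CPU
expected = requested if torch.cuda.is_available() else "cpu"
assert device == expected

# "cpu" requests are always returned as-is
assert _fallback_to_cpu("cpu") == "cpu"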
2 changes: 2 additions & 0 deletions
@@ -42,6 +42,7 @@ def opt_forward(model: Module, data_loader: List, device: str, nsamples: int = N
         dataloader=data_loader,
         device=device,
         nsamples=nsamples,
+        target_ids=["attention_mask"],
         layer_prefix="decoder",
     )
     buffer = [b[0] for b in cached_inputs.pop("inputs")]
@@ -95,6 +96,7 @@ def llama_forward(model: Module, data_loader: List, device: str, nsamples: int =
         dataloader=data_loader,
         device=device,
         nsamples=nsamples,
+        target_ids=["attention_mask", "position_ids"],
         layer_prefix=None,
     )
     buffer = [b[0] for b in cached_inputs.pop("inputs")]
3 changes: 3 additions & 0 deletions tests/sparseml/transformers/obcq/test_obcq.py
@@ -13,6 +13,7 @@
 # limitations under the License.

 import pytest
+import torch

 from sparseml.modifiers.obcq.utils.helpers import ppl_eval_general
 from sparseml.transformers.data import TransformersDataset
@@ -30,6 +31,8 @@
 def test_obcq_tinystories(recipe_file_path):
     tiny_model_path = "Xenova/llama2.c-stories15M"
     device = "cuda:0"
+    if not torch.cuda.is_available():
+        device = "cpu"

     # test recipe with 50% sparsity, quantization and smoothquant
     tiny_model = one_shot(
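With this change the test grabs the GPU when one exists and otherwise runs (more slowly) on CPU, so it no longer fails outright on CPU-only machines. The same selection is often written as a one-liner; a sketch, not part of the commit:

import torch

# prefer the first GPU, fall back to CPU when CUDA is unavailable
device = "cuda:0" if torch.cuda.is_available() else "cpu"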
