Merge branch 'main' into wanda
rahul-tuli committed Nov 27, 2023
2 parents 35ab428 + 0946ca8 commit c3e78a0
Showing 20 changed files with 2,436 additions and 38 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/test-check.yaml
@@ -197,3 +197,27 @@ jobs:
run: pip3 install .[dev,torchvision,onnxruntime] torch==1.9.1
- name: "🔬 Running onnx tests"
run: make test TARGETS=onnx
transformers-tests:
runs-on: ubuntu-22.04
env:
SPARSEZOO_TEST_MODE: "true"
needs: test-setup
if: ${{needs.test-setup.outputs.pytorch == 1}}
steps:
- uses: actions/setup-python@v4
with:
python-version: '3.11'
- uses: actions/checkout@v2
- uses: actions/checkout@v2
with:
repository: "neuralmagic/sparsezoo"
path: "sparsezoo"
ref: ${{needs.test-setup.outputs.branch}}
- name: "⚙️ Install sparsezoo dependencies"
run: pip3 install -U pip && pip3 install setuptools sparsezoo/
- name: "Clean sparsezoo directory"
run: rm -r sparsezoo/
- name: "⚙️ Install dependencies"
run: pip3 install .[dev,torch,transformers]
- name: "🔬 Running transformers tests"
run: make test TARGETS=transformers
27 changes: 15 additions & 12 deletions setup.py
@@ -63,17 +63,17 @@

_onnxruntime_deps = ["onnxruntime>=1.0.0"]
_clip_deps = ["open_clip_torch==2.20.0"]
supported_torch_version = "torch>=1.7.0,<=2.0"
supported_torch_version = "torch>=1.7.0,<2.2"
_pytorch_deps = [
supported_torch_version,
"gputils",
]
_pytorch_all_deps = _pytorch_deps + [
"torchvision>=0.3.0,<=0.15.1",
"torchvision>=0.3.0,<0.17",
"torchaudio<=2.0.1",
]
_pytorch_vision_deps = _pytorch_deps + [
"torchvision>=0.3.0,<=0.15.1",
"torchvision>=0.3.0,<0.17",
"opencv-python<=4.6.0.66",
]
_transformers_deps = _pytorch_deps + [
@@ -103,24 +103,26 @@
"black==22.12.0",
"flake8==3.9.2",
"isort==5.8.0",
"m2r2~=0.2.7",
"wheel>=0.36.2",
"pytest>=6.0.0",
"pytest-mock>=3.6.0",
"flaky~=3.7.0",
"tensorboard>=1.0,<2.9",
"tensorboardX>=1.0",
]

_docs_deps = [
"m2r2>=0.2.7",
"mistune<3,>=2.0.3",
"myst-parser~=0.14.0",
"myst-parser>=0.14.0",
"rinohtype~=0.4.2",
"sphinx~=3.5.0",
"sphinx-copybutton~=0.3.0",
"sphinx-markdown-tables~=0.0.15",
"sphinx-multiversion~=0.2.4",
"sphinx-pydantic~=0.1.0",
"sphinx-rtd-theme~=0.5.0",
"wheel>=0.36.2",
"pytest~=6.2.0",
"pytest-mock~=3.6.0",
"flaky~=3.7.0",
"sphinx-rtd-theme",
"docutils<0.17",
"tensorboard>=1.0,<2.9",
"tensorboardX>=1.0",
]


@@ -148,6 +150,7 @@ def _setup_extras() -> Dict:
return {
"clip": _clip_deps,
"dev": _dev_deps,
"docs": _docs_deps,
"deepsparse": _deepsparse_deps,
"deepsparse-ent": _deepsparse_ent_deps,
"openpifpaf": _open_pif_paf_deps,
3 changes: 2 additions & 1 deletion src/sparseml/exporters/transforms/flatten_qparams.py
@@ -50,7 +50,8 @@ def transform(self, model: ModelProto) -> ModelProto:
continue
self.log_match(init)
a = numpy_helper.to_array(init)
assert a.shape == (1,)
if a.shape != (1,):
continue # assume qparam is already flattened
b = numpy.array(a[0])
assert b.shape == ()
assert b.dtype == a.dtype
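For context, the relaxed check above amounts to something like the following standalone sketch: shape-(1,) quantization parameters are still flattened to scalars, while anything else (e.g. a qparam already flattened to shape ()) is now skipped instead of failing the old assertion. The helper name and the use of numpy_helper.from_array are illustrative only, not the transform's actual code path.

import numpy
from onnx import TensorProto, numpy_helper


def flatten_qparam(init: TensorProto) -> TensorProto:
    # hypothetical standalone helper mirroring the relaxed transform logic
    a = numpy_helper.to_array(init)
    if a.shape != (1,):
        # e.g. already flattened to shape () by an earlier pass -> leave as-is
        return init
    b = numpy.array(a[0])  # scalar with the same dtype as the original qparam
    assert b.shape == () and b.dtype == a.dtype
    return numpy_helper.from_array(b, name=init.name)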
2 changes: 1 addition & 1 deletion src/sparseml/pytorch/base.py
@@ -49,7 +49,7 @@


_TORCH_MIN_VERSION = "1.0.0"
_TORCH_MAX_VERSION = os.environ.get("MAX_TORCH", "2.0.100")
_TORCH_MAX_VERSION = os.environ.get("MAX_TORCH", "2.1.10")


def check_torch_install(
3 changes: 3 additions & 0 deletions src/sparseml/pytorch/sparsification/pruning/__init__.py
@@ -22,6 +22,7 @@
from .mask_creator import *
from .mask_params import *
from .modifier_as import *
from .modifier_powerpropagation import *
from .modifier_pruning_acdc import *
from .modifier_pruning_base import *
from .modifier_pruning_constant import *
@@ -30,5 +31,7 @@
from .modifier_pruning_mfac import *
from .modifier_pruning_movement import *
from .modifier_pruning_obs import *
from .modifier_pruning_rigl import *
from .modifier_pruning_structured import *
from .modifier_pruning_topkast import *
from .scorer import *
64 changes: 46 additions & 18 deletions src/sparseml/pytorch/sparsification/pruning/mask_params.py
@@ -59,6 +59,8 @@ class ModuleParamPruningMask(object):
sparsity ranking values within each individual tensor. Default is False
:param allow_reintroduction: set True to not mask weights and gradients between
forward passes (forward mask hooks will remain). Default is False
:param mask_gradients_only: Apply the mask to the gradients only and not to
the weights (at any point). Default is False.
"""

def __init__(
Expand All @@ -69,6 +71,7 @@ def __init__(
param_names: Union[str, List[str]] = "weight",
store_init: bool = False,
store_unmasked: bool = False,
mask_gradients_only: bool = False,
track_grad_mom: float = -1.0,
layer_names: Optional[List[str]] = None,
global_sparsity: bool = False,
@@ -86,6 +89,7 @@ def __init__(
self._layer_names = layer_names
self._store_init = store_init
self._store_unmasked = store_unmasked
self._mask_gradients_only = mask_gradients_only
self._track_grad_mom = track_grad_mom
self._global_sparsity = global_sparsity

@@ -114,6 +118,7 @@ def __init__(
self._params_grad = [None] * len(self._layers) # type: List[Tensor]
self._params_movement = [None] * len(self._layers) # type: List[Tensor]
self._params_applied_thinning = [0.0] * len(self._layers) # type: List[float]
self._mask_applied = [False] * len(self._layers) # type: List[bool]

# movement pruning requires weight reintroduction
self._allow_reintroduction = allow_reintroduction
@@ -299,7 +304,7 @@ def set_param_data(self, value: Tensor, param_idx: int):
self._params_unmasked[param_idx] = None
self._setup_params_unmasked(param_idx)

if not self._allow_reintroduction:
if not self._allow_reintroduction and not self._mask_gradients_only:
self.apply(param_idx)

def set_param_masks(self, masks: List[Tensor]):
@@ -330,7 +335,7 @@ def set_param_masks(self, masks: List[Tensor]):
if self._scorer:
self._scorer.mask_update(masks, mask_diffs)

if not self._allow_reintroduction:
if not self._allow_reintroduction and not self._mask_gradients_only:
self.apply()

return mask_diffs
@@ -395,11 +400,27 @@ def apply(self, param_idx: Optional[int] = None):
self._check_regen_param_vals(idx)

with torch.no_grad():
# In the case of forward-pass-only masks (Top-KAST, Movement
# pruning), the mask is applied on the forward pass and
# reverted on the backward pass. At the same time, every time the
# mask is applied, we store the previous values in
# _params_unmasked. So long as we alternate forward and backward
# passes (i.e., during training), this works fine. However, if
# we only do forward passes (i.e., during testing/validation),
# we can override the unmasked parameters with sparse ones. To
# prevent this, only update the unmasked params cache when the
# mask is applied for the first time since it was removed.
#
# Note that there is an assumption here that the weights do not
# change when the mask is applied (which is satisfied during
# training, since the mask is removed on every backward pass).
if self._store_unmasked:
self._params_unmasked[idx] = self._params[idx].data.mul(
1 - self._param_masks[idx] # inverted mask
)
if not self._mask_applied[idx]:
self._params_unmasked[idx] = self._params[idx].data.mul(
1 - self._param_masks[idx] # inverted mask
)
self._params[idx].data.mul_(self._param_masks[idx])
self._mask_applied[idx] = True

def reset(self):
"""
@@ -429,7 +450,8 @@ def pruning_end(self, leave_enabled: bool):
if not leave_enabled:
self.enabled = False
self._allow_reintroduction = False
self.apply() # ensure that weights are pruned to final level
if not self._mask_gradients_only:
self.apply() # ensure that weights are pruned to final level
if self._scorer:
self._scorer.on_pruning_end()

@@ -501,15 +523,20 @@ def _check_regen_param_vals(self, param_idx: int = None):

def _create_hooks(self):
for idx, (param, layer) in enumerate(zip(self._params, self._layers)):
if self._forward_hooks[idx] is None:
self._forward_hooks[idx] = layer.register_forward_pre_hook(
partial(self._hook_mask_forward, idx)
)
if not self._mask_gradients_only:
if self._forward_hooks[idx] is None:
self._forward_hooks[idx] = layer.register_forward_pre_hook(
partial(self._hook_mask_forward, idx)
)

if self._allow_reintroduction and self._undo_mask_hooks[idx] is None:
self._undo_mask_hooks[idx] = layer.register_forward_hook(
partial(self._hook_undo_mask, idx)
)
if (
self._allow_reintroduction
and self._undo_mask_hooks[idx] is None
and not self._mask_gradients_only
):
self._undo_mask_hooks[idx] = layer.register_full_backward_hook(
partial(self._hook_undo_mask, idx)
)

if self._gradient_hooks[idx] is None:
self._gradient_hooks[idx] = param.register_hook(
Expand All @@ -536,23 +563,24 @@ def _delete_hooks(self):
def _hook_mask_forward(
self, param_idx: int, mod: Module, inp: Union[Tensor, Tuple[Tensor]]
):
self.apply(param_idx)
with torch.no_grad():
self.apply(param_idx)

def _hook_undo_mask(self, param_idx, module, inp, out):
if self._allow_reintroduction:
with torch.no_grad():
self._params[param_idx].data.add_(self._params_unmasked[param_idx])
self._mask_applied[param_idx] = False

def _hook_mask_gradient(self, param_idx, grad):
if 0.0 <= self._track_grad_mom < 1.0:
self._params_grad[param_idx].mul_(self._track_grad_mom).add_(
(1.0 - self._track_grad_mom) * grad
)

return (
grad.mul_(self._param_masks[param_idx])
if not self._allow_reintroduction
else grad # do not mask gradient for movement pruning
if self._mask_gradients_only or not self._allow_reintroduction
else grad
)

def _setup_params_init(self):
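With mask_gradients_only=True the weights are never zeroed at any point; only the gradient hook filters updates on the pruned coordinates, which is what the _hook_mask_gradient change above enables. A minimal sketch of that behavior in plain torch (the tensors and the quadratic loss are illustrative, not the class's API):

import torch

weight = torch.nn.Parameter(torch.randn(4, 4))
mask = (torch.rand(4, 4) > 0.5).float()

# mask gradients only: the weight tensor stays dense at all times,
# but pruned coordinates receive no updates
weight.register_hook(lambda grad: grad * mask)

optimizer = torch.optim.SGD([weight], lr=0.1)
loss = (weight ** 2).sum()
loss.backward()
optimizer.step()  # only unmasked entries move; masked entries keep their dense values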
