From 6bd286bc8953c497401eeca1321013080e93d278 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Thu, 22 Dec 2022 15:09:18 +0800 Subject: [PATCH 01/59] Copybook --- .../pruning/mmcls/dmcp/dmcp_resnet_8xb32.py | 62 +++ mmrazor/engine/hooks/__init__.py | 4 +- mmrazor/engine/hooks/dmcp_subnet_hook.py | 62 +++ mmrazor/models/algorithms/pruning/__init__.py | 3 +- mmrazor/models/algorithms/pruning/dmcp.py | 403 ++++++++++++++++++ .../dynamic_ops/bricks/__init__.py | 32 +- .../dynamic_ops/bricks/dynamic_norm.py | 168 +++----- mmrazor/models/mutables/__init__.py | 5 +- mmrazor/models/mutables/derived_mutable.py | 2 +- .../mutables/mutable_channel/__init__.py | 5 +- .../mutable_channel_container.py | 5 +- .../sequential_mutable_channel.py | 8 +- .../mutable_channel/simple_mutable_channel.py | 12 +- .../mutable_channel/units/__init__.py | 4 +- .../mutable_channel/units/channel_unit.py | 94 ++-- .../units/dmcp_channel_unit.py | 58 +++ .../units/mutable_channel_unit.ipynb | 149 ++++++- .../units/mutable_channel_unit.py | 57 ++- .../units/sequential_mutable_channel_unit.py | 11 +- .../mutables/mutable_channel/units/utils.py | 80 ---- mmrazor/models/mutators/__init__.py | 5 +- .../mutators/channel_mutator/__init__.py | 3 +- .../channel_mutator/channel_mutator.ipynb | 26 +- .../channel_mutator/channel_mutator.py | 54 +-- .../channel_mutator/dcff_channel_mutator.py | 5 +- .../channel_mutator/dmcp_channel_mutator.py | 176 ++++++++ .../one_shot_channel_mutator.py | 22 +- .../slimmable_channel_mutator.py | 5 +- mmrazor/models/mutators/group_mixin.py | 68 +-- .../value_mutator/dynamic_value_mutator.py | 1 - .../counters/flops_params_counter.py | 4 +- .../counters/op_counters/__init__.py | 9 +- .../op_counters/conv_layer_counter.py | 44 ++ .../op_counters/linear_layer_counter.py | 5 + .../op_counters/norm_layer_counter.py | 17 + 35 files changed, 1239 insertions(+), 429 deletions(-) create mode 100644 configs/pruning/mmcls/dmcp/dmcp_resnet_8xb32.py create mode 100644 
mmrazor/engine/hooks/dmcp_subnet_hook.py create mode 100644 mmrazor/models/algorithms/pruning/dmcp.py create mode 100644 mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py delete mode 100644 mmrazor/models/mutables/mutable_channel/units/utils.py create mode 100644 mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py diff --git a/configs/pruning/mmcls/dmcp/dmcp_resnet_8xb32.py b/configs/pruning/mmcls/dmcp/dmcp_resnet_8xb32.py new file mode 100644 index 000000000..5104181ba --- /dev/null +++ b/configs/pruning/mmcls/dmcp/dmcp_resnet_8xb32.py @@ -0,0 +1,62 @@ +_base_ = [ + 'mmcls::_base_/datasets/imagenet_bs32.py', + 'mmcls::_base_/schedules/imagenet_bs256.py', + 'mmcls::_base_/default_runtime.py' +] +optim_wrapper = dict( + _delete_=True, + constructor='mmrazor.SeparateOptimWrapperConstructor', + architecture=dict( + type='OptimWrapper', + optimizer=dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=3e-4), + clip_grad=dict(max_norm=5, norm_type=2)), + mutator=dict( + type='OptimWrapper', + optimizer=dict(type='Adam', lr=3e-4, weight_decay=1e-3))) + +param_scheduler = dict( + type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1) + +train_cfg = dict( + by_epoch=True, + max_epochs=120, + val_interval=1) + +data_preprocessor = {'type': 'mmcls.ClsDataPreprocessor'} + +custom_hooks = [dict(type='DMCPSubnetHook')] + +# model settings +model = dict( + _scope_='mmrazor', + type='DMCP', + architecture=dict( + cfg_path='mmcls::resnet/resnet50_8xb32_in1k.py', pretrained=False), + distiller=dict( + type='ConfigurableDistiller', + teacher_recorders=dict( + fc=dict(type='ModuleOutputs', source='head.fc')), + student_recorders=dict( + fc=dict(type='ModuleOutputs', source='head.fc')), + distill_losses=dict( + loss_kl=dict(type='KLDivergence', tau=1, loss_weight=1)), + loss_forward_mappings=dict( + loss_kl=dict( + preds_S=dict(recorder='fc', from_student=True), + preds_T=dict(recorder='fc', from_student=False)))), + mutator_cfg=dict( + 
type='DMCPChannelMutator', + channel_unit_cfg=dict( + type='DMCPChannelUnit', default_args=dict(choice_mode='number')), + parse_cfg=dict( + type='BackwardTracer', + loss_calculator=dict(type='ImageClassifierPseudoLoss'))), + arch_start_train=10000, + step_freq=500, + distillation_times=20000, + target_flops=2000) + +model_wrapper_cfg = dict( + type='mmrazor.DMCPDDP', + broadcast_buffers=False, + find_unused_parameters=True) \ No newline at end of file diff --git a/mmrazor/engine/hooks/__init__.py b/mmrazor/engine/hooks/__init__.py index d25c7c993..64326ec56 100644 --- a/mmrazor/engine/hooks/__init__.py +++ b/mmrazor/engine/hooks/__init__.py @@ -1,6 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. +from .dmcp_subnet_hook import DMCPSubnetHook from .dump_subnet_hook import DumpSubnetHook from .estimate_resources_hook import EstimateResourcesHook from .visualization_hook import RazorVisualizationHook -__all__ = ['DumpSubnetHook', 'EstimateResourcesHook', 'RazorVisualizationHook'] +__all__ = ['DumpSubnetHook', 'EstimateResourcesHook', 'RazorVisualizationHook', + 'DMCPSubnetHook'] diff --git a/mmrazor/engine/hooks/dmcp_subnet_hook.py b/mmrazor/engine/hooks/dmcp_subnet_hook.py new file mode 100644 index 000000000..34a6854d9 --- /dev/null +++ b/mmrazor/engine/hooks/dmcp_subnet_hook.py @@ -0,0 +1,62 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import os
+from typing import Optional, Sequence
+
+import yaml
+from mmengine.dist import master_only
+from mmengine.hooks import Hook
+from mmengine.registry import HOOKS
+
+DATA_BATCH = Optional[Sequence[dict]]
+
+
+@HOOKS.register_module()
+class DMCPSubnetHook(Hook):
+    """Sample DMCP subnets after the run and dump them as YAML files.
+
+    Args:
+        subnet_sample_num (int): Number of directly sampled subnets saved
+            in addition to the expected subnet. Defaults to 10.
+    """
+
+    priority = 'VERY_LOW'
+
+    def __init__(self, subnet_sample_num: int = 10, **kwargs) -> None:
+        self.subnet_sample_num = subnet_sample_num
+
+    def _save_subnet(self, arch_space_dict, save_path):
+        """Write the choices, cast to ``int``, to ``save_path`` as YAML."""
+        _cfg = {k: int(v) for k, v in arch_space_dict.items()}
+        with open(save_path, 'w') as file:
+            file.write(yaml.dump(_cfg, allow_unicode=True))
+
+    @master_only
+    def after_run(self, runner):
+        """Dump in-budget direct samples and the expected subnet."""
+        model = getattr(runner.model, 'module', runner.model)
+        runner.logger.info('Sampling...')
+
+        root_dir = os.path.join(runner.work_dir, 'model_sample')
+        os.makedirs(root_dir, exist_ok=True)
+        target_flops = model.target_flops
+        low, high = target_flops * 0.98, target_flops * 1.02
+
+        for i in range(self.subnet_sample_num):
+            while True:  # resample until the FLOPs are within 2% of target
+                model.set_subnet(mode='direct', arch_train=False)
+                cur_flops = model.mutator.calc_current_flops(model)
+                if low <= cur_flops <= high:
+                    break
+            runner.logger.info(
+                f'Direct sample(DS) arch with FlOP(MB): {cur_flops}')
+            save_path = os.path.join(root_dir, f'subnet_{i + 1}.yaml')
+            self._save_subnet(model.mutator.current_choices, save_path)
+
+        # Log the expected subnet's own FLOPs, not the last direct sample's.
+        model.set_subnet(mode='expected', arch_train=False)
+        cur_flops = model.mutator.calc_current_flops(model)
+        runner.logger.info(
+            f'Expected sample(ES) arch with FlOP(MB):{cur_flops}')
+        self._save_subnet(model.mutator.current_choices,
+                          os.path.join(root_dir, 'excepted_ch.yaml'))
+
diff --git a/mmrazor/models/algorithms/pruning/__init__.py b/mmrazor/models/algorithms/pruning/__init__.py
index ea7d77901..d54d32b14 100644
--- a/mmrazor/models/algorithms/pruning/__init__.py
+++ b/mmrazor/models/algorithms/pruning/__init__.py
@@ -1,5 +1,6 @@
 # Copyright (c) OpenMMLab.
All rights reserved.
 from .dcff import DCFF
+from .dmcp import DMCP, DMCPDDP
 from .slimmable_network import SlimmableNetwork, SlimmableNetworkDDP
 
-__all__ = ['SlimmableNetwork', 'SlimmableNetworkDDP', 'DCFF']
+__all__ = ['SlimmableNetwork', 'SlimmableNetworkDDP', 'DCFF', 'DMCP', 'DMCPDDP']
diff --git a/mmrazor/models/algorithms/pruning/dmcp.py b/mmrazor/models/algorithms/pruning/dmcp.py
new file mode 100644
index 000000000..2b5d6baae
--- /dev/null
+++ b/mmrazor/models/algorithms/pruning/dmcp.py
@@ -0,0 +1,425 @@
+import os
+from typing import Dict, List, Optional, Tuple, Union
+
+import random
+from typing_extensions import Self
+import torch
+from mmengine import MMLogger
+from mmengine.model import BaseModel, MMDistributedDataParallel
+from mmengine.optim import OptimWrapper
+from mmengine.structures import BaseDataElement
+from torch import nn
+
+from mmrazor.models.distillers import ConfigurableDistiller
+from mmrazor.models.mutators.base_mutator import BaseMutator
+from mmrazor.models.utils import add_prefix
+from mmrazor.registry import MODEL_WRAPPERS, MODELS
+from mmrazor.utils import ValidFixMutable
+from mmrazor.structures.subnet.fix_subnet import _dynamic_to_static
+from ..base import BaseAlgorithm
+
+VALID_MUTATOR_TYPE = Union[BaseMutator, Dict]
+VALID_MUTATORS_TYPE = Dict[str, Union[BaseMutator, Dict]]
+VALID_DISTILLER_TYPE = Union[ConfigurableDistiller, Dict]
+
+from mmrazor.models.mutators import DMCPChannelMutator
+from mmrazor.models.mutators import ChannelMutator
+from .ite_prune_algorithm import ItePruneAlgorithm, ItePruneConfigManager
+
+LossResults = Dict[str, torch.Tensor]
+TensorResults = Union[Tuple[torch.Tensor], torch.Tensor]
+PredictResults = List[BaseDataElement]
+ForwardResults = Union[LossResults, TensorResults, PredictResults]
+
+
+@MODELS.register_module()
+class DMCP(ItePruneAlgorithm):
+    """DMCP channel pruning algorithm.
+
+    While ``is_supernet`` is true, each ``train_step`` trains one subnet per
+    entry of ``strategy``. Once ``_iter`` exceeds ``arch_start_train``, the
+    mutator (arch) parameters are also updated whenever ``_iter`` is a
+    multiple of ``step_freq``, using a task loss and a FLOPs loss.
+
+    Args:
+        distiller (dict | ConfigurableDistiller): Distiller or its config.
+            The 'max' subnet runs under the teacher recorders, the other
+            subnets under the student recorders.
+        architecture (dict | BaseModel): Forwarded to the parent class.
+        mutator_cfg (dict | DMCPChannelMutator): Forwarded to the parent
+            class.
+        fix_subnet (ValidFixMutable, optional): If given, it is loaded into
+            ``architecture`` and supernet training is disabled.
+        data_preprocessor (dict | nn.Module, optional): Forwarded to the
+            parent class.
+        strategy (list): Subnet kinds trained in each step, chosen from
+            'max', 'min', 'scheduled_random' and 'arch_random'.
+        init_cfg (dict, optional): Forwarded to the parent class.
+        target_pruning_ratio (dict, optional): Forwarded to the parent
+            class.
+        arch_start_train (int): Arch parameters are trained once ``_iter``
+            exceeds this value.
+        step_freq (int): Interval, in iterations, between arch updates.
+        distillation_times (int): Distillation losses are added once
+            ``_iter`` exceeds this value.
+        target_flops (int): Target FLOPs in M.
+        flops_loss_type (str): One of 'l2', 'inverted_log_l1', 'log_l1'
+            and 'l1'.
+        flop_loss_weight (float): Multiplier applied to the FLOPs loss.
+        linear_schedule (bool): Forwarded to the parent class.
+        is_deployed (bool): If True, ``_deploy`` is called at the end of
+            ``__init__``.
+    """
+
+    def __init__(self,
+                 distiller: VALID_DISTILLER_TYPE,
+                 architecture: Union[BaseModel, Dict],
+                 mutator_cfg: Union[Dict, DMCPChannelMutator] = dict(
+                     type='DMCPChannelMutator',
+                     channel_unit_cfg=dict(type='DMCPChannelUnit')),
+                 fix_subnet: Optional[ValidFixMutable] = None,
+                 data_preprocessor: Optional[Union[Dict, nn.Module]] = None,
+                 strategy: List = ['max', 'min', 'scheduled_random',
+                                   'arch_random'],
+                 init_cfg: Optional[Dict] = None,
+                 target_pruning_ratio: Optional[Dict[str, float]] = None,
+                 arch_start_train=10000,
+                 step_freq=500,
+                 distillation_times=2000,
+                 target_flops=150,
+                 flops_loss_type: str = 'log_l1',
+                 flop_loss_weight: float = 1.0,
+                 linear_schedule=False,
+                 is_deployed=False) -> None:
+        super().__init__(architecture, mutator_cfg, data_preprocessor,
+                         target_pruning_ratio, step_freq, init_cfg,
+                         linear_schedule)
+
+        self.arch_start_train = arch_start_train
+        # Copy so the shared default list is never aliased by instances.
+        self.strategy = list(strategy)
+        self.distillation_times = distillation_times
+        self.target_flops = target_flops
+
+        self.samples = len([s for s in self.strategy if 'random' in s])
+        self.is_supernet = len(self.strategy) > 1
+        self.distiller = self._build_distiller(distiller)
+        self.distiller.prepare_from_teacher(self.architecture)
+        self.distiller.prepare_from_student(self.architecture)
+
+        self.flops_loss_type = flops_loss_type
+        self.flop_loss_weight = flop_loss_weight
+        # Threshold of the 'scheduled_random' strategy: 'direct' sampling
+        # is used only when a uniform draw exceeds it.
+        self.cur_sample_prob = 1.0
+        self.arch_train = False
+
+        if fix_subnet:
+            # Avoid circular import
+            from mmrazor.structures import load_fix_subnet
+
+            # According to fix_subnet, delete the unchosen part of supernet
+            load_fix_subnet(self.architecture, fix_subnet)
+            self.is_supernet = False
+        self.is_deployed = is_deployed
+        if self.is_deployed:
+            # To static ops for loaded pruned network.
+            self._deploy()
+
+    def _deploy(self):
+        """Apply the pruning config and convert dynamic ops to static."""
+        config = self.prune_config_manager.prune_at(self._iter)
+        self.mutator.set_choices(config)
+        self.mutator.fix_channel_mutables()
+        self._fix_archtecture()
+        _dynamic_to_static(self.architecture)
+        self.is_deployed = True
+
+    def _build_mutator(self, mutator: VALID_MUTATOR_TYPE) -> BaseMutator:
+        """Build the mutator from a config dict, or validate an instance."""
+        if isinstance(mutator, dict):
+            mutator = MODELS.build(mutator)
+        if not isinstance(mutator, BaseMutator):
+            raise TypeError('mutator should be a `dict` or '
+                            '`BaseMutator` instance, but got '
+                            f'{type(mutator)}')
+
+        return mutator
+
+    def _build_distiller(
+            self, distiller: VALID_DISTILLER_TYPE) -> ConfigurableDistiller:
+        """Build the distiller from a config dict, or validate an instance."""
+        if isinstance(distiller, dict):
+            distiller = MODELS.build(distiller)
+        if not isinstance(distiller, ConfigurableDistiller):
+            raise TypeError('distiller should be a `dict` or '
+                            '`ConfigurableDistiller` instance, but got '
+                            f'{type(distiller)}')
+
+        return distiller
+
+    def set_subnet(self, mode, arch_train=None) -> None:
+        """Set subnet by 'max', 'min', 'random', 'direct' or 'expected'.
+
+        ``arch_train`` falls back to ``self.arch_train`` when it is None.
+        """
+        assert mode in ('max', 'min', 'random', 'direct', 'expected')
+        if arch_train is None:
+            arch_train = self.arch_train
+        self.mutator.sample_subnet(mode, arch_train)
+
+    def train_step(self, data: List[dict],
+                   optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]:
+        """Train one subnet per strategy, then the arch params when due.
+
+        ``optim_wrapper`` is indexed with 'architecture' and 'mutator'.
+        Returns the losses of every trained subnet, prefixed by its kind.
+        """
+        if not self.arch_train and self._iter > self.arch_start_train:
+            self.arch_train = True
+
+        if not self.is_supernet:
+            return super().train_step(data, optim_wrapper)
+
+        def distill_step(
+            batch_inputs: torch.Tensor, data_samples: List[BaseDataElement]
+        ) -> Dict[str, torch.Tensor]:
+            subnet_losses = dict()
+            with optim_wrapper['architecture'].optim_context(
+                    self
+            ), self.distiller.student_recorders:  # type: ignore
+                hard_loss = self(batch_inputs, data_samples, mode='loss')
+                soft_loss = self.distiller.compute_distill_losses()
+
+                subnet_losses.update(hard_loss)
+                if self._iter > self.distillation_times:
+                    subnet_losses.update(soft_loss)
+
+                parsed_subnet_losses, _ = self.parse_losses(subnet_losses)
+                optim_wrapper['architecture'].update_params(
+                    parsed_subnet_losses)
+
+            return subnet_losses
+
+        def train_subnet(mode: str) -> None:
+            # Sample a subnet, train it and record its prefixed losses.
+            self.set_subnet(mode=mode)
+            subnet_losses = distill_step(batch_inputs, data_samples)
+            total_losses.update(add_prefix(subnet_losses, f'{mode}_subnet'))
+
+        batch_inputs, data_samples = self.data_preprocessor(data,
+                                                            True).values()
+        total_losses = dict()
+
+        # update model parameters
+        for kind in self.strategy:
+            # Compare with `==`: `kind in ('max')` would be a substring
+            # test, because ('max') is a plain string, not a tuple.
+            if kind == 'max':
+                self.set_subnet(mode='max')
+                with optim_wrapper['architecture'].optim_context(
+                        self
+                ), self.distiller.teacher_recorders:  # type: ignore
+                    max_subnet_losses = self(
+                        batch_inputs, data_samples, mode='loss')
+                    parsed_max_subnet_losses, _ = self.parse_losses(
+                        max_subnet_losses)
+                    optim_wrapper['architecture'].update_params(
+                        parsed_max_subnet_losses)
+                total_losses.update(
+                    add_prefix(max_subnet_losses, 'max_subnet'))
+            elif kind == 'min':
+                train_subnet('min')
+            elif kind == 'arch_random':
+                train_subnet('direct' if self.arch_train else 'random')
+            elif kind == 'scheduled_random':
+                if random.uniform(0, 1) > self.cur_sample_prob \
+                        and self.arch_train:
+                    train_subnet('direct')
+                else:
+                    train_subnet('random')
+                self.cur_sample_prob *= 0.9999
+
+        # update arch parameters
+        if self.arch_train and self._iter % self.step_freq == 0:
+            with optim_wrapper['mutator'].optim_context(self):
+                optim_wrapper['mutator'].zero_grad()
+                mutator_loss = self._update_arch_params(
+                    batch_inputs, data_samples, optim_wrapper, mode='loss')
+            total_losses.update(mutator_loss)
+        return total_losses
+
+    def _update_arch_params(
+            self,
+            inputs: torch.Tensor,
+            data_samples: Optional[List[BaseDataElement]],
+            optim_wrapper: OptimWrapper,
+            mode: str = 'loss') -> Dict:
+        """Update the arch parameters with a task loss and a FLOPs loss.
+
+        The model is put in eval mode for the update and back in train
+        mode afterwards. Returns the losses prefixed with 'arch' and
+        'flops'.
+        """
+        arch_params_loss = dict()
+        self.eval()
+        # update arch_loss
+        self.set_subnet(mode='max', arch_train=True)
+        with optim_wrapper['mutator'].optim_context(self):
+            arch_loss = self(inputs, data_samples, mode=mode)
+        parsed_arch_loss, _ = self.parse_losses(arch_loss)
+        optim_wrapper['mutator'].update_params(parsed_arch_loss)
+        arch_params_loss.update(add_prefix(arch_loss, 'arch'))
+
+        # update flops_loss
+        self.set_subnet(mode='expected', arch_train=False)
+        expected_flops = self.mutator.calc_current_flops(self)
+        flops_loss = self._compute_flops_loss(expected_flops).to(
+            arch_loss['loss'].device)
+        parsed_flops_loss, _ = self.parse_losses({'loss': flops_loss})
+        optim_wrapper['mutator'].update_params(parsed_flops_loss)
+        arch_params_loss.update(add_prefix({'loss': flops_loss}, 'flops'))
+        self.train()
+        return arch_params_loss
+
+    def _compute_flops_loss(self, expected_flops):
+        """Calculation of loss functions of arch parameters.
+
+        Calculate the difference between the expected FLOPs and the
+        target FLOPs in the units of M.
+        """
+        flops_error = expected_flops - self.target_flops
+
+        if self.flops_loss_type == 'l2':
+            floss = torch.pow(flops_error, 2)
+        elif self.flops_loss_type == 'inverted_log_l1':
+            floss = -torch.log(1 / (flops_error + 1e-5))
+        elif self.flops_loss_type == 'log_l1':
+            if abs(flops_error) > 200:
+                ratio = 0.1
+            else:
+                ratio = 1.0
+            # piecewise log function
+            lower_flops = self.target_flops * 0.95
+            if expected_flops < lower_flops:
+                floss = torch.log(ratio * abs(flops_error))
+            elif (lower_flops <= expected_flops < self.target_flops):
+                floss = expected_flops * 0
+            else:
+                floss = (
+                    torch.log(ratio * abs(expected_flops - (lower_flops))))
+        elif self.flops_loss_type == 'l1':
+            floss = abs(flops_error)
+        else:
+            raise NotImplementedError
+        return floss * self.flop_loss_weight
+
+    def forward(self,
+                inputs: torch.Tensor,
+                data_samples: Optional[List[BaseDataElement]] = None,
+                mode: str = 'loss') -> ForwardResults:
+        """Forward."""
+        return BaseAlgorithm.forward(self, inputs, data_samples, mode)
+
+
+@MODEL_WRAPPERS.register_module()
+class DMCPDDP(MMDistributedDataParallel):
+    """Distributed wrapper running the DMCP training step.
+
+    Mirrors ``DMCP.train_step`` but forwards through the wrapper (``self``)
+    and suffixes each loss prefix with a per-kind counter.
+    """
+
+    def __init__(self,
+                 *,
+                 device_ids: Optional[Union[List, int, torch.device]] = None,
+                 **kwargs) -> None:
+        if device_ids is None:
+            if os.environ.get('LOCAL_RANK') is not None:
+                device_ids = [int(os.environ['LOCAL_RANK'])]
+        super().__init__(device_ids=device_ids, **kwargs)
+
+    def train_step(self, data: List[dict],
+                   optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]:
+        """Train one subnet per strategy, then the arch params when due."""
+        module = self.module
+        if not module.arch_train and module._iter > module.arch_start_train:
+            module.arch_train = True
+
+        if not module.is_supernet:
+            return super().train_step(data, optim_wrapper)
+
+        def distill_step(
+            batch_inputs: torch.Tensor, data_samples: List[BaseDataElement]
+        ) -> Dict[str, torch.Tensor]:
+            subnet_losses = dict()
+            with optim_wrapper['architecture'].optim_context(
+                    self
+            ), module.distiller.student_recorders:  # type: ignore
+                hard_loss = self(batch_inputs, data_samples, mode='loss')
+                soft_loss = module.distiller.compute_distill_losses()
+
+                subnet_losses.update(hard_loss)
+                if module._iter > module.distillation_times:
+                    subnet_losses.update(soft_loss)
+
+                parsed_subnet_losses, _ = module.parse_losses(subnet_losses)
+                optim_wrapper['architecture'].update_params(
+                    parsed_subnet_losses)
+
+            return subnet_losses
+
+        # Next suffix per subnet kind, so repeated kinds keep distinct keys.
+        net_num = dict(max=1, min=1, random=1, direct=1)
+
+        def train_subnet(mode: str) -> None:
+            # Sample a subnet, train it and record its prefixed losses.
+            module.set_subnet(mode=mode)
+            subnet_losses = distill_step(batch_inputs, data_samples)
+            total_losses.update(
+                add_prefix(subnet_losses, f'{mode}_subnet{net_num[mode]}'))
+            net_num[mode] += 1
+
+        batch_inputs, data_samples = module.data_preprocessor(data,
+                                                              True).values()
+        total_losses = dict()
+
+        # update model parameters
+        for kind in module.strategy:
+            # Compare with `==`: `kind in ('max')` would be a substring
+            # test, because ('max') is a plain string, not a tuple.
+            if kind == 'max':
+                module.set_subnet(mode='max')
+                with optim_wrapper['architecture'].optim_context(
+                        self
+                ), module.distiller.teacher_recorders:  # type: ignore
+                    max_subnet_losses = self(
+                        batch_inputs, data_samples, mode='loss')
+                    parsed_max_subnet_losses, _ = module.parse_losses(
+                        max_subnet_losses)
+                    optim_wrapper['architecture'].update_params(
+                        parsed_max_subnet_losses)
+                total_losses.update(
+                    add_prefix(max_subnet_losses,
+                               f"max_subnet{net_num['max']}"))
+                net_num['max'] += 1
+            elif kind == 'min':
+                train_subnet('min')
+            elif kind == 'arch_random':
+                train_subnet('direct' if module.arch_train else 'random')
+            elif kind == 'scheduled_random':
+                if random.uniform(0, 1) > module.cur_sample_prob \
+                        and module.arch_train:
+                    train_subnet('direct')
+                else:
+                    train_subnet('random')
+                module.cur_sample_prob *= 0.9999
+
+        # update arch parameters, gated exactly as in ``DMCP.train_step``
+        if module.arch_train and module._iter % module.step_freq == 0:
+            with optim_wrapper['mutator'].optim_context(self):
+                optim_wrapper['mutator'].zero_grad()
+                mutator_loss = module._update_arch_params(
+                    batch_inputs, data_samples, optim_wrapper, mode='loss')
+            total_losses.update(mutator_loss)
+        return total_losses
diff --git a/mmrazor/models/architectures/dynamic_ops/bricks/__init__.py b/mmrazor/models/architectures/dynamic_ops/bricks/__init__.py
index 269e17516..c41dd0897 100644
--- a/mmrazor/models/architectures/dynamic_ops/bricks/__init__.py
+++ b/mmrazor/models/architectures/dynamic_ops/bricks/__init__.py
@@ -1,34 +1,18 @@
 # Copyright (c) OpenMMLab. All rights reserved.
from .dynamic_container import DynamicSequential -from .dynamic_conv import (BigNasConv2d, DynamicConv2d, - DynamicConv2dAdaptivePadding, FuseConv2d, OFAConv2d) +from .dynamic_conv import BigNasConv2d, DynamicConv2d, FuseConv2d, OFAConv2d from .dynamic_embed import DynamicPatchEmbed -from .dynamic_function import DynamicInputResizer from .dynamic_linear import DynamicLinear from .dynamic_multi_head_attention import DynamicMultiheadAttention from .dynamic_norm import (DynamicBatchNorm1d, DynamicBatchNorm2d, - DynamicBatchNorm3d, DynamicBatchNormXd, - DynamicLayerNorm, DynamicSyncBatchNorm, - SwitchableBatchNorm2d) + DynamicBatchNorm3d, DynamicLayerNorm, + SwitchableBatchNorm2d, DMCPBatchNorm2d) from .dynamic_relative_position import DynamicRelativePosition2D __all__ = [ - 'BigNasConv2d', - 'DynamicConv2d', - 'OFAConv2d', - 'DynamicLinear', - 'DynamicBatchNorm1d', - 'DynamicBatchNorm2d', - 'DynamicBatchNorm3d', - 'SwitchableBatchNorm2d', - 'DynamicSequential', - 'DynamicPatchEmbed', - 'DynamicRelativePosition2D', - 'FuseConv2d', - 'DynamicMultiheadAttention', - 'DynamicSyncBatchNorm', - 'DynamicConv2dAdaptivePadding', - 'DynamicBatchNormXd', - 'DynamicInputResizer', - 'DynamicLayerNorm', + 'BigNasConv2d', 'DynamicConv2d', 'OFAConv2d', 'DynamicLinear', + 'DynamicBatchNorm1d', 'DynamicBatchNorm2d', 'DynamicBatchNorm3d', + 'SwitchableBatchNorm2d', 'DynamicSequential', 'DynamicPatchEmbed', + 'DynamicLayerNorm', 'DynamicRelativePosition2D', 'FuseConv2d', + 'DynamicMultiheadAttention', 'DMCPBatchNorm2d' ] diff --git a/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py b/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py index c249fadca..fbdde7316 100644 --- a/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py +++ b/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py @@ -1,19 +1,19 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from typing import Any, Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional, Tuple +from functools import partial import torch import torch.nn as nn import torch.nn.functional as F -from mmengine.model.utils import _BatchNormXd from torch import Tensor from torch.nn import LayerNorm -from torch.nn.modules._functions import SyncBatchNorm as sync_batch_norm from torch.nn.modules.batchnorm import _BatchNorm from mmrazor.models.mutables.base_mutable import BaseMutable from mmrazor.registry import MODELS from ..mixins import DynamicBatchNormMixin, DynamicLayerNormMixin +PartialType = Callable[[Any, Optional[nn.Parameter]], Tuple] class _DynamicBatchNorm(_BatchNorm, DynamicBatchNormMixin): """Dynamic BatchNormxd OP. @@ -257,118 +257,70 @@ def _check_input_dim(self, input: Tensor) -> None: input.dim())) -class DynamicSyncBatchNorm(nn.SyncBatchNorm, DynamicBatchNormMixin): - """DynamicOp for sync bn.""" +@MODELS.register_module() +class DMCPBatchNorm2d(DynamicBatchNorm2d): + + accepted_mutable_attrs = {'num_features'} - def __init__(self, - num_features: int, - eps: float = 0.00001, - momentum: float = 0.1, - affine: bool = True, - track_running_stats: bool = True, - process_group: Optional[Any] = None) -> None: - super().__init__(num_features, eps, momentum, affine, - track_running_stats, process_group) + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) self.mutable_attrs: Dict[str, Optional[BaseMutable]] = nn.ModuleDict() @classmethod - def convert_from(cls, module): - return cls(module.num_features, module.eps, module.momentum, - module.affine, module.track_running_stats, - module.process_group) + def convert_from(cls, module: _BatchNorm): + """Convert a _BatchNorm module to a DynamicBatchNorm. - @property - def static_op_factory(self): - return nn.SyncBatchNorm + Args: + module (:obj:`torch.nn._BatchNorm`): The original BatchNorm module. 
+ """ + dynamic_bn = cls( + num_features=module.num_features, + eps=module.eps, + momentum=module.momentum, + affine=module.affine, + track_running_stats=module.track_running_stats) + return dynamic_bn - def forward(self, input: Tensor) -> Tensor: - # currently only GPU input is supported - if not input.is_cuda: - raise ValueError( - 'SyncBatchNorm expected input tensor to be on GPU') + def forward(self, + input: Tensor, + arch_param = None, + arch_attr = None): + # arch_param: Optional[nn.Parameter] = None, + # arch_attr: Optional[Tuple] = None) -> Tensor: + out = self.forward_batchnorm(input) + if arch_param is not None: + out = self.forward_arch_param(out, arch_param, arch_attr) + return out + def forward_batchnorm(self, input: Tensor) -> Tensor: + """Forward of dynamic BatchNormxd OP.""" self._check_input_dim(input) - if hasattr(self, '_check_non_zero_input_channels'): - self._check_non_zero_input_channels(input) - # exponential_average_factor is set to self.momentum - # (when it is available) only so that it gets updated - # in ONNX graph when this node is exported to ONNX. if self.momentum is None: exponential_average_factor = 0.0 else: exponential_average_factor = self.momentum if self.training and self.track_running_stats: - assert self.num_batches_tracked is not None - self.num_batches_tracked.add_(1) - if self.momentum is None: # use cumulative moving average - exponential_average_factor = (1.0 / - self.num_batches_tracked.item()) - else: # use exponential moving average - exponential_average_factor = self.momentum - r""" - Decide whether the mini-batch stats should be used for normalization - rather than the buffers. - Mini-batch stats are used in training mode, and in eval mode when - buffers are None. 
- """ + if self.num_batches_tracked is not None: # type: ignore + self.num_batches_tracked = \ + self.num_batches_tracked + 1 # type: ignore + if self.momentum is None: # use cumulative moving average + exponential_average_factor = 1.0 / float( + self.num_batches_tracked) + else: # use exponential moving average + exponential_average_factor = self.momentum + if self.training: bn_training = True else: bn_training = (self.running_mean is None) and (self.running_var is None) - r""" - Buffers are only updated if they are to be tracked and we are in - training mode. Thus they only need to be - passed when the update should occur (i.e. in training mode when - they are tracked), or when buffer stats are - used for normalization (i.e. in eval mode when buffers are not None). - """ - # If buffers are not to be tracked, ensure that they won't be updated - running_mean = ( - self.running_mean - if not self.training or self.track_running_stats else None) - running_var = ( - self.running_var - if not self.training or self.track_running_stats else None) - - # Don't sync batchnorm stats in inference mode (model.eval()). 
- need_sync = (bn_training and self.training) - if need_sync: - process_group = torch.distributed.group.WORLD - if self.process_group: - process_group = self.process_group - world_size = torch.distributed.get_world_size(process_group) - need_sync = world_size > 1 running_mean, running_var, weight, bias = self.get_dynamic_params() - # fallback to framework BN when synchronization is not necessary - if not need_sync: - out = F.batch_norm( - input, - running_mean, - running_var, - weight, - bias, - bn_training, - exponential_average_factor, - self.eps, - ) - else: - assert bn_training - out = sync_batch_norm.apply( - input, - weight, - bias, - running_mean, - running_var, - self.eps, - exponential_average_factor, - process_group, - world_size, - ) + out = F.batch_norm(input, running_mean, running_var, weight, bias, + bn_training, exponential_average_factor, self.eps) # copy changed running statistics if self.training and self.track_running_stats: @@ -377,14 +329,32 @@ def forward(self, input: Tensor) -> Tensor: self.running_var.masked_scatter_(out_mask, running_var) return out + + def forward_arch_param(self, input: Tensor, arch_param, arch_attr): + size_x = input.size() + (group_size, num_groups, min_ch) = arch_attr + if num_groups == 0 or size_x[1] == min_ch: + return input -class DynamicBatchNormXd(_DynamicBatchNorm): - """Dynamic op for _DynamicBatchNorm.""" + arch = torch.clamp(arch_param, min=0) + prob_distribute = torch.exp(-arch) - @property - def static_op_factory(self): - return _BatchNormXd + prob = torch.cumprod(prob_distribute, dim=0).view(num_groups, 1) + tp_x = input.transpose(0, 1).contiguous() + tp_group_x = tp_x[min_ch:] + + size_tp_group = tp_group_x.size() + num_groups = size_tp_group[0] // group_size + tp_group_x = tp_group_x.view(num_groups, -1) * prob[:num_groups] + tp_group_x = tp_group_x.view(size_tp_group) + + out = torch.cat([tp_x[:min_ch], + tp_group_x]).transpose(0, 1).contiguous() + return out - def _check_input_dim(self, input: 
torch.Tensor): - return + def set_forward_args(self, arch_param: nn.Parameter, arch_attr:Tuple) -> None: + """Interface for modifying the arch_param using partial.""" + forward_with_default_args: PartialType = \ + partial(self.forward, arch_param=arch_param, arch_attr=arch_attr) + setattr(self, 'forward', forward_with_default_args) \ No newline at end of file diff --git a/mmrazor/models/mutables/__init__.py b/mmrazor/models/mutables/__init__.py index 70315482f..3efba7e75 100644 --- a/mmrazor/models/mutables/__init__.py +++ b/mmrazor/models/mutables/__init__.py @@ -8,7 +8,8 @@ L1MutableChannelUnit, MutableChannelUnit, OneShotMutableChannelUnit, SequentialMutableChannelUnit, - SlimmableChannelUnit) + SlimmableChannelUnit, + DMCPChannelUnit) from .mutable_module import (DiffChoiceRoute, DiffMutableModule, DiffMutableOP, OneHotMutableOP, OneShotMutableModule, OneShotMutableOP) @@ -22,5 +23,5 @@ 'SimpleMutableChannel', 'MutableChannelUnit', 'SlimmableChannelUnit', 'BaseMutableChannel', 'MutableChannelContainer', 'ChannelUnitType', 'SquentialMutableChannel', 'OneHotMutableOP', 'OneShotMutableChannel', - 'BaseMutable', 'DCFFChannelUnit' + 'BaseMutable', 'DCFFChannelUnit', 'DMCPChannelUnit' ] diff --git a/mmrazor/models/mutables/derived_mutable.py b/mmrazor/models/mutables/derived_mutable.py index ac8a8c60a..5a3f9abb9 100644 --- a/mmrazor/models/mutables/derived_mutable.py +++ b/mmrazor/models/mutables/derived_mutable.py @@ -82,7 +82,7 @@ def _divide_and_divise(x: int, ratio: int, divisor: int = 8) -> int: """Helper function for divide and divise.""" new_x = x // ratio - return make_divisible(new_x, divisor) # type: ignore + return make_divisible(new_x, divisor) def _divide_choice_fn(mutable: MutableProtocol, diff --git a/mmrazor/models/mutables/mutable_channel/__init__.py b/mmrazor/models/mutables/mutable_channel/__init__.py index 618766e4e..da4e1370b 100644 --- a/mmrazor/models/mutables/mutable_channel/__init__.py +++ b/mmrazor/models/mutables/mutable_channel/__init__.py 
@@ -6,12 +6,13 @@ from .simple_mutable_channel import SimpleMutableChannel from .units import (ChannelUnitType, DCFFChannelUnit, L1MutableChannelUnit, MutableChannelUnit, OneShotMutableChannelUnit, - SequentialMutableChannelUnit, SlimmableChannelUnit) + SequentialMutableChannelUnit, SlimmableChannelUnit, + DMCPChannelUnit) __all__ = [ 'SimpleMutableChannel', 'L1MutableChannelUnit', 'SequentialMutableChannelUnit', 'MutableChannelUnit', 'OneShotMutableChannelUnit', 'SlimmableChannelUnit', 'BaseMutableChannel', 'MutableChannelContainer', 'SquentialMutableChannel', 'ChannelUnitType', - 'DCFFChannelUnit', 'OneShotMutableChannel' + 'DCFFChannelUnit', 'OneShotMutableChannel', 'DMCPChannelUnit' ] diff --git a/mmrazor/models/mutables/mutable_channel/mutable_channel_container.py b/mmrazor/models/mutables/mutable_channel/mutable_channel_container.py index 5706d0750..f59929b27 100644 --- a/mmrazor/models/mutables/mutable_channel/mutable_channel_container.py +++ b/mmrazor/models/mutables/mutable_channel/mutable_channel_container.py @@ -81,7 +81,7 @@ def register_mutable_channel_to_module(cls, """Register a BaseMutableChannel to a module with MutableChannelContainers.""" if end == -1: - end = mutable.current_choice + start + end = mutable.num_channels + start if is_to_output_channel: container: MutableChannelContainer = module.get_mutable_attr( 'out_channels') @@ -100,8 +100,7 @@ def _assert_mutables_valid(self): for start, end in self.mutable_channels: assert start == last_end last_end = end - assert last_end == self.num_channels, ( - f'channel mismatch: {last_end} vs {self.num_channels}') + assert last_end == self.num_channels def _fill_unregistered_range(self): """Fill with SimpleMutableChannels in the range without any stored diff --git a/mmrazor/models/mutables/mutable_channel/sequential_mutable_channel.py b/mmrazor/models/mutables/mutable_channel/sequential_mutable_channel.py index c2b4f9291..07b85f6c6 100644 --- 
a/mmrazor/models/mutables/mutable_channel/sequential_mutable_channel.py +++ b/mmrazor/models/mutables/mutable_channel/sequential_mutable_channel.py @@ -27,6 +27,7 @@ def __init__(self, num_channels: int, choice_mode='number', **kwargs): super().__init__(num_channels, **kwargs) assert choice_mode in ['ratio', 'number'] self.choice_mode = choice_mode + self.mask = torch.ones([self.num_channels]).bool() @property def is_num_mode(self): @@ -49,13 +50,14 @@ def current_choice(self, choice: Union[int, float]): int_choice = self._ratio2num(choice) else: int_choice = choice - self.mask.fill_(0.0) - self.mask[0:int_choice] = 1.0 + mask = torch.zeros([self.num_channels], device=self.mask.device) + mask[0:int_choice] = 1 + self.mask = mask.bool() @property def current_mask(self) -> torch.Tensor: """Return current mask.""" - return self.mask.bool() + return self.mask # methods for diff --git a/mmrazor/models/mutables/mutable_channel/simple_mutable_channel.py b/mmrazor/models/mutables/mutable_channel/simple_mutable_channel.py index 9e85f81a3..7f949890c 100644 --- a/mmrazor/models/mutables/mutable_channel/simple_mutable_channel.py +++ b/mmrazor/models/mutables/mutable_channel/simple_mutable_channel.py @@ -1,7 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from typing import Union - import torch from mmrazor.registry import MODELS @@ -20,10 +18,7 @@ class SimpleMutableChannel(BaseMutableChannel): def __init__(self, num_channels: int, **kwargs) -> None: super().__init__(num_channels, **kwargs) - mask = torch.ones([self.num_channels - ]) # save bool as float for dist training - self.register_buffer('mask', mask) - self.mask: torch.Tensor + self.mask = torch.ones(num_channels).bool() # choice @@ -35,7 +30,7 @@ def current_choice(self) -> torch.Tensor: @current_choice.setter def current_choice(self, choice: torch.Tensor): """Set current choice.""" - self.mask = choice.to(self.mask.device).float() + self.mask = choice.to(self.mask.device).bool() @property def current_mask(self) -> torch.Tensor: @@ -44,8 +39,7 @@ def current_mask(self) -> torch.Tensor: # basic extension - def expand_mutable_channel( - self, expand_ratio: Union[int, float]) -> DerivedMutable: + def expand_mutable_channel(self, expand_ratio: int) -> DerivedMutable: """Get a derived SimpleMutableChannel with expanded mask.""" def _expand_mask(): diff --git a/mmrazor/models/mutables/mutable_channel/units/__init__.py b/mmrazor/models/mutables/mutable_channel/units/__init__.py index 8cf814163..f6aa19222 100644 --- a/mmrazor/models/mutables/mutable_channel/units/__init__.py +++ b/mmrazor/models/mutables/mutable_channel/units/__init__.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from .dcff_channel_unit import DCFFChannelUnit +from .dmcp_channel_unit import DMCPChannelUnit from .l1_mutable_channel_unit import L1MutableChannelUnit from .mutable_channel_unit import ChannelUnitType, MutableChannelUnit from .one_shot_mutable_channel_unit import OneShotMutableChannelUnit @@ -9,5 +10,6 @@ __all__ = [ 'L1MutableChannelUnit', 'MutableChannelUnit', 'SequentialMutableChannelUnit', 'OneShotMutableChannelUnit', - 'SlimmableChannelUnit', 'ChannelUnitType', 'DCFFChannelUnit' + 'SlimmableChannelUnit', 'ChannelUnitType', 'DCFFChannelUnit', + 'DMCPChannelUnit' ] diff --git a/mmrazor/models/mutables/mutable_channel/units/channel_unit.py b/mmrazor/models/mutables/mutable_channel/units/channel_unit.py index bf1bf909f..c68b1f491 100644 --- a/mmrazor/models/mutables/mutable_channel/units/channel_unit.py +++ b/mmrazor/models/mutables/mutable_channel/units/channel_unit.py @@ -5,8 +5,12 @@ import torch.nn as nn from mmengine.model import BaseModule -from mmrazor.models.architectures.dynamic_ops.mixins import DynamicChannelMixin -from mmrazor.registry import TASK_UTILS +from mmrazor.structures.graph import ModuleGraph +from mmrazor.structures.graph.channel_graph import ChannelGraph +from mmrazor.structures.graph.channel_modules import (BaseChannel, + BaseChannelUnit) +from mmrazor.structures.graph.channel_nodes import \ + default_channel_node_converter class Channel(BaseModule): @@ -21,6 +25,7 @@ class Channel(BaseModule): Channel. Defaults to None. is_output_channel (bool, optional): Is the channel output channel. Defaults to True. + expand_ratio (int, optional): Expand ratio of the mask. Defaults to 1. 
""" # init @@ -30,10 +35,11 @@ def __init__(self, module, index, node=None, - is_output_channel=True) -> None: + is_output_channel=True, + expand_ratio=1) -> None: super().__init__() self.name = name - self.module: nn.Module = module + self.module = module self.index = index self.start = index[0] self.end = index[1] @@ -41,6 +47,7 @@ def __init__(self, self.node = node self.is_output_channel = is_output_channel + self.expand_ratio = expand_ratio @classmethod def init_from_cfg(cls, model: nn.Module, config: Dict): @@ -49,13 +56,29 @@ def init_from_cfg(cls, model: nn.Module, config: Dict): name = config['name'] start = config['start'] end = config['end'] + expand_ratio = config['expand_ratio'] \ + if 'expand_ratio' in config else 1 is_output_channel = config['is_output_channel'] name2module = dict(model.named_modules()) name2module.pop('') module = name2module[name] if name in name2module else None return Channel( - name, module, (start, end), is_output_channel=is_output_channel) + name, + module, (start, end), + is_output_channel=is_output_channel, + expand_ratio=expand_ratio) + + @classmethod + def init_from_base_channel(cls, base_channel: BaseChannel): + """Init from a BaseChannel object.""" + return cls( + base_channel.name, + base_channel.module, + base_channel.index, + node=None, + is_output_channel=base_channel.is_output_channel, + expand_ratio=base_channel.expand_ratio) # config template @@ -66,6 +89,7 @@ def config_template(self): 'name': self.name, 'start': self.start, 'end': self.end, + 'expand_ratio': self.expand_ratio, 'is_output_channel': self.is_output_channel } @@ -79,29 +103,29 @@ def num_channels(self) -> int: @property def is_mutable(self) -> bool: """If the channel is prunable.""" - if self.module is not None: - has_prama = len(list(self.module.parameters())) != 0 - is_dynamic_op = isinstance(self.module, DynamicChannelMixin) - return (not has_prama) or is_dynamic_op - else: - is_unmutable = self.name in [ - 'input_placeholder', 
'output_placeholder' - ] - return not is_unmutable + if isinstance(self.module, nn.Conv2d): + # group-wise conv + if self.module.groups != 1 and not (self.module.groups == + self.module.in_channels == + self.module.out_channels): + return False + return True def __repr__(self) -> str: return (f'{self.__class__.__name__}(' f'{self.name}, index={self.index}, ' f'is_output_channel=' f'{"true" if self.is_output_channel else "false"}, ' + f'expand_ratio={self.expand_ratio}' ')') def __eq__(self, obj: object) -> bool: - if isinstance(obj, Channel): + if isinstance(obj, BaseChannel): return self.name == obj.name \ and self.module == obj.module \ and self.index == obj.index \ and self.is_output_channel == obj.is_output_channel \ + and self.expand_ratio == obj.expand_ratio \ and self.node == obj.node else: return False @@ -161,7 +185,7 @@ def auto_fill_channel_config(channel_config: Dict, Channel.init_from_cfg(model, channel_config)) for channel_config in channels['output_related']: auto_fill_channel_config(channel_config, True) - unit.add_output_related( + unit.add_ouptut_related( Channel.init_from_cfg(model, channel_config)) return unit @@ -177,16 +201,30 @@ def init_from_channel_unit(cls, return mutable_unit @classmethod - def init_from_channel_analyzer(cls, model, analyzer=None): - """Init MutableChannelUnits from a ChannelAnalyzer.""" + def init_from_graph(cls, + graph: ModuleGraph, + unit_args={}, + num_input_channel=3) -> List['ChannelUnit']: + """Parse a module-graph and get ChannelUnits.""" + + def init_from_base_channel_unit(base_channel_unit: BaseChannelUnit): + unit = cls(len(base_channel_unit.channel_elems), **unit_args) + unit.input_related = [ + Channel.init_from_base_channel(channel) + for channel in base_channel_unit.input_related + ] + unit.output_related = [ + Channel.init_from_base_channel(channel) + for channel in base_channel_unit.output_related + ] + return unit - if analyzer is None: - from mmrazor.models.task_modules.tracer import ChannelAnalyzer - 
analyzer = ChannelAnalyzer() - if isinstance(analyzer, dict): - analyzer = TASK_UTILS.build(analyzer) - unit_config = analyzer.analyze(model) - return [cls.init_from_cfg(model, cfg) for cfg in unit_config.values()] + unit_graph = ChannelGraph.copy_from(graph, + default_channel_node_converter) + unit_graph.forward(num_input_channel) + units = unit_graph.collect_units() + units = [init_from_base_channel_unit(unit) for unit in units] + return units # tools @@ -221,15 +259,19 @@ def config_template(self, # node operations - def add_output_related(self, channel: Channel): + def add_ouptut_related(self, channel: Channel): """Add a Channel which is output related.""" assert channel.is_output_channel + assert self.num_channels == \ + int(channel.num_channels // channel.expand_ratio) if channel not in self.output_related: self.output_related.append(channel) def add_input_related(self, channel: Channel): """Add a Channel which is input related.""" assert channel.is_output_channel is False + assert self.num_channels == \ + int(channel.num_channels // channel.expand_ratio) if channel not in self.input_related: self.input_related.append(channel) diff --git a/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py b/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py new file mode 100644 index 000000000..386db923b --- /dev/null +++ b/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py @@ -0,0 +1,58 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from typing import List, Union + +import torch.nn as nn + +from mmrazor.models.architectures import dynamic_ops +from mmrazor.registry import MODELS +from ..mutable_channel_container import MutableChannelContainer +from .sequential_mutable_channel_unit import SequentialMutableChannelUnit + + +class tracable_choice(): + def __init__(self, choice = None) -> None: + self.choice = choice + + def __call__(self): + return self.choice + + +@MODELS.register_module() +class DMCPChannelUnit(SequentialMutableChannelUnit): + """``DMCPChannelUnit`` is for supernet DMCP and based on + SequentialMutableChannelUnit. In DMCP supernet, each module only has one + choice. The channel choice is fixed before training. + + Args: + num_channels (int): The raw number of channels. + candidate_choices (List[Union[int, float]], optional): + A list of candidate width numbers or ratios. Each + candidate indicates how many channels to be reserved. + Defaults to [1.0](choice_mode='number'). + choice_mode (str, optional): Mode of candidates. + One of "ratio" or "number". Defaults to 'number'. + divisor (int): Used to make choice divisible. + min_value (int): the minimal value used when making divisible. + min_ratio (float): the minimal ratio used when making divisible.
+ """ + + def __init__(self, + num_channels: int, + choice_mode: str = 'number', + divisor: int = 1, + min_value: int = 1, + min_ratio: float = 0.5) -> None: + super().__init__(num_channels, choice_mode, divisor, min_value, + min_ratio) + self._traceable_choice = tracable_choice() + + def prepare_for_pruning(self, model: nn.Module): + """In ``DMCPChannelUnit`` nn.BatchNorm2d is replaced with DMCPBatchNorm2d.""" + self._replace_with_dynamic_ops( + model, { + nn.Conv2d: dynamic_ops.DynamicConv2d, + nn.BatchNorm2d: dynamic_ops.DMCPBatchNorm2d, + nn.Linear: dynamic_ops.DynamicLinear + }) + self._register_channel_container(model, MutableChannelContainer) + self._register_mutable_channel(self.mutable_channel) diff --git a/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.ipynb b/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.ipynb index bc40d191b..5af2d496b 100644 --- a/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.ipynb +++ b/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.ipynb @@ -36,19 +36,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# define a model\n", "from mmengine.model import BaseModel\n", "from torch import nn\n", + "import torch\n", "from collections import OrderedDict\n", "\n", - "class MyModel(nn.Module):\n", + "class MyModel(BaseModel):\n", "\n", " def __init__(self):\n", - " super().__init__()\n", + " super().__init__(None, None)\n", " self.net = nn.Sequential(\n", " OrderedDict([('conv0', nn.Conv2d(3, 8, 3, 1, 1)),\n", " ('relu', nn.ReLU()),\n", @@ -64,9 +65,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "This model has 4 MutableChannelUnit(SequentialMutableChannelUnit).\n" + ] + } + ], "source": [ "# There are multiple types of MutableChannelUnits.
Here, We take SequentialMutableChannelUnit as the example.\n", "from mmrazor.models.mutables.mutable_channel.units import SequentialMutableChannelUnit\n", @@ -74,8 +83,9 @@ "from typing import List\n", "\n", "model = MyModel()\n", + "graph = ModuleGraph.init_from_backward_tracer(model)\n", "units: List[\n", - " SequentialMutableChannelUnit] = SequentialMutableChannelUnit.init_from_channel_analyzer(model) # type: ignore\n", + " SequentialMutableChannelUnit] = SequentialMutableChannelUnit.init_from_graph(graph) # type: ignore\n", "print(\n", " f'This model has {len(units)} MutableChannelUnit(SequentialMutableChannelUnit).'\n", ")\n" @@ -83,9 +93,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SequentialMutableChannelUnit(\n", + " name=net.conv0_(0, 8)_8\n", + " (output_related): ModuleList(\n", + " (0): Channel(net.conv0, index=(0, 8), is_output_channel=true, expand_ratio=1)\n", + " )\n", + " (input_related): ModuleList(\n", + " (0): Channel(net.conv1, index=(0, 8), is_output_channel=false, expand_ratio=1)\n", + " )\n", + " (mutable_channel): SquentialMutableChannel(num_channels=8, activated_channels=8)\n", + ")\n" + ] + } + ], "source": [ "unit1=units[1]\n", "print(unit1)" @@ -131,9 +158,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The current choice of unit1 is 8.\n", + "DynamicConv2d(\n", + " 3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)\n", + " (mutable_attrs): ModuleDict(\n", + " (in_channels): MutableChannelContainer(num_channels=3, activated_channels=3)\n", + " (out_channels): MutableChannelContainer(num_channels=8, activated_channels=8)\n", + " )\n", + ")\n", + "DynamicConv2d(\n", + " 8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)\n", + " 
(mutable_attrs): ModuleDict(\n", + " (in_channels): MutableChannelContainer(num_channels=8, activated_channels=8)\n", + " (out_channels): MutableChannelContainer(num_channels=16, activated_channels=16)\n", + " )\n", + ")\n" + ] + } + ], "source": [ "# We run \"prepare_for_pruning\" once before pruning to run step 1 and 2 above.\n", "unit1.prepare_for_pruning(model)\n", @@ -151,9 +200,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "We get a sampled choice 2.\n", + "DynamicConv2d(\n", + " 3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)\n", + " (mutable_attrs): ModuleDict(\n", + " (in_channels): MutableChannelContainer(num_channels=3, activated_channels=3)\n", + " (out_channels): MutableChannelContainer(num_channels=8, activated_channels=2)\n", + " )\n", + ")\n", + "DynamicConv2d(\n", + " 8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)\n", + " (mutable_attrs): ModuleDict(\n", + " (in_channels): MutableChannelContainer(num_channels=8, activated_channels=2)\n", + " (out_channels): MutableChannelContainer(num_channels=16, activated_channels=16)\n", + " )\n", + ")\n" + ] + } + ], "source": [ "sampled_choice=unit1.sample_choice()\n", "print(f'We get a sampled choice {sampled_choice}.')\n", @@ -193,13 +264,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The model has 4 MutableChannelUnits.\n" + ] + } + ], "source": [ "# 1. 
using tracer\n", "def get_mutable_channel_units_using_tracer(model):\n", - " units = SequentialMutableChannelUnit.init_from_channel_analyzer(model)\n", + " graph = ModuleGraph.init_from_backward_tracer(model)\n", + " units = SequentialMutableChannelUnit.init_from_graph(graph)\n", " return units\n", "\n", "\n", @@ -210,9 +290,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SequentialMutableChannelUnit(\n", + " name=net.conv0_(0, 8)_8\n", + " (output_related): ModuleList(\n", + " (0): Channel(net.conv0, index=(0, 8), is_output_channel=true, expand_ratio=1)\n", + " )\n", + " (input_related): ModuleList(\n", + " (0): Channel(net.conv1, index=(0, 8), is_output_channel=false, expand_ratio=1)\n", + " )\n", + " (mutable_channel): SquentialMutableChannel(num_channels=8, activated_channels=8)\n", + ")\n" + ] + } + ], "source": [ "# 2. using config\n", "config = {\n", @@ -235,9 +332,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The model has 2 MutableChannelUnits.\n" + ] + } + ], "source": [ "# 3. 
using predefined model\n", "\n", @@ -286,7 +391,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.9.13 ('lab2max')", + "display_name": "Python 3.9.12 ('mmlab')", "language": "python", "name": "python3" }, @@ -300,12 +405,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.9.12" }, "orig_nbformat": 4, "vscode": { "interpreter": { - "hash": "e31a827d0913016ad78e01c7b97f787f4b9e53102dd62d238e8548bcd97ff875" + "hash": "feec882ee78c63cb8d4b485f1b52bbb873bb9a7b094435863200c7afba202382" } } }, diff --git a/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.py b/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.py index dabe41fab..748b2333b 100644 --- a/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.py +++ b/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.py @@ -4,13 +4,13 @@ from collections import Set from typing import Dict, List, Type, TypeVar -import torch import torch.nn as nn from mmrazor.models.architectures.dynamic_ops.mixins import DynamicChannelMixin from mmrazor.models.mutables import DerivedMutable from mmrazor.models.mutables.mutable_channel import (BaseMutableChannel, MutableChannelContainer) +from mmrazor.models.mutables.mutable_value import MutableValue from .channel_unit import Channel, ChannelUnit @@ -41,16 +41,6 @@ def __init__(self, num_channels: int, **kwargs) -> None: super().__init__(num_channels) - @classmethod - def init_from_cfg(cls, model: nn.Module, config: Dict): - """init a Channel using a config which can be generated by - self.config_template(), include init choice.""" - unit = super().init_from_cfg(model, config) - # TO DO: add illegal judgement here? 
- if 'choice' in config: - unit.current_choice = config['choice'] - return unit - @classmethod def init_from_mutable_channel(cls, mutable_channel: BaseMutableChannel): unit = cls(mutable_channel.num_channels) @@ -61,15 +51,13 @@ def init_from_predefined_model(cls, model: nn.Module): """Initialize units using the model with pre-defined dynamicops and mutable-channels.""" - def process_container(container: MutableChannelContainer, + def process_container(contanier: MutableChannelContainer, module, module_name, mutable2units, is_output=True): - for index, mutable in container.mutable_channels.items(): - derived_choices = mutable.current_choice - if isinstance(derived_choices, torch.Tensor): - derived_choices = derived_choices.sum().item() + for index, mutable in contanier.mutable_channels.items(): + expand_ratio = 1 if isinstance(mutable, DerivedMutable): source_mutables: Set = \ mutable._trace_source_mutables() @@ -80,7 +68,18 @@ def process_container(container: MutableChannelContainer, assert len(source_channel_mutables) == 1, ( 'only support one mutable channel ' 'used in DerivedMutable') - mutable = source_channel_mutables[0] + mutable = list(source_channel_mutables)[0] + + source_value_mutables = [ + mutable for mutable in source_mutables + if isinstance(mutable, MutableValue) + ] + assert len(source_value_mutables) <= 1, ( + 'only support one mutable value ' + 'used in DerivedMutable') + expand_ratio = int( + list(source_value_mutables) + [0].current_choice) if source_value_mutables else 1 if mutable not in mutable2units: mutable2units[mutable] = cls.init_from_mutable_channel( @@ -88,19 +87,21 @@ def process_container(container: MutableChannelContainer, unit: MutableChannelUnit = mutable2units[mutable] if is_output: - unit.add_output_related( + unit.add_ouptut_related( Channel( module_name, module, index, - is_output_channel=is_output)) + is_output_channel=is_output, + expand_ratio=expand_ratio)) else: unit.add_input_related( Channel( module_name, module, index, 
- is_output_channel=is_output)) + is_output_channel=is_output, + expand_ratio=expand_ratio)) mutable2units: Dict = {} for name, module in model.named_modules(): @@ -266,16 +267,14 @@ def _register_mutable_channel(self, mutable_channel: BaseMutableChannel): start = channel.start end = channel.end elif channel.num_channels > self.num_channels: - if channel.num_channels % self.num_channels == 0: - ratio = channel.num_channels // self.num_channels + mutable_channel_ = \ + mutable_channel.expand_mutable_channel( + channel.num_channels // self.num_channels) + start = channel.start + end = channel.end else: - ratio = channel.num_channels / self.num_channels - - mutable_channel_ = \ - mutable_channel.expand_mutable_channel(ratio) - start = channel.start - end = channel.end + raise NotImplementedError() else: raise NotImplementedError() @@ -290,7 +289,7 @@ def _register_mutable_channel(self, mutable_channel: BaseMutableChannel): mutable_channel is mutable for mutable in source_mutables ] - assert any(is_same), 'existed a mutable channel.' 
+ assert any(is_same) else: container.register_mutable(mutable_channel_, start, end) diff --git a/mmrazor/models/mutables/mutable_channel/units/sequential_mutable_channel_unit.py b/mmrazor/models/mutables/mutable_channel/units/sequential_mutable_channel_unit.py index 89dc785ed..89a25d236 100644 --- a/mmrazor/models/mutables/mutable_channel/units/sequential_mutable_channel_unit.py +++ b/mmrazor/models/mutables/mutable_channel/units/sequential_mutable_channel_unit.py @@ -3,11 +3,7 @@ from typing import Dict, Union import torch.nn as nn -from mmcv.cnn.bricks import Conv2dAdaptivePadding from mmengine import MMLogger -from mmengine.model.utils import _BatchNormXd -from mmengine.utils.dl_utils.parrots_wrapper import \ - SyncBatchNorm as EngineSyncBatchNorm from mmrazor.models.architectures import dynamic_ops from mmrazor.models.utils import make_divisible @@ -64,14 +60,9 @@ def prepare_for_pruning(self, model: nn.Module): # register MutableMask self._replace_with_dynamic_ops( model, { - Conv2dAdaptivePadding: - dynamic_ops.DynamicConv2dAdaptivePadding, nn.Conv2d: dynamic_ops.DynamicConv2d, nn.BatchNorm2d: dynamic_ops.DynamicBatchNorm2d, - nn.Linear: dynamic_ops.DynamicLinear, - nn.SyncBatchNorm: dynamic_ops.DynamicSyncBatchNorm, - EngineSyncBatchNorm: dynamic_ops.DynamicSyncBatchNorm, - _BatchNormXd: dynamic_ops.DynamicBatchNormXd, + nn.Linear: dynamic_ops.DynamicLinear }) self._register_channel_container(model, MutableChannelContainer) self._register_mutable_channel(self.mutable_channel) diff --git a/mmrazor/models/mutables/mutable_channel/units/utils.py b/mmrazor/models/mutables/mutable_channel/units/utils.py deleted file mode 100644 index 41601ac7a..000000000 --- a/mmrazor/models/mutables/mutable_channel/units/utils.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. 
- -from typing import List - -import torch - -from mmrazor.models.mutables.mutable_channel.units import \ - SequentialMutableChannelUnit -from mmrazor.utils import print_log - - -def assert_model_is_changed(tensors1, tensors2): - """Return if the tensors has the same shape (length).""" - shape1 = get_shape(tensors1, only_length=True) - shape2 = get_shape(tensors2, only_length=True) - assert shape1 == shape2, f'{shape1}!={shape2}' - - -def get_shape(tensor, only_length=False): - """Get the shape of a tensor list/tuple/dict. - - Args: - tensor (Union[List,Tuple,Dict,Tensor]): input tensors. - only_length (bool, optional): If only return the length of the tensors. - Defaults to False. - """ - if isinstance(tensor, torch.Tensor): - if only_length: - return len(tensor.shape) - else: - return tensor.shape - elif isinstance(tensor, list) or isinstance(tensor, tuple): - shapes = [] - for x in tensor: - shapes.append(get_shape(x, only_length)) - return shapes - elif isinstance(tensor, dict): - shapes = {} - for key in tensor: - shapes[key] = get_shape(tensor[key], only_length) - return shapes - else: - raise NotImplementedError( - f'unsuppored type{type(tensor)} to get shape of tensors.') - - -def forward_units(model, try_units: List[SequentialMutableChannelUnit], - units: List[SequentialMutableChannelUnit], demo_input, - template_output): - """Forward a model with MutableChannelUnits and assert if the result - changed.""" - model.eval() - for unit in units: - unit.current_choice = 1.0 - for unit in try_units: - unit.current_choice = min(max(0.1, unit.sample_choice()), 0.9) - if isinstance(demo_input, dict): - tensors = model(**demo_input) - else: - tensors = model(demo_input) - assert_model_is_changed(template_output, tensors) - - -def find_mutable(model, try_units, units, demo_input, template_tensors): - """Find really mutable MutableChannelUnits in some MutableChannelUnits.""" - if len(try_units) == 0: - return [] - try: - forward_units(model, try_units, units, 
demo_input, template_tensors) - return try_units - except Exception: - if len(try_units) == 1: - print_log(f'Find an unmutable unit {try_units[0]}', level='debug') - return [] - else: - num = len(try_units) - return find_mutable(model, try_units[:num // 2], units, demo_input, - template_tensors) + find_mutable( - model, try_units[num // 2:], units, - demo_input, template_tensors) diff --git a/mmrazor/models/mutators/__init__.py b/mmrazor/models/mutators/__init__.py index 6a430bed3..dab7a955b 100644 --- a/mmrazor/models/mutators/__init__.py +++ b/mmrazor/models/mutators/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .channel_mutator import (ChannelMutator, DCFFChannelMutator, +from .channel_mutator import (ChannelMutator, DCFFChannelMutator, DMCPChannelMutator, OneShotChannelMutator, SlimmableChannelMutator) from .module_mutator import (DiffModuleMutator, ModuleMutator, OneShotModuleMutator) @@ -8,5 +8,6 @@ __all__ = [ 'OneShotModuleMutator', 'DiffModuleMutator', 'ModuleMutator', 'ChannelMutator', 'OneShotChannelMutator', 'SlimmableChannelMutator', - 'ValueMutator', 'DynamicValueMutator', 'DCFFChannelMutator' + 'ValueMutator', 'DynamicValueMutator', 'DCFFChannelMutator', + 'DMCPChannelMutator' ] diff --git a/mmrazor/models/mutators/channel_mutator/__init__.py b/mmrazor/models/mutators/channel_mutator/__init__.py index 3b64c1cf8..6bc1a953f 100644 --- a/mmrazor/models/mutators/channel_mutator/__init__.py +++ b/mmrazor/models/mutators/channel_mutator/__init__.py @@ -1,10 +1,11 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from .channel_mutator import ChannelMutator from .dcff_channel_mutator import DCFFChannelMutator +from .dmcp_channel_mutator import DMCPChannelMutator from .one_shot_channel_mutator import OneShotChannelMutator from .slimmable_channel_mutator import SlimmableChannelMutator __all__ = [ 'SlimmableChannelMutator', 'ChannelMutator', 'OneShotChannelMutator', - 'DCFFChannelMutator' + 'DCFFChannelMutator', 'DMCPChannelMutator' ] diff --git a/mmrazor/models/mutators/channel_mutator/channel_mutator.ipynb b/mmrazor/models/mutators/channel_mutator/channel_mutator.ipynb index 58b56c783..9fe1010c4 100644 --- a/mmrazor/models/mutators/channel_mutator/channel_mutator.ipynb +++ b/mmrazor/models/mutators/channel_mutator/channel_mutator.ipynb @@ -27,16 +27,7 @@ "cell_type": "code", "execution_count": 24, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/liukai/miniconda3/envs/lab2max/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], + "outputs": [], "source": [ "# define a model\n", "from mmengine.model import BaseModel\n", @@ -44,10 +35,10 @@ "import torch\n", "from collections import OrderedDict\n", "\n", - "class MyModel(nn.Module):\n", + "class MyModel(BaseModel):\n", "\n", " def __init__(self):\n", - " super().__init__()\n", + " super().__init__(None, None)\n", " self.net = nn.Sequential(\n", " OrderedDict([('conv0', nn.Conv2d(3, 8, 3, 1, 1)),\n", " ('relu', nn.ReLU()),\n", @@ -79,8 +70,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "11/14 14:24:13 - mmengine - \u001b[5m\u001b[4m\u001b[33mWARNING\u001b[0m - add a input before net.conv0(net.conv0), error: net.conv0(net.conv0)\n", - "11/14 14:24:13 - mmengine - \u001b[5m\u001b[4m\u001b[33mWARNING\u001b[0m - add a output after head(head), error: head(head)\n", "The mutator has 2 mutable channel units.\n" ] } @@ -97,7 +86,8 @@ " units={},\n", " ),\n", " parse_cfg=dict(\n", - " type='ChannelAnalyzer'))\n", + " type='BackwardTracer',\n", + " loss_calculator=dict(type='ImageClassifierPseudoLoss')))\n", "# init the ChannelMutator object with a model\n", "mutator.prepare_from_supernet(model)\n", "print(f'The mutator has {len(mutator.mutable_units)} mutable channel units.')" @@ -347,7 +337,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.9.13 ('lab2max')", + "display_name": "Python 3.6.8 64-bit", "language": "python", "name": "python3" }, @@ -361,12 +351,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.6.8" }, "orig_nbformat": 4, "vscode": { "interpreter": { - "hash": "e31a827d0913016ad78e01c7b97f787f4b9e53102dd62d238e8548bcd97ff875" + "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" } } }, diff --git a/mmrazor/models/mutators/channel_mutator/channel_mutator.py 
b/mmrazor/models/mutators/channel_mutator/channel_mutator.py index 71db1cd43..7ced693b9 100644 --- a/mmrazor/models/mutators/channel_mutator/channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/channel_mutator.py @@ -5,16 +5,21 @@ from mmengine import fileio from torch.nn import Module, ModuleList +from mmrazor.models.architectures.dynamic_ops import DynamicChannelMixin from mmrazor.models.mutables import (ChannelUnitType, MutableChannelUnit, SequentialMutableChannelUnit) from mmrazor.models.mutables.mutable_channel.units.channel_unit import \ ChannelUnit -from mmrazor.models.task_modules.tracer.channel_analyzer import ChannelAnalyzer -from mmrazor.registry import MODELS, TASK_UTILS +from mmrazor.registry import MODELS +from mmrazor.structures.graph import ModuleGraph from ..base_mutator import BaseMutator from ..group_mixin import GroupMixin +def is_dynamic_op_for_fx_tracer(module, name): + return isinstance(module, DynamicChannelMixin) + + @MODELS.register_module() class ChannelMutator(BaseMutator, Generic[ChannelUnitType], GroupMixin): """ChannelMutator manages the pruning structure of a model. @@ -41,10 +46,8 @@ class ChannelMutator(BaseMutator, Generic[ChannelUnitType], GroupMixin): parse_cfg (Dict, optional): The config to parse the model. Defaults to - dict( - type='ChannelAnalyzer', - demo_input=(1, 3, 224, 224), - tracer_type='BackwardTracer') + dict( type='BackwardTracer', + loss_calculator=dict(type='ImageClassifierPseudoLoss')). custom_groups (list[list[str]], optional): User-defined search groups. All searchable modules that are not in ``custom_group`` will be @@ -56,8 +59,7 @@ class ChannelMutator(BaseMutator, Generic[ChannelUnitType], GroupMixin): Note: There are three ways used in ChannelMutator to parse a model and get MutableChannelUnits. - 1. Using tracer. It needs parse_cfg to be the config of the - ChannelAnalyzer. + 1. Using tracer. It needs parse_cfg to be the config of a tracer. 2. Using config. When parse_cfg['type']='Config'. 
It needs that channel_unit_cfg['unit']['xxx_unit_name] has a key 'channels'. 3. Using the model with pre-defined dynamic-ops and mutablechannels: @@ -71,9 +73,8 @@ def __init__(self, dict, Type[MutableChannelUnit]] = SequentialMutableChannelUnit, parse_cfg: Dict = dict( - type='ChannelAnalyzer', - demo_input=(1, 3, 224, 224), - tracer_type='BackwardTracer'), + type='BackwardTracer', + loss_calculator=dict(type='ImageClassifierPseudoLoss')), custom_groups: Optional[List[List[str]]] = None, init_cfg: Optional[Dict] = None) -> None: @@ -82,7 +83,7 @@ def __init__(self, # tracer if isinstance(parse_cfg, dict): assert parse_cfg['type'] in [ - 'ChannelAnalyzer', 'Config', 'Predefined' + 'RazorFxTracer', 'BackwardTracer', 'Config', 'Predefined' ] self.parse_cfg = parse_cfg @@ -107,10 +108,10 @@ def prepare_from_supernet(self, supernet: Module) -> None: 1. parse the model and get MutableChannelUnits. 2. call unit.prepare_for_pruning for each unit. """ + self._name2module = dict(supernet.named_modules()) - if isinstance(self.parse_cfg, - ChannelAnalyzer) or 'Analyzer' in self.parse_cfg['type']: + if 'Tracer' in self.parse_cfg['type']: units = self._prepare_from_tracer(supernet, self.parse_cfg) elif self.parse_cfg['type'] == 'Config': units = self._prepare_from_cfg(supernet, self.units_cfg) @@ -212,7 +213,7 @@ def current_choices(self) -> Dict: return current_choices - def sample_choices(self, kind: str = 'random') -> Dict[int, Any]: + def sample_choices(self) -> Dict[int, Any]: """Sampling by search groups. The sampling result of the first mutable of each group is the sampling @@ -221,7 +222,6 @@ def sample_choices(self, kind: str = 'random') -> Dict[int, Any]: Returns: Dict[int, Any]: Random choices dict. """ - assert kind == 'random', f'unsupported the {kind} sample method.' 
random_choices = dict() for group_id, modules in self.search_groups.items(): random_choices[group_id] = modules[0].sample_choice() @@ -319,18 +319,20 @@ def _parse_channel_unit_cfg( def _prepare_from_tracer(self, model: Module, parse_cfg: Dict): """Initialize units using a tracer.""" - - if isinstance(parse_cfg, Dict): - tracer: ChannelAnalyzer = TASK_UTILS.build(parse_cfg) + if 'num_input_channel' in parse_cfg: + num_input_channel = parse_cfg.pop('num_input_channel') else: - tracer = parse_cfg - unit_configs = tracer.analyze(model) - + num_input_channel = 3 + if self.parse_cfg['type'] == 'BackwardTracer': + graph = ModuleGraph.init_from_backward_tracer(model, parse_cfg) + elif self.parse_cfg['type'] == 'RazorFxTracer': + graph = ModuleGraph.init_from_fx_tracer(model, fx_tracer=parse_cfg) + else: + raise NotImplementedError() + self._graph = graph # get ChannelUnits - units = [ - ChannelUnit.init_from_cfg(model, cfg) - for cfg in unit_configs.values() - ] + units = ChannelUnit.init_from_graph( + graph, num_input_channel=num_input_channel) # convert to MutableChannelUnits units = self._convert_channel_unit_to_mutable(units) return units diff --git a/mmrazor/models/mutators/channel_mutator/dcff_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/dcff_channel_mutator.py index 5994eade4..8dd335bff 100644 --- a/mmrazor/models/mutators/channel_mutator/dcff_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/dcff_channel_mutator.py @@ -25,9 +25,8 @@ def __init__(self, channel_unit_cfg: Union[dict, Type[ChannelUnitType]] = dict( type='DCFFChannelUnit', units={}), parse_cfg=dict( - type='ChannelAnalyzer', - demo_input=(1, 3, 224, 224), - tracer_type='BackwardTracer'), + type='BackwardTracer', + loss_calculator=dict(type='ImageClassifierPseudoLoss')), **kwargs) -> None: super().__init__(channel_unit_cfg, parse_cfg, **kwargs) diff --git a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py 
b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py new file mode 100644 index 000000000..a6cfcd416 --- /dev/null +++ b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py @@ -0,0 +1,176 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Any, Dict, Type, Union +import random + +import torch +import torch.nn as nn +from torch.nn import Module, ModuleList +from mmrazor.models.mutables import DMCPChannelUnit +from mmrazor.registry import MODELS +from .channel_mutator import ChannelMutator, ChannelUnitType +from ...architectures import DMCPBatchNorm2d +from ...task_modules.estimators import ResourceEstimator + +@MODELS.register_module() +class DMCPChannelMutator(ChannelMutator[DMCPChannelUnit]): + """DMCP channel mutable based channel mutator. It uses DMCPPChannelUnit. + + Args: + channel_unit_cfg (Union[dict, Type[ChannelUnitType]], optional): + Config of MutableChannelUnits. Defaults to + dict( type='DMCPPChannelUnit', units={}). + parse_cfg (Dict): The config of the tracer to parse the model. + Defaults to dict( type='BackwardTracer', + loss_calculator=dict(type='ImageClassifierPseudoLoss')). + Change loss_calculator according to task and backbone. + pruning_cfg (Tuple): (min_sample_rate, max_sample_rate, sample_offset)). + """ + + def __init__(self, + channel_unit_cfg: Union[dict, Type[ChannelUnitType]] = dict( + type='DMCPChannelUnit', units={}), + parse_cfg=dict( + type='BackwardTracer', + loss_calculator=dict(type='ImageClassifierPseudoLoss')), + pruning_cfg=(0.1, 1, 0.05), + **kwargs) -> None: + super().__init__(channel_unit_cfg, parse_cfg, **kwargs) + self.pruning_cfg = pruning_cfg + + + def prepare_from_supernet(self, supernet: Module) -> None: + """Prepare from a model for pruning. + + It includes two steps: + 1. parse the model and get MutableChannelUnits. + 2. call unit.prepare_for_pruning for each unit. 
+ """ + super().prepare_from_supernet(supernet) + self.prepare_arch_params(supernet) + + def _build_arch_param(self, num_choices) -> nn.Parameter: + """Build learnable architecture parameters.""" + return nn.Parameter(torch.zeros(num_choices)) + + def prepare_arch_params(self, supernet): + # Associate all the op's in the model with their corresponding arch parameters + self.arch_params = nn.ParameterDict() + self._op_arch_align = dict() + self._arch_params_attr = dict() + for group_id, module in self.search_groups.items(): + arch_message = self._generate_arch_message(module[0].mutable_channel.num_channels) + self._arch_params_attr[str(group_id)] = arch_message + group_arch_param = self._build_arch_param(arch_message[1]) + self.arch_params[str(group_id)] = group_arch_param + + for m in module[0].output_related: + self._op_arch_align[str(m.name)] = str(group_id) + + # Associate all the BN in the model with their corresponding arch parameters + self._bn_arch_align = dict() + for name, module in supernet.named_modules(): + if isinstance(module, DMCPBatchNorm2d): + self._bn_arch_align[module] = self._op_arch_align[str(name)] + # prepare for calulate FLOPs with _traceable_choice + if name in self._op_arch_align.keys(): + module._traceable_choice = \ + self.search_groups[int(self._op_arch_align[str(name)])][0]._traceable_choice + + + def _generate_arch_message(self, out_channels): + """ + Define the search space of the channel according to the pruning + rate range, where the search space consists of two parts + 1. sampled by pruning rate (that is, maximum, minimum and random + pruning rate) + 2. 
sampled by probability + """ + (min_rate, max_rate, rate_offset) = self.pruning_cfg + + # sampled by probability + group_size = int(rate_offset * out_channels / max_rate) + num_groups = int((max_rate - min_rate) / rate_offset + 1e-4) + min_ch = out_channels - (group_size * num_groups) + assert min_ch > 0 + assert group_size * num_groups + min_ch == out_channels + + return (group_size, num_groups, min_ch) + + def modify_supernet_forward(self, arch_train): + for module, group_id in self._bn_arch_align.items(): + if arch_train: + arch_param = self.arch_params[self._bn_arch_align[module]] + arch_params_attr = self._arch_params_attr[str(group_id)] + else: + arch_param = arch_params_attr = None + module.set_forward_args(arch_param=arch_param, arch_attr=arch_params_attr) + + def sample_subnet(self, mode, arch_train): + choices = dict() + for group_id, _ in self.search_groups.items(): + choices[group_id] = self._prune_by_arch(mode, group_id) + self.set_choices(choices) + + self.modify_supernet_forward(arch_train) + + def _prune_by_arch(self, mode, group_id): + """ + Prune the output channels according to the specified mode. 
+ + Inputs: + mode (list): one of ['max', 'min', 'random', 'direct', 'expected'] + group_id (int): number of search_groups + """ + arch_param = self.arch_params[str(group_id)] + (group_size, num_groups, min_ch) = self._arch_params_attr[str(group_id)] + + if mode == 'max': + return min_ch + group_size * num_groups + elif mode == 'min': + return min_ch + elif mode == 'random': + return min_ch + group_size * random.randint(0, num_groups) + else: + if num_groups == 0: + return min_ch + prob = torch.clamp(arch_param, min=0) + condition_prob = torch.exp(-prob) + if mode == 'direct': + direct_channel = min_ch + for i in range(num_groups): + if random.uniform(0, 1) > condition_prob[i]: + break + direct_channel += group_size + return direct_channel + elif mode == 'expected': + marginal_prob = torch.cumprod(condition_prob, dim=0) + expected_channel = (torch.sum(marginal_prob) * + group_size) + min_ch + return expected_channel + else: + raise NotImplementedError + + def calc_current_flops(self, model): + estimator = ResourceEstimator() + model = getattr(model, 'module', model) + estimation = estimator.estimate(model=model.architecture.backbone) + return estimation['flops'] + + def set_choices(self, choices: Dict[int, Any]) -> None: + """Set mutables' current choice according to choices sample by + :func:`sample_choices`. + + Args: + choices (Dict[int, Any]): Choices dict. The key is group_id in + search groups, and the value is the sampling results + corresponding to this group. 
+ """ + for group_id, modules in self.search_groups.items(): + if group_id not in choices: + # allow optional target_prune_ratio + continue + choice = choices[group_id] + for module in modules: + module.current_choice = choice + module._traceable_choice.choice = choice + diff --git a/mmrazor/models/mutators/channel_mutator/one_shot_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/one_shot_channel_mutator.py index fdaee3161..1f8e3496b 100644 --- a/mmrazor/models/mutators/channel_mutator/one_shot_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/one_shot_channel_mutator.py @@ -1,15 +1,13 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Type, Union +from typing import Dict, Type, Union from mmrazor.models.mutables import OneShotMutableChannelUnit from mmrazor.registry import MODELS -from ..group_mixin import DynamicSampleMixin from .channel_mutator import ChannelMutator, ChannelUnitType @MODELS.register_module() -class OneShotChannelMutator(ChannelMutator[OneShotMutableChannelUnit], - DynamicSampleMixin): +class OneShotChannelMutator(ChannelMutator[OneShotMutableChannelUnit]): """OneShotChannelMutator based on ChannelMutator. It use OneShotMutableChannelUnit by default. 
@@ -27,3 +25,19 @@ def __init__(self, **kwargs) -> None: super().__init__(channel_unit_cfg, **kwargs) + + def min_choices(self) -> Dict: + """Return the minimal pruning subnet(structure).""" + min_choices = dict() + for group_id, modules in self.search_groups.items(): + min_choices[group_id] = modules[0].min_choice + + return min_choices + + def max_choices(self) -> Dict: + """Return the maximal pruning subnet(structure).""" + max_choices = dict() + for group_id, modules in self.search_groups.items(): + max_choices[group_id] = modules[0].max_choice + + return max_choices diff --git a/mmrazor/models/mutators/channel_mutator/slimmable_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/slimmable_channel_mutator.py index ec726ad85..9f5eb0075 100644 --- a/mmrazor/models/mutators/channel_mutator/slimmable_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/slimmable_channel_mutator.py @@ -24,9 +24,8 @@ class SlimmableChannelMutator(ChannelMutator[SlimmableChannelUnit]): def __init__(self, channel_unit_cfg=dict(type='SlimmableChannelUnit', units={}), parse_cfg=dict( - type='ChannelAnalyzer', - demo_input=(1, 3, 224, 224), - tracer_type='BackwardTracer'), + type='BackwardTracer', + loss_calculator=dict(type='ImageClassifierPseudoLoss')), init_cfg: Optional[Dict] = None) -> None: super().__init__(channel_unit_cfg, parse_cfg, None, init_cfg) diff --git a/mmrazor/models/mutators/group_mixin.py b/mmrazor/models/mutators/group_mixin.py index f6b84aea2..2575af2f1 100644 --- a/mmrazor/models/mutators/group_mixin.py +++ b/mmrazor/models/mutators/group_mixin.py @@ -75,11 +75,6 @@ def _build_name_mutable_mapping( for name, module in supernet.named_modules(): if isinstance(module, support_mutables): name2mutable[name] = module - elif hasattr(module, 'source_mutables'): - for each_mutables in module.source_mutables: - if isinstance(each_mutables, support_mutables): - name2mutable[name] = each_mutables - self._name2mutable = name2mutable return name2mutable @@ 
-89,23 +84,14 @@ def _build_alias_names_mapping( support_mutables: Type) -> Dict[str, List[str]]: """Mapping alias to module names.""" alias2mutable_names: Dict[str, List[str]] = dict() - - def _append(key, dict, name): - if key not in dict: - dict[key] = [name] - else: - dict[key].append(name) - for name, module in supernet.named_modules(): if isinstance(module, support_mutables): + if module.alias is not None: - _append(module.alias, alias2mutable_names, name) - elif hasattr(module, 'source_mutables'): - for each_mutables in module.source_mutables: - if isinstance(each_mutables, support_mutables): - if each_mutables.alias is not None: - _append(each_mutables.alias, alias2mutable_names, - name) + if module.alias not in alias2mutable_names: + alias2mutable_names[module.alias] = [name] + else: + alias2mutable_names[module.alias].append(name) return alias2mutable_names @@ -185,14 +171,6 @@ def build_search_groups(self, supernet: Module, support_mutables: Type, else: search_groups[current_group_nums] = [module] current_group_nums += 1 - elif hasattr(module, 'source_mutables'): - for each_mutables in module.source_mutables: - if isinstance(each_mutables, support_mutables): - if name in grouped_mutable_names: - continue - else: - search_groups[current_group_nums] = [each_mutables] - current_group_nums += 1 grouped_counter = Counter(grouped_mutable_names) @@ -213,10 +191,11 @@ def build_search_groups(self, supernet: Module, support_mutables: Type, def _check_valid_groups(self, alias2mutable_names: Dict[str, List[str]], name2mutable: Dict[str, BaseMutable], custom_group: List[List[str]]) -> None: - """Check if all keys are legal.""" + aliases = [*alias2mutable_names.keys()] module_names = [*name2mutable.keys()] + # check if all keys are legal expanded_custom_group: List[str] = [ _ for group in custom_group for _ in group ] @@ -260,10 +239,8 @@ def search_groups(self) -> Dict: class OneShotSampleMixin: - """Sample mixin for one-shot mutators.""" def sample_choices(self: 
MutatorProtocol) -> Dict: - """Sample choices for each group in search_groups.""" random_choices = dict() for group_id, modules in self.search_groups.items(): random_choices[group_id] = modules[0].sample_choice() @@ -271,7 +248,6 @@ def sample_choices(self: MutatorProtocol) -> Dict: return random_choices def set_choices(self: MutatorProtocol, choices: Dict) -> None: - """Set choices for each group in search_groups.""" for group_id, modules in self.search_groups.items(): choice = choices[group_id] for module in modules: @@ -280,32 +256,18 @@ def set_choices(self: MutatorProtocol, choices: Dict) -> None: class DynamicSampleMixin(OneShotSampleMixin): - def sample_choices(self: MutatorProtocol, kind: str = 'random') -> Dict: - """Sample choices for each group in search_groups.""" - random_choices = dict() - for group_id, modules in self.search_groups.items(): - if kind == 'max': - random_choices[group_id] = modules[0].max_choice - elif kind == 'min': - random_choices[group_id] = modules[0].min_choice - else: - random_choices[group_id] = modules[0].sample_choice() - return random_choices - @property - def max_choice(self: MutatorProtocol) -> Dict: - """Get max choices for each group in search_groups.""" - max_choice = dict() + def max_choices(self: MutatorProtocol) -> Dict: + max_choices = dict() for group_id, modules in self.search_groups.items(): - max_choice[group_id] = modules[0].max_choice + max_choices[group_id] = modules[0].max_choice - return max_choice + return max_choices @property - def min_choice(self: MutatorProtocol) -> Dict: - """Get min choices for each group in search_groups.""" - min_choice = dict() + def min_choices(self: MutatorProtocol) -> Dict: + min_choices = dict() for group_id, modules in self.search_groups.items(): - min_choice[group_id] = modules[0].min_choice + min_choices[group_id] = modules[0].min_choice - return min_choice + return min_choices diff --git a/mmrazor/models/mutators/value_mutator/dynamic_value_mutator.py 
b/mmrazor/models/mutators/value_mutator/dynamic_value_mutator.py index d8d081343..c65c90f80 100644 --- a/mmrazor/models/mutators/value_mutator/dynamic_value_mutator.py +++ b/mmrazor/models/mutators/value_mutator/dynamic_value_mutator.py @@ -7,7 +7,6 @@ @MODELS.register_module() class DynamicValueMutator(ValueMutator, DynamicSampleMixin): - """Dynamic value mutator with type as `OneShotMutableValue`.""" @property def mutable_class_type(self): diff --git a/mmrazor/models/task_modules/estimators/counters/flops_params_counter.py b/mmrazor/models/task_modules/estimators/counters/flops_params_counter.py index dca740214..538e52135 100644 --- a/mmrazor/models/task_modules/estimators/counters/flops_params_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/flops_params_counter.py @@ -107,7 +107,6 @@ def get_model_flops_params(model, params_count, ost=ost, flush=flush) - if units is not None: flops_count = params_units_convert(flops_count, units['flops']) params_count = params_units_convert(params_count, units['params']) @@ -172,7 +171,8 @@ def params_units_convert(num_params, units='M', precision=3): >>> params_units_convert(3e-9) '3e-09' """ - + if getattr(num_params, 'requires_grad', None): + return num_params if units == 'G': return round(num_params / 10.**9, precision) elif units == 'M': diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/__init__.py b/mmrazor/models/task_modules/estimators/counters/op_counters/__init__.py index 6e33babe2..9044c9788 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/__init__.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/__init__.py @@ -2,13 +2,15 @@ from .activation_layer_counter import (ELUCounter, LeakyReLUCounter, PReLUCounter, ReLU6Counter, ReLUCounter) from .base_counter import BaseCounter -from .conv_layer_counter import Conv1dCounter, Conv2dCounter, Conv3dCounter +from .conv_layer_counter import (Conv1dCounter, Conv2dCounter, Conv3dCounter, + 
DynamicConv2dCounter) from .deconv_layer_counter import ConvTranspose2dCounter from .linear_layer_counter import LinearCounter from .norm_layer_counter import (BatchNorm1dCounter, BatchNorm2dCounter, BatchNorm3dCounter, GroupNormCounter, InstanceNorm1dCounter, InstanceNorm2dCounter, - InstanceNorm3dCounter, LayerNormCounter) + InstanceNorm3dCounter, LayerNormCounter, + DMCPBatchNorm2dCounter) from .pooling_layer_counter import * # noqa: F403, F405, F401 from .upsample_layer_counter import UpsampleCounter @@ -18,5 +20,6 @@ 'BatchNorm3dCounter', 'Conv1dCounter', 'Conv2dCounter', 'Conv3dCounter', 'ConvTranspose2dCounter', 'UpsampleCounter', 'LinearCounter', 'GroupNormCounter', 'InstanceNorm1dCounter', 'InstanceNorm2dCounter', - 'InstanceNorm3dCounter', 'LayerNormCounter', 'BaseCounter' + 'InstanceNorm3dCounter', 'LayerNormCounter', 'BaseCounter', + 'DMCPBatchNorm2dCounter', 'DynamicConv2dCounter' ] diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py b/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py index 959d88fa4..62b54cdb2 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import numpy as np +import torch.nn as nn from mmrazor.registry import TASK_UTILS from .base_counter import BaseCounter @@ -12,6 +13,7 @@ class ConvCounter(BaseCounter): def add_count_hook(module, input, output): """Calculate FLOPs and params based on the size of input & output.""" # Can have multiple inputs, getting the first one + import pdb;pdb.set_trace() input = input[0] batch_size = input.shape[0] @@ -59,3 +61,45 @@ class Conv2dCounter(ConvCounter): class Conv3dCounter(ConvCounter): """FLOPs/params counter for Conv3d module.""" pass + + +@TASK_UTILS.register_module() +class DynamicConv2dCounter(ConvCounter): + + @staticmethod + def add_count_hook(module: nn.Conv2d, input, output): + + input = input[0] + + batch_size = input.shape[0] + output_dims = list(output.shape[2:]) + + kernel_dims = list(module.kernel_size) + + if hasattr(module, '_traceable_choice'): + out_channels = module._traceable_choice() + else: + out_channels = module.mutable_attrs['out_channels'].activated_channels + in_channels = module.mutable_attrs['in_channels'].activated_channels + + groups = module.groups + + filters_per_channel = out_channels / groups + conv_per_position_flops = \ + np.prod(kernel_dims) * in_channels * filters_per_channel + + active_elements_count = batch_size * int(np.prod(output_dims)) + + overall_conv_flops = conv_per_position_flops * active_elements_count + overall_params = conv_per_position_flops + + bias_flops = 0 + overall_params = conv_per_position_flops + if module.bias is not None: + bias_flops = out_channels * active_elements_count + overall_params += out_channels + + overall_flops = overall_conv_flops + bias_flops + + module.__flops__ += overall_flops + module.__params__ += int(overall_params) \ No newline at end of file diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/linear_layer_counter.py b/mmrazor/models/task_modules/estimators/counters/op_counters/linear_layer_counter.py index f8e9ea8fb..4959b710c 100644 --- 
a/mmrazor/models/task_modules/estimators/counters/op_counters/linear_layer_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/linear_layer_counter.py @@ -18,3 +18,8 @@ def add_count_hook(module, input, output): -1] # pytorch checks dimensions, so here we don't care much module.__flops__ += int(np.prod(input.shape) * output_last_dim) module.__params__ += get_model_parameters_number(module) + + +@TASK_UTILS.register_module() +class DynamicLinearCounter(LinearCounter): + pass \ No newline at end of file diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py b/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py index 9b9a14ca4..20787a9c9 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py @@ -12,6 +12,7 @@ class BNCounter(BaseCounter): @staticmethod def add_count_hook(module, input, output): """Calculate FLOPs and params based on the size of input & output.""" + import pdb;pdb.set_trace() input = input[0] batch_flops = np.prod(input.shape) if getattr(module, 'affine', False): @@ -66,3 +67,19 @@ class LayerNormCounter(BNCounter): class GroupNormCounter(BNCounter): """FLOPs/params counter for GroupNorm module.""" pass + + +@TASK_UTILS.register_module() +class DMCPBatchNorm2dCounter(BNCounter): + """FLOPs/params counter for DynamicBatchNorm2d module.""" + + @staticmethod + def add_count_hook(module, input, output): + """Calculate FLOPs and params based on the size of input & output.""" + input = input[0] + batch_flops = np.prod(input.shape) + if getattr(module, 'affine', False): + batch_flops *= 2 + num_features = module.mutable_attrs['num_features'].activated_channels + module.__flops__ += int(batch_flops) + module.__params__ += num_features*2 From 6fc9f78d316bf0f2b5cfeb40d19cf62a6504b66b Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Thu, 22 Dec 
2022 15:35:45 +0800 Subject: [PATCH 02/59] Newly created copy PR --- mmrazor/models/mutables/derived_mutable.py | 2 +- .../mutable_channel_container.py | 5 +- .../sequential_mutable_channel.py | 8 +- .../mutable_channel/simple_mutable_channel.py | 12 +- .../mutable_channel/units/channel_unit.py | 94 +++-------- .../units/mutable_channel_unit.ipynb | 149 +++--------------- .../units/mutable_channel_unit.py | 57 +++---- .../units/sequential_mutable_channel_unit.py | 11 +- .../mutables/mutable_channel/units/utils.py | 80 ++++++++++ .../channel_mutator/channel_mutator.ipynb | 26 ++- .../channel_mutator/channel_mutator.py | 54 +++---- .../channel_mutator/dcff_channel_mutator.py | 5 +- .../one_shot_channel_mutator.py | 22 +-- .../slimmable_channel_mutator.py | 5 +- mmrazor/models/mutators/group_mixin.py | 68 ++++++-- .../value_mutator/dynamic_value_mutator.py | 1 + .../counters/flops_params_counter.py | 2 +- 17 files changed, 292 insertions(+), 309 deletions(-) create mode 100644 mmrazor/models/mutables/mutable_channel/units/utils.py diff --git a/mmrazor/models/mutables/derived_mutable.py b/mmrazor/models/mutables/derived_mutable.py index 5a3f9abb9..ac8a8c60a 100644 --- a/mmrazor/models/mutables/derived_mutable.py +++ b/mmrazor/models/mutables/derived_mutable.py @@ -82,7 +82,7 @@ def _divide_and_divise(x: int, ratio: int, divisor: int = 8) -> int: """Helper function for divide and divise.""" new_x = x // ratio - return make_divisible(new_x, divisor) + return make_divisible(new_x, divisor) # type: ignore def _divide_choice_fn(mutable: MutableProtocol, diff --git a/mmrazor/models/mutables/mutable_channel/mutable_channel_container.py b/mmrazor/models/mutables/mutable_channel/mutable_channel_container.py index f59929b27..5706d0750 100644 --- a/mmrazor/models/mutables/mutable_channel/mutable_channel_container.py +++ b/mmrazor/models/mutables/mutable_channel/mutable_channel_container.py @@ -81,7 +81,7 @@ def register_mutable_channel_to_module(cls, """Register a 
BaseMutableChannel to a module with MutableChannelContainers.""" if end == -1: - end = mutable.num_channels + start + end = mutable.current_choice + start if is_to_output_channel: container: MutableChannelContainer = module.get_mutable_attr( 'out_channels') @@ -100,7 +100,8 @@ def _assert_mutables_valid(self): for start, end in self.mutable_channels: assert start == last_end last_end = end - assert last_end == self.num_channels + assert last_end == self.num_channels, ( + f'channel mismatch: {last_end} vs {self.num_channels}') def _fill_unregistered_range(self): """Fill with SimpleMutableChannels in the range without any stored diff --git a/mmrazor/models/mutables/mutable_channel/sequential_mutable_channel.py b/mmrazor/models/mutables/mutable_channel/sequential_mutable_channel.py index 07b85f6c6..c2b4f9291 100644 --- a/mmrazor/models/mutables/mutable_channel/sequential_mutable_channel.py +++ b/mmrazor/models/mutables/mutable_channel/sequential_mutable_channel.py @@ -27,7 +27,6 @@ def __init__(self, num_channels: int, choice_mode='number', **kwargs): super().__init__(num_channels, **kwargs) assert choice_mode in ['ratio', 'number'] self.choice_mode = choice_mode - self.mask = torch.ones([self.num_channels]).bool() @property def is_num_mode(self): @@ -50,14 +49,13 @@ def current_choice(self, choice: Union[int, float]): int_choice = self._ratio2num(choice) else: int_choice = choice - mask = torch.zeros([self.num_channels], device=self.mask.device) - mask[0:int_choice] = 1 - self.mask = mask.bool() + self.mask.fill_(0.0) + self.mask[0:int_choice] = 1.0 @property def current_mask(self) -> torch.Tensor: """Return current mask.""" - return self.mask + return self.mask.bool() # methods for diff --git a/mmrazor/models/mutables/mutable_channel/simple_mutable_channel.py b/mmrazor/models/mutables/mutable_channel/simple_mutable_channel.py index 7f949890c..9e85f81a3 100644 --- a/mmrazor/models/mutables/mutable_channel/simple_mutable_channel.py +++ 
b/mmrazor/models/mutables/mutable_channel/simple_mutable_channel.py @@ -1,5 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. +from typing import Union + import torch from mmrazor.registry import MODELS @@ -18,7 +20,10 @@ class SimpleMutableChannel(BaseMutableChannel): def __init__(self, num_channels: int, **kwargs) -> None: super().__init__(num_channels, **kwargs) - self.mask = torch.ones(num_channels).bool() + mask = torch.ones([self.num_channels + ]) # save bool as float for dist training + self.register_buffer('mask', mask) + self.mask: torch.Tensor # choice @@ -30,7 +35,7 @@ def current_choice(self) -> torch.Tensor: @current_choice.setter def current_choice(self, choice: torch.Tensor): """Set current choice.""" - self.mask = choice.to(self.mask.device).bool() + self.mask = choice.to(self.mask.device).float() @property def current_mask(self) -> torch.Tensor: @@ -39,7 +44,8 @@ def current_mask(self) -> torch.Tensor: # basic extension - def expand_mutable_channel(self, expand_ratio: int) -> DerivedMutable: + def expand_mutable_channel( + self, expand_ratio: Union[int, float]) -> DerivedMutable: """Get a derived SimpleMutableChannel with expanded mask.""" def _expand_mask(): diff --git a/mmrazor/models/mutables/mutable_channel/units/channel_unit.py b/mmrazor/models/mutables/mutable_channel/units/channel_unit.py index c68b1f491..bf1bf909f 100644 --- a/mmrazor/models/mutables/mutable_channel/units/channel_unit.py +++ b/mmrazor/models/mutables/mutable_channel/units/channel_unit.py @@ -5,12 +5,8 @@ import torch.nn as nn from mmengine.model import BaseModule -from mmrazor.structures.graph import ModuleGraph -from mmrazor.structures.graph.channel_graph import ChannelGraph -from mmrazor.structures.graph.channel_modules import (BaseChannel, - BaseChannelUnit) -from mmrazor.structures.graph.channel_nodes import \ - default_channel_node_converter +from mmrazor.models.architectures.dynamic_ops.mixins import DynamicChannelMixin +from mmrazor.registry import TASK_UTILS 
class Channel(BaseModule): @@ -25,7 +21,6 @@ class Channel(BaseModule): Channel. Defaults to None. is_output_channel (bool, optional): Is the channel output channel. Defaults to True. - expand_ratio (int, optional): Expand ratio of the mask. Defaults to 1. """ # init @@ -35,11 +30,10 @@ def __init__(self, module, index, node=None, - is_output_channel=True, - expand_ratio=1) -> None: + is_output_channel=True) -> None: super().__init__() self.name = name - self.module = module + self.module: nn.Module = module self.index = index self.start = index[0] self.end = index[1] @@ -47,7 +41,6 @@ def __init__(self, self.node = node self.is_output_channel = is_output_channel - self.expand_ratio = expand_ratio @classmethod def init_from_cfg(cls, model: nn.Module, config: Dict): @@ -56,29 +49,13 @@ def init_from_cfg(cls, model: nn.Module, config: Dict): name = config['name'] start = config['start'] end = config['end'] - expand_ratio = config['expand_ratio'] \ - if 'expand_ratio' in config else 1 is_output_channel = config['is_output_channel'] name2module = dict(model.named_modules()) name2module.pop('') module = name2module[name] if name in name2module else None return Channel( - name, - module, (start, end), - is_output_channel=is_output_channel, - expand_ratio=expand_ratio) - - @classmethod - def init_from_base_channel(cls, base_channel: BaseChannel): - """Init from a BaseChannel object.""" - return cls( - base_channel.name, - base_channel.module, - base_channel.index, - node=None, - is_output_channel=base_channel.is_output_channel, - expand_ratio=base_channel.expand_ratio) + name, module, (start, end), is_output_channel=is_output_channel) # config template @@ -89,7 +66,6 @@ def config_template(self): 'name': self.name, 'start': self.start, 'end': self.end, - 'expand_ratio': self.expand_ratio, 'is_output_channel': self.is_output_channel } @@ -103,29 +79,29 @@ def num_channels(self) -> int: @property def is_mutable(self) -> bool: """If the channel is prunable.""" - if 
isinstance(self.module, nn.Conv2d): - # group-wise conv - if self.module.groups != 1 and not (self.module.groups == - self.module.in_channels == - self.module.out_channels): - return False - return True + if self.module is not None: + has_prama = len(list(self.module.parameters())) != 0 + is_dynamic_op = isinstance(self.module, DynamicChannelMixin) + return (not has_prama) or is_dynamic_op + else: + is_unmutable = self.name in [ + 'input_placeholder', 'output_placeholder' + ] + return not is_unmutable def __repr__(self) -> str: return (f'{self.__class__.__name__}(' f'{self.name}, index={self.index}, ' f'is_output_channel=' f'{"true" if self.is_output_channel else "false"}, ' - f'expand_ratio={self.expand_ratio}' ')') def __eq__(self, obj: object) -> bool: - if isinstance(obj, BaseChannel): + if isinstance(obj, Channel): return self.name == obj.name \ and self.module == obj.module \ and self.index == obj.index \ and self.is_output_channel == obj.is_output_channel \ - and self.expand_ratio == obj.expand_ratio \ and self.node == obj.node else: return False @@ -185,7 +161,7 @@ def auto_fill_channel_config(channel_config: Dict, Channel.init_from_cfg(model, channel_config)) for channel_config in channels['output_related']: auto_fill_channel_config(channel_config, True) - unit.add_ouptut_related( + unit.add_output_related( Channel.init_from_cfg(model, channel_config)) return unit @@ -201,30 +177,16 @@ def init_from_channel_unit(cls, return mutable_unit @classmethod - def init_from_graph(cls, - graph: ModuleGraph, - unit_args={}, - num_input_channel=3) -> List['ChannelUnit']: - """Parse a module-graph and get ChannelUnits.""" - - def init_from_base_channel_unit(base_channel_unit: BaseChannelUnit): - unit = cls(len(base_channel_unit.channel_elems), **unit_args) - unit.input_related = [ - Channel.init_from_base_channel(channel) - for channel in base_channel_unit.input_related - ] - unit.output_related = [ - Channel.init_from_base_channel(channel) - for channel in 
base_channel_unit.output_related - ] - return unit + def init_from_channel_analyzer(cls, model, analyzer=None): + """Init MutableChannelUnits from a ChannelAnalyzer.""" - unit_graph = ChannelGraph.copy_from(graph, - default_channel_node_converter) - unit_graph.forward(num_input_channel) - units = unit_graph.collect_units() - units = [init_from_base_channel_unit(unit) for unit in units] - return units + if analyzer is None: + from mmrazor.models.task_modules.tracer import ChannelAnalyzer + analyzer = ChannelAnalyzer() + if isinstance(analyzer, dict): + analyzer = TASK_UTILS.build(analyzer) + unit_config = analyzer.analyze(model) + return [cls.init_from_cfg(model, cfg) for cfg in unit_config.values()] # tools @@ -259,19 +221,15 @@ def config_template(self, # node operations - def add_ouptut_related(self, channel: Channel): + def add_output_related(self, channel: Channel): """Add a Channel which is output related.""" assert channel.is_output_channel - assert self.num_channels == \ - int(channel.num_channels // channel.expand_ratio) if channel not in self.output_related: self.output_related.append(channel) def add_input_related(self, channel: Channel): """Add a Channel which is input related.""" assert channel.is_output_channel is False - assert self.num_channels == \ - int(channel.num_channels // channel.expand_ratio) if channel not in self.input_related: self.input_related.append(channel) diff --git a/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.ipynb b/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.ipynb index 5af2d496b..bc40d191b 100644 --- a/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.ipynb +++ b/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.ipynb @@ -36,20 +36,19 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# define a model\n", "from mmengine.model import BaseModel\n", "from torch import nn\n", - 
"import torch\n", "from collections import OrderedDict\n", "\n", - "class MyModel(BaseModel):\n", + "class MyModel(nn.Module):\n", "\n", " def __init__(self):\n", - " super().__init__(None, None)\n", + " super().__init__()\n", " self.net = nn.Sequential(\n", " OrderedDict([('conv0', nn.Conv2d(3, 8, 3, 1, 1)),\n", " ('relu', nn.ReLU()),\n", @@ -65,17 +64,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "This model has 4 MutableChannelUnit(SequentialMutableChannelUnit).\n" - ] - } - ], + "outputs": [], "source": [ "# There are multiple types of MutableChannelUnits. Here, We take SequentialMutableChannelUnit as the example.\n", "from mmrazor.models.mutables.mutable_channel.units import SequentialMutableChannelUnit\n", @@ -83,9 +74,8 @@ "from typing import List\n", "\n", "model = MyModel()\n", - "graph = ModuleGraph.init_from_backward_tracer(model)\n", "units: List[\n", - " SequentialMutableChannelUnit] = SequentialMutableChannelUnit.init_from_graph(graph) # type: ignore\n", + " SequentialMutableChannelUnit] = SequentialMutableChannelUnit.init_from_channel_analyzer(model) # type: ignore\n", "print(\n", " f'This model has {len(units)} MutableChannelUnit(SequentialMutableChannelUnit).'\n", ")\n" @@ -93,26 +83,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "SequentialMutableChannelUnit(\n", - " name=net.conv0_(0, 8)_8\n", - " (output_related): ModuleList(\n", - " (0): Channel(net.conv0, index=(0, 8), is_output_channel=true, expand_ratio=1)\n", - " )\n", - " (input_related): ModuleList(\n", - " (0): Channel(net.conv1, index=(0, 8), is_output_channel=false, expand_ratio=1)\n", - " )\n", - " (mutable_channel): SquentialMutableChannel(num_channels=8, activated_channels=8)\n", - ")\n" - ] - } - ], + "outputs": [], 
"source": [ "unit1=units[1]\n", "print(unit1)" @@ -158,31 +131,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The current choice of unit1 is 8.\n", - "DynamicConv2d(\n", - " 3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)\n", - " (mutable_attrs): ModuleDict(\n", - " (in_channels): MutableChannelContainer(num_channels=3, activated_channels=3)\n", - " (out_channels): MutableChannelContainer(num_channels=8, activated_channels=8)\n", - " )\n", - ")\n", - "DynamicConv2d(\n", - " 8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)\n", - " (mutable_attrs): ModuleDict(\n", - " (in_channels): MutableChannelContainer(num_channels=8, activated_channels=8)\n", - " (out_channels): MutableChannelContainer(num_channels=16, activated_channels=16)\n", - " )\n", - ")\n" - ] - } - ], + "outputs": [], "source": [ "# We run \"prepare_for_pruning\" once before pruning to run step 1 and 2 above.\n", "unit1.prepare_for_pruning(model)\n", @@ -200,31 +151,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "We get a sampled choice 2.\n", - "DynamicConv2d(\n", - " 3, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)\n", - " (mutable_attrs): ModuleDict(\n", - " (in_channels): MutableChannelContainer(num_channels=3, activated_channels=3)\n", - " (out_channels): MutableChannelContainer(num_channels=8, activated_channels=2)\n", - " )\n", - ")\n", - "DynamicConv2d(\n", - " 8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)\n", - " (mutable_attrs): ModuleDict(\n", - " (in_channels): MutableChannelContainer(num_channels=8, activated_channels=2)\n", - " (out_channels): MutableChannelContainer(num_channels=16, activated_channels=16)\n", - " )\n", - ")\n" - ] - } - ], + "outputs": [], "source": [ 
"sampled_choice=unit1.sample_choice()\n", "print(f'We get a sampled choice {sampled_choice}.')\n", @@ -264,22 +193,13 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The model has 4 MutableChannelUnits.\n" - ] - } - ], + "outputs": [], "source": [ "# 1. using tracer\n", "def get_mutable_channel_units_using_tracer(model):\n", - " graph = ModuleGraph.init_from_backward_tracer(model)\n", - " units = SequentialMutableChannelUnit.init_from_graph(graph)\n", + " units = SequentialMutableChannelUnit.init_from_channel_analyzer(model)\n", " return units\n", "\n", "\n", @@ -290,26 +210,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "SequentialMutableChannelUnit(\n", - " name=net.conv0_(0, 8)_8\n", - " (output_related): ModuleList(\n", - " (0): Channel(net.conv0, index=(0, 8), is_output_channel=true, expand_ratio=1)\n", - " )\n", - " (input_related): ModuleList(\n", - " (0): Channel(net.conv1, index=(0, 8), is_output_channel=false, expand_ratio=1)\n", - " )\n", - " (mutable_channel): SquentialMutableChannel(num_channels=8, activated_channels=8)\n", - ")\n" - ] - } - ], + "outputs": [], "source": [ "# 2. using config\n", "config = {\n", @@ -332,17 +235,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The model has 2 MutableChannelUnits.\n" - ] - } - ], + "outputs": [], "source": [ "# 3. 
using predefined model\n", "\n", @@ -391,7 +286,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.9.12 ('mmlab')", + "display_name": "Python 3.9.13 ('lab2max')", "language": "python", "name": "python3" }, @@ -405,12 +300,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.9.13" }, "orig_nbformat": 4, "vscode": { "interpreter": { - "hash": "feec882ee78c63cb8d4b485f1b52bbb873bb9a7b094435863200c7afba202382" + "hash": "e31a827d0913016ad78e01c7b97f787f4b9e53102dd62d238e8548bcd97ff875" } } }, diff --git a/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.py b/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.py index 748b2333b..dabe41fab 100644 --- a/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.py +++ b/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.py @@ -4,13 +4,13 @@ from collections import Set from typing import Dict, List, Type, TypeVar +import torch import torch.nn as nn from mmrazor.models.architectures.dynamic_ops.mixins import DynamicChannelMixin from mmrazor.models.mutables import DerivedMutable from mmrazor.models.mutables.mutable_channel import (BaseMutableChannel, MutableChannelContainer) -from mmrazor.models.mutables.mutable_value import MutableValue from .channel_unit import Channel, ChannelUnit @@ -41,6 +41,16 @@ def __init__(self, num_channels: int, **kwargs) -> None: super().__init__(num_channels) + @classmethod + def init_from_cfg(cls, model: nn.Module, config: Dict): + """init a Channel using a config which can be generated by + self.config_template(), include init choice.""" + unit = super().init_from_cfg(model, config) + # TO DO: add illegal judgement here? 
+ if 'choice' in config: + unit.current_choice = config['choice'] + return unit + @classmethod def init_from_mutable_channel(cls, mutable_channel: BaseMutableChannel): unit = cls(mutable_channel.num_channels) @@ -51,13 +61,15 @@ def init_from_predefined_model(cls, model: nn.Module): """Initialize units using the model with pre-defined dynamicops and mutable-channels.""" - def process_container(contanier: MutableChannelContainer, + def process_container(container: MutableChannelContainer, module, module_name, mutable2units, is_output=True): - for index, mutable in contanier.mutable_channels.items(): - expand_ratio = 1 + for index, mutable in container.mutable_channels.items(): + derived_choices = mutable.current_choice + if isinstance(derived_choices, torch.Tensor): + derived_choices = derived_choices.sum().item() if isinstance(mutable, DerivedMutable): source_mutables: Set = \ mutable._trace_source_mutables() @@ -68,18 +80,7 @@ def process_container(contanier: MutableChannelContainer, assert len(source_channel_mutables) == 1, ( 'only support one mutable channel ' 'used in DerivedMutable') - mutable = list(source_channel_mutables)[0] - - source_value_mutables = [ - mutable for mutable in source_mutables - if isinstance(mutable, MutableValue) - ] - assert len(source_value_mutables) <= 1, ( - 'only support one mutable value ' - 'used in DerivedMutable') - expand_ratio = int( - list(source_value_mutables) - [0].current_choice) if source_value_mutables else 1 + mutable = source_channel_mutables[0] if mutable not in mutable2units: mutable2units[mutable] = cls.init_from_mutable_channel( @@ -87,21 +88,19 @@ def process_container(contanier: MutableChannelContainer, unit: MutableChannelUnit = mutable2units[mutable] if is_output: - unit.add_ouptut_related( + unit.add_output_related( Channel( module_name, module, index, - is_output_channel=is_output, - expand_ratio=expand_ratio)) + is_output_channel=is_output)) else: unit.add_input_related( Channel( module_name, module, index, 
- is_output_channel=is_output, - expand_ratio=expand_ratio)) + is_output_channel=is_output)) mutable2units: Dict = {} for name, module in model.named_modules(): @@ -267,14 +266,16 @@ def _register_mutable_channel(self, mutable_channel: BaseMutableChannel): start = channel.start end = channel.end elif channel.num_channels > self.num_channels: + if channel.num_channels % self.num_channels == 0: - mutable_channel_ = \ - mutable_channel.expand_mutable_channel( - channel.num_channels // self.num_channels) - start = channel.start - end = channel.end + ratio = channel.num_channels // self.num_channels else: - raise NotImplementedError() + ratio = channel.num_channels / self.num_channels + + mutable_channel_ = \ + mutable_channel.expand_mutable_channel(ratio) + start = channel.start + end = channel.end else: raise NotImplementedError() @@ -289,7 +290,7 @@ def _register_mutable_channel(self, mutable_channel: BaseMutableChannel): mutable_channel is mutable for mutable in source_mutables ] - assert any(is_same) + assert any(is_same), 'existed a mutable channel.' 
else: container.register_mutable(mutable_channel_, start, end) diff --git a/mmrazor/models/mutables/mutable_channel/units/sequential_mutable_channel_unit.py b/mmrazor/models/mutables/mutable_channel/units/sequential_mutable_channel_unit.py index 89a25d236..89dc785ed 100644 --- a/mmrazor/models/mutables/mutable_channel/units/sequential_mutable_channel_unit.py +++ b/mmrazor/models/mutables/mutable_channel/units/sequential_mutable_channel_unit.py @@ -3,7 +3,11 @@ from typing import Dict, Union import torch.nn as nn +from mmcv.cnn.bricks import Conv2dAdaptivePadding from mmengine import MMLogger +from mmengine.model.utils import _BatchNormXd +from mmengine.utils.dl_utils.parrots_wrapper import \ + SyncBatchNorm as EngineSyncBatchNorm from mmrazor.models.architectures import dynamic_ops from mmrazor.models.utils import make_divisible @@ -60,9 +64,14 @@ def prepare_for_pruning(self, model: nn.Module): # register MutableMask self._replace_with_dynamic_ops( model, { + Conv2dAdaptivePadding: + dynamic_ops.DynamicConv2dAdaptivePadding, nn.Conv2d: dynamic_ops.DynamicConv2d, nn.BatchNorm2d: dynamic_ops.DynamicBatchNorm2d, - nn.Linear: dynamic_ops.DynamicLinear + nn.Linear: dynamic_ops.DynamicLinear, + nn.SyncBatchNorm: dynamic_ops.DynamicSyncBatchNorm, + EngineSyncBatchNorm: dynamic_ops.DynamicSyncBatchNorm, + _BatchNormXd: dynamic_ops.DynamicBatchNormXd, }) self._register_channel_container(model, MutableChannelContainer) self._register_mutable_channel(self.mutable_channel) diff --git a/mmrazor/models/mutables/mutable_channel/units/utils.py b/mmrazor/models/mutables/mutable_channel/units/utils.py new file mode 100644 index 000000000..41601ac7a --- /dev/null +++ b/mmrazor/models/mutables/mutable_channel/units/utils.py @@ -0,0 +1,80 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+ +from typing import List + +import torch + +from mmrazor.models.mutables.mutable_channel.units import \ + SequentialMutableChannelUnit +from mmrazor.utils import print_log + + +def assert_model_is_changed(tensors1, tensors2): + """Return if the tensors has the same shape (length).""" + shape1 = get_shape(tensors1, only_length=True) + shape2 = get_shape(tensors2, only_length=True) + assert shape1 == shape2, f'{shape1}!={shape2}' + + +def get_shape(tensor, only_length=False): + """Get the shape of a tensor list/tuple/dict. + + Args: + tensor (Union[List,Tuple,Dict,Tensor]): input tensors. + only_length (bool, optional): If only return the length of the tensors. + Defaults to False. + """ + if isinstance(tensor, torch.Tensor): + if only_length: + return len(tensor.shape) + else: + return tensor.shape + elif isinstance(tensor, list) or isinstance(tensor, tuple): + shapes = [] + for x in tensor: + shapes.append(get_shape(x, only_length)) + return shapes + elif isinstance(tensor, dict): + shapes = {} + for key in tensor: + shapes[key] = get_shape(tensor[key], only_length) + return shapes + else: + raise NotImplementedError( + f'unsuppored type{type(tensor)} to get shape of tensors.') + + +def forward_units(model, try_units: List[SequentialMutableChannelUnit], + units: List[SequentialMutableChannelUnit], demo_input, + template_output): + """Forward a model with MutableChannelUnits and assert if the result + changed.""" + model.eval() + for unit in units: + unit.current_choice = 1.0 + for unit in try_units: + unit.current_choice = min(max(0.1, unit.sample_choice()), 0.9) + if isinstance(demo_input, dict): + tensors = model(**demo_input) + else: + tensors = model(demo_input) + assert_model_is_changed(template_output, tensors) + + +def find_mutable(model, try_units, units, demo_input, template_tensors): + """Find really mutable MutableChannelUnits in some MutableChannelUnits.""" + if len(try_units) == 0: + return [] + try: + forward_units(model, try_units, units, 
demo_input, template_tensors) + return try_units + except Exception: + if len(try_units) == 1: + print_log(f'Find an unmutable unit {try_units[0]}', level='debug') + return [] + else: + num = len(try_units) + return find_mutable(model, try_units[:num // 2], units, demo_input, + template_tensors) + find_mutable( + model, try_units[num // 2:], units, + demo_input, template_tensors) diff --git a/mmrazor/models/mutators/channel_mutator/channel_mutator.ipynb b/mmrazor/models/mutators/channel_mutator/channel_mutator.ipynb index 9fe1010c4..58b56c783 100644 --- a/mmrazor/models/mutators/channel_mutator/channel_mutator.ipynb +++ b/mmrazor/models/mutators/channel_mutator/channel_mutator.ipynb @@ -27,7 +27,16 @@ "cell_type": "code", "execution_count": 24, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/liukai/miniconda3/envs/lab2max/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "# define a model\n", "from mmengine.model import BaseModel\n", @@ -35,10 +44,10 @@ "import torch\n", "from collections import OrderedDict\n", "\n", - "class MyModel(BaseModel):\n", + "class MyModel(nn.Module):\n", "\n", " def __init__(self):\n", - " super().__init__(None, None)\n", + " super().__init__()\n", " self.net = nn.Sequential(\n", " OrderedDict([('conv0', nn.Conv2d(3, 8, 3, 1, 1)),\n", " ('relu', nn.ReLU()),\n", @@ -70,6 +79,8 @@ "name": "stdout", "output_type": "stream", "text": [ + "11/14 14:24:13 - mmengine - \u001b[5m\u001b[4m\u001b[33mWARNING\u001b[0m - add a input before net.conv0(net.conv0), error: net.conv0(net.conv0)\n", + "11/14 14:24:13 - mmengine - \u001b[5m\u001b[4m\u001b[33mWARNING\u001b[0m - add a output after head(head), error: head(head)\n", "The mutator has 2 mutable channel units.\n" ] } @@ -86,8 +97,7 @@ " units={},\n", " ),\n", " parse_cfg=dict(\n", - " type='BackwardTracer',\n", - " loss_calculator=dict(type='ImageClassifierPseudoLoss')))\n", + " type='ChannelAnalyzer'))\n", "# init the ChannelMutator object with a model\n", "mutator.prepare_from_supernet(model)\n", "print(f'The mutator has {len(mutator.mutable_units)} mutable channel units.')" @@ -337,7 +347,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.6.8 64-bit", + "display_name": "Python 3.9.13 ('lab2max')", "language": "python", "name": "python3" }, @@ -351,12 +361,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.9.13" }, "orig_nbformat": 4, "vscode": { "interpreter": { - "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" + "hash": "e31a827d0913016ad78e01c7b97f787f4b9e53102dd62d238e8548bcd97ff875" } } }, diff --git a/mmrazor/models/mutators/channel_mutator/channel_mutator.py 
b/mmrazor/models/mutators/channel_mutator/channel_mutator.py index 7ced693b9..71db1cd43 100644 --- a/mmrazor/models/mutators/channel_mutator/channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/channel_mutator.py @@ -5,21 +5,16 @@ from mmengine import fileio from torch.nn import Module, ModuleList -from mmrazor.models.architectures.dynamic_ops import DynamicChannelMixin from mmrazor.models.mutables import (ChannelUnitType, MutableChannelUnit, SequentialMutableChannelUnit) from mmrazor.models.mutables.mutable_channel.units.channel_unit import \ ChannelUnit -from mmrazor.registry import MODELS -from mmrazor.structures.graph import ModuleGraph +from mmrazor.models.task_modules.tracer.channel_analyzer import ChannelAnalyzer +from mmrazor.registry import MODELS, TASK_UTILS from ..base_mutator import BaseMutator from ..group_mixin import GroupMixin -def is_dynamic_op_for_fx_tracer(module, name): - return isinstance(module, DynamicChannelMixin) - - @MODELS.register_module() class ChannelMutator(BaseMutator, Generic[ChannelUnitType], GroupMixin): """ChannelMutator manages the pruning structure of a model. @@ -46,8 +41,10 @@ class ChannelMutator(BaseMutator, Generic[ChannelUnitType], GroupMixin): parse_cfg (Dict, optional): The config to parse the model. Defaults to - dict( type='BackwardTracer', - loss_calculator=dict(type='ImageClassifierPseudoLoss')). + dict( + type='ChannelAnalyzer', + demo_input=(1, 3, 224, 224), + tracer_type='BackwardTracer') custom_groups (list[list[str]], optional): User-defined search groups. All searchable modules that are not in ``custom_group`` will be @@ -59,7 +56,8 @@ class ChannelMutator(BaseMutator, Generic[ChannelUnitType], GroupMixin): Note: There are three ways used in ChannelMutator to parse a model and get MutableChannelUnits. - 1. Using tracer. It needs parse_cfg to be the config of a tracer. + 1. Using tracer. It needs parse_cfg to be the config of the + ChannelAnalyzer. 2. Using config. When parse_cfg['type']='Config'. 
It needs that channel_unit_cfg['unit']['xxx_unit_name] has a key 'channels'. 3. Using the model with pre-defined dynamic-ops and mutablechannels: @@ -73,8 +71,9 @@ def __init__(self, dict, Type[MutableChannelUnit]] = SequentialMutableChannelUnit, parse_cfg: Dict = dict( - type='BackwardTracer', - loss_calculator=dict(type='ImageClassifierPseudoLoss')), + type='ChannelAnalyzer', + demo_input=(1, 3, 224, 224), + tracer_type='BackwardTracer'), custom_groups: Optional[List[List[str]]] = None, init_cfg: Optional[Dict] = None) -> None: @@ -83,7 +82,7 @@ def __init__(self, # tracer if isinstance(parse_cfg, dict): assert parse_cfg['type'] in [ - 'RazorFxTracer', 'BackwardTracer', 'Config', 'Predefined' + 'ChannelAnalyzer', 'Config', 'Predefined' ] self.parse_cfg = parse_cfg @@ -108,10 +107,10 @@ def prepare_from_supernet(self, supernet: Module) -> None: 1. parse the model and get MutableChannelUnits. 2. call unit.prepare_for_pruning for each unit. """ - self._name2module = dict(supernet.named_modules()) - if 'Tracer' in self.parse_cfg['type']: + if isinstance(self.parse_cfg, + ChannelAnalyzer) or 'Analyzer' in self.parse_cfg['type']: units = self._prepare_from_tracer(supernet, self.parse_cfg) elif self.parse_cfg['type'] == 'Config': units = self._prepare_from_cfg(supernet, self.units_cfg) @@ -213,7 +212,7 @@ def current_choices(self) -> Dict: return current_choices - def sample_choices(self) -> Dict[int, Any]: + def sample_choices(self, kind: str = 'random') -> Dict[int, Any]: """Sampling by search groups. The sampling result of the first mutable of each group is the sampling @@ -222,6 +221,7 @@ def sample_choices(self) -> Dict[int, Any]: Returns: Dict[int, Any]: Random choices dict. """ + assert kind == 'random', f'unsupported the {kind} sample method.' 
random_choices = dict() for group_id, modules in self.search_groups.items(): random_choices[group_id] = modules[0].sample_choice() @@ -319,20 +319,18 @@ def _parse_channel_unit_cfg( def _prepare_from_tracer(self, model: Module, parse_cfg: Dict): """Initialize units using a tracer.""" - if 'num_input_channel' in parse_cfg: - num_input_channel = parse_cfg.pop('num_input_channel') - else: - num_input_channel = 3 - if self.parse_cfg['type'] == 'BackwardTracer': - graph = ModuleGraph.init_from_backward_tracer(model, parse_cfg) - elif self.parse_cfg['type'] == 'RazorFxTracer': - graph = ModuleGraph.init_from_fx_tracer(model, fx_tracer=parse_cfg) + + if isinstance(parse_cfg, Dict): + tracer: ChannelAnalyzer = TASK_UTILS.build(parse_cfg) else: - raise NotImplementedError() - self._graph = graph + tracer = parse_cfg + unit_configs = tracer.analyze(model) + # get ChannelUnits - units = ChannelUnit.init_from_graph( - graph, num_input_channel=num_input_channel) + units = [ + ChannelUnit.init_from_cfg(model, cfg) + for cfg in unit_configs.values() + ] # convert to MutableChannelUnits units = self._convert_channel_unit_to_mutable(units) return units diff --git a/mmrazor/models/mutators/channel_mutator/dcff_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/dcff_channel_mutator.py index 8dd335bff..5994eade4 100644 --- a/mmrazor/models/mutators/channel_mutator/dcff_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/dcff_channel_mutator.py @@ -25,8 +25,9 @@ def __init__(self, channel_unit_cfg: Union[dict, Type[ChannelUnitType]] = dict( type='DCFFChannelUnit', units={}), parse_cfg=dict( - type='BackwardTracer', - loss_calculator=dict(type='ImageClassifierPseudoLoss')), + type='ChannelAnalyzer', + demo_input=(1, 3, 224, 224), + tracer_type='BackwardTracer'), **kwargs) -> None: super().__init__(channel_unit_cfg, parse_cfg, **kwargs) diff --git a/mmrazor/models/mutators/channel_mutator/one_shot_channel_mutator.py 
b/mmrazor/models/mutators/channel_mutator/one_shot_channel_mutator.py index 1f8e3496b..fdaee3161 100644 --- a/mmrazor/models/mutators/channel_mutator/one_shot_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/one_shot_channel_mutator.py @@ -1,13 +1,15 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Dict, Type, Union +from typing import Type, Union from mmrazor.models.mutables import OneShotMutableChannelUnit from mmrazor.registry import MODELS +from ..group_mixin import DynamicSampleMixin from .channel_mutator import ChannelMutator, ChannelUnitType @MODELS.register_module() -class OneShotChannelMutator(ChannelMutator[OneShotMutableChannelUnit]): +class OneShotChannelMutator(ChannelMutator[OneShotMutableChannelUnit], + DynamicSampleMixin): """OneShotChannelMutator based on ChannelMutator. It use OneShotMutableChannelUnit by default. @@ -25,19 +27,3 @@ def __init__(self, **kwargs) -> None: super().__init__(channel_unit_cfg, **kwargs) - - def min_choices(self) -> Dict: - """Return the minimal pruning subnet(structure).""" - min_choices = dict() - for group_id, modules in self.search_groups.items(): - min_choices[group_id] = modules[0].min_choice - - return min_choices - - def max_choices(self) -> Dict: - """Return the maximal pruning subnet(structure).""" - max_choices = dict() - for group_id, modules in self.search_groups.items(): - max_choices[group_id] = modules[0].max_choice - - return max_choices diff --git a/mmrazor/models/mutators/channel_mutator/slimmable_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/slimmable_channel_mutator.py index 9f5eb0075..ec726ad85 100644 --- a/mmrazor/models/mutators/channel_mutator/slimmable_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/slimmable_channel_mutator.py @@ -24,8 +24,9 @@ class SlimmableChannelMutator(ChannelMutator[SlimmableChannelUnit]): def __init__(self, channel_unit_cfg=dict(type='SlimmableChannelUnit', units={}), parse_cfg=dict( - 
type='BackwardTracer', - loss_calculator=dict(type='ImageClassifierPseudoLoss')), + type='ChannelAnalyzer', + demo_input=(1, 3, 224, 224), + tracer_type='BackwardTracer'), init_cfg: Optional[Dict] = None) -> None: super().__init__(channel_unit_cfg, parse_cfg, None, init_cfg) diff --git a/mmrazor/models/mutators/group_mixin.py b/mmrazor/models/mutators/group_mixin.py index 2575af2f1..f6b84aea2 100644 --- a/mmrazor/models/mutators/group_mixin.py +++ b/mmrazor/models/mutators/group_mixin.py @@ -75,6 +75,11 @@ def _build_name_mutable_mapping( for name, module in supernet.named_modules(): if isinstance(module, support_mutables): name2mutable[name] = module + elif hasattr(module, 'source_mutables'): + for each_mutables in module.source_mutables: + if isinstance(each_mutables, support_mutables): + name2mutable[name] = each_mutables + self._name2mutable = name2mutable return name2mutable @@ -84,14 +89,23 @@ def _build_alias_names_mapping( support_mutables: Type) -> Dict[str, List[str]]: """Mapping alias to module names.""" alias2mutable_names: Dict[str, List[str]] = dict() + + def _append(key, dict, name): + if key not in dict: + dict[key] = [name] + else: + dict[key].append(name) + for name, module in supernet.named_modules(): if isinstance(module, support_mutables): - if module.alias is not None: - if module.alias not in alias2mutable_names: - alias2mutable_names[module.alias] = [name] - else: - alias2mutable_names[module.alias].append(name) + _append(module.alias, alias2mutable_names, name) + elif hasattr(module, 'source_mutables'): + for each_mutables in module.source_mutables: + if isinstance(each_mutables, support_mutables): + if each_mutables.alias is not None: + _append(each_mutables.alias, alias2mutable_names, + name) return alias2mutable_names @@ -171,6 +185,14 @@ def build_search_groups(self, supernet: Module, support_mutables: Type, else: search_groups[current_group_nums] = [module] current_group_nums += 1 + elif hasattr(module, 'source_mutables'): + for 
each_mutables in module.source_mutables: + if isinstance(each_mutables, support_mutables): + if name in grouped_mutable_names: + continue + else: + search_groups[current_group_nums] = [each_mutables] + current_group_nums += 1 grouped_counter = Counter(grouped_mutable_names) @@ -191,11 +213,10 @@ def build_search_groups(self, supernet: Module, support_mutables: Type, def _check_valid_groups(self, alias2mutable_names: Dict[str, List[str]], name2mutable: Dict[str, BaseMutable], custom_group: List[List[str]]) -> None: - + """Check if all keys are legal.""" aliases = [*alias2mutable_names.keys()] module_names = [*name2mutable.keys()] - # check if all keys are legal expanded_custom_group: List[str] = [ _ for group in custom_group for _ in group ] @@ -239,8 +260,10 @@ def search_groups(self) -> Dict: class OneShotSampleMixin: + """Sample mixin for one-shot mutators.""" def sample_choices(self: MutatorProtocol) -> Dict: + """Sample choices for each group in search_groups.""" random_choices = dict() for group_id, modules in self.search_groups.items(): random_choices[group_id] = modules[0].sample_choice() @@ -248,6 +271,7 @@ def sample_choices(self: MutatorProtocol) -> Dict: return random_choices def set_choices(self: MutatorProtocol, choices: Dict) -> None: + """Set choices for each group in search_groups.""" for group_id, modules in self.search_groups.items(): choice = choices[group_id] for module in modules: @@ -256,18 +280,32 @@ def set_choices(self: MutatorProtocol, choices: Dict) -> None: class DynamicSampleMixin(OneShotSampleMixin): + def sample_choices(self: MutatorProtocol, kind: str = 'random') -> Dict: + """Sample choices for each group in search_groups.""" + random_choices = dict() + for group_id, modules in self.search_groups.items(): + if kind == 'max': + random_choices[group_id] = modules[0].max_choice + elif kind == 'min': + random_choices[group_id] = modules[0].min_choice + else: + random_choices[group_id] = modules[0].sample_choice() + return random_choices 
+ @property - def max_choices(self: MutatorProtocol) -> Dict: - max_choices = dict() + def max_choice(self: MutatorProtocol) -> Dict: + """Get max choices for each group in search_groups.""" + max_choice = dict() for group_id, modules in self.search_groups.items(): - max_choices[group_id] = modules[0].max_choice + max_choice[group_id] = modules[0].max_choice - return max_choices + return max_choice @property - def min_choices(self: MutatorProtocol) -> Dict: - min_choices = dict() + def min_choice(self: MutatorProtocol) -> Dict: + """Get min choices for each group in search_groups.""" + min_choice = dict() for group_id, modules in self.search_groups.items(): - min_choices[group_id] = modules[0].min_choice + min_choice[group_id] = modules[0].min_choice - return min_choices + return min_choice diff --git a/mmrazor/models/mutators/value_mutator/dynamic_value_mutator.py b/mmrazor/models/mutators/value_mutator/dynamic_value_mutator.py index c65c90f80..d8d081343 100644 --- a/mmrazor/models/mutators/value_mutator/dynamic_value_mutator.py +++ b/mmrazor/models/mutators/value_mutator/dynamic_value_mutator.py @@ -7,6 +7,7 @@ @MODELS.register_module() class DynamicValueMutator(ValueMutator, DynamicSampleMixin): + """Dynamic value mutator with type as `OneShotMutableValue`.""" @property def mutable_class_type(self): diff --git a/mmrazor/models/task_modules/estimators/counters/flops_params_counter.py b/mmrazor/models/task_modules/estimators/counters/flops_params_counter.py index 538e52135..2307dad7d 100644 --- a/mmrazor/models/task_modules/estimators/counters/flops_params_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/flops_params_counter.py @@ -63,7 +63,7 @@ def get_model_flops_params(model, flush (bool): same as that in :func:`print`. Default to False. ost (stream): same as ``file`` param in :func:`print`. Default to sys.stdout. 
- +grad Returns: tuple[float | str] | dict[str, float]: If `as_strings` is set to True, it will return FLOPs and parameter counts in a string format. From 55d96c38db156094521df7867235ca806c7b8429 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Thu, 22 Dec 2022 15:37:13 +0800 Subject: [PATCH 03/59] Newly created copy PR --- mmrazor/engine/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mmrazor/engine/__init__.py b/mmrazor/engine/__init__.py index e479b65a7..ef851673f 100644 --- a/mmrazor/engine/__init__.py +++ b/mmrazor/engine/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .hooks import DumpSubnetHook, EstimateResourcesHook +from .hooks import DumpSubnetHook, EstimateResourcesHook, DMCPSubnetHook from .optimizers import SeparateOptimWrapperConstructor from .runner import (DartsEpochBasedTrainLoop, DartsIterBasedTrainLoop, EvolutionSearchLoop, GreedySamplerTrainLoop, @@ -11,5 +11,5 @@ 'SingleTeacherDistillValLoop', 'DartsEpochBasedTrainLoop', 'DartsIterBasedTrainLoop', 'SlimmableValLoop', 'EvolutionSearchLoop', 'GreedySamplerTrainLoop', 'SubnetValLoop', 'EstimateResourcesHook', - 'SelfDistillValLoop' + 'SelfDistillValLoop', 'DMCPSubnetHook' ] From 26da4d223a1c215388b1a911317e5921cc288917 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Thu, 22 Dec 2022 15:44:33 +0800 Subject: [PATCH 04/59] update op_counters --- .../counters/op_counters/linear_layer_counter.py | 2 +- .../counters/op_counters/norm_layer_counter.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/linear_layer_counter.py b/mmrazor/models/task_modules/estimators/counters/op_counters/linear_layer_counter.py index 4959b710c..80c024c09 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/linear_layer_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/linear_layer_counter.py @@ -22,4 +22,4 @@ def 
add_count_hook(module, input, output): @TASK_UTILS.register_module() class DynamicLinearCounter(LinearCounter): - pass \ No newline at end of file + pass diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py b/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py index 20787a9c9..8f12c15a3 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py @@ -11,8 +11,6 @@ class BNCounter(BaseCounter): @staticmethod def add_count_hook(module, input, output): - """Calculate FLOPs and params based on the size of input & output.""" - import pdb;pdb.set_trace() input = input[0] batch_flops = np.prod(input.shape) if getattr(module, 'affine', False): @@ -77,9 +75,12 @@ class DMCPBatchNorm2dCounter(BNCounter): def add_count_hook(module, input, output): """Calculate FLOPs and params based on the size of input & output.""" input = input[0] - batch_flops = np.prod(input.shape) + B, C, H, W = input.shape + if hasattr(module, '_traceable_choice'): + C = module._traceable_choice() + batch_flops = B * C * H * W if getattr(module, 'affine', False): batch_flops *= 2 num_features = module.mutable_attrs['num_features'].activated_channels - module.__flops__ += int(batch_flops) + module.__flops__ += batch_flops module.__params__ += num_features*2 From 0058373bebd0c3bc4da2828f40768973e69e840b Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Tue, 3 Jan 2023 02:35:08 +0800 Subject: [PATCH 05/59] update subnet/commit/FLOPsCounter --- .../mmcls/dmcp/DMCP_SUBNET_IMAGENET.yaml | 4 + .../mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py | 35 +++++ ...b32.py => dmcp_resnet50_supernet_8xb32.py} | 0 mmrazor/engine/hooks/dmcp_subnet_hook.py | 3 - mmrazor/models/algorithms/pruning/dmcp.py | 99 ++++++------- .../dynamic_ops/bricks/dynamic_norm.py | 135 ++++++++++++++++++ .../units/dmcp_channel_unit.py | 9 -- 
.../channel_mutator/dmcp_channel_mutator.py | 41 +++--- .../counters/flops_params_counter.py | 6 +- .../op_counters/conv_layer_counter.py | 5 +- .../op_counters/norm_layer_counter.py | 7 +- 11 files changed, 248 insertions(+), 96 deletions(-) create mode 100644 configs/pruning/mmcls/dmcp/DMCP_SUBNET_IMAGENET.yaml create mode 100644 configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py rename configs/pruning/mmcls/dmcp/{dmcp_resnet_8xb32.py => dmcp_resnet50_supernet_8xb32.py} (100%) diff --git a/configs/pruning/mmcls/dmcp/DMCP_SUBNET_IMAGENET.yaml b/configs/pruning/mmcls/dmcp/DMCP_SUBNET_IMAGENET.yaml new file mode 100644 index 000000000..aa45b040b --- /dev/null +++ b/configs/pruning/mmcls/dmcp/DMCP_SUBNET_IMAGENET.yaml @@ -0,0 +1,4 @@ +{0: 64, 1: 64, 2: 64, 3: 256, 4: 64, 5: 64, 6: 64, 7: 64, 8: 128, 9: 128, 10: 512, + 11: 128, 12: 128, 13: 128, 14: 128, 15: 128, 16: 128, 17: 256, 18: 256, 19: 1024, + 20: 256, 21: 256, 22: 256, 23: 256, 24: 256, 25: 256, 26: 256, 27: 256, 28: 256, + 29: 256, 30: 512, 31: 512, 32: 2048, 33: 512, 34: 512, 35: 512, 36: 512} diff --git a/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py b/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py new file mode 100644 index 000000000..ef73d3c64 --- /dev/null +++ b/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py @@ -0,0 +1,35 @@ +_base_ = [ + 'mmcls::_base_/datasets/imagenet_bs32.py', + 'mmcls::_base_/schedules/imagenet_bs256.py', + 'mmcls::_base_/default_runtime.py' +] + +optim_wrapper = dict( + optimizer=dict(type='SGD', lr=0.25, momentum=0.9, weight_decay=0.0001)) + +param_scheduler = dict( + type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1) + +train_cfg = dict(by_epoch=True, max_epochs=120, val_interval=1) + +data_preprocessor = {'type': 'mmcls.ClsDataPreprocessor'} + +# model settings +model = dict( + _scope_='mmrazor', + type='DMCP', + architecture=dict( + cfg_path='mmcls::resnet/resnet50_8xb32_in1k.py', pretrained=False), + mutator_cfg=dict( + 
type='DMCPChannelMutator', + channel_unit_cfg=dict( + type='DMCPChannelUnit', default_args=dict(choice_mode='number')), + parse_cfg=dict( + type='BackwardTracer', + loss_calculator=dict(type='ImageClassifierPseudoLoss'))), + fix_subnet='configs/pruning/mmcls/dmcp/DMCP_SUBNET_IMAGENET.yaml') + +model_wrapper_cfg = dict( + type='mmrazor.DMCPDDP', + broadcast_buffers=False, + find_unused_parameters=True) \ No newline at end of file diff --git a/configs/pruning/mmcls/dmcp/dmcp_resnet_8xb32.py b/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py similarity index 100% rename from configs/pruning/mmcls/dmcp/dmcp_resnet_8xb32.py rename to configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py diff --git a/mmrazor/engine/hooks/dmcp_subnet_hook.py b/mmrazor/engine/hooks/dmcp_subnet_hook.py index 34a6854d9..370f7eede 100644 --- a/mmrazor/engine/hooks/dmcp_subnet_hook.py +++ b/mmrazor/engine/hooks/dmcp_subnet_hook.py @@ -7,8 +7,6 @@ from mmengine.hooks import Hook from mmengine.registry import HOOKS -DATA_BATCH = Optional[Sequence[dict]] - @HOOKS.register_module() class DMCPSubnetHook(Hook): @@ -59,4 +57,3 @@ def after_run(self, runner): runner.logger.info( f'Driect sample(DS) arch with FlOP(MB): {cur_flops}') self._save_subnet(model.mutator.current_choices, save_path) - diff --git a/mmrazor/models/algorithms/pruning/dmcp.py b/mmrazor/models/algorithms/pruning/dmcp.py index 2b5d6baae..9c71009fa 100644 --- a/mmrazor/models/algorithms/pruning/dmcp.py +++ b/mmrazor/models/algorithms/pruning/dmcp.py @@ -1,10 +1,12 @@ import os -from typing import Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union import random from typing_extensions import Self import torch -from mmengine import MMLogger +import yaml + +from mmengine import MessageHub, MMLogger from mmengine.model import BaseModel, MMDistributedDataParallel from mmengine.optim import OptimWrapper from mmengine.structures import BaseDataElement @@ -17,14 +19,12 @@ from 
mmrazor.utils import ValidFixMutable from mmrazor.structures.subnet.fix_subnet import _dynamic_to_static from ..base import BaseAlgorithm +from ...task_modules.estimators import ResourceEstimator -VALID_MUTATOR_TYPE = Union[BaseMutator, Dict] -VALID_MUTATORS_TYPE = Dict[str, Union[BaseMutator, Dict]] -VALID_DISTILLER_TYPE = Union[ConfigurableDistiller, Dict] +VALID_DISTILLER_TYPE = Union[ConfigurableDistiller, Dict, Any] from mmrazor.models.mutators import DMCPChannelMutator from mmrazor.models.mutators import ChannelMutator -from .ite_prune_algorithm import ItePruneAlgorithm, ItePruneConfigManager LossResults = Dict[str, torch.Tensor] TensorResults = Union[Tuple[torch.Tensor], torch.Tensor] @@ -32,10 +32,9 @@ ForwardResults = Union[LossResults, TensorResults, PredictResults] @MODELS.register_module() -class DMCP(ItePruneAlgorithm): +class DMCP(BaseAlgorithm): def __init__(self, - # mutators: VALID_MUTATORS_TYPE, distiller: VALID_DISTILLER_TYPE, architecture: Union[BaseModel, Dict], mutator_cfg: Union[Dict, DMCPChannelMutator] = dict( @@ -45,65 +44,41 @@ def __init__(self, data_preprocessor: Optional[Union[Dict, nn.Module]] = None, strategy: List = ['max', 'min', 'scheduled_random', 'arch_random'], init_cfg: Optional[Dict] = None, - target_pruning_ratio: Optional[Dict[str, float]] = None, - arch_start_train=10000, # arch_start_train_iter - step_freq=500, # arch_train_freq - distillation_times=2000, # distillation_start_train_iter + arch_start_train=10000, + arch_train_freq=500, + distillation_times=2000, target_flops=150, # MFLOPs flops_loss_type: str = 'log_l1', - flop_loss_weight: float = 1.0, - linear_schedule=False, - is_deployed=False) -> None: - super().__init__(architecture, mutator_cfg, data_preprocessor, - target_pruning_ratio, step_freq, - init_cfg, linear_schedule) + flop_loss_weight: float = 1.0) -> None: + super().__init__(architecture, data_preprocessor, init_cfg) self.arch_start_train = arch_start_train self.strategy = strategy 
self.distillation_times = distillation_times self.target_flops = target_flops - self.samples = len([s for s in self.strategy if 'random' in s]) - self.is_supernet = True if len(self.strategy) > 1 else False - self.distiller = self._build_distiller(distiller) - self.distiller.prepare_from_teacher(self.architecture) - self.distiller.prepare_from_student(self.architecture) + if distiller: + self.distiller = self._build_distiller(distiller) + self.distiller.prepare_from_teacher(self.architecture) + self.distiller.prepare_from_student(self.architecture) self.flops_loss_type = flops_loss_type self.flop_loss_weight = flop_loss_weight self.cur_sample_prob = 1.0 self.arch_train = False - if fix_subnet: - # Avoid circular import - from mmrazor.structures import load_fix_subnet + self.mutator: ChannelMutator = MODELS.build(mutator_cfg) + self.mutator.prepare_from_supernet(self.architecture) - # According to fix_subnet, delete the unchosen part of supernet - load_fix_subnet(self.architecture, fix_subnet) + if fix_subnet: + self._load_fix_subnet(fix_subnet) self.is_supernet = False - self.is_deployed = is_deployed - if (self.is_deployed): - # To static ops for loaded pruned network. 
- self._deploy() - - def _deploy(self): - config = self.prune_config_manager.prune_at(self._iter) - self.mutator.set_choices(config) - self.mutator.fix_channel_mutables() - self._fix_archtecture() - _dynamic_to_static(self.architecture) - self.is_deployed = True - - def _build_mutator(self, mutator: VALID_MUTATOR_TYPE) -> BaseMutator: - """build mutator.""" - if isinstance(mutator, dict): - mutator = MODELS.build(mutator) - if not isinstance(mutator, BaseMutator): - raise TypeError('mutator should be a `dict` or ' - '`OneShotModuleMutator` instance, but got ' - f'{type(mutator)}') - - return mutator + else: + self.is_supernet = True + + def _load_fix_subnet(self, save_path): + with open(save_path) as file: + self.mutator.set_choices(yaml.load(file.read())) def _build_distiller( self, distiller: VALID_DISTILLER_TYPE) -> ConfigurableDistiller: @@ -206,7 +181,7 @@ def distill_step( #update arch parameters if self.arch_train \ - and self._iter % self.step_freq == 0: + and self._iter % self.arch_train_freq == 0: with optim_wrapper['mutator'].optim_context(self): optim_wrapper['mutator'].zero_grad() mutator_loss = self._update_arch_params( @@ -234,7 +209,7 @@ def _update_arch_params( # update flops_loss self.set_subnet(mode='expected', arch_train=False) - expected_flops = self.mutator.calc_current_flops(self) + expected_flops = self.calc_current_flops() flops_loss = self._compute_flops_loss(expected_flops).to( arch_loss['loss'].device) parsed_flops_loss, _ = self.parse_losses({'loss':flops_loss}) @@ -275,6 +250,11 @@ def _compute_flops_loss(self, expected_flops): raise NotImplementedError return floss * self.flop_loss_weight + def calc_current_flops(self): + estimator = ResourceEstimator(units=None) + model = getattr(self, 'module', self) + estimation = estimator.estimate(model=model.architecture.backbone,\) + return estimation['flops'] def forward(self, inputs: torch.Tensor, @@ -283,6 +263,17 @@ def forward(self, """Forward.""" return BaseAlgorithm.forward(self, inputs, 
data_samples, mode) + @property + def _iter(self): + """Get current sum iteration number.""" + message_hub = MessageHub.get_current_instance() + if 'iter' in message_hub.runtime_info: + return message_hub.runtime_info['iter'] + else: + raise RuntimeError('Use MessageHub before initiation.' + 'iter is inited in before_run_iter().') + + @MODEL_WRAPPERS.register_module() class DMCPDDP(MMDistributedDataParallel): @@ -392,7 +383,7 @@ def distill_step( # update arch parameters if self.module.arch_train \ - and self.module._iter % self.modqule.step_freq == 0: + and self.module._iter % self.modqule.arch_train_freq == 0: with optim_wrapper['mutator'].optim_context(self): optim_wrapper['mutator'].zero_grad() mutator_loss = self.module._update_arch_params( diff --git a/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py b/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py index fbdde7316..0519df80a 100644 --- a/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py +++ b/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py @@ -5,8 +5,10 @@ import torch import torch.nn as nn import torch.nn.functional as F +from mmengine.model.utils import _BatchNormXd from torch import Tensor from torch.nn import LayerNorm +from torch.nn.modules._functions import SyncBatchNorm as sync_batch_norm from torch.nn.modules.batchnorm import _BatchNorm from mmrazor.models.mutables.base_mutable import BaseMutable @@ -257,6 +259,139 @@ def _check_input_dim(self, input: Tensor) -> None: input.dim())) +class DynamicSyncBatchNorm(nn.SyncBatchNorm, DynamicBatchNormMixin): + """DynamicOp for sync bn.""" + + def __init__(self, + num_features: int, + eps: float = 0.00001, + momentum: float = 0.1, + affine: bool = True, + track_running_stats: bool = True, + process_group: Optional[Any] = None) -> None: + super().__init__(num_features, eps, momentum, affine, + track_running_stats, process_group) + self.mutable_attrs: Dict[str, Optional[BaseMutable]] = nn.ModuleDict() 
+ + @classmethod + def convert_from(cls, module): + return cls(module.num_features, module.eps, module.momentum, + module.affine, module.track_running_stats, + module.process_group) + + @property + def static_op_factory(self): + return nn.SyncBatchNorm + + def forward(self, input: Tensor) -> Tensor: + # currently only GPU input is supported + if not input.is_cuda: + raise ValueError( + 'SyncBatchNorm expected input tensor to be on GPU') + + self._check_input_dim(input) + if hasattr(self, '_check_non_zero_input_channels'): + self._check_non_zero_input_channels(input) + + # exponential_average_factor is set to self.momentum + # (when it is available) only so that it gets updated + # in ONNX graph when this node is exported to ONNX. + if self.momentum is None: + exponential_average_factor = 0.0 + else: + exponential_average_factor = self.momentum + + if self.training and self.track_running_stats: + assert self.num_batches_tracked is not None + self.num_batches_tracked.add_(1) + if self.momentum is None: # use cumulative moving average + exponential_average_factor = (1.0 / + self.num_batches_tracked.item()) + else: # use exponential moving average + exponential_average_factor = self.momentum + r""" + Decide whether the mini-batch stats should be used for normalization + rather than the buffers. + Mini-batch stats are used in training mode, and in eval mode when + buffers are None. + """ + if self.training: + bn_training = True + else: + bn_training = (self.running_mean is None) and (self.running_var is + None) + r""" + Buffers are only updated if they are to be tracked and we are in + training mode. Thus they only need to be + passed when the update should occur (i.e. in training mode when + they are tracked), or when buffer stats are + used for normalization (i.e. in eval mode when buffers are not None). 
+ """ + # If buffers are not to be tracked, ensure that they won't be updated + running_mean = ( + self.running_mean + if not self.training or self.track_running_stats else None) + running_var = ( + self.running_var + if not self.training or self.track_running_stats else None) + + # Don't sync batchnorm stats in inference mode (model.eval()). + need_sync = (bn_training and self.training) + if need_sync: + process_group = torch.distributed.group.WORLD + if self.process_group: + process_group = self.process_group + world_size = torch.distributed.get_world_size(process_group) + need_sync = world_size > 1 + + running_mean, running_var, weight, bias = self.get_dynamic_params() + + # fallback to framework BN when synchronization is not necessary + if not need_sync: + out = F.batch_norm( + input, + running_mean, + running_var, + weight, + bias, + bn_training, + exponential_average_factor, + self.eps, + ) + else: + assert bn_training + out = sync_batch_norm.apply( + input, + weight, + bias, + running_mean, + running_var, + self.eps, + exponential_average_factor, + process_group, + world_size, + ) + + # copy changed running statistics + if self.training and self.track_running_stats: + out_mask = self._get_num_features_mask() + self.running_mean.masked_scatter_(out_mask, running_mean) + self.running_var.masked_scatter_(out_mask, running_var) + + return out + + +class DynamicBatchNormXd(_DynamicBatchNorm): + """Dynamic op for _DynamicBatchNorm.""" + + @property + def static_op_factory(self): + return _BatchNormXd + + def _check_input_dim(self, input: torch.Tensor): + return + + @MODELS.register_module() class DMCPBatchNorm2d(DynamicBatchNorm2d): diff --git a/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py b/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py index 386db923b..7fb738016 100644 --- a/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py +++ b/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py @@ 
-9,14 +9,6 @@ from .sequential_mutable_channel_unit import SequentialMutableChannelUnit -class tracable_choice(): - def __init__(self, choice = None) -> None: - self.choice = choice - - def __call__(self): - return self.choice - - @MODELS.register_module() class DMCPChannelUnit(SequentialMutableChannelUnit): """``DMCPChannelUnit`` is for supernet DMCP and based on @@ -44,7 +36,6 @@ def __init__(self, min_ratio: float = 0.5) -> None: super().__init__(num_channels, choice_mode, divisor, min_value, min_ratio) - self._traceable_choice = tracable_choice() def prepare_for_pruning(self, model: nn.Module): """In ``DMCPChannelGroup`` nn.BatchNorm2d is replaced with MixedBatchNorm2d.""" diff --git a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py index a6cfcd416..6cc292b3c 100644 --- a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py @@ -4,12 +4,13 @@ import torch import torch.nn as nn +from torch import tensor from torch.nn import Module, ModuleList from mmrazor.models.mutables import DMCPChannelUnit from mmrazor.registry import MODELS from .channel_mutator import ChannelMutator, ChannelUnitType from ...architectures import DMCPBatchNorm2d -from ...task_modules.estimators import ResourceEstimator + @MODELS.register_module() class DMCPChannelMutator(ChannelMutator[DMCPChannelUnit]): @@ -29,9 +30,10 @@ class DMCPChannelMutator(ChannelMutator[DMCPChannelUnit]): def __init__(self, channel_unit_cfg: Union[dict, Type[ChannelUnitType]] = dict( type='DMCPChannelUnit', units={}), - parse_cfg=dict( - type='BackwardTracer', - loss_calculator=dict(type='ImageClassifierPseudoLoss')), + parse_cfg: Dict = dict( + type='ChannelAnalyzer', + demo_input=(1, 3, 224, 224), + tracer_type='BackwardTracer'), pruning_cfg=(0.1, 1, 0.05), **kwargs) -> None: super().__init__(channel_unit_cfg, parse_cfg, **kwargs) @@ -52,7 +54,7 @@ def 
_build_arch_param(self, num_choices) -> nn.Parameter: """Build learnable architecture parameters.""" return nn.Parameter(torch.zeros(num_choices)) - def prepare_arch_params(self, supernet): + def prepare_arch_params(self, supernet; Module) -> None: # Associate all the op's in the model with their corresponding arch parameters self.arch_params = nn.ParameterDict() self._op_arch_align = dict() @@ -63,21 +65,16 @@ def prepare_arch_params(self, supernet): group_arch_param = self._build_arch_param(arch_message[1]) self.arch_params[str(group_id)] = group_arch_param - for m in module[0].output_related: - self._op_arch_align[str(m.name)] = str(group_id) + for unit in module[0].output_related: + self._op_arch_align[str(unit.name)] = str(group_id) # Associate all the BN in the model with their corresponding arch parameters self._bn_arch_align = dict() for name, module in supernet.named_modules(): if isinstance(module, DMCPBatchNorm2d): self._bn_arch_align[module] = self._op_arch_align[str(name)] - # prepare for calulate FLOPs with _traceable_choice - if name in self._op_arch_align.keys(): - module._traceable_choice = \ - self.search_groups[int(self._op_arch_align[str(name)])][0]._traceable_choice - - def _generate_arch_message(self, out_channels): + def _generate_arch_message(self, out_channels: int) -> tuple: """ Define the search space of the channel according to the pruning rate range, where the search space consists of two parts @@ -96,7 +93,7 @@ def _generate_arch_message(self, out_channels): return (group_size, num_groups, min_ch) - def modify_supernet_forward(self, arch_train): + def modify_supernet_forward(self, arch_train: str) -> bool: for module, group_id in self._bn_arch_align.items(): if arch_train: arch_param = self.arch_params[self._bn_arch_align[module]] @@ -105,7 +102,7 @@ def modify_supernet_forward(self, arch_train): arch_param = arch_params_attr = None module.set_forward_args(arch_param=arch_param, arch_attr=arch_params_attr) - def sample_subnet(self, 
mode, arch_train): + def sample_subnet(self, mode: str, arch_train: bool) -> None: choices = dict() for group_id, _ in self.search_groups.items(): choices[group_id] = self._prune_by_arch(mode, group_id) @@ -113,13 +110,17 @@ def sample_subnet(self, mode, arch_train): self.modify_supernet_forward(arch_train) - def _prune_by_arch(self, mode, group_id): + def _prune_by_arch(self, mode: str, group_id: int) -> Union[int, tensor]: """ Prune the output channels according to the specified mode. Inputs: mode (list): one of ['max', 'min', 'random', 'direct', 'expected'] group_id (int): number of search_groups + + Outputs: + channels (int): for mode 'max'/'min'/'random'/'dirext' + channels (tensor): for mode 'expected' """ arch_param = self.arch_params[str(group_id)] (group_size, num_groups, min_ch) = self._arch_params_attr[str(group_id)] @@ -149,12 +150,6 @@ def _prune_by_arch(self, mode, group_id): return expected_channel else: raise NotImplementedError - - def calc_current_flops(self, model): - estimator = ResourceEstimator() - model = getattr(model, 'module', model) - estimation = estimator.estimate(model=model.architecture.backbone) - return estimation['flops'] def set_choices(self, choices: Dict[int, Any]) -> None: """Set mutables' current choice according to choices sample by @@ -172,5 +167,5 @@ def set_choices(self, choices: Dict[int, Any]) -> None: choice = choices[group_id] for module in modules: module.current_choice = choice - module._traceable_choice.choice = choice + module.mutable_channel.traceable_choice = choice diff --git a/mmrazor/models/task_modules/estimators/counters/flops_params_counter.py b/mmrazor/models/task_modules/estimators/counters/flops_params_counter.py index 2307dad7d..dca740214 100644 --- a/mmrazor/models/task_modules/estimators/counters/flops_params_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/flops_params_counter.py @@ -63,7 +63,7 @@ def get_model_flops_params(model, flush (bool): same as that in :func:`print`. 
Default to False. ost (stream): same as ``file`` param in :func:`print`. Default to sys.stdout. -grad + Returns: tuple[float | str] | dict[str, float]: If `as_strings` is set to True, it will return FLOPs and parameter counts in a string format. @@ -107,6 +107,7 @@ def get_model_flops_params(model, params_count, ost=ost, flush=flush) + if units is not None: flops_count = params_units_convert(flops_count, units['flops']) params_count = params_units_convert(params_count, units['params']) @@ -171,8 +172,7 @@ def params_units_convert(num_params, units='M', precision=3): >>> params_units_convert(3e-9) '3e-09' """ - if getattr(num_params, 'requires_grad', None): - return num_params + if units == 'G': return round(num_params / 10.**9, precision) elif units == 'M': diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py b/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py index 62b54cdb2..5b2397168 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py @@ -76,8 +76,9 @@ def add_count_hook(module: nn.Conv2d, input, output): kernel_dims = list(module.kernel_size) - if hasattr(module, '_traceable_choice'): - out_channels = module._traceable_choice() + mutable_channel = list(module.mutable_attrs['out_channels'].mutable_channels.values()) + if hasattr(mutable_channel[0], 'traceable_choice'): + out_channels = mutable_channel[0].traceable_choice else: out_channels = module.mutable_attrs['out_channels'].activated_channels in_channels = module.mutable_attrs['in_channels'].activated_channels diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py b/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py index 8f12c15a3..e061a2182 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py 
+++ b/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py @@ -76,8 +76,11 @@ def add_count_hook(module, input, output): """Calculate FLOPs and params based on the size of input & output.""" input = input[0] B, C, H, W = input.shape - if hasattr(module, '_traceable_choice'): - C = module._traceable_choice() + + mutable_channel = list(module.mutable_attrs['num_features'].mutable_channels.values()) + if hasattr(mutable_channel[0], 'traceable_choice'): + C = mutable_channel[0].traceable_choice + batch_flops = B * C * H * W if getattr(module, 'affine', False): batch_flops *= 2 From f4adf907463716bb6a9c1f775d65755f8c87a93e Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Wed, 4 Jan 2023 19:16:12 +0800 Subject: [PATCH 06/59] update docs/UT --- configs/pruning/mmcls/dmcp/README.md | 42 +++++++++++++++++++ .../mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py | 36 +++------------- .../dmcp/dmcp_resnet50_supernet_8xb32.py | 1 + .../test_mutators/test_dmcp_mutator.py | 40 ++++++++++++++++++ 4 files changed, 89 insertions(+), 30 deletions(-) create mode 100644 configs/pruning/mmcls/dmcp/README.md create mode 100644 tests/test_models/test_mutators/test_dmcp_mutator.py diff --git a/configs/pruning/mmcls/dmcp/README.md b/configs/pruning/mmcls/dmcp/README.md new file mode 100644 index 000000000..3e4bd3850 --- /dev/null +++ b/configs/pruning/mmcls/dmcp/README.md @@ -0,0 +1,42 @@ +# DMCP: Differentiable Markov Channel Pruning for Neural Networks + + +## Abstract + +Recent works imply that the channel pruning can be regarded as searching optimal sub-structure from unpruned networks. However, existing works based on this observation require training and evaluating a large number of structures, which limits their application. In this paper, we propose a novel differentiable method for channel pruning, named Differentiable Markov Channel Pruning (DMCP), to efficiently search the optimal sub-structure. 
Our method is differentiable and can be directly optimized by gradient descent with respect to standard task loss and budget regularization (e.g. FLOPs constraint). In DMCP, we model the channel pruning as a Markov process, in which each state represnets for retaining the corresponding channel during pruning, and transitions between states denote the pruning process. In the end, our method is able to implicitly select the proper number of channels in each layer by the Markov process with optimized transitions. To validate the effectiveness of our method, we perform extensive experiments on Imagenet with ResNet and MobilenetV2. Results show our method can achieve consistent improvement than stateof-the-art pruning methods in various FLOPs settings. + + + +## Citation + +```latex +@inproceedings{guo2020dmcp, + title={Dmcp: Differentiable markov channel pruning for neural networks}, + author={Guo, Shaopeng and Wang, Yujie and Li, Quanquan and Yan, Junjie}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={1539--1547}, + year={2020} +} +``` + +## Results and models +### 1.Classification +|Dataset| Supernet | Flops(M) | Top-1 (%) | Top-5 (%) | Config | Download | +|:---------------------:|:---------------------:|:------:|:---------:|:--------:|:---------:|:------:| +|ImageNet| ResNet50 | - | - | - | [config](./dmcp_resnet50_supernet_8xb32.py) | [model] / [arch](./DMCP_SUBNET_IMAGENET.yaml)/ [log] | + + +## Getting Started +#### Train DMCP from scrach +```bash +sh tools/slurm_train.sh $PARTION $JOB_NAME \ + configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py \ + --work-dir $WORK_DIR +``` +#### After the previous steps, retrain the selected sub-network based on +#### the output structure 'DMCP_SUBNET_IMAGENET.yaml' +```bash +sh tools/slurm_train.sh $PARTION $JOB_NAME \ + configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py \ + --work-dir $WORK_DIR +``` \ No newline at end of file diff --git 
a/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py b/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py index ef73d3c64..b346e9b63 100644 --- a/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py +++ b/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py @@ -1,35 +1,11 @@ -_base_ = [ - 'mmcls::_base_/datasets/imagenet_bs32.py', - 'mmcls::_base_/schedules/imagenet_bs256.py', - 'mmcls::_base_/default_runtime.py' -] +_base_ = ['dmcp_resnet_8xb32.py'] -optim_wrapper = dict( - optimizer=dict(type='SGD', lr=0.25, momentum=0.9, weight_decay=0.0001)) - -param_scheduler = dict( - type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1) -train_cfg = dict(by_epoch=True, max_epochs=120, val_interval=1) +_base_.optim_wrapper = dict( + optimizer=dict(type='SGD', lr=0.25, momentum=0.9, weight_decay=0.0001)) -data_preprocessor = {'type': 'mmcls.ClsDataPreprocessor'} +custom_hooks = None # model settings -model = dict( - _scope_='mmrazor', - type='DMCP', - architecture=dict( - cfg_path='mmcls::resnet/resnet50_8xb32_in1k.py', pretrained=False), - mutator_cfg=dict( - type='DMCPChannelMutator', - channel_unit_cfg=dict( - type='DMCPChannelUnit', default_args=dict(choice_mode='number')), - parse_cfg=dict( - type='BackwardTracer', - loss_calculator=dict(type='ImageClassifierPseudoLoss'))), - fix_subnet='configs/pruning/mmcls/dmcp/DMCP_SUBNET_IMAGENET.yaml') - -model_wrapper_cfg = dict( - type='mmrazor.DMCPDDP', - broadcast_buffers=False, - find_unused_parameters=True) \ No newline at end of file +model = _base_.model +model['fix_subnet'] = 'configs/pruning/mmcls/dmcp/DMCP_SUBNET_IMAGENET.yaml' diff --git a/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py b/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py index 5104181ba..e671e1b36 100644 --- a/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py +++ b/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py @@ -53,6 +53,7 @@ 
loss_calculator=dict(type='ImageClassifierPseudoLoss'))), arch_start_train=10000, step_freq=500, + flop_loss_weight=0.1, distillation_times=20000, target_flops=2000) diff --git a/tests/test_models/test_mutators/test_dmcp_mutator.py b/tests/test_models/test_mutators/test_dmcp_mutator.py new file mode 100644 index 000000000..31fcc90d4 --- /dev/null +++ b/tests/test_models/test_mutators/test_dmcp_mutator.py @@ -0,0 +1,40 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from mmcls.models import * # noqa: F401,F403 +from torch import Tensor, nn +from torch.nn import Module + +from mmrazor.models.mutators import DMCPChannelMutator + + +class ResBlock(Module): + + def __init__(self) -> None: + super().__init__() + + self.op1 = nn.Conv2d(3, 8, 1) + self.bn1 = nn.BatchNorm2d(8) + self.op2 = nn.Conv2d(8, 8, 1) + self.bn2 = nn.BatchNorm2d(8) + self.op3 = nn.Conv2d(8, 8, 1) + + def forward(self, x: Tensor) -> Tensor: + x1 = self.bn1(self.op1(x)) + x2 = self.bn2(self.op2(x1)) + x3 = self.op3(x2 + x1) + return x3 + + +def test_DCFF_channel_mutator() -> None: + imgs = torch.randn(16, 3, 224, 224) + + # ResBlock + mutator = DMCPChannelMutator(channel_unit_cfg=dict(type='DMCPChannelUnit')) + + model = ResBlock() + mutator.prepare_from_supernet(model) + for mode in ['max', 'min', 'random', 'expected', 'direct']: + mutator.sample_subnet(mode, arch_train=True) + out3 = model(imgs) + + assert out3.shape == (16, 8, 224, 224) From f16ba55968b48601e4f5de71beaf5d9a94c34295 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Wed, 4 Jan 2023 20:36:35 +0800 Subject: [PATCH 07/59] update docs/UT --- .../dmcp/dmcp_resnet50_supernet_8xb32.py | 4 +- .../test_models/test_algorithms/test_dmcp.py | 86 +++++++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 tests/test_models/test_algorithms/test_dmcp.py diff --git a/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py b/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py index 
e671e1b36..52709dc38 100644 --- a/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py +++ b/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py @@ -49,7 +49,9 @@ channel_unit_cfg=dict( type='DMCPChannelUnit', default_args=dict(choice_mode='number')), parse_cfg=dict( - type='BackwardTracer', + type='ChannelAnalyzer', + demo_input=(1, 3, 224, 224), + tracer_type='BackwardTracer', loss_calculator=dict(type='ImageClassifierPseudoLoss'))), arch_start_train=10000, step_freq=500, diff --git a/tests/test_models/test_algorithms/test_dmcp.py b/tests/test_models/test_algorithms/test_dmcp.py new file mode 100644 index 000000000..c68e8c2c1 --- /dev/null +++ b/tests/test_models/test_algorithms/test_dmcp.py @@ -0,0 +1,86 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +from unittest import TestCase + +import torch + +from mmrazor.models import DMCP +from mmrazor.registry import MODELS + +MUTATOR_CFG = dict( + channel_mutator=dict( + type='mmrazor.DMCPChannelMutator', + channel_unit_cfg={ + 'type': 'DMCPChannelUnit' + }, + parse_cfg: Dict = dict( + type='ChannelAnalyzer', + demo_input=(1, 3, 224, 224), + tracer_type='BackwardTracer'),)) + +DISTILLER_CFG = dict( + _scope_='mmrazor', + type='ConfigurableDistiller', + teacher_recorders=dict(fc=dict(type='ModuleOutputs', source='head.fc')), + student_recorders=dict(fc=dict(type='ModuleOutputs', source='head.fc')), + distill_losses=dict( + loss_kl=dict(type='KLDivergence', tau=1, loss_weight=1)), + loss_forward_mappings=dict( + loss_kl=dict( + preds_S=dict(recorder='fc', from_student=True), + preds_T=dict(recorder='fc', from_student=False)))) + +ALGORITHM_CFG = dict( + type='mmrazor.DMCP', + architecture=dict( + cfg_path='mmcls::resnet/resnet50_8xb32_in1k.py', pretrained=False), + mutators=MUTATOR_CFG, + distiller=DISTILLER_CFG) + + +class TestDMCP(TestCase): + + def test_init(self): + ALGORITHM_CFG_SUPERNET = copy.deepcopy(ALGORITHM_CFG) + # initiate dmcp with built `algorithm`. 
+ dmcp_algo = MODELS.build(ALGORITHM_CFG_SUPERNET) + self.assertIsInstance(dmcp_algo, DMCP) + # dmcp mutators include channel_mutator and value_mutator + assert 'channel_mutator' in dmcp_algo.mutators + assert 'value_mutator' in dmcp_algo.mutators + + # dmcp_algo support training + self.assertTrue(dmcp_algo.is_supernet) + + # initiate dmcp without any `mutator`. + ALGORITHM_CFG_SUPERNET.pop('type') + ALGORITHM_CFG_SUPERNET['mutators'] = None + none_type = type(ALGORITHM_CFG_SUPERNET['mutators']) + with self.assertRaisesRegex( + TypeError, f'mutator should be a `dict` but got {none_type}'): + _ = DMCP(**ALGORITHM_CFG_SUPERNET) + + # initiate dmcp with error type `mutator`. + backwardtracer_cfg = dict( + type='OneShotChannelMutator', + channel_unit_cfg=dict( + type='OneShotMutableChannelUnit', + default_args=dict( + candidate_choices=list(i / 12 for i in range(2, 13)), + choice_mode='ratio')), + parse_cfg=dict( + type='BackwardTracer', + loss_calculator=dict(type='ImageClassifierPseudoLoss'))) + ALGORITHM_CFG_SUPERNET['mutators'] = dict( + channel_mutator=backwardtracer_cfg, + value_mutator=dict(type='mmrazor.DynamicValueMutator')) + with self.assertRaisesRegex(AssertionError, + 'DMCP only support predefined.'): + _ = DMCP(**ALGORITHM_CFG_SUPERNET) + + def test_loss(self): + # supernet + inputs = torch.randn(1, 3, 224, 224) + dmcp = MODELS.build(ALGORITHM_CFG) + loss = dmcp(inputs) + assert loss.size(1) == 1000 From 864d08c93af301b7d8935a87bd3893ccf33aa021 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Thu, 5 Jan 2023 15:22:28 +0800 Subject: [PATCH 08/59] add setter for current_mask --- mmrazor/engine/hooks/dmcp_subnet_hook.py | 1 - mmrazor/models/algorithms/pruning/dmcp.py | 2 +- .../mutables/mutable_channel/sequential_mutable_channel.py | 3 +++ .../models/mutators/channel_mutator/dmcp_channel_mutator.py | 2 +- .../estimators/counters/op_counters/conv_layer_counter.py | 5 ++--- .../estimators/counters/op_counters/norm_layer_counter.py | 4 ++-- 6 files changed, 9 
insertions(+), 8 deletions(-) diff --git a/mmrazor/engine/hooks/dmcp_subnet_hook.py b/mmrazor/engine/hooks/dmcp_subnet_hook.py index 370f7eede..b443c4707 100644 --- a/mmrazor/engine/hooks/dmcp_subnet_hook.py +++ b/mmrazor/engine/hooks/dmcp_subnet_hook.py @@ -28,7 +28,6 @@ def _save_subnet(self, arch_space_dict, save_path): @master_only def after_run(self, runner): - import pdb;pdb.set_trace() model = getattr(runner.model, 'module', runner.model) runner.logger.info('Sampling...') diff --git a/mmrazor/models/algorithms/pruning/dmcp.py b/mmrazor/models/algorithms/pruning/dmcp.py index 9c71009fa..8e3c3e985 100644 --- a/mmrazor/models/algorithms/pruning/dmcp.py +++ b/mmrazor/models/algorithms/pruning/dmcp.py @@ -383,7 +383,7 @@ def distill_step( # update arch parameters if self.module.arch_train \ - and self.module._iter % self.modqule.arch_train_freq == 0: + and self.module._iter % self.画.arch_train_freq == 0: with optim_wrapper['mutator'].optim_context(self): optim_wrapper['mutator'].zero_grad() mutator_loss = self.module._update_arch_params( diff --git a/mmrazor/models/mutables/mutable_channel/sequential_mutable_channel.py b/mmrazor/models/mutables/mutable_channel/sequential_mutable_channel.py index c2b4f9291..f9bcf8fdf 100644 --- a/mmrazor/models/mutables/mutable_channel/sequential_mutable_channel.py +++ b/mmrazor/models/mutables/mutable_channel/sequential_mutable_channel.py @@ -57,6 +57,9 @@ def current_mask(self) -> torch.Tensor: """Return current mask.""" return self.mask.bool() + @current_mask.setter + def current_mask(self, value): + self._current_mask = value # methods for def fix_chosen(self, chosen=...): diff --git a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py index 6cc292b3c..20168a177 100644 --- a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py @@ -167,5 +167,5 @@ def set_choices(self, 
choices: Dict[int, Any]) -> None: choice = choices[group_id] for module in modules: module.current_choice = choice - module.mutable_channel.traceable_choice = choice + module.mutable_channel.current_mask = choice diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py b/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py index 5b2397168..2865780d5 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py @@ -13,7 +13,6 @@ class ConvCounter(BaseCounter): def add_count_hook(module, input, output): """Calculate FLOPs and params based on the size of input & output.""" # Can have multiple inputs, getting the first one - import pdb;pdb.set_trace() input = input[0] batch_size = input.shape[0] @@ -77,8 +76,8 @@ def add_count_hook(module: nn.Conv2d, input, output): kernel_dims = list(module.kernel_size) mutable_channel = list(module.mutable_attrs['out_channels'].mutable_channels.values()) - if hasattr(mutable_channel[0], 'traceable_choice'): - out_channels = mutable_channel[0].traceable_choice + if hasattr(mutable_channel[0], '_current_mask'): + out_channels = mutable_channel[0]._current_mask else: out_channels = module.mutable_attrs['out_channels'].activated_channels in_channels = module.mutable_attrs['in_channels'].activated_channels diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py b/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py index e061a2182..621219b47 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py @@ -78,8 +78,8 @@ def add_count_hook(module, input, output): B, C, H, W = input.shape mutable_channel = 
list(module.mutable_attrs['num_features'].mutable_channels.values()) - if hasattr(mutable_channel[0], 'traceable_choice'): - C = mutable_channel[0].traceable_choice + if hasattr(mutable_channel[0], '_current_mask'): + C = mutable_channel[0]._current_mask batch_flops = B * C * H * W if getattr(module, 'affine', False): From b41acc3664b00a017089ab3a868959dfc1d52af2 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Fri, 6 Jan 2023 10:31:52 +0800 Subject: [PATCH 09/59] replace current_mask with activated_tensor_channel --- .../mutables/mutable_channel/sequential_mutable_channel.py | 3 --- .../mutables/mutable_channel/units/dmcp_channel_unit.py | 1 + .../models/mutators/channel_mutator/dmcp_channel_mutator.py | 2 +- .../estimators/counters/op_counters/conv_layer_counter.py | 4 ++-- .../estimators/counters/op_counters/norm_layer_counter.py | 4 ++-- 5 files changed, 6 insertions(+), 8 deletions(-) diff --git a/mmrazor/models/mutables/mutable_channel/sequential_mutable_channel.py b/mmrazor/models/mutables/mutable_channel/sequential_mutable_channel.py index f9bcf8fdf..c2b4f9291 100644 --- a/mmrazor/models/mutables/mutable_channel/sequential_mutable_channel.py +++ b/mmrazor/models/mutables/mutable_channel/sequential_mutable_channel.py @@ -57,9 +57,6 @@ def current_mask(self) -> torch.Tensor: """Return current mask.""" return self.mask.bool() - @current_mask.setter - def current_mask(self, value): - self._current_mask = value # methods for def fix_chosen(self, chosen=...): diff --git a/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py b/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py index 7fb738016..51b6798df 100644 --- a/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py +++ b/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py @@ -36,6 +36,7 @@ def __init__(self, min_ratio: float = 0.5) -> None: super().__init__(num_channels, choice_mode, divisor, min_value, min_ratio) + 
self.mutable_channel.activated_tensor_channels = None def prepare_for_pruning(self, model: nn.Module): """In ``DMCPChannelGroup`` nn.BatchNorm2d is replaced with MixedBatchNorm2d.""" diff --git a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py index 20168a177..6249ec976 100644 --- a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py @@ -167,5 +167,5 @@ def set_choices(self, choices: Dict[int, Any]) -> None: choice = choices[group_id] for module in modules: module.current_choice = choice - module.mutable_channel.current_mask = choice + module.mutable_channel.activated_tensor_channels = choice diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py b/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py index 2865780d5..18cd8d5a6 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py @@ -76,8 +76,8 @@ def add_count_hook(module: nn.Conv2d, input, output): kernel_dims = list(module.kernel_size) mutable_channel = list(module.mutable_attrs['out_channels'].mutable_channels.values()) - if hasattr(mutable_channel[0], '_current_mask'): - out_channels = mutable_channel[0]._current_mask + if hasattr(mutable_channel[0], 'activated_tensor_channels'): + out_channels = mutable_channel[0].activated_tensor_channels else: out_channels = module.mutable_attrs['out_channels'].activated_channels in_channels = module.mutable_attrs['in_channels'].activated_channels diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py b/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py index 621219b47..1a510e0b9 100644 --- 
a/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py @@ -78,8 +78,8 @@ def add_count_hook(module, input, output): B, C, H, W = input.shape mutable_channel = list(module.mutable_attrs['num_features'].mutable_channels.values()) - if hasattr(mutable_channel[0], '_current_mask'): - C = mutable_channel[0]._current_mask + if hasattr(mutable_channel[0], 'activated_tensor_channels'): + C = mutable_channel[0].activated_tensor_channels batch_flops = B * C * H * W if getattr(module, 'affine', False): From 41b540c1c5194f48395510efe47d6993c3097e96 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Fri, 6 Jan 2023 14:15:43 +0800 Subject: [PATCH 10/59] update subnet training --- mmrazor/engine/hooks/dmcp_subnet_hook.py | 2 ++ mmrazor/models/algorithms/pruning/dmcp.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/mmrazor/engine/hooks/dmcp_subnet_hook.py b/mmrazor/engine/hooks/dmcp_subnet_hook.py index b443c4707..e3060eda3 100644 --- a/mmrazor/engine/hooks/dmcp_subnet_hook.py +++ b/mmrazor/engine/hooks/dmcp_subnet_hook.py @@ -7,6 +7,8 @@ from mmengine.hooks import Hook from mmengine.registry import HOOKS +DATA_BATCH = Optional[Sequence[dict]] + @HOOKS.register_module() class DMCPSubnetHook(Hook): diff --git a/mmrazor/models/algorithms/pruning/dmcp.py b/mmrazor/models/algorithms/pruning/dmcp.py index 8e3c3e985..457755f9a 100644 --- a/mmrazor/models/algorithms/pruning/dmcp.py +++ b/mmrazor/models/algorithms/pruning/dmcp.py @@ -77,8 +77,10 @@ def __init__(self, self.is_supernet = True def _load_fix_subnet(self, save_path): + from mmrazor.structures import load_fix_subnet with open(save_path) as file: self.mutator.set_choices(yaml.load(file.read())) + load_fix_subnet(self.architecture, save_path) def _build_distiller( self, distiller: VALID_DISTILLER_TYPE) -> ConfigurableDistiller: From 59910464e6df960032e63db19984aa5f4bd0e91f Mon Sep 17 00:00:00 2001 From: 
XiaotongLu Date: Mon, 9 Jan 2023 14:42:11 +0800 Subject: [PATCH 11/59] fix ci --- .../mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py | 2 +- mmrazor/engine/__init__.py | 2 +- mmrazor/engine/hooks/dmcp_subnet_hook.py | 5 ++--- mmrazor/models/algorithms/pruning/__init__.py | 4 +++- mmrazor/models/algorithms/pruning/dmcp.py | 6 +++--- .../dynamic_ops/bricks/__init__.py | 6 +++--- .../dynamic_ops/bricks/dynamic_norm.py | 18 +++++++++--------- mmrazor/models/mutables/__init__.py | 6 +++--- .../mutables/mutable_channel/__init__.py | 8 ++++---- .../mutable_channel/units/dmcp_channel_unit.py | 9 ++------- mmrazor/models/mutators/__init__.py | 5 +++-- .../channel_mutator/dmcp_channel_mutator.py | 2 +- .../counters/op_counters/__init__.py | 8 ++++---- .../counters/op_counters/conv_layer_counter.py | 8 ++++---- .../counters/op_counters/norm_layer_counter.py | 6 ++++-- tests/test_models/test_algorithms/test_dmcp.py | 2 +- 16 files changed, 48 insertions(+), 49 deletions(-) diff --git a/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py b/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py index 52709dc38..eb5cf7e49 100644 --- a/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py +++ b/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py @@ -62,4 +62,4 @@ model_wrapper_cfg = dict( type='mmrazor.DMCPDDP', broadcast_buffers=False, - find_unused_parameters=True) \ No newline at end of file + find_unused_parameters=True) diff --git a/mmrazor/engine/__init__.py b/mmrazor/engine/__init__.py index ef851673f..0bd97c24d 100644 --- a/mmrazor/engine/__init__.py +++ b/mmrazor/engine/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from .hooks import DumpSubnetHook, EstimateResourcesHook, DMCPSubnetHook +from .hooks import DMCPSubnetHook, DumpSubnetHook, EstimateResourcesHook from .optimizers import SeparateOptimWrapperConstructor from .runner import (DartsEpochBasedTrainLoop, DartsIterBasedTrainLoop, EvolutionSearchLoop, GreedySamplerTrainLoop, diff --git a/mmrazor/engine/hooks/dmcp_subnet_hook.py b/mmrazor/engine/hooks/dmcp_subnet_hook.py index e3060eda3..9cb567dd4 100644 --- a/mmrazor/engine/hooks/dmcp_subnet_hook.py +++ b/mmrazor/engine/hooks/dmcp_subnet_hook.py @@ -28,7 +28,6 @@ def _save_subnet(self, arch_space_dict, save_path): with open(save_path, 'w') as file: file.write(yaml.dump(_cfg, allow_unicode=True)) - @master_only def after_run(self, runner): model = getattr(runner.model, 'module', runner.model) runner.logger.info('Sampling...') @@ -53,8 +52,8 @@ def after_run(self, runner): runner.logger.info( f'Excepted sample(ES) arch with FlOP(MB):{cur_flops}') else: - save_path = os.path.join(root_dir, - 'subnet_{}.yaml'.format(i + 1)) + save_path = os.path.join( + root_dir, 'subnet_{}.yaml'.format(i + 1)) runner.logger.info( f'Driect sample(DS) arch with FlOP(MB): {cur_flops}') self._save_subnet(model.mutator.current_choices, save_path) diff --git a/mmrazor/models/algorithms/pruning/__init__.py b/mmrazor/models/algorithms/pruning/__init__.py index d54d32b14..4958938a7 100644 --- a/mmrazor/models/algorithms/pruning/__init__.py +++ b/mmrazor/models/algorithms/pruning/__init__.py @@ -3,4 +3,6 @@ from .dmcp import DMCP, DMCPDDP from .slimmable_network import SlimmableNetwork, SlimmableNetworkDDP -__all__ = ['SlimmableNetwork', 'SlimmableNetworkDDP', 'DCFF', 'DMCP', 'DMCPDDP'] +__all__ = [ + 'SlimmableNetwork', 'SlimmableNetworkDDP', 'DCFF', 'DMCP', 'DMCPDDP' + ] diff --git a/mmrazor/models/algorithms/pruning/dmcp.py b/mmrazor/models/algorithms/pruning/dmcp.py index 457755f9a..58547b9eb 100644 --- a/mmrazor/models/algorithms/pruning/dmcp.py +++ b/mmrazor/models/algorithms/pruning/dmcp.py @@ 
-23,14 +23,14 @@ VALID_DISTILLER_TYPE = Union[ConfigurableDistiller, Dict, Any] -from mmrazor.models.mutators import DMCPChannelMutator -from mmrazor.models.mutators import ChannelMutator +from mmrazor.models.mutators import ChannelMutator, DMCPChannelMutator LossResults = Dict[str, torch.Tensor] TensorResults = Union[Tuple[torch.Tensor], torch.Tensor] PredictResults = List[BaseDataElement] ForwardResults = Union[LossResults, TensorResults, PredictResults] + @MODELS.register_module() class DMCP(BaseAlgorithm): @@ -255,7 +255,7 @@ def _compute_flops_loss(self, expected_flops): def calc_current_flops(self): estimator = ResourceEstimator(units=None) model = getattr(self, 'module', self) - estimation = estimator.estimate(model=model.architecture.backbone,\) + estimation = estimator.estimate(model=model.architecture.backbone) return estimation['flops'] def forward(self, diff --git a/mmrazor/models/architectures/dynamic_ops/bricks/__init__.py b/mmrazor/models/architectures/dynamic_ops/bricks/__init__.py index c41dd0897..6992be443 100644 --- a/mmrazor/models/architectures/dynamic_ops/bricks/__init__.py +++ b/mmrazor/models/architectures/dynamic_ops/bricks/__init__.py @@ -4,9 +4,9 @@ from .dynamic_embed import DynamicPatchEmbed from .dynamic_linear import DynamicLinear from .dynamic_multi_head_attention import DynamicMultiheadAttention -from .dynamic_norm import (DynamicBatchNorm1d, DynamicBatchNorm2d, - DynamicBatchNorm3d, DynamicLayerNorm, - SwitchableBatchNorm2d, DMCPBatchNorm2d) +from .dynamic_norm import (DMCPBatchNorm2d, DynamicBatchNorm1d, + DynamicBatchNorm2d, DynamicBatchNorm3d, + DynamicLayerNorm, SwitchableBatchNorm2d) from .dynamic_relative_position import DynamicRelativePosition2D __all__ = [ diff --git a/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py b/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py index 0519df80a..4aecca7aa 100644 --- a/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py +++ 
b/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py @@ -17,6 +17,7 @@ PartialType = Callable[[Any, Optional[nn.Parameter]], Tuple] + class _DynamicBatchNorm(_BatchNorm, DynamicBatchNormMixin): """Dynamic BatchNormxd OP. @@ -418,10 +419,8 @@ def convert_from(cls, module: _BatchNorm): def forward(self, input: Tensor, - arch_param = None, - arch_attr = None): - # arch_param: Optional[nn.Parameter] = None, - # arch_attr: Optional[Tuple] = None) -> Tensor: + arch_param=None, + arch_attr=None): out = self.forward_batchnorm(input) if arch_param is not None: out = self.forward_arch_param(out, arch_param, arch_attr) @@ -464,7 +463,7 @@ def forward_batchnorm(self, input: Tensor) -> Tensor: self.running_var.masked_scatter_(out_mask, running_var) return out - + def forward_arch_param(self, input: Tensor, arch_param, arch_attr): size_x = input.size() (group_size, num_groups, min_ch) = arch_attr @@ -484,12 +483,13 @@ def forward_arch_param(self, input: Tensor, arch_param, arch_attr): tp_group_x = tp_group_x.view(num_groups, -1) * prob[:num_groups] tp_group_x = tp_group_x.view(size_tp_group) - out = torch.cat([tp_x[:min_ch], - tp_group_x]).transpose(0, 1).contiguous() + out = torch.cat( + [tp_x[:min_ch], tp_group_x]).transpose(0, 1).contiguous() return out - def set_forward_args(self, arch_param: nn.Parameter, arch_attr:Tuple) -> None: + def set_forward_args( + self, arch_param: nn.Parameter, arch_attr: Tuple) -> None: """Interface for modifying the arch_param using partial.""" forward_with_default_args: PartialType = \ partial(self.forward, arch_param=arch_param, arch_attr=arch_attr) - setattr(self, 'forward', forward_with_default_args) \ No newline at end of file + setattr(self, 'forward', forward_with_default_args) diff --git a/mmrazor/models/mutables/__init__.py b/mmrazor/models/mutables/__init__.py index 3efba7e75..baf12b092 100644 --- a/mmrazor/models/mutables/__init__.py +++ b/mmrazor/models/mutables/__init__.py @@ -5,11 +5,11 @@ OneShotMutableChannel, 
SimpleMutableChannel, SquentialMutableChannel) from .mutable_channel.units import (ChannelUnitType, DCFFChannelUnit, - L1MutableChannelUnit, MutableChannelUnit, + DMCPChannelUnit, L1MutableChannelUnit, + MutableChannelUnit, OneShotMutableChannelUnit, SequentialMutableChannelUnit, - SlimmableChannelUnit, - DMCPChannelUnit) + SlimmableChannelUnit) from .mutable_module import (DiffChoiceRoute, DiffMutableModule, DiffMutableOP, OneHotMutableOP, OneShotMutableModule, OneShotMutableOP) diff --git a/mmrazor/models/mutables/mutable_channel/__init__.py b/mmrazor/models/mutables/mutable_channel/__init__.py index da4e1370b..1dd78cb69 100644 --- a/mmrazor/models/mutables/mutable_channel/__init__.py +++ b/mmrazor/models/mutables/mutable_channel/__init__.py @@ -4,10 +4,10 @@ from .oneshot_mutable_channel import OneShotMutableChannel from .sequential_mutable_channel import SquentialMutableChannel from .simple_mutable_channel import SimpleMutableChannel -from .units import (ChannelUnitType, DCFFChannelUnit, L1MutableChannelUnit, - MutableChannelUnit, OneShotMutableChannelUnit, - SequentialMutableChannelUnit, SlimmableChannelUnit, - DMCPChannelUnit) +from .units import (ChannelUnitType, DCFFChannelUnit, DMCPChannelUnit, + L1MutableChannelUnit, MutableChannelUnit, + OneShotMutableChannelUnit, SequentialMutableChannelUnit, + SlimmableChannelUnit) __all__ = [ 'SimpleMutableChannel', 'L1MutableChannelUnit', diff --git a/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py b/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py index 51b6798df..f069a7f7e 100644 --- a/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py +++ b/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py @@ -1,6 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from typing import List, Union - import torch.nn as nn from mmrazor.models.architectures import dynamic_ops @@ -17,10 +15,6 @@ class DMCPChannelUnit(SequentialMutableChannelUnit): Args: num_channels (int): The raw number of channels. - candidate_choices (List[Union[int, float]], optional): - A list of candidate width numbers or ratios. Each - candidate indicates how many channels to be reserved. - Defaults to [1.0](choice_mode='number'). choice_mode (str, optional): Mode of candidates. One of "ratio" or "number". Defaults to 'ratio'. divisor (int): Used to make choice divisible. @@ -39,7 +33,8 @@ def __init__(self, self.mutable_channel.activated_tensor_channels = None def prepare_for_pruning(self, model: nn.Module): - """In ``DMCPChannelGroup`` nn.BatchNorm2d is replaced with MixedBatchNorm2d.""" + """In ``DMCPChannelGroup`` nn.BatchNorm2d is replaced with + DMCPBatchNorm2d.""" self._replace_with_dynamic_ops( model, { nn.Conv2d: dynamic_ops.DynamicConv2d, diff --git a/mmrazor/models/mutators/__init__.py b/mmrazor/models/mutators/__init__.py index dab7a955b..0bb318dd5 100644 --- a/mmrazor/models/mutators/__init__.py +++ b/mmrazor/models/mutators/__init__.py @@ -1,6 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from .channel_mutator import (ChannelMutator, DCFFChannelMutator, DMCPChannelMutator, - OneShotChannelMutator, SlimmableChannelMutator) +from .channel_mutator import (ChannelMutator, DCFFChannelMutator, + DMCPChannelMutator, OneShotChannelMutator, + SlimmableChannelMutator) from .module_mutator import (DiffModuleMutator, ModuleMutator, OneShotModuleMutator) from .value_mutator import DynamicValueMutator, ValueMutator diff --git a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py index 6249ec976..ad664a93e 100644 --- a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py @@ -54,7 +54,7 @@ def _build_arch_param(self, num_choices) -> nn.Parameter: """Build learnable architecture parameters.""" return nn.Parameter(torch.zeros(num_choices)) - def prepare_arch_params(self, supernet; Module) -> None: + def prepare_arch_params(self, supernet: Module) -> None: # Associate all the op's in the model with their corresponding arch parameters self.arch_params = nn.ParameterDict() self._op_arch_align = dict() diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/__init__.py b/mmrazor/models/task_modules/estimators/counters/op_counters/__init__.py index 9044c9788..53e19a709 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/__init__.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/__init__.py @@ -7,10 +7,10 @@ from .deconv_layer_counter import ConvTranspose2dCounter from .linear_layer_counter import LinearCounter from .norm_layer_counter import (BatchNorm1dCounter, BatchNorm2dCounter, - BatchNorm3dCounter, GroupNormCounter, - InstanceNorm1dCounter, InstanceNorm2dCounter, - InstanceNorm3dCounter, LayerNormCounter, - DMCPBatchNorm2dCounter) + BatchNorm3dCounter, DMCPBatchNorm2dCounter, + GroupNormCounter, InstanceNorm1dCounter, + InstanceNorm2dCounter, 
InstanceNorm3dCounter, + LayerNormCounter) from .pooling_layer_counter import * # noqa: F403, F405, F401 from .upsample_layer_counter import UpsampleCounter diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py b/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py index 18cd8d5a6..d098439cd 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py @@ -75,11 +75,11 @@ def add_count_hook(module: nn.Conv2d, input, output): kernel_dims = list(module.kernel_size) - mutable_channel = list(module.mutable_attrs['out_channels'].mutable_channels.values()) + out_channels = module.mutable_attrs['out_channels'].activated_channels + mutable_channel = list( + module.mutable_attrs['out_channels'].mutable_channels.values()) if hasattr(mutable_channel[0], 'activated_tensor_channels'): out_channels = mutable_channel[0].activated_tensor_channels - else: - out_channels = module.mutable_attrs['out_channels'].activated_channels in_channels = module.mutable_attrs['in_channels'].activated_channels groups = module.groups @@ -102,4 +102,4 @@ def add_count_hook(module: nn.Conv2d, input, output): overall_flops = overall_conv_flops + bias_flops module.__flops__ += overall_flops - module.__params__ += int(overall_params) \ No newline at end of file + module.__params__ += int(overall_params) diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py b/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py index 1a510e0b9..f3bce9151 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py @@ -11,6 +11,7 @@ class BNCounter(BaseCounter): @staticmethod def add_count_hook(module, input, output): + """Calculate FLOPs and 
params based on the size of input & output.""" input = input[0] batch_flops = np.prod(input.shape) if getattr(module, 'affine', False): @@ -70,14 +71,15 @@ class GroupNormCounter(BNCounter): @TASK_UTILS.register_module() class DMCPBatchNorm2dCounter(BNCounter): """FLOPs/params counter for DynamicBatchNorm2d module.""" - + @staticmethod def add_count_hook(module, input, output): """Calculate FLOPs and params based on the size of input & output.""" input = input[0] B, C, H, W = input.shape - mutable_channel = list(module.mutable_attrs['num_features'].mutable_channels.values()) + mutable_channel = list( + module.mutable_attrs['num_features'].mutable_channels.values()) if hasattr(mutable_channel[0], 'activated_tensor_channels'): C = mutable_channel[0].activated_tensor_channels diff --git a/tests/test_models/test_algorithms/test_dmcp.py b/tests/test_models/test_algorithms/test_dmcp.py index c68e8c2c1..9b5c983b9 100644 --- a/tests/test_models/test_algorithms/test_dmcp.py +++ b/tests/test_models/test_algorithms/test_dmcp.py @@ -13,7 +13,7 @@ channel_unit_cfg={ 'type': 'DMCPChannelUnit' }, - parse_cfg: Dict = dict( + parse_cfg = dict( type='ChannelAnalyzer', demo_input=(1, 3, 224, 224), tracer_type='BackwardTracer'),)) From c6a01c3abe4bbb36db3cee58e0989890f98a0627 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Mon, 9 Jan 2023 15:57:47 +0800 Subject: [PATCH 12/59] fix ci --- configs/pruning/mmcls/dmcp/README.md | 10 +- mmrazor/engine/hooks/dmcp_subnet_hook.py | 3 +- mmrazor/models/algorithms/pruning/dmcp.py | 101 +++++++++--------- .../dynamic_ops/bricks/dynamic_norm.py | 4 +- .../channel_mutator/dmcp_channel_mutator.py | 35 +++--- .../test_models/test_algorithms/test_dmcp.py | 2 +- 6 files changed, 80 insertions(+), 75 deletions(-) diff --git a/configs/pruning/mmcls/dmcp/README.md b/configs/pruning/mmcls/dmcp/README.md index 3e4bd3850..7e5f774a5 100644 --- a/configs/pruning/mmcls/dmcp/README.md +++ b/configs/pruning/mmcls/dmcp/README.md @@ -3,7 +3,7 @@ ## Abstract -Recent 
works imply that the channel pruning can be regarded as searching optimal sub-structure from unpruned networks. However, existing works based on this observation require training and evaluating a large number of structures, which limits their application. In this paper, we propose a novel differentiable method for channel pruning, named Differentiable Markov Channel Pruning (DMCP), to efficiently search the optimal sub-structure. Our method is differentiable and can be directly optimized by gradient descent with respect to standard task loss and budget regularization (e.g. FLOPs constraint). In DMCP, we model the channel pruning as a Markov process, in which each state represnets for retaining the corresponding channel during pruning, and transitions between states denote the pruning process. In the end, our method is able to implicitly select the proper number of channels in each layer by the Markov process with optimized transitions. To validate the effectiveness of our method, we perform extensive experiments on Imagenet with ResNet and MobilenetV2. Results show our method can achieve consistent improvement than stateof-the-art pruning methods in various FLOPs settings. +Recent works imply that the channel pruning can be regarded as searching optimal sub-structure from unpruned networks. However, existing works based on this observation require training and evaluating a large number of structures, which limits their application. In this paper, we propose a novel differentiable method for channel pruning, named Differentiable Markov Channel Pruning (DMCP), to efficiently search the optimal sub-structure. Our method is differentiable and can be directly optimized by gradient descent with respect to standard task loss and budget regularization (e.g. FLOPs constraint). 
In DMCP, we model the channel pruning as a Markov process, in which each state represents for retaining the corresponding channel during pruning, and transitions between states denote the pruning process. In the end, our method is able to implicitly select the proper number of channels in each layer by the Markov process with optimized transitions. To validate the effectiveness of our method, we perform extensive experiments on Imagenet with ResNet and MobilenetV2. Results show our method can achieve consistent improvement than stateof-the-art pruning methods in various FLOPs settings. @@ -27,16 +27,16 @@ Recent works imply that the channel pruning can be regarded as searching optimal ## Getting Started -#### Train DMCP from scrach +#### Train DMCP from scratch ```bash -sh tools/slurm_train.sh $PARTION $JOB_NAME \ +sh tools/slurm_train.sh $PARTITION $JOB_NAME \ configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py \ --work-dir $WORK_DIR ``` #### After the previous steps, retrain the selected sub-network based on #### the output structure 'DMCP_SUBNET_IMAGENET.yaml' ```bash -sh tools/slurm_train.sh $PARTION $JOB_NAME \ +sh tools/slurm_train.sh $PARTITION $JOB_NAME \ configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py \ --work-dir $WORK_DIR -``` \ No newline at end of file +``` diff --git a/mmrazor/engine/hooks/dmcp_subnet_hook.py b/mmrazor/engine/hooks/dmcp_subnet_hook.py index 9cb567dd4..a2d7f37b1 100644 --- a/mmrazor/engine/hooks/dmcp_subnet_hook.py +++ b/mmrazor/engine/hooks/dmcp_subnet_hook.py @@ -1,9 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import os -import yaml from typing import Optional, Sequence -from mmengine.dist import master_only +import yaml from mmengine.hooks import Hook from mmengine.registry import HOOKS diff --git a/mmrazor/models/algorithms/pruning/dmcp.py b/mmrazor/models/algorithms/pruning/dmcp.py index 58547b9eb..d791648c9 100644 --- a/mmrazor/models/algorithms/pruning/dmcp.py +++ b/mmrazor/models/algorithms/pruning/dmcp.py @@ -1,30 +1,25 @@ import os +import random from typing import Any, Dict, List, Optional, Tuple, Union -import random -from typing_extensions import Self import torch import yaml - -from mmengine import MessageHub, MMLogger +from mmengine import MessageHub from mmengine.model import BaseModel, MMDistributedDataParallel from mmengine.optim import OptimWrapper from mmengine.structures import BaseDataElement from torch import nn from mmrazor.models.distillers import ConfigurableDistiller -from mmrazor.models.mutators.base_mutator import BaseMutator +from mmrazor.models.mutators import ChannelMutator, DMCPChannelMutator from mmrazor.models.utils import add_prefix from mmrazor.registry import MODEL_WRAPPERS, MODELS from mmrazor.utils import ValidFixMutable -from mmrazor.structures.subnet.fix_subnet import _dynamic_to_static -from ..base import BaseAlgorithm from ...task_modules.estimators import ResourceEstimator +from ..base import BaseAlgorithm VALID_DISTILLER_TYPE = Union[ConfigurableDistiller, Dict, Any] -from mmrazor.models.mutators import ChannelMutator, DMCPChannelMutator - LossResults = Dict[str, torch.Tensor] TensorResults = Union[Tuple[torch.Tensor], torch.Tensor] PredictResults = List[BaseDataElement] @@ -42,16 +37,17 @@ def __init__(self, channel_unit_cfg=dict(type='DMCPChannelUnit')), fix_subnet: Optional[ValidFixMutable] = None, data_preprocessor: Optional[Union[Dict, nn.Module]] = None, - strategy: List = ['max', 'min', 'scheduled_random', 'arch_random'], + strategy: List = \ + ['max', 'min', 'scheduled_random', 'arch_random'], init_cfg: Optional[Dict] = 
None, - arch_start_train=10000, + arch_start_train=10000, arch_train_freq=500, - distillation_times=2000, - target_flops=150, # MFLOPs + distillation_times=2000, + target_flops=150, flops_loss_type: str = 'log_l1', flop_loss_weight: float = 1.0) -> None: super().__init__(architecture, data_preprocessor, init_cfg) - + self.arch_start_train = arch_start_train self.strategy = strategy self.distillation_times = distillation_times @@ -76,7 +72,7 @@ def __init__(self, else: self.is_supernet = True - def _load_fix_subnet(self, save_path): + def _load_fix_subnet(self, save_path): from mmrazor.structures import load_fix_subnet with open(save_path) as file: self.mutator.set_choices(yaml.load(file.read())) @@ -104,7 +100,7 @@ def train_step(self, data: List[dict], optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]: if not self.arch_train and \ - self._iter > self.arch_start_train: + self._iter > self.arch_start_train: self.arch_train = True if self.is_supernet: @@ -123,7 +119,8 @@ def distill_step( subnet_losses.update(soft_loss) parsed_subnet_losses, _ = self.parse_losses(subnet_losses) - optim_wrapper['architecture'].update_params(parsed_subnet_losses) + optim_wrapper['architecture'].update_params( + parsed_subnet_losses) return subnet_losses @@ -131,9 +128,9 @@ def distill_step( data, True).values() total_losses = dict() - #update model parameters + # update model parameters for kind in self.strategy: - if kind in ('max'): + if kind in ('max'): self.set_subnet(mode='max') with optim_wrapper['architecture'].optim_context( self @@ -142,22 +139,26 @@ def distill_step( batch_inputs, data_samples, mode='loss') parsed_max_subnet_losses, _ = self.parse_losses( max_subnet_losses) - optim_wrapper['architecture'].update_params(parsed_max_subnet_losses) - total_losses.update(add_prefix(max_subnet_losses, 'max_subnet')) + optim_wrapper['architecture'].update_params( + parsed_max_subnet_losses) + total_losses.update( + add_prefix(max_subnet_losses, 'max_subnet')) elif kind in 
('min'): self.set_subnet(mode='min') - min_subnet_losses = distill_step(batch_inputs, data_samples) - total_losses.update(add_prefix(min_subnet_losses, 'min_subnet')) + min_subnet_losses =\ + distill_step(batch_inputs, data_samples) + total_losses.update( + add_prefix(min_subnet_losses, 'min_subnet')) elif kind in ('arch_random'): if self.arch_train: self.set_subnet(mode='direct') direct_subnet_losses = distill_step( batch_inputs, data_samples) total_losses.update( - add_prefix(direct_subnet_losses, + add_prefix(direct_subnet_losses, 'direct_subnet')) else: - self.set_subnet(mode='random') + self.set_subnet(mode='random') random_subnet_losses = distill_step( batch_inputs, data_samples) total_losses.update( @@ -170,10 +171,10 @@ def distill_step( direct_subnet_losses = distill_step( batch_inputs, data_samples) total_losses.update( - add_prefix(direct_subnet_losses, + add_prefix(direct_subnet_losses, 'direct_subnet')) else: - self.set_subnet(mode='random') + self.set_subnet(mode='random') random_subnet_losses = distill_step( batch_inputs, data_samples) total_losses.update( @@ -181,9 +182,9 @@ def distill_step( 'random_subnet')) self.cur_sample_prob *= 0.9999 - #update arch parameters + # update arch parameters if self.arch_train \ - and self._iter % self.arch_train_freq == 0: + and self._iter % self.arch_train_freq == 0: with optim_wrapper['mutator'].optim_context(self): optim_wrapper['mutator'].zero_grad() mutator_loss = self._update_arch_params( @@ -214,12 +215,12 @@ def _update_arch_params( expected_flops = self.calc_current_flops() flops_loss = self._compute_flops_loss(expected_flops).to( arch_loss['loss'].device) - parsed_flops_loss, _ = self.parse_losses({'loss':flops_loss}) + parsed_flops_loss, _ = self.parse_losses({'loss': flops_loss}) optim_wrapper['mutator'].update_params(parsed_flops_loss) - arch_params_loss.update(add_prefix({'loss':flops_loss}, 'flops')) + arch_params_loss.update(add_prefix({'loss': flops_loss}, 'flops')) self.train() return 
arch_params_loss - + def _compute_flops_loss(self, expected_flops): """Calculation of loss functions of arch parameters. @@ -292,7 +293,7 @@ def train_step(self, data: List[dict], optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]: if not self.module.arch_train and \ - self.module._iter > self.module.arch_start_train: + self.module._iter > self.module.arch_start_train: self.module.arch_train = True if self.module.is_supernet: @@ -302,7 +303,7 @@ def distill_step( subnet_losses = dict() with optim_wrapper['architecture'].optim_context( self - ), self.module.distiller.student_recorders: # type: ignore + ), self.module.distiller.student_recorders: hard_loss = self(batch_inputs, data_samples, mode='loss') soft_loss = self.module.distiller.compute_distill_losses() @@ -310,18 +311,20 @@ def distill_step( if self.module._iter > self.module.distillation_times: subnet_losses.update(soft_loss) - parsed_subnet_losses, _ = self.module.parse_losses(subnet_losses) - optim_wrapper['architecture'].update_params(parsed_subnet_losses) + parsed_subnet_losses, _ = \ + self.module.parse_losses(subnet_losses) + optim_wrapper['architecture'].update_params( + parsed_subnet_losses) return subnet_losses batch_inputs, data_samples = self.module.data_preprocessor( data, True).values() total_losses = dict() - #update model parameters + # update model parameters max_net_num = min_net_num = random_net_num = direct_net_num = 1 for kind in self.module.strategy: - if kind in ('max'): + if kind in ('max'): self.module.set_subnet(mode='max') with optim_wrapper['architecture'].optim_context( self @@ -330,13 +333,15 @@ def distill_step( batch_inputs, data_samples, mode='loss') parsed_max_subnet_losses, _ = self.module.parse_losses( max_subnet_losses) - optim_wrapper['architecture'].update_params(parsed_max_subnet_losses) + optim_wrapper['architecture'].update_params( + parsed_max_subnet_losses) total_losses.update(add_prefix(max_subnet_losses, f'max_subnet{max_net_num}')) max_net_num += 1 elif 
kind in ('min'): self.module.set_subnet(mode='min') - min_subnet_losses = distill_step(batch_inputs, data_samples) + min_subnet_losses = distill_step( + batch_inputs, data_samples) total_losses.update(add_prefix(min_subnet_losses, f'min_subnet{min_net_num}')) min_net_num += 1 @@ -346,11 +351,11 @@ def distill_step( direct_subnet_losses = distill_step( batch_inputs, data_samples) total_losses.update( - add_prefix(direct_subnet_losses, + add_prefix(direct_subnet_losses, f'direct_subnet{direct_net_num}')) direct_net_num += 1 else: - self.module.set_subnet(mode='random') + self.module.set_subnet(mode='random') random_subnet_losses = distill_step( batch_inputs, data_samples) total_losses.update( @@ -364,11 +369,11 @@ def distill_step( direct_subnet_losses = distill_step( batch_inputs, data_samples) total_losses.update( - add_prefix(direct_subnet_losses, + add_prefix(direct_subnet_losses, f'direct_subnet{direct_net_num}')) direct_net_num += 1 else: - self.module.set_subnet(mode='random') + self.module.set_subnet(mode='random') random_subnet_losses = distill_step( batch_inputs, data_samples) total_losses.update( @@ -378,14 +383,14 @@ def distill_step( self.module.cur_sample_prob *= 0.9999 with optim_wrapper['mutator'].optim_context(self): - optim_wrapper['mutator'].zero_grad() - mutator_loss = self.module._update_arch_params( - batch_inputs, data_samples, optim_wrapper, mode='loss') + optim_wrapper['mutator'].zero_grad() + mutator_loss = self.module._update_arch_params( + batch_inputs, data_samples, optim_wrapper, mode='loss') total_losses.update(mutator_loss) # update arch parameters if self.module.arch_train \ - and self.module._iter % self.画.arch_train_freq == 0: + and self.module._iter % self.module.arch_train_freq == 0: with optim_wrapper['mutator'].optim_context(self): optim_wrapper['mutator'].zero_grad() mutator_loss = self.module._update_arch_params( diff --git a/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py 
b/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py index 4aecca7aa..78e4fddcc 100644 --- a/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py +++ b/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Any, Callable, Dict, List, Optional, Tuple from functools import partial +from typing import Any, Callable, Dict, List, Optional, Tuple import torch import torch.nn as nn @@ -488,7 +488,7 @@ def forward_arch_param(self, input: Tensor, arch_param, arch_attr): return out def set_forward_args( - self, arch_param: nn.Parameter, arch_attr: Tuple) -> None: + self, arch_param: nn.Parameter, arch_attr: Tuple) -> None: """Interface for modifying the arch_param using partial.""" forward_with_default_args: PartialType = \ partial(self.forward, arch_param=arch_param, arch_attr=arch_attr) diff --git a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py index ad664a93e..97e451218 100644 --- a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py @@ -1,15 +1,17 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Any, Dict, Type, Union import random +from typing import Any, Dict, Type, Union import torch import torch.nn as nn from torch import tensor -from torch.nn import Module, ModuleList +from torch.nn import Module + from mmrazor.models.mutables import DMCPChannelUnit from mmrazor.registry import MODELS -from .channel_mutator import ChannelMutator, ChannelUnitType from ...architectures import DMCPBatchNorm2d +from .channel_mutator import ChannelMutator, ChannelUnitType + @MODELS.register_module() @@ -24,7 +26,7 @@ class DMCPChannelMutator(ChannelMutator[DMCPChannelUnit]): Defaults to dict( type='BackwardTracer', loss_calculator=dict(type='ImageClassifierPseudoLoss')). 
Change loss_calculator according to task and backbone. - pruning_cfg (Tuple): (min_sample_rate, max_sample_rate, sample_offset)). + pruning_cfg (Tuple): (min_sample_rate, max_sample_rate, sample_offset) """ def __init__(self, @@ -39,7 +41,6 @@ def __init__(self, super().__init__(channel_unit_cfg, parse_cfg, **kwargs) self.pruning_cfg = pruning_cfg - def prepare_from_supernet(self, supernet: Module) -> None: """Prepare from a model for pruning. @@ -53,26 +54,25 @@ def prepare_from_supernet(self, supernet: Module) -> None: def _build_arch_param(self, num_choices) -> nn.Parameter: """Build learnable architecture parameters.""" return nn.Parameter(torch.zeros(num_choices)) - + def prepare_arch_params(self, supernet: Module) -> None: - # Associate all the op's in the model with their corresponding arch parameters self.arch_params = nn.ParameterDict() self._op_arch_align = dict() self._arch_params_attr = dict() for group_id, module in self.search_groups.items(): - arch_message = self._generate_arch_message(module[0].mutable_channel.num_channels) + arch_message = self._generate_arch_message( + module[0].mutable_channel.num_channels) self._arch_params_attr[str(group_id)] = arch_message group_arch_param = self._build_arch_param(arch_message[1]) self.arch_params[str(group_id)] = group_arch_param for unit in module[0].output_related: self._op_arch_align[str(unit.name)] = str(group_id) - - # Associate all the BN in the model with their corresponding arch parameters + self._bn_arch_align = dict() for name, module in supernet.named_modules(): if isinstance(module, DMCPBatchNorm2d): - self._bn_arch_align[module] = self._op_arch_align[str(name)] + self._bn_arch_align[module] = self._op_arch_align[str(name)] def _generate_arch_message(self, out_channels: int) -> tuple: """ @@ -90,7 +90,7 @@ def _generate_arch_message(self, out_channels: int) -> tuple: min_ch = out_channels - (group_size * num_groups) assert min_ch > 0 assert group_size * num_groups + min_ch == out_channels - + 
return (group_size, num_groups, min_ch) def modify_supernet_forward(self, arch_train: str) -> bool: @@ -100,7 +100,8 @@ def modify_supernet_forward(self, arch_train: str) -> bool: arch_params_attr = self._arch_params_attr[str(group_id)] else: arch_param = arch_params_attr = None - module.set_forward_args(arch_param=arch_param, arch_attr=arch_params_attr) + module.set_forward_args( + arch_param=arch_param, arch_attr=arch_params_attr) def sample_subnet(self, mode: str, arch_train: bool) -> None: choices = dict() @@ -117,13 +118,14 @@ def _prune_by_arch(self, mode: str, group_id: int) -> Union[int, tensor]: Inputs: mode (list): one of ['max', 'min', 'random', 'direct', 'expected'] group_id (int): number of search_groups - + Outputs: channels (int): for mode 'max'/'min'/'random'/'dirext' channels (tensor): for mode 'expected' """ arch_param = self.arch_params[str(group_id)] - (group_size, num_groups, min_ch) = self._arch_params_attr[str(group_id)] + (group_size, num_groups, min_ch) =\ + self._arch_params_attr[str(group_id)] if mode == 'max': return min_ch + group_size * num_groups @@ -150,7 +152,7 @@ def _prune_by_arch(self, mode: str, group_id: int) -> Union[int, tensor]: return expected_channel else: raise NotImplementedError - + def set_choices(self, choices: Dict[int, Any]) -> None: """Set mutables' current choice according to choices sample by :func:`sample_choices`. 
@@ -168,4 +170,3 @@ def set_choices(self, choices: Dict[int, Any]) -> None: for module in modules: module.current_choice = choice module.mutable_channel.activated_tensor_channels = choice - diff --git a/tests/test_models/test_algorithms/test_dmcp.py b/tests/test_models/test_algorithms/test_dmcp.py index 9b5c983b9..38530565e 100644 --- a/tests/test_models/test_algorithms/test_dmcp.py +++ b/tests/test_models/test_algorithms/test_dmcp.py @@ -13,7 +13,7 @@ channel_unit_cfg={ 'type': 'DMCPChannelUnit' }, - parse_cfg = dict( + parse_cfg=dict( type='ChannelAnalyzer', demo_input=(1, 3, 224, 224), tracer_type='BackwardTracer'),)) From 8c4d8ea391954ebaa72a15d5f7a97952b98cb71c Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Mon, 9 Jan 2023 16:13:08 +0800 Subject: [PATCH 13/59] fix ci --- mmrazor/models/algorithms/pruning/dmcp.py | 4 ++-- .../mutators/channel_mutator/dmcp_channel_mutator.py | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/mmrazor/models/algorithms/pruning/dmcp.py b/mmrazor/models/algorithms/pruning/dmcp.py index d791648c9..98f0cf141 100644 --- a/mmrazor/models/algorithms/pruning/dmcp.py +++ b/mmrazor/models/algorithms/pruning/dmcp.py @@ -1,3 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
import os import random from typing import Any, Dict, List, Optional, Tuple, Union @@ -37,8 +38,7 @@ def __init__(self, channel_unit_cfg=dict(type='DMCPChannelUnit')), fix_subnet: Optional[ValidFixMutable] = None, data_preprocessor: Optional[Union[Dict, nn.Module]] = None, - strategy: List = \ - ['max', 'min', 'scheduled_random', 'arch_random'], + strategy: List = ['max', 'min', 'arch_random'], init_cfg: Optional[Dict] = None, arch_start_train=10000, arch_train_freq=500, diff --git a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py index 97e451218..056557b36 100644 --- a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py @@ -13,7 +13,6 @@ from .channel_mutator import ChannelMutator, ChannelUnitType - @MODELS.register_module() class DMCPChannelMutator(ChannelMutator[DMCPChannelUnit]): """DMCP channel mutable based channel mutator. It uses DMCPPChannelUnit. @@ -81,6 +80,10 @@ def _generate_arch_message(self, out_channels: int) -> tuple: 1. sampled by pruning rate (that is, maximum, minimum and random pruning rate) 2. 
sampled by probability + Inputs: + out_channels (int): channel num of conv layers + Outputs: + attr (tuple): (group_size, num_groups, min_ch) """ (min_rate, max_rate, rate_offset) = self.pruning_cfg @@ -103,7 +106,7 @@ def modify_supernet_forward(self, arch_train: str) -> bool: module.set_forward_args( arch_param=arch_param, arch_attr=arch_params_attr) - def sample_subnet(self, mode: str, arch_train: bool) -> None: + def sample_subnet(self, mode: str, arch_train: str) -> None: choices = dict() for group_id, _ in self.search_groups.items(): choices[group_id] = self._prune_by_arch(mode, group_id) From bc4bd39134e3ab1c8a9f25e81bb929d46b8cfa28 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Mon, 9 Jan 2023 17:38:32 +0800 Subject: [PATCH 14/59] fix readme.md --- configs/pruning/mmcls/dmcp/README.md | 21 +++++++++++-------- .../channel_mutator/dmcp_channel_mutator.py | 7 ++++--- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/configs/pruning/mmcls/dmcp/README.md b/configs/pruning/mmcls/dmcp/README.md index 7e5f774a5..94afce66a 100644 --- a/configs/pruning/mmcls/dmcp/README.md +++ b/configs/pruning/mmcls/dmcp/README.md @@ -1,11 +1,8 @@ # DMCP: Differentiable Markov Channel Pruning for Neural Networks - ## Abstract -Recent works imply that the channel pruning can be regarded as searching optimal sub-structure from unpruned networks. However, existing works based on this observation require training and evaluating a large number of structures, which limits their application. In this paper, we propose a novel differentiable method for channel pruning, named Differentiable Markov Channel Pruning (DMCP), to efficiently search the optimal sub-structure. Our method is differentiable and can be directly optimized by gradient descent with respect to standard task loss and budget regularization (e.g. FLOPs constraint). 
In DMCP, we model the channel pruning as a Markov process, in which each state represents for retaining the corresponding channel during pruning, and transitions between states denote the pruning process. In the end, our method is able to implicitly select the proper number of channels in each layer by the Markov process with optimized transitions. To validate the effectiveness of our method, we perform extensive experiments on Imagenet with ResNet and MobilenetV2. Results show our method can achieve consistent improvement than stateof-the-art pruning methods in various FLOPs settings. - - +Recent works imply that the channel pruning can be regarded as searching optimal sub-structure from unpruned networks. However, existing works based on this observation require training and evaluating a large number of structures, which limits their application. In this paper, we propose a novel differentiable method for channel pruning, named Differentiable Markov Channel Pruning (DMCP), to efficiently search the optimal sub-structure. Our method is differentiable and can be directly optimized by gradient descent with respect to standard task loss and budget regularization (e.g. FLOPs constraint). In DMCP, we model the channel pruning as a Markov process, in which each state represnets for retaining the corresponding channel during pruning, and transitions between states denote the pruning process. In the end, our method is able to implicitly select the proper number of channels in each layer by the Markov process with optimized transitions. To validate the effectiveness of our method, we perform extensive experiments on Imagenet with ResNet and MobilenetV2. Results show our method can achieve consistent improvement than stateof-the-art pruning methods in various FLOPs settings. 
## Citation @@ -20,23 +17,29 @@ Recent works imply that the channel pruning can be regarded as searching optimal ``` ## Results and models + ### 1.Classification + |Dataset| Supernet | Flops(M) | Top-1 (%) | Top-5 (%) | Config | Download | |:---------------------:|:---------------------:|:------:|:---------:|:--------:|:---------:|:------:| -|ImageNet| ResNet50 | - | - | - | [config](./dmcp_resnet50_supernet_8xb32.py) | [model] / [arch](./DMCP_SUBNET_IMAGENET.yaml)/ [log] | - +|ImageNet| ResNet50 | - | - | - | [config](./dmcp_resnet50_supernet_8xb32.py) | \[model\] / [arch](./DMCP_SUBNET_IMAGENET.yaml)/ \[log\] | ## Getting Started -#### Train DMCP from scratch + +#### Train DMCP from scrach + ```bash -sh tools/slurm_train.sh $PARTITION $JOB_NAME \ +sh tools/slurm_train.sh $PARTION $JOB_NAME \ configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py \ --work-dir $WORK_DIR ``` + #### After the previous steps, retrain the selected sub-network based on + #### the output structure 'DMCP_SUBNET_IMAGENET.yaml' + ```bash -sh tools/slurm_train.sh $PARTITION $JOB_NAME \ +sh tools/slurm_train.sh $PARTION $JOB_NAME \ configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py \ --work-dir $WORK_DIR ``` diff --git a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py index 056557b36..85a3dc1f5 100644 --- a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py @@ -81,7 +81,7 @@ def _generate_arch_message(self, out_channels: int) -> tuple: pruning rate) 2. sampled by probability Inputs: - out_channels (int): channel num of conv layers + out_channels (int): channel num of conv layers. 
Outputs: attr (tuple): (group_size, num_groups, min_ch) """ @@ -96,13 +96,14 @@ def _generate_arch_message(self, out_channels: int) -> tuple: return (group_size, num_groups, min_ch) - def modify_supernet_forward(self, arch_train: str) -> bool: + def modify_supernet_forward(self, arch_train: str): for module, group_id in self._bn_arch_align.items(): if arch_train: arch_param = self.arch_params[self._bn_arch_align[module]] arch_params_attr = self._arch_params_attr[str(group_id)] else: - arch_param = arch_params_attr = None + arch_param = None + arch_params_attr = None module.set_forward_args( arch_param=arch_param, arch_attr=arch_params_attr) From 406cdbc2ef6f44c4ca56b116cc913561284356c6 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Mon, 9 Jan 2023 17:58:44 +0800 Subject: [PATCH 15/59] fix readme.md --- configs/pruning/mmcls/dmcp/README.md | 8 ++++---- .../mutators/channel_mutator/dmcp_channel_mutator.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/configs/pruning/mmcls/dmcp/README.md b/configs/pruning/mmcls/dmcp/README.md index 94afce66a..63cef31a6 100644 --- a/configs/pruning/mmcls/dmcp/README.md +++ b/configs/pruning/mmcls/dmcp/README.md @@ -2,7 +2,7 @@ ## Abstract -Recent works imply that the channel pruning can be regarded as searching optimal sub-structure from unpruned networks. However, existing works based on this observation require training and evaluating a large number of structures, which limits their application. In this paper, we propose a novel differentiable method for channel pruning, named Differentiable Markov Channel Pruning (DMCP), to efficiently search the optimal sub-structure. Our method is differentiable and can be directly optimized by gradient descent with respect to standard task loss and budget regularization (e.g. FLOPs constraint). 
In DMCP, we model the channel pruning as a Markov process, in which each state represnets for retaining the corresponding channel during pruning, and transitions between states denote the pruning process. In the end, our method is able to implicitly select the proper number of channels in each layer by the Markov process with optimized transitions. To validate the effectiveness of our method, we perform extensive experiments on Imagenet with ResNet and MobilenetV2. Results show our method can achieve consistent improvement than stateof-the-art pruning methods in various FLOPs settings. +Recent works imply that the channel pruning can be regarded as searching optimal sub-structure from unpruned networks. However, existing works based on this observation require training and evaluating a large number of structures, which limits their application. In this paper, we propose a novel differentiable method for channel pruning, named Differentiable Markov Channel Pruning (DMCP), to efficiently search the optimal sub-structure. Our method is differentiable and can be directly optimized by gradient descent with respect to standard task loss and budget regularization (e.g. FLOPs constraint). In DMCP, we model the channel pruning as a Markov process, in which each state represents for retaining the corresponding channel during pruning, and transitions between states denote the pruning process. In the end, our method is able to implicitly select the proper number of channels in each layer by the Markov process with optimized transitions. To validate the effectiveness of our method, we perform extensive experiments on Imagenet with ResNet and MobilenetV2. Results show our method can achieve consistent improvement than stateof-the-art pruning methods in various FLOPs settings. 
## Citation @@ -26,10 +26,10 @@ Recent works imply that the channel pruning can be regarded as searching optimal ## Getting Started -#### Train DMCP from scrach +#### Train DMCP from scratch ```bash -sh tools/slurm_train.sh $PARTION $JOB_NAME \ +sh tools/slurm_train.sh $PARTITION $JOB_NAME \ configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py \ --work-dir $WORK_DIR ``` @@ -39,7 +39,7 @@ sh tools/slurm_train.sh $PARTION $JOB_NAME \ #### the output structure 'DMCP_SUBNET_IMAGENET.yaml' ```bash -sh tools/slurm_train.sh $PARTION $JOB_NAME \ +sh tools/slurm_train.sh $PARTITION $JOB_NAME \ configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py \ --work-dir $WORK_DIR ``` diff --git a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py index 85a3dc1f5..d793827ec 100644 --- a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py @@ -54,7 +54,7 @@ def _build_arch_param(self, num_choices) -> nn.Parameter: """Build learnable architecture parameters.""" return nn.Parameter(torch.zeros(num_choices)) - def prepare_arch_params(self, supernet: Module) -> None: + def prepare_arch_params(self, supernet: Module): self.arch_params = nn.ParameterDict() self._op_arch_align = dict() self._arch_params_attr = dict() From 20b2bcb6028490ab866756a28ebcd1b8ca215b93 Mon Sep 17 00:00:00 2001 From: Lxtccc Date: Mon, 9 Jan 2023 21:37:39 +0800 Subject: [PATCH 16/59] update --- mmrazor/engine/hooks/dmcp_subnet_hook.py | 9 +-- mmrazor/models/algorithms/pruning/dmcp.py | 72 ++++++++++++------- .../dynamic_ops/bricks/__init__.py | 31 ++++++-- .../dynamic_ops/bricks/dynamic_norm.py | 16 ++--- .../channel_mutator/dmcp_channel_mutator.py | 13 ++-- .../op_counters/conv_layer_counter.py | 1 - .../test_models/test_algorithms/test_dmcp.py | 7 +- 7 files changed, 86 insertions(+), 63 deletions(-) diff --git 
a/mmrazor/engine/hooks/dmcp_subnet_hook.py b/mmrazor/engine/hooks/dmcp_subnet_hook.py index a2d7f37b1..4a8813df3 100644 --- a/mmrazor/engine/hooks/dmcp_subnet_hook.py +++ b/mmrazor/engine/hooks/dmcp_subnet_hook.py @@ -11,12 +11,9 @@ @HOOKS.register_module() class DMCPSubnetHook(Hook): - priority = 'VERY_LOW' - def __init__(self, - subnet_sample_num: int = 10, - **kwargs) -> None: + def __init__(self, subnet_sample_num: int = 10, **kwargs) -> None: self.subnet_sample_num = subnet_sample_num def _save_subnet(self, arch_space_dict, save_path): @@ -51,8 +48,8 @@ def after_run(self, runner): runner.logger.info( f'Excepted sample(ES) arch with FlOP(MB):{cur_flops}') else: - save_path = os.path.join( - root_dir, 'subnet_{}.yaml'.format(i + 1)) + save_path = os.path.join(root_dir, + 'subnet_{}.yaml'.format(i + 1)) runner.logger.info( f'Driect sample(DS) arch with FlOP(MB): {cur_flops}') self._save_subnet(model.mutator.current_choices, save_path) diff --git a/mmrazor/models/algorithms/pruning/dmcp.py b/mmrazor/models/algorithms/pruning/dmcp.py index 98f0cf141..373048a7d 100644 --- a/mmrazor/models/algorithms/pruning/dmcp.py +++ b/mmrazor/models/algorithms/pruning/dmcp.py @@ -29,6 +29,26 @@ @MODELS.register_module() class DMCP(BaseAlgorithm): + """Implementation of `DMCP `_ + + Args: + architecture (dict|:obj:`BaseModel`): The config of :class:`BaseModel` + or built model. Corresponding to supernet in NAS algorithm. + distiller (VALID_DISTILLER_TYPE): Configs to build a distiller. + fix_subnet (str | dict | :obj:`FixSubnet`): The path of yaml file or + loaded dict or built :obj:`FixSubnet`. Defaults to None. + data_preprocessor (Optional[Union[dict, nn.Module]]): The pre-process + config of :class:`BaseDataPreprocessor`. Defaults to None. + strategy (list): mode of sampled net. + arch_start_train (int): Number of iter to start arch training. + arch_train_freq (int): Frequency of training. Defaults to 500. + distillation_times (int): Number of iter to start arch training. 
+ target_flops (int): Target FLOPs. Default unit: MFLOPs. + flops_loss_type (str): The model used to calculate flops_loss. + flop_loss_weight (float): Weight of flops_loss. + init_cfg (Optional[dict]): Init config for ``BaseModule``. + Defaults to None. + """ def __init__(self, distiller: VALID_DISTILLER_TYPE, @@ -42,7 +62,7 @@ def __init__(self, init_cfg: Optional[Dict] = None, arch_start_train=10000, arch_train_freq=500, - distillation_times=2000, + distillation_times=20000, target_flops=150, flops_loss_type: str = 'log_l1', flop_loss_weight: float = 1.0) -> None: @@ -104,6 +124,7 @@ def train_step(self, data: List[dict], self.arch_train = True if self.is_supernet: + def distill_step( batch_inputs: torch.Tensor, data_samples: List[BaseDataElement] ) -> Dict[str, torch.Tensor]: @@ -124,8 +145,8 @@ def distill_step( return subnet_losses - batch_inputs, data_samples = self.data_preprocessor( - data, True).values() + batch_inputs, data_samples = self.data_preprocessor(data, + True).values() total_losses = dict() # update model parameters @@ -155,15 +176,13 @@ def distill_step( direct_subnet_losses = distill_step( batch_inputs, data_samples) total_losses.update( - add_prefix(direct_subnet_losses, - 'direct_subnet')) + add_prefix(direct_subnet_losses, 'direct_subnet')) else: self.set_subnet(mode='random') random_subnet_losses = distill_step( batch_inputs, data_samples) total_losses.update( - add_prefix(random_subnet_losses, - 'random_subnet')) + add_prefix(random_subnet_losses, 'random_subnet')) elif kind in ('scheduled_random'): if random.uniform(0, 1) > self.cur_sample_prob\ and self.arch_train: @@ -171,15 +190,13 @@ def distill_step( direct_subnet_losses = distill_step( batch_inputs, data_samples) total_losses.update( - add_prefix(direct_subnet_losses, - 'direct_subnet')) + add_prefix(direct_subnet_losses, 'direct_subnet')) else: self.set_subnet(mode='random') random_subnet_losses = distill_step( batch_inputs, data_samples) total_losses.update( - 
add_prefix(random_subnet_losses, - 'random_subnet')) + add_prefix(random_subnet_losses, 'random_subnet')) self.cur_sample_prob *= 0.9999 # update arch parameters @@ -194,12 +211,11 @@ def distill_step( else: return super().train_step(data, optim_wrapper) - def _update_arch_params( - self, - inputs: torch.Tensor, - data_samples: Optional[List[BaseDataElement]], - optim_wrapper: OptimWrapper, - mode: str = 'loss') -> Dict: + def _update_arch_params(self, + inputs: torch.Tensor, + data_samples: Optional[List[BaseDataElement]], + optim_wrapper: OptimWrapper, + mode: str = 'loss') -> Dict: arch_params_loss = dict() self.eval() # update arch_loss @@ -224,8 +240,8 @@ def _update_arch_params( def _compute_flops_loss(self, expected_flops): """Calculation of loss functions of arch parameters. - Calculate the difference between the expected FLOPs and the - target FLOPs in the units of M. + Calculate the difference between the expected FLOPs and the target + FLOPs in the units of M. """ flops_error = expected_flops - self.target_flops @@ -297,13 +313,13 @@ def train_step(self, data: List[dict], self.module.arch_train = True if self.module.is_supernet: + def distill_step( batch_inputs: torch.Tensor, data_samples: List[BaseDataElement] ) -> Dict[str, torch.Tensor]: subnet_losses = dict() with optim_wrapper['architecture'].optim_context( - self - ), self.module.distiller.student_recorders: + self), self.module.distiller.student_recorders: hard_loss = self(batch_inputs, data_samples, mode='loss') soft_loss = self.module.distiller.compute_distill_losses() @@ -335,15 +351,17 @@ def distill_step( max_subnet_losses) optim_wrapper['architecture'].update_params( parsed_max_subnet_losses) - total_losses.update(add_prefix(max_subnet_losses, - f'max_subnet{max_net_num}')) + total_losses.update( + add_prefix(max_subnet_losses, + f'max_subnet{max_net_num}')) max_net_num += 1 elif kind in ('min'): self.module.set_subnet(mode='min') - min_subnet_losses = distill_step( - batch_inputs, 
data_samples) - total_losses.update(add_prefix(min_subnet_losses, - f'min_subnet{min_net_num}')) + min_subnet_losses = distill_step(batch_inputs, + data_samples) + total_losses.update( + add_prefix(min_subnet_losses, + f'min_subnet{min_net_num}')) min_net_num += 1 elif kind in ('arch_random'): if self.module.arch_train: diff --git a/mmrazor/models/architectures/dynamic_ops/bricks/__init__.py b/mmrazor/models/architectures/dynamic_ops/bricks/__init__.py index 6992be443..d3abe4fd8 100644 --- a/mmrazor/models/architectures/dynamic_ops/bricks/__init__.py +++ b/mmrazor/models/architectures/dynamic_ops/bricks/__init__.py @@ -1,18 +1,35 @@ # Copyright (c) OpenMMLab. All rights reserved. from .dynamic_container import DynamicSequential -from .dynamic_conv import BigNasConv2d, DynamicConv2d, FuseConv2d, OFAConv2d +from .dynamic_conv import (BigNasConv2d, DynamicConv2d, + DynamicConv2dAdaptivePadding, FuseConv2d, OFAConv2d) from .dynamic_embed import DynamicPatchEmbed +from .dynamic_function import DynamicInputResizer from .dynamic_linear import DynamicLinear from .dynamic_multi_head_attention import DynamicMultiheadAttention from .dynamic_norm import (DMCPBatchNorm2d, DynamicBatchNorm1d, DynamicBatchNorm2d, DynamicBatchNorm3d, - DynamicLayerNorm, SwitchableBatchNorm2d) + DynamicBatchNormXd, DynamicLayerNorm, + DynamicSyncBatchNorm, SwitchableBatchNorm2d) from .dynamic_relative_position import DynamicRelativePosition2D __all__ = [ - 'BigNasConv2d', 'DynamicConv2d', 'OFAConv2d', 'DynamicLinear', - 'DynamicBatchNorm1d', 'DynamicBatchNorm2d', 'DynamicBatchNorm3d', - 'SwitchableBatchNorm2d', 'DynamicSequential', 'DynamicPatchEmbed', - 'DynamicLayerNorm', 'DynamicRelativePosition2D', 'FuseConv2d', - 'DynamicMultiheadAttention', 'DMCPBatchNorm2d' + 'BigNasConv2d', + 'DynamicConv2d', + 'OFAConv2d', + 'DynamicLinear', + 'DynamicBatchNorm1d', + 'DynamicBatchNorm2d', + 'DynamicBatchNorm3d', + 'SwitchableBatchNorm2d', + 'DynamicSequential', + 'DynamicPatchEmbed', + 
'DynamicRelativePosition2D', + 'FuseConv2d', + 'DynamicMultiheadAttention', + 'DynamicSyncBatchNorm', + 'DynamicConv2dAdaptivePadding', + 'DynamicBatchNormXd', + 'DynamicInputResizer', + 'DynamicLayerNorm', + 'DMCPBatchNorm2d', ] diff --git a/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py b/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py index 78e4fddcc..7b1aca280 100644 --- a/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py +++ b/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. from functools import partial -from typing import Any, Callable, Dict, List, Optional, Tuple +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import torch import torch.nn as nn @@ -395,7 +395,6 @@ def _check_input_dim(self, input: torch.Tensor): @MODELS.register_module() class DMCPBatchNorm2d(DynamicBatchNorm2d): - accepted_mutable_attrs = {'num_features'} def __init__(self, *args, **kwargs) -> None: @@ -417,10 +416,7 @@ def convert_from(cls, module: _BatchNorm): track_running_stats=module.track_running_stats) return dynamic_bn - def forward(self, - input: Tensor, - arch_param=None, - arch_attr=None): + def forward(self, input: Tensor, arch_param=None, arch_attr=None): out = self.forward_batchnorm(input) if arch_param is not None: out = self.forward_arch_param(out, arch_param, arch_attr) @@ -483,12 +479,12 @@ def forward_arch_param(self, input: Tensor, arch_param, arch_attr): tp_group_x = tp_group_x.view(num_groups, -1) * prob[:num_groups] tp_group_x = tp_group_x.view(size_tp_group) - out = torch.cat( - [tp_x[:min_ch], tp_group_x]).transpose(0, 1).contiguous() + out = torch.cat([tp_x[:min_ch], tp_group_x]).transpose(0, + 1).contiguous() return out - def set_forward_args( - self, arch_param: nn.Parameter, arch_attr: Tuple) -> None: + def set_forward_args(self, arch_param: nn.Parameter, + arch_attr: Union[Tuple, Any]) -> None: """Interface for 
modifying the arch_param using partial.""" forward_with_default_args: PartialType = \ partial(self.forward, arch_param=arch_param, arch_attr=arch_attr) diff --git a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py index d793827ec..81c107352 100644 --- a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py @@ -1,10 +1,9 @@ # Copyright (c) OpenMMLab. All rights reserved. import random -from typing import Any, Dict, Type, Union +from typing import Any, Dict, Optional, Tuple, Type, Union import torch import torch.nn as nn -from torch import tensor from torch.nn import Module from mmrazor.models.mutables import DMCPChannelUnit @@ -98,12 +97,11 @@ def _generate_arch_message(self, out_channels: int) -> tuple: def modify_supernet_forward(self, arch_train: str): for module, group_id in self._bn_arch_align.items(): + arch_param: Optional[nn.Parameter] = None + arch_params_attr: Optional[Tuple] = None if arch_train: arch_param = self.arch_params[self._bn_arch_align[module]] arch_params_attr = self._arch_params_attr[str(group_id)] - else: - arch_param = None - arch_params_attr = None module.set_forward_args( arch_param=arch_param, arch_attr=arch_params_attr) @@ -115,9 +113,8 @@ def sample_subnet(self, mode: str, arch_train: str) -> None: self.modify_supernet_forward(arch_train) - def _prune_by_arch(self, mode: str, group_id: int) -> Union[int, tensor]: - """ - Prune the output channels according to the specified mode. + def _prune_by_arch(self, mode: str, group_id: int) -> Union[int, Any]: + """Prune the output channels according to the specified mode. 
Inputs: mode (list): one of ['max', 'min', 'random', 'direct', 'expected'] diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py b/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py index d098439cd..13ef0677c 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py @@ -67,7 +67,6 @@ class DynamicConv2dCounter(ConvCounter): @staticmethod def add_count_hook(module: nn.Conv2d, input, output): - input = input[0] batch_size = input.shape[0] diff --git a/tests/test_models/test_algorithms/test_dmcp.py b/tests/test_models/test_algorithms/test_dmcp.py index 38530565e..06769fd84 100644 --- a/tests/test_models/test_algorithms/test_dmcp.py +++ b/tests/test_models/test_algorithms/test_dmcp.py @@ -10,13 +10,12 @@ MUTATOR_CFG = dict( channel_mutator=dict( type='mmrazor.DMCPChannelMutator', - channel_unit_cfg={ - 'type': 'DMCPChannelUnit' - }, + channel_unit_cfg={'type': 'DMCPChannelUnit'}, parse_cfg=dict( type='ChannelAnalyzer', demo_input=(1, 3, 224, 224), - tracer_type='BackwardTracer'),)) + tracer_type='BackwardTracer'), + )) DISTILLER_CFG = dict( _scope_='mmrazor', From 60cc3cefb2c5143a824c8c9eaabcbae63d82feee Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Mon, 9 Jan 2023 21:38:46 +0800 Subject: [PATCH 17/59] fix expression --- .../architectures/dynamic_ops/bricks/dynamic_norm.py | 4 +++- .../mutators/channel_mutator/dmcp_channel_mutator.py | 8 +++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py b/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py index c5bbff5ee..45e8cbe6d 100644 --- a/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py +++ b/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py @@ -488,7 +488,9 @@ def forward_arch_param(self, input: Tensor, 
arch_param, arch_attr): return out def set_forward_args( - self, arch_param: nn.Parameter, arch_attr: Optional[Tuple]) -> None: + self, + arch_param: nn.Parameter, + arch_attr: Optional[Tuple]) -> None: """Interface for modifying the arch_param using partial.""" forward_with_default_args: PartialType = \ partial(self.forward, arch_param=arch_param, arch_attr=arch_attr) diff --git a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py index f653271c7..81c107352 100644 --- a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py @@ -1,10 +1,9 @@ # Copyright (c) OpenMMLab. All rights reserved. import random -from typing import Any, Dict, Type, Union +from typing import Any, Dict, Optional, Tuple, Type, Union import torch import torch.nn as nn -from torch import tensor from torch.nn import Module from mmrazor.models.mutables import DMCPChannelUnit @@ -98,12 +97,11 @@ def _generate_arch_message(self, out_channels: int) -> tuple: def modify_supernet_forward(self, arch_train: str): for module, group_id in self._bn_arch_align.items(): + arch_param: Optional[nn.Parameter] = None + arch_params_attr: Optional[Tuple] = None if arch_train: arch_param = self.arch_params[self._bn_arch_align[module]] arch_params_attr = self._arch_params_attr[str(group_id)] - else: - arch_param = None - arch_params_attr = None module.set_forward_args( arch_param=arch_param, arch_attr=arch_params_attr) From c9715c0f633ba3512505119abf5ae69c2b48c225 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Tue, 10 Jan 2023 15:31:23 +0800 Subject: [PATCH 18/59] fix CI --- configs/pruning/mmcls/dcff/README.md | 6 +++--- configs/pruning/mmcls/dmcp/README.md | 6 +++--- configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py | 1 - configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py | 5 +---- mmrazor/engine/hooks/__init__.py | 6 ++++-- 
mmrazor/models/algorithms/pruning/__init__.py | 2 +- .../estimators/counters/op_counters/conv_layer_counter.py | 1 + .../estimators/counters/op_counters/norm_layer_counter.py | 2 +- tests/test_models/test_mutators/test_dmcp_mutator.py | 1 + 9 files changed, 15 insertions(+), 15 deletions(-) diff --git a/configs/pruning/mmcls/dcff/README.md b/configs/pruning/mmcls/dcff/README.md index 63cef31a6..fee4fe426 100644 --- a/configs/pruning/mmcls/dcff/README.md +++ b/configs/pruning/mmcls/dcff/README.md @@ -20,9 +20,9 @@ Recent works imply that the channel pruning can be regarded as searching optimal ### 1.Classification -|Dataset| Supernet | Flops(M) | Top-1 (%) | Top-5 (%) | Config | Download | -|:---------------------:|:---------------------:|:------:|:---------:|:--------:|:---------:|:------:| -|ImageNet| ResNet50 | - | - | - | [config](./dmcp_resnet50_supernet_8xb32.py) | \[model\] / [arch](./DMCP_SUBNET_IMAGENET.yaml)/ \[log\] | +| Dataset | Supernet | Flops(M) | Top-1 (%) | Top-5 (%) | Config | Download | +| :------: | :------: | :------: | :-------: | :-------: | :-----------------------------------------: | :------------------------------------------------------: | +| ImageNet | ResNet50 | - | - | - | [config](./dmcp_resnet50_supernet_8xb32.py) | \[model\] / [arch](./DMCP_SUBNET_IMAGENET.yaml)/ \[log\] | ## Getting Started diff --git a/configs/pruning/mmcls/dmcp/README.md b/configs/pruning/mmcls/dmcp/README.md index 63cef31a6..fee4fe426 100644 --- a/configs/pruning/mmcls/dmcp/README.md +++ b/configs/pruning/mmcls/dmcp/README.md @@ -20,9 +20,9 @@ Recent works imply that the channel pruning can be regarded as searching optimal ### 1.Classification -|Dataset| Supernet | Flops(M) | Top-1 (%) | Top-5 (%) | Config | Download | -|:---------------------:|:---------------------:|:------:|:---------:|:--------:|:---------:|:------:| -|ImageNet| ResNet50 | - | - | - | [config](./dmcp_resnet50_supernet_8xb32.py) | \[model\] / [arch](./DMCP_SUBNET_IMAGENET.yaml)/ \[log\] | +| 
Dataset | Supernet | Flops(M) | Top-1 (%) | Top-5 (%) | Config | Download | +| :------: | :------: | :------: | :-------: | :-------: | :-----------------------------------------: | :------------------------------------------------------: | +| ImageNet | ResNet50 | - | - | - | [config](./dmcp_resnet50_supernet_8xb32.py) | \[model\] / [arch](./DMCP_SUBNET_IMAGENET.yaml)/ \[log\] | ## Getting Started diff --git a/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py b/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py index b346e9b63..c835aedd1 100644 --- a/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py +++ b/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py @@ -1,6 +1,5 @@ _base_ = ['dmcp_resnet_8xb32.py'] - _base_.optim_wrapper = dict( optimizer=dict(type='SGD', lr=0.25, momentum=0.9, weight_decay=0.0001)) diff --git a/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py b/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py index eb5cf7e49..b8fcddcc3 100644 --- a/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py +++ b/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py @@ -17,10 +17,7 @@ param_scheduler = dict( type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1) -train_cfg = dict( - by_epoch=True, - max_epochs=120, - val_interval=1) +train_cfg = dict(by_epoch=True, max_epochs=120, val_interval=1) data_preprocessor = {'type': 'mmcls.ClsDataPreprocessor'} diff --git a/mmrazor/engine/hooks/__init__.py b/mmrazor/engine/hooks/__init__.py index 64326ec56..193165ec2 100644 --- a/mmrazor/engine/hooks/__init__.py +++ b/mmrazor/engine/hooks/__init__.py @@ -4,5 +4,7 @@ from .estimate_resources_hook import EstimateResourcesHook from .visualization_hook import RazorVisualizationHook -__all__ = ['DumpSubnetHook', 'EstimateResourcesHook', 'RazorVisualizationHook', - 'DMCPSubnetHook'] +__all__ = [ + 'DumpSubnetHook', 'EstimateResourcesHook', 'RazorVisualizationHook', + 'DMCPSubnetHook' +] diff --git 
a/mmrazor/models/algorithms/pruning/__init__.py b/mmrazor/models/algorithms/pruning/__init__.py index 4958938a7..09a87b90d 100644 --- a/mmrazor/models/algorithms/pruning/__init__.py +++ b/mmrazor/models/algorithms/pruning/__init__.py @@ -5,4 +5,4 @@ __all__ = [ 'SlimmableNetwork', 'SlimmableNetworkDDP', 'DCFF', 'DMCP', 'DMCPDDP' - ] +] diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py b/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py index 05deffadd..13ef0677c 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py @@ -64,6 +64,7 @@ class Conv3dCounter(ConvCounter): @TASK_UTILS.register_module() class DynamicConv2dCounter(ConvCounter): + @staticmethod def add_count_hook(module: nn.Conv2d, input, output): input = input[0] diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py b/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py index f3bce9151..54f3f26d2 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/norm_layer_counter.py @@ -88,4 +88,4 @@ def add_count_hook(module, input, output): batch_flops *= 2 num_features = module.mutable_attrs['num_features'].activated_channels module.__flops__ += batch_flops - module.__params__ += num_features*2 + module.__params__ += num_features * 2 diff --git a/tests/test_models/test_mutators/test_dmcp_mutator.py b/tests/test_models/test_mutators/test_dmcp_mutator.py index 6fdcba7ea..31fcc90d4 100644 --- a/tests/test_models/test_mutators/test_dmcp_mutator.py +++ b/tests/test_models/test_mutators/test_dmcp_mutator.py @@ -8,6 +8,7 @@ class ResBlock(Module): + def __init__(self) -> None: super().__init__() From fb7f6711c3c32c77d9837e15b9f7ab3a9166b39a Mon 
Sep 17 00:00:00 2001 From: XiaotongLu Date: Tue, 10 Jan 2023 16:39:09 +0800 Subject: [PATCH 19/59] fix UT --- mmrazor/models/algorithms/__init__.py | 4 +- .../test_models/test_algorithms/test_dmcp.py | 49 ++++++------------- .../test_mutators/test_dmcp_mutator.py | 2 +- 3 files changed, 18 insertions(+), 37 deletions(-) diff --git a/mmrazor/models/algorithms/__init__.py b/mmrazor/models/algorithms/__init__.py index 3f649c426..0e1145835 100644 --- a/mmrazor/models/algorithms/__init__.py +++ b/mmrazor/models/algorithms/__init__.py @@ -5,7 +5,7 @@ SelfDistill, SingleTeacherDistill) from .nas import (DSNAS, DSNASDDP, SPOS, Autoformer, AutoSlim, AutoSlimDDP, BigNAS, BigNASDDP, Darts, DartsDDP) -from .pruning import DCFF, SlimmableNetwork, SlimmableNetworkDDP +from .pruning import DCFF, DMCP, SlimmableNetwork, SlimmableNetworkDDP from .pruning.ite_prune_algorithm import ItePruneAlgorithm __all__ = [ @@ -14,5 +14,5 @@ 'Darts', 'DartsDDP', 'DCFF', 'SelfDistill', 'DataFreeDistillation', 'DAFLDataFreeDistillation', 'OverhaulFeatureDistillation', 'ItePruneAlgorithm', 'DSNAS', 'DSNASDDP', 'Autoformer', 'BigNAS', - 'BigNASDDP' + 'BigNASDDP', 'DMCP' ] diff --git a/tests/test_models/test_algorithms/test_dmcp.py b/tests/test_models/test_algorithms/test_dmcp.py index 06769fd84..dbd0a267d 100644 --- a/tests/test_models/test_algorithms/test_dmcp.py +++ b/tests/test_models/test_algorithms/test_dmcp.py @@ -4,18 +4,17 @@ import torch -from mmrazor.models import DMCP +from mmrazor.models import DMCP, DMCPChannelMutator from mmrazor.registry import MODELS MUTATOR_CFG = dict( - channel_mutator=dict( - type='mmrazor.DMCPChannelMutator', - channel_unit_cfg={'type': 'DMCPChannelUnit'}, - parse_cfg=dict( - type='ChannelAnalyzer', - demo_input=(1, 3, 224, 224), - tracer_type='BackwardTracer'), - )) + type='mmrazor.DMCPChannelMutator', + channel_unit_cfg={'type': 'DMCPChannelUnit'}, + parse_cfg=dict( + type='ChannelAnalyzer', + demo_input=(1, 3, 224, 224), + tracer_type='BackwardTracer'), +) 
DISTILLER_CFG = dict( _scope_='mmrazor', @@ -33,7 +32,7 @@ type='mmrazor.DMCP', architecture=dict( cfg_path='mmcls::resnet/resnet50_8xb32_in1k.py', pretrained=False), - mutators=MUTATOR_CFG, + mutator_cfg=MUTATOR_CFG, distiller=DISTILLER_CFG) @@ -45,41 +44,23 @@ def test_init(self): dmcp_algo = MODELS.build(ALGORITHM_CFG_SUPERNET) self.assertIsInstance(dmcp_algo, DMCP) # dmcp mutators include channel_mutator and value_mutator - assert 'channel_mutator' in dmcp_algo.mutators - assert 'value_mutator' in dmcp_algo.mutators + assert isinstance(dmcp_algo.mutator, DMCPChannelMutator) # dmcp_algo support training self.assertTrue(dmcp_algo.is_supernet) # initiate dmcp without any `mutator`. ALGORITHM_CFG_SUPERNET.pop('type') - ALGORITHM_CFG_SUPERNET['mutators'] = None - none_type = type(ALGORITHM_CFG_SUPERNET['mutators']) - with self.assertRaisesRegex( - TypeError, f'mutator should be a `dict` but got {none_type}'): - _ = DMCP(**ALGORITHM_CFG_SUPERNET) + ALGORITHM_CFG_SUPERNET['mutator_cfg'] = None - # initiate dmcp with error type `mutator`. 
- backwardtracer_cfg = dict( - type='OneShotChannelMutator', - channel_unit_cfg=dict( - type='OneShotMutableChannelUnit', - default_args=dict( - candidate_choices=list(i / 12 for i in range(2, 13)), - choice_mode='ratio')), - parse_cfg=dict( - type='BackwardTracer', - loss_calculator=dict(type='ImageClassifierPseudoLoss'))) - ALGORITHM_CFG_SUPERNET['mutators'] = dict( - channel_mutator=backwardtracer_cfg, - value_mutator=dict(type='mmrazor.DynamicValueMutator')) - with self.assertRaisesRegex(AssertionError, - 'DMCP only support predefined.'): + with self.assertRaisesRegex( + AttributeError, "'NoneType' object has no attribute 'get'"): _ = DMCP(**ALGORITHM_CFG_SUPERNET) def test_loss(self): # supernet inputs = torch.randn(1, 3, 224, 224) dmcp = MODELS.build(ALGORITHM_CFG) - loss = dmcp(inputs) + dmcp.is_supernet = False + loss = dmcp(inputs, mode='tensor') assert loss.size(1) == 1000 diff --git a/tests/test_models/test_mutators/test_dmcp_mutator.py b/tests/test_models/test_mutators/test_dmcp_mutator.py index 31fcc90d4..eea8e2a08 100644 --- a/tests/test_models/test_mutators/test_dmcp_mutator.py +++ b/tests/test_models/test_mutators/test_dmcp_mutator.py @@ -25,7 +25,7 @@ def forward(self, x: Tensor) -> Tensor: return x3 -def test_DCFF_channel_mutator() -> None: +def test_DMCP_channel_mutator() -> None: imgs = torch.randn(16, 3, 224, 224) # ResBlock From f58826ca8b7a967da4b6c96b2fecbb1f3c73e961 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Wed, 1 Feb 2023 15:14:58 +0800 Subject: [PATCH 20/59] fix ci --- .pre-commit-config.yaml | 4 +- .../_base_/settings/imagenet_bs2048_dmcp.py | 104 +++++++++++++ configs/pruning/mmcls/dcff/README.md | 81 +++++++--- .../pruning/mmcls/dmcp/DMCP_MBV2_100M.yaml | 9 ++ configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml | 5 + .../mmcls/dmcp/DMCP_SUBNET_IMAGENET.yaml | 4 - configs/pruning/mmcls/dmcp/README.md | 65 ++++---- .../mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py | 49 ++++++ .../mmcls/dmcp/dmcp_mbv2_supernet_32xb64.py | 62 ++++++++ 
.../mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py | 49 ++++++ .../mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py | 10 -- ...32.py => dmcp_resnet50_supernet_32xb64.py} | 57 ++++--- mmrazor/engine/hooks/dmcp_subnet_hook.py | 25 ++- mmrazor/models/algorithms/__init__.py | 4 +- mmrazor/models/algorithms/pruning/dmcp.py | 92 +++++++---- .../dynamic_ops/bricks/dynamic_norm.py | 27 ++-- .../channel_mutator/dmcp_channel_mutator.py | 9 +- .../op_counters/conv_layer_counter.py | 2 + .../test_models/test_algorithms/test_dmcp.py | 147 +++++++++++++++++- 19 files changed, 654 insertions(+), 151 deletions(-) create mode 100644 configs/_base_/settings/imagenet_bs2048_dmcp.py create mode 100644 configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.yaml create mode 100644 configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml delete mode 100644 configs/pruning/mmcls/dmcp/DMCP_SUBNET_IMAGENET.yaml create mode 100644 configs/pruning/mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py create mode 100644 configs/pruning/mmcls/dmcp/dmcp_mbv2_supernet_32xb64.py create mode 100644 configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py delete mode 100644 configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py rename configs/pruning/mmcls/dmcp/{dmcp_resnet50_supernet_8xb32.py => dmcp_resnet50_supernet_32xb64.py} (56%) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fb1f746a4..491ddaa78 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,8 +5,8 @@ repos: rev: 4.0.1 hooks: - id: flake8 - - repo: https://github.com/timothycrosley/isort - rev: 5.10.1 + - repo: https://github.com/PyCQA/isort + rev: 5.11.5 hooks: - id: isort - repo: https://github.com/pre-commit/mirrors-yapf diff --git a/configs/_base_/settings/imagenet_bs2048_dmcp.py b/configs/_base_/settings/imagenet_bs2048_dmcp.py new file mode 100644 index 000000000..161137392 --- /dev/null +++ b/configs/_base_/settings/imagenet_bs2048_dmcp.py @@ -0,0 +1,104 @@ +# dataset settings +dataset_type = 'mmcls.ImageNet' + +max_search_epochs = 100 +# 
learning rate setting +param_scheduler = [ + # warm up learning rate scheduler + dict( + type='LinearLR', + start_factor=0.5, + by_epoch=True, + begin=0, + end=10, + convert_to_iter_based=True), + dict( + type='CosineAnnealingLR', + T_max=max_search_epochs, + eta_min=0.08, + by_epoch=True, + begin=10, + end=max_search_epochs, + convert_to_iter_based=True), +] + +# optimizer setting +paramwise_cfg = dict( + norm_decay_mult=0.0, + bias_decay_mult=0.0) + +optim_wrapper = dict( + _delete_=True, + constructor='mmrazor.SeparateOptimWrapperConstructor', + architecture=dict( + type='OptimWrapper', + optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=3e-4), + clip_grad=dict(max_norm=5, norm_type=2)), + mutator=dict( + type='OptimWrapper', + optimizer=dict(type='Adam', lr=0.5, weight_decay=1e-3))) + +# data preprocessor +data_preprocessor = dict( + type='mmcls.ClsDataPreprocessor', + # RGB format normalization parameters + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + # convert image from BGR to RGB + to_rgb=True, +) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='RandomResizedCrop', scale=224), + dict( + type='ColorJitter', + brightness=0.2, + contrast=0.2, + saturation=0.2, + hue=0.1), + dict(type='RandomFlip', prob=0.5, direction='horizontal'), + dict(type='PackClsInputs'), +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='ResizeEdge', scale=256, edge='short'), + dict(type='CenterCrop', crop_size=224), + dict(type='PackClsInputs'), +] + +train_dataloader = dict( + batch_size=64, + num_workers=4, + dataset=dict( + type=dataset_type, + data_root='data/imagenet', + ann_file='meta/train.txt', + data_prefix='train', + pipeline=train_pipeline), + sampler=dict(type='DefaultSampler', shuffle=True, _scope_='mmcls'), + persistent_workers=True, +) + +val_dataloader = dict( + batch_size=64, + num_workers=4, + dataset=dict( + type=dataset_type, + data_root='data/imagenet', + ann_file='meta/val.txt', + 
data_prefix='val', + pipeline=test_pipeline), + sampler=dict(type='DefaultSampler', shuffle=True, _scope_='mmcls'), + persistent_workers=True, +) +val_evaluator = dict(type='mmcls.Accuracy', topk=(1, 5)) + +# If you want standard test, please manually configure the test dataset +test_dataloader = val_dataloader +test_evaluator = val_evaluator + +evaluation = dict(interval=1, metric='accuracy') + +train_cfg = dict(by_epoch=True, max_epochs=max_search_epochs, val_interval=1) +custom_hooks = [dict(type='DMCPSubnetHook')] diff --git a/configs/pruning/mmcls/dcff/README.md b/configs/pruning/mmcls/dcff/README.md index fee4fe426..ba5692d61 100644 --- a/configs/pruning/mmcls/dcff/README.md +++ b/configs/pruning/mmcls/dcff/README.md @@ -1,45 +1,82 @@ -# DMCP: Differentiable Markov Channel Pruning for Neural Networks +# Training Compact CNNs for Image Classification using Dynamic-coded Filter Fusion ## Abstract -Recent works imply that the channel pruning can be regarded as searching optimal sub-structure from unpruned networks. However, existing works based on this observation require training and evaluating a large number of structures, which limits their application. In this paper, we propose a novel differentiable method for channel pruning, named Differentiable Markov Channel Pruning (DMCP), to efficiently search the optimal sub-structure. Our method is differentiable and can be directly optimized by gradient descent with respect to standard task loss and budget regularization (e.g. FLOPs constraint). In DMCP, we model the channel pruning as a Markov process, in which each state represents for retaining the corresponding channel during pruning, and transitions between states denote the pruning process. In the end, our method is able to implicitly select the proper number of channels in each layer by the Markov process with optimized transitions. To validate the effectiveness of our method, we perform extensive experiments on Imagenet with ResNet and MobilenetV2. 
Results show our method can achieve consistent improvement than stateof-the-art pruning methods in various FLOPs settings. +The mainstream approach for filter pruning is usually either to force a hard-coded importance estimation upon a computation-heavy pretrained model to select “important” filters, or to impose a hyperparameter-sensitive sparse constraint on the loss objective to regularize the network training. In this paper, we present a novel filter pruning method, dubbed dynamic-coded filter fusion (DCFF), to derive compact CNNs in a computation-economical and regularization-free manner for efficient image classification. Each filter in our DCFF is firstly given an inter-similarity distribution with a temperature parameter as a filter proxy, on top of which, a fresh Kullback-Leibler divergence based dynamic-coded criterion is proposed to evaluate the filter importance. In contrast to simply keeping high-score filters in other methods, we propose the concept of filter fusion, i.e., the weighted averages using the assigned proxies, as our preserved filters. We obtain a one-hot inter-similarity distribution as the temperature parameter approaches infinity. Thus, the relative importance of each filter can vary along with the training of the compact CNN, leading to dynamically changeable fused filters without both the dependency on the pretrained model and the introduction of sparse constraints. Extensive experiments on classification benchmarks demonstrate the superiority of our DCFF over the compared counterparts. For example, our DCFF derives a compact VGGNet-16 with only 72.77M FLOPs and 1.06M parameters while reaching top-1 accuracy of 93.47% on CIFAR-10. A compact ResNet-50 is obtained with 63.8% FLOPs and 58.6% parameter reductions, retaining 75.60% top-1 accuracy on ILSVRC-2012. + +![pipeline](https://user-images.githubusercontent.com/31244134/189286581-722853ba-c6d7-4a39-b902-37995b444c71.jpg) + +## Results and models + +### 1.
Classification + +| Dataset | Backbone | Params(M) | FLOPs(M) | lr_type | Top-1 (%) | Top-5 (%) | CPrate | Config | Download | +| :------: | :----------: | :-------: | :------: | :-----: | :-------: | :-------: | :---------------------------------------------: | :--------------------------------------------------: | :--------------------------: | +| ImageNet | DCFFResNet50 | 15.16 | 2260 | step | 73.96 | 91.66 | \[0.0\]+\[0.35,0.4,0.1\]\*10+\[0.3,0.3,0.1\]\*6 | [config](../../mmcls/dcff/dcff_resnet_8xb32_in1k.py) | [model](<>) \| \[log\] (\<>) | + +### 2. Detection + +| Dataset | Method | Backbone | Style | Lr schd | Params(M) | FLOPs(M) | bbox AP | CPrate | Config | Download | +| :-----: | :---------: | :----------: | :-----: | :-----: | :-------: | :------: | :-----: | :---------------------------------------------: | :---------------------------------------------------------------: | :--------------------------: | +| COCO | Faster_RCNN | DCFFResNet50 | pytorch | step | 33.31 | 168320 | 35.8 | \[0.0\]+\[0.35,0.4,0.1\]\*10+\[0.3,0.3,0.1\]\*6 | [config](../../mmdet/dcff/dcff_faster_rcnn_resnet50_8xb4_coco.py) | [model](<>) \| \[log\] (\<>) | + +### 3. Segmentation + +| Dataset | Method | Backbone | crop size | Lr schd | Params(M) | FLOPs(M) | mIoU | CPrate | Config | Download | +| :--------: | :-------: | :-------------: | :-------: | :-----: | :-------: | :------: | :---: | :-----------------------------------------------------------------: | :-------------------------------------------------------------------: | :--------------------------: | +| Cityscapes | PointRend | DCFFResNetV1c50 | 512x1024 | 160k | 18.43 | 74410 | 76.75 | \[0.0, 0.0, 0.0\] + \[0.35, 0.4, 0.1\] * 10 + \[0.3, 0.3, 0.1\] * 6 | [config](../../mmseg/dcff/dcff_pointrend_resnet50_8xb2_cityscapes.py) | [model](<>) \| \[log\] (\<>) | + +### 4. 
Pose + +| Dataset | Method | Backbone | crop size | total epochs | Params(M) | FLOPs(M) | AP | CPrate | Config | Download | +| :-----: | :-------------: | :----------: | :-------: | :----------: | :-------: | :------: | :--: | :--------------------------------------------------------: | :---------------------------------------------------------------: | :--------------------------: | +| COCO | TopDown HeatMap | DCFFResNet50 | 256x192 | 300 | 26.95 | 4290 | 68.3 | \[0.0\] + \[0.2, 0.2, 0.1\] * 10 + \[0.15, 0.15, 0.1\] * 6 | [config](../../mmpose/dcff/dcff_topdown_heatmap_resnet50_coco.py) | [model](<>) \| \[log\] (\<>) | ## Citation ```latex -@inproceedings{guo2020dmcp, - title={Dmcp: Differentiable markov channel pruning for neural networks}, - author={Guo, Shaopeng and Wang, Yujie and Li, Quanquan and Yan, Junjie}, - booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, - pages={1539--1547}, - year={2020} +@article{lin2021training, + title={Training Compact CNNs for Image Classification using Dynamic-coded Filter Fusion}, + author={Lin, Mingbao and Ji, Rongrong and Chen, Bohong and Chao, Fei and Liu, Jianzhuang and Zeng, Wei and Tian, Yonghong and Tian, Qi}, + journal={arXiv preprint arXiv:2107.06916}, + year={2021} } ``` -## Results and models +## Get Started + +### Generate channel_config file -### 1.Classification +Generate `resnet_cls.json` with `tools/pruning/get_channel_units.py`. 
-| Dataset | Supernet | Flops(M) | Top-1 (%) | Top-5 (%) | Config | Download | -| :------: | :------: | :------: | :-------: | :-------: | :-----------------------------------------: | :------------------------------------------------------: | -| ImageNet | ResNet50 | - | - | - | [config](./dmcp_resnet50_supernet_8xb32.py) | \[model\] / [arch](./DMCP_SUBNET_IMAGENET.yaml)/ \[log\] | +```bash +python tools/pruning/get_channel_units.py \ + configs/pruning/mmcls/dcff/dcff_resnet50_8xb32_in1k.py \ + -c -i --output-path=configs/pruning/mmcls/dcff/resnet_cls.json +``` -## Getting Started +Then set layers' pruning rates `target_pruning_ratio` by `resnet_cls.json`. -#### Train DMCP from scratch +### Train DCFF + +#### Classification + +##### ImageNet ```bash -sh tools/slurm_train.sh $PARTITION $JOB_NAME \ - configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py \ +CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ + configs/pruning/mmcls/dcff/dcff_resnet50_8xb32_in1k.py 4 \ --work-dir $WORK_DIR ``` -#### After the previous steps, retrain the selected sub-network based on +### Test DCFF -#### the output structure 'DMCP_SUBNET_IMAGENET.yaml' +#### Classification + +##### ImageNet ```bash -sh tools/slurm_train.sh $PARTITION $JOB_NAME \ - configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py \ - --work-dir $WORK_DIR +CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_test.sh \ + configs/pruning/mmcls/dcff/dcff_compact_resnet50_8xb32_in1k.py \ + $CKPT 1 --work-dir $WORK_DIR ``` diff --git a/configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.yaml b/configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.yaml new file mode 100644 index 000000000..10afcf74c --- /dev/null +++ b/configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.yaml @@ -0,0 +1,9 @@ +{0: 9, +1: 10, +2: 36, 3: 16, 4: 16, +5: 48, 6: 21, 7: 41, 8: 22, +9: 60, 10: 24, 11: 44, 12: 272, 13: 272, +14: 310, 15: 36, 16: 294, 17: 351, +18: 693, 19: 80, 20: 96, 21: 864, +22: 1440, 23: 192, +24: 1664} diff --git
a/configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml b/configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml new file mode 100644 index 000000000..6bd0bec96 --- /dev/null +++ b/configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml @@ -0,0 +1,5 @@ +{0: 52, +1: 22, 2: 22, 3: 106, 4: 16, 5: 16, 6: 40, 7: 16, +8: 68, 9: 56, 10: 155, 11: 32, 12: 68, 13: 56, 14: 56, 15: 80, 16: 92, +17: 256, 18: 256, 19: 1024, 20: 106, 21: 106, 22: 131, 23: 256, 24: 131, 25: 256, 26: 256, 27: 256, 28: 256, 29: 256, +30: 512, 31: 512, 32: 2048, 33: 512, 34: 461, 35: 512, 36: 512} diff --git a/configs/pruning/mmcls/dmcp/DMCP_SUBNET_IMAGENET.yaml b/configs/pruning/mmcls/dmcp/DMCP_SUBNET_IMAGENET.yaml deleted file mode 100644 index aa45b040b..000000000 --- a/configs/pruning/mmcls/dmcp/DMCP_SUBNET_IMAGENET.yaml +++ /dev/null @@ -1,4 +0,0 @@ -{0: 64, 1: 64, 2: 64, 3: 256, 4: 64, 5: 64, 6: 64, 7: 64, 8: 128, 9: 128, 10: 512, - 11: 128, 12: 128, 13: 128, 14: 128, 15: 128, 16: 128, 17: 256, 18: 256, 19: 1024, - 20: 256, 21: 256, 22: 256, 23: 256, 24: 256, 25: 256, 26: 256, 27: 256, 28: 256, - 29: 256, 30: 512, 31: 512, 32: 2048, 33: 512, 34: 512, 35: 512, 36: 512} diff --git a/configs/pruning/mmcls/dmcp/README.md b/configs/pruning/mmcls/dmcp/README.md index fee4fe426..53328f538 100644 --- a/configs/pruning/mmcls/dmcp/README.md +++ b/configs/pruning/mmcls/dmcp/README.md @@ -4,42 +4,55 @@ Recent works imply that the channel pruning can be regarded as searching optimal sub-structure from unpruned networks. However, existing works based on this observation require training and evaluating a large number of structures, which limits their application. In this paper, we propose a novel differentiable method for channel pruning, named Differentiable Markov Channel Pruning (DMCP), to efficiently search the optimal sub-structure. Our method is differentiable and can be directly optimized by gradient descent with respect to standard task loss and budget regularization (e.g. FLOPs constraint). 
In DMCP, we model the channel pruning as a Markov process, in which each state represents for retaining the corresponding channel during pruning, and transitions between states denote the pruning process. In the end, our method is able to implicitly select the proper number of channels in each layer by the Markov process with optimized transitions. To validate the effectiveness of our method, we perform extensive experiments on Imagenet with ResNet and MobilenetV2. Results show our method can achieve consistent improvement than stateof-the-art pruning methods in various FLOPs settings. -## Citation - -```latex -@inproceedings{guo2020dmcp, - title={Dmcp: Differentiable markov channel pruning for neural networks}, - author={Guo, Shaopeng and Wang, Yujie and Li, Quanquan and Yan, Junjie}, - booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, - pages={1539--1547}, - year={2020} -} -``` - -## Results and models - -### 1.Classification - -| Dataset | Supernet | Flops(M) | Top-1 (%) | Top-5 (%) | Config | Download | -| :------: | :------: | :------: | :-------: | :-------: | :-----------------------------------------: | :------------------------------------------------------: | -| ImageNet | ResNet50 | - | - | - | [config](./dmcp_resnet50_supernet_8xb32.py) | \[model\] / [arch](./DMCP_SUBNET_IMAGENET.yaml)/ \[log\] | - ## Getting Started #### Train DMCP from scratch ```bash -sh tools/slurm_train.sh $PARTITION $JOB_NAME \ - configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py \ +GPUS=32 sh tools/slurm_train.sh $PARTITION $JOB_NAME \ + configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_32xb64.py \ --work-dir $WORK_DIR ``` -#### After the previous steps, retrain the selected sub-network based on +#### After the previous steps, retrain the selected pruned sub-network -#### the output structure 'DMCP_SUBNET_IMAGENET.yaml' +#### with 2GFLOPs based on the output structure + +#### 'DMCP_R50_2G.yaml'(SOURCECODE) ```bash -sh 
tools/slurm_train.sh $PARTITION $JOB_NAME \ - configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py \ +GPUS=32 sh tools/slurm_train.sh $PARTITION $JOB_NAME \ + configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py \ --work-dir $WORK_DIR ``` + +## Results and models + +### 1.Classification + +| Dataset | Supernet | Flops(M) | Top-1 (%) | Top-5 (%) | config | Download | Remark | +| :------: | :---------: | :-------------: | :-------: | :-------: | :------------------------------------------: | :----------------------: | :-----------------------------: | +| ImageNet | ResNet50 | 4.09G(Supernet) | 77.46 | 93.55 | [config](./dmcp_resnet50_supernet_32xb64.py) | [model](<>) / [log](<>) | | +| ImageNet | ResNet50 | 2.07G(Subnet) | 76.11 | 93.01 | [config](./dmcp_resnet50_subnet_32xb64.py) | [model](<>) / [log](<>) | [arch\*](./DMCP_R50_2G.yaml) | +| ImageNet | ResNet50 | 1.05G(Subnet) | 74.12 | 92.33 | [config](./dmcp_resnet50_subnet_32xb64.py) | [model](<>) / [log](<>) | [arch](./DMCP_R50_1G.yaml) | +| ImageNet | MobilenetV2 | 319M(Supernet) | 72.30 | 90.42 | [config](./dmcp_mbv2_supernet_32xb64.py) | [model](<>) / [log](<>) | | +| ImageNet | MobilenetV2 | 209M(Subnet) | 71.94 | 90.05 | [config](./dmcp_mbv2_subnet_32xb64.py) | [model](<>) / [log](<>) | [arch](./DMCP_MBV2_200M.yaml) | +| ImageNet | MobilenetV2 | 102M(Subnet) | 67.22 | 88.61 | [config](./dmcp_mbv2_subnet_32xb64.py) | [model](<>) / [log](<>) | [arch\*](./DMCP_MBV2_100M.yaml) | + +**Note** + +1. Architectures marked with \* are converted from the [official repo](https://github.com/Zx55/dmcp). +2. To obtain sub-network structures with different pruning rates, modify `target_flops` in `model` in the supernet config. Note that this value is in MFLOPs; for example, `target_flops=1000` yields a subnet with 1 GFLOPs. +3. More models with different pruning rates will be released later.
+ +## Citation + +```latex +@inproceedings{guo2020dmcp, + title={Dmcp: Differentiable markov channel pruning for neural networks}, + author={Guo, Shaopeng and Wang, Yujie and Li, Quanquan and Yan, Junjie}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={1539--1547}, + year={2020} +} +``` diff --git a/configs/pruning/mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py b/configs/pruning/mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py new file mode 100644 index 000000000..5bb840532 --- /dev/null +++ b/configs/pruning/mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py @@ -0,0 +1,49 @@ +_base_ = ['dmcp_mbv2_supernet_32xb64.py'] + +paramwise_cfg = dict( + norm_decay_mult=0.0, + bias_decay_mult=0.0) + +_base_.optim_wrapper = dict( + optimizer=dict( + type='SGD', + lr=0.8, + momentum=0.9, + weight_decay=0.00004, + nesterov=True), + paramwise_cfg=paramwise_cfg) + +max_epochs = 250 + +_base_.param_scheduler = [ + # warm up learning rate scheduler + dict( + type='LinearLR', + start_factor=0.25, + by_epoch=True, + begin=0, + end=3, + convert_to_iter_based=True), + # main learning rate scheduler + dict( + type='CosineAnnealingLR', + T_max=max_epochs, + eta_min=1e-5, + by_epoch=True, + begin=3, + end=max_epochs, + convert_to_iter_based=True), +] + +_base_.train_cfg = dict(by_epoch=True, max_epochs=max_epochs, val_interval=1) + +custom_hooks = None + +# model settings +model = _base_.model +model['fix_subnet'] = 'configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.yaml' + +default_hooks = _base_.default_hooks +default_hooks['checkpoint'] = dict(type='CheckpointHook', interval=5) + +randomness = dict(seed=4872, diff_rank_seed=True) diff --git a/configs/pruning/mmcls/dmcp/dmcp_mbv2_supernet_32xb64.py b/configs/pruning/mmcls/dmcp/dmcp_mbv2_supernet_32xb64.py new file mode 100644 index 000000000..306187ead --- /dev/null +++ b/configs/pruning/mmcls/dmcp/dmcp_mbv2_supernet_32xb64.py @@ -0,0 +1,62 @@ +_base_ = [ + 'mmcls::_base_/schedules/imagenet_bs256.py', + 
'mmcls::_base_/default_runtime.py', + '../../../_base_/settings/imagenet_bs2048_dmcp.py', +] + +# model settings +supernet = dict( + _scope_='mmcls', + type='ImageClassifier', + backbone=dict(type='MobileNetV2', widen_factor=1.0), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='LinearClsHead', + num_classes=1000, + in_channels=1280, + loss=dict( + type='mmcls.LabelSmoothLoss', + mode='original', + num_classes=1000, + label_smooth_val=0.1, + loss_weight=1.0), + topk=(1, 5), + )) + +model = dict( + _scope_='mmrazor', + type='DMCP', + architecture=supernet, + distiller=dict( + type='ConfigurableDistiller', + teacher_recorders=dict( + fc=dict(type='ModuleOutputs', source='head.fc')), + student_recorders=dict( + fc=dict(type='ModuleOutputs', source='head.fc')), + distill_losses=dict( + loss_kl=dict(type='KLDivergence', tau=1, loss_weight=1)), + loss_forward_mappings=dict( + loss_kl=dict( + preds_S=dict(recorder='fc', from_student=True), + preds_T=dict(recorder='fc', from_student=False)))), + mutator_cfg=dict( + type='DMCPChannelMutator', + channel_unit_cfg=dict( + type='DMCPChannelUnit', default_args=dict(choice_mode='number')), + parse_cfg=dict( + type='ChannelAnalyzer', + demo_input=(1, 3, 224, 224), + tracer_type='BackwardTracer')), + strategy=['max', 'min', 'scheduled_random', 'arch_random'], + arch_start_train=5000, + arch_train_freq=500, + flop_loss_weight=0.1, + distillation_times=10000, + target_flops=100) + +model_wrapper_cfg = dict( + type='mmrazor.DMCPDDP', + broadcast_buffers=False, + find_unused_parameters=True) + +randomness = dict(seed=0, diff_rank_seed=True) diff --git a/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py b/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py new file mode 100644 index 000000000..2e6e10a50 --- /dev/null +++ b/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py @@ -0,0 +1,49 @@ +_base_ = ['dmcp_resnet50_supernet_32xb64.py'] + +paramwise_cfg = dict( + norm_decay_mult=0.0, + 
bias_decay_mult=0.0) + +_base_.optim_wrapper = dict( + optimizer=dict( + type='SGD', + lr=0.8, + momentum=0.9, + weight_decay=0.0001, + nesterov=True), + paramwise_cfg=paramwise_cfg) + +max_epochs = 250 + +_base_.param_scheduler = [ + # warm up learning rate scheduler + dict( + type='LinearLR', + start_factor=0.25, + by_epoch=True, + begin=0, + end=2, + convert_to_iter_based=True), + # main learning rate scheduler + dict( + type='CosineAnnealingLR', + T_max=max_epochs, + eta_min=1e-5, + by_epoch=True, + begin=2, + end=max_epochs, + convert_to_iter_based=True), +] + +_base_.train_cfg = dict(by_epoch=True, max_epochs=max_epochs, val_interval=1) + +custom_hooks = None + +# model settings +model = _base_.model +model['fix_subnet'] = 'configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml' + +default_hooks = _base_.default_hooks +default_hooks['checkpoint'] = dict(type='CheckpointHook', interval=5) + +randomness = dict(seed=2016, diff_rank_seed=True) diff --git a/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py b/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py deleted file mode 100644 index c835aedd1..000000000 --- a/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_8xb32.py +++ /dev/null @@ -1,10 +0,0 @@ -_base_ = ['dmcp_resnet_8xb32.py'] - -_base_.optim_wrapper = dict( - optimizer=dict(type='SGD', lr=0.25, momentum=0.9, weight_decay=0.0001)) - -custom_hooks = None - -# model settings -model = _base_.model -model['fix_subnet'] = 'configs/pruning/mmcls/dmcp/DMCP_SUBNET_IMAGENET.yaml' diff --git a/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py b/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_32xb64.py similarity index 56% rename from configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py rename to configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_32xb64.py index b8fcddcc3..b0934af76 100644 --- a/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_8xb32.py +++ b/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_32xb64.py @@ -1,27 +1,32 @@ _base_ = [ - 
'mmcls::_base_/datasets/imagenet_bs32.py', 'mmcls::_base_/schedules/imagenet_bs256.py', - 'mmcls::_base_/default_runtime.py' + 'mmcls::_base_/default_runtime.py', + '../../../_base_/settings/imagenet_bs2048_dmcp.py', ] -optim_wrapper = dict( - _delete_=True, - constructor='mmrazor.SeparateOptimWrapperConstructor', - architecture=dict( - type='OptimWrapper', - optimizer=dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=3e-4), - clip_grad=dict(max_norm=5, norm_type=2)), - mutator=dict( - type='OptimWrapper', - optimizer=dict(type='Adam', lr=3e-4, weight_decay=1e-3))) - -param_scheduler = dict( - type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1) - -train_cfg = dict(by_epoch=True, max_epochs=120, val_interval=1) -data_preprocessor = {'type': 'mmcls.ClsDataPreprocessor'} - -custom_hooks = [dict(type='DMCPSubnetHook')] +# model settings +supernet = dict( + _scope_='mmcls', + type='ImageClassifier', + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(3, ), + style='pytorch'), + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='LinearClsHead', + num_classes=1000, + in_channels=2048, + loss=dict( + type='mmcls.LabelSmoothLoss', + mode='original', + num_classes=1000, + label_smooth_val=0.1, + loss_weight=1.0), + topk=(1, 5), + )) # model settings model = dict( @@ -48,15 +53,17 @@ parse_cfg=dict( type='ChannelAnalyzer', demo_input=(1, 3, 224, 224), - tracer_type='BackwardTracer', - loss_calculator=dict(type='ImageClassifierPseudoLoss'))), - arch_start_train=10000, - step_freq=500, + tracer_type='BackwardTracer')), + strategy=['max', 'min', 'scheduled_random', 'arch_random'], + arch_start_train=5000, + arch_train_freq=500, flop_loss_weight=0.1, - distillation_times=20000, + distillation_times=10000, target_flops=2000) model_wrapper_cfg = dict( type='mmrazor.DMCPDDP', broadcast_buffers=False, find_unused_parameters=True) + +randomness = dict(seed=2020, diff_rank_seed=True) diff --git 
a/mmrazor/engine/hooks/dmcp_subnet_hook.py b/mmrazor/engine/hooks/dmcp_subnet_hook.py index 4a8813df3..3ca13e4e1 100644 --- a/mmrazor/engine/hooks/dmcp_subnet_hook.py +++ b/mmrazor/engine/hooks/dmcp_subnet_hook.py @@ -11,12 +11,22 @@ @HOOKS.register_module() class DMCPSubnetHook(Hook): + """Dump subnet periodically. + + Args: + subnet_sample_num (int): The number of networks sampled, + the last of which is the sub-network sampled in ``expected`` + mode and the others are sampled in ``direct`` mode. + Defaults to 10. + """ + priority = 'VERY_LOW' def __init__(self, subnet_sample_num: int = 10, **kwargs) -> None: self.subnet_sample_num = subnet_sample_num def _save_subnet(self, arch_space_dict, save_path): + """Save the sampled sub-network structure in yaml format.""" _cfg = dict() for k, v in arch_space_dict.items(): _cfg[k] = int(v) @@ -25,22 +35,27 @@ def _save_subnet(self, arch_space_dict, save_path): file.write(yaml.dump(_cfg, allow_unicode=True)) def after_run(self, runner): + """Save the sampled subnet under target FLOPs. + + Args: + runner (Runner): The runner of the training process.
+ """ model = getattr(runner.model, 'module', runner.model) runner.logger.info('Sampling...') num_sample = self.subnet_sample_num root_dir = os.path.join(runner.work_dir, 'model_sample') - target_flops = model.target_flops + target_flops = model.target_flops * 1e6 if not os.path.exists(root_dir): os.makedirs(root_dir) for i in range(num_sample + 1): cur_flops = target_flops * 10 - while cur_flops > target_flops * 1.02 or \ - cur_flops < target_flops * 0.98: + while cur_flops > target_flops * 1.05 or \ + cur_flops < target_flops * 0.95: model.set_subnet(mode='direct', arch_train=False) - cur_flops = model.mutator.calc_current_flops(model) + cur_flops = model.calc_current_flops() if i == num_sample: model.set_subnet(mode='expected', arch_train=False) @@ -51,5 +66,5 @@ def after_run(self, runner): save_path = os.path.join(root_dir, 'subnet_{}.yaml'.format(i + 1)) runner.logger.info( - f'Driect sample(DS) arch with FlOP(MB): {cur_flops}') + f'Driect sample(DS) arch with FlOP(MB): {cur_flops/1e6}') self._save_subnet(model.mutator.current_choices, save_path) diff --git a/mmrazor/models/algorithms/__init__.py b/mmrazor/models/algorithms/__init__.py index 0e1145835..3cef96dfe 100644 --- a/mmrazor/models/algorithms/__init__.py +++ b/mmrazor/models/algorithms/__init__.py @@ -5,7 +5,7 @@ SelfDistill, SingleTeacherDistill) from .nas import (DSNAS, DSNASDDP, SPOS, Autoformer, AutoSlim, AutoSlimDDP, BigNAS, BigNASDDP, Darts, DartsDDP) -from .pruning import DCFF, DMCP, SlimmableNetwork, SlimmableNetworkDDP +from .pruning import DCFF, DMCP, DMCPDDP, SlimmableNetwork, SlimmableNetworkDDP from .pruning.ite_prune_algorithm import ItePruneAlgorithm __all__ = [ @@ -14,5 +14,5 @@ 'Darts', 'DartsDDP', 'DCFF', 'SelfDistill', 'DataFreeDistillation', 'DAFLDataFreeDistillation', 'OverhaulFeatureDistillation', 'ItePruneAlgorithm', 'DSNAS', 'DSNASDDP', 'Autoformer', 'BigNAS', - 'BigNASDDP', 'DMCP' + 'BigNASDDP', 'DMCP', 'DMCPDDP' ] diff --git a/mmrazor/models/algorithms/pruning/dmcp.py 
b/mmrazor/models/algorithms/pruning/dmcp.py index 373048a7d..b11e2fde4 100644 --- a/mmrazor/models/algorithms/pruning/dmcp.py +++ b/mmrazor/models/algorithms/pruning/dmcp.py @@ -69,15 +69,11 @@ def __init__(self, super().__init__(architecture, data_preprocessor, init_cfg) self.arch_start_train = arch_start_train + self.arch_train_freq = arch_train_freq self.strategy = strategy self.distillation_times = distillation_times self.target_flops = target_flops - if distiller: - self.distiller = self._build_distiller(distiller) - self.distiller.prepare_from_teacher(self.architecture) - self.distiller.prepare_from_student(self.architecture) - self.flops_loss_type = flops_loss_type self.flop_loss_weight = flop_loss_weight self.cur_sample_prob = 1.0 @@ -86,6 +82,10 @@ def __init__(self, self.mutator: ChannelMutator = MODELS.build(mutator_cfg) self.mutator.prepare_from_supernet(self.architecture) + self.distiller = self._build_distiller(distiller) + self.distiller.prepare_from_teacher(self.architecture) + self.distiller.prepare_from_student(self.architecture) + if fix_subnet: self._load_fix_subnet(fix_subnet) self.is_supernet = False @@ -93,13 +93,16 @@ def __init__(self, self.is_supernet = True def _load_fix_subnet(self, save_path): + """Load sub-network structure and fix.""" from mmrazor.structures import load_fix_subnet with open(save_path) as file: - self.mutator.set_choices(yaml.load(file.read())) + self.mutator.set_choices( + yaml.load(file.read(), Loader=yaml.FullLoader)) load_fix_subnet(self.architecture, save_path) def _build_distiller( self, distiller: VALID_DISTILLER_TYPE) -> ConfigurableDistiller: + """Build distiller.""" if isinstance(distiller, dict): distiller = MODELS.build(distiller) if not isinstance(distiller, ConfigurableDistiller): @@ -118,7 +121,7 @@ def set_subnet(self, mode, arch_train=None) -> None: def train_step(self, data: List[dict], optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]: - + """The iteration step during training.""" if not 
self.arch_train and \ self._iter > self.arch_start_train: self.arch_train = True @@ -133,10 +136,10 @@ def distill_step( self ), self.distiller.student_recorders: # type: ignore hard_loss = self(batch_inputs, data_samples, mode='loss') - soft_loss = self.distiller.compute_distill_losses() - subnet_losses.update(hard_loss) + if self._iter > self.distillation_times: + soft_loss = self.distiller.compute_distill_losses() subnet_losses.update(soft_loss) parsed_subnet_losses, _ = self.parse_losses(subnet_losses) @@ -147,9 +150,10 @@ def distill_step( batch_inputs, data_samples = self.data_preprocessor(data, True).values() - total_losses = dict() + total_losses = dict() # update model parameters + max_net_num = min_net_num = random_net_num = direct_net_num = 1 for kind in self.strategy: if kind in ('max'): self.set_subnet(mode='max') @@ -163,26 +167,34 @@ def distill_step( optim_wrapper['architecture'].update_params( parsed_max_subnet_losses) total_losses.update( - add_prefix(max_subnet_losses, 'max_subnet')) + add_prefix(max_subnet_losses, + f'max_subnet{max_net_num}')) + max_net_num += 1 elif kind in ('min'): self.set_subnet(mode='min') min_subnet_losses =\ distill_step(batch_inputs, data_samples) total_losses.update( - add_prefix(min_subnet_losses, 'min_subnet')) + add_prefix(min_subnet_losses, + f'min_subnet{min_net_num}')) + min_net_num += 1 elif kind in ('arch_random'): if self.arch_train: self.set_subnet(mode='direct') direct_subnet_losses = distill_step( batch_inputs, data_samples) total_losses.update( - add_prefix(direct_subnet_losses, 'direct_subnet')) + add_prefix(direct_subnet_losses, + f'direct_subnet{direct_net_num}')) + direct_net_num += 1 else: self.set_subnet(mode='random') random_subnet_losses = distill_step( batch_inputs, data_samples) total_losses.update( - add_prefix(random_subnet_losses, 'random_subnet')) + add_prefix(random_subnet_losses, + f'random_subnet{random_net_num}')) + random_net_num += 1 elif kind in ('scheduled_random'): if 
random.uniform(0, 1) > self.cur_sample_prob\ and self.arch_train: @@ -190,13 +202,17 @@ def distill_step( direct_subnet_losses = distill_step( batch_inputs, data_samples) total_losses.update( - add_prefix(direct_subnet_losses, 'direct_subnet')) + add_prefix(direct_subnet_losses, + f'direct_subnet{direct_net_num}')) + direct_net_num += 1 else: self.set_subnet(mode='random') random_subnet_losses = distill_step( batch_inputs, data_samples) total_losses.update( - add_prefix(random_subnet_losses, 'random_subnet')) + add_prefix(random_subnet_losses, + f'random_subnet{random_net_num}')) + random_net_num += 1 self.cur_sample_prob *= 0.9999 # update arch parameters @@ -216,6 +232,15 @@ def _update_arch_params(self, data_samples: Optional[List[BaseDataElement]], optim_wrapper: OptimWrapper, mode: str = 'loss') -> Dict: + """Update the arch parameters in mutator. + + Returns: + dict: It should contain 2 keys: ``arch_loss``, ``flops_loss``. + ``arch_loss`` is a tensor for back propagation, which can be a + weighted sum of multiple losses. + ``flops_loss`` contains all the variables to be sent to the + logger. + """ arch_params_loss = dict() self.eval() # update arch_loss @@ -240,10 +265,18 @@ def _update_arch_params(self, def _compute_flops_loss(self, expected_flops): """Calculation of loss functions of arch parameters. - Calculate the difference between the expected FLOPs and the target - FLOPs in the units of M. + Calculate the difference between the calculated FLOPs and the target + FLOPs(MFLOPs). + + Args: + expected_flops (tensor|float): FLOPs calculated from the current + number of sampling channels + Returns: + tensor|float: A loss calculated from the input expected FLOPs and + the target FLOPs. And the type of this loss should be the same + as the expected FLOPs. 
""" - flops_error = expected_flops - self.target_flops + flops_error = expected_flops - self.target_flops * 1e6 if self.flops_loss_type == 'l2': floss = torch.pow(flops_error, 2) @@ -270,9 +303,12 @@ def _compute_flops_loss(self, expected_flops): return floss * self.flop_loss_weight def calc_current_flops(self): - estimator = ResourceEstimator(units=None) + """Calculate the FLOPs under the current sampled network.""" + estimator = ResourceEstimator() model = getattr(self, 'module', self) - estimation = estimator.estimate(model=model.architecture.backbone) + estimation = estimator.estimate( + model=model.architecture.backbone, + flops_params_cfg=dict(units=None)) return estimation['flops'] def forward(self, @@ -295,6 +331,7 @@ def _iter(self): @MODEL_WRAPPERS.register_module() class DMCPDDP(MMDistributedDataParallel): + """DDP for DMCP and rewrite train_step of MMDDP.""" def __init__(self, *, @@ -307,7 +344,7 @@ def __init__(self, def train_step(self, data: List[dict], optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]: - + """The iteration step during training.""" if not self.module.arch_train and \ self.module._iter > self.module.arch_start_train: self.module.arch_train = True @@ -321,10 +358,10 @@ def distill_step( with optim_wrapper['architecture'].optim_context( self), self.module.distiller.student_recorders: hard_loss = self(batch_inputs, data_samples, mode='loss') - soft_loss = self.module.distiller.compute_distill_losses() - subnet_losses.update(hard_loss) if self.module._iter > self.module.distillation_times: + soft_loss = \ + self.module.distiller.compute_distill_losses() subnet_losses.update(soft_loss) parsed_subnet_losses, _ = \ @@ -336,6 +373,7 @@ def distill_step( batch_inputs, data_samples = self.module.data_preprocessor( data, True).values() + total_losses = dict() # update model parameters max_net_num = min_net_num = random_net_num = direct_net_num = 1 @@ -400,12 +438,6 @@ def distill_step( random_net_num += 1 self.module.cur_sample_prob *= 
0.9999 - with optim_wrapper['mutator'].optim_context(self): - optim_wrapper['mutator'].zero_grad() - mutator_loss = self.module._update_arch_params( - batch_inputs, data_samples, optim_wrapper, mode='loss') - total_losses.update(mutator_loss) - # update arch parameters if self.module.arch_train \ and self.module._iter % self.module.arch_train_freq == 0: diff --git a/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py b/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py index 1c53e5784..eb5dd3b75 100644 --- a/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py +++ b/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_norm.py @@ -401,29 +401,18 @@ def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) self.mutable_attrs: Dict[str, Optional[BaseMutable]] = nn.ModuleDict() - @classmethod - def convert_from(cls, module: _BatchNorm): - """Convert a _BatchNorm module to a DynamicBatchNorm. - - Args: - module (:obj:`torch.nn._BatchNorm`): The original BatchNorm module. 
- """ - dynamic_bn = cls( - num_features=module.num_features, - eps=module.eps, - momentum=module.momentum, - affine=module.affine, - track_running_stats=module.track_running_stats) - return dynamic_bn - - def forward(self, input: Tensor, arch_param=None, arch_attr=None): + def forward(self, + input: Tensor, + arch_param=None, + arch_attr=None) -> Tensor: + """Forward of dynamic DMCPBatchNorm2d.""" out = self.forward_batchnorm(input) if arch_param is not None: out = self.forward_arch_param(out, arch_param, arch_attr) return out def forward_batchnorm(self, input: Tensor) -> Tensor: - """Forward of dynamic BatchNormxd OP.""" + """Forward of BatchNorm2d.""" self._check_input_dim(input) if self.momentum is None: @@ -460,7 +449,9 @@ def forward_batchnorm(self, input: Tensor) -> Tensor: return out - def forward_arch_param(self, input: Tensor, arch_param, arch_attr): + def forward_arch_param(self, input: Tensor, arch_param, + arch_attr) -> Tensor: + """Forward of arch parameters.""" size_x = input.size() (group_size, num_groups, min_ch) = arch_attr diff --git a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py index 81c107352..232561998 100644 --- a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py @@ -53,7 +53,9 @@ def _build_arch_param(self, num_choices) -> nn.Parameter: """Build learnable architecture parameters.""" return nn.Parameter(torch.zeros(num_choices)) - def prepare_arch_params(self, supernet: Module): + def prepare_arch_params(self, supernet: Module) -> None: + """Prepare the arch parameters and associate them with the + corresponding op.""" self.arch_params = nn.ParameterDict() self._op_arch_align = dict() self._arch_params_attr = dict() @@ -95,7 +97,9 @@ def _generate_arch_message(self, out_channels: int) -> tuple: return (group_size, num_groups, min_ch) - def modify_supernet_forward(self, 
arch_train: str): + def modify_supernet_forward(self, arch_train: str) -> None: + """According to the arch_train, assign the arch parameter to the + forward of the corresponding op.""" for module, group_id in self._bn_arch_align.items(): arch_param: Optional[nn.Parameter] = None arch_params_attr: Optional[Tuple] = None @@ -106,6 +110,7 @@ def modify_supernet_forward(self, arch_train: str): arch_param=arch_param, arch_attr=arch_params_attr) def sample_subnet(self, mode: str, arch_train: str) -> None: + """Sampling according to the input mode.""" choices = dict() for group_id, _ in self.search_groups.items(): choices[group_id] = self._prune_by_arch(mode, group_id) diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py b/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py index 13ef0677c..c027f0c84 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/conv_layer_counter.py @@ -67,6 +67,8 @@ class DynamicConv2dCounter(ConvCounter): @staticmethod def add_count_hook(module: nn.Conv2d, input, output): + """Calculate FLOPs and params based on the dynamic channels of conv + layers.""" input = input[0] batch_size = input.shape[0] diff --git a/tests/test_models/test_algorithms/test_dmcp.py b/tests/test_models/test_algorithms/test_dmcp.py index dbd0a267d..044dea9d0 100644 --- a/tests/test_models/test_algorithms/test_dmcp.py +++ b/tests/test_models/test_algorithms/test_dmcp.py @@ -1,12 +1,24 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import copy +import os +from typing import Dict, Union from unittest import TestCase +import pytest import torch +import torch.distributed as dist +from mmcls.structures import ClsDataSample +from mmengine import MessageHub +from mmengine.optim.optimizer import OptimWrapper, OptimWrapperDict +from torch.optim import SGD -from mmrazor.models import DMCP, DMCPChannelMutator +from mmrazor.models.algorithms import DMCP, DMCPDDP +from mmrazor.models.mutators import DMCPChannelMutator from mmrazor.registry import MODELS +MUTATOR_TYPE = Union[torch.nn.Module, Dict] +DISTILLER_TYPE = Union[torch.nn.Module, Dict] + MUTATOR_CFG = dict( type='mmrazor.DMCPChannelMutator', channel_unit_cfg={'type': 'DMCPChannelUnit'}, @@ -33,12 +45,27 @@ architecture=dict( cfg_path='mmcls::resnet/resnet50_8xb32_in1k.py', pretrained=False), mutator_cfg=MUTATOR_CFG, - distiller=DISTILLER_CFG) + distiller=DISTILLER_CFG, + strategy=['max', 'min', 'scheduled_random', 'arch_random'], + arch_start_train=10, + distillation_times=10, + arch_train_freq=10) class TestDMCP(TestCase): + def _prepare_fake_data(self) -> Dict: + imgs = torch.randn(16, 3, 224, 224).to(self.device) + data_samples = [ + ClsDataSample().set_gt_label(torch.randint(0, 1000, + (16, ))).to(self.device) + ] + + return {'inputs': imgs, 'data_samples': data_samples} + def test_init(self): + self.device = 'cuda' if torch.cuda.is_available() else 'cpu' + ALGORITHM_CFG_SUPERNET = copy.deepcopy(ALGORITHM_CFG) # initiate dmcp with built `algorithm`. dmcp_algo = MODELS.build(ALGORITHM_CFG_SUPERNET) @@ -49,18 +76,128 @@ def test_init(self): # dmcp_algo support training self.assertTrue(dmcp_algo.is_supernet) - # initiate dmcp without any `mutator`. ALGORITHM_CFG_SUPERNET.pop('type') - ALGORITHM_CFG_SUPERNET['mutator_cfg'] = None + fake_distiller = 'distiller' + # initiate dmcp without `distiller`. 
+ with self.assertRaisesRegex( + TypeError, 'distiller should be a `dict` or ' + '`ConfigurableDistiller` instance, but got ' + f'{type(fake_distiller)}'): + ALGORITHM_CFG_SUPERNET['distiller'] = fake_distiller + _ = DMCP(**ALGORITHM_CFG_SUPERNET) + # initiate dmcp without any `mutator`. + ALGORITHM_CFG_SUPERNET['mutator_cfg'] = None with self.assertRaisesRegex( AttributeError, "'NoneType' object has no attribute 'get'"): _ = DMCP(**ALGORITHM_CFG_SUPERNET) def test_loss(self): - # supernet + # subernet inputs = torch.randn(1, 3, 224, 224) dmcp = MODELS.build(ALGORITHM_CFG) dmcp.is_supernet = False loss = dmcp(inputs, mode='tensor') assert loss.size(1) == 1000 + + def test_dmcp_train_step(self): + # supernet + self.device = 'cuda' if torch.cuda.is_available() else 'cpu' + inputs = self._prepare_fake_data() + dmcp = MODELS.build(ALGORITHM_CFG) + optim_wrapper_dict = OptimWrapperDict( + architecture=OptimWrapper(SGD(dmcp.parameters(), lr=0.1)), + mutator=OptimWrapper(SGD(dmcp.parameters(), lr=0.01))) + + message_hub = MessageHub.get_current_instance() + + message_hub.update_info('iter', 20) + dmcp.cur_sample_prob = -1 + + losses = dmcp.train_step(inputs, optim_wrapper_dict) + + assert len(losses) == 9 + assert losses['max_subnet1.loss'] > 0 + assert losses['min_subnet1.loss'] > 0 + assert losses['min_subnet1.loss_kl'] + 1e-5 > 0 + assert losses['direct_subnet1.loss'] > 0 + assert losses['direct_subnet1.loss_kl'] + 1e-5 > 0 + assert losses['direct_subnet2.loss'] > 0 + assert losses['direct_subnet2.loss_kl'] + 1e-5 > 0 + assert losses['arch.loss'] > 0 + assert losses['flops.loss'] > 0 + + message_hub.update_info('iter', 0) + dmcp.arch_train = False + losses = dmcp.train_step(inputs, optim_wrapper_dict) + + assert len(losses) == 4 + assert losses['max_subnet1.loss'] > 0 + assert losses['min_subnet1.loss'] > 0 + assert losses['random_subnet1.loss'] > 0 + assert losses['random_subnet2.loss'] > 0 + + def test_dmcp_load_fix_subnet(self): + ALGORITHM_CFG_SUPERNET = 
copy.deepcopy(ALGORITHM_CFG) + ALGORITHM_CFG_SUPERNET['fix_subnet'] = \ + 'configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml' + _ = MODELS.build(ALGORITHM_CFG_SUPERNET) + + def test_dmcp_compute_flops_loss(self): + dmcp = MODELS.build(ALGORITHM_CFG) + for type in ['l2', 'inverted_log_l1', 'log_l1', 'l1']: + dmcp.flops_loss_type = type + fake_flops = torch.tensor(100) + dmcp._compute_flops_loss(expected_flops=fake_flops) + + +class TestDMCPDDP(TestDMCP): + + @classmethod + def setUpClass(cls) -> None: + os.environ['MASTER_ADDR'] = 'localhost' + os.environ['MASTER_PORT'] = '12345' + + # initialize the process group + backend = 'nccl' if torch.cuda.is_available() else 'gloo' + dist.init_process_group(backend, rank=0, world_size=1) + + def prepare_model(self, device_ids=None) -> DMCPDDP: + self.device = 'cuda' if torch.cuda.is_available() else 'cpu' + + dmcp_algo = MODELS.build(ALGORITHM_CFG).to(self.device) + self.assertIsInstance(dmcp_algo, DMCP) + + return DMCPDDP( + module=dmcp_algo, + find_unused_parameters=True, + device_ids=device_ids) + + @classmethod + def tearDownClass(cls) -> None: + dist.destroy_process_group() + + @pytest.mark.skipif( + not torch.cuda.is_available(), reason='cuda device is not avaliable') + def test_init(self) -> None: + ddp_model = self.prepare_model() + self.assertIsInstance(ddp_model, DMCPDDP) + + def test_dmcpddp_train_step(self) -> None: + ddp_model = self.prepare_model() + data = self._prepare_fake_data() + optim_wrapper_dict = OptimWrapperDict( + architecture=OptimWrapper(SGD(ddp_model.parameters(), lr=0.1)), + mutator=OptimWrapper(SGD(ddp_model.parameters(), lr=0.01))) + + message_hub = MessageHub.get_current_instance() + + message_hub.update_info('iter', 20) + ddp_model.module.cur_sample_prob = -1 + loss = ddp_model.train_step(data, optim_wrapper_dict) + + message_hub.update_info('iter', 0) + ddp_model.module.arch_train = False + loss = ddp_model.train_step(data, optim_wrapper_dict) + + self.assertIsNotNone(loss) From 
c1ce2ac5db7a23c6255740ae11636049cb08b01c Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Wed, 1 Feb 2023 15:26:17 +0800 Subject: [PATCH 21/59] fix arch YAMLs --- configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.yaml | 10 +++++----- configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.yaml b/configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.yaml index 10afcf74c..41a8be9ec 100644 --- a/configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.yaml +++ b/configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.yaml @@ -1,8 +1,8 @@ -{0: 9, -1: 10, -2: 36, 3: 16, 4: 16, -5: 48, 6: 21, 7: 41, 8: 22, -9: 60, 10: 24, 11: 44, 12: 272, 13: 272, +{0: 9, +1: 10, +2: 36, 3: 16, 4: 16, +5: 48, 6: 21, 7: 41, 8: 22, +9: 60, 10: 24, 11: 44, 12: 272, 13: 272, 14: 310, 15: 36, 16: 294, 17: 351, 18: 693, 19: 80, 20: 96, 21: 864, 22: 1440, 23: 192, diff --git a/configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml b/configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml index 6bd0bec96..3af6c995c 100644 --- a/configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml +++ b/configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml @@ -1,5 +1,5 @@ -{0: 52, +{0: 52, 1: 22, 2: 22, 3: 106, 4: 16, 5: 16, 6: 40, 7: 16, -8: 68, 9: 56, 10: 155, 11: 32, 12: 68, 13: 56, 14: 56, 15: 80, 16: 92, +8: 68, 9: 56, 10: 155, 11: 32, 12: 68, 13: 56, 14: 56, 15: 80, 16: 92, 17: 256, 18: 256, 19: 1024, 20: 106, 21: 106, 22: 131, 23: 256, 24: 131, 25: 256, 26: 256, 27: 256, 28: 256, 29: 256, 30: 512, 31: 512, 32: 2048, 33: 512, 34: 461, 35: 512, 36: 512} From 0cba028d14cfe22f17152ec2afada04589b17e30 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Wed, 1 Feb 2023 16:22:44 +0800 Subject: [PATCH 22/59] fix yapf --- configs/_base_/settings/imagenet_bs2048_dmcp.py | 4 +--- configs/pruning/mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py | 10 ++-------- .../pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py | 10 ++-------- 3 files changed, 5 insertions(+), 19 deletions(-) diff --git 
a/configs/_base_/settings/imagenet_bs2048_dmcp.py b/configs/_base_/settings/imagenet_bs2048_dmcp.py index 161137392..f6ae09579 100644 --- a/configs/_base_/settings/imagenet_bs2048_dmcp.py +++ b/configs/_base_/settings/imagenet_bs2048_dmcp.py @@ -23,9 +23,7 @@ ] # optimizer setting -paramwise_cfg = dict( - norm_decay_mult=0.0, - bias_decay_mult=0.0) +paramwise_cfg = dict(norm_decay_mult=0.0, bias_decay_mult=0.0) optim_wrapper = dict( _delete_=True, diff --git a/configs/pruning/mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py b/configs/pruning/mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py index 5bb840532..e97eab2a8 100644 --- a/configs/pruning/mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py +++ b/configs/pruning/mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py @@ -1,16 +1,10 @@ _base_ = ['dmcp_mbv2_supernet_32xb64.py'] -paramwise_cfg = dict( - norm_decay_mult=0.0, - bias_decay_mult=0.0) +paramwise_cfg = dict(norm_decay_mult=0.0, bias_decay_mult=0.0) _base_.optim_wrapper = dict( optimizer=dict( - type='SGD', - lr=0.8, - momentum=0.9, - weight_decay=0.00004, - nesterov=True), + type='SGD', lr=0.8, momentum=0.9, weight_decay=0.00004, nesterov=True), paramwise_cfg=paramwise_cfg) max_epochs = 250 diff --git a/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py b/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py index 2e6e10a50..aa2f12063 100644 --- a/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py +++ b/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py @@ -1,16 +1,10 @@ _base_ = ['dmcp_resnet50_supernet_32xb64.py'] -paramwise_cfg = dict( - norm_decay_mult=0.0, - bias_decay_mult=0.0) +paramwise_cfg = dict(norm_decay_mult=0.0, bias_decay_mult=0.0) _base_.optim_wrapper = dict( optimizer=dict( - type='SGD', - lr=0.8, - momentum=0.9, - weight_decay=0.0001, - nesterov=True), + type='SGD', lr=0.8, momentum=0.9, weight_decay=0.0001, nesterov=True), paramwise_cfg=paramwise_cfg) max_epochs = 250 From a9299cb5190cf6b47fd0ab5e7bf4d6d18bd0cd30 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: 
Wed, 1 Feb 2023 16:31:36 +0800 Subject: [PATCH 23/59] revise mmcv version<=2.0.0rc3 --- .circleci/test.yml | 2 +- .github/workflows/build.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/test.yml b/.circleci/test.yml index 25140a879..aba5fa469 100644 --- a/.circleci/test.yml +++ b/.circleci/test.yml @@ -116,7 +116,7 @@ jobs: command: | docker exec mmrazor pip install -e /mmengine docker exec mmrazor pip install -U openmim - docker exec mmrazor mim install 'mmcv >= 2.0.0rc1' + docker exec mmrazor mim install 'mmcv <= 2.0.0rc3' docker exec mmrazor pip install -e /mmdetection docker exec mmrazor pip install -e /mmclassification docker exec mmrazor pip install -e /mmsegmentation diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e00ed24c8..e4edde3de 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -101,7 +101,7 @@ jobs: - name: Install MMCV run: | pip install -U openmim - mim install 'mmcv >= 2.0.0rc1' + mim install 'mmcv <= 2.0.0rc3' - name: Install MMCls run: pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x - name: Install MMDet From 4bca1f7bb1ced5c093e7cd60f26b577e4ff2922d Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Wed, 1 Feb 2023 16:50:53 +0800 Subject: [PATCH 24/59] fix build.yaml --- .circleci/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/test.yml b/.circleci/test.yml index aba5fa469..c6df453eb 100644 --- a/.circleci/test.yml +++ b/.circleci/test.yml @@ -66,7 +66,7 @@ jobs: command: | pip install git+https://github.com/open-mmlab/mmengine.git@main pip install -U openmim - mim install 'mmcv >= 2.0.0rc1' + mim install 'mmcv <= 2.0.0rc3' pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x pip install git+https://github.com/open-mmlab/mmsegmentation.git@dev-1.x From 117e1e6051753552681d9bfe599042343ff17688 Mon Sep 17 
00:00:00 2001 From: XiaotongLu Date: Wed, 1 Feb 2023 18:05:55 +0800 Subject: [PATCH 25/59] Rollback mmdet to v3.0.0rc5 --- .circleci/test.yml | 2 +- .github/workflows/build.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/test.yml b/.circleci/test.yml index c6df453eb..379b4f304 100644 --- a/.circleci/test.yml +++ b/.circleci/test.yml @@ -103,7 +103,7 @@ jobs: name: Clone Repos command: | git clone -b main --depth 1 https://github.com/open-mmlab/mmengine.git /home/circleci/mmengine - git clone -b dev-3.x --depth 1 https://github.com/open-mmlab/mmdetection.git /home/circleci/mmdetection + git clone -b v3.0.0rc5 --depth 1 https://github.com/open-mmlab/mmdetection.git /home/circleci/mmdetection git clone -b dev-1.x --depth 1 https://github.com/open-mmlab/mmclassification.git /home/circleci/mmclassification git clone -b dev-1.x --depth 1 https://github.com/open-mmlab/mmsegmentation.git /home/circleci/mmsegmentation - run: diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e4edde3de..b792f5216 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -105,7 +105,7 @@ jobs: - name: Install MMCls run: pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x - name: Install MMDet - run: pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x + run: pip install git+https://github.com/open-mmlab/mmdetection.git@v3.0.0rc5 - name: Install MMSeg run: pip install git+https://github.com/open-mmlab/mmsegmentation.git@dev-1.x - name: Install other dependencies From 7051a481ebf5795f76737d1b158a65e5ce1d7afd Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Wed, 1 Feb 2023 18:15:22 +0800 Subject: [PATCH 26/59] Rollback mmdet to v3.0.0rc5 --- .circleci/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/test.yml b/.circleci/test.yml index 379b4f304..2e185c7c7 100644 --- a/.circleci/test.yml +++ b/.circleci/test.yml @@ -68,7 +68,7 @@ jobs: pip 
install -U openmim mim install 'mmcv <= 2.0.0rc3' pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x - pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x + pip install git+https://github.com/open-mmlab/mmdetection.git@v3.0.0rc5 pip install git+https://github.com/open-mmlab/mmsegmentation.git@dev-1.x python -m pip install git+ssh://git@github.com/open-mmlab/mmpose.git@dev-1.x pip install -r requirements.txt From dd00ce7c47d957ed0d1fea4a6c36fc368d508c9e Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Wed, 1 Feb 2023 18:28:47 +0800 Subject: [PATCH 27/59] Rollback mmseg to v1.0.0rc4 --- .circleci/test.yml | 4 ++-- .github/workflows/build.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.circleci/test.yml b/.circleci/test.yml index 2e185c7c7..fb4068739 100644 --- a/.circleci/test.yml +++ b/.circleci/test.yml @@ -69,7 +69,7 @@ jobs: mim install 'mmcv <= 2.0.0rc3' pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x pip install git+https://github.com/open-mmlab/mmdetection.git@v3.0.0rc5 - pip install git+https://github.com/open-mmlab/mmsegmentation.git@dev-1.x + pip install git+https://github.com/open-mmlab/mmsegmentation.git@v1.0.0rc4 python -m pip install git+ssh://git@github.com/open-mmlab/mmpose.git@dev-1.x pip install -r requirements.txt - run: @@ -105,7 +105,7 @@ jobs: git clone -b main --depth 1 https://github.com/open-mmlab/mmengine.git /home/circleci/mmengine git clone -b v3.0.0rc5 --depth 1 https://github.com/open-mmlab/mmdetection.git /home/circleci/mmdetection git clone -b dev-1.x --depth 1 https://github.com/open-mmlab/mmclassification.git /home/circleci/mmclassification - git clone -b dev-1.x --depth 1 https://github.com/open-mmlab/mmsegmentation.git /home/circleci/mmsegmentation + git clone -b v1.0.0rc4 --depth 1 https://github.com/open-mmlab/mmsegmentation.git /home/circleci/mmsegmentation - run: name: Build Docker image command: | diff --git 
a/.github/workflows/build.yml b/.github/workflows/build.yml index b792f5216..8fe20be47 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -107,7 +107,7 @@ jobs: - name: Install MMDet run: pip install git+https://github.com/open-mmlab/mmdetection.git@v3.0.0rc5 - name: Install MMSeg - run: pip install git+https://github.com/open-mmlab/mmsegmentation.git@dev-1.x + run: pip install git+https://github.com/open-mmlab/mmsegmentation.git@v1.0.0rc4 - name: Install other dependencies run: pip install -r requirements.txt - name: Build and install From 4c7023b58ed119f068f240f017df6d80f2ee2e35 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Thu, 2 Feb 2023 15:14:32 +0800 Subject: [PATCH 28/59] remove search_groups in mutator --- .../_base_/settings/cifar10_darts_supernet.py | 30 +- .../autoformer_supernet_32xb256_in1k.py | 12 +- ...ttentive_mobilenet_supernet_32xb64_in1k.py | 12 +- .../darts_supernet_unroll_1xb96_cifar10.py | 20 +- .../mmcls/dsnas/dsnas_supernet_8xb128_in1k.py | 2 +- .../ofa_mobilenet_supernet_32xb64_in1k.py | 12 +- .../spos_mobilenet_supernet_8xb128_in1k.py | 2 +- .../spos_shufflenet_supernet_8xb128_in1k.py | 2 +- ...etnas_frcnn_shufflenet_supernet_coco_1x.py | 2 +- ...tnas_retina_shufflenet_supernet_coco_1x.py | 2 +- .../mmcls/dcff/dcff_resnet_8xb32_in1k.py | 1 - .../dcff_faster_rcnn_resnet50_8xb4_coco.py | 2 +- mmrazor/engine/hooks/dump_subnet_hook.py | 23 +- .../engine/hooks/estimate_resources_hook.py | 8 +- .../runner/autoslim_greedy_search_loop.py | 30 +- .../engine/runner/evolution_search_loop.py | 20 +- mmrazor/engine/runner/iteprune_val_loop.py | 8 +- mmrazor/engine/runner/subnet_sampler_loop.py | 4 +- mmrazor/engine/runner/subnet_val_loop.py | 3 +- mmrazor/engine/runner/utils/check.py | 4 +- mmrazor/models/algorithms/nas/autoformer.py | 68 ++--- mmrazor/models/algorithms/nas/autoslim.py | 92 +++---- mmrazor/models/algorithms/nas/bignas.py | 133 +++------ mmrazor/models/algorithms/nas/darts.py | 55 ++-- 
mmrazor/models/algorithms/nas/dsnas.py | 105 ++++--- mmrazor/models/algorithms/nas/spos.py | 64 ++--- mmrazor/models/algorithms/pruning/dcff.py | 62 +++-- .../algorithms/pruning/ite_prune_algorithm.py | 55 ++-- .../algorithms/pruning/slimmable_network.py | 2 +- .../dynamic_ops/mixins/dynamic_mixins.py | 2 +- .../architectures/utils/mutable_register.py | 8 +- .../mutable_channel/units/channel_unit.py | 3 +- .../units/mutable_channel_unit.py | 5 + .../mutables/mutable_module/mutable_module.py | 17 ++ .../mutables/mutable_value/mutable_value.py | 5 + mmrazor/models/mutators/__init__.py | 10 +- mmrazor/models/mutators/base_mutator.py | 8 +- .../channel_mutator/channel_mutator.py | 65 ++--- .../channel_mutator/dmcp_channel_mutator.py | 28 +- .../one_shot_channel_mutator.py | 49 +++- .../slimmable_channel_mutator.py | 3 +- mmrazor/models/mutators/group_mixin.py | 162 ++++------- mmrazor/models/mutators/nas_mutator.py | 260 ++++++++++++++++++ mmrazor/structures/subnet/__init__.py | 6 +- mmrazor/structures/subnet/fix_subnet.py | 68 +++-- tests/data/models.py | 22 +- .../test_algorithms/test_autoformer.py | 52 +--- .../test_algorithms/test_autoslim.py | 48 ++-- .../test_algorithms/test_bignas.py | 47 +--- .../test_models/test_algorithms/test_darts.py | 32 ++- .../test_algorithms/test_dcff_network.py | 41 +-- .../test_models/test_algorithms/test_dsnas.py | 20 +- .../test_algorithms/test_prune_algorithm.py | 34 +-- .../test_algorithms/test_slimmable_network.py | 24 +- .../test_models/test_algorithms/test_spos.py | 14 +- .../test_backbones/test_dartsbackbone.py | 6 +- .../test_l1_mutable_channel_unit.py | 1 - .../test_one_shot_mutable_channel_unit.py | 2 - .../test_mutators/test_channel_mutator.py | 41 +-- .../test_mutators/test_dcff_mutator.py | 5 - .../test_mutators/test_nas_mutator.py | 196 +++++++++++++ tests/test_registry/test_registry.py | 20 +- .../test_runners/test_subnet_sampler_loop.py | 2 +- 63 files changed, 1141 insertions(+), 1000 deletions(-) create mode 100644 
mmrazor/models/mutators/nas_mutator.py create mode 100644 tests/test_models/test_mutators/test_nas_mutator.py diff --git a/configs/_base_/settings/cifar10_darts_supernet.py b/configs/_base_/settings/cifar10_darts_supernet.py index 128598b5c..66fb75fe4 100644 --- a/configs/_base_/settings/cifar10_darts_supernet.py +++ b/configs/_base_/settings/cifar10_darts_supernet.py @@ -48,36 +48,26 @@ # optimizer optim_wrapper = dict( + constructor='mmrazor.SeparateOptimWrapperConstructor', architecture=dict( - type='mmcls.SGD', lr=0.025, momentum=0.9, weight_decay=3e-4), - mutator=dict(type='mmcls.Adam', lr=3e-4, weight_decay=1e-3), - clip_grad=dict(max_norm=5, norm_type=2)) + optimizer=dict( + type='mmcls.SGD', lr=0.025, momentum=0.9, weight_decay=3e-4), + clip_grad=dict(max_norm=5, norm_type=2)), + mutator=dict( + optimizer=dict(type='mmcls.Adam', lr=3e-4, weight_decay=1e-3))) +search_epochs = 50 # leanring policy -# TODO support different optim use different scheduler (wait mmengine) param_scheduler = [ dict( type='mmcls.CosineAnnealingLR', - T_max=50, + T_max=search_epochs, eta_min=1e-3, begin=0, - end=50), + end=search_epochs), ] -# param_scheduler = dict( -# architecture = dict( -# type='mmcls.CosineAnnealingLR', -# T_max=50, -# eta_min=1e-3, -# begin=0, -# end=50), -# mutator = dict( -# type='mmcls.ConstantLR', -# factor=1, -# begin=0, -# end=50)) # train, val, test setting -# TODO split cifar dataset train_cfg = dict( type='mmrazor.DartsEpochBasedTrainLoop', mutator_dataloader=dict( @@ -92,7 +82,7 @@ sampler=dict(type='mmcls.DefaultSampler', shuffle=True), persistent_workers=True, ), - max_epochs=50) + max_epochs=search_epochs) val_cfg = dict() # validate each epoch test_cfg = dict() # dataset settings diff --git a/configs/nas/mmcls/autoformer/autoformer_supernet_32xb256_in1k.py b/configs/nas/mmcls/autoformer/autoformer_supernet_32xb256_in1k.py index 24639a545..b563e0093 100644 --- a/configs/nas/mmcls/autoformer/autoformer_supernet_32xb256_in1k.py +++ 
b/configs/nas/mmcls/autoformer/autoformer_supernet_32xb256_in1k.py @@ -53,17 +53,7 @@ type='mmrazor.Autoformer', architecture=supernet, fix_subnet=None, - mutators=dict( - channel_mutator=dict( - type='mmrazor.OneShotChannelMutator', - channel_unit_cfg={ - 'type': 'OneShotMutableChannelUnit', - 'default_args': { - 'unit_predefined': True - } - }, - parse_cfg={'type': 'Predefined'}), - value_mutator=dict(type='mmrazor.DynamicValueMutator'))) + mutator=dict(type='mmrazor.NasMutator')) # runtime setting custom_hooks = [dict(type='EMAHook', momentum=4e-5, priority='ABOVE_NORMAL')] diff --git a/configs/nas/mmcls/bignas/attentive_mobilenet_supernet_32xb64_in1k.py b/configs/nas/mmcls/bignas/attentive_mobilenet_supernet_32xb64_in1k.py index 15a11ed99..3b44dc36f 100644 --- a/configs/nas/mmcls/bignas/attentive_mobilenet_supernet_32xb64_in1k.py +++ b/configs/nas/mmcls/bignas/attentive_mobilenet_supernet_32xb64_in1k.py @@ -44,17 +44,7 @@ loss_kl=dict( preds_S=dict(recorder='fc', from_student=True), preds_T=dict(recorder='fc', from_student=False)))), - mutators=dict( - channel_mutator=dict( - type='mmrazor.OneShotChannelMutator', - channel_unit_cfg={ - 'type': 'OneShotMutableChannelUnit', - 'default_args': { - 'unit_predefined': True - } - }, - parse_cfg={'type': 'Predefined'}), - value_mutator=dict(type='DynamicValueMutator'))) + mutators=dict(type='mmrazor.NasMutator')) model_wrapper_cfg = dict( type='mmrazor.BigNASDDP', diff --git a/configs/nas/mmcls/darts/darts_supernet_unroll_1xb96_cifar10.py b/configs/nas/mmcls/darts/darts_supernet_unroll_1xb96_cifar10.py index f8f098570..bcbd2dfe0 100644 --- a/configs/nas/mmcls/darts/darts_supernet_unroll_1xb96_cifar10.py +++ b/configs/nas/mmcls/darts/darts_supernet_unroll_1xb96_cifar10.py @@ -4,9 +4,11 @@ 'mmcls::_base_/default_runtime.py', ] -# model -mutator = dict(type='mmrazor.DiffModuleMutator') +custom_hooks = [ + dict(type='mmrazor.DumpSubnetHook', interval=10, by_epoch=True) +] +# model model = dict( type='mmrazor.Darts', 
architecture=dict( @@ -20,7 +22,7 @@ loss=dict(type='CrossEntropyLoss', loss_weight=1.0), topk=(1, 5), cal_acc=True)), - mutator=dict(type='mmrazor.DiffModuleMutator'), + mutator=dict(type='mmrazor.NasMutator'), unroll=True) model_wrapper_cfg = dict( @@ -28,16 +30,4 @@ broadcast_buffers=False, find_unused_parameters=False) -# TRAINING -optim_wrapper = dict( - _delete_=True, - constructor='mmrazor.SeparateOptimWrapperConstructor', - architecture=dict( - type='OptimWrapper', - optimizer=dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=3e-4), - clip_grad=dict(max_norm=5, norm_type=2)), - mutator=dict( - type='OptimWrapper', - optimizer=dict(type='Adam', lr=3e-4, weight_decay=1e-3))) - find_unused_parameter = False diff --git a/configs/nas/mmcls/dsnas/dsnas_supernet_8xb128_in1k.py b/configs/nas/mmcls/dsnas/dsnas_supernet_8xb128_in1k.py index 50d11dee2..b341edfd9 100644 --- a/configs/nas/mmcls/dsnas/dsnas_supernet_8xb128_in1k.py +++ b/configs/nas/mmcls/dsnas/dsnas_supernet_8xb128_in1k.py @@ -23,7 +23,7 @@ mode='original', loss_weight=1.0), topk=(1, 5))), - mutator=dict(type='mmrazor.DiffModuleMutator'), + mutator=dict(type='mmrazor.NasMutator'), pretrain_epochs=15, finetune_epochs=_base_.search_epochs, ) diff --git a/configs/nas/mmcls/onceforall/ofa_mobilenet_supernet_32xb64_in1k.py b/configs/nas/mmcls/onceforall/ofa_mobilenet_supernet_32xb64_in1k.py index 8a58e6171..c2e0f05ab 100644 --- a/configs/nas/mmcls/onceforall/ofa_mobilenet_supernet_32xb64_in1k.py +++ b/configs/nas/mmcls/onceforall/ofa_mobilenet_supernet_32xb64_in1k.py @@ -43,17 +43,7 @@ loss_kl=dict( preds_S=dict(recorder='fc', from_student=True), preds_T=dict(recorder='fc', from_student=False)))), - mutators=dict( - channel_mutator=dict( - type='mmrazor.OneShotChannelMutator', - channel_unit_cfg={ - 'type': 'OneShotMutableChannelUnit', - 'default_args': { - 'unit_predefined': True - } - }, - parse_cfg={'type': 'Predefined'}), - value_mutator=dict(type='DynamicValueMutator'))) + 
mutators=dict(type='mmrazor.NasMutator')) model_wrapper_cfg = dict( type='mmrazor.BigNASDDP', diff --git a/configs/nas/mmcls/spos/spos_mobilenet_supernet_8xb128_in1k.py b/configs/nas/mmcls/spos/spos_mobilenet_supernet_8xb128_in1k.py index c6c8a4973..3d47d8f7f 100644 --- a/configs/nas/mmcls/spos/spos_mobilenet_supernet_8xb128_in1k.py +++ b/configs/nas/mmcls/spos/spos_mobilenet_supernet_8xb128_in1k.py @@ -25,6 +25,6 @@ model = dict( type='mmrazor.SPOS', architecture=supernet, - mutator=dict(type='mmrazor.OneShotModuleMutator')) + mutator=dict(type='mmrazor.NasMutator')) find_unused_parameters = True diff --git a/configs/nas/mmcls/spos/spos_shufflenet_supernet_8xb128_in1k.py b/configs/nas/mmcls/spos/spos_shufflenet_supernet_8xb128_in1k.py index 1991f6abb..a5d6ce726 100644 --- a/configs/nas/mmcls/spos/spos_shufflenet_supernet_8xb128_in1k.py +++ b/configs/nas/mmcls/spos/spos_shufflenet_supernet_8xb128_in1k.py @@ -25,6 +25,6 @@ model = dict( type='mmrazor.SPOS', architecture=supernet, - mutator=dict(type='mmrazor.OneShotModuleMutator')) + mutator=dict(type='mmrazor.NasMutator')) find_unused_parameters = True diff --git a/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_supernet_coco_1x.py b/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_supernet_coco_1x.py index add7c5feb..b2b2711f6 100644 --- a/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_supernet_coco_1x.py +++ b/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_supernet_coco_1x.py @@ -25,6 +25,6 @@ _delete_=True, type='mmrazor.SPOS', architecture=supernet, - mutator=dict(type='mmrazor.OneShotModuleMutator')) + mutator=dict(type='mmrazor.NasMutator')) find_unused_parameters = True diff --git a/configs/nas/mmdet/detnas/detnas_retina_shufflenet_supernet_coco_1x.py b/configs/nas/mmdet/detnas/detnas_retina_shufflenet_supernet_coco_1x.py index 3186e0f5c..21c37f51e 100644 --- a/configs/nas/mmdet/detnas/detnas_retina_shufflenet_supernet_coco_1x.py +++ 
b/configs/nas/mmdet/detnas/detnas_retina_shufflenet_supernet_coco_1x.py @@ -22,6 +22,6 @@ _delete_=True, type='mmrazor.SPOS', architecture=supernet, - mutator=dict(type='mmrazor.OneShotModuleMutator')) + mutator=dict(type='mmrazor.NasMutator')) find_unused_parameters = True diff --git a/configs/pruning/mmcls/dcff/dcff_resnet_8xb32_in1k.py b/configs/pruning/mmcls/dcff/dcff_resnet_8xb32_in1k.py index 360645a6a..f833cb562 100644 --- a/configs/pruning/mmcls/dcff/dcff_resnet_8xb32_in1k.py +++ b/configs/pruning/mmcls/dcff/dcff_resnet_8xb32_in1k.py @@ -76,7 +76,6 @@ type='ChannelAnalyzer', demo_input=(1, 3, 224, 224), tracer_type='BackwardTracer')), - fix_subnet=None, data_preprocessor=None, target_pruning_ratio=target_pruning_ratio, step_freq=1, diff --git a/configs/pruning/mmdet/dcff/dcff_faster_rcnn_resnet50_8xb4_coco.py b/configs/pruning/mmdet/dcff/dcff_faster_rcnn_resnet50_8xb4_coco.py index b6051c649..5d51677b8 100644 --- a/configs/pruning/mmdet/dcff/dcff_faster_rcnn_resnet50_8xb4_coco.py +++ b/configs/pruning/mmdet/dcff/dcff_faster_rcnn_resnet50_8xb4_coco.py @@ -76,7 +76,7 @@ parse_cfg=dict( type='ChannelAnalyzer', demo_input=(1, 3, 224, 224), - tracer_type='BackwardTracer')), + tracer_type='FxTracer')), target_pruning_ratio=target_pruning_ratio, step_freq=1, linear_schedule=False) diff --git a/mmrazor/engine/hooks/dump_subnet_hook.py b/mmrazor/engine/hooks/dump_subnet_hook.py index 4ea0a6a3e..9234ba79c 100644 --- a/mmrazor/engine/hooks/dump_subnet_hook.py +++ b/mmrazor/engine/hooks/dump_subnet_hook.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+import copy import os.path as osp from pathlib import Path from typing import Optional, Sequence, Union @@ -8,6 +9,9 @@ from mmengine.hooks import Hook from mmengine.registry import HOOKS +from mmrazor.models.mutables.base_mutable import BaseMutable +from mmrazor.structures import convert_fix_subnet, export_fix_subnet + DATA_BATCH = Optional[Sequence[dict]] @@ -103,16 +107,25 @@ def after_train_epoch(self, runner) -> None: @master_only def _save_subnet(self, runner) -> None: - """Save the current subnet and delete outdated subnet. + """Save the current best subnet. Args: runner (Runner): The runner of the training process. """ + model = runner.model.module if runner.distributed else runner.model - if runner.distributed: - subnet_dict = runner.model.module.search_subnet() - else: - subnet_dict = runner.model.search_subnet() + # delete non-leaf tensor to get deepcopy(model). + # TODO solve the hard case. + for module in model.architecture.modules(): + if isinstance(module, BaseMutable): + if hasattr(module, 'arch_weights'): + delattr(module, 'arch_weights') + + copied_model = copy.deepcopy(model) + copied_model.mutator.set_choices(copied_model.sample_choices()) + + subnet_dict = export_fix_subnet(copied_model)[0] + subnet_dict = convert_fix_subnet(subnet_dict) if self.by_epoch: subnet_filename = self.args.get( diff --git a/mmrazor/engine/hooks/estimate_resources_hook.py b/mmrazor/engine/hooks/estimate_resources_hook.py index e8c4d8446..28f381a3c 100644 --- a/mmrazor/engine/hooks/estimate_resources_hook.py +++ b/mmrazor/engine/hooks/estimate_resources_hook.py @@ -104,7 +104,7 @@ def export_subnet(self, model) -> torch.nn.Module: """ # Avoid circular import from mmrazor.models.mutables.base_mutable import BaseMutable - from mmrazor.structures import load_fix_subnet + from mmrazor.structures import export_fix_subnet, load_fix_subnet # delete non-leaf tensor to get deepcopy(model). # TODO solve the hard case. 
@@ -114,7 +114,9 @@ def export_subnet(self, model) -> torch.nn.Module: delattr(module, 'arch_weights') copied_model = copy.deepcopy(model) - fix_mutable = copied_model.search_subnet() - load_fix_subnet(copied_model, fix_mutable) + copied_model.mutator.set_choices(copied_model.mutator.sample_choices()) + + subnet_dict = export_fix_subnet(copied_model)[0] + load_fix_subnet(copied_model, subnet_dict) return copied_model diff --git a/mmrazor/engine/runner/autoslim_greedy_search_loop.py b/mmrazor/engine/runner/autoslim_greedy_search_loop.py index 6383cc2b3..cf9752ce0 100644 --- a/mmrazor/engine/runner/autoslim_greedy_search_loop.py +++ b/mmrazor/engine/runner/autoslim_greedy_search_loop.py @@ -11,7 +11,7 @@ from torch.utils.data import DataLoader from mmrazor.registry import LOOPS, TASK_UTILS -from mmrazor.structures import export_fix_subnet +from mmrazor.structures import convert_fix_subnet, export_fix_subnet from .utils import check_subnet_resources @@ -68,14 +68,15 @@ def __init__(self, self.model = runner.model assert hasattr(self.model, 'mutator') - search_groups = self.model.mutator.search_groups + units = self.model.mutator.mutable_units + self.candidate_choices = {} - for group_id, modules in search_groups.items(): - self.candidate_choices[group_id] = modules[0].candidate_choices + for unit in units: + self.candidate_choices[unit.alias] = unit.candidate_choices self.max_subnet = {} - for group_id, candidate_choices in self.candidate_choices.items(): - self.max_subnet[group_id] = len(candidate_choices) + for name, candidate_choices in self.candidate_choices.items(): + self.max_subnet[name] = len(candidate_choices) self.current_subnet = self.max_subnet current_subnet_choices = self._channel_bins2choices( @@ -117,7 +118,7 @@ def run(self) -> None: pruned_subnet[unit_name] -= 1 pruned_subnet_choices = self._channel_bins2choices( pruned_subnet) - self.model.set_subnet(pruned_subnet_choices) + self.model.mutator.set_choices(pruned_subnet_choices) metrics = 
self._val_subnet() score = metrics[self.score_key] \ if len(metrics) != 0 else 0. @@ -195,27 +196,16 @@ def _save_searcher_ckpt(self) -> None: def _save_searched_subnet(self): """Save the final searched subnet dict.""" - - def _convert_fix_subnet(fixed_subnet: Dict[str, Any]): - from mmrazor.utils.typing import DumpChosen - - converted_fix_subnet = dict() - for key, val in fixed_subnet.items(): - assert isinstance(val, DumpChosen) - converted_fix_subnet[key] = dict(val._asdict()) - - return converted_fix_subnet - if self.runner.rank != 0: return self.runner.logger.info('Search finished:') for subnet, flops in zip(self.searched_subnet, self.searched_subnet_flops): subnet_choice = self._channel_bins2choices(subnet) - self.model.set_subnet(subnet_choice) + self.model.mutator.set_choices(subnet_choice) fixed_subnet, _ = export_fix_subnet(self.model) save_name = 'FLOPS_{:.2f}M.yaml'.format(flops) - fixed_subnet = _convert_fix_subnet(fixed_subnet) + fixed_subnet = convert_fix_subnet(fixed_subnet) fileio.dump(fixed_subnet, osp.join(self.runner.work_dir, save_name)) self.runner.logger.info( diff --git a/mmrazor/engine/runner/evolution_search_loop.py b/mmrazor/engine/runner/evolution_search_loop.py index d35f62a86..c1a73d4c3 100644 --- a/mmrazor/engine/runner/evolution_search_loop.py +++ b/mmrazor/engine/runner/evolution_search_loop.py @@ -16,7 +16,8 @@ from torch.utils.data import DataLoader from mmrazor.registry import LOOPS, TASK_UTILS -from mmrazor.structures import Candidates, export_fix_subnet +from mmrazor.structures import (Candidates, convert_fix_subnet, + export_fix_subnet) from mmrazor.utils import SupportRandomSubnet from .utils import CalibrateBNMixin, check_subnet_resources, crossover @@ -220,7 +221,7 @@ def update_candidates_scores(self) -> None: """Validate candicate one by one from the candicate pool, and update top-k candicates.""" for i, candidate in enumerate(self.candidates.subnets): - self.model.set_subnet(candidate) + 
self.model.mutator.set_choices(candidate) metrics = self._val_candidate(use_predictor=self.use_predictor) score = round(metrics[self.score_key], 2) \ if len(metrics) != 0 else 0. @@ -311,7 +312,7 @@ def _save_best_fix_subnet(self): """Save best subnet in searched top-k candidates.""" if self.runner.rank == 0: best_random_subnet = self.top_k_candidates.subnets[0] - self.model.set_subnet(best_random_subnet) + self.model.mutator.set_choices(best_random_subnet) best_fix_subnet, sliced_model = \ export_fix_subnet(self.model, slice_weight=True) @@ -327,7 +328,7 @@ def _save_best_fix_subnet(self): f'{self.runner.work_dir}') save_name = 'best_fix_subnet.yaml' - best_fix_subnet = self._convert_fix_subnet(best_fix_subnet) + best_fix_subnet = convert_fix_subnet(best_fix_subnet) fileio.dump(best_fix_subnet, osp.join(self.runner.work_dir, save_name)) self.runner.logger.info( @@ -335,17 +336,6 @@ def _save_best_fix_subnet(self): self.runner.logger.info('Search finished.') - def _convert_fix_subnet(self, fix_subnet: Dict[str, Any]): - """Convert the fixed subnet to avoid python typing error.""" - from mmrazor.utils.typing import DumpChosen - - converted_fix_subnet = dict() - for k, v in fix_subnet.items(): - assert isinstance(v, DumpChosen) - converted_fix_subnet[k] = dict(chosen=v.chosen) - - return converted_fix_subnet - @torch.no_grad() def _val_candidate(self, use_predictor: bool = False) -> Dict: """Run validation. diff --git a/mmrazor/engine/runner/iteprune_val_loop.py b/mmrazor/engine/runner/iteprune_val_loop.py index 07d40c884..bbca5d53a 100644 --- a/mmrazor/engine/runner/iteprune_val_loop.py +++ b/mmrazor/engine/runner/iteprune_val_loop.py @@ -39,16 +39,20 @@ def run(self): def _save_fix_subnet(self): """Save model subnet config.""" - # TO DO: Modify export_fix_subnet's output. 
Might contain weight return + model = self.runner.model.module \ + if self.runner.distributed else self.runner.model + fix_subnet, static_model = export_fix_subnet( - self.model, export_subnet_mode='mutator', slice_weight=True) + model, export_subnet_mode='mutator', slice_weight=True) fix_subnet = json.dumps(fix_subnet, indent=4, separators=(',', ':')) + subnet_name = 'fix_subnet.json' weight_name = 'fix_subnet_weight.pth' with open(osp.join(self.runner.work_dir, subnet_name), 'w') as file: file.write(fix_subnet) torch.save({'state_dict': static_model.state_dict()}, osp.join(self.runner.work_dir, weight_name)) + self.runner.logger.info( 'export finished and ' f'{subnet_name}, ' diff --git a/mmrazor/engine/runner/subnet_sampler_loop.py b/mmrazor/engine/runner/subnet_sampler_loop.py index 56c4f893c..4f26ee7a2 100644 --- a/mmrazor/engine/runner/subnet_sampler_loop.py +++ b/mmrazor/engine/runner/subnet_sampler_loop.py @@ -60,7 +60,7 @@ def run_iter(self, data_batch: Sequence[dict]) -> None: # synchronization during gradient accumulation process. # outputs should be a dict of loss. subnet = self.sample_subnet() - self.model.set_subnet(subnet) + self.model.mutator.set_choices(subnet) outputs = self.runner.model.train_step( data_batch, optim_wrapper=self.runner.optim_wrapper) self.runner.message_hub.update_info('train_logs', outputs) @@ -290,7 +290,7 @@ def update_candidates_scores(self) -> None: """Update candidates' scores, which are validated with the `dataloader_val`.""" for i, candidate in enumerate(self.candidates.subnets): - self.model.set_subnet(candidate) + self.model.mutator.set_choices(candidate) metrics = self._val_candidate() score = metrics[self.score_key] if len(metrics) != 0 else 0. 
self.candidates.set_resource(i, score, 'score') diff --git a/mmrazor/engine/runner/subnet_val_loop.py b/mmrazor/engine/runner/subnet_val_loop.py index 318606466..55e7c0c13 100644 --- a/mmrazor/engine/runner/subnet_val_loop.py +++ b/mmrazor/engine/runner/subnet_val_loop.py @@ -80,7 +80,8 @@ def run(self): metrics = self._evaluate_once() all_metrics.update(add_prefix(metrics, 'min_subnet')) elif 'random' in kind: - self.model.set_subnet(self.model.sample_subnet()) + self.model.mutator.set_choices( + self.model.mutator.sample_choices()) metrics = self._evaluate_once() all_metrics.update(add_prefix(metrics, f'{kind}_subnet')) diff --git a/mmrazor/engine/runner/utils/check.py b/mmrazor/engine/runner/utils/check.py index eb49ede68..ad774f647 100644 --- a/mmrazor/engine/runner/utils/check.py +++ b/mmrazor/engine/runner/utils/check.py @@ -29,8 +29,8 @@ def check_subnet_resources( if constraints_range is None: return True, dict() - assert hasattr(model, 'set_subnet') and hasattr(model, 'architecture') - model.set_subnet(subnet) + assert hasattr(model, 'mutator') and hasattr(model, 'architecture') + model.mutator.set_choices(subnet) _, sliced_model = export_fix_subnet(model, slice_weight=True) model_to_check = sliced_model.architecture # type: ignore diff --git a/mmrazor/models/algorithms/nas/autoformer.py b/mmrazor/models/algorithms/nas/autoformer.py index 8ac896a08..76044cab3 100644 --- a/mmrazor/models/algorithms/nas/autoformer.py +++ b/mmrazor/models/algorithms/nas/autoformer.py @@ -6,10 +6,13 @@ from mmengine.structures import BaseDataElement from torch import nn +from mmrazor.models.mutators import NasMutator from mmrazor.registry import MODELS from mmrazor.utils import ValidFixMutable from ..base import BaseAlgorithm, LossResults +VALID_MUTATOR_TYPE = Union[NasMutator, Dict] + @MODELS.register_module() class Autoformer(BaseAlgorithm): @@ -24,25 +27,19 @@ class Autoformer(BaseAlgorithm): Args: architecture (dict|:obj:`BaseModel`): The config of :class:`BaseModel` or 
built model. Corresponding to supernet in NAS algorithm. - mutators (Optional[dict]): The dict of different Mutators config. - Defaults to None. + mutator (VALID_MUTATOR_TYPE): The config of :class:`NasMutator` or + built mutator. fix_subnet (str | dict | :obj:`FixSubnet`): The path of yaml file or loaded dict or built :obj:`FixSubnet`. Defaults to None. data_preprocessor (Optional[Union[dict, nn.Module]]): The pre-process config of :class:`BaseDataPreprocessor`. Defaults to None. init_cfg (Optional[dict]): Init config for ``BaseModule``. Defaults to None. - - Note: - Autoformer uses two mutators which are ``DynamicValueMutator`` and - ``ChannelMutator``. `DynamicValueMutator` handle the mutable object - ``OneShotMutableValue`` in Autoformer while ChannelMutator handle - the mutable object ``OneShotMutableChannel`` in Autoformer. """ def __init__(self, architecture: Union[BaseModel, Dict], - mutators: Optional[Dict] = None, + mutator: VALID_MUTATOR_TYPE = None, fix_subnet: Optional[ValidFixMutable] = None, data_preprocessor: Optional[Union[dict, nn.Module]] = None, init_cfg: Optional[dict] = None): @@ -58,49 +55,18 @@ def __init__(self, load_fix_subnet(self.architecture, fix_subnet) self.is_supernet = False else: - assert mutators is not None, \ - 'mutator cannot be None when fix_subnet is None.' - if isinstance(mutators, dict): - built_mutators: Dict = dict() - for name, mutator_cfg in mutators.items(): - if 'parse_cfg' in mutator_cfg and isinstance( - mutator_cfg['parse_cfg'], dict): - assert mutator_cfg['parse_cfg'][ - 'type'] == 'Predefined', \ - 'autoformer only support predefined.' 
- mutator = MODELS.build(mutator_cfg) - built_mutators[name] = mutator - mutator.prepare_from_supernet(self.architecture) - self.mutators = built_mutators - else: - raise TypeError('mutator should be a `dict` but got ' - f'{type(mutator)}') - + self.mutator = self._build_mutator(mutator) + self.mutator.prepare_from_supernet(self.architecture) self.is_supernet = True - def sample_subnet(self) -> Dict: - """Random sample subnet by mutator.""" - value_subnet = dict() - channel_subnet = dict() - for name, mutator in self.mutators.items(): - if name == 'value_mutator': - value_subnet.update(mutator.sample_choices()) - elif name == 'channel_mutator': - channel_subnet.update(mutator.sample_choices()) - else: - raise NotImplementedError - return dict(value_subnet=value_subnet, channel_subnet=channel_subnet) - - def set_subnet(self, subnet: Dict[str, Dict[int, Union[int, - list]]]) -> None: - """Set the subnet sampled by :meth:sample_subnet.""" - for name, mutator in self.mutators.items(): - if name == 'value_mutator': - mutator.set_choices(subnet['value_subnet']) - elif name == 'channel_mutator': - mutator.set_choices(subnet['channel_subnet']) - else: - raise NotImplementedError + def _build_mutator(self, mutator: VALID_MUTATOR_TYPE = None) -> NasMutator: + """build mutator.""" + if isinstance(mutator, dict): + mutator = MODELS.build(mutator) + if not isinstance(mutator, NasMutator): + raise TypeError('mutator should be a `dict` or `NasMutator` ' + f'instance, but got {type(mutator)}.') + return mutator def loss( self, @@ -109,5 +75,5 @@ def loss( ) -> LossResults: """Calculate losses from a batch of inputs and data samples.""" if self.is_supernet: - self.set_subnet(self.sample_subnet()) + self.mutator.set_choices(self.mutator.sample_choices()) return self.architecture(batch_inputs, data_samples, mode='loss') diff --git a/mmrazor/models/algorithms/nas/autoslim.py b/mmrazor/models/algorithms/nas/autoslim.py index 47d35201c..dc8d54c0e 100644 --- 
a/mmrazor/models/algorithms/nas/autoslim.py +++ b/mmrazor/models/algorithms/nas/autoslim.py @@ -11,13 +11,13 @@ from torch.nn.modules.batchnorm import _BatchNorm from mmrazor.models.distillers import ConfigurableDistiller -from mmrazor.models.mutators import OneShotChannelMutator +from mmrazor.models.mutators import ChannelMutator from mmrazor.models.utils import (add_prefix, reinitialize_optim_wrapper_count_status) from mmrazor.registry import MODEL_WRAPPERS, MODELS from ..base import BaseAlgorithm -VALID_MUTATOR_TYPE = Union[OneShotChannelMutator, Dict] +VALID_MUTATOR_TYPE = Union[ChannelMutator, Dict] VALID_DISTILLER_TYPE = Union[ConfigurableDistiller, Dict] VALID_PATH_TYPE = Union[str, Path] VALID_CHANNEL_CFG_PATH_TYPE = Union[VALID_PATH_TYPE, List[VALID_PATH_TYPE]] @@ -29,35 +29,39 @@ class AutoSlim(BaseAlgorithm): https://arxiv.org/abs/1903.11728 for more details. Args: - mutator (VALID_MUTATOR_TYPE): config of mutator. - distiller (VALID_DISTILLER_TYPE): config of distiller. - architecture (Union[BaseModel, Dict]): the model to be searched. - data_preprocessor (Optional[Union[Dict, nn.Module]], optional): - data prepocessor. Defaults to None. - num_random_samples (int): number of random sample subnets. - Defaults to 2. - init_cfg (Optional[Dict], optional): config of initialization. - Defaults to None. - bn_training_mode (bool): Whether set bn to training mode when model is + architecture (dict|:obj:`BaseModel`): The config of :class:`BaseModel` + or built model. Corresponding to supernet in NAS algorithm. + mutator (VALID_MUTATOR_TYPE): The config of :class:`ChannelMutator` or + built mutator. + distiller (VALID_DISTILLER_TYPE): Cfg of :class:`ConfigurableDistiller` + or built distiller. + norm_training (bool): Whether set bn to training mode when model is set to eval mode. Note that in slimmable networks, accumulating different numbers of channels results in different feature means and variances, which further leads to inaccurate statistics of - shared BN. 
Set ``bn_training_mode`` to True to use the feature + shared BN. Set ``norm_training`` to True to use the feature means and variances in a batch. + data_preprocessor (Optional[Union[dict, nn.Module]]): The pre-process + config of :class:`BaseDataPreprocessor`. Defaults to None. + num_random_samples (int): number of random sample subnets. + Defaults to 2. + init_cfg (Optional[dict]): Init config for ``BaseModule``. + Defaults to None. """ def __init__(self, - mutator: VALID_MUTATOR_TYPE, - distiller: VALID_DISTILLER_TYPE, architecture: Union[BaseModel, Dict], + mutator: VALID_MUTATOR_TYPE = None, + distiller: VALID_DISTILLER_TYPE = None, + norm_training: bool = False, num_random_samples: int = 2, data_preprocessor: Optional[Union[Dict, nn.Module]] = None, - init_cfg: Optional[Dict] = None, - bn_training_mode=False) -> None: + init_cfg: Optional[Dict] = None) -> None: super().__init__(architecture, data_preprocessor, init_cfg) - self.mutator: OneShotChannelMutator = MODELS.build(mutator) - # prepare_from_supernet` must be called before distiller initialized + self.mutator = self._build_mutator(mutator) + # NOTE: `mutator.prepare_from_supernet` must be called + # before distiller initialized. 
self.mutator.prepare_from_supernet(self.architecture) self.distiller = self._build_distiller(distiller) @@ -69,23 +73,21 @@ def __init__(self, self.sample_kinds.append('random' + str(i)) self._optim_wrapper_count_status_reinitialized = False - - self.bn_training_mode = bn_training_mode + self.norm_training = norm_training def _build_mutator(self, - mutator: VALID_MUTATOR_TYPE) -> OneShotChannelMutator: + mutator: VALID_MUTATOR_TYPE = None) -> ChannelMutator: """Build mutator.""" if isinstance(mutator, dict): mutator = MODELS.build(mutator) - if not isinstance(mutator, OneShotChannelMutator): - raise TypeError('mutator should be a `dict` or ' - '`OneShotModuleMutator` instance, but got ' - f'{type(mutator)}') - + if not isinstance(mutator, ChannelMutator): + raise TypeError('mutator should be a `dict` or `ChannelMutator` ' + f'instance, but got {type(mutator)}.') return mutator def _build_distiller( - self, distiller: VALID_DISTILLER_TYPE) -> ConfigurableDistiller: + self, + distiller: VALID_DISTILLER_TYPE = None) -> ConfigurableDistiller: """Build distiller.""" if isinstance(distiller, dict): distiller = MODELS.build(distiller) @@ -93,25 +95,8 @@ def _build_distiller( raise TypeError('distiller should be a `dict` or ' '`ConfigurableDistiller` instance, but got ' f'{type(distiller)}') - return distiller - def sample_subnet(self) -> Dict: - """Sample a subnet.""" - return self.mutator.sample_choices() - - def set_subnet(self, subnet) -> None: - """Set a subnet.""" - self.mutator.set_choices(subnet) - - def set_max_subnet(self) -> None: - """Set max subnet.""" - self.mutator.set_choices(self.mutator.max_choice) - - def set_min_subnet(self) -> None: - """Set min subnet.""" - self.mutator.set_choices(self.mutator.min_choice) - def train_step(self, data: List[dict], optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]: """Train step.""" @@ -148,7 +133,7 @@ def distill_step( for kind in self.sample_kinds: # update the max subnet loss. 
if kind == 'max': - self.set_max_subnet() + self.mutator.set_choices(self.mutator.max_choices) with optim_wrapper.optim_context( self ), self.distiller.teacher_recorders: # type: ignore @@ -161,13 +146,13 @@ def distill_step( add_prefix(max_subnet_losses, 'max_subnet')) # update the min subnet loss. elif kind == 'min': - self.set_min_subnet() + self.mutator.set_choices(self.mutator.min_choices) min_subnet_losses = distill_step(batch_inputs, data_samples) total_losses.update( add_prefix(min_subnet_losses, 'min_subnet')) # update the random subnets loss. elif 'random' in kind: - self.set_subnet(self.sample_subnet()) + self.mutator.set_choices(self.mutator.sample_choices()) random_subnet_losses = distill_step(batch_inputs, data_samples) total_losses.update( add_prefix(random_subnet_losses, f'{kind}_subnet')) @@ -177,14 +162,14 @@ def distill_step( def train(self, mode=True): """Overwrite the train method in ``nn.Module`` to set ``nn.BatchNorm`` to training mode when model is set to eval mode when - ``self.bn_training_mode`` is ``True``. + ``self.norm_training`` is ``True``. Args: mode (bool): whether to set training mode (``True``) or evaluation mode (``False``). Default: ``True``. """ super(AutoSlim, self).train(mode) - if not mode and self.bn_training_mode: + if not mode and self.norm_training: for module in self.modules(): if isinstance(module, _BatchNorm): module.training = True @@ -240,7 +225,7 @@ def distill_step( for kind in self.module.sample_kinds: # update the max subnet loss. if kind == 'max': - self.module.set_max_subnet() + self.module.mutator.set_max_choices() with optim_wrapper.optim_context( self ), self.module.distiller.teacher_recorders: # type: ignore @@ -253,13 +238,14 @@ def distill_step( add_prefix(max_subnet_losses, 'max_subnet')) # update the min subnet loss. 
elif kind == 'min': - self.module.set_min_subnet() + self.module.mutator.set_min_choices() min_subnet_losses = distill_step(batch_inputs, data_samples) total_losses.update( add_prefix(min_subnet_losses, 'min_subnet')) # update the random subnets loss. elif 'random' in kind: - self.module.set_subnet(self.module.sample_subnet()) + self.module.mutator.set_choices( + self.module.mutator.sample_choices()) random_subnet_losses = distill_step(batch_inputs, data_samples) total_losses.update( add_prefix(random_subnet_losses, f'{kind}_subnet')) diff --git a/mmrazor/models/algorithms/nas/bignas.py b/mmrazor/models/algorithms/nas/bignas.py index f75c60e57..2648488c3 100644 --- a/mmrazor/models/algorithms/nas/bignas.py +++ b/mmrazor/models/algorithms/nas/bignas.py @@ -11,15 +11,14 @@ from mmrazor.models.architectures.ops.mobilenet_series import MBBlock from mmrazor.models.architectures.utils import set_dropout from mmrazor.models.distillers import ConfigurableDistiller -from mmrazor.models.mutators.base_mutator import BaseMutator +from mmrazor.models.mutators import NasMutator from mmrazor.models.utils import (add_prefix, reinitialize_optim_wrapper_count_status) from mmrazor.registry import MODEL_WRAPPERS, MODELS from mmrazor.utils import ValidFixMutable from ..base import BaseAlgorithm -VALID_MUTATOR_TYPE = Union[BaseMutator, Dict] -VALID_MUTATORS_TYPE = Dict[str, Union[BaseMutator, Dict]] +VALID_MUTATOR_TYPE = Union[NasMutator, Dict] VALID_DISTILLER_TYPE = Union[ConfigurableDistiller, Dict] @@ -42,8 +41,10 @@ class BigNAS(BaseAlgorithm): Args: architecture (dict|:obj:`BaseModel`): The config of :class:`BaseModel` or built model. Corresponding to supernet in NAS algorithm. - mutators (VALID_MUTATORS_TYPE): Configs to build different mutators. - distiller (VALID_DISTILLER_TYPE): Configs to build a distiller. + mutator (VALID_MUTATOR_TYPE): The config of :class:`NasMutator` or + built mutator. 
+ distiller (VALID_DISTILLER_TYPE): Cfg of :class:`ConfigurableDistiller` + or built distiller. fix_subnet (str | dict | :obj:`FixSubnet`): The path of yaml file or loaded dict or built :obj:`FixSubnet`. Defaults to None. data_preprocessor (Optional[Union[dict, nn.Module]]): The pre-process @@ -55,18 +56,12 @@ class BigNAS(BaseAlgorithm): [6, 7]. init_cfg (Optional[dict]): Init config for ``BaseModule``. Defaults to None. - - Note: - BigNAS uses two mutators which are ``DynamicValueMutator`` and - ``ChannelMutator``. `DynamicValueMutator` handle the mutable object - ``OneShotMutableValue`` in BigNAS while ChannelMutator handle - the mutable object ``OneShotMutableChannel`` in BigNAS. """ def __init__(self, architecture: Union[BaseModel, Dict], - mutators: VALID_MUTATORS_TYPE, - distiller: VALID_DISTILLER_TYPE, + mutator: VALID_MUTATOR_TYPE = None, + distiller: VALID_DISTILLER_TYPE = None, fix_subnet: Optional[ValidFixMutable] = None, data_preprocessor: Optional[Union[Dict, nn.Module]] = None, num_random_samples: int = 2, @@ -75,35 +70,6 @@ def __init__(self, init_cfg: Optional[Dict] = None) -> None: super().__init__(architecture, data_preprocessor, init_cfg) - if isinstance(mutators, dict): - built_mutators: Dict = dict() - for name, mutator_cfg in mutators.items(): - if 'parse_cfg' in mutator_cfg and isinstance( - mutator_cfg['parse_cfg'], dict): - assert mutator_cfg['parse_cfg'][ - 'type'] == 'Predefined', \ - 'BigNAS only support predefined.' 
- mutator: BaseMutator = MODELS.build(mutator_cfg) - built_mutators[name] = mutator - mutator.prepare_from_supernet(self.architecture) - self.mutators = built_mutators - else: - raise TypeError('mutator should be a `dict` but got ' - f'{type(mutators)}') - - self.distiller = self._build_distiller(distiller) - self.distiller.prepare_from_teacher(self.architecture) - self.distiller.prepare_from_student(self.architecture) - - self.sample_kinds = ['max', 'min'] - for i in range(num_random_samples): - self.sample_kinds.append('random' + str(i)) - - self.drop_path_rate = drop_path_rate - self.backbone_dropout_stages = backbone_dropout_stages - self._optim_wrapper_count_status_reinitialized = False - self.is_supernet = True - if fix_subnet: # Avoid circular import from mmrazor.structures import load_fix_subnet @@ -111,63 +77,47 @@ def __init__(self, # According to fix_subnet, delete the unchosen part of supernet load_fix_subnet(self, fix_subnet) self.is_supernet = False + else: + self.mutator = self._build_mutator(mutator) + # NOTE: `mutator.prepare_from_supernet` must be called + # before distiller initialized. 
+ self.mutator.prepare_from_supernet(self.architecture) - def _build_mutator(self, mutator: VALID_MUTATOR_TYPE) -> BaseMutator: - """build mutator.""" + self.distiller = self._build_distiller(distiller) + self.distiller.prepare_from_teacher(self.architecture) + self.distiller.prepare_from_student(self.architecture) + + self.sample_kinds = ['max', 'min'] + for i in range(num_random_samples): + self.sample_kinds.append('random' + str(i)) + + self.is_supernet = True + + self.drop_path_rate = drop_path_rate + self.backbone_dropout_stages = backbone_dropout_stages + self._optim_wrapper_count_status_reinitialized = False + + def _build_mutator(self, mutator: VALID_MUTATOR_TYPE = None) -> NasMutator: + """Build mutator.""" if isinstance(mutator, dict): mutator = MODELS.build(mutator) - if not isinstance(mutator, BaseMutator): - raise TypeError('mutator should be a `dict` or ' - '`OneShotModuleMutator` instance, but got ' - f'{type(mutator)}') - + if not isinstance(mutator, NasMutator): + raise TypeError('mutator should be a `dict` or `NasMutator` ' + f'instance, but got {type(mutator)}.') return mutator def _build_distiller( - self, distiller: VALID_DISTILLER_TYPE) -> ConfigurableDistiller: + self, + distiller: VALID_DISTILLER_TYPE = None) -> ConfigurableDistiller: + """Build distiller.""" if isinstance(distiller, dict): distiller = MODELS.build(distiller) if not isinstance(distiller, ConfigurableDistiller): raise TypeError('distiller should be a `dict` or ' '`ConfigurableDistiller` instance, but got ' f'{type(distiller)}') - return distiller - def sample_subnet(self, kind='random') -> Dict: - """Random sample subnet by mutator.""" - value_subnet = dict() - channel_subnet = dict() - for name, mutator in self.mutators.items(): - if name == 'value_mutator': - value_subnet.update(mutator.sample_choices(kind)) - elif name == 'channel_mutator': - channel_subnet.update(mutator.sample_choices(kind)) - else: - raise NotImplementedError - return dict(value_subnet=value_subnet, 
channel_subnet=channel_subnet) - - def set_subnet(self, subnet: Dict[str, Dict[int, Union[int, - list]]]) -> None: - """Set the subnet sampled by :meth:sample_subnet.""" - for name, mutator in self.mutators.items(): - if name == 'value_mutator': - mutator.set_choices(subnet['value_subnet']) - elif name == 'channel_mutator': - mutator.set_choices(subnet['channel_subnet']) - else: - raise NotImplementedError - - def set_max_subnet(self) -> None: - """Set max subnet.""" - for mutator in self.mutators.values(): - mutator.set_choices(mutator.max_choice) - - def set_min_subnet(self) -> None: - """Set min subnet.""" - for mutator in self.mutators.values(): - mutator.set_choices(mutator.min_choice) - def train_step(self, data: List[dict], optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]: if self.is_supernet: @@ -203,7 +153,7 @@ def distill_step( for kind in self.sample_kinds: # update the max subnet loss. if kind == 'max': - self.set_max_subnet() + self.mutator.set_max_choices() set_dropout( layers=self.architecture.backbone.layers[:-1], module=MBBlock, @@ -221,7 +171,7 @@ def distill_step( add_prefix(max_subnet_losses, 'max_subnet')) # update the min subnet loss. elif kind == 'min': - self.set_min_subnet() + self.mutator.set_min_choices() set_dropout( layers=self.architecture.backbone.layers[:-1], module=MBBlock, @@ -233,7 +183,7 @@ def distill_step( add_prefix(min_subnet_losses, 'min_subnet')) # update the random subnets loss. elif 'random' in kind: - self.set_subnet(self.sample_subnet()) + self.mutator.set_choices(self.mutator.sample_choices()) set_dropout( layers=self.architecture.backbone.layers[:-1], module=MBBlock, @@ -297,7 +247,7 @@ def distill_step( for kind in self.module.sample_kinds: # update the max subnet loss. 
if kind == 'max': - self.module.set_max_subnet() + self.module.mutator.set_max_choices() set_dropout( layers=self.module.architecture.backbone.layers[:-1], module=MBBlock, @@ -315,7 +265,7 @@ def distill_step( add_prefix(max_subnet_losses, 'max_subnet')) # update the min subnet loss. elif kind == 'min': - self.module.set_min_subnet() + self.module.mutator.set_min_choices() set_dropout( layers=self.module.architecture.backbone.layers[:-1], module=MBBlock, @@ -327,7 +277,8 @@ def distill_step( add_prefix(min_subnet_losses, 'min_subnet')) # update the random subnets loss. elif 'random' in kind: - self.module.set_subnet(self.module.sample_subnet()) + self.module.mutator.set_choices( + self.module.mutator.sample_choices()) set_dropout( layers=self.module.architecture.backbone.layers[:-1], module=MBBlock, diff --git a/mmrazor/models/algorithms/nas/darts.py b/mmrazor/models/algorithms/nas/darts.py index 917f47866..2c53b45c7 100644 --- a/mmrazor/models/algorithms/nas/darts.py +++ b/mmrazor/models/algorithms/nas/darts.py @@ -9,12 +9,14 @@ from torch import nn from torch.nn.modules.batchnorm import _BatchNorm -from mmrazor.models.mutators import DiffModuleMutator +from mmrazor.models.mutators import NasMutator from mmrazor.models.utils import add_prefix from mmrazor.registry import MODEL_WRAPPERS, MODELS -from mmrazor.utils import FixMutable +from mmrazor.utils import ValidFixMutable from ..base import BaseAlgorithm +VALID_MUTATOR_TYPE = Union[NasMutator, Dict] + @MODELS.register_module() class Darts(BaseAlgorithm): @@ -27,27 +29,23 @@ class Darts(BaseAlgorithm): Args: architecture (dict|:obj:`BaseModel`): The config of :class:`BaseModel` or built model. Corresponding to supernet in NAS algorithm. - mutator (dict|:obj:`DiffModuleMutator`): The config of - :class:`DiffModuleMutator` or built mutator. + mutator (VALID_MUTATOR_TYPE): The config of :class:`NasMutator` or + built mutator. 
fix_subnet (str | dict | :obj:`FixSubnet`): The path of yaml file or loaded dict or built :obj:`FixSubnet`. norm_training (bool): Whether to set norm layers to training mode, namely, not freeze running stats (mean and var). Note: Effect on Batch Norm and its variants only. Defaults to False. - data_preprocessor (dict, optional): The pre-process config of - :class:`BaseDataPreprocessor`. Defaults to None. - init_cfg (dict): Init config for ``BaseModule``. - - Note: - Darts has two training mode: supernet training and subnet retraining. - If `fix_subnet` is None, it means supernet training. - If `fix_subnet` is not None, it means subnet training. + data_preprocessor (Optional[Union[dict, nn.Module]]): The pre-process + config of :class:`BaseDataPreprocessor`. Defaults to None. + init_cfg (Optional[dict]): Init config for ``BaseModule``. + Defaults to None. """ def __init__(self, architecture: Union[BaseModel, Dict], - mutator: Optional[Union[DiffModuleMutator, Dict]] = None, - fix_subnet: Optional[FixMutable] = None, + mutator: VALID_MUTATOR_TYPE = None, + fix_subnet: Optional[ValidFixMutable] = None, unroll: bool = False, norm_training: bool = False, data_preprocessor: Optional[Union[dict, nn.Module]] = None, @@ -64,35 +62,26 @@ def __init__(self, load_fix_subnet(self.architecture, fix_subnet) self.is_supernet = False else: - assert mutator is not None, \ - 'mutator cannot be None when fix_subnet is None.' - if isinstance(mutator, DiffModuleMutator): - self.mutator = mutator - elif isinstance(mutator, dict): - self.mutator = MODELS.build(mutator) - else: - raise TypeError('mutator should be a `dict` or ' - f'`DiffModuleMutator` instance, but got ' - f'{type(mutator)}') - + self.mutator = self._build_mutator(mutator) # Mutator is an essential component of the NAS algorithm. It # provides some APIs commonly used by NAS. # Before using it, you must do some preparation according to # the supernet. 
self.mutator.prepare_from_supernet(self.architecture) + self.mutator.prepare_arch_params() self.is_supernet = True self.norm_training = norm_training self.unroll = unroll - def search_subnet(self): - """Search subnet by mutator.""" - # Avoid circular import - from mmrazor.structures import export_fix_subnet - - subnet = self.mutator.sample_choices() - self.mutator.set_choices(subnet) - return export_fix_subnet(self)[0] + def _build_mutator(self, mutator: VALID_MUTATOR_TYPE = None) -> NasMutator: + """Build mutator.""" + if isinstance(mutator, dict): + mutator = MODELS.build(mutator) + if not isinstance(mutator, NasMutator): + raise TypeError('mutator should be a `dict` or `NasMutator` ' + f'instance, but got {type(mutator)}.') + return mutator def train(self, mode=True): """Convert the model into eval mode while keep normalization layer diff --git a/mmrazor/models/algorithms/nas/dsnas.py b/mmrazor/models/algorithms/nas/dsnas.py index b80e655ff..4a730575e 100644 --- a/mmrazor/models/algorithms/nas/dsnas.py +++ b/mmrazor/models/algorithms/nas/dsnas.py @@ -13,14 +13,16 @@ from torch import nn from torch.nn.modules.batchnorm import _BatchNorm -from mmrazor.models.mutables.base_mutable import BaseMutable -from mmrazor.models.mutators import DiffModuleMutator +from mmrazor.models.mutables import BaseMutable +from mmrazor.models.mutators import NasMutator from mmrazor.models.utils import add_prefix from mmrazor.registry import MODEL_WRAPPERS, MODELS, TASK_UTILS -from mmrazor.structures import load_fix_subnet -from mmrazor.utils import FixMutable +from mmrazor.structures import export_fix_subnet, load_fix_subnet +from mmrazor.utils import ValidFixMutable from ..base import BaseAlgorithm +VALID_MUTATOR_TYPE = Union[NasMutator, Dict] + @MODELS.register_module() class DSNAS(BaseAlgorithm): @@ -29,8 +31,8 @@ class DSNAS(BaseAlgorithm): Args: architecture (dict|:obj:`BaseModel`): The config of :class:`BaseModel` or built model. Corresponding to supernet in NAS algorithm. 
- mutator (dict|:obj:`DiffModuleMutator`): The config of - :class:`DiffModuleMutator` or built mutator. + mutator (VALID_MUTATOR_TYPE): The config of :class:`NasMutator` or + built mutator. fix_subnet (str | dict | :obj:`FixSubnet`): The path of yaml file or loaded dict or built :obj:`FixSubnet`. pretrain_epochs (int): Num of epochs for supernet pretraining. @@ -56,51 +58,41 @@ class DSNAS(BaseAlgorithm): def __init__(self, architecture: Union[BaseModel, Dict], - mutator: Optional[Union[DiffModuleMutator, Dict]] = None, - fix_subnet: Optional[FixMutable] = None, + mutator: VALID_MUTATOR_TYPE = None, + fix_subnet: Optional[ValidFixMutable] = None, pretrain_epochs: int = 0, finetune_epochs: int = 80, flops_constraints: float = 300.0, estimator_cfg: Dict[str, Any] = None, norm_training: bool = False, data_preprocessor: Optional[Union[dict, nn.Module]] = None, - init_cfg: Optional[dict] = None, - **kwargs): - super().__init__(architecture, data_preprocessor, **kwargs) + init_cfg: Optional[dict] = None): + super().__init__(architecture, data_preprocessor, init_cfg) # initialize estimator estimator_cfg = dict() if estimator_cfg is None else estimator_cfg if 'type' not in estimator_cfg: estimator_cfg['type'] = 'mmrazor.ResourceEstimator' self.estimator = TASK_UTILS.build(estimator_cfg) - if fix_subnet: - # Avoid circular import - from mmrazor.structures import load_fix_subnet + if fix_subnet: # According to fix_subnet, delete the unchosen part of supernet load_fix_subnet(self.architecture, fix_subnet) self.is_supernet = False else: - assert mutator is not None, \ - 'mutator cannot be None when fix_subnet is None.' 
- if isinstance(mutator, DiffModuleMutator): - self.mutator = mutator - elif isinstance(mutator, dict): - self.mutator = MODELS.build(mutator) - else: - raise TypeError('mutator should be a `dict` or ' - f'`DiffModuleMutator` instance, but got ' - f'{type(mutator)}') - - self.mutable_module_resources = self._get_module_resources() + self.mutator = self._build_mutator(mutator) # Mutator is an essential component of the NAS algorithm. It # provides some APIs commonly used by NAS. - # Before using it, you must do some preparations according to + # Before using it, you must do some preparation according to # the supernet. self.mutator.prepare_from_supernet(self.architecture) - self.is_supernet = True + self.mutator.prepare_arch_params() + + self.mutable_module_resources = self._get_module_resources() self.search_space_name_list = list( - self.mutator.name2mutable.keys()) + self.mutator._name2mutable.keys()) + + self.is_supernet = True self.norm_training = norm_training self.pretrain_epochs = pretrain_epochs @@ -114,25 +106,14 @@ def __init__(self, self.flops_constraints = flops_constraints _, self.world_size = get_dist_info() - def search_subnet(self): - """Search subnet by mutator.""" - - # Avoid circular import - from mmrazor.structures import export_fix_subnet - - subnet = self.mutator.sample_choices() - self.mutator.set_choices(subnet) - return export_fix_subnet(self)[0] - - def fix_subnet(self): - """Fix subnet when finetuning.""" - subnet = self.mutator.sample_choices() - self.mutator.set_choices(subnet) - for module in self.architecture.modules(): - if isinstance(module, BaseMutable): - if not module.is_fixed: - module.fix_chosen(module.current_choice) - self.is_supernet = False + def _build_mutator(self, mutator: VALID_MUTATOR_TYPE = None) -> NasMutator: + """Build mutator.""" + if isinstance(mutator, dict): + mutator = MODELS.build(mutator) + if not isinstance(mutator, NasMutator): + raise TypeError('mutator should be a `dict` or `NasMutator` ' + f'instance, 
but got {type(mutator)}.') + return mutator def train(self, mode=True): """Convert the model into eval mode while keep normalization layer @@ -159,12 +140,12 @@ def train_step(self, data: List[dict], cur_epoch = self.message_hub.get_info('epoch') need_update_mutator = self.need_update_mutator(cur_epoch) - # TODO process the input if cur_epoch == self.finetune_epochs and self.is_supernet: # synchronize arch params to start the finetune stage. for k, v in self.mutator.arch_params.items(): dist.broadcast(v, src=0) - self.fix_subnet() + self._fix_archtecture() + self.is_supernet = False # 1. update architecture with optim_wrapper['architecture'].optim_context(self): @@ -205,9 +186,16 @@ def train_step(self, data: List[dict], return log_vars + def _fix_archtecture(self): + """Fix architecture based on current choice.""" + self.mutator.set_choices(self.mutator.sample_choices()) + for module in self.architecture.modules(): + if isinstance(module, BaseMutable): + if not module.is_fixed: + module.fix_chosen(module.current_choice) + def _get_module_resources(self): """Get resources of spec modules.""" - spec_modules = [] for name, module in self.architecture.named_modules(): if isinstance(module, BaseMutable): @@ -239,7 +227,8 @@ def compute_mutator_loss(self) -> Dict[str, torch.Tensor]: flops_loss = 0.0 for name, module in self.architecture.named_modules(): if isinstance(module, BaseMutable): - k = str(self.search_space_name_list.index(name)) + k = module.mutable_prefix + '_' + \ + str(self.search_space_name_list.index(name)) probs = F.softmax(self.mutator.arch_params[k], -1) arch_loss += torch.log( (module.arch_weights * probs).sum(-1)).sum() @@ -253,8 +242,10 @@ def compute_mutator_loss(self) -> Dict[str, torch.Tensor]: mutator_loss = dict(arch_loss=arch_loss / self.world_size) copied_model = copy.deepcopy(self) - fix_mutable = copied_model.search_subnet() - load_fix_subnet(copied_model, fix_mutable) + 
copied_model.mutator.set_choices(copied_model.mutator.sample_choices()) + + subnet_dict = export_fix_subnet(copied_model)[0] + load_fix_subnet(copied_model, subnet_dict) subnet_flops = self.estimator.estimate(copied_model)['flops'] if subnet_flops >= self.flops_constraints: @@ -267,7 +258,8 @@ def handle_grads(self): """Handle grads of arch params & arch weights.""" for name, module in self.architecture.named_modules(): if isinstance(module, BaseMutable): - k = str(self.search_space_name_list.index(name)) + k = module.mutable_prefix + '_' + \ + str(self.search_space_name_list.index(name)) self.mutator.arch_params[k].grad.data.mul_( module.arch_weights.grad.data.sum()) module.arch_weights.grad.zero_() @@ -307,7 +299,8 @@ def train_step(self, data: List[dict], # synchronize arch params to start the finetune stage. for k, v in self.module.mutator.arch_params.items(): dist.broadcast(v, src=0) - self.module.fix_subnet() + self.module._fix_archtecture() + self.module.is_supernet = False # 1. update architecture with optim_wrapper['architecture'].optim_context(self): diff --git a/mmrazor/models/algorithms/nas/spos.py b/mmrazor/models/algorithms/nas/spos.py index 356dab6a2..cc7799c7c 100644 --- a/mmrazor/models/algorithms/nas/spos.py +++ b/mmrazor/models/algorithms/nas/spos.py @@ -7,11 +7,13 @@ from torch import nn from torch.nn.modules.batchnorm import _BatchNorm -from mmrazor.models.mutators import OneShotModuleMutator +from mmrazor.models.mutators import NasMutator from mmrazor.registry import MODELS -from mmrazor.utils import SingleMutatorRandomSubnet, ValidFixMutable +from mmrazor.utils import ValidFixMutable from ..base import BaseAlgorithm, LossResults +VALID_MUTATOR_TYPE = Union[NasMutator, Dict] + @MODELS.register_module() class SPOS(BaseAlgorithm): @@ -28,21 +30,17 @@ class SPOS(BaseAlgorithm): Args: architecture (dict|:obj:`BaseModel`): The config of :class:`BaseModel` or built model. Corresponding to supernet in NAS algorithm. 
- mutator (dict|:obj:`OneShotModuleMutator`): The config of - :class:`OneShotModuleMutator` or built mutator. + mutator (VALID_MUTATOR_TYPE): The config of :class:`NasMutator` or + built mutator. fix_subnet (str | dict | :obj:`FixSubnet`): The path of yaml file or - loaded dict or built :obj:`FixSubnet`. + loaded dict or built :obj:`FixSubnet`. Defaults to None. norm_training (bool): Whether to set norm layers to training mode, namely, not freeze running stats (mean and var). Note: Effect on Batch Norm and its variants only. Defaults to False. - data_preprocessor (dict, optional): The pre-process config of - :class:`BaseDataPreprocessor`. Defaults to None. - init_cfg (dict): Init config for ``BaseModule``. - - Note: - SPOS has two training mode: supernet training and subnet retraining. - If `fix_subnet` is None, it means supernet training. - If `fix_subnet` is not None, it means subnet training. + data_preprocessor (Optional[Union[dict, nn.Module]]): The pre-process + config of :class:`BaseDataPreprocessor`. Defaults to None. + init_cfg (Optional[dict]): Init config for ``BaseModule``. + Defaults to None. Note: During supernet training, since each op is not fully trained, the @@ -54,19 +52,11 @@ class SPOS(BaseAlgorithm): 1) Using a large batch size, BNs use the mean and variance of the current batch during forward. 2) Recalibrate the statistics of BN before searching. - - Note: - SPOS only uses one mutator. If you want to inherit SPOS to develop - more complex algorithms, it is also feasible to use multiple mutators. - For example, one part of the supernet uses SPOS(OneShotModuleMutator) - to search, and the other part uses Darts(DiffModuleMutator) to search. """ - # TODO fix ea's name in doc-string. 
- def __init__(self, architecture: Union[BaseModel, Dict], - mutator: Optional[Union[OneShotModuleMutator, Dict]] = None, + mutator: VALID_MUTATOR_TYPE = None, fix_subnet: Optional[ValidFixMutable] = None, norm_training: bool = False, data_preprocessor: Optional[Union[dict, nn.Module]] = None, @@ -83,17 +73,7 @@ def __init__(self, load_fix_subnet(self.architecture, fix_subnet) self.is_supernet = False else: - assert mutator is not None, \ - 'mutator cannot be None when fix_subnet is None.' - if isinstance(mutator, OneShotModuleMutator): - self.mutator = mutator - elif isinstance(mutator, dict): - self.mutator = MODELS.build(mutator) - else: - raise TypeError('mutator should be a `dict` or ' - f'`OneShotModuleMutator` instance, but got ' - f'{type(mutator)}') - + self.mutator = self._build_mutator(mutator) # Mutator is an essential component of the NAS algorithm. It # provides some APIs commonly used by NAS. # Before using it, you must do some preparations according to @@ -103,13 +83,14 @@ def __init__(self, self.norm_training = norm_training - def sample_subnet(self) -> SingleMutatorRandomSubnet: - """Random sample subnet by mutator.""" - return self.mutator.sample_choices() - - def set_subnet(self, subnet: SingleMutatorRandomSubnet): - """Set the subnet sampled by :meth:sample_subnet.""" - self.mutator.set_choices(subnet) + def _build_mutator(self, mutator: VALID_MUTATOR_TYPE = None) -> NasMutator: + """Build mutator.""" + if isinstance(mutator, dict): + mutator = MODELS.build(mutator) + if not isinstance(mutator, NasMutator): + raise TypeError('mutator should be a `dict` or `NasMutator` ' + f'instance, but got {type(mutator)}.') + return mutator def loss( self, @@ -118,8 +99,7 @@ def loss( ) -> LossResults: """Calculate losses from a batch of inputs and data samples.""" if self.is_supernet: - random_subnet = self.sample_subnet() - self.set_subnet(random_subnet) + self.mutator.set_choices(self.mutator.sample_choices()) return self.architecture(batch_inputs, 
data_samples, mode='loss') else: return self.architecture(batch_inputs, data_samples, mode='loss') diff --git a/mmrazor/models/algorithms/pruning/dcff.py b/mmrazor/models/algorithms/pruning/dcff.py index 71b669c09..e89da50b4 100644 --- a/mmrazor/models/algorithms/pruning/dcff.py +++ b/mmrazor/models/algorithms/pruning/dcff.py @@ -10,7 +10,6 @@ from mmrazor.models.mutators import DCFFChannelMutator from mmrazor.registry import MODELS -from mmrazor.utils import ValidFixMutable from .ite_prune_algorithm import ItePruneAlgorithm, ItePruneConfigManager LossResults = Dict[str, torch.Tensor] @@ -51,9 +50,8 @@ class DCFF(ItePruneAlgorithm): def __init__(self, architecture: Union[BaseModel, Dict], mutator_cfg: Union[Dict, DCFFChannelMutator] = dict( - type=' DCFFChannelMutator', + type='DCFFChannelMutator', channel_unit_cfg=dict(type='DCFFChannelUnit')), - fix_subnet: Optional[ValidFixMutable] = None, data_preprocessor: Optional[Union[Dict, nn.Module]] = None, target_pruning_ratio: Optional[Dict[str, float]] = None, step_freq=1, @@ -61,9 +59,9 @@ def __init__(self, init_cfg: Optional[Dict] = None, linear_schedule=False) -> None: # invalid param prune_times, reset after message_hub get [max_epoch] - super().__init__(architecture, mutator_cfg, fix_subnet, - data_preprocessor, target_pruning_ratio, step_freq, - prune_times, init_cfg, linear_schedule) + super().__init__(architecture, mutator_cfg, data_preprocessor, + target_pruning_ratio, step_freq, prune_times, + init_cfg, linear_schedule) def _calc_temperature(self, cur_num: int, max_num: int): """Calculate temperature param.""" @@ -92,10 +90,10 @@ def _init_prune_config_manager(self): In DCFF, prune_times is set by step_freq and self._max_iters. 
""" if self.target_pruning_ratio is None: - group_target_ratio = self.mutator.current_choices + target_pruning_ratio = self.mutator.current_choices else: - group_target_ratio = self.group_target_pruning_ratio( - self.target_pruning_ratio, self.mutator.search_groups) + target_pruning_ratio = self.set_target_pruning_ratio( + self.target_pruning_ratio, self.mutator.mutable_units) if self.by_epoch: # step_freq based on iterations @@ -114,7 +112,7 @@ def _init_prune_config_manager(self): # config_manager move to forward. # message_hub['max_epoch'] unaccessible when init prune_config_manager = ItePruneConfigManager( - group_target_ratio, + target_pruning_ratio, self.mutator.current_choices, self.step_freq, prune_times=self.prune_times, @@ -127,25 +125,29 @@ def forward(self, data_samples: Optional[List[BaseDataElement]] = None, mode: str = 'tensor') -> ForwardResults: """Forward.""" - # In DCFF prune_message is related to total_num - # Set self.prune_config_manager after message_hub has['max_epoch/iter'] - if not hasattr(self, 'prune_config_manager'): - # iter num per epoch only available after initiation - self.prune_config_manager = self._init_prune_config_manager() - if self.prune_config_manager.is_prune_time(self._iter): - config = self.prune_config_manager.prune_at(self._iter) - self.mutator.set_choices(config) - - # calc fusion channel - temperature = self._calc_temperature(self._iter, self._max_iters) - self.mutator.calc_information(temperature) - - logger = MMLogger.get_current_instance() - if (self.by_epoch): - logger.info( - f'The model is pruned at {self._epoch}th epoch once.') - else: - logger.info( - f'The model is pruned at {self._iter}th iter once.') + + if self.training: + # In DCFF prune_message is related to total_num + # Set self.prune_config_manager after message_hub + # has['max_epoch/iter'] + if not hasattr(self, 'prune_config_manager'): + # iter num per epoch only available after initiation + self.prune_config_manager = 
self._init_prune_config_manager() + if self.prune_config_manager.is_prune_time(self._iter): + config = self.prune_config_manager.prune_at(self._iter) + self.mutator.set_choices(config) + + # calc fusion channel + temperature = self._calc_temperature(self._iter, + self._max_iters) + self.mutator.calc_information(temperature) + + logger = MMLogger.get_current_instance() + if (self.by_epoch): + logger.info( + f'The model is pruned at {self._epoch}th epoch once.') + else: + logger.info( + f'The model is pruned at {self._iter}th iter once.') return super().forward(inputs, data_samples, mode) diff --git a/mmrazor/models/algorithms/pruning/ite_prune_algorithm.py b/mmrazor/models/algorithms/pruning/ite_prune_algorithm.py index 057422290..937aaa156 100644 --- a/mmrazor/models/algorithms/pruning/ite_prune_algorithm.py +++ b/mmrazor/models/algorithms/pruning/ite_prune_algorithm.py @@ -10,7 +10,6 @@ from mmrazor.models.mutables import MutableChannelUnit from mmrazor.models.mutators import ChannelMutator from mmrazor.registry import MODELS -from mmrazor.utils import ValidFixMutable from ..base import BaseAlgorithm LossResults = Dict[str, torch.Tensor] @@ -98,8 +97,6 @@ class ItePruneAlgorithm(BaseAlgorithm): mutator_cfg (Union[Dict, ChannelMutator], optional): The config of a mutator. Defaults to dict( type='ChannelMutator', channel_unit_cfg=dict( type='SequentialMutableChannelUnit')). - fix_subnet (str | dict | :obj:`FixSubnet`): The path of yaml file or - loaded dict or built :obj:`FixSubnet`. Defaults to None. data_preprocessor (Optional[Union[Dict, nn.Module]], optional): Defaults to None. target_pruning_ratio (dict, optional): The prune-target. 
The template @@ -121,7 +118,6 @@ def __init__(self, type='ChannelMutator', channel_unit_cfg=dict( type='SequentialMutableChannelUnit')), - fix_subnet: Optional[ValidFixMutable] = None, data_preprocessor: Optional[Union[Dict, nn.Module]] = None, target_pruning_ratio: Optional[Dict[str, float]] = None, step_freq=1, @@ -140,33 +136,24 @@ def __init__(self, self.mutator: ChannelMutator = MODELS.build(mutator_cfg) self.mutator.prepare_from_supernet(self.architecture) - def group_target_pruning_ratio( - self, target: Dict[str, float], - search_groups: Dict[int, - List[MutableChannelUnit]]) -> Dict[int, float]: + def set_target_pruning_ratio( + self, target: Dict[str, float], + units: List[MutableChannelUnit]) -> Dict[str, float]: """According to the target pruning ratio of each unit, set the target - ratio of each search group.""" - group_target: Dict[int, float] = dict() - for group_id, units in search_groups.items(): - for unit in units: - unit_name = unit.name - # The config of target pruning ratio does not - # contain all units. - if unit_name not in target: - continue - if group_id in group_target: - unit_target = target[unit_name] - if unit_target != group_target[group_id]: - group_names = [u.name for u in units] - raise ValueError( - f"'{unit_name}' target ratio is different from " - f'other units in the same group {group_names}. ' - 'Pls check your target pruning ratio config.') - else: - unit_target = target[unit_name] - assert isinstance(unit_target, (float, int)) - group_target[group_id] = unit_target - return group_target + ratio of each unit in units.""" + target_pruning_ratio: Dict[str, float] = dict() + for unit in units: + assert isinstance(unit, MutableChannelUnit), ( + f'unit should be `MutableChannelUnit`, but got {type(unit)}.') + unit_name = unit.name + # The config of target pruning ratio does not + # contain all units. 
+ if unit_name not in target: + continue + unit_target = target[unit_name] + assert isinstance(unit_target, (float, int)) + target_pruning_ratio[unit_name] = unit_target + return target_pruning_ratio def check_prune_target(self, config: Dict): """Check if the prune-target is supported.""" @@ -179,10 +166,10 @@ def _init_prune_config_manager(self): message_hub['max_epoch/iter'] unaccessible when initiation. """ if self.target_pruning_ratio is None: - group_target_ratio = self.mutator.current_choices + target_pruning_ratio = self.mutator.current_choices else: - group_target_ratio = self.group_target_pruning_ratio( - self.target_pruning_ratio, self.mutator.search_groups) + target_pruning_ratio = self.set_target_pruning_ratio( + self.target_pruning_ratio, self.mutator.mutable_units) if self.by_epoch: # step_freq based on iterations @@ -191,7 +178,7 @@ def _init_prune_config_manager(self): # config_manager move to forward. # message_hub['max_epoch'] unaccessible when init prune_config_manager = ItePruneConfigManager( - group_target_ratio, + target_pruning_ratio, self.mutator.current_choices, self.step_freq, prune_times=self.prune_times, diff --git a/mmrazor/models/algorithms/pruning/slimmable_network.py b/mmrazor/models/algorithms/pruning/slimmable_network.py index 429c2c856..f57c223ee 100644 --- a/mmrazor/models/algorithms/pruning/slimmable_network.py +++ b/mmrazor/models/algorithms/pruning/slimmable_network.py @@ -45,8 +45,8 @@ class SlimmableNetwork(BaseAlgorithm): """ def __init__(self, - mutator: VALID_MUTATOR_TYPE, architecture: Union[BaseModel, Dict], + mutator: VALID_MUTATOR_TYPE = None, deploy_index=-1, data_preprocessor: Optional[Union[Dict, nn.Module]] = None, init_cfg: Optional[Dict] = None) -> None: diff --git a/mmrazor/models/architectures/dynamic_ops/mixins/dynamic_mixins.py b/mmrazor/models/architectures/dynamic_ops/mixins/dynamic_mixins.py index dbd5de869..2a610e5e8 100644 --- a/mmrazor/models/architectures/dynamic_ops/mixins/dynamic_mixins.py +++ 
b/mmrazor/models/architectures/dynamic_ops/mixins/dynamic_mixins.py @@ -79,7 +79,7 @@ def check_if_mutables_fixed(self) -> None: def check_fixed(mutable: Optional[BaseMutable]) -> None: if mutable is not None and not mutable.is_fixed: - raise RuntimeError(f'Mutable {type(mutable)} is not fixed.') + raise RuntimeError(f'Mutable `{mutable.alias}` is not fixed.') for mutable in self.mutable_attrs.values(): # type: ignore if isinstance(mutable, (MutableChannelContainer, DerivedMutable)): diff --git a/mmrazor/models/architectures/utils/mutable_register.py b/mmrazor/models/architectures/utils/mutable_register.py index 1e33a80a1..256bf9ca5 100644 --- a/mmrazor/models/architectures/utils/mutable_register.py +++ b/mmrazor/models/architectures/utils/mutable_register.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. +import copy from typing import Optional, Sequence, Tuple from mmrazor.models.architectures.ops.mobilenet_series import MBBlock @@ -50,8 +51,13 @@ def mutate_mobilenet_layer(mb_layer: MBBlock, mutable_in_channels, mutable_kernel_size=mutable_kernel_size) if mb_layer.with_se: + mutable_expand_ratio2 = copy.deepcopy(mutable_expand_ratio) + mutable_expand_ratio2.alias += '_se' + + derived_se_channels = mutable_expand_ratio2 * mutable_in_channels mb_layer.derived_se_channels = \ - mb_layer.derived_expand_channels.derive_divide_mutable(4, 8) + derived_se_channels.derive_divide_mutable(4, 8) + mutate_conv_module( mb_layer.se.conv1, mutable_in_channels=mb_layer.derived_expand_channels, diff --git a/mmrazor/models/mutables/mutable_channel/units/channel_unit.py b/mmrazor/models/mutables/mutable_channel/units/channel_unit.py index bf1bf909f..b99423aa2 100644 --- a/mmrazor/models/mutables/mutable_channel/units/channel_unit.py +++ b/mmrazor/models/mutables/mutable_channel/units/channel_unit.py @@ -62,8 +62,9 @@ def init_from_cfg(cls, model: nn.Module, config: Dict): def config_template(self): """Generate a config template which can be used to initialize a 
Channel by cls.init_from_cfg(**kwargs)""" + return { - 'name': self.name, + 'name': str(self.name), 'start': self.start, 'end': self.end, 'is_output_channel': self.is_output_channel diff --git a/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.py b/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.py index dabe41fab..f99b9b7cc 100644 --- a/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.py +++ b/mmrazor/models/mutables/mutable_channel/units/mutable_channel_unit.py @@ -120,6 +120,11 @@ def process_container(container: MutableChannelContainer, # properties + @property + def mutable_prefix(self) -> str: + """Mutable prefix.""" + return 'channel' + @property def is_mutable(self) -> bool: """If the channel-unit is prunable.""" diff --git a/mmrazor/models/mutables/mutable_module/mutable_module.py b/mmrazor/models/mutables/mutable_module/mutable_module.py index c71f1a969..6e03df285 100644 --- a/mmrazor/models/mutables/mutable_module/mutable_module.py +++ b/mmrazor/models/mutables/mutable_module/mutable_module.py @@ -39,6 +39,23 @@ def __init__(self, self.module_kwargs = module_kwargs self._current_choice = None + @property + def mutable_prefix(self) -> str: + """Mutable prefix.""" + return 'module' + + @property + def max_choice(self): + """max_choice shouldn't exist.""" + raise AttributeError( + 'MutableModule does not have the attr `max choice`.') + + @property + def min_choice(self): + """min_choice shouldn't exist.""" + raise AttributeError( + 'MutableModule does not have the attr `min choice`.') + @property def current_choice(self): """Current choice will affect :meth:`forward` and will be used in diff --git a/mmrazor/models/mutables/mutable_value/mutable_value.py b/mmrazor/models/mutables/mutable_value/mutable_value.py index 3df551813..146e886d0 100644 --- a/mmrazor/models/mutables/mutable_value/mutable_value.py +++ b/mmrazor/models/mutables/mutable_value/mutable_value.py @@ -57,6 +57,11 @@ def 
_check_is_same_type(value_list: List[Any]) -> None: f'type, but both types {type(value_list[i-1])} ' f'and type {type(value_list[i])} exist.') + @property + def mutable_prefix(self) -> str: + """Mutable prefix.""" + return 'value' + @property def choices(self) -> List[Any]: """List of choices.""" diff --git a/mmrazor/models/mutators/__init__.py b/mmrazor/models/mutators/__init__.py index 0bb318dd5..179b4455d 100644 --- a/mmrazor/models/mutators/__init__.py +++ b/mmrazor/models/mutators/__init__.py @@ -2,13 +2,9 @@ from .channel_mutator import (ChannelMutator, DCFFChannelMutator, DMCPChannelMutator, OneShotChannelMutator, SlimmableChannelMutator) -from .module_mutator import (DiffModuleMutator, ModuleMutator, - OneShotModuleMutator) -from .value_mutator import DynamicValueMutator, ValueMutator +from .nas_mutator import NasMutator __all__ = [ - 'OneShotModuleMutator', 'DiffModuleMutator', 'ModuleMutator', - 'ChannelMutator', 'OneShotChannelMutator', 'SlimmableChannelMutator', - 'ValueMutator', 'DynamicValueMutator', 'DCFFChannelMutator', - 'DMCPChannelMutator' + 'ChannelMutator', 'DCFFChannelMutator', 'DMCPChannelMutator', + 'SlimmableChannelMutator', 'NasMutator', 'OneShotChannelMutator' ] diff --git a/mmrazor/models/mutators/base_mutator.py b/mmrazor/models/mutators/base_mutator.py index 28b4ba0c8..994ba5e6d 100644 --- a/mmrazor/models/mutators/base_mutator.py +++ b/mmrazor/models/mutators/base_mutator.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. from abc import ABC, abstractmethod -from typing import Dict, Generic, Optional, Type, TypeVar +from typing import Dict, Generic, Optional, TypeVar from mmengine.model import BaseModule from torch.nn import Module @@ -51,9 +51,3 @@ def search_groups(self) -> Dict: Returns: dict: Search group. 
""" - - @property - @abstractmethod - def mutable_class_type(self) -> Type[MUTABLE_TYPE]: - """Corresponding mutable class type.""" - pass diff --git a/mmrazor/models/mutators/channel_mutator/channel_mutator.py b/mmrazor/models/mutators/channel_mutator/channel_mutator.py index 71db1cd43..38abd2fcc 100644 --- a/mmrazor/models/mutators/channel_mutator/channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/channel_mutator.py @@ -12,11 +12,10 @@ from mmrazor.models.task_modules.tracer.channel_analyzer import ChannelAnalyzer from mmrazor.registry import MODELS, TASK_UTILS from ..base_mutator import BaseMutator -from ..group_mixin import GroupMixin @MODELS.register_module() -class ChannelMutator(BaseMutator, Generic[ChannelUnitType], GroupMixin): +class ChannelMutator(BaseMutator, Generic[ChannelUnitType]): """ChannelMutator manages the pruning structure of a model. Args: @@ -26,7 +25,7 @@ class ChannelMutator(BaseMutator, Generic[ChannelUnitType], GroupMixin): channel_unit_cfg = dict( # type of used MutableChannelUnit type ='XxxMutableChannelUnit', - # default args for MutableChananelUnit + # default args for MutableChannelUnit default_args={}, units = { # config of a unit @@ -46,10 +45,6 @@ class ChannelMutator(BaseMutator, Generic[ChannelUnitType], GroupMixin): demo_input=(1, 3, 224, 224), tracer_type='BackwardTracer') - custom_groups (list[list[str]], optional): User-defined search groups. - All searchable modules that are not in ``custom_group`` will be - grouped separately. - init_cfg (dict, optional): initialization configuration dict for BaseModule. 
@@ -74,7 +69,6 @@ def __init__(self, type='ChannelAnalyzer', demo_input=(1, 3, 224, 224), tracer_type='BackwardTracer'), - custom_groups: Optional[List[List[str]]] = None, init_cfg: Optional[Dict] = None) -> None: super().__init__(init_cfg) @@ -96,10 +90,6 @@ def __init__(self, self._parse_channel_unit_cfg( channel_unit_cfg) - if custom_groups is None: - custom_groups = [] - self._custom_groups = custom_groups - def prepare_from_supernet(self, supernet: Module) -> None: """Prepare from a model for pruning. @@ -124,12 +114,6 @@ def prepare_from_supernet(self, supernet: Module) -> None: self._name2unit[unit.name] = unit self.units = ModuleList(units) - self._search_groups = self.build_search_groups( - ModuleList(self.mutable_units), self.mutable_class_type, - self._custom_groups) - - # ~ - @property def mutable_units(self) -> List[ChannelUnitType]: """Prunable units.""" @@ -203,16 +187,7 @@ def fix_channel_mutables(self): # choice manage - @property - def current_choices(self) -> Dict: - """Get current choices.""" - current_choices = dict() - for group_id, modules in self.search_groups.items(): - current_choices[group_id] = modules[0].current_choice - - return current_choices - - def sample_choices(self, kind: str = 'random') -> Dict[int, Any]: + def sample_choices(self, kind: str = 'random') -> Dict[str, Any]: """Sampling by search groups. The sampling result of the first mutable of each group is the sampling @@ -222,13 +197,12 @@ def sample_choices(self, kind: str = 'random') -> Dict[int, Any]: Dict[int, Any]: Random choices dict. """ assert kind == 'random', f'unsupported the {kind} sample method.' 
- random_choices = dict() - for group_id, modules in self.search_groups.items(): - random_choices[group_id] = modules[0].sample_choice() - - return random_choices + template = self.choice_template + for key in template: + template[key] = self._name2unit[key].sample_choice() + return template - def set_choices(self, choices: Dict[int, Any]) -> None: + def set_choices(self, choices: Dict[str, Any]) -> None: """Set mutables' current choice according to choices sample by :func:`sample_choices`. @@ -237,13 +211,17 @@ def set_choices(self, choices: Dict[int, Any]) -> None: search groups, and the value is the sampling results corresponding to this group. """ - for group_id, modules in self.search_groups.items(): - if group_id not in choices: - # allow optional target_prune_ratio - continue - choice = choices[group_id] - for module in modules: - module.current_choice = choice + for name, choice in choices.items(): + unit = self._name2unit[name] + unit.current_choice = choice + + @property + def current_choices(self) -> Dict: + """Get current choices.""" + config = self.choice_template + for unit in self.mutable_units: + config[unit.name] = unit.current_choice + return config @property def choice_template(self) -> Dict: @@ -275,11 +253,6 @@ def search_groups(self) -> Dict[int, List]: """ return self._search_groups - @property - def mutable_class_type(self) -> Type[ChannelUnitType]: - """Mutable class type supported by this mutator.""" - return self.unit_class - # private methods def _convert_channel_unit_to_mutable(self, units: List[ChannelUnit]): diff --git a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py index 232561998..de7bbc405 100644 --- a/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/dmcp_channel_mutator.py @@ -59,14 +59,14 @@ def prepare_arch_params(self, supernet: Module) -> None: self.arch_params = nn.ParameterDict() 
self._op_arch_align = dict() self._arch_params_attr = dict() - for group_id, module in self.search_groups.items(): + for group_id, module in enumerate(self.units): arch_message = self._generate_arch_message( - module[0].mutable_channel.num_channels) + module.mutable_channel.num_channels) self._arch_params_attr[str(group_id)] = arch_message group_arch_param = self._build_arch_param(arch_message[1]) self.arch_params[str(group_id)] = group_arch_param - for unit in module[0].output_related: + for unit in module.output_related: self._op_arch_align[str(unit.name)] = str(group_id) self._bn_arch_align = dict() @@ -112,8 +112,9 @@ def modify_supernet_forward(self, arch_train: str) -> None: def sample_subnet(self, mode: str, arch_train: str) -> None: """Sampling according to the input mode.""" choices = dict() - for group_id, _ in self.search_groups.items(): - choices[group_id] = self._prune_by_arch(mode, group_id) + + for group_id, _ in enumerate(self.units): + choices[str(group_id)] = self._prune_by_arch(mode, group_id) self.set_choices(choices) self.modify_supernet_forward(arch_train) @@ -123,7 +124,7 @@ def _prune_by_arch(self, mode: str, group_id: int) -> Union[int, Any]: Inputs: mode (list): one of ['max', 'min', 'random', 'direct', 'expected'] - group_id (int): number of search_groups + group_id (int): index of units Outputs: channels (int): for mode 'max'/'min'/'random'/'dirext' @@ -159,20 +160,19 @@ def _prune_by_arch(self, mode: str, group_id: int) -> Union[int, Any]: else: raise NotImplementedError - def set_choices(self, choices: Dict[int, Any]) -> None: + def set_choices(self, choices: Dict[str, Any]) -> None: """Set mutables' current choice according to choices sample by :func:`sample_choices`. Args: - choices (Dict[int, Any]): Choices dict. The key is group_id in + choices (Dict[str, Any]): Choices dict. The key is group_id in search groups, and the value is the sampling results corresponding to this group. 
""" - for group_id, modules in self.search_groups.items(): - if group_id not in choices: + for group_id, module in enumerate(self.units): + if str(group_id) not in choices.keys(): # allow optional target_prune_ratio continue - choice = choices[group_id] - for module in modules: - module.current_choice = choice - module.mutable_channel.activated_tensor_channels = choice + choice = choices[str(group_id)] + module.current_choice = choice + module.mutable_channel.activated_tensor_channels = choice diff --git a/mmrazor/models/mutators/channel_mutator/one_shot_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/one_shot_channel_mutator.py index fdaee3161..cc008b0b8 100644 --- a/mmrazor/models/mutators/channel_mutator/one_shot_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/one_shot_channel_mutator.py @@ -1,15 +1,14 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Type, Union +import copy +from typing import Dict, Type, Union from mmrazor.models.mutables import OneShotMutableChannelUnit from mmrazor.registry import MODELS -from ..group_mixin import DynamicSampleMixin from .channel_mutator import ChannelMutator, ChannelUnitType @MODELS.register_module() -class OneShotChannelMutator(ChannelMutator[OneShotMutableChannelUnit], - DynamicSampleMixin): +class OneShotChannelMutator(ChannelMutator[OneShotMutableChannelUnit]): """OneShotChannelMutator based on ChannelMutator. It use OneShotMutableChannelUnit by default. 
@@ -27,3 +26,45 @@ def __init__(self, **kwargs) -> None: super().__init__(channel_unit_cfg, **kwargs) + + @property + def max_choices(self) -> Dict: + """Get max choice for each unit in choice_template.""" + max_choices = copy.deepcopy(self.choice_template) + for key in self.choice_template: + max_choices[key] = self._name2unit[key].max_choice + return max_choices + + @property + def min_choices(self) -> Dict: + """Get min choice for each unit in choice_template.""" + min_choices = copy.deepcopy(self.choice_template) + for key in self.choice_template: + min_choices[key] = self._name2unit[key].min_choice + return min_choices + + def sample_choices(self, kind: str = 'random') -> Dict: + """Sample choice for each unit in choice_template.""" + choices = copy.deepcopy(self.choice_template) + for key in self.choice_template: + if kind == 'max': + choices[key] = self._name2unit[key].max_choice + elif kind == 'min': + choices[key] = self._name2unit[key].min_choice + elif kind == 'random': + choices[key] = self._name2unit[key].sample_choice() + else: + raise NotImplementedError() + return choices + + def set_max_choices(self): + """Set max choice for each unit in choice_template.""" + for name, choice in self.max_choices.items(): + unit = self._name2unit[name] + unit.current_choice = choice + + def set_min_choices(self): + """Set min choice for each unit in choice_template.""" + for name, choice in self.min_choices.items(): + unit = self._name2unit[name] + unit.current_choice = choice diff --git a/mmrazor/models/mutators/channel_mutator/slimmable_channel_mutator.py b/mmrazor/models/mutators/channel_mutator/slimmable_channel_mutator.py index ec726ad85..c3da419bf 100644 --- a/mmrazor/models/mutators/channel_mutator/slimmable_channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/slimmable_channel_mutator.py @@ -29,7 +29,7 @@ def __init__(self, tracer_type='BackwardTracer'), init_cfg: Optional[Dict] = None) -> None: - super().__init__(channel_unit_cfg, parse_cfg, 
None, init_cfg) + super().__init__(channel_unit_cfg, parse_cfg, init_cfg) self.subnets = self._prepare_subnets(self.units_cfg) @@ -65,7 +65,6 @@ def _prepare_subnets(self, unit_cfg: Dict) -> List[Dict[str, int]]: Returns: List[Dict[str, int]]: config of the subnets. """ - """Prepare subnet config.""" subnets: List[Dict[str, int]] = [] num_subnets = 0 for key in unit_cfg: diff --git a/mmrazor/models/mutators/group_mixin.py b/mmrazor/models/mutators/group_mixin.py index f6b84aea2..569f01ebc 100644 --- a/mmrazor/models/mutators/group_mixin.py +++ b/mmrazor/models/mutators/group_mixin.py @@ -1,22 +1,18 @@ # Copyright (c) OpenMMLab. All rights reserved. -import sys from collections import Counter -from typing import Dict, List, Type +from typing import Dict, List from torch.nn import Module -from ..mutables import BaseMutable - -if sys.version_info < (3, 8): - from typing_extensions import Protocol -else: - from typing import Protocol +from mmrazor.models.mutables import MutableValue +from mmrazor.models.mutables.mutable_module import MutableModule +from .base_mutator import MUTABLE_TYPE class GroupMixin(): """A mixin for :class:`BaseMutator`, which can group mutables by ``custom_group`` and ``alias``(see more information in - :class:`BaseMutable`). Grouping by alias and module name are both + :class:`MUTABLE_TYPE`). Grouping by alias and module name are both supported. 
Note: @@ -67,26 +63,31 @@ class GroupMixin(): """ + def is_supported_mutable(self, module): + """Judge whether is a supported mutable.""" + for mutable_type in [MutableModule, MutableValue]: + if isinstance(module, mutable_type): + return True + return False + def _build_name_mutable_mapping( - self, supernet: Module, - support_mutables: Type) -> Dict[str, BaseMutable]: + self, supernet: Module) -> Dict[str, MUTABLE_TYPE]: """Mapping module name to mutable.""" - name2mutable: Dict[str, BaseMutable] = dict() + name2mutable: Dict[str, MUTABLE_TYPE] = dict() for name, module in supernet.named_modules(): - if isinstance(module, support_mutables): + if self.is_supported_mutable(module): name2mutable[name] = module elif hasattr(module, 'source_mutables'): - for each_mutables in module.source_mutables: - if isinstance(each_mutables, support_mutables): - name2mutable[name] = each_mutables + for each_mutable in module.source_mutables: + if self.is_supported_mutable(each_mutable): + name2mutable[name] = each_mutable self._name2mutable = name2mutable return name2mutable - def _build_alias_names_mapping( - self, supernet: Module, - support_mutables: Type) -> Dict[str, List[str]]: + def _build_alias_names_mapping(self, + supernet: Module) -> Dict[str, List[str]]: """Mapping alias to module names.""" alias2mutable_names: Dict[str, List[str]] = dict() @@ -97,23 +98,24 @@ def _append(key, dict, name): dict[key].append(name) for name, module in supernet.named_modules(): - if isinstance(module, support_mutables): + if self.is_supported_mutable(module): if module.alias is not None: _append(module.alias, alias2mutable_names, name) elif hasattr(module, 'source_mutables'): - for each_mutables in module.source_mutables: - if isinstance(each_mutables, support_mutables): - if each_mutables.alias is not None: - _append(each_mutables.alias, alias2mutable_names, + for each_mutable in module.source_mutables: + if self.is_supported_mutable(each_mutable): + if each_mutable.alias is not None: + 
_append(each_mutable.alias, alias2mutable_names, name) return alias2mutable_names - def build_search_groups(self, supernet: Module, support_mutables: Type, - custom_groups: List[List[str]]) -> Dict[int, List]: + def build_search_groups( + self, supernet: Module, + custom_groups: List[List[str]]) -> Dict[str, List[MUTABLE_TYPE]]: """Build search group with ``custom_group`` and ``alias``(see more - information in :class:`BaseMutable`). Grouping by alias and module name - are both supported. + information in :class:`MUTABLE_TYPE`). Grouping by alias and module + name are both supported. Args: supernet (:obj:`torch.nn.Module`): The supernet to be searched @@ -122,12 +124,14 @@ def build_search_groups(self, supernet: Module, support_mutables: Type, custom_group (list, optional): User-defined search groups. All searchable modules that are not in ``custom_group`` will be grouped separately. + + Return: + search_groups (Dict[str, List[MUTABLE_TYPE]]): The built + search_groups. """ - name2mutable: Dict[str, - BaseMutable] = self._build_name_mutable_mapping( - supernet, support_mutables) - alias2mutable_names = self._build_alias_names_mapping( - supernet, support_mutables) + name2mutable: Dict[ + str, MUTABLE_TYPE] = self._build_name_mutable_mapping(supernet) + alias2mutable_names = self._build_alias_names_mapping(supernet) # Check whether the custom group is valid if len(custom_groups) > 0: @@ -135,7 +139,7 @@ def build_search_groups(self, supernet: Module, support_mutables: Type, custom_groups) # Construct search_groups based on user-defined group - search_groups: Dict[int, List[BaseMutable]] = dict() + search_groups: Dict[str, List[MUTABLE_TYPE]] = dict() current_group_nums = 0 grouped_mutable_names: List[str] = list() @@ -155,7 +159,10 @@ def build_search_groups(self, supernet: Module, support_mutables: Type, group_mutables.append(name2mutable[item]) grouped_mutable_names.append(item) - search_groups[current_group_nums] = group_mutables + # TODO: fix prefix when 
constructing custom groups. + prefix = name2mutable[item].mutable_prefix + group_name = prefix + '_' + str(current_group_nums) + search_groups[group_name] = group_mutables current_group_nums += 1 # Construct search_groups based on alias @@ -169,29 +176,35 @@ def build_search_groups(self, supernet: Module, support_mutables: Type, # If not all mutables are already grouped if not flag_all_grouped: - search_groups[current_group_nums] = [] + prefix = name2mutable[mutable_names[0]].mutable_prefix + group_name = prefix + '_' + str(current_group_nums) + search_groups[group_name] = [] for mutable_name in mutable_names: if mutable_name not in grouped_mutable_names: - search_groups[current_group_nums].append( + search_groups[group_name].append( name2mutable[mutable_name]) grouped_mutable_names.append(mutable_name) current_group_nums += 1 # check whether all the mutable objects are in the search_groups for name, module in supernet.named_modules(): - if isinstance(module, support_mutables): + if self.is_supported_mutable(module): if name in grouped_mutable_names: continue else: - search_groups[current_group_nums] = [module] + prefix = module.mutable_prefix + group_name = prefix + '_' + str(current_group_nums) + search_groups[group_name] = [module] current_group_nums += 1 elif hasattr(module, 'source_mutables'): - for each_mutables in module.source_mutables: - if isinstance(each_mutables, support_mutables): + for each_mutable in module.source_mutables: + if self.is_supported_mutable(each_mutable): if name in grouped_mutable_names: continue else: - search_groups[current_group_nums] = [each_mutables] + prefix = each_mutable.mutable_prefix + group_name = prefix + '_' + str(current_group_nums) + search_groups[group_name] = [each_mutable] current_group_nums += 1 grouped_counter = Counter(grouped_mutable_names) @@ -211,7 +224,7 @@ def build_search_groups(self, supernet: Module, support_mutables: Type, return search_groups def _check_valid_groups(self, alias2mutable_names: Dict[str, 
List[str]], - name2mutable: Dict[str, BaseMutable], + name2mutable: Dict[str, MUTABLE_TYPE], custom_group: List[List[str]]) -> None: """Check if all keys are legal.""" aliases = [*alias2mutable_names.keys()] @@ -246,66 +259,3 @@ def _check_valid_groups(self, alias2mutable_names: Dict[str, List[str]], f'When a mutable is set alias attribute :{alias_key},' f'the corresponding module name {mutable_name} should ' f'not be used in `custom_group` {custom_group}.') - - -class MutatorProtocol(Protocol): # pragma: no cover - - @property - def mutable_class_type(self) -> Type[BaseMutable]: - ... - - @property - def search_groups(self) -> Dict: - ... - - -class OneShotSampleMixin: - """Sample mixin for one-shot mutators.""" - - def sample_choices(self: MutatorProtocol) -> Dict: - """Sample choices for each group in search_groups.""" - random_choices = dict() - for group_id, modules in self.search_groups.items(): - random_choices[group_id] = modules[0].sample_choice() - - return random_choices - - def set_choices(self: MutatorProtocol, choices: Dict) -> None: - """Set choices for each group in search_groups.""" - for group_id, modules in self.search_groups.items(): - choice = choices[group_id] - for module in modules: - module.current_choice = choice - - -class DynamicSampleMixin(OneShotSampleMixin): - - def sample_choices(self: MutatorProtocol, kind: str = 'random') -> Dict: - """Sample choices for each group in search_groups.""" - random_choices = dict() - for group_id, modules in self.search_groups.items(): - if kind == 'max': - random_choices[group_id] = modules[0].max_choice - elif kind == 'min': - random_choices[group_id] = modules[0].min_choice - else: - random_choices[group_id] = modules[0].sample_choice() - return random_choices - - @property - def max_choice(self: MutatorProtocol) -> Dict: - """Get max choices for each group in search_groups.""" - max_choice = dict() - for group_id, modules in self.search_groups.items(): - max_choice[group_id] = modules[0].max_choice 
- - return max_choice - - @property - def min_choice(self: MutatorProtocol) -> Dict: - """Get min choices for each group in search_groups.""" - min_choice = dict() - for group_id, modules in self.search_groups.items(): - min_choice[group_id] = modules[0].min_choice - - return min_choice diff --git a/mmrazor/models/mutators/nas_mutator.py b/mmrazor/models/mutators/nas_mutator.py new file mode 100644 index 000000000..4636a899c --- /dev/null +++ b/mmrazor/models/mutators/nas_mutator.py @@ -0,0 +1,260 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings +from typing import Dict, List, Optional + +import torch +import torch.nn as nn +from mmengine.model import ModuleList +from torch.nn import Module + +from mmrazor.models.architectures.dynamic_ops.mixins import DynamicChannelMixin +from mmrazor.models.mutables.mutable_module import MutableModule +from mmrazor.registry import MODELS +from .base_mutator import MUTABLE_TYPE, BaseMutator +from .group_mixin import GroupMixin + + +@MODELS.register_module() +class NasMutator(BaseMutator[MUTABLE_TYPE], GroupMixin): + """The base class for mutable based mutator. + + Args: + custom_groups (list[list[str]], optional): User-defined search groups. + All searchable modules that are not in ``custom_group`` will be + grouped separately. + """ + + def __init__(self, + custom_groups: Optional[List[List[str]]] = None, + init_cfg: Optional[Dict] = None) -> None: + super().__init__(init_cfg) + + if custom_groups is None: + custom_groups = [] + self._custom_groups = custom_groups + self._search_groups: Optional[Dict[str, List[MUTABLE_TYPE]]] = None + + def prepare_from_supernet(self, supernet: Module) -> None: + """Do some necessary preparations with supernet. + + Note: + For mutable based mutator, we need to build search group first. + + Args: + supernet (:obj:`torch.nn.Module`): The supernet to be searched + in your algorithm. 
+ """ + self._search_groups = dict() + + # prepare for channel mutables + if self.has_channel(supernet): + units = self._prepare_from_predefined_model(supernet) + self.mutable_units = [unit for unit in units if unit.is_mutable] + + _channel_groups = dict() + for id, unit in enumerate(ModuleList(self.mutable_units)): + _channel_groups['channel' + '_' + str(id)] = [unit] + self._search_groups.update(_channel_groups) + else: + self.mutable_units = [] + + # prepare for value mutables + _value_groups: Dict[str, List[MUTABLE_TYPE]] = \ + self.build_search_groups(supernet, self._custom_groups) + self._search_groups.update(_value_groups) + + def prepare_arch_params(self): + """This function will build searchable params for each layer, which are + generally used in differentiable search algorithms, such as Darts' + series. + + Each name corresponds to an search param, so the Mutables with the same + name share the same search param. + """ + self._arch_params = nn.ParameterDict() + + for name, mutables in self.search_groups.items(): + if isinstance(mutables[0], MutableModule): + self._arch_params[name] = nn.Parameter( + torch.randn(mutables[0].num_choices) * 1e-3) + + self._modify_supernet_forward() + + def has_channel(self, supernet): + """Whether to build channel space.""" + for module in supernet.modules(): + if isinstance(module, DynamicChannelMixin): + if module.get_mutable_attr('out_channels') or \ + module.get_mutable_attr('in_channels'): + return True + return False + + @property + def search_groups(self) -> Dict[str, List[MUTABLE_TYPE]]: + """Search group of supernet. + + Note: + For mutable based mutator, the search group is composed of + corresponding mutables. + + Raises: + RuntimeError: Called before search group has been built. + + Returns: + Dict[int, List[MUTABLE_TYPE]]: Search group. 
+ """ + if self._search_groups is None: + raise RuntimeError( + 'Call `prepare_from_supernet` first to get the search space.') + return self._search_groups + + @property + def arch_params(self) -> nn.ParameterDict: + """Search params of supernet. + + Note: + For mutable based mutator, the search group is composed of + corresponding mutables. + + Raises: + RuntimeError: Called before search group has been built. + + Returns: + Dict[int, List[MUTABLE_TYPE]]: Search group. + """ + if self._arch_params is None: + raise RuntimeError( + 'Call `prepare_arch_params` first to get the search params.') + return self._arch_params + + def _prepare_from_predefined_model(self, model: Module): + """Initialize units using the model with pre-defined dynamic-ops and + mutable-channels.""" + from mmrazor.models.mutables import OneShotMutableChannelUnit + + self._name2unit: Dict = {} + units = OneShotMutableChannelUnit.init_from_predefined_model(model) + + for unit in units: + unit.current_choice = unit.max_choice + self._name2unit[unit.name] = unit + + return units + + def _modify_supernet_forward(self): + """Modify the DiffMutableModule's default arch_param in forward. + + In MMRazor, the `DiffMutableModule` needs `arch_param` in the forward. + Here we use partial function to assign the corresponding `arch_param` + to each `DiffMutableModule`. 
+ """ + for name, mutables in self.search_groups.items(): + for mutable in mutables: + if isinstance(mutable, MutableModule): + mutable.set_forward_args(arch_param=self.arch_params[name]) + + # choice manage + + def sample_choices(self, kind='random') -> Dict: + """Random sample choices by search space.""" + choices = dict() + for name, mutables in self.search_groups.items(): + if hasattr(self, + 'arch_params') and name in self.arch_params.keys(): + arch_param = self.arch_params[name] + choices[name] = mutables[0].sample_choice(arch_param) + else: + if kind == 'max': + choices[name] = mutables[0].max_choice + elif kind == 'min': + choices[name] = mutables[0].min_choice + elif kind == 'random': + choices[name] = mutables[0].sample_choice() + else: + raise NotImplementedError() + return choices + + def set_choices(self, choices: Dict) -> None: + """Set choices for each mutable in search space.""" + for name, mutables in self.search_groups.items(): + choice = choices[name] + + for mutable in mutables: + mutable.current_choice = choice # type: ignore + + @property + def max_choices(self) -> Dict: + """Get max choices for each mutable in search space.""" + max_choices = dict() + warned = False + for name, mutables in self.search_groups.items(): + if hasattr(self, + 'arch_params') and name in self.arch_params.keys(): + arch_param = self.arch_params[name] + max_choices[name] = mutables[0].sample_choice(arch_param) + if not warned: + warnings.warn('mutables with `arch param` detected. ' + 'which is not supposed to have max choices. 
' + 'Sample by arch params instead.') + warned = True + else: + max_choices[name] = mutables[0].max_choice + + return max_choices + + @property + def min_choices(self) -> Dict: + """Get min choices for each mutable in search space.""" + min_choices = dict() + warned = False + for name, mutables in self.search_groups.items(): + if hasattr(self, + 'arch_params') and name in self.arch_params.keys(): + arch_param = self.arch_params[name] + min_choices[name] = mutables[0].sample_choice(arch_param) + if not warned: + warnings.warn('mutables with `arch param` detected. ' + 'which is not supposed to have min choices. ' + 'Sample by arch params instead.') + warned = True + else: + min_choices[name] = mutables[0].min_choice + + return min_choices + + @property + def current_choices(self) -> Dict: + """Get current choices by search space.""" + current_choices = dict() + for name, mutables in self.search_groups.items(): + current_choices[name] = mutables[0].current_choice + + return current_choices + + def set_max_choices(self): + """Set max choices for each mutable in search space.""" + warned = False + for name, mutables in self.search_groups.items(): + choice = self.max_choices[name] + if hasattr(self, + 'arch_params') and name in self.arch_params.keys(): + if not warned: + warnings.warn('mutables with `arch param` detected. ' + '`set_max_choices` is not available for it.') + warned = True + for mutable in mutables: + mutable.current_choice = choice + + def set_min_choices(self): + """Set min choices for each mutable in search space.""" + warned = False + for name, mutables in self.search_groups.items(): + choice = self.min_choices[name] + if hasattr(self, + 'arch_params') and name in self.arch_params.keys(): + if not warned: + warnings.warn('mutables with `arch param` detected. 
' + '`set_max_choices` is not available for it.') + warned = True + for mutable in mutables: + mutable.current_choice = choice diff --git a/mmrazor/structures/subnet/__init__.py b/mmrazor/structures/subnet/__init__.py index fa3c9fae8..af69cc96e 100644 --- a/mmrazor/structures/subnet/__init__.py +++ b/mmrazor/structures/subnet/__init__.py @@ -1,5 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. from .candidate import Candidates -from .fix_subnet import export_fix_subnet, load_fix_subnet +from .fix_subnet import convert_fix_subnet, export_fix_subnet, load_fix_subnet -__all__ = ['load_fix_subnet', 'export_fix_subnet', 'Candidates'] +__all__ = [ + 'load_fix_subnet', 'export_fix_subnet', 'convert_fix_subnet', 'Candidates' +] diff --git a/mmrazor/structures/subnet/fix_subnet.py b/mmrazor/structures/subnet/fix_subnet.py index 311dc8936..803d31b6a 100644 --- a/mmrazor/structures/subnet/fix_subnet.py +++ b/mmrazor/structures/subnet/fix_subnet.py @@ -3,7 +3,6 @@ from typing import Dict, Optional, Tuple from mmengine import fileio -from mmengine.logging import print_log from torch import nn from mmrazor.registry import MODELS @@ -30,7 +29,7 @@ def traverse_children(module: nn.Module) -> None: def load_fix_subnet(model: nn.Module, - fix_mutable: ValidFixMutable, + subnet_dict: ValidFixMutable, load_subnet_mode: str = 'mutable', prefix: str = '', extra_prefix: str = '') -> None: @@ -38,20 +37,20 @@ def load_fix_subnet(model: nn.Module, if prefix and extra_prefix: raise RuntimeError('`prefix` and `extra_prefix` can not be set at the ' f'same time, but got {prefix} vs {extra_prefix}') - if isinstance(fix_mutable, str): - fix_mutable = fileio.load(fix_mutable) - if not isinstance(fix_mutable, dict): - raise TypeError('fix_mutable should be a `str` or `dict`' - f'but got {type(fix_mutable)}') + if isinstance(subnet_dict, str): + subnet_dict = fileio.load(subnet_dict) + if not isinstance(subnet_dict, dict): + raise TypeError('subnet_dict should be a `str` or `dict`' + f'but 
got {type(subnet_dict)}') from mmrazor.models.architectures.dynamic_ops import DynamicMixin if isinstance(model, DynamicMixin): raise RuntimeError('Root model can not be dynamic op.') if load_subnet_mode == 'mutable': - _load_fix_subnet_by_mutable(model, fix_mutable, prefix, extra_prefix) + _load_fix_subnet_by_mutable(model, subnet_dict, prefix, extra_prefix) elif load_subnet_mode == 'mutator': - _load_fix_subnet_by_mutator(model, fix_mutable) + _load_fix_subnet_by_mutator(model, subnet_dict) else: raise ValueError(f'Invalid load_subnet_mode {load_subnet_mode}, ' 'only mutable or mutator is supported.') @@ -61,7 +60,7 @@ def load_fix_subnet(model: nn.Module, def _load_fix_subnet_by_mutable(model: nn.Module, - fix_mutable: Dict, + subnet_dict: Dict, prefix: str = '', extra_prefix: str = '') -> None: # Avoid circular import @@ -72,11 +71,11 @@ def load_fix_module(module): """Load fix module.""" if getattr(module, 'alias', None): alias = module.alias - assert alias in fix_mutable, \ + assert alias in subnet_dict, \ f'The alias {alias} is not in fix_modules, ' \ - 'please check your `fix_mutable`.' + 'please check your `subnet_dict`.' 
# {chosen=xx, meta=xx) - chosen = fix_mutable.get(alias, None) + chosen = subnet_dict.get(alias, None) else: if prefix: mutable_name = name.lstrip(prefix) @@ -84,13 +83,13 @@ def load_fix_module(module): mutable_name = extra_prefix + name else: mutable_name = name - if mutable_name not in fix_mutable and not isinstance( + if mutable_name not in subnet_dict and not isinstance( module, MutableChannelContainer): raise RuntimeError( f'The module name {mutable_name} is not in ' - 'fix_mutable, please check your `fix_mutable`.') + 'subnet_dict, please check your `subnet_dict`.') # {chosen=xx, meta=xx) - chosen = fix_mutable.get(mutable_name, None) + chosen = subnet_dict.get(mutable_name, None) if not isinstance(chosen, DumpChosen): chosen = DumpChosen(**chosen) @@ -120,7 +119,6 @@ def _load_fix_subnet_by_mutator(model: nn.Module, mutator_cfg: Dict) -> None: mutator_cfg['parse_cfg'] = {'type': 'Config'} mutator = MODELS.build(mutator_cfg) mutator.prepare_from_supernet(model) - mutator.set_choices(mutator.current_choices) def export_fix_subnet( @@ -142,25 +140,29 @@ def export_fix_subnet( static_model (Optional[Dict]): Exported static model state_dict. Valid when `slice_weight`=True. 
""" - - static_model = copy.deepcopy(model) - fix_subnet = dict() if export_subnet_mode == 'mutable': - fix_subnet = _export_subnet_by_mutable(static_model) + fix_subnet = _export_subnet_by_mutable(model) elif export_subnet_mode == 'mutator': - fix_subnet = _export_subnet_by_mutator(static_model) + fix_subnet = _export_subnet_by_mutator(model) else: raise ValueError(f'Invalid export_subnet_mode {export_subnet_mode}, ' 'only mutable or mutator is supported.') if slice_weight: # export subnet ckpt - print_log('Exporting fixed subnet weight') - _dynamic_to_static(static_model) - if next(static_model.parameters()).is_cuda: - static_model.cuda() - return fix_subnet, static_model + from mmrazor.models.mutators import ChannelMutator + + copied_model = copy.deepcopy(model) + if hasattr(model, 'mutator') and \ + isinstance(model.mutator, ChannelMutator): + _dynamic_to_static(copied_model) + else: + load_fix_subnet(copied_model, fix_subnet) + + if next(copied_model.parameters()).is_cuda: + copied_model.cuda() + return fix_subnet, copied_model else: return fix_subnet, None @@ -198,3 +200,15 @@ def _export_subnet_by_mutator(model: nn.Module) -> Dict: with_channels=False, with_unit_init_args=True) return fix_subnet + + +def convert_fix_subnet(fix_subnet: Dict[str, DumpChosen]): + """Convert the fixed subnet to avoid python typing error.""" + from mmrazor.utils.typing import DumpChosen + + converted_fix_subnet = dict() + for k, v in fix_subnet.items(): + assert isinstance(v, DumpChosen) + converted_fix_subnet[k] = dict(chosen=v.chosen) + + return converted_fix_subnet diff --git a/tests/data/models.py b/tests/data/models.py index 78ff2982f..220130b56 100644 --- a/tests/data/models.py +++ b/tests/data/models.py @@ -859,6 +859,7 @@ def __init__( self, conv_cfg: Dict = dict(type='mmrazor.BigNasConv2d'), norm_cfg: Dict = dict(type='mmrazor.DynamicBatchNorm2d'), + fine_grained_mode: bool = False, ) -> None: super().__init__() @@ -875,6 +876,8 @@ def __init__( 
parse_values(self.arch_setting['num_out_channels']) assert len(self.kernel_size_list) == len(self.num_blocks_list) == \ len(self.expand_ratio_list) == len(self.num_channels_list) + + self.fine_grained_mode = fine_grained_mode self.with_attentive_shortcut = True self.in_channels = 24 @@ -997,17 +1000,28 @@ def register_mutables(self): candidate_choices=out_channels, num_channels=max(out_channels)) - mutable_kernel_size = OneShotMutableValue( - alias=prefix + 'kernel_size', value_list=kernel_sizes) + if not self.fine_grained_mode: + mutable_kernel_size = OneShotMutableValue( + alias=prefix + 'kernel_size', value_list=kernel_sizes) - mutable_expand_ratio = OneShotMutableValue( - alias=prefix + 'expand_ratio', value_list=expand_ratios) + mutable_expand_ratio = OneShotMutableValue( + alias=prefix + 'expand_ratio', value_list=expand_ratios) mutable_depth = OneShotMutableValue( alias=prefix + 'depth', value_list=num_blocks) layer.register_mutable_attr('depth', mutable_depth) for k in range(max(self.num_blocks_list[i])): + + if self.fine_grained_mode: + mutable_kernel_size = OneShotMutableValue( + alias=prefix + str(k) + '.kernel_size', + value_list=kernel_sizes) + + mutable_expand_ratio = OneShotMutableValue( + alias=prefix + str(k) + '.expand_ratio', + value_list=expand_ratios) + mutate_mobilenet_layer(layer[k], mid_mutable, mutable_out_channels, mutable_expand_ratio, diff --git a/tests/test_models/test_algorithms/test_autoformer.py b/tests/test_models/test_algorithms/test_autoformer.py index c60ba5abe..edcafd8f5 100644 --- a/tests/test_models/test_algorithms/test_autoformer.py +++ b/tests/test_models/test_algorithms/test_autoformer.py @@ -4,7 +4,7 @@ import torch -from mmrazor.models import Autoformer +from mmrazor.models import Autoformer, NasMutator from mmrazor.registry import MODELS arch_setting = dict( @@ -13,17 +13,7 @@ depth=[14, 15, 16], embed_dims=[528, 576, 624]) -MUTATOR_CFG = dict( - channel_mutator=dict( - type='mmrazor.OneShotChannelMutator', - 
channel_unit_cfg={ - 'type': 'OneShotMutableChannelUnit', - 'default_args': { - 'unit_predefined': True - } - }, - parse_cfg={'type': 'Predefined'}), - value_mutator=dict(type='mmrazor.DynamicValueMutator')) +MUTATOR_CFG = dict(type='NasMutator') ARCHITECTURE_CFG = dict( _scope_='mmrazor', @@ -50,23 +40,21 @@ ALGORITHM_CFG = dict( type='mmrazor.Autoformer', architecture=ARCHITECTURE_CFG, - fix_subnet=None, - mutators=MUTATOR_CFG) + mutator=MUTATOR_CFG, + fix_subnet=None) -class TestAUTOFORMER(TestCase): +class TestAutoFormer(TestCase): def test_init(self): ALGORITHM_CFG_SUPERNET = copy.deepcopy(ALGORITHM_CFG) # initiate autoformer with built `algorithm`. autoformer_algo = MODELS.build(ALGORITHM_CFG_SUPERNET) self.assertIsInstance(autoformer_algo, Autoformer) - # autoformer mutators include channel_mutator and value_mutator - assert 'channel_mutator' in autoformer_algo.mutators - assert 'value_mutator' in autoformer_algo.mutators + self.assertIsInstance(autoformer_algo.mutator, NasMutator) # autoformer search_groups - random_subnet = autoformer_algo.sample_subnet() + random_subnet = autoformer_algo.mutator.sample_choices() self.assertIsInstance(random_subnet, dict) # autoformer_algo support training @@ -74,29 +62,11 @@ def test_init(self): # initiate autoformer without any `mutator`. ALGORITHM_CFG_SUPERNET.pop('type') - ALGORITHM_CFG_SUPERNET['mutators'] = None + ALGORITHM_CFG_SUPERNET['mutator'] = None + none_type = type(ALGORITHM_CFG_SUPERNET['mutator']) with self.assertRaisesRegex( - AssertionError, - 'mutator cannot be None when fix_subnet is None.'): - _ = Autoformer(**ALGORITHM_CFG_SUPERNET) - - # initiate autoformer with error type `mutator`. 
- backwardtracer_cfg = dict( - type='OneShotChannelMutator', - channel_unit_cfg=dict( - type='OneShotMutableChannelUnit', - default_args=dict( - candidate_choices=list(i / 12 for i in range(2, 13)), - choice_mode='ratio')), - parse_cfg=dict( - type='ChannelAnalyzer', - demo_input=(1, 3, 224, 224), - tracer_type='BackwardTracer')) - ALGORITHM_CFG_SUPERNET['mutators'] = dict( - channel_mutator=backwardtracer_cfg, - value_mutator=dict(type='mmrazor.DynamicValueMutator')) - with self.assertRaisesRegex(AssertionError, - 'autoformer only support predefined.'): + TypeError, 'mutator should be a `dict` or `NasMutator` ' + f'instance, but got {none_type}.'): _ = Autoformer(**ALGORITHM_CFG_SUPERNET) def test_loss(self): diff --git a/tests/test_models/test_algorithms/test_autoslim.py b/tests/test_models/test_algorithms/test_autoslim.py index bfa12217b..fed6ca5e0 100644 --- a/tests/test_models/test_algorithms/test_autoslim.py +++ b/tests/test_models/test_algorithms/test_autoslim.py @@ -92,6 +92,30 @@ def forward( class TestAutoSlim(TestCase): device: str = 'cpu' + def _prepare_fake_data(self) -> Dict: + imgs = torch.randn(16, 3, 224, 224).to(self.device) + data_samples = [ + ClsDataSample().set_gt_label(torch.randint(0, 1000, + (16, ))).to(self.device) + ] + + return {'inputs': imgs, 'data_samples': data_samples} + + def prepare_model(self, + mutator_cfg: MUTATOR_TYPE = MUTATOR_CFG, + distiller_cfg: DISTILLER_TYPE = DISTILLER_CFG, + architecture_cfg: Dict = ARCHITECTURE_CFG, + num_random_samples: int = 2) -> AutoSlim: + model = AutoSlim( + mutator=mutator_cfg, + distiller=distiller_cfg, + architecture=architecture_cfg, + data_preprocessor=ToyDataPreprocessor(), + num_random_samples=num_random_samples) + model.to(self.device) + + return model + def test_init(self) -> None: mutator_wrong_type = FakeMutator() with pytest.raises(Exception): @@ -129,30 +153,6 @@ def test_autoslim_train_step(self) -> None: losses = algo.train_step(data, optim_wrapper) assert 
algo._optim_wrapper_count_status_reinitialized - def _prepare_fake_data(self) -> Dict: - imgs = torch.randn(16, 3, 224, 224).to(self.device) - data_samples = [ - ClsDataSample().set_gt_label(torch.randint(0, 1000, - (16, ))).to(self.device) - ] - - return {'inputs': imgs, 'data_samples': data_samples} - - def prepare_model(self, - mutator_cfg: MUTATOR_TYPE = MUTATOR_CFG, - distiller_cfg: DISTILLER_TYPE = DISTILLER_CFG, - architecture_cfg: Dict = ARCHITECTURE_CFG, - num_random_samples: int = 2) -> AutoSlim: - model = AutoSlim( - mutator=mutator_cfg, - distiller=distiller_cfg, - architecture=architecture_cfg, - data_preprocessor=ToyDataPreprocessor(), - num_random_samples=num_random_samples) - model.to(self.device) - - return model - class TestAutoSlimDDP(TestAutoSlim): diff --git a/tests/test_models/test_algorithms/test_bignas.py b/tests/test_models/test_algorithms/test_bignas.py index a372ce6cf..5a844fc7a 100644 --- a/tests/test_models/test_algorithms/test_bignas.py +++ b/tests/test_models/test_algorithms/test_bignas.py @@ -4,7 +4,7 @@ import torch -from mmrazor.models import BigNAS +from mmrazor.models import BigNAS, NasMutator from mmrazor.registry import MODELS arch_setting = dict( @@ -36,17 +36,7 @@ [72, 72, 8], # last layer ]) -MUTATOR_CFG = dict( - channel_mutator=dict( - type='mmrazor.OneShotChannelMutator', - channel_unit_cfg={ - 'type': 'OneShotMutableChannelUnit', - 'default_args': { - 'unit_predefined': True - } - }, - parse_cfg={'type': 'Predefined'}), - value_mutator=dict(type='mmrazor.DynamicValueMutator')) +MUTATOR_CFG = dict(type='NasMutator') DISTILLER_CFG = dict( _scope_='mmrazor', @@ -87,7 +77,7 @@ ALGORITHM_CFG = dict( type='mmrazor.BigNAS', architecture=ARCHITECTURE_CFG, - mutators=MUTATOR_CFG, + mutator=MUTATOR_CFG, distiller=DISTILLER_CFG) @@ -98,12 +88,10 @@ def test_init(self): # initiate bignas with built `algorithm`. 
bignas_algo = MODELS.build(ALGORITHM_CFG_SUPERNET) self.assertIsInstance(bignas_algo, BigNAS) - # bignas mutators include channel_mutator and value_mutator - assert 'channel_mutator' in bignas_algo.mutators - assert 'value_mutator' in bignas_algo.mutators + self.assertIsInstance(bignas_algo.mutator, NasMutator) # bignas search_groups - random_subnet = bignas_algo.sample_subnet() + random_subnet = bignas_algo.mutator.sample_choices() self.assertIsInstance(random_subnet, dict) # bignas_algo support training @@ -111,28 +99,11 @@ def test_init(self): # initiate bignas without any `mutator`. ALGORITHM_CFG_SUPERNET.pop('type') - ALGORITHM_CFG_SUPERNET['mutators'] = None - none_type = type(ALGORITHM_CFG_SUPERNET['mutators']) + ALGORITHM_CFG_SUPERNET['mutator'] = None + none_type = type(ALGORITHM_CFG_SUPERNET['mutator']) with self.assertRaisesRegex( - TypeError, f'mutator should be a `dict` but got {none_type}'): - _ = BigNAS(**ALGORITHM_CFG_SUPERNET) - - # initiate bignas with error type `mutator`. 
- backwardtracer_cfg = dict( - type='OneShotChannelMutator', - channel_unit_cfg=dict( - type='OneShotMutableChannelUnit', - default_args=dict( - candidate_choices=list(i / 12 for i in range(2, 13)), - choice_mode='ratio')), - parse_cfg=dict( - type='BackwardTracer', - loss_calculator=dict(type='ImageClassifierPseudoLoss'))) - ALGORITHM_CFG_SUPERNET['mutators'] = dict( - channel_mutator=backwardtracer_cfg, - value_mutator=dict(type='mmrazor.DynamicValueMutator')) - with self.assertRaisesRegex(AssertionError, - 'BigNAS only support predefined.'): + TypeError, 'mutator should be a `dict` or `NasMutator` ' + f'instance, but got {none_type}.'): _ = BigNAS(**ALGORITHM_CFG_SUPERNET) def test_loss(self): diff --git a/tests/test_models/test_algorithms/test_darts.py b/tests/test_models/test_algorithms/test_darts.py index 52f5d10e6..efca993b7 100644 --- a/tests/test_models/test_algorithms/test_darts.py +++ b/tests/test_models/test_algorithms/test_darts.py @@ -14,7 +14,7 @@ from torch import Tensor from torch.optim import SGD -from mmrazor.models import Darts, DiffModuleMutator, DiffMutableOP +from mmrazor.models import Darts, DiffMutableOP, NasMutator from mmrazor.models.algorithms.nas.darts import DartsDDP from mmrazor.registry import MODELS @@ -87,21 +87,21 @@ def setUp(self) -> None: def test_init(self) -> None: # initiate darts when `norm_training` is True. 
model = ToyDiffModule2() - mutator = DiffModuleMutator() + mutator = NasMutator() algo = Darts(architecture=model, mutator=mutator, norm_training=True) algo.eval() self.assertTrue(model.bn.training) # initiate darts with built mutator model = ToyDiffModule2() - mutator = DiffModuleMutator() + mutator = NasMutator() algo = Darts(model, mutator) self.assertIs(algo.mutator, mutator) # initiate darts with unbuilt mutator - mutator = dict(type='DiffModuleMutator') + mutator = dict(type='NasMutator') algo = Darts(model, mutator) - self.assertIsInstance(algo.mutator, DiffModuleMutator) + self.assertIsInstance(algo.mutator, NasMutator) # initiate darts when `fix_subnet` is not None fix_subnet = { @@ -121,8 +121,10 @@ def test_forward_loss(self) -> None: model = ToyDiffModule2() # supernet - mutator = DiffModuleMutator() + mutator = NasMutator() mutator.prepare_from_supernet(model) + mutator.prepare_arch_params() + algo = Darts(model, mutator) loss = algo(inputs, mode='loss') self.assertIsInstance(loss, dict) @@ -149,16 +151,20 @@ def _prepare_fake_data(self) -> Dict: def test_search_subnet(self) -> None: model = ToyDiffModule2() - mutator = DiffModuleMutator() + mutator = NasMutator() mutator.prepare_from_supernet(model) + mutator.prepare_arch_params() + algo = Darts(model, mutator) - subnet = algo.search_subnet() + subnet = algo.mutator.sample_choices() self.assertIsInstance(subnet, dict) def test_darts_train_step(self) -> None: model = ToyDiffModule2() - mutator = DiffModuleMutator() + + mutator = NasMutator() mutator.prepare_from_supernet(model) + mutator.prepare_arch_params() # data is tensor algo = Darts(model, mutator) @@ -180,8 +186,10 @@ def test_darts_train_step(self) -> None: def test_darts_with_unroll(self) -> None: model = ToyDiffModule2() - mutator = DiffModuleMutator() + + mutator = NasMutator() mutator.prepare_from_supernet(model) + mutator.prepare_arch_params() # data is tuple or list algo = Darts(model, mutator, unroll=True) @@ -209,8 +217,10 @@ def 
prepare_model(self, unroll=False, device_ids=None) -> Darts: self.device = 'cuda' if torch.cuda.is_available() else 'cpu' model = ToyDiffModule2() - mutator = DiffModuleMutator() + + mutator = NasMutator() mutator.prepare_from_supernet(model) + mutator.prepare_arch_params() algo = Darts(model, mutator, unroll=unroll).to(self.device) diff --git a/tests/test_models/test_algorithms/test_dcff_network.py b/tests/test_models/test_algorithms/test_dcff_network.py index 9d369f3f2..657d7a09b 100644 --- a/tests/test_models/test_algorithms/test_dcff_network.py +++ b/tests/test_models/test_algorithms/test_dcff_network.py @@ -147,11 +147,11 @@ def test_iterative_prune_int(self): algorithm.step_freq) current_choices = algorithm.mutator.current_choices - group_prune_target = algorithm.group_target_pruning_ratio( - prune_target, mutator.search_groups) + target_pruning_ratio = algorithm.set_target_pruning_ratio( + prune_target, mutator.mutable_units) for key in current_choices: self.assertAlmostEqual( - current_choices[key], group_prune_target[key], delta=0.1) + current_choices[key], target_pruning_ratio[key], delta=0.1) def test_load_pretrained(self): iter_per_epoch = 10 @@ -190,13 +190,6 @@ def test_group_target_ratio(self): mutator.set_choices(mutator.sample_choices()) prune_target = mutator.choice_template - custom_groups = [[ - 'backbone.layer1.0.conv1_(0, 64)_64', - 'backbone.layer1.1.conv1_(0, 64)_64' - ]] - mutator_cfg = copy.deepcopy(MUTATOR_CONFIG_FLOAT) - mutator_cfg['custom_groups'] = custom_groups - iter_per_epoch = 10 epoch_step = 2 epoch = 6 @@ -208,7 +201,7 @@ def test_group_target_ratio(self): algorithm = DCFF( MODEL_CFG, target_pruning_ratio=prune_target, - mutator_cfg=mutator_cfg, + mutator_cfg=MUTATOR_CONFIG_FLOAT, step_freq=epoch_step).to(DEVICE) algorithm.init_weights() @@ -216,23 +209,6 @@ def test_group_target_ratio(self): algorithm.forward(data['inputs'], data['data_samples'], mode='loss') self.assertEqual(algorithm.step_freq, epoch_step * iter_per_epoch) - 
prune_target['backbone.layer1.0.conv1_(0, 64)_64'] = 0.1 - prune_target['backbone.layer1.1.conv1_(0, 64)_64'] = 0.2 - - with self.assertRaises(ValueError): - - algorithm = DCFF( - MODEL_CFG, - target_pruning_ratio=prune_target, - mutator_cfg=mutator_cfg, - step_freq=epoch_step).to(DEVICE) - - algorithm.init_weights() - self._set_epoch_ite(1, 2, epoch) - algorithm.forward( - data['inputs'], data['data_samples'], mode='loss') - self.assertEqual(algorithm.step_freq, epoch_step * iter_per_epoch) - def test_export_subnet(self): model = MODELS.build(MODEL_CFG) @@ -240,13 +216,6 @@ def test_export_subnet(self): mutator.prepare_from_supernet(model) mutator.set_choices(mutator.sample_choices()) - custom_groups = [[ - 'backbone.layer1.0.conv1_(0, 64)_64', - 'backbone.layer1.1.conv1_(0, 64)_64' - ]] - mutator_cfg = copy.deepcopy(MUTATOR_CONFIG_FLOAT) - mutator_cfg['custom_groups'] = custom_groups - iter_per_epoch = 10 epoch_step = 2 epoch = 6 @@ -303,7 +272,7 @@ def test_export_subnet(self): algorithm = DCFF( MODEL_CFG, target_pruning_ratio=target_pruning_ratio, - mutator_cfg=mutator_cfg, + mutator_cfg=MUTATOR_CONFIG_FLOAT, step_freq=epoch_step).to(DEVICE) algorithm.init_weights() diff --git a/tests/test_models/test_algorithms/test_dsnas.py b/tests/test_models/test_algorithms/test_dsnas.py index 2b5bbfa49..423d27f42 100644 --- a/tests/test_models/test_algorithms/test_dsnas.py +++ b/tests/test_models/test_algorithms/test_dsnas.py @@ -14,7 +14,7 @@ from torch import Tensor from torch.optim import SGD -from mmrazor.models import DSNAS, DiffModuleMutator, OneHotMutableOP +from mmrazor.models import DSNAS, NasMutator, OneHotMutableOP from mmrazor.models.algorithms.nas.dsnas import DSNASDDP from mmrazor.registry import MODELS @@ -80,21 +80,21 @@ def setUp(self) -> None: def test_init(self) -> None: # initiate dsnas when `norm_training` is True. 
model = ToyDiffModule() - mutator = DiffModuleMutator() + mutator = NasMutator() algo = DSNAS(architecture=model, mutator=mutator, norm_training=True) algo.eval() self.assertTrue(model.bn.training) # initiate Dsnas with built mutator model = ToyDiffModule() - mutator = DiffModuleMutator() + mutator = NasMutator() algo = DSNAS(model, mutator) self.assertIs(algo.mutator, mutator) # initiate Dsnas with unbuilt mutator - mutator = dict(type='DiffModuleMutator') + mutator = dict(type='NasMutator') algo = DSNAS(model, mutator) - self.assertIsInstance(algo.mutator, DiffModuleMutator) + self.assertIsInstance(algo.mutator, NasMutator) # initiate Dsnas when `fix_subnet` is not None fix_subnet = {'mutable': {'chosen': 'torch_conv2d_5x5'}} @@ -110,7 +110,7 @@ def test_forward_loss(self) -> None: model = ToyDiffModule() # supernet - mutator = DiffModuleMutator() + mutator = NasMutator() mutator.prepare_from_supernet(model) algo = DSNAS(model, mutator) loss = algo(inputs, mode='loss') @@ -133,16 +133,16 @@ def _prepare_fake_data(self): def test_search_subnet(self) -> None: model = ToyDiffModule() - mutator = DiffModuleMutator() + mutator = NasMutator() mutator.prepare_from_supernet(model) algo = DSNAS(model, mutator) - subnet = algo.search_subnet() + subnet = algo.mutator.sample_choices() self.assertIsInstance(subnet, dict) @patch('mmengine.logging.message_hub.MessageHub.get_info') def test_dsnas_train_step(self, mock_get_info) -> None: model = ToyDiffModule() - mutator = DiffModuleMutator() + mutator = NasMutator() mutator.prepare_from_supernet(model) mock_get_info.return_value = 2 @@ -177,7 +177,7 @@ def prepare_model(self, device_ids=None) -> DSNAS: self.device = 'cuda' if torch.cuda.is_available() else 'cpu' model = ToyDiffModule() - mutator = DiffModuleMutator() + mutator = NasMutator() mutator.prepare_from_supernet(model) algo = DSNAS(model, mutator).to(self.device) diff --git a/tests/test_models/test_algorithms/test_prune_algorithm.py 
b/tests/test_models/test_algorithms/test_prune_algorithm.py index 536da67fd..00d615815 100644 --- a/tests/test_models/test_algorithms/test_prune_algorithm.py +++ b/tests/test_models/test_algorithms/test_prune_algorithm.py @@ -119,7 +119,6 @@ def test_iterative_prune_int(self): model = MODELS.build(MODEL_CFG) mutator = MODELS.build(MUTATOR_CONFIG_FLOAT) mutator.prepare_from_supernet(model) - mutator.set_choices(mutator.sample_choices()) prune_target = mutator.choice_template iter_per_epoch = 10 @@ -145,11 +144,11 @@ def test_iterative_prune_int(self): algorithm.step_freq) current_choices = algorithm.mutator.current_choices - group_prune_target = algorithm.group_target_pruning_ratio( - prune_target, mutator.search_groups) + target_pruning_ratio = algorithm.set_target_pruning_ratio( + prune_target, mutator.mutable_units) for key in current_choices: self.assertAlmostEqual( - current_choices[key], group_prune_target[key], delta=0.1) + current_choices[key], target_pruning_ratio[key], delta=0.1) def test_load_pretrained(self): iter_per_epoch = 10 @@ -191,13 +190,6 @@ def test_group_target_ratio(self): mutator.set_choices(mutator.sample_choices()) prune_target = mutator.choice_template - custom_groups = [[ - 'backbone.layer1.0.conv1_(0, 64)_64', - 'backbone.layer1.1.conv1_(0, 64)_64' - ]] - mutator_cfg = copy.deepcopy(MUTATOR_CONFIG_FLOAT) - mutator_cfg['custom_groups'] = custom_groups - iter_per_epoch = 10 epoch_step = 2 time = 2 @@ -210,7 +202,7 @@ def test_group_target_ratio(self): algorithm = ItePruneAlgorithm( MODEL_CFG, target_pruning_ratio=prune_target, - mutator_cfg=mutator_cfg, + mutator_cfg=MUTATOR_CONFIG_FLOAT, step_freq=epoch_step, prune_times=time).to(DEVICE) @@ -219,24 +211,6 @@ def test_group_target_ratio(self): algorithm.forward(data['inputs'], data['data_samples'], mode='loss') self.assertEqual(algorithm.step_freq, epoch_step * iter_per_epoch) - prune_target['backbone.layer1.0.conv1_(0, 64)_64'] = 0.1 - prune_target['backbone.layer1.1.conv1_(0, 64)_64'] = 
0.2 - - with self.assertRaises(ValueError): - - algorithm = ItePruneAlgorithm( - MODEL_CFG, - target_pruning_ratio=prune_target, - mutator_cfg=mutator_cfg, - step_freq=epoch_step, - prune_times=time).to(DEVICE) - - algorithm.init_weights() - self._set_epoch_ite(1, 2, epoch) - algorithm.forward( - data['inputs'], data['data_samples'], mode='loss') - self.assertEqual(algorithm.step_freq, epoch_step * iter_per_epoch) - def test_dist_init(self): if DEVICE != torch.device('cuda:0'): self.skipTest('not use cuda') diff --git a/tests/test_models/test_algorithms/test_slimmable_network.py b/tests/test_models/test_algorithms/test_slimmable_network.py index 3015576f8..3f6fdba57 100644 --- a/tests/test_models/test_algorithms/test_slimmable_network.py +++ b/tests/test_models/test_algorithms/test_slimmable_network.py @@ -62,16 +62,16 @@ def test_init(self) -> None: mutator_wrong_type = FakeMutator() with pytest.raises(AttributeError): - _ = self.prepare_model(mutator_wrong_type, MODEL_CFG) + _ = self.prepare_model(MODEL_CFG, mutator_wrong_type) # assert has prunable units - algo = SlimmableNetwork(MUTATOR_CFG, MODEL_CFG) + algo = SlimmableNetwork(MODEL_CFG, MUTATOR_CFG) self.assertGreater(len(algo.mutator.mutable_units), 0) # assert can generate config template mutator_cfg = copy.deepcopy(MUTATOR_CFG) mutator_cfg['channel_unit_cfg']['units'] = {} - algo = SlimmableNetwork(mutator_cfg, MODEL_CFG) + algo = SlimmableNetwork(MODEL_CFG, mutator_cfg) try: algo.mutator.config_template() except Exception: @@ -79,11 +79,11 @@ def test_init(self) -> None: def test_is_deployed(self) -> None: slimmable_should_not_deployed = \ - SlimmableNetwork(MUTATOR_CFG, MODEL_CFG) + SlimmableNetwork(MODEL_CFG, MUTATOR_CFG) assert not slimmable_should_not_deployed.is_deployed slimmable_should_deployed = \ - SlimmableNetwork(MUTATOR_CFG, MODEL_CFG, deploy_index=0) + SlimmableNetwork(MODEL_CFG, MUTATOR_CFG, deploy_index=0) assert slimmable_should_deployed.is_deployed def test_slimmable_train_step(self) -> 
None: @@ -129,20 +129,18 @@ def _prepare_fake_data(self) -> Dict: return {'inputs': imgs, 'data_samples': data_samples} def prepare_slimmable_model(self) -> SlimmableNetwork: - return self.prepare_model(MUTATOR_CFG, MODEL_CFG) + return self.prepare_model(MODEL_CFG, MUTATOR_CFG) def prepare_fixed_model(self) -> SlimmableNetwork: - - return self.prepare_model(MUTATOR_CFG, MODEL_CFG, deploy=0) + return self.prepare_model(MODEL_CFG, MUTATOR_CFG, deploy=0) def prepare_model(self, - mutator_cfg: Dict, model_cfg: Dict, + mutator_cfg: Dict, deploy=-1) -> SlimmableNetwork: - model = SlimmableNetwork(mutator_cfg, model_cfg, deploy, + model = SlimmableNetwork(model_cfg, mutator_cfg, deploy, ToyDataPreprocessor()) model.to(self.device) - return model @@ -162,10 +160,10 @@ def setUpClass(cls) -> None: dist.init_process_group(backend, rank=0, world_size=1) def prepare_model(self, - mutator_cfg: Dict, model_cfg: Dict, + mutator_cfg: Dict, deploy=-1) -> SlimmableNetwork: - model = super().prepare_model(mutator_cfg, model_cfg, deploy) + model = super().prepare_model(model_cfg, mutator_cfg, deploy) return SlimmableNetworkDDP(module=model, find_unused_parameters=True) def test_is_deployed(self) -> None: diff --git a/tests/test_models/test_algorithms/test_spos.py b/tests/test_models/test_algorithms/test_spos.py index f73521111..1a223b04a 100644 --- a/tests/test_models/test_algorithms/test_spos.py +++ b/tests/test_models/test_algorithms/test_spos.py @@ -5,9 +5,11 @@ import torch.nn as nn from mmengine.model import BaseModel -from mmrazor.models import SPOS, OneShotModuleMutator, OneShotMutableOP +from mmrazor.models import SPOS, NasMutator, OneShotMutableOP from mmrazor.registry import MODELS +MUTATOR_CFG = dict(type='NasMutator') + @MODELS.register_module() class ToySearchableModel(BaseModel): @@ -39,21 +41,21 @@ class TestSPOS(TestCase): def test_init(self): # initiate spos when `norm_training` is True. 
model = ToySearchableModel() - mutator = OneShotModuleMutator() + mutator = MODELS.build(MUTATOR_CFG) alg = SPOS(model, mutator, norm_training=True) alg.eval() self.assertTrue(model.bn.training) # initiate spos with built `mutator`. model = ToySearchableModel() - mutator = OneShotModuleMutator() + mutator = MODELS.build(MUTATOR_CFG) alg = SPOS(model, mutator) self.assertIs(alg.mutator, mutator) # initiate spos with unbuilt `mutator`. - mutator = dict(type='OneShotModuleMutator') + mutator = dict(type='NasMutator') alg = SPOS(model, mutator) - self.assertIsInstance(alg.mutator, OneShotModuleMutator) + self.assertIsInstance(alg.mutator, NasMutator) # initiate spos when `fix_subnet` is not None. fix_subnet = {'mutable': {'chosen': 'conv1'}} @@ -69,7 +71,7 @@ def test_forward_loss(self): model = ToySearchableModel() # supernet - mutator = OneShotModuleMutator() + mutator = MODELS.build(MUTATOR_CFG) alg = SPOS(model, mutator) loss = alg(inputs, mode='loss') self.assertIsInstance(loss, dict) diff --git a/tests/test_models/test_architectures/test_backbones/test_dartsbackbone.py b/tests/test_models/test_architectures/test_backbones/test_dartsbackbone.py index ba3f5955d..a4ae05950 100644 --- a/tests/test_models/test_architectures/test_backbones/test_dartsbackbone.py +++ b/tests/test_models/test_architectures/test_backbones/test_dartsbackbone.py @@ -54,7 +54,7 @@ def setUp(self) -> None: route_cfg=self.route_cfg) self.mutator_cfg = dict( - type='DiffModuleMutator', + type='NasMutator', custom_groups=None, ) @@ -68,7 +68,7 @@ def test_darts_backbone(self): assert mutator is not None mutator.prepare_from_supernet(model) - mutator.modify_supernet_forward(mutator.arch_params) + # mutator.modify_supernet_forward(mutator.arch_params) inputs = torch.randn(4, 3, 224, 224) outputs = model(inputs) @@ -85,7 +85,7 @@ def test_darts_backbone_with_auxliary(self): mutator = MODELS.build(self.mutator_cfg) assert mutator is not None mutator.prepare_from_supernet(model) - 
mutator.modify_supernet_forward(mutator.arch_params) + # mutator.modify_supernet_forward(mutator.arch_params) inputs = torch.randn(4, 3, 224, 224) outputs = model(inputs) diff --git a/tests/test_models/test_mutables/test_mutable_channel/test_units/test_l1_mutable_channel_unit.py b/tests/test_models/test_mutables/test_mutable_channel/test_units/test_l1_mutable_channel_unit.py index 6cf292f10..94bbfe6b6 100644 --- a/tests/test_models/test_mutables/test_mutable_channel/test_units/test_l1_mutable_channel_unit.py +++ b/tests/test_models/test_mutables/test_mutable_channel/test_units/test_l1_mutable_channel_unit.py @@ -20,7 +20,6 @@ def test_init(self): } }) mutator.prepare_from_supernet(model) - mutator.set_choices(mutator.sample_choices()) def test_convnd(self): unit = L1MutableChannelUnit(8) diff --git a/tests/test_models/test_mutables/test_mutable_channel/test_units/test_one_shot_mutable_channel_unit.py b/tests/test_models/test_mutables/test_mutable_channel/test_units/test_one_shot_mutable_channel_unit.py index fb4d8ca43..80d9800c6 100644 --- a/tests/test_models/test_mutables/test_mutable_channel/test_units/test_one_shot_mutable_channel_unit.py +++ b/tests/test_models/test_mutables/test_mutable_channel/test_units/test_one_shot_mutable_channel_unit.py @@ -28,8 +28,6 @@ def test_unit_predefined(self): }, parse_cfg={'type': 'Predefined'}) mutator.prepare_from_supernet(model) - choices = mutator.sample_choices() - mutator.set_choices(choices) self.assertSequenceEqual(mutator.units[0].candidate_choices, [576, 624]) self.assertSequenceEqual(mutator.units[1].candidate_choices, [64]) diff --git a/tests/test_models/test_mutators/test_channel_mutator.py b/tests/test_models/test_mutators/test_channel_mutator.py index 127af9380..1d9d290a2 100644 --- a/tests/test_models/test_mutators/test_channel_mutator.py +++ b/tests/test_models/test_mutators/test_channel_mutator.py @@ -37,8 +37,6 @@ def generate_mask(self, choice: Union[int, float]) -> torch.Tensor: class 
TestChannelMutator(unittest.TestCase): def _test_a_mutator(self, mutator: ChannelMutator, model): - choices = mutator.sample_choices() - mutator.set_choices(choices) self.assertGreater(len(mutator.mutable_units), 0) x = torch.rand([2, 3, 224, 224]) y = model(x) @@ -156,45 +154,10 @@ def test_models_with_predefined_dynamic_op_without_pruning(self): }, parse_cfg={'type': 'Predefined'}) mutator.prepare_from_supernet(model) - choices = mutator.sample_choices() - mutator.set_choices(choices) self.assertGreater(len(mutator.mutable_units), 0) x = torch.rand([2, 3, 224, 224]) y = model(x) - self.assertEqual( - list(y.shape), - [2, list(mutator.current_choices.values())[0]]) - - def test_custom_group(self): - ARCHITECTURE_CFG = dict( - type='mmcls.ImageClassifier', - backbone=dict(type='mmcls.MobileNetV2', widen_factor=1.5), - neck=dict(type='mmcls.GlobalAveragePooling'), - head=dict( - type='mmcls.LinearClsHead', - num_classes=1000, - in_channels=1920, - loss=dict(type='mmcls.CrossEntropyLoss', loss_weight=1.0), - topk=(1, 5))) - model = MODELS.build(ARCHITECTURE_CFG) - - # generate config - model1 = copy.deepcopy(model) - mutator1 = ChannelMutator() - mutator1.prepare_from_supernet(model1) - - self.assertEqual(len(mutator1.search_groups), 25) - - custom_groups = [[ - 'backbone.layer2.1.conv.0.conv_(0, 240)_240', - 'backbone.layer3.0.conv.0.conv_(0, 240)_240' - ]] - - model2 = copy.deepcopy(model) - mutator2 = ChannelMutator(custom_groups=custom_groups) - mutator2.prepare_from_supernet(model2) - - self.assertEqual(len(mutator2.search_groups), 24) + self.assertEqual(list(y.shape), [2, 624]) def test_related_shortcut_layer(self): for Model in [ @@ -211,8 +174,6 @@ def test_related_shortcut_layer(self): }, parse_cfg={'type': 'Predefined'}) mutator.prepare_from_supernet(model) - choices = mutator.sample_choices() - mutator.set_choices(choices) self.assertGreater(len(mutator.mutable_units), 0) x = torch.rand([2, 3, 224, 224]) y = model(x) diff --git 
a/tests/test_models/test_mutators/test_dcff_mutator.py b/tests/test_models/test_mutators/test_dcff_mutator.py index eee8049e6..fc0250248 100644 --- a/tests/test_models/test_mutators/test_dcff_mutator.py +++ b/tests/test_models/test_mutators/test_dcff_mutator.py @@ -93,13 +93,8 @@ def test_DCFF_channel_mutator() -> None: # ResBlock mutator = DCFFChannelMutator(channel_unit_cfg=dict(type='DCFFChannelUnit')) - target_pruning_ratio = { - 0: 0.5, - } - model = ResBlock() mutator.prepare_from_supernet(model) - mutator.set_choices(target_pruning_ratio) mutator.calc_information(1.0) out3 = model(imgs) diff --git a/tests/test_models/test_mutators/test_nas_mutator.py b/tests/test_models/test_mutators/test_nas_mutator.py new file mode 100644 index 000000000..dce6b6c38 --- /dev/null +++ b/tests/test_models/test_mutators/test_nas_mutator.py @@ -0,0 +1,196 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import unittest + +import pytest +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmrazor.models.architectures.utils import mutate_conv_module +from mmrazor.models.mutables import (MutableChannelContainer, MutableValue, + OneShotMutableChannel, + OneShotMutableChannelUnit, + OneShotMutableValue) +from mmrazor.models.mutables.mutable_module import MutableModule +from mmrazor.models.mutators import NasMutator +from mmrazor.registry import MODELS + +MODELS.register_module(name='torchConv2d', module=nn.Conv2d, force=True) +MODELS.register_module(name='torchMaxPool2d', module=nn.MaxPool2d, force=True) +MODELS.register_module(name='torchAvgPool2d', module=nn.AvgPool2d, force=True) + + +class SearchableLayer(nn.Module): + + def __init__(self, mutable_cfg: dict) -> None: + super().__init__() + self.op1 = MODELS.build(mutable_cfg) + self.op2 = MODELS.build(mutable_cfg) + self.op3 = MODELS.build(mutable_cfg) + + def forward(self, x): + x = self.op1(x) + x = self.op2(x) + return self.op3(x) + + +class SearchableModel(nn.Module): + """A searchable model with 
a mixed search space as follows: + + 1. value search. + 2. module search. + 3. channel search. + """ + + def __init__(self, mutable_cfg: dict) -> None: + super().__init__() + + self.first_conv = ConvModule( + in_channels=3, + out_channels=32, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=dict(type='mmrazor.BigNasConv2d'), + norm_cfg=dict(type='mmrazor.DynamicBatchNorm2d')) + + self.second_conv = ConvModule( + in_channels=32, + out_channels=32, + kernel_size=1, + stride=1, + padding=1, + conv_cfg=dict(type='mmrazor.BigNasConv2d')) + + self.slayer1 = SearchableLayer(mutable_cfg) + self.slayer2 = SearchableLayer(mutable_cfg) + self.slayer3 = SearchableLayer(mutable_cfg) + + self.register_mutables() + + def forward(self, x): + x = self.first_conv(x) + x = self.second_conv(x) + x = self.slayer1(x) + x = self.slayer2(x) + return self.slayer3(x) + + def register_mutables(self): + """Mutate the defined model.""" + OneShotMutableChannelUnit._register_channel_container( + self, MutableChannelContainer) + + mutable_kernel_size = OneShotMutableValue( + value_list=[1, 3], default_value=3) + mutable_out_channels = OneShotMutableChannel( + 32, candidate_choices=[16, 32]) + mutate_conv_module( + self.first_conv, + mutable_kernel_size=mutable_kernel_size, + mutable_out_channels=mutable_out_channels) + + # dont forget the last connection. 
+ MutableChannelContainer.register_mutable_channel_to_module( + self.second_conv.conv, mutable_out_channels, False) + + +class TestNasMutator(unittest.TestCase): + + def setUp(self): + self.MUTABLE_CFG = dict( + type='DiffMutableOP', + candidates=dict( + torch_conv2d_3x3=dict( + type='torchConv2d', + kernel_size=3, + padding=1, + ), + torch_conv2d_5x5=dict( + type='torchConv2d', + kernel_size=5, + padding=2, + ), + torch_conv2d_7x7=dict( + type='torchConv2d', + kernel_size=7, + padding=3, + ), + ), + module_kwargs=dict(in_channels=32, out_channels=32, stride=1)) + + self.MUTATOR_CFG = dict(type='NasMutator') + + def test_models_with_predefined_dynamic_op(self): + for Model in [SearchableModel]: + with self.subTest(model=Model): + model = SearchableModel(self.MUTABLE_CFG) + mutator = MODELS.build(self.MUTATOR_CFG) + assert isinstance(mutator, NasMutator) + + with pytest.raises(RuntimeError): + _ = mutator.search_groups + mutator.prepare_from_supernet(model) + assert hasattr(mutator, 'search_groups') + + with pytest.raises(AttributeError): + _ = mutator.arch_params + mutator.prepare_arch_params() + assert hasattr(mutator, 'arch_params') + + for name in mutator.search_groups.keys(): + assert 'value' or 'channel' or 'module' in name + + self.assertEqual(len(mutator.arch_params.keys()), 9) + for v in mutator.arch_params.values(): + self.assertEqual(v.size()[0], 3) + + mutable_values = [] + mutable_modules = [] + for name, module in model.named_modules(): + if isinstance(module, MutableValue): + mutable_values.append(name) + elif isinstance(module, MutableModule): + mutable_modules.append(name) + elif hasattr(module, 'source_mutables'): + for each_mutables in module.source_mutables: + if isinstance(each_mutables, MutableValue): + mutable_values.append(each_mutables) + elif isinstance(each_mutables, MutableModule): + mutable_modules.append(each_mutables) + + num_mutables = len(mutable_values) + \ + len(mutable_modules) + len(mutator.mutable_units) + 
self.assertEqual(len(mutator.search_groups), num_mutables) + + choices = mutator.sample_choices() + min_choices = mutator.sample_choices(kind='min') + max_choices = mutator.sample_choices(kind='max') + + self.assertEqual(choices.keys(), min_choices.keys()) + self.assertEqual(choices.keys(), max_choices.keys()) + + with self.assertRaises(NotImplementedError): + _ = mutator.sample_choices(kind='mun') + + assert hasattr(mutator, 'current_choices') + with self.assertWarnsRegex( + UserWarning, 'mutables with `arch param` detected'): + _ = mutator.max_choices + + with self.assertWarnsRegex( + UserWarning, 'mutables with `arch param` detected'): + _ = mutator.min_choices + + with self.assertWarnsRegex( + UserWarning, 'mutables with `arch param` detected'): + mutator.set_max_choices() + + with self.assertWarnsRegex( + UserWarning, 'mutables with `arch param` detected'): + mutator.set_min_choices() + + mutator.set_choices(choices) + + x = torch.rand([1, 3, 224, 224]) + y = model(x) + self.assertEqual(list(y.shape), [1, 32, 114, 114]) diff --git a/tests/test_registry/test_registry.py b/tests/test_registry/test_registry.py index 64de464f2..4830803a4 100644 --- a/tests/test_registry/test_registry.py +++ b/tests/test_registry/test_registry.py @@ -83,7 +83,7 @@ def test_build_razor_from_cfg(self): model = MODELS.build(cfg.model) self.assertTrue(isinstance(model, BaseModel)) - def test_build_subnet_prune_from_cfg(self): + def test_build_subnet_prune_from_cfg_by_mutator(self): mutator_cfg = fileio.load('tests/data/test_registry/subnet.json') init_cfg = dict( type='Pretrained', @@ -101,6 +101,24 @@ def test_build_subnet_prune_from_cfg(self): model = MODELS.build(model_cfg) self.assertTrue(isinstance(model, BaseModel)) + def test_build_subnet_prune_from_cfg_by_mutable(self): + mutator_cfg = fileio.load('tests/data/test_registry/subnet.json') + init_cfg = dict( + type='Pretrained', + checkpoint='tests/data/test_registry/subnet_weight.pth') + # test fix subnet + model_cfg = dict( + # 
use mmrazor's build_func + type='mmrazor.sub_model', + cfg=dict( + cfg_path='mmcls::resnet/resnet50_8xb32_in1k.py', + pretrained=False), + fix_subnet=mutator_cfg, + mode='mutable', + init_cfg=init_cfg) + model = MODELS.build(model_cfg) + self.assertTrue(isinstance(model, BaseModel)) + if __name__ == '__main__': unittest.main() diff --git a/tests/test_runners/test_subnet_sampler_loop.py b/tests/test_runners/test_subnet_sampler_loop.py index 1c9422fc1..02c3a90d5 100644 --- a/tests/test_runners/test_subnet_sampler_loop.py +++ b/tests/test_runners/test_subnet_sampler_loop.py @@ -21,7 +21,7 @@ @MODELS.register_module() class ToyModel_GreedySamplerTrainLoop(BaseModel): - @patch('mmrazor.models.mutators.OneShotModuleMutator') + @patch('mmrazor.models.mutators.NasMutator') def __init__(self, mock_mutator): super().__init__() self.linear1 = nn.Linear(2, 2) From 6eeebc7cf8245154d329d3eb4f8d39ea63aedfbb Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Thu, 2 Feb 2023 15:59:45 +0800 Subject: [PATCH 29/59] revert env change --- .circleci/test.yml | 12 ++++++------ .github/workflows/build.yml | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.circleci/test.yml b/.circleci/test.yml index fb4068739..25140a879 100644 --- a/.circleci/test.yml +++ b/.circleci/test.yml @@ -66,10 +66,10 @@ jobs: command: | pip install git+https://github.com/open-mmlab/mmengine.git@main pip install -U openmim - mim install 'mmcv <= 2.0.0rc3' + mim install 'mmcv >= 2.0.0rc1' pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x - pip install git+https://github.com/open-mmlab/mmdetection.git@v3.0.0rc5 - pip install git+https://github.com/open-mmlab/mmsegmentation.git@v1.0.0rc4 + pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x + pip install git+https://github.com/open-mmlab/mmsegmentation.git@dev-1.x python -m pip install git+ssh://git@github.com/open-mmlab/mmpose.git@dev-1.x pip install -r requirements.txt - run: @@ -103,9 +103,9 @@ jobs: 
name: Clone Repos command: | git clone -b main --depth 1 https://github.com/open-mmlab/mmengine.git /home/circleci/mmengine - git clone -b v3.0.0rc5 --depth 1 https://github.com/open-mmlab/mmdetection.git /home/circleci/mmdetection + git clone -b dev-3.x --depth 1 https://github.com/open-mmlab/mmdetection.git /home/circleci/mmdetection git clone -b dev-1.x --depth 1 https://github.com/open-mmlab/mmclassification.git /home/circleci/mmclassification - git clone -b v1.0.0rc4 --depth 1 https://github.com/open-mmlab/mmsegmentation.git /home/circleci/mmsegmentation + git clone -b dev-1.x --depth 1 https://github.com/open-mmlab/mmsegmentation.git /home/circleci/mmsegmentation - run: name: Build Docker image command: | @@ -116,7 +116,7 @@ jobs: command: | docker exec mmrazor pip install -e /mmengine docker exec mmrazor pip install -U openmim - docker exec mmrazor mim install 'mmcv <= 2.0.0rc3' + docker exec mmrazor mim install 'mmcv >= 2.0.0rc1' docker exec mmrazor pip install -e /mmdetection docker exec mmrazor pip install -e /mmclassification docker exec mmrazor pip install -e /mmsegmentation diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8fe20be47..e00ed24c8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -101,13 +101,13 @@ jobs: - name: Install MMCV run: | pip install -U openmim - mim install 'mmcv <= 2.0.0rc3' + mim install 'mmcv >= 2.0.0rc1' - name: Install MMCls run: pip install git+https://github.com/open-mmlab/mmclassification.git@dev-1.x - name: Install MMDet - run: pip install git+https://github.com/open-mmlab/mmdetection.git@v3.0.0rc5 + run: pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x - name: Install MMSeg - run: pip install git+https://github.com/open-mmlab/mmsegmentation.git@v1.0.0rc4 + run: pip install git+https://github.com/open-mmlab/mmsegmentation.git@dev-1.x - name: Install other dependencies run: pip install -r requirements.txt - name: Build and install From 
fd183f98454200b1798a5ebbe50f2be947fc4297 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Tue, 7 Feb 2023 21:28:14 +0800 Subject: [PATCH 30/59] update usage of sub_model --- .../_base_/settings/imagenet_bs2048_dmcp.py | 12 +- configs/nas/mmcls/autoformer/README.md | 2 +- .../autoformer_supernet_32xb256_in1k.py | 1 - ..._mbv2_1.5x_slimmable_subnet_8xb256_in1k.py | 23 +- .../attentive_mobilenet_subnet_8xb256_in1k.py | 11 +- .../darts/darts_subnet_1xb96_cifar10_2.0.py | 12 +- .../darts_subnet_1xb96_cifar10_2.0_mmrazor.py | 12 +- .../mmcls/dsnas/dsnas_subnet_8xb128_in1k.py | 12 +- .../ofa_mobilenet_subnet_8xb256_in1k.py | 12 +- .../spos/spos_mobilenet_subnet_8xb128_in1k.py | 11 +- .../spos_shufflenet_subnet_8xb128_in1k.py | 11 +- .../detnas_frcnn_shufflenet_subnet_coco_1x.py | 11 +- .../detnas_shufflenet_subnet_8xb128_in1k.py | 12 +- .../dcff/dcff_compact_resnet_8xb32_in1k.py | 2 +- configs/pruning/mmcls/dcff/fix_subnet.json | 600 +++++ .../pruning/mmcls/dmcp/DMCP_MBV2_100M.json | 1729 +++++++++++++++ .../pruning/mmcls/dmcp/DMCP_MBV2_100M.yaml | 9 - configs/pruning/mmcls/dmcp/DMCP_R50_2G.json | 1933 +++++++++++++++++ configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml | 5 - .../mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py | 10 +- .../mmcls/dmcp/dmcp_mbv2_supernet_32xb64.py | 1 - .../mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py | 11 +- .../dmcp/dmcp_resnet50_supernet_32xb64.py | 4 +- ..._compact_faster_rcnn_resnet50_8xb4_coco.py | 2 +- configs/pruning/mmdet/dcff/fix_subnet.json | 602 ++++- ...f_compact_topdown_heatmap_resnet50_coco.py | 2 +- configs/pruning/mmpose/dcff/fix_subnet.json | 602 ++++- ...pact_pointrend_resnet50_8xb2_cityscapes.py | 2 +- configs/pruning/mmseg/dcff/fix_subnet.json | 602 ++++- mmrazor/engine/hooks/dmcp_subnet_hook.py | 31 +- mmrazor/models/algorithms/nas/autoformer.py | 22 +- mmrazor/models/algorithms/nas/bignas.py | 319 ++- mmrazor/models/algorithms/nas/darts.py | 29 +- mmrazor/models/algorithms/nas/dsnas.py | 34 +- mmrazor/models/algorithms/nas/spos.py | 34 +- 
mmrazor/models/algorithms/pruning/dmcp.py | 392 ++-- .../units/dmcp_channel_unit.py | 5 + mmrazor/registry/registry.py | 3 +- mmrazor/structures/subnet/fix_subnet.py | 12 +- tests/data/MBV2_slimmable_config.json | 763 +++---- tests/data/test_registry/subnet.json | 600 +++++ .../test_algorithms/test_autoformer.py | 6 +- .../test_algorithms/test_bignas.py | 3 - .../test_models/test_algorithms/test_darts.py | 20 +- .../test_models/test_algorithms/test_dmcp.py | 10 - .../test_models/test_algorithms/test_dsnas.py | 11 - .../test_models/test_algorithms/test_spos.py | 11 - tests/test_registry/test_registry.py | 2 + 48 files changed, 7560 insertions(+), 1005 deletions(-) create mode 100644 configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.json delete mode 100644 configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.yaml create mode 100644 configs/pruning/mmcls/dmcp/DMCP_R50_2G.json delete mode 100644 configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml diff --git a/configs/_base_/settings/imagenet_bs2048_dmcp.py b/configs/_base_/settings/imagenet_bs2048_dmcp.py index f6ae09579..3532423fc 100644 --- a/configs/_base_/settings/imagenet_bs2048_dmcp.py +++ b/configs/_base_/settings/imagenet_bs2048_dmcp.py @@ -26,12 +26,11 @@ paramwise_cfg = dict(norm_decay_mult=0.0, bias_decay_mult=0.0) optim_wrapper = dict( - _delete_=True, constructor='mmrazor.SeparateOptimWrapperConstructor', architecture=dict( type='OptimWrapper', optimizer=dict(type='SGD', lr=0.5, momentum=0.9, weight_decay=3e-4), - clip_grad=dict(max_norm=5, norm_type=2)), + paramwise_cfg=paramwise_cfg), mutator=dict( type='OptimWrapper', optimizer=dict(type='Adam', lr=0.5, weight_decay=1e-3))) @@ -48,12 +47,7 @@ train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='RandomResizedCrop', scale=224), - dict( - type='ColorJitter', - brightness=0.2, - contrast=0.2, - saturation=0.2, - hue=0.1), + dict(type='ColorJitter', brightness=0.2, contrast=0.2, saturation=0.2), dict(type='RandomFlip', prob=0.5, direction='horizontal'), 
dict(type='PackClsInputs'), ] @@ -99,4 +93,6 @@ evaluation = dict(interval=1, metric='accuracy') train_cfg = dict(by_epoch=True, max_epochs=max_search_epochs, val_interval=1) +val_cfg = dict() +test_cfg = dict() custom_hooks = [dict(type='DMCPSubnetHook')] diff --git a/configs/nas/mmcls/autoformer/README.md b/configs/nas/mmcls/autoformer/README.md index 768d7e027..294463e9b 100644 --- a/configs/nas/mmcls/autoformer/README.md +++ b/configs/nas/mmcls/autoformer/README.md @@ -43,7 +43,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ ```bash CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ - configs/nas/mmcls/autoformer/autoformer_subnet_8xb128_in1k.py \ + configs/nas/mmcls/autoformer/autoformer_supernet_8xb128_in1k.py \ $STEP2_CKPT 1 --work-dir $WORK_DIR \ --cfg-options algorithm.mutable_cfg=$STEP2_SUBNET_YAML ``` diff --git a/configs/nas/mmcls/autoformer/autoformer_supernet_32xb256_in1k.py b/configs/nas/mmcls/autoformer/autoformer_supernet_32xb256_in1k.py index b563e0093..21555bfe7 100644 --- a/configs/nas/mmcls/autoformer/autoformer_supernet_32xb256_in1k.py +++ b/configs/nas/mmcls/autoformer/autoformer_supernet_32xb256_in1k.py @@ -52,7 +52,6 @@ model = dict( type='mmrazor.Autoformer', architecture=supernet, - fix_subnet=None, mutator=dict(type='mmrazor.NasMutator')) # runtime setting diff --git a/configs/nas/mmcls/autoslim/autoslim_mbv2_1.5x_slimmable_subnet_8xb256_in1k.py b/configs/nas/mmcls/autoslim/autoslim_mbv2_1.5x_slimmable_subnet_8xb256_in1k.py index 61d64a226..a1f4f56dc 100644 --- a/configs/nas/mmcls/autoslim/autoslim_mbv2_1.5x_slimmable_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/autoslim/autoslim_mbv2_1.5x_slimmable_subnet_8xb256_in1k.py @@ -21,26 +21,13 @@ # !autoslim algorithm config # ========================================================================== +supernet.data_preprocessor = data_preprocessor model = dict( - _delete_=True, _scope_='mmrazor', - type='SlimmableNetwork', - architecture=supernet, - 
data_preprocessor=data_preprocessor, - mutator=dict( - type='SlimmableChannelMutator', - channel_unit_cfg=dict( - type='SlimmableChannelUnit', - units='tests/data/MBV2_slimmable_config.json'), - parse_cfg=dict( - type='ChannelAnalyzer', - demo_input=(1, 3, 224, 224), - tracer_type='BackwardTracer'))) - -model_wrapper_cfg = dict( - type='mmrazor.SlimmableNetworkDDP', - broadcast_buffers=False, - find_unused_parameters=True) + type='sub_model', + cfg=supernet, + fix_subnet='tests/data/MBV2_slimmable_config.json', + mode='mutator') val_cfg = dict(type='mmrazor.SlimmableValLoop') diff --git a/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py b/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py index e1d86108d..89d73dd71 100644 --- a/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py @@ -1,5 +1,14 @@ _base_ = 'attentive_mobilenet_supernet_32xb64_in1k.py' -model = dict(fix_subnet='configs/nas/mmcls/bignas/ATTENTIVE_SUBNET_A6.yaml') +supernet = _base_.supernet + +model_cfg = dict( + _scope_='mmrazor', + type='sub_model', + cfg=supernet, + fix_subnet='configs/nas/mmcls/bignas/ATTENTIVE_SUBNET_A6.yaml', + mode='mutator') + +_base_.model = model_cfg test_cfg = dict(evaluate_fixed_subnet=True) diff --git a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py index 766ebd142..e1ea12dd6 100644 --- a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py +++ b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py @@ -29,13 +29,11 @@ topk=(1, 5), cal_acc=True)) -fix_subnet = 'configs/nas/mmcls/darts/DARTS_SUBNET_CIFAR_PAPER_ALIAS.yaml' - model = dict( - type='mmrazor.SPOS', - architecture=supernet, - mutator=None, - fix_subnet=fix_subnet, -) + _scope_='mmrazor', + type='sub_model', + cfg=supernet, + fix_subnet='configs/nas/mmcls/darts/DARTS_SUBNET_CIFAR_PAPER_ALIAS.yaml', + mode='mutator') 
find_unused_parameter = False diff --git a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0_mmrazor.py b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0_mmrazor.py index 5a20916ca..0e3e6505d 100644 --- a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0_mmrazor.py +++ b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0_mmrazor.py @@ -29,13 +29,11 @@ topk=(1, 5), cal_acc=True)) -fix_subnet = 'configs/nas/mmcls/darts/DARTS_SUBNET_CIFAR_MMRAZOR_97.32.yaml' - model = dict( - type='mmrazor.SPOS', - architecture=supernet, - mutator=None, - fix_subnet=fix_subnet, -) + _scope_='mmrazor', + type='sub_model', + cfg=supernet, + fix_subnet='configs/nas/mmcls/darts/DARTS_SUBNET_CIFAR_MMRAZOR_97.32.yaml', + mode='mutator') find_unused_parameter = False diff --git a/configs/nas/mmcls/dsnas/dsnas_subnet_8xb128_in1k.py b/configs/nas/mmcls/dsnas/dsnas_subnet_8xb128_in1k.py index a96c81f82..3d52cfb62 100644 --- a/configs/nas/mmcls/dsnas/dsnas_subnet_8xb128_in1k.py +++ b/configs/nas/mmcls/dsnas/dsnas_subnet_8xb128_in1k.py @@ -1,8 +1,16 @@ _base_ = ['./dsnas_supernet_8xb128_in1k.py'] # NOTE: Replace this with the mutable_cfg searched by yourself. 
-fix_subnet = 'configs/nas/mmcls/dsnas/DSNAS_SUBNET_IMAGENET_PAPER_ALIAS.yaml' +supernet = _base_.model['architecture'] -model = dict(fix_subnet=fix_subnet) +model_cfg = dict( + _scope_='mmrazor', + type='sub_model', + cfg=supernet, + fix_subnet= # noqa: E251 + 'configs/nas/mmcls/dsnas/DSNAS_SUBNET_IMAGENET_PAPER_ALIAS.yaml', # noqa: E501 + mode='mutator') + +_base_.model = model_cfg find_unused_parameters = False diff --git a/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py b/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py index ffeb44898..bbe27468d 100644 --- a/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py @@ -1,6 +1,14 @@ _base_ = 'ofa_mobilenet_supernet_32xb64_in1k.py' -model = dict( - fix_subnet='configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml') +supernet = _base_.supernet + +model_cfg = dict( + _scope_='mmrazor', + type='sub_model', + cfg=supernet, + fix_subnet='configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml', + mode='mutator') + +_base_.model = model_cfg test_cfg = dict(evaluate_fixed_subnet=True) diff --git a/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py b/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py index 7cabf112f..1e7a20d7e 100644 --- a/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py +++ b/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py @@ -1,8 +1,15 @@ _base_ = ['./spos_mobilenet_supernet_8xb128_in1k.py'] # FIXME: you may replace this with the mutable_cfg searched by yourself -fix_subnet = 'configs/nas/spos/AngleNAS_SHUFFLENETV2_IN1k_2.0.yaml' # noqa: E501 +supernet = _base_.supernet -model = dict(fix_subnet=fix_subnet) +model_cfg = dict( + _scope_='mmrazor', + type='sub_model', + cfg=supernet, + fix_subnet='configs/nas/spos/AngleNAS_SHUFFLENETV2_IN1k_2.0.yaml', + mode='mutator') + +_base_.model = model_cfg find_unused_parameters = False diff --git 
a/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py b/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py index 1243d16b2..3c344cfa1 100644 --- a/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py +++ b/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py @@ -1,8 +1,15 @@ _base_ = ['./spos_shufflenet_supernet_8xb128_in1k.py'] # FIXME: you may replace this with the searched by yourself -fix_subnet = 'configs/nas/mmcls/spos/SPOS_SUBNET.yaml' +supernet = _base_.supernet -model = dict(fix_subnet=fix_subnet) +model_cfg = dict( + _scope_='mmrazor', + type='sub_model', + cfg=supernet, + fix_subnet='configs/nas/mmcls/spos/SPOS_SUBNET.yaml', + mode='mutator') + +_base_.model = model_cfg find_unused_parameters = False diff --git a/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py b/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py index 8334c78b8..afa02cb53 100644 --- a/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py +++ b/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py @@ -1,8 +1,15 @@ _base_ = ['./detnas_frcnn_shufflenet_supernet_coco_1x.py'] # FIXME: you may replace this with the searched by yourself -fix_subnet = 'configs/nas/mmdet/detnas/DETNAS_SUBNET.yaml' +supernet = _base_.supernet -model = dict(fix_subnet=fix_subnet) +model_cfg = dict( + _scope_='mmrazor', + type='sub_model', + cfg=supernet, + fix_subnet='configs/nas/mmdet/detnas/DETNAS_SUBNET.yaml', + mode='mutator') + +_base_.model = model_cfg find_unused_parameters = False diff --git a/configs/nas/mmdet/detnas/detnas_shufflenet_subnet_8xb128_in1k.py b/configs/nas/mmdet/detnas/detnas_shufflenet_subnet_8xb128_in1k.py index 7b6ea7e48..d929bb83a 100644 --- a/configs/nas/mmdet/detnas/detnas_shufflenet_subnet_8xb128_in1k.py +++ b/configs/nas/mmdet/detnas/detnas_shufflenet_subnet_8xb128_in1k.py @@ -1,8 +1,16 @@ _base_ = './detnas_shufflenet_supernet_8xb128_in1k.py' # FIXME: you may replace this with the 
mutable_cfg searched by yourself -fix_subnet = 'https://download.openmmlab.com/mmrazor/v1/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20220715-61d2e900_subnet_cfg_v1.yaml' # noqa: E501 +supernet = _base_.supernet -model = dict(fix_subnet=fix_subnet) +model_cfg = dict( + _scope_='mmrazor', + type='sub_model', + cfg=supernet, + fix_subnet= # noqa: E251 + 'https://download.openmmlab.com/mmrazor/v1/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20220715-61d2e900_subnet_cfg_v1.yaml', # noqa: E501 + mode='mutator') + +_base_.model = model_cfg find_unused_parameters = False diff --git a/configs/pruning/mmcls/dcff/dcff_compact_resnet_8xb32_in1k.py b/configs/pruning/mmcls/dcff/dcff_compact_resnet_8xb32_in1k.py index f90ce65be..4a98b2584 100644 --- a/configs/pruning/mmcls/dcff/dcff_compact_resnet_8xb32_in1k.py +++ b/configs/pruning/mmcls/dcff/dcff_compact_resnet_8xb32_in1k.py @@ -1,7 +1,7 @@ _base_ = ['dcff_resnet_8xb32_in1k.py'] # model settings -model_cfg = dict( +_base_.model = dict( _scope_='mmrazor', type='sub_model', cfg=dict( diff --git a/configs/pruning/mmcls/dcff/fix_subnet.json b/configs/pruning/mmcls/dcff/fix_subnet.json index dfdcea758..0522765d1 100644 --- a/configs/pruning/mmcls/dcff/fix_subnet.json +++ b/configs/pruning/mmcls/dcff/fix_subnet.json @@ -14,6 +14,102 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.conv1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.bn2", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.conv1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.bn2", + "start":0, + "end":64, + 
"is_output_channel":false + }, + { + "name":"backbone.layer2.0.conv1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.downsample.0", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.bn1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.conv2", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.bn2", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.conv2", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.bn2", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, "choice":1.0 }, "backbone.layer1.0.conv1_(0, 64)_64":{ @@ -24,6 +120,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.0.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.conv2", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer1.0.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.bn1", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, "choice":0.640625 }, "backbone.layer1.1.conv1_(0, 64)_64":{ @@ -34,6 +160,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.1.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.conv2", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer1.1.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.bn1", + "start":0, + 
"end":64, + "is_output_channel":true + } + ] + }, "choice":0.640625 }, "backbone.layer2.0.conv1_(0, 128)_128":{ @@ -44,6 +200,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.0.bn1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.conv2", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.0.conv1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.bn1", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer2.0.conv2_(0, 128)_128":{ @@ -54,6 +240,96 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.0.bn2", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.downsample.1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.bn2", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.conv1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.downsample.0", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.0.conv2", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.bn2", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.downsample.0", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.downsample.1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":128, + "is_output_channel":true + }, + { + 
"name":"backbone.layer2.1.conv2", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.bn2", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, "choice":0.59375 }, "backbone.layer2.1.conv1_(0, 128)_128":{ @@ -64,6 +340,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.1.bn1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv2", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.1.conv1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.bn1", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer3.0.conv1_(0, 256)_256":{ @@ -74,6 +380,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.0.bn1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.conv2", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.0.conv1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.bn1", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer3.0.conv2_(0, 256)_256":{ @@ -84,6 +420,96 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.0.bn2", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.downsample.1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.bn2", + "start":0, + "end":256, + "is_output_channel":false + }, + { + 
"name":"backbone.layer4.0.conv1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.downsample.0", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.0.conv2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.bn2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.downsample.0", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.downsample.1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.conv2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.bn2", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, "choice":0.59765625 }, "backbone.layer3.1.conv1_(0, 256)_256":{ @@ -94,6 +520,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.1.bn1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv2", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.1.conv1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.bn1", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer4.0.conv1_(0, 512)_512":{ @@ -104,6 +560,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.0.bn1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.conv2", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.0.conv1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + 
"name":"backbone.layer4.0.bn1", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, "choice":0.69921875 }, "backbone.layer4.0.conv2_(0, 512)_512":{ @@ -114,6 +600,90 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.0.bn2", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.downsample.1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.bn2", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"head.fc", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.0.conv2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.bn2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.downsample.0", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.downsample.1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.conv2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.bn2", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, "choice":0.69921875 }, "backbone.layer4.1.conv1_(0, 512)_512":{ @@ -124,6 +694,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.1.bn1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv2", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.1.conv1", + "start":0, + "end":512, + 
"is_output_channel":true + }, + { + "name":"backbone.layer4.1.bn1", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, "choice":0.69921875 } } diff --git a/configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.json b/configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.json new file mode 100644 index 000000000..d7abf3c80 --- /dev/null +++ b/configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.json @@ -0,0 +1,1729 @@ +{ + "type":"DMCPChannelMutator", + "channel_unit_cfg":{ + "type":"DMCPChannelUnit", + "default_args":{ + "choice_mode":"number" + }, + "units":{ + "backbone.conv1.conv_(0, 32)_32":{ + "init_args":{ + "num_channels":32, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.conv1.bn", + "start":0, + "end":32, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.conv.0.conv", + "start":0, + "end":32, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.conv.0.bn", + "start":0, + "end":32, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.conv.1.conv", + "start":0, + "end":32, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.conv1.conv", + "start":0, + "end":32, + "is_output_channel":true + }, + { + "name":"backbone.conv1.bn", + "start":0, + "end":32, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.conv.0.conv", + "start":0, + "end":32, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.conv.0.bn", + "start":0, + "end":32, + "is_output_channel":true + } + ] + }, + "choice":9 + }, + "backbone.layer1.0.conv.1.conv_(0, 16)_16":{ + "init_args":{ + "num_channels":16, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.0.conv.1.bn", + "start":0, + "end":16, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.conv.0.conv", + "start":0, + "end":16, + "is_output_channel":false + } + ], + 
"output_related":[ + { + "name":"backbone.layer1.0.conv.1.conv", + "start":0, + "end":16, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.conv.1.bn", + "start":0, + "end":16, + "is_output_channel":true + } + ] + }, + "choice":10 + }, + "backbone.layer2.0.conv.0.conv_(0, 96)_96":{ + "init_args":{ + "num_channels":96, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.0.conv.0.bn", + "start":0, + "end":96, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.conv.1.conv", + "start":0, + "end":96, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.conv.1.bn", + "start":0, + "end":96, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.conv.2.conv", + "start":0, + "end":96, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.0.conv.0.conv", + "start":0, + "end":96, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.conv.0.bn", + "start":0, + "end":96, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.conv.1.conv", + "start":0, + "end":96, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.conv.1.bn", + "start":0, + "end":96, + "is_output_channel":true + } + ] + }, + "choice":36 + }, + "backbone.layer2.0.conv.2.conv_(0, 24)_24":{ + "init_args":{ + "num_channels":24, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.0.conv.2.bn", + "start":0, + "end":24, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv.0.conv", + "start":0, + "end":24, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv.2.bn", + "start":0, + "end":24, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":24, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.conv.0.conv", + "start":0, + "end":24, + 
"is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.0.conv.2.conv", + "start":0, + "end":24, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.conv.2.bn", + "start":0, + "end":24, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.conv.2.conv", + "start":0, + "end":24, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.conv.2.bn", + "start":0, + "end":24, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":24, + "is_output_channel":true + } + ] + }, + "choice":16 + }, + "backbone.layer2.1.conv.0.conv_(0, 144)_144":{ + "init_args":{ + "num_channels":144, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.1.conv.0.bn", + "start":0, + "end":144, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv.1.conv", + "start":0, + "end":144, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv.1.bn", + "start":0, + "end":144, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv.2.conv", + "start":0, + "end":144, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.1.conv.0.conv", + "start":0, + "end":144, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.conv.0.bn", + "start":0, + "end":144, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.conv.1.conv", + "start":0, + "end":144, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.conv.1.bn", + "start":0, + "end":144, + "is_output_channel":true + } + ] + }, + "choice":16 + }, + "backbone.layer3.0.conv.0.conv_(0, 144)_144":{ + "init_args":{ + "num_channels":144, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.0.conv.0.bn", + "start":0, + "end":144, + "is_output_channel":false + }, + { + 
"name":"backbone.layer3.0.conv.1.conv", + "start":0, + "end":144, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.conv.1.bn", + "start":0, + "end":144, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.conv.2.conv", + "start":0, + "end":144, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.0.conv.0.conv", + "start":0, + "end":144, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.conv.0.bn", + "start":0, + "end":144, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.conv.1.conv", + "start":0, + "end":144, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.conv.1.bn", + "start":0, + "end":144, + "is_output_channel":true + } + ] + }, + "choice":48 + }, + "backbone.layer3.0.conv.2.conv_(0, 32)_32":{ + "init_args":{ + "num_channels":32, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.0.conv.2.bn", + "start":0, + "end":32, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv.0.conv", + "start":0, + "end":32, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv.2.bn", + "start":0, + "end":32, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":32, + "is_output_channel":false + }, + { + "name":"backbone.layer3.2.conv.0.conv", + "start":0, + "end":32, + "is_output_channel":false + }, + { + "name":"backbone.layer3.2.conv.2.bn", + "start":0, + "end":32, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.conv.0.conv", + "start":0, + "end":32, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.0.conv.2.conv", + "start":0, + "end":32, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.conv.2.bn", + "start":0, + "end":32, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.conv.2.conv", + "start":0, + "end":32, + 
"is_output_channel":true + }, + { + "name":"backbone.layer3.1.conv.2.bn", + "start":0, + "end":32, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":32, + "is_output_channel":true + }, + { + "name":"backbone.layer3.2.conv.2.conv", + "start":0, + "end":32, + "is_output_channel":true + }, + { + "name":"backbone.layer3.2.conv.2.bn", + "start":0, + "end":32, + "is_output_channel":true + } + ] + }, + "choice":21 + }, + "backbone.layer3.1.conv.0.conv_(0, 192)_192":{ + "init_args":{ + "num_channels":192, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.1.conv.0.bn", + "start":0, + "end":192, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv.1.conv", + "start":0, + "end":192, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv.1.bn", + "start":0, + "end":192, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv.2.conv", + "start":0, + "end":192, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.1.conv.0.conv", + "start":0, + "end":192, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.conv.0.bn", + "start":0, + "end":192, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.conv.1.conv", + "start":0, + "end":192, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.conv.1.bn", + "start":0, + "end":192, + "is_output_channel":true + } + ] + }, + "choice":41 + }, + "backbone.layer3.2.conv.0.conv_(0, 192)_192":{ + "init_args":{ + "num_channels":192, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.2.conv.0.bn", + "start":0, + "end":192, + "is_output_channel":false + }, + { + "name":"backbone.layer3.2.conv.1.conv", + "start":0, + "end":192, + "is_output_channel":false + }, + { + "name":"backbone.layer3.2.conv.1.bn", + 
"start":0, + "end":192, + "is_output_channel":false + }, + { + "name":"backbone.layer3.2.conv.2.conv", + "start":0, + "end":192, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.2.conv.0.conv", + "start":0, + "end":192, + "is_output_channel":true + }, + { + "name":"backbone.layer3.2.conv.0.bn", + "start":0, + "end":192, + "is_output_channel":true + }, + { + "name":"backbone.layer3.2.conv.1.conv", + "start":0, + "end":192, + "is_output_channel":true + }, + { + "name":"backbone.layer3.2.conv.1.bn", + "start":0, + "end":192, + "is_output_channel":true + } + ] + }, + "choice":22 + }, + "backbone.layer4.0.conv.0.conv_(0, 192)_192":{ + "init_args":{ + "num_channels":192, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.0.conv.0.bn", + "start":0, + "end":192, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.conv.1.conv", + "start":0, + "end":192, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.conv.1.bn", + "start":0, + "end":192, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.conv.2.conv", + "start":0, + "end":192, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.0.conv.0.conv", + "start":0, + "end":192, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.conv.0.bn", + "start":0, + "end":192, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.conv.1.conv", + "start":0, + "end":192, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.conv.1.bn", + "start":0, + "end":192, + "is_output_channel":true + } + ] + }, + "choice":60 + }, + "backbone.layer4.0.conv.2.conv_(0, 64)_64":{ + "init_args":{ + "num_channels":64, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.0.conv.2.bn", + "start":0, + "end":64, + 
"is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv.0.conv", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv.2.bn", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer4.2.conv.0.conv", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer4.2.conv.2.bn", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer4.3.conv.0.conv", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer4.3.conv.2.bn", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer5.0.conv.0.conv", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.0.conv.2.conv", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.conv.2.bn", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.conv.2.conv", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.conv.2.bn", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer4.2.conv.2.conv", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer4.2.conv.2.bn", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer4.3.conv.2.conv", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer4.3.conv.2.bn", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, + "choice":24 + }, + "backbone.layer4.1.conv.0.conv_(0, 384)_384":{ + "init_args":{ + "num_channels":384, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + 
"input_related":[ + { + "name":"backbone.layer4.1.conv.0.bn", + "start":0, + "end":384, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv.1.conv", + "start":0, + "end":384, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv.1.bn", + "start":0, + "end":384, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv.2.conv", + "start":0, + "end":384, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.1.conv.0.conv", + "start":0, + "end":384, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.conv.0.bn", + "start":0, + "end":384, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.conv.1.conv", + "start":0, + "end":384, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.conv.1.bn", + "start":0, + "end":384, + "is_output_channel":true + } + ] + }, + "choice":44 + }, + "backbone.layer4.2.conv.0.conv_(0, 384)_384":{ + "init_args":{ + "num_channels":384, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.2.conv.0.bn", + "start":0, + "end":384, + "is_output_channel":false + }, + { + "name":"backbone.layer4.2.conv.1.conv", + "start":0, + "end":384, + "is_output_channel":false + }, + { + "name":"backbone.layer4.2.conv.1.bn", + "start":0, + "end":384, + "is_output_channel":false + }, + { + "name":"backbone.layer4.2.conv.2.conv", + "start":0, + "end":384, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.2.conv.0.conv", + "start":0, + "end":384, + "is_output_channel":true + }, + { + "name":"backbone.layer4.2.conv.0.bn", + "start":0, + "end":384, + "is_output_channel":true + }, + { + "name":"backbone.layer4.2.conv.1.conv", + "start":0, + "end":384, + "is_output_channel":true + }, + { + "name":"backbone.layer4.2.conv.1.bn", + "start":0, + "end":384, + "is_output_channel":true + } + ] + }, + "choice":272 + }, + 
"backbone.layer4.3.conv.0.conv_(0, 384)_384":{ + "init_args":{ + "num_channels":384, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.3.conv.0.bn", + "start":0, + "end":384, + "is_output_channel":false + }, + { + "name":"backbone.layer4.3.conv.1.conv", + "start":0, + "end":384, + "is_output_channel":false + }, + { + "name":"backbone.layer4.3.conv.1.bn", + "start":0, + "end":384, + "is_output_channel":false + }, + { + "name":"backbone.layer4.3.conv.2.conv", + "start":0, + "end":384, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.3.conv.0.conv", + "start":0, + "end":384, + "is_output_channel":true + }, + { + "name":"backbone.layer4.3.conv.0.bn", + "start":0, + "end":384, + "is_output_channel":true + }, + { + "name":"backbone.layer4.3.conv.1.conv", + "start":0, + "end":384, + "is_output_channel":true + }, + { + "name":"backbone.layer4.3.conv.1.bn", + "start":0, + "end":384, + "is_output_channel":true + } + ] + }, + "choice":272 + }, + "backbone.layer5.0.conv.0.conv_(0, 384)_384":{ + "init_args":{ + "num_channels":384, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer5.0.conv.0.bn", + "start":0, + "end":384, + "is_output_channel":false + }, + { + "name":"backbone.layer5.0.conv.1.conv", + "start":0, + "end":384, + "is_output_channel":false + }, + { + "name":"backbone.layer5.0.conv.1.bn", + "start":0, + "end":384, + "is_output_channel":false + }, + { + "name":"backbone.layer5.0.conv.2.conv", + "start":0, + "end":384, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer5.0.conv.0.conv", + "start":0, + "end":384, + "is_output_channel":true + }, + { + "name":"backbone.layer5.0.conv.0.bn", + "start":0, + "end":384, + "is_output_channel":true + }, + { + "name":"backbone.layer5.0.conv.1.conv", + "start":0, + 
"end":384, + "is_output_channel":true + }, + { + "name":"backbone.layer5.0.conv.1.bn", + "start":0, + "end":384, + "is_output_channel":true + } + ] + }, + "choice":310 + }, + "backbone.layer5.0.conv.2.conv_(0, 96)_96":{ + "init_args":{ + "num_channels":96, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer5.0.conv.2.bn", + "start":0, + "end":96, + "is_output_channel":false + }, + { + "name":"backbone.layer5.1.conv.0.conv", + "start":0, + "end":96, + "is_output_channel":false + }, + { + "name":"backbone.layer5.1.conv.2.bn", + "start":0, + "end":96, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":96, + "is_output_channel":false + }, + { + "name":"backbone.layer5.2.conv.0.conv", + "start":0, + "end":96, + "is_output_channel":false + }, + { + "name":"backbone.layer5.2.conv.2.bn", + "start":0, + "end":96, + "is_output_channel":false + }, + { + "name":"backbone.layer6.0.conv.0.conv", + "start":0, + "end":96, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer5.0.conv.2.conv", + "start":0, + "end":96, + "is_output_channel":true + }, + { + "name":"backbone.layer5.0.conv.2.bn", + "start":0, + "end":96, + "is_output_channel":true + }, + { + "name":"backbone.layer5.1.conv.2.conv", + "start":0, + "end":96, + "is_output_channel":true + }, + { + "name":"backbone.layer5.1.conv.2.bn", + "start":0, + "end":96, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":96, + "is_output_channel":true + }, + { + "name":"backbone.layer5.2.conv.2.conv", + "start":0, + "end":96, + "is_output_channel":true + }, + { + "name":"backbone.layer5.2.conv.2.bn", + "start":0, + "end":96, + "is_output_channel":true + } + ] + }, + "choice":36 + }, + "backbone.layer5.1.conv.0.conv_(0, 576)_576":{ + "init_args":{ + "num_channels":576, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 
+ }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer5.1.conv.0.bn", + "start":0, + "end":576, + "is_output_channel":false + }, + { + "name":"backbone.layer5.1.conv.1.conv", + "start":0, + "end":576, + "is_output_channel":false + }, + { + "name":"backbone.layer5.1.conv.1.bn", + "start":0, + "end":576, + "is_output_channel":false + }, + { + "name":"backbone.layer5.1.conv.2.conv", + "start":0, + "end":576, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer5.1.conv.0.conv", + "start":0, + "end":576, + "is_output_channel":true + }, + { + "name":"backbone.layer5.1.conv.0.bn", + "start":0, + "end":576, + "is_output_channel":true + }, + { + "name":"backbone.layer5.1.conv.1.conv", + "start":0, + "end":576, + "is_output_channel":true + }, + { + "name":"backbone.layer5.1.conv.1.bn", + "start":0, + "end":576, + "is_output_channel":true + } + ] + }, + "choice":294 + }, + "backbone.layer5.2.conv.0.conv_(0, 576)_576":{ + "init_args":{ + "num_channels":576, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer5.2.conv.0.bn", + "start":0, + "end":576, + "is_output_channel":false + }, + { + "name":"backbone.layer5.2.conv.1.conv", + "start":0, + "end":576, + "is_output_channel":false + }, + { + "name":"backbone.layer5.2.conv.1.bn", + "start":0, + "end":576, + "is_output_channel":false + }, + { + "name":"backbone.layer5.2.conv.2.conv", + "start":0, + "end":576, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer5.2.conv.0.conv", + "start":0, + "end":576, + "is_output_channel":true + }, + { + "name":"backbone.layer5.2.conv.0.bn", + "start":0, + "end":576, + "is_output_channel":true + }, + { + "name":"backbone.layer5.2.conv.1.conv", + "start":0, + "end":576, + "is_output_channel":true + }, + { + "name":"backbone.layer5.2.conv.1.bn", + "start":0, + "end":576, + "is_output_channel":true + } + ] + }, + "choice":351 
+ }, + "backbone.layer6.0.conv.0.conv_(0, 576)_576":{ + "init_args":{ + "num_channels":576, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer6.0.conv.0.bn", + "start":0, + "end":576, + "is_output_channel":false + }, + { + "name":"backbone.layer6.0.conv.1.conv", + "start":0, + "end":576, + "is_output_channel":false + }, + { + "name":"backbone.layer6.0.conv.1.bn", + "start":0, + "end":576, + "is_output_channel":false + }, + { + "name":"backbone.layer6.0.conv.2.conv", + "start":0, + "end":576, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer6.0.conv.0.conv", + "start":0, + "end":576, + "is_output_channel":true + }, + { + "name":"backbone.layer6.0.conv.0.bn", + "start":0, + "end":576, + "is_output_channel":true + }, + { + "name":"backbone.layer6.0.conv.1.conv", + "start":0, + "end":576, + "is_output_channel":true + }, + { + "name":"backbone.layer6.0.conv.1.bn", + "start":0, + "end":576, + "is_output_channel":true + } + ] + }, + "choice":693 + }, + "backbone.layer6.0.conv.2.conv_(0, 160)_160":{ + "init_args":{ + "num_channels":160, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer6.0.conv.2.bn", + "start":0, + "end":160, + "is_output_channel":false + }, + { + "name":"backbone.layer6.1.conv.0.conv", + "start":0, + "end":160, + "is_output_channel":false + }, + { + "name":"backbone.layer6.1.conv.2.bn", + "start":0, + "end":160, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":160, + "is_output_channel":false + }, + { + "name":"backbone.layer6.2.conv.0.conv", + "start":0, + "end":160, + "is_output_channel":false + }, + { + "name":"backbone.layer6.2.conv.2.bn", + "start":0, + "end":160, + "is_output_channel":false + }, + { + "name":"backbone.layer7.0.conv.0.conv", + "start":0, + "end":160, + 
"is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer6.0.conv.2.conv", + "start":0, + "end":160, + "is_output_channel":true + }, + { + "name":"backbone.layer6.0.conv.2.bn", + "start":0, + "end":160, + "is_output_channel":true + }, + { + "name":"backbone.layer6.1.conv.2.conv", + "start":0, + "end":160, + "is_output_channel":true + }, + { + "name":"backbone.layer6.1.conv.2.bn", + "start":0, + "end":160, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":160, + "is_output_channel":true + }, + { + "name":"backbone.layer6.2.conv.2.conv", + "start":0, + "end":160, + "is_output_channel":true + }, + { + "name":"backbone.layer6.2.conv.2.bn", + "start":0, + "end":160, + "is_output_channel":true + } + ] + }, + "choice":80 + }, + "backbone.layer6.1.conv.0.conv_(0, 960)_960":{ + "init_args":{ + "num_channels":960, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer6.1.conv.0.bn", + "start":0, + "end":960, + "is_output_channel":false + }, + { + "name":"backbone.layer6.1.conv.1.conv", + "start":0, + "end":960, + "is_output_channel":false + }, + { + "name":"backbone.layer6.1.conv.1.bn", + "start":0, + "end":960, + "is_output_channel":false + }, + { + "name":"backbone.layer6.1.conv.2.conv", + "start":0, + "end":960, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer6.1.conv.0.conv", + "start":0, + "end":960, + "is_output_channel":true + }, + { + "name":"backbone.layer6.1.conv.0.bn", + "start":0, + "end":960, + "is_output_channel":true + }, + { + "name":"backbone.layer6.1.conv.1.conv", + "start":0, + "end":960, + "is_output_channel":true + }, + { + "name":"backbone.layer6.1.conv.1.bn", + "start":0, + "end":960, + "is_output_channel":true + } + ] + }, + "choice":96 + }, + "backbone.layer6.2.conv.0.conv_(0, 960)_960":{ + "init_args":{ + "num_channels":960, + "choice_mode":"number", + 
"divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer6.2.conv.0.bn", + "start":0, + "end":960, + "is_output_channel":false + }, + { + "name":"backbone.layer6.2.conv.1.conv", + "start":0, + "end":960, + "is_output_channel":false + }, + { + "name":"backbone.layer6.2.conv.1.bn", + "start":0, + "end":960, + "is_output_channel":false + }, + { + "name":"backbone.layer6.2.conv.2.conv", + "start":0, + "end":960, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer6.2.conv.0.conv", + "start":0, + "end":960, + "is_output_channel":true + }, + { + "name":"backbone.layer6.2.conv.0.bn", + "start":0, + "end":960, + "is_output_channel":true + }, + { + "name":"backbone.layer6.2.conv.1.conv", + "start":0, + "end":960, + "is_output_channel":true + }, + { + "name":"backbone.layer6.2.conv.1.bn", + "start":0, + "end":960, + "is_output_channel":true + } + ] + }, + "choice":864 + }, + "backbone.layer7.0.conv.0.conv_(0, 960)_960":{ + "init_args":{ + "num_channels":960, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer7.0.conv.0.bn", + "start":0, + "end":960, + "is_output_channel":false + }, + { + "name":"backbone.layer7.0.conv.1.conv", + "start":0, + "end":960, + "is_output_channel":false + }, + { + "name":"backbone.layer7.0.conv.1.bn", + "start":0, + "end":960, + "is_output_channel":false + }, + { + "name":"backbone.layer7.0.conv.2.conv", + "start":0, + "end":960, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer7.0.conv.0.conv", + "start":0, + "end":960, + "is_output_channel":true + }, + { + "name":"backbone.layer7.0.conv.0.bn", + "start":0, + "end":960, + "is_output_channel":true + }, + { + "name":"backbone.layer7.0.conv.1.conv", + "start":0, + "end":960, + "is_output_channel":true + }, + { + "name":"backbone.layer7.0.conv.1.bn", + "start":0, + "end":960, + 
"is_output_channel":true + } + ] + }, + "choice":960 + }, + "backbone.layer7.0.conv.2.conv_(0, 320)_320":{ + "init_args":{ + "num_channels":320, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer7.0.conv.2.bn", + "start":0, + "end":320, + "is_output_channel":false + }, + { + "name":"backbone.conv2.conv", + "start":0, + "end":320, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer7.0.conv.2.conv", + "start":0, + "end":320, + "is_output_channel":true + }, + { + "name":"backbone.layer7.0.conv.2.bn", + "start":0, + "end":320, + "is_output_channel":true + } + ] + }, + "choice":192 + }, + "backbone.conv2.conv_(0, 1280)_1280":{ + "init_args":{ + "num_channels":1280, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.conv2.bn", + "start":0, + "end":1280, + "is_output_channel":false + }, + { + "name":"head.fc", + "start":0, + "end":1280, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.conv2.conv", + "start":0, + "end":1280, + "is_output_channel":true + }, + { + "name":"backbone.conv2.bn", + "start":0, + "end":1280, + "is_output_channel":true + } + ] + }, + "choice":1280 + } + } + }, + "parse_cfg":{ + "type":"ChannelAnalyzer", + "demo_input":[ + 1, + 3, + 224, + 224 + ], + "tracer_type":"BackwardTracer" + } +} diff --git a/configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.yaml b/configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.yaml deleted file mode 100644 index 41a8be9ec..000000000 --- a/configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.yaml +++ /dev/null @@ -1,9 +0,0 @@ -{0: 9, -1: 10, -2: 36, 3: 16, 4: 16, -5: 48, 6: 21, 7: 41, 8: 22, -9: 60, 10: 24, 11: 44, 12: 272, 13: 272, -14: 310, 15: 36, 16: 294, 17: 351, -18: 693, 19: 80, 20: 96, 21: 864, -22: 1440, 23: 192, -24: 1664} diff --git a/configs/pruning/mmcls/dmcp/DMCP_R50_2G.json 
b/configs/pruning/mmcls/dmcp/DMCP_R50_2G.json new file mode 100644 index 000000000..bdbbf1c21 --- /dev/null +++ b/configs/pruning/mmcls/dmcp/DMCP_R50_2G.json @@ -0,0 +1,1933 @@ +{ + "type":"DMCPChannelMutator", + "channel_unit_cfg":{ + "type":"DMCPChannelUnit", + "default_args":{ + "choice_mode":"number" + }, + "units":{ + "backbone.conv1_(0, 64)_64":{ + "init_args":{ + "num_channels":64, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.conv1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.downsample.0", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.bn1", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, + "choice":52 + }, + "backbone.layer1.0.conv1_(0, 64)_64":{ + "init_args":{ + "num_channels":64, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.0.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.conv2", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer1.0.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.bn1", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, + "choice":22 + }, + "backbone.layer1.0.conv2_(0, 64)_64":{ + "init_args":{ + "num_channels":64, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.0.bn2", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.conv3", + "start":0, 
+ "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer1.0.conv2", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.bn2", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, + "choice":22 + }, + "backbone.layer1.0.conv3_(0, 256)_256":{ + "init_args":{ + "num_channels":256, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.0.bn3", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.downsample.1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.conv1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.bn3", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer1.2.conv1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer1.2.bn3", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.conv1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.downsample.0", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer1.0.conv3", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.bn3", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.downsample.0", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.downsample.1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.conv3", + "start":0, + "end":256, + "is_output_channel":true + }, + { 
+ "name":"backbone.layer1.1.bn3", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer1.2.conv3", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer1.2.bn3", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, + "choice":106 + }, + "backbone.layer1.1.conv1_(0, 64)_64":{ + "init_args":{ + "num_channels":64, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.1.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.conv2", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer1.1.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.bn1", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, + "choice":16 + }, + "backbone.layer1.1.conv2_(0, 64)_64":{ + "init_args":{ + "num_channels":64, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.1.bn2", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.conv3", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer1.1.conv2", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.bn2", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, + "choice":16 + }, + "backbone.layer1.2.conv1_(0, 64)_64":{ + "init_args":{ + "num_channels":64, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.2.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.2.conv2", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + 
"name":"backbone.layer1.2.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.2.bn1", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, + "choice":40 + }, + "backbone.layer1.2.conv2_(0, 64)_64":{ + "init_args":{ + "num_channels":64, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.2.bn2", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.2.conv3", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer1.2.conv2", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.2.bn2", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, + "choice":16 + }, + "backbone.layer2.0.conv1_(0, 128)_128":{ + "init_args":{ + "num_channels":128, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.0.bn1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.conv2", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.0.conv1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.bn1", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, + "choice":68 + }, + "backbone.layer2.0.conv2_(0, 128)_128":{ + "init_args":{ + "num_channels":128, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.0.bn2", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.conv3", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.0.conv2", + "start":0, + "end":128, + "is_output_channel":true + }, 
+ { + "name":"backbone.layer2.0.bn2", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, + "choice":56 + }, + "backbone.layer2.0.conv3_(0, 512)_512":{ + "init_args":{ + "num_channels":512, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.0.bn3", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.downsample.1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.bn3", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer2.2.conv1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer2.2.bn3", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer2.3.conv1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer2.3.bn3", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.conv1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.downsample.0", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.0.conv3", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.bn3", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.downsample.0", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.downsample.1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.conv3", + "start":0, + "end":512, + 
"is_output_channel":true + }, + { + "name":"backbone.layer2.1.bn3", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer2.2.conv3", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer2.2.bn3", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer2.3.conv3", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer2.3.bn3", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, + "choice":155 + }, + "backbone.layer2.1.conv1_(0, 128)_128":{ + "init_args":{ + "num_channels":128, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.1.bn1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv2", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.1.conv1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.bn1", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, + "choice":32 + }, + "backbone.layer2.1.conv2_(0, 128)_128":{ + "init_args":{ + "num_channels":128, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.1.bn2", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv3", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.1.conv2", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.bn2", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, + "choice":68 + }, + "backbone.layer2.2.conv1_(0, 128)_128":{ + "init_args":{ + "num_channels":128, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + 
"input_related":[ + { + "name":"backbone.layer2.2.bn1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.2.conv2", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.2.conv1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.2.bn1", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, + "choice":56 + }, + "backbone.layer2.2.conv2_(0, 128)_128":{ + "init_args":{ + "num_channels":128, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.2.bn2", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.2.conv3", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.2.conv2", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.2.bn2", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, + "choice":56 + }, + "backbone.layer2.3.conv1_(0, 128)_128":{ + "init_args":{ + "num_channels":128, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.3.bn1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.3.conv2", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.3.conv1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.3.bn1", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, + "choice":80 + }, + "backbone.layer2.3.conv2_(0, 128)_128":{ + "init_args":{ + "num_channels":128, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.3.bn2", + "start":0, + "end":128, 
+ "is_output_channel":false + }, + { + "name":"backbone.layer2.3.conv3", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.3.conv2", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.3.bn2", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, + "choice":92 + }, + "backbone.layer3.0.conv1_(0, 256)_256":{ + "init_args":{ + "num_channels":256, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.0.bn1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.conv2", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.0.conv1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.bn1", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, + "choice":256 + }, + "backbone.layer3.0.conv2_(0, 256)_256":{ + "init_args":{ + "num_channels":256, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.0.bn2", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.conv3", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.0.conv2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.bn2", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, + "choice":256 + }, + "backbone.layer3.0.conv3_(0, 1024)_1024":{ + "init_args":{ + "num_channels":1024, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.0.bn3", + "start":0, + "end":1024, + "is_output_channel":false + }, + { + 
"name":"backbone.layer3.0.downsample.1", + "start":0, + "end":1024, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":1024, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv1", + "start":0, + "end":1024, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.bn3", + "start":0, + "end":1024, + "is_output_channel":false + }, + { + "name":"backbone.layer3.2.conv1", + "start":0, + "end":1024, + "is_output_channel":false + }, + { + "name":"backbone.layer3.2.bn3", + "start":0, + "end":1024, + "is_output_channel":false + }, + { + "name":"backbone.layer3.3.conv1", + "start":0, + "end":1024, + "is_output_channel":false + }, + { + "name":"backbone.layer3.3.bn3", + "start":0, + "end":1024, + "is_output_channel":false + }, + { + "name":"backbone.layer3.4.conv1", + "start":0, + "end":1024, + "is_output_channel":false + }, + { + "name":"backbone.layer3.4.bn3", + "start":0, + "end":1024, + "is_output_channel":false + }, + { + "name":"backbone.layer3.5.conv1", + "start":0, + "end":1024, + "is_output_channel":false + }, + { + "name":"backbone.layer3.5.bn3", + "start":0, + "end":1024, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.conv1", + "start":0, + "end":1024, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.downsample.0", + "start":0, + "end":1024, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.0.conv3", + "start":0, + "end":1024, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.bn3", + "start":0, + "end":1024, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.downsample.0", + "start":0, + "end":1024, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.downsample.1", + "start":0, + "end":1024, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":1024, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.conv3", + "start":0, + "end":1024, + 
"is_output_channel":true + }, + { + "name":"backbone.layer3.1.bn3", + "start":0, + "end":1024, + "is_output_channel":true + }, + { + "name":"backbone.layer3.2.conv3", + "start":0, + "end":1024, + "is_output_channel":true + }, + { + "name":"backbone.layer3.2.bn3", + "start":0, + "end":1024, + "is_output_channel":true + }, + { + "name":"backbone.layer3.3.conv3", + "start":0, + "end":1024, + "is_output_channel":true + }, + { + "name":"backbone.layer3.3.bn3", + "start":0, + "end":1024, + "is_output_channel":true + }, + { + "name":"backbone.layer3.4.conv3", + "start":0, + "end":1024, + "is_output_channel":true + }, + { + "name":"backbone.layer3.4.bn3", + "start":0, + "end":1024, + "is_output_channel":true + }, + { + "name":"backbone.layer3.5.conv3", + "start":0, + "end":1024, + "is_output_channel":true + }, + { + "name":"backbone.layer3.5.bn3", + "start":0, + "end":1024, + "is_output_channel":true + } + ] + }, + "choice":1024 + }, + "backbone.layer3.1.conv1_(0, 256)_256":{ + "init_args":{ + "num_channels":256, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.1.bn1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv2", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.1.conv1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.bn1", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, + "choice":106 + }, + "backbone.layer3.1.conv2_(0, 256)_256":{ + "init_args":{ + "num_channels":256, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.1.bn2", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv3", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + 
{ + "name":"backbone.layer3.1.conv2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.bn2", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, + "choice":106 + }, + "backbone.layer3.2.conv1_(0, 256)_256":{ + "init_args":{ + "num_channels":256, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.2.bn1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.2.conv2", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.2.conv1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.2.bn1", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, + "choice":131 + }, + "backbone.layer3.2.conv2_(0, 256)_256":{ + "init_args":{ + "num_channels":256, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.2.bn2", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.2.conv3", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.2.conv2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.2.bn2", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, + "choice":256 + }, + "backbone.layer3.3.conv1_(0, 256)_256":{ + "init_args":{ + "num_channels":256, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.3.bn1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.3.conv2", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.3.conv1", + "start":0, + "end":256, + 
"is_output_channel":true + }, + { + "name":"backbone.layer3.3.bn1", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, + "choice":131 + }, + "backbone.layer3.3.conv2_(0, 256)_256":{ + "init_args":{ + "num_channels":256, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.3.bn2", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.3.conv3", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.3.conv2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.3.bn2", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, + "choice":256 + }, + "backbone.layer3.4.conv1_(0, 256)_256":{ + "init_args":{ + "num_channels":256, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.4.bn1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.4.conv2", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.4.conv1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.4.bn1", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, + "choice":256 + }, + "backbone.layer3.4.conv2_(0, 256)_256":{ + "init_args":{ + "num_channels":256, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.4.bn2", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.4.conv3", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.4.conv2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.4.bn2", + 
"start":0, + "end":256, + "is_output_channel":true + } + ] + }, + "choice":256 + }, + "backbone.layer3.5.conv1_(0, 256)_256":{ + "init_args":{ + "num_channels":256, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.5.bn1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.5.conv2", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.5.conv1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.5.bn1", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, + "choice":256 + }, + "backbone.layer3.5.conv2_(0, 256)_256":{ + "init_args":{ + "num_channels":256, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.5.bn2", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.5.conv3", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.5.conv2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.5.bn2", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, + "choice":256 + }, + "backbone.layer4.0.conv1_(0, 512)_512":{ + "init_args":{ + "num_channels":512, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.0.bn1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.conv2", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.0.conv1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.bn1", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, + "choice":512 
+ }, + "backbone.layer4.0.conv2_(0, 512)_512":{ + "init_args":{ + "num_channels":512, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.0.bn2", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.conv3", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.0.conv2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.bn2", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, + "choice":512 + }, + "backbone.layer4.0.conv3_(0, 2048)_2048":{ + "init_args":{ + "num_channels":2048, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.0.bn3", + "start":0, + "end":2048, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.downsample.1", + "start":0, + "end":2048, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":2048, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv1", + "start":0, + "end":2048, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.bn3", + "start":0, + "end":2048, + "is_output_channel":false + }, + { + "name":"backbone.layer4.2.conv1", + "start":0, + "end":2048, + "is_output_channel":false + }, + { + "name":"backbone.layer4.2.bn3", + "start":0, + "end":2048, + "is_output_channel":false + }, + { + "name":"head.fc", + "start":0, + "end":2048, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.0.conv3", + "start":0, + "end":2048, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.bn3", + "start":0, + "end":2048, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.downsample.0", + "start":0, + "end":2048, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.downsample.1", + 
"start":0, + "end":2048, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":2048, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.conv3", + "start":0, + "end":2048, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.bn3", + "start":0, + "end":2048, + "is_output_channel":true + }, + { + "name":"backbone.layer4.2.conv3", + "start":0, + "end":2048, + "is_output_channel":true + }, + { + "name":"backbone.layer4.2.bn3", + "start":0, + "end":2048, + "is_output_channel":true + } + ] + }, + "choice":2048 + }, + "backbone.layer4.1.conv1_(0, 512)_512":{ + "init_args":{ + "num_channels":512, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.1.bn1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv2", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.1.conv1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.bn1", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, + "choice":512 + }, + "backbone.layer4.1.conv2_(0, 512)_512":{ + "init_args":{ + "num_channels":512, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.1.bn2", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv3", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.1.conv2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.bn2", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, + "choice":461 + }, + "backbone.layer4.2.conv1_(0, 512)_512":{ + "init_args":{ + "num_channels":512, + "choice_mode":"number", + "divisor":1, + "min_value":1, + 
"min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.2.bn1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.2.conv2", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.2.conv1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.2.bn1", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, + "choice":512 + }, + "backbone.layer4.2.conv2_(0, 512)_512":{ + "init_args":{ + "num_channels":512, + "choice_mode":"number", + "divisor":1, + "min_value":1, + "min_ratio":0.5 + }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.2.bn2", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.2.conv3", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.2.conv2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.2.bn2", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, + "choice":512 + } + } + }, + "parse_cfg":{ + "type":"ChannelAnalyzer", + "demo_input":[ + 1, + 3, + 224, + 224 + ], + "tracer_type":"BackwardTracer" + } +} diff --git a/configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml b/configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml deleted file mode 100644 index 3af6c995c..000000000 --- a/configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml +++ /dev/null @@ -1,5 +0,0 @@ -{0: 52, -1: 22, 2: 22, 3: 106, 4: 16, 5: 16, 6: 40, 7: 16, -8: 68, 9: 56, 10: 155, 11: 32, 12: 68, 13: 56, 14: 56, 15: 80, 16: 92, -17: 256, 18: 256, 19: 1024, 20: 106, 21: 106, 22: 131, 23: 256, 24: 131, 25: 256, 26: 256, 27: 256, 28: 256, 29: 256, -30: 512, 31: 512, 32: 2048, 33: 512, 34: 461, 35: 512, 36: 512} diff --git a/configs/pruning/mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py b/configs/pruning/mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py index e97eab2a8..8a68430cf 100644 --- 
a/configs/pruning/mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py +++ b/configs/pruning/mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py @@ -7,7 +7,7 @@ type='SGD', lr=0.8, momentum=0.9, weight_decay=0.00004, nesterov=True), paramwise_cfg=paramwise_cfg) -max_epochs = 250 +max_epochs = 100 _base_.param_scheduler = [ # warm up learning rate scheduler @@ -34,8 +34,12 @@ custom_hooks = None # model settings -model = _base_.model -model['fix_subnet'] = 'configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.yaml' +model = dict( + _scope_='mmrazor', + type='sub_model', + cfg=_base_.supernet, + fix_subnet='configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.json', + mode='mutator') default_hooks = _base_.default_hooks default_hooks['checkpoint'] = dict(type='CheckpointHook', interval=5) diff --git a/configs/pruning/mmcls/dmcp/dmcp_mbv2_supernet_32xb64.py b/configs/pruning/mmcls/dmcp/dmcp_mbv2_supernet_32xb64.py index 306187ead..4109964c4 100644 --- a/configs/pruning/mmcls/dmcp/dmcp_mbv2_supernet_32xb64.py +++ b/configs/pruning/mmcls/dmcp/dmcp_mbv2_supernet_32xb64.py @@ -1,5 +1,4 @@ _base_ = [ - 'mmcls::_base_/schedules/imagenet_bs256.py', 'mmcls::_base_/default_runtime.py', '../../../_base_/settings/imagenet_bs2048_dmcp.py', ] diff --git a/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py b/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py index aa2f12063..eb70e639b 100644 --- a/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py +++ b/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py @@ -7,7 +7,7 @@ type='SGD', lr=0.8, momentum=0.9, weight_decay=0.0001, nesterov=True), paramwise_cfg=paramwise_cfg) -max_epochs = 250 +max_epochs = 100 _base_.param_scheduler = [ # warm up learning rate scheduler @@ -33,9 +33,12 @@ custom_hooks = None -# model settings -model = _base_.model -model['fix_subnet'] = 'configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml' +model = dict( + _scope_='mmrazor', + type='sub_model', + cfg=_base_.supernet, + fix_subnet='configs/pruning/mmcls/dmcp/DMCP_R50_2G.json', + 
mode='mutator') default_hooks = _base_.default_hooks default_hooks['checkpoint'] = dict(type='CheckpointHook', interval=5) diff --git a/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_32xb64.py b/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_32xb64.py index b0934af76..9aeaeb838 100644 --- a/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_32xb64.py +++ b/configs/pruning/mmcls/dmcp/dmcp_resnet50_supernet_32xb64.py @@ -1,5 +1,4 @@ _base_ = [ - 'mmcls::_base_/schedules/imagenet_bs256.py', 'mmcls::_base_/default_runtime.py', '../../../_base_/settings/imagenet_bs2048_dmcp.py', ] @@ -32,8 +31,7 @@ model = dict( _scope_='mmrazor', type='DMCP', - architecture=dict( - cfg_path='mmcls::resnet/resnet50_8xb32_in1k.py', pretrained=False), + architecture=supernet, distiller=dict( type='ConfigurableDistiller', teacher_recorders=dict( diff --git a/configs/pruning/mmdet/dcff/dcff_compact_faster_rcnn_resnet50_8xb4_coco.py b/configs/pruning/mmdet/dcff/dcff_compact_faster_rcnn_resnet50_8xb4_coco.py index 73e64f1aa..5a2db5c11 100644 --- a/configs/pruning/mmdet/dcff/dcff_compact_faster_rcnn_resnet50_8xb4_coco.py +++ b/configs/pruning/mmdet/dcff/dcff_compact_faster_rcnn_resnet50_8xb4_coco.py @@ -1,7 +1,7 @@ _base_ = ['dcff_faster_rcnn_resnet50_8xb4_coco.py'] # model settings -model_cfg = dict( +_base_.model = dict( _scope_='mmrazor', type='sub_model', cfg=_base_.architecture, diff --git a/configs/pruning/mmdet/dcff/fix_subnet.json b/configs/pruning/mmdet/dcff/fix_subnet.json index 9722b07e5..0522765d1 100644 --- a/configs/pruning/mmdet/dcff/fix_subnet.json +++ b/configs/pruning/mmdet/dcff/fix_subnet.json @@ -14,6 +14,102 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.conv1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.bn2", + "start":0, + "end":64, + "is_output_channel":false + }, + { + 
"name":"bind_placeholder", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.conv1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.bn2", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.conv1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.downsample.0", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.bn1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.conv2", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.bn2", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.conv2", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.bn2", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, "choice":1.0 }, "backbone.layer1.0.conv1_(0, 64)_64":{ @@ -24,6 +120,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.0.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.conv2", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer1.0.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.bn1", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, "choice":0.640625 }, "backbone.layer1.1.conv1_(0, 64)_64":{ @@ -34,6 +160,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.1.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + 
"name":"backbone.layer1.1.conv2", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer1.1.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.bn1", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, "choice":0.640625 }, "backbone.layer2.0.conv1_(0, 128)_128":{ @@ -44,6 +200,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.0.bn1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.conv2", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.0.conv1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.bn1", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer2.0.conv2_(0, 128)_128":{ @@ -54,6 +240,96 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.0.bn2", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.downsample.1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.bn2", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.conv1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.downsample.0", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.0.conv2", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.bn2", + "start":0, + "end":128, + "is_output_channel":true + }, + { + 
"name":"backbone.layer2.0.downsample.0", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.downsample.1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.conv2", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.bn2", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, "choice":0.59375 }, "backbone.layer2.1.conv1_(0, 128)_128":{ @@ -64,6 +340,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.1.bn1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv2", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.1.conv1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.bn1", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer3.0.conv1_(0, 256)_256":{ @@ -74,6 +380,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.0.bn1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.conv2", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.0.conv1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.bn1", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer3.0.conv2_(0, 256)_256":{ @@ -84,6 +420,96 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.0.bn2", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.downsample.1", + "start":0, + "end":256, + "is_output_channel":false + }, 
+ { + "name":"bind_placeholder", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.bn2", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.conv1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.downsample.0", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.0.conv2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.bn2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.downsample.0", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.downsample.1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.conv2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.bn2", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, "choice":0.59765625 }, "backbone.layer3.1.conv1_(0, 256)_256":{ @@ -94,7 +520,37 @@ "min_value":1, "min_ratio":0.9 }, - "choice":0.6484374 + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.1.bn1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv2", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.1.conv1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.bn1", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, + "choice":0.6484375 }, "backbone.layer4.0.conv1_(0, 512)_512":{ "init_args":{ @@ -104,6 +560,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + 
"name":"backbone.layer4.0.bn1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.conv2", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.0.conv1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.bn1", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, "choice":0.69921875 }, "backbone.layer4.0.conv2_(0, 512)_512":{ @@ -114,6 +600,90 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.0.bn2", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.downsample.1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.bn2", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"head.fc", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.0.conv2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.bn2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.downsample.0", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.downsample.1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.conv2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.bn2", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, "choice":0.69921875 }, "backbone.layer4.1.conv1_(0, 512)_512":{ @@ -124,6 +694,36 @@ "min_value":1, "min_ratio":0.9 }, + 
"channels":{ + "input_related":[ + { + "name":"backbone.layer4.1.bn1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv2", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.1.conv1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.bn1", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, "choice":0.69921875 } } diff --git a/configs/pruning/mmpose/dcff/dcff_compact_topdown_heatmap_resnet50_coco.py b/configs/pruning/mmpose/dcff/dcff_compact_topdown_heatmap_resnet50_coco.py index 8fddabb02..ba5032379 100644 --- a/configs/pruning/mmpose/dcff/dcff_compact_topdown_heatmap_resnet50_coco.py +++ b/configs/pruning/mmpose/dcff/dcff_compact_topdown_heatmap_resnet50_coco.py @@ -1,7 +1,7 @@ _base_ = ['dcff_topdown_heatmap_resnet50_coco.py'] # model settings -model_cfg = dict( +_base_.model = dict( _scope_='mmrazor', type='sub_model', cfg=_base_.architecture, diff --git a/configs/pruning/mmpose/dcff/fix_subnet.json b/configs/pruning/mmpose/dcff/fix_subnet.json index 6c5243e0a..0522765d1 100644 --- a/configs/pruning/mmpose/dcff/fix_subnet.json +++ b/configs/pruning/mmpose/dcff/fix_subnet.json @@ -14,6 +14,102 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.conv1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.bn2", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.conv1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.bn2", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.conv1", + "start":0, + "end":64, + 
"is_output_channel":false + }, + { + "name":"backbone.layer2.0.downsample.0", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.bn1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.conv2", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.bn2", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.conv2", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.bn2", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, "choice":1.0 }, "backbone.layer1.0.conv1_(0, 64)_64":{ @@ -24,6 +120,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.0.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.conv2", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer1.0.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.bn1", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, "choice":0.640625 }, "backbone.layer1.1.conv1_(0, 64)_64":{ @@ -34,6 +160,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.1.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.conv2", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer1.1.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.bn1", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, "choice":0.640625 }, "backbone.layer2.0.conv1_(0, 
128)_128":{ @@ -44,6 +200,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.0.bn1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.conv2", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.0.conv1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.bn1", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer2.0.conv2_(0, 128)_128":{ @@ -54,7 +240,97 @@ "min_value":1, "min_ratio":0.9 }, - "choice":0.59374 + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.0.bn2", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.downsample.1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.bn2", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.conv1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.downsample.0", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.0.conv2", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.bn2", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.downsample.0", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.downsample.1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.conv2", + "start":0, + "end":128, + 
"is_output_channel":true + }, + { + "name":"backbone.layer2.1.bn2", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, + "choice":0.59375 }, "backbone.layer2.1.conv1_(0, 128)_128":{ "init_args":{ @@ -64,6 +340,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.1.bn1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv2", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.1.conv1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.bn1", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer3.0.conv1_(0, 256)_256":{ @@ -74,6 +380,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.0.bn1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.conv2", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.0.conv1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.bn1", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer3.0.conv2_(0, 256)_256":{ @@ -84,6 +420,96 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.0.bn2", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.downsample.1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.bn2", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.conv1", + "start":0, + 
"end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.downsample.0", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.0.conv2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.bn2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.downsample.0", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.downsample.1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.conv2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.bn2", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, "choice":0.59765625 }, "backbone.layer3.1.conv1_(0, 256)_256":{ @@ -94,6 +520,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.1.bn1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv2", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.1.conv1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.bn1", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer4.0.conv1_(0, 512)_512":{ @@ -104,6 +560,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.0.bn1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.conv2", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.0.conv1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.bn1", + "start":0, + "end":512, + 
"is_output_channel":true + } + ] + }, "choice":0.69921875 }, "backbone.layer4.0.conv2_(0, 512)_512":{ @@ -114,6 +600,90 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.0.bn2", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.downsample.1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.bn2", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"head.fc", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.0.conv2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.bn2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.downsample.0", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.downsample.1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.conv2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.bn2", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, "choice":0.69921875 }, "backbone.layer4.1.conv1_(0, 512)_512":{ @@ -124,6 +694,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.1.bn1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv2", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.1.conv1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.bn1", 
+ "start":0, + "end":512, + "is_output_channel":true + } + ] + }, "choice":0.69921875 } } diff --git a/configs/pruning/mmseg/dcff/dcff_compact_pointrend_resnet50_8xb2_cityscapes.py b/configs/pruning/mmseg/dcff/dcff_compact_pointrend_resnet50_8xb2_cityscapes.py index 3a5d86a2b..e6c1eb031 100644 --- a/configs/pruning/mmseg/dcff/dcff_compact_pointrend_resnet50_8xb2_cityscapes.py +++ b/configs/pruning/mmseg/dcff/dcff_compact_pointrend_resnet50_8xb2_cityscapes.py @@ -1,7 +1,7 @@ _base_ = ['dcff_pointrend_resnet50_8xb2_cityscapes.py'] # model settings -model_cfg = dict( +_base_.model = dict( _scope_='mmrazor', type='sub_model', cfg=_base_.architecture, diff --git a/configs/pruning/mmseg/dcff/fix_subnet.json b/configs/pruning/mmseg/dcff/fix_subnet.json index bd9fcb189..0522765d1 100644 --- a/configs/pruning/mmseg/dcff/fix_subnet.json +++ b/configs/pruning/mmseg/dcff/fix_subnet.json @@ -14,6 +14,102 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.conv1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.bn2", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.conv1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.bn2", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.conv1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.downsample.0", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.bn1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.conv2", + "start":0, + 
"end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.bn2", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.conv2", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.bn2", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, "choice":1.0 }, "backbone.layer1.0.conv1_(0, 64)_64":{ @@ -24,6 +120,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.0.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.conv2", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer1.0.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.bn1", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, "choice":0.640625 }, "backbone.layer1.1.conv1_(0, 64)_64":{ @@ -34,6 +160,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.1.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.conv2", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer1.1.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.bn1", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, "choice":0.640625 }, "backbone.layer2.0.conv1_(0, 128)_128":{ @@ -44,6 +200,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.0.bn1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.conv2", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.0.conv1", + "start":0, + "end":128, + 
"is_output_channel":true + }, + { + "name":"backbone.layer2.0.bn1", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer2.0.conv2_(0, 128)_128":{ @@ -54,6 +240,96 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.0.bn2", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.downsample.1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.bn2", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.conv1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.downsample.0", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.0.conv2", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.bn2", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.downsample.0", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.downsample.1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.conv2", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.bn2", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, "choice":0.59375 }, "backbone.layer2.1.conv1_(0, 128)_128":{ @@ -64,6 +340,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.1.bn1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv2", + 
"start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.1.conv1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.bn1", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer3.0.conv1_(0, 256)_256":{ @@ -74,6 +380,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.0.bn1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.conv2", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.0.conv1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.bn1", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer3.0.conv2_(0, 256)_256":{ @@ -84,6 +420,96 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.0.bn2", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.downsample.1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.bn2", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.conv1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.downsample.0", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.0.conv2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.bn2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.downsample.0", + "start":0, + "end":256, 
+ "is_output_channel":true + }, + { + "name":"backbone.layer3.0.downsample.1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.conv2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.bn2", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, "choice":0.59765625 }, "backbone.layer3.1.conv1_(0, 256)_256":{ @@ -94,6 +520,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.1.bn1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv2", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.1.conv1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.bn1", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer4.0.conv1_(0, 512)_512":{ @@ -104,6 +560,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.0.bn1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.conv2", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.0.conv1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.bn1", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, "choice":0.69921875 }, "backbone.layer4.0.conv2_(0, 512)_512":{ @@ -114,6 +600,90 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.0.bn2", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.downsample.1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":512, + 
"is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.bn2", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"head.fc", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.0.conv2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.bn2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.downsample.0", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.downsample.1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.conv2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.bn2", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, "choice":0.69921875 }, "backbone.layer4.1.conv1_(0, 512)_512":{ @@ -124,7 +694,37 @@ "min_value":1, "min_ratio":0.9 }, - "choice":0.69921874 + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.1.bn1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv2", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.1.conv1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.bn1", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, + "choice":0.69921875 } } }, diff --git a/mmrazor/engine/hooks/dmcp_subnet_hook.py b/mmrazor/engine/hooks/dmcp_subnet_hook.py index 3ca13e4e1..bacdcaffd 100644 --- a/mmrazor/engine/hooks/dmcp_subnet_hook.py +++ b/mmrazor/engine/hooks/dmcp_subnet_hook.py @@ -1,12 +1,11 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+import json import os -from typing import Optional, Sequence -import yaml from mmengine.hooks import Hook from mmengine.registry import HOOKS -DATA_BATCH = Optional[Sequence[dict]] +from mmrazor.structures import export_fix_subnet @HOOKS.register_module() @@ -25,14 +24,20 @@ class DMCPSubnetHook(Hook): def __init__(self, subnet_sample_num: int = 10, **kwargs) -> None: self.subnet_sample_num = subnet_sample_num - def _save_subnet(self, arch_space_dict, save_path): - """Save the sampled sub-network structure in yaml format.""" - _cfg = dict() - for k, v in arch_space_dict.items(): - _cfg[k] = int(v) - + def _save_subnet(self, model, runner, save_path): + """Save the sampled sub-network config.""" + fix_subnet, _ = export_fix_subnet( + model, + export_subnet_mode='mutator', + slice_weight=True, + export_channel=True, + ) + fix_subnet = json.dumps(fix_subnet, indent=4, separators=(',', ':')) with open(save_path, 'w') as file: - file.write(yaml.dump(_cfg, allow_unicode=True)) + file.write(fix_subnet) + + runner.logger.info('export finished and ' + f'{save_path} saved in {runner.work_dir}.') def after_run(self, runner): """Save the sampled subnet under target FLOPs. 
@@ -59,12 +64,12 @@ def after_run(self, runner): if i == num_sample: model.set_subnet(mode='expected', arch_train=False) - save_path = os.path.join(root_dir, 'excepted_ch.yaml') + save_path = os.path.join(root_dir, 'excepted_ch.json') runner.logger.info( f'Excepted sample(ES) arch with FlOP(MB):{cur_flops}') else: save_path = os.path.join(root_dir, - 'subnet_{}.yaml'.format(i + 1)) + 'subnet_{}.json'.format(i + 1)) runner.logger.info( f'Driect sample(DS) arch with FlOP(MB): {cur_flops/1e6}') - self._save_subnet(model.mutator.current_choices, save_path) + self._save_subnet(model, runner, save_path) diff --git a/mmrazor/models/algorithms/nas/autoformer.py b/mmrazor/models/algorithms/nas/autoformer.py index 76044cab3..1ac5432d8 100644 --- a/mmrazor/models/algorithms/nas/autoformer.py +++ b/mmrazor/models/algorithms/nas/autoformer.py @@ -8,7 +8,6 @@ from mmrazor.models.mutators import NasMutator from mmrazor.registry import MODELS -from mmrazor.utils import ValidFixMutable from ..base import BaseAlgorithm, LossResults VALID_MUTATOR_TYPE = Union[NasMutator, Dict] @@ -29,8 +28,6 @@ class Autoformer(BaseAlgorithm): or built model. Corresponding to supernet in NAS algorithm. mutator (VALID_MUTATOR_TYPE): The config of :class:`NasMutator` or built mutator. - fix_subnet (str | dict | :obj:`FixSubnet`): The path of yaml file or - loaded dict or built :obj:`FixSubnet`. Defaults to None. data_preprocessor (Optional[Union[dict, nn.Module]]): The pre-process config of :class:`BaseDataPreprocessor`. Defaults to None. init_cfg (Optional[dict]): Init config for ``BaseModule``. @@ -40,24 +37,12 @@ class Autoformer(BaseAlgorithm): def __init__(self, architecture: Union[BaseModel, Dict], mutator: VALID_MUTATOR_TYPE = None, - fix_subnet: Optional[ValidFixMutable] = None, data_preprocessor: Optional[Union[dict, nn.Module]] = None, init_cfg: Optional[dict] = None): super().__init__(architecture, data_preprocessor, init_cfg) - # Autoformer supports supernet training and subnet retraining. 
- # fix_subnet is not None, means subnet retraining. - if fix_subnet: - # Avoid circular import - from mmrazor.structures import load_fix_subnet - - # According to fix_subnet, delete the unchosen part of supernet - load_fix_subnet(self.architecture, fix_subnet) - self.is_supernet = False - else: - self.mutator = self._build_mutator(mutator) - self.mutator.prepare_from_supernet(self.architecture) - self.is_supernet = True + self.mutator = self._build_mutator(mutator) + self.mutator.prepare_from_supernet(self.architecture) def _build_mutator(self, mutator: VALID_MUTATOR_TYPE = None) -> NasMutator: """build mutator.""" @@ -74,6 +59,5 @@ def loss( data_samples: Optional[List[BaseDataElement]] = None, ) -> LossResults: """Calculate losses from a batch of inputs and data samples.""" - if self.is_supernet: - self.mutator.set_choices(self.mutator.sample_choices()) + self.mutator.set_choices(self.mutator.sample_choices()) return self.architecture(batch_inputs, data_samples, mode='loss') diff --git a/mmrazor/models/algorithms/nas/bignas.py b/mmrazor/models/algorithms/nas/bignas.py index 2648488c3..8b4e72004 100644 --- a/mmrazor/models/algorithms/nas/bignas.py +++ b/mmrazor/models/algorithms/nas/bignas.py @@ -15,7 +15,6 @@ from mmrazor.models.utils import (add_prefix, reinitialize_optim_wrapper_count_status) from mmrazor.registry import MODEL_WRAPPERS, MODELS -from mmrazor.utils import ValidFixMutable from ..base import BaseAlgorithm VALID_MUTATOR_TYPE = Union[NasMutator, Dict] @@ -45,8 +44,6 @@ class BigNAS(BaseAlgorithm): built mutator. distiller (VALID_DISTILLER_TYPE): Cfg of :class:`ConfigurableDistiller` or built distiller. - fix_subnet (str | dict | :obj:`FixSubnet`): The path of yaml file or - loaded dict or built :obj:`FixSubnet`. Defaults to None. data_preprocessor (Optional[Union[dict, nn.Module]]): The pre-process config of :class:`BaseDataPreprocessor`. Defaults to None. num_random_samples (int): number of random sample subnets. 
@@ -62,7 +59,6 @@ def __init__(self, architecture: Union[BaseModel, Dict], mutator: VALID_MUTATOR_TYPE = None, distiller: VALID_DISTILLER_TYPE = None, - fix_subnet: Optional[ValidFixMutable] = None, data_preprocessor: Optional[Union[Dict, nn.Module]] = None, num_random_samples: int = 2, drop_path_rate: float = 0.2, @@ -70,28 +66,18 @@ def __init__(self, init_cfg: Optional[Dict] = None) -> None: super().__init__(architecture, data_preprocessor, init_cfg) - if fix_subnet: - # Avoid circular import - from mmrazor.structures import load_fix_subnet + self.mutator = self._build_mutator(mutator) + # NOTE: `mutator.prepare_from_supernet` must be called + # before distiller initialized. + self.mutator.prepare_from_supernet(self.architecture) - # According to fix_subnet, delete the unchosen part of supernet - load_fix_subnet(self, fix_subnet) - self.is_supernet = False - else: - self.mutator = self._build_mutator(mutator) - # NOTE: `mutator.prepare_from_supernet` must be called - # before distiller initialized. 
- self.mutator.prepare_from_supernet(self.architecture) + self.distiller = self._build_distiller(distiller) + self.distiller.prepare_from_teacher(self.architecture) + self.distiller.prepare_from_student(self.architecture) - self.distiller = self._build_distiller(distiller) - self.distiller.prepare_from_teacher(self.architecture) - self.distiller.prepare_from_student(self.architecture) - - self.sample_kinds = ['max', 'min'] - for i in range(num_random_samples): - self.sample_kinds.append('random' + str(i)) - - self.is_supernet = True + self.sample_kinds = ['max', 'min'] + for i in range(num_random_samples): + self.sample_kinds.append('random' + str(i)) self.drop_path_rate = drop_path_rate self.backbone_dropout_stages = backbone_dropout_stages @@ -120,83 +106,77 @@ def _build_distiller( def train_step(self, data: List[dict], optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]: - if self.is_supernet: - def distill_step( + def distill_step( batch_inputs: torch.Tensor, data_samples: List[BaseDataElement] - ) -> Dict[str, torch.Tensor]: - subnet_losses = dict() + ) -> Dict[str, torch.Tensor]: + subnet_losses = dict() + with optim_wrapper.optim_context( + self), self.distiller.student_recorders: # type: ignore + _ = self(batch_inputs, data_samples, mode='loss') + soft_loss = self.distiller.compute_distill_losses() + + subnet_losses.update(soft_loss) + + parsed_subnet_losses, _ = self.parse_losses(subnet_losses) + optim_wrapper.update_params(parsed_subnet_losses) + + return subnet_losses + + if not self._optim_wrapper_count_status_reinitialized: + reinitialize_optim_wrapper_count_status( + model=self, + optim_wrapper=optim_wrapper, + accumulative_counts=len(self.sample_kinds)) + self._optim_wrapper_count_status_reinitialized = True + + batch_inputs, data_samples = self.data_preprocessor(data, + True).values() + + total_losses = dict() + for kind in self.sample_kinds: + # update the max subnet loss. 
+ if kind == 'max': + self.mutator.set_max_choices() + set_dropout( + layers=self.architecture.backbone.layers[:-1], + module=MBBlock, + dropout_stages=self.backbone_dropout_stages, + drop_path_rate=self.drop_path_rate) with optim_wrapper.optim_context( self - ), self.distiller.student_recorders: # type: ignore - _ = self(batch_inputs, data_samples, mode='loss') - soft_loss = self.distiller.compute_distill_losses() - - subnet_losses.update(soft_loss) - - parsed_subnet_losses, _ = self.parse_losses(subnet_losses) - optim_wrapper.update_params(parsed_subnet_losses) - - return subnet_losses - - if not self._optim_wrapper_count_status_reinitialized: - reinitialize_optim_wrapper_count_status( - model=self, - optim_wrapper=optim_wrapper, - accumulative_counts=len(self.sample_kinds)) - self._optim_wrapper_count_status_reinitialized = True - - batch_inputs, data_samples = self.data_preprocessor(data, - True).values() - - total_losses = dict() - for kind in self.sample_kinds: - # update the max subnet loss. - if kind == 'max': - self.mutator.set_max_choices() - set_dropout( - layers=self.architecture.backbone.layers[:-1], - module=MBBlock, - dropout_stages=self.backbone_dropout_stages, - drop_path_rate=self.drop_path_rate) - with optim_wrapper.optim_context( - self - ), self.distiller.teacher_recorders: # type: ignore - max_subnet_losses = self( - batch_inputs, data_samples, mode='loss') - parsed_max_subnet_losses, _ = self.parse_losses( - max_subnet_losses) - optim_wrapper.update_params(parsed_max_subnet_losses) - total_losses.update( - add_prefix(max_subnet_losses, 'max_subnet')) - # update the min subnet loss. - elif kind == 'min': - self.mutator.set_min_choices() - set_dropout( - layers=self.architecture.backbone.layers[:-1], - module=MBBlock, - dropout_stages=self.backbone_dropout_stages, - drop_path_rate=0.) 
- min_subnet_losses = distill_step(batch_inputs, - data_samples) - total_losses.update( - add_prefix(min_subnet_losses, 'min_subnet')) - # update the random subnets loss. - elif 'random' in kind: - self.mutator.set_choices(self.mutator.sample_choices()) - set_dropout( - layers=self.architecture.backbone.layers[:-1], - module=MBBlock, - dropout_stages=self.backbone_dropout_stages, - drop_path_rate=0.) - random_subnet_losses = distill_step( - batch_inputs, data_samples) - total_losses.update( - add_prefix(random_subnet_losses, f'{kind}_subnet')) - - return total_losses - else: - return super(BigNAS, self).train_step(data, optim_wrapper) + ), self.distiller.teacher_recorders: # type: ignore + max_subnet_losses = self( + batch_inputs, data_samples, mode='loss') + parsed_max_subnet_losses, _ = self.parse_losses( + max_subnet_losses) + optim_wrapper.update_params(parsed_max_subnet_losses) + total_losses.update( + add_prefix(max_subnet_losses, 'max_subnet')) + # update the min subnet loss. + elif kind == 'min': + self.mutator.set_min_choices() + set_dropout( + layers=self.architecture.backbone.layers[:-1], + module=MBBlock, + dropout_stages=self.backbone_dropout_stages, + drop_path_rate=0.) + min_subnet_losses = distill_step(batch_inputs, data_samples) + total_losses.update( + add_prefix(min_subnet_losses, 'min_subnet')) + # update the random subnets loss. + elif 'random' in kind: + self.mutator.set_choices(self.mutator.sample_choices()) + set_dropout( + layers=self.architecture.backbone.layers[:-1], + module=MBBlock, + dropout_stages=self.backbone_dropout_stages, + drop_path_rate=0.) 
+ random_subnet_losses = distill_step(batch_inputs, data_samples) + total_losses.update( + add_prefix(random_subnet_losses, f'{kind}_subnet')) + + return total_losses @MODEL_WRAPPERS.register_module() @@ -213,85 +193,80 @@ def __init__(self, def train_step(self, data: List[dict], optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]: - if self.module.is_supernet: - def distill_step( + def distill_step( batch_inputs: torch.Tensor, data_samples: List[BaseDataElement] - ) -> Dict[str, torch.Tensor]: - subnet_losses = dict() + ) -> Dict[str, torch.Tensor]: + subnet_losses = dict() + with optim_wrapper.optim_context( + self + ), self.module.distiller.student_recorders: # type: ignore + _ = self(batch_inputs, data_samples, mode='loss') + soft_loss = self.module.distiller.compute_distill_losses() + + subnet_losses.update(soft_loss) + + parsed_subnet_losses, _ = self.module.parse_losses( + subnet_losses) + optim_wrapper.update_params(parsed_subnet_losses) + + return subnet_losses + + if not self._optim_wrapper_count_status_reinitialized: + reinitialize_optim_wrapper_count_status( + model=self, + optim_wrapper=optim_wrapper, + accumulative_counts=len(self.module.sample_kinds)) + self._optim_wrapper_count_status_reinitialized = True + + batch_inputs, data_samples = self.module.data_preprocessor( + data, True).values() + + total_losses = dict() + for kind in self.module.sample_kinds: + # update the max subnet loss. 
+ if kind == 'max': + self.module.mutator.set_max_choices() + set_dropout( + layers=self.module.architecture.backbone.layers[:-1], + module=MBBlock, + dropout_stages=self.module.backbone_dropout_stages, + drop_path_rate=self.module.drop_path_rate) with optim_wrapper.optim_context( self - ), self.module.distiller.student_recorders: # type: ignore - _ = self(batch_inputs, data_samples, mode='loss') - soft_loss = self.module.distiller.compute_distill_losses() - - subnet_losses.update(soft_loss) - - parsed_subnet_losses, _ = self.module.parse_losses( - subnet_losses) - optim_wrapper.update_params(parsed_subnet_losses) - - return subnet_losses - - if not self._optim_wrapper_count_status_reinitialized: - reinitialize_optim_wrapper_count_status( - model=self, - optim_wrapper=optim_wrapper, - accumulative_counts=len(self.module.sample_kinds)) - self._optim_wrapper_count_status_reinitialized = True - - batch_inputs, data_samples = self.module.data_preprocessor( - data, True).values() - - total_losses = dict() - for kind in self.module.sample_kinds: - # update the max subnet loss. - if kind == 'max': - self.module.mutator.set_max_choices() - set_dropout( - layers=self.module.architecture.backbone.layers[:-1], - module=MBBlock, - dropout_stages=self.module.backbone_dropout_stages, - drop_path_rate=self.module.drop_path_rate) - with optim_wrapper.optim_context( - self - ), self.module.distiller.teacher_recorders: # type: ignore - max_subnet_losses = self( - batch_inputs, data_samples, mode='loss') - parsed_max_subnet_losses, _ = self.module.parse_losses( - max_subnet_losses) - optim_wrapper.update_params(parsed_max_subnet_losses) - total_losses.update( - add_prefix(max_subnet_losses, 'max_subnet')) - # update the min subnet loss. - elif kind == 'min': - self.module.mutator.set_min_choices() - set_dropout( - layers=self.module.architecture.backbone.layers[:-1], - module=MBBlock, - dropout_stages=self.module.backbone_dropout_stages, - drop_path_rate=0.) 
- min_subnet_losses = distill_step(batch_inputs, - data_samples) - total_losses.update( - add_prefix(min_subnet_losses, 'min_subnet')) - # update the random subnets loss. - elif 'random' in kind: - self.module.mutator.set_choices( - self.module.mutator.sample_choices()) - set_dropout( - layers=self.module.architecture.backbone.layers[:-1], - module=MBBlock, - dropout_stages=self.module.backbone_dropout_stages, - drop_path_rate=0.) - random_subnet_losses = distill_step( - batch_inputs, data_samples) - total_losses.update( - add_prefix(random_subnet_losses, f'{kind}_subnet')) - - return total_losses - else: - return super(BigNASDDP, self).train_step(data, optim_wrapper) + ), self.module.distiller.teacher_recorders: # type: ignore + max_subnet_losses = self( + batch_inputs, data_samples, mode='loss') + parsed_max_subnet_losses, _ = self.module.parse_losses( + max_subnet_losses) + optim_wrapper.update_params(parsed_max_subnet_losses) + total_losses.update( + add_prefix(max_subnet_losses, 'max_subnet')) + # update the min subnet loss. + elif kind == 'min': + self.module.mutator.set_min_choices() + set_dropout( + layers=self.module.architecture.backbone.layers[:-1], + module=MBBlock, + dropout_stages=self.module.backbone_dropout_stages, + drop_path_rate=0.) + min_subnet_losses = distill_step(batch_inputs, data_samples) + total_losses.update( + add_prefix(min_subnet_losses, 'min_subnet')) + # update the random subnets loss. + elif 'random' in kind: + self.module.mutator.set_choices( + self.module.mutator.sample_choices()) + set_dropout( + layers=self.module.architecture.backbone.layers[:-1], + module=MBBlock, + dropout_stages=self.module.backbone_dropout_stages, + drop_path_rate=0.) 
+ random_subnet_losses = distill_step(batch_inputs, data_samples) + total_losses.update( + add_prefix(random_subnet_losses, f'{kind}_subnet')) + + return total_losses @property def _optim_wrapper_count_status_reinitialized(self) -> bool: diff --git a/mmrazor/models/algorithms/nas/darts.py b/mmrazor/models/algorithms/nas/darts.py index 2c53b45c7..b110f47ce 100644 --- a/mmrazor/models/algorithms/nas/darts.py +++ b/mmrazor/models/algorithms/nas/darts.py @@ -12,7 +12,6 @@ from mmrazor.models.mutators import NasMutator from mmrazor.models.utils import add_prefix from mmrazor.registry import MODEL_WRAPPERS, MODELS -from mmrazor.utils import ValidFixMutable from ..base import BaseAlgorithm VALID_MUTATOR_TYPE = Union[NasMutator, Dict] @@ -31,8 +30,6 @@ class Darts(BaseAlgorithm): or built model. Corresponding to supernet in NAS algorithm. mutator (VALID_MUTATOR_TYPE): The config of :class:`NasMutator` or built mutator. - fix_subnet (str | dict | :obj:`FixSubnet`): The path of yaml file or - loaded dict or built :obj:`FixSubnet`. norm_training (bool): Whether to set norm layers to training mode, namely, not freeze running stats (mean and var). Note: Effect on Batch Norm and its variants only. Defaults to False. @@ -45,31 +42,19 @@ class Darts(BaseAlgorithm): def __init__(self, architecture: Union[BaseModel, Dict], mutator: VALID_MUTATOR_TYPE = None, - fix_subnet: Optional[ValidFixMutable] = None, unroll: bool = False, norm_training: bool = False, data_preprocessor: Optional[Union[dict, nn.Module]] = None, init_cfg: Optional[dict] = None): super().__init__(architecture, data_preprocessor, init_cfg) - # Darts has two training mode: supernet training and subnet retraining. - # fix_subnet is not None, means subnet retraining. 
- if fix_subnet: - # Avoid circular import - from mmrazor.structures import load_fix_subnet - - # According to fix_subnet, delete the unchosen part of supernet - load_fix_subnet(self.architecture, fix_subnet) - self.is_supernet = False - else: - self.mutator = self._build_mutator(mutator) - # Mutator is an essential component of the NAS algorithm. It - # provides some APIs commonly used by NAS. - # Before using it, you must do some preparation according to - # the supernet. - self.mutator.prepare_from_supernet(self.architecture) - self.mutator.prepare_arch_params() - self.is_supernet = True + self.mutator = self._build_mutator(mutator) + # Mutator is an essential component of the NAS algorithm. It + # provides some APIs commonly used by NAS. + # Before using it, you must do some preparation according to + # the supernet. + self.mutator.prepare_from_supernet(self.architecture) + self.mutator.prepare_arch_params() self.norm_training = norm_training self.unroll = unroll diff --git a/mmrazor/models/algorithms/nas/dsnas.py b/mmrazor/models/algorithms/nas/dsnas.py index 4a730575e..e5937ba71 100644 --- a/mmrazor/models/algorithms/nas/dsnas.py +++ b/mmrazor/models/algorithms/nas/dsnas.py @@ -18,7 +18,6 @@ from mmrazor.models.utils import add_prefix from mmrazor.registry import MODEL_WRAPPERS, MODELS, TASK_UTILS from mmrazor.structures import export_fix_subnet, load_fix_subnet -from mmrazor.utils import ValidFixMutable from ..base import BaseAlgorithm VALID_MUTATOR_TYPE = Union[NasMutator, Dict] @@ -33,8 +32,6 @@ class DSNAS(BaseAlgorithm): or built model. Corresponding to supernet in NAS algorithm. mutator (VALID_MUTATOR_TYPE): The config of :class:`NasMutator` or built mutator. - fix_subnet (str | dict | :obj:`FixSubnet`): The path of yaml file or - loaded dict or built :obj:`FixSubnet`. pretrain_epochs (int): Num of epochs for supernet pretraining. finetune_epochs (int): Num of epochs for subnet finetuning. 
flops_constraints (float): Flops constraints for judging whether to @@ -59,7 +56,6 @@ class DSNAS(BaseAlgorithm): def __init__(self, architecture: Union[BaseModel, Dict], mutator: VALID_MUTATOR_TYPE = None, - fix_subnet: Optional[ValidFixMutable] = None, pretrain_epochs: int = 0, finetune_epochs: int = 80, flops_constraints: float = 300.0, @@ -75,24 +71,18 @@ def __init__(self, estimator_cfg['type'] = 'mmrazor.ResourceEstimator' self.estimator = TASK_UTILS.build(estimator_cfg) - if fix_subnet: - # According to fix_subnet, delete the unchosen part of supernet - load_fix_subnet(self.architecture, fix_subnet) - self.is_supernet = False - else: - self.mutator = self._build_mutator(mutator) - # Mutator is an essential component of the NAS algorithm. It - # provides some APIs commonly used by NAS. - # Before using it, you must do some preparation according to - # the supernet. - self.mutator.prepare_from_supernet(self.architecture) - self.mutator.prepare_arch_params() - - self.mutable_module_resources = self._get_module_resources() - self.search_space_name_list = list( - self.mutator._name2mutable.keys()) - - self.is_supernet = True + self.mutator = self._build_mutator(mutator) + # Mutator is an essential component of the NAS algorithm. It + # provides some APIs commonly used by NAS. + # Before using it, you must do some preparation according to + # the supernet. 
+ self.mutator.prepare_from_supernet(self.architecture) + self.mutator.prepare_arch_params() + + self.mutable_module_resources = self._get_module_resources() + self.search_space_name_list = list(self.mutator._name2mutable.keys()) + + self.is_supernet = True self.norm_training = norm_training self.pretrain_epochs = pretrain_epochs diff --git a/mmrazor/models/algorithms/nas/spos.py b/mmrazor/models/algorithms/nas/spos.py index cc7799c7c..90a27aa4b 100644 --- a/mmrazor/models/algorithms/nas/spos.py +++ b/mmrazor/models/algorithms/nas/spos.py @@ -9,7 +9,6 @@ from mmrazor.models.mutators import NasMutator from mmrazor.registry import MODELS -from mmrazor.utils import ValidFixMutable from ..base import BaseAlgorithm, LossResults VALID_MUTATOR_TYPE = Union[NasMutator, Dict] @@ -32,8 +31,6 @@ class SPOS(BaseAlgorithm): or built model. Corresponding to supernet in NAS algorithm. mutator (VALID_MUTATOR_TYPE): The config of :class:`NasMutator` or built mutator. - fix_subnet (str | dict | :obj:`FixSubnet`): The path of yaml file or - loaded dict or built :obj:`FixSubnet`. Defaults to None. norm_training (bool): Whether to set norm layers to training mode, namely, not freeze running stats (mean and var). Note: Effect on Batch Norm and its variants only. Defaults to False. @@ -57,29 +54,17 @@ class SPOS(BaseAlgorithm): def __init__(self, architecture: Union[BaseModel, Dict], mutator: VALID_MUTATOR_TYPE = None, - fix_subnet: Optional[ValidFixMutable] = None, norm_training: bool = False, data_preprocessor: Optional[Union[dict, nn.Module]] = None, init_cfg: Optional[dict] = None): super().__init__(architecture, data_preprocessor, init_cfg) - # SPOS has two training mode: supernet training and subnet retraining. - # fix_subnet is not None, means subnet retraining. 
- if fix_subnet: - # Avoid circular import - from mmrazor.structures import load_fix_subnet - - # According to fix_subnet, delete the unchosen part of supernet - load_fix_subnet(self.architecture, fix_subnet) - self.is_supernet = False - else: - self.mutator = self._build_mutator(mutator) - # Mutator is an essential component of the NAS algorithm. It - # provides some APIs commonly used by NAS. - # Before using it, you must do some preparations according to - # the supernet. - self.mutator.prepare_from_supernet(self.architecture) - self.is_supernet = True + self.mutator = self._build_mutator(mutator) + # Mutator is an essential component of the NAS algorithm. It + # provides some APIs commonly used by NAS. + # Before using it, you must do some preparations according to + # the supernet. + self.mutator.prepare_from_supernet(self.architecture) self.norm_training = norm_training @@ -98,11 +83,8 @@ def loss( data_samples: Optional[List[BaseDataElement]] = None, ) -> LossResults: """Calculate losses from a batch of inputs and data samples.""" - if self.is_supernet: - self.mutator.set_choices(self.mutator.sample_choices()) - return self.architecture(batch_inputs, data_samples, mode='loss') - else: - return self.architecture(batch_inputs, data_samples, mode='loss') + self.mutator.set_choices(self.mutator.sample_choices()) + return self.architecture(batch_inputs, data_samples, mode='loss') def train(self, mode=True): """Convert the model into eval mode while keep normalization layer diff --git a/mmrazor/models/algorithms/pruning/dmcp.py b/mmrazor/models/algorithms/pruning/dmcp.py index b11e2fde4..043cf9acf 100644 --- a/mmrazor/models/algorithms/pruning/dmcp.py +++ b/mmrazor/models/algorithms/pruning/dmcp.py @@ -4,7 +4,6 @@ from typing import Any, Dict, List, Optional, Tuple, Union import torch -import yaml from mmengine import MessageHub from mmengine.model import BaseModel, MMDistributedDataParallel from mmengine.optim import OptimWrapper @@ -15,7 +14,6 @@ from 
mmrazor.models.mutators import ChannelMutator, DMCPChannelMutator from mmrazor.models.utils import add_prefix from mmrazor.registry import MODEL_WRAPPERS, MODELS -from mmrazor.utils import ValidFixMutable from ...task_modules.estimators import ResourceEstimator from ..base import BaseAlgorithm @@ -35,17 +33,22 @@ class DMCP(BaseAlgorithm): architecture (dict|:obj:`BaseModel`): The config of :class:`BaseModel` or built model. Corresponding to supernet in NAS algorithm. distiller (VALID_DISTILLER_TYPE): Configs to build a distiller. - fix_subnet (str | dict | :obj:`FixSubnet`): The path of yaml file or - loaded dict or built :obj:`FixSubnet`. Defaults to None. data_preprocessor (Optional[Union[dict, nn.Module]]): The pre-process config of :class:`BaseDataPreprocessor`. Defaults to None. strategy (list): mode of sampled net. + Defaults to ['max', 'min', 'arch_random']. arch_start_train (int): Number of iter to start arch training. - arch_train_freq (int): Frequency of training. Defaults to 500. + Default is an int defined in ``__init__`` (NOTE(review): confirm the value). + arch_train_freq (int): Frequency of training. + Defaults to 500. distillation_times (int): Number of iter after which the distillation loss is added. + Defaults to 20000. target_flops (int): Target FLOPs. Default unit: MFLOPs. + Defaults to 150. flops_loss_type (str): The model used to calculate flops_loss. + Defaults to `log_l1`. flop_loss_weight (float): Weight of flops_loss. + Defaults to 1.0. init_cfg (Optional[dict]): Init config for ``BaseModule``. Defaults to None.
""" @@ -56,7 +59,6 @@ def __init__(self, mutator_cfg: Union[Dict, DMCPChannelMutator] = dict( type=' DMCPChannelMutator', channel_unit_cfg=dict(type='DMCPChannelUnit')), - fix_subnet: Optional[ValidFixMutable] = None, data_preprocessor: Optional[Union[Dict, nn.Module]] = None, strategy: List = ['max', 'min', 'arch_random'], init_cfg: Optional[Dict] = None, @@ -86,20 +88,6 @@ def __init__(self, self.distiller.prepare_from_teacher(self.architecture) self.distiller.prepare_from_student(self.architecture) - if fix_subnet: - self._load_fix_subnet(fix_subnet) - self.is_supernet = False - else: - self.is_supernet = True - - def _load_fix_subnet(self, save_path): - """Load sub-network structure and fix.""" - from mmrazor.structures import load_fix_subnet - with open(save_path) as file: - self.mutator.set_choices( - yaml.load(file.read(), Loader=yaml.FullLoader)) - load_fix_subnet(self.architecture, save_path) - def _build_distiller( self, distiller: VALID_DISTILLER_TYPE) -> ConfigurableDistiller: """Build distiller.""" @@ -126,106 +114,99 @@ def train_step(self, data: List[dict], self._iter > self.arch_start_train: self.arch_train = True - if self.is_supernet: - - def distill_step( + def distill_step( batch_inputs: torch.Tensor, data_samples: List[BaseDataElement] - ) -> Dict[str, torch.Tensor]: - subnet_losses = dict() + ) -> Dict[str, torch.Tensor]: + subnet_losses = dict() + with optim_wrapper['architecture'].optim_context( + self), self.distiller.student_recorders: # type: ignore + hard_loss = self(batch_inputs, data_samples, mode='loss') + subnet_losses.update(hard_loss) + + if self._iter > self.distillation_times: + soft_loss = self.distiller.compute_distill_losses() + subnet_losses.update(soft_loss) + + parsed_subnet_losses, _ = self.parse_losses(subnet_losses) + optim_wrapper['architecture'].update_params( + parsed_subnet_losses) + + return subnet_losses + + batch_inputs, data_samples = self.data_preprocessor(data, + True).values() + + total_losses = dict() + # 
update model parameters + max_net_num = min_net_num = random_net_num = direct_net_num = 1 + for kind in self.strategy: + if kind in ('max'): + self.set_subnet(mode='max') with optim_wrapper['architecture'].optim_context( self - ), self.distiller.student_recorders: # type: ignore - hard_loss = self(batch_inputs, data_samples, mode='loss') - subnet_losses.update(hard_loss) - - if self._iter > self.distillation_times: - soft_loss = self.distiller.compute_distill_losses() - subnet_losses.update(soft_loss) - - parsed_subnet_losses, _ = self.parse_losses(subnet_losses) + ), self.distiller.teacher_recorders: # type: ignore + max_subnet_losses = self( + batch_inputs, data_samples, mode='loss') + parsed_max_subnet_losses, _ = self.parse_losses( + max_subnet_losses) optim_wrapper['architecture'].update_params( - parsed_subnet_losses) - - return subnet_losses - - batch_inputs, data_samples = self.data_preprocessor(data, - True).values() - - total_losses = dict() - # update model parameters - max_net_num = min_net_num = random_net_num = direct_net_num = 1 - for kind in self.strategy: - if kind in ('max'): - self.set_subnet(mode='max') - with optim_wrapper['architecture'].optim_context( - self - ), self.distiller.teacher_recorders: # type: ignore - max_subnet_losses = self( - batch_inputs, data_samples, mode='loss') - parsed_max_subnet_losses, _ = self.parse_losses( - max_subnet_losses) - optim_wrapper['architecture'].update_params( - parsed_max_subnet_losses) + parsed_max_subnet_losses) + total_losses.update( + add_prefix(max_subnet_losses, f'max_subnet{max_net_num}')) + max_net_num += 1 + elif kind in ('min'): + self.set_subnet(mode='min') + min_subnet_losses =\ + distill_step(batch_inputs, data_samples) + total_losses.update( + add_prefix(min_subnet_losses, f'min_subnet{min_net_num}')) + min_net_num += 1 + elif kind in ('arch_random'): + if self.arch_train: + self.set_subnet(mode='direct') + direct_subnet_losses = distill_step( + batch_inputs, data_samples) 
total_losses.update( - add_prefix(max_subnet_losses, - f'max_subnet{max_net_num}')) - max_net_num += 1 - elif kind in ('min'): - self.set_subnet(mode='min') - min_subnet_losses =\ - distill_step(batch_inputs, data_samples) + add_prefix(direct_subnet_losses, + f'direct_subnet{direct_net_num}')) + direct_net_num += 1 + else: + self.set_subnet(mode='random') + random_subnet_losses = distill_step( + batch_inputs, data_samples) total_losses.update( - add_prefix(min_subnet_losses, - f'min_subnet{min_net_num}')) - min_net_num += 1 - elif kind in ('arch_random'): - if self.arch_train: - self.set_subnet(mode='direct') - direct_subnet_losses = distill_step( - batch_inputs, data_samples) - total_losses.update( - add_prefix(direct_subnet_losses, - f'direct_subnet{direct_net_num}')) - direct_net_num += 1 - else: - self.set_subnet(mode='random') - random_subnet_losses = distill_step( - batch_inputs, data_samples) - total_losses.update( - add_prefix(random_subnet_losses, - f'random_subnet{random_net_num}')) - random_net_num += 1 - elif kind in ('scheduled_random'): - if random.uniform(0, 1) > self.cur_sample_prob\ - and self.arch_train: - self.set_subnet(mode='direct') - direct_subnet_losses = distill_step( - batch_inputs, data_samples) - total_losses.update( - add_prefix(direct_subnet_losses, - f'direct_subnet{direct_net_num}')) - direct_net_num += 1 - else: - self.set_subnet(mode='random') - random_subnet_losses = distill_step( - batch_inputs, data_samples) - total_losses.update( - add_prefix(random_subnet_losses, - f'random_subnet{random_net_num}')) - random_net_num += 1 - self.cur_sample_prob *= 0.9999 - - # update arch parameters - if self.arch_train \ - and self._iter % self.arch_train_freq == 0: - with optim_wrapper['mutator'].optim_context(self): - optim_wrapper['mutator'].zero_grad() - mutator_loss = self._update_arch_params( - batch_inputs, data_samples, optim_wrapper, mode='loss') - total_losses.update(mutator_loss) - return total_losses - else: - return 
super().train_step(data, optim_wrapper) + add_prefix(random_subnet_losses, + f'random_subnet{random_net_num}')) + random_net_num += 1 + elif kind in ('scheduled_random'): + if random.uniform(0, 1) > self.cur_sample_prob\ + and self.arch_train: + self.set_subnet(mode='direct') + direct_subnet_losses = distill_step( + batch_inputs, data_samples) + total_losses.update( + add_prefix(direct_subnet_losses, + f'direct_subnet{direct_net_num}')) + direct_net_num += 1 + else: + self.set_subnet(mode='random') + random_subnet_losses = distill_step( + batch_inputs, data_samples) + total_losses.update( + add_prefix(random_subnet_losses, + f'random_subnet{random_net_num}')) + random_net_num += 1 + self.cur_sample_prob *= 0.9999 + + # update arch parameters + if self.arch_train \ + and self._iter % self.arch_train_freq == 0: + with optim_wrapper['mutator'].optim_context(self): + optim_wrapper['mutator'].zero_grad() + mutator_loss = self._update_arch_params( + batch_inputs, data_samples, optim_wrapper, mode='loss') + total_losses.update(mutator_loss) + return total_losses def _update_arch_params(self, inputs: torch.Tensor, @@ -349,103 +330,96 @@ def train_step(self, data: List[dict], self.module._iter > self.module.arch_start_train: self.module.arch_train = True - if self.module.is_supernet: - - def distill_step( + def distill_step( batch_inputs: torch.Tensor, data_samples: List[BaseDataElement] - ) -> Dict[str, torch.Tensor]: - subnet_losses = dict() + ) -> Dict[str, torch.Tensor]: + subnet_losses = dict() + with optim_wrapper['architecture'].optim_context( + self), self.module.distiller.student_recorders: + hard_loss = self(batch_inputs, data_samples, mode='loss') + subnet_losses.update(hard_loss) + if self.module._iter > self.module.distillation_times: + soft_loss = \ + self.module.distiller.compute_distill_losses() + subnet_losses.update(soft_loss) + + parsed_subnet_losses, _ = \ + self.module.parse_losses(subnet_losses) + optim_wrapper['architecture'].update_params( + 
parsed_subnet_losses) + + return subnet_losses + + batch_inputs, data_samples = self.module.data_preprocessor( + data, True).values() + + total_losses = dict() + # update model parameters + max_net_num = min_net_num = random_net_num = direct_net_num = 1 + for kind in self.module.strategy: + if kind in ('max'): + self.module.set_subnet(mode='max') with optim_wrapper['architecture'].optim_context( - self), self.module.distiller.student_recorders: - hard_loss = self(batch_inputs, data_samples, mode='loss') - subnet_losses.update(hard_loss) - if self.module._iter > self.module.distillation_times: - soft_loss = \ - self.module.distiller.compute_distill_losses() - subnet_losses.update(soft_loss) - - parsed_subnet_losses, _ = \ - self.module.parse_losses(subnet_losses) + self + ), self.module.distiller.teacher_recorders: # type: ignore + max_subnet_losses = self( + batch_inputs, data_samples, mode='loss') + parsed_max_subnet_losses, _ = self.module.parse_losses( + max_subnet_losses) optim_wrapper['architecture'].update_params( - parsed_subnet_losses) - - return subnet_losses - - batch_inputs, data_samples = self.module.data_preprocessor( - data, True).values() - - total_losses = dict() - # update model parameters - max_net_num = min_net_num = random_net_num = direct_net_num = 1 - for kind in self.module.strategy: - if kind in ('max'): - self.module.set_subnet(mode='max') - with optim_wrapper['architecture'].optim_context( - self - ), self.module.distiller.teacher_recorders: # type: ignore - max_subnet_losses = self( - batch_inputs, data_samples, mode='loss') - parsed_max_subnet_losses, _ = self.module.parse_losses( - max_subnet_losses) - optim_wrapper['architecture'].update_params( - parsed_max_subnet_losses) + parsed_max_subnet_losses) + total_losses.update( + add_prefix(max_subnet_losses, f'max_subnet{max_net_num}')) + max_net_num += 1 + elif kind in ('min'): + self.module.set_subnet(mode='min') + min_subnet_losses = distill_step(batch_inputs, data_samples) + 
total_losses.update( + add_prefix(min_subnet_losses, f'min_subnet{min_net_num}')) + min_net_num += 1 + elif kind in ('arch_random'): + if self.module.arch_train: + self.module.set_subnet(mode='direct') + direct_subnet_losses = distill_step( + batch_inputs, data_samples) total_losses.update( - add_prefix(max_subnet_losses, - f'max_subnet{max_net_num}')) - max_net_num += 1 - elif kind in ('min'): - self.module.set_subnet(mode='min') - min_subnet_losses = distill_step(batch_inputs, - data_samples) + add_prefix(direct_subnet_losses, + f'direct_subnet{direct_net_num}')) + direct_net_num += 1 + else: + self.module.set_subnet(mode='random') + random_subnet_losses = distill_step( + batch_inputs, data_samples) total_losses.update( - add_prefix(min_subnet_losses, - f'min_subnet{min_net_num}')) - min_net_num += 1 - elif kind in ('arch_random'): - if self.module.arch_train: - self.module.set_subnet(mode='direct') - direct_subnet_losses = distill_step( - batch_inputs, data_samples) - total_losses.update( - add_prefix(direct_subnet_losses, - f'direct_subnet{direct_net_num}')) - direct_net_num += 1 - else: - self.module.set_subnet(mode='random') - random_subnet_losses = distill_step( - batch_inputs, data_samples) - total_losses.update( - add_prefix(random_subnet_losses, - f'random_subnet{random_net_num}')) - random_net_num += 1 - elif kind in ('scheduled_random'): - if random.uniform(0, 1) > self.module.cur_sample_prob\ - and self.module.arch_train: - self.module.set_subnet(mode='direct') - direct_subnet_losses = distill_step( - batch_inputs, data_samples) - total_losses.update( - add_prefix(direct_subnet_losses, - f'direct_subnet{direct_net_num}')) - direct_net_num += 1 - else: - self.module.set_subnet(mode='random') - random_subnet_losses = distill_step( - batch_inputs, data_samples) - total_losses.update( - add_prefix(random_subnet_losses, - f'random_subnet{random_net_num}')) - random_net_num += 1 - self.module.cur_sample_prob *= 0.9999 - - # update arch parameters - if 
self.module.arch_train \ - and self.module._iter % self.module.arch_train_freq == 0: - with optim_wrapper['mutator'].optim_context(self): - optim_wrapper['mutator'].zero_grad() - mutator_loss = self.module._update_arch_params( - batch_inputs, data_samples, optim_wrapper, mode='loss') - total_losses.update(mutator_loss) - return total_losses - else: - return super().train_step(data, optim_wrapper) + add_prefix(random_subnet_losses, + f'random_subnet{random_net_num}')) + random_net_num += 1 + elif kind in ('scheduled_random'): + if random.uniform(0, 1) > self.module.cur_sample_prob\ + and self.module.arch_train: + self.module.set_subnet(mode='direct') + direct_subnet_losses = distill_step( + batch_inputs, data_samples) + total_losses.update( + add_prefix(direct_subnet_losses, + f'direct_subnet{direct_net_num}')) + direct_net_num += 1 + else: + self.module.set_subnet(mode='random') + random_subnet_losses = distill_step( + batch_inputs, data_samples) + total_losses.update( + add_prefix(random_subnet_losses, + f'random_subnet{random_net_num}')) + random_net_num += 1 + self.module.cur_sample_prob *= 0.9999 + + # update arch parameters + if self.module.arch_train \ + and self.module._iter % self.module.arch_train_freq == 0: + with optim_wrapper['mutator'].optim_context(self): + optim_wrapper['mutator'].zero_grad() + mutator_loss = self.module._update_arch_params( + batch_inputs, data_samples, optim_wrapper, mode='loss') + total_losses.update(mutator_loss) + return total_losses diff --git a/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py b/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py index f069a7f7e..144127420 100644 --- a/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py +++ b/mmrazor/models/mutables/mutable_channel/units/dmcp_channel_unit.py @@ -13,6 +13,11 @@ class DMCPChannelUnit(SequentialMutableChannelUnit): OneShotMutableChannelUnit. In DMCP supernet, each module only has one choice. 
The channel choice is fixed before training. + Note: + In dmcpunit, a new attribute `activated_tensor_channels` is defined + in self.mutable_channel, which is specifically used to store the number + of channels in the form of tensor. Defaults to None. + Args: num_channels (int): The raw number of channels. choice_mode (str, optional): Mode of candidates. diff --git a/mmrazor/registry/registry.py b/mmrazor/registry/registry.py index d3a5c5423..16aff7d63 100644 --- a/mmrazor/registry/registry.py +++ b/mmrazor/registry/registry.py @@ -113,7 +113,8 @@ def sub_model(cfg, prefix: str = '', extra_prefix: str = '', init_weight_from_supernet: bool = False, - init_cfg: Optional[Dict] = None): + init_cfg: Optional[Dict] = None, + **kwargs): model = MODELS.build(cfg) # Save path type cfg process, set init_cfg directly. if init_cfg: diff --git a/mmrazor/structures/subnet/fix_subnet.py b/mmrazor/structures/subnet/fix_subnet.py index 803d31b6a..07447ad40 100644 --- a/mmrazor/structures/subnet/fix_subnet.py +++ b/mmrazor/structures/subnet/fix_subnet.py @@ -124,7 +124,8 @@ def _load_fix_subnet_by_mutator(model: nn.Module, mutator_cfg: Dict) -> None: def export_fix_subnet( model: nn.Module, export_subnet_mode: str = 'mutable', - slice_weight: bool = False) -> Tuple[FixMutable, Optional[Dict]]: + slice_weight: bool = False, + export_channel: bool = True) -> Tuple[FixMutable, Optional[Dict]]: """Export subnet that can be loaded by :func:`load_fix_subnet`. Include subnet structure and subnet weight. @@ -134,6 +135,9 @@ def export_fix_subnet( Export by `mutable.dump_chosen()` when set to 'mutable' (NAS) Export by `mutator.config_template()` when set to 'mutator' (Prune) slice_weight (bool): Export subnet weight. Default to False. + export_channel (bool): Whether to export the mutator's channel. + Often required when finetune is needed for the exported subnet. + Default to True. Return: fix_subnet (ValidFixMutable): Exported subnet choice config. 
@@ -144,7 +148,7 @@ def export_fix_subnet( if export_subnet_mode == 'mutable': fix_subnet = _export_subnet_by_mutable(model) elif export_subnet_mode == 'mutator': - fix_subnet = _export_subnet_by_mutator(model) + fix_subnet = _export_subnet_by_mutator(model, export_channel) else: raise ValueError(f'Invalid export_subnet_mode {export_subnet_mode}, ' 'only mutable or mutator is supported.') @@ -192,12 +196,12 @@ def module_dump_chosen(module, fix_subnet): return fix_subnet -def _export_subnet_by_mutator(model: nn.Module) -> Dict: +def _export_subnet_by_mutator(model: nn.Module, export_channel: bool) -> Dict: if not hasattr(model, 'mutator'): raise ValueError('model should contain `mutator` attribute, but got ' f'{type(model)} model') fix_subnet = model.mutator.config_template( - with_channels=False, with_unit_init_args=True) + with_channels=export_channel, with_unit_init_args=True) return fix_subnet diff --git a/tests/data/MBV2_slimmable_config.json b/tests/data/MBV2_slimmable_config.json index 9010b83e2..5b9a5573a 100644 --- a/tests/data/MBV2_slimmable_config.json +++ b/tests/data/MBV2_slimmable_config.json @@ -1,377 +1,396 @@ { - "backbone.conv1.conv_(0, 48)_48": { - "init_args": { - "num_channels": 48, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 8, - 8, - 32 - ], - "choice_mode": "number" + "type":"OneShotChannelMutator", + "channel_unit_cfg":{ + "type":"OneShotMutableChannelUnit", + "default_args":{ + "choice_mode":"number" }, - "choice": 32 + "units":{ + "backbone.conv1.conv_(0, 48)_48": { + "init_args": { + "num_channels": 48, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 8, + 8, + 32 + ], + "choice_mode": "number" + }, + "choice": 32 + }, + "backbone.layer1.0.conv.1.conv_(0, 24)_24": { + "init_args": { + "num_channels": 24, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 8, + 8, + 16 + ], + "choice_mode": "number" + }, + "choice": 16 + }, + 
"backbone.layer2.0.conv.0.conv_(0, 144)_144": { + "init_args": { + "num_channels": 144, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 96, + 96, + 144 + ], + "choice_mode": "number" + }, + "choice": 144 + }, + "backbone.layer2.0.conv.2.conv_(0, 40)_40": { + "init_args": { + "num_channels": 40, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 16, + 16, + 24 + ], + "choice_mode": "number" + }, + "choice": 24 + }, + "backbone.layer2.1.conv.0.conv_(0, 240)_240": { + "init_args": { + "num_channels": 240, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 96, + 96, + 176 + ], + "choice_mode": "number" + }, + "choice": 176 + }, + "backbone.layer3.0.conv.0.conv_(0, 240)_240": { + "init_args": { + "num_channels": 240, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 96, + 96, + 192 + ], + "choice_mode": "number" + }, + "choice": 192 + }, + "backbone.layer3.0.conv.2.conv_(0, 48)_48": { + "init_args": { + "num_channels": 48, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 24, + 24, + 48 + ], + "choice_mode": "number" + }, + "choice": 48 + }, + "backbone.layer3.1.conv.0.conv_(0, 288)_288": { + "init_args": { + "num_channels": 288, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 144, + 144, + 240 + ], + "choice_mode": "number" + }, + "choice": 240 + }, + "backbone.layer3.2.conv.0.conv_(0, 288)_288": { + "init_args": { + "num_channels": 288, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 144, + 144, + 144 + ], + "choice_mode": "number" + }, + "choice": 144 + }, + "backbone.layer4.0.conv.0.conv_(0, 288)_288": { + "init_args": { + "num_channels": 288, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 144, + 144, + 264 + ], + "choice_mode": "number" + }, + "choice": 264 + }, + "backbone.layer4.0.conv.2.conv_(0, 96)_96": { + 
"init_args": { + "num_channels": 96, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 48, + 56, + 88 + ], + "choice_mode": "number" + }, + "choice": 88 + }, + "backbone.layer4.1.conv.0.conv_(0, 576)_576": { + "init_args": { + "num_channels": 576, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 288, + 288, + 288 + ], + "choice_mode": "number" + }, + "choice": 288 + }, + "backbone.layer4.2.conv.0.conv_(0, 576)_576": { + "init_args": { + "num_channels": 576, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 288, + 288, + 336 + ], + "choice_mode": "number" + }, + "choice": 336 + }, + "backbone.layer4.3.conv.0.conv_(0, 576)_576": { + "init_args": { + "num_channels": 576, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 288, + 288, + 432 + ], + "choice_mode": "number" + }, + "choice": 432 + }, + "backbone.layer5.0.conv.0.conv_(0, 576)_576": { + "init_args": { + "num_channels": 576, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 288, + 288, + 576 + ], + "choice_mode": "number" + }, + "choice": 576 + }, + "backbone.layer5.0.conv.2.conv_(0, 144)_144": { + "init_args": { + "num_channels": 144, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 64, + 96, + 144 + ], + "choice_mode": "number" + }, + "choice": 144 + }, + "backbone.layer5.1.conv.0.conv_(0, 864)_864": { + "init_args": { + "num_channels": 864, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 432, + 432, + 576 + ], + "choice_mode": "number" + }, + "choice": 576 + }, + "backbone.layer5.2.conv.0.conv_(0, 864)_864": { + "init_args": { + "num_channels": 864, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 432, + 432, + 648 + ], + "choice_mode": "number" + }, + "choice": 648 + }, + "backbone.layer6.0.conv.0.conv_(0, 864)_864": { + "init_args": { + "num_channels": 864, 
+ "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 648, + 864, + 864 + ], + "choice_mode": "number" + }, + "choice": 864 + }, + "backbone.layer6.0.conv.2.conv_(0, 240)_240": { + "init_args": { + "num_channels": 240, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 176, + 240, + 240 + ], + "choice_mode": "number" + }, + "choice": 240 + }, + "backbone.layer6.1.conv.0.conv_(0, 1440)_1440": { + "init_args": { + "num_channels": 1440, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 720, + 1440, + 1440 + ], + "choice_mode": "number" + }, + "choice": 1440 + }, + "backbone.layer6.2.conv.0.conv_(0, 1440)_1440": { + "init_args": { + "num_channels": 1440, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 720, + 960, + 1440 + ], + "choice_mode": "number" + }, + "choice": 1440 + }, + "backbone.layer7.0.conv.0.conv_(0, 1440)_1440": { + "init_args": { + "num_channels": 1440, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 1440, + 1440, + 1440 + ], + "choice_mode": "number" + }, + "choice": 1440 + }, + "backbone.layer7.0.conv.2.conv_(0, 480)_480": { + "init_args": { + "num_channels": 480, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 280, + 480, + 480 + ], + "choice_mode": "number" + }, + "choice": 480 + }, + "backbone.conv2.conv_(0, 1920)_1920": { + "init_args": { + "num_channels": 1920, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 1920, + 1920, + 1920 + ], + "choice_mode": "number" + }, + "choice": 1920 + } + } }, - "backbone.layer1.0.conv.1.conv_(0, 24)_24": { - "init_args": { - "num_channels": 24, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 8, - 8, - 16 - ], - "choice_mode": "number" - }, - "choice": 16 - }, - "backbone.layer2.0.conv.0.conv_(0, 144)_144": { - "init_args": { - "num_channels": 144, - "divisor": 1, - 
"min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 96, - 96, - 144 - ], - "choice_mode": "number" - }, - "choice": 144 - }, - "backbone.layer2.0.conv.2.conv_(0, 40)_40": { - "init_args": { - "num_channels": 40, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 16, - 16, - 24 - ], - "choice_mode": "number" - }, - "choice": 24 - }, - "backbone.layer2.1.conv.0.conv_(0, 240)_240": { - "init_args": { - "num_channels": 240, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 96, - 96, - 176 - ], - "choice_mode": "number" - }, - "choice": 176 - }, - "backbone.layer3.0.conv.0.conv_(0, 240)_240": { - "init_args": { - "num_channels": 240, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 96, - 96, - 192 - ], - "choice_mode": "number" - }, - "choice": 192 - }, - "backbone.layer3.0.conv.2.conv_(0, 48)_48": { - "init_args": { - "num_channels": 48, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 24, - 24, - 48 - ], - "choice_mode": "number" - }, - "choice": 48 - }, - "backbone.layer3.1.conv.0.conv_(0, 288)_288": { - "init_args": { - "num_channels": 288, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 144, - 144, - 240 - ], - "choice_mode": "number" - }, - "choice": 240 - }, - "backbone.layer3.2.conv.0.conv_(0, 288)_288": { - "init_args": { - "num_channels": 288, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 144, - 144, - 144 - ], - "choice_mode": "number" - }, - "choice": 144 - }, - "backbone.layer4.0.conv.0.conv_(0, 288)_288": { - "init_args": { - "num_channels": 288, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 144, - 144, - 264 - ], - "choice_mode": "number" - }, - "choice": 264 - }, - "backbone.layer4.0.conv.2.conv_(0, 96)_96": { - "init_args": { - "num_channels": 96, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - 
"candidate_choices": [ - 48, - 56, - 88 - ], - "choice_mode": "number" - }, - "choice": 88 - }, - "backbone.layer4.1.conv.0.conv_(0, 576)_576": { - "init_args": { - "num_channels": 576, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 288, - 288, - 288 - ], - "choice_mode": "number" - }, - "choice": 288 - }, - "backbone.layer4.2.conv.0.conv_(0, 576)_576": { - "init_args": { - "num_channels": 576, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 288, - 288, - 336 - ], - "choice_mode": "number" - }, - "choice": 336 - }, - "backbone.layer4.3.conv.0.conv_(0, 576)_576": { - "init_args": { - "num_channels": 576, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 288, - 288, - 432 - ], - "choice_mode": "number" - }, - "choice": 432 - }, - "backbone.layer5.0.conv.0.conv_(0, 576)_576": { - "init_args": { - "num_channels": 576, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 288, - 288, - 576 - ], - "choice_mode": "number" - }, - "choice": 576 - }, - "backbone.layer5.0.conv.2.conv_(0, 144)_144": { - "init_args": { - "num_channels": 144, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 64, - 96, - 144 - ], - "choice_mode": "number" - }, - "choice": 144 - }, - "backbone.layer5.1.conv.0.conv_(0, 864)_864": { - "init_args": { - "num_channels": 864, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 432, - 432, - 576 - ], - "choice_mode": "number" - }, - "choice": 576 - }, - "backbone.layer5.2.conv.0.conv_(0, 864)_864": { - "init_args": { - "num_channels": 864, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 432, - 432, - 648 - ], - "choice_mode": "number" - }, - "choice": 648 - }, - "backbone.layer6.0.conv.0.conv_(0, 864)_864": { - "init_args": { - "num_channels": 864, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 648, - 864, 
- 864 - ], - "choice_mode": "number" - }, - "choice": 864 - }, - "backbone.layer6.0.conv.2.conv_(0, 240)_240": { - "init_args": { - "num_channels": 240, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 176, - 240, - 240 - ], - "choice_mode": "number" - }, - "choice": 240 - }, - "backbone.layer6.1.conv.0.conv_(0, 1440)_1440": { - "init_args": { - "num_channels": 1440, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 720, - 1440, - 1440 - ], - "choice_mode": "number" - }, - "choice": 1440 - }, - "backbone.layer6.2.conv.0.conv_(0, 1440)_1440": { - "init_args": { - "num_channels": 1440, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 720, - 960, - 1440 - ], - "choice_mode": "number" - }, - "choice": 1440 - }, - "backbone.layer7.0.conv.0.conv_(0, 1440)_1440": { - "init_args": { - "num_channels": 1440, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 1440, - 1440, - 1440 - ], - "choice_mode": "number" - }, - "choice": 1440 - }, - "backbone.layer7.0.conv.2.conv_(0, 480)_480": { - "init_args": { - "num_channels": 480, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 280, - 480, - 480 - ], - "choice_mode": "number" - }, - "choice": 480 - }, - "backbone.conv2.conv_(0, 1920)_1920": { - "init_args": { - "num_channels": 1920, - "divisor": 1, - "min_value": 1, - "min_ratio": 0.9, - "candidate_choices": [ - 1920, - 1920, - 1920 - ], - "choice_mode": "number" - }, - "choice": 1920 + "parse_cfg":{ + "type":"ChannelAnalyzer", + "demo_input":[ + 1, + 3, + 224, + 224 + ], + "tracer_type":"BackwardTracer" } } \ No newline at end of file diff --git a/tests/data/test_registry/subnet.json b/tests/data/test_registry/subnet.json index 4fe63bda2..531d6af46 100644 --- a/tests/data/test_registry/subnet.json +++ b/tests/data/test_registry/subnet.json @@ -14,6 +14,102 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + 
"input_related":[ + { + "name":"backbone.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.conv1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.bn2", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.conv1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.bn2", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.conv1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.downsample.0", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.bn1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.conv2", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.bn2", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.conv2", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.bn2", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, "choice":1.0 }, "backbone.layer1.0.conv1_(0, 64)_64":{ @@ -24,6 +120,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.0.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.0.conv2", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer1.0.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.0.bn1", + "start":0, + "end":64, + 
"is_output_channel":true + } + ] + }, "choice":0.640625 }, "backbone.layer1.1.conv1_(0, 64)_64":{ @@ -34,6 +160,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer1.1.bn1", + "start":0, + "end":64, + "is_output_channel":false + }, + { + "name":"backbone.layer1.1.conv2", + "start":0, + "end":64, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer1.1.conv1", + "start":0, + "end":64, + "is_output_channel":true + }, + { + "name":"backbone.layer1.1.bn1", + "start":0, + "end":64, + "is_output_channel":true + } + ] + }, "choice":0.640625 }, "backbone.layer2.0.conv1_(0, 128)_128":{ @@ -44,6 +200,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.0.bn1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.conv2", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.0.conv1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.bn1", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer2.0.conv2_(0, 128)_128":{ @@ -54,6 +240,96 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.0.bn2", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.0.downsample.1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.bn2", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.conv1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.downsample.0", + "start":0, + "end":128, + 
"is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.0.conv2", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.bn2", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.downsample.0", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.0.downsample.1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.conv2", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.bn2", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, "choice":0.59375 }, "backbone.layer2.1.conv1_(0, 128)_128":{ @@ -64,6 +340,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer2.1.bn1", + "start":0, + "end":128, + "is_output_channel":false + }, + { + "name":"backbone.layer2.1.conv2", + "start":0, + "end":128, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer2.1.conv1", + "start":0, + "end":128, + "is_output_channel":true + }, + { + "name":"backbone.layer2.1.bn1", + "start":0, + "end":128, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer3.0.conv1_(0, 256)_256":{ @@ -74,6 +380,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.0.bn1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.conv2", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.0.conv1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.bn1", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer3.0.conv2_(0, 256)_256":{ @@ -84,6 +420,96 @@ "min_value":1, 
"min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.0.bn2", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.0.downsample.1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.bn2", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.conv1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.downsample.0", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.0.conv2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.bn2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.downsample.0", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.0.downsample.1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.conv2", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.bn2", + "start":0, + "end":256, + "is_output_channel":true + } + ] + }, "choice":0.59765625 }, "backbone.layer3.1.conv1_(0, 256)_256":{ @@ -94,6 +520,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer3.1.bn1", + "start":0, + "end":256, + "is_output_channel":false + }, + { + "name":"backbone.layer3.1.conv2", + "start":0, + "end":256, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer3.1.conv1", + "start":0, + "end":256, + "is_output_channel":true + }, + { + "name":"backbone.layer3.1.bn1", + "start":0, + 
"end":256, + "is_output_channel":true + } + ] + }, "choice":0.6484375 }, "backbone.layer4.0.conv1_(0, 512)_512":{ @@ -104,6 +560,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.0.bn1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.conv2", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.0.conv1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.bn1", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, "choice":0.69921875 }, "backbone.layer4.0.conv2_(0, 512)_512":{ @@ -114,6 +600,90 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.0.bn2", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.0.downsample.1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"bind_placeholder", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.bn2", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"head.fc", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.0.conv2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.bn2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.downsample.0", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.0.downsample.1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"bind_placeholder", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.conv2", + "start":0, + "end":512, + "is_output_channel":true + }, + { + 
"name":"backbone.layer4.1.bn2", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, "choice":0.69921875 }, "backbone.layer4.1.conv1_(0, 512)_512":{ @@ -124,6 +694,36 @@ "min_value":1, "min_ratio":0.9 }, + "channels":{ + "input_related":[ + { + "name":"backbone.layer4.1.bn1", + "start":0, + "end":512, + "is_output_channel":false + }, + { + "name":"backbone.layer4.1.conv2", + "start":0, + "end":512, + "is_output_channel":false + } + ], + "output_related":[ + { + "name":"backbone.layer4.1.conv1", + "start":0, + "end":512, + "is_output_channel":true + }, + { + "name":"backbone.layer4.1.bn1", + "start":0, + "end":512, + "is_output_channel":true + } + ] + }, "choice":0.69921875 } } diff --git a/tests/test_models/test_algorithms/test_autoformer.py b/tests/test_models/test_algorithms/test_autoformer.py index edcafd8f5..2baa703fe 100644 --- a/tests/test_models/test_algorithms/test_autoformer.py +++ b/tests/test_models/test_algorithms/test_autoformer.py @@ -40,8 +40,7 @@ ALGORITHM_CFG = dict( type='mmrazor.Autoformer', architecture=ARCHITECTURE_CFG, - mutator=MUTATOR_CFG, - fix_subnet=None) + mutator=MUTATOR_CFG) class TestAutoFormer(TestCase): @@ -57,9 +56,6 @@ def test_init(self): random_subnet = autoformer_algo.mutator.sample_choices() self.assertIsInstance(random_subnet, dict) - # autoformer_algo support training - self.assertTrue(autoformer_algo.is_supernet) - # initiate autoformer without any `mutator`. 
ALGORITHM_CFG_SUPERNET.pop('type') ALGORITHM_CFG_SUPERNET['mutator'] = None diff --git a/tests/test_models/test_algorithms/test_bignas.py b/tests/test_models/test_algorithms/test_bignas.py index 5a844fc7a..41ce4673d 100644 --- a/tests/test_models/test_algorithms/test_bignas.py +++ b/tests/test_models/test_algorithms/test_bignas.py @@ -94,9 +94,6 @@ def test_init(self): random_subnet = bignas_algo.mutator.sample_choices() self.assertIsInstance(random_subnet, dict) - # bignas_algo support training - self.assertTrue(bignas_algo.is_supernet) - # initiate bignas without any `mutator`. ALGORITHM_CFG_SUPERNET.pop('type') ALGORITHM_CFG_SUPERNET['mutator'] = None diff --git a/tests/test_models/test_algorithms/test_darts.py b/tests/test_models/test_algorithms/test_darts.py index efca993b7..8360c1a3a 100644 --- a/tests/test_models/test_algorithms/test_darts.py +++ b/tests/test_models/test_algorithms/test_darts.py @@ -103,14 +103,8 @@ def test_init(self) -> None: algo = Darts(model, mutator) self.assertIsInstance(algo.mutator, NasMutator) - # initiate darts when `fix_subnet` is not None - fix_subnet = { - 'normal': { - 'chosen': ['torch_conv2d_3x3', 'torch_conv2d_7x7'] - } - } - algo = Darts(model, mutator, fix_subnet=fix_subnet) - self.assertEqual(algo.architecture.mutable.num_choices, 2) + algo = Darts(model, mutator) + self.assertEqual(algo.architecture.mutable.num_choices, 3) # initiate darts with error type `mutator` with self.assertRaisesRegex(TypeError, 'mutator should be'): @@ -129,16 +123,6 @@ def test_forward_loss(self) -> None: loss = algo(inputs, mode='loss') self.assertIsInstance(loss, dict) - # subnet - fix_subnet = { - 'normal': { - 'chosen': ['torch_conv2d_3x3', 'torch_conv2d_7x7'] - } - } - algo = Darts(model, fix_subnet=fix_subnet) - loss = algo(inputs, mode='loss') - self.assertIsInstance(loss, dict) - def _prepare_fake_data(self) -> Dict: imgs = torch.randn(16, 3, 224, 224).to(self.device) data_samples = [ diff --git 
a/tests/test_models/test_algorithms/test_dmcp.py b/tests/test_models/test_algorithms/test_dmcp.py index 044dea9d0..5ec199b20 100644 --- a/tests/test_models/test_algorithms/test_dmcp.py +++ b/tests/test_models/test_algorithms/test_dmcp.py @@ -73,9 +73,6 @@ def test_init(self): # dmcp mutators include channel_mutator and value_mutator assert isinstance(dmcp_algo.mutator, DMCPChannelMutator) - # dmcp_algo support training - self.assertTrue(dmcp_algo.is_supernet) - ALGORITHM_CFG_SUPERNET.pop('type') fake_distiller = 'distiller' # initiate dmcp without `distiller`. @@ -96,7 +93,6 @@ def test_loss(self): # subernet inputs = torch.randn(1, 3, 224, 224) dmcp = MODELS.build(ALGORITHM_CFG) - dmcp.is_supernet = False loss = dmcp(inputs, mode='tensor') assert loss.size(1) == 1000 @@ -137,12 +133,6 @@ def test_dmcp_train_step(self): assert losses['random_subnet1.loss'] > 0 assert losses['random_subnet2.loss'] > 0 - def test_dmcp_load_fix_subnet(self): - ALGORITHM_CFG_SUPERNET = copy.deepcopy(ALGORITHM_CFG) - ALGORITHM_CFG_SUPERNET['fix_subnet'] = \ - 'configs/pruning/mmcls/dmcp/DMCP_R50_2G.yaml' - _ = MODELS.build(ALGORITHM_CFG_SUPERNET) - def test_dmcp_compute_flops_loss(self): dmcp = MODELS.build(ALGORITHM_CFG) for type in ['l2', 'inverted_log_l1', 'log_l1', 'l1']: diff --git a/tests/test_models/test_algorithms/test_dsnas.py b/tests/test_models/test_algorithms/test_dsnas.py index 423d27f42..f652a3faf 100644 --- a/tests/test_models/test_algorithms/test_dsnas.py +++ b/tests/test_models/test_algorithms/test_dsnas.py @@ -96,11 +96,6 @@ def test_init(self) -> None: algo = DSNAS(model, mutator) self.assertIsInstance(algo.mutator, NasMutator) - # initiate Dsnas when `fix_subnet` is not None - fix_subnet = {'mutable': {'chosen': 'torch_conv2d_5x5'}} - algo = DSNAS(model, mutator, fix_subnet=fix_subnet) - self.assertEqual(algo.architecture.mutable.num_choices, 1) - # initiate Dsnas with error type `mutator` with self.assertRaisesRegex(TypeError, 'mutator should be'): DSNAS(model, 
model) @@ -116,12 +111,6 @@ def test_forward_loss(self) -> None: loss = algo(inputs, mode='loss') self.assertIsInstance(loss, dict) - # subnet - fix_subnet = {'mutable': {'chosen': 'torch_conv2d_5x5'}} - algo = DSNAS(model, fix_subnet=fix_subnet) - loss = algo(inputs, mode='loss') - self.assertIsInstance(loss, dict) - def _prepare_fake_data(self): imgs = torch.randn(16, 3, 224, 224).to(self.device) data_samples = [ diff --git a/tests/test_models/test_algorithms/test_spos.py b/tests/test_models/test_algorithms/test_spos.py index 1a223b04a..9d606cfa6 100644 --- a/tests/test_models/test_algorithms/test_spos.py +++ b/tests/test_models/test_algorithms/test_spos.py @@ -57,11 +57,6 @@ def test_init(self): alg = SPOS(model, mutator) self.assertIsInstance(alg.mutator, NasMutator) - # initiate spos when `fix_subnet` is not None. - fix_subnet = {'mutable': {'chosen': 'conv1'}} - alg = SPOS(model, mutator, fix_subnet=fix_subnet) - self.assertEqual(alg.architecture.mutable.num_choices, 1) - # initiate spos with error type `mutator`. 
with self.assertRaisesRegex(TypeError, 'mutator should be'): SPOS(model, model) @@ -75,9 +70,3 @@ def test_forward_loss(self): alg = SPOS(model, mutator) loss = alg(inputs, mode='loss') self.assertIsInstance(loss, dict) - - # subnet - fix_subnet = {'mutable': {'chosen': 'conv1'}} - alg = SPOS(model, fix_subnet=fix_subnet) - loss = alg(inputs, mode='loss') - self.assertIsInstance(loss, dict) diff --git a/tests/test_registry/test_registry.py b/tests/test_registry/test_registry.py index 4830803a4..6652cb943 100644 --- a/tests/test_registry/test_registry.py +++ b/tests/test_registry/test_registry.py @@ -100,6 +100,8 @@ def test_build_subnet_prune_from_cfg_by_mutator(self): init_cfg=init_cfg) model = MODELS.build(model_cfg) self.assertTrue(isinstance(model, BaseModel)) + # make sure the model is pruned + assert model.backbone.layer1[0].conv1.weight.size()[0] == 41 def test_build_subnet_prune_from_cfg_by_mutable(self): mutator_cfg = fileio.load('tests/data/test_registry/subnet.json') From 20779c7c365d226e9ee878d152c683a981d1304f Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Wed, 8 Feb 2023 20:47:15 +0800 Subject: [PATCH 31/59] fix UT --- configs/nas/mmcls/autoformer/README.md | 2 +- ...ttentive_mobilenet_supernet_32xb64_in1k.py | 2 +- .../darts/darts_subnet_1xb96_cifar10_2.0.py | 6 +- .../darts_subnet_1xb96_cifar10_2.0_mmrazor.py | 6 +- .../mmcls/dsnas/dsnas_subnet_8xb128_in1k.py | 4 +- .../ofa_mobilenet_subnet_8xb256_in1k.py | 3 +- .../spos/spos_mobilenet_subnet_8xb128_in1k.py | 3 +- .../spos_shufflenet_subnet_8xb128_in1k.py | 3 +- .../detnas_frcnn_shufflenet_subnet_coco_1x.py | 3 +- .../detnas_shufflenet_subnet_8xb128_in1k.py | 4 +- mmrazor/engine/runner/subnet_val_loop.py | 4 +- mmrazor/structures/subnet/fix_subnet.py | 2 +- tests/data/MBV2_slimmable_channel_config.json | 362 ++++++++++++++++++ .../test_algorithms/test_slimmable_network.py | 2 +- 14 files changed, 380 insertions(+), 26 deletions(-) create mode 100644 tests/data/MBV2_slimmable_channel_config.json 
diff --git a/configs/nas/mmcls/autoformer/README.md b/configs/nas/mmcls/autoformer/README.md index 294463e9b..768d7e027 100644 --- a/configs/nas/mmcls/autoformer/README.md +++ b/configs/nas/mmcls/autoformer/README.md @@ -43,7 +43,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ ```bash CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ - configs/nas/mmcls/autoformer/autoformer_supernet_8xb128_in1k.py \ + configs/nas/mmcls/autoformer/autoformer_subnet_8xb128_in1k.py \ $STEP2_CKPT 1 --work-dir $WORK_DIR \ --cfg-options algorithm.mutable_cfg=$STEP2_SUBNET_YAML ``` diff --git a/configs/nas/mmcls/bignas/attentive_mobilenet_supernet_32xb64_in1k.py b/configs/nas/mmcls/bignas/attentive_mobilenet_supernet_32xb64_in1k.py index 3b44dc36f..303fea924 100644 --- a/configs/nas/mmcls/bignas/attentive_mobilenet_supernet_32xb64_in1k.py +++ b/configs/nas/mmcls/bignas/attentive_mobilenet_supernet_32xb64_in1k.py @@ -44,7 +44,7 @@ loss_kl=dict( preds_S=dict(recorder='fc', from_student=True), preds_T=dict(recorder='fc', from_student=False)))), - mutators=dict(type='mmrazor.NasMutator')) + mutator=dict(type='mmrazor.NasMutator')) model_wrapper_cfg = dict( type='mmrazor.BigNASDDP', diff --git a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py index e1ea12dd6..ecc3dde30 100644 --- a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py +++ b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py @@ -30,10 +30,8 @@ cal_acc=True)) model = dict( - _scope_='mmrazor', - type='sub_model', + type='mmrazor.sub_model', cfg=supernet, - fix_subnet='configs/nas/mmcls/darts/DARTS_SUBNET_CIFAR_PAPER_ALIAS.yaml', - mode='mutator') + fix_subnet='configs/nas/mmcls/darts/DARTS_SUBNET_CIFAR_PAPER_ALIAS.yaml') find_unused_parameter = False diff --git a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0_mmrazor.py b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0_mmrazor.py index 0e3e6505d..b77179e43 
100644 --- a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0_mmrazor.py +++ b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0_mmrazor.py @@ -30,10 +30,8 @@ cal_acc=True)) model = dict( - _scope_='mmrazor', - type='sub_model', + type='mmrazor.sub_model', cfg=supernet, - fix_subnet='configs/nas/mmcls/darts/DARTS_SUBNET_CIFAR_MMRAZOR_97.32.yaml', - mode='mutator') + fix_subnet='configs/nas/mmcls/darts/DARTS_SUBNET_CIFAR_MMRAZOR_97.32.yaml') find_unused_parameter = False diff --git a/configs/nas/mmcls/dsnas/dsnas_subnet_8xb128_in1k.py b/configs/nas/mmcls/dsnas/dsnas_subnet_8xb128_in1k.py index 3d52cfb62..a338ecb69 100644 --- a/configs/nas/mmcls/dsnas/dsnas_subnet_8xb128_in1k.py +++ b/configs/nas/mmcls/dsnas/dsnas_subnet_8xb128_in1k.py @@ -8,8 +8,8 @@ type='sub_model', cfg=supernet, fix_subnet= # noqa: E251 - 'configs/nas/mmcls/dsnas/DSNAS_SUBNET_IMAGENET_PAPER_ALIAS.yaml', # noqa: E501 - mode='mutator') + 'configs/nas/mmcls/dsnas/DSNAS_SUBNET_IMAGENET_PAPER_ALIAS.yaml' +) # noqa: E501 _base_.model = model_cfg diff --git a/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py b/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py index bbe27468d..fdf6aa6af 100644 --- a/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py @@ -6,8 +6,7 @@ _scope_='mmrazor', type='sub_model', cfg=supernet, - fix_subnet='configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml', - mode='mutator') + fix_subnet='configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml') _base_.model = model_cfg diff --git a/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py b/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py index 1e7a20d7e..37d9a767c 100644 --- a/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py +++ b/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py @@ -7,8 +7,7 @@ _scope_='mmrazor', type='sub_model', cfg=supernet, - 
fix_subnet='configs/nas/spos/AngleNAS_SHUFFLENETV2_IN1k_2.0.yaml', - mode='mutator') + fix_subnet='configs/nas/spos/AngleNAS_SHUFFLENETV2_IN1k_2.0.yaml') _base_.model = model_cfg diff --git a/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py b/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py index 3c344cfa1..5556476e6 100644 --- a/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py +++ b/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py @@ -7,8 +7,7 @@ _scope_='mmrazor', type='sub_model', cfg=supernet, - fix_subnet='configs/nas/mmcls/spos/SPOS_SUBNET.yaml', - mode='mutator') + fix_subnet='configs/nas/mmcls/spos/SPOS_SUBNET.yaml') _base_.model = model_cfg diff --git a/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py b/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py index afa02cb53..5c51903ce 100644 --- a/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py +++ b/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py @@ -7,8 +7,7 @@ _scope_='mmrazor', type='sub_model', cfg=supernet, - fix_subnet='configs/nas/mmdet/detnas/DETNAS_SUBNET.yaml', - mode='mutator') + fix_subnet='configs/nas/mmdet/detnas/DETNAS_SUBNET.yaml') _base_.model = model_cfg diff --git a/configs/nas/mmdet/detnas/detnas_shufflenet_subnet_8xb128_in1k.py b/configs/nas/mmdet/detnas/detnas_shufflenet_subnet_8xb128_in1k.py index d929bb83a..ea3dad587 100644 --- a/configs/nas/mmdet/detnas/detnas_shufflenet_subnet_8xb128_in1k.py +++ b/configs/nas/mmdet/detnas/detnas_shufflenet_subnet_8xb128_in1k.py @@ -8,8 +8,8 @@ type='sub_model', cfg=supernet, fix_subnet= # noqa: E251 - 'https://download.openmmlab.com/mmrazor/v1/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20220715-61d2e900_subnet_cfg_v1.yaml', # noqa: E501 - mode='mutator') + 
'https://download.openmmlab.com/mmrazor/v1/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20220715-61d2e900_subnet_cfg_v1.yaml' # noqa: E501 +) _base_.model = model_cfg diff --git a/mmrazor/engine/runner/subnet_val_loop.py b/mmrazor/engine/runner/subnet_val_loop.py index 55e7c0c13..8e9176582 100644 --- a/mmrazor/engine/runner/subnet_val_loop.py +++ b/mmrazor/engine/runner/subnet_val_loop.py @@ -72,11 +72,11 @@ def run(self): elif hasattr(self.model, 'sample_kinds'): for kind in self.model.sample_kinds: if kind == 'max': - self.model.set_max_subnet() + self.model.mutator.set_max_choices() metrics = self._evaluate_once() all_metrics.update(add_prefix(metrics, 'max_subnet')) elif kind == 'min': - self.model.set_min_subnet() + self.model.mutator.set_min_choices() metrics = self._evaluate_once() all_metrics.update(add_prefix(metrics, 'min_subnet')) elif 'random' in kind: diff --git a/mmrazor/structures/subnet/fix_subnet.py b/mmrazor/structures/subnet/fix_subnet.py index 07447ad40..f000db62c 100644 --- a/mmrazor/structures/subnet/fix_subnet.py +++ b/mmrazor/structures/subnet/fix_subnet.py @@ -125,7 +125,7 @@ def export_fix_subnet( model: nn.Module, export_subnet_mode: str = 'mutable', slice_weight: bool = False, - export_channel: bool = True) -> Tuple[FixMutable, Optional[Dict]]: + export_channel: bool = False) -> Tuple[FixMutable, Optional[Dict]]: """Export subnet that can be loaded by :func:`load_fix_subnet`. Include subnet structure and subnet weight. 
diff --git a/tests/data/MBV2_slimmable_channel_config.json b/tests/data/MBV2_slimmable_channel_config.json new file mode 100644 index 000000000..4b9e421f3 --- /dev/null +++ b/tests/data/MBV2_slimmable_channel_config.json @@ -0,0 +1,362 @@ +{ + "backbone.conv1.conv_(0, 48)_48": { + "init_args": { + "num_channels": 48, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 8, + 8, + 32 + ], + "choice_mode": "number" + }, + "choice": 32 + }, + "backbone.layer1.0.conv.1.conv_(0, 24)_24": { + "init_args": { + "num_channels": 24, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 8, + 8, + 16 + ], + "choice_mode": "number" + }, + "choice": 16 + }, + "backbone.layer2.0.conv.0.conv_(0, 144)_144": { + "init_args": { + "num_channels": 144, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 96, + 96, + 144 + ], + "choice_mode": "number" + }, + "choice": 144 + }, + "backbone.layer2.0.conv.2.conv_(0, 40)_40": { + "init_args": { + "num_channels": 40, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 16, + 16, + 24 + ], + "choice_mode": "number" + }, + "choice": 24 + }, + "backbone.layer2.1.conv.0.conv_(0, 240)_240": { + "init_args": { + "num_channels": 240, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 96, + 96, + 176 + ], + "choice_mode": "number" + }, + "choice": 176 + }, + "backbone.layer3.0.conv.0.conv_(0, 240)_240": { + "init_args": { + "num_channels": 240, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 96, + 96, + 192 + ], + "choice_mode": "number" + }, + "choice": 192 + }, + "backbone.layer3.0.conv.2.conv_(0, 48)_48": { + "init_args": { + "num_channels": 48, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 24, + 24, + 48 + ], + "choice_mode": "number" + }, + "choice": 48 + }, + "backbone.layer3.1.conv.0.conv_(0, 288)_288": { + "init_args": { + 
"num_channels": 288, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 144, + 144, + 240 + ], + "choice_mode": "number" + }, + "choice": 240 + }, + "backbone.layer3.2.conv.0.conv_(0, 288)_288": { + "init_args": { + "num_channels": 288, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 144, + 144, + 144 + ], + "choice_mode": "number" + }, + "choice": 144 + }, + "backbone.layer4.0.conv.0.conv_(0, 288)_288": { + "init_args": { + "num_channels": 288, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 144, + 144, + 264 + ], + "choice_mode": "number" + }, + "choice": 264 + }, + "backbone.layer4.0.conv.2.conv_(0, 96)_96": { + "init_args": { + "num_channels": 96, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 48, + 56, + 88 + ], + "choice_mode": "number" + }, + "choice": 88 + }, + "backbone.layer4.1.conv.0.conv_(0, 576)_576": { + "init_args": { + "num_channels": 576, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 288, + 288, + 288 + ], + "choice_mode": "number" + }, + "choice": 288 + }, + "backbone.layer4.2.conv.0.conv_(0, 576)_576": { + "init_args": { + "num_channels": 576, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 288, + 288, + 336 + ], + "choice_mode": "number" + }, + "choice": 336 + }, + "backbone.layer4.3.conv.0.conv_(0, 576)_576": { + "init_args": { + "num_channels": 576, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 288, + 288, + 432 + ], + "choice_mode": "number" + }, + "choice": 432 + }, + "backbone.layer5.0.conv.0.conv_(0, 576)_576": { + "init_args": { + "num_channels": 576, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 288, + 288, + 576 + ], + "choice_mode": "number" + }, + "choice": 576 + }, + "backbone.layer5.0.conv.2.conv_(0, 144)_144": { + "init_args": { + "num_channels": 144, + "divisor": 1, + 
"min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 64, + 96, + 144 + ], + "choice_mode": "number" + }, + "choice": 144 + }, + "backbone.layer5.1.conv.0.conv_(0, 864)_864": { + "init_args": { + "num_channels": 864, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 432, + 432, + 576 + ], + "choice_mode": "number" + }, + "choice": 576 + }, + "backbone.layer5.2.conv.0.conv_(0, 864)_864": { + "init_args": { + "num_channels": 864, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 432, + 432, + 648 + ], + "choice_mode": "number" + }, + "choice": 648 + }, + "backbone.layer6.0.conv.0.conv_(0, 864)_864": { + "init_args": { + "num_channels": 864, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 648, + 864, + 864 + ], + "choice_mode": "number" + }, + "choice": 864 + }, + "backbone.layer6.0.conv.2.conv_(0, 240)_240": { + "init_args": { + "num_channels": 240, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 176, + 240, + 240 + ], + "choice_mode": "number" + }, + "choice": 240 + }, + "backbone.layer6.1.conv.0.conv_(0, 1440)_1440": { + "init_args": { + "num_channels": 1440, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 720, + 1440, + 1440 + ], + "choice_mode": "number" + }, + "choice": 1440 + }, + "backbone.layer6.2.conv.0.conv_(0, 1440)_1440": { + "init_args": { + "num_channels": 1440, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 720, + 960, + 1440 + ], + "choice_mode": "number" + }, + "choice": 1440 + }, + "backbone.layer7.0.conv.0.conv_(0, 1440)_1440": { + "init_args": { + "num_channels": 1440, + "divisor": 1, + "min_value": 1, + "min_ratio": 0.9, + "candidate_choices": [ + 1440, + 1440, + 1440 + ], + "choice_mode": "number" + }, + "choice": 1440 + }, + "backbone.layer7.0.conv.2.conv_(0, 480)_480": { + "init_args": { + "num_channels": 480, + "divisor": 1, + "min_value": 1, 
+ "min_ratio": 0.9, + "candidate_choices": [ + 280, + 480, + 480 + ], + "choice_mode": "number" + }, + "choice": 480 + } +} \ No newline at end of file diff --git a/tests/test_models/test_algorithms/test_slimmable_network.py b/tests/test_models/test_algorithms/test_slimmable_network.py index 3f6fdba57..2402e2493 100644 --- a/tests/test_models/test_algorithms/test_slimmable_network.py +++ b/tests/test_models/test_algorithms/test_slimmable_network.py @@ -24,7 +24,7 @@ in_channels=1920, loss=dict(type='CrossEntropyLoss', loss_weight=1.0), topk=(1, 5))) -CHANNEL_CFG_PATH = 'tests/data/MBV2_slimmable_config.json' +CHANNEL_CFG_PATH = 'tests/data/MBV2_slimmable_channel_config.json' MUTATOR_CFG = dict( type='SlimmableChannelMutator', From 8f0658746a92dae4c50e24c5ce67bdbc9ccb6f8a Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Wed, 8 Feb 2023 20:54:26 +0800 Subject: [PATCH 32/59] fix bignas config --- .../nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py b/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py index 89d73dd71..2e2827cdd 100644 --- a/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py @@ -6,8 +6,7 @@ _scope_='mmrazor', type='sub_model', cfg=supernet, - fix_subnet='configs/nas/mmcls/bignas/ATTENTIVE_SUBNET_A6.yaml', - mode='mutator') + fix_subnet='configs/nas/mmcls/bignas/ATTENTIVE_SUBNET_A6.yaml') _base_.model = model_cfg From 630359f7a66b84b211979e11319af0bbda5a9e4e Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Wed, 8 Feb 2023 21:45:00 +0800 Subject: [PATCH 33/59] fix UT for dcff & registry --- tests/test_models/test_algorithms/test_dcff_network.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_models/test_algorithms/test_dcff_network.py 
b/tests/test_models/test_algorithms/test_dcff_network.py index 657d7a09b..397c0b3fa 100644 --- a/tests/test_models/test_algorithms/test_dcff_network.py +++ b/tests/test_models/test_algorithms/test_dcff_network.py @@ -281,7 +281,10 @@ def test_export_subnet(self): self.assertEqual(algorithm.step_freq, epoch_step * iter_per_epoch) fix_subnet, static_model = export_fix_subnet( - algorithm, export_subnet_mode='mutator', slice_weight=True) + algorithm, + export_subnet_mode='mutator', + slice_weight=True, + export_channel=True) fix_subnet = json.dumps(fix_subnet, indent=4, separators=(',', ':')) subnet_name = 'subnet.json' weight_name = 'subnet_weight.pth' From 232869742299744bb1f2569f14b7663b562b4c3b Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Wed, 15 Feb 2023 15:24:16 +0800 Subject: [PATCH 34/59] update Ut&channel_mutator --- configs/nas/mmcls/autoformer/README.md | 3 +- .../autoformer_subnet_8xb256_in1k.py | 13 + configs/nas/mmcls/autoslim/README.md | 16 +- ..._mbv2_1.5x_slimmable_subnet_8xb256_in1k.py | 23 +- configs/pruning/mmcls/dcff/fix_subnet.json | 600 ------- .../pruning/mmcls/dmcp/DMCP_MBV2_100M.json | 1458 ---------------- configs/pruning/mmcls/dmcp/DMCP_R50_2G.json | 1542 ----------------- .../mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py | 2 + .../mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py | 2 + configs/pruning/mmdet/dcff/fix_subnet.json | 600 ------- configs/pruning/mmpose/dcff/fix_subnet.json | 600 ------- configs/pruning/mmseg/dcff/fix_subnet.json | 600 ------- .../channel_mutator/channel_mutator.py | 39 +- mmrazor/structures/subnet/fix_subnet.py | 3 +- .../test_models/test_algorithms/test_darts.py | 20 +- .../test_models/test_algorithms/test_dsnas.py | 13 + .../test_models/test_algorithms/test_spos.py | 13 + 17 files changed, 121 insertions(+), 5426 deletions(-) create mode 100644 configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py diff --git a/configs/nas/mmcls/autoformer/README.md b/configs/nas/mmcls/autoformer/README.md index 
768d7e027..c24cb0ac0 100644 --- a/configs/nas/mmcls/autoformer/README.md +++ b/configs/nas/mmcls/autoformer/README.md @@ -44,8 +44,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ ```bash CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ configs/nas/mmcls/autoformer/autoformer_subnet_8xb128_in1k.py \ - $STEP2_CKPT 1 --work-dir $WORK_DIR \ - --cfg-options algorithm.mutable_cfg=$STEP2_SUBNET_YAML + $STEP2_CKPT 1 --work-dir $WORK_DIR ``` ## Results and models diff --git a/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py b/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py new file mode 100644 index 000000000..c0f56a728 --- /dev/null +++ b/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py @@ -0,0 +1,13 @@ +_base_ = 'autoformer_supernet_32xb256_in1k.py' + +supernet = _base_.supernet + +model_cfg = dict( + _scope_='mmrazor', + type='sub_model', + cfg=supernet, + fix_subnet='STEP2_SUBNET_YAML.yaml') + +_base_.model = model_cfg + +test_cfg = dict(evaluate_fixed_subnet=True) diff --git a/configs/nas/mmcls/autoslim/README.md b/configs/nas/mmcls/autoslim/README.md index 6f38e1da0..e7292fae3 100644 --- a/configs/nas/mmcls/autoslim/README.md +++ b/configs/nas/mmcls/autoslim/README.md @@ -17,7 +17,7 @@ Notably, by setting optimized channel numbers, our AutoSlim-MobileNet-v2 at 305M ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ - configs/pruning/autoslim/autoslim_mbv2_supernet_8xb256_in1k.py 4 \ + configs/nas/autoslim/autoslim_mbv2_1.5x_supernet_8xb256_in1k.py 4 \ --work-dir $WORK_DIR ``` @@ -25,7 +25,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ - configs/pruning/autoslim/autoslim_mbv2_search_8xb1024_in1k.py 4 \ + configs/nas/autoslim/autoslim_mbv2_1.5x_search_8xb256_in1k.py 4 \ --work-dir $WORK_DIR --cfg-options load_from=$STEP1_CKPT ``` @@ -33,27 +33,27 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 
./tools/dist_train.sh \ ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ - configs/pruning/autoslim/autoslim_mbv2_subnet_8xb256_in1k.py 4 \ + configs/nas/autoslim/autoslim_mbv2_subnet_8xb256_in1k.py 4 \ --work-dir $WORK_DIR \ - --cfg-options algorithm.channel_cfg=configs/pruning/autoslim/AUTOSLIM_MBV2_530M_OFFICIAL.yaml,configs/pruning/autoslim/AUTOSLIM_MBV2_320M_OFFICIAL.yaml,configs/pruning/autoslim/AUTOSLIM_MBV2_220M_OFFICIAL.yaml + --cfg-options algorithm.channel_cfg=configs/nas/autoslim/AUTOSLIM_MBV2_530M_OFFICIAL.yaml,configs/nas/autoslim/AUTOSLIM_MBV2_320M_OFFICIAL.yaml,configs/nas/autoslim/AUTOSLIM_MBV2_220M_OFFICIAL.yaml ``` ### Split checkpoint ```bash python ./tools/model_converters/split_checkpoint.py \ - configs/pruning/autoslim/autoslim_mbv2_subnet_8xb256_in1k.py \ + configs/nas/autoslim/autoslim_mbv2_subnet_8xb256_in1k.py \ $RETRAINED_CKPT \ - --channel-cfgs configs/pruning/autoslim/AUTOSLIM_MBV2_530M_OFFICIAL.yaml configs/pruning/autoslim/AUTOSLIM_MBV2_320M_OFFICIAL.yaml configs/pruning/autoslim/AUTOSLIM_MBV2_220M_OFFICIAL.yaml + --channel-cfgs configs/nas/autoslim/AUTOSLIM_MBV2_530M_OFFICIAL.yaml configs/nas/autoslim/AUTOSLIM_MBV2_320M_OFFICIAL.yaml configs/nas/autoslim/AUTOSLIM_MBV2_220M_OFFICIAL.yaml ``` ### Subnet inference ```bash CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ - configs/pruning/autoslim/autoslim_mbv2_subnet_8xb256_in1k.py \ + configs/nas/autoslim/autoslim_mbv2_subnet_8xb256_in1k.py \ $SEARCHED_CKPT 1 --work-dir $WORK_DIR \ - --cfg-options algorithm.channel_cfg=configs/pruning/autoslim/AUTOSLIM_MBV2_530M_OFFICIAL.yaml # or modify the config directly + --cfg-options algorithm.channel_cfg=configs/nas/autoslim/AUTOSLIM_MBV2_530M_OFFICIAL.yaml # or modify the config directly ``` ## Results and models diff --git a/configs/nas/mmcls/autoslim/autoslim_mbv2_1.5x_slimmable_subnet_8xb256_in1k.py b/configs/nas/mmcls/autoslim/autoslim_mbv2_1.5x_slimmable_subnet_8xb256_in1k.py index a1f4f56dc..61d64a226 
100644 --- a/configs/nas/mmcls/autoslim/autoslim_mbv2_1.5x_slimmable_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/autoslim/autoslim_mbv2_1.5x_slimmable_subnet_8xb256_in1k.py @@ -21,13 +21,26 @@ # !autoslim algorithm config # ========================================================================== -supernet.data_preprocessor = data_preprocessor model = dict( + _delete_=True, _scope_='mmrazor', - type='sub_model', - cfg=supernet, - fix_subnet='tests/data/MBV2_slimmable_config.json', - mode='mutator') + type='SlimmableNetwork', + architecture=supernet, + data_preprocessor=data_preprocessor, + mutator=dict( + type='SlimmableChannelMutator', + channel_unit_cfg=dict( + type='SlimmableChannelUnit', + units='tests/data/MBV2_slimmable_config.json'), + parse_cfg=dict( + type='ChannelAnalyzer', + demo_input=(1, 3, 224, 224), + tracer_type='BackwardTracer'))) + +model_wrapper_cfg = dict( + type='mmrazor.SlimmableNetworkDDP', + broadcast_buffers=False, + find_unused_parameters=True) val_cfg = dict(type='mmrazor.SlimmableValLoop') diff --git a/configs/pruning/mmcls/dcff/fix_subnet.json b/configs/pruning/mmcls/dcff/fix_subnet.json index 0522765d1..dfdcea758 100644 --- a/configs/pruning/mmcls/dcff/fix_subnet.json +++ b/configs/pruning/mmcls/dcff/fix_subnet.json @@ -14,102 +14,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.conv1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.bn2", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.conv1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.bn2", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.conv1", - "start":0, - "end":64, - 
"is_output_channel":false - }, - { - "name":"backbone.layer2.0.downsample.0", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.conv1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.bn1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.conv2", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.bn2", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.conv2", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.bn2", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":1.0 }, "backbone.layer1.0.conv1_(0, 64)_64":{ @@ -120,36 +24,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.0.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.conv2", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.0.conv1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.bn1", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":0.640625 }, "backbone.layer1.1.conv1_(0, 64)_64":{ @@ -160,36 +34,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.1.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.conv2", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.1.conv1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.bn1", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":0.640625 }, "backbone.layer2.0.conv1_(0, 
128)_128":{ @@ -200,36 +44,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.0.bn1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.conv2", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.0.conv1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.bn1", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer2.0.conv2_(0, 128)_128":{ @@ -240,96 +54,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.0.bn2", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.downsample.1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.conv1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.bn2", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.conv1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.downsample.0", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.0.conv2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.bn2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.downsample.0", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.downsample.1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.conv2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - 
"name":"backbone.layer2.1.bn2", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":0.59375 }, "backbone.layer2.1.conv1_(0, 128)_128":{ @@ -340,36 +64,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.1.bn1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.conv2", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.1.conv1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.bn1", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer3.0.conv1_(0, 256)_256":{ @@ -380,36 +74,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.0.bn1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.conv2", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.0.conv1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.bn1", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer3.0.conv2_(0, 256)_256":{ @@ -420,96 +84,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.0.bn2", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.downsample.1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.conv1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.bn2", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.conv1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - 
"name":"backbone.layer4.0.downsample.0", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.0.conv2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.bn2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.downsample.0", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.downsample.1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.conv2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.bn2", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":0.59765625 }, "backbone.layer3.1.conv1_(0, 256)_256":{ @@ -520,36 +94,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.1.bn1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.conv2", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.1.conv1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.bn1", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer4.0.conv1_(0, 512)_512":{ @@ -560,36 +104,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.0.bn1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.conv2", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.0.conv1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.bn1", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":0.69921875 }, 
"backbone.layer4.0.conv2_(0, 512)_512":{ @@ -600,90 +114,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.0.bn2", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.downsample.1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.bn2", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"head.fc", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.0.conv2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.bn2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.downsample.0", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.downsample.1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.conv2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.bn2", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":0.69921875 }, "backbone.layer4.1.conv1_(0, 512)_512":{ @@ -694,36 +124,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.1.bn1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv2", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.1.conv1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.bn1", - "start":0, - "end":512, - "is_output_channel":true - } - ] 
- }, "choice":0.69921875 } } diff --git a/configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.json b/configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.json index d7abf3c80..d4ee2409f 100644 --- a/configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.json +++ b/configs/pruning/mmcls/dmcp/DMCP_MBV2_100M.json @@ -14,60 +14,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.conv1.bn", - "start":0, - "end":32, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.conv.0.conv", - "start":0, - "end":32, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.conv.0.bn", - "start":0, - "end":32, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.conv.1.conv", - "start":0, - "end":32, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.conv1.conv", - "start":0, - "end":32, - "is_output_channel":true - }, - { - "name":"backbone.conv1.bn", - "start":0, - "end":32, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.conv.0.conv", - "start":0, - "end":32, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.conv.0.bn", - "start":0, - "end":32, - "is_output_channel":true - } - ] - }, "choice":9 }, "backbone.layer1.0.conv.1.conv_(0, 16)_16":{ @@ -78,36 +24,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.0.conv.1.bn", - "start":0, - "end":16, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.conv.0.conv", - "start":0, - "end":16, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.0.conv.1.conv", - "start":0, - "end":16, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.conv.1.bn", - "start":0, - "end":16, - "is_output_channel":true - } - ] - }, "choice":10 }, "backbone.layer2.0.conv.0.conv_(0, 96)_96":{ @@ -118,60 +34,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.0.conv.0.bn", - "start":0, - "end":96, - 
"is_output_channel":false - }, - { - "name":"backbone.layer2.0.conv.1.conv", - "start":0, - "end":96, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.conv.1.bn", - "start":0, - "end":96, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.conv.2.conv", - "start":0, - "end":96, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.0.conv.0.conv", - "start":0, - "end":96, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.conv.0.bn", - "start":0, - "end":96, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.conv.1.conv", - "start":0, - "end":96, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.conv.1.bn", - "start":0, - "end":96, - "is_output_channel":true - } - ] - }, "choice":36 }, "backbone.layer2.0.conv.2.conv_(0, 24)_24":{ @@ -182,72 +44,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.0.conv.2.bn", - "start":0, - "end":24, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.conv.0.conv", - "start":0, - "end":24, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.conv.2.bn", - "start":0, - "end":24, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":24, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.conv.0.conv", - "start":0, - "end":24, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.0.conv.2.conv", - "start":0, - "end":24, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.conv.2.bn", - "start":0, - "end":24, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.conv.2.conv", - "start":0, - "end":24, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.conv.2.bn", - "start":0, - "end":24, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":24, - "is_output_channel":true - } - ] - }, "choice":16 }, 
"backbone.layer2.1.conv.0.conv_(0, 144)_144":{ @@ -258,60 +54,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.1.conv.0.bn", - "start":0, - "end":144, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.conv.1.conv", - "start":0, - "end":144, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.conv.1.bn", - "start":0, - "end":144, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.conv.2.conv", - "start":0, - "end":144, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.1.conv.0.conv", - "start":0, - "end":144, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.conv.0.bn", - "start":0, - "end":144, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.conv.1.conv", - "start":0, - "end":144, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.conv.1.bn", - "start":0, - "end":144, - "is_output_channel":true - } - ] - }, "choice":16 }, "backbone.layer3.0.conv.0.conv_(0, 144)_144":{ @@ -322,60 +64,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.0.conv.0.bn", - "start":0, - "end":144, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.conv.1.conv", - "start":0, - "end":144, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.conv.1.bn", - "start":0, - "end":144, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.conv.2.conv", - "start":0, - "end":144, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.0.conv.0.conv", - "start":0, - "end":144, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.conv.0.bn", - "start":0, - "end":144, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.conv.1.conv", - "start":0, - "end":144, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.conv.1.bn", - "start":0, - "end":144, - "is_output_channel":true - } 
- ] - }, "choice":48 }, "backbone.layer3.0.conv.2.conv_(0, 32)_32":{ @@ -386,96 +74,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.0.conv.2.bn", - "start":0, - "end":32, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.conv.0.conv", - "start":0, - "end":32, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.conv.2.bn", - "start":0, - "end":32, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":32, - "is_output_channel":false - }, - { - "name":"backbone.layer3.2.conv.0.conv", - "start":0, - "end":32, - "is_output_channel":false - }, - { - "name":"backbone.layer3.2.conv.2.bn", - "start":0, - "end":32, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.conv.0.conv", - "start":0, - "end":32, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.0.conv.2.conv", - "start":0, - "end":32, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.conv.2.bn", - "start":0, - "end":32, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.conv.2.conv", - "start":0, - "end":32, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.conv.2.bn", - "start":0, - "end":32, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":32, - "is_output_channel":true - }, - { - "name":"backbone.layer3.2.conv.2.conv", - "start":0, - "end":32, - "is_output_channel":true - }, - { - "name":"backbone.layer3.2.conv.2.bn", - "start":0, - "end":32, - "is_output_channel":true - } - ] - }, "choice":21 }, "backbone.layer3.1.conv.0.conv_(0, 192)_192":{ @@ -486,60 +84,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.1.conv.0.bn", - "start":0, - "end":192, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.conv.1.conv", - "start":0, - "end":192, - "is_output_channel":false - }, - { - 
"name":"backbone.layer3.1.conv.1.bn", - "start":0, - "end":192, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.conv.2.conv", - "start":0, - "end":192, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.1.conv.0.conv", - "start":0, - "end":192, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.conv.0.bn", - "start":0, - "end":192, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.conv.1.conv", - "start":0, - "end":192, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.conv.1.bn", - "start":0, - "end":192, - "is_output_channel":true - } - ] - }, "choice":41 }, "backbone.layer3.2.conv.0.conv_(0, 192)_192":{ @@ -550,60 +94,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.2.conv.0.bn", - "start":0, - "end":192, - "is_output_channel":false - }, - { - "name":"backbone.layer3.2.conv.1.conv", - "start":0, - "end":192, - "is_output_channel":false - }, - { - "name":"backbone.layer3.2.conv.1.bn", - "start":0, - "end":192, - "is_output_channel":false - }, - { - "name":"backbone.layer3.2.conv.2.conv", - "start":0, - "end":192, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.2.conv.0.conv", - "start":0, - "end":192, - "is_output_channel":true - }, - { - "name":"backbone.layer3.2.conv.0.bn", - "start":0, - "end":192, - "is_output_channel":true - }, - { - "name":"backbone.layer3.2.conv.1.conv", - "start":0, - "end":192, - "is_output_channel":true - }, - { - "name":"backbone.layer3.2.conv.1.bn", - "start":0, - "end":192, - "is_output_channel":true - } - ] - }, "choice":22 }, "backbone.layer4.0.conv.0.conv_(0, 192)_192":{ @@ -614,60 +104,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.0.conv.0.bn", - "start":0, - "end":192, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.conv.1.conv", - "start":0, - "end":192, - 
"is_output_channel":false - }, - { - "name":"backbone.layer4.0.conv.1.bn", - "start":0, - "end":192, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.conv.2.conv", - "start":0, - "end":192, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.0.conv.0.conv", - "start":0, - "end":192, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.conv.0.bn", - "start":0, - "end":192, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.conv.1.conv", - "start":0, - "end":192, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.conv.1.bn", - "start":0, - "end":192, - "is_output_channel":true - } - ] - }, "choice":60 }, "backbone.layer4.0.conv.2.conv_(0, 64)_64":{ @@ -678,120 +114,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.0.conv.2.bn", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv.0.conv", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv.2.bn", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer4.2.conv.0.conv", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer4.2.conv.2.bn", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer4.3.conv.0.conv", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer4.3.conv.2.bn", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer5.0.conv.0.conv", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.0.conv.2.conv", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.conv.2.bn", - "start":0, - "end":64, - "is_output_channel":true - }, - { - 
"name":"backbone.layer4.1.conv.2.conv", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.conv.2.bn", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer4.2.conv.2.conv", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer4.2.conv.2.bn", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer4.3.conv.2.conv", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer4.3.conv.2.bn", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":24 }, "backbone.layer4.1.conv.0.conv_(0, 384)_384":{ @@ -802,60 +124,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.1.conv.0.bn", - "start":0, - "end":384, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv.1.conv", - "start":0, - "end":384, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv.1.bn", - "start":0, - "end":384, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv.2.conv", - "start":0, - "end":384, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.1.conv.0.conv", - "start":0, - "end":384, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.conv.0.bn", - "start":0, - "end":384, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.conv.1.conv", - "start":0, - "end":384, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.conv.1.bn", - "start":0, - "end":384, - "is_output_channel":true - } - ] - }, "choice":44 }, "backbone.layer4.2.conv.0.conv_(0, 384)_384":{ @@ -866,60 +134,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.2.conv.0.bn", - "start":0, - "end":384, - "is_output_channel":false - }, - { - 
"name":"backbone.layer4.2.conv.1.conv", - "start":0, - "end":384, - "is_output_channel":false - }, - { - "name":"backbone.layer4.2.conv.1.bn", - "start":0, - "end":384, - "is_output_channel":false - }, - { - "name":"backbone.layer4.2.conv.2.conv", - "start":0, - "end":384, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.2.conv.0.conv", - "start":0, - "end":384, - "is_output_channel":true - }, - { - "name":"backbone.layer4.2.conv.0.bn", - "start":0, - "end":384, - "is_output_channel":true - }, - { - "name":"backbone.layer4.2.conv.1.conv", - "start":0, - "end":384, - "is_output_channel":true - }, - { - "name":"backbone.layer4.2.conv.1.bn", - "start":0, - "end":384, - "is_output_channel":true - } - ] - }, "choice":272 }, "backbone.layer4.3.conv.0.conv_(0, 384)_384":{ @@ -930,60 +144,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.3.conv.0.bn", - "start":0, - "end":384, - "is_output_channel":false - }, - { - "name":"backbone.layer4.3.conv.1.conv", - "start":0, - "end":384, - "is_output_channel":false - }, - { - "name":"backbone.layer4.3.conv.1.bn", - "start":0, - "end":384, - "is_output_channel":false - }, - { - "name":"backbone.layer4.3.conv.2.conv", - "start":0, - "end":384, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.3.conv.0.conv", - "start":0, - "end":384, - "is_output_channel":true - }, - { - "name":"backbone.layer4.3.conv.0.bn", - "start":0, - "end":384, - "is_output_channel":true - }, - { - "name":"backbone.layer4.3.conv.1.conv", - "start":0, - "end":384, - "is_output_channel":true - }, - { - "name":"backbone.layer4.3.conv.1.bn", - "start":0, - "end":384, - "is_output_channel":true - } - ] - }, "choice":272 }, "backbone.layer5.0.conv.0.conv_(0, 384)_384":{ @@ -994,60 +154,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer5.0.conv.0.bn", - "start":0, - "end":384, - 
"is_output_channel":false - }, - { - "name":"backbone.layer5.0.conv.1.conv", - "start":0, - "end":384, - "is_output_channel":false - }, - { - "name":"backbone.layer5.0.conv.1.bn", - "start":0, - "end":384, - "is_output_channel":false - }, - { - "name":"backbone.layer5.0.conv.2.conv", - "start":0, - "end":384, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer5.0.conv.0.conv", - "start":0, - "end":384, - "is_output_channel":true - }, - { - "name":"backbone.layer5.0.conv.0.bn", - "start":0, - "end":384, - "is_output_channel":true - }, - { - "name":"backbone.layer5.0.conv.1.conv", - "start":0, - "end":384, - "is_output_channel":true - }, - { - "name":"backbone.layer5.0.conv.1.bn", - "start":0, - "end":384, - "is_output_channel":true - } - ] - }, "choice":310 }, "backbone.layer5.0.conv.2.conv_(0, 96)_96":{ @@ -1058,96 +164,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer5.0.conv.2.bn", - "start":0, - "end":96, - "is_output_channel":false - }, - { - "name":"backbone.layer5.1.conv.0.conv", - "start":0, - "end":96, - "is_output_channel":false - }, - { - "name":"backbone.layer5.1.conv.2.bn", - "start":0, - "end":96, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":96, - "is_output_channel":false - }, - { - "name":"backbone.layer5.2.conv.0.conv", - "start":0, - "end":96, - "is_output_channel":false - }, - { - "name":"backbone.layer5.2.conv.2.bn", - "start":0, - "end":96, - "is_output_channel":false - }, - { - "name":"backbone.layer6.0.conv.0.conv", - "start":0, - "end":96, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer5.0.conv.2.conv", - "start":0, - "end":96, - "is_output_channel":true - }, - { - "name":"backbone.layer5.0.conv.2.bn", - "start":0, - "end":96, - "is_output_channel":true - }, - { - "name":"backbone.layer5.1.conv.2.conv", - "start":0, - "end":96, - "is_output_channel":true - }, - { - 
"name":"backbone.layer5.1.conv.2.bn", - "start":0, - "end":96, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":96, - "is_output_channel":true - }, - { - "name":"backbone.layer5.2.conv.2.conv", - "start":0, - "end":96, - "is_output_channel":true - }, - { - "name":"backbone.layer5.2.conv.2.bn", - "start":0, - "end":96, - "is_output_channel":true - } - ] - }, "choice":36 }, "backbone.layer5.1.conv.0.conv_(0, 576)_576":{ @@ -1158,60 +174,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer5.1.conv.0.bn", - "start":0, - "end":576, - "is_output_channel":false - }, - { - "name":"backbone.layer5.1.conv.1.conv", - "start":0, - "end":576, - "is_output_channel":false - }, - { - "name":"backbone.layer5.1.conv.1.bn", - "start":0, - "end":576, - "is_output_channel":false - }, - { - "name":"backbone.layer5.1.conv.2.conv", - "start":0, - "end":576, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer5.1.conv.0.conv", - "start":0, - "end":576, - "is_output_channel":true - }, - { - "name":"backbone.layer5.1.conv.0.bn", - "start":0, - "end":576, - "is_output_channel":true - }, - { - "name":"backbone.layer5.1.conv.1.conv", - "start":0, - "end":576, - "is_output_channel":true - }, - { - "name":"backbone.layer5.1.conv.1.bn", - "start":0, - "end":576, - "is_output_channel":true - } - ] - }, "choice":294 }, "backbone.layer5.2.conv.0.conv_(0, 576)_576":{ @@ -1222,60 +184,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer5.2.conv.0.bn", - "start":0, - "end":576, - "is_output_channel":false - }, - { - "name":"backbone.layer5.2.conv.1.conv", - "start":0, - "end":576, - "is_output_channel":false - }, - { - "name":"backbone.layer5.2.conv.1.bn", - "start":0, - "end":576, - "is_output_channel":false - }, - { - "name":"backbone.layer5.2.conv.2.conv", - "start":0, - "end":576, - "is_output_channel":false - } - ], - 
"output_related":[ - { - "name":"backbone.layer5.2.conv.0.conv", - "start":0, - "end":576, - "is_output_channel":true - }, - { - "name":"backbone.layer5.2.conv.0.bn", - "start":0, - "end":576, - "is_output_channel":true - }, - { - "name":"backbone.layer5.2.conv.1.conv", - "start":0, - "end":576, - "is_output_channel":true - }, - { - "name":"backbone.layer5.2.conv.1.bn", - "start":0, - "end":576, - "is_output_channel":true - } - ] - }, "choice":351 }, "backbone.layer6.0.conv.0.conv_(0, 576)_576":{ @@ -1286,60 +194,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer6.0.conv.0.bn", - "start":0, - "end":576, - "is_output_channel":false - }, - { - "name":"backbone.layer6.0.conv.1.conv", - "start":0, - "end":576, - "is_output_channel":false - }, - { - "name":"backbone.layer6.0.conv.1.bn", - "start":0, - "end":576, - "is_output_channel":false - }, - { - "name":"backbone.layer6.0.conv.2.conv", - "start":0, - "end":576, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer6.0.conv.0.conv", - "start":0, - "end":576, - "is_output_channel":true - }, - { - "name":"backbone.layer6.0.conv.0.bn", - "start":0, - "end":576, - "is_output_channel":true - }, - { - "name":"backbone.layer6.0.conv.1.conv", - "start":0, - "end":576, - "is_output_channel":true - }, - { - "name":"backbone.layer6.0.conv.1.bn", - "start":0, - "end":576, - "is_output_channel":true - } - ] - }, "choice":693 }, "backbone.layer6.0.conv.2.conv_(0, 160)_160":{ @@ -1350,96 +204,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer6.0.conv.2.bn", - "start":0, - "end":160, - "is_output_channel":false - }, - { - "name":"backbone.layer6.1.conv.0.conv", - "start":0, - "end":160, - "is_output_channel":false - }, - { - "name":"backbone.layer6.1.conv.2.bn", - "start":0, - "end":160, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":160, - 
"is_output_channel":false - }, - { - "name":"backbone.layer6.2.conv.0.conv", - "start":0, - "end":160, - "is_output_channel":false - }, - { - "name":"backbone.layer6.2.conv.2.bn", - "start":0, - "end":160, - "is_output_channel":false - }, - { - "name":"backbone.layer7.0.conv.0.conv", - "start":0, - "end":160, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer6.0.conv.2.conv", - "start":0, - "end":160, - "is_output_channel":true - }, - { - "name":"backbone.layer6.0.conv.2.bn", - "start":0, - "end":160, - "is_output_channel":true - }, - { - "name":"backbone.layer6.1.conv.2.conv", - "start":0, - "end":160, - "is_output_channel":true - }, - { - "name":"backbone.layer6.1.conv.2.bn", - "start":0, - "end":160, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":160, - "is_output_channel":true - }, - { - "name":"backbone.layer6.2.conv.2.conv", - "start":0, - "end":160, - "is_output_channel":true - }, - { - "name":"backbone.layer6.2.conv.2.bn", - "start":0, - "end":160, - "is_output_channel":true - } - ] - }, "choice":80 }, "backbone.layer6.1.conv.0.conv_(0, 960)_960":{ @@ -1450,60 +214,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer6.1.conv.0.bn", - "start":0, - "end":960, - "is_output_channel":false - }, - { - "name":"backbone.layer6.1.conv.1.conv", - "start":0, - "end":960, - "is_output_channel":false - }, - { - "name":"backbone.layer6.1.conv.1.bn", - "start":0, - "end":960, - "is_output_channel":false - }, - { - "name":"backbone.layer6.1.conv.2.conv", - "start":0, - "end":960, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer6.1.conv.0.conv", - "start":0, - "end":960, - "is_output_channel":true - }, - { - "name":"backbone.layer6.1.conv.0.bn", - "start":0, - "end":960, - "is_output_channel":true - }, - { - "name":"backbone.layer6.1.conv.1.conv", - "start":0, - "end":960, - "is_output_channel":true - }, - { - 
"name":"backbone.layer6.1.conv.1.bn", - "start":0, - "end":960, - "is_output_channel":true - } - ] - }, "choice":96 }, "backbone.layer6.2.conv.0.conv_(0, 960)_960":{ @@ -1514,60 +224,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer6.2.conv.0.bn", - "start":0, - "end":960, - "is_output_channel":false - }, - { - "name":"backbone.layer6.2.conv.1.conv", - "start":0, - "end":960, - "is_output_channel":false - }, - { - "name":"backbone.layer6.2.conv.1.bn", - "start":0, - "end":960, - "is_output_channel":false - }, - { - "name":"backbone.layer6.2.conv.2.conv", - "start":0, - "end":960, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer6.2.conv.0.conv", - "start":0, - "end":960, - "is_output_channel":true - }, - { - "name":"backbone.layer6.2.conv.0.bn", - "start":0, - "end":960, - "is_output_channel":true - }, - { - "name":"backbone.layer6.2.conv.1.conv", - "start":0, - "end":960, - "is_output_channel":true - }, - { - "name":"backbone.layer6.2.conv.1.bn", - "start":0, - "end":960, - "is_output_channel":true - } - ] - }, "choice":864 }, "backbone.layer7.0.conv.0.conv_(0, 960)_960":{ @@ -1578,60 +234,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer7.0.conv.0.bn", - "start":0, - "end":960, - "is_output_channel":false - }, - { - "name":"backbone.layer7.0.conv.1.conv", - "start":0, - "end":960, - "is_output_channel":false - }, - { - "name":"backbone.layer7.0.conv.1.bn", - "start":0, - "end":960, - "is_output_channel":false - }, - { - "name":"backbone.layer7.0.conv.2.conv", - "start":0, - "end":960, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer7.0.conv.0.conv", - "start":0, - "end":960, - "is_output_channel":true - }, - { - "name":"backbone.layer7.0.conv.0.bn", - "start":0, - "end":960, - "is_output_channel":true - }, - { - "name":"backbone.layer7.0.conv.1.conv", - "start":0, - "end":960, - 
"is_output_channel":true - }, - { - "name":"backbone.layer7.0.conv.1.bn", - "start":0, - "end":960, - "is_output_channel":true - } - ] - }, "choice":960 }, "backbone.layer7.0.conv.2.conv_(0, 320)_320":{ @@ -1642,36 +244,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer7.0.conv.2.bn", - "start":0, - "end":320, - "is_output_channel":false - }, - { - "name":"backbone.conv2.conv", - "start":0, - "end":320, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer7.0.conv.2.conv", - "start":0, - "end":320, - "is_output_channel":true - }, - { - "name":"backbone.layer7.0.conv.2.bn", - "start":0, - "end":320, - "is_output_channel":true - } - ] - }, "choice":192 }, "backbone.conv2.conv_(0, 1280)_1280":{ @@ -1682,36 +254,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.conv2.bn", - "start":0, - "end":1280, - "is_output_channel":false - }, - { - "name":"head.fc", - "start":0, - "end":1280, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.conv2.conv", - "start":0, - "end":1280, - "is_output_channel":true - }, - { - "name":"backbone.conv2.bn", - "start":0, - "end":1280, - "is_output_channel":true - } - ] - }, "choice":1280 } } diff --git a/configs/pruning/mmcls/dmcp/DMCP_R50_2G.json b/configs/pruning/mmcls/dmcp/DMCP_R50_2G.json index bdbbf1c21..833707cde 100644 --- a/configs/pruning/mmcls/dmcp/DMCP_R50_2G.json +++ b/configs/pruning/mmcls/dmcp/DMCP_R50_2G.json @@ -14,42 +14,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.conv1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.downsample.0", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.conv1", - "start":0, - "end":64, - 
"is_output_channel":true - }, - { - "name":"backbone.bn1", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":52 }, "backbone.layer1.0.conv1_(0, 64)_64":{ @@ -60,36 +24,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.0.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.conv2", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.0.conv1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.bn1", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":22 }, "backbone.layer1.0.conv2_(0, 64)_64":{ @@ -100,36 +34,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.0.bn2", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.conv3", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.0.conv2", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.bn2", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":22 }, "backbone.layer1.0.conv3_(0, 256)_256":{ @@ -140,120 +44,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.0.bn3", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.downsample.1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.conv1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.bn3", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer1.2.conv1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - 
"name":"backbone.layer1.2.bn3", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.conv1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.downsample.0", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.0.conv3", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.bn3", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.downsample.0", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.downsample.1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.conv3", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.bn3", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer1.2.conv3", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer1.2.bn3", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":106 }, "backbone.layer1.1.conv1_(0, 64)_64":{ @@ -264,36 +54,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.1.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.conv2", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.1.conv1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.bn1", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":16 }, "backbone.layer1.1.conv2_(0, 64)_64":{ @@ -304,36 +64,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.1.bn2", - "start":0, - "end":64, - 
"is_output_channel":false - }, - { - "name":"backbone.layer1.1.conv3", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.1.conv2", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.bn2", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":16 }, "backbone.layer1.2.conv1_(0, 64)_64":{ @@ -344,36 +74,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.2.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.2.conv2", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.2.conv1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.2.bn1", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":40 }, "backbone.layer1.2.conv2_(0, 64)_64":{ @@ -384,36 +84,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.2.bn2", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.2.conv3", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.2.conv2", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.2.bn2", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":16 }, "backbone.layer2.0.conv1_(0, 128)_128":{ @@ -424,36 +94,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.0.bn1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.conv2", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.0.conv1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.bn1", - "start":0, - 
"end":128, - "is_output_channel":true - } - ] - }, "choice":68 }, "backbone.layer2.0.conv2_(0, 128)_128":{ @@ -464,36 +104,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.0.bn2", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.conv3", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.0.conv2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.bn2", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":56 }, "backbone.layer2.0.conv3_(0, 512)_512":{ @@ -504,144 +114,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.0.bn3", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.downsample.1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.conv1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.bn3", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer2.2.conv1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer2.2.bn3", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer2.3.conv1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer2.3.bn3", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.conv1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.downsample.0", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.0.conv3", - "start":0, - "end":512, - "is_output_channel":true - }, - { - 
"name":"backbone.layer2.0.bn3", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.downsample.0", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.downsample.1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.conv3", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.bn3", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer2.2.conv3", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer2.2.bn3", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer2.3.conv3", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer2.3.bn3", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":155 }, "backbone.layer2.1.conv1_(0, 128)_128":{ @@ -652,36 +124,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.1.bn1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.conv2", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.1.conv1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.bn1", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":32 }, "backbone.layer2.1.conv2_(0, 128)_128":{ @@ -692,36 +134,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.1.bn2", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.conv3", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.1.conv2", - "start":0, - "end":128, - 
"is_output_channel":true - }, - { - "name":"backbone.layer2.1.bn2", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":68 }, "backbone.layer2.2.conv1_(0, 128)_128":{ @@ -732,36 +144,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.2.bn1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.2.conv2", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.2.conv1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.2.bn1", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":56 }, "backbone.layer2.2.conv2_(0, 128)_128":{ @@ -772,36 +154,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.2.bn2", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.2.conv3", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.2.conv2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.2.bn2", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":56 }, "backbone.layer2.3.conv1_(0, 128)_128":{ @@ -812,36 +164,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.3.bn1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.3.conv2", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.3.conv1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.3.bn1", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":80 }, "backbone.layer2.3.conv2_(0, 128)_128":{ @@ -852,36 +174,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - 
"name":"backbone.layer2.3.bn2", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.3.conv3", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.3.conv2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.3.bn2", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":92 }, "backbone.layer3.0.conv1_(0, 256)_256":{ @@ -892,36 +184,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.0.bn1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.conv2", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.0.conv1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.bn1", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":256 }, "backbone.layer3.0.conv2_(0, 256)_256":{ @@ -932,36 +194,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.0.bn2", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.conv3", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.0.conv2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.bn2", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":256 }, "backbone.layer3.0.conv3_(0, 1024)_1024":{ @@ -972,192 +204,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.0.bn3", - "start":0, - "end":1024, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.downsample.1", - "start":0, - "end":1024, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":1024, - "is_output_channel":false 
- }, - { - "name":"backbone.layer3.1.conv1", - "start":0, - "end":1024, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.bn3", - "start":0, - "end":1024, - "is_output_channel":false - }, - { - "name":"backbone.layer3.2.conv1", - "start":0, - "end":1024, - "is_output_channel":false - }, - { - "name":"backbone.layer3.2.bn3", - "start":0, - "end":1024, - "is_output_channel":false - }, - { - "name":"backbone.layer3.3.conv1", - "start":0, - "end":1024, - "is_output_channel":false - }, - { - "name":"backbone.layer3.3.bn3", - "start":0, - "end":1024, - "is_output_channel":false - }, - { - "name":"backbone.layer3.4.conv1", - "start":0, - "end":1024, - "is_output_channel":false - }, - { - "name":"backbone.layer3.4.bn3", - "start":0, - "end":1024, - "is_output_channel":false - }, - { - "name":"backbone.layer3.5.conv1", - "start":0, - "end":1024, - "is_output_channel":false - }, - { - "name":"backbone.layer3.5.bn3", - "start":0, - "end":1024, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.conv1", - "start":0, - "end":1024, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.downsample.0", - "start":0, - "end":1024, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.0.conv3", - "start":0, - "end":1024, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.bn3", - "start":0, - "end":1024, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.downsample.0", - "start":0, - "end":1024, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.downsample.1", - "start":0, - "end":1024, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":1024, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.conv3", - "start":0, - "end":1024, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.bn3", - "start":0, - "end":1024, - "is_output_channel":true - }, - { - "name":"backbone.layer3.2.conv3", - "start":0, - "end":1024, - 
"is_output_channel":true - }, - { - "name":"backbone.layer3.2.bn3", - "start":0, - "end":1024, - "is_output_channel":true - }, - { - "name":"backbone.layer3.3.conv3", - "start":0, - "end":1024, - "is_output_channel":true - }, - { - "name":"backbone.layer3.3.bn3", - "start":0, - "end":1024, - "is_output_channel":true - }, - { - "name":"backbone.layer3.4.conv3", - "start":0, - "end":1024, - "is_output_channel":true - }, - { - "name":"backbone.layer3.4.bn3", - "start":0, - "end":1024, - "is_output_channel":true - }, - { - "name":"backbone.layer3.5.conv3", - "start":0, - "end":1024, - "is_output_channel":true - }, - { - "name":"backbone.layer3.5.bn3", - "start":0, - "end":1024, - "is_output_channel":true - } - ] - }, "choice":1024 }, "backbone.layer3.1.conv1_(0, 256)_256":{ @@ -1168,36 +214,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.1.bn1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.conv2", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.1.conv1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.bn1", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":106 }, "backbone.layer3.1.conv2_(0, 256)_256":{ @@ -1208,36 +224,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.1.bn2", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.conv3", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.1.conv2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.bn2", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":106 }, "backbone.layer3.2.conv1_(0, 256)_256":{ @@ -1248,36 +234,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - 
"input_related":[ - { - "name":"backbone.layer3.2.bn1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.2.conv2", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.2.conv1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.2.bn1", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":131 }, "backbone.layer3.2.conv2_(0, 256)_256":{ @@ -1288,36 +244,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.2.bn2", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.2.conv3", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.2.conv2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.2.bn2", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":256 }, "backbone.layer3.3.conv1_(0, 256)_256":{ @@ -1328,36 +254,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.3.bn1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.3.conv2", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.3.conv1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.3.bn1", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":131 }, "backbone.layer3.3.conv2_(0, 256)_256":{ @@ -1368,36 +264,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.3.bn2", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.3.conv3", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.3.conv2", - 
"start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.3.bn2", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":256 }, "backbone.layer3.4.conv1_(0, 256)_256":{ @@ -1408,36 +274,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.4.bn1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.4.conv2", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.4.conv1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.4.bn1", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":256 }, "backbone.layer3.4.conv2_(0, 256)_256":{ @@ -1448,36 +284,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.4.bn2", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.4.conv3", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.4.conv2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.4.bn2", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":256 }, "backbone.layer3.5.conv1_(0, 256)_256":{ @@ -1488,36 +294,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.5.bn1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.5.conv2", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.5.conv1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.5.bn1", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":256 }, "backbone.layer3.5.conv2_(0, 256)_256":{ @@ -1528,36 +304,6 @@ "min_value":1, "min_ratio":0.5 }, - 
"channels":{ - "input_related":[ - { - "name":"backbone.layer3.5.bn2", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.5.conv3", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.5.conv2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.5.bn2", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":256 }, "backbone.layer4.0.conv1_(0, 512)_512":{ @@ -1568,36 +314,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.0.bn1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.conv2", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.0.conv1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.bn1", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":512 }, "backbone.layer4.0.conv2_(0, 512)_512":{ @@ -1608,36 +324,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.0.bn2", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.conv3", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.0.conv2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.bn2", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":512 }, "backbone.layer4.0.conv3_(0, 2048)_2048":{ @@ -1648,114 +334,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.0.bn3", - "start":0, - "end":2048, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.downsample.1", - "start":0, - "end":2048, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, 
- "end":2048, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv1", - "start":0, - "end":2048, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.bn3", - "start":0, - "end":2048, - "is_output_channel":false - }, - { - "name":"backbone.layer4.2.conv1", - "start":0, - "end":2048, - "is_output_channel":false - }, - { - "name":"backbone.layer4.2.bn3", - "start":0, - "end":2048, - "is_output_channel":false - }, - { - "name":"head.fc", - "start":0, - "end":2048, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.0.conv3", - "start":0, - "end":2048, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.bn3", - "start":0, - "end":2048, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.downsample.0", - "start":0, - "end":2048, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.downsample.1", - "start":0, - "end":2048, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":2048, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.conv3", - "start":0, - "end":2048, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.bn3", - "start":0, - "end":2048, - "is_output_channel":true - }, - { - "name":"backbone.layer4.2.conv3", - "start":0, - "end":2048, - "is_output_channel":true - }, - { - "name":"backbone.layer4.2.bn3", - "start":0, - "end":2048, - "is_output_channel":true - } - ] - }, "choice":2048 }, "backbone.layer4.1.conv1_(0, 512)_512":{ @@ -1766,36 +344,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.1.bn1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv2", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.1.conv1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.bn1", - "start":0, - "end":512, - 
"is_output_channel":true - } - ] - }, "choice":512 }, "backbone.layer4.1.conv2_(0, 512)_512":{ @@ -1806,36 +354,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.1.bn2", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv3", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.1.conv2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.bn2", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":461 }, "backbone.layer4.2.conv1_(0, 512)_512":{ @@ -1846,36 +364,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.2.bn1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.2.conv2", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.2.conv1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.2.bn1", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":512 }, "backbone.layer4.2.conv2_(0, 512)_512":{ @@ -1886,36 +374,6 @@ "min_value":1, "min_ratio":0.5 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.2.bn2", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.2.conv3", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.2.conv2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.2.bn2", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":512 } } diff --git a/configs/pruning/mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py b/configs/pruning/mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py index 8a68430cf..81880f4eb 100644 --- a/configs/pruning/mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py +++ 
b/configs/pruning/mmcls/dmcp/dmcp_mbv2_subnet_32xb64.py @@ -44,4 +44,6 @@ default_hooks = _base_.default_hooks default_hooks['checkpoint'] = dict(type='CheckpointHook', interval=5) +_base_.model_wrapper_cfg = None + randomness = dict(seed=4872, diff_rank_seed=True) diff --git a/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py b/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py index eb70e639b..c612e3aa5 100644 --- a/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py +++ b/configs/pruning/mmcls/dmcp/dmcp_resnet50_subnet_32xb64.py @@ -43,4 +43,6 @@ default_hooks = _base_.default_hooks default_hooks['checkpoint'] = dict(type='CheckpointHook', interval=5) +_base_.model_wrapper_cfg = None + randomness = dict(seed=2016, diff_rank_seed=True) diff --git a/configs/pruning/mmdet/dcff/fix_subnet.json b/configs/pruning/mmdet/dcff/fix_subnet.json index 0522765d1..dfdcea758 100644 --- a/configs/pruning/mmdet/dcff/fix_subnet.json +++ b/configs/pruning/mmdet/dcff/fix_subnet.json @@ -14,102 +14,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.conv1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.bn2", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.conv1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.bn2", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.conv1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.downsample.0", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.conv1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - 
"name":"backbone.bn1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.conv2", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.bn2", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.conv2", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.bn2", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":1.0 }, "backbone.layer1.0.conv1_(0, 64)_64":{ @@ -120,36 +24,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.0.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.conv2", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.0.conv1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.bn1", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":0.640625 }, "backbone.layer1.1.conv1_(0, 64)_64":{ @@ -160,36 +34,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.1.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.conv2", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.1.conv1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.bn1", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":0.640625 }, "backbone.layer2.0.conv1_(0, 128)_128":{ @@ -200,36 +44,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.0.bn1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.conv2", - "start":0, - 
"end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.0.conv1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.bn1", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer2.0.conv2_(0, 128)_128":{ @@ -240,96 +54,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.0.bn2", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.downsample.1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.conv1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.bn2", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.conv1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.downsample.0", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.0.conv2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.bn2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.downsample.0", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.downsample.1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.conv2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.bn2", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":0.59375 }, "backbone.layer2.1.conv1_(0, 128)_128":{ @@ -340,36 +64,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - 
"name":"backbone.layer2.1.bn1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.conv2", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.1.conv1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.bn1", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer3.0.conv1_(0, 256)_256":{ @@ -380,36 +74,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.0.bn1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.conv2", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.0.conv1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.bn1", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer3.0.conv2_(0, 256)_256":{ @@ -420,96 +84,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.0.bn2", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.downsample.1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.conv1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.bn2", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.conv1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.downsample.0", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.0.conv2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - 
"name":"backbone.layer3.0.bn2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.downsample.0", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.downsample.1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.conv2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.bn2", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":0.59765625 }, "backbone.layer3.1.conv1_(0, 256)_256":{ @@ -520,36 +94,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.1.bn1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.conv2", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.1.conv1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.bn1", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer4.0.conv1_(0, 512)_512":{ @@ -560,36 +104,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.0.bn1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.conv2", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.0.conv1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.bn1", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":0.69921875 }, "backbone.layer4.0.conv2_(0, 512)_512":{ @@ -600,90 +114,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.0.bn2", - "start":0, - "end":512, - "is_output_channel":false - }, - { 
- "name":"backbone.layer4.0.downsample.1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.bn2", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"head.fc", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.0.conv2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.bn2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.downsample.0", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.downsample.1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.conv2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.bn2", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":0.69921875 }, "backbone.layer4.1.conv1_(0, 512)_512":{ @@ -694,36 +124,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.1.bn1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv2", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.1.conv1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.bn1", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":0.69921875 } } diff --git a/configs/pruning/mmpose/dcff/fix_subnet.json b/configs/pruning/mmpose/dcff/fix_subnet.json index 0522765d1..dfdcea758 100644 --- a/configs/pruning/mmpose/dcff/fix_subnet.json +++ 
b/configs/pruning/mmpose/dcff/fix_subnet.json @@ -14,102 +14,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.conv1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.bn2", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.conv1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.bn2", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.conv1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.downsample.0", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.conv1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.bn1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.conv2", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.bn2", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.conv2", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.bn2", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":1.0 }, "backbone.layer1.0.conv1_(0, 64)_64":{ @@ -120,36 +24,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.0.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.conv2", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.0.conv1", - 
"start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.bn1", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":0.640625 }, "backbone.layer1.1.conv1_(0, 64)_64":{ @@ -160,36 +34,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.1.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.conv2", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.1.conv1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.bn1", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":0.640625 }, "backbone.layer2.0.conv1_(0, 128)_128":{ @@ -200,36 +44,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.0.bn1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.conv2", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.0.conv1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.bn1", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer2.0.conv2_(0, 128)_128":{ @@ -240,96 +54,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.0.bn2", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.downsample.1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.conv1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.bn2", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.conv1", - "start":0, - 
"end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.downsample.0", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.0.conv2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.bn2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.downsample.0", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.downsample.1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.conv2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.bn2", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":0.59375 }, "backbone.layer2.1.conv1_(0, 128)_128":{ @@ -340,36 +64,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.1.bn1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.conv2", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.1.conv1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.bn1", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer3.0.conv1_(0, 256)_256":{ @@ -380,36 +74,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.0.bn1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.conv2", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.0.conv1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.bn1", - "start":0, - "end":256, - 
"is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer3.0.conv2_(0, 256)_256":{ @@ -420,96 +84,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.0.bn2", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.downsample.1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.conv1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.bn2", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.conv1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.downsample.0", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.0.conv2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.bn2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.downsample.0", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.downsample.1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.conv2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.bn2", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":0.59765625 }, "backbone.layer3.1.conv1_(0, 256)_256":{ @@ -520,36 +94,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.1.bn1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.conv2", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - 
"name":"backbone.layer3.1.conv1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.bn1", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer4.0.conv1_(0, 512)_512":{ @@ -560,36 +104,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.0.bn1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.conv2", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.0.conv1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.bn1", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":0.69921875 }, "backbone.layer4.0.conv2_(0, 512)_512":{ @@ -600,90 +114,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.0.bn2", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.downsample.1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.bn2", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"head.fc", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.0.conv2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.bn2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.downsample.0", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.downsample.1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":512, - 
"is_output_channel":true - }, - { - "name":"backbone.layer4.1.conv2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.bn2", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":0.69921875 }, "backbone.layer4.1.conv1_(0, 512)_512":{ @@ -694,36 +124,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.1.bn1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv2", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.1.conv1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.bn1", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":0.69921875 } } diff --git a/configs/pruning/mmseg/dcff/fix_subnet.json b/configs/pruning/mmseg/dcff/fix_subnet.json index 0522765d1..dfdcea758 100644 --- a/configs/pruning/mmseg/dcff/fix_subnet.json +++ b/configs/pruning/mmseg/dcff/fix_subnet.json @@ -14,102 +14,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.conv1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.bn2", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.conv1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.bn2", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.conv1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.downsample.0", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.conv1", - "start":0, - 
"end":64, - "is_output_channel":true - }, - { - "name":"backbone.bn1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.conv2", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.bn2", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.conv2", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.bn2", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":1.0 }, "backbone.layer1.0.conv1_(0, 64)_64":{ @@ -120,36 +24,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.0.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.conv2", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.0.conv1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.bn1", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":0.640625 }, "backbone.layer1.1.conv1_(0, 64)_64":{ @@ -160,36 +34,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.1.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.conv2", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.1.conv1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.bn1", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":0.640625 }, "backbone.layer2.0.conv1_(0, 128)_128":{ @@ -200,36 +44,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.0.bn1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - 
"name":"backbone.layer2.0.conv2", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.0.conv1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.bn1", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer2.0.conv2_(0, 128)_128":{ @@ -240,96 +54,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.0.bn2", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.downsample.1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.conv1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.bn2", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.conv1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.downsample.0", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.0.conv2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.bn2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.downsample.0", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.downsample.1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.conv2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.bn2", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":0.59375 }, "backbone.layer2.1.conv1_(0, 128)_128":{ @@ -340,36 +64,6 @@ "min_value":1, 
"min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.1.bn1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.conv2", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.1.conv1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.bn1", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer3.0.conv1_(0, 256)_256":{ @@ -380,36 +74,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.0.bn1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.conv2", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.0.conv1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.bn1", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer3.0.conv2_(0, 256)_256":{ @@ -420,96 +84,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.0.bn2", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.downsample.1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.conv1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.bn2", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.conv1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.downsample.0", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.0.conv2", - "start":0, - "end":256, - 
"is_output_channel":true - }, - { - "name":"backbone.layer3.0.bn2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.downsample.0", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.downsample.1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.conv2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.bn2", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":0.59765625 }, "backbone.layer3.1.conv1_(0, 256)_256":{ @@ -520,36 +94,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.1.bn1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.conv2", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.1.conv1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.bn1", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer4.0.conv1_(0, 512)_512":{ @@ -560,36 +104,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.0.bn1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.conv2", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.0.conv1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.bn1", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":0.69921875 }, "backbone.layer4.0.conv2_(0, 512)_512":{ @@ -600,90 +114,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.0.bn2", - "start":0, - "end":512, 
- "is_output_channel":false - }, - { - "name":"backbone.layer4.0.downsample.1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.bn2", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"head.fc", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.0.conv2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.bn2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.downsample.0", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.downsample.1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.conv2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.bn2", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":0.69921875 }, "backbone.layer4.1.conv1_(0, 512)_512":{ @@ -694,36 +124,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.1.bn1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv2", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.1.conv1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.bn1", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":0.69921875 } } diff --git a/mmrazor/models/mutators/channel_mutator/channel_mutator.py b/mmrazor/models/mutators/channel_mutator/channel_mutator.py index 38abd2fcc..5da790941 100644 
--- a/mmrazor/models/mutators/channel_mutator/channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/channel_mutator.py @@ -54,7 +54,8 @@ class ChannelMutator(BaseMutator, Generic[ChannelUnitType]): 1. Using tracer. It needs parse_cfg to be the config of the ChannelAnalyzer. 2. Using config. When parse_cfg['type']='Config'. It needs that - channel_unit_cfg['unit']['xxx_unit_name] has a key 'channels'. + channel_unit_cfg['unit']['xxx_unit_name] has a key 'channels', + otherwise tracer is required. 3. Using the model with pre-defined dynamic-ops and mutablechannels: When parse_cfg['type']='Predefined'. """ @@ -99,11 +100,12 @@ def prepare_from_supernet(self, supernet: Module) -> None: """ self._name2module = dict(supernet.named_modules()) - if isinstance(self.parse_cfg, - ChannelAnalyzer) or 'Analyzer' in self.parse_cfg['type']: - units = self._prepare_from_tracer(supernet, self.parse_cfg) - elif self.parse_cfg['type'] == 'Config': + if self.parse_cfg['type'] == 'Config' or 'from_cfg' in self.parse_cfg: units = self._prepare_from_cfg(supernet, self.units_cfg) + elif isinstance( + self.parse_cfg, + ChannelAnalyzer) or 'Analyzer' in self.parse_cfg['type']: + units = self._prepare_from_tracer(supernet, self.parse_cfg) elif self.parse_cfg['type'] == 'Predefined': units = self._prepare_from_predefined_model(supernet) else: @@ -316,16 +318,41 @@ def _prepare_from_cfg(self, model, config: Dict): if isinstance(config, str): config = fileio.load(config) assert isinstance(config, dict) + + if 'Analyzer' in self.parse_cfg['type']: + self.parse_cfg.pop('from_cfg') + tracer = TASK_UTILS.build(self.parse_cfg) + unit_configs = tracer.analyze(model) + units = [] for unit_key in config: init_args = copy.deepcopy(self.unit_default_args) if 'init_args' in config[unit_key]: init_args.update(config[unit_key]['init_args']) config[unit_key]['init_args'] = init_args - unit = self.unit_class.init_from_cfg(model, config[unit_key]) + # config[unit_key].pop('channels') + if 'channels' 
in config[unit_key]: + unit = self.unit_class.init_from_cfg(model, config[unit_key]) + unit.name = unit_key + else: + try: + unit = self._prepare_unit_from_init_cfg( + model, unit_configs[unit_key]) + except ValueError: + raise ValueError( + 'Initializing channel_mutator from the config needs to' + 'include `channels` or `Analyzer` in the config.') units.append(unit) return units + def _prepare_unit_from_init_cfg(self, model: Module, init_cfg: dict): + """Initialize units using the init_cfg, which created by tracer.""" + unit = ChannelUnit.init_from_cfg(model, init_cfg) + unit = self._convert_channel_unit_to_mutable([unit])[0] + if 'choice' in init_cfg: + unit.current_choice = init_cfg['choice'] + return unit + def _prepare_from_predefined_model(self, model: Module): """Initialize units using the model with pre-defined dynamicops and mutable-channels.""" diff --git a/mmrazor/structures/subnet/fix_subnet.py b/mmrazor/structures/subnet/fix_subnet.py index f000db62c..07f25b15f 100644 --- a/mmrazor/structures/subnet/fix_subnet.py +++ b/mmrazor/structures/subnet/fix_subnet.py @@ -116,8 +116,8 @@ def _load_fix_subnet_by_mutator(model: nn.Module, mutator_cfg: Dict) -> None: raise ValueError('mutator_cfg must contain key channel_unit_cfg, ' f'but got mutator_cfg:' f'{mutator_cfg}') - mutator_cfg['parse_cfg'] = {'type': 'Config'} mutator = MODELS.build(mutator_cfg) + mutator.parse_cfg['from_cfg'] = True mutator.prepare_from_supernet(model) @@ -137,7 +137,6 @@ def export_fix_subnet( slice_weight (bool): Export subnet weight. Default to False. export_channel (bool): Whether to export the mutator's channel. Often required when finetune is needed for the exported subnet. - Default to True. Return: fix_subnet (ValidFixMutable): Exported subnet choice config. 
diff --git a/tests/test_models/test_algorithms/test_darts.py b/tests/test_models/test_algorithms/test_darts.py index 8360c1a3a..8d0949fa0 100644 --- a/tests/test_models/test_algorithms/test_darts.py +++ b/tests/test_models/test_algorithms/test_darts.py @@ -17,6 +17,7 @@ from mmrazor.models import Darts, DiffMutableOP, NasMutator from mmrazor.models.algorithms.nas.darts import DartsDDP from mmrazor.registry import MODELS +from mmrazor.structures import load_fix_subnet MODELS.register_module(name='torchConv2d', module=nn.Conv2d, force=True) MODELS.register_module(name='torchMaxPool2d', module=nn.MaxPool2d, force=True) @@ -103,8 +104,15 @@ def test_init(self) -> None: algo = Darts(model, mutator) self.assertIsInstance(algo.mutator, NasMutator) + # test load fix_subnet + fix_subnet = { + 'normal': { + 'chosen': ['torch_conv2d_3x3', 'torch_conv2d_7x7'] + } + } + load_fix_subnet(model, fix_subnet) algo = Darts(model, mutator) - self.assertEqual(algo.architecture.mutable.num_choices, 3) + self.assertEqual(algo.architecture.mutable.num_choices, 2) # initiate darts with error type `mutator` with self.assertRaisesRegex(TypeError, 'mutator should be'): @@ -119,8 +127,14 @@ def test_forward_loss(self) -> None: mutator.prepare_from_supernet(model) mutator.prepare_arch_params() - algo = Darts(model, mutator) - loss = algo(inputs, mode='loss') + # subnet + fix_subnet = fix_subnet = { + 'normal': { + 'chosen': ['torch_conv2d_3x3', 'torch_conv2d_7x7'] + } + } + load_fix_subnet(model, fix_subnet) + loss = model(inputs, mode='loss') self.assertIsInstance(loss, dict) def _prepare_fake_data(self) -> Dict: diff --git a/tests/test_models/test_algorithms/test_dsnas.py b/tests/test_models/test_algorithms/test_dsnas.py index f652a3faf..c6d28e4c6 100644 --- a/tests/test_models/test_algorithms/test_dsnas.py +++ b/tests/test_models/test_algorithms/test_dsnas.py @@ -17,6 +17,7 @@ from mmrazor.models import DSNAS, NasMutator, OneHotMutableOP from mmrazor.models.algorithms.nas.dsnas import 
DSNASDDP from mmrazor.registry import MODELS +from mmrazor.structures import load_fix_subnet MODELS.register_module(name='torchConv2d', module=nn.Conv2d, force=True) MODELS.register_module(name='torchMaxPool2d', module=nn.MaxPool2d, force=True) @@ -96,6 +97,12 @@ def test_init(self) -> None: algo = DSNAS(model, mutator) self.assertIsInstance(algo.mutator, NasMutator) + # test load fix_subnet + fix_subnet = {'mutable': {'chosen': 'torch_conv2d_5x5'}} + load_fix_subnet(model, fix_subnet) + algo = DSNAS(model, mutator) + self.assertEqual(algo.architecture.mutable.num_choices, 1) + # initiate Dsnas with error type `mutator` with self.assertRaisesRegex(TypeError, 'mutator should be'): DSNAS(model, model) @@ -111,6 +118,12 @@ def test_forward_loss(self) -> None: loss = algo(inputs, mode='loss') self.assertIsInstance(loss, dict) + # subnet + fix_subnet = {'mutable': {'chosen': 'torch_conv2d_5x5'}} + load_fix_subnet(model, fix_subnet) + loss = model(inputs, mode='loss') + self.assertIsInstance(loss, dict) + def _prepare_fake_data(self): imgs = torch.randn(16, 3, 224, 224).to(self.device) data_samples = [ diff --git a/tests/test_models/test_algorithms/test_spos.py b/tests/test_models/test_algorithms/test_spos.py index 9d606cfa6..537a16438 100644 --- a/tests/test_models/test_algorithms/test_spos.py +++ b/tests/test_models/test_algorithms/test_spos.py @@ -7,6 +7,7 @@ from mmrazor.models import SPOS, NasMutator, OneShotMutableOP from mmrazor.registry import MODELS +from mmrazor.structures import load_fix_subnet MUTATOR_CFG = dict(type='NasMutator') @@ -57,6 +58,12 @@ def test_init(self): alg = SPOS(model, mutator) self.assertIsInstance(alg.mutator, NasMutator) + # test load fix_subnet + fix_subnet = {'mutable': {'chosen': 'conv1'}} + load_fix_subnet(model, fix_subnet) + algo = SPOS(model, mutator) + self.assertEqual(algo.architecture.mutable.num_choices, 1) + # initiate spos with error type `mutator`. 
with self.assertRaisesRegex(TypeError, 'mutator should be'): SPOS(model, model) @@ -70,3 +77,9 @@ def test_forward_loss(self): alg = SPOS(model, mutator) loss = alg(inputs, mode='loss') self.assertIsInstance(loss, dict) + + # subnet + fix_subnet = {'mutable': {'chosen': 'conv1'}} + load_fix_subnet(model, fix_subnet) + loss = model(inputs, mode='loss') + self.assertIsInstance(loss, dict) From 882c136ddcd8fe4b9b9aee27f77d21ecc8151575 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Wed, 15 Feb 2023 16:16:48 +0800 Subject: [PATCH 35/59] fix test_channel_mutator --- .../mutators/channel_mutator/channel_mutator.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/mmrazor/models/mutators/channel_mutator/channel_mutator.py b/mmrazor/models/mutators/channel_mutator/channel_mutator.py index 5da790941..52c9238bb 100644 --- a/mmrazor/models/mutators/channel_mutator/channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/channel_mutator.py @@ -100,12 +100,16 @@ def prepare_from_supernet(self, supernet: Module) -> None: """ self._name2module = dict(supernet.named_modules()) - if self.parse_cfg['type'] == 'Config' or 'from_cfg' in self.parse_cfg: + if isinstance(self.parse_cfg, + ChannelAnalyzer) or 'Analyzer' in self.parse_cfg['type']: + if isinstance(self.parse_cfg, + dict) and 'from_cfg' in self.parse_cfg: + units = self._prepare_from_cfg(supernet, self.units_cfg) + else: + units = self._prepare_from_tracer(supernet, self.parse_cfg) + elif self.parse_cfg['type'] == 'Config' \ + or 'from_cfg' in self.parse_cfg: units = self._prepare_from_cfg(supernet, self.units_cfg) - elif isinstance( - self.parse_cfg, - ChannelAnalyzer) or 'Analyzer' in self.parse_cfg['type']: - units = self._prepare_from_tracer(supernet, self.parse_cfg) elif self.parse_cfg['type'] == 'Predefined': units = self._prepare_from_predefined_model(supernet) else: From d1afbaf67ac42919ae90ed5a6894f133251e11a5 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Wed, 15 Feb 2023 
16:54:08 +0800 Subject: [PATCH 36/59] fix Ut --- mmrazor/engine/hooks/dmcp_subnet_hook.py | 1 - .../mutable_channel/units/channel_unit.py | 6 +- .../channel_mutator/channel_mutator.py | 1 - mmrazor/structures/subnet/fix_subnet.py | 1 + tests/data/test_registry/subnet.json | 600 ------------------ .../test_algorithms/test_dcff_network.py | 5 +- 6 files changed, 7 insertions(+), 607 deletions(-) diff --git a/mmrazor/engine/hooks/dmcp_subnet_hook.py b/mmrazor/engine/hooks/dmcp_subnet_hook.py index bacdcaffd..5c3186fca 100644 --- a/mmrazor/engine/hooks/dmcp_subnet_hook.py +++ b/mmrazor/engine/hooks/dmcp_subnet_hook.py @@ -30,7 +30,6 @@ def _save_subnet(self, model, runner, save_path): model, export_subnet_mode='mutator', slice_weight=True, - export_channel=True, ) fix_subnet = json.dumps(fix_subnet, indent=4, separators=(',', ':')) with open(save_path, 'w') as file: diff --git a/mmrazor/models/mutables/mutable_channel/units/channel_unit.py b/mmrazor/models/mutables/mutable_channel/units/channel_unit.py index b99423aa2..e730245d4 100644 --- a/mmrazor/models/mutables/mutable_channel/units/channel_unit.py +++ b/mmrazor/models/mutables/mutable_channel/units/channel_unit.py @@ -201,7 +201,11 @@ def name(self) -> str: else: first_module_name = 'unitx' name = f'{first_module_name}_{self.num_channels}' - return name + return getattr(self, '_name', name) + + @name.setter + def name(self, unit_name) -> None: + self._name = unit_name @property def alias(self) -> str: diff --git a/mmrazor/models/mutators/channel_mutator/channel_mutator.py b/mmrazor/models/mutators/channel_mutator/channel_mutator.py index 52c9238bb..5aa987aac 100644 --- a/mmrazor/models/mutators/channel_mutator/channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/channel_mutator.py @@ -334,7 +334,6 @@ def _prepare_from_cfg(self, model, config: Dict): if 'init_args' in config[unit_key]: init_args.update(config[unit_key]['init_args']) config[unit_key]['init_args'] = init_args - # 
config[unit_key].pop('channels') if 'channels' in config[unit_key]: unit = self.unit_class.init_from_cfg(model, config[unit_key]) unit.name = unit_key diff --git a/mmrazor/structures/subnet/fix_subnet.py b/mmrazor/structures/subnet/fix_subnet.py index 07f25b15f..56b33be76 100644 --- a/mmrazor/structures/subnet/fix_subnet.py +++ b/mmrazor/structures/subnet/fix_subnet.py @@ -137,6 +137,7 @@ def export_fix_subnet( slice_weight (bool): Export subnet weight. Default to False. export_channel (bool): Whether to export the mutator's channel. Often required when finetune is needed for the exported subnet. + Default to False. Return: fix_subnet (ValidFixMutable): Exported subnet choice config. diff --git a/tests/data/test_registry/subnet.json b/tests/data/test_registry/subnet.json index 531d6af46..4fe63bda2 100644 --- a/tests/data/test_registry/subnet.json +++ b/tests/data/test_registry/subnet.json @@ -14,102 +14,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.conv1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.bn2", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.conv1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.bn2", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.conv1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.downsample.0", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.conv1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.bn1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.conv2", 
- "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.bn2", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.conv2", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.bn2", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":1.0 }, "backbone.layer1.0.conv1_(0, 64)_64":{ @@ -120,36 +24,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.0.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.0.conv2", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.0.conv1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.0.bn1", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":0.640625 }, "backbone.layer1.1.conv1_(0, 64)_64":{ @@ -160,36 +34,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer1.1.bn1", - "start":0, - "end":64, - "is_output_channel":false - }, - { - "name":"backbone.layer1.1.conv2", - "start":0, - "end":64, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer1.1.conv1", - "start":0, - "end":64, - "is_output_channel":true - }, - { - "name":"backbone.layer1.1.bn1", - "start":0, - "end":64, - "is_output_channel":true - } - ] - }, "choice":0.640625 }, "backbone.layer2.0.conv1_(0, 128)_128":{ @@ -200,36 +44,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.0.bn1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.conv2", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.0.conv1", - "start":0, - 
"end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.bn1", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer2.0.conv2_(0, 128)_128":{ @@ -240,96 +54,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.0.bn2", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.0.downsample.1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.conv1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer2.1.bn2", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.conv1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.downsample.0", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.0.conv2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.bn2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.downsample.0", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.0.downsample.1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.conv2", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.bn2", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":0.59375 }, "backbone.layer2.1.conv1_(0, 128)_128":{ @@ -340,36 +64,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer2.1.bn1", - "start":0, - "end":128, - "is_output_channel":false - }, - { - 
"name":"backbone.layer2.1.conv2", - "start":0, - "end":128, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer2.1.conv1", - "start":0, - "end":128, - "is_output_channel":true - }, - { - "name":"backbone.layer2.1.bn1", - "start":0, - "end":128, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer3.0.conv1_(0, 256)_256":{ @@ -380,36 +74,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.0.bn1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.conv2", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.0.conv1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.bn1", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer3.0.conv2_(0, 256)_256":{ @@ -420,96 +84,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.0.bn2", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.0.downsample.1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.conv1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.bn2", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.conv1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.downsample.0", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.0.conv2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.bn2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - 
"name":"backbone.layer3.0.downsample.0", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.0.downsample.1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.conv2", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.bn2", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":0.59765625 }, "backbone.layer3.1.conv1_(0, 256)_256":{ @@ -520,36 +94,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer3.1.bn1", - "start":0, - "end":256, - "is_output_channel":false - }, - { - "name":"backbone.layer3.1.conv2", - "start":0, - "end":256, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer3.1.conv1", - "start":0, - "end":256, - "is_output_channel":true - }, - { - "name":"backbone.layer3.1.bn1", - "start":0, - "end":256, - "is_output_channel":true - } - ] - }, "choice":0.6484375 }, "backbone.layer4.0.conv1_(0, 512)_512":{ @@ -560,36 +104,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.0.bn1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.conv2", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.0.conv1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.bn1", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":0.69921875 }, "backbone.layer4.0.conv2_(0, 512)_512":{ @@ -600,90 +114,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.0.bn2", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.0.downsample.1", - "start":0, - "end":512, - 
"is_output_channel":false - }, - { - "name":"bind_placeholder", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.bn2", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"head.fc", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.0.conv2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.bn2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.downsample.0", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.0.downsample.1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"bind_placeholder", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.conv2", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.bn2", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":0.69921875 }, "backbone.layer4.1.conv1_(0, 512)_512":{ @@ -694,36 +124,6 @@ "min_value":1, "min_ratio":0.9 }, - "channels":{ - "input_related":[ - { - "name":"backbone.layer4.1.bn1", - "start":0, - "end":512, - "is_output_channel":false - }, - { - "name":"backbone.layer4.1.conv2", - "start":0, - "end":512, - "is_output_channel":false - } - ], - "output_related":[ - { - "name":"backbone.layer4.1.conv1", - "start":0, - "end":512, - "is_output_channel":true - }, - { - "name":"backbone.layer4.1.bn1", - "start":0, - "end":512, - "is_output_channel":true - } - ] - }, "choice":0.69921875 } } diff --git a/tests/test_models/test_algorithms/test_dcff_network.py b/tests/test_models/test_algorithms/test_dcff_network.py index 397c0b3fa..657d7a09b 100644 --- a/tests/test_models/test_algorithms/test_dcff_network.py +++ 
b/tests/test_models/test_algorithms/test_dcff_network.py @@ -281,10 +281,7 @@ def test_export_subnet(self): self.assertEqual(algorithm.step_freq, epoch_step * iter_per_epoch) fix_subnet, static_model = export_fix_subnet( - algorithm, - export_subnet_mode='mutator', - slice_weight=True, - export_channel=True) + algorithm, export_subnet_mode='mutator', slice_weight=True) fix_subnet = json.dumps(fix_subnet, indent=4, separators=(',', ':')) subnet_name = 'subnet.json' weight_name = 'subnet_weight.pth' From 5dd0aa64dd9b0e1980fd3cdb3c04731d17e7ccf7 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Wed, 15 Feb 2023 17:52:34 +0800 Subject: [PATCH 37/59] fix bug for load dcffnet --- .../mutators/channel_mutator/channel_mutator.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/mmrazor/models/mutators/channel_mutator/channel_mutator.py b/mmrazor/models/mutators/channel_mutator/channel_mutator.py index 5aa987aac..856387b24 100644 --- a/mmrazor/models/mutators/channel_mutator/channel_mutator.py +++ b/mmrazor/models/mutators/channel_mutator/channel_mutator.py @@ -340,20 +340,21 @@ def _prepare_from_cfg(self, model, config: Dict): else: try: unit = self._prepare_unit_from_init_cfg( - model, unit_configs[unit_key]) + model, config[unit_key], unit_configs[unit_key]) except ValueError: raise ValueError( - 'Initializing channel_mutator from the config needs to' - 'include `channels` or `Analyzer` in the config.') + 'Initializing channel_mutator from the config needs' + 'to include `channels` or `Analyzer` in the config.') units.append(unit) return units - def _prepare_unit_from_init_cfg(self, model: Module, init_cfg: dict): + def _prepare_unit_from_init_cfg(self, model: Module, channel_cfg: dict, + init_cfg: dict): """Initialize units using the init_cfg, which created by tracer.""" unit = ChannelUnit.init_from_cfg(model, init_cfg) unit = self._convert_channel_unit_to_mutable([unit])[0] - if 'choice' in init_cfg: - unit.current_choice = 
init_cfg['choice'] + if 'choice' in channel_cfg: + unit.current_choice = channel_cfg['choice'] return unit def _prepare_from_predefined_model(self, model: Module): From 58df15fd57252fe8f3ad0e7826bdc315f52e36a3 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Thu, 16 Feb 2023 20:45:25 +0800 Subject: [PATCH 38/59] update nas config --- .DS_Store | Bin 0 -> 6148 bytes configs/.DS_Store | Bin 0 -> 6148 bytes configs/nas/.DS_Store | Bin 0 -> 6148 bytes configs/nas/mmcls/.DS_Store | Bin 0 -> 6148 bytes .../nas/mmcls/bignas/ATTENTIVE_SUBNET_A0.yaml | 72 ++++++++++-------- .../attentive_mobilenet_subnet_8xb256_in1k.py | 2 + .../darts/darts_subnet_1xb96_cifar10_2.0.py | 3 +- .../darts_subnet_1xb96_cifar10_2.0_mmrazor.py | 3 +- .../mmcls/dsnas/dsnas_subnet_8xb128_in1k.py | 24 ++++++ .../mmcls/dsnas/dsnas_supernet_8xb128_in1k.py | 1 + .../onceforall/OFA_SUBNET_NOTE8_LAT22.yaml | 24 ++++++ .../ofa_mobilenet_subnet_8xb256_in1k.py | 2 + .../spos/spos_mobilenet_subnet_8xb128_in1k.py | 2 + .../spos_mobilenet_supernet_8xb128_in1k.py | 1 + .../spos_shufflenet_subnet_8xb128_in1k.py | 2 + .../spos_shufflenet_supernet_8xb128_in1k.py | 1 + mmrazor/models/algorithms/nas/bignas.py | 1 + 17 files changed, 104 insertions(+), 34 deletions(-) create mode 100644 .DS_Store create mode 100644 configs/.DS_Store create mode 100644 configs/nas/.DS_Store create mode 100644 configs/nas/mmcls/.DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..3b598e6d3104ddb7da0ca3a2c18a7a4c51ca17fe GIT binary patch literal 6148 zcmeHKJ5EC}5S)cbM50NV(pTUHR+O9|7eFKmLW+1rME@$z#nG7k6hx17p$M9l)?=@C zYPK8g8u11@DTu9pzMaV&k|s<0$7tRhzd-j6^vH(F~sWL z4wk&GCR;GtMRWMjytCRA1Jh_1El6Nm9Sl@}3XBw(N8a1{zk`39|3@uMsQ?xDGX-?9 z-ERlHRGzILuV?i`R&CwjpkI#g_7i}_j^Y*E4g19wU`@6lDlq;CxC{(b;HL__03Z?( AZvX%Q literal 0 HcmV?d00001 diff --git a/configs/nas/.DS_Store b/configs/nas/.DS_Store new file mode 100644 index 
0000000000000000000000000000000000000000..feb5d9acc574de8a94b37ede29f5e7a336b2ddc3 GIT binary patch literal 6148 zcmeH~F%H5o3`K1y5)um|V-^m;4I)%dzy%QPSkwWr=jePpURsz@g}y82$4&z68|qp_ z^!(~iB6|_p!cFC9!^{-%Mjp628RXeNyWu{LgQaB^do6(DH>T^cOd$ahAOR8}0TNgc z0WadH|GNd<6K{nCNMIQR?ER47ra81!^;ZW5ZvmhU(r%c0F9A)KfacIr6&aXD_n>i9 zy$!LvABU#cmqSa{dR#P`4~>6T>tbM9kBcS*FwHI;NPq-p1lBd)Tm8SlzxDsKBMeD^ z1pbVGHtn@N>=!ix%U#lVgX=H zYyuI1X;6Vd)od{|=!lohtBFlu&_%QP(7ai*Ls7pS=NC^Gt$`e=02O#vpc~7P)&DvC zP5=K);))7Tfxl8fd$YxCiYH}lZ9UFvZGkW0mUDxfVeS+RUXFoYjD>lb| WO>6?4j=0l-{24G^XjI_a3fuwQ+!cWU literal 0 HcmV?d00001 diff --git a/configs/nas/mmcls/bignas/ATTENTIVE_SUBNET_A0.yaml b/configs/nas/mmcls/bignas/ATTENTIVE_SUBNET_A0.yaml index e926d4b03..972ea2148 100644 --- a/configs/nas/mmcls/bignas/ATTENTIVE_SUBNET_A0.yaml +++ b/configs/nas/mmcls/bignas/ATTENTIVE_SUBNET_A0.yaml @@ -1,64 +1,72 @@ backbone.first_channels: - chosen: 16 + chosen: 24 backbone.last_channels: chosen: 1792 -backbone.layers.1.kernel_size: - chosen: 3 -backbone.layers.1.expand_ratio: - chosen: 1 backbone.layers.1.depth: chosen: 1 +backbone.layers.1.expand_ratio: + chosen: 1 +backbone.layers.1.kernel_size: + chosen: 3 backbone.layers.1.out_channels: chosen: 16 -backbone.layers.2.kernel_size: +backbone.layers.2.depth: chosen: 3 backbone.layers.2.expand_ratio: chosen: 4 -backbone.layers.2.depth: +backbone.layers.2.kernel_size: chosen: 3 backbone.layers.2.out_channels: - chosen: 24 -backbone.layers.3.kernel_size: - chosen: 3 + chosen: 32 +backbone.layers.3.depth: + chosen: 4 backbone.layers.3.expand_ratio: + chosen: 5 +backbone.layers.3.expand_ratio_se: chosen: 4 -backbone.layers.3.depth: - chosen: 3 +backbone.layers.3.kernel_size: + chosen: 5 backbone.layers.3.out_channels: chosen: 32 -backbone.layers.4.kernel_size: - chosen: 3 -backbone.layers.4.expand_ratio: - chosen: 4 backbone.layers.4.depth: - chosen: 3 + chosen: 4 +backbone.layers.4.expand_ratio: + chosen: 6 +backbone.layers.4.kernel_size: + chosen: 5 
backbone.layers.4.out_channels: - chosen: 64 -backbone.layers.5.kernel_size: + chosen: 72 +backbone.layers.5.depth: chosen: 3 backbone.layers.5.expand_ratio: - chosen: 4 -backbone.layers.5.depth: + chosen: 6 +backbone.layers.5.expand_ratio_se: + chosen: 6 +backbone.layers.5.kernel_size: chosen: 3 backbone.layers.5.out_channels: - chosen: 112 -backbone.layers.6.kernel_size: - chosen: 3 + chosen: 120 +backbone.layers.6.depth: + chosen: 7 backbone.layers.6.expand_ratio: chosen: 6 -backbone.layers.6.depth: +backbone.layers.6.expand_ratio_se: + chosen: 6 +backbone.layers.6.kernel_size: chosen: 3 backbone.layers.6.out_channels: chosen: 192 -backbone.layers.7.kernel_size: - chosen: 3 -backbone.layers.7.expand_ratio: - chosen: 6 backbone.layers.7.depth: chosen: 1 +backbone.layers.7.expand_ratio: + chosen: 6 +backbone.layers.7.expand_ratio_se: + chosen: 6 +backbone.layers.7.kernel_size: + chosen: 5 backbone.layers.7.out_channels: - chosen: 216 + chosen: 224 input_shape: chosen: - - 192 - - 192 + - 224 + - 224 diff --git a/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py b/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py index 2e2827cdd..f80f18a0f 100644 --- a/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py @@ -9,5 +9,7 @@ fix_subnet='configs/nas/mmcls/bignas/ATTENTIVE_SUBNET_A6.yaml') _base_.model = model_cfg +_base_.model_wrapper_cfg = None +find_unused_parameters = True test_cfg = dict(evaluate_fixed_subnet=True) diff --git a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py index ecc3dde30..91c0a88e5 100644 --- a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py +++ b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py @@ -34,4 +34,5 @@ cfg=supernet, fix_subnet='configs/nas/mmcls/darts/DARTS_SUBNET_CIFAR_PAPER_ALIAS.yaml') -find_unused_parameter = False 
+_base_.model_wrapper_cfg = None +find_unused_parameters = True diff --git a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0_mmrazor.py b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0_mmrazor.py index b77179e43..a6a7d6c38 100644 --- a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0_mmrazor.py +++ b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0_mmrazor.py @@ -34,4 +34,5 @@ cfg=supernet, fix_subnet='configs/nas/mmcls/darts/DARTS_SUBNET_CIFAR_MMRAZOR_97.32.yaml') -find_unused_parameter = False +_base_.model_wrapper_cfg = None +find_unused_parameters = True diff --git a/configs/nas/mmcls/dsnas/dsnas_subnet_8xb128_in1k.py b/configs/nas/mmcls/dsnas/dsnas_subnet_8xb128_in1k.py index a338ecb69..249601445 100644 --- a/configs/nas/mmcls/dsnas/dsnas_subnet_8xb128_in1k.py +++ b/configs/nas/mmcls/dsnas/dsnas_subnet_8xb128_in1k.py @@ -3,6 +3,30 @@ # NOTE: Replace this with the mutable_cfg searched by yourself. supernet = _base_.model['architecture'] +paramwise_cfg = dict(norm_decay_mult=0.0, bias_decay_mult=0.0) +_base_.optim_wrapper = dict( + optimizer=dict( + type='SGD', lr=0.8, momentum=0.9, weight_decay=0.00004, nesterov=True), + paramwise_cfg=paramwise_cfg) + +epochs = 200 + +param_scheduler = [ + dict( + type='LinearLR', + end=5, + start_factor=0.2, + by_epoch=True, + convert_to_iter_based=True), + dict( + type='CosineAnnealingLR', + T_max=epochs, + begin=5, + end=epochs, + by_epoch=True, + convert_to_iter_based=True) +] + model_cfg = dict( _scope_='mmrazor', type='sub_model', diff --git a/configs/nas/mmcls/dsnas/dsnas_supernet_8xb128_in1k.py b/configs/nas/mmcls/dsnas/dsnas_supernet_8xb128_in1k.py index b341edfd9..519d4b2ad 100644 --- a/configs/nas/mmcls/dsnas/dsnas_supernet_8xb128_in1k.py +++ b/configs/nas/mmcls/dsnas/dsnas_supernet_8xb128_in1k.py @@ -8,6 +8,7 @@ model = dict( type='mmrazor.DSNAS', architecture=dict( + _scope_='mmcls', type='ImageClassifier', data_preprocessor=_base_.data_preprocessor, backbone=_base_.nas_backbone, diff 
--git a/configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml b/configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml index 0a8b47199..ed7bdc1fa 100644 --- a/configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml +++ b/configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml @@ -32,18 +32,26 @@ backbone.layers.2.out_channels: chosen: 24 backbone.layers.3.0.expand_ratio: chosen: 3 +backbone.layers.3.0.expand_ratio_se: + chosen: 4 backbone.layers.3.0.kernel_size: chosen: 5 backbone.layers.3.1.expand_ratio: chosen: 3 +backbone.layers.3.1.expand_ratio_se: + chosen: 3 backbone.layers.3.1.kernel_size: chosen: 5 backbone.layers.3.2.expand_ratio: chosen: 3 +backbone.layers.3.2.expand_ratio_se: + chosen: 3 backbone.layers.3.2.kernel_size: chosen: 3 backbone.layers.3.3.expand_ratio: chosen: 3 +backbone.layers.3.3.expand_ratio_se: + chosen: 3 backbone.layers.3.3.kernel_size: chosen: 3 backbone.layers.3.depth: @@ -72,18 +80,26 @@ backbone.layers.4.out_channels: chosen: 80 backbone.layers.5.0.expand_ratio: chosen: 3 +backbone.layers.5.0.expand_ratio_se: + chosen: 3 backbone.layers.5.0.kernel_size: chosen: 5 backbone.layers.5.1.expand_ratio: chosen: 4 +backbone.layers.5.1.expand_ratio_se: + chosen: 3 backbone.layers.5.1.kernel_size: chosen: 3 backbone.layers.5.2.expand_ratio: chosen: 3 +backbone.layers.5.2.expand_ratio_se: + chosen: 3 backbone.layers.5.2.kernel_size: chosen: 3 backbone.layers.5.3.expand_ratio: chosen: 3 +backbone.layers.5.3.expand_ratio_se: + chosen: 3 backbone.layers.5.3.kernel_size: chosen: 3 backbone.layers.5.depth: @@ -92,18 +108,26 @@ backbone.layers.5.out_channels: chosen: 112 backbone.layers.6.0.expand_ratio: chosen: 6 +backbone.layers.6.0.expand_ratio_se: + chosen: 6 backbone.layers.6.0.kernel_size: chosen: 3 backbone.layers.6.1.expand_ratio: chosen: 6 +backbone.layers.6.1.expand_ratio_se: + chosen: 6 backbone.layers.6.1.kernel_size: chosen: 7 backbone.layers.6.2.expand_ratio: chosen: 3 +backbone.layers.6.2.expand_ratio_se: + chosen: 6 
backbone.layers.6.2.kernel_size: chosen: 3 backbone.layers.6.3.expand_ratio: chosen: 6 +backbone.layers.6.3.expand_ratio_se: + chosen: 6 backbone.layers.6.3.kernel_size: chosen: 3 backbone.layers.6.depth: diff --git a/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py b/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py index fdf6aa6af..a3082103e 100644 --- a/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py @@ -9,5 +9,7 @@ fix_subnet='configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml') _base_.model = model_cfg +_base_.model_wrapper_cfg = None +find_unused_parameters = True test_cfg = dict(evaluate_fixed_subnet=True) diff --git a/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py b/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py index 37d9a767c..c47a562f8 100644 --- a/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py +++ b/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py @@ -11,4 +11,6 @@ _base_.model = model_cfg +_base_.model_wrapper_cfg = None + find_unused_parameters = False diff --git a/configs/nas/mmcls/spos/spos_mobilenet_supernet_8xb128_in1k.py b/configs/nas/mmcls/spos/spos_mobilenet_supernet_8xb128_in1k.py index 3d47d8f7f..eb38013af 100644 --- a/configs/nas/mmcls/spos/spos_mobilenet_supernet_8xb128_in1k.py +++ b/configs/nas/mmcls/spos/spos_mobilenet_supernet_8xb128_in1k.py @@ -6,6 +6,7 @@ # model supernet = dict( + _scope_='mmcls', type='ImageClassifier', # data_preprocessor=_base_.preprocess_cfg, backbone=_base_.nas_backbone, diff --git a/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py b/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py index 5556476e6..62c21f07a 100644 --- a/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py +++ b/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py @@ -11,4 +11,6 @@ _base_.model = model_cfg +_base_.model_wrapper_cfg = 
None + find_unused_parameters = False diff --git a/configs/nas/mmcls/spos/spos_shufflenet_supernet_8xb128_in1k.py b/configs/nas/mmcls/spos/spos_shufflenet_supernet_8xb128_in1k.py index a5d6ce726..869bcac4c 100644 --- a/configs/nas/mmcls/spos/spos_shufflenet_supernet_8xb128_in1k.py +++ b/configs/nas/mmcls/spos/spos_shufflenet_supernet_8xb128_in1k.py @@ -6,6 +6,7 @@ # model supernet = dict( + _scope_='mmcls', type='ImageClassifier', data_preprocessor=_base_.preprocess_cfg, backbone=_base_.nas_backbone, diff --git a/mmrazor/models/algorithms/nas/bignas.py b/mmrazor/models/algorithms/nas/bignas.py index 8b4e72004..bd3ec4e20 100644 --- a/mmrazor/models/algorithms/nas/bignas.py +++ b/mmrazor/models/algorithms/nas/bignas.py @@ -190,6 +190,7 @@ def __init__(self, if os.environ.get('LOCAL_RANK') is not None: device_ids = [int(os.environ['LOCAL_RANK'])] super().__init__(device_ids=device_ids, **kwargs) + self.device = 'cuda' if torch.cuda.is_available() else 'cpu' def train_step(self, data: List[dict], optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]: From 31b780a28cee1b1cf108f6793aca53c7a682809e Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Thu, 16 Feb 2023 20:46:51 +0800 Subject: [PATCH 39/59] update nas config --- .DS_Store | Bin 6148 -> 0 bytes configs/.DS_Store | Bin 6148 -> 0 bytes configs/nas/.DS_Store | Bin 6148 -> 0 bytes configs/nas/mmcls/.DS_Store | Bin 6148 -> 0 bytes 4 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .DS_Store delete mode 100644 configs/.DS_Store delete mode 100644 configs/nas/.DS_Store delete mode 100644 configs/nas/mmcls/.DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 3b598e6d3104ddb7da0ca3a2c18a7a4c51ca17fe..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKJ5EC}5S)cbM50NV(pTUHR+O9|7eFKmLW+1rME@$z#nG7k6hx17p$M9l)?=@C zYPK8g8u11@DTu9pzMaV&k|s<0$7tRhzd-j6^vH(F~sWL z4wk&GCR;GtMRWMjytCRA1Jh_1El6Nm9Sl@}3XBw(N8a1{zk`39|3@uMsQ?xDGX-?9 
z-ERlHRGzILuV?i`R&CwjpkI#g_7i}_j^Y*E4g19wU`@6lDlq;CxC{(b;HL__03Z?( AZvX%Q diff --git a/configs/nas/.DS_Store b/configs/nas/.DS_Store deleted file mode 100644 index feb5d9acc574de8a94b37ede29f5e7a336b2ddc3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeH~F%H5o3`K1y5)um|V-^m;4I)%dzy%QPSkwWr=jePpURsz@g}y82$4&z68|qp_ z^!(~iB6|_p!cFC9!^{-%Mjp628RXeNyWu{LgQaB^do6(DH>T^cOd$ahAOR8}0TNgc z0WadH|GNd<6K{nCNMIQR?ER47ra81!^;ZW5ZvmhU(r%c0F9A)KfacIr6&aXD_n>i9 zy$!LvABU#cmqSa{dR#P`4~>6T>tbM9kBcS*FwHI;NPq-p1lBd)Tm8SlzxDsKBMeD^ z1pbVGHtn@N>=!ix%U#lVgX=H zYyuI1X;6Vd)od{|=!lohtBFlu&_%QP(7ai*Ls7pS=NC^Gt$`e=02O#vpc~7P)&DvC zP5=K);))7Tfxl8fd$YxCiYH}lZ9UFvZGkW0mUDxfVeS+RUXFoYjD>lb| WO>6?4j=0l-{24G^XjI_a3fuwQ+!cWU From 2b9c8a6e87c652d655830d9115a414158fa48a71 Mon Sep 17 00:00:00 2001 From: gaoyang07 <1546308416@qq.com> Date: Fri, 17 Feb 2023 09:23:33 +0800 Subject: [PATCH 40/59] fix api in evolution_search_loop --- mmrazor/engine/runner/evolution_search_loop.py | 4 ++-- tests/test_runners/test_evolution_search_loop.py | 16 +++++++++------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/mmrazor/engine/runner/evolution_search_loop.py b/mmrazor/engine/runner/evolution_search_loop.py index c1a73d4c3..644385e2f 100644 --- a/mmrazor/engine/runner/evolution_search_loop.py +++ b/mmrazor/engine/runner/evolution_search_loop.py @@ -197,7 +197,7 @@ def sample_candidates(self) -> None: init_candidates = len(self.candidates) if self.runner.rank == 0: while len(self.candidates) < self.num_candidates: - candidate = self.model.sample_subnet() + candidate = self.model.mutator.sample_choices() is_pass, result = self._check_constraints( random_subnet=candidate) if is_pass: @@ -285,7 +285,7 @@ def gen_crossover_candidates(self): def _mutation(self) -> SupportRandomSubnet: """Mutate with the specified mutate_prob.""" candidate1 = random.choice(self.top_k_candidates.subnets) - candidate2 = self.model.sample_subnet() + candidate2 = 
self.model.mutator.sample_choices() candidate = crossover(candidate1, candidate2, prob=self.mutate_prob) return candidate diff --git a/tests/test_runners/test_evolution_search_loop.py b/tests/test_runners/test_evolution_search_loop.py index 53d066658..1dc2cf958 100644 --- a/tests/test_runners/test_evolution_search_loop.py +++ b/tests/test_runners/test_evolution_search_loop.py @@ -140,7 +140,7 @@ def test_run_epoch(self, flops_params, mock_export_fix_subnet, self.runner.distributed = False self.runner.work_dir = self.temp_dir fake_subnet = {'1': 'choice1', '2': 'choice2'} - loop.model.sample_subnet = MagicMock(return_value=fake_subnet) + loop.model.mutator.sample_choices = MagicMock(return_value=fake_subnet) mock_export_fix_subnet.return_value = (fake_subnet, self.runner.model) load_status.return_value = True flops_params.return_value = 0, 0 @@ -155,7 +155,8 @@ def test_run_epoch(self, flops_params, mock_export_fix_subnet, self.runner.distributed = True self.runner.work_dir = self.temp_dir fake_subnet = {'1': 'choice1', '2': 'choice2'} - self.runner.model.sample_subnet = MagicMock(return_value=fake_subnet) + self.runner.model.mutator.sample_choices = MagicMock( + return_value=fake_subnet) loop.run_epoch() self.assertEqual(len(loop.candidates), 4) self.assertEqual(len(loop.top_k_candidates), 2) @@ -168,7 +169,7 @@ def test_run_epoch(self, flops_params, mock_export_fix_subnet, self.runner.distributed = True self.runner.work_dir = self.temp_dir fake_subnet = {'1': 'choice1', '2': 'choice2'} - loop.model.sample_subnet = MagicMock(return_value=fake_subnet) + loop.model.mutator.sample_choices = MagicMock(return_value=fake_subnet) flops_params.return_value = (50., 1) loop.run_epoch() self.assertEqual(len(loop.candidates), 4) @@ -297,7 +298,7 @@ def test_run_epoch(self, flops_params, mock_export_fix_subnet, self.runner.distributed = False self.runner.work_dir = self.temp_dir fake_subnet = {'1': 'choice1', '2': 'choice2'} - loop.model.sample_subnet = 
MagicMock(return_value=fake_subnet) + loop.model.mutator.sample_choices = MagicMock(return_value=fake_subnet) mock_export_fix_subnet.return_value = (fake_subnet, self.runner.model) load_status.return_value = True flops_params.return_value = 0, 0 @@ -312,7 +313,8 @@ def test_run_epoch(self, flops_params, mock_export_fix_subnet, self.runner.distributed = True self.runner.work_dir = self.temp_dir fake_subnet = {'1': 'choice1', '2': 'choice2'} - self.runner.model.sample_subnet = MagicMock(return_value=fake_subnet) + self.runner.model.mutator.sample_choices = MagicMock( + return_value=fake_subnet) loop.run_epoch() self.assertEqual(len(loop.candidates), 4) self.assertEqual(len(loop.top_k_candidates), 2) @@ -325,7 +327,7 @@ def test_run_epoch(self, flops_params, mock_export_fix_subnet, self.runner.distributed = True self.runner.work_dir = self.temp_dir fake_subnet = {'1': 'choice1', '2': 'choice2'} - loop.model.sample_subnet = MagicMock(return_value=fake_subnet) + loop.model.mutator.sample_choices = MagicMock(return_value=fake_subnet) flops_params.return_value = (50., 1) loop.run_epoch() self.assertEqual(len(loop.candidates), 4) @@ -349,7 +351,7 @@ def test_run_loop(self, mock_flops, mock_model2vector, loop._epoch = 1 fake_subnet = {'1': 'choice1', '2': 'choice2'} - loop.model.sample_subnet = MagicMock(return_value=fake_subnet) + loop.model.mutator.sample_choices = MagicMock(return_value=fake_subnet) mock_export_fix_subnet.return_value = (fake_subnet, self.runner.model) self.runner.work_dir = self.temp_dir From 8562854e304cb1c8512f9fbdaaf371d62c82da28 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Fri, 17 Feb 2023 10:04:08 +0800 Subject: [PATCH 41/59] update evolu_search_loop --- mmrazor/engine/runner/evolution_search_loop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mmrazor/engine/runner/evolution_search_loop.py b/mmrazor/engine/runner/evolution_search_loop.py index c1a73d4c3..ee97b5e77 100644 --- 
a/mmrazor/engine/runner/evolution_search_loop.py +++ b/mmrazor/engine/runner/evolution_search_loop.py @@ -197,7 +197,7 @@ def sample_candidates(self) -> None: init_candidates = len(self.candidates) if self.runner.rank == 0: while len(self.candidates) < self.num_candidates: - candidate = self.model.sample_subnet() + candidate = self.model.mutator.sample_choice() is_pass, result = self._check_constraints( random_subnet=candidate) if is_pass: @@ -285,7 +285,7 @@ def gen_crossover_candidates(self): def _mutation(self) -> SupportRandomSubnet: """Mutate with the specified mutate_prob.""" candidate1 = random.choice(self.top_k_candidates.subnets) - candidate2 = self.model.sample_subnet() + candidate2 = self.model.mutator.sample_choice() candidate = crossover(candidate1, candidate2, prob=self.mutate_prob) return candidate From a06b823600fdc48e01c313b8768850a09a2667f0 Mon Sep 17 00:00:00 2001 From: gaoyang07 <1546308416@qq.com> Date: Fri, 17 Feb 2023 10:20:22 +0800 Subject: [PATCH 42/59] fix metric_predictor --- .../predictor/metric_predictor.py | 36 ++++++++++++++----- .../test_predictors/test_metric_predictor.py | 8 ++--- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/mmrazor/models/task_modules/predictor/metric_predictor.py b/mmrazor/models/task_modules/predictor/metric_predictor.py index a05e2a1b4..796c3ac5b 100644 --- a/mmrazor/models/task_modules/predictor/metric_predictor.py +++ b/mmrazor/models/task_modules/predictor/metric_predictor.py @@ -96,6 +96,10 @@ def model2vector( vector_dict: Dict[str, list] = \ dict(normal_vector=[], onehot_vector=[]) + assert len(model.keys()) == len(self.search_groups.keys()), ( + f'Length mismatch for model({len(model.keys())}) and search_groups' + f'({len(self.search_groups.keys())}).') + for key, choice in model.items(): if isinstance(choice, DumpChosen): assert choice.meta is not None, ( @@ -105,8 +109,16 @@ def model2vector( len(choice.meta['all_choices']), dtype=np.int) _chosen_index = 
choice.meta['all_choices'].index(choice.chosen) else: - assert len(self.search_groups[index]) == 1 - choices = self.search_groups[index][0].choices + if key is not None: + from mmrazor.models.mutables import MutableChannelUnit + if isinstance(self.search_groups[key][0], + MutableChannelUnit): + choices = self.search_groups[key][0].candidate_choices + else: + choices = self.search_groups[key][0].choices + else: + assert len(self.search_groups[index]) == 1 + choices = self.search_groups[index][0].choices onehot = np.zeros(len(choices), dtype=np.int) _chosen_index = choices.index(choice) onehot[_chosen_index] = 1 @@ -126,18 +138,26 @@ def vector2model(self, vector: np.array) -> Dict[str, str]: Returns: Dict[str, str]: converted model. """ + from mmrazor.models.mutables import OneShotMutableChannelUnit + start = 0 model = {} - for key, value in self.search_groups.items(): + vector = np.squeeze(vector) + for name, mutables in self.search_groups.items(): + if isinstance(mutables[0], OneShotMutableChannelUnit): + choices = mutables[0].candidate_choices + else: + choices = mutables[0].choices + if self.encoding_type == 'onehot': - index = np.where(vector[start:start + - len(value[0].choices)] == 1)[0][0] - start += len(value) + index = np.where(vector[start:start + len(choices)] == 1)[0][0] + start += len(choices) else: index = vector[start] start += 1 - chosen = value[0].choices[int(index)] - model[key] = chosen + + chosen = choices[int(index)] if len(choices) > 1 else choices[0] + model[name] = chosen return model diff --git a/tests/test_models/test_task_modules/test_predictors/test_metric_predictor.py b/tests/test_models/test_task_modules/test_predictors/test_metric_predictor.py index d9293cbf2..5da4ab4d1 100644 --- a/tests/test_models/test_task_modules/test_predictors/test_metric_predictor.py +++ b/tests/test_models/test_task_modules/test_predictors/test_metric_predictor.py @@ -40,7 +40,7 @@ class TestMetricPredictorWithGP(TestCase): def setUp(self) -> None: 
self.temp_dir = tempfile.mkdtemp() - self.search_groups = {0: [MutableOP], 1: [MutableOP]} + self.search_groups = {0: [MutableOP]} self.candidates = [{0: 'conv1'}, {0: 'conv2'}, {0: 'conv3'}] predictor_cfg = dict( type='MetricPredictor', @@ -80,7 +80,7 @@ class TestMetricPredictorWithCart(TestCase): def setUp(self) -> None: self.temp_dir = tempfile.mkdtemp() - self.search_groups = {0: [MutableOP], 1: [MutableOP]} + self.search_groups = {0: [MutableOP]} self.candidates = [{0: 'conv1'}, {0: 'conv2'}, {0: 'conv3'}] predictor_cfg = dict( type='MetricPredictor', @@ -120,7 +120,7 @@ class TestMetricPredictorWithRBF(TestCase): def setUp(self) -> None: self.temp_dir = tempfile.mkdtemp() - self.search_groups = {0: [MutableOP], 1: [MutableOP]} + self.search_groups = {0: [MutableOP]} self.candidates = [{0: 'conv1'}, {0: 'conv2'}, {0: 'conv3'}] predictor_cfg = dict( type='MetricPredictor', @@ -160,7 +160,7 @@ class TestMetricPredictorWithMLP(TestCase): def setUp(self) -> None: self.temp_dir = tempfile.mkdtemp() - self.search_groups = {0: [MutableOP], 1: [MutableOP]} + self.search_groups = {0: [MutableOP]} self.candidates = [{0: 'conv1'}, {0: 'conv2'}, {0: 'conv3'}] predictor_cfg = dict( type='MetricPredictor', From 7a72392f929b64bb6647a6955ab5a9be0a9c4f02 Mon Sep 17 00:00:00 2001 From: XiaotongLu Date: Fri, 17 Feb 2023 18:22:38 +0800 Subject: [PATCH 43/59] update url --- configs/pruning/mmcls/dmcp/README.md | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/configs/pruning/mmcls/dmcp/README.md b/configs/pruning/mmcls/dmcp/README.md index 53328f538..d2c7aa7a4 100644 --- a/configs/pruning/mmcls/dmcp/README.md +++ b/configs/pruning/mmcls/dmcp/README.md @@ -18,7 +18,7 @@ GPUS=32 sh tools/slurm_train.sh $PARTITION $JOB_NAME \ #### with 2GFLOPs based on the output structure -#### 'DMCP_R50_2G.yaml'(SOURCECODE) +#### 'DMCP_R50_2G.json'(SOURCECODE) ```bash GPUS=32 sh tools/slurm_train.sh $PARTITION $JOB_NAME \ @@ -30,20 +30,21 @@ GPUS=32 sh 
tools/slurm_train.sh $PARTITION $JOB_NAME \ ### 1.Classification -| Dataset | Supernet | Flops(M) | Top-1 (%) | Top-5 (%) | config | Download | Remark | -| :------: | :---------: | :-------------: | :-------: | :-------: | :------------------------------------------: | :----------------------: | :-----------------------------: | -| ImageNet | ResNet50 | 4.09G(Supernet) | 77.46 | 93.55 | [config](./dmcp_resnet50_supernet_32xb64.py) | [model](<>) / [log](<>) | | -| ImageNet | ResNet50 | 2.07G(Subnet) | 76.11 | 93.01 | [config](./dmcp_resnet50_subnet_32xb64.py) | [model](<>) / [log](<>) | [arch\*](./DMCP_R50_2G.yaml) | -| ImageNet | ResNet50 | 1.05G(Subnet) | 74.12 | 92.33 | [config](./dmcp_resnet50_subnet_32xb64.py) | [model](<>) / [log](<>) | [arch](./DMCP_R50_1G.yaml) | -| ImageNet | MobilenetV2 | 319M(Supernet) | 72.30 | 90.42 | [config](./dmcp_resnet50_supernet_32xb64.py) | [model](<>) / [log](<>) | | -| ImageNet | MobilenetV2 | 209M(Subnet) | 71.94 | 90.05 | [config](./dmcp_mbv2_subnet_32xb64.py) | [model](<>) / [log](<>) | [arch](./DMCP_MBV2_200M.yaml) | -| ImageNet | MobilenetV2 | 102M(Subnet) | 67.22 | 88.61 | [config](./dmcp_mbv2_subnet_32xb64.py) | [model](<>) / [log](<>) | [arch\*](./DMCP_MBV2_100M.yaml) | +| Dataset | Supernet | Flops(M) | Top-1 (%) | Top-5 (%) | config | Download | Remark | +| :------: | :---------: | :-------------: | :-------: | :-------: | :----------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------: | +| ImageNet | ResNet50 | 4.09G(Supernet) | 77.46 | 93.55 | - | - | - | +| ImageNet | ResNet50 | 2.07G(Subnet) | 76.11 | 93.01 | [config](./dmcp_resnet50_subnet_32xb64.py) | 
[model](https://download.openmmlab.com/mmrazor/v1/pruning/dmcp/resnet50/2G/DMCP_R50_2G.pth) / [log](https://download.openmmlab.com/mmrazor/v1/pruning/dmcp/resnet50/2G/dmcp_resnet50_supernet_32xb64_target_flops_2g_20230129_112944.log) | [arch\*](./DMCP_R50_2G.json) | +| ImageNet | ResNet50 | 1.05G(Subnet) | 74.12 | 92.33 | [config](./dmcp_resnet50_subnet_32xb64.py) | [model](https://download.openmmlab.com/mmrazor/v1/pruning/dmcp/resnet50/1G/DMCP_R50_1G.pth) / [log](https://download.openmmlab.com/mmrazor/v1/pruning/dmcp/resnet50/1G/dmcp_resnet50_supernet_32xb64_target_flops_1g_20230107_223552.log) | [arch](https://download.openmmlab.com/mmrazor/v1/pruning/dmcp/resnet50/1G/DMCP_R50_1G.json) | +| ImageNet | MobilenetV2 | 319M(Supernet) | 72.30 | 90.42 | - | - | - | +| ImageNet | MobilenetV2 | 209M(Subnet) | 71.94 | 90.05 | [config](./dmcp_mbv2_subnet_32xb64.py) | [model](https://download.openmmlab.com/mmrazor/v1/pruning/dmcp/mobilenetv2/200M/DMCP_MBV2_200M.pth) / [log](https://download.openmmlab.com/mmrazor/v1/pruning/dmcp/mobilenetv2/200M/dmcp_mobilenetv2_supernet_32xb64_target_flops_200m_20230129_184919.log) | [arch](https://download.openmmlab.com/mmrazor/v1/pruning/dmcp/mobilenetv2/200M/DMCP_MBV2_200M.json) | +| ImageNet | MobilenetV2 | 102M(Subnet) | 67.22 | 88.61 | [config](./dmcp_mbv2_subnet_32xb64.py) | [model](https://download.openmmlab.com/mmrazor/v1/pruning/dmcp/mobilenetv2/100M/DMCP_MBV2_100M.pth) / [log](https://download.openmmlab.com/mmrazor/v1/pruning/dmcp/mobilenetv2/100M/dmcp_mobilenetv2_supernet_32xb64_target_flops_100m_20230129_184919.log) | [arch\*](./DMCP_MBV2_100M.json) | **Note** 1. Arch with * are converted from the [official repo](https://github.com/Zx55/dmcp). 2. To get the sub-network structure with different pruning rates, we support modifying `target_flops` in `model` in the supernet config, note that here it is in MFLOPs. For example, `target_flops=1000` means get subnet with 1GFLOPs. -3. 
More models with different pruning rates will be released later. +3. When outputting the sampled sub-network, the FLOPs will fluctuate around 5% around the target value for efficiency. +4. More models with different pruning rates will be released later. ## Citation From d239f1d5fb7b4366f5b904906e1ba6cc394ae199 Mon Sep 17 00:00:00 2001 From: sunyue1 Date: Thu, 23 Feb 2023 14:53:28 +0800 Subject: [PATCH 44/59] fix a0 fine_grained --- .../nas/mmcls/bignas/ATTENTIVE_SUBNET_A0.yaml | 72 +++++++++---------- .../backbones/searchable_mobilenet_v3.py | 3 +- .../architectures/utils/mutable_register.py | 17 +++-- 3 files changed, 44 insertions(+), 48 deletions(-) diff --git a/configs/nas/mmcls/bignas/ATTENTIVE_SUBNET_A0.yaml b/configs/nas/mmcls/bignas/ATTENTIVE_SUBNET_A0.yaml index 972ea2148..e926d4b03 100644 --- a/configs/nas/mmcls/bignas/ATTENTIVE_SUBNET_A0.yaml +++ b/configs/nas/mmcls/bignas/ATTENTIVE_SUBNET_A0.yaml @@ -1,72 +1,64 @@ backbone.first_channels: - chosen: 24 + chosen: 16 backbone.last_channels: chosen: 1792 -backbone.layers.1.depth: - chosen: 1 -backbone.layers.1.expand_ratio: - chosen: 1 backbone.layers.1.kernel_size: chosen: 3 +backbone.layers.1.expand_ratio: + chosen: 1 +backbone.layers.1.depth: + chosen: 1 backbone.layers.1.out_channels: chosen: 16 -backbone.layers.2.depth: +backbone.layers.2.kernel_size: chosen: 3 backbone.layers.2.expand_ratio: chosen: 4 -backbone.layers.2.kernel_size: +backbone.layers.2.depth: chosen: 3 backbone.layers.2.out_channels: - chosen: 32 -backbone.layers.3.depth: - chosen: 4 + chosen: 24 +backbone.layers.3.kernel_size: + chosen: 3 backbone.layers.3.expand_ratio: - chosen: 5 -backbone.layers.3.expand_ratio_se: chosen: 4 -backbone.layers.3.kernel_size: - chosen: 5 +backbone.layers.3.depth: + chosen: 3 backbone.layers.3.out_channels: chosen: 32 -backbone.layers.4.depth: - chosen: 4 -backbone.layers.4.expand_ratio: - chosen: 6 backbone.layers.4.kernel_size: - chosen: 5 + chosen: 3 +backbone.layers.4.expand_ratio: + chosen: 4 
+backbone.layers.4.depth: + chosen: 3 backbone.layers.4.out_channels: - chosen: 72 -backbone.layers.5.depth: + chosen: 64 +backbone.layers.5.kernel_size: chosen: 3 backbone.layers.5.expand_ratio: - chosen: 6 -backbone.layers.5.expand_ratio_se: - chosen: 6 -backbone.layers.5.kernel_size: + chosen: 4 +backbone.layers.5.depth: chosen: 3 backbone.layers.5.out_channels: - chosen: 120 -backbone.layers.6.depth: - chosen: 7 + chosen: 112 +backbone.layers.6.kernel_size: + chosen: 3 backbone.layers.6.expand_ratio: chosen: 6 -backbone.layers.6.expand_ratio_se: - chosen: 6 -backbone.layers.6.kernel_size: +backbone.layers.6.depth: chosen: 3 backbone.layers.6.out_channels: chosen: 192 -backbone.layers.7.depth: - chosen: 1 +backbone.layers.7.kernel_size: + chosen: 3 backbone.layers.7.expand_ratio: chosen: 6 -backbone.layers.7.expand_ratio_se: - chosen: 6 -backbone.layers.7.kernel_size: - chosen: 5 +backbone.layers.7.depth: + chosen: 1 backbone.layers.7.out_channels: - chosen: 224 + chosen: 216 input_shape: chosen: - - 224 - - 224 + - 192 + - 192 diff --git a/mmrazor/models/architectures/backbones/searchable_mobilenet_v3.py b/mmrazor/models/architectures/backbones/searchable_mobilenet_v3.py index 5a6e15cbc..b5fe373d7 100644 --- a/mmrazor/models/architectures/backbones/searchable_mobilenet_v3.py +++ b/mmrazor/models/architectures/backbones/searchable_mobilenet_v3.py @@ -299,7 +299,8 @@ def register_mutables(self): mutate_mobilenet_layer(layer[k], mid_mutable, mutable_out_channels, mutable_expand_ratio, - mutable_kernel_size) + mutable_kernel_size, + self.fine_grained_mode) mid_mutable = mutable_out_channels self.last_mutable_channels = OneShotMutableChannel( diff --git a/mmrazor/models/architectures/utils/mutable_register.py b/mmrazor/models/architectures/utils/mutable_register.py index 256bf9ca5..c984a6af8 100644 --- a/mmrazor/models/architectures/utils/mutable_register.py +++ b/mmrazor/models/architectures/utils/mutable_register.py @@ -33,7 +33,7 @@ def mutate_conv_module( def 
mutate_mobilenet_layer(mb_layer: MBBlock, mutable_in_channels, mutable_out_channels, mutable_expand_ratio, - mutable_kernel_size): + mutable_kernel_size, fine_grained_mode): """Mutate MobileNet layers.""" mb_layer.derived_expand_channels = \ mutable_expand_ratio * mutable_in_channels @@ -51,12 +51,15 @@ def mutate_mobilenet_layer(mb_layer: MBBlock, mutable_in_channels, mutable_kernel_size=mutable_kernel_size) if mb_layer.with_se: - mutable_expand_ratio2 = copy.deepcopy(mutable_expand_ratio) - mutable_expand_ratio2.alias += '_se' - - derived_se_channels = mutable_expand_ratio2 * mutable_in_channels - mb_layer.derived_se_channels = \ - derived_se_channels.derive_divide_mutable(4, 8) + if fine_grained_mode: + mutable_expand_ratio2 = copy.deepcopy(mutable_expand_ratio) + mutable_expand_ratio2.alias += '_se' + derived_se_channels = mutable_expand_ratio2 * mutable_in_channels + mb_layer.derived_se_channels = \ + derived_se_channels.derive_divide_mutable(4, 8) + else: + mb_layer.derived_se_channels = \ + mb_layer.derived_expand_channels.derive_divide_mutable(4, 8) mutate_conv_module( mb_layer.se.conv1, From cc1acb15ece6291f6f2778368c7f26c6a97afceb Mon Sep 17 00:00:00 2001 From: sunyue1 Date: Tue, 28 Feb 2023 14:17:27 +0800 Subject: [PATCH 45/59] fix subnet export misskey --- .../attentive_mobilenet_search_8xb128_in1k.py | 2 +- .../attentive_mobilenet_subnet_8xb256_in1k.py | 4 ++++ mmrazor/engine/runner/evolution_search_loop.py | 2 +- mmrazor/structures/subnet/fix_subnet.py | 15 +++++++++++++-- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/configs/nas/mmcls/bignas/attentive_mobilenet_search_8xb128_in1k.py b/configs/nas/mmcls/bignas/attentive_mobilenet_search_8xb128_in1k.py index 8ee2f9578..9c42b0432 100644 --- a/configs/nas/mmcls/bignas/attentive_mobilenet_search_8xb128_in1k.py +++ b/configs/nas/mmcls/bignas/attentive_mobilenet_search_8xb128_in1k.py @@ -13,4 +13,4 @@ mutate_prob=0.1, calibrate_sample_num=4096, constraints_range=dict(flops=(0., 700.)), - 
score_key='accuracy/top1') + score_key='accuracy/top1') \ No newline at end of file diff --git a/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py b/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py index f80f18a0f..c59c5ac0f 100644 --- a/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py @@ -1,5 +1,7 @@ _base_ = 'attentive_mobilenet_supernet_32xb64_in1k.py' +_base_.supernet.data_preprocessor = _base_.data_preprocessor + supernet = _base_.supernet model_cfg = dict( @@ -13,3 +15,5 @@ find_unused_parameters = True test_cfg = dict(evaluate_fixed_subnet=True) + +default_hooks = dict(checkpoint=None) \ No newline at end of file diff --git a/mmrazor/engine/runner/evolution_search_loop.py b/mmrazor/engine/runner/evolution_search_loop.py index 644385e2f..beef57e9d 100644 --- a/mmrazor/engine/runner/evolution_search_loop.py +++ b/mmrazor/engine/runner/evolution_search_loop.py @@ -321,7 +321,7 @@ def _save_best_fix_subnet(self): model_name = f'subnet_{timestamp_subnet}.pth' save_path = osp.join(self.runner.work_dir, model_name) torch.save({ - 'state_dict': sliced_model.state_dict(), + 'state_dict': sliced_model, 'meta': {} }, save_path) self.runner.logger.info(f'Subnet checkpoint {model_name} saved in ' diff --git a/mmrazor/structures/subnet/fix_subnet.py b/mmrazor/structures/subnet/fix_subnet.py index 56b33be76..241f3c093 100644 --- a/mmrazor/structures/subnet/fix_subnet.py +++ b/mmrazor/structures/subnet/fix_subnet.py @@ -8,7 +8,7 @@ from mmrazor.registry import MODELS from mmrazor.utils import FixMutable, ValidFixMutable from mmrazor.utils.typing import DumpChosen - +from collections import OrderedDict def _dynamic_to_static(model: nn.Module) -> None: # Avoid circular import @@ -125,6 +125,7 @@ def export_fix_subnet( model: nn.Module, export_subnet_mode: str = 'mutable', slice_weight: bool = False, + remove_architecture: bool = True, export_channel: 
bool = False) -> Tuple[FixMutable, Optional[Dict]]: """Export subnet that can be loaded by :func:`load_fix_subnet`. Include subnet structure and subnet weight. @@ -135,6 +136,8 @@ def export_fix_subnet( Export by `mutable.dump_chosen()` when set to 'mutable' (NAS) Export by `mutator.config_template()` when set to 'mutator' (Prune) slice_weight (bool): Export subnet weight. Default to False. + remove_architecture (bool): Subnet weight key without 'architecture'. + Default to True. export_channel (bool): Whether to export the mutator's channel. Often required when finetune is needed for the exported subnet. Default to False. @@ -166,7 +169,15 @@ def export_fix_subnet( if next(copied_model.parameters()).is_cuda: copied_model.cuda() - return fix_subnet, copied_model + + if remove_architecture: + new_state_dict = OrderedDict() + for k, v in copied_model.state_dict().items(): + if k.startswith('architecture.'): + new_state_dict[k[13:]] = v + return fix_subnet, new_state_dict + + return fix_subnet, copied_model.state_dict() else: return fix_subnet, None From e0fabc341dbc8b6b8df35ae1106c6d7dfd1dd601 Mon Sep 17 00:00:00 2001 From: sunyue1 Date: Tue, 28 Feb 2023 14:58:34 +0800 Subject: [PATCH 46/59] fix ofa yaml --- .../onceforall/OFA_SUBNET_NOTE8_LAT22.yaml | 2 +- .../onceforall/OFA_SUBNET_NOTE8_LAT31.yaml | 32 +++++++++++++++++++ .../ofa_mobilenet_subnet_8xb256_in1k.py | 5 ++- 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml b/configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml index ed7bdc1fa..144342caa 100644 --- a/configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml +++ b/configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml @@ -87,7 +87,7 @@ backbone.layers.5.0.kernel_size: backbone.layers.5.1.expand_ratio: chosen: 4 backbone.layers.5.1.expand_ratio_se: - chosen: 3 + chosen: 4 backbone.layers.5.1.kernel_size: chosen: 3 backbone.layers.5.2.expand_ratio: diff --git 
a/configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT31.yaml b/configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT31.yaml index 39707b9ff..b8f752d9f 100644 --- a/configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT31.yaml +++ b/configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT31.yaml @@ -32,18 +32,26 @@ backbone.layers.2.out_channels: chosen: 24 backbone.layers.3.0.expand_ratio: chosen: 4 +backbone.layers.3.0.expand_ratio_se: + chosen: 4 backbone.layers.3.0.kernel_size: chosen: 5 backbone.layers.3.1.expand_ratio: chosen: 3 +backbone.layers.3.1.expand_ratio_se: + chosen: 3 backbone.layers.3.1.kernel_size: chosen: 5 backbone.layers.3.2.expand_ratio: chosen: 3 +backbone.layers.3.2.expand_ratio_se: + chosen: 3 backbone.layers.3.2.kernel_size: chosen: 3 backbone.layers.3.3.expand_ratio: chosen: 3 +backbone.layers.3.3.expand_ratio_se: + chosen: 3 backbone.layers.3.3.kernel_size: chosen: 3 backbone.layers.3.depth: @@ -52,18 +60,26 @@ backbone.layers.3.out_channels: chosen: 40 backbone.layers.4.0.expand_ratio: chosen: 4 +backbone.layers.4.0.expand_ratio_se: + chosen: 4 backbone.layers.4.0.kernel_size: chosen: 3 backbone.layers.4.1.expand_ratio: chosen: 4 +backbone.layers.4.1.expand_ratio_se: + chosen: 4 backbone.layers.4.1.kernel_size: chosen: 3 backbone.layers.4.2.expand_ratio: chosen: 4 +backbone.layers.4.2.expand_ratio_se: + chosen: 4 backbone.layers.4.2.kernel_size: chosen: 5 backbone.layers.4.3.expand_ratio: chosen: 4 +backbone.layers.4.3.expand_ratio_se: + chosen: 4 backbone.layers.4.3.kernel_size: chosen: 3 backbone.layers.4.depth: @@ -72,18 +88,26 @@ backbone.layers.4.out_channels: chosen: 80 backbone.layers.5.0.expand_ratio: chosen: 4 +backbone.layers.5.0.expand_ratio_se: + chosen: 4 backbone.layers.5.0.kernel_size: chosen: 3 backbone.layers.5.1.expand_ratio: chosen: 3 +backbone.layers.5.1.expand_ratio_se: + chosen: 3 backbone.layers.5.1.kernel_size: chosen: 5 backbone.layers.5.2.expand_ratio: chosen: 4 +backbone.layers.5.2.expand_ratio_se: + chosen: 4 
backbone.layers.5.2.kernel_size: chosen: 7 backbone.layers.5.3.expand_ratio: chosen: 3 +backbone.layers.5.3.expand_ratio_se: + chosen: 3 backbone.layers.5.3.kernel_size: chosen: 3 backbone.layers.5.depth: @@ -92,18 +116,26 @@ backbone.layers.5.out_channels: chosen: 112 backbone.layers.6.0.expand_ratio: chosen: 6 +backbone.layers.6.0.expand_ratio_se: + chosen: 6 backbone.layers.6.0.kernel_size: chosen: 3 backbone.layers.6.1.expand_ratio: chosen: 3 +backbone.layers.6.1.expand_ratio_se: + chosen: 3 backbone.layers.6.1.kernel_size: chosen: 3 backbone.layers.6.2.expand_ratio: chosen: 3 +backbone.layers.6.2.expand_ratio_se: + chosen: 3 backbone.layers.6.2.kernel_size: chosen: 5 backbone.layers.6.3.expand_ratio: chosen: 3 +backbone.layers.6.3.expand_ratio_se: + chosen: 3 backbone.layers.6.3.kernel_size: chosen: 5 backbone.layers.6.depth: diff --git a/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py b/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py index a3082103e..000b689be 100644 --- a/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py @@ -1,12 +1,15 @@ _base_ = 'ofa_mobilenet_supernet_32xb64_in1k.py' +_base_.supernet.data_preprocessor = _base_.data_preprocessor + supernet = _base_.supernet model_cfg = dict( _scope_='mmrazor', type='sub_model', cfg=supernet, - fix_subnet='configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml') + fix_subnet='configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT31.yaml') + # fix_subnet='configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml') _base_.model = model_cfg _base_.model_wrapper_cfg = None From 5510cf857b59188f3c2c3805d2410d5c6268e4a2 Mon Sep 17 00:00:00 2001 From: aptsunny <36404164+aptsunny@users.noreply.github.com> Date: Tue, 28 Feb 2023 15:06:56 +0800 Subject: [PATCH 47/59] fix lint --- .../mmcls/bignas/attentive_mobilenet_search_8xb128_in1k.py | 2 +- .../mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py 
| 2 +- .../nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py | 2 +- mmrazor/engine/runner/evolution_search_loop.py | 5 +---- mmrazor/structures/subnet/fix_subnet.py | 5 +++-- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/configs/nas/mmcls/bignas/attentive_mobilenet_search_8xb128_in1k.py b/configs/nas/mmcls/bignas/attentive_mobilenet_search_8xb128_in1k.py index 9c42b0432..8ee2f9578 100644 --- a/configs/nas/mmcls/bignas/attentive_mobilenet_search_8xb128_in1k.py +++ b/configs/nas/mmcls/bignas/attentive_mobilenet_search_8xb128_in1k.py @@ -13,4 +13,4 @@ mutate_prob=0.1, calibrate_sample_num=4096, constraints_range=dict(flops=(0., 700.)), - score_key='accuracy/top1') \ No newline at end of file + score_key='accuracy/top1') diff --git a/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py b/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py index c59c5ac0f..01450a70f 100644 --- a/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py @@ -16,4 +16,4 @@ test_cfg = dict(evaluate_fixed_subnet=True) -default_hooks = dict(checkpoint=None) \ No newline at end of file +default_hooks = dict(checkpoint=None) diff --git a/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py b/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py index 000b689be..b34ac6a91 100644 --- a/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py @@ -9,7 +9,7 @@ type='sub_model', cfg=supernet, fix_subnet='configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT31.yaml') - # fix_subnet='configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml') +# fix_subnet='configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml') _base_.model = model_cfg _base_.model_wrapper_cfg = None diff --git a/mmrazor/engine/runner/evolution_search_loop.py 
b/mmrazor/engine/runner/evolution_search_loop.py index beef57e9d..2c2c9ea25 100644 --- a/mmrazor/engine/runner/evolution_search_loop.py +++ b/mmrazor/engine/runner/evolution_search_loop.py @@ -320,10 +320,7 @@ def _save_best_fix_subnet(self): timestamp_subnet = time.strftime('%Y%m%d_%H%M', time.localtime()) model_name = f'subnet_{timestamp_subnet}.pth' save_path = osp.join(self.runner.work_dir, model_name) - torch.save({ - 'state_dict': sliced_model, - 'meta': {} - }, save_path) + torch.save({'state_dict': sliced_model, 'meta': {}}, save_path) self.runner.logger.info(f'Subnet checkpoint {model_name} saved in ' f'{self.runner.work_dir}') diff --git a/mmrazor/structures/subnet/fix_subnet.py b/mmrazor/structures/subnet/fix_subnet.py index 241f3c093..4c81f6036 100644 --- a/mmrazor/structures/subnet/fix_subnet.py +++ b/mmrazor/structures/subnet/fix_subnet.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. import copy +from collections import OrderedDict from typing import Dict, Optional, Tuple from mmengine import fileio @@ -8,7 +9,7 @@ from mmrazor.registry import MODELS from mmrazor.utils import FixMutable, ValidFixMutable from mmrazor.utils.typing import DumpChosen -from collections import OrderedDict + def _dynamic_to_static(model: nn.Module) -> None: # Avoid circular import @@ -136,7 +137,7 @@ def export_fix_subnet( Export by `mutable.dump_chosen()` when set to 'mutable' (NAS) Export by `mutator.config_template()` when set to 'mutator' (Prune) slice_weight (bool): Export subnet weight. Default to False. - remove_architecture (bool): Subnet weight key without 'architecture'. + remove_architecture (bool): Subnet weight key without 'architecture'. Default to True. export_channel (bool): Whether to export the mutator's channel. Often required when finetune is needed for the exported subnet. 
From 31a062e6e5b7c53f9780b550aea1d3f1dfbd4686 Mon Sep 17 00:00:00 2001 From: gaoyang07 <1546308416@qq.com> Date: Wed, 1 Mar 2023 15:30:18 +0800 Subject: [PATCH 48/59] fix comments --- .../autoformer_subnet_8xb256_in1k.py | 9 ++--- configs/nas/mmcls/bignas/README.md | 4 +- .../attentive_mobilenet_subnet_8xb256_in1k.py | 15 +++---- ...ttentive_mobilenet_supernet_32xb64_in1k.py | 2 +- .../darts/darts_subnet_1xb96_cifar10_2.0.py | 3 +- .../darts_subnet_1xb96_cifar10_2.0_mmrazor.py | 1 + .../mmcls/dsnas/dsnas_subnet_8xb128_in1k.py | 34 ++-------------- .../mmcls/dsnas/dsnas_supernet_8xb128_in1k.py | 40 +++++++++++-------- .../ofa_mobilenet_subnet_8xb256_in1k.py | 13 ++---- .../ofa_mobilenet_supernet_32xb64_in1k.py | 2 +- .../spos/spos_mobilenet_subnet_8xb128_in1k.py | 12 ++---- .../spos_shufflenet_subnet_8xb128_in1k.py | 12 ++---- .../detnas_frcnn_shufflenet_subnet_coco_1x.py | 10 ++--- .../detnas_shufflenet_subnet_8xb128_in1k.py | 10 ++--- mmrazor/engine/hooks/dump_subnet_hook.py | 2 +- .../engine/runner/evolution_search_loop.py | 5 ++- .../architectures/utils/mutable_register.py | 9 +++-- 17 files changed, 71 insertions(+), 112 deletions(-) diff --git a/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py b/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py index c0f56a728..0a3c6e246 100644 --- a/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py @@ -1,13 +1,10 @@ _base_ = 'autoformer_supernet_32xb256_in1k.py' -supernet = _base_.supernet - -model_cfg = dict( +_base_.model = dict( _scope_='mmrazor', type='sub_model', - cfg=supernet, + cfg=_base_.supernet, + # NOTE: You can replace the yaml with the mutable_cfg searched by yourself fix_subnet='STEP2_SUBNET_YAML.yaml') -_base_.model = model_cfg - test_cfg = dict(evaluate_fixed_subnet=True) diff --git a/configs/nas/mmcls/bignas/README.md b/configs/nas/mmcls/bignas/README.md index 4850251c2..1f71ba998 100644 --- 
a/configs/nas/mmcls/bignas/README.md +++ b/configs/nas/mmcls/bignas/README.md @@ -40,8 +40,8 @@ CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ | Dataset | Supernet | Subnet | Params(M) | Flops(G) | Top-1 | Config | Download | Remarks | | :------: | :------------------: | :-------------------------------------------------------------------------------------------------------------------------------: | :--------------------: | :------------------: | :---------------------: | :-------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------: | | ImageNet | AttentiveMobileNetV3 | [search space](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/_base_/nas_backbones/attentive_mobilenetv3_supernet.py) | 11.56(min) / 23.3(max) | 414(min) / 1944(max) | 76.88(min) / 81.42(max) | [config](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/nas/mmcls/bignas/attentive_mobilenet_supernet_32xb64_in1k.py) | [model\*](https://download.openmmlab.com/mmrazor/v0.1/nas/bignas/attentive_mobilenet_subnet_8xb256_in1k/attentive_mobilenet_supernet_32xb64_in1k_flops-2G_acc-81.72_20221229_200440-954772a3.pth) | [log](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_supernet_32xb64_in1k_20221227_175800-bcf94eaa.json) (`sandwich rule`) | -| ImageNet | AttentiveMobileNetV3 | [AttentiveNAS-A0\*](https://download.openmmlab.com/mmrazor/v1/bignas/ATTENTIVE_SUBNET_A0.yaml) | 11.559 | 414 | 77.19 | [config](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py) | 
[model](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_subnet_8xb256_in1k_flops-0.21G_acc-77.19_20221229_200440-282a1f70.pth) | Converted from the repo | -| ImageNet | AttentiveMobileNetV3 | [AttentiveNAS-A6\*](https://download.openmmlab.com/mmrazor/v1/bignas/ATTENTIVE_SUBNET_A6.yaml) | 16.476 | 1163 | 80.81 | [config](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py) | [model](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_subnet_8xb256_in1k_flops-0.93G_acc-80.81_20221229_200440-73d92cc6.pth) | Converted from the repo | +| ImageNet | AttentiveMobileNetV3 | [AttentiveNAS-A0\*](https://download.openmmlab.com/mmrazor/v1/bignas/ATTENTIVE_SUBNET_A0.yaml) | 8.854 | 212 | 77.19 | [config](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py) | [model](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_subnet_8xb256_in1k_flops-0.21G_acc-77.19_20221229_200440-282a1f70.pth) | Converted from the repo | +| ImageNet | AttentiveMobileNetV3 | [AttentiveNAS-A6\*](https://download.openmmlab.com/mmrazor/v1/bignas/ATTENTIVE_SUBNET_A6.yaml) | 15.594 | 927 | 80.81 | [config](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py) | [model](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_subnet_8xb256_in1k_flops-0.93G_acc-80.81_20221229_200440-73d92cc6.pth) | Converted from the repo | *Models with * are converted from the [official repo](https://github.com/facebookresearch/AttentiveNAS). The config files of these models are only for inference. 
We support training the supernet by `sandwich rule`, which is different from `rejection sampling` in [official repo](https://github.com/facebookresearch/AttentiveNAS), and welcome you to contribute your reproduction results.* diff --git a/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py b/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py index 01450a70f..a8ebc41fb 100644 --- a/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py @@ -1,17 +1,14 @@ _base_ = 'attentive_mobilenet_supernet_32xb64_in1k.py' -_base_.supernet.data_preprocessor = _base_.data_preprocessor - -supernet = _base_.supernet - -model_cfg = dict( +model = dict( _scope_='mmrazor', type='sub_model', - cfg=supernet, - fix_subnet='configs/nas/mmcls/bignas/ATTENTIVE_SUBNET_A6.yaml') + cfg=_base_.supernet, + # NOTE: You can replace the yaml with the mutable_cfg searched by yourself + fix_subnet='configs/nas/mmcls/bignas/ATTENTIVE_SUBNET_A0.yaml', +) -_base_.model = model_cfg -_base_.model_wrapper_cfg = None +model_wrapper_cfg = None find_unused_parameters = True test_cfg = dict(evaluate_fixed_subnet=True) diff --git a/configs/nas/mmcls/bignas/attentive_mobilenet_supernet_32xb64_in1k.py b/configs/nas/mmcls/bignas/attentive_mobilenet_supernet_32xb64_in1k.py index 303fea924..2683b7e5b 100644 --- a/configs/nas/mmcls/bignas/attentive_mobilenet_supernet_32xb64_in1k.py +++ b/configs/nas/mmcls/bignas/attentive_mobilenet_supernet_32xb64_in1k.py @@ -7,6 +7,7 @@ supernet = dict( _scope_='mmrazor', type='SearchableImageClassifier', + data_preprocessor=_base_.data_preprocessor, backbone=_base_.nas_backbone, neck=dict(type='SqueezeMeanPoolingWithDropout', drop_ratio=0.2), head=dict( @@ -31,7 +32,6 @@ num_random_samples=2, backbone_dropout_stages=[6, 7], architecture=supernet, - data_preprocessor=_base_.data_preprocessor, distiller=dict( type='ConfigurableDistiller', teacher_recorders=dict( diff --git 
a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py index 91c0a88e5..71303fc4d 100644 --- a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py +++ b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py @@ -32,7 +32,8 @@ model = dict( type='mmrazor.sub_model', cfg=supernet, + # NOTE: You can replace the yaml with the mutable_cfg searched by yourself fix_subnet='configs/nas/mmcls/darts/DARTS_SUBNET_CIFAR_PAPER_ALIAS.yaml') -_base_.model_wrapper_cfg = None +model_wrapper_cfg = None find_unused_parameters = True diff --git a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0_mmrazor.py b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0_mmrazor.py index a6a7d6c38..2085c2130 100644 --- a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0_mmrazor.py +++ b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0_mmrazor.py @@ -32,6 +32,7 @@ model = dict( type='mmrazor.sub_model', cfg=supernet, + # NOTE: You can replace the yaml with the mutable_cfg searched by yourself fix_subnet='configs/nas/mmcls/darts/DARTS_SUBNET_CIFAR_MMRAZOR_97.32.yaml') _base_.model_wrapper_cfg = None diff --git a/configs/nas/mmcls/dsnas/dsnas_subnet_8xb128_in1k.py b/configs/nas/mmcls/dsnas/dsnas_subnet_8xb128_in1k.py index 249601445..beafd5638 100644 --- a/configs/nas/mmcls/dsnas/dsnas_subnet_8xb128_in1k.py +++ b/configs/nas/mmcls/dsnas/dsnas_subnet_8xb128_in1k.py @@ -1,40 +1,12 @@ _base_ = ['./dsnas_supernet_8xb128_in1k.py'] -# NOTE: Replace this with the mutable_cfg searched by yourself. 
-supernet = _base_.model['architecture'] - -paramwise_cfg = dict(norm_decay_mult=0.0, bias_decay_mult=0.0) -_base_.optim_wrapper = dict( - optimizer=dict( - type='SGD', lr=0.8, momentum=0.9, weight_decay=0.00004, nesterov=True), - paramwise_cfg=paramwise_cfg) - -epochs = 200 - -param_scheduler = [ - dict( - type='LinearLR', - end=5, - start_factor=0.2, - by_epoch=True, - convert_to_iter_based=True), - dict( - type='CosineAnnealingLR', - T_max=epochs, - begin=5, - end=epochs, - by_epoch=True, - convert_to_iter_based=True) -] - -model_cfg = dict( +model = dict( _scope_='mmrazor', type='sub_model', - cfg=supernet, + cfg=_base_.supernet, + # NOTE: You can replace the yaml with the mutable_cfg searched by yourself fix_subnet= # noqa: E251 'configs/nas/mmcls/dsnas/DSNAS_SUBNET_IMAGENET_PAPER_ALIAS.yaml' ) # noqa: E501 -_base_.model = model_cfg - find_unused_parameters = False diff --git a/configs/nas/mmcls/dsnas/dsnas_supernet_8xb128_in1k.py b/configs/nas/mmcls/dsnas/dsnas_supernet_8xb128_in1k.py index 519d4b2ad..869dede5b 100644 --- a/configs/nas/mmcls/dsnas/dsnas_supernet_8xb128_in1k.py +++ b/configs/nas/mmcls/dsnas/dsnas_supernet_8xb128_in1k.py @@ -4,26 +4,32 @@ 'mmcls::_base_/default_runtime.py', ] +custom_hooks = [ + dict(type='mmrazor.DumpSubnetHook', interval=10, by_epoch=True) +] + +supernet = dict( + _scope_='mmcls', + type='ImageClassifier', + data_preprocessor=_base_.data_preprocessor, + backbone=_base_.nas_backbone, + neck=dict(type='GlobalAveragePooling'), + head=dict( + type='LinearClsHead', + num_classes=1000, + in_channels=1024, + loss=dict( + type='LabelSmoothLoss', + num_classes=1000, + label_smooth_val=0.1, + mode='original', + loss_weight=1.0), + topk=(1, 5))) + # model model = dict( type='mmrazor.DSNAS', - architecture=dict( - _scope_='mmcls', - type='ImageClassifier', - data_preprocessor=_base_.data_preprocessor, - backbone=_base_.nas_backbone, - neck=dict(type='GlobalAveragePooling'), - head=dict( - type='LinearClsHead', - num_classes=1000, - 
in_channels=1024, - loss=dict( - type='LabelSmoothLoss', - num_classes=1000, - label_smooth_val=0.1, - mode='original', - loss_weight=1.0), - topk=(1, 5))), + architecture=supernet, mutator=dict(type='mmrazor.NasMutator'), pretrain_epochs=15, finetune_epochs=_base_.search_epochs, diff --git a/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py b/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py index b34ac6a91..5586460ab 100644 --- a/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py @@ -1,18 +1,13 @@ _base_ = 'ofa_mobilenet_supernet_32xb64_in1k.py' -_base_.supernet.data_preprocessor = _base_.data_preprocessor - -supernet = _base_.supernet - -model_cfg = dict( +model = dict( _scope_='mmrazor', type='sub_model', - cfg=supernet, + cfg=_base_.supernet, + # NOTE: You can replace the yaml with the mutable_cfg searched by yourself fix_subnet='configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT31.yaml') -# fix_subnet='configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT22.yaml') -_base_.model = model_cfg -_base_.model_wrapper_cfg = None +model_wrapper_cfg = None find_unused_parameters = True test_cfg = dict(evaluate_fixed_subnet=True) diff --git a/configs/nas/mmcls/onceforall/ofa_mobilenet_supernet_32xb64_in1k.py b/configs/nas/mmcls/onceforall/ofa_mobilenet_supernet_32xb64_in1k.py index c2e0f05ab..341f4bda9 100644 --- a/configs/nas/mmcls/onceforall/ofa_mobilenet_supernet_32xb64_in1k.py +++ b/configs/nas/mmcls/onceforall/ofa_mobilenet_supernet_32xb64_in1k.py @@ -7,6 +7,7 @@ supernet = dict( _scope_='mmrazor', type='SearchableImageClassifier', + data_preprocessor=_base_.data_preprocessor, backbone=_base_.nas_backbone, neck=dict(type='mmcls.GlobalAveragePooling'), head=dict( @@ -30,7 +31,6 @@ drop_path_rate=0.2, backbone_dropout_stages=[6, 7], architecture=supernet, - data_preprocessor=_base_.data_preprocessor, distiller=dict( type='ConfigurableDistiller', 
teacher_recorders=dict( diff --git a/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py b/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py index c47a562f8..636505cdb 100644 --- a/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py +++ b/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py @@ -1,16 +1,12 @@ _base_ = ['./spos_mobilenet_supernet_8xb128_in1k.py'] -# FIXME: you may replace this with the mutable_cfg searched by yourself -supernet = _base_.supernet - -model_cfg = dict( +model = dict( _scope_='mmrazor', type='sub_model', - cfg=supernet, + cfg=_base_.supernet, + # NOTE: You can replace the yaml with the mutable_cfg searched by yourself fix_subnet='configs/nas/spos/AngleNAS_SHUFFLENETV2_IN1k_2.0.yaml') -_base_.model = model_cfg - -_base_.model_wrapper_cfg = None +model_wrapper_cfg = None find_unused_parameters = False diff --git a/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py b/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py index 62c21f07a..e27e8fa5e 100644 --- a/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py +++ b/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py @@ -1,16 +1,12 @@ _base_ = ['./spos_shufflenet_supernet_8xb128_in1k.py'] -# FIXME: you may replace this with the searched by yourself -supernet = _base_.supernet - -model_cfg = dict( +_base_.model = dict( _scope_='mmrazor', type='sub_model', - cfg=supernet, + cfg=_base_.supernet, + # NOTE: You can replace the yaml with the mutable_cfg searched by yourself fix_subnet='configs/nas/mmcls/spos/SPOS_SUBNET.yaml') -_base_.model = model_cfg - -_base_.model_wrapper_cfg = None +model_wrapper_cfg = None find_unused_parameters = False diff --git a/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py b/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py index 5c51903ce..dba60a5db 100644 --- a/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py +++ 
b/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py @@ -1,14 +1,10 @@ _base_ = ['./detnas_frcnn_shufflenet_supernet_coco_1x.py'] -# FIXME: you may replace this with the searched by yourself -supernet = _base_.supernet - -model_cfg = dict( +model = dict( _scope_='mmrazor', type='sub_model', - cfg=supernet, + cfg=_base_.supernet, + # NOTE: You can replace the yaml with the mutable_cfg searched by yourself fix_subnet='configs/nas/mmdet/detnas/DETNAS_SUBNET.yaml') -_base_.model = model_cfg - find_unused_parameters = False diff --git a/configs/nas/mmdet/detnas/detnas_shufflenet_subnet_8xb128_in1k.py b/configs/nas/mmdet/detnas/detnas_shufflenet_subnet_8xb128_in1k.py index ea3dad587..4129f1863 100644 --- a/configs/nas/mmdet/detnas/detnas_shufflenet_subnet_8xb128_in1k.py +++ b/configs/nas/mmdet/detnas/detnas_shufflenet_subnet_8xb128_in1k.py @@ -1,16 +1,12 @@ _base_ = './detnas_shufflenet_supernet_8xb128_in1k.py' -# FIXME: you may replace this with the mutable_cfg searched by yourself -supernet = _base_.supernet - -model_cfg = dict( +model = dict( _scope_='mmrazor', type='sub_model', - cfg=supernet, + cfg=_base_.supernet, + # NOTE: You can replace the yaml with the mutable_cfg searched by yourself fix_subnet= # noqa: E251 'https://download.openmmlab.com/mmrazor/v1/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20220715-61d2e900_subnet_cfg_v1.yaml' # noqa: E501 ) -_base_.model = model_cfg - find_unused_parameters = False diff --git a/mmrazor/engine/hooks/dump_subnet_hook.py b/mmrazor/engine/hooks/dump_subnet_hook.py index 9234ba79c..1aaeaba0f 100644 --- a/mmrazor/engine/hooks/dump_subnet_hook.py +++ b/mmrazor/engine/hooks/dump_subnet_hook.py @@ -122,7 +122,7 @@ def _save_subnet(self, runner) -> None: delattr(module, 'arch_weights') copied_model = copy.deepcopy(model) - copied_model.mutator.set_choices(copied_model.sample_choices()) + copied_model.mutator.set_choices(copied_model.mutator.sample_choices()) subnet_dict = 
export_fix_subnet(copied_model)[0] subnet_dict = convert_fix_subnet(subnet_dict) diff --git a/mmrazor/engine/runner/evolution_search_loop.py b/mmrazor/engine/runner/evolution_search_loop.py index 2c2c9ea25..644385e2f 100644 --- a/mmrazor/engine/runner/evolution_search_loop.py +++ b/mmrazor/engine/runner/evolution_search_loop.py @@ -320,7 +320,10 @@ def _save_best_fix_subnet(self): timestamp_subnet = time.strftime('%Y%m%d_%H%M', time.localtime()) model_name = f'subnet_{timestamp_subnet}.pth' save_path = osp.join(self.runner.work_dir, model_name) - torch.save({'state_dict': sliced_model, 'meta': {}}, save_path) + torch.save({ + 'state_dict': sliced_model.state_dict(), + 'meta': {} + }, save_path) self.runner.logger.info(f'Subnet checkpoint {model_name} saved in ' f'{self.runner.work_dir}') diff --git a/mmrazor/models/architectures/utils/mutable_register.py b/mmrazor/models/architectures/utils/mutable_register.py index c984a6af8..f3a916748 100644 --- a/mmrazor/models/architectures/utils/mutable_register.py +++ b/mmrazor/models/architectures/utils/mutable_register.py @@ -31,9 +31,12 @@ def mutate_conv_module( mutable_kernel_size) -def mutate_mobilenet_layer(mb_layer: MBBlock, mutable_in_channels, - mutable_out_channels, mutable_expand_ratio, - mutable_kernel_size, fine_grained_mode): +def mutate_mobilenet_layer(mb_layer: MBBlock, + mutable_in_channels, + mutable_out_channels, + mutable_expand_ratio, + mutable_kernel_size, + fine_grained_mode: bool = False): """Mutate MobileNet layers.""" mb_layer.derived_expand_channels = \ mutable_expand_ratio * mutable_in_channels From 450c885c26fe27ad17ce7a22fa2ebd4323e58edc Mon Sep 17 00:00:00 2001 From: sunyue1 Date: Wed, 1 Mar 2023 16:13:47 +0800 Subject: [PATCH 49/59] add autoformer cfg --- .../mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml | 134 ++++++++++++++++++ .../autoformer_subnet_8xb256_in1k.py | 4 +- 2 files changed, 135 insertions(+), 3 deletions(-) create mode 100644 configs/nas/mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml 
diff --git a/configs/nas/mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml b/configs/nas/mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml new file mode 100644 index 000000000..49173dbdd --- /dev/null +++ b/configs/nas/mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml @@ -0,0 +1,134 @@ +architecture.backbone.base_embed_dims: + chosen: 64 +architecture.backbone.blocks.0.attn.mutable_attrs.num_heads: + chosen: 10 +architecture.backbone.blocks.0.middle_channels: + chosen: 3.5 +architecture.backbone.blocks.0.mutable_mlp_ratios: + chosen: 3.5 +architecture.backbone.blocks.0.mutable_q_embed_dims: + chosen: 10 +architecture.backbone.blocks.1.attn.mutable_attrs.num_heads: + chosen: 10 +architecture.backbone.blocks.1.middle_channels: + chosen: 3.5 +architecture.backbone.blocks.1.mutable_mlp_ratios: + chosen: 3.5 +architecture.backbone.blocks.1.mutable_q_embed_dims: + chosen: 64 +architecture.backbone.blocks.10.attn.mutable_attrs.num_heads: + chosen: 10 +architecture.backbone.blocks.10.middle_channels: + chosen: 4.0 +architecture.backbone.blocks.10.mutable_mlp_ratios: + chosen: 4.0 +architecture.backbone.blocks.10.mutable_q_embed_dims: + chosen: 64 +architecture.backbone.blocks.11.attn.mutable_attrs.num_heads: + chosen: 10 +architecture.backbone.blocks.11.middle_channels: + chosen: 576 +architecture.backbone.blocks.11.mutable_mlp_ratios: + chosen: 4.0 +architecture.backbone.blocks.11.mutable_q_embed_dims: + chosen: 10 +architecture.backbone.blocks.12.attn.mutable_attrs.num_heads: + chosen: 9 +architecture.backbone.blocks.12.middle_channels: + chosen: 4.0 +architecture.backbone.blocks.12.mutable_mlp_ratios: + chosen: 4.0 +architecture.backbone.blocks.12.mutable_q_embed_dims: + chosen: 9 +architecture.backbone.blocks.13.attn.mutable_attrs.num_heads: + chosen: 10 +architecture.backbone.blocks.13.middle_channels: + chosen: 4.0 +architecture.backbone.blocks.13.mutable_mlp_ratios: + chosen: 4.0 +architecture.backbone.blocks.13.mutable_q_embed_dims: + chosen: 10 
+architecture.backbone.blocks.14.attn.mutable_attrs.num_heads: + chosen: 8 +architecture.backbone.blocks.14.middle_channels: + chosen: 576 +architecture.backbone.blocks.14.mutable_mlp_ratios: + chosen: 3.5 +architecture.backbone.blocks.14.mutable_q_embed_dims: + chosen: 8 +architecture.backbone.blocks.15.attn.mutable_attrs.num_heads: + chosen: 10 +architecture.backbone.blocks.15.middle_channels: + chosen: 3.0 +architecture.backbone.blocks.15.mutable_mlp_ratios: + chosen: 3.0 +architecture.backbone.blocks.15.mutable_q_embed_dims: + chosen: 10 +architecture.backbone.blocks.2.attn.mutable_attrs.num_heads: + chosen: 10 +architecture.backbone.blocks.2.middle_channels: + chosen: 576 +architecture.backbone.blocks.2.mutable_mlp_ratios: + chosen: 3.5 +architecture.backbone.blocks.2.mutable_q_embed_dims: + chosen: 10 +architecture.backbone.blocks.3.attn.mutable_attrs.num_heads: + chosen: 8 +architecture.backbone.blocks.3.middle_channels: + chosen: 4.0 +architecture.backbone.blocks.3.mutable_mlp_ratios: + chosen: 4.0 +architecture.backbone.blocks.3.mutable_q_embed_dims: + chosen: 8 +architecture.backbone.blocks.4.attn.mutable_attrs.num_heads: + chosen: 10 +architecture.backbone.blocks.4.middle_channels: + chosen: 576 +architecture.backbone.blocks.4.mutable_mlp_ratios: + chosen: 3.0 +architecture.backbone.blocks.4.mutable_q_embed_dims: + chosen: 10 +architecture.backbone.blocks.5.attn.mutable_attrs.num_heads: + chosen: 9 +architecture.backbone.blocks.5.middle_channels: + chosen: 3.0 +architecture.backbone.blocks.5.mutable_mlp_ratios: + chosen: 3.0 +architecture.backbone.blocks.5.mutable_q_embed_dims: + chosen: 9 +architecture.backbone.blocks.6.attn.mutable_attrs.num_heads: + chosen: 8 +architecture.backbone.blocks.6.middle_channels: + chosen: 576 +architecture.backbone.blocks.6.mutable_mlp_ratios: + chosen: 3.5 +architecture.backbone.blocks.6.mutable_q_embed_dims: + chosen: 8 +architecture.backbone.blocks.7.attn.mutable_attrs.num_heads: + chosen: 8 
+architecture.backbone.blocks.7.middle_channels: + chosen: 3.5 +architecture.backbone.blocks.7.mutable_mlp_ratios: + chosen: 3.5 +architecture.backbone.blocks.7.mutable_q_embed_dims: + chosen: 8 +architecture.backbone.blocks.8.attn.mutable_attrs.num_heads: + chosen: 9 +architecture.backbone.blocks.8.middle_channels: + chosen: 576 +architecture.backbone.blocks.8.mutable_mlp_ratios: + chosen: 4.0 +architecture.backbone.blocks.8.mutable_q_embed_dims: + chosen: 9 +architecture.backbone.blocks.9.attn.mutable_attrs.num_heads: + chosen: 8 +architecture.backbone.blocks.9.middle_channels: + chosen: 576 +architecture.backbone.blocks.9.mutable_mlp_ratios: + chosen: 4.0 +architecture.backbone.blocks.9.mutable_q_embed_dims: + chosen: 8 +architecture.backbone.mutable_depth: + chosen: 14 +architecture.backbone.mutable_embed_dims: + chosen: 576 diff --git a/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py b/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py index c0f56a728..348af90f2 100644 --- a/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py @@ -6,8 +6,6 @@ _scope_='mmrazor', type='sub_model', cfg=supernet, - fix_subnet='STEP2_SUBNET_YAML.yaml') + fix_subnet='configs/nas/mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml') _base_.model = model_cfg - -test_cfg = dict(evaluate_fixed_subnet=True) From 0efa384c685141f012422b6f37dfb0b2c56ad659 Mon Sep 17 00:00:00 2001 From: sunyue1 Date: Wed, 1 Mar 2023 16:15:51 +0800 Subject: [PATCH 50/59] update readme --- configs/nas/mmcls/autoformer/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/nas/mmcls/autoformer/README.md b/configs/nas/mmcls/autoformer/README.md index c24cb0ac0..7ebd1db66 100644 --- a/configs/nas/mmcls/autoformer/README.md +++ b/configs/nas/mmcls/autoformer/README.md @@ -51,7 +51,7 @@ CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ | Dataset | Supernet | Subnet | Params(M) | Flops(G) | 
Top-1 (%) | Top-5 (%) | Config | Download | Remarks | | :------: | :------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------: | :------: | :-------: | :-------: | :---------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------------: | -| ImageNet | vit | [mutable](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/v0.1/nas/spos/spos_shufflenetv2_subnet_8xb128_in1k/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-454627be_mutable_cfg.yaml?versionId=CAEQHxiBgICw5b6I7xciIGY5MjVmNWFhY2U5MjQzN2M4NDViYzI2YWRmYWE1YzQx) | 52.472 | 10.2 | 82.48 | 95.99 | [config](./autoformer_supernet_32xb256_in1k.py) | [model](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/x.pth) \| [log](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/v0.1/nas/spos/x.log.json) | MMRazor searched | +| ImageNet | vit | [mutable](./configs/nas/mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml) | 54.319 | 10.57 | 82.47 | 95.99 | [config](./autoformer_supernet_32xb256_in1k.py) | [model](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/x.pth) \| [log](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/v0.1/nas/spos/x.log.json) | MMRazor searched | **Note**: From b950750edda0a02e269ee15a6c327398d2d2eefd Mon Sep 17 00:00:00 2001 From: aptsunny Date: Wed, 1 Mar 2023 17:29:31 +0800 Subject: [PATCH 51/59] update supernet link --- configs/nas/mmcls/bignas/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/nas/mmcls/bignas/README.md b/configs/nas/mmcls/bignas/README.md index 1f71ba998..fe165cee4 
100644 --- a/configs/nas/mmcls/bignas/README.md +++ b/configs/nas/mmcls/bignas/README.md @@ -39,7 +39,7 @@ CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ | Dataset | Supernet | Subnet | Params(M) | Flops(G) | Top-1 | Config | Download | Remarks | | :------: | :------------------: | :-------------------------------------------------------------------------------------------------------------------------------: | :--------------------: | :------------------: | :---------------------: | :-------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------: | -| ImageNet | AttentiveMobileNetV3 | [search space](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/_base_/nas_backbones/attentive_mobilenetv3_supernet.py) | 11.56(min) / 23.3(max) | 414(min) / 1944(max) | 76.88(min) / 81.42(max) | [config](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/nas/mmcls/bignas/attentive_mobilenet_supernet_32xb64_in1k.py) | [model\*](https://download.openmmlab.com/mmrazor/v0.1/nas/bignas/attentive_mobilenet_subnet_8xb256_in1k/attentive_mobilenet_supernet_32xb64_in1k_flops-2G_acc-81.72_20221229_200440-954772a3.pth) | [log](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_supernet_32xb64_in1k_20221227_175800-bcf94eaa.json) (`sandwich rule`) | +| ImageNet | AttentiveMobileNetV3 | [search space](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/_base_/nas_backbones/attentive_mobilenetv3_supernet.py) | 11.56(min) / 23.3(max) | 414(min) / 1944(max) | 76.88(min) / 81.42(max) | 
[config](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/nas/mmcls/bignas/attentive_mobilenet_supernet_32xb64_in1k.py) | [model\*](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_supernet_32xb64_in1k_flops-2G_acc-81.72_20221229_200440-954772a3.pth) | [log](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_supernet_32xb64_in1k_20221227_175800-bcf94eaa.json) (`sandwich rule`) | | ImageNet | AttentiveMobileNetV3 | [AttentiveNAS-A0\*](https://download.openmmlab.com/mmrazor/v1/bignas/ATTENTIVE_SUBNET_A0.yaml) | 8.854 | 212 | 77.19 | [config](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py) | [model](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_subnet_8xb256_in1k_flops-0.21G_acc-77.19_20221229_200440-282a1f70.pth) | Converted from the repo | | ImageNet | AttentiveMobileNetV3 | [AttentiveNAS-A6\*](https://download.openmmlab.com/mmrazor/v1/bignas/ATTENTIVE_SUBNET_A6.yaml) | 15.594 | 927 | 80.81 | [config](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py) | [model](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_subnet_8xb256_in1k_flops-0.93G_acc-80.81_20221229_200440-73d92cc6.pth) | Converted from the repo | From e98846d27e30770f01938fc48b9d40e6a3fc83b8 Mon Sep 17 00:00:00 2001 From: gaoyang07 <1546308416@qq.com> Date: Wed, 1 Mar 2023 20:32:00 +0800 Subject: [PATCH 52/59] fix sub_model configs --- .../mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml | 134 +++++++++--------- .../autoformer_subnet_8xb256_in1k.py | 18 ++- configs/nas/mmcls/bignas/README.md | 10 +- .../attentive_mobilenet_subnet_8xb256_in1k.py | 11 +- .../ofa_mobilenet_subnet_8xb256_in1k.py | 11 +- .../spos/spos_mobilenet_subnet_8xb128_in1k.py | 7 +- .../spos_shufflenet_subnet_8xb128_in1k.py | 7 +- .../detnas_frcnn_shufflenet_subnet_coco_1x.py | 7 +- .../pruning/group_fisher/counters.py 
| 2 +- .../counters/op_counters/__init__.py | 4 +- .../op_counters/dynamic_op_counters.py | 60 -------- mmrazor/registry/registry.py | 10 +- mmrazor/structures/subnet/fix_subnet.py | 14 +- 13 files changed, 132 insertions(+), 163 deletions(-) delete mode 100644 mmrazor/models/task_modules/estimators/counters/op_counters/dynamic_op_counters.py diff --git a/configs/nas/mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml b/configs/nas/mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml index 49173dbdd..e3672feaf 100644 --- a/configs/nas/mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml +++ b/configs/nas/mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml @@ -1,134 +1,134 @@ -architecture.backbone.base_embed_dims: +backbone.base_embed_dims: chosen: 64 -architecture.backbone.blocks.0.attn.mutable_attrs.num_heads: +backbone.blocks.0.attn.mutable_attrs.num_heads: chosen: 10 -architecture.backbone.blocks.0.middle_channels: +backbone.blocks.0.middle_channels: chosen: 3.5 -architecture.backbone.blocks.0.mutable_mlp_ratios: +backbone.blocks.0.mutable_mlp_ratios: chosen: 3.5 -architecture.backbone.blocks.0.mutable_q_embed_dims: +backbone.blocks.0.mutable_q_embed_dims: chosen: 10 -architecture.backbone.blocks.1.attn.mutable_attrs.num_heads: +backbone.blocks.1.attn.mutable_attrs.num_heads: chosen: 10 -architecture.backbone.blocks.1.middle_channels: +backbone.blocks.1.middle_channels: chosen: 3.5 -architecture.backbone.blocks.1.mutable_mlp_ratios: +backbone.blocks.1.mutable_mlp_ratios: chosen: 3.5 -architecture.backbone.blocks.1.mutable_q_embed_dims: +backbone.blocks.1.mutable_q_embed_dims: chosen: 64 -architecture.backbone.blocks.10.attn.mutable_attrs.num_heads: +backbone.blocks.10.attn.mutable_attrs.num_heads: chosen: 10 -architecture.backbone.blocks.10.middle_channels: +backbone.blocks.10.middle_channels: chosen: 4.0 -architecture.backbone.blocks.10.mutable_mlp_ratios: +backbone.blocks.10.mutable_mlp_ratios: chosen: 4.0 -architecture.backbone.blocks.10.mutable_q_embed_dims: 
+backbone.blocks.10.mutable_q_embed_dims: chosen: 64 -architecture.backbone.blocks.11.attn.mutable_attrs.num_heads: +backbone.blocks.11.attn.mutable_attrs.num_heads: chosen: 10 -architecture.backbone.blocks.11.middle_channels: +backbone.blocks.11.middle_channels: chosen: 576 -architecture.backbone.blocks.11.mutable_mlp_ratios: +backbone.blocks.11.mutable_mlp_ratios: chosen: 4.0 -architecture.backbone.blocks.11.mutable_q_embed_dims: +backbone.blocks.11.mutable_q_embed_dims: chosen: 10 -architecture.backbone.blocks.12.attn.mutable_attrs.num_heads: +backbone.blocks.12.attn.mutable_attrs.num_heads: chosen: 9 -architecture.backbone.blocks.12.middle_channels: +backbone.blocks.12.middle_channels: chosen: 4.0 -architecture.backbone.blocks.12.mutable_mlp_ratios: +backbone.blocks.12.mutable_mlp_ratios: chosen: 4.0 -architecture.backbone.blocks.12.mutable_q_embed_dims: +backbone.blocks.12.mutable_q_embed_dims: chosen: 9 -architecture.backbone.blocks.13.attn.mutable_attrs.num_heads: +backbone.blocks.13.attn.mutable_attrs.num_heads: chosen: 10 -architecture.backbone.blocks.13.middle_channels: +backbone.blocks.13.middle_channels: chosen: 4.0 -architecture.backbone.blocks.13.mutable_mlp_ratios: +backbone.blocks.13.mutable_mlp_ratios: chosen: 4.0 -architecture.backbone.blocks.13.mutable_q_embed_dims: +backbone.blocks.13.mutable_q_embed_dims: chosen: 10 -architecture.backbone.blocks.14.attn.mutable_attrs.num_heads: +backbone.blocks.14.attn.mutable_attrs.num_heads: chosen: 8 -architecture.backbone.blocks.14.middle_channels: +backbone.blocks.14.middle_channels: chosen: 576 -architecture.backbone.blocks.14.mutable_mlp_ratios: +backbone.blocks.14.mutable_mlp_ratios: chosen: 3.5 -architecture.backbone.blocks.14.mutable_q_embed_dims: +backbone.blocks.14.mutable_q_embed_dims: chosen: 8 -architecture.backbone.blocks.15.attn.mutable_attrs.num_heads: +backbone.blocks.15.attn.mutable_attrs.num_heads: chosen: 10 -architecture.backbone.blocks.15.middle_channels: 
+backbone.blocks.15.middle_channels: chosen: 3.0 -architecture.backbone.blocks.15.mutable_mlp_ratios: +backbone.blocks.15.mutable_mlp_ratios: chosen: 3.0 -architecture.backbone.blocks.15.mutable_q_embed_dims: +backbone.blocks.15.mutable_q_embed_dims: chosen: 10 -architecture.backbone.blocks.2.attn.mutable_attrs.num_heads: +backbone.blocks.2.attn.mutable_attrs.num_heads: chosen: 10 -architecture.backbone.blocks.2.middle_channels: +backbone.blocks.2.middle_channels: chosen: 576 -architecture.backbone.blocks.2.mutable_mlp_ratios: +backbone.blocks.2.mutable_mlp_ratios: chosen: 3.5 -architecture.backbone.blocks.2.mutable_q_embed_dims: +backbone.blocks.2.mutable_q_embed_dims: chosen: 10 -architecture.backbone.blocks.3.attn.mutable_attrs.num_heads: +backbone.blocks.3.attn.mutable_attrs.num_heads: chosen: 8 -architecture.backbone.blocks.3.middle_channels: +backbone.blocks.3.middle_channels: chosen: 4.0 -architecture.backbone.blocks.3.mutable_mlp_ratios: +backbone.blocks.3.mutable_mlp_ratios: chosen: 4.0 -architecture.backbone.blocks.3.mutable_q_embed_dims: +backbone.blocks.3.mutable_q_embed_dims: chosen: 8 -architecture.backbone.blocks.4.attn.mutable_attrs.num_heads: +backbone.blocks.4.attn.mutable_attrs.num_heads: chosen: 10 -architecture.backbone.blocks.4.middle_channels: +backbone.blocks.4.middle_channels: chosen: 576 -architecture.backbone.blocks.4.mutable_mlp_ratios: +backbone.blocks.4.mutable_mlp_ratios: chosen: 3.0 -architecture.backbone.blocks.4.mutable_q_embed_dims: +backbone.blocks.4.mutable_q_embed_dims: chosen: 10 -architecture.backbone.blocks.5.attn.mutable_attrs.num_heads: +backbone.blocks.5.attn.mutable_attrs.num_heads: chosen: 9 -architecture.backbone.blocks.5.middle_channels: +backbone.blocks.5.middle_channels: chosen: 3.0 -architecture.backbone.blocks.5.mutable_mlp_ratios: +backbone.blocks.5.mutable_mlp_ratios: chosen: 3.0 -architecture.backbone.blocks.5.mutable_q_embed_dims: +backbone.blocks.5.mutable_q_embed_dims: chosen: 9 
-architecture.backbone.blocks.6.attn.mutable_attrs.num_heads: +backbone.blocks.6.attn.mutable_attrs.num_heads: chosen: 8 -architecture.backbone.blocks.6.middle_channels: +backbone.blocks.6.middle_channels: chosen: 576 -architecture.backbone.blocks.6.mutable_mlp_ratios: +backbone.blocks.6.mutable_mlp_ratios: chosen: 3.5 -architecture.backbone.blocks.6.mutable_q_embed_dims: +backbone.blocks.6.mutable_q_embed_dims: chosen: 8 -architecture.backbone.blocks.7.attn.mutable_attrs.num_heads: +backbone.blocks.7.attn.mutable_attrs.num_heads: chosen: 8 -architecture.backbone.blocks.7.middle_channels: +backbone.blocks.7.middle_channels: chosen: 3.5 -architecture.backbone.blocks.7.mutable_mlp_ratios: +backbone.blocks.7.mutable_mlp_ratios: chosen: 3.5 -architecture.backbone.blocks.7.mutable_q_embed_dims: +backbone.blocks.7.mutable_q_embed_dims: chosen: 8 -architecture.backbone.blocks.8.attn.mutable_attrs.num_heads: +backbone.blocks.8.attn.mutable_attrs.num_heads: chosen: 9 -architecture.backbone.blocks.8.middle_channels: +backbone.blocks.8.middle_channels: chosen: 576 -architecture.backbone.blocks.8.mutable_mlp_ratios: +backbone.blocks.8.mutable_mlp_ratios: chosen: 4.0 -architecture.backbone.blocks.8.mutable_q_embed_dims: +backbone.blocks.8.mutable_q_embed_dims: chosen: 9 -architecture.backbone.blocks.9.attn.mutable_attrs.num_heads: +backbone.blocks.9.attn.mutable_attrs.num_heads: chosen: 8 -architecture.backbone.blocks.9.middle_channels: +backbone.blocks.9.middle_channels: chosen: 576 -architecture.backbone.blocks.9.mutable_mlp_ratios: +backbone.blocks.9.mutable_mlp_ratios: chosen: 4.0 -architecture.backbone.blocks.9.mutable_q_embed_dims: +backbone.blocks.9.mutable_q_embed_dims: chosen: 8 -architecture.backbone.mutable_depth: +backbone.mutable_depth: chosen: 14 -architecture.backbone.mutable_embed_dims: +backbone.mutable_embed_dims: chosen: 576 diff --git a/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py 
b/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py index 2368f117c..b41320b22 100644 --- a/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py @@ -1,9 +1,17 @@ _base_ = 'autoformer_supernet_32xb256_in1k.py' -_base_.model = dict( +model = dict( _scope_='mmrazor', type='sub_model', - cfg=supernet, - fix_subnet='configs/nas/mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml') - -_base_.model = model_cfg + cfg=_base_.supernet, + # NOTE: You can replace the yaml with the mutable_cfg searched by yourself + fix_subnet='configs/nas/mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml', + # You can also load the checkpoint of supernet instead of the specific + # subnet by modifying the `checkpoint`(path) in the following `init_cfg` + # with `init_weight_from_supernet = True`. + init_weight_from_supernet=False, + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/x.pth', # noqa: E501 + prefix='architecture.')) diff --git a/configs/nas/mmcls/bignas/README.md b/configs/nas/mmcls/bignas/README.md index fe165cee4..6330faba2 100644 --- a/configs/nas/mmcls/bignas/README.md +++ b/configs/nas/mmcls/bignas/README.md @@ -37,11 +37,11 @@ CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ ## Results and models -| Dataset | Supernet | Subnet | Params(M) | Flops(G) | Top-1 | Config | Download | Remarks | -| :------: | :------------------: | :-------------------------------------------------------------------------------------------------------------------------------: | :--------------------: | :------------------: | :---------------------: | :-------------------------------------------------------------------------------------------------------------------------------: | 
:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------: | -| ImageNet | AttentiveMobileNetV3 | [search space](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/_base_/nas_backbones/attentive_mobilenetv3_supernet.py) | 11.56(min) / 23.3(max) | 414(min) / 1944(max) | 76.88(min) / 81.42(max) | [config](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/nas/mmcls/bignas/attentive_mobilenet_supernet_32xb64_in1k.py) | [model\*](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_supernet_32xb64_in1k_flops-2G_acc-81.72_20221229_200440-954772a3.pth) | [log](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_supernet_32xb64_in1k_20221227_175800-bcf94eaa.json) (`sandwich rule`) | -| ImageNet | AttentiveMobileNetV3 | [AttentiveNAS-A0\*](https://download.openmmlab.com/mmrazor/v1/bignas/ATTENTIVE_SUBNET_A0.yaml) | 8.854 | 212 | 77.19 | [config](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py) | [model](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_subnet_8xb256_in1k_flops-0.21G_acc-77.19_20221229_200440-282a1f70.pth) | Converted from the repo | -| ImageNet | AttentiveMobileNetV3 | [AttentiveNAS-A6\*](https://download.openmmlab.com/mmrazor/v1/bignas/ATTENTIVE_SUBNET_A6.yaml) | 15.594 | 927 | 80.81 | [config](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py) | [model](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_subnet_8xb256_in1k_flops-0.93G_acc-80.81_20221229_200440-73d92cc6.pth) | Converted from the repo | +| Dataset | Supernet | Subnet | Params(M) | Flops(G) | 
Top-1 | Config | Download | Remarks | +| :------: | :------------------: | :-------------------------------------------------------------------------------------------------------------------------------: | :--------------------: | :------------------: | :---------------------: | :-------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------: | +| ImageNet | AttentiveMobileNetV3 | [search space](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/_base_/nas_backbones/attentive_mobilenetv3_supernet.py) | 8.854(min) / 23.3(max) | 212(min) / 1944(max) | 77.19(min) / 81.42(max) | [config](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/nas/mmcls/bignas/attentive_mobilenet_supernet_32xb64_in1k.py) | [model\*](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_supernet_32xb64_in1k_flops-2G_acc-81.72_20221229_200440-954772a3.pth) | [log](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_supernet_32xb64_in1k_20221227_175800-bcf94eaa.json) (`sandwich rule`) | +| ImageNet | AttentiveMobileNetV3 | [AttentiveNAS-A0\*](https://download.openmmlab.com/mmrazor/v1/bignas/ATTENTIVE_SUBNET_A0.yaml) | 8.854 | 212 | 77.19 | [config](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py) | [model](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_subnet_8xb256_in1k_flops-0.21G_acc-77.19_20221229_200440-282a1f70.pth) | Converted from the repo | +| ImageNet | AttentiveMobileNetV3 | [AttentiveNAS-A6\*](https://download.openmmlab.com/mmrazor/v1/bignas/ATTENTIVE_SUBNET_A6.yaml) | 15.594 | 927 | 
80.81 | [config](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py) | [model](https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_subnet_8xb256_in1k_flops-0.93G_acc-80.81_20221229_200440-73d92cc6.pth) | Converted from the repo | *Models with * are converted from the [official repo](https://github.com/facebookresearch/AttentiveNAS). The config files of these models are only for inference. We support training the supernet by `sandwich rule`, which is different from `rejection sampling` in [official repo](https://github.com/facebookresearch/AttentiveNAS), and welcome you to contribute your reproduction results.* diff --git a/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py b/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py index a8ebc41fb..daa3215f9 100644 --- a/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py @@ -6,8 +6,15 @@ cfg=_base_.supernet, # NOTE: You can replace the yaml with the mutable_cfg searched by yourself fix_subnet='configs/nas/mmcls/bignas/ATTENTIVE_SUBNET_A0.yaml', -) - + # You can load the checkpoint of supernet instead of the specific + # subnet by modifying the `checkpoint`(path) in the following `init_cfg` + # with `init_weight_from_supernet = True`. 
+ init_weight_from_supernet=True, + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_supernet_32xb64_in1k_flops-2G_acc-81.72_20221229_200440-954772a3.pth', # noqa: E501 + prefix='architecture.')) model_wrapper_cfg = None find_unused_parameters = True diff --git a/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py b/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py index 5586460ab..9b033b3e3 100644 --- a/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py @@ -5,7 +5,16 @@ type='sub_model', cfg=_base_.supernet, # NOTE: You can replace the yaml with the mutable_cfg searched by yourself - fix_subnet='configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT31.yaml') + fix_subnet='configs/nas/mmcls/onceforall/OFA_SUBNET_NOTE8_LAT31.yaml', + # You can also load the checkpoint of supernet instead of the specific + # subnet by modifying the `checkpoint`(path) in the following `init_cfg` + # with `init_weight_from_supernet = True`. 
+ init_weight_from_supernet=False, + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmrazor/v1/ofa/ofa_mobilenet_subnet_8xb256_in1k_note8_lat%4031ms_top1%4072.8_finetune%4025.py_20221214_0939-981a8b2a.pth', # noqa: E501 + prefix='architecture.')) model_wrapper_cfg = None find_unused_parameters = True diff --git a/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py b/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py index 636505cdb..a45d17c1b 100644 --- a/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py +++ b/configs/nas/mmcls/spos/spos_mobilenet_subnet_8xb128_in1k.py @@ -5,7 +5,12 @@ type='sub_model', cfg=_base_.supernet, # NOTE: You can replace the yaml with the mutable_cfg searched by yourself - fix_subnet='configs/nas/spos/AngleNAS_SHUFFLENETV2_IN1k_2.0.yaml') + fix_subnet='configs/nas/spos/AngleNAS_SHUFFLENETV2_IN1k_2.0.yaml', + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmrazor/v1/spos/spos_mobilenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_v3.pth', # noqa: E501 + prefix='architecture.')) model_wrapper_cfg = None diff --git a/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py b/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py index e27e8fa5e..7f671def4 100644 --- a/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py +++ b/configs/nas/mmcls/spos/spos_shufflenet_subnet_8xb128_in1k.py @@ -5,7 +5,12 @@ type='sub_model', cfg=_base_.supernet, # NOTE: You can replace the yaml with the mutable_cfg searched by yourself - fix_subnet='configs/nas/mmcls/spos/SPOS_SUBNET.yaml') + fix_subnet='configs/nas/mmcls/spos/SPOS_SUBNET.yaml', + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmrazor/v1/spos/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_v3.pth', # noqa: E501 + prefix='architecture.')) model_wrapper_cfg = 
None diff --git a/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py b/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py index dba60a5db..0da0388f1 100644 --- a/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py +++ b/configs/nas/mmdet/detnas/detnas_frcnn_shufflenet_subnet_coco_1x.py @@ -5,6 +5,11 @@ type='sub_model', cfg=_base_.supernet, # NOTE: You can replace the yaml with the mutable_cfg searched by yourself - fix_subnet='configs/nas/mmdet/detnas/DETNAS_SUBNET.yaml') + fix_subnet='configs/nas/mmdet/detnas/DETNAS_SUBNET.yaml', + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco_bbox_backbone_flops-0.34M_mAP-37.5_20220715-61d2e900_v1.pth', # noqa: E501 + prefix='architecture.')) find_unused_parameters = False diff --git a/mmrazor/implementations/pruning/group_fisher/counters.py b/mmrazor/implementations/pruning/group_fisher/counters.py index 6f41a0244..a8888e1dd 100644 --- a/mmrazor/implementations/pruning/group_fisher/counters.py +++ b/mmrazor/implementations/pruning/group_fisher/counters.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from mmrazor.models.task_modules.estimators.counters.op_counters.dynamic_op_counters import ( # noqa +from mmrazor.models.task_modules.estimators.counters import ( DynamicConv2dCounter, DynamicLinearCounter) from mmrazor.registry import TASK_UTILS diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/__init__.py b/mmrazor/models/task_modules/estimators/counters/op_counters/__init__.py index 53e19a709..5226a7047 100644 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/__init__.py +++ b/mmrazor/models/task_modules/estimators/counters/op_counters/__init__.py @@ -5,7 +5,7 @@ from .conv_layer_counter import (Conv1dCounter, Conv2dCounter, Conv3dCounter, DynamicConv2dCounter) from .deconv_layer_counter import ConvTranspose2dCounter -from .linear_layer_counter import LinearCounter +from .linear_layer_counter import DynamicLinearCounter, LinearCounter from .norm_layer_counter import (BatchNorm1dCounter, BatchNorm2dCounter, BatchNorm3dCounter, DMCPBatchNorm2dCounter, GroupNormCounter, InstanceNorm1dCounter, @@ -21,5 +21,5 @@ 'ConvTranspose2dCounter', 'UpsampleCounter', 'LinearCounter', 'GroupNormCounter', 'InstanceNorm1dCounter', 'InstanceNorm2dCounter', 'InstanceNorm3dCounter', 'LayerNormCounter', 'BaseCounter', - 'DMCPBatchNorm2dCounter', 'DynamicConv2dCounter' + 'DMCPBatchNorm2dCounter', 'DynamicConv2dCounter', 'DynamicLinearCounter' ] diff --git a/mmrazor/models/task_modules/estimators/counters/op_counters/dynamic_op_counters.py b/mmrazor/models/task_modules/estimators/counters/op_counters/dynamic_op_counters.py deleted file mode 100644 index 2a58a09ec..000000000 --- a/mmrazor/models/task_modules/estimators/counters/op_counters/dynamic_op_counters.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. 
-from typing import Tuple - -import numpy as np -import torch -import torch.nn as nn - -from mmrazor.registry import TASK_UTILS -from .conv_layer_counter import Conv2dCounter -from .linear_layer_counter import LinearCounter - - -@TASK_UTILS.register_module() -class DynamicConv2dCounter(Conv2dCounter): - """Flop counter for DynamicCon2d.""" - - @staticmethod - def add_count_hook(module: nn.Conv2d, input: Tuple[torch.Tensor], - output: torch.Tensor) -> None: - """Count the flops and params of a DynamicConv2d. - - Args: - module (nn.Conv2d): A Conv2d module. - input (Tuple[torch.Tensor]): Input of this module. - output (torch.Tensor): Output of this module. - """ - batch_size = input[0].shape[0] - output_dims = list(output.shape[2:]) - - kernel_dims = list(module.kernel_size) - - out_channels = module.mutable_attrs['out_channels'].activated_channels - in_channels = module.mutable_attrs['in_channels'].activated_channels - - groups = module.groups - - filters_per_channel = out_channels / groups - conv_per_position_flops = int( - np.prod(kernel_dims)) * in_channels * filters_per_channel - - active_elements_count = batch_size * int(np.prod(output_dims)) - - overall_conv_flops = conv_per_position_flops * active_elements_count - overall_params = conv_per_position_flops - - bias_flops = 0 - overall_params = conv_per_position_flops - if module.bias is not None: - bias_flops = out_channels * active_elements_count - overall_params += out_channels - - overall_flops = overall_conv_flops + bias_flops - - module.__flops__ += overall_flops - module.__params__ += int(overall_params) - - -@TASK_UTILS.register_module() -class DynamicLinearCounter(LinearCounter): - pass diff --git a/mmrazor/registry/registry.py b/mmrazor/registry/registry.py index 16aff7d63..7f915ee74 100644 --- a/mmrazor/registry/registry.py +++ b/mmrazor/registry/registry.py @@ -120,9 +120,11 @@ def sub_model(cfg, if init_cfg: # update init_cfg when init_cfg is valid. 
model.init_cfg = init_cfg + if init_weight_from_supernet: - # Supernet is modified after load_fix_subnet(), init weight here. + # init weights from supernet first before it turns into a sub model. model.init_weights() + from mmrazor.structures import load_fix_subnet load_fix_subnet( @@ -132,8 +134,8 @@ def sub_model(cfg, prefix=prefix, extra_prefix=extra_prefix) - if init_weight_from_supernet: - # Supernet is modified after load_fix_subnet(). - model.init_cfg = None + if not init_weight_from_supernet: + # init weights from the specific sub model. + model.init_weights() return model diff --git a/mmrazor/structures/subnet/fix_subnet.py b/mmrazor/structures/subnet/fix_subnet.py index 4c81f6036..56b33be76 100644 --- a/mmrazor/structures/subnet/fix_subnet.py +++ b/mmrazor/structures/subnet/fix_subnet.py @@ -1,6 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. import copy -from collections import OrderedDict from typing import Dict, Optional, Tuple from mmengine import fileio @@ -126,7 +125,6 @@ def export_fix_subnet( model: nn.Module, export_subnet_mode: str = 'mutable', slice_weight: bool = False, - remove_architecture: bool = True, export_channel: bool = False) -> Tuple[FixMutable, Optional[Dict]]: """Export subnet that can be loaded by :func:`load_fix_subnet`. Include subnet structure and subnet weight. @@ -137,8 +135,6 @@ def export_fix_subnet( Export by `mutable.dump_chosen()` when set to 'mutable' (NAS) Export by `mutator.config_template()` when set to 'mutator' (Prune) slice_weight (bool): Export subnet weight. Default to False. - remove_architecture (bool): Subnet weight key without 'architecture'. - Default to True. export_channel (bool): Whether to export the mutator's channel. Often required when finetune is needed for the exported subnet. Default to False. 
@@ -170,15 +166,7 @@ def export_fix_subnet( if next(copied_model.parameters()).is_cuda: copied_model.cuda() - - if remove_architecture: - new_state_dict = OrderedDict() - for k, v in copied_model.state_dict().items(): - if k.startswith('architecture.'): - new_state_dict[k[13:]] = v - return fix_subnet, new_state_dict - - return fix_subnet, copied_model.state_dict() + return fix_subnet, copied_model else: return fix_subnet, None From ca39542731f3e13175afc21ea829b9ae26fc62e2 Mon Sep 17 00:00:00 2001 From: sunyue1 Date: Thu, 2 Mar 2023 00:15:32 +0800 Subject: [PATCH 53/59] update subnet inference readme --- configs/nas/mmcls/autoformer/README.md | 6 ++++-- .../nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py | 2 +- configs/nas/mmcls/bignas/README.md | 2 +- configs/nas/mmcls/onceforall/README.md | 3 ++- .../dynamic_ops/bricks/dynamic_multi_head_attention.py | 1 - 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/configs/nas/mmcls/autoformer/README.md b/configs/nas/mmcls/autoformer/README.md index 7ebd1db66..d070bc0f5 100644 --- a/configs/nas/mmcls/autoformer/README.md +++ b/configs/nas/mmcls/autoformer/README.md @@ -44,14 +44,16 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ ```bash CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ configs/nas/mmcls/autoformer/autoformer_subnet_8xb128_in1k.py \ - $STEP2_CKPT 1 --work-dir $WORK_DIR + $STEP1_CKPT 1 --work-dir $WORK_DIR \ + --cfg-options model.init_cfg.checkpoint=$STEP1_CKPT model.init_weight_from_supernet=True + ``` ## Results and models | Dataset | Supernet | Subnet | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download | Remarks | | :------: | :------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------: | :------: 
| :-------: | :-------: | :---------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------------: | -| ImageNet | vit | [mutable](./configs/nas/mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml) | 54.319 | 10.57 | 82.47 | 95.99 | [config](./autoformer_supernet_32xb256_in1k.py) | [model](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/x.pth) \| [log](https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/v0.1/nas/spos/x.log.json) | MMRazor searched | +| ImageNet | vit | [mutable](./configs/nas/mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml) | 54.319 | 10.57 | 82.47 | 95.99 | [config](./autoformer_supernet_32xb256_in1k.py) | [model](https://download.openmmlab.com/mmrazor/v1/autoformer/autoformer_supernet_32xb256_in1k_20220919_110144-c658ce8f.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/autoformer/autoformer_supernet_32xb256_in1k_20220919_110144-c658ce8f.json) | MMRazor searched | **Note**: diff --git a/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py b/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py index b41320b22..7f4c5cab7 100644 --- a/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py @@ -13,5 +13,5 @@ init_cfg=dict( type='Pretrained', checkpoint= # noqa: E251 - 'https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/x.pth', # noqa: E501 + 'https://download.openmmlab.com/mmrazor/v1/.pth', # noqa: E501 prefix='architecture.')) diff --git a/configs/nas/mmcls/bignas/README.md b/configs/nas/mmcls/bignas/README.md index 6330faba2..e5f999899 100644 --- a/configs/nas/mmcls/bignas/README.md +++ b/configs/nas/mmcls/bignas/README.md @@ -32,7 +32,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ 
configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py \ $STEP2_CKPT 1 --work-dir $WORK_DIR \ - --cfg-options algorithm.mutable_cfg=$STEP2_SUBNET_YAML + --cfg-options model.init_cfg.checkpoint=$STEP2_CKPT model.init_weight_from_supernet=False ``` ## Results and models diff --git a/configs/nas/mmcls/onceforall/README.md b/configs/nas/mmcls/onceforall/README.md index 52fe4fa50..6df2c2e84 100644 --- a/configs/nas/mmcls/onceforall/README.md +++ b/configs/nas/mmcls/onceforall/README.md @@ -17,7 +17,8 @@ We product inference models which are published by official Once-For-All repo an ```bash CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py \ - $OFA_CKPT 1 --work-dir $WORK_DIR + $OFA_CKPT 1 --work-dir $WORK_DIR \ + --cfg-options model.init_cfg.checkpoint=$OFA_CKPT model.init_weight_from_supernet=False ``` ## Results and models diff --git a/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_multi_head_attention.py b/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_multi_head_attention.py index b270f870e..8dcd6de3c 100644 --- a/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_multi_head_attention.py +++ b/mmrazor/models/architectures/dynamic_ops/bricks/dynamic_multi_head_attention.py @@ -201,7 +201,6 @@ def to_static_op(self) -> MultiheadAttention: embed_dims=embed_dims, num_heads=num_heads, input_dims=None, - attn_drop_rate=self.attn_drop_rate, relative_position=self.relative_position, max_relative_position=self.max_relative_position) From 64a0589c2736e67b37eca72723f845bed139f9b2 Mon Sep 17 00:00:00 2001 From: aptsunny <36404164+aptsunny@users.noreply.github.com> Date: Thu, 2 Mar 2023 00:19:10 +0800 Subject: [PATCH 54/59] fix lint --- configs/nas/mmcls/autoformer/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/nas/mmcls/autoformer/README.md b/configs/nas/mmcls/autoformer/README.md index d070bc0f5..36543e0fc 100644 --- 
a/configs/nas/mmcls/autoformer/README.md +++ b/configs/nas/mmcls/autoformer/README.md @@ -51,9 +51,9 @@ CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ ## Results and models -| Dataset | Supernet | Subnet | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download | Remarks | -| :------: | :------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------: | :------: | :-------: | :-------: | :---------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------------: | -| ImageNet | vit | [mutable](./configs/nas/mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml) | 54.319 | 10.57 | 82.47 | 95.99 | [config](./autoformer_supernet_32xb256_in1k.py) | [model](https://download.openmmlab.com/mmrazor/v1/autoformer/autoformer_supernet_32xb256_in1k_20220919_110144-c658ce8f.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/autoformer/autoformer_supernet_32xb256_in1k_20220919_110144-c658ce8f.json) | MMRazor searched | +| Dataset | Supernet | Subnet | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download | Remarks | +| :------: | :------: | :----------------------------------------------------------------: | :-------: | :------: | :-------: | :-------: | :---------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------------: | +| ImageNet | vit | 
[mutable](./configs/nas/mmcls/autoformer/AUTOFORMER_SUBNET_B.yaml) | 54.319 | 10.57 | 82.47 | 95.99 | [config](./autoformer_supernet_32xb256_in1k.py) | [model](https://download.openmmlab.com/mmrazor/v1/autoformer/autoformer_supernet_32xb256_in1k_20220919_110144-c658ce8f.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/autoformer/autoformer_supernet_32xb256_in1k_20220919_110144-c658ce8f.json) | MMRazor searched | **Note**: From 7a45886dfd7ab25810c41cef915580c9567b3636 Mon Sep 17 00:00:00 2001 From: aptsunny <36404164+aptsunny@users.noreply.github.com> Date: Thu, 2 Mar 2023 09:29:42 +0800 Subject: [PATCH 55/59] fix lint --- mmrazor/engine/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mmrazor/engine/__init__.py b/mmrazor/engine/__init__.py index 847194955..da6cec34d 100644 --- a/mmrazor/engine/__init__.py +++ b/mmrazor/engine/__init__.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .hooks import DMCPSubnetHook, DumpSubnetHook, EstimateResourcesHook, StopDistillHook +from .hooks import (DMCPSubnetHook, DumpSubnetHook, EstimateResourcesHook, + StopDistillHook) from .optimizers import SeparateOptimWrapperConstructor from .runner import (AutoSlimGreedySearchLoop, DartsEpochBasedTrainLoop, DartsIterBasedTrainLoop, EvolutionSearchLoop, From 83cec1ccadc43f1860a5f150a4558905ce81a585 Mon Sep 17 00:00:00 2001 From: wang shiguang Date: Thu, 2 Mar 2023 15:15:12 +0800 Subject: [PATCH 56/59] Update autoformer_subnet_8xb256_in1k.py --- configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py b/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py index 7f4c5cab7..f4d53ae76 100644 --- a/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/autoformer/autoformer_subnet_8xb256_in1k.py @@ -13,5 +13,5 @@ init_cfg=dict( type='Pretrained', checkpoint= # 
noqa: E251 - 'https://download.openmmlab.com/mmrazor/v1/.pth', # noqa: E501 + 'https://download.openmmlab.com/mmrazor/v1/autoformer/autoformer_supernet_32xb256_in1k_20220919_110144-c658ce8f.pth', # noqa: E501 prefix='architecture.')) From 86b50fa2959d38d747bdec851ba165fb8b186d43 Mon Sep 17 00:00:00 2001 From: gaoyang07 <1546308416@qq.com> Date: Thu, 2 Mar 2023 16:39:06 +0800 Subject: [PATCH 57/59] update test.py to support args.checkpoint as none --- configs/nas/mmcls/autoformer/README.md | 2 +- configs/nas/mmcls/bignas/README.md | 2 +- configs/nas/mmcls/onceforall/README.md | 2 +- configs/nas/mmcls/spos/README.md | 4 ++-- configs/nas/mmdet/detnas/README.md | 9 +++++++++ tools/test.py | 8 +++++++- 6 files changed, 21 insertions(+), 6 deletions(-) diff --git a/configs/nas/mmcls/autoformer/README.md b/configs/nas/mmcls/autoformer/README.md index 36543e0fc..161e83a56 100644 --- a/configs/nas/mmcls/autoformer/README.md +++ b/configs/nas/mmcls/autoformer/README.md @@ -44,7 +44,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ ```bash CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ configs/nas/mmcls/autoformer/autoformer_subnet_8xb128_in1k.py \ - $STEP1_CKPT 1 --work-dir $WORK_DIR \ + none 1 --work-dir $WORK_DIR \ --cfg-options model.init_cfg.checkpoint=$STEP1_CKPT model.init_weight_from_supernet=True ``` diff --git a/configs/nas/mmcls/bignas/README.md b/configs/nas/mmcls/bignas/README.md index e5f999899..d6b117f11 100644 --- a/configs/nas/mmcls/bignas/README.md +++ b/configs/nas/mmcls/bignas/README.md @@ -31,7 +31,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ ```bash CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py \ - $STEP2_CKPT 1 --work-dir $WORK_DIR \ + none 1 --work-dir $WORK_DIR \ --cfg-options model.init_cfg.checkpoint=$STEP2_CKPT model.init_weight_from_supernet=False ``` diff --git a/configs/nas/mmcls/onceforall/README.md 
b/configs/nas/mmcls/onceforall/README.md index 6df2c2e84..92270f708 100644 --- a/configs/nas/mmcls/onceforall/README.md +++ b/configs/nas/mmcls/onceforall/README.md @@ -17,7 +17,7 @@ We product inference models which are published by official Once-For-All repo an ```bash CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ configs/nas/mmcls/onceforall/ofa_mobilenet_subnet_8xb256_in1k.py \ - $OFA_CKPT 1 --work-dir $WORK_DIR \ + none 1 --work-dir $WORK_DIR \ --cfg-options model.init_cfg.checkpoint=$OFA_CKPT model.init_weight_from_supernet=False ``` diff --git a/configs/nas/mmcls/spos/README.md b/configs/nas/mmcls/spos/README.md index 09163a22e..5e5e5e8a6 100644 --- a/configs/nas/mmcls/spos/README.md +++ b/configs/nas/mmcls/spos/README.md @@ -42,8 +42,8 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ ```bash CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k.py \ - $SEARCHED_CKPT 1 --work-dir $WORK_DIR \ - --cfg-options algorithm.mutable_cfg=$STEP2_SUBNET_YAML + none 1 --work-dir $WORK_DIR \ + --cfg-options model.init_cfg.checkpoint=$STEP3_CKPT ``` ## Results and models diff --git a/configs/nas/mmdet/detnas/README.md b/configs/nas/mmdet/detnas/README.md index 1a7585462..2e0feae2b 100644 --- a/configs/nas/mmdet/detnas/README.md +++ b/configs/nas/mmdet/detnas/README.md @@ -53,6 +53,15 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ --cfg-options algorithm.mutable_cfg=$STEP3_SUBNET_YAML load_from=$STEP4_CKPT # or modify the config directly ``` +### Step 6: Subnet inference on COCO + +```bash +CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ + configs/nas/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco.py \ + none 1 --work-dir $WORK_DIR \ + --cfg-options model.init_cfg.checkpoint=$STEP5_CKPT +``` + ## Results and models | Dataset | Supernet | Subnet | Params(M) | Flops(G) | mAP | Config | Download | Remarks | diff --git a/tools/test.py b/tools/test.py index 
3b8597de6..fb6b00b86 100644 --- a/tools/test.py +++ b/tools/test.py @@ -59,7 +59,13 @@ def main(): cfg.work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(args.config))[0]) - cfg.load_from = args.checkpoint + if args.checkpoint == 'none': + # NOTE: Passing 'none' means no checkpoint is given on the command line. + # If the model's `init_cfg` does not specify a checkpoint either, the + # results may be invalid. + cfg.load_from = None + else: + cfg.load_from = args.checkpoint # build the runner from config runner = Runner.from_cfg(cfg) From 573fa4268230f637f9ec95a8d35757f85fb3ba37 Mon Sep 17 00:00:00 2001 From: gaoyang07 <1546308416@qq.com> Date: Thu, 2 Mar 2023 17:24:57 +0800 Subject: [PATCH 58/59] update DARTS readme --- .../attentive_mobilenet_subnet_8xb256_in1k.py | 1 + configs/nas/mmcls/darts/README.md | 16 +++++++++++++--- .../darts/darts_subnet_1xb96_cifar10_2.0.py | 7 ++++++- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py b/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py index daa3215f9..6ce2cfec6 100644 --- a/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py +++ b/configs/nas/mmcls/bignas/attentive_mobilenet_subnet_8xb256_in1k.py @@ -15,6 +15,7 @@ checkpoint= # noqa: E251 'https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_supernet_32xb64_in1k_flops-2G_acc-81.72_20221229_200440-954772a3.pth', # noqa: E501 prefix='architecture.')) + model_wrapper_cfg = None find_unused_parameters = True diff --git a/configs/nas/mmcls/darts/README.md b/configs/nas/mmcls/darts/README.md index 3a2437034..5d3da3d8e 100644 --- a/configs/nas/mmcls/darts/README.md +++ b/configs/nas/mmcls/darts/README.md @@ -12,7 +12,7 @@ This paper addresses the scalability challenge of architecture search by formula ## Get Started -### Supernet training on Cifar-10 +### Step 1: Supernet training on Cifar-10 ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500
./tools/dist_train.sh \ @@ -20,12 +20,22 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ --work-dir $WORK_DIR ``` -## Subnet inference on Cifar-10 +### Step 2: Subnet retraining on Cifar-10 + +```bash +CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ + configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py 4 \ + --work-dir $WORK_DIR \ + --cfg-options model.init_cfg.checkpoint=$STEP2_CKPT +``` + +### Step 3: Subnet inference on Cifar-10 ```bash CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh \ configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py \ - $STEP1_CKPT 1 --work-dir $WORK_DIR + none 1 --work-dir $WORK_DIR \ + --cfg-options model.init_cfg.checkpoint=$STEP2_CKPT ``` ## Results and models diff --git a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py index 71303fc4d..ab9ee6180 100644 --- a/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py +++ b/configs/nas/mmcls/darts/darts_subnet_1xb96_cifar10_2.0.py @@ -33,7 +33,12 @@ type='mmrazor.sub_model', cfg=supernet, # NOTE: You can replace the yaml with the mutable_cfg searched by yourself - fix_subnet='configs/nas/mmcls/darts/DARTS_SUBNET_CIFAR_PAPER_ALIAS.yaml') + fix_subnet='configs/nas/mmcls/darts/DARTS_SUBNET_CIFAR_PAPER_ALIAS.yaml', + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://openmmlab-share.oss-cn-hangzhou.aliyuncs.com/mmrazor/v1/darts/darts_subnetnet_1xb96_cifar10_acc-97.27_20211222-17e42600_latest.pth', # noqa: E501 + prefix='architecture.')) model_wrapper_cfg = None find_unused_parameters = True From 7d57599256bec3141721ad75a53cec3ff35fff84 Mon Sep 17 00:00:00 2001 From: gaoyang07 <1546308416@qq.com> Date: Thu, 2 Mar 2023 17:47:26 +0800 Subject: [PATCH 59/59] update readme --- configs/nas/mmcls/spos/README.md | 4 +++- configs/nas/mmdet/detnas/README.md | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/configs/nas/mmcls/spos/README.md
b/configs/nas/mmcls/spos/README.md index 5e5e5e8a6..19e55b795 100644 --- a/configs/nas/mmcls/spos/README.md +++ b/configs/nas/mmcls/spos/README.md @@ -34,7 +34,9 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ ```bash CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ configs/nas/spos/spos_subnet_shufflenetv2_8xb128_in1k.py 4 \ - --work-dir $WORK_DIR --cfg-options algorithm.mutable_cfg=$STEP2_SUBNET_YAML # or modify the config directly + --work-dir $WORK_DIR \ + --cfg-options model.init_cfg.checkpoint=$STEP2_CKPT + ``` ## Step 4: Subnet inference on ImageNet diff --git a/configs/nas/mmdet/detnas/README.md b/configs/nas/mmdet/detnas/README.md index 2e0feae2b..2b4096a63 100644 --- a/configs/nas/mmdet/detnas/README.md +++ b/configs/nas/mmdet/detnas/README.md @@ -50,7 +50,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh \ configs/nas/detnas/detnas_subnet_frcnn_shufflenetv2_fpn_1x_coco.py 4 \ --work-dir $WORK_DIR \ - --cfg-options algorithm.mutable_cfg=$STEP3_SUBNET_YAML load_from=$STEP4_CKPT # or modify the config directly + --cfg-options model.init_cfg.checkpoint=$STEP4_CKPT ```