Feature/sg 1172 criterion params removal (#1519)
* added deprecation decorator and removed some refs

* all refs removed

* yolox ref removed

* added tests

* added docs

* fixed test and updated factory for kdloss param

* fixed yaml celoss ref

* fixed unittest

* fixed last unit test
shaydeci committed Oct 12, 2023
1 parent ecdec5e commit 0b79e68
Showing 63 changed files with 230 additions and 142 deletions.
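For context, here is a minimal before/after sketch of the change this commit makes to `training_params` (a sketch with illustrative values borrowed from the YoloX examples below, not code taken from the diff itself):

```python
# Old style (deprecated via the decorator added below): the loss class and its
# constructor kwargs were split across two separate entries.
old_train_params = {
    "loss": "YoloXDetectionLoss",
    "criterion_params": {"strides": [8, 16, 32], "num_classes": 80},
}

# New style: the kwargs are nested under the loss name ("Factory" format) and
# `criterion_params` is removed.
new_train_params = {
    "loss": {"YoloXDetectionLoss": {"strides": [8, 16, 32], "num_classes": 80}},
}
```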
2 changes: 1 addition & 1 deletion documentation/source/Checkpoints.md
@@ -80,7 +80,7 @@ model = models.get(model_name=Models.RESNET18, num_classes=10)
train_params = {
...
"loss": "LabelSmoothingCrossEntropyLoss",
"criterion_params": {},

"save_ckpt_epoch_list": [10,15]
...
}
1 change: 0 additions & 1 deletion documentation/source/Example_Classification.md
@@ -308,7 +308,6 @@ Output (Training parameters):
'ckpt_name': 'ckpt_latest.pth',
'clip_grad_norm': None,
'cosine_final_lr_ratio': 0.01,
'criterion_params': {},
'dataset_statistics': False,
'ema': False,
'ema_params': {'decay': 0.9999, 'decay_type': 'exp', 'beta': 15},
6 changes: 2 additions & 4 deletions documentation/source/LRScheduling.md
@@ -299,7 +299,7 @@ train_params = {
"initial_lr": 0.1,
"loss": torch.nn.CrossEntropyLoss(),
"optimizer": "SGD",
"criterion_params": {},

"optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
"train_metrics_list": [Accuracy()],
"valid_metrics_list": [Accuracy()],
@@ -327,7 +327,6 @@ training_hyperparams:
initial_lr: 0.1
loss: CrossEntropyLoss
optimizer: SGD
criterion_params: {}
optimizer_params:
weight_decay: 1e-4
momentum: 0.9
@@ -366,7 +365,7 @@ train_params = {
"initial_lr": 0.1,
"loss": torch.nn.CrossEntropyLoss(),
"optimizer": "SGD",
"criterion_params": {},

"optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
"train_metrics_list": [Accuracy()],
"valid_metrics_list": [Accuracy()],
@@ -398,7 +397,6 @@ training_hyperparams:
initial_lr: 0.1
loss: CrossEntropyLoss
optimizer: SGD
criterion_params: {}
optimizer_params:
weight_decay: 1e-4
momentum: 0.9
18 changes: 8 additions & 10 deletions documentation/source/Losses.md
@@ -32,7 +32,6 @@ model = ...
train_params = {
...
"loss": "LabelSmoothingCrossEntropyLoss",
"criterion_params": {}
...
}
trainer.train(model=model, training_params=train_params, train_loader=train_dataloader, valid_loader=valid_dataloader)
@@ -54,15 +53,12 @@ When doing so, in your `my_training_hyperparams.yaml` file:
```yaml
...

loss: YoloXDetectionLoss

criterion_params:
strides: [8, 16, 32] # output strides of all yolo outputs
num_classes: 80
loss:
YoloXDetectionLoss:
strides: [8, 16, 32] # output strides of all yolo outputs
num_classes: 80
```

Note that two `training_params` parameters define the loss function: `loss`, which defines the type of the loss, and `criterion_params`, a dictionary which will be unpacked into the underlying `YoloXDetectionLoss` class constructor.

## Passing Instantiated nn.Module Objects as Loss Functions

SuperGradients also supports passing instantiated nn.Module Objects as demonstrated below:
@@ -201,9 +197,11 @@ Then, in your `my_training_hyperparams.yaml`, use `"my_loss"` in the same way as
```yaml
...

loss: my_loss
loss:
my_loss:
my_loss_arg1: ...
my_loss_arg2: ...

criterion_params:
...
```
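
As an aside, the same custom-loss configuration can also be written directly as a Python `training_params` dict in the new nested format (a sketch with placeholder argument values, not taken from this diff; `my_loss` must already be registered as described above):

```python
train_params = {
    # Loss kwargs now live under the registered loss name instead of `criterion_params`.
    "loss": {"my_loss": {"my_loss_arg1": 1.0, "my_loss_arg2": 0.5}},  # placeholder values
    # ... remaining training hyperparameters ...
}
```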

2 changes: 1 addition & 1 deletion documentation/source/PhaseCallbacks.md
@@ -238,7 +238,7 @@ model = ...

train_params = {
"loss": "LabelSmoothingCrossEntropyLoss",
"criterion_params": {},

"phase_callbacks": [SaveFirstBatchCallback()],
...
}
1 change: 0 additions & 1 deletion documentation/source/configuration_files.md
@@ -30,7 +30,6 @@ lr_warmup_epochs: 0
initial_lr: 0.1
loss: LabelSmoothingCrossEntropyLoss
optimizer: SGD
criterion_params: {}

optimizer_params:
weight_decay: 1e-4
103 changes: 102 additions & 1 deletion src/super_gradients/common/deprecate.py
@@ -1,6 +1,6 @@
import warnings
from functools import wraps
from typing import Optional
from typing import Optional, Callable
from pkg_resources import parse_version


@@ -76,3 +76,104 @@ def wrapper(*args, **kwargs):
return wrapper

return decorator


def deprecated_training_param(deprecated_tparam_name: str, deprecated_since: str, removed_from: str, new_arg_assigner: Callable, message: str = ""):
"""
Decorator for deprecating training hyperparameters.
    Recommended to be used as a decorator on top of super_gradients.training.params.TrainingParams's override method:
class TrainingParams(HpmStruct):
def __init__(self, **entries):
            # We initialize with the default training params, overridden by the provided params
default_training_params = deepcopy(DEFAULT_TRAINING_PARAMS)
super().__init__(**default_training_params)
self.set_schema(TRAINING_PARAM_SCHEMA)
if len(entries) > 0:
self.override(**entries)
@deprecated_training_param(
"criterion_params", "3.2.1", "3.3.0", new_arg_assigner=get_deprecated_nested_params_to_factory_format_assigner("loss", "criterion_params")
)
def override(self, **entries):
super().override(**entries)
self.validate()
:param deprecated_tparam_name: str, the name of the deprecated hyperparameter.
:param deprecated_since: str, SG version of deprecation.
:param removed_from: str, SG version of removal.
:param new_arg_assigner: Callable, a handler to assign the deprecated parameter value to the updated
hyperparameter entry.
:param message: str, message to append to the deprecation warning (default="")
:return:
"""

def decorator(func):
def wrapper(*args, **training_params):
if deprecated_tparam_name in training_params:
import super_gradients

is_still_supported = parse_version(super_gradients.__version__) < parse_version(removed_from)
if is_still_supported:
message_prefix = (
f"Training hyperparameter `{deprecated_tparam_name} is deprecated since version `{deprecated_since}` "
f"and will be removed in version `{removed_from}`.\n"
)
warnings.warn(message_prefix + message, DeprecationWarning)
training_params = new_arg_assigner(**training_params)
else:
message_prefix = (
f"Training hyperparameter `{deprecated_tparam_name} was deprecate since version `{deprecated_since}` "
f"and was removed in version `{removed_from}`.\n"
)
raise RuntimeError(message_prefix + message)

return func(*args, **training_params)

return wrapper

return decorator


def get_deprecated_nested_params_to_factory_format_assigner(param_name: str, nested_params_name: str) -> Callable:
"""
Returns an assigner to be used by deprecated_training_param decorator.
    The assigner takes a deprecated parameter name, and its __init__ arguments that were previously passed
through nested_params_name entry in training_params and manipulates the training_params so they are in 'Factory' format.
For example:
class TrainingParams(HpmStruct):
def __init__(self, **entries):
            # We initialize with the default training params, overridden by the provided params
default_training_params = deepcopy(DEFAULT_TRAINING_PARAMS)
super().__init__(**default_training_params)
self.set_schema(TRAINING_PARAM_SCHEMA)
if len(entries) > 0:
self.override(**entries)
@deprecated_training_param(
"criterion_params", "3.2.1", "3.3.0", new_arg_assigner=get_deprecated_nested_params_to_factory_format_assigner("loss", "criterion_params")
)
def override(self, **entries):
super().override(**entries)
self.validate()
then under the hood, training_params.loss will be set to
{training_params.loss: training_params.criterion_params}
:param param_name: str, parameter name (for example, 'loss').
:param nested_params_name: str, nested_params_name (for example, 'criterion_params')
:return: Callable as described above.
"""

def deprecated_nested_params_to_factory_format_assigner(**params):
nested_params = params.get(nested_params_name)
param_val = params.get(param_name)
params[param_name] = {param_val: nested_params}
return params

return deprecated_nested_params_to_factory_format_assigner
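
A short sketch of how the two helpers above fit together (derived only from the code in this diff; the warning path applies while the installed version is still below `removed_from`):

```python
# The assigner rewrites the deprecated pair of entries into the nested "Factory" format.
assigner = get_deprecated_nested_params_to_factory_format_assigner("loss", "criterion_params")
params = assigner(loss="SSDLoss", criterion_params={"alpha": 1.0})
assert params["loss"] == {"SSDLoss": {"alpha": 1.0}}

# When `criterion_params` is passed to TrainingParams.override (decorated as shown in the
# docstrings above), the decorator emits a DeprecationWarning and applies this assigner;
# once super_gradients.__version__ reaches `removed_from`, it raises a RuntimeError instead.
```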
@@ -55,7 +55,6 @@
"phase_callbacks": phase_callbacks,
"initial_lr": lr,
"loss": loss_fn,
"criterion_params": {},
"optimizer": optimizer,
"train_metrics_list": [Accuracy(), Top5()],
"valid_metrics_list": [Accuracy(), Top5()],
@@ -61,7 +61,6 @@ def main(architecture_name: str):
"initial_lr": 0.1,
"loss": "CrossEntropyLoss",
"optimizer": "SGD",
"criterion_params": {},
"train_metrics_list": [Accuracy(), Top5()],
"valid_metrics_list": [Accuracy(), Top5()],
"metric_to_watch": "Accuracy",
@@ -23,7 +23,6 @@
"initial_lr": 0.1,
"loss": "CrossEntropyLoss",
"optimizer": "SGD",
"criterion_params": {},
"optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
"train_metrics_list": [Accuracy(), Top5()],
"valid_metrics_list": [Accuracy(), Top5()],
6 changes: 3 additions & 3 deletions src/super_gradients/recipes/cityscapes_regseg48.yaml
@@ -62,9 +62,9 @@ training_hyperparams:

ema: True

loss: LabelSmoothingCrossEntropyLoss
criterion_params:
ignore_index: ${cityscapes_ignored_label}
loss:
LabelSmoothingCrossEntropyLoss:
ignore_index: ${cityscapes_ignored_label}

train_metrics_list:
- PixelAccuracy:
6 changes: 3 additions & 3 deletions src/super_gradients/recipes/cityscapes_segformer.yaml
@@ -95,9 +95,9 @@ training_hyperparams:

sync_bn: True

loss: LabelSmoothingCrossEntropyLoss
criterion_params:
ignore_index: ${cityscapes_ignored_label}
loss:
LabelSmoothingCrossEntropyLoss:
ignore_index: ${cityscapes_ignored_label}

phase_callbacks:
- SlidingWindowValidationCallback:
@@ -50,9 +50,11 @@ arch_params:
resume: False
training_hyperparams:
resume: ${resume}
criterion_params:
alpha: 1.0
dboxes: ${dboxes}
loss:
SSDLoss:
alpha: 1.0
      dboxes: ${dboxes} # OVERRIDDEN IN MAIN RECIPE YAML FILE ONCE DBOXES ARE CHOSEN.


multi_gpu: DDP
num_gpus: 4
9 changes: 4 additions & 5 deletions src/super_gradients/recipes/imagenet_resnet50_kd.yaml
@@ -25,11 +25,10 @@ val_dataloader: imagenet_val
resume: False
training_hyperparams:
resume: ${resume}
loss: KDLogitsLoss
criterion_params:
distillation_loss_coeff: 0.8
task_loss_fn:
_target_: super_gradients.training.losses.label_smoothing_cross_entropy_loss.LabelSmoothingCrossEntropyLoss
loss:
KDLogitsLoss:
distillation_loss_coeff: 0.8
task_loss_fn: CrossEntropyLoss

arch_params:
teacher_input_adapter:
2 changes: 0 additions & 2 deletions src/super_gradients/recipes/roboflow_ppyoloe.yaml
@@ -40,8 +40,6 @@ training_hyperparams:
resume: ${resume}
max_epochs: 100
mixed_precision: True
criterion_params:
num_classes: ${num_classes}
phase_callbacks:
- RoboflowResultCallback:
dataset_name: ${dataset_name}
3 changes: 0 additions & 3 deletions src/super_gradients/recipes/roboflow_yolo_nas_m.yaml
@@ -60,9 +60,6 @@ training_hyperparams:

max_epochs: 100
mixed_precision: True
criterion_params:
num_classes: ${num_classes}


phase_callbacks: []
loss:
2 changes: 0 additions & 2 deletions src/super_gradients/recipes/roboflow_yolo_nas_s.yaml
@@ -60,8 +60,6 @@ training_hyperparams:

max_epochs: 100
mixed_precision: True
criterion_params:
num_classes: ${num_classes}


phase_callbacks: []
7 changes: 5 additions & 2 deletions src/super_gradients/recipes/roboflow_yolox.yaml
@@ -39,8 +39,11 @@ resume: False
training_hyperparams:
max_epochs: 100
resume: ${resume}
criterion_params:
num_classes: ${num_classes}
loss:
YoloXDetectionLoss:
strides: [ 8, 16, 32 ] # output strides of all yolo outputs
num_classes: ${num_classes}

train_metrics_list:
- DetectionMetrics:
normalize_targets: True
@@ -15,7 +15,6 @@ lr_warmup_epochs: 0
initial_lr: 0.1
loss: LabelSmoothingCrossEntropyLoss
optimizer: SGD
criterion_params: {}

optimizer_params:
weight_decay: 1e-4
@@ -12,12 +12,11 @@ lr_mode: CosineLRScheduler
cosine_final_lr_ratio: 0.1
batch_accumulate: 1
initial_lr: 1e-3
loss: DEKRLoss

criterion_params:
heatmap_loss: qfl
heatmap_loss_factor: 1.0
offset_loss_factor: 0.1
loss:
DEKRLoss:
heatmap_loss: qfl
heatmap_loss_factor: 1.0
offset_loss_factor: 0.1

mixed_precision: True

@@ -13,8 +13,6 @@ cosine_final_lr_ratio: 0.1
batch_accumulate: 1
initial_lr: 0.001
loss: RescoringLoss
criterion_params: {}

mixed_precision: False

optimizer: AdamW
@@ -7,11 +7,10 @@ lr_mode: CosineLRScheduler
cosine_final_lr_ratio: 0.01
batch_accumulate: 1
initial_lr: 0.01
loss: SSDLoss

criterion_params:
alpha: 1.0
  dboxes: # OVERRIDDEN IN MAIN RECIPE YAML FILE ONCE DBOXES ARE CHOSEN.
loss:
SSDLoss:
alpha: 1.0
    dboxes: # OVERRIDDEN IN MAIN RECIPE YAML FILE ONCE DBOXES ARE CHOSEN.

optimizer: SGD
optimizer_params: