Feature/sg 1172 criterion params removal #1519

Merged
11 commits merged on Oct 12, 2023
2 changes: 1 addition & 1 deletion documentation/source/Checkpoints.md
@@ -80,7 +80,7 @@ model = models.get(model_name=Models.RESNET18, num_classes=10)
train_params = {
...
"loss": "LabelSmoothingCrossEntropyLoss",
"criterion_params": {},

"save_ckpt_epoch_list": [10,15]
...
}
1 change: 0 additions & 1 deletion documentation/source/Example_Classification.md
@@ -308,7 +308,6 @@ Output (Training parameters):
'ckpt_name': 'ckpt_latest.pth',
'clip_grad_norm': None,
'cosine_final_lr_ratio': 0.01,
'criterion_params': {},
'dataset_statistics': False,
'ema': False,
'ema_params': {'decay': 0.9999, 'decay_type': 'exp', 'beta': 15},
6 changes: 2 additions & 4 deletions documentation/source/LRScheduling.md
@@ -299,7 +299,7 @@ train_params = {
"initial_lr": 0.1,
"loss": torch.nn.CrossEntropyLoss(),
"optimizer": "SGD",
"criterion_params": {},

"optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
"train_metrics_list": [Accuracy()],
"valid_metrics_list": [Accuracy()],
@@ -327,7 +327,6 @@ training_hyperparams:
initial_lr: 0.1
loss: CrossEntropyLoss
optimizer: SGD
criterion_params: {}
optimizer_params:
weight_decay: 1e-4
momentum: 0.9
@@ -366,7 +365,7 @@ train_params = {
"initial_lr": 0.1,
"loss": torch.nn.CrossEntropyLoss(),
"optimizer": "SGD",
"criterion_params": {},

"optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
"train_metrics_list": [Accuracy()],
"valid_metrics_list": [Accuracy()],
@@ -398,7 +397,6 @@ training_hyperparams:
initial_lr: 0.1
loss: CrossEntropyLoss
optimizer: SGD
criterion_params: {}
optimizer_params:
weight_decay: 1e-4
momentum: 0.9
18 changes: 8 additions & 10 deletions documentation/source/Losses.md
@@ -32,7 +32,6 @@ model = ...
train_params = {
...
"loss": "LabelSmoothingCrossEntropyLoss",
"criterion_params": {}
...
}
trainer.train(model=model, training_params=train_params, train_loader=train_dataloader, valid_loader=valid_dataloader)
@@ -54,15 +53,12 @@ When doing so, in your `my_training_hyperparams.yaml` file:
```yaml
...

loss: YoloXDetectionLoss

criterion_params:
strides: [8, 16, 32] # output strides of all yolo outputs
num_classes: 80
loss:
YoloXDetectionLoss:
strides: [8, 16, 32] # output strides of all yolo outputs
num_classes: 80
```

Note that two `training_params` parameters define the loss function: `loss`, which defines the type of the loss, and the `criterion_params` dictionary, which is unpacked into the underlying `YoloXDetectionLoss` class constructor.
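
With the new format, those constructor kwargs nest directly under the loss class name, so the separate `criterion_params` entry disappears. Presumably the same nesting applies when building `training_params` in Python, since this is exactly the shape the new deprecation shim produces (a sketch; the surrounding keys are illustrative):

```python
# Sketch: factory-style loss entry after this PR; the kwargs that used to live
# in `criterion_params` now sit under the loss class name itself.
train_params = {
    "loss": {
        "YoloXDetectionLoss": {
            "strides": [8, 16, 32],  # output strides of all yolo outputs
            "num_classes": 80,
        }
    },
}
```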

## Passing Instantiated nn.Module Objects as Loss Functions

SuperGradients also supports passing instantiated nn.Module Objects as demonstrated below:
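
The demonstration referenced here is collapsed in this diff view. A minimal sketch of the pattern, mirroring the LRScheduling examples in this PR (the dataloaders are assumed to be built elsewhere):

```python
import torch
from super_gradients import Trainer
from super_gradients.common.object_names import Models
from super_gradients.training import models
from super_gradients.training.metrics import Accuracy

trainer = Trainer(experiment_name="ce_loss_instance_example")
model = models.get(model_name=Models.RESNET18, num_classes=10)

train_params = {
    "max_epochs": 20,
    "initial_lr": 0.1,
    "optimizer": "SGD",
    # An instantiated nn.Module is passed directly; no criterion kwargs needed.
    "loss": torch.nn.CrossEntropyLoss(),
    "train_metrics_list": [Accuracy()],
    "valid_metrics_list": [Accuracy()],
    "metric_to_watch": "Accuracy",
}

# train_dataloader / valid_dataloader are assumed to be defined beforehand.
trainer.train(model=model, training_params=train_params, train_loader=train_dataloader, valid_loader=valid_dataloader)
```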
@@ -201,9 +197,11 @@ Then, in your `my_training_hyperparams.yaml`, use `"my_loss"` in the same way as
```yaml
...

loss: my_loss
loss:
my_loss:
my_loss_arg1: ...
my_loss_arg2: ...

criterion_params:
...
```

2 changes: 1 addition & 1 deletion documentation/source/PhaseCallbacks.md
@@ -238,7 +238,7 @@ model = ...

train_params = {
"loss": "LabelSmoothingCrossEntropyLoss",
"criterion_params": {},

"phase_callbacks": [SaveFirstBatchCallback()],
...
}
1 change: 0 additions & 1 deletion documentation/source/configuration_files.md
@@ -30,7 +30,6 @@ lr_warmup_epochs: 0
initial_lr: 0.1
loss: LabelSmoothingCrossEntropyLoss
optimizer: SGD
criterion_params: {}

optimizer_params:
weight_decay: 1e-4
40 changes: 39 additions & 1 deletion src/super_gradients/common/deprecate.py
@@ -1,6 +1,6 @@
import warnings
from functools import wraps
from typing import Optional
from typing import Optional, Callable
from pkg_resources import parse_version


@@ -76,3 +76,41 @@ def wrapper(*args, **kwargs):
return wrapper

return decorator


def deprecated_training_param(deprecated_tparam_name: str, deprecated_since: str, removed_from: str, new_arg_assigner: Callable, message: str = ""):
def decorator(func):
def wrapper(*args, **training_params):
if deprecated_tparam_name in training_params:
import super_gradients

is_still_supported = parse_version(super_gradients.__version__) < parse_version(removed_from)
if is_still_supported:
message_prefix = (
f"Training hyperparameter `{deprecated_tparam_name} is deprecated since version `{deprecated_since}` "
f"and will be removed in version `{removed_from}`.\n"
)
warnings.warn(message_prefix + message, DeprecationWarning)
training_params = new_arg_assigner(**training_params)
else:
message_prefix = (
f"Training hyperparameter `{deprecated_tparam_name} was deprecate since version `{deprecated_since}` "
f"and was removed in version `{removed_from}`.\n"
)
raise RuntimeError(message_prefix + message)

return func(*args, **training_params)

return wrapper

return decorator


def get_deprecated_nested_params_to_factory_format_assigner(param_name: str, nested_params_name: str) -> Callable:
def deprecated_nested_params_to_factory_format_assigner(**params):
nested_params = params.get(nested_params_name)
param_val = params.get(param_name)
params[param_name] = {param_val: nested_params}
return params

return deprecated_nested_params_to_factory_format_assigner
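
Taken together, these two helpers are presumably what lets `Trainer.train` keep accepting `criterion_params` during the deprecation window. A minimal sketch of how they compose; the version strings and the bare `train` stub are hypothetical, not taken from this diff:

```python
from super_gradients.common.deprecate import (
    deprecated_training_param,
    get_deprecated_nested_params_to_factory_format_assigner,
)


@deprecated_training_param(
    deprecated_tparam_name="criterion_params",
    deprecated_since="3.2.1",  # hypothetical version numbers
    removed_from="10.0.0",
    new_arg_assigner=get_deprecated_nested_params_to_factory_format_assigner(
        param_name="loss", nested_params_name="criterion_params"
    ),
    message="Nest the loss kwargs under the loss name inside `loss` instead.",
)
def train(**training_params):
    print(training_params["loss"])


# While the key is still supported this emits a DeprecationWarning and rewrites
# the params before the call goes through, so this prints:
# {'YoloXDetectionLoss': {'strides': [8, 16, 32], 'num_classes': 80}}
train(loss="YoloXDetectionLoss", criterion_params={"strides": [8, 16, 32], "num_classes": 80})
```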
@@ -55,7 +55,6 @@
"phase_callbacks": phase_callbacks,
"initial_lr": lr,
"loss": loss_fn,
"criterion_params": {},
"optimizer": optimizer,
"train_metrics_list": [Accuracy(), Top5()],
"valid_metrics_list": [Accuracy(), Top5()],
@@ -61,7 +61,6 @@ def main(architecture_name: str):
"initial_lr": 0.1,
"loss": "CrossEntropyLoss",
"optimizer": "SGD",
"criterion_params": {},
"train_metrics_list": [Accuracy(), Top5()],
"valid_metrics_list": [Accuracy(), Top5()],
"metric_to_watch": "Accuracy",
@@ -23,7 +23,6 @@
"initial_lr": 0.1,
"loss": "CrossEntropyLoss",
"optimizer": "SGD",
"criterion_params": {},
"optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},
"train_metrics_list": [Accuracy(), Top5()],
"valid_metrics_list": [Accuracy(), Top5()],
6 changes: 3 additions & 3 deletions src/super_gradients/recipes/cityscapes_regseg48.yaml
@@ -62,9 +62,9 @@ training_hyperparams:

ema: True

loss: LabelSmoothingCrossEntropyLoss
criterion_params:
ignore_index: ${cityscapes_ignored_label}
loss:
LabelSmoothingCrossEntropyLoss:
ignore_index: ${cityscapes_ignored_label}

train_metrics_list:
- PixelAccuracy:
6 changes: 3 additions & 3 deletions src/super_gradients/recipes/cityscapes_segformer.yaml
@@ -95,9 +95,9 @@ training_hyperparams:

sync_bn: True

loss: LabelSmoothingCrossEntropyLoss
criterion_params:
ignore_index: ${cityscapes_ignored_label}
loss:
LabelSmoothingCrossEntropyLoss:
ignore_index: ${cityscapes_ignored_label}

phase_callbacks:
- SlidingWindowValidationCallback:
@@ -50,9 +50,11 @@ arch_params:
resume: False
training_hyperparams:
resume: ${resume}
criterion_params:
alpha: 1.0
dboxes: ${dboxes}
loss:
SSDLoss:
alpha: 1.0
dboxes: ${dboxes} # OVERRIDDEN IN MAIN RECIPE YAML FILE ONCE DBOXES ARE CHOSEN.


multi_gpu: DDP
num_gpus: 4
10 changes: 5 additions & 5 deletions src/super_gradients/recipes/imagenet_resnet50_kd.yaml
@@ -25,11 +25,11 @@ val_dataloader: imagenet_val
resume: False
training_hyperparams:
resume: ${resume}
loss: KDLogitsLoss
criterion_params:
distillation_loss_coeff: 0.8
task_loss_fn:
_target_: super_gradients.training.losses.label_smoothing_cross_entropy_loss.LabelSmoothingCrossEntropyLoss
loss:
KDLogitsLoss:
distillation_loss_coeff: 0.8
task_loss_fn:
_target_: super_gradients.training.losses.label_smoothing_cross_entropy_loss.LabelSmoothingCrossEntropyLoss

arch_params:
teacher_input_adapter:
2 changes: 0 additions & 2 deletions src/super_gradients/recipes/roboflow_ppyoloe.yaml
@@ -40,8 +40,6 @@ training_hyperparams:
resume: ${resume}
max_epochs: 100
mixed_precision: True
criterion_params:
num_classes: ${num_classes}
phase_callbacks:
- RoboflowResultCallback:
dataset_name: ${dataset_name}
3 changes: 0 additions & 3 deletions src/super_gradients/recipes/roboflow_yolo_nas_m.yaml
@@ -60,9 +60,6 @@ training_hyperparams:

max_epochs: 100
mixed_precision: True
criterion_params:
num_classes: ${num_classes}


phase_callbacks: []
loss:
2 changes: 0 additions & 2 deletions src/super_gradients/recipes/roboflow_yolo_nas_s.yaml
@@ -60,8 +60,6 @@ training_hyperparams:

max_epochs: 100
mixed_precision: True
criterion_params:
num_classes: ${num_classes}


phase_callbacks: []
7 changes: 5 additions & 2 deletions src/super_gradients/recipes/roboflow_yolox.yaml
@@ -39,8 +39,11 @@ resume: False
training_hyperparams:
max_epochs: 100
resume: ${resume}
criterion_params:
num_classes: ${num_classes}
loss:
YoloXDetectionLoss:
strides: [ 8, 16, 32 ] # output strides of all yolo outputs
num_classes: ${num_classes}

train_metrics_list:
- DetectionMetrics:
normalize_targets: True
@@ -15,7 +15,6 @@ lr_warmup_epochs: 0
initial_lr: 0.1
loss: LabelSmoothingCrossEntropyLoss
optimizer: SGD
criterion_params: {}

optimizer_params:
weight_decay: 1e-4
@@ -12,12 +12,11 @@ lr_mode: CosineLRScheduler
cosine_final_lr_ratio: 0.1
batch_accumulate: 1
initial_lr: 1e-3
loss: DEKRLoss

criterion_params:
heatmap_loss: qfl
heatmap_loss_factor: 1.0
offset_loss_factor: 0.1
loss:
DEKRLoss:
heatmap_loss: qfl
heatmap_loss_factor: 1.0
offset_loss_factor: 0.1

mixed_precision: True

@@ -13,8 +13,6 @@ cosine_final_lr_ratio: 0.1
batch_accumulate: 1
initial_lr: 0.001
loss: RescoringLoss
criterion_params: {}

mixed_precision: False

optimizer: AdamW
@@ -7,11 +7,10 @@ lr_mode: CosineLRScheduler
cosine_final_lr_ratio: 0.01
batch_accumulate: 1
initial_lr: 0.01
loss: SSDLoss

criterion_params:
alpha: 1.0
dboxes: # OVERRIDEN IN MAIN RECIPE YAML FILE ONCE DBOXES ARE CHOSEN.
loss:
SSDLoss:
alpha: 1.0
dboxes: # OVERRIDDEN IN MAIN RECIPE YAML FILE ONCE DBOXES ARE CHOSEN.

optimizer: SGD
optimizer_params:
@@ -12,11 +12,10 @@ batch_accumulate: 1

save_ckpt_epoch_list: [285]

loss: YoloXDetectionLoss

criterion_params:
strides: [8, 16, 32] # output strides of all yolo outputs
num_classes: 80
loss:
YoloXDetectionLoss:
strides: [8, 16, 32] # output strides of all yolo outputs
num_classes: 80



@@ -38,7 +38,7 @@ zero_weight_decay_on_bias_and_bn: False # whether to apply weight decay on batch


loss: # Loss function for training (str as one of SuperGradients' built-in options, or torch.nn.Module)
criterion_params: {} # when `loss` is one of SuperGradients' built-in options, it will be initialized with criterion_params.
criterion_params: {} # (DEPRECATED) when `loss` is one of SuperGradients' built-in options, it will be initialized with criterion_params.


ema: False # whether to use Model Exponential Moving Average
@@ -20,9 +20,9 @@ ema_params:
decay: 0.9999
decay_type: constant

loss: LabelSmoothingCrossEntropyLoss
criterion_params:
smooth_eps: 0.1
loss:
LabelSmoothingCrossEntropyLoss:
smooth_eps: 0.1


metric_to_watch: Accuracy
@@ -10,9 +10,9 @@ optimizer_params:
weight_decay: 0.00004

lr_warmup_epochs: 5
loss: LabelSmoothingCrossEntropyLoss
criterion_params:
smooth_eps: 0.1
loss:
LabelSmoothingCrossEntropyLoss:
smooth_eps: 0.1

zero_weight_decay_on_bias_and_bn: True
ema: True
@@ -20,9 +20,9 @@ ema_params:
decay_type: constant
decay: 0.9999

loss: LabelSmoothingCrossEntropyLoss
criterion_params:
smooth_eps: 0.1
loss:
LabelSmoothingCrossEntropyLoss:
smooth_eps: 0.1


metric_to_watch: Accuracy