diff --git a/.circleci/config.yml b/.circleci/config.yml index de0afea275..1d27ab4fd2 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -547,7 +547,7 @@ jobs: python3.8 src/super_gradients/train_from_recipe.py --config-name=coco2017_pose_dekr_w32_no_dc experiment_name=shortened_coco2017_pose_dekr_w32_ap_test batch_size=4 val_batch_size=8 epochs=1 training_hyperparams.lr_warmup_steps=0 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=1000 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4 python3.8 src/super_gradients/train_from_recipe.py --config-name=cifar10_resnet experiment_name=shortened_cifar10_resnet_accuracy_test epochs=100 training_hyperparams.average_best_models=False multi_gpu=DDP num_gpus=4 python3.8 src/super_gradients/examples/convert_recipe_example/convert_recipe_example.py --config-name=cifar10_conversion_params experiment_name=shortened_cifar10_resnet_accuracy_test - python3.8 src/super_gradients/train_from_recipe.py --config-name=coco2017_yolox experiment_name=shortened_coco2017_yolox_n_map_test architecture=yolox_n training_hyperparams.loss=yolox_fast_loss epochs=10 training_hyperparams.average_best_models=False multi_gpu=DDP num_gpus=4 + python3.8 src/super_gradients/train_from_recipe.py --config-name=coco2017_yolox experiment_name=shortened_coco2017_yolox_n_map_test architecture=yolox_n training_hyperparams.loss=YoloXFastDetectionLoss epochs=10 training_hyperparams.average_best_models=False multi_gpu=DDP num_gpus=4 python3.8 src/super_gradients/train_from_recipe.py --config-name=cityscapes_regseg48 experiment_name=shortened_cityscapes_regseg48_iou_test epochs=10 training_hyperparams.average_best_models=False multi_gpu=DDP num_gpus=4 coverage run --source=super_gradients -m unittest tests/deci_core_recipe_test_suite_runner.py diff --git a/Makefile b/Makefile index 255938e51d..6cf22249de 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ yolo_nas_integration_tests: recipe_accuracy_tests: python src/super_gradients/train_from_recipe.py --config-name=coco2017_pose_dekr_w32_no_dc experiment_name=shortened_coco2017_pose_dekr_w32_ap_test epochs=1 batch_size=4 val_batch_size=8 training_hyperparams.lr_warmup_steps=0 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=1000 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4 python src/super_gradients/train_from_recipe.py --config-name=cifar10_resnet experiment_name=shortened_cifar10_resnet_accuracy_test epochs=100 training_hyperparams.average_best_models=False multi_gpu=DDP num_gpus=4 - python src/super_gradients/train_from_recipe.py --config-name=coco2017_yolox experiment_name=shortened_coco2017_yolox_n_map_test epochs=10 architecture=yolox_n training_hyperparams.loss=yolox_fast_loss training_hyperparams.average_best_models=False multi_gpu=DDP num_gpus=4 + python src/super_gradients/train_from_recipe.py --config-name=coco2017_yolox experiment_name=shortened_coco2017_yolox_n_map_test epochs=10 architecture=yolox_n training_hyperparams.loss=YoloXFastDetectionLoss training_hyperparams.average_best_models=False multi_gpu=DDP num_gpus=4 python src/super_gradients/train_from_recipe.py --config-name=cityscapes_regseg48 experiment_name=shortened_cityscapes_regseg48_iou_test epochs=10 training_hyperparams.average_best_models=False multi_gpu=DDP num_gpus=4 python src/super_gradients/examples/convert_recipe_example/convert_recipe_example.py --config-name=cifar10_conversion_params 
experiment_name=shortened_cifar10_resnet_accuracy_test coverage run --source=super_gradients -m unittest tests/deci_core_recipe_test_suite_runner.py diff --git a/documentation/source/Checkpoints.md b/documentation/source/Checkpoints.md index 295ab687ca..48847ca871 100644 --- a/documentation/source/Checkpoints.md +++ b/documentation/source/Checkpoints.md @@ -79,7 +79,7 @@ model = models.get(model_name=Models.RESNET18, num_classes=10) train_params = { ... - "loss": "cross_entropy", + "loss": "LabelSmoothingCrossEntropyLoss", "criterion_params": {}, "save_ckpt_epoch_list": [10,15] ... diff --git a/documentation/source/Example_Classification.md b/documentation/source/Example_Classification.md index b332177134..f6d9d6606b 100644 --- a/documentation/source/Example_Classification.md +++ b/documentation/source/Example_Classification.md @@ -318,10 +318,10 @@ Output (Training parameters): 'launch_tensorboard': False, 'load_opt_params': True, 'log_installed_packages': True, - 'loss': 'cross_entropy', + 'loss': "LabelSmoothingCrossEntropyLoss", 'lr_cooldown_epochs': 0, 'lr_decay_factor': 0.1, - 'lr_mode': 'step', + 'lr_mode': 'StepLRScheduler', 'lr_schedule_function': None, 'lr_updates': array([100, 150, 200]), 'lr_warmup_epochs': 0, @@ -355,7 +355,7 @@ Output (Training parameters): 'train_metrics_list': ['Accuracy', 'Top5'], 'valid_metrics_list': ['Accuracy', 'Top5'], 'warmup_initial_lr': None, - 'warmup_mode': 'linear_epoch_step', + 'warmup_mode': 'LinearEpochLRWarmup', 'zero_weight_decay_on_bias_and_bn': False } ``` diff --git a/documentation/source/Example_Training-an-external-model.md b/documentation/source/Example_Training-an-external-model.md index 4978ef0d88..9b5379dba7 100644 --- a/documentation/source/Example_Training-an-external-model.md +++ b/documentation/source/Example_Training-an-external-model.md @@ -640,7 +640,7 @@ And lastly, we need to define the training hyperparameters: ```python train_params = { "max_epochs": 100, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "initial_lr": 0.001, "optimizer": "Adam", "loss": CustomSegLoss(), diff --git a/documentation/source/LRScheduling.md b/documentation/source/LRScheduling.md index 4912ffccb0..04cfa238bf 100644 --- a/documentation/source/LRScheduling.md +++ b/documentation/source/LRScheduling.md @@ -7,15 +7,15 @@ Learning rate scheduling type is controlled by the training parameter `lr_mode`. When str: - Learning rate scheduling policy, one of ['step','poly','cosine','function']. + Learning rate scheduling policy, one of ['StepLRScheduler','PolyLRScheduler','CosineLRScheduler','FunctionLRScheduler']. - 'step' refers to constant updates at epoch numbers passed through `lr_updates`. Each update decays the learning rate by `lr_decay_factor`. + 'StepLRScheduler' refers to constant updates at epoch numbers passed through `lr_updates`. Each update decays the learning rate by `lr_decay_factor`. - 'cosine' refers to the Cosine Anealing policy as mentioned in https://arxiv.org/abs/1608.03983. The final learning rate ratio is controlled by `cosine_final_lr_ratio` training parameter. + 'CosineLRScheduler' refers to the Cosine Anealing policy as mentioned in https://arxiv.org/abs/1608.03983. The final learning rate ratio is controlled by `cosine_final_lr_ratio` training parameter. 
- 'poly' refers to the polynomial decrease: in each epoch iteration `self.lr = self.initial_lr * pow((1.0 - (current_iter / max_iter)), 0.9)` + 'PolyLRScheduler' refers to the polynomial decrease: in each epoch iteration `self.lr = self.initial_lr * pow((1.0 - (current_iter / max_iter)), 0.9)` - 'function' refers to a user-defined learning rate scheduling function, that is passed through `lr_schedule_function`. + 'FunctionLRScheduler' refers to a user-defined learning rate scheduling function, that is passed through `lr_schedule_function`. For example, the training code below will start with an initial learning rate of 0.1 and decay by 0.1 at epochs 100,150 and 200: @@ -30,7 +30,7 @@ valid_dataloader = ... model = ... train_params = { "initial_lr": 0.1, - "lr_mode":"step", + "lr_mode":"StepLRScheduler", "lr_updates": [100, 150, 200], "lr_decay_factor": 0.1, ..., @@ -45,7 +45,7 @@ trainer.train(model=model, training_params=train_params, train_loader=train_data ```yaml training_hyperparams: initial_lr: 0.1 - lr_mode: step + lr_mode: StepLRScheduler user_lr_updates: - 100 - 150 @@ -66,7 +66,7 @@ Prerequisites: [phase callbacks](PhaseCallbacks.md), [training with configuratio In SG, learning rate schedulers are implemented as [phase callbacks](PhaseCallbacks.md). They read the learning rate from the `PhaseContext` in their `__call__` method, calculate the new learning rate according to the current state of training, and update the optimizer's param groups. -For example, the code snippet from the previous section translates "lr_mode":"step" to a `super_gradients.training.utils.callbacks.callbacks.StepLRCallback` instance, which is added to the phase callbacks list. +For example, the code snippet from the previous section translates "lr_mode":"StepLRScheduler" to a `super_gradients.training.utils.callbacks.callbacks.StepLRScheduler` instance, which is added to the phase callbacks list. ### Implementing Your Own Scheduler A custom learning rate scheduler should inherit from `LRCallbackBase`, so let's take a look at it: diff --git a/documentation/source/Losses.md b/documentation/source/Losses.md index 3def77c7e5..e7fd837566 100644 --- a/documentation/source/Losses.md +++ b/documentation/source/Losses.md @@ -2,18 +2,18 @@ SuperGradients can support any PyTorch-based loss function. Additionally, multiple Loss function implementations for various tasks are also supported: - cross_entropy - mse - r_squared_loss - shelfnet_ohem_loss - shelfnet_se_loss - yolox_loss - yolox_fast_loss - ssd_loss - stdc_loss - bce_dice_loss - kd_loss - dice_ce_edge_loss + LabelSmoothingCrossEntropyLoss + MSE + RSquaredLoss + ShelfNetOHEMLoss + ShelfNetSemanticEncodingLoss + YoloXDetectionLoss + YoloXFastDetectionLoss + SSDLoss + STDCLoss + BCEDiceLoss + KDLogitsLoss + DiceCEEdgeLoss All the above, are just string aliases for the underlying torch.nn.Module classes, implementing the specified loss functions. @@ -31,7 +31,7 @@ model = ... train_params = { ... - "loss": "cross_entropy", + "loss": "LabelSmoothingCrossEntropyLoss", "criterion_params": {} ... } @@ -42,7 +42,7 @@ Since most IDEs support auto-completion, for your convenience, you can use our o ```python from super_gradients.common.object_names import Losses ``` -Then simply instead of "cross_entropy", use +Then simply instead of "LabelSmoothingCrossEntropyLoss", use ```python Losses.CROSS_ENTROPY ``` @@ -54,14 +54,14 @@ When doing so, in your `my_training_hyperparams.yaml` file: ```yaml ... 
-loss: yolox_loss +loss: YoloXDetectionLoss criterion_params: strides: [8, 16, 32] # output strides of all yolo outputs num_classes: 80 ``` -Note that two `training_params` parameters define the loss function: `loss` which defines the type of the loss, and`criterion_params` dictionary which will be unpacked to the underlying `yolox_loss` class constructor. +Note that two `training_params` parameters define the loss function: `loss` which defines the type of the loss, and`criterion_params` dictionary which will be unpacked to the underlying `YoloXDetectionLoss` class constructor. ## Passing Instantiated nn.Module Objects as Loss Functions diff --git a/documentation/source/PhaseCallbacks.md b/documentation/source/PhaseCallbacks.md index cd8512ef2c..cc7480aa06 100644 --- a/documentation/source/PhaseCallbacks.md +++ b/documentation/source/PhaseCallbacks.md @@ -8,13 +8,13 @@ SG's `super_gradients.training.utils.callbacks` module implements some common us ModelConversionCheckCallback LRCallbackBase - EpochStepWarmupLRCallback - BatchStepLinearWarmupLRCallback - StepLRCallback - ExponentialLRCallback - PolyLRCallback - CosineLRCallback - FunctionLRCallback + LinearEpochLRWarmup + LinearBatchLRWarmup + StepLRScheduler + ExponentialLRScheduler + PolyLRScheduler + CosineLRScheduler + FunctionLRScheduler LRSchedulerCallback DetectionVisualizationCallback BinarySegmentationVisualizationCallback @@ -30,7 +30,7 @@ off augmentations and incorporate L1 loss starting from epoch 285: max_epochs: 300 ... -loss: yolox_loss +loss: YoloXDetectionLoss ... @@ -237,7 +237,7 @@ valid_dataloader = ... model = ... train_params = { - "loss": "cross_entropy", + "loss": "LabelSmoothingCrossEntropyLoss", "criterion_params": {}, "phase_callbacks": [SaveFirstBatchCallback()], ... diff --git a/documentation/source/QuickstartBasicToolkit.md b/documentation/source/QuickstartBasicToolkit.md index a13ec89cee..4968d72000 100644 --- a/documentation/source/QuickstartBasicToolkit.md +++ b/documentation/source/QuickstartBasicToolkit.md @@ -61,7 +61,7 @@ model = models.get(Models.RESNET18, num_classes=10) training_params = { "max_epochs": 20, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "LabelSmoothingCrossEntropyLoss", "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], "metric_to_watch": "Accuracy", diff --git a/documentation/source/Segmentation.md b/documentation/source/Segmentation.md index c627a97d98..0cee203c35 100644 --- a/documentation/source/Segmentation.md +++ b/documentation/source/Segmentation.md @@ -143,12 +143,12 @@ from super_gradients.training.metrics.segmentation_metrics import BinaryIOU train_params = { "max_epochs": 30, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "initial_lr": 0.005, "lr_warmup_epochs": 5, "multiply_head_lr": 10, "optimizer": "SGD", - "loss": "bce_dice_loss", + "loss": "BCEDiceLoss", "ema": True, "zero_weight_decay_on_bias_and_bn": True, "average_best_models": True, diff --git a/documentation/source/configuration_files.md b/documentation/source/configuration_files.md index bf7a4b43ab..bb253bd8c3 100644 --- a/documentation/source/configuration_files.md +++ b/documentation/source/configuration_files.md @@ -28,7 +28,7 @@ lr_decay_factor: 0.1 lr_mode: step lr_warmup_epochs: 0 initial_lr: 0.1 -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss optimizer: SGD criterion_params: {} diff --git a/src/super_gradients/common/object_names.py b/src/super_gradients/common/object_names.py index 26bd48f890..f21b44d999 100644 --- 
a/src/super_gradients/common/object_names.py +++ b/src/super_gradients/common/object_names.py @@ -1,21 +1,21 @@ class Losses: """Static class holding all the supported loss names""" - CROSS_ENTROPY = "cross_entropy" - MSE = "mse" - R_SQUARED_LOSS = "r_squared_loss" - SHELFNET_OHEM_LOSS = "shelfnet_ohem_loss" - SHELFNET_SE_LOSS = "shelfnet_se_loss" - YOLOX_LOSS = "yolox_loss" - PPYOLOE_LOSS = "ppyoloe_loss" - YOLOX_FAST_LOSS = "yolox_fast_loss" - SSD_LOSS = "ssd_loss" - STDC_LOSS = "stdc_loss" - BCE_DICE_LOSS = "bce_dice_loss" - KD_LOSS = "kd_loss" - DICE_CE_EDGE_LOSS = "dice_ce_edge_loss" - DEKR_LOSS = "dekr_loss" - RESCORING_LOSS = "rescoring_loss" + CROSS_ENTROPY = "CrossEntropyLoss" + MSE = "MSE" + R_SQUARED_LOSS = "RSquaredLoss" + SHELFNET_OHEM_LOSS = "ShelfNetOHEMLoss" + SHELFNET_SE_LOSS = "ShelfNetSemanticEncodingLoss" + YOLOX_LOSS = "YoloXDetectionLoss" + PPYOLOE_LOSS = "PPYoloELoss" + YOLOX_FAST_LOSS = "YoloXFastDetectionLoss" + SSD_LOSS = "SSDLoss" + STDC_LOSS = "STDCLoss" + BCE_DICE_LOSS = "BCEDiceLoss" + KD_LOSS = "KDLogitsLoss" + DICE_CE_EDGE_LOSS = "DiceCEEdgeLoss" + DEKR_LOSS = "DEKRLoss" + RESCORING_LOSS = "RescoringLoss" class Metrics: @@ -154,19 +154,19 @@ class Callbacks: class LRSchedulers: """Static class to hold all the supported LR Scheduler names""" - STEP = "step" - POLY = "poly" - COSINE = "cosine" - EXP = "exp" - FUNCTION = "function" + STEP = "StepLRScheduler" + POLY = "PolyLRScheduler" + COSINE = "CosineLRScheduler" + EXP = "ExponentialLRScheduler" + FUNCTION = "FunctionLRScheduler" class LRWarmups: """Static class to hold all the supported LR Warmup names""" - LINEAR_STEP = "linear_step" - LINEAR_EPOCH_STEP = "linear_epoch_step" - LINEAR_BATCH_STEP = "linear_batch_step" + LINEAR_STEP = "LinearEpochLRWarmup" + LINEAR_EPOCH_STEP = "LinearEpochLRWarmup" + LINEAR_BATCH_STEP = "LinearBatchLRWarmup" class Samplers: diff --git a/src/super_gradients/common/registry/registry.py b/src/super_gradients/common/registry/registry.py index f00f0a3193..e303f3766f 100644 --- a/src/super_gradients/common/registry/registry.py +++ b/src/super_gradients/common/registry/registry.py @@ -68,7 +68,8 @@ def warn_if_deprecated(name: str, registry: dict): """ deprecated_names = registry.get(_DEPRECATED_KEY, {}) if name in deprecated_names: - warnings.warn(f"Using `{name}` in the recipe has been deprecated. Please use `{deprecated_names[name]}`", DeprecationWarning) + warnings.simplefilter("once", DeprecationWarning) # Required, otherwise the warning may never be displayed. + warnings.warn(f"Object name `{name}` is now deprecated. 
Please replace it with `{deprecated_names[name]}`.", DeprecationWarning) ARCHITECTURES = {} @@ -83,9 +84,9 @@ def warn_if_deprecated(name: str, registry: dict): METRICS = {} register_metric = create_register_decorator(registry=METRICS) -LOSSES = {Losses.MSE: nn.MSELoss} +LOSSES = {} register_loss = create_register_decorator(registry=LOSSES) - +register_loss(name=Losses.MSE, deprecated_name="mse")(nn.MSELoss) # Register manually to benefit from deprecated logic ALL_DATALOADERS = {} register_dataloader = create_register_decorator(registry=ALL_DATALOADERS) diff --git a/src/super_gradients/examples/ddrnet_imagenet/ddrnet_classification_example.py b/src/super_gradients/examples/ddrnet_imagenet/ddrnet_classification_example.py index eba1463bdf..c760571169 100644 --- a/src/super_gradients/examples/ddrnet_imagenet/ddrnet_classification_example.py +++ b/src/super_gradients/examples/ddrnet_imagenet/ddrnet_classification_example.py @@ -39,13 +39,13 @@ train_params_ddr = { "max_epochs": args.max_epochs, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_updates": [30, 60, 90], "lr_decay_factor": 0.1, "initial_lr": 0.1 * devices, "optimizer": "SGD", "optimizer_params": {"weight_decay": 0.0001, "momentum": 0.9, "nesterov": True}, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], "metric_to_watch": "Accuracy", diff --git a/src/super_gradients/examples/deci_lab_export_example/deci_lab_export_example.py b/src/super_gradients/examples/deci_lab_export_example/deci_lab_export_example.py index 8080cf2ae8..9f49c0130f 100644 --- a/src/super_gradients/examples/deci_lab_export_example/deci_lab_export_example.py +++ b/src/super_gradients/examples/deci_lab_export_example/deci_lab_export_example.py @@ -56,10 +56,10 @@ def main(architecture_name: str): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], diff --git a/src/super_gradients/examples/early_stop/early_stop_example.py b/src/super_gradients/examples/early_stop/early_stop_example.py index e2cbb782cf..5cf1124b06 100644 --- a/src/super_gradients/examples/early_stop/early_stop_example.py +++ b/src/super_gradients/examples/early_stop/early_stop_example.py @@ -12,16 +12,16 @@ super_gradients.init_trainer() early_stop_acc = EarlyStop(Phase.VALIDATION_EPOCH_END, monitor="Accuracy", mode="max", patience=3, verbose=True) -early_stop_val_loss = EarlyStop(Phase.VALIDATION_EPOCH_END, monitor="LabelSmoothingCrossEntropyLoss", mode="min", patience=3, verbose=True) +early_stop_val_loss = EarlyStop(Phase.VALIDATION_EPOCH_END, monitor="CrossEntropyLoss", mode="min", patience=3, verbose=True) train_params = { "max_epochs": 250, "lr_updates": [100, 150, 200], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/src/super_gradients/examples/loggers_examples/clearml_logger_example.py b/src/super_gradients/examples/loggers_examples/clearml_logger_example.py index 700bb6f584..7a96dea5d0 100644 --- a/src/super_gradients/examples/loggers_examples/clearml_logger_example.py +++ 
b/src/super_gradients/examples/loggers_examples/clearml_logger_example.py @@ -11,9 +11,9 @@ "max_epochs": 20, "lr_updates": [5, 10, 15], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], diff --git a/src/super_gradients/examples/loggers_examples/deci_platform_logger_example.py b/src/super_gradients/examples/loggers_examples/deci_platform_logger_example.py index 988373e9ee..9a69d3ef86 100644 --- a/src/super_gradients/examples/loggers_examples/deci_platform_logger_example.py +++ b/src/super_gradients/examples/loggers_examples/deci_platform_logger_example.py @@ -14,9 +14,9 @@ "max_epochs": 20, "lr_updates": [5, 10, 15], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], diff --git a/src/super_gradients/examples/quantization/resnet_qat_example.py b/src/super_gradients/examples/quantization/resnet_qat_example.py index d9512c74c5..c442b7303e 100644 --- a/src/super_gradients/examples/quantization/resnet_qat_example.py +++ b/src/super_gradients/examples/quantization/resnet_qat_example.py @@ -89,7 +89,7 @@ def sg_selective_qdq_resnet50(): "initial_lr": args.lr, "optimizer": "SGD", "optimizer_params": {"weight_decay": 0.0001, "momentum": 0.9, "nesterov": True}, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], "test_metrics_list": [Accuracy(), Top5()], diff --git a/src/super_gradients/examples/regseg_transfer_learning_example/regseg_transfer_learning_example.py b/src/super_gradients/examples/regseg_transfer_learning_example/regseg_transfer_learning_example.py index e8bd1b167e..4e363f460e 100644 --- a/src/super_gradients/examples/regseg_transfer_learning_example/regseg_transfer_learning_example.py +++ b/src/super_gradients/examples/regseg_transfer_learning_example/regseg_transfer_learning_example.py @@ -39,13 +39,13 @@ # DEFINE TRAINING PARAMS. SEE DOCS FOR THE FULL LIST. 
train_params = { "max_epochs": 50, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "initial_lr": 0.0064, # for batch_size=16 "optimizer_params": {"momentum": 0.843, "weight_decay": 0.00036, "nesterov": True}, "cosine_final_lr_ratio": 0.1, "multiply_head_lr": 10, "optimizer": "SGD", - "loss": "bce_dice_loss", + "loss": "BCEDiceLoss", "ema": True, "zero_weight_decay_on_bias_and_bn": True, "average_best_models": True, diff --git a/src/super_gradients/examples/train_with_test_set/train_with_test_example.py b/src/super_gradients/examples/train_with_test_set/train_with_test_example.py index 6d8eb4c77b..8729101b8c 100644 --- a/src/super_gradients/examples/train_with_test_set/train_with_test_example.py +++ b/src/super_gradients/examples/train_with_test_set/train_with_test_example.py @@ -12,7 +12,7 @@ "lr_decay_factor": 0.1, "lr_mode": "step", "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], diff --git a/src/super_gradients/recipes/cityscapes_al_ddrnet.yaml b/src/super_gradients/recipes/cityscapes_al_ddrnet.yaml index d5b69683a1..48dd55122d 100644 --- a/src/super_gradients/recipes/cityscapes_al_ddrnet.yaml +++ b/src/super_gradients/recipes/cityscapes_al_ddrnet.yaml @@ -61,7 +61,7 @@ training_hyperparams: max_epochs: 200 initial_lr: 0.0075 # batch size 24 loss: - dice_ce_edge_loss: + DiceCEEdgeLoss: num_classes: 19 ignore_index: 19 num_aux_heads: 1 diff --git a/src/super_gradients/recipes/cityscapes_ddrnet.yaml b/src/super_gradients/recipes/cityscapes_ddrnet.yaml index 9c590c4727..d6763593c8 100644 --- a/src/super_gradients/recipes/cityscapes_ddrnet.yaml +++ b/src/super_gradients/recipes/cityscapes_ddrnet.yaml @@ -57,7 +57,7 @@ training_hyperparams: max_epochs: 500 initial_lr: 0.0075 # batch size 24 loss: - dice_ce_edge_loss: + DiceCEEdgeLoss: num_classes: 19 ignore_index: 19 num_aux_heads: 1 diff --git a/src/super_gradients/recipes/cityscapes_kd_base.yaml b/src/super_gradients/recipes/cityscapes_kd_base.yaml index d6a99d88a9..40d0d6e2b4 100644 --- a/src/super_gradients/recipes/cityscapes_kd_base.yaml +++ b/src/super_gradients/recipes/cityscapes_kd_base.yaml @@ -55,7 +55,7 @@ training_hyperparams: weights: [ 1. ] kd_loss_weights: [1., 6.] - kd_loss: + KDLogitsLoss: _target_: super_gradients.training.losses.cwd_loss.ChannelWiseKnowledgeDistillationLoss temperature: 3. 
normalization_mode: channel_wise diff --git a/src/super_gradients/recipes/cityscapes_pplite_seg50.yaml b/src/super_gradients/recipes/cityscapes_pplite_seg50.yaml index 0843464532..ad1b90d03a 100644 --- a/src/super_gradients/recipes/cityscapes_pplite_seg50.yaml +++ b/src/super_gradients/recipes/cityscapes_pplite_seg50.yaml @@ -68,7 +68,7 @@ checkpoint_params: training_hyperparams: sync_bn: True loss: - dice_ce_edge_loss: + DiceCEEdgeLoss: num_classes: 19 ignore_index: 19 num_aux_heads: 3 diff --git a/src/super_gradients/recipes/cityscapes_pplite_seg75.yaml b/src/super_gradients/recipes/cityscapes_pplite_seg75.yaml index b20030cff8..cbc19e4660 100644 --- a/src/super_gradients/recipes/cityscapes_pplite_seg75.yaml +++ b/src/super_gradients/recipes/cityscapes_pplite_seg75.yaml @@ -63,7 +63,7 @@ checkpoint_params: training_hyperparams: sync_bn: True loss: - dice_ce_edge_loss: + DiceCEEdgeLoss: num_classes: 19 ignore_index: 19 num_aux_heads: 3 diff --git a/src/super_gradients/recipes/cityscapes_regseg48.yaml b/src/super_gradients/recipes/cityscapes_regseg48.yaml index 4e426100af..27c4cbd5ac 100644 --- a/src/super_gradients/recipes/cityscapes_regseg48.yaml +++ b/src/super_gradients/recipes/cityscapes_regseg48.yaml @@ -52,7 +52,7 @@ training_hyperparams: sync_bn: True resume: ${resume} max_epochs: 800 - lr_mode: poly + lr_mode: PolyLRScheduler initial_lr: 0.02 # for effective batch_size=16 lr_warmup_epochs: 0 optimizer: SGD @@ -62,7 +62,7 @@ training_hyperparams: ema: True - loss: cross_entropy + loss: LabelSmoothingCrossEntropyLoss criterion_params: ignore_index: ${cityscapes_ignored_label} diff --git a/src/super_gradients/recipes/cityscapes_segformer.yaml b/src/super_gradients/recipes/cityscapes_segformer.yaml index 60c57ba273..fb4d8bb227 100644 --- a/src/super_gradients/recipes/cityscapes_segformer.yaml +++ b/src/super_gradients/recipes/cityscapes_segformer.yaml @@ -95,7 +95,7 @@ training_hyperparams: sync_bn: True - loss: cross_entropy + loss: LabelSmoothingCrossEntropyLoss criterion_params: ignore_index: ${cityscapes_ignored_label} diff --git a/src/super_gradients/recipes/cityscapes_segformer_b0.yaml b/src/super_gradients/recipes/cityscapes_segformer_b0.yaml index 2415776c9e..a4e54798f5 100644 --- a/src/super_gradients/recipes/cityscapes_segformer_b0.yaml +++ b/src/super_gradients/recipes/cityscapes_segformer_b0.yaml @@ -18,7 +18,7 @@ experiment_name: ${architecture}_cityscapes training_hyperparams: max_epochs: 2 - lr_mode: poly + lr_mode: PolyLRScheduler initial_lr: 0.00006 # for effective batch_size=8 multi_gpu: DDP diff --git a/src/super_gradients/recipes/cityscapes_segformer_b1.yaml b/src/super_gradients/recipes/cityscapes_segformer_b1.yaml index 0bb8b2fef0..7bd9f4a26f 100644 --- a/src/super_gradients/recipes/cityscapes_segformer_b1.yaml +++ b/src/super_gradients/recipes/cityscapes_segformer_b1.yaml @@ -18,7 +18,7 @@ experiment_name: ${architecture}_cityscapes training_hyperparams: max_epochs: 2 - lr_mode: poly + lr_mode: PolyLRScheduler initial_lr: 0.00006 # for effective batch_size=8 multi_gpu: DDP diff --git a/src/super_gradients/recipes/cityscapes_segformer_b2.yaml b/src/super_gradients/recipes/cityscapes_segformer_b2.yaml index 0a0a05ea90..c793c3e1f2 100644 --- a/src/super_gradients/recipes/cityscapes_segformer_b2.yaml +++ b/src/super_gradients/recipes/cityscapes_segformer_b2.yaml @@ -18,7 +18,7 @@ experiment_name: ${architecture}_cityscapes training_hyperparams: max_epochs: 2 - lr_mode: poly + lr_mode: PolyLRScheduler initial_lr: 0.00006 # for effective batch_size=8 multi_gpu: DDP 
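The recipe changes above swap the lowercase aliases for the registered class names. For Python code, the same strings are exposed as constants on `super_gradients.common.object_names` (the `Losses` import is shown in Losses.md above; `LRSchedulers` sits in the same module per the object_names.py hunk). A minimal sketch, assuming both classes are importable from that module:

```python
# Minimal sketch: the constants resolve to the renamed registry keys shown above.
from super_gradients.common.object_names import Losses, LRSchedulers

assert Losses.CROSS_ENTROPY == "CrossEntropyLoss"      # was "cross_entropy"
assert LRSchedulers.POLY == "PolyLRScheduler"          # was "poly"

# Using the constants avoids typos in hand-written training params:
train_params = {
    "lr_mode": LRSchedulers.POLY,
    "loss": Losses.CROSS_ENTROPY,
}
```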
diff --git a/src/super_gradients/recipes/cityscapes_segformer_b3.yaml b/src/super_gradients/recipes/cityscapes_segformer_b3.yaml index dfde685aa3..31245514f1 100644 --- a/src/super_gradients/recipes/cityscapes_segformer_b3.yaml +++ b/src/super_gradients/recipes/cityscapes_segformer_b3.yaml @@ -18,7 +18,7 @@ experiment_name: ${architecture}_cityscapes training_hyperparams: max_epochs: 2 - lr_mode: poly + lr_mode: PolyLRScheduler initial_lr: 0.00006 # for effective batch_size=8 multi_gpu: DDP diff --git a/src/super_gradients/recipes/cityscapes_segformer_b4.yaml b/src/super_gradients/recipes/cityscapes_segformer_b4.yaml index 1c40dcef90..dc82e01c99 100644 --- a/src/super_gradients/recipes/cityscapes_segformer_b4.yaml +++ b/src/super_gradients/recipes/cityscapes_segformer_b4.yaml @@ -18,7 +18,7 @@ experiment_name: ${architecture}_cityscapes training_hyperparams: max_epochs: 2 - lr_mode: poly + lr_mode: PolyLRScheduler initial_lr: 0.00006 # for effective batch_size=8 mixed_precision: True diff --git a/src/super_gradients/recipes/cityscapes_segformer_b5.yaml b/src/super_gradients/recipes/cityscapes_segformer_b5.yaml index eba8aaab15..e812db4fa3 100644 --- a/src/super_gradients/recipes/cityscapes_segformer_b5.yaml +++ b/src/super_gradients/recipes/cityscapes_segformer_b5.yaml @@ -18,7 +18,7 @@ experiment_name: ${architecture}_cityscapes training_hyperparams: max_epochs: 2 - lr_mode: poly + lr_mode: PolyLRScheduler initial_lr: 0.00006 # for effective batch_size=8 mixed_precision: True diff --git a/src/super_gradients/recipes/cityscapes_stdc_seg50.yaml b/src/super_gradients/recipes/cityscapes_stdc_seg50.yaml index 4c0edec4f7..05f565256d 100644 --- a/src/super_gradients/recipes/cityscapes_stdc_seg50.yaml +++ b/src/super_gradients/recipes/cityscapes_stdc_seg50.yaml @@ -62,7 +62,7 @@ checkpoint_params: training_hyperparams: sync_bn: True loss: - dice_ce_edge_loss: + DiceCEEdgeLoss: num_classes: 19 ignore_index: 19 weights: [ 1., 0.6, 0.4, 1. ] diff --git a/src/super_gradients/recipes/cityscapes_stdc_seg75.yaml b/src/super_gradients/recipes/cityscapes_stdc_seg75.yaml index f63ad19072..c5b6ff7b5a 100644 --- a/src/super_gradients/recipes/cityscapes_stdc_seg75.yaml +++ b/src/super_gradients/recipes/cityscapes_stdc_seg75.yaml @@ -68,7 +68,7 @@ training_hyperparams: sync_bn: True loss: - stdc_loss: + STDCLoss: num_classes: 19 ignore_index: 19 mining_percent: 0.0625 # mining percentage is 1/16 of pixels following original implementation. 
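The nested mapping form used in the cityscapes STDC recipes above (`STDCLoss:` followed by its keyword arguments) has a direct in-code equivalent: instantiate the registered class and pass the object as the `loss` training parameter, as described under "Passing Instantiated nn.Module Objects as Loss Functions" in Losses.md. A hedged sketch, assuming `STDCLoss` accepts the same keyword arguments the recipes pass:

```python
# Sketch only: kwargs mirror the recipe keys above; the exact STDCLoss signature is assumed.
from super_gradients.training.losses import STDCLoss

stdc_loss = STDCLoss(
    num_classes=19,
    ignore_index=19,
    mining_percent=0.0625,          # 1/16 of pixels, following the original implementation
    weights=[1.0, 0.6, 0.4, 1.0],
)

train_params = {
    "loss": stdc_loss,              # instantiated object instead of the registered string name
}
```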
diff --git a/src/super_gradients/recipes/imagenet_resnet50_kd.yaml b/src/super_gradients/recipes/imagenet_resnet50_kd.yaml index 60bb7db496..2bc9109f46 100644 --- a/src/super_gradients/recipes/imagenet_resnet50_kd.yaml +++ b/src/super_gradients/recipes/imagenet_resnet50_kd.yaml @@ -25,7 +25,7 @@ val_dataloader: imagenet_val resume: False training_hyperparams: resume: ${resume} - loss: kd_loss + loss: KDLogitsLoss criterion_params: distillation_loss_coeff: 0.8 task_loss_fn: diff --git a/src/super_gradients/recipes/quantization_params/default_quantization_params.yaml b/src/super_gradients/recipes/quantization_params/default_quantization_params.yaml index 2b08ae0c02..0d9ceb8e36 100644 --- a/src/super_gradients/recipes/quantization_params/default_quantization_params.yaml +++ b/src/super_gradients/recipes/quantization_params/default_quantization_params.yaml @@ -7,7 +7,7 @@ selective_quantizer_params: skip_modules: # optional list of module names (strings) to skip from quantization calib_params: - histogram_calib_method: "percentile" # calibration method for all "histogram" calibrators, acceptable types are ["percentile", "entropy", mse"], "max" calibrators always use "max" + histogram_calib_method: "percentile" # calibration method for all "histogram" calibrators, acceptable types are ["percentile", "entropy", "mse"], "max" calibrators always use "max" percentile: 99.99 # percentile for all histogram calibrators with method "percentile", other calibrators are not affected num_calib_batches: # number of batches to use for calibration, if None, 512 / batch_size will be used verbose: False # if calibrator should be verbose diff --git a/src/super_gradients/recipes/roboflow_ppyoloe.yaml b/src/super_gradients/recipes/roboflow_ppyoloe.yaml index 23a2801a66..c904cf96c5 100644 --- a/src/super_gradients/recipes/roboflow_ppyoloe.yaml +++ b/src/super_gradients/recipes/roboflow_ppyoloe.yaml @@ -47,7 +47,7 @@ training_hyperparams: dataset_name: ${dataset_name} output_path: ${result_path} loss: - ppyoloe_loss: + PPYoloELoss: num_classes: ${num_classes} reg_max: ${arch_params.head.reg_max} diff --git a/src/super_gradients/recipes/roboflow_yolo_nas_m.yaml b/src/super_gradients/recipes/roboflow_yolo_nas_m.yaml index 0c0c43c569..2d6641e801 100644 --- a/src/super_gradients/recipes/roboflow_yolo_nas_m.yaml +++ b/src/super_gradients/recipes/roboflow_yolo_nas_m.yaml @@ -46,7 +46,7 @@ training_hyperparams: zero_weight_decay_on_bias_and_bn: True lr_warmup_epochs: 3 - warmup_mode: linear_epoch_step + warmup_mode: LinearEpochLRWarmup initial_lr: 4e-4 cosine_final_lr_ratio: 0.1 @@ -66,7 +66,7 @@ training_hyperparams: phase_callbacks: [] loss: - ppyoloe_loss: + PPYoloELoss: num_classes: ${num_classes} reg_max: 16 diff --git a/src/super_gradients/recipes/roboflow_yolo_nas_s.yaml b/src/super_gradients/recipes/roboflow_yolo_nas_s.yaml index cf94960756..8fb2baf901 100644 --- a/src/super_gradients/recipes/roboflow_yolo_nas_s.yaml +++ b/src/super_gradients/recipes/roboflow_yolo_nas_s.yaml @@ -46,7 +46,7 @@ training_hyperparams: zero_weight_decay_on_bias_and_bn: True lr_warmup_epochs: 3 - warmup_mode: linear_epoch_step + warmup_mode: LinearEpochLRWarmup initial_lr: 5e-4 cosine_final_lr_ratio: 0.1 @@ -66,7 +66,7 @@ training_hyperparams: phase_callbacks: [] loss: - ppyoloe_loss: + PPYoloELoss: num_classes: ${num_classes} reg_max: 16 diff --git a/src/super_gradients/recipes/supervisely_unet.yaml b/src/super_gradients/recipes/supervisely_unet.yaml index a05b8f8bd0..e5caa4b3b8 100644 --- 
a/src/super_gradients/recipes/supervisely_unet.yaml +++ b/src/super_gradients/recipes/supervisely_unet.yaml @@ -29,7 +29,7 @@ training_hyperparams: initial_lr: 0.025 loss: - bce_dice_loss: + BCEDiceLoss: loss_weights: [ 1., 1. ] logits: True diff --git a/src/super_gradients/recipes/training_hyperparams/cifar10_resnet_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/cifar10_resnet_train_params.yaml index 55c65dc639..0905ba57ff 100644 --- a/src/super_gradients/recipes/training_hyperparams/cifar10_resnet_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/cifar10_resnet_train_params.yaml @@ -10,10 +10,10 @@ lr_updates: step: 50 lr_decay_factor: 0.1 -lr_mode: step +lr_mode: StepLRScheduler lr_warmup_epochs: 0 initial_lr: 0.1 -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss optimizer: SGD criterion_params: {} @@ -34,4 +34,3 @@ valid_metrics_list: # metrics for evaluation - Top5 _convert_: all - diff --git a/src/super_gradients/recipes/training_hyperparams/cityscapes_default_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/cityscapes_default_train_params.yaml index 47f4baaa03..3ceb393b10 100644 --- a/src/super_gradients/recipes/training_hyperparams/cityscapes_default_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/cityscapes_default_train_params.yaml @@ -2,7 +2,7 @@ defaults: - default_train_params max_epochs: 800 -lr_mode: poly +lr_mode: PolyLRScheduler initial_lr: 0.01 # for effective batch_size=32 lr_warmup_epochs: 10 multiply_head_lr: 10. diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_dekr_pose_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_dekr_pose_train_params.yaml index 045d74986d..7ed162ad83 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco2017_dekr_pose_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco2017_dekr_pose_train_params.yaml @@ -8,11 +8,11 @@ ema_params: beta: 20 max_epochs: 150 -lr_mode: cosine +lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0.1 batch_accumulate: 1 initial_lr: 1e-3 -loss: dekr_loss +loss: DEKRLoss criterion_params: heatmap_loss: qfl diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_ppyoloe_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_ppyoloe_train_params.yaml index 21f24ec157..b80d8fcb66 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco2017_ppyoloe_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco2017_ppyoloe_train_params.yaml @@ -4,13 +4,13 @@ defaults: max_epochs: 500 static_assigner_end_epoch: 150 -warmup_mode: "linear_batch_step" +warmup_mode: LinearBatchLRWarmup warmup_initial_lr: 1e-6 lr_warmup_steps: 1000 lr_warmup_epochs: 0 initial_lr: 2e-3 -lr_mode: cosine +lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0.1 zero_weight_decay_on_bias_and_bn: False @@ -19,7 +19,7 @@ batch_accumulate: 1 save_ckpt_epoch_list: [200, 250, 300, 350, 400, 450] loss: - ppyoloe_loss: + PPYoloELoss: num_classes: ${arch_params.num_classes} reg_max: ${arch_params.head.reg_max} diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_rescoring_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_rescoring_train_params.yaml index 8257edd78e..62ce33e6f2 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco2017_rescoring_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco2017_rescoring_train_params.yaml @@ -8,11 
+8,11 @@ ema_params: beta: 20 max_epochs: 50 -lr_mode: cosine +lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0.1 batch_accumulate: 1 initial_lr: 0.001 -loss: rescoring_loss +loss: RescoringLoss criterion_params: {} mixed_precision: False diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_ssd_lite_mobilenet_v2_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_ssd_lite_mobilenet_v2_train_params.yaml index f3e29743cb..65239ffa13 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco2017_ssd_lite_mobilenet_v2_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco2017_ssd_lite_mobilenet_v2_train_params.yaml @@ -3,11 +3,11 @@ defaults: ema: True max_epochs: 400 -lr_mode: cosine +lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0.01 batch_accumulate: 1 initial_lr: 0.01 -loss: ssd_loss +loss: SSDLoss criterion_params: alpha: 1.0 diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_yolo_nas_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_yolo_nas_train_params.yaml index 83d6ec799b..cf7c7add94 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco2017_yolo_nas_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco2017_yolo_nas_train_params.yaml @@ -3,13 +3,13 @@ defaults: max_epochs: 300 -warmup_mode: "linear_batch_step" +warmup_mode: LinearBatchLRWarmup warmup_initial_lr: 1e-6 lr_warmup_steps: 1000 lr_warmup_epochs: 0 initial_lr: 2e-4 -lr_mode: cosine +lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0.1 zero_weight_decay_on_bias_and_bn: True @@ -18,7 +18,7 @@ batch_accumulate: 1 save_ckpt_epoch_list: [100, 200, 250] loss: - ppyoloe_loss: + PPYoloELoss: use_static_assigner: False num_classes: ${arch_params.num_classes} reg_max: 16 diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_yolox_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_yolox_train_params.yaml index cb0df61965..fcc3fa4ba1 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco2017_yolox_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco2017_yolox_train_params.yaml @@ -2,7 +2,7 @@ defaults: - default_train_params max_epochs: 300 -lr_mode: cosine +lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0.05 lr_warmup_epochs: 5 lr_cooldown_epochs: 15 @@ -12,7 +12,7 @@ batch_accumulate: 1 save_ckpt_epoch_list: [285] -loss: yolox_loss +loss: YoloXDetectionLoss criterion_params: strides: [8, 16, 32] # output strides of all yolo outputs diff --git a/src/super_gradients/recipes/training_hyperparams/coco_segmentation_shelfnet_lw_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco_segmentation_shelfnet_lw_train_params.yaml index fd644b0913..69e0fe5032 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco_segmentation_shelfnet_lw_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco_segmentation_shelfnet_lw_train_params.yaml @@ -3,11 +3,11 @@ defaults: max_epochs: 150 initial_lr: 5e-3 -loss: shelfnet_ohem_loss +loss: ShelfNetOHEMLoss optimizer: SGD mixed_precision: True batch_accumulate: 3 -lr_mode: poly +lr_mode: PolyLRScheduler optimizer_params: momentum: 0.9 weight_decay: 1e-4 diff --git a/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml index a37e6c05f5..0015f58e9d 100644 --- 
a/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml @@ -13,17 +13,17 @@ resume_from_remote_sg_logger: False # bool (default=False), When true, ckpt_name ckpt_name: ckpt_latest.pth # The checkpoint (.pth file) filename in CKPT_ROOT_DIR/EXPERIMENT_NAME/ to use when resume=True and resume_path=None lr_mode: # Union[str, Mapping] - # when str: Learning rate scheduling policy, one of ['step','poly','cosine','function'] + # when str: Learning rate scheduling policy, one of ["StepLRScheduler", "PolyLRScheduler", "CosineLRScheduler", "ExponentialLRScheduler", "FunctionLRScheduler"] # when Mapping: refers to a torch.optim.lr_scheduler._LRScheduler, following the below API: lr_mode = {LR_SCHEDULER_CLASS_NAME: {**LR_SCHEDULER_KWARGS, "phase": XXX, "metric_name": XXX) -lr_schedule_function: # Learning rate scheduling function to be used when `lr_mode` is 'function'. +lr_schedule_function: # Learning rate scheduling function to be used when `lr_mode` is 'FunctionLRScheduler'. lr_warmup_epochs: 0 # number of epochs for learning rate warm up - see https://arxiv.org/pdf/1706.02677.pdf (Section 2.2). -lr_warmup_steps: 0 # number of warmup steps (Used when warmup_mode=linear_batch_step) +lr_warmup_steps: 0 # number of warmup steps (Used when warmup_mode=LinearBatchLRWarmup) lr_cooldown_epochs: 0 # epochs to cooldown LR (i.e the last epoch from scheduling view point=max_epochs-cooldown) -warmup_initial_lr: # Initial lr for linear_epoch_step/linear_batch_step. When none is given, initial_lr/(warmup_epochs+1) will be used. -step_lr_update_freq: # (float) update frequency in epoch units for computing lr_updates when lr_mode=`step`. -cosine_final_lr_ratio: 0.01 # final learning rate ratio (only relevant when `lr_mode`='cosine') -warmup_mode: linear_epoch_step # learning rate warmup scheme, currently 'linear_epoch_step' and 'linear_batch_step' are supported +warmup_initial_lr: # Initial lr for LinearEpochLRWarmup/LinearBatchLRWarmup. When none is given, initial_lr/(warmup_epochs+1) will be used. +step_lr_update_freq: # (float) update frequency in epoch units for computing lr_updates when lr_mode=`StepLRScheduler`. +cosine_final_lr_ratio: 0.01 # final learning rate ratio (only relevant when `lr_mode`='CosineLRScheduler') +warmup_mode: LinearEpochLRWarmup # learning rate warmup scheme, currently ['LinearEpochLRWarmup', 'LinearEpochLRWarmup', 'LinearBatchLRWarmup'] are supported lr_updates: _target_: super_gradients.training.utils.utils.empty_list # This is a workaround to instantiate a list using _target_. 
If we would instantiate as "lr_updates: []", diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_efficientnet_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_efficientnet_train_params.yaml index 3f8c1b122c..766b968597 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_efficientnet_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_efficientnet_train_params.yaml @@ -2,7 +2,7 @@ defaults: - default_train_params max_epochs: 450 -lr_mode: step +lr_mode: StepLRScheduler step_lr_update_freq: 2.4 initial_lr: 0.016 lr_warmup_epochs: 3 @@ -20,7 +20,7 @@ ema_params: decay: 0.9999 decay_type: constant -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss criterion_params: smooth_eps: 0.1 diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv2_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv2_train_params.yaml index a0703b43b1..813ff21a43 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv2_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv2_train_params.yaml @@ -3,7 +3,7 @@ defaults: max_epochs: 450 -lr_mode: step +lr_mode: StepLRScheduler initial_lr: 0.032 # for total batch-size of 512 lr_decay_factor: 0.973 lr_updates: @@ -20,7 +20,7 @@ optimizer_params: alpha: 0.9 eps: 0.001 -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss zero_weight_decay_on_bias_and_bn: True ema: True diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv3_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv3_train_params.yaml index b7aa565199..1dddb79b14 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv3_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv3_train_params.yaml @@ -2,7 +2,7 @@ defaults: - default_train_params max_epochs: 150 -lr_mode: cosine +lr_mode: CosineLRScheduler initial_lr: 0.1 optimizer: SGD @@ -10,7 +10,7 @@ optimizer_params: weight_decay: 0.00004 lr_warmup_epochs: 5 -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss criterion_params: smooth_eps: 0.1 diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_regnetY_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_regnetY_train_params.yaml index ad8d2f498c..b1b90729ea 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_regnetY_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_regnetY_train_params.yaml @@ -2,7 +2,7 @@ defaults: - default_train_params max_epochs: 450 -lr_mode: step +lr_mode: StepLRScheduler step_lr_update_freq: 2.4 initial_lr: 0.016 lr_warmup_epochs: 3 @@ -20,7 +20,7 @@ ema_params: decay_type: constant decay: 0.9999 -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss criterion_params: smooth_eps: 0.1 diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_repvgg_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_repvgg_train_params.yaml index 215923583e..966aa8b194 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_repvgg_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_repvgg_train_params.yaml @@ -2,11 +2,11 @@ defaults: - default_train_params max_epochs: 120 -lr_mode: cosine +lr_mode: CosineLRScheduler initial_lr: 0.1 cosine_final_lr_ratio: 0 -loss: 
cross_entropy +loss: LabelSmoothingCrossEntropyLoss zero_weight_decay_on_bias_and_bn: True average_best_models: True diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_resnet50_kd_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_resnet50_kd_train_params.yaml index 84c1a09501..6e39f6a4d2 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_resnet50_kd_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_resnet50_kd_train_params.yaml @@ -3,7 +3,7 @@ defaults: max_epochs: 610 initial_lr: 5e-3 -lr_mode: cosine +lr_mode: CosineLRScheduler lr_warmup_epochs: 5 lr_cooldown_epochs: 10 ema: True @@ -12,7 +12,7 @@ zero_weight_decay_on_bias_and_bn: True optimizer: Lamb optimizer_params: weight_decay: 0.02 -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss train_metrics_list: # metrics for evaluation - Accuracy - Top5 @@ -21,4 +21,4 @@ valid_metrics_list: # metrics for evaluation - Top5 -_convert_: all \ No newline at end of file +_convert_: all diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_resnet50_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_resnet50_train_params.yaml index 11df8abe3c..4dac223ac0 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_resnet50_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_resnet50_train_params.yaml @@ -3,13 +3,13 @@ defaults: max_epochs: 400 initial_lr: 0.1 -lr_mode: cosine +lr_mode: CosineLRScheduler lr_warmup_epochs: 5 ema: False save_ckpt_epoch_list: [ 50, 100, 150, 200, 300 ] mixed_precision: True zero_weight_decay_on_bias_and_bn: True -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss train_metrics_list: # metrics for evaluation - Accuracy - Top5 @@ -21,4 +21,4 @@ valid_metrics_list: # metrics for evaluation metric_to_watch: Accuracy greater_metric_to_watch_is_better: True -_convert_: all \ No newline at end of file +_convert_: all diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_vit_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_vit_train_params.yaml index 749a1dc921..63598dd9ce 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_vit_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_vit_train_params.yaml @@ -3,13 +3,13 @@ defaults: max_epochs: 10 initial_lr: 0.03 -lr_mode: cosine +lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0 lr_warmup_epochs: 1 warmup_initial_lr: 0 -warmup_mode: linear_epoch_step +warmup_mode: LinearEpochLRWarmup ema: False -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss clip_grad_norm: 1 optimizer: SGD optimizer_params: diff --git a/src/super_gradients/recipes/training_hyperparams/supervisely_default_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/supervisely_default_train_params.yaml index 7a770dd305..b544a381ad 100644 --- a/src/super_gradients/recipes/training_hyperparams/supervisely_default_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/supervisely_default_train_params.yaml @@ -2,7 +2,7 @@ defaults: - default_train_params max_epochs: 100 -lr_mode: cosine +lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0.01 initial_lr: 0.1 lr_warmup_epochs: 0 diff --git a/src/super_gradients/training/losses/__init__.py b/src/super_gradients/training/losses/__init__.py index f14781c2a2..b0eae2eb2e 100755 --- a/src/super_gradients/training/losses/__init__.py +++ 
b/src/super_gradients/training/losses/__init__.py @@ -1,6 +1,6 @@ from super_gradients.training.losses.focal_loss import FocalLoss from super_gradients.training.losses.kd_losses import KDLogitsLoss -from super_gradients.training.losses.label_smoothing_cross_entropy_loss import LabelSmoothingCrossEntropyLoss +from super_gradients.training.losses.label_smoothing_cross_entropy_loss import CrossEntropyLoss, LabelSmoothingCrossEntropyLoss from super_gradients.training.losses.r_squared_loss import RSquaredLoss from super_gradients.training.losses.shelfnet_ohem_loss import ShelfNetOHEMLoss from super_gradients.training.losses.shelfnet_semantic_encoding_loss import ShelfNetSemanticEncodingLoss @@ -20,7 +20,7 @@ "LOSSES", "Losses", "FocalLoss", - "LabelSmoothingCrossEntropyLoss", + "CrossEntropyLoss", "ShelfNetOHEMLoss", "ShelfNetSemanticEncodingLoss", "YoloXDetectionLoss", @@ -34,4 +34,5 @@ "DEKRLoss", "STDCLoss", "RescoringLoss", + "LabelSmoothingCrossEntropyLoss", ] diff --git a/src/super_gradients/training/losses/bce_dice_loss.py b/src/super_gradients/training/losses/bce_dice_loss.py index 973dd9b2ca..7539120111 100644 --- a/src/super_gradients/training/losses/bce_dice_loss.py +++ b/src/super_gradients/training/losses/bce_dice_loss.py @@ -7,7 +7,7 @@ from super_gradients.training.losses.dice_loss import BinaryDiceLoss -@register_loss(Losses.BCE_DICE_LOSS) +@register_loss(name=Losses.BCE_DICE_LOSS, deprecated_name="bce_dice_loss") class BCEDiceLoss(torch.nn.Module): """ Binary Cross Entropy + Dice Loss diff --git a/src/super_gradients/training/losses/dekr_loss.py b/src/super_gradients/training/losses/dekr_loss.py index 26698db494..8b2a8ea8b5 100644 --- a/src/super_gradients/training/losses/dekr_loss.py +++ b/src/super_gradients/training/losses/dekr_loss.py @@ -7,7 +7,7 @@ from super_gradients.common.registry.registry import register_loss -@register_loss(Losses.DEKR_LOSS) +@register_loss(name=Losses.DEKR_LOSS, deprecated_name="dekr_loss") class DEKRLoss(nn.Module): """ Implementation of the loss function from the "Bottom-Up Human Pose Estimation Via Disentangled Keypoint Regression" diff --git a/src/super_gradients/training/losses/dice_ce_edge_loss.py b/src/super_gradients/training/losses/dice_ce_edge_loss.py index f7cec313a5..0a0550188e 100644 --- a/src/super_gradients/training/losses/dice_ce_edge_loss.py +++ b/src/super_gradients/training/losses/dice_ce_edge_loss.py @@ -11,7 +11,7 @@ from super_gradients.training.losses.mask_loss import MaskAttentionLoss -@register_loss(Losses.DICE_CE_EDGE_LOSS) +@register_loss(name=Losses.DICE_CE_EDGE_LOSS, deprecated_name="dice_ce_edge_loss") class DiceCEEdgeLoss(_Loss): def __init__( self, diff --git a/src/super_gradients/training/losses/kd_losses.py b/src/super_gradients/training/losses/kd_losses.py index 2d3c1908c9..a42ee2c448 100644 --- a/src/super_gradients/training/losses/kd_losses.py +++ b/src/super_gradients/training/losses/kd_losses.py @@ -15,13 +15,13 @@ def forward(self, student_output, teacher_output): return super(KDklDivLoss, self).forward(torch.log_softmax(student_output, dim=1), torch.softmax(teacher_output, dim=1)) -@register_loss(Losses.KD_LOSS) +@register_loss(name=Losses.KD_LOSS, deprecated_name="kd_loss") class KDLogitsLoss(_Loss): """Knowledge distillation loss, wraps the task loss and distillation loss""" def __init__(self, task_loss_fn: _Loss, distillation_loss_fn: _Loss = KDklDivLoss(), distillation_loss_coeff: float = 0.5): """ - :param task_loss_fn: task loss. E.g., LabelSmoothingCrossEntropyLoss + :param task_loss_fn: task loss. 
E.g., CrossEntropyLoss :param distillation_loss_fn: distillation loss. E.g., KLDivLoss :param distillation_loss_coeff: """ diff --git a/src/super_gradients/training/losses/label_smoothing_cross_entropy_loss.py b/src/super_gradients/training/losses/label_smoothing_cross_entropy_loss.py index affcbdb6db..f9a1f36476 100755 --- a/src/super_gradients/training/losses/label_smoothing_cross_entropy_loss.py +++ b/src/super_gradients/training/losses/label_smoothing_cross_entropy_loss.py @@ -4,6 +4,7 @@ from super_gradients.common.object_names import Losses from super_gradients.common.registry.registry import register_loss +from super_gradients.common.deprecate import deprecated def onehot(indexes, N=None, ignore_index=None): @@ -83,12 +84,12 @@ def cross_entropy(inputs, target, weight=None, ignore_index=-100, reduction="mea return loss -@register_loss(Losses.CROSS_ENTROPY) -class LabelSmoothingCrossEntropyLoss(nn.CrossEntropyLoss): +@register_loss(name=Losses.CROSS_ENTROPY, deprecated_name="cross_entropy") +class CrossEntropyLoss(nn.CrossEntropyLoss): """CrossEntropyLoss - with ability to recieve distrbution as targets, and optional label smoothing""" def __init__(self, weight=None, ignore_index=-100, reduction="mean", smooth_eps=None, smooth_dist=None, from_logits=True): - super(LabelSmoothingCrossEntropyLoss, self).__init__(weight=weight, ignore_index=ignore_index, reduction=reduction) + super(CrossEntropyLoss, self).__init__(weight=weight, ignore_index=ignore_index, reduction=reduction) self.smooth_eps = smooth_eps self.smooth_dist = smooth_dist self.from_logits = from_logits @@ -109,3 +110,8 @@ def forward(self, input, target, smooth_dist=None): # CHANGED TO THE CURRENT FORMAT- OUR CRITERION FUNCTIONS SHOULD ALL NPW RETURN A TUPLE OF (LOSS_FOR_BACKPROP, ADDITIONAL_ITEMS) # WHERE ADDITIONAL ITEMS ARE TORCH TENSORS OF SIZE (N_ITEMS,...) DETACHED FROM THEIR GRADIENTS FOR LOGGING return loss, loss.unsqueeze(0).detach() + + +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=CrossEntropyLoss) +class LabelSmoothingCrossEntropyLoss(CrossEntropyLoss): + ... 
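The changes above keep the old spellings working: `register_loss(name=..., deprecated_name=...)` records the lowercase alias, `warn_if_deprecated` emits a one-time `DeprecationWarning` when it is used, and the old class name survives as a thin `@deprecated` subclass. A minimal sketch of what that means for user code:

```python
# Sketch of the backward-compatibility path introduced above.
train_params_old = {"loss": "cross_entropy"}        # still resolves, but warns:
                                                    # "Object name `cross_entropy` is now deprecated. ..."
train_params_new = {"loss": "CrossEntropyLoss"}     # preferred registered name

# The old class name is kept as an alias of the renamed class:
from super_gradients.training.losses import CrossEntropyLoss, LabelSmoothingCrossEntropyLoss
assert issubclass(LabelSmoothingCrossEntropyLoss, CrossEntropyLoss)
```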
diff --git a/src/super_gradients/training/losses/ppyolo_loss.py b/src/super_gradients/training/losses/ppyolo_loss.py index e42588f2e7..8fc851896c 100644 --- a/src/super_gradients/training/losses/ppyolo_loss.py +++ b/src/super_gradients/training/losses/ppyolo_loss.py @@ -628,7 +628,7 @@ def __call__(self, pbox: Tensor, gbox: Tensor, iou_weight=1.0, loc_reweight=None return loss * self.loss_weight -@register_loss(Losses.PPYOLOE_LOSS) +@register_loss(name=Losses.PPYOLOE_LOSS, deprecated_name="ppyoloe_loss") class PPYoloELoss(nn.Module): def __init__( self, diff --git a/src/super_gradients/training/losses/r_squared_loss.py b/src/super_gradients/training/losses/r_squared_loss.py index ece6baa63c..d5c15fad82 100755 --- a/src/super_gradients/training/losses/r_squared_loss.py +++ b/src/super_gradients/training/losses/r_squared_loss.py @@ -9,7 +9,7 @@ from super_gradients.training.utils import convert_to_tensor -@register_loss(Losses.R_SQUARED_LOSS) +@register_loss(name=Losses.R_SQUARED_LOSS, deprecated_name="r_squared_loss") class RSquaredLoss(_Loss): def forward(self, output, target): # FIXME - THIS NEEDS TO BE CHANGED SUCH THAT THIS CLASS INHERETS FROM _Loss (TAKE A LOOK AT YoLoV3DetectionLoss) diff --git a/src/super_gradients/training/losses/rescoring_loss.py b/src/super_gradients/training/losses/rescoring_loss.py index c27acef5b2..3ead9958bb 100644 --- a/src/super_gradients/training/losses/rescoring_loss.py +++ b/src/super_gradients/training/losses/rescoring_loss.py @@ -7,7 +7,7 @@ from super_gradients.common.registry import register_loss -@register_loss(Losses.RESCORING_LOSS) +@register_loss(name=Losses.RESCORING_LOSS, deprecated_name="rescoring_loss") class RescoringLoss(nn.Module): def __init__(self): super().__init__() diff --git a/src/super_gradients/training/losses/seg_kd_loss.py b/src/super_gradients/training/losses/seg_kd_loss.py index 58f9ea3ec8..2791b527d8 100644 --- a/src/super_gradients/training/losses/seg_kd_loss.py +++ b/src/super_gradients/training/losses/seg_kd_loss.py @@ -9,7 +9,7 @@ class SegKDLoss(nn.Module): """ Wrapper loss for semantic segmentation KD. - This loss includes two loss components, `ce_loss` i.e CrossEntropyLoss, and `kd_loss` i.e + This loss includes two loss components, `ce_loss` i.e CrossEntropyLoss, and `KDLogitsLoss` i.e `ChannelWiseKnowledgeDistillationLoss`. 
""" diff --git a/src/super_gradients/training/losses/shelfnet_ohem_loss.py b/src/super_gradients/training/losses/shelfnet_ohem_loss.py index f699bf83a8..61a06dde52 100755 --- a/src/super_gradients/training/losses/shelfnet_ohem_loss.py +++ b/src/super_gradients/training/losses/shelfnet_ohem_loss.py @@ -5,7 +5,7 @@ from super_gradients.training.losses.ohem_ce_loss import OhemCELoss -@register_loss(Losses.SHELFNET_OHEM_LOSS) +@register_loss(name=Losses.SHELFNET_OHEM_LOSS, deprecated_name="shelfnet_ohem_loss") class ShelfNetOHEMLoss(OhemCELoss): def __init__(self, threshold: float = 0.7, mining_percent: float = 1e-4, ignore_lb: int = 255): """ diff --git a/src/super_gradients/training/losses/shelfnet_semantic_encoding_loss.py b/src/super_gradients/training/losses/shelfnet_semantic_encoding_loss.py index 864788bf26..c630ebb973 100755 --- a/src/super_gradients/training/losses/shelfnet_semantic_encoding_loss.py +++ b/src/super_gradients/training/losses/shelfnet_semantic_encoding_loss.py @@ -6,7 +6,7 @@ from super_gradients.common.registry.registry import register_loss -@register_loss(Losses.SHELFNET_SE_LOSS) +@register_loss(name=Losses.SHELFNET_SE_LOSS, deprecated_name="shelfnet_se_loss") class ShelfNetSemanticEncodingLoss(nn.CrossEntropyLoss): """2D Cross Entropy Loss with Auxilary Loss""" diff --git a/src/super_gradients/training/losses/ssd_loss.py b/src/super_gradients/training/losses/ssd_loss.py index c183c745ce..be1b27e906 100755 --- a/src/super_gradients/training/losses/ssd_loss.py +++ b/src/super_gradients/training/losses/ssd_loss.py @@ -52,7 +52,7 @@ def forward(self, pred_labels, target_labels): return closs -@register_loss(Losses.SSD_LOSS) +@register_loss(name=Losses.SSD_LOSS, deprecated_name="ssd_loss") class SSDLoss(_Loss): """ Implements the loss as the sum of the followings: diff --git a/src/super_gradients/training/losses/stdc_loss.py b/src/super_gradients/training/losses/stdc_loss.py index ad0fa44182..6b0a3375e0 100644 --- a/src/super_gradients/training/losses/stdc_loss.py +++ b/src/super_gradients/training/losses/stdc_loss.py @@ -111,7 +111,7 @@ def forward(self, detail_out: torch.Tensor, detail_target: torch.Tensor): return self.weights[0] * bce_loss + self.weights[1] * dice_loss -@register_loss(Losses.STDC_LOSS) +@register_loss(name=Losses.STDC_LOSS, deprecated_name="stdc_loss") class STDCLoss(_Loss): """ Loss class of STDC-Seg training. diff --git a/src/super_gradients/training/losses/yolox_loss.py b/src/super_gradients/training/losses/yolox_loss.py index f5d6696f1f..b8ffe022d7 100644 --- a/src/super_gradients/training/losses/yolox_loss.py +++ b/src/super_gradients/training/losses/yolox_loss.py @@ -81,7 +81,7 @@ def forward(self, pred, target): return loss -@register_loss(Losses.YOLOX_LOSS) +@register_loss(name=Losses.YOLOX_LOSS, deprecated_name="yolox_loss") class YoloXDetectionLoss(_Loss): """ Calculate YOLOX loss: @@ -626,7 +626,7 @@ def dynamic_k_matching(self, cost, pair_wise_ious, gt_classes, num_gt, fg_mask): return num_fg, gt_matched_classes, pred_ious_this_matching, matched_gt_inds -@register_loss(Losses.YOLOX_FAST_LOSS) +@register_loss(name=Losses.YOLOX_FAST_LOSS, deprecated_name="yolox_fast_loss") class YoloXFastDetectionLoss(YoloXDetectionLoss): """ A completely new implementation of YOLOX loss. 
diff --git a/src/super_gradients/training/params.py b/src/super_gradients/training/params.py index 21c18d05c6..1388457841 100755 --- a/src/super_gradients/training/params.py +++ b/src/super_gradients/training/params.py @@ -48,7 +48,7 @@ "save_tensorboard_remote": False, # upload tensorboard files to s3 "save_logs_remote": False, }, # upload log files to s3 - "warmup_mode": "linear_step", + "warmup_mode": "LinearEpochLRWarmup", "step_lr_update_freq": None, "lr_updates": [], "clip_grad_norm": None, @@ -100,7 +100,7 @@ "lr_warmup_epochs": {"type": "number", "minimum": 0, "maximum": 10}, "initial_lr": {"type": "number", "exclusiveMinimum": 0, "maximum": 10}, }, - "if": {"properties": {"lr_mode": {"const": "step"}}}, + "if": {"properties": {"lr_mode": {"const": "StepLRScheduler"}}}, "then": {"required": ["lr_updates", "lr_decay_factor"]}, "required": ["max_epochs", "lr_mode", "initial_lr", "loss"], } diff --git a/src/super_gradients/training/pre_launch_callbacks/pre_launch_callbacks.py b/src/super_gradients/training/pre_launch_callbacks/pre_launch_callbacks.py index 486db3f085..cca464fb5f 100644 --- a/src/super_gradients/training/pre_launch_callbacks/pre_launch_callbacks.py +++ b/src/super_gradients/training/pre_launch_callbacks/pre_launch_callbacks.py @@ -300,8 +300,8 @@ def modify_params_for_qat( logger.warning(f"New learning rate: {training_hyperparams['initial_lr']}") logger.warning(f"New weight decay: {training_hyperparams['optimizer_params']['weight_decay']}") # as recommended by pytorch-quantization docs - if get_param(training_hyperparams, "lr_mode") != "cosine": - training_hyperparams["lr_mode"] = "cosine" + if get_param(training_hyperparams, "lr_mode") != "CosineLRScheduler": + training_hyperparams["lr_mode"] = "CosineLRScheduler" training_hyperparams["cosine_final_lr_ratio"] = cosine_final_lr_ratio logger.warning( f"lr_mode will be set to cosine for QAT run instead of {get_param(training_hyperparams, 'lr_mode')} with " diff --git a/src/super_gradients/training/sg_trainer/sg_trainer.py b/src/super_gradients/training/sg_trainer/sg_trainer.py index 8e7943b15d..5f224838f6 100755 --- a/src/super_gradients/training/sg_trainer/sg_trainer.py +++ b/src/super_gradients/training/sg_trainer/sg_trainer.py @@ -773,27 +773,29 @@ def train( - `lr_updates` : list(int) - List of fixed epoch numbers to perform learning rate updates when `lr_mode='step'`. + List of fixed epoch numbers to perform learning rate updates when `lr_mode='StepLRScheduler'`. - `lr_decay_factor` : float - Decay factor to apply to the learning rate at each update when `lr_mode='step'`. + Decay factor to apply to the learning rate at each update when `lr_mode='StepLRScheduler'`. - `lr_mode` : Union[str, Mapping], When str: - Learning rate scheduling policy, one of ['step','poly','cosine','function']. + Learning rate scheduling policy, one of ['StepLRScheduler','PolyLRScheduler','CosineLRScheduler','FunctionLRScheduler']. - 'step' refers to constant updates at epoch numbers passed through `lr_updates`. Each update decays the learning rate by `lr_decay_factor`. + 'StepLRScheduler' refers to constant updates at epoch numbers passed through `lr_updates`. + Each update decays the learning rate by `lr_decay_factor`. - 'cosine' refers to the Cosine Anealing policy as mentioned in https://arxiv.org/abs/1608.03983. + 'CosineLRScheduler' refers to the Cosine Anealing policy as mentioned in https://arxiv.org/abs/1608.03983. The final learning rate ratio is controlled by `cosine_final_lr_ratio` training parameter. 
- 'poly' refers to the polynomial decrease: in each epoch iteration `self.lr = self.initial_lr * pow((1.0 - (current_iter / max_iter)), 0.9)` + 'PolyLRScheduler' refers to the polynomial decrease: + in each epoch iteration `self.lr = self.initial_lr * pow((1.0 - (current_iter / max_iter)), 0.9)` - 'function' refers to a user-defined learning rate scheduling function, that is passed through `lr_schedule_function`. + 'FunctionLRScheduler' refers to a user-defined learning rate scheduling function, that is passed through `lr_schedule_function`. @@ -828,7 +830,7 @@ def train( - `lr_schedule_function` : Union[callable,None] - Learning rate scheduling function to be used when `lr_mode` is 'function'. + Learning rate scheduling function to be used when `lr_mode` is 'FunctionLRScheduler'. - `warmup_mode`: Union[str, Type[LRCallbackBase], None] @@ -851,7 +853,7 @@ def train( The capping is done to avoid interference of warmup with epoch-based schedulers. - `cosine_final_lr_ratio` : float (default=0.01) - Final learning rate ratio (only relevant when `lr_mode`='cosine'). The cosine starts from initial_lr and reaches + Final learning rate ratio (only relevant when `lr_mode`='CosineLRScheduler'). The cosine starts from initial_lr and reaches initial_lr * cosine_final_lr_ratio in last epoch - `inital_lr` : float @@ -863,13 +865,13 @@ def train( Loss function for training. One of SuperGradient's built in options: - "cross_entropy": LabelSmoothingCrossEntropyLoss, - "mse": MSELoss, - "r_squared_loss": RSquaredLoss, - "detection_loss": YoLoV3DetectionLoss, - "shelfnet_ohem_loss": ShelfNetOHEMLoss, - "shelfnet_se_loss": ShelfNetSemanticEncodingLoss, - "ssd_loss": SSDLoss, + - CrossEntropyLoss, + - MSELoss, + - RSquaredLoss, + - YoLoV3DetectionLoss, + - ShelfNetOHEMLoss, + - ShelfNetSemanticEncodingLoss, + - SSDLoss, or user defined nn.module loss function. 
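With the renamed schedulers, warmups, and losses, a `training_params` dict passed to `Trainer.train` now uses the class-style strings documented above. The example below mirrors the updated test configurations later in this patch; the concrete hyperparameter values are placeholders.

```python
# Minimal training_params using the new string identifiers (values are placeholders).
train_params = {
    "max_epochs": 10,
    "initial_lr": 0.1,
    "lr_mode": "CosineLRScheduler",        # was "cosine"
    "cosine_final_lr_ratio": 0.01,
    "warmup_mode": "LinearEpochLRWarmup",  # was "linear_epoch_step"
    "lr_warmup_epochs": 3,
    "loss": "CrossEntropyLoss",            # was "cross_entropy"
    "optimizer": "SGD",
    "train_metrics_list": ["Accuracy"],
    "valid_metrics_list": ["Accuracy"],
    "metric_to_watch": "Accuracy",
}
```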
@@ -1240,6 +1242,10 @@ def forward(self, inputs, targets): warmup_mode = self.training_params.warmup_mode warmup_callback_cls = None if isinstance(warmup_mode, str): + from super_gradients.common.registry.registry import warn_if_deprecated + + warn_if_deprecated(warmup_mode, LR_WARMUP_CLS_DICT) + warmup_callback_cls = LR_WARMUP_CLS_DICT[warmup_mode] elif isinstance(warmup_mode, type) and issubclass(warmup_mode, LRCallbackBase): warmup_callback_cls = warmup_mode diff --git a/src/super_gradients/training/utils/callbacks/__init__.py b/src/super_gradients/training/utils/callbacks/__init__.py index db705a5d5c..31103bd3ee 100644 --- a/src/super_gradients/training/utils/callbacks/__init__.py +++ b/src/super_gradients/training/utils/callbacks/__init__.py @@ -3,13 +3,13 @@ ModelConversionCheckCallback, DeciLabUploadCallback, LRCallbackBase, - EpochStepWarmupLRCallback, - BatchStepLinearWarmupLRCallback, - StepLRCallback, - ExponentialLRCallback, - PolyLRCallback, - CosineLRCallback, - FunctionLRCallback, + LinearEpochLRWarmup, + LinearBatchLRWarmup, + StepLRScheduler, + ExponentialLRScheduler, + PolyLRScheduler, + CosineLRScheduler, + FunctionLRScheduler, IllegalLRSchedulerMetric, LRSchedulerCallback, MetricsUpdateCallback, @@ -21,6 +21,13 @@ YoloXTrainingStageSwitchCallback, TestLRCallback, TimerCallback, + EpochStepWarmupLRCallback, + BatchStepLinearWarmupLRCallback, + StepLRCallback, + ExponentialLRCallback, + PolyLRCallback, + CosineLRCallback, + FunctionLRCallback, ) from super_gradients.training.utils.callbacks.ppyoloe_switch_callback import PPYoloETrainingStageSwitchCallback from super_gradients.common.object_names import Callbacks, LRSchedulers, LRWarmups @@ -40,13 +47,13 @@ "ModelConversionCheckCallback", "DeciLabUploadCallback", "LRCallbackBase", - "EpochStepWarmupLRCallback", - "BatchStepLinearWarmupLRCallback", - "StepLRCallback", - "ExponentialLRCallback", - "PolyLRCallback", - "CosineLRCallback", - "FunctionLRCallback", + "LinearEpochLRWarmup", + "LinearBatchLRWarmup", + "StepLRScheduler", + "ExponentialLRScheduler", + "PolyLRScheduler", + "CosineLRScheduler", + "FunctionLRScheduler", "IllegalLRSchedulerMetric", "LRSchedulerCallback", "MetricsUpdateCallback", @@ -60,4 +67,11 @@ "TestLRCallback", "PPYoloETrainingStageSwitchCallback", "TimerCallback", + "EpochStepWarmupLRCallback", + "BatchStepLinearWarmupLRCallback", + "StepLRCallback", + "ExponentialLRCallback", + "PolyLRCallback", + "CosineLRCallback", + "FunctionLRCallback", ] diff --git a/src/super_gradients/training/utils/callbacks/callbacks.py b/src/super_gradients/training/utils/callbacks/callbacks.py index 11796e31ab..e0b52fa327 100644 --- a/src/super_gradients/training/utils/callbacks/callbacks.py +++ b/src/super_gradients/training/utils/callbacks/callbacks.py @@ -12,9 +12,9 @@ import onnx import onnxruntime import torch -from deprecated import deprecated from torch.utils.data import DataLoader from torchmetrics import MetricCollection, Metric +from torchvision.utils import draw_segmentation_masks from super_gradients.common.abstractions.abstract_logger import get_logger from super_gradients.common.decorators.factory_decorator import resolve_param @@ -32,7 +32,8 @@ from super_gradients.training.utils.segmentation_utils import BinarySegmentationVisualization from super_gradients.common.environment.checkpoints_dir_utils import get_project_checkpoints_dir_path from super_gradients.training.utils.utils import unwrap_model -from torchvision.utils import draw_segmentation_masks +from super_gradients.common.deprecate import 
deprecated + logger = get_logger(__name__) @@ -276,8 +277,8 @@ def update_lr(self, optimizer, epoch, batch_idx=None): param_group["lr"] = self.lr -@register_lr_warmup(LRWarmups.LINEAR_EPOCH_STEP) -class EpochStepWarmupLRCallback(LRCallbackBase): +@register_lr_warmup(LRWarmups.LINEAR_EPOCH_STEP, deprecated_name="linear_epoch_step") +class LinearEpochLRWarmup(LRCallbackBase): """ LR scheduling callback for linear step warmup. This scheduler uses a whole epoch as single step. LR climbs from warmup_initial_lr with even steps to initial lr. When warmup_initial_lr is None - LR climb starts from @@ -286,7 +287,7 @@ class EpochStepWarmupLRCallback(LRCallbackBase): """ def __init__(self, **kwargs): - super(EpochStepWarmupLRCallback, self).__init__(Phase.TRAIN_EPOCH_START, **kwargs) + super().__init__(Phase.TRAIN_EPOCH_START, **kwargs) self.warmup_initial_lr = self.training_params.warmup_initial_lr or self.initial_lr / (self.training_params.lr_warmup_epochs + 1) self.warmup_step_size = ( (self.initial_lr - self.warmup_initial_lr) / self.training_params.lr_warmup_epochs if self.training_params.lr_warmup_epochs > 0 else 0 @@ -300,20 +301,23 @@ def is_lr_scheduling_enabled(self, context): return self.training_params.lr_warmup_epochs > 0 and self.training_params.lr_warmup_epochs >= context.epoch -@register_lr_warmup(LRWarmups.LINEAR_STEP) -class LinearStepWarmupLRCallback(EpochStepWarmupLRCallback): - """Deprecated, use EpochStepWarmupLRCallback instead""" +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=LinearEpochLRWarmup) +class EpochStepWarmupLRCallback(LinearEpochLRWarmup): + ... - def __init__(self, **kwargs): - logger.warning( - f"Parameter {LRWarmups.LINEAR_STEP} has been made deprecated and will be removed in the next SG release. " - f"Please use `{LRWarmups.LINEAR_EPOCH_STEP}` instead." - ) - super(LinearStepWarmupLRCallback, self).__init__(**kwargs) +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=LinearEpochLRWarmup) +class LinearLRWarmup(LinearEpochLRWarmup): + ... -@register_lr_warmup(LRWarmups.LINEAR_BATCH_STEP) -class BatchStepLinearWarmupLRCallback(Callback): + +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=LinearEpochLRWarmup) +class LinearStepWarmupLRCallback(LinearEpochLRWarmup): + ... + + +@register_lr_warmup(LRWarmups.LINEAR_BATCH_STEP, deprecated_name="linear_batch_step") +class LinearBatchLRWarmup(Callback): """ LR scheduling callback for linear step warmup on each batch step. LR climbs from warmup_initial_lr with to initial lr. @@ -339,7 +343,7 @@ def __init__( :param kwargs: """ - super(BatchStepLinearWarmupLRCallback, self).__init__() + super().__init__() if lr_warmup_steps > train_loader_len: logger.warning( @@ -384,16 +388,21 @@ def update_lr(self, optimizer, epoch, batch_idx=None): param_group["lr"] = self.lr -@register_lr_scheduler(LRSchedulers.STEP) -class StepLRCallback(LRCallbackBase): +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=LinearBatchLRWarmup) +class BatchStepLinearWarmupLRCallback(LinearBatchLRWarmup): + ... + + +@register_lr_scheduler(LRSchedulers.STEP, deprecated_name="step") +class StepLRScheduler(LRCallbackBase): """ Hard coded step learning rate scheduling (i.e at specific milestones). 
""" def __init__(self, lr_updates, lr_decay_factor, step_lr_update_freq=None, **kwargs): - super(StepLRCallback, self).__init__(Phase.TRAIN_EPOCH_END, **kwargs) + super().__init__(Phase.TRAIN_EPOCH_END, **kwargs) if step_lr_update_freq and len(lr_updates): - raise ValueError("Only one of [lr_updates, step_lr_update_freq] should be passed to StepLRCallback constructor") + raise ValueError("Only one of [lr_updates, step_lr_update_freq] should be passed to StepLRScheduler constructor") if step_lr_update_freq: max_epochs = self.training_params.max_epochs - self.training_params.lr_cooldown_epochs @@ -415,8 +424,13 @@ def is_lr_scheduling_enabled(self, context): return self.training_params.lr_warmup_epochs <= context.epoch -@register_lr_scheduler(LRSchedulers.EXP) -class ExponentialLRCallback(LRCallbackBase): +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=StepLRScheduler) +class StepLRCallback(StepLRScheduler): + ... + + +@register_lr_scheduler(LRSchedulers.EXP, deprecated_name="exp") +class ExponentialLRScheduler(LRCallbackBase): """ Exponential decay learning rate scheduling. Decays the learning rate by `lr_decay_factor` every epoch. """ @@ -436,14 +450,19 @@ def is_lr_scheduling_enabled(self, context): return self.training_params.lr_warmup_epochs <= context.epoch < post_warmup_epochs -@register_lr_scheduler(LRSchedulers.POLY) -class PolyLRCallback(LRCallbackBase): +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=ExponentialLRScheduler) +class ExponentialLRCallback(ExponentialLRScheduler): + ... + + +@register_lr_scheduler(LRSchedulers.POLY, deprecated_name="poly") +class PolyLRScheduler(LRCallbackBase): """ Hard coded polynomial decay learning rate scheduling (i.e at specific milestones). """ def __init__(self, max_epochs, **kwargs): - super(PolyLRCallback, self).__init__(Phase.TRAIN_BATCH_STEP, **kwargs) + super().__init__(Phase.TRAIN_BATCH_STEP, **kwargs) self.max_epochs = max_epochs def perform_scheduling(self, context): @@ -459,14 +478,19 @@ def is_lr_scheduling_enabled(self, context): return self.training_params.lr_warmup_epochs <= context.epoch < post_warmup_epochs -@register_lr_scheduler(LRSchedulers.COSINE) -class CosineLRCallback(LRCallbackBase): +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=PolyLRScheduler) +class PolyLRCallback(PolyLRScheduler): + ... + + +@register_lr_scheduler(LRSchedulers.COSINE, deprecated_name="cosine") +class CosineLRScheduler(LRCallbackBase): """ Hard coded step Cosine anealing learning rate scheduling. """ def __init__(self, max_epochs, cosine_final_lr_ratio, **kwargs): - super(CosineLRCallback, self).__init__(Phase.TRAIN_BATCH_STEP, **kwargs) + super().__init__(Phase.TRAIN_BATCH_STEP, **kwargs) self.max_epochs = max_epochs self.cosine_final_lr_ratio = cosine_final_lr_ratio @@ -497,15 +521,20 @@ def compute_learning_rate(cls, step: Union[float, np.ndarray], total_steps: floa return lr * (1 - final_lr_ratio) + (initial_lr * final_lr_ratio) -@register_lr_scheduler(LRSchedulers.FUNCTION) -class FunctionLRCallback(LRCallbackBase): +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=CosineLRScheduler) +class CosineLRCallback(CosineLRScheduler): + ... + + +@register_lr_scheduler(LRSchedulers.FUNCTION, deprecated_name="function") +class FunctionLRScheduler(LRCallbackBase): """ Hard coded rate scheduling for user defined lr scheduling function. 
""" - @deprecated(version="3.2.0", reason="This callback is deprecated and will be removed in future versions.") + @deprecated(deprecated_since="3.2.0", removed_from="3.5.0", reason="This callback is deprecated and will be removed in future versions.") def __init__(self, max_epochs, lr_schedule_function, **kwargs): - super(FunctionLRCallback, self).__init__(Phase.TRAIN_BATCH_STEP, **kwargs) + super().__init__(Phase.TRAIN_BATCH_STEP, **kwargs) assert callable(lr_schedule_function), "self.lr_function must be callable" self.lr_schedule_function = lr_schedule_function self.max_epochs = max_epochs @@ -527,6 +556,11 @@ def perform_scheduling(self, context): self.update_lr(context.optimizer, context.epoch, context.batch_idx) +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=FunctionLRScheduler) +class FunctionLRCallback(FunctionLRScheduler): + ... + + class IllegalLRSchedulerMetric(Exception): """Exception raised illegal combination of training parameters. @@ -924,16 +958,18 @@ def create_lr_scheduler_callback( When str: - Learning rate scheduling policy, one of ['step','poly','cosine','function']. + Learning rate scheduling policy, one of ['StepLRScheduler','PolyLRScheduler','CosineLRScheduler','FunctionLRScheduler']. - 'step' refers to constant updates at epoch numbers passed through `lr_updates`. Each update decays the learning rate by `lr_decay_factor`. + 'StepLRScheduler' refers to constant updates at epoch numbers passed through `lr_updates`. + Each update decays the learning rate by `lr_decay_factor`. - 'cosine' refers to the Cosine Anealing policy as mentioned in https://arxiv.org/abs/1608.03983. + 'CosineLRScheduler' refers to the Cosine Anealing policy as mentioned in https://arxiv.org/abs/1608.03983. The final learning rate ratio is controlled by `cosine_final_lr_ratio` training parameter. - 'poly' refers to the polynomial decrease: in each epoch iteration `self.lr = self.initial_lr * pow((1.0 - (current_iter / max_iter)), 0.9)` + 'PolyLRScheduler' refers to the polynomial decrease: + in each epoch iteration `self.lr = self.initial_lr * pow((1.0 - (current_iter / max_iter)), 0.9)` - 'function' refers to a user-defined learning rate scheduling function, that is passed through `lr_schedule_function`. + 'FunctionLRScheduler' refers to a user-defined learning rate scheduling function, that is passed through `lr_schedule_function`. @@ -1339,7 +1375,7 @@ class ExtremeBatchSegVisualizationCallback(ExtremeBatchCaseVisualizationCallback max=False ignore_idx=19), ExtremeBatchSegVisualizationCallback( - loss_to_monitor="LabelSmoothingCrossEntropyLoss" + loss_to_monitor="CrossEntropyLoss" max=True ignore_idx=19)] ...} diff --git a/src/super_gradients/training/utils/deprecated_utils.py b/src/super_gradients/training/utils/deprecated_utils.py index 433615d0e1..8da5257139 100644 --- a/src/super_gradients/training/utils/deprecated_utils.py +++ b/src/super_gradients/training/utils/deprecated_utils.py @@ -10,14 +10,14 @@ def wrap_with_warning(cls: Callable, message: str) -> Any: Emits a warning when target class of function is called. 
>>> from super_gradients.training.utils.deprecated_utils import wrap_with_warning - >>> from super_gradients.training.utils.callbacks import EpochStepWarmupLRCallback, BatchStepLinearWarmupLRCallback + >>> from super_gradients.training.utils.callbacks import LinearEpochLRWarmup, LinearBatchLRWarmup >>> >>> LR_WARMUP_CLS_DICT = { >>> "linear": wrap_with_warning( - >>> EpochStepWarmupLRCallback, + >>> LinearEpochLRWarmup, >>> message=f"Parameter `linear` has been made deprecated and will be removed in the next SG release. Please use `linear_epoch` instead", >>> ), - >>> 'linear_epoch`': EpochStepWarmupLRCallback, + >>> 'linear_epoch`': LinearEpochLRWarmup, >>> } :param cls: A class or function to wrap diff --git a/tests/end_to_end_tests/cifar_trainer_test.py b/tests/end_to_end_tests/cifar_trainer_test.py index 7b91a59cba..00398ddbd0 100644 --- a/tests/end_to_end_tests/cifar_trainer_test.py +++ b/tests/end_to_end_tests/cifar_trainer_test.py @@ -25,7 +25,7 @@ def test_train_cifar10_dataloader(self): training_params={ "max_epochs": 1, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "train_metrics_list": ["Accuracy"], "valid_metrics_list": ["Accuracy"], "metric_to_watch": "Accuracy", @@ -44,7 +44,7 @@ def test_train_cifar100_dataloader(self): training_params={ "max_epochs": 1, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "train_metrics_list": ["Accuracy"], "valid_metrics_list": ["Accuracy"], "metric_to_watch": "Accuracy", diff --git a/tests/end_to_end_tests/trainer_test.py b/tests/end_to_end_tests/trainer_test.py index 122f1e7cca..edfb4ec486 100644 --- a/tests/end_to_end_tests/trainer_test.py +++ b/tests/end_to_end_tests/trainer_test.py @@ -25,8 +25,8 @@ def setUp(cls): "lr_decay_factor": 0.1, "initial_lr": 0.1, "lr_updates": [4], - "lr_mode": "step", - "loss": "cross_entropy", + "lr_mode": "StepLRScheduler", + "loss": "CrossEntropyLoss", "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], "metric_to_watch": "Accuracy", diff --git a/tests/integration_tests/conversion_callback_test.py b/tests/integration_tests/conversion_callback_test.py index 709287b4fc..22b01512cf 100644 --- a/tests/integration_tests/conversion_callback_test.py +++ b/tests/integration_tests/conversion_callback_test.py @@ -54,10 +54,10 @@ def test_classification_architectures(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], @@ -90,7 +90,7 @@ def get_architecture_custom_config(architecture_name: str): } elif re.search(r"regseg", architecture_name): return { - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", } else: raise Exception("You tried to run a conversion test on an unknown architecture") @@ -107,7 +107,7 @@ def get_architecture_custom_config(architecture_name: str): train_params = { "max_epochs": 3, "initial_lr": 1e-2, - "lr_mode": "poly", + "lr_mode": "PolyLRScheduler", "ema": True, # unlike the paper (not specified in paper) "optimizer": "SGD", "optimizer_params": {"weight_decay": 5e-4, "momentum": 0.9}, diff --git a/tests/integration_tests/deci_lab_export_test.py b/tests/integration_tests/deci_lab_export_test.py index 3130b9d785..50e6132d2e 100644 --- a/tests/integration_tests/deci_lab_export_test.py +++ b/tests/integration_tests/deci_lab_export_test.py @@ -44,10 
+44,10 @@ def test_train_with_deci_lab_integration(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": self.optimizer, "criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], diff --git a/tests/integration_tests/ema_train_integration_test.py b/tests/integration_tests/ema_train_integration_test.py index 777e5b319c..3bca4b3204 100644 --- a/tests/integration_tests/ema_train_integration_test.py +++ b/tests/integration_tests/ema_train_integration_test.py @@ -49,11 +49,11 @@ def _train(self, ema_params): training_params = { "max_epochs": 4, "lr_updates": [4], - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_decay_factor": 0.1, "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "ema": True, diff --git a/tests/integration_tests/lr_test.py b/tests/integration_tests/lr_test.py index 82bdaec94d..1b7a4ce245 100644 --- a/tests/integration_tests/lr_test.py +++ b/tests/integration_tests/lr_test.py @@ -19,7 +19,7 @@ def setUp(cls): "max_epochs": 1, "silent_mode": True, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], "metric_to_watch": "Accuracy", @@ -45,12 +45,12 @@ def test_lr_function(initial_lr, epoch, iter, max_epoch, iters_per_epoch, **kwar return initial_lr * (1 - ((epoch * iters_per_epoch + iter) / (max_epoch * iters_per_epoch))) # test if we are able that lr_function supports functions with this structure - training_params = {**self.training_params, "lr_mode": "function", "lr_schedule_function": test_lr_function} + training_params = {**self.training_params, "lr_mode": "FunctionLRScheduler", "lr_schedule_function": test_lr_function} trainer.train( model=model, training_params=training_params, train_loader=classification_test_dataloader(), valid_loader=classification_test_dataloader() ) # test that we assert lr_function is callable - training_params = {**self.training_params, "lr_mode": "function"} + training_params = {**self.training_params, "lr_mode": "FunctionLRScheduler"} with self.assertRaises(AssertionError): trainer.train( model=model, training_params=training_params, train_loader=classification_test_dataloader(), valid_loader=classification_test_dataloader() @@ -58,14 +58,14 @@ def test_lr_function(initial_lr, epoch, iter, max_epoch, iters_per_epoch, **kwar def test_cosine_lr(self): trainer, model = self.get_trainer(self.folder_name) - training_params = {**self.training_params, "lr_mode": "cosine", "cosine_final_lr_ratio": 0.01} + training_params = {**self.training_params, "lr_mode": "CosineLRScheduler", "cosine_final_lr_ratio": 0.01} trainer.train( model=model, training_params=training_params, train_loader=classification_test_dataloader(), valid_loader=classification_test_dataloader() ) def test_step_lr(self): trainer, model = self.get_trainer(self.folder_name) - training_params = {**self.training_params, "lr_mode": "step", "lr_decay_factor": 0.1, "lr_updates": [4]} + training_params = {**self.training_params, "lr_mode": "StepLRScheduler", "lr_decay_factor": 0.1, "lr_updates": [4]} trainer.train( model=model, training_params=training_params, train_loader=classification_test_dataloader(), valid_loader=classification_test_dataloader() ) diff --git 
a/tests/integration_tests/pretrained_models_test.py b/tests/integration_tests/pretrained_models_test.py index fbf7abd96f..2ef2b7b23d 100644 --- a/tests/integration_tests/pretrained_models_test.py +++ b/tests/integration_tests/pretrained_models_test.py @@ -86,8 +86,8 @@ def setUp(self) -> None: "lr_updates": [1], "lr_decay_factor": 0.1, "initial_lr": 0.6, - "loss": "cross_entropy", - "lr_mode": "step", + "loss": "CrossEntropyLoss", + "lr_mode": "StepLRScheduler", "optimizer_params": {"weight_decay": 0.000, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], @@ -128,12 +128,12 @@ def setUp(self) -> None: ssd_dboxes = DEFAULT_SSD_LITE_MOBILENET_V2_ARCH_PARAMS["heads"]["SSDHead"]["anchors"] self.transfer_detection_train_params_ssd = { "max_epochs": 3, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "initial_lr": 0.01, "cosine_final_lr_ratio": 0.01, "lr_warmup_epochs": 3, "batch_accumulate": 1, - "loss": "ssd_loss", + "loss": "SSDLoss", "criterion_params": {"dboxes": ssd_dboxes}, "optimizer": "SGD", "warmup_momentum": 0.8, @@ -145,12 +145,12 @@ def setUp(self) -> None: } self.transfer_detection_train_params_yolox = { "max_epochs": 3, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "cosine_final_lr_ratio": 0.05, "warmup_bias_lr": 0.0, "warmup_momentum": 0.9, "initial_lr": 0.02, - "loss": "yolox_loss", + "loss": "YoloXDetectionLoss", "criterion_params": {"strides": [8, 16, 32], "num_classes": 5}, # output strides of all yolo outputs "train_metrics_list": [], "valid_metrics_list": [DetectionMetrics(post_prediction_callback=YoloXPostPredictionCallback(), normalize_targets=True, num_cls=5)], @@ -215,7 +215,7 @@ def setUp(self) -> None: "max_epochs": 3, "initial_lr": 1e-2, "loss": DDRNetLoss(), - "lr_mode": "poly", + "lr_mode": "PolyLRScheduler", "ema": True, # unlike the paper (not specified in paper) "average_best_models": True, "optimizer": "SGD", @@ -232,7 +232,7 @@ def setUp(self) -> None: "max_epochs": 3, "initial_lr": 1e-2, "loss": STDCLoss(num_classes=5), - "lr_mode": "poly", + "lr_mode": "PolyLRScheduler", "ema": True, # unlike the paper (not specified in paper) "optimizer": "SGD", "optimizer_params": {"weight_decay": 5e-4, "momentum": 0.9}, @@ -246,8 +246,8 @@ def setUp(self) -> None: self.regseg_transfer_segmentation_train_params = { "max_epochs": 3, "initial_lr": 1e-2, - "loss": "cross_entropy", - "lr_mode": "poly", + "loss": "CrossEntropyLoss", + "lr_mode": "PolyLRScheduler", "ema": True, # unlike the paper (not specified in paper) "optimizer": "SGD", "optimizer_params": {"weight_decay": 5e-4, "momentum": 0.9}, diff --git a/tests/recipe_training_tests/coded_qat_launch_test.py b/tests/recipe_training_tests/coded_qat_launch_test.py index e5bb8531c1..243d78cd53 100644 --- a/tests/recipe_training_tests/coded_qat_launch_test.py +++ b/tests/recipe_training_tests/coded_qat_launch_test.py @@ -17,10 +17,10 @@ def test_qat_launch(self): "max_epochs": 10, "lr_updates": [], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -78,10 +78,10 @@ def test_ptq_launch(self): "max_epochs": 10, "lr_updates": [], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": 
{}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/dataset_statistics_test.py b/tests/unit_tests/dataset_statistics_test.py index b3881b4c9a..f68fbb562e 100644 --- a/tests/unit_tests/dataset_statistics_test.py +++ b/tests/unit_tests/dataset_statistics_test.py @@ -24,9 +24,9 @@ def test_dataset_statistics_tensorboard_logger(self): training_params = { "max_epochs": 1, # we dont really need the actual training to run - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "initial_lr": 0.01, - "loss": "yolox_loss", + "loss": "YoloXDetectionLoss", "criterion_params": {"strides": [8, 16, 32], "num_classes": 80}, "dataset_statistics": True, "launch_tensorboard": True, diff --git a/tests/unit_tests/detection_dataset_test.py b/tests/unit_tests/detection_dataset_test.py index 6e6efdd523..cb38faa7bb 100644 --- a/tests/unit_tests/detection_dataset_test.py +++ b/tests/unit_tests/detection_dataset_test.py @@ -168,12 +168,12 @@ def test_coco_detection_metrics_with_classwise_ap(self): detection_train_params_yolox = { "max_epochs": 5, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "cosine_final_lr_ratio": 0.05, "warmup_bias_lr": 0.0, "warmup_momentum": 0.9, "initial_lr": 0.02, - "loss": "yolox_loss", + "loss": "YoloXDetectionLoss", "mixed_precision": False, "criterion_params": {"strides": [8, 16, 32], "num_classes": 80}, # output strides of all yolo outputs "train_metrics_list": [], diff --git a/tests/unit_tests/double_training_test.py b/tests/unit_tests/double_training_test.py index b556aaabc2..4a9ab0b265 100644 --- a/tests/unit_tests/double_training_test.py +++ b/tests/unit_tests/double_training_test.py @@ -24,7 +24,7 @@ def test_call_train_twice(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, "loss": torch.nn.CrossEntropyLoss(), diff --git a/tests/unit_tests/early_stop_test.py b/tests/unit_tests/early_stop_test.py index 1feeb6a9df..2082d2fd73 100644 --- a/tests/unit_tests/early_stop_test.py +++ b/tests/unit_tests/early_stop_test.py @@ -49,10 +49,10 @@ def setUp(self) -> None: "max_epochs": self.max_epochs, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/extreme_batch_cb_test.py b/tests/unit_tests/extreme_batch_cb_test.py index b4692b6274..26bfd636a2 100644 --- a/tests/unit_tests/extreme_batch_cb_test.py +++ b/tests/unit_tests/extreme_batch_cb_test.py @@ -40,7 +40,7 @@ def setUpClass(cls): "max_epochs": 3, "initial_lr": 1e-2, "loss": DDRNetLoss(), - "lr_mode": "poly", + "lr_mode": "PolyLRScheduler", "ema": True, "optimizer": "SGD", "mixed_precision": False, @@ -56,7 +56,7 @@ def setUpClass(cls): "max_epochs": 3, "initial_lr": 1e-2, "loss": PPYoloELoss(num_classes=1, use_static_assigner=False, reg_max=16), - "lr_mode": "poly", + "lr_mode": "PolyLRScheduler", "ema": True, "optimizer": "SGD", "mixed_precision": False, diff --git a/tests/unit_tests/factories_test.py b/tests/unit_tests/factories_test.py index c0def96302..e3b7babba0 100644 --- a/tests/unit_tests/factories_test.py +++ b/tests/unit_tests/factories_test.py @@ -8,7 +8,7 @@ from super_gradients.common.object_names import Models from super_gradients.training import models from 
super_gradients.training.dataloaders.dataloaders import classification_test_dataloader -from super_gradients.training.losses import LabelSmoothingCrossEntropyLoss +from super_gradients.training.losses import CrossEntropyLoss from super_gradients.training.metrics import Accuracy, Top5 from torch import nn @@ -21,10 +21,10 @@ def test_training_with_factories(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "torch.optim.ASGD", # use an optimizer by factory "criterion_params": {}, "optimizer_params": {"lambd": 0.0001, "alpha": 0.75}, @@ -47,7 +47,7 @@ def test_training_with_factories_with_typos(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, "loss": "crossEnt_ropy", @@ -64,7 +64,7 @@ def test_training_with_factories_with_typos(self): self.assertIsInstance(trainer.train_metrics.Accuracy, Accuracy) self.assertIsInstance(trainer.valid_metrics.Top5, Top5) self.assertIsInstance(trainer.optimizer, torch.optim.Adam) - self.assertIsInstance(trainer.criterion, LabelSmoothingCrossEntropyLoss) + self.assertIsInstance(trainer.criterion, CrossEntropyLoss) def test_activations_factory(self): class DummyModel(nn.Module): diff --git a/tests/unit_tests/forward_pass_prep_fn_test.py b/tests/unit_tests/forward_pass_prep_fn_test.py index 97c8a27af7..57ccf27f69 100644 --- a/tests/unit_tests/forward_pass_prep_fn_test.py +++ b/tests/unit_tests/forward_pass_prep_fn_test.py @@ -38,11 +38,11 @@ def test_resizing_with_forward_pass_prep_fn(self): train_params = { "max_epochs": 2, "cosine_final_lr_ratio": 0.2, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "lr_cooldown_epochs": 2, "lr_warmup_epochs": 3, "initial_lr": 1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/initialize_with_dataloaders_test.py b/tests/unit_tests/initialize_with_dataloaders_test.py index 6d9c14034d..9853431498 100644 --- a/tests/unit_tests/initialize_with_dataloaders_test.py +++ b/tests/unit_tests/initialize_with_dataloaders_test.py @@ -35,9 +35,9 @@ def test_train_with_dataloaders(self): "max_epochs": 2, "lr_updates": [5, 6, 12], "lr_decay_factor": 0.01, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "initial_lr": 0.01, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "optimizer_params": {"weight_decay": 1e-5, "momentum": 0.9}, "train_metrics_list": [Accuracy()], diff --git a/tests/unit_tests/kd_ema_test.py b/tests/unit_tests/kd_ema_test.py index 1f59084fe7..bbdf9164bd 100644 --- a/tests/unit_tests/kd_ema_test.py +++ b/tests/unit_tests/kd_ema_test.py @@ -20,7 +20,7 @@ def setUp(cls): "max_epochs": 3, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, "loss": KDLogitsLoss(torch.nn.CrossEntropyLoss()), diff --git a/tests/unit_tests/kd_trainer_test.py b/tests/unit_tests/kd_trainer_test.py index 3b866e7b2a..98b3a37f3f 100644 --- a/tests/unit_tests/kd_trainer_test.py +++ b/tests/unit_tests/kd_trainer_test.py @@ -42,7 +42,7 @@ def setUp(cls): "max_epochs": 3, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 
0.1, "loss": KDLogitsLoss(torch.nn.CrossEntropyLoss()), diff --git a/tests/unit_tests/load_ema_ckpt_test.py b/tests/unit_tests/load_ema_ckpt_test.py index b070c8d862..c1d1fe1d98 100644 --- a/tests/unit_tests/load_ema_ckpt_test.py +++ b/tests/unit_tests/load_ema_ckpt_test.py @@ -23,10 +23,10 @@ def setUp(self) -> None: "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/local_ckpt_head_replacement_test.py b/tests/unit_tests/local_ckpt_head_replacement_test.py index 8ba7371683..0d100e364a 100644 --- a/tests/unit_tests/local_ckpt_head_replacement_test.py +++ b/tests/unit_tests/local_ckpt_head_replacement_test.py @@ -14,10 +14,10 @@ def test_local_ckpt_head_replacement(self): "max_epochs": 1, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/loss_loggings_test.py b/tests/unit_tests/loss_loggings_test.py index 54d476f0ad..5294885bd1 100644 --- a/tests/unit_tests/loss_loggings_test.py +++ b/tests/unit_tests/loss_loggings_test.py @@ -35,7 +35,7 @@ def test_single_item_logging(self): "max_epochs": 1, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, "loss": torch.nn.CrossEntropyLoss(), @@ -59,7 +59,7 @@ def test_multiple_unnamed_components_loss_logging(self): "max_epochs": 1, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, "loss": CriterionWithUnnamedComponents(), @@ -83,7 +83,7 @@ def test_multiple_named_components_loss_logging(self): "max_epochs": 1, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, "loss": CriterionWithNamedComponents(), diff --git a/tests/unit_tests/lr_cooldown_test.py b/tests/unit_tests/lr_cooldown_test.py index 2f04d56d5b..668bc0c74f 100644 --- a/tests/unit_tests/lr_cooldown_test.py +++ b/tests/unit_tests/lr_cooldown_test.py @@ -19,11 +19,11 @@ def test_lr_cooldown_with_lr_scheduling(self): train_params = { "max_epochs": 7, "cosine_final_lr_ratio": 0.2, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "lr_cooldown_epochs": 2, "lr_warmup_epochs": 3, "initial_lr": 1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/lr_warmup_test.py b/tests/unit_tests/lr_warmup_test.py index c8473cdb53..2521090499 100644 --- a/tests/unit_tests/lr_warmup_test.py +++ b/tests/unit_tests/lr_warmup_test.py @@ -6,7 +6,7 @@ from super_gradients.training.dataloaders.dataloaders import classification_test_dataloader from super_gradients.training.metrics import Accuracy from super_gradients.training.models import LeNet -from super_gradients.training.utils.callbacks import TestLRCallback, LRCallbackBase, Phase, Callback, PhaseContext, CosineLRCallback +from super_gradients.training.utils.callbacks import TestLRCallback, 
LRCallbackBase, Phase, Callback, PhaseContext, CosineLRScheduler class CollectLRCallback(Callback): @@ -58,10 +58,10 @@ def test_lr_warmup(self): "max_epochs": 5, "lr_updates": [], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 3, "initial_lr": 1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -71,7 +71,7 @@ def test_lr_warmup(self): "greater_metric_to_watch_is_better": True, "ema": False, "phase_callbacks": phase_callbacks, - "warmup_mode": "linear_epoch_step", + "warmup_mode": "LinearEpochLRWarmup", } expected_lrs = [0.25, 0.5, 0.75, 1.0, 1.0] @@ -94,10 +94,10 @@ def test_lr_warmup_with_lr_scheduling(self): train_params = { "max_epochs": 5, "cosine_final_lr_ratio": 0.2, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "lr_warmup_epochs": 3, "initial_lr": 1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -107,7 +107,7 @@ def test_lr_warmup_with_lr_scheduling(self): "greater_metric_to_watch_is_better": True, "ema": False, "phase_callbacks": phase_callbacks, - "warmup_mode": "linear_epoch_step", + "warmup_mode": "LinearEpochLRWarmup", } expected_lrs = [0.25, 0.5, 0.75, 0.9236067977499791, 0.4763932022500211] @@ -137,13 +137,13 @@ def test_warmup_linear_batch_step(self): train_params = { "max_epochs": max_epochs, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "cosine_final_lr_ratio": cosine_final_lr_ratio, "warmup_initial_lr": warmup_initial_lr, - "warmup_mode": "linear_batch_step", + "warmup_mode": "LinearBatchLRWarmup", "lr_warmup_steps": lr_warmup_steps, "initial_lr": 1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -161,7 +161,7 @@ def test_warmup_linear_batch_step(self): expected_warmup_lrs = np.linspace(warmup_initial_lr, initial_lr, lr_warmup_steps).tolist() total_steps = max_epochs * len(train_loader) - lr_warmup_steps - expected_cosine_lrs = CosineLRCallback.compute_learning_rate( + expected_cosine_lrs = CosineLRScheduler.compute_learning_rate( step=np.arange(0, total_steps), total_steps=total_steps, initial_lr=initial_lr, final_lr_ratio=cosine_final_lr_ratio ) @@ -186,11 +186,11 @@ def test_warmup_linear_epoch_step(self): "max_epochs": 5, "lr_updates": [], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 3, "initial_lr": 1, "warmup_initial_lr": 4.0, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -200,7 +200,7 @@ def test_warmup_linear_epoch_step(self): "greater_metric_to_watch_is_better": True, "ema": False, "phase_callbacks": [collect_lr_callback], - "warmup_mode": "linear_epoch_step", + "warmup_mode": "LinearEpochLRWarmup", } expected_lrs = [4.0, 3.0, 2.0, 1.0, 1.0] @@ -224,9 +224,9 @@ def test_custom_lr_warmup(self): "max_epochs": 5, "lr_updates": [], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 3, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/max_batches_loop_break_test.py 
b/tests/unit_tests/max_batches_loop_break_test.py index 075b5a590b..bbaa483e09 100644 --- a/tests/unit_tests/max_batches_loop_break_test.py +++ b/tests/unit_tests/max_batches_loop_break_test.py @@ -23,10 +23,10 @@ def test_max_train_batches_loop_break(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -58,10 +58,10 @@ def test_max_valid_batches_loop_break(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/optimizer_params_override_test.py b/tests/unit_tests/optimizer_params_override_test.py index a3bcf9789c..f0b250b160 100644 --- a/tests/unit_tests/optimizer_params_override_test.py +++ b/tests/unit_tests/optimizer_params_override_test.py @@ -16,10 +16,10 @@ def test_optimizer_params_partial_override(self): "max_epochs": 1, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"momentum": 0.9}, @@ -45,10 +45,10 @@ def test_optimizer_params_full_override(self): "max_epochs": 1, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "zero_weight_decay_on_bias_and_bn": True, diff --git a/tests/unit_tests/phase_context_test.py b/tests/unit_tests/phase_context_test.py index a9d37f7f6f..5fb20101c4 100644 --- a/tests/unit_tests/phase_context_test.py +++ b/tests/unit_tests/phase_context_test.py @@ -28,10 +28,10 @@ def context_information_in_train_test(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/preprocessing_unit_test.py b/tests/unit_tests/preprocessing_unit_test.py index 9416f309ba..4c1d20f805 100644 --- a/tests/unit_tests/preprocessing_unit_test.py +++ b/tests/unit_tests/preprocessing_unit_test.py @@ -97,12 +97,12 @@ def test_setting_preprocessing_params_from_validation_set(self): detection_train_params_yolox = { "max_epochs": 1, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "cosine_final_lr_ratio": 0.05, "warmup_bias_lr": 0.0, "warmup_momentum": 0.9, "initial_lr": 0.02, - "loss": "yolox_loss", + "loss": "YoloXDetectionLoss", "criterion_params": {"strides": [8, 16, 32], "num_classes": 80}, # output strides of all yolo outputs "train_metrics_list": [], "valid_metrics_list": [DetectionMetrics(post_prediction_callback=YoloXPostPredictionCallback(), normalize_targets=True, num_cls=80)], @@ -168,12 +168,12 @@ def test_setting_preprocessing_params_from_checkpoint(self): detection_train_params_yolox = { "max_epochs": 1, - "lr_mode": 
"cosine", + "lr_mode": "CosineLRScheduler", "cosine_final_lr_ratio": 0.05, "warmup_bias_lr": 0.0, "warmup_momentum": 0.9, "initial_lr": 0.02, - "loss": "yolox_loss", + "loss": "YoloXDetectionLoss", "criterion_params": {"strides": [8, 16, 32], "num_classes": 80}, # output strides of all yolo outputs "train_metrics_list": [], "valid_metrics_list": [DetectionMetrics(post_prediction_callback=YoloXPostPredictionCallback(), normalize_targets=True, num_cls=80)], diff --git a/tests/unit_tests/resume_training_test.py b/tests/unit_tests/resume_training_test.py index 0c3bf69abb..6c8bc0b465 100644 --- a/tests/unit_tests/resume_training_test.py +++ b/tests/unit_tests/resume_training_test.py @@ -31,10 +31,10 @@ def test_resume_training(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -77,10 +77,10 @@ def test_resume_run_id_training(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -142,10 +142,10 @@ def test_resume_external_training(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -190,10 +190,10 @@ def test_resume_external_training_same_dir(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/save_ckpt_test.py b/tests/unit_tests/save_ckpt_test.py index dcb5208744..11ae820467 100644 --- a/tests/unit_tests/save_ckpt_test.py +++ b/tests/unit_tests/save_ckpt_test.py @@ -13,15 +13,15 @@ def setUp(self): "max_epochs": 4, "lr_decay_factor": 0.1, "lr_updates": [4], - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "save_ckpt_epoch_list": [1, 3], - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], "metric_to_watch": "Accuracy", diff --git a/tests/unit_tests/train_after_test_test.py b/tests/unit_tests/train_after_test_test.py index 870fa072bf..d0a7ec085e 100644 --- a/tests/unit_tests/train_after_test_test.py +++ b/tests/unit_tests/train_after_test_test.py @@ -20,7 +20,7 @@ def setUp(self) -> None: "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, "loss": torch.nn.CrossEntropyLoss(), diff --git a/tests/unit_tests/train_logging_test.py b/tests/unit_tests/train_logging_test.py index 759af58988..5fbb16a539 
100644 --- a/tests/unit_tests/train_logging_test.py +++ b/tests/unit_tests/train_logging_test.py @@ -19,10 +19,10 @@ def test_train_logging(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/train_with_intialized_param_args_test.py b/tests/unit_tests/train_with_intialized_param_args_test.py index d1ed21f175..d1dcefbd22 100644 --- a/tests/unit_tests/train_with_intialized_param_args_test.py +++ b/tests/unit_tests/train_with_intialized_param_args_test.py @@ -28,7 +28,7 @@ def test_train_with_external_criterion(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, "loss": torch.nn.CrossEntropyLoss(), @@ -52,10 +52,10 @@ def test_train_with_external_optimizer(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": optimizer, "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -81,7 +81,7 @@ def test_train_with_external_scheduler(self): "phase_callbacks": phase_callbacks, "lr_warmup_epochs": 0, "initial_lr": lr, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": optimizer, "criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], @@ -103,7 +103,7 @@ def test_train_with_external_scheduler_class(self): "max_epochs": 2, "lr_warmup_epochs": 0, "initial_lr": 0.3, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": optimizer, "criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], @@ -128,7 +128,7 @@ def test_train_with_reduce_on_plateau(self): "phase_callbacks": phase_callbacks, "lr_warmup_epochs": 0, "initial_lr": lr, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": optimizer, "criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], @@ -148,10 +148,10 @@ def test_train_with_external_metric(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -178,10 +178,10 @@ def test_train_with_external_dataloaders(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/train_with_precise_bn_test.py b/tests/unit_tests/train_with_precise_bn_test.py index 7a2eff2a99..a67d87bb40 100644 --- a/tests/unit_tests/train_with_precise_bn_test.py +++ b/tests/unit_tests/train_with_precise_bn_test.py @@ -18,10 +18,10 @@ def test_train_with_precise_bn_explicit_size(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": 
"CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -47,10 +47,10 @@ def test_train_with_precise_bn_implicit_size(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/training_params_factory_test.py b/tests/unit_tests/training_params_factory_test.py index b574cce8a2..5e30984841 100644 --- a/tests/unit_tests/training_params_factory_test.py +++ b/tests/unit_tests/training_params_factory_test.py @@ -5,12 +5,12 @@ class TrainingParamsTest(unittest.TestCase): def test_get_train_params(self): train_params = training_hyperparams.coco2017_yolox_train_params() - self.assertTrue(train_params["loss"] == "yolox_loss") + self.assertTrue(train_params["loss"] == "YoloXDetectionLoss") self.assertTrue(train_params["max_epochs"] == 300) def test_get_train_params_with_overrides(self): train_params = training_hyperparams.coco2017_yolox_train_params(overriding_params={"max_epochs": 5}) - self.assertTrue(train_params["loss"] == "yolox_loss") + self.assertTrue(train_params["loss"] == "YoloXDetectionLoss") self.assertTrue(train_params["max_epochs"] == 5) diff --git a/tests/unit_tests/update_param_groups_unit_test.py b/tests/unit_tests/update_param_groups_unit_test.py index f0c85c71a2..e4edd4ca02 100644 --- a/tests/unit_tests/update_param_groups_unit_test.py +++ b/tests/unit_tests/update_param_groups_unit_test.py @@ -34,11 +34,11 @@ def test_lr_scheduling_with_update_param_groups(self): train_params = { "max_epochs": 3, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_updates": [0, 1, 2], "initial_lr": 0.1, "lr_decay_factor": 1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/vit_unit_test.py b/tests/unit_tests/vit_unit_test.py index a943671abb..b9a3527761 100644 --- a/tests/unit_tests/vit_unit_test.py +++ b/tests/unit_tests/vit_unit_test.py @@ -15,10 +15,10 @@ def setUp(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},