diff --git a/.circleci/config.yml b/.circleci/config.yml index de0afea275..1d27ab4fd2 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -547,7 +547,7 @@ jobs: python3.8 src/super_gradients/train_from_recipe.py --config-name=coco2017_pose_dekr_w32_no_dc experiment_name=shortened_coco2017_pose_dekr_w32_ap_test batch_size=4 val_batch_size=8 epochs=1 training_hyperparams.lr_warmup_steps=0 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=1000 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4 python3.8 src/super_gradients/train_from_recipe.py --config-name=cifar10_resnet experiment_name=shortened_cifar10_resnet_accuracy_test epochs=100 training_hyperparams.average_best_models=False multi_gpu=DDP num_gpus=4 python3.8 src/super_gradients/examples/convert_recipe_example/convert_recipe_example.py --config-name=cifar10_conversion_params experiment_name=shortened_cifar10_resnet_accuracy_test - python3.8 src/super_gradients/train_from_recipe.py --config-name=coco2017_yolox experiment_name=shortened_coco2017_yolox_n_map_test architecture=yolox_n training_hyperparams.loss=yolox_fast_loss epochs=10 training_hyperparams.average_best_models=False multi_gpu=DDP num_gpus=4 + python3.8 src/super_gradients/train_from_recipe.py --config-name=coco2017_yolox experiment_name=shortened_coco2017_yolox_n_map_test architecture=yolox_n training_hyperparams.loss=YoloXFastDetectionLoss epochs=10 training_hyperparams.average_best_models=False multi_gpu=DDP num_gpus=4 python3.8 src/super_gradients/train_from_recipe.py --config-name=cityscapes_regseg48 experiment_name=shortened_cityscapes_regseg48_iou_test epochs=10 training_hyperparams.average_best_models=False multi_gpu=DDP num_gpus=4 coverage run --source=super_gradients -m unittest tests/deci_core_recipe_test_suite_runner.py diff --git a/Makefile b/Makefile index 255938e51d..6cf22249de 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ yolo_nas_integration_tests: recipe_accuracy_tests: python src/super_gradients/train_from_recipe.py --config-name=coco2017_pose_dekr_w32_no_dc experiment_name=shortened_coco2017_pose_dekr_w32_ap_test epochs=1 batch_size=4 val_batch_size=8 training_hyperparams.lr_warmup_steps=0 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=1000 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4 python src/super_gradients/train_from_recipe.py --config-name=cifar10_resnet experiment_name=shortened_cifar10_resnet_accuracy_test epochs=100 training_hyperparams.average_best_models=False multi_gpu=DDP num_gpus=4 - python src/super_gradients/train_from_recipe.py --config-name=coco2017_yolox experiment_name=shortened_coco2017_yolox_n_map_test epochs=10 architecture=yolox_n training_hyperparams.loss=yolox_fast_loss training_hyperparams.average_best_models=False multi_gpu=DDP num_gpus=4 + python src/super_gradients/train_from_recipe.py --config-name=coco2017_yolox experiment_name=shortened_coco2017_yolox_n_map_test epochs=10 architecture=yolox_n training_hyperparams.loss=YoloXFastDetectionLoss training_hyperparams.average_best_models=False multi_gpu=DDP num_gpus=4 python src/super_gradients/train_from_recipe.py --config-name=cityscapes_regseg48 experiment_name=shortened_cityscapes_regseg48_iou_test epochs=10 training_hyperparams.average_best_models=False multi_gpu=DDP num_gpus=4 python src/super_gradients/examples/convert_recipe_example/convert_recipe_example.py --config-name=cifar10_conversion_params 
experiment_name=shortened_cifar10_resnet_accuracy_test coverage run --source=super_gradients -m unittest tests/deci_core_recipe_test_suite_runner.py diff --git a/documentation/source/Checkpoints.md b/documentation/source/Checkpoints.md index 295ab687ca..48847ca871 100644 --- a/documentation/source/Checkpoints.md +++ b/documentation/source/Checkpoints.md @@ -79,7 +79,7 @@ model = models.get(model_name=Models.RESNET18, num_classes=10) train_params = { ... - "loss": "cross_entropy", + "loss": "LabelSmoothingCrossEntropyLoss", "criterion_params": {}, "save_ckpt_epoch_list": [10,15] ... diff --git a/documentation/source/Example_Classification.md b/documentation/source/Example_Classification.md index b332177134..f6d9d6606b 100644 --- a/documentation/source/Example_Classification.md +++ b/documentation/source/Example_Classification.md @@ -318,10 +318,10 @@ Output (Training parameters): 'launch_tensorboard': False, 'load_opt_params': True, 'log_installed_packages': True, - 'loss': 'cross_entropy', + 'loss': "LabelSmoothingCrossEntropyLoss", 'lr_cooldown_epochs': 0, 'lr_decay_factor': 0.1, - 'lr_mode': 'step', + 'lr_mode': 'StepLRScheduler', 'lr_schedule_function': None, 'lr_updates': array([100, 150, 200]), 'lr_warmup_epochs': 0, @@ -355,7 +355,7 @@ Output (Training parameters): 'train_metrics_list': ['Accuracy', 'Top5'], 'valid_metrics_list': ['Accuracy', 'Top5'], 'warmup_initial_lr': None, - 'warmup_mode': 'linear_epoch_step', + 'warmup_mode': 'LinearEpochLRWarmup', 'zero_weight_decay_on_bias_and_bn': False } ``` diff --git a/documentation/source/Example_Training-an-external-model.md b/documentation/source/Example_Training-an-external-model.md index 4978ef0d88..9b5379dba7 100644 --- a/documentation/source/Example_Training-an-external-model.md +++ b/documentation/source/Example_Training-an-external-model.md @@ -640,7 +640,7 @@ And lastly, we need to define the training hyperparameters: ```python train_params = { "max_epochs": 100, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "initial_lr": 0.001, "optimizer": "Adam", "loss": CustomSegLoss(), diff --git a/documentation/source/LRScheduling.md b/documentation/source/LRScheduling.md index 4912ffccb0..04cfa238bf 100644 --- a/documentation/source/LRScheduling.md +++ b/documentation/source/LRScheduling.md @@ -7,15 +7,15 @@ Learning rate scheduling type is controlled by the training parameter `lr_mode`. When str: - Learning rate scheduling policy, one of ['step','poly','cosine','function']. + Learning rate scheduling policy, one of ['StepLRScheduler','PolyLRScheduler','CosineLRScheduler','FunctionLRScheduler']. - 'step' refers to constant updates at epoch numbers passed through `lr_updates`. Each update decays the learning rate by `lr_decay_factor`. + 'StepLRScheduler' refers to constant updates at epoch numbers passed through `lr_updates`. Each update decays the learning rate by `lr_decay_factor`. - 'cosine' refers to the Cosine Anealing policy as mentioned in https://arxiv.org/abs/1608.03983. The final learning rate ratio is controlled by `cosine_final_lr_ratio` training parameter. + 'CosineLRScheduler' refers to the Cosine Anealing policy as mentioned in https://arxiv.org/abs/1608.03983. The final learning rate ratio is controlled by `cosine_final_lr_ratio` training parameter. 
- 'poly' refers to the polynomial decrease: in each epoch iteration `self.lr = self.initial_lr * pow((1.0 - (current_iter / max_iter)), 0.9)` + 'PolyLRScheduler' refers to the polynomial decrease: in each epoch iteration `self.lr = self.initial_lr * pow((1.0 - (current_iter / max_iter)), 0.9)` - 'function' refers to a user-defined learning rate scheduling function, that is passed through `lr_schedule_function`. + 'FunctionLRScheduler' refers to a user-defined learning rate scheduling function, that is passed through `lr_schedule_function`. For example, the training code below will start with an initial learning rate of 0.1 and decay by 0.1 at epochs 100,150 and 200: @@ -30,7 +30,7 @@ valid_dataloader = ... model = ... train_params = { "initial_lr": 0.1, - "lr_mode":"step", + "lr_mode":"StepLRScheduler", "lr_updates": [100, 150, 200], "lr_decay_factor": 0.1, ..., @@ -45,7 +45,7 @@ trainer.train(model=model, training_params=train_params, train_loader=train_data ```yaml training_hyperparams: initial_lr: 0.1 - lr_mode: step + lr_mode: StepLRScheduler user_lr_updates: - 100 - 150 @@ -66,7 +66,7 @@ Prerequisites: [phase callbacks](PhaseCallbacks.md), [training with configuratio In SG, learning rate schedulers are implemented as [phase callbacks](PhaseCallbacks.md). They read the learning rate from the `PhaseContext` in their `__call__` method, calculate the new learning rate according to the current state of training, and update the optimizer's param groups. -For example, the code snippet from the previous section translates "lr_mode":"step" to a `super_gradients.training.utils.callbacks.callbacks.StepLRCallback` instance, which is added to the phase callbacks list. +For example, the code snippet from the previous section translates "lr_mode":"StepLRScheduler" to a `super_gradients.training.utils.callbacks.callbacks.StepLRScheduler` instance, which is added to the phase callbacks list. ### Implementing Your Own Scheduler A custom learning rate scheduler should inherit from `LRCallbackBase`, so let's take a look at it: diff --git a/documentation/source/Losses.md b/documentation/source/Losses.md index 3def77c7e5..e7fd837566 100644 --- a/documentation/source/Losses.md +++ b/documentation/source/Losses.md @@ -2,18 +2,18 @@ SuperGradients can support any PyTorch-based loss function. Additionally, multiple Loss function implementations for various tasks are also supported: - cross_entropy - mse - r_squared_loss - shelfnet_ohem_loss - shelfnet_se_loss - yolox_loss - yolox_fast_loss - ssd_loss - stdc_loss - bce_dice_loss - kd_loss - dice_ce_edge_loss + LabelSmoothingCrossEntropyLoss + MSE + RSquaredLoss + ShelfNetOHEMLoss + ShelfNetSemanticEncodingLoss + YoloXDetectionLoss + YoloXFastDetectionLoss + SSDLoss + STDCLoss + BCEDiceLoss + KDLogitsLoss + DiceCEEdgeLoss All the above, are just string aliases for the underlying torch.nn.Module classes, implementing the specified loss functions. @@ -31,7 +31,7 @@ model = ... train_params = { ... - "loss": "cross_entropy", + "loss": "LabelSmoothingCrossEntropyLoss", "criterion_params": {} ... } @@ -42,7 +42,7 @@ Since most IDEs support auto-completion, for your convenience, you can use our o ```python from super_gradients.common.object_names import Losses ``` -Then simply instead of "cross_entropy", use +Then simply instead of "LabelSmoothingCrossEntropyLoss", use ```python Losses.CROSS_ENTROPY ``` @@ -54,14 +54,14 @@ When doing so, in your `my_training_hyperparams.yaml` file: ```yaml ... 
-loss: yolox_loss +loss: YoloXDetectionLoss criterion_params: strides: [8, 16, 32] # output strides of all yolo outputs num_classes: 80 ``` -Note that two `training_params` parameters define the loss function: `loss` which defines the type of the loss, and`criterion_params` dictionary which will be unpacked to the underlying `yolox_loss` class constructor. +Note that two `training_params` parameters define the loss function: `loss` which defines the type of the loss, and`criterion_params` dictionary which will be unpacked to the underlying `YoloXDetectionLoss` class constructor. ## Passing Instantiated nn.Module Objects as Loss Functions diff --git a/documentation/source/PhaseCallbacks.md b/documentation/source/PhaseCallbacks.md index cd8512ef2c..cc7480aa06 100644 --- a/documentation/source/PhaseCallbacks.md +++ b/documentation/source/PhaseCallbacks.md @@ -8,13 +8,13 @@ SG's `super_gradients.training.utils.callbacks` module implements some common us ModelConversionCheckCallback LRCallbackBase - EpochStepWarmupLRCallback - BatchStepLinearWarmupLRCallback - StepLRCallback - ExponentialLRCallback - PolyLRCallback - CosineLRCallback - FunctionLRCallback + LinearEpochLRWarmup + LinearBatchLRWarmup + StepLRScheduler + ExponentialLRScheduler + PolyLRScheduler + CosineLRScheduler + FunctionLRScheduler LRSchedulerCallback DetectionVisualizationCallback BinarySegmentationVisualizationCallback @@ -30,7 +30,7 @@ off augmentations and incorporate L1 loss starting from epoch 285: max_epochs: 300 ... -loss: yolox_loss +loss: YoloXDetectionLoss ... @@ -237,7 +237,7 @@ valid_dataloader = ... model = ... train_params = { - "loss": "cross_entropy", + "loss": "LabelSmoothingCrossEntropyLoss", "criterion_params": {}, "phase_callbacks": [SaveFirstBatchCallback()], ... diff --git a/documentation/source/QuickstartBasicToolkit.md b/documentation/source/QuickstartBasicToolkit.md index a13ec89cee..4968d72000 100644 --- a/documentation/source/QuickstartBasicToolkit.md +++ b/documentation/source/QuickstartBasicToolkit.md @@ -61,7 +61,7 @@ model = models.get(Models.RESNET18, num_classes=10) training_params = { "max_epochs": 20, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "LabelSmoothingCrossEntropyLoss", "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], "metric_to_watch": "Accuracy", diff --git a/documentation/source/Segmentation.md b/documentation/source/Segmentation.md index c627a97d98..0cee203c35 100644 --- a/documentation/source/Segmentation.md +++ b/documentation/source/Segmentation.md @@ -143,12 +143,12 @@ from super_gradients.training.metrics.segmentation_metrics import BinaryIOU train_params = { "max_epochs": 30, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "initial_lr": 0.005, "lr_warmup_epochs": 5, "multiply_head_lr": 10, "optimizer": "SGD", - "loss": "bce_dice_loss", + "loss": "BCEDiceLoss", "ema": True, "zero_weight_decay_on_bias_and_bn": True, "average_best_models": True, diff --git a/documentation/source/configuration_files.md b/documentation/source/configuration_files.md index bf7a4b43ab..bb253bd8c3 100644 --- a/documentation/source/configuration_files.md +++ b/documentation/source/configuration_files.md @@ -28,7 +28,7 @@ lr_decay_factor: 0.1 lr_mode: step lr_warmup_epochs: 0 initial_lr: 0.1 -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss optimizer: SGD criterion_params: {} diff --git a/src/super_gradients/common/object_names.py b/src/super_gradients/common/object_names.py index 26bd48f890..f21b44d999 100644 --- 
a/src/super_gradients/common/object_names.py +++ b/src/super_gradients/common/object_names.py @@ -1,21 +1,21 @@ class Losses: """Static class holding all the supported loss names""" - CROSS_ENTROPY = "cross_entropy" - MSE = "mse" - R_SQUARED_LOSS = "r_squared_loss" - SHELFNET_OHEM_LOSS = "shelfnet_ohem_loss" - SHELFNET_SE_LOSS = "shelfnet_se_loss" - YOLOX_LOSS = "yolox_loss" - PPYOLOE_LOSS = "ppyoloe_loss" - YOLOX_FAST_LOSS = "yolox_fast_loss" - SSD_LOSS = "ssd_loss" - STDC_LOSS = "stdc_loss" - BCE_DICE_LOSS = "bce_dice_loss" - KD_LOSS = "kd_loss" - DICE_CE_EDGE_LOSS = "dice_ce_edge_loss" - DEKR_LOSS = "dekr_loss" - RESCORING_LOSS = "rescoring_loss" + CROSS_ENTROPY = "CrossEntropyLoss" + MSE = "MSE" + R_SQUARED_LOSS = "RSquaredLoss" + SHELFNET_OHEM_LOSS = "ShelfNetOHEMLoss" + SHELFNET_SE_LOSS = "ShelfNetSemanticEncodingLoss" + YOLOX_LOSS = "YoloXDetectionLoss" + PPYOLOE_LOSS = "PPYoloELoss" + YOLOX_FAST_LOSS = "YoloXFastDetectionLoss" + SSD_LOSS = "SSDLoss" + STDC_LOSS = "STDCLoss" + BCE_DICE_LOSS = "BCEDiceLoss" + KD_LOSS = "KDLogitsLoss" + DICE_CE_EDGE_LOSS = "DiceCEEdgeLoss" + DEKR_LOSS = "DEKRLoss" + RESCORING_LOSS = "RescoringLoss" class Metrics: @@ -154,19 +154,19 @@ class Callbacks: class LRSchedulers: """Static class to hold all the supported LR Scheduler names""" - STEP = "step" - POLY = "poly" - COSINE = "cosine" - EXP = "exp" - FUNCTION = "function" + STEP = "StepLRScheduler" + POLY = "PolyLRScheduler" + COSINE = "CosineLRScheduler" + EXP = "ExponentialLRScheduler" + FUNCTION = "FunctionLRScheduler" class LRWarmups: """Static class to hold all the supported LR Warmup names""" - LINEAR_STEP = "linear_step" - LINEAR_EPOCH_STEP = "linear_epoch_step" - LINEAR_BATCH_STEP = "linear_batch_step" + LINEAR_STEP = "LinearEpochLRWarmup" + LINEAR_EPOCH_STEP = "LinearEpochLRWarmup" + LINEAR_BATCH_STEP = "LinearBatchLRWarmup" class Samplers: diff --git a/src/super_gradients/common/registry/registry.py b/src/super_gradients/common/registry/registry.py index f00f0a3193..e303f3766f 100644 --- a/src/super_gradients/common/registry/registry.py +++ b/src/super_gradients/common/registry/registry.py @@ -68,7 +68,8 @@ def warn_if_deprecated(name: str, registry: dict): """ deprecated_names = registry.get(_DEPRECATED_KEY, {}) if name in deprecated_names: - warnings.warn(f"Using `{name}` in the recipe has been deprecated. Please use `{deprecated_names[name]}`", DeprecationWarning) + warnings.simplefilter("once", DeprecationWarning) # Required, otherwise the warning may never be displayed. + warnings.warn(f"Object name `{name}` is now deprecated. 
Please replace it with `{deprecated_names[name]}`.", DeprecationWarning) ARCHITECTURES = {} @@ -83,9 +84,9 @@ def warn_if_deprecated(name: str, registry: dict): METRICS = {} register_metric = create_register_decorator(registry=METRICS) -LOSSES = {Losses.MSE: nn.MSELoss} +LOSSES = {} register_loss = create_register_decorator(registry=LOSSES) - +register_loss(name=Losses.MSE, deprecated_name="mse")(nn.MSELoss) # Register manually to benefit from deprecated logic ALL_DATALOADERS = {} register_dataloader = create_register_decorator(registry=ALL_DATALOADERS) diff --git a/src/super_gradients/examples/ddrnet_imagenet/ddrnet_classification_example.py b/src/super_gradients/examples/ddrnet_imagenet/ddrnet_classification_example.py index eba1463bdf..c760571169 100644 --- a/src/super_gradients/examples/ddrnet_imagenet/ddrnet_classification_example.py +++ b/src/super_gradients/examples/ddrnet_imagenet/ddrnet_classification_example.py @@ -39,13 +39,13 @@ train_params_ddr = { "max_epochs": args.max_epochs, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_updates": [30, 60, 90], "lr_decay_factor": 0.1, "initial_lr": 0.1 * devices, "optimizer": "SGD", "optimizer_params": {"weight_decay": 0.0001, "momentum": 0.9, "nesterov": True}, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], "metric_to_watch": "Accuracy", diff --git a/src/super_gradients/examples/deci_lab_export_example/deci_lab_export_example.py b/src/super_gradients/examples/deci_lab_export_example/deci_lab_export_example.py index 8080cf2ae8..9f49c0130f 100644 --- a/src/super_gradients/examples/deci_lab_export_example/deci_lab_export_example.py +++ b/src/super_gradients/examples/deci_lab_export_example/deci_lab_export_example.py @@ -56,10 +56,10 @@ def main(architecture_name: str): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], diff --git a/src/super_gradients/examples/early_stop/early_stop_example.py b/src/super_gradients/examples/early_stop/early_stop_example.py index e2cbb782cf..5cf1124b06 100644 --- a/src/super_gradients/examples/early_stop/early_stop_example.py +++ b/src/super_gradients/examples/early_stop/early_stop_example.py @@ -12,16 +12,16 @@ super_gradients.init_trainer() early_stop_acc = EarlyStop(Phase.VALIDATION_EPOCH_END, monitor="Accuracy", mode="max", patience=3, verbose=True) -early_stop_val_loss = EarlyStop(Phase.VALIDATION_EPOCH_END, monitor="LabelSmoothingCrossEntropyLoss", mode="min", patience=3, verbose=True) +early_stop_val_loss = EarlyStop(Phase.VALIDATION_EPOCH_END, monitor="CrossEntropyLoss", mode="min", patience=3, verbose=True) train_params = { "max_epochs": 250, "lr_updates": [100, 150, 200], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/src/super_gradients/examples/loggers_examples/clearml_logger_example.py b/src/super_gradients/examples/loggers_examples/clearml_logger_example.py index 700bb6f584..7a96dea5d0 100644 --- a/src/super_gradients/examples/loggers_examples/clearml_logger_example.py +++ 
b/src/super_gradients/examples/loggers_examples/clearml_logger_example.py @@ -11,9 +11,9 @@ "max_epochs": 20, "lr_updates": [5, 10, 15], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], diff --git a/src/super_gradients/examples/loggers_examples/deci_platform_logger_example.py b/src/super_gradients/examples/loggers_examples/deci_platform_logger_example.py index 988373e9ee..9a69d3ef86 100644 --- a/src/super_gradients/examples/loggers_examples/deci_platform_logger_example.py +++ b/src/super_gradients/examples/loggers_examples/deci_platform_logger_example.py @@ -14,9 +14,9 @@ "max_epochs": 20, "lr_updates": [5, 10, 15], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], diff --git a/src/super_gradients/examples/quantization/resnet_qat_example.py b/src/super_gradients/examples/quantization/resnet_qat_example.py index d9512c74c5..c442b7303e 100644 --- a/src/super_gradients/examples/quantization/resnet_qat_example.py +++ b/src/super_gradients/examples/quantization/resnet_qat_example.py @@ -89,7 +89,7 @@ def sg_selective_qdq_resnet50(): "initial_lr": args.lr, "optimizer": "SGD", "optimizer_params": {"weight_decay": 0.0001, "momentum": 0.9, "nesterov": True}, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], "test_metrics_list": [Accuracy(), Top5()], diff --git a/src/super_gradients/examples/regseg_transfer_learning_example/regseg_transfer_learning_example.py b/src/super_gradients/examples/regseg_transfer_learning_example/regseg_transfer_learning_example.py index e8bd1b167e..4e363f460e 100644 --- a/src/super_gradients/examples/regseg_transfer_learning_example/regseg_transfer_learning_example.py +++ b/src/super_gradients/examples/regseg_transfer_learning_example/regseg_transfer_learning_example.py @@ -39,13 +39,13 @@ # DEFINE TRAINING PARAMS. SEE DOCS FOR THE FULL LIST. 
train_params = { "max_epochs": 50, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "initial_lr": 0.0064, # for batch_size=16 "optimizer_params": {"momentum": 0.843, "weight_decay": 0.00036, "nesterov": True}, "cosine_final_lr_ratio": 0.1, "multiply_head_lr": 10, "optimizer": "SGD", - "loss": "bce_dice_loss", + "loss": "BCEDiceLoss", "ema": True, "zero_weight_decay_on_bias_and_bn": True, "average_best_models": True, diff --git a/src/super_gradients/examples/train_with_test_set/train_with_test_example.py b/src/super_gradients/examples/train_with_test_set/train_with_test_example.py index 6d8eb4c77b..8729101b8c 100644 --- a/src/super_gradients/examples/train_with_test_set/train_with_test_example.py +++ b/src/super_gradients/examples/train_with_test_set/train_with_test_example.py @@ -12,7 +12,7 @@ "lr_decay_factor": 0.1, "lr_mode": "step", "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "train_metrics_list": [Accuracy(), Top5()], diff --git a/src/super_gradients/recipes/cityscapes_al_ddrnet.yaml b/src/super_gradients/recipes/cityscapes_al_ddrnet.yaml index d5b69683a1..48dd55122d 100644 --- a/src/super_gradients/recipes/cityscapes_al_ddrnet.yaml +++ b/src/super_gradients/recipes/cityscapes_al_ddrnet.yaml @@ -61,7 +61,7 @@ training_hyperparams: max_epochs: 200 initial_lr: 0.0075 # batch size 24 loss: - dice_ce_edge_loss: + DiceCEEdgeLoss: num_classes: 19 ignore_index: 19 num_aux_heads: 1 diff --git a/src/super_gradients/recipes/cityscapes_ddrnet.yaml b/src/super_gradients/recipes/cityscapes_ddrnet.yaml index 9c590c4727..d6763593c8 100644 --- a/src/super_gradients/recipes/cityscapes_ddrnet.yaml +++ b/src/super_gradients/recipes/cityscapes_ddrnet.yaml @@ -57,7 +57,7 @@ training_hyperparams: max_epochs: 500 initial_lr: 0.0075 # batch size 24 loss: - dice_ce_edge_loss: + DiceCEEdgeLoss: num_classes: 19 ignore_index: 19 num_aux_heads: 1 diff --git a/src/super_gradients/recipes/cityscapes_kd_base.yaml b/src/super_gradients/recipes/cityscapes_kd_base.yaml index d6a99d88a9..40d0d6e2b4 100644 --- a/src/super_gradients/recipes/cityscapes_kd_base.yaml +++ b/src/super_gradients/recipes/cityscapes_kd_base.yaml @@ -55,7 +55,7 @@ training_hyperparams: weights: [ 1. ] kd_loss_weights: [1., 6.] - kd_loss: + KDLogitsLoss: _target_: super_gradients.training.losses.cwd_loss.ChannelWiseKnowledgeDistillationLoss temperature: 3. 
normalization_mode: channel_wise diff --git a/src/super_gradients/recipes/cityscapes_pplite_seg50.yaml b/src/super_gradients/recipes/cityscapes_pplite_seg50.yaml index 0843464532..ad1b90d03a 100644 --- a/src/super_gradients/recipes/cityscapes_pplite_seg50.yaml +++ b/src/super_gradients/recipes/cityscapes_pplite_seg50.yaml @@ -68,7 +68,7 @@ checkpoint_params: training_hyperparams: sync_bn: True loss: - dice_ce_edge_loss: + DiceCEEdgeLoss: num_classes: 19 ignore_index: 19 num_aux_heads: 3 diff --git a/src/super_gradients/recipes/cityscapes_pplite_seg75.yaml b/src/super_gradients/recipes/cityscapes_pplite_seg75.yaml index b20030cff8..cbc19e4660 100644 --- a/src/super_gradients/recipes/cityscapes_pplite_seg75.yaml +++ b/src/super_gradients/recipes/cityscapes_pplite_seg75.yaml @@ -63,7 +63,7 @@ checkpoint_params: training_hyperparams: sync_bn: True loss: - dice_ce_edge_loss: + DiceCEEdgeLoss: num_classes: 19 ignore_index: 19 num_aux_heads: 3 diff --git a/src/super_gradients/recipes/cityscapes_regseg48.yaml b/src/super_gradients/recipes/cityscapes_regseg48.yaml index 4e426100af..27c4cbd5ac 100644 --- a/src/super_gradients/recipes/cityscapes_regseg48.yaml +++ b/src/super_gradients/recipes/cityscapes_regseg48.yaml @@ -52,7 +52,7 @@ training_hyperparams: sync_bn: True resume: ${resume} max_epochs: 800 - lr_mode: poly + lr_mode: PolyLRScheduler initial_lr: 0.02 # for effective batch_size=16 lr_warmup_epochs: 0 optimizer: SGD @@ -62,7 +62,7 @@ training_hyperparams: ema: True - loss: cross_entropy + loss: LabelSmoothingCrossEntropyLoss criterion_params: ignore_index: ${cityscapes_ignored_label} diff --git a/src/super_gradients/recipes/cityscapes_segformer.yaml b/src/super_gradients/recipes/cityscapes_segformer.yaml index 60c57ba273..fb4d8bb227 100644 --- a/src/super_gradients/recipes/cityscapes_segformer.yaml +++ b/src/super_gradients/recipes/cityscapes_segformer.yaml @@ -95,7 +95,7 @@ training_hyperparams: sync_bn: True - loss: cross_entropy + loss: LabelSmoothingCrossEntropyLoss criterion_params: ignore_index: ${cityscapes_ignored_label} diff --git a/src/super_gradients/recipes/cityscapes_segformer_b0.yaml b/src/super_gradients/recipes/cityscapes_segformer_b0.yaml index 2415776c9e..a4e54798f5 100644 --- a/src/super_gradients/recipes/cityscapes_segformer_b0.yaml +++ b/src/super_gradients/recipes/cityscapes_segformer_b0.yaml @@ -18,7 +18,7 @@ experiment_name: ${architecture}_cityscapes training_hyperparams: max_epochs: 2 - lr_mode: poly + lr_mode: PolyLRScheduler initial_lr: 0.00006 # for effective batch_size=8 multi_gpu: DDP diff --git a/src/super_gradients/recipes/cityscapes_segformer_b1.yaml b/src/super_gradients/recipes/cityscapes_segformer_b1.yaml index 0bb8b2fef0..7bd9f4a26f 100644 --- a/src/super_gradients/recipes/cityscapes_segformer_b1.yaml +++ b/src/super_gradients/recipes/cityscapes_segformer_b1.yaml @@ -18,7 +18,7 @@ experiment_name: ${architecture}_cityscapes training_hyperparams: max_epochs: 2 - lr_mode: poly + lr_mode: PolyLRScheduler initial_lr: 0.00006 # for effective batch_size=8 multi_gpu: DDP diff --git a/src/super_gradients/recipes/cityscapes_segformer_b2.yaml b/src/super_gradients/recipes/cityscapes_segformer_b2.yaml index 0a0a05ea90..c793c3e1f2 100644 --- a/src/super_gradients/recipes/cityscapes_segformer_b2.yaml +++ b/src/super_gradients/recipes/cityscapes_segformer_b2.yaml @@ -18,7 +18,7 @@ experiment_name: ${architecture}_cityscapes training_hyperparams: max_epochs: 2 - lr_mode: poly + lr_mode: PolyLRScheduler initial_lr: 0.00006 # for effective batch_size=8 multi_gpu: DDP 
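The recipe changes above swap the lowercase aliases for the registered class names. For Python code, the same strings are exposed as constants on `super_gradients.common.object_names` (the `Losses` import is shown in Losses.md above; `LRSchedulers` sits in the same module per the object_names.py hunk). A minimal sketch, assuming both classes are importable from that module:

```python
# Minimal sketch: the constants resolve to the renamed registry keys shown above.
from super_gradients.common.object_names import Losses, LRSchedulers

assert Losses.CROSS_ENTROPY == "CrossEntropyLoss"      # was "cross_entropy"
assert LRSchedulers.POLY == "PolyLRScheduler"          # was "poly"

# Using the constants avoids typos in hand-written training params:
train_params = {
    "lr_mode": LRSchedulers.POLY,
    "loss": Losses.CROSS_ENTROPY,
}
```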
diff --git a/src/super_gradients/recipes/cityscapes_segformer_b3.yaml b/src/super_gradients/recipes/cityscapes_segformer_b3.yaml index dfde685aa3..31245514f1 100644 --- a/src/super_gradients/recipes/cityscapes_segformer_b3.yaml +++ b/src/super_gradients/recipes/cityscapes_segformer_b3.yaml @@ -18,7 +18,7 @@ experiment_name: ${architecture}_cityscapes training_hyperparams: max_epochs: 2 - lr_mode: poly + lr_mode: PolyLRScheduler initial_lr: 0.00006 # for effective batch_size=8 multi_gpu: DDP diff --git a/src/super_gradients/recipes/cityscapes_segformer_b4.yaml b/src/super_gradients/recipes/cityscapes_segformer_b4.yaml index 1c40dcef90..dc82e01c99 100644 --- a/src/super_gradients/recipes/cityscapes_segformer_b4.yaml +++ b/src/super_gradients/recipes/cityscapes_segformer_b4.yaml @@ -18,7 +18,7 @@ experiment_name: ${architecture}_cityscapes training_hyperparams: max_epochs: 2 - lr_mode: poly + lr_mode: PolyLRScheduler initial_lr: 0.00006 # for effective batch_size=8 mixed_precision: True diff --git a/src/super_gradients/recipes/cityscapes_segformer_b5.yaml b/src/super_gradients/recipes/cityscapes_segformer_b5.yaml index eba8aaab15..e812db4fa3 100644 --- a/src/super_gradients/recipes/cityscapes_segformer_b5.yaml +++ b/src/super_gradients/recipes/cityscapes_segformer_b5.yaml @@ -18,7 +18,7 @@ experiment_name: ${architecture}_cityscapes training_hyperparams: max_epochs: 2 - lr_mode: poly + lr_mode: PolyLRScheduler initial_lr: 0.00006 # for effective batch_size=8 mixed_precision: True diff --git a/src/super_gradients/recipes/cityscapes_stdc_seg50.yaml b/src/super_gradients/recipes/cityscapes_stdc_seg50.yaml index 4c0edec4f7..05f565256d 100644 --- a/src/super_gradients/recipes/cityscapes_stdc_seg50.yaml +++ b/src/super_gradients/recipes/cityscapes_stdc_seg50.yaml @@ -62,7 +62,7 @@ checkpoint_params: training_hyperparams: sync_bn: True loss: - dice_ce_edge_loss: + DiceCEEdgeLoss: num_classes: 19 ignore_index: 19 weights: [ 1., 0.6, 0.4, 1. ] diff --git a/src/super_gradients/recipes/cityscapes_stdc_seg75.yaml b/src/super_gradients/recipes/cityscapes_stdc_seg75.yaml index f63ad19072..c5b6ff7b5a 100644 --- a/src/super_gradients/recipes/cityscapes_stdc_seg75.yaml +++ b/src/super_gradients/recipes/cityscapes_stdc_seg75.yaml @@ -68,7 +68,7 @@ training_hyperparams: sync_bn: True loss: - stdc_loss: + STDCLoss: num_classes: 19 ignore_index: 19 mining_percent: 0.0625 # mining percentage is 1/16 of pixels following original implementation. 
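The nested mapping form used in the cityscapes STDC recipes above (`STDCLoss:` followed by its keyword arguments) has a direct in-code equivalent: instantiate the registered class and pass the object as the `loss` training parameter, as described under "Passing Instantiated nn.Module Objects as Loss Functions" in Losses.md. A hedged sketch, assuming `STDCLoss` accepts the same keyword arguments the recipes pass:

```python
# Sketch only: kwargs mirror the recipe keys above; the exact STDCLoss signature is assumed.
from super_gradients.training.losses import STDCLoss

stdc_loss = STDCLoss(
    num_classes=19,
    ignore_index=19,
    mining_percent=0.0625,          # 1/16 of pixels, following the original implementation
    weights=[1.0, 0.6, 0.4, 1.0],
)

train_params = {
    "loss": stdc_loss,              # instantiated object instead of the registered string name
}
```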
diff --git a/src/super_gradients/recipes/imagenet_resnet50_kd.yaml b/src/super_gradients/recipes/imagenet_resnet50_kd.yaml index 60bb7db496..2bc9109f46 100644 --- a/src/super_gradients/recipes/imagenet_resnet50_kd.yaml +++ b/src/super_gradients/recipes/imagenet_resnet50_kd.yaml @@ -25,7 +25,7 @@ val_dataloader: imagenet_val resume: False training_hyperparams: resume: ${resume} - loss: kd_loss + loss: KDLogitsLoss criterion_params: distillation_loss_coeff: 0.8 task_loss_fn: diff --git a/src/super_gradients/recipes/quantization_params/default_quantization_params.yaml b/src/super_gradients/recipes/quantization_params/default_quantization_params.yaml index 2b08ae0c02..0d9ceb8e36 100644 --- a/src/super_gradients/recipes/quantization_params/default_quantization_params.yaml +++ b/src/super_gradients/recipes/quantization_params/default_quantization_params.yaml @@ -7,7 +7,7 @@ selective_quantizer_params: skip_modules: # optional list of module names (strings) to skip from quantization calib_params: - histogram_calib_method: "percentile" # calibration method for all "histogram" calibrators, acceptable types are ["percentile", "entropy", mse"], "max" calibrators always use "max" + histogram_calib_method: "percentile" # calibration method for all "histogram" calibrators, acceptable types are ["percentile", "entropy", "mse"], "max" calibrators always use "max" percentile: 99.99 # percentile for all histogram calibrators with method "percentile", other calibrators are not affected num_calib_batches: # number of batches to use for calibration, if None, 512 / batch_size will be used verbose: False # if calibrator should be verbose diff --git a/src/super_gradients/recipes/roboflow_ppyoloe.yaml b/src/super_gradients/recipes/roboflow_ppyoloe.yaml index 23a2801a66..c904cf96c5 100644 --- a/src/super_gradients/recipes/roboflow_ppyoloe.yaml +++ b/src/super_gradients/recipes/roboflow_ppyoloe.yaml @@ -47,7 +47,7 @@ training_hyperparams: dataset_name: ${dataset_name} output_path: ${result_path} loss: - ppyoloe_loss: + PPYoloELoss: num_classes: ${num_classes} reg_max: ${arch_params.head.reg_max} diff --git a/src/super_gradients/recipes/roboflow_yolo_nas_m.yaml b/src/super_gradients/recipes/roboflow_yolo_nas_m.yaml index 0c0c43c569..2d6641e801 100644 --- a/src/super_gradients/recipes/roboflow_yolo_nas_m.yaml +++ b/src/super_gradients/recipes/roboflow_yolo_nas_m.yaml @@ -46,7 +46,7 @@ training_hyperparams: zero_weight_decay_on_bias_and_bn: True lr_warmup_epochs: 3 - warmup_mode: linear_epoch_step + warmup_mode: LinearEpochLRWarmup initial_lr: 4e-4 cosine_final_lr_ratio: 0.1 @@ -66,7 +66,7 @@ training_hyperparams: phase_callbacks: [] loss: - ppyoloe_loss: + PPYoloELoss: num_classes: ${num_classes} reg_max: 16 diff --git a/src/super_gradients/recipes/roboflow_yolo_nas_s.yaml b/src/super_gradients/recipes/roboflow_yolo_nas_s.yaml index cf94960756..8fb2baf901 100644 --- a/src/super_gradients/recipes/roboflow_yolo_nas_s.yaml +++ b/src/super_gradients/recipes/roboflow_yolo_nas_s.yaml @@ -46,7 +46,7 @@ training_hyperparams: zero_weight_decay_on_bias_and_bn: True lr_warmup_epochs: 3 - warmup_mode: linear_epoch_step + warmup_mode: LinearEpochLRWarmup initial_lr: 5e-4 cosine_final_lr_ratio: 0.1 @@ -66,7 +66,7 @@ training_hyperparams: phase_callbacks: [] loss: - ppyoloe_loss: + PPYoloELoss: num_classes: ${num_classes} reg_max: 16 diff --git a/src/super_gradients/recipes/supervisely_unet.yaml b/src/super_gradients/recipes/supervisely_unet.yaml index a05b8f8bd0..e5caa4b3b8 100644 --- 
a/src/super_gradients/recipes/supervisely_unet.yaml +++ b/src/super_gradients/recipes/supervisely_unet.yaml @@ -29,7 +29,7 @@ training_hyperparams: initial_lr: 0.025 loss: - bce_dice_loss: + BCEDiceLoss: loss_weights: [ 1., 1. ] logits: True diff --git a/src/super_gradients/recipes/training_hyperparams/cifar10_resnet_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/cifar10_resnet_train_params.yaml index 55c65dc639..0905ba57ff 100644 --- a/src/super_gradients/recipes/training_hyperparams/cifar10_resnet_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/cifar10_resnet_train_params.yaml @@ -10,10 +10,10 @@ lr_updates: step: 50 lr_decay_factor: 0.1 -lr_mode: step +lr_mode: StepLRScheduler lr_warmup_epochs: 0 initial_lr: 0.1 -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss optimizer: SGD criterion_params: {} @@ -34,4 +34,3 @@ valid_metrics_list: # metrics for evaluation - Top5 _convert_: all - diff --git a/src/super_gradients/recipes/training_hyperparams/cityscapes_default_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/cityscapes_default_train_params.yaml index 47f4baaa03..3ceb393b10 100644 --- a/src/super_gradients/recipes/training_hyperparams/cityscapes_default_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/cityscapes_default_train_params.yaml @@ -2,7 +2,7 @@ defaults: - default_train_params max_epochs: 800 -lr_mode: poly +lr_mode: PolyLRScheduler initial_lr: 0.01 # for effective batch_size=32 lr_warmup_epochs: 10 multiply_head_lr: 10. diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_dekr_pose_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_dekr_pose_train_params.yaml index 045d74986d..7ed162ad83 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco2017_dekr_pose_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco2017_dekr_pose_train_params.yaml @@ -8,11 +8,11 @@ ema_params: beta: 20 max_epochs: 150 -lr_mode: cosine +lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0.1 batch_accumulate: 1 initial_lr: 1e-3 -loss: dekr_loss +loss: DEKRLoss criterion_params: heatmap_loss: qfl diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_ppyoloe_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_ppyoloe_train_params.yaml index 21f24ec157..b80d8fcb66 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco2017_ppyoloe_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco2017_ppyoloe_train_params.yaml @@ -4,13 +4,13 @@ defaults: max_epochs: 500 static_assigner_end_epoch: 150 -warmup_mode: "linear_batch_step" +warmup_mode: LinearBatchLRWarmup warmup_initial_lr: 1e-6 lr_warmup_steps: 1000 lr_warmup_epochs: 0 initial_lr: 2e-3 -lr_mode: cosine +lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0.1 zero_weight_decay_on_bias_and_bn: False @@ -19,7 +19,7 @@ batch_accumulate: 1 save_ckpt_epoch_list: [200, 250, 300, 350, 400, 450] loss: - ppyoloe_loss: + PPYoloELoss: num_classes: ${arch_params.num_classes} reg_max: ${arch_params.head.reg_max} diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_rescoring_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_rescoring_train_params.yaml index 8257edd78e..62ce33e6f2 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco2017_rescoring_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco2017_rescoring_train_params.yaml @@ -8,11 
+8,11 @@ ema_params: beta: 20 max_epochs: 50 -lr_mode: cosine +lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0.1 batch_accumulate: 1 initial_lr: 0.001 -loss: rescoring_loss +loss: RescoringLoss criterion_params: {} mixed_precision: False diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_ssd_lite_mobilenet_v2_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_ssd_lite_mobilenet_v2_train_params.yaml index f3e29743cb..65239ffa13 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco2017_ssd_lite_mobilenet_v2_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco2017_ssd_lite_mobilenet_v2_train_params.yaml @@ -3,11 +3,11 @@ defaults: ema: True max_epochs: 400 -lr_mode: cosine +lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0.01 batch_accumulate: 1 initial_lr: 0.01 -loss: ssd_loss +loss: SSDLoss criterion_params: alpha: 1.0 diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_yolo_nas_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_yolo_nas_train_params.yaml index 83d6ec799b..cf7c7add94 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco2017_yolo_nas_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco2017_yolo_nas_train_params.yaml @@ -3,13 +3,13 @@ defaults: max_epochs: 300 -warmup_mode: "linear_batch_step" +warmup_mode: LinearBatchLRWarmup warmup_initial_lr: 1e-6 lr_warmup_steps: 1000 lr_warmup_epochs: 0 initial_lr: 2e-4 -lr_mode: cosine +lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0.1 zero_weight_decay_on_bias_and_bn: True @@ -18,7 +18,7 @@ batch_accumulate: 1 save_ckpt_epoch_list: [100, 200, 250] loss: - ppyoloe_loss: + PPYoloELoss: use_static_assigner: False num_classes: ${arch_params.num_classes} reg_max: 16 diff --git a/src/super_gradients/recipes/training_hyperparams/coco2017_yolox_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco2017_yolox_train_params.yaml index cb0df61965..fcc3fa4ba1 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco2017_yolox_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco2017_yolox_train_params.yaml @@ -2,7 +2,7 @@ defaults: - default_train_params max_epochs: 300 -lr_mode: cosine +lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0.05 lr_warmup_epochs: 5 lr_cooldown_epochs: 15 @@ -12,7 +12,7 @@ batch_accumulate: 1 save_ckpt_epoch_list: [285] -loss: yolox_loss +loss: YoloXDetectionLoss criterion_params: strides: [8, 16, 32] # output strides of all yolo outputs diff --git a/src/super_gradients/recipes/training_hyperparams/coco_segmentation_shelfnet_lw_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/coco_segmentation_shelfnet_lw_train_params.yaml index fd644b0913..69e0fe5032 100644 --- a/src/super_gradients/recipes/training_hyperparams/coco_segmentation_shelfnet_lw_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/coco_segmentation_shelfnet_lw_train_params.yaml @@ -3,11 +3,11 @@ defaults: max_epochs: 150 initial_lr: 5e-3 -loss: shelfnet_ohem_loss +loss: ShelfNetOHEMLoss optimizer: SGD mixed_precision: True batch_accumulate: 3 -lr_mode: poly +lr_mode: PolyLRScheduler optimizer_params: momentum: 0.9 weight_decay: 1e-4 diff --git a/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml index a37e6c05f5..0015f58e9d 100644 --- 
a/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/default_train_params.yaml @@ -13,17 +13,17 @@ resume_from_remote_sg_logger: False # bool (default=False), When true, ckpt_name ckpt_name: ckpt_latest.pth # The checkpoint (.pth file) filename in CKPT_ROOT_DIR/EXPERIMENT_NAME/ to use when resume=True and resume_path=None lr_mode: # Union[str, Mapping] - # when str: Learning rate scheduling policy, one of ['step','poly','cosine','function'] + # when str: Learning rate scheduling policy, one of ["StepLRScheduler", "PolyLRScheduler", "CosineLRScheduler", "ExponentialLRScheduler", "FunctionLRScheduler"] # when Mapping: refers to a torch.optim.lr_scheduler._LRScheduler, following the below API: lr_mode = {LR_SCHEDULER_CLASS_NAME: {**LR_SCHEDULER_KWARGS, "phase": XXX, "metric_name": XXX) -lr_schedule_function: # Learning rate scheduling function to be used when `lr_mode` is 'function'. +lr_schedule_function: # Learning rate scheduling function to be used when `lr_mode` is 'FunctionLRScheduler'. lr_warmup_epochs: 0 # number of epochs for learning rate warm up - see https://arxiv.org/pdf/1706.02677.pdf (Section 2.2). -lr_warmup_steps: 0 # number of warmup steps (Used when warmup_mode=linear_batch_step) +lr_warmup_steps: 0 # number of warmup steps (Used when warmup_mode=LinearBatchLRWarmup) lr_cooldown_epochs: 0 # epochs to cooldown LR (i.e the last epoch from scheduling view point=max_epochs-cooldown) -warmup_initial_lr: # Initial lr for linear_epoch_step/linear_batch_step. When none is given, initial_lr/(warmup_epochs+1) will be used. -step_lr_update_freq: # (float) update frequency in epoch units for computing lr_updates when lr_mode=`step`. -cosine_final_lr_ratio: 0.01 # final learning rate ratio (only relevant when `lr_mode`='cosine') -warmup_mode: linear_epoch_step # learning rate warmup scheme, currently 'linear_epoch_step' and 'linear_batch_step' are supported +warmup_initial_lr: # Initial lr for LinearEpochLRWarmup/LinearBatchLRWarmup. When none is given, initial_lr/(warmup_epochs+1) will be used. +step_lr_update_freq: # (float) update frequency in epoch units for computing lr_updates when lr_mode=`StepLRScheduler`. +cosine_final_lr_ratio: 0.01 # final learning rate ratio (only relevant when `lr_mode`='CosineLRScheduler') +warmup_mode: LinearEpochLRWarmup # learning rate warmup scheme, currently ['LinearEpochLRWarmup', 'LinearEpochLRWarmup', 'LinearBatchLRWarmup'] are supported lr_updates: _target_: super_gradients.training.utils.utils.empty_list # This is a workaround to instantiate a list using _target_. 
If we would instantiate as "lr_updates: []", diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_efficientnet_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_efficientnet_train_params.yaml index 3f8c1b122c..766b968597 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_efficientnet_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_efficientnet_train_params.yaml @@ -2,7 +2,7 @@ defaults: - default_train_params max_epochs: 450 -lr_mode: step +lr_mode: StepLRScheduler step_lr_update_freq: 2.4 initial_lr: 0.016 lr_warmup_epochs: 3 @@ -20,7 +20,7 @@ ema_params: decay: 0.9999 decay_type: constant -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss criterion_params: smooth_eps: 0.1 diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv2_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv2_train_params.yaml index a0703b43b1..813ff21a43 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv2_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv2_train_params.yaml @@ -3,7 +3,7 @@ defaults: max_epochs: 450 -lr_mode: step +lr_mode: StepLRScheduler initial_lr: 0.032 # for total batch-size of 512 lr_decay_factor: 0.973 lr_updates: @@ -20,7 +20,7 @@ optimizer_params: alpha: 0.9 eps: 0.001 -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss zero_weight_decay_on_bias_and_bn: True ema: True diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv3_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv3_train_params.yaml index b7aa565199..1dddb79b14 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv3_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_mobilenetv3_train_params.yaml @@ -2,7 +2,7 @@ defaults: - default_train_params max_epochs: 150 -lr_mode: cosine +lr_mode: CosineLRScheduler initial_lr: 0.1 optimizer: SGD @@ -10,7 +10,7 @@ optimizer_params: weight_decay: 0.00004 lr_warmup_epochs: 5 -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss criterion_params: smooth_eps: 0.1 diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_regnetY_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_regnetY_train_params.yaml index ad8d2f498c..b1b90729ea 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_regnetY_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_regnetY_train_params.yaml @@ -2,7 +2,7 @@ defaults: - default_train_params max_epochs: 450 -lr_mode: step +lr_mode: StepLRScheduler step_lr_update_freq: 2.4 initial_lr: 0.016 lr_warmup_epochs: 3 @@ -20,7 +20,7 @@ ema_params: decay_type: constant decay: 0.9999 -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss criterion_params: smooth_eps: 0.1 diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_repvgg_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_repvgg_train_params.yaml index 215923583e..966aa8b194 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_repvgg_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_repvgg_train_params.yaml @@ -2,11 +2,11 @@ defaults: - default_train_params max_epochs: 120 -lr_mode: cosine +lr_mode: CosineLRScheduler initial_lr: 0.1 cosine_final_lr_ratio: 0 -loss: 
cross_entropy +loss: LabelSmoothingCrossEntropyLoss zero_weight_decay_on_bias_and_bn: True average_best_models: True diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_resnet50_kd_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_resnet50_kd_train_params.yaml index 84c1a09501..6e39f6a4d2 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_resnet50_kd_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_resnet50_kd_train_params.yaml @@ -3,7 +3,7 @@ defaults: max_epochs: 610 initial_lr: 5e-3 -lr_mode: cosine +lr_mode: CosineLRScheduler lr_warmup_epochs: 5 lr_cooldown_epochs: 10 ema: True @@ -12,7 +12,7 @@ zero_weight_decay_on_bias_and_bn: True optimizer: Lamb optimizer_params: weight_decay: 0.02 -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss train_metrics_list: # metrics for evaluation - Accuracy - Top5 @@ -21,4 +21,4 @@ valid_metrics_list: # metrics for evaluation - Top5 -_convert_: all \ No newline at end of file +_convert_: all diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_resnet50_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_resnet50_train_params.yaml index 11df8abe3c..4dac223ac0 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_resnet50_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_resnet50_train_params.yaml @@ -3,13 +3,13 @@ defaults: max_epochs: 400 initial_lr: 0.1 -lr_mode: cosine +lr_mode: CosineLRScheduler lr_warmup_epochs: 5 ema: False save_ckpt_epoch_list: [ 50, 100, 150, 200, 300 ] mixed_precision: True zero_weight_decay_on_bias_and_bn: True -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss train_metrics_list: # metrics for evaluation - Accuracy - Top5 @@ -21,4 +21,4 @@ valid_metrics_list: # metrics for evaluation metric_to_watch: Accuracy greater_metric_to_watch_is_better: True -_convert_: all \ No newline at end of file +_convert_: all diff --git a/src/super_gradients/recipes/training_hyperparams/imagenet_vit_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/imagenet_vit_train_params.yaml index 749a1dc921..63598dd9ce 100644 --- a/src/super_gradients/recipes/training_hyperparams/imagenet_vit_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/imagenet_vit_train_params.yaml @@ -3,13 +3,13 @@ defaults: max_epochs: 10 initial_lr: 0.03 -lr_mode: cosine +lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0 lr_warmup_epochs: 1 warmup_initial_lr: 0 -warmup_mode: linear_epoch_step +warmup_mode: LinearEpochLRWarmup ema: False -loss: cross_entropy +loss: LabelSmoothingCrossEntropyLoss clip_grad_norm: 1 optimizer: SGD optimizer_params: diff --git a/src/super_gradients/recipes/training_hyperparams/supervisely_default_train_params.yaml b/src/super_gradients/recipes/training_hyperparams/supervisely_default_train_params.yaml index 7a770dd305..b544a381ad 100644 --- a/src/super_gradients/recipes/training_hyperparams/supervisely_default_train_params.yaml +++ b/src/super_gradients/recipes/training_hyperparams/supervisely_default_train_params.yaml @@ -2,7 +2,7 @@ defaults: - default_train_params max_epochs: 100 -lr_mode: cosine +lr_mode: CosineLRScheduler cosine_final_lr_ratio: 0.01 initial_lr: 0.1 lr_warmup_epochs: 0 diff --git a/src/super_gradients/training/losses/__init__.py b/src/super_gradients/training/losses/__init__.py index f14781c2a2..b0eae2eb2e 100755 --- a/src/super_gradients/training/losses/__init__.py +++ 
b/src/super_gradients/training/losses/__init__.py @@ -1,6 +1,6 @@ from super_gradients.training.losses.focal_loss import FocalLoss from super_gradients.training.losses.kd_losses import KDLogitsLoss -from super_gradients.training.losses.label_smoothing_cross_entropy_loss import LabelSmoothingCrossEntropyLoss +from super_gradients.training.losses.label_smoothing_cross_entropy_loss import CrossEntropyLoss, LabelSmoothingCrossEntropyLoss from super_gradients.training.losses.r_squared_loss import RSquaredLoss from super_gradients.training.losses.shelfnet_ohem_loss import ShelfNetOHEMLoss from super_gradients.training.losses.shelfnet_semantic_encoding_loss import ShelfNetSemanticEncodingLoss @@ -20,7 +20,7 @@ "LOSSES", "Losses", "FocalLoss", - "LabelSmoothingCrossEntropyLoss", + "CrossEntropyLoss", "ShelfNetOHEMLoss", "ShelfNetSemanticEncodingLoss", "YoloXDetectionLoss", @@ -34,4 +34,5 @@ "DEKRLoss", "STDCLoss", "RescoringLoss", + "LabelSmoothingCrossEntropyLoss", ] diff --git a/src/super_gradients/training/losses/bce_dice_loss.py b/src/super_gradients/training/losses/bce_dice_loss.py index 973dd9b2ca..7539120111 100644 --- a/src/super_gradients/training/losses/bce_dice_loss.py +++ b/src/super_gradients/training/losses/bce_dice_loss.py @@ -7,7 +7,7 @@ from super_gradients.training.losses.dice_loss import BinaryDiceLoss -@register_loss(Losses.BCE_DICE_LOSS) +@register_loss(name=Losses.BCE_DICE_LOSS, deprecated_name="bce_dice_loss") class BCEDiceLoss(torch.nn.Module): """ Binary Cross Entropy + Dice Loss diff --git a/src/super_gradients/training/losses/dekr_loss.py b/src/super_gradients/training/losses/dekr_loss.py index 26698db494..8b2a8ea8b5 100644 --- a/src/super_gradients/training/losses/dekr_loss.py +++ b/src/super_gradients/training/losses/dekr_loss.py @@ -7,7 +7,7 @@ from super_gradients.common.registry.registry import register_loss -@register_loss(Losses.DEKR_LOSS) +@register_loss(name=Losses.DEKR_LOSS, deprecated_name="dekr_loss") class DEKRLoss(nn.Module): """ Implementation of the loss function from the "Bottom-Up Human Pose Estimation Via Disentangled Keypoint Regression" diff --git a/src/super_gradients/training/losses/dice_ce_edge_loss.py b/src/super_gradients/training/losses/dice_ce_edge_loss.py index f7cec313a5..0a0550188e 100644 --- a/src/super_gradients/training/losses/dice_ce_edge_loss.py +++ b/src/super_gradients/training/losses/dice_ce_edge_loss.py @@ -11,7 +11,7 @@ from super_gradients.training.losses.mask_loss import MaskAttentionLoss -@register_loss(Losses.DICE_CE_EDGE_LOSS) +@register_loss(name=Losses.DICE_CE_EDGE_LOSS, deprecated_name="dice_ce_edge_loss") class DiceCEEdgeLoss(_Loss): def __init__( self, diff --git a/src/super_gradients/training/losses/kd_losses.py b/src/super_gradients/training/losses/kd_losses.py index 2d3c1908c9..a42ee2c448 100644 --- a/src/super_gradients/training/losses/kd_losses.py +++ b/src/super_gradients/training/losses/kd_losses.py @@ -15,13 +15,13 @@ def forward(self, student_output, teacher_output): return super(KDklDivLoss, self).forward(torch.log_softmax(student_output, dim=1), torch.softmax(teacher_output, dim=1)) -@register_loss(Losses.KD_LOSS) +@register_loss(name=Losses.KD_LOSS, deprecated_name="kd_loss") class KDLogitsLoss(_Loss): """Knowledge distillation loss, wraps the task loss and distillation loss""" def __init__(self, task_loss_fn: _Loss, distillation_loss_fn: _Loss = KDklDivLoss(), distillation_loss_coeff: float = 0.5): """ - :param task_loss_fn: task loss. E.g., LabelSmoothingCrossEntropyLoss + :param task_loss_fn: task loss. 
E.g., CrossEntropyLoss :param distillation_loss_fn: distillation loss. E.g., KLDivLoss :param distillation_loss_coeff: """ diff --git a/src/super_gradients/training/losses/label_smoothing_cross_entropy_loss.py b/src/super_gradients/training/losses/label_smoothing_cross_entropy_loss.py index affcbdb6db..f9a1f36476 100755 --- a/src/super_gradients/training/losses/label_smoothing_cross_entropy_loss.py +++ b/src/super_gradients/training/losses/label_smoothing_cross_entropy_loss.py @@ -4,6 +4,7 @@ from super_gradients.common.object_names import Losses from super_gradients.common.registry.registry import register_loss +from super_gradients.common.deprecate import deprecated def onehot(indexes, N=None, ignore_index=None): @@ -83,12 +84,12 @@ def cross_entropy(inputs, target, weight=None, ignore_index=-100, reduction="mea return loss -@register_loss(Losses.CROSS_ENTROPY) -class LabelSmoothingCrossEntropyLoss(nn.CrossEntropyLoss): +@register_loss(name=Losses.CROSS_ENTROPY, deprecated_name="cross_entropy") +class CrossEntropyLoss(nn.CrossEntropyLoss): """CrossEntropyLoss - with ability to recieve distrbution as targets, and optional label smoothing""" def __init__(self, weight=None, ignore_index=-100, reduction="mean", smooth_eps=None, smooth_dist=None, from_logits=True): - super(LabelSmoothingCrossEntropyLoss, self).__init__(weight=weight, ignore_index=ignore_index, reduction=reduction) + super(CrossEntropyLoss, self).__init__(weight=weight, ignore_index=ignore_index, reduction=reduction) self.smooth_eps = smooth_eps self.smooth_dist = smooth_dist self.from_logits = from_logits @@ -109,3 +110,8 @@ def forward(self, input, target, smooth_dist=None): # CHANGED TO THE CURRENT FORMAT- OUR CRITERION FUNCTIONS SHOULD ALL NPW RETURN A TUPLE OF (LOSS_FOR_BACKPROP, ADDITIONAL_ITEMS) # WHERE ADDITIONAL ITEMS ARE TORCH TENSORS OF SIZE (N_ITEMS,...) DETACHED FROM THEIR GRADIENTS FOR LOGGING return loss, loss.unsqueeze(0).detach() + + +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=CrossEntropyLoss) +class LabelSmoothingCrossEntropyLoss(CrossEntropyLoss): + ... 
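The changes above keep the old spellings working: `register_loss(name=..., deprecated_name=...)` records the lowercase alias, `warn_if_deprecated` emits a one-time `DeprecationWarning` when it is used, and the old class name survives as a thin `@deprecated` subclass. A minimal sketch of what that means for user code:

```python
# Sketch of the backward-compatibility path introduced above.
train_params_old = {"loss": "cross_entropy"}        # still resolves, but warns:
                                                    # "Object name `cross_entropy` is now deprecated. ..."
train_params_new = {"loss": "CrossEntropyLoss"}     # preferred registered name

# The old class name is kept as an alias of the renamed class:
from super_gradients.training.losses import CrossEntropyLoss, LabelSmoothingCrossEntropyLoss
assert issubclass(LabelSmoothingCrossEntropyLoss, CrossEntropyLoss)
```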
diff --git a/src/super_gradients/training/losses/ppyolo_loss.py b/src/super_gradients/training/losses/ppyolo_loss.py index e42588f2e7..8fc851896c 100644 --- a/src/super_gradients/training/losses/ppyolo_loss.py +++ b/src/super_gradients/training/losses/ppyolo_loss.py @@ -628,7 +628,7 @@ def __call__(self, pbox: Tensor, gbox: Tensor, iou_weight=1.0, loc_reweight=None return loss * self.loss_weight -@register_loss(Losses.PPYOLOE_LOSS) +@register_loss(name=Losses.PPYOLOE_LOSS, deprecated_name="ppyoloe_loss") class PPYoloELoss(nn.Module): def __init__( self, diff --git a/src/super_gradients/training/losses/r_squared_loss.py b/src/super_gradients/training/losses/r_squared_loss.py index ece6baa63c..d5c15fad82 100755 --- a/src/super_gradients/training/losses/r_squared_loss.py +++ b/src/super_gradients/training/losses/r_squared_loss.py @@ -9,7 +9,7 @@ from super_gradients.training.utils import convert_to_tensor -@register_loss(Losses.R_SQUARED_LOSS) +@register_loss(name=Losses.R_SQUARED_LOSS, deprecated_name="r_squared_loss") class RSquaredLoss(_Loss): def forward(self, output, target): # FIXME - THIS NEEDS TO BE CHANGED SUCH THAT THIS CLASS INHERETS FROM _Loss (TAKE A LOOK AT YoLoV3DetectionLoss) diff --git a/src/super_gradients/training/losses/rescoring_loss.py b/src/super_gradients/training/losses/rescoring_loss.py index c27acef5b2..3ead9958bb 100644 --- a/src/super_gradients/training/losses/rescoring_loss.py +++ b/src/super_gradients/training/losses/rescoring_loss.py @@ -7,7 +7,7 @@ from super_gradients.common.registry import register_loss -@register_loss(Losses.RESCORING_LOSS) +@register_loss(name=Losses.RESCORING_LOSS, deprecated_name="rescoring_loss") class RescoringLoss(nn.Module): def __init__(self): super().__init__() diff --git a/src/super_gradients/training/losses/seg_kd_loss.py b/src/super_gradients/training/losses/seg_kd_loss.py index 58f9ea3ec8..2791b527d8 100644 --- a/src/super_gradients/training/losses/seg_kd_loss.py +++ b/src/super_gradients/training/losses/seg_kd_loss.py @@ -9,7 +9,7 @@ class SegKDLoss(nn.Module): """ Wrapper loss for semantic segmentation KD. - This loss includes two loss components, `ce_loss` i.e CrossEntropyLoss, and `kd_loss` i.e + This loss includes two loss components, `ce_loss` i.e CrossEntropyLoss, and `KDLogitsLoss` i.e `ChannelWiseKnowledgeDistillationLoss`. 
""" diff --git a/src/super_gradients/training/losses/shelfnet_ohem_loss.py b/src/super_gradients/training/losses/shelfnet_ohem_loss.py index f699bf83a8..61a06dde52 100755 --- a/src/super_gradients/training/losses/shelfnet_ohem_loss.py +++ b/src/super_gradients/training/losses/shelfnet_ohem_loss.py @@ -5,7 +5,7 @@ from super_gradients.training.losses.ohem_ce_loss import OhemCELoss -@register_loss(Losses.SHELFNET_OHEM_LOSS) +@register_loss(name=Losses.SHELFNET_OHEM_LOSS, deprecated_name="shelfnet_ohem_loss") class ShelfNetOHEMLoss(OhemCELoss): def __init__(self, threshold: float = 0.7, mining_percent: float = 1e-4, ignore_lb: int = 255): """ diff --git a/src/super_gradients/training/losses/shelfnet_semantic_encoding_loss.py b/src/super_gradients/training/losses/shelfnet_semantic_encoding_loss.py index 864788bf26..c630ebb973 100755 --- a/src/super_gradients/training/losses/shelfnet_semantic_encoding_loss.py +++ b/src/super_gradients/training/losses/shelfnet_semantic_encoding_loss.py @@ -6,7 +6,7 @@ from super_gradients.common.registry.registry import register_loss -@register_loss(Losses.SHELFNET_SE_LOSS) +@register_loss(name=Losses.SHELFNET_SE_LOSS, deprecated_name="shelfnet_se_loss") class ShelfNetSemanticEncodingLoss(nn.CrossEntropyLoss): """2D Cross Entropy Loss with Auxilary Loss""" diff --git a/src/super_gradients/training/losses/ssd_loss.py b/src/super_gradients/training/losses/ssd_loss.py index c183c745ce..be1b27e906 100755 --- a/src/super_gradients/training/losses/ssd_loss.py +++ b/src/super_gradients/training/losses/ssd_loss.py @@ -52,7 +52,7 @@ def forward(self, pred_labels, target_labels): return closs -@register_loss(Losses.SSD_LOSS) +@register_loss(name=Losses.SSD_LOSS, deprecated_name="ssd_loss") class SSDLoss(_Loss): """ Implements the loss as the sum of the followings: diff --git a/src/super_gradients/training/losses/stdc_loss.py b/src/super_gradients/training/losses/stdc_loss.py index ad0fa44182..6b0a3375e0 100644 --- a/src/super_gradients/training/losses/stdc_loss.py +++ b/src/super_gradients/training/losses/stdc_loss.py @@ -111,7 +111,7 @@ def forward(self, detail_out: torch.Tensor, detail_target: torch.Tensor): return self.weights[0] * bce_loss + self.weights[1] * dice_loss -@register_loss(Losses.STDC_LOSS) +@register_loss(name=Losses.STDC_LOSS, deprecated_name="stdc_loss") class STDCLoss(_Loss): """ Loss class of STDC-Seg training. diff --git a/src/super_gradients/training/losses/yolox_loss.py b/src/super_gradients/training/losses/yolox_loss.py index f5d6696f1f..b8ffe022d7 100644 --- a/src/super_gradients/training/losses/yolox_loss.py +++ b/src/super_gradients/training/losses/yolox_loss.py @@ -81,7 +81,7 @@ def forward(self, pred, target): return loss -@register_loss(Losses.YOLOX_LOSS) +@register_loss(name=Losses.YOLOX_LOSS, deprecated_name="yolox_loss") class YoloXDetectionLoss(_Loss): """ Calculate YOLOX loss: @@ -626,7 +626,7 @@ def dynamic_k_matching(self, cost, pair_wise_ious, gt_classes, num_gt, fg_mask): return num_fg, gt_matched_classes, pred_ious_this_matching, matched_gt_inds -@register_loss(Losses.YOLOX_FAST_LOSS) +@register_loss(name=Losses.YOLOX_FAST_LOSS, deprecated_name="yolox_fast_loss") class YoloXFastDetectionLoss(YoloXDetectionLoss): """ A completely new implementation of YOLOX loss. 
diff --git a/src/super_gradients/training/params.py b/src/super_gradients/training/params.py index 21c18d05c6..1388457841 100755 --- a/src/super_gradients/training/params.py +++ b/src/super_gradients/training/params.py @@ -48,7 +48,7 @@ "save_tensorboard_remote": False, # upload tensorboard files to s3 "save_logs_remote": False, }, # upload log files to s3 - "warmup_mode": "linear_step", + "warmup_mode": "LinearEpochLRWarmup", "step_lr_update_freq": None, "lr_updates": [], "clip_grad_norm": None, @@ -100,7 +100,7 @@ "lr_warmup_epochs": {"type": "number", "minimum": 0, "maximum": 10}, "initial_lr": {"type": "number", "exclusiveMinimum": 0, "maximum": 10}, }, - "if": {"properties": {"lr_mode": {"const": "step"}}}, + "if": {"properties": {"lr_mode": {"const": "StepLRScheduler"}}}, "then": {"required": ["lr_updates", "lr_decay_factor"]}, "required": ["max_epochs", "lr_mode", "initial_lr", "loss"], } diff --git a/src/super_gradients/training/pre_launch_callbacks/pre_launch_callbacks.py b/src/super_gradients/training/pre_launch_callbacks/pre_launch_callbacks.py index 486db3f085..cca464fb5f 100644 --- a/src/super_gradients/training/pre_launch_callbacks/pre_launch_callbacks.py +++ b/src/super_gradients/training/pre_launch_callbacks/pre_launch_callbacks.py @@ -300,8 +300,8 @@ def modify_params_for_qat( logger.warning(f"New learning rate: {training_hyperparams['initial_lr']}") logger.warning(f"New weight decay: {training_hyperparams['optimizer_params']['weight_decay']}") # as recommended by pytorch-quantization docs - if get_param(training_hyperparams, "lr_mode") != "cosine": - training_hyperparams["lr_mode"] = "cosine" + if get_param(training_hyperparams, "lr_mode") != "CosineLRScheduler": + training_hyperparams["lr_mode"] = "CosineLRScheduler" training_hyperparams["cosine_final_lr_ratio"] = cosine_final_lr_ratio logger.warning( f"lr_mode will be set to cosine for QAT run instead of {get_param(training_hyperparams, 'lr_mode')} with " diff --git a/src/super_gradients/training/sg_trainer/sg_trainer.py b/src/super_gradients/training/sg_trainer/sg_trainer.py index 8e7943b15d..5f224838f6 100755 --- a/src/super_gradients/training/sg_trainer/sg_trainer.py +++ b/src/super_gradients/training/sg_trainer/sg_trainer.py @@ -773,27 +773,29 @@ def train( - `lr_updates` : list(int) - List of fixed epoch numbers to perform learning rate updates when `lr_mode='step'`. + List of fixed epoch numbers to perform learning rate updates when `lr_mode='StepLRScheduler'`. - `lr_decay_factor` : float - Decay factor to apply to the learning rate at each update when `lr_mode='step'`. + Decay factor to apply to the learning rate at each update when `lr_mode='StepLRScheduler'`. - `lr_mode` : Union[str, Mapping], When str: - Learning rate scheduling policy, one of ['step','poly','cosine','function']. + Learning rate scheduling policy, one of ['StepLRScheduler','PolyLRScheduler','CosineLRScheduler','FunctionLRScheduler']. - 'step' refers to constant updates at epoch numbers passed through `lr_updates`. Each update decays the learning rate by `lr_decay_factor`. + 'StepLRScheduler' refers to constant updates at epoch numbers passed through `lr_updates`. + Each update decays the learning rate by `lr_decay_factor`. - 'cosine' refers to the Cosine Anealing policy as mentioned in https://arxiv.org/abs/1608.03983. + 'CosineLRScheduler' refers to the Cosine Anealing policy as mentioned in https://arxiv.org/abs/1608.03983. The final learning rate ratio is controlled by `cosine_final_lr_ratio` training parameter. 
- 'poly' refers to the polynomial decrease: in each epoch iteration `self.lr = self.initial_lr * pow((1.0 - (current_iter / max_iter)), 0.9)` + 'PolyLRScheduler' refers to the polynomial decrease: + in each epoch iteration `self.lr = self.initial_lr * pow((1.0 - (current_iter / max_iter)), 0.9)` - 'function' refers to a user-defined learning rate scheduling function, that is passed through `lr_schedule_function`. + 'FunctionLRScheduler' refers to a user-defined learning rate scheduling function, that is passed through `lr_schedule_function`. @@ -828,7 +830,7 @@ def train( - `lr_schedule_function` : Union[callable,None] - Learning rate scheduling function to be used when `lr_mode` is 'function'. + Learning rate scheduling function to be used when `lr_mode` is 'FunctionLRScheduler'. - `warmup_mode`: Union[str, Type[LRCallbackBase], None] @@ -851,7 +853,7 @@ def train( The capping is done to avoid interference of warmup with epoch-based schedulers. - `cosine_final_lr_ratio` : float (default=0.01) - Final learning rate ratio (only relevant when `lr_mode`='cosine'). The cosine starts from initial_lr and reaches + Final learning rate ratio (only relevant when `lr_mode`='CosineLRScheduler'). The cosine starts from initial_lr and reaches initial_lr * cosine_final_lr_ratio in last epoch - `inital_lr` : float @@ -863,13 +865,13 @@ def train( Loss function for training. One of SuperGradient's built in options: - "cross_entropy": LabelSmoothingCrossEntropyLoss, - "mse": MSELoss, - "r_squared_loss": RSquaredLoss, - "detection_loss": YoLoV3DetectionLoss, - "shelfnet_ohem_loss": ShelfNetOHEMLoss, - "shelfnet_se_loss": ShelfNetSemanticEncodingLoss, - "ssd_loss": SSDLoss, + - CrossEntropyLoss, + - MSELoss, + - RSquaredLoss, + - YoLoV3DetectionLoss, + - ShelfNetOHEMLoss, + - ShelfNetSemanticEncodingLoss, + - SSDLoss, or user defined nn.module loss function. 
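With the renamed schedulers, warmups, and losses, a `training_params` dict passed to `Trainer.train` now uses the class-style strings documented above. The example below mirrors the updated test configurations later in this patch; the concrete hyperparameter values are placeholders.

```python
# Minimal training_params using the new string identifiers (values are placeholders).
train_params = {
    "max_epochs": 10,
    "initial_lr": 0.1,
    "lr_mode": "CosineLRScheduler",        # was "cosine"
    "cosine_final_lr_ratio": 0.01,
    "warmup_mode": "LinearEpochLRWarmup",  # was "linear_epoch_step"
    "lr_warmup_epochs": 3,
    "loss": "CrossEntropyLoss",            # was "cross_entropy"
    "optimizer": "SGD",
    "train_metrics_list": ["Accuracy"],
    "valid_metrics_list": ["Accuracy"],
    "metric_to_watch": "Accuracy",
}
```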
@@ -1240,6 +1242,10 @@ def forward(self, inputs, targets): warmup_mode = self.training_params.warmup_mode warmup_callback_cls = None if isinstance(warmup_mode, str): + from super_gradients.common.registry.registry import warn_if_deprecated + + warn_if_deprecated(warmup_mode, LR_WARMUP_CLS_DICT) + warmup_callback_cls = LR_WARMUP_CLS_DICT[warmup_mode] elif isinstance(warmup_mode, type) and issubclass(warmup_mode, LRCallbackBase): warmup_callback_cls = warmup_mode diff --git a/src/super_gradients/training/utils/callbacks/__init__.py b/src/super_gradients/training/utils/callbacks/__init__.py index db705a5d5c..31103bd3ee 100644 --- a/src/super_gradients/training/utils/callbacks/__init__.py +++ b/src/super_gradients/training/utils/callbacks/__init__.py @@ -3,13 +3,13 @@ ModelConversionCheckCallback, DeciLabUploadCallback, LRCallbackBase, - EpochStepWarmupLRCallback, - BatchStepLinearWarmupLRCallback, - StepLRCallback, - ExponentialLRCallback, - PolyLRCallback, - CosineLRCallback, - FunctionLRCallback, + LinearEpochLRWarmup, + LinearBatchLRWarmup, + StepLRScheduler, + ExponentialLRScheduler, + PolyLRScheduler, + CosineLRScheduler, + FunctionLRScheduler, IllegalLRSchedulerMetric, LRSchedulerCallback, MetricsUpdateCallback, @@ -21,6 +21,13 @@ YoloXTrainingStageSwitchCallback, TestLRCallback, TimerCallback, + EpochStepWarmupLRCallback, + BatchStepLinearWarmupLRCallback, + StepLRCallback, + ExponentialLRCallback, + PolyLRCallback, + CosineLRCallback, + FunctionLRCallback, ) from super_gradients.training.utils.callbacks.ppyoloe_switch_callback import PPYoloETrainingStageSwitchCallback from super_gradients.common.object_names import Callbacks, LRSchedulers, LRWarmups @@ -40,13 +47,13 @@ "ModelConversionCheckCallback", "DeciLabUploadCallback", "LRCallbackBase", - "EpochStepWarmupLRCallback", - "BatchStepLinearWarmupLRCallback", - "StepLRCallback", - "ExponentialLRCallback", - "PolyLRCallback", - "CosineLRCallback", - "FunctionLRCallback", + "LinearEpochLRWarmup", + "LinearBatchLRWarmup", + "StepLRScheduler", + "ExponentialLRScheduler", + "PolyLRScheduler", + "CosineLRScheduler", + "FunctionLRScheduler", "IllegalLRSchedulerMetric", "LRSchedulerCallback", "MetricsUpdateCallback", @@ -60,4 +67,11 @@ "TestLRCallback", "PPYoloETrainingStageSwitchCallback", "TimerCallback", + "EpochStepWarmupLRCallback", + "BatchStepLinearWarmupLRCallback", + "StepLRCallback", + "ExponentialLRCallback", + "PolyLRCallback", + "CosineLRCallback", + "FunctionLRCallback", ] diff --git a/src/super_gradients/training/utils/callbacks/callbacks.py b/src/super_gradients/training/utils/callbacks/callbacks.py index 11796e31ab..e0b52fa327 100644 --- a/src/super_gradients/training/utils/callbacks/callbacks.py +++ b/src/super_gradients/training/utils/callbacks/callbacks.py @@ -12,9 +12,9 @@ import onnx import onnxruntime import torch -from deprecated import deprecated from torch.utils.data import DataLoader from torchmetrics import MetricCollection, Metric +from torchvision.utils import draw_segmentation_masks from super_gradients.common.abstractions.abstract_logger import get_logger from super_gradients.common.decorators.factory_decorator import resolve_param @@ -32,7 +32,8 @@ from super_gradients.training.utils.segmentation_utils import BinarySegmentationVisualization from super_gradients.common.environment.checkpoints_dir_utils import get_project_checkpoints_dir_path from super_gradients.training.utils.utils import unwrap_model -from torchvision.utils import draw_segmentation_masks +from super_gradients.common.deprecate import 
deprecated + logger = get_logger(__name__) @@ -276,8 +277,8 @@ def update_lr(self, optimizer, epoch, batch_idx=None): param_group["lr"] = self.lr -@register_lr_warmup(LRWarmups.LINEAR_EPOCH_STEP) -class EpochStepWarmupLRCallback(LRCallbackBase): +@register_lr_warmup(LRWarmups.LINEAR_EPOCH_STEP, deprecated_name="linear_epoch_step") +class LinearEpochLRWarmup(LRCallbackBase): """ LR scheduling callback for linear step warmup. This scheduler uses a whole epoch as single step. LR climbs from warmup_initial_lr with even steps to initial lr. When warmup_initial_lr is None - LR climb starts from @@ -286,7 +287,7 @@ class EpochStepWarmupLRCallback(LRCallbackBase): """ def __init__(self, **kwargs): - super(EpochStepWarmupLRCallback, self).__init__(Phase.TRAIN_EPOCH_START, **kwargs) + super().__init__(Phase.TRAIN_EPOCH_START, **kwargs) self.warmup_initial_lr = self.training_params.warmup_initial_lr or self.initial_lr / (self.training_params.lr_warmup_epochs + 1) self.warmup_step_size = ( (self.initial_lr - self.warmup_initial_lr) / self.training_params.lr_warmup_epochs if self.training_params.lr_warmup_epochs > 0 else 0 @@ -300,20 +301,23 @@ def is_lr_scheduling_enabled(self, context): return self.training_params.lr_warmup_epochs > 0 and self.training_params.lr_warmup_epochs >= context.epoch -@register_lr_warmup(LRWarmups.LINEAR_STEP) -class LinearStepWarmupLRCallback(EpochStepWarmupLRCallback): - """Deprecated, use EpochStepWarmupLRCallback instead""" +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=LinearEpochLRWarmup) +class EpochStepWarmupLRCallback(LinearEpochLRWarmup): + ... - def __init__(self, **kwargs): - logger.warning( - f"Parameter {LRWarmups.LINEAR_STEP} has been made deprecated and will be removed in the next SG release. " - f"Please use `{LRWarmups.LINEAR_EPOCH_STEP}` instead." - ) - super(LinearStepWarmupLRCallback, self).__init__(**kwargs) +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=LinearEpochLRWarmup) +class LinearLRWarmup(LinearEpochLRWarmup): + ... -@register_lr_warmup(LRWarmups.LINEAR_BATCH_STEP) -class BatchStepLinearWarmupLRCallback(Callback): + +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=LinearEpochLRWarmup) +class LinearStepWarmupLRCallback(LinearEpochLRWarmup): + ... + + +@register_lr_warmup(LRWarmups.LINEAR_BATCH_STEP, deprecated_name="linear_batch_step") +class LinearBatchLRWarmup(Callback): """ LR scheduling callback for linear step warmup on each batch step. LR climbs from warmup_initial_lr with to initial lr. @@ -339,7 +343,7 @@ def __init__( :param kwargs: """ - super(BatchStepLinearWarmupLRCallback, self).__init__() + super().__init__() if lr_warmup_steps > train_loader_len: logger.warning( @@ -384,16 +388,21 @@ def update_lr(self, optimizer, epoch, batch_idx=None): param_group["lr"] = self.lr -@register_lr_scheduler(LRSchedulers.STEP) -class StepLRCallback(LRCallbackBase): +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=LinearBatchLRWarmup) +class BatchStepLinearWarmupLRCallback(LinearBatchLRWarmup): + ... + + +@register_lr_scheduler(LRSchedulers.STEP, deprecated_name="step") +class StepLRScheduler(LRCallbackBase): """ Hard coded step learning rate scheduling (i.e at specific milestones). 
""" def __init__(self, lr_updates, lr_decay_factor, step_lr_update_freq=None, **kwargs): - super(StepLRCallback, self).__init__(Phase.TRAIN_EPOCH_END, **kwargs) + super().__init__(Phase.TRAIN_EPOCH_END, **kwargs) if step_lr_update_freq and len(lr_updates): - raise ValueError("Only one of [lr_updates, step_lr_update_freq] should be passed to StepLRCallback constructor") + raise ValueError("Only one of [lr_updates, step_lr_update_freq] should be passed to StepLRScheduler constructor") if step_lr_update_freq: max_epochs = self.training_params.max_epochs - self.training_params.lr_cooldown_epochs @@ -415,8 +424,13 @@ def is_lr_scheduling_enabled(self, context): return self.training_params.lr_warmup_epochs <= context.epoch -@register_lr_scheduler(LRSchedulers.EXP) -class ExponentialLRCallback(LRCallbackBase): +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=StepLRScheduler) +class StepLRCallback(StepLRScheduler): + ... + + +@register_lr_scheduler(LRSchedulers.EXP, deprecated_name="exp") +class ExponentialLRScheduler(LRCallbackBase): """ Exponential decay learning rate scheduling. Decays the learning rate by `lr_decay_factor` every epoch. """ @@ -436,14 +450,19 @@ def is_lr_scheduling_enabled(self, context): return self.training_params.lr_warmup_epochs <= context.epoch < post_warmup_epochs -@register_lr_scheduler(LRSchedulers.POLY) -class PolyLRCallback(LRCallbackBase): +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=ExponentialLRScheduler) +class ExponentialLRCallback(ExponentialLRScheduler): + ... + + +@register_lr_scheduler(LRSchedulers.POLY, deprecated_name="poly") +class PolyLRScheduler(LRCallbackBase): """ Hard coded polynomial decay learning rate scheduling (i.e at specific milestones). """ def __init__(self, max_epochs, **kwargs): - super(PolyLRCallback, self).__init__(Phase.TRAIN_BATCH_STEP, **kwargs) + super().__init__(Phase.TRAIN_BATCH_STEP, **kwargs) self.max_epochs = max_epochs def perform_scheduling(self, context): @@ -459,14 +478,19 @@ def is_lr_scheduling_enabled(self, context): return self.training_params.lr_warmup_epochs <= context.epoch < post_warmup_epochs -@register_lr_scheduler(LRSchedulers.COSINE) -class CosineLRCallback(LRCallbackBase): +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=PolyLRScheduler) +class PolyLRCallback(PolyLRScheduler): + ... + + +@register_lr_scheduler(LRSchedulers.COSINE, deprecated_name="cosine") +class CosineLRScheduler(LRCallbackBase): """ Hard coded step Cosine anealing learning rate scheduling. """ def __init__(self, max_epochs, cosine_final_lr_ratio, **kwargs): - super(CosineLRCallback, self).__init__(Phase.TRAIN_BATCH_STEP, **kwargs) + super().__init__(Phase.TRAIN_BATCH_STEP, **kwargs) self.max_epochs = max_epochs self.cosine_final_lr_ratio = cosine_final_lr_ratio @@ -497,15 +521,20 @@ def compute_learning_rate(cls, step: Union[float, np.ndarray], total_steps: floa return lr * (1 - final_lr_ratio) + (initial_lr * final_lr_ratio) -@register_lr_scheduler(LRSchedulers.FUNCTION) -class FunctionLRCallback(LRCallbackBase): +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=CosineLRScheduler) +class CosineLRCallback(CosineLRScheduler): + ... + + +@register_lr_scheduler(LRSchedulers.FUNCTION, deprecated_name="function") +class FunctionLRScheduler(LRCallbackBase): """ Hard coded rate scheduling for user defined lr scheduling function. 
""" - @deprecated(version="3.2.0", reason="This callback is deprecated and will be removed in future versions.") + @deprecated(deprecated_since="3.2.0", removed_from="3.5.0", reason="This callback is deprecated and will be removed in future versions.") def __init__(self, max_epochs, lr_schedule_function, **kwargs): - super(FunctionLRCallback, self).__init__(Phase.TRAIN_BATCH_STEP, **kwargs) + super().__init__(Phase.TRAIN_BATCH_STEP, **kwargs) assert callable(lr_schedule_function), "self.lr_function must be callable" self.lr_schedule_function = lr_schedule_function self.max_epochs = max_epochs @@ -527,6 +556,11 @@ def perform_scheduling(self, context): self.update_lr(context.optimizer, context.epoch, context.batch_idx) +@deprecated(deprecated_since="3.2.1", removed_from="3.5.0", target=FunctionLRScheduler) +class FunctionLRCallback(FunctionLRScheduler): + ... + + class IllegalLRSchedulerMetric(Exception): """Exception raised illegal combination of training parameters. @@ -924,16 +958,18 @@ def create_lr_scheduler_callback( When str: - Learning rate scheduling policy, one of ['step','poly','cosine','function']. + Learning rate scheduling policy, one of ['StepLRScheduler','PolyLRScheduler','CosineLRScheduler','FunctionLRScheduler']. - 'step' refers to constant updates at epoch numbers passed through `lr_updates`. Each update decays the learning rate by `lr_decay_factor`. + 'StepLRScheduler' refers to constant updates at epoch numbers passed through `lr_updates`. + Each update decays the learning rate by `lr_decay_factor`. - 'cosine' refers to the Cosine Anealing policy as mentioned in https://arxiv.org/abs/1608.03983. + 'CosineLRScheduler' refers to the Cosine Anealing policy as mentioned in https://arxiv.org/abs/1608.03983. The final learning rate ratio is controlled by `cosine_final_lr_ratio` training parameter. - 'poly' refers to the polynomial decrease: in each epoch iteration `self.lr = self.initial_lr * pow((1.0 - (current_iter / max_iter)), 0.9)` + 'PolyLRScheduler' refers to the polynomial decrease: + in each epoch iteration `self.lr = self.initial_lr * pow((1.0 - (current_iter / max_iter)), 0.9)` - 'function' refers to a user-defined learning rate scheduling function, that is passed through `lr_schedule_function`. + 'FunctionLRScheduler' refers to a user-defined learning rate scheduling function, that is passed through `lr_schedule_function`. @@ -1339,7 +1375,7 @@ class ExtremeBatchSegVisualizationCallback(ExtremeBatchCaseVisualizationCallback max=False ignore_idx=19), ExtremeBatchSegVisualizationCallback( - loss_to_monitor="LabelSmoothingCrossEntropyLoss" + loss_to_monitor="CrossEntropyLoss" max=True ignore_idx=19)] ...} diff --git a/src/super_gradients/training/utils/deprecated_utils.py b/src/super_gradients/training/utils/deprecated_utils.py index 433615d0e1..8da5257139 100644 --- a/src/super_gradients/training/utils/deprecated_utils.py +++ b/src/super_gradients/training/utils/deprecated_utils.py @@ -10,14 +10,14 @@ def wrap_with_warning(cls: Callable, message: str) -> Any: Emits a warning when target class of function is called. 
>>> from super_gradients.training.utils.deprecated_utils import wrap_with_warning - >>> from super_gradients.training.utils.callbacks import EpochStepWarmupLRCallback, BatchStepLinearWarmupLRCallback + >>> from super_gradients.training.utils.callbacks import LinearEpochLRWarmup, LinearBatchLRWarmup >>> >>> LR_WARMUP_CLS_DICT = { >>> "linear": wrap_with_warning( - >>> EpochStepWarmupLRCallback, + >>> LinearEpochLRWarmup, >>> message=f"Parameter `linear` has been made deprecated and will be removed in the next SG release. Please use `linear_epoch` instead", >>> ), - >>> 'linear_epoch`': EpochStepWarmupLRCallback, + >>> 'linear_epoch`': LinearEpochLRWarmup, >>> } :param cls: A class or function to wrap diff --git a/tests/end_to_end_tests/cifar_trainer_test.py b/tests/end_to_end_tests/cifar_trainer_test.py index 7b91a59cba..00398ddbd0 100644 --- a/tests/end_to_end_tests/cifar_trainer_test.py +++ b/tests/end_to_end_tests/cifar_trainer_test.py @@ -25,7 +25,7 @@ def test_train_cifar10_dataloader(self): training_params={ "max_epochs": 1, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "train_metrics_list": ["Accuracy"], "valid_metrics_list": ["Accuracy"], "metric_to_watch": "Accuracy", @@ -44,7 +44,7 @@ def test_train_cifar100_dataloader(self): training_params={ "max_epochs": 1, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "train_metrics_list": ["Accuracy"], "valid_metrics_list": ["Accuracy"], "metric_to_watch": "Accuracy", diff --git a/tests/end_to_end_tests/trainer_test.py b/tests/end_to_end_tests/trainer_test.py index 122f1e7cca..edfb4ec486 100644 --- a/tests/end_to_end_tests/trainer_test.py +++ b/tests/end_to_end_tests/trainer_test.py @@ -25,8 +25,8 @@ def setUp(cls): "lr_decay_factor": 0.1, "initial_lr": 0.1, "lr_updates": [4], - "lr_mode": "step", - "loss": "cross_entropy", + "lr_mode": "StepLRScheduler", + "loss": "CrossEntropyLoss", "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], "metric_to_watch": "Accuracy", diff --git a/tests/integration_tests/conversion_callback_test.py b/tests/integration_tests/conversion_callback_test.py index 709287b4fc..22b01512cf 100644 --- a/tests/integration_tests/conversion_callback_test.py +++ b/tests/integration_tests/conversion_callback_test.py @@ -54,10 +54,10 @@ def test_classification_architectures(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], @@ -90,7 +90,7 @@ def get_architecture_custom_config(architecture_name: str): } elif re.search(r"regseg", architecture_name): return { - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", } else: raise Exception("You tried to run a conversion test on an unknown architecture") @@ -107,7 +107,7 @@ def get_architecture_custom_config(architecture_name: str): train_params = { "max_epochs": 3, "initial_lr": 1e-2, - "lr_mode": "poly", + "lr_mode": "PolyLRScheduler", "ema": True, # unlike the paper (not specified in paper) "optimizer": "SGD", "optimizer_params": {"weight_decay": 5e-4, "momentum": 0.9}, diff --git a/tests/integration_tests/deci_lab_export_test.py b/tests/integration_tests/deci_lab_export_test.py index 3130b9d785..50e6132d2e 100644 --- a/tests/integration_tests/deci_lab_export_test.py +++ b/tests/integration_tests/deci_lab_export_test.py @@ -44,10 
+44,10 @@ def test_train_with_deci_lab_integration(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": self.optimizer, "criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], diff --git a/tests/integration_tests/ema_train_integration_test.py b/tests/integration_tests/ema_train_integration_test.py index 777e5b319c..3bca4b3204 100644 --- a/tests/integration_tests/ema_train_integration_test.py +++ b/tests/integration_tests/ema_train_integration_test.py @@ -49,11 +49,11 @@ def _train(self, ema_params): training_params = { "max_epochs": 4, "lr_updates": [4], - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_decay_factor": 0.1, "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "ema": True, diff --git a/tests/integration_tests/lr_test.py b/tests/integration_tests/lr_test.py index 82bdaec94d..1b7a4ce245 100644 --- a/tests/integration_tests/lr_test.py +++ b/tests/integration_tests/lr_test.py @@ -19,7 +19,7 @@ def setUp(cls): "max_epochs": 1, "silent_mode": True, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], "metric_to_watch": "Accuracy", @@ -45,12 +45,12 @@ def test_lr_function(initial_lr, epoch, iter, max_epoch, iters_per_epoch, **kwar return initial_lr * (1 - ((epoch * iters_per_epoch + iter) / (max_epoch * iters_per_epoch))) # test if we are able that lr_function supports functions with this structure - training_params = {**self.training_params, "lr_mode": "function", "lr_schedule_function": test_lr_function} + training_params = {**self.training_params, "lr_mode": "FunctionLRScheduler", "lr_schedule_function": test_lr_function} trainer.train( model=model, training_params=training_params, train_loader=classification_test_dataloader(), valid_loader=classification_test_dataloader() ) # test that we assert lr_function is callable - training_params = {**self.training_params, "lr_mode": "function"} + training_params = {**self.training_params, "lr_mode": "FunctionLRScheduler"} with self.assertRaises(AssertionError): trainer.train( model=model, training_params=training_params, train_loader=classification_test_dataloader(), valid_loader=classification_test_dataloader() @@ -58,14 +58,14 @@ def test_lr_function(initial_lr, epoch, iter, max_epoch, iters_per_epoch, **kwar def test_cosine_lr(self): trainer, model = self.get_trainer(self.folder_name) - training_params = {**self.training_params, "lr_mode": "cosine", "cosine_final_lr_ratio": 0.01} + training_params = {**self.training_params, "lr_mode": "CosineLRScheduler", "cosine_final_lr_ratio": 0.01} trainer.train( model=model, training_params=training_params, train_loader=classification_test_dataloader(), valid_loader=classification_test_dataloader() ) def test_step_lr(self): trainer, model = self.get_trainer(self.folder_name) - training_params = {**self.training_params, "lr_mode": "step", "lr_decay_factor": 0.1, "lr_updates": [4]} + training_params = {**self.training_params, "lr_mode": "StepLRScheduler", "lr_decay_factor": 0.1, "lr_updates": [4]} trainer.train( model=model, training_params=training_params, train_loader=classification_test_dataloader(), valid_loader=classification_test_dataloader() ) diff --git 
a/tests/integration_tests/pretrained_models_test.py b/tests/integration_tests/pretrained_models_test.py index fbf7abd96f..2ef2b7b23d 100644 --- a/tests/integration_tests/pretrained_models_test.py +++ b/tests/integration_tests/pretrained_models_test.py @@ -86,8 +86,8 @@ def setUp(self) -> None: "lr_updates": [1], "lr_decay_factor": 0.1, "initial_lr": 0.6, - "loss": "cross_entropy", - "lr_mode": "step", + "loss": "CrossEntropyLoss", + "lr_mode": "StepLRScheduler", "optimizer_params": {"weight_decay": 0.000, "momentum": 0.9}, "train_metrics_list": [Accuracy()], "valid_metrics_list": [Accuracy()], @@ -128,12 +128,12 @@ def setUp(self) -> None: ssd_dboxes = DEFAULT_SSD_LITE_MOBILENET_V2_ARCH_PARAMS["heads"]["SSDHead"]["anchors"] self.transfer_detection_train_params_ssd = { "max_epochs": 3, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "initial_lr": 0.01, "cosine_final_lr_ratio": 0.01, "lr_warmup_epochs": 3, "batch_accumulate": 1, - "loss": "ssd_loss", + "loss": "SSDLoss", "criterion_params": {"dboxes": ssd_dboxes}, "optimizer": "SGD", "warmup_momentum": 0.8, @@ -145,12 +145,12 @@ def setUp(self) -> None: } self.transfer_detection_train_params_yolox = { "max_epochs": 3, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "cosine_final_lr_ratio": 0.05, "warmup_bias_lr": 0.0, "warmup_momentum": 0.9, "initial_lr": 0.02, - "loss": "yolox_loss", + "loss": "YoloXDetectionLoss", "criterion_params": {"strides": [8, 16, 32], "num_classes": 5}, # output strides of all yolo outputs "train_metrics_list": [], "valid_metrics_list": [DetectionMetrics(post_prediction_callback=YoloXPostPredictionCallback(), normalize_targets=True, num_cls=5)], @@ -215,7 +215,7 @@ def setUp(self) -> None: "max_epochs": 3, "initial_lr": 1e-2, "loss": DDRNetLoss(), - "lr_mode": "poly", + "lr_mode": "PolyLRScheduler", "ema": True, # unlike the paper (not specified in paper) "average_best_models": True, "optimizer": "SGD", @@ -232,7 +232,7 @@ def setUp(self) -> None: "max_epochs": 3, "initial_lr": 1e-2, "loss": STDCLoss(num_classes=5), - "lr_mode": "poly", + "lr_mode": "PolyLRScheduler", "ema": True, # unlike the paper (not specified in paper) "optimizer": "SGD", "optimizer_params": {"weight_decay": 5e-4, "momentum": 0.9}, @@ -246,8 +246,8 @@ def setUp(self) -> None: self.regseg_transfer_segmentation_train_params = { "max_epochs": 3, "initial_lr": 1e-2, - "loss": "cross_entropy", - "lr_mode": "poly", + "loss": "CrossEntropyLoss", + "lr_mode": "PolyLRScheduler", "ema": True, # unlike the paper (not specified in paper) "optimizer": "SGD", "optimizer_params": {"weight_decay": 5e-4, "momentum": 0.9}, diff --git a/tests/recipe_training_tests/coded_qat_launch_test.py b/tests/recipe_training_tests/coded_qat_launch_test.py index e5bb8531c1..243d78cd53 100644 --- a/tests/recipe_training_tests/coded_qat_launch_test.py +++ b/tests/recipe_training_tests/coded_qat_launch_test.py @@ -17,10 +17,10 @@ def test_qat_launch(self): "max_epochs": 10, "lr_updates": [], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -78,10 +78,10 @@ def test_ptq_launch(self): "max_epochs": 10, "lr_updates": [], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": 
{}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/dataset_statistics_test.py b/tests/unit_tests/dataset_statistics_test.py index b3881b4c9a..f68fbb562e 100644 --- a/tests/unit_tests/dataset_statistics_test.py +++ b/tests/unit_tests/dataset_statistics_test.py @@ -24,9 +24,9 @@ def test_dataset_statistics_tensorboard_logger(self): training_params = { "max_epochs": 1, # we dont really need the actual training to run - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "initial_lr": 0.01, - "loss": "yolox_loss", + "loss": "YoloXDetectionLoss", "criterion_params": {"strides": [8, 16, 32], "num_classes": 80}, "dataset_statistics": True, "launch_tensorboard": True, diff --git a/tests/unit_tests/detection_dataset_test.py b/tests/unit_tests/detection_dataset_test.py index 6e6efdd523..cb38faa7bb 100644 --- a/tests/unit_tests/detection_dataset_test.py +++ b/tests/unit_tests/detection_dataset_test.py @@ -168,12 +168,12 @@ def test_coco_detection_metrics_with_classwise_ap(self): detection_train_params_yolox = { "max_epochs": 5, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "cosine_final_lr_ratio": 0.05, "warmup_bias_lr": 0.0, "warmup_momentum": 0.9, "initial_lr": 0.02, - "loss": "yolox_loss", + "loss": "YoloXDetectionLoss", "mixed_precision": False, "criterion_params": {"strides": [8, 16, 32], "num_classes": 80}, # output strides of all yolo outputs "train_metrics_list": [], diff --git a/tests/unit_tests/double_training_test.py b/tests/unit_tests/double_training_test.py index b556aaabc2..4a9ab0b265 100644 --- a/tests/unit_tests/double_training_test.py +++ b/tests/unit_tests/double_training_test.py @@ -24,7 +24,7 @@ def test_call_train_twice(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, "loss": torch.nn.CrossEntropyLoss(), diff --git a/tests/unit_tests/early_stop_test.py b/tests/unit_tests/early_stop_test.py index 1feeb6a9df..2082d2fd73 100644 --- a/tests/unit_tests/early_stop_test.py +++ b/tests/unit_tests/early_stop_test.py @@ -49,10 +49,10 @@ def setUp(self) -> None: "max_epochs": self.max_epochs, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/extreme_batch_cb_test.py b/tests/unit_tests/extreme_batch_cb_test.py index b4692b6274..26bfd636a2 100644 --- a/tests/unit_tests/extreme_batch_cb_test.py +++ b/tests/unit_tests/extreme_batch_cb_test.py @@ -40,7 +40,7 @@ def setUpClass(cls): "max_epochs": 3, "initial_lr": 1e-2, "loss": DDRNetLoss(), - "lr_mode": "poly", + "lr_mode": "PolyLRScheduler", "ema": True, "optimizer": "SGD", "mixed_precision": False, @@ -56,7 +56,7 @@ def setUpClass(cls): "max_epochs": 3, "initial_lr": 1e-2, "loss": PPYoloELoss(num_classes=1, use_static_assigner=False, reg_max=16), - "lr_mode": "poly", + "lr_mode": "PolyLRScheduler", "ema": True, "optimizer": "SGD", "mixed_precision": False, diff --git a/tests/unit_tests/factories_test.py b/tests/unit_tests/factories_test.py index c0def96302..e3b7babba0 100644 --- a/tests/unit_tests/factories_test.py +++ b/tests/unit_tests/factories_test.py @@ -8,7 +8,7 @@ from super_gradients.common.object_names import Models from super_gradients.training import models from 
super_gradients.training.dataloaders.dataloaders import classification_test_dataloader -from super_gradients.training.losses import LabelSmoothingCrossEntropyLoss +from super_gradients.training.losses import CrossEntropyLoss from super_gradients.training.metrics import Accuracy, Top5 from torch import nn @@ -21,10 +21,10 @@ def test_training_with_factories(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "torch.optim.ASGD", # use an optimizer by factory "criterion_params": {}, "optimizer_params": {"lambd": 0.0001, "alpha": 0.75}, @@ -47,7 +47,7 @@ def test_training_with_factories_with_typos(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, "loss": "crossEnt_ropy", @@ -64,7 +64,7 @@ def test_training_with_factories_with_typos(self): self.assertIsInstance(trainer.train_metrics.Accuracy, Accuracy) self.assertIsInstance(trainer.valid_metrics.Top5, Top5) self.assertIsInstance(trainer.optimizer, torch.optim.Adam) - self.assertIsInstance(trainer.criterion, LabelSmoothingCrossEntropyLoss) + self.assertIsInstance(trainer.criterion, CrossEntropyLoss) def test_activations_factory(self): class DummyModel(nn.Module): diff --git a/tests/unit_tests/forward_pass_prep_fn_test.py b/tests/unit_tests/forward_pass_prep_fn_test.py index 97c8a27af7..57ccf27f69 100644 --- a/tests/unit_tests/forward_pass_prep_fn_test.py +++ b/tests/unit_tests/forward_pass_prep_fn_test.py @@ -38,11 +38,11 @@ def test_resizing_with_forward_pass_prep_fn(self): train_params = { "max_epochs": 2, "cosine_final_lr_ratio": 0.2, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "lr_cooldown_epochs": 2, "lr_warmup_epochs": 3, "initial_lr": 1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/initialize_with_dataloaders_test.py b/tests/unit_tests/initialize_with_dataloaders_test.py index 6d9c14034d..9853431498 100644 --- a/tests/unit_tests/initialize_with_dataloaders_test.py +++ b/tests/unit_tests/initialize_with_dataloaders_test.py @@ -35,9 +35,9 @@ def test_train_with_dataloaders(self): "max_epochs": 2, "lr_updates": [5, 6, 12], "lr_decay_factor": 0.01, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "initial_lr": 0.01, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "optimizer_params": {"weight_decay": 1e-5, "momentum": 0.9}, "train_metrics_list": [Accuracy()], diff --git a/tests/unit_tests/kd_ema_test.py b/tests/unit_tests/kd_ema_test.py index 1f59084fe7..bbdf9164bd 100644 --- a/tests/unit_tests/kd_ema_test.py +++ b/tests/unit_tests/kd_ema_test.py @@ -20,7 +20,7 @@ def setUp(cls): "max_epochs": 3, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, "loss": KDLogitsLoss(torch.nn.CrossEntropyLoss()), diff --git a/tests/unit_tests/kd_trainer_test.py b/tests/unit_tests/kd_trainer_test.py index 3b866e7b2a..98b3a37f3f 100644 --- a/tests/unit_tests/kd_trainer_test.py +++ b/tests/unit_tests/kd_trainer_test.py @@ -42,7 +42,7 @@ def setUp(cls): "max_epochs": 3, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 
0.1, "loss": KDLogitsLoss(torch.nn.CrossEntropyLoss()), diff --git a/tests/unit_tests/load_ema_ckpt_test.py b/tests/unit_tests/load_ema_ckpt_test.py index b070c8d862..c1d1fe1d98 100644 --- a/tests/unit_tests/load_ema_ckpt_test.py +++ b/tests/unit_tests/load_ema_ckpt_test.py @@ -23,10 +23,10 @@ def setUp(self) -> None: "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/local_ckpt_head_replacement_test.py b/tests/unit_tests/local_ckpt_head_replacement_test.py index 8ba7371683..0d100e364a 100644 --- a/tests/unit_tests/local_ckpt_head_replacement_test.py +++ b/tests/unit_tests/local_ckpt_head_replacement_test.py @@ -14,10 +14,10 @@ def test_local_ckpt_head_replacement(self): "max_epochs": 1, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/loss_loggings_test.py b/tests/unit_tests/loss_loggings_test.py index 54d476f0ad..5294885bd1 100644 --- a/tests/unit_tests/loss_loggings_test.py +++ b/tests/unit_tests/loss_loggings_test.py @@ -35,7 +35,7 @@ def test_single_item_logging(self): "max_epochs": 1, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, "loss": torch.nn.CrossEntropyLoss(), @@ -59,7 +59,7 @@ def test_multiple_unnamed_components_loss_logging(self): "max_epochs": 1, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, "loss": CriterionWithUnnamedComponents(), @@ -83,7 +83,7 @@ def test_multiple_named_components_loss_logging(self): "max_epochs": 1, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, "loss": CriterionWithNamedComponents(), diff --git a/tests/unit_tests/lr_cooldown_test.py b/tests/unit_tests/lr_cooldown_test.py index 2f04d56d5b..668bc0c74f 100644 --- a/tests/unit_tests/lr_cooldown_test.py +++ b/tests/unit_tests/lr_cooldown_test.py @@ -19,11 +19,11 @@ def test_lr_cooldown_with_lr_scheduling(self): train_params = { "max_epochs": 7, "cosine_final_lr_ratio": 0.2, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "lr_cooldown_epochs": 2, "lr_warmup_epochs": 3, "initial_lr": 1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/lr_warmup_test.py b/tests/unit_tests/lr_warmup_test.py index c8473cdb53..2521090499 100644 --- a/tests/unit_tests/lr_warmup_test.py +++ b/tests/unit_tests/lr_warmup_test.py @@ -6,7 +6,7 @@ from super_gradients.training.dataloaders.dataloaders import classification_test_dataloader from super_gradients.training.metrics import Accuracy from super_gradients.training.models import LeNet -from super_gradients.training.utils.callbacks import TestLRCallback, LRCallbackBase, Phase, Callback, PhaseContext, CosineLRCallback +from super_gradients.training.utils.callbacks import TestLRCallback, 
LRCallbackBase, Phase, Callback, PhaseContext, CosineLRScheduler class CollectLRCallback(Callback): @@ -58,10 +58,10 @@ def test_lr_warmup(self): "max_epochs": 5, "lr_updates": [], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 3, "initial_lr": 1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -71,7 +71,7 @@ def test_lr_warmup(self): "greater_metric_to_watch_is_better": True, "ema": False, "phase_callbacks": phase_callbacks, - "warmup_mode": "linear_epoch_step", + "warmup_mode": "LinearEpochLRWarmup", } expected_lrs = [0.25, 0.5, 0.75, 1.0, 1.0] @@ -94,10 +94,10 @@ def test_lr_warmup_with_lr_scheduling(self): train_params = { "max_epochs": 5, "cosine_final_lr_ratio": 0.2, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "lr_warmup_epochs": 3, "initial_lr": 1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -107,7 +107,7 @@ def test_lr_warmup_with_lr_scheduling(self): "greater_metric_to_watch_is_better": True, "ema": False, "phase_callbacks": phase_callbacks, - "warmup_mode": "linear_epoch_step", + "warmup_mode": "LinearEpochLRWarmup", } expected_lrs = [0.25, 0.5, 0.75, 0.9236067977499791, 0.4763932022500211] @@ -137,13 +137,13 @@ def test_warmup_linear_batch_step(self): train_params = { "max_epochs": max_epochs, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "cosine_final_lr_ratio": cosine_final_lr_ratio, "warmup_initial_lr": warmup_initial_lr, - "warmup_mode": "linear_batch_step", + "warmup_mode": "LinearBatchLRWarmup", "lr_warmup_steps": lr_warmup_steps, "initial_lr": 1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -161,7 +161,7 @@ def test_warmup_linear_batch_step(self): expected_warmup_lrs = np.linspace(warmup_initial_lr, initial_lr, lr_warmup_steps).tolist() total_steps = max_epochs * len(train_loader) - lr_warmup_steps - expected_cosine_lrs = CosineLRCallback.compute_learning_rate( + expected_cosine_lrs = CosineLRScheduler.compute_learning_rate( step=np.arange(0, total_steps), total_steps=total_steps, initial_lr=initial_lr, final_lr_ratio=cosine_final_lr_ratio ) @@ -186,11 +186,11 @@ def test_warmup_linear_epoch_step(self): "max_epochs": 5, "lr_updates": [], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 3, "initial_lr": 1, "warmup_initial_lr": 4.0, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -200,7 +200,7 @@ def test_warmup_linear_epoch_step(self): "greater_metric_to_watch_is_better": True, "ema": False, "phase_callbacks": [collect_lr_callback], - "warmup_mode": "linear_epoch_step", + "warmup_mode": "LinearEpochLRWarmup", } expected_lrs = [4.0, 3.0, 2.0, 1.0, 1.0] @@ -224,9 +224,9 @@ def test_custom_lr_warmup(self): "max_epochs": 5, "lr_updates": [], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 3, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/max_batches_loop_break_test.py 
b/tests/unit_tests/max_batches_loop_break_test.py index 075b5a590b..bbaa483e09 100644 --- a/tests/unit_tests/max_batches_loop_break_test.py +++ b/tests/unit_tests/max_batches_loop_break_test.py @@ -23,10 +23,10 @@ def test_max_train_batches_loop_break(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -58,10 +58,10 @@ def test_max_valid_batches_loop_break(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/optimizer_params_override_test.py b/tests/unit_tests/optimizer_params_override_test.py index a3bcf9789c..f0b250b160 100644 --- a/tests/unit_tests/optimizer_params_override_test.py +++ b/tests/unit_tests/optimizer_params_override_test.py @@ -16,10 +16,10 @@ def test_optimizer_params_partial_override(self): "max_epochs": 1, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"momentum": 0.9}, @@ -45,10 +45,10 @@ def test_optimizer_params_full_override(self): "max_epochs": 1, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "zero_weight_decay_on_bias_and_bn": True, diff --git a/tests/unit_tests/phase_context_test.py b/tests/unit_tests/phase_context_test.py index a9d37f7f6f..5fb20101c4 100644 --- a/tests/unit_tests/phase_context_test.py +++ b/tests/unit_tests/phase_context_test.py @@ -28,10 +28,10 @@ def context_information_in_train_test(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/preprocessing_unit_test.py b/tests/unit_tests/preprocessing_unit_test.py index 9416f309ba..4c1d20f805 100644 --- a/tests/unit_tests/preprocessing_unit_test.py +++ b/tests/unit_tests/preprocessing_unit_test.py @@ -97,12 +97,12 @@ def test_setting_preprocessing_params_from_validation_set(self): detection_train_params_yolox = { "max_epochs": 1, - "lr_mode": "cosine", + "lr_mode": "CosineLRScheduler", "cosine_final_lr_ratio": 0.05, "warmup_bias_lr": 0.0, "warmup_momentum": 0.9, "initial_lr": 0.02, - "loss": "yolox_loss", + "loss": "YoloXDetectionLoss", "criterion_params": {"strides": [8, 16, 32], "num_classes": 80}, # output strides of all yolo outputs "train_metrics_list": [], "valid_metrics_list": [DetectionMetrics(post_prediction_callback=YoloXPostPredictionCallback(), normalize_targets=True, num_cls=80)], @@ -168,12 +168,12 @@ def test_setting_preprocessing_params_from_checkpoint(self): detection_train_params_yolox = { "max_epochs": 1, - "lr_mode": 
"cosine", + "lr_mode": "CosineLRScheduler", "cosine_final_lr_ratio": 0.05, "warmup_bias_lr": 0.0, "warmup_momentum": 0.9, "initial_lr": 0.02, - "loss": "yolox_loss", + "loss": "YoloXDetectionLoss", "criterion_params": {"strides": [8, 16, 32], "num_classes": 80}, # output strides of all yolo outputs "train_metrics_list": [], "valid_metrics_list": [DetectionMetrics(post_prediction_callback=YoloXPostPredictionCallback(), normalize_targets=True, num_cls=80)], diff --git a/tests/unit_tests/resume_training_test.py b/tests/unit_tests/resume_training_test.py index 0c3bf69abb..6c8bc0b465 100644 --- a/tests/unit_tests/resume_training_test.py +++ b/tests/unit_tests/resume_training_test.py @@ -31,10 +31,10 @@ def test_resume_training(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -77,10 +77,10 @@ def test_resume_run_id_training(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -142,10 +142,10 @@ def test_resume_external_training(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -190,10 +190,10 @@ def test_resume_external_training_same_dir(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/save_ckpt_test.py b/tests/unit_tests/save_ckpt_test.py index dcb5208744..11ae820467 100644 --- a/tests/unit_tests/save_ckpt_test.py +++ b/tests/unit_tests/save_ckpt_test.py @@ -13,15 +13,15 @@ def setUp(self): "max_epochs": 4, "lr_decay_factor": 0.1, "lr_updates": [4], - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, "save_ckpt_epoch_list": [1, 3], - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "train_metrics_list": [Accuracy(), Top5()], "valid_metrics_list": [Accuracy(), Top5()], "metric_to_watch": "Accuracy", diff --git a/tests/unit_tests/train_after_test_test.py b/tests/unit_tests/train_after_test_test.py index 870fa072bf..d0a7ec085e 100644 --- a/tests/unit_tests/train_after_test_test.py +++ b/tests/unit_tests/train_after_test_test.py @@ -20,7 +20,7 @@ def setUp(self) -> None: "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, "loss": torch.nn.CrossEntropyLoss(), diff --git a/tests/unit_tests/train_logging_test.py b/tests/unit_tests/train_logging_test.py index 759af58988..5fbb16a539 
100644 --- a/tests/unit_tests/train_logging_test.py +++ b/tests/unit_tests/train_logging_test.py @@ -19,10 +19,10 @@ def test_train_logging(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/train_with_intialized_param_args_test.py b/tests/unit_tests/train_with_intialized_param_args_test.py index d1ed21f175..d1dcefbd22 100644 --- a/tests/unit_tests/train_with_intialized_param_args_test.py +++ b/tests/unit_tests/train_with_intialized_param_args_test.py @@ -28,7 +28,7 @@ def test_train_with_external_criterion(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, "loss": torch.nn.CrossEntropyLoss(), @@ -52,10 +52,10 @@ def test_train_with_external_optimizer(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": optimizer, "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -81,7 +81,7 @@ def test_train_with_external_scheduler(self): "phase_callbacks": phase_callbacks, "lr_warmup_epochs": 0, "initial_lr": lr, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": optimizer, "criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], @@ -103,7 +103,7 @@ def test_train_with_external_scheduler_class(self): "max_epochs": 2, "lr_warmup_epochs": 0, "initial_lr": 0.3, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": optimizer, "criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], @@ -128,7 +128,7 @@ def test_train_with_reduce_on_plateau(self): "phase_callbacks": phase_callbacks, "lr_warmup_epochs": 0, "initial_lr": lr, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": optimizer, "criterion_params": {}, "train_metrics_list": [Accuracy(), Top5()], @@ -148,10 +148,10 @@ def test_train_with_external_metric(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -178,10 +178,10 @@ def test_train_with_external_dataloaders(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/train_with_precise_bn_test.py b/tests/unit_tests/train_with_precise_bn_test.py index 7a2eff2a99..a67d87bb40 100644 --- a/tests/unit_tests/train_with_precise_bn_test.py +++ b/tests/unit_tests/train_with_precise_bn_test.py @@ -18,10 +18,10 @@ def test_train_with_precise_bn_explicit_size(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": 
"CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, @@ -47,10 +47,10 @@ def test_train_with_precise_bn_implicit_size(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/training_params_factory_test.py b/tests/unit_tests/training_params_factory_test.py index b574cce8a2..5e30984841 100644 --- a/tests/unit_tests/training_params_factory_test.py +++ b/tests/unit_tests/training_params_factory_test.py @@ -5,12 +5,12 @@ class TrainingParamsTest(unittest.TestCase): def test_get_train_params(self): train_params = training_hyperparams.coco2017_yolox_train_params() - self.assertTrue(train_params["loss"] == "yolox_loss") + self.assertTrue(train_params["loss"] == "YoloXDetectionLoss") self.assertTrue(train_params["max_epochs"] == 300) def test_get_train_params_with_overrides(self): train_params = training_hyperparams.coco2017_yolox_train_params(overriding_params={"max_epochs": 5}) - self.assertTrue(train_params["loss"] == "yolox_loss") + self.assertTrue(train_params["loss"] == "YoloXDetectionLoss") self.assertTrue(train_params["max_epochs"] == 5) diff --git a/tests/unit_tests/update_param_groups_unit_test.py b/tests/unit_tests/update_param_groups_unit_test.py index f0c85c71a2..e4edd4ca02 100644 --- a/tests/unit_tests/update_param_groups_unit_test.py +++ b/tests/unit_tests/update_param_groups_unit_test.py @@ -34,11 +34,11 @@ def test_lr_scheduling_with_update_param_groups(self): train_params = { "max_epochs": 3, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_updates": [0, 1, 2], "initial_lr": 0.1, "lr_decay_factor": 1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9}, diff --git a/tests/unit_tests/vit_unit_test.py b/tests/unit_tests/vit_unit_test.py index a943671abb..b9a3527761 100644 --- a/tests/unit_tests/vit_unit_test.py +++ b/tests/unit_tests/vit_unit_test.py @@ -15,10 +15,10 @@ def setUp(self): "max_epochs": 2, "lr_updates": [1], "lr_decay_factor": 0.1, - "lr_mode": "step", + "lr_mode": "StepLRScheduler", "lr_warmup_epochs": 0, "initial_lr": 0.1, - "loss": "cross_entropy", + "loss": "CrossEntropyLoss", "optimizer": "SGD", "criterion_params": {}, "optimizer_params": {"weight_decay": 1e-4, "momentum": 0.9},