Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Feature/sg 850 support torch 2.0 #944

Merged
merged 12 commits into from
May 8, 2023
30 changes: 15 additions & 15 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ jobs:
command: |
python3 -m venv venv
. venv/bin/activate
python3 -m pip install pip==22.0.4
python3 -m pip install pip==23.1.2
cat requirements.txt | cut -f1 -d"#" | xargs -n 1 -L 1 pip install --progress-bar off
- run:
name: edit package version
Expand Down Expand Up @@ -453,7 +453,7 @@ jobs:
python3.8 -m pip install --upgrade setuptools pip wheel
python3.8 -m pip install -r requirements.txt
python3.8 -m pip install .
python3.8 -m pip install torch==1.12.0+cu116 torchvision==0.13.0+cu116 torchaudio==0.12.0 --extra-index-url https://download.pytorch.org/whl/cu116
python3.8 -m pip install torch torchvision torchaudio
python3.8 src/super_gradients/examples/train_from_recipe_example/train_from_recipe.py --config-name=coco2017_pose_dekr_w32_no_dc experiment_name=shortened_coco2017_pose_dekr_w32_ap_test batch_size=4 val_batch_size=8 epochs=1 training_hyperparams.lr_warmup_steps=0 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=1000 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4
python3.8 src/super_gradients/examples/train_from_recipe_example/train_from_recipe.py --config-name=cifar10_resnet experiment_name=shortened_cifar10_resnet_accuracy_test epochs=100 training_hyperparams.average_best_models=False multi_gpu=DDP num_gpus=4
python3.8 src/super_gradients/examples/convert_recipe_example/convert_recipe_example.py --config-name=cifar10_conversion_params experiment_name=shortened_cifar10_resnet_accuracy_test
Expand Down Expand Up @@ -491,7 +491,7 @@ jobs:
python3.8 -m pip install --upgrade setuptools pip wheel
python3.8 -m pip install -r requirements.txt
python3.8 -m pip install .
python3.8 -m pip install torch==1.12.0+cu116 torchvision==0.13.0+cu116 torchaudio==0.12.0 --extra-index-url https://download.pytorch.org/whl/cu116
python3.8 -m pip install torch torchvision torchaudio
python3.8 src/super_gradients/examples/train_from_recipe_example/train_from_recipe.py --config-name=imagenet_regnetY architecture=regnetY600 batch_size=8 val_batch_size=16 epochs=1 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=100 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4 dataset_params.train_dataset_params.root=/data/Imagenet/train dataset_params.val_dataset_params.root=/data/Imagenet/val
python3.8 src/super_gradients/examples/train_from_recipe_example/train_from_recipe.py --config-name=imagenet_regnetY architecture=regnetY800 batch_size=8 val_batch_size=16 epochs=1 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=100 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4 dataset_params.train_dataset_params.root=/data/Imagenet/train dataset_params.val_dataset_params.root=/data/Imagenet/val
python3.8 src/super_gradients/examples/train_from_recipe_example/train_from_recipe.py --config-name=imagenet_repvgg batch_size=8 val_batch_size=16 epochs=1 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=100 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4 dataset_params.train_dataset_params.root=/data/Imagenet/train dataset_params.val_dataset_params.root=/data/Imagenet/val
Expand Down Expand Up @@ -527,7 +527,7 @@ jobs:
python3.8 -m pip install --upgrade setuptools pip wheel
python3.8 -m pip install -r requirements.txt
python3.8 -m pip install .
python3.8 -m pip install torch==1.12.0+cu116 torchvision==0.13.0+cu116 torchaudio==0.12.0 --extra-index-url https://download.pytorch.org/whl/cu116
python3.8 -m pip install torch torchvision torchaudio
python3.8 src/super_gradients/examples/train_from_recipe_example/train_from_recipe.py --config-name=imagenet_efficientnet batch_size=8 val_batch_size=16 epochs=1 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=100 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4 dataset_params.train_dataset_params.root=/data/Imagenet/train dataset_params.val_dataset_params.root=/data/Imagenet/val
python3.8 src/super_gradients/examples/train_from_recipe_example/train_from_recipe.py --config-name=imagenet_mobilenetv2 batch_size=8 val_batch_size=16 epochs=1 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=100 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4 dataset_params.train_dataset_params.root=/data/Imagenet/train dataset_params.val_dataset_params.root=/data/Imagenet/val
python3.8 src/super_gradients/examples/train_from_recipe_example/train_from_recipe.py --config-name=imagenet_mobilenetv3_large batch_size=8 val_batch_size=16 epochs=1 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=100 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4 dataset_params.train_dataset_params.root=/data/Imagenet/train dataset_params.val_dataset_params.root=/data/Imagenet/val
Expand Down Expand Up @@ -563,7 +563,7 @@ jobs:
python3.8 -m pip install --upgrade setuptools pip wheel
python3.8 -m pip install -r requirements.txt
python3.8 -m pip install .
python3.8 -m pip install torch==1.12.0+cu116 torchvision==0.13.0+cu116 torchaudio==0.12.0 --extra-index-url https://download.pytorch.org/whl/cu116
python3.8 -m pip install torch torchvision torchaudio
wget -O $(pwd)/checkpoints/ddrnet23_slim_bb_imagenet.pth https://deci-pretrained-models.s3.amazonaws.com/ddrnet/imagenet_pt_backbones/ddrnet23_slim_bb_imagenet.pth
wget -O $(pwd)/checkpoints/ddrnet23_bb_imagenet.pth https://deci-pretrained-models.s3.amazonaws.com/ddrnet/imagenet_pt_backbones/ddrnet23_bb_imagenet.pth
wget -O $(pwd)/checkpoints/ddrnet39_imagenet_pretrained.pth https://deci-pretrained-models.s3.amazonaws.com/ddrnet/imagenet_pt_backbones/ddrnet39_bb_imagenet.pth
Expand Down Expand Up @@ -609,7 +609,7 @@ jobs:
python3.8 -m pip install --upgrade setuptools pip wheel
python3.8 -m pip install -r requirements.txt
python3.8 -m pip install .
python3.8 -m pip install torch==1.12.0+cu116 torchvision==0.13.0+cu116 torchaudio==0.12.0 --extra-index-url https://download.pytorch.org/whl/cu116
python3.8 -m pip install torch torchvision torchaudio
python3.8 src/super_gradients/examples/train_from_recipe_example/train_from_recipe.py --config-name=coco2017_ssd_lite_mobilenet_v2 batch_size=8 val_batch_size=16 epochs=1 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=100 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4
python3.8 src/super_gradients/examples/train_from_recipe_example/train_from_recipe.py --config-name=coco2017_yolox architecture=yolox_n batch_size=8 val_batch_size=16 epochs=1 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=100 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4
python3.8 src/super_gradients/examples/train_from_recipe_example/train_from_recipe.py --config-name=coco2017_yolox architecture=yolox_t batch_size=8 val_batch_size=16 epochs=1 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=100 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4
Expand Down Expand Up @@ -646,7 +646,7 @@ jobs:
python3.8 -m pip install --upgrade setuptools pip wheel
python3.8 -m pip install -r requirements.txt
python3.8 -m pip install .
python3.8 -m pip install torch==1.12.0+cu116 torchvision==0.13.0+cu116 torchaudio==0.12.0 --extra-index-url https://download.pytorch.org/whl/cu116
python3.8 -m pip install torch torchvision torchaudio
python3.8 src/super_gradients/examples/train_from_recipe_example/train_from_recipe.py --config-name=coco2017_pose_dekr_w32_no_dc experiment_name=shortened_coco2017_pose_dekr_w32_ap_test batch_size=4 val_batch_size=8 epochs=1 training_hyperparams.lr_warmup_steps=0 training_hyperparams.average_best_models=False training_hyperparams.max_train_batches=1000 training_hyperparams.max_valid_batches=100 multi_gpu=DDP num_gpus=4

- run:
Expand Down Expand Up @@ -717,8 +717,8 @@ workflows:
repo_name: "deci-packages"
<<: *release_tag_filter
- build:
name: "build3.7"
py_version: "3.7"
name: "build3.8"
py_version: "3.8"
package_name: "super-gradients"
requires:
- deci-common/persist_version_info
Expand All @@ -737,9 +737,9 @@ workflows:
- recipe_accuracy_tests:
<<: *release_tag_filter
- release_version:
py_version: "3.7"
py_version: "3.8"
requires:
- "build3.7"
- "build3.8"
- recipe_accuracy_tests
- recipe_sanity_tests_classification_pt1
- recipe_sanity_tests_classification_pt2
Expand Down Expand Up @@ -770,8 +770,8 @@ workflows:
version_override: ""

- build:
name: "build3.7"
py_version: "3.7"
name: "build3.8"
py_version: "3.8"
package_name: "super-gradients"
requires:
- deci-common/persist_version_info
Expand All @@ -781,9 +781,9 @@ workflows:
<<: *release_candidate_filter

- release_candidate: # happens on merge
py_version: "3.7"
py_version: "3.8"
requires:
- "build3.7"
- "build3.8"
- deci-common/codeartifact_login
<<: *release_candidate_filter

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
torch>=1.9.0,<1.14
torch>=1.9.0
tqdm>=4.57.0
boto3>=1.17.15
jsonschema>=3.2.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ ckpt_root_dir:
train_dataloader: coco2017_pose_train
val_dataloader: coco2017_pose_val

arch_params:
shaydeci marked this conversation as resolved.
Show resolved Hide resolved
num_classes: ${dataset_params.num_joints}

checkpoint_params:
# Original training recipe uses pretrained weights for HRNet on ImageNet.
# You will need to download the pretrained weights from the original repo and place
Expand Down
2 changes: 1 addition & 1 deletion tests/unit_tests/detection_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def setUp(self):
def test_visualization(self):

valid_loader = coco2017_val(dataloader_params={"batch_size": 16})
trainer = Trainer("visualization_test", device=self.device)
trainer = Trainer("visualization_test")
shaydeci marked this conversation as resolved.
Show resolved Hide resolved
post_prediction_callback = YoloPostPredictionCallback()

# Simulate one iteration of validation subset
Expand Down
3 changes: 2 additions & 1 deletion tests/unit_tests/lr_cooldown_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from super_gradients.training.metrics import Accuracy
from super_gradients.training.models import LeNet
from super_gradients.training.utils.callbacks import TestLRCallback
import numpy as np


class LRCooldownTest(unittest.TestCase):
Expand Down Expand Up @@ -44,4 +45,4 @@ def test_lr_cooldown_with_lr_scheduling(self):

# ALTHOUGH NOT SEEN IN HERE, THE 4TH EPOCH USES LR=1, SO THIS IS THE EXPECTED LIST AS WE COLLECT
# THE LRS AFTER THE UPDATE
self.assertListEqual(lrs, expected_lrs)
np.testing.assert_allclose(np.array(lrs), np.array(expected_lrs), rtol=1e-6)
4 changes: 2 additions & 2 deletions tests/unit_tests/lr_warmup_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def test_lr_warmup_with_lr_scheduling(self):

# ALTHOUGH NOT SEEN IN HERE, THE 4TH EPOCH USES LR=1, SO THIS IS THE EXPECTED LIST AS WE COLLECT
# THE LRS AFTER THE UPDATE
self.assertListEqual(lrs, expected_lrs)
np.testing.assert_allclose(np.array(lrs), np.array(expected_lrs), rtol=1e-6)

def test_warmup_linear_batch_step(self):
# Define model
Expand Down Expand Up @@ -248,7 +248,7 @@ def test_custom_lr_warmup(self):
train_loader=classification_test_dataloader(batch_size=4),
valid_loader=classification_test_dataloader(batch_size=4),
)
self.assertListEqual(lrs, expected_lrs)
np.testing.assert_allclose(np.array(lrs), np.array(expected_lrs), rtol=1e-6)


if __name__ == "__main__":
Expand Down