Skip to content

Commit

Permalink
Bump torch to 2.1.1 version (mosaicml#2717)
Browse files (browse the repository at this point in the history)
  • Loading branch information
j316chuck committed Nov 30, 2023
1 parent 66f412d commit 1dccb14
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 28 deletions.
3 changes: 0 additions & 3 deletions composer/trainer/mosaic_fsdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,3 @@ def patch_pytorch():

# Monkey patch partial state dict handling
_state_dict_utils._sharded_pre_load_state_dict_hook = (_sharded_pre_load_state_dict_hook)

elif version.parse(torch.__version__) >= version.parse('2.1.1'):
raise NotImplementedError(f'FullyShardedDataParallel is not supported for torch >= 2.2.0')
6 changes: 3 additions & 3 deletions docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ To install composer, once inside the image, run `pip install mosaicml`.
| Linux Distro | Flavor | PyTorch Version | CUDA Version | Python Version | Docker Tags |
|----------------|----------|-------------------|---------------------|------------------|------------------------------------------------------------------------------------------|
| Ubuntu 20.04 | Base | 2.2.0 | 12.1.0 (Infiniband) | 3.10 | `mosaicml/pytorch:2.2.0_cu121-nightly20231024-python3.10-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.1.0 | 12.1.0 (Infiniband) | 3.10 | `mosaicml/pytorch:latest`, `mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.1.0 | 12.1.0 (EFA) | 3.10 | `mosaicml/pytorch:latest-aws`, `mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04-aws` |
| Ubuntu 20.04 | Base | 2.1.0 | cpu | 3.10 | `mosaicml/pytorch:latest_cpu`, `mosaicml/pytorch:2.1.0_cpu-python3.10-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.1.1 | 12.1.0 (Infiniband) | 3.10 | `mosaicml/pytorch:latest`, `mosaicml/pytorch:2.1.1_cu121-python3.10-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.1.1 | 12.1.0 (EFA) | 3.10 | `mosaicml/pytorch:latest-aws`, `mosaicml/pytorch:2.1.1_cu121-python3.10-ubuntu20.04-aws` |
| Ubuntu 20.04 | Base | 2.1.1 | cpu | 3.10 | `mosaicml/pytorch:latest_cpu`, `mosaicml/pytorch:2.1.1_cpu-python3.10-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.0.1 | 11.8.0 (Infiniband) | 3.10 | `mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04` |
| Ubuntu 20.04 | Base | 2.0.1 | 11.8.0 (EFA) | 3.10 | `mosaicml/pytorch:2.0.1_cu118-python3.10-ubuntu20.04-aws` |
| Ubuntu 20.04 | Base | 2.0.1 | cpu | 3.10 | `mosaicml/pytorch:2.0.1_cpu-python3.10-ubuntu20.04` |
Expand Down
32 changes: 16 additions & 16 deletions docker/build_matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
- AWS_OFI_NCCL_VERSION: ''
BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04
CUDA_VERSION: 12.1.0
IMAGE_NAME: torch-2-1-0-cu121
IMAGE_NAME: torch-2-1-1-cu121
MOFED_VERSION: 5.5-1.0.3.2
NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471
brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471
Expand All @@ -21,16 +21,16 @@
PYTHON_VERSION: '3.10'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
PYTORCH_VERSION: 2.1.0
PYTORCH_VERSION: 2.1.1
TAGS:
- mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04
- mosaicml/pytorch:2.1.1_cu121-python3.10-ubuntu20.04
- mosaicml/pytorch:latest
TARGET: pytorch_stage
TORCHVISION_VERSION: 0.16.0
TORCHVISION_VERSION: 0.16.1
- AWS_OFI_NCCL_VERSION: v1.7.3-aws
BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04
CUDA_VERSION: 12.1.0
IMAGE_NAME: torch-2-1-0-cu121-aws
IMAGE_NAME: torch-2-1-1-cu121-aws
MOFED_VERSION: ''
NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471
brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471
Expand All @@ -49,27 +49,27 @@
PYTHON_VERSION: '3.10'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
PYTORCH_VERSION: 2.1.0
PYTORCH_VERSION: 2.1.1
TAGS:
- mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04-aws
- mosaicml/pytorch:2.1.1_cu121-python3.10-ubuntu20.04-aws
- mosaicml/pytorch:latest-aws
TARGET: pytorch_stage
TORCHVISION_VERSION: 0.16.0
TORCHVISION_VERSION: 0.16.1
- AWS_OFI_NCCL_VERSION: ''
BASE_IMAGE: ubuntu:20.04
CUDA_VERSION: ''
IMAGE_NAME: torch-2-1-0-cpu
IMAGE_NAME: torch-2-1-1-cpu
MOFED_VERSION: ''
NVIDIA_REQUIRE_CUDA_OVERRIDE: ''
PYTHON_VERSION: '3.10'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
PYTORCH_VERSION: 2.1.0
PYTORCH_VERSION: 2.1.1
TAGS:
- mosaicml/pytorch:2.1.0_cpu-python3.10-ubuntu20.04
- mosaicml/pytorch:2.1.1_cpu-python3.10-ubuntu20.04
- mosaicml/pytorch:latest_cpu
TARGET: pytorch_stage
TORCHVISION_VERSION: 0.16.0
TORCHVISION_VERSION: 0.16.1
- AWS_OFI_NCCL_VERSION: ''
BASE_IMAGE: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
CUDA_VERSION: 11.8.0
Expand Down Expand Up @@ -214,12 +214,12 @@
PYTHON_VERSION: '3.10'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
PYTORCH_VERSION: 2.1.0
PYTORCH_VERSION: 2.1.1
TAGS:
- mosaicml/composer:0.17.1
- mosaicml/composer:latest
TARGET: composer_stage
TORCHVISION_VERSION: 0.16.0
TORCHVISION_VERSION: 0.16.1
- AWS_OFI_NCCL_VERSION: ''
BASE_IMAGE: ubuntu:20.04
COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.17.1
Expand All @@ -230,9 +230,9 @@
PYTHON_VERSION: '3.10'
PYTORCH_NIGHTLY_URL: ''
PYTORCH_NIGHTLY_VERSION: ''
PYTORCH_VERSION: 2.1.0
PYTORCH_VERSION: 2.1.1
TAGS:
- mosaicml/composer:0.17.1_cpu
- mosaicml/composer:latest_cpu
TARGET: composer_stage
TORCHVISION_VERSION: 0.16.0
TORCHVISION_VERSION: 0.16.1
10 changes: 5 additions & 5 deletions docker/generate_build_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@
import yaml

LATEST_PYTHON_VERSION = '3.10'
PRODUCTION_PYTORCH_VERSION = '2.1.0'
PRODUCTION_PYTORCH_VERSION = '2.1.1'


def _get_torchvision_version(pytorch_version: str):
if pytorch_version == '2.1.0':
return '0.16.0'
if pytorch_version == '2.1.1':
return '0.16.1'
if pytorch_version == '2.0.1':
return '0.15.2'
if pytorch_version == '1.13.1':
Expand All @@ -41,7 +41,7 @@ def _get_base_image(cuda_version: str):
def _get_cuda_version(pytorch_version: str, use_cuda: bool):
if not use_cuda:
return ''
if pytorch_version == '2.1.0':
if pytorch_version == '2.1.1':
return '12.1.0'
if pytorch_version == '2.0.1':
return '11.8.0'
Expand Down Expand Up @@ -166,7 +166,7 @@ def _write_table(table_tag: str, table_contents: str):

def _main():
python_versions = ['3.10']
pytorch_versions = ['2.1.0', '2.0.1', '1.13.1']
pytorch_versions = ['2.1.1', '2.0.1', '1.13.1']
cuda_options = [True, False]
stages = ['pytorch_stage']
interconnects = ['mellanox', 'EFA'] # mellanox is default, EFA needed for AWS
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def package_files(prefix: str, directory: str, extension: str):
'torchmetrics>=0.10.0,<1.1',
'torch_optimizer>=0.3.0,<0.4',
'torchvision>=0.13.1,<0.17',
'torch>=1.13.1,<2.1.1',
'torch>=1.13.1,<2.1.2',
'requests>=2.26.0,<3',
'numpy>=1.21.5,<1.27.0',
'psutil>=5.8.0,<6',
Expand Down

0 comments on commit 1dccb14

Please sign in to comment.