diff --git a/.circleci/config.yml b/.circleci/config.yml index d67daa074..6b136d285 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -87,7 +87,7 @@ install_dep_1_8_1: &install_dep_1_8_1 if [ -f /home/circleci/venv/check_version.py ]; then python /home/circleci/venv/check_version.py torch eq 1.8 && exit 0; fi # start installing pip install --progress-bar off torch==1.8.1+cu102 torchvision==0.9.1+cu102 -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html - pip install --progress-bar off -r requirements-test.txt + pip install --progress-bar off -r requirements-dev.txt pip install --progress-bar off -r requirements-benchmarks.txt python -c 'import torch; print("Torch version:", torch.__version__)' python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "8"], "wrong torch version"' @@ -103,7 +103,7 @@ install_dep_1_10_0: &install_dep_1_10_0 if [ -f /home/circleci/venv/check_version.py ]; then python /home/circleci/venv/check_version.py torch eq 1.10 && exit 0; fi # start installing pip install --progress-bar off torch==1.10.0+cu111 torchvision==0.11.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html - pip install --progress-bar off -r requirements-test.txt + pip install --progress-bar off -r requirements-dev.txt pip install --progress-bar off -r requirements-benchmarks.txt python -c 'import torch; print("Torch version:", torch.__version__)' python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "10"], "wrong torch version"' @@ -118,7 +118,7 @@ install_dep_pytorch_nightly: &install_dep_pytorch_nightly if [ -f /home/circleci/venv/check_version.py ]; then python /home/circleci/venv/check_version.py torch eq 1.10 && exit 0; fi # start installing pip install --progress-bar off --pre torch==1.11.0.dev20211101+cu111 torchvision==0.12.0.dev20211101+cu111 -f https://download.pytorch.org/whl/nightly/cu111/torch_nightly.html - pip install --progress-bar off -r requirements-test.txt + pip install --progress-bar 
off -r requirements-dev.txt pip install --progress-bar off -r requirements-benchmarks.txt python -c 'import torch; print("Torch version:", torch.__version__)' python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "11"], "wrong torch version"' @@ -247,14 +247,14 @@ jobs: # Cache the venv directory that contains dependencies - restore_cache: keys: - - cache-key-cpu-py37-torch-1-10-0-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-test.txt"}} + - cache-key-cpu-py37-torch-1-10-0-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}} - <<: *install_dep_1_10_0 - save_cache: paths: - ~/venv - key: cache-key-cpu-py37-torch-1-10-0-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-test.txt"}} + key: cache-key-cpu-py37-torch-1-10-0-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}} - <<: *install_repo - <<: *run_unittests @@ -276,13 +276,13 @@ jobs: # Cache the venv directory that contains dependencies - restore_cache: keys: - - cache-key-cpu-py38-torch-1-10-0-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-test.txt"}} + - cache-key-cpu-py38-torch-1-10-0-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}} - <<: *install_dep_1_10_0 - save_cache: paths: - ~/venv - key: cache-key-cpu-py38-torch-1-10-0-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-test.txt"}} + key: cache-key-cpu-py38-torch-1-10-0-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}} - <<: *install_repo - <<: *run_unittests @@ -304,14 +304,14 @@ jobs: # Cache the venv directory that contains dependencies - restore_cache: keys: - - cache-key-cpu-py39-torch-1-10-0-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-test.txt"}} + - 
cache-key-cpu-py39-torch-1-10-0-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}} - <<: *install_dep_1_10_0 - save_cache: paths: - ~/venv - key: cache-key-cpu-py39-torch-1-10-0-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-test.txt"}} + key: cache-key-cpu-py39-torch-1-10-0-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}} - <<: *install_repo - <<: *run_unittests @@ -344,14 +344,14 @@ jobs: # Cache the venv directory that contains dependencies - restore_cache: keys: - - cache-key-py-3-9-7-gpu-torch-1-8-1-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-test.txt"}} + - cache-key-py-3-9-7-gpu-torch-1-8-1-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}} - <<: *install_dep_1_8_1 - save_cache: paths: - ~/venv - key: cache-key-py-3-9-7-gpu-torch-1-8-1-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-test.txt"}} + key: cache-key-py-3-9-7-gpu-torch-1-8-1-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}} - <<: *install_repo @@ -387,14 +387,14 @@ jobs: # Cache the venv directory that contains dependencies - restore_cache: keys: - - cache-key-py-3-9-7-gpu-torch-1-10-0-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-test.txt"}} + - cache-key-py-3-9-7-gpu-torch-1-10-0-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}} - <<: *install_dep_1_10_0 - save_cache: paths: - ~/venv - key: cache-key-py-3-9-7-gpu-torch-1-10-0-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-test.txt"}} + key: cache-key-py-3-9-7-gpu-torch-1-10-0-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}} - <<: *install_repo @@ 
-428,14 +428,14 @@ jobs: # Cache the venv directory that contains dependencies - restore_cache: keys: - - cache-key-py-3-9-7-gpu-pytorch-nightly-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-test.txt"}} + - cache-key-py-3-9-7-gpu-pytorch-nightly-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}} - <<: *install_dep_pytorch_nightly - save_cache: paths: - ~/venv - key: cache-key-py-3-9-7-gpu-pytorch-nightly-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-test.txt"}} + key: cache-key-py-3-9-7-gpu-pytorch-nightly-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}} - <<: *install_repo @@ -463,7 +463,7 @@ jobs: # Cache the venv directory that contains dependencies - restore_cache: keys: - - cache-key-py-3-9-7-benchmarks-torch-1-10-0-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-test.txt"}} + - cache-key-py-3-9-7-benchmarks-torch-1-10-0-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}} # Cache the MNIST directory that contains benchmark data - restore_cache: @@ -475,7 +475,7 @@ jobs: - save_cache: paths: - ~/venv - key: cache-key-py-3-9-7-benchmarks-torch-1-10-0-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-test.txt"}} + key: cache-key-py-3-9-7-benchmarks-torch-1-10-0-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}} - <<: *install_repo @@ -512,7 +512,7 @@ jobs: # Cache the venv directory that contains dependencies - restore_cache: keys: - - cache-key-py-3-9-7-benchmarks-torch-1-10-0-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-test.txt"}} + - cache-key-py-3-9-7-benchmarks-torch-1-10-0-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum 
"setup.py"}}-{{checksum "requirements-dev.txt"}} # Cache the MNIST directory that contains benchmark data @@ -525,7 +525,7 @@ jobs: - save_cache: paths: - ~/venv - key: cache-key-py-3-9-7-benchmarks-torch-1-10-0-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-test.txt"}} + key: cache-key-py-3-9-7-benchmarks-torch-1-10-0-cuda-11-2-{{.Environment.CACHE_VERSION }}-{{checksum "setup.py"}}-{{checksum "requirements-dev.txt"}} - <<: *install_repo diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fdc976286..f58e48c25 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,5 @@ # If you change the versions below, please make sure they are in-sync -# with requirements-test.txt +# with requirements-dev.txt exclude: 'build|stubs' diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f6e21847..e979254e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.4.3] - TBD + +### Added +- Sharded Grad Scaler works with cpu offload in mixed and full precision. [#831] + +### Changed +- Cleanup: Moving forward we would be testing all of our code with Python 3.9.7, CUDA 11.2 and the following three versions of PyTorch [#847]: + - the most recent stable version + - the most recent LTS version + - a recent nightly build + ## [0.4.2] - 2021-11-08 ### Fixed - FSDP: Fixed an pre-backward hook bug for certain type of models and FSDP config. [#833] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3dc58144a..fb28bf792 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -42,7 +42,7 @@ outlined on that page and do not file a public issue. 
~$ python3 -m venv venv2 ~$ source venv2/bin/activate (venv2) ~$ cd git/fairscale/ -(venv2) ~/git/fairscale $ pip3 install -r requirements-test.txt +(venv2) ~/git/fairscale $ pip3 install -r requirements-dev.txt ``` ## Coding Style @@ -57,19 +57,23 @@ outlined on that page and do not file a public issue. and less development overhead in maintaining an importing list. * Please setup pre-commit before opening up your PR. -### Pre-commit +### Pre-Commit (Recommended) + +We use pre-commit to maintain the coding style. Pre-commit checks are run via GitHub Actions on every +commit. To install all the relevant libraries and run the pre-commit tests locally, execute the following +commands: ``` pip install -r requirements-dev.txt pre-commit install ``` -After the above, your `git commit` command will automatically trigger pre-commit -checks, which are static code analysis tools we use. +After the above, your `git commit` command will automatically trigger pre-commit checks. -### Run statis analysis by hand (without using pre-commit) +### Running static code analysis manually (Deprecated) -Note that, trailing spaces are not checked by the manual commands below, but they are checked by the pre-commit hooks above. +Note that trailing spaces are not checked by the manual commands below, but they are checked by the +pre-commit hooks we use above. ``` black . @@ -80,6 +84,13 @@ mypy --ignore-missing-imports --scripts-are-modules --pretty . ## Testing +FairScale code is tested on Python 3.9.7, CUDA 11.2 and the following three PyTorch versions: +- the latest stable version +- the latest LTS version +- a recent nightly release + +See the [README](https://github.com/facebookresearch/fairscale/blob/main/README.md#testing) for the exact version numbers. 
+ ### Unit tests ``` diff --git a/README.md b/README.md index deeed32d6..d822efdbf 100644 --- a/README.md +++ b/README.md @@ -155,9 +155,9 @@ At a high level, we want ML researchers to: ## Testing We use circleci to test FairScale with the following PyTorch versions (with CUDA 11.2): -* the most recent PyTorch stable release -* the most recent PyTorch LTS release -* a recent PyTorch nightly release +* the latest stable release (1.10) +* the latest LTS release (1.8) +* a recent nightly release (1.11.0.dev20211101+cu111) Please create an [issue](https://github.com/facebookresearch/fairscale/issues) if you are having trouble with installation. diff --git a/RELEASE.md b/RELEASE.md index 3a8c7fef7..004437428 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -2,6 +2,7 @@ - Update the CHANGELOG.md - Update "what's new" in README.md +- If needed, update the PyTorch versions in README.md in the Testing section. - Update `fairscale/__init__.py` and `docs/source/conf.py` for the new version number - git commit the change with title like "[chore] 0.3.1 release" - make a tag, like `git tag v0.3.1` diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml deleted file mode 100644 index e5f28184f..000000000 --- a/conda.recipe/meta.yaml +++ /dev/null @@ -1,42 +0,0 @@ -{% set name = "fairscale" %} -{% set version = "0.0.2" %} - -package: - name: "{{ name|lower }}" - version: "{{ version }}" - -source: - git_url: ../ - -build: - number: 0 - script: "{{ PYTHON }} -m pip install . 
--no-deps --ignore-installed -vv " - -requirements: - host: - - python - - pytorch >=1.4.0 - - run: - - python - - pytorch >=1.4.0 - -test: - imports: - - fairscale - - fairscale.nn - - fairscale.nn.data_parallel - - fairscale.nn.model_parallel - - fairscale.nn.pipe - - fairscale.nn.pipe.balance - - fairscale.nn.pipe.skip - - fairscale.optim - -about: - home: "The package home page" - license: "BSD" - license_family: "BSD" - license_file: "" - summary: "fairscale: A PyTorch library for large-scale and high-performance training." - doc_url: "" - dev_url: "" diff --git a/docs/source/installation_instructions.rst b/docs/source/installation_instructions.rst index a9309ccd7..8cc9f226a 100644 --- a/docs/source/installation_instructions.rst +++ b/docs/source/installation_instructions.rst @@ -6,7 +6,7 @@ from source using the instructions below. ### Requirements -* PyTorch>= 1.7.1 +* PyTorch>= 1.8.1 ### Installing the pip package (stable) diff --git a/requirements-benchmarks.txt b/requirements-benchmarks.txt index 86143778b..12e45bbc2 100644 --- a/requirements-benchmarks.txt +++ b/requirements-benchmarks.txt @@ -1,5 +1,5 @@ # Bring in everything that tests depends on. --r requirements-test.txt +-r requirements-dev.txt # Benchmark dependencies. torchtext == 0.6.0 diff --git a/requirements-dev.txt b/requirements-dev.txt index 5abd436be..0f40783b4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,30 @@ # Core deps. -r requirements.txt -# For pre-commit hooks. +# Tools for static checking. +# - flake8-annotations is needed to avoid F811 error with overload +# function typing with mypy. +# - if you change versions below, please make sure it is in-sync with +# .pre-commit-config.yaml for pre-commit. +black == 21.10b0 +flake8 == 4.0.1 +flake8-annotations == 2.7.0 +isort == 5.10.1 +mypy == 0.910 pre-commit >= 2.15.0 + +# Tools for unit tests & coverage. 
+pytest == 5.4.1 +pytest-cov == 2.10.0 +pytest-timeout == 1.4.2 +remote-pdb >= 2.1.0 +parameterized >= 0.8.1 + +# Tools for testing docs +docutils == 0.17 + +# For torch.cuda.list_gpu_processes() +pynvml == 8.0.4 + +# For mypy typing +numpy >= 1.21 diff --git a/requirements-test.txt b/requirements-test.txt deleted file mode 100644 index 9a84ec2ff..000000000 --- a/requirements-test.txt +++ /dev/null @@ -1,29 +0,0 @@ -# Get core deps. --r requirements.txt - -# Tools for static checking. -# - flake8-annotations is needed to avoid F811 error with overload -# function typing with mypy. -# - if you change versions below, please make sure it is in-sync with -# .pre-commit-config.yaml for pre-commit. -black == 21.10b0 -flake8 == 4.0.1 -flake8-annotations == 2.7.0 -isort == 5.10.1 -mypy == 0.910 - -# Tools for unit tests & coverage. -pytest == 5.4.1 -pytest-cov == 2.10.0 -pytest-timeout == 1.4.2 -remote-pdb >= 2.1.0 -parameterized >= 0.8.1 - -# Tools for testing docs -docutils == 0.17 - -# For torch.cuda.list_gpu_processes() -pynvml == 8.0.4 - -# For mypy typing -numpy >= 1.21