repair CI for Win (#2358)

* no cov * no cov * ReduceOp * group * reduce_op.sum * Update sklearns.py * formatting * horovod * Apply suggestions from code review * horovod * horovod * horovod * horovod * ci * print * ci * timeout * timeout * time * fix * distributed cpu * pipes * time * cpu * spawn * spawn * spawn * tp * separate * os * os * npm * Fix load_from_checkpoint() not working with URL on Windows * Update CHANGELOG * Update CHANGELOG.md Co-authored-by: Peter Yu <2057325+yukw777@users.noreply.github.com> * Apply suggestions from code review * fix * fix meta tags creating empty lines * pyright * node * fix httpserver address * drop tutils.default_trainer_options * imports * Better fix for load_from_checkpoint() not working with absolute path on Windows (#2294) * Fix load_from_checkpoint() not working with URL on Windows * Update CHANGELOG * Update CHANGELOG.md Co-authored-by: Peter Yu <2057325+yukw777@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Peter Yu <2057325+yukw777@users.noreply.github.com> * drop duplicate Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> Co-authored-by: airium <airium@outlook.com> Co-authored-by: Peter Yu <2057325+yukw777@users.noreply.github.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: AIRIUM <38249940+airium@users.noreply.github.com>
Lightning-AI · Jun 27, 2020 · f1c9693 · f1c9693
1 parent a5f4578
commit f1c9693
Show file tree

Hide file tree

Showing 50 changed files with 462 additions and 436 deletions.
diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml
@@ -33,11 +33,11 @@ jobs:
             os: macOS-10.15
 
     # Timeout: https://stackoverflow.com/a/59076067/4521646
-    timeout-minutes: 15
+    timeout-minutes: 50
     steps:
     - uses: actions/checkout@v2
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v1
+      uses: actions/setup-python@v2
       with:
         python-version: ${{ matrix.python-version }}
 
@@ -122,8 +122,9 @@ jobs:
       run: |
         # tox --sitepackages
         # flake8 .
-        coverage run --source pytorch_lightning -m py.test pytorch_lightning tests -v --junitxml=junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml
-        coverage report
+        # NOTE: running the tests under coverage does not propagate the failure status on Windows, see https://github.com/nedbat/coveragepy/issues/1003
+        python -m pytest pytorch_lightning tests -v --junitxml=junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml
+        # coverage report
 
     - name: Upload pytest test results
       uses: actions/upload-artifact@master
@@ -139,14 +140,15 @@ jobs:
         python setup.py check --metadata --strict
         python setup.py sdist
         twine check dist/*
+
     #- name: Try install package
     #  if: ! startsWith(matrix.os, 'windows')
     #  run: |
     #    virtualenv vEnv ; source vEnv/bin/activate
     #    pip install --editable . ; cd .. & python -c "import pytorch_lightning ; print(pytorch_lightning.__version__)"
     #    deactivate ; rm -rf vEnv
 
-    - name: Statistics
-      if: success()
-      run: |
-         coverage report
+    #- name: Statistics
+    #  if: success()
+    #  run: |
+    #     coverage report
diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml
@@ -1,4 +1,4 @@
-name: "Check Formatting - Black"
+name: "Check Code Format"
 on:
   # Trigger the workflow on push or pull request,
   # but only for the master branch
@@ -10,18 +10,63 @@ on:
       - master
 
 jobs:
-  check_code_formatting:
+  code-black:
     name: Check code formatting with Black
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
         uses: actions/checkout@v2
-      - name: Set up Python
-        uses: actions/setup-python@v1.1.1
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v2
         with:
           python-version: 3.8
       - name: Install Black
         run: pip install black==19.10b0
       - name: Run Black
         run: echo "LGTM"
         # run black --skip-string-normalization --config=pyproject.toml --check . # TODO, uncomment
+
+  python-types:
+    name: Python static type checking with Pyright
+    runs-on: ubuntu-18.04
+
+    # Timeout: https://stackoverflow.com/a/59076067/4521646
+    timeout-minutes: 15
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+        with:
+          python-version: 3.7
+
+      # Note: This uses an internal pip API and may not always work
+      # https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow
+      - name: Cache pip
+        uses: actions/cache@v1
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-${{ hashFiles('requirements/base.txt') }}-${{ hashFiles('requirements/extra.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-
+
+      - name: Install dependencies
+        run: |
+          # python -m pip install --upgrade --user pip
+          pip install -r requirements/base.txt -U -f https://download.pytorch.org/whl/torch_stable.html -q
+          HOROVOD_BUILD_ARCH_FLAGS="-mfma" pip install -r ./requirements/devel.txt -q
+          # pip install tox coverage
+          python --version ; pip --version ; pip list
+        shell: bash
+
+      - name: Set up node
+        uses: actions/setup-node@v1
+        with:
+          node-version: '12'
+
+      - name: Install pyright
+        run: |
+          npm install pyright
+
+      - name: Run type checking
+        run: |
+          $(npm bin)/pyright --project .pyrightconfig.json
diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml
@@ -16,7 +16,11 @@ jobs:
         python_version: [3.6, 3.7, 3.8]
         pytorch_version: [1.3, 1.4, 1.5]
     steps:
-      - uses: actions/checkout@v2
+      - name: Checkout
+        uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+        with:
+          python-version: 3.7
 
       - name: Publish Master to Docker
         # publish master

diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml
@@ -19,8 +19,7 @@ jobs:
 
     steps:
     - uses: actions/checkout@v2
-    - name: Set up Python 3.7
-      uses: actions/setup-python@v1
+    - uses: actions/setup-python@v2
       with:
         python-version: 3.7
 

diff --git a/.github/workflows/python-type-check.yml b/.github/workflows/python-type-check.yml
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -24,6 +24,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Fixed an issue with forward hooks not being removed after model summary ([#2298](https://github.com/PyTorchLightning/pytorch-lightning/pull/2298))
 
+- Fixed `load_from_checkpoint()` not working with an absolute path on Windows ([#2294](https://github.com/PyTorchLightning/pytorch-lightning/pull/2294))
+
 - Fixed an issue with how _has_len handles `NotImplementedError` e.g. raised by `torchtext.data.Iterator` ([#2293](https://github.com/PyTorchLightning/pytorch-lightning/pull/2293)), ([#2307](https://github.com/PyTorchLightning/pytorch-lightning/pull/2307))
 
 - Fixed `average_precision` metric ([#2319](https://github.com/PyTorchLightning/pytorch-lightning/pull/2319))
@@ -49,7 +51,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 ### Added
 
 - Added `overfit_batches`, `limit_{val|test}_batches` flags (overfit now uses training set for all three) ([#2213](https://github.com/PyTorchLightning/pytorch-lightning/pull/2213))
-- Added metrics 
+- Added metrics
   * Base classes ([#1326](https://github.com/PyTorchLightning/pytorch-lightning/pull/1326), [#1877](https://github.com/PyTorchLightning/pytorch-lightning/pull/1877))
   * Sklearn metrics classes ([#1327](https://github.com/PyTorchLightning/pytorch-lightning/pull/1327))
   * Native torch metrics ([#1488](https://github.com/PyTorchLightning/pytorch-lightning/pull/1488), [#2062](https://github.com/PyTorchLightning/pytorch-lightning/pull/2062))
@@ -59,7 +61,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Allow dataloaders without sampler field present ([#1907](https://github.com/PyTorchLightning/pytorch-lightning/pull/1907))
 - Added option `save_last` to save the model at the end of every epoch in `ModelCheckpoint` [(#1908)](https://github.com/PyTorchLightning/pytorch-lightning/pull/1908)
 - Early stopping checks `on_validation_end` ([#1458](https://github.com/PyTorchLightning/pytorch-lightning/pull/1458))
-- Attribute `best_model_path` to `ModelCheckpoint` for storing and later retrieving the path to the best saved model file ([#1799](https://github.com/PyTorchLightning/pytorch-lightning/pull/1799)) 
+- Attribute `best_model_path` to `ModelCheckpoint` for storing and later retrieving the path to the best saved model file ([#1799](https://github.com/PyTorchLightning/pytorch-lightning/pull/1799))
 - Speed up single-core TPU training by loading data using `ParallelLoader` ([#2033](https://github.com/PyTorchLightning/pytorch-lightning/pull/2033))
 - Added a model hook `transfer_batch_to_device` that enables moving custom data structures to the target device ([1756](https://github.com/PyTorchLightning/pytorch-lightning/pull/1756))
 - Added [black](https://black.readthedocs.io/en/stable/) formatter for the code with code-checker on pull ([1610](https://github.com/PyTorchLightning/pytorch-lightning/pull/1610))
@@ -74,7 +76,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Allow user to select individual TPU core to train on ([#1729](https://github.com/PyTorchLightning/pytorch-lightning/pull/1729))
 - Removed non-finite values from loss in `LRFinder` ([#1862](https://github.com/PyTorchLightning/pytorch-lightning/pull/1862))
-- Allow passing model hyperparameters as complete kwarg list ([#1896](https://github.com/PyTorchLightning/pytorch-lightning/pull/1896)) 
+- Allow passing model hyperparameters as complete kwarg list ([#1896](https://github.com/PyTorchLightning/pytorch-lightning/pull/1896))
 - Renamed `ModelCheckpoint`'s attributes `best` to `best_model_score` and `kth_best_model` to `kth_best_model_path` ([#1799](https://github.com/PyTorchLightning/pytorch-lightning/pull/1799))
 - Re-Enable Logger's `ImportError`s ([#1938](https://github.com/PyTorchLightning/pytorch-lightning/pull/1938))
 - Changed the default value of the Trainer argument `weights_summary` from `full` to `top` ([#2029](https://github.com/PyTorchLightning/pytorch-lightning/pull/2029))
@@ -107,7 +109,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Run graceful training teardown on interpreter exit ([#1631](https://github.com/PyTorchLightning/pytorch-lightning/pull/1631))
 - Fixed user warning when apex was used together with learning rate schedulers ([#1873](https://github.com/PyTorchLightning/pytorch-lightning/pull/1873))
-- Fixed multiple calls of `EarlyStopping` callback ([#1863](https://github.com/PyTorchLightning/pytorch-lightning/pull/1863)) 
+- Fixed multiple calls of `EarlyStopping` callback ([#1863](https://github.com/PyTorchLightning/pytorch-lightning/pull/1863))
 - Fixed an issue with `Trainer.from_argparse_args` when passing in unknown Trainer args ([#1932](https://github.com/PyTorchLightning/pytorch-lightning/pull/1932))
 - Fixed bug related to logger not being reset correctly for model after tuner algorithms ([#1933](https://github.com/PyTorchLightning/pytorch-lightning/pull/1933))
 - Fixed root node resolution for SLURM cluster with dash in host name ([#1954](https://github.com/PyTorchLightning/pytorch-lightning/pull/1954))

diff --git a/benchmarks/test_parity.py b/benchmarks/test_parity.py
@@ -4,7 +4,7 @@
 import pytest
 import torch
 
-import tests.base.utils as tutils
+import tests.base.develop_utils as tutils
 from pytorch_lightning import Trainer, seed_everything
 from tests.base.models import ParityModuleRNN, ParityModuleMNIST
 

diff --git a/pl_examples/basic_examples/gpu_template.py b/pl_examples/basic_examples/gpu_template.py
@@ -61,7 +61,8 @@ def main(hparams):
         '--distributed_backend',
         type=str,
         default='dp',
-        help='supports four options dp, ddp, ddp2, ddp_spawn'
+        help='supports four options dp, ddp, ddp2, ddp_spawn, ...',
+        choices=['dp', 'ddp', 'ddp2', 'ddp_spawn', 'ddp_cpu'],
     )
     parent_parser.add_argument(
         '--use_16bit',

diff --git a/pytorch_lightning/core/saving.py b/pytorch_lightning/core/saving.py
@@ -279,7 +279,7 @@ def load_hparams_from_tags_csv(tags_csv: str) -> Dict[str, Any]:
     """Load hparams from a file.
 
     >>> hparams = Namespace(batch_size=32, learning_rate=0.001, data_root='./any/path/here')
-    >>> path_csv = './testing-hparams.csv'
+    >>> path_csv = os.path.join('.', 'testing-hparams.csv')
     >>> save_hparams_to_tags_csv(path_csv, hparams)
     >>> hparams_new = load_hparams_from_tags_csv(path_csv)
     >>> vars(hparams) == hparams_new
@@ -304,7 +304,7 @@ def save_hparams_to_tags_csv(tags_csv: str, hparams: Union[dict, Namespace]) ->
     if isinstance(hparams, Namespace):
         hparams = vars(hparams)
 
-    with open(tags_csv, 'w') as fp:
+    with open(tags_csv, 'w', newline='') as fp:
         fieldnames = ['key', 'value']
         writer = csv.DictWriter(fp, fieldnames=fieldnames)
         writer.writerow({'key': 'key', 'value': 'value'})

diff --git a/pytorch_lightning/metrics/converters.py b/pytorch_lightning/metrics/converters.py
@@ -10,8 +10,16 @@
 import numpy as np
 import torch
 from torch.utils.data._utils.collate import np_str_obj_array_pattern
-
 from pytorch_lightning.utilities.apply_func import apply_to_collection
+from pytorch_lightning.utilities import rank_zero_warn
+
+try:
+    from torch.distributed import ReduceOp
+except ImportError:
+    class ReduceOp:
+        SUM = None
+
+    rank_zero_warn('Unsupported `ReduceOp` for distributed computing.')
 
 
 def _apply_to_inputs(func_to_apply: Callable, *dec_args, **dec_kwargs) -> Callable:
@@ -217,7 +225,7 @@ def _tensor_collection_metric_conversion(func_to_decorate: Callable) -> Callable
 
 def _sync_ddp_if_available(result: Union[torch.Tensor],
                            group: Optional[Any] = None,
-                           reduce_op: Optional[torch.distributed.ReduceOp] = None,
+                           reduce_op: Optional[ReduceOp] = None,
                            ) -> torch.Tensor:
     """
     Function to reduce the tensors from several ddp processes to one master process
@@ -247,7 +255,7 @@ def _sync_ddp_if_available(result: Union[torch.Tensor],
 
 
 def sync_ddp(group: Optional[Any] = None,
-             reduce_op: Optional[torch.distributed.ReduceOp] = None) -> Callable:
+             reduce_op: Optional[ReduceOp] = None) -> Callable:
     """
     This decorator syncs a functions outputs across different processes for DDP.
 
@@ -269,7 +277,7 @@ def decorator_fn(func_to_decorate):
 
 
 def numpy_metric(group: Optional[Any] = None,
-                 reduce_op: Optional[torch.distributed.ReduceOp] = None) -> Callable:
+                 reduce_op: Optional[ReduceOp] = None) -> Callable:
     """
     This decorator shall be used on all function metrics working on numpy arrays.
     It handles the argument conversion and DDP reduction for metrics working on numpy.
@@ -292,7 +300,7 @@ def decorator_fn(func_to_decorate):
 
 
 def tensor_metric(group: Optional[Any] = None,
-                  reduce_op: Optional[torch.distributed.ReduceOp] = None) -> Callable:
+                  reduce_op: Optional[ReduceOp] = None) -> Callable:
     """
     This decorator shall be used on all function metrics working on tensors.
     It handles the argument conversion and DDP reduction for metrics working on tensors.
@@ -314,7 +322,7 @@ def decorator_fn(func_to_decorate):
 
 
 def tensor_collection_metric(group: Optional[Any] = None,
-                             reduce_op: Optional[torch.distributed.ReduceOp] = None) -> Callable:
+                             reduce_op: Optional[ReduceOp] = None) -> Callable:
     """
     This decorator shall be used on all function metrics working on tensors and returning collections
     that cannot be converted to tensors.