update CI testing with pip upgrade (#2380)
* try pt1.5

* cpu

* upgrade

* tpu

* user

* [blocked by #2380] freeze GPU PT 1.4 (#2780)

* freeze

* user
Borda authored Jul 31, 2020
1 parent bc7a08f commit 3772601
Showing 5 changed files with 20 additions and 59 deletions.
.drone.yml: 2 changes (1 addition, 1 deletion)

@@ -6,7 +6,7 @@ name: torch-GPU

steps:
- name: testing
-   image: pytorchlightning/pytorch_lightning:cuda-extras-py3.7-torch1.5
+   image: pytorchlightning/pytorch_lightning:devel-pt1.4

environment:
SLURM_LOCALID: 0
.github/workflows/ci-test-base.yml: 2 changes (1 addition, 1 deletion)

@@ -57,7 +57,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade --user pip
- pip install --requirement ./requirements/base.txt --quiet --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade-strategy only-if-needed
+ pip install --requirement ./requirements/base.txt --quiet --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade
pip install --requirement ./requirements/test.txt --quiet --upgrade-strategy only-if-needed
# pip install tox coverage
python --version
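The flag swap above is the heart of the commit: `--upgrade-strategy only-if-needed` on its own leaves already-satisfied requirements untouched, while `--upgrade` re-resolves every listed requirement. A minimal sketch contrasting the two invocations (assumed commands wrapped in Python for illustration, not part of this commit):

```python
# Minimal sketch contrasting the two pip invocations touched by this diff.
# Assumes a requirements/base.txt next to the script; not part of the commit.
import subprocess

# Old behaviour: without --upgrade, pip skips any requirement that is already
# satisfied, so a stale torch wheel baked into the CI image is silently kept.
subprocess.run(
    ["pip", "install", "--requirement", "requirements/base.txt",
     "--upgrade-strategy", "only-if-needed"],
    check=True,
)

# New behaviour: --upgrade re-resolves the listed requirements and bumps them
# to the newest versions their specifiers allow (their dependencies still
# follow pip's default only-if-needed strategy).
subprocess.run(
    ["pip", "install", "--requirement", "requirements/base.txt", "--upgrade"],
    check=True,
)
```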
.github/workflows/ci-testing.yml: 13 changes (4 additions, 9 deletions)

@@ -42,7 +42,7 @@ jobs:

- name: Update Pip
run: |
- pip install -U -q "pip>=20.1" # needed for get pip cacher folder
+ pip install --quiet "pip>=20.1" --upgrade --user # needed for get pip cacher folder
# Github Actions: Run step on specific OS: https://stackoverflow.com/a/57948488/4521646
- name: Setup macOS
@@ -54,14 +54,9 @@
- name: Setup Windows
if: runner.os == 'windows'
run: |
# remove Horovod from requirements
python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('horovod')] ; open(fname, 'w').writelines(lines)"
- # TODO: remove after https://github.com/pytorch/pytorch/issues/32186 is resolved
- #- name: Setup Windows on Latest
- #  if: runner.os == 'windows' && matrix.requires == 'latest'
- #  run: |
- #    python -c "fname = 'requirements/base.txt' ; req = open(fname).read().replace('torch>=1.3', 'torch<1.5') ; open(fname, 'w').write(req)"

# versions <= 1.3 may have issues on mac with some BLAS ops due to missing mkl (https://github.com/pytorch/pytorch/issues/18996)
- name: Setup MacOS Minimal
if: runner.os == 'macOS' && matrix.requires == 'minimal'
@@ -92,8 +92,8 @@ jobs:
- name: Install dependencies
run: |
- pip install --requirement requirements/base.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --quiet --upgrade-strategy only-if-needed
# pip install -q "PyYAML>=5.3.1" # needed for installing dependencues
# python -m pip install --upgrade --user pip
+ pip install --requirement requirements/base.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --quiet --upgrade
HOROVOD_BUILD_ARCH_FLAGS="-mfma" pip install --requirement ./requirements/devel.txt --quiet --upgrade-strategy "only-if-needed"
python --version
pip --version
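The "Setup Windows" step above packs the Horovod removal into a single `python -c` one-liner. Unrolled, the logic is just a filtered rewrite of `requirements/extra.txt` (an equivalent readable sketch, not the literal CI command):

```python
# Equivalent, readable form of the one-liner in the "Setup Windows" step:
# drop the horovod pin from requirements/extra.txt, since Horovod does not
# build on the Windows CI runners.
fname = 'requirements/extra.txt'

with open(fname) as fh:
    lines = [line for line in fh.readlines() if not line.startswith('horovod')]

with open(fname, 'w') as fh:
    fh.writelines(lines)
```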
README.md: 2 changes (2 additions, 0 deletions)

@@ -437,6 +437,8 @@ You can also install any past release `0.X.Y` from this repository:
pip install https://github.com/PytorchLightning/pytorch-lightning/archive/0.X.Y.zip --upgrade
```

+ ---
+
## Lightning team

#### Leads
tests/models/test_tpu.py: 60 changes (12 additions, 48 deletions)

@@ -51,42 +51,24 @@ def test_model_tpu_cores_1(tmpdir):
    tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False)


+ @pytest.mark.parametrize('tpu_core', [1, 5])
@pytest.mark.skipif(not TPU_AVAILABLE, reason="test requires TPU machine")
@pl_multi_process_test
- def test_model_tpu_index_1(tmpdir):
-     """Make sure model trains on TPU."""
-     trainer_options = dict(
-         default_root_dir=tmpdir,
-         progress_bar_refresh_rate=0,
-         max_epochs=1,
-         distributed_backend='tpu',
-         tpu_cores=[1],
-         limit_train_batches=0.4,
-         limit_val_batches=0.4,
-     )
-
-     model = EvalModelTemplate()
-     tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False)
-     assert torch_xla._XLAC._xla_get_default_device() == 'xla:1'
-
-
- @pytest.mark.skipif(not TPU_AVAILABLE, reason="test requires TPU machine")
- @pl_multi_process_test
- def test_model_tpu_index_5(tmpdir):
+ def test_model_tpu_index(tmpdir, tpu_core):
    """Make sure model trains on TPU."""
    trainer_options = dict(
        default_root_dir=tmpdir,
        progress_bar_refresh_rate=0,
        max_epochs=1,
        distributed_backend='tpu',
-         tpu_cores=[5],
+         tpu_cores=[tpu_core],
        limit_train_batches=0.4,
        limit_val_batches=0.4,
    )

    model = EvalModelTemplate()
    tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False)
-     assert torch_xla._XLAC._xla_get_default_device() == 'xla:5'
+     assert torch_xla._XLAC._xla_get_default_device() == f'xla:{tpu_core}'


@pytest.mark.skipif(not TPU_AVAILABLE, reason="test requires TPU machine")
@@ -131,24 +113,27 @@ def test_model_16bit_tpu_cores_1(tmpdir):
    assert os.environ.get('XLA_USE_BF16') == str(1), "XLA_USE_BF16 was not set in environment variables"


+ @pytest.mark.parametrize('tpu_core', [1, 5])
@pytest.mark.skipif(not TPU_AVAILABLE, reason="test requires TPU machine")
@pl_multi_process_test
- def test_model_16bit_tpu_index_1(tmpdir):
+ def test_model_16bit_tpu_index(tmpdir, tpu_core):
    """Make sure model trains on TPU."""
    trainer_options = dict(
        default_root_dir=tmpdir,
        precision=16,
        progress_bar_refresh_rate=0,
-         train_percent_check=0.4,
-         val_percent_check=0.2,
        max_epochs=1,
        distributed_backend='tpu',
-         tpu_cores=[1],
+         tpu_cores=[tpu_core],
+         limit_train_batches=0.4,
+         limit_val_batches=0.4,
    )

    model = EvalModelTemplate()
    tpipes.run_model_test(trainer_options, model, on_gpu=False)
-     assert torch_xla._XLAC._xla_get_default_device() == 'xla:1'
+     assert torch_xla._XLAC._xla_get_default_device() == f'xla:{tpu_core}'
    assert os.environ.get('XLA_USE_BF16') == str(1), "XLA_USE_BF16 was not set in environment variables"


@@ -177,27 +162,7 @@ def test_model_16bit_tpu_cores_8(tmpdir):

- @pytest.mark.skipif(not TPU_AVAILABLE, reason="test requires TPU machine")
- @pl_multi_process_test
- def test_model_16bit_tpu_index_5(tmpdir):
-     """Test if distributed TPU core training works"""
-     model = EvalModelTemplate()
-     trainer = Trainer(
-         default_root_dir=tmpdir,
-         precision=16,
-         max_epochs=1,
-         train_percent_check=0.4,
-         val_percent_check=0.2,
-         distributed_backend='tpu',
-         tpu_cores=[5],
-     )
-     trainer.fit(model)
-     assert torch_xla._XLAC._xla_get_default_device() == 'xla:5'
-     assert os.environ.get('XLA_USE_BF16') == str(1), "XLA_USE_BF16 was not set in environment variables"
-
-
- @pytest.mark.parametrize('tpu_core', [1, 5])
@pytest.mark.skipif(not TPU_AVAILABLE, reason="test requires TPU machine")
@pl_multi_process_test
- def test_train_on_single_tpu(tmpdir, tpu_core):
+ def test_model_tpu_early_stop(tmpdir):
    """Test if single TPU core training works"""
    model = EvalModelTemplate()
    trainer = Trainer(
@@ -208,10 +173,9 @@ def test_train_on_single_tpu(tmpdir, tpu_core):
        limit_train_batches=10,
        limit_val_batches=10,
        distributed_backend='tpu',
-         tpu_cores=[tpu_core],
+         tpu_cores=1,
    )
    trainer.fit(model)
-     assert torch_xla._XLAC._xla_get_default_device() == 'xla:5'


@pytest.mark.skipif(not TPU_AVAILABLE, reason="test requires TPU machine")
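Most of the deletions above come from one refactor: the copy-pasted `*_index_1`/`*_index_5` tests collapse into single tests parametrized over the TPU core index. A minimal standalone sketch of the pattern (`build_trainer_options` is a hypothetical stand-in; the real tests pass their options to `tpipes.run_model_test` on an actual TPU machine):

```python
# Minimal sketch of the pytest.mark.parametrize pattern used in test_tpu.py.
# build_trainer_options is a hypothetical stand-in for the real trainer setup.
import pytest


def build_trainer_options(tpu_core: int) -> dict:
    """Stand-in for the trainer_options dict built in the real tests."""
    return dict(distributed_backend='tpu', tpu_cores=[tpu_core])


@pytest.mark.parametrize('tpu_core', [1, 5])
def test_tpu_core_selection(tpu_core):
    # pytest generates one test per value in the list, replacing the two
    # copy-pasted test_*_index_1 / test_*_index_5 functions with one body.
    options = build_trainer_options(tpu_core)
    assert options['tpu_cores'] == [tpu_core]
    # The real tests then assert the XLA default device matches, e.g.:
    # assert torch_xla._XLAC._xla_get_default_device() == f'xla:{tpu_core}'
```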
