Automatically find and run special tests (#6669)
carmocca authored Mar 26, 2021
1 parent b730a5a commit 21fc5eb
Showing 7 changed files with 109 additions and 164 deletions.
2 changes: 1 addition & 1 deletion azure-pipelines.yml
@@ -82,7 +82,7 @@ jobs:
displayName: 'Testing: standard'
- bash: |
sh tests/special_tests.sh
bash tests/special_tests.sh
displayName: 'Testing: special'
- bash: |
150 changes: 43 additions & 107 deletions benchmarks/test_sharded_parity.py
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import time
from typing import Type

@@ -21,113 +20,13 @@

from pytorch_lightning import seed_everything, Trainer
from pytorch_lightning.plugins import DDPSpawnShardedPlugin
from tests.accelerators import DDPLauncher
from tests.helpers.boring_model import BoringModel, RandomDataset
from tests.helpers.runif import RunIf


@RunIf(min_gpus=1, skip_windows=True, fairscale=True)
def test_ddp_sharded_plugin_correctness_one_gpu():
plugin_parity_test(
gpus=1,
model_cls=SeedTrainLoaderModel,
)


@RunIf(min_gpus=1, skip_windows=True, fairscale=True, amp_native=True)
def test_ddp_sharded_plugin_correctness_amp_one_gpu():
plugin_parity_test(
gpus=1,
precision=16,
model_cls=SeedTrainLoaderModel,
)


@pytest.mark.skip(reason="Not a critical test, skip till drone CI performance improves.")
@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
def test_ddp_sharded_plugin_correctness_multi_gpu():
plugin_parity_test(
gpus=2,
model_cls=SeedTrainLoaderModel,
max_percent_speed_diff=0.25, # todo: Increase speed diff since only 2 GPUs sharding 2 optimizers
)


@RunIf(min_gpus=2, skip_windows=True, fairscale=True, amp_native=True)
def test_ddp_sharded_plugin_correctness_amp_multi_gpu():
plugin_parity_test(
gpus=2,
precision=16,
model_cls=SeedTrainLoaderModel,
max_percent_speed_diff=0.25, # todo: Increase speed diff since only 2 GPUs sharding 2 optimizers
)


@RunIf(min_gpus=2, skip_windows=True, fairscale=True, amp_native=True)
def test_ddp_string_sharded_plugin_correctness_amp_multi_gpu():
plugin_parity_test(
gpus=2,
precision=16,
model_cls=SeedTrainLoaderModel,
max_percent_speed_diff=0.25, # todo: Increase speed diff since only 2 GPUs sharding 2 optimizers
)


@RunIf(min_gpus=2, fairscale=True)
@pytest.mark.skipif(
not os.getenv("PL_RUNNING_SPECIAL_TESTS", '0') == '1', reason="test should be run outside of pytest"
)
@DDPLauncher.run("--accelerator ddp --gpus 2 --precision 32")
def test_ddp_sharded_plugin_correctness_multi_gpu_ddp(tmpdir, args=None):
plugin_parity_test(
gpus=args.gpus,
precision=args.precision,
model_cls=SeedTrainLoaderModel,
)


@RunIf(min_gpus=2, fairscale=True)
@pytest.mark.skipif(
not os.getenv("PL_RUNNING_SPECIAL_TESTS", '0') == '1', reason="test should be run outside of pytest"
)
@DDPLauncher.run("--accelerator ddp --gpus 2 --precision 16")
def test_ddp_sharded_plugin_correctness_amp_multi_gpu_ddp(tmpdir, args=None):
plugin_parity_test(
gpus=args.gpus,
precision=args.precision,
model_cls=SeedTrainLoaderModel,
)


@pytest.mark.skip(reason="Current issue with multiple optimizers and FairScale.")
@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
def test_ddp_sharded_plugin_correctness_multi_gpu_multi_optim():
"""
Ensures same results using multiple optimizers across multiple GPUs
"""
plugin_parity_test(
gpus=2,
model_cls=SeedTrainLoaderMultipleOptimizersModel,
max_percent_speed_diff=0.25, # todo: Increase speed diff since only 2 GPUs sharding 2 optimizers
)


@pytest.mark.skip(reason="Current issue with multiple optimizers and FairScale.")
@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
def test_ddp_sharded_plugin_correctness_multi_gpu_multi_optim_manual(tmpdir):
"""
Ensures using multiple optimizers across multiple GPUs with manual optimization
"""
plugin_parity_test(
gpus=2,
model_cls=SeedTrainLoaderManualModel,
max_percent_speed_diff=0.25, # todo: Increase speed diff since only 2 GPUs sharding 2 optimizers
)


class SeedTrainLoaderModel(BoringModel):
"""
Overrides training loader to ensure we enforce the same seed for all DDP processes.
Overrides training loader to ensure we enforce the same seed for all DDP processes.
"""

def train_dataloader(self):
@@ -177,7 +76,7 @@ class SeedTrainLoaderMultipleOptimizersModel(SeedTrainLoaderModel):
def training_step(self, batch, batch_idx, optimizer_idx):
output = self.layer(batch)
loss = self.loss(batch, output)
return {"loss": loss}
return {'loss': loss}

def training_epoch_end(self, outputs) -> None:
# outputs should be an array with an entry per optimizer
@@ -279,11 +178,48 @@ def plugin_parity_test(
# Assert speed parity by ensuring percentage difference between custom/ddp is below threshold
percent_diff = (custom_model_time - ddp_time) / custom_model_time

assert percent_diff <= max_percent_speed_diff, \
f'Custom DDP plugin was too slow compared to DDP, Custom Plugin Time: {custom_model_time}, DDP Time: {ddp_time}'
assert (
percent_diff <= max_percent_speed_diff
), f'Custom DDP plugin was too slow compared to DDP, Custom Plugin Time: {custom_model_time}, DDP Time: {ddp_time}'

if use_cuda:
# Assert CUDA memory parity
assert max_memory_custom <= max_memory_ddp, \
f'Custom plugin used too much memory compared to DDP,' \
assert max_memory_custom <= max_memory_ddp, (
'Custom plugin used too much memory compared to DDP, '
f'Custom Mem: {max_memory_custom}, DDP Mem: {max_memory_ddp}'
)


@RunIf(skip_windows=True, fairscale=True)
@pytest.mark.parametrize(
'kwargs',
[
pytest.param(dict(gpus=1, model_cls=SeedTrainLoaderModel), marks=RunIf(min_gpus=1)),
pytest.param(
dict(gpus=1, precision=16, model_cls=SeedTrainLoaderModel), marks=RunIf(min_gpus=1, amp_native=True)
),
pytest.param(dict(gpus=2, model_cls=SeedTrainLoaderModel), marks=RunIf(min_gpus=2)),
pytest.param(
dict(gpus=2, precision=16, model_cls=SeedTrainLoaderModel), marks=RunIf(min_gpus=2, amp_native=True)
),
pytest.param(
dict(gpus=2, model_cls=SeedTrainLoaderMultipleOptimizersModel),
marks=[
RunIf(min_gpus=2),
pytest.mark.skip(reason='TODO: Current issue with multiple optimizers and FairScale.'),
],
),
pytest.param(
dict(gpus=2, model_cls=SeedTrainLoaderManualModel),
marks=[
RunIf(min_gpus=2),
pytest.mark.skip(reason='TODO: Current issue with multiple optimizers and FairScale.'),
],
),
],
)
def test_ddp_spawn_sharded_plugin(kwargs):
if kwargs['gpus'] > 1:
# TODO: decrease speed diff since only 2 GPUs sharding 2 optimizers
kwargs['max_percent_speed_diff'] = 0.25
plugin_parity_test(**kwargs)
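A note on the `marks=` usage above: in this test suite `RunIf(...)` evaluates to an ordinary pytest `skipif` marker, which is why it can be passed to `pytest.param` on its own or stacked in a list together with `pytest.mark.skip`. Below is a minimal, self-contained sketch of that pattern using a hypothetical stand-in for `RunIf` (the real helper lives in tests/helpers/runif.py and checks more conditions):

import pytest
import torch


def require_gpus(n: int):
    # Hypothetical stand-in for tests.helpers.runif.RunIf: returning a ready-made
    # skipif marker lets the same object be used as a decorator or inside `marks=`.
    return pytest.mark.skipif(torch.cuda.device_count() < n, reason=f"requires {n} GPUs")


@pytest.mark.parametrize(
    'kwargs',
    [
        pytest.param(dict(gpus=1), marks=require_gpus(1)),
        pytest.param(dict(gpus=2), marks=[require_gpus(2), pytest.mark.skip(reason='example of stacked marks')]),
    ],
)
def test_example(kwargs):
    assert kwargs['gpus'] >= 1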
12 changes: 0 additions & 12 deletions tests/accelerators/__init__.py
@@ -1,12 +0,0 @@
try:
from dtrun.launcher import DDPLauncher
except ImportError:

class DDPLauncher:

def run(cmd_line, **kwargs):

def inner(func):
pass

return inner
15 changes: 1 addition & 14 deletions tests/accelerators/test_ddp.py
@@ -20,7 +20,7 @@
import torch

from pytorch_lightning import Trainer
from tests.accelerators import ddp_model, DDPLauncher
from tests.accelerators import ddp_model
from tests.helpers.boring_model import BoringModel
from tests.helpers.runif import RunIf
from tests.utilities.distributed import call_training_script
@@ -71,19 +71,6 @@ def test_multi_gpu_model_ddp_fit_test(tmpdir):
assert out['test_acc'] > 0.7


@RunIf(min_gpus=2)
@DDPLauncher.run(
"--max_epochs [max_epochs] --gpus 2 --accelerator [accelerator]",
max_epochs=["1"],
accelerator=["ddp", "ddp_spawn"]
)
def test_cli_to_pass(tmpdir, args=None):
"""
This test verify we can call function using test_cli name
"""
return '1'


@RunIf(skip_windows=True)
@pytest.mark.skipif(torch.cuda.is_available(), reason="test doesn't requires GPU machine")
def test_torch_distributed_backend_env_variables(tmpdir):
7 changes: 7 additions & 0 deletions tests/accelerators/test_multi_nodes_gpu.py
@@ -15,6 +15,7 @@
import sys
from unittest import mock

import pytest
import torch

from tests.helpers.runif import RunIf
@@ -28,6 +29,9 @@
from tests.helpers.boring_model import BoringModel # noqa: E402


# TODO(Borda): When multi-node tests are re-enabled (.github/workflows/ci_test-mnodes.yml)
# use an environment variable `PL_RUNNING_MULTINODE_TESTS` and set `RunIf(multinode=True)`
@pytest.mark.skip("Multi-node testing is currently disabled")
@RunIf(special=True)
def test_logging_sync_dist_true_ddp(tmpdir):
"""
@@ -65,6 +69,9 @@ def validation_step(self, batch, batch_idx):
assert trainer.logged_metrics['bar'] == fake_result


# TODO(Borda): When multi-node tests are re-enabled (.github/workflows/ci_test-mnodes.yml)
# use an environment variable `PL_RUNNING_MULTINODE_TESTS` and set `RunIf(multinode=True)`
@pytest.mark.skip("Multi-node testing is currently disabled")
@RunIf(special=True)
@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
def test__validation_step__log(tmpdir):
81 changes: 54 additions & 27 deletions tests/special_tests.sh
100644 → 100755
@@ -1,3 +1,4 @@
#!/bin/bash
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -11,32 +12,58 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Running special tests
set -e

# this environment variable allows special tests to run
export PL_RUNNING_SPECIAL_TESTS=1
DEFAULTS="-m coverage run --source pytorch_lightning --append -m pytest --verbose --capture=no"
python ${DEFAULTS} tests/trainer/optimization/test_manual_optimization.py::test_step_with_optimizer_closure_with_different_frequencies_ddp
python ${DEFAULTS} tests/models/test_sync_batchnorm.py::test_sync_batchnorm_ddp
python ${DEFAULTS} tests/plugins/test_deepspeed_plugin.py::test_invalid_deepspeed_defaults_no_precision
python ${DEFAULTS} tests/plugins/test_deepspeed_plugin.py::test_warn_deepspeed_override_backward
python ${DEFAULTS} tests/plugins/test_deepspeed_plugin.py::test_deepspeed_run_configure_optimizers
python ${DEFAULTS} tests/plugins/test_deepspeed_plugin.py::test_deepspeed_config
python ${DEFAULTS} tests/plugins/test_deepspeed_plugin.py::test_deepspeed_custom_precision_params
python ${DEFAULTS} tests/plugins/test_deepspeed_plugin.py::test_deepspeed_assert_config_zero_offload_disabled
python ${DEFAULTS} tests/plugins/test_deepspeed_plugin.py::test_deepspeed_multigpu
python ${DEFAULTS} tests/plugins/test_rpc_plugin.py::test_rpc_function_calls_ddp
python ${DEFAULTS} tests/plugins/test_rpc_sequential_plugin.py::test_rpc_sequential_plugin_manual
python ${DEFAULTS} tests/plugins/test_rpc_sequential_plugin.py::test_rpc_sequential_plugin_manual_amp
python ${DEFAULTS} tests/plugins/test_rpc_sequential_plugin.py::test_rpc_sequential_plugin_automatic
python ${DEFAULTS} tests/plugins/test_rpc_sequential_plugin.py::test_rpc_sequential_plugin_with_wrong_balance
python ${DEFAULTS} tests/utilities/test_all_gather_grad.py::test_all_gather_collection
python ${DEFAULTS} tests/trainer/test_trainer.py::test_trainer_predict_ddp
python ${DEFAULTS} tests/trainer/test_trainer.py::test_trainer_predict_dp
python ${DEFAULTS} tests/trainer/logging_/test_train_loop_logging_1_0.py::test_logging_sync_dist_true_ddp
python ${DEFAULTS} tests/callbacks/test_pruning.py::test_pruning_callback_ddp
python ${DEFAULTS} tests/test_profiler.py::test_pytorch_profiler_trainer_ddp
python ${DEFAULTS} tests/models/test_hooks.py::test_transfer_batch_hook_ddp
python ${DEFAULTS} tests/trainer/test_data_loading.py::test_replace_distrubuted_sampler_custom_dataloader_custom_batch_sampler
python ${DEFAULTS} tests/trainer/optimization/test_manual_optimization.py::test_step_with_optimizer_closure_with_different_frequencies_ddp_with_toggle_model
python ${DEFAULTS} tests/checkpointing/test_checkpoint_callback_frequency.py::test_top_k_ddp
nvprof --profile-from-start off -o trace_name.prof -- python ${DEFAULTS} tests/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx
# python arguments
defaults='-m coverage run --source pytorch_lightning --append -m pytest --verbose --capture=no'

# find tests marked as `@RunIf(special=True)`
grep_output=$(grep --recursive --line-number --word-regexp 'tests' 'benchmarks' --regexp 'special=True')
# file paths
files=$(echo "$grep_output" | cut -f1 -d:)
files_arr=($files)
# line numbers
linenos=$(echo "$grep_output" | cut -f2 -d:)
linenos_arr=($linenos)

# tests to skip - space separated
blocklist='test_pytorch_profiler_nested_emit_nvtx'
report=''

for i in "${!files_arr[@]}"; do
file=${files_arr[$i]}
lineno=${linenos_arr[$i]}

# get code from `@RunIf(special=True)` line to EOF
test_code=$(tail -n +"$lineno" "$file")

# read line by line
while read -r line; do
# if it's a test
if [[ $line == def\ test_* ]]; then
# get the name
test_name=$(echo $line | cut -c 5- | cut -f1 -d\()

# check blocklist
if echo $blocklist | grep --word-regexp "$test_name" > /dev/null; then
report+="Skipped\t$file:$lineno::$test_name\n"
break
fi

# run the test
report+="Ran\t$file:$lineno::$test_name\n"
python ${defaults} "${file}::${test_name}"
break
fi
done < <(echo "$test_code")
done

nvprof --profile-from-start off -o trace_name.prof -- python ${defaults} tests/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx

# echo test report
printf '=%.s' {1..80}
printf "\n$report"
printf '=%.s' {1..80}
printf '\n'
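The discovery step above keys on the literal text `special=True`: each grep hit comes back roughly as `path:lineno:matched line`, the two `cut` calls split that on `:` into a file path and a line number, and the first `def test_*` found from that line onward is run in its own interpreter with `PL_RUNNING_SPECIAL_TESTS=1` exported. A minimal sketch of a test the script would pick up (the test name and body are hypothetical; the `RunIf` import path is the one used throughout the diffs above):

from tests.helpers.runif import RunIf


@RunIf(special=True)  # the literal `special=True` is what the grep above matches
def test_my_special_case(tmpdir):
    # hypothetical body; the script launches it on its own as
    #   python -m coverage run --source pytorch_lightning --append -m pytest <file>::test_my_special_case
    ...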
6 changes: 3 additions & 3 deletions tests/utilities/test_all_gather_grad.py
@@ -55,7 +55,6 @@ class TestModel(BoringModel):
training_epoch_end_called = False

def training_epoch_end(self, outputs) -> None:
self.training_epoch_end_called = True
losses = torch.stack([x["loss"] for x in outputs])
gathered_loss = self.all_gather({
"losses_tensor_int": torch.rand(2, 2).int().t(),
@@ -67,7 +66,7 @@ def training_epoch_end(self, outputs) -> None:
"losses": losses,
"losses_list": [losses, losses]
})
assert gathered_loss["losses_tensor_int"][0].dtype == torch.int64
assert gathered_loss["losses_tensor_int"][0].dtype == torch.int32
assert gathered_loss["losses_tensor_float"][0].dtype == torch.float
assert gathered_loss["losses_np_ndarray"][0].dtype == torch.int64
# torch.bool can't be all_gathered
@@ -76,6 +75,7 @@ def training_epoch_end(self, outputs) -> None:
assert gathered_loss["losses_int"][0].dtype == torch.int
assert gathered_loss["losses_list"][0].numel() == 2 * len(losses)
assert gathered_loss["losses"].numel() == 2 * len(losses)
self.training_epoch_end_called = True

seed_everything(42)

@@ -115,6 +115,6 @@ def training_step(self, batch, batch_idx):
return loss

model = TestModel()
trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, gpus=2)
trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, gpus=2, accelerator="ddp")
trainer.fit(model)
assert model.training_step_called
