# Patch summary
#   * dockers/nvidia: build on the plain CUDA 11.1.1 runtime image, install
#     Python/pip explicitly, and install extra + examples requirements
#     (horovod lines are filtered out of requirements/extra.txt first).
#   * dockers/nvidia, dockers/release: copy the sources to /home and keep
#     notebooks/ and pl_examples/ there after the package is installed.
#   * pl_examples: replace run_ddp-example.sh with two runners that forward
#     their command-line arguments to the basic examples; azure-pipelines.yml
#     calls both.
#
# Review amendments relative to the submitted patch
#   * get-pip.py is downloaded with TLS verification; ca-certificates is
#     installed earlier in the same RUN, so --no-check-certificate is dropped.
#   * The runners forward "$@" quoted, and run_examples-args.sh quotes $0 and
#     the derived paths, so arguments and checkout paths containing spaces are
#     not re-split. ARGS_EXTRA_* stay unquoted on purpose: each holds several
#     flags that must be word-split.
#
# NOTE(review): leading whitespace of context lines was reconstructed, and the
# blob ids on the "index" lines were not recomputed for the amended files --
# verify against the target tree before applying.
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index d88a31ae9775a..ad203374979e6 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -116,8 +116,9 @@ jobs:
     - script: |
         set -e
         python -m pytest pl_examples -v --maxfail=2 --durations=0
-        python setup.py install --user --quiet
-        bash pl_examples/run_ddp-example.sh
+        pip install . --user --quiet
+        bash pl_examples/run_examples-args.sh --gpus 1 --max_epochs 1 --batch_size 64 --limit_train_batches 5 --limit_val_batches 3
+        bash pl_examples/run_ddp-examples.sh --max_epochs 1 --batch_size 32 --limit_train_batches 2 --limit_val_batches 2
         # cd pl_examples/basic_examples
         # bash submit_ddp_job.sh
         # bash submit_ddp2_job.sh
diff --git a/dockers/nvidia/Dockerfile b/dockers/nvidia/Dockerfile
index 4b04bc9426d4d..ad1169c4450dd 100644
--- a/dockers/nvidia/Dockerfile
+++ b/dockers/nvidia/Dockerfile
@@ -12,18 +12,55 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-FROM nvcr.io/nvidia/pytorch:21.02-py3
+FROM nvcr.io/nvidia/cuda:11.1.1-runtime-ubuntu20.04
 
 MAINTAINER PyTorchLightning
 
 ARG LIGHTNING_VERSION=""
 
-COPY ./ ./pytorch-lightning/
+SHELL ["/bin/bash", "-c"]
+# https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/
+ENV \
+    DEBIAN_FRONTEND=noninteractive \
+    TZ=Europe/Prague \
+    PATH="$PATH:/root/.local/bin" \
+    CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \
+    MKL_THREADING_LAYER=GNU
+
+RUN apt-get update -qq && \
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        python3 \
+        python3-distutils \
+        python3-dev \
+        pkg-config \
+        cmake \
+        git \
+        wget \
+        unzip \
+        ca-certificates \
+    && \
+
+# Cleaning
+    apt-get autoremove -y && \
+    apt-get clean && \
+    rm -rf /root/.cache && \
+    rm -rf /var/lib/apt/lists/* && \
+
+# Setup PIP
+    update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \
+    wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll && \
+    python get-pip.py && \
+    rm get-pip.py && \
+    pip --version
+
+COPY ./ /home/pytorch-lightning/
 
-# install dependencies
 RUN \
-    #conda install "pip>20.1" && \
-    pip list | grep torch && \
+    cd /home && \
+    mv pytorch-lightning/notebooks . && \
+    mv pytorch-lightning/pl_examples . && \
+    # replace by specific version if asked
     if [ ! -z "$LIGHTNING_VERSION" ] ; then \
         rm -rf pytorch-lightning ; \
         wget https://github.com/PyTorchLightning/pytorch-lightning/archive/${LIGHTNING_VERSION}.zip --progress=bar:force:noscroll ; \
@@ -31,7 +68,12 @@ RUN \
         mv pytorch-lightning-*/ pytorch-lightning ; \
         rm *.zip ; \
     fi && \
-    pip install ./pytorch-lightning["extra"] --no-cache-dir && \
+
+# Installations
+    python -c "fname = './pytorch-lightning/requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('horovod')] ; open(fname, 'w').writelines(lines)" && \
+    pip install -r ./pytorch-lightning/requirements/extra.txt -U --no-cache-dir && \
+    pip install -r ./pytorch-lightning/requirements/examples.txt -U --no-cache-dir && \
+    pip install ./pytorch-lightning --no-cache-dir && \
     rm -rf pytorch-lightning
 
 RUN python --version && \
diff --git a/dockers/release/Dockerfile b/dockers/release/Dockerfile
index 0eec1e41a5a3f..5cd53385f660b 100644
--- a/dockers/release/Dockerfile
+++ b/dockers/release/Dockerfile
@@ -21,11 +21,14 @@ MAINTAINER PyTorchLightning
 
 ARG LIGHTNING_VERSION=""
 
-COPY ./ ./pytorch-lightning/
+COPY ./ /home/pytorch-lightning/
 
 # install dependencies
 RUN \
-    #conda install "pip>20.1" && \
+    cd /home && \
+    mv pytorch-lightning/notebooks . && \
+    mv pytorch-lightning/pl_examples . && \
+    # replace by specific version if asked
     if [ ! -z "$LIGHTNING_VERSION" ] ; then \
         rm -rf pytorch-lightning ; \
         wget https://github.com/PyTorchLightning/pytorch-lightning/archive/${LIGHTNING_VERSION}.zip --progress=bar:force:noscroll ; \
diff --git a/pl_examples/run_ddp-example.sh b/pl_examples/run_ddp-example.sh
deleted file mode 100644
index f0c7695e766f2..0000000000000
--- a/pl_examples/run_ddp-example.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-ARGS_DEFAULT=" --default_root_dir %(tmpdir)s --max_epochs 1 --batch_size 32 --limit_train_batches 2 --limit_val_batches 2"
-ARGS_EXTRA_DDP=" --gpus 2 --accelerator ddp"
-ARGS_EXTRA_AMP=" --precision 16"
-
-python pl_examples/basic_examples/simple_image_classifier.py ${ARGS_DEFAULT} ${ARGS_EXTRA_DDP}
-python pl_examples/basic_examples/simple_image_classifier.py ${ARGS_DEFAULT} ${ARGS_EXTRA_DDP} ${ARGS_EXTRA_AMP}
-python pl_examples/basic_examples/backbone_image_classifier.py ${ARGS_DEFAULT} ${ARGS_EXTRA_DDP}
-python pl_examples/basic_examples/backbone_image_classifier.py ${ARGS_DEFAULT} ${ARGS_EXTRA_DDP} ${ARGS_EXTRA_AMP}
-python pl_examples/basic_examples/autoencoder.py ${ARGS_DEFAULT} ${ARGS_EXTRA_DDP}
-python pl_examples/basic_examples/autoencoder.py ${ARGS_DEFAULT} ${ARGS_EXTRA_DDP} ${ARGS_EXTRA_AMP}
diff --git a/pl_examples/run_ddp-examples.sh b/pl_examples/run_ddp-examples.sh
new file mode 100644
index 0000000000000..6cc36364e397d
--- /dev/null
+++ b/pl_examples/run_ddp-examples.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+ARGS_EXTRA_DDP=" --gpus 2 --accelerator ddp"
+ARGS_EXTRA_AMP=" --precision 16"
+
+python pl_examples/basic_examples/simple_image_classifier.py "$@" ${ARGS_EXTRA_DDP}
+python pl_examples/basic_examples/simple_image_classifier.py "$@" ${ARGS_EXTRA_DDP} ${ARGS_EXTRA_AMP}
+
+python pl_examples/basic_examples/backbone_image_classifier.py "$@" ${ARGS_EXTRA_DDP}
+python pl_examples/basic_examples/backbone_image_classifier.py "$@" ${ARGS_EXTRA_DDP} ${ARGS_EXTRA_AMP}
+
+python pl_examples/basic_examples/autoencoder.py "$@" ${ARGS_EXTRA_DDP}
+python pl_examples/basic_examples/autoencoder.py "$@" ${ARGS_EXTRA_DDP} ${ARGS_EXTRA_AMP}
diff --git a/pl_examples/run_examples-args.sh b/pl_examples/run_examples-args.sh
new file mode 100644
index 0000000000000..352869538cb18
--- /dev/null
+++ b/pl_examples/run_examples-args.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+echo "$@"
+
+full_path=$(realpath "$0")
+echo "$full_path"
+
+dir_path=$(dirname "$full_path")
+echo "$dir_path"
+
+python "${dir_path}/basic_examples/simple_image_classifier.py" "$@"
+
+python "${dir_path}/basic_examples/backbone_image_classifier.py" "$@"
+
+python "${dir_path}/basic_examples/autoencoder.py" "$@"