Skip to content

Commit

Permalink
chore: 0.34.0 bumpenvs (#9565)
Browse files Browse the repository at this point in the history
* retag images for release and bumpenvs
  • Loading branch information
MikhailKardash authored Jun 25, 2024
1 parent 2677dc2 commit 8da67d2
Show file tree
Hide file tree
Showing 35 changed files with 84 additions and 87 deletions.
14 changes: 7 additions & 7 deletions .circleci/real_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ commands:
- when:
condition: <<parameters.tf2>>
steps:
- run: docker pull determinedai/pytorch-ngc-dev:8c90e80
- run: docker pull determinedai/pytorch-ngc:0.34.0

login-docker:
parameters:
Expand Down Expand Up @@ -2401,7 +2401,7 @@ jobs:

test-unit-harness-gpu-tf:
docker:
- image: determinedai/tensorflow-ngc-dev:8c90e80
- image: determinedai/tensorflow-ngc:0.34.0
resource_class: determined-ai/container-runner-gpu
steps:
- run: mkdir -p ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts
Expand All @@ -2428,7 +2428,7 @@ jobs:

test-unit-harness-pytorch2-gpu:
docker:
- image: determinedai/pytorch-ngc-dev:8c90e80
- image: determinedai/pytorch-ngc:0.34.0
resource_class: determined-ai/container-runner-gpu
steps:
- run: mkdir -p ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts
Expand All @@ -2455,7 +2455,7 @@ jobs:

test-unit-harness-pytorch2-cpu:
docker:
- image: determinedai/pytorch-ngc-dev:8c90e80
- image: determinedai/pytorch-ngc:0.34.0
steps:
- run: mkdir -p ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts
- checkout
Expand All @@ -2481,7 +2481,7 @@ jobs:

test-unit-harness-gpu-parallel:
docker:
- image: determinedai/pytorch-ngc-dev:8c90e80
- image: determinedai/pytorch-ngc:0.34.0
resource_class: determined-ai/container-runner-multi-gpu
steps:
- run: mkdir -p ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts
Expand All @@ -2508,7 +2508,7 @@ jobs:

test-unit-harness-gpu-deepspeed:
docker:
- image: determinedai/pytorch-ngc-dev:8c90e80
- image: determinedai/pytorch-ngc:0.34.0
resource_class: determined-ai/container-runner-gpu
steps:
- run: mkdir -p ~/.ssh && ssh-keyscan github.com >> ~/.ssh/known_hosts
Expand Down Expand Up @@ -3747,7 +3747,7 @@ jobs:
type: string
default: "1"
environment-image:
default: determinedai/pytorch-ngc-dev:8c90e80
default: determinedai/pytorch-ngc:0.34.0
type: string
accel-node-taints:
type: string
Expand Down
2 changes: 1 addition & 1 deletion .circleci/scripts/pull_image_daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ spec:
spec:
containers:
- name: pull-docker-daemonset
image: determinedai/pytorch-ngc-dev:8c90e80
image: determinedai/pytorch-ngc:0.34.0
command: ["/bin/bash"]
args: ["echo", "test"]
resources:
Expand Down
8 changes: 4 additions & 4 deletions docs/model-dev-guide/prepare-container/custom-env.rst
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@ Default Images
- - Environment
- File Name
- - CPUs
- ``determinedai/pytorch-ngc-dev:8c90e80``
- ``determinedai/pytorch-ngc:0.34.0``
- - NVIDIA GPUs
- ``determinedai/pytorch-ngc-dev:8c90e80``
- ``determinedai/pytorch-ngc:0.34.0``
- - AMD GPUs
- ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.26.4``

Expand Down Expand Up @@ -155,7 +155,7 @@ Example Dockerfile that installs custom ``conda``-, ``pip``-, and ``apt``-based
.. code:: bash
# Determined Image
FROM determinedai/tensorflow-ngc-dev:8c90e80
FROM determinedai/tensorflow-ngc:0.34.0
# Custom Configuration
RUN apt-get update && \
Expand Down Expand Up @@ -216,7 +216,7 @@ environments using :ref:`custom images <custom-docker-images>`:
.. code:: bash
# Determined Image
FROM determinedai/pytorch-ngc-dev:8c90e80
FROM determinedai/pytorch-ngc:0.34.0
# Create a virtual environment
RUN conda create -n myenv python=3.8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Determined supports both TensorFlow 1 and 2. The version of TensorFlow used for
experiment is controlled by the configured container image. Determined provides prebuilt Docker
images that include TensorFlow 2+, 1.15, and 2.8, respectively:

- ``determinedai/tensorflow-ngc-dev:8c90e80``
- ``determinedai/tensorflow-ngc:0.34.0``
- ``determinedai/environments:cuda-10.2-pytorch-1.7-tf-1.15-gpu-0.21.2``
- ``determinedai/environments:cuda-11.2-tf-2.8-gpu-0.29.1``

Expand Down
4 changes: 2 additions & 2 deletions docs/reference/deploy/helm-config-reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -197,13 +197,13 @@

- ``cpuImage``: Sets the default Docker image for all non-GPU tasks. If a Docker image is
specified in the :ref:`experiment config <exp-environment-image>` this default is overridden.
Defaults to: ``determinedai/pytorch-ngc-dev:8c90e80``.
Defaults to: ``determinedai/pytorch-ngc:0.34.0``.

- ``startupHook``: An optional inline script that will be executed as part of task set up.

- ``gpuImage``: Sets the default Docker image for all GPU tasks. If a Docker image is specified
in the :ref:`experiment config <exp-environment-image>` this default is overridden. Defaults
to: ``determinedai/pytorch-ngc-dev:8c90e80``.
to: ``determinedai/pytorch-ngc:0.34.0``.

- ``logPolicies``: Sets log policies for trials. For details, visit :ref:`log_policies
<experiment-config-min-validation-period>`.
Expand Down
4 changes: 2 additions & 2 deletions docs/reference/deploy/master-config-reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,12 @@ configure different container images for NVIDIA GPU tasks using the ``cuda`` key
Determined 0.17.6), CPU tasks using ``cpu`` key, and ROCm (AMD GPU) tasks using the ``rocm`` key.
Default values:

- ``determinedai/pytorch-ngc-dev:8c90e80`` for NVIDIA GPUs and for CPUs.
- ``determinedai/pytorch-ngc:0.34.0`` for NVIDIA GPUs and for CPUs.
- ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.26.4`` for ROCm.

For TensorFlow users, we provide an image that must be referenced in the experiment configuration:

- ``determinedai/tensorflow-ngc-dev:8c90e80`` for NVIDIA GPUs and for CPUs.
- ``determinedai/tensorflow-ngc:0.34.0`` for NVIDIA GPUs and for CPUs.

``environment_variables``
=========================
Expand Down
4 changes: 2 additions & 2 deletions docs/reference/experiment-config-reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1333,12 +1333,12 @@ Optional. The Docker image to use when executing the workload. This image must b
container images for NVIDIA GPU tasks using ``cuda`` key (``gpu`` prior to 0.17.6), CPU tasks using
``cpu`` key, and ROCm (AMD GPU) tasks using ``rocm`` key. Default values:

- ``determinedai/pytorch-ngc-dev:8c90e80`` for NVIDIA GPUs and for CPUs.
- ``determinedai/pytorch-ngc:0.34.0`` for NVIDIA GPUs and for CPUs.
- ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.26.4`` for ROCm.

For TensorFlow users, we provide an image that must be referenced in the experiment configuration:

- ``determinedai/tensorflow-ngc-dev:8c90e80`` for NVIDIA GPUs and for CPUs.
- ``determinedai/tensorflow-ngc:0.34.0`` for NVIDIA GPUs and for CPUs.

When the cluster is configured with :ref:`resource_manager.type: slurm
<cluster-configuration-slurm>` and ``container_run_type: singularity``, images are executed using
Expand Down
4 changes: 2 additions & 2 deletions docs/reference/job-config-reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@ The following configuration settings are supported:
different container images for NVIDIA GPU tasks using ``cuda`` key (``gpu`` prior to 0.17.6),
CPU tasks using ``cpu`` key, and ROCm (AMD GPU) tasks using ``rocm`` key. Default values:

- ``determinedai/pytorch-ngc-dev:8c90e80`` for NVIDIA GPUs and for CPUs.
- ``determinedai/pytorch-ngc:0.34.0`` for NVIDIA GPUs and for CPUs.
- ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-0.26.4`` for ROCm.

For TensorFlow users, we provide an image that must be referenced in the experiment
configuration:

- ``determinedai/tensorflow-ngc-dev:8c90e80`` for NVIDIA GPUs and for CPUs.
- ``determinedai/tensorflow-ngc:0.34.0`` for NVIDIA GPUs and for CPUs.

- ``force_pull_image``: Forcibly pull the image from the Docker registry and bypass the Docker
cache. Defaults to ``false``.
Expand Down
4 changes: 2 additions & 2 deletions docs/setup-cluster/deploy-cluster/slurm/singularity.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ by default in this version of Determined are described below.
- - Environment
- File Name
- - CPUs
- ``determinedai/pytorch-ngc-dev:8c90e80``
- ``determinedai/pytorch-ngc:0.34.0``
- - NVIDIA GPUs
- ``determinedai/pytorch-ngc-dev:8c90e80``
- ``determinedai/pytorch-ngc:0.34.0``
- - AMD GPUs
- ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512``

Expand Down
4 changes: 2 additions & 2 deletions docs/setup-cluster/gcp/install-gcp.rst
Original file line number Diff line number Diff line change
Expand Up @@ -406,5 +406,5 @@ This command line will spin up a cluster of up to 2 A100s in the ``us-central1-c
--compute-agent-instance-type a2-highgpu-1g --gpu-num 1 \
--gpu-type nvidia-tesla-a100 \
--region us-central1 --zone us-central1-c \
--gpu-env-image determinedai/pytorch-ngc-dev:8c90e80 \
--cpu-env-image determinedai/pytorch-ngc-dev:8c90e80
--gpu-env-image determinedai/pytorch-ngc:0.34.0 \
--cpu-env-image determinedai/pytorch-ngc:0.34.0
4 changes: 2 additions & 2 deletions docs/setup-cluster/slurm/singularity.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ by default in this version of Determined are described below.
- - Environment
- File Name
- - CPUs
- ``determinedai/pytorch-ngc-dev:8c90e80``
- ``determinedai/pytorch-ngc:0.34.0``
- - NVIDIA GPUs
- ``determinedai/pytorch-ngc-dev:8c90e80``
- ``determinedai/pytorch-ngc:0.34.0``
- - AMD GPUs
- ``determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512``

Expand Down
2 changes: 1 addition & 1 deletion docs/setup-cluster/slurm/slurm-requirements.rst
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ platform. There may be additional per-user configuration that is required.

.. code:: bash
image=determinedai/pytorch-ngc-dev:8c90e80
image=determinedai/pytorch-ngc:0.34.0
cd /shared/enroot/images
enroot import docker://$image
enroot create /shared/enroot/images/${image//[\/:]/\+}.sqsh
Expand Down
12 changes: 6 additions & 6 deletions e2e_tests/tests/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
MAX_TRIAL_BUILD_SECS = 90


DEFAULT_TF2_CPU_IMAGE = "determinedai/tensorflow-ngc-dev:8c90e80"
DEFAULT_TF2_GPU_IMAGE = "determinedai/tensorflow-ngc-dev:8c90e80"
DEFAULT_PT_CPU_IMAGE = "determinedai/pytorch-tensorflow-cpu-dev:8c90e80"
DEFAULT_PT_GPU_IMAGE = "determinedai/pytorch-tensorflow-cuda-dev:8c90e80"
DEFAULT_PT2_CPU_IMAGE = "determinedai/pytorch-ngc-dev:8c90e80"
DEFAULT_PT2_GPU_IMAGE = "determinedai/pytorch-ngc-dev:8c90e80"
DEFAULT_TF2_CPU_IMAGE = "determinedai/tensorflow-ngc:0.34.0"
DEFAULT_TF2_GPU_IMAGE = "determinedai/tensorflow-ngc:0.34.0"
DEFAULT_PT_CPU_IMAGE = "determinedai/pytorch-tensorflow-cpu:0.34.0"
DEFAULT_PT_GPU_IMAGE = "determinedai/pytorch-tensorflow-cuda:0.34.0"
DEFAULT_PT2_CPU_IMAGE = "determinedai/pytorch-ngc:0.34.0"
DEFAULT_PT2_GPU_IMAGE = "determinedai/pytorch-ngc:0.34.0"

TF2_CPU_IMAGE = os.environ.get("TF2_CPU_IMAGE") or DEFAULT_TF2_CPU_IMAGE
TF2_GPU_IMAGE = os.environ.get("TF2_GPU_IMAGE") or DEFAULT_TF2_GPU_IMAGE
Expand Down
2 changes: 1 addition & 1 deletion e2e_tests/tests/fixtures/ports-proxy/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ max_restarts: 0

# Hardcode the image because the new image has a bug. TODO fix this when the image bug is fixed.
environment:
image: determinedai/pytorch-tensorflow-cpu-dev:8c90e80
image: determinedai/pytorch-tensorflow-cpu:0.34.0
proxy_ports:
- proxy_port: 8000
proxy_tcp: false
Expand Down
4 changes: 2 additions & 2 deletions examples/computer_vision/iris_tf_keras/adaptive.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ data:
test_url: http://download.tensorflow.org/data/iris_test.csv
environment:
image:
cpu: determinedai/tensorflow-ngc-dev:8c90e80
gpu: determinedai/tensorflow-ngc-dev:8c90e80
cpu: determinedai/tensorflow-ngc:0.34.0
gpu: determinedai/tensorflow-ngc:0.34.0
hyperparameters:
learning_rate:
type: log
Expand Down
4 changes: 2 additions & 2 deletions examples/computer_vision/iris_tf_keras/const.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ data:
test_url: http://download.tensorflow.org/data/iris_test.csv
environment:
image:
cpu: determinedai/tensorflow-ngc-dev:8c90e80
gpu: determinedai/tensorflow-ngc-dev:8c90e80
cpu: determinedai/tensorflow-ngc:0.34.0
gpu: determinedai/tensorflow-ngc:0.34.0
hyperparameters:
learning_rate: 1.0e-4
learning_rate_decay: 1.0e-6
Expand Down
4 changes: 2 additions & 2 deletions examples/computer_vision/iris_tf_keras/distributed.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ data:
test_url: http://download.tensorflow.org/data/iris_test.csv
environment:
image:
cpu: determinedai/tensorflow-ngc-dev:8c90e80
gpu: determinedai/tensorflow-ngc-dev:8c90e80
cpu: determinedai/tensorflow-ngc:0.34.0
gpu: determinedai/tensorflow-ngc:0.34.0
hyperparameters:
learning_rate: 1.0e-4
learning_rate_decay: 1.0e-6
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ data:
test_url: http://download.tensorflow.org/data/iris_test.csv
environment:
image:
cpu: determinedai/tensorflow-ngc-dev:8c90e80
gpu: determinedai/tensorflow-ngc-dev:8c90e80
cpu: determinedai/tensorflow-ngc:0.34.0
gpu: determinedai/tensorflow-ngc:0.34.0
resources:
slots_per_trial: 8
resource_pool: defq_GPU_cancelable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ data:
test_url: http://download.tensorflow.org/data/iris_test.csv
environment:
image:
cpu: determinedai/tensorflow-ngc-dev:8c90e80
gpu: determinedai/tensorflow-ngc-dev:8c90e80
cpu: determinedai/tensorflow-ngc:0.34.0
gpu: determinedai/tensorflow-ngc:0.34.0
resources:
slots_per_trial: 8
resource_pool: defq_GPU_hipri
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: torchvision dsat core_api
max_restarts: 0
environment:
image:
gpu: determinedai/pytorch-ngc-dev:8c90e80
gpu: determinedai/pytorch-ngc:0.34.0
resources:
slots_per_trial: 2
shm_size: 4294967296 # 4 GiB.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: torchvision dsat deepspeed_trial
max_restarts: 0
environment:
image:
gpu: determinedai/pytorch-ngc-dev:8c90e80
gpu: determinedai/pytorch-ngc:0.34.0
resources:
slots_per_trial: 2
shm_size: 4294967296 # 4 GiB.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ environment:
# You may need to modify this to match your network configuration.
- NCCL_SOCKET_IFNAME=ens,eth,ib
image:
gpu: determinedai/pytorch-ngc-dev:8c90e80
gpu: determinedai/pytorch-ngc:0.34.0
resources:
slots_per_trial: 2
searcher:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ environment:
# You may need to modify this to match your network configuration.
- NCCL_SOCKET_IFNAME=ens,eth,ib
image:
gpu: determinedai/pytorch-ngc-dev:8c90e80
gpu: determinedai/pytorch-ngc:0.34.0
resources:
slots_per_trial: 2
searcher:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@
},
"force_pull_image": false,
"image": {
"cpu": "determinedai/tensorflow-ngc-dev:8c90e80",
"cuda": "determinedai/tensorflow-ngc-dev:8c90e80",
"cpu": "determinedai/tensorflow-ngc:0.34.0",
"cuda": "determinedai/tensorflow-ngc:0.34.0",
"rocm": "determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512"
},
"pod_spec": null,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@
},
"force_pull_image": false,
"image": {
"cpu": "determinedai/tensorflow-ngc-dev:8c90e80",
"cuda": "determinedai/tensorflow-ngc-dev:8c90e80",
"cpu": "determinedai/tensorflow-ngc:0.34.0",
"cuda": "determinedai/tensorflow-ngc:0.34.0",
"rocm": "determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512"
},
"pod_spec": null,
Expand Down
4 changes: 2 additions & 2 deletions harness/tests/fixtures/checkpoint.json
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@
},
"force_pull_image":false,
"image":{
"cpu":"determinedai/pytorch-ngc-dev:8c90e80",
"cuda":"determinedai/pytorch-ngc-dev:8c90e80",
"cpu":"determinedai/pytorch-ngc:0.34.0",
"cuda":"determinedai/pytorch-ngc:0.34.0",
"rocm":"determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512"
},
"pod_spec":null,
Expand Down
4 changes: 2 additions & 2 deletions helm/charts/determined/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ defaultImages:
kubeSchedulerPreemption: "determinedai/kube-scheduler:0.17.0"

# default images for CPU and GPU environments
cpuImage: "determinedai/pytorch-ngc-dev:8c90e80"
gpuImage: "determinedai/pytorch-ngc-dev:8c90e80"
cpuImage: "determinedai/pytorch-ngc:0.34.0"
gpuImage: "determinedai/pytorch-ngc:0.34.0"

# Install Determined enterprise edition.
enterpriseEdition: false
Expand Down
4 changes: 2 additions & 2 deletions master/pkg/schemas/expconf/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const (

// Default task environment docker image names.
const (
CPUImage = "determinedai/pytorch-ngc-dev:8c90e80"
CUDAImage = "determinedai/pytorch-ngc-dev:8c90e80"
CPUImage = "determinedai/pytorch-ngc:0.34.0"
CUDAImage = "determinedai/pytorch-ngc:0.34.0"
ROCMImage = "determinedai/environments:rocm-5.0-pytorch-1.10-tf-2.7-rocm-622d512"
)
2 changes: 1 addition & 1 deletion model_hub/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ SHORT_GIT_HASH := $(shell git rev-parse --short HEAD)
ARTIFACTS_DIR := /tmp/artifacts

# Model-hub library environments will be built on top of the default GPU and CPU images in master/pkg/model/defaults.go
DEFAULT_GPU_IMAGE := determinedai/pytorch-tensorflow-cuda-dev:8c90e80
DEFAULT_GPU_IMAGE := determinedai/pytorch-tensorflow-cuda:0.34.0

############REMINDER############
# When bumping third-party library versions, remember to bump versions in
Expand Down
Loading

0 comments on commit 8da67d2

Please sign in to comment.