Skip to content

Commit

Permalink
Auto merge of #114762 - Kobzol:ci-docker-cache, r=<try>
Browse files Browse the repository at this point in the history
[WIP] CI: use BuildKit Docker layer caching

Trying to experiment with using Docker buildkit layer caching, to resolve the recent CI caching issues caused by GHA updating its Docker version.

r? `@ghost`
  • Loading branch information
bors committed Dec 26, 2023
2 parents ea7ef7b + f9534ea commit b8ea986
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 79 deletions.
32 changes: 15 additions & 17 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ jobs:
CI_JOB_NAME: "${{ matrix.name }}"
CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
HEAD_SHA: "${{ github.event.pull_request.head.sha || github.sha }}"
DOCKER_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
SCCACHE_BUCKET: rust-lang-ci-sccache2
TOOLSTATE_REPO: "https://github.com/rust-lang-nursery/rust-toolstate"
CACHE_DOMAIN: ci-caches.rust-lang.org
Expand All @@ -50,19 +51,10 @@ jobs:
strategy:
matrix:
include:
- name: mingw-check
os: ubuntu-20.04-4core-16gb
env: {}
- name: mingw-check-tidy
os: ubuntu-20.04-4core-16gb
env: {}
- name: x86_64-gnu-llvm-16
- name: quick
env:
ENABLE_GCC_CODEGEN: "1"
os: ubuntu-20.04-16core-64gb
- name: x86_64-gnu-tools
os: ubuntu-20.04-16core-64gb
env: {}
os: ubuntu-20.04
timeout-minutes: 600
runs-on: "${{ matrix.os }}"
steps:
Expand Down Expand Up @@ -168,10 +160,13 @@ jobs:
if: "success() && !env.SKIP_JOB && (github.event_name == 'push' || env.DEPLOY == '1' || env.DEPLOY_ALT == '1')"
auto:
name: "auto - ${{ matrix.name }}"
permissions:
packages: write
env:
CI_JOB_NAME: "${{ matrix.name }}"
CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
HEAD_SHA: "${{ github.event.pull_request.head.sha || github.sha }}"
DOCKER_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
SCCACHE_BUCKET: rust-lang-ci-sccache2
DEPLOY_BUCKET: rust-lang-ci2
TOOLSTATE_REPO: "https://github.com/rust-lang-nursery/rust-toolstate"
Expand Down Expand Up @@ -204,7 +199,7 @@ jobs:
os: ubuntu-20.04-8core-32gb
env: {}
- name: dist-arm-linux
os: ubuntu-20.04-16core-64gb
os: ubuntu-20.04
env: {}
- name: dist-armhf-linux
os: ubuntu-20.04-8core-32gb
Expand Down Expand Up @@ -251,12 +246,12 @@ jobs:
- name: dist-x86_64-linux
env:
CODEGEN_BACKENDS: "llvm,cranelift"
os: ubuntu-20.04-16core-64gb
os: ubuntu-20.04
- name: dist-x86_64-linux-alt
env:
IMAGE: dist-x86_64-linux
CODEGEN_BACKENDS: "llvm,cranelift"
os: ubuntu-20.04-16core-64gb
os: ubuntu-20.04
- name: dist-x86_64-musl
env:
CODEGEN_BACKENDS: "llvm,cranelift"
Expand Down Expand Up @@ -291,7 +286,7 @@ jobs:
- name: x86_64-gnu-integration
env:
CI_ONLY_WHEN_CHANNEL: nightly
os: ubuntu-20.04-16core-64gb
os: ubuntu-20.04
- name: x86_64-gnu-debug
os: ubuntu-20.04-8core-32gb
env: {}
Expand Down Expand Up @@ -561,11 +556,14 @@ jobs:
if: "success() && !env.SKIP_JOB && (github.event_name == 'push' || env.DEPLOY == '1' || env.DEPLOY_ALT == '1')"
try:
name: "try - ${{ matrix.name }}"
permissions:
packages: write
env:
DIST_TRY_BUILD: 1
CI_JOB_NAME: "${{ matrix.name }}"
CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
HEAD_SHA: "${{ github.event.pull_request.head.sha || github.sha }}"
DOCKER_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
SCCACHE_BUCKET: rust-lang-ci-sccache2
DEPLOY_BUCKET: rust-lang-ci2
TOOLSTATE_REPO: "https://github.com/rust-lang-nursery/rust-toolstate"
Expand All @@ -579,10 +577,10 @@ jobs:
strategy:
matrix:
include:
- name: dist-x86_64-linux
- name: quick
env:
CODEGEN_BACKENDS: "llvm,cranelift"
os: ubuntu-20.04-16core-64gb
os: ubuntu-20.04
timeout-minutes: 600
runs-on: "${{ matrix.os }}"
steps:
Expand Down
36 changes: 36 additions & 0 deletions src/ci/docker/host-x86_64/quick/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
FROM ubuntu:23.04

ARG DEBIAN_FRONTEND=noninteractive

RUN apt-get update && apt-get install -y --no-install-recommends \
make \
file \
curl \
ca-certificates \
python3 \
git && rm -rf /var/lib/apt/lists/*

# Note: libgccjit needs to match the default gcc version for the linker to find it.


COPY scripts/sccache.sh /scripts/
RUN sh /scripts/sccache.sh

# We are disabling CI LLVM since this builder is intentionally using a host
# LLVM, rather than the typical src/llvm-project LLVM.
ENV NO_DOWNLOAD_CI_LLVM 1

# This is not the latest LLVM version, so some components required by tests may
# be missing.
ENV IS_NOT_LATEST_LLVM 1

# Using llvm-link-shared due to libffi issues -- see #34486
ENV RUST_CONFIGURE_ARGS \
--build=x86_64-unknown-linux-gnu \
--llvm-root=/usr/lib/llvm-16 \
--enable-llvm-link-shared \
--set rust.thin-lto-import-instr-limit=10

COPY host-x86_64/x86_64-gnu-llvm-16/script.sh /tmp/

ENV SCRIPT /tmp/script.sh
85 changes: 35 additions & 50 deletions src/ci/docker/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,25 +74,6 @@ if [ -f "$docker_dir/$image/Dockerfile" ]; then

cksum=$(sha512sum $hash_key | \
awk '{print $1}')

url="https://$CACHE_DOMAIN/docker/$cksum"

echo "Attempting to download $url"
rm -f /tmp/rustci_docker_cache
set +e
retry curl --max-time 600 -y 30 -Y 10 --connect-timeout 30 -f -L -C - \
-o /tmp/rustci_docker_cache "$url"

docker_archive_hash=$(sha512sum /tmp/rustci_docker_cache | awk '{print $1}')
echo "Downloaded archive hash: ${docker_archive_hash}"

echo "Loading images into docker"
# docker load sometimes hangs in the CI, so time out after 10 minutes with TERM,
# KILL after 12 minutes
loaded_images=$(/usr/bin/timeout -k 720 600 docker load -i /tmp/rustci_docker_cache \
| sed 's/.* sha/sha/')
set -e
printf "Downloaded containers:\n$loaded_images\n"
fi

dockerfile="$docker_dir/$image/Dockerfile"
Expand All @@ -103,44 +84,48 @@ if [ -f "$docker_dir/$image/Dockerfile" ]; then
context="$script_dir"
fi
echo "::group::Building docker image for $image"

# As of August 2023, Github Actions have updated Docker to 23.X,
# which uses the BuildKit by default. It currently throws aways all
# intermediate layers, which breaks our usage of S3 layer caching.
# Therefore we opt-in to the old build backend for now.
export DOCKER_BUILDKIT=0
retry docker \
build \
--rm \
-t rust-ci \
-f "$dockerfile" \
"$context"
echo "Image checksum ${cksum}"

# On PR jobs, we don't have permissions to write to the cache, so we should not use
# `docker login` nor caching.
if [ "$PR_CI_JOB" -eq 1 ]
then
docker pull ghcr.io/rust-lang-ci/rust-ci:e933e07d88a3a99bf4260cfb60899ada91f8df72a6588179fcf65ebe7ce824675eb8f2c985515ca3c51f2d0f5c006cb1d9e2fa66af562cdc91537385af559d59
# docker buildx create --use --driver docker-container
# retry docker buildx build --rm -t rust-ci \
# --output=type=docker \
# --cache-from type=registry,ref=ghcr.io/rust-lang-ci/rust-ci:${cksum} \
# -f "$dockerfile" "$context"
else
docker pull ghcr.io/rust-lang-ci/rust-ci:e933e07d88a3a99bf4260cfb60899ada91f8df72a6588179fcf65ebe7ce824675eb8f2c985515ca3c51f2d0f5c006cb1d9e2fa66af562cdc91537385af559d59

docker buildx create --use --driver docker-container

# Login to Docker registry
echo ${DOCKER_TOKEN} | docker login ghcr.io --username rust-lang-ci --password-stdin

dest="type=registry,ref=ghcr.io/rust-lang-ci/rust-ci:${cksum},compression=zstd,mode=max"

retry docker \
buildx \
build \
--rm \
-t rust-ci \
-f "$dockerfile" \
--cache-from type=registry,ref=ghcr.io/rust-lang-ci/rust-ci:${cksum} \
--cache-to ${dest} \
--output=type=docker \
"$context"
docker manifest inspect rust-ci
fi
echo "::endgroup::"

if [ "$CI" != "" ]; then
s3url="s3://$SCCACHE_BUCKET/docker/$cksum"
upload="aws s3 cp - $s3url"
digest=$(docker inspect rust-ci --format '{{.Id}}')
echo "Built container $digest"
if ! grep -q "$digest" <(echo "$loaded_images"); then
echo "Uploading finished image $digest to $url"
set +e
# Print image history for easier debugging of layer SHAs
docker history rust-ci
docker history -q rust-ci | \
grep -v missing | \
xargs docker save | \
gzip | \
$upload
set -e
else
echo "Looks like docker image is the same as before, not uploading"
fi
# Record the container image for reuse, e.g. by rustup.rs builds
info="$dist/image-$image.txt"
mkdir -p "$dist"
echo "$url" >"$info"
echo "$digest" >>"$info"
echo "${cksum}" > "$info"
cat "$info"
fi
elif [ -f "$docker_dir/disabled/$image/Dockerfile" ]; then
Expand Down
20 changes: 8 additions & 12 deletions src/ci/github-actions/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ x--expand-yaml-anchors--remove:
CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
# commit of PR sha or commit sha. `GITHUB_SHA` is not accurate for PRs.
HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
DOCKER_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- &public-variables
SCCACHE_BUCKET: rust-lang-ci-sccache2
Expand Down Expand Up @@ -84,7 +85,7 @@ x--expand-yaml-anchors--remove:
<<: *base-job

- &job-linux-16c
os: ubuntu-20.04-16core-64gb
os: ubuntu-20.04
<<: *base-job

- &job-macos-xl
Expand Down Expand Up @@ -328,23 +329,16 @@ jobs:
strategy:
matrix:
include:
- name: mingw-check
<<: *job-linux-4c

- name: mingw-check-tidy
<<: *job-linux-4c

- name: x86_64-gnu-llvm-16
- name: quick
env:
ENABLE_GCC_CODEGEN: "1"
<<: *job-linux-16c

- name: x86_64-gnu-tools
<<: *job-linux-16c

auto:
<<: *base-ci-job
name: auto - ${{ matrix.name }}
permissions:
packages: write
env:
<<: [*shared-ci-variables, *prod-variables]
if: github.event_name == 'push' && github.ref == 'refs/heads/auto' && github.repository == 'rust-lang-ci/rust'
Expand Down Expand Up @@ -725,6 +719,8 @@ jobs:
try:
<<: *base-ci-job
name: try - ${{ matrix.name }}
permissions:
packages: write
env:
DIST_TRY_BUILD: 1
<<: [*shared-ci-variables, *prod-variables]
Expand All @@ -733,7 +729,7 @@ jobs:
matrix:
include:
- &dist-x86_64-linux
name: dist-x86_64-linux
name: quick
env:
CODEGEN_BACKENDS: llvm,cranelift
<<: *job-linux-16c
Expand Down

0 comments on commit b8ea986

Please sign in to comment.