From f03745d187d0c33b927121d4c8da977789b929ac Mon Sep 17 00:00:00 2001 From: marun Date: Tue, 1 Oct 2024 13:29:25 -0700 Subject: [PATCH] [ci] Add test binary and image to support bootstrap testing (#3352) Signed-off-by: marun Co-authored-by: Stephen Buttolph --- .github/workflows/ci.yml | 9 + .github/workflows/publish_docker_image.yml | 8 + Dockerfile | 3 +- config/viper.go | 5 + go.mod | 29 +- go.sum | 60 +- scripts/build_bootstrap_monitor.sh | 14 + scripts/build_bootstrap_monitor_image.sh | 30 + scripts/build_image.sh | 53 +- scripts/build_test.sh | 2 +- scripts/tests.build_image.sh | 2 +- scripts/tests.e2e.bootstrap_monitor.sh | 69 ++ tests/antithesis/compose.go | 6 +- tests/antithesis/config.go | 9 +- tests/fixture/bootstrapmonitor/README.md | 172 +++++ .../bootstrapmonitor/bootstrap_test_config.go | 157 +++++ .../bootstrap_test_config_test.go | 277 ++++++++ tests/fixture/bootstrapmonitor/cmd/main.go | 118 ++++ tests/fixture/bootstrapmonitor/common.go | 294 +++++++++ .../fixture/bootstrapmonitor/e2e/e2e_test.go | 609 ++++++++++++++++++ tests/fixture/bootstrapmonitor/init.go | 121 ++++ tests/fixture/bootstrapmonitor/wait.go | 196 ++++++ tests/fixture/e2e/helpers.go | 19 + tests/fixture/tmpnet/node_process.go | 31 +- tests/fixture/tmpnet/utils.go | 41 +- 25 files changed, 2269 insertions(+), 65 deletions(-) create mode 100755 scripts/build_bootstrap_monitor.sh create mode 100755 scripts/build_bootstrap_monitor_image.sh create mode 100755 scripts/tests.e2e.bootstrap_monitor.sh create mode 100644 tests/fixture/bootstrapmonitor/README.md create mode 100644 tests/fixture/bootstrapmonitor/bootstrap_test_config.go create mode 100644 tests/fixture/bootstrapmonitor/bootstrap_test_config_test.go create mode 100644 tests/fixture/bootstrapmonitor/cmd/main.go create mode 100644 tests/fixture/bootstrapmonitor/common.go create mode 100644 tests/fixture/bootstrapmonitor/e2e/e2e_test.go create mode 100644 tests/fixture/bootstrapmonitor/init.go create mode 100644 
tests/fixture/bootstrapmonitor/wait.go diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 39550d4259f2..d0851d0cf44a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -238,3 +238,12 @@ jobs: run: bash -x scripts/tests.build_antithesis_images.sh env: TEST_SETUP: xsvm + e2e_bootstrap_monitor: + name: Run bootstrap monitor e2e tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/setup-go-for-project + - name: Run e2e tests + shell: bash + run: bash -x ./scripts/tests.e2e.bootstrap_monitor.sh diff --git a/.github/workflows/publish_docker_image.yml b/.github/workflows/publish_docker_image.yml index 2674c429bfd5..38648de3d5df 100644 --- a/.github/workflows/publish_docker_image.yml +++ b/.github/workflows/publish_docker_image.yml @@ -26,4 +26,12 @@ jobs: DOCKER_USERNAME: ${{ secrets.docker_username }} DOCKER_PASS: ${{ secrets.docker_pass }} DOCKER_IMAGE: ${{ secrets.docker_repo }} + BUILD_MULTI_ARCH: 1 run: scripts/build_image.sh + - name: Build and publish bootstrap-monitor image to DockerHub + env: + DOCKER_USERNAME: ${{ secrets.docker_username }} + DOCKER_PASS: ${{ secrets.docker_pass }} + DOCKER_IMAGE: avaplatform/bootstrap-monitor + BUILD_MULTI_ARCH: 1 + run: scripts/build_bootstrap_monitor_image.sh diff --git a/Dockerfile b/Dockerfile index f7fffb848b2d..5a6b58f9c194 100644 --- a/Dockerfile +++ b/Dockerfile @@ -39,10 +39,11 @@ RUN [ -d ./build ] && rm -rf ./build/* || true # Build avalanchego. The build environment is configured with build_env.sh from the step # enabling cross-compilation. ARG RACE_FLAG="" +ARG BUILD_SCRIPT=build.sh RUN . 
./build_env.sh && \ echo "{CC=$CC, TARGETPLATFORM=$TARGETPLATFORM, BUILDPLATFORM=$BUILDPLATFORM}" && \ export GOARCH=$(echo ${TARGETPLATFORM} | cut -d / -f2) && \ - ./scripts/build.sh ${RACE_FLAG} + ./scripts/${BUILD_SCRIPT} ${RACE_FLAG} # Create this directory in the builder to avoid requiring anything to be executed in the # potentially emulated execution container. diff --git a/config/viper.go b/config/viper.go index cc15ac55c35b..dbccae2fd7f4 100644 --- a/config/viper.go +++ b/config/viper.go @@ -19,6 +19,11 @@ const EnvPrefix = "avago" var DashesToUnderscores = strings.NewReplacer("-", "_") +func EnvVarName(prefix string, key string) string { + // e.g. MY_PREFIX, network-id -> MY_PREFIX_NETWORK_ID + return strings.ToUpper(prefix + "_" + DashesToUnderscores.Replace(key)) +} + // BuildViper returns the viper environment from parsing config file from // default search paths and any parsed command line flags func BuildViper(fs *pflag.FlagSet, args []string) (*viper.Viper, error) { diff --git a/go.mod b/go.mod index 69fc291f6229..5022deb52b78 100644 --- a/go.mod +++ b/go.mod @@ -22,7 +22,7 @@ require ( github.com/google/uuid v1.6.0 github.com/gorilla/mux v1.8.0 github.com/gorilla/rpc v1.2.0 - github.com/gorilla/websocket v1.4.2 + github.com/gorilla/websocket v1.5.0 github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 github.com/holiman/uint256 v1.2.4 github.com/huin/goupnp v1.3.0 @@ -69,6 +69,10 @@ require ( google.golang.org/protobuf v1.34.2 gopkg.in/natefinch/lumberjack.v2 v2.0.0 gopkg.in/yaml.v3 v3.0.1 + k8s.io/api v0.29.0 + k8s.io/apimachinery v0.29.0 + k8s.io/client-go v0.29.0 + k8s.io/utils v0.0.0-20230726121419-3b25d923346b ) require ( @@ -96,6 +100,7 @@ require ( github.com/docker/go-connections v0.4.0 // indirect github.com/docker/go-units v0.5.0 // indirect github.com/dop251/goja v0.0.0-20230806174421-c933cf95e127 // indirect + github.com/emicklei/go-restful/v3 v3.11.0 // indirect github.com/ethereum/c-kzg-4844 v0.4.0 // indirect 
github.com/frankban/quicktest v1.14.4 // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect @@ -105,11 +110,16 @@ require ( github.com/go-logr/logr v1.4.1 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.3.0 // indirect + github.com/go-openapi/jsonpointer v0.19.6 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.22.3 // indirect github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb // indirect + github.com/google/gnostic-models v0.6.8 // indirect + github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20230207041349-798e818bf904 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect github.com/hashicorp/go-bexpr v0.1.10 // indirect @@ -117,11 +127,15 @@ require ( github.com/hashicorp/hcl v1.0.0 // indirect github.com/holiman/billy v0.0.0-20240216141850-2abb0c79d3c4 // indirect github.com/holiman/bloomfilter/v2 v2.0.3 // indirect + github.com/imdario/mergo v0.3.16 // indirect github.com/inconshreveable/mousetrap v1.0.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/compress v1.15.15 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect github.com/magiconair/properties v1.8.6 // indirect + github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.17 // indirect github.com/mattn/go-runewidth v0.0.13 // indirect @@ -129,6 +143,11 @@ require ( github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect github.com/mitchellh/pointerstructure v1.2.0 // indirect github.com/mmcloughlin/addchain v0.4.0 // indirect 
+ github.com/moby/spdystream v0.2.0 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/pelletier/go-toml v1.9.5 // indirect @@ -156,11 +175,19 @@ require ( go.opentelemetry.io/otel/metric v1.22.0 // indirect go.opentelemetry.io/proto/otlp v1.0.0 // indirect go.uber.org/multierr v1.11.0 // indirect + golang.org/x/oauth2 v0.16.0 // indirect golang.org/x/sys v0.19.0 // indirect golang.org/x/text v0.14.0 // indirect golang.org/x/tools v0.17.0 // indirect + google.golang.org/appengine v1.6.8 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240123012728-ef4313101c80 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect + k8s.io/klog/v2 v2.110.1 // indirect + k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect rsc.io/tmplfunc v0.0.3 // indirect + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect + sigs.k8s.io/yaml v1.3.0 // indirect ) diff --git a/go.sum b/go.sum index 51ab55496511..db421494a6b9 100644 --- a/go.sum +++ b/go.sum @@ -62,6 +62,8 @@ github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax github.com/antithesishq/antithesis-sdk-go v0.3.8 h1:OvGoHxIcOXFJLyn9IJQ5DzByZ3YVAWNBc394ObzDRb8= github.com/antithesishq/antithesis-sdk-go v0.3.8/go.mod h1:IUpT2DPAKh6i/YhSbt6Gl3v2yvUZjmKncl7U91fup7E= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= 
+github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/ava-labs/coreth v0.13.8 h1:f14X3KgwHl9LwzfxlN6S4bbn5VA2rhEsNnHaRLSTo/8= github.com/ava-labs/coreth v0.13.8/go.mod h1:t3BSv/eQv0AlDPMfEDCMMoD/jq1RkUsbFzQAFg5qBcE= github.com/ava-labs/ledger-avalanche/go v0.0.0-20240610153809-9c955cc90a95 h1:dOVbtdnZL++pENdTCNZ1nu41eYDQkTML4sWebDnnq8c= @@ -177,6 +179,8 @@ github.com/dop251/goja_nodejs v0.0.0-20210225215109-d91c329300e7/go.mod h1:hn7BA github.com/dop251/goja_nodejs v0.0.0-20211022123610-8dd9abb0616d/go.mod h1:DngW8aVqWbuLRMHItjPUyqdj+HWPvnQe8V8y1nDpIbM= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM= +github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= +github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -216,6 +220,7 @@ github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= github.com/go-logr/logr v1.4.1/go.mod 
h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= @@ -224,6 +229,12 @@ github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AE github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= +github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= github.com/go-sourcemap/sourcemap v2.1.3+incompatible h1:W1iEw64niKVGogNgBN3ePyLFfuisuzeidWPMPWmECqU= github.com/go-sourcemap/sourcemap v2.1.3+incompatible/go.mod h1:F8jJfvm2KbVjc5NqelyYJmf/v5J0dwNLS2mL4sNA1Jg= github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= @@ -278,6 +289,8 @@ github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Z github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU= github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= +github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= +github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= 
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -327,8 +340,9 @@ github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB7 github.com/gorilla/rpc v1.2.0 h1:WvvdC2lNeT1SP32zrIce5l0ECBfbAlmrmSBsuc57wfk= github.com/gorilla/rpc v1.2.0/go.mod h1:V4h9r+4sF5HnzqbwIez0fKSpANP0zlYd3qR7p36jkTQ= github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= -github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= +github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms= @@ -355,6 +369,8 @@ github.com/hydrogen18/memlistener v0.0.0-20200120041712-dcc25e7acd91/go.mod h1:q github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20220319035150-800ac71e25c2/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w= +github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= +github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/imkira/go-interpol v1.1.0/go.mod h1:z0h2/2T3XF8kyEPpRgJ3kmNv+C43p+I/CoI+jC3w2iA= github.com/inconshreveable/mousetrap v1.0.0 
h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= @@ -369,9 +385,13 @@ github.com/jackpal/go-nat-pmp v1.0.2 h1:KzKSgb7qkJvOUTqYl9/Hg/me3pWgBmERKrTGD7Bd github.com/jackpal/go-nat-pmp v1.0.2/go.mod h1:QPH045xvCAeXUZOxsnwmrtiCoxIr9eob+4orBN1SBKc= github.com/jessevdk/go-flags v0.0.0-20141203071132-1679536dcc89/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jrick/logrotate v1.0.0/go.mod h1:LNinyqDIJnpAur+b8yyulnQw/wDuN1+BYKlTRt3OuAQ= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= @@ -408,6 +428,8 @@ github.com/leanovate/gopter v0.2.9/go.mod h1:U2L/78B+KVFIx2VmW6onHJQzXtFb+p5y3y2 github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/magiconair/properties v1.8.6 h1:5ibWZ6iY0NctNGWo87LalDlEZ6R41TqbbDamhfG/Qzo= github.com/magiconair/properties v1.8.6/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod 
h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.11/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= @@ -441,13 +463,22 @@ github.com/mitchellh/pointerstructure v1.2.0/go.mod h1:BRAsLI5zgXmw97Lf6s25bs8oh github.com/mmcloughlin/addchain v0.4.0 h1:SobOdjm2xLj1KkXN5/n0xTIWyZA2+s99UCY1iPfkHRY= github.com/mmcloughlin/addchain v0.4.0/go.mod h1:A86O+tHqZLMNO4w6ZZ4FlVQEadcoqkyU72HC5wJ4RlU= github.com/mmcloughlin/profile v0.1.1/go.mod h1:IhHD7q1ooxgwTgjxQYkACGA77oFTDdFVejUS1/tS/qU= +github.com/moby/spdystream v0.2.0 h1:cjW1zVyyoiM0T7b6UoySUFqzXMoqRckQtXwGPiBhOM8= +github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0GqbN2Wy8c= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ= github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= 
+github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg= github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzEE/Zbp4w= github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= @@ -553,6 +584,7 @@ github.com/status-im/keycard-go v0.2.0 h1:QDLFswOQu1r5jsycloeQh3bVU8n/NatHHaZobt github.com/status-im/keycard-go v0.2.0/go.mod h1:wlp8ZLbsmrF6g6WjugPAx+IzoLrkdf9+mHxBEeo3Hbg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v0.0.0-20161117074351-18a02ba4a312/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.1.5-0.20170601210322-f6abca593680/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= @@ -564,6 +596,7 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod 
h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/subosito/gotenv v1.3.0 h1:mjC+YW8QpAdXibNi+vNWgzmgBH4+5l5dCXv8cNysBLI= @@ -756,6 +789,8 @@ golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ golang.org/x/oauth2 v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.16.0 h1:aDkGMBSYxElaoP81NpoUoz2oo2R2wHdZpGToUxfyQrQ= +golang.org/x/oauth2 v0.16.0/go.mod h1:hqZ+0LWXsiVoZpeld6jVt06P3adbS2Uu911W1SsJv2o= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -951,6 +986,8 @@ google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM= +google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds= google.golang.org/genproto v0.0.0-20180518175338-11a468237815/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= @@ -1038,6 +1075,8 @@ gopkg.in/errgo.v2 v2.1.0/go.mod 
h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/go-playground/assert.v1 v1.2.1/go.mod h1:9RXL0bg/zibRAgZUYszZSwO/z8Y/a8bDuhia5mkpMnE= gopkg.in/go-playground/validator.v8 v8.18.2/go.mod h1:RX2a/7Ha8BgOhfk7j780h4/u/RRjR0eouCJSH80/M2Y= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.51.1/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= @@ -1049,6 +1088,7 @@ gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWD gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= @@ -1066,6 +1106,18 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= +k8s.io/api v0.29.0 h1:NiCdQMY1QOp1H8lfRyeEf8eOwV6+0xA6XEE44ohDX2A= +k8s.io/api v0.29.0/go.mod h1:sdVmXoz2Bo/cb77Pxi71IPTSErEW32xa4aXwKH7gfBA= +k8s.io/apimachinery v0.29.0 h1:+ACVktwyicPz0oc6MTMLwa2Pw3ouLAfAon1wPLtG48o= +k8s.io/apimachinery v0.29.0/go.mod 
h1:eVBxQ/cwiJxH58eK/jd/vAk4mrxmVlnpBH5J2GbMeis= +k8s.io/client-go v0.29.0 h1:KmlDtFcrdUzOYrBhXHgKw5ycWzc3ryPX5mQe0SkG3y8= +k8s.io/client-go v0.29.0/go.mod h1:yLkXH4HKMAywcrD82KMSmfYg2DlE8mepPR4JGSo5n38= +k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= +k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= +k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780= +k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= launchpad.net/gocheck v0.0.0-20140225173054-000000000087 h1:Izowp2XBH6Ya6rv+hqbceQyw/gSGoXfH/UPoTGduL54= launchpad.net/gocheck v0.0.0-20140225173054-000000000087/go.mod h1:hj7XX3B/0A+80Vse0e+BUHsHMTEhd0O4cpUHr/e/BUM= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= @@ -1073,3 +1125,9 @@ rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= rsc.io/tmplfunc v0.0.3 h1:53XFQh69AfOa8Tw0Jm7t+GV7KZhOi6jzsCzTtKbMvzU= rsc.io/tmplfunc v0.0.3/go.mod h1:AG3sTPzElb1Io3Yg4voV9AGZJuleGAwaVRxL9M49PhA= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= +sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= diff --git a/scripts/build_bootstrap_monitor.sh 
b/scripts/build_bootstrap_monitor.sh new file mode 100755 index 000000000000..5a4a86c32faf --- /dev/null +++ b/scripts/build_bootstrap_monitor.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Avalanchego root folder +AVALANCHE_PATH=$( cd "$( dirname "${BASH_SOURCE[0]}" )"; cd .. && pwd ) +# Load the constants +source "$AVALANCHE_PATH"/scripts/constants.sh + +echo "Building bootstrap-monitor..." +go build -ldflags\ + "-X github.com/ava-labs/avalanchego/version.GitCommit=$git_commit $static_ld_flags"\ + -o "$AVALANCHE_PATH/build/bootstrap-monitor"\ + "$AVALANCHE_PATH/tests/fixture/bootstrapmonitor/cmd/"*.go diff --git a/scripts/build_bootstrap_monitor_image.sh b/scripts/build_bootstrap_monitor_image.sh new file mode 100755 index 000000000000..5005fdc57283 --- /dev/null +++ b/scripts/build_bootstrap_monitor_image.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# e.g., +# ./scripts/build_bootstrap_monitor_image.sh # Build local image +# DOCKER_IMAGE=my-bootstrap-monitor ./scripts/build_bootstrap_monitor_image.sh # Build local single arch image with a custom image name +# DOCKER_IMAGE=avaplatform/bootstrap-monitor ./scripts/build_bootstrap_monitor_image.sh # Build and push image to docker hub + +# Builds the image for the bootstrap monitor + +# Directory above this script +AVALANCHE_PATH=$( cd "$( dirname "${BASH_SOURCE[0]}" )"; cd .. && pwd ) + +# Load the constants +source "$AVALANCHE_PATH"/scripts/constants.sh + +# The published name should be 'avaplatform/bootstrap-monitor', but to avoid unintentional pushes it +# is defaulted to 'bootstrap-monitor' (without a repo or registry name) which can only be used to +# create local images. +export DOCKER_IMAGE=${DOCKER_IMAGE:-"bootstrap-monitor"} + +# Skip building the race image +export SKIP_BUILD_RACE=1 + +# Reuse the avalanchego build script for convenience. The image will have a CMD of "./avalanchego", so +# running the bootstrap monitor requires specifying "./bootstrap-monitor" as the command.
+# +# TODO(marun) Figure out how to set the CMD for a multi-arch image. +bash -x "${AVALANCHE_PATH}"/scripts/build_image.sh --build-arg BUILD_SCRIPT=build_bootstrap_monitor.sh diff --git a/scripts/build_image.sh b/scripts/build_image.sh index acadc3818baf..70c944193501 100755 --- a/scripts/build_image.sh +++ b/scripts/build_image.sh @@ -3,12 +3,13 @@ set -euo pipefail # e.g., -# ./scripts/build_image.sh # Build local single-arch image -# SKIP_BUILD_RACE=1 ./scripts/build_image.sh # Build local single-arch image but skip building -r image -# DOCKER_IMAGE=myavalanchego ./scripts/build_image.sh # Build local single arch image with a custom image name -# DOCKER_IMAGE=avaplatform/avalanchego ./scripts/build_image.sh # Build and push multi-arch image to docker hub -# DOCKER_IMAGE=localhost:5001/avalanchego ./scripts/build_image.sh # Build and push multi-arch image to private registry -# DOCKER_IMAGE=localhost:5001/myavalanchego ./scripts/build_image.sh # Build and push multi-arch image to private registry with a custom image name +# ./scripts/build_image.sh # Build local single-arch image +# ./scripts/build_image.sh --no-cache # All arguments are provided to `docker buildx build` +# SKIP_BUILD_RACE=1 ./scripts/build_image.sh # Build local single-arch image but skip building -r image +# DOCKER_IMAGE=myavalanchego ./scripts/build_image.sh # Build local single arch image with a custom image name +# DOCKER_IMAGE=avaplatform/avalanchego ./scripts/build_image.sh # Build and push single-arch image to docker hub +# DOCKER_IMAGE=localhost:5001/avalanchego BUILD_MULTI_ARCH=1 ./scripts/build_image.sh # Build and push multi-arch image to private registry +# DOCKER_IMAGE=localhost:5001/avalanchego FORCE_TAG_LATEST=1 ./scripts/build_image.sh # Build and push image to private registry with tag `latest` # Multi-arch builds require Docker Buildx and QEMU. 
buildx should be enabled by # default in the verson of docker included with Ubuntu 22.04, and qemu can be @@ -26,8 +27,12 @@ set -euo pipefail # Directory above this script AVALANCHE_PATH=$( cd "$( dirname "${BASH_SOURCE[0]}" )"; cd .. && pwd ) +# Skip building the race image SKIP_BUILD_RACE="${SKIP_BUILD_RACE:-}" +# Force tagging as latest even if not the master branch +FORCE_TAG_LATEST="${FORCE_TAG_LATEST:-}" + # Load the constants source "$AVALANCHE_PATH"/scripts/constants.sh @@ -39,13 +44,24 @@ fi # The published name should be 'avaplatform/avalanchego', but to avoid unintentional # pushes it is defaulted to 'avalanchego' (without a repo or registry name) which can # only be used to create local images. -DOCKER_IMAGE=${DOCKER_IMAGE:-"avalanchego"} +DOCKER_IMAGE="${DOCKER_IMAGE:-avalanchego}" + +# If set to non-empty, prompts the building of a multi-arch image when the image +# name indicates use of a registry. +# +# A registry is required to build a multi-arch image since a multi-arch image is +# not really an image at all. A multi-arch image (also called a manifest) is +# basically a list of arch-specific images available from the same registry that +# hosts the manifest. Manifests are not supported for local images. +# +# Reference: https://docs.docker.com/build/building/multi-platform/ +BUILD_MULTI_ARCH="${BUILD_MULTI_ARCH:-}" # buildx (BuildKit) improves the speed and UI of builds over the legacy builder and # simplifies creation of multi-arch images. # # Reference: https://docs.docker.com/build/buildkit/ -DOCKER_CMD="docker buildx build" +DOCKER_CMD="docker buildx build ${*}" # The dockerfile doesn't specify the golang version to minimize the # changes required to bump the version. 
Instead, the golang version is @@ -54,20 +70,17 @@ GO_VERSION="$(go list -m -f '{{.GoVersion}}')" DOCKER_CMD="${DOCKER_CMD} --build-arg GO_VERSION=${GO_VERSION}" if [[ "${DOCKER_IMAGE}" == *"/"* ]]; then - # Build a multi-arch image since the image name includes a slash which indicates - # the use of a registry e.g. + # Default to pushing when the image name includes a slash which indicates the + # use of a registry e.g. # # - dockerhub: [repo]/[image name]:[tag] # - private registry: [private registry hostname]/[image name]:[tag] - # - # A registry is required to build a multi-arch image since a multi-arch image is - # not really an image at all. A multi-arch image (also called a manifest) is - # basically a list of arch-specific images available from the same registry that - # hosts the manifest. Manifests are not supported for local images. - # - # Reference: https://docs.docker.com/build/building/multi-platform/ - PLATFORMS="${PLATFORMS:-linux/amd64,linux/arm64}" - DOCKER_CMD="${DOCKER_CMD} --push --platform=${PLATFORMS}" + DOCKER_CMD="${DOCKER_CMD} --push" + + # Build a multi-arch image if requested + if [[ -n "${BUILD_MULTI_ARCH}" ]]; then + DOCKER_CMD="${DOCKER_CMD} --platform=${PLATFORMS:-linux/amd64,linux/arm64}" + fi # A populated DOCKER_USERNAME env var triggers login if [[ -n "${DOCKER_USERNAME:-}" ]]; then @@ -94,7 +107,7 @@ if [[ -z "${SKIP_BUILD_RACE}" ]]; then fi # Only tag the latest image for the master branch when images are pushed to a registry -if [[ "${DOCKER_IMAGE}" == *"/"* && $image_tag == "master" ]]; then +if [[ "${DOCKER_IMAGE}" == *"/"* && ($image_tag == "master" || -n "${FORCE_TAG_LATEST}") ]]; then echo "Tagging current avalanchego images as $DOCKER_IMAGE:latest" docker buildx imagetools create -t "$DOCKER_IMAGE:latest" "$DOCKER_IMAGE:$commit_hash" fi diff --git a/scripts/build_test.sh b/scripts/build_test.sh index 4a7cbd04f746..1511c351782a 100755 --- a/scripts/build_test.sh +++ b/scripts/build_test.sh @@ -7,7 +7,7 @@ 
AVALANCHE_PATH=$( cd "$( dirname "${BASH_SOURCE[0]}" )"; cd .. && pwd ) # Load the constants source "$AVALANCHE_PATH"/scripts/constants.sh -EXCLUDED_TARGETS="| grep -v /mocks | grep -v proto | grep -v tests/e2e | grep -v tests/upgrade" +EXCLUDED_TARGETS="| grep -v /mocks | grep -v proto | grep -v tests/e2e | grep -v tests/upgrade | grep -v tests/fixture/bootstrapmonitor/e2e" if [[ "$(go env GOOS)" == "windows" ]]; then # Test discovery for the antithesis test setups is broken due to diff --git a/scripts/tests.build_image.sh b/scripts/tests.build_image.sh index a383b3190f96..f4dd81754165 100755 --- a/scripts/tests.build_image.sh +++ b/scripts/tests.build_image.sh @@ -16,7 +16,7 @@ source "$AVALANCHE_PATH"/scripts/constants.sh build_and_test() { local image_name=$1 - DOCKER_IMAGE="$image_name" ./scripts/build_image.sh + BUILD_MULTI_ARCH=1 DOCKER_IMAGE="$image_name" ./scripts/build_image.sh echo "listing images" docker images diff --git a/scripts/tests.e2e.bootstrap_monitor.sh b/scripts/tests.e2e.bootstrap_monitor.sh new file mode 100755 index 000000000000..e0742f9760ac --- /dev/null +++ b/scripts/tests.e2e.bootstrap_monitor.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Run e2e tests for bootstrap monitor. + +if ! [[ "$0" =~ scripts/tests.e2e.bootstrap_monitor.sh ]]; then + echo "must be run from repository root" + exit 255 +fi + +# Determine DIST and ARCH in case installation is required for kubectl and kind +# +# TODO(marun) Factor this out for reuse +if which sw_vers &> /dev/null; then + OS="darwin" + ARCH="$(uname -m)" +else + # Assume linux (windows is not supported) + OS="linux" + RAW_ARCH="$(uname -i)" + # Convert the linux arch string to the string used for k8s releases + if [[ "${RAW_ARCH}" == "aarch64" ]]; then + ARCH="arm64" + elif [[ "${RAW_ARCH}" == "x86_64" ]]; then + ARCH="amd64" + else + echo "Unsupported architecture: ${RAW_ARCH}" + exit 1 + fi +fi + +function ensure_command { + local cmd=$1 + local install_uri=$2 + + if ! 
command -v "${cmd}" &> /dev/null; then + # Try to use a local version + local local_cmd="${PWD}/bin/${cmd}" + mkdir -p "${PWD}/bin" + if ! command -v "${local_cmd}" &> /dev/null; then + echo "${cmd} not found, attempting to install..." + curl -L -o "${local_cmd}" "${install_uri}" + # TODO(marun) Optionally validate the binary against published checksum + chmod +x "${local_cmd}" + fi + fi +} + +# Ensure the kubectl command is available +KUBECTL_VERSION=v1.30.2 +ensure_command kubectl "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/${OS}/${ARCH}/kubectl" + +# Ensure the kind command is available +KIND_VERSION=v0.23.0 +ensure_command kind "https://kind.sigs.k8s.io/dl/${KIND_VERSION}/kind-${OS}-${ARCH}" + +# Ensure the kind-with-registry command is available +ensure_command "kind-with-registry.sh" "https://github.com/raw/kubernetes-sigs/kind/7cb9e6be25b48a0e248097eef29d496ab1a044d0/site/static/examples/kind-with-registry.sh" + +# Deploy a kind cluster with a local registry. Include the local bin in the path to +# ensure locally installed kind and kubectl are available since the script expects to +# call them without a qualifying path. 
+PATH="${PWD}/bin:$PATH" bash -x "${PWD}/bin/kind-with-registry.sh" + +# TODO(marun) Factor out ginkgo installation to avoid duplicating it across test scripts +go install -v github.com/onsi/ginkgo/v2/ginkgo@v2.13.1 + +KUBECONFIG="$HOME/.kube/config" PATH="${PWD}/bin:$PATH" ginkgo -v ./tests/fixture/bootstrapmonitor/e2e diff --git a/tests/antithesis/compose.go b/tests/antithesis/compose.go index f10ffe5471d7..3b25eb48d535 100644 --- a/tests/antithesis/compose.go +++ b/tests/antithesis/compose.go @@ -236,7 +236,7 @@ func newComposeProject(network *tmpnet.Network, nodeImageName string, workloadIm } workloadEnv := types.Mapping{ - envVarName(EnvPrefix, URIsKey): uris.String(), + config.EnvVarName(EnvPrefix, URIsKey): uris.String(), } chainIDs := CSV{} for _, subnet := range network.Subnets { @@ -245,7 +245,7 @@ func newComposeProject(network *tmpnet.Network, nodeImageName string, workloadIm } } if len(chainIDs) > 0 { - workloadEnv[envVarName(EnvPrefix, ChainIDsKey)] = chainIDs.String() + workloadEnv[config.EnvVarName(EnvPrefix, ChainIDsKey)] = chainIDs.String() } workloadName := "workload" @@ -283,7 +283,7 @@ func newComposeProject(network *tmpnet.Network, nodeImageName string, workloadIm func keyMapToEnvVarMap(keyMap types.Mapping) types.Mapping { envVarMap := make(types.Mapping, len(keyMap)) for key, val := range keyMap { - envVar := envVarName(config.EnvPrefix, key) + envVar := config.EnvVarName(config.EnvPrefix, key) envVarMap[envVar] = val } return envVarMap diff --git a/tests/antithesis/config.go b/tests/antithesis/config.go index b7d006e769c5..751cbb33e731 100644 --- a/tests/antithesis/config.go +++ b/tests/antithesis/config.go @@ -55,12 +55,12 @@ func NewConfigWithSubnets(tc tests.TestContext, defaultNetwork *tmpnet.Network, flag.Parse() // Env vars take priority over flags - envURIs := os.Getenv(envVarName(EnvPrefix, URIsKey)) + envURIs := os.Getenv(config.EnvVarName(EnvPrefix, URIsKey)) if len(envURIs) > 0 { //nolint:errcheck // CSV.Set doesn't actually 
return an error uris.Set(envURIs) } - envChainIDs := os.Getenv(envVarName(EnvPrefix, ChainIDsKey)) + envChainIDs := os.Getenv(config.EnvVarName(EnvPrefix, ChainIDsKey)) if len(envChainIDs) > 0 { //nolint:errcheck // CSV.Set doesn't actually return an error chainIDs.Set(envChainIDs) @@ -126,8 +126,3 @@ func (c *CSV) Set(value string) error { *c = strings.Split(value, ",") return nil } - -func envVarName(prefix string, key string) string { - // e.g. MY_PREFIX, network-id -> MY_PREFIX_NETWORK_ID - return strings.ToUpper(prefix + "_" + config.DashesToUnderscores.Replace(key)) -} diff --git a/tests/fixture/bootstrapmonitor/README.md b/tests/fixture/bootstrapmonitor/README.md new file mode 100644 index 000000000000..bb232da3ed43 --- /dev/null +++ b/tests/fixture/bootstrapmonitor/README.md @@ -0,0 +1,172 @@ +# bootstrap-monitor + +Code rooted at this package implements a `bootstrap-monitor` binary +intended to enable continuous bootstrap testing for avalanchego +networks. + +## Bootstrap testing + +Bootstrapping an avalanchego node on a persistent network like +`mainnet` or `fuji` requires that the version of avalanchego that the +node is running be compatible with the historical data of that +network. Bootstrapping regularly is a good way of insuring against +regressions in compatibility. + +### Types of bootstrap testing for C-Chain + +The X-Chain and P-Chain always synchronize all state, but the bulk of +data for testnet and mainnet is on the C-Chain and there are 2 options: + +#### State Sync + +A bootstrap with state sync enabled (the default) ensures that only +recent blocks will be processed. + +#### Full Sync + +All history will be processed, though with pruning (enabled by +default) not all history will be stored. + +To enable, supply `state-sync-enabled: false` as C-Chain configuration.
+ +## Overview + +The intention of `bootstrap-monitor` is to enable a Kubernetes +[StatefulSet](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) +to perform continuous bootstrap testing for a given avalanchego +configuration. It ensures that a testing pod either starts or resumes +a test, and upon completion of a test, polls for a new image to test +and initiates a new test when one is found. + + - Both the `init` and `wait-for-completion` commands of the + `bootstrap-monitor` binary are intended to run as containers of a + pod alongside an avalanchego container. The pod is expected to be + managed by a `StatefulSet` to ensure the pod is restarted on + failure and that only a single pod runs at a time to avoid + contention for the backing data volume. Both commands derive the + configuration of a bootstrap test from the pod: + - The network targeted by the test is determined by the value of + the `AVAGO_NETWORK_NAME` env var set for the avalanchego + container. + - Whether state sync is enabled is determined by the value of the + `AVAGO_CHAIN_CONFIG_CONTENT` env var set for the avalanchego + container. + - The image used by the test is determined by the image configured + for the avalanchego container. + - The versions of the avalanchego image used by the test are + determined by the pod annotation with key + `avalanche.avax.network/avalanchego-versions`. + - When a bootstrap testing pod is inevitably rescheduled or + restarted, the contents of the `PersistentVolumeClaim` configured + by the managing `StatefulSet` will persist across pod restarts to + allow resumption of the interrupted test. + - Both the `init` and `wait-for-completion` commands of the + `bootstrap-monitor` attempt to read serialized test details (namely + the image used for the test and the start time of the test) from + the same data volume used by the avalanchego node. These details + are written by the `init` command when it determines that a new test + is starting.
+ - The `bootstrap-monitor init` command is intended to run as an + [init + container](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/) + of an avalanchego node and ensure that the ID of the image and its + associated versions are recorded for the test and that the contents + of the pod's data volume are either cleared for a new test or + retained to enable resuming a previously started test. It + accomplishes this by: + - Mounting the same data volume as the avalanchego node + - Reading bootstrap test configuration as described previously + - Determining the image ID and versions for an image if the + avalanchego image for the pod uses the `latest` tag. This will + only need to be performed for the first pod that a bootstrap testing + `StatefulSet` runs. Subsequent pods from the same `StatefulSet` + should have an image qualified with its SHA and version details + set by the previous test run's `wait-for-completion` pod. + - A new pod will be started with the `latest` image to execute + `avalanchego --versions-json` to determine the image ID (which + includes a sha256 hash) of the image and its avalanchego + versions. Those values will then be applied to the `StatefulSet` + managing the pod which will prompt pod deletion and recreation + with the updated values. This ensures that a test result can be + associated with both a specific image SHA and the avalanchego + versions (including commit hash) of the binary that the image + provides. + - A separate pod is used because the image ID of a non-init + avalanchego container using a `latest`-tagged image is only + available when that container runs rather than when an init container runs.
+ - While it would be possible to add an init container running the + same avalanchego image as the primary avalanchego container, + have it run the version command, and then have a subsequent + `bootstrap-monitor init` container read those results, the use + of a separate pod for SHA and versions discovery would still be + required by the `wait-for-completion` command. It seemed + preferable to have only a single way to discover image details. + - Attempting to read the serialized test details from a file on the + data volume. This file will not exist if the data volume has not + been used before. + - Comparing the image from the serialized test details to the image + in the test configuration. + - If the images differ (or the file was not present), the data + volume is initialized for a new test: + - The data volume is cleared + - The image from the test configuration and the time are + serialized to a file on the data volume + - If the images are the same, the data volume is used as-is to + enable resuming an in-progress test. + - `bootstrap-monitor wait-for-completion` is intended to run as a + sidecar of the avalanchego container. It polls the health of the + node container to detect when a bootstrap test has completed + successfully and then polls for a new image to test. When a new + image is found, the managing `StatefulSet` is updated with the + details of the image to trigger a new test. The process to detect a + new image is the same as was described for the `init` command. 
+ +## Package details + +| Filename | Purpose | +|:-------------------------|:--------------------------------------------------------------------------------------------| +| bootstrap_test_config.go | Defines how the configuration for a bootstrap test is read from a pod | +| common.go | Defines code common between init and wait | +| init.go | Defines how a bootstrap test is initialized | +| wait.go | Defines how a bootstrap test is determined to have completed and how a new one is initiated | +| cmd/main.go | The binary entrypoint for the `bootstrap-monitor` | +| e2e/e2e_test.go | The e2e test that validates `bootstrap-monitor` | + +## Supporting files + +| Filename | Purpose | +|:-----------------------------------------|:--------------------------------------------------| +| scripts/build_bootstrap_monitor.sh | Builds the `bootstrap-monitor` binary | +| scripts/build_bootstrap_monitor_image.sh | Builds the image for the `bootstrap-monitor` | +| scripts/tests.e2e.bootstrap_monitor.sh | Script for running the `bootstrap-monitor` e2e test | + + - The test script is used by the github action workflow that + validates the `bootstrap-monitor` binary and image. + - The image build script is used by the github action workflow that + publishes repo images post-merge. + +## Alternatives considered + +### Run bootstrap tests on github workers + + - Public github workers are not compatible with bootstrap testing due +to the available storage of 30GB being insufficient for even state +sync bootstrap. + - Self-hosted github workers are not compatible with bootstrap testing +due to the 5 day maximum duration for a job running on a self-hosted +runner. State sync bootstrap usually completes within 5 days, but full +sync bootstrap usually takes much longer.
+ +### Adding a 'bootstrap mode' to avalanchego + +If avalanchego supported a `--bootstrap-mode` flag that exited on +successful bootstrap, and a pod configured with this flag used an +image with a `latest` tag, the pod would continuously bootstrap, exit, +and restart with the current latest image. While appealingly simple, +this approach doesn't directly support: + + - A mechanism for resuming a long-running bootstrap. Given the +expected duration of a bootstrap test, and the fact that a workload on +Kubernetes is not guaranteed to run without interruption, a separate +init process is suggested to enable resumption of an interrupted test. +- A mechanism for reporting disk usage and duration of execution diff --git a/tests/fixture/bootstrapmonitor/bootstrap_test_config.go b/tests/fixture/bootstrapmonitor/bootstrap_test_config.go new file mode 100644 index 000000000000..6f79669a9902 --- /dev/null +++ b/tests/fixture/bootstrapmonitor/bootstrap_test_config.go @@ -0,0 +1,157 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms.
+ +package bootstrapmonitor + +import ( + "context" + "encoding/base64" + "encoding/json" + "errors" + "fmt" + + "github.com/spf13/cast" + "k8s.io/client-go/kubernetes" + + "github.com/ava-labs/avalanchego/chains" + "github.com/ava-labs/avalanchego/config" + "github.com/ava-labs/avalanchego/version" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const VersionsAnnotationKey = "avalanche.avax.network/avalanchego-versions" + +var ( + chainConfigContentEnvName = config.EnvVarName(config.EnvPrefix, config.ChainConfigContentKey) + networkEnvName = config.EnvVarName(config.EnvPrefix, config.NetworkNameKey) + + // Errors for bootstrapTestConfigForPod + errContainerNotFound = errors.New("container not found") + errInvalidNetworkEnvVar = fmt.Errorf("missing or empty %s env var", networkEnvName) + errFailedToUnmarshalAnnoation = errors.New("failed to unmarshal versions annotation") + + // Errors for stateSyncEnabledFromEnvVars + errFailedToDecodeChainConfigContent = errors.New("failed to decode chain config content") + errFailedToUnmarshalChainConfigContent = errors.New("failed to unmarshal chain config content") + errFailedToUnmarshalCChainConfig = errors.New("failed to unmarshal C-Chain config") + errFailedToCastToBool = errors.New("failed to cast to bool") +) + +type BootstrapTestConfig struct { + Network string `json:"network"` + StateSyncEnabled bool `json:"stateSyncEnabled"` + Image string `json:"image"` + Versions *version.Versions `json:"versions,omitempty"` +} + +// GetBootstrapTestConfigFromPod extracts the bootstrap test configuration from the specified pod. 
+func GetBootstrapTestConfigFromPod(ctx context.Context, clientset *kubernetes.Clientset, namespace string, podName string, nodeContainerName string) (*BootstrapTestConfig, error) { + pod, err := clientset.CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get pod %s.%s: %w", namespace, podName, err) + } + return bootstrapTestConfigForPod(pod, nodeContainerName) +} + +// bootstrapTestConfigForPod collects the details for a bootstrap test configuration from the provided pod. +func bootstrapTestConfigForPod(pod *corev1.Pod, nodeContainerName string) (*BootstrapTestConfig, error) { + // Find the node container + var nodeContainer corev1.Container + for _, container := range pod.Spec.Containers { + if container.Name == nodeContainerName { + nodeContainer = container + break + } + } + if len(nodeContainer.Name) == 0 { + return nil, fmt.Errorf("%w: %s", errContainerNotFound, nodeContainerName) + } + + // Get the network ID from the container's environment + var network string + for _, envVar := range nodeContainer.Env { + if envVar.Name == networkEnvName { + network = envVar.Value + break + } + } + if len(network) == 0 { + return nil, fmt.Errorf("%w in container %q", errInvalidNetworkEnvVar, nodeContainerName) + } + + // Determine whether state sync is enabled in the env vars + stateSyncEnabled, err := stateSyncEnabledFromEnvVars(nodeContainer.Env) + if err != nil { + return nil, err + } + + testConfig := &BootstrapTestConfig{ + Network: network, + StateSyncEnabled: stateSyncEnabled, + Image: nodeContainer.Image, + } + + // Attempt to retrieve the image versions from a pod annotation. The annotation may not be populated in + // the case of a newly-created bootstrap test using an image tagged `latest` that hasn't yet had a + // chance to discover the versions. 
+ if versionsAnnotation := pod.Annotations[VersionsAnnotationKey]; len(versionsAnnotation) > 0 { + if err := json.Unmarshal([]byte(versionsAnnotation), &testConfig.Versions); err != nil { + return nil, fmt.Errorf("%w: %w", errFailedToUnmarshalAnnoation, err) + } + } + + return testConfig, nil +} + +// stateSyncEnabledFromEnvVars determines whether the env vars configure state sync for a +// node container. State sync is assumed to be enabled if the chain config content is +// missing, does not contain C-Chain configuration, or the C-Chain configuration does not +// configure state-sync-enabled. +func stateSyncEnabledFromEnvVars(env []corev1.EnvVar) (bool, error) { + // Look for chain config content in the env vars + var encodedChainConfigContent string + for _, envVar := range env { + if envVar.Name == chainConfigContentEnvName { + encodedChainConfigContent = envVar.Value + break + } + } + + if len(encodedChainConfigContent) == 0 { + return true, nil + } + + // Attempt to unmarshal + var chainConfigs map[string]chains.ChainConfig + chainConfigContent, err := base64.StdEncoding.DecodeString(encodedChainConfigContent) + if err != nil { + return false, fmt.Errorf("%w: %w", errFailedToDecodeChainConfigContent, err) + } + if err := json.Unmarshal(chainConfigContent, &chainConfigs); err != nil { + return false, fmt.Errorf("%w: %w", errFailedToUnmarshalChainConfigContent, err) + } + + cChainConfig, ok := chainConfigs["C"] + if !ok { + return true, nil + } + + // Attempt to unmarshal the C-Chain config + var cChainConfigMap map[string]any + if err := json.Unmarshal(cChainConfig.Config, &cChainConfigMap); err != nil { + return false, fmt.Errorf("%w: %w", errFailedToUnmarshalCChainConfig, err) + } + + // Attempt to read the value from the C-Chain config + rawStateSyncEnabled, ok := cChainConfigMap["state-sync-enabled"] + if !ok { + return true, nil + } + stateSyncEnabled, err := cast.ToBoolE(rawStateSyncEnabled) + if err != nil { + return false, fmt.Errorf("%w (%v): %w", 
errFailedToCastToBool, rawStateSyncEnabled, err) + } + return stateSyncEnabled, nil +} diff --git a/tests/fixture/bootstrapmonitor/bootstrap_test_config_test.go b/tests/fixture/bootstrapmonitor/bootstrap_test_config_test.go new file mode 100644 index 000000000000..31226b65b838 --- /dev/null +++ b/tests/fixture/bootstrapmonitor/bootstrap_test_config_test.go @@ -0,0 +1,277 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package bootstrapmonitor + +import ( + "encoding/base64" + "encoding/json" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/ava-labs/avalanchego/chains" + "github.com/ava-labs/avalanchego/version" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestBootstrapTestConfigForPod(t *testing.T) { + networkName := "network" + nodeContainerName := "avago" + imageName := "image" + validVersionsString := `{"application": "avalanchego/1.11.11", "database": "v1.4.5", "rpcchainvm": 37, "commit": "5bcfb0fb30cc311adb22173daabb56eae736fac3","go": "1.21.12" }` + invalidVersionsString := "invalid" + + tests := []struct { + name string + pod *corev1.Pod + expectedConfig *BootstrapTestConfig + expectedErr error + }{ + { + name: "container not found", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{}, + }, + expectedErr: errContainerNotFound, + }, + { + name: "missing network id env var", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: nodeContainerName, + }, + }, + }, + }, + expectedErr: errInvalidNetworkEnvVar, + }, + { + name: "valid configuration without versions and state sync disabled", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: nodeContainerName, + Image: imageName, + Env: []corev1.EnvVar{ + { + Name: networkEnvName, + Value: networkName, + }, + { + Name: chainConfigContentEnvName, + // Sets state-sync-enabled:false for the C-Chain + Value: 
"eyJDIjp7IkNvbmZpZyI6ImV5SnpkR0YwWlMxemVXNWpMV1Z1WVdKc1pXUWlPbVpoYkhObGZRPT0iLCJVcGdyYWRlIjpudWxsfX0=", + }, + }, + }, + }, + }, + }, + expectedConfig: &BootstrapTestConfig{ + Network: networkName, + StateSyncEnabled: false, + Image: imageName, + }, + }, + { + name: "valid configuration with valid versions", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + VersionsAnnotationKey: validVersionsString, + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: nodeContainerName, + Image: imageName, + Env: []corev1.EnvVar{ + { + Name: networkEnvName, + Value: networkName, + }, + }, + }, + }, + }, + }, + expectedConfig: &BootstrapTestConfig{ + Network: networkName, + StateSyncEnabled: true, + Image: imageName, + Versions: &version.Versions{ + Application: "avalanchego/1.11.11", + Database: "v1.4.5", + RPCChainVM: 37, + Commit: "5bcfb0fb30cc311adb22173daabb56eae736fac3", + Go: "1.21.12", + }, + }, + }, + { + name: "invalid configuration due to invalid versions", + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + VersionsAnnotationKey: invalidVersionsString, + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: nodeContainerName, + Env: []corev1.EnvVar{ + { + Name: networkEnvName, + Value: networkName, + }, + }, + }, + }, + }, + }, + expectedErr: errFailedToUnmarshalAnnoation, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + require := require.New(t) + + config, err := bootstrapTestConfigForPod(test.pod, nodeContainerName) + require.ErrorIs(err, test.expectedErr) + require.Equal(test.expectedConfig, config) + }) + } +} + +func marshalAndEncode(t *testing.T, chainConfigs map[string]chains.ChainConfig) string { + chainConfigContent, err := json.Marshal(chainConfigs) + require.NoError(t, err) + return base64.StdEncoding.EncodeToString(chainConfigContent) +} + +func TestStateSyncEnabledFromEnvVars(t *testing.T) { + 
invalidJSON := "asdf" + invalidBase64 := "abc$def" + tests := []struct { + name string + chainConfigContent string + expectedEnabled bool + expectedErr error + }{ + { + name: "no chain config", + chainConfigContent: "", + expectedEnabled: true, + }, + { + name: "no C-Chain config", + chainConfigContent: marshalAndEncode(t, + map[string]chains.ChainConfig{ + "Not-C": {}, + }, + ), + expectedEnabled: true, + }, + { + name: "invalid encoded content", + chainConfigContent: invalidBase64, + expectedErr: errFailedToDecodeChainConfigContent, + }, + { + name: "invalid json content", + chainConfigContent: base64.StdEncoding.EncodeToString([]byte(invalidJSON)), + expectedErr: errFailedToUnmarshalChainConfigContent, + }, + { + name: "invalid C-Chain config", + chainConfigContent: marshalAndEncode(t, + map[string]chains.ChainConfig{ + "C": { + Config: []byte(invalidJSON), + }, + }, + ), + expectedErr: errFailedToUnmarshalCChainConfig, + }, + { + name: "empty C-Chain config", + chainConfigContent: marshalAndEncode(t, + map[string]chains.ChainConfig{ + "C": { + Config: []byte("{}"), + }, + }, + ), + expectedEnabled: true, + }, + { + name: "invalid state sync value", + chainConfigContent: marshalAndEncode(t, + map[string]chains.ChainConfig{ + "C": { + Config: []byte("{\"state-sync-enabled\":1234}"), + }, + }, + ), + expectedErr: errFailedToCastToBool, + }, + { + name: "C-Chain config with state sync enabled", + chainConfigContent: marshalAndEncode(t, + map[string]chains.ChainConfig{ + "C": { + Config: []byte("{\"state-sync-enabled\":true}"), + }, + }, + ), + expectedEnabled: true, + }, + { + name: "C-Chain config with state sync disabled", + chainConfigContent: marshalAndEncode(t, + map[string]chains.ChainConfig{ + "C": { + Config: []byte("{\"state-sync-enabled\":false}"), + }, + }, + ), + expectedEnabled: false, + }, + { + name: "C-Chain config with state sync disabled with string bool", + chainConfigContent: marshalAndEncode(t, + map[string]chains.ChainConfig{ + "C": { + 
Config: []byte("{\"state-sync-enabled\":\"false\"}"), + }, + }, + ), + expectedEnabled: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + require := require.New(t) + + env := []corev1.EnvVar{ + { + Name: chainConfigContentEnvName, + Value: test.chainConfigContent, + }, + } + enabled, err := stateSyncEnabledFromEnvVars(env) + require.ErrorIs(err, test.expectedErr) + require.Equal(test.expectedEnabled, enabled) + }) + } +} diff --git a/tests/fixture/bootstrapmonitor/cmd/main.go b/tests/fixture/bootstrapmonitor/cmd/main.go new file mode 100644 index 000000000000..9dfe08279256 --- /dev/null +++ b/tests/fixture/bootstrapmonitor/cmd/main.go @@ -0,0 +1,118 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package main + +import ( + "errors" + "fmt" + "os" + "time" + + "github.com/spf13/cobra" + "go.uber.org/zap" + + "github.com/ava-labs/avalanchego/tests/fixture/bootstrapmonitor" + "github.com/ava-labs/avalanchego/utils/logging" + "github.com/ava-labs/avalanchego/version" +) + +const ( + cliVersion = "0.0.1" + commandName = "bootstrap-monitor" + + defaultHealthCheckInterval = 1 * time.Minute + defaultImageCheckInterval = 5 * time.Minute +) + +func main() { + var ( + namespace string + podName string + nodeContainerName string + dataDir string + ) + rootCmd := &cobra.Command{ + Use: commandName, + Short: commandName + " commands", + } + rootCmd.PersistentFlags().StringVar(&namespace, "namespace", os.Getenv("POD_NAMESPACE"), "The namespace of the pod") + rootCmd.PersistentFlags().StringVar(&podName, "pod-name", os.Getenv("POD_NAME"), "The name of the pod") + rootCmd.PersistentFlags().StringVar(&nodeContainerName, "node-container-name", "", "The name of the node container in the pod") + rootCmd.PersistentFlags().StringVar(&dataDir, "data-dir", "", "The path of the data directory used for the bootstrap job") + + versionCmd := &cobra.Command{ + Use: "version", + Short: 
"Print version details", + RunE: func(*cobra.Command, []string) error { + msg := cliVersion + if len(version.GitCommit) > 0 { + msg += ", commit=" + version.GitCommit + } + fmt.Fprintf(os.Stdout, msg+"\n") + return nil + }, + } + rootCmd.AddCommand(versionCmd) + + // Use avalanchego logger for consistency + log := logging.NewLogger("", logging.NewWrappedCore(logging.Verbo, os.Stdout, logging.Plain.ConsoleEncoder())) + + initCmd := &cobra.Command{ + Use: "init", + Short: "Initialize a new bootstrap test", + RunE: func(*cobra.Command, []string) error { + if err := checkArgs(namespace, podName, nodeContainerName, dataDir); err != nil { + return err + } + return bootstrapmonitor.InitBootstrapTest(log, namespace, podName, nodeContainerName, dataDir) + }, + } + rootCmd.AddCommand(initCmd) + + var ( + healthCheckInterval time.Duration + imageCheckInterval time.Duration + ) + waitCmd := &cobra.Command{ + Use: "wait-for-completion", + Short: "Wait for the local node to report healthy indicating completion of bootstrapping", + RunE: func(*cobra.Command, []string) error { + if err := checkArgs(namespace, podName, nodeContainerName, dataDir); err != nil { + return err + } + if healthCheckInterval <= 0 { + return errors.New("--health-check-interval must be greater than 0") + } + if imageCheckInterval <= 0 { + return errors.New("--image-check-interval must be greater than 0") + } + return bootstrapmonitor.WaitForCompletion(log, namespace, podName, nodeContainerName, dataDir, healthCheckInterval, imageCheckInterval) + }, + } + waitCmd.PersistentFlags().DurationVar(&healthCheckInterval, "health-check-interval", defaultHealthCheckInterval, "The interval at which to check for node health") + waitCmd.PersistentFlags().DurationVar(&imageCheckInterval, "image-check-interval", defaultImageCheckInterval, "The interval at which to check for a new image") + rootCmd.AddCommand(waitCmd) + + if err := rootCmd.Execute(); err != nil { + log.Error(commandName+" failed", zap.Error(err)) + 
os.Exit(1) + } + os.Exit(0) +} + +func checkArgs(namespace string, podName string, nodeContainerName string, dataDir string) error { + if len(namespace) == 0 { + return errors.New("--namespace is required") + } + if len(podName) == 0 { + return errors.New("--pod-name is required") + } + if len(nodeContainerName) == 0 { + return errors.New("--node-container-name is required") + } + if len(dataDir) == 0 { + return errors.New("--data-dir is required") + } + return nil +} diff --git a/tests/fixture/bootstrapmonitor/common.go b/tests/fixture/bootstrapmonitor/common.go new file mode 100644 index 000000000000..e67736140504 --- /dev/null +++ b/tests/fixture/bootstrapmonitor/common.go @@ -0,0 +1,294 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package bootstrapmonitor + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "time" + + "go.uber.org/zap" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" + + "github.com/ava-labs/avalanchego/utils/logging" + "github.com/ava-labs/avalanchego/version" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + restclient "k8s.io/client-go/rest" +) + +// Path to write the details to on the data volume +func getTestDetailsPath(dataDir string) string { + return filepath.Join(dataDir, "bootstrap_test_details.txt") +} + +// Used to serialize test details to the data volume used for a given test to +// support resuming a previously started test and tracking test duration. +type bootstrapTestDetails struct { + Image string `json:"image"` + StartTime time.Time `json:"startTime"` +} + +// WaitForPodCondition watches the specified pod until the status includes the specified condition. 
+func WaitForPodCondition(ctx context.Context, clientset *kubernetes.Clientset, namespace string, podName string, conditionType corev1.PodConditionType) error {
+	return waitForPodStatus(
+		ctx,
+		clientset,
+		namespace,
+		podName,
+		func(status *corev1.PodStatus) bool {
+			for _, condition := range status.Conditions {
+				if condition.Type == conditionType && condition.Status == corev1.ConditionTrue {
+					return true
+				}
+			}
+			return false
+		},
+	)
+}
+
+// waitForPodStatus watches the specified pod until the status is deemed acceptable by the provided test function.
+// It returns nil once acceptable reports true. An error is returned if the watch cannot be initiated, if the watch
+// channel is closed before an acceptable status is observed, or if ctx is done first.
+func waitForPodStatus(
+	ctx context.Context,
+	clientset *kubernetes.Clientset,
+	namespace string,
+	name string,
+	acceptable func(*corev1.PodStatus) bool,
+) error {
+	watch, err := clientset.CoreV1().Pods(namespace).Watch(ctx, metav1.SingleObject(metav1.ObjectMeta{Name: name}))
+	if err != nil {
+		return fmt.Errorf("failed to initiate watch of pod %s/%s: %w", namespace, name, err)
+	}
+	// Always release the watch so that it is not leaked once waiting ends
+	defer watch.Stop()
+
+	for {
+		select {
+		case event, open := <-watch.ResultChan():
+			if !open {
+				// A closed channel yields zero-value events forever; fail instead of spinning until ctx is done
+				return fmt.Errorf("watch of pod %s/%s closed before an acceptable status was observed", namespace, name)
+			}
+			pod, ok := event.Object.(*corev1.Pod)
+			if !ok {
+				// Ignore events that do not carry a pod
+				continue
+			}
+
+			if acceptable(&pod.Status) {
+				return nil
+			}
+		case <-ctx.Done():
+			return fmt.Errorf("timeout waiting for pod readiness: %w", ctx.Err())
+		}
+	}
+}
+
+// setImageDetails updates the pod's owning statefulset with the image of the specified container and associated version details
+func setImageDetails(ctx context.Context, log logging.Logger, clientset *kubernetes.Clientset, namespace string, podName string, imageDetails *ImageDetails) error {
+	// Determine the name of the statefulset to update
+	pod, err := clientset.CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{})
+	if err != nil {
+		return fmt.Errorf("failed to get pod %s.%s: %w", namespace, podName, err)
+	}
+	if len(pod.OwnerReferences) != 1 {
+		return errors.New("pod does not have exactly one owner reference")
+	}
+	ownerReference := pod.OwnerReferences[0]
+	if ownerReference.Kind != "StatefulSet" {
+		return
errors.New("unexpected owner reference kind: " + ownerReference.Kind) + } + statefulSetName := ownerReference.Name + + // Marshal the versions to JSON + versionJSONBytes, err := json.Marshal(imageDetails.Versions) + if err != nil { + return fmt.Errorf("failed to marshal versions: %w", err) + } + + // Create the JSON patch + patchData := []map[string]interface{}{ + { + "op": "replace", + "path": "/spec/template/spec/containers/0/image", + "value": imageDetails.Image, + }, + { + "op": "replace", + "path": "/spec/template/metadata/annotations/" + strings.ReplaceAll(VersionsAnnotationKey, "/", "~1"), + "value": string(versionJSONBytes), + }, + } + + // Convert patch data to JSON + patchBytes, err := json.Marshal(patchData) + if err != nil { + return fmt.Errorf("failed to marshal patch data: %w", err) + } + + // Apply the patch + _, err = clientset.AppsV1().StatefulSets(namespace).Patch(context.TODO(), statefulSetName, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) + if err != nil { + return fmt.Errorf("failed to patch statefulset %s.%s: %w", namespace, statefulSetName, err) + } + log.Info("Updated statefulset to target new image", + zap.String("namespace", namespace), + zap.String("statefulSetName", statefulSetName), + zap.String("image", imageDetails.Image), + zap.Reflect("versions", imageDetails.Versions), + ) + return nil +} + +// getBaseImageName removes the tag from the image name +func getBaseImageName(log logging.Logger, imageName string) (string, error) { + if strings.Contains(imageName, "@") { + // Image name contains a digest, remove it + return strings.Split(imageName, "@")[0], nil + } + + imageNameParts := strings.Split(imageName, ":") + switch len(imageNameParts) { + case 1: + // No tag or registry + return imageName, nil + case 2: + // Ambiguous image name - could contain a tag or a registry + log.Info("Derived tag-less image name from string", + zap.String("tagLessImageName", imageNameParts[0]), + zap.String("imageName", imageName), + ) + return 
imageNameParts[0], nil + case 3: + // Image name contains a registry and a tag - remove the tag + return strings.Join(imageNameParts[0:2], ":"), nil + default: + return "", fmt.Errorf("unexpected image name format: %q", imageName) + } +} + +type ImageDetails struct { + Image string + Versions *version.Versions +} + +// GetLatestImageDetails retrieves the image details for the avalanchego image with tag `latest`. +func getLatestImageDetails( + ctx context.Context, + log logging.Logger, + clientset *kubernetes.Clientset, + namespace string, + imageName string, + containerName string, +) (*ImageDetails, error) { + baseImageName, err := getBaseImageName(log, imageName) + if err != nil { + return nil, err + } + + // Start a new pod with the `latest`-tagged avalanchego image to discover its image ID + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "avalanchego-version-check-", + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: containerName, + Command: []string{"./avalanchego"}, + Args: []string{"--version-json"}, + Image: baseImageName + ":latest", + }, + }, + RestartPolicy: corev1.RestartPolicyNever, + }, + } + createdPod, err := clientset.CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to start pod %w", err) + } + qualifiedPodName := fmt.Sprintf("%s.%s", namespace, createdPod.Name) + + err = waitForPodStatus(ctx, clientset, namespace, createdPod.Name, func(status *corev1.PodStatus) bool { + return status.Phase == corev1.PodSucceeded || status.Phase == corev1.PodFailed + }) + if err != nil { + return nil, fmt.Errorf("failed to wait for pod %s to terminate: %w", qualifiedPodName, err) + } + + terminatedPod, err := clientset.CoreV1().Pods(namespace).Get(ctx, createdPod.Name, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to retrieve terminated pod %s: %w", qualifiedPodName, err) + } + + // Get the image id for the avalanchego image + 
imageID := "" + for _, status := range terminatedPod.Status.ContainerStatuses { + if status.Name == containerName { + imageID = status.ImageID + break + } + } + if len(imageID) == 0 { + return nil, fmt.Errorf("failed to get image id for pod %s", qualifiedPodName) + } + + // Get the logs for the pod + req := clientset.CoreV1().Pods(namespace).GetLogs(createdPod.Name, &corev1.PodLogOptions{ + Container: containerName, + }) + logStream, err := req.Stream(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get logs for pod %s: %w", qualifiedPodName, err) + } + defer logStream.Close() + logs, err := io.ReadAll(logStream) + if err != nil { + return nil, fmt.Errorf("failed to read logs for pod %s: %w", qualifiedPodName, err) + } + + // Attempt to unmarshal the logs to a Versions instance + versions := &version.Versions{} + if err := json.Unmarshal(logs, versions); err != nil { + return nil, fmt.Errorf("failed to unmarshal logs for pod %s: %w", qualifiedPodName, err) + } + + // Only delete the pod if successful to aid in debugging + err = clientset.CoreV1().Pods(namespace).Delete(ctx, createdPod.Name, metav1.DeleteOptions{}) + if err != nil { + return nil, err + } + + return &ImageDetails{ + Image: imageID, + Versions: versions, + }, nil +} + +func getClientset(log logging.Logger) (*kubernetes.Clientset, error) { + log.Info("Initializing clientset") + kubeconfigPath := os.Getenv("KUBECONFIG") + var ( + kubeconfig *restclient.Config + err error + ) + if len(kubeconfigPath) > 0 { + // Only use BuildConfigFromFlags if a path is provided to avoid the warning logs that + // will be omitted in a format that differs from the avalanchego format. 
+ if kubeconfig, err = clientcmd.BuildConfigFromFlags("", kubeconfigPath); err != nil { + return nil, fmt.Errorf("failed to build kubeconfig: %w", err) + } + } else { + if kubeconfig, err = restclient.InClusterConfig(); err != nil { + return nil, fmt.Errorf("failed to build kubeconfig: %w", err) + } + } + clientset, err := kubernetes.NewForConfig(kubeconfig) + if err != nil { + return nil, fmt.Errorf("failed to create clientset: %w", err) + } + return clientset, nil +} diff --git a/tests/fixture/bootstrapmonitor/e2e/e2e_test.go b/tests/fixture/bootstrapmonitor/e2e/e2e_test.go new file mode 100644 index 000000000000..3ed7ebd6b60e --- /dev/null +++ b/tests/fixture/bootstrapmonitor/e2e/e2e_test.go @@ -0,0 +1,609 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package e2e + +import ( + "bufio" + "flag" + "fmt" + "io" + "net/http" + "net/url" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/onsi/ginkgo/v2" + "github.com/stretchr/testify/require" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" + "k8s.io/client-go/tools/portforward" + "k8s.io/client-go/transport/spdy" + "k8s.io/utils/pointer" + + "github.com/ava-labs/avalanchego/api/info" + "github.com/ava-labs/avalanchego/config" + "github.com/ava-labs/avalanchego/ids" + "github.com/ava-labs/avalanchego/tests" + "github.com/ava-labs/avalanchego/tests/fixture/bootstrapmonitor" + "github.com/ava-labs/avalanchego/tests/fixture/e2e" + "github.com/ava-labs/avalanchego/tests/fixture/tmpnet" + "github.com/ava-labs/avalanchego/utils/constants" + "github.com/ava-labs/avalanchego/utils/logging" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + restclient "k8s.io/client-go/rest" +) + +func 
TestE2E(t *testing.T) { + ginkgo.RunSpecs(t, "bootstrap test suite") +} + +const ( + // The relative path to the repo root enables discovery of the + // repo root when the test is executed from the root or the path + // of this file. + repoRelativePath = "tests/fixture/bootstrapmonitor/e2e" + + avalanchegoImage = "localhost:5001/avalanchego" + latestAvalanchegoImage = avalanchegoImage + ":latest" + monitorImage = "localhost:5001/bootstrap-monitor" + latestMonitorImage = monitorImage + ":latest" + + initContainerName = "init" + monitorContainerName = "monitor" + nodeContainerName = "avago" + + volumeSize = "128Mi" + volumeName = "data" + + dataDir = "/data" +) + +var ( + skipAvalanchegoImageBuild bool + skipMonitorImageBuild bool + + nodeDataDir = bootstrapmonitor.NodeDataDir(dataDir) // Use a subdirectory of the data path so that os.RemoveAll can be used when starting a new test +) + +func init() { + flag.BoolVar( + &skipAvalanchegoImageBuild, + "skip-avalanchego-image-build", + false, + "whether to skip building the avalanchego image", + ) + flag.BoolVar( + &skipMonitorImageBuild, + "skip-monitor-image-build", + false, + "whether to skip building the bootstrap-monitor image", + ) +} + +var _ = ginkgo.Describe("[Bootstrap Tester]", func() { + const () + + ginkgo.It("should support continuous testing of node bootstrap", func() { + tc := e2e.NewTestContext() + require := require.New(tc) + + if skipAvalanchegoImageBuild { + tc.Outf("{{yellow}}skipping build of avalanchego image{{/}}\n") + } else { + ginkgo.By("Building the avalanchego image") + buildAvalanchegoImage(tc, avalanchegoImage, false /* forceNewHash */) + } + + if skipMonitorImageBuild { + tc.Outf("{{yellow}}skipping build of bootstrap-monitor image{{/}}\n") + } else { + ginkgo.By("Building the bootstrap-monitor image") + buildImage(tc, monitorImage, false /* forceNewHash */, "build_bootstrap_monitor_image.sh") + } + + ginkgo.By("Configuring a kubernetes client") + kubeconfigPath := os.Getenv("KUBECONFIG") + 
kubeconfig, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath) + require.NoError(err) + clientset, err := kubernetes.NewForConfig(kubeconfig) + require.NoError(err) + + ginkgo.By("Creating a kube namespace to ensure isolation between test runs") + createdNamespace, err := clientset.CoreV1().Namespaces().Create(tc.DefaultContext(), &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "bootstrap-test-e2e-", + }, + }, metav1.CreateOptions{}) + require.NoError(err) + namespace := createdNamespace.Name + ginkgo.By(fmt.Sprintf("Created namespace %q", namespace)) + + ginkgo.By("Creating a node to bootstrap from") + nodeStatefulSet := newNodeStatefulSet("avalanchego-node", defaultNodeFlags()) + createdNodeStatefulSet, err := clientset.AppsV1().StatefulSets(namespace).Create(tc.DefaultContext(), nodeStatefulSet, metav1.CreateOptions{}) + require.NoError(err) + nodePodName := createdNodeStatefulSet.Name + "-0" + waitForPodCondition(tc, clientset, namespace, nodePodName, corev1.PodReady) + bootstrapID := waitForNodeHealthy(tc, kubeconfig, namespace, nodePodName) + pod, err := clientset.CoreV1().Pods(namespace).Get(tc.DefaultContext(), nodePodName, metav1.GetOptions{}) + require.NoError(err) + bootstrapIP := pod.Status.PodIP + ginkgo.By(fmt.Sprintf("Created pod %s.%s for %s@a%s", namespace, nodePodName, bootstrapID, bootstrapIP)) + + ginkgo.By("Creating a node that will bootstrap from the first node") + bootstrapStatefulSet := createBootstrapTester(tc, clientset, namespace, bootstrapIP, bootstrapID) + bootstrapPodName := bootstrapStatefulSet.Name + "-0" + waitForPodCondition(tc, clientset, namespace, bootstrapPodName, corev1.PodReadyToStartContainers) + ginkgo.By(fmt.Sprintf("Created pod %s.%s", namespace, bootstrapPodName)) + + ginkgo.By("Waiting for the pod image to be updated to include an image digest") + var containerImage string + require.Eventually(func() bool { + testConfig, err := 
bootstrapmonitor.GetBootstrapTestConfigFromPod(tc.DefaultContext(), clientset, namespace, bootstrapPodName, nodeContainerName) + if err != nil { + tc.Outf("Error determining image used by the %q container of pod %s.%s: %v \n", nodeContainerName, namespace, bootstrapPodName, err) + return false + } + if !strings.Contains(testConfig.Image, "sha256") { + return false + } + containerImage = testConfig.Image + return true + }, e2e.DefaultTimeout, e2e.DefaultPollingInterval) + + ginkgo.By(fmt.Sprintf("Waiting for the %q container to report the start of a bootstrap test", initContainerName)) + waitForPodCondition(tc, clientset, namespace, bootstrapPodName, corev1.PodInitialized) + bootstrapStartingMessage := bootstrapMessageForImage(bootstrapmonitor.BootstrapStartingMessage, containerImage) + waitForLogOutput(tc, clientset, namespace, bootstrapPodName, initContainerName, bootstrapStartingMessage) + + ginkgo.By("Waiting for the pod to report readiness") + waitForPodCondition(tc, clientset, namespace, bootstrapPodName, corev1.PodReady) + + ginkgo.By(fmt.Sprintf("Waiting for the %q container to report the success of the bootstrap test", monitorContainerName)) + waitForLogOutput(tc, clientset, namespace, bootstrapPodName, monitorContainerName, bootstrapmonitor.ImageUnchanged) + _ = waitForNodeHealthy(tc, kubeconfig, namespace, nodePodName) + + ginkgo.By("Checking that bootstrap testing is resumed when a pod is rescheduled") + // Retrieve the UID of the pod pre-deletion + pod, err = clientset.CoreV1().Pods(namespace).Get(tc.DefaultContext(), bootstrapPodName, metav1.GetOptions{}) + require.NoError(err) + podUID := pod.UID + require.NoError(clientset.CoreV1().Pods(namespace).Delete(tc.DefaultContext(), bootstrapPodName, metav1.DeleteOptions{})) + // Wait for the pod to be recreated with a new UID + require.Eventually(func() bool { + pod, err := clientset.CoreV1().Pods(namespace).Get(tc.DefaultContext(), bootstrapPodName, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + 
return false + } + if err != nil { + tc.Outf("Error getting pod %s.%s: %v\n", namespace, bootstrapPodName, err) + return false + } + return pod.UID != podUID + }, e2e.DefaultTimeout, e2e.DefaultPollingInterval) + waitForPodCondition(tc, clientset, namespace, bootstrapPodName, corev1.PodInitialized) + bootstrapResumingMessage := bootstrapMessageForImage(bootstrapmonitor.BootstrapResumingMessage, containerImage) + waitForLogOutput(tc, clientset, namespace, bootstrapPodName, initContainerName, bootstrapResumingMessage) + + ginkgo.By("Building and pushing a new avalanchego image to prompt the start of a new bootstrap test") + buildAvalanchegoImage(tc, avalanchegoImage, true /* forceNewHash */) + + ginkgo.By("Waiting for the pod image to change") + require.Eventually(func() bool { + testConfig, err := bootstrapmonitor.GetBootstrapTestConfigFromPod(tc.DefaultContext(), clientset, namespace, bootstrapPodName, nodeContainerName) + if err != nil { + tc.Outf("Error determining image used by the %q container of pod %s.%s: %v \n", nodeContainerName, namespace, bootstrapPodName, err) + return false + } + if testConfig.Image != containerImage { + containerImage = testConfig.Image + return true + } + return false + }, e2e.DefaultTimeout, e2e.DefaultPollingInterval) + + ginkgo.By(fmt.Sprintf("Waiting for the %q container to report the start of a new bootstrap test", initContainerName)) + waitForPodCondition(tc, clientset, namespace, bootstrapPodName, corev1.PodInitialized) + bootstrapStartingMessage = bootstrapMessageForImage(bootstrapmonitor.BootstrapStartingMessage, containerImage) + waitForLogOutput(tc, clientset, namespace, bootstrapPodName, initContainerName, bootstrapStartingMessage) + }) +}) + +func bootstrapMessageForImage(message, image string) string { + return message + fmt.Sprintf(`{"image": "%s"}`, image) +} + +func buildAvalanchegoImage(tc tests.TestContext, imageName string, forceNewHash bool) { + buildImage(tc, imageName, forceNewHash, "build_image.sh") +} + +func 
buildImage(tc tests.TestContext, imageName string, forceNewHash bool, scriptName string) {
+	require := require.New(tc)
+
+	repoRoot, err := e2e.GetRepoRootPath(repoRelativePath)
+	require.NoError(err)
+
+	var args []string
+	if forceNewHash {
+		// Ensure the build results in a new image hash by preventing use of a cached final stage
+		args = append(args, "--no-cache-filter", "execution")
+	}
+
+	cmd := exec.CommandContext(
+		tc.DefaultContext(),
+		filepath.Join(repoRoot, "scripts", scriptName),
+		args...,
+	) // #nosec G204
+	// The build script is configured through its environment
+	cmd.Env = append(os.Environ(),
+		"DOCKER_IMAGE="+imageName,
+		"FORCE_TAG_LATEST=1",
+		"SKIP_BUILD_RACE=1",
+	)
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		// FailNow treats its first argument as a literal message, so the formatting variant is needed to render err and output
+		require.FailNowf("Image build failed", "%v\nWith output: %s", err, output)
+	}
+}
+
+// newNodeStatefulSet returns a statefulset for an avalanchego node.
+func newNodeStatefulSet(name string, flags map[string]string) *appsv1.StatefulSet {
+	return &appsv1.StatefulSet{
+		ObjectMeta: metav1.ObjectMeta{
+			GenerateName: name + "-",
+		},
+		Spec: appsv1.StatefulSetSpec{
+			Replicas:    pointer.Int32(1),
+			ServiceName: name,
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{
+					"app": name,
+				},
+			},
+			VolumeClaimTemplates: []corev1.PersistentVolumeClaim{
+				{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: volumeName,
+					},
+					Spec: corev1.PersistentVolumeClaimSpec{
+						AccessModes: []corev1.PersistentVolumeAccessMode{
+							corev1.ReadWriteOnce,
+						},
+						Resources: corev1.VolumeResourceRequirements{
+							Requests: corev1.ResourceList{
+								corev1.ResourceStorage: resource.MustParse(volumeSize),
+							},
+						},
+					},
+				},
+			},
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{
+						"app": name,
+					},
+					Annotations: map[string]string{
+						// This needs to be present to ensure compatibility with json patch replace
+						bootstrapmonitor.VersionsAnnotationKey: "",
+					},
+				},
+				Spec: corev1.PodSpec{
+					Containers: []corev1.Container{
+						{
+							Name:  nodeContainerName,
+							Image:
latestAvalanchegoImage, + Ports: []corev1.ContainerPort{ + { + Name: "http", + ContainerPort: config.DefaultHTTPPort, + }, + { + Name: "staker", + ContainerPort: config.DefaultStakingPort, + }, + }, + VolumeMounts: []corev1.VolumeMount{ + { + Name: volumeName, + MountPath: nodeDataDir, + }, + }, + ReadinessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/ext/health/liveness", + Port: intstr.FromInt(config.DefaultHTTPPort), + }, + }, + PeriodSeconds: 1, + SuccessThreshold: 1, + }, + Env: stringMapToEnvVarSlice(flags), + }, + }, + }, + }, + }, + } +} + +// stringMapToEnvVarSlice converts a string map to a kube EnvVar slice. +func stringMapToEnvVarSlice(mapping map[string]string) []corev1.EnvVar { + envVars := make([]corev1.EnvVar, len(mapping)) + var i int + for k, v := range mapping { + envVars[i] = corev1.EnvVar{ + Name: config.EnvVarName(config.EnvPrefix, k), + Value: v, + } + i++ + } + return envVars +} + +// defaultNodeFlags defines common flags for avalanchego nodes used by this test +func defaultNodeFlags() map[string]string { + return map[string]string{ + config.DataDirKey: nodeDataDir, + config.NetworkNameKey: constants.LocalName, + config.SybilProtectionEnabledKey: "false", + config.HealthCheckFreqKey: "500ms", // Ensure rapid detection of a healthy state + config.LogDisplayLevelKey: logging.Debug.String(), + config.LogLevelKey: logging.Debug.String(), + config.HTTPHostKey: "0.0.0.0", // Need to bind to pod IP to ensure kubelet can access the http port for the readiness check + } +} + +// waitForPodCondition waits until the specified pod reports the specified condition +func waitForPodCondition(tc tests.TestContext, clientset *kubernetes.Clientset, namespace string, podName string, conditionType corev1.PodConditionType) { + require.NoError(tc, bootstrapmonitor.WaitForPodCondition(tc.DefaultContext(), clientset, namespace, podName, conditionType)) +} + +// waitForNodeHealthy waits for the node running in 
the specified pod to report healthy. +func waitForNodeHealthy(tc tests.TestContext, kubeconfig *restclient.Config, namespace string, podName string) ids.NodeID { + require := require.New(tc) + + // A forwarded connection enables connectivity without exposing the node external to the kube cluster + ginkgo.By(fmt.Sprintf("Enabling a local forward for pod %s.%s", namespace, podName)) + localPort, localPortStopChan, err := enableLocalForwardForPod(kubeconfig, namespace, podName, config.DefaultHTTPPort, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter) + require.NoError(err) + defer close(localPortStopChan) + localNodeURI := fmt.Sprintf("http://127.0.0.1:%d", localPort) + + infoClient := info.NewClient(localNodeURI) + bootstrapNodeID, _, err := infoClient.GetNodeID(tc.DefaultContext()) + require.NoError(err) + + ginkgo.By(fmt.Sprintf("Waiting for pod %s.%s to report a healthy status at %s", namespace, podName, localNodeURI)) + require.Eventually(func() bool { + healthReply, err := tmpnet.CheckNodeHealth(tc.DefaultContext(), localNodeURI) + if err != nil { + tc.Outf("Error checking node health: %v\n", err) + return false + } + return healthReply.Healthy + }, e2e.DefaultTimeout, e2e.DefaultPollingInterval) + + return bootstrapNodeID +} + +// enableLocalForwardForPod enables traffic forwarding from a local port to the specified pod with client-go. The returned +// stop channel should be closed to stop the port forwarding. 
+func enableLocalForwardForPod(kubeconfig *restclient.Config, namespace string, name string, port int, out, errOut io.Writer) (uint16, chan struct{}, error) {
+	transport, upgrader, err := spdy.RoundTripperFor(kubeconfig)
+	if err != nil {
+		return 0, nil, fmt.Errorf("failed to create round tripper: %w", err)
+	}
+
+	// Target the portforward subresource of the pod on the API server named by the kubeconfig
+	dialer := spdy.NewDialer(
+		upgrader,
+		&http.Client{
+			Transport: transport,
+		},
+		http.MethodPost,
+		&url.URL{
+			Scheme: "https",
+			Path:   fmt.Sprintf("/api/v1/namespaces/%s/pods/%s/portforward", namespace, name),
+			Host:   strings.TrimPrefix(kubeconfig.Host, "https://"),
+		},
+	)
+	// A local port of 0 requests a dynamically allocated port, which is looked up via GetPorts below
+	ports := []string{fmt.Sprintf("0:%d", port)}
+
+	// Need to specify 127.0.0.1 to ensure that forwarding is only attempted for the ipv4
+	// address of the pod. By default, kind is deployed with only ipv4, and attempting to
+	// connect to a pod with ipv6 will fail.
+	addresses := []string{"127.0.0.1"}
+
+	stopChan, readyChan := make(chan struct{}, 1), make(chan struct{}, 1)
+	forwarder, err := portforward.NewOnAddresses(dialer, addresses, ports, stopChan, readyChan, out, errOut)
+	if err != nil {
+		return 0, nil, fmt.Errorf("failed to create forwarder: %w", err)
+	}
+
+	go func() {
+		if err := forwarder.ForwardPorts(); err != nil {
+			// TODO(marun) Need better error handling here? Or is ok for test-only usage?
+			panic(err)
+		}
+	}()
+
+	<-readyChan // Wait for port forwarding to be ready
+
+	// Retrieve the dynamically allocated local port
+	forwardedPorts, err := forwarder.GetPorts()
+	if err != nil {
+		close(stopChan)
+		return 0, nil, fmt.Errorf("failed to get forwarded ports: %w", err)
+	}
+	if len(forwardedPorts) == 0 {
+		close(stopChan)
+		// err is nil on this path so there is nothing to wrap; identify the pod instead
+		return 0, nil, fmt.Errorf("failed to find at least one forwarded port for pod %s.%s", namespace, name)
+	}
+	return forwardedPorts[0].Local, stopChan, nil
+}
+
+// createBootstrapTester creates a pod that can continuously bootstrap from the specified bootstrap IP+ID.
+func createBootstrapTester(tc tests.TestContext, clientset *kubernetes.Clientset, namespace string, bootstrapIP string, bootstrapNodeID ids.NodeID) *appsv1.StatefulSet { + flags := defaultNodeFlags() + flags[config.BootstrapIPsKey] = fmt.Sprintf("%s:%d", bootstrapIP, config.DefaultStakingPort) + flags[config.BootstrapIDsKey] = bootstrapNodeID.String() + + statefulSet := newNodeStatefulSet("bootstrap-tester", flags) + + // Add the bootstrap-monitor containers to enable continuous bootstrap testing + + initContainer := getMonitorContainer(initContainerName, []string{ + "init", + "--node-container-name=" + nodeContainerName, + "--data-dir=" + dataDir, + }) + initContainer.VolumeMounts = []corev1.VolumeMount{ + { + Name: volumeName, + MountPath: dataDir, + }, + } + statefulSet.Spec.Template.Spec.InitContainers = append(statefulSet.Spec.Template.Spec.InitContainers, initContainer) + monitorContainer := getMonitorContainer(monitorContainerName, []string{ + "wait-for-completion", + "--node-container-name=" + nodeContainerName, + "--data-dir=" + dataDir, + "--health-check-interval=1s", + "--image-check-interval=1s", + }) + monitorContainer.VolumeMounts = []corev1.VolumeMount{ + { + Name: volumeName, + MountPath: dataDir, + ReadOnly: true, // The volume is only used for checking disk usage + }, + } + statefulSet.Spec.Template.Spec.Containers = append(statefulSet.Spec.Template.Spec.Containers, monitorContainer) + + grantMonitorPermissions(tc, clientset, namespace) + + createdStatefulSet, err := clientset.AppsV1().StatefulSets(namespace).Create(tc.DefaultContext(), statefulSet, metav1.CreateOptions{}) + require.NoError(tc, err) + + return createdStatefulSet +} + +// getMonitorContainer retrieves the common container definition for bootstrap-monitor containers. 
+func getMonitorContainer(name string, args []string) corev1.Container { + return corev1.Container{ + Name: name, + Image: latestMonitorImage, + Command: []string{"./bootstrap-monitor"}, + Args: args, + Env: []corev1.EnvVar{ + { + Name: "POD_NAME", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "metadata.name", + }, + }, + }, + { + Name: "POD_NAMESPACE", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "metadata.namespace", + }, + }, + }, + }, + } +} + +// grantMonitorPermissions grants the permissions required by the bootstrap-monitor to the namespace's default service account. +func grantMonitorPermissions(tc tests.TestContext, clientset *kubernetes.Clientset, namespace string) { + require := require.New(tc) + + role := &rbacv1.Role{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "bootstrap-monitor-role-", + }, + Rules: []rbacv1.PolicyRule{ + { + APIGroups: []string{""}, + Resources: []string{"pods"}, + Verbs: []string{"get", "create", "watch", "delete"}, + }, + { + APIGroups: []string{""}, + Resources: []string{"pods/log"}, + Verbs: []string{"get"}, + }, + { + APIGroups: []string{"apps"}, + Resources: []string{"statefulsets"}, + Verbs: []string{"patch"}, + }, + }, + } + createdRole, err := clientset.RbacV1().Roles(namespace).Create(tc.DefaultContext(), role, metav1.CreateOptions{}) + require.NoError(err) + + roleBinding := &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "bootstrap-monitor-role-binding-", + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: "default", + Namespace: namespace, + }, + }, + RoleRef: rbacv1.RoleRef{ + Kind: "Role", + Name: createdRole.Name, + APIGroup: "rbac.authorization.k8s.io", + }, + } + _, err = clientset.RbacV1().RoleBindings(namespace).Create(tc.DefaultContext(), roleBinding, metav1.CreateOptions{}) + require.NoError(err) +} + +// waitForLogOutput streams the logs from the specified pod container until 
the desired output is found or the context times out.
+// NOTE(review): the function also returns, without failing the test, when the log stream ends before the desired
+// output is seen, and scanner.Err() is not checked. Callers therefore cannot rely on the output having been observed.
+func waitForLogOutput(tc tests.TestContext, clientset *kubernetes.Clientset, namespace string, podName string, containerName string, desiredOutput string) {
+	// TODO(marun) Figure out why log output is randomly truncated (not flushed?)
+
+	outputLogLine(tc, fmt.Sprintf(
+		"Logs from: %q container of pod %s.%s (may not be complete)", containerName, namespace, podName))
+	outputLogLine(tc, strings.Repeat("=", 80))
+
+	// Only the container is specified; presumably the request then returns the logs available so far rather than
+	// following the container - TODO confirm against the PodLogOptions documentation
+	req := clientset.CoreV1().Pods(namespace).GetLogs(podName, &corev1.PodLogOptions{
+		Container: containerName,
+	})
+
+	// Stream the logs until the desired output is seen
+	readCloser, err := req.Stream(tc.DefaultContext())
+	require.NoError(tc, err)
+	defer readCloser.Close()
+
+	scanner := bufio.NewScanner(readCloser)
+	for scanner.Scan() {
+		line := scanner.Text()
+		outputLogLine(tc, line)
+		// An empty desiredOutput never matches, in which case every available line is echoed
+		if len(desiredOutput) > 0 && strings.Contains(line, desiredOutput) {
+			return
+		}
+	}
+}
+
+// outputLogLine outputs logs in a consistent color
+func outputLogLine(tc tests.TestContext, line string) {
+	tc.Outf("{{light-gray}}%s{{/}}\n", line)
+}
diff --git a/tests/fixture/bootstrapmonitor/init.go b/tests/fixture/bootstrapmonitor/init.go
new file mode 100644
index 000000000000..94ec8b0f7943
--- /dev/null
+++ b/tests/fixture/bootstrapmonitor/init.go
@@ -0,0 +1,121 @@
+// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
+// See the file LICENSE for licensing terms.
+ +package bootstrapmonitor + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "os" + "strings" + "time" + + "go.uber.org/zap" + + "github.com/ava-labs/avalanchego/utils/logging" + "github.com/ava-labs/avalanchego/utils/perms" +) + +const ( + initTimeout = 2 * time.Minute + + BootstrapStartingMessage = "Starting bootstrap test" + BootstrapResumingMessage = "Resuming bootstrap test" +) + +func NodeDataDir(path string) string { + return path + "/avalanchego" +} + +func InitBootstrapTest(log logging.Logger, namespace string, podName string, nodeContainerName string, dataDir string) error { + clientset, err := getClientset(log) + if err != nil { + return fmt.Errorf("failed to get clientset: %w", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), initTimeout) + defer cancel() + + log.Info("Retrieving pod to determine bootstrap test config", + zap.String("namespace", namespace), + zap.String("pod", podName), + zap.String("container", nodeContainerName), + ) + testConfig, err := GetBootstrapTestConfigFromPod(ctx, clientset, namespace, podName, nodeContainerName) + if err != nil { + return fmt.Errorf("failed to determine bootstrap test config: %w", err) + } + log.Info("Retrieved bootstrap test config", zap.Reflect("testConfig", testConfig)) + + // If the image uses the latest tag, determine the latest image id and set the container image to that + if strings.HasSuffix(testConfig.Image, ":latest") { + log.Info("Determining image id for image", zap.String("image", testConfig.Image)) + latestImageDetails, err := getLatestImageDetails(ctx, log, clientset, namespace, testConfig.Image, nodeContainerName) + if err != nil { + return fmt.Errorf("failed to get latest image details: %w", err) + } + log.Info("Updating owning statefulset with image details", + zap.String("image", latestImageDetails.Image), + zap.Reflect("versions", latestImageDetails.Versions), + ) + if err := setImageDetails(ctx, log, clientset, namespace, podName, latestImageDetails); 
err != nil { + return fmt.Errorf("failed to set container image: %w", err) + } + } + + // A bootstrap is being resumed if a version file exists and the image name it contains matches the container + // image. If a bootstrap is being started, the version file should be created and the data path cleared. + + testDetailsPath := getTestDetailsPath(dataDir) + + var testDetails bootstrapTestDetails + if testDetailsBytes, err := os.ReadFile(testDetailsPath); errors.Is(err, os.ErrNotExist) { + log.Info("Test details file does not exist", zap.String("path", testDetailsPath)) + } else if err != nil { + return fmt.Errorf("failed to read test details file: %w", err) + } else { + if err := json.Unmarshal(testDetailsBytes, &testDetails); err != nil { + return fmt.Errorf("failed to unmarshal test details: %w", err) + } + log.Info("Loaded test details", zap.Reflect("testDetails", testDetails)) + } + + if testDetails.Image == testConfig.Image { + log.Info("Test details image matches test config image") + log.Info(BootstrapResumingMessage, zap.Reflect("testConfig", testConfig)) + return nil + } else if len(testDetails.Image) > 0 { + log.Info("Test details image differs from test config image") + } + + nodeDataDir := NodeDataDir(dataDir) + log.Info("Removing node directory", zap.String("path", nodeDataDir)) + if err := os.RemoveAll(nodeDataDir); err != nil { + return fmt.Errorf("failed to remove contents of node directory: %w", err) + } + + log.Info("Writing test details to file", + zap.Reflect("testDetails", testDetails), + zap.String("path", testDetailsPath), + ) + testDetails = bootstrapTestDetails{ + Image: testConfig.Image, + StartTime: time.Now(), + } + testDetailsBytes, err := json.Marshal(testDetails) + if err != nil { + return fmt.Errorf("failed to marshal test details: %w", err) + } + if err := os.WriteFile(testDetailsPath, testDetailsBytes, perms.ReadWrite); err != nil { + return fmt.Errorf("failed to write test details to file: %w", err) + } + + 
log.Info(BootstrapStartingMessage, + zap.Reflect("testConfig", testConfig), + zap.Time("startTime", testDetails.StartTime), + ) + + return nil +} diff --git a/tests/fixture/bootstrapmonitor/wait.go b/tests/fixture/bootstrapmonitor/wait.go new file mode 100644 index 000000000000..1d34ad4c26b6 --- /dev/null +++ b/tests/fixture/bootstrapmonitor/wait.go @@ -0,0 +1,196 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package bootstrapmonitor + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "os" + "os/exec" + "strings" + "time" + + "go.uber.org/zap" + "k8s.io/apimachinery/pkg/util/wait" + + "github.com/ava-labs/avalanchego/config" + "github.com/ava-labs/avalanchego/tests/fixture/tmpnet" + "github.com/ava-labs/avalanchego/utils/logging" + + corev1 "k8s.io/api/core/v1" +) + +const ( + contextDuration = 30 * time.Second + + ImageUnchanged = "Image unchanged" +) + +var nodeURL = fmt.Sprintf("http://localhost:%d", config.DefaultHTTPPort) + +func WaitForCompletion( + log logging.Logger, + namespace string, + podName string, + nodeContainerName string, + dataDir string, + healthCheckInterval time.Duration, + imageCheckInterval time.Duration, +) error { + testDetailsPath := getTestDetailsPath(dataDir) + var testDetails bootstrapTestDetails + if testDetailsBytes, err := os.ReadFile(testDetailsPath); err != nil { + return fmt.Errorf("failed to load test details file %s: %w", testDetailsPath, err) + } else { + if err := json.Unmarshal(testDetailsBytes, &testDetails); err != nil { + return fmt.Errorf("failed to unmarshal test details: %w", err) + } + log.Info("Loaded test details", zap.Reflect("testDetails", testDetails)) + } + + clientset, err := getClientset(log) + if err != nil { + return fmt.Errorf("failed to get clientset: %w", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), initTimeout) + defer cancel() + + log.Info("Retrieving pod to determine bootstrap test config", + 
zap.String("namespace", namespace), + zap.String("pod", podName), + zap.String("container", nodeContainerName), + ) + testConfig, err := GetBootstrapTestConfigFromPod(ctx, clientset, namespace, podName, nodeContainerName) + if err != nil { + return fmt.Errorf("failed to determine bootstrap test config: %w", err) + } + log.Info("Retrieved bootstrap test config", zap.Reflect("testConfig", testConfig)) + + // Avoid checking node health before it reports initial ready + log.Info("Waiting for pod readiness") + if err := WaitForPodCondition(ctx, clientset, namespace, podName, corev1.PodReady); err != nil { + return fmt.Errorf("failed to wait for pod condition: %w", err) + } + + log.Info("Waiting for node to report healthy") + if err := wait.PollImmediateInfinite(healthCheckInterval, func() (bool, error) { + ctx, cancel := context.WithTimeout(context.Background(), contextDuration) + defer cancel() + + // Define common fields for logging + diskUsage := getDiskUsage(log, dataDir) + commonFields := []zap.Field{ + zap.String("diskUsage", diskUsage), + zap.Duration("duration", time.Since(testDetails.StartTime)), + } + + // Check whether the node is reporting healthy which indicates that bootstrap is complete + if healthy, err := tmpnet.CheckNodeHealth(ctx, nodeURL); err != nil { + log.Error("failed to check node health", zap.Error(err)) + return false, nil + } else { + if !healthy.Healthy { + log.Info("Node reported unhealthy", commonFields...) + return false, nil + } + + log.Info("Node reported healthy") + } + + commonFields = append(commonFields, zap.Reflect("testConfig", testConfig)) + log.Info("Bootstrap completed successfully", commonFields...) 
+ + return true, nil + }); err != nil { + return fmt.Errorf("failed to wait for node to report healthy: %w", err) + } + + log.Info("Waiting for new image to test") + if err := wait.PollImmediateInfinite(imageCheckInterval, func() (bool, error) { + ctx, cancel := context.WithTimeout(context.Background(), contextDuration) + defer cancel() + + log.Info("Starting pod to get the image id for the `latest` tag") + latestImageDetails, err := getLatestImageDetails(ctx, log, clientset, namespace, testConfig.Image, nodeContainerName) + if err != nil { + log.Error("failed to get latest image id", zap.Error(err)) + return false, nil + } + + if latestImageDetails.Image == testConfig.Image { + log.Info(ImageUnchanged) + return false, nil + } + + log.Info("Found updated image", + zap.String("image", latestImageDetails.Image), + zap.Reflect("versions", latestImageDetails.Versions), + ) + + log.Info("Updating StatefulSet to trigger a new test") + if err := setImageDetails(ctx, log, clientset, namespace, podName, latestImageDetails); err != nil { + log.Error("failed to set container image", zap.Error(err)) + return false, nil + } + + // Statefulset will restart the pod with the new image + return true, nil + }); err != nil { + return fmt.Errorf("failed to wait for new image to test: %w", err) + } + + // Avoid exiting immediately to avoid container restart before the pod is recreated with the new image + time.Sleep(5 * time.Minute) + return nil +} + +// Determines the current disk usage for the specified directory +func getDiskUsage(log logging.Logger, dir string) string { + cmd := exec.Command("du", "-sh", dir) + + // Create a buffer to capture stderr in case an unexpected error occurs + var stderr bytes.Buffer + cmd.Stderr = &stderr + + output, err := cmd.Output() + if err != nil { + exitError, ok := err.(*exec.ExitError) + if !ok { + log.Error("Error executing du", zap.Error(err)) + return "" + } + switch exitError.ExitCode() { + case 1: + // Exit code 1 usually indicates that 
files cannot be accessed. Since avalanchego will + // regularly delete files in the db dir, this can be safely ignored and the regular disk + // usage message can be printed. + case 2: + log.Error("Incorrect usage of du command for dir", + zap.String("dir", dir), + zap.String("stderr", stderr.String()), + zap.Error(err), + ) + return "" + default: + log.Error("du command failed for dir", + zap.String("dir", dir), + zap.String("stderr", stderr.String()), + zap.Error(err), + ) + return "" + } + } + + usageParts := strings.Split(string(output), "\t") + if len(usageParts) != 2 { + log.Error("Unexpected output from du command", + zap.String("output", string(output)), + ) + } + + return usageParts[0] +} diff --git a/tests/fixture/e2e/helpers.go b/tests/fixture/e2e/helpers.go index ba06c60e3a38..4d5d65dac1e6 100644 --- a/tests/fixture/e2e/helpers.go +++ b/tests/fixture/e2e/helpers.go @@ -290,3 +290,22 @@ func NewPChainFeeCalculatorFromContext(context *builder.Context) fee.Calculator } return fee.NewStaticCalculator(context.StaticFeeConfig) } + +// GetRepoRootPath strips the provided suffix from the current working +// directory. If the test binary is executed from the root of the repo, the +// result will be the repo root. +func GetRepoRootPath(suffix string) (string, error) { + // - When executed via a test binary, the working directory will be wherever + // the binary is executed from, but scripts should require execution from + // the repo root. + // + // - When executed via ginkgo (nicer for development + supports + // parallel execution) the working directory will always be the + // target path (e.g. [repo root]./tests/bootstrap/e2e) and getting the repo + // root will require stripping the target path suffix. 
+ cwd, err := os.Getwd() + if err != nil { + return "", err + } + return strings.TrimSuffix(cwd, suffix), nil +} diff --git a/tests/fixture/tmpnet/node_process.go b/tests/fixture/tmpnet/node_process.go index b33fe32f0730..c11d83d3639a 100644 --- a/tests/fixture/tmpnet/node_process.go +++ b/tests/fixture/tmpnet/node_process.go @@ -10,7 +10,6 @@ import ( "fmt" "io" "io/fs" - "net" "os" "os/exec" "path/filepath" @@ -19,7 +18,6 @@ import ( "syscall" "time" - "github.com/ava-labs/avalanchego/api/health" "github.com/ava-labs/avalanchego/config" "github.com/ava-labs/avalanchego/node" "github.com/ava-labs/avalanchego/utils/perms" @@ -37,29 +35,6 @@ var ( errNotRunning = errors.New("node is not running") ) -func checkNodeHealth(ctx context.Context, uri string) (bool, error) { - // Check that the node is reporting healthy - health, err := health.NewClient(uri).Health(ctx, nil) - if err == nil { - return health.Healthy, nil - } - - switch t := err.(type) { - case *net.OpError: - if t.Op == "read" { - // Connection refused - potentially recoverable - return false, nil - } - case syscall.Errno: - if t == syscall.ECONNREFUSED { - // Connection refused - potentially recoverable - return false, nil - } - } - // Assume all other errors are not recoverable - return false, fmt.Errorf("failed to query node health: %w", err) -} - // Defines local-specific node configuration. Supports setting default // and node-specific values. 
type NodeProcess struct { @@ -199,7 +174,11 @@ func (p *NodeProcess) IsHealthy(ctx context.Context) (bool, error) { return false, errNotRunning } - return checkNodeHealth(ctx, p.node.URI) + healthReply, err := CheckNodeHealth(ctx, p.node.URI) + if err != nil { + return false, err + } + return healthReply.Healthy, nil } func (p *NodeProcess) getProcessContextPath() string { diff --git a/tests/fixture/tmpnet/utils.go b/tests/fixture/tmpnet/utils.go index ea320f2a8801..1ed097c864ac 100644 --- a/tests/fixture/tmpnet/utils.go +++ b/tests/fixture/tmpnet/utils.go @@ -6,9 +6,13 @@ package tmpnet import ( "context" "encoding/json" + "errors" "fmt" + "net" + "syscall" "time" + "github.com/ava-labs/avalanchego/api/health" "github.com/ava-labs/avalanchego/ids" "github.com/ava-labs/avalanchego/utils/crypto/secp256k1" ) @@ -17,6 +21,31 @@ const ( DefaultNodeTickerInterval = 50 * time.Millisecond ) +var ErrUnrecoverableNodeHealthCheck = errors.New("failed to query node health") + +func CheckNodeHealth(ctx context.Context, uri string) (*health.APIReply, error) { + // Check that the node is reporting healthy + healthReply, err := health.NewClient(uri).Health(ctx, nil) + if err == nil { + return healthReply, nil + } + + switch t := err.(type) { + case *net.OpError: + if t.Op == "read" { + // Connection refused - potentially recoverable + return nil, err + } + case syscall.Errno: + if t == syscall.ECONNREFUSED { + // Connection refused - potentially recoverable + return nil, err + } + } + // Assume all other errors are not recoverable + return nil, fmt.Errorf("%w: %w", ErrUnrecoverableNodeHealthCheck, err) +} + // WaitForHealthy blocks until Node.IsHealthy returns true or an error (including context timeout) is observed. 
func WaitForHealthy(ctx context.Context, node *Node) error { if _, ok := ctx.Deadline(); !ok { @@ -27,10 +56,14 @@ func WaitForHealthy(ctx context.Context, node *Node) error { for { healthy, err := node.IsHealthy(ctx) - if err != nil { - return fmt.Errorf("failed to wait for health of node %q: %w", node.NodeID, err) - } - if healthy { + switch { + case errors.Is(err, ErrUnrecoverableNodeHealthCheck): + return fmt.Errorf("%w for node %q", err, node.NodeID) + case err != nil: + // Error is recoverable + // TODO(marun) Log the error to aid in troubleshooting once a logger is available + continue + case healthy: return nil }