diff --git a/Dockerfile.dapper b/Dockerfile.dapper index 2654f138757a..cd3a518acc27 100644 --- a/Dockerfile.dapper +++ b/Dockerfile.dapper @@ -22,7 +22,7 @@ RUN apk -U --no-cache add \ RUN PIPX_BIN_DIR=/usr/local/bin pipx install awscli # Install Trivy -ENV TRIVY_VERSION="0.51.4" +ENV TRIVY_VERSION="0.53.0" RUN case "$(go env GOARCH)" in \ arm64) TRIVY_ARCH="ARM64" ;; \ amd64) TRIVY_ARCH="64bit" ;; \ diff --git a/docs/adrs/etcd-s3-secret.md b/docs/adrs/etcd-s3-secret.md new file mode 100644 index 000000000000..bd728cdd3960 --- /dev/null +++ b/docs/adrs/etcd-s3-secret.md @@ -0,0 +1,83 @@ +# Support etcd Snapshot Configuration via Kubernetes Secret + +Date: 2024-02-06 +Revised: 2024-06-10 + +## Status + +Accepted + +## Context + +### Current State + +K3s currently reads configuration for S3 storage of etcd snapshots from CLI flags and/or configuration files. + +Security-conscious users have raised issues with the current state. They want to store snapshots on S3, but do not want +to have credentials visible in config files or systemd units. Users operating in highly secure environments have also +asked for the ability to configure a proxy server to be used when creating/restoring snapshots stored on S3, without +managing complicated `NO_PROXY` settings or affecting the rest of the K3s process environment. + +### Security Considerations + +Storing credentials on-disk is generally considered a bad idea, and is not allowed by security practices in many +organizations. Use of static credentials in the config file also makes them difficult to rotate, as K3s only reloads the +configuration on startup. + +### Existing Work + +Cloud providers and other tools that need to authenticate to external systems can frequently be configured to retrieve secrets +from an existing credential secret that is provisioned via an external process, such as a secrets management tool. This +avoids embedding the credentials directly in the system configuration, chart values, and so on. + +## Decision + +* We will add a `--etcd-s3-proxy` flag that can be used to set the proxy used by the S3 client. This will override the + settings that Go's default HTTP client reads from the `HTTP_PROXY/HTTPS_PROXY/NO_PROXY` environment variables. +* We will add support for reading etcd snapshot S3 configuration from a Secret. The secret name will be specified via a new + `--etcd-s3-config-secret` flag, which accepts the name of the Secret in the `kube-system` namespace. +* Presence of the `--etcd-s3-config-secret` flag does not imply `--etcd-s3`. If S3 is not enabled by use of the `--etcd-s3` flag, + the Secret will not be used. +* The Secret does not need to exist when K3s starts; it will be checked for every time a snapshot operation is performed. +* Secret and CLI/config values will NOT be merged. The Secret will provide values to be used in absence of other + configuration; if S3 configuration is passed via CLI flags or configuration file, ALL fields set by the Secret + will be ignored. +* The Secret will ONLY be used for on-demand and scheduled snapshot save operations; it will not be used by snapshot restore. + Snapshot restore operations that want to retrieve a snapshot from S3 will need to pass the appropriate configuration + via environment variables or CLI flags, as the Secret is not available during the restore process. + +Fields within the Secret will match `k3s server` CLI flags / config file keys. The exception is `etcd-s3-endpoint-ca`, which +normally contains the path of a file on disk: in the Secret, the `etcd-s3-endpoint-ca` field can instead specify an inline PEM-encoded CA +bundle, or the `etcd-s3-endpoint-ca-name` field can specify the name of a ConfigMap in the `kube-system` namespace +containing one or more CA bundles. All valid CA bundles found in either field are loaded. + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: k3s-etcd-snapshot-s3-config + namespace: kube-system +stringData: + etcd-s3-endpoint: "" + etcd-s3-endpoint-ca: "" + etcd-s3-endpoint-ca-name: "" + etcd-s3-skip-ssl-verify: "false" + etcd-s3-access-key: "AWS_ACCESS_KEY_ID" + etcd-s3-secret-key: "AWS_SECRET_ACCESS_KEY" + etcd-s3-bucket: "bucket" + etcd-s3-folder: "folder" + etcd-s3-region: "us-east-1" + etcd-s3-insecure: "false" + etcd-s3-timeout: "5m" + etcd-s3-proxy: "" +```
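To make the lookup order above concrete, here is a minimal sketch of the Secret fallback, assuming a client-go clientset. The `EtcdS3` struct and `loadS3Config` helper are invented for illustration; the PR's real logic lives in the new `pkg/etcd/s3` controller, which is only partially visible in this diff.

```go
package etcds3sketch

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// EtcdS3 mirrors a subset of the snapshot S3 settings from the example Secret.
type EtcdS3 struct {
	Endpoint  string
	Bucket    string
	Folder    string
	Region    string
	AccessKey string
	SecretKey string
	Proxy     string
}

// loadS3Config prefers CLI/config-file values; only when none are set does it
// read the named Secret from kube-system. Secret and CLI values are never
// merged, and the Secret is fetched on every call, so it does not need to
// exist when K3s starts.
func loadS3Config(ctx context.Context, client kubernetes.Interface, cli *EtcdS3, secretName string) (*EtcdS3, error) {
	if cli != nil && *cli != (EtcdS3{}) {
		// Any CLI/config-file S3 configuration wins; all Secret fields are ignored.
		return cli, nil
	}
	secret, err := client.CoreV1().Secrets("kube-system").Get(ctx, secretName, metav1.GetOptions{})
	if err != nil {
		return nil, fmt.Errorf("failed to get S3 config secret %s: %w", secretName, err)
	}
	field := func(key string) string { return string(secret.Data[key]) }
	return &EtcdS3{
		Endpoint:  field("etcd-s3-endpoint"),
		Bucket:    field("etcd-s3-bucket"),
		Folder:    field("etcd-s3-folder"),
		Region:    field("etcd-s3-region"),
		AccessKey: field("etcd-s3-access-key"),
		SecretKey: field("etcd-s3-secret-key"),
		Proxy:     field("etcd-s3-proxy"),
	}, nil
}
```

Similarly, a sketch of the proxy override from the first Decision bullet, assuming the flag value parses as a URL; `newS3Transport` is likewise a hypothetical name. A fixed `http.ProxyURL` bypasses the `HTTP_PROXY/HTTPS_PROXY/NO_PROXY` handling that `http.ProxyFromEnvironment` would otherwise apply:

```go
package etcds3sketch

import (
	"fmt"
	"net/http"
	"net/url"
)

// newS3Transport pins the transport's proxy to proxyURL when one is given;
// otherwise the default environment-based proxy handling is left in place.
func newS3Transport(proxyURL string) (*http.Transport, error) {
	tr := http.DefaultTransport.(*http.Transport).Clone()
	if proxyURL != "" {
		u, err := url.Parse(proxyURL)
		if err != nil {
			return nil, fmt.Errorf("invalid etcd-s3-proxy value: %w", err)
		}
		// All S3 requests go through this proxy, regardless of environment variables.
		tr.Proxy = http.ProxyURL(u)
	}
	return tr, nil
}
```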
+ +## Consequences + +This will require additional documentation, tests, and QA work to validate use of Secrets for S3 snapshot configuration. + +## Revisions + +#### 2024-06-10: +* Changed flag to `etcd-s3-config-secret` to avoid confusion with `etcd-s3-secret-key`. +* Added `etcd-s3-folder` to example Secret. diff --git a/go.mod b/go.mod index 7dc60a428d3c..88e45a929e76 100644 --- a/go.mod +++ b/go.mod @@ -105,7 +105,7 @@ require ( github.com/joho/godotenv v1.5.1 github.com/json-iterator/go v1.1.12 github.com/k3s-io/helm-controller v0.15.10 - github.com/k3s-io/kine v0.11.9 + github.com/k3s-io/kine v0.11.11 github.com/klauspost/compress v1.17.7 github.com/kubernetes-sigs/cri-tools v0.0.0-00010101000000-000000000000 github.com/lib/pq v1.10.2 @@ -125,7 +125,7 @@ require ( github.com/rancher/dynamiclistener v0.3.6 github.com/rancher/lasso v0.0.0-20230830164424-d684fdeb6f29 github.com/rancher/permissions v0.0.0-20240523180510-4001d3d637f7 - github.com/rancher/remotedialer v0.3.0 + github.com/rancher/remotedialer v0.4.1 github.com/rancher/wharfie v0.6.4 github.com/rancher/wrangler v1.1.1 github.com/robfig/cron/v3 v3.0.1 @@ -138,11 +138,9 @@ require ( github.com/vishvananda/netlink v1.2.1-beta.2 github.com/yl2chen/cidranger v1.0.2 go.etcd.io/etcd/api/v3 v3.5.13 - go.etcd.io/etcd/client/pkg/v3 v3.5.13 go.etcd.io/etcd/client/v3 v3.5.13 - go.etcd.io/etcd/etcdutl/v3 v3.5.9 + go.etcd.io/etcd/etcdutl/v3 v3.5.13 go.etcd.io/etcd/server/v3 v3.5.13 - go.uber.org/zap v1.27.0 golang.org/x/crypto v0.23.0 golang.org/x/net v0.25.0 golang.org/x/sync v0.7.0 @@ -225,7 +223,7 @@ require ( github.com/containers/ocicrypt v1.1.10 // indirect github.com/coreos/go-oidc v2.2.1+incompatible // indirect github.com/coreos/go-semver v0.3.1 // indirect - github.com/cpuguy83/go-md2man/v2 v2.0.3 // indirect + github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect github.com/cyphar/filepath-securejoin v0.2.4 // indirect github.com/danwinship/knftables v0.0.13 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect @@ -290,7 +288,7 @@ require ( github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect - github.com/hashicorp/go-retryablehttp v0.7.4 // indirect + github.com/hashicorp/go-retryablehttp v0.7.7 // indirect github.com/hashicorp/go-version v1.6.0 // indirect github.com/hashicorp/golang-lru v0.5.4 // indirect github.com/hashicorp/golang-lru/arc/v2 v2.0.5 // indirect @@ -422,15 +420,16 @@ require ( github.com/tchap/go-patricia/v2 v2.3.1 // indirect github.com/tidwall/btree v1.6.0 // indirect github.com/tmc/grpc-websocket-proxy 
v0.0.0-20220101234140-673ab2c3ae75 // indirect - github.com/urfave/cli/v2 v2.26.0 // indirect + github.com/urfave/cli/v2 v2.27.2 // indirect github.com/vbatts/tar-split v0.11.5 // indirect github.com/vishvananda/netns v0.0.4 // indirect github.com/vmware/govmomi v0.30.6 // indirect github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 // indirect github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510 // indirect github.com/xlab/treeprint v1.2.0 // indirect - github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect + github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 // indirect go.etcd.io/bbolt v1.3.9 // indirect + go.etcd.io/etcd/client/pkg/v3 v3.5.13 // indirect go.etcd.io/etcd/client/v2 v2.305.13 // indirect go.etcd.io/etcd/pkg/v3 v3.5.13 // indirect go.etcd.io/etcd/raft/v3 v3.5.13 // indirect @@ -451,6 +450,7 @@ require ( go.uber.org/fx v1.20.1 // indirect go.uber.org/mock v0.4.0 // indirect go.uber.org/multierr v1.11.0 // indirect + go.uber.org/zap v1.27.0 // indirect golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 // indirect golang.org/x/mod v0.15.0 // indirect golang.org/x/oauth2 v0.17.0 // indirect diff --git a/go.sum b/go.sum index 7266b151efa9..24723a496349 100644 --- a/go.sum +++ b/go.sum @@ -455,8 +455,9 @@ github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwc github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/cpuguy83/go-md2man/v2 v2.0.3 h1:qMCsGGgs+MAzDFyp9LpAe1Lqy/fY/qCovCm0qnXZOBM= github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4= +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= @@ -549,6 +550,8 @@ github.com/fatih/camelcase v1.0.0 h1:hxNvNX/xYBp0ovncs8WyWZrOrpBNub/JfaMvbURyft8 github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= +github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= +github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= @@ -819,15 +822,17 @@ github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brv github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= -github.com/hashicorp/go-hclog v0.9.2 
h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI= github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= +github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k= +github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= -github.com/hashicorp/go-retryablehttp v0.7.4 h1:ZQgVdpTdAL7WpMIwLzCfbalOcSUdkDZnpUv3/+BxzFA= github.com/hashicorp/go-retryablehttp v0.7.4/go.mod h1:Jy/gPYAdjqffZ/yFGCFV2doI5wjtH1ewM9u8iYVjtX8= +github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU= +github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= @@ -952,8 +957,8 @@ github.com/k3s-io/etcd/server/v3 v3.5.13-k3s1 h1:Pqcxkg7V60c26ZpHoekP9QoUdLuduxF github.com/k3s-io/etcd/server/v3 v3.5.13-k3s1/go.mod h1:K/8nbsGupHqmr5MkgaZpLlH1QdX1pcNQLAkODy44XcQ= github.com/k3s-io/helm-controller v0.15.10 h1:TIfbbCbv8mJ1AquPzSxH3vMqIcqfgZ9Pr/Pq/jka/zc= github.com/k3s-io/helm-controller v0.15.10/go.mod h1:AYitg40howLjKloL/zdjDDOPL1jg/K5R4af0tQcyPR8= -github.com/k3s-io/kine v0.11.9 h1:7HfWSwtOowb7GuV6nECnNlFKShgRgVBLdWXj0/4t0sE= -github.com/k3s-io/kine v0.11.9/go.mod h1:N8rc1GDmEvvYRuTxhKTZfSc4fm/vyI6GbDxwBjccAjs= +github.com/k3s-io/kine v0.11.11 h1:f1DhpGNjCDVd1HFWPbeA824YP7MtsrKgstoJ5M0SRgs= +github.com/k3s-io/kine v0.11.11/go.mod h1:L4x3qotFebVh1ZVzYwFVL5PPfqw2sRJTjDTIeViO70Y= github.com/k3s-io/klog/v2 v2.100.1-k3s1 h1:xb/Ta8dpQuIZueQEw2YTZUYrKoILdBmPiITVkNmYPa0= github.com/k3s-io/klog/v2 v2.100.1-k3s1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= github.com/k3s-io/kube-router/v2 v2.1.2 h1:/eLfIsELLsqqRW1skIJ2qe7bWL6IZZ9Hg3IniIgObXo= @@ -1116,6 +1121,7 @@ github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJ github.com/marten-seemann/tcp v0.0.0-20210406111302-dfbc87cc63fd h1:br0buuQ854V8u83wA0rVZ8ttrq5CpaPZdvrK0LP2lOk= github.com/marten-seemann/tcp v0.0.0-20210406111302-dfbc87cc63fd/go.mod h1:QuCEs1Nt24+FYQEqAAncTDPJIuGs+LxK1MCiFL25pMU= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= @@ -1429,8 +1435,8 @@ github.com/rancher/lasso v0.0.0-20230830164424-d684fdeb6f29 h1:+kige/h8/LnzWgPjB github.com/rancher/lasso v0.0.0-20230830164424-d684fdeb6f29/go.mod h1:kgk9kJVMj9FIrrXU0iyM6u/9Je4bEjPImqswkTVaKsQ= github.com/rancher/permissions v0.0.0-20240523180510-4001d3d637f7 
h1:0Kg2SGoMeU1ll4xPi4DE0+qNHLFO/U5MwtK0WrIdK+o= github.com/rancher/permissions v0.0.0-20240523180510-4001d3d637f7/go.mod h1:fsbs0YOsGn1ofPD5p+BuI4qDhbMbSJtTegKt6Ucna+c= -github.com/rancher/remotedialer v0.3.0 h1:y1EO8JCsgZo0RcqTUp6U8FXcBAv27R+TLnWRcpvX1sM= -github.com/rancher/remotedialer v0.3.0/go.mod h1:BwwztuvViX2JrLLUwDlsYt5DiyUwHLlzynRwkZLAY0Q= +github.com/rancher/remotedialer v0.4.1 h1:jwOf2kPRjBBpSFofv1OuZHWaYHeC9Eb6/XgDvbkoTgc= +github.com/rancher/remotedialer v0.4.1/go.mod h1:Ys004RpJuTLSm+k4aYUCoFiOOad37ubYev3TkOFg/5w= github.com/rancher/wharfie v0.6.4 h1:JwYB+q661n8ut/ysgsjKe0P0z6bHCCFoC+29995ME90= github.com/rancher/wharfie v0.6.4/go.mod h1:kWv97z0sMAbnVNT/oe+JFZJVKn4xkas7ZdFf6UifWis= github.com/rancher/wrangler v1.1.1-0.20230818201331-3604a6be798d h1:RQBqHXyAN5gWqUazV637kqmYcy8M8K5bdvXszNciLcY= @@ -1447,8 +1453,8 @@ github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFR github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= -github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= +github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= github.com/rootless-containers/rootlesskit v1.0.1 h1:jepqW1txFSowKSMAEkVhWH3Oa1TCY9S400MVYe/6Iro= github.com/rootless-containers/rootlesskit v1.0.1/go.mod h1:t2UAiYagxrJ+wmpFAUIZPcqsm4k2B7ve6g7lILKbloc= github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc= @@ -1589,8 +1595,8 @@ github.com/urfave/cli v1.22.10/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60Nt github.com/urfave/cli v1.22.12/go.mod h1:sSBEIC79qR6OvcmsD4U3KABeOTxDqQtdDnaFuUN30b8= github.com/urfave/cli v1.22.14 h1:ebbhrRiGK2i4naQJr+1Xj92HXZCrK7MsyTS/ob3HnAk= github.com/urfave/cli v1.22.14/go.mod h1:X0eDS6pD6Exaclxm99NJ3FiCDRED7vIHpx2mDOHLvkA= -github.com/urfave/cli/v2 v2.26.0 h1:3f3AMg3HpThFNT4I++TKOejZO8yU55t3JnnSr4S4QEI= -github.com/urfave/cli/v2 v2.26.0/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ= +github.com/urfave/cli/v2 v2.27.2 h1:6e0H+AkS+zDckwPCUrZkKX38mRaau4nL2uipkJpbkcI= +github.com/urfave/cli/v2 v2.27.2/go.mod h1:g0+79LmHHATl7DAcHO99smiR/T7uGLw84w8Y42x+4eM= github.com/vbatts/tar-split v0.11.2/go.mod h1:vV3ZuO2yWSVsz+pfFzDG/upWH1JhjOiEaWq6kXyQ3VI= github.com/vbatts/tar-split v0.11.5 h1:3bHCTIheBm1qFTcgh9oPu+nNBtX+XJIupG/vacinCts= github.com/vbatts/tar-split v0.11.5/go.mod h1:yZbwRsSeGjusneWgA781EKej9HF8vme8okylkAeNKLk= @@ -1628,8 +1634,8 @@ github.com/xiang90/probing v0.0.0-20221125231312-a49e3df8f510/go.mod h1:UETIi67q github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ= github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= -github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU= -github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8= +github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 h1:+qGGcbkzsfDQNPPe9UDgpxAWQrhbbBXOYJFQDq/dtJw= 
+github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913/go.mod h1:4aEEwZQutDLsQv2Deui4iYQ6DWTxR14g6m8Wv88+Xqk= github.com/yashtewari/glob-intersection v0.1.0/go.mod h1:LK7pIC3piUjovexikBbJ26Yml7g8xa5bsjfx2v1fwok= github.com/yashtewari/glob-intersection v0.2.0/go.mod h1:LK7pIC3piUjovexikBbJ26Yml7g8xa5bsjfx2v1fwok= github.com/yl2chen/cidranger v1.0.2 h1:lbOWZVCG1tCRX4u24kuM1Tb4nHqWkDxwLdoS+SevawU= diff --git a/manifests/local-storage.yaml b/manifests/local-storage.yaml index f6fcc0d7c9b6..fb72ffc07704 100644 --- a/manifests/local-storage.yaml +++ b/manifests/local-storage.yaml @@ -67,7 +67,7 @@ spec: effect: "NoSchedule" containers: - name: local-path-provisioner - image: "%{SYSTEM_DEFAULT_REGISTRY}%rancher/local-path-provisioner:v0.0.27" + image: "%{SYSTEM_DEFAULT_REGISTRY}%rancher/local-path-provisioner:v0.0.28" imagePullPolicy: IfNotPresent command: - local-path-provisioner diff --git a/pkg/agent/config/config.go b/pkg/agent/config/config.go index 9318bc6e4677..3f691d492a5e 100644 --- a/pkg/agent/config/config.go +++ b/pkg/agent/config/config.go @@ -103,7 +103,7 @@ func APIServers(ctx context.Context, node *config.Node, proxy proxy.Proxy) []str return false, err } if len(addresses) == 0 { - logrus.Infof("Waiting for apiserver addresses") + logrus.Infof("Waiting for supervisor to provide apiserver addresses") return false, nil } return true, nil @@ -370,10 +370,9 @@ func get(ctx context.Context, envInfo *cmds.Agent, proxy proxy.Proxy) (*config.N if err != nil { return nil, errors.Wrap(err, "failed to retrieve configuration from server") } - // If the supervisor and externally-facing apiserver are not on the same port, tell the proxy where to find the apiserver. if controlConfig.SupervisorPort != controlConfig.HTTPSPort { - isIPv6 := utilsnet.IsIPv6(net.ParseIP([]string{envInfo.NodeIP.String()}[0])) + isIPv6 := utilsnet.IsIPv6(net.ParseIP(util.GetFirstValidIPString(envInfo.NodeIP))) if err := proxy.SetAPIServerPort(controlConfig.HTTPSPort, isIPv6); err != nil { return nil, errors.Wrapf(err, "failed to set apiserver port to %d", controlConfig.HTTPSPort) } diff --git a/pkg/agent/loadbalancer/loadbalancer.go b/pkg/agent/loadbalancer/loadbalancer.go index 36019470c8d2..567d825a2bb7 100644 --- a/pkg/agent/loadbalancer/loadbalancer.go +++ b/pkg/agent/loadbalancer/loadbalancer.go @@ -8,6 +8,7 @@ import ( "os" "path/filepath" "sync" + "time" "github.com/k3s-io/k3s/pkg/version" "github.com/sirupsen/logrus" @@ -167,11 +168,12 @@ func (lb *LoadBalancer) dialContext(ctx context.Context, network, _ string) (net if server == nil || targetServer == "" { logrus.Debugf("Nil server for load balancer %s: %s", lb.serviceName, targetServer) } else if allChecksFailed || server.healthCheck() { + dialTime := time.Now() conn, err := server.dialContext(ctx, network, targetServer) if err == nil { return conn, nil } - logrus.Debugf("Dial error from load balancer %s: %s", lb.serviceName, err) + logrus.Debugf("Dial error from load balancer %s after %s: %s", lb.serviceName, time.Now().Sub(dialTime), err) // Don't close connections to the failed server if we're retrying with health checks ignored. // We don't want to disrupt active connections if it is unlikely they will have anywhere to go. 
if !allChecksFailed { diff --git a/pkg/agent/loadbalancer/servers.go b/pkg/agent/loadbalancer/servers.go index 3564a6a4ee03..6b7f25606064 100644 --- a/pkg/agent/loadbalancer/servers.go +++ b/pkg/agent/loadbalancer/servers.go @@ -111,10 +111,12 @@ func (lb *LoadBalancer) setServers(serverAddresses []string) bool { return true } +// nextServer attempts to get the next server in the loadbalancer server list. +// If another goroutine has already updated the current server address to point at +// a different address than the one that just failed, nothing is changed. Otherwise, a new server address +// is stored to the currentServerAddress field, and returned for use. +// This function must always be called by a goroutine that holds a read lock on the loadbalancer mutex. func (lb *LoadBalancer) nextServer(failedServer string) (string, error) { - lb.mutex.RLock() - defer lb.mutex.RUnlock() - // note: these fields are not protected by the mutex, so we clamp the index value and update // the index/current address using local variables, to avoid time-of-check vs time-of-use // race conditions caused by goroutine A incrementing it in between the time goroutine B diff --git a/pkg/agent/run.go b/pkg/agent/run.go index f3342767ad29..93b4e27b6230 100644 --- a/pkg/agent/run.go +++ b/pkg/agent/run.go @@ -322,7 +322,7 @@ func createProxyAndValidateToken(ctx context.Context, cfg *cmds.Agent) (proxy.Pr if err := os.MkdirAll(agentDir, 0700); err != nil { return nil, err } - isIPv6 := utilsnet.IsIPv6(net.ParseIP([]string{cfg.NodeIP.String()}[0])) + isIPv6 := utilsnet.IsIPv6(net.ParseIP(util.GetFirstValidIPString(cfg.NodeIP))) proxy, err := proxy.NewSupervisorProxy(ctx, !cfg.DisableLoadBalancer, agentDir, cfg.ServerURL, cfg.LBServerPort, isIPv6) if err != nil { @@ -530,20 +530,31 @@ func setupTunnelAndRunAgent(ctx context.Context, nodeConfig *daemonconfig.Node, } func waitForAPIServerAddresses(ctx context.Context, nodeConfig *daemonconfig.Node, cfg cmds.Agent, proxy proxy.Proxy) error { + var localSupervisorDefault bool + if addresses := proxy.SupervisorAddresses(); len(addresses) > 0 { + host, _, _ := net.SplitHostPort(addresses[0]) + if host == "127.0.0.1" || host == "::1" { + localSupervisorDefault = true + } + } + for { select { case <-time.After(5 * time.Second): - logrus.Info("Waiting for apiserver addresses") + logrus.Info("Waiting for control-plane node to register apiserver addresses in etcd") case addresses := <-cfg.APIAddressCh: for i, a := range addresses { host, _, err := net.SplitHostPort(a) if err == nil { addresses[i] = net.JoinHostPort(host, strconv.Itoa(nodeConfig.ServerHTTPSPort)) - if i == 0 { - proxy.SetSupervisorDefault(addresses[i]) - } } } + // If this is an etcd-only node that started up using its local supervisor, + // switch to using a control-plane node as the supervisor. Otherwise, leave the + // configured server address as the default. 
+ if localSupervisorDefault && len(addresses) > 0 { + proxy.SetSupervisorDefault(addresses[0]) + } proxy.Update(addresses) return nil case <-ctx.Done(): diff --git a/pkg/agent/tunnel/tunnel.go b/pkg/agent/tunnel/tunnel.go index 23c6dac404b8..479288e0fb28 100644 --- a/pkg/agent/tunnel/tunnel.go +++ b/pkg/agent/tunnel/tunnel.go @@ -38,6 +38,7 @@ import ( var ( endpointDebounceDelay = time.Second + defaultDialer = net.Dialer{} ) type agentTunnel struct { @@ -45,6 +46,7 @@ type agentTunnel struct { cidrs cidranger.Ranger ports map[string]bool mode string + kubeletAddr string kubeletPort string startTime time.Time } @@ -82,6 +84,7 @@ func Setup(ctx context.Context, config *daemonconfig.Node, proxy proxy.Proxy) er cidrs: cidranger.NewPCTrieRanger(), ports: map[string]bool{}, mode: config.EgressSelectorMode, + kubeletAddr: config.AgentConfig.ListenAddress, kubeletPort: fmt.Sprint(ports.KubeletPort), startTime: time.Now().Truncate(time.Second), } @@ -121,18 +124,33 @@ func Setup(ctx context.Context, config *daemonconfig.Node, proxy proxy.Proxy) er // The loadbalancer is only disabled when there is a local apiserver. Servers without a local // apiserver load-balance to themselves initially, then switch over to an apiserver node as soon // as we get some addresses from the code below. + var localSupervisorDefault bool + if addresses := proxy.SupervisorAddresses(); len(addresses) > 0 { + host, _, _ := net.SplitHostPort(addresses[0]) + if host == "127.0.0.1" || host == "::1" { + localSupervisorDefault = true + } + } + if proxy.IsSupervisorLBEnabled() && proxy.SupervisorURL() != "" { logrus.Info("Getting list of apiserver endpoints from server") // If not running an apiserver locally, try to get a list of apiservers from the server we're // connecting to. If that fails, fall back to querying the endpoints list from Kubernetes. This // fallback requires that the server we're joining be running an apiserver, but is the only safe // thing to do if its supervisor is down-level and can't provide us with an endpoint list. 
- if addresses := agentconfig.APIServers(ctx, config, proxy); len(addresses) > 0 { - proxy.SetSupervisorDefault(addresses[0]) + addresses := agentconfig.APIServers(ctx, config, proxy) + logrus.Infof("Got apiserver addresses from supervisor: %v", addresses) + + if len(addresses) > 0 { + if localSupervisorDefault { + proxy.SetSupervisorDefault(addresses[0]) + } proxy.Update(addresses) } else { if endpoint, _ := client.CoreV1().Endpoints(metav1.NamespaceDefault).Get(ctx, "kubernetes", metav1.GetOptions{}); endpoint != nil { - if addresses := util.GetAddresses(endpoint); len(addresses) > 0 { + addresses = util.GetAddresses(endpoint) + logrus.Infof("Got apiserver addresses from kubernetes endpoints: %v", addresses) + if len(addresses) > 0 { proxy.Update(addresses) } } @@ -186,7 +204,7 @@ func (a *agentTunnel) setKubeletPort(ctx context.Context, apiServerReady <-chan return false, nil } a.kubeletPort = kubeletPort - logrus.Infof("Tunnel authorizer set Kubelet Port %s", a.kubeletPort) + logrus.Infof("Tunnel authorizer set Kubelet Port %s", net.JoinHostPort(a.kubeletAddr, a.kubeletPort)) return true, nil }) } @@ -390,7 +408,7 @@ func (a *agentTunnel) authorized(ctx context.Context, proto, address string) boo logrus.Debugf("Tunnel authorizer checking dial request for %s", address) host, port, err := net.SplitHostPort(address) if err == nil { - if a.isKubeletPort(proto, host, port) { + if a.isKubeletOrStreamPort(proto, host, port) { return true } if ip := net.ParseIP(host); ip != nil { @@ -448,7 +466,7 @@ func (a *agentTunnel) connect(rootCtx context.Context, waitGroup *sync.WaitGroup go func() { for { // ConnectToProxy blocks until error or context cancellation - err := remotedialer.ConnectToProxy(ctx, wsURL, nil, auth, ws, onConnect) + err := remotedialer.ConnectToProxyWithDialer(ctx, wsURL, nil, auth, ws, a.dialContext, onConnect) connected = false if err != nil && !errors.Is(err, context.Canceled) { logrus.WithField("url", wsURL).WithError(err).Error("Remotedialer proxy error; reconnecting...") @@ -471,7 +489,21 @@ func (a *agentTunnel) connect(rootCtx context.Context, waitGroup *sync.WaitGroup } } -// isKubeletPort returns true if the connection is to a reserved TCP port on a loopback address. -func (a *agentTunnel) isKubeletPort(proto, host, port string) bool { +// isKubeletOrStreamPort returns true if the connection is to a reserved TCP port on a loopback address. +func (a *agentTunnel) isKubeletOrStreamPort(proto, host, port string) bool { return proto == "tcp" && (host == "127.0.0.1" || host == "::1") && (port == a.kubeletPort || port == daemonconfig.StreamServerPort) } + +// dialContext dials a local connection on behalf of the remote server. If the +// connection is to the kubelet port on the loopback address, the kubelet is dialed +// at its configured bind address. Otherwise, the connection is dialed normally. 
+func (a *agentTunnel) dialContext(ctx context.Context, network, address string) (net.Conn, error) { + host, port, err := net.SplitHostPort(address) + if err != nil { + return nil, err + } + if a.isKubeletOrStreamPort(network, host, port) && port == a.kubeletPort { + address = net.JoinHostPort(a.kubeletAddr, port) + } + return defaultDialer.DialContext(ctx, network, address) +} diff --git a/pkg/cli/cmds/etcd_snapshot.go b/pkg/cli/cmds/etcd_snapshot.go index c885031b3a42..378b394f0a83 100644 --- a/pkg/cli/cmds/etcd_snapshot.go +++ b/pkg/cli/cmds/etcd_snapshot.go @@ -21,11 +21,14 @@ var EtcdSnapshotFlags = []cli.Flag{ Destination: &AgentConfig.NodeName, }, DataDirFlag, - ServerToken, &cli.StringFlag{ - Name: "server, s", - Usage: "(cluster) Server to connect to", - EnvVar: version.ProgramUpper + "_URL", + Name: "etcd-token,t", + Usage: "(cluster) Shared secret used to authenticate to etcd server", + Destination: &ServerConfig.Token, + }, + &cli.StringFlag{ + Name: "etcd-server, s", + Usage: "(cluster) Server with etcd role to connect to for snapshot management operations", Value: "https://127.0.0.1:6443", Destination: &ServerConfig.ServerURL, }, @@ -100,6 +103,16 @@ var EtcdSnapshotFlags = []cli.Flag{ Usage: "(db) S3 folder", Destination: &ServerConfig.EtcdS3Folder, }, + &cli.StringFlag{ + Name: "s3-proxy,etcd-s3-proxy", + Usage: "(db) Proxy server to use when connecting to S3, overriding any proxy-related environment variables", + Destination: &ServerConfig.EtcdS3Proxy, + }, + &cli.StringFlag{ + Name: "s3-config-secret,etcd-s3-config-secret", + Usage: "(db) Name of secret in the kube-system namespace used to configure S3, if etcd-s3 is enabled and no other etcd-s3 options are set", + Destination: &ServerConfig.EtcdS3ConfigSecret, + }, &cli.BoolFlag{ Name: "s3-insecure,etcd-s3-insecure", Usage: "(db) Disables S3 over HTTPS", diff --git a/pkg/cli/cmds/server.go b/pkg/cli/cmds/server.go index e179f5237de3..c7fec4f54139 100644 --- a/pkg/cli/cmds/server.go +++ b/pkg/cli/cmds/server.go @@ -104,6 +104,8 @@ type Server struct { EtcdS3BucketName string EtcdS3Region string EtcdS3Folder string + EtcdS3Proxy string + EtcdS3ConfigSecret string EtcdS3Timeout time.Duration EtcdS3Insecure bool ServiceLBNamespace string @@ -430,6 +432,16 @@ var ServerFlags = []cli.Flag{ Usage: "(db) S3 folder", Destination: &ServerConfig.EtcdS3Folder, }, + &cli.StringFlag{ + Name: "etcd-s3-proxy", + Usage: "(db) Proxy server to use when connecting to S3, overriding any proxy-related environment variables", + Destination: &ServerConfig.EtcdS3Proxy, + }, + &cli.StringFlag{ + Name: "etcd-s3-config-secret", + Usage: "(db) Name of secret in the kube-system namespace used to configure S3, if etcd-s3 is enabled and no other etcd-s3 options are set", + Destination: &ServerConfig.EtcdS3ConfigSecret, + }, &cli.BoolFlag{ Name: "etcd-s3-insecure", Usage: "(db) Disables S3 over HTTPS", diff --git a/pkg/cli/etcdsnapshot/etcd_snapshot.go b/pkg/cli/etcdsnapshot/etcd_snapshot.go index b6e774affec8..876b0ea7dec5 100644 --- a/pkg/cli/etcdsnapshot/etcd_snapshot.go +++ b/pkg/cli/etcdsnapshot/etcd_snapshot.go @@ -16,6 +16,7 @@ import ( "github.com/k3s-io/k3s/pkg/cli/cmds" "github.com/k3s-io/k3s/pkg/clientaccess" "github.com/k3s-io/k3s/pkg/cluster/managed" + "github.com/k3s-io/k3s/pkg/daemons/config" "github.com/k3s-io/k3s/pkg/etcd" "github.com/k3s-io/k3s/pkg/proctitle" "github.com/k3s-io/k3s/pkg/server" @@ -50,17 +51,20 @@ func commandSetup(app *cli.Context, cfg *cmds.Server) (*etcd.SnapshotRequest, *c } if cfg.EtcdS3 { - sr.S3 = 
&etcd.SnapshotRequestS3{} - sr.S3.AccessKey = cfg.EtcdS3AccessKey - sr.S3.Bucket = cfg.EtcdS3BucketName - sr.S3.Endpoint = cfg.EtcdS3Endpoint - sr.S3.EndpointCA = cfg.EtcdS3EndpointCA - sr.S3.Folder = cfg.EtcdS3Folder - sr.S3.Insecure = cfg.EtcdS3Insecure - sr.S3.Region = cfg.EtcdS3Region - sr.S3.SecretKey = cfg.EtcdS3SecretKey - sr.S3.SkipSSLVerify = cfg.EtcdS3SkipSSLVerify - sr.S3.Timeout = metav1.Duration{Duration: cfg.EtcdS3Timeout} + sr.S3 = &config.EtcdS3{ + AccessKey: cfg.EtcdS3AccessKey, + Bucket: cfg.EtcdS3BucketName, + ConfigSecret: cfg.EtcdS3ConfigSecret, + Endpoint: cfg.EtcdS3Endpoint, + EndpointCA: cfg.EtcdS3EndpointCA, + Folder: cfg.EtcdS3Folder, + Insecure: cfg.EtcdS3Insecure, + Proxy: cfg.EtcdS3Proxy, + Region: cfg.EtcdS3Region, + SecretKey: cfg.EtcdS3SecretKey, + SkipSSLVerify: cfg.EtcdS3SkipSSLVerify, + Timeout: metav1.Duration{Duration: cfg.EtcdS3Timeout}, + } // extend request timeout to allow the S3 operation to complete timeout += cfg.EtcdS3Timeout } diff --git a/pkg/cli/server/server.go b/pkg/cli/server/server.go index 7fd735bba495..f3a26700a8a0 100644 --- a/pkg/cli/server/server.go +++ b/pkg/cli/server/server.go @@ -32,6 +32,7 @@ import ( "github.com/rancher/wrangler/pkg/signals" "github.com/sirupsen/logrus" "github.com/urfave/cli" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilnet "k8s.io/apimachinery/pkg/util/net" kubeapiserverflag "k8s.io/component-base/cli/flag" "k8s.io/kubernetes/pkg/controlplane/apiserver/options" @@ -186,17 +187,22 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont serverConfig.ControlConfig.EtcdSnapshotCron = cfg.EtcdSnapshotCron serverConfig.ControlConfig.EtcdSnapshotDir = cfg.EtcdSnapshotDir serverConfig.ControlConfig.EtcdSnapshotRetention = cfg.EtcdSnapshotRetention - serverConfig.ControlConfig.EtcdS3 = cfg.EtcdS3 - serverConfig.ControlConfig.EtcdS3Endpoint = cfg.EtcdS3Endpoint - serverConfig.ControlConfig.EtcdS3EndpointCA = cfg.EtcdS3EndpointCA - serverConfig.ControlConfig.EtcdS3SkipSSLVerify = cfg.EtcdS3SkipSSLVerify - serverConfig.ControlConfig.EtcdS3AccessKey = cfg.EtcdS3AccessKey - serverConfig.ControlConfig.EtcdS3SecretKey = cfg.EtcdS3SecretKey - serverConfig.ControlConfig.EtcdS3BucketName = cfg.EtcdS3BucketName - serverConfig.ControlConfig.EtcdS3Region = cfg.EtcdS3Region - serverConfig.ControlConfig.EtcdS3Folder = cfg.EtcdS3Folder - serverConfig.ControlConfig.EtcdS3Insecure = cfg.EtcdS3Insecure - serverConfig.ControlConfig.EtcdS3Timeout = cfg.EtcdS3Timeout + if cfg.EtcdS3 { + serverConfig.ControlConfig.EtcdS3 = &config.EtcdS3{ + AccessKey: cfg.EtcdS3AccessKey, + Bucket: cfg.EtcdS3BucketName, + ConfigSecret: cfg.EtcdS3ConfigSecret, + Endpoint: cfg.EtcdS3Endpoint, + EndpointCA: cfg.EtcdS3EndpointCA, + Folder: cfg.EtcdS3Folder, + Insecure: cfg.EtcdS3Insecure, + Proxy: cfg.EtcdS3Proxy, + Region: cfg.EtcdS3Region, + SecretKey: cfg.EtcdS3SecretKey, + SkipSSLVerify: cfg.EtcdS3SkipSSLVerify, + Timeout: metav1.Duration{Duration: cfg.EtcdS3Timeout}, + } + } } else { logrus.Info("ETCD snapshots are disabled") } diff --git a/pkg/configfilearg/defaultparser_test.go b/pkg/configfilearg/defaultparser_test.go index e43a0c154598..8ae8decc26fa 100644 --- a/pkg/configfilearg/defaultparser_test.go +++ b/pkg/configfilearg/defaultparser_test.go @@ -48,7 +48,7 @@ func Test_UnitMustParse(t *testing.T) { name: "Etcd-snapshot with config with known and unknown flags", args: []string{"k3s", "etcd-snapshot", "save"}, config: "./testdata/defaultdata.yaml", - want: []string{"k3s", "etcd-snapshot", "save", "--token=12345", 
"--etcd-s3=true", "--etcd-s3-bucket=my-backup"}, + want: []string{"k3s", "etcd-snapshot", "save", "--etcd-s3=true", "--etcd-s3-bucket=my-backup"}, }, { name: "Agent with known flags", diff --git a/pkg/daemons/agent/agent_linux.go b/pkg/daemons/agent/agent_linux.go index 23f7b46a6438..c0af31f78fa0 100644 --- a/pkg/daemons/agent/agent_linux.go +++ b/pkg/daemons/agent/agent_linux.go @@ -34,8 +34,7 @@ func createRootlessConfig(argsMap map[string]string, controllers map[string]bool func kubeProxyArgs(cfg *config.Agent) map[string]string { bindAddress := "127.0.0.1" - isIPv6 := utilsnet.IsIPv6(net.ParseIP([]string{cfg.NodeIP}[0])) - if isIPv6 { + if utilsnet.IsIPv6(net.ParseIP(cfg.NodeIP)) { bindAddress = "::1" } argsMap := map[string]string{ @@ -67,8 +66,7 @@ func kubeProxyArgs(cfg *config.Agent) map[string]string { func kubeletArgs(cfg *config.Agent) map[string]string { bindAddress := "127.0.0.1" - isIPv6 := utilsnet.IsIPv6(net.ParseIP([]string{cfg.NodeIP}[0])) - if isIPv6 { + if utilsnet.IsIPv6(net.ParseIP(cfg.NodeIP)) { bindAddress = "::1" } argsMap := map[string]string{ diff --git a/pkg/daemons/agent/agent_windows.go b/pkg/daemons/agent/agent_windows.go index eb020afdb8e5..11d6605b241f 100644 --- a/pkg/daemons/agent/agent_windows.go +++ b/pkg/daemons/agent/agent_windows.go @@ -4,6 +4,7 @@ package agent import ( + "net" "os" "path/filepath" "strings" @@ -11,8 +12,8 @@ import ( "github.com/k3s-io/k3s/pkg/daemons/config" "github.com/k3s-io/k3s/pkg/util" "github.com/sirupsen/logrus" - "k8s.io/apimachinery/pkg/util/net" "k8s.io/kubernetes/pkg/kubeapiserver/authorizer/modes" + utilsnet "k8s.io/utils/net" ) const ( @@ -21,8 +22,7 @@ const ( func kubeProxyArgs(cfg *config.Agent) map[string]string { bindAddress := "127.0.0.1" - _, IPv6only, _ := util.GetFirstString([]string{cfg.NodeIP}) - if IPv6only { + if utilsnet.IsIPv6(net.ParseIP(cfg.NodeIP)) { bindAddress = "::1" } argsMap := map[string]string{ @@ -98,9 +98,22 @@ func kubeletArgs(cfg *config.Agent) map[string]string { if cfg.NodeName != "" { argsMap["hostname-override"] = cfg.NodeName } - defaultIP, err := net.ChooseHostInterface() - if err != nil || defaultIP.String() != cfg.NodeIP { - argsMap["node-ip"] = cfg.NodeIP + + // If the embedded CCM is disabled, don't assume that dual-stack node IPs are safe. + // When using an external CCM, the user wants dual-stack node IPs, they will need to set the node-ip kubelet arg directly. + // This should be fine since most cloud providers have their own way of finding node IPs that doesn't depend on the kubelet + // setting them. + if cfg.DisableCCM { + dualStack, err := utilsnet.IsDualStackIPs(cfg.NodeIPs) + if err == nil && !dualStack { + argsMap["node-ip"] = cfg.NodeIP + } + } else { + // Cluster is using the embedded CCM, we know that the feature-gate will be enabled there as well. 
+ argsMap["feature-gates"] = util.AddFeatureGate(argsMap["feature-gates"], "CloudDualStackNodeIPs=true") + if nodeIPs := util.JoinIPs(cfg.NodeIPs); nodeIPs != "" { + argsMap["node-ip"] = util.JoinIPs(cfg.NodeIPs) + } } argsMap["node-labels"] = strings.Join(cfg.NodeLabels, ",") diff --git a/pkg/daemons/config/types.go b/pkg/daemons/config/types.go index b6175b63c3a0..dc1d7221f2f0 100644 --- a/pkg/daemons/config/types.go +++ b/pkg/daemons/config/types.go @@ -8,13 +8,13 @@ import ( "sort" "strings" "sync" - "time" "github.com/k3s-io/k3s/pkg/generated/controllers/k3s.cattle.io" "github.com/k3s-io/kine/pkg/endpoint" "github.com/rancher/wharfie/pkg/registries" "github.com/rancher/wrangler/pkg/generated/controllers/core" "github.com/rancher/wrangler/pkg/leader" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilnet "k8s.io/apimachinery/pkg/util/net" "k8s.io/apiserver/pkg/authentication/authenticator" "k8s.io/client-go/tools/record" @@ -62,6 +62,21 @@ type Node struct { DefaultRuntime string } +type EtcdS3 struct { + AccessKey string `json:"accessKey,omitempty"` + Bucket string `json:"bucket,omitempty"` + ConfigSecret string `json:"configSecret,omitempty"` + Endpoint string `json:"endpoint,omitempty"` + EndpointCA string `json:"endpointCA,omitempty"` + Folder string `json:"folder,omitempty"` + Proxy string `json:"proxy,omitempty"` + Region string `json:"region,omitempty"` + SecretKey string `json:"secretKey,omitempty"` + Insecure bool `json:"insecure,omitempty"` + SkipSSLVerify bool `json:"skipSSLVerify,omitempty"` + Timeout metav1.Duration `json:"timeout,omitempty"` +} + type Containerd struct { Address string Log string @@ -216,27 +231,17 @@ type Control struct { EncryptSkip bool MinTLSVersion string CipherSuites []string - TLSMinVersion uint16 `json:"-"` - TLSCipherSuites []uint16 `json:"-"` - EtcdSnapshotName string `json:"-"` - EtcdDisableSnapshots bool `json:"-"` - EtcdExposeMetrics bool `json:"-"` - EtcdSnapshotDir string `json:"-"` - EtcdSnapshotCron string `json:"-"` - EtcdSnapshotRetention int `json:"-"` - EtcdSnapshotCompress bool `json:"-"` - EtcdListFormat string `json:"-"` - EtcdS3 bool `json:"-"` - EtcdS3Endpoint string `json:"-"` - EtcdS3EndpointCA string `json:"-"` - EtcdS3SkipSSLVerify bool `json:"-"` - EtcdS3AccessKey string `json:"-"` - EtcdS3SecretKey string `json:"-"` - EtcdS3BucketName string `json:"-"` - EtcdS3Region string `json:"-"` - EtcdS3Folder string `json:"-"` - EtcdS3Timeout time.Duration `json:"-"` - EtcdS3Insecure bool `json:"-"` + TLSMinVersion uint16 `json:"-"` + TLSCipherSuites []uint16 `json:"-"` + EtcdSnapshotName string `json:"-"` + EtcdDisableSnapshots bool `json:"-"` + EtcdExposeMetrics bool `json:"-"` + EtcdSnapshotDir string `json:"-"` + EtcdSnapshotCron string `json:"-"` + EtcdSnapshotRetention int `json:"-"` + EtcdSnapshotCompress bool `json:"-"` + EtcdListFormat string `json:"-"` + EtcdS3 *EtcdS3 `json:"-"` ServerNodeName string VLevel int VModule string diff --git a/pkg/daemons/control/tunnel.go b/pkg/daemons/control/tunnel.go index 86c685318b3f..fba58aa4f77b 100644 --- a/pkg/daemons/control/tunnel.go +++ b/pkg/daemons/control/tunnel.go @@ -3,7 +3,6 @@ package control import ( "bufio" "context" - "fmt" "io" "net" "net/http" @@ -197,7 +196,6 @@ func (t *TunnelServer) dialBackend(ctx context.Context, addr string) (net.Conn, if err != nil { return nil, err } - loopback := t.config.Loopback(true) var nodeName string var toKubelet, useTunnel bool @@ -224,14 +222,17 @@ func (t *TunnelServer) dialBackend(ctx context.Context, addr string) (net.Conn, 
useTunnel = true } - // Always dial kubelet via the loopback address. - if toKubelet { - addr = fmt.Sprintf("%s:%s", loopback, port) - } - // If connecting to something hosted by the local node, don't tunnel if nodeName == t.config.ServerNodeName { useTunnel = false + if toKubelet { + // Dial local kubelet at the configured bind address + addr = net.JoinHostPort(t.config.BindAddress, port) + } + } else if toKubelet { + // Dial remote kubelet via the loopback address, the remotedialer client + // will ensure that it hits the right local address. + addr = net.JoinHostPort(t.config.Loopback(false), port) } if useTunnel { diff --git a/pkg/deploy/zz_generated_bindata.go b/pkg/deploy/zz_generated_bindata.go index dc7984c584af..9eb4d80d19cd 100644 --- a/pkg/deploy/zz_generated_bindata.go +++ b/pkg/deploy/zz_generated_bindata.go @@ -132,7 +132,7 @@ func corednsYaml() (*asset, error) { return a, nil } -var _localStorageYaml = []byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xb4\x56\x5f\x6f\xdb\x36\x10\x7f\xd7\xa7\xb8\x69\xcb\xcb\x50\xca\xc9\x06\x2c\x03\xdf\xbc\xd8\x69\x03\x38\xb6\x61\xbb\x1d\x8a\xa2\x30\x68\xea\x6c\xb3\xa1\x48\x82\xa4\xdc\x7a\x59\xbe\xfb\x40\x52\x76\xe4\x24\x4d\x1c\x6c\xd3\x8b\xa0\xe3\xdd\xef\x8e\xf7\xbb\x3f\x62\x46\x7c\x40\xeb\x84\x56\x14\x36\x67\xd9\x8d\x50\x25\x85\x29\xda\x8d\xe0\xd8\xe5\x5c\xd7\xca\x67\x15\x7a\x56\x32\xcf\x68\x06\xa0\x58\x85\x14\xa4\xe6\x4c\x12\xc3\xfc\x9a\x18\xab\x37\x22\xd8\xa3\x25\x2e\xd9\x11\xd6\x18\x26\x75\x67\x18\x47\x0a\x37\xf5\x02\x89\xdb\x3a\x8f\x55\x46\x08\xc9\xda\x9e\xed\x82\xf1\x82\xd5\x7e\xad\xad\xf8\x8b\x79\xa1\x55\x71\xf3\xbb\x2b\x84\xee\xec\x63\xba\x90\xb5\xf3\x68\x27\x5a\xe2\xf1\x01\xd9\xa0\x6d\x6b\x89\x8e\x66\x04\x98\x11\x6f\xad\xae\x8d\xa3\xf0\x29\xcf\x3f\x67\x00\x16\x9d\xae\x2d\xc7\x28\x51\xba\x44\x97\xbf\x81\xdc\x84\xb0\x9c\x47\xe5\x37\x5a\xd6\x15\x72\xc9\x44\x15\x4f\xb8\x56\x4b\xb1\xaa\x98\x49\x7a\xba\x74\x1d\xa9\x57\x11\x6a\x83\x76\x11\x61\x56\xe8\xc3\xa1\x14\x2e\xbe\xbf\x32\xcf\xd7\xf9\xe7\x97\xdd\xa3\x2a\x8d\x16\xca\x3f\x19\xc2\xde\xdf\xa1\xaf\x9f\x8f\x02\xde\x60\x40\x3d\x30\xe4\x16\x99\xc7\x08\xfa\x74\x7c\xce\x6b\xcb\x56\xd8\xd0\xf0\x18\xb4\x39\xe7\x92\x39\x87\xee\xb8\x0c\xfc\x2b\xd2\xff\x10\xaa\x14\x6a\x75\x3c\xf7\x0b\xa1\xca\x2c\x14\xc0\x04\x97\x41\x79\x77\xbd\x67\x1c\x67\x00\x8f\x8b\xed\x98\x12\x73\xf5\xe2\x0b\x72\x1f\xab\xec\xc9\x16\xfa\xbf\x1a\x87\x19\xe3\xee\xd3\xd5\x43\x23\xf5\xb6\xc2\x57\xf4\xec\xf7\x5d\x39\x83\x9c\x46\xda\x93\xee\x3b\x11\x38\xdf\x0e\x44\x25\x3c\x85\xd3\x0c\xc0\x79\xcb\x3c\xae\xb6\x41\x0b\xc0\x6f\x0d\x52\x98\x68\x29\x85\x5a\xbd\x37\x25\xf3\x18\xe5\xb6\x2d\x49\xaa\x00\x15\xfb\xf6\x5e\xb1\x0d\x13\x92\x2d\x24\x52\x38\x0b\x70\x28\x91\x7b\x6d\x93\x4e\x15\xaa\x66\xc0\x16\x28\xdd\xce\x88\x19\xf3\xcc\x35\x3c\x56\x46\xee\x5d\xb4\xef\x1f\x1e\x79\x80\xf4\x12\x16\xc0\xee\xf6\xe1\x31\x56\x68\x2b\xfc\xf6\x22\x14\xfb\x30\x26\x33\x4f\x49\x22\x61\x66\x10\x6e\x85\x17\x9c\xc9\xbc\xd1\x77\x07\xdc\x0f\x5f\x47\x7c\x4c\xa5\x96\x68\x63\x61\xb6\x22\x06\x20\x70\x83\x5b\x0a\xf9\x45\xe3\xaf\x5b\x96\x5a\xb9\x91\x92\xdb\xbc\xa5\x05\xa0\x4d\xb0\xd6\x96\x42\xde\xff\x26\x9c\x77\xf9\x13\x20\x31\xf2\x50\xbc\x45\x20\xdd\x2a\xf4\x18\x7b\x8f\x6b\xe5\xad\x96\xc4\x48\xa6\xf0\x15\xb8\x00\xb8\x5c\x22\xf7\x14\xf2\xa1\x9e\xf2\x35\x96\xb5\xc4\xd7\x38\xae\x58\x68\xb9\xff\xca\x63\xb8\x06\x13\x0a\xed\x3e\x83\xe4\xa5\x3e\x48\x8f\xa8\xd8\x2a\x10\x7c\x72\x3b\xfd\x38\x9d\xf5\xaf\xe7\xbd\xfe\x65\xf7\xfd\x60\x36\x9f\xf4\xdf\x5e\x4d\x67\x93\x8f\x77\x27\x96\x29\xbe\x46\xdb\x79\x1a\x89\x6e\x4e\x8b\xd3\xe2\x97\xf3\xfc\x10\x72\x5c\x4b\x39\xd6\x52\xf0\x2d\x85\xab\xe5\x50
\xfb\xb1\x45\x87\x7b\xca\x43\xc4\x55\xc5\x54\x79\x4f\x38\x79\x29\x54\x02\xce\x33\xeb\x5b\xdf\x84\xa4\x0d\xd5\x12\x75\xd0\xf3\x4e\x92\x36\xaf\xe2\x8b\xd3\x6a\xaf\x91\xf6\xcb\x75\xa8\x3e\xd7\xf6\x9d\x92\x95\x2c\x48\x52\x6a\xe5\xbe\x0a\xfa\x63\xe6\xd7\xf4\xc0\xc1\x5e\x03\xd5\xe6\x31\xd8\x78\xd4\x9b\x0f\xbb\xd7\xfd\xe9\xb8\x7b\xd1\x6f\x81\x6d\x98\xac\xf1\xd2\xea\x8a\x1e\xb0\xbb\x14\x28\xcb\x66\x78\x3f\x92\x27\xdf\xbb\x2e\x2f\xf6\x33\x2c\x6b\xdf\xea\x15\x17\x4a\xf2\x6b\x66\x0e\xbd\x3d\x2a\x99\x26\xbf\x0f\xe7\xf0\xe1\xba\xbc\x9f\xc8\xd3\x24\x8f\x93\xe3\xd9\x99\x1c\x16\x94\x52\xda\xb7\xbb\xbe\xc4\x25\xab\xa5\xff\x10\x63\x9d\xc5\xf1\x9a\x47\x8b\x54\x5a\xed\x15\xfc\xa0\x97\x84\x23\x8d\x31\x89\xc7\x14\x72\x6f\x6b\xcc\xb3\x76\x9d\x42\x53\xc7\xc1\xa0\x15\x48\x4a\x4d\xb3\x6e\xaf\x75\x89\x14\xfe\x64\xc2\x5f\x6a\x7b\x29\xac\xf3\x17\x5a\xb9\xba\x42\x9b\xd9\xf4\x5f\xb4\xab\xe9\x1e\x4a\xf4\x18\x13\xd3\xec\xd0\x5d\x46\xb3\x07\xff\x98\xcf\xae\xa6\x7d\xfd\x7e\x67\x2b\xed\x0c\x5b\xa5\x4c\xe1\x6f\x12\x13\x72\xdb\x50\x17\x47\x4c\x28\x90\x6b\x66\x72\xfa\xa9\x91\xde\xee\x89\x8d\xe7\x39\xcd\x77\x9d\x3d\xee\xce\xde\xcd\x2f\x47\x93\xf9\x70\x34\x9c\x0f\xae\xa6\xb3\x7e\x6f\x3e\x1c\xf5\xfa\xd3\xfc\xcd\xbd\x4d\x88\xce\xe5\xf4\x53\x7e\x72\xbb\xb3\x1b\x8c\x2e\xba\x83\xf9\x74\x36\x9a\x74\xdf\xf6\x23\xca\xdd\x49\xfc\x13\x0a\xcf\x5d\xf3\x4e\xdf\x77\x71\xbf\xf9\xf0\xf7\xd1\x04\xfb\xe3\x0f\x9d\x85\x50\x1d\xb7\x4e\x5c\xa2\x07\x82\x75\x5a\x5d\x37\xa5\xb0\x40\x2a\x38\x3d\x3f\x3f\x07\x62\x20\xff\xe9\xf6\xc3\x68\x30\xef\x5d\x4d\xee\x12\xf3\x7c\x5d\xe9\x12\xce\x4f\x4f\xdb\x47\x9d\xa2\xc8\xe3\x1a\x64\xb6\xd4\x5f\xd5\x11\x8e\x6c\x05\xc4\x2e\x1f\xc2\xaf\x51\x1a\xb4\x63\x5d\x16\x5b\x56\xc9\x3d\xcc\x03\x12\x83\x28\xf1\x3c\xd6\xe5\x93\x1b\x37\x51\x9b\xd0\x88\x69\x94\xda\x6b\xf5\xfb\x23\xfa\x81\x11\xbc\x6e\x2c\x57\xc2\x5a\x6d\xb1\x24\x52\x2c\x2c\xb3\x5b\xb2\xa8\xdd\x76\xa1\xbf\xd1\xb3\xe2\xd7\xdf\x8a\xb3\x63\xe7\xf2\x3f\x01\x00\x00\xff\xff\x68\x8a\xdf\xd2\x1a\x0d\x00\x00") +var _localStorageYaml = 
[]byte("\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xb4\x56\xdf\x6f\xdb\xb6\x13\x7f\xd7\x5f\x71\x5f\x7d\x97\x97\xa1\x94\x93\x0d\x58\x0a\xbe\x79\xb1\xd3\x06\x70\x6c\xc3\x76\x3b\x14\x45\x61\xd0\xd4\xd9\x66\x43\x91\x04\x49\xb9\xf5\xb2\xfc\xef\x03\x49\xd9\x91\x93\x34\x71\xb0\x4d\x2f\x82\x8e\x77\x9f\x3b\xde\xe7\x7e\x88\x19\xf1\x11\xad\x13\x5a\x51\xd8\x9c\x65\x37\x42\x95\x14\xa6\x68\x37\x82\x63\x97\x73\x5d\x2b\x9f\x55\xe8\x59\xc9\x3c\xa3\x19\x80\x62\x15\x52\x90\x9a\x33\x49\x0c\xf3\x6b\x62\xac\xde\x88\x60\x8f\x96\xb8\x64\x47\x58\x63\x98\xd4\x9d\x61\x1c\x29\xdc\xd4\x0b\x24\x6e\xeb\x3c\x56\x19\x21\x24\x6b\x7b\xb6\x0b\xc6\x0b\x56\xfb\xb5\xb6\xe2\x4f\xe6\x85\x56\xc5\xcd\x5b\x57\x08\xdd\xd9\xc7\x74\x21\x6b\xe7\xd1\x4e\xb4\xc4\xe3\x03\xb2\x41\xdb\xd6\x12\x1d\xcd\x08\x30\x23\xde\x59\x5d\x1b\x47\xe1\x73\x9e\x7f\xc9\x00\x2c\x3a\x5d\x5b\x8e\x51\xa2\x74\x89\x2e\x7f\x03\xb9\x09\x61\x39\x8f\xca\x6f\xb4\xac\x2b\xe4\x92\x89\x2a\x9e\x70\xad\x96\x62\x55\x31\x93\xf4\x74\xe9\x3a\x52\xaf\x22\xd4\x06\xed\x22\xc2\xac\xd0\x87\x43\x29\x5c\x7c\x7f\x63\x9e\xaf\xf3\x2f\x2f\xbb\x47\x55\x1a\x2d\x94\x7f\x32\x84\xbd\xbf\x43\x5f\x3f\x1f\x05\xbc\xc1\x80\x7a\x60\xc8\x2d\x32\x8f\x11\xf4\xe9\xf8\x9c\xd7\x96\xad\xb0\xa1\xe1\x31\x68\x73\xce\x25\x73\x0e\xdd\x71\x19\xf8\x47\xa4\xff\x2e\x54\x29\xd4\xea\x78\xee\x17\x42\x95\x59\x28\x80\x09\x2e\x83\xf2\xee\x7a\xcf\x38\xce\x00\x1e\x17\xdb\x31\x25\xe6\xea\xc5\x57\xe4\x3e\x56\xd9\x93\x2d\xf4\x5f\x35\x0e\x33\xc6\xdd\xa7\xab\x87\x46\xea\x6d\x85\xaf\xe8\xd9\x1f\xbb\x72\x06\x39\x8d\xb4\x27\xdd\xf7\x22\x70\xbe\x1d\x88\x4a\x78\x0a\xa7\x19\x80\xf3\x96\x79\x5c\x6d\x83\x16\x80\xdf\x1a\xa4\x30\xd1\x52\x0a\xb5\xfa\x60\x4a\xe6\x31\xca\x6d\x5b\x92\x54\x01\x2a\xf6\xfd\x83\x62\x1b\x26\x24\x5b\x48\xa4\x70\x16\xe0\x50\x22\xf7\xda\x26\x9d\x2a\x54\xcd\x80\x2d\x50\xba\x9d\x11\x33\xe6\x99\x6b\x78\xac\x8c\xdc\xbb\x68\xdf\x3f\x3c\xf2\x00\xe9\x25\x2c\x80\xdd\xed\xc3\x63\xac\xd0\x56\xf8\xed\x45\x28\xf6\x61\x4c\x66\x9e\x92\x44\xc2\xcc\x20\xdc\x0a\x2f\x38\x93\x79\xa3\xef\x0e\xb8\x1f\xbe\x8e\xf8\x98\x4a\x2d\xd1\xc6\xc2\x6c\x45\x0c\x40\xe0\x06\xb7\x14\xf2\x8b\xc6\x5f\xb7\x2c\xb5\x72\x23\x25\xb7\x79\x4b\x0b\x40\x9b\x60\xad\x2d\x85\xbc\xff\x5d\x38\xef\xf2\x27\x40\x62\xe4\xa1\x78\x8b\x40\xba\x55\xe8\x31\xf6\x1e\xd7\xca\x5b\x2d\x89\x91\x4c\xe1\x2b\x70\x01\x70\xb9\x44\xee\x29\xe4\x43\x3d\xe5\x6b\x2c\x6b\x89\xaf\x71\x5c\xb1\xd0\x72\xff\x96\xc7\x70\x0d\x26\x14\xda\x7d\x06\xc9\x4b\x7d\x90\x1e\x51\xb1\x55\x20\xf8\xe4\x76\xfa\x69\x3a\xeb\x5f\xcf\x7b\xfd\xcb\xee\x87\xc1\x6c\x3e\xe9\xbf\xbb\x9a\xce\x26\x9f\xee\x4e\x2c\x53\x7c\x8d\xb6\xf3\x34\x12\xdd\x9c\x16\xa7\xc5\x2f\x6f\xf3\x43\xc8\x71\x2d\xe5\x58\x4b\xc1\xb7\x14\xae\x96\x43\xed\xc7\x16\x1d\xee\x29\x0f\x11\x57\x15\x53\xe5\x3d\xe1\xe4\xa5\x50\x09\x38\xcf\xac\x6f\x7d\x13\x92\x36\x54\x4b\xd4\x41\xcf\x3b\x49\xda\xbc\x8a\xaf\x4e\xab\xbd\x46\xda\x2f\xd7\xa1\xfa\x5c\xdb\x77\x4a\x56\xb2\x20\x49\xa9\x95\xfb\x2a\xe8\x8f\x99\x5f\xd3\x03\x07\x7b\x0d\x54\x9b\xc7\x60\xe3\x51\x6f\x3e\xec\x5e\xf7\xa7\xe3\xee\x45\xbf\x05\xb6\x61\xb2\xc6\x4b\xab\x2b\x7a\xc0\xee\x52\xa0\x2c\x9b\xe1\xfd\x48\x9e\x7c\xef\xba\xbc\xd8\xcf\xb0\xac\x7d\xab\x57\x5c\x28\xc9\xaf\x99\x39\xf4\xf6\xa8\x64\x9a\xfc\x3e\x9c\xc3\x87\xeb\xf2\x7e\x22\x4f\x93\x3c\x4e\x8e\x67\x67\x72\x58\x50\x4a\x69\xdf\xee\xfa\x12\x97\xac\x96\xfe\x63\x8c\x75\x16\xc7\x6b\x1e\x2d\x52\x69\xb5\x57\xf0\x83\x5e\x12\x8e\x34\xc6\x24\x1e\x53\xc8\xbd\xad\x31\xcf\xda\x75\x0a\x4d\x1d\x07\x83\x56\x20\x29\x35\xcd\xba\xbd\xd6\x25\x52\xf8\x83\x09\x7f\xa9\xed\xa5\xb0\xce\x5f\x68\xe5\xea\x0a\x6d\x66\xd3\x7f\xd1\xae\xa6\x7b\x28\xd1\x63\x4c\x4c\xb3\x43\x77\x19\xcd\x1e\xfc\x6
3\x3e\xbb\x9a\xf6\xf5\xfb\x83\xad\xb4\x33\x6c\x95\x32\x85\xbf\x48\x4c\xc8\x6d\x43\x5d\x1c\x31\xa1\x40\xae\x99\xc9\xe9\xe7\x46\x7a\xbb\x27\x36\x9e\xe7\x34\xdf\x75\xf6\xb8\x3b\x7b\x3f\xbf\x1c\x4d\xe6\xc3\xd1\x70\x3e\xb8\x9a\xce\xfa\xbd\xf9\x70\xd4\xeb\x4f\xf3\x37\xf7\x36\x21\x3a\x97\xd3\xcf\xf9\xc9\xed\xce\x6e\x30\xba\xe8\x0e\xe6\xd3\xd9\x68\xd2\x7d\xd7\x8f\x28\x77\x27\xf1\x4f\x28\x3c\x77\xcd\x3b\x7d\xdf\xc5\xfd\xe6\xc3\xdf\x47\x13\xec\xff\xff\xd7\x59\x08\xd5\x71\xeb\xc4\x25\x7a\x20\x58\xa7\xd5\x75\x53\x0a\x0b\xa4\x82\xd3\xf3\xf3\x73\x20\x06\xf2\x9f\x6e\x3f\x8e\x06\xf3\xde\xd5\xe4\x2e\x31\xcf\xd7\x95\x2e\xe1\xfc\xf4\xb4\x7d\xd4\x29\x8a\x3c\xae\x41\x66\x4b\xfd\x4d\x1d\xe1\xc8\x56\x40\xec\xf2\x21\xfc\x1a\xa5\x41\x3b\xd6\x65\xb1\x65\x95\xdc\xc3\x3c\x20\x31\x88\x12\xcf\x63\x5d\x3e\xb9\x71\x13\xb5\x09\x8d\x98\x46\xa9\xbd\x56\x7f\x3c\xa2\x1f\x18\xc1\xeb\xc6\x72\x25\xac\xd5\x16\x4b\x22\xc5\xc2\x32\xbb\x25\x8b\xda\x6d\x17\xfa\x3b\x3d\x2b\x7e\xfd\xad\x38\x3b\x76\x2e\xff\x1d\x00\x00\xff\xff\xf6\x4c\xc2\x69\x1a\x0d\x00\x00") func localStorageYamlBytes() ([]byte, error) { return bindataRead( diff --git a/pkg/etcd/etcd.go b/pkg/etcd/etcd.go index e3907e696e1f..b4b58ec7a3fa 100644 --- a/pkg/etcd/etcd.go +++ b/pkg/etcd/etcd.go @@ -12,7 +12,6 @@ import ( "net/url" "os" "path/filepath" - "regexp" "sort" "strconv" "strings" @@ -26,6 +25,8 @@ import ( "github.com/k3s-io/k3s/pkg/daemons/config" "github.com/k3s-io/k3s/pkg/daemons/control/deps" "github.com/k3s-io/k3s/pkg/daemons/executor" + "github.com/k3s-io/k3s/pkg/etcd/s3" + "github.com/k3s-io/k3s/pkg/etcd/snapshot" "github.com/k3s-io/k3s/pkg/server/auth" "github.com/k3s-io/k3s/pkg/util" "github.com/k3s-io/k3s/pkg/version" @@ -40,10 +41,8 @@ import ( "github.com/sirupsen/logrus" "go.etcd.io/etcd/api/v3/etcdserverpb" "go.etcd.io/etcd/api/v3/v3rpc/rpctypes" - "go.etcd.io/etcd/client/pkg/v3/logutil" clientv3 "go.etcd.io/etcd/client/v3" - "go.etcd.io/etcd/etcdutl/v3/snapshot" - "go.uber.org/zap" + snapshotv3 "go.etcd.io/etcd/etcdutl/v3/snapshot" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" @@ -93,8 +92,6 @@ var ( ErrAddressNotSet = errors.New("apiserver addresses not yet set") ErrNotMember = errNotMember() ErrMemberListFailed = errMemberListFailed() - - invalidKeyChars = regexp.MustCompile(`[^-._a-zA-Z0-9]`) ) type NodeControllerGetter func() controllerv1.NodeController @@ -110,8 +107,8 @@ type ETCD struct { name string address string cron *cron.Cron - s3 *S3 cancel context.CancelFunc + s3 *s3.Controller snapshotMu *sync.Mutex } @@ -128,16 +125,16 @@ type Members struct { Members []*etcdserverpb.Member `json:"members"` } -type MembershipError struct { - Self string - Members []string +type membershipError struct { + self string + members []string } -func (e *MembershipError) Error() string { - return fmt.Sprintf("this server is a not a member of the etcd cluster. Found %v, expect: %s", e.Members, e.Self) +func (e *membershipError) Error() string { + return fmt.Sprintf("this server is a not a member of the etcd cluster. 
Found %v, expect: %s", e.members, e.self) } -func (e *MembershipError) Is(target error) bool { +func (e *membershipError) Is(target error) bool { switch target { case ErrNotMember: return true @@ -145,17 +142,17 @@ func (e *MembershipError) Is(target error) bool { return false } -func errNotMember() error { return &MembershipError{} } +func errNotMember() error { return &membershipError{} } -type MemberListError struct { - Err error +type memberListError struct { + err error } -func (e *MemberListError) Error() string { - return fmt.Sprintf("failed to get MemberList from server: %v", e.Err) +func (e *memberListError) Error() string { + return fmt.Sprintf("failed to get MemberList from server: %v", e.err) } -func (e *MemberListError) Is(target error) bool { +func (e *memberListError) Is(target error) bool { switch target { case ErrMemberListFailed: return true @@ -163,7 +160,7 @@ func (e *MemberListError) Is(target error) bool { return false } -func errMemberListFailed() error { return &MemberListError{} } +func errMemberListFailed() error { return &memberListError{} } // NewETCD creates a new value of type // ETCD with initialized cron and snapshot mutex values. @@ -256,7 +253,7 @@ func (e *ETCD) Test(ctx context.Context) error { memberNameUrls = append(memberNameUrls, member.Name+"="+member.PeerURLs[0]) } } - return &MembershipError{Members: memberNameUrls, Self: e.name + "=" + e.peerURL()} + return &membershipError{members: memberNameUrls, self: e.name + "=" + e.peerURL()} } // dbDir returns the path to dataDir/db/etcd @@ -391,14 +388,25 @@ func (e *ETCD) Reset(ctx context.Context, rebootstrap func() error) error { // If asked to restore from a snapshot, do so if e.config.ClusterResetRestorePath != "" { - if e.config.EtcdS3 { + if e.config.EtcdS3 != nil { logrus.Infof("Retrieving etcd snapshot %s from S3", e.config.ClusterResetRestorePath) - if err := e.initS3IfNil(ctx); err != nil { - return err + s3client, err := e.getS3Client(ctx) + if err != nil { + if errors.Is(err, s3.ErrNoConfigSecret) { + return errors.New("cannot use S3 config secret when restoring snapshot; configuration must be set in CLI or config file") + } else { + return errors.Wrap(err, "failed to initialize S3 client") + } } - if err := e.s3.Download(ctx); err != nil { - return err + dir, err := snapshotDir(e.config, true) + if err != nil { + return errors.Wrap(err, "failed to get the snapshot dir") } + path, err := s3client.Download(ctx, e.config.ClusterResetRestorePath, dir) + if err != nil { + return errors.Wrap(err, "failed to download snapshot from S3") + } + e.config.ClusterResetRestorePath = path logrus.Infof("S3 download complete for %s", e.config.ClusterResetRestorePath) } @@ -442,6 +450,7 @@ func (e *ETCD) Start(ctx context.Context, clientAccessInfo *clientaccess.Info) e } go e.manageLearners(ctx) + go e.getS3Client(ctx) if isInitialized { // check etcd dir permission @@ -1416,7 +1425,7 @@ func ClientURLs(ctx context.Context, clientAccessInfo *clientaccess.Info, selfIP // get the full list from the server we're joining resp, err := clientAccessInfo.Get("/db/info") if err != nil { - return nil, memberList, &MemberListError{Err: err} + return nil, memberList, &memberListError{err: err} } if err := json.Unmarshal(resp, &memberList); err != nil { return nil, memberList, err @@ -1463,13 +1472,13 @@ func (e *ETCD) Restore(ctx context.Context) error { } var restorePath string - if strings.HasSuffix(e.config.ClusterResetRestorePath, compressedExtension) { - snapshotDir, err := snapshotDir(e.config, true) + if 
strings.HasSuffix(e.config.ClusterResetRestorePath, snapshot.CompressedExtension) { + dir, err := snapshotDir(e.config, true) if err != nil { return errors.Wrap(err, "failed to get the snapshot dir") } - decompressSnapshot, err := e.decompressSnapshot(snapshotDir, e.config.ClusterResetRestorePath) + decompressSnapshot, err := e.decompressSnapshot(dir, e.config.ClusterResetRestorePath) if err != nil { return err } @@ -1485,13 +1494,7 @@ func (e *ETCD) Restore(ctx context.Context) error { } logrus.Infof("Pre-restore etcd database moved to %s", oldDataDir) - - lg, err := logutil.CreateDefaultZapLogger(zap.InfoLevel) - if err != nil { - return err - } - - return snapshot.NewV3(lg).Restore(snapshot.RestoreConfig{ + return snapshotv3.NewV3(e.client.GetLogger()).Restore(snapshotv3.RestoreConfig{ SnapshotPath: restorePath, Name: e.name, OutputDataDir: dbDir(e.config), diff --git a/pkg/etcd/etcd_test.go b/pkg/etcd/etcd_test.go index a28dee46e113..5a519bdcffe4 100644 --- a/pkg/etcd/etcd_test.go +++ b/pkg/etcd/etcd_test.go @@ -11,8 +11,10 @@ import ( "github.com/k3s-io/k3s/pkg/clientaccess" "github.com/k3s-io/k3s/pkg/daemons/config" + "github.com/k3s-io/k3s/pkg/etcd/s3" testutil "github.com/k3s-io/k3s/tests" "github.com/robfig/cron/v3" + "github.com/sirupsen/logrus" clientv3 "go.etcd.io/etcd/client/v3" "go.etcd.io/etcd/server/v3/etcdserver" utilnet "k8s.io/apimachinery/pkg/util/net" @@ -47,10 +49,12 @@ func generateTestConfig() *config.Control { EtcdSnapshotName: "etcd-snapshot", EtcdSnapshotCron: "0 */12 * * *", EtcdSnapshotRetention: 5, - EtcdS3Endpoint: "s3.amazonaws.com", - EtcdS3Region: "us-east-1", - SANs: []string{"127.0.0.1", mustGetAddress()}, - CriticalControlArgs: criticalControlArgs, + EtcdS3: &config.EtcdS3{ + Endpoint: "s3.amazonaws.com", + Region: "us-east-1", + }, + SANs: []string{"127.0.0.1", mustGetAddress()}, + CriticalControlArgs: criticalControlArgs, } } @@ -112,6 +116,10 @@ func Test_UnitETCD_IsInitialized(t *testing.T) { want: false, }, } + + // enable logging + logrus.SetLevel(logrus.DebugLevel) + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { e := NewETCD() @@ -227,7 +235,7 @@ func Test_UnitETCD_Start(t *testing.T) { name string address string cron *cron.Cron - s3 *S3 + s3 *s3.Controller } type args struct { clientAccessInfo *clientaccess.Info diff --git a/pkg/etcd/s3.go b/pkg/etcd/s3.go deleted file mode 100644 index 52671e5967d4..000000000000 --- a/pkg/etcd/s3.go +++ /dev/null @@ -1,494 +0,0 @@ -package etcd - -import ( - "context" - "crypto/tls" - "crypto/x509" - "encoding/base64" - "encoding/pem" - "fmt" - "io/ioutil" - "net/http" - "net/textproto" - "os" - "path" - "path/filepath" - "sort" - "strconv" - "strings" - "time" - - "github.com/k3s-io/k3s/pkg/daemons/config" - "github.com/k3s-io/k3s/pkg/util" - "github.com/k3s-io/k3s/pkg/version" - "github.com/minio/minio-go/v7" - "github.com/minio/minio-go/v7/pkg/credentials" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/wait" -) - -var ( - clusterIDKey = textproto.CanonicalMIMEHeaderKey(version.Program + "-cluster-id") - tokenHashKey = textproto.CanonicalMIMEHeaderKey(version.Program + "-token-hash") - nodeNameKey = textproto.CanonicalMIMEHeaderKey(version.Program + "-node-name") -) - -// S3 maintains state for S3 functionality. 
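// A minimal sketch of the etcdutl v3 restore call that the revised Restore
// path above delegates to, now using the etcd client's logger instead of
// constructing its own. All paths, member names, and URLs below are
// illustrative placeholders, not K3s defaults.
func restoreSketch(lg *zap.Logger) error {
	return snapshotv3.NewV3(lg).Restore(snapshotv3.RestoreConfig{
		SnapshotPath:   "/path/to/etcd-snapshot",  // placeholder snapshot path
		Name:           "server01",                // placeholder member name
		OutputDataDir:  "/path/to/db/etcd",        // placeholder data dir
		PeerURLs:       []string{"https://127.0.0.1:2380"},
		InitialCluster: "server01=https://127.0.0.1:2380",
	})
}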
-type S3 struct { - config *config.Control - client *minio.Client - clusterID string - tokenHash string - nodeName string -} - -// newS3 creates a new value of type s3 pointer with a -// copy of the config.Control pointer and initializes -// a new Minio client. -func NewS3(ctx context.Context, config *config.Control) (*S3, error) { - if config.EtcdS3BucketName == "" { - return nil, errors.New("s3 bucket name was not set") - } - tr := http.DefaultTransport - - switch { - case config.EtcdS3EndpointCA != "": - trCA, err := setTransportCA(tr, config.EtcdS3EndpointCA, config.EtcdS3SkipSSLVerify) - if err != nil { - return nil, err - } - tr = trCA - case config.EtcdS3 && config.EtcdS3SkipSSLVerify: - tr.(*http.Transport).TLSClientConfig = &tls.Config{ - InsecureSkipVerify: config.EtcdS3SkipSSLVerify, - } - } - - var creds *credentials.Credentials - if len(config.EtcdS3AccessKey) == 0 && len(config.EtcdS3SecretKey) == 0 { - creds = credentials.NewIAM("") // for running on ec2 instance - } else { - creds = credentials.NewStaticV4(config.EtcdS3AccessKey, config.EtcdS3SecretKey, "") - } - - opt := minio.Options{ - Creds: creds, - Secure: !config.EtcdS3Insecure, - Region: config.EtcdS3Region, - Transport: tr, - BucketLookup: bucketLookupType(config.EtcdS3Endpoint), - } - c, err := minio.New(config.EtcdS3Endpoint, &opt) - if err != nil { - return nil, err - } - - logrus.Infof("Checking if S3 bucket %s exists", config.EtcdS3BucketName) - - ctx, cancel := context.WithTimeout(ctx, config.EtcdS3Timeout) - defer cancel() - - exists, err := c.BucketExists(ctx, config.EtcdS3BucketName) - if err != nil { - return nil, errors.Wrapf(err, "failed to test for existence of bucket %s", config.EtcdS3BucketName) - } - if !exists { - return nil, fmt.Errorf("bucket %s does not exist", config.EtcdS3BucketName) - } - logrus.Infof("S3 bucket %s exists", config.EtcdS3BucketName) - - s3 := &S3{ - config: config, - client: c, - nodeName: os.Getenv("NODE_NAME"), - } - - if config.ClusterReset { - logrus.Debug("Skip setting S3 snapshot cluster ID and token during cluster-reset") - } else { - if err := wait.PollImmediateUntilWithContext(ctx, time.Second, func(ctx context.Context) (bool, error) { - if config.Runtime.Core == nil { - return false, nil - } - - // cluster id hack: see https://groups.google.com/forum/#!msg/kubernetes-sig-architecture/mVGobfD4TpY/nkdbkX1iBwAJ - ns, err := config.Runtime.Core.Core().V1().Namespace().Get(metav1.NamespaceSystem, metav1.GetOptions{}) - if err != nil { - return false, errors.Wrap(err, "failed to set S3 snapshot cluster ID") - } - s3.clusterID = string(ns.UID) - - tokenHash, err := util.GetTokenHash(config) - if err != nil { - return false, errors.Wrap(err, "failed to set S3 snapshot server token hash") - } - s3.tokenHash = tokenHash - - return true, nil - }); err != nil { - return nil, err - } - } - - return s3, nil -} - -// upload uploads the given snapshot to the configured S3 -// compatible backend. 
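// A minimal sketch of the credential-selection and bucket-existence pattern
// used by the removed NewS3 constructor (and retained by the new
// Controller.GetClient further below); endpoint, keys, and bucket here are
// placeholders.
func newClientSketch(ctx context.Context, endpoint, accessKey, secretKey, bucket string) (*minio.Client, error) {
	var creds *credentials.Credentials
	if accessKey == "" && secretKey == "" {
		creds = credentials.NewIAM("") // no static keys: fall back to IAM, e.g. on an EC2 instance
	} else {
		creds = credentials.NewStaticV4(accessKey, secretKey, "")
	}
	mc, err := minio.New(endpoint, &minio.Options{Creds: creds, Secure: true})
	if err != nil {
		return nil, err
	}
	if exists, err := mc.BucketExists(ctx, bucket); err != nil {
		return nil, err
	} else if !exists {
		return nil, fmt.Errorf("bucket %s does not exist", bucket)
	}
	return mc, nil
}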
-func (s *S3) upload(ctx context.Context, snapshot string, extraMetadata *v1.ConfigMap, now time.Time) (*snapshotFile, error) { - basename := filepath.Base(snapshot) - metadata := filepath.Join(filepath.Dir(snapshot), "..", metadataDir, basename) - snapshotKey := path.Join(s.config.EtcdS3Folder, basename) - metadataKey := path.Join(s.config.EtcdS3Folder, metadataDir, basename) - - sf := &snapshotFile{ - Name: basename, - Location: fmt.Sprintf("s3://%s/%s", s.config.EtcdS3BucketName, snapshotKey), - NodeName: "s3", - CreatedAt: &metav1.Time{ - Time: now, - }, - S3: &s3Config{ - Endpoint: s.config.EtcdS3Endpoint, - EndpointCA: s.config.EtcdS3EndpointCA, - SkipSSLVerify: s.config.EtcdS3SkipSSLVerify, - Bucket: s.config.EtcdS3BucketName, - Region: s.config.EtcdS3Region, - Folder: s.config.EtcdS3Folder, - Insecure: s.config.EtcdS3Insecure, - }, - Compressed: strings.HasSuffix(snapshot, compressedExtension), - metadataSource: extraMetadata, - nodeSource: s.nodeName, - } - - logrus.Infof("Uploading snapshot to s3://%s/%s", s.config.EtcdS3BucketName, snapshotKey) - uploadInfo, err := s.uploadSnapshot(ctx, snapshotKey, snapshot) - if err != nil { - sf.Status = failedSnapshotStatus - sf.Message = base64.StdEncoding.EncodeToString([]byte(err.Error())) - } else { - sf.Status = successfulSnapshotStatus - sf.Size = uploadInfo.Size - sf.tokenHash = s.tokenHash - } - if _, err := s.uploadSnapshotMetadata(ctx, metadataKey, metadata); err != nil { - logrus.Warnf("Failed to upload snapshot metadata to S3: %v", err) - } else { - logrus.Infof("Uploaded snapshot metadata s3://%s/%s", s.config.EtcdS3BucketName, metadataKey) - } - return sf, err -} - -// uploadSnapshot uploads the snapshot file to S3 using the minio API. -func (s *S3) uploadSnapshot(ctx context.Context, key, path string) (info minio.UploadInfo, err error) { - opts := minio.PutObjectOptions{ - NumThreads: 2, - UserMetadata: map[string]string{ - clusterIDKey: s.clusterID, - nodeNameKey: s.nodeName, - tokenHashKey: s.tokenHash, - }, - } - if strings.HasSuffix(key, compressedExtension) { - opts.ContentType = "application/zip" - } else { - opts.ContentType = "application/octet-stream" - } - ctx, cancel := context.WithTimeout(ctx, s.config.EtcdS3Timeout) - defer cancel() - - return s.client.FPutObject(ctx, s.config.EtcdS3BucketName, key, path, opts) -} - -// uploadSnapshotMetadata marshals and uploads the snapshot metadata to S3 using the minio API. -// The upload is silently skipped if no extra metadata is provided. -func (s *S3) uploadSnapshotMetadata(ctx context.Context, key, path string) (info minio.UploadInfo, err error) { - if _, err := os.Stat(path); err != nil { - if os.IsNotExist(err) { - return minio.UploadInfo{}, nil - } - return minio.UploadInfo{}, err - } - - opts := minio.PutObjectOptions{ - NumThreads: 2, - ContentType: "application/json", - UserMetadata: map[string]string{ - clusterIDKey: s.clusterID, - nodeNameKey: s.nodeName, - }, - } - ctx, cancel := context.WithTimeout(ctx, s.config.EtcdS3Timeout) - defer cancel() - return s.client.FPutObject(ctx, s.config.EtcdS3BucketName, key, path, opts) -} - -// Download downloads the given snapshot from the configured S3 -// compatible backend. 
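// A minimal sketch of the upload options built by uploadSnapshot above:
// compressed snapshots are tagged application/zip, everything else
// octet-stream, and cluster/node/token identifiers travel as user metadata.
// The metadata key names shown are placeholders for the real
// version.Program-derived header keys.
func uploadSketch(ctx context.Context, mc *minio.Client, bucket, key, localPath string) (minio.UploadInfo, error) {
	opts := minio.PutObjectOptions{
		NumThreads:  2,
		ContentType: "application/octet-stream", // or "application/zip" for compressed snapshots
		UserMetadata: map[string]string{
			"K3s-Cluster-Id": "placeholder",
			"K3s-Node-Name":  "server01",
			"K3s-Token-Hash": "placeholder",
		},
	}
	return mc.FPutObject(ctx, bucket, key, localPath, opts)
}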
-func (s *S3) Download(ctx context.Context) error { - snapshotKey := path.Join(s.config.EtcdS3Folder, s.config.ClusterResetRestorePath) - metadataKey := path.Join(s.config.EtcdS3Folder, metadataDir, s.config.ClusterResetRestorePath) - snapshotDir, err := snapshotDir(s.config, true) - if err != nil { - return errors.Wrap(err, "failed to get the snapshot dir") - } - snapshotFile := filepath.Join(snapshotDir, s.config.ClusterResetRestorePath) - metadataFile := filepath.Join(snapshotDir, "..", metadataDir, s.config.ClusterResetRestorePath) - - if err := s.downloadSnapshot(ctx, snapshotKey, snapshotFile); err != nil { - return err - } - if err := s.downloadSnapshotMetadata(ctx, metadataKey, metadataFile); err != nil { - return err - } - - s.config.ClusterResetRestorePath = snapshotFile - return nil -} - -// downloadSnapshot downloads the snapshot file from S3 using the minio API. -func (s *S3) downloadSnapshot(ctx context.Context, key, file string) error { - logrus.Debugf("Downloading snapshot from s3://%s/%s", s.config.EtcdS3BucketName, key) - ctx, cancel := context.WithTimeout(ctx, s.config.EtcdS3Timeout) - defer cancel() - defer os.Chmod(file, 0600) - return s.client.FGetObject(ctx, s.config.EtcdS3BucketName, key, file, minio.GetObjectOptions{}) -} - -// downloadSnapshotMetadata downloads the snapshot metadata file from S3 using the minio API. -// No error is returned if the metadata file does not exist, as it is optional. -func (s *S3) downloadSnapshotMetadata(ctx context.Context, key, file string) error { - logrus.Debugf("Downloading snapshot metadata from s3://%s/%s", s.config.EtcdS3BucketName, key) - ctx, cancel := context.WithTimeout(ctx, s.config.EtcdS3Timeout) - defer cancel() - defer os.Chmod(file, 0600) - err := s.client.FGetObject(ctx, s.config.EtcdS3BucketName, key, file, minio.GetObjectOptions{}) - if resp := minio.ToErrorResponse(err); resp.StatusCode == http.StatusNotFound { - return nil - } - return err -} - -// snapshotPrefix returns the prefix used in the -// naming of the snapshots. -func (s *S3) snapshotPrefix() string { - return path.Join(s.config.EtcdS3Folder, s.config.EtcdSnapshotName) -} - -// snapshotRetention prunes snapshots in the configured S3 compatible backend for this specific node. -// Returns a list of pruned snapshot names. 
-func (s *S3) snapshotRetention(ctx context.Context) ([]string, error) { - if s.config.EtcdSnapshotRetention < 1 { - return nil, nil - } - logrus.Infof("Applying snapshot retention=%d to snapshots stored in s3://%s/%s", s.config.EtcdSnapshotRetention, s.config.EtcdS3BucketName, s.snapshotPrefix()) - - var snapshotFiles []minio.ObjectInfo - - toCtx, cancel := context.WithTimeout(ctx, s.config.EtcdS3Timeout) - defer cancel() - - opts := minio.ListObjectsOptions{ - Prefix: s.snapshotPrefix(), - Recursive: true, - } - for info := range s.client.ListObjects(toCtx, s.config.EtcdS3BucketName, opts) { - if info.Err != nil { - return nil, info.Err - } - - // skip metadata - if path.Base(path.Dir(info.Key)) == metadataDir { - continue - } - - snapshotFiles = append(snapshotFiles, info) - } - - if len(snapshotFiles) <= s.config.EtcdSnapshotRetention { - return nil, nil - } - - // sort newest-first so we can prune entries past the retention count - sort.Slice(snapshotFiles, func(i, j int) bool { - return snapshotFiles[j].LastModified.Before(snapshotFiles[i].LastModified) - }) - - deleted := []string{} - for _, df := range snapshotFiles[s.config.EtcdSnapshotRetention:] { - logrus.Infof("Removing S3 snapshot: s3://%s/%s", s.config.EtcdS3BucketName, df.Key) - - key := path.Base(df.Key) - if err := s.deleteSnapshot(ctx, key); err != nil { - return deleted, err - } - deleted = append(deleted, key) - } - - return deleted, nil -} - -func (s *S3) deleteSnapshot(ctx context.Context, key string) error { - ctx, cancel := context.WithTimeout(ctx, s.config.EtcdS3Timeout) - defer cancel() - - key = path.Join(s.config.EtcdS3Folder, key) - err := s.client.RemoveObject(ctx, s.config.EtcdS3BucketName, key, minio.RemoveObjectOptions{}) - if err == nil || isNotExist(err) { - metadataKey := path.Join(path.Dir(key), metadataDir, path.Base(key)) - if merr := s.client.RemoveObject(ctx, s.config.EtcdS3BucketName, metadataKey, minio.RemoveObjectOptions{}); merr != nil && !isNotExist(merr) { - err = merr - } - } - - return err -} - -// listSnapshots provides a list of currently stored -// snapshots in S3 along with their relevant -// metadata. 
-func (s *S3) listSnapshots(ctx context.Context) (map[string]snapshotFile, error) { - snapshots := map[string]snapshotFile{} - metadatas := []string{} - ctx, cancel := context.WithTimeout(ctx, s.config.EtcdS3Timeout) - defer cancel() - - opts := minio.ListObjectsOptions{ - Prefix: s.config.EtcdS3Folder, - Recursive: true, - } - - objects := s.client.ListObjects(ctx, s.config.EtcdS3BucketName, opts) - - for obj := range objects { - if obj.Err != nil { - return nil, obj.Err - } - if obj.Size == 0 { - continue - } - - if o, err := s.client.StatObject(ctx, s.config.EtcdS3BucketName, obj.Key, minio.StatObjectOptions{}); err != nil { - logrus.Warnf("Failed to get object metadata: %v", err) - } else { - obj = o - } - - filename := path.Base(obj.Key) - if path.Base(path.Dir(obj.Key)) == metadataDir { - metadatas = append(metadatas, obj.Key) - continue - } - - basename, compressed := strings.CutSuffix(filename, compressedExtension) - ts, err := strconv.ParseInt(basename[strings.LastIndexByte(basename, '-')+1:], 10, 64) - if err != nil { - ts = obj.LastModified.Unix() - } - - sf := snapshotFile{ - Name: filename, - Location: fmt.Sprintf("s3://%s/%s", s.config.EtcdS3BucketName, obj.Key), - NodeName: "s3", - CreatedAt: &metav1.Time{ - Time: time.Unix(ts, 0), - }, - Size: obj.Size, - S3: &s3Config{ - Endpoint: s.config.EtcdS3Endpoint, - EndpointCA: s.config.EtcdS3EndpointCA, - SkipSSLVerify: s.config.EtcdS3SkipSSLVerify, - Bucket: s.config.EtcdS3BucketName, - Region: s.config.EtcdS3Region, - Folder: s.config.EtcdS3Folder, - Insecure: s.config.EtcdS3Insecure, - }, - Status: successfulSnapshotStatus, - Compressed: compressed, - nodeSource: obj.UserMetadata[nodeNameKey], - tokenHash: obj.UserMetadata[tokenHashKey], - } - sfKey := generateSnapshotConfigMapKey(sf) - snapshots[sfKey] = sf - } - - for _, metadataKey := range metadatas { - filename := path.Base(metadataKey) - sfKey := generateSnapshotConfigMapKey(snapshotFile{Name: filename, NodeName: "s3"}) - if sf, ok := snapshots[sfKey]; ok { - logrus.Debugf("Loading snapshot metadata from s3://%s/%s", s.config.EtcdS3BucketName, metadataKey) - if obj, err := s.client.GetObject(ctx, s.config.EtcdS3BucketName, metadataKey, minio.GetObjectOptions{}); err != nil { - if isNotExist(err) { - logrus.Debugf("Failed to get snapshot metadata: %v", err) - } else { - logrus.Warnf("Failed to get snapshot metadata for %s: %v", filename, err) - } - } else { - if m, err := ioutil.ReadAll(obj); err != nil { - if isNotExist(err) { - logrus.Debugf("Failed to read snapshot metadata: %v", err) - } else { - logrus.Warnf("Failed to read snapshot metadata for %s: %v", filename, err) - } - } else { - sf.Metadata = base64.StdEncoding.EncodeToString(m) - snapshots[sfKey] = sf - } - } - } - } - - return snapshots, nil -} - -func readS3EndpointCA(endpointCA string) ([]byte, error) { - ca, err := base64.StdEncoding.DecodeString(endpointCA) - if err != nil { - return os.ReadFile(endpointCA) - } - return ca, nil -} - -func setTransportCA(tr http.RoundTripper, endpointCA string, insecureSkipVerify bool) (http.RoundTripper, error) { - ca, err := readS3EndpointCA(endpointCA) - if err != nil { - return tr, err - } - if !isValidCertificate(ca) { - return tr, errors.New("endpoint-ca is not a valid x509 certificate") - } - - certPool := x509.NewCertPool() - certPool.AppendCertsFromPEM(ca) - - tr.(*http.Transport).TLSClientConfig = &tls.Config{ - RootCAs: certPool, - InsecureSkipVerify: insecureSkipVerify, - } - - return tr, nil -} - -// isValidCertificate checks to see if the given -// byte slice 
is a valid x509 certificate. -func isValidCertificate(c []byte) bool { - p, _ := pem.Decode(c) - if p == nil { - return false - } - if _, err := x509.ParseCertificates(p.Bytes); err != nil { - return false - } - return true -} - -func bucketLookupType(endpoint string) minio.BucketLookupType { - if strings.Contains(endpoint, "aliyun") { // backwards compt with RKE1 - return minio.BucketLookupDNS - } - return minio.BucketLookupAuto -} diff --git a/pkg/etcd/s3/config_secret.go b/pkg/etcd/s3/config_secret.go new file mode 100644 index 000000000000..0b81e94b41f8 --- /dev/null +++ b/pkg/etcd/s3/config_secret.go @@ -0,0 +1,119 @@ +package s3 + +import ( + "encoding/base64" + "fmt" + "strconv" + "strings" + "time" + + "github.com/k3s-io/k3s/pkg/daemons/config" + "github.com/k3s-io/k3s/pkg/util" + "github.com/sirupsen/logrus" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var ErrNoConfigSecret = errNoConfigSecret() + +type secretError struct { + err error +} + +func (e *secretError) Error() string { + return fmt.Sprintf("failed to get etcd S3 config secret: %v", e.err) +} + +func (e *secretError) Is(target error) bool { + switch target { + case ErrNoConfigSecret: + return true + } + return false +} + +func errNoConfigSecret() error { return &secretError{} } + +func (c *Controller) getConfigFromSecret(secretName string) (*config.EtcdS3, error) { + if c.core == nil { + return nil, &secretError{err: util.ErrCoreNotReady} + } + + secret, err := c.core.V1().Secret().Get(metav1.NamespaceSystem, secretName, metav1.GetOptions{}) + if err != nil { + return nil, &secretError{err: err} + } + + etcdS3 := &config.EtcdS3{ + AccessKey: string(secret.Data["etcd-s3-access-key"]), + Bucket: string(secret.Data["etcd-s3-bucket"]), + Endpoint: defaultEtcdS3.Endpoint, + Folder: string(secret.Data["etcd-s3-folder"]), + Proxy: string(secret.Data["etcd-s3-proxy"]), + Region: defaultEtcdS3.Region, + SecretKey: string(secret.Data["etcd-s3-secret-key"]), + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + } + + // Set endpoint from secret if set + if v, ok := secret.Data["etcd-s3-endpoint"]; ok { + etcdS3.Endpoint = string(v) + } + + // Set region from secret if set + if v, ok := secret.Data["etcd-s3-region"]; ok { + etcdS3.Region = string(v) + } + + // Set timeout from secret if set + if v, ok := secret.Data["etcd-s3-timeout"]; ok { + if duration, err := time.ParseDuration(string(v)); err != nil { + logrus.Warnf("Failed to parse etcd-s3-timeout value from S3 config secret %s: %v", secretName, err) + } else { + etcdS3.Timeout.Duration = duration + } + } + + // configure ssl verification, if value can be parsed + if v, ok := secret.Data["etcd-s3-skip-ssl-verify"]; ok { + if b, err := strconv.ParseBool(string(v)); err != nil { + logrus.Warnf("Failed to parse etcd-s3-skip-ssl-verify value from S3 config secret %s: %v", secretName, err) + } else { + etcdS3.SkipSSLVerify = b + } + } + + // configure insecure http, if value can be parsed + if v, ok := secret.Data["etcd-s3-insecure"]; ok { + if b, err := strconv.ParseBool(string(v)); err != nil { + logrus.Warnf("Failed to parse etcd-s3-insecure value from S3 config secret %s: %v", secretName, err) + } else { + etcdS3.Insecure = b + } + } + + // encode CA bundles from value, and keys in configmap if one is named + caBundles := []string{} + // Add inline CA bundle if set + if len(secret.Data["etcd-s3-endpoint-ca"]) > 0 { + caBundles = append(caBundles, base64.StdEncoding.EncodeToString(secret.Data["etcd-s3-endpoint-ca"])) + } + + // Add CA bundles from named configmap if set + if 
caConfigMapName := string(secret.Data["etcd-s3-endpoint-ca-name"]); caConfigMapName != "" { + configMap, err := c.core.V1().ConfigMap().Get(metav1.NamespaceSystem, caConfigMapName, metav1.GetOptions{}) + if err != nil { + logrus.Warnf("Failed to get ConfigMap %s for etcd-s3-endpoint-ca-name value from S3 config secret %s: %v", caConfigMapName, secretName, err) + } else { + for _, v := range configMap.Data { + caBundles = append(caBundles, base64.StdEncoding.EncodeToString([]byte(v))) + } + for _, v := range configMap.BinaryData { + caBundles = append(caBundles, base64.StdEncoding.EncodeToString(v)) + } + } + } + + // Concatenate all requested CA bundle strings into config var + etcdS3.EndpointCA = strings.Join(caBundles, " ") + return etcdS3, nil +} diff --git a/pkg/etcd/s3/s3.go b/pkg/etcd/s3/s3.go new file mode 100644 index 000000000000..9e88dac42bb4 --- /dev/null +++ b/pkg/etcd/s3/s3.go @@ -0,0 +1,567 @@ +package s3 + +import ( + "context" + "crypto/tls" + "crypto/x509" + "encoding/base64" + "fmt" + "io/ioutil" + "net/http" + "net/textproto" + "net/url" + "os" + "path" + "path/filepath" + "reflect" + "sort" + "strconv" + "strings" + "sync" + "time" + + "github.com/k3s-io/k3s/pkg/daemons/config" + "github.com/k3s-io/k3s/pkg/etcd/snapshot" + "github.com/k3s-io/k3s/pkg/util" + "github.com/k3s-io/k3s/pkg/version" + "github.com/minio/minio-go/v7" + "github.com/minio/minio-go/v7/pkg/credentials" + "github.com/pkg/errors" + "github.com/rancher/wrangler/pkg/generated/controllers/core" + "github.com/sirupsen/logrus" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/utils/lru" +) + +var ( + clusterIDKey = textproto.CanonicalMIMEHeaderKey(version.Program + "-cluster-id") + tokenHashKey = textproto.CanonicalMIMEHeaderKey(version.Program + "-token-hash") + nodeNameKey = textproto.CanonicalMIMEHeaderKey(version.Program + "-node-name") +) + +var defaultEtcdS3 = &config.EtcdS3{ + Endpoint: "s3.amazonaws.com", + Region: "us-east-1", + Timeout: metav1.Duration{ + Duration: 5 * time.Minute, + }, +} + +var ( + controller *Controller + cErr error + once sync.Once +) + +// Controller maintains state for S3 functionality, +// and can be used to get clients for interacting with +// an S3 service, given specific client configuration. +type Controller struct { + clusterID string + tokenHash string + nodeName string + core core.Interface + clientCache *lru.Cache +} + +// Client holds state for a given configuration - a preconfigured minio client, +// and reference to the config it was created for. +type Client struct { + mc *minio.Client + etcdS3 *config.EtcdS3 + controller *Controller +} + +// Start initializes the cache and sets the cluster id and token hash, +// returning a reference to the the initialized controller. Initialization is +// locked by a sync.Once to prevent races, and multiple calls to start will +// return the same controller or error. 
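// A minimal sketch of the client cache the Controller above keeps with
// k8s.io/utils/lru: the EtcdS3 struct *value* (not a pointer) is used as the
// cache key, so two identical configurations resolve to the same client.
// cacheSketch is a hypothetical helper for illustration only.
func cacheSketch(cache *lru.Cache, cfg config.EtcdS3, newClient *Client) *Client {
	if v, ok := cache.Get(cfg); ok {
		return v.(*Client) // cache hit: reuse the existing client
	}
	cache.Add(cfg, newClient)
	return newClient
}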
+func Start(ctx context.Context, config *config.Control) (*Controller, error) { + once.Do(func() { + c := &Controller{ + clientCache: lru.New(5), + nodeName: os.Getenv("NODE_NAME"), + } + + if config.ClusterReset { + logrus.Debug("Skip setting S3 snapshot cluster ID and server token hash during cluster-reset") + controller = c + } else { + logrus.Debug("Getting S3 snapshot cluster ID and server token hash") + if err := wait.PollImmediateUntilWithContext(ctx, time.Second, func(ctx context.Context) (bool, error) { + if config.Runtime.Core == nil { + return false, nil + } + c.core = config.Runtime.Core.Core() + + // cluster id hack: see https://groups.google.com/forum/#!msg/kubernetes-sig-architecture/mVGobfD4TpY/nkdbkX1iBwAJ + ns, err := c.core.V1().Namespace().Get(metav1.NamespaceSystem, metav1.GetOptions{}) + if err != nil { + return false, errors.Wrap(err, "failed to set S3 snapshot cluster ID") + } + c.clusterID = string(ns.UID) + + tokenHash, err := util.GetTokenHash(config) + if err != nil { + return false, errors.Wrap(err, "failed to set S3 snapshot server token hash") + } + c.tokenHash = tokenHash + + return true, nil + }); err != nil { + cErr = err + } else { + controller = c + } + } + }) + + return controller, cErr +} + +func (c *Controller) GetClient(ctx context.Context, etcdS3 *config.EtcdS3) (*Client, error) { + if etcdS3 == nil { + return nil, errors.New("nil s3 configuration") + } + + // update ConfigSecret in defaults so that comparisons between current and default config + // ignore ConfigSecret when deciding if CLI configuration is present. + defaultEtcdS3.ConfigSecret = etcdS3.ConfigSecret + + // If config is default, try to load config from secret, and fail if it cannot be retrieved or if the secret name is not set. + // If config is not default, and secret name is set, warn that the secret is being ignored + isDefault := reflect.DeepEqual(defaultEtcdS3, etcdS3) + if etcdS3.ConfigSecret != "" { + if isDefault { + e, err := c.getConfigFromSecret(etcdS3.ConfigSecret) + if err != nil { + return nil, errors.Wrapf(err, "failed to get config from etcd-s3-config-secret %q", etcdS3.ConfigSecret) + } + logrus.Infof("Using etcd s3 configuration from etcd-s3-config-secret %q", etcdS3.ConfigSecret) + etcdS3 = e + } else { + logrus.Warnf("Ignoring s3 configuration from etcd-s3-config-secret %q due to existing configuration from CLI or config file", etcdS3.ConfigSecret) + } + } else if isDefault { + return nil, errors.New("s3 configuration was not set") + } + + // used just for logging + scheme := "https://" + if etcdS3.Insecure { + scheme = "http://" + } + + // Try to get an existing client from cache. The entire EtcdS3 struct + // (including the key id and secret) is used as the cache key, but we only + // print the endpoint and bucket name to avoid leaking creds into the logs. + if client, ok := c.clientCache.Get(*etcdS3); ok { + logrus.Infof("Reusing cached S3 client for endpoint=%q bucket=%q folder=%q", scheme+etcdS3.Endpoint, etcdS3.Bucket, etcdS3.Folder) + return client.(*Client), nil + } + logrus.Infof("Attempting to create new S3 client for endpoint=%q bucket=%q folder=%q", scheme+etcdS3.Endpoint, etcdS3.Bucket, etcdS3.Folder) + + if etcdS3.Bucket == "" { + return nil, errors.New("s3 bucket name was not set") + } + tr := http.DefaultTransport.(*http.Transport).Clone() + + // You can either disable SSL verification or use a custom CA bundle, + // it doesn't make sense to do both - if verification is disabled, + // the CA is not checked! 
+ if etcdS3.SkipSSLVerify { + tr.TLSClientConfig = &tls.Config{InsecureSkipVerify: true} + } else if etcdS3.EndpointCA != "" { + tlsConfig, err := loadEndpointCAs(etcdS3.EndpointCA) + if err != nil { + return nil, err + } + tr.TLSClientConfig = tlsConfig + } + + // Set a fixed proxy URL, if requested by the user. This replaces the default, + // which calls ProxyFromEnvironment to read proxy settings from the environment. + if etcdS3.Proxy != "" { + var u *url.URL + var err error + // proxy address of literal "none" disables all use of a proxy by S3 + if etcdS3.Proxy != "none" { + u, err = url.Parse(etcdS3.Proxy) + if err != nil { + return nil, errors.Wrap(err, "failed to parse etcd-s3-proxy value as URL") + } + if u.Scheme == "" || u.Host == "" { + return nil, fmt.Errorf("proxy URL must include scheme and host") + } + } + tr.Proxy = http.ProxyURL(u) + } + + var creds *credentials.Credentials + if len(etcdS3.AccessKey) == 0 && len(etcdS3.SecretKey) == 0 { + creds = credentials.NewIAM("") // for running on ec2 instance + if _, err := creds.Get(); err != nil { + return nil, errors.Wrap(err, "failed to get IAM credentials") + } + } else { + creds = credentials.NewStaticV4(etcdS3.AccessKey, etcdS3.SecretKey, "") + } + + opt := minio.Options{ + Creds: creds, + Secure: !etcdS3.Insecure, + Region: etcdS3.Region, + Transport: tr, + BucketLookup: bucketLookupType(etcdS3.Endpoint), + } + mc, err := minio.New(etcdS3.Endpoint, &opt) + if err != nil { + return nil, err + } + + logrus.Infof("Checking if S3 bucket %s exists", etcdS3.Bucket) + + ctx, cancel := context.WithTimeout(ctx, etcdS3.Timeout.Duration) + defer cancel() + + exists, err := mc.BucketExists(ctx, etcdS3.Bucket) + if err != nil { + return nil, errors.Wrapf(err, "failed to test for existence of bucket %s", etcdS3.Bucket) + } + if !exists { + return nil, fmt.Errorf("bucket %s does not exist", etcdS3.Bucket) + } + logrus.Infof("S3 bucket %s exists", etcdS3.Bucket) + + client := &Client{ + mc: mc, + etcdS3: etcdS3, + controller: c, + } + logrus.Infof("Adding S3 client to cache") + c.clientCache.Add(*etcdS3, client) + return client, nil +} + +// upload uploads the given snapshot to the configured S3 +// compatible backend. 
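// Two minimal sketches of behavior from GetClient above; both helpers are
// hypothetical and exist only for illustration.
//
// First, the Secret-vs-CLI precedence: the config Secret is consulted only
// when every other field still matches the defaults, so explicit CLI or
// config-file settings always win and the Secret is ignored.
func shouldUseSecret(defaults, current *config.EtcdS3) bool {
	defaults.ConfigSecret = current.ConfigSecret // the secret name itself is excluded from the comparison
	return current.ConfigSecret != "" && reflect.DeepEqual(defaults, current)
}

// Second, the fixed-proxy override: http.ProxyURL replaces the default
// ProxyFromEnvironment lookup, and the literal value "none" yields a nil
// URL, which disables proxying entirely.
func proxyTransport(proxy string) (*http.Transport, error) {
	tr := http.DefaultTransport.(*http.Transport).Clone()
	if proxy != "" {
		var u *url.URL
		if proxy != "none" {
			var err error
			if u, err = url.Parse(proxy); err != nil {
				return nil, err
			}
			if u.Scheme == "" || u.Host == "" {
				return nil, fmt.Errorf("proxy URL must include scheme and host")
			}
		}
		tr.Proxy = http.ProxyURL(u) // http.ProxyURL(nil) never proxies
	}
	return tr, nil
}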
+func (c *Client) Upload(ctx context.Context, snapshotPath string, extraMetadata *v1.ConfigMap, now time.Time) (*snapshot.File, error) { + basename := filepath.Base(snapshotPath) + metadata := filepath.Join(filepath.Dir(snapshotPath), "..", snapshot.MetadataDir, basename) + snapshotKey := path.Join(c.etcdS3.Folder, basename) + metadataKey := path.Join(c.etcdS3.Folder, snapshot.MetadataDir, basename) + + sf := &snapshot.File{ + Name: basename, + Location: fmt.Sprintf("s3://%s/%s", c.etcdS3.Bucket, snapshotKey), + NodeName: "s3", + CreatedAt: &metav1.Time{ + Time: now, + }, + S3: &snapshot.S3Config{EtcdS3: *c.etcdS3}, + Compressed: strings.HasSuffix(snapshotPath, snapshot.CompressedExtension), + MetadataSource: extraMetadata, + NodeSource: c.controller.nodeName, + } + + logrus.Infof("Uploading snapshot to s3://%s/%s", c.etcdS3.Bucket, snapshotKey) + uploadInfo, err := c.uploadSnapshot(ctx, snapshotKey, snapshotPath) + if err != nil { + sf.Status = snapshot.FailedStatus + sf.Message = base64.StdEncoding.EncodeToString([]byte(err.Error())) + } else { + sf.Status = snapshot.SuccessfulStatus + sf.Size = uploadInfo.Size + sf.TokenHash = c.controller.tokenHash + } + if uploadInfo, err := c.uploadSnapshotMetadata(ctx, metadataKey, metadata); err != nil { + logrus.Warnf("Failed to upload snapshot metadata to S3: %v", err) + } else if uploadInfo.Size != 0 { + logrus.Infof("Uploaded snapshot metadata s3://%s/%s", c.etcdS3.Bucket, metadataKey) + } + return sf, err +} + +// uploadSnapshot uploads the snapshot file to S3 using the minio API. +func (c *Client) uploadSnapshot(ctx context.Context, key, path string) (info minio.UploadInfo, err error) { + opts := minio.PutObjectOptions{ + NumThreads: 2, + UserMetadata: map[string]string{ + clusterIDKey: c.controller.clusterID, + nodeNameKey: c.controller.nodeName, + tokenHashKey: c.controller.tokenHash, + }, + } + if strings.HasSuffix(key, snapshot.CompressedExtension) { + opts.ContentType = "application/zip" + } else { + opts.ContentType = "application/octet-stream" + } + ctx, cancel := context.WithTimeout(ctx, c.etcdS3.Timeout.Duration) + defer cancel() + return c.mc.FPutObject(ctx, c.etcdS3.Bucket, key, path, opts) +} + +// uploadSnapshotMetadata marshals and uploads the snapshot metadata to S3 using the minio API. +// The upload is silently skipped if no extra metadata is provided. +func (c *Client) uploadSnapshotMetadata(ctx context.Context, key, path string) (info minio.UploadInfo, err error) { + if _, err := os.Stat(path); err != nil { + if os.IsNotExist(err) { + return minio.UploadInfo{}, nil + } + return minio.UploadInfo{}, err + } + + opts := minio.PutObjectOptions{ + NumThreads: 2, + ContentType: "application/json", + UserMetadata: map[string]string{ + clusterIDKey: c.controller.clusterID, + nodeNameKey: c.controller.nodeName, + tokenHashKey: c.controller.tokenHash, + }, + } + ctx, cancel := context.WithTimeout(ctx, c.etcdS3.Timeout.Duration) + defer cancel() + return c.mc.FPutObject(ctx, c.etcdS3.Bucket, key, path, opts) +} + +// Download downloads the given snapshot from the configured S3 +// compatible backend. If the file is successfully downloaded, it returns +// the path the file was downloaded to. 
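// A minimal sketch of the per-call timeout pattern every S3 operation above
// follows: each request derives a bounded context from the configured
// timeout, so a hung endpoint cannot stall snapshot handling indefinitely.
// withTimeout is a hypothetical helper for illustration.
func withTimeout(ctx context.Context, d time.Duration, op func(context.Context) error) error {
	ctx, cancel := context.WithTimeout(ctx, d)
	defer cancel()
	return op(ctx)
}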
+func (c *Client) Download(ctx context.Context, snapshotName, snapshotDir string) (string, error) { + snapshotKey := path.Join(c.etcdS3.Folder, snapshotName) + metadataKey := path.Join(c.etcdS3.Folder, snapshot.MetadataDir, snapshotName) + snapshotFile := filepath.Join(snapshotDir, snapshotName) + metadataFile := filepath.Join(snapshotDir, "..", snapshot.MetadataDir, snapshotName) + + if err := c.downloadSnapshot(ctx, snapshotKey, snapshotFile); err != nil { + return "", err + } + if err := c.downloadSnapshotMetadata(ctx, metadataKey, metadataFile); err != nil { + return "", err + } + + return snapshotFile, nil +} + +// downloadSnapshot downloads the snapshot file from S3 using the minio API. +func (c *Client) downloadSnapshot(ctx context.Context, key, file string) error { + logrus.Debugf("Downloading snapshot from s3://%s/%s", c.etcdS3.Bucket, key) + ctx, cancel := context.WithTimeout(ctx, c.etcdS3.Timeout.Duration) + defer cancel() + defer os.Chmod(file, 0600) + return c.mc.FGetObject(ctx, c.etcdS3.Bucket, key, file, minio.GetObjectOptions{}) +} + +// downloadSnapshotMetadata downloads the snapshot metadata file from S3 using the minio API. +// No error is returned if the metadata file does not exist, as it is optional. +func (c *Client) downloadSnapshotMetadata(ctx context.Context, key, file string) error { + logrus.Debugf("Downloading snapshot metadata from s3://%s/%s", c.etcdS3.Bucket, key) + ctx, cancel := context.WithTimeout(ctx, c.etcdS3.Timeout.Duration) + defer cancel() + defer os.Chmod(file, 0600) + err := c.mc.FGetObject(ctx, c.etcdS3.Bucket, key, file, minio.GetObjectOptions{}) + if resp := minio.ToErrorResponse(err); resp.StatusCode == http.StatusNotFound { + return nil + } + return err +} + +// SnapshotRetention prunes snapshots in the configured S3 compatible backend for this specific node. +// Returns a list of pruned snapshot names. 
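// A minimal sketch of the optional-object handling in
// downloadSnapshotMetadata above: a 404 from S3 is mapped to success via
// minio.ToErrorResponse, since the metadata object may legitimately be
// absent. Arguments are placeholders.
func getOptionalObject(ctx context.Context, mc *minio.Client, bucket, key, file string) error {
	err := mc.FGetObject(ctx, bucket, key, file, minio.GetObjectOptions{})
	if resp := minio.ToErrorResponse(err); resp.StatusCode == http.StatusNotFound {
		return nil // object is optional; not-found is not a failure
	}
	return err
}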
+func (c *Client) SnapshotRetention(ctx context.Context, retention int, prefix string) ([]string, error) { + if retention < 1 { + return nil, nil + } + + prefix = path.Join(c.etcdS3.Folder, prefix) + logrus.Infof("Applying snapshot retention=%d to snapshots stored in s3://%s/%s", retention, c.etcdS3.Bucket, prefix) + + var snapshotFiles []minio.ObjectInfo + + toCtx, cancel := context.WithTimeout(ctx, c.etcdS3.Timeout.Duration) + defer cancel() + + opts := minio.ListObjectsOptions{ + Prefix: prefix, + Recursive: true, + } + for info := range c.mc.ListObjects(toCtx, c.etcdS3.Bucket, opts) { + if info.Err != nil { + return nil, info.Err + } + + // skip metadata + if path.Base(path.Dir(info.Key)) == snapshot.MetadataDir { + continue + } + + snapshotFiles = append(snapshotFiles, info) + } + + if len(snapshotFiles) <= retention { + return nil, nil + } + + // sort newest-first so we can prune entries past the retention count + sort.Slice(snapshotFiles, func(i, j int) bool { + return snapshotFiles[j].LastModified.Before(snapshotFiles[i].LastModified) + }) + + deleted := []string{} + for _, df := range snapshotFiles[retention:] { + logrus.Infof("Removing S3 snapshot: s3://%s/%s", c.etcdS3.Bucket, df.Key) + + key := path.Base(df.Key) + if err := c.DeleteSnapshot(ctx, key); err != nil { + return deleted, err + } + deleted = append(deleted, key) + } + + return deleted, nil +} + +// DeleteSnapshot deletes the selected snapshot (and its metadata) from S3 +func (c *Client) DeleteSnapshot(ctx context.Context, key string) error { + ctx, cancel := context.WithTimeout(ctx, c.etcdS3.Timeout.Duration) + defer cancel() + + key = path.Join(c.etcdS3.Folder, key) + err := c.mc.RemoveObject(ctx, c.etcdS3.Bucket, key, minio.RemoveObjectOptions{}) + if err == nil || snapshot.IsNotExist(err) { + metadataKey := path.Join(path.Dir(key), snapshot.MetadataDir, path.Base(key)) + if merr := c.mc.RemoveObject(ctx, c.etcdS3.Bucket, metadataKey, minio.RemoveObjectOptions{}); merr != nil && !snapshot.IsNotExist(merr) { + err = merr + } + } + + return err +} + +// listSnapshots provides a list of currently stored +// snapshots in S3 along with their relevant +// metadata. 
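// A minimal sketch of the retention ordering in SnapshotRetention above:
// sort newest-first, keep the first `retention` entries, and return the
// remainder as deletion candidates. pruneCandidates is a hypothetical helper.
func pruneCandidates(files []minio.ObjectInfo, retention int) []minio.ObjectInfo {
	if retention < 1 || len(files) <= retention {
		return nil
	}
	sort.Slice(files, func(i, j int) bool {
		return files[j].LastModified.Before(files[i].LastModified)
	})
	return files[retention:]
}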
+func (c *Client) ListSnapshots(ctx context.Context) (map[string]snapshot.File, error) { + snapshots := map[string]snapshot.File{} + metadatas := []string{} + ctx, cancel := context.WithTimeout(ctx, c.etcdS3.Timeout.Duration) + defer cancel() + + opts := minio.ListObjectsOptions{ + Prefix: c.etcdS3.Folder, + Recursive: true, + } + + objects := c.mc.ListObjects(ctx, c.etcdS3.Bucket, opts) + + for obj := range objects { + if obj.Err != nil { + return nil, obj.Err + } + if obj.Size == 0 { + continue + } + + if o, err := c.mc.StatObject(ctx, c.etcdS3.Bucket, obj.Key, minio.StatObjectOptions{}); err != nil { + logrus.Warnf("Failed to get object metadata: %v", err) + } else { + obj = o + } + + filename := path.Base(obj.Key) + if path.Base(path.Dir(obj.Key)) == snapshot.MetadataDir { + metadatas = append(metadatas, obj.Key) + continue + } + + basename, compressed := strings.CutSuffix(filename, snapshot.CompressedExtension) + ts, err := strconv.ParseInt(basename[strings.LastIndexByte(basename, '-')+1:], 10, 64) + if err != nil { + ts = obj.LastModified.Unix() + } + + sf := snapshot.File{ + Name: filename, + Location: fmt.Sprintf("s3://%s/%s", c.etcdS3.Bucket, obj.Key), + NodeName: "s3", + CreatedAt: &metav1.Time{ + Time: time.Unix(ts, 0), + }, + Size: obj.Size, + S3: &snapshot.S3Config{EtcdS3: *c.etcdS3}, + Status: snapshot.SuccessfulStatus, + Compressed: compressed, + NodeSource: obj.UserMetadata[nodeNameKey], + TokenHash: obj.UserMetadata[tokenHashKey], + } + sfKey := sf.GenerateConfigMapKey() + snapshots[sfKey] = sf + } + + for _, metadataKey := range metadatas { + filename := path.Base(metadataKey) + dsf := &snapshot.File{Name: filename, NodeName: "s3"} + sfKey := dsf.GenerateConfigMapKey() + if sf, ok := snapshots[sfKey]; ok { + logrus.Debugf("Loading snapshot metadata from s3://%s/%s", c.etcdS3.Bucket, metadataKey) + if obj, err := c.mc.GetObject(ctx, c.etcdS3.Bucket, metadataKey, minio.GetObjectOptions{}); err != nil { + if snapshot.IsNotExist(err) { + logrus.Debugf("Failed to get snapshot metadata: %v", err) + } else { + logrus.Warnf("Failed to get snapshot metadata for %s: %v", filename, err) + } + } else { + if m, err := ioutil.ReadAll(obj); err != nil { + if snapshot.IsNotExist(err) { + logrus.Debugf("Failed to read snapshot metadata: %v", err) + } else { + logrus.Warnf("Failed to read snapshot metadata for %s: %v", filename, err) + } + } else { + sf.Metadata = base64.StdEncoding.EncodeToString(m) + snapshots[sfKey] = sf + } + } + } + } + + return snapshots, nil +} + +func loadEndpointCAs(etcdS3EndpointCA string) (*tls.Config, error) { + var loaded bool + certPool := x509.NewCertPool() + + for _, ca := range strings.Split(etcdS3EndpointCA, " ") { + // Try to decode the value as base64-encoded data - yes, a base64 string that itself + // contains multiline, ascii-armored, base64-encoded certificate data - as would be produced + // by `base64 --wrap=0 /path/to/cert.pem`. If this fails, assume the value is the path to a + // file on disk, and try to read that. This is backwards compatible with RKE1. 
+ caData, err := base64.StdEncoding.DecodeString(ca) + if err != nil { + caData, err = os.ReadFile(ca) + } + if err != nil { + return nil, err + } + if certPool.AppendCertsFromPEM(caData) { + loaded = true + } + } + + if loaded { + return &tls.Config{RootCAs: certPool}, nil + } + return nil, errors.New("no certificates loaded from etcd-s3-endpoint-ca") +} + +func bucketLookupType(endpoint string) minio.BucketLookupType { + if strings.Contains(endpoint, "aliyun") { // backwards compatible with RKE1 + return minio.BucketLookupDNS + } + return minio.BucketLookupAuto +} diff --git a/pkg/etcd/s3/s3_test.go b/pkg/etcd/s3/s3_test.go new file mode 100644 index 000000000000..1778d21ccc5c --- /dev/null +++ b/pkg/etcd/s3/s3_test.go @@ -0,0 +1,1743 @@ +package s3 + +import ( + "context" + "fmt" + "net" + "net/http" + "os" + "path" + "path/filepath" + "reflect" + "strings" + "testing" + "text/template" + "time" + + "github.com/golang/mock/gomock" + "github.com/gorilla/mux" + "github.com/k3s-io/k3s/pkg/daemons/config" + "github.com/k3s-io/k3s/pkg/etcd/snapshot" + "github.com/rancher/dynamiclistener/cert" + "github.com/rancher/wrangler/pkg/generated/controllers/core" + corev1 "github.com/rancher/wrangler/pkg/generated/controllers/core/v1" + "github.com/rancher/wrangler/pkg/generic/fake" + "github.com/sirupsen/logrus" + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/utils/lru" +) + +var gmt = time.FixedZone("GMT", 0) + +func Test_UnitControllerGetClient(t *testing.T) { + logrus.SetLevel(logrus.DebugLevel) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Dummy server with http and https listeners as a simple S3 mock + server := &http.Server{Handler: s3Router(t)} + + // Create temp cert/key + cert, key, _ := cert.GenerateSelfSignedCertKey("localhost", []net.IP{net.ParseIP("::1"), net.ParseIP("127.0.0.1")}, nil) + tempDir := t.TempDir() + certFile := filepath.Join(tempDir, "test.crt") + keyFile := filepath.Join(tempDir, "test.key") + os.WriteFile(certFile, cert, 0600) + os.WriteFile(keyFile, key, 0600) + + listener, _ := net.Listen("tcp", ":0") + listenerTLS, _ := net.Listen("tcp", ":0") + + _, port, _ := net.SplitHostPort(listener.Addr().String()) + listenerAddr := net.JoinHostPort("localhost", port) + _, port, _ = net.SplitHostPort(listenerTLS.Addr().String()) + listenerTLSAddr := net.JoinHostPort("localhost", port) + + go server.Serve(listener) + go server.ServeTLS(listenerTLS, certFile, keyFile) + go func() { + <-ctx.Done() + server.Close() + }() + + type fields struct { + clusterID string + tokenHash string + nodeName string + clientCache *lru.Cache + } + type args struct { + ctx context.Context + etcdS3 *config.EtcdS3 + } + tests := []struct { + name string + fields fields + args args + setup func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) + want *Client + wantErr bool + }{ + { + name: "Fail to get client with nil config", + args: args{ + ctx: ctx, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + wantErr: true, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Fail to get client when bucket not set", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Endpoint: listenerAddr, + Insecure: true, + 
Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + wantErr: true, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Fail to get client when bucket does not exist", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "badbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + wantErr: true, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Fail to get client with missing Secret", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + Endpoint: defaultEtcdS3.Endpoint, + Region: defaultEtcdS3.Region, + ConfigSecret: "my-etcd-s3-config-secret", + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + wantErr: true, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + coreMock.v1.secret.EXPECT().Get(metav1.NamespaceSystem, "my-etcd-s3-config-secret", gomock.Any()).AnyTimes().DoAndReturn(func(namespace, name string, _ metav1.GetOptions) (*v1.Secret, error) { + return nil, errorNotFound("secret", name) + }) + return coreMock, nil + }, + }, + { + name: "Create client for config from secret", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + Endpoint: defaultEtcdS3.Endpoint, + Region: defaultEtcdS3.Region, + ConfigSecret: "my-etcd-s3-config-secret", + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + coreMock.v1.secret.EXPECT().Get(metav1.NamespaceSystem, "my-etcd-s3-config-secret", gomock.Any()).AnyTimes().DoAndReturn(func(namespace, name string, _ metav1.GetOptions) (*v1.Secret, error) { + return &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Type: v1.SecretTypeOpaque, + Data: map[string][]byte{ + "etcd-s3-access-key": []byte("test"), + "etcd-s3-bucket": []byte("testbucket"), + "etcd-s3-endpoint": []byte(listenerTLSAddr), + "etcd-s3-region": []byte("us-west-2"), + "etcd-s3-timeout": []byte("1m"), + "etcd-s3-endpoint-ca": cert, + }, + }, nil + }) + return coreMock, nil + }, + }, + { + name: "Create client for config from secret with CA in configmap", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + Endpoint: defaultEtcdS3.Endpoint, + Region: defaultEtcdS3.Region, + ConfigSecret: "my-etcd-s3-config-secret", + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + coreMock.v1.secret.EXPECT().Get(metav1.NamespaceSystem, 
"my-etcd-s3-config-secret", gomock.Any()).AnyTimes().DoAndReturn(func(namespace, name string, _ metav1.GetOptions) (*v1.Secret, error) { + return &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Type: v1.SecretTypeOpaque, + Data: map[string][]byte{ + "etcd-s3-access-key": []byte("test"), + "etcd-s3-bucket": []byte("testbucket"), + "etcd-s3-endpoint": []byte(listenerTLSAddr), + "etcd-s3-region": []byte("us-west-2"), + "etcd-s3-timeout": []byte("1m"), + "etcd-s3-endpoint-ca-name": []byte("my-etcd-s3-ca"), + "etcd-s3-skip-ssl-verify": []byte("false"), + }, + }, nil + }) + coreMock.v1.configMap.EXPECT().Get(metav1.NamespaceSystem, "my-etcd-s3-ca", gomock.Any()).AnyTimes().DoAndReturn(func(namespace, name string, _ metav1.GetOptions) (*v1.ConfigMap, error) { + return &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Data: map[string]string{ + "dummy-ca": string(cert), + }, + BinaryData: map[string][]byte{ + "dummy-ca-binary": cert, + }, + }, nil + }) + return coreMock, nil + }, + }, + { + name: "Fail to create client for config from secret with CA in missing configmap", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + Endpoint: defaultEtcdS3.Endpoint, + Region: defaultEtcdS3.Region, + ConfigSecret: "my-etcd-s3-config-secret", + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + wantErr: true, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + coreMock.v1.secret.EXPECT().Get(metav1.NamespaceSystem, "my-etcd-s3-config-secret", gomock.Any()).AnyTimes().DoAndReturn(func(namespace, name string, _ metav1.GetOptions) (*v1.Secret, error) { + return &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Type: v1.SecretTypeOpaque, + Data: map[string][]byte{ + "etcd-s3-access-key": []byte("test"), + "etcd-s3-bucket": []byte("testbucket"), + "etcd-s3-endpoint": []byte(listenerTLSAddr), + "etcd-s3-region": []byte("us-west-2"), + "etcd-s3-timeout": []byte("invalid"), + "etcd-s3-endpoint-ca": []byte("invalid"), + "etcd-s3-endpoint-ca-name": []byte("my-etcd-s3-ca"), + "etcd-s3-skip-ssl-verify": []byte("invalid"), + "etcd-s3-insecure": []byte("invalid"), + }, + }, nil + }) + coreMock.v1.configMap.EXPECT().Get(metav1.NamespaceSystem, "my-etcd-s3-ca", gomock.Any()).AnyTimes().DoAndReturn(func(namespace, name string, _ metav1.GetOptions) (*v1.ConfigMap, error) { + return nil, errorNotFound("configmap", name) + }) + return coreMock, nil + }, + }, + { + name: "Create insecure client for config from cli when secret is also set", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + ConfigSecret: "my-etcd-s3-config-secret", + Endpoint: listenerAddr, + Insecure: true, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Create skip-ssl-verify client for config from cli when secret is also set", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + ConfigSecret: 
"my-etcd-s3-config-secret", + Endpoint: listenerTLSAddr, + SkipSSLVerify: true, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Create client for config from cli when secret is not set", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + Endpoint: listenerAddr, + Insecure: true, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Get cached client for config from secret", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + Endpoint: defaultEtcdS3.Endpoint, + Region: defaultEtcdS3.Region, + ConfigSecret: "my-etcd-s3-config-secret", + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + want: &Client{}, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + c.etcdS3 = &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + } + f.clientCache.Add(*c.etcdS3, c) + coreMock := newCoreMock(gomock.NewController(t)) + coreMock.v1.secret.EXPECT().Get(metav1.NamespaceSystem, "my-etcd-s3-config-secret", gomock.Any()).AnyTimes().DoAndReturn(func(namespace, name string, _ metav1.GetOptions) (*v1.Secret, error) { + return &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Type: v1.SecretTypeOpaque, + Data: map[string][]byte{ + "etcd-s3-access-key": []byte("test"), + "etcd-s3-bucket": []byte("testbucket"), + "etcd-s3-endpoint": []byte(listenerAddr), + "etcd-s3-insecure": []byte("true"), + }, + }, nil + }) + return coreMock, nil + }, + }, + { + name: "Get cached client for config from cli", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + ConfigSecret: "my-etcd-s3-config-secret", + Endpoint: listenerAddr, + Insecure: true, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + want: &Client{}, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + c.etcdS3 = a.etcdS3 + f.clientCache.Add(*c.etcdS3, c) + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Create client for config from cli with proxy", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + ConfigSecret: "my-etcd-s3-config-secret", + Endpoint: listenerAddr, + Insecure: true, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + Proxy: "http://" + listenerAddr, + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + setup: func(t *testing.T, a args, f fields, c *Client) 
(core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Fail to create client for config from cli with invalid proxy", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + ConfigSecret: "my-etcd-s3-config-secret", + Endpoint: listenerAddr, + Insecure: true, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + Proxy: "http://%invalid", + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + wantErr: true, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Fail to create client for config from cli with no proxy scheme", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + ConfigSecret: "my-etcd-s3-config-secret", + Endpoint: listenerAddr, + Insecure: true, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + Proxy: "/proxy", + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + wantErr: true, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Create client for config from cli with CA path", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + ConfigSecret: "my-etcd-s3-config-secret", + Endpoint: listenerTLSAddr, + EndpointCA: certFile, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + { + name: "Fail to create client for config from cli with invalid CA path", + args: args{ + ctx: ctx, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Region: "us-west-2", + ConfigSecret: "my-etcd-s3-config-secret", + Endpoint: listenerTLSAddr, + EndpointCA: "/does/not/exist", + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + fields: fields{ + clusterID: "1234", + tokenHash: "abcd", + nodeName: "server01", + clientCache: lru.New(5), + }, + wantErr: true, + setup: func(t *testing.T, a args, f fields, c *Client) (core.Interface, error) { + coreMock := newCoreMock(gomock.NewController(t)) + return coreMock, nil + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + core, err := tt.setup(t, tt.args, tt.fields, tt.want) + if err != nil { + t.Errorf("Setup for Controller.GetClient() failed = %v", err) + return + } + c := &Controller{ + clusterID: tt.fields.clusterID, + tokenHash: tt.fields.tokenHash, + nodeName: tt.fields.nodeName, + clientCache: tt.fields.clientCache, + core: core, + } + got, err := c.GetClient(tt.args.ctx, tt.args.etcdS3) + t.Logf("Got client=%#v err=%v", got, err) + if (err != nil) != tt.wantErr { + t.Errorf("Controller.GetClient() error = %v, wantErr %v", err, tt.wantErr) + return + } + if tt.want != nil && !reflect.DeepEqual(got, tt.want) { + t.Errorf("Controller.GetClient() = %+v\nWant = %+v", got, tt.want) + } + }) + } +} + +func Test_UnitClientUpload(t *testing.T) { + 
logrus.SetLevel(logrus.DebugLevel) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Dummy server with http listener as a simple S3 mock + server := &http.Server{Handler: s3Router(t)} + + listener, _ := net.Listen("tcp", ":0") + + _, port, _ := net.SplitHostPort(listener.Addr().String()) + listenerAddr := net.JoinHostPort("localhost", port) + + go server.Serve(listener) + go func() { + <-ctx.Done() + server.Close() + }() + + controller, err := Start(ctx, &config.Control{ClusterReset: true}) + if err != nil { + t.Errorf("Start() for Client.Upload() failed = %v", err) + return + } + + tempDir := t.TempDir() + metadataDir := filepath.Join(tempDir, ".metadata") + snapshotDir := filepath.Join(tempDir, "snapshots") + snapshotPath := filepath.Join(snapshotDir, "snapshot-01") + metadataPath := filepath.Join(metadataDir, "snapshot-01") + if err := os.Mkdir(snapshotDir, 0700); err != nil { + t.Errorf("Mkdir() failed = %v", err) + return + } + if err := os.Mkdir(metadataDir, 0700); err != nil { + t.Errorf("Mkdir() failed = %v", err) + return + } + if err := os.WriteFile(snapshotPath, []byte("test snapshot file\n"), 0600); err != nil { + t.Errorf("WriteFile() failed = %v", err) + return + } + if err := os.WriteFile(metadataPath, []byte("test snapshot metadata\n"), 0600); err != nil { + t.Errorf("WriteFile() failed = %v", err) + return + } + + t.Logf("Using snapshot = %s, metadata = %s", snapshotPath, metadataPath) + + type fields struct { + controller *Controller + etcdS3 *config.EtcdS3 + } + type args struct { + ctx context.Context + snapshotPath string + extraMetadata *v1.ConfigMap + now time.Time + } + tests := []struct { + name string + fields fields + args args + want *snapshot.File + wantErr bool + }{ + { + name: "Successful Upload", + fields: fields{ + controller: controller, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + args: args{ + ctx: ctx, + snapshotPath: snapshotPath, + extraMetadata: &v1.ConfigMap{Data: map[string]string{"foo": "bar"}}, + now: time.Now(), + }, + }, + { + name: "Successful Upload with Prefix", + fields: fields{ + controller: controller, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Folder: "testfolder", + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + args: args{ + ctx: ctx, + snapshotPath: snapshotPath, + extraMetadata: &v1.ConfigMap{Data: map[string]string{"foo": "bar"}}, + now: time.Now(), + }, + }, + { + name: "Fails Upload to Nonexistent Bucket", + fields: fields{ + controller: controller, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "badbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + args: args{ + ctx: ctx, + snapshotPath: snapshotPath, + extraMetadata: &v1.ConfigMap{Data: map[string]string{"foo": "bar"}}, + now: time.Now(), + }, + wantErr: true, + }, + { + name: "Fails Upload to Unauthorized Bucket", + fields: fields{ + controller: controller, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "authbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + args: args{ + ctx: ctx, + snapshotPath: snapshotPath, + extraMetadata: &v1.ConfigMap{Data: map[string]string{"foo": 
"bar"}}, + now: time.Now(), + }, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c, err := tt.fields.controller.GetClient(tt.args.ctx, tt.fields.etcdS3) + if err != nil { + if !tt.wantErr { + t.Errorf("GetClient for Client.Upload() error = %v, wantErr %v", err, tt.wantErr) + } + return + } + got, err := c.Upload(tt.args.ctx, tt.args.snapshotPath, tt.args.extraMetadata, tt.args.now) + t.Logf("Got File=%#v err=%v", got, err) + if (err != nil) != tt.wantErr { + t.Errorf("Client.Upload() error = %v, wantErr %v", err, tt.wantErr) + return + } + if tt.want != nil && !reflect.DeepEqual(got, tt.want) { + t.Errorf("Client.Upload() = %+v\nWant = %+v", got, tt.want) + } + }) + } +} + +func Test_UnitClientDownload(t *testing.T) { + logrus.SetLevel(logrus.DebugLevel) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Dummy server with http listener as a simple S3 mock + server := &http.Server{Handler: s3Router(t)} + + listener, _ := net.Listen("tcp", ":0") + + _, port, _ := net.SplitHostPort(listener.Addr().String()) + listenerAddr := net.JoinHostPort("localhost", port) + + go server.Serve(listener) + go func() { + <-ctx.Done() + server.Close() + }() + + controller, err := Start(ctx, &config.Control{ClusterReset: true}) + if err != nil { + t.Errorf("Start() for Client.Download() failed = %v", err) + return + } + + snapshotName := "snapshot-01" + tempDir := t.TempDir() + metadataDir := filepath.Join(tempDir, ".metadata") + snapshotDir := filepath.Join(tempDir, "snapshots") + if err := os.Mkdir(snapshotDir, 0700); err != nil { + t.Errorf("Mkdir() failed = %v", err) + return + } + if err := os.Mkdir(metadataDir, 0700); err != nil { + t.Errorf("Mkdir() failed = %v", err) + return + } + + type fields struct { + etcdS3 *config.EtcdS3 + controller *Controller + } + type args struct { + ctx context.Context + snapshotName string + snapshotDir string + } + tests := []struct { + name string + fields fields + args args + want string + wantErr bool + }{ + { + name: "Successful Download", + fields: fields{ + controller: controller, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + args: args{ + ctx: ctx, + snapshotName: snapshotName, + snapshotDir: snapshotDir, + }, + }, + { + name: "Unauthorizied Download", + fields: fields{ + controller: controller, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "authbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + args: args{ + ctx: ctx, + snapshotName: snapshotName, + snapshotDir: snapshotDir, + }, + wantErr: true, + }, + { + name: "Nonexistent Bucket", + fields: fields{ + controller: controller, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "badbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + args: args{ + ctx: ctx, + snapshotName: snapshotName, + snapshotDir: snapshotDir, + }, + wantErr: true, + }, + { + name: "Nonexistent Snapshot", + fields: fields{ + controller: controller, + etcdS3: &config.EtcdS3{ + AccessKey: "test", + Bucket: "testbucket", + Endpoint: listenerAddr, + Insecure: true, + Region: defaultEtcdS3.Region, + Timeout: *defaultEtcdS3.Timeout.DeepCopy(), + }, + }, + args: args{ + ctx: ctx, + snapshotName: "badfile-1", + 
+func Test_UnitClientDownload(t *testing.T) {
+	logrus.SetLevel(logrus.DebugLevel)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// Dummy server with http listener as a simple S3 mock
+	server := &http.Server{Handler: s3Router(t)}
+
+	listener, _ := net.Listen("tcp", ":0")
+
+	_, port, _ := net.SplitHostPort(listener.Addr().String())
+	listenerAddr := net.JoinHostPort("localhost", port)
+
+	go server.Serve(listener)
+	go func() {
+		<-ctx.Done()
+		server.Close()
+	}()
+
+	controller, err := Start(ctx, &config.Control{ClusterReset: true})
+	if err != nil {
+		t.Errorf("Start() for Client.Download() failed = %v", err)
+		return
+	}
+
+	snapshotName := "snapshot-01"
+	tempDir := t.TempDir()
+	metadataDir := filepath.Join(tempDir, ".metadata")
+	snapshotDir := filepath.Join(tempDir, "snapshots")
+	if err := os.Mkdir(snapshotDir, 0700); err != nil {
+		t.Errorf("Mkdir() failed = %v", err)
+		return
+	}
+	if err := os.Mkdir(metadataDir, 0700); err != nil {
+		t.Errorf("Mkdir() failed = %v", err)
+		return
+	}
+
+	type fields struct {
+		etcdS3     *config.EtcdS3
+		controller *Controller
+	}
+	type args struct {
+		ctx          context.Context
+		snapshotName string
+		snapshotDir  string
+	}
+	tests := []struct {
+		name    string
+		fields  fields
+		args    args
+		want    string
+		wantErr bool
+	}{
+		{
+			name: "Successful Download",
+			fields: fields{
+				controller: controller,
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "testbucket",
+					Endpoint:  listenerAddr,
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+			},
+			args: args{
+				ctx:          ctx,
+				snapshotName: snapshotName,
+				snapshotDir:  snapshotDir,
+			},
+		},
+		{
+			name: "Unauthorized Download",
+			fields: fields{
+				controller: controller,
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "authbucket",
+					Endpoint:  listenerAddr,
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+			},
+			args: args{
+				ctx:          ctx,
+				snapshotName: snapshotName,
+				snapshotDir:  snapshotDir,
+			},
+			wantErr: true,
+		},
+		{
+			name: "Nonexistent Bucket",
+			fields: fields{
+				controller: controller,
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "badbucket",
+					Endpoint:  listenerAddr,
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+			},
+			args: args{
+				ctx:          ctx,
+				snapshotName: snapshotName,
+				snapshotDir:  snapshotDir,
+			},
+			wantErr: true,
+		},
+		{
+			name: "Nonexistent Snapshot",
+			fields: fields{
+				controller: controller,
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "testbucket",
+					Endpoint:  listenerAddr,
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+			},
+			args: args{
+				ctx:          ctx,
+				snapshotName: "badfile-1",
+				snapshotDir:  snapshotDir,
+			},
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			c, err := tt.fields.controller.GetClient(tt.args.ctx, tt.fields.etcdS3)
+			if err != nil {
+				if !tt.wantErr {
+					t.Errorf("GetClient for Client.Download() error = %v, wantErr %v", err, tt.wantErr)
+				}
+				return
+			}
+			got, err := c.Download(tt.args.ctx, tt.args.snapshotName, tt.args.snapshotDir)
+			t.Logf("Got snapshotPath=%#v err=%v", got, err)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("Client.Download() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if tt.want != "" && got != tt.want {
+				t.Errorf("Client.Download() = %+v\nWant = %+v", got, tt.want)
+			}
+		})
+	}
+}
+
+func Test_UnitClientListSnapshots(t *testing.T) {
+	logrus.SetLevel(logrus.DebugLevel)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// Dummy server with http listener as a simple S3 mock
+	server := &http.Server{Handler: s3Router(t)}
+
+	listener, _ := net.Listen("tcp", ":0")
+
+	_, port, _ := net.SplitHostPort(listener.Addr().String())
+	listenerAddr := net.JoinHostPort("localhost", port)
+
+	go server.Serve(listener)
+	go func() {
+		<-ctx.Done()
+		server.Close()
+	}()
+
+	controller, err := Start(ctx, &config.Control{ClusterReset: true})
+	if err != nil {
+		t.Errorf("Start() for Client.ListSnapshots() failed = %v", err)
+		return
+	}
+
+	type fields struct {
+		etcdS3     *config.EtcdS3
+		controller *Controller
+	}
+	type args struct {
+		ctx context.Context
+	}
+	tests := []struct {
+		name    string
+		fields  fields
+		args    args
+		want    map[string]snapshot.File
+		wantErr bool
+	}{
+		{
+			name: "List Snapshots",
+			fields: fields{
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "testbucket",
+					Endpoint:  listenerAddr,
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+				controller: controller,
+			},
+			args: args{
+				ctx: ctx,
+			},
+		},
+		{
+			name: "List Snapshots with Prefix",
+			fields: fields{
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "testbucket",
+					Endpoint:  listenerAddr,
+					Folder:    "testfolder",
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+				controller: controller,
+			},
+			args: args{
+				ctx: ctx,
+			},
+		},
+		{
+			name: "Fail to List Snapshots from Nonexistent Bucket",
+			fields: fields{
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "badbucket",
+					Endpoint:  listenerAddr,
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+				controller: controller,
+			},
+			args: args{
+				ctx: ctx,
+			},
+			wantErr: true,
+		},
+		{
+			name: "Fail to List Snapshots from Unauthorized Bucket",
+			fields: fields{
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "authbucket",
+					Endpoint:  listenerAddr,
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+				controller: controller,
+			},
+			args: args{
+				ctx: ctx,
+			},
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			c, err := tt.fields.controller.GetClient(tt.args.ctx, tt.fields.etcdS3)
+			if err != nil {
+				if !tt.wantErr {
+					t.Errorf("GetClient for Client.ListSnapshots() error = %v, wantErr %v", err, tt.wantErr)
+				}
+				return
+			}
+			got, err := c.ListSnapshots(tt.args.ctx)
+			t.Logf("Got snapshots=%#v err=%v", got, err)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("Client.ListSnapshots() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if tt.want != nil && !reflect.DeepEqual(got, tt.want) {
+				t.Errorf("Client.ListSnapshots() = %+v\nWant = %+v", got, tt.want)
+			}
+		})
+	}
+}
+
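The GetClient cases near the top of this file cache clients with the dereferenced config struct as the key (`f.clientCache.Add(*c.etcdS3, c)`), so two configurations share a cached client only when every field matches. A minimal sketch of that pattern, assuming the single-return `lru.New(5)` seen there is the golang/groupcache LRU; the config type below is a stand-in, not the real `config.EtcdS3`:

```go
package main

import (
	"fmt"

	"github.com/golang/groupcache/lru"
)

// etcdS3Config stands in for the real comparable config struct.
type etcdS3Config struct {
	Bucket   string
	Endpoint string
	Insecure bool
}

func main() {
	cache := lru.New(5)

	cfg := etcdS3Config{Bucket: "testbucket", Endpoint: "localhost:9000"}
	cache.Add(cfg, "client-A") // the comparable struct value is the key

	if v, ok := cache.Get(cfg); ok {
		fmt.Println(v) // client-A: an identical config hits the cache
	}

	cfg.Insecure = true
	_, ok := cache.Get(cfg)
	fmt.Println(ok) // false: any field change is a different key, forcing a new client
}
```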
+func Test_UnitClientDeleteSnapshot(t *testing.T) {
+	logrus.SetLevel(logrus.DebugLevel)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// Dummy server with http listener as a simple S3 mock
+	server := &http.Server{Handler: s3Router(t)}
+
+	listener, _ := net.Listen("tcp", ":0")
+
+	_, port, _ := net.SplitHostPort(listener.Addr().String())
+	listenerAddr := net.JoinHostPort("localhost", port)
+
+	go server.Serve(listener)
+	go func() {
+		<-ctx.Done()
+		server.Close()
+	}()
+
+	controller, err := Start(ctx, &config.Control{ClusterReset: true})
+	if err != nil {
+		t.Errorf("Start() for Client.DeleteSnapshot() failed = %v", err)
+		return
+	}
+
+	type fields struct {
+		etcdS3     *config.EtcdS3
+		controller *Controller
+	}
+	type args struct {
+		ctx context.Context
+		key string
+	}
+	tests := []struct {
+		name    string
+		fields  fields
+		args    args
+		wantErr bool
+	}{
+		{
+			name: "Delete Snapshot",
+			fields: fields{
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "testbucket",
+					Endpoint:  listenerAddr,
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+				controller: controller,
+			},
+			args: args{
+				ctx: ctx,
+				key: "snapshot-01",
+			},
+		},
+		{
+			name: "Fails to Delete from Nonexistent Bucket",
+			fields: fields{
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "badbucket",
+					Endpoint:  listenerAddr,
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+				controller: controller,
+			},
+			args: args{
+				ctx: ctx,
+				key: "snapshot-01",
+			},
+			wantErr: true,
+		},
+		{
+			name: "Fails to Delete from Unauthorized Bucket",
+			fields: fields{
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "authbucket",
+					Endpoint:  listenerAddr,
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+				controller: controller,
+			},
+			args: args{
+				ctx: ctx,
+				key: "snapshot-01",
+			},
+			wantErr: true,
+		},
+		{
+			name: "Fails to Delete Nonexistent Snapshot",
+			fields: fields{
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "testbucket",
+					Endpoint:  listenerAddr,
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+				controller: controller,
+			},
+			args: args{
+				ctx: ctx,
+				key: "badfile-1",
+			},
+			wantErr: true,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			c, err := tt.fields.controller.GetClient(tt.args.ctx, tt.fields.etcdS3)
+			if err != nil {
+				if !tt.wantErr {
+					t.Errorf("GetClient for Client.DeleteSnapshot() error = %v, wantErr %v", err, tt.wantErr)
+				}
+				return
+			}
+			err = c.DeleteSnapshot(tt.args.ctx, tt.args.key)
+			t.Logf("DeleteSnapshot got error=%v", err)
+			if (err != nil) != tt.wantErr {
+				t.Errorf("Client.DeleteSnapshot() error = %v, wantErr %v", err, tt.wantErr)
+			}
+		})
+	}
+}
+
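The delete cases above expect a missing S3 object and a missing local file to be reported the same way. That is the check the new snapshot package centralizes as `snapshot.IsNotExist`; the identical logic is removed from snapshot.go later in this diff, and is reproduced here as a self-contained program:

```go
package main

import (
	"fmt"
	"net/http"
	"os"

	"github.com/minio/minio-go/v7"
)

// isNotExist folds an S3 404 and a local filesystem not-found error into
// one predicate. ToErrorResponse returns a zero ErrorResponse for
// non-minio errors, so the StatusCode comparison is safe either way.
func isNotExist(err error) bool {
	if resp := minio.ToErrorResponse(err); resp.StatusCode == http.StatusNotFound || os.IsNotExist(err) {
		return true
	}
	return false
}

func main() {
	_, err := os.Open("/does/not/exist")
	fmt.Println(isNotExist(err)) // true
}
```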
+func Test_UnitClientSnapshotRetention(t *testing.T) {
+	logrus.SetLevel(logrus.DebugLevel)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// Dummy server with http listener as a simple S3 mock
+	server := &http.Server{Handler: s3Router(t)}
+
+	listener, _ := net.Listen("tcp", ":0")
+
+	_, port, _ := net.SplitHostPort(listener.Addr().String())
+	listenerAddr := net.JoinHostPort("localhost", port)
+
+	go server.Serve(listener)
+	go func() {
+		<-ctx.Done()
+		server.Close()
+	}()
+
+	controller, err := Start(ctx, &config.Control{ClusterReset: true})
+	if err != nil {
+		t.Errorf("Start() for Client.SnapshotRetention() failed = %v", err)
+		return
+	}
+
+	type fields struct {
+		etcdS3     *config.EtcdS3
+		controller *Controller
+	}
+	type args struct {
+		ctx       context.Context
+		retention int
+		prefix    string
+	}
+	tests := []struct {
+		name    string
+		fields  fields
+		args    args
+		want    []string
+		wantErr bool
+	}{
+		{
+			name: "Prune Snapshots - keep all, no folder",
+			fields: fields{
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "testbucket",
+					Endpoint:  listenerAddr,
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+				controller: controller,
+			},
+			args: args{
+				ctx:       ctx,
+				retention: 10,
+				prefix:    "snapshot-",
+			},
+		},
+		{
+			name: "Prune Snapshots - keep 2 of 3, no folder",
+			fields: fields{
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "testbucket",
+					Endpoint:  listenerAddr,
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+				controller: controller,
+			},
+			args: args{
+				ctx:       ctx,
+				retention: 2,
+				prefix:    "snapshot-",
+			},
+			want: []string{"snapshot-03"},
+		},
+		{
+			name: "Prune Snapshots - keep 1 of 3, no folder",
+			fields: fields{
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "testbucket",
+					Endpoint:  listenerAddr,
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+				controller: controller,
+			},
+			args: args{
+				ctx:       ctx,
+				retention: 1,
+				prefix:    "snapshot-",
+			},
+			want: []string{"snapshot-02", "snapshot-03"},
+		},
+		{
+			name: "Prune Snapshots - keep all, with folder",
+			fields: fields{
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "testbucket",
+					Endpoint:  listenerAddr,
+					Folder:    "testfolder",
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+				controller: controller,
+			},
+			args: args{
+				ctx:       ctx,
+				retention: 10,
+				prefix:    "snapshot-",
+			},
+		},
+		{
+			name: "Prune Snapshots - keep 2 of 3, with folder",
+			fields: fields{
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "testbucket",
+					Endpoint:  listenerAddr,
+					Folder:    "testfolder",
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+				controller: controller,
+			},
+			args: args{
+				ctx:       ctx,
+				retention: 2,
+				prefix:    "snapshot-",
+			},
+			want: []string{"snapshot-06"},
+		},
+		{
+			name: "Prune Snapshots - keep 1 of 3, with folder",
+			fields: fields{
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "testbucket",
+					Endpoint:  listenerAddr,
+					Folder:    "testfolder",
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+				controller: controller,
+			},
+			args: args{
+				ctx:       ctx,
+				retention: 1,
+				prefix:    "snapshot-",
+			},
+			want: []string{"snapshot-05", "snapshot-06"},
+		},
+		{
+			name: "Fail to Prune from Unauthorized Bucket",
+			fields: fields{
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "authbucket",
+					Endpoint:  listenerAddr,
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+				controller: controller,
+			},
+			args: args{
+				ctx:       ctx,
+				retention: 1,
+				prefix:    "snapshot-",
+			},
+			wantErr: true,
+		},
+		{
+			name: "Fail to Prune from Nonexistent Bucket",
+			fields: fields{
+				etcdS3: &config.EtcdS3{
+					AccessKey: "test",
+					Bucket:    "badbucket",
+					Endpoint:  listenerAddr,
+					Insecure:  true,
+					Region:    defaultEtcdS3.Region,
+					Timeout:   *defaultEtcdS3.Timeout.DeepCopy(),
+				},
+				controller: controller,
+			},
args: args{ + ctx: ctx, + retention: 1, + prefix: "snapshot-", + }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c, err := tt.fields.controller.GetClient(tt.args.ctx, tt.fields.etcdS3) + if err != nil { + if !tt.wantErr { + t.Errorf("GetClient for Client.SnapshotRetention() error = %v, wantErr %v", err, tt.wantErr) + } + return + } + got, err := c.SnapshotRetention(tt.args.ctx, tt.args.retention, tt.args.prefix) + t.Logf("Got snapshots=%#v err=%v", got, err) + if (err != nil) != tt.wantErr { + t.Errorf("Client.SnapshotRetention() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("Client.SnapshotRetention() = %+v\nWant = %+v", got, tt.want) + } + }) + } +} + +// +// Mocks so that we can call Runtime.Core.Core().V1() without a functioning apiserver +// + +// explicit interface check for core mock +var _ core.Interface = &coreMock{} + +type coreMock struct { + v1 *v1Mock +} + +func newCoreMock(c *gomock.Controller) *coreMock { + return &coreMock{ + v1: newV1Mock(c), + } +} + +func (m *coreMock) V1() corev1.Interface { + return m.v1 +} + +// explicit interface check for core v1 mock +var _ corev1.Interface = &v1Mock{} + +type v1Mock struct { + configMap *fake.MockControllerInterface[*v1.ConfigMap, *v1.ConfigMapList] + endpoints *fake.MockControllerInterface[*v1.Endpoints, *v1.EndpointsList] + event *fake.MockControllerInterface[*v1.Event, *v1.EventList] + namespace *fake.MockNonNamespacedControllerInterface[*v1.Namespace, *v1.NamespaceList] + node *fake.MockNonNamespacedControllerInterface[*v1.Node, *v1.NodeList] + persistentVolume *fake.MockNonNamespacedControllerInterface[*v1.PersistentVolume, *v1.PersistentVolumeList] + persistentVolumeClaim *fake.MockControllerInterface[*v1.PersistentVolumeClaim, *v1.PersistentVolumeClaimList] + pod *fake.MockControllerInterface[*v1.Pod, *v1.PodList] + secret *fake.MockControllerInterface[*v1.Secret, *v1.SecretList] + service *fake.MockControllerInterface[*v1.Service, *v1.ServiceList] + serviceAccount *fake.MockControllerInterface[*v1.ServiceAccount, *v1.ServiceAccountList] +} + +func newV1Mock(c *gomock.Controller) *v1Mock { + return &v1Mock{ + configMap: fake.NewMockControllerInterface[*v1.ConfigMap, *v1.ConfigMapList](c), + endpoints: fake.NewMockControllerInterface[*v1.Endpoints, *v1.EndpointsList](c), + event: fake.NewMockControllerInterface[*v1.Event, *v1.EventList](c), + namespace: fake.NewMockNonNamespacedControllerInterface[*v1.Namespace, *v1.NamespaceList](c), + node: fake.NewMockNonNamespacedControllerInterface[*v1.Node, *v1.NodeList](c), + persistentVolume: fake.NewMockNonNamespacedControllerInterface[*v1.PersistentVolume, *v1.PersistentVolumeList](c), + persistentVolumeClaim: fake.NewMockControllerInterface[*v1.PersistentVolumeClaim, *v1.PersistentVolumeClaimList](c), + pod: fake.NewMockControllerInterface[*v1.Pod, *v1.PodList](c), + secret: fake.NewMockControllerInterface[*v1.Secret, *v1.SecretList](c), + service: fake.NewMockControllerInterface[*v1.Service, *v1.ServiceList](c), + serviceAccount: fake.NewMockControllerInterface[*v1.ServiceAccount, *v1.ServiceAccountList](c), + } +} + +func (m *v1Mock) ConfigMap() corev1.ConfigMapController { + return m.configMap +} + +func (m *v1Mock) Endpoints() corev1.EndpointsController { + return m.endpoints +} + +func (m *v1Mock) Event() corev1.EventController { + return m.event +} + +func (m *v1Mock) Namespace() corev1.NamespaceController { + return m.namespace +} + +func (m *v1Mock) Node() 
corev1.NodeController {
+	return m.node
+}
+
+func (m *v1Mock) PersistentVolume() corev1.PersistentVolumeController {
+	return m.persistentVolume
+}
+
+func (m *v1Mock) PersistentVolumeClaim() corev1.PersistentVolumeClaimController {
+	return m.persistentVolumeClaim
+}
+
+func (m *v1Mock) Pod() corev1.PodController {
+	return m.pod
+}
+
+func (m *v1Mock) Secret() corev1.SecretController {
+	return m.secret
+}
+
+func (m *v1Mock) Service() corev1.ServiceController {
+	return m.service
+}
+
+func (m *v1Mock) ServiceAccount() corev1.ServiceAccountController {
+	return m.serviceAccount
+}
+
+func errorNotFound(gv, name string) error {
+	return apierrors.NewNotFound(schema.ParseGroupResource(gv), name)
+}
+
+//
+// ListObjects response body template
+//
+
+var listObjectsV2ResponseTemplate = `
+{{- /* */ -}}
+{{ with $b := . -}}
+<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
+	<Name>{{$b.Name}}</Name>
+	<Prefix>{{ if $b.Prefix }}{{$b.Prefix}}{{ else }}{{ end }}</Prefix>
+	<KeyCount>{{ len $b.Objects }}</KeyCount>
+	<MaxKeys>1000</MaxKeys>
+	<Delimiter></Delimiter>
+	<IsTruncated>false</IsTruncated>
+	{{- range $o := $b.Objects }}
+	<Contents>
+		<Key>{{ $o.Key }}</Key>
+		<LastModified>{{ $o.LastModified }}</LastModified>
+		<ETag>{{ printf "%q" $o.ETag }}</ETag>
+		<Size>{{ $o.Size }}</Size>
+		<Owner>
+			<ID>0</ID>
+			<DisplayName>test</DisplayName>
+		</Owner>
+		<StorageClass>STANDARD</StorageClass>
+	</Contents>
+	{{- end }}
+	<EncodingType>url</EncodingType>
+</ListBucketResult>
+{{- end }}
+`
+
+func s3Router(t *testing.T) http.Handler {
+	var listResponse = template.Must(template.New("listObjectsV2").Parse(listObjectsV2ResponseTemplate))
+
+	type object struct {
+		Key          string
+		LastModified string
+		ETag         string
+		Size         int
+	}
+
+	type bucket struct {
+		Name    string
+		Prefix  string
+		Objects []object
+	}
+
+	snapshotId := 0
+	objects := []object{}
+	timestamp := time.Now().Format(time.RFC3339)
+	for _, prefix := range []string{"", "testfolder", "testfolder/nested", "otherfolder"} {
+		for idx := range []int{0, 1, 2} {
+			snapshotId++
+			objects = append(objects, object{
+				Key:          path.Join(prefix, fmt.Sprintf("snapshot-%02d", snapshotId)),
+				LastModified: timestamp,
+				ETag:         "0000",
+				Size:         100,
+			})
+			if idx != 0 {
+				objects = append(objects, object{
+					Key:          path.Join(prefix, fmt.Sprintf(".metadata/snapshot-%02d", snapshotId)),
+					LastModified: timestamp,
+					ETag:         "0000",
+					Size:         10,
+				})
+			}
+		}
+	}
+
+	// badbucket returns 404 for all requests
+	// authbucket returns 200 for HeadBucket, 403 for all others
+	// others return 200 for objects with name prefix snapshot, 404 for all others
+	router := mux.NewRouter().SkipClean(true)
+	// HeadBucket
+	router.Path("/{bucket}/").Methods(http.MethodHead).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
+		vars := mux.Vars(r)
+		switch vars["bucket"] {
+		case "badbucket":
+			rw.WriteHeader(http.StatusNotFound)
+		}
+	})
+	// ListObjectsV2
+	router.Path("/{bucket}/").Methods(http.MethodGet).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
+		vars := mux.Vars(r)
+		switch vars["bucket"] {
+		case "badbucket":
+			rw.WriteHeader(http.StatusNotFound)
+		case "authbucket":
+			rw.WriteHeader(http.StatusForbidden)
+		default:
+			prefix := r.URL.Query().Get("prefix")
+			filtered := []object{}
+			for _, object := range objects {
+				if strings.HasPrefix(object.Key, prefix) {
+					filtered = append(filtered, object)
+				}
+			}
+			if err := listResponse.Execute(rw, bucket{Name: vars["bucket"], Prefix: prefix, Objects: filtered}); err != nil {
+				t.Errorf("Failed to generate ListObjectsV2 response, error = %v", err)
+				rw.WriteHeader(http.StatusInternalServerError)
+			}
+		}
+	})
+	// HeadObject - snapshot
+	router.Path("/{bucket}/{prefix:.*}snapshot-{snapshot}").Methods(http.MethodHead).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
+		vars := mux.Vars(r)
+		switch vars["bucket"] {
+		case "badbucket":
+			rw.WriteHeader(http.StatusNotFound)
+		case
"authbucket": + rw.WriteHeader(http.StatusForbidden) + default: + rw.Header().Add("last-modified", time.Now().In(gmt).Format(time.RFC1123)) + } + }) + // GetObject - snapshot + router.Path("/{bucket}/{prefix:.*}snapshot-{snapshot}").Methods(http.MethodGet).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + switch vars["bucket"] { + case "badbucket": + rw.WriteHeader(http.StatusNotFound) + case "authbucket": + rw.WriteHeader(http.StatusForbidden) + default: + rw.Header().Add("last-modified", time.Now().In(gmt).Format(time.RFC1123)) + rw.Write([]byte("test snapshot file\n")) + } + }) + // PutObject/DeleteObject - snapshot + router.Path("/{bucket}/{prefix:.*}snapshot-{snapshot}").Methods(http.MethodPut, http.MethodDelete).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + switch vars["bucket"] { + case "badbucket": + rw.WriteHeader(http.StatusNotFound) + case "authbucket": + rw.WriteHeader(http.StatusForbidden) + default: + if r.Method == http.MethodDelete { + rw.WriteHeader(http.StatusNoContent) + } + } + }) + // HeadObject - snapshot metadata + router.Path("/{bucket}/{prefix:.*}.metadata/snapshot-{snapshot}").Methods(http.MethodHead).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + switch vars["bucket"] { + case "badbucket": + rw.WriteHeader(http.StatusNotFound) + case "authbucket": + rw.WriteHeader(http.StatusForbidden) + default: + rw.Header().Add("last-modified", time.Now().In(gmt).Format(time.RFC1123)) + } + }) + // GetObject - snapshot metadata + router.Path("/{bucket}/{prefix:.*}.metadata/snapshot-{snapshot}").Methods(http.MethodGet).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + switch vars["bucket"] { + case "badbucket": + rw.WriteHeader(http.StatusNotFound) + case "authbucket": + rw.WriteHeader(http.StatusForbidden) + default: + rw.Header().Add("last-modified", time.Now().In(gmt).Format(time.RFC1123)) + rw.Write([]byte("test snapshot metadata\n")) + } + }) + // PutObject/DeleteObject - snapshot metadata + router.Path("/{bucket}/{prefix:.*}.metadata/snapshot-{snapshot}").Methods(http.MethodPut, http.MethodDelete).HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + switch vars["bucket"] { + case "badbucket": + rw.WriteHeader(http.StatusNotFound) + case "authbucket": + rw.WriteHeader(http.StatusForbidden) + default: + if r.Method == http.MethodDelete { + rw.WriteHeader(http.StatusNoContent) + } + } + }) + return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + scheme := "http" + if r.TLS != nil { + scheme = "https" + } + logrus.Infof("%s %s://%s %s", r.Method, scheme, r.Host, r.URL) + router.ServeHTTP(rw, r) + }) +} diff --git a/pkg/etcd/snapshot.go b/pkg/etcd/snapshot.go index 8669b8443ffa..3fccfe37e868 100644 --- a/pkg/etcd/snapshot.go +++ b/pkg/etcd/snapshot.go @@ -3,14 +3,11 @@ package etcd import ( "archive/zip" "context" - "crypto/sha256" "encoding/base64" - "encoding/hex" "encoding/json" "fmt" "io" "math/rand" - "net/http" "os" "path/filepath" "runtime" @@ -22,38 +19,31 @@ import ( k3s "github.com/k3s-io/k3s/pkg/apis/k3s.cattle.io/v1" "github.com/k3s-io/k3s/pkg/cluster/managed" "github.com/k3s-io/k3s/pkg/daemons/config" + "github.com/k3s-io/k3s/pkg/etcd/s3" + "github.com/k3s-io/k3s/pkg/etcd/snapshot" "github.com/k3s-io/k3s/pkg/util" "github.com/k3s-io/k3s/pkg/version" - "github.com/minio/minio-go/v7" "github.com/pkg/errors" "github.com/robfig/cron/v3" "github.com/sirupsen/logrus" - 
"go.etcd.io/etcd/etcdutl/v3/snapshot" + snapshotv3 "go.etcd.io/etcd/etcdutl/v3/snapshot" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/validation" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/util/retry" - "k8s.io/utils/ptr" ) const ( - compressedExtension = ".zip" - metadataDir = ".metadata" - errorTTL = 24 * time.Hour + errorTTL = 24 * time.Hour ) var ( - snapshotExtraMetadataConfigMapName = version.Program + "-etcd-snapshot-extra-metadata" - labelStorageNode = "etcd." + version.Program + ".cattle.io/snapshot-storage-node" - annotationLocalReconciled = "etcd." + version.Program + ".cattle.io/local-snapshots-timestamp" - annotationS3Reconciled = "etcd." + version.Program + ".cattle.io/s3-snapshots-timestamp" - annotationTokenHash = "etcd." + version.Program + ".cattle.io/snapshot-token-hash" + annotationLocalReconciled = "etcd." + version.Program + ".cattle.io/local-snapshots-timestamp" + annotationS3Reconciled = "etcd." + version.Program + ".cattle.io/s3-snapshots-timestamp" // snapshotDataBackoff will retry at increasing steps for up to ~30 seconds. // If the ConfigMap update fails, the list won't be reconciled again until next time @@ -109,7 +99,7 @@ func snapshotDir(config *config.Control, create bool) (string, error) { func (e *ETCD) compressSnapshot(snapshotDir, snapshotName, snapshotPath string, now time.Time) (string, error) { logrus.Info("Compressing etcd snapshot file: " + snapshotName) - zippedSnapshotName := snapshotName + compressedExtension + zippedSnapshotName := snapshotName + snapshot.CompressedExtension zipPath := filepath.Join(snapshotDir, zippedSnapshotName) zf, err := os.Create(zipPath) @@ -168,7 +158,7 @@ func (e *ETCD) decompressSnapshot(snapshotDir, snapshotFile string) (string, err var decompressed *os.File for _, sf := range r.File { - decompressed, err = os.OpenFile(strings.Replace(sf.Name, compressedExtension, "", -1), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, sf.Mode()) + decompressed, err = os.OpenFile(strings.Replace(sf.Name, snapshot.CompressedExtension, "", -1), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, sf.Mode()) if err != nil { return "", err } @@ -203,13 +193,13 @@ func (e *ETCD) Snapshot(ctx context.Context) (*managed.SnapshotResult, error) { // make sure the core.Factory is initialized before attempting to add snapshot metadata var extraMetadata *v1.ConfigMap if e.config.Runtime.Core == nil { - logrus.Debugf("Cannot retrieve extra metadata from %s ConfigMap: runtime core not ready", snapshotExtraMetadataConfigMapName) + logrus.Debugf("Cannot retrieve extra metadata from %s ConfigMap: runtime core not ready", snapshot.ExtraMetadataConfigMapName) } else { - logrus.Debugf("Attempting to retrieve extra metadata from %s ConfigMap", snapshotExtraMetadataConfigMapName) - if snapshotExtraMetadataConfigMap, err := e.config.Runtime.Core.Core().V1().ConfigMap().Get(metav1.NamespaceSystem, snapshotExtraMetadataConfigMapName, metav1.GetOptions{}); err != nil { - logrus.Debugf("Error encountered attempting to retrieve extra metadata from %s ConfigMap, error: %v", snapshotExtraMetadataConfigMapName, err) + logrus.Debugf("Attempting to retrieve extra metadata from %s ConfigMap", snapshot.ExtraMetadataConfigMapName) + if snapshotExtraMetadataConfigMap, err := 
e.config.Runtime.Core.Core().V1().ConfigMap().Get(metav1.NamespaceSystem, snapshot.ExtraMetadataConfigMapName, metav1.GetOptions{}); err != nil { + logrus.Debugf("Error encountered attempting to retrieve extra metadata from %s ConfigMap, error: %v", snapshot.ExtraMetadataConfigMapName, err) } else { - logrus.Debugf("Setting extra metadata from %s ConfigMap", snapshotExtraMetadataConfigMapName) + logrus.Debugf("Setting extra metadata from %s ConfigMap", snapshot.ExtraMetadataConfigMapName) extraMetadata = snapshotExtraMetadataConfigMap } } @@ -246,20 +236,20 @@ func (e *ETCD) Snapshot(ctx context.Context) (*managed.SnapshotResult, error) { snapshotPath := filepath.Join(snapshotDir, snapshotName) logrus.Infof("Saving etcd snapshot to %s", snapshotPath) - var sf *snapshotFile + var sf *snapshot.File - if err := snapshot.NewV3(e.client.GetLogger()).Save(ctx, *cfg, snapshotPath); err != nil { - sf = &snapshotFile{ + if err := snapshotv3.NewV3(e.client.GetLogger()).Save(ctx, *cfg, snapshotPath); err != nil { + sf = &snapshot.File{ Name: snapshotName, Location: "", NodeName: nodeName, CreatedAt: &metav1.Time{ Time: now, }, - Status: failedSnapshotStatus, + Status: snapshot.FailedStatus, Message: base64.StdEncoding.EncodeToString([]byte(err.Error())), Size: 0, - metadataSource: extraMetadata, + MetadataSource: extraMetadata, } logrus.Errorf("Failed to take etcd snapshot: %v", err) if err := e.addSnapshotData(*sf); err != nil { @@ -290,18 +280,18 @@ func (e *ETCD) Snapshot(ctx context.Context) (*managed.SnapshotResult, error) { return nil, errors.Wrap(err, "unable to retrieve snapshot information from local snapshot") } - sf = &snapshotFile{ + sf = &snapshot.File{ Name: f.Name(), Location: "file://" + snapshotPath, NodeName: nodeName, CreatedAt: &metav1.Time{ Time: now, }, - Status: successfulSnapshotStatus, + Status: snapshot.SuccessfulStatus, Size: f.Size(), Compressed: e.config.EtcdSnapshotCompress, - metadataSource: extraMetadata, - tokenHash: tokenHash, + MetadataSource: extraMetadata, + TokenHash: tokenHash, } res.Created = append(res.Created, sf.Name) @@ -323,34 +313,29 @@ func (e *ETCD) Snapshot(ctx context.Context) (*managed.SnapshotResult, error) { } res.Deleted = append(res.Deleted, deleted...) 
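The hunk that follows rewrites the S3 upload path in Snapshot() around a soft-fail rule: an error from getS3Client only produces a failed snapshot record when it is not the s3 package's ErrNoConfigSecret sentinel; otherwise S3 is silently skipped. A minimal sketch of that convention, with a stand-in sentinel value (only the errors.Is shape is taken from the diff):

```go
package main

import (
	"errors"
	"fmt"
)

// Stand-in for the sentinel the new s3 package exports as ErrNoConfigSecret.
var errNoConfigSecret = errors.New("etcd-s3-config-secret not set")

// initS3 returns a hard error for real client failures, but treats the
// missing-Secret sentinel as "no S3 configured" and carries on.
func initS3(err error) error {
	if err != nil && !errors.Is(err, errNoConfigSecret) {
		return fmt.Errorf("failed to initialize S3 client: %w", err)
	}
	return nil
}

func main() {
	fmt.Println(initS3(errNoConfigSecret))                 // <nil>: skip S3, no failure record
	fmt.Println(initS3(errors.New("connection refused"))) // wrapped hard error
}
```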
- if e.config.EtcdS3 { - if err := e.initS3IfNil(ctx); err != nil { + if e.config.EtcdS3 != nil { + if s3client, err := e.getS3Client(ctx); err != nil { logrus.Warnf("Unable to initialize S3 client: %v", err) - sf = &snapshotFile{ - Name: f.Name(), - NodeName: "s3", - CreatedAt: &metav1.Time{ - Time: now, - }, - Message: base64.StdEncoding.EncodeToString([]byte(err.Error())), - Size: 0, - Status: failedSnapshotStatus, - S3: &s3Config{ - Endpoint: e.config.EtcdS3Endpoint, - EndpointCA: e.config.EtcdS3EndpointCA, - SkipSSLVerify: e.config.EtcdS3SkipSSLVerify, - Bucket: e.config.EtcdS3BucketName, - Region: e.config.EtcdS3Region, - Folder: e.config.EtcdS3Folder, - Insecure: e.config.EtcdS3Insecure, - }, - metadataSource: extraMetadata, + if !errors.Is(err, s3.ErrNoConfigSecret) { + err = errors.Wrap(err, "failed to initialize S3 client") + sf = &snapshot.File{ + Name: f.Name(), + NodeName: "s3", + CreatedAt: &metav1.Time{ + Time: now, + }, + Message: base64.StdEncoding.EncodeToString([]byte(err.Error())), + Size: 0, + Status: snapshot.FailedStatus, + S3: &snapshot.S3Config{EtcdS3: *e.config.EtcdS3}, + MetadataSource: extraMetadata, + } } } else { logrus.Infof("Saving etcd snapshot %s to S3", snapshotName) - // upload will return a snapshotFile even on error - if there was an + // upload will return a snapshot.File even on error - if there was an // error, it will be reflected in the status and message. - sf, err = e.s3.upload(ctx, snapshotPath, extraMetadata, now) + sf, err = s3client.Upload(ctx, snapshotPath, extraMetadata, now) if err != nil { logrus.Errorf("Error received during snapshot upload to S3: %s", err) } else { @@ -360,7 +345,7 @@ func (e *ETCD) Snapshot(ctx context.Context) (*managed.SnapshotResult, error) { // Attempt to apply retention even if the upload failed; failure may be due to bucket // being full or some other condition that retention policy would resolve. // Snapshot retention may prune some files before returning an error. Failing to prune is not fatal. - deleted, err := e.s3.snapshotRetention(ctx) + deleted, err := s3client.SnapshotRetention(ctx, e.config.EtcdSnapshotRetention, e.config.EtcdSnapshotName) res.Deleted = append(res.Deleted, deleted...) if err != nil { logrus.Warnf("Failed to apply s3 snapshot retention policy: %v", err) @@ -378,52 +363,12 @@ func (e *ETCD) Snapshot(ctx context.Context) (*managed.SnapshotResult, error) { return res, e.ReconcileSnapshotData(ctx) } -type s3Config struct { - Endpoint string `json:"endpoint,omitempty"` - EndpointCA string `json:"endpointCA,omitempty"` - SkipSSLVerify bool `json:"skipSSLVerify,omitempty"` - Bucket string `json:"bucket,omitempty"` - Region string `json:"region,omitempty"` - Folder string `json:"folder,omitempty"` - Insecure bool `json:"insecure,omitempty"` -} - -type snapshotStatus string - -const ( - successfulSnapshotStatus snapshotStatus = "successful" - failedSnapshotStatus snapshotStatus = "failed" -) - -// snapshotFile represents a single snapshot and it's -// metadata. -type snapshotFile struct { - Name string `json:"name"` - // Location contains the full path of the snapshot. For - // local paths, the location will be prefixed with "file://". 
- Location string `json:"location,omitempty"` - Metadata string `json:"metadata,omitempty"` - Message string `json:"message,omitempty"` - NodeName string `json:"nodeName,omitempty"` - CreatedAt *metav1.Time `json:"createdAt,omitempty"` - Size int64 `json:"size,omitempty"` - Status snapshotStatus `json:"status,omitempty"` - S3 *s3Config `json:"s3Config,omitempty"` - Compressed bool `json:"compressed"` - - // these fields are used for the internal representation of the snapshot - // to populate other fields before serialization to the legacy configmap. - metadataSource *v1.ConfigMap `json:"-"` - nodeSource string `json:"-"` - tokenHash string `json:"-"` -} - // listLocalSnapshots provides a list of the currently stored // snapshots on disk along with their relevant // metadata. -func (e *ETCD) listLocalSnapshots() (map[string]snapshotFile, error) { +func (e *ETCD) listLocalSnapshots() (map[string]snapshot.File, error) { nodeName := os.Getenv("NODE_NAME") - snapshots := make(map[string]snapshotFile) + snapshots := make(map[string]snapshot.File) snapshotDir, err := snapshotDir(e.config, true) if err != nil { return snapshots, errors.Wrap(err, "failed to get etcd-snapshot-dir") @@ -434,7 +379,7 @@ func (e *ETCD) listLocalSnapshots() (map[string]snapshotFile, error) { return err } - basename, compressed := strings.CutSuffix(file.Name(), compressedExtension) + basename, compressed := strings.CutSuffix(file.Name(), snapshot.CompressedExtension) ts, err := strconv.ParseInt(basename[strings.LastIndexByte(basename, '-')+1:], 10, 64) if err != nil { ts = file.ModTime().Unix() @@ -443,13 +388,13 @@ func (e *ETCD) listLocalSnapshots() (map[string]snapshotFile, error) { // try to read metadata from disk; don't warn if it is missing as it will not exist // for snapshot files from old releases or if there was no metadata provided. var metadata string - metadataFile := filepath.Join(filepath.Dir(path), "..", metadataDir, file.Name()) + metadataFile := filepath.Join(filepath.Dir(path), "..", snapshot.MetadataDir, file.Name()) if m, err := os.ReadFile(metadataFile); err == nil { logrus.Debugf("Loading snapshot metadata from %s", metadataFile) metadata = base64.StdEncoding.EncodeToString(m) } - sf := snapshotFile{ + sf := snapshot.File{ Name: file.Name(), Location: "file://" + filepath.Join(snapshotDir, file.Name()), NodeName: nodeName, @@ -458,10 +403,10 @@ func (e *ETCD) listLocalSnapshots() (map[string]snapshotFile, error) { Time: time.Unix(ts, 0), }, Size: file.Size(), - Status: successfulSnapshotStatus, + Status: snapshot.SuccessfulStatus, Compressed: compressed, } - sfKey := generateSnapshotConfigMapKey(sf) + sfKey := sf.GenerateConfigMapKey() snapshots[sfKey] = sf return nil }); err != nil { @@ -471,18 +416,21 @@ func (e *ETCD) listLocalSnapshots() (map[string]snapshotFile, error) { return snapshots, nil } -// initS3IfNil initializes the S3 client -// if it hasn't yet been initialized. -func (e *ETCD) initS3IfNil(ctx context.Context) error { - if e.config.EtcdS3 && e.s3 == nil { - s3, err := NewS3(ctx, e.config) +// getS3Client initializes the S3 controller if it hasn't yet been initialized. +// If S3 is or can be initialized successfully, and valid S3 configuration is +// present, a client for the current S3 configuration is returned. +// The context passed here is only used to validate the configuration, +// it does not need to continue to remain uncancelled after the call returns. 
+func (e *ETCD) getS3Client(ctx context.Context) (*s3.Client, error) { + if e.s3 == nil { + s3, err := s3.Start(ctx, e.config) if err != nil { - return err + return nil, err } e.s3 = s3 } - return nil + return e.s3.GetClient(ctx, e.config.EtcdS3) } // PruneSnapshots deleted old snapshots in excess of the configured retention count. @@ -502,11 +450,11 @@ func (e *ETCD) PruneSnapshots(ctx context.Context) (*managed.SnapshotResult, err logrus.Errorf("Error applying snapshot retention policy: %v", err) } - if e.config.EtcdS3 { - if err := e.initS3IfNil(ctx); err != nil { + if e.config.EtcdS3 != nil { + if s3client, err := e.getS3Client(ctx); err != nil { logrus.Warnf("Unable to initialize S3 client: %v", err) } else { - deleted, err := e.s3.snapshotRetention(ctx) + deleted, err := s3client.SnapshotRetention(ctx, e.config.EtcdSnapshotRetention, e.config.EtcdSnapshotName) if err != nil { logrus.Errorf("Error applying S3 snapshot retention policy: %v", err) } @@ -524,19 +472,23 @@ func (e *ETCD) ListSnapshots(ctx context.Context) (*k3s.ETCDSnapshotFileList, er snapshotFiles := &k3s.ETCDSnapshotFileList{ TypeMeta: metav1.TypeMeta{APIVersion: "v1", Kind: "List"}, } - if e.config.EtcdS3 { - if err := e.initS3IfNil(ctx); err != nil { + + if e.config.EtcdS3 != nil { + if s3client, err := e.getS3Client(ctx); err != nil { logrus.Warnf("Unable to initialize S3 client: %v", err) - return nil, err - } - sfs, err := e.s3.listSnapshots(ctx) - if err != nil { - return nil, err - } - for k, sf := range sfs { - esf := k3s.NewETCDSnapshotFile("", k, k3s.ETCDSnapshotFile{}) - sf.toETCDSnapshotFile(esf) - snapshotFiles.Items = append(snapshotFiles.Items, *esf) + if !errors.Is(err, s3.ErrNoConfigSecret) { + return nil, errors.Wrap(err, "failed to initialize S3 client") + } + } else { + sfs, err := s3client.ListSnapshots(ctx) + if err != nil { + return nil, err + } + for k, sf := range sfs { + esf := k3s.NewETCDSnapshotFile("", k, k3s.ETCDSnapshotFile{}) + sf.ToETCDSnapshotFile(esf) + snapshotFiles.Items = append(snapshotFiles.Items, *esf) + } } } @@ -546,7 +498,7 @@ func (e *ETCD) ListSnapshots(ctx context.Context) (*k3s.ETCDSnapshotFileList, er } for k, sf := range sfs { esf := k3s.NewETCDSnapshotFile("", k, k3s.ETCDSnapshotFile{}) - sf.toETCDSnapshotFile(esf) + sf.ToETCDSnapshotFile(esf) snapshotFiles.Items = append(snapshotFiles.Items, *esf) } @@ -561,17 +513,22 @@ func (e *ETCD) DeleteSnapshots(ctx context.Context, snapshots []string) (*manage if err != nil { return nil, errors.Wrap(err, "failed to get etcd-snapshot-dir") } - if e.config.EtcdS3 { - if err := e.initS3IfNil(ctx); err != nil { + + var s3client *s3.Client + if e.config.EtcdS3 != nil { + s3client, err = e.getS3Client(ctx) + if err != nil { logrus.Warnf("Unable to initialize S3 client: %v", err) - return nil, err + if !errors.Is(err, s3.ErrNoConfigSecret) { + return nil, errors.Wrap(err, "failed to initialize S3 client") + } } } res := &managed.SnapshotResult{} for _, s := range snapshots { if err := e.deleteSnapshot(filepath.Join(snapshotDir, s)); err != nil { - if isNotExist(err) { + if snapshot.IsNotExist(err) { logrus.Infof("Snapshot %s not found locally", s) } else { logrus.Errorf("Failed to delete local snapshot %s: %v", s, err) @@ -581,9 +538,9 @@ func (e *ETCD) DeleteSnapshots(ctx context.Context, snapshots []string) (*manage logrus.Infof("Snapshot %s deleted locally", s) } - if e.config.EtcdS3 { - if err := e.s3.deleteSnapshot(ctx, s); err != nil { - if isNotExist(err) { + if s3client != nil { + if err := s3client.DeleteSnapshot(ctx, s); err 
!= nil { + if snapshot.IsNotExist(err) { logrus.Infof("Snapshot %s not found in S3", s) } else { logrus.Errorf("Failed to delete S3 snapshot %s: %v", s, err) @@ -599,13 +556,13 @@ func (e *ETCD) DeleteSnapshots(ctx context.Context, snapshots []string) (*manage } func (e *ETCD) deleteSnapshot(snapshotPath string) error { - dir := filepath.Join(filepath.Dir(snapshotPath), "..", metadataDir) + dir := filepath.Join(filepath.Dir(snapshotPath), "..", snapshot.MetadataDir) filename := filepath.Base(snapshotPath) metadataPath := filepath.Join(dir, filename) err := os.Remove(snapshotPath) if err == nil || os.IsNotExist(err) { - if merr := os.Remove(metadataPath); err != nil && !isNotExist(err) { + if merr := os.Remove(metadataPath); err != nil && !snapshot.IsNotExist(err) { err = merr } } @@ -613,27 +570,16 @@ func (e *ETCD) deleteSnapshot(snapshotPath string) error { return err } -func marshalSnapshotFile(sf snapshotFile) ([]byte, error) { - if sf.metadataSource != nil { - if m, err := json.Marshal(sf.metadataSource.Data); err != nil { - logrus.Debugf("Error attempting to marshal extra metadata contained in %s ConfigMap, error: %v", snapshotExtraMetadataConfigMapName, err) - } else { - sf.Metadata = base64.StdEncoding.EncodeToString(m) - } - } - return json.Marshal(sf) -} - // addSnapshotData syncs an internal snapshotFile representation to an ETCDSnapshotFile resource // of the same name. Resources will be created or updated as necessary. -func (e *ETCD) addSnapshotData(sf snapshotFile) error { +func (e *ETCD) addSnapshotData(sf snapshot.File) error { // make sure the K3s factory is initialized. for e.config.Runtime.K3s == nil { runtime.Gosched() } snapshots := e.config.Runtime.K3s.K3s().V1().ETCDSnapshotFile() - esfName := generateSnapshotName(sf) + esfName := sf.GenerateName() var esf *k3s.ETCDSnapshotFile return retry.OnError(snapshotDataBackoff, func(err error) bool { @@ -654,7 +600,7 @@ func (e *ETCD) addSnapshotData(sf snapshotFile) error { // mutate object existing := esf.DeepCopyObject() - sf.toETCDSnapshotFile(esf) + sf.ToETCDSnapshotFile(esf) // create or update as necessary if esf.CreationTimestamp.IsZero() { @@ -671,48 +617,10 @@ func (e *ETCD) addSnapshotData(sf snapshotFile) error { }) } -// generateSnapshotConfigMapKey generates a derived name for the snapshot that is safe for use -// as a configmap key. -func generateSnapshotConfigMapKey(sf snapshotFile) string { - name := invalidKeyChars.ReplaceAllString(sf.Name, "_") - if sf.NodeName == "s3" { - return "s3-" + name - } - return "local-" + name -} - -// generateSnapshotName generates a derived name for the snapshot that is safe for use -// as a resource name. -func generateSnapshotName(sf snapshotFile) string { - name := strings.ToLower(sf.Name) - nodename := sf.nodeSource - if nodename == "" { - nodename = sf.NodeName - } - // Include a digest of the hostname and location to ensure unique resource - // names. Snapshots should already include the hostname, but this ensures we - // don't accidentally hide records if a snapshot with the same name somehow - // exists on multiple nodes. - digest := sha256.Sum256([]byte(nodename + sf.Location)) - // If the lowercase filename isn't usable as a resource name, and short enough that we can include a prefix and suffix, - // generate a safe name derived from the hostname and timestamp. 
-	if errs := validation.IsDNS1123Subdomain(name); len(errs) != 0 || len(name)+13 > validation.DNS1123SubdomainMaxLength {
-		nodename, _, _ := strings.Cut(nodename, ".")
-		name = fmt.Sprintf("etcd-snapshot-%s-%d", nodename, sf.CreatedAt.Unix())
-		if sf.Compressed {
-			name += compressedExtension
-		}
-	}
-	if sf.NodeName == "s3" {
-		return "s3-" + name + "-" + hex.EncodeToString(digest[0:])[0:6]
-	}
-	return "local-" + name + "-" + hex.EncodeToString(digest[0:])[0:6]
-}
-
 // generateETCDSnapshotFileConfigMapKey generates a key that the corresponding
 // snapshotFile would be stored under in the legacy configmap
 func generateETCDSnapshotFileConfigMapKey(esf k3s.ETCDSnapshotFile) string {
-	name := invalidKeyChars.ReplaceAllString(esf.Spec.SnapshotName, "_")
+	name := snapshot.InvalidKeyChars.ReplaceAllString(esf.Spec.SnapshotName, "_")
 	if esf.Spec.S3 != nil {
 		return "s3-" + name
 	}
@@ -757,19 +665,21 @@ func (e *ETCD) ReconcileSnapshotData(ctx context.Context) error {
 	nodeNames := []string{os.Getenv("NODE_NAME")}
 
 	// Get snapshots from S3
-	if e.config.EtcdS3 {
-		if err := e.initS3IfNil(ctx); err != nil {
+	if e.config.EtcdS3 != nil {
+		if s3client, err := e.getS3Client(ctx); err != nil {
 			logrus.Warnf("Unable to initialize S3 client: %v", err)
-			return err
-		}
-
-		if s3Snapshots, err := e.s3.listSnapshots(ctx); err != nil {
-			logrus.Errorf("Error retrieving S3 snapshots for reconciliation: %v", err)
+			if !errors.Is(err, s3.ErrNoConfigSecret) {
+				return errors.Wrap(err, "failed to initialize S3 client")
+			}
 		} else {
-			for k, v := range s3Snapshots {
-				snapshotFiles[k] = v
+			if s3Snapshots, err := s3client.ListSnapshots(ctx); err != nil {
+				logrus.Errorf("Error retrieving S3 snapshots for reconciliation: %v", err)
+			} else {
+				for k, v := range s3Snapshots {
+					snapshotFiles[k] = v
+				}
+				nodeNames = append(nodeNames, "s3")
 			}
-			nodeNames = append(nodeNames, "s3")
 		}
 	}
 
@@ -784,9 +694,9 @@ func (e *ETCD) ReconcileSnapshotData(ctx context.Context) error {
 	for sfKey, sf := range snapshotFiles {
 		logrus.Debugf("Found snapshotFile for %s with key %s", sf.Name, sfKey)
 		// if the configmap has data for this snapshot, and local metadata is empty,
 		// deserialize the value from the configmap and attempt to load it.
-		if cmSnapshotValue := snapshotConfigMap.Data[sfKey]; cmSnapshotValue != "" && sf.Metadata == "" && sf.metadataSource == nil {
-			sfTemp := &snapshotFile{}
+ if cmSnapshotValue := snapshotConfigMap.Data[sfKey]; cmSnapshotValue != "" && sf.Metadata == "" && sf.MetadataSource == nil { + sfTemp := &snapshot.File{} if err := json.Unmarshal([]byte(cmSnapshotValue), sfTemp); err != nil { logrus.Warnf("Failed to unmarshal configmap data for snapshot %s: %v", sfKey, err) continue @@ -799,7 +709,7 @@ func (e *ETCD) ReconcileSnapshotData(ctx context.Context) error { labelSelector := &metav1.LabelSelector{ MatchExpressions: []metav1.LabelSelectorRequirement{{ - Key: labelStorageNode, + Key: snapshot.LabelStorageNode, Operator: metav1.LabelSelectorOpIn, Values: nodeNames, }}, @@ -823,7 +733,7 @@ func (e *ETCD) ReconcileSnapshotData(ctx context.Context) error { for _, esf := range esfList.Items { sfKey := generateETCDSnapshotFileConfigMapKey(esf) logrus.Debugf("Found ETCDSnapshotFile for %s with key %s", esf.Spec.SnapshotName, sfKey) - if sf, ok := snapshotFiles[sfKey]; ok && generateSnapshotName(sf) == esf.Name { + if sf, ok := snapshotFiles[sfKey]; ok && sf.GenerateName() == esf.Name { // exists in both and names match, don't need to sync delete(snapshotFiles, sfKey) } else { @@ -835,7 +745,7 @@ func (e *ETCD) ReconcileSnapshotData(ctx context.Context) error { } } if ok { - logrus.Debugf("Name of ETCDSnapshotFile for snapshotFile with key %s does not match: %s vs %s", sfKey, generateSnapshotName(sf), esf.Name) + logrus.Debugf("Name of ETCDSnapshotFile for snapshotFile with key %s does not match: %s vs %s", sfKey, sf.GenerateName(), esf.Name) } else { logrus.Debugf("Key %s not found in snapshotFile list", sfKey) } @@ -904,7 +814,7 @@ func (e *ETCD) ReconcileSnapshotData(ctx context.Context) error { "path": "/metadata/annotations/" + strings.ReplaceAll(annotationLocalReconciled, "/", "~1"), }, } - if e.config.EtcdS3 { + if e.config.EtcdS3 != nil { patch = append(patch, map[string]string{ "op": "add", "value": now, @@ -942,18 +852,18 @@ func snapshotRetention(retention int, snapshotPrefix string, snapshotDir string) logrus.Infof("Applying snapshot retention=%d to local snapshots with prefix %s in %s", retention, snapshotPrefix, snapshotDir) - var snapshotFiles []snapshotFile + var snapshotFiles []snapshot.File if err := filepath.Walk(snapshotDir, func(path string, info os.FileInfo, err error) error { if info.IsDir() || err != nil { return err } if strings.HasPrefix(info.Name(), snapshotPrefix) { - basename, compressed := strings.CutSuffix(info.Name(), compressedExtension) + basename, compressed := strings.CutSuffix(info.Name(), snapshot.CompressedExtension) ts, err := strconv.ParseInt(basename[strings.LastIndexByte(basename, '-')+1:], 10, 64) if err != nil { ts = info.ModTime().Unix() } - snapshotFiles = append(snapshotFiles, snapshotFile{Name: info.Name(), CreatedAt: &metav1.Time{Time: time.Unix(ts, 0)}, Compressed: compressed}) + snapshotFiles = append(snapshotFiles, snapshot.File{Name: info.Name(), CreatedAt: &metav1.Time{Time: time.Unix(ts, 0)}, Compressed: compressed}) } return nil }); err != nil { @@ -971,7 +881,7 @@ func snapshotRetention(retention int, snapshotPrefix string, snapshotDir string) deleted := []string{} for _, df := range snapshotFiles[retention:] { snapshotPath := filepath.Join(snapshotDir, df.Name) - metadataPath := filepath.Join(snapshotDir, "..", metadataDir, df.Name) + metadataPath := filepath.Join(snapshotDir, "..", snapshot.MetadataDir, df.Name) logrus.Infof("Removing local snapshot %s", snapshotPath) if err := os.Remove(snapshotPath); err != nil { return deleted, err @@ -985,13 +895,6 @@ func snapshotRetention(retention int, 
snapshotPrefix string, snapshotDir string)
 	return deleted, nil
 }
 
-func isNotExist(err error) bool {
-	if resp := minio.ToErrorResponse(err); resp.StatusCode == http.StatusNotFound || os.IsNotExist(err) {
-		return true
-	}
-	return false
-}
-
 // saveSnapshotMetadata writes extra metadata to disk.
 // The upload is silently skipped if no extra metadata is provided.
 func saveSnapshotMetadata(snapshotPath string, extraMetadata *v1.ConfigMap) error {
@@ -999,7 +902,7 @@ func saveSnapshotMetadata(snapshotPath string, extraMetadata *v1.ConfigMap) erro
 		return nil
 	}
 
-	dir := filepath.Join(filepath.Dir(snapshotPath), "..", metadataDir)
+	dir := filepath.Join(filepath.Dir(snapshotPath), "..", snapshot.MetadataDir)
 	filename := filepath.Base(snapshotPath)
 	metadataPath := filepath.Join(dir, filename)
 	logrus.Infof("Saving snapshot metadata to %s", metadataPath)
@@ -1012,135 +915,3 @@ func saveSnapshotMetadata(snapshotPath string, extraMetadata *v1.ConfigMap) erro
 	}
 	return os.WriteFile(metadataPath, m, 0700)
 }
-
-func (sf *snapshotFile) fromETCDSnapshotFile(esf *k3s.ETCDSnapshotFile) {
-	if esf == nil {
-		panic("cannot convert from nil ETCDSnapshotFile")
-	}
-
-	sf.Name = esf.Spec.SnapshotName
-	sf.Location = esf.Spec.Location
-	sf.CreatedAt = esf.Status.CreationTime
-	sf.nodeSource = esf.Spec.NodeName
-	sf.Compressed = strings.HasSuffix(esf.Spec.SnapshotName, compressedExtension)
-
-	if esf.Status.ReadyToUse != nil && *esf.Status.ReadyToUse {
-		sf.Status = successfulSnapshotStatus
-	} else {
-		sf.Status = failedSnapshotStatus
-	}
-
-	if esf.Status.Size != nil {
-		sf.Size = esf.Status.Size.Value()
-	}
-
-	if esf.Status.Error != nil {
-		if esf.Status.Error.Time != nil {
-			sf.CreatedAt = esf.Status.Error.Time
-		}
-		message := "etcd snapshot failed"
-		if esf.Status.Error.Message != nil {
-			message = *esf.Status.Error.Message
-		}
-		sf.Message = base64.StdEncoding.EncodeToString([]byte(message))
-	}
-
-	if len(esf.Spec.Metadata) > 0 {
-		if b, err := json.Marshal(esf.Spec.Metadata); err != nil {
-			logrus.Warnf("Failed to marshal metadata for %s: %v", esf.Name, err)
-		} else {
-			sf.Metadata = base64.StdEncoding.EncodeToString(b)
-		}
-	}
-
-	if tokenHash := esf.Annotations[annotationTokenHash]; tokenHash != "" {
-		sf.tokenHash = tokenHash
-	}
-
-	if esf.Spec.S3 == nil {
-		sf.NodeName = esf.Spec.NodeName
-	} else {
-		sf.NodeName = "s3"
-		sf.S3 = &s3Config{
-			Endpoint:      esf.Spec.S3.Endpoint,
-			EndpointCA:    esf.Spec.S3.EndpointCA,
-			SkipSSLVerify: esf.Spec.S3.SkipSSLVerify,
-			Bucket:        esf.Spec.S3.Bucket,
-			Region:        esf.Spec.S3.Region,
-			Folder:        esf.Spec.S3.Prefix,
-			Insecure:      esf.Spec.S3.Insecure,
-		}
-	}
-}
-
-func (sf *snapshotFile) toETCDSnapshotFile(esf *k3s.ETCDSnapshotFile) {
-	if esf == nil {
-		panic("cannot convert to nil ETCDSnapshotFile")
-	}
-	esf.Spec.SnapshotName = sf.Name
-	esf.Spec.Location = sf.Location
-	esf.Status.CreationTime = sf.CreatedAt
-	esf.Status.ReadyToUse = ptr.To(sf.Status == successfulSnapshotStatus)
-	esf.Status.Size = resource.NewQuantity(sf.Size, resource.DecimalSI)
-
-	if sf.nodeSource != "" {
-		esf.Spec.NodeName = sf.nodeSource
-	} else {
-		esf.Spec.NodeName = sf.NodeName
-	}
-
-	if sf.Message != "" {
-		var message string
-		b, err := base64.StdEncoding.DecodeString(sf.Message)
-		if err != nil {
-			logrus.Warnf("Failed to decode error message for %s: %v", sf.Name, err)
-			message = "etcd snapshot failed"
-		} else {
-			message = string(b)
-		}
-		esf.Status.Error = &k3s.ETCDSnapshotError{
-			Time:    sf.CreatedAt,
-			Message: &message,
-		}
-	}
-
-	if sf.metadataSource != nil {
-		esf.Spec.Metadata = sf.metadataSource.Data
-	} else if sf.Metadata != "" {
-		metadata, err := base64.StdEncoding.DecodeString(sf.Metadata)
-		if err != nil {
-			logrus.Warnf("Failed to decode metadata for %s: %v", sf.Name, err)
-		} else {
-			if err := json.Unmarshal(metadata, &esf.Spec.Metadata); err != nil {
-				logrus.Warnf("Failed to unmarshal metadata for %s: %v", sf.Name, err)
-			}
-		}
-	}
-
-	if esf.ObjectMeta.Labels == nil {
-		esf.ObjectMeta.Labels = map[string]string{}
-	}
-
-	if esf.ObjectMeta.Annotations == nil {
-		esf.ObjectMeta.Annotations = map[string]string{}
-	}
-
-	if sf.tokenHash != "" {
-		esf.ObjectMeta.Annotations[annotationTokenHash] = sf.tokenHash
-	}
-
-	if sf.S3 == nil {
-		esf.ObjectMeta.Labels[labelStorageNode] = esf.Spec.NodeName
-	} else {
-		esf.ObjectMeta.Labels[labelStorageNode] = "s3"
-		esf.Spec.S3 = &k3s.ETCDSnapshotS3{
-			Endpoint:      sf.S3.Endpoint,
-			EndpointCA:    sf.S3.EndpointCA,
-			SkipSSLVerify: sf.S3.SkipSSLVerify,
-			Bucket:        sf.S3.Bucket,
-			Region:        sf.S3.Region,
-			Prefix:        sf.S3.Folder,
-			Insecure:      sf.S3.Insecure,
-		}
-	}
-}
diff --git a/pkg/etcd/snapshot/types.go b/pkg/etcd/snapshot/types.go
new file mode 100644
index 000000000000..00e93cc6d8ca
--- /dev/null
+++ b/pkg/etcd/snapshot/types.go
@@ -0,0 +1,270 @@
+package snapshot
+
+import (
+	"crypto/sha256"
+	"encoding/base64"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"os"
+	"regexp"
+	"strings"
+
+	k3s "github.com/k3s-io/k3s/pkg/apis/k3s.cattle.io/v1"
+	"github.com/k3s-io/k3s/pkg/daemons/config"
+	"github.com/k3s-io/k3s/pkg/version"
+	"github.com/minio/minio-go/v7"
+	"github.com/sirupsen/logrus"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/validation"
+	"k8s.io/utils/ptr"
+)
+
+type SnapshotStatus string
+
+const (
+	SuccessfulStatus SnapshotStatus = "successful"
+	FailedStatus     SnapshotStatus = "failed"
+
+	CompressedExtension = ".zip"
+	MetadataDir         = ".metadata"
+)
+
+var (
+	InvalidKeyChars = regexp.MustCompile(`[^-._a-zA-Z0-9]`)
+
+	LabelStorageNode    = "etcd." + version.Program + ".cattle.io/snapshot-storage-node"
+	AnnotationTokenHash = "etcd." + version.Program + ".cattle.io/snapshot-token-hash"
+
+	ExtraMetadataConfigMapName = version.Program + "-etcd-snapshot-extra-metadata"
+)
+
+type S3Config struct {
+	config.EtcdS3
+	// Mask these fields in the embedded struct to avoid serializing their values in the snapshotFile record
+	AccessKey    string          `json:"accessKey,omitempty"`
+	ConfigSecret string          `json:"configSecret,omitempty"`
+	Proxy        string          `json:"proxy,omitempty"`
+	SecretKey    string          `json:"secretKey,omitempty"`
+	Timeout      metav1.Duration `json:"timeout,omitempty"`
+}
+
+// File represents a single snapshot and its metadata.
+type File struct {
+	Name string `json:"name"`
+	// Location contains the full path of the snapshot. For
+	// local paths, the location will be prefixed with "file://".
+	Location   string         `json:"location,omitempty"`
+	Metadata   string         `json:"metadata,omitempty"`
+	Message    string         `json:"message,omitempty"`
+	NodeName   string         `json:"nodeName,omitempty"`
+	CreatedAt  *metav1.Time   `json:"createdAt,omitempty"`
+	Size       int64          `json:"size,omitempty"`
+	Status     SnapshotStatus `json:"status,omitempty"`
+	S3         *S3Config      `json:"s3Config,omitempty"`
+	Compressed bool           `json:"compressed"`
+
+	// these fields are used for the internal representation of the snapshot
+	// to populate other fields before serialization to the legacy configmap.
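+	// All three carry a `json:"-"` tag below, so they are never serialized
+	// into the record itself.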
+	MetadataSource *v1.ConfigMap `json:"-"`
+	NodeSource     string        `json:"-"`
+	TokenHash      string        `json:"-"`
+}
+
+// GenerateConfigMapKey generates a derived name for the snapshot that is safe for use
+// as a configmap key.
+func (sf *File) GenerateConfigMapKey() string {
+	name := InvalidKeyChars.ReplaceAllString(sf.Name, "_")
+	if sf.NodeName == "s3" {
+		return "s3-" + name
+	}
+	return "local-" + name
+}
+
+// GenerateName generates a derived name for the snapshot that is safe for use
+// as a resource name.
+func (sf *File) GenerateName() string {
+	name := strings.ToLower(sf.Name)
+	nodename := sf.NodeSource
+	if nodename == "" {
+		nodename = sf.NodeName
+	}
+	// Include a digest of the hostname and location to ensure unique resource
+	// names. Snapshots should already include the hostname, but this ensures we
+	// don't accidentally hide records if a snapshot with the same name somehow
+	// exists on multiple nodes.
+	digest := sha256.Sum256([]byte(nodename + sf.Location))
+	// If the lowercase filename isn't usable as a resource name, and short enough that we can include a prefix and suffix,
+	// generate a safe name derived from the hostname and timestamp.
+	if errs := validation.IsDNS1123Subdomain(name); len(errs) != 0 || len(name)+13 > validation.DNS1123SubdomainMaxLength {
+		nodename, _, _ := strings.Cut(nodename, ".")
+		name = fmt.Sprintf("etcd-snapshot-%s-%d", nodename, sf.CreatedAt.Unix())
+		if sf.Compressed {
+			name += CompressedExtension
+		}
+	}
+	if sf.NodeName == "s3" {
+		return "s3-" + name + "-" + hex.EncodeToString(digest[0:])[0:6]
+	}
+	return "local-" + name + "-" + hex.EncodeToString(digest[0:])[0:6]
+}
+
+// FromETCDSnapshotFile translates fields to the File from the ETCDSnapshotFile
+func (sf *File) FromETCDSnapshotFile(esf *k3s.ETCDSnapshotFile) {
+	if esf == nil {
+		panic("cannot convert from nil ETCDSnapshotFile")
+	}
+
+	sf.Name = esf.Spec.SnapshotName
+	sf.Location = esf.Spec.Location
+	sf.CreatedAt = esf.Status.CreationTime
+	sf.NodeSource = esf.Spec.NodeName
+	sf.Compressed = strings.HasSuffix(esf.Spec.SnapshotName, CompressedExtension)
+
+	if esf.Status.ReadyToUse != nil && *esf.Status.ReadyToUse {
+		sf.Status = SuccessfulStatus
+	} else {
+		sf.Status = FailedStatus
+	}
+
+	if esf.Status.Size != nil {
+		sf.Size = esf.Status.Size.Value()
+	}
+
+	if esf.Status.Error != nil {
+		if esf.Status.Error.Time != nil {
+			sf.CreatedAt = esf.Status.Error.Time
+		}
+		message := "etcd snapshot failed"
+		if esf.Status.Error.Message != nil {
+			message = *esf.Status.Error.Message
+		}
+		sf.Message = base64.StdEncoding.EncodeToString([]byte(message))
+	}
+
+	if len(esf.Spec.Metadata) > 0 {
+		if b, err := json.Marshal(esf.Spec.Metadata); err != nil {
+			logrus.Warnf("Failed to marshal metadata for %s: %v", esf.Name, err)
+		} else {
+			sf.Metadata = base64.StdEncoding.EncodeToString(b)
+		}
+	}
+
+	if tokenHash := esf.Annotations[AnnotationTokenHash]; tokenHash != "" {
+		sf.TokenHash = tokenHash
+	}
+
+	if esf.Spec.S3 == nil {
+		sf.NodeName = esf.Spec.NodeName
+	} else {
+		sf.NodeName = "s3"
+		sf.S3 = &S3Config{
+			EtcdS3: config.EtcdS3{
+				Endpoint:      esf.Spec.S3.Endpoint,
+				EndpointCA:    esf.Spec.S3.EndpointCA,
+				SkipSSLVerify: esf.Spec.S3.SkipSSLVerify,
+				Bucket:        esf.Spec.S3.Bucket,
+				Region:        esf.Spec.S3.Region,
+				Folder:        esf.Spec.S3.Prefix,
+				Insecure:      esf.Spec.S3.Insecure,
+			},
+		}
+	}
+}
+
+// ToETCDSnapshotFile translates fields from the File to the ETCDSnapshotFile
+func (sf *File) ToETCDSnapshotFile(esf *k3s.ETCDSnapshotFile) {
+	if esf == nil {
+		panic("cannot convert to nil ETCDSnapshotFile")
+	}
+	esf.Spec.SnapshotName = sf.Name
+	esf.Spec.Location = sf.Location
+	esf.Status.CreationTime = sf.CreatedAt
+	esf.Status.ReadyToUse = ptr.To(sf.Status == SuccessfulStatus)
+	esf.Status.Size = resource.NewQuantity(sf.Size, resource.DecimalSI)
+
+	if sf.NodeSource != "" {
+		esf.Spec.NodeName = sf.NodeSource
+	} else {
+		esf.Spec.NodeName = sf.NodeName
+	}
+
+	if sf.Message != "" {
+		var message string
+		b, err := base64.StdEncoding.DecodeString(sf.Message)
+		if err != nil {
+			logrus.Warnf("Failed to decode error message for %s: %v", sf.Name, err)
+			message = "etcd snapshot failed"
+		} else {
+			message = string(b)
+		}
+		esf.Status.Error = &k3s.ETCDSnapshotError{
+			Time:    sf.CreatedAt,
+			Message: &message,
+		}
+	}
+
+	if sf.MetadataSource != nil {
+		esf.Spec.Metadata = sf.MetadataSource.Data
+	} else if sf.Metadata != "" {
+		metadata, err := base64.StdEncoding.DecodeString(sf.Metadata)
+		if err != nil {
+			logrus.Warnf("Failed to decode metadata for %s: %v", sf.Name, err)
+		} else {
+			if err := json.Unmarshal(metadata, &esf.Spec.Metadata); err != nil {
+				logrus.Warnf("Failed to unmarshal metadata for %s: %v", sf.Name, err)
+			}
+		}
+	}
+
+	if esf.ObjectMeta.Labels == nil {
+		esf.ObjectMeta.Labels = map[string]string{}
+	}
+
+	if esf.ObjectMeta.Annotations == nil {
+		esf.ObjectMeta.Annotations = map[string]string{}
+	}
+
+	if sf.TokenHash != "" {
+		esf.ObjectMeta.Annotations[AnnotationTokenHash] = sf.TokenHash
+	}
+
+	if sf.S3 == nil {
+		esf.ObjectMeta.Labels[LabelStorageNode] = esf.Spec.NodeName
+	} else {
+		esf.ObjectMeta.Labels[LabelStorageNode] = "s3"
+		esf.Spec.S3 = &k3s.ETCDSnapshotS3{
+			Endpoint:      sf.S3.Endpoint,
+			EndpointCA:    sf.S3.EndpointCA,
+			SkipSSLVerify: sf.S3.SkipSSLVerify,
+			Bucket:        sf.S3.Bucket,
+			Region:        sf.S3.Region,
+			Prefix:        sf.S3.Folder,
+			Insecure:      sf.S3.Insecure,
+		}
+	}
+}
+
+// Marshal returns the JSON encoding of the snapshot File, with metadata inlined as base64.
+func (sf *File) Marshal() ([]byte, error) {
+	if sf.MetadataSource != nil {
+		if m, err := json.Marshal(sf.MetadataSource.Data); err != nil {
+			logrus.Debugf("Error attempting to marshal extra metadata contained in %s ConfigMap, error: %v", ExtraMetadataConfigMapName, err)
+		} else {
+			sf.Metadata = base64.StdEncoding.EncodeToString(m)
+		}
+	}
+	return json.Marshal(sf)
+}
+
+// IsNotExist returns true if the error indicates that the snapshot does not
+// exist: an HTTP 404 (NotFound) response from S3, or os.IsNotExist for local files.
+func IsNotExist(err error) bool {
+	if resp := minio.ToErrorResponse(err); resp.StatusCode == http.StatusNotFound || os.IsNotExist(err) {
+		return true
+	}
+	return false
+}
diff --git a/pkg/etcd/snapshot_controller.go b/pkg/etcd/snapshot_controller.go
index 5d43de645791..9c62cc9c5022 100644
--- a/pkg/etcd/snapshot_controller.go
+++ b/pkg/etcd/snapshot_controller.go
@@ -9,6 +9,7 @@ import (
 	"time"
 
 	apisv1 "github.com/k3s-io/k3s/pkg/apis/k3s.cattle.io/v1"
+	"github.com/k3s-io/k3s/pkg/etcd/snapshot"
 	controllersv1 "github.com/k3s-io/k3s/pkg/generated/controllers/k3s.cattle.io/v1"
 	"github.com/k3s-io/k3s/pkg/util"
 	"github.com/k3s-io/k3s/pkg/version"
@@ -81,10 +82,10 @@ func (e *etcdSnapshotHandler) sync(key string, esf *apisv1.ETCDSnapshotFile) (*a
 		return nil, nil
 	}
 
-	sf := snapshotFile{}
-	sf.fromETCDSnapshotFile(esf)
-	sfKey := generateSnapshotConfigMapKey(sf)
-	m, err := marshalSnapshotFile(sf)
+	sf := &snapshot.File{}
+	sf.FromETCDSnapshotFile(esf)
+	sfKey := sf.GenerateConfigMapKey()
+	m, err := sf.Marshal()
 	if err != nil {
 		return nil, errors.Wrap(err, "failed to marshal snapshot ConfigMap data")
 	}
@@ -283,9 +284,9 @@ func (e *etcdSnapshotHandler) reconcile() error {
 
 	// Ensure keys for existing snapshots
 	for sfKey, esf := range snapshots {
-		sf := snapshotFile{}
-		sf.fromETCDSnapshotFile(esf)
-		m, err := marshalSnapshotFile(sf)
+		sf := &snapshot.File{}
+		sf.FromETCDSnapshotFile(esf)
+		m, err := sf.Marshal()
 		if err != nil {
 			logrus.Warnf("Failed to marshal snapshot ConfigMap data for %s", sfKey)
 			continue
@@ -327,12 +328,12 @@ func pruneConfigMap(snapshotConfigMap *v1.ConfigMap, pruneCount int) error {
 		return errors.New("unable to reduce snapshot ConfigMap size by eliding old snapshots")
 	}
 
-	var snapshotFiles []snapshotFile
+	var snapshotFiles []snapshot.File
 	retention := len(snapshotConfigMap.Data) - pruneCount
 	for name := range snapshotConfigMap.Data {
-		basename, compressed := strings.CutSuffix(name, compressedExtension)
+		basename, compressed := strings.CutSuffix(name, snapshot.CompressedExtension)
 		ts, _ := strconv.ParseInt(basename[strings.LastIndexByte(basename, '-')+1:], 10, 64)
-		snapshotFiles = append(snapshotFiles, snapshotFile{Name: name, CreatedAt: &metav1.Time{Time: time.Unix(ts, 0)}, Compressed: compressed})
+		snapshotFiles = append(snapshotFiles, snapshot.File{Name: name, CreatedAt: &metav1.Time{Time: time.Unix(ts, 0)}, Compressed: compressed})
 	}
 
 	// sort newest-first so we can prune entries past the retention count
diff --git a/pkg/etcd/snapshot_handler.go b/pkg/etcd/snapshot_handler.go
index 0bae2e0401b7..23eefbc4c45b 100644
--- a/pkg/etcd/snapshot_handler.go
+++ b/pkg/etcd/snapshot_handler.go
@@ -11,8 +11,8 @@ import (
 	"github.com/k3s-io/k3s/pkg/cluster/managed"
 	"github.com/k3s-io/k3s/pkg/daemons/config"
 	"github.com/k3s-io/k3s/pkg/util"
+	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
 type SnapshotOperation string
@@ -24,21 +24,13 @@ const (
 	SnapshotOperationDelete SnapshotOperation = "delete"
 )
 
-type SnapshotRequestS3 struct {
-	s3Config
-	Timeout   metav1.Duration `json:"timeout"`
-	AccessKey string          `json:"accessKey"`
-	SecretKey string          `json:"secretKey"`
-}
-
 type SnapshotRequest struct {
 	Operation SnapshotOperation `json:"operation"`
 	Name      []string          `json:"name,omitempty"`
 	Dir       *string           `json:"dir,omitempty"`
 	Compress  *bool             `json:"compress,omitempty"`
 	Retention *int              `json:"retention,omitempty"`
-
-	S3 *SnapshotRequestS3 `json:"s3,omitempty"`
+	S3        *config.EtcdS3    `json:"s3,omitempty"`
 
 	ctx context.Context
 }
@@ -76,9 +68,12 @@ func (e *ETCD) snapshotHandler() http.Handler {
 }
 
 func (e *ETCD) handleList(rw http.ResponseWriter, req *http.Request) error {
-	if err := e.initS3IfNil(req.Context()); err != nil {
-		util.SendError(err, rw, req, http.StatusBadRequest)
-		return nil
+	if e.config.EtcdS3 != nil {
+		if _, err := e.getS3Client(req.Context()); err != nil {
+			err = errors.Wrap(err, "failed to initialize S3 client")
+			util.SendError(err, rw, req, http.StatusBadRequest)
+			return nil
+		}
 	}
 	sf, err := e.ListSnapshots(req.Context())
 	if sf == nil {
@@ -90,9 +85,12 @@ func (e *ETCD) handleList(rw http.ResponseWriter, req *http.Request) error {
 }
 
 func (e *ETCD) handleSave(rw http.ResponseWriter, req *http.Request) error {
-	if err := e.initS3IfNil(req.Context()); err != nil {
-		util.SendError(err, rw, req, http.StatusBadRequest)
-		return nil
+	if e.config.EtcdS3 != nil {
+		if _, err := e.getS3Client(req.Context()); err != nil {
+			err = errors.Wrap(err, "failed to initialize S3 client")
+			util.SendError(err, rw, req, http.StatusBadRequest)
+			return nil
+		}
 	}
 	sr, err := e.Snapshot(req.Context())
 	if sr == nil {
@@ -104,9 +102,12 @@ func (e *ETCD) handleSave(rw http.ResponseWriter, req *http.Request) error {
 }
 
 func (e *ETCD) handlePrune(rw http.ResponseWriter, req *http.Request) error {
-	if err := e.initS3IfNil(req.Context()); err != nil {
-		util.SendError(err, rw, req, http.StatusBadRequest)
-		return nil
+	if e.config.EtcdS3 != nil {
+		if _, err := e.getS3Client(req.Context()); err != nil {
+			err = errors.Wrap(err, "failed to initialize S3 client")
+			util.SendError(err, rw, req, http.StatusBadRequest)
+			return nil
+		}
 	}
 	sr, err := e.PruneSnapshots(req.Context())
 	if sr == nil {
@@ -118,9 +119,12 @@ func (e *ETCD) handlePrune(rw http.ResponseWriter, req *http.Request) error {
 }
 
 func (e *ETCD) handleDelete(rw http.ResponseWriter, req *http.Request, snapshots []string) error {
-	if err := e.initS3IfNil(req.Context()); err != nil {
-		util.SendError(err, rw, req, http.StatusBadRequest)
-		return nil
+	if e.config.EtcdS3 != nil {
+		if _, err := e.getS3Client(req.Context()); err != nil {
+			err = errors.Wrap(err, "failed to initialize S3 client")
+			util.SendError(err, rw, req, http.StatusBadRequest)
+			return nil
+		}
 	}
 	sr, err := e.DeleteSnapshots(req.Context(), snapshots)
 	if sr == nil {
@@ -149,7 +153,9 @@ func (e *ETCD) withRequest(sr *SnapshotRequest) *ETCD {
 			EtcdSnapshotCompress:  e.config.EtcdSnapshotCompress,
 			EtcdSnapshotName:      e.config.EtcdSnapshotName,
 			EtcdSnapshotRetention: e.config.EtcdSnapshotRetention,
+			EtcdS3:                sr.S3,
 		},
+		s3:      e.s3,
 		name:    e.name,
 		address: e.address,
 		cron:    e.cron,
@@ -168,19 +174,6 @@ func (e *ETCD) withRequest(sr *SnapshotRequest) *ETCD {
 	if sr.Retention != nil {
 		re.config.EtcdSnapshotRetention = *sr.Retention
 	}
-	if sr.S3 != nil {
-		re.config.EtcdS3 = true
-		re.config.EtcdS3AccessKey = sr.S3.AccessKey
-		re.config.EtcdS3BucketName = sr.S3.Bucket
-		re.config.EtcdS3Endpoint = sr.S3.Endpoint
-		re.config.EtcdS3EndpointCA = sr.S3.EndpointCA
-		re.config.EtcdS3Folder = sr.S3.Folder
-		re.config.EtcdS3Insecure = sr.S3.Insecure
-		re.config.EtcdS3Region = sr.S3.Region
-		re.config.EtcdS3SecretKey = sr.S3.SecretKey
-		re.config.EtcdS3SkipSSLVerify = sr.S3.SkipSSLVerify
-		re.config.EtcdS3Timeout = sr.S3.Timeout.Duration
-	}
 
 	return re
 }
diff --git a/scripts/airgap/image-list.txt b/scripts/airgap/image-list.txt
index e0f3fd59419a..407d1b55a5e3 100644
--- a/scripts/airgap/image-list.txt
+++ b/scripts/airgap/image-list.txt
@@ -1,6 +1,6 @@
 docker.io/rancher/klipper-helm:v0.8.4-build20240523
 docker.io/rancher/klipper-lb:v0.4.7
-docker.io/rancher/local-path-provisioner:v0.0.27
+docker.io/rancher/local-path-provisioner:v0.0.28
 docker.io/rancher/mirrored-coredns-coredns:1.10.1
 docker.io/rancher/mirrored-library-busybox:1.36.1
 docker.io/rancher/mirrored-library-traefik:2.10.7
diff --git a/scripts/download b/scripts/download
index f75d9f925c67..1b70cb196d06 100755
--- a/scripts/download
+++ b/scripts/download
@@ -26,13 +26,12 @@ case ${OS} in
   linux)
     git clone --single-branch --branch=${VERSION_RUNC} --depth=1 https://github.com/k3s-io/runc ${RUNC_DIR}
     curl --compressed -sfL https://github.com/k3s-io/k3s-root/releases/download/${VERSION_ROOT}/k3s-root-${ARCH}.tar | tar xf -
-    cp scripts/wg-add.sh bin/aux
     ;;
   windows)
     git clone --single-branch --branch=${VERSION_HCSSHIM} --depth=1 https://github.com/microsoft/hcsshim ${HCSSHIM_DIR}
     ;;
   *)
-    echo "[ERROR] unrecognized opertaing system: ${OS}"
+    echo "[ERROR] unrecognized operating system: ${OS}"
    exit 1
    ;;
 esac
diff --git a/scripts/version.sh b/scripts/version.sh
index 84957bf66ddc..5784e89e18cd 100755
--- a/scripts/version.sh
+++ b/scripts/version.sh
@@ -76,7 +76,7 @@ if [ -z "$VERSION_KUBE_ROUTER" ]; then
     VERSION_KUBE_ROUTER="v0.0.0"
 fi
 
-VERSION_ROOT="v0.13.0"
+VERSION_ROOT="v0.14.0"
 
 DEPENDENCIES_URL="https://raw.githubusercontent.com/kubernetes/kubernetes/${VERSION_K8S}/build/dependencies.yaml"
 VERSION_GOLANG="go"$(curl -sL "${DEPENDENCIES_URL}" | yq e '.dependencies[] | select(.name == "golang: upstream version").version' -)
diff --git a/scripts/wg-add.sh b/scripts/wg-add.sh
deleted file mode 100755
index dc2f4a81c57c..000000000000
--- a/scripts/wg-add.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env bash
-
-auto-mtu() {
-	local mtu=0 endpoint output
-	while read -r _ endpoint; do
-		[[ $endpoint =~ ^\[?([a-z0-9:.]+)\]?:[0-9]+$ ]] || continue
-		output="$(ip route get "${BASH_REMATCH[1]}" || true)"
-		[[ ( $output =~ mtu\ ([0-9]+) || ( $output =~ dev\ ([^ ]+) && $(ip link show dev "${BASH_REMATCH[1]}") =~ mtu\ ([0-9]+) ) ) && ${BASH_REMATCH[1]} -gt $mtu ]] && mtu="${BASH_REMATCH[1]}"
-	done < <(wg show "$1" endpoints)
-	if [[ $mtu -eq 0 ]]; then
-		read -r output < <(ip route show default || true) || true
-		[[ ( $output =~ mtu\ ([0-9]+) || ( $output =~ dev\ ([^ ]+) && $(ip link show dev "${BASH_REMATCH[1]}") =~ mtu\ ([0-9]+) ) ) && ${BASH_REMATCH[1]} -gt $mtu ]] && mtu="${BASH_REMATCH[1]}"
-	fi
-	[[ $mtu -gt 0 ]] || mtu=1500
-	ip link set mtu $(( mtu - 80 )) up dev "$1"
-}
-
-# probe for any modules that may be needed
-modprobe wireguard
-modprobe tun
-
-# try wireguard kernel module first
-ip link add "$1" type wireguard && exit
-
-# try boringtun and let it drop privileges
-boringtun "$1" && auto-mtu "$1" && exit
-
-# try boringtun w/o dropping privileges
-WG_SUDO=1 boringtun "$1" && auto-mtu "$1" && exit
-
-# try wireguard-go - p.s. should not use wireguard-go, it leaks memory
-WG_I_PREFER_BUGGY_USERSPACE_TO_POLISHED_KMOD=1 wireguard-go "$1" && auto-mtu "$1" && exit
-
-exit 1
diff --git a/tests/e2e/s3/Vagrantfile b/tests/e2e/s3/Vagrantfile
index 652a990c12c5..75c44426607f 100644
--- a/tests/e2e/s3/Vagrantfile
+++ b/tests/e2e/s3/Vagrantfile
@@ -46,13 +46,8 @@ def provision(vm, role, role_num, node_num)
           cluster-init: true
           etcd-snapshot-schedule-cron: '*/1 * * * *'
          etcd-snapshot-retention: 2
-          etcd-s3-insecure: true
-          etcd-s3-bucket: test-bucket
-          etcd-s3-folder: test-folder
           etcd-s3: true
-          etcd-s3-endpoint: localhost:9090
-          etcd-s3-skip-ssl-verify: true
-          etcd-s3-access-key: test
+          etcd-s3-config-secret: k3s-etcd-s3-config
         YAML
         k3s.env = %W[K3S_KUBECONFIG_MODE=0644 #{install_type}]
         k3s.config_mode = '0644' # side-step https://github.com/k3s-io/k3s/issues/4321
diff --git a/tests/e2e/s3/s3_test.go b/tests/e2e/s3/s3_test.go
index f1aee914a21a..ac203f63d4e0 100644
--- a/tests/e2e/s3/s3_test.go
+++ b/tests/e2e/s3/s3_test.go
@@ -87,7 +87,31 @@ var _ = Describe("Verify Create", Ordered, func() {
 			fmt.Println(res)
 			Expect(err).NotTo(HaveOccurred())
 		})
-		It("save s3 snapshot", func() {
+		It("save s3 snapshot using CLI", func() {
+			res, err := e2e.RunCmdOnNode("k3s etcd-snapshot save "+
+				"--etcd-s3-insecure=true "+
+				"--etcd-s3-bucket=test-bucket "+
+				"--etcd-s3-folder=test-folder "+
+				"--etcd-s3-endpoint=localhost:9090 "+
+				"--etcd-s3-skip-ssl-verify=true "+
+				"--etcd-s3-access-key=test ",
+				serverNodeNames[0])
+			Expect(err).NotTo(HaveOccurred())
+			Expect(res).To(ContainSubstring("Snapshot on-demand-server-0"))
+		})
+		It("creates s3 config secret", func() {
+			res, err := e2e.RunCmdOnNode("k3s kubectl create secret generic k3s-etcd-s3-config --namespace=kube-system "+
+				"--from-literal=etcd-s3-insecure=true "+
+				"--from-literal=etcd-s3-bucket=test-bucket "+
+				"--from-literal=etcd-s3-folder=test-folder "+
+				"--from-literal=etcd-s3-endpoint=localhost:9090 "+
+				"--from-literal=etcd-s3-skip-ssl-verify=true "+
+				"--from-literal=etcd-s3-access-key=test ",
+				serverNodeNames[0])
+			Expect(err).NotTo(HaveOccurred())
+			Expect(res).To(ContainSubstring("secret/k3s-etcd-s3-config created"))
+		})
+		It("save s3 snapshot using secret", func() {
 			res, err := e2e.RunCmdOnNode("k3s etcd-snapshot save", serverNodeNames[0])
 			Expect(err).NotTo(HaveOccurred())
 			Expect(res).To(ContainSubstring("Snapshot on-demand-server-0"))
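For illustration, a minimal sketch of how the relocated `snapshot.File` helpers compose, assuming the `pkg/etcd/snapshot` package builds as added above; the values and the `main` wrapper are illustrative only, not taken from K3s itself:

```go
package main

import (
	"fmt"
	"time"

	"github.com/k3s-io/k3s/pkg/etcd/snapshot"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	// Illustrative values; in K3s these fields are populated by the snapshot
	// save and list paths rather than constructed by hand.
	sf := &snapshot.File{
		Name:      "on-demand-server-0-1719000000",
		NodeName:  "server-0",
		Location:  "file:///var/lib/rancher/k3s/server/db/snapshots/on-demand-server-0-1719000000",
		CreatedAt: &metav1.Time{Time: time.Unix(1719000000, 0)},
		Status:    snapshot.SuccessfulStatus,
	}

	// Derived names are prefixed with the storage type ("local" or "s3");
	// resource names also get a short digest of the node name and location.
	fmt.Println(sf.GenerateConfigMapKey()) // local-on-demand-server-0-1719000000
	fmt.Println(sf.GenerateName())         // local-on-demand-server-0-1719000000-<6 hex chars>

	// Marshal inlines any extra metadata as base64 before JSON encoding.
	if b, err := sf.Marshal(); err == nil {
		fmt.Println(string(b))
	}
}
```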