Don't failfast on e2e tests (#6363)
* Remove failfast from the e2e tests.

Signed-off-by: Michael Montgomery <mmontg1@gmail.com>
Co-authored-by: Michael Morello <michael.morello@gmail.com>
Co-authored-by: Peter Brachwitz <peter.brachwitz@gmail.com>
Co-authored-by: Thibault Richard <thbkrkr@users.noreply.github.com>
4 people committed Mar 29, 2023
1 parent 3b3693a commit bd3f88e
Showing 16 changed files with 310 additions and 53 deletions.
3 changes: 2 additions & 1 deletion .buildkite/e2e/pipeline-gen/pipeline.tpl.yaml
@@ -86,7 +86,7 @@ steps:

artifact_paths:
- tests-report/*.xml
- "eck-diagnostic-*.zip"
- "eck-diagnostic*.zip"

{{- end }}

@@ -112,6 +112,7 @@ steps:

commands:
- .buildkite/scripts/test/set-deployer-config.sh
- .buildkite/scripts/common/cp-diagnostics.sh

{{- if not $test.Dind }}
- make run-deployer
15 changes: 15 additions & 0 deletions .buildkite/scripts/common/cp-diagnostics.sh
@@ -0,0 +1,15 @@
#!/usr/bin/env bash

set -euo pipefail

# Triggered by Buildkite in the cleanup step: makes sure that diagnostics are copied from the bucket
# to the local Buildkite agent so they can be uploaded as artifacts.
main() {
    # If diagnostics exist in the remote bucket, copy them to the local agent to be picked up as Buildkite artifacts, then remove them from the bucket.
    if gsutil ls "gs://eck-e2e-buildkite-artifacts/jobs/$CLUSTER_NAME/eck-diagnostic*.zip" 2> /dev/null ; then
        gsutil cp "gs://eck-e2e-buildkite-artifacts/jobs/$CLUSTER_NAME/eck-diagnostic*.zip" .
        gsutil rm "gs://eck-e2e-buildkite-artifacts/jobs/$CLUSTER_NAME/eck-diagnostic*.zip"
    fi
}

main
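For reference, a minimal sketch of running the script by hand, assuming gsutil is already authenticated against the bucket and using a hypothetical cluster name; any copied archives land in the current directory:

    # hypothetical values, for illustration only
    export CLUSTER_NAME=eck-e2e-pr-1234-0
    .buildkite/scripts/common/cp-diagnostics.sh
    ls eck-diagnostic*.zip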
3 changes: 3 additions & 0 deletions .ci/setenvconfig
@@ -35,6 +35,7 @@ if [[ ${BUILD_ID:-} != "" ]]; then
VAULT_ROOT_PATH="secret/devops-ci/cloud-on-k8s"

# BUILD_NUMBER is set by Jenkins
# JOB_NAME is set by Jenkins
# GCLOUD_PROJECT, VAULT_ROLE_ID and VAULT_SECRET_ID are set in the Jenkins pipeline definitions

# Buildkite
@@ -47,6 +48,7 @@ elif [[ ${BUILDKITE_BUILD_ID:-} != "" ]]; then
GCLOUD_PROJECT=$(vault read -field=gcloud_project ${VAULT_ROOT_PATH}/k8s)

BUILD_NUMBER=${BUILDKITE_BUILD_NUMBER}
JOB_NAME=${BUILDKITE_PIPELINE_NAME}

JKS_PARAM_OPERATOR_IMAGE=$(buildkite-agent meta-data get operator-image)

@@ -296,6 +298,7 @@ MONITORING_SECRETS=${PROJECT_PATH}/.ci/monitoring-secrets.json
PIPELINE=e2e/main
BUILD_NUMBER=$BUILD_NUMBER
JOB_NAME=$JOB_NAME
E2E_PROVIDER=gke
CLUSTER_NAME=${clusterName}
TESTS_MATCH=$TESTS_MATCH
3 changes: 3 additions & 0 deletions .gitignore
@@ -74,3 +74,6 @@ deploy/eck-stack/charts/*

# Ignore eck-stack lockfile to allow upgrading of dependencies/child charts.
deploy/eck-stack/Chart.lock

# Ignore binary for buildkite pipeline-gen
.buildkite/e2e/pipeline-gen/pipeline-gen
20 changes: 20 additions & 0 deletions config/e2e/rbac.yaml
@@ -73,6 +73,13 @@ rules:
- "pods/exec"
verbs:
- "create"
# to allow eck-diagnostics to get pod logs
- apiGroups:
- ""
resources:
- "pods/log"
verbs:
- "get"
- apiGroups:
- ""
resources:
@@ -205,6 +212,19 @@ rules:
- update
- patch
- delete
- apiGroups:
- apm.k8s.elastic.co
resources:
- apmservers
- apmservers/status
verbs:
- get
- list
- watch
- create
- update
- patch
- delete
- apiGroups:
- kibana.k8s.elastic.co
resources:
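The new pods/log rule exists so that eck-diagnostics can fetch pod logs. One way to verify the permission is kubectl impersonation; the namespace and service account below are placeholders, not values taken from this commit:

    # hypothetical namespace and service account, for illustration only
    kubectl auth can-i get pods --subresource=log \
      --as=system:serviceaccount:e2e-mercury:e2e-runner -n e2e-mercury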
61 changes: 40 additions & 21 deletions hack/deployer/runner/gke.go
@@ -24,6 +24,7 @@ const (
GKEDriverID = "gke"
GKEVaultPath = "ci-gcp-k8s-operator"
GKEServiceAccountVaultFieldName = "service-account"
GoogleCloudProjectCtxKey = "GCloudProject"
DefaultGKERunConfigTemplate = `id: gke-dev
overrides:
clusterName: %s-dev-cluster
@@ -76,18 +77,18 @@ func (gdf *GKEDriverFactory) Create(plan Plan) (Driver, error) {
return &GKEDriver{
plan: plan,
ctx: map[string]interface{}{
"GCloudProject": plan.Gke.GCloudProject,
"ClusterName": plan.ClusterName,
"PVCPrefix": pvcPrefix,
"PlanId": plan.Id,
"Region": plan.Gke.Region,
"KubernetesVersion": plan.KubernetesVersion,
"MachineType": plan.MachineType,
"LocalSsdCount": plan.Gke.LocalSsdCount,
"GcpScopes": plan.Gke.GcpScopes,
"NodeCountPerZone": plan.Gke.NodeCountPerZone,
"ClusterIPv4CIDR": clusterIPv4CIDR,
"ServicesIPv4CIDR": servicesIPv4CIDR,
GoogleCloudProjectCtxKey: plan.Gke.GCloudProject,
"ClusterName": plan.ClusterName,
"PVCPrefix": pvcPrefix,
"PlanId": plan.Id,
"Region": plan.Gke.Region,
"KubernetesVersion": plan.KubernetesVersion,
"MachineType": plan.MachineType,
"LocalSsdCount": plan.Gke.LocalSsdCount,
"GcpScopes": plan.Gke.GcpScopes,
"NodeCountPerZone": plan.Gke.NodeCountPerZone,
"ClusterIPv4CIDR": clusterIPv4CIDR,
"ServicesIPv4CIDR": servicesIPv4CIDR,
},
vaultClient: c,
}, nil
@@ -96,7 +97,7 @@ func (gdf *GKEDriverFactory) Create(plan Plan) (Driver, error) {
func (d *GKEDriver) Execute() error {
if err := authToGCP(
d.vaultClient, GKEVaultPath, GKEServiceAccountVaultFieldName,
d.plan.ServiceAccount, false, d.ctx["GCloudProject"],
d.plan.ServiceAccount, false, d.ctx[GoogleCloudProjectCtxKey],
); err != nil {
return err
}
@@ -311,11 +312,29 @@ func (d *GKEDriver) bindRoles() error {
}

func (d *GKEDriver) GetCredentials() error {
if err := authToGCP(
d.vaultClient, GKEVaultPath, GKEServiceAccountVaultFieldName,
d.plan.ServiceAccount, false, d.ctx["GCloudProject"],
); err != nil {
return err
log.Println("Verifying gcloud authentication...")
// The --verbosity flag suppresses warnings and survey prompt output.
out, err := exec.NewCommand(`gcloud auth list --filter=status:ACTIVE --format="value(account)" --verbosity error`).StdoutOnly().OutputList()
if err != nil {
return fmt.Errorf("while retrieving list of credentialed gcloud accounts: %w", err)
}
gcloudProjectInt, ok := d.ctx[GoogleCloudProjectCtxKey]
if !ok {
return fmt.Errorf("while retrieving google cloud project: missing key %s", GoogleCloudProjectCtxKey)
}
gCloudProject, ok := gcloudProjectInt.(string)
if !ok {
return fmt.Errorf("while retrieving google cloud project: key %s was not a string, was %T ", GoogleCloudProjectCtxKey, gcloudProjectInt)
}
// If there is no authenticated user, or the authenticated user is not associated with the configured project,
// authenticate with the credentials stored in Vault.
if len(out) == 0 || (len(out) > 0 && !strings.Contains(out[0], gCloudProject)) {
if err := authToGCP(
d.vaultClient, GKEVaultPath, GKEServiceAccountVaultFieldName,
d.plan.ServiceAccount, false, d.ctx[GoogleCloudProjectCtxKey],
); err != nil {
return fmt.Errorf("while authenticating to GCP: %w", err)
}
}
log.Println("Getting credentials...")
cmd := "gcloud container clusters --project {{.GCloudProject}} get-credentials {{.ClusterName}} --region {{.Region}}"
@@ -370,9 +389,9 @@ func (d *GKEDriver) deleteDisks(disks []string) error {
cmd := `gcloud compute disks delete {{.Name}} --project {{.GCloudProject}} --zone {{.Zone}} --quiet`
err := exec.NewCommand(cmd).
AsTemplate(map[string]interface{}{
"GCloudProject": d.plan.Gke.GCloudProject,
"Name": name,
"Zone": zone,
GoogleCloudProjectCtxKey: d.plan.Gke.GCloudProject,
"Name": name,
"Zone": zone,
}).
Run()
if err != nil {
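The reworked GetCredentials only falls back to authToGCP when no active gcloud account is found, or when the active account does not reference the configured project. The check relies on service account emails embedding the project ID, on the assumption that the CI service account lives in that same project. A sketch of what the command in the code returns, with hypothetical values:

    $ gcloud auth list --filter=status:ACTIVE --format="value(account)" --verbosity error
    deployer@my-gcloud-project.iam.gserviceaccount.com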
22 changes: 11 additions & 11 deletions hack/deployer/runner/ocp.go
@@ -158,17 +158,17 @@ func (d *OCPDriver) Execute() error {
func (d *OCPDriver) create() error {
log.Println("Creating cluster...")
params := map[string]interface{}{
"GCloudProject": d.plan.Ocp.GCloudProject,
"ClusterName": d.plan.ClusterName,
"Region": d.plan.Ocp.Region,
"AdminUsername": d.plan.Ocp.AdminUsername,
"KubernetesVersion": d.plan.KubernetesVersion,
"MachineType": d.plan.MachineType,
"LocalSsdCount": d.plan.Ocp.LocalSsdCount,
"NodeCount": d.plan.Ocp.NodeCount,
"BaseDomain": d.baseDomain(),
"OCPStateBucket": OCPStateBucket,
"PullSecret": d.plan.Ocp.PullSecret,
GoogleCloudProjectCtxKey: d.plan.Ocp.GCloudProject,
"ClusterName": d.plan.ClusterName,
"Region": d.plan.Ocp.Region,
"AdminUsername": d.plan.Ocp.AdminUsername,
"KubernetesVersion": d.plan.KubernetesVersion,
"MachineType": d.plan.MachineType,
"LocalSsdCount": d.plan.Ocp.LocalSsdCount,
"NodeCount": d.plan.Ocp.NodeCount,
"BaseDomain": d.baseDomain(),
"OCPStateBucket": OCPStateBucket,
"PullSecret": d.plan.Ocp.PullSecret,
}
var tpl bytes.Buffer
if err := template.Must(template.New("").Parse(OcpInstallerConfigTemplate)).Execute(&tpl, params); err != nil {
17 changes: 17 additions & 0 deletions test/e2e/Dockerfile
@@ -4,8 +4,25 @@ FROM --platform=$TARGETPLATFORM docker.io/library/golang:1.20.2
ARG TARGETPLATFORM
ARG BUILDPLATFORM

ENV GCLOUD_VERSION=381.0.0
ENV ECK_DIAG_VERSION=1.4.0

WORKDIR /go/src/github.com/elastic/cloud-on-k8s

# Google Cloud SDK, installed to make gsutil available
ENV PATH=${PATH}:/usr/local/google-cloud-sdk/bin
RUN curl -fsSLO https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-${GCLOUD_VERSION}-linux-x86_64.tar.gz && \
mkdir -p /usr/local/gcloud && \
tar -zxf google-cloud-sdk-${GCLOUD_VERSION}-linux-x86_64.tar.gz -C /usr/local && \
/usr/local/google-cloud-sdk/install.sh --quiet && \
gcloud config set core/disable_usage_reporting true && \
gcloud config set component_manager/disable_update_check true && \
rm google-cloud-sdk-${GCLOUD_VERSION}-linux-x86_64.tar.gz

RUN curl -fsSLO https://github.com/elastic/eck-diagnostics/releases/download/${ECK_DIAG_VERSION}/eck-diagnostics_${ECK_DIAG_VERSION}_Linux_x86_64.tar.gz && \
tar xzf eck-diagnostics_${ECK_DIAG_VERSION}_Linux_x86_64.tar.gz && \
mv eck-diagnostics /usr/local/bin/eck-diagnostics

# create the go test cache directory
RUN mkdir -p /.cache && chmod 777 /.cache

2 changes: 1 addition & 1 deletion test/e2e/beat/config_test.go
@@ -188,7 +188,7 @@ func TestBeatConfigRef(t *testing.T) {
esBuilder := elasticsearch.NewBuilder(name).
WithESMasterDataNodes(3, elasticsearch.DefaultResources)

secretName := "fb-config" // nolint:gosec
secretName := "fb-config" //nolint:gosec
agentName := "configref-test-agent"
config := fmt.Sprintf(`
name: %s
2 changes: 2 additions & 0 deletions test/e2e/cmd/run/command.go
@@ -51,6 +51,7 @@ type runFlags struct {
deployChaosJob bool
e2eTags string
testEnvTags []string
gsBucketName string
}

var log logr.Logger
@@ -108,6 +109,7 @@ func Command() *cobra.Command {
cmd.Flags().BoolVar(&flags.deployChaosJob, "deploy-chaos-job", false, "Deploy the chaos job")
cmd.Flags().StringVar(&flags.e2eTags, "e2e-tags", "e2e", "Go tags to specify a subset of the tests using Go build constraints")
cmd.Flags().StringSliceVar(&flags.testEnvTags, "test-env-tags", nil, "Tags describing the environment for this test run")
cmd.Flags().StringVar(&flags.gsBucketName, "gs-bucket-name", "eck-e2e-buildkite-artifacts", "E2E test Google storage bucket name")
logutil.BindFlags(cmd.PersistentFlags())

// enable setting flags via environment variables
15 changes: 15 additions & 0 deletions test/e2e/cmd/run/run.go
@@ -192,6 +192,7 @@ func (h *helper) initTestContext() error {
TestEnvTags: h.testEnvTags,
E2ETags: h.e2eTags,
LogToFile: h.logToFile,
GSBucketName: h.gsBucketName,
}

for i, ns := range h.managedNamespaces {
@@ -261,6 +262,20 @@ func (h *helper) initTestSecrets() error {
return err
}

// Only initialize gcp credentials when running in CI
if os.Getenv("CI") == "true" {
b, err := vault.ReadFile(c, vault.SecretFile{
Name: "gcp-credentials.json",
Path: "ci-gcp-k8s-operator",
FieldResolver: func() string { return "service-account" },
})
if err != nil {
return fmt.Errorf("reading gcp credentials: %w", err)
}
h.testSecrets["gcp-credentials.json"] = string(b)
h.testContext.GCPCredentialsPath = "/var/run/secrets/e2e/gcp-credentials.json"
}

if h.testLicense != "" {
bytes, err := vault.ReadFile(c, vault.SecretFile{
Name: h.testLicense,
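Together with the Dockerfile and cp-diagnostics.sh changes, the mounted credentials presumably allow the e2e pod to push diagnostics into the bucket that the Buildkite agent later drains. A sketch of that upload side, under the assumption that it works roughly like this; the exact invocation is not part of this diff:

    # hypothetical upload step inside the e2e test pod
    gcloud auth activate-service-account --key-file=/var/run/secrets/e2e/gcp-credentials.json
    gsutil cp eck-diagnostic*.zip "gs://eck-e2e-buildkite-artifacts/jobs/${CLUSTER_NAME}/"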
27 changes: 11 additions & 16 deletions test/e2e/run.sh
@@ -9,22 +9,17 @@ set -euo pipefail
chaos=${CHAOS:-"false"}

run_e2e_tests() {
# Go's -failfast flag does not prevent tests from other packages from being executed if the package
# list is specified using the "./..." form. This script works around that limitation so that CI jobs
# fail faster.
for PKG in $(go list -tags "$E2E_TAGS" github.com/elastic/cloud-on-k8s/v2/test/e2e/...); do
if [ "${E2E_JSON}" == "true" ]
then
go test -v -failfast -timeout=6h -tags="$E2E_TAGS" -p=1 --json "$PKG" "$@"
else
go test -v -failfast -timeout=6h -tags="$E2E_TAGS" -p=1 "$PKG" "$@"
fi
done

# sleep 1s to allow filebeat to read all logs with 1s max_backoff
# minimizes race condition in filebeat between reading log file and
# stopping reading due to pod termination autodiscover event
sleep 1
if [ "${E2E_JSON}" == "true" ]
then
go test -v -timeout=6h -tags="$E2E_TAGS" -p=1 --json github.com/elastic/cloud-on-k8s/v2/test/e2e/... "$@"
else
go test -v -timeout=6h -tags="$E2E_TAGS" -p=1 github.com/elastic/cloud-on-k8s/v2/test/e2e/... "$@"
fi

# sleep 1s to allow filebeat to read all logs with 1s max_backoff
# minimizes race condition in filebeat between reading log file and
# stopping reading due to pod termination autodiscover event
sleep 1
}

run_chaos() {
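The behavioural change boils down to the following, collapsing the JSON and non-JSON branches into one line each. Under set -euo pipefail, the old per-package loop stopped at the first failing package, so later packages never ran; the new single invocation keeps going even after failures:

    # before: first failing package aborts the loop
    for PKG in $(go list -tags "$E2E_TAGS" github.com/elastic/cloud-on-k8s/v2/test/e2e/...); do
        go test -v -failfast -timeout=6h -tags="$E2E_TAGS" -p=1 "$PKG" "$@"
    done

    # after: one invocation, no -failfast, all packages and tests run to completion
    go test -v -timeout=6h -tags="$E2E_TAGS" -p=1 github.com/elastic/cloud-on-k8s/v2/test/e2e/... "$@"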
2 changes: 2 additions & 0 deletions test/e2e/test/context.go
@@ -121,9 +121,11 @@ type Context struct {
Provider string `json:"provider"`
ClusterName string `json:"clusterName"`
KubernetesVersion version.Version `json:"kubernetes_version"`
GCPCredentialsPath string `json:"gcp_credentials_path"`
TestEnvTags []string `json:"test_tags"`
E2ETags string `json:"e2e_tags"`
LogToFile bool `json:"log_to_file"`
GSBucketName string `json:"gs_bucket_name"`
}

// ManagedNamespace returns the nth managed namespace.