From 15c1371bf6aa90b3fa4f2f9c2ddf505445713632 Mon Sep 17 00:00:00 2001
From: John Kyros
Date: Fri, 15 Dec 2023 18:21:38 -0600
Subject: [PATCH] Make CPU scaler test wait for metrics window

The CPU scaler test assumes a default metrics window of 30s, so those
testing on platforms where it is set to a larger value may fail the
CPU scaler test because the metrics won't be ready by the time the
test starts.

This:
- Adds a helper that waits either for the metrics to show up in the
  HPA, or for a set amount of time to pass, whichever happens first
- Uses said helper to ensure that the metrics are ready before the CPU
  test starts testing scaling

Signed-off-by: John Kyros
---
 CHANGELOG.md                  |  1 +
 tests/helper/helper.go        | 32 ++++++++++++++++++++++++++++++++
 tests/scalers/cpu/cpu_test.go | 10 ++++++++++
 3 files changed, 43 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 508ce44ade3..f1c375cc3b2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -65,6 +65,7 @@ Here is an overview of all new **experimental** features:
 - **General**: Add validations for replica counts when creating ScaledObjects ([#5288](https://github.com/kedacore/keda/issues/5288))
 - **General**: Support TriggerAuthentication properties from ConfigMap ([#4830](https://github.com/kedacore/keda/issues/4830))
 - **General**: Use client-side round-robin load balancing for grpc calls ([#5224](https://github.com/kedacore/keda/issues/5224))
+- **CPU scaler**: Wait for metrics window during CPU scaler test ([#0000](https://github.com/kedacore/keda/pull/0000))
 - **GCP pubsub scaler**: Support distribution-valued metrics and metrics from topics ([#5070](https://github.com/kedacore/keda/issues/5070))
 - **Hashicorp Vault**: Add support to get secret that needs write operation (e.g. pki) ([#5067](https://github.com/kedacore/keda/issues/5067))
 - **Hashicorp Vault**: Fix operator panic when spec.hashiCorpVault.credential.serviceAccount is not set ([#4964](https://github.com/kedacore/keda/issues/4964))
diff --git a/tests/helper/helper.go b/tests/helper/helper.go
index 78d264da7b7..e0c8f734cc7 100644
--- a/tests/helper/helper.go
+++ b/tests/helper/helper.go
@@ -10,6 +10,7 @@ import (
 	"crypto/rsa"
 	"crypto/x509"
 	"crypto/x509/pkix"
+	"encoding/json"
 	"encoding/pem"
 	"fmt"
 	"io"
@@ -403,6 +404,37 @@ func WaitForAllPodRunningInNamespace(t *testing.T, kc *kubernetes.Clientset, nam
 	return false
 }
 
+// Waits until the Horizontal Pod Autoscaler for the scaledObject reports that it has metrics available
+// to calculate, or until the given number of iterations has elapsed, whichever happens first.
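+// The HPA status is polled once per intervalSeconds, so the total wait is bounded by
+// iterations * intervalSeconds; for example, iterations=120 with intervalSeconds=5 waits
+// up to 10 minutes. Returns true as soon as a metric with a non-empty type is observed,
+// and false if the wait expires without one.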
+func WaitForHPAMetricsToPopulate(t *testing.T, kc *kubernetes.Clientset, name, namespace string,
+	iterations, intervalSeconds int) bool {
+	totalWaitDuration := time.Duration(iterations) * time.Duration(intervalSeconds) * time.Second
+	startedWaiting := time.Now()
+	for i := 0; i < iterations; i++ {
+		t.Logf("Waiting up to %s for HPA to populate metrics - %s so far", totalWaitDuration, time.Since(startedWaiting).Round(time.Second))
+
+		hpa, err := kc.AutoscalingV2().HorizontalPodAutoscalers(namespace).Get(context.Background(), name, metav1.GetOptions{})
+		if err == nil && hpa.Status.CurrentMetrics != nil {
+			for _, currentMetric := range hpa.Status.CurrentMetrics {
+				// When testing on a kind cluster at least, an empty metricStatus object with a blank type shows up first,
+				// so we need to make sure we have *actual* resource metrics before we return
+				if currentMetric.Type != "" {
+					j, _ := json.MarshalIndent(hpa.Status.CurrentMetrics, " ", " ")
+					t.Logf("HPA has metrics after %s: %s", time.Since(startedWaiting), j)
+					return true
+				}
+			}
+		}
+
+		time.Sleep(time.Duration(intervalSeconds) * time.Second)
+	}
+	return false
+}
+
 // Waits until deployment ready replica count hits target or number of iterations are done.
 func WaitForDeploymentReplicaReadyCount(t *testing.T, kc *kubernetes.Clientset, name, namespace string,
 	target, iterations, intervalSeconds int) bool {
diff --git a/tests/scalers/cpu/cpu_test.go b/tests/scalers/cpu/cpu_test.go
index 1f1000ad019..f24922dc61d 100644
--- a/tests/scalers/cpu/cpu_test.go
+++ b/tests/scalers/cpu/cpu_test.go
@@ -9,6 +9,7 @@ import (
 	"github.com/joho/godotenv"
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 	"k8s.io/client-go/kubernetes"
 
 	. "github.com/kedacore/keda/v2/tests/helper"
 )
@@ -28,6 +29,7 @@ var (
 	testNamespace    = fmt.Sprintf("%s-ns", testName)
 	deploymentName   = fmt.Sprintf("%s-deployment", testName)
 	scaledObjectName = fmt.Sprintf("%s-so", testName)
+	hpaName          = fmt.Sprintf("keda-hpa-%s-so", testName)
 )
 
 type templateData struct {
@@ -197,6 +199,14 @@ func scaleOut(t *testing.T, kc *kubernetes.Clientset, data templateData) {
 	assert.True(t, WaitForDeploymentReplicaReadyCount(t, kc, deploymentName, testNamespace, 1, 60, 1),
 		"Replica count should start out as 1")
 
+	// The default metrics-server window is 30s, which is what KEDA expects, but some platforms use things like
+	// prometheus-adapter and tune the window to a larger value, say 5m. In that case it takes 5 minutes before the
+	// HPA can even start scaling, and we would fail this test unless we wait for the metrics first. We could read
+	// the window straight from the metrics-server config, but we'd have to know too much about unusual configurations,
+	// so we just wait up to 10 minutes for the metrics (wherever they're coming from) before we proceed with the test.
+	require.True(t, WaitForHPAMetricsToPopulate(t, kc, hpaName, testNamespace, 120, 5),
+		"HPA should populate metrics within 10 minutes")
+
 	t.Log("--- testing scale out ---")
 	t.Log("--- applying job ---")