Skip to content

Commit

Permalink
Filter terminated pods from node request metrics. (amazon-contributin…
Browse files Browse the repository at this point in the history
  • Loading branch information
jefchien authored and lisguo committed Oct 20, 2023
1 parent 546d245 commit 328502e
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 4 deletions.
11 changes: 7 additions & 4 deletions receiver/awscontainerinsightreceiver/internal/stores/podstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,10 +303,13 @@ func (p *PodStore) refreshInternal(now time.Time, podList []corev1.Pod) {
p.logger.Warn(fmt.Sprintf("podKey is unavailable, refresh pod store for pod %s", pod.Name))
continue
}
tmpCPUReq, _ := getResourceSettingForPod(&pod, p.nodeInfo.getCPUCapacity(), cpuKey, getRequestForContainer)
cpuRequest += tmpCPUReq
tmpMemReq, _ := getResourceSettingForPod(&pod, p.nodeInfo.getMemCapacity(), memoryKey, getRequestForContainer)
memRequest += tmpMemReq
// filter out terminated pods
if pod.Status.Phase != corev1.PodSucceeded && pod.Status.Phase != corev1.PodFailed {
tmpCPUReq, _ := getResourceSettingForPod(&pod, p.nodeInfo.getCPUCapacity(), cpuKey, getRequestForContainer)
cpuRequest += tmpCPUReq
tmpMemReq, _ := getResourceSettingForPod(&pod, p.nodeInfo.getMemCapacity(), memoryKey, getRequestForContainer)
memRequest += tmpMemReq
}
if pod.Status.Phase == corev1.PodRunning {
podCount++
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1141,6 +1141,59 @@ func TestPodStore_decorateNode(t *testing.T) {
assert.Equal(t, uint64(15), metric.GetField("node_status_allocatable_pods").(uint64))
}

// TestPodStore_decorateNode_multiplePodStates checks which pod phases count
// toward the node-level request metrics written by decorateNode: pods in the
// Failed or Succeeded phase must contribute nothing, while Pending and Running
// pods must each add their CPU and memory requests.
func TestPodStore_decorateNode_multiplePodStates(t *testing.T) {
	podStore := getPodStore()
	// Arguments of a deferred call are evaluated when the defer statement
	// executes, so Shutdown must be wrapped in a closure; otherwise the store
	// would be shut down here, before the test body runs.
	defer func() {
		require.NoError(t, podStore.Shutdown())
	}()

	tags := map[string]string{ci.MetricType: ci.TypeNode}
	fields := map[string]interface{}{
		ci.MetricName(ci.TypeNode, ci.CPUTotal):      float64(100),
		ci.MetricName(ci.TypeNode, ci.CPULimit):      uint64(4000),
		ci.MetricName(ci.TypeNode, ci.MemWorkingset): float64(100 * 1024 * 1024),
		ci.MetricName(ci.TypeNode, ci.MemLimit):      uint64(400 * 1024 * 1024),
	}
	metric := generateMetric(fields, tags)

	// terminated pods should not contribute to requests
	failedPod := generatePodInfo("./test_resources/pod_in_phase_failed.json")
	succeededPod := generatePodInfo("./test_resources/pod_in_phase_succeeded.json")
	podList := []corev1.Pod{*failedPod, *succeededPod}
	podStore.refreshInternal(time.Now(), podList)
	podStore.decorateNode(metric)

	assert.Equal(t, uint64(0), metric.GetField("node_cpu_request").(uint64))
	assert.Equal(t, uint64(4000), metric.GetField("node_cpu_limit").(uint64))
	assert.Equal(t, float64(0), metric.GetField("node_cpu_reserved_capacity").(float64))
	assert.Equal(t, float64(100), metric.GetField("node_cpu_usage_total").(float64))

	assert.Equal(t, uint64(0), metric.GetField("node_memory_request").(uint64))
	assert.Equal(t, uint64(400*1024*1024), metric.GetField("node_memory_limit").(uint64))
	assert.Equal(t, float64(0), metric.GetField("node_memory_reserved_capacity").(float64))
	assert.Equal(t, float64(100*1024*1024), metric.GetField("node_memory_working_set").(float64))

	// non-terminated pods should contribute to requests
	pendingPod := generatePodInfo("./test_resources/pod_in_phase_pending.json")
	podList = append(podList, *pendingPod)
	podStore.refreshInternal(time.Now(), podList)
	podStore.decorateNode(metric)
	assert.Equal(t, uint64(10), metric.GetField("node_cpu_request").(uint64))
	assert.Equal(t, float64(0.25), metric.GetField("node_cpu_reserved_capacity").(float64))

	assert.Equal(t, uint64(50*1024*1024), metric.GetField("node_memory_request").(uint64))
	assert.Equal(t, float64(12.5), metric.GetField("node_memory_reserved_capacity").(float64))

	// adding a running pod on top of the pending one doubles the expected
	// request totals and reserved-capacity percentages
	runningPod := generatePodInfo("./test_resources/pod_in_phase_running.json")
	podList = append(podList, *runningPod)
	podStore.refreshInternal(time.Now(), podList)
	podStore.decorateNode(metric)

	assert.Equal(t, uint64(20), metric.GetField("node_cpu_request").(uint64))
	assert.Equal(t, float64(0.5), metric.GetField("node_cpu_reserved_capacity").(float64))

	assert.Equal(t, uint64(100*1024*1024), metric.GetField("node_memory_request").(uint64))
	assert.Equal(t, float64(25), metric.GetField("node_memory_reserved_capacity").(float64))
}

func TestPodStore_Decorate(t *testing.T) {
// not the metrics for decoration
tags := map[string]string{}
Expand Down

0 comments on commit 328502e

Please sign in to comment.