Skip to content

Commit

Permalink
experimental: l3 group metrics
Browse files Browse the repository at this point in the history
wire in the l3 group alignment metrics

Signed-off-by: Francesco Romani <fromani@redhat.com>
  • Loading branch information
ffromani committed Sep 19, 2024
1 parent 672e835 commit 54faf44
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 11 deletions.
26 changes: 18 additions & 8 deletions pkg/kubelet/cm/cpumanager/cpu_assignment.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"k8s.io/klog/v2"

"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
"k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/utils/cpuset"
)

Expand Down Expand Up @@ -102,7 +103,7 @@ func min(x, y int) int {
type numaOrSocketsFirstFuncs interface {
takeFullFirstLevel()
takeFullSecondLevel()
takeThirdLevel()
takeThirdLevel() (int, int)
sortAvailableNUMANodes() []int
sortAvailableSockets() []int
sortAvailableUnCoreCaches() []int
Expand All @@ -128,8 +129,8 @@ func (n *numaFirst) takeFullSecondLevel() {
}

// In Split L3 Topology, we take from the sets of uncorecache as the third level
func (n *numaFirst) takeThirdLevel() {
n.acc.takeUnCoreCache()
// takeThirdLevel performs the third-level step for the NUMA-first ordering by
// delegating to the accumulator's takeUnCoreCache. The two integers it returns
// are forwarded unchanged to the caller.
func (n *numaFirst) takeThirdLevel() (int, int) {
	partial, whole := n.acc.takeUnCoreCache()
	return partial, whole
}

// If NUMA nodes are higher in the memory hierarchy than sockets, then just
Expand Down Expand Up @@ -189,8 +190,8 @@ func (s *socketsFirst) takeFullSecondLevel() {
s.acc.takeFullNUMANodes()
}

func (s *socketsFirst) takeThirdLevel() {
s.acc.takeUnCoreCache()
// takeThirdLevel performs the third-level step for the sockets-first ordering
// by delegating to the accumulator's takeUnCoreCache. The two integers it
// returns are forwarded unchanged to the caller.
func (s *socketsFirst) takeThirdLevel() (int, int) {
	partial, whole := s.acc.takeUnCoreCache()
	return partial, whole
}

// If sockets are higher in the memory hierarchy than NUMA nodes, then we need
Expand Down Expand Up @@ -430,8 +431,10 @@ func (a *cpuAccumulator) takeFullUnCore() {

// First try to take partial uncorecache (CCD), if available and the request size can fit within the uncorecache.
// Second try to take the full CCD if available and need is at least the size of the uncorecache group.
func (a *cpuAccumulator) takeUnCoreCache() {
func (a *cpuAccumulator) takeUnCoreCache() (int, int) {
// check if SMT ON
part := 0
full := 0

for _, uncore := range a.allUnCoreCache() {
numCoresNeeded := a.numCPUsNeeded / a.topo.CPUsPerCore() // this is another new change
Expand All @@ -447,14 +450,18 @@ func (a *cpuAccumulator) takeUnCoreCache() {
if a.numCPUsNeeded == freeCPUsInUncorecache.Size() {
klog.V(4).InfoS("takePartialUncore: claiming cores from Uncorecache ID", "uncore", uncore)
a.take(freeCPUsInUncorecache)
part += 1
}
// take full Uncorecache if the numCPUsNeeded is greater than the L3 cache size
a.takeFullUnCore()
full += 1

if a.isSatisfied() {
return
return part, full
}
}

return part, full
}

func (a *cpuAccumulator) takeFullCores() {
Expand Down Expand Up @@ -614,8 +621,11 @@ func takeByTopologyUnCoreCachePacked(topo *topology.CPUTopology, availableCPUs c

// 2. Acquire partial uncorecache, if there are enough CPUs available to satisfy the container requirement
// Acquire the full uncorecache, if available and the container requires at least all the CPUs in the uncorecache grouping
acc.numaOrSocketsFirst.takeThirdLevel()
part, full := acc.numaOrSocketsFirst.takeThirdLevel()
if acc.isSatisfied() {
if part == 0 && full > 0 {
metrics.ContainerAlignedComputeResources.WithLabelValues(metrics.AlignedL3CacheGroup).Inc()
}
return acc.result, nil
}

Expand Down
7 changes: 4 additions & 3 deletions pkg/kubelet/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,9 @@ const (
InitContainer = "init_container"
EphemeralContainer = "ephemeral_container"

AlignedPhysicalCPU = "physical_cpu"
AlignedNUMAZone = "numa_zone"
AlignedPhysicalCPU = "physical_cpu"
AlignedNUMAZone = "numa_zone"
AlignedL3CacheGroup = "l3_cache_group"
)

var (
Expand Down Expand Up @@ -836,7 +837,7 @@ var (
Help: "Cumulative number of aligned compute resources allocated to containers by alignment type.",
StabilityLevel: metrics.ALPHA,
},
[]string{"physical_cpu", "numa_zone"},
[]string{"physical_cpu", "l3_cache_group", "numa_zone"},
)
)

Expand Down

0 comments on commit 54faf44

Please sign in to comment.