Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(qrm): implement qos resource manager to extend the ability of re… #2

Merged
merged 1 commit into from
Dec 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions api/api-rules/violation_exceptions.list
Original file line number Diff line number Diff line change
Expand Up @@ -591,6 +591,8 @@ API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,V
API rule violation: names_match,k8s.io/kube-proxy/config/v1alpha1,KubeProxyConfiguration,IPTables
API rule violation: names_match,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,IPTablesDropBit
API rule violation: names_match,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,IPTablesMasqueradeBit
API rule violation: names_match,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,QoSResourceManagerReconcilePeriod
API rule violation: names_match,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,QoSResourceManagerResourceNamesMap
API rule violation: names_match,k8s.io/kubelet/config/v1beta1,KubeletConfiguration,ResolverConfig
API rule violation: names_match,k8s.io/metrics/pkg/apis/custom_metrics/v1beta1,MetricValue,WindowSeconds
API rule violation: names_match,k8s.io/metrics/pkg/apis/external_metrics/v1beta1,ExternalMetricValue,WindowSeconds
2 changes: 2 additions & 0 deletions cmd/kubelet/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,8 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig
fs.StringVar(&c.CPUManagerPolicy, "cpu-manager-policy", c.CPUManagerPolicy, "CPU Manager policy to use. Possible values: 'none', 'static'.")
fs.Var(cliflag.NewMapStringStringNoSplit(&c.CPUManagerPolicyOptions), "cpu-manager-policy-options", "A set of key=value CPU Manager policy options to use, to fine tune their behaviour. If not supplied, keep the default behaviour.")
fs.DurationVar(&c.CPUManagerReconcilePeriod.Duration, "cpu-manager-reconcile-period", c.CPUManagerReconcilePeriod.Duration, "<Warning: Alpha feature> CPU Manager reconciliation period. Examples: '10s', or '1m'. If not supplied, defaults to 'NodeStatusUpdateFrequency'")
fs.DurationVar(&c.QoSResourceManagerReconcilePeriod.Duration, "qos-resource-manager-reconcile-period", c.QoSResourceManagerReconcilePeriod.Duration, "<Warning: Alpha feature> QoS Resource Manager reconciliation period. Examples: '10s', or '1m'. If not supplied, defaults to 3s")
fs.Var(cliflag.NewMapStringString(&c.QoSResourceManagerResourceNamesMap), "qos-resource-manager-resource-names-map", "A set of ResourceName=ResourceQuantity (e.g. best-effort-cpu=cpu,best-effort-memory=memory,...) pairs that map resource name \"best-effort-cpu\" to resource name \"cpu\" during QoS Resource Manager allocation period.")
fs.Var(cliflag.NewMapStringString(&c.QOSReserved), "qos-reserved", "<Warning: Alpha feature> A set of ResourceName=Percentage (e.g. memory=50%) pairs that describe how pod resource requests are reserved at the QoS level. Currently only memory is supported. Requires the QOSReserved feature gate to be enabled.")
fs.StringVar(&c.TopologyManagerPolicy, "topology-manager-policy", c.TopologyManagerPolicy, "Topology Manager policy to use. Possible values: 'none', 'best-effort', 'restricted', 'single-numa-node'.")
fs.DurationVar(&c.RuntimeRequestTimeout.Duration, "runtime-request-timeout", c.RuntimeRequestTimeout.Duration, "Timeout of all runtime requests except long running request - pull, logs, exec and attach. When timeout exceeded, kubelet will cancel the request, throw out an error and retry later.")
Expand Down
25 changes: 14 additions & 11 deletions cmd/kubelet/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/stats/pidlimit"
utilfs "k8s.io/kubernetes/pkg/util/filesystem"
"k8s.io/kubernetes/pkg/util/flock"
maputil "k8s.io/kubernetes/pkg/util/maps"
nodeutil "k8s.io/kubernetes/pkg/util/node"
"k8s.io/kubernetes/pkg/util/oom"
"k8s.io/kubernetes/pkg/util/rlimit"
Expand Down Expand Up @@ -725,17 +726,19 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend
ReservedSystemCPUs: reservedSystemCPUs,
HardEvictionThresholds: hardEvictionThresholds,
},
QOSReserved: *experimentalQOSReserved,
ExperimentalCPUManagerPolicy: s.CPUManagerPolicy,
ExperimentalCPUManagerPolicyOptions: cpuManagerPolicyOptions,
ExperimentalCPUManagerReconcilePeriod: s.CPUManagerReconcilePeriod.Duration,
ExperimentalMemoryManagerPolicy: s.MemoryManagerPolicy,
ExperimentalMemoryManagerReservedMemory: s.ReservedMemory,
ExperimentalPodPidsLimit: s.PodPidsLimit,
EnforceCPULimits: s.CPUCFSQuota,
CPUCFSQuotaPeriod: s.CPUCFSQuotaPeriod.Duration,
ExperimentalTopologyManagerPolicy: s.TopologyManagerPolicy,
ExperimentalTopologyManagerScope: s.TopologyManagerScope,
QOSReserved: *experimentalQOSReserved,
ExperimentalCPUManagerPolicy: s.CPUManagerPolicy,
ExperimentalCPUManagerPolicyOptions: cpuManagerPolicyOptions,
ExperimentalCPUManagerReconcilePeriod: s.CPUManagerReconcilePeriod.Duration,
ExperimentalMemoryManagerPolicy: s.MemoryManagerPolicy,
ExperimentalMemoryManagerReservedMemory: s.ReservedMemory,
ExperimentalQoSResourceManagerReconcilePeriod: s.QoSResourceManagerReconcilePeriod.Duration,
QoSResourceManagerResourceNamesMap: maputil.CopySS(s.QoSResourceManagerResourceNamesMap),
ExperimentalPodPidsLimit: s.PodPidsLimit,
EnforceCPULimits: s.CPUCFSQuota,
CPUCFSQuotaPeriod: s.CPUCFSQuotaPeriod.Duration,
ExperimentalTopologyManagerPolicy: s.TopologyManagerPolicy,
ExperimentalTopologyManagerScope: s.TopologyManagerScope,
},
s.FailSwapOn,
devicePluginEnabled,
Expand Down
29 changes: 29 additions & 0 deletions hack/update-generated-resource-plugin-dockerized.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env bash

# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script generates `*/api.pb.go` from the protobuf file `*/api.proto`.
# Example:
# kube::protoc::generate_proto "${RESOURCE_PLUGIN_ALPHA}"

set -o errexit
set -o nounset
set -o pipefail

KUBE_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../" && pwd -P)"
RESOURCE_PLUGIN_ALPHA="${KUBE_ROOT}/staging/src/k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"

source "${KUBE_ROOT}/hack/lib/protoc.sh"
kube::protoc::generate_proto "${RESOURCE_PLUGIN_ALPHA}"
27 changes: 27 additions & 0 deletions hack/update-generated-resource-plugin.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash

# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -o errexit
set -o nounset
set -o pipefail

KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/..

# NOTE: All output from this script needs to be copied back to the calling
# source tree. This is managed in kube::build::copy_output in build/common.sh.
# If the output set is changed update that function.

"${KUBE_ROOT}/build/run.sh" hack/update-generated-resource-plugin-dockerized.sh "$@"
7 changes: 7 additions & 0 deletions pkg/features/kube_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -882,6 +882,12 @@ const (
//
// Enables support for time zones in CronJobs.
CronJobTimeZone featuregate.Feature = "CronJobTimeZone"

// owner: @sunjianyu
// alpha: v1.18
//
// Enable qos resource managers to make NUMA aligned decisions for resources which aren't devices
QoSResourceManager featuregate.Feature = "QoSResourceManager"
)

func init() {
Expand Down Expand Up @@ -1009,6 +1015,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
MaxUnavailableStatefulSet: {Default: false, PreRelease: featuregate.Alpha},
NetworkPolicyStatus: {Default: false, PreRelease: featuregate.Alpha},
CronJobTimeZone: {Default: false, PreRelease: featuregate.Alpha},
QoSResourceManager: {Default: false, PreRelease: featuregate.Alpha},

// inherited features from generic apiserver, relisted here to get a conflict if it is changed
// unintentionally on either side:
Expand Down
6 changes: 6 additions & 0 deletions pkg/generated/openapi/zz_generated.openapi.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pkg/kubelet/apis/config/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,5 +282,6 @@ var (
"ShutdownGracePeriod.Duration",
"ShutdownGracePeriodCriticalPods.Duration",
"MemoryThrottlingFactor",
"QoSResourceManagerReconcilePeriod.Duration",
)
)
6 changes: 6 additions & 0 deletions pkg/kubelet/apis/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,12 @@ type KubeletConfiguration struct {
// MemoryManagerPolicy is the name of the policy to use.
// Requires the MemoryManager feature gate to be enabled.
MemoryManagerPolicy string
// QoS Resource Manager reconciliation period.
// Requires the QoSResourceManager feature gate to be enabled.
QoSResourceManagerReconcilePeriod metav1.Duration
// Map of resource name "A" to resource name "B" during QoS Resource Manager allocation period.
// It's useful for the same kind resource with different types. (eg. maps best-effort-cpu to cpu)
QoSResourceManagerResourceNamesMap map[string]string
// TopologyManagerPolicy is the name of the policy to use.
// Policies other than "none" require the TopologyManager feature gate to be enabled.
TopologyManagerPolicy string
Expand Down
3 changes: 3 additions & 0 deletions pkg/kubelet/apis/config/v1beta1/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,9 @@ func SetDefaults_KubeletConfiguration(obj *kubeletconfigv1beta1.KubeletConfigura
if obj.MemoryManagerPolicy == "" {
obj.MemoryManagerPolicy = kubeletconfigv1beta1.NoneMemoryManagerPolicy
}
if obj.QoSResourceManagerReconcilePeriod == zeroDuration {
obj.QoSResourceManagerReconcilePeriod = metav1.Duration{Duration: 5 * time.Second}
}
if obj.TopologyManagerPolicy == "" {
obj.TopologyManagerPolicy = kubeletconfigv1beta1.NoneTopologyManagerPolicy
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions pkg/kubelet/apis/config/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 32 additions & 20 deletions pkg/kubelet/apis/podresources/server_v1.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,25 +24,29 @@ import (
kubefeatures "k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/metrics"

"k8s.io/kubelet/pkg/apis/podresources/v1"
v1 "k8s.io/kubelet/pkg/apis/podresources/v1"
pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
maputil "k8s.io/kubernetes/pkg/util/maps"
)

// podResourcesServerV1alpha1 implements PodResourcesListerServer
type v1PodResourcesServer struct {
podsProvider PodsProvider
devicesProvider DevicesProvider
cpusProvider CPUsProvider
memoryProvider MemoryProvider
podsProvider PodsProvider
devicesProvider DevicesProvider
cpusProvider CPUsProvider
memoryProvider MemoryProvider
resourcesProvider ResourcesProvider
}

// NewV1PodResourcesServer returns a PodResourcesListerServer which lists pods provided by the PodsProvider
// with device information provided by the DevicesProvider
func NewV1PodResourcesServer(podsProvider PodsProvider, devicesProvider DevicesProvider, cpusProvider CPUsProvider, memoryProvider MemoryProvider) v1.PodResourcesListerServer {
func NewV1PodResourcesServer(podsProvider PodsProvider, devicesProvider DevicesProvider, cpusProvider CPUsProvider, memoryProvider MemoryProvider, resourcesProvider ResourcesProvider) v1.PodResourcesListerServer {
return &v1PodResourcesServer{
podsProvider: podsProvider,
devicesProvider: devicesProvider,
cpusProvider: cpusProvider,
memoryProvider: memoryProvider,
podsProvider: podsProvider,
devicesProvider: devicesProvider,
cpusProvider: cpusProvider,
memoryProvider: memoryProvider,
resourcesProvider: resourcesProvider,
}
}

Expand All @@ -54,25 +58,32 @@ func (p *v1PodResourcesServer) List(ctx context.Context, req *v1.ListPodResource
pods := p.podsProvider.GetPods()
podResources := make([]*v1.PodResources, len(pods))
p.devicesProvider.UpdateAllocatedDevices()
p.resourcesProvider.UpdateAllocatedResources()

for i, pod := range pods {
pRes := v1.PodResources{
Name: pod.Name,
Namespace: pod.Namespace,
Containers: make([]*v1.ContainerResources, len(pod.Spec.Containers)),
Name: pod.Name,
Namespace: pod.Namespace,
PodRole: pod.Labels[pluginapi.PodRoleLabelKey],
PodType: pod.Annotations[pluginapi.PodTypeAnnotationKey],
Labels: maputil.CopySS(pod.Labels),
Annotations: maputil.CopySS(pod.Annotations),
Containers: make([]*v1.ContainerResources, len(pod.Spec.Containers)),
}

for j, container := range pod.Spec.Containers {
pRes.Containers[j] = &v1.ContainerResources{
Name: container.Name,
Devices: p.devicesProvider.GetDevices(string(pod.UID), container.Name),
CpuIds: p.cpusProvider.GetCPUs(string(pod.UID), container.Name),
Memory: p.memoryProvider.GetMemory(string(pod.UID), container.Name),
Name: container.Name,
Devices: p.devicesProvider.GetDevices(string(pod.UID), container.Name),
CpuIds: p.cpusProvider.GetCPUs(string(pod.UID), container.Name),
Memory: p.memoryProvider.GetMemory(string(pod.UID), container.Name),
Resources: p.resourcesProvider.GetTopologyAwareResources(pod, &container),
}
}
podResources[i] = &pRes
}

// [TODO](sunjianyu): List always return error nil, but what if resource plugin rpc call gets error and Resources in ContainerResources is nil?
return &v1.ListPodResourcesResponse{
PodResources: podResources,
}, nil
Expand All @@ -91,8 +102,9 @@ func (p *v1PodResourcesServer) GetAllocatableResources(ctx context.Context, req
metrics.PodResourcesEndpointRequestsTotalCount.WithLabelValues("v1").Inc()

return &v1.AllocatableResourcesResponse{
Devices: p.devicesProvider.GetAllocatableDevices(),
CpuIds: p.cpusProvider.GetAllocatableCPUs(),
Memory: p.memoryProvider.GetAllocatableMemory(),
Devices: p.devicesProvider.GetAllocatableDevices(),
CpuIds: p.cpusProvider.GetAllocatableCPUs(),
Memory: p.memoryProvider.GetAllocatableMemory(),
Resources: p.resourcesProvider.GetTopologyAwareAllocatableResources(),
}, nil
}
10 changes: 10 additions & 0 deletions pkg/kubelet/apis/podresources/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,13 @@ type MemoryProvider interface {
// GetAllocatableMemory returns the allocatable memory from the node
GetAllocatableMemory() []*podresourcesapi.ContainerMemory
}

// ResourcesProvider knows how to provide the resources used by the given container
type ResourcesProvider interface {
// UpdateAllocatedResources frees any Resources that are bound to terminated pods.
UpdateAllocatedResources()
// GetResources returns information about the resources assigned to pods and containers in topology aware format
GetTopologyAwareResources(pod *v1.Pod, container *v1.Container) []*podresourcesapi.TopologyAwareResource
// GetAllocatableResources returns information about all the resources known to the manager in topology aware format
GetTopologyAwareAllocatableResources() []*podresourcesapi.AllocatableTopologyAwareResource
}
Loading