Skip to content

Commit

Permalink
Add topologySpreadConstraints configuration to pod spec.
Browse files Browse the repository at this point in the history
  • Loading branch information
laiminhtrung1997 committed Jul 9, 2024
1 parent 37d6993 commit 4b34781
Show file tree
Hide file tree
Showing 9 changed files with 182 additions and 13 deletions.
96 changes: 85 additions & 11 deletions e2e/tests/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ def compare_config():

pg_patch_config["spec"]["patroni"]["slots"][slot_to_change]["database"] = "bar"
del pg_patch_config["spec"]["patroni"]["slots"][slot_to_remove]

k8s.api.custom_objects_api.patch_namespaced_custom_object(
"acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_delete_slot_patch)

Expand All @@ -573,7 +573,7 @@ def compare_config():

self.eventuallyEqual(lambda: self.query_database(leader.metadata.name, "postgres", get_slot_query%("database", slot_to_change))[0], "bar",
"The replication slot cannot be updated", 10, 5)

# make sure slot from Patroni didn't get deleted
self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", get_slot_query%("slot_name", patroni_slot))), 1,
"The replication slot from Patroni gets deleted", 10, 5)
Expand Down Expand Up @@ -929,7 +929,7 @@ def test_ignored_annotations(self):
},
}
}

old_sts_creation_timestamp = sts.metadata.creation_timestamp
k8s.api.apps_v1.patch_namespaced_stateful_set(sts.metadata.name, sts.metadata.namespace, annotation_patch)
old_svc_creation_timestamp = svc.metadata.creation_timestamp
Expand Down Expand Up @@ -1254,7 +1254,7 @@ def test_persistent_volume_claim_retention_policy(self):
}
k8s.update_config(patch_scaled_policy_retain)
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")

# decrease the number of instances
k8s.api.custom_objects_api.patch_namespaced_custom_object(
'acid.zalan.do', 'v1', 'default', 'postgresqls', 'acid-minimal-cluster', pg_patch_scale_down_instances)
Expand Down Expand Up @@ -1537,7 +1537,6 @@ def test_node_readiness_label(self):
# toggle pod anti affinity to move replica away from master node
self.assert_distributed_pods(master_nodes)


@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
def test_overwrite_pooler_deployment(self):
pooler_name = 'acid-minimal-cluster-pooler'
Expand Down Expand Up @@ -1622,7 +1621,7 @@ def test_password_rotation(self):
},
}
k8s.api.core_v1.patch_namespaced_secret(
name="foo-user.acid-minimal-cluster.credentials.postgresql.acid.zalan.do",
name="foo-user.acid-minimal-cluster.credentials.postgresql.acid.zalan.do",
namespace="default",
body=secret_fake_rotation)

Expand All @@ -1638,7 +1637,7 @@ def test_password_rotation(self):
"data": {
"enable_password_rotation": "true",
"password_rotation_interval": "30",
"password_rotation_user_retention": "30", # should be set to 60
"password_rotation_user_retention": "30", # should be set to 60
},
}
k8s.update_config(enable_password_rotation)
Expand Down Expand Up @@ -1691,7 +1690,7 @@ def test_password_rotation(self):
"Unexpected username in secret of test.db_user: expected {}, got {}".format("test.db_user", secret_username))

# disable password rotation for all other users (foo_user)
# and pick smaller intervals to see if the third fake rotation user is dropped
# and pick smaller intervals to see if the third fake rotation user is dropped
enable_password_rotation = {
"data": {
"enable_password_rotation": "false",
Expand Down Expand Up @@ -2041,6 +2040,81 @@ def test_taint_based_eviction(self):
# toggle pod anti affinity to move replica away from master node
self.assert_distributed_pods(master_nodes)

@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
def test_topology_spread_constraints(self):
    '''
    Enable topologySpreadConstraints for pods

    Flow of the test:
      1. label the current master and replica node with the same zone value
         and add a toleration to the operator config
      2. delete the master pod and expect master and replica on the same node
      3. switch on enable_postgres_topology_spread_constraints and expect
         master and replica on different nodes
    '''
    k8s = self.k8s
    cluster_labels = 'application=spilo,cluster-name=acid-minimal-cluster'

    # Verify we are in good state from potential previous tests
    self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running")

    master_nodes, replica_nodes = k8s.get_cluster_nodes()
    self.assertNotEqual(master_nodes, [])
    self.assertNotEqual(replica_nodes, [])

    # NOTE(review): both nodes receive the *same* zone value; confirm that a
    # constraint on topology.kubernetes.io/zone can then force the pods onto
    # different nodes as asserted at the end of this test.
    patch_node_label = {
        "metadata": {
            "labels": {
                "topology.kubernetes.io/zone": "zalando"
            }
        }
    }

    k8s.api.core_v1.patch_node(master_nodes[0], patch_node_label)
    k8s.api.core_v1.patch_node(replica_nodes[0], patch_node_label)

    # add toleration to pods
    patch_toleration_config = {
        "data": {
            "toleration": "key:postgres,operator:Exists,effect:NoExecute"
        }
    }

    try:
        k8s.update_config(patch_toleration_config, step="allow tainted nodes")
        self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
        self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running")

        # Delete the master pod so that it gets re-scheduled
        master_pod = k8s.get_cluster_leader_pod()
        k8s.api.core_v1.delete_namespaced_pod(master_pod.metadata.name, 'default')

        # Wait for the failover and for both roles to be started again
        k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_labels)
        k8s.wait_for_pod_start('spilo-role=master,' + cluster_labels)
        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_labels)

    except timeout_decorator.TimeoutError:
        print('Operator log: {}'.format(k8s.get_operator_log()))
        raise

    # Assert master pod and replica pod are on the same node
    master_nodes, replica_nodes = k8s.get_cluster_nodes()
    try:
        self.assertEqual(master_nodes[0], replica_nodes[0])
    except Exception:
        # covers the failed assertion as well as an IndexError on an empty
        # node list, without intercepting KeyboardInterrupt/SystemExit
        print('Operator log: {}'.format(k8s.get_operator_log()))
        raise

    patch_enable_topology_spread_constraints = {
        "data": {
            "enable_postgres_topology_spread_constraints": "true"
        }
    }

    k8s.update_config(patch_enable_topology_spread_constraints, "enable topologySpreadConstraints")
    self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
    # the node lists are indexed below, so make sure both pods are back first
    self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running")

    # Assert master pod and replica pod are spread across two different nodes
    master_nodes, replica_nodes = k8s.get_cluster_nodes()
    self.assertNotEqual(master_nodes[0], replica_nodes[0])

    # Clear the taints on the current master node.
    # NOTE(review): the toleration config, the zone labels and the
    # enable_postgres_topology_spread_constraints option are not reverted
    # here - confirm whether later tests rely on a clean state.
    k8s.api.core_v1.patch_node(master_nodes[0], {"spec": {"taints": []}})

@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
def test_zz_cluster_deletion(self):
'''
Expand Down Expand Up @@ -2158,7 +2232,7 @@ def assert_distributed_pods(self, target_nodes, cluster_labels='cluster-name=aci

# if nodes are different we can quit here
if master_nodes[0] not in replica_nodes:
return True
return True

# enable pod anti affinity in config map which should trigger movement of replica
patch_enable_antiaffinity = {
Expand All @@ -2182,7 +2256,7 @@ def assert_distributed_pods(self, target_nodes, cluster_labels='cluster-name=aci
}
k8s.update_config(patch_disable_antiaffinity, "disable antiaffinity")
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")

k8s.wait_for_pod_start('spilo-role=replica,' + cluster_labels)
k8s.wait_for_running_pods(cluster_labels, 2)

Expand All @@ -2193,7 +2267,7 @@ def assert_distributed_pods(self, target_nodes, cluster_labels='cluster-name=aci
# if nodes are different we can quit here
for target_node in target_nodes:
if (target_node not in master_nodes or target_node not in replica_nodes) and master_nodes[0] in replica_nodes:
print('Pods run on the same node')
print('Pods run on the same node')
return False

except timeout_decorator.TimeoutError:
Expand Down
6 changes: 6 additions & 0 deletions manifests/postgresql.crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,12 @@ spec:
- PreferNoSchedule
tolerationSeconds:
type: integer
topologySpreadConstraints:
type: array
nullable: true
items:
type: object
x-kubernetes-preserve-unknown-fields: true
useLoadBalancer:
type: boolean
description: deprecated
Expand Down
10 changes: 10 additions & 0 deletions pkg/apis/acid.zalan.do/v1/crds.go
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,16 @@ var PostgresCRDResourceValidation = apiextv1.CustomResourceValidation{
},
},
},
"topologySpreadConstraints": {
Type: "array",
Nullable: true,
Items: &apiextv1.JSONSchemaPropsOrArray{
Schema: &apiextv1.JSONSchemaProps{
Type: "object",
XPreserveUnknownFields: util.True(),
},
},
},
"useLoadBalancer": {
Type: "boolean",
Description: "deprecated",
Expand Down
1 change: 1 addition & 0 deletions pkg/apis/acid.zalan.do/v1/operator_configuration_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ type KubernetesMetaConfiguration struct {
EnableReadinessProbe bool `json:"enable_readiness_probe,omitempty"`
EnableCrossNamespaceSecret bool `json:"enable_cross_namespace_secret,omitempty"`
EnableFinalizers *bool `json:"enable_finalizers,omitempty"`
EnablePostgresTopologySpreadConstraints bool `json:"enable_postgres_topology_spread_constraints,omitempty"`
}

// PostgresPodResourcesDefaults defines the spec of default resources
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/acid.zalan.do/v1/postgresql_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ type PostgresSpec struct {
// deprecated json tags
InitContainersOld []v1.Container `json:"init_containers,omitempty"`
PodPriorityClassNameOld string `json:"pod_priority_class_name,omitempty"`

AdditionalTopologySpreadConstraints []v1.TopologySpreadConstraint `json:"additionalTopologySpreadConstraints,omitempty"`
}

// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
Expand Down
5 changes: 5 additions & 0 deletions pkg/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,11 @@ func (c *Cluster) compareStatefulSetWith(statefulSet *appsv1.StatefulSet) *compa
needsRollUpdate = true
reasons = append(reasons, "new statefulset's pod affinity does not match the current one")
}
if !reflect.DeepEqual(c.Statefulset.Spec.Template.Spec.TopologySpreadConstraints, statefulSet.Spec.Template.Spec.TopologySpreadConstraints) {
needsReplace = true
needsRollUpdate = true
reasons = append(reasons, "new statefulset's pod topologySpreadConstraints does not match the current one")
}
if len(c.Statefulset.Spec.Template.Spec.Tolerations) != len(statefulSet.Spec.Template.Spec.Tolerations) {
needsReplace = true
needsRollUpdate = true
Expand Down
30 changes: 28 additions & 2 deletions pkg/cluster/k8sres.go
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,22 @@ func generatePodAntiAffinity(podAffinityTerm v1.PodAffinityTerm, preferredDuring
return podAntiAffinity
}

// generateTopologySpreadConstraints builds the list of topology spread
// constraints for a pod spec. The first entry is always the operator default:
// max skew of 1 over the "topology.kubernetes.io/zone" key, DoNotSchedule when
// unsatisfiable, selecting pods by the given labels. Any entries in
// additionalTopologySpreadConstraints are appended after it in their original
// order.
func generateTopologySpreadConstraints(labels labels.Set, additionalTopologySpreadConstraints []v1.TopologySpreadConstraint) []v1.TopologySpreadConstraint {
	// reserve room for the default constraint plus all additional ones
	constraints := make([]v1.TopologySpreadConstraint, 0, 1+len(additionalTopologySpreadConstraints))

	constraints = append(constraints, v1.TopologySpreadConstraint{
		MaxSkew:           1,
		TopologyKey:       "topology.kubernetes.io/zone",
		WhenUnsatisfiable: v1.DoNotSchedule,
		LabelSelector:     &metav1.LabelSelector{MatchLabels: labels},
	})

	// appending an empty or nil slice is a no-op, so no length check is needed
	return append(constraints, additionalTopologySpreadConstraints...)
}

func tolerations(tolerationsSpec *[]v1.Toleration, podToleration map[string]string) []v1.Toleration {
// allow to override tolerations by postgresql manifest
if len(*tolerationsSpec) > 0 {
Expand Down Expand Up @@ -832,6 +848,8 @@ func (c *Cluster) generatePodTemplate(
additionalSecretMount string,
additionalSecretMountPath string,
additionalVolumes []acidv1.AdditionalVolume,
topologySpreadConstraints bool,
additionalTopologySpreadConstraints []v1.TopologySpreadConstraint,
) (*v1.PodTemplateSpec, error) {

terminateGracePeriodSeconds := terminateGracePeriod
Expand Down Expand Up @@ -884,6 +902,10 @@ func (c *Cluster) generatePodTemplate(
podSpec.PriorityClassName = priorityClassName
}

if topologySpreadConstraints {
podSpec.TopologySpreadConstraints = generateTopologySpreadConstraints(labels, additionalTopologySpreadConstraints)
}

if sharePgSocketWithSidecars != nil && *sharePgSocketWithSidecars {
addVarRunVolume(&podSpec)
}
Expand Down Expand Up @@ -1487,7 +1509,9 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef
c.OpConfig.PodAntiAffinityPreferredDuringScheduling,
c.OpConfig.AdditionalSecretMount,
c.OpConfig.AdditionalSecretMountPath,
additionalVolumes)
additionalVolumes,
c.OpConfig.EnablePostgresTopologySpreadConstraints,
spec.AdditionalTopologySpreadConstraints)

if err != nil {
return nil, fmt.Errorf("could not generate pod template: %v", err)
Expand Down Expand Up @@ -2334,7 +2358,9 @@ func (c *Cluster) generateLogicalBackupJob() (*batchv1.CronJob, error) {
false,
c.OpConfig.AdditionalSecretMount,
c.OpConfig.AdditionalSecretMountPath,
[]acidv1.AdditionalVolume{}); err != nil {
[]acidv1.AdditionalVolume{},
true,
[]v1.TopologySpreadConstraint{}); err != nil {
return nil, fmt.Errorf("could not generate pod template for logical backup pod: %v", err)
}

Expand Down
44 changes: 44 additions & 0 deletions pkg/cluster/k8sres_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3795,3 +3795,47 @@ func TestGenerateCapabilities(t *testing.T) {
}
}
}

// TestTopologySpreadConstraints checks that a statefulset generated with
// EnablePostgresTopologySpreadConstraints switched on carries the default
// zone-based topology spread constraint in its pod template.
func TestTopologySpreadConstraints(t *testing.T) {
	clusterName := "acid-test-cluster"
	namespace := "default"

	pg := acidv1.Postgresql{
		ObjectMeta: metav1.ObjectMeta{
			Name:      clusterName,
			Namespace: namespace,
		},
		Spec: acidv1.PostgresSpec{
			NumberOfInstances: 1,
			Resources: &acidv1.Resources{
				ResourceRequests: acidv1.ResourceDescription{CPU: k8sutil.StringToPointer("1"), Memory: k8sutil.StringToPointer("10")},
				ResourceLimits:   acidv1.ResourceDescription{CPU: k8sutil.StringToPointer("1"), Memory: k8sutil.StringToPointer("10")},
			},
			Volume: acidv1.Volume{
				Size: "1G",
			},
		},
	}

	cluster := New(
		Config{
			OpConfig: config.Config{
				PodManagementPolicy:                     "ordered_ready",
				EnablePostgresTopologySpreadConstraints: true,
			},
		}, k8sutil.KubernetesClient{}, acidv1.Postgresql{}, logger, eventRecorder)
	cluster.Name = clusterName
	cluster.Namespace = namespace
	cluster.labelsSet(true)

	s, err := cluster.generateStatefulSet(&pg.Spec)
	if !assert.NoError(t, err) {
		// generateStatefulSet returns a nil statefulset together with the
		// error, so stop here instead of dereferencing it below
		return
	}

	// the default constraint the operator is expected to generate
	expected := v1.TopologySpreadConstraint{
		MaxSkew:           int32(1),
		TopologyKey:       "topology.kubernetes.io/zone",
		WhenUnsatisfiable: v1.DoNotSchedule,
		LabelSelector: &metav1.LabelSelector{
			MatchLabels: cluster.labelsSet(true),
		},
	}
	assert.Contains(t, s.Spec.Template.Spec.TopologySpreadConstraints, expected)
}
1 change: 1 addition & 0 deletions pkg/util/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ type Config struct {
EnableSecretsDeletion *bool `name:"enable_secrets_deletion" default:"true"`
EnablePersistentVolumeClaimDeletion *bool `name:"enable_persistent_volume_claim_deletion" default:"true"`
PersistentVolumeClaimRetentionPolicy map[string]string `name:"persistent_volume_claim_retention_policy" default:"when_deleted:retain,when_scaled:retain"`
EnablePostgresTopologySpreadConstraints bool `json:"enable_postgres_topology_spread_constraints,omitempty"`
}

// MustMarshal marshals the config or panics
Expand Down

0 comments on commit 4b34781

Please sign in to comment.