Skip to content
This repository has been archived by the owner on Mar 31, 2023. It is now read-only.

Commit

Permalink
Exclude node being updated from HA check
Browse files Browse the repository at this point in the history
This is a better check than what we did before.

Also compute the quorum instead of assuming there are always 3
control-plane nodes
  • Loading branch information
bboreham committed Jul 28, 2020
1 parent 8f84574 commit 4dc65a3
Showing 1 changed file with 16 additions and 6 deletions.
22 changes: 16 additions & 6 deletions pkg/apis/wksprovider/controller/wksctl/machine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ func (a *MachineController) update(ctx context.Context, c *baremetalspecv1.BareM
contextLog.Infof("........................NEW UPDATE FOR: %s...........................", machine.Name)
isMaster := isMaster(node)
if isMaster {
if err := a.prepareForMasterUpdate(ctx); err != nil {
if err := a.prepareForMasterUpdate(ctx, node); err != nil {
return err
}
}
Expand Down Expand Up @@ -517,9 +517,9 @@ func (a *MachineController) kubeadmUpOrDowngrade(ctx context.Context, machine *c
return nil
}

func (a *MachineController) prepareForMasterUpdate(ctx context.Context) error {
func (a *MachineController) prepareForMasterUpdate(ctx context.Context, node *v1.Node) error {
// Check if it's safe to update a master
if err := a.checkMasterHAConstraint(ctx); err != nil {
if err := a.checkMasterHAConstraint(ctx, node); err != nil {
return gerrors.Wrap(err, "Not enough available master nodes to allow master update")
}
return nil
Expand Down Expand Up @@ -842,23 +842,33 @@ func (a *MachineController) modifyNode(ctx context.Context, node *corev1.Node, u
return nil
}

func (a *MachineController) checkMasterHAConstraint(ctx context.Context) error {
func (a *MachineController) checkMasterHAConstraint(ctx context.Context, nodeBeingUpdated *v1.Node) error {
nodes, err := a.getMasterNodes(ctx)
if err != nil {
// If we can't read the nodes, return the error so we don't
// accidentally flush the sole master
return err
}
avail := 0
quorum := (len(nodes) + 1) / 2
for _, node := range nodes {
if sameNode(nodeBeingUpdated, node) {
continue
}
if hasConditionTrue(node, corev1.NodeReady) && !hasTaint(node, "NoSchedule") {
avail++
if avail > 2 { // We need 2 remaining after we take one offline
if avail >= quorum {
return nil
}
}
}
return errors.New("Fewer than two control-plane nodes would be available")
return fmt.Errorf("Fewer than %d control-plane nodes would be available", quorum)
}

// sameNode reports whether a and b refer to the same Node. Identity is
// decided by name rather than UID: names are required to be unique, whereas
// the UID changes if the object is deleted and recreated.
func sameNode(a, b *v1.Node) bool {
	left, right := a.Name, b.Name
	return left == right
}

func hasConditionTrue(node *corev1.Node, typ corev1.NodeConditionType) bool {
Expand Down

0 comments on commit 4dc65a3

Please sign in to comment.