Skip to content

Commit

Permalink
Implement Refresh Worker Certificates Logic (#65)
Browse files Browse the repository at this point in the history
  • Loading branch information
mateoflorido authored Oct 25, 2024
1 parent 1e5230f commit d7c73a4
Show file tree
Hide file tree
Showing 9 changed files with 532 additions and 100 deletions.
1 change: 1 addition & 0 deletions .github/workflows/e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ jobs:
- "Workload cluster creation"
- "Workload cluster scaling"
- "Workload cluster upgrade"
- "Certificate Refresh"
- "Orchestrated In place upgrades"
# TODO(ben): Remove once all tests are running stable.
fail-fast: false
Expand Down
9 changes: 8 additions & 1 deletion bootstrap/api/v1beta2/certificates_refresh_consts.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
package v1beta2

const (
CertificatesRefreshAnnotation = "v1beta2.k8sd.io/refresh-certificates"
CertificatesRefreshAnnotation = "v1beta2.k8sd.io/refresh-certificates"
CertificatesRefreshStatusAnnotation = "v1beta2.k8sd.io/refresh-certificates-status"
)

// Values stored under CertificatesRefreshStatusAnnotation on a Machine to
// report the state of a certificates refresh.
const (
// CertificatesRefreshInProgressStatus is set right before the refresh call is
// issued; a Machine carrying it is not refreshed again while it is present.
CertificatesRefreshInProgressStatus = "in-progress"
// CertificatesRefreshDoneStatus is set once the refresh succeeded and the new
// expiry date has been recorded on the Machine.
CertificatesRefreshDoneStatus = "done"
// CertificatesRefreshFailedStatus is set when the refresh returned an error.
CertificatesRefreshFailedStatus = "failed"
)

const (
Expand Down
192 changes: 119 additions & 73 deletions bootstrap/controllers/certificates_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,25 @@ import (
// CertificatesReconciler reconciles a Machine's certificates.
type CertificatesReconciler struct {
client.Client
Log logr.Logger
Scheme *runtime.Scheme
recorder record.EventRecorder

K8sdDialTimeout time.Duration

Log logr.Logger
Scheme *runtime.Scheme
recorder record.EventRecorder
K8sdDialTimeout time.Duration
managementCluster ck8s.ManagementCluster
}

// CertificatesScope bundles the objects a single certificates reconciliation
// of one Machine operates on.
type CertificatesScope struct {
// Cluster is the Cluster the Machine belongs to, looked up by the Machine's
// cluster name.
Cluster *clusterv1.Cluster
// Config is the CK8sConfig named by the Machine's bootstrap config reference.
Config *bootstrapv1.CK8sConfig
// Log is the logger used for this reconciliation.
Log logr.Logger
// Machine is the Machine whose certificates are inspected or refreshed.
Machine *clusterv1.Machine
// Patcher is the patch helper created for Machine; annotation changes are
// persisted through it.
Patcher *patch.Helper
// Workload is the workload cluster handle used to read certificate expiry
// dates and to request certificate refreshes.
Workload *ck8s.Workload
}

// SetupWithManager sets up the controller with the Manager.
func (r *CertificatesReconciler) SetupWithManager(mgr ctrl.Manager) error {
if _, err := ctrl.NewControllerManagedBy(mgr).For(&clusterv1.Machine{}).Build(r); err != nil {
if err := ctrl.NewControllerManagedBy(mgr).For(&clusterv1.Machine{}).Complete(r); err != nil {
return err
}

Expand All @@ -54,15 +61,6 @@ func (r *CertificatesReconciler) SetupWithManager(mgr ctrl.Manager) error {
return nil
}

type CertificatesScope struct {
Cluster *clusterv1.Cluster
Config *bootstrapv1.CK8sConfig
Log logr.Logger
Machine *clusterv1.Machine
Patcher *patch.Helper
Workload *ck8s.Workload
}

// +kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=ck8sconfigs,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=ck8sconfigs/status,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status;machines;machines/status,verbs=get;list;watch
Expand All @@ -77,97 +75,133 @@ func (r *CertificatesReconciler) Reconcile(ctx context.Context, req ctrl.Request
if apierrors.IsNotFound(err) {
return ctrl.Result{}, nil
}
// Error reading the object - requeue the request.
return ctrl.Result{}, err
}

if m.Status.NodeRef == nil {
// If the machine does not have a node ref, we requeue the request to retry.
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
}

if !m.ObjectMeta.DeletionTimestamp.IsZero() {
// Machine is being deleted, return early.
return ctrl.Result{}, nil
}

mAnnotations := m.GetAnnotations()
if mAnnotations == nil {
mAnnotations = map[string]string{}
}

var refreshCertificates, hasExpiryDateAnnotation bool
_, refreshCertificates = mAnnotations[bootstrapv1.CertificatesRefreshAnnotation]
_, hasExpiryDateAnnotation = mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation]

if mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation] == bootstrapv1.CertificatesRefreshInProgressStatus {
if !refreshCertificates {
// If a refresh is in progress but the refresh annotation is missing
// clear the status.
delete(mAnnotations, bootstrapv1.CertificatesRefreshStatusAnnotation)
m.SetAnnotations(mAnnotations)
if err := r.Client.Update(ctx, m); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to clear status annotation: %w", err)
}
return ctrl.Result{}, nil
}
log.Info("Certificates refresh already in progress",
"refreshStatus", bootstrapv1.CertificatesRefreshInProgressStatus,
"refreshAnnotation", mAnnotations[bootstrapv1.CertificatesRefreshAnnotation],
)
return ctrl.Result{}, nil
}

if !refreshCertificates && hasExpiryDateAnnotation {
// No need to refresh certificates or update expiry date, return early.
return ctrl.Result{}, nil
}

// Look up for the CK8sConfig.
scope, err := r.createScope(ctx, m, log)
if err != nil {
return ctrl.Result{}, err
}

if !hasExpiryDateAnnotation {
if err := r.updateExpiryDateAnnotation(ctx, scope); err != nil {
return ctrl.Result{}, err
}
}

if refreshCertificates {
if err := r.refreshCertificates(ctx, scope); err != nil {
// On error, we requeue the request to retry.
mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation] = bootstrapv1.CertificatesRefreshFailedStatus
m.SetAnnotations(mAnnotations)
if err := r.Client.Update(ctx, m); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to clear status annotation after error: %w", err)
}
return ctrl.Result{}, err
}
}

return ctrl.Result{}, nil
}

// createScope collects what a certificates reconciliation of Machine m needs:
// the CK8sConfig named by the Machine's bootstrap config reference, the
// Cluster the Machine belongs to, a handle to the workload cluster, and a
// patch helper for m. It returns the assembled CertificatesScope, or a wrapped
// error from the first lookup that fails.
func (r *CertificatesReconciler) createScope(ctx context.Context, m *clusterv1.Machine, log logr.Logger) (*CertificatesScope, error) {
config := &bootstrapv1.CK8sConfig{}
if err := r.Client.Get(ctx, types.NamespacedName{Namespace: m.Namespace, Name: m.Spec.Bootstrap.ConfigRef.Name}, config); err != nil {
return ctrl.Result{}, err
return nil, fmt.Errorf("failed to get CK8sConfig: %w", err)
}

// Get the owner of the CK8sConfig to determine if it's a control plane or worker node.
configOwner, err := bsutil.GetConfigOwner(ctx, r.Client, config)
if err != nil {
log.Error(err, "Failed to get config owner")
return ctrl.Result{}, err
}
if configOwner == nil {
return ctrl.Result{}, nil
// NOTE(review): this condition also fires when configOwner is nil while err is
// nil (the nil check implies GetConfigOwner can return that). In that case %w
// wraps a nil error, so the message ends in "%!w(<nil>)" and the returned
// error unwraps to nil. Consider handling the nil-owner case separately.
if err != nil || configOwner == nil {
return nil, fmt.Errorf("failed to get config owner: %w", err)
}

cluster, err := util.GetClusterByName(ctx, r.Client, m.GetNamespace(), m.Spec.ClusterName)
if err != nil {
return ctrl.Result{}, err
return nil, fmt.Errorf("failed to get cluster: %w", err)
}

microclusterPort := config.Spec.ControlPlaneConfig.GetMicroclusterPort()
workload, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster), microclusterPort)
// The workload cluster is resolved from the Cluster's object key and the
// microcluster port taken from the CK8sConfig's control plane configuration.
workload, err := r.managementCluster.GetWorkloadCluster(
ctx,
util.ObjectKey(cluster),
config.Spec.ControlPlaneConfig.GetMicroclusterPort(),
)
if err != nil {
return ctrl.Result{}, err
return nil, fmt.Errorf("failed to get workload cluster: %w", err)
}

// The patch helper is created from m as it is at this point; later
// annotation changes are persisted through it.
patchHelper, err := patch.NewHelper(m, r.Client)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to create patch helper for machine: %w", err)
return nil, fmt.Errorf("failed to create patch helper: %w", err)
}

scope := &CertificatesScope{
return &CertificatesScope{
Log: log,
Machine: m,
Config: config,
Cluster: cluster,
Patcher: patchHelper,
Workload: workload,
}

if !hasExpiryDateAnnotation {
if err := r.updateExpiryDateAnnotation(ctx, scope); err != nil {
return ctrl.Result{}, err
}
}

if refreshCertificates {
if configOwner.IsControlPlaneMachine() {
if err := r.refreshControlPlaneCertificates(ctx, scope); err != nil {
return ctrl.Result{}, err
}
} else {
log.Info("worker nodes are not supported yet")
return ctrl.Result{}, nil
}
}

return ctrl.Result{}, nil
}, nil
}

func (r *CertificatesReconciler) refreshControlPlaneCertificates(ctx context.Context, scope *CertificatesScope) error {
func (r *CertificatesReconciler) refreshCertificates(ctx context.Context, scope *CertificatesScope) error {
nodeToken, err := token.LookupNodeToken(ctx, r.Client, util.ObjectKey(scope.Cluster), scope.Machine.Name)
if err != nil {
return fmt.Errorf("failed to lookup node token: %w", err)
}

mAnnotations := scope.Machine.GetAnnotations()

refreshAnnotation, ok := mAnnotations[bootstrapv1.CertificatesRefreshAnnotation]
if !ok {
return nil
return fmt.Errorf("refresh annotation not found")
}

mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation] = bootstrapv1.CertificatesRefreshInProgressStatus
scope.Machine.SetAnnotations(mAnnotations)
if err := scope.Patcher.Patch(ctx, scope.Machine); err != nil {
return fmt.Errorf("failed to set in-progress status: %w", err)
}

r.recorder.Eventf(
Expand All @@ -179,16 +213,31 @@ func (r *CertificatesReconciler) refreshControlPlaneCertificates(ctx context.Con

seconds, err := utiltime.TTLToSeconds(refreshAnnotation)
if err != nil {
return fmt.Errorf("failed to parse expires-in annotation value: %w", err)
return fmt.Errorf("failed to parse TTL: %w", err)
}

controlPlaneConfig := scope.Config.Spec.ControlPlaneConfig
controlPlaneEndpoint := scope.Cluster.Spec.ControlPlaneEndpoint.Host

extraSANs := controlPlaneConfig.ExtraSANs
extraSANs = append(extraSANs, controlPlaneEndpoint)
var expirySecondsUnix int
configOwner, _ := bsutil.GetConfigOwner(ctx, r.Client, scope.Config)
if configOwner.IsControlPlaneMachine() {
var extraSANs []string
extraSANs = append(extraSANs, scope.Config.Spec.ControlPlaneConfig.ExtraSANs...)
extraSANs = append(extraSANs, scope.Cluster.Spec.ControlPlaneEndpoint.Host)
expirySecondsUnix, err = scope.Workload.RefreshControlPlaneCertificates(
ctx,
scope.Machine,
*nodeToken,
seconds,
extraSANs,
)
} else {
expirySecondsUnix, err = scope.Workload.RefreshWorkerCertificates(
ctx,
scope.Machine,
*nodeToken,
seconds,
)
}

expirySecondsUnix, err := scope.Workload.RefreshCertificates(ctx, scope.Machine, *nodeToken, seconds, extraSANs)
if err != nil {
r.recorder.Eventf(
scope.Machine,
Expand All @@ -200,10 +249,11 @@ func (r *CertificatesReconciler) refreshControlPlaneCertificates(ctx context.Con
}

expiryTime := time.Unix(int64(expirySecondsUnix), 0)

delete(mAnnotations, bootstrapv1.CertificatesRefreshAnnotation)
mAnnotations[bootstrapv1.CertificatesRefreshStatusAnnotation] = bootstrapv1.CertificatesRefreshDoneStatus
mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation] = expiryTime.Format(time.RFC3339)
scope.Machine.SetAnnotations(mAnnotations)

if err := scope.Patcher.Patch(ctx, scope.Machine); err != nil {
return fmt.Errorf("failed to patch machine annotations: %w", err)
}
Expand All @@ -230,21 +280,17 @@ func (r *CertificatesReconciler) updateExpiryDateAnnotation(ctx context.Context,
return fmt.Errorf("failed to lookup node token: %w", err)
}

mAnnotations := scope.Machine.GetAnnotations()
if mAnnotations == nil {
mAnnotations = map[string]string{}
}

expiryDateString, err := scope.Workload.GetCertificatesExpiryDate(ctx, scope.Machine, *nodeToken)
if err != nil {
return fmt.Errorf("failed to get certificates expiry date: %w", err)
}

mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation] = expiryDateString
scope.Machine.SetAnnotations(mAnnotations)
if err := scope.Patcher.Patch(ctx, scope.Machine); err != nil {
return fmt.Errorf("failed to patch machine annotations: %w", err)
mAnnotations := scope.Machine.GetAnnotations()
if mAnnotations == nil {
mAnnotations = map[string]string{}
}

return nil
mAnnotations[bootstrapv1.MachineCertificatesExpiryDateAnnotation] = expiryDateString
scope.Machine.SetAnnotations(mAnnotations)
return scope.Patcher.Patch(ctx, scope.Machine)
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ require (
golang.org/x/mod v0.19.0
golang.org/x/net v0.23.0 // indirect
golang.org/x/oauth2 v0.18.0 // indirect
golang.org/x/sync v0.6.0 // indirect
golang.org/x/sync v0.8.0
golang.org/x/sys v0.18.0 // indirect
golang.org/x/term v0.18.0 // indirect
golang.org/x/text v0.14.0 // indirect
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -359,8 +359,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ=
golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand Down
Loading

0 comments on commit d7c73a4

Please sign in to comment.