Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NS Deletion hardening #156

Merged
merged 1 commit into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions internal/common/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package common
import (
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/rand"
)

Expand All @@ -23,3 +24,12 @@ func RandomizeDuration(variance float64, duration time.Duration) time.Duration {
int64(lowerLimit),
int64(upperLimit)))
}

func Owns(owner, object metav1.Object) bool {
for _, ref := range object.GetOwnerReferences() {
if ref.UID == owner.GetUID() {
return true
}
}
return false
}
137 changes: 137 additions & 0 deletions internal/common/helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ package common
import (
"testing"
"time"

. "github.com/onsi/gomega"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/ptr"

"github.com/kuadrant/dns-operator/api/v1alpha1"
)

func TestRandomizeDuration(t *testing.T) {
Expand Down Expand Up @@ -41,3 +48,133 @@ func isValidVariance(duration, randomizedDuration time.Duration, variance float6
return float64(randomizedDuration.Milliseconds()) >= lowerLimmit &&
float64(randomizedDuration.Milliseconds()) < upperLimit
}

func TestOwns(t *testing.T) {
RegisterTestingT(t)
testCases := []struct {
Name string
Object metav1.Object
Owner metav1.Object
Verify func(t *testing.T, result bool)
}{
{
Name: "object is owned",
Object: &v1alpha1.DNSRecord{
ObjectMeta: metav1.ObjectMeta{
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "v1beta1",
Kind: "ManagedZone",
Name: "test-zone",
UID: "unique-uid",
BlockOwnerDeletion: ptr.To(true),
},
},
},
},
Owner: &v1alpha1.ManagedZone{
ObjectMeta: metav1.ObjectMeta{
UID: "unique-uid",
},
},
Verify: func(t *testing.T, result bool) {
Expect(result).To(BeTrue())
},
}, {
Name: "object is owned by multiple",
Object: &v1alpha1.DNSRecord{
ObjectMeta: metav1.ObjectMeta{
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "v1beta1",
Kind: "ManagedZone",
Name: "test-zone-other",
UID: "unique-uid-other",
BlockOwnerDeletion: ptr.To(true),
},
{
APIVersion: "v1beta1",
Kind: "ManagedZone",
Name: "test-zone",
UID: "unique-uid",
BlockOwnerDeletion: ptr.To(true),
},
{
APIVersion: "v1beta1",
Kind: "ManagedZone",
Name: "test-zone-other2",
UID: "unique-uid-other2",
BlockOwnerDeletion: ptr.To(true),
},
},
},
},
Owner: &v1alpha1.ManagedZone{
ObjectMeta: metav1.ObjectMeta{
UID: "unique-uid",
},
},
Verify: func(t *testing.T, result bool) {
Expect(result).To(BeTrue())
},
}, {
Name: "object is not owned",
Object: &v1alpha1.DNSRecord{
ObjectMeta: metav1.ObjectMeta{
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "v1beta1",
Kind: "ManagedZone",
Name: "test-zone-other",
UID: "unique-uid-other",
BlockOwnerDeletion: ptr.To(false),
},
},
},
},
Owner: &v1alpha1.ManagedZone{
ObjectMeta: metav1.ObjectMeta{
UID: "unique-uid",
},
},
Verify: func(t *testing.T, result bool) {
Expect(result).To(BeFalse())
},
}, {
Name: "object is not owned multiple",
Object: &v1alpha1.DNSRecord{
ObjectMeta: metav1.ObjectMeta{
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "v1beta1",
Kind: "ManagedZone",
Name: "test-zone-other",
UID: "unique-uid-other",
BlockOwnerDeletion: ptr.To(true),
}, {
APIVersion: "v1beta1",
Kind: "ManagedZone",
Name: "test-zone-other2",
UID: "unique-uid-other2",
BlockOwnerDeletion: ptr.To(false),
},
},
},
},
Owner: &v1alpha1.ManagedZone{
ObjectMeta: metav1.ObjectMeta{
UID: "unique-uid",
},
},
Verify: func(t *testing.T, result bool) {
Expect(result).To(BeFalse())
},
},
}

for _, testCase := range testCases {
t.Run(testCase.Name, func(t *testing.T) {
testCase.Verify(t, Owns(testCase.Owner, testCase.Object))
})
}
}
93 changes: 42 additions & 51 deletions internal/controller/dnsrecord_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import (
"github.com/kuadrant/dns-operator/internal/common"
externaldnsplan "github.com/kuadrant/dns-operator/internal/external-dns/plan"
externaldnsregistry "github.com/kuadrant/dns-operator/internal/external-dns/registry"
"github.com/kuadrant/dns-operator/internal/metrics"
"github.com/kuadrant/dns-operator/internal/provider"
)

Expand Down Expand Up @@ -96,11 +97,25 @@ func (r *DNSRecordReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
}
dnsRecord := previous.DeepCopy()

managedZone := &v1alpha1.ManagedZone{
ObjectMeta: metav1.ObjectMeta{
Name: dnsRecord.Spec.ManagedZoneRef.Name,
Namespace: dnsRecord.Namespace,
},
}
err = r.Get(ctx, client.ObjectKeyFromObject(managedZone), managedZone, &client.GetOptions{})
if err != nil {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what if the managed zone is not found when the dns record is being deleted? The finalizer on the dnsrecord will never be removed

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is the intention, that means that DNS Records have been created in the DNS Provider and have not been cleaned up.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, you might be right. I know I do not know. But that is causing an issue anyway. Some tests are failing because the dns record is never gone after having deleted it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@philbrookes Is this the intention if a user deletes a ManagedZone first before deleting the DNSRecords ? 🤔 This is how the integration tests are in Kuadrant Operator. In the tests, the ManagedZone has completed their deletion, but DNSRecords remained, and prevented the test NS deletion.

Seems like this introduces a required order of deletion for the user. So maybe the ManagedZone deletion should wait until the assoicated DNSRecords are deleted ? 🤔

If this is the intended behaviour anyway, I've a fix to update the Kuadrant Operator DNSPolicy tests as part of a PR Kuadrant/kuadrant-operator@c70b5e1. Just want to check this is the expected flow being merging

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is the intention, if the DNS Record is deleted after the managed zone, the DNS Operator cannot remove the records from the DNS Provider, in order to highlight this to the user/admin the DNS Record never has it's finalizer removed.

See the original issue here: #106

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool, thanks, I'll merge the PR containing the changes to the DNS Policy tests to account for this 👍

reason := "ManagedZoneError"
message := fmt.Sprintf("The managedZone could not be loaded: %v", err)
setDNSRecordCondition(dnsRecord, string(v1alpha1.ConditionTypeReady), metav1.ConditionFalse, reason, message)
return r.updateStatus(ctx, previous, dnsRecord, false, err)
}

if dnsRecord.DeletionTimestamp != nil && !dnsRecord.DeletionTimestamp.IsZero() {
if err = r.ReconcileHealthChecks(ctx, dnsRecord); client.IgnoreNotFound(err) != nil {
if err = r.ReconcileHealthChecks(ctx, dnsRecord, managedZone); client.IgnoreNotFound(err) != nil {
return ctrl.Result{}, err
}
hadChanges, err := r.deleteRecord(ctx, dnsRecord)
hadChanges, err := r.deleteRecord(ctx, dnsRecord, managedZone)
if err != nil {
logger.Error(err, "Failed to delete DNSRecord")
return ctrl.Result{}, err
Expand Down Expand Up @@ -134,6 +149,14 @@ func (r *DNSRecordReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return ctrl.Result{RequeueAfter: randomizedValidationRequeue}, nil
}

if !common.Owns(managedZone, dnsRecord) {
err = controllerutil.SetOwnerReference(managedZone, dnsRecord, r.Scheme)
if err != nil {
return ctrl.Result{}, err
}
err = r.Client.Update(ctx, dnsRecord)
return ctrl.Result{}, err
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Arguably not gaining anything by setting this relationship up since it didn't work in the way that we/i hoped (Doesn't trigger reconciles of children when the owner changes).

var reason, message string
err = dnsRecord.Validate()
if err != nil {
Expand All @@ -144,15 +167,15 @@ func (r *DNSRecordReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
}

// Publish the record
hadChanges, err := r.publishRecord(ctx, dnsRecord)
hadChanges, err := r.publishRecord(ctx, dnsRecord, managedZone)
if err != nil {
reason = "ProviderError"
message = fmt.Sprintf("The DNS provider failed to ensure the record: %v", provider.SanitizeError(err))
setDNSRecordCondition(dnsRecord, string(v1alpha1.ConditionTypeReady), metav1.ConditionFalse, reason, message)
return r.updateStatus(ctx, previous, dnsRecord, hadChanges, err)
}

if err = r.ReconcileHealthChecks(ctx, dnsRecord); err != nil {
if err = r.ReconcileHealthChecks(ctx, dnsRecord, managedZone); err != nil {
return ctrl.Result{}, err
}

Expand Down Expand Up @@ -185,7 +208,7 @@ func (r *DNSRecordReconciler) updateStatus(ctx context.Context, previous, curren
// implies that they were overridden - bump write counter
if !generationChanged(current) {
current.Status.WriteCounter++
writeCounter.WithLabelValues(current.Name, current.Namespace).Inc()
metrics.WriteCounter.WithLabelValues(current.Name, current.Namespace).Inc()
logger.V(1).Info("Changes needed on the same generation of record")
}
requeueTime = randomizedValidationRequeue
Expand All @@ -208,7 +231,7 @@ func (r *DNSRecordReconciler) updateStatus(ctx context.Context, previous, curren
// reset the counter on the gen change regardless of having changes in the plan
if generationChanged(current) {
current.Status.WriteCounter = 0
writeCounter.WithLabelValues(current.Name, current.Namespace).Set(0)
metrics.WriteCounter.WithLabelValues(current.Name, current.Namespace).Set(0)
logger.V(1).Info("Resetting write counter on the generation change")
}

Expand Down Expand Up @@ -239,15 +262,15 @@ func (r *DNSRecordReconciler) SetupWithManager(mgr ctrl.Manager, maxRequeue, val
For(&v1alpha1.DNSRecord{}).
Watches(&v1alpha1.ManagedZone{}, handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, o client.Object) []reconcile.Request {
logger := log.FromContext(ctx)
toReconcile := []reconcile.Request{}
// list dns records in the maanagedzone namespace as they will be in the same namespace as the zone
var toReconcile []reconcile.Request
// list dns records in the managedzone namespace as they will be in the same namespace as the zone
records := &v1alpha1.DNSRecordList{}
if err := mgr.GetClient().List(ctx, records, &client.ListOptions{Namespace: o.GetNamespace()}); err != nil {
logger.Error(err, "failed to list dnsrecords ", "namespace", o.GetNamespace())
return toReconcile
}
for _, record := range records.Items {
if record.Spec.ManagedZoneRef.Name == o.GetName() {
if common.Owns(o, &record) {
logger.Info("managed zone updated", "managedzone", o.GetNamespace()+"/"+o.GetName(), "enqueuing dnsrecord ", record.GetName())
toReconcile = append(toReconcile, reconcile.Request{NamespacedName: client.ObjectKeyFromObject(&record)})
}
philbrookes marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -259,20 +282,9 @@ func (r *DNSRecordReconciler) SetupWithManager(mgr ctrl.Manager, maxRequeue, val

// deleteRecord deletes record(s) in the DNSPRovider(i.e. route53) configured by the ManagedZone assigned to this
// DNSRecord (dnsRecord.Status.ParentManagedZone).
func (r *DNSRecordReconciler) deleteRecord(ctx context.Context, dnsRecord *v1alpha1.DNSRecord) (bool, error) {
func (r *DNSRecordReconciler) deleteRecord(ctx context.Context, dnsRecord *v1alpha1.DNSRecord, managedZone *v1alpha1.ManagedZone) (bool, error) {
logger := log.FromContext(ctx)

managedZone := &v1alpha1.ManagedZone{
ObjectMeta: metav1.ObjectMeta{
Name: dnsRecord.Spec.ManagedZoneRef.Name,
Namespace: dnsRecord.Namespace,
},
}
err := r.Get(ctx, client.ObjectKeyFromObject(managedZone), managedZone, &client.GetOptions{})
if err != nil {
// If the Managed Zone isn't found, just continue
return false, client.IgnoreNotFound(err)
}
managedZoneReady := meta.IsStatusConditionTrue(managedZone.Status.Conditions, "Ready")

if !managedZoneReady {
Expand All @@ -297,18 +309,9 @@ func (r *DNSRecordReconciler) deleteRecord(ctx context.Context, dnsRecord *v1alp

// publishRecord publishes record(s) to the DNSPRovider(i.e. route53) configured by the ManagedZone assigned to this
// DNSRecord (dnsRecord.Status.ParentManagedZone).
func (r *DNSRecordReconciler) publishRecord(ctx context.Context, dnsRecord *v1alpha1.DNSRecord) (bool, error) {
func (r *DNSRecordReconciler) publishRecord(ctx context.Context, dnsRecord *v1alpha1.DNSRecord, managedZone *v1alpha1.ManagedZone) (bool, error) {
logger := log.FromContext(ctx)
managedZone := &v1alpha1.ManagedZone{
ObjectMeta: metav1.ObjectMeta{
Name: dnsRecord.Spec.ManagedZoneRef.Name,
Namespace: dnsRecord.Namespace,
},
}
err := r.Get(ctx, client.ObjectKeyFromObject(managedZone), managedZone, &client.GetOptions{})
if err != nil {
return false, err
}

managedZoneReady := meta.IsStatusConditionTrue(managedZone.Status.Conditions, "Ready")

if !managedZoneReady {
Expand Down Expand Up @@ -355,14 +358,14 @@ func exponentialRequeueTime(lastRequeueTime string) time.Duration {
return randomizedValidationRequeue
}
// double the duration. Return the max timeout if overshoot
newReqeueue := lastRequeue * 2
if newReqeueue > defaultRequeueTime {
newRequeue := lastRequeue * 2
if newRequeue > defaultRequeueTime {
return defaultRequeueTime
}
return newReqeueue
return newRequeue
}

// setDNSRecordCondition adds or updates a given condition in the DNSRecord status..
// setDNSRecordCondition adds or updates a given condition in the DNSRecord status.
func setDNSRecordCondition(dnsRecord *v1alpha1.DNSRecord, conditionType string, status metav1.ConditionStatus, reason, message string) {
cond := metav1.Condition{
Type: conditionType,
Expand All @@ -374,18 +377,7 @@ func setDNSRecordCondition(dnsRecord *v1alpha1.DNSRecord, conditionType string,
meta.SetStatusCondition(&dnsRecord.Status.Conditions, cond)
}

func (r *DNSRecordReconciler) getDNSProvider(ctx context.Context, dnsRecord *v1alpha1.DNSRecord) (provider.Provider, error) {
managedZone := &v1alpha1.ManagedZone{
ObjectMeta: metav1.ObjectMeta{
Name: dnsRecord.Spec.ManagedZoneRef.Name,
Namespace: dnsRecord.Namespace,
},
}
err := r.Get(ctx, client.ObjectKeyFromObject(managedZone), managedZone, &client.GetOptions{})
if err != nil {
return nil, err
}

func (r *DNSRecordReconciler) getDNSProvider(ctx context.Context, managedZone *v1alpha1.ManagedZone) (provider.Provider, error) {
providerConfig := provider.Config{
DomainFilter: externaldnsendpoint.NewDomainFilter([]string{managedZone.Spec.DomainName}),
ZoneTypeFilter: externaldnsprovider.NewZoneTypeFilter(""),
Expand All @@ -403,9 +395,8 @@ func (r *DNSRecordReconciler) applyChanges(ctx context.Context, dnsRecord *v1alp
rootDomainName, _ := strings.CutPrefix(dnsRecord.Spec.RootHost, v1alpha1.WildcardPrefix)
zoneDomainFilter := externaldnsendpoint.NewDomainFilter([]string{zoneDomainName})
managedDNSRecordTypes := []string{externaldnsendpoint.RecordTypeA, externaldnsendpoint.RecordTypeAAAA, externaldnsendpoint.RecordTypeCNAME}
excludeDNSRecordTypes := []string{}

dnsProvider, err := r.getDNSProvider(ctx, dnsRecord)
var excludeDNSRecordTypes []string
dnsProvider, err := r.getDNSProvider(ctx, managedZone)
if err != nil {
return false, err
}
Expand Down
Loading
Loading