From cf39f8036d5c512878ee08684566e0ffd6c20d62 Mon Sep 17 00:00:00 2001 From: Bowei Du Date: Fri, 24 Mar 2023 00:13:08 -0700 Subject: [PATCH] Adds global OperaitonsUseWait which switches Operation.Get to Operation.Wait This flag is temporary. After soaking testing, we should be able to remove the Get() path. --- pkg/cloud/op.go | 129 +++++++++++++++++++++++++++++++------------ pkg/cloud/service.go | 10 ++-- 2 files changed, 99 insertions(+), 40 deletions(-) diff --git a/pkg/cloud/op.go b/pkg/cloud/op.go index e8516bb7..11ec0921 100644 --- a/pkg/cloud/op.go +++ b/pkg/cloud/op.go @@ -34,6 +34,13 @@ const ( operationStatusDone = "DONE" ) +var ( + // OperationsUseWait set to true to switch to using /Wait vs /Get. This should be a + // transparent change to the clients and this option will be removed once we have + // confidence that there are no issues with switching to the new method. + OperationsUseWait = true +) + // operation is a GCE operation that can be watied on. type operation interface { // isDone queries GCE for the done status. This call can block. @@ -64,19 +71,36 @@ func (o *gaOperation) isDone(ctx context.Context) (bool, error) { err error ) - switch o.key.Type() { - case meta.Regional: - op, err = o.s.GA.RegionOperations.Get(o.projectID, o.key.Region, o.key.Name).Context(ctx).Do() - klog.V(5).Infof("GA.RegionOperations.Get(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Region, o.key.Name, op, err, ctx) - case meta.Zonal: - op, err = o.s.GA.ZoneOperations.Get(o.projectID, o.key.Zone, o.key.Name).Context(ctx).Do() - klog.V(5).Infof("GA.ZoneOperations.Get(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Zone, o.key.Name, op, err, ctx) - case meta.Global: - op, err = o.s.GA.GlobalOperations.Get(o.projectID, o.key.Name).Context(ctx).Do() - klog.V(5).Infof("GA.GlobalOperations.Get(%v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Name, op, err, ctx) - default: - return false, fmt.Errorf("invalid key type: %#v", o.key) + if OperationsUseWait { + switch o.key.Type() { + case meta.Regional: + op, err = o.s.GA.RegionOperations.Wait(o.projectID, o.key.Region, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("GA.RegionOperations.Wait(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Region, o.key.Name, op, err, ctx) + case meta.Zonal: + op, err = o.s.GA.ZoneOperations.Wait(o.projectID, o.key.Zone, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("GA.ZoneOperations.Wait(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Zone, o.key.Name, op, err, ctx) + case meta.Global: + op, err = o.s.GA.GlobalOperations.Wait(o.projectID, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("GA.GlobalOperations.Wait(%v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Name, op, err, ctx) + default: + return false, fmt.Errorf("invalid key type: %#v", o.key) + } + } else { + switch o.key.Type() { + case meta.Regional: + op, err = o.s.GA.RegionOperations.Get(o.projectID, o.key.Region, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("GA.RegionOperations.Get(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Region, o.key.Name, op, err, ctx) + case meta.Zonal: + op, err = o.s.GA.ZoneOperations.Get(o.projectID, o.key.Zone, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("GA.ZoneOperations.Get(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Zone, o.key.Name, op, err, ctx) + case meta.Global: + op, err = o.s.GA.GlobalOperations.Get(o.projectID, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("GA.GlobalOperations.Get(%v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Name, op, err, ctx) + default: + return false, fmt.Errorf("invalid key type: %#v", o.key) + } } + if err != nil { return false, err } @@ -121,19 +145,36 @@ func (o *alphaOperation) isDone(ctx context.Context) (bool, error) { err error ) - switch o.key.Type() { - case meta.Regional: - op, err = o.s.Alpha.RegionOperations.Get(o.projectID, o.key.Region, o.key.Name).Context(ctx).Do() - klog.V(5).Infof("Alpha.RegionOperations.Get(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Region, o.key.Name, op, err, ctx) - case meta.Zonal: - op, err = o.s.Alpha.ZoneOperations.Get(o.projectID, o.key.Zone, o.key.Name).Context(ctx).Do() - klog.V(5).Infof("Alpha.ZoneOperations.Get(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Zone, o.key.Name, op, err, ctx) - case meta.Global: - op, err = o.s.Alpha.GlobalOperations.Get(o.projectID, o.key.Name).Context(ctx).Do() - klog.V(5).Infof("Alpha.GlobalOperations.Get(%v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Name, op, err, ctx) - default: - return false, fmt.Errorf("invalid key type: %#v", o.key) + if OperationsUseWait { + switch o.key.Type() { + case meta.Regional: + op, err = o.s.Alpha.RegionOperations.Wait(o.projectID, o.key.Region, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("Alpha.RegionOperations.Wait(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Region, o.key.Name, op, err, ctx) + case meta.Zonal: + op, err = o.s.Alpha.ZoneOperations.Wait(o.projectID, o.key.Zone, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("Alpha.ZoneOperations.Wait(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Zone, o.key.Name, op, err, ctx) + case meta.Global: + op, err = o.s.Alpha.GlobalOperations.Wait(o.projectID, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("Alpha.GlobalOperations.Wait(%v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Name, op, err, ctx) + default: + return false, fmt.Errorf("invalid key type: %#v", o.key) + } + } else { + switch o.key.Type() { + case meta.Regional: + op, err = o.s.Alpha.RegionOperations.Get(o.projectID, o.key.Region, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("Alpha.RegionOperations.Get(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Region, o.key.Name, op, err, ctx) + case meta.Zonal: + op, err = o.s.Alpha.ZoneOperations.Get(o.projectID, o.key.Zone, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("Alpha.ZoneOperations.Get(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Zone, o.key.Name, op, err, ctx) + case meta.Global: + op, err = o.s.Alpha.GlobalOperations.Get(o.projectID, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("Alpha.GlobalOperations.Get(%v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Name, op, err, ctx) + default: + return false, fmt.Errorf("invalid key type: %#v", o.key) + } } + if err != nil { return false, err } @@ -178,18 +219,34 @@ func (o *betaOperation) isDone(ctx context.Context) (bool, error) { err error ) - switch o.key.Type() { - case meta.Regional: - op, err = o.s.Beta.RegionOperations.Get(o.projectID, o.key.Region, o.key.Name).Context(ctx).Do() - klog.V(5).Infof("Beta.RegionOperations.Get(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Region, o.key.Name, op, err, ctx) - case meta.Zonal: - op, err = o.s.Beta.ZoneOperations.Get(o.projectID, o.key.Zone, o.key.Name).Context(ctx).Do() - klog.V(5).Infof("Beta.ZoneOperations.Get(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Zone, o.key.Name, op, err, ctx) - case meta.Global: - op, err = o.s.Beta.GlobalOperations.Get(o.projectID, o.key.Name).Context(ctx).Do() - klog.V(5).Infof("Beta.GlobalOperations.Get(%v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Name, op, err, ctx) - default: - return false, fmt.Errorf("invalid key type: %#v", o.key) + if OperationsUseWait { + switch o.key.Type() { + case meta.Regional: + op, err = o.s.Beta.RegionOperations.Wait(o.projectID, o.key.Region, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("Beta.RegionOperations.Wait(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Region, o.key.Name, op, err, ctx) + case meta.Zonal: + op, err = o.s.Beta.ZoneOperations.Wait(o.projectID, o.key.Zone, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("Beta.ZoneOperations.Wait(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Zone, o.key.Name, op, err, ctx) + case meta.Global: + op, err = o.s.Beta.GlobalOperations.Wait(o.projectID, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("Beta.GlobalOperations.Wait(%v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Name, op, err, ctx) + default: + return false, fmt.Errorf("invalid key type: %#v", o.key) + } + } else { + switch o.key.Type() { + case meta.Regional: + op, err = o.s.Beta.RegionOperations.Get(o.projectID, o.key.Region, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("Beta.RegionOperations.Get(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Region, o.key.Name, op, err, ctx) + case meta.Zonal: + op, err = o.s.Beta.ZoneOperations.Get(o.projectID, o.key.Zone, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("Beta.ZoneOperations.Get(%v, %v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Zone, o.key.Name, op, err, ctx) + case meta.Global: + op, err = o.s.Beta.GlobalOperations.Get(o.projectID, o.key.Name).Context(ctx).Do() + klog.V(5).Infof("Beta.GlobalOperations.Get(%v, %v) = %+v, %v; ctx = %v", o.projectID, o.key.Name, op, err, ctx) + default: + return false, fmt.Errorf("invalid key type: %#v", o.key) + } } if err != nil { return false, err diff --git a/pkg/cloud/service.go b/pkg/cloud/service.go index 130ebe64..31ef6e17 100644 --- a/pkg/cloud/service.go +++ b/pkg/cloud/service.go @@ -19,6 +19,7 @@ package cloud import ( "context" "fmt" + "time" alpha "google.golang.org/api/compute/v0.alpha" beta "google.golang.org/api/compute/v0.beta" @@ -79,28 +80,29 @@ func (s *Service) WaitForCompletion(ctx context.Context, genericOp interface{}) // If an error occurs retrieving the operation, the loop will continue until the context is done. // This is to prevent a transient error from bubbling up to controller-level logic. func (s *Service) pollOperation(ctx context.Context, op operation) error { + start := time.Now() var pollCount int for { // Check if context has been cancelled. Note that ctx.Done() must be checked before // returning ctx.Err(). select { case <-ctx.Done(): - klog.V(5).Infof("op.pollOperation(%v, %v) not completed, poll count = %d, ctx.Err = %v", ctx, op, pollCount, ctx.Err()) + klog.V(5).Infof("op.pollOperation(%v, %v) not completed, poll count = %d, ctx.Err = %v (%v elapsed)", ctx, op, pollCount, ctx.Err(), time.Now().Sub(start)) return ctx.Err() default: // ctx is not canceled, continue immediately } pollCount++ - klog.V(5).Infof("op.isDone(%v) waiting; op = %v, poll count = %d", ctx, op, pollCount) + klog.V(5).Infof("op.isDone(%v) waiting; op = %v, poll count = %d (%v elapsed)", ctx, op, pollCount, time.Now().Sub(start)) s.RateLimiter.Accept(ctx, op.rateLimitKey()) switch done, err := op.isDone(ctx); { case err != nil: - klog.V(5).Infof("op.isDone(%v) error; op = %v, poll count = %d, err = %v, retrying", ctx, op, pollCount, err) + klog.V(5).Infof("op.isDone(%v) error; op = %v, poll count = %d, err = %v, retrying (%v elapsed)", ctx, op, pollCount, err, time.Now().Sub(start)) s.RateLimiter.Observe(ctx, err, op.rateLimitKey()) return err case done: - klog.V(5).Infof("op.isDone(%v) complete; op = %v, poll count = %d, op.err = %v", ctx, op, pollCount, op.error()) + klog.V(5).Infof("op.isDone(%v) complete; op = %v, poll count = %d, op.err = %v (%v elapsed)", ctx, op, pollCount, op.error(), time.Now().Sub(start)) s.RateLimiter.Observe(ctx, op.error(), op.rateLimitKey()) return op.error() }