-
Notifications
You must be signed in to change notification settings - Fork 41
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WIP] Update Syncing condition to include all errors #260
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,6 +41,7 @@ import ( | |
) | ||
|
||
// NewNamespaceRunner creates a new runnable parser for parsing a Namespace repo. | ||
// TODO: replace with builder pattern to avoid too many arguments. | ||
func NewNamespaceRunner(clusterName, syncName, reconcilerName string, scope declared.Scope, fileReader reader.Reader, c client.Client, pollingPeriod, resyncPeriod, retryPeriod, statusUpdatePeriod time.Duration, fs FileSource, dc discovery.DiscoveryInterface, resources *declared.Resources, app applier.KptApplier, rem remediator.Interface) (Parser, error) { | ||
converter, err := declared.NewValueConverter(dc) | ||
if err != nil { | ||
|
@@ -161,13 +162,14 @@ func (p *namespace) setSourceStatusWithRetries(ctx context.Context, newStatus so | |
currentRS := rs.DeepCopy() | ||
|
||
setSourceStatusFields(&rs.Status.Source, p, newStatus, denominator) | ||
errorSources, errorSummary := summarizeErrors(rs.Status.Status, newStatus.commit) | ||
syncErrorCount := totalErrors(errorSummary) | ||
|
||
continueSyncing := (rs.Status.Source.ErrorSummary.TotalCount == 0) | ||
var errorSource []v1beta1.ErrorSource | ||
if len(rs.Status.Source.Errors) > 0 { | ||
errorSource = []v1beta1.ErrorSource{v1beta1.SourceError} | ||
} | ||
reposync.SetSyncing(&rs, continueSyncing, "Source", "Source", newStatus.commit, errorSource, rs.Status.Source.ErrorSummary, newStatus.lastUpdate) | ||
// Syncing is stopped/prevented if there are any errors from the same commit. | ||
// TODO: Handle non-blocking errors | ||
syncing := (syncErrorCount == 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Previously, It affects how the notification sends out alerts. The current proposal is to leverage the Syncing condition to determine the sync status and send out alerts. If the Syncing condition is In this case, if there're errors from the same commit, even if it might be fixed with the new loop, there will be a failure notification. We can include previous errors with the same commit in the ErrorSummary to surface the errors earlier, but can we keep @sdowell FYI There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It's not super clear what the Syncing condition was/is supposed to represent. Internally, the phases are effectively Fetch/Read -> Render/Read -> Parse -> Update (Validate/Apply/Watch/Reconcile). It's confusing to have the This PR effectively assumes that since the source and rendering phases were setting the Syncing condition to True that the Syncing condition must include the entire pipeline, and not just the Update/Apply phase. If that is NOT what we want, then we need to document the proposed change and implement it consistency. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, it's worth noting that the inputs to a sync/apply are not just Git. They also can include Helm values in the spec and other config values, like Git dir, URL, cluster name, etc. So it doesn't make sense to have a unique pipeline execution depend on just a commit. Even just a generation would be technically insufficient, because the previous run may fail, perhaps due to some ephemeral network issue, triggering a retry. Today we don't indicate what generation or attempt/retry the Syncing condition represents, which makes it hard to use for triggering discrete alerts. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The Syncing condition represents the current/latest fetch -> render -> parse -> update pipeline, while It is possible that the Syncing condition only includes status of partial pipeline while the loop is still in progress. For example,
|
||
|
||
reposync.SetSyncing(&rs, syncing, "Source", "Source", newStatus.commit, errorSources, errorSummary, newStatus.lastUpdate) | ||
|
||
// Avoid unnecessary status updates. | ||
if !currentRS.Status.Source.LastUpdate.IsZero() && cmp.Equal(currentRS.Status, rs.Status, compare.IgnoreTimestampUpdates) { | ||
|
@@ -191,7 +193,8 @@ func (p *namespace) setSourceStatusWithRetries(ctx context.Context, newStatus so | |
if err := p.client.Status().Update(ctx, &rs); err != nil { | ||
// If the update failure was caused by the size of the RepoSync object, we would truncate the errors and retry. | ||
if isRequestTooLargeError(err) { | ||
klog.Infof("Failed to update RepoSync source status (total error count: %d, denominator: %d): %s.", rs.Status.Source.ErrorSummary.TotalCount, denominator, err) | ||
klog.Infof("Failed to update RepoSync source status (total error count: %d, denominator: %d): %s.", | ||
syncErrorCount, denominator, err) | ||
return p.setSourceStatusWithRetries(ctx, newStatus, denominator*2) | ||
} | ||
return status.APIServerError(err, "failed to update RepoSync source status from parser") | ||
|
@@ -201,7 +204,7 @@ func (p *namespace) setSourceStatusWithRetries(ctx context.Context, newStatus so | |
|
||
// setRenderingStatus implements the Parser interface | ||
func (p *namespace) setRenderingStatus(ctx context.Context, oldStatus, newStatus renderingStatus) error { | ||
if oldStatus.equal(newStatus) { | ||
if oldStatus.Equal(newStatus) { | ||
return nil | ||
} | ||
|
||
|
@@ -223,13 +226,14 @@ func (p *namespace) setRenderingStatusWithRetires(ctx context.Context, newStatus | |
currentRS := rs.DeepCopy() | ||
|
||
setRenderingStatusFields(&rs.Status.Rendering, p, newStatus, denominator) | ||
errorSources, errorSummary := summarizeErrors(rs.Status.Status, newStatus.commit) | ||
syncErrorCount := totalErrors(errorSummary) | ||
|
||
continueSyncing := (rs.Status.Rendering.ErrorSummary.TotalCount == 0) | ||
var errorSource []v1beta1.ErrorSource | ||
if len(rs.Status.Rendering.Errors) > 0 { | ||
errorSource = []v1beta1.ErrorSource{v1beta1.RenderingError} | ||
} | ||
reposync.SetSyncing(&rs, continueSyncing, "Rendering", newStatus.message, newStatus.commit, errorSource, rs.Status.Rendering.ErrorSummary, newStatus.lastUpdate) | ||
// Syncing is stopped/prevented if there are any errors from the same commit. | ||
// TODO: Handle non-blocking errors | ||
syncing := (syncErrorCount == 0) | ||
|
||
reposync.SetSyncing(&rs, syncing, "Rendering", newStatus.message, newStatus.commit, errorSources, errorSummary, newStatus.lastUpdate) | ||
|
||
// Avoid unnecessary status updates. | ||
if !currentRS.Status.Rendering.LastUpdate.IsZero() && cmp.Equal(currentRS.Status, rs.Status, compare.IgnoreTimestampUpdates) { | ||
|
@@ -253,7 +257,8 @@ func (p *namespace) setRenderingStatusWithRetires(ctx context.Context, newStatus | |
if err := p.client.Status().Update(ctx, &rs); err != nil { | ||
// If the update failure was caused by the size of the RepoSync object, we would truncate the errors and retry. | ||
if isRequestTooLargeError(err) { | ||
klog.Infof("Failed to update RepoSync rendering status (total error count: %d, denominator: %d): %s.", rs.Status.Rendering.ErrorSummary.TotalCount, denominator, err) | ||
klog.Infof("Failed to update RepoSync rendering status (total error count: %d, denominator: %d): %s.", | ||
syncErrorCount, denominator, err) | ||
return p.setRenderingStatusWithRetires(ctx, newStatus, denominator*2) | ||
} | ||
return status.APIServerError(err, "failed to update RepoSync rendering status from parser") | ||
|
@@ -283,16 +288,19 @@ func (p *namespace) setSyncStatusWithRetries(ctx context.Context, newStatus sync | |
currentRS := rs.DeepCopy() | ||
|
||
setSyncStatusFields(&rs.Status.Status, newStatus, denominator) | ||
errorSources, errorSummary := summarizeErrors(rs.Status.Status, newStatus.commit) | ||
syncErrorCount := totalErrors(errorSummary) | ||
|
||
errorSources, errorSummary := summarizeErrors(rs.Status.Source, rs.Status.Sync) | ||
var message string | ||
if newStatus.syncing { | ||
reposync.SetSyncing(rs, true, "Sync", "Syncing", rs.Status.Sync.Commit, errorSources, errorSummary, rs.Status.Sync.LastUpdate) | ||
message = "Syncing" | ||
} else { | ||
if errorSummary.TotalCount == 0 { | ||
message = "Sync Completed" | ||
if syncErrorCount == 0 { | ||
rs.Status.LastSyncedCommit = rs.Status.Sync.Commit | ||
} | ||
reposync.SetSyncing(rs, false, "Sync", "Sync Completed", rs.Status.Sync.Commit, errorSources, errorSummary, rs.Status.Sync.LastUpdate) | ||
} | ||
reposync.SetSyncing(rs, newStatus.syncing, "Sync", message, rs.Status.Sync.Commit, errorSources, errorSummary, rs.Status.Sync.LastUpdate) | ||
|
||
// Avoid unnecessary status updates. | ||
if !currentRS.Status.Sync.LastUpdate.IsZero() && cmp.Equal(currentRS.Status, rs.Status, compare.IgnoreTimestampUpdates) { | ||
|
@@ -312,14 +320,15 @@ func (p *namespace) setSyncStatusWithRetries(ctx context.Context, newStatus sync | |
} | ||
|
||
if klog.V(5).Enabled() { | ||
klog.Infof("Updating status for RepoSync %s/%s:\nDiff (- Expected, + Actual):\n%s", | ||
klog.Infof("Updating sync status for RepoSync %s/%s:\nDiff (- Old, + New):\n%s", | ||
rs.Namespace, rs.Name, cmp.Diff(currentRS.Status, rs.Status)) | ||
} | ||
|
||
if err := p.client.Status().Update(ctx, rs); err != nil { | ||
// If the update failure was caused by the size of the RepoSync object, we would truncate the errors and retry. | ||
if isRequestTooLargeError(err) { | ||
klog.Infof("Failed to update RepoSync sync status (total error count: %d, denominator: %d): %s.", rs.Status.Sync.ErrorSummary.TotalCount, denominator, err) | ||
klog.Infof("Failed to update RepoSync sync status (total error count: %d, denominator: %d): %s.", | ||
syncErrorCount, denominator, err) | ||
return p.setSyncStatusWithRetries(ctx, newStatus, denominator*2) | ||
} | ||
return status.APIServerError(err, fmt.Sprintf("failed to update the RepoSync sync status for the %v namespace", p.scope)) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
and validating
->and validates