Skip to content

Commit

Permalink
pageserver: drop out of secondary download if iteration time has passed
Browse files Browse the repository at this point in the history
  • Loading branch information
jcsp committed Jun 28, 2024
1 parent 32b75e7 commit f3a0728
Showing 1 changed file with 32 additions and 1 deletion.
33 changes: 32 additions & 1 deletion pageserver/src/tenant/secondary/downloader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,9 @@ impl JobGenerator<PendingDownload, RunningDownload, CompleteDownload, DownloadCo
Err(e @ (UpdateError::DownloadError(_) | UpdateError::Other(_))) => {
tracing::error!("Error while downloading tenant: {e}");
},
Err(UpdateError::Restart) => {
tracing::info!("Download reached deadline & will restart to update heatmap")
}
Ok(()) => {}
};

Expand Down Expand Up @@ -452,6 +455,11 @@ struct TenantDownloader<'a> {
/// Errors that may be encountered while updating a tenant
#[derive(thiserror::Error, Debug)]
enum UpdateError {
/// This is not a true failure, but it's how a download indicates that it would like to be restarted by
/// the scheduler, to pick up the latest heatmap
#[error("Reached deadline, restarting downloads")]
Restart,

#[error("No remote data found")]
NoData,
#[error("Insufficient local storage space")]
Expand Down Expand Up @@ -603,6 +611,23 @@ impl<'a> TenantDownloader<'a> {
self.prepare_timelines(&heatmap, heatmap_mtime).await?;
}

// Calculate a deadline for downloads: if downloading takes longer than this, it is useful to drop out and start again,
// so that we are always using reasonably a fresh heatmap. Otherwise, if we had really huge content to download, we might
// spend 10s of minutes downloading layers we don't need.
// (see https://github.com/neondatabase/neon/issues/8182)
let deadline = {
let period = self
.secondary_state
.detail
.lock()
.unwrap()
.last_download
.as_ref()
.map(|d| d.upload_period)
.unwrap_or(DEFAULT_DOWNLOAD_INTERVAL);
Instant::now() + period
};

// Download the layers in the heatmap
for timeline in heatmap.timelines {
let timeline_state = timeline_states
Expand All @@ -618,7 +643,7 @@ impl<'a> TenantDownloader<'a> {
}

let timeline_id = timeline.timeline_id;
self.download_timeline(timeline, timeline_state, ctx)
self.download_timeline(timeline, timeline_state, deadline, ctx)
.instrument(tracing::info_span!(
"secondary_download_timeline",
tenant_id=%tenant_shard_id.tenant_id,
Expand Down Expand Up @@ -831,6 +856,7 @@ impl<'a> TenantDownloader<'a> {
&self,
timeline: HeatMapTimeline,
timeline_state: SecondaryDetailTimeline,
deadline: Instant,
ctx: &RequestContext,
) -> Result<(), UpdateError> {
debug_assert_current_span_has_tenant_and_timeline_id();
Expand All @@ -849,6 +875,11 @@ impl<'a> TenantDownloader<'a> {
return Err(UpdateError::Cancelled);
}

if Instant::now() > deadline {
// We've been running downloads for a while, restart to download latest heatmap.
return Err(UpdateError::Restart);
}

// Existing on-disk layers: just update their access time.
if let Some(on_disk) = timeline_state.on_disk_layers.get(&layer.name) {
tracing::debug!("Layer {} is already on disk", layer.name);
Expand Down

0 comments on commit f3a0728

Please sign in to comment.