pageserver/controller: enable tenant deletion without attachment (#7957)
## Problem

As described in #7952, the controller's attempt to reconcile a tenant
before finally deleting it can get hung up waiting for the compute
notification hook to accept updates.

The fact that we reconcile a tenant at all during deletion is part of a
more general design issue (#5080): deletion was implemented as an
operation on an attached tenant, so a tenant had to be attached before
it could be deleted, which is not necessary in principle.

Closes: #7952

## Summary of changes

- In the pageserver deletion API, only take the traditional deletion path
if the tenant is attached. If it is secondary, tear down the
secondary location and then delete the remote content. If it is not attached at
all, just delete the remote content.
- In the storage controller, instead of ensuring a tenant is attached
before deletion, do a best-effort detach of the tenant and then call
into an arbitrary pageserver to issue a deletion of remote content (sketched below).
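
A minimal sketch of that controller-side flow, assuming hypothetical endpoint paths and payloads (the real storage controller goes through its service and scheduler layers rather than raw HTTP calls, and this assumes `reqwest` with the `json` feature plus `serde_json`):

```rust
use anyhow::Result;
use reqwest::{Client, StatusCode};

/// Hypothetical sketch only: best-effort detach everywhere, then ask one
/// pageserver to delete the tenant's remote content. The endpoint paths and
/// the location_config payload are assumptions, not the controller's real code.
async fn delete_tenant(
    client: &Client,
    pageservers: &[String],
    tenant_id: &str,
) -> Result<StatusCode> {
    // Best-effort detach: individual failures are ignored, since deletion no
    // longer requires the tenant to be attached anywhere.
    for ps in pageservers {
        let url = format!("{ps}/v1/tenant/{tenant_id}/location_config");
        let _ = client
            .put(url)
            .json(&serde_json::json!({ "mode": "Detached" }))
            .send()
            .await;
    }

    // Issue the deletion of remote content through an arbitrary pageserver.
    let ps = pageservers.first().expect("need at least one pageserver");
    let resp = client
        .delete(format!("{ps}/v1/tenant/{tenant_id}"))
        .send()
        .await?;
    Ok(resp.status())
}
```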

The pageserver retains its existing delete behavior when invoked on
attached locations. We can remove that later, once all users of the API
are updated to detach before deleting. That will enable removing the
"weird" code paths during startup that sometimes load a tenant only to
immediately delete it, as well as removing the deletion markers on
tenants.
jcsp committed Jun 5, 2024
1 parent 83ab14e commit 91dd990
Showing 7 changed files with 312 additions and 117 deletions.
4 changes: 3 additions & 1 deletion pageserver/src/http/openapi_spec.yml
@@ -81,8 +81,10 @@ paths:
Attempts to delete specified tenant. 500, 503 and 409 errors should be retried until 404 is retrieved.
404 means that deletion successfully finished"
responses:
"200":
description: Tenant was successfully deleted, or was already not found.
"404":
description: Tenant not found. This is the success path.
description: Tenant not found. This is a success result, equivalent to 200.
content:
application/json:
schema:
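
Given the responses documented above (200 and 404 both mean the tenant is gone; 409, 500 and 503 should be retried), here is a minimal client-side sketch of how a caller might drive this endpoint; the URL handling and retry interval are assumptions for illustration only:

```rust
use std::time::Duration;

use anyhow::bail;
use reqwest::Client;

/// Illustrative only: retries the DELETE until the server reports the tenant
/// gone, treating both 200 and 404 as success per the spec above.
async fn delete_until_gone(client: &Client, url: &str) -> anyhow::Result<()> {
    loop {
        let status = client.delete(url).send().await?.status();
        match status.as_u16() {
            // Deletion finished (or the tenant never existed).
            200 | 404 => return Ok(()),
            // Transient/conflicting states: back off and retry.
            409 | 500 | 503 => tokio::time::sleep(Duration::from_secs(2)).await,
            other => bail!("unexpected status {other}"),
        }
    }
}
```
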
11 changes: 9 additions & 2 deletions pageserver/src/http/routes.rs
@@ -1073,7 +1073,7 @@ async fn tenant_delete_handler(

let state = get_state(&request);

state
let status = state
.tenant_manager
.delete_tenant(tenant_shard_id, ACTIVE_TENANT_TIMEOUT)
.instrument(info_span!("tenant_delete_handler",
@@ -1082,7 +1082,14 @@
))
.await?;

json_response(StatusCode::ACCEPTED, ())
// Callers use 404 as success for deletions, for historical reasons.
if status == StatusCode::NOT_FOUND {
return Err(ApiError::NotFound(
anyhow::anyhow!("Deletion complete").into(),
));
}

json_response(status, ())
}

/// HTTP endpoint to query the current tenant_size of a tenant.
86 changes: 75 additions & 11 deletions pageserver/src/tenant/mgr.rs
@@ -3,6 +3,7 @@

use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
use futures::StreamExt;
use hyper::StatusCode;
use itertools::Itertools;
use pageserver_api::key::Key;
use pageserver_api::models::LocationConfigMode;
@@ -54,6 +55,7 @@ use utils::generation::Generation;
use utils::id::{TenantId, TimelineId};

use super::delete::DeleteTenantError;
use super::remote_timeline_client::remote_tenant_path;
use super::secondary::SecondaryTenant;
use super::timeline::detach_ancestor::PreparedTimelineDetach;
use super::TenantSharedResources;
@@ -1369,7 +1371,7 @@ impl TenantManager {
&self,
tenant_shard_id: TenantShardId,
activation_timeout: Duration,
) -> Result<(), DeleteTenantError> {
) -> Result<StatusCode, DeleteTenantError> {
super::span::debug_assert_current_span_has_tenant_id();
// We acquire a SlotGuard during this function to protect against concurrent
// changes while the ::prepare phase of DeleteTenantFlow executes, but then
@@ -1382,18 +1384,79 @@
//
// See https://github.com/neondatabase/neon/issues/5080

let slot_guard =
tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::MustExist)?;
// Tenant deletion can happen two ways:
// - Legacy: called on an attached location. The attached Tenant object stays alive in Stopping
// state until deletion is complete.
// - New: called on a pageserver without an attached location. We proceed with deletion from
// remote storage.
//
// See https://github.com/neondatabase/neon/issues/5080 for more context on this transition.

// unwrap is safe because we used MustExist mode when acquiring
let tenant = match slot_guard.get_old_value().as_ref().unwrap() {
TenantSlot::Attached(tenant) => tenant.clone(),
_ => {
// Express "not attached" as equivalent to "not found"
return Err(DeleteTenantError::NotAttached);
let slot_guard = tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::Any)?;
match &slot_guard.old_value {
Some(TenantSlot::Attached(tenant)) => {
// Legacy deletion flow: the tenant remains attached, goes to Stopping state, and
// deletion will be resumed across restarts.
let tenant = tenant.clone();
return self
.delete_tenant_attached(slot_guard, tenant, activation_timeout)
.await;
}
Some(TenantSlot::Secondary(secondary_tenant)) => {
secondary_tenant.shutdown().await;
let local_tenant_directory = self.conf.tenant_path(&tenant_shard_id);
let tmp_dir = safe_rename_tenant_dir(&local_tenant_directory)
.await
.with_context(|| {
format!("local tenant directory {local_tenant_directory:?} rename")
})?;
spawn_background_purge(tmp_dir);
}
Some(TenantSlot::InProgress(_)) => unreachable!(),
None => {}
};

// Fall through: local state for this tenant is no longer present, proceed with remote delete
let remote_path = remote_tenant_path(&tenant_shard_id);
let keys = match self
.resources
.remote_storage
.list(
Some(&remote_path),
remote_storage::ListingMode::NoDelimiter,
None,
&self.cancel,
)
.await
{
Ok(listing) => listing.keys,
Err(remote_storage::DownloadError::Cancelled) => {
return Err(DeleteTenantError::Cancelled)
}
Err(remote_storage::DownloadError::NotFound) => return Ok(StatusCode::NOT_FOUND),
Err(other) => return Err(DeleteTenantError::Other(anyhow::anyhow!(other))),
};

if keys.is_empty() {
tracing::info!("Remote storage already deleted");
} else {
tracing::info!("Deleting {} keys from remote storage", keys.len());
self.resources
.remote_storage
.delete_objects(&keys, &self.cancel)
.await?;
}

// Callers use 404 as success for deletions, for historical reasons.
Ok(StatusCode::NOT_FOUND)
}

async fn delete_tenant_attached(
&self,
slot_guard: SlotGuard,
tenant: Arc<Tenant>,
activation_timeout: Duration,
) -> Result<StatusCode, DeleteTenantError> {
match tenant.current_state() {
TenantState::Broken { .. } | TenantState::Stopping { .. } => {
// If deletion is already in progress, return success (the semantics of this
@@ -1403,7 +1466,7 @@
// The `delete_progress` lock is held: deletion is already happening
// in the background
slot_guard.revert();
return Ok(());
return Ok(StatusCode::ACCEPTED);
}
}
_ => {
@@ -1436,7 +1499,8 @@

// The Tenant goes back into the map in Stopping state, it will eventually be removed by DeleteTenantFlow
slot_guard.revert();
result
let () = result?;
Ok(StatusCode::ACCEPTED)
}

#[instrument(skip_all, fields(tenant_id=%tenant.get_tenant_shard_id().tenant_id, shard_id=%tenant.get_tenant_shard_id().shard_slug(), new_shard_count=%new_shard_count.literal()))]
109 changes: 56 additions & 53 deletions storage_controller/src/http.rs
@@ -142,52 +142,6 @@ async fn handle_tenant_create(
)
}

// For tenant and timeline deletions, which both implement an "initially return 202, then 404 once
// we're done" semantic, we wrap with a retry loop to expose a simpler API upstream. This avoids
// needing to track a "deleting" state for tenants.
async fn deletion_wrapper<R, F>(service: Arc<Service>, f: F) -> Result<Response<Body>, ApiError>
where
R: std::future::Future<Output = Result<StatusCode, ApiError>> + Send + 'static,
F: Fn(Arc<Service>) -> R + Send + Sync + 'static,
{
let started_at = Instant::now();
// To keep deletion reasonably snappy for small tenants, initially check after 1 second if deletion
// completed.
let mut retry_period = Duration::from_secs(1);
// On subsequent retries, wait longer.
let max_retry_period = Duration::from_secs(5);
// Enable callers with a 30 second request timeout to reliably get a response
let max_wait = Duration::from_secs(25);

loop {
let status = f(service.clone()).await?;
match status {
StatusCode::ACCEPTED => {
tracing::info!("Deletion accepted, waiting to try again...");
tokio::time::sleep(retry_period).await;
retry_period = max_retry_period;
}
StatusCode::NOT_FOUND => {
tracing::info!("Deletion complete");
return json_response(StatusCode::OK, ());
}
_ => {
tracing::warn!("Unexpected status {status}");
return json_response(status, ());
}
}

let now = Instant::now();
if now + retry_period > started_at + max_wait {
tracing::info!("Deletion timed out waiting for 404");
// REQUEST_TIMEOUT would be more appropriate, but CONFLICT is already part of
// the pageserver's swagger definition for this endpoint, and has the same desired
// effect of causing the control plane to retry later.
return json_response(StatusCode::CONFLICT, ());
}
}
}

async fn handle_tenant_location_config(
service: Arc<Service>,
mut req: Request<Body>,
@@ -283,13 +237,17 @@ async fn handle_tenant_delete(
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;

deletion_wrapper(service, move |service| async move {
service
.tenant_delete(tenant_id)
.await
.and_then(map_reqwest_hyper_status)
})
.await
let status_code = service
.tenant_delete(tenant_id)
.await
.and_then(map_reqwest_hyper_status)?;

if status_code == StatusCode::NOT_FOUND {
// The pageserver uses 404 for successful deletion, but we use 200
json_response(StatusCode::OK, ())
} else {
json_response(status_code, ())
}
}

async fn handle_tenant_timeline_create(
@@ -317,6 +275,51 @@ async fn handle_tenant_timeline_delete(

let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;

// For timeline deletions, which implement an "initially return 202, then 404 once
// we're done" semantic, we wrap with a retry loop to expose a simpler API upstream.
async fn deletion_wrapper<R, F>(service: Arc<Service>, f: F) -> Result<Response<Body>, ApiError>
where
R: std::future::Future<Output = Result<StatusCode, ApiError>> + Send + 'static,
F: Fn(Arc<Service>) -> R + Send + Sync + 'static,
{
let started_at = Instant::now();
// To keep deletion reasonably snappy for small tenants, initially check after 1 second if deletion
// completed.
let mut retry_period = Duration::from_secs(1);
// On subsequent retries, wait longer.
let max_retry_period = Duration::from_secs(5);
// Enable callers with a 30 second request timeout to reliably get a response
let max_wait = Duration::from_secs(25);

loop {
let status = f(service.clone()).await?;
match status {
StatusCode::ACCEPTED => {
tracing::info!("Deletion accepted, waiting to try again...");
tokio::time::sleep(retry_period).await;
retry_period = max_retry_period;
}
StatusCode::NOT_FOUND => {
tracing::info!("Deletion complete");
return json_response(StatusCode::OK, ());
}
_ => {
tracing::warn!("Unexpected status {status}");
return json_response(status, ());
}
}

let now = Instant::now();
if now + retry_period > started_at + max_wait {
tracing::info!("Deletion timed out waiting for 404");
// REQUEST_TIMEOUT would be more appropriate, but CONFLICT is already part of
// the pageserver's swagger definition for this endpoint, and has the same desired
// effect of causing the control plane to retry later.
return json_response(StatusCode::CONFLICT, ());
}
}
}

deletion_wrapper(service, move |service| async move {
service
.tenant_timeline_delete(tenant_id, timeline_id)

1 comment on commit 91dd990

@github-actions
3274 tests run: 3122 passed, 0 failed, 152 skipped (full report)


Flaky tests (1)

Postgres 15

  • test_scrubber_tenant_snapshot[4]: release

Code coverage* (full report)

  • functions: 31.5% (6600 of 20927 functions)
  • lines: 48.5% (51069 of 105267 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
91dd990 at 2024-06-05T21:40:26.605Z :recycle:
