Skip to content

Commit

Permalink
control_plane/attachment_service: implement PlacementPolicy::Secondar…
Browse files Browse the repository at this point in the history
…y, configuration updates (#6521)



During onboarding, the control plane may attempt ad-hoc creation of a
secondary location to facilitate live migration. This gives us two
problems to solve:
- Accept 'Secondary' mode in /location_config and use it to put the
tenant into secondary mode on some physical pageserver, then pass
through /tenant/xyz/secondary/download requests
- Create tenants with no generation initially, since the initial
`Secondary` mode call will not provide us a generation.

This PR also fixes modification of a tenant's TenantConf during
/location_conf, which was previously ignored, and refines the flow for
config modification:
- avoid bumping generations when the only reason we're reconciling an
attached location is a config change
- increment TenantState.sequence when spawning a reconciler: usually
schedule() does this, but when we do config changes that doesn't happen,
so without this change waiters would think reconciliation was done
immediately. `sequence` is a bit of a murky thing right now, as it's
dual-purposed for tracking waiters, and for checking if an existing
reconciliation is already making updates to our current sequence. I'll
follow up at some point to clarify it's purpose.
- test config modification at the end of onboarding test
  • Loading branch information
jcsp committed Mar 1, 2024
1 parent ea0d35f commit 20d0939
Show file tree
Hide file tree
Showing 11 changed files with 839 additions and 228 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ALTER TABLE tenant_shards ALTER generation SET NOT NULL;
ALTER TABLE tenant_shards ALTER generation_pageserver SET NOT NULL;
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@


ALTER TABLE tenant_shards ALTER generation DROP NOT NULL;
ALTER TABLE tenant_shards ALTER generation_pageserver DROP NOT NULL;
48 changes: 46 additions & 2 deletions control_plane/attachment_service/src/http.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use crate::reconciler::ReconcileError;
use crate::service::{Service, STARTUP_RECONCILE_TIMEOUT};
use crate::PlacementPolicy;
use hyper::{Body, Request, Response};
use hyper::{StatusCode, Uri};
use pageserver_api::models::{
TenantCreateRequest, TenantLocationConfigRequest, TenantShardSplitRequest,
TenantConfigRequest, TenantCreateRequest, TenantLocationConfigRequest, TenantShardSplitRequest,
TenantTimeTravelRequest, TimelineCreateRequest,
};
use pageserver_api::shard::TenantShardId;
Expand Down Expand Up @@ -117,9 +118,14 @@ async fn handle_tenant_create(
check_permissions(&req, Scope::PageServerApi)?;

let create_req = json_request::<TenantCreateRequest>(&mut req).await?;

// TODO: enable specifying this. Using Single as a default helps legacy tests to work (they
// have no expectation of HA).
let placement_policy = PlacementPolicy::Single;

json_response(
StatusCode::CREATED,
service.tenant_create(create_req).await?,
service.tenant_create(create_req, placement_policy).await?,
)
}

Expand Down Expand Up @@ -185,6 +191,27 @@ async fn handle_tenant_location_config(
)
}

async fn handle_tenant_config_set(
service: Arc<Service>,
mut req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::PageServerApi)?;

let config_req = json_request::<TenantConfigRequest>(&mut req).await?;

json_response(StatusCode::OK, service.tenant_config_set(config_req).await?)
}

async fn handle_tenant_config_get(
service: Arc<Service>,
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;

json_response(StatusCode::OK, service.tenant_config_get(tenant_id)?)
}

async fn handle_tenant_time_travel_remote_storage(
service: Arc<Service>,
mut req: Request<Body>,
Expand Down Expand Up @@ -216,7 +243,15 @@ async fn handle_tenant_time_travel_remote_storage(
done_if_after_raw,
)
.await?;
json_response(StatusCode::OK, ())
}

async fn handle_tenant_secondary_download(
service: Arc<Service>,
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
service.tenant_secondary_download(tenant_id).await?;
json_response(StatusCode::OK, ())
}

Expand Down Expand Up @@ -551,12 +586,21 @@ pub fn make_router(
.delete("/v1/tenant/:tenant_id", |r| {
tenant_service_handler(r, handle_tenant_delete)
})
.put("/v1/tenant/config", |r| {
tenant_service_handler(r, handle_tenant_config_set)
})
.get("/v1/tenant/:tenant_id/config", |r| {
tenant_service_handler(r, handle_tenant_config_get)
})
.put("/v1/tenant/:tenant_id/location_config", |r| {
tenant_service_handler(r, handle_tenant_location_config)
})
.put("/v1/tenant/:tenant_id/time_travel_remote_storage", |r| {
tenant_service_handler(r, handle_tenant_time_travel_remote_storage)
})
.post("/v1/tenant/:tenant_id/secondary/download", |r| {
tenant_service_handler(r, handle_tenant_secondary_download)
})
// Timeline operations
.delete("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
tenant_service_handler(r, handle_tenant_timeline_delete)
Expand Down
10 changes: 8 additions & 2 deletions control_plane/attachment_service/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,20 @@ mod schema;
pub mod service;
mod tenant_state;

#[derive(Clone, Serialize, Deserialize, Debug)]
#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)]
enum PlacementPolicy {
/// Cheapest way to attach a tenant: just one pageserver, no secondary
Single,
/// Production-ready way to attach a tenant: one attached pageserver and
/// some number of secondaries.
Double(usize),
/// Do not attach to any pageservers
/// Create one secondary mode locations. This is useful when onboarding
/// a tenant, or for an idle tenant that we might want to bring online quickly.
Secondary,

/// Do not attach to any pageservers. This is appropriate for tenants that
/// have been idle for a long time, where we do not mind some delay in making
/// them available in future.
Detached,
}

Expand Down
101 changes: 95 additions & 6 deletions control_plane/attachment_service/src/persistence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,15 @@ impl Persistence {
shard_number: ShardNumber(tsp.shard_number as u8),
shard_count: ShardCount::new(tsp.shard_count as u8),
};
result.insert(tenant_shard_id, Generation::new(tsp.generation as u32));

let Some(g) = tsp.generation else {
// If the generation_pageserver column was non-NULL, then the generation column should also be non-NULL:
// we only set generation_pageserver when setting generation.
return Err(DatabaseError::Logical(
"Generation should always be set after incrementing".to_string(),
));
};
result.insert(tenant_shard_id, Generation::new(g as u32));
}

Ok(result)
Expand Down Expand Up @@ -366,7 +374,85 @@ impl Persistence {
})
.await?;

Ok(Generation::new(updated.generation as u32))
// Generation is always non-null in the rseult: if the generation column had been NULL, then we
// should have experienced an SQL Confilict error while executing a query that tries to increment it.
debug_assert!(updated.generation.is_some());
let Some(g) = updated.generation else {
return Err(DatabaseError::Logical(
"Generation should always be set after incrementing".to_string(),
)
.into());
};

Ok(Generation::new(g as u32))
}

/// For use when updating a persistent property of a tenant, such as its config or placement_policy.
///
/// Do not use this for settting generation, unless in the special onboarding code path (/location_config)
/// API: use [`Self::increment_generation`] instead. Setting the generation via this route is a one-time thing
/// that we only do the first time a tenant is set to an attached policy via /location_config.
pub(crate) async fn update_tenant_shard(
&self,
tenant_shard_id: TenantShardId,
input_placement_policy: PlacementPolicy,
input_config: TenantConfig,
input_generation: Option<Generation>,
) -> DatabaseResult<()> {
use crate::schema::tenant_shards::dsl::*;

self.with_conn(move |conn| {
let query = diesel::update(tenant_shards)
.filter(tenant_id.eq(tenant_shard_id.tenant_id.to_string()))
.filter(shard_number.eq(tenant_shard_id.shard_number.0 as i32))
.filter(shard_count.eq(tenant_shard_id.shard_count.literal() as i32));

if let Some(input_generation) = input_generation {
// Update includes generation column
query
.set((
generation.eq(Some(input_generation.into().unwrap() as i32)),
placement_policy
.eq(serde_json::to_string(&input_placement_policy).unwrap()),
config.eq(serde_json::to_string(&input_config).unwrap()),
))
.execute(conn)?;
} else {
// Update does not include generation column
query
.set((
placement_policy
.eq(serde_json::to_string(&input_placement_policy).unwrap()),
config.eq(serde_json::to_string(&input_config).unwrap()),
))
.execute(conn)?;
}

Ok(())
})
.await?;

Ok(())
}

pub(crate) async fn update_tenant_config(
&self,
input_tenant_id: TenantId,
input_config: TenantConfig,
) -> DatabaseResult<()> {
use crate::schema::tenant_shards::dsl::*;

self.with_conn(move |conn| {
diesel::update(tenant_shards)
.filter(tenant_id.eq(input_tenant_id.to_string()))
.set((config.eq(serde_json::to_string(&input_config).unwrap()),))
.execute(conn)?;

Ok(())
})
.await?;

Ok(())
}

pub(crate) async fn detach(&self, tenant_shard_id: TenantShardId) -> anyhow::Result<()> {
Expand All @@ -377,7 +463,7 @@ impl Persistence {
.filter(shard_number.eq(tenant_shard_id.shard_number.0 as i32))
.filter(shard_count.eq(tenant_shard_id.shard_count.literal() as i32))
.set((
generation_pageserver.eq(i64::MAX),
generation_pageserver.eq(Option::<i64>::None),
placement_policy.eq(serde_json::to_string(&PlacementPolicy::Detached).unwrap()),
))
.execute(conn)?;
Expand Down Expand Up @@ -503,12 +589,15 @@ pub(crate) struct TenantShardPersistence {
pub(crate) shard_stripe_size: i32,

// Latest generation number: next time we attach, increment this
// and use the incremented number when attaching
pub(crate) generation: i32,
// and use the incremented number when attaching.
//
// Generation is only None when first onboarding a tenant, where it may
// be in PlacementPolicy::Secondary and therefore have no valid generation state.
pub(crate) generation: Option<i32>,

// Currently attached pageserver
#[serde(rename = "pageserver")]
pub(crate) generation_pageserver: i64,
pub(crate) generation_pageserver: Option<i64>,

#[serde(default)]
pub(crate) placement_policy: String,
Expand Down
73 changes: 57 additions & 16 deletions control_plane/attachment_service/src/reconciler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ pub(super) struct Reconciler {
/// of a tenant's state from when we spawned a reconcile task.
pub(super) tenant_shard_id: TenantShardId,
pub(crate) shard: ShardIdentity,
pub(crate) generation: Generation,
pub(crate) generation: Option<Generation>,
pub(crate) intent: TargetState,
pub(crate) config: TenantConfig,
pub(crate) observed: ObservedState,
Expand Down Expand Up @@ -312,7 +312,7 @@ impl Reconciler {
&self.shard,
&self.config,
LocationConfigMode::AttachedStale,
Some(self.generation),
self.generation,
None,
);
self.location_config(origin_ps_id, stale_conf, Some(Duration::from_secs(10)))
Expand All @@ -335,16 +335,17 @@ impl Reconciler {
}

// Increment generation before attaching to new pageserver
self.generation = self
.persistence
.increment_generation(self.tenant_shard_id, dest_ps_id)
.await?;
self.generation = Some(
self.persistence
.increment_generation(self.tenant_shard_id, dest_ps_id)
.await?,
);

let dest_conf = build_location_config(
&self.shard,
&self.config,
LocationConfigMode::AttachedMulti,
Some(self.generation),
self.generation,
None,
);

Expand Down Expand Up @@ -401,7 +402,7 @@ impl Reconciler {
&self.shard,
&self.config,
LocationConfigMode::AttachedSingle,
Some(self.generation),
self.generation,
None,
);
self.location_config(dest_ps_id, dest_final_conf.clone(), None)
Expand Down Expand Up @@ -433,22 +434,62 @@ impl Reconciler {

// If the attached pageserver is not attached, do so now.
if let Some(node_id) = self.intent.attached {
let mut wanted_conf =
attached_location_conf(self.generation, &self.shard, &self.config);
// If we are in an attached policy, then generation must have been set (null generations
// are only present when a tenant is initially loaded with a secondary policy)
debug_assert!(self.generation.is_some());
let Some(generation) = self.generation else {
return Err(ReconcileError::Other(anyhow::anyhow!(
"Attempted to attach with NULL generation"
)));
};

let mut wanted_conf = attached_location_conf(generation, &self.shard, &self.config);
match self.observed.locations.get(&node_id) {
Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => {
// Nothing to do
tracing::info!(%node_id, "Observed configuration already correct.")
}
_ => {
observed => {
// In all cases other than a matching observed configuration, we will
// reconcile this location. This includes locations with different configurations, as well
// as locations with unknown (None) observed state.
self.generation = self
.persistence
.increment_generation(self.tenant_shard_id, node_id)
.await?;
wanted_conf.generation = self.generation.into();

// The general case is to increment the generation. However, there are cases
// where this is not necessary:
// - if we are only updating the TenantConf part of the location
// - if we are only changing the attachment mode (e.g. going to attachedmulti or attachedstale)
// and the location was already in the correct generation
let increment_generation = match observed {
None => true,
Some(ObservedStateLocation { conf: None }) => true,
Some(ObservedStateLocation {
conf: Some(observed),
}) => {
let generations_match = observed.generation == wanted_conf.generation;

use LocationConfigMode::*;
let mode_transition_requires_gen_inc =
match (observed.mode, wanted_conf.mode) {
// Usually the short-lived attachment modes (multi and stale) are only used
// in the case of [`Self::live_migrate`], but it is simple to handle them correctly
// here too. Locations are allowed to go Single->Stale and Multi->Single within the same generation.
(AttachedSingle, AttachedStale) => false,
(AttachedMulti, AttachedSingle) => false,
(lhs, rhs) => lhs != rhs,
};

!generations_match || mode_transition_requires_gen_inc
}
};

if increment_generation {
let generation = self
.persistence
.increment_generation(self.tenant_shard_id, node_id)
.await?;
self.generation = Some(generation);
wanted_conf.generation = generation.into();
}
tracing::info!(%node_id, "Observed configuration requires update.");
self.location_config(node_id, wanted_conf, None).await?;
self.compute_notify().await?;
Expand Down
4 changes: 2 additions & 2 deletions control_plane/attachment_service/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ diesel::table! {
shard_number -> Int4,
shard_count -> Int4,
shard_stripe_size -> Int4,
generation -> Int4,
generation_pageserver -> Int8,
generation -> Nullable<Int4>,
generation_pageserver -> Nullable<Int8>,
placement_policy -> Varchar,
splitting -> Int2,
config -> Text,
Expand Down
Loading

1 comment on commit 20d0939

@github-actions
Copy link

@github-actions github-actions bot commented on 20d0939 Mar 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2561 tests run: 2426 passed, 1 failed, 134 skipped (full report)


Failures on Postgres 14

  • test_basebackup_with_high_slru_count[github-actions-selfhosted-sequential-10-13-30]: release
# Run all failed tests locally:
scripts/pytest -vv -n $(nproc) -k "test_basebackup_with_high_slru_count[release-pg14-github-actions-selfhosted-sequential-10-13-30]"

Code coverage* (full report)

  • functions: 28.7% (6932 of 24161 functions)
  • lines: 47.2% (42560 of 90170 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
20d0939 at 2024-03-04T10:52:42.321Z :recycle:

Please sign in to comment.