From 19accfee4e677ed8fabc4dd1f370389038978499 Mon Sep 17 00:00:00 2001 From: Yuchen Liang <70461588+yliang412@users.noreply.github.com> Date: Thu, 4 Jul 2024 11:09:05 -0400 Subject: [PATCH] feat(pageserver): integrate lsn lease into synthetic size (#8220) Part of #7497, closes #8071. (accidentally closed #8208, reopened here) ## Problem After the changes in #8084, we need synthetic size to also account for leased LSNs so that users do not get free retention by running a small ephemeral endpoint for a long time. ## Summary of changes This PR integrates LSN leases into the synthetic size calculation. We model leases as read-only branches started at the leased LSN (except it does not have a timeline id). Other changes: - Add new unit tests testing whether a lease behaves like a read-only branch. - Change `/size_debug` response to include lease point in the SVG visualization. - Fix `/lsn_lease` HTTP API to do proper parsing for POST. Signed-off-by: Yuchen Liang Co-authored-by: Joonas Koivunen Co-authored-by: Christian Schwarz --- libs/pageserver_api/src/models.rs | 5 ++ libs/tenant_size_model/src/calculation.rs | 4 +- libs/tenant_size_model/src/svg.rs | 36 ++++++++-- pageserver/src/http/openapi_spec.yml | 22 +++--- pageserver/src/http/routes.rs | 18 +++-- pageserver/src/tenant/size.rs | 85 ++++++++++++++++++++-- pageserver/src/tenant/timeline.rs | 9 +++ test_runner/fixtures/pageserver/http.py | 16 +++++ test_runner/regress/test_tenant_size.py | 88 +++++++++++++++++++++++ 9 files changed, 256 insertions(+), 27 deletions(-) diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 92289537613d..ad65602f54d9 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -228,6 +228,11 @@ pub struct TimelineCreateRequest { pub pg_version: Option, } +#[derive(Serialize, Deserialize, Clone)] +pub struct LsnLeaseRequest { + pub lsn: Lsn, +} + #[derive(Serialize, Deserialize)] pub struct TenantShardSplitRequest { pub new_shard_count: u8, diff --git a/libs/tenant_size_model/src/calculation.rs b/libs/tenant_size_model/src/calculation.rs index f05997ee6547..be005622199d 100644 --- a/libs/tenant_size_model/src/calculation.rs +++ b/libs/tenant_size_model/src/calculation.rs @@ -34,10 +34,10 @@ struct SegmentSize { } struct SizeAlternatives { - // cheapest alternative if parent is available. + /// cheapest alternative if parent is available. incremental: SegmentSize, - // cheapest alternative if parent node is not available + /// cheapest alternative if parent node is not available non_incremental: Option, } diff --git a/libs/tenant_size_model/src/svg.rs b/libs/tenant_size_model/src/svg.rs index f26d3aa79d1a..0de2890bb414 100644 --- a/libs/tenant_size_model/src/svg.rs +++ b/libs/tenant_size_model/src/svg.rs @@ -3,10 +3,17 @@ use std::fmt::Write; const SVG_WIDTH: f32 = 500.0; +/// Different branch kind for SVG drawing. +#[derive(PartialEq)] +pub enum SvgBranchKind { + Timeline, + Lease, +} + struct SvgDraw<'a> { storage: &'a StorageModel, branches: &'a [String], - seg_to_branch: &'a [usize], + seg_to_branch: &'a [(usize, SvgBranchKind)], sizes: &'a [SegmentSizeResult], // layout @@ -42,13 +49,18 @@ fn draw_legend(result: &mut String) -> anyhow::Result<()> { "" )?; writeln!(result, "WAL not retained")?; + writeln!( + result, + "" + )?; + writeln!(result, "LSN lease")?; Ok(()) } pub fn draw_svg( storage: &StorageModel, branches: &[String], - seg_to_branch: &[usize], + seg_to_branch: &[(usize, SvgBranchKind)], sizes: &SizeResult, ) -> anyhow::Result { let mut draw = SvgDraw { @@ -100,7 +112,7 @@ impl<'a> SvgDraw<'a> { // Layout the timelines on Y dimension. // TODO - let mut y = 100.0; + let mut y = 120.0; let mut branch_y_coordinates = Vec::new(); for _branch in self.branches { branch_y_coordinates.push(y); @@ -109,7 +121,7 @@ impl<'a> SvgDraw<'a> { // Calculate coordinates for each point let seg_coordinates = std::iter::zip(segments, self.seg_to_branch) - .map(|(seg, branch_id)| { + .map(|(seg, (branch_id, _))| { let x = (seg.lsn - min_lsn) as f32 / xscale; let y = branch_y_coordinates[*branch_id]; (x, y) @@ -175,6 +187,22 @@ impl<'a> SvgDraw<'a> { // draw a snapshot point if it's needed let (coord_x, coord_y) = self.seg_coordinates[seg_id]; + + let (_, kind) = &self.seg_to_branch[seg_id]; + if kind == &SvgBranchKind::Lease { + let (x1, y1) = (coord_x, coord_y - 10.0); + let (x2, y2) = (coord_x, coord_y + 10.0); + + let style = "stroke-width=\"3\" stroke=\"blue\""; + + writeln!( + result, + "", + )?; + writeln!(result, " leased lsn at {}", seg.lsn)?; + writeln!(result, "")?; + } + if self.sizes[seg_id].method == SegmentMethod::SnapshotHere { writeln!( result, diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml index 58ff6e3f83cc..5ba329f05ece 100644 --- a/pageserver/src/http/openapi_spec.yml +++ b/pageserver/src/http/openapi_spec.yml @@ -265,15 +265,19 @@ paths: type: string format: hex post: - description: Obtain lease for the given LSN - parameters: - - name: lsn - in: query - required: true - schema: - type: string - format: hex - description: A LSN to obtain the lease for + description: Obtains a lease for the given LSN. + requestBody: + content: + application/json: + schema: + type: object + required: + - lsn + properties: + lsn: + description: A LSN to obtain the lease for. + type: string + format: hex responses: "200": description: OK diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 6a6f17604dee..893302b7d6d9 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -22,6 +22,7 @@ use pageserver_api::models::ListAuxFilesRequest; use pageserver_api::models::LocationConfig; use pageserver_api::models::LocationConfigListResponse; use pageserver_api::models::LsnLease; +use pageserver_api::models::LsnLeaseRequest; use pageserver_api::models::ShardParameters; use pageserver_api::models::TenantDetails; use pageserver_api::models::TenantLocationConfigResponse; @@ -42,7 +43,7 @@ use pageserver_api::shard::TenantShardId; use remote_storage::DownloadError; use remote_storage::GenericRemoteStorage; use remote_storage::TimeTravelError; -use tenant_size_model::{SizeResult, StorageModel}; +use tenant_size_model::{svg::SvgBranchKind, SizeResult, StorageModel}; use tokio_util::sync::CancellationToken; use tracing::*; use utils::auth::JwtAuth; @@ -1195,10 +1196,15 @@ fn synthetic_size_html_response( timeline_map.insert(ti.timeline_id, index); timeline_ids.push(ti.timeline_id.to_string()); } - let seg_to_branch: Vec = inputs + let seg_to_branch: Vec<(usize, SvgBranchKind)> = inputs .segments .iter() - .map(|seg| *timeline_map.get(&seg.timeline_id).unwrap()) + .map(|seg| { + ( + *timeline_map.get(&seg.timeline_id).unwrap(), + seg.kind.into(), + ) + }) .collect(); let svg = @@ -1531,15 +1537,13 @@ async fn handle_tenant_break( // Obtains an lsn lease on the given timeline. async fn lsn_lease_handler( - request: Request, + mut request: Request, _cancel: CancellationToken, ) -> Result, ApiError> { let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; check_permission(&request, Some(tenant_shard_id.tenant_id))?; - - let lsn: Lsn = parse_query_param(&request, "lsn")? - .ok_or_else(|| ApiError::BadRequest(anyhow!("missing 'lsn' query parameter")))?; + let lsn = json_request::(&mut request).await?.lsn; let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); diff --git a/pageserver/src/tenant/size.rs b/pageserver/src/tenant/size.rs index b2338b620ebf..23354417e788 100644 --- a/pageserver/src/tenant/size.rs +++ b/pageserver/src/tenant/size.rs @@ -3,6 +3,7 @@ use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; use std::sync::Arc; +use tenant_size_model::svg::SvgBranchKind; use tokio::sync::oneshot::error::RecvError; use tokio::sync::Semaphore; use tokio_util::sync::CancellationToken; @@ -87,6 +88,9 @@ impl SegmentMeta { LsnKind::BranchPoint => true, LsnKind::GcCutOff => true, LsnKind::BranchEnd => false, + LsnKind::LeasePoint => true, + LsnKind::LeaseStart => false, + LsnKind::LeaseEnd => false, } } } @@ -103,6 +107,21 @@ pub enum LsnKind { GcCutOff, /// Last record LSN BranchEnd, + /// A LSN lease is granted here. + LeasePoint, + /// A lease starts from here. + LeaseStart, + /// Last record LSN for the lease (should have the same LSN as the previous [`LsnKind::LeaseStart`]). + LeaseEnd, +} + +impl From for SvgBranchKind { + fn from(kind: LsnKind) -> Self { + match kind { + LsnKind::LeasePoint | LsnKind::LeaseStart | LsnKind::LeaseEnd => SvgBranchKind::Lease, + _ => SvgBranchKind::Timeline, + } + } } /// Collect all relevant LSNs to the inputs. These will only be helpful in the serialized form as @@ -124,6 +143,9 @@ pub struct TimelineInputs { /// Cutoff point calculated from the user-supplied 'max_retention_period' retention_param_cutoff: Option, + + /// Lease points on the timeline + lease_points: Vec, } /// Gathers the inputs for the tenant sizing model. @@ -234,6 +256,13 @@ pub(super) async fn gather_inputs( None }; + let lease_points = gc_info + .leases + .keys() + .filter(|&&lsn| lsn > ancestor_lsn) + .copied() + .collect::>(); + // next_gc_cutoff in parent branch are not of interest (right now at least), nor do we // want to query any logical size before initdb_lsn. let branch_start_lsn = cmp::max(ancestor_lsn, timeline.initdb_lsn); @@ -248,6 +277,8 @@ pub(super) async fn gather_inputs( .map(|lsn| (lsn, LsnKind::BranchPoint)) .collect::>(); + lsns.extend(lease_points.iter().map(|&lsn| (lsn, LsnKind::LeasePoint))); + drop(gc_info); // Add branch points we collected earlier, just in case there were any that were @@ -296,6 +327,7 @@ pub(super) async fn gather_inputs( if kind == LsnKind::BranchPoint { branchpoint_segments.insert((timeline_id, lsn), segments.len()); } + segments.push(SegmentMeta { segment: Segment { parent: Some(parent), @@ -306,7 +338,45 @@ pub(super) async fn gather_inputs( timeline_id: timeline.timeline_id, kind, }); - parent += 1; + + parent = segments.len() - 1; + + if kind == LsnKind::LeasePoint { + // Needs `LeaseStart` and `LeaseEnd` as well to model lease as a read-only branch that never writes data + // (i.e. it's lsn has not advanced from ancestor_lsn), and therefore the three segments have the same LSN + // value. Without the other two segments, the calculation code would not count the leased LSN as a point + // to be retained. + // Did not use `BranchStart` or `BranchEnd` so we can differentiate branches and leases during debug. + // + // Alt Design: rewrite the entire calculation code to be independent of timeline id. Both leases and + // branch points can be given a synthetic id so we can unite them. + let mut lease_parent = parent; + + // Start of a lease. + segments.push(SegmentMeta { + segment: Segment { + parent: Some(lease_parent), + lsn: lsn.0, + size: None, // Filled in later, if necessary + needed: lsn > next_gc_cutoff, // only needed if the point is within rentention. + }, + timeline_id: timeline.timeline_id, + kind: LsnKind::LeaseStart, + }); + lease_parent += 1; + + // End of the lease. + segments.push(SegmentMeta { + segment: Segment { + parent: Some(lease_parent), + lsn: lsn.0, + size: None, // Filled in later, if necessary + needed: true, // everything at the lease LSN must be readable => is needed + }, + timeline_id: timeline.timeline_id, + kind: LsnKind::LeaseEnd, + }); + } } // Current end of the timeline @@ -332,6 +402,7 @@ pub(super) async fn gather_inputs( pitr_cutoff, next_gc_cutoff, retention_param_cutoff, + lease_points, }); } @@ -674,7 +745,8 @@ fn verify_size_for_multiple_branches() { "horizon_cutoff": "0/2210CD0", "pitr_cutoff": "0/2210CD0", "next_gc_cutoff": "0/2210CD0", - "retention_param_cutoff": null + "retention_param_cutoff": null, + "lease_points": [] }, { "timeline_id": "454626700469f0a9914949b9d018e876", @@ -684,7 +756,8 @@ fn verify_size_for_multiple_branches() { "horizon_cutoff": "0/1817770", "pitr_cutoff": "0/1817770", "next_gc_cutoff": "0/1817770", - "retention_param_cutoff": null + "retention_param_cutoff": null, + "lease_points": [] }, { "timeline_id": "cb5e3cbe60a4afc00d01880e1a37047f", @@ -694,7 +767,8 @@ fn verify_size_for_multiple_branches() { "horizon_cutoff": "0/18B3D98", "pitr_cutoff": "0/18B3D98", "next_gc_cutoff": "0/18B3D98", - "retention_param_cutoff": null + "retention_param_cutoff": null, + "lease_points": [] } ] } @@ -749,7 +823,8 @@ fn verify_size_for_one_branch() { "horizon_cutoff": "47/240A5860", "pitr_cutoff": "47/240A5860", "next_gc_cutoff": "47/240A5860", - "retention_param_cutoff": "0/0" + "retention_param_cutoff": "0/0", + "lease_points": [] } ] }"#; diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index bbf0d0a4bf68..42e55ab2695c 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -14,6 +14,7 @@ use anyhow::{anyhow, bail, ensure, Context, Result}; use arc_swap::ArcSwap; use bytes::Bytes; use camino::Utf8Path; +use chrono::{DateTime, Utc}; use enumset::EnumSet; use fail::fail_point; use once_cell::sync::Lazy; @@ -1590,7 +1591,13 @@ impl Timeline { let existing_lease = occupied.get_mut(); if valid_until > existing_lease.valid_until { existing_lease.valid_until = valid_until; + let dt: DateTime = valid_until.into(); + info!("lease extended to {}", dt); + } else { + let dt: DateTime = existing_lease.valid_until.into(); + info!("existing lease covers greater length, valid until {}", dt); } + existing_lease.clone() } else { // Reject already GC-ed LSN (lsn < latest_gc_cutoff) @@ -1599,6 +1606,8 @@ impl Timeline { bail!("tried to request a page version that was garbage collected. requested at {} gc cutoff {}", lsn, *latest_gc_cutoff_lsn); } + let dt: DateTime = valid_until.into(); + info!("lease created, valid until {}", dt); entry.or_insert(LsnLease { valid_until }).clone() } }; diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index 3da0be802116..03aee9e5c597 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -599,6 +599,22 @@ def timeline_get_lsn_by_timestamp( res_json = res.json() return res_json + def timeline_lsn_lease( + self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId, lsn: Lsn + ): + data = { + "lsn": str(lsn), + } + + log.info(f"Requesting lsn lease for {lsn=}, {tenant_id=}, {timeline_id=}") + res = self.post( + f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/lsn_lease", + json=data, + ) + self.verbose_error(res) + res_json = res.json() + return res_json + def timeline_get_timestamp_of_lsn( self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId, lsn: Lsn ): diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py index 6c85ddebbcfb..70e8fe67d595 100644 --- a/test_runner/regress/test_tenant_size.py +++ b/test_runner/regress/test_tenant_size.py @@ -10,6 +10,7 @@ Endpoint, NeonEnv, NeonEnvBuilder, + flush_ep_to_pageserver, wait_for_last_flush_lsn, wait_for_wal_insert_lsn, ) @@ -710,3 +711,90 @@ def mask_model_inputs(x): return newlist else: return x + + +@pytest.mark.parametrize("zero_gc", [True, False]) +def test_lsn_lease_size(neon_env_builder: NeonEnvBuilder, test_output_dir: Path, zero_gc: bool): + """ + Compare a LSN lease to a read-only branch for synthetic size calculation. + They should have the same effect. + """ + + conf = { + "pitr_interval": "0s" if zero_gc else "3600s", + "gc_period": "0s", + } + + env = neon_env_builder.init_start(initial_tenant_conf=conf) + + ro_branch_res = insert_with_action( + env, env.initial_tenant, env.initial_timeline, test_output_dir, action="branch" + ) + + tenant, timeline = env.neon_cli.create_tenant(conf=conf) + lease_res = insert_with_action(env, tenant, timeline, test_output_dir, action="lease") + + assert_size_approx_equal(lease_res, ro_branch_res) + + +def insert_with_action( + env: NeonEnv, + tenant: TenantId, + timeline: TimelineId, + test_output_dir: Path, + action: str, +) -> int: + """ + Inserts some data on the timeline, perform an action, and insert more data on the same timeline. + Returns the size at the end of the insertion. + + Valid actions: + - "lease": Acquires a lease. + - "branch": Creates a child branch but never writes to it. + """ + + client = env.pageserver.http_client() + with env.endpoints.create_start("main", tenant_id=tenant) as ep: + initial_size = client.tenant_size(tenant) + log.info(f"initial size: {initial_size}") + + with ep.cursor() as cur: + cur.execute( + "CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" + ) + last_flush_lsn = wait_for_last_flush_lsn(env, ep, tenant, timeline) + + if action == "lease": + res = client.timeline_lsn_lease(tenant, timeline, last_flush_lsn) + log.info(f"result from lsn_lease api: {res}") + elif action == "branch": + ro_branch = env.neon_cli.create_branch( + "ro_branch", tenant_id=tenant, ancestor_start_lsn=last_flush_lsn + ) + log.info(f"{ro_branch=} created") + else: + raise AssertionError("Invalid action type, only `lease` and `branch`are accepted") + + with ep.cursor() as cur: + cur.execute( + "CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" + ) + cur.execute( + "CREATE TABLE t2 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" + ) + cur.execute( + "CREATE TABLE t3 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" + ) + + last_flush_lsn = wait_for_last_flush_lsn(env, ep, tenant, timeline) + + # Avoid flakiness when calculating logical size. + flush_ep_to_pageserver(env, ep, tenant, timeline) + + size_after_action_and_insert = client.tenant_size(tenant) + log.info(f"{size_after_action_and_insert=}") + + size_debug_file = open(test_output_dir / f"size_debug_{action}.html", "w") + size_debug = client.tenant_size_debug(tenant) + size_debug_file.write(size_debug) + return size_after_action_and_insert