From 10c46db5e4c625988614022a36c6e2c0c470b8af Mon Sep 17 00:00:00 2001 From: Yuchen Liang Date: Fri, 28 Jun 2024 18:13:02 -0400 Subject: [PATCH 1/4] feat: integrate lsn lease into synthetic size Signed-off-by: Yuchen Liang --- libs/pageserver_api/src/models.rs | 5 + libs/tenant_size_model/src/calculation.rs | 4 +- libs/tenant_size_model/src/svg.rs | 40 +++- libs/tenant_size_model/tests/tests.rs | 214 ++++++++++++++++++++++ pageserver/src/http/routes.rs | 17 +- pageserver/src/tenant/size.rs | 73 +++++++- test_runner/fixtures/pageserver/http.py | 16 ++ test_runner/regress/test_lsn_lease.py | 79 ++++++++ test_runner/regress/test_tenant_size.py | 100 ++++++++++ 9 files changed, 529 insertions(+), 19 deletions(-) create mode 100644 test_runner/regress/test_lsn_lease.py diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 3db75b7d0e39..ed1df8517d46 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -229,6 +229,11 @@ pub struct TimelineCreateRequest { pub pg_version: Option, } +#[derive(Serialize, Deserialize, Clone)] +pub struct LsnLeaseRequest { + pub lsn: Lsn, +} + #[derive(Serialize, Deserialize)] pub struct TenantShardSplitRequest { pub new_shard_count: u8, diff --git a/libs/tenant_size_model/src/calculation.rs b/libs/tenant_size_model/src/calculation.rs index f05997ee6547..be005622199d 100644 --- a/libs/tenant_size_model/src/calculation.rs +++ b/libs/tenant_size_model/src/calculation.rs @@ -34,10 +34,10 @@ struct SegmentSize { } struct SizeAlternatives { - // cheapest alternative if parent is available. + /// cheapest alternative if parent is available. incremental: SegmentSize, - // cheapest alternative if parent node is not available + /// cheapest alternative if parent node is not available non_incremental: Option, } diff --git a/libs/tenant_size_model/src/svg.rs b/libs/tenant_size_model/src/svg.rs index f26d3aa79d1a..4f442cd47b4b 100644 --- a/libs/tenant_size_model/src/svg.rs +++ b/libs/tenant_size_model/src/svg.rs @@ -6,13 +6,13 @@ const SVG_WIDTH: f32 = 500.0; struct SvgDraw<'a> { storage: &'a StorageModel, branches: &'a [String], - seg_to_branch: &'a [usize], + seg_to_branch: &'a [(usize, bool)], sizes: &'a [SegmentSizeResult], // layout xscale: f32, min_lsn: u64, - seg_coordinates: Vec<(f32, f32)>, + seg_coordinates: Vec<(f32, f32, bool)>, } fn draw_legend(result: &mut String) -> anyhow::Result<()> { @@ -42,13 +42,18 @@ fn draw_legend(result: &mut String) -> anyhow::Result<()> { "" )?; writeln!(result, "WAL not retained")?; + writeln!( + result, + "" + )?; + writeln!(result, "LSN lease")?; Ok(()) } pub fn draw_svg( storage: &StorageModel, branches: &[String], - seg_to_branch: &[usize], + seg_to_branch: &[(usize, bool)], sizes: &SizeResult, ) -> anyhow::Result { let mut draw = SvgDraw { @@ -100,7 +105,7 @@ impl<'a> SvgDraw<'a> { // Layout the timelines on Y dimension. // TODO - let mut y = 100.0; + let mut y = 120.0; let mut branch_y_coordinates = Vec::new(); for _branch in self.branches { branch_y_coordinates.push(y); @@ -109,10 +114,10 @@ impl<'a> SvgDraw<'a> { // Calculate coordinates for each point let seg_coordinates = std::iter::zip(segments, self.seg_to_branch) - .map(|(seg, branch_id)| { + .map(|(seg, (branch_id, is_lease_point))| { let x = (seg.lsn - min_lsn) as f32 / xscale; let y = branch_y_coordinates[*branch_id]; - (x, y) + (x, y, *is_lease_point) }) .collect(); @@ -140,8 +145,8 @@ impl<'a> SvgDraw<'a> { SegmentMethod::Skipped => "stroke-width=\"1\" stroke=\"gray\"", }; if let Some(parent_id) = seg.parent { - let (x1, y1) = self.seg_coordinates[parent_id]; - let (x2, y2) = self.seg_coordinates[seg_id]; + let (x1, y1, _) = self.seg_coordinates[parent_id]; + let (x2, y2, _) = self.seg_coordinates[seg_id]; writeln!( result, @@ -154,7 +159,7 @@ impl<'a> SvgDraw<'a> { writeln!(result, "")?; } else { // draw a little dash to mark the starting point of this branch - let (x, y) = self.seg_coordinates[seg_id]; + let (x, y, _) = self.seg_coordinates[seg_id]; let (x1, y1) = (x, y - 5.0); let (x2, y2) = (x, y + 5.0); @@ -174,7 +179,22 @@ impl<'a> SvgDraw<'a> { let seg = &self.storage.segments[seg_id]; // draw a snapshot point if it's needed - let (coord_x, coord_y) = self.seg_coordinates[seg_id]; + let (coord_x, coord_y, is_lease_point) = self.seg_coordinates[seg_id]; + + if is_lease_point { + let (x1, y1) = (coord_x, coord_y - 10.0); + let (x2, y2) = (coord_x, coord_y + 10.0); + + let style = "stroke-width=\"3\" stroke=\"blue\""; + + writeln!( + result, + "", + )?; + writeln!(result, " leased lsn at {}", seg.lsn)?; + writeln!(result, "")?; + } + if self.sizes[seg_id].method == SegmentMethod::SnapshotHere { writeln!( result, diff --git a/libs/tenant_size_model/tests/tests.rs b/libs/tenant_size_model/tests/tests.rs index 0ffea0f2cdc6..dd9d6622a796 100644 --- a/libs/tenant_size_model/tests/tests.rs +++ b/libs/tenant_size_model/tests/tests.rs @@ -93,6 +93,14 @@ impl ScenarioBuilder { let storage_model = StorageModel { segments: self.segments.clone(), }; + + let segs = storage_model + .segments + .iter() + .enumerate() + .collect::>(); + eprintln!("segs before: {:#?}", segs); + let size_result = storage_model.calculate(); (storage_model, size_result) } @@ -160,6 +168,212 @@ fn scenario_2() { assert_eq!(result.total_size, 5000 + 1000 + 1000); } +// Main branch only. Some updates on it. +#[test] +fn scenario_ro_branch() { + const MB: u64 = 1; + const GB: u64 = 1_000; + const TB: u64 = 1_000_000; + + // Create main branch + let mut scenario = ScenarioBuilder::new("main"); + + scenario.modify_branch("main", 4 * TB, 600 * GB as i64); + + // Branch + scenario.branch("main", "child"); + scenario.modify_branch("child", 0, 0); + scenario.modify_branch("child", 0, 0); + + // 1TB snapshot at GC cutoff + scenario.modify_branch("main", 2 * TB, 400 * GB as i64); + + scenario.modify_branch("main", 1 * MB, 0); + + let (_model, result) = scenario.calculate(2 * TB + 1 * MB); + + let segs = _model + .segments + .iter() + .zip(result.segments) + .enumerate() + .collect::>(); + eprintln!("segs size result: {:#?}", segs); + assert_eq!(result.total_size, 1 * TB + 1 * MB + 600 * GB); +} + +#[test] +fn scenario_handcraft_lease_before_gc() { + const MB: u64 = 1; + const GB: u64 = 1_000; + const TB: u64 = 1_000_000; + + let mut segs = Vec::new(); + let mut lsn = 0; + let mut size = Some(0); + let needed = false; + { + let main_start = Segment { + parent: None, + lsn, + size, + needed, + }; + segs.push(main_start); + } + { + lsn += 4 * TB; + size = size.map(|n| n + 600 * GB); + let branchpoint = Segment { + parent: Some(0), + lsn, + size, + needed, + }; + segs.push(branchpoint); + } + { + let child_start = Segment { + parent: Some(1), + lsn, + size, + needed, + }; + + segs.push(child_start); + + let child_end = Segment { + parent: Some(2), + lsn, + size, + needed: true, + }; + + segs.push(child_end); + } + { + lsn += 3 * MB; + size = size.map(|n| n + 100 * GB); + let gc_cutoff = Segment { + parent: Some(1), + lsn, + size, + needed, + }; + + segs.push(gc_cutoff); + } + { + lsn += 1 * MB; + let main_end = Segment { + parent: Some(4), + lsn, + size, + needed: true, + }; + segs.push(main_end); + } + + let model = StorageModel { + segments: segs.clone(), + }; + let res = model.calculate(); + println!("total: {}", res.total_size); + let seg_res = segs + .iter() + .zip(res.segments) + .enumerate() + .collect::>(); + println!("seg_results: {:#?}", seg_res); + assert!(false); +} + +#[test] +fn scenario_handcraft_lease_after_gc() { + const MB: u64 = 1; + const GB: u64 = 1_000; + const TB: u64 = 1_000_000; + + let mut segs = Vec::new(); + let mut lsn = 0; + let mut size = Some(0); + let needed = false; + { + let main_start = Segment { + parent: None, + lsn, + size, + needed, + }; + segs.push(main_start); + } + { + lsn += 4 * TB + 3 * MB; + size = Some(700 * GB); + let gc_cutoff = Segment { + parent: Some(0), + lsn, + size, + needed, + }; + + segs.push(gc_cutoff); + } + { + lsn += 1 * MB; + let branchpoint = Segment { + parent: Some(1), + lsn, + size, + needed: true, + }; + segs.push(branchpoint); + } + { + let child_start = Segment { + parent: Some(2), + lsn, + size, + needed, + }; + + segs.push(child_start); + + let child_end = Segment { + parent: Some(3), + lsn, + size, + needed: true, + }; + + segs.push(child_end); + } + + { + lsn += 1 * MB; + let main_end = Segment { + parent: Some(2), + lsn, + size, + needed: true, + }; + segs.push(main_end); + } + + let model = StorageModel { + segments: segs.clone(), + }; + let res = model.calculate(); + println!("total: {}", res.total_size); + let seg_res = segs + .iter() + .zip(res.segments) + .enumerate() + .collect::>(); + println!("seg_results: {:#?}", seg_res); + assert!(false); +} + // Like 2, but more updates on main #[test] fn scenario_3() { diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 450f89820e5b..8c1690fa0a2e 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -22,6 +22,7 @@ use pageserver_api::models::ListAuxFilesRequest; use pageserver_api::models::LocationConfig; use pageserver_api::models::LocationConfigListResponse; use pageserver_api::models::LsnLease; +use pageserver_api::models::LsnLeaseRequest; use pageserver_api::models::ShardParameters; use pageserver_api::models::TenantDetails; use pageserver_api::models::TenantLocationConfigResponse; @@ -71,6 +72,7 @@ use crate::tenant::remote_timeline_client::download_index_part; use crate::tenant::remote_timeline_client::list_remote_tenant_shards; use crate::tenant::remote_timeline_client::list_remote_timelines; use crate::tenant::secondary::SecondaryController; +use crate::tenant::size::LsnKind; use crate::tenant::size::ModelInputs; use crate::tenant::storage_layer::LayerAccessStatsReset; use crate::tenant::storage_layer::LayerName; @@ -1263,10 +1265,15 @@ fn synthetic_size_html_response( timeline_map.insert(ti.timeline_id, index); timeline_ids.push(ti.timeline_id.to_string()); } - let seg_to_branch: Vec = inputs + let seg_to_branch: Vec<(usize, bool)> = inputs .segments .iter() - .map(|seg| *timeline_map.get(&seg.timeline_id).unwrap()) + .map(|seg| { + ( + *timeline_map.get(&seg.timeline_id).unwrap(), + seg.kind == LsnKind::LeasePoint, + ) + }) .collect(); let svg = @@ -1668,15 +1675,13 @@ async fn handle_tenant_break( // Obtains an lsn lease on the given timeline. async fn lsn_lease_handler( - request: Request, + mut request: Request, _cancel: CancellationToken, ) -> Result, ApiError> { let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; check_permission(&request, Some(tenant_shard_id.tenant_id))?; - - let lsn: Lsn = parse_query_param(&request, "lsn")? - .ok_or_else(|| ApiError::BadRequest(anyhow!("missing 'lsn' query parameter")))?; + let lsn = json_request::(&mut request).await?.lsn; let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); diff --git a/pageserver/src/tenant/size.rs b/pageserver/src/tenant/size.rs index b2338b620ebf..0c640cc58c2b 100644 --- a/pageserver/src/tenant/size.rs +++ b/pageserver/src/tenant/size.rs @@ -87,6 +87,9 @@ impl SegmentMeta { LsnKind::BranchPoint => true, LsnKind::GcCutOff => true, LsnKind::BranchEnd => false, + LsnKind::LeasePoint => true, + LsnKind::LeaseStart => false, + LsnKind::LeaseEnd => false, } } } @@ -103,6 +106,38 @@ pub enum LsnKind { GcCutOff, /// Last record LSN BranchEnd, + LeasePoint, + LeaseStart, + LeaseEnd, +} + +#[derive(Debug, Copy, Clone, Hash, Default, PartialEq, Eq)] +pub struct FakeTimelineId(u64); + +#[derive(Default, Debug)] +pub struct FakeTimelineIdMap { + next_id: FakeTimelineId, + map: HashMap>, +} + +impl FakeTimelineIdMap { + pub fn new() -> Self { + Self { + next_id: FakeTimelineId(0), + ..Default::default() + } + } + + pub fn insert(&mut self, timeline_id: Option) -> FakeTimelineId { + let res = self.next_id; + self.map.insert(res, timeline_id); + self.next_id.0 += 1; + res + } + + pub fn get(&self, fake_id: &FakeTimelineId) -> Option> { + self.map.get(fake_id).copied() + } } /// Collect all relevant LSNs to the inputs. These will only be helpful in the serialized form as @@ -248,6 +283,15 @@ pub(super) async fn gather_inputs( .map(|lsn| (lsn, LsnKind::BranchPoint)) .collect::>(); + lsns.extend( + gc_info + .leases + .keys() + .filter(|&&lsn| lsn > ancestor_lsn) + .copied() + .map(|lsn| (lsn, LsnKind::LeasePoint)), + ); + drop(gc_info); // Add branch points we collected earlier, just in case there were any that were @@ -296,6 +340,7 @@ pub(super) async fn gather_inputs( if kind == LsnKind::BranchPoint { branchpoint_segments.insert((timeline_id, lsn), segments.len()); } + segments.push(SegmentMeta { segment: Segment { parent: Some(parent), @@ -306,7 +351,33 @@ pub(super) async fn gather_inputs( timeline_id: timeline.timeline_id, kind, }); - parent += 1; + + parent = segments.len() - 1; + + if kind == LsnKind::LeasePoint { + let mut lease_parent = parent; + segments.push(SegmentMeta { + segment: Segment { + parent: Some(lease_parent), + lsn: lsn.0, + size: None, + needed: next_gc_cutoff <= lsn, + }, + timeline_id: timeline.timeline_id, + kind: LsnKind::LeaseStart, + }); + lease_parent += 1; + segments.push(SegmentMeta { + segment: Segment { + parent: Some(lease_parent), + lsn: lsn.0, + size: None, + needed: true, + }, + timeline_id: timeline.timeline_id, + kind: LsnKind::LeaseEnd, + }); + } } // Current end of the timeline diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index 64c7ddee6c8c..fe3205bfe7c0 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -634,6 +634,22 @@ def timeline_get_lsn_by_timestamp( res_json = res.json() return res_json + def timeline_lsn_lease( + self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId, lsn: Lsn + ): + data = { + "lsn": str(lsn), + } + + log.info(f"Requesting lsn lease for {lsn=}, {tenant_id=}, {timeline_id=}") + res = self.post( + f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/lsn_lease", + json=data, + ) + self.verbose_error(res) + res_json = res.json() + return res_json + def timeline_get_timestamp_of_lsn( self, tenant_id: Union[TenantId, TenantShardId], timeline_id: TimelineId, lsn: Lsn ): diff --git a/test_runner/regress/test_lsn_lease.py b/test_runner/regress/test_lsn_lease.py new file mode 100644 index 000000000000..342ff5542cac --- /dev/null +++ b/test_runner/regress/test_lsn_lease.py @@ -0,0 +1,79 @@ +# from pathlib import Path + +# from fixtures.common_types import TenantId, TimelineId +# from fixtures.log_helper import log +# from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, wait_for_last_flush_lsn + + +# # def test_lsn_lease_size_zero_gc(neon_env_builder: NeonEnvBuilder, test_output_dir: Path): +# # conf = {"pitr_interval": "0s"} +# # env = neon_env_builder.init_start(initial_tenant_conf=conf) +# # lease_res = insert_and_acquire_lease(env, env.initial_tenant, env.initial_timeline, test_output_dir) + +# # tenant, timeline = env.neon_cli.create_tenant(conf=conf) +# # ro_branch_res = insert_and_create_ro_branch( +# # env, +# # tenant, +# # timeline, +# # ) + +# # for (l, r) in zip(lease_res, ro_branch_res): + + +# def test_lsn_lease_api_zero_cost(neon_env_builder: NeonEnvBuilder, test_output_dir: Path): +# env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "3600s"}) + +# client = env.pageserver.http_client() +# with env.endpoints.create_start("main") as ep: +# initial_size = client.tenant_size(env.initial_tenant) +# log.info(f"initial size: {initial_size}") + +# with ep.cursor() as cur: +# cur.execute( +# "CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" +# ) +# last_flush_lsn = wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline) +# res = client.timeline_lsn_lease(env.initial_tenant, env.initial_timeline, last_flush_lsn) +# log.info(f"result from lsn_lease api: {res}") + +# with ep.cursor() as cur: +# cur.execute( +# "CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" +# ) +# wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline) +# size_after_lease_and_insert = client.tenant_size(env.initial_tenant) +# log.info(f"size_after_lease_and_insert: {size_after_lease_and_insert}") + +# size_debug_file = open(test_output_dir / "size_debug.html", "w") +# size_debug = client.tenant_size_debug(env.initial_tenant) +# size_debug_file.write(size_debug) + + +# def test_lsn_lease_api_zero_cost_compare(neon_env_builder: NeonEnvBuilder, test_output_dir: Path): +# env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "3600s"}) + +# client = env.pageserver.http_client() +# with env.endpoints.create_start("main") as ep: +# initial_size = client.tenant_size(env.initial_tenant) +# log.info(f"initial size: {initial_size}") + +# with ep.cursor() as cur: +# cur.execute( +# "CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" +# ) +# last_flush_lsn = wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline) +# static_branch = env.neon_cli.create_branch("static") +# # res = client.timeline_lsn_lease(env.initial_tenant, env.initial_timeline, last_flush_lsn) +# log.info(f"{static_branch=}") + +# with ep.cursor() as cur: +# cur.execute( +# "CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" +# ) +# wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline) +# size_after_lease_and_insert = client.tenant_size(env.initial_tenant) +# log.info(f"size_after_lease_and_insert: {size_after_lease_and_insert}") + +# size_debug_file = open(test_output_dir / "size_debug.html", "w") +# size_debug = client.tenant_size_debug(env.initial_tenant) +# size_debug_file.write(size_debug) diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py index 6c85ddebbcfb..ba4386d9387a 100644 --- a/test_runner/regress/test_tenant_size.py +++ b/test_runner/regress/test_tenant_size.py @@ -710,3 +710,103 @@ def mask_model_inputs(x): return newlist else: return x + + +@pytest.mark.parametrize("zero_gc", [True, False]) +def test_lsn_lease_size(neon_env_builder: NeonEnvBuilder, test_output_dir: Path, zero_gc: bool): + """ + Compare a LSN lease to a read-only branch for synthetic size calculation. + They should have the same effect. + """ + + conf = {"pitr_interval": "0s"} if zero_gc else {"pitr_interval": "3600s"} + + env = neon_env_builder.init_start(initial_tenant_conf=conf) + lease_res = insert_and_acquire_lease( + env, env.initial_tenant, env.initial_timeline, test_output_dir + ) + + tenant, timeline = env.neon_cli.create_tenant(conf=conf) + ro_branch_res = insert_and_create_ro_branch(env, tenant, timeline, test_output_dir) + + for lhs, rhs in zip(lease_res, ro_branch_res): + assert_size_approx_equal(lhs, rhs) + + +def insert_and_acquire_lease( + env: NeonEnv, tenant: TenantId, timeline: TimelineId, test_output_dir: Path +) -> list[int]: + sizes = [] + client = env.pageserver.http_client() + with env.endpoints.create_start("main", tenant_id=tenant) as ep: + initial_size = client.tenant_size(tenant) + log.info(f"initial size: {initial_size}") + + with ep.cursor() as cur: + cur.execute( + "CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" + ) + last_flush_lsn = wait_for_last_flush_lsn(env, ep, tenant, timeline) + res = client.timeline_lsn_lease(tenant, timeline, last_flush_lsn) + log.info(f"result from lsn_lease api: {res}") + + with ep.cursor() as cur: + cur.execute( + "CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" + ) + cur.execute( + "CREATE TABLE t2 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" + ) + cur.execute( + "CREATE TABLE t3 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" + ) + + last_flush_lsn = wait_for_last_flush_lsn(env, ep, tenant, timeline) + + size_after_lease_and_insert = client.tenant_size(tenant) + sizes.append(size_after_lease_and_insert) + log.info(f"size_after_lease_and_insert: {size_after_lease_and_insert}") + + size_debug_file = open(test_output_dir / "size_debug_lease.html", "w") + size_debug = client.tenant_size_debug(tenant) + size_debug_file.write(size_debug) + return sizes + + +def insert_and_create_ro_branch( + env: NeonEnv, tenant: TenantId, timeline: TimelineId, test_output_dir: Path +) -> list[int]: + sizes = [] + client = env.pageserver.http_client() + with env.endpoints.create_start("main", tenant_id=tenant) as ep: + initial_size = client.tenant_size(tenant) + log.info(f"initial size: {initial_size}") + + with ep.cursor() as cur: + cur.execute( + "CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" + ) + wait_for_last_flush_lsn(env, ep, tenant, timeline) + ro_branch = env.neon_cli.create_branch("ro_branch", tenant_id=tenant) + log.info(f"{ro_branch=}") + + with ep.cursor() as cur: + cur.execute( + "CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" + ) + cur.execute( + "CREATE TABLE t2 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" + ) + cur.execute( + "CREATE TABLE t3 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" + ) + wait_for_last_flush_lsn(env, ep, tenant, timeline) + + size_after_branching = client.tenant_size(tenant) + sizes.append(size_after_branching) + log.info(f"size_after_branching: {size_after_branching}") + + size_debug_file = open(test_output_dir / "size_debug_ro_branch.html", "w") + size_debug = client.tenant_size_debug(tenant) + size_debug_file.write(size_debug) + return sizes From 3c8611f26b81c4e068d9e16f16c35a93aa954f6c Mon Sep 17 00:00:00 2001 From: Yuchen Liang Date: Fri, 28 Jun 2024 18:17:19 -0400 Subject: [PATCH 2/4] remove scratch Signed-off-by: Yuchen Liang --- pageserver/src/tenant/size.rs | 29 ---------- test_runner/regress/test_lsn_lease.py | 79 --------------------------- 2 files changed, 108 deletions(-) delete mode 100644 test_runner/regress/test_lsn_lease.py diff --git a/pageserver/src/tenant/size.rs b/pageserver/src/tenant/size.rs index 0c640cc58c2b..dc607d16eaa6 100644 --- a/pageserver/src/tenant/size.rs +++ b/pageserver/src/tenant/size.rs @@ -111,35 +111,6 @@ pub enum LsnKind { LeaseEnd, } -#[derive(Debug, Copy, Clone, Hash, Default, PartialEq, Eq)] -pub struct FakeTimelineId(u64); - -#[derive(Default, Debug)] -pub struct FakeTimelineIdMap { - next_id: FakeTimelineId, - map: HashMap>, -} - -impl FakeTimelineIdMap { - pub fn new() -> Self { - Self { - next_id: FakeTimelineId(0), - ..Default::default() - } - } - - pub fn insert(&mut self, timeline_id: Option) -> FakeTimelineId { - let res = self.next_id; - self.map.insert(res, timeline_id); - self.next_id.0 += 1; - res - } - - pub fn get(&self, fake_id: &FakeTimelineId) -> Option> { - self.map.get(fake_id).copied() - } -} - /// Collect all relevant LSNs to the inputs. These will only be helpful in the serialized form as /// part of [`ModelInputs`] from the HTTP api, explaining the inputs. #[derive(Debug, serde::Serialize, serde::Deserialize)] diff --git a/test_runner/regress/test_lsn_lease.py b/test_runner/regress/test_lsn_lease.py deleted file mode 100644 index 342ff5542cac..000000000000 --- a/test_runner/regress/test_lsn_lease.py +++ /dev/null @@ -1,79 +0,0 @@ -# from pathlib import Path - -# from fixtures.common_types import TenantId, TimelineId -# from fixtures.log_helper import log -# from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, wait_for_last_flush_lsn - - -# # def test_lsn_lease_size_zero_gc(neon_env_builder: NeonEnvBuilder, test_output_dir: Path): -# # conf = {"pitr_interval": "0s"} -# # env = neon_env_builder.init_start(initial_tenant_conf=conf) -# # lease_res = insert_and_acquire_lease(env, env.initial_tenant, env.initial_timeline, test_output_dir) - -# # tenant, timeline = env.neon_cli.create_tenant(conf=conf) -# # ro_branch_res = insert_and_create_ro_branch( -# # env, -# # tenant, -# # timeline, -# # ) - -# # for (l, r) in zip(lease_res, ro_branch_res): - - -# def test_lsn_lease_api_zero_cost(neon_env_builder: NeonEnvBuilder, test_output_dir: Path): -# env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "3600s"}) - -# client = env.pageserver.http_client() -# with env.endpoints.create_start("main") as ep: -# initial_size = client.tenant_size(env.initial_tenant) -# log.info(f"initial size: {initial_size}") - -# with ep.cursor() as cur: -# cur.execute( -# "CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" -# ) -# last_flush_lsn = wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline) -# res = client.timeline_lsn_lease(env.initial_tenant, env.initial_timeline, last_flush_lsn) -# log.info(f"result from lsn_lease api: {res}") - -# with ep.cursor() as cur: -# cur.execute( -# "CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" -# ) -# wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline) -# size_after_lease_and_insert = client.tenant_size(env.initial_tenant) -# log.info(f"size_after_lease_and_insert: {size_after_lease_and_insert}") - -# size_debug_file = open(test_output_dir / "size_debug.html", "w") -# size_debug = client.tenant_size_debug(env.initial_tenant) -# size_debug_file.write(size_debug) - - -# def test_lsn_lease_api_zero_cost_compare(neon_env_builder: NeonEnvBuilder, test_output_dir: Path): -# env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "3600s"}) - -# client = env.pageserver.http_client() -# with env.endpoints.create_start("main") as ep: -# initial_size = client.tenant_size(env.initial_tenant) -# log.info(f"initial size: {initial_size}") - -# with ep.cursor() as cur: -# cur.execute( -# "CREATE TABLE t0 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" -# ) -# last_flush_lsn = wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline) -# static_branch = env.neon_cli.create_branch("static") -# # res = client.timeline_lsn_lease(env.initial_tenant, env.initial_timeline, last_flush_lsn) -# log.info(f"{static_branch=}") - -# with ep.cursor() as cur: -# cur.execute( -# "CREATE TABLE t1 AS SELECT i::bigint n FROM generate_series(0, 1000000) s(i)" -# ) -# wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline) -# size_after_lease_and_insert = client.tenant_size(env.initial_tenant) -# log.info(f"size_after_lease_and_insert: {size_after_lease_and_insert}") - -# size_debug_file = open(test_output_dir / "size_debug.html", "w") -# size_debug = client.tenant_size_debug(env.initial_tenant) -# size_debug_file.write(size_debug) From 911da0a71b7683fa541c6a7f8c29aff86865c10b Mon Sep 17 00:00:00 2001 From: Yuchen Liang Date: Fri, 28 Jun 2024 18:19:20 -0400 Subject: [PATCH 3/4] remove scratch testing Signed-off-by: Yuchen Liang --- libs/tenant_size_model/tests/tests.rs | 206 -------------------------- 1 file changed, 206 deletions(-) diff --git a/libs/tenant_size_model/tests/tests.rs b/libs/tenant_size_model/tests/tests.rs index dd9d6622a796..55bfdce87907 100644 --- a/libs/tenant_size_model/tests/tests.rs +++ b/libs/tenant_size_model/tests/tests.rs @@ -168,212 +168,6 @@ fn scenario_2() { assert_eq!(result.total_size, 5000 + 1000 + 1000); } -// Main branch only. Some updates on it. -#[test] -fn scenario_ro_branch() { - const MB: u64 = 1; - const GB: u64 = 1_000; - const TB: u64 = 1_000_000; - - // Create main branch - let mut scenario = ScenarioBuilder::new("main"); - - scenario.modify_branch("main", 4 * TB, 600 * GB as i64); - - // Branch - scenario.branch("main", "child"); - scenario.modify_branch("child", 0, 0); - scenario.modify_branch("child", 0, 0); - - // 1TB snapshot at GC cutoff - scenario.modify_branch("main", 2 * TB, 400 * GB as i64); - - scenario.modify_branch("main", 1 * MB, 0); - - let (_model, result) = scenario.calculate(2 * TB + 1 * MB); - - let segs = _model - .segments - .iter() - .zip(result.segments) - .enumerate() - .collect::>(); - eprintln!("segs size result: {:#?}", segs); - assert_eq!(result.total_size, 1 * TB + 1 * MB + 600 * GB); -} - -#[test] -fn scenario_handcraft_lease_before_gc() { - const MB: u64 = 1; - const GB: u64 = 1_000; - const TB: u64 = 1_000_000; - - let mut segs = Vec::new(); - let mut lsn = 0; - let mut size = Some(0); - let needed = false; - { - let main_start = Segment { - parent: None, - lsn, - size, - needed, - }; - segs.push(main_start); - } - { - lsn += 4 * TB; - size = size.map(|n| n + 600 * GB); - let branchpoint = Segment { - parent: Some(0), - lsn, - size, - needed, - }; - segs.push(branchpoint); - } - { - let child_start = Segment { - parent: Some(1), - lsn, - size, - needed, - }; - - segs.push(child_start); - - let child_end = Segment { - parent: Some(2), - lsn, - size, - needed: true, - }; - - segs.push(child_end); - } - { - lsn += 3 * MB; - size = size.map(|n| n + 100 * GB); - let gc_cutoff = Segment { - parent: Some(1), - lsn, - size, - needed, - }; - - segs.push(gc_cutoff); - } - { - lsn += 1 * MB; - let main_end = Segment { - parent: Some(4), - lsn, - size, - needed: true, - }; - segs.push(main_end); - } - - let model = StorageModel { - segments: segs.clone(), - }; - let res = model.calculate(); - println!("total: {}", res.total_size); - let seg_res = segs - .iter() - .zip(res.segments) - .enumerate() - .collect::>(); - println!("seg_results: {:#?}", seg_res); - assert!(false); -} - -#[test] -fn scenario_handcraft_lease_after_gc() { - const MB: u64 = 1; - const GB: u64 = 1_000; - const TB: u64 = 1_000_000; - - let mut segs = Vec::new(); - let mut lsn = 0; - let mut size = Some(0); - let needed = false; - { - let main_start = Segment { - parent: None, - lsn, - size, - needed, - }; - segs.push(main_start); - } - { - lsn += 4 * TB + 3 * MB; - size = Some(700 * GB); - let gc_cutoff = Segment { - parent: Some(0), - lsn, - size, - needed, - }; - - segs.push(gc_cutoff); - } - { - lsn += 1 * MB; - let branchpoint = Segment { - parent: Some(1), - lsn, - size, - needed: true, - }; - segs.push(branchpoint); - } - { - let child_start = Segment { - parent: Some(2), - lsn, - size, - needed, - }; - - segs.push(child_start); - - let child_end = Segment { - parent: Some(3), - lsn, - size, - needed: true, - }; - - segs.push(child_end); - } - - { - lsn += 1 * MB; - let main_end = Segment { - parent: Some(2), - lsn, - size, - needed: true, - }; - segs.push(main_end); - } - - let model = StorageModel { - segments: segs.clone(), - }; - let res = model.calculate(); - println!("total: {}", res.total_size); - let seg_res = segs - .iter() - .zip(res.segments) - .enumerate() - .collect::>(); - println!("seg_results: {:#?}", seg_res); - assert!(false); -} - // Like 2, but more updates on main #[test] fn scenario_3() { From 796bd7cc79623dadcc10a1158883a6759933a558 Mon Sep 17 00:00:00 2001 From: Yuchen Liang Date: Mon, 1 Jul 2024 09:15:33 -0400 Subject: [PATCH 4/4] remove dbg print Signed-off-by: Yuchen Liang --- libs/tenant_size_model/tests/tests.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/libs/tenant_size_model/tests/tests.rs b/libs/tenant_size_model/tests/tests.rs index 55bfdce87907..0ffea0f2cdc6 100644 --- a/libs/tenant_size_model/tests/tests.rs +++ b/libs/tenant_size_model/tests/tests.rs @@ -93,14 +93,6 @@ impl ScenarioBuilder { let storage_model = StorageModel { segments: self.segments.clone(), }; - - let segs = storage_model - .segments - .iter() - .enumerate() - .collect::>(); - eprintln!("segs before: {:#?}", segs); - let size_result = storage_model.calculate(); (storage_model, size_result) }