Skip to content

Commit

Permalink
feat(pageserver): integrate lsn lease into synthetic size (#8220)
Browse files Browse the repository at this point in the history
Part of #7497, closes #8071. (accidentally closed #8208, reopened here)

## Problem

After the changes in #8084, we need synthetic size to also account for
leased LSNs so that users do not get free retention by running a small
ephemeral endpoint for a long time.

## Summary of changes

This PR integrates LSN leases into the synthetic size calculation. We
model leases as read-only branches started at the leased LSN (except it
does not have a timeline id).

Other changes:
- Add new unit tests testing whether a lease behaves like a read-only
branch.
- Change `/size_debug` response to include lease point in the SVG
visualization.
- Fix `/lsn_lease` HTTP API to do proper parsing for POST.



Signed-off-by: Yuchen Liang <yuchen@neon.tech>
Co-authored-by: Joonas Koivunen <joonas@neon.tech>
Co-authored-by: Christian Schwarz <christian@neon.tech>
  • Loading branch information
3 people committed Jul 4, 2024
1 parent e579bc0 commit 19accfe
Show file tree
Hide file tree
Showing 9 changed files with 256 additions and 27 deletions.
5 changes: 5 additions & 0 deletions libs/pageserver_api/src/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,11 @@ pub struct TimelineCreateRequest {
pub pg_version: Option<u32>,
}

#[derive(Serialize, Deserialize, Clone)]
pub struct LsnLeaseRequest {
pub lsn: Lsn,
}

#[derive(Serialize, Deserialize)]
pub struct TenantShardSplitRequest {
pub new_shard_count: u8,
Expand Down
4 changes: 2 additions & 2 deletions libs/tenant_size_model/src/calculation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ struct SegmentSize {
}

struct SizeAlternatives {
// cheapest alternative if parent is available.
/// cheapest alternative if parent is available.
incremental: SegmentSize,

// cheapest alternative if parent node is not available
/// cheapest alternative if parent node is not available
non_incremental: Option<SegmentSize>,
}

Expand Down
36 changes: 32 additions & 4 deletions libs/tenant_size_model/src/svg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,17 @@ use std::fmt::Write;

const SVG_WIDTH: f32 = 500.0;

/// Different branch kind for SVG drawing.
#[derive(PartialEq)]
pub enum SvgBranchKind {
Timeline,
Lease,
}

struct SvgDraw<'a> {
storage: &'a StorageModel,
branches: &'a [String],
seg_to_branch: &'a [usize],
seg_to_branch: &'a [(usize, SvgBranchKind)],
sizes: &'a [SegmentSizeResult],

// layout
Expand Down Expand Up @@ -42,13 +49,18 @@ fn draw_legend(result: &mut String) -> anyhow::Result<()> {
"<line x1=\"5\" y1=\"70\" x2=\"15\" y2=\"70\" stroke-width=\"1\" stroke=\"gray\" />"
)?;
writeln!(result, "<text x=\"20\" y=\"75\">WAL not retained</text>")?;
writeln!(
result,
"<line x1=\"10\" y1=\"85\" x2=\"10\" y2=\"95\" stroke-width=\"3\" stroke=\"blue\" />"
)?;
writeln!(result, "<text x=\"20\" y=\"95\">LSN lease</text>")?;
Ok(())
}

pub fn draw_svg(
storage: &StorageModel,
branches: &[String],
seg_to_branch: &[usize],
seg_to_branch: &[(usize, SvgBranchKind)],
sizes: &SizeResult,
) -> anyhow::Result<String> {
let mut draw = SvgDraw {
Expand Down Expand Up @@ -100,7 +112,7 @@ impl<'a> SvgDraw<'a> {

// Layout the timelines on Y dimension.
// TODO
let mut y = 100.0;
let mut y = 120.0;
let mut branch_y_coordinates = Vec::new();
for _branch in self.branches {
branch_y_coordinates.push(y);
Expand All @@ -109,7 +121,7 @@ impl<'a> SvgDraw<'a> {

// Calculate coordinates for each point
let seg_coordinates = std::iter::zip(segments, self.seg_to_branch)
.map(|(seg, branch_id)| {
.map(|(seg, (branch_id, _))| {
let x = (seg.lsn - min_lsn) as f32 / xscale;
let y = branch_y_coordinates[*branch_id];
(x, y)
Expand Down Expand Up @@ -175,6 +187,22 @@ impl<'a> SvgDraw<'a> {

// draw a snapshot point if it's needed
let (coord_x, coord_y) = self.seg_coordinates[seg_id];

let (_, kind) = &self.seg_to_branch[seg_id];
if kind == &SvgBranchKind::Lease {
let (x1, y1) = (coord_x, coord_y - 10.0);
let (x2, y2) = (coord_x, coord_y + 10.0);

let style = "stroke-width=\"3\" stroke=\"blue\"";

writeln!(
result,
"<line x1=\"{x1}\" y1=\"{y1}\" x2=\"{x2}\" y2=\"{y2}\" {style}>",
)?;
writeln!(result, " <title>leased lsn at {}</title>", seg.lsn)?;
writeln!(result, "</line>")?;
}

if self.sizes[seg_id].method == SegmentMethod::SnapshotHere {
writeln!(
result,
Expand Down
22 changes: 13 additions & 9 deletions pageserver/src/http/openapi_spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -265,15 +265,19 @@ paths:
type: string
format: hex
post:
description: Obtain lease for the given LSN
parameters:
- name: lsn
in: query
required: true
schema:
type: string
format: hex
description: A LSN to obtain the lease for
description: Obtains a lease for the given LSN.
requestBody:
content:
application/json:
schema:
type: object
required:
- lsn
properties:
lsn:
description: A LSN to obtain the lease for.
type: string
format: hex
responses:
"200":
description: OK
Expand Down
18 changes: 11 additions & 7 deletions pageserver/src/http/routes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use pageserver_api::models::ListAuxFilesRequest;
use pageserver_api::models::LocationConfig;
use pageserver_api::models::LocationConfigListResponse;
use pageserver_api::models::LsnLease;
use pageserver_api::models::LsnLeaseRequest;
use pageserver_api::models::ShardParameters;
use pageserver_api::models::TenantDetails;
use pageserver_api::models::TenantLocationConfigResponse;
Expand All @@ -42,7 +43,7 @@ use pageserver_api::shard::TenantShardId;
use remote_storage::DownloadError;
use remote_storage::GenericRemoteStorage;
use remote_storage::TimeTravelError;
use tenant_size_model::{SizeResult, StorageModel};
use tenant_size_model::{svg::SvgBranchKind, SizeResult, StorageModel};
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::auth::JwtAuth;
Expand Down Expand Up @@ -1195,10 +1196,15 @@ fn synthetic_size_html_response(
timeline_map.insert(ti.timeline_id, index);
timeline_ids.push(ti.timeline_id.to_string());
}
let seg_to_branch: Vec<usize> = inputs
let seg_to_branch: Vec<(usize, SvgBranchKind)> = inputs
.segments
.iter()
.map(|seg| *timeline_map.get(&seg.timeline_id).unwrap())
.map(|seg| {
(
*timeline_map.get(&seg.timeline_id).unwrap(),
seg.kind.into(),
)
})
.collect();

let svg =
Expand Down Expand Up @@ -1531,15 +1537,13 @@ async fn handle_tenant_break(

// Obtains an lsn lease on the given timeline.
async fn lsn_lease_handler(
request: Request<Body>,
mut request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;

let lsn: Lsn = parse_query_param(&request, "lsn")?
.ok_or_else(|| ApiError::BadRequest(anyhow!("missing 'lsn' query parameter")))?;
let lsn = json_request::<LsnLeaseRequest>(&mut request).await?.lsn;

let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);

Expand Down
85 changes: 80 additions & 5 deletions pageserver/src/tenant/size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;

use tenant_size_model::svg::SvgBranchKind;
use tokio::sync::oneshot::error::RecvError;
use tokio::sync::Semaphore;
use tokio_util::sync::CancellationToken;
Expand Down Expand Up @@ -87,6 +88,9 @@ impl SegmentMeta {
LsnKind::BranchPoint => true,
LsnKind::GcCutOff => true,
LsnKind::BranchEnd => false,
LsnKind::LeasePoint => true,
LsnKind::LeaseStart => false,
LsnKind::LeaseEnd => false,
}
}
}
Expand All @@ -103,6 +107,21 @@ pub enum LsnKind {
GcCutOff,
/// Last record LSN
BranchEnd,
/// A LSN lease is granted here.
LeasePoint,
/// A lease starts from here.
LeaseStart,
/// Last record LSN for the lease (should have the same LSN as the previous [`LsnKind::LeaseStart`]).
LeaseEnd,
}

impl From<LsnKind> for SvgBranchKind {
fn from(kind: LsnKind) -> Self {
match kind {
LsnKind::LeasePoint | LsnKind::LeaseStart | LsnKind::LeaseEnd => SvgBranchKind::Lease,
_ => SvgBranchKind::Timeline,
}
}
}

/// Collect all relevant LSNs to the inputs. These will only be helpful in the serialized form as
Expand All @@ -124,6 +143,9 @@ pub struct TimelineInputs {

/// Cutoff point calculated from the user-supplied 'max_retention_period'
retention_param_cutoff: Option<Lsn>,

/// Lease points on the timeline
lease_points: Vec<Lsn>,
}

/// Gathers the inputs for the tenant sizing model.
Expand Down Expand Up @@ -234,6 +256,13 @@ pub(super) async fn gather_inputs(
None
};

let lease_points = gc_info
.leases
.keys()
.filter(|&&lsn| lsn > ancestor_lsn)
.copied()
.collect::<Vec<_>>();

// next_gc_cutoff in parent branch are not of interest (right now at least), nor do we
// want to query any logical size before initdb_lsn.
let branch_start_lsn = cmp::max(ancestor_lsn, timeline.initdb_lsn);
Expand All @@ -248,6 +277,8 @@ pub(super) async fn gather_inputs(
.map(|lsn| (lsn, LsnKind::BranchPoint))
.collect::<Vec<_>>();

lsns.extend(lease_points.iter().map(|&lsn| (lsn, LsnKind::LeasePoint)));

drop(gc_info);

// Add branch points we collected earlier, just in case there were any that were
Expand Down Expand Up @@ -296,6 +327,7 @@ pub(super) async fn gather_inputs(
if kind == LsnKind::BranchPoint {
branchpoint_segments.insert((timeline_id, lsn), segments.len());
}

segments.push(SegmentMeta {
segment: Segment {
parent: Some(parent),
Expand All @@ -306,7 +338,45 @@ pub(super) async fn gather_inputs(
timeline_id: timeline.timeline_id,
kind,
});
parent += 1;

parent = segments.len() - 1;

if kind == LsnKind::LeasePoint {
// Needs `LeaseStart` and `LeaseEnd` as well to model lease as a read-only branch that never writes data
// (i.e. it's lsn has not advanced from ancestor_lsn), and therefore the three segments have the same LSN
// value. Without the other two segments, the calculation code would not count the leased LSN as a point
// to be retained.
// Did not use `BranchStart` or `BranchEnd` so we can differentiate branches and leases during debug.
//
// Alt Design: rewrite the entire calculation code to be independent of timeline id. Both leases and
// branch points can be given a synthetic id so we can unite them.
let mut lease_parent = parent;

// Start of a lease.
segments.push(SegmentMeta {
segment: Segment {
parent: Some(lease_parent),
lsn: lsn.0,
size: None, // Filled in later, if necessary
needed: lsn > next_gc_cutoff, // only needed if the point is within rentention.
},
timeline_id: timeline.timeline_id,
kind: LsnKind::LeaseStart,
});
lease_parent += 1;

// End of the lease.
segments.push(SegmentMeta {
segment: Segment {
parent: Some(lease_parent),
lsn: lsn.0,
size: None, // Filled in later, if necessary
needed: true, // everything at the lease LSN must be readable => is needed
},
timeline_id: timeline.timeline_id,
kind: LsnKind::LeaseEnd,
});
}
}

// Current end of the timeline
Expand All @@ -332,6 +402,7 @@ pub(super) async fn gather_inputs(
pitr_cutoff,
next_gc_cutoff,
retention_param_cutoff,
lease_points,
});
}

Expand Down Expand Up @@ -674,7 +745,8 @@ fn verify_size_for_multiple_branches() {
"horizon_cutoff": "0/2210CD0",
"pitr_cutoff": "0/2210CD0",
"next_gc_cutoff": "0/2210CD0",
"retention_param_cutoff": null
"retention_param_cutoff": null,
"lease_points": []
},
{
"timeline_id": "454626700469f0a9914949b9d018e876",
Expand All @@ -684,7 +756,8 @@ fn verify_size_for_multiple_branches() {
"horizon_cutoff": "0/1817770",
"pitr_cutoff": "0/1817770",
"next_gc_cutoff": "0/1817770",
"retention_param_cutoff": null
"retention_param_cutoff": null,
"lease_points": []
},
{
"timeline_id": "cb5e3cbe60a4afc00d01880e1a37047f",
Expand All @@ -694,7 +767,8 @@ fn verify_size_for_multiple_branches() {
"horizon_cutoff": "0/18B3D98",
"pitr_cutoff": "0/18B3D98",
"next_gc_cutoff": "0/18B3D98",
"retention_param_cutoff": null
"retention_param_cutoff": null,
"lease_points": []
}
]
}
Expand Down Expand Up @@ -749,7 +823,8 @@ fn verify_size_for_one_branch() {
"horizon_cutoff": "47/240A5860",
"pitr_cutoff": "47/240A5860",
"next_gc_cutoff": "47/240A5860",
"retention_param_cutoff": "0/0"
"retention_param_cutoff": "0/0",
"lease_points": []
}
]
}"#;
Expand Down
9 changes: 9 additions & 0 deletions pageserver/src/tenant/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use anyhow::{anyhow, bail, ensure, Context, Result};
use arc_swap::ArcSwap;
use bytes::Bytes;
use camino::Utf8Path;
use chrono::{DateTime, Utc};
use enumset::EnumSet;
use fail::fail_point;
use once_cell::sync::Lazy;
Expand Down Expand Up @@ -1590,7 +1591,13 @@ impl Timeline {
let existing_lease = occupied.get_mut();
if valid_until > existing_lease.valid_until {
existing_lease.valid_until = valid_until;
let dt: DateTime<Utc> = valid_until.into();
info!("lease extended to {}", dt);
} else {
let dt: DateTime<Utc> = existing_lease.valid_until.into();
info!("existing lease covers greater length, valid until {}", dt);
}

existing_lease.clone()
} else {
// Reject already GC-ed LSN (lsn < latest_gc_cutoff)
Expand All @@ -1599,6 +1606,8 @@ impl Timeline {
bail!("tried to request a page version that was garbage collected. requested at {} gc cutoff {}", lsn, *latest_gc_cutoff_lsn);
}

let dt: DateTime<Utc> = valid_until.into();
info!("lease created, valid until {}", dt);
entry.or_insert(LsnLease { valid_until }).clone()
}
};
Expand Down
Loading

1 comment on commit 19accfe

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

3105 tests run: 2978 passed, 0 failed, 127 skipped (full report)


Flaky tests (1)

Postgres 14

Code coverage* (full report)

  • functions: 32.6% (6932 of 21267 functions)
  • lines: 50.0% (54454 of 108873 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
19accfe at 2024-07-04T16:49:19.006Z :recycle:

Please sign in to comment.