diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 5c8f350f7b2a..c6b160733167 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -545,6 +545,15 @@ static AUX_FILE_SIZE: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +static VALID_LSN_LEASE_COUNT: Lazy = Lazy::new(|| { + register_uint_gauge_vec!( + "pageserver_valid_lsn_lease_count", + "The number of valid leases after refreshing gc info.", + &["tenant_id", "shard_id", "timeline_id"], + ) + .expect("failed to define a metric") +}); + pub(crate) mod initial_logical_size { use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec}; use once_cell::sync::Lazy; @@ -2055,6 +2064,8 @@ pub(crate) struct TimelineMetrics { pub directory_entries_count_gauge: Lazy UIntGauge>>, pub evictions: IntCounter, pub evictions_with_low_residence_duration: std::sync::RwLock, + /// Number of valid LSN leases. + pub valid_lsn_lease_count_gauge: UIntGauge, shutdown: std::sync::atomic::AtomicBool, } @@ -2153,6 +2164,10 @@ impl TimelineMetrics { let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder .build(&tenant_id, &shard_id, &timeline_id); + let valid_lsn_lease_count_gauge = VALID_LSN_LEASE_COUNT + .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) + .unwrap(); + TimelineMetrics { tenant_id, shard_id, @@ -2175,6 +2190,7 @@ impl TimelineMetrics { evictions_with_low_residence_duration: std::sync::RwLock::new( evictions_with_low_residence_duration, ), + valid_lsn_lease_count_gauge, shutdown: std::sync::atomic::AtomicBool::default(), } } @@ -2224,6 +2240,7 @@ impl TimelineMetrics { } let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]); + let _ = VALID_LSN_LEASE_COUNT.remove_label_values(&[tenant_id, shard_id, timeline_id]); self.evictions_with_low_residence_duration .write() diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 62f066862a16..4e03e09a9b8d 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -2960,6 +2960,11 @@ impl Tenant { let now = SystemTime::now(); target.leases.retain(|_, lease| !lease.is_expired(&now)); + timeline + .metrics + .valid_lsn_lease_count_gauge + .set(target.leases.len() as u64); + match gc_cutoffs.remove(&timeline.timeline_id) { Some(cutoffs) => { target.retain_lsns = branchpoints; diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index e01bb6da5165..41fa8e679f28 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -149,6 +149,7 @@ def histogram(prefix_without_trailing_underscore: str) -> List[str]: "pageserver_evictions_total", "pageserver_evictions_with_low_residence_duration_total", "pageserver_aux_file_estimated_size", + "pageserver_valid_lsn_lease_count", *PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS, # "pageserver_directory_entries_count", -- only used if above a certain threshold # "pageserver_broken_tenants_count" -- used only for broken