Skip to content

Commit

Permalink
Add counters for commands processed through the libpq page service API
Browse files Browse the repository at this point in the history
I was looking for metrics on how many computes are still using
protocol versions 1 and 2. This provides counters for that as
"pagestream" and "pagestream_v2" commands, but also all the other
commands. The new metrics are global for the whole pageserver instance
rather than per-tenant, so the additional metrics bloat should be
fairly small.
  • Loading branch information
hlinnaka committed Jun 25, 2024
1 parent 07f21dd commit fac383b
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 1 deletion.
41 changes: 41 additions & 0 deletions pageserver/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1445,6 +1445,46 @@ pub(crate) static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
.expect("failed to define a metric")
});

/// Kinds of commands counted by `COMPUTE_COMMANDS_COUNTERS`, one counter per variant.
///
/// `enum_map::Enum` lets the variants index an `EnumMap`, and `IntoStaticStr`
/// converts a variant into the `&'static str` used as the `command` label value.
// NOTE(review): no strum `serialize` attribute is visible here, so the label is
// presumably the variant name verbatim (e.g. "PageStreamV2") rather than a
// snake_case spelling such as "pagestream_v2" — confirm the intended label format.
#[derive(Clone, Copy, enum_map::Enum, IntoStaticStr)]
pub(crate) enum ComputeCommandKind {
PageStreamV2,
PageStream,
Basebackup,
GetLastRecordRlsn,
Fullbackup,
ImportBasebackup,
ImportWal,
LeaseLsn,
Show,
}

/// Holds one `IntCounter` per `ComputeCommandKind`; use `for_command` to look one up.
pub(crate) struct ComputeCommandCounters {
// Counter for each command kind, indexed directly by the enum variant.
map: EnumMap<ComputeCommandKind, IntCounter>,
}

/// Counters of compute -> pageserver commands, labelled only by `command`.
///
/// The counter for every `ComputeCommandKind` variant is resolved from the
/// underlying counter vec once, during initialization, and stored in an
/// `EnumMap` so that callers can fetch it by variant afterwards.
pub(crate) static COMPUTE_COMMANDS_COUNTERS: Lazy<ComputeCommandCounters> = Lazy::new(|| {
    let counter_vec = register_int_counter_vec!(
        "pageserver_compute_commands",
        "Number of compute -> pageserver commands processed",
        &["command"]
    )
    .expect("failed to define a metric");

    // Build the backing array in enum-index order: slot `idx` belongs to the
    // variant that `Enum::from_usize(idx)` yields.
    let per_command = std::array::from_fn(|idx| {
        let kind = <ComputeCommandKind as enum_map::Enum>::from_usize(idx);
        let label: &'static str = kind.into();
        counter_vec.with_label_values(&[label])
    });

    ComputeCommandCounters {
        map: EnumMap::from_array(per_command),
    }
});

impl ComputeCommandCounters {
    /// Returns the counter associated with `command`.
    ///
    /// The returned reference borrows from `self`; call `.inc()` on it to
    /// record one processed command of that kind.
    pub(crate) fn for_command(&self, command: ComputeCommandKind) -> &IntCounter {
        let Self { map } = self;
        &map[command]
    }
}

// remote storage metrics

static REMOTE_TIMELINE_CLIENT_CALLS: Lazy<IntCounterPairVec> = Lazy::new(|| {
Expand Down Expand Up @@ -2949,4 +2989,5 @@ pub fn preinitialize_metrics() {
Lazy::force(&RECONSTRUCT_TIME);
Lazy::force(&tenant_throttling::TIMELINE_GET);
Lazy::force(&BASEBACKUP_QUERY_TIME);
Lazy::force(&COMPUTE_COMMANDS_COUNTERS);
}
39 changes: 38 additions & 1 deletion pageserver/src/page_service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ use crate::basebackup::BasebackupError;
use crate::context::{DownloadBehavior, RequestContext};
use crate::import_datadir::import_wal_from_tar;
use crate::metrics;
use crate::metrics::LIVE_CONNECTIONS_COUNT;
use crate::metrics::{ComputeCommandKind, COMPUTE_COMMANDS_COUNTERS, LIVE_CONNECTIONS_COUNT};
use crate::pgdatadir_mapping::Version;
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id;
Expand Down Expand Up @@ -1554,6 +1554,10 @@ where

self.check_permission(Some(tenant_id))?;

COMPUTE_COMMANDS_COUNTERS
.for_command(ComputeCommandKind::PageStreamV2)
.inc();

self.handle_pagerequests(
pgb,
tenant_id,
Expand All @@ -1579,6 +1583,10 @@ where

self.check_permission(Some(tenant_id))?;

COMPUTE_COMMANDS_COUNTERS
.for_command(ComputeCommandKind::PageStream)
.inc();

self.handle_pagerequests(
pgb,
tenant_id,
Expand All @@ -1605,6 +1613,10 @@ where

self.check_permission(Some(tenant_id))?;

COMPUTE_COMMANDS_COUNTERS
.for_command(ComputeCommandKind::Basebackup)
.inc();

let lsn = if let Some(lsn_str) = params.get(2) {
Some(
Lsn::from_str(lsn_str)
Expand Down Expand Up @@ -1662,6 +1674,11 @@ where
.record("timeline_id", field::display(timeline_id));

self.check_permission(Some(tenant_id))?;

COMPUTE_COMMANDS_COUNTERS
.for_command(ComputeCommandKind::GetLastRecordRlsn)
.inc();

async {
let timeline = self
.get_active_tenant_timeline(tenant_id, timeline_id, ShardSelector::Zero)
Expand Down Expand Up @@ -1723,6 +1740,10 @@ where

self.check_permission(Some(tenant_id))?;

COMPUTE_COMMANDS_COUNTERS
.for_command(ComputeCommandKind::Fullbackup)
.inc();

// Check that the timeline exists
self.handle_basebackup_request(
pgb,
Expand Down Expand Up @@ -1771,6 +1792,10 @@ where

self.check_permission(Some(tenant_id))?;

COMPUTE_COMMANDS_COUNTERS
.for_command(ComputeCommandKind::ImportBasebackup)
.inc();

match self
.handle_import_basebackup(
pgb,
Expand Down Expand Up @@ -1818,6 +1843,10 @@ where

self.check_permission(Some(tenant_id))?;

COMPUTE_COMMANDS_COUNTERS
.for_command(ComputeCommandKind::ImportWal)
.inc();

match self
.handle_import_wal(pgb, tenant_id, timeline_id, start_lsn, end_lsn, ctx)
.await
Expand Down Expand Up @@ -1855,6 +1884,10 @@ where

self.check_permission(Some(tenant_shard_id.tenant_id))?;

COMPUTE_COMMANDS_COUNTERS
.for_command(ComputeCommandKind::LeaseLsn)
.inc();

// The caller is responsible for providing correct lsn.
let lsn = Lsn::from_str(params[2])
.with_context(|| format!("Failed to parse Lsn from {}", params[2]))?;
Expand Down Expand Up @@ -1886,6 +1919,10 @@ where

self.check_permission(Some(tenant_id))?;

COMPUTE_COMMANDS_COUNTERS
.for_command(ComputeCommandKind::Show)
.inc();

let tenant = self
.get_active_tenant_with_timeout(
tenant_id,
Expand Down

0 comments on commit fac383b

Please sign in to comment.