Skip to content

Commit

Permalink
Add counters for commands processed through the libpq page service API
Browse files Browse the repository at this point in the history
I was looking for metrics on how many computes are still using
protocol versions 1 and 2. This adds counters for those, labeled as the
"pagestream" and "pagestream_v2" commands, as well as for all the other
commands. The new metrics are global for the whole pageserver instance
rather than per-tenant, so the additional metrics bloat should be
fairly small.
  • Loading branch information
hlinnaka committed Jun 18, 2024
1 parent 6c6a7f9 commit 1a75651
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 1 deletion.
9 changes: 9 additions & 0 deletions pageserver/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1489,6 +1489,15 @@ pub(crate) static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
.expect("failed to define a metric")
});

/// Counter vector for commands received from computes over the page service API.
///
/// Registered under the metric name `pageserver_compute_commands` with a single
/// `command` label; call sites select the series with
/// `with_label_values(&["<command>"])` and then `.inc()` it (for example
/// `"pagestream"`, `"pagestream_v2"`, `"basebackup"`).
///
/// There is no tenant or timeline label, so the number of series grows only
/// with the number of distinct command names.
///
/// # Panics
///
/// The first access panics with "failed to define a metric" if registration
/// returns an error.
pub(crate) static COMPUTE_COMMANDS_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_compute_commands",
"Number of compute -> pageserver commands processed",
&["command"]
)
.expect("failed to define a metric")
});

// remote storage metrics

static REMOTE_TIMELINE_CLIENT_CALLS: Lazy<IntCounterPairVec> = Lazy::new(|| {
Expand Down
37 changes: 36 additions & 1 deletion pageserver/src/page_service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ use crate::basebackup::BasebackupError;
use crate::context::{DownloadBehavior, RequestContext};
use crate::import_datadir::import_wal_from_tar;
use crate::metrics;
use crate::metrics::LIVE_CONNECTIONS_COUNT;
use crate::metrics::{COMPUTE_COMMANDS_COUNT, LIVE_CONNECTIONS_COUNT};
use crate::pgdatadir_mapping::Version;
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id;
Expand Down Expand Up @@ -1554,6 +1554,10 @@ where

self.check_permission(Some(tenant_id))?;

COMPUTE_COMMANDS_COUNT
.with_label_values(&["pagestream_v2"])
.inc();

self.handle_pagerequests(
pgb,
tenant_id,
Expand All @@ -1579,6 +1583,10 @@ where

self.check_permission(Some(tenant_id))?;

COMPUTE_COMMANDS_COUNT
.with_label_values(&["pagestream"])
.inc();

self.handle_pagerequests(
pgb,
tenant_id,
Expand All @@ -1605,6 +1613,10 @@ where

self.check_permission(Some(tenant_id))?;

COMPUTE_COMMANDS_COUNT
.with_label_values(&["basebackup"])
.inc();

let lsn = if let Some(lsn_str) = params.get(2) {
Some(
Lsn::from_str(lsn_str)
Expand Down Expand Up @@ -1662,6 +1674,11 @@ where
.record("timeline_id", field::display(timeline_id));

self.check_permission(Some(tenant_id))?;

COMPUTE_COMMANDS_COUNT
.with_label_values(&["get_last_record_rlsn"])
.inc();

async {
let timeline = self
.get_active_tenant_timeline(tenant_id, timeline_id, ShardSelector::Zero)
Expand Down Expand Up @@ -1723,6 +1740,10 @@ where

self.check_permission(Some(tenant_id))?;

COMPUTE_COMMANDS_COUNT
.with_label_values(&["fullbackup"])
.inc();

// Check that the timeline exists
self.handle_basebackup_request(
pgb,
Expand Down Expand Up @@ -1771,6 +1792,10 @@ where

self.check_permission(Some(tenant_id))?;

COMPUTE_COMMANDS_COUNT
.with_label_values(&["import basebackup"])
.inc();

match self
.handle_import_basebackup(
pgb,
Expand Down Expand Up @@ -1818,6 +1843,10 @@ where

self.check_permission(Some(tenant_id))?;

COMPUTE_COMMANDS_COUNT
.with_label_values(&["import wal"])
.inc();

match self
.handle_import_wal(pgb, tenant_id, timeline_id, start_lsn, end_lsn, ctx)
.await
Expand Down Expand Up @@ -1855,6 +1884,10 @@ where

self.check_permission(Some(tenant_shard_id.tenant_id))?;

COMPUTE_COMMANDS_COUNT
.with_label_values(&["lease lsn"])
.inc();

// The caller is responsible for providing correct lsn.
let lsn = Lsn::from_str(params[2])
.with_context(|| format!("Failed to parse Lsn from {}", params[2]))?;
Expand Down Expand Up @@ -1886,6 +1919,8 @@ where

self.check_permission(Some(tenant_id))?;

COMPUTE_COMMANDS_COUNT.with_label_values(&["show"]).inc();

let tenant = self
.get_active_tenant_with_timeout(
tenant_id,
Expand Down

0 comments on commit 1a75651

Please sign in to comment.