Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

Introduce jemalloc-allocator feature flag #6675

Merged
merged 18 commits into from
Feb 9, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 7 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,22 @@ repository = "https://github.com/paritytech/polkadot.git"
version = "0.9.37"

[dependencies]
polkadot-cli = { path = "cli", features = [ "kusama-native", "westend-native", "rococo-native" ] }
color-eyre = { version = "0.6.1", default-features = false }
tikv-jemallocator = "0.5.0"

# Crates in our workspace, defined as dependencies so we can pass them feature flags.
polkadot-cli = { path = "cli", features = [ "kusama-native", "westend-native", "rococo-native" ] }
polkadot-node-core-pvf = { path = "node/core/pvf" }
polkadot-overseer = { path = "node/overseer" }

[dev-dependencies]
assert_cmd = "2.0.4"
nix = { version = "0.26.1", features = ["signal"] }
tempfile = "3.2.0"
tokio = "1.24.2"
substrate-rpc-client = { git = "https://github.com/paritytech/substrate", branch = "master" }
polkadot-core-primitives = { path = "core-primitives" }

[workspace]
members = [
"cli",
Expand Down Expand Up @@ -202,6 +206,7 @@ try-runtime = [ "polkadot-cli/try-runtime" ]
fast-runtime = [ "polkadot-cli/fast-runtime" ]
runtime-metrics = [ "polkadot-cli/runtime-metrics" ]
pyroscope = ["polkadot-cli/pyroscope"]
jemalloc-stats = ["polkadot-node-core-pvf/jemalloc-stats", "polkadot-overseer/jemalloc-stats"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please rename this to jemalloc or jemalloc-allocator and then do the trick that @koute mentioned with enabling it automatically for Linux.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, i've done this. Thanks!! Please review again


# Configuration for building a .deb package - for use with `cargo-deb`
[package.metadata.deb]
Expand Down
3 changes: 3 additions & 0 deletions node/core/pvf/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,6 @@ adder = { package = "test-parachain-adder", path = "../../../parachain/test-para
halt = { package = "test-parachain-halt", path = "../../../parachain/test-parachains/halt" }
hex-literal = "0.3.4"
tempfile = "3.3.0"

[features]
jemalloc-stats = []
322 changes: 164 additions & 158 deletions node/core/pvf/src/prepare/memory_stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,7 @@

use crate::{metrics::Metrics, LOG_TARGET};
use parity_scale_codec::{Decode, Encode};
use std::{
io,
sync::mpsc::{Receiver, RecvTimeoutError, Sender},
time::Duration,
};
use tikv_jemalloc_ctl::{epoch, stats, Error};
use tokio::task::JoinHandle;

#[cfg(target_os = "linux")]
use libc::{getrusage, rusage, timeval, RUSAGE_THREAD};
use std::io;

/// Helper struct to contain all the memory stats, including [`MemoryAllocationStats`] and, if
/// supported by the OS, `ru_maxrss`.
Expand All @@ -60,164 +51,17 @@ pub struct MemoryAllocationStats {
pub allocated: u64,
}

#[derive(Clone)]
struct MemoryAllocationTracker {
epoch: tikv_jemalloc_ctl::epoch_mib,
allocated: stats::allocated_mib,
resident: stats::resident_mib,
}

impl MemoryAllocationTracker {
pub fn new() -> Result<Self, Error> {
Ok(Self {
epoch: epoch::mib()?,
allocated: stats::allocated::mib()?,
resident: stats::resident::mib()?,
})
}

pub fn snapshot(&self) -> Result<MemoryAllocationStats, Error> {
// update stats by advancing the allocation epoch
self.epoch.advance()?;

// Convert to `u64`, as `usize` is not `Encode`able.
let allocated = self.allocated.read()? as u64;
let resident = self.resident.read()? as u64;
Ok(MemoryAllocationStats { allocated, resident })
}
}

/// Get the rusage stats for the current thread.
#[cfg(target_os = "linux")]
fn getrusage_thread() -> io::Result<rusage> {
let mut result = rusage {
ru_utime: timeval { tv_sec: 0, tv_usec: 0 },
ru_stime: timeval { tv_sec: 0, tv_usec: 0 },
ru_maxrss: 0,
ru_ixrss: 0,
ru_idrss: 0,
ru_isrss: 0,
ru_minflt: 0,
ru_majflt: 0,
ru_nswap: 0,
ru_inblock: 0,
ru_oublock: 0,
ru_msgsnd: 0,
ru_msgrcv: 0,
ru_nsignals: 0,
ru_nvcsw: 0,
ru_nivcsw: 0,
};
if unsafe { getrusage(RUSAGE_THREAD, &mut result) } == -1 {
return Err(io::Error::last_os_error())
}
Ok(result)
}

/// Gets the `ru_maxrss` for the current thread if the OS supports `getrusage`. Otherwise, just
/// returns `None`.
pub fn get_max_rss_thread() -> Option<io::Result<i64>> {
// `c_long` is either `i32` or `i64` depending on architecture. `i64::from` always works.
#[cfg(target_os = "linux")]
let max_rss = Some(getrusage_thread().map(|rusage| i64::from(rusage.ru_maxrss)));
let max_rss = Some(getrusage::getrusage_thread().map(|rusage| i64::from(rusage.ru_maxrss)));
#[cfg(not(target_os = "linux"))]
let max_rss = None;
max_rss
}

/// Runs a thread in the background that observes memory statistics. The goal is to try to get
/// accurate stats during preparation.
///
/// # Algorithm
///
/// 1. Create the memory tracker.
///
/// 2. Sleep for some short interval. Whenever we wake up, take a snapshot by updating the
/// allocation epoch.
///
/// 3. When we receive a signal that preparation has completed, take one last snapshot and return
/// the maximum observed values.
///
/// # Errors
///
/// For simplicity, any errors are returned as a string. As this is not a critical component, errors
/// are used for informational purposes (logging) only.
pub fn memory_tracker_loop(finished_rx: Receiver<()>) -> Result<MemoryAllocationStats, String> {
// This doesn't need to be too fine-grained since preparation currently takes 3-10s or more.
// Apart from that, there is not really a science to this number.
const POLL_INTERVAL: Duration = Duration::from_millis(100);

let tracker = MemoryAllocationTracker::new().map_err(|err| err.to_string())?;
let mut max_stats = MemoryAllocationStats::default();

let mut update_stats = || -> Result<(), String> {
let current_stats = tracker.snapshot().map_err(|err| err.to_string())?;
if current_stats.resident > max_stats.resident {
max_stats.resident = current_stats.resident;
}
if current_stats.allocated > max_stats.allocated {
max_stats.allocated = current_stats.allocated;
}
Ok(())
};

loop {
// Take a snapshot and update the max stats.
update_stats()?;

// Sleep.
match finished_rx.recv_timeout(POLL_INTERVAL) {
// Received finish signal.
Ok(()) => {
update_stats()?;
return Ok(max_stats)
},
// Timed out, restart loop.
Err(RecvTimeoutError::Timeout) => continue,
Err(RecvTimeoutError::Disconnected) =>
return Err("memory_tracker_loop: finished_rx disconnected".into()),
}
}
}

/// Helper function to terminate the memory tracker thread and get the stats. Helps isolate all this
/// error handling.
pub async fn get_memory_tracker_loop_stats(
fut: JoinHandle<Result<MemoryAllocationStats, String>>,
tx: Sender<()>,
) -> Option<MemoryAllocationStats> {
// Signal to the memory tracker thread to terminate.
if let Err(err) = tx.send(()) {
gum::warn!(
target: LOG_TARGET,
worker_pid = %std::process::id(),
"worker: error sending signal to memory tracker_thread: {}", err
);
None
} else {
// Join on the thread handle.
match fut.await {
Ok(Ok(stats)) => Some(stats),
Ok(Err(err)) => {
gum::warn!(
target: LOG_TARGET,
worker_pid = %std::process::id(),
"worker: error occurred in the memory tracker thread: {}", err
);
None
},
Err(err) => {
gum::warn!(
target: LOG_TARGET,
worker_pid = %std::process::id(),
"worker: error joining on memory tracker thread: {}", err
);
None
},
}
}
}

/// Helper function to send the memory metrics, if available, to prometheus.
pub fn observe_memory_metrics(metrics: &Metrics, memory_stats: MemoryStats, pid: u32) {
if let Some(max_rss) = memory_stats.max_rss {
Expand All @@ -241,3 +85,165 @@ pub fn observe_memory_metrics(metrics: &Metrics, memory_stats: MemoryStats, pid:
metrics.observe_preparation_max_allocated(allocated_kb);
}
}

#[cfg(any(target_os = "linux", feature = "jemalloc-stats"))]
pub mod memory_tracker {
use super::*;
use std::{
sync::mpsc::{Receiver, RecvTimeoutError, Sender},
time::Duration,
};
use tikv_jemalloc_ctl::{epoch, stats, Error};
use tokio::task::JoinHandle;

#[derive(Clone)]
struct MemoryAllocationTracker {
epoch: tikv_jemalloc_ctl::epoch_mib,
allocated: stats::allocated_mib,
resident: stats::resident_mib,
}

impl MemoryAllocationTracker {
pub fn new() -> Result<Self, Error> {
Ok(Self {
epoch: epoch::mib()?,
allocated: stats::allocated::mib()?,
resident: stats::resident::mib()?,
})
}

pub fn snapshot(&self) -> Result<MemoryAllocationStats, Error> {
// update stats by advancing the allocation epoch
self.epoch.advance()?;

// Convert to `u64`, as `usize` is not `Encode`able.
let allocated = self.allocated.read()? as u64;
let resident = self.resident.read()? as u64;
Ok(MemoryAllocationStats { allocated, resident })
}
}

/// Runs a thread in the background that observes memory statistics. The goal is to try to get
/// accurate stats during preparation.
///
/// # Algorithm
///
/// 1. Create the memory tracker.
///
/// 2. Sleep for some short interval. Whenever we wake up, take a snapshot by updating the
/// allocation epoch.
///
/// 3. When we receive a signal that preparation has completed, take one last snapshot and return
/// the maximum observed values.
///
/// # Errors
///
/// For simplicity, any errors are returned as a string. As this is not a critical component, errors
/// are used for informational purposes (logging) only.
pub fn memory_tracker_loop(finished_rx: Receiver<()>) -> Result<MemoryAllocationStats, String> {
// This doesn't need to be too fine-grained since preparation currently takes 3-10s or more.
// Apart from that, there is not really a science to this number.
const POLL_INTERVAL: Duration = Duration::from_millis(100);

let tracker = MemoryAllocationTracker::new().map_err(|err| err.to_string())?;
let mut max_stats = MemoryAllocationStats::default();

let mut update_stats = || -> Result<(), String> {
let current_stats = tracker.snapshot().map_err(|err| err.to_string())?;
if current_stats.resident > max_stats.resident {
max_stats.resident = current_stats.resident;
}
if current_stats.allocated > max_stats.allocated {
max_stats.allocated = current_stats.allocated;
}
Ok(())
};

loop {
// Take a snapshot and update the max stats.
update_stats()?;

// Sleep.
match finished_rx.recv_timeout(POLL_INTERVAL) {
// Received finish signal.
Ok(()) => {
update_stats()?;
return Ok(max_stats)
},
// Timed out, restart loop.
Err(RecvTimeoutError::Timeout) => continue,
Err(RecvTimeoutError::Disconnected) =>
return Err("memory_tracker_loop: finished_rx disconnected".into()),
}
}
}

/// Helper function to terminate the memory tracker thread and get the stats. Helps isolate all this
/// error handling.
pub async fn get_memory_tracker_loop_stats(
fut: JoinHandle<Result<MemoryAllocationStats, String>>,
tx: Sender<()>,
) -> Option<MemoryAllocationStats> {
// Signal to the memory tracker thread to terminate.
if let Err(err) = tx.send(()) {
gum::warn!(
target: LOG_TARGET,
worker_pid = %std::process::id(),
"worker: error sending signal to memory tracker_thread: {}", err
);
None
} else {
// Join on the thread handle.
match fut.await {
Ok(Ok(stats)) => Some(stats),
Ok(Err(err)) => {
gum::warn!(
target: LOG_TARGET,
worker_pid = %std::process::id(),
"worker: error occurred in the memory tracker thread: {}", err
);
None
},
Err(err) => {
gum::warn!(
target: LOG_TARGET,
worker_pid = %std::process::id(),
"worker: error joining on memory tracker thread: {}", err
);
None
},
}
}
}
}

#[cfg(target_os = "linux")]
mod getrusage {
use libc::{getrusage, rusage, timeval, RUSAGE_THREAD};

/// Get the rusage stats for the current thread.
fn getrusage_thread() -> io::Result<rusage> {
let mut result = rusage {
ru_utime: timeval { tv_sec: 0, tv_usec: 0 },
ru_stime: timeval { tv_sec: 0, tv_usec: 0 },
ru_maxrss: 0,
ru_ixrss: 0,
ru_idrss: 0,
ru_isrss: 0,
ru_minflt: 0,
ru_majflt: 0,
ru_nswap: 0,
ru_inblock: 0,
ru_oublock: 0,
ru_msgsnd: 0,
ru_msgrcv: 0,
ru_nsignals: 0,
ru_nvcsw: 0,
ru_nivcsw: 0,
};
if unsafe { getrusage(RUSAGE_THREAD, &mut result) } == -1 {
return Err(io::Error::last_os_error())
}
Ok(result)
}
}
Loading