Skip to content

Commit

Permalink
Rollup merge of #118029 - saethlin:allocid-gc, r=RalfJung
Browse files Browse the repository at this point in the history
Expand Miri's BorTag GC to a Provenance GC

As suggested in #3080 (comment)

We previously solved memory growth issues associated with the Stacked Borrows and Tree Borrows runtimes with a GC. But of course we also have state accumulation associated with whole allocations elsewhere in the interpreter, and this PR starts tackling those.

To do this, we expand the visitor for the GC so that it can visit a BorTag or an AllocId. Instead of collecting all live AllocIds into a single HashSet, we just collect from the Machine itself then go through an accessor `InterpCx::is_alloc_live` which checks a number of allocation data structures in the core interpreter. This avoids the overhead of all the inserts that collecting their keys would require.

r? ``@RalfJung``
  • Loading branch information
Noratrieb committed Nov 21, 2023
2 parents 34aa10e + f46e192 commit 0027b12
Show file tree
Hide file tree
Showing 40 changed files with 424 additions and 308 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ jobs:

- name: Set the tag GC interval to 1 on linux
if: runner.os == 'Linux'
run: echo "MIRIFLAGS=-Zmiri-tag-gc=1" >> $GITHUB_ENV
run: echo "MIRIFLAGS=-Zmiri-provenance-gc=1" >> $GITHUB_ENV

# Cache the global cargo directory, but NOT the local `target` directory which
# we cannot reuse anyway when the nightly changes (and it grows quite large
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -411,10 +411,10 @@ to Miri failing to detect cases of undefined behavior in a program.
without an explicit value), `none` means it never recurses, `scalar` means it only recurses for
types where we would also emit `noalias` annotations in the generated LLVM IR (types passed as
individual scalars or pairs of scalars). Setting this to `none` is **unsound**.
* `-Zmiri-tag-gc=<blocks>` configures how often the pointer tag garbage collector runs. The default
is to search for and remove unreachable tags once every `10000` basic blocks. Setting this to
`0` disables the garbage collector, which causes some programs to have explosive memory usage
and/or super-linear runtime.
* `-Zmiri-provenance-gc=<blocks>` configures how often the pointer provenance garbage collector runs.
The default is to search for and remove unreachable provenance once every `10000` basic blocks. Setting
this to `0` disables the garbage collector, which causes some programs to have explosive memory
usage and/or super-linear runtime.
* `-Zmiri-track-alloc-id=<id1>,<id2>,...` shows a backtrace when the given allocations are
being allocated or freed. This helps in debugging memory leaks and
use after free bugs. Specifying this argument multiple times does not overwrite the previous
Expand Down
4 changes: 2 additions & 2 deletions src/bin/miri.rs
Original file line number Diff line number Diff line change
Expand Up @@ -531,10 +531,10 @@ fn main() {
Err(err) => show_error!("-Zmiri-report-progress requires a `u32`: {}", err),
};
miri_config.report_progress = Some(interval);
} else if let Some(param) = arg.strip_prefix("-Zmiri-tag-gc=") {
} else if let Some(param) = arg.strip_prefix("-Zmiri-provenance-gc=") {
let interval = match param.parse::<u32>() {
Ok(i) => i,
Err(err) => show_error!("-Zmiri-tag-gc requires a `u32`: {}", err),
Err(err) => show_error!("-Zmiri-provenance-gc requires a `u32`: {}", err),
};
miri_config.gc_interval = interval;
} else if let Some(param) = arg.strip_prefix("-Zmiri-measureme=") {
Expand Down
22 changes: 13 additions & 9 deletions src/borrow_tracker/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ pub struct FrameState {
protected_tags: SmallVec<[(AllocId, BorTag); 2]>,
}

impl VisitTags for FrameState {
fn visit_tags(&self, _visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for FrameState {
fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
// `protected_tags` are already recorded by `GlobalStateInner`.
}
}
Expand Down Expand Up @@ -110,10 +110,10 @@ pub struct GlobalStateInner {
unique_is_unique: bool,
}

impl VisitTags for GlobalStateInner {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for GlobalStateInner {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
for &tag in self.protected_tags.keys() {
visit(tag);
visit(None, Some(tag));
}
// The only other candidate is base_ptr_tags, and that does not need visiting since we don't ever
// GC the bottommost/root tag.
Expand Down Expand Up @@ -236,6 +236,10 @@ impl GlobalStateInner {
tag
})
}

pub fn remove_unreachable_allocs(&mut self, allocs: &LiveAllocs<'_, '_, '_>) {
self.base_ptr_tags.retain(|id, _| allocs.is_live(*id));
}
}

/// Which borrow tracking method to use
Expand Down Expand Up @@ -503,11 +507,11 @@ impl AllocState {
}
}

impl VisitTags for AllocState {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for AllocState {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
match self {
AllocState::StackedBorrows(sb) => sb.visit_tags(visit),
AllocState::TreeBorrows(tb) => tb.visit_tags(visit),
AllocState::StackedBorrows(sb) => sb.visit_provenance(visit),
AllocState::TreeBorrows(tb) => tb.visit_provenance(visit),
}
}
}
6 changes: 3 additions & 3 deletions src/borrow_tracker/stacked_borrows/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -462,10 +462,10 @@ impl Stacks {
}
}

impl VisitTags for Stacks {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for Stacks {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
for tag in self.exposed_tags.iter().copied() {
visit(tag);
visit(None, Some(tag));
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/borrow_tracker/tree_borrows/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ impl<'tcx> Tree {
/// Climb the tree to get the tag of a distant ancestor.
/// Allows operations on tags that are unreachable by the program
/// but still exist in the tree. Not guaranteed to perform consistently
/// if `tag-gc=1`.
/// if `provenance-gc=1`.
fn nth_parent(&self, tag: BorTag, nth_parent: u8) -> Option<BorTag> {
let mut idx = self.tag_mapping.get(&tag).unwrap();
for _ in 0..nth_parent {
Expand Down
6 changes: 3 additions & 3 deletions src/borrow_tracker/tree_borrows/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -742,11 +742,11 @@ impl Tree {
}
}

impl VisitTags for Tree {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for Tree {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
// To ensure that the root never gets removed, we visit it
// (the `root` node of `Tree` is not an `Option<_>`)
visit(self.nodes.get(self.root).unwrap().tag)
visit(None, Some(self.nodes.get(self.root).unwrap().tag))
}
}

Expand Down
10 changes: 5 additions & 5 deletions src/concurrency/data_race.rs
Original file line number Diff line number Diff line change
Expand Up @@ -790,9 +790,9 @@ pub struct VClockAlloc {
alloc_ranges: RefCell<RangeMap<MemoryCellClocks>>,
}

impl VisitTags for VClockAlloc {
fn visit_tags(&self, _visit: &mut dyn FnMut(BorTag)) {
// No tags here.
impl VisitProvenance for VClockAlloc {
fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
// No tags or allocIds here.
}
}

Expand Down Expand Up @@ -1404,8 +1404,8 @@ pub struct GlobalState {
pub track_outdated_loads: bool,
}

impl VisitTags for GlobalState {
fn visit_tags(&self, _visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for GlobalState {
fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
// We don't have any tags.
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/concurrency/init_once.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ pub(super) struct InitOnce<'mir, 'tcx> {
data_race: VClock,
}

impl<'mir, 'tcx> VisitTags for InitOnce<'mir, 'tcx> {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl<'mir, 'tcx> VisitProvenance for InitOnce<'mir, 'tcx> {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
for waiter in self.waiters.iter() {
waiter.callback.visit_tags(visit);
waiter.callback.visit_provenance(visit);
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/concurrency/sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,10 @@ pub(crate) struct SynchronizationState<'mir, 'tcx> {
pub(super) init_onces: IndexVec<InitOnceId, InitOnce<'mir, 'tcx>>,
}

impl<'mir, 'tcx> VisitTags for SynchronizationState<'mir, 'tcx> {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl<'mir, 'tcx> VisitProvenance for SynchronizationState<'mir, 'tcx> {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
for init_once in self.init_onces.iter() {
init_once.visit_tags(visit);
init_once.visit_provenance(visit);
}
}
}
Expand Down
38 changes: 19 additions & 19 deletions src/concurrency/thread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ pub enum TlsAllocAction {
}

/// Trait for callbacks that can be executed when some event happens, such as after a timeout.
pub trait MachineCallback<'mir, 'tcx>: VisitTags {
pub trait MachineCallback<'mir, 'tcx>: VisitProvenance {
fn call(&self, ecx: &mut InterpCx<'mir, 'tcx, MiriMachine<'mir, 'tcx>>) -> InterpResult<'tcx>;
}

Expand Down Expand Up @@ -228,8 +228,8 @@ impl<'mir, 'tcx> Thread<'mir, 'tcx> {
}
}

impl VisitTags for Thread<'_, '_> {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for Thread<'_, '_> {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
let Thread {
panic_payloads: panic_payload,
last_error,
Expand All @@ -242,17 +242,17 @@ impl VisitTags for Thread<'_, '_> {
} = self;

for payload in panic_payload {
payload.visit_tags(visit);
payload.visit_provenance(visit);
}
last_error.visit_tags(visit);
last_error.visit_provenance(visit);
for frame in stack {
frame.visit_tags(visit)
frame.visit_provenance(visit)
}
}
}

impl VisitTags for Frame<'_, '_, Provenance, FrameExtra<'_>> {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for Frame<'_, '_, Provenance, FrameExtra<'_>> {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
let Frame {
return_place,
locals,
Expand All @@ -266,22 +266,22 @@ impl VisitTags for Frame<'_, '_, Provenance, FrameExtra<'_>> {
} = self;

// Return place.
return_place.visit_tags(visit);
return_place.visit_provenance(visit);
// Locals.
for local in locals.iter() {
match local.as_mplace_or_imm() {
None => {}
Some(Either::Left((ptr, meta))) => {
ptr.visit_tags(visit);
meta.visit_tags(visit);
ptr.visit_provenance(visit);
meta.visit_provenance(visit);
}
Some(Either::Right(imm)) => {
imm.visit_tags(visit);
imm.visit_provenance(visit);
}
}
}

extra.visit_tags(visit);
extra.visit_provenance(visit);
}
}

Expand Down Expand Up @@ -341,8 +341,8 @@ pub struct ThreadManager<'mir, 'tcx> {
timeout_callbacks: FxHashMap<ThreadId, TimeoutCallbackInfo<'mir, 'tcx>>,
}

impl VisitTags for ThreadManager<'_, '_> {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for ThreadManager<'_, '_> {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
let ThreadManager {
threads,
thread_local_alloc_ids,
Expand All @@ -353,15 +353,15 @@ impl VisitTags for ThreadManager<'_, '_> {
} = self;

for thread in threads {
thread.visit_tags(visit);
thread.visit_provenance(visit);
}
for ptr in thread_local_alloc_ids.borrow().values() {
ptr.visit_tags(visit);
ptr.visit_provenance(visit);
}
for callback in timeout_callbacks.values() {
callback.callback.visit_tags(visit);
callback.callback.visit_provenance(visit);
}
sync.visit_tags(visit);
sync.visit_provenance(visit);
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/concurrency/weak_memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,15 +108,15 @@ pub struct StoreBufferAlloc {
store_buffers: RefCell<RangeObjectMap<StoreBuffer>>,
}

impl VisitTags for StoreBufferAlloc {
fn visit_tags(&self, visit: &mut dyn FnMut(BorTag)) {
impl VisitProvenance for StoreBufferAlloc {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
let Self { store_buffers } = self;
for val in store_buffers
.borrow()
.iter()
.flat_map(|buf| buf.buffer.iter().map(|element| &element.val))
{
val.visit_tags(visit);
val.visit_provenance(visit);
}
}
}
Expand Down
45 changes: 35 additions & 10 deletions src/intptrcast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,21 @@ pub struct GlobalStateInner {
provenance_mode: ProvenanceMode,
}

impl VisitTags for GlobalStateInner {
fn visit_tags(&self, _visit: &mut dyn FnMut(BorTag)) {
// Nothing to visit here.
impl VisitProvenance for GlobalStateInner {
fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
let GlobalStateInner {
int_to_ptr_map: _,
base_addr: _,
exposed: _,
next_base_addr: _,
provenance_mode: _,
} = self;
// Though base_addr, int_to_ptr_map, and exposed contain AllocIds, we do not want to visit them.
// int_to_ptr_map and exposed must contain only live allocations, and those
// are never garbage collected.
// base_addr is only relevant if we have a pointer to an AllocId and need to look up its
// base address; so if an AllocId is not reachable from somewhere else we can remove it
// here.
}
}

Expand All @@ -62,6 +74,12 @@ impl GlobalStateInner {
provenance_mode: config.provenance_mode,
}
}

pub fn remove_unreachable_allocs(&mut self, allocs: &LiveAllocs<'_, '_, '_>) {
// `exposed` and `int_to_ptr_map` are cleared immediately when an allocation
// is freed, so `base_addr` is the only one we have to clean up based on the GC.
self.base_addr.retain(|id, _| allocs.is_live(*id));
}
}

/// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
Expand Down Expand Up @@ -107,7 +125,7 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
// We only use this provenance if it has been exposed.
if global_state.exposed.contains(&alloc_id) {
// This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed.
debug_assert!(!matches!(ecx.get_alloc_info(alloc_id).2, AllocKind::Dead));
debug_assert!(ecx.is_alloc_live(alloc_id));
Some(alloc_id)
} else {
None
Expand Down Expand Up @@ -181,12 +199,19 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
let ecx = self.eval_context_mut();
let global_state = ecx.machine.intptrcast.get_mut();
// In strict mode, we don't need this, so we can save some cycles by not tracking it.
if global_state.provenance_mode != ProvenanceMode::Strict {
trace!("Exposing allocation id {alloc_id:?}");
global_state.exposed.insert(alloc_id);
if ecx.machine.borrow_tracker.is_some() {
ecx.expose_tag(alloc_id, tag)?;
}
if global_state.provenance_mode == ProvenanceMode::Strict {
return Ok(());
}
// Exposing a dead alloc is a no-op, because it's not possible to get a dead allocation
// via int2ptr.
if !ecx.is_alloc_live(alloc_id) {
return Ok(());
}
trace!("Exposing allocation id {alloc_id:?}");
let global_state = ecx.machine.intptrcast.get_mut();
global_state.exposed.insert(alloc_id);
if ecx.machine.borrow_tracker.is_some() {
ecx.expose_tag(alloc_id, tag)?;
}
Ok(())
}
Expand Down
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ mod intptrcast;
mod machine;
mod mono_hash_map;
mod operator;
mod provenance_gc;
mod range_map;
mod shims;
mod tag_gc;

// Establish a "crate-wide prelude": we often import `crate::*`.

Expand Down Expand Up @@ -125,8 +125,8 @@ pub use crate::machine::{
};
pub use crate::mono_hash_map::MonoHashMap;
pub use crate::operator::EvalContextExt as _;
pub use crate::provenance_gc::{EvalContextExt as _, VisitProvenance, VisitWith, LiveAllocs};
pub use crate::range_map::RangeMap;
pub use crate::tag_gc::{EvalContextExt as _, VisitTags};

/// Insert rustc arguments at the beginning of the argument list that Miri wants to be
/// set per default, for maximal validation power.
Expand Down
Loading

0 comments on commit 0027b12

Please sign in to comment.