From 80c42c427d828f6ccf004169d8dd018dc5b4d0f0 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Mon, 22 Jul 2024 15:18:10 -0700 Subject: [PATCH] A few follow ups to #8978 and user stack maps and safepoints (#8991) * Move safepoints and stack maps code to its own module This is purely mechanical code motion. * Shorten log messages' prefix These are all inside a "safepoint" module, and `env_logger` prints the module before the log message, so the old "stack map" part of the log messages' prefix is a bit redundant now. * Use a typed enum for stack slot sizes Rather than an open-coded `u32`. Addresses https://github.com/bytecodealliance/wasmtime/pull/8978#discussion_r1684809610 * Update some comments These comments were referencing things that we did in the old implementation of stack maps and safepoints, and have been updated for the new approach. --- cranelift/frontend/src/frontend.rs | 324 +------------- cranelift/frontend/src/frontend/safepoints.rs | 400 ++++++++++++++++++ 2 files changed, 402 insertions(+), 322 deletions(-) create mode 100644 cranelift/frontend/src/frontend/safepoints.rs diff --git a/cranelift/frontend/src/frontend.rs b/cranelift/frontend/src/frontend.rs index ed77944d8c06..b0e9ae857e4f 100644 --- a/cranelift/frontend/src/frontend.rs +++ b/cranelift/frontend/src/frontend.rs @@ -20,6 +20,8 @@ use cranelift_codegen::packed_option::PackedOption; use cranelift_codegen::traversals::Dfs; use smallvec::SmallVec; +mod safepoints; + /// Structure used for translating a series of functions into Cranelift IR. /// /// In order to reduce memory reallocations when compiling multiple functions, @@ -690,328 +692,6 @@ impl<'a> FunctionBuilder<'a> { } } - /// Insert spills for every value that needs to be in a stack map at every - /// safepoint. - /// - /// First, we do a very simple, imprecise, and overapproximating liveness - /// analysis. This considers any use (regardless if that use produces side - /// effects or flows into another instruction that produces side effects!) - /// of a needs-stack-map value to keep the value live. This allows us to do - /// this liveness analysis in a single post-order traversal of the IR, - /// without any fixed-point loop. The result of this analysis is the set of - /// live needs-stack-map values at each instruction that must be a safepoint - /// (currently this is just non-tail calls). - /// - /// Second, take those results, add stack slots so we have a place to spill - /// to. - /// - /// Third, and finally, we spill the live needs-stack-map values at each - /// safepoint into those stack slots. - fn insert_safepoint_spills(&mut self) { - log::trace!( - "before inserting safepoint spills and reloads:\n{}", - self.func.display() - ); - - // Find all the GC values that are live across each safepoint and add - // stack map entries for them. - let stack_slots = self.find_live_stack_map_values_at_each_safepoint(); - - // Insert spills to our new stack slots before each safepoint - // instruction. - self.insert_safepoint_spills_and_reloads(&stack_slots); - - log::trace!( - "after inserting safepoint spills and reloads:\n{}", - self.func.display() - ); - } - - /// Find the live GC references for each safepoint instruction in this - /// function. - /// - /// Returns a map from each safepoint instruction to the set of GC - /// references that are live across it - fn find_live_stack_map_values_at_each_safepoint( - &mut self, - ) -> crate::HashMap { - // A mapping from each needs-stack-map value that is live across some - // safepoint to the stack slot that it resides within. Note that if a - // needs-stack-map value is never live across a safepoint, then we won't - // ever add it to this map, it can remain in a virtual register for the - // duration of its lifetime, and we won't replace all its uses with - // reloads and all that stuff. - let mut stack_slots: crate::HashMap = Default::default(); - - // A map from size/align to free stack slots that are not being used - // anymore. This allows us to reuse stack slots across multiple values - // helps cut down on the ultimate size of our stack frames. - let mut free_stack_slots: crate::HashMap> = - Default::default(); - - // The set of needs-stack-maps values that are currently live in our - // traversal. - // - // NB: use a `BTreeSet` so that iteration is deterministic, as we will - // insert spills an order derived from this collection's iteration - // order. - let mut live = BTreeSet::new(); - - // Do our single-pass liveness analysis. - // - // Use a post-order traversal, traversing the IR backwards from uses to - // defs, because liveness is a backwards analysis. - // - // 1. The definition of a value removes it from our `live` set. Values - // are not live before they are defined. - // - // 2. When we see any instruction that is a safepoint (aka non-tail - // calls) we record the current live set of needs-stack-map values. - // - // We ignore tail calls because this caller and its frame won't exist - // by the time the callee is executing and potentially triggers a GC; - // nothing is live in the function after it exits! - // - // Note that this step should actually happen *before* adding uses to - // the `live` set below, in order to avoid holding GC objects alive - // longer than necessary, because arguments to the call that are not - // live afterwards should need not be prevented from reclamation by - // the GC for us, and therefore need not appear in this stack map. It - // is the callee's responsibility to record such arguments in its - // stack maps if it keeps them alive across some call that might - // trigger GC. - // - // 3. Any use of a needs-stack-map value adds it to our `live` set. - // - // Note: we do not flow liveness from block parameters back to branch - // arguments, and instead always consider branch arguments live. That - // additional precision would require a fixed-point loop in the - // presence of back edges. - // - // Furthermore, we do not differentiate between uses of a - // needs-stack-map value that ultimately flow into a side-effecting - // operation versus uses that themselves are not live. This could be - // tightened up in the future, but we're starting with the easiest, - // simplest thing. Besides, none of our mid-end optimization passes - // have run at this point in time yet, so there probably isn't much, - // if any, dead code. - - // Helper for (1) - let process_def = |func: &Function, - stack_slots: &crate::HashMap<_, _>, - free_stack_slots: &mut crate::HashMap>, - live: &mut BTreeSet, - val: ir::Value| { - log::trace!("stack map liveness: defining {val:?}, removing it from the live set"); - live.remove(&val); - - // This value's stack slot, if any, is now available for reuse. - if let Some(slot) = stack_slots.get(&val) { - log::trace!("stack map liveness: returning {slot:?} to the free list"); - let ty = func.dfg.value_type(val); - free_stack_slots.entry(ty.bytes()).or_default().push(*slot); - } - }; - - // Helper for (2) - let process_safepoint = |func: &mut Function, - stack_slots: &mut crate::HashMap, - free_stack_slots: &mut crate::HashMap>, - live: &BTreeSet<_>, - inst: Inst| { - log::trace!( - "stack map liveness: found safepoint: {inst:?}: {}", - func.dfg.display_inst(inst) - ); - log::trace!("stack map liveness: live set = {live:?}"); - - for val in live { - let ty = func.dfg.value_type(*val); - let slot = *stack_slots.entry(*val).or_insert_with(|| { - log::trace!("stack map liveness: {val:?} needs a stack slot"); - let size = func.dfg.value_type(*val).bytes(); - free_stack_slots - .get_mut(&size) - .and_then(|list| list.pop().inspect(|slot| { - log::trace!( - "stack map liveness: reusing free stack slot {slot:?} for {val:?}" - ) - })) - .unwrap_or_else(|| { - debug_assert!(size.is_power_of_two()); - let log2_size = size.ilog2(); - let slot = func.create_sized_stack_slot(ir::StackSlotData::new( - ir::StackSlotKind::ExplicitSlot, - size, - log2_size.try_into().unwrap(), - )); - log::trace!( - "stack map liveness: created new stack slot {slot:?} for {val:?}" - ); - slot - }) - }); - func.dfg.append_user_stack_map_entry( - inst, - ir::UserStackMapEntry { - ty, - slot, - offset: 0, - }, - ); - } - }; - - // Helper for (3) - let process_use = |func: &Function, live: &mut BTreeSet<_>, inst: Inst, val: Value| { - if live.insert(val) { - log::trace!( - "stack map liveness: found use of {val:?}, marking it live: {inst:?}: {}", - func.dfg.display_inst(inst) - ); - } - }; - - for block in self - .func_ctx - .dfs - .post_order_iter(&self.func) - // We have to `collect` here to release the borrow on `self.func` so - // we can add the stack map entries below. - .collect::>() - { - log::trace!("stack map liveness: traversing {block:?}"); - let mut option_inst = self.func.layout.last_inst(block); - while let Some(inst) = option_inst { - // (1) Remove values defined by this instruction from the `live` - // set. - for val in self.func.dfg.inst_results(inst) { - process_def( - &self.func, - &stack_slots, - &mut free_stack_slots, - &mut live, - *val, - ); - } - - // (2) If this instruction is a safepoint, then we need to add - // stack map entries to record the values in `live`. - let opcode = self.func.dfg.insts[inst].opcode(); - if opcode.is_call() && !opcode.is_return() { - process_safepoint( - &mut self.func, - &mut stack_slots, - &mut free_stack_slots, - &live, - inst, - ); - } - - // (3) Add all needs-stack-map values that are operands to this - // instruction to the live set. This includes branch arguments, - // as mentioned above. - for val in self.func.dfg.inst_values(inst) { - let val = self.func.dfg.resolve_aliases(val); - if self.func_ctx.stack_map_values.contains(val) { - process_use(&self.func, &mut live, inst, val); - } - } - - option_inst = self.func.layout.prev_inst(inst); - } - - // After we've processed this block's instructions, remove its - // parameters from the live set. This is part of step (1). - for val in self.func.dfg.block_params(block) { - process_def( - &self.func, - &stack_slots, - &mut free_stack_slots, - &mut live, - *val, - ); - } - } - - stack_slots - } - - /// This function does a forwards pass over the IR and does two things: - /// - /// 1. Insert spills to a needs-stack-map value's associated stack slot just - /// after its definition. - /// - /// 2. Replace all uses of the needs-stack-map value with loads from that - /// stack slot. This will introduce many redundant loads, but the alias - /// analysis pass in the mid-end should clean most of these up when not - /// actually needed. - fn insert_safepoint_spills_and_reloads( - &mut self, - stack_slots: &crate::HashMap, - ) { - let mut pos = FuncCursor::new(self.func); - let mut vals: SmallVec<[_; 8]> = Default::default(); - - while let Some(block) = pos.next_block() { - // Spill needs-stack-map values defined by block parameters to their - // associated stack slot. - vals.extend_from_slice(pos.func.dfg.block_params(block)); - pos.next_inst(); - let mut spilled_any = false; - for val in vals.drain(..) { - if let Some(slot) = stack_slots.get(&val) { - pos.ins().stack_store(val, *slot, 0); - spilled_any = true; - } - } - - // The cursor needs to point just *before* the first original - // instruction in the block that we didn't introduce just above, so - // that when we loop over `next_inst()` below we are processing the - // block's original instructions. If we inserted spills, then it - // already does point there. If we did not insert spills, then it is - // currently pointing at the first original instruction, but that - // means that the upcoming `pos.next_inst()` call will skip over the - // first original instruction, so in this case we need to back up - // the cursor. - if !spilled_any { - pos = pos.at_position(CursorPosition::Before(block)); - } - - while let Some(mut inst) = pos.next_inst() { - // Replace all uses of needs-stack-map values with loads from - // the value's associated stack slot. - vals.extend(pos.func.dfg.inst_values(inst)); - let mut replaced_any = false; - for val in &mut vals { - if let Some(slot) = stack_slots.get(val) { - replaced_any = true; - let ty = pos.func.dfg.value_type(*val); - *val = pos.ins().stack_load(ty, *slot, 0); - } - } - if replaced_any { - pos.func.dfg.overwrite_inst_values(inst, vals.drain(..)); - } else { - vals.clear(); - } - - // If this instruction defines a needs-stack-map value, then - // spill it to its stack slot. - pos = pos.after_inst(inst); - vals.extend_from_slice(pos.func.dfg.inst_results(inst)); - for val in vals.drain(..) { - if let Some(slot) = stack_slots.get(&val) { - inst = pos.ins().stack_store(val, *slot, 0); - } - } - - pos = pos.at_inst(inst); - } - } - } - /// Declare that translation of the current function is complete. /// /// This resets the state of the [`FunctionBuilderContext`] in preparation to diff --git a/cranelift/frontend/src/frontend/safepoints.rs b/cranelift/frontend/src/frontend/safepoints.rs new file mode 100644 index 000000000000..21ebb606faf2 --- /dev/null +++ b/cranelift/frontend/src/frontend/safepoints.rs @@ -0,0 +1,400 @@ +//! Safepoints and stack maps. + +use core::ops::{Index, IndexMut}; + +use super::*; + +#[derive(Clone, Copy)] +#[repr(u8)] +enum SlotSize { + Size8 = 0, + Size16 = 1, + Size32 = 2, + Size64 = 3, + Size128 = 4, + // If adding support for more slot sizes, update `SLOT_SIZE_LEN` below. +} +const SLOT_SIZE_LEN: usize = 5; + +impl TryFrom for SlotSize { + type Error = &'static str; + + fn try_from(ty: ir::Type) -> Result { + Self::new(ty.bytes()).ok_or("type is not supported in stack maps") + } +} + +impl SlotSize { + fn new(bytes: u32) -> Option { + match bytes { + 1 => Some(Self::Size8), + 2 => Some(Self::Size16), + 4 => Some(Self::Size32), + 8 => Some(Self::Size64), + 16 => Some(Self::Size128), + _ => None, + } + } + + fn unwrap_new(bytes: u32) -> Self { + Self::new(bytes).unwrap_or_else(|| panic!("cannot create a `SlotSize` for {bytes} bytes")) + } +} + +/// A map from every `SlotSize` to a `T`. +struct SlotSizeMap([T; SLOT_SIZE_LEN]); + +impl Index for SlotSizeMap { + type Output = T; + fn index(&self, index: SlotSize) -> &Self::Output { + self.get(index) + } +} + +impl IndexMut for SlotSizeMap { + fn index_mut(&mut self, index: SlotSize) -> &mut Self::Output { + self.get_mut(index) + } +} + +impl SlotSizeMap { + fn new() -> Self + where + T: Default, + { + Self([ + T::default(), + T::default(), + T::default(), + T::default(), + T::default(), + ]) + } + + fn get(&self, size: SlotSize) -> &T { + let index = size as u8 as usize; + &self.0[index] + } + + fn get_mut(&mut self, size: SlotSize) -> &mut T { + let index = size as u8 as usize; + &mut self.0[index] + } +} + +impl FunctionBuilder<'_> { + /// Insert spills for every value that needs to be in a stack map at every + /// safepoint. + /// + /// We begin with a very simple, imprecise, and overapproximating liveness + /// analysis. This considers any use (regardless if that use produces side + /// effects or flows into another instruction that produces side effects!) + /// of a needs-stack-map value to keep the value live. This allows us to do + /// this liveness analysis in a single post-order traversal of the IR, + /// without any fixed-point loop. The result of this analysis is the mapping + /// from each needs-stack-map value that is live across a safepoint to its + /// associated stack slot. + /// + /// Finally, we spill each of the needs-stack-map values that are live + /// across a safepoint to their associated stack slot upon definition, and + /// insert reloads from that stack slot at each use of the value. + pub(super) fn insert_safepoint_spills(&mut self) { + log::trace!( + "before inserting safepoint spills and reloads:\n{}", + self.func.display() + ); + + let stack_slots = self.find_live_stack_map_values_at_each_safepoint(); + self.insert_safepoint_spills_and_reloads(&stack_slots); + + log::trace!( + "after inserting safepoint spills and reloads:\n{}", + self.func.display() + ); + } + + /// Find the live GC references for each safepoint instruction in this + /// function. + /// + /// Returns a map from each safepoint instruction to the set of GC + /// references that are live across it + fn find_live_stack_map_values_at_each_safepoint( + &mut self, + ) -> crate::HashMap { + // A mapping from each needs-stack-map value that is live across some + // safepoint to the stack slot that it resides within. Note that if a + // needs-stack-map value is never live across a safepoint, then we won't + // ever add it to this map, it can remain in a virtual register for the + // duration of its lifetime, and we won't replace all its uses with + // reloads and all that stuff. + let mut stack_slots: crate::HashMap = Default::default(); + + // A map from slot size to free stack slots that are not being used + // anymore. This allows us to reuse stack slots across multiple values + // helps cut down on the ultimate size of our stack frames. + let mut free_stack_slots = SlotSizeMap::>::new(); + + // The set of needs-stack-maps values that are currently live in our + // traversal. + // + // NB: use a `BTreeSet` so that iteration is deterministic, as we will + // insert spills an order derived from this collection's iteration + // order. + let mut live = BTreeSet::new(); + + // Do our single-pass liveness analysis. + // + // Use a post-order traversal, traversing the IR backwards from uses to + // defs, because liveness is a backwards analysis. + // + // 1. The definition of a value removes it from our `live` set. Values + // are not live before they are defined. + // + // 2. When we see any instruction that is a safepoint (aka non-tail + // calls) we record the current live set of needs-stack-map values. + // + // We ignore tail calls because this caller and its frame won't exist + // by the time the callee is executing and potentially triggers a GC; + // nothing is live in the function after it exits! + // + // Note that this step should actually happen *before* adding uses to + // the `live` set below, in order to avoid holding GC objects alive + // longer than necessary, because arguments to the call that are not + // live afterwards should need not be prevented from reclamation by + // the GC for us, and therefore need not appear in this stack map. It + // is the callee's responsibility to record such arguments in its + // stack maps if it keeps them alive across some call that might + // trigger GC. + // + // 3. Any use of a needs-stack-map value adds it to our `live` set. + // + // Note: we do not flow liveness from block parameters back to branch + // arguments, and instead always consider branch arguments live. That + // additional precision would require a fixed-point loop in the + // presence of back edges. + // + // Furthermore, we do not differentiate between uses of a + // needs-stack-map value that ultimately flow into a side-effecting + // operation versus uses that themselves are not live. This could be + // tightened up in the future, but we're starting with the easiest, + // simplest thing. Besides, none of our mid-end optimization passes + // have run at this point in time yet, so there probably isn't much, + // if any, dead code. + + // Helper for (1) + let process_def = |func: &Function, + stack_slots: &crate::HashMap<_, _>, + free_stack_slots: &mut SlotSizeMap>, + live: &mut BTreeSet, + val: ir::Value| { + log::trace!("liveness: defining {val:?}, removing it from the live set"); + live.remove(&val); + + // This value's stack slot, if any, is now available for reuse. + if let Some(slot) = stack_slots.get(&val) { + log::trace!("liveness: returning {slot:?} to the free list"); + let ty = func.dfg.value_type(val); + free_stack_slots[SlotSize::try_from(ty).unwrap()].push(*slot); + } + }; + + // Helper for (2) + let process_safepoint = |func: &mut Function, + stack_slots: &mut crate::HashMap, + free_stack_slots: &mut SlotSizeMap>, + live: &BTreeSet<_>, + inst: Inst| { + log::trace!( + "liveness: found safepoint: {inst:?}: {}", + func.dfg.display_inst(inst) + ); + log::trace!("liveness: live set = {live:?}"); + + for val in live { + let ty = func.dfg.value_type(*val); + let slot = *stack_slots.entry(*val).or_insert_with(|| { + log::trace!("liveness: {val:?} needs a stack slot"); + let size = func.dfg.value_type(*val).bytes(); + match free_stack_slots[SlotSize::unwrap_new(size)].pop() { + Some(slot) => { + log::trace!( + "liveness: reusing free stack slot {slot:?} for {val:?}" + ); + slot + } + None => { + debug_assert!(size.is_power_of_two()); + let log2_size = size.ilog2(); + let slot = func.create_sized_stack_slot(ir::StackSlotData::new( + ir::StackSlotKind::ExplicitSlot, + size, + log2_size.try_into().unwrap(), + )); + log::trace!( + "liveness: created new stack slot {slot:?} for {val:?}" + ); + slot + } + } + }); + func.dfg.append_user_stack_map_entry( + inst, + ir::UserStackMapEntry { + ty, + slot, + offset: 0, + }, + ); + } + }; + + // Helper for (3) + let process_use = |func: &Function, live: &mut BTreeSet<_>, inst: Inst, val: Value| { + if live.insert(val) { + log::trace!( + "liveness: found use of {val:?}, marking it live: {inst:?}: {}", + func.dfg.display_inst(inst) + ); + } + }; + + for block in self + .func_ctx + .dfs + .post_order_iter(&self.func) + // We have to `collect` here to release the borrow on `self.func` so + // we can add the stack map entries below. + .collect::>() + { + log::trace!("liveness: traversing {block:?}"); + let mut option_inst = self.func.layout.last_inst(block); + while let Some(inst) = option_inst { + // (1) Remove values defined by this instruction from the `live` + // set. + for val in self.func.dfg.inst_results(inst) { + process_def( + &self.func, + &stack_slots, + &mut free_stack_slots, + &mut live, + *val, + ); + } + + // (2) If this instruction is a safepoint, then we need to add + // stack map entries to record the values in `live`. + let opcode = self.func.dfg.insts[inst].opcode(); + if opcode.is_call() && !opcode.is_return() { + process_safepoint( + &mut self.func, + &mut stack_slots, + &mut free_stack_slots, + &live, + inst, + ); + } + + // (3) Add all needs-stack-map values that are operands to this + // instruction to the live set. This includes branch arguments, + // as mentioned above. + for val in self.func.dfg.inst_values(inst) { + let val = self.func.dfg.resolve_aliases(val); + if self.func_ctx.stack_map_values.contains(val) { + process_use(&self.func, &mut live, inst, val); + } + } + + option_inst = self.func.layout.prev_inst(inst); + } + + // After we've processed this block's instructions, remove its + // parameters from the live set. This is part of step (1). + for val in self.func.dfg.block_params(block) { + process_def( + &self.func, + &stack_slots, + &mut free_stack_slots, + &mut live, + *val, + ); + } + } + + stack_slots + } + + /// This function does a forwards pass over the IR and does two things: + /// + /// 1. Insert spills to a needs-stack-map value's associated stack slot just + /// after its definition. + /// + /// 2. Replace all uses of the needs-stack-map value with loads from that + /// stack slot. This will introduce many redundant loads, but the alias + /// analysis pass in the mid-end should clean most of these up when not + /// actually needed. + fn insert_safepoint_spills_and_reloads( + &mut self, + stack_slots: &crate::HashMap, + ) { + let mut pos = FuncCursor::new(self.func); + let mut vals: SmallVec<[_; 8]> = Default::default(); + + while let Some(block) = pos.next_block() { + // Spill needs-stack-map values defined by block parameters to their + // associated stack slot. + vals.extend_from_slice(pos.func.dfg.block_params(block)); + pos.next_inst(); + let mut spilled_any = false; + for val in vals.drain(..) { + if let Some(slot) = stack_slots.get(&val) { + pos.ins().stack_store(val, *slot, 0); + spilled_any = true; + } + } + + // The cursor needs to point just *before* the first original + // instruction in the block that we didn't introduce just above, so + // that when we loop over `next_inst()` below we are processing the + // block's original instructions. If we inserted spills, then it + // already does point there. If we did not insert spills, then it is + // currently pointing at the first original instruction, but that + // means that the upcoming `pos.next_inst()` call will skip over the + // first original instruction, so in this case we need to back up + // the cursor. + if !spilled_any { + pos = pos.at_position(CursorPosition::Before(block)); + } + + while let Some(mut inst) = pos.next_inst() { + // Replace all uses of needs-stack-map values with loads from + // the value's associated stack slot. + vals.extend(pos.func.dfg.inst_values(inst)); + let mut replaced_any = false; + for val in &mut vals { + if let Some(slot) = stack_slots.get(val) { + replaced_any = true; + let ty = pos.func.dfg.value_type(*val); + *val = pos.ins().stack_load(ty, *slot, 0); + } + } + if replaced_any { + pos.func.dfg.overwrite_inst_values(inst, vals.drain(..)); + } else { + vals.clear(); + } + + // If this instruction defines a needs-stack-map value, then + // spill it to its stack slot. + pos = pos.after_inst(inst); + vals.extend_from_slice(pos.func.dfg.inst_results(inst)); + for val in vals.drain(..) { + if let Some(slot) = stack_slots.get(&val) { + inst = pos.ins().stack_store(val, *slot, 0); + } + } + + pos = pos.at_inst(inst); + } + } + } +}