Cranelift: Make heap_addr return calculated base + index + offset
Rather than return just the `base + index`.

(Note: I've chosen to use the nomenclature "index" for the dynamic operand and
"offset" for the static immediate.)

This moves the addition of the `offset` into `heap_addr`, instead of leaving it
for the subsequent memory operation, so that we can Spectre-guard the full
address, and not allow speculative execution to read the first 4GiB of memory.

Before this commit, we were effectively doing

    load(spectre_guard(base + index) + offset)

Now we are effectively doing

    load(spectre_guard(base + index + offset))

Finally, this also corrects `heap_addr`'s documented semantics to say that it
returns an address that will trap on access if `index + offset + access_size` is
out of bounds for the given heap, rather than saying that the `heap_addr` itself
will trap. This matches the implemented behavior for static memories, and after
#5190 lands (which is blocked
on this commit) will also match the implemented behavior for dynamic memories.
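
At the CLIF level, the change looks roughly like this for a 4-byte load at a
constant offset of 16 (an illustrative sketch with made-up value numbers, not
output from the test suite):

    ;; before: heap_addr took (heap, index, size) and the offset rode on the load
    v2 = heap_addr.i64 heap0, v1, 4
    v3 = load.i32 v2+16

    ;; after: the offset is an immediate of heap_addr (heap, index, offset, size),
    ;; so the bounds check and the Spectre guard cover the full address
    v2 = heap_addr.i64 heap0, v1, 16, 4
    v3 = load.i32 v2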
fitzgen committed Nov 8, 2022
1 parent d59caf3 commit 4542a8f
Showing 38 changed files with 546 additions and 277 deletions.
9 changes: 9 additions & 0 deletions cranelift/codegen/meta/src/cdsl/formats.rs
@@ -116,6 +116,15 @@ impl InstructionFormatBuilder {
self
}

pub fn imm_with_name(mut self, name: &'static str, operand_kind: &OperandKind) -> Self {
let field = FormatField {
kind: operand_kind.clone(),
member: name,
};
self.0.imm_fields.push(field);
self
}

pub fn typevar_operand(mut self, operand_index: usize) -> Self {
assert!(self.0.typevar_operand.is_none());
assert!(operand_index < self.0.num_value_operands);
3 changes: 2 additions & 1 deletion cranelift/codegen/meta/src/shared/formats.rs
@@ -202,7 +202,8 @@ impl Formats {
heap_addr: Builder::new("HeapAddr")
.imm(&entities.heap)
.value()
.imm(&imm.uimm32)
.imm_with_name("offset", &imm.uimm32)
.imm_with_name("size", &imm.uimm8)
.build(),

// Accessing a WebAssembly table.
16 changes: 10 additions & 6 deletions cranelift/codegen/meta/src/shared/instructions.rs
@@ -1129,25 +1129,29 @@ pub(crate) fn define(

let H = &Operand::new("H", &entities.heap);
let p = &Operand::new("p", HeapOffset);
let Size = &Operand::new("Size", &imm.uimm32).with_doc("Size in bytes");
let Offset = &Operand::new("Offset", &imm.uimm32).with_doc("Offset in bytes");
let Size = &Operand::new("Size", &imm.uimm8).with_doc("Size in bytes");

ig.push(
Inst::new(
"heap_addr",
r#"
Bounds check and compute absolute address of heap memory.
Bounds check and compute absolute address of ``p + Offset`` in heap memory.
Verify that the offset range ``p .. p + Size - 1`` is in bounds for the
Verify that the offset range ``p .. p + Offset + Size - 1`` is in bounds for the
heap H, and generate an absolute address that is safe to dereference.
1. If ``p + Size`` is not greater than the heap bound, return an
1. If ``p + Offset + Size`` is not greater than the heap bound, return an
absolute address corresponding to a byte offset of ``p`` from the
heap's base address.
2. If ``p + Size`` is greater than the heap bound, generate a trap.
2. If ``p + Offset + Size`` is greater than the heap bound, return the
``NULL`` address or another address that is guaranteed to generate a trap
when accessed.
"#,
&formats.heap_addr,
)
.operands_in(vec![H, p, Size])
.operands_in(vec![H, p, Offset, Size])
.operands_out(vec![addr]),
);

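As an illustration of these semantics, take a static heap with bound 65536 and
a `heap_addr` with offset 32 and size 8 (hypothetical values, not from the test
suite):

    v2 = heap_addr.i64 heap0, v1, 32, 8
    v3 = load.i64 v2  ;; fine if v1 + 32 + 8 <= 65536; otherwise v2 is NULL (or
                      ;; another trapping address) and this access traps
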
142 changes: 84 additions & 58 deletions cranelift/codegen/src/legalizer/heap.rs
@@ -6,7 +6,7 @@
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::condcodes::IntCC;
use crate::ir::immediates::Uimm32;
use crate::ir::immediates::{Uimm32, Uimm8};
use crate::ir::{self, InstBuilder, RelSourceLoc};
use crate::isa::TargetIsa;

@@ -17,25 +17,28 @@ pub fn expand_heap_addr(
cfg: &mut ControlFlowGraph,
isa: &dyn TargetIsa,
heap: ir::Heap,
offset: ir::Value,
access_size: Uimm32,
index_operand: ir::Value,
offset_immediate: Uimm32,
access_size: Uimm8,
) {
match func.heaps[heap].style {
ir::HeapStyle::Dynamic { bound_gv } => dynamic_addr(
isa,
inst,
heap,
offset,
u64::from(access_size),
index_operand,
u32::from(offset_immediate),
u8::from(access_size),
bound_gv,
func,
),
ir::HeapStyle::Static { bound } => static_addr(
isa,
inst,
heap,
offset,
u64::from(access_size),
index_operand,
u32::from(offset_immediate),
u8::from(access_size),
bound.into(),
func,
cfg,
@@ -48,35 +51,40 @@ fn dynamic_addr(
isa: &dyn TargetIsa,
inst: ir::Inst,
heap: ir::Heap,
offset: ir::Value,
access_size: u64,
index: ir::Value,
offset: u32,
access_size: u8,
bound_gv: ir::GlobalValue,
func: &mut ir::Function,
) {
let offset_ty = func.dfg.value_type(offset);
let index_ty = func.dfg.value_type(index);
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
let min_size = func.heaps[heap].min_size.into();
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);

let offset = cast_offset_to_pointer_ty(offset, offset_ty, addr_ty, &mut pos);
let index = cast_index_to_pointer_ty(index, index_ty, addr_ty, &mut pos);

// Start with the bounds check. Trap if `offset + access_size > bound`.
// Start with the bounds check. Trap if `index + offset + access_size > bound`.
let bound = pos.ins().global_value(addr_ty, bound_gv);
let (cc, lhs, bound) = if access_size == 1 {
// `offset > bound - 1` is the same as `offset >= bound`.
(IntCC::UnsignedGreaterThanOrEqual, offset, bound)
} else if access_size <= min_size {
// We know that bound >= min_size, so here we can compare `offset > bound - access_size`
// without wrapping.
let adj_bound = pos.ins().iadd_imm(bound, -(access_size as i64));
(IntCC::UnsignedGreaterThan, offset, adj_bound)
let (cc, lhs, bound) = if offset == 0 && access_size == 1 {
// `index > bound - 1` is the same as `index >= bound`.
(IntCC::UnsignedGreaterThanOrEqual, index, bound)
} else if offset as u64 + access_size as u64 <= min_size {
// We know that `bound >= min_size`, so here we can compare `offset >
// bound - (offset + access_size)` without wrapping.
let adj_bound = pos
.ins()
.iadd_imm(bound, -(offset as i64 + access_size as i64));
(IntCC::UnsignedGreaterThan, index, adj_bound)
} else {
// We need an overflow check for the adjusted offset.
let access_size_val = pos.ins().iconst(addr_ty, access_size as i64);
let access_size_val = pos
.ins()
.iconst(addr_ty, offset as i64 + access_size as i64);
let adj_offset =
pos.ins()
.uadd_overflow_trap(offset, access_size_val, ir::TrapCode::HeapOutOfBounds);
.uadd_overflow_trap(index, access_size_val, ir::TrapCode::HeapOutOfBounds);
(IntCC::UnsignedGreaterThan, adj_offset, bound)
};
let oob = pos.ins().icmp(cc, lhs, bound);
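
For reference, when `offset + access_size` fits in the heap's `min_size`, the
dynamic-heap check built above comes out roughly as the first four lines below;
otherwise the adjusted index is formed with an overflow-checked add (a sketch
with offset 16 and size 4, assuming `gv1` is the bound global value and `v2` is
the index already widened to the pointer type):

    v3 = global_value.i64 gv1      ;; heap bound
    v4 = iadd_imm v3, -20          ;; bound - (offset + access_size)
    v5 = icmp ugt v2, v4
    trapnz v5, heap_oob

    ;; slow path when offset + access_size may exceed min_size:
    v6 = iconst.i64 20
    v7 = uadd_overflow_trap v2, v6, heap_oob
    v8 = icmp ugt v7, v3
    trapnz v8, heap_oob
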
@@ -93,6 +101,7 @@
inst,
heap,
addr_ty,
index,
offset,
pos.func,
spectre_oob_comparison,
@@ -104,26 +113,27 @@ fn static_addr(
isa: &dyn TargetIsa,
inst: ir::Inst,
heap: ir::Heap,
mut offset: ir::Value,
access_size: u64,
index: ir::Value,
offset: u32,
access_size: u8,
bound: u64,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
) {
let offset_ty = func.dfg.value_type(offset);
let index_ty = func.dfg.value_type(index);
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);

// The goal here is to trap if `offset + access_size > bound`.
// The goal here is to trap if `index + offset + access_size > bound`.
//
// This first case is a trivial case where we can easily trap.
if access_size > bound {
// This first case is a trivial case where we can statically trap.
if offset as u64 + access_size as u64 > bound {
// This will simply always trap since `offset >= 0`.
pos.ins().trap(ir::TrapCode::HeapOutOfBounds);
pos.func.dfg.replace(inst).iconst(addr_ty, 0);

// Split Block, as the trap is a terminator instruction.
// Split the block, as the trap is a terminator instruction.
let curr_block = pos.current_block().expect("Cursor is not in a block");
let new_block = pos.func.dfg.make_block();
pos.insert_block(new_block);
@@ -132,29 +142,29 @@ fn static_addr(
return;
}

// After the trivial case is done we're now mostly interested in trapping
// if `offset > bound - access_size`. We know `bound - access_size` here is
// non-negative from the above comparison.
// After the trivial case is done we're now mostly interested in trapping if
// `index > bound - offset - access_size`. We know `bound - offset -
// access_size` here is non-negative from the above comparison.
//
// If we can know `bound - access_size >= 4GB` then with a 32-bit offset
// we're guaranteed:
// If we can know `bound - offset - access_size >= 4GB` then with a 32-bit
// offset we're guaranteed:
//
// bound - access_size >= 4GB > offset
// bound - offset - access_size >= 4GB > index
//
// or, in other words, `offset < bound - access_size`, meaning we can't trap
// for any value of `offset`.
// or, in other words, `index < bound - offset - access_size`, meaning we
// can't trap for any value of `index`.
//
// With that we have an optimization here where with 32-bit offsets and
// `bound - access_size >= 4GB` we can omit a bounds check.
let limit = bound - access_size;
let limit = bound - offset as u64 - access_size as u64;
let mut spectre_oob_comparison = None;
offset = cast_offset_to_pointer_ty(offset, offset_ty, addr_ty, &mut pos);
if offset_ty != ir::types::I32 || limit < 0xffff_ffff {
// Here we want to test the condition `offset > limit` and if that's
let index = cast_index_to_pointer_ty(index, index_ty, addr_ty, &mut pos);
if index_ty != ir::types::I32 || limit < 0xffff_ffff {
// Here we want to test the condition `index > limit` and if that's
// true then this is an out-of-bounds access and needs to trap. For ARM
// and other RISC architectures it's easier to test against an immediate
// that's even instead of odd, so if `limit` is odd then we instead test
// for `offset >= limit + 1`.
// for `index >= limit + 1`.
//
// The thinking behind this is that:
//
@@ -164,10 +174,10 @@ fn static_addr(
// should mean that `A >= B + 1` is an equivalent check for `A > B`
let (cc, lhs, limit_imm) = if limit & 1 == 1 {
let limit = limit as i64 + 1;
(IntCC::UnsignedGreaterThanOrEqual, offset, limit)
(IntCC::UnsignedGreaterThanOrEqual, index, limit)
} else {
let limit = limit as i64;
(IntCC::UnsignedGreaterThan, offset, limit)
(IntCC::UnsignedGreaterThan, index, limit)
};
let oob = pos.ins().icmp_imm(cc, lhs, limit_imm);
pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
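
For a static heap the check folds to a single compare against the constant
`limit = bound - offset - access_size`; when that limit is odd, the code
compares against the even `limit + 1` with `>=` instead, for example (sketch,
assuming a limit of 4095):

    v3 = icmp_imm uge v2, 4096     ;; index >= limit + 1, i.e. index > 4095
    trapnz v3, heap_oob
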
@@ -182,29 +192,30 @@ fn static_addr(
inst,
heap,
addr_ty,
index,
offset,
pos.func,
spectre_oob_comparison,
);
}

fn cast_offset_to_pointer_ty(
offset: ir::Value,
offset_ty: ir::Type,
fn cast_index_to_pointer_ty(
index: ir::Value,
index_ty: ir::Type,
addr_ty: ir::Type,
pos: &mut FuncCursor,
) -> ir::Value {
if offset_ty == addr_ty {
return offset;
if index_ty == addr_ty {
return index;
}
// Note that using 64-bit heaps on a 32-bit host is not currently supported,
// would require at least a bounds check here to ensure that the truncation
// from 64-to-32 bits doesn't lose any upper bits. For now though we're
// mostly interested in the 32-bit-heaps-on-64-bit-hosts cast.
assert!(offset_ty.bits() < addr_ty.bits());
assert!(index_ty.bits() < addr_ty.bits());

// Convert `offset` to `addr_ty`.
let extended_offset = pos.ins().uextend(addr_ty, offset);
// Convert `index` to `addr_ty`.
let extended_index = pos.ins().uextend(addr_ty, index);

// Add debug value-label alias so that debuginfo can name the extended
// value as the address
@@ -213,9 +224,9 @@ fn cast_offset_to_pointer_ty(
pos.func
.stencil
.dfg
.add_value_label_alias(extended_offset, loc, offset);
.add_value_label_alias(extended_index, loc, index);

extended_offset
extended_index
}

/// Emit code for the base address computation of a `heap_addr` instruction.
@@ -224,15 +235,16 @@ fn compute_addr(
inst: ir::Inst,
heap: ir::Heap,
addr_ty: ir::Type,
offset: ir::Value,
index: ir::Value,
offset: u32,
func: &mut ir::Function,
// If we are performing Spectre mitigation with conditional selects, the
// values to compare and the condition code that indicates an out-of bounds
// condition; on this condition, the conditional move will choose a
// speculatively safe address (a zero / null pointer) instead.
spectre_oob_comparison: Option<(IntCC, ir::Value, ir::Value)>,
) {
debug_assert_eq!(func.dfg.value_type(offset), addr_ty);
debug_assert_eq!(func.dfg.value_type(index), addr_ty);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);

@@ -245,14 +257,28 @@ fn compute_addr(
};

if let Some((cc, a, b)) = spectre_oob_comparison {
let final_addr = pos.ins().iadd(base, offset);
let final_base = pos.ins().iadd(base, index);
// NB: The addition of the offset immediate must happen *before* the
// `select_spectre_guard`. If it happens after, then we potentially are
// letting speculative execution read the whole first 4GiB of memory.
let final_addr = if offset == 0 {
final_base
} else {
pos.ins().iadd_imm(final_base, offset as i64)
};
let zero = pos.ins().iconst(addr_ty, 0);
let cmp = pos.ins().icmp(cc, a, b);
pos.func
.dfg
.replace(inst)
.select_spectre_guard(cmp, zero, final_addr);
} else if offset == 0 {
pos.func.dfg.replace(inst).iadd(base, index);
} else {
pos.func.dfg.replace(inst).iadd(base, offset);
let final_base = pos.ins().iadd(base, index);
pos.func
.dfg
.replace(inst)
.iadd_imm(final_base, offset as i64);
}
}
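
With Spectre mitigation enabled, the replacement emitted by `compute_addr` then
looks roughly like this, assuming `v2` is the widened index, `v3` the adjusted
bound from the check above, `v4` the heap base, and an offset of 16
(hypothetical value numbers):

    v5 = iadd v4, v2                        ;; heap base + index
    v6 = iadd_imm v5, 16                    ;; + offset, added before the guard
    v7 = iconst.i64 0
    v8 = icmp ugt v2, v3                    ;; the saved out-of-bounds comparison
    v9 = select_spectre_guard v8, v7, v6    ;; NULL address when out of bounds
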
5 changes: 3 additions & 2 deletions cranelift/codegen/src/legalizer/mod.rs
@@ -72,8 +72,9 @@ pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa:
opcode: ir::Opcode::HeapAddr,
heap,
arg,
imm,
} => expand_heap_addr(inst, &mut pos.func, cfg, isa, heap, arg, imm),
offset,
size,
} => expand_heap_addr(inst, &mut pos.func, cfg, isa, heap, arg, offset, size),
InstructionData::StackLoad {
opcode: ir::Opcode::StackLoad,
stack_slot,
2 changes: 1 addition & 1 deletion cranelift/codegen/src/write.rs
@@ -476,7 +476,7 @@ pub fn write_operands(w: &mut dyn Write, dfg: &DataFlowGraph, inst: Inst) -> fmt
dynamic_stack_slot,
..
} => write!(w, " {}, {}", arg, dynamic_stack_slot),
HeapAddr { heap, arg, imm, .. } => write!(w, " {}, {}, {}", heap, arg, imm),
HeapAddr { heap, arg, offset, size, .. } => write!(w, " {}, {}, {}, {}", heap, arg, offset, size),
TableAddr { table, arg, .. } => write!(w, " {}, {}", table, arg),
Load {
flags, arg, offset, ..
4 changes: 2 additions & 2 deletions cranelift/filetests/filetests/alias/extends.clif
@@ -9,9 +9,9 @@ function %f0(i64 vmctx, i32) -> i32, i32, i32, i64, i64, i64 {
gv0 = vmctx
gv1 = load.i64 notrap readonly aligned gv0+8
heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32

block0(v0: i64, v1: i32):
v2 = heap_addr.i64 heap0, v1, 0
v2 = heap_addr.i64 heap0, v1, 12, 0

;; Initial load. This will not be reused by anything below, even
;; though it does access the same address.