diff --git a/cranelift/codegen/meta/src/cdsl/formats.rs b/cranelift/codegen/meta/src/cdsl/formats.rs index 876fb7702f3f..c627e0269bd6 100644 --- a/cranelift/codegen/meta/src/cdsl/formats.rs +++ b/cranelift/codegen/meta/src/cdsl/formats.rs @@ -116,6 +116,15 @@ impl InstructionFormatBuilder { self } + pub fn imm_with_name(mut self, name: &'static str, operand_kind: &OperandKind) -> Self { + let field = FormatField { + kind: operand_kind.clone(), + member: name, + }; + self.0.imm_fields.push(field); + self + } + pub fn typevar_operand(mut self, operand_index: usize) -> Self { assert!(self.0.typevar_operand.is_none()); assert!(operand_index < self.0.num_value_operands); diff --git a/cranelift/codegen/meta/src/shared/formats.rs b/cranelift/codegen/meta/src/shared/formats.rs index 057e03bd3910..8c54c1916911 100644 --- a/cranelift/codegen/meta/src/shared/formats.rs +++ b/cranelift/codegen/meta/src/shared/formats.rs @@ -202,7 +202,8 @@ impl Formats { heap_addr: Builder::new("HeapAddr") .imm(&entities.heap) .value() - .imm(&imm.uimm32) + .imm_with_name("offset", &imm.uimm32) + .imm_with_name("size", &imm.uimm8) .build(), // Accessing a WebAssembly table. diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs old mode 100644 new mode 100755 index c1b9f4e9fad4..4f74ac18c9d6 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -1128,26 +1128,30 @@ pub(crate) fn define( ); let H = &Operand::new("H", &entities.heap); - let p = &Operand::new("p", HeapOffset); - let Size = &Operand::new("Size", &imm.uimm32).with_doc("Size in bytes"); + let index = &Operand::new("index", HeapOffset); + let Offset = &Operand::new("Offset", &imm.uimm32).with_doc("Static offset immediate in bytes"); + let Size = &Operand::new("Size", &imm.uimm8).with_doc("Static size immediate in bytes"); ig.push( Inst::new( "heap_addr", r#" - Bounds check and compute absolute address of heap memory. + Bounds check and compute absolute address of ``index + Offset`` in heap memory. - Verify that the offset range ``p .. p + Size - 1`` is in bounds for the - heap H, and generate an absolute address that is safe to dereference. + Verify that the range ``index .. index + Offset + Size`` is in bounds for the + heap ``H``, and generate an absolute address that is safe to dereference. - 1. If ``p + Size`` is not greater than the heap bound, return an - absolute address corresponding to a byte offset of ``p`` from the + 1. If ``index + Offset + Size`` is less than or equal to the heap bound, return an + absolute address corresponding to a byte offset of ``index + Offset`` from the heap's base address. - 2. If ``p + Size`` is greater than the heap bound, generate a trap. + + 2. If ``index + Offset + Size`` is greater than the heap bound, return the + ``NULL`` pointer or any other address that is guaranteed to generate a trap + when accessed. 
"#, &formats.heap_addr, ) - .operands_in(vec![H, p, Size]) + .operands_in(vec![H, index, Offset, Size]) .operands_out(vec![addr]), ); diff --git a/cranelift/codegen/src/legalizer/heap.rs b/cranelift/codegen/src/legalizer/heap.rs index 34ef3b34def0..d51a6244eb04 100644 --- a/cranelift/codegen/src/legalizer/heap.rs +++ b/cranelift/codegen/src/legalizer/heap.rs @@ -6,7 +6,7 @@ use crate::cursor::{Cursor, FuncCursor}; use crate::flowgraph::ControlFlowGraph; use crate::ir::condcodes::IntCC; -use crate::ir::immediates::Uimm32; +use crate::ir::immediates::{Uimm32, Uimm8}; use crate::ir::{self, InstBuilder, RelSourceLoc}; use crate::isa::TargetIsa; @@ -17,16 +17,18 @@ pub fn expand_heap_addr( cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa, heap: ir::Heap, - offset: ir::Value, - access_size: Uimm32, + index_operand: ir::Value, + offset_immediate: Uimm32, + access_size: Uimm8, ) { match func.heaps[heap].style { ir::HeapStyle::Dynamic { bound_gv } => dynamic_addr( isa, inst, heap, - offset, - u64::from(access_size), + index_operand, + u32::from(offset_immediate), + u8::from(access_size), bound_gv, func, ), @@ -34,8 +36,9 @@ pub fn expand_heap_addr( isa, inst, heap, - offset, - u64::from(access_size), + index_operand, + u32::from(offset_immediate), + u8::from(access_size), bound.into(), func, cfg, @@ -48,35 +51,40 @@ fn dynamic_addr( isa: &dyn TargetIsa, inst: ir::Inst, heap: ir::Heap, - offset: ir::Value, - access_size: u64, + index: ir::Value, + offset: u32, + access_size: u8, bound_gv: ir::GlobalValue, func: &mut ir::Function, ) { - let offset_ty = func.dfg.value_type(offset); + let index_ty = func.dfg.value_type(index); let addr_ty = func.dfg.value_type(func.dfg.first_result(inst)); let min_size = func.heaps[heap].min_size.into(); let mut pos = FuncCursor::new(func).at_inst(inst); pos.use_srcloc(inst); - let offset = cast_offset_to_pointer_ty(offset, offset_ty, addr_ty, &mut pos); + let index = cast_index_to_pointer_ty(index, index_ty, addr_ty, &mut pos); - // Start with the bounds check. Trap if `offset + access_size > bound`. + // Start with the bounds check. Trap if `index + offset + access_size > bound`. let bound = pos.ins().global_value(addr_ty, bound_gv); - let (cc, lhs, bound) = if access_size == 1 { - // `offset > bound - 1` is the same as `offset >= bound`. - (IntCC::UnsignedGreaterThanOrEqual, offset, bound) - } else if access_size <= min_size { - // We know that bound >= min_size, so here we can compare `offset > bound - access_size` - // without wrapping. - let adj_bound = pos.ins().iadd_imm(bound, -(access_size as i64)); - (IntCC::UnsignedGreaterThan, offset, adj_bound) + let (cc, lhs, bound) = if offset == 0 && access_size == 1 { + // `index > bound - 1` is the same as `index >= bound`. + (IntCC::UnsignedGreaterThanOrEqual, index, bound) + } else if offset_plus_size(offset, access_size) <= min_size { + // We know that `bound >= min_size`, so here we can compare `offset > + // bound - (offset + access_size)` without wrapping. + let adj_bound = pos + .ins() + .iadd_imm(bound, -(offset_plus_size(offset, access_size) as i64)); + (IntCC::UnsignedGreaterThan, index, adj_bound) } else { // We need an overflow check for the adjusted offset. 
- let access_size_val = pos.ins().iconst(addr_ty, access_size as i64); + let access_size_val = pos + .ins() + .iconst(addr_ty, offset_plus_size(offset, access_size) as i64); let adj_offset = pos.ins() - .uadd_overflow_trap(offset, access_size_val, ir::TrapCode::HeapOutOfBounds); + .uadd_overflow_trap(index, access_size_val, ir::TrapCode::HeapOutOfBounds); (IntCC::UnsignedGreaterThan, adj_offset, bound) }; let oob = pos.ins().icmp(cc, lhs, bound); @@ -93,6 +101,7 @@ fn dynamic_addr( inst, heap, addr_ty, + index, offset, pos.func, spectre_oob_comparison, @@ -104,26 +113,27 @@ fn static_addr( isa: &dyn TargetIsa, inst: ir::Inst, heap: ir::Heap, - mut offset: ir::Value, - access_size: u64, + index: ir::Value, + offset: u32, + access_size: u8, bound: u64, func: &mut ir::Function, cfg: &mut ControlFlowGraph, ) { - let offset_ty = func.dfg.value_type(offset); + let index_ty = func.dfg.value_type(index); let addr_ty = func.dfg.value_type(func.dfg.first_result(inst)); let mut pos = FuncCursor::new(func).at_inst(inst); pos.use_srcloc(inst); - // The goal here is to trap if `offset + access_size > bound`. + // The goal here is to trap if `index + offset + access_size > bound`. // - // This first case is a trivial case where we can easily trap. - if access_size > bound { + // This first case is a trivial case where we can statically trap. + if offset_plus_size(offset, access_size) > bound { // This will simply always trap since `offset >= 0`. pos.ins().trap(ir::TrapCode::HeapOutOfBounds); pos.func.dfg.replace(inst).iconst(addr_ty, 0); - // Split Block, as the trap is a terminator instruction. + // Split the block, as the trap is a terminator instruction. let curr_block = pos.current_block().expect("Cursor is not in a block"); let new_block = pos.func.dfg.make_block(); pos.insert_block(new_block); @@ -132,29 +142,29 @@ fn static_addr( return; } - // After the trivial case is done we're now mostly interested in trapping - // if `offset > bound - access_size`. We know `bound - access_size` here is - // non-negative from the above comparison. + // After the trivial case is done we're now mostly interested in trapping if + // `index > bound - offset - access_size`. We know `bound - offset - + // access_size` here is non-negative from the above comparison. // - // If we can know `bound - access_size >= 4GB` then with a 32-bit offset - // we're guaranteed: + // If we can know `bound - offset - access_size >= 4GB` then with a 32-bit + // offset we're guaranteed: // - // bound - access_size >= 4GB > offset + // bound - offset - access_size >= 4GB > index // - // or, in other words, `offset < bound - access_size`, meaning we can't trap - // for any value of `offset`. + // or, in other words, `index < bound - offset - access_size`, meaning we + // can't trap for any value of `index`. // // With that we have an optimization here where with 32-bit offsets and // `bound - access_size >= 4GB` we can omit a bounds check. - let limit = bound - access_size; + let limit = bound - offset as u64 - access_size as u64; let mut spectre_oob_comparison = None; - offset = cast_offset_to_pointer_ty(offset, offset_ty, addr_ty, &mut pos); - if offset_ty != ir::types::I32 || limit < 0xffff_ffff { - // Here we want to test the condition `offset > limit` and if that's + let index = cast_index_to_pointer_ty(index, index_ty, addr_ty, &mut pos); + if index_ty != ir::types::I32 || limit < 0xffff_ffff { + // Here we want to test the condition `index > limit` and if that's // true then this is an out-of-bounds access and needs to trap. 
For ARM // and other RISC architectures it's easier to test against an immediate // that's even instead of odd, so if `limit` is odd then we instead test - // for `offset >= limit + 1`. + // for `index >= limit + 1`. // // The thinking behind this is that: // @@ -164,10 +174,10 @@ fn static_addr( // should mean that `A >= B + 1` is an equivalent check for `A > B` let (cc, lhs, limit_imm) = if limit & 1 == 1 { let limit = limit as i64 + 1; - (IntCC::UnsignedGreaterThanOrEqual, offset, limit) + (IntCC::UnsignedGreaterThanOrEqual, index, limit) } else { let limit = limit as i64; - (IntCC::UnsignedGreaterThan, offset, limit) + (IntCC::UnsignedGreaterThan, index, limit) }; let oob = pos.ins().icmp_imm(cc, lhs, limit_imm); pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds); @@ -182,29 +192,30 @@ fn static_addr( inst, heap, addr_ty, + index, offset, pos.func, spectre_oob_comparison, ); } -fn cast_offset_to_pointer_ty( - offset: ir::Value, - offset_ty: ir::Type, +fn cast_index_to_pointer_ty( + index: ir::Value, + index_ty: ir::Type, addr_ty: ir::Type, pos: &mut FuncCursor, ) -> ir::Value { - if offset_ty == addr_ty { - return offset; + if index_ty == addr_ty { + return index; } // Note that using 64-bit heaps on a 32-bit host is not currently supported, // would require at least a bounds check here to ensure that the truncation // from 64-to-32 bits doesn't lose any upper bits. For now though we're // mostly interested in the 32-bit-heaps-on-64-bit-hosts cast. - assert!(offset_ty.bits() < addr_ty.bits()); + assert!(index_ty.bits() < addr_ty.bits()); - // Convert `offset` to `addr_ty`. - let extended_offset = pos.ins().uextend(addr_ty, offset); + // Convert `index` to `addr_ty`. + let extended_index = pos.ins().uextend(addr_ty, index); // Add debug value-label alias so that debuginfo can name the extended // value as the address @@ -213,9 +224,9 @@ fn cast_offset_to_pointer_ty( pos.func .stencil .dfg - .add_value_label_alias(extended_offset, loc, offset); + .add_value_label_alias(extended_index, loc, index); - extended_offset + extended_index } /// Emit code for the base address computation of a `heap_addr` instruction. @@ -224,7 +235,8 @@ fn compute_addr( inst: ir::Inst, heap: ir::Heap, addr_ty: ir::Type, - offset: ir::Value, + index: ir::Value, + offset: u32, func: &mut ir::Function, // If we are performing Spectre mitigation with conditional selects, the // values to compare and the condition code that indicates an out-of bounds @@ -232,7 +244,7 @@ fn compute_addr( // speculatively safe address (a zero / null pointer) instead. spectre_oob_comparison: Option<(IntCC, ir::Value, ir::Value)>, ) { - debug_assert_eq!(func.dfg.value_type(offset), addr_ty); + debug_assert_eq!(func.dfg.value_type(index), addr_ty); let mut pos = FuncCursor::new(func).at_inst(inst); pos.use_srcloc(inst); @@ -245,14 +257,33 @@ fn compute_addr( }; if let Some((cc, a, b)) = spectre_oob_comparison { - let final_addr = pos.ins().iadd(base, offset); + let final_base = pos.ins().iadd(base, index); + // NB: The addition of the offset immediate must happen *before* the + // `select_spectre_guard`. If it happens after, then we potentially are + // letting speculative execution read the whole first 4GiB of memory. 
+ let final_addr = if offset == 0 { + final_base + } else { + pos.ins().iadd_imm(final_base, offset as i64) + }; let zero = pos.ins().iconst(addr_ty, 0); let cmp = pos.ins().icmp(cc, a, b); pos.func .dfg .replace(inst) .select_spectre_guard(cmp, zero, final_addr); + } else if offset == 0 { + pos.func.dfg.replace(inst).iadd(base, index); } else { - pos.func.dfg.replace(inst).iadd(base, offset); + let final_base = pos.ins().iadd(base, index); + pos.func + .dfg + .replace(inst) + .iadd_imm(final_base, offset as i64); } } + +fn offset_plus_size(offset: u32, size: u8) -> u64 { + // Cannot overflow because we are widening to `u64`. + offset as u64 + size as u64 +} diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs index acb0c437a751..96eccb2079c9 100644 --- a/cranelift/codegen/src/legalizer/mod.rs +++ b/cranelift/codegen/src/legalizer/mod.rs @@ -72,8 +72,9 @@ pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: opcode: ir::Opcode::HeapAddr, heap, arg, - imm, - } => expand_heap_addr(inst, &mut pos.func, cfg, isa, heap, arg, imm), + offset, + size, + } => expand_heap_addr(inst, &mut pos.func, cfg, isa, heap, arg, offset, size), InstructionData::StackLoad { opcode: ir::Opcode::StackLoad, stack_slot, diff --git a/cranelift/codegen/src/write.rs b/cranelift/codegen/src/write.rs index 8e4ae92437af..725ca46c4945 100644 --- a/cranelift/codegen/src/write.rs +++ b/cranelift/codegen/src/write.rs @@ -476,7 +476,13 @@ pub fn write_operands(w: &mut dyn Write, dfg: &DataFlowGraph, inst: Inst) -> fmt dynamic_stack_slot, .. } => write!(w, " {}, {}", arg, dynamic_stack_slot), - HeapAddr { heap, arg, imm, .. } => write!(w, " {}, {}, {}", heap, arg, imm), + HeapAddr { + heap, + arg, + offset, + size, + .. + } => write!(w, " {}, {}, {}, {}", heap, arg, offset, size), TableAddr { table, arg, .. } => write!(w, " {}, {}", table, arg), Load { flags, arg, offset, .. diff --git a/cranelift/filetests/filetests/alias/extends.clif b/cranelift/filetests/filetests/alias/extends.clif index d6bbf7d4a837..30d5cc03553f 100644 --- a/cranelift/filetests/filetests/alias/extends.clif +++ b/cranelift/filetests/filetests/alias/extends.clif @@ -9,9 +9,9 @@ function %f0(i64 vmctx, i32) -> i32, i32, i32, i64, i64, i64 { gv0 = vmctx gv1 = load.i64 notrap readonly aligned gv0+8 heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 - + block0(v0: i64, v1: i32): - v2 = heap_addr.i64 heap0, v1, 0 + v2 = heap_addr.i64 heap0, v1, 12, 0 ;; Initial load. This will not be reused by anything below, even ;; though it does access the same address. 
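Reviewer note: the legalized bounds check above reduces to trapping when `index + offset + size > bound`, where `offset + size` is computed in `u64` so that sum can never wrap; only the addition of the dynamic `index` needs an explicit overflow trap. A minimal standalone sketch of the condition in plain Rust (illustrative only, not part of the patch; the function name is made up):

fn is_out_of_bounds(index: u64, offset: u32, size: u8, bound: u64) -> bool {
    // Widening to `u64` means `offset + size` cannot overflow,
    // mirroring `offset_plus_size` in the patch.
    let offset_plus_size = offset as u64 + size as u64;
    match index.checked_add(offset_plus_size) {
        // In bounds only if the end of the access stays at or below `bound`.
        Some(end) => end > bound,
        // Wrapping of the adjusted index is also out of bounds; the
        // legalizer handles this case with `uadd_overflow_trap`.
        None => true,
    }
}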
diff --git a/cranelift/filetests/filetests/alias/fence.clif b/cranelift/filetests/filetests/alias/fence.clif index 3202dbfcd750..c5b55ccc63b1 100644 --- a/cranelift/filetests/filetests/alias/fence.clif +++ b/cranelift/filetests/filetests/alias/fence.clif @@ -9,9 +9,9 @@ function %f0(i64 vmctx, i32) -> i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 gv0 = vmctx gv1 = load.i64 notrap readonly aligned gv0+8 heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 - + block0(v0: i64, v1: i32): - v2 = heap_addr.i64 heap0, v1, 0 + v2 = heap_addr.i64 heap0, v1, 12, 0 v3 = load.i32 v2+8 v4 = load.i32 vmctx v0+16 @@ -39,7 +39,7 @@ block0(v0: i64, v1: i32): v11 = atomic_load.i32 v0 v12 = load.i32 vmctx v0+16 - ; check: v12 = load.i32 vmctx v0+16 + ; check: v12 = load.i32 vmctx v0+16 return v3, v4, v5, v6, v7, v8, v9, v10, v11, v12 } diff --git a/cranelift/filetests/filetests/alias/multiple-blocks.clif b/cranelift/filetests/filetests/alias/multiple-blocks.clif index 3812c8911fbb..4ce7488b0a31 100644 --- a/cranelift/filetests/filetests/alias/multiple-blocks.clif +++ b/cranelift/filetests/filetests/alias/multiple-blocks.clif @@ -11,7 +11,7 @@ function %f0(i64 vmctx, i32) -> i32 { block0(v0: i64, v1: i32): - v2 = heap_addr.i64 heap0, v1, 0 + v2 = heap_addr.i64 heap0, v1, 12, 0 v3 = load.i32 v2+8 brz v2, block1 jump block2 diff --git a/cranelift/filetests/filetests/alias/partial-redundancy.clif b/cranelift/filetests/filetests/alias/partial-redundancy.clif index e869d262f1b5..3c2926ed617b 100644 --- a/cranelift/filetests/filetests/alias/partial-redundancy.clif +++ b/cranelift/filetests/filetests/alias/partial-redundancy.clif @@ -16,17 +16,17 @@ block0(v0: i64, v1: i32): jump block2 block1: - v2 = heap_addr.i64 heap0, v1, 0 + v2 = heap_addr.i64 heap0, v1, 68, 0 v3 = load.i32 v2+64 jump block3(v3) block2: - v4 = heap_addr.i64 heap0, v1, 0 + v4 = heap_addr.i64 heap0, v1, 132, 0 v5 = load.i32 v4+128 jump block3(v5) block3(v6: i32): - v7 = heap_addr.i64 heap0, v1, 0 + v7 = heap_addr.i64 heap0, v1, 68, 0 v8 = load.i32 v7+64 ;; load should survive: ; check: v8 = load.i32 v7+64 diff --git a/cranelift/filetests/filetests/alias/simple-alias.clif b/cranelift/filetests/filetests/alias/simple-alias.clif index 9b559bc3e571..f1109c8379e8 100644 --- a/cranelift/filetests/filetests/alias/simple-alias.clif +++ b/cranelift/filetests/filetests/alias/simple-alias.clif @@ -13,13 +13,13 @@ function %f0(i64 vmctx, i32) -> i32, i32, i32, i32 { fn0 = %g(i64 vmctx) block0(v0: i64, v1: i32): - v2 = heap_addr.i64 heap0, v1, 0 + v2 = heap_addr.i64 heap0, v1, 12, 0 v3 = load.i32 v2+8 ;; This should reuse the load above. - v4 = heap_addr.i64 heap0, v1, 0 + v4 = heap_addr.i64 heap0, v1, 12, 0 v5 = load.i32 v4+8 ; check: v5 -> v3 - + call fn0(v0) ;; The second load is redundant wrt the first, but the call above @@ -27,7 +27,7 @@ block0(v0: i64, v1: i32): v6 = load.i32 v4+8 v7 = load.i32 v4+8 ; check: v7 -> v6 - + return v3, v5, v6, v7 } @@ -42,13 +42,13 @@ function %f1(i64 vmctx, i32) -> i32 { fn0 = %g(i64 vmctx) block0(v0: i64, v1: i32): - v2 = heap_addr.i64 heap0, v1, 0 + v2 = heap_addr.i64 heap0, v1, 12, 0 store.i32 v1, v2+8 ;; This load should pick up the store above. 
- v3 = heap_addr.i64 heap0, v1, 0 + v3 = heap_addr.i64 heap0, v1, 12, 0 v4 = load.i32 v3+8 ; check: v4 -> v1 - + return v4 } diff --git a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif index f6b69d7c0c92..2f893bcd7122 100644 --- a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif +++ b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif @@ -9,7 +9,7 @@ function %dynamic_heap_check(i64 vmctx, i32) -> i64 { heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32 block0(v0: i64, v1: i32): - v2 = heap_addr.i64 heap0, v1, 0 + v2 = heap_addr.i64 heap0, v1, 0, 0 return v2 } @@ -34,7 +34,7 @@ function %static_heap_check(i64 vmctx, i32) -> i64 { heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32 block0(v0: i64, v1: i32): - v2 = heap_addr.i64 heap0, v1, 0 + v2 = heap_addr.i64 heap0, v1, 0, 0 return v2 } @@ -52,3 +52,59 @@ block0(v0: i64, v1: i32): ; block2: ; udf #0xc11f + +function %dynamic_heap_check_with_offset(i64 vmctx, i32) -> i64 { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 16, 8 + return v2 +} + +; block0: +; mov w11, w1 +; ldr x10, [x0] +; movz x9, #24 +; adds x11, x11, x9 +; b.lo 8 ; udf +; subs xzr, x11, x10 +; b.ls label1 ; b label2 +; block1: +; add x13, x0, x1, UXTW +; add x13, x13, #16 +; movz x12, #0 +; subs xzr, x11, x10 +; csel x0, x12, x13, hi +; csdb +; ret +; block2: +; udf #0xc11f + +function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 { + gv0 = vmctx + heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 16, 8 + return v2 +} + +; block0: +; mov w9, w1 +; movz x10, #65512 +; subs xzr, x9, x10 +; b.ls label1 ; b label2 +; block1: +; add x11, x0, x1, UXTW +; add x11, x11, #16 +; movz x10, #65512 +; movz x12, #0 +; subs xzr, x9, x10 +; csel x0, x12, x11, hi +; csdb +; ret +; block2: +; udf #0xc11f + diff --git a/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif b/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif index 140add4a6d8b..d90da8f22c45 100644 --- a/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif +++ b/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif @@ -8,7 +8,7 @@ function %dynamic_heap_check(i64 vmctx, i32) -> i64 { heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32 block0(v0: i64, v1: i32): - v2 = heap_addr.i64 heap0, v1, 0 + v2 = heap_addr.i64 heap0, v1, 0, 0 return v2 } @@ -32,7 +32,7 @@ function %static_heap_check(i64 vmctx, i32) -> i64 { heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32 block0(v0: i64, v1: i32): - v2 = heap_addr.i64 heap0, v1, 0 + v2 = heap_addr.i64 heap0, v1, 0, 0 return v2 } @@ -51,3 +51,59 @@ block0(v0: i64, v1: i32): ; block2: ; udf##trap_code=heap_oob +function %dynamic_heap_check_with_offset(i64 vmctx, i32) -> i64 { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 16, 8 + return v2 +} + +; block0: +; uext.w t1,a1 +; ld t0,0(a0) +; li t3,24 +; add t2,t1,t3 +; ult a1,t2,t1##ty=i64 +; trap_if a1,heap_oob +; ule a1,t2,t0##ty=i64 +; bne a1,zero,taken(label1),not_taken(label2) +; block1: +; add a0,a0,t1 +; addi a0,a0,16 +; ugt t1,t2,t0##ty=i64 +; li a1,0 +; selectif_spectre_guard a0,a1,a0##test=t1 +; ret +; block2: 
+; udf##trap_code=heap_oob + +function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 { + gv0 = vmctx + heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 16, 8 + return v2 +} + +; block0: +; uext.w t3,a1 +; lui a7,16 +; addi a7,a7,4072 +; ule t0,t3,a7##ty=i64 +; bne t0,zero,taken(label1),not_taken(label2) +; block1: +; add t0,a0,t3 +; addi t0,t0,16 +; lui t4,16 +; addi t4,t4,4072 +; ugt t1,t3,t4##ty=i64 +; li a0,0 +; selectif_spectre_guard a0,a0,t0##test=t1 +; ret +; block2: +; udf##trap_code=heap_oob + diff --git a/cranelift/filetests/filetests/isa/s390x/heap_addr.clif b/cranelift/filetests/filetests/isa/s390x/heap_addr.clif index acde9132509c..4dc22f499f3e 100644 --- a/cranelift/filetests/filetests/isa/s390x/heap_addr.clif +++ b/cranelift/filetests/filetests/isa/s390x/heap_addr.clif @@ -7,7 +7,7 @@ function %dynamic_heap_check(i64 vmctx, i32) -> i64 { heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32 block0(v0: i64, v1: i32): - v2 = heap_addr.i64 heap0, v1, 0 + v2 = heap_addr.i64 heap0, v1, 0, 0 return v2 } @@ -32,7 +32,7 @@ function %static_heap_check(i64 vmctx, i32) -> i64 { heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32 block0(v0: i64, v1: i32): - v2 = heap_addr.i64 heap0, v1, 0 + v2 = heap_addr.i64 heap0, v1, 0, 0 return v2 } @@ -49,3 +49,56 @@ block0(v0: i64, v1: i32): ; block2: ; trap +function %dynamic_heap_check_with_offset(i64 vmctx, i32) -> i64 { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 16, 8 + return v2 +} + +; stmg %r7, %r15, 56(%r15) +; block0: +; llgfr %r7, %r3 +; lg %r4, 0(%r2) +; lghi %r5, 24 +; algfr %r5, %r3 +; jle 6 ; trap +; clgr %r5, %r4 +; jgnh label1 ; jg label2 +; block1: +; agrk %r3, %r2, %r7 +; aghik %r2, %r3, 16 +; lghi %r3, 0 +; clgr %r5, %r4 +; locgrh %r2, %r3 +; lmg %r7, %r15, 56(%r15) +; br %r14 +; block2: +; trap + +function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 { + gv0 = vmctx + heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 16, 8 + return v2 +} + +; block0: +; llgfr %r5, %r3 +; clgfi %r5, 65512 +; jgnh label1 ; jg label2 +; block1: +; agrk %r3, %r2, %r5 +; aghik %r2, %r3, 16 +; lghi %r3, 0 +; clgfi %r5, 65512 +; locgrh %r2, %r3 +; br %r14 +; block2: +; trap + diff --git a/cranelift/filetests/filetests/isa/x64/heap-no-spectre.clif b/cranelift/filetests/filetests/isa/x64/heap-no-spectre.clif index 652742df8be7..ca1595001553 100644 --- a/cranelift/filetests/filetests/isa/x64/heap-no-spectre.clif +++ b/cranelift/filetests/filetests/isa/x64/heap-no-spectre.clif @@ -12,7 +12,7 @@ function %f(i32, i64 vmctx) -> i64 { heap0 = dynamic gv1, bound gv2, offset_guard 0x1000, index_type i32 block0(v0: i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 0x8000 + v2 = heap_addr.i64 heap0, v0, 0x8000, 0 return v2 } @@ -20,14 +20,15 @@ block0(v0: i32, v1: i64): ; movq %rsp, %rbp ; block0: ; movl %edi, %eax -; movq 8(%rsi), %r9 -; movq %rax, %r10 -; addq %r10, $32768, %r10 +; movq 8(%rsi), %r10 +; movq %rax, %r11 +; addq %r11, $32768, %r11 ; jnb ; ud2 heap_oob ; -; cmpq %r9, %r10 +; cmpq %r10, %r11 ; jbe label1; j label2 ; block1: ; addq %rax, 0(%rsi), %rax +; addq %rax, $32768, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -43,7 +44,7 @@ function %f(i64 vmctx, i32) -> i64 system_v { heap0 = static gv1, bound 
0x1000, offset_guard 0x1000, index_type i32 block0(v0: i64, v1: i32): - v10 = heap_addr.i64 heap0, v1, 0 + v10 = heap_addr.i64 heap0, v1, 0, 0 return v10 } @@ -70,7 +71,7 @@ function %f(i64 vmctx, i32) -> i64 system_v { heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 block0(v0: i64, v1: i32): - v10 = heap_addr.i64 heap0, v1, 0 + v10 = heap_addr.i64 heap0, v1, 0, 0 return v10 } diff --git a/cranelift/filetests/filetests/isa/x64/heap.clif b/cranelift/filetests/filetests/isa/x64/heap.clif index b1eaaf15ff75..87444682ac46 100644 --- a/cranelift/filetests/filetests/isa/x64/heap.clif +++ b/cranelift/filetests/filetests/isa/x64/heap.clif @@ -25,7 +25,7 @@ function %f(i32, i64 vmctx) -> i64 { heap0 = dynamic gv1, bound gv2, offset_guard 0x1000, index_type i32 block0(v0: i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 0x8000 + v2 = heap_addr.i64 heap0, v0, 0x8000, 0 return v2 } @@ -33,16 +33,17 @@ block0(v0: i32, v1: i64): ; movq %rsp, %rbp ; block0: ; movl %edi, %eax -; movq 8(%rsi), %r11 +; movq 8(%rsi), %rdx ; movq %rax, %rdi ; addq %rdi, $32768, %rdi ; jnb ; ud2 heap_oob ; -; cmpq %r11, %rdi +; cmpq %rdx, %rdi ; jbe label1; j label2 ; block1: ; addq %rax, 0(%rsi), %rax +; addq %rax, $32768, %rax ; xorq %rcx, %rcx, %rcx -; cmpq %r11, %rdi +; cmpq %rdx, %rdi ; cmovnbeq %rcx, %rax, %rax ; movq %rbp, %rsp ; popq %rbp @@ -60,7 +61,7 @@ function %f(i64 vmctx, i32) -> i64 system_v { heap0 = static gv1, bound 0x1000, offset_guard 0x1000, index_type i32 block0(v0: i64, v1: i32): - v10 = heap_addr.i64 heap0, v1, 0 + v10 = heap_addr.i64 heap0, v1, 0, 0 return v10 } @@ -91,7 +92,7 @@ function %f(i64 vmctx, i32) -> i64 system_v { heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 block0(v0: i64, v1: i32): - v10 = heap_addr.i64 heap0, v1, 0 + v10 = heap_addr.i64 heap0, v1, 0, 0 return v10 } @@ -104,3 +105,66 @@ block0(v0: i64, v1: i32): ; popq %rbp ; ret +function %dynamic_heap_check_with_offset(i64 vmctx, i32) -> i64 { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 16, 8 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movl %esi, %edi +; movq %rax, %rcx +; movq 0(%rcx), %rsi +; movq %rdi, %rdx +; addq %rdx, $24, %rdx +; jnb ; ud2 heap_oob ; +; cmpq %rsi, %rdx +; jbe label1; j label2 +; block1: +; movq %rcx, %rax +; addq %rax, %rdi, %rax +; addq %rax, $16, %rax +; xorq %rcx, %rcx, %rcx +; cmpq %rsi, %rdx +; cmovnbeq %rcx, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; ud2 heap_oob + +function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 { + gv0 = vmctx + heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 16, 8 + return v2 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl %esi, %r10d +; cmpq $65512, %r10 +; jbe label1; j label2 +; block1: +; movq %rdi, %rax +; addq %rax, %r10, %rax +; addq %rax, $16, %rax +; xorq %r11, %r11, %r11 +; cmpq $65512, %r10 +; cmovnbeq %r11, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; ud2 heap_oob + diff --git a/cranelift/filetests/filetests/licm/load_readonly_notrap.clif b/cranelift/filetests/filetests/licm/load_readonly_notrap.clif index 011b5833d5e3..27b72cfda922 100644 --- a/cranelift/filetests/filetests/licm/load_readonly_notrap.clif +++ b/cranelift/filetests/filetests/licm/load_readonly_notrap.clif @@ -16,7 +16,7 @@ 
block0(v0: i32, v1: i64): block1(v2: i32, v3: i64): v4 = iconst.i32 1 - v5 = heap_addr.i64 heap0, v4, 1 + v5 = heap_addr.i64 heap0, v4, 0, 4 v6 = load.i32 notrap aligned readonly v5 v7 = iadd v2, v6 brz v2, block3(v2) @@ -37,7 +37,7 @@ block3(v9: i32): ; nextln: ; nextln: block0(v0: i32, v1: i64): ; nextln: v4 = iconst.i32 1 -; nextln: v5 = heap_addr.i64 heap0, v4, 1 +; nextln: v5 = heap_addr.i64 heap0, v4, 0, 4 ; nextln: v6 = load.i32 notrap aligned readonly v5 ; nextln: jump block1(v0, v1) ; nextln: diff --git a/cranelift/filetests/filetests/licm/reject_load_notrap.clif b/cranelift/filetests/filetests/licm/reject_load_notrap.clif index 6236d0d1efab..3ca5b1c5df8d 100644 --- a/cranelift/filetests/filetests/licm/reject_load_notrap.clif +++ b/cranelift/filetests/filetests/licm/reject_load_notrap.clif @@ -14,7 +14,7 @@ function %hoist_load(i32, i64 vmctx) -> i32 { block0(v0: i32, v1: i64): v4 = iconst.i32 1 - v5 = heap_addr.i64 heap0, v4, 1 + v5 = heap_addr.i64 heap0, v4, 0, 4 jump block1(v0, v1) block1(v2: i32, v3: i64): @@ -32,25 +32,25 @@ block3(v9: i32): } ; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast { -; nextln: gv0 = vmctx -; nextln: gv1 = load.i64 notrap aligned readonly gv0 -; nextln: heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 +; nextln: gv0 = vmctx +; nextln: gv1 = load.i64 notrap aligned readonly gv0 +; nextln: heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 ; nextln: ; nextln: block0(v0: i32, v1: i64): -; nextln: v4 = iconst.i32 1 -; nextln: v5 = heap_addr.i64 heap0, v4, 1 -; nextln: jump block1(v0, v1) +; nextln: v4 = iconst.i32 1 +; nextln: v5 = heap_addr.i64 heap0, v4, 0, 4 ; v4 = 1 +; nextln: jump block1(v0, v1) ; nextln: ; nextln: block1(v2: i32, v3: i64): -; nextln: v6 = load.i32 notrap aligned v5 -; nextln: v7 = iadd v2, v6 -; nextln: brz v2, block3(v2) -; nextln: jump block2 +; nextln: v6 = load.i32 notrap aligned v5 +; nextln: v7 = iadd v2, v6 +; nextln: brz v2, block3(v2) +; nextln: jump block2 ; nextln: ; nextln: block2: -; nextln: v8 = isub.i32 v2, v4 -; nextln: jump block1(v8, v3) +; nextln: v8 = isub.i32 v2, v4 ; v4 = 1 +; nextln: jump block1(v8, v3) ; nextln: ; nextln: block3(v9: i32): -; nextln: return v9 +; nextln: return v9 ; nextln: } diff --git a/cranelift/filetests/filetests/licm/reject_load_readonly.clif b/cranelift/filetests/filetests/licm/reject_load_readonly.clif index c94ace259124..a180847cd779 100644 --- a/cranelift/filetests/filetests/licm/reject_load_readonly.clif +++ b/cranelift/filetests/filetests/licm/reject_load_readonly.clif @@ -17,7 +17,7 @@ block0(v0: i32, v1: i64): block1(v2: i32, v3: i64): v4 = iconst.i32 1 - v5 = heap_addr.i64 heap0, v4, 1 + v5 = heap_addr.i64 heap0, v4, 0, 4 v6 = load.i32 aligned readonly v5 v7 = iadd v2, v6 brz v2, block3(v2) @@ -38,7 +38,7 @@ block3(v9: i32): ; nextln: ; nextln: block0(v0: i32, v1: i64): ; nextln: v4 = iconst.i32 1 -; nextln: v5 = heap_addr.i64 heap0, v4, 1 +; nextln: v5 = heap_addr.i64 heap0, v4, 0, 4 ; nextln: jump block1(v0, v1) ; nextln: ; nextln: block1(v2: i32, v3: i64): diff --git a/cranelift/filetests/filetests/parser/memory.clif b/cranelift/filetests/filetests/parser/memory.clif index abe059c0fb7c..31f8589bea8f 100644 --- a/cranelift/filetests/filetests/parser/memory.clif +++ b/cranelift/filetests/filetests/parser/memory.clif @@ -60,8 +60,8 @@ function %sheap(i32, i64 vmctx) -> i64 { ; check: heap1 = static gv5, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 
0x8000_0000 ; check: heap2 = static gv5, min 0, bound 0x0001_0000, offset_guard 4096 block0(v1: i32, v2: i64): - v3 = heap_addr.i64 heap1, v1, 0 - ; check: v3 = heap_addr.i64 heap1, v1, 0 + v3 = heap_addr.i64 heap1, v1, 0, 0 + ; check: v3 = heap_addr.i64 heap1, v1, 0, 0 return v3 } @@ -76,7 +76,7 @@ function %dheap(i32, i64 vmctx) -> i64 { ; check: heap1 = dynamic gv5, min 0x0001_0000, bound gv6, offset_guard 0x8000_0000 ; check: heap2 = dynamic gv5, min 0, bound gv6, offset_guard 4096 block0(v1: i32, v2: i64): - v3 = heap_addr.i64 heap2, v1, 0 - ; check: v3 = heap_addr.i64 heap2, v1, 0 + v3 = heap_addr.i64 heap2, v1, 0, 0 + ; check: v3 = heap_addr.i64 heap2, v1, 0, 0 return v3 } diff --git a/cranelift/filetests/filetests/runtests/conversions-load-store.clif b/cranelift/filetests/filetests/runtests/conversions-load-store.clif index c30aa19b0df5..78abe5ba6748 100644 --- a/cranelift/filetests/filetests/runtests/conversions-load-store.clif +++ b/cranelift/filetests/filetests/runtests/conversions-load-store.clif @@ -11,7 +11,7 @@ function %fpromote_f32_f64(i64 vmctx, i64, f32) -> f64 { heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64 block0(v0: i64, v1: i64, v2: f32): - v3 = heap_addr.i64 heap0, v1, 4 + v3 = heap_addr.i64 heap0, v1, 0, 4 store.f32 v2, v3 v4 = load.f32 v3 v5 = fpromote.f64 v4 @@ -31,7 +31,7 @@ function %fdemote_test(i64 vmctx, i64, f64) -> f32 { heap0 = static gv1, min 0x10, bound 0x10, offset_guard 0x0, index_type i64 block0(v0: i64, v1: i64, v2: f64): - v3 = heap_addr.i64 heap0, v1, 8 + v3 = heap_addr.i64 heap0, v1, 0, 8 store.f64 v2, v3 v4 = load.f64 v3 v5 = fdemote.f32 v4 @@ -51,7 +51,7 @@ function %fvdemote_test(i64 vmctx, i64, f64x2) -> f32x4 { heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64 block0(v0: i64, v1: i64, v2: f64x2): - v3 = heap_addr.i64 heap0, v1, 16 + v3 = heap_addr.i64 heap0, v1, 0, 16 store.f64x2 v2, v3 v4 = load.f64x2 v3 v5 = fvdemote v4 @@ -72,7 +72,7 @@ function %fvpromote_low_test(i64 vmctx, i64, f32x4) -> f64x2 { heap0 = static gv1, min 0x20, bound 0x20, offset_guard 0, index_type i64 block0(v0: i64, v1: i64, v2: f32x4): - v3 = heap_addr.i64 heap0, v1, 16 + v3 = heap_addr.i64 heap0, v1, 0, 16 store.f32x4 v2, v3 v4 = load.f32x4 v3 v5 = fvpromote_low v4 diff --git a/cranelift/filetests/filetests/runtests/global_value.clif b/cranelift/filetests/filetests/runtests/global_value.clif index e9514b1d6bd2..e8caf14805f6 100644 --- a/cranelift/filetests/filetests/runtests/global_value.clif +++ b/cranelift/filetests/filetests/runtests/global_value.clif @@ -12,7 +12,7 @@ function %store_load(i64 vmctx, i64, i32) -> i32 { heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0, index_type i64 block0(v0: i64, v1: i64, v2: i32): - v3 = heap_addr.i64 heap0, v1, 0 + v3 = heap_addr.i64 heap0, v1, 0, 0 store.i32 v2, v3 v4 = global_value.i64 gv1 diff --git a/cranelift/filetests/filetests/runtests/heap.clif b/cranelift/filetests/filetests/runtests/heap.clif index e956dcff3868..3e7bd41649ac 100644 --- a/cranelift/filetests/filetests/runtests/heap.clif +++ b/cranelift/filetests/filetests/runtests/heap.clif @@ -11,7 +11,7 @@ function %static_heap_i64(i64 vmctx, i64, i32) -> i32 { heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0, index_type i64 block0(v0: i64, v1: i64, v2: i32): - v3 = heap_addr.i64 heap0, v1, 4 + v3 = heap_addr.i64 heap0, v1, 0, 4 store.i32 v2, v3 v4 = load.i32 v3 return v4 @@ -29,7 +29,7 @@ function %static_heap_i32(i64 vmctx, i32, i32) -> i32 { heap0 = static gv1, min 0x1000, 
bound 0x1_0000_0000, offset_guard 0, index_type i32 block0(v0: i64, v1: i32, v2: i32): - v3 = heap_addr.i64 heap0, v1, 4 + v3 = heap_addr.i64 heap0, v1, 0, 4 store.i32 v2, v3 v4 = load.i32 v3 return v4 @@ -47,7 +47,7 @@ function %heap_no_min(i64 vmctx, i32, i32) -> i32 { heap0 = static gv1, bound 0x1_0000_0000, offset_guard 0, index_type i32 block0(v0: i64, v1: i32, v2: i32): - v3 = heap_addr.i64 heap0, v1, 4 + v3 = heap_addr.i64 heap0, v1, 0, 4 store.i32 v2, v3 v4 = load.i32 v3 return v4 @@ -66,7 +66,7 @@ function %dynamic_i64(i64 vmctx, i64, i32) -> i32 { heap0 = dynamic gv1, bound gv2, offset_guard 0, index_type i64 block0(v0: i64, v1: i64, v2: i32): - v3 = heap_addr.i64 heap0, v1, 4 + v3 = heap_addr.i64 heap0, v1, 0, 4 store.i32 v2, v3 v4 = load.i32 v3 return v4 @@ -85,7 +85,7 @@ function %dynamic_i32(i64 vmctx, i32, i32) -> i32 { heap0 = dynamic gv1, bound gv2, offset_guard 0, index_type i32 block0(v0: i64, v1: i32, v2: i32): - v3 = heap_addr.i64 heap0, v1, 4 + v3 = heap_addr.i64 heap0, v1, 0, 4 store.i32 v2, v3 v4 = load.i32 v3 return v4 @@ -110,11 +110,11 @@ block0(v0: i64, v1: i32, v2: i32): v4 = iconst.i32 0 ; Store lhs in heap0 - v5 = heap_addr.i64 heap0, v3, 4 + v5 = heap_addr.i64 heap0, v3, 0, 4 store.i32 v1, v5 ; Store rhs in heap1 - v6 = heap_addr.i64 heap1, v4, 4 + v6 = heap_addr.i64 heap1, v4, 0, 4 store.i32 v2, v6 @@ -146,11 +146,11 @@ block0(v0: i64, v1: i32, v2: i32): v4 = iconst.i64 0 ; Store lhs in heap0 - v5 = heap_addr.i64 heap0, v3, 4 + v5 = heap_addr.i64 heap0, v3, 0, 4 store.i32 v1, v5 ; Store rhs in heap1 - v6 = heap_addr.i64 heap1, v4, 4 + v6 = heap_addr.i64 heap1, v4, 0, 4 store.i32 v2, v6 @@ -172,7 +172,7 @@ function %unaligned_access(i64 vmctx, i64, i32) -> i32 { heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0, index_type i64 block0(v0: i64, v1: i64, v2: i32): - v3 = heap_addr.i64 heap0, v1, 4 + v3 = heap_addr.i64 heap0, v1, 0, 4 store.i32 v2, v3 v4 = load.i32 v3 return v4 @@ -196,7 +196,7 @@ function %iadd_imm(i64 vmctx, i32) -> i32 { block0(v0: i64, v1: i32): v2 = iconst.i64 0 - v3 = heap_addr.i64 heap0, v2, 4 + v3 = heap_addr.i64 heap0, v2, 0, 4 store.i32 v1, v3 v4 = load.i32 v3 return v4 @@ -211,7 +211,7 @@ function %heap_limit_i64(i64 vmctx, i64, i32) -> i32 { heap0 = static gv1, min 0, bound 0x8, offset_guard 0, index_type i64 block0(v0: i64, v1: i64, v2: i32): - v3 = heap_addr.i64 heap0, v1, 4 + v3 = heap_addr.i64 heap0, v1, 0, 4 store.i32 v2, v3 v4 = load.i32 v3 return v4 diff --git a/cranelift/filetests/filetests/runtests/load-op-store.clif b/cranelift/filetests/filetests/runtests/load-op-store.clif index 0d7ba86c4882..d2dfb12a4130 100644 --- a/cranelift/filetests/filetests/runtests/load-op-store.clif +++ b/cranelift/filetests/filetests/runtests/load-op-store.clif @@ -2,7 +2,7 @@ test run target x86_64 target s390x target aarch64 -target riscv64 +target riscv64 function %load_op_store_iadd_i64(i64 vmctx, i64, i64) -> i64 { @@ -11,7 +11,7 @@ function %load_op_store_iadd_i64(i64 vmctx, i64, i64) -> i64 { heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0, index_type i64 block0(v0: i64, v1: i64, v2: i64): - v3 = heap_addr.i64 heap0, v1, 8 + v3 = heap_addr.i64 heap0, v1, 0, 8 v4 = iconst.i64 42 store.i64 v4, v3 v5 = load.i64 v3 @@ -30,7 +30,7 @@ function %load_op_store_iadd_i32(i64 vmctx, i64, i32) -> i32 { heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0, index_type i64 block0(v0: i64, v1: i64, v2: i32): - v3 = heap_addr.i64 heap0, v1, 4 + v3 = heap_addr.i64 heap0, v1, 0, 4 v4 = iconst.i32 42 
store.i32 v4, v3 v5 = load.i32 v3 @@ -49,7 +49,7 @@ function %load_op_store_iadd_i8(i64 vmctx, i64, i8) -> i8 { heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0, index_type i64 block0(v0: i64, v1: i64, v2: i8): - v3 = heap_addr.i64 heap0, v1, 4 + v3 = heap_addr.i64 heap0, v1, 0, 4 v4 = iconst.i8 42 store.i8 v4, v3 v5 = load.i8 v3 @@ -68,7 +68,7 @@ function %load_op_store_iadd_isub_iand_ior_ixor_i64(i64 vmctx, i64, i64) -> i64 heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0, index_type i64 block0(v0: i64, v1: i64, v2: i64): - v3 = heap_addr.i64 heap0, v1, 8 + v3 = heap_addr.i64 heap0, v1, 0, 8 store.i64 v2, v3 v4 = load.i64 v3 v5 = iconst.i64 1 diff --git a/cranelift/filetests/filetests/runtests/table_addr.clif b/cranelift/filetests/filetests/runtests/table_addr.clif index f77d356e367d..186dcb1a89c0 100644 --- a/cranelift/filetests/filetests/runtests/table_addr.clif +++ b/cranelift/filetests/filetests/runtests/table_addr.clif @@ -128,7 +128,7 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64): ; v1 - heap offset (bytes) ; v2 - table offset (elements) ; v3 - store/load value - v4 = heap_addr.i64 heap0, v1, 0 + v4 = heap_addr.i64 heap0, v1, 0, 0 v5 = table_addr.i64 table0, v2, +2 ; Store via heap, load via table diff --git a/cranelift/filetests/filetests/simple_gvn/readonly.clif b/cranelift/filetests/filetests/simple_gvn/readonly.clif index 93ede4a5b8aa..b28da609a7b0 100644 --- a/cranelift/filetests/filetests/simple_gvn/readonly.clif +++ b/cranelift/filetests/filetests/simple_gvn/readonly.clif @@ -9,8 +9,8 @@ function %eliminate_redundant_global_loads(i32, i64 vmctx) { heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 block0(v0: i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 1 - v3 = heap_addr.i64 heap0, v0, 1 + v2 = heap_addr.i64 heap0, v0, 0, 1 + v3 = heap_addr.i64 heap0, v0, 0, 1 v4 = iconst.i32 0 store.i32 notrap aligned v4, v2 @@ -18,7 +18,7 @@ block0(v0: i32, v1: i64): return } -; check: v2 = heap_addr.i64 heap0, v0, 1 +; check: v2 = heap_addr.i64 heap0, v0, 0, 1 ; check: v3 -> v2 ; check: v4 = iconst.i32 0 ; check: store notrap aligned v4, v2 diff --git a/cranelift/filetests/filetests/simple_preopt/replace_branching_instructions_and_cfg_predecessors.clif b/cranelift/filetests/filetests/simple_preopt/replace_branching_instructions_and_cfg_predecessors.clif index 493896f0d751..e5adefca0015 100644 --- a/cranelift/filetests/filetests/simple_preopt/replace_branching_instructions_and_cfg_predecessors.clif +++ b/cranelift/filetests/filetests/simple_preopt/replace_branching_instructions_and_cfg_predecessors.clif @@ -7,7 +7,7 @@ function u0:2(i64 , i64) { heap0 = static gv1 block0(v0: i64, v1: i64): v16 = iconst.i32 6 - v17 = heap_addr.i64 heap0, v16, 1 + v17 = heap_addr.i64 heap0, v16, 0, 1 v18 = load.i32 v17 v19 = iconst.i32 4 v20 = icmp ne v18, v19 diff --git a/cranelift/filetests/filetests/verifier/heap.clif b/cranelift/filetests/filetests/verifier/heap.clif index 2a73f4ee8f01..b46779e23359 100644 --- a/cranelift/filetests/filetests/verifier/heap.clif +++ b/cranelift/filetests/filetests/verifier/heap.clif @@ -40,6 +40,6 @@ function %heap_addr_index_type(i64 vmctx, i64) { heap0 = static gv0, offset_guard 0x1000, bound 0x1_0000, index_type i32 block0(v0: i64, v1: i64): - v2 = heap_addr.i64 heap0, v1, 0; error: index type i64 differs from heap index type i32 + v2 = heap_addr.i64 heap0, v1, 0, 0; error: index type i64 differs from heap index type i32 return } diff --git 
a/cranelift/filetests/filetests/wasm/f32-memory64.clif b/cranelift/filetests/filetests/wasm/f32-memory64.clif index 9985898b7947..8f6d5e44322c 100644 --- a/cranelift/filetests/filetests/wasm/f32-memory64.clif +++ b/cranelift/filetests/filetests/wasm/f32-memory64.clif @@ -11,7 +11,7 @@ function %f32_load(i32, i64 vmctx) -> f32 { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 1 + v2 = heap_addr.i64 heap0, v0, 0, 1 v3 = load.f32 v2 return v3 } @@ -21,7 +21,7 @@ function %f32_store(f32, i32, i64 vmctx) { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: f32, v1: i32, v2: i64): - v3 = heap_addr.i64 heap0, v1, 1 + v3 = heap_addr.i64 heap0, v1, 0, 1 store v0, v3 return } diff --git a/cranelift/filetests/filetests/wasm/f64-memory64.clif b/cranelift/filetests/filetests/wasm/f64-memory64.clif index f55a73fb8711..2805be18ef07 100644 --- a/cranelift/filetests/filetests/wasm/f64-memory64.clif +++ b/cranelift/filetests/filetests/wasm/f64-memory64.clif @@ -11,7 +11,7 @@ function %f64_load(i32, i64 vmctx) -> f64 { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 1 + v2 = heap_addr.i64 heap0, v0, 0, 1 v3 = load.f64 v2 return v3 } @@ -21,7 +21,7 @@ function %f64_store(f64, i32, i64 vmctx) { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: f64, v1: i32, v2: i64): - v3 = heap_addr.i64 heap0, v1, 1 + v3 = heap_addr.i64 heap0, v1, 0, 1 store v0, v3 return } diff --git a/cranelift/filetests/filetests/wasm/i32-memory64.clif b/cranelift/filetests/filetests/wasm/i32-memory64.clif index 7fcf0316c2ec..f4a89f1da2f1 100644 --- a/cranelift/filetests/filetests/wasm/i32-memory64.clif +++ b/cranelift/filetests/filetests/wasm/i32-memory64.clif @@ -11,7 +11,7 @@ function %i32_load(i32, i64 vmctx) -> i32 { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 1 + v2 = heap_addr.i64 heap0, v0, 0, 1 v3 = load.i32 v2 return v3 } @@ -21,7 +21,7 @@ function %i32_store(i32, i32, i64 vmctx) { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i32, v1: i32, v2: i64): - v3 = heap_addr.i64 heap0, v1, 1 + v3 = heap_addr.i64 heap0, v1, 0, 1 store v0, v3 return } @@ -31,7 +31,7 @@ function %i32_load8_s(i32, i64 vmctx) -> i32 { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 1 + v2 = heap_addr.i64 heap0, v0, 0, 1 v3 = sload8.i32 v2 return v3 } @@ -41,7 +41,7 @@ function %i32_load8_u(i32, i64 vmctx) -> i32 { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 1 + v2 = heap_addr.i64 heap0, v0, 0, 1 v3 = uload8.i32 v2 return v3 } @@ -51,7 +51,7 @@ function %i32_store8(i32, i32, i64 vmctx) { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i32, v1: i32, v2: i64): - v3 = heap_addr.i64 heap0, v1, 1 + v3 = heap_addr.i64 heap0, v1, 0, 1 istore8 v0, v3 return } @@ -61,7 +61,7 @@ function %i32_load16_s(i32, i64 vmctx) -> i32 { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 1 + v2 = heap_addr.i64 heap0, v0, 
0, 1 v3 = sload16.i32 v2 return v3 } @@ -71,7 +71,7 @@ function %i32_load16_u(i32, i64 vmctx) -> i32 { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 1 + v2 = heap_addr.i64 heap0, v0, 0, 1 v3 = uload16.i32 v2 return v3 } @@ -81,8 +81,7 @@ function %i32_store16(i32, i32, i64 vmctx) { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i32, v1: i32, v2: i64): - v3 = heap_addr.i64 heap0, v1, 1 + v3 = heap_addr.i64 heap0, v1, 0, 1 istore16 v0, v3 return } - diff --git a/cranelift/filetests/filetests/wasm/i64-memory64.clif b/cranelift/filetests/filetests/wasm/i64-memory64.clif index 7f76ccd86e5d..64c3baecc64f 100644 --- a/cranelift/filetests/filetests/wasm/i64-memory64.clif +++ b/cranelift/filetests/filetests/wasm/i64-memory64.clif @@ -11,7 +11,7 @@ function %i64_load(i32, i64 vmctx) -> i64 { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 1 + v2 = heap_addr.i64 heap0, v0, 0, 1 v3 = load.i64 v2 return v3 } @@ -21,7 +21,7 @@ function %i64_store(i64, i32, i64 vmctx) { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i64, v1: i32, v2: i64): - v3 = heap_addr.i64 heap0, v1, 1 + v3 = heap_addr.i64 heap0, v1, 0, 1 store v0, v3 return } @@ -31,7 +31,7 @@ function %i64_load8_s(i32, i64 vmctx) -> i64 { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 1 + v2 = heap_addr.i64 heap0, v0, 0, 1 v3 = sload8.i64 v2 return v3 } @@ -41,7 +41,7 @@ function %i64_load8_u(i32, i64 vmctx) -> i64 { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 1 + v2 = heap_addr.i64 heap0, v0, 0, 1 v3 = uload8.i64 v2 return v3 } @@ -51,7 +51,7 @@ function %i64_store8(i64, i32, i64 vmctx) { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i64, v1: i32, v2: i64): - v3 = heap_addr.i64 heap0, v1, 1 + v3 = heap_addr.i64 heap0, v1, 0, 1 istore8 v0, v3 return } @@ -61,7 +61,7 @@ function %i64_load16_s(i32, i64 vmctx) -> i64 { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 1 + v2 = heap_addr.i64 heap0, v0, 0, 1 v3 = sload16.i64 v2 return v3 } @@ -71,7 +71,7 @@ function %i64_load16_u(i32, i64 vmctx) -> i64 { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 1 + v2 = heap_addr.i64 heap0, v0, 0, 1 v3 = uload16.i64 v2 return v3 } @@ -81,7 +81,7 @@ function %i64_store16(i64, i32, i64 vmctx) { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i64, v1: i32, v2: i64): - v3 = heap_addr.i64 heap0, v1, 1 + v3 = heap_addr.i64 heap0, v1, 0, 1 istore16 v0, v3 return } @@ -91,7 +91,7 @@ function %i64_load32_s(i32, i64 vmctx) -> i64 { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 1 + v2 = heap_addr.i64 heap0, v0, 0, 1 v3 = sload32.i64 v2 return v3 } @@ -101,7 +101,7 @@ function %i64_load32_u(i32, i64 vmctx) -> i64 { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: 
i32, v1: i64): - v2 = heap_addr.i64 heap0, v0, 1 + v2 = heap_addr.i64 heap0, v0, 0, 1 v3 = uload32.i64 v2 return v3 } @@ -111,7 +111,7 @@ function %i64_store32(i64, i32, i64 vmctx) { heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 block0(v0: i64, v1: i32, v2: i64): - v3 = heap_addr.i64 heap0, v1, 1 + v3 = heap_addr.i64 heap0, v1, 0, 1 istore32 v0, v3 return } diff --git a/cranelift/filetests/src/test_compile.rs b/cranelift/filetests/src/test_compile.rs index 4f8fe10840b0..cfc180ba9ded 100644 --- a/cranelift/filetests/src/test_compile.rs +++ b/cranelift/filetests/src/test_compile.rs @@ -130,7 +130,10 @@ fn update_test(output: &[&str], context: &Context) -> Result<()> { // but after we hit a real line then we push all remaining lines. let mut in_next_function = false; for line in old_test { - if !in_next_function && (line.trim().is_empty() || line.starts_with(";")) { + if !in_next_function + && (line.trim().is_empty() + || (line.starts_with(";") && !line.starts_with(";;"))) + { continue; } in_next_function = true; diff --git a/cranelift/filetests/src/test_licm.rs b/cranelift/filetests/src/test_licm.rs index 2ca245055a74..b02bac1e74c6 100644 --- a/cranelift/filetests/src/test_licm.rs +++ b/cranelift/filetests/src/test_licm.rs @@ -45,6 +45,7 @@ impl SubTest for TestLICM { .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, Into::into(e)))?; let text = comp_ctx.func.display().to_string(); + log::debug!("Post-LICM CLIF:\n{}", text); run_filecheck(&text, context) } } diff --git a/cranelift/interpreter/src/interpreter.rs b/cranelift/interpreter/src/interpreter.rs index eb1af7b4e312..cc58cf29ba99 100644 --- a/cranelift/interpreter/src/interpreter.rs +++ b/cranelift/interpreter/src/interpreter.rs @@ -1011,7 +1011,7 @@ mod tests { block0(v0: i64): v1 = iconst.i64 0 v2 = iconst.i64 123 - v3 = heap_addr.i64 heap0, v1, 8 + v3 = heap_addr.i64 heap0, v1, 0, 8 store.i64 v2, v3 v4 = load.i64 v3 v5 = icmp eq v2, v4 diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index c158f699e822..fcba98cdf7bf 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -119,10 +119,6 @@ where } // 32-bit InstructionData::UnaryIeee32 { imm, .. } => DataValue::from(imm), - InstructionData::HeapAddr { imm, .. } => { - let imm: u32 = imm.into(); - DataValue::from(imm as i32) // Note the switch from unsigned to signed. - } InstructionData::Load { offset, .. } | InstructionData::Store { offset, .. } | InstructionData::StackLoad { offset, .. } @@ -489,19 +485,27 @@ where Opcode::SymbolValue => unimplemented!("SymbolValue"), Opcode::TlsValue => unimplemented!("TlsValue"), Opcode::HeapAddr => { - if let InstructionData::HeapAddr { heap, .. } = inst { + if let InstructionData::HeapAddr { + heap, + offset: imm_offset, + size, + .. + } = inst + { let addr_ty = inst_context.controlling_type().unwrap(); - let offset = arg(0)?.into_int()? as u64; - let load_size = imm().into_int()? as u64; + let dyn_offset = arg(0)?.into_int()? as u64; assign_or_memtrap({ AddressSize::try_from(addr_ty).and_then(|addr_size| { // Attempt to build an address at the maximum possible offset // for this load. If address generation fails we know it's out of bounds. 
- let bound_offset = (offset + load_size).saturating_sub(1); + let bound_offset = + (dyn_offset + u64::from(u32::from(imm_offset)) + u64::from(size)) + .saturating_sub(1); state.heap_address(addr_size, heap, bound_offset)?; // Build the actual address - let addr = state.heap_address(addr_size, heap, offset)?; + let mut addr = state.heap_address(addr_size, heap, dyn_offset)?; + addr.offset += u64::from(u32::from(imm_offset)); let dv = DataValue::try_from(addr)?; Ok(dv.into()) }) diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index 912282bae4f6..ff85acec72e7 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -2965,12 +2965,15 @@ impl<'a> Parser<'a> { self.match_token(Token::Comma, "expected ',' between operands")?; let arg = self.match_value("expected SSA value heap address")?; self.match_token(Token::Comma, "expected ',' between operands")?; - let imm = self.match_uimm32("expected 32-bit integer size")?; + let offset = self.match_uimm32("expected 32-bit integer offset")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let size = self.match_uimm8("expected 8-bit integer size")?; InstructionData::HeapAddr { opcode, heap, arg, - imm, + offset, + size, } } InstructionFormat::TableAddr => { diff --git a/cranelift/src/clif-util.rs b/cranelift/src/clif-util.rs old mode 100755 new mode 100644 diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index 86d54ffc0328..28478b342fea 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -91,7 +91,6 @@ use cranelift_codegen::packed_option::ReservedValue; use cranelift_frontend::{FunctionBuilder, Variable}; use itertools::Itertools; use smallvec::SmallVec; -use std::cmp; use std::convert::TryFrom; use std::vec::Vec; use wasmparser::{FuncValidator, MemArg, Operator, WasmModuleResources}; @@ -697,33 +696,33 @@ pub fn translate_operator( translate_load(memarg, ir::Opcode::Load, I8X16, builder, state, environ)?; } Operator::V128Load8x8S { memarg } => { - let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?; - let loaded = builder.ins().sload8x8(flags, base, offset); + let (flags, base) = prepare_addr(memarg, 8, builder, state, environ)?; + let loaded = builder.ins().sload8x8(flags, base, 0); state.push1(loaded); } Operator::V128Load8x8U { memarg } => { - let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?; - let loaded = builder.ins().uload8x8(flags, base, offset); + let (flags, base) = prepare_addr(memarg, 8, builder, state, environ)?; + let loaded = builder.ins().uload8x8(flags, base, 0); state.push1(loaded); } Operator::V128Load16x4S { memarg } => { - let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?; - let loaded = builder.ins().sload16x4(flags, base, offset); + let (flags, base) = prepare_addr(memarg, 8, builder, state, environ)?; + let loaded = builder.ins().sload16x4(flags, base, 0); state.push1(loaded); } Operator::V128Load16x4U { memarg } => { - let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?; - let loaded = builder.ins().uload16x4(flags, base, offset); + let (flags, base) = prepare_addr(memarg, 8, builder, state, environ)?; + let loaded = builder.ins().uload16x4(flags, base, 0); state.push1(loaded); } Operator::V128Load32x2S { memarg } => { - let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?; - let loaded = builder.ins().sload32x2(flags, base, offset); 
+ let (flags, base) = prepare_addr(memarg, 8, builder, state, environ)?; + let loaded = builder.ins().sload32x2(flags, base, 0); state.push1(loaded); } Operator::V128Load32x2U { memarg } => { - let (flags, base, offset) = prepare_addr(memarg, 8, builder, state, environ)?; - let loaded = builder.ins().uload32x2(flags, base, offset); + let (flags, base) = prepare_addr(memarg, 8, builder, state, environ)?; + let loaded = builder.ins().uload32x2(flags, base, 0); state.push1(loaded); } /****************************** Store instructions *********************************** @@ -1067,8 +1066,13 @@ pub fn translate_operator( let heap = state.get_heap(builder.func, memarg.memory, environ)?; let timeout = state.pop1(); // 64 (fixed) let expected = state.pop1(); // 32 or 64 (per the `Ixx` in `IxxAtomicWait`) - let (_flags, addr) = - prepare_atomic_addr(memarg, implied_ty.bytes(), builder, state, environ)?; + let (_flags, addr) = prepare_atomic_addr( + memarg, + u8::try_from(implied_ty.bytes()).unwrap(), + builder, + state, + environ, + )?; assert!(builder.func.dfg.value_type(expected) == implied_ty); // `fn translate_atomic_wait` can inspect the type of `expected` to figure out what // code it needs to generate, if it wants. @@ -2171,21 +2175,20 @@ fn translate_unreachable_operator( /// This function is a generalized helper for validating that a wasm-supplied /// heap address is in-bounds. /// -/// This function takes a litany of parameters and requires that the address to -/// be verified is at the top of the stack in `state`. This will generate -/// necessary IR to validate that the heap address is correctly in-bounds, and -/// various parameters are returned describing the valid heap address if -/// execution reaches that point. +/// This function takes a litany of parameters and requires that the *Wasm* +/// address to be verified is at the top of the stack in `state`. This will +/// generate necessary IR to validate that the heap address is correctly +/// in-bounds, and various parameters are returned describing the valid *native* +/// heap address if execution reaches that point. fn prepare_addr( memarg: &MemArg, - access_size: u32, + access_size: u8, builder: &mut FunctionBuilder, state: &mut FuncTranslationState, environ: &mut FE, -) -> WasmResult<(MemFlags, Value, Offset32)> { +) -> WasmResult<(MemFlags, Value)> { let addr = state.pop1(); let heap = state.get_heap(builder.func, memarg.memory, environ)?; - let offset_guard_size: u64 = builder.func.heaps[heap].offset_guard_size.into(); // How exactly the bounds check is performed here and what it's performed // on is a bit tricky. Generally we want to rely on access violations (e.g. @@ -2244,10 +2247,9 @@ fn prepare_addr( // hit like so: // // * For wasm32, wasmtime defaults to 4gb "static" memories with 2gb guard - // regions. This means our `adjusted_offset` is 1 for all offsets <=2gb. - // This hits the optimized case for `heap_addr` on static memories 4gb in - // size in cranelift's legalization of `heap_addr`, eliding the bounds - // check entirely. + // regions. This means that for all offsets <=2gb, we hit the optimized + // case for `heap_addr` on static memories 4gb in size in cranelift's + // legalization of `heap_addr`, eliding the bounds check entirely. 
 //
 // * For wasm64 offsets <=2gb will generate a single `heap_addr`
 //   instruction, but at this time all heaps are "dynamic" which means that
@@ -2258,43 +2260,17 @@
 // offsets in `memarg` are <=2gb, which means we get the fast path of one
 // `heap_addr` instruction plus a hardcoded i32-offset in memory-related
 // instructions.
-    let adjusted_offset = if offset_guard_size == 0 {
-        // Why saturating? see (1) above
-        memarg.offset.saturating_add(u64::from(access_size))
-    } else {
-        // Why is there rounding here? see (2) above
-        assert!(access_size < 1024);
-        cmp::max(memarg.offset / offset_guard_size * offset_guard_size, 1)
-    };
-
-    debug_assert!(adjusted_offset > 0); // want to bounds check at least 1 byte
-    let (addr, offset) = match u32::try_from(adjusted_offset) {
-        // If our adjusted offset fits within a u32, then we can place the
-        // entire offset into the offset of the `heap_addr` instruction. After
-        // the `heap_addr` instruction, though, we need to factor the the offset
-        // into the returned address. This is either an immediate to later
-        // memory instructions if the offset further fits within `i32`, or a
-        // manual add instruction otherwise.
-        //
-        // Note that native instructions take a signed offset hence the switch
-        // to i32. Note also the lack of overflow checking in the offset
-        // addition, which should be ok since if `heap_addr` passed we're
-        // guaranteed that this won't overflow.
-        Ok(adjusted_offset) => {
-            let base = builder
+    let addr = match u32::try_from(memarg.offset) {
+        // If our offset fits within a u32, then we can place it into the
+        // offset immediate of the `heap_addr` instruction.
+        Ok(offset) => {
+            builder
                 .ins()
-                .heap_addr(environ.pointer_type(), heap, addr, adjusted_offset);
-            match i32::try_from(memarg.offset) {
-                Ok(val) => (base, val),
-                Err(_) => {
-                    let adj = builder.ins().iadd_imm(base, memarg.offset as i64);
-                    (adj, 0)
-                }
-            }
+                .heap_addr(environ.pointer_type(), heap, addr, offset, access_size)
         }
-        // If the adjusted offset doesn't fit within a u32, then we can't pass
-        // the adjust sized to `heap_addr` raw.
+        // If the offset doesn't fit within a u32, then we can't pass it
+        // directly into `heap_addr`.
         //
         // One reasonable question you might ask is "why not?". There's no
         // fundamental reason why `heap_addr` *must* take a 32-bit offset. The
@@ -2313,8 +2289,6 @@
         //
         // Once we have the effective address, offset already folded in, then
         // `heap_addr` is used to verify that the address is indeed in-bounds.
-        // The access size of the `heap_addr` is what we were passed in from
-        // above.
         //
         // Note that this is generating what's likely to be at least two
        // branches, one for the overflow and one for the bounds check itself.
@@ -2328,10 +2302,9 @@
             builder
                 .ins()
                 .uadd_overflow_trap(addr, offset, ir::TrapCode::HeapOutOfBounds);
-            let base = builder
+            builder
                 .ins()
-                .heap_addr(environ.pointer_type(), heap, addr, access_size);
-            (base, 0)
+                .heap_addr(environ.pointer_type(), heap, addr, 0, access_size)
         }
     };
@@ -2348,12 +2321,12 @@
 // vmctx, stack) accesses.
flags.set_heap(); - Ok((flags, addr, offset.into())) + Ok((flags, addr)) } fn prepare_atomic_addr( memarg: &MemArg, - loaded_bytes: u32, + loaded_bytes: u8, builder: &mut FunctionBuilder, state: &mut FuncTranslationState, environ: &mut FE, @@ -2386,18 +2359,7 @@ fn prepare_atomic_addr( builder.ins().trapnz(f, ir::TrapCode::HeapMisaligned); } - let (flags, mut addr, offset) = prepare_addr(memarg, loaded_bytes, builder, state, environ)?; - - // Currently cranelift IR operations for atomics don't have offsets - // associated with them so we fold the offset into the address itself. Note - // that via the `prepare_addr` helper we know that if execution reaches - // this point that this addition won't overflow. - let offset: i64 = offset.into(); - if offset != 0 { - addr = builder.ins().iadd_imm(addr, offset); - } - - Ok((flags, addr)) + prepare_addr(memarg, loaded_bytes, builder, state, environ) } /// Translate a load instruction. @@ -2409,14 +2371,16 @@ fn translate_load( state: &mut FuncTranslationState, environ: &mut FE, ) -> WasmResult<()> { - let (flags, base, offset) = prepare_addr( + let (flags, base) = prepare_addr( memarg, mem_op_size(opcode, result_ty), builder, state, environ, )?; - let (load, dfg) = builder.ins().Load(opcode, result_ty, flags, offset, base); + let (load, dfg) = builder + .ins() + .Load(opcode, result_ty, flags, Offset32::new(0), base); state.push1(dfg.first_result(load)); Ok(()) } @@ -2432,20 +2396,19 @@ fn translate_store( let val = state.pop1(); let val_ty = builder.func.dfg.value_type(val); - let (flags, base, offset) = - prepare_addr(memarg, mem_op_size(opcode, val_ty), builder, state, environ)?; + let (flags, base) = prepare_addr(memarg, mem_op_size(opcode, val_ty), builder, state, environ)?; builder .ins() - .Store(opcode, val_ty, flags, offset.into(), val, base); + .Store(opcode, val_ty, flags, Offset32::new(0), val, base); Ok(()) } -fn mem_op_size(opcode: ir::Opcode, ty: Type) -> u32 { +fn mem_op_size(opcode: ir::Opcode, ty: Type) -> u8 { match opcode { ir::Opcode::Istore8 | ir::Opcode::Sload8 | ir::Opcode::Uload8 => 1, ir::Opcode::Istore16 | ir::Opcode::Sload16 | ir::Opcode::Uload16 => 2, ir::Opcode::Istore32 | ir::Opcode::Sload32 | ir::Opcode::Uload32 => 4, - ir::Opcode::Store | ir::Opcode::Load => ty.bytes(), + ir::Opcode::Store | ir::Opcode::Load => u8::try_from(ty.bytes()).unwrap(), _ => panic!("unknown size of mem op for {:?}", opcode), } } @@ -2490,7 +2453,13 @@ fn translate_atomic_rmw( arg2 = builder.ins().ireduce(access_ty, arg2); } - let (flags, addr) = prepare_atomic_addr(memarg, access_ty.bytes(), builder, state, environ)?; + let (flags, addr) = prepare_atomic_addr( + memarg, + u8::try_from(access_ty.bytes()).unwrap(), + builder, + state, + environ, + )?; let mut res = builder.ins().atomic_rmw(access_ty, flags, op, addr, arg2); if access_ty != widened_ty { @@ -2538,7 +2507,13 @@ fn translate_atomic_cas( replacement = builder.ins().ireduce(access_ty, replacement); } - let (flags, addr) = prepare_atomic_addr(memarg, access_ty.bytes(), builder, state, environ)?; + let (flags, addr) = prepare_atomic_addr( + memarg, + u8::try_from(access_ty.bytes()).unwrap(), + builder, + state, + environ, + )?; let mut res = builder.ins().atomic_cas(flags, addr, expected, replacement); if access_ty != widened_ty { res = builder.ins().uextend(widened_ty, res); @@ -2572,7 +2547,13 @@ fn translate_atomic_load( }; assert!(w_ty_ok && widened_ty.bytes() >= access_ty.bytes()); - let (flags, addr) = prepare_atomic_addr(memarg, access_ty.bytes(), builder, state, environ)?; + 
let (flags, addr) = prepare_atomic_addr( + memarg, + u8::try_from(access_ty.bytes()).unwrap(), + builder, + state, + environ, + )?; let mut res = builder.ins().atomic_load(access_ty, flags, addr); if access_ty != widened_ty { res = builder.ins().uextend(widened_ty, res); @@ -2612,7 +2593,13 @@ fn translate_atomic_store( data = builder.ins().ireduce(access_ty, data); } - let (flags, addr) = prepare_atomic_addr(memarg, access_ty.bytes(), builder, state, environ)?; + let (flags, addr) = prepare_atomic_addr( + memarg, + u8::try_from(access_ty.bytes()).unwrap(), + builder, + state, + environ, + )?; builder.ins().atomic_store(flags, data, addr); Ok(()) }
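
A quick worked example of the interpreter's new bounds computation may help. Only the formula comes from the patch: the last byte touched is `dyn_offset + imm_offset + size - 1`, with a saturating subtraction so that a zero-sized access at offset zero cannot underflow. The concrete values below are illustrative, not from the patch:

    // Names mirror the interpreter hunk; values are made up for illustration.
    let dyn_offset: u64 = 16; // runtime index operand
    let imm_offset: u64 = 8;  // static `offset` immediate (Uimm32)
    let size: u64 = 4;        // static `size` immediate (Uimm8)

    // Last byte touched is at 16 + 8 + 4 - 1 = 27, so the bounds check
    // probes offset 27, i.e. the byte range 16..28.
    let bound_offset = (dyn_offset + imm_offset + size).saturating_sub(1);
    assert_eq!(bound_offset, 27);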
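With the parser change in cranelift/reader/src/parser.rs, the textual form of `heap_addr` gains a second immediate, so a CLIF line would plausibly read `v1 = heap_addr.i64 heap0, v0, 32, 4` (static offset 32, access size 4; inferred from the parse order, not quoted from a test). At the builder level the same call looks roughly like the sketch below, where `ptr_ty`, `h0`, and `idx` are stand-in names and only the argument order comes from the patch:

    // Sketch: compute a bounds-checked native address for `idx + 32`,
    // covering a 4-byte access. If `idx + 32 + 4` exceeds the heap bound,
    // the result traps (or yields an address guaranteed to trap).
    let base = builder.ins().heap_addr(ptr_ty, h0, idx, 32, 4);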
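For 64-bit `memarg.offset` values that do not fit the new `Uimm32` immediate, `prepare_addr` folds the offset into the address before bounds checking. Pieced together from the hunk above; the materialization of `offset` itself is elided in the hunk, so the `iconst` here is an assumption, and `pointer_ty` stands in for `environ.pointer_type()`:

    // Assumed materialization of the 64-bit offset (not shown in the hunk).
    let offset = builder.ins().iconst(pointer_ty, memarg.offset as i64);
    // Trap on wraparound, then bounds-check the folded address with a
    // zero offset immediate and the real access size.
    let addr = builder
        .ins()
        .uadd_overflow_trap(addr, offset, ir::TrapCode::HeapOutOfBounds);
    let base = builder
        .ins()
        .heap_addr(pointer_ty, heap, addr, 0, access_size);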
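The switch from `u32` to `u8` access sizes leaves several repeated `u8::try_from(...).unwrap()` call sites in the atomic helpers. A follow-up could centralize the conversion; a possible helper, not part of this patch:

    /// Hypothetical helper (not in this patch): CLIF types are at most
    /// 16 bytes wide, so the narrowing to `u8` cannot fail in practice.
    fn access_size_u8(ty: ir::Type) -> u8 {
        u8::try_from(ty.bytes()).expect("type width fits in u8")
    }

Call sites such as `prepare_atomic_addr(memarg, access_size_u8(access_ty), ...)` would then shrink back to one line each.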