From 33d98f20318907385a150e9c3a8d7be1b780a3fa Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Thu, 3 Nov 2022 11:07:55 -0700 Subject: [PATCH 1/7] Cranelift: Add the `DataFlowGraph::display_value_inst` convenience method --- cranelift/codegen/src/ir/dfg.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/cranelift/codegen/src/ir/dfg.rs b/cranelift/codegen/src/ir/dfg.rs index c621253e1aea..e0818df11e95 100644 --- a/cranelift/codegen/src/ir/dfg.rs +++ b/cranelift/codegen/src/ir/dfg.rs @@ -595,6 +595,17 @@ impl DataFlowGraph { DisplayInst(self, inst) } + /// Returns an object that displays the given `value`'s defining instruction. + /// + /// Panics if the value is not defined by an instruction (i.e. it is a basic + /// block argument). + pub fn display_value_inst(&self, value: Value) -> DisplayInst<'_> { + match self.value_def(value) { + ir::ValueDef::Result(inst, _) => self.display_inst(inst), + ir::ValueDef::Param(_, _) => panic!("value is not defined by an instruction"), + } + } + /// Get all value arguments on `inst` as a slice. pub fn inst_args(&self, inst: Inst) -> &[Value] { self.insts[inst].arguments(&self.value_lists) From 04965b4418fac8d7ded9070c6ff7b3a5cee86e0e Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Thu, 3 Nov 2022 11:10:59 -0700 Subject: [PATCH 2/7] Cranelift: Add some `trace!` logs to some parts of legalization --- .../codegen/src/legalizer/globalvalue.rs | 6 +++ cranelift/codegen/src/legalizer/heap.rs | 48 ++++++++++++++++--- cranelift/codegen/src/legalizer/mod.rs | 11 +++++ 3 files changed, 59 insertions(+), 6 deletions(-) diff --git a/cranelift/codegen/src/legalizer/globalvalue.rs b/cranelift/codegen/src/legalizer/globalvalue.rs index 751f4f403587..57fa29e1f5ab 100644 --- a/cranelift/codegen/src/legalizer/globalvalue.rs +++ b/cranelift/codegen/src/legalizer/globalvalue.rs @@ -14,6 +14,12 @@ pub fn expand_global_value( isa: &dyn TargetIsa, global_value: ir::GlobalValue, ) { + crate::trace!( + "expanding global value: {:?}: {}", + inst, + func.dfg.display_inst(inst) + ); + match func.global_values[global_value] { ir::GlobalValueData::VMContext => vmctx_addr(inst, func), ir::GlobalValueData::IAddImm { diff --git a/cranelift/codegen/src/legalizer/heap.rs b/cranelift/codegen/src/legalizer/heap.rs index d51a6244eb04..522d84712979 100644 --- a/cranelift/codegen/src/legalizer/heap.rs +++ b/cranelift/codegen/src/legalizer/heap.rs @@ -9,6 +9,7 @@ use crate::ir::condcodes::IntCC; use crate::ir::immediates::{Uimm32, Uimm8}; use crate::ir::{self, InstBuilder, RelSourceLoc}; use crate::isa::TargetIsa; +use crate::trace; /// Expand a `heap_addr` instruction according to the definition of the heap. pub fn expand_heap_addr( @@ -21,6 +22,12 @@ pub fn expand_heap_addr( offset_immediate: Uimm32, access_size: Uimm8, ) { + trace!( + "expanding heap_addr: {:?}: {}", + inst, + func.dfg.display_inst(inst) + ); + match func.heaps[heap].style { ir::HeapStyle::Dynamic { bound_gv } => dynamic_addr( isa, @@ -76,6 +83,10 @@ fn dynamic_addr( let adj_bound = pos .ins() .iadd_imm(bound, -(offset_plus_size(offset, access_size) as i64)); + trace!( + " inserting: {}", + pos.func.dfg.display_value_inst(adj_bound) + ); (IntCC::UnsignedGreaterThan, index, adj_bound) } else { // We need an overflow check for the adjusted offset. @@ -85,6 +96,10 @@ fn dynamic_addr( let adj_offset = pos.ins() .uadd_overflow_trap(index, access_size_val, ir::TrapCode::HeapOutOfBounds); + trace!( + " inserting: {}", + pos.func.dfg.display_value_inst(adj_offset) + ); (IntCC::UnsignedGreaterThan, adj_offset, bound) }; let oob = pos.ins().icmp(cc, lhs, bound); @@ -250,10 +265,14 @@ fn compute_addr( // Add the heap base address base let base = if isa.flags().enable_pinned_reg() && isa.flags().use_pinned_reg_as_heap_base() { - pos.ins().get_pinned_reg(isa.pointer_type()) + let base = pos.ins().get_pinned_reg(isa.pointer_type()); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(base)); + base } else { let base_gv = pos.func.heaps[heap].base; - pos.ins().global_value(addr_ty, base_gv) + let base = pos.ins().global_value(addr_ty, base_gv); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(base)); + base }; if let Some((cc, a, b)) = spectre_oob_comparison { @@ -264,22 +283,39 @@ fn compute_addr( let final_addr = if offset == 0 { final_base } else { - pos.ins().iadd_imm(final_base, offset as i64) + let final_addr = pos.ins().iadd_imm(final_base, offset as i64); + trace!( + " inserting: {}", + pos.func.dfg.display_value_inst(final_addr) + ); + final_addr }; let zero = pos.ins().iconst(addr_ty, 0); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(zero)); + let cmp = pos.ins().icmp(cc, a, b); - pos.func + trace!(" inserting: {}", pos.func.dfg.display_value_inst(cmp)); + + let value = pos + .func .dfg .replace(inst) .select_spectre_guard(cmp, zero, final_addr); } else if offset == 0 { - pos.func.dfg.replace(inst).iadd(base, index); + let addr = pos.func.dfg.replace(inst).iadd(base, index); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(addr)); } else { let final_base = pos.ins().iadd(base, index); - pos.func + trace!( + " inserting: {}", + pos.func.dfg.display_value_inst(final_base) + ); + let addr = pos + .func .dfg .replace(inst) .iadd_imm(final_base, offset as i64); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(addr)); } } diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs index 96eccb2079c9..0fa8156836bd 100644 --- a/cranelift/codegen/src/legalizer/mod.rs +++ b/cranelift/codegen/src/legalizer/mod.rs @@ -19,6 +19,7 @@ use crate::ir::immediates::Imm64; use crate::ir::types::{I128, I64}; use crate::ir::{self, InstBuilder, InstructionData, MemFlags, Value}; use crate::isa::TargetIsa; +use crate::trace; mod globalvalue; mod heap; @@ -46,6 +47,8 @@ fn imm_const(pos: &mut FuncCursor, arg: Value, imm: Imm64, is_signed: bool) -> V /// Perform a simple legalization by expansion of the function, without /// platform-specific transforms. pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) { + trace!("Pre-legalization function:\n{}", func.display()); + let mut pos = FuncCursor::new(func); let func_begin = pos.position(); pos.set_position(func_begin); @@ -246,6 +249,8 @@ pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: pos.set_position(prev_pos); } } + + trace!("Post-legalization function:\n{}", func.display()); } /// Custom expansion for conditional trap instructions. @@ -257,6 +262,12 @@ fn expand_cond_trap( arg: ir::Value, code: ir::TrapCode, ) { + trace!( + "expanding conditional trap: {:?}: {}", + inst, + func.dfg.display_inst(inst) + ); + // Parse the instruction. let trapz = match opcode { ir::Opcode::Trapz => true, From bdff9fd76fb67f14ee740b1299bde367d8a0ede0 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Thu, 3 Nov 2022 11:12:35 -0700 Subject: [PATCH 3/7] Cranelift: de-duplicate bounds checks in legalizations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When both (1) "dynamic" memories that need explicit bounds checks and (2) spectre mitigations that perform bounds checks are enabled, reuse the same bounds checks between the two legalizations. This reduces the overhead of explicit bounds checks and spectre mitigations over using virtual memory guard pages with spectre mitigations from ~1.9-2.1x overhead to ~1.6-1.8x overhead. That is about a 14-19% speed up for when dynamic memories and spectre mitigations are enabled.
``` execution :: instructions-retired :: benchmarks/spidermonkey/benchmark.wasm Δ = 3422455129.47 ± 120159.49 (confidence = 99%) virtual-memory-guards.so is 2.09x to 2.09x faster than bounds-checks.so! [6563931659 6564063496.07 6564301535] bounds-checks.so [3141492675 3141608366.60 3141895249] virtual-memory-guards.so execution :: instructions-retired :: benchmarks/bz2/benchmark.wasm Δ = 338716136.87 ± 1.38 (confidence = 99%) virtual-memory-guards.so is 2.08x to 2.08x faster than bounds-checks.so! [651961494 651961495.47 651961497] bounds-checks.so [313245357 313245358.60 313245362] virtual-memory-guards.so execution :: instructions-retired :: benchmarks/pulldown-cmark/benchmark.wasm Δ = 22742944.07 ± 331.73 (confidence = 99%) virtual-memory-guards.so is 1.87x to 1.87x faster than bounds-checks.so! [48841295 48841567.33 48842139] bounds-checks.so [26098439 26098623.27 26099479] virtual-memory-guards.so ```
``` execution :: instructions-retired :: benchmarks/spidermonkey/benchmark.wasm Δ = 2465900207.27 ± 146476.61 (confidence = 99%) virtual-memory-guards.so is 1.78x to 1.78x faster than de-duped-bounds-checks.so! [5607275431 5607442989.13 5607838342] de-duped-bounds-checks.so [3141445345 3141542781.87 3141711213] virtual-memory-guards.so execution :: instructions-retired :: benchmarks/bz2/benchmark.wasm Δ = 234253620.20 ± 2.33 (confidence = 99%) virtual-memory-guards.so is 1.75x to 1.75x faster than de-duped-bounds-checks.so! [547498977 547498980.93 547498985] de-duped-bounds-checks.so [313245357 313245360.73 313245363] virtual-memory-guards.so execution :: instructions-retired :: benchmarks/pulldown-cmark/benchmark.wasm Δ = 16605659.13 ± 315.78 (confidence = 99%) virtual-memory-guards.so is 1.64x to 1.64x faster than de-duped-bounds-checks.so! [42703971 42704284.40 42704787] de-duped-bounds-checks.so [26098432 26098625.27 26099234] virtual-memory-guards.so ```
``` execution :: instructions-retired :: benchmarks/bz2/benchmark.wasm Δ = 104462517.13 ± 7.32 (confidence = 99%) de-duped-bounds-checks.so is 1.19x to 1.19x faster than bounds-checks.so! [651961493 651961500.80 651961532] bounds-checks.so [547498981 547498983.67 547498989] de-duped-bounds-checks.so execution :: instructions-retired :: benchmarks/spidermonkey/benchmark.wasm Δ = 956556982.80 ± 103034.59 (confidence = 99%) de-duped-bounds-checks.so is 1.17x to 1.17x faster than bounds-checks.so! [6563930590 6564019842.40 6564243651] bounds-checks.so [5607307146 5607462859.60 5607677763] de-duped-bounds-checks.so execution :: instructions-retired :: benchmarks/pulldown-cmark/benchmark.wasm Δ = 6137307.87 ± 247.75 (confidence = 99%) de-duped-bounds-checks.so is 1.14x to 1.14x faster than bounds-checks.so! [48841303 48841472.93 48842000] bounds-checks.so [42703965 42704165.07 42704718] de-duped-bounds-checks.so ```
--- cranelift/codegen/src/legalizer/heap.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cranelift/codegen/src/legalizer/heap.rs b/cranelift/codegen/src/legalizer/heap.rs index 522d84712979..fbdd07f885e0 100644 --- a/cranelift/codegen/src/legalizer/heap.rs +++ b/cranelift/codegen/src/legalizer/heap.rs @@ -102,12 +102,20 @@ fn dynamic_addr( ); (IntCC::UnsignedGreaterThan, adj_offset, bound) }; - let oob = pos.ins().icmp(cc, lhs, bound); - pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds); let spectre_oob_comparison = if isa.flags().enable_heap_access_spectre_mitigation() { + // When we emit a spectre-guarded heap access, we do a `select + // is_out_of_bounds, NULL, addr` to compute the address, and so the load + // will trap if the address is out of bounds, which means we don't need + // to do another explicit bounds check like we do below. Some((cc, lhs, bound)) } else { + let oob = pos.ins().icmp(cc, lhs, bound); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(oob)); + + let trapnz = pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds); + trace!(" inserting: {}", pos.func.dfg.display_inst(trapnz)); + None }; From b70425e99c4c5d6b293beeb7aaef289bad690e2e Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Thu, 3 Nov 2022 11:36:10 -0700 Subject: [PATCH 4/7] Update test expectations --- cranelift/codegen/src/legalizer/heap.rs | 1 + .../filetests/isa/aarch64/heap_addr.clif | 22 +++------ .../filetests/isa/riscv64/heap-addr.clif | 46 ++++++++----------- .../filetests/isa/s390x/heap_addr.clif | 29 ++++-------- .../filetests/filetests/isa/x64/heap.clif | 36 +++++---------- 5 files changed, 46 insertions(+), 88 deletions(-) diff --git a/cranelift/codegen/src/legalizer/heap.rs b/cranelift/codegen/src/legalizer/heap.rs index fbdd07f885e0..dc33d6e63302 100644 --- a/cranelift/codegen/src/legalizer/heap.rs +++ b/cranelift/codegen/src/legalizer/heap.rs @@ -309,6 +309,7 @@ fn compute_addr( .dfg .replace(inst) .select_spectre_guard(cmp, zero, final_addr); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(value)); } else if offset == 0 { let addr = pos.func.dfg.replace(inst).iadd(base, index); trace!(" inserting: {}", pos.func.dfg.display_value_inst(addr)); diff --git a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif index 2f893bcd7122..b3b1b8ce335a 100644 --- a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif +++ b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif @@ -17,17 +17,12 @@ block0(v0: i64, v1: i32): ; mov w8, w1 ; ldr x9, [x0] ; mov x9, x9 -; subs xzr, x8, x9 -; b.ls label1 ; b label2 -; block1: ; add x10, x0, x1, UXTW -; movz x11, #0 +; movz x7, #0 ; subs xzr, x8, x9 -; csel x0, x11, x10, hi +; csel x0, x7, x10, hi ; csdb ; ret -; block2: -; udf #0xc11f function %static_heap_check(i64 vmctx, i32) -> i64 { gv0 = vmctx @@ -69,18 +64,13 @@ block0(v0: i64, v1: i32): ; movz x9, #24 ; adds x11, x11, x9 ; b.lo 8 ; udf +; add x12, x0, x1, UXTW +; add x12, x12, #16 +; movz x13, #0 ; subs xzr, x11, x10 -; b.ls label1 ; b label2 -; block1: -; add x13, x0, x1, UXTW -; add x13, x13, #16 -; movz x12, #0 -; subs xzr, x11, x10 -; csel x0, x12, x13, hi +; csel x0, x13, x12, hi ; csdb ; ret -; block2: -; udf #0xc11f function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 { gv0 = vmctx diff --git a/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif b/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif index d90da8f22c45..86da6b79a392 100644 --- a/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif +++ b/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif @@ -13,19 +13,14 @@ block0(v0: i64, v1: i32): } ; block0: -; uext.w a7,a1 -; ld t3,0(a0) -; addi t3,t3,0 -; ule t4,a7,t3##ty=i64 -; bne t4,zero,taken(label1),not_taken(label2) -; block1: -; add t4,a0,a7 -; ugt a7,a7,t3##ty=i64 -; li t0,0 -; selectif_spectre_guard a0,t0,t4##test=a7 +; uext.w a6,a1 +; ld a7,0(a0) +; addi t3,a7,0 +; add a7,a0,a6 +; ugt a5,a6,t3##ty=i64 +; li t3,0 +; selectif_spectre_guard a0,t3,a7##test=a5 ; ret -; block2: -; udf##trap_code=heap_oob function %static_heap_check(i64 vmctx, i32) -> i64 { gv0 = vmctx @@ -62,23 +57,18 @@ block0(v0: i64, v1: i32): } ; block0: -; uext.w t1,a1 -; ld t0,0(a0) -; li t3,24 -; add t2,t1,t3 -; ult a1,t2,t1##ty=i64 -; trap_if a1,heap_oob -; ule a1,t2,t0##ty=i64 -; bne a1,zero,taken(label1),not_taken(label2) -; block1: -; add a0,a0,t1 -; addi a0,a0,16 -; ugt t1,t2,t0##ty=i64 -; li a1,0 -; selectif_spectre_guard a0,a1,a0##test=t1 +; uext.w t0,a1 +; ld t4,0(a0) +; li a7,24 +; add t1,t0,a7 +; ult t2,t1,t0##ty=i64 +; trap_if t2,heap_oob +; add t0,a0,t0 +; addi t0,t0,16 +; ugt t4,t1,t4##ty=i64 +; li t1,0 +; selectif_spectre_guard a0,t1,t0##test=t4 ; ret -; block2: -; udf##trap_code=heap_oob function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 { gv0 = vmctx diff --git a/cranelift/filetests/filetests/isa/s390x/heap_addr.clif b/cranelift/filetests/filetests/isa/s390x/heap_addr.clif index 4dc22f499f3e..205564e0fd77 100644 --- a/cranelift/filetests/filetests/isa/s390x/heap_addr.clif +++ b/cranelift/filetests/filetests/isa/s390x/heap_addr.clif @@ -12,20 +12,14 @@ block0(v0: i64, v1: i32): } ; block0: -; llgfr %r5, %r3 -; lgr %r4, %r2 -; lg %r2, 0(%r4) -; aghik %r3, %r2, 0 -; clgr %r5, %r3 -; jgnh label1 ; jg label2 -; block1: -; agrk %r2, %r4, %r5 -; lghi %r4, 0 -; clgr %r5, %r3 -; locgrh %r2, %r4 +; llgfr %r4, %r3 +; lghi %r3, 0 +; ag %r3, 0(%r2) +; agr %r2, %r4 +; lghi %r5, 0 +; clgr %r4, %r3 +; locgrh %r2, %r5 ; br %r14 -; block2: -; trap function %static_heap_check(i64 vmctx, i32) -> i64 { gv0 = vmctx @@ -66,18 +60,13 @@ block0(v0: i64, v1: i32): ; lghi %r5, 24 ; algfr %r5, %r3 ; jle 6 ; trap -; clgr %r5, %r4 -; jgnh label1 ; jg label2 -; block1: -; agrk %r3, %r2, %r7 -; aghik %r2, %r3, 16 +; agr %r2, %r7 +; aghi %r2, 16 ; lghi %r3, 0 ; clgr %r5, %r4 ; locgrh %r2, %r3 ; lmg %r7, %r15, 56(%r15) ; br %r14 -; block2: -; trap function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 { gv0 = vmctx diff --git a/cranelift/filetests/filetests/isa/x64/heap.clif b/cranelift/filetests/filetests/isa/x64/heap.clif index 87444682ac46..f0e472c4822e 100644 --- a/cranelift/filetests/filetests/isa/x64/heap.clif +++ b/cranelift/filetests/filetests/isa/x64/heap.clif @@ -33,23 +33,18 @@ block0(v0: i32, v1: i64): ; movq %rsp, %rbp ; block0: ; movl %edi, %eax -; movq 8(%rsi), %rdx -; movq %rax, %rdi -; addq %rdi, $32768, %rdi +; movq 8(%rsi), %rdi +; movq %rax, %rcx +; addq %rcx, $32768, %rcx ; jnb ; ud2 heap_oob ; -; cmpq %rdx, %rdi -; jbe label1; j label2 -; block1: ; addq %rax, 0(%rsi), %rax ; addq %rax, $32768, %rax -; xorq %rcx, %rcx, %rcx -; cmpq %rdx, %rdi -; cmovnbeq %rcx, %rax, %rax +; xorq %rsi, %rsi, %rsi +; cmpq %rdi, %rcx +; cmovnbeq %rsi, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret -; block2: -; ud2 heap_oob ;; The heap address calculation for this statically-allocated memory checks that ;; the passed offset (%r11) is within bounds (`cmp + jbe + j`) and then includes @@ -120,25 +115,18 @@ block0(v0: i64, v1: i32): ; block0: ; movq %rdi, %rax ; movl %esi, %edi -; movq %rax, %rcx -; movq 0(%rcx), %rsi -; movq %rdi, %rdx -; addq %rdx, $24, %rdx +; movq 0(%rax), %rsi +; movq %rdi, %rcx +; addq %rcx, $24, %rcx ; jnb ; ud2 heap_oob ; -; cmpq %rsi, %rdx -; jbe label1; j label2 -; block1: -; movq %rcx, %rax ; addq %rax, %rdi, %rax ; addq %rax, $16, %rax -; xorq %rcx, %rcx, %rcx -; cmpq %rsi, %rdx -; cmovnbeq %rcx, %rax, %rax +; xorq %rdi, %rdi, %rdi +; cmpq %rsi, %rcx +; cmovnbeq %rdi, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret -; block2: -; ud2 heap_oob function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 { gv0 = vmctx From ad8f6596864dfcbe27fe1065ae9a7f34799e9976 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Thu, 3 Nov 2022 11:36:23 -0700 Subject: [PATCH 5/7] Add a test for deduplicating bounds checks between dynamic memories and spectre mitigations --- .../filetests/legalizer/bounds-checks.clif | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 cranelift/filetests/filetests/legalizer/bounds-checks.clif diff --git a/cranelift/filetests/filetests/legalizer/bounds-checks.clif b/cranelift/filetests/filetests/legalizer/bounds-checks.clif new file mode 100644 index 000000000000..6368ea8b30ad --- /dev/null +++ b/cranelift/filetests/filetests/legalizer/bounds-checks.clif @@ -0,0 +1,32 @@ +test legalizer +set enable_heap_access_spectre_mitigation=true +target aarch64 +target x86_64 + +;; Test that when both (1) dynamic memories and (2) heap access spectre +;; mitigations are enabled, we deduplicate the bounds check between the two. + +function %wasm_load(i64 vmctx, i32) -> i32 wasmtime_system_v { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+88 + gv2 = load.i64 notrap aligned gv0+80 + heap0 = dynamic gv2, min 0, bound gv1, offset_guard 0x8000_0000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 0, 4 + v3 = load.i32 little heap v2 + return v3 +} + +; check: block0(v0: i64, v1: i32): +; nextln: v4 = uextend.i64 v1 +; nextln: v5 = load.i64 notrap aligned v0+88 +; nextln: v6 = iconst.i64 4 +; nextln: v7 = uadd_overflow_trap v4, v6, heap_oob ; v6 = 4 +; nextln: v8 = load.i64 notrap aligned v0+80 +; nextln: v9 = iadd v8, v4 +; nextln: v10 = iconst.i64 0 +; nextln: v11 = icmp ugt v7, v5 +; nextln: v2 = select_spectre_guard v11, v10, v9 ; v10 = 0 +; nextln: v3 = load.i32 little heap v2 +; nextln: return v3 From e2b3722618d1c41b7963044754938cda53e170a1 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Wed, 9 Nov 2022 15:32:59 -0800 Subject: [PATCH 6/7] Define a struct for the Spectre comparison instead of using a tuple --- cranelift/codegen/src/legalizer/heap.rs | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/cranelift/codegen/src/legalizer/heap.rs b/cranelift/codegen/src/legalizer/heap.rs index dc33d6e63302..bb0bc5fe6ceb 100644 --- a/cranelift/codegen/src/legalizer/heap.rs +++ b/cranelift/codegen/src/legalizer/heap.rs @@ -108,7 +108,11 @@ fn dynamic_addr( // is_out_of_bounds, NULL, addr` to compute the address, and so the load // will trap if the address is out of bounds, which means we don't need // to do another explicit bounds check like we do below. - Some((cc, lhs, bound)) + Some(SpectreOobComparison { + cc, + lhs, + rhs: bound, + }) } else { let oob = pos.ins().icmp(cc, lhs, bound); trace!(" inserting: {}", pos.func.dfg.display_value_inst(oob)); @@ -206,7 +210,11 @@ fn static_addr( pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds); if isa.flags().enable_heap_access_spectre_mitigation() { let limit = pos.ins().iconst(addr_ty, limit_imm); - spectre_oob_comparison = Some((cc, lhs, limit)); + spectre_oob_comparison = Some(SpectreOobComparison { + cc, + lhs, + rhs: limit, + }); } } @@ -252,6 +260,12 @@ fn cast_index_to_pointer_ty( extended_index } +struct SpectreOobComparison { + cc: IntCC, + lhs: ir::Value, + rhs: ir::Value, +} + /// Emit code for the base address computation of a `heap_addr` instruction. fn compute_addr( isa: &dyn TargetIsa, @@ -265,7 +279,7 @@ fn compute_addr( // values to compare and the condition code that indicates an out-of bounds // condition; on this condition, the conditional move will choose a // speculatively safe address (a zero / null pointer) instead. - spectre_oob_comparison: Option<(IntCC, ir::Value, ir::Value)>, + spectre_oob_comparison: Option, ) { debug_assert_eq!(func.dfg.value_type(index), addr_ty); let mut pos = FuncCursor::new(func).at_inst(inst); @@ -283,7 +297,7 @@ fn compute_addr( base }; - if let Some((cc, a, b)) = spectre_oob_comparison { + if let Some(SpectreOobComparison { cc, lhs, rhs }) = spectre_oob_comparison { let final_base = pos.ins().iadd(base, index); // NB: The addition of the offset immediate must happen *before* the // `select_spectre_guard`. If it happens after, then we potentially are @@ -301,7 +315,7 @@ fn compute_addr( let zero = pos.ins().iconst(addr_ty, 0); trace!(" inserting: {}", pos.func.dfg.display_value_inst(zero)); - let cmp = pos.ins().icmp(cc, a, b); + let cmp = pos.ins().icmp(cc, lhs, rhs); trace!(" inserting: {}", pos.func.dfg.display_value_inst(cmp)); let value = pos From 7aa3cffdd2a729d3474259111c37026a57ad0093 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Wed, 9 Nov 2022 15:38:49 -0800 Subject: [PATCH 7/7] More trace logging for heap legalization --- cranelift/codegen/src/legalizer/heap.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/cranelift/codegen/src/legalizer/heap.rs b/cranelift/codegen/src/legalizer/heap.rs index bb0bc5fe6ceb..96e905b3f1a2 100644 --- a/cranelift/codegen/src/legalizer/heap.rs +++ b/cranelift/codegen/src/legalizer/heap.rs @@ -157,8 +157,10 @@ fn static_addr( // This first case is a trivial case where we can statically trap. if offset_plus_size(offset, access_size) > bound { // This will simply always trap since `offset >= 0`. - pos.ins().trap(ir::TrapCode::HeapOutOfBounds); - pos.func.dfg.replace(inst).iconst(addr_ty, 0); + let trap = pos.ins().trap(ir::TrapCode::HeapOutOfBounds); + trace!(" inserting: {}", pos.func.dfg.display_inst(trap)); + let iconst = pos.func.dfg.replace(inst).iconst(addr_ty, 0); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(iconst)); // Split the block, as the trap is a terminator instruction. let curr_block = pos.current_block().expect("Cursor is not in a block"); @@ -207,9 +209,14 @@ fn static_addr( (IntCC::UnsignedGreaterThan, index, limit) }; let oob = pos.ins().icmp_imm(cc, lhs, limit_imm); - pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(oob)); + + let trapnz = pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds); + trace!(" inserting: {}", pos.func.dfg.display_inst(trapnz)); + if isa.flags().enable_heap_access_spectre_mitigation() { let limit = pos.ins().iconst(addr_ty, limit_imm); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(limit)); spectre_oob_comparison = Some(SpectreOobComparison { cc, lhs,