From 4b288ba88dfe1c2cce720c46bf9d919e54871c61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C3=BAl=20Cabrera?= Date: Fri, 29 Sep 2023 12:59:40 -0400 Subject: [PATCH] winch(x64): Call indirect (#7100) * winch(x64): Call indirect This change adds support for the `call_indirect` instruction to Winch. Libcalls are a prerequisite for supporting `call_indirect`, in order to lazily initialize funcrefs. This change adds support for libcalls to Winch by introducing a `BuiltinFunctions` struct similar to Cranelift's `BuiltinFunctionSignatures` struct. In general, libcalls are handled like any other function call, with the only difference that, since not all the information needed to fulfill the function call might be known up-front, control is given to the caller for finalizing the call. The introduction of function references also involves dealing with pointer-sized loads and stores, so this change also adds the required functionality to `FuncEnv` and `MacroAssembler` to be pointer aware, making it straightforward to derive an `OperandSize` or `WasmType` from the target's pointer size. Finally, given the complexity of the `call_indirect` instruction, this change bundles an improvement to the register allocator, allowing it to track allocatable vs. non-allocatable registers; this is done to avoid any mistakes when allocating/de-allocating registers that are not allocatable. -- prtest:full * Address review comments * Fix typos * Better documentation for `new_unchecked` * Introduce `max` for `BitSet` * Make allocatable property `u64` * winch(calls): Overhaul `FnCall` This commit simplifies `FnCall`'s interface, making its usage more uniform throughout the compiler. In summary, this change: * Avoids side effects in the `FnCall::new` constructor, and also makes it the only constructor. * Exposes `FnCall::save_live_registers` and `FnCall::calculate_call_stack_space` to calculate the stack space consumed by the call, so that the caller can decide which one to use at callsites depending on their use case.
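To illustrate the new `FnCall` interface, this is how a call to a locally defined function is emitted after this change (taken from `CodeGen::emit_call` in this patch):

    let abi_sig = <M::ABI as ABI>::sig(&callee.ty, &CallingConvention::Default);
    FnCall::new(&abi_sig)
        .save_live_registers(&mut self.context, self.masm)
        .direct(self.masm, &mut self.context, callee.index);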
* tests: Fix regset tests --- build.rs | 8 +- crates/winch/src/compiler.rs | 2 +- fuzz/fuzz_targets/differential.rs | 3 +- tests/misc_testsuite/winch/call_indirect.wast | 413 ++++++++++++++++++ winch/codegen/src/abi/mod.rs | 27 +- winch/codegen/src/codegen/builtin.rs | 100 +++++ winch/codegen/src/codegen/call.rs | 171 ++++++-- winch/codegen/src/codegen/context.rs | 33 +- winch/codegen/src/codegen/env.rs | 129 +++++- winch/codegen/src/codegen/mod.rs | 180 +++++--- winch/codegen/src/isa/aarch64/abi.rs | 17 +- winch/codegen/src/isa/aarch64/masm.rs | 28 +- winch/codegen/src/isa/aarch64/mod.rs | 22 +- winch/codegen/src/isa/aarch64/regs.rs | 12 +- winch/codegen/src/isa/mod.rs | 4 +- winch/codegen/src/isa/reg.rs | 4 +- winch/codegen/src/isa/x64/abi.rs | 17 +- winch/codegen/src/isa/x64/address.rs | 3 +- winch/codegen/src/isa/x64/asm.rs | 74 ++-- winch/codegen/src/isa/x64/masm.rs | 111 ++++- winch/codegen/src/isa/x64/mod.rs | 35 +- winch/codegen/src/isa/x64/regs.rs | 15 +- winch/codegen/src/masm.rs | 63 ++- winch/codegen/src/regalloc.rs | 22 +- winch/codegen/src/regset.rs | 124 +++++- winch/codegen/src/stack.rs | 24 +- winch/codegen/src/visitor.rs | 110 ++++- .../x64/call_indirect/call_indirect.wat | 130 ++++++ winch/filetests/src/lib.rs | 9 +- winch/src/compile.rs | 11 +- 30 files changed, 1626 insertions(+), 275 deletions(-) create mode 100644 tests/misc_testsuite/winch/call_indirect.wast create mode 100644 winch/codegen/src/codegen/builtin.rs create mode 100644 winch/filetests/filetests/x64/call_indirect/call_indirect.wat diff --git a/build.rs b/build.rs index b591af39d7cd..041373eba0c3 100644 --- a/build.rs +++ b/build.rs @@ -205,11 +205,17 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { // We ignore tests that assert for traps on windows, given // that Winch doesn't encode unwind information for Windows, yet. if strategy == "Winch" { + if testsuite == "misc_testsuite" { + // The misc/call_indirect is fully supported by Winch. + if testname == "call_indirect" { + return false; + } + } if testsuite != "winch" { return true; } - let assert_trap = ["i32", "i64"].contains(&testname); + let assert_trap = ["i32", "i64", "call_indirect"].contains(&testname); if assert_trap && env::var("CARGO_CFG_TARGET_OS").unwrap().as_str() == "windows" { return true; diff --git a/crates/winch/src/compiler.rs b/crates/winch/src/compiler.rs index 73605c2c3761..37ff419335e6 100644 --- a/crates/winch/src/compiler.rs +++ b/crates/winch/src/compiler.rs @@ -68,7 +68,7 @@ impl wasmtime_environ::Compiler for Compiler { let mut validator = validator.into_validator(self.take_allocations()); let buffer = self .isa - .compile_function(ty, &body, &translation, &mut validator) + .compile_function(ty, types, &body, &translation, &mut validator) .map_err(|e| CompileError::Codegen(format!("{e:?}"))); self.save_allocations(validator.into_allocations()); let buffer = buffer?; diff --git a/fuzz/fuzz_targets/differential.rs b/fuzz/fuzz_targets/differential.rs index 9cbf7c608389..34164666ebf0 100644 --- a/fuzz/fuzz_targets/differential.rs +++ b/fuzz/fuzz_targets/differential.rs @@ -374,7 +374,8 @@ fn winch_supports_module(module: &[u8]) -> bool { | F32Abs { .. } | F64Abs { .. } | F32Neg { .. } - | F64Neg { .. } => {} + | F64Neg { .. } + | CallIndirect { .. 
} => {} _ => { supported = false; break 'main; diff --git a/tests/misc_testsuite/winch/call_indirect.wast b/tests/misc_testsuite/winch/call_indirect.wast new file mode 100644 index 000000000000..a432415aa6c6 --- /dev/null +++ b/tests/misc_testsuite/winch/call_indirect.wast @@ -0,0 +1,413 @@ +;; Test `call_indirect` operator + +(module + ;; Auxiliary definitions + (type $proc (func)) + (type $out-i32 (func (result i32))) + (type $out-i64 (func (result i64))) + (type $out-f32 (func (result f32))) + (type $out-f64 (func (result f64))) + (type $over-i32 (func (param i32) (result i32))) + (type $over-i64 (func (param i64) (result i64))) + (type $over-f32 (func (param f32) (result f32))) + (type $over-f64 (func (param f64) (result f64))) + (type $f32-i32 (func (param f32 i32) (result i32))) + (type $i32-i64 (func (param i32 i64) (result i64))) + (type $f64-f32 (func (param f64 f32) (result f32))) + (type $i64-f64 (func (param i64 f64) (result f64))) + (type $over-i32-duplicate (func (param i32) (result i32))) + (type $over-i64-duplicate (func (param i64) (result i64))) + (type $over-f32-duplicate (func (param f32) (result f32))) + (type $over-f64-duplicate (func (param f64) (result f64))) + + (func $const-i32 (type $out-i32) (i32.const 0x132)) + (func $const-i64 (type $out-i64) (i64.const 0x164)) + (func $const-f32 (type $out-f32) (f32.const 0xf32)) + (func $const-f64 (type $out-f64) (f64.const 0xf64)) + + (func $id-i32 (type $over-i32) (local.get 0)) + (func $id-i64 (type $over-i64) (local.get 0)) + (func $id-f32 (type $over-f32) (local.get 0)) + (func $id-f64 (type $over-f64) (local.get 0)) + + (func $i32-i64 (type $i32-i64) (local.get 1)) + (func $i64-f64 (type $i64-f64) (local.get 1)) + (func $f32-i32 (type $f32-i32) (local.get 1)) + (func $f64-f32 (type $f64-f32) (local.get 1)) + + (func $over-i32-duplicate (type $over-i32-duplicate) (local.get 0)) + (func $over-i64-duplicate (type $over-i64-duplicate) (local.get 0)) + (func $over-f32-duplicate (type $over-f32-duplicate) (local.get 0)) + (func $over-f64-duplicate (type $over-f64-duplicate) (local.get 0)) + + (table funcref + (elem + $const-i32 $const-i64 $const-f32 $const-f64 ;; 0..3 + $id-i32 $id-i64 $id-f32 $id-f64 ;; 4..7 + $f32-i32 $i32-i64 $f64-f32 $i64-f64 ;; 8..11 + $fac-i64 $fib-i64 $even $odd ;; 12..15 + $over-i32-duplicate $over-i64-duplicate ;; 16..17 + $over-f32-duplicate $over-f64-duplicate ;; 18..19 + $fac-i32 $fib-i32 ;; 20..21 + ) + ) + + ;; Typing + + (func (export "type-i32") (result i32) + (call_indirect (type $out-i32) (i32.const 0)) + ) + (func (export "type-i64") (result i64) + (call_indirect (type $out-i64) (i32.const 1)) + ) + (func (export "type-f32") (result f32) + (call_indirect (type $out-f32) (i32.const 2)) + ) + (func (export "type-f64") (result f64) + (call_indirect (type $out-f64) (i32.const 3)) + ) + + (func (export "type-index") (result i64) + (call_indirect (type $over-i64) (i64.const 100) (i32.const 5)) + ) + + (func (export "type-first-i32") (result i32) + (call_indirect (type $over-i32) (i32.const 32) (i32.const 4)) + ) + (func (export "type-first-i64") (result i64) + (call_indirect (type $over-i64) (i64.const 64) (i32.const 5)) + ) + (func (export "type-first-f32") (result f32) + (call_indirect (type $over-f32) (f32.const 1.32) (i32.const 6)) + ) + (func (export "type-first-f64") (result f64) + (call_indirect (type $over-f64) (f64.const 1.64) (i32.const 7)) + ) + + (func (export "type-second-i32") (result i32) + (call_indirect (type $f32-i32) (f32.const 32.1) (i32.const 32) (i32.const 8)) + ) + (func 
(export "type-second-i64") (result i64) + (call_indirect (type $i32-i64) (i32.const 32) (i64.const 64) (i32.const 9)) + ) + (func (export "type-second-f32") (result f32) + (call_indirect (type $f64-f32) (f64.const 64) (f32.const 32) (i32.const 10)) + ) + (func (export "type-second-f64") (result f64) + (call_indirect (type $i64-f64) (i64.const 64) (f64.const 64.1) (i32.const 11)) + ) + + ;; Dispatch + + (func (export "dispatch") (param i32 i64) (result i64) + (call_indirect (type $over-i64) (local.get 1) (local.get 0)) + ) + + (func (export "dispatch-structural-i64") (param i32) (result i64) + (call_indirect (type $over-i64-duplicate) (i64.const 9) (local.get 0)) + ) + (func (export "dispatch-structural-i32") (param i32) (result i32) + (call_indirect (type $over-i32-duplicate) (i32.const 9) (local.get 0)) + ) + (func (export "dispatch-structural-f32") (param i32) (result f32) + (call_indirect (type $over-f32-duplicate) (f32.const 9.0) (local.get 0)) + ) + (func (export "dispatch-structural-f64") (param i32) (result f64) + (call_indirect (type $over-f64-duplicate) (f64.const 9.0) (local.get 0)) + ) + + ;; Recursion + + (func $fac-i64 (export "fac-i64") (type $over-i64) + (if (result i64) (i64.eqz (local.get 0)) + (then (i64.const 1)) + (else + (i64.mul + (local.get 0) + (call_indirect (type $over-i64) + (i64.sub (local.get 0) (i64.const 1)) + (i32.const 12) + ) + ) + ) + ) + ) + + (func $fib-i64 (export "fib-i64") (type $over-i64) + (if (result i64) (i64.le_u (local.get 0) (i64.const 1)) + (then (i64.const 1)) + (else + (i64.add + (call_indirect (type $over-i64) + (i64.sub (local.get 0) (i64.const 2)) + (i32.const 13) + ) + (call_indirect (type $over-i64) + (i64.sub (local.get 0) (i64.const 1)) + (i32.const 13) + ) + ) + ) + ) + ) + + (func $fac-i32 (export "fac-i32") (type $over-i32) + (if (result i32) (i32.eqz (local.get 0)) + (then (i32.const 1)) + (else + (i32.mul + (local.get 0) + (call_indirect (type $over-i32) + (i32.sub (local.get 0) (i32.const 1)) + (i32.const 20) + ) + ) + ) + ) + ) + + (func $fib-i32 (export "fib-i32") (type $over-i32) + (if (result i32) (i32.le_u (local.get 0) (i32.const 1)) + (then (i32.const 1)) + (else + (i32.add + (call_indirect (type $over-i32) + (i32.sub (local.get 0) (i32.const 2)) + (i32.const 21) + ) + (call_indirect (type $over-i32) + (i32.sub (local.get 0) (i32.const 1)) + (i32.const 21) + ) + ) + ) + ) + ) + + (func $even (export "even") (param i32) (result i32) + (if (result i32) (i32.eqz (local.get 0)) + (then (i32.const 44)) + (else + (call_indirect (type $over-i32) + (i32.sub (local.get 0) (i32.const 1)) + (i32.const 15) + ) + ) + ) + ) + (func $odd (export "odd") (param i32) (result i32) + (if (result i32) (i32.eqz (local.get 0)) + (then (i32.const 99)) + (else + (call_indirect (type $over-i32) + (i32.sub (local.get 0) (i32.const 1)) + (i32.const 14) + ) + ) + ) + ) + + ;; As parameter of control constructs and instructions + + (memory 1) + + (func (export "as-select-first") (result i32) + (select (call_indirect (type $out-i32) (i32.const 0)) (i32.const 2) (i32.const 3)) + ) + (func (export "as-select-mid") (result i32) + (select (i32.const 2) (call_indirect (type $out-i32) (i32.const 0)) (i32.const 3)) + ) + (func (export "as-select-last") (result i32) + (select (i32.const 2) (i32.const 3) (call_indirect (type $out-i32) (i32.const 0))) + ) + + (func (export "as-if-condition") (result i32) + (if (result i32) (call_indirect (type $out-i32) (i32.const 0)) (then (i32.const 1)) (else (i32.const 2))) + ) + + (func (export "as-br_if-first") (result 
i64) + (block (result i64) (br_if 0 (call_indirect (type $out-i64) (i32.const 1)) (i32.const 2))) + ) + (func (export "as-br_if-last") (result i32) + (block (result i32) (br_if 0 (i32.const 2) (call_indirect (type $out-i32) (i32.const 0)))) + ) + + (func (export "as-br_table-first") (result f32) + (block (result f32) (call_indirect (type $out-f32) (i32.const 2)) (i32.const 2) (br_table 0 0)) + ) + (func (export "as-br_table-last") (result i32) + (block (result i32) (i32.const 2) (call_indirect (type $out-i32) (i32.const 0)) (br_table 0 0)) + ) + + (func (export "as-return-value") (result i32) + (call_indirect (type $over-i32) (i32.const 1) (i32.const 4)) (return) + ) + (func (export "as-drop-operand") + (call_indirect (type $over-i64) (i64.const 1) (i32.const 5)) (drop) + ) + (func (export "as-br-value") (result f32) + (block (result f32) (br 0 (call_indirect (type $over-f32) (f32.const 1) (i32.const 6)))) + ) + (func (export "as-local.set-value") (result f64) + (local f64) (local.set 0 (call_indirect (type $over-f64) (f64.const 1) (i32.const 7))) (local.get 0) + ) + (func (export "as-local.tee-value") (result f64) + (local f64) (local.tee 0 (call_indirect (type $over-f64) (f64.const 1) (i32.const 7))) + ) + (global $a (mut f64) (f64.const 10.0)) + (func (export "as-global.set-value") (result f64) + (global.set $a (call_indirect (type $over-f64) (f64.const 1.0) (i32.const 7))) + (global.get $a) + ) + + (func (export "as-binary-left") (result i32) + (block (result i32) + (i32.add + (call_indirect (type $over-i32) (i32.const 1) (i32.const 4)) + (i32.const 10) + ) + ) + ) + (func (export "as-binary-right") (result i32) + (block (result i32) + (i32.sub + (i32.const 10) + (call_indirect (type $over-i32) (i32.const 1) (i32.const 4)) + ) + ) + ) + + (func (export "as-test-operand") (result i32) + (block (result i32) + (i32.eqz + (call_indirect (type $over-i32) (i32.const 1) (i32.const 4)) + ) + ) + ) + + (func (export "as-compare-left") (result i32) + (block (result i32) + (i32.le_u + (call_indirect (type $over-i32) (i32.const 1) (i32.const 4)) + (i32.const 10) + ) + ) + ) + (func (export "as-compare-right") (result i32) + (block (result i32) + (i32.ne + (i32.const 10) + (call_indirect (type $over-i32) (i32.const 1) (i32.const 4)) + ) + ) + ) +) + +(assert_return (invoke "type-i32") (i32.const 0x132)) +(assert_return (invoke "type-i64") (i64.const 0x164)) +(assert_return (invoke "type-f32") (f32.const 0xf32)) +(assert_return (invoke "type-f64") (f64.const 0xf64)) + +(assert_return (invoke "type-index") (i64.const 100)) + +(assert_return (invoke "type-first-i32") (i32.const 32)) +(assert_return (invoke "type-first-i64") (i64.const 64)) +(assert_return (invoke "type-first-f32") (f32.const 1.32)) +(assert_return (invoke "type-first-f64") (f64.const 1.64)) + +(assert_return (invoke "type-second-i32") (i32.const 32)) +(assert_return (invoke "type-second-i64") (i64.const 64)) +(assert_return (invoke "type-second-f32") (f32.const 32)) +(assert_return (invoke "type-second-f64") (f64.const 64.1)) + +(assert_return (invoke "dispatch" (i32.const 5) (i64.const 2)) (i64.const 2)) +(assert_return (invoke "dispatch" (i32.const 5) (i64.const 5)) (i64.const 5)) +(assert_return (invoke "dispatch" (i32.const 12) (i64.const 5)) (i64.const 120)) +(assert_return (invoke "dispatch" (i32.const 13) (i64.const 5)) (i64.const 8)) +(assert_return (invoke "dispatch" (i32.const 17) (i64.const 2)) (i64.const 2)) +(assert_trap (invoke "dispatch" (i32.const 0) (i64.const 2)) "indirect call type mismatch") +(assert_trap (invoke 
"dispatch" (i32.const 15) (i64.const 2)) "indirect call type mismatch") +(assert_trap (invoke "dispatch" (i32.const 32) (i64.const 2)) "undefined element") +(assert_trap (invoke "dispatch" (i32.const -1) (i64.const 2)) "undefined element") +(assert_trap (invoke "dispatch" (i32.const 1213432423) (i64.const 2)) "undefined element") + +(assert_return (invoke "dispatch-structural-i64" (i32.const 5)) (i64.const 9)) +(assert_return (invoke "dispatch-structural-i64" (i32.const 12)) (i64.const 362880)) +(assert_return (invoke "dispatch-structural-i64" (i32.const 13)) (i64.const 55)) +(assert_return (invoke "dispatch-structural-i64" (i32.const 17)) (i64.const 9)) +(assert_trap (invoke "dispatch-structural-i64" (i32.const 11)) "indirect call type mismatch") +(assert_trap (invoke "dispatch-structural-i64" (i32.const 19)) "indirect call type mismatch") + +(assert_return (invoke "dispatch-structural-i32" (i32.const 4)) (i32.const 9)) +(assert_return (invoke "dispatch-structural-i32" (i32.const 20)) (i32.const 362880)) +(assert_return (invoke "dispatch-structural-i32" (i32.const 21)) (i32.const 55)) +(assert_return (invoke "dispatch-structural-i32" (i32.const 16)) (i32.const 9)) +(assert_trap (invoke "dispatch-structural-i32" (i32.const 8)) "indirect call type mismatch") +(assert_trap (invoke "dispatch-structural-i32" (i32.const 18)) "indirect call type mismatch") + +(assert_return (invoke "dispatch-structural-f32" (i32.const 6)) (f32.const 9.0)) +(assert_return (invoke "dispatch-structural-f32" (i32.const 18)) (f32.const 9.0)) +(assert_trap (invoke "dispatch-structural-f32" (i32.const 8)) "indirect call type mismatch") +(assert_trap (invoke "dispatch-structural-f32" (i32.const 19)) "indirect call type mismatch") + +(assert_return (invoke "dispatch-structural-f64" (i32.const 7)) (f64.const 9.0)) +(assert_return (invoke "dispatch-structural-f64" (i32.const 19)) (f64.const 9.0)) +(assert_trap (invoke "dispatch-structural-f64" (i32.const 10)) "indirect call type mismatch") +(assert_trap (invoke "dispatch-structural-f64" (i32.const 18)) "indirect call type mismatch") + +(assert_return (invoke "fac-i64" (i64.const 0)) (i64.const 1)) +(assert_return (invoke "fac-i64" (i64.const 1)) (i64.const 1)) +(assert_return (invoke "fac-i64" (i64.const 5)) (i64.const 120)) +(assert_return (invoke "fac-i64" (i64.const 25)) (i64.const 7034535277573963776)) + +(assert_return (invoke "fac-i32" (i32.const 0)) (i32.const 1)) +(assert_return (invoke "fac-i32" (i32.const 1)) (i32.const 1)) +(assert_return (invoke "fac-i32" (i32.const 5)) (i32.const 120)) +(assert_return (invoke "fac-i32" (i32.const 10)) (i32.const 3628800)) + +(assert_return (invoke "fib-i64" (i64.const 0)) (i64.const 1)) +(assert_return (invoke "fib-i64" (i64.const 1)) (i64.const 1)) +(assert_return (invoke "fib-i64" (i64.const 2)) (i64.const 2)) +(assert_return (invoke "fib-i64" (i64.const 5)) (i64.const 8)) +(assert_return (invoke "fib-i64" (i64.const 20)) (i64.const 10946)) + +(assert_return (invoke "fib-i32" (i32.const 0)) (i32.const 1)) +(assert_return (invoke "fib-i32" (i32.const 1)) (i32.const 1)) +(assert_return (invoke "fib-i32" (i32.const 2)) (i32.const 2)) +(assert_return (invoke "fib-i32" (i32.const 5)) (i32.const 8)) +(assert_return (invoke "fib-i32" (i32.const 20)) (i32.const 10946)) + +(assert_return (invoke "even" (i32.const 0)) (i32.const 44)) +(assert_return (invoke "even" (i32.const 1)) (i32.const 99)) +(assert_return (invoke "even" (i32.const 100)) (i32.const 44)) +(assert_return (invoke "even" (i32.const 77)) (i32.const 99)) +(assert_return 
(invoke "odd" (i32.const 0)) (i32.const 99)) +(assert_return (invoke "odd" (i32.const 1)) (i32.const 44)) +(assert_return (invoke "odd" (i32.const 200)) (i32.const 99)) +(assert_return (invoke "odd" (i32.const 77)) (i32.const 44)) + +(assert_return (invoke "as-select-first") (i32.const 0x132)) +(assert_return (invoke "as-select-mid") (i32.const 2)) +(assert_return (invoke "as-select-last") (i32.const 2)) + +(assert_return (invoke "as-if-condition") (i32.const 1)) + +(assert_return (invoke "as-br_if-first") (i64.const 0x164)) +(assert_return (invoke "as-br_if-last") (i32.const 2)) + +(assert_return (invoke "as-br_table-first") (f32.const 0xf32)) +(assert_return (invoke "as-br_table-last") (i32.const 2)) + +(assert_return (invoke "as-return-value") (i32.const 1)) +(assert_return (invoke "as-drop-operand")) +(assert_return (invoke "as-br-value") (f32.const 1)) +(assert_return (invoke "as-local.set-value") (f64.const 1)) +(assert_return (invoke "as-local.tee-value") (f64.const 1)) +(assert_return (invoke "as-global.set-value") (f64.const 1.0)) + +(assert_return (invoke "as-binary-left") (i32.const 11)) +(assert_return (invoke "as-binary-right") (i32.const 9)) +(assert_return (invoke "as-test-operand") (i32.const 0)) +(assert_return (invoke "as-compare-left") (i32.const 1)) +(assert_return (invoke "as-compare-right") (i32.const 1)) + diff --git a/winch/codegen/src/abi/mod.rs index 5fff6798240a..5362a81cfc78 100644 --- a/winch/codegen/src/abi/mod.rs +++ b/winch/codegen/src/abi/mod.rs @@ -72,6 +72,10 @@ pub(crate) trait ABI { /// function type. fn sig(wasm_sig: &WasmFuncType, call_conv: &CallingConvention) -> ABISig; + /// Construct an ABI signature from WasmType params and returns. + fn sig_from(params: &[WasmType], returns: &[WasmType], call_conv: &CallingConvention) + -> ABISig; + /// Construct the ABI-specific result from a slice of /// [`wasmtime_environ::WasmtType`]. fn result(returns: &[WasmType], call_conv: &CallingConvention) -> ABIResult; @@ -107,7 +111,7 @@ pub(crate) trait ABI { /// ABI-specific representation of a function argument. #[derive(Debug)] -pub(crate) enum ABIArg { +pub enum ABIArg { /// A register argument. Reg { /// Type of the argument. @@ -212,11 +216,20 @@ impl ABIResult { 1 } } + + /// Returns an iterator over the result registers. + /// + /// NOTE: Currently only one or zero registers + /// will be returned until support for multi-value is introduced. + pub fn regs(&self) -> impl Iterator<Item = Reg> + '_ { + std::iter::once(self.result_reg()).filter_map(|v| v) + } } pub(crate) type ABIParams = SmallVec<[ABIArg; 6]>; /// An ABI-specific representation of a function signature. +#[derive(Debug)] pub(crate) struct ABISig { /// Function parameters. pub params: ABIParams, @@ -235,6 +248,18 @@ impl ABISig { stack_bytes, } } + + /// Returns an iterator over all the registers used as params. + pub fn param_regs(&self) -> impl Iterator<Item = Reg> + '_ { + self.params.iter().filter_map(|r| r.get_reg()) + } + + /// Returns an iterator over all the registers used in the signature. + pub fn regs(&self) -> impl Iterator<Item = Reg> + '_ { + let params_iter = self.param_regs(); + let result_iter = self.result.regs(); + params_iter.chain(result_iter) + } } /// Returns the size in bytes of a given WebAssembly type. diff --git a/winch/codegen/src/codegen/builtin.rs new file mode 100644 index 000000000000..9a5fe34a34d3 --- /dev/null +++ b/winch/codegen/src/codegen/builtin.rs @@ -0,0 +1,100 @@ +//! Builtin function handling.
+ +use crate::{ + abi::{ABISig, ABI}, + codegen::env::ptr_type_from_ptr_size, + CallingConvention, }; +use wasmtime_environ::{BuiltinFunctionIndex, PtrSize, WasmType}; + +/// Metadata about a builtin function. +pub(crate) struct BuiltinFunction { + /// The ABI-specific signature of the function. + pub sig: ABISig, + /// The offset of the builtin function. + pub offset: u32, + /// The builtin function base, relative to the VMContext. + pub base: u32, +} + +macro_rules! declare_function_sig { + ( + $( + $( #[$attr:meta] )* + $name:ident( $( $pname:ident: $param:ident ),* ) $( -> $result:ident )?; + )* + ) => { + /// Provides the ABI signature for each builtin function. + pub struct BuiltinFunctions { + /// The target calling convention. + call_conv: CallingConvention, + /// The target pointer size. + ptr_size: u8, + /// The target pointer type, as a WebAssembly type. + ptr_type: WasmType, + /// The builtin functions base relative to the VMContext. + base: u32, + $( + $name: Option<BuiltinFunction>, + )* + } + + // Until all the builtin functions are used. + #[allow(dead_code)] + impl BuiltinFunctions { + pub fn new(ptr: impl PtrSize, call_conv: CallingConvention, base: u32) -> Self { + let size = ptr.size(); + Self { + ptr_size: size, + call_conv, + base, + ptr_type: ptr_type_from_ptr_size(size), + $( + $name: None, + )* + } + } + + fn pointer(&self) -> WasmType { + self.ptr_type + } + + fn vmctx(&self) -> WasmType { + self.pointer() + } + + fn i32(&self) -> WasmType { + WasmType::I32 + } + + fn i64(&self) -> WasmType { + WasmType::I64 + } + + fn reference(&self) -> WasmType { + self.pointer() + } + + $( + pub(crate) fn $name<A: ABI>(&mut self) -> &BuiltinFunction { + if self.$name.is_none() { + let params = vec![ $(self.$param() ),* ]; + let result = vec![ $(self.$result() )?]; + let sig = A::sig_from(&params, &result, &self.call_conv); + let index = BuiltinFunctionIndex::$name(); + self.$name = Some(BuiltinFunction { + sig, + offset: index.index() * (self.ptr_size as u32), + base: self.base, + }); + } + + self.$name.as_ref().unwrap() + } + )* + } + } +} + +wasmtime_environ::foreach_builtin_function!(declare_function_sig); diff --git a/winch/codegen/src/codegen/call.rs index aab3e688cf8c..6b311e94848b 100644 --- a/winch/codegen/src/codegen/call.rs +++ b/winch/codegen/src/codegen/call.rs @@ -2,13 +2,14 @@ //! calling convention, see [ABI]. use crate::{ abi::{ABIArg, ABISig, ABI}, - codegen::CodeGenContext, + codegen::{BuiltinFunction, CodeGenContext}, masm::{CalleeKind, MacroAssembler, OperandSize}, reg::Reg, }; use wasmtime_environ::FuncIndex; /// All the information needed to emit a function call. +#[derive(Copy, Clone)] pub(crate) struct FnCall<'a> { /// The stack space consumed by the function call; that is, /// the sum of: @@ -57,37 +58,44 @@ pub(crate) struct FnCall<'a> { /// │ │ /// └──────────────────────────────────────────────────┘ ------> Stack pointer when emitting the call /// - call_stack_space: u32, + call_stack_space: Option<u32>, /// The total stack space needed for the callee arguments on the /// stack, including any adjustments to the function's frame and /// aligned to to the required ABI alignment. arg_stack_space: u32, /// The ABI-specific signature of the callee. - abi_sig: &'a ABISig, + pub abi_sig: &'a ABISig, } impl<'a> FnCall<'a> { - /// Allocate and setup a new function call.
- /// - /// The setup process, will first save all the live registers in - /// the value stack, tracking down those spilled for the function - /// arguments(see comment below for more details) it will also - /// track all the memory entries consumed by the function - /// call. Then, it will calculate any adjustments needed to ensure - /// the alignment of the caller's frame. It's important to note - /// that the order of operations in the setup is important, as we - /// want to calculate any adjustments to the caller's frame, after - /// having saved any live registers, so that we can account for - /// any pushes generated by register spilling. - pub fn new( - callee_sig: &'a ABISig, + /// Creates a new [`FnCall`] from the callee's [`ABISig`]. + pub fn new(callee_sig: &'a ABISig) -> Self { + Self { + abi_sig: &callee_sig, + arg_stack_space: callee_sig.stack_bytes, + call_stack_space: None, + } + } + + /// Saves any live registers and records the stack space that will be + /// consumed by the function call. The stack space consumed by the call must + /// be known before emitting the call via any of the emission variants: + /// [`FnCall::direct`], [`FnCall::reg`] or [`FnCall::addr`], which + /// means that the call stack space must be calculated either by invoking + /// [`FnCall::save_live_registers`] or + /// [`FnCall::calculate_call_stack_space`] before invoking any of + /// the emission variants. + pub fn save_live_registers<M: MacroAssembler>( + &mut self, context: &mut CodeGenContext, masm: &mut M, - ) -> Self { + ) -> &mut Self { + // Invariant: ensure that `call_stack_space` is only set once: either by + // [`FnCall::save_live_registers`] or + // [`FnCall::calculate_call_stack_space`] + debug_assert!(self.call_stack_space.is_none()); + let callee_params = &self.abi_sig.params; let stack = &context.stack; - let arg_stack_space = callee_sig.stack_bytes; - let callee_params = &callee_sig.params; - let call_stack_space = match callee_params.len() { 0 => { let _ = context.save_live_registers_and_calculate_sizeof(masm, ..); @@ -126,45 +134,138 @@ impl<'a> FnCall<'a> { } }; - Self { - abi_sig: &callee_sig, - arg_stack_space, - call_stack_space, - } + self.call_stack_space = Some(call_stack_space); + self + } + + /// Records the stack space that will be needed by the function call by + /// scanning the value stack and returning the size of all the memory + /// entries present in the callee's argument range. The stack space + /// consumed by the call must be known before emitting the call via any of + /// the emission variants: [`FnCall::direct`], [`FnCall::reg`] or + /// [`FnCall::addr`], which means that the call stack space must be + /// calculated either by invoking [`FnCall::save_live_registers`] or + /// [`FnCall::calculate_call_stack_space`] before invoking any of + /// the emission variants. + /// This function is particularly useful when there's no need to save any + /// live registers before emitting the function call. This could happen when + /// emitting calls to libcalls: [`FnCall::with_lib`] will eagerly save all + /// the live registers when invoked and will also ensure that any registers + /// allocated after are non-argument registers, in which case if any of + /// those registers need to go on the value stack to be used as function + /// arguments, they don't need to be saved.
+ pub fn calculate_call_stack_space(&mut self, context: &mut CodeGenContext) -> &mut Self { + // Invariant: ensure that `call_stack_space` is only set once: either by + // [`FnCall::save_live_registers`] or + // [`FnCall::calculate_call_stack_space`] + debug_assert!(self.call_stack_space.is_none()); + let params_len = self.abi_sig.params.len(); + assert!(context.stack.len() >= params_len); + + let stack_len = context.stack.len(); + let call_stack_space = if params_len == 0 { + 0 + } else { + context.stack.sizeof((stack_len - params_len)..) + }; + self.call_stack_space = Some(call_stack_space); + self } /// Emit a direct function call, to a locally defined function. pub fn direct<M: MacroAssembler>( - &self, + self, masm: &mut M, context: &mut CodeGenContext, callee: FuncIndex, ) { + // Invariant: `call_stack_space` must be known. + debug_assert!(self.call_stack_space.is_some()); let reserved_stack = masm.call(self.arg_stack_space, |masm| { self.assign_args(context, masm, <M::ABI as ABI>::scratch_reg()); - CalleeKind::Direct(callee.as_u32()) + CalleeKind::direct(callee.as_u32()) }); self.post_call::<M>(masm, context, reserved_stack); } - /// Emit an indirect function call, using a raw address. - pub fn indirect<M: MacroAssembler>( - &self, + /// Emit an indirect function call, using a register. + pub fn reg<M: MacroAssembler>(self, masm: &mut M, context: &mut CodeGenContext, reg: Reg) { + // Invariant: `call_stack_space` must be known. + debug_assert!(self.call_stack_space.is_some()); + let reserved_stack = masm.call(self.arg_stack_space, |masm| { + let scratch = <M::ABI as ABI>::scratch_reg(); + self.assign_args(context, masm, scratch); + CalleeKind::indirect(reg) + }); + context.free_reg(reg); + self.post_call::<M>(masm, context, reserved_stack); + } + + /// Emit an indirect function call, using an address. + /// This function will load the provided address into an unallocatable + /// scratch register. + pub fn addr<M: MacroAssembler>( + self, masm: &mut M, context: &mut CodeGenContext, - addr: M::Address, + callee: M::Address, ) { + // Invariant: `call_stack_space` must be known. + debug_assert!(self.call_stack_space.is_some()); let reserved_stack = masm.call(self.arg_stack_space, |masm| { let scratch = <M::ABI as ABI>::scratch_reg(); self.assign_args(context, masm, scratch); - masm.load(addr, scratch, OperandSize::S64); - CalleeKind::Indirect(scratch) + masm.load(callee, scratch, OperandSize::S64); + CalleeKind::indirect(scratch) }); + self.post_call::<M>(masm, context, reserved_stack); } + /// Prepares the compiler to call a built-in function (libcall). + /// This function saves all the live registers and loads the callee + /// address into a non-argument register which is then passed to the + /// caller through the provided callback. + /// + /// It is the caller's responsibility to finalize the function call + /// by calling `FnCall::reg` once all the information is known. + pub fn with_lib<F, M: MacroAssembler>( + &mut self, + masm: &mut M, + context: &mut CodeGenContext, + func: &BuiltinFunction, + mut f: F, + ) where + F: FnMut(&mut CodeGenContext, &mut M, &mut Self, Reg), + { + // When dealing with libcalls, we don't have all the information + // upfront (all necessary arguments in the stack) in order to optimize + // saving the live registers, so we save all the values available in + // the value stack.
+ context.spill(masm); + let vmctx = <M::ABI as ABI>::vmctx_reg(); + let scratch = <M::ABI as ABI>::scratch_reg(); + + let builtins_base = masm.address_at_reg(vmctx, func.base); + masm.load(builtins_base, scratch, OperandSize::S64); + let builtin_func_addr = masm.address_at_reg(scratch, func.offset); + context.without::<(), M, _>( + // Do not free the result registers, if any, as the function call will + // push them onto the stack as a result of the call. + self.abi_sig.regs(), + self.abi_sig.param_regs(), + masm, + |cx, masm| { + let callee = cx.any_gpr(masm); + masm.load_ptr(builtin_func_addr, callee); + f(cx, masm, self, callee); + cx.free_reg(callee); + }, + ); + } + fn post_call<M: MacroAssembler>(&self, masm: &mut M, context: &mut CodeGenContext, size: u32) { - masm.free_stack(self.call_stack_space + size); + masm.free_stack(self.call_stack_space.unwrap() + size); // Only account for registers given that any memory entries // consumed by the call (assigned to a register or to a stack // slot) were freed by the previous call to diff --git a/winch/codegen/src/codegen/context.rs index ad4bc40693ee..1f7658611911 100644 --- a/winch/codegen/src/codegen/context.rs +++ b/winch/codegen/src/codegen/context.rs @@ -2,7 +2,7 @@ use wasmtime_environ::WasmType; use super::ControlStackFrame; use crate::{ - abi::ABIResult, + abi::{ABIResult, ABI}, frame::Frame, isa::reg::RegClass, masm::{MacroAssembler, OperandSize, RegImm}, @@ -81,22 +81,32 @@ impl<'a> CodeGenContext<'a> { self.reg_for_class(RegClass::Int, masm) } - /// Executes the provided function, guaranteeing that the - /// specified register, if any, remains unallocatable throughout - /// the function's execution. - pub fn without<T, M, F>(&mut self, reg: Option<Reg>, masm: &mut M, mut f: F) -> T + /// Executes the provided function, guaranteeing that the specified set of + /// registers, if any, remain unallocatable throughout the function's + /// execution. Only the registers in the `free` iterator will be freed. The + /// caller must guarantee that, in case the iterators differ, the free + /// iterator is a subset of the alloc iterator.
+ pub fn without<T, M, F>( + &mut self, + alloc: impl Iterator<Item = Reg>, + free: impl Iterator<Item = Reg>, + masm: &mut M, + mut f: F, + ) -> T where M: MacroAssembler, F: FnMut(&mut Self, &mut M) -> T, { - if let Some(reg) = reg { - self.reg(reg, masm); + debug_assert!(free.size_hint().0 <= alloc.size_hint().0); + + for r in alloc { + self.reg(r, masm); } let result = f(self, masm); - if let Some(reg) = reg { - self.free_reg(reg); + for r in free { + self.free_reg(r); } result @@ -403,8 +413,9 @@ impl<'a> CodeGenContext<'a> { Val::Local(local) => { let slot = frame.get_local(local.index).expect("valid local at slot"); let addr = masm.local_address(&slot); - masm.load(addr, regalloc.scratch, slot.ty.into()); - let stack_slot = masm.push(regalloc.scratch, slot.ty.into()); + let scratch = <M::ABI as ABI>::scratch_reg(); + masm.load(addr, scratch, slot.ty.into()); + let stack_slot = masm.push(scratch, slot.ty.into()); *v = Val::mem(slot.ty, stack_slot); } _ => {} diff --git a/winch/codegen/src/codegen/env.rs index d3676ca65399..8e2d36c05f3f 100644 --- a/winch/codegen/src/codegen/env.rs +++ b/winch/codegen/src/codegen/env.rs @@ -1,31 +1,108 @@ +use crate::{codegen::BuiltinFunctions, CallingConvention}; use smallvec::{smallvec, SmallVec}; use wasmparser::BlockType; use wasmtime_environ::{ - FuncIndex, GlobalIndex, ModuleTranslation, PtrSize, TypeConvert, VMOffsets, WasmFuncType, - WasmType, + FuncIndex, GlobalIndex, ModuleTranslation, ModuleTypes, PtrSize, TableIndex, TypeConvert, + TypeIndex, VMOffsets, WasmFuncType, WasmType, }; +/// Table metadata. +pub struct TableData { + /// The offset to the base of the table. + pub offset: u32, + /// The offset to the current elements field. + pub current_elems_offset: u32, + /// If the table is imported, return the base + /// offset of the `from` field in `VMTableImport`. + pub base: Option<u32>, + /// The size of the table elements, in bytes. + pub element_size: u8, +} + +/// A function callee. +/// It categorizes how the callee should be treated +/// when performing the call. +pub enum Callee { + /// Locally defined function. + Local(CalleeInfo), + /// Imported function. + Import(CalleeInfo), + /// Function reference. + FuncRef(WasmFuncType), +} + +/// Metadata about a function callee. Used by the code generation to +/// emit function calls to local or imported functions. +pub struct CalleeInfo { + /// The function type. + pub ty: WasmFuncType, + /// The callee index in the WebAssembly function index space. + pub index: FuncIndex, +} + /// The function environment. /// /// Contains all information about the module and runtime that is accessible to /// to a particular function during code generation. -pub struct FuncEnv<'a, P> { +pub struct FuncEnv<'a, P: PtrSize> { /// Offsets to the fields within the `VMContext` ptr. pub vmoffsets: VMOffsets<P>
, /// Metadata about the translation process of a WebAssembly module. pub translation: &'a ModuleTranslation<'a>, + /// Metadata about the builtin functions. + pub builtins: BuiltinFunctions, + /// The module's function types. + pub types: &'a ModuleTypes, +} + +pub fn ptr_type_from_ptr_size(size: u8) -> WasmType { + (size == 8) + .then(|| WasmType::I64) + .unwrap_or_else(|| unimplemented!("Support for non-64-bit architectures")) } impl<'a, P: PtrSize> FuncEnv<'a, P> { /// Create a new function environment. - pub fn new(ptr: P, translation: &'a ModuleTranslation) -> Self { + pub fn new( + ptr: P, + translation: &'a ModuleTranslation, + types: &'a ModuleTypes, + call_conv: CallingConvention, + ) -> Self { let vmoffsets = VMOffsets::new(ptr, &translation.module); + let size = vmoffsets.ptr.size(); + let builtins_base = vmoffsets.vmctx_builtin_functions(); Self { vmoffsets, translation, + builtins: BuiltinFunctions::new(size, call_conv, builtins_base), + types, } } + /// Returns a slice of types representing the caller and callee VMContext types. + pub(crate) fn vmctx_args_type(&self) -> [WasmType; 2] { + let ty = self.ptr_type(); + [ty, ty] + } + + /// Derive the [`WasmType`] from the pointer size. + pub(crate) fn ptr_type(&self) -> WasmType { + ptr_type_from_ptr_size(self.ptr_size()) + } + + /// Returns the pointer size for the target ISA. + fn ptr_size(&self) -> u8 { + self.vmoffsets.ptr.size() + } + + /// Resolves a [`Callee::FuncRef`] from a type index. + pub fn funcref(&self, idx: TypeIndex) -> Callee { + let sig_index = self.translation.module.types[idx].unwrap_function(); + let ty = self.types[sig_index].clone(); + Callee::FuncRef(ty) + } + /// Resolves a function [`Callee`] from an index. pub fn callee_from_index(&self, idx: FuncIndex) -> Callee { let types = &self.translation.get_types(); @@ -33,10 +110,12 @@ impl<'a, P: PtrSize> FuncEnv<'a, P> { let ty = self.translation.module.convert_func_type(ty); let import = self.translation.module.is_imported_function(idx); - Callee { - ty, - import, - index: idx, + let info = CalleeInfo { ty, index: idx }; + + if import { + Callee::Import(info) + } else { + Callee::Local(info) } } @@ -60,15 +139,29 @@ impl<'a, P: PtrSize> FuncEnv<'a, P> { (ty, offset) } -} -/// Metadata about a function callee. Use by the code generation -/// to emit function calls. -pub struct Callee { - /// The function type. - pub ty: WasmFuncType, - /// A flag to determine if the callee is imported. - pub import: bool, - /// The callee index in the WebAssembly function index space. - pub index: FuncIndex, + /// Returns the table information for the given table index. 
+ pub fn resolve_table_data(&self, index: TableIndex) -> TableData { + let (from_offset, base_offset, current_elems_offset) = + match self.translation.module.defined_table_index(index) { + Some(defined) => ( + None, + self.vmoffsets.vmctx_vmtable_definition_base(defined), + self.vmoffsets + .vmctx_vmtable_definition_current_elements(defined), + ), + None => ( + Some(self.vmoffsets.vmctx_vmtable_import_from(index)), + self.vmoffsets.vmtable_definition_base().into(), + self.vmoffsets.vmtable_definition_current_elements().into(), + ), + }; + + TableData { + base: from_offset, + offset: base_offset, + current_elems_offset, + element_size: self.vmoffsets.ptr.size(), + } + } } diff --git a/winch/codegen/src/codegen/mod.rs index 1c3813901c6b..6771e15ea668 100644 --- a/winch/codegen/src/codegen/mod.rs +++ b/winch/codegen/src/codegen/mod.rs @@ -1,22 +1,25 @@ use crate::{ abi::{ABISig, ABI}, - masm::{MacroAssembler, OperandSize}, + isa::reg::Reg, + masm::{CmpKind, MacroAssembler, OperandSize, TrapCode}, stack::{TypedReg, Val}, CallingConvention, }; use anyhow::Result; -use call::FnCall; use smallvec::SmallVec; use wasmparser::{BinaryReader, FuncValidator, Operator, ValidatorResources, VisitOperator}; -use wasmtime_environ::{FuncIndex, WasmFuncType, WasmType}; +use wasmtime_environ::{PtrSize, TypeIndex, WasmFuncType, WasmType}; mod context; pub(crate) use context::*; mod env; pub use env::*; -pub mod call; +mod call; +pub(crate) use call::*; mod control; pub(crate) use control::*; +mod builtin; +pub(crate) use builtin::*; /// The code generation abstraction. pub(crate) struct CodeGen<'a, M> @@ -173,8 +176,7 @@ where ) -> Result<()> { self.spill_register_arguments(); let defined_locals_range = &self.context.frame.defined_locals_range; - self.masm - .zero_mem_range(defined_locals_range.as_range(), &mut self.context.regalloc); + self.masm.zero_mem_range(defined_locals_range.as_range()); // Save the vmctx pointer to its local slot in case we need to reload it // at any point. @@ -247,51 +249,131 @@ where } } - /// Emit a direct function call. - pub fn emit_call(&mut self, index: FuncIndex) { - let callee = self.env.callee_from_index(index); - let (sig, callee_addr): (ABISig, Option<<M as MacroAssembler>::Address>) = if callee.import - { - let mut params = vec![WasmType::I64, WasmType::I64]; - params.extend_from_slice(callee.ty.params()); - let sig = WasmFuncType::new(params.into(), callee.ty.returns().into()); - - let caller_vmctx = <M::ABI as ABI>::vmctx_reg(); - let callee_vmctx = self.context.any_gpr(self.masm); - let callee_vmctx_offset = self.env.vmoffsets.vmctx_vmfunction_import_vmctx(index); - let callee_vmctx_addr = self.masm.address_at_reg(caller_vmctx, callee_vmctx_offset); - // FIXME Remove harcoded operand size, this will be needed - // once 32-bit architectures are supported. - self.masm - .load(callee_vmctx_addr, callee_vmctx, OperandSize::S64); - - let callee_body_offset = self.env.vmoffsets.vmctx_vmfunction_import_wasm_call(index); - let callee_addr = self.masm.address_at_reg(caller_vmctx, callee_body_offset); - - // Put the callee / caller vmctx at the start of the - // range of the stack so that they are used as first - // and second arguments.
- let stack = &mut self.context.stack; - let location = stack.len() - (sig.params().len() - 2); - stack.insert(location as usize, TypedReg::i64(caller_vmctx).into()); - stack.insert(location as usize, TypedReg::i64(callee_vmctx).into()); - ( - <M::ABI as ABI>::sig(&sig, &CallingConvention::Default), - Some(callee_addr), - ) - } else { - ( - <M::ABI as ABI>::sig(&callee.ty, &CallingConvention::Default), - None, - ) + /// Emit a function call to: + /// * A locally defined function. + /// * A function import. + /// * A funcref. + pub fn emit_call(&mut self, callee: Callee) { + match callee { + Callee::Import(callee) => { + let mut params = Vec::with_capacity(callee.ty.params().len() + 2); + params.extend_from_slice(&self.env.vmctx_args_type()); + params.extend_from_slice(callee.ty.params()); + let sig = WasmFuncType::new(params.into(), callee.ty.returns().into()); + + let caller_vmctx = <M::ABI as ABI>::vmctx_reg(); + let callee_vmctx = self.context.any_gpr(self.masm); + let callee_vmctx_offset = self .env .vmoffsets .vmctx_vmfunction_import_vmctx(callee.index); + let callee_vmctx_addr = self.masm.address_at_vmctx(callee_vmctx_offset); + // FIXME Remove hardcoded operand size, this will be needed + // once 32-bit architectures are supported. + self.masm .load(callee_vmctx_addr, callee_vmctx, OperandSize::S64); + + let callee_body_offset = self .env .vmoffsets .vmctx_vmfunction_import_wasm_call(callee.index); + let callee_addr = self.masm.address_at_vmctx(callee_body_offset); + + // Put the callee / caller vmctx at the start of the + // range of the stack so that they are used as first + // and second arguments. + let stack = &mut self.context.stack; + let location = stack.len() - (sig.params().len() - 2); + stack.insert(location as usize, TypedReg::i64(caller_vmctx).into()); + stack.insert(location as usize, TypedReg::i64(callee_vmctx).into()); + + let abi_sig = <M::ABI as ABI>::sig(&sig, &CallingConvention::Default); + FnCall::new(&abi_sig) .save_live_registers(&mut self.context, self.masm) .addr(self.masm, &mut self.context, callee_addr); + } + + Callee::Local(callee) => { + let abi_sig = <M::ABI as ABI>::sig(&callee.ty, &CallingConvention::Default); + FnCall::new(&abi_sig) .save_live_registers(&mut self.context, self.masm) .direct(self.masm, &mut self.context, callee.index); + } + + Callee::FuncRef(ty) => { + // Get type for the caller and callee VMContext. + let ptr_type = self.env.ptr_type(); + let abi_sig = <M::ABI as ABI>::sig(&ty, &CallingConvention::Default); + // Pop the funcref pointer to a register and allocate a register to hold the + // address of the funcref. Since the callee is not addressed from a global non-allocatable + // register (like the vmctx in the case of an import), we load the + // funcref to a register, ensuring that it doesn't get assigned to a non-arg + // register.
+ let (funcref_ptr, funcref) = self.context.without::<_, M, _>( + abi_sig.param_regs(), + abi_sig.param_regs(), + self.masm, + |cx, masm| (cx.pop_to_reg(masm, None).into(), cx.any_gpr(masm)), + ); + self.masm.load( + self.masm.address_at_reg( + funcref_ptr, + self.env.vmoffsets.ptr.vm_func_ref_wasm_call().into(), + ), + funcref, + ptr_type.into(), + ); + self.context.free_reg(funcref_ptr); + + FnCall::new(&abi_sig) + .save_live_registers(&mut self.context, self.masm) + .reg(self.masm, &mut self.context, funcref); + } }; + } - let fncall = FnCall::new::<M>(&sig, &mut self.context, self.masm); - if let Some(addr) = callee_addr { - fncall.indirect::<M>(self.masm, &mut self.context, addr); - } else { - fncall.direct::<M>(self.masm, &mut self.context, index); - } + /// Emits a series of instructions that will type check a function reference call. + pub fn emit_typecheck_funcref(&mut self, funcref_ptr: Reg, type_index: TypeIndex) { + let ptr_size: OperandSize = self.env.ptr_type().into(); + let sig_index_bytes = self.env.vmoffsets.size_of_vmshared_signature_index(); + let sig_size = OperandSize::from_bytes(sig_index_bytes); + let sig_index = self.env.translation.module.types[type_index].unwrap_function(); + let sig_offset = sig_index .as_u32() .checked_mul(sig_index_bytes.into()) .unwrap(); + let signatures_base_offset = self.env.vmoffsets.vmctx_signature_ids_array(); + let scratch = <M::ABI as ABI>::scratch_reg(); + let funcref_sig_offset = self.env.vmoffsets.ptr.vm_func_ref_type_index(); + + // Load the signatures address into the scratch register. + self.masm.load( + self.masm.address_at_vmctx(signatures_base_offset), + scratch, + ptr_size, + ); + + // Get the caller id. + let caller_id = self.context.any_gpr(self.masm); + self.masm.load( + self.masm.address_at_reg(scratch, sig_offset), + caller_id, + sig_size, + ); + + let callee_id = self.context.any_gpr(self.masm); + self.masm.load( + self.masm + .address_at_reg(funcref_ptr, funcref_sig_offset.into()), + callee_id, + sig_size, + ); + + // Typecheck. + self.masm.cmp(callee_id.into(), caller_id, OperandSize::S32); + self.masm.trapif(CmpKind::Ne, TrapCode::BadSignature); + self.context.free_reg(callee_id); + self.context.free_reg(caller_id); } /// Emit the usual function end instruction sequence.
diff --git a/winch/codegen/src/isa/aarch64/abi.rs b/winch/codegen/src/isa/aarch64/abi.rs index 99d6a2560897..8a38a2918d18 100644 --- a/winch/codegen/src/isa/aarch64/abi.rs +++ b/winch/codegen/src/isa/aarch64/abi.rs @@ -65,22 +65,29 @@ impl ABI for Aarch64ABI { } fn sig(wasm_sig: &WasmFuncType, call_conv: &CallingConvention) -> ABISig { + Self::sig_from(wasm_sig.params(), wasm_sig.returns(), call_conv) + } + + fn sig_from( + params: &[WasmType], + returns: &[WasmType], + call_conv: &CallingConvention, + ) -> ABISig { assert!(call_conv.is_apple_aarch64() || call_conv.is_default()); - if wasm_sig.returns().len() > 1 { + if returns.len() > 1 { panic!("multi-value not supported"); } let mut stack_offset = 0; let mut index_env = RegIndexEnv::default(); - let params: SmallVec<[ABIArg; 6]> = wasm_sig - .params() + let params: SmallVec<[ABIArg; 6]> = params .iter() .map(|arg| Self::to_abi_arg(arg, &mut stack_offset, &mut index_env)) .collect(); - let result = Self::result(wasm_sig.returns(), call_conv); + let result = Self::result(returns, call_conv); ABISig::new(params, result, stack_offset) } @@ -101,7 +108,7 @@ impl ABI for Aarch64ABI { } fn scratch_reg() -> Reg { - todo!() + regs::scratch() } fn sp_reg() -> Reg { diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index 5953d4d231d1..880357d60587 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -1,11 +1,11 @@ use super::{abi::Aarch64ABI, address::Address, asm::Assembler, regs}; use crate::{ abi::{self, local::LocalSlot}, - codegen::CodeGenContext, + codegen::{CodeGenContext, TableData}, isa::reg::Reg, masm::{ CalleeKind, CmpKind, DivKind, Imm as I, MacroAssembler as Masm, OperandSize, RegImm, - RemKind, RoundingMode, ShiftKind, StackSlot, + RemKind, RoundingMode, ShiftKind, StackSlot, TrapCode, }, }; use cranelift_codegen::{settings, Final, MachBufferFinalized, MachLabel}; @@ -97,6 +97,16 @@ impl Masm for MacroAssembler { Address::offset(reg, offset as i64) } + fn table_elem_address( + &mut self, + _index: Reg, + _size: OperandSize, + _table_data: &TableData, + _context: &mut CodeGenContext, + ) -> Reg { + todo!() + } + fn address_from_sp(&self, _offset: u32) -> Self::Address { todo!() } @@ -105,6 +115,10 @@ impl Masm for MacroAssembler { todo!() } + fn address_at_vmctx(&self, _offset: u32) -> Self::Address { + todo!() + } + fn store(&mut self, src: RegImm, dst: Address, size: OperandSize) { let src = match src { RegImm::Imm(v) => { @@ -135,6 +149,10 @@ impl Masm for MacroAssembler { self.asm.ldr(src, dst, size); } + fn load_ptr(&mut self, _src: Self::Address, _dst: Reg) { + todo!() + } + fn pop(&mut self, _dst: Reg, _size: OperandSize) { todo!() } @@ -313,7 +331,7 @@ impl Masm for MacroAssembler { &mut self, _kind: CmpKind, _lhs: RegImm, - _rhs: RegImm, + _rhs: Reg, _taken: MachLabel, _size: OperandSize, ) { @@ -331,6 +349,10 @@ impl Masm for MacroAssembler { fn jmp_table(&mut self, _targets: &[MachLabel], _index: Reg, _tmp: Reg) { todo!() } + + fn trapif(&mut self, _cc: CmpKind, _code: TrapCode) { + todo!() + } } impl MacroAssembler { diff --git a/winch/codegen/src/isa/aarch64/mod.rs b/winch/codegen/src/isa/aarch64/mod.rs index 8d299eba0a93..70c61ab9ca31 100644 --- a/winch/codegen/src/isa/aarch64/mod.rs +++ b/winch/codegen/src/isa/aarch64/mod.rs @@ -1,4 +1,4 @@ -use self::regs::{scratch, ALL_GPR}; +use self::regs::{ALL_GPR, MAX_FPR, MAX_GPR, NON_ALLOCATABLE_GPR}; use crate::{ abi::ABI, codegen::{CodeGen, CodeGenContext, FuncEnv}, @@ -6,7 +6,7 @@ use 
crate::{ isa::{Builder, CallingConvention, TargetIsa}, masm::MacroAssembler, regalloc::RegAlloc, - regset::RegSet, + regset::RegBitSet, stack::Stack, TrampolineKind, }; @@ -17,7 +17,7 @@ use cranelift_codegen::{MachTextSectionBuilder, TextSectionBuilder}; use masm::MacroAssembler as Aarch64Masm; use target_lexicon::Triple; use wasmparser::{FuncValidator, FunctionBody, ValidatorResources}; -use wasmtime_environ::{ModuleTranslation, WasmFuncType}; +use wasmtime_environ::{ModuleTranslation, ModuleTypes, WasmFuncType}; mod abi; mod address; @@ -85,6 +85,7 @@ impl TargetIsa for Aarch64 { fn compile_function( &self, sig: &WasmFuncType, + types: &ModuleTypes, body: &FunctionBody, translation: &ModuleTranslation, validator: &mut FuncValidator<ValidatorResources>, @@ -96,10 +97,21 @@ impl TargetIsa for Aarch64 { let defined_locals = DefinedLocals::new(translation, &mut body, validator)?; let frame = Frame::new::<Aarch64ABI>(&abi_sig, &defined_locals)?; + let gpr = RegBitSet::int( + ALL_GPR.into(), + NON_ALLOCATABLE_GPR.into(), + usize::try_from(MAX_GPR).unwrap(), + ); // TODO: Add floating point bitmask - let regalloc = RegAlloc::new(RegSet::new(ALL_GPR, 0), scratch()); + let fpr = RegBitSet::float(0, 0, usize::try_from(MAX_FPR).unwrap()); + let regalloc = RegAlloc::from(gpr, fpr); let codegen_context = CodeGenContext::new(regalloc, stack, &frame); - let env = FuncEnv::new(self.pointer_bytes(), translation); + let env = FuncEnv::new( + self.pointer_bytes(), + translation, + types, + self.wasmtime_call_conv(), + ); let mut codegen = CodeGen::new(&mut masm, codegen_context, env, abi_sig); codegen.emit(&mut body, validator)?; diff --git a/winch/codegen/src/isa/aarch64/regs.rs index e11d3a84f94d..ec540b43fe10 100644 --- a/winch/codegen/src/isa/aarch64/regs.rs +++ b/winch/codegen/src/isa/aarch64/regs.rs @@ -4,15 +4,20 @@ use crate::{isa::reg::Reg, masm::OperandSize}; use regalloc2::{PReg, RegClass}; use smallvec::{smallvec, SmallVec}; +/// FPR index bound. +pub(crate) const MAX_FPR: u32 = 32; +/// GPR index bound. +pub(crate) const MAX_GPR: u32 = 32; + /// Construct a X-register from an index. pub(crate) const fn xreg(num: u8) -> Reg { - assert!(num < 32); + assert!((num as u32) < MAX_GPR); Reg::new(PReg::new(num as usize, RegClass::Int)) } /// Construct a V-register from an index. pub(crate) const fn vreg(num: u8) -> Reg { - assert!(num < 32); + assert!((num as u32) < MAX_FPR); Reg::new(PReg::new(num as usize, RegClass::Float)) } @@ -131,7 +136,8 @@ pub(crate) const fn shadow_sp() -> Reg { xreg(28) } -const NON_ALLOCATABLE_GPR: u32 = (1 << ip0().hw_enc()) +/// Bitmask for non-allocatable GPR. +pub(crate) const NON_ALLOCATABLE_GPR: u32 = (1 << ip0().hw_enc()) | (1 << ip1().hw_enc()) | (1 << platform().hw_enc()) | (1 << fp().hw_enc()) diff --git a/winch/codegen/src/isa/mod.rs index c90d4264c271..771fc556f9ac 100644 --- a/winch/codegen/src/isa/mod.rs +++ b/winch/codegen/src/isa/mod.rs @@ -10,7 +10,7 @@ use std::{ }; use target_lexicon::{Architecture, Triple}; use wasmparser::{FuncValidator, FunctionBody, ValidatorResources}; -use wasmtime_environ::{ModuleTranslation, WasmFuncType}; +use wasmtime_environ::{ModuleTranslation, ModuleTypes, WasmFuncType}; #[cfg(feature = "x64")] pub(crate) mod x64; @@ -77,6 +77,7 @@ pub(crate) enum LookupError { /// This enum is a reduced subset of the calling conventions defined in /// [cranelift_codegen::isa::CallConv]. Introducing this enum makes it easier /// to enforce the invariant of all the calling conventions supported by Winch.
+#[derive(Copy, Clone)] pub enum CallingConvention { /// See [cranelift_codegen::isa::CallConv::WasmtimeSystemV] WasmtimeSystemV, @@ -148,6 +149,7 @@ pub trait TargetIsa: Send + Sync { fn compile_function( &self, sig: &WasmFuncType, + types: &ModuleTypes, body: &FunctionBody, translation: &ModuleTranslation, validator: &mut FuncValidator<ValidatorResources>, diff --git a/winch/codegen/src/isa/reg.rs index fda5f07cfdc0..0a96df3293be 100644 --- a/winch/codegen/src/isa/reg.rs +++ b/winch/codegen/src/isa/reg.rs @@ -34,8 +34,8 @@ impl Reg { } /// Get the encoding of the underlying register. - pub const fn hw_enc(self) -> u8 { - self.0.hw_enc() as u8 + pub const fn hw_enc(self) -> usize { + self.0.hw_enc() } /// Get the physical register representation. diff --git a/winch/codegen/src/isa/x64/abi.rs index e7194a1acdb1..be52fe9fb455 100644 --- a/winch/codegen/src/isa/x64/abi.rs +++ b/winch/codegen/src/isa/x64/abi.rs @@ -97,10 +97,14 @@ impl ABI for X64ABI { 64 } - fn sig(wasm_sig: &WasmFuncType, call_conv: &CallingConvention) -> ABISig { + fn sig_from( + params: &[WasmType], + returns: &[WasmType], + call_conv: &CallingConvention, + ) -> ABISig { assert!(call_conv.is_fastcall() || call_conv.is_systemv() || call_conv.is_default()); - if wasm_sig.returns().len() > 1 { + if returns.len() > 1 { panic!("multi-value not supported"); } @@ -115,16 +119,19 @@ impl ABI for X64ABI { (0, RegIndexEnv::default()) }; - let params: SmallVec<[ABIArg; 6]> = wasm_sig - .params() + let params: SmallVec<[ABIArg; 6]> = params .iter() .map(|arg| Self::to_abi_arg(arg, &mut stack_offset, &mut index_env, is_fastcall)) .collect(); - let result = Self::result(wasm_sig.returns(), call_conv); + let result = Self::result(returns, call_conv); ABISig::new(params, result, stack_offset) } + fn sig(wasm_sig: &WasmFuncType, call_conv: &CallingConvention) -> ABISig { + Self::sig_from(wasm_sig.params(), wasm_sig.returns(), call_conv) + } + fn result(returns: &[WasmType], _call_conv: &CallingConvention) -> ABIResult { // This invariant will be lifted once support for multi-value is added. assert!(returns.len() <= 1, "multi-value not supported"); diff --git a/winch/codegen/src/isa/x64/address.rs index 970c0fa8b0d2..47229ae37448 100644 --- a/winch/codegen/src/isa/x64/address.rs +++ b/winch/codegen/src/isa/x64/address.rs @@ -1,8 +1,7 @@ //! x64 addressing mode. -use cranelift_codegen::ir::Constant; - use crate::reg::Reg; +use cranelift_codegen::ir::Constant; /// Memory address representation. #[derive(Debug, Copy, Clone)] diff --git a/winch/codegen/src/isa/x64/asm.rs index 930ba22e9ae1..8377c3a1edda 100644 --- a/winch/codegen/src/isa/x64/asm.rs +++ b/winch/codegen/src/isa/x64/asm.rs @@ -2,7 +2,7 @@ use crate::{ isa::reg::{Reg, RegClass}, - masm::{CalleeKind, CmpKind, DivKind, OperandSize, RemKind, RoundingMode, ShiftKind}, + masm::{CmpKind, DivKind, OperandSize, RemKind, RoundingMode, ShiftKind}, }; use cranelift_codegen::{ entity::EntityRef, @@ -845,41 +845,35 @@ impl Assembler { }) } - /// Emit a function call to a known or unknown location. - /// - /// A known location is a locally defined function index. - /// An unknown location is an address whose value is located - /// ina register.
- pub fn call(&mut self, callee: CalleeKind) { - match callee { - CalleeKind::Indirect(reg) => { - self.emit(Inst::CallUnknown { - dest: RegMem::reg(reg.into()), - info: Box::new(CallInfo { - uses: smallvec![], - defs: smallvec![], - clobbers: Default::default(), - opcode: Opcode::Call, - callee_pop_size: 0, - callee_conv: CallConv::SystemV, - }), - }); - } - CalleeKind::Direct(index) => { - let dest = ExternalName::user(UserExternalNameRef::new(index as usize)); - self.emit(Inst::CallKnown { - dest, - info: Box::new(CallInfo { - uses: smallvec![], - defs: smallvec![], - clobbers: Default::default(), - opcode: Opcode::Call, - callee_pop_size: 0, - callee_conv: CallConv::SystemV, - }), - }); - } - } + /// Emit a call to an unknown location through a register. + pub fn call_with_reg(&mut self, callee: Reg) { + self.emit(Inst::CallUnknown { + dest: RegMem::reg(callee.into()), + info: Box::new(CallInfo { + uses: smallvec![], + defs: smallvec![], + clobbers: Default::default(), + opcode: Opcode::Call, + callee_pop_size: 0, + callee_conv: CallConv::SystemV, + }), + }); + } + + /// Emit a call to a locally defined function through an index. + pub fn call_with_index(&mut self, index: u32) { + let dest = ExternalName::user(UserExternalNameRef::new(index as usize)); + self.emit(Inst::CallKnown { + dest, + info: Box::new(CallInfo { + uses: smallvec![], + defs: smallvec![], + clobbers: Default::default(), + opcode: Opcode::Call, + callee_pop_size: 0, + callee_conv: CallConv::SystemV, + }), + }); } /// Emits a conditional jump to the given label. @@ -917,4 +911,12 @@ impl Assembler { pub fn trap(&mut self, code: TrapCode) { self.emit(Inst::Ud2 { trap_code: code }) } + + /// Conditional trap. + pub fn trapif(&mut self, cc: CmpKind, trap_code: TrapCode) { + self.emit(Inst::TrapIf { + cc: cc.into(), + trap_code, + }); + } } diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index d4c6cb6a0f47..8d89bd435c79 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -7,12 +7,12 @@ use super::{ use crate::masm::{ CmpKind, DivKind, Imm as I, MacroAssembler as Masm, OperandSize, RegImm, RemKind, RoundingMode, - ShiftKind, + ShiftKind, TrapCode, }; use crate::{abi::ABI, masm::StackSlot}; use crate::{ abi::{self, align_to, calculate_frame_adjustment, LocalSlot}, - codegen::CodeGenContext, + codegen::{ptr_type_from_ptr_size, CodeGenContext, TableData}, stack::Val, }; use crate::{ @@ -20,10 +20,11 @@ use crate::{ masm::CalleeKind, }; use cranelift_codegen::{ - ir::TrapCode, isa::x64::settings as x64_settings, settings, Final, MachBufferFinalized, - MachLabel, + isa::x64::settings as x64_settings, settings, Final, MachBufferFinalized, MachLabel, }; +use wasmtime_environ::PtrSize; + /// x64 MacroAssembler. pub(crate) struct MacroAssembler { /// Stack pointer offset. @@ -32,6 +33,10 @@ pub(crate) struct MacroAssembler { asm: Assembler, /// ISA flags. flags: x64_settings::Flags, + /// Shared flags. + shared_flags: settings::Flags, + /// The target pointer size. 
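+    /// Derived from the ISA pointer width via `ptr_type_from_ptr_size`, so
+    /// this is expected to be either `S32` or `S64`.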
+    ptr_size: OperandSize,
 }
 
 impl Masm for MacroAssembler {
@@ -104,6 +109,67 @@ impl Masm for MacroAssembler {
         Address::offset(reg, offset)
     }
 
+    fn table_elem_address(
+        &mut self,
+        index: Reg,
+        size: OperandSize,
+        table_data: &TableData,
+        context: &mut CodeGenContext,
+    ) -> Reg {
+        let vmctx = <Self::ABI as ABI>::vmctx_reg();
+        let scratch = regs::scratch();
+        let bound = context.any_gpr(self);
+        let ptr_base = context.any_gpr(self);
+        let tmp = context.any_gpr(self);
+
+        if let Some(offset) = table_data.base {
+            // If the table data declares a particular offset base,
+            // load the address into a register to further use it as
+            // the table address.
+            self.asm
+                .mov_mr(&Address::offset(vmctx, offset), ptr_base, OperandSize::S64);
+        } else {
+            // Else, simply move the vmctx register into the addr register as
+            // the base to calculate the table address.
+            self.asm.mov_rr(vmctx, ptr_base, OperandSize::S64);
+        };
+
+        // OOB check.
+        let bound_addr = Address::offset(ptr_base, table_data.current_elems_offset);
+        self.asm.mov_mr(&bound_addr, bound, OperandSize::S64);
+        self.asm.cmp_rr(bound, index, size);
+        self.asm.trapif(CmpKind::GeU, TrapCode::TableOutOfBounds);
+
+        // Move the index into the scratch register to calculate the table
+        // element address.
+        // Moving the value of the index register to the scratch register
+        // also avoids overwriting the contents of the index register.
+        self.asm.mov_rr(index, scratch, OperandSize::S32);
+        self.asm
+            .mul_ir(table_data.element_size as i32, scratch, OperandSize::S64);
+        self.asm.mov_mr(
+            &Address::offset(ptr_base, table_data.offset),
+            ptr_base,
+            OperandSize::S64,
+        );
+        // Copy the value of the table base into a temporary register
+        // so that we can use it later in case of a misspeculation.
+        self.asm.mov_rr(ptr_base, tmp, OperandSize::S64);
+        // Calculate the address of the table element.
+        self.asm.add_rr(scratch, ptr_base, OperandSize::S64);
+        if self.shared_flags.enable_table_access_spectre_mitigation() {
+            // Perform a bounds check and override the value of the
+            // table element address in case the index is out of bounds.
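+            // Even though an explicit bounds check is emitted above, a
+            // misspeculated path may still reach this point with an
+            // out-of-bounds index; the compare and conditional move below
+            // re-select the saved table base in `tmp` so the following load
+            // stays within the table's memory.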
+            self.asm.cmp_rr(bound, index, OperandSize::S32);
+            self.asm.cmov(tmp, ptr_base, CmpKind::GeU, OperandSize::S64);
+        }
+        self.asm
+            .mov_mr(&Address::offset(ptr_base, 0), ptr_base, OperandSize::S64);
+        context.free_reg(bound);
+        context.free_reg(tmp);
+        ptr_base
+    }
+
     fn address_from_sp(&self, offset: u32) -> Self::Address {
         Address::offset(regs::rsp(), self.sp_offset - offset)
     }
@@ -112,6 +178,10 @@
         Address::offset(regs::rsp(), offset)
     }
 
+    fn address_at_vmctx(&self, offset: u32) -> Self::Address {
+        Address::offset(<Self::ABI as ABI>::vmctx_reg(), offset)
+    }
+
     fn store(&mut self, src: RegImm, dst: Address, size: OperandSize) {
         match src {
             RegImm::Imm(imm) => match imm {
@@ -142,7 +212,7 @@
             self.asm.pop_r(dst);
             self.decrement_sp(<Self::ABI as ABI>::word_bytes());
         } else {
-            let addr = self.address_at_sp(self.sp_offset);
+            let addr = self.address_from_sp(self.sp_offset);
             self.asm.xmm_mov_mr(&addr, dst, size);
             self.free_stack(size.bytes());
         }
@@ -160,10 +230,17 @@
         let total_stack = delta + aligned_args_size;
         self.reserve_stack(total_stack);
         let callee = load_callee(self);
-        self.asm.call(callee);
+        match callee {
+            CalleeKind::Indirect(reg) => self.asm.call_with_reg(reg),
+            CalleeKind::Direct(idx) => self.asm.call_with_index(idx),
+        };
 
         total_stack
     }
 
+    fn load_ptr(&mut self, src: Self::Address, dst: Reg) {
+        self.load(src, dst, self.ptr_size);
+    }
+
     fn load(&mut self, src: Address, dst: Reg, size: OperandSize) {
         if dst.is_int() {
             self.asm.mov_mr(&src, dst, size);
@@ -531,24 +608,24 @@
         &mut self,
         kind: CmpKind,
         lhs: RegImm,
-        rhs: RegImm,
+        rhs: Reg,
         taken: MachLabel,
         size: OperandSize,
     ) {
         use CmpKind::*;
 
         match &(lhs, rhs) {
-            (RegImm::Reg(rlhs), RegImm::Reg(rrhs)) => {
+            (RegImm::Reg(rlhs), rrhs) => {
                 // If the comparison kind is zero or not zero and both operands
                 // are the same register, emit a test instruction. Else we emit
                 // a normal comparison.
                 if (kind == Eq || kind == Ne) && (rlhs == rrhs) {
                     self.asm.test_rr(*rrhs, *rlhs, size);
                 } else {
-                    self.cmp(lhs, rhs.get_reg().unwrap(), size);
+                    self.cmp(lhs, rhs, size);
                 }
             }
-            _ => self.cmp(lhs, rhs.get_reg().unwrap(), size),
+            _ => self.cmp(lhs, rhs, size),
         }
         self.asm.jmp_if(kind, taken);
     }
@@ -627,6 +704,10 @@
         self.asm.trap(TrapCode::UnreachableCodeReached)
     }
 
+    fn trapif(&mut self, cc: CmpKind, code: TrapCode) {
+        self.asm.trapif(cc, code);
+    }
+
     fn jmp_table(&mut self, targets: &[MachLabel], index: Reg, tmp: Reg) {
         // At least one default target.
         assert!(targets.len() >= 1);
@@ -649,11 +730,17 @@
 impl MacroAssembler {
     /// Create an x64 MacroAssembler.
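+    ///
+    /// The pointer size is captured at construction time so that
+    /// pointer-sized loads and stores (e.g. `load_ptr`) can derive their
+    /// `OperandSize` without consulting the ISA again.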
- pub fn new(shared_flags: settings::Flags, isa_flags: x64_settings::Flags) -> Self { + pub fn new( + ptr_size: impl PtrSize, + shared_flags: settings::Flags, + isa_flags: x64_settings::Flags, + ) -> Self { Self { sp_offset: 0, - asm: Assembler::new(shared_flags, isa_flags.clone()), + asm: Assembler::new(shared_flags.clone(), isa_flags.clone()), flags: isa_flags, + shared_flags, + ptr_size: ptr_type_from_ptr_size(ptr_size.size()).into(), } } diff --git a/winch/codegen/src/isa/x64/mod.rs b/winch/codegen/src/isa/x64/mod.rs index 0d3359f8ec39..4703f7f20edb 100644 --- a/winch/codegen/src/isa/x64/mod.rs +++ b/winch/codegen/src/isa/x64/mod.rs @@ -11,7 +11,7 @@ use crate::stack::Stack; use crate::trampoline::{Trampoline, TrampolineKind}; use crate::{ isa::{Builder, TargetIsa}, - regset::RegSet, + regset::RegBitSet, }; use anyhow::Result; use cranelift_codegen::settings::{self, Flags}; @@ -19,9 +19,9 @@ use cranelift_codegen::{isa::x64::settings as x64_settings, Final, MachBufferFin use cranelift_codegen::{MachTextSectionBuilder, TextSectionBuilder}; use target_lexicon::Triple; use wasmparser::{FuncValidator, FunctionBody, ValidatorResources}; -use wasmtime_environ::{ModuleTranslation, WasmFuncType}; +use wasmtime_environ::{ModuleTranslation, ModuleTypes, WasmFuncType}; -use self::regs::{ALL_FPR, ALL_GPR}; +use self::regs::{ALL_FPR, ALL_GPR, MAX_FPR, MAX_GPR, NON_ALLOCATABLE_FPR, NON_ALLOCATABLE_GPR}; mod abi; mod address; @@ -89,20 +89,37 @@ impl TargetIsa for X64 { fn compile_function( &self, sig: &WasmFuncType, + types: &ModuleTypes, body: &FunctionBody, translation: &ModuleTranslation, validator: &mut FuncValidator, ) -> Result> { + let pointer_bytes = self.pointer_bytes(); let mut body = body.get_binary_reader(); - let mut masm = X64Masm::new(self.shared_flags.clone(), self.isa_flags.clone()); + let mut masm = X64Masm::new( + pointer_bytes, + self.shared_flags.clone(), + self.isa_flags.clone(), + ); let stack = Stack::new(); let abi_sig = abi::X64ABI::sig(sig, &CallingConvention::Default); let defined_locals = DefinedLocals::new(translation, &mut body, validator)?; let frame = Frame::new::(&abi_sig, &defined_locals)?; - let regalloc = RegAlloc::new(RegSet::new(ALL_GPR, ALL_FPR), regs::scratch()); + let gpr = RegBitSet::int( + ALL_GPR.into(), + NON_ALLOCATABLE_GPR.into(), + usize::try_from(MAX_GPR).unwrap(), + ); + let fpr = RegBitSet::float( + ALL_FPR.into(), + NON_ALLOCATABLE_FPR.into(), + usize::try_from(MAX_FPR).unwrap(), + ); + + let regalloc = RegAlloc::from(gpr, fpr); let codegen_context = CodeGenContext::new(regalloc, stack, &frame); - let env = FuncEnv::new(self.pointer_bytes(), translation); + let env = FuncEnv::new(pointer_bytes, translation, types, self.wasmtime_call_conv()); let mut codegen = CodeGen::new(&mut masm, codegen_context, env, abi_sig); codegen.emit(&mut body, validator)?; @@ -126,7 +143,11 @@ impl TargetIsa for X64 { ) -> Result> { use TrampolineKind::*; - let mut masm = X64Masm::new(self.shared_flags.clone(), self.isa_flags.clone()); + let mut masm = X64Masm::new( + self.pointer_bytes(), + self.shared_flags.clone(), + self.isa_flags.clone(), + ); let call_conv = self.wasmtime_call_conv(); let mut trampoline = Trampoline::new( diff --git a/winch/codegen/src/isa/x64/regs.rs b/winch/codegen/src/isa/x64/regs.rs index 0724b1d67252..238423dac896 100644 --- a/winch/codegen/src/isa/x64/regs.rs +++ b/winch/codegen/src/isa/x64/regs.rs @@ -170,22 +170,29 @@ pub(crate) fn scratch_xmm() -> Reg { xmm15() } +/// GPR count. const GPR: u32 = 16; +/// FPR count. 
const FPR: u32 = 16;
+/// GPR index bound.
+pub(crate) const MAX_GPR: u32 = GPR;
+/// FPR index bound.
+pub(crate) const MAX_FPR: u32 = FPR;
 const ALLOCATABLE_GPR: u32 = (1 << GPR) - 1;
 const ALLOCATABLE_FPR: u32 = (1 << FPR) - 1;
 
+/// Bitmask of non-allocatable GPRs.
 // R11: Is used as the scratch register.
 // R14: Is a pinned register, used as the instance register.
-const NON_ALLOCATABLE_GPR: u32 = (1 << ENC_RBP) | (1 << ENC_RSP) | (1 << ENC_R11) | (1 << ENC_R14);
+pub(crate) const NON_ALLOCATABLE_GPR: u32 =
+    (1 << ENC_RBP) | (1 << ENC_RSP) | (1 << ENC_R11) | (1 << ENC_R14);
 
+/// Bitmask of non-allocatable FPRs.
 // xmm15: Is used as the scratch register.
-const NON_ALLOCATABLE_FPR: u32 = 1 << 15;
+pub(crate) const NON_ALLOCATABLE_FPR: u32 = 1 << 15;
 
 /// Bitmask to represent the available general purpose registers.
 pub(crate) const ALL_GPR: u32 = ALLOCATABLE_GPR & !NON_ALLOCATABLE_GPR;
 /// Bitmask to represent the available floating point registers.
-// Note: at the time of writing all floating point registers are allocatable,
-// but we might need a scratch register in the future.
 pub(crate) const ALL_FPR: u32 = ALLOCATABLE_FPR & !NON_ALLOCATABLE_FPR;
 
 /// Returns the callee-saved registers according to a particular calling
diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs
index 2e7da9e208b5..ec258b7b39af 100644
--- a/winch/codegen/src/masm.rs
+++ b/winch/codegen/src/masm.rs
@@ -1,11 +1,12 @@
 use crate::abi::{self, align_to, LocalSlot};
-use crate::codegen::CodeGenContext;
+use crate::codegen::{CodeGenContext, TableData};
 use crate::isa::reg::Reg;
-use crate::regalloc::RegAlloc;
 use cranelift_codegen::{Final, MachBufferFinalized, MachLabel};
 use std::{fmt::Debug, ops::Range};
 use wasmtime_environ::PtrSize;
 
+pub(crate) use cranelift_codegen::ir::TrapCode;
+
 #[derive(Eq, PartialEq)]
 pub(crate) enum DivKind {
     /// Signed division.
@@ -112,6 +113,17 @@ impl OperandSize {
             OperandSize::S128 => 7,
         }
     }
+
+    /// Create an [`OperandSize`] from the given number of bytes.
+    pub fn from_bytes(bytes: u8) -> Self {
+        use OperandSize::*;
+        match bytes {
+            4 => S32,
+            8 => S64,
+            16 => S128,
+            _ => panic!("Invalid bytes {} for OperandSize", bytes),
+        }
+    }
 }
 
 /// An abstraction over a register or immediate.
@@ -179,6 +191,18 @@ pub(crate) enum CalleeKind {
     Direct(u32),
 }
 
+impl CalleeKind {
+    /// Creates an indirect callee kind from a register.
+    pub fn indirect(reg: Reg) -> Self {
+        Self::Indirect(reg)
+    }
+
+    /// Creates a direct callee kind from a function index.
+    pub fn direct(index: u32) -> Self {
+        Self::Direct(index)
+    }
+}
+
 impl RegImm {
     /// Register constructor.
     pub fn reg(r: Reg) -> Self {
@@ -281,6 +305,16 @@ pub(crate) trait MacroAssembler {
     /// Get the address of a local slot.
     fn local_address(&mut self, local: &LocalSlot) -> Self::Address;
 
+    /// Loads the address of the table element at a given index.
+    /// Returns a register that contains the address of the table element.
+    fn table_elem_address(
+        &mut self,
+        index: Reg,
+        size: OperandSize,
+        table_data: &TableData,
+        context: &mut CodeGenContext,
+    ) -> Reg;
+
     /// Constructs an address with an offset that is relative to the
     /// current position of the stack pointer (e.g. [sp + (sp_offset -
     /// offset)].
@@ -290,6 +324,11 @@
     /// current position of the stack pointer (e.g. [sp + offset].
     fn address_at_sp(&self, offset: u32) -> Self::Address;
 
+    /// Alias for [`Self::address_at_reg`] using the VMContext register as
+    /// a base. The VMContext register is derived from the ABI type that is
+    /// associated to the MacroAssembler.
+    fn address_at_vmctx(&self, offset: u32) -> Self::Address;
+
     /// Construct an address that is absolute to the current position
     /// of the given register.
     fn address_at_reg(&self, reg: Reg, offset: u32) -> Self::Address;
@@ -306,6 +345,10 @@
     /// Perform a stack load.
     fn load(&mut self, src: Self::Address, dst: Reg, size: OperandSize);
 
+    /// Alias for `MacroAssembler::load` with the operand size corresponding
+    /// to the pointer size of the target.
+    fn load_ptr(&mut self, src: Self::Address, dst: Reg);
+
     /// Pop a value from the machine stack into the given register.
     fn pop(&mut self, dst: Reg, size: OperandSize);
 
@@ -406,7 +449,7 @@
     /// The default implementation divides the given memory range
     /// into word-sized slots. Then it unrolls a series of store
     /// instructions, effectively assigning zero to each slot.
-    fn zero_mem_range(&mut self, mem: &Range<u32>, regalloc: &mut RegAlloc) {
+    fn zero_mem_range(&mut self, mem: &Range<u32>) {
         let word_size = <Self::ABI as abi::ABI>::word_bytes();
         if mem.is_empty() {
             return;
@@ -437,7 +480,7 @@
         // Add an upper bound to this generation;
         // given a considerably large amount of slots
         // this will be inefficient.
-        let zero = regalloc.scratch;
+        let zero = <Self::ABI as abi::ABI>::scratch_reg();
         self.zero(zero);
         let zero = RegImm::reg(zero);
 
@@ -460,14 +503,7 @@
     /// Performs a comparison between the two operands,
     /// and immediately after emits a jump to the given
     /// label destination if the condition is met.
-    fn branch(
-        &mut self,
-        kind: CmpKind,
-        lhs: RegImm,
-        rhs: RegImm,
-        taken: MachLabel,
-        size: OperandSize,
-    );
+    fn branch(&mut self, kind: CmpKind, lhs: RegImm, rhs: Reg, taken: MachLabel, size: OperandSize);
 
     /// Emits an unconditional jump to the given label.
     fn jmp(&mut self, target: MachLabel);
@@ -478,4 +514,7 @@
 
     /// Emit an unreachable code trap.
     fn unreachable(&mut self);
+
+    /// Traps if the condition code is met.
+    fn trapif(&mut self, cc: CmpKind, code: TrapCode);
 }
diff --git a/winch/codegen/src/regalloc.rs b/winch/codegen/src/regalloc.rs
index c63036ff2cf8..2bcb8db3063a 100644
--- a/winch/codegen/src/regalloc.rs
+++ b/winch/codegen/src/regalloc.rs
@@ -1,6 +1,6 @@
 use crate::{
     isa::reg::{Reg, RegClass},
-    regset::RegSet,
+    regset::{RegBitSet, RegSet},
 };
 
 /// The register allocator.
@@ -15,15 +15,15 @@
 /// This process ensures that whenever a register is requested,
 /// it is going to be available.
 pub(crate) struct RegAlloc {
-    pub scratch: Reg,
+    /// The register set.
     regset: RegSet,
 }
 
 impl RegAlloc {
-    /// Create a new register allocator
-    /// from a register set.
-    pub fn new(regset: RegSet, scratch: Reg) -> Self {
-        Self { regset, scratch }
+    /// Create a register allocator from a bit set for each register class.
+    pub fn from(gpr: RegBitSet, fpr: RegBitSet) -> Self {
+        let rs = RegSet::new(gpr, fpr);
+        Self { regset: rs }
     }
 
     /// Allocate the next available register for the given class,
@@ -50,12 +50,6 @@
     where
         F: FnMut(&mut RegAlloc),
     {
-        // If the scratch register is explicitly requested
-        // just return it, it's usage should never cause spills.
-        if named == self.scratch {
-            return named;
-        }
-
         self.regset.reg(named).unwrap_or_else(|| {
             spill(self);
             self.regset
@@ -66,8 +60,6 @@
     /// Free the given register.
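+    /// Freeing a non-allocatable register is a no-op in the underlying
+    /// `RegSet`, so callers may unconditionally free any register they were
+    /// handed, including the scratch register.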
pub fn free(&mut self, reg: Reg) {
-        if reg != self.scratch {
-            self.regset.free(reg);
-        }
+        self.regset.free(reg);
     }
 }
diff --git a/winch/codegen/src/regset.rs b/winch/codegen/src/regset.rs
index 655a10dc10ee..0f51dc2babed 100644
--- a/winch/codegen/src/regset.rs
+++ b/winch/codegen/src/regset.rs
@@ -3,15 +3,15 @@ use crate::isa::reg::{Reg, RegClass};
 
 /// A bit set to track register availability.
 pub(crate) struct RegSet {
     /// Bitset to track general purpose register availability.
-    gpr: u32,
+    gpr: RegBitSet,
     /// Bitset to track floating-point register availability.
-    fpr: u32,
+    fpr: RegBitSet,
 }
 
 use std::ops::{Index, IndexMut};
 
 impl Index<RegClass> for RegSet {
-    type Output = u32;
+    type Output = RegBitSet;
 
     fn index(&self, class: RegClass) -> &Self::Output {
         match class {
@@ -32,9 +32,53 @@
     }
 }
 
+/// Bitset for a particular register class.
+pub struct RegBitSet {
+    /// The register class.
+    class: RegClass,
+    /// The set of allocatable registers.
+    allocatable: u64,
+    /// The set of non-allocatable registers.
+    non_allocatable: u64,
+    /// The max number of registers.
+    /// Invariant: when allocating or freeing a register, the encoding
+    /// (index) of the register must be less than `max`.
+    max: usize,
+}
+
+impl RegBitSet {
+    /// Creates an integer register class bitset.
+    pub fn int(allocatable: u64, non_allocatable: u64, max: usize) -> Self {
+        // Assert that the allocatable and non-allocatable sets are disjoint.
+        debug_assert!(allocatable & non_allocatable == 0);
+        Self {
+            class: RegClass::Int,
+            allocatable,
+            non_allocatable,
+            max,
+        }
+    }
+
+    /// Creates a float register class bitset.
+    pub fn float(allocatable: u64, non_allocatable: u64, max: usize) -> Self {
+        // Assert that the allocatable and non-allocatable sets are disjoint.
+        debug_assert!(allocatable & non_allocatable == 0);
+        Self {
+            class: RegClass::Float,
+            allocatable,
+            non_allocatable,
+            max,
+        }
+    }
+}
+
 impl RegSet {
     /// Create a new register set.
-    pub fn new(gpr: u32, fpr: u32) -> Self {
+    pub fn new(gpr: RegBitSet, fpr: RegBitSet) -> Self {
+        debug_assert!(gpr.class == RegClass::Int);
+        debug_assert!(fpr.class == RegClass::Float);
+
         Self { gpr, fpr }
     }
 
@@ -42,9 +86,9 @@
     /// returning `None` if there are no more registers available.
     pub fn reg_for_class(&mut self, class: RegClass) -> Option<Reg> {
         self.available(class).then(|| {
-            let bitset = self[class];
-            let index = bitset.trailing_zeros();
-            self.allocate(class, index);
+            let bitset = &self[class];
+            let index = bitset.allocatable.trailing_zeros();
+            self.allocate(class, index.into());
             Reg::from(class, index as usize)
         })
     }
 
@@ -53,7 +97,7 @@
     pub fn reg(&mut self, reg: Reg) -> Option<Reg> {
         let index = reg.hw_enc();
         self.named_reg_available(reg).then(|| {
-            self.allocate(reg.class(), index.into());
+            self.allocate(reg.class(), index.try_into().unwrap());
             reg
         })
     }
 
@@ -61,36 +105,55 @@
     /// Marks the specified register as available, utilizing the
     /// register class to determine the bitset that requires updating.
     pub fn free(&mut self, reg: Reg) {
-        let index = reg.hw_enc() as u32;
-        self[reg.class()] |= 1 << index;
+        let bitset = &self[reg.class()];
+        let index = reg.hw_enc();
+        assert!(index < bitset.max);
+        let index = u64::try_from(index).unwrap();
+        if !self.is_non_allocatable(reg.class(), index) {
+            self[reg.class()].allocatable |= 1 << index;
+        }
     }
 
     /// Returns true if the specified register can be handed out when
     /// requested by name.
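+    ///
+    /// Note that non-allocatable registers always report as available,
+    /// since requesting them never clears a bit in the allocatable set.
+    /// A sketch of the expected behavior, mirroring `test_gpr` below
+    /// (the masks are hypothetical):
+    ///
+    /// ```ignore
+    /// let non_allocatable: u64 = 1 << 5;
+    /// let gpr = RegBitSet::int(0xFFFF & !non_allocatable, non_allocatable, 16);
+    /// let mut set = RegSet::new(gpr, RegBitSet::float(0, 0, 16));
+    /// // Requests for the non-allocatable register succeed repeatedly.
+    /// assert!(set.reg(Reg::int(5)).is_some());
+    /// assert!(set.reg(Reg::int(5)).is_some());
+    /// ```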
pub fn named_reg_available(&self, reg: Reg) -> bool {
-        let bitset = self[reg.class()];
+        let bitset = &self[reg.class()];
+        assert!(reg.hw_enc() < bitset.max);
         let index = 1 << reg.hw_enc();
-        (!bitset & index) == 0
+
+        (!bitset.allocatable & index) == 0
+            || self.is_non_allocatable(reg.class(), reg.hw_enc().try_into().unwrap())
     }
 
     fn available(&self, class: RegClass) -> bool {
-        let bitset = self[class];
-        bitset != 0
+        let bitset = &self[class];
+        bitset.allocatable != 0
+    }
+
+    fn allocate(&mut self, class: RegClass, index: u64) {
+        if !self.is_non_allocatable(class, index) {
+            self[class].allocatable &= !(1 << index);
+        }
     }
 
-    fn allocate(&mut self, class: RegClass, index: u32) {
-        self[class] &= !(1 << index);
+    fn is_non_allocatable(&self, class: RegClass, index: u64) -> bool {
+        let bitset = &self[class];
+        let non_allocatable = bitset.non_allocatable;
+        non_allocatable != 0 && !non_allocatable & (1 << index) == 0
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use super::{Reg, RegClass, RegSet};
+    use super::{Reg, RegBitSet, RegClass, RegSet};
 
-    const UNIVERSE: u32 = (1 << 16) - 1;
+    const UNIVERSE: u64 = (1 << 16) - 1;
+    const MAX: usize = 16;
 
     #[test]
     fn test_any_gpr() {
-        let mut set = RegSet::new(UNIVERSE, 0);
+        let bitset = RegBitSet::int(UNIVERSE, !UNIVERSE, MAX);
+        let zero = RegBitSet::float(0, 0, MAX);
+        let mut set = RegSet::new(bitset, zero);
         for _ in 0..16 {
             let gpr = set.reg_for_class(RegClass::Int);
             assert!(gpr.is_some())
         }
@@ -102,15 +165,28 @@
 
     #[test]
     fn test_gpr() {
-        let all = UNIVERSE & !(1 << 5);
-        let target = Reg::int(5);
-        let mut set = RegSet::new(all, 0);
-        assert!(set.reg(target).is_none());
+        let non_allocatable: u64 = 1 << 5;
+        let all = UNIVERSE & !non_allocatable;
+        let non_alloc = Reg::int(5);
+        let alloc = Reg::int(2);
+        let bitset = RegBitSet::int(all, non_allocatable, MAX);
+        let zero = RegBitSet::float(0, 0, MAX);
+        let mut set = RegSet::new(bitset, zero);
+        // Requesting a non-allocatable register returns the register
+        // and doesn't allocate it.
+        assert!(set.reg(non_alloc).is_some());
+        assert!(set.reg(non_alloc).is_some());
+        // Requesting an allocatable register twice returns `None` the
+        // second time.
+        assert!(set.reg(alloc).is_some());
+        assert!(set.reg(alloc).is_none());
     }
 
     #[test]
     fn test_free_reg() {
-        let mut set = RegSet::new(UNIVERSE, 0);
+        let set = RegBitSet::int(UNIVERSE, !UNIVERSE, MAX);
+        let zero = RegBitSet::float(0, 0, MAX);
+        let mut set = RegSet::new(set, zero);
         let gpr = set.reg_for_class(RegClass::Int).unwrap();
         set.free(gpr);
         assert!(set.reg(gpr).is_some());
diff --git a/winch/codegen/src/stack.rs b/winch/codegen/src/stack.rs
index d176b8d4d04c..203eeab01742 100644
--- a/winch/codegen/src/stack.rs
+++ b/winch/codegen/src/stack.rs
@@ -1,5 +1,6 @@
 use crate::{isa::reg::Reg, masm::StackSlot};
 use std::collections::VecDeque;
+use std::ops::RangeBounds;
 use wasmparser::{Ieee32, Ieee64};
 use wasmtime_environ::WasmType;
 
@@ -14,12 +15,12 @@ pub struct TypedReg {
 }
 
 impl TypedReg {
-    /// Create a new TypedReg.
+    /// Create a new [`TypedReg`].
     pub fn new(ty: WasmType, reg: Reg) -> Self {
         Self { ty, reg }
     }
 
-    /// Create an i64 TypedReg.
+    /// Create an i64 [`TypedReg`].
     pub fn i64(reg: Reg) -> Self {
         Self {
             ty: WasmType::I64,
@@ -89,6 +90,13 @@ impl From<TypedReg> for Val {
     }
 }
 
+impl TryFrom<u32> for Val {
+    type Error = anyhow::Error;
+    fn try_from(value: u32) -> Result<Self, Self::Error> {
+        i32::try_from(value).map(Val::i32).map_err(Into::into)
+    }
+}
+
 impl Val {
     /// Create a new I32 constant value.
pub fn i32(v: i32) -> Self {
@@ -294,6 +302,18 @@ impl Stack {
     pub fn inner_mut(&mut self) -> &mut VecDeque<Val> {
         &mut self.inner
     }
+
+    /// Calculates the size, in bytes, of the memory entries within the
+    /// specified range of the stack.
+    pub fn sizeof<R>(&self, range: R) -> u32
+    where
+        R: RangeBounds<usize>,
+    {
+        self.inner.range(range).fold(0, |acc, v| match v {
+            Val::Memory(m) => acc + m.slot.size,
+            _ => acc,
+        })
+    }
 }
 
 #[cfg(test)]
diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs
index 101df49ff5d9..fc96b7d00d72 100644
--- a/winch/codegen/src/visitor.rs
+++ b/winch/codegen/src/visitor.rs
@@ -5,8 +5,7 @@
 //! machine code emitter.
 
 use crate::abi::ABI;
-use crate::codegen::ControlStackFrame;
-use crate::codegen::{control_index, CodeGen};
+use crate::codegen::{control_index, CodeGen, ControlStackFrame, FnCall};
 use crate::masm::{
     CmpKind, DivKind, MacroAssembler, OperandSize, RegImm, RemKind, RoundingMode, ShiftKind,
 };
@@ -14,7 +13,9 @@
 use crate::stack::{TypedReg, Val};
 use smallvec::SmallVec;
 use wasmparser::BrTable;
 use wasmparser::{BlockType, Ieee32, Ieee64, VisitOperator};
-use wasmtime_environ::{FuncIndex, GlobalIndex, WasmType};
+use wasmtime_environ::{
+    FuncIndex, GlobalIndex, TableIndex, TableStyle, TypeIndex, WasmType, FUNCREF_MASK,
+};
 
 /// A macro to define unsupported WebAssembly operators.
 ///
@@ -130,6 +131,8 @@
     (emit Select $($rest:tt)*) => {};
     (emit Drop $($rest:tt)*) => {};
     (emit BrTable $($rest:tt)*) => {};
+    (emit CallIndirect $($rest:tt)*) => {};
+
     (emit $unsupported:tt $($rest:tt)*) => {$($rest)*};
 }
 
@@ -609,14 +612,97 @@
         }
     }
 
-    // TODO verify the case where the target local is on the stack.
+    // TODO: verify the case where the target local is on the stack.
     fn visit_local_set(&mut self, index: u32) {
         let src = self.context.set_local(self.masm, index);
         self.context.free_reg(src);
     }
 
     fn visit_call(&mut self, index: u32) {
-        self.emit_call(FuncIndex::from_u32(index));
+        let callee = self.env.callee_from_index(FuncIndex::from_u32(index));
+        self.emit_call(callee);
+    }
+
+    fn visit_call_indirect(&mut self, type_index: u32, table_index: u32, _: u8) {
+        let type_index = TypeIndex::from_u32(type_index);
+        let table_index = TableIndex::from_u32(table_index);
+        let table_data = self.env.resolve_table_data(table_index);
+        let ptr_type = self.env.ptr_type();
+
+        let builtin = self
+            .env
+            .builtins
+            .table_get_lazy_init_func_ref::<M::ABI>();
+
+        FnCall::new(&builtin.sig).with_lib(
+            self.masm,
+            &mut self.context,
+            &builtin,
+            |cx, masm, call, callee| {
+                // Calculate the table element address.
+                let index = cx.pop_to_reg(masm, None);
+                let elem_addr =
+                    masm.table_elem_address(index.into(), index.ty.into(), &table_data, cx);
+
+                let defined = masm.get_label();
+                let cont = masm.get_label();
+
+                // Preemptively move the table element address to the
+                // result register, to avoid conflicts at the control flow merge.
+                let result = call.abi_sig.result.result_reg().unwrap();
+                masm.mov(elem_addr.into(), result, ptr_type.into());
+                cx.free_reg(result);
+
+                // Push the builtin function arguments to the stack.
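+                // The arguments are pushed in the order the builtin's
+                // signature expects them: the vmctx, then the table index,
+                // then the element index.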
+                cx.stack
+                    .push(TypedReg::new(ptr_type, <M::ABI as ABI>::vmctx_reg()).into());
+                cx.stack.push(table_index.as_u32().try_into().unwrap());
+                cx.stack.push(index.into());
+
+                masm.branch(
+                    CmpKind::Ne,
+                    elem_addr.into(),
+                    elem_addr,
+                    defined,
+                    ptr_type.into(),
+                );
+
+                call.calculate_call_stack_space(cx).reg(masm, cx, callee);
+                // We know the signature of the libcall in this case, so we assert that there's
+                // one element in the stack and that it's the ABI signature's result register.
+                let top = cx.stack.peek().unwrap();
+                let top = top.get_reg();
+                debug_assert!(top.reg == result);
+                masm.jmp(cont);
+
+                // In the defined case, mask the funcref address in place by peeking into the
+                // last element of the value stack, which was pushed by the `indirect` function
+                // call above.
+                masm.bind(defined);
+                let imm = RegImm::i64(FUNCREF_MASK as i64);
+                let dst = top.into();
+                masm.and(dst, dst, imm, top.ty.into());
+
+                masm.bind(cont);
+                // The indirect call above will take care of freeing the registers used as
+                // params, so we only free the registers used to lazily initialize the func ref.
+                cx.free_reg(elem_addr);
+            },
+        );
+
+        // Perform the caller-side signature check required by the table's style.
+        match self.env.translation.module.table_plans[table_index].style {
+            TableStyle::CallerChecksSignature => {
+                let funcref_ptr = self.context.stack.peek().map(|v| v.get_reg()).unwrap();
+                self.emit_typecheck_funcref(funcref_ptr.into(), type_index);
+            }
+        }
+
+        // Perform the indirect call.
+        // `emit_call` expects the callee to be on the stack. Delaying the
+        // computation of the callee address reduces register pressure.
+        self.emit_call(self.env.funcref(type_index));
     }
 
     fn visit_nop(&mut self) {}
@@ -671,11 +757,12 @@
         let frame = &mut self.control_frames[index];
         frame.set_as_target();
         let result = frame.as_target_result();
-        let top =
-            self.context
-                .without::<TypedReg, M, _>(result.result_reg(), self.masm, |ctx, masm| {
-                    ctx.pop_to_reg(masm, None)
-                });
+        let top = self.context.without::<TypedReg, M, _>(
+            result.regs(),
+            result.regs(),
+            self.masm,
+            |ctx, masm| ctx.pop_to_reg(masm, None),
+        );
         self.context.pop_abi_results(&result, self.masm);
         self.context.push_abi_results(&result, self.masm);
         self.masm.branch(
@@ -699,7 +786,8 @@
         let default_index = control_index(targets.default(), self.control_frames.len());
         let default_result = self.control_frames[default_index].as_target_result();
         let (index, tmp) = self.context.without::<(TypedReg, _), M, _>(
-            default_result.result_reg(),
+            default_result.regs(),
+            default_result.regs(),
             self.masm,
             |cx, masm| (cx.pop_to_reg(masm, None), cx.any_gpr(masm)),
         );
diff --git a/winch/filetests/filetests/x64/call_indirect/call_indirect.wat b/winch/filetests/filetests/x64/call_indirect/call_indirect.wat
new file mode 100644
index 000000000000..b2dd60224111
--- /dev/null
+++ b/winch/filetests/filetests/x64/call_indirect/call_indirect.wat
@@ -0,0 +1,130 @@
+;;!
target="x86_64" + +(module + (type $over-i32 (func (param i32) (result i32))) + + (table funcref + (elem + $fib-i32 + ) + ) + + (func $fib-i32 (export "fib-i32") (type $over-i32) + (if (result i32) (i32.le_u (local.get 0) (i32.const 1)) + (then (i32.const 1)) + (else + (i32.add + (call_indirect (type $over-i32) + (i32.sub (local.get 0) (i32.const 2)) + (i32.const 0) + ) + (call_indirect (type $over-i32) + (i32.sub (local.get 0) (i32.const 1)) + (i32.const 0) + ) + ) + ) + ) + ) +) + + +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec10 sub rsp, 0x10 +;; 8: 897c240c mov dword ptr [rsp + 0xc], edi +;; c: 4c89742404 mov qword ptr [rsp + 4], r14 +;; 11: 8b44240c mov eax, dword ptr [rsp + 0xc] +;; 15: 83f801 cmp eax, 1 +;; 18: b800000000 mov eax, 0 +;; 1d: 400f96c0 setbe al +;; 21: 85c0 test eax, eax +;; 23: 0f840a000000 je 0x33 +;; 29: b801000000 mov eax, 1 +;; 2e: e913010000 jmp 0x146 +;; 33: 8b44240c mov eax, dword ptr [rsp + 0xc] +;; 37: 83e802 sub eax, 2 +;; 3a: 50 push rax +;; 3b: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 3f: 498b4b48 mov rcx, qword ptr [r11 + 0x48] +;; 43: bb00000000 mov ebx, 0 +;; 48: 4d89f1 mov r9, r14 +;; 4b: 4d8b4150 mov r8, qword ptr [r9 + 0x50] +;; 4f: 4439c3 cmp ebx, r8d +;; 52: 0f83f4000000 jae 0x14c +;; 58: 4189db mov r11d, ebx +;; 5b: 4d6bdb08 imul r11, r11, 8 +;; 5f: 4d8b4948 mov r9, qword ptr [r9 + 0x48] +;; 63: 4d89ca mov r10, r9 +;; 66: 4d01d9 add r9, r11 +;; 69: 4439c3 cmp ebx, r8d +;; 6c: 4d0f43ca cmovae r9, r10 +;; 70: 4d8b09 mov r9, qword ptr [r9] +;; 73: 4c89c8 mov rax, r9 +;; 76: 4d85c9 test r9, r9 +;; 79: 0f8519000000 jne 0x98 +;; 7f: 4883ec08 sub rsp, 8 +;; 83: 4c89f7 mov rdi, r14 +;; 86: be00000000 mov esi, 0 +;; 8b: 89da mov edx, ebx +;; 8d: ffd1 call rcx +;; 8f: 4883c408 add rsp, 8 +;; 93: e904000000 jmp 0x9c +;; 98: 4883e0fe and rax, 0xfffffffffffffffe +;; 9c: 4d8b5e40 mov r11, qword ptr [r14 + 0x40] +;; a0: 418b0b mov ecx, dword ptr [r11] +;; a3: 8b5018 mov edx, dword ptr [rax + 0x18] +;; a6: 39d1 cmp ecx, edx +;; a8: 0f85a0000000 jne 0x14e +;; ae: 488b4810 mov rcx, qword ptr [rax + 0x10] +;; b2: 4883ec08 sub rsp, 8 +;; b6: 8b7c2408 mov edi, dword ptr [rsp + 8] +;; ba: ffd1 call rcx +;; bc: 4883c410 add rsp, 0x10 +;; c0: 8b4c240c mov ecx, dword ptr [rsp + 0xc] +;; c4: 83e901 sub ecx, 1 +;; c7: 50 push rax +;; c8: 51 push rcx +;; c9: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; cd: 498b4b48 mov rcx, qword ptr [r11 + 0x48] +;; d1: bb00000000 mov ebx, 0 +;; d6: 4d89f1 mov r9, r14 +;; d9: 4d8b4150 mov r8, qword ptr [r9 + 0x50] +;; dd: 4439c3 cmp ebx, r8d +;; e0: 0f836a000000 jae 0x150 +;; e6: 4189db mov r11d, ebx +;; e9: 4d6bdb08 imul r11, r11, 8 +;; ed: 4d8b4948 mov r9, qword ptr [r9 + 0x48] +;; f1: 4d89ca mov r10, r9 +;; f4: 4d01d9 add r9, r11 +;; f7: 4439c3 cmp ebx, r8d +;; fa: 4d0f43ca cmovae r9, r10 +;; fe: 4d8b09 mov r9, qword ptr [r9] +;; 101: 4c89c8 mov rax, r9 +;; 104: 4d85c9 test r9, r9 +;; 107: 0f8511000000 jne 0x11e +;; 10d: 4c89f7 mov rdi, r14 +;; 110: be00000000 mov esi, 0 +;; 115: 89da mov edx, ebx +;; 117: ffd1 call rcx +;; 119: e904000000 jmp 0x122 +;; 11e: 4883e0fe and rax, 0xfffffffffffffffe +;; 122: 4d8b5e40 mov r11, qword ptr [r14 + 0x40] +;; 126: 418b0b mov ecx, dword ptr [r11] +;; 129: 8b5018 mov edx, dword ptr [rax + 0x18] +;; 12c: 39d1 cmp ecx, edx +;; 12e: 0f851e000000 jne 0x152 +;; 134: 488b4810 mov rcx, qword ptr [rax + 0x10] +;; 138: 8b3c24 mov edi, dword ptr [rsp] +;; 13b: ffd1 call rcx +;; 13d: 4883c408 add rsp, 8 +;; 141: 59 pop rcx +;; 142: 01c1 add ecx, eax +;; 144: 89c8 mov eax, ecx +;; 146: 4883c410 
add rsp, 0x10 +;; 14a: 5d pop rbp +;; 14b: c3 ret +;; 14c: 0f0b ud2 +;; 14e: 0f0b ud2 +;; 150: 0f0b ud2 +;; 152: 0f0b ud2 diff --git a/winch/filetests/src/lib.rs b/winch/filetests/src/lib.rs index b1f5daa86c6d..6b435b701a8f 100644 --- a/winch/filetests/src/lib.rs +++ b/winch/filetests/src/lib.rs @@ -12,7 +12,7 @@ mod test { use wasmtime_environ::ModuleTranslation; use wasmtime_environ::{ wasmparser::{Parser as WasmParser, Validator}, - DefinedFuncIndex, FunctionBodyData, ModuleEnvironment, Tunables, TypeConvert, + DefinedFuncIndex, FunctionBodyData, ModuleEnvironment, ModuleTypes, Tunables, TypeConvert, }; use winch_codegen::{lookup, TargetIsa}; use winch_test_macros::generate_file_tests; @@ -108,13 +108,13 @@ mod test { .translate(parser, &wasm) .context("Failed to translate WebAssembly module") .unwrap(); - let _ = types.finish(); + let types = types.finish(); let body_inputs = std::mem::take(&mut translation.function_body_inputs); let binding = body_inputs .into_iter() - .map(|func| compile(&isa, &translation, func).join("\n")) + .map(|func| compile(&isa, &types, &translation, func).join("\n")) .collect::>() .join("\n\n"); let actual = binding.as_str(); @@ -147,6 +147,7 @@ mod test { fn compile( isa: &Box, + module_types: &ModuleTypes, translation: &ModuleTranslation, f: (DefinedFuncIndex, FunctionBodyData<'_>), ) -> Vec { @@ -160,7 +161,7 @@ mod test { let FunctionBodyData { body, validator } = f.1; let mut validator = validator.into_validator(Default::default()); let buffer = isa - .compile_function(&sig, &body, &translation, &mut validator) + .compile_function(&sig, module_types, &body, &translation, &mut validator) .expect("Couldn't compile function"); disasm(buffer.data(), isa).unwrap() diff --git a/winch/src/compile.rs b/winch/src/compile.rs index 723f3a6ac4fd..2d8b929022d6 100644 --- a/winch/src/compile.rs +++ b/winch/src/compile.rs @@ -5,8 +5,8 @@ use std::{fs, path::PathBuf, str::FromStr}; use target_lexicon::Triple; use wasmtime_environ::{ wasmparser::{Parser as WasmParser, Validator}, - DefinedFuncIndex, FunctionBodyData, ModuleEnvironment, ModuleTranslation, Tunables, - TypeConvert, + DefinedFuncIndex, FunctionBodyData, ModuleEnvironment, ModuleTranslation, ModuleTypes, + Tunables, TypeConvert, }; use winch_codegen::{lookup, TargetIsa}; use winch_filetests::disasm::disasm; @@ -36,12 +36,12 @@ pub fn run(opt: &Options) -> Result<()> { let mut translation = ModuleEnvironment::new(&tunables, &mut validator, &mut types) .translate(parser, &bytes) .context("Failed to translate WebAssembly module")?; - let _ = types.finish(); + let types = types.finish(); let body_inputs = std::mem::take(&mut translation.function_body_inputs); body_inputs .into_iter() - .try_for_each(|func| compile(&isa, &translation, func))?; + .try_for_each(|func| compile(&isa, &translation, &types, func))?; Ok(()) } @@ -49,6 +49,7 @@ pub fn run(opt: &Options) -> Result<()> { fn compile( isa: &Box, translation: &ModuleTranslation, + module_types: &ModuleTypes, f: (DefinedFuncIndex, FunctionBodyData<'_>), ) -> Result<()> { let index = translation.module.func_index(f.0); @@ -58,7 +59,7 @@ fn compile( let FunctionBodyData { body, validator } = f.1; let mut validator = validator.into_validator(Default::default()); let buffer = isa - .compile_function(&sig, &body, &translation, &mut validator) + .compile_function(&sig, module_types, &body, &translation, &mut validator) .expect("Couldn't compile function"); println!("Disassembly for function: {}", index.as_u32());