From d5fa7e75720180185532ce0b65900a11fceef38b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C3=BAl=20Cabrera?= Date: Thu, 12 Oct 2023 12:47:30 -0400 Subject: [PATCH 1/4] winch: Add a subset of known libcalls and improve call emission This change is a follow-up to: - https://github.com/bytecodealliance/wasmtime/pull/7155 - https://github.com/bytecodealliance/wasmtime/pull/7035 One of the objectives of this change is to make it easy to emit function calls at the MacroAssembler layer, for cases in which it's challenging to know ahead-of-time if a particular functionality can be achieved natively (e.g. rounding and SSE4.2). The original implementation of function call emission made this objective difficult to achieve and it was also difficult to reason about. I decided to simplify the overall approach to function calls as part of this PR; in essence, the `call` module now exposes a single function `FnCall::emit` which is responsible for gathering the dependencies and orchestrating the emission of the call. This new approach deliberately avoids holding any state regarding the function call for simplicity. This change also standardizes the usage of `Callee` as the main entrypoint for function call emission; as of this change, 4 `Callee` types exist (`Local`, `Builtin`, `Import`, `FuncRef`), and each callee kind is mappable to a `CalleeKind` which is the materialized version of a callee which Cranelift understands. This change also moves the creation of the `BuiltinFunctions` to the `ISA` level given that they can be safely used across multiple function compilations. Finally, this change also introduces support for some of the "well-known" libcalls and hooks those libcalls at the `MacroAssembler::float_round` callsite. 
-- prtest:full --- crates/winch/src/compiler.rs | 55 +- winch/codegen/src/abi/mod.rs | 26 +- winch/codegen/src/codegen/builtin.rs | 198 +++++- winch/codegen/src/codegen/call.rs | 594 +++++++++--------- winch/codegen/src/codegen/context.rs | 67 +- winch/codegen/src/codegen/env.rs | 63 +- winch/codegen/src/codegen/mod.rs | 223 +++---- winch/codegen/src/frame/mod.rs | 21 +- winch/codegen/src/isa/aarch64/masm.rs | 7 +- winch/codegen/src/isa/aarch64/mod.rs | 17 +- winch/codegen/src/isa/mod.rs | 5 +- winch/codegen/src/isa/x64/asm.rs | 18 +- winch/codegen/src/isa/x64/masm.rs | 40 +- winch/codegen/src/isa/x64/mod.rs | 13 +- winch/codegen/src/lib.rs | 2 +- winch/codegen/src/masm.rs | 22 +- winch/codegen/src/stack.rs | 13 - winch/codegen/src/visitor.rs | 298 ++++----- .../x64/call_indirect/call_indirect.wat | 187 +++--- .../filetests/x64/f32_ceil/f32_ceil_param.wat | 18 + .../x64/f32_floor/f32_floor_param.wat | 18 + .../x64/f32_nearest/f32_nearest_param.wat | 18 + .../x64/f32_trunc/f32_trunc_param.wat | 18 + .../filetests/x64/f64_ceil/f64_ceil_param.wat | 18 + .../x64/f64_floor/f64_floor_param.wat | 18 + .../x64/f64_nearest/f64_nearest_param.wat | 18 + .../x64/f64_trunc/f64_trunc_param.wat | 18 + winch/filetests/filetests/x64/table/fill.wat | 83 ++- winch/filetests/filetests/x64/table/get.wat | 48 +- winch/filetests/filetests/x64/table/grow.wat | 27 +- .../filetests/x64/table/init_copy_drop.wat | 250 ++++---- winch/filetests/filetests/x64/table/set.wat | 94 ++- winch/filetests/src/lib.rs | 14 +- winch/src/compile.rs | 15 +- 34 files changed, 1427 insertions(+), 1117 deletions(-) create mode 100644 winch/filetests/filetests/x64/f32_ceil/f32_ceil_param.wat create mode 100644 winch/filetests/filetests/x64/f32_floor/f32_floor_param.wat create mode 100644 winch/filetests/filetests/x64/f32_nearest/f32_nearest_param.wat create mode 100644 winch/filetests/filetests/x64/f32_trunc/f32_trunc_param.wat create mode 100644 winch/filetests/filetests/x64/f64_ceil/f64_ceil_param.wat create 
mode 100644 winch/filetests/filetests/x64/f64_floor/f64_floor_param.wat create mode 100644 winch/filetests/filetests/x64/f64_nearest/f64_nearest_param.wat create mode 100644 winch/filetests/filetests/x64/f64_trunc/f64_trunc_param.wat diff --git a/crates/winch/src/compiler.rs b/crates/winch/src/compiler.rs index 37ff419335e6..fe2c45236e04 100644 --- a/crates/winch/src/compiler.rs +++ b/crates/winch/src/compiler.rs @@ -1,18 +1,29 @@ use anyhow::Result; use object::write::{Object, SymbolId}; use std::any::Any; +use std::mem; use std::sync::Mutex; use wasmparser::FuncValidatorAllocations; use wasmtime_cranelift_shared::{CompiledFunction, ModuleTextBuilder}; use wasmtime_environ::{ CompileError, DefinedFuncIndex, FilePos, FuncIndex, FunctionBodyData, FunctionLoc, - ModuleTranslation, ModuleTypes, PrimaryMap, TrapEncodingBuilder, WasmFunctionInfo, + ModuleTranslation, ModuleTypes, PrimaryMap, TrapEncodingBuilder, VMOffsets, WasmFunctionInfo, }; -use winch_codegen::{TargetIsa, TrampolineKind}; +use winch_codegen::{BuiltinFunctions, TargetIsa, TrampolineKind}; + +/// Function compilation context. +/// This struct holds information that can be shared globally across +/// all function compilations. +struct CompilationContext { + /// Validator allocations. + allocations: FuncValidatorAllocations, + /// Builtin functions available to JIT code. + builtins: BuiltinFunctions, +} pub(crate) struct Compiler { isa: Box, - allocations: Mutex>, + contexts: Mutex>, } /// The compiled function environment. @@ -30,20 +41,26 @@ impl Compiler { pub fn new(isa: Box) -> Self { Self { isa, - allocations: Mutex::new(Vec::new()), + contexts: Mutex::new(Vec::new()), } } - fn take_allocations(&self) -> FuncValidatorAllocations { - self.allocations - .lock() - .unwrap() - .pop() - .unwrap_or_else(Default::default) + /// Get a compilation context or create a new one if none available. 
+ fn get_context(&self, translation: &ModuleTranslation) -> CompilationContext { + self.contexts.lock().unwrap().pop().unwrap_or_else(|| { + let pointer_size = self.isa.pointer_bytes(); + let vmoffsets = VMOffsets::new(pointer_size, &translation.module); + CompilationContext { + allocations: Default::default(), + builtins: BuiltinFunctions::new(&vmoffsets, self.isa.wasmtime_call_conv()), + } + }) } - fn save_allocations(&self, allocs: FuncValidatorAllocations) { - self.allocations.lock().unwrap().push(allocs) + /// Save a compilation context. + fn save_context(&self, mut context: CompilationContext, allocs: FuncValidatorAllocations) { + context.allocations = allocs; + self.contexts.lock().unwrap().push(context); } } @@ -65,12 +82,20 @@ impl wasmtime_environ::Compiler for Compiler { .try_into() .unwrap(), ); - let mut validator = validator.into_validator(self.take_allocations()); + let mut context = self.get_context(translation); + let mut validator = validator.into_validator(mem::take(&mut context.allocations)); let buffer = self .isa - .compile_function(ty, types, &body, &translation, &mut validator) + .compile_function( + ty, + &body, + translation, + types, + &mut context.builtins, + &mut validator, + ) .map_err(|e| CompileError::Codegen(format!("{e:?}"))); - self.save_allocations(validator.into_allocations()); + self.save_context(context, validator.into_allocations()); let buffer = buffer?; let compiled_function = CompiledFunction::new(buffer, CompiledFuncEnv {}, self.isa.function_alignment()); diff --git a/winch/codegen/src/abi/mod.rs b/winch/codegen/src/abi/mod.rs index a92bf6bebe47..d5ec3ec84924 100644 --- a/winch/codegen/src/abi/mod.rs +++ b/winch/codegen/src/abi/mod.rs @@ -46,6 +46,7 @@ use crate::isa::{reg::Reg, CallingConvention}; use crate::masm::OperandSize; use smallvec::SmallVec; +use std::collections::HashSet; use std::ops::{Add, BitAnd, Not, Sub}; use wasmtime_environ::{WasmFuncType, WasmHeapType, WasmType}; @@ -237,29 +238,30 @@ pub(crate) struct 
ABISig { pub result: ABIResult, /// Stack space needed for stack arguments. pub stack_bytes: u32, + /// All the registers used in the [`ABISig`]. + /// Note that this collection is guaranteed to + /// be unique: in some cases some registers might + /// be used as params as a well as returns (e.g. xmm0 in x64). + pub regs: HashSet, } impl ABISig { /// Create a new ABI signature. pub fn new(params: ABIParams, result: ABIResult, stack_bytes: u32) -> Self { + let regs = params + .iter() + .filter_map(|r| r.get_reg()) + .collect::>(); + let result_regs = result.regs(); + let chained = regs.into_iter().chain(result_regs); + Self { params, result, stack_bytes, + regs: HashSet::from_iter(chained), } } - - /// Returns an iterator over all the registers used as params. - pub fn param_regs(&self) -> impl Iterator + '_ { - self.params.iter().filter_map(|r| r.get_reg()) - } - - /// Returns an iterator over all the registers used in the signature. - pub fn regs(&self) -> impl Iterator + '_ { - let params_iter = self.param_regs(); - let result_iter = self.result.regs(); - params_iter.chain(result_iter) - } } /// Returns the size in bytes of a given WebAssembly type. diff --git a/winch/codegen/src/codegen/builtin.rs b/winch/codegen/src/codegen/builtin.rs index 9a5fe34a34d3..9824a9dbd224 100644 --- a/winch/codegen/src/codegen/builtin.rs +++ b/winch/codegen/src/codegen/builtin.rs @@ -5,16 +5,57 @@ use crate::{ codegen::env::ptr_type_from_ptr_size, CallingConvention, }; -use wasmtime_environ::{BuiltinFunctionIndex, PtrSize, WasmType}; +use cranelift_codegen::ir::LibCall; +use std::sync::Arc; +use wasmtime_environ::{BuiltinFunctionIndex, PtrSize, VMOffsets, WasmType}; + +#[derive(Copy, Clone)] +pub(crate) enum BuiltinType { + /// Dynamic built-in function, derived from the VMContext. + Dynamic { + /// The offset of the built-in function. + offset: u32, + /// The built-in function base, relative to the VMContext. + base: u32, + }, + /// A known libcall. 
+ /// See [`cranelift_codegen::ir::LibCall`] for more details. + Known(LibCall), +} + +impl BuiltinType { + /// Create a new dynamic built-in function type. + pub fn dynamic(offset: u32, base: u32) -> Self { + Self::Dynamic { offset, base } + } + + /// Create a new known built-in function type. + pub fn known(libcall: LibCall) -> Self { + Self::Known(libcall) + } +} + +#[derive(Clone)] +pub struct BuiltinFunction { + inner: Arc, +} + +impl BuiltinFunction { + pub(crate) fn sig(&self) -> &ABISig { + &self.inner.sig + } + + pub(crate) fn ty(&self) -> BuiltinType { + self.inner.ty + } +} /// Metadata about a builtin function. -pub(crate) struct BuiltinFunction { +pub struct BuiltinFunctionInner { /// The ABI specific signature of the function. - pub sig: ABISig, - /// The offset of the builtin function - pub offset: u32, - /// The builtin function base, relative to the VMContext. - pub base: u32, + sig: ABISig, + /// The built-in function type. + ty: BuiltinType, } macro_rules! declare_function_sig { @@ -35,6 +76,22 @@ macro_rules! declare_function_sig { ptr_type: WasmType, /// The builtin functions base relative to the VMContext. base: u32, + /// F32 Ceil. + ceil_f32: Option, + /// F64 Ceil. + ceil_f64: Option, + /// F32 Floor. + floor_f32: Option, + /// F64 Floor. + floor_f64: Option, + /// F32 Trunc. + trunc_f32: Option, + /// F64 Trunc. + trunc_f64: Option, + /// F32 Nearest. + nearest_f32: Option, + /// F64 Nearest. + nearest_f64: Option, $( $name: Option, )* @@ -43,13 +100,21 @@ macro_rules! declare_function_sig { // Until all the builtin functions are used. #[allow(dead_code)] impl BuiltinFunctions { - pub fn new(ptr: impl PtrSize, call_conv: CallingConvention, base: u32) -> Self { - let size = ptr.size(); + pub fn new(vmoffsets: &VMOffsets

, call_conv: CallingConvention) -> Self { + let size = vmoffsets.ptr.size(); Self { ptr_size: size, call_conv, - base, + base: vmoffsets.vmctx_builtin_functions(), ptr_type: ptr_type_from_ptr_size(size), + ceil_f32: None, + ceil_f64: None, + floor_f32: None, + floor_f64: None, + trunc_f32: None, + trunc_f64: None, + nearest_f32: None, + nearest_f64: None, $( $name: None, )* @@ -68,6 +133,14 @@ macro_rules! declare_function_sig { WasmType::I32 } + fn f32(&self) -> WasmType { + WasmType::F32 + } + + fn f64(&self) -> WasmType { + WasmType::F64 + } + fn i64(&self) -> WasmType { WasmType::I64 } @@ -76,21 +149,116 @@ macro_rules! declare_function_sig { self.pointer() } + fn over_f64(&self) -> ABISig { + A::sig_from(&[self.f64()], &[self.f64()], &self.call_conv) + } + + fn over_f32(&self) -> ABISig { + A::sig_from(&[self.f32()], &[self.f32()], &self.call_conv) + } + + pub(crate) fn ceil_f32(&mut self) -> BuiltinFunction { + if self.ceil_f32.is_none() { + let sig = self.over_f32::(); + let inner = Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::known(LibCall::CeilF32) }); + self.ceil_f32 = Some(BuiltinFunction { + inner, + }); + } + self.ceil_f32.as_ref().unwrap().clone() + } + + pub(crate) fn ceil_f64(&mut self) -> BuiltinFunction { + if self.ceil_f64.is_none() { + let sig = self.over_f64::(); + let inner = Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::known(LibCall::CeilF64) }); + self.ceil_f64 = Some(BuiltinFunction { + inner, + }); + } + self.ceil_f64.as_ref().unwrap().clone() + } + + pub(crate) fn floor_f32(&mut self) -> BuiltinFunction { + if self.floor_f32.is_none() { + let sig = self.over_f32::(); + let inner = Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::known(LibCall::FloorF32) }); + self.floor_f32 = Some(BuiltinFunction { + inner, + }); + } + self.floor_f32.as_ref().unwrap().clone() + } + + pub(crate) fn floor_f64(&mut self) -> BuiltinFunction { + if self.floor_f64.is_none() { + let sig = self.over_f64::(); + let inner = 
Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::known(LibCall::FloorF64) }); + self.floor_f64 = Some(BuiltinFunction { + inner, + }); + } + self.floor_f64.as_ref().unwrap().clone() + } + + pub(crate) fn trunc_f32(&mut self) -> BuiltinFunction { + if self.trunc_f32.is_none() { + let sig = self.over_f32::(); + let inner = Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::known(LibCall::TruncF32) }); + self.trunc_f32 = Some(BuiltinFunction { + inner, + }); + } + self.trunc_f32.as_ref().unwrap().clone() + } + + pub(crate) fn trunc_f64(&mut self) -> BuiltinFunction { + if self.trunc_f64.is_none() { + let sig = self.over_f64::(); + let inner = Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::known(LibCall::TruncF64) }); + self.trunc_f64 = Some(BuiltinFunction { + inner, + }); + } + self.trunc_f64.as_ref().unwrap().clone() + } + + pub(crate) fn nearest_f32(&mut self) -> BuiltinFunction { + if self.nearest_f32.is_none() { + let sig = self.over_f32::(); + let inner = Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::known(LibCall::NearestF32) }); + self.nearest_f32 = Some(BuiltinFunction { + inner, + }); + } + self.nearest_f32.as_ref().unwrap().clone() + } + + pub(crate) fn nearest_f64(&mut self) -> BuiltinFunction { + if self.nearest_f64.is_none() { + let sig = self.over_f64::(); + let inner = Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::known(LibCall::NearestF64) }); + self.nearest_f64 = Some(BuiltinFunction { + inner, + }); + } + self.nearest_f64.as_ref().unwrap().clone() + } + $( - pub(crate) fn $name(&mut self) -> &BuiltinFunction { + pub(crate) fn $name(&mut self) -> BuiltinFunction { if self.$name.is_none() { let params = vec![ $(self.$param() ),* ]; let result = vec![ $(self.$result() )?]; let sig = A::sig_from(¶ms, &result, &self.call_conv); let index = BuiltinFunctionIndex::$name(); + let inner = Arc::new(BuiltinFunctionInner { sig, ty: BuiltinType::dynamic(index.index() * (self.ptr_size as u32), self.base) }); self.$name = 
Some(BuiltinFunction { - sig, - offset: index.index() * (self.ptr_size as u32), - base: self.base, + inner, }); } - self.$name.as_ref().unwrap() + self.$name.as_ref().unwrap().clone() } )* } diff --git a/winch/codegen/src/codegen/call.rs b/winch/codegen/src/codegen/call.rs index 3f9bbba93c4d..d612c3391b8f 100644 --- a/winch/codegen/src/codegen/call.rs +++ b/winch/codegen/src/codegen/call.rs @@ -1,330 +1,277 @@ //! Function call emission. For more details around the ABI and //! calling convention, see [ABI]. +//! +//! This module exposes a single function [`FnCall::emit`], which is responsible +//! for orchestrating the emission of calls. In general such orchestration +//! takes place in 4 steps: +//! +//! 1. [`Callee`] resolution. +//! 2. Mapping of the [`Callee`] to the [`CalleeKind`]. +//! 3. Calculation of the stack space consumed by the call. +//! 4. Emission. +//! +//! The stack space consumed by the function call is +//! the sum of: +//! +//! 1. The amount of stack space created by saving any live +//! registers at the callsite. +//! 2. The amount of space used by any memory entries in the value +//! stack present at the callsite, that will be used as +//! arguments for the function call. Any memory values in the +//! value stack that are needed as part of the function +//! arguments, will be consumed by the function call (either by +//! assigning those values to a register or by storing those +//! values to a memory location if the callee argument is on +//! the stack), so we track that stack space to reclaim it once +//! the function call has ended. This could also be done +//! when assigning arguments, every time a memory entry needs to be assigned +//! to a particular location, but doing so would incur more +//! instructions (e.g. a pop per argument that needs to be +//! assigned); it's more efficient to track the space needed by +//! those memory values and reclaim it at once. +//! +//! 
The machine stack throughout the function call is as follows: +//! ┌──────────────────────────────────────────────────┐ +//! │ │ +//! │ 1 │ +//! │ Stack space created by any previous spills │ +//! │ from the value stack; and which memory values │ +//! │ are used as function arguments. │ +//! │ │ +//! ├──────────────────────────────────────────────────┤ ---> The Wasm value stack at this point in time would look like: +//! │ │ [ Reg | Reg | Mem(offset) | Mem(offset) ] +//! │ 2 │ +//! │ Stack space created by saving │ +//! │ any live registers at the callsite. │ +//! │ │ +//! │ │ +//! ├─────────────────────────────────────────────────┬┤ ---> The Wasm value stack at this point in time would look like: +//! │ │ [ Mem(offset) | Mem(offset) | Mem(offset) | Mem(offset) ] +//! │ │ Assuming that the callee takes 4 arguments, we calculate +//! │ │ 2 spilled registers + 2 memory values; all of which will be used +//! │ Stack space allocated for │ as arguments to the call via `assign_args`, thus the memory they represent is +//! │ the callee function arguments in the stack; │ considered to be consumed by the call. +//! │ represented by `arg_stack_space` │ +//! │ │ +//! │ │ +//! │ │ +//! └──────────────────────────────────────────────────┘ ------> Stack pointer when emitting the call + use crate::{ - abi::{ABIArg, ABIResult, ABISig, ABI}, - codegen::{BuiltinFunction, CodeGenContext}, + abi::{ABIArg, ABISig, ABI}, + codegen::{ + ptr_type_from_ptr_size, BuiltinFunction, BuiltinType, Callee, CalleeInfo, CodeGenContext, + TypedReg, + }, masm::{CalleeKind, MacroAssembler, OperandSize}, reg::Reg, + CallingConvention, }; -use wasmtime_environ::FuncIndex; +use smallvec::SmallVec; +use wasmtime_environ::{PtrSize, VMOffsets, WasmType}; /// All the information needed to emit a function call. #[derive(Copy, Clone)] -pub(crate) struct FnCall<'a> { - /// The stack space consumed by the function call; that is, - /// the sum of: - /// - /// 1. 
The amount of stack space created by saving any live - /// registers at the callsite. - /// 2. The amount of space used by any memory entries in the value - /// stack present at the callsite, that will be used as - /// arguments for the function call. Any memory values in the - /// value stack that are needed as part of the function - /// arguments, will be consumed by the function call (either by - /// assigning those values to a register or by storing those - /// values to a memory location if the callee argument is on - /// the stack), so we track that stack space to reclaim it once - /// the function call has ended. This could also be done in - /// `assign_args` everytime a memory entry needs to be assigned - /// to a particular location, but doing so, will incur in more - /// instructions (e.g. a pop per argument that needs to be - /// assigned); it's more efficient to track the space needed by - /// those memory values and reclaim it at once. - /// - /// The machine stack throghout the function call is as follows: - /// ┌──────────────────────────────────────────────────┐ - /// │ │ - /// │ 1 │ - /// │ Stack space created by any previous spills │ - /// │ from the value stack; and which memory values │ - /// │ are used as function arguments. │ - /// │ │ - /// ├──────────────────────────────────────────────────┤ ---> The Wasm value stack at this point in time would look like: - /// │ │ [ Reg | Reg | Mem(offset) | Mem(offset) ] - /// │ 2 │ - /// │ Stack space created by saving │ - /// │ any live registers at the callsite. 
│ - /// │ │ - /// │ │ - /// ├─────────────────────────────────────────────────┬┤ ---> The Wasm value stack at this point in time would look like: - /// │ │ [ Mem(offset) | Mem(offset) | Mem(offset) | Mem(offset) ] - /// │ │ Assuming that the callee takes 4 arguments, we calculate - /// │ │ 2 spilled registers + 2 memory values; all of which will be used - /// │ Stack space allocated for │ as arguments to the call via `assign_args`, thus the memory they represent is - /// │ the callee function arguments in the stack; │ is considered to be consumed by the call. - /// │ represented by `arg_stack_space` │ - /// │ │ - /// │ │ - /// │ │ - /// └──────────────────────────────────────────────────┘ ------> Stack pointer when emitting the call - /// - call_stack_space: Option, - /// The total stack space needed for the callee arguments on the - /// stack, including any adjustments to the function's frame and - /// aligned to to the required ABI alignment. - arg_stack_space: u32, - /// The ABI-specific signature of the callee. - pub abi_sig: &'a ABISig, - /// Whether this a built-in function call. - lib: bool, +pub(crate) struct FnCall {} + +/// Internal wrapping of a function signature. +enum Sig<'a> { + Owned(ABISig), + Borrowed(&'a ABISig), } -impl<'a> FnCall<'a> { - /// Creates a new [`FnCall`] from the callee's [`ABISig`]. - pub fn new(callee_sig: &'a ABISig) -> Self { - Self { - abi_sig: &callee_sig, - arg_stack_space: callee_sig.stack_bytes, - call_stack_space: None, - lib: false, +impl<'a> Sig<'a> { + /// Get a reference to the underling signature. + pub fn as_ref(&self) -> &ABISig { + match self { + Self::Owned(ref s) => s, + Self::Borrowed(b) => b, } } +} - /// Saves any live registers and records the stack space that will be - /// consumed by the function call. 
The stack space consumed by the call must - /// be known before emitting the call via any of the emission variants: - /// [`FnCall::direct`], [`FnCall::indirect`] or [`FnCall::addr`], which - /// means that the call stack space must be calculated either by invoking - /// [`FnCall::save_live_registers`] or - /// [`FnCall::calculate_call_stack_space`] before invoking any of - /// the emission variants. - pub fn save_live_registers( - &mut self, - context: &mut CodeGenContext, +impl FnCall { + /// Orchestrates the emission of a function call: + /// 1. Resolves the [`Callee`] through the given callback. + /// 2. Maps the resolved [`Callee`] to the [`CalleeKind`]. + /// 3. Saves any live registers and calculates the stack space consumed + /// by the function call. + /// 4. Emits the call. + pub fn emit( masm: &mut M, - ) -> &mut Self { - // Invariant: ensure that `call_stack_space` is only set once: either by - // [`FnCall::save_live_registers`] or - // [`FnCall::calculate_call_stack_space`] - debug_assert!(self.call_stack_space.is_none()); - let callee_params = &self.abi_sig.params; - let stack = &context.stack; - let call_stack_space = match callee_params.len() { - 0 => { - let _ = context.save_live_registers_and_calculate_sizeof(masm, ..); - 0u32 - } - _ => { - // Here we perform a "spill" of the register entries - // in the Wasm value stack, we also count any memory - // values that will be used used as part of the callee - // arguments. Saving the live registers is done by - // emitting push operations for every `Reg` entry in - // the Wasm value stack. We do this to be compliant - // with Winch's internal ABI, in which all registers - // are treated as caller-saved. For more details, see - // [ABI]. - // - // The next few lines, partition the value stack into - // two sections: - // +------------------+--+--- (Stack top) - // | | | - // | | | 1. 
The top `n` elements, which are used for - // | | | function arguments; for which we save any - // | | | live registers, keeping track of the amount of registers - // +------------------+ | saved plus the amount of memory values consumed by the function call; - // | | | with this information we can later reclaim the space used by the function call. - // | | | - // +------------------+--+--- - // | | | 2. The rest of the items in the stack, for which - // | | | we only save any live registers. - // | | | - // +------------------+ | - assert!(stack.len() >= callee_params.len()); - let partition = stack.len() - callee_params.len(); - let _ = context.save_live_registers_and_calculate_sizeof(masm, 0..partition); - context.save_live_registers_and_calculate_sizeof(masm, partition..) - } - }; + context: &mut CodeGenContext, + mut resolve: R, + ) where + R: FnMut(&mut CodeGenContext) -> Callee, + { + let callee = resolve(context); + let ptr_type = ptr_type_from_ptr_size(context.vmoffsets.ptr.size()); + let sig = Self::get_sig::(&callee, ptr_type); + let sig = sig.as_ref(); - self.call_stack_space = Some(call_stack_space); - self - } + let arg_stack_space = sig.stack_bytes; + let kind = Self::map(&context.vmoffsets, &callee, sig, context, masm); + let call_stack_space = Self::save(context, masm, &sig); - /// Records the stack space that will be needeed by the function call by - /// scanning the value stack and returning the size of the all the memory - /// entries present in callee's argument length range. The stack space - /// consumed by the call must be known before emitting the call via any of - /// the emission variants: [`FnCall::direct`], [`FnCall::indirect`] or - /// [`FnCall::addr`], which means that the call stack space must be - /// calculated either by invoking [`FnCall::save_live_registers`] or - /// [`FnCall::calculate_call_stack_space`] before invoking any of - /// the emission variants. 
- /// This function is particularly useful when there's no need to save any - /// live registers before emitting the function call. This could happen when - /// emitting calls to libcalls: [`FnCall::with_lib`] will eagerly save all - /// the live registers when invoked and will also ensure that any registers - /// allocated after are non argument registers, in which case if any of - /// those registers need to go on the value stack to be used as function - /// arguments, they don't need to be saved. - pub fn calculate_call_stack_space(&mut self, context: &mut CodeGenContext) -> &mut Self { - // Invariant: ensure that `call_stack_space` is only set once: either by - // [`FnCall::save_live_registers`] or - // [`FnCall::calculate_call_stack_space`] - debug_assert!(self.call_stack_space.is_none()); - let params_len = self.abi_sig.params.len(); - assert!(context.stack.len() >= params_len); + let reserved_stack = masm.call(arg_stack_space, |masm| { + let scratch = ::scratch_reg(); + Self::assign(sig, context, masm, scratch); + kind + }); - let stack_len = context.stack.len(); - let call_stack_space = if params_len == 0 { - 0 - } else { - context.stack.sizeof((stack_len - params_len)..) - }; - self.call_stack_space = Some(call_stack_space); - self + match kind { + CalleeKind::Indirect(r) => context.free_reg(r), + _ => {} + } + Self::cleanup( + sig, + call_stack_space.checked_add(reserved_stack).unwrap(), + masm, + context, + ); } - /// Emit a direct function call, to a locally defined function. - pub fn direct( - self, - masm: &mut M, - context: &mut CodeGenContext, - callee: FuncIndex, - ) { - // Invariant: `call_stack_space` must be known. - debug_assert!(self.call_stack_space.is_some()); - let reserved_stack = masm.call(self.arg_stack_space, |masm| { - self.assign_args(context, masm, ::scratch_reg()); - CalleeKind::direct(callee.as_u32()) - }); - self.post_call::(masm, context, reserved_stack); + /// Derive the [`ABISig`] for a particular [`Callee`]. 
+ fn get_sig(callee: &Callee, ptr_type: WasmType) -> Sig { + match callee { + Callee::Builtin(info) => Sig::Borrowed(info.sig()), + Callee::Import(info) => { + let mut params: SmallVec<[WasmType; 6]> = + SmallVec::with_capacity(info.ty.params().len() + 2); + params.extend_from_slice(&[ptr_type, ptr_type]); + params.extend_from_slice(info.ty.params()); + Sig::Owned(::sig_from( + ¶ms, + info.ty.returns(), + &CallingConvention::Default, + )) + } + Callee::Local(info) => { + Sig::Owned(::sig(&info.ty, &CallingConvention::Default)) + } + Callee::FuncRef(ty) => { + Sig::Owned(::sig(&ty, &CallingConvention::Default)) + } + } } - /// Emit an indirect function call, using a register. - pub fn reg(self, masm: &mut M, context: &mut CodeGenContext, reg: Reg) { - // Invariant: `call_stack_space` must be known. - debug_assert!(self.call_stack_space.is_some()); - let reserved_stack = masm.call(self.arg_stack_space, |masm| { - let scratch = ::scratch_reg(); - self.assign_args(context, masm, scratch); - CalleeKind::indirect(reg) - }); - context.free_reg(reg); - self.post_call::(masm, context, reserved_stack); + /// Maps the given [`Callee`] to a [`CalleeKind`]. + fn map( + vmoffsets: &VMOffsets

, + callee: &Callee, + sig: &ABISig, + context: &mut CodeGenContext, + masm: &mut M, + ) -> CalleeKind { + match callee { + Callee::Builtin(b) => Self::load_builtin(b, context, masm), + Callee::FuncRef(_) => Self::load_funcref(sig, vmoffsets.ptr.size(), context, masm), + Callee::Local(i) => Self::map_local(i), + Callee::Import(i) => Self::load_import(i, sig, context, masm, vmoffsets), + } } - /// Emit an indirect function call, using a an address. - /// This function will load the provided address into a unallocatable - /// scratch register. - pub fn addr( - self, - masm: &mut M, + /// Load a built-in function to the next available register. + fn load_builtin( + builtin: &BuiltinFunction, context: &mut CodeGenContext, - callee: M::Address, - ) { - // Invariant: `call_stack_space` must be known. - debug_assert!(self.call_stack_space.is_some()); - let reserved_stack = masm.call(self.arg_stack_space, |masm| { - let scratch = ::scratch_reg(); - self.assign_args(context, masm, scratch); - masm.load(callee, scratch, OperandSize::S64); - CalleeKind::indirect(scratch) - }); + masm: &mut M, + ) -> CalleeKind { + match builtin.ty() { + BuiltinType::Dynamic { offset, base } => { + let sig = builtin.sig(); + let callee = context.without::(&sig.regs, masm, |cx, masm| { + let scratch = ::scratch_reg(); + let builtins_base = masm.address_at_vmctx(base); + masm.load_ptr(builtins_base, scratch); + let addr = masm.address_at_reg(scratch, offset); + let callee = cx.any_gpr(masm); + masm.load_ptr(addr, callee); + callee + }); + CalleeKind::indirect(callee) + } + BuiltinType::Known(c) => CalleeKind::known(c), + } + } - self.post_call::(masm, context, reserved_stack); + /// Map a local function to a [`CalleeKind`]. + fn map_local(info: &CalleeInfo) -> CalleeKind { + CalleeKind::direct(info.index.as_u32()) } - /// Prepares the compiler to call a built-in function (libcall). 
- /// This fuction, saves all the live registers and loads the callee - /// address into a non-argument register which is then passed to the - /// caller through the provided callback. - /// - /// It is the caller's responsibility to finalize the function call - /// by calling `FnCall::reg` once all the information is known. - pub fn with_lib( - &mut self, - masm: &mut M, + /// Loads a function import to the next available register. + fn load_import( + info: &CalleeInfo, + sig: &ABISig, context: &mut CodeGenContext, - func: &BuiltinFunction, - mut f: F, - ) where - F: FnMut(&mut CodeGenContext, &mut M, &mut Self, Reg), - { - self.lib = true; - // When dealing with libcalls, we don't have all the information - // upfront (all necessary arguments in the stack) in order to optimize - // saving the live registers, so we save all the values available in - // the value stack. - context.spill(masm); - let vmctx = ::vmctx_reg(); - let scratch = ::scratch_reg(); + masm: &mut M, + vmoffsets: &VMOffsets

, + ) -> CalleeKind { + let ptr_type = ptr_type_from_ptr_size(vmoffsets.ptr.size()); + let caller_vmctx = ::vmctx_reg(); + let (callee, callee_vmctx) = + context.without::<(Reg, Reg), M, _>(&sig.regs, masm, |context, masm| { + (context.any_gpr(masm), context.any_gpr(masm)) + }); + let callee_vmctx_offset = vmoffsets.vmctx_vmfunction_import_vmctx(info.index); + let callee_vmctx_addr = masm.address_at_vmctx(callee_vmctx_offset); + masm.load_ptr(callee_vmctx_addr, callee_vmctx); - let builtins_base = masm.address_at_reg(vmctx, func.base); - masm.load(builtins_base, scratch, OperandSize::S64); - let builtin_func_addr = masm.address_at_reg(scratch, func.offset); - context.without::<(), M, _>( - // Do not free the result registers if any as the function call will - // push them onto the stack as a result of the call. - self.abi_sig.regs(), - self.abi_sig.param_regs(), - masm, - |cx, masm| { - let callee = cx.any_gpr(masm); - masm.load_ptr(builtin_func_addr, callee); - f(cx, masm, self, callee); - cx.free_reg(callee); - }, - ); + let callee_body_offset = vmoffsets.vmctx_vmfunction_import_wasm_call(info.index); + let callee_addr = masm.address_at_vmctx(callee_body_offset); + masm.load_ptr(callee_addr, callee); + + // Put the callee / caller vmctx at the start of the + // range of the stack so that they are used as first + // and second arguments. 
+ let stack = &mut context.stack; + let location = stack.len() - (sig.params.len() - 2); + let values = [ + TypedReg::new(ptr_type, callee_vmctx).into(), + TypedReg::new(ptr_type, caller_vmctx).into(), + ] + .into_iter(); + context.stack.insert_many(location, values); + + CalleeKind::indirect(callee) } - fn post_call(&self, masm: &mut M, context: &mut CodeGenContext, size: u32) { - masm.free_stack(self.call_stack_space.unwrap() + size); - // Only account for registers given that any memory entries - // consumed by the call (assigned to a register or to a stack - // slot) were freed by the previous call to - // `masm.free_stack`, so we only care about dropping them - // here. - // - // NOTE / TODO there's probably a path to getting rid of - // `save_live_registers_and_calculate_sizeof` and - // `call_stack_space`, making it a bit more obvious what's - // happening here. We could: - // - // * Modify the `spill` implementation so that it takes a - // filtering callback, to control which values the caller is - // interested in saving (e.g. save all if no function is provided) - // * Rely on the new implementation of `drop_last` to calcuate - // the stack memory entries consumed by the call and then free - // the calculated stack space. - context.drop_last(self.abi_sig.params.len(), |regalloc, v| { - if v.is_reg() { - regalloc.free(v.get_reg().into()); - } + /// Loads a function reference to the next available register. + fn load_funcref( + sig: &ABISig, + ptr: impl PtrSize, + context: &mut CodeGenContext, + masm: &mut M, + ) -> CalleeKind { + // Pop the funcref pointer to a register and allocate a register to hold the + // address of the funcref. Since the callee is not addressed from a global non + // allocatable register (like the vmctx in the case of an import), we load the + // funcref to a register ensuring that it doesn't get assigned to a non-arg + // register. 
+ let (funcref_ptr, funcref) = context.without::<_, M, _>(&sig.regs, masm, |cx, masm| { + (cx.pop_to_reg(masm, None).into(), cx.any_gpr(masm)) }); - // When emitting built-calls we ensure that none of the registers - // (params and results) used as part of the ABI signature are - // allocatable throughout the lifetime of the `with_lib` callback, since - // such registers will be used to assign arguments and hold results. - // After executing the callback, it's only safe to free the param - // registers, since depending on the signature, the caller - // will push any result registers to the stack, keeping those registers allocated. - // Here we ensure that any allocated result registers are correctly - // freed before finalizing the function call and pushing any results to - // the value stack. - if self.lib { - match self.abi_sig.result { - ABIResult::Reg { reg, .. } => { - assert!(!context.regalloc.reg_available(reg)); - context.free_reg(reg); - } - _ => {} - } - } - context.push_abi_results(&self.abi_sig.result, masm); + masm.load_ptr( + masm.address_at_reg(funcref_ptr, ptr.vm_func_ref_wasm_call().into()), + funcref, + ); + context.free_reg(funcref_ptr); + CalleeKind::indirect(funcref) } - fn assign_args( - &self, + /// Assign arguments for the function call. + fn assign( + sig: &ABISig, context: &mut CodeGenContext, masm: &mut M, scratch: Reg, ) { - let arg_count = self.abi_sig.params.len(); + let arg_count = sig.params.len(); let stack = &context.stack; let mut stack_values = stack.peekn(arg_count); - for arg in &self.abi_sig.params { + for arg in &sig.params { let val = stack_values .next() .unwrap_or_else(|| panic!("expected stack value for function argument")); @@ -341,4 +288,81 @@ impl<'a> FnCall<'a> { } } } + + /// Save any live registers prior to emitting the call. + // + // Here we perform a "spill" of the register entries + // in the Wasm value stack, we also count any memory + // values that will be used as part of the callee + // arguments.
Saving the live registers is done by + // emitting push operations for every `Reg` entry in + // the Wasm value stack. We do this to be compliant + // with Winch's internal ABI, in which all registers + // are treated as caller-saved. For more details, see + // [ABI]. + // + // The next few lines, partition the value stack into + // two sections: + // +------------------+--+--- (Stack top) + // | | | + // | | | 1. The top `n` elements, which are used for + // | | | function arguments; for which we save any + // | | | live registers, keeping track of the amount of registers + // +------------------+ | saved plus the amount of memory values consumed by the function call; + // | | | with this information we can later reclaim the space used by the function call. + // | | | + // +------------------+--+--- + // | | | 2. The rest of the items in the stack, for which + // | | | we only save any live registers. + // | | | + // +------------------+ | + fn save(context: &mut CodeGenContext, masm: &mut M, sig: &ABISig) -> u32 { + let callee_params = &sig.params; + let stack = &context.stack; + match callee_params.len() { + 0 => { + let _ = context.save_live_registers_and_calculate_sizeof(masm, ..); + 0u32 + } + _ => { + assert!(stack.len() >= callee_params.len()); + let partition = stack.len() - callee_params.len(); + let _ = context.save_live_registers_and_calculate_sizeof(masm, 0..partition); + context.save_live_registers_and_calculate_sizeof(masm, partition..) + } + } + } + + /// Cleanup stack space and free registers after emitting the call. + fn cleanup( + sig: &ABISig, + total_space: u32, + masm: &mut M, + context: &mut CodeGenContext, + ) { + masm.free_stack(total_space); + // Only account for registers given that any memory entries + // consumed by the call (assigned to a register or to a stack + // slot) were freed by the previous call to + // `masm.free_stack`, so we only care about dropping them + // here. 
+ // + // NOTE / TODO there's probably a path to getting rid of + // `save_live_registers_and_calculate_sizeof` and + // `call_stack_space`, making it a bit more obvious what's + // happening here. We could: + // + // * Modify the `spill` implementation so that it takes a + // filtering callback, to control which values the caller is + // interested in saving (e.g. save all if no function is provided) + // * Rely on the new implementation of `drop_last` to calculate + // the stack memory entries consumed by the call and then free + // the calculated stack space. + context.drop_last(sig.params.len(), |regalloc, v| { + if v.is_reg() { + regalloc.free(v.get_reg().into()); + } + }); + context.push_abi_results(&sig.result, masm); + } } diff --git a/winch/codegen/src/codegen/context.rs b/winch/codegen/src/codegen/context.rs index d2821aa3ec54..aeeb8cb42ff6 100644 --- a/winch/codegen/src/codegen/context.rs +++ b/winch/codegen/src/codegen/context.rs @@ -1,8 +1,9 @@ -use wasmtime_environ::{WasmHeapType, WasmType}; +use wasmtime_environ::{VMOffsets, WasmHeapType, WasmType}; use super::ControlStackFrame; use crate::{ abi::{ABIResult, ABI}, + codegen::BuiltinFunctions, frame::Frame, isa::reg::RegClass, masm::{MacroAssembler, OperandSize, RegImm}, @@ -27,25 +28,37 @@ use std::ops::RangeBounds; /// generation process. The code generation context should /// be generally used as the single entry point to access /// the compound functionality provided by its elements. -pub(crate) struct CodeGenContext<'a> { +pub(crate) struct CodeGenContext<'a, 'b: 'a> { /// The register allocator. pub regalloc: RegAlloc, /// The value stack. pub stack: Stack, /// The current function's frame. - pub frame: &'a Frame, + pub frame: Frame, /// Reachability state. pub reachable: bool, + /// The built-in functions available to the JIT code. + pub builtins: &'b mut BuiltinFunctions, + /// A reference to the VMOffsets.
+ pub vmoffsets: &'a VMOffsets, } -impl<'a> CodeGenContext<'a> { +impl<'a, 'b> CodeGenContext<'a, 'b> { /// Create a new code generation context. - pub fn new(regalloc: RegAlloc, stack: Stack, frame: &'a Frame) -> Self { + pub fn new( + regalloc: RegAlloc, + stack: Stack, + frame: Frame, + builtins: &'b mut BuiltinFunctions, + vmoffsets: &'a VMOffsets, + ) -> Self { Self { regalloc, stack, frame, reachable: true, + builtins, + vmoffsets, } } @@ -90,10 +103,9 @@ impl<'a> CodeGenContext<'a> { /// execution. Only the registers in the `free` iterator will be freed. The /// caller must guarantee that in case the iterators are different, the free /// iterator must be a subset of the alloc iterator. - pub fn without( + pub fn without<'r, T, M, F>( &mut self, - alloc: impl Iterator, - free: impl Iterator, + regs: impl IntoIterator + Copy, masm: &mut M, mut f: F, ) -> T @@ -101,21 +113,32 @@ impl<'a> CodeGenContext<'a> { M: MacroAssembler, F: FnMut(&mut Self, &mut M) -> T, { - debug_assert!(free.size_hint().0 <= alloc.size_hint().0); - - for r in alloc { - self.reg(r, masm); + for r in regs { + self.reg(*r, masm); } let result = f(self, masm); - for r in free { - self.free_reg(r); + for r in regs { + self.free_reg(*r); } result } + /// Similar to [`Self::without`] but takes an optional, single register + /// as a parameter. + pub fn maybe_without1(&mut self, reg: Option, masm: &mut M, mut f: F) -> T + where + M: MacroAssembler, + F: FnMut(&mut Self, &mut M) -> T, + { + match reg { + Some(r) => self.without(&[r], masm, f), + None => f(self, masm), + } + } + /// Free the given register. pub fn free_reg(&mut self, reg: impl Into) { let reg: Reg = reg.into(); @@ -378,22 +401,6 @@ impl<'a> CodeGenContext<'a> { } } - /// Pops the value at the stack top and assigns it to the local at - /// the given index, returning the typed register holding the - /// source value.
- pub fn set_local(&mut self, masm: &mut M, index: u32) -> TypedReg { - let slot = self - .frame - .get_local(index) - .unwrap_or_else(|| panic!("invalid local slot = {}", index)); - let size: OperandSize = slot.ty.into(); - let src = self.pop_to_reg(masm, None); - let addr = masm.local_address(&slot); - masm.store(RegImm::reg(src.reg), addr, size); - - src - } - /// Spill locals and registers to memory. // TODO optimize the spill range; // diff --git a/winch/codegen/src/codegen/env.rs b/winch/codegen/src/codegen/env.rs index 32f40cc791bb..5b3c9575f854 100644 --- a/winch/codegen/src/codegen/env.rs +++ b/winch/codegen/src/codegen/env.rs @@ -1,7 +1,4 @@ -use crate::{ - codegen::{BuiltinFunctions, OperandSize}, - CallingConvention, -}; +use crate::codegen::{BuiltinFunction, OperandSize}; use smallvec::{smallvec, SmallVec}; use std::collections::{ hash_map::Entry::{Occupied, Vacant}, @@ -32,6 +29,7 @@ pub struct TableData { /// A function callee. /// It categorizes how the callee should be treated /// when performing the call. +#[derive(Clone)] pub enum Callee { /// Locally defined function. Local(CalleeInfo), @@ -39,10 +37,38 @@ pub enum Callee { Import(CalleeInfo), /// Function reference. FuncRef(WasmFuncType), + /// A built-in function. + Builtin(BuiltinFunction), +} + +impl Callee { + /// Get the built-in function metadata. + /// + /// # Panics + /// This function panics if the [`Callee`] is not a built-in function. + pub fn get_builtin(&self) -> &BuiltinFunction { + match self { + Self::Builtin(f) => f, + _ => panic!(), + } + } + + /// Get the associated [`CalleeInfo`], if any. + /// + /// # Panics + /// This function panics if the [`Callee`] is not a local or imported + /// callee. + pub fn get_info(&self) -> &CalleeInfo { + match self { + Self::Local(i) | Self::Import(i) => i, + _ => panic!(), + } + } } /// Metadata about a function callee. Used by the code generation to /// emit function calls to local or imported functions. 
+#[derive(Clone)] pub struct CalleeInfo { /// The function type. pub ty: WasmFuncType, @@ -54,15 +80,13 @@ pub struct CalleeInfo { /// /// Contains all information about the module and runtime that is accessible to /// to a particular function during code generation. -pub struct FuncEnv<'a, P: PtrSize> { +pub struct FuncEnv<'a, 'b: 'a, 'c: 'b, P: PtrSize> { /// Offsets to the fields within the `VMContext` ptr. - pub vmoffsets: VMOffsets

, + pub vmoffsets: &'a VMOffsets

, /// Metadata about the translation process of a WebAssembly module. - pub translation: &'a ModuleTranslation<'a>, - /// Metadata about the builtin functions. - pub builtins: BuiltinFunctions, + pub translation: &'b ModuleTranslation<'c>, /// The module's function types. - pub types: &'a ModuleTypes, + pub types: &'b ModuleTypes, /// Track resolved table information. resolved_tables: HashMap, } @@ -73,32 +97,21 @@ pub fn ptr_type_from_ptr_size(size: u8) -> WasmType { .unwrap_or_else(|| unimplemented!("Support for non-64-bit architectures")) } -impl<'a, P: PtrSize> FuncEnv<'a, P> { +impl<'a, 'b, 'c, P: PtrSize> FuncEnv<'a, 'b, 'c, P> { /// Create a new function environment. pub fn new( - ptr: P, - translation: &'a ModuleTranslation, - types: &'a ModuleTypes, - call_conv: CallingConvention, + vmoffsets: &'a VMOffsets

, + translation: &'b ModuleTranslation<'c>, + types: &'b ModuleTypes, ) -> Self { - let vmoffsets = VMOffsets::new(ptr, &translation.module); - let size = vmoffsets.ptr.size(); - let builtins_base = vmoffsets.vmctx_builtin_functions(); Self { vmoffsets, translation, - builtins: BuiltinFunctions::new(size, call_conv, builtins_base), types, resolved_tables: HashMap::new(), } } - /// Returns a slice of types representing the caller and callee VMContext types. - pub(crate) fn vmctx_args_type(&self) -> [WasmType; 2] { - let ty = self.ptr_type(); - [ty, ty] - } - /// Derive the [`WasmType`] from the pointer size. pub(crate) fn ptr_type(&self) -> WasmType { ptr_type_from_ptr_size(self.ptr_size()) diff --git a/winch/codegen/src/codegen/mod.rs b/winch/codegen/src/codegen/mod.rs index f591605d3fb2..a96daccf46f7 100644 --- a/winch/codegen/src/codegen/mod.rs +++ b/winch/codegen/src/codegen/mod.rs @@ -1,17 +1,13 @@ use crate::{ abi::{ABISig, ABI}, isa::reg::Reg, - masm::RegImm, - masm::{CmpKind, MacroAssembler, OperandSize, TrapCode}, + masm::{CmpKind, MacroAssembler, OperandSize, RegImm, TrapCode}, stack::{TypedReg, Val}, - CallingConvention, }; use anyhow::Result; use smallvec::SmallVec; use wasmparser::{BinaryReader, FuncValidator, Operator, ValidatorResources, VisitOperator}; -use wasmtime_environ::{ - PtrSize, TableIndex, TypeIndex, WasmFuncType, WasmHeapType, WasmType, FUNCREF_MASK, -}; +use wasmtime_environ::{PtrSize, TableIndex, TypeIndex, WasmHeapType, WasmType, FUNCREF_MASK}; mod context; pub(crate) use context::*; @@ -22,10 +18,10 @@ pub(crate) use call::*; mod control; pub(crate) use control::*; mod builtin; -pub(crate) use builtin::*; +pub use builtin::*; /// The code generation abstraction. -pub(crate) struct CodeGen<'a, M> +pub(crate) struct CodeGen<'a, 'b: 'a, 'c: 'b, M> where M: MacroAssembler, { @@ -33,10 +29,10 @@ where sig: ABISig, /// The code generation context. 
- pub context: CodeGenContext<'a>, + pub context: CodeGenContext<'a, 'b>, /// A reference to the function compilation environment. - pub env: FuncEnv<'a, M::Ptr>, + pub env: FuncEnv<'a, 'b, 'c, M::Ptr>, /// The MacroAssembler. pub masm: &'a mut M, @@ -47,14 +43,14 @@ where pub control_frames: SmallVec<[ControlStackFrame; 64]>, } -impl<'a, M> CodeGen<'a, M> +impl<'a, 'b, 'c, M> CodeGen<'a, 'b, 'c, M> where M: MacroAssembler, { pub fn new( masm: &'a mut M, - context: CodeGenContext<'a>, - env: FuncEnv<'a, M::Ptr>, + context: CodeGenContext<'a, 'b>, + env: FuncEnv<'a, 'b, 'c, M::Ptr>, sig: ABISig, ) -> Self { Self { @@ -234,7 +230,7 @@ where fn is_reachable(&self) -> bool; } - impl<'a, M: MacroAssembler> ReachableState for CodeGen<'a, M> { + impl<'a, 'b, 'c, M: MacroAssembler> ReachableState for CodeGen<'a, 'b, 'c, M> { fn is_reachable(&self) -> bool { self.context.reachable } @@ -252,90 +248,6 @@ where } } - /// Emit a function call to: - /// * A locally defined function. - /// * A function import. - /// * A funcref. 
- pub fn emit_call(&mut self, callee: Callee) { - let ptr_type = self.env.ptr_type(); - match callee { - Callee::Import(callee) => { - let mut params = Vec::with_capacity(callee.ty.params().len() + 2); - params.extend_from_slice(&self.env.vmctx_args_type()); - params.extend_from_slice(callee.ty.params()); - let sig = WasmFuncType::new(params.into(), callee.ty.returns().into()); - - let caller_vmctx = ::vmctx_reg(); - let callee_vmctx = self.context.any_gpr(self.masm); - let callee_vmctx_offset = self - .env - .vmoffsets - .vmctx_vmfunction_import_vmctx(callee.index); - let callee_vmctx_addr = self.masm.address_at_vmctx(callee_vmctx_offset); - self.masm.load_ptr(callee_vmctx_addr, callee_vmctx); - - let callee_body_offset = self - .env - .vmoffsets - .vmctx_vmfunction_import_wasm_call(callee.index); - let callee_addr = self.masm.address_at_vmctx(callee_body_offset); - - // Put the callee / caller vmctx at the start of the - // range of the stack so that they are used as first - // and second arguments. - let stack = &mut self.context.stack; - let location = stack.len() - (sig.params().len() - 2); - let values = [ - TypedReg::new(ptr_type, callee_vmctx).into(), - TypedReg::new(ptr_type, caller_vmctx).into(), - ] - .into_iter(); - self.context.stack.insert_many(location, values); - - let abi_sig = ::sig(&sig, &CallingConvention::Default); - FnCall::new(&abi_sig) - .save_live_registers(&mut self.context, self.masm) - .addr(self.masm, &mut self.context, callee_addr); - } - - Callee::Local(callee) => { - let abi_sig = ::sig(&callee.ty, &CallingConvention::Default); - FnCall::new(&abi_sig) - .save_live_registers(&mut self.context, self.masm) - .direct(self.masm, &mut self.context, callee.index); - } - - Callee::FuncRef(ty) => { - // Get type for the caller and callee VMContext. - let abi_sig = ::sig(&ty, &CallingConvention::Default); - // Pop the funcref pointer to a register and allocate a register to hold the - // address of the funcref. 
Since the callee is not addressed from a global non - // allocatable register (like the vmctx in the case of an import), we load the - // funcref to a register ensuring that it doesn't get assigned to a non-arg - // register. - let (funcref_ptr, funcref) = self.context.without::<_, M, _>( - abi_sig.param_regs(), - abi_sig.param_regs(), - self.masm, - |cx, masm| (cx.pop_to_reg(masm, None).into(), cx.any_gpr(masm)), - ); - self.masm.load( - self.masm.address_at_reg( - funcref_ptr, - self.env.vmoffsets.ptr.vm_func_ref_wasm_call().into(), - ), - funcref, - ptr_type.into(), - ); - self.context.free_reg(funcref_ptr); - - FnCall::new(&abi_sig) - .save_live_registers(&mut self.context, self.masm) - .reg(self.masm, &mut self.context, funcref); - } - }; - } - /// Emits a a series of instructions that will type check a function reference call. pub fn emit_typecheck_funcref(&mut self, funcref_ptr: Reg, type_index: TypeIndex) { let ptr_size: OperandSize = self.env.ptr_type().into(); @@ -417,70 +329,89 @@ where }); } - /// Emits a series of instructions to lazily initialize a function reference. - pub fn emit_lazy_init_funcref( - table_data: &TableData, - table_index: TableIndex, - ptr_type: WasmType, - context: &mut CodeGenContext, - masm: &mut M, - call: &mut FnCall, - callee: Reg, - ) { - let index = context.pop_to_reg(masm, None); - let elem_value: Reg = context.any_gpr(masm).into(); - let base = context.any_gpr(masm); - let elem_addr = masm.table_elem_address(index.into(), base, &table_data, context); - masm.load_ptr(elem_addr, elem_value); - - let defined = masm.get_label(); - let cont = masm.get_label(); - - // Preemptively move the table element address to the - // result register, to avoid conflicts at the control flow merge. - let result = call.abi_sig.result.result_reg().unwrap(); - masm.mov(elem_value.into(), result, ptr_type.into()); - - // Push the builtin function arguments to the stack. 
- context - .stack - .push(TypedReg::new(ptr_type, ::vmctx_reg()).into()); - context.stack.push(table_index.as_u32().try_into().unwrap()); - context.stack.push(index.into()); - - // `branch` in this case will perform a test of the given register, - // and jump to the defined branch if it's not zero. - masm.branch( + /// Pops the value at the stack top and assigns it to the local at + /// + /// the given index, returning the typed register holding the + /// source value. + pub fn emit_set_local(&mut self, addr: M::Address, size: OperandSize) -> TypedReg { + let src = self.context.pop_to_reg(self.masm, None); + self.masm.store(RegImm::reg(src.reg), addr, size); + + src + } + + pub fn emit_lazy_init_funcref(&mut self, table_index: TableIndex) { + let table_data = self.env.resolve_table_data(table_index); + let ptr_type = self.env.ptr_type(); + let builtin = self + .context + .builtins + .table_get_lazy_init_func_ref::(); + + // Request the builtin's result register and use it to hold the + // table element value. We preemptively request this register to + // avoid conflict at the control flow merge below. + // Requesting the result register is safe since we know ahead-of-time + // the builtin's signature. + let elem_value: Reg = self + .context + .reg(builtin.sig().result.result_reg().unwrap(), self.masm) + .into(); + + let index = self.context.pop_to_reg(self.masm, None); + let base = self.context.any_gpr(self.masm); + + let elem_addr = + self.masm + .table_elem_address(index.into(), base, &table_data, &mut self.context); + self.masm.load_ptr(elem_addr, elem_value); + // Free the register used as base, once we have loaded the element + // address into the element value register. + self.context.free_reg(base); + + let (defined, cont) = (self.masm.get_label(), self.masm.get_label()); + + // Push the built-in arguments to the stack.
+ self.context.stack.extend( + [ + TypedReg::new(ptr_type, ::vmctx_reg()).into(), + table_index.as_u32().try_into().unwrap(), + index.into(), + ] + .into_iter(), + ); + + self.masm.branch( CmpKind::Ne, elem_value.into(), elem_value, defined, ptr_type.into(), ); + // Free the element value register. + // This is safe since the FnCall::emit call below, will ensure + // that the result register is placed on the value stack. + self.context.free_reg(elem_value); + FnCall::emit::(self.masm, &mut self.context, |_| { + Callee::Builtin(builtin.clone()) + }); - call.calculate_call_stack_space(context) - .reg(masm, context, callee); // We know the signature of the libcall in this case, so we assert that there's // one element in the stack and that it's the ABI signature's result register. - let top = context.stack.peek().unwrap(); + let top = self.context.stack.peek().unwrap(); let top = top.get_reg(); - debug_assert!(top.reg == result); - masm.jmp(cont); + debug_assert!(top.reg == elem_value); + self.masm.jmp(cont); // In the defined case, mask the funcref address in place, by peeking into the // last element of the value stack, which was pushed by the `indirect` function // call above. - masm.bind(defined); + self.masm.bind(defined); let imm = RegImm::i64(FUNCREF_MASK as i64); let dst = top.into(); - masm.and(dst, dst, imm, top.ty.into()); - - masm.bind(cont); - // The indirect call above, will take care of freeing the registers used as - // params. - // So we only free the params used to lazily initialize the func ref. 
- context.free_reg(base); - context.free_reg(elem_value); + self.masm.and(dst, dst, imm, top.ty.into()); + + self.masm.bind(cont); } } diff --git a/winch/codegen/src/frame/mod.rs b/winch/codegen/src/frame/mod.rs index 2979162648e9..9a0329856c8c 100644 --- a/winch/codegen/src/frame/mod.rs +++ b/winch/codegen/src/frame/mod.rs @@ -1,9 +1,12 @@ -use crate::abi::{align_to, ty_size, ABIArg, ABISig, LocalSlot, ABI}; +use crate::{ + abi::{align_to, ty_size, ABIArg, ABISig, LocalSlot, ABI}, + masm::MacroAssembler, +}; use anyhow::Result; use smallvec::SmallVec; use std::ops::Range; use wasmparser::{BinaryReader, FuncValidator, ValidatorResources}; -use wasmtime_environ::{ModuleTranslation, TypeConvert}; +use wasmtime_environ::{ModuleTranslation, TypeConvert, WasmType}; // TODO: // SpiderMonkey's implementation uses 16; @@ -115,6 +118,20 @@ impl Frame { self.locals.get(index as usize) } + /// Returns the address of the local at the given index. + /// + /// # Panics + /// This function panics if the index is not associated to a local. + pub fn get_local_address( + &self, + index: u32, + masm: &mut M, + ) -> (WasmType, M::Address) { + self.get_local(index) + .map(|slot| (slot.ty, masm.local_address(slot))) + .unwrap_or_else(|| panic!("Invalid local slot: {}", index)) + } + fn compute_arg_slots(sig: &ABISig) -> Result<(Locals, u32)> { // Go over the function ABI-signature and // calculate the stack slots.
diff --git a/winch/codegen/src/isa/aarch64/masm.rs b/winch/codegen/src/isa/aarch64/masm.rs index f6032a9461ac..9f7b2533c983 100644 --- a/winch/codegen/src/isa/aarch64/masm.rs +++ b/winch/codegen/src/isa/aarch64/masm.rs @@ -258,7 +258,12 @@ impl Masm for MacroAssembler { todo!() } - fn float_round(&mut self, _mode: RoundingMode, _dst: Reg, _src: RegImm, _size: OperandSize) { + fn float_round( + &mut self, + _mode: RoundingMode, + _context: &mut CodeGenContext, + _size: OperandSize, + ) { todo!(); } diff --git a/winch/codegen/src/isa/aarch64/mod.rs b/winch/codegen/src/isa/aarch64/mod.rs index 70c61ab9ca31..ba9fb049e9bd 100644 --- a/winch/codegen/src/isa/aarch64/mod.rs +++ b/winch/codegen/src/isa/aarch64/mod.rs @@ -8,7 +8,7 @@ use crate::{ regalloc::RegAlloc, regset::RegBitSet, stack::Stack, - TrampolineKind, + BuiltinFunctions, TrampolineKind, }; use anyhow::Result; use cranelift_codegen::settings::{self, Flags}; @@ -17,7 +17,7 @@ use cranelift_codegen::{MachTextSectionBuilder, TextSectionBuilder}; use masm::MacroAssembler as Aarch64Masm; use target_lexicon::Triple; use wasmparser::{FuncValidator, FunctionBody, ValidatorResources}; -use wasmtime_environ::{ModuleTranslation, ModuleTypes, WasmFuncType}; +use wasmtime_environ::{ModuleTranslation, ModuleTypes, VMOffsets, WasmFuncType}; mod abi; mod address; @@ -85,11 +85,13 @@ impl TargetIsa for Aarch64 { fn compile_function( &self, sig: &WasmFuncType, - types: &ModuleTypes, body: &FunctionBody, translation: &ModuleTranslation, + types: &ModuleTypes, + builtins: &mut BuiltinFunctions, validator: &mut FuncValidator, ) -> Result> { + let vmoffsets = VMOffsets::new(self.pointer_bytes(), &translation.module); let mut body = body.get_binary_reader(); let mut masm = Aarch64Masm::new(self.shared_flags.clone()); let stack = Stack::new(); @@ -105,13 +107,8 @@ impl TargetIsa for Aarch64 { // TODO: Add floating point bitmask let fpr = RegBitSet::float(0, 0, usize::try_from(MAX_FPR).unwrap()); let regalloc = RegAlloc::from(gpr, 
fpr); - let codegen_context = CodeGenContext::new(regalloc, stack, &frame); - let env = FuncEnv::new( - self.pointer_bytes(), - translation, - types, - self.wasmtime_call_conv(), - ); + let codegen_context = CodeGenContext::new(regalloc, stack, frame, builtins, &vmoffsets); + let env = FuncEnv::new(&vmoffsets, translation, types); let mut codegen = CodeGen::new(&mut masm, codegen_context, env, abi_sig); codegen.emit(&mut body, validator)?; diff --git a/winch/codegen/src/isa/mod.rs b/winch/codegen/src/isa/mod.rs index 771fc556f9ac..53e821dde5f5 100644 --- a/winch/codegen/src/isa/mod.rs +++ b/winch/codegen/src/isa/mod.rs @@ -1,4 +1,4 @@ -use crate::TrampolineKind; +use crate::{BuiltinFunctions, TrampolineKind}; use anyhow::{anyhow, Result}; use core::fmt::Formatter; use cranelift_codegen::isa::{CallConv, IsaBuilder}; @@ -149,9 +149,10 @@ pub trait TargetIsa: Send + Sync { fn compile_function( &self, sig: &WasmFuncType, - types: &ModuleTypes, body: &FunctionBody, translation: &ModuleTranslation, + types: &ModuleTypes, + builtins: &mut BuiltinFunctions, validator: &mut FuncValidator, ) -> Result>; diff --git a/winch/codegen/src/isa/x64/asm.rs b/winch/codegen/src/isa/x64/asm.rs index 6b94c5ad73c5..4f1fc068dd9a 100644 --- a/winch/codegen/src/isa/x64/asm.rs +++ b/winch/codegen/src/isa/x64/asm.rs @@ -6,7 +6,7 @@ use crate::{ }; use cranelift_codegen::{ entity::EntityRef, - ir::{types, ConstantPool, ExternalName, Opcode, TrapCode, UserExternalNameRef}, + ir::{types, ConstantPool, ExternalName, LibCall, Opcode, TrapCode, UserExternalNameRef}, isa::{ x64::{ args::{ @@ -876,6 +876,22 @@ impl Assembler { }); } + /// Emit a call to a well-known libcall. 
+ pub fn call_with_lib(&mut self, lib: LibCall) { + let dest = ExternalName::LibCall(lib); + self.emit(Inst::CallKnown { + dest, + info: Box::new(CallInfo { + uses: smallvec![], + defs: smallvec![], + clobbers: Default::default(), + opcode: Opcode::Call, + callee_pop_size: 0, + callee_conv: CallConv::SystemV, + }), + }); + } + /// Emits a conditional jump to the given label. pub fn jmp_if(&mut self, cc: impl Into, taken: MachLabel) { self.emit(Inst::JmpIf { diff --git a/winch/codegen/src/isa/x64/masm.rs b/winch/codegen/src/isa/x64/masm.rs index 25264de83e5a..336d015c29f8 100644 --- a/winch/codegen/src/isa/x64/masm.rs +++ b/winch/codegen/src/isa/x64/masm.rs @@ -12,7 +12,7 @@ use crate::masm::{ use crate::{abi::ABI, masm::StackSlot, stack::TypedReg}; use crate::{ abi::{self, align_to, calculate_frame_adjustment, LocalSlot}, - codegen::{ptr_type_from_ptr_size, CodeGenContext, TableData}, + codegen::{ptr_type_from_ptr_size, Callee, CodeGenContext, FnCall, TableData}, stack::Val, }; use crate::{ @@ -257,6 +257,7 @@ impl Masm for MacroAssembler { match callee { CalleeKind::Indirect(reg) => self.asm.call_with_reg(reg), CalleeKind::Direct(idx) => self.asm.call_with_index(idx), + CalleeKind::Known(lib) => self.asm.call_with_lib(lib), }; total_stack } @@ -396,11 +397,42 @@ impl Masm for MacroAssembler { self.asm.and_rr(scratch_xmm, dst, size); } - fn float_round(&mut self, mode: RoundingMode, dst: Reg, src: RegImm, size: OperandSize) { + fn float_round(&mut self, mode: RoundingMode, context: &mut CodeGenContext, size: OperandSize) { if self.flags.has_sse41() { - self.asm.rounds(src.get_reg().unwrap(), dst, mode, size); + let src = context.pop_to_reg(self, None); + self.asm.rounds(src.into(), src.into(), mode, size); + context.stack.push(src.into()); } else { - todo!("libcall fallback for rounding is not implemented") + FnCall::emit::(self, context, |context| { + let b = match (&mode, size) { + (RoundingMode::Up, OperandSize::S32) => { + context.builtins.ceil_f32::<::ABI>() + 
} + (RoundingMode::Up, OperandSize::S64) => { + context.builtins.ceil_f64::<::ABI>() + } + (RoundingMode::Down, OperandSize::S32) => { + context.builtins.floor_f32::<::ABI>() + } + (RoundingMode::Down, OperandSize::S64) => { + context.builtins.floor_f64::<::ABI>() + } + (RoundingMode::Nearest, OperandSize::S32) => { + context.builtins.nearest_f32::<::ABI>() + } + (RoundingMode::Nearest, OperandSize::S64) => { + context.builtins.nearest_f64::<::ABI>() + } + (RoundingMode::Zero, OperandSize::S32) => { + context.builtins.trunc_f32::<::ABI>() + } + (RoundingMode::Zero, OperandSize::S64) => { + context.builtins.trunc_f64::<::ABI>() + } + (_, _) => unreachable!(), + }; + Callee::Builtin(b) + }) } } diff --git a/winch/codegen/src/isa/x64/mod.rs b/winch/codegen/src/isa/x64/mod.rs index 4703f7f20edb..1d38ed4a1703 100644 --- a/winch/codegen/src/isa/x64/mod.rs +++ b/winch/codegen/src/isa/x64/mod.rs @@ -1,6 +1,6 @@ use crate::{ abi::ABI, - codegen::{CodeGen, CodeGenContext, FuncEnv}, + codegen::{BuiltinFunctions, CodeGen, CodeGenContext, FuncEnv}, }; use crate::frame::{DefinedLocals, Frame}; @@ -19,7 +19,7 @@ use cranelift_codegen::{isa::x64::settings as x64_settings, Final, MachBufferFin use cranelift_codegen::{MachTextSectionBuilder, TextSectionBuilder}; use target_lexicon::Triple; use wasmparser::{FuncValidator, FunctionBody, ValidatorResources}; -use wasmtime_environ::{ModuleTranslation, ModuleTypes, WasmFuncType}; +use wasmtime_environ::{ModuleTranslation, ModuleTypes, VMOffsets, WasmFuncType}; use self::regs::{ALL_FPR, ALL_GPR, MAX_FPR, MAX_GPR, NON_ALLOCATABLE_FPR, NON_ALLOCATABLE_GPR}; @@ -89,12 +89,15 @@ impl TargetIsa for X64 { fn compile_function( &self, sig: &WasmFuncType, - types: &ModuleTypes, body: &FunctionBody, translation: &ModuleTranslation, + types: &ModuleTypes, + builtins: &mut BuiltinFunctions, validator: &mut FuncValidator, ) -> Result> { let pointer_bytes = self.pointer_bytes(); + let vmoffsets = VMOffsets::new(pointer_bytes, &translation.module); + 
let mut body = body.get_binary_reader(); let mut masm = X64Masm::new( pointer_bytes, @@ -118,8 +121,8 @@ impl TargetIsa for X64 { ); let regalloc = RegAlloc::from(gpr, fpr); - let codegen_context = CodeGenContext::new(regalloc, stack, &frame); - let env = FuncEnv::new(pointer_bytes, translation, types, self.wasmtime_call_conv()); + let env = FuncEnv::new(&vmoffsets, translation, types); + let codegen_context = CodeGenContext::new(regalloc, stack, frame, builtins, &vmoffsets); let mut codegen = CodeGen::new(&mut masm, codegen_context, env, abi_sig); codegen.emit(&mut body, validator)?; diff --git a/winch/codegen/src/lib.rs b/winch/codegen/src/lib.rs index f68aa5ab3a2a..12656ff4a265 100644 --- a/winch/codegen/src/lib.rs +++ b/winch/codegen/src/lib.rs @@ -7,7 +7,7 @@ #![cfg_attr(not(feature = "all-arch"), allow(dead_code))] mod abi; -pub use codegen::FuncEnv; +pub use codegen::{BuiltinFunctions, FuncEnv}; mod codegen; mod frame; pub mod isa; diff --git a/winch/codegen/src/masm.rs b/winch/codegen/src/masm.rs index ab032cfb7253..d7508b5bbed3 100644 --- a/winch/codegen/src/masm.rs +++ b/winch/codegen/src/masm.rs @@ -1,7 +1,7 @@ use crate::abi::{self, align_to, LocalSlot}; use crate::codegen::{CodeGenContext, TableData}; use crate::isa::reg::Reg; -use cranelift_codegen::{Final, MachBufferFinalized, MachLabel}; +use cranelift_codegen::{ir::LibCall, Final, MachBufferFinalized, MachLabel}; use std::{fmt::Debug, ops::Range}; use wasmtime_environ::PtrSize; @@ -183,12 +183,14 @@ impl Imm { } } -#[derive(Clone)] +#[derive(Copy, Clone, Debug)] pub(crate) enum CalleeKind { /// A function call to a raw address. Indirect(Reg), /// A function call to a local function. Direct(u32), + /// Call to a well known LibCall. + Known(LibCall), } impl CalleeKind { @@ -201,6 +203,11 @@ impl CalleeKind { pub fn direct(index: u32) -> Self { Self::Direct(index) } + + /// Creates a known callee kind from a libcall. 
+ pub fn known(call: LibCall) -> Self { + Self::Known(call) + } } impl RegImm { @@ -232,15 +239,6 @@ impl RegImm { pub fn f64(bits: u64) -> Self { RegImm::Imm(Imm::f64(bits)) } - - /// Get the underlying register of the operand, - /// if it is one. - pub fn get_reg(&self) -> Option { - match self { - Self::Reg(r) => Some(*r), - _ => None, - } - } } impl From for RegImm { @@ -381,7 +379,7 @@ pub(crate) trait MacroAssembler { fn float_neg(&mut self, dst: Reg, size: OperandSize); /// Perform a floating point floor operation. - fn float_round(&mut self, mode: RoundingMode, dst: Reg, src: RegImm, size: OperandSize); + fn float_round(&mut self, mode: RoundingMode, context: &mut CodeGenContext, size: OperandSize); /// Perform logical and operation. fn and(&mut self, dst: Reg, lhs: Reg, rhs: RegImm, size: OperandSize); diff --git a/winch/codegen/src/stack.rs b/winch/codegen/src/stack.rs index 2a993ae95e78..1fc61bdee6dd 100644 --- a/winch/codegen/src/stack.rs +++ b/winch/codegen/src/stack.rs @@ -1,6 +1,5 @@ use crate::{isa::reg::Reg, masm::StackSlot}; use std::collections::VecDeque; -use std::ops::RangeBounds; use wasmparser::{Ieee32, Ieee64}; use wasmtime_environ::WasmType; @@ -323,18 +322,6 @@ impl Stack { pub fn inner_mut(&mut self) -> &mut VecDeque { &mut self.inner } - - /// Calculates size in bytes of memory entries within the specified range of - /// the stack. - pub fn sizeof(&self, range: R) -> u32 - where - R: RangeBounds, - { - self.inner.range(range).fold(0, |acc, v| match v { - Val::Memory(m) => acc + m.slot.size, - _ => acc, - }) - } } #[cfg(test)] diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs index 40a290666920..287059cc8d5e 100644 --- a/winch/codegen/src/visitor.rs +++ b/winch/codegen/src/visitor.rs @@ -5,7 +5,7 @@ //! machine code emitter. 
use crate::abi::ABI; -use crate::codegen::{control_index, CodeGen, ControlStackFrame, FnCall}; +use crate::codegen::{control_index, Callee, CodeGen, ControlStackFrame, FnCall}; use crate::masm::{ CmpKind, DivKind, MacroAssembler, OperandSize, RegImm, RemKind, RoundingMode, ShiftKind, }; @@ -146,7 +146,7 @@ macro_rules! def_unsupported { (emit $unsupported:tt $($rest:tt)*) => {$($rest)*}; } -impl<'a, M> VisitOperator<'a> for CodeGen<'a, M> +impl<'a, 'b, 'c, M> VisitOperator<'a> for CodeGen<'a, 'b, 'c, M> where M: MacroAssembler, { @@ -197,59 +197,43 @@ where } fn visit_f32_floor(&mut self) { - self.context - .unop(self.masm, OperandSize::S32, &mut |masm, reg, size| { - masm.float_round(RoundingMode::Down, reg, RegImm::Reg(reg), size); - }); + self.masm + .float_round(RoundingMode::Down, &mut self.context, OperandSize::S32); } fn visit_f64_floor(&mut self) { - self.context - .unop(self.masm, OperandSize::S64, &mut |masm, reg, size| { - masm.float_round(RoundingMode::Down, reg, RegImm::Reg(reg), size); - }); + self.masm + .float_round(RoundingMode::Down, &mut self.context, OperandSize::S64); } fn visit_f32_ceil(&mut self) { - self.context - .unop(self.masm, OperandSize::S32, &mut |masm, reg, size| { - masm.float_round(RoundingMode::Up, reg, RegImm::Reg(reg), size); - }); + self.masm + .float_round(RoundingMode::Up, &mut self.context, OperandSize::S32); } fn visit_f64_ceil(&mut self) { - self.context - .unop(self.masm, OperandSize::S64, &mut |masm, reg, size| { - masm.float_round(RoundingMode::Up, reg, RegImm::Reg(reg), size); - }); + self.masm + .float_round(RoundingMode::Up, &mut self.context, OperandSize::S64); } fn visit_f32_nearest(&mut self) { - self.context - .unop(self.masm, OperandSize::S32, &mut |masm, reg, size| { - masm.float_round(RoundingMode::Nearest, reg, RegImm::Reg(reg), size); - }); + self.masm + .float_round(RoundingMode::Nearest, &mut self.context, OperandSize::S32); } fn visit_f64_nearest(&mut self) { - self.context - .unop(self.masm, 
OperandSize::S64, &mut |masm, reg, size| { - masm.float_round(RoundingMode::Nearest, reg, RegImm::Reg(reg), size); - }); + self.masm + .float_round(RoundingMode::Nearest, &mut self.context, OperandSize::S64); } fn visit_f32_trunc(&mut self) { - self.context - .unop(self.masm, OperandSize::S32, &mut |masm, reg, size| { - masm.float_round(RoundingMode::Zero, reg, RegImm::Reg(reg), size); - }); + self.masm + .float_round(RoundingMode::Zero, &mut self.context, OperandSize::S32); } fn visit_f64_trunc(&mut self) { - self.context - .unop(self.masm, OperandSize::S64, &mut |masm, reg, size| { - masm.float_round(RoundingMode::Zero, reg, RegImm::Reg(reg), size); - }); + self.masm + .float_round(RoundingMode::Zero, &mut self.context, OperandSize::S64); } fn visit_i32_add(&mut self) { @@ -627,44 +611,25 @@ where // TODO: verify the case where the target local is on the stack. fn visit_local_set(&mut self, index: u32) { - let src = self.context.set_local(self.masm, index); + let (ty, slot) = self.context.frame.get_local_address(index, self.masm); + let src = self.emit_set_local(slot, ty.into()); self.context.free_reg(src); } fn visit_call(&mut self, index: u32) { let callee = self.env.callee_from_index(FuncIndex::from_u32(index)); - self.emit_call(callee); + FnCall::emit::(self.masm, &mut self.context, |_| callee.clone()); } fn visit_call_indirect(&mut self, type_index: u32, table_index: u32, _: u8) { let type_index = TypeIndex::from_u32(type_index); let table_index = TableIndex::from_u32(table_index); - let table_data = self.env.resolve_table_data(table_index); - let ptr_type = self.env.ptr_type(); - - let builtin = self - .env - .builtins - .table_get_lazy_init_func_ref::(); - FnCall::new(&builtin.sig).with_lib( - self.masm, - &mut self.context, - &builtin, - |cx, masm, call, callee| { - CodeGen::emit_lazy_init_funcref( - &table_data, - table_index, - ptr_type, - cx, - masm, - call, - callee, - ); - }, - ); + self.emit_lazy_init_funcref(table_index); // Perform the indirect 
call. + // This code assumes that [`Self::emit_lazy_init_funcref`] will + // push the funcref to the value stack. match self.env.translation.module.table_plans[table_index].style { TableStyle::CallerChecksSignature => { let funcref_ptr = self.context.stack.peek().map(|v| v.get_reg()).unwrap(); @@ -674,132 +639,96 @@ where } } - // Perform call indirect. - // `emit_call` expects the callee to be on the stack. Delaying the - // computation of the callee address reduces register pressure. - self.emit_call(self.env.funcref(type_index)); + FnCall::emit::(self.masm, &mut self.context, |_| { + self.env.funcref(type_index) + }) } fn visit_table_init(&mut self, elem: u32, table: u32) { let ptr_type = self.env.ptr_type(); - let table_init = self.env.builtins.table_init::(); let vmctx = TypedReg::new(ptr_type, ::vmctx_reg()); - FnCall::new(&table_init.sig).with_lib( - self.masm, - &mut self.context, - &table_init, - |cx, masm, call, callee| { - // table.init requires at least 3 elements on the value stack. - debug_assert!(cx.stack.len() >= 3); - let extra_args = [ - vmctx.into(), - table.try_into().unwrap(), - elem.try_into().unwrap(), - ]; - let at = cx.stack.len() - 3; - cx.stack.insert_many(at, extra_args); - // Finalize the call. 
- call.calculate_call_stack_space(cx).reg(masm, cx, callee); - }, + debug_assert!(self.context.stack.len() >= 3); + let at = self.context.stack.len() - 3; + + self.context.stack.insert_many( + at, + [ + vmctx.into(), + table.try_into().unwrap(), + elem.try_into().unwrap(), + ], ); + FnCall::emit::(self.masm, &mut self.context, |cx| { + Callee::Builtin(cx.builtins.table_init::()) + }); } fn visit_table_copy(&mut self, dst: u32, src: u32) { let ptr_type = self.env.ptr_type(); - let table_copy = self.env.builtins.table_copy::(); let vmctx = TypedReg::new(ptr_type, ::vmctx_reg()); + debug_assert!(self.context.stack.len() >= 3); + let at = self.context.stack.len() - 3; + self.context.stack.insert_many( + at, + [ + vmctx.into(), + dst.try_into().unwrap(), + src.try_into().unwrap(), + ], + ); - FnCall::new(&table_copy.sig).with_lib( - self.masm, - &mut self.context, - &table_copy, - |cx, masm, call, callee| { - // table.copy requires at least 3 elemenents in the value stack. - debug_assert!(cx.stack.len() >= 3); - let at = cx.stack.len() - 3; - cx.stack.insert_many( - at, - [ - vmctx.into(), - dst.try_into().unwrap(), - src.try_into().unwrap(), - ], - ); - call.calculate_call_stack_space(cx).reg(masm, cx, callee); - }, - ) + FnCall::emit::(self.masm, &mut self.context, |context| { + Callee::Builtin(context.builtins.table_copy::()) + }); } fn visit_table_get(&mut self, table: u32) { - let ptr_type = self.env.ptr_type(); let table_index = TableIndex::from_u32(table); - let table_data = self.env.resolve_table_data(table_index); let plan = self.env.table_plan(table_index); let heap_type = plan.table.wasm_ty.heap_type; - let style = plan.style.clone(); - let table_get = self - .env - .builtins - .table_get_lazy_init_func_ref::(); + let style = &plan.style; - FnCall::new(&table_get.sig).with_lib( - self.masm, - &mut self.context, - &table_get, - |cx, masm, call, callee| { - match heap_type { - WasmHeapType::Func => match style { - TableStyle::CallerChecksSignature => { - 
CodeGen::emit_lazy_init_funcref( - &table_data, - table_index, - ptr_type, - cx, - masm, - call, - callee, - ); - } - }, - t => unimplemented!("Support for WasmHeapType: {t}"), - }; + match heap_type { + WasmHeapType::Func => match style { + TableStyle::CallerChecksSignature => self.emit_lazy_init_funcref(table_index), }, - ); + t => unimplemented!("Support for WasmHeapType: {t}"), + } } fn visit_table_grow(&mut self, table: u32) { let ptr_type = self.env.ptr_type(); + let vmctx = TypedReg::new(ptr_type, ::vmctx_reg()); let table_index = TableIndex::from_u32(table); let table_plan = self.env.table_plan(table_index); - let vmctx = TypedReg::new(ptr_type, ::vmctx_reg()); let builtin = match table_plan.table.wasm_ty.heap_type { - WasmHeapType::Func => self.env.builtins.table_grow_func_ref::(), + WasmHeapType::Func => self + .context + .builtins + .table_grow_func_ref::(), ty => unimplemented!("Support for HeapType: {ty}"), }; - FnCall::new(&builtin.sig).with_lib( - self.masm, - &mut self.context, - &builtin, - |cx, masm, call, callee| { - let len = cx.stack.len(); - // table.grow requires at least 2 elements on the value stack. - debug_assert!(len >= 2); - // The table_grow builtin expects the parameters in a different - // order. - // The value stack at this point should contain: - // [ init_value | delta ] (stack top) - // but the builtin function expects the init value as the last - // argument. - cx.stack.inner_mut().swap(len - 1, len - 2); - let at = len - 2; - cx.stack - .insert_many(at, [vmctx.into(), table.try_into().unwrap()]); - - call.calculate_call_stack_space(cx).reg(masm, cx, callee); - }, - ); + let len = self.context.stack.len(); + // `table.grow` requires at least 2 elements on the value stack. + debug_assert!(len >= 2); + let at = len - 2; + + // The table_grow builtin expects the parameters in a different + // order.
+ // The value stack at this point should contain: + // [ init_value | delta ] (stack top) + // but the builtin function expects the init value as the last + // argument. + self.context.stack.inner_mut().swap(len - 1, len - 2); + self.context + .stack + .insert_many(at, [vmctx.into(), table.try_into().unwrap()]); + + FnCall::emit::(self.masm, &mut self.context, |_| { + Callee::Builtin(builtin.clone()) + }); } fn visit_table_size(&mut self, table: u32) { @@ -814,24 +743,22 @@ where let table_index = TableIndex::from_u32(table); let table_plan = self.env.table_plan(table_index); let builtin = match table_plan.table.wasm_ty.heap_type { - WasmHeapType::Func => self.env.builtins.table_fill_func_ref::(), + WasmHeapType::Func => self + .context + .builtins + .table_fill_func_ref::(), ty => unimplemented!("Support for heap type: {ty}"), }; - FnCall::new(&builtin.sig).with_lib( - self.masm, - &mut self.context, - &builtin, - |cx, masm, call, callee| { - // table.fill requires at least 3 values on the value stack. 
- debug_assert!(cx.stack.len() >= 3); - let at = cx.stack.len() - 3; - cx.stack - .insert_many(at, [vmctx.into(), table.try_into().unwrap()]); - - call.calculate_call_stack_space(cx).reg(masm, cx, callee); - }, - ); + let len = self.context.stack.len(); + debug_assert!(len >= 3); + let at = len - 3; + self.context + .stack + .insert_many(at, [vmctx.into(), table.try_into().unwrap()]); + FnCall::emit::(self.masm, &mut self.context, |_| { + Callee::Builtin(builtin.clone()) + }) } fn visit_table_set(&mut self, table: u32) { @@ -873,18 +800,14 @@ where fn visit_elem_drop(&mut self, index: u32) { let ptr_type = self.env.ptr_type(); - let elem_drop = self.env.builtins.elem_drop::(); + let elem_drop = self.context.builtins.elem_drop::(); let vmctx = TypedReg::new(ptr_type, ::vmctx_reg()); - - FnCall::new(&elem_drop.sig).with_lib( - self.masm, - &mut self.context, - &elem_drop, - |cx, masm, call, callee| { - cx.stack.extend([vmctx.into(), index.try_into().unwrap()]); - call.calculate_call_stack_space(cx).reg(masm, cx, callee); - }, - ); + self.context + .stack + .extend([vmctx.into(), index.try_into().unwrap()]); + FnCall::emit::(self.masm, &mut self.context, |_| { + Callee::Builtin(elem_drop.clone()) + }); } fn visit_nop(&mut self) {} @@ -939,9 +862,8 @@ where let frame = &mut self.control_frames[index]; frame.set_as_target(); let result = frame.as_target_result(); - let top = self.context.without::( - result.regs(), - result.regs(), + let top = self.context.maybe_without1::( + result.result_reg(), self.masm, |ctx, masm| ctx.pop_to_reg(masm, None), ); @@ -967,9 +889,8 @@ where let default_index = control_index(targets.default(), self.control_frames.len()); let default_result = self.control_frames[default_index].as_target_result(); - let (index, tmp) = self.context.without::<(TypedReg, _), M, _>( - default_result.regs(), - default_result.regs(), + let (index, tmp) = self.context.maybe_without1::<(TypedReg, _), M, _>( + default_result.result_reg(), self.masm, |cx, masm| 
(cx.pop_to_reg(masm, None), cx.any_gpr(masm)), ); @@ -1023,7 +944,8 @@ where } fn visit_local_tee(&mut self, index: u32) { - let typed_reg = self.context.set_local(self.masm, index); + let (ty, slot) = self.context.frame.get_local_address(index, self.masm); + let typed_reg = self.emit_set_local(slot, ty.into()); self.context.stack.push(typed_reg.into()); } @@ -1075,7 +997,7 @@ where wasmparser::for_each_operator!(def_unsupported); } -impl<'a, M> CodeGen<'a, M> +impl<'a, 'b, 'c, M> CodeGen<'a, 'b, 'c, M> where M: MacroAssembler, { diff --git a/winch/filetests/filetests/x64/call_indirect/call_indirect.wat b/winch/filetests/filetests/x64/call_indirect/call_indirect.wat index 2719ef8f9009..68b768a633a4 100644 --- a/winch/filetests/filetests/x64/call_indirect/call_indirect.wat +++ b/winch/filetests/filetests/x64/call_indirect/call_indirect.wat @@ -41,96 +41,103 @@ ;; 21: 85c0 test eax, eax ;; 23: 0f840a000000 je 0x33 ;; 29: b801000000 mov eax, 1 -;; 2e: e925010000 jmp 0x158 +;; 2e: e92e010000 jmp 0x161 ;; 33: 8b44240c mov eax, dword ptr [rsp + 0xc] ;; 37: 83e802 sub eax, 2 ;; 3a: 50 push rax -;; 3b: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 3f: 498b4b48 mov rcx, qword ptr [r11 + 0x48] -;; 43: bb00000000 mov ebx, 0 -;; 48: 4d89f1 mov r9, r14 -;; 4b: 458b5150 mov r10d, dword ptr [r9 + 0x50] -;; 4f: 4439d3 cmp ebx, r10d -;; 52: 0f8306010000 jae 0x15e -;; 58: 4189db mov r11d, ebx -;; 5b: 4d6bdb08 imul r11, r11, 8 -;; 5f: 4d8b4948 mov r9, qword ptr [r9 + 0x48] -;; 63: 4d89cc mov r12, r9 -;; 66: 4d01d9 add r9, r11 -;; 69: 4439d3 cmp ebx, r10d -;; 6c: 4d0f43cc cmovae r9, r12 -;; 70: 4d8b01 mov r8, qword ptr [r9] -;; 73: 4c89c0 mov rax, r8 -;; 76: 4d85c0 test r8, r8 -;; 79: 0f8519000000 jne 0x98 -;; 7f: 4883ec08 sub rsp, 8 -;; 83: 4c89f7 mov rdi, r14 -;; 86: be00000000 mov esi, 0 -;; 8b: 89da mov edx, ebx -;; 8d: ffd1 call rcx -;; 8f: 4883c408 add rsp, 8 -;; 93: e904000000 jmp 0x9c -;; 98: 4883e0fe and rax, 0xfffffffffffffffe -;; 9c: 4885c0 test rax, rax -;; 9f: 0f84bb000000 
je 0x160 -;; a5: 4d8b5e40 mov r11, qword ptr [r14 + 0x40] -;; a9: 418b0b mov ecx, dword ptr [r11] -;; ac: 8b5018 mov edx, dword ptr [rax + 0x18] -;; af: 39d1 cmp ecx, edx -;; b1: 0f85ab000000 jne 0x162 -;; b7: 488b4810 mov rcx, qword ptr [rax + 0x10] -;; bb: 4883ec08 sub rsp, 8 -;; bf: 8b7c2408 mov edi, dword ptr [rsp + 8] -;; c3: ffd1 call rcx -;; c5: 4883c410 add rsp, 0x10 -;; c9: 8b4c240c mov ecx, dword ptr [rsp + 0xc] -;; cd: 83e901 sub ecx, 1 -;; d0: 50 push rax -;; d1: 51 push rcx -;; d2: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; d6: 498b4b48 mov rcx, qword ptr [r11 + 0x48] -;; da: bb00000000 mov ebx, 0 -;; df: 4d89f1 mov r9, r14 -;; e2: 458b5150 mov r10d, dword ptr [r9 + 0x50] -;; e6: 4439d3 cmp ebx, r10d -;; e9: 0f8375000000 jae 0x164 -;; ef: 4189db mov r11d, ebx -;; f2: 4d6bdb08 imul r11, r11, 8 -;; f6: 4d8b4948 mov r9, qword ptr [r9 + 0x48] -;; fa: 4d89cc mov r12, r9 -;; fd: 4d01d9 add r9, r11 -;; 100: 4439d3 cmp ebx, r10d -;; 103: 4d0f43cc cmovae r9, r12 -;; 107: 4d8b01 mov r8, qword ptr [r9] -;; 10a: 4c89c0 mov rax, r8 -;; 10d: 4d85c0 test r8, r8 -;; 110: 0f8511000000 jne 0x127 -;; 116: 4c89f7 mov rdi, r14 -;; 119: be00000000 mov esi, 0 -;; 11e: 89da mov edx, ebx -;; 120: ffd1 call rcx -;; 122: e904000000 jmp 0x12b -;; 127: 4883e0fe and rax, 0xfffffffffffffffe -;; 12b: 4885c0 test rax, rax -;; 12e: 0f8432000000 je 0x166 -;; 134: 4d8b5e40 mov r11, qword ptr [r14 + 0x40] -;; 138: 418b0b mov ecx, dword ptr [r11] -;; 13b: 8b5018 mov edx, dword ptr [rax + 0x18] -;; 13e: 39d1 cmp ecx, edx -;; 140: 0f8522000000 jne 0x168 -;; 146: 488b4810 mov rcx, qword ptr [rax + 0x10] -;; 14a: 8b3c24 mov edi, dword ptr [rsp] -;; 14d: ffd1 call rcx -;; 14f: 4883c408 add rsp, 8 -;; 153: 59 pop rcx -;; 154: 01c1 add ecx, eax -;; 156: 89c8 mov eax, ecx -;; 158: 4883c410 add rsp, 0x10 -;; 15c: 5d pop rbp -;; 15d: c3 ret -;; 15e: 0f0b ud2 -;; 160: 0f0b ud2 -;; 162: 0f0b ud2 -;; 164: 0f0b ud2 -;; 166: 0f0b ud2 -;; 168: 0f0b ud2 +;; 3b: b900000000 mov ecx, 0 +;; 40: 4c89f2 mov 
rdx, r14 +;; 43: 8b5a50 mov ebx, dword ptr [rdx + 0x50] +;; 46: 39d9 cmp ecx, ebx +;; 48: 0f8319010000 jae 0x167 +;; 4e: 4189cb mov r11d, ecx +;; 51: 4d6bdb08 imul r11, r11, 8 +;; 55: 488b5248 mov rdx, qword ptr [rdx + 0x48] +;; 59: 4889d6 mov rsi, rdx +;; 5c: 4c01da add rdx, r11 +;; 5f: 39d9 cmp ecx, ebx +;; 61: 480f43d6 cmovae rdx, rsi +;; 65: 488b02 mov rax, qword ptr [rdx] +;; 68: 4885c0 test rax, rax +;; 6b: 0f8528000000 jne 0x99 +;; 71: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 75: 498b5b48 mov rbx, qword ptr [r11 + 0x48] +;; 79: 4156 push r14 +;; 7b: 51 push rcx +;; 7c: 4883ec08 sub rsp, 8 +;; 80: 488b7c2410 mov rdi, qword ptr [rsp + 0x10] +;; 85: be00000000 mov esi, 0 +;; 8a: 8b542408 mov edx, dword ptr [rsp + 8] +;; 8e: ffd3 call rbx +;; 90: 4883c418 add rsp, 0x18 +;; 94: e904000000 jmp 0x9d +;; 99: 4883e0fe and rax, 0xfffffffffffffffe +;; 9d: 4885c0 test rax, rax +;; a0: 0f84c3000000 je 0x169 +;; a6: 4d8b5e40 mov r11, qword ptr [r14 + 0x40] +;; aa: 418b0b mov ecx, dword ptr [r11] +;; ad: 8b5018 mov edx, dword ptr [rax + 0x18] +;; b0: 39d1 cmp ecx, edx +;; b2: 0f85b3000000 jne 0x16b +;; b8: 50 push rax +;; b9: 59 pop rcx +;; ba: 488b5110 mov rdx, qword ptr [rcx + 0x10] +;; be: 4883ec08 sub rsp, 8 +;; c2: 8b7c2408 mov edi, dword ptr [rsp + 8] +;; c6: ffd2 call rdx +;; c8: 4883c410 add rsp, 0x10 +;; cc: 8b4c240c mov ecx, dword ptr [rsp + 0xc] +;; d0: 83e901 sub ecx, 1 +;; d3: 50 push rax +;; d4: 51 push rcx +;; d5: b900000000 mov ecx, 0 +;; da: 4c89f2 mov rdx, r14 +;; dd: 8b5a50 mov ebx, dword ptr [rdx + 0x50] +;; e0: 39d9 cmp ecx, ebx +;; e2: 0f8385000000 jae 0x16d +;; e8: 4189cb mov r11d, ecx +;; eb: 4d6bdb08 imul r11, r11, 8 +;; ef: 488b5248 mov rdx, qword ptr [rdx + 0x48] +;; f3: 4889d6 mov rsi, rdx +;; f6: 4c01da add rdx, r11 +;; f9: 39d9 cmp ecx, ebx +;; fb: 480f43d6 cmovae rdx, rsi +;; ff: 488b02 mov rax, qword ptr [rdx] +;; 102: 4885c0 test rax, rax +;; 105: 0f8523000000 jne 0x12e +;; 10b: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 10f: 498b5b48 
mov rbx, qword ptr [r11 + 0x48] +;; 113: 4156 push r14 +;; 115: 51 push rcx +;; 116: 488b7c2408 mov rdi, qword ptr [rsp + 8] +;; 11b: be00000000 mov esi, 0 +;; 120: 8b1424 mov edx, dword ptr [rsp] +;; 123: ffd3 call rbx +;; 125: 4883c410 add rsp, 0x10 +;; 129: e904000000 jmp 0x132 +;; 12e: 4883e0fe and rax, 0xfffffffffffffffe +;; 132: 4885c0 test rax, rax +;; 135: 0f8434000000 je 0x16f +;; 13b: 4d8b5e40 mov r11, qword ptr [r14 + 0x40] +;; 13f: 418b0b mov ecx, dword ptr [r11] +;; 142: 8b5018 mov edx, dword ptr [rax + 0x18] +;; 145: 39d1 cmp ecx, edx +;; 147: 0f8524000000 jne 0x171 +;; 14d: 50 push rax +;; 14e: 59 pop rcx +;; 14f: 488b5110 mov rdx, qword ptr [rcx + 0x10] +;; 153: 8b3c24 mov edi, dword ptr [rsp] +;; 156: ffd2 call rdx +;; 158: 4883c408 add rsp, 8 +;; 15c: 59 pop rcx +;; 15d: 01c1 add ecx, eax +;; 15f: 89c8 mov eax, ecx +;; 161: 4883c410 add rsp, 0x10 +;; 165: 5d pop rbp +;; 166: c3 ret +;; 167: 0f0b ud2 +;; 169: 0f0b ud2 +;; 16b: 0f0b ud2 +;; 16d: 0f0b ud2 +;; 16f: 0f0b ud2 +;; 171: 0f0b ud2 diff --git a/winch/filetests/filetests/x64/f32_ceil/f32_ceil_param.wat b/winch/filetests/filetests/x64/f32_ceil/f32_ceil_param.wat new file mode 100644 index 000000000000..ddc23ace0472 --- /dev/null +++ b/winch/filetests/filetests/x64/f32_ceil/f32_ceil_param.wat @@ -0,0 +1,18 @@ +;;! 
target = "x86_64" + +(module + (func (param f32) (result f32) + (local.get 0) + (f32.ceil) + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec10 sub rsp, 0x10 +;; 8: f30f1144240c movss dword ptr [rsp + 0xc], xmm0 +;; e: 4c89742404 mov qword ptr [rsp + 4], r14 +;; 13: f30f1044240c movss xmm0, dword ptr [rsp + 0xc] +;; 19: e800000000 call 0x1e +;; 1e: 4883c410 add rsp, 0x10 +;; 22: 5d pop rbp +;; 23: c3 ret diff --git a/winch/filetests/filetests/x64/f32_floor/f32_floor_param.wat b/winch/filetests/filetests/x64/f32_floor/f32_floor_param.wat new file mode 100644 index 000000000000..905d073e5a0d --- /dev/null +++ b/winch/filetests/filetests/x64/f32_floor/f32_floor_param.wat @@ -0,0 +1,18 @@ +;;! target = "x86_64" + +(module + (func (param f32) (result f32) + (local.get 0) + (f32.floor) + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec10 sub rsp, 0x10 +;; 8: f30f1144240c movss dword ptr [rsp + 0xc], xmm0 +;; e: 4c89742404 mov qword ptr [rsp + 4], r14 +;; 13: f30f1044240c movss xmm0, dword ptr [rsp + 0xc] +;; 19: e800000000 call 0x1e +;; 1e: 4883c410 add rsp, 0x10 +;; 22: 5d pop rbp +;; 23: c3 ret diff --git a/winch/filetests/filetests/x64/f32_nearest/f32_nearest_param.wat b/winch/filetests/filetests/x64/f32_nearest/f32_nearest_param.wat new file mode 100644 index 000000000000..3641f869c46f --- /dev/null +++ b/winch/filetests/filetests/x64/f32_nearest/f32_nearest_param.wat @@ -0,0 +1,18 @@ +;;! 
target = "x86_64" + +(module + (func (param f32) (result f32) + (local.get 0) + (f32.nearest) + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec10 sub rsp, 0x10 +;; 8: f30f1144240c movss dword ptr [rsp + 0xc], xmm0 +;; e: 4c89742404 mov qword ptr [rsp + 4], r14 +;; 13: f30f1044240c movss xmm0, dword ptr [rsp + 0xc] +;; 19: e800000000 call 0x1e +;; 1e: 4883c410 add rsp, 0x10 +;; 22: 5d pop rbp +;; 23: c3 ret diff --git a/winch/filetests/filetests/x64/f32_trunc/f32_trunc_param.wat b/winch/filetests/filetests/x64/f32_trunc/f32_trunc_param.wat new file mode 100644 index 000000000000..58a3680ea836 --- /dev/null +++ b/winch/filetests/filetests/x64/f32_trunc/f32_trunc_param.wat @@ -0,0 +1,18 @@ +;;! target = "x86_64" + +(module + (func (param f32) (result f32) + (local.get 0) + (f32.trunc) + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec10 sub rsp, 0x10 +;; 8: f30f1144240c movss dword ptr [rsp + 0xc], xmm0 +;; e: 4c89742404 mov qword ptr [rsp + 4], r14 +;; 13: f30f1044240c movss xmm0, dword ptr [rsp + 0xc] +;; 19: e800000000 call 0x1e +;; 1e: 4883c410 add rsp, 0x10 +;; 22: 5d pop rbp +;; 23: c3 ret diff --git a/winch/filetests/filetests/x64/f64_ceil/f64_ceil_param.wat b/winch/filetests/filetests/x64/f64_ceil/f64_ceil_param.wat new file mode 100644 index 000000000000..c08fae754a1e --- /dev/null +++ b/winch/filetests/filetests/x64/f64_ceil/f64_ceil_param.wat @@ -0,0 +1,18 @@ +;;! 
target = "x86_64" + +(module + (func (param f64) (result f64) + (local.get 0) + (f64.ceil) + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec10 sub rsp, 0x10 +;; 8: f20f11442408 movsd qword ptr [rsp + 8], xmm0 +;; e: 4c893424 mov qword ptr [rsp], r14 +;; 12: f20f10442408 movsd xmm0, qword ptr [rsp + 8] +;; 18: e800000000 call 0x1d +;; 1d: 4883c410 add rsp, 0x10 +;; 21: 5d pop rbp +;; 22: c3 ret diff --git a/winch/filetests/filetests/x64/f64_floor/f64_floor_param.wat b/winch/filetests/filetests/x64/f64_floor/f64_floor_param.wat new file mode 100644 index 000000000000..01b6e9517c0f --- /dev/null +++ b/winch/filetests/filetests/x64/f64_floor/f64_floor_param.wat @@ -0,0 +1,18 @@ +;;! target = "x86_64" + +(module + (func (param f64) (result f64) + (local.get 0) + (f64.floor) + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec10 sub rsp, 0x10 +;; 8: f20f11442408 movsd qword ptr [rsp + 8], xmm0 +;; e: 4c893424 mov qword ptr [rsp], r14 +;; 12: f20f10442408 movsd xmm0, qword ptr [rsp + 8] +;; 18: e800000000 call 0x1d +;; 1d: 4883c410 add rsp, 0x10 +;; 21: 5d pop rbp +;; 22: c3 ret diff --git a/winch/filetests/filetests/x64/f64_nearest/f64_nearest_param.wat b/winch/filetests/filetests/x64/f64_nearest/f64_nearest_param.wat new file mode 100644 index 000000000000..7e957c10612b --- /dev/null +++ b/winch/filetests/filetests/x64/f64_nearest/f64_nearest_param.wat @@ -0,0 +1,18 @@ +;;! 
target = "x86_64" + +(module + (func (param f64) (result f64) + (local.get 0) + (f64.nearest) + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec10 sub rsp, 0x10 +;; 8: f20f11442408 movsd qword ptr [rsp + 8], xmm0 +;; e: 4c893424 mov qword ptr [rsp], r14 +;; 12: f20f10442408 movsd xmm0, qword ptr [rsp + 8] +;; 18: e800000000 call 0x1d +;; 1d: 4883c410 add rsp, 0x10 +;; 21: 5d pop rbp +;; 22: c3 ret diff --git a/winch/filetests/filetests/x64/f64_trunc/f64_trunc_param.wat b/winch/filetests/filetests/x64/f64_trunc/f64_trunc_param.wat new file mode 100644 index 000000000000..00d68f5ae80f --- /dev/null +++ b/winch/filetests/filetests/x64/f64_trunc/f64_trunc_param.wat @@ -0,0 +1,18 @@ +;;! target = "x86_64" + +(module + (func (param f64) (result f64) + (local.get 0) + (f64.trunc) + ) +) +;; 0: 55 push rbp +;; 1: 4889e5 mov rbp, rsp +;; 4: 4883ec10 sub rsp, 0x10 +;; 8: f20f11442408 movsd qword ptr [rsp + 8], xmm0 +;; e: 4c893424 mov qword ptr [rsp], r14 +;; 12: f20f10442408 movsd xmm0, qword ptr [rsp + 8] +;; 18: e800000000 call 0x1d +;; 1d: 4883c410 add rsp, 0x10 +;; 21: 5d pop rbp +;; 22: c3 ret diff --git a/winch/filetests/filetests/x64/table/fill.wat b/winch/filetests/filetests/x64/table/fill.wat index 0b7960d823ca..2a61212958ae 100644 --- a/winch/filetests/filetests/x64/table/fill.wat +++ b/winch/filetests/filetests/x64/table/fill.wat @@ -50,50 +50,45 @@ ;; c: 89742418 mov dword ptr [rsp + 0x18], esi ;; 10: 89542414 mov dword ptr [rsp + 0x14], edx ;; 14: 4c89742404 mov qword ptr [rsp + 4], r14 -;; 19: 448b5c2418 mov r11d, dword ptr [rsp + 0x18] -;; 1e: 4153 push r11 -;; 20: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 24: 498b4b48 mov rcx, qword ptr [r11 + 0x48] -;; 28: 5b pop rbx -;; 29: 4d89f1 mov r9, r14 -;; 2c: 458b5150 mov r10d, dword ptr [r9 + 0x50] -;; 30: 4439d3 cmp ebx, r10d -;; 33: 0f8384000000 jae 0xbd -;; 39: 4189db mov r11d, ebx -;; 3c: 4d6bdb08 imul r11, r11, 8 -;; 40: 4d8b4948 mov r9, qword ptr [r9 + 0x48] -;; 44: 4d89cc mov r12, r9 -;; 
47: 4d01d9 add r9, r11 -;; 4a: 4439d3 cmp ebx, r10d -;; 4d: 4d0f43cc cmovae r9, r12 -;; 51: 4d8b01 mov r8, qword ptr [r9] -;; 54: 4c89c0 mov rax, r8 -;; 57: 4d85c0 test r8, r8 -;; 5a: 0f8511000000 jne 0x71 -;; 60: 4c89f7 mov rdi, r14 -;; 63: be00000000 mov esi, 0 -;; 68: 89da mov edx, ebx -;; 6a: ffd1 call rcx +;; 19: 8b4c2418 mov ecx, dword ptr [rsp + 0x18] +;; 1d: 4c89f2 mov rdx, r14 +;; 20: 8b5a50 mov ebx, dword ptr [rdx + 0x50] +;; 23: 39d9 cmp ecx, ebx +;; 25: 0f8381000000 jae 0xac +;; 2b: 4189cb mov r11d, ecx +;; 2e: 4d6bdb08 imul r11, r11, 8 +;; 32: 488b5248 mov rdx, qword ptr [rdx + 0x48] +;; 36: 4889d6 mov rsi, rdx +;; 39: 4c01da add rdx, r11 +;; 3c: 39d9 cmp ecx, ebx +;; 3e: 480f43d6 cmovae rdx, rsi +;; 42: 488b02 mov rax, qword ptr [rdx] +;; 45: 4885c0 test rax, rax +;; 48: 0f8523000000 jne 0x71 +;; 4e: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 52: 498b5b48 mov rbx, qword ptr [r11 + 0x48] +;; 56: 4156 push r14 +;; 58: 51 push rcx +;; 59: 488b7c2408 mov rdi, qword ptr [rsp + 8] +;; 5e: be00000000 mov esi, 0 +;; 63: 8b1424 mov edx, dword ptr [rsp] +;; 66: ffd3 call rbx +;; 68: 4883c410 add rsp, 0x10 ;; 6c: e904000000 jmp 0x75 ;; 71: 4883e0fe and rax, 0xfffffffffffffffe ;; 75: 488944240c mov qword ptr [rsp + 0xc], rax -;; 7a: 448b5c241c mov r11d, dword ptr [rsp + 0x1c] -;; 7f: 4153 push r11 -;; 81: 4c8b5c2414 mov r11, qword ptr [rsp + 0x14] -;; 86: 4153 push r11 -;; 88: 448b5c2424 mov r11d, dword ptr [rsp + 0x24] -;; 8d: 4153 push r11 -;; 8f: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 93: 498b4368 mov rax, qword ptr [r11 + 0x68] -;; 97: 4883ec08 sub rsp, 8 -;; 9b: 4c89f7 mov rdi, r14 -;; 9e: be01000000 mov esi, 1 -;; a3: 8b542418 mov edx, dword ptr [rsp + 0x18] -;; a7: 488b4c2410 mov rcx, qword ptr [rsp + 0x10] -;; ac: 448b442408 mov r8d, dword ptr [rsp + 8] -;; b1: ffd0 call rax -;; b3: 4883c420 add rsp, 0x20 -;; b7: 4883c420 add rsp, 0x20 -;; bb: 5d pop rbp -;; bc: c3 ret -;; bd: 0f0b ud2 +;; 7a: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 7e: 
498b4368 mov rax, qword ptr [r11 + 0x68] +;; 82: 4156 push r14 +;; 84: 4883ec08 sub rsp, 8 +;; 88: 488b7c2408 mov rdi, qword ptr [rsp + 8] +;; 8d: be01000000 mov esi, 1 +;; 92: 8b54242c mov edx, dword ptr [rsp + 0x2c] +;; 96: 488b4c241c mov rcx, qword ptr [rsp + 0x1c] +;; 9b: 448b442424 mov r8d, dword ptr [rsp + 0x24] +;; a0: ffd0 call rax +;; a2: 4883c410 add rsp, 0x10 +;; a6: 4883c420 add rsp, 0x20 +;; aa: 5d pop rbp +;; ab: c3 ret +;; ac: 0f0b ud2 diff --git a/winch/filetests/filetests/x64/table/get.wat b/winch/filetests/filetests/x64/table/get.wat index 2c28b304d48d..58d6b0fb9dfd 100644 --- a/winch/filetests/filetests/x64/table/get.wat +++ b/winch/filetests/filetests/x64/table/get.wat @@ -22,30 +22,30 @@ ;; 4: 4883ec10 sub rsp, 0x10 ;; 8: 897c240c mov dword ptr [rsp + 0xc], edi ;; c: 4c89742404 mov qword ptr [rsp + 4], r14 -;; 11: 448b5c240c mov r11d, dword ptr [rsp + 0xc] -;; 16: 4153 push r11 -;; 18: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 1c: 498b4b48 mov rcx, qword ptr [r11 + 0x48] -;; 20: 5b pop rbx -;; 21: 4d89f1 mov r9, r14 -;; 24: 458b5150 mov r10d, dword ptr [r9 + 0x50] -;; 28: 4439d3 cmp ebx, r10d -;; 2b: 0f8342000000 jae 0x73 -;; 31: 4189db mov r11d, ebx -;; 34: 4d6bdb08 imul r11, r11, 8 -;; 38: 4d8b4948 mov r9, qword ptr [r9 + 0x48] -;; 3c: 4d89cc mov r12, r9 -;; 3f: 4d01d9 add r9, r11 -;; 42: 4439d3 cmp ebx, r10d -;; 45: 4d0f43cc cmovae r9, r12 -;; 49: 4d8b01 mov r8, qword ptr [r9] -;; 4c: 4c89c0 mov rax, r8 -;; 4f: 4d85c0 test r8, r8 -;; 52: 0f8511000000 jne 0x69 -;; 58: 4c89f7 mov rdi, r14 -;; 5b: be00000000 mov esi, 0 -;; 60: 89da mov edx, ebx -;; 62: ffd1 call rcx +;; 11: 8b4c240c mov ecx, dword ptr [rsp + 0xc] +;; 15: 4c89f2 mov rdx, r14 +;; 18: 8b5a50 mov ebx, dword ptr [rdx + 0x50] +;; 1b: 39d9 cmp ecx, ebx +;; 1d: 0f8350000000 jae 0x73 +;; 23: 4189cb mov r11d, ecx +;; 26: 4d6bdb08 imul r11, r11, 8 +;; 2a: 488b5248 mov rdx, qword ptr [rdx + 0x48] +;; 2e: 4889d6 mov rsi, rdx +;; 31: 4c01da add rdx, r11 +;; 34: 39d9 cmp ecx, ebx +;; 36: 
480f43d6 cmovae rdx, rsi +;; 3a: 488b02 mov rax, qword ptr [rdx] +;; 3d: 4885c0 test rax, rax +;; 40: 0f8523000000 jne 0x69 +;; 46: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 4a: 498b5b48 mov rbx, qword ptr [r11 + 0x48] +;; 4e: 4156 push r14 +;; 50: 51 push rcx +;; 51: 488b7c2408 mov rdi, qword ptr [rsp + 8] +;; 56: be00000000 mov esi, 0 +;; 5b: 8b1424 mov edx, dword ptr [rsp] +;; 5e: ffd3 call rbx +;; 60: 4883c410 add rsp, 0x10 ;; 64: e904000000 jmp 0x6d ;; 69: 4883e0fe and rax, 0xfffffffffffffffe ;; 6d: 4883c410 add rsp, 0x10 diff --git a/winch/filetests/filetests/x64/table/grow.wat b/winch/filetests/filetests/x64/table/grow.wat index b5f97c33fc94..918d16747603 100644 --- a/winch/filetests/filetests/x64/table/grow.wat +++ b/winch/filetests/filetests/x64/table/grow.wat @@ -14,17 +14,16 @@ ;; 4: 4883ec10 sub rsp, 0x10 ;; 8: 48897c2408 mov qword ptr [rsp + 8], rdi ;; d: 4c893424 mov qword ptr [rsp], r14 -;; 11: 4c8b5c2408 mov r11, qword ptr [rsp + 8] -;; 16: 4153 push r11 -;; 18: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 1c: 498b5b50 mov rbx, qword ptr [r11 + 0x50] -;; 20: 4883ec08 sub rsp, 8 -;; 24: 4c89f7 mov rdi, r14 -;; 27: be00000000 mov esi, 0 -;; 2c: ba0a000000 mov edx, 0xa -;; 31: 488b4c2408 mov rcx, qword ptr [rsp + 8] -;; 36: ffd3 call rbx -;; 38: 4883c410 add rsp, 0x10 -;; 3c: 4883c410 add rsp, 0x10 -;; 40: 5d pop rbp -;; 41: c3 ret +;; 11: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 15: 498b5b50 mov rbx, qword ptr [r11 + 0x50] +;; 19: 4156 push r14 +;; 1b: 4883ec08 sub rsp, 8 +;; 1f: 488b7c2408 mov rdi, qword ptr [rsp + 8] +;; 24: be00000000 mov esi, 0 +;; 29: ba0a000000 mov edx, 0xa +;; 2e: 488b4c2418 mov rcx, qword ptr [rsp + 0x18] +;; 33: ffd3 call rbx +;; 35: 4883c410 add rsp, 0x10 +;; 39: 4883c410 add rsp, 0x10 +;; 3d: 5d pop rbp +;; 3e: c3 ret diff --git a/winch/filetests/filetests/x64/table/init_copy_drop.wat b/winch/filetests/filetests/x64/table/init_copy_drop.wat index 523f2a9838d9..a5d12df2efc9 100644 --- 
a/winch/filetests/filetests/x64/table/init_copy_drop.wat +++ b/winch/filetests/filetests/x64/table/init_copy_drop.wat @@ -84,142 +84,144 @@ ;; 8: 4c893424 mov qword ptr [rsp], r14 ;; c: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] ;; 10: 498b4310 mov rax, qword ptr [r11 + 0x10] -;; 14: 4883ec08 sub rsp, 8 -;; 18: 4c89f7 mov rdi, r14 -;; 1b: be00000000 mov esi, 0 -;; 20: ba01000000 mov edx, 1 -;; 25: b907000000 mov ecx, 7 -;; 2a: 41b800000000 mov r8d, 0 -;; 30: 41b904000000 mov r9d, 4 -;; 36: ffd0 call rax -;; 38: 4883c408 add rsp, 8 -;; 3c: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 40: 498b4318 mov rax, qword ptr [r11 + 0x18] -;; 44: 4883ec08 sub rsp, 8 -;; 48: 4c89f7 mov rdi, r14 -;; 4b: be01000000 mov esi, 1 -;; 50: ffd0 call rax -;; 52: 4883c408 add rsp, 8 -;; 56: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 5a: 498b4310 mov rax, qword ptr [r11 + 0x10] -;; 5e: 4883ec08 sub rsp, 8 -;; 62: 4c89f7 mov rdi, r14 -;; 65: be00000000 mov esi, 0 -;; 6a: ba03000000 mov edx, 3 -;; 6f: b90f000000 mov ecx, 0xf -;; 74: 41b801000000 mov r8d, 1 -;; 7a: 41b903000000 mov r9d, 3 -;; 80: ffd0 call rax -;; 82: 4883c408 add rsp, 8 -;; 86: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 8a: 498b4318 mov rax, qword ptr [r11 + 0x18] -;; 8e: 4883ec08 sub rsp, 8 -;; 92: 4c89f7 mov rdi, r14 -;; 95: be03000000 mov esi, 3 -;; 9a: ffd0 call rax -;; 9c: 4883c408 add rsp, 8 -;; a0: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; a4: 498b4308 mov rax, qword ptr [r11 + 8] -;; a8: 4883ec08 sub rsp, 8 -;; ac: 4c89f7 mov rdi, r14 -;; af: be00000000 mov esi, 0 -;; b4: ba00000000 mov edx, 0 -;; b9: b914000000 mov ecx, 0x14 -;; be: 41b80f000000 mov r8d, 0xf -;; c4: 41b905000000 mov r9d, 5 -;; ca: ffd0 call rax -;; cc: 4883c408 add rsp, 8 -;; d0: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; d4: 498b4308 mov rax, qword ptr [r11 + 8] -;; d8: 4883ec08 sub rsp, 8 -;; dc: 4c89f7 mov rdi, r14 -;; df: be00000000 mov esi, 0 -;; e4: ba00000000 mov edx, 0 -;; e9: b915000000 mov ecx, 0x15 -;; ee: 41b81d000000 mov r8d, 
0x1d -;; f4: 41b901000000 mov r9d, 1 -;; fa: ffd0 call rax -;; fc: 4883c408 add rsp, 8 -;; 100: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 104: 498b4308 mov rax, qword ptr [r11 + 8] -;; 108: 4883ec08 sub rsp, 8 -;; 10c: 4c89f7 mov rdi, r14 -;; 10f: be00000000 mov esi, 0 -;; 114: ba00000000 mov edx, 0 -;; 119: b918000000 mov ecx, 0x18 -;; 11e: 41b80a000000 mov r8d, 0xa -;; 124: 41b901000000 mov r9d, 1 -;; 12a: ffd0 call rax -;; 12c: 4883c408 add rsp, 8 -;; 130: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 134: 498b4308 mov rax, qword ptr [r11 + 8] -;; 138: 4883ec08 sub rsp, 8 -;; 13c: 4c89f7 mov rdi, r14 -;; 13f: be00000000 mov esi, 0 -;; 144: ba00000000 mov edx, 0 -;; 149: b90d000000 mov ecx, 0xd -;; 14e: 41b80b000000 mov r8d, 0xb -;; 154: 41b904000000 mov r9d, 4 -;; 15a: ffd0 call rax -;; 15c: 4883c408 add rsp, 8 -;; 160: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 164: 498b4308 mov rax, qword ptr [r11 + 8] -;; 168: 4883ec08 sub rsp, 8 -;; 16c: 4c89f7 mov rdi, r14 -;; 16f: be00000000 mov esi, 0 -;; 174: ba00000000 mov edx, 0 -;; 179: b913000000 mov ecx, 0x13 -;; 17e: 41b814000000 mov r8d, 0x14 -;; 184: 41b905000000 mov r9d, 5 -;; 18a: ffd0 call rax -;; 18c: 4883c408 add rsp, 8 -;; 190: 4883c408 add rsp, 8 -;; 194: 5d pop rbp -;; 195: c3 ret +;; 14: 4156 push r14 +;; 16: 488b3c24 mov rdi, qword ptr [rsp] +;; 1a: be00000000 mov esi, 0 +;; 1f: ba01000000 mov edx, 1 +;; 24: b907000000 mov ecx, 7 +;; 29: 41b800000000 mov r8d, 0 +;; 2f: 41b904000000 mov r9d, 4 +;; 35: ffd0 call rax +;; 37: 4883c408 add rsp, 8 +;; 3b: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 3f: 498b4318 mov rax, qword ptr [r11 + 0x18] +;; 43: 4156 push r14 +;; 45: 488b3c24 mov rdi, qword ptr [rsp] +;; 49: be01000000 mov esi, 1 +;; 4e: ffd0 call rax +;; 50: 4883c408 add rsp, 8 +;; 54: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 58: 498b4310 mov rax, qword ptr [r11 + 0x10] +;; 5c: 4156 push r14 +;; 5e: 488b3c24 mov rdi, qword ptr [rsp] +;; 62: be00000000 mov esi, 0 +;; 67: ba03000000 mov edx, 3 
+;; 6c: b90f000000 mov ecx, 0xf +;; 71: 41b801000000 mov r8d, 1 +;; 77: 41b903000000 mov r9d, 3 +;; 7d: ffd0 call rax +;; 7f: 4883c408 add rsp, 8 +;; 83: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 87: 498b4318 mov rax, qword ptr [r11 + 0x18] +;; 8b: 4156 push r14 +;; 8d: 488b3c24 mov rdi, qword ptr [rsp] +;; 91: be03000000 mov esi, 3 +;; 96: ffd0 call rax +;; 98: 4883c408 add rsp, 8 +;; 9c: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; a0: 498b4308 mov rax, qword ptr [r11 + 8] +;; a4: 4156 push r14 +;; a6: 488b3c24 mov rdi, qword ptr [rsp] +;; aa: be00000000 mov esi, 0 +;; af: ba00000000 mov edx, 0 +;; b4: b914000000 mov ecx, 0x14 +;; b9: 41b80f000000 mov r8d, 0xf +;; bf: 41b905000000 mov r9d, 5 +;; c5: ffd0 call rax +;; c7: 4883c408 add rsp, 8 +;; cb: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; cf: 498b4308 mov rax, qword ptr [r11 + 8] +;; d3: 4156 push r14 +;; d5: 488b3c24 mov rdi, qword ptr [rsp] +;; d9: be00000000 mov esi, 0 +;; de: ba00000000 mov edx, 0 +;; e3: b915000000 mov ecx, 0x15 +;; e8: 41b81d000000 mov r8d, 0x1d +;; ee: 41b901000000 mov r9d, 1 +;; f4: ffd0 call rax +;; f6: 4883c408 add rsp, 8 +;; fa: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; fe: 498b4308 mov rax, qword ptr [r11 + 8] +;; 102: 4156 push r14 +;; 104: 488b3c24 mov rdi, qword ptr [rsp] +;; 108: be00000000 mov esi, 0 +;; 10d: ba00000000 mov edx, 0 +;; 112: b918000000 mov ecx, 0x18 +;; 117: 41b80a000000 mov r8d, 0xa +;; 11d: 41b901000000 mov r9d, 1 +;; 123: ffd0 call rax +;; 125: 4883c408 add rsp, 8 +;; 129: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 12d: 498b4308 mov rax, qword ptr [r11 + 8] +;; 131: 4156 push r14 +;; 133: 488b3c24 mov rdi, qword ptr [rsp] +;; 137: be00000000 mov esi, 0 +;; 13c: ba00000000 mov edx, 0 +;; 141: b90d000000 mov ecx, 0xd +;; 146: 41b80b000000 mov r8d, 0xb +;; 14c: 41b904000000 mov r9d, 4 +;; 152: ffd0 call rax +;; 154: 4883c408 add rsp, 8 +;; 158: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 15c: 498b4308 mov rax, qword ptr [r11 + 8] +;; 160: 4156 push r14 
+;; 162: 488b3c24 mov rdi, qword ptr [rsp] +;; 166: be00000000 mov esi, 0 +;; 16b: ba00000000 mov edx, 0 +;; 170: b913000000 mov ecx, 0x13 +;; 175: 41b814000000 mov r8d, 0x14 +;; 17b: 41b905000000 mov r9d, 5 +;; 181: ffd0 call rax +;; 183: 4883c408 add rsp, 8 +;; 187: 4883c408 add rsp, 8 +;; 18b: 5d pop rbp +;; 18c: c3 ret ;; ;; 0: 55 push rbp ;; 1: 4889e5 mov rbp, rsp ;; 4: 4883ec10 sub rsp, 0x10 ;; 8: 897c240c mov dword ptr [rsp + 0xc], edi ;; c: 4c89742404 mov qword ptr [rsp + 4], r14 -;; 11: 448b5c240c mov r11d, dword ptr [rsp + 0xc] -;; 16: 4153 push r11 -;; 18: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 1c: 498b4b48 mov rcx, qword ptr [r11 + 0x48] -;; 20: 5b pop rbx -;; 21: 4d89f1 mov r9, r14 -;; 24: 458b91f0000000 mov r10d, dword ptr [r9 + 0xf0] -;; 2b: 4439d3 cmp ebx, r10d -;; 2e: 0f8366000000 jae 0x9a -;; 34: 4189db mov r11d, ebx -;; 37: 4d6bdb08 imul r11, r11, 8 -;; 3b: 4d8b89e8000000 mov r9, qword ptr [r9 + 0xe8] -;; 42: 4d89cc mov r12, r9 -;; 45: 4d01d9 add r9, r11 -;; 48: 4439d3 cmp ebx, r10d -;; 4b: 4d0f43cc cmovae r9, r12 -;; 4f: 4d8b01 mov r8, qword ptr [r9] -;; 52: 4c89c0 mov rax, r8 -;; 55: 4d85c0 test r8, r8 -;; 58: 0f8511000000 jne 0x6f -;; 5e: 4c89f7 mov rdi, r14 -;; 61: be00000000 mov esi, 0 -;; 66: 89da mov edx, ebx -;; 68: ffd1 call rcx +;; 11: 8b4c240c mov ecx, dword ptr [rsp + 0xc] +;; 15: 4c89f2 mov rdx, r14 +;; 18: 8b9af0000000 mov ebx, dword ptr [rdx + 0xf0] +;; 1e: 39d9 cmp ecx, ebx +;; 20: 0f8376000000 jae 0x9c +;; 26: 4189cb mov r11d, ecx +;; 29: 4d6bdb08 imul r11, r11, 8 +;; 2d: 488b92e8000000 mov rdx, qword ptr [rdx + 0xe8] +;; 34: 4889d6 mov rsi, rdx +;; 37: 4c01da add rdx, r11 +;; 3a: 39d9 cmp ecx, ebx +;; 3c: 480f43d6 cmovae rdx, rsi +;; 40: 488b02 mov rax, qword ptr [rdx] +;; 43: 4885c0 test rax, rax +;; 46: 0f8523000000 jne 0x6f +;; 4c: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 50: 498b5b48 mov rbx, qword ptr [r11 + 0x48] +;; 54: 4156 push r14 +;; 56: 51 push rcx +;; 57: 488b7c2408 mov rdi, qword ptr [rsp + 8] +;; 5c: 
be00000000 mov esi, 0 +;; 61: 8b1424 mov edx, dword ptr [rsp] +;; 64: ffd3 call rbx +;; 66: 4883c410 add rsp, 0x10 ;; 6a: e904000000 jmp 0x73 ;; 6f: 4883e0fe and rax, 0xfffffffffffffffe ;; 73: 4885c0 test rax, rax -;; 76: 0f8420000000 je 0x9c +;; 76: 0f8422000000 je 0x9e ;; 7c: 4d8b5e40 mov r11, qword ptr [r14 + 0x40] ;; 80: 418b0b mov ecx, dword ptr [r11] ;; 83: 8b5018 mov edx, dword ptr [rax + 0x18] ;; 86: 39d1 cmp ecx, edx -;; 88: 0f8510000000 jne 0x9e -;; 8e: 488b4810 mov rcx, qword ptr [rax + 0x10] -;; 92: ffd1 call rcx -;; 94: 4883c410 add rsp, 0x10 -;; 98: 5d pop rbp -;; 99: c3 ret -;; 9a: 0f0b ud2 +;; 88: 0f8512000000 jne 0xa0 +;; 8e: 50 push rax +;; 8f: 59 pop rcx +;; 90: 488b5110 mov rdx, qword ptr [rcx + 0x10] +;; 94: ffd2 call rdx +;; 96: 4883c410 add rsp, 0x10 +;; 9a: 5d pop rbp +;; 9b: c3 ret ;; 9c: 0f0b ud2 ;; 9e: 0f0b ud2 +;; a0: 0f0b ud2 diff --git a/winch/filetests/filetests/x64/table/set.wat b/winch/filetests/filetests/x64/table/set.wat index 55c325fcb011..b3cfb2b0f56e 100644 --- a/winch/filetests/filetests/x64/table/set.wat +++ b/winch/filetests/filetests/x64/table/set.wat @@ -54,52 +54,48 @@ ;; 8: 897c240c mov dword ptr [rsp + 0xc], edi ;; c: 89742408 mov dword ptr [rsp + 8], esi ;; 10: 4c893424 mov qword ptr [rsp], r14 -;; 14: 448b5c240c mov r11d, dword ptr [rsp + 0xc] -;; 19: 4153 push r11 -;; 1b: 448b5c2410 mov r11d, dword ptr [rsp + 0x10] -;; 20: 4153 push r11 -;; 22: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] -;; 26: 498b4b48 mov rcx, qword ptr [r11 + 0x48] -;; 2a: 5b pop rbx -;; 2b: 4d89f1 mov r9, r14 -;; 2e: 458b5150 mov r10d, dword ptr [r9 + 0x50] -;; 32: 4439d3 cmp ebx, r10d -;; 35: 0f8377000000 jae 0xb2 -;; 3b: 4189db mov r11d, ebx -;; 3e: 4d6bdb08 imul r11, r11, 8 -;; 42: 4d8b4948 mov r9, qword ptr [r9 + 0x48] -;; 46: 4d89cc mov r12, r9 -;; 49: 4d01d9 add r9, r11 -;; 4c: 4439d3 cmp ebx, r10d -;; 4f: 4d0f43cc cmovae r9, r12 -;; 53: 4d8b01 mov r8, qword ptr [r9] -;; 56: 4c89c0 mov rax, r8 -;; 59: 4d85c0 test r8, r8 -;; 5c: 0f8519000000 
jne 0x7b -;; 62: 4883ec08 sub rsp, 8 -;; 66: 4c89f7 mov rdi, r14 -;; 69: be00000000 mov esi, 0 -;; 6e: 89da mov edx, ebx -;; 70: ffd1 call rcx -;; 72: 4883c408 add rsp, 8 -;; 76: e904000000 jmp 0x7f -;; 7b: 4883e0fe and rax, 0xfffffffffffffffe -;; 7f: 59 pop rcx -;; 80: 4c89f2 mov rdx, r14 -;; 83: 8b5a50 mov ebx, dword ptr [rdx + 0x50] -;; 86: 39d9 cmp ecx, ebx -;; 88: 0f8326000000 jae 0xb4 -;; 8e: 4189cb mov r11d, ecx -;; 91: 4d6bdb08 imul r11, r11, 8 -;; 95: 488b5248 mov rdx, qword ptr [rdx + 0x48] -;; 99: 4889d6 mov rsi, rdx -;; 9c: 4c01da add rdx, r11 -;; 9f: 39d9 cmp ecx, ebx -;; a1: 480f43d6 cmovae rdx, rsi -;; a5: 4883c801 or rax, 1 -;; a9: 488902 mov qword ptr [rdx], rax -;; ac: 4883c410 add rsp, 0x10 -;; b0: 5d pop rbp -;; b1: c3 ret -;; b2: 0f0b ud2 -;; b4: 0f0b ud2 +;; 14: 8b4c2408 mov ecx, dword ptr [rsp + 8] +;; 18: 4c89f2 mov rdx, r14 +;; 1b: 8b5a50 mov ebx, dword ptr [rdx + 0x50] +;; 1e: 39d9 cmp ecx, ebx +;; 20: 0f8380000000 jae 0xa6 +;; 26: 4189cb mov r11d, ecx +;; 29: 4d6bdb08 imul r11, r11, 8 +;; 2d: 488b5248 mov rdx, qword ptr [rdx + 0x48] +;; 31: 4889d6 mov rsi, rdx +;; 34: 4c01da add rdx, r11 +;; 37: 39d9 cmp ecx, ebx +;; 39: 480f43d6 cmovae rdx, rsi +;; 3d: 488b02 mov rax, qword ptr [rdx] +;; 40: 4885c0 test rax, rax +;; 43: 0f8523000000 jne 0x6c +;; 49: 4d8b5e38 mov r11, qword ptr [r14 + 0x38] +;; 4d: 498b5b48 mov rbx, qword ptr [r11 + 0x48] +;; 51: 4156 push r14 +;; 53: 51 push rcx +;; 54: 488b7c2408 mov rdi, qword ptr [rsp + 8] +;; 59: be00000000 mov esi, 0 +;; 5e: 8b1424 mov edx, dword ptr [rsp] +;; 61: ffd3 call rbx +;; 63: 4883c410 add rsp, 0x10 +;; 67: e904000000 jmp 0x70 +;; 6c: 4883e0fe and rax, 0xfffffffffffffffe +;; 70: 8b4c240c mov ecx, dword ptr [rsp + 0xc] +;; 74: 4c89f2 mov rdx, r14 +;; 77: 8b5a50 mov ebx, dword ptr [rdx + 0x50] +;; 7a: 39d9 cmp ecx, ebx +;; 7c: 0f8326000000 jae 0xa8 +;; 82: 4189cb mov r11d, ecx +;; 85: 4d6bdb08 imul r11, r11, 8 +;; 89: 488b5248 mov rdx, qword ptr [rdx + 0x48] +;; 8d: 4889d6 mov rsi, rdx +;; 
90: 4c01da add rdx, r11 +;; 93: 39d9 cmp ecx, ebx +;; 95: 480f43d6 cmovae rdx, rsi +;; 99: 4883c801 or rax, 1 +;; 9d: 488902 mov qword ptr [rdx], rax +;; a0: 4883c410 add rsp, 0x10 +;; a4: 5d pop rbp +;; a5: c3 ret +;; a6: 0f0b ud2 +;; a8: 0f0b ud2 diff --git a/winch/filetests/src/lib.rs b/winch/filetests/src/lib.rs index 6b435b701a8f..aabcab240837 100644 --- a/winch/filetests/src/lib.rs +++ b/winch/filetests/src/lib.rs @@ -13,8 +13,9 @@ mod test { use wasmtime_environ::{ wasmparser::{Parser as WasmParser, Validator}, DefinedFuncIndex, FunctionBodyData, ModuleEnvironment, ModuleTypes, Tunables, TypeConvert, + VMOffsets, }; - use winch_codegen::{lookup, TargetIsa}; + use winch_codegen::{lookup, BuiltinFunctions, TargetIsa}; use winch_test_macros::generate_file_tests; #[derive(Clone, Debug, Serialize, Deserialize)] @@ -158,10 +159,19 @@ mod test { let sig = types[types.function_at(index.as_u32())].unwrap_func(); let sig = translation.module.convert_func_type(&sig); + let vmoffsets = VMOffsets::new(isa.pointer_bytes(), &translation.module); + let mut builtins = BuiltinFunctions::new(&vmoffsets, isa.wasmtime_call_conv()); let FunctionBodyData { body, validator } = f.1; let mut validator = validator.into_validator(Default::default()); let buffer = isa - .compile_function(&sig, module_types, &body, &translation, &mut validator) + .compile_function( + &sig, + &body, + translation, + module_types, + &mut builtins, + &mut validator, + ) .expect("Couldn't compile function"); disasm(buffer.data(), isa).unwrap() diff --git a/winch/src/compile.rs b/winch/src/compile.rs index 2d8b929022d6..ebf6061080af 100644 --- a/winch/src/compile.rs +++ b/winch/src/compile.rs @@ -6,9 +6,9 @@ use target_lexicon::Triple; use wasmtime_environ::{ wasmparser::{Parser as WasmParser, Validator}, DefinedFuncIndex, FunctionBodyData, ModuleEnvironment, ModuleTranslation, ModuleTypes, - Tunables, TypeConvert, + Tunables, TypeConvert, VMOffsets, }; -use winch_codegen::{lookup, TargetIsa}; +use 
winch_codegen::{lookup, BuiltinFunctions, TargetIsa}; use winch_filetests::disasm::disasm; #[derive(Parser, Debug)] @@ -57,9 +57,18 @@ fn compile( let sig = types[types.function_at(index.as_u32())].unwrap_func(); let sig = translation.module.convert_func_type(sig); let FunctionBodyData { body, validator } = f.1; + let vmoffsets = VMOffsets::new(isa.pointer_bytes(), &translation.module); + let mut builtins = BuiltinFunctions::new(&vmoffsets, isa.wasmtime_call_conv()); let mut validator = validator.into_validator(Default::default()); let buffer = isa - .compile_function(&sig, module_types, &body, &translation, &mut validator) + .compile_function( + &sig, + &body, + translation, + module_types, + &mut builtins, + &mut validator, + ) .expect("Couldn't compile function"); println!("Disassembly for function: {}", index.as_u32()); From 9d3d759478892ec83293d8ead6f41aab992ed5d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C3=BAl=20Cabrera?= Date: Thu, 12 Oct 2023 17:11:49 -0400 Subject: [PATCH 2/4] Review comments --- winch/codegen/src/abi/mod.rs | 4 +-- winch/codegen/src/codegen/builtin.rs | 8 +++--- winch/codegen/src/codegen/call.rs | 31 ++++++----------------- winch/codegen/src/codegen/env.rs | 37 +++++----------------------- winch/codegen/src/codegen/mod.rs | 8 +++--- 5 files changed, 24 insertions(+), 64 deletions(-) diff --git a/winch/codegen/src/abi/mod.rs b/winch/codegen/src/abi/mod.rs index d5ec3ec84924..a8c843113e6c 100644 --- a/winch/codegen/src/abi/mod.rs +++ b/winch/codegen/src/abi/mod.rs @@ -111,7 +111,7 @@ pub(crate) trait ABI { } /// ABI-specific representation of a function argument. -#[derive(Debug)] +#[derive(Clone, Debug)] pub enum ABIArg { /// A register argument. Reg { @@ -230,7 +230,7 @@ impl ABIResult { pub(crate) type ABIParams = SmallVec<[ABIArg; 6]>; /// An ABI-specific representation of a function signature. -#[derive(Debug)] +#[derive(Debug, Clone)] pub(crate) struct ABISig { /// Function parameters. 
pub params: ABIParams, diff --git a/winch/codegen/src/codegen/builtin.rs b/winch/codegen/src/codegen/builtin.rs index 9824a9dbd224..2293afc615fa 100644 --- a/winch/codegen/src/codegen/builtin.rs +++ b/winch/codegen/src/codegen/builtin.rs @@ -13,8 +13,8 @@ use wasmtime_environ::{BuiltinFunctionIndex, PtrSize, VMOffsets, WasmType}; pub(crate) enum BuiltinType { /// Dynamic built-in function, derived from the VMContext. Dynamic { - /// The offset of the built-in function. - offset: u32, + /// The index of the built-in function. + index: u32, /// The built-in function base, relative to the VMContext. base: u32, }, @@ -25,8 +25,8 @@ pub(crate) enum BuiltinType { impl BuiltinType { /// Create a new dynamic built-in function type. - pub fn dynamic(offset: u32, base: u32) -> Self { - Self::Dynamic { offset, base } + pub fn dynamic(index: u32, base: u32) -> Self { + Self::Dynamic { index, base } } /// Create a new known built-in function type. diff --git a/winch/codegen/src/codegen/call.rs b/winch/codegen/src/codegen/call.rs index d612c3391b8f..977b4757e901 100644 --- a/winch/codegen/src/codegen/call.rs +++ b/winch/codegen/src/codegen/call.rs @@ -68,28 +68,13 @@ use crate::{ CallingConvention, }; use smallvec::SmallVec; +use std::borrow::Cow; use wasmtime_environ::{PtrSize, VMOffsets, WasmType}; /// All the information needed to emit a function call. #[derive(Copy, Clone)] pub(crate) struct FnCall {} -/// Internal wrapping of a function signature. -enum Sig<'a> { - Owned(ABISig), - Borrowed(&'a ABISig), -} - -impl<'a> Sig<'a> { - /// Get a reference to the underling signature. - pub fn as_ref(&self) -> &ABISig { - match self { - Self::Owned(ref s) => s, - Self::Borrowed(b) => b, - } - } -} - impl FnCall { /// Orchestrates the emission of a function call: /// 1. Resolves the [`Callee`] through the given callback. @@ -132,25 +117,25 @@ impl FnCall { } /// Derive the [`ABISig`] for a particulare [`Callee]. 
- fn get_sig(callee: &Callee, ptr_type: WasmType) -> Sig { + fn get_sig(callee: &Callee, ptr_type: WasmType) -> Cow<'_, ABISig> { match callee { - Callee::Builtin(info) => Sig::Borrowed(info.sig()), + Callee::Builtin(info) => Cow::Borrowed(info.sig()), Callee::Import(info) => { let mut params: SmallVec<[WasmType; 6]> = SmallVec::with_capacity(info.ty.params().len() + 2); params.extend_from_slice(&[ptr_type, ptr_type]); params.extend_from_slice(info.ty.params()); - Sig::Owned(::sig_from( + Cow::Owned(::sig_from( &params, info.ty.returns(), &CallingConvention::Default, )) } Callee::Local(info) => { - Sig::Owned(::sig(&info.ty, &CallingConvention::Default)) + Cow::Owned(::sig(&info.ty, &CallingConvention::Default)) } Callee::FuncRef(ty) => { - Sig::Owned(::sig(&ty, &CallingConvention::Default)) + Cow::Owned(::sig(&ty, &CallingConvention::Default)) } } } @@ -178,13 +163,13 @@ impl FnCall { masm: &mut M, ) -> CalleeKind { match builtin.ty() { - BuiltinType::Dynamic { offset, base } => { + BuiltinType::Dynamic { index, base } => { let sig = builtin.sig(); let callee = context.without::(&sig.regs, masm, |cx, masm| { let scratch = ::scratch_reg(); let builtins_base = masm.address_at_vmctx(base); masm.load_ptr(builtins_base, scratch); - let addr = masm.address_at_reg(scratch, offset); + let addr = masm.address_at_reg(scratch, index); let callee = cx.any_gpr(masm); masm.load_ptr(addr, callee); callee diff --git a/winch/codegen/src/codegen/env.rs b/winch/codegen/src/codegen/env.rs index 5b3c9575f854..9d5f6e10e868 100644 --- a/winch/codegen/src/codegen/env.rs +++ b/winch/codegen/src/codegen/env.rs @@ -41,31 +41,6 @@ pub enum Callee { Builtin(BuiltinFunction), } -impl Callee { - /// Get the built-in function metadata. - /// - /// # Panics - /// This function panics if the [`Callee`] is not a built-in function. - pub fn get_builtin(&self) -> &BuiltinFunction { - match self { - Self::Builtin(f) => f, - _ => panic!(), - } - } - - /// Get the associated [`CalleeInfo`], if any.
- /// - /// # Panics - /// This function panics if the [`Callee`] is not a local or imported - /// callee. - pub fn get_info(&self) -> &CalleeInfo { - match self { - Self::Local(i) | Self::Import(i) => i, - _ => panic!(), - } - } -} - /// Metadata about a function callee. Used by the code generation to /// emit function calls to local or imported functions. #[derive(Clone)] @@ -80,13 +55,13 @@ pub struct CalleeInfo { /// /// Contains all information about the module and runtime that is accessible to /// to a particular function during code generation. -pub struct FuncEnv<'a, 'b: 'a, 'c: 'b, P: PtrSize> { +pub struct FuncEnv<'a, 'translation: 'a, 'data: 'translation, P: PtrSize> { /// Offsets to the fields within the `VMContext` ptr. pub vmoffsets: &'a VMOffsets

, /// Metadata about the translation process of a WebAssembly module. - pub translation: &'b ModuleTranslation<'c>, + pub translation: &'translation ModuleTranslation<'data>, /// The module's function types. - pub types: &'b ModuleTypes, + pub types: &'translation ModuleTypes, /// Track resolved table information. resolved_tables: HashMap, } @@ -97,12 +72,12 @@ pub fn ptr_type_from_ptr_size(size: u8) -> WasmType { .unwrap_or_else(|| unimplemented!("Support for non-64-bit architectures")) } -impl<'a, 'b, 'c, P: PtrSize> FuncEnv<'a, 'b, 'c, P> { +impl<'a, 'translation, 'data, P: PtrSize> FuncEnv<'a, 'translation, 'data, P> { /// Create a new function environment. pub fn new( vmoffsets: &'a VMOffsets

, - translation: &'b ModuleTranslation<'c>, - types: &'b ModuleTypes, + translation: &'translation ModuleTranslation<'data>, + types: &'translation ModuleTypes, ) -> Self { Self { vmoffsets, diff --git a/winch/codegen/src/codegen/mod.rs b/winch/codegen/src/codegen/mod.rs index a96daccf46f7..51cc94b228b0 100644 --- a/winch/codegen/src/codegen/mod.rs +++ b/winch/codegen/src/codegen/mod.rs @@ -21,7 +21,7 @@ mod builtin; pub use builtin::*; /// The code generation abstraction. -pub(crate) struct CodeGen<'a, 'b: 'a, 'c: 'b, M> +pub(crate) struct CodeGen<'a, 'translation: 'a, 'data: 'translation, M> where M: MacroAssembler, { @@ -29,10 +29,10 @@ where sig: ABISig, /// The code generation context. - pub context: CodeGenContext<'a, 'b>, + pub context: CodeGenContext<'a, 'translation>, /// A reference to the function compilation environment. - pub env: FuncEnv<'a, 'b, 'c, M::Ptr>, + pub env: FuncEnv<'a, 'translation, 'data, M::Ptr>, /// The MacroAssembler. pub masm: &'a mut M, @@ -371,7 +371,7 @@ where let (defined, cont) = (self.masm.get_label(), self.masm.get_label()); - // Push the built-int arguments to the stack. + // Push the built-in arguments to the stack. self.context.stack.extend( [ TypedReg::new(ptr_type, ::vmctx_reg()).into(), From 3d38f9b086199959ee20356a29c255521d154d03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C3=BAl=20Cabrera?= Date: Thu, 12 Oct 2023 18:08:46 -0400 Subject: [PATCH 3/4] Remove unnecessary `into_iter` --- winch/codegen/src/codegen/mod.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/winch/codegen/src/codegen/mod.rs b/winch/codegen/src/codegen/mod.rs index 51cc94b228b0..8ba899c00995 100644 --- a/winch/codegen/src/codegen/mod.rs +++ b/winch/codegen/src/codegen/mod.rs @@ -372,14 +372,11 @@ where let (defined, cont) = (self.masm.get_label(), self.masm.get_label()); // Push the built-in arguments to the stack. 
- self.context.stack.extend( - [ - TypedReg::new(ptr_type, ::vmctx_reg()).into(), - table_index.as_u32().try_into().unwrap(), - index.into(), - ] - .into_iter(), - ); + self.context.stack.extend([ + TypedReg::new(ptr_type, ::vmctx_reg()).into(), + table_index.as_u32().try_into().unwrap(), + index.into(), + ]); self.masm.branch( CmpKind::Ne, From 216ffc335535817f034ada035d8cc44774b52117 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C3=BAl=20Cabrera?= Date: Fri, 13 Oct 2023 10:10:29 -0400 Subject: [PATCH 4/4] Fix remaining lifetime parameter names --- winch/codegen/src/codegen/context.rs | 8 ++++---- winch/codegen/src/codegen/mod.rs | 6 +++--- winch/codegen/src/visitor.rs | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/winch/codegen/src/codegen/context.rs b/winch/codegen/src/codegen/context.rs index aeeb8cb42ff6..a5c7922c9d4d 100644 --- a/winch/codegen/src/codegen/context.rs +++ b/winch/codegen/src/codegen/context.rs @@ -28,7 +28,7 @@ use std::ops::RangeBounds; /// generation process. The code generation context should /// be generally used as the single entry point to access /// the compound functionality provided by its elements. -pub(crate) struct CodeGenContext<'a, 'b: 'a> { +pub(crate) struct CodeGenContext<'a, 'builtins: 'a> { /// The register allocator. pub regalloc: RegAlloc, /// The value stack. @@ -38,18 +38,18 @@ pub(crate) struct CodeGenContext<'a, 'b: 'a> { /// Reachability state. pub reachable: bool, /// The built-in functions available to the JIT code. - pub builtins: &'b mut BuiltinFunctions, + pub builtins: &'builtins mut BuiltinFunctions, /// A reference to the VMOffsets. pub vmoffsets: &'a VMOffsets, } -impl<'a, 'b> CodeGenContext<'a, 'b> { +impl<'a, 'builtins> CodeGenContext<'a, 'builtins> { /// Create a new code generation context. 
pub fn new( regalloc: RegAlloc, stack: Stack, frame: Frame, - builtins: &'b mut BuiltinFunctions, + builtins: &'builtins mut BuiltinFunctions, vmoffsets: &'a VMOffsets, ) -> Self { Self { diff --git a/winch/codegen/src/codegen/mod.rs b/winch/codegen/src/codegen/mod.rs index 8ba899c00995..93f776ae754d 100644 --- a/winch/codegen/src/codegen/mod.rs +++ b/winch/codegen/src/codegen/mod.rs @@ -43,14 +43,14 @@ where pub control_frames: SmallVec<[ControlStackFrame; 64]>, } -impl<'a, 'b, 'c, M> CodeGen<'a, 'b, 'c, M> +impl<'a, 'translation, 'data, M> CodeGen<'a, 'translation, 'data, M> where M: MacroAssembler, { pub fn new( masm: &'a mut M, - context: CodeGenContext<'a, 'b>, - env: FuncEnv<'a, 'b, 'c, M::Ptr>, + context: CodeGenContext<'a, 'translation>, + env: FuncEnv<'a, 'translation, 'data, M::Ptr>, sig: ABISig, ) -> Self { Self { diff --git a/winch/codegen/src/visitor.rs b/winch/codegen/src/visitor.rs index 287059cc8d5e..b5e0db9940ba 100644 --- a/winch/codegen/src/visitor.rs +++ b/winch/codegen/src/visitor.rs @@ -146,7 +146,7 @@ macro_rules! def_unsupported { (emit $unsupported:tt $($rest:tt)*) => {$($rest)*}; } -impl<'a, 'b, 'c, M> VisitOperator<'a> for CodeGen<'a, 'b, 'c, M> +impl<'a, 'translation, 'data, M> VisitOperator<'a> for CodeGen<'a, 'translation, 'data, M> where M: MacroAssembler, {