Don't copy VMBuiltinFunctionsArray into each VMContext (#3741)
* Don't copy `VMBuiltinFunctionsArray` into each `VMContext`

This is another PR along the lines of "let's squeeze all possible
performance we can out of instantiation". Before this PR we would copy,
by value, the contents of `VMBuiltinFunctionsArray` into each
`VMContext` allocated. This array of function pointers is modestly
sized but grows over time as we add various intrinsics, and it is
exactly the same for every `VMContext` allocation.

This PR attempts to speed up instantiation slightly by instead storing
an indirection to the function array (sketched below). This means that
calling a builtin intrinsic is a tad slower since it requires two loads
instead of one (one to get the base pointer, another to get the actual
function address). In exchange, though, `VMContext` initialization is
now simply setting one pointer instead of doing a `memcpy` from one
location to another.

With some macro magic this commit also replaces the previous
implementation with one that's more `const`-friendly, which also gets
us compile-time type checks of libcalls as well as compile-time
verification that all libcalls are defined.

Overall, as with #3739, the win is very modest here. Locally I measured
the time to instantiate an empty module with one function drop from
1.9us to 1.7us. While small at these scales, it's still a 10% improvement!

* Review comments
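
To make the change concrete, here is a minimal, hypothetical Rust sketch — simplified names and signatures, not the actual `wasmtime-runtime` definitions — of a context that embeds the builtin-function table versus one that points at a single shared table:

```rust
// Hypothetical, simplified sketch; the real `VMContext` is an opaque,
// dynamically sized allocation laid out via `VMOffsets`, and the real
// array has one entry per builtin libcall.

#[repr(C)]
struct BuiltinFunctionsArray {
    memory32_grow: unsafe extern "C" fn(*mut Context, u64, u32) -> *mut u8,
    data_drop: unsafe extern "C" fn(*mut Context, u32),
}

#[repr(C)]
struct Context {
    // Before: the whole table was embedded here and copied in for every
    // instantiation.
    //     builtins: BuiltinFunctionsArray,
    // After: a single pointer to one shared, constant table.
    builtins: *const BuiltinFunctionsArray,
}

// Stand-in libcall bodies for the sketch.
unsafe extern "C" fn memory32_grow(_cx: *mut Context, _delta: u64, _idx: u32) -> *mut u8 {
    std::ptr::null_mut()
}
unsafe extern "C" fn data_drop(_cx: *mut Context, _idx: u32) {}

// One shared copy of the table for the whole process.
static BUILTINS: BuiltinFunctionsArray = BuiltinFunctionsArray {
    memory32_grow,
    data_drop,
};

fn initialize(cx: &mut Context) {
    // Instantiation now stores one pointer instead of memcpy'ing the table.
    cx.builtins = &BUILTINS;
}

unsafe fn call_memory32_grow(cx: *mut Context) -> *mut u8 {
    // A builtin call now takes two loads: one for the table's base pointer
    // and one for the function address itself.
    unsafe { ((*(*cx).builtins).memory32_grow)(cx, 64 * 1024, 0) }
}
```

The trade-off described above falls out directly: `initialize` is a single pointer store per instantiation, while `call_memory32_grow` pays one extra load to reach the shared table before loading the function address.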
alexcrichton authored Jan 28, 2022
1 parent 2f49424 commit a25f7bd
Showing 5 changed files with 86 additions and 110 deletions.
11 changes: 8 additions & 3 deletions crates/cranelift/src/func_environ.rs
@@ -281,10 +281,15 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
let mut mem_flags = ir::MemFlags::trusted();
mem_flags.set_readonly();

// Load the base of the array of builtin functions
let array_offset = i32::try_from(self.offsets.vmctx_builtin_functions()).unwrap();
let array_addr = pos.ins().load(pointer_type, mem_flags, base, array_offset);

// Load the callee address.
let body_offset =
i32::try_from(self.offsets.vmctx_builtin_function(callee_func_idx)).unwrap();
let func_addr = pos.ins().load(pointer_type, mem_flags, base, body_offset);
let body_offset = i32::try_from(callee_func_idx.index() * pointer_type.bytes()).unwrap();
let func_addr = pos
.ins()
.load(pointer_type, mem_flags, array_addr, body_offset);

(base, func_addr)
}
20 changes: 5 additions & 15 deletions crates/environ/src/vmoffsets.rs
@@ -16,12 +16,12 @@
// memories: [VMMemoryDefinition; module.num_defined_memories],
// globals: [VMGlobalDefinition; module.num_defined_globals],
// anyfuncs: [VMCallerCheckedAnyfunc; module.num_imported_functions + module.num_defined_functions],
// builtins: VMBuiltinFunctionsArray,
// builtins: *mut VMBuiltinFunctionsArray,
// }

use crate::{
BuiltinFunctionIndex, DefinedGlobalIndex, DefinedMemoryIndex, DefinedTableIndex, FuncIndex,
GlobalIndex, MemoryIndex, Module, TableIndex, TypeIndex,
DefinedGlobalIndex, DefinedMemoryIndex, DefinedTableIndex, FuncIndex, GlobalIndex, MemoryIndex,
Module, TableIndex, TypeIndex,
};
use more_asserts::assert_lt;
use std::convert::TryFrom;
@@ -287,11 +287,7 @@ impl<P: PtrSize> From<VMOffsetsFields<P>> for VMOffsets<P> {
.unwrap();
ret.size = ret
.builtin_functions
.checked_add(
BuiltinFunctionIndex::builtin_functions_total_number()
.checked_mul(u32::from(ret.pointer_size()))
.unwrap(),
)
.checked_add(u32::from(ret.pointer_size()))
.unwrap();

return ret;
@@ -597,7 +593,7 @@ impl<P: PtrSize> VMOffsets<P> {

/// The offset of the builtin functions array.
#[inline]
pub fn vmctx_builtin_functions_begin(&self) -> u32 {
pub fn vmctx_builtin_functions(&self) -> u32 {
self.builtin_functions
}

@@ -739,12 +735,6 @@ impl<P: PtrSize> VMOffsets<P> {
pub fn vmctx_vmglobal_import_from(&self, index: GlobalIndex) -> u32 {
self.vmctx_vmglobal_import(index) + u32::from(self.vmglobal_import_from())
}

/// Return the offset to builtin function in `VMBuiltinFunctionsArray` index `index`.
#[inline]
pub fn vmctx_builtin_function(&self, index: BuiltinFunctionIndex) -> u32 {
self.vmctx_builtin_functions_begin() + index.index() * u32::from(self.pointer_size())
}
}

/// Offsets for `VMExternData`.
6 changes: 2 additions & 4 deletions crates/runtime/src/instance/allocator.rs
@@ -481,10 +481,8 @@ unsafe fn initialize_vmcontext(instance: &mut Instance, req: InstanceAllocationR
}

// Initialize the built-in functions
ptr::write(
instance.vmctx_plus_offset(instance.offsets.vmctx_builtin_functions_begin()),
VMBuiltinFunctionsArray::initialized(),
);
*instance.vmctx_plus_offset(instance.offsets.vmctx_builtin_functions()) =
&VMBuiltinFunctionsArray::INIT;

// Initialize the imports
debug_assert_eq!(req.imports.functions.len(), module.num_imported_funcs);
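
The `VMBuiltinFunctionsArray::INIT` constant written into the `VMContext` above is produced by the "macro magic" the commit message mentions. That macro is not part of the hunks shown on this page, so the following is only a rough sketch of the general technique with assumed names, not Wasmtime's actual macro: a single macro lists every libcall with its signature and is expanded once into the struct definition and once into a `const` initializer, so a missing or mis-typed libcall fails to compile.

```rust
// Hypothetical sketch only: assumed macro and type names, not Wasmtime's
// actual implementation.

/// One central list of libcalls and their signatures. Any macro passed in
/// gets invoked with this list.
macro_rules! foreach_builtin {
    ($mac:ident) => {
        $mac! {
            memory32_grow(vmctx: *mut u8, delta: u64, index: u32) -> *mut u8;
            data_drop(vmctx: *mut u8, data_index: u32) -> ();
        }
    };
}

// Expansion 1: the array-of-function-pointers struct itself.
macro_rules! define_struct {
    ($($name:ident($($arg:ident: $ty:ty),*) -> $ret:ty;)*) => {
        #[repr(C)]
        pub struct BuiltinFunctionsArray {
            $(pub $name: unsafe extern "C" fn($($ty),*) -> $ret,)*
        }
    };
}
foreach_builtin!(define_struct);

// Expansion 2: a `const` initializer. Each field is assigned the libcall of
// the same name, so the compiler verifies that every libcall exists and that
// its signature matches the declared one.
macro_rules! define_init {
    ($($name:ident($($arg:ident: $ty:ty),*) -> $ret:ty;)*) => {
        impl BuiltinFunctionsArray {
            pub const INIT: BuiltinFunctionsArray =
                BuiltinFunctionsArray { $($name: libcalls::$name,)* };
        }
    };
}
foreach_builtin!(define_init);

// Stand-in libcall implementations for this sketch.
mod libcalls {
    pub unsafe extern "C" fn memory32_grow(_vmctx: *mut u8, _delta: u64, _index: u32) -> *mut u8 {
        std::ptr::null_mut()
    }
    pub unsafe extern "C" fn data_drop(_vmctx: *mut u8, _data_index: u32) {}
}
```

Because the table is a `const`, the allocator can store `&VMBuiltinFunctionsArray::INIT` directly (presumably relying on `const` promotion for the `'static` lifetime), leaving no per-instance initialization work beyond the single pointer write.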
69 changes: 38 additions & 31 deletions crates/runtime/src/libcalls.rs
@@ -185,14 +185,14 @@ pub extern "C" fn wasmtime_f64_nearest(x: f64) -> f64 {
}

/// Implementation of memory.grow for locally-defined 32-bit memories.
pub unsafe extern "C" fn wasmtime_memory32_grow(
pub unsafe extern "C" fn memory32_grow(
vmctx: *mut VMContext,
delta: u64,
memory_index: u32,
) -> usize {
) -> *mut u8 {
// Memory grow can invoke user code provided in a ResourceLimiter{,Async},
// so we need to catch a possible panic
match std::panic::catch_unwind(|| {
let ret = match std::panic::catch_unwind(|| {
let instance = (*vmctx).instance_mut();
let memory_index = MemoryIndex::from_u32(memory_index);
instance.memory_grow(memory_index, delta)
@@ -201,11 +201,12 @@ pub unsafe extern "C" fn wasmtime_memory32_grow(
Ok(Ok(None)) => usize::max_value(),
Ok(Err(err)) => crate::traphandlers::raise_user_trap(err),
Err(p) => resume_panic(p),
}
};
ret as *mut u8
}

/// Implementation of `table.grow`.
pub unsafe extern "C" fn wasmtime_table_grow(
pub unsafe extern "C" fn table_grow(
vmctx: *mut VMContext,
table_index: u32,
delta: u32,
@@ -238,8 +239,11 @@ pub unsafe extern "C" fn wasmtime_table_grow(
}
}

pub use table_grow as table_grow_funcref;
pub use table_grow as table_grow_externref;

/// Implementation of `table.fill`.
pub unsafe extern "C" fn wasmtime_table_fill(
pub unsafe extern "C" fn table_fill(
vmctx: *mut VMContext,
table_index: u32,
dst: u32,
@@ -272,8 +276,11 @@ pub unsafe extern "C" fn wasmtime_table_fill(
}
}

pub use table_fill as table_fill_funcref;
pub use table_fill as table_fill_externref;

/// Implementation of `table.copy`.
pub unsafe extern "C" fn wasmtime_table_copy(
pub unsafe extern "C" fn table_copy(
vmctx: *mut VMContext,
dst_table_index: u32,
src_table_index: u32,
@@ -295,7 +302,7 @@ pub unsafe extern "C" fn wasmtime_table_copy(
}

/// Implementation of `table.init`.
pub unsafe extern "C" fn wasmtime_table_init(
pub unsafe extern "C" fn table_init(
vmctx: *mut VMContext,
table_index: u32,
elem_index: u32,
@@ -315,14 +322,14 @@ pub unsafe extern "C" fn wasmtime_table_init(
}

/// Implementation of `elem.drop`.
pub unsafe extern "C" fn wasmtime_elem_drop(vmctx: *mut VMContext, elem_index: u32) {
pub unsafe extern "C" fn elem_drop(vmctx: *mut VMContext, elem_index: u32) {
let elem_index = ElemIndex::from_u32(elem_index);
let instance = (*vmctx).instance_mut();
instance.elem_drop(elem_index);
}

/// Implementation of `memory.copy` for locally defined memories.
pub unsafe extern "C" fn wasmtime_memory_copy(
pub unsafe extern "C" fn memory_copy(
vmctx: *mut VMContext,
dst_index: u32,
dst: u64,
@@ -342,7 +349,7 @@ pub unsafe extern "C" fn wasmtime_memory_copy(
}

/// Implementation of `memory.fill` for locally defined memories.
pub unsafe extern "C" fn wasmtime_memory_fill(
pub unsafe extern "C" fn memory_fill(
vmctx: *mut VMContext,
memory_index: u32,
dst: u64,
@@ -360,7 +367,7 @@ pub unsafe extern "C" fn wasmtime_memory_fill(
}

/// Implementation of `memory.init`.
pub unsafe extern "C" fn wasmtime_memory_init(
pub unsafe extern "C" fn memory_init(
vmctx: *mut VMContext,
memory_index: u32,
data_index: u32,
@@ -380,22 +387,22 @@ pub unsafe extern "C" fn wasmtime_memory_init(
}

/// Implementation of `data.drop`.
pub unsafe extern "C" fn wasmtime_data_drop(vmctx: *mut VMContext, data_index: u32) {
pub unsafe extern "C" fn data_drop(vmctx: *mut VMContext, data_index: u32) {
let data_index = DataIndex::from_u32(data_index);
let instance = (*vmctx).instance_mut();
instance.data_drop(data_index)
}

/// Drop a `VMExternRef`.
pub unsafe extern "C" fn wasmtime_drop_externref(externref: *mut u8) {
pub unsafe extern "C" fn drop_externref(externref: *mut u8) {
let externref = externref as *mut crate::externref::VMExternData;
let externref = NonNull::new(externref).unwrap();
crate::externref::VMExternData::drop_and_dealloc(externref);
}

/// Do a GC and insert the given `externref` into the
/// `VMExternRefActivationsTable`.
pub unsafe extern "C" fn wasmtime_activations_table_insert_with_gc(
pub unsafe extern "C" fn activations_table_insert_with_gc(
vmctx: *mut VMContext,
externref: *mut u8,
) {
@@ -416,10 +423,7 @@ pub unsafe extern "C" fn wasmtime_activations_table_insert_with_gc(
}

/// Perform a Wasm `global.get` for `externref` globals.
pub unsafe extern "C" fn wasmtime_externref_global_get(
vmctx: *mut VMContext,
index: u32,
) -> *mut u8 {
pub unsafe extern "C" fn externref_global_get(vmctx: *mut VMContext, index: u32) -> *mut u8 {
let index = GlobalIndex::from_u32(index);
let instance = (*vmctx).instance();
let global = instance.defined_or_imported_global_ptr(index);
@@ -436,7 +440,7 @@ pub unsafe extern "C" fn wasmtime_externref_global_get(
}

/// Perform a Wasm `global.set` for `externref` globals.
pub unsafe extern "C" fn wasmtime_externref_global_set(
pub unsafe extern "C" fn externref_global_set(
vmctx: *mut VMContext,
index: u32,
externref: *mut u8,
@@ -460,13 +464,14 @@ pub unsafe extern "C" fn wasmtime_externref_global_set(
}

/// Implementation of `memory.atomic.notify` for locally defined memories.
pub unsafe extern "C" fn wasmtime_memory_atomic_notify(
pub unsafe extern "C" fn memory_atomic_notify(
vmctx: *mut VMContext,
memory_index: u32,
addr: usize,
addr: *mut u8,
_count: u32,
) -> u32 {
let result = {
let addr = addr as usize;
let memory = MemoryIndex::from_u32(memory_index);
let instance = (*vmctx).instance();
// this should never overflow since addr + 4 either hits a guard page
@@ -475,7 +480,7 @@ pub unsafe extern "C" fn wasmtime_memory_atomic_notify(
let addr_to_check = addr.checked_add(4).unwrap();
validate_atomic_addr(instance, memory, addr_to_check).and_then(|()| {
Err(Trap::User(anyhow::anyhow!(
"unimplemented: wasm atomics (fn wasmtime_memory_atomic_notify) unsupported",
"unimplemented: wasm atomics (fn memory_atomic_notify) unsupported",
)))
})
};
@@ -486,22 +491,23 @@ pub unsafe extern "C" fn wasmtime_memory_atomic_wait32(
}

/// Implementation of `memory.atomic.wait32` for locally defined memories.
pub unsafe extern "C" fn wasmtime_memory_atomic_wait32(
pub unsafe extern "C" fn memory_atomic_wait32(
vmctx: *mut VMContext,
memory_index: u32,
addr: usize,
addr: *mut u8,
_expected: u32,
_timeout: u64,
) -> u32 {
let result = {
let addr = addr as usize;
let memory = MemoryIndex::from_u32(memory_index);
let instance = (*vmctx).instance();
// see wasmtime_memory_atomic_notify for why this shouldn't overflow
// but we still double-check
let addr_to_check = addr.checked_add(4).unwrap();
validate_atomic_addr(instance, memory, addr_to_check).and_then(|()| {
Err(Trap::User(anyhow::anyhow!(
"unimplemented: wasm atomics (fn wasmtime_memory_atomic_wait32) unsupported",
"unimplemented: wasm atomics (fn memory_atomic_wait32) unsupported",
)))
})
};
@@ -512,22 +518,23 @@ pub unsafe extern "C" fn wasmtime_memory_atomic_wait32(
}

/// Implementation of `memory.atomic.wait64` for locally defined memories.
pub unsafe extern "C" fn wasmtime_memory_atomic_wait64(
pub unsafe extern "C" fn memory_atomic_wait64(
vmctx: *mut VMContext,
memory_index: u32,
addr: usize,
addr: *mut u8,
_expected: u64,
_timeout: u64,
) -> u32 {
let result = {
let addr = addr as usize;
let memory = MemoryIndex::from_u32(memory_index);
let instance = (*vmctx).instance();
// see wasmtime_memory_atomic_notify for why this shouldn't overflow
// but we still double-check
let addr_to_check = addr.checked_add(8).unwrap();
validate_atomic_addr(instance, memory, addr_to_check).and_then(|()| {
Err(Trap::User(anyhow::anyhow!(
"unimplemented: wasm atomics (fn wasmtime_memory_atomic_wait64) unsupported",
"unimplemented: wasm atomics (fn memory_atomic_wait64) unsupported",
)))
})
};
@@ -561,15 +568,15 @@ unsafe fn validate_atomic_addr(
}

/// Hook for when an instance runs out of fuel.
pub unsafe extern "C" fn wasmtime_out_of_gas(vmctx: *mut VMContext) {
pub unsafe extern "C" fn out_of_gas(vmctx: *mut VMContext) {
match (*(*vmctx).instance().store()).out_of_gas() {
Ok(()) => {}
Err(err) => crate::traphandlers::raise_user_trap(err),
}
}

/// Hook for when an instance observes that the epoch has changed.
pub unsafe extern "C" fn wasmtime_new_epoch(vmctx: *mut VMContext) -> u64 {
pub unsafe extern "C" fn new_epoch(vmctx: *mut VMContext) -> u64 {
match (*(*vmctx).instance().store()).new_epoch() {
Ok(new_deadline) => new_deadline,
Err(err) => crate::traphandlers::raise_user_trap(err),
