diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 02185746bcd6..38cb9584f061 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -122,6 +122,7 @@ jobs: - run: cargo check --manifest-path crates/wasmtime/Cargo.toml --features jitdump - run: cargo check --manifest-path crates/wasmtime/Cargo.toml --features cache - run: cargo check --manifest-path crates/wasmtime/Cargo.toml --features async + - run: cargo check --manifest-path crates/wasmtime/Cargo.toml --features uffd # Check some feature combinations of the `wasmtime-c-api` crate - run: cargo check --manifest-path crates/c-api/Cargo.toml --no-default-features diff --git a/Cargo.toml b/Cargo.toml index 00cd71f52628..b8d297993348 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -86,6 +86,7 @@ jitdump = ["wasmtime/jitdump"] vtune = ["wasmtime/vtune"] wasi-crypto = ["wasmtime-wasi-crypto"] wasi-nn = ["wasmtime-wasi-nn"] +uffd = ["wasmtime/uffd"] # Try the experimental, work-in-progress new x86_64 backend. This is not stable # as of June 2020. diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml index e4c282fef122..a074bbca8061 100644 --- a/crates/runtime/Cargo.toml +++ b/crates/runtime/Cargo.toml @@ -37,3 +37,9 @@ cc = "1.0" [badges] maintenance = { status = "actively-developed" } + +[features] +default = [] + +# Enables support for userfaultfd in the pooling allocator when building on Linux +uffd = ["userfaultfd"] diff --git a/crates/runtime/src/instance/allocator/pooling.rs b/crates/runtime/src/instance/allocator/pooling.rs index 349fb3681dbe..154bf9cb218a 100644 --- a/crates/runtime/src/instance/allocator/pooling.rs +++ b/crates/runtime/src/instance/allocator/pooling.rs @@ -31,6 +31,11 @@ cfg_if::cfg_if! { if #[cfg(windows)] { mod windows; use windows as imp; + } else if #[cfg(all(feature = "uffd", target_os = "linux"))] { + mod uffd; + use uffd as imp; + use imp::{PageFaultHandler, reset_guard_page}; + use std::sync::atomic::{AtomicBool, Ordering}; } else if #[cfg(target_os = "linux")] { mod linux; use linux as imp; @@ -335,6 +340,9 @@ impl Iterator for BasePointerIterator { /// structure depending on the limits used to create the pool. /// /// The pool maintains a free list for fast instance allocation. +/// +/// The userfault handler relies on how instances are stored in the mapping, +/// so make sure the uffd implementation is kept up-to-date. #[derive(Debug)] struct InstancePool { mapping: Mmap, @@ -472,6 +480,10 @@ impl Drop for InstancePool { /// /// Each index into the pool returns an iterator over the base addresses /// of the instance's linear memories. +/// +/// +/// The userfault handler relies on how memories are stored in the mapping, +/// so make sure the uffd implementation is kept up-to-date. #[derive(Debug)] struct MemoryPool { mapping: Mmap, @@ -524,6 +536,9 @@ impl MemoryPool { /// /// Each index into the pool returns an iterator over the base addresses /// of the instance's tables. +/// +/// The userfault handler relies on how tables are stored in the mapping, +/// so make sure the uffd implementation is kept up-to-date. #[derive(Debug)] struct TablePool { mapping: Mmap, @@ -588,6 +603,9 @@ impl TablePool { /// /// The top of the stack (starting stack pointer) is returned when a stack is allocated /// from the pool. +/// +/// The userfault handler relies on how stacks are stored in the mapping, +/// so make sure the uffd implementation is kept up-to-date. 
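// Editor's note — a minimal sketch (not part of the patch) of the layout
// invariant these "the userfault handler relies on how X are stored in the
// mapping" comments describe: every pool is a single contiguous mapping carved
// into fixed-size slots, so a slot is recoverable from a raw fault address by
// integer arithmetic alone. The function names are hypothetical; the real
// pools additionally account for guard pages and alignment.
fn slot_base(mapping_start: usize, slot_size: usize, index: usize, max_slots: usize) -> usize {
    assert!(index < max_slots, "slot index out of range for this pool");
    mapping_start + index * slot_size
}

// The inverse is what the fault handler performs: given a faulting address
// inside a pool's mapping, recover which slot (instance, memory, table, or
// stack) was touched.
fn slot_index(mapping_start: usize, slot_size: usize, addr: usize) -> usize {
    (addr - mapping_start) / slot_size
}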
#[derive(Debug)] struct StackPool { mapping: Mmap, @@ -595,6 +613,8 @@ struct StackPool { max_instances: usize, page_size: usize, free_list: Mutex>, + #[cfg(all(feature = "uffd", target_os = "linux"))] + faulted_guard_pages: Arc<[AtomicBool]>, } impl StackPool { @@ -623,6 +643,11 @@ impl StackPool { max_instances, page_size, free_list: Mutex::new((0..max_instances).collect()), + #[cfg(all(feature = "uffd", target_os = "linux"))] + faulted_guard_pages: std::iter::repeat_with(|| false.into()) + .take(max_instances) + .collect::>() + .into(), }) } @@ -647,11 +672,25 @@ impl StackPool { .as_mut_ptr() .add((index * self.stack_size) + self.page_size); - // Make the stack accessible (excluding the guard page) - if !make_accessible(bottom_of_stack, size_without_guard) { - return Err(FiberStackError::Resource( - "failed to make instance memory accessible".into(), - )); + cfg_if::cfg_if! { + if #[cfg(all(feature = "uffd", target_os = "linux"))] { + // Check to see if a guard page needs to be reset + if self.faulted_guard_pages[index].swap(false, Ordering::SeqCst) { + if !reset_guard_page(bottom_of_stack.sub(self.page_size), self.page_size) { + return Err(FiberStackError::Resource( + "failed to reset stack guard page".into(), + )); + } + } + + } else { + // Make the stack accessible (excluding the guard page) + if !make_accessible(bottom_of_stack, size_without_guard) { + return Err(FiberStackError::Resource( + "failed to make instance memory accessible".into(), + )); + } + } } // The top of the stack should be returned @@ -697,6 +736,8 @@ pub struct PoolingInstanceAllocator { memories: mem::ManuallyDrop, tables: mem::ManuallyDrop, stacks: mem::ManuallyDrop, + #[cfg(all(feature = "uffd", target_os = "linux"))] + _fault_handler: PageFaultHandler, } impl PoolingInstanceAllocator { @@ -744,6 +785,9 @@ impl PoolingInstanceAllocator { let tables = TablePool::new(&module_limits, &instance_limits)?; let stacks = StackPool::new(&instance_limits, stack_size)?; + #[cfg(all(feature = "uffd", target_os = "linux"))] + let _fault_handler = PageFaultHandler::new(&instances, &memories, &tables, &stacks)?; + Ok(Self { strategy, module_limits, @@ -752,6 +796,8 @@ impl PoolingInstanceAllocator { memories: mem::ManuallyDrop::new(memories), tables: mem::ManuallyDrop::new(tables), stacks: mem::ManuallyDrop::new(stacks), + #[cfg(all(feature = "uffd", target_os = "linux"))] + _fault_handler, }) } @@ -800,14 +846,28 @@ impl PoolingInstanceAllocator { ) -> Result<(), InstantiationError> { let module = instance.module.as_ref(); + // Reset all guard pages before clearing the previous memories + #[cfg(all(feature = "uffd", target_os = "linux"))] + for (_, m) in instance.memories.iter() { + m.reset_guard_pages() + .map_err(InstantiationError::Resource)?; + } + instance.memories.clear(); for plan in (&module.memory_plans.values().as_slice()[module.num_imported_memories..]).iter() { instance.memories.push( - Memory::new_static(plan, memories.next().unwrap(), max_pages, make_accessible) - .map_err(InstantiationError::Resource)?, + Memory::new_static( + plan, + memories.next().unwrap(), + max_pages, + make_accessible, + #[cfg(all(feature = "uffd", target_os = "linux"))] + reset_guard_page, + ) + .map_err(InstantiationError::Resource)?, ); } @@ -826,7 +886,6 @@ impl PoolingInstanceAllocator { let module = instance.module.as_ref(); instance.tables.clear(); - for plan in (&module.table_plans.values().as_slice()[module.num_imported_tables..]).iter() { let base = tables.next().unwrap(); @@ -852,7 +911,8 @@ impl PoolingInstanceAllocator { 
impl Drop for PoolingInstanceAllocator { fn drop(&mut self) { - // There are manually dropped for the future uffd implementation + // Manually drop the pools before the fault handler (if uffd is enabled) + // This ensures that any fault handler thread monitoring the pool memory terminates unsafe { mem::ManuallyDrop::drop(&mut self.instances); mem::ManuallyDrop::drop(&mut self.memories); diff --git a/crates/runtime/src/instance/allocator/pooling/uffd.rs b/crates/runtime/src/instance/allocator/pooling/uffd.rs new file mode 100644 index 000000000000..dda577b5c14b --- /dev/null +++ b/crates/runtime/src/instance/allocator/pooling/uffd.rs @@ -0,0 +1,570 @@ +//! Implements user-mode page fault handling with the `userfaultfd` ("uffd") system call on Linux. +//! +//! Handling page faults for memory accesses in regions relating to WebAssembly instances +//! enables the implementation of guard pages in user space rather than kernel space. +//! +//! This reduces the number of system calls and kernel locks needed to provide correct +//! WebAssembly memory semantics. +//! +//! Additionally, linear memories and WebAssembly tables can be lazy-initialized upon access. +//! +//! This feature requires a Linux kernel 4.11 or newer to use. + +use super::{InstancePool, MemoryPool, StackPool, TablePool}; +use crate::{instance::Instance, table::max_table_element_size, Memory, Mmap}; +use std::convert::TryInto; +use std::ptr; +use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, +}; +use std::thread; +use userfaultfd::{Event, FeatureFlags, IoctlFlags, Uffd, UffdBuilder}; +use wasmtime_environ::{ + wasm::{DefinedMemoryIndex, DefinedTableIndex}, + WASM_PAGE_SIZE, +}; + +pub unsafe fn make_accessible(_addr: *mut u8, _len: usize) -> bool { + // A no-op when userfaultfd is used + true +} + +pub unsafe fn reset_guard_page(addr: *mut u8, len: usize) -> bool { + // Guard pages are READ_WRITE with uffd until faulted + region::protect(addr, len, region::Protection::READ_WRITE).is_ok() +} + +pub unsafe fn decommit(addr: *mut u8, len: usize) { + // Use MADV_DONTNEED to mark the pages as missing + // This will cause a missing page fault for next access on any page in the given range + assert_eq!( + libc::madvise(addr as _, len, libc::MADV_DONTNEED), + 0, + "madvise failed to mark pages as missing: {}", + std::io::Error::last_os_error() + ); +} + +pub fn create_memory_map(_accessible_size: usize, mapping_size: usize) -> Result { + // Allocate a single read-write region at once + // As writable pages need to count towards commit charge, use MAP_NORESERVE to override. + // This implies that the kernel is configured to allow overcommit or else + // this allocation will almost certainly fail without a plethora of physical memory to back the alloction. + // The consequence of not reserving is that our process may segfault on any write to a memory + // page that cannot be backed (i.e. out of memory conditions). + + if mapping_size == 0 { + return Ok(Mmap::new()); + } + + unsafe { + let ptr = libc::mmap( + ptr::null_mut(), + mapping_size, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_PRIVATE | libc::MAP_ANON | libc::MAP_NORESERVE, + -1, + 0, + ); + + if ptr as isize == -1_isize { + return Err(format!( + "failed to allocate pool memory: {}", + std::io::Error::last_os_error() + )); + } + + Ok(Mmap::from_raw(ptr as usize, mapping_size)) + } +} + +/// Represents a location of a page fault within monitored regions of memory. +enum AddressLocation<'a> { + /// The address location is in a WebAssembly table page. 
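// Editor's note — a minimal sketch (not part of the patch) of the reuse cycle
// implied by `decommit` and `reset_guard_page` above. `madvise(MADV_DONTNEED)`
// discards a slot's pages; because the region is registered with userfaultfd,
// the next access raises a missing-page event that the handler thread
// satisfies (zero-fill or initialization) rather than the kernel silently
// zero-filling it. The function name is hypothetical, and the real code only
// re-protects the guard pages that actually faulted.
unsafe fn recycle_slot(addr: *mut u8, len: usize) -> bool {
    // Drop the old contents; the backing pages become "missing" again.
    if libc::madvise(addr as *mut libc::c_void, len, libc::MADV_DONTNEED) != 0 {
        return false;
    }
    // Restore READ_WRITE on any guard page the handler flipped to NONE after
    // an out-of-bounds access, so the slot starts from a clean state.
    region::protect(addr, len, region::Protection::READ_WRITE).is_ok()
}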
+ TablePage { + /// The address of the page being accessed. + page_addr: *mut u8, + /// The length of the page being accessed. + len: usize, + /// The starting element initialization index. + init_start: usize, + /// The exclusive ending element initialization index. + init_end: usize, + }, + /// The address location is in a WebAssembly linear memory page. + MemoryPage { + mem: &'a Memory, + /// The address of the page being accessed. + page_addr: *mut u8, + /// The length of the page being accessed. + len: usize, + /// The starting data initialization offset. + init_start: usize, + /// The exclusive ending data initialization offset. + init_end: usize, + /// Whether or not the access was inbounds (i.e. not a guard page). + inbounds: bool, + }, + /// The address location is in an execution stack. + StackPage { + /// The address of the page being accessed. + page_addr: *mut u8, + /// The length of the page being accessed. + len: usize, + /// The index of the stack that was accessed. + index: usize, + /// Whether or not the access was inbounds (i.e. not a guard page). + inbounds: bool, + }, +} + +/// Used to resolve fault addresses to address locations. +/// +/// This implementation relies heavily on how the various resource pools utilize their memory. +/// +/// `usize` is used here instead of pointers to keep this `Send` as it gets sent to the handler thread. +struct AddressLocator { + instances_start: usize, + instances_end: usize, + instance_size: usize, + max_instances: usize, + memories_start: usize, + memories_end: usize, + memory_size: usize, + max_memories: usize, + tables_start: usize, + tables_end: usize, + table_size: usize, + max_tables: usize, + stacks_start: usize, + stacks_end: usize, + stack_size: usize, + page_size: usize, +} + +impl AddressLocator { + fn new( + instances: &InstancePool, + memories: &MemoryPool, + tables: &TablePool, + stacks: &StackPool, + ) -> Self { + let instances_start = instances.mapping.as_ptr() as usize; + let instances_end = instances_start + instances.mapping.len(); + let memories_start = memories.mapping.as_ptr() as usize; + let memories_end = memories_start + memories.mapping.len(); + let tables_start = tables.mapping.as_ptr() as usize; + let tables_end = tables_start + tables.mapping.len(); + let stacks_start = stacks.mapping.as_ptr() as usize; + let stacks_end = stacks_start + stacks.mapping.len(); + let stack_size = stacks.stack_size; + + // Should always have instances + debug_assert!(instances_start != 0); + + Self { + instances_start, + instances_end, + instance_size: instances.instance_size, + max_instances: instances.max_instances, + memories_start, + memories_end, + memory_size: memories.memory_size, + max_memories: memories.max_memories, + tables_start, + tables_end, + table_size: tables.table_size, + max_tables: tables.max_tables, + stacks_start, + stacks_end, + stack_size, + page_size: tables.page_size, + } + } + + // This is super-duper unsafe as it is used from the handler thread + // to access instance data without any locking primitives. + /// + /// It is assumed that the thread that owns the instance being accessed is + /// currently suspended waiting on a fault to be handled. + /// + /// Of course a stray faulting memory access from a thread that does not own + /// the instance might introduce a race, but this implementation considers + /// such to be a serious bug. + /// + /// If the assumption holds true, accessing the instance data from the handler thread + /// should, in theory, be safe. 
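// Editor's note — a minimal sketch (not part of the patch) of the arithmetic
// `get_location` performs below for the linear-memory pool; the function name
// and tuple return are hypothetical. All inputs are the plain `usize` fields
// captured from the pools when the AddressLocator is built.
fn locate_memory_fault(
    addr: usize,
    memories_start: usize,
    memory_size: usize,
    max_memories: usize,
    wasm_page_size: usize,
) -> (usize, usize, usize) {
    // Which memory slot inside the memories mapping was touched?
    let slot = (addr - memories_start) / memory_size;
    // Slots are grouped per instance: owning instance, then memory within it.
    let instance_index = slot / max_memories;
    let defined_memory_index = slot % max_memories;
    // Round the slot-relative offset down to a WebAssembly page boundary;
    // the handler initializes that whole page at once.
    let slot_start = memories_start + slot * memory_size;
    let page_start = ((addr - slot_start) / wasm_page_size) * wasm_page_size;
    (instance_index, defined_memory_index, page_start)
}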
+ unsafe fn get_instance(&self, index: usize) -> &mut Instance { + debug_assert!(index < self.max_instances); + &mut *((self.instances_start + (index * self.instance_size)) as *mut Instance) + } + + unsafe fn get_location(&self, addr: usize) -> Option { + // Check for a memory location + if addr >= self.memories_start && addr < self.memories_end { + let index = (addr - self.memories_start) / self.memory_size; + let wasm_mem_start = self.memories_start + (index * self.memory_size); + let wasm_mem_offset = addr - wasm_mem_start; + let wasm_page = wasm_mem_offset / (WASM_PAGE_SIZE as usize); + let init_start = wasm_page * (WASM_PAGE_SIZE as usize); + + let mem = &self.get_instance(index / self.max_memories).memories + [DefinedMemoryIndex::from_u32((index % self.max_memories).try_into().unwrap())]; + + return Some(AddressLocation::MemoryPage { + mem, + page_addr: (wasm_mem_start + init_start) as _, + len: WASM_PAGE_SIZE as usize, + init_start, + init_end: init_start + (WASM_PAGE_SIZE as usize), + inbounds: wasm_page < mem.size() as usize, + }); + } + + // Check for a table location + if addr >= self.tables_start && addr < self.tables_end { + let element_size = max_table_element_size(); + let elements_per_page = self.page_size / element_size; + + let index = (addr - self.tables_start) / self.table_size; + let table_start = self.tables_start + (index * self.table_size); + let table_offset = addr - table_start; + let page_offset = (table_offset / self.page_size) * self.page_size; + let init_start = page_offset * element_size; + + let instance = self.get_instance(index / self.max_tables); + let table = &instance.tables + [DefinedTableIndex::from_u32((index % self.max_tables).try_into().unwrap())]; + + return Some(AddressLocation::TablePage { + page_addr: (table_start + page_offset) as _, + len: self.page_size, + init_start, + init_end: std::cmp::min( + table.maximum().unwrap_or(u32::MAX) as usize, + init_start + elements_per_page, + ), + }); + } + + // Check for a stack location + if addr >= self.stacks_start && addr < self.stacks_end { + let index = (addr - self.stacks_start) / self.stack_size; + let stack_start = self.stacks_start + (index * self.stack_size); + let stack_offset = addr - stack_start; + let page_offset = (stack_offset / self.page_size) * self.page_size; + + return Some(AddressLocation::StackPage { + page_addr: (stack_start + page_offset) as _, + len: self.page_size, + index, + inbounds: stack_offset >= self.page_size, + }); + } + + None + } +} + +fn wake_guard_page_access(uffd: &Uffd, page_addr: *const u8, len: usize) -> Result<(), String> { + unsafe { + // Set the page to NONE to induce a SIGSEV for the access on the next retry + region::protect(page_addr, len, region::Protection::NONE) + .map_err(|e| format!("failed to change guard page protection: {}", e))?; + + uffd.wake(page_addr as _, len).map_err(|e| { + format!( + "failed to wake page at {:p} with length {}: {}", + page_addr, len, e + ) + })?; + + Ok(()) + } +} + +fn handler_thread( + uffd: Uffd, + locator: AddressLocator, + mut registrations: usize, + faulted_stack_guard_pages: Arc<[AtomicBool]>, +) -> Result<(), String> { + loop { + match uffd.read_event().expect("failed to read event") { + Some(Event::Unmap { start, end }) => { + log::trace!("memory region unmapped: {:p}-{:p}", start, end); + + let (start, end) = (start as usize, end as usize); + + if (start == locator.instances_start && end == locator.instances_end) + || (start == locator.memories_start && end == locator.memories_end) + || (start == 
locator.tables_start && end == locator.tables_end) + || (start == locator.stacks_start && end == locator.stacks_end) + { + registrations -= 1; + if registrations == 0 { + break; + } + } else { + panic!("unexpected memory region unmapped"); + } + } + Some(Event::Pagefault { + addr: access_addr, .. + }) => { + unsafe { + match locator.get_location(access_addr as usize) { + Some(AddressLocation::TablePage { + page_addr, + len, + init_start, + init_end, + }) => { + log::trace!( + "handling fault in table at address {:p} on page {:p}; initializing elements [{}, {})", + access_addr, + page_addr, + init_start, + init_end + ); + + // TODO: copy the table initialization elements rather than zero the page + // TODO: are retries necessary? + uffd.zeropage(page_addr as _, len, true).map_err(|e| { + format!( + "failed to zero page at {:p} with length {}: {}", + page_addr, len, e + ) + })?; + } + Some(AddressLocation::MemoryPage { + mem, + page_addr, + len, + init_start, + init_end, + inbounds, + }) => { + log::trace!("handling fault in linear memory at address {:p} on page {:p}; initializing data [{:p}, {:p})", + access_addr, page_addr, init_start as *const (), init_end as *const ()); + + if !inbounds { + // Logging as trace as memory accesses are not bounds checked + log::trace!("out of bounds memory access at {:p}", access_addr); + + // Record the page fault with the linear memory + // The next time the memory is grown or reused, the guard page protection + // will be reset. + mem.record_guard_page_fault(page_addr, len); + wake_guard_page_access(&uffd, page_addr, len)?; + continue; + } + + // TODO: copy the memory initialization data rather than zero the page + // TODO: are retries necessary? + uffd.zeropage(page_addr as _, len, true).map_err(|e| { + format!( + "failed to zero page at {:p} with length {}: {}", + page_addr, len, e + ) + })?; + } + Some(AddressLocation::StackPage { + page_addr, + len, + index, + inbounds, + }) => { + log::trace!( + "handling fault in stack {} at address {:p}", + index, + access_addr, + ); + + if !inbounds { + // Logging as trace as stack guard pages might be a trap condition in the future + log::trace!("stack overflow fault at {:p}", access_addr); + + // Mark the stack as having a faulted guard page + // The next time the stack is used the guard page will be reset + faulted_stack_guard_pages[index].store(true, Ordering::SeqCst); + wake_guard_page_access(&uffd, page_addr, len)?; + continue; + } + + // Always zero stack pages + // TODO: are retries necessary? 
+ uffd.zeropage(page_addr as _, len, true).map_err(|e| { + format!( + "failed to zero page at {:p} with length {}: {}", + page_addr, len, e + ) + })?; + } + None => { + return Err(format!( + "failed to locate fault address {:p} in registered memory regions", + access_addr + )); + } + } + } + } + Some(_) => continue, + None => break, + } + } + + Ok(()) +} + +#[derive(Debug)] +pub struct PageFaultHandler { + thread: Option>>, +} + +impl PageFaultHandler { + pub(super) fn new( + instances: &InstancePool, + memories: &MemoryPool, + tables: &TablePool, + stacks: &StackPool, + ) -> Result { + let uffd = UffdBuilder::new() + .close_on_exec(true) + .require_features(FeatureFlags::EVENT_UNMAP) + .create() + .map_err(|e| format!("failed to create user fault descriptor: {}", e))?; + + // Register the ranges with the userfault fd + let mut registrations = 0; + for (start, len) in &[ + (memories.mapping.as_ptr() as usize, memories.mapping.len()), + (tables.mapping.as_ptr() as usize, tables.mapping.len()), + (stacks.mapping.as_ptr() as usize, stacks.mapping.len()), + ] { + if *start == 0 || *len == 0 { + continue; + } + + let ioctls = uffd + .register(*start as _, *len) + .map_err(|e| format!("failed to register user fault range: {}", e))?; + + if !ioctls.contains(IoctlFlags::WAKE | IoctlFlags::COPY | IoctlFlags::ZEROPAGE) { + return Err(format!( + "required user fault ioctls not supported; found: {:?}", + ioctls, + )); + } + + registrations += 1; + } + + let thread = if registrations == 0 { + log::trace!("user fault handling disabled as there are no regions to monitor"); + None + } else { + log::trace!( + "user fault handling enabled on {} memory regions", + registrations + ); + + unsafe { + libc::nice(-3); + } + + let locator = AddressLocator::new(&instances, &memories, &tables, &stacks); + + let faulted_stack_guard_pages = stacks.faulted_guard_pages.clone(); + + Some( + thread::Builder::new() + .name("page fault handler".into()) + .spawn(move || { + handler_thread(uffd, locator, registrations, faulted_stack_guard_pages) + }) + .map_err(|e| format!("failed to spawn page fault handler thread: {}", e))?, + ) + }; + + Ok(Self { thread }) + } +} + +impl Drop for PageFaultHandler { + fn drop(&mut self) { + if let Some(thread) = self.thread.take() { + thread + .join() + .expect("failed to join page fault handler thread") + .expect("fault handler thread failed"); + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::{InstanceLimits, ModuleLimits}; + + #[cfg(target_pointer_width = "64")] + #[test] + fn test_address_locator() { + let module_limits = ModuleLimits { + imported_functions: 0, + imported_tables: 0, + imported_memories: 0, + imported_globals: 0, + types: 0, + functions: 0, + tables: 3, + memories: 2, + globals: 0, + table_elements: 10, + memory_pages: 1, + }; + let instance_limits = InstanceLimits { + count: 3, + address_space_size: (WASM_PAGE_SIZE * 2) as u64, + }; + + let instances = + InstancePool::new(&module_limits, &instance_limits).expect("should allocate"); + let memories = MemoryPool::new(&module_limits, &instance_limits).expect("should allocate"); + let tables = TablePool::new(&module_limits, &instance_limits).expect("should allocate"); + let stacks = StackPool::new(&instance_limits, 8192).expect("should allocate"); + + let locator = AddressLocator::new(&instances, &memories, &tables, &stacks); + + assert_eq!(locator.instances_start, instances.mapping.as_ptr() as usize); + assert_eq!( + locator.instances_end, + locator.instances_start + instances.mapping.len() + ); + 
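// Editor's note — a minimal sketch (not part of the patch) of the event-loop
// shape used by `handler_thread` above, stripped of the pool-specific address
// resolution, guard-page handling, and unmap accounting. `resolve` stands in
// for `AddressLocator::get_location` and is hypothetical.
fn demand_zero_loop(
    uffd: userfaultfd::Uffd,
    resolve: impl Fn(usize) -> Option<(*mut u8, usize)>,
) -> Result<(), String> {
    loop {
        match uffd.read_event().map_err(|e| e.to_string())? {
            Some(userfaultfd::Event::Pagefault { addr, .. }) => {
                if let Some((page_addr, len)) = resolve(addr as usize) {
                    // Satisfy the fault with a zero page and wake the faulting
                    // thread in one call (the `true` argument requests the wake).
                    unsafe {
                        uffd.zeropage(page_addr as _, len, true)
                            .map_err(|e| e.to_string())?;
                    }
                } else {
                    return Err(format!("unexpected fault at {:p}", addr));
                }
            }
            // Unmap events drive shutdown accounting in the real handler.
            Some(_) => continue,
            // The descriptor was closed; nothing left to monitor.
            None => break,
        }
    }
    Ok(())
}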
assert_eq!(locator.instance_size, 4096); + assert_eq!(locator.max_instances, 3); + assert_eq!(locator.memories_start, memories.mapping.as_ptr() as usize); + assert_eq!( + locator.memories_end, + locator.memories_start + memories.mapping.len() + ); + assert_eq!(locator.memory_size, (WASM_PAGE_SIZE * 2) as usize); + assert_eq!(locator.max_memories, 2); + assert_eq!(locator.tables_start, tables.mapping.as_ptr() as usize); + assert_eq!( + locator.tables_end, + locator.tables_start + tables.mapping.len() + ); + assert_eq!(locator.table_size, 4096); + assert_eq!(locator.max_tables, 3); + + assert_eq!(locator.stacks_start, stacks.mapping.as_ptr() as usize); + assert_eq!( + locator.stacks_end, + locator.stacks_start + stacks.mapping.len() + ); + assert_eq!(locator.stack_size, 12288); + } +} diff --git a/crates/runtime/src/memory.rs b/crates/runtime/src/memory.rs index 7d248b136189..baed7f775270 100644 --- a/crates/runtime/src/memory.rs +++ b/crates/runtime/src/memory.rs @@ -178,6 +178,10 @@ enum MemoryStorage { size: Cell, maximum: u32, make_accessible: unsafe fn(*mut u8, usize) -> bool, + #[cfg(all(feature = "uffd", target_os = "linux"))] + guard_page_faults: RefCell>, + #[cfg(all(feature = "uffd", target_os = "linux"))] + reset_guard_page: unsafe fn(*mut u8, usize) -> bool, }, Dynamic(Box), } @@ -204,6 +208,11 @@ impl Memory { base: *mut u8, maximum: u32, make_accessible: unsafe fn(*mut u8, usize) -> bool, + #[cfg(all(feature = "uffd", target_os = "linux"))] reset_guard_page: unsafe fn( + *mut u8, + usize, + ) + -> bool, ) -> Result { if plan.memory.minimum > 0 { if unsafe { @@ -219,6 +228,10 @@ impl Memory { size: Cell::new(plan.memory.minimum), maximum: min(plan.memory.maximum.unwrap_or(maximum), maximum), make_accessible, + #[cfg(all(feature = "uffd", target_os = "linux"))] + guard_page_faults: RefCell::new(Vec::new()), + #[cfg(all(feature = "uffd", target_os = "linux"))] + reset_guard_page, }, }) } @@ -262,6 +275,10 @@ impl Memory { return None; } + // With uffd enabled, faulted guard pages need to be reset prior to growing memory + #[cfg(all(feature = "uffd", target_os = "linux"))] + self.reset_guard_pages().ok()?; + size.set(new_size); Some(old_size) @@ -280,4 +297,44 @@ impl Memory { MemoryStorage::Dynamic(mem) => mem.vmmemory(), } } + + /// Records a faulted guard page. + /// + /// This is used to track faulted guard pages that need to be reset before growing memory. + #[cfg(all(feature = "uffd", target_os = "linux"))] + pub fn record_guard_page_fault(&self, page_addr: *mut u8, size: usize) { + if let MemoryStorage::Static { + guard_page_faults, .. + } = &self.storage + { + let mut faults = guard_page_faults.borrow_mut(); + faults.push((page_addr, size)); + } + } + + /// Resets previously faulted guard pages. + /// + /// This is used to reset the protection of any guard pages that were previously faulted. + /// + /// Resetting the guard pages is required before growing memory. + #[cfg(all(feature = "uffd", target_os = "linux"))] + pub fn reset_guard_pages(&self) -> Result<(), String> { + if let MemoryStorage::Static { + guard_page_faults, + reset_guard_page, + .. + } = &self.storage + { + let mut faults = guard_page_faults.borrow_mut(); + for (addr, len) in faults.drain(..) 
{
+                unsafe {
+                    if !reset_guard_page(addr, len) {
+                        return Err("failed to reset previously faulted memory guard page".into());
+                    }
+                }
+            }
+        }
+
+        Ok(())
+    }
 }
diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml
index d063f0de04ae..3097f6b9a049 100644
--- a/crates/wasmtime/Cargo.toml
+++ b/crates/wasmtime/Cargo.toml
@@ -72,3 +72,6 @@ experimental_x64 = ["wasmtime-jit/experimental_x64"]
 # Enables support for "async stores" as well as defining host functions as
 # `async fn` and calling functions asynchronously.
 async = ["wasmtime-fiber"]
+
+# Enables userfaultfd support in the runtime's pooling allocator when building on Linux
+uffd = ["wasmtime-runtime/uffd"]
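// Editor's note — a minimal sketch (not part of the patch) of the bookkeeping
// pattern behind `record_guard_page_fault` / `reset_guard_pages` above: the
// handler thread only *records* a faulted guard page, and the protection
// change is deferred until the owning thread next grows the memory or the
// slot is reused. The type and function names here are hypothetical; in the
// patch the reset function is `reset_guard_page` from uffd.rs.
use std::cell::RefCell;

struct GuardPageTracker {
    faulted: RefCell<Vec<(*mut u8, usize)>>,
    reset: unsafe fn(*mut u8, usize) -> bool,
}

impl GuardPageTracker {
    // Called from the fault handler's bookkeeping path.
    fn record(&self, page_addr: *mut u8, len: usize) {
        self.faulted.borrow_mut().push((page_addr, len));
    }

    // Called on the owning thread before the memory is grown or reused.
    fn reset_all(&self) -> Result<(), String> {
        for (addr, len) in self.faulted.borrow_mut().drain(..) {
            if unsafe { !(self.reset)(addr, len) } {
                return Err("failed to reset faulted guard page".into());
            }
        }
        Ok(())
    }
}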