From 66b930ca694d0cd92b7b818dfc4c1883e2a6a233 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Thu, 15 Dec 2022 13:17:50 -0800 Subject: [PATCH] wiggle: copy guest slices back to shared memory This change upgrades `UnsafeGuestSlice` in Wiggle to expose more functionality to be able to use `std::ptr::copy` for writing bytes into Wasm shared memory. Additionally, it adds a new `GuestCow` type for delineating between Wasm memory regions that can be borrowed (non-shared memory) or must be copied (shared memory) in order to maintain Rust guarantees. With these in place, it is now possible to implement the `preview1` "read" functions for shared memory. Previously, these would panic if attempting to copy to a shared memory. This change removes the panic and introduces some (rather complex) logic for handling both the shared and non-shared cases: - if reading into a Wasm non-shared memory, Wiggle guarantees that no other guest pointers will touch the memory region and, in the absence of concurrency, `preview1` can write directly to this memory - if reading into a Wasm shared memory, the memory region can be concurrently modified. At @alexcrichton's request re: Rust safety, this change copies all of the bytes into an intermediate buffer before using `std::ptr::copy` to move them into Wasm memory. --- crates/wasi-common/src/snapshots/preview_1.rs | 291 +++++++++++++----- crates/wiggle/src/lib.rs | 147 ++++++--- 2 files changed, 321 insertions(+), 117 deletions(-) diff --git a/crates/wasi-common/src/snapshots/preview_1.rs b/crates/wasi-common/src/snapshots/preview_1.rs index 37abf1f6f86c..578a488e37b6 100644 --- a/crates/wasi-common/src/snapshots/preview_1.rs +++ b/crates/wasi-common/src/snapshots/preview_1.rs @@ -13,12 +13,16 @@ use crate::{ use cap_std::time::{Duration, SystemClock}; use std::convert::{TryFrom, TryInto}; use std::io::{IoSlice, IoSliceMut}; -use std::ops::{Deref, DerefMut}; +use std::ops::Deref; use wiggle::GuestPtr; pub mod error; use error::{Error, ErrorExt}; +// Limit the size of intermediate buffers when copying to WebAssembly shared +// memory. +const MAX_SHARED_BUFFER_SIZE: usize = 1 << 16; + wiggle::from_witx!({ witx: ["$WASI_ROOT/phases/snapshot/witx/wasi_snapshot_preview1.witx"], errors: { errno => trappable Error }, @@ -295,23 +299,70 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .get_file_mut(u32::from(fd))? .get_cap_mut(FileCaps::READ)?; - let mut guest_slices: Vec> = - iovs.iter() - .map(|iov_ptr| { - let iov_ptr = iov_ptr?; - let iov: types::Iovec = iov_ptr.read()?; - Ok(iov.buf.as_array(iov.buf_len).as_slice_mut()?.expect( - "cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)", - )) - }) + let iovs: Vec> = iovs + .iter() + .map(|iov_ptr| { + let iov_ptr = iov_ptr?; + let iov: types::Iovec = iov_ptr.read()?; + Ok(iov.buf.as_array(iov.buf_len).as_unsafe_slice_mut()?) + }) + .collect::>()?; + + // If the first iov structure is from shared memory we can safely assume + // all the rest will be. We then read into memory based on the memory's + // shared-ness: + // - if not shared, we copy directly into the Wasm memory + // - if shared, we use an intermediate buffer; this avoids Rust unsafety + // due to holding on to a `&mut [u8]` of Wasm memory when we cannot + // guarantee the `&mut` exclusivity--other threads could be modifying + // the data as this functions writes to it. Though likely there is no + // issue with OS writing to io structs in multi-threaded scenarios, + // since we do not know here if `&dyn WasiFile` does anything else + // (e.g., read), we cautiously incur some performance overhead by + // copying twice. + let is_shared_memory = iovs + .iter() + .next() + .and_then(|s| Some(s.is_shared_memory())) + .unwrap_or(false); + let bytes_read: u64 = if is_shared_memory { + // Read into an intermediate buffer. + let total_available_size = iovs.iter().fold(0, |a, s| a + s.len()); + let mut buffer = vec![0; total_available_size.min(MAX_SHARED_BUFFER_SIZE)]; + let mut ioslices = vec![IoSliceMut::new(&mut buffer)]; + let bytes_read = f.read_vectored(&mut ioslices).await?; + + // Copy the intermediate buffer into the Wasm shared memory--`iov` + // by `iov`. + let mut data_to_write = &buffer[0..]; + for iov in iovs.into_iter() { + let len = data_to_write.len().min(iov.len()); + iov.copy_from_slice(&data_to_write[0..len])?; + data_to_write = &data_to_write[len..]; + if data_to_write.is_empty() { + break; + } + } + + bytes_read + } else { + // Convert all of the unsafe guest slices to safe ones--this uses + // Wiggle's internal borrow checker to ensure no overlaps. We assume + // here that, because the memory is not shared, there are no other + // threads to access it while it is written to. + let mut guest_slices: Vec> = iovs + .into_iter() + .map(|iov| Ok(iov.as_slice_mut()?.unwrap())) .collect::>()?; - let mut ioslices: Vec = guest_slices - .iter_mut() - .map(|s| IoSliceMut::new(&mut *s)) - .collect(); + // Read directly into the Wasm memory. + let mut ioslices: Vec = guest_slices + .iter_mut() + .map(|s| IoSliceMut::new(&mut *s)) + .collect(); + f.read_vectored(&mut ioslices).await? + }; - let bytes_read = f.read_vectored(&mut ioslices).await?; Ok(types::Size::try_from(bytes_read)?) } @@ -326,23 +377,70 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .get_file_mut(u32::from(fd))? .get_cap_mut(FileCaps::READ | FileCaps::SEEK)?; - let mut guest_slices: Vec> = - iovs.iter() - .map(|iov_ptr| { - let iov_ptr = iov_ptr?; - let iov: types::Iovec = iov_ptr.read()?; - Ok(iov.buf.as_array(iov.buf_len).as_slice_mut()?.expect( - "cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)", - )) - }) + let iovs: Vec> = iovs + .iter() + .map(|iov_ptr| { + let iov_ptr = iov_ptr?; + let iov: types::Iovec = iov_ptr.read()?; + Ok(iov.buf.as_array(iov.buf_len).as_unsafe_slice_mut()?) + }) + .collect::>()?; + + // If the first iov structure is from shared memory we can safely assume + // all the rest will be. We then read into memory based on the memory's + // shared-ness: + // - if not shared, we copy directly into the Wasm memory + // - if shared, we use an intermediate buffer; this avoids Rust unsafety + // due to holding on to a `&mut [u8]` of Wasm memory when we cannot + // guarantee the `&mut` exclusivity--other threads could be modifying + // the data as this functions writes to it. Though likely there is no + // issue with OS writing to io structs in multi-threaded scenarios, + // since we do not know here if `&dyn WasiFile` does anything else + // (e.g., read), we cautiously incur some performance overhead by + // copying twice. + let is_shared_memory = iovs + .iter() + .next() + .and_then(|s| Some(s.is_shared_memory())) + .unwrap_or(false); + let bytes_read: u64 = if is_shared_memory { + // Read into an intermediate buffer. + let total_available_size = iovs.iter().fold(0, |a, s| a + s.len()); + let mut buffer = vec![0; total_available_size.min(MAX_SHARED_BUFFER_SIZE)]; + let mut ioslices = vec![IoSliceMut::new(&mut buffer)]; + let bytes_read = f.read_vectored_at(&mut ioslices, offset).await?; + + // Copy the intermediate buffer into the Wasm shared memory--`iov` + // by `iov`. + let mut data_to_write = &buffer[0..]; + for iov in iovs.into_iter() { + let len = data_to_write.len().min(iov.len()); + iov.copy_from_slice(&data_to_write[0..len])?; + data_to_write = &data_to_write[len..]; + if data_to_write.is_empty() { + break; + } + } + + bytes_read + } else { + // Convert all of the unsafe guest slices to safe ones--this uses + // Wiggle's internal borrow checker to ensure no overlaps. We assume + // here that, because the memory is not shared, there are no other + // threads to access it while it is written to. + let mut guest_slices: Vec> = iovs + .into_iter() + .map(|iov| Ok(iov.as_slice_mut()?.unwrap())) .collect::>()?; - let mut ioslices: Vec = guest_slices - .iter_mut() - .map(|s| IoSliceMut::new(&mut *s)) - .collect(); + // Read directly into the Wasm memory. + let mut ioslices: Vec = guest_slices + .iter_mut() + .map(|s| IoSliceMut::new(&mut *s)) + .collect(); + f.read_vectored_at(&mut ioslices, offset).await? + }; - let bytes_read = f.read_vectored_at(&mut ioslices, offset).await?; Ok(types::Size::try_from(bytes_read)?) } @@ -356,16 +454,12 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .get_file_mut(u32::from(fd))? .get_cap_mut(FileCaps::WRITE)?; - let guest_slices: Vec> = ciovs + let guest_slices: Vec> = ciovs .iter() .map(|iov_ptr| { let iov_ptr = iov_ptr?; let iov: types::Ciovec = iov_ptr.read()?; - Ok(iov - .buf - .as_array(iov.buf_len) - .as_slice()? - .expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)")) + Ok(iov.buf.as_array(iov.buf_len).as_cow()?) }) .collect::>()?; @@ -389,16 +483,12 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .get_file_mut(u32::from(fd))? .get_cap_mut(FileCaps::WRITE | FileCaps::SEEK)?; - let guest_slices: Vec> = ciovs + let guest_slices: Vec> = ciovs .iter() .map(|iov_ptr| { let iov_ptr = iov_ptr?; let iov: types::Ciovec = iov_ptr.read()?; - Ok(iov - .buf - .as_array(iov.buf_len) - .as_slice()? - .expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)")) + Ok(iov.buf.as_array(iov.buf_len).as_cow()?) }) .collect::>()?; @@ -440,11 +530,7 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { if path_len < path_max_len as usize { return Err(Error::name_too_long()); } - let mut p_memory = path - .as_array(path_len as u32) - .as_slice_mut()? - .expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)"); - p_memory.copy_from_slice(path_bytes); + path.as_array(path_len as u32).copy_from_slice(path_bytes)?; Ok(()) } else { Err(Error::not_supported()) @@ -737,11 +823,7 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { if link_len > buf_len as usize { return Err(Error::range()); } - let mut buf = buf - .as_array(link_len as u32) - .as_slice_mut()? - .expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)"); - buf.copy_from_slice(link_bytes); + buf.as_array(link_len as u32).copy_from_slice(link_bytes)?; Ok(link_len as types::Size) } @@ -1029,11 +1111,21 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { buf: &GuestPtr<'a, u8>, buf_len: types::Size, ) -> Result<(), Error> { - let mut buf = buf - .as_array(buf_len) - .as_slice_mut()? - .expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)"); - self.random.try_fill_bytes(buf.deref_mut())?; + let buf = buf.as_array(buf_len).as_unsafe_slice_mut()?; + if buf.is_shared_memory() { + // If the Wasm memory is shared, copy to an intermediate buffer to + // avoid Rust unsafety (i.e., the called function could rely on + // `&mut [u8]`'s exclusive ownership which is not guaranteed due to + // potential access from other threads). + let mut tmp = vec![0; buf.len().min(MAX_SHARED_BUFFER_SIZE)]; + self.random.try_fill_bytes(&mut tmp)?; + buf.copy_from_slice(&tmp)?; + } else { + // If the Wasm memory is non-shared, copy directly into the linear + // memory. + let mem = &mut buf.as_slice_mut()?.unwrap(); + self.random.try_fill_bytes(mem)?; + } Ok(()) } @@ -1069,25 +1161,72 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .get_file_mut(u32::from(fd))? .get_cap_mut(FileCaps::READ)?; - let mut guest_slices: Vec> = - ri_data - .iter() - .map(|iov_ptr| { - let iov_ptr = iov_ptr?; - let iov: types::Iovec = iov_ptr.read()?; - Ok(iov.buf.as_array(iov.buf_len).as_slice_mut()?.expect( - "cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)", - )) - }) + let iovs: Vec> = ri_data + .iter() + .map(|iov_ptr| { + let iov_ptr = iov_ptr?; + let iov: types::Iovec = iov_ptr.read()?; + Ok(iov.buf.as_array(iov.buf_len).as_unsafe_slice_mut()?) + }) + .collect::>()?; + + // If the first iov structure is from shared memory we can safely assume + // all the rest will be. We then read into memory based on the memory's + // shared-ness: + // - if not shared, we copy directly into the Wasm memory + // - if shared, we use an intermediate buffer; this avoids Rust unsafety + // due to holding on to a `&mut [u8]` of Wasm memory when we cannot + // guarantee the `&mut` exclusivity--other threads could be modifying + // the data as this functions writes to it. Though likely there is no + // issue with OS writing to io structs in multi-threaded scenarios, + // since we do not know here if `&dyn WasiFile` does anything else + // (e.g., read), we cautiously incur some performance overhead by + // copying twice. + let is_shared_memory = iovs + .iter() + .next() + .and_then(|s| Some(s.is_shared_memory())) + .unwrap_or(false); + let (bytes_read, ro_flags) = if is_shared_memory { + // Read into an intermediate buffer. + let total_available_size = iovs.iter().fold(0, |a, s| a + s.len()); + let mut buffer = vec![0; total_available_size.min(MAX_SHARED_BUFFER_SIZE)]; + let mut ioslices = vec![IoSliceMut::new(&mut buffer)]; + let (bytes_read, ro_flags) = + f.sock_recv(&mut ioslices, RiFlags::from(ri_flags)).await?; + + // Copy the intermediate buffer into the Wasm shared memory--`iov` + // by `iov`. + let mut data_to_write = &buffer[0..]; + for iov in iovs.into_iter() { + let len = data_to_write.len().min(iov.len()); + iov.copy_from_slice(&data_to_write[0..len])?; + data_to_write = &data_to_write[len..]; + if data_to_write.is_empty() { + break; + } + } + + (bytes_read, ro_flags) + } else { + // Convert all of the unsafe guest slices to safe ones--this uses + // Wiggle's internal borrow checker to ensure no overlaps. We assume + // here that, because the memory is not shared, there are no other + // threads to access it while it is written to. + let mut guest_slices: Vec> = iovs + .into_iter() + .map(|iov| Ok(iov.as_slice_mut()?.unwrap())) .collect::>()?; - let mut ioslices: Vec = guest_slices - .iter_mut() - .map(|s| IoSliceMut::new(&mut *s)) - .collect(); + // Read directly into the Wasm memory. + let mut ioslices: Vec = guest_slices + .iter_mut() + .map(|s| IoSliceMut::new(&mut *s)) + .collect(); + f.sock_recv(&mut ioslices, RiFlags::from(ri_flags)).await? + }; - let (bytes_read, roflags) = f.sock_recv(&mut ioslices, RiFlags::from(ri_flags)).await?; - Ok((types::Size::try_from(bytes_read)?, roflags.into())) + Ok((types::Size::try_from(bytes_read)?, ro_flags.into())) } async fn sock_send<'a>( @@ -1101,16 +1240,12 @@ impl wasi_snapshot_preview1::WasiSnapshotPreview1 for WasiCtx { .get_file_mut(u32::from(fd))? .get_cap_mut(FileCaps::WRITE)?; - let guest_slices: Vec> = si_data + let guest_slices: Vec> = si_data .iter() .map(|iov_ptr| { let iov_ptr = iov_ptr?; let iov: types::Ciovec = iov_ptr.read()?; - Ok(iov - .buf - .as_array(iov.buf_len) - .as_slice()? - .expect("cannot use with shared memories; see https://github.com/bytecodealliance/wasmtime/issues/5235 (TODO)")) + Ok(iov.buf.as_array(iov.buf_len).as_cow()?) }) .collect::>()?; diff --git a/crates/wiggle/src/lib.rs b/crates/wiggle/src/lib.rs index 6e288397809a..43b48f28eb57 100644 --- a/crates/wiggle/src/lib.rs +++ b/crates/wiggle/src/lib.rs @@ -512,6 +512,24 @@ impl<'a, T> GuestPtr<'a, [T]> { (0..self.len()).map(move |i| base.add(i)) } + /// Attempts to create a [`GuestCow<'_, T>`] from this pointer, performing + /// bounds checks and type validation. Whereas [`GuestPtr::as_slice`] will + /// fail with `None` if attempting to access Wasm shared memory, this call + /// will succeed: if used on shared memory, this function will copy the + /// slice into [`GuestCow::Copied`]. If the memory is non-shared, this + /// returns a [`GuestCow::Borrowed`] (a thin wrapper over [`GuestSlice<'_, + /// T>]`). + pub fn as_cow(&self) -> Result, GuestError> + where + T: GuestTypeTransparent<'a> + Copy + 'a, + { + match self.as_unsafe_slice_mut()?.shared_borrow() { + UnsafeBorrowResult::Ok(slice) => Ok(GuestCow::Borrowed(slice)), + UnsafeBorrowResult::Shared(_) => Ok(GuestCow::Copied(self.to_vec()?)), + UnsafeBorrowResult::Err(e) => Err(e), + } + } + /// Attempts to create a [`GuestSlice<'_, T>`] from this pointer, performing /// bounds checks and type validation. The `GuestSlice` is a smart pointer /// that can be used as a `&[T]` via the `Deref` trait. The region of memory @@ -524,7 +542,8 @@ impl<'a, T> GuestPtr<'a, [T]> { /// any checks fail then `GuestError` will be returned. /// /// Additionally, because it is `unsafe` to have a `GuestSlice` of shared - /// memory, this function will return `None` in this case. + /// memory, this function will return `None` in this case (see + /// [`GuestPtr::as_cow`]). pub fn as_slice(&self) -> Result>, GuestError> where T: GuestTypeTransparent<'a>, @@ -553,11 +572,7 @@ impl<'a, T> GuestPtr<'a, [T]> { where T: GuestTypeTransparent<'a>, { - match self.as_unsafe_slice_mut()?.mut_borrow() { - UnsafeBorrowResult::Ok(slice) => Ok(Some(slice)), - UnsafeBorrowResult::Shared(_) => Ok(None), - UnsafeBorrowResult::Err(e) => Err(e), - } + self.as_unsafe_slice_mut()?.as_slice_mut() } /// Similar to `as_slice_mut`, this function will attempt to create a smart @@ -614,39 +629,7 @@ impl<'a, T> GuestPtr<'a, [T]> { where T: GuestTypeTransparent<'a> + Copy + 'a, { - // Retrieve the slice of memory to copy to, performing the necessary - // bounds checks ... - let guest_slice = self.as_unsafe_slice_mut()?; - // ... length check ... - if guest_slice.ptr.len() != slice.len() { - return Err(GuestError::SliceLengthsDiffer); - } - if slice.len() == 0 { - return Ok(()); - } - - // ... and copy the bytes. - match guest_slice.mut_borrow() { - UnsafeBorrowResult::Ok(mut dst) => dst.copy_from_slice(slice), - UnsafeBorrowResult::Shared(guest_slice) => { - // SAFETY: in the shared memory case, we copy and accept that - // the guest data may be concurrently modified. TODO: audit that - // this use of `std::ptr::copy` is safe with shared memory - // (https://github.com/bytecodealliance/wasmtime/issues/4203) - // - // Also note that the validity of `guest_slice` has already been - // determined by the `as_unsafe_slice_mut` call above. - unsafe { - std::ptr::copy( - slice.as_ptr(), - guest_slice.ptr[0].get(), - guest_slice.ptr.len(), - ) - }; - } - UnsafeBorrowResult::Err(e) => return Err(e), - } - Ok(()) + self.as_unsafe_slice_mut()?.copy_from_slice(slice) } /// Returns a `GuestPtr` pointing to the base of the array for the interior @@ -772,6 +755,30 @@ impl fmt::Debug for GuestPtr<'_, T> { } } +/// A smart pointer for distinguishing between different kinds of Wasm memory: +/// shared and non-shared. +/// +/// As with `GuestSlice`, this is usable as a `&'a [T]` via [`std::ops::Deref`]. +/// The major difference is that, for shared memories, the memory will be copied +/// out of Wasm linear memory to avoid the possibility of concurrent mutation by +/// another thread. This extra copy exists solely to maintain the Rust +/// guarantees regarding `&[T]`. +pub enum GuestCow<'a, T> { + Borrowed(GuestSlice<'a, T>), + Copied(Vec), +} + +impl<'a, T> std::ops::Deref for GuestCow<'a, T> { + type Target = [T]; + + fn deref(&self) -> &Self::Target { + match self { + GuestCow::Borrowed(s) => s, + GuestCow::Copied(s) => s, + } + } +} + /// A smart pointer to an shareable slice in guest memory. /// /// Usable as a `&'a [T]` via [`std::ops::Deref`]. @@ -868,7 +875,69 @@ pub struct UnsafeGuestSlice<'a, T> { mem: &'a dyn GuestMemory, } +unsafe impl Sync for UnsafeGuestSlice<'_, T> {} +unsafe impl Send for UnsafeGuestSlice<'_, T> {} + impl<'a, T> UnsafeGuestSlice<'a, T> { + /// See `GuestPtr::copy_from_slice`. + pub fn copy_from_slice(self, slice: &[T]) -> Result<(), GuestError> + where + T: GuestTypeTransparent<'a> + Copy + 'a, + { + // Check the length... + if self.ptr.len() != slice.len() { + return Err(GuestError::SliceLengthsDiffer); + } + if slice.len() == 0 { + return Ok(()); + } + + // ... and copy the bytes. + match self.mut_borrow() { + UnsafeBorrowResult::Ok(mut dst) => dst.copy_from_slice(slice), + UnsafeBorrowResult::Shared(guest_slice) => { + // SAFETY: in the shared memory case, we copy and accept that + // the guest data may be concurrently modified. TODO: audit that + // this use of `std::ptr::copy` is safe with shared memory + // (https://github.com/bytecodealliance/wasmtime/issues/4203) + // + // Also note that the validity of `guest_slice` has already been + // determined by the `as_unsafe_slice_mut` call above. + unsafe { + std::ptr::copy( + slice.as_ptr(), + guest_slice.ptr[0].get(), + guest_slice.ptr.len(), + ) + }; + } + UnsafeBorrowResult::Err(e) => return Err(e), + } + Ok(()) + } + + /// Return the number of items in this slice. + pub fn len(&self) -> usize { + self.ptr.len() + } + + /// Check if this slice comes from WebAssembly shared memory. + pub fn is_shared_memory(&self) -> bool { + self.mem.is_shared_memory() + } + + /// See `GuestPtr::as_slice_mut`. + pub fn as_slice_mut(self) -> Result>, GuestError> + where + T: GuestTypeTransparent<'a>, + { + match self.mut_borrow() { + UnsafeBorrowResult::Ok(slice) => Ok(Some(slice)), + UnsafeBorrowResult::Shared(_) => Ok(None), + UnsafeBorrowResult::Err(e) => Err(e), + } + } + /// Transform an `unsafe` guest slice to a [`GuestSliceMut`]. /// /// # Safety