From 9b896d2a708d314e151849dd1243e56b500c9b02 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 18 Jan 2023 09:04:10 -0600 Subject: [PATCH] Resolve libcall relocations for older CPUs (#5567) * Resolve libcall relocations for older CPUs Long ago Wasmtime used to have logic for resolving relocations post-compilation for libcalls which I ended up removing during refactorings last year. As #5563 points out, however, it's possible to get Wasmtime to panic by disabling SSE features which forces Cranelift to use libcalls for some floating-point operations instead. Note that this also requires disabling SIMD because SIMD support has a baseline of SSE 4.2. This commit pulls back the old implementations of various libcalls and reimplements logic necessary to have them work on CPUs without SSE 4.2 Closes #5563 * Fix log message in `wast` support * Fix offset listed in relocations Be sure to factor in the offset of the function itself * Review comments --- crates/cranelift/src/obj.rs | 75 +++++++++++++++-- crates/environ/src/obj.rs | 43 +++++++++- .../src/generators/codegen_settings.rs | 41 ++++++++++ crates/fuzzing/src/generators/config.rs | 5 ++ crates/jit/src/code_memory.rs | 64 +++++++++++++-- crates/runtime/src/libcalls.rs | 82 +++++++++++++++++++ crates/wast/src/wast.rs | 2 +- 7 files changed, 298 insertions(+), 14 deletions(-) diff --git a/crates/cranelift/src/obj.rs b/crates/cranelift/src/obj.rs index 757b4705db06..a596a1a75ceb 100644 --- a/crates/cranelift/src/obj.rs +++ b/crates/cranelift/src/obj.rs @@ -15,6 +15,8 @@ use crate::{CompiledFunction, RelocationTarget}; use anyhow::Result; +use cranelift_codegen::binemit::Reloc; +use cranelift_codegen::ir::LibCall; use cranelift_codegen::isa::{ unwind::{systemv, UnwindInfo}, TargetIsa, @@ -24,6 +26,7 @@ use gimli::write::{Address, EhFrame, EndianVec, FrameTable, Writer}; use gimli::RunTimeEndian; use object::write::{Object, SectionId, StandardSegment, Symbol, SymbolId, SymbolSection}; use object::{Architecture, 
SectionKind, SymbolFlags, SymbolKind, SymbolScope}; +use std::collections::HashMap; use std::convert::TryFrom; use std::ops::Range; use wasmtime_environ::FuncIndex; @@ -52,6 +55,13 @@ pub struct ModuleTextBuilder<'a> { /// In-progress text section that we're using cranelift's `MachBuffer` to /// build to resolve relocations (calls) between functions. text: Box<dyn TextSectionBuilder>, + + /// Symbols defined in the object for libcalls that relocations are applied + /// against. + /// + /// Note that this isn't typically used. It's only used for SSE-disabled + /// builds without SIMD on x86_64 right now. + libcall_symbols: HashMap<LibCall, SymbolId>, } impl<'a> ModuleTextBuilder<'a> { @@ -76,6 +86,7 @@ impl<'a> ModuleTextBuilder<'a> { text_section, unwind_info: Default::default(), text: isa.text_section_builder(num_funcs), + libcall_symbols: HashMap::default(), } } @@ -146,13 +157,49 @@ impl<'a> ModuleTextBuilder<'a> { ); } - // At this time it's not expected that any libcall relocations - // are generated. Ideally we don't want relocations against - // libcalls anyway as libcalls should go through indirect - // `VMContext` tables to avoid needing to apply relocations at - // module-load time as well. + // Relocations against libcalls are not common at this time and + // are only used in non-default configurations that disable wasm + // SIMD, disable SSE features, and for wasm modules that still + // use floating point operations. + // + // Currently these relocations are all expected to be absolute + // 8-byte relocations so that's asserted here and then encoded + // directly into the object as a normal object relocation. This + // is processed at module load time to resolve the relocations. 
RelocationTarget::LibCall(call) => { - unimplemented!("cannot generate relocation against libcall {call:?}"); + let symbol = *self.libcall_symbols.entry(call).or_insert_with(|| { + self.obj.add_symbol(Symbol { + name: libcall_name(call).as_bytes().to_vec(), + value: 0, + size: 0, + kind: SymbolKind::Text, + scope: SymbolScope::Linkage, + weak: false, + section: SymbolSection::Undefined, + flags: SymbolFlags::None, + }) + }); + let (encoding, kind, size) = match r.reloc { + Reloc::Abs8 => ( + object::RelocationEncoding::Generic, + object::RelocationKind::Absolute, + 8, + ), + other => unimplemented!("unimplemented relocation kind {other:?}"), + }; + self.obj + .add_relocation( + self.text_section, + object::write::Relocation { + symbol, + size, + kind, + encoding, + offset: off + u64::from(r.offset), + addend: r.addend, + }, + ) + .unwrap(); } }; } @@ -486,3 +533,19 @@ impl<'a> UnwindInfoBuilder<'a> { } } } + +fn libcall_name(call: LibCall) -> &'static str { + use wasmtime_environ::obj::LibCall as LC; + let other = match call { + LibCall::FloorF32 => LC::FloorF32, + LibCall::FloorF64 => LC::FloorF64, + LibCall::NearestF32 => LC::NearestF32, + LibCall::NearestF64 => LC::NearestF64, + LibCall::CeilF32 => LC::CeilF32, + LibCall::CeilF64 => LC::CeilF64, + LibCall::TruncF32 => LC::TruncF32, + LibCall::TruncF64 => LC::TruncF64, + _ => panic!("unknown libcall to give a name to: {call:?}"), + }; + other.symbol() +} diff --git a/crates/environ/src/obj.rs b/crates/environ/src/obj.rs index 9da43ef08e20..efd48f0e2f89 100644 --- a/crates/environ/src/obj.rs +++ b/crates/environ/src/obj.rs @@ -48,7 +48,7 @@ pub const ELF_WASMTIME_ADDRMAP: &str = ".wasmtime.addrmap"; /// encodes the ability to map an offset in the text section to the trap code /// that it corresponds to. 
/// -/// This section is used at runtime to determine what flavor fo trap happened to +/// This section is used at runtime to determine what flavor of trap happened to /// ensure that embedders and debuggers know the reason for the wasm trap. The /// encoding of this section is custom to Wasmtime and managed with helpers in /// the `object` crate: @@ -63,7 +63,7 @@ pub const ELF_WASMTIME_ADDRMAP: &str = ".wasmtime.addrmap"; /// /// This section is decoded by `lookup_trap_code` below which will read the /// section count, slice some bytes to get the various arrays, and then perform -/// a binary search on the offsets array to find the an index corresponding to +/// a binary search on the offsets array to find the index corresponding to /// the pc being looked up. If found the same index in the trap array (the array /// of bytes) is the trap code for that offset. /// @@ -128,3 +128,42 @@ pub const ELF_NAME_DATA: &'static str = ".name.wasm"; /// and is instead indexed directly by relative indices stored in compilation /// metadata. pub const ELF_WASMTIME_DWARF: &str = ".wasmtime.dwarf"; + +macro_rules! libcalls { + ($($rust:ident = $sym:tt)*) => ( + #[allow(missing_docs)] + pub enum LibCall { + $($rust,)* + } + + impl LibCall { + /// Returns the libcall corresponding to the provided symbol name, + /// if one matches. + pub fn from_str(s: &str) -> Option<LibCall> { + match s { + $($sym => Some(LibCall::$rust),)* + _ => None, + } + } + + /// Returns the symbol name in object files associated with this + /// libcall. + pub fn symbol(&self) -> &'static str { + match self { + $(LibCall::$rust => $sym,)* + } + } + } + ) +} + +libcalls! 
{ + FloorF32 = "libcall_floor32" + FloorF64 = "libcall_floor64" + NearestF32 = "libcall_nearestf32" + NearestF64 = "libcall_nearestf64" + CeilF32 = "libcall_ceilf32" + CeilF64 = "libcall_ceilf64" + TruncF32 = "libcall_truncf32" + TruncF64 = "libcall_truncf64" +} diff --git a/crates/fuzzing/src/generators/codegen_settings.rs b/crates/fuzzing/src/generators/codegen_settings.rs index 629a95d8fae3..30484d009fa1 100644 --- a/crates/fuzzing/src/generators/codegen_settings.rs +++ b/crates/fuzzing/src/generators/codegen_settings.rs @@ -1,6 +1,8 @@ //! Generate Cranelift compiler settings. +use crate::generators::ModuleConfig; use arbitrary::{Arbitrary, Unstructured}; +use std::collections::HashMap; /// Choose between matching the host architecture or a cross-compilation target. #[derive(Clone, Debug, Eq, Hash, PartialEq)] @@ -32,6 +34,42 @@ impl CodegenSettings { } } } + + /// Features such as sse4.2 are unconditionally enabled on the x86_64 target + /// because they are hard required for SIMD, but when SIMD is disabled, for + /// example, we support disabling these features. + /// + /// This method will take the wasm feature selection chosen, through + /// `module_config`, and possibly try to disable some more features by + /// reading more of the input. + pub fn maybe_disable_more_features( + &mut self, + module_config: &ModuleConfig, + u: &mut Unstructured<'_>, + ) -> arbitrary::Result<()> { + let flags = match self { + CodegenSettings::Target { flags, .. } => flags, + _ => return Ok(()), + }; + + if !module_config.config.simd_enabled { + // Note that regardless of architecture these booleans are generated + // to have test case failures unrelated to codegen setting input + // that fail on one architecture to fail on other architectures as + // well. 
+ let new_flags = ["has_sse3", "has_ssse3", "has_sse41", "has_sse42"] + .into_iter() + .map(|name| Ok((name, u.arbitrary()?))) + .collect::<arbitrary::Result<HashMap<_, bool>>>()?; + + for (name, val) in flags { + if let Some(new_value) = new_flags.get(name.as_str()) { + *val = new_value.to_string(); + } + } + } + Ok(()) + } } impl<'a> Arbitrary<'a> for CodegenSettings { @@ -103,6 +141,9 @@ impl<'a> Arbitrary<'a> for CodegenSettings { // fail if these features are disabled, so unconditionally // enable them as we're not interested in fuzzing without // them. + // + // Note that these may still be disabled above in + // `maybe_disable_more_features`. std:"sse3" => clif:"has_sse3" ratio: 1 in 1, std:"ssse3" => clif:"has_ssse3" ratio: 1 in 1, std:"sse4.1" => clif:"has_sse41" ratio: 1 in 1, diff --git a/crates/fuzzing/src/generators/config.rs b/crates/fuzzing/src/generators/config.rs index 30de3d3165a0..6b0d69eeff84 100644 --- a/crates/fuzzing/src/generators/config.rs +++ b/crates/fuzzing/src/generators/config.rs @@ -294,6 +294,11 @@ impl<'a> Arbitrary<'a> for Config { module_config: u.arbitrary()?, }; + config + .wasmtime + .codegen + .maybe_disable_more_features(&config.module_config, u)?; + // If using the pooling allocator, constrain the memory and module configurations // to the module limits. 
if let InstanceAllocationStrategy::Pooling(pooling) = &mut config.wasmtime.strategy { diff --git a/crates/jit/src/code_memory.rs b/crates/jit/src/code_memory.rs index 37d3a53ca7d3..01365a42a895 100644 --- a/crates/jit/src/code_memory.rs +++ b/crates/jit/src/code_memory.rs @@ -4,12 +4,14 @@ use crate::subslice_range; use crate::unwind::UnwindRegistration; use anyhow::{anyhow, bail, Context, Result}; use object::read::{File, Object, ObjectSection}; +use object::ObjectSymbol; use std::mem; use std::mem::ManuallyDrop; use std::ops::Range; use wasmtime_environ::obj; use wasmtime_environ::FunctionLoc; use wasmtime_jit_icache_coherence as icache_coherence; +use wasmtime_runtime::libcalls; use wasmtime_runtime::{MmapVec, VMTrampoline}; /// Management of executable memory within a `MmapVec` @@ -24,6 +26,8 @@ pub struct CodeMemory { published: bool, enable_branch_protection: bool, + relocations: Vec<(usize, obj::LibCall)>, + // Ranges within `self.mmap` of where the particular sections lie. text: Range<usize>, unwind: Range<usize>, @@ -60,6 +64,7 @@ impl CodeMemory { let obj = File::parse(&mmap[..]) .with_context(|| "failed to parse internal compilation artifact")?; + let mut relocations = Vec::new(); let mut text = 0..0; let mut unwind = 0..0; let mut enable_branch_protection = None; @@ -93,11 +98,28 @@ impl CodeMemory { ".text" => { text = range; - // Double-check there are no relocations in the text section. At - // this time relocations are not expected at all from loaded code - // since everything should be resolved at compile time. Handling - // must be added here, though, if relocations pop up. - assert!(section.relocations().count() == 0); + // The text section might have relocations for things like + // libcalls which need to be applied, so handle those here. + // + // Note that only a small subset of possible relocations are + // handled. Only those required by the compiler side of + // things are processed. 
+ for (offset, reloc) in section.relocations() { + assert_eq!(reloc.kind(), object::RelocationKind::Absolute); + assert_eq!(reloc.encoding(), object::RelocationEncoding::Generic); + assert_eq!(usize::from(reloc.size()), std::mem::size_of::<usize>()); + assert_eq!(reloc.addend(), 0); + let sym = match reloc.target() { + object::RelocationTarget::Symbol(id) => id, + other => panic!("unknown relocation target {other:?}"), + }; + let sym = obj.symbol_by_index(sym).unwrap().name().unwrap(); + let libcall = obj::LibCall::from_str(sym) + .unwrap_or_else(|| panic!("unknown symbol relocation: {sym}")); + + let offset = usize::try_from(offset).unwrap(); + relocations.push((offset, libcall)); + } } UnwindRegistration::SECTION_NAME => unwind = range, obj::ELF_WASM_DATA => wasm_data = range, @@ -124,6 +146,7 @@ impl CodeMemory { dwarf, info_data, wasm_data, + relocations, }) } @@ -214,6 +237,8 @@ impl CodeMemory { // both the actual unwinding tables as well as the validity of the // pointers we pass in itself. unsafe { + self.apply_relocations()?; + let text = self.text(); // Clear the newly allocated code from cache if the processor requires it @@ -243,6 +268,35 @@ impl CodeMemory { Ok(()) } + unsafe fn apply_relocations(&mut self) -> Result<()> { + if self.relocations.is_empty() { + return Ok(()); + } + + // Mmaps currently all start as readonly so before updating relocations + // the mapping needs to be made writable first. Note that this isn't + // reset back to readonly since the `make_executable` call, which + // happens after this, will implicitly remove the writable bit and leave + // it as just read/execute. 
+ self.mmap.make_writable(self.text.clone())?; + + for (offset, libcall) in self.relocations.iter() { + let offset = self.text.start + offset; + let libcall = match libcall { + obj::LibCall::FloorF32 => libcalls::relocs::floorf32 as usize, + obj::LibCall::FloorF64 => libcalls::relocs::floorf64 as usize, + obj::LibCall::NearestF32 => libcalls::relocs::nearestf32 as usize, + obj::LibCall::NearestF64 => libcalls::relocs::nearestf64 as usize, + obj::LibCall::CeilF32 => libcalls::relocs::ceilf32 as usize, + obj::LibCall::CeilF64 => libcalls::relocs::ceilf64 as usize, + obj::LibCall::TruncF32 => libcalls::relocs::truncf32 as usize, + obj::LibCall::TruncF64 => libcalls::relocs::truncf64 as usize, + }; + *self.mmap.as_mut_ptr().add(offset).cast::<usize>() = libcall; + } + Ok(()) + } + unsafe fn register_unwind_info(&mut self) -> Result<()> { if self.unwind.len() == 0 { return Ok(()); diff --git a/crates/runtime/src/libcalls.rs b/crates/runtime/src/libcalls.rs index 5bed4d8ef69a..865730173912 100644 --- a/crates/runtime/src/libcalls.rs +++ b/crates/runtime/src/libcalls.rs @@ -492,3 +492,85 @@ unsafe fn out_of_gas(vmctx: *mut VMContext) -> Result<()> { unsafe fn new_epoch(vmctx: *mut VMContext) -> Result<u64> { (*(*vmctx).instance().store()).new_epoch() } + +/// This module contains functions which are used for resolving relocations at +/// runtime if necessary. +/// +/// These functions are not used by default and currently the only platform +/// they're used for is on x86_64 when SIMD is disabled and then SSE features +/// are further disabled. In these configurations Cranelift isn't allowed to use +/// native CPU instructions so it falls back to libcalls and we rely on the Rust +/// standard library generally for implementing these. 
+#[allow(missing_docs)] +pub mod relocs { + pub extern "C" fn floorf32(f: f32) -> f32 { + f.floor() + } + + pub extern "C" fn floorf64(f: f64) -> f64 { + f.floor() + } + + pub extern "C" fn ceilf32(f: f32) -> f32 { + f.ceil() + } + + pub extern "C" fn ceilf64(f: f64) -> f64 { + f.ceil() + } + + pub extern "C" fn truncf32(f: f32) -> f32 { + f.trunc() + } + + pub extern "C" fn truncf64(f: f64) -> f64 { + f.trunc() + } + + const TOINT_32: f32 = 1.0 / f32::EPSILON; + const TOINT_64: f64 = 1.0 / f64::EPSILON; + + // NB: replace with `round_ties_even` from libstd when it's stable as + // tracked by rust-lang/rust#96710 + pub extern "C" fn nearestf32(x: f32) -> f32 { + // Rust doesn't have a nearest function; there's nearbyint, but it's not + // stabilized, so do it manually. + // Nearest is either ceil or floor depending on which is nearest or even. + // This approach exploited round half to even default mode. + let i = x.to_bits(); + let e = i >> 23 & 0xff; + if e >= 0x7f_u32 + 23 { + // Check for NaNs. + if e == 0xff { + // Read the 23-bits significand. + if i & 0x7fffff != 0 { + // Ensure it's arithmetic by setting the significand's most + // significant bit to 1; it also works for canonical NaNs. + return f32::from_bits(i | (1 << 22)); + } + } + x + } else { + (x.abs() + TOINT_32 - TOINT_32).copysign(x) + } + } + + pub extern "C" fn nearestf64(x: f64) -> f64 { + let i = x.to_bits(); + let e = i >> 52 & 0x7ff; + if e >= 0x3ff_u64 + 52 { + // Check for NaNs. + if e == 0x7ff { + // Read the 52-bits significand. + if i & 0xfffffffffffff != 0 { + // Ensure it's arithmetic by setting the significand's most + // significant bit to 1; it also works for canonical NaNs. 
+ return f64::from_bits(i | (1 << 51)); + } + } + x + } else { + (x.abs() + TOINT_64 - TOINT_64).copysign(x) + } + } +} diff --git a/crates/wast/src/wast.rs b/crates/wast/src/wast.rs index 2887241117c4..545f207e8abf 100644 --- a/crates/wast/src/wast.rs +++ b/crates/wast/src/wast.rs @@ -363,7 +363,7 @@ impl WastContext { let sp = directive.span(); if log::log_enabled!(log::Level::Debug) { let (line, col) = sp.linecol_in(wast); - log::debug!("failed directive on {}:{}:{}", filename, line + 1, col); + log::debug!("running directive on {}:{}:{}", filename, line + 1, col); } self.run_directive(directive) .map_err(|e| match e.downcast() {