diff --git a/crates/environ/src/module.rs b/crates/environ/src/module.rs index 644f1a2bb3f1..b25682acf215 100644 --- a/crates/environ/src/module.rs +++ b/crates/environ/src/module.rs @@ -239,11 +239,31 @@ pub enum Initializer { args: IndexMap, }, - /// A module is defined into the module index space, and which module is - /// being defined is specified by the index payload. + /// A module is being created from a set of compiled artifacts. + CreateModule { + /// The index of the artifact that's being convereted into a module. + artifact_index: usize, + /// The list of artifacts that this module value will be inheriting. + artifacts: Vec, + /// The list of modules that this module value will inherit. + modules: Vec, + }, + + /// A module is created from a closed-over-module value, defined when this + /// module was created. DefineModule(usize), } +/// Where module values can come from when creating a new module from a compiled +/// artifact. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum ModuleUpvar { + /// A module value is inherited from the module creating the new module. + Inherit(usize), + /// A module value comes from the instance-to-be-created module index space. + Local(ModuleIndex), +} + impl Module { /// Allocates the module data structures. pub fn new() -> Self { diff --git a/crates/environ/src/module_environ.rs b/crates/environ/src/module_environ.rs index 7bb30ea16606..9d1f89cb1f82 100644 --- a/crates/environ/src/module_environ.rs +++ b/crates/environ/src/module_environ.rs @@ -1,6 +1,6 @@ use crate::module::{ - Initializer, InstanceSignature, MemoryPlan, Module, ModuleSignature, ModuleType, TableElements, - TablePlan, TypeTables, + Initializer, InstanceSignature, MemoryPlan, Module, ModuleSignature, ModuleType, ModuleUpvar, + TableElements, TablePlan, TypeTables, }; use crate::tunables::Tunables; use cranelift_codegen::ir; @@ -75,6 +75,14 @@ pub struct ModuleTranslation<'data> { code_index: u32, implicit_instances: HashMap<&'data str, InstanceIndex>, + + /// The artifacts which are needed from the parent module when this module + /// is created. This is used to insert into `Initializer::CreateModule` when + /// this module is defined in the parent. + creation_artifacts: Vec, + + /// Same as `creation_artifacts`, but for modules instead of artifacts. + creation_modules: Vec, } /// Contains function data: byte code and its offset in the module. @@ -326,8 +334,7 @@ impl<'data> ModuleEnvironment<'data> { } } - fn gen_type_of_module(&mut self, module: usize) -> ModuleTypeIndex { - let module = &self.results[module].module; + fn gen_type_of_module(&mut self, module: &Module) -> ModuleTypeIndex { let imports = module .imports() .map(|(s, field, ty)| { @@ -880,20 +887,43 @@ and for re-adding support for interface types you can see this issue: } fn module_end(&mut self) { - let (record_initializer, done) = match self.in_progress.pop() { + self.result.creation_artifacts.shrink_to_fit(); + self.result.creation_modules.shrink_to_fit(); + + let (record_initializer, mut done) = match self.in_progress.pop() { Some(m) => (true, mem::replace(&mut self.result, m)), None => (false, mem::take(&mut self.result)), }; - self.results.push(done); + if record_initializer { - let index = self.results.len() - 1; + // Record the type of the module we just finished in our own + // module's list of modules. + let sig = self.gen_type_of_module(&done.module); + self.result.module.modules.push(sig); + + // The root module will store the artifacts for this finished + // module at `artifact_index`. This then needs to be inherited by + // all later modules coming down to our now-current `self.result`... + let mut artifact_index = self.results.len(); + for result in self.in_progress.iter_mut().chain(Some(&mut self.result)) { + result.creation_artifacts.push(artifact_index); + artifact_index = result.creation_artifacts.len() - 1; + } + // ... and then `self.result` needs to create a new module with + // whatever was record to save off as its own artifacts/modules. self.result .module .initializers - .push(Initializer::DefineModule(index)); - let sig = self.gen_type_of_module(index); - self.result.module.modules.push(sig); + .push(Initializer::CreateModule { + artifact_index, + artifacts: mem::take(&mut done.creation_artifacts), + modules: mem::take(&mut done.creation_modules), + }); } + + // And the final step is to insert the module into the list of finished + // modules to get returned at the end. + self.results.push(done); } fn reserve_instances(&mut self, amt: u32) { @@ -936,16 +966,44 @@ and for re-adding support for interface types you can see this issue: self.result.module.types.push(ty); } - // FIXME(WebAssembly/module-linking#28) unsure how to implement this - // at this time, if we can alias imported modules it's a lot harder, - // otherwise we'll need to figure out how to translate `index` to a - // `usize` for a defined module (creating Initializer::DefineModule) + // Modules are a bit trickier since we need to record how to track + // the state from the original module down to our own. Alias::OuterModule { relative_depth, index, } => { - drop((relative_depth, index)); - unimplemented!() + // First we can copy the type from the parent module into our + // own module to record what type our module definition will + // have. + let module_idx = self.in_progress.len() - 1 - (relative_depth as usize); + let module_ty = self.in_progress[module_idx].module.modules[index]; + self.result.module.modules.push(module_ty); + + // Next we'll be injecting a module value that is closed over, + // and that will be used to define the module into the index + // space. Record an initializer about where our module is + // sourced from (which will be stored within each module value + // itself). + let module_index = self.result.creation_modules.len(); + self.result + .module + .initializers + .push(Initializer::DefineModule(module_index)); + + // And finally we need to record a breadcrumb trail of how to + // get the module value into `module_index`. The module just + // after our destination module will use a `ModuleIndex` to + // fetch the module value, and everything else inbetween will + // inherit that module's closed-over value. + let mut upvar = ModuleUpvar::Local(index); + for outer in self.in_progress[module_idx + 1..].iter_mut() { + let upvar = mem::replace( + &mut upvar, + ModuleUpvar::Inherit(outer.creation_modules.len()), + ); + outer.creation_modules.push(upvar); + } + self.result.creation_modules.push(upvar); } // This case is slightly more involved, we'll be recording all the diff --git a/crates/jit/src/instantiate.rs b/crates/jit/src/instantiate.rs index c0738c9c2c76..655432e00f47 100644 --- a/crates/jit/src/instantiate.rs +++ b/crates/jit/src/instantiate.rs @@ -221,7 +221,7 @@ impl CompiledModule { artifacts: Vec, isa: &dyn TargetIsa, profiler: &dyn ProfilingAgent, - ) -> Result, SetupError> { + ) -> Result>, SetupError> { maybe_parallel!(artifacts.(into_iter | into_par_iter)) .map(|a| CompiledModule::from_artifacts(a, isa, profiler)) .collect() @@ -232,7 +232,7 @@ impl CompiledModule { artifacts: CompilationArtifacts, isa: &dyn TargetIsa, profiler: &dyn ProfilingAgent, - ) -> Result { + ) -> Result, SetupError> { // Allocate all of the compiled functions into executable memory, // copying over their contents. let (code_memory, code_range, finished_functions, trampolines) = build_code_memory( @@ -266,7 +266,7 @@ impl CompiledModule { let finished_functions = FinishedFunctions(finished_functions); - Ok(Self { + Ok(Arc::new(Self { module: Arc::new(artifacts.module.clone()), artifacts, code: Arc::new(ModuleCode { @@ -275,7 +275,7 @@ impl CompiledModule { }), finished_functions, trampolines, - }) + })) } /// Crate an `Instance` from this `CompiledModule`. diff --git a/crates/wasmtime/src/instance.rs b/crates/wasmtime/src/instance.rs index e574f38812dd..60c52d6271cf 100644 --- a/crates/wasmtime/src/instance.rs +++ b/crates/wasmtime/src/instance.rs @@ -62,46 +62,27 @@ fn instantiate( .with_context(|| format!("incompatible import type for `{}`", name))?; } - // Turns out defining any kind of module is pretty easy, we're just - // slinging around pointers. - Initializer::DefineModule(idx) => { - imports.modules.push(module.submodule(*idx)); - } - // Here we lookup our instance handle, find the right export, // and then push that item into our own index space. We eschew - // type-checking since only valid modules reach this point. - // - // Note that export lookup here needs to happen by name. The - // `export` index is an index into our local type definition of the - // type of the instance to figure out what name it was assigned. - // This is where the subtyping happens! - // - // Note that the unsafety here is because we're asserting that the - // handle comes from our same store, but this should be true because - // we acquired the handle from an instance in the store. + // type-checking since only valid modules should reach this point. Initializer::AliasInstanceExport { instance, export } => { let export = &imports.instances[*instance][export]; let item = unsafe { Extern::from_wasmtime_export(export, store) }; imports.push_extern(&item); } - // Oh boy a recursive instantiation! The recursive arguments here - // are pretty simple, and the only slightly-meaty one is how - // arguments are pulled from `args` and pushed directly into the - // builder specified, which should be an easy enough - // copy-the-pointer operation in all cases. + // Oh boy a recursive instantiation! // - // Note that this recursive call shouldn't result in an infinite - // loop because of wasm module validation which requires everything - // to be a DAG. Additionally the recursion should also be bounded - // due to validation. We may one day need to make this an iterative - // loop, however. + // We use our local index space of modules to find the module to + // instantiate and argument lookup is defined as looking inside of + // `args`. Like above with aliases all type checking is eschewed + // because only valid modules should reach this point. // - // Also note that there's some unsafety here around cloning - // `InstanceHandle` because the handle may not live long enough, but - // we're doing all of this in the context of our `Store` argument - // above so we should be safe here. + // Note that it's thought that due to the acyclic nature of + // instantiation this can't loop to blow the native stack, and + // validation should in theory ensure this has a bounded depth. + // Despite this we may need to change this to a loop instead of + // recursion one day. Initializer::Instantiate { module, args } => { let handle = instantiate( store, @@ -134,6 +115,33 @@ fn instantiate( )?; imports.instances.push(handle); } + + // A new module is being defined, and the source of this module is + // our module's list of closed-over-modules. + // + // This is used for outer aliases. + Initializer::DefineModule(upvar_index) => { + imports + .modules + .push(module.module_upvar(*upvar_index).clone()); + } + + // A new module is defined, created from a set of compiled + // artifacts. The new module value will be created with the + // specified artifacts being closed over as well as the specified + // set of module values in our index/upvar index spaces being closed + // over. + // + // This is used for defining submodules. + Initializer::CreateModule { + artifact_index, + artifacts, + modules, + } => { + let submodule = + module.create_submodule(*artifact_index, artifacts, modules, &imports.modules); + imports.modules.push(submodule); + } } } diff --git a/crates/wasmtime/src/module.rs b/crates/wasmtime/src/module.rs index c78ae8a7f21b..8fcd94a3bd4b 100644 --- a/crates/wasmtime/src/module.rs +++ b/crates/wasmtime/src/module.rs @@ -2,12 +2,16 @@ use crate::types::{ExportType, ExternType, ImportType}; use crate::{Engine, ModuleType}; use anyhow::{bail, Context, Result}; use bincode::Options; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; use std::hash::Hash; use std::path::Path; use std::sync::Arc; use wasmparser::Validator; #[cfg(feature = "cache")] use wasmtime_cache::ModuleCacheEntry; +use wasmtime_environ::entity::PrimaryMap; +use wasmtime_environ::wasm::ModuleIndex; use wasmtime_jit::{CompilationArtifacts, CompiledModule, TypeTables}; /// A compiled WebAssembly module, ready to be instantiated. @@ -80,14 +84,72 @@ use wasmtime_jit::{CompilationArtifacts, CompiledModule, TypeTables}; /// [`Config`]: crate::Config #[derive(Clone)] pub struct Module { + inner: Arc, +} + +struct ModuleInner { engine: Engine, - data: Arc, - index: usize, + /// The compiled artifacts for this module that will be instantiated and + /// executed. + module: Arc, + /// Closed-over compilation artifacts used to create submodules when this + /// module is instantiated. + artifact_upvars: Vec>, + /// Closed-over module values which are used when this module is + /// instantiated. + module_upvars: Vec, + /// Type information of this module and all `artifact_upvars` compiled + /// modules. + types: Arc, +} + +/// A small helper struct which defines modules are serialized. +#[derive(serde::Serialize, serde::Deserialize)] +struct ModuleSerialized<'a> { + /// All compiled artifacts neeeded by this module, where the last entry in + /// this list is the artifacts for the module itself. + artifacts: Vec>, + /// Closed-over module values that are also needed for this module. + modules: Vec>, + /// The index into the list of type tables that are used for this module's + /// type tables. + type_tables: usize, +} + +// This is like `std::borrow::Cow` but it doesn't have a `Clone` bound on `T` +enum MyCow<'a, T> { + Borrowed(&'a T), + Owned(T), +} + +impl<'a, T> MyCow<'a, T> { + fn unwrap_owned(self) -> T { + match self { + MyCow::Owned(val) => val, + MyCow::Borrowed(_) => unreachable!(), + } + } } -pub(crate) struct ModuleData { - pub(crate) types: Arc, - pub(crate) modules: Vec, +impl<'a, T: Serialize> Serialize for MyCow<'a, T> { + fn serialize(&self, dst: S) -> Result + where + S: serde::ser::Serializer, + { + match self { + MyCow::Borrowed(val) => val.serialize(dst), + MyCow::Owned(val) => val.serialize(dst), + } + } +} + +impl<'a, 'b, T: Deserialize<'a>> Deserialize<'a> for MyCow<'b, T> { + fn deserialize(src: D) -> Result + where + D: serde::de::Deserializer<'a>, + { + Ok(MyCow::Owned(T::deserialize(src)?)) + } } impl Module { @@ -169,7 +231,8 @@ impl Module { /// See [`Module::new`] for other details. pub fn new_with_name(engine: &Engine, bytes: impl AsRef<[u8]>, name: &str) -> Result { let mut module = Module::new(engine, bytes.as_ref())?; - Arc::get_mut(&mut module.data).unwrap().modules[module.index] + Arc::get_mut(&mut Arc::get_mut(&mut module.inner).unwrap().module) + .unwrap() .module_mut() .expect("mutable module") .name = Some(name.to_string()); @@ -254,17 +317,21 @@ impl Module { let (main_module, artifacts, types) = CompilationArtifacts::build(engine.compiler(), binary)?; - let modules = CompiledModule::from_artifacts_list( + let mut modules = CompiledModule::from_artifacts_list( artifacts, engine.compiler().isa(), &*engine.config().profiler, )?; + let module = modules.remove(main_module); - let types = Arc::new(types); Ok(Module { - engine: engine.clone(), - index: main_module, - data: Arc::new(ModuleData { types, modules }), + inner: Arc::new(ModuleInner { + engine: engine.clone(), + module, + types: Arc::new(types), + artifact_upvars: modules, + module_upvars: Vec::new(), + }), }) } @@ -313,21 +380,46 @@ impl Module { /// Serialize compilation artifacts to the buffer. See also `deseriaize`. pub fn serialize(&self) -> Result> { - let artifacts = ( - compiler_fingerprint(&self.engine), - self.data - .modules - .iter() - .map(|i| i.compilation_artifacts()) - .collect::>(), - &*self.data.types, - self.index, - ); - + let mut pushed = HashMap::new(); + let mut tables = Vec::new(); + let module = self.serialized_module(&mut pushed, &mut tables); + let artifacts = (compiler_fingerprint(self.engine()), tables, module); let buffer = bincode_options().serialize(&artifacts)?; Ok(buffer) } + fn serialized_module<'a>( + &'a self, + type_tables_pushed: &mut HashMap, + type_tables: &mut Vec<&'a TypeTables>, + ) -> ModuleSerialized<'a> { + // Deduplicate `Arc` using our two parameters to ensure we + // serialize type tables as little as possible. + let ptr = Arc::as_ptr(self.types()); + let type_tables_idx = *type_tables_pushed.entry(ptr as usize).or_insert_with(|| { + type_tables.push(self.types()); + type_tables.len() - 1 + }); + ModuleSerialized { + artifacts: self + .inner + .artifact_upvars + .iter() + .map(|i| MyCow::Borrowed(i.compilation_artifacts())) + .chain(Some(MyCow::Borrowed( + self.compiled_module().compilation_artifacts(), + ))) + .collect(), + modules: self + .inner + .module_upvars + .iter() + .map(|i| i.serialized_module(type_tables_pushed, type_tables)) + .collect(), + type_tables: type_tables_idx, + } + } + /// Deserializes and creates a module from the compilation artifacts. /// The `serialize` saves the compilation artifacts along with the host /// fingerprint, which consists of target, compiler flags, and wasmtime @@ -338,44 +430,113 @@ impl Module { /// for modifications or curruptions. All responsibily of signing and its /// verification falls on the embedder. pub fn deserialize(engine: &Engine, serialized: &[u8]) -> Result { - let expected_fingerprint = compiler_fingerprint(engine); - - let (fingerprint, artifacts, types, index) = bincode_options() - .deserialize::<(u64, _, _, _)>(serialized) + let (fingerprint, types, serialized) = bincode_options() + .deserialize::<(u64, Vec, _)>(serialized) .context("Deserialize compilation artifacts")?; - if fingerprint != expected_fingerprint { + + if fingerprint != compiler_fingerprint(engine) { bail!("Incompatible compilation artifact"); } - let modules = CompiledModule::from_artifacts_list( - artifacts, - engine.compiler().isa(), - &*engine.config().profiler, - )?; - - let types = Arc::new(types); - Ok(Module { - engine: engine.clone(), - index, - data: Arc::new(ModuleData { modules, types }), - }) - } + let types = types.into_iter().map(Arc::new).collect::>(); + return mk(engine, &types, serialized); - pub(crate) fn compiled_module(&self) -> &CompiledModule { - &self.data.modules[self.index] + fn mk( + engine: &Engine, + types: &Vec>, + module: ModuleSerialized<'_>, + ) -> Result { + let mut artifacts = CompiledModule::from_artifacts_list( + module + .artifacts + .into_iter() + .map(|i| i.unwrap_owned()) + .collect(), + engine.compiler().isa(), + &*engine.config().profiler, + )?; + let inner = ModuleInner { + engine: engine.clone(), + types: types[module.type_tables].clone(), + module: artifacts.pop().unwrap(), + artifact_upvars: artifacts, + module_upvars: module + .modules + .into_iter() + .map(|m| mk(engine, types, m)) + .collect::>>()?, + }; + Ok(Module { + inner: Arc::new(inner), + }) + } } - pub(crate) fn submodule(&self, index: usize) -> Module { - assert!(index < self.data.modules.len()); + /// Creates a submodule `Module` value from the specified parameters. + /// + /// This is used for creating submodules as part of module instantiation. + /// + /// * `artifact_index` - the index in `artifact_upvars` that we're creating + /// a module for + /// * `artifact_upvars` - the mapping of indices of what artifact upvars are + /// needed for the submodule. The length of this array is the length of + /// the upvars array in the submodule to be created, and each element of + /// this array is an index into this module's upvar array. + /// * `module_upvars` - similar to `artifact_upvars` this is a mapping of + /// how to create the e`module_upvars` of the submodule being created. + /// Each entry in this array is either an index into this module's own + /// module upvars array or it's an index into `modules`, the list of + /// modules so far for the instance where this submodule is being + /// created. + /// * `modules` - array indexed by `module_upvars`. + /// + /// Note that the real meat of this happens in `ModuleEnvironment` + /// translation inside of `wasmtime_environ`. This just does the easy thing + /// of handling all the indices, over there is where the indices are + /// actually calculated and such. + pub(crate) fn create_submodule( + &self, + artifact_index: usize, + artifact_upvars: &[usize], + module_upvars: &[wasmtime_environ::ModuleUpvar], + modules: &PrimaryMap, + ) -> Module { Module { - engine: self.engine.clone(), - data: self.data.clone(), - index, + inner: Arc::new(ModuleInner { + types: self.types().clone(), + engine: self.engine().clone(), + module: self.inner.artifact_upvars[artifact_index].clone(), + artifact_upvars: artifact_upvars + .iter() + .map(|i| self.inner.artifact_upvars[*i].clone()) + .collect(), + module_upvars: module_upvars + .iter() + .map(|i| match *i { + wasmtime_environ::ModuleUpvar::Inherit(i) => { + self.inner.module_upvars[i].clone() + } + wasmtime_environ::ModuleUpvar::Local(i) => modules[i].clone(), + }) + .collect(), + }), } } + pub(crate) fn compiled_module(&self) -> &CompiledModule { + &self.inner.module + } + pub(crate) fn types(&self) -> &Arc { - &self.data.types + &self.inner.types + } + + /// Looks up the module upvar value at the `index` specified. + /// + /// Note that this panics if `index` is out of bounds since this should + /// only be called for valid indices as part of instantiation. + pub(crate) fn module_upvar(&self, index: usize) -> &Module { + &self.inner.module_upvars[index] } /// Returns identifier/name that this [`Module`] has. This name @@ -585,7 +746,7 @@ impl Module { /// Returns the [`Engine`] that this [`Module`] was compiled by. pub fn engine(&self) -> &Engine { - &self.engine + &self.inner.engine } } diff --git a/tests/misc_testsuite/module-linking/alias-outer.wast b/tests/misc_testsuite/module-linking/alias-outer.wast new file mode 100644 index 000000000000..e6dc6255a322 --- /dev/null +++ b/tests/misc_testsuite/module-linking/alias-outer.wast @@ -0,0 +1,67 @@ +(module $a + (module $m1) + (module $b + (module $m2) + (module $c + (instance (instantiate (module outer $a $m1))) + (instance (instantiate (module outer $b $m2))) + ) + (instance (instantiate $c)) + ) + (instance (instantiate $b)) +) + +(module $a + (module (export "m")) +) + +(module $PARENT + (import "a" "m" (module $b)) + (module $c + (module $d + (instance (instantiate (module outer $PARENT $b))) + ) + (instance (instantiate $d)) + ) + (instance (instantiate $c)) +) + +;; Instantiate `$b` here below twice with two different imports. Ensure the +;; exported modules close over the captured state correctly to ensure that we +;; get the right functions. +(module $a + (module $b (export "close_over_imports") + (import "m" (module $m (export "f" (func (result i32))))) + (module (export "m") + (instance $a (instantiate (module outer $b $m))) + (func (export "f") (result i32) + call (func $a "f")) + ) + ) +) + +(module + (import "a" "close_over_imports" (module $m0 + (import "m" (module (export "f" (func (result i32))))) + (export "m" (module (export "f" (func (result i32))))) + )) + + (module $m1 + (func (export "f") (result i32) + i32.const 0)) + (instance $m_g1 (instantiate $m0 "m" (module $m1))) + (instance $g1 (instantiate (module $m_g1 "m"))) + (module $m2 + (func (export "f") (result i32) + i32.const 1)) + (instance $m_g2 (instantiate $m0 "m" (module $m2))) + (instance $g2 (instantiate (module $m_g2 "m"))) + + (func (export "get1") (result i32) + call (func $g1 "f")) + (func (export "get2") (result i32) + call (func $g2 "f")) +) + +(assert_return (invoke "get1") (i32.const 0)) +(assert_return (invoke "get2") (i32.const 1)) diff --git a/tests/misc_testsuite/module-linking/alias.wast b/tests/misc_testsuite/module-linking/alias.wast index f9d2b2f3fdeb..2fcad017772d 100644 --- a/tests/misc_testsuite/module-linking/alias.wast +++ b/tests/misc_testsuite/module-linking/alias.wast @@ -93,8 +93,7 @@ (instance $a (instantiate $m)) ) -;; alias parent -- module -(; TODO +;; alias outer -- module (module (module $a) (module $m @@ -102,7 +101,6 @@ ) (instance (instantiate $m)) ) -;) ;; The alias, import, type, module, and instance sections can all be interleaved (module $ROOT