From 624521ff91ac654c11072581867f558c26d285ab Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 6 Oct 2023 08:28:51 +0200 Subject: [PATCH] Performance Improvements (5) (#6076) ### Description The `AsyncModule` code generation in every ESM flags it as "needs availability info" which causes code generation to be duplicated for each entrypoints. This improves it a little bit by checking is the module is an async module before. It also fixes a bug where availability is not correct reduced and only passes the availability root to async module computation instead of the full availability info. ### Testing Instructions Closes WEB-1697 --- crates/turbo-tasks/src/manager.rs | 2 +- .../src/chunk/availability_info.rs | 110 +++++++++++++++++- .../src/chunk/esm_scope.rs | 8 +- crates/turbopack-ecmascript/src/chunk/item.rs | 9 +- crates/turbopack-ecmascript/src/code_gen.rs | 11 +- crates/turbopack-ecmascript/src/lib.rs | 24 ++-- .../src/references/async_module.rs | 68 +++++++---- .../src/references/mod.rs | 33 ++++-- .../src/tree_shake/chunk_item.rs | 26 +++-- 9 files changed, 230 insertions(+), 61 deletions(-) diff --git a/crates/turbo-tasks/src/manager.rs b/crates/turbo-tasks/src/manager.rs index ff59cf309fb23..d3a293505270d 100644 --- a/crates/turbo-tasks/src/manager.rs +++ b/crates/turbo-tasks/src/manager.rs @@ -410,7 +410,7 @@ impl TurboTasks { /// Calls a native function with arguments. Resolves arguments when needed /// with a wrapper [Task]. pub fn dynamic_call(&self, func: FunctionId, inputs: Vec) -> RawVc { - if inputs.iter().all(|i| i.is_resolved() && !i.is_nothing()) { + if inputs.iter().all(|i| i.is_resolved()) { self.native_call(func, inputs) } else { RawVc::TaskOutput(self.backend.get_or_create_persistent_task( diff --git a/crates/turbopack-core/src/chunk/availability_info.rs b/crates/turbopack-core/src/chunk/availability_info.rs index a1cb3c01f100e..ad5eb650f5caa 100644 --- a/crates/turbopack-core/src/chunk/availability_info.rs +++ b/crates/turbopack-core/src/chunk/availability_info.rs @@ -1,19 +1,84 @@ +use std::ops::{BitOr, BitOrAssign}; + use turbo_tasks::Vc; use super::available_modules::AvailableAssets; use crate::module::Module; +/// This specifies which information is needed from an AvailabilityInfo +#[turbo_tasks::value(shared)] +#[derive(Copy, Clone, Debug)] +pub struct AvailabilityInfoNeeds { + pub current_availability_root: bool, + pub available_modules: bool, +} + +impl AvailabilityInfoNeeds { + pub fn all() -> Self { + Self { + current_availability_root: true, + available_modules: true, + } + } + + pub fn none() -> Self { + Self { + current_availability_root: false, + available_modules: false, + } + } + + pub fn is_complete(self) -> bool { + let Self { + current_availability_root, + available_modules, + } = self; + current_availability_root && available_modules + } +} + +impl BitOr for AvailabilityInfoNeeds { + type Output = AvailabilityInfoNeeds; + + fn bitor(mut self, rhs: Self) -> Self::Output { + let Self { + available_modules, + current_availability_root, + } = rhs; + self.current_availability_root |= current_availability_root; + self.available_modules |= available_modules; + self + } +} + +impl BitOrAssign for AvailabilityInfoNeeds { + fn bitor_assign(&mut self, rhs: Self) { + *self = *self | rhs; + } +} + #[turbo_tasks::value(serialization = "auto_for_input")] #[derive(PartialOrd, Ord, Hash, Clone, Copy, Debug)] pub enum AvailabilityInfo { + /// Available modules are not tracked or the information is not available + /// due to specified needs. Untracked, + /// There are no modules available (or the information is not available due + /// to specified needs), but it's tracked for nested chunk groups + /// and the current chunk group root is defined. Root { current_availability_root: Vc>, }, - Inner { + /// There are modules already available and the current chunk group root is + /// defined. + Complete { available_modules: Vc, current_availability_root: Vc>, }, + /// Only partial information is available, about the available modules. + OnlyAvailableModules { + available_modules: Vc, + }, } impl AvailabilityInfo { @@ -23,10 +88,11 @@ impl AvailabilityInfo { Self::Root { current_availability_root, } => Some(*current_availability_root), - Self::Inner { + Self::Complete { current_availability_root, .. } => Some(*current_availability_root), + Self::OnlyAvailableModules { .. } => None, } } @@ -34,9 +100,47 @@ impl AvailabilityInfo { match self { Self::Untracked => None, Self::Root { .. } => None, - Self::Inner { + Self::Complete { available_modules, .. } => Some(*available_modules), + Self::OnlyAvailableModules { + available_modules, .. + } => Some(*available_modules), + } + } + + /// Returns AvailabilityInfo, but only with the information that is needed + pub fn reduce_to_needs(self, needs: AvailabilityInfoNeeds) -> Self { + let AvailabilityInfoNeeds { + available_modules, + current_availability_root, + } = needs; + match (current_availability_root, available_modules, self) { + (false, false, _) => Self::Untracked, + (true, true, _) => self, + ( + true, + false, + Self::Root { + current_availability_root, + } + | Self::Complete { + current_availability_root, + .. + }, + ) => Self::Root { + current_availability_root, + }, + (true, false, _) => Self::Untracked, + ( + false, + true, + Self::Complete { + available_modules, .. + } + | Self::OnlyAvailableModules { available_modules }, + ) => Self::OnlyAvailableModules { available_modules }, + (false, true, _) => Self::Untracked, } } } diff --git a/crates/turbopack-ecmascript/src/chunk/esm_scope.rs b/crates/turbopack-ecmascript/src/chunk/esm_scope.rs index e5e3d36d5bd38..c79edd75ef5b7 100644 --- a/crates/turbopack-ecmascript/src/chunk/esm_scope.rs +++ b/crates/turbopack-ecmascript/src/chunk/esm_scope.rs @@ -2,9 +2,9 @@ use std::collections::HashMap; use anyhow::{Context, Result}; use petgraph::{algo::tarjan_scc, prelude::DiGraphMap}; -use turbo_tasks::{TryFlatJoinIterExt, Value, Vc}; +use turbo_tasks::{TryFlatJoinIterExt, Vc}; use turbopack_core::{ - chunk::{availability_info::AvailabilityInfo, available_modules::chunkable_modules_set}, + chunk::available_modules::chunkable_modules_set, module::{Module, ModulesSet}, }; @@ -38,8 +38,8 @@ pub(crate) struct EsmScopeSccs(Vec>); impl EsmScope { /// Create a new [EsmScope] from the availability root given. #[turbo_tasks::function] - pub(crate) async fn new(availability_info: Value) -> Result> { - let assets = if let Some(root) = availability_info.current_availability_root() { + pub(crate) async fn new(availability_root: Option>>) -> Result> { + let assets = if let Some(root) = availability_root { chunkable_modules_set(root) } else { ModulesSet::empty() diff --git a/crates/turbopack-ecmascript/src/chunk/item.rs b/crates/turbopack-ecmascript/src/chunk/item.rs index 753deefd8f0b1..16a66d8213981 100644 --- a/crates/turbopack-ecmascript/src/chunk/item.rs +++ b/crates/turbopack-ecmascript/src/chunk/item.rs @@ -271,17 +271,20 @@ impl FromChunkableModule for Box { AvailabilityInfo::Untracked => AvailabilityInfo::Untracked, AvailabilityInfo::Root { current_availability_root, - } => AvailabilityInfo::Inner { + } => AvailabilityInfo::Complete { available_modules: AvailableAssets::new(vec![current_availability_root]), current_availability_root: Vc::upcast(module), }, - AvailabilityInfo::Inner { + AvailabilityInfo::Complete { available_modules, current_availability_root, - } => AvailabilityInfo::Inner { + } => AvailabilityInfo::Complete { available_modules: available_modules.with_roots(vec![current_availability_root]), current_availability_root: Vc::upcast(module), }, + AvailabilityInfo::OnlyAvailableModules { .. } => { + panic!("OnlyAvailableModules should not appear here") + } }; let manifest_asset = diff --git a/crates/turbopack-ecmascript/src/code_gen.rs b/crates/turbopack-ecmascript/src/code_gen.rs index d2d812354c6d7..edfe02eb1a7aa 100644 --- a/crates/turbopack-ecmascript/src/code_gen.rs +++ b/crates/turbopack-ecmascript/src/code_gen.rs @@ -1,7 +1,7 @@ use serde::{Deserialize, Serialize}; use swc_core::ecma::visit::{AstParentKind, VisitMut}; use turbo_tasks::{debug::ValueDebugFormat, trace::TraceRawVcs, Value, Vc}; -use turbopack_core::chunk::availability_info::AvailabilityInfo; +use turbopack_core::chunk::availability_info::{AvailabilityInfo, AvailabilityInfoNeeds}; use crate::chunk::EcmascriptChunkingContext; @@ -34,6 +34,15 @@ pub trait CodeGenerateable { #[turbo_tasks::value_trait] pub trait CodeGenerateableWithAvailabilityInfo { + /// Returns the pieces of AvailabilityInfo that are needed for + /// `code_generation`. + fn get_availability_info_needs( + self: Vc, + _is_async_module: bool, + ) -> Vc { + AvailabilityInfoNeeds::all().cell() + } + fn code_generation( self: Vc, chunking_context: Vc>, diff --git a/crates/turbopack-ecmascript/src/lib.rs b/crates/turbopack-ecmascript/src/lib.rs index 048c6017f803f..1b8e2a77dee7e 100644 --- a/crates/turbopack-ecmascript/src/lib.rs +++ b/crates/turbopack-ecmascript/src/lib.rs @@ -353,11 +353,6 @@ impl EcmascriptModuleAsset { availability_info: Value, ) -> Result> { let this = self.await?; - let availability_info = if *self.analyze().needs_availability_info().await? { - availability_info - } else { - Value::new(AvailabilityInfo::Untracked) - }; let parsed = parse(this.source, Value::new(this.ty), this.transforms); @@ -515,13 +510,24 @@ impl EcmascriptChunkItem for ModuleChunkItem { availability_info: Value, ) -> Result> { let this = self.await?; - let content = this - .module - .module_content(this.chunking_context, availability_info); let async_module_options = this .module .get_async_module() - .module_options(availability_info); + .module_options(availability_info.current_availability_root()); + let is_async_module = async_module_options.await?.is_some(); + let availability_info_needs = *this + .module + .analyze() + .get_availability_info_needs(is_async_module) + .await?; + // We reduce the availability info to the needs of the chunk item to improve + // caching of the methods that are called with availability info. e. g. + // module_content() can be cached for different availability info when it + // doesn't really need that info. + let availability_info = availability_info.reduce_to_needs(availability_info_needs); + let content = this + .module + .module_content(this.chunking_context, Value::new(availability_info)); Ok(EcmascriptChunkItemContent::new( content, diff --git a/crates/turbopack-ecmascript/src/references/async_module.rs b/crates/turbopack-ecmascript/src/references/async_module.rs index cda5cb48d14ed..f5d3be82d12ca 100644 --- a/crates/turbopack-ecmascript/src/references/async_module.rs +++ b/crates/turbopack-ecmascript/src/references/async_module.rs @@ -7,7 +7,10 @@ use swc_core::{ quote, }; use turbo_tasks::{trace::TraceRawVcs, TryFlatJoinIterExt, Value, Vc}; -use turbopack_core::chunk::availability_info::AvailabilityInfo; +use turbopack_core::{ + chunk::availability_info::{AvailabilityInfo, AvailabilityInfoNeeds}, + module::Module, +}; use super::esm::base::ReferencedAsset; use crate::{ @@ -67,20 +70,20 @@ impl OptionAsyncModule { #[turbo_tasks::function] pub async fn is_async( self: Vc, - availability_info: Value, + availability_root: Option>>, ) -> Result> { Ok(Vc::cell( - self.module_options(availability_info).await?.is_some(), + self.module_options(availability_root).await?.is_some(), )) } #[turbo_tasks::function] pub async fn module_options( self: Vc, - availability_info: Value, + availability_root: Option>>, ) -> Result> { if let Some(async_module) = &*self.await? { - return Ok(async_module.module_options(availability_info)); + return Ok(async_module.module_options(availability_root)); } Ok(OptionAsyncModuleOptions::none()) @@ -124,7 +127,14 @@ impl AsyncModuleScc { for scc in &*this.scope.get_scc_children(this.scc).await? { // Because we generated SCCs there can be no loops in the children, so calling // recursively is fine. - if *AsyncModuleScc::new(*scc, this.scope).is_async().await? { + // AsyncModuleScc::new is resolved here to avoid unnecessary resolve tasks for + // is_async in this hot code path. + if *AsyncModuleScc::new(*scc, this.scope) + .resolve() + .await? + .is_async() + .await? + { return Ok(Vc::cell(true)); } } @@ -141,7 +151,7 @@ impl AsyncModule { #[turbo_tasks::function] async fn get_async_idents( self: Vc, - availability_info: Value, + availability_root: Option>>, ) -> Result> { let this = self.await?; @@ -158,7 +168,7 @@ impl AsyncModule { ReferencedAsset::Some(placeable) => { if *placeable .get_async_module() - .is_async(availability_info) + .is_async(availability_root) .await? { referenced_asset.get_ident().await? @@ -183,11 +193,11 @@ impl AsyncModule { #[turbo_tasks::function] async fn get_scc( self: Vc, - availability_info: Value, + availability_root: Option>>, ) -> Result> { let this = self.await?; - let scope = EsmScope::new(availability_info); + let scope = EsmScope::new(availability_root); let Some(scc) = &*scope.get_scc(this.placeable).await? else { // I'm not sure if this should be possible. return Ok(Vc::cell(None)); @@ -195,7 +205,7 @@ impl AsyncModule { let scc = AsyncModuleScc::new(*scc, scope); - Ok(Vc::cell(Some(scc))) + Ok(Vc::cell(Some(scc.resolve().await?))) } /// Check if the current module or any of it's ESM children contain a top @@ -203,10 +213,10 @@ impl AsyncModule { #[turbo_tasks::function] pub async fn is_async( self: Vc, - availability_info: Value, + availability_root: Option>>, ) -> Result> { Ok( - if let Some(scc) = &*self.get_scc(availability_info).await? { + if let Some(scc) = &*self.get_scc(availability_root).await? { scc.is_async() } else { self.is_self_async() @@ -218,9 +228,9 @@ impl AsyncModule { #[turbo_tasks::function] pub async fn module_options( self: Vc, - availability_info: Value, + availability_root: Option>>, ) -> Result> { - if !*self.is_async(availability_info).await? { + if !*self.is_async(availability_root).await? { return Ok(Vc::cell(None)); } @@ -240,16 +250,34 @@ impl CodeGenerateableWithAvailabilityInfo for AsyncModule { ) -> Result> { let mut visitors = Vec::new(); - let async_idents = self.get_async_idents(availability_info).await?; + let availability_info = availability_info.into_value(); - if !async_idents.is_empty() { - visitors.push(create_visitor!(visit_mut_program(program: &mut Program) { - add_async_dependency_handler(program, &async_idents); - })); + if !matches!(availability_info, AvailabilityInfo::Untracked) { + let async_idents = self + .get_async_idents(availability_info.current_availability_root()) + .await?; + + if !async_idents.is_empty() { + visitors.push(create_visitor!(visit_mut_program(program: &mut Program) { + add_async_dependency_handler(program, &async_idents); + })); + } } Ok(CodeGeneration { visitors }.into()) } + + #[turbo_tasks::function] + async fn get_availability_info_needs( + self: Vc, + is_async_module: bool, + ) -> Result> { + let mut needs = AvailabilityInfoNeeds::none(); + if is_async_module { + needs.current_availability_root = true; + } + Ok(needs.cell()) + } } fn add_async_dependency_handler(program: &mut Program, idents: &IndexSet) { diff --git a/crates/turbopack-ecmascript/src/references/mod.rs b/crates/turbopack-ecmascript/src/references/mod.rs index 6300e0a58bdda..d919380fdb547 100644 --- a/crates/turbopack-ecmascript/src/references/mod.rs +++ b/crates/turbopack-ecmascript/src/references/mod.rs @@ -48,6 +48,7 @@ use swc_core::{ use turbo_tasks::{TryJoinIterExt, Upcast, Value, Vc}; use turbo_tasks_fs::{FileJsonContent, FileSystemPath}; use turbopack_core::{ + chunk::availability_info::AvailabilityInfoNeeds, compile_time_info::{CompileTimeInfo, FreeVarReference}, error::PrettyPrintError, issue::{analyze::AnalyzeIssue, IssueExt, IssueSeverity, IssueSource, OptionIssueSource}, @@ -139,27 +140,43 @@ pub struct AnalyzeEcmascriptModuleResult { #[turbo_tasks::value_impl] impl AnalyzeEcmascriptModuleResult { + /// Returns the pieces of AvailabilityInfo that are required for code + /// generation. #[turbo_tasks::function] - pub async fn needs_availability_info(self: Vc) -> Result> { + pub async fn get_availability_info_needs( + self: Vc, + is_async_module: bool, + ) -> Result> { let AnalyzeEcmascriptModuleResult { references, code_generation, .. } = &*self.await?; + let mut needs = AvailabilityInfoNeeds::none(); for c in code_generation.await?.iter() { - if matches!(c, CodeGen::CodeGenerateableWithAvailabilityInfo(..)) { - return Ok(Vc::cell(true)); + if let CodeGen::CodeGenerateableWithAvailabilityInfo(code_gen) = c { + needs |= *code_gen + .get_availability_info_needs(is_async_module) + .await?; + if needs.is_complete() { + return Ok(needs.cell()); + } } } for r in references.await?.iter() { - if Vc::try_resolve_sidecast::>(*r) - .await? - .is_some() + if let Some(code_gen) = + Vc::try_resolve_sidecast::>(*r) + .await? { - return Ok(Vc::cell(true)); + needs |= *code_gen + .get_availability_info_needs(is_async_module) + .await?; + if needs.is_complete() { + return Ok(needs.cell()); + } } } - Ok(Vc::cell(false)) + Ok(needs.cell()) } } diff --git a/crates/turbopack-ecmascript/src/tree_shake/chunk_item.rs b/crates/turbopack-ecmascript/src/tree_shake/chunk_item.rs index 3d0b9c60854a2..02a634f7a9155 100644 --- a/crates/turbopack-ecmascript/src/tree_shake/chunk_item.rs +++ b/crates/turbopack-ecmascript/src/tree_shake/chunk_item.rs @@ -39,13 +39,20 @@ impl EcmascriptChunkItem for EcmascriptModulePartChunkItem { availability_info: Value, ) -> Result> { let this = self.await?; - let availability_info = if *this.module.analyze().needs_availability_info().await? { - availability_info - } else { - Value::new(AvailabilityInfo::Untracked) - }; - let module = this.module.await?; + let async_module_options = module + .full_module + .get_async_module() + .module_options(availability_info.current_availability_root()); + let is_async_module = async_module_options.await?.is_some(); + + let availability_info_needs = *this + .module + .analyze() + .get_availability_info_needs(is_async_module) + .await?; + let availability_info = availability_info.reduce_to_needs(availability_info_needs); + let split_data = split_module(module.full_module); let parsed = part_of_module(split_data, module.part); @@ -54,14 +61,9 @@ impl EcmascriptChunkItem for EcmascriptModulePartChunkItem { module.full_module.ident(), this.chunking_context, this.module.analyze(), - availability_info, + Value::new(availability_info), ); - let async_module_options = module - .full_module - .get_async_module() - .module_options(availability_info); - Ok(EcmascriptChunkItemContent::new( content, this.chunking_context,