Skip to content

Commit

Permalink
src: compile code eagerly in snapshot builder
Browse files Browse the repository at this point in the history
By default V8 only compiles the top-level function and
skips code generation for inner functions - that would
only be done when those inner functions are invoked.
Since builtins are compiled as wrapped functions, most
functions that look visually top-level are not actually
included in the built-in code cache. For most of the
builtins this is not too bad because usually only a subset of
all builtin functions are needed by a particular
application and including all their code in the binary
would incur an unnecessary size overhead. But there is also
a subset of more commonly used builtins and it would be
better to include the inner functions in the built-in
code cache because they are more universally used by
most applications.

This patch changes the compilation strategy to eager compilation
(including inner functions) for the following scripts:

1. Primordials (internal/per_context/*), in all situations.
2. Bootstrap scripts (internal/bootstrap/*) and main scripts
   (internal/main/*), when being compiled for built-in code
   cache.
3. Any scripts loaded during built-in snapshot generation.

We can't compile the code eagerly during snapshot generation
and include them into the V8 snapshot itself just now because
we need to start the inspector before context deserialization
for coverage collection to work. So leave that as a TODO.

With this patch the binary size increases by about 666KB
(~0.6% increase) in return the worker startup can be 18-19% faster.

PR-URL: #51672
Reviewed-By: Yagiz Nizipli <yagiz.nizipli@sentry.io>
  • Loading branch information
joyeecheung authored and marco-ippolito committed Feb 26, 2024
1 parent 77ae03f commit 0c41210
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 15 deletions.
3 changes: 3 additions & 0 deletions src/api/environment.cc
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,9 @@ Maybe<bool> InitializePrimordials(Local<Context> context) {
// relatively cheap and all the scripts that we may want to run at
// startup are always present in it.
thread_local builtins::BuiltinLoader builtin_loader;
// Primordials can always be just eagerly compiled.
builtin_loader.SetEagerCompile();

for (const char** module = context_files; *module != nullptr; module++) {
Local<Value> arguments[] = {exports, primordials};
if (builtin_loader
Expand Down
9 changes: 9 additions & 0 deletions src/env.cc
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,15 @@ Environment::Environment(IsolateData* isolate_data,
}
}

// We are supposed to call builtin_loader_.SetEagerCompile() in
// snapshot mode here because it's beneficial to compile built-ins
// loaded in the snapshot eagerly and include the code of inner functions
// that are likely to be used by user since they are part of the core
// startup. But this requires us to start the coverage collections
// before Environment/Context creation which is not currently possible.
// TODO(joyeecheung): refactor V8ProfilerConnection classes to parse
// JSON without v8 and lift this restriction.

// We'll be creating new objects so make sure we've entered the context.
HandleScope handle_scope(isolate);

Expand Down
42 changes: 34 additions & 8 deletions src/node_builtins.cc
Original file line number Diff line number Diff line change
Expand Up @@ -286,15 +286,24 @@ MaybeLocal<Function> BuiltinLoader::LookupAndCompileInternal(
ScriptCompiler::CompileOptions options =
has_cache ? ScriptCompiler::kConsumeCodeCache
: ScriptCompiler::kNoCompileOptions;
if (should_eager_compile_) {
options = ScriptCompiler::kEagerCompile;
} else if (!to_eager_compile_.empty()) {
if (to_eager_compile_.find(id) != to_eager_compile_.end()) {
options = ScriptCompiler::kEagerCompile;
}
}
ScriptCompiler::Source script_source(
source,
origin,
has_cache ? cached_data.AsCachedData().release() : nullptr);

per_process::Debug(DebugCategory::CODE_CACHE,
"Compiling %s %s code cache\n",
id,
has_cache ? "with" : "without");
per_process::Debug(
DebugCategory::CODE_CACHE,
"Compiling %s %s code cache %s\n",
id,
has_cache ? "with" : "without",
options == ScriptCompiler::kEagerCompile ? "eagerly" : "lazily");

MaybeLocal<Function> maybe_fun =
ScriptCompiler::CompileFunction(context,
Expand Down Expand Up @@ -481,14 +490,33 @@ MaybeLocal<Value> BuiltinLoader::CompileAndCall(Local<Context> context,
return fn->Call(context, undefined, argc, argv);
}

bool BuiltinLoader::CompileAllBuiltins(Local<Context> context) {
bool BuiltinLoader::CompileAllBuiltinsAndCopyCodeCache(
Local<Context> context,
const std::vector<std::string>& eager_builtins,
std::vector<CodeCacheInfo>* out) {
std::vector<std::string_view> ids = GetBuiltinIds();
bool all_succeeded = true;
std::string v8_tools_prefix = "internal/deps/v8/tools/";
std::string primordial_prefix = "internal/per_context/";
std::string bootstrap_prefix = "internal/bootstrap/";
std::string main_prefix = "internal/main/";
to_eager_compile_ = std::unordered_set<std::string>(eager_builtins.begin(),
eager_builtins.end());

for (const auto& id : ids) {
if (id.compare(0, v8_tools_prefix.size(), v8_tools_prefix) == 0) {
// No need to generate code cache for v8 scripts.
continue;
}

// Eagerly compile primordials/boostrap/main scripts during code cache
// generation.
if (id.compare(0, primordial_prefix.size(), primordial_prefix) == 0 ||
id.compare(0, bootstrap_prefix.size(), bootstrap_prefix) == 0 ||
id.compare(0, main_prefix.size(), main_prefix) == 0) {
to_eager_compile_.emplace(id);
}

v8::TryCatch bootstrapCatch(context->GetIsolate());
auto fn = LookupAndCompile(context, id.data(), nullptr);
if (bootstrapCatch.HasCaught()) {
Expand All @@ -503,14 +531,12 @@ bool BuiltinLoader::CompileAllBuiltins(Local<Context> context) {
SaveCodeCache(id.data(), fn.ToLocalChecked());
}
}
return all_succeeded;
}

void BuiltinLoader::CopyCodeCache(std::vector<CodeCacheInfo>* out) const {
RwLock::ScopedReadLock lock(code_cache_->mutex);
for (auto const& item : code_cache_->map) {
out->push_back({item.first, item.second});
}
return all_succeeded;
}

void BuiltinLoader::RefreshCodeCache(const std::vector<CodeCacheInfo>& in) {
Expand Down
27 changes: 22 additions & 5 deletions src/node_builtins.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <unordered_set>
#include <vector>
#include "node_external_reference.h"
#include "node_mutex.h"
Expand Down Expand Up @@ -112,12 +112,18 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {
bool Exists(const char* id);
bool Add(const char* id, const UnionBytes& source);

bool CompileAllBuiltins(v8::Local<v8::Context> context);
bool CompileAllBuiltinsAndCopyCodeCache(
v8::Local<v8::Context> context,
const std::vector<std::string>& lazy_builtins,
std::vector<CodeCacheInfo>* out);
void RefreshCodeCache(const std::vector<CodeCacheInfo>& in);
void CopyCodeCache(std::vector<CodeCacheInfo>* out) const;

void CopySourceAndCodeCacheReferenceFrom(const BuiltinLoader* other);

std::vector<std::string_view> GetBuiltinIds() const;

void SetEagerCompile() { should_eager_compile_ = true; }

private:
// Only allow access from friends.
friend class CodeCacheBuilder;
Expand All @@ -126,8 +132,6 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {
void LoadJavaScriptSource(); // Loads data into source_
UnionBytes GetConfig(); // Return data for config.gypi

std::vector<std::string_view> GetBuiltinIds() const;

struct BuiltinCategories {
std::set<std::string> can_be_required;
std::set<std::string> cannot_be_required;
Expand Down Expand Up @@ -179,6 +183,18 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {

const UnionBytes config_;

// If any bulitins should be eagerly compiled i.e. with inner functions
// compiled too, either use should_eager_compile_ to compile all builtins
// eagerly, or use to_eager_compile_ to compile specific builtins eagerly.
// Currently we set should_eager_compile_ to true when compiling primordials,
// and use to_eager_compile_ to compile code cache that complements the
// snapshot, where builtins already loaded in the snapshot and a few extras
// are compiled eagerly (other less-essential built-ins are compiled lazily to
// avoid bloating the binary size). At runtime any additional compilation is
// done lazily.
bool should_eager_compile_ = false;
std::unordered_set<std::string> to_eager_compile_;

struct BuiltinCodeCache {
RwLock mutex;
BuiltinCodeCacheMap map;
Expand All @@ -188,6 +204,7 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {

friend class ::PerProcessTest;
};

} // namespace builtins

} // namespace node
Expand Down
9 changes: 7 additions & 2 deletions src/node_snapshotable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1041,10 +1041,12 @@ ExitCode BuildCodeCacheFromSnapshot(SnapshotData* out,
Context::Scope context_scope(context);
builtins::BuiltinLoader builtin_loader;
// Regenerate all the code cache.
if (!builtin_loader.CompileAllBuiltins(context)) {
if (!builtin_loader.CompileAllBuiltinsAndCopyCodeCache(
context,
out->env_info.principal_realm.builtins,
&(out->code_cache))) {
return ExitCode::kGenericUserError;
}
builtin_loader.CopyCodeCache(&(out->code_cache));
if (per_process::enabled_debug_list.enabled(DebugCategory::MKSNAPSHOT)) {
for (const auto& item : out->code_cache) {
std::string size_str = FormatSize(item.data.length);
Expand All @@ -1070,6 +1072,9 @@ ExitCode SnapshotBuilder::Generate(
}

if (!WithoutCodeCache(snapshot_config)) {
per_process::Debug(
DebugCategory::CODE_CACHE,
"---\nGenerate code cache to complement snapshot\n---\n");
// Deserialize the snapshot to recompile code cache. We need to do this in
// the second pass because V8 requires the code cache to be compiled with a
// finalized read-only space.
Expand Down

0 comments on commit 0c41210

Please sign in to comment.