Skip to content

Commit

Permalink
Auto merge of #6880 - alexcrichton:cache, r=Eh2406
Browse files Browse the repository at this point in the history
Parse less JSON on null builds

This commit fixes a performance pathology in Cargo today. Whenever Cargo
generates a lock file (which happens on all invocations of `cargo build`
for example) Cargo will parse the crates.io index to learn about
dependencies. Currently, however, when it parses a crate it parses the
JSON blob for every single version of the crate. With a lock file,
however, or with incremental builds only one of these lines of JSON is
relevant. Measured today Cargo building Cargo parses 3700 JSON
dependencies in the registry.

This commit implements an optimization that brings down the number of
parsed JSON lines in the registry to precisely the right number
necessary to build a project. For example Cargo has 150 crates in its
lock file, so now it only parses 150 JSON lines (a 20x reduction from
3700). This in turn can greatly improve Cargo's null build time. Cargo
building Cargo dropped from 120ms to 60ms on a Linux machine and 400ms
to 200ms on a Mac.

The commit internally has a lot more details about how this is done but
the general idea is to have a cache which is optimized for Cargo to read
which is maintained automatically by Cargo.

Closes #6866
  • Loading branch information
bors committed May 3, 2019
2 parents 254840c + e33881b commit 22e2f23
Show file tree
Hide file tree
Showing 20 changed files with 751 additions and 276 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ lazycell = "1.2.0"
libc = "0.2"
log = "0.4.6"
libgit2-sys = "0.7.9"
memchr = "2.1.3"
num_cpus = "1.0"
opener = "0.4"
rustfix = "0.4.4"
Expand Down
3 changes: 1 addition & 2 deletions src/cargo/core/compiler/layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,9 @@ impl Layout {
pub fn prepare(&mut self) -> io::Result<()> {
if fs::metadata(&self.root).is_err() {
fs::create_dir_all(&self.root)?;
self.exclude_from_backups(&self.root);
}

self.exclude_from_backups(&self.root);

mkdir(&self.deps)?;
mkdir(&self.native)?;
mkdir(&self.incremental)?;
Expand Down
1 change: 1 addition & 0 deletions src/cargo/core/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pub use self::shell::{Shell, Verbosity};
pub use self::source::{GitReference, Source, SourceId, SourceMap};
pub use self::summary::{FeatureMap, FeatureValue, Summary};
pub use self::workspace::{Members, Workspace, WorkspaceConfig, WorkspaceRootConfig};
pub use self::interning::InternedString;

pub mod compiler;
pub mod dependency;
Expand Down
5 changes: 5 additions & 0 deletions src/cargo/core/package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use crate::ops;
use crate::util::errors::{CargoResult, CargoResultExt, HttpNot200};
use crate::util::network::Retry;
use crate::util::{self, internal, lev_distance, Config, Progress, ProgressStyle};
use crate::util::config::PackageCacheLock;

/// Information about a package that is available somewhere in the file system.
///
Expand Down Expand Up @@ -339,6 +340,9 @@ pub struct Downloads<'a, 'cfg: 'a> {
/// trigger a timeout; reset `next_speed_check` and set this back to the
/// configured threshold.
next_speed_check_bytes_threshold: Cell<u64>,
/// Global filesystem lock to ensure only one Cargo is downloading at a
/// time.
_lock: PackageCacheLock<'cfg>,
}

struct Download<'cfg> {
Expand Down Expand Up @@ -437,6 +441,7 @@ impl<'cfg> PackageSet<'cfg> {
timeout,
next_speed_check: Cell::new(Instant::now()),
next_speed_check_bytes_threshold: Cell::new(0),
_lock: self.config.acquire_package_cache_lock()?,
})
}

Expand Down
2 changes: 1 addition & 1 deletion src/cargo/core/source/source_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ impl SourceId {
Kind::Registry => {}
_ => return false,
}
self.inner.url.to_string() == CRATES_IO_INDEX
self.inner.url.as_str() == CRATES_IO_INDEX
}

/// Hashes `self`.
Expand Down
5 changes: 5 additions & 0 deletions src/cargo/ops/cargo_generate_lockfile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ pub fn update_lockfile(ws: &Workspace<'_>, opts: &UpdateOptions<'_>) -> CargoRes
failure::bail!("you can't update in the offline mode");
}

// Updates often require a lot of modifications to the registry, so ensure
// that we're synchronized against other Cargos.
let _lock = ws.config().acquire_package_cache_lock()?;

let previous_resolve = match ops::load_pkg_lockfile(ws)? {
Some(resolve) => resolve,
None => return generate_lockfile(ws),
Expand Down Expand Up @@ -73,6 +77,7 @@ pub fn update_lockfile(ws: &Workspace<'_>, opts: &UpdateOptions<'_>) -> CargoRes
});
}
}

registry.add_sources(sources)?;
}

Expand Down
5 changes: 5 additions & 0 deletions src/cargo/ops/cargo_install.rs
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,11 @@ fn check_yanked_install(ws: &Workspace<'_>) -> CargoResult<()> {
// wouldn't be available for `compile_ws`.
let (pkg_set, resolve) = ops::resolve_ws_with_method(ws, Method::Everything, &specs)?;
let mut sources = pkg_set.sources_mut();

// Checking the yanked status invovles taking a look at the registry and
// maybe updating files, so be sure to lock it here.
let _lock = ws.config().acquire_package_cache_lock()?;

for pkg_id in resolve.iter() {
if let Some(source) = sources.get_mut(pkg_id.source_id()) {
if source.is_yanked(pkg_id)? {
Expand Down
4 changes: 4 additions & 0 deletions src/cargo/ops/cargo_package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,10 @@ fn compare_resolve(
}

fn check_yanked(config: &Config, pkg_set: &PackageSet<'_>, resolve: &Resolve) -> CargoResult<()> {
// Checking the yanked status invovles taking a look at the registry and
// maybe updating files, so be sure to lock it here.
let _lock = config.acquire_package_cache_lock()?;

let mut sources = pkg_set.sources_mut();
for pkg_id in resolve.iter() {
if let Some(source) = sources.get_mut(pkg_id.source_id()) {
Expand Down
5 changes: 5 additions & 0 deletions src/cargo/ops/common_for_install_and_uninstall.rs
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,11 @@ pub fn select_pkg<'a, T>(
where
T: Source + 'a,
{
// This operation may involve updating some sources or making a few queries
// which may involve frobbing caches, as a result make sure we synchronize
// with other global Cargos
let _lock = config.acquire_package_cache_lock()?;

if needs_update {
source.update()?;
}
Expand Down
1 change: 1 addition & 0 deletions src/cargo/ops/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,7 @@ fn registry(
let token = token.or(token_config);
let sid = get_source_id(config, index_config.or(index), registry)?;
let api_host = {
let _lock = config.acquire_package_cache_lock()?;
let mut src = RegistrySource::remote(sid, &HashSet::new(), config);
// Only update the index if the config is not available or `force` is set.
let cfg = src.config();
Expand Down
4 changes: 4 additions & 0 deletions src/cargo/ops/resolve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ pub fn resolve_with_previous<'cfg>(
specs: &[PackageIdSpec],
register_patches: bool,
) -> CargoResult<Resolve> {
// We only want one Cargo at a time resolving a crate graph since this can
// involve a lot of frobbing of the global caches.
let _lock = ws.config().acquire_package_cache_lock()?;

// Here we place an artificial limitation that all non-registry sources
// cannot be locked at more than one revision. This means that if a Git
// repository provides more than one package, they must all be updated in
Expand Down
23 changes: 8 additions & 15 deletions src/cargo/sources/git/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,12 +154,9 @@ impl<'cfg> Source for GitSource<'cfg> {
}

fn update(&mut self) -> CargoResult<()> {
let lock =
self.config
.git_path()
.open_rw(".cargo-lock-git", self.config, "the git checkouts")?;

let db_path = lock.parent().join("db").join(&self.ident);
let git_path = self.config.git_path();
let git_path = self.config.assert_package_cache_locked(&git_path);
let db_path = git_path.join("db").join(&self.ident);

if self.config.cli_unstable().offline && !db_path.exists() {
failure::bail!(
Expand Down Expand Up @@ -189,21 +186,17 @@ impl<'cfg> Source for GitSource<'cfg> {
(self.remote.db_at(&db_path)?, actual_rev.unwrap())
};

// Don’t use the full hash, in order to contribute less to reaching the path length limit
// on Windows. See <https://github.com/servo/servo/pull/14397>.
// Don’t use the full hash, in order to contribute less to reaching the
// path length limit on Windows. See
// <https://github.com/servo/servo/pull/14397>.
let short_id = db.to_short_id(&actual_rev).unwrap();

let checkout_path = lock
.parent()
let checkout_path = git_path
.join("checkouts")
.join(&self.ident)
.join(short_id.as_str());

// Copy the database to the checkout location. After this we could drop
// the lock on the database as we no longer needed it, but we leave it
// in scope so the destructors here won't tamper with too much.
// Checkout is immutable, so we don't need to protect it with a lock once
// it is created.
// Copy the database to the checkout location.
db.copy_to(actual_rev.clone(), &checkout_path, self.config)?;

let source_id = self.source_id.with_precise(Some(actual_rev.to_string()));
Expand Down
Loading

0 comments on commit 22e2f23

Please sign in to comment.