diff --git a/Cargo.toml b/Cargo.toml index a598dc37747..1fc3ae2ed99 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,6 +44,7 @@ lazycell = "1.2.0" libc = "0.2" log = "0.4.6" libgit2-sys = "0.7.9" +memchr = "2.1.3" num_cpus = "1.0" opener = "0.3.0" rustfix = "0.4.4" diff --git a/src/cargo/core/compiler/layout.rs b/src/cargo/core/compiler/layout.rs index 9ea14800a4d..cc48a41ad69 100644 --- a/src/cargo/core/compiler/layout.rs +++ b/src/cargo/core/compiler/layout.rs @@ -160,10 +160,9 @@ impl Layout { pub fn prepare(&mut self) -> io::Result<()> { if fs::metadata(&self.root).is_err() { fs::create_dir_all(&self.root)?; + self.exclude_from_backups(&self.root); } - self.exclude_from_backups(&self.root); - mkdir(&self.deps)?; mkdir(&self.native)?; mkdir(&self.incremental)?; diff --git a/src/cargo/core/mod.rs b/src/cargo/core/mod.rs index 27265541130..761eadf4525 100644 --- a/src/cargo/core/mod.rs +++ b/src/cargo/core/mod.rs @@ -14,6 +14,7 @@ pub use self::shell::{Shell, Verbosity}; pub use self::source::{GitReference, Source, SourceId, SourceMap}; pub use self::summary::{FeatureMap, FeatureValue, Summary}; pub use self::workspace::{Members, Workspace, WorkspaceConfig, WorkspaceRootConfig}; +pub use self::interning::InternedString; pub mod compiler; pub mod dependency; diff --git a/src/cargo/core/package.rs b/src/cargo/core/package.rs index d5f7d89fafc..405c71e0661 100644 --- a/src/cargo/core/package.rs +++ b/src/cargo/core/package.rs @@ -25,6 +25,7 @@ use crate::ops; use crate::util::errors::{CargoResult, CargoResultExt, HttpNot200}; use crate::util::network::Retry; use crate::util::{self, internal, lev_distance, Config, Progress, ProgressStyle}; +use crate::util::config::PackageCacheLock; /// Information about a package that is available somewhere in the file system. /// @@ -339,6 +340,9 @@ pub struct Downloads<'a, 'cfg: 'a> { /// trigger a timeout; reset `next_speed_check` and set this back to the /// configured threshold. next_speed_check_bytes_threshold: Cell<u64>, + /// Global filesystem lock to ensure only one Cargo is downloading at a + /// time. + _lock: PackageCacheLock<'cfg>, } struct Download<'cfg> { @@ -437,6 +441,7 @@ impl<'cfg> PackageSet<'cfg> { timeout, next_speed_check: Cell::new(Instant::now()), next_speed_check_bytes_threshold: Cell::new(0), + _lock: self.config.acquire_package_cache_lock()?, }) } diff --git a/src/cargo/core/source/source_id.rs b/src/cargo/core/source/source_id.rs index 16b329336c1..f0ac5f52087 100644 --- a/src/cargo/core/source/source_id.rs +++ b/src/cargo/core/source/source_id.rs @@ -337,7 +337,7 @@ impl SourceId { Kind::Registry => {} _ => return false, } - self.inner.url.to_string() == CRATES_IO_INDEX + self.inner.url.as_str() == CRATES_IO_INDEX } /// Hashes `self`. diff --git a/src/cargo/ops/cargo_generate_lockfile.rs b/src/cargo/ops/cargo_generate_lockfile.rs index d99c29e122b..88600ec11e8 100644 --- a/src/cargo/ops/cargo_generate_lockfile.rs +++ b/src/cargo/ops/cargo_generate_lockfile.rs @@ -40,6 +40,10 @@ pub fn update_lockfile(ws: &Workspace<'_>, opts: &UpdateOptions<'_>) -> CargoRes failure::bail!("you can't update in the offline mode"); } + // Updates often require a lot of modifications to the registry, so ensure + // that we're synchronized against other Cargos. + let _lock = ws.config().acquire_package_cache_lock()?; + let previous_resolve = match ops::load_pkg_lockfile(ws)?
{ Some(resolve) => resolve, None => return generate_lockfile(ws), @@ -73,6 +77,7 @@ pub fn update_lockfile(ws: &Workspace<'_>, opts: &UpdateOptions<'_>) -> CargoRes }); } } + registry.add_sources(sources)?; } diff --git a/src/cargo/ops/cargo_install.rs b/src/cargo/ops/cargo_install.rs index 352f3712ff8..973df18b330 100644 --- a/src/cargo/ops/cargo_install.rs +++ b/src/cargo/ops/cargo_install.rs @@ -488,6 +488,11 @@ fn check_yanked_install(ws: &Workspace<'_>) -> CargoResult<()> { // wouldn't be available for `compile_ws`. let (pkg_set, resolve) = ops::resolve_ws_with_method(ws, Method::Everything, &specs)?; let mut sources = pkg_set.sources_mut(); + + // Checking the yanked status involves taking a look at the registry and + // maybe updating files, so be sure to lock it here. + let _lock = ws.config().acquire_package_cache_lock()?; + for pkg_id in resolve.iter() { if let Some(source) = sources.get_mut(pkg_id.source_id()) { if source.is_yanked(pkg_id)? { diff --git a/src/cargo/ops/cargo_package.rs b/src/cargo/ops/cargo_package.rs index 87ce0816a0c..8dc39a0c7e6 100644 --- a/src/cargo/ops/cargo_package.rs +++ b/src/cargo/ops/cargo_package.rs @@ -548,6 +548,10 @@ fn compare_resolve( } fn check_yanked(config: &Config, pkg_set: &PackageSet<'_>, resolve: &Resolve) -> CargoResult<()> { + // Checking the yanked status involves taking a look at the registry and + // maybe updating files, so be sure to lock it here. + let _lock = config.acquire_package_cache_lock()?; + let mut sources = pkg_set.sources_mut(); for pkg_id in resolve.iter() { if let Some(source) = sources.get_mut(pkg_id.source_id()) { diff --git a/src/cargo/ops/common_for_install_and_uninstall.rs b/src/cargo/ops/common_for_install_and_uninstall.rs index 9eba393ec37..8502d190649 100644 --- a/src/cargo/ops/common_for_install_and_uninstall.rs +++ b/src/cargo/ops/common_for_install_and_uninstall.rs @@ -564,6 +564,11 @@ pub fn select_pkg<'a, T>( where T: Source + 'a, { + // This operation may involve updating some sources or making a few queries + // which may involve frobbing caches, so as a result make sure we + // synchronize with other global Cargos. + let _lock = config.acquire_package_cache_lock()?; + if needs_update { source.update()?; } diff --git a/src/cargo/ops/registry.rs b/src/cargo/ops/registry.rs index 83d97121b8e..4cab5bb644f 100644 --- a/src/cargo/ops/registry.rs +++ b/src/cargo/ops/registry.rs @@ -353,6 +353,7 @@ fn registry( let token = token.or(token_config); let sid = get_source_id(config, index_config.or(index), registry)?; let api_host = { + let _lock = config.acquire_package_cache_lock()?; let mut src = RegistrySource::remote(sid, &HashSet::new(), config); // Only update the index if the config is not available or `force` is set. let cfg = src.config(); diff --git a/src/cargo/ops/resolve.rs b/src/cargo/ops/resolve.rs index be273026caa..14da2061179 100644 --- a/src/cargo/ops/resolve.rs +++ b/src/cargo/ops/resolve.rs @@ -146,6 +146,10 @@ pub fn resolve_with_previous<'cfg>( specs: &[PackageIdSpec], register_patches: bool, ) -> CargoResult<Resolve> { + // We only want one Cargo at a time resolving a crate graph since this can + // involve a lot of frobbing of the global caches. + let _lock = ws.config().acquire_package_cache_lock()?; + // Here we place an artificial limitation that all non-registry sources // cannot be locked at more than one revision.
This means that if a Git // repository provides more than one package, they must all be updated in diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index da96f6d0ce7..553f0581cf7 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -154,12 +154,9 @@ impl<'cfg> Source for GitSource<'cfg> { } fn update(&mut self) -> CargoResult<()> { - let lock = - self.config - .git_path() - .open_rw(".cargo-lock-git", self.config, "the git checkouts")?; - - let db_path = lock.parent().join("db").join(&self.ident); + let git_path = self.config.git_path(); + let git_path = self.config.assert_package_cache_locked(&git_path); + let db_path = git_path.join("db").join(&self.ident); if self.config.cli_unstable().offline && !db_path.exists() { failure::bail!( @@ -189,21 +186,17 @@ impl<'cfg> Source for GitSource<'cfg> { (self.remote.db_at(&db_path)?, actual_rev.unwrap()) }; - // Don’t use the full hash, in order to contribute less to reaching the path length limit - // on Windows. See <https://github.com/servo/servo/pull/14397>. + // Don’t use the full hash, in order to contribute less to reaching the + // path length limit on Windows. See + // <https://github.com/servo/servo/pull/14397>. let short_id = db.to_short_id(&actual_rev).unwrap(); - let checkout_path = lock - .parent() + let checkout_path = git_path .join("checkouts") .join(&self.ident) .join(short_id.as_str()); - // Copy the database to the checkout location. After this we could drop - // the lock on the database as we no longer needed it, but we leave it - // in scope so the destructors here won't tamper with too much. - // Checkout is immutable, so we don't need to protect it with a lock once - // it is created. + // Copy the database to the checkout location. db.copy_to(actual_rev.clone(), &checkout_path, self.config)?; let source_id = self.source_id.with_precise(Some(actual_rev.to_string())); diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index 9d47091b29a..f30ac76ea2a 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -1,14 +1,82 @@ +//! Management of the index of a registry source +//! +//! This module contains management of the index and various operations, such as +//! actually parsing the index, looking for crates, etc. This is intended to be +//! abstract over remote indices (downloaded via git) and local registry indices +//! (which are all just present on the filesystem). +//! +//! ## Index Performance +//! +//! One important aspect of the index is that we want to optimize the "happy +//! path" as much as possible. Whenever you type `cargo build` Cargo will +//! *always* reparse the registry and learn about dependency information. This +//! is done because Cargo needs to learn about the upstream crates.io crates +//! that you're using and ensure that the preexisting `Cargo.lock` still matches +//! the current state of the world. +//! +//! Consequently, Cargo "null builds" (the index that Cargo adds to each build +//! itself) need to be fast when accessing the index. The primary performance +//! optimization here is to avoid parsing JSON blobs from the registry if we +//! don't need them. Most secondary optimizations are centered around removing +//! allocations and such, but avoiding parsing JSON is the #1 optimization. +//! +//! When we get queries from the resolver we're given a `Dependency`. This +//! dependency in turn has a version requirement, and with lock files that +//! already exist these version requirements are exact version requirements +//! `=a.b.c`.
This means that we in theory only need to parse one line of JSON +//! per query in the registry, the one that matches version `a.b.c`. +//! +//! The crates.io index, however, is not amenable to this form of query. Instead +//! the crates.io index simply is a file where each line is a JSON blob. To +//! learn about the versions in each JSON blob we would need to parse the JSON, +//! defeating the purpose of trying to parse as little as possible. +//! +//! > Note that as a small aside even *loading* the JSON from the registry is +//! > actually pretty slow. For crates.io and remote registries we don't +//! > actually check out the git index on disk because that takes quite some +//! > time and is quite large. Instead we use `libgit2` to read the JSON from +//! > the raw git objects. This in turn can be slow (aka show up high in +//! > profiles) because libgit2 has to do deflate decompression and such. +//! +//! To solve all these issues a strategy is employed here where Cargo basically +//! creates an index into the index. The first time a package is queried about +//! (first time being for an entire computer) Cargo will load the contents +//! (slowly via libgit2) from the registry. It will then (slowly) parse every +//! single line to learn about its versions. Afterwards, however, Cargo will +//! emit a new file (a cache) which is amenable for speedily parsing in future +//! invocations. +//! +//! This cache file is currently organized by basically having the semver +//! version extracted from each JSON blob. That way Cargo can quickly and easily +//! parse all versions contained and which JSON blob they're associated with. +//! The JSON blob then doesn't actually need to get parsed unless the version is +//! requested. +//! +//! Altogether the initial measurements of this show a massive improvement for +//! Cargo null build performance. It's expected that the improvements earned +//! here will continue to grow over time in the sense that the previous +//! implementation (parse all lines each time) actually continues to slow down +//! over time as new versions of a crate are published. In any case when first +//! implemented a null build of Cargo itself would parse 3700 JSON blobs from +//! the registry and load 150 blobs from git. Afterwards it parses 150 JSON +//! blobs and loads 0 files from git. Removing 200ms or more from Cargo's startup +//! time is certainly nothing to sneeze at! +//! +//! Note that this is just a high-level overview, there's of course lots of +//! details like invalidating caches and whatnot which are handled below, but +//! hopefully those are more obvious inline in the code itself. + use std::collections::{HashMap, HashSet}; +use std::fs; use std::path::Path; use std::str; -use log::{info, trace}; -use semver::Version; +use log::info; +use semver::{Version, VersionReq}; use crate::core::dependency::Dependency; -use crate::core::{PackageId, SourceId, Summary}; -use crate::sources::registry::RegistryData; -use crate::sources::registry::{RegistryPackage, INDEX_LOCK}; +use crate::core::{InternedString, PackageId, SourceId, Summary}; +use crate::sources::registry::{RegistryData, RegistryPackage}; use crate::util::{internal, CargoResult, Config, Filesystem, ToSemver}; /// Crates.io treats hyphen and underscores as interchangeable, but the index and old Cargo do not.
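As a rough illustration of the "index into the index" strategy described in the module docs above, here is a minimal, self-contained sketch — illustrative names only, not Cargo's actual types, and it assumes the `semver` crate as a dependency. The raw file is scanned once, the byte span of each line is recorded keyed by its version, and a query then parses only the JSON lines whose version actually matches:

use std::collections::HashMap;

use semver::{Version, VersionReq};

/// The raw index data plus an "index into the index": version -> byte span.
struct LineIndex {
    raw: Vec<u8>,
    spans: HashMap<Version, (usize, usize)>,
}

impl LineIndex {
    /// Scans the raw file once, recording where each line lives. `version_of`
    /// stands in for "cheaply extract the version from one JSON blob".
    fn build(raw: Vec<u8>, version_of: impl Fn(&[u8]) -> Option<Version>) -> LineIndex {
        let mut spans = HashMap::new();
        let mut start = 0;
        for (i, b) in raw.iter().enumerate() {
            if *b == b'\n' {
                if let Some(v) = version_of(&raw[start..i]) {
                    spans.insert(v, (start, i));
                }
                start = i + 1;
            }
        }
        LineIndex { raw, spans }
    }

    /// Yields only the raw JSON lines whose version matches `req`; everything
    /// else stays unparsed, which is the whole point of the optimization.
    fn matching<'a>(&'a self, req: &'a VersionReq) -> impl Iterator<Item = &'a [u8]> + 'a {
        self.spans
            .iter()
            .filter(move |&(v, _)| req.matches(v))
            .map(move |(_, &(s, e))| &self.raw[s..e])
    }
}

fn main() {
    let raw = b"{\"vers\":\"1.0.0\"}\n{\"vers\":\"2.0.0\"}\n".to_vec();
    // Toy extractor for the demo; Cargo really deserializes the JSON blob.
    let vers = |line: &[u8]| {
        let s = std::str::from_utf8(line).ok()?;
        Version::parse(s.split('"').nth(3)?).ok()
    };
    let index = LineIndex::build(raw, vers);
    let req = VersionReq::parse("=2.0.0").unwrap();
    assert_eq!(index.matching(&req).count(), 1);
}

The implementation below goes one step further and also persists this version-to-span mapping to disk (`SummariesCache`), so even the one-time scan is amortized across Cargo invocations.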
@@ -99,11 +167,65 @@ fn overflow_hyphen() { pub struct RegistryIndex<'cfg> { source_id: SourceId, path: Filesystem, - cache: HashMap<&'static str, Vec<(Summary, bool)>>, - // `(name, vers)` -> `checksum` - hashes: HashMap<&'static str, HashMap<Version, String>>, + summaries_cache: HashMap<InternedString, Summaries>, config: &'cfg Config, - locked: bool, +} + +/// An internal cache of summaries for a particular package. +/// +/// A list of summaries is loaded from disk via one of two methods: +/// +/// 1. Primarily Cargo will parse the corresponding file for a crate in the +/// upstream crates.io registry. That's just a JSON blob per line which we +/// can parse, extract the version, and then store here. +/// +/// 2. Alternatively, if Cargo has previously run, we'll have a cached index of +/// dependencies for the upstream index. This is a file that Cargo maintains +/// lazily on the local filesystem and is much faster to parse since it +/// doesn't involve parsing all of the JSON. +/// +/// The outward-facing interface of this doesn't matter too much where it's +/// loaded from, but it's important when reading the implementation to note that +/// we try to parse as little as possible! +#[derive(Default)] +struct Summaries { + /// A raw vector of uninterpreted bytes. This is what `Unparsed` start/end + /// fields are indexes into. If a `Summaries` is loaded from the crates.io + /// index then this field will be empty since nothing is `Unparsed`. + raw_data: Vec<u8>, + + /// All known versions of a crate, keyed from their `Version` to the + /// possibly parsed or unparsed version of the full summary. + versions: HashMap<Version, MaybeIndexSummary>, +} + +/// A lazily parsed `IndexSummary`. +enum MaybeIndexSummary { + /// A summary which has not been parsed. The `start` and `end` are pointers + /// into `Summaries::raw_data` which this is an entry of. + Unparsed { start: usize, end: usize }, + + /// An actually parsed summary. + Parsed(IndexSummary), +} + +/// A parsed representation of a summary from the index. +/// +/// In addition to a full `Summary` we have a few auxiliary pieces of +/// information like `yanked` and what the checksum hash is. +pub struct IndexSummary { + pub summary: Summary, + pub yanked: bool, + pub hash: String, +} + +/// A representation of the cache on disk that Cargo maintains of summaries. +/// Cargo will initially parse all summaries in the registry and will then +/// serialize that into this form and place it in a new location on disk, +/// ensuring that access in the future is much speedier. +#[derive(Default)] +struct SummariesCache<'a> { + versions: Vec<(Version, &'a [u8])>, } impl<'cfg> RegistryIndex<'cfg> { @@ -111,155 +233,127 @@ impl<'cfg> RegistryIndex<'cfg> { source_id: SourceId, path: &Filesystem, config: &'cfg Config, - locked: bool, ) -> RegistryIndex<'cfg> { RegistryIndex { source_id, path: path.clone(), - cache: HashMap::new(), - hashes: HashMap::new(), + summaries_cache: HashMap::new(), config, - locked, } } /// Returns the hash listed for a specified `PackageId`. pub fn hash(&mut self, pkg: PackageId, load: &mut dyn RegistryData) -> CargoResult<String> { - let name = pkg.name().as_str(); - let version = pkg.version(); - if let Some(s) = self.hashes.get(name).and_then(|v| v.get(version)) { - return Ok(s.clone()); - } - // Ok, we're missing the key, so parse the index file to load it.
- self.summaries(name, load)?; - self.hashes - .get(name) - .and_then(|v| v.get(version)) - .ok_or_else(|| internal(format!("no hash listed for {}", pkg))) - .map(|s| s.clone()) + let req = VersionReq::exact(pkg.version()); + let summary = self + .summaries(pkg.name(), &req, load)? + .next() + .ok_or_else(|| internal(format!("no hash listed for {}", pkg)))?; + Ok(summary.hash.clone()) } - /// Parses the on-disk metadata for the package provided. + /// Loads a list of summaries for the `name` package in this registry which + /// match `req`. /// - /// Returns a list of pairs of `(summary, yanked)` for the package name specified. - pub fn summaries( - &mut self, - name: &'static str, + /// This function will semantically parse the on-disk index, match all + /// versions, and then return an iterator over all summaries which matched. + /// Internally there's quite a few layers of caching to amortize this cost + /// though since this method is called quite a lot on null builds in Cargo. + pub fn summaries<'a, 'b>( + &'a mut self, + name: InternedString, + req: &'b VersionReq, load: &mut dyn RegistryData, - ) -> CargoResult<&Vec<(Summary, bool)>> { - if self.cache.contains_key(name) { - return Ok(&self.cache[name]); - } + ) -> CargoResult<impl Iterator<Item = &'a IndexSummary> + 'b> + where + 'a: 'b, + { + let source_id = self.source_id.clone(); + + // First up actually parse what summaries we have available. If Cargo + // has run previously this will parse a Cargo-specific cache file rather + // than the registry itself. In effect this is intended to be quite a + // cheap operation. let summaries = self.load_summaries(name, load)?; - self.cache.insert(name, summaries); - Ok(&self.cache[name]) + + // Iterate over our summaries, extract all relevant ones which match our + // version requirement, and then parse all corresponding rows in the + // registry. As a reminder this `summaries` method is called for each + // entry in a lock file on every build, so we want to absolutely + // minimize the amount of work being done here and parse as little as + // necessary. + let raw_data = &summaries.raw_data; + Ok(summaries + .versions + .iter_mut() + .filter_map(move |(k, v)| if req.matches(k) { Some(v) } else { None }) + .filter_map(move |maybe| match maybe.parse(raw_data, source_id) { + Ok(summary) => Some(summary), + Err(e) => { + info!("failed to parse `{}` registry package: {}", name, e); + None + } + })) } fn load_summaries( &mut self, - name: &str, + name: InternedString, load: &mut dyn RegistryData, - ) -> CargoResult<Vec<(Summary, bool)>> { + ) -> CargoResult<&mut Summaries> { + // If we've previously loaded what versions are present for `name`, just + // return that since our cache should still be valid. + if self.summaries_cache.contains_key(&name) { + return Ok(self.summaries_cache.get_mut(&name).unwrap()); + } + // Prepare the `RegistryData` which will lazily initialize internal data - // structures. Note that this is also importantly needed to initialize - // to avoid deadlocks where we acquire a lock below but the `load` - // function inside *also* wants to acquire a lock. See an instance of - // this on #5551. + // structures.
load.prepare()?; - let (root, _lock) = if self.locked { - let lock = self - .path - .open_ro(Path::new(INDEX_LOCK), self.config, "the registry index"); - match lock { - Ok(lock) => (lock.path().parent().unwrap().to_path_buf(), Some(lock)), - Err(_) => return Ok(Vec::new()), - } - } else { - (self.path.clone().into_path_unlocked(), None) - }; + let root = load.assert_index_locked(&self.path); + let cache_root = root.join(".cache"); + let index_version = load.current_version(); + + // See module comment in `registry/mod.rs` for why this is structured + // the way it is. let fs_name = name .chars() .flat_map(|c| c.to_lowercase()) .collect::<String>(); - - // See module comment for why this is structured the way it is. let raw_path = match fs_name.len() { 1 => format!("1/{}", fs_name), 2 => format!("2/{}", fs_name), 3 => format!("3/{}/{}", &fs_name[..1], fs_name), _ => format!("{}/{}/{}", &fs_name[0..2], &fs_name[2..4], fs_name), }; - let mut ret = Vec::new(); for path in UncanonicalizedIter::new(&raw_path).take(1024) { - let mut hit_closure = false; - let err = load.load(&root, Path::new(&path), &mut |contents| { - hit_closure = true; - let contents = str::from_utf8(contents) - .map_err(|_| failure::format_err!("registry index file was not valid utf-8"))?; - ret.reserve(contents.lines().count()); - let lines = contents.lines().map(|s| s.trim()).filter(|l| !l.is_empty()); - // Attempt forwards-compatibility on the index by ignoring - // everything that we ourselves don't understand, that should - // allow future cargo implementations to break the - // interpretation of each line here and older cargo will simply - // ignore the new lines. - ret.extend(lines.filter_map(|line| { - let (summary, locked) = match self.parse_registry_package(line) { - Ok(p) => p, - Err(e) => { - info!("failed to parse `{}` registry package: {}", name, e); - trace!("line: {}", line); - return None; - } - }; - Some((summary, locked)) - })); - - Ok(()) - }); - - // We ignore lookup failures as those are just crates which don't exist - // or we haven't updated the registry yet. If we actually ran the - // closure though then we care about those errors. - if hit_closure { - err?; - // Crates.io ensures that there is only one hyphen and underscore equivalent - // result in the index so return when we find it. - return Ok(ret); + // Attempt to handle misspellings by searching for a chain of related + // names to the original `raw_path` name. Only return summaries + // associated with the first hit, however. The resolver will later + // reject any candidates that have the wrong name, and along the way + // this will produce helpful "did you mean?" suggestions. for path in UncanonicalizedIter::new(&raw_path).take(1024) { + let summaries = Summaries::parse( + index_version.as_ref().map(|s| &**s), + &root, + &cache_root, + path.as_ref(), + self.source_id, + load, + self.config, + )?; + if let Some(summaries) = summaries { + self.summaries_cache.insert(name, summaries); + return Ok(self.summaries_cache.get_mut(&name).unwrap()); } } - Ok(ret) - } - - /// Parses a line from the registry's index file into a `Summary` for a package. - /// - /// The returned boolean is whether or not the summary has been yanked.
- fn parse_registry_package(&mut self, line: &str) -> CargoResult<(Summary, bool)> { - let RegistryPackage { - name, - vers, - cksum, - deps, - features, - yanked, - links, - } = serde_json::from_str(line)?; - let pkgid = PackageId::new(&name, &vers, self.source_id)?; - let name = pkgid.name(); - let deps = deps - .into_iter() - .map(|dep| dep.into_dep(self.source_id)) - .collect::<CargoResult<Vec<_>>>()?; - let mut summary = Summary::new(pkgid, deps, &features, links, false)?; - summary.set_checksum(cksum.clone()); - self.hashes - .entry(name.as_str()) - .or_insert_with(HashMap::new) - .insert(vers, cksum); - Ok((summary, yanked.unwrap_or(false))) + // If nothing was found then this crate doesn't exist, so just use an + // empty `Summaries` list. + self.summaries_cache.insert(name, Summaries::default()); + Ok(self.summaries_cache.get_mut(&name).unwrap()) } pub fn query_inner( @@ -295,31 +389,31 @@ impl<'cfg> RegistryIndex<'cfg> { online: bool, ) -> CargoResult<usize> { let source_id = self.source_id; - let name = dep.package_name().as_str(); - let summaries = self.summaries(name, load)?; - let summaries = summaries - .iter() - .filter(|&(summary, yanked)| { - // Note: This particular logic can cause problems with - // optional dependencies when offline. If at least 1 version - // of an optional dependency is downloaded, but that version - // does not satisfy the requirements, then resolution will - // fail. Unfortunately, whether or not something is optional - // is not known here. - (online || load.is_crate_downloaded(summary.package_id())) - && (!yanked || { - log::debug!("{:?}", yanked_whitelist); - log::debug!("{:?}", summary.package_id()); - yanked_whitelist.contains(&summary.package_id()) - }) - }) - .map(|s| s.0.clone()); + let summaries = self + .summaries(dep.package_name(), dep.version_req(), load)? + // First filter summaries for `--offline`. If we're online then + // everything is a candidate, otherwise if we're offline we're only + // going to consider candidates which are actually present on disk. + // + // Note: This particular logic can cause problems with + // optional dependencies when offline. If at least 1 version + // of an optional dependency is downloaded, but that version + // does not satisfy the requirements, then resolution will + // fail. Unfortunately, whether or not something is optional + // is not known here. + .filter(|s| (online || load.is_crate_downloaded(s.summary.package_id()))) + // Next filter out all yanked packages. Some yanked packages may + // leak through if they're in a whitelist (aka if they were + // previously in `Cargo.lock`) + .filter(|s| !s.yanked || yanked_whitelist.contains(&s.summary.package_id())) + .map(|s| s.summary.clone()); // Handle `cargo update --precise` here. If specified, our own source // will have a precise version listed of the form // `<pkg>=<p_req>-><f_req>` where `<pkg>` is the name of a crate on // this source, `<p_req>` is the version installed and `<f_req>` is the // version requested (argument to `--precise`).
+ let name = dep.package_name().as_str(); let summaries = summaries.filter(|s| match source_id.precise() { Some(p) if p.starts_with(name) && p[name.len()..].starts_with('=') => { let mut vers = p[name.len() + 1..].splitn(2, "->"); @@ -344,11 +438,305 @@ } pub fn is_yanked(&mut self, pkg: PackageId, load: &mut dyn RegistryData) -> CargoResult<bool> { - let summaries = self.summaries(pkg.name().as_str(), load)?; - let found = summaries - .iter() - .filter(|&(summary, _yanked)| summary.version() == pkg.version()) - .any(|(_summary, yanked)| *yanked); + let req = VersionReq::exact(pkg.version()); + let found = self + .summaries(pkg.name(), &req, load)? + .any(|summary| summary.yanked); Ok(found) } } + +impl Summaries { + /// Parse out a `Summaries` instance from on-disk state. + /// + /// This will attempt to prefer parsing a previous cache file that already + /// exists from a previous invocation of Cargo (aka you're typing `cargo + /// build` again after typing it previously). If parsing fails or the cache + /// isn't found, then we take a slower path which loads the full descriptor + /// for `relative` from the underlying index (aka typically libgit2 with + /// crates.io) and then parse everything in there. + /// + /// * `index_version` - a version string to describe the current state of + /// the index which for remote registries is the current git sha and + /// for local registries is not available. + /// * `root` - this is the root argument passed to `load` + /// * `cache_root` - this is the root on the filesystem itself of where to + /// store cache files. + /// * `relative` - this is the file we're loading from cache or the index + /// data + /// * `source_id` - the registry's SourceId used when parsing JSON blobs to + /// create summaries. + /// * `load` - the actual index implementation which may be very slow to + /// call. We avoid this if we can. + pub fn parse( + index_version: Option<&str>, + root: &Path, + cache_root: &Path, + relative: &Path, + source_id: SourceId, + load: &mut dyn RegistryData, + config: &Config, + ) -> CargoResult<Option<Summaries>> { + // First up, attempt to load the cache. This could fail for all manner + // of reasons, but consider all of them non-fatal and just log their + // occurrence in case anyone is debugging anything. + let cache_path = cache_root.join(relative); + let mut cache_contents = None; + if let Some(index_version) = index_version { + match fs::read(&cache_path) { + Ok(contents) => match Summaries::parse_cache(contents, index_version) { + Ok(s) => { + log::debug!("fast path for registry cache of {:?}", relative); + if cfg!(debug_assertions) { + cache_contents = Some(s.raw_data); + } else { + return Ok(Some(s)) + } + } + Err(e) => { + log::debug!("failed to parse {:?} cache: {}", relative, e); + } + }, + Err(e) => log::debug!("cache missing for {:?} error: {}", relative, e), + } + } + + // This is the fallback path where we actually talk to libgit2 to load + // information.
Here we parse every single line in the index (as we need + // to find the versions) + log::debug!("slow path for {:?}", relative); + let mut ret = Summaries::default(); + let mut hit_closure = false; + let mut cache_bytes = None; + let err = load.load(root, relative, &mut |contents| { + ret.raw_data = contents.to_vec(); + let mut cache = SummariesCache::default(); + hit_closure = true; + let mut start = 0; + for end in memchr::Memchr::new(b'\n', contents) { + // Attempt forwards-compatibility on the index by ignoring + // everything that we ourselves don't understand, that should + // allow future cargo implementations to break the + // interpretation of each line here and older cargo will simply + // ignore the new lines. + let line = &contents[start..end]; + let summary = match IndexSummary::parse(line, source_id) { + Ok(summary) => summary, + Err(e) => { + log::info!("failed to parse {:?} registry package: {}", relative, e); + continue; + } + }; + let version = summary.summary.package_id().version().clone(); + cache.versions.push((version.clone(), line)); + ret.versions.insert(version, summary.into()); + start = end + 1; + } + if let Some(index_version) = index_version { + cache_bytes = Some(cache.serialize(index_version)); + } + Ok(()) + }); + + // We ignore lookup failures as those are just crates which don't exist + // or we haven't updated the registry yet. If we actually ran the + // closure though then we care about those errors. + if !hit_closure { + debug_assert!(cache_contents.is_none()); + return Ok(None); + } + err?; + + // If we've got debug assertions enabled and the cache was previously + // present and considered fresh this is where the debug assertions + // actually happen to verify that our cache is indeed fresh and + // computes exactly the same value as before. + if cfg!(debug_assertions) && cache_contents.is_some() { + assert_eq!(cache_bytes, cache_contents); + } + + // Once we have our `cache_bytes` which represents the `Summaries` we're + // about to return, write that back out to disk so future Cargo + // invocations can use it. + // + // This is opportunistic so we ignore failure here but are sure to log + // something in case of error. + if let Some(cache_bytes) = cache_bytes { + if fs::create_dir_all(cache_path.parent().unwrap()).is_ok() { + let path = Filesystem::new(cache_path.clone()); + config.assert_package_cache_locked(&path); + if let Err(e) = fs::write(cache_path, cache_bytes) { + log::info!("failed to write cache: {}", e); + } + } + } + + Ok(Some(ret)) + } + + /// Parses an open `File` which represents information previously cached by + /// Cargo. + pub fn parse_cache(contents: Vec<u8>, last_index_update: &str) -> CargoResult<Summaries> { + let cache = SummariesCache::parse(&contents, last_index_update)?; + let mut ret = Summaries::default(); + for (version, summary) in cache.versions { + let (start, end) = subslice_bounds(&contents, summary); + ret.versions + .insert(version, MaybeIndexSummary::Unparsed { start, end }); + } + ret.raw_data = contents; + return Ok(ret); + + // Returns the start/end offsets of `inner` within `outer`. Asserts that + // `inner` is a subslice of `outer`.
+ fn subslice_bounds(outer: &[u8], inner: &[u8]) -> (usize, usize) { + let outer_start = outer.as_ptr() as usize; + let outer_end = outer_start + outer.len(); + let inner_start = inner.as_ptr() as usize; + let inner_end = inner_start + inner.len(); + assert!(inner_start >= outer_start); + assert!(inner_end <= outer_end); + (inner_start - outer_start, inner_end - outer_start) + } + } +} + +// Implementation of serializing/deserializing the cache of summaries on disk. +// Currently the format looks like: +// +// +--------------+-------------+---+ +// | version byte | git sha rev | 0 | +// +--------------+-------------+---+ +// +// followed by... +// +// +----------------+---+------------+---+ +// | semver version | 0 | JSON blob | 0 | ... +// +----------------+---+------------+---+ +// +// The idea is that this is a very easy file for Cargo to parse in future +// invocations. The read from disk should be quite fast and then afterwards all +// we need to know is what versions correspond to which JSON blob. +// +// The leading version byte is intended to ensure that there's some level of +// future compatibility against changes to this cache format so if different +// versions of Cargo share the same cache they don't get too confused. The git +// sha lets us know when the file needs to be regenerated (it needs regeneration +// whenever the index itself updates). + +const CURRENT_CACHE_VERSION: u8 = 1; + +impl<'a> SummariesCache<'a> { + fn parse(data: &'a [u8], last_index_update: &str) -> CargoResult<SummariesCache<'a>> { + // NB: keep this method in sync with `serialize` below + let (first_byte, rest) = data + .split_first() + .ok_or_else(|| failure::format_err!("malformed cache"))?; + if *first_byte != CURRENT_CACHE_VERSION { + failure::bail!("looks like a different Cargo's cache, bailing out"); + } + let mut iter = memchr::Memchr::new(0, rest); + let mut start = 0; + if let Some(end) = iter.next() { + let update = &rest[start..end]; + if update != last_index_update.as_bytes() { + failure::bail!( + "cache out of date: current index ({}) != cache ({})", + last_index_update, + str::from_utf8(update)?, + ) + } + start = end + 1; + } else { + failure::bail!("malformed file"); + } + let mut ret = SummariesCache::default(); + while let Some(version_end) = iter.next() { + let version = &rest[start..version_end]; + let version = str::from_utf8(version)?; + let version = Version::parse(version)?; + let summary_end = iter.next().unwrap(); + let summary = &rest[version_end + 1..summary_end]; + ret.versions.push((version, summary)); + start = summary_end + 1; + } + Ok(ret) + } + + fn serialize(&self, index_version: &str) -> Vec<u8> { + // NB: keep this method in sync with `parse` above + let size = self + .versions + .iter() + .map(|(_version, data)| (10 + data.len())) + .sum(); + let mut contents = Vec::with_capacity(size); + contents.push(CURRENT_CACHE_VERSION); + contents.extend_from_slice(index_version.as_bytes()); + contents.push(0); + for (version, data) in self.versions.iter() { + contents.extend_from_slice(version.to_string().as_bytes()); + contents.push(0); + contents.extend_from_slice(data); + contents.push(0); + } + return contents; + } +} + +impl MaybeIndexSummary { + /// Parses this "maybe a summary" into a `Parsed` for sure variant. + /// + /// Does nothing if this is already `Parsed`, and otherwise the `raw_data` + /// passed in is sliced with the bounds in `Unparsed` and then actually + /// parsed.
+ fn parse(&mut self, raw_data: &[u8], source_id: SourceId) -> CargoResult<&IndexSummary> { + let (start, end) = match self { + MaybeIndexSummary::Unparsed { start, end } => (*start, *end), + MaybeIndexSummary::Parsed(summary) => return Ok(summary), + }; + let summary = IndexSummary::parse(&raw_data[start..end], source_id)?; + *self = MaybeIndexSummary::Parsed(summary); + match self { + MaybeIndexSummary::Unparsed { .. } => unreachable!(), + MaybeIndexSummary::Parsed(summary) => Ok(summary), + } + } +} + +impl From<IndexSummary> for MaybeIndexSummary { + fn from(summary: IndexSummary) -> MaybeIndexSummary { + MaybeIndexSummary::Parsed(summary) + } +} + +impl IndexSummary { + /// Parses a line from the registry's index file into an `IndexSummary` for + /// a package. + /// + /// The `line` provided is expected to be valid JSON. + fn parse(line: &[u8], source_id: SourceId) -> CargoResult<IndexSummary> { + let RegistryPackage { + name, + vers, + cksum, + deps, + features, + yanked, + links, + } = serde_json::from_slice(line)?; + log::trace!("json parsed registry {}/{}", name, vers); + let pkgid = PackageId::new(&name, &vers, source_id)?; + let deps = deps + .into_iter() + .map(|dep| dep.into_dep(source_id)) + .collect::<CargoResult<Vec<_>>>()?; + let mut summary = Summary::new(pkgid, deps, &features, links, false)?; + summary.set_checksum(cksum.clone()); + Ok(IndexSummary { + summary, + yanked: yanked.unwrap_or(false), + hash: cksum, + }) + } +} diff --git a/src/cargo/sources/registry/local.rs b/src/cargo/sources/registry/local.rs index ed649c9e559..a9c16a0ea3b 100644 --- a/src/cargo/sources/registry/local.rs +++ b/src/cargo/sources/registry/local.rs @@ -1,13 +1,13 @@ -use std::io::prelude::*; -use std::io::SeekFrom; -use std::path::Path; - -use crate::core::PackageId; +use crate::core::{PackageId, InternedString}; use crate::sources::registry::{MaybeLock, RegistryConfig, RegistryData}; use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::paths; -use crate::util::{Config, FileLock, Filesystem, Sha256}; +use crate::util::{Config, Filesystem, Sha256}; use hex; +use std::fs::File; +use std::io::prelude::*; +use std::io::SeekFrom; +use std::path::Path; pub struct LocalRegistry<'cfg> { index_path: Filesystem, @@ -36,6 +36,16 @@ impl<'cfg> RegistryData for LocalRegistry<'cfg> { &self.index_path } + fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path { + // Note that the `*_unlocked` variant is used here since we're not + // modifying the index and it's required to be externally synchronized. + path.as_path_unlocked() + } + + fn current_version(&self) -> Option<InternedString> { + None + } + fn load( &self, root: &Path, @@ -71,7 +81,12 @@ impl<'cfg> RegistryData for LocalRegistry<'cfg> { fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult<MaybeLock> { let crate_file = format!("{}-{}.crate", pkg.name(), pkg.version()); - let mut crate_file = self.root.open_ro(&crate_file, self.config, "crate file")?; + + // Note that the usage of `into_path_unlocked` here is because the local + // crate files here never change in that we're not the one writing them, + // so it's not our responsibility to synchronize access to them. + let path = self.root.join(&crate_file).into_path_unlocked(); + let mut crate_file = File::open(&path)?; // If we've already got an unpacked version of this crate, then skip the // checksum below as it is in theory already verified.
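The on-disk cache layout that `SummariesCache::serialize` and `SummariesCache::parse` above implement ("version byte | index sha | 0 | semver | 0 | JSON blob | 0 | ...") can be demonstrated end to end in a few lines. A self-contained sketch with made-up sample data — an illustration of the format, not the code from this PR:

// Serialize entries of `(semver, JSON blob)` using the layout sketched above.
fn serialize(index_version: &str, entries: &[(&str, &str)]) -> Vec<u8> {
    let mut out = vec![1u8]; // stand-in for CURRENT_CACHE_VERSION
    out.extend_from_slice(index_version.as_bytes());
    out.push(0);
    for (vers, json) in entries {
        out.extend_from_slice(vers.as_bytes());
        out.push(0);
        out.extend_from_slice(json.as_bytes());
        out.push(0);
    }
    out
}

fn main() {
    let json = r#"{"name":"demo","vers":"1.0.0"}"#;
    let bytes = serialize("deadbeef", &[("1.0.0", json)]);
    // Reading it back is just walking the NUL separators, the same way
    // `SummariesCache::parse` does with `memchr`.
    assert_eq!(bytes[0], 1);
    let mut fields = bytes[1..].split(|&b| b == 0);
    assert_eq!(fields.next().unwrap(), b"deadbeef");
    assert_eq!(fields.next().unwrap(), b"1.0.0");
    assert_eq!(fields.next().unwrap(), json.as_bytes());
}

NUL works as a separator here because neither a semver string nor a JSON line can contain a zero byte, which keeps both serializing and parsing a single linear pass.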
@@ -89,7 +104,7 @@ impl<'cfg> RegistryData for LocalRegistry<'cfg> { loop { let n = crate_file .read(&mut buf) - .chain_err(|| format!("failed to read `{}`", crate_file.path().display()))?; + .chain_err(|| format!("failed to read `{}`", path.display()))?; if n == 0 { break; } @@ -109,7 +124,7 @@ impl<'cfg> RegistryData for LocalRegistry<'cfg> { _pkg: PackageId, _checksum: &str, _data: &[u8], - ) -> CargoResult<FileLock> { + ) -> CargoResult<File> { panic!("this source doesn't download") } } diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index fb4bf5fcf49..cffdc943cbf 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -161,25 +161,25 @@ use std::borrow::Cow; use std::collections::BTreeMap; use std::collections::HashSet; +use std::fs::{File, OpenOptions}; use std::io::Write; use std::path::{Path, PathBuf}; use flate2::read::GzDecoder; use log::debug; -use semver::Version; +use semver::{Version, VersionReq}; use serde::Deserialize; use tar::Archive; use crate::core::dependency::{Dependency, Kind}; use crate::core::source::MaybePackage; -use crate::core::{Package, PackageId, Source, SourceId, Summary}; +use crate::core::{Package, PackageId, Source, SourceId, Summary, InternedString}; use crate::sources::PathSource; use crate::util::errors::CargoResultExt; use crate::util::hex; use crate::util::to_url::ToUrl; -use crate::util::{internal, CargoResult, Config, FileLock, Filesystem}; +use crate::util::{internal, CargoResult, Config, Filesystem}; -const INDEX_LOCK: &str = ".cargo-index-lock"; const PACKAGE_SOURCE_LOCK: &str = ".cargo-ok"; pub const CRATES_IO_INDEX: &str = "https://github.com/rust-lang/crates.io-index"; pub const CRATES_IO_REGISTRY: &str = "crates-io"; @@ -194,7 +194,6 @@ pub struct RegistrySource<'cfg> { ops: Box<dyn RegistryData + 'cfg>, index: index::RegistryIndex<'cfg>, yanked_whitelist: HashSet<PackageId>, - index_locked: bool, } #[derive(Deserialize)] @@ -358,27 +357,25 @@ pub trait RegistryData { fn index_path(&self) -> &Filesystem; fn load( &self, - _root: &Path, + root: &Path, path: &Path, data: &mut dyn FnMut(&[u8]) -> CargoResult<()>, ) -> CargoResult<()>; fn config(&mut self) -> CargoResult<Option<RegistryConfig>>; fn update_index(&mut self) -> CargoResult<()>; fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult<MaybeLock>; - fn finish_download( - &mut self, - pkg: PackageId, - checksum: &str, - data: &[u8], - ) -> CargoResult<FileLock>; + fn finish_download(&mut self, pkg: PackageId, checksum: &str, data: &[u8]) -> CargoResult<File>; fn is_crate_downloaded(&self, _pkg: PackageId) -> bool { true } + fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path; + fn current_version(&self) -> Option<InternedString>; } pub enum MaybeLock { - Ready(FileLock), + Ready(File), Download { url: String, descriptor: String }, } @@ -400,14 +397,7 @@ impl<'cfg> RegistrySource<'cfg> { ) -> RegistrySource<'cfg> { let name = short_name(source_id); let ops = remote::RemoteRegistry::new(source_id, config, &name); - RegistrySource::new( - source_id, - config, - &name, - Box::new(ops), - yanked_whitelist, - true, - ) + RegistrySource::new(source_id, config, &name, Box::new(ops), yanked_whitelist) } pub fn local( @@ -418,14 +408,7 @@ ) -> RegistrySource<'cfg> { let name = short_name(source_id); let ops = local::LocalRegistry::new(path, config, &name); - RegistrySource::new( - source_id, - config, - &name, - Box::new(ops), - yanked_whitelist, - false, - ) + RegistrySource::new(source_id, config, &name, Box::new(ops), yanked_whitelist) } fn new( @@ -434,16 +417,14 @@
impl<'cfg> RegistrySource<'cfg> { name: &str, ops: Box<dyn RegistryData + 'cfg>, yanked_whitelist: &HashSet<PackageId>, - index_locked: bool, ) -> RegistrySource<'cfg> { RegistrySource { src_path: config.registry_source_path().join(name), config, source_id, updated: false, - index: index::RegistryIndex::new(source_id, ops.index_path(), config, index_locked), + index: index::RegistryIndex::new(source_id, ops.index_path(), config), yanked_whitelist: yanked_whitelist.clone(), - index_locked, ops, } } @@ -459,36 +440,26 @@ impl<'cfg> RegistrySource<'cfg> { /// compiled. /// /// No action is taken if the source looks like it's already unpacked. - fn unpack_package(&self, pkg: PackageId, tarball: &FileLock) -> CargoResult<PathBuf> { + fn unpack_package(&self, pkg: PackageId, tarball: &File) -> CargoResult<PathBuf> { // The `.cargo-ok` file is used to track if the source is already - // unpacked and to lock the directory for unpacking. - let mut ok = { - let package_dir = format!("{}-{}", pkg.name(), pkg.version()); - let dst = self.src_path.join(&package_dir); - dst.create_dir()?; - - // Attempt to open a read-only copy first to avoid an exclusive write - // lock and also work with read-only filesystems. If the file has - // any data, assume the source is already unpacked. - if let Ok(ok) = dst.open_ro(PACKAGE_SOURCE_LOCK, self.config, &package_dir) { - let meta = ok.file().metadata()?; - if meta.len() > 0 { - let unpack_dir = ok.parent().to_path_buf(); - return Ok(unpack_dir); - } - } - - dst.open_rw(PACKAGE_SOURCE_LOCK, self.config, &package_dir)? - }; - let unpack_dir = ok.parent().to_path_buf(); - - // If the file has any data, assume the source is already unpacked. - let meta = ok.file().metadata()?; + // unpacked. + let package_dir = format!("{}-{}", pkg.name(), pkg.version()); + let dst = self.src_path.join(&package_dir); + dst.create_dir()?; + let path = dst.join(PACKAGE_SOURCE_LOCK); + let path = self.config.assert_package_cache_locked(&path); + let unpack_dir = path.parent().unwrap(); + let mut ok = OpenOptions::new() + .create(true) + .read(true) + .write(true) + .open(&path)?; + let meta = ok.metadata()?; if meta.len() > 0 { - return Ok(unpack_dir); + return Ok(unpack_dir.to_path_buf()); } - let gz = GzDecoder::new(tarball.file()); + let gz = GzDecoder::new(tarball); let mut tar = Archive::new(gz); let prefix = unpack_dir.file_name().unwrap(); let parent = unpack_dir.parent().unwrap(); @@ -523,19 +494,18 @@ impl<'cfg> RegistrySource<'cfg> { // Write to the lock file to indicate that unpacking was successful. write!(ok, "ok")?; - Ok(unpack_dir) + Ok(unpack_dir.to_path_buf()) } fn do_update(&mut self) -> CargoResult<()> { self.ops.update_index()?; let path = self.ops.index_path(); - self.index = - index::RegistryIndex::new(self.source_id, path, self.config, self.index_locked); + self.index = index::RegistryIndex::new(self.source_id, path, self.config); self.updated = true; Ok(()) } - fn get_pkg(&mut self, package: PackageId, path: &FileLock) -> CargoResult<Package> { + fn get_pkg(&mut self, package: PackageId, path: &File) -> CargoResult<Package> { let path = self .unpack_package(package, path) .chain_err(|| internal(format!("failed to unpack package `{}`", package)))?; @@ -548,13 +518,12 @@ impl<'cfg> RegistrySource<'cfg> { // After we've loaded the package configure its summary's `checksum` // field with the checksum we know for this `PackageId`.
- let summaries = self + let req = VersionReq::exact(package.version()); + let summary_with_cksum = self .index - .summaries(package.name().as_str(), &mut *self.ops)?; - let summary_with_cksum = summaries - .iter() - .map(|s| &s.0) - .find(|s| s.package_id() == package) + .summaries(package.name(), &req, &mut *self.ops)? + .map(|s| s.summary.clone()) + .next() .expect("summary not found"); if let Some(cksum) = summary_with_cksum.checksum() { pkg.manifest_mut() diff --git a/src/cargo/sources/registry/remote.rs b/src/cargo/sources/registry/remote.rs index 7a0dcbf9b59..a5c580c336f 100644 --- a/src/cargo/sources/registry/remote.rs +++ b/src/cargo/sources/registry/remote.rs @@ -1,24 +1,20 @@ +use crate::core::{PackageId, SourceId, InternedString}; +use crate::sources::git; +use crate::sources::registry::MaybeLock; +use crate::sources::registry::{RegistryConfig, RegistryData, CRATE_TEMPLATE, VERSION_TEMPLATE}; +use crate::util::errors::{CargoResult, CargoResultExt}; +use crate::util::{Config, Filesystem, Sha256}; +use lazycell::LazyCell; +use log::{debug, trace}; use std::cell::{Cell, Ref, RefCell}; use std::fmt::Write as FmtWrite; +use std::fs::{self, File, OpenOptions}; use std::io::prelude::*; use std::io::SeekFrom; use std::mem; use std::path::Path; use std::str; -use lazycell::LazyCell; -use log::{debug, trace}; - -use crate::core::{PackageId, SourceId}; -use crate::sources::git; -use crate::sources::registry::MaybeLock; -use crate::sources::registry::{ - RegistryConfig, RegistryData, CRATE_TEMPLATE, INDEX_LOCK, VERSION_TEMPLATE, -}; -use crate::util::errors::{CargoResult, CargoResultExt}; -use crate::util::{Config, Sha256}; -use crate::util::{FileLock, Filesystem}; - pub struct RemoteRegistry<'cfg> { index_path: Filesystem, cache_path: Filesystem, @@ -27,6 +23,7 @@ pub struct RemoteRegistry<'cfg> { tree: RefCell<Option<git2::Tree<'static>>>, repo: LazyCell<git2::Repository>, head: Cell<Option<git2::Oid>>, + current_sha: Cell<Option<InternedString>>, } impl<'cfg> RemoteRegistry<'cfg> { @@ -39,12 +36,13 @@ impl<'cfg> RemoteRegistry<'cfg> { tree: RefCell::new(None), repo: LazyCell::new(), head: Cell::new(None), + current_sha: Cell::new(None), } } fn repo(&self) -> CargoResult<&git2::Repository> { self.repo.try_borrow_with(|| { - let path = self.index_path.clone().into_path_unlocked(); + let path = self.config.assert_package_cache_locked(&self.index_path); // Fast path without a lock if let Ok(repo) = git2::Repository::open(&path) { @@ -54,15 +52,11 @@ impl<'cfg> RemoteRegistry<'cfg> { // Ok, now we need to lock and try the whole thing over again. trace!("acquiring registry index lock"); - let lock = self.index_path.open_rw( - Path::new(INDEX_LOCK), - self.config, - "the registry index", - )?; match git2::Repository::open(&path) { Ok(repo) => Ok(repo), Err(_) => { - let _ = lock.remove_siblings(); + drop(fs::remove_dir_all(&path)); + fs::create_dir_all(&path)?; // Note that we'd actually prefer to use a bare repository // here as we're not actually going to check anything out.
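For readers unfamiliar with `lazycell`, the `repo()` accessor above relies on `LazyCell::try_borrow_with` to memoize the opened repository, so the fallible open/initialize work runs at most once per `RemoteRegistry`. A minimal standalone sketch of the same pattern — a `String` stands in for `git2::Repository`, and the error type is arbitrary:

use lazycell::LazyCell;

struct Registry {
    // Stands in for `LazyCell<git2::Repository>` in the real code.
    repo: LazyCell<String>,
}

impl Registry {
    fn repo(&self) -> Result<&String, std::io::Error> {
        // The closure only runs on the first call; later calls hand back the
        // already-initialized value.
        self.repo.try_borrow_with(|| Ok("opened repository handle".to_string()))
    }
}

fn main() -> Result<(), std::io::Error> {
    let r = Registry { repo: LazyCell::new() };
    let first = r.repo()?;
    let second = r.repo()?;
    // Same allocation both times: initialization happened exactly once.
    assert!(std::ptr::eq(first, second));
    Ok(())
}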
@@ -129,6 +123,8 @@ impl<'cfg> RemoteRegistry<'cfg> { } } +const LAST_UPDATED_FILE: &str = ".last-updated"; + impl<'cfg> RegistryData for RemoteRegistry<'cfg> { fn prepare(&self) -> CargoResult<()> { self.repo()?; // create intermediate dirs and initialize the repo @@ -139,6 +135,19 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> { &self.index_path } + fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path { + self.config.assert_package_cache_locked(path) + } + + fn current_version(&self) -> Option<InternedString> { + if let Some(sha) = self.current_sha.get() { + return Some(sha); + } + let sha = InternedString::new(&self.head().ok()?.to_string()); + self.current_sha.set(Some(sha)); + Some(sha) + } + fn load( &self, _root: &Path, @@ -162,9 +171,7 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> { fn config(&mut self) -> CargoResult<Option<RegistryConfig>> { debug!("loading config"); self.prepare()?; - let _lock = - self.index_path - .open_ro(Path::new(INDEX_LOCK), self.config, "the registry index")?; + self.config.assert_package_cache_locked(&self.index_path); let mut config = None; self.load(Path::new(""), Path::new("config.json"), &mut |json| { config = Some(serde_json::from_slice(json)?); @@ -213,9 +220,8 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> { self.prepare()?; self.head.set(None); *self.tree.borrow_mut() = None; - let _lock = - self.index_path - .open_rw(Path::new(INDEX_LOCK), self.config, "the registry index")?; + self.current_sha.set(None); + let path = self.config.assert_package_cache_locked(&self.index_path); self.config .shell() .status("Updating", self.source_id.display_index())?; @@ -227,6 +233,11 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> { git::fetch(repo, url, refspec, self.config) .chain_err(|| format!("failed to fetch `{}`", url))?; self.config.updated_sources().insert(self.source_id); + + // Create a dummy file to record the mtime for when we updated the + // index. + File::create(&path.join(LAST_UPDATED_FILE))?; + Ok(()) } @@ -239,8 +250,10 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> { // // If this fails then we fall through to the exclusive path where we may // have to redownload the file.
- if let Ok(dst) = self.cache_path.open_ro(&filename, self.config, &filename) { - let meta = dst.file().metadata()?; + let path = self.cache_path.join(&filename); + let path = self.config.assert_package_cache_locked(&path); + if let Ok(dst) = File::open(&path) { + let meta = dst.metadata()?; if meta.len() > 0 { return Ok(MaybeLock::Ready(dst)); } @@ -266,7 +279,7 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> { pkg: PackageId, checksum: &str, data: &[u8], - ) -> CargoResult<FileLock> { + ) -> CargoResult<File> { // Verify what we just downloaded let mut state = Sha256::new(); state.update(data); + } let filename = self.filename(pkg); - let mut dst = self.cache_path.open_rw(&filename, self.config, &filename)?; - let meta = dst.file().metadata()?; + self.cache_path.create_dir()?; + let path = self.cache_path.join(&filename); + let path = self.config.assert_package_cache_locked(&path); + let mut dst = OpenOptions::new() + .create(true) + .read(true) + .write(true) + .open(&path)?; + let meta = dst.metadata()?; if meta.len() > 0 { return Ok(dst); } @@ -290,8 +310,10 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> { let filename = format!("{}-{}.crate", pkg.name(), pkg.version()); let path = Path::new(&filename); - if let Ok(dst) = self.cache_path.open_ro(path, self.config, &filename) { - if let Ok(meta) = dst.file().metadata() { + let path = self.cache_path.join(path); + let path = self.config.assert_package_cache_locked(&path); + if let Ok(dst) = File::open(path) { + if let Ok(meta) = dst.metadata() { return meta.len() > 0; } } diff --git a/src/cargo/util/config.rs b/src/cargo/util/config.rs index b2014dda8d3..c3935d7281e 100644 --- a/src/cargo/util/config.rs +++ b/src/cargo/util/config.rs @@ -29,7 +29,7 @@ use crate::util::errors::{self, internal, CargoResult, CargoResultExt}; use crate::util::toml as cargo_toml; use crate::util::Filesystem; use crate::util::Rustc; -use crate::util::{paths, validate_package_name}; +use crate::util::{paths, validate_package_name, FileLock}; use crate::util::{ToUrl, ToUrlWithBase}; /// Configuration information for cargo. This is not specific to a build, it is information @@ -76,6 +76,9 @@ pub struct Config { profiles: LazyCell<ConfigProfiles>, /// Tracks which sources have been updated to avoid multiple updates. updated_sources: LazyCell<RefCell<HashSet<SourceId>>>, + /// Lock, if held, of the global package cache along with the number of + /// acquisitions so far.
+ package_cache_lock: RefCell<Option<(FileLock, usize)>>, } impl Config { @@ -132,6 +135,7 @@ impl Config { env, profiles: LazyCell::new(), updated_sources: LazyCell::new(), + package_cache_lock: RefCell::new(None), } } @@ -828,6 +832,36 @@ impl Config { }; T::deserialize(d).map_err(|e| e.into()) } + + pub fn assert_package_cache_locked<'a>(&self, f: &'a Filesystem) -> &'a Path { + let ret = f.as_path_unlocked(); + assert!( + self.package_cache_lock.borrow().is_some(), + "package cache lock is not currently held, Cargo forgot to call \ + `acquire_package_cache_lock` before we got to this stack frame", + ); + assert!(ret.starts_with(self.home_path.as_path_unlocked())); + return ret; + } + + pub fn acquire_package_cache_lock<'a>(&'a self) -> CargoResult<PackageCacheLock<'a>> { + let mut slot = self.package_cache_lock.borrow_mut(); + match *slot { + Some((_, ref mut cnt)) => { + *cnt += 1; + } + None => { + let lock = self + .home_path + .open_rw(".package-cache", self, "package cache lock") + .chain_err(|| "failed to acquire package cache lock")?; + *slot = Some((lock, 1)); + } + } + Ok(PackageCacheLock(self)) + } + + pub fn release_package_cache_lock(&self) {} } /// A segment of a config key. @@ -1664,3 +1698,16 @@ pub fn save_credentials(cfg: &Config, token: String, registry: Option<String>) - Ok(()) } } + +pub struct PackageCacheLock<'a>(&'a Config); + +impl Drop for PackageCacheLock<'_> { + fn drop(&mut self) { + let mut slot = self.0.package_cache_lock.borrow_mut(); + let (_, cnt) = slot.as_mut().unwrap(); + *cnt -= 1; + if *cnt == 0 { + *slot = None; + } + } +} diff --git a/src/cargo/util/flock.rs b/src/cargo/util/flock.rs index e49a4daaa38..96458bdf356 100644 --- a/src/cargo/util/flock.rs +++ b/src/cargo/util/flock.rs @@ -12,6 +12,7 @@ use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::paths; use crate::util::Config; +#[derive(Debug)] pub struct FileLock { f: Option<File>, path: PathBuf, @@ -136,6 +137,14 @@ impl Filesystem { self.root } + /// Returns the underlying `Path`. + /// + /// Note that this is a relatively dangerous operation and should be used + /// with great caution! + pub fn as_path_unlocked(&self) -> &Path { + &self.root + } + /// Creates the directory pointed to by this filesystem. /// /// Handles errors where other Cargo processes are also attempting to diff --git a/tests/testsuite/concurrent.rs b/tests/testsuite/concurrent.rs index 0368f48f739..64404038157 100644 --- a/tests/testsuite/concurrent.rs +++ b/tests/testsuite/concurrent.rs @@ -453,7 +453,7 @@ fn debug_release_ok() { let a = a.join().unwrap(); execs() - .with_stderr( + .with_stderr_contains( "\ [COMPILING] foo v0.0.1 [..] [FINISHED] dev [unoptimized + debuginfo] target(s) in [..] ", ) .run_output(&a); execs() - .with_stderr( + .with_stderr_contains( "\ [COMPILING] foo v0.0.1 [..] [FINISHED] release [optimized] target(s) in [..] diff --git a/tests/testsuite/search.rs b/tests/testsuite/search.rs index cb6f2174898..466e182e5cb 100644 --- a/tests/testsuite/search.rs +++ b/tests/testsuite/search.rs @@ -108,8 +108,10 @@ fn not_update() { let sid = SourceId::for_registry(&registry_url()).unwrap(); let cfg = Config::new(Shell::new(), paths::root(), paths::home().join(".cargo")); + let lock = cfg.acquire_package_cache_lock().unwrap(); let mut regsrc = RegistrySource::remote(sid, &HashSet::new(), &cfg); regsrc.update().unwrap(); + drop(lock); cargo_process("search postgres") .with_stdout_contains("hoare = \"0.1.1\" # Design by contract style assertions for Rust")
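The heart of this PR is the process-wide package cache lock in config.rs above: `acquire_package_cache_lock` takes the file lock once, merely bumps a counter on nested acquisitions, and the RAII guard's `Drop` releases the underlying `FileLock` only when the counter returns to zero. A self-contained sketch of that reentrancy pattern — a `String` stands in for the `FileLock`, and the names are illustrative:

use std::cell::RefCell;

struct Config {
    // (lock handle, acquisition count), mirroring `package_cache_lock`.
    lock: RefCell<Option<(String, usize)>>,
}

struct CacheLockGuard<'a>(&'a Config);

impl Config {
    fn acquire(&self) -> CacheLockGuard<'_> {
        let mut slot = self.lock.borrow_mut();
        match *slot {
            // Already held by this process: just count the reacquisition.
            Some((_, ref mut cnt)) => *cnt += 1,
            // First acquisition: take the (simulated) file lock.
            None => *slot = Some((String::from("flock handle"), 1)),
        }
        CacheLockGuard(self)
    }
}

impl Drop for CacheLockGuard<'_> {
    fn drop(&mut self) {
        let mut slot = self.0.lock.borrow_mut();
        let (_, cnt) = slot.as_mut().unwrap();
        *cnt -= 1;
        if *cnt == 0 {
            *slot = None; // dropping the handle releases the file lock
        }
    }
}

fn main() {
    let config = Config { lock: RefCell::new(None) };
    let outer = config.acquire();
    {
        let _inner = config.acquire(); // reentrant: count bumps to 2
    } // count back to 1, lock still held
    drop(outer); // count hits 0, lock released
    assert!(config.lock.borrow().is_none());
}

This is why the call sites added throughout this PR (resolve, update, install, download) can each take the lock defensively without worrying about deadlocking against one another within the same Cargo process.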