Skip to content

Commit

Permalink
Avoid using mtime information for reusing cache files
Browse files Browse the repository at this point in the history
Using mtime information is pretty finnicky across platforms, so instead
take a different strategy where we embed the sha that a cache file was
generated from into the cache file itself. If the registry's sha has
changed then we regenerate the cache file, otherwise we can reuse the
cache file.

This should make cache file generation more robust (any command can
generate a cache file to get used at any time) as well as works better
across platforms (doesn't run into issues with coarse mtime systems and
the like).
  • Loading branch information
alexcrichton committed May 3, 2019
1 parent 783f22b commit 1daff03
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 59 deletions.
85 changes: 45 additions & 40 deletions src/cargo/sources/registry/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,10 @@
//! hopefully those are more obvious inline in the code itself.

use std::collections::{HashMap, HashSet};
use std::fs::{self, File};
use std::io::Read;
use std::fs;
use std::path::Path;
use std::str;

use filetime::FileTime;
use log::info;
use semver::{Version, VersionReq};

Expand Down Expand Up @@ -316,7 +314,7 @@ impl<'cfg> RegistryIndex<'cfg> {
// let root = self.config.assert_package_cache_locked(&self.path);
let root = load.assert_index_locked(&self.path);
let cache_root = root.join(".cache");
let last_index_update = load.last_modified();;
let index_version = load.current_version();

// See module comment in `registry/mod.rs` for why this is structured
// the way it is.
Expand All @@ -338,7 +336,7 @@ impl<'cfg> RegistryIndex<'cfg> {
// along the way produce helpful "did you mean?" suggestions.
for path in UncanonicalizedIter::new(&raw_path).take(1024) {
let summaries = Summaries::parse(
last_index_update,
index_version.as_ref().map(|s| &**s),
&root,
&cache_root,
path.as_ref(),
Expand Down Expand Up @@ -471,7 +469,7 @@ impl Summaries {
/// * `load` - the actual index implementation which may be very slow to
/// call. We avoid this if we can.
pub fn parse(
last_index_update: Option<FileTime>,
index_version: Option<&str>,
root: &Path,
cache_root: &Path,
relative: &Path,
Expand All @@ -483,24 +481,18 @@ impl Summaries {
// of reasons, but consider all of them non-fatal and just log their
// occurrence in case anyone is debugging anything.
let cache_path = cache_root.join(relative);
if let Some(last_index_update) = last_index_update {
match File::open(&cache_path) {
Ok(file) => {
let metadata = file.metadata()?;
let cache_mtime = FileTime::from_last_modification_time(&metadata);
if cache_mtime > last_index_update {
log::debug!("cache for {:?} is fresh", relative);
match Summaries::parse_cache(&file, &metadata) {
Ok(s) => return Ok(Some(s)),
Err(e) => {
log::debug!("failed to parse {:?} cache: {}", relative, e);
}
}
} else {
log::debug!("cache for {:?} is out of date", relative);
if let Some(index_version) = index_version {
match fs::read(&cache_path) {
Ok(contents) => match Summaries::parse_cache(contents, index_version) {
Ok(s) => {
log::debug!("fast path for registry cache of {:?}", relative);
return Ok(Some(s))
}
}
Err(e) => log::debug!("cache for {:?} error: {}", relative, e),
Err(e) => {
log::debug!("failed to parse {:?} cache: {}", relative, e);
}
},
Err(e) => log::debug!("cache missing for {:?} error: {}", relative, e),
}
}

Expand All @@ -510,7 +502,7 @@ impl Summaries {
log::debug!("slow path for {:?}", relative);
let mut ret = Summaries::default();
let mut hit_closure = false;
let mut cache_bytes = Vec::new();
let mut cache_bytes = None;
let err = load.load(root, relative, &mut |contents| {
ret.raw_data = contents.to_vec();
let mut cache = SummariesCache::default();
Expand All @@ -535,7 +527,9 @@ impl Summaries {
ret.versions.insert(version, summary.into());
start = end + 1;
}
cache_bytes = cache.serialize();
if let Some(index_version) = index_version {
cache_bytes = Some(cache.serialize(index_version));
}
Ok(())
});

Expand All @@ -553,26 +547,22 @@ impl Summaries {
//
// This is opportunistic so we ignore failure here but are sure to log
// something in case of error.
//
// Note that we also skip this when `last_index_update` is `None` because it
// means we can't handle the cache anyway.
if last_index_update.is_some() && fs::create_dir_all(cache_path.parent().unwrap()).is_ok() {
let path = Filesystem::new(cache_path.clone());
config.assert_package_cache_locked(&path);
if let Err(e) = fs::write(cache_path, cache_bytes) {
log::info!("failed to write cache: {}", e);
if let Some(cache_bytes) = cache_bytes {
if fs::create_dir_all(cache_path.parent().unwrap()).is_ok() {
let path = Filesystem::new(cache_path.clone());
config.assert_package_cache_locked(&path);
if let Err(e) = fs::write(cache_path, cache_bytes) {
log::info!("failed to write cache: {}", e);
}
}
}
Ok(Some(ret))
}

/// Parses an open `File` which represents information previously cached by
/// Cargo.
pub fn parse_cache(mut file: &File, meta: &fs::Metadata) -> CargoResult<Summaries> {
let mut contents = Vec::new();
contents.reserve(meta.len() as usize + 1);
file.read_to_end(&mut contents)?;
let cache = SummariesCache::parse(&contents)?;
pub fn parse_cache(contents: Vec<u8>, last_index_update: &str) -> CargoResult<Summaries> {
let cache = SummariesCache::parse(&contents, last_index_update)?;
let mut ret = Summaries::default();
for (version, summary) in cache.versions {
let (start, end) = subslice_bounds(&contents, summary);
Expand Down Expand Up @@ -614,7 +604,7 @@ impl Summaries {
const CURRENT_CACHE_VERSION: u8 = 1;

impl<'a> SummariesCache<'a> {
fn parse(data: &'a [u8]) -> CargoResult<SummariesCache<'a>> {
fn parse(data: &'a [u8], last_index_update: &str) -> CargoResult<SummariesCache<'a>> {
// NB: keep this method in sync with `serialize` below
let (first_byte, rest) = data
.split_first()
Expand All @@ -624,6 +614,19 @@ impl<'a> SummariesCache<'a> {
}
let mut iter = memchr::Memchr::new(0, rest);
let mut start = 0;
if let Some(end) = iter.next() {
let update = &rest[start..end];
if update != last_index_update.as_bytes() {
failure::bail!(
"cache out of date: current index ({}) != cache ({})",
last_index_update,
str::from_utf8(update)?,
)
}
start = end + 1;
} else {
failure::bail!("malformed file");
}
let mut ret = SummariesCache::default();
while let Some(version_end) = iter.next() {
let version = &rest[start..version_end];
Expand All @@ -637,7 +640,7 @@ impl<'a> SummariesCache<'a> {
Ok(ret)
}

fn serialize(&self) -> Vec<u8> {
fn serialize(&self, index_version: &str) -> Vec<u8> {
// NB: keep this method in sync with `parse` above
let size = self
.versions
Expand All @@ -646,6 +649,8 @@ impl<'a> SummariesCache<'a> {
.sum();
let mut contents = Vec::with_capacity(size);
contents.push(CURRENT_CACHE_VERSION);
contents.extend_from_slice(index_version.as_bytes());
contents.push(0);
for (version, data) in self.versions.iter() {
contents.extend_from_slice(version.to_string().as_bytes());
contents.push(0);
Expand Down
5 changes: 2 additions & 3 deletions src/cargo/sources/registry/local.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
use crate::core::PackageId;
use crate::core::{PackageId, InternedString};
use crate::sources::registry::{MaybeLock, RegistryConfig, RegistryData};
use crate::util::errors::{CargoResult, CargoResultExt};
use crate::util::paths;
use crate::util::{Config, Filesystem, Sha256};
use filetime::FileTime;
use hex;
use std::fs::File;
use std::io::prelude::*;
Expand Down Expand Up @@ -43,7 +42,7 @@ impl<'cfg> RegistryData for LocalRegistry<'cfg> {
path.as_path_unlocked()
}

fn last_modified(&self) -> Option<FileTime> {
fn current_version(&self) -> Option<InternedString> {
None
}

Expand Down
5 changes: 2 additions & 3 deletions src/cargo/sources/registry/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,6 @@ use std::fs::{File, OpenOptions};
use std::io::Write;
use std::path::{Path, PathBuf};

use filetime::FileTime;
use flate2::read::GzDecoder;
use log::debug;
use semver::{Version, VersionReq};
Expand All @@ -174,7 +173,7 @@ use tar::Archive;

use crate::core::dependency::{Dependency, Kind};
use crate::core::source::MaybePackage;
use crate::core::{Package, PackageId, Source, SourceId, Summary};
use crate::core::{Package, PackageId, Source, SourceId, Summary, InternedString};
use crate::sources::PathSource;
use crate::util::errors::CargoResultExt;
use crate::util::hex;
Expand Down Expand Up @@ -372,7 +371,7 @@ pub trait RegistryData {
true
}
fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path;
fn last_modified(&self) -> Option<FileTime>;
fn current_version(&self) -> Option<InternedString>;
}

pub enum MaybeLock {
Expand Down
23 changes: 10 additions & 13 deletions src/cargo/sources/registry/remote.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
use crate::core::{PackageId, SourceId};
use crate::core::{PackageId, SourceId, InternedString};
use crate::sources::git;
use crate::sources::registry::MaybeLock;
use crate::sources::registry::{RegistryConfig, RegistryData, CRATE_TEMPLATE, VERSION_TEMPLATE};
use crate::util::errors::{CargoResult, CargoResultExt};
use crate::util::{Config, Filesystem, Sha256};
use crate::util::paths;
use filetime::FileTime;
use lazycell::LazyCell;
use log::{debug, trace};
use std::cell::{Cell, Ref, RefCell};
Expand All @@ -25,7 +23,7 @@ pub struct RemoteRegistry<'cfg> {
tree: RefCell<Option<git2::Tree<'static>>>,
repo: LazyCell<git2::Repository>,
head: Cell<Option<git2::Oid>>,
last_updated: Cell<Option<FileTime>>,
current_sha: Cell<Option<InternedString>>,
}

impl<'cfg> RemoteRegistry<'cfg> {
Expand All @@ -38,7 +36,7 @@ impl<'cfg> RemoteRegistry<'cfg> {
tree: RefCell::new(None),
repo: LazyCell::new(),
head: Cell::new(None),
last_updated: Cell::new(None),
current_sha: Cell::new(None),
}
}

Expand Down Expand Up @@ -141,14 +139,13 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
self.config.assert_package_cache_locked(path)
}

fn last_modified(&self) -> Option<FileTime> {
if let Some(time) = self.last_updated.get() {
return Some(time);
fn current_version(&self) -> Option<InternedString> {
if let Some(sha) = self.current_sha.get() {
return Some(sha);
}
let path = self.config.assert_package_cache_locked(&self.index_path);
let mtime = paths::mtime(&path.join(LAST_UPDATED_FILE)).ok();
self.last_updated.set(mtime);
self.last_updated.get()
let sha = InternedString::new(&self.head().ok()?.to_string());
self.current_sha.set(Some(sha));
Some(sha)
}

fn load(
Expand Down Expand Up @@ -223,7 +220,7 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
self.prepare()?;
self.head.set(None);
*self.tree.borrow_mut() = None;
self.last_updated.set(None);
self.current_sha.set(None);
let path = self.config.assert_package_cache_locked(&self.index_path);
self.config
.shell()
Expand Down

0 comments on commit 1daff03

Please sign in to comment.