From 16744072124315924c2fa4086432fa062fb0f264 Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Thu, 27 Oct 2022 19:56:41 -0700 Subject: [PATCH] Clean stale git temp files --- src/cargo/sources/git/utils.rs | 40 ++++++++++++++++++++++++++++++++++ tests/testsuite/git.rs | 29 ++++++++++++++++++++++++ tests/testsuite/git_gc.rs | 2 +- 3 files changed, 70 insertions(+), 1 deletion(-) diff --git a/src/cargo/sources/git/utils.rs b/src/cargo/sources/git/utils.rs index 5238fd3c3d60..d09697660550 100644 --- a/src/cargo/sources/git/utils.rs +++ b/src/cargo/sources/git/utils.rs @@ -14,6 +14,7 @@ use serde::Serialize; use std::borrow::Cow; use std::env; use std::fmt; +use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; use std::str; @@ -812,6 +813,8 @@ pub fn fetch( // request we're about to issue. maybe_gc_repo(repo)?; + clean_repo_temp_files(repo); + // Translate the reference desired here into an actual list of refspecs // which need to get fetched. Additionally record if we're fetching tags. let mut refspecs = Vec::new(); @@ -1001,6 +1004,43 @@ fn maybe_gc_repo(repo: &mut git2::Repository) -> CargoResult<()> { reinitialize(repo) } +/// Removes temporary files left from previous activity. +/// +/// If libgit2 is interrupted while indexing pack files, it will leave behind +/// some temporary files that it doesn't clean up. These can be quite large in +/// size, so this tries to clean things up. +/// +/// This intentionally ignores errors. This is only an opportunistic cleaning, +/// and we don't really care if there are issues (there's unlikely anything +/// that can be done). +/// +/// The git CLI has similar behavior (its temp files look like +/// `objects/pack/tmp_pack_9kUSA8`). Those files are normally deleted via `git +/// prune` which is run by `git gc`. However, it doesn't know about libgit2's +/// filenames, so they never get cleaned up. +fn clean_repo_temp_files(repo: &git2::Repository) { + let path = repo.path().join("objects/pack/pack_git2_*"); + let pattern = match path.to_str() { + Some(p) => p, + None => { + log::warn!("cannot convert {path:?} to a string"); + return; + } + }; + let paths = match glob::glob(pattern) { + Ok(paths) => paths, + Err(_) => return, + }; + for path in paths { + if let Ok(path) = path { + match fs::remove_file(&path) { + Ok(_) => log::debug!("removed stale temp git file {path:?}"), + Err(e) => log::warn!("failed to remove {path:?} while cleaning temp files: {e}"), + } + } + } +} + fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> { // Here we want to drop the current repository object pointed to by `repo`, // so we initialize temporary repository in a sub-folder, blow away the diff --git a/tests/testsuite/git.rs b/tests/testsuite/git.rs index 615787f19346..8e0ecedb81d9 100644 --- a/tests/testsuite/git.rs +++ b/tests/testsuite/git.rs @@ -10,6 +10,7 @@ use std::sync::Arc; use std::thread; use cargo_test_support::paths::{self, CargoPathExt}; +use cargo_test_support::registry::Package; use cargo_test_support::{basic_lib_manifest, basic_manifest, git, main_file, path2url, project}; use cargo_test_support::{sleep_ms, t, Project}; @@ -3619,3 +3620,31 @@ fn _corrupted_checkout(with_cli: bool) { e.run(); assert!(ok.exists()); } + +#[cargo_test] +fn cleans_temp_pack_files() { + // Checks that cargo removes temp files left by libgit2 when it is + // interrupted (see clean_repo_temp_files). + Package::new("bar", "1.0.0").publish(); + let p = project() + .file( + "Cargo.toml", + r#" + [package] + name = "foo" + version = "0.1.0" + + [dependencies] + bar = "1.0" + "#, + ) + .file("src/lib.rs", "") + .build(); + p.cargo("fetch").run(); + // Simulate what happens when libgit2 is interrupted while indexing a pack file. + let tmp_path = super::git_gc::find_index().join(".git/objects/pack/pack_git2_91ab40da04fdc2e7"); + fs::write(&tmp_path, "test").unwrap(); + // Trigger an index update. + p.cargo("generate-lockfile").run(); + assert!(!tmp_path.exists()); +} diff --git a/tests/testsuite/git_gc.rs b/tests/testsuite/git_gc.rs index ff65c448bb75..045bc78ad5e0 100644 --- a/tests/testsuite/git_gc.rs +++ b/tests/testsuite/git_gc.rs @@ -11,7 +11,7 @@ use cargo_test_support::registry::Package; use url::Url; -fn find_index() -> PathBuf { +pub fn find_index() -> PathBuf { let dir = paths::home().join(".cargo/registry/index"); dir.read_dir().unwrap().next().unwrap().unwrap().path() }