From 695005990072c3f8460e7d68f6f9e39a614bd196 Mon Sep 17 00:00:00 2001 From: Eric Huss Date: Tue, 28 Nov 2023 08:46:30 -0800 Subject: [PATCH] Add more doc comments for gc changes. I missed adding these in https://github.com/rust-lang/cargo/pull/12634. --- src/cargo/core/global_cache_tracker.rs | 26 ++++++++++++++++------- src/cargo/sources/git/source.rs | 10 +++++++++ src/cargo/sources/registry/http_remote.rs | 2 ++ src/cargo/sources/registry/mod.rs | 7 ++++++ src/cargo/sources/registry/remote.rs | 2 ++ src/cargo/util/config/mod.rs | 3 +++ 6 files changed, 42 insertions(+), 8 deletions(-) diff --git a/src/cargo/core/global_cache_tracker.rs b/src/cargo/core/global_cache_tracker.rs index 64491ba3f5a..79ae252d9b3 100644 --- a/src/cargo/core/global_cache_tracker.rs +++ b/src/cargo/core/global_cache_tracker.rs @@ -154,43 +154,53 @@ type Timestamp = u64; /// The key for a registry index entry stored in the database. #[derive(Clone, Debug, Hash, Eq, PartialEq)] pub struct RegistryIndex { + /// A unique name of the registry source. pub encoded_registry_name: InternedString, } /// The key for a registry `.crate` entry stored in the database. #[derive(Clone, Debug, Hash, Eq, PartialEq)] pub struct RegistryCrate { + /// A unique name of the registry source. pub encoded_registry_name: InternedString, + /// The filename of the compressed crate, like `foo-1.2.3.crate`. pub crate_filename: InternedString, + /// The size of the `.crate` file. pub size: u64, } /// The key for a registry src directory entry stored in the database. #[derive(Clone, Debug, Hash, Eq, PartialEq)] pub struct RegistrySrc { + /// A unique name of the registry source. pub encoded_registry_name: InternedString, + /// The directory name of the extracted source, like `foo-1.2.3`. pub package_dir: InternedString, - // Total size of the src directory in bytes. - // - // This can be None when the size is unknown. 
For example, when the src - // directory already exists on disk, and we just want to update the - // last-use timestamp. We don't want to take the expense of computing disk - // usage unless necessary. `populate_untracked_src` will handle any actual - // NULL values in the database, which can happen when the src directory is - // created by an older version of cargo that did not track sizes. + /// Total size of the src directory in bytes. + /// + /// This can be None when the size is unknown. For example, when the src + /// directory already exists on disk, and we just want to update the + /// last-use timestamp. We don't want to take the expense of computing disk + /// usage unless necessary. [`GlobalCacheTracker::populate_untracked`] + /// will handle any actual NULL values in the database, which can happen + /// when the src directory is created by an older version of cargo that + /// did not track sizes. pub size: Option<u64>, } /// The key for a git db entry stored in the database. #[derive(Clone, Debug, Hash, Eq, PartialEq)] pub struct GitDb { + /// A unique name of the git database. pub encoded_git_name: InternedString, } /// The key for a git checkout entry stored in the database. #[derive(Clone, Debug, Hash, Eq, PartialEq)] pub struct GitCheckout { + /// A unique name of the git database. pub encoded_git_name: InternedString, + /// A unique name of the checkout without the database. pub short_name: InternedString, /// Total size of the checkout directory. /// diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index 480bca34f17..664c64bfe12 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -76,7 +76,17 @@ pub struct GitSource<'cfg> { /// The unique identifier of this source. source_id: SourceId, /// The underlying path source to discover packages inside the Git repository. 
+ /// + /// This gets set to `Some` after the git repo has been checked out + /// (automatically handled via [`GitSource::block_until_ready`]). path_source: Option<PathSource<'cfg>>, + /// A short string that uniquely identifies the version of the checkout. + /// + /// This is typically a 7-character string of the OID hash, automatically + /// increasing in size if it is ambiguous. + /// + /// This is set to `Some` after the git repo has been checked out + /// (automatically handled via [`GitSource::block_until_ready`]). short_id: Option<InternedString>, /// The identifier of this source for Cargo's Git cache directory. /// See [`ident`] for more. diff --git a/src/cargo/sources/registry/http_remote.rs b/src/cargo/sources/registry/http_remote.rs index 1dfae4ad811..821cf3cd162 100644 --- a/src/cargo/sources/registry/http_remote.rs +++ b/src/cargo/sources/registry/http_remote.rs @@ -54,6 +54,8 @@ const UNKNOWN: &'static str = "Unknown"; /// /// [RFC 2789]: https://github.com/rust-lang/rfcs/pull/2789 pub struct HttpRegistry<'cfg> { + /// The name of this source, a unique string (across all sources) used as + /// the directory name where its cached content is stored. name: InternedString, /// Path to the registry index (`$CARGO_HOME/registry/index/$REG-HASH`). /// diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 0d558b88033..5bdd71e7d98 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -240,6 +240,8 @@ struct LockMetadata { /// /// For general concepts of registries, see the [module-level documentation](crate::sources::registry). pub struct RegistrySource<'cfg> { + /// A unique name of the source (typically used as the directory name + /// where its cached content is stored). name: InternedString, /// The unique identifier of this source. source_id: SourceId, @@ -444,6 +446,11 @@ mod remote; /// Generates a unique name for [`SourceId`] to have a unique path to put their /// index files. 
fn short_name(id: SourceId, is_shallow: bool) -> String { + // CAUTION: This should not change between versions. If you change how + // this is computed, it will orphan previously cached data, forcing the + // cache to be rebuilt and potentially wasting significant disk space. If + // you change it, be cautious of the impact. See `test_cratesio_hash` for + // a similar discussion. let hash = hex::short_hash(&id); let ident = id.url().host_str().unwrap_or("").to_string(); let mut name = format!("{}-{}", ident, hash); diff --git a/src/cargo/sources/registry/remote.rs b/src/cargo/sources/registry/remote.rs index 4e7dd5f6c22..841ee3683d9 100644 --- a/src/cargo/sources/registry/remote.rs +++ b/src/cargo/sources/registry/remote.rs @@ -48,6 +48,8 @@ use tracing::{debug, trace}; /// /// [`HttpRegistry`]: super::http_remote::HttpRegistry pub struct RemoteRegistry<'cfg> { + /// The name of this source, a unique string (across all sources) used as + /// the directory name where its cached content is stored. name: InternedString, /// Path to the registry index (`$CARGO_HOME/registry/index/$REG-HASH`). index_path: Filesystem, diff --git a/src/cargo/util/config/mod.rs b/src/cargo/util/config/mod.rs index bc22e376866..c0dd42d39b8 100644 --- a/src/cargo/util/config/mod.rs +++ b/src/cargo/util/config/mod.rs @@ -245,7 +245,10 @@ pub struct Config { pub nightly_features_allowed: bool, /// WorkspaceRootConfigs that have been found pub ws_roots: RefCell<HashMap<PathBuf, WorkspaceRootConfig>>, + /// The global cache tracker is a database used to track disk cache usage. global_cache_tracker: LazyCell<RefCell<GlobalCacheTracker>>, + /// A cache of modifications to make to [`Config::global_cache_tracker`], + /// saved to disk in a batch to improve performance. deferred_global_last_use: LazyCell<RefCell<DeferredGlobalLastUse>>, }