From 7ad670d3c34de4bfaa305f57877a3bfdae0d07f8 Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Fri, 22 Dec 2023 21:42:49 -0500 Subject: [PATCH] Specialize DefPathHash table to skip crate IDs Instead, we store just the local crate hash as a bare u64. On decoding, we recombine it with the crate's stable crate ID stored separately in metadata. The end result is that we save ~8 bytes/DefIndex in metadata size. One key detail here is that we no longer distinguish in encoded metadata between present and non-present DefPathHashes. It used to be highly likely we could distinguish as we used DefPathHash::default(), an all-zero representation. However in theory even that is fallible as nothing strictly prevents the StableCrateId from being zero. --- compiler/rustc_metadata/src/rmeta/decoder.rs | 14 +++++++++--- compiler/rustc_metadata/src/rmeta/encoder.rs | 8 +++++-- compiler/rustc_metadata/src/rmeta/mod.rs | 9 +++++++- compiler/rustc_metadata/src/rmeta/table.rs | 23 -------------------- compiler/rustc_span/src/def_id.rs | 2 -- 5 files changed, 25 insertions(+), 31 deletions(-) diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index 24ab4f94d5c5b..4b8ef0965f79e 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -6,6 +6,7 @@ use crate::rmeta::*; use rustc_ast as ast; use rustc_data_structures::captures::Captures; +use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::owned_slice::OwnedSlice; use rustc_data_structures::sync::{AppendOnlyVec, AtomicBool, Lock, Lrc, OnceLock}; use rustc_data_structures::unhash::UnhashMap; @@ -1489,9 +1490,16 @@ impl<'a, 'tcx> CrateMetadataRef<'a> { index: DefIndex, def_path_hashes: &mut FxHashMap, ) -> DefPathHash { - *def_path_hashes - .entry(index) - .or_insert_with(|| self.root.tables.def_path_hashes.get(self, index)) + *def_path_hashes.entry(index).or_insert_with(|| { + // This is a hack to workaround the fact that we can't easily encode/decode a Hash64 + // into the FixedSizeEncoding, as Hash64 lacks a Default impl. A future refactor to + // relax the Default restriction will likely fix this. + let fingerprint = Fingerprint::new( + self.root.stable_crate_id.as_u64(), + self.root.tables.def_path_hashes.get(self, index), + ); + DefPathHash::new(self.root.stable_crate_id, fingerprint.split().1) + }) } #[inline] diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index ad3fea65e822f..4acc45022747f 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -467,13 +467,17 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { let def_key = self.lazy(table.def_key(def_index)); let def_path_hash = table.def_path_hash(def_index); self.tables.def_keys.set_some(def_index, def_key); - self.tables.def_path_hashes.set(def_index, def_path_hash); + assert_eq!( + def_path_hash.stable_crate_id(), + self.tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id() + ); + self.tables.def_path_hashes.set(def_index, def_path_hash.local_hash().as_u64()); } } else { for (def_index, def_key, def_path_hash) in table.enumerated_keys_and_path_hashes() { let def_key = self.lazy(def_key); self.tables.def_keys.set_some(def_index, def_key); - self.tables.def_path_hashes.set(def_index, *def_path_hash); + self.tables.def_path_hashes.set(def_index, def_path_hash.local_hash().as_u64()); } } } diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs index a858228489895..81519b8ae5566 100644 --- a/compiler/rustc_metadata/src/rmeta/mod.rs +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -386,7 +386,14 @@ define_tables! { is_type_alias_impl_trait: Table, type_alias_is_lazy: Table, attr_flags: Table, - def_path_hashes: Table, + // The u64 is the crate-local part of the DefPathHash. All hashes in this crate have the same + // StableCrateId, so we omit encoding those into the table. + // + // N.B. this means that we can't distinguish between non-present items and a present but zero + // local hash item. In practice the compiler shouldn't care about non-present items in a foreign + // crate, so this should be OK. If we do start to care we should most likely adjust our hashing + // to reserve a bit (e.g., NonZeroU64). + def_path_hashes: Table, explicit_item_bounds: Table, Span)>>, inferred_outlives_of: Table, Span)>>, inherent_impls: Table>, diff --git a/compiler/rustc_metadata/src/rmeta/table.rs b/compiler/rustc_metadata/src/rmeta/table.rs index 3fc6d9db331ac..136dd296c89d5 100644 --- a/compiler/rustc_metadata/src/rmeta/table.rs +++ b/compiler/rustc_metadata/src/rmeta/table.rs @@ -1,6 +1,5 @@ use crate::rmeta::*; -use rustc_data_structures::fingerprint::Fingerprint; use rustc_hir::def::CtorOf; use rustc_index::Idx; @@ -44,12 +43,6 @@ impl IsDefault for LazyArray { } } -impl IsDefault for DefPathHash { - fn is_default(&self) -> bool { - self.0 == Fingerprint::ZERO - } -} - impl IsDefault for UnusedGenericParams { fn is_default(&self) -> bool { // UnusedGenericParams encodes the *un*usedness as a bitset. @@ -219,22 +212,6 @@ fixed_size_enum! { } } -// We directly encode `DefPathHash` because a `LazyValue` would incur a 25% cost. -impl FixedSizeEncoding for DefPathHash { - type ByteArray = [u8; 16]; - - #[inline] - fn from_bytes(b: &[u8; 16]) -> Self { - DefPathHash(Fingerprint::from_le_bytes(*b)) - } - - #[inline] - fn write_to_bytes(self, b: &mut [u8; 16]) { - debug_assert!(!self.is_default()); - *b = self.0.to_le_bytes(); - } -} - // We directly encode RawDefId because using a `LazyValue` would incur a 50% overhead in the worst case. impl FixedSizeEncoding for Option { type ByteArray = [u8; 8]; diff --git a/compiler/rustc_span/src/def_id.rs b/compiler/rustc_span/src/def_id.rs index b2d51ac6c0dbd..e397fab54593b 100644 --- a/compiler/rustc_span/src/def_id.rs +++ b/compiler/rustc_span/src/def_id.rs @@ -114,8 +114,6 @@ impl DefPathHash { } /// Returns the crate-local part of the [DefPathHash]. - /// - /// Used for tests. #[inline] pub fn local_hash(&self) -> Hash64 { self.0.split().1