From 94a36d2ce53f9991a529f4d8c3621f73e719de23 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 2 Nov 2023 17:02:36 +1100 Subject: [PATCH] Use `FxIndexSet` in the symbol interner. It makes the code a little nicer. As part of this, the interner's `Default` impl is removed and `prefill` is used in a test instead. --- compiler/rustc_span/src/symbol.rs | 31 +++++++++---------------- compiler/rustc_span/src/symbol/tests.rs | 2 +- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 3f99d2a4b1ffb..923ee630d952f 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -3,7 +3,7 @@ //! type, and vice versa. use rustc_arena::DroplessArena; -use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::fx::FxIndexSet; use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey}; use rustc_data_structures::sync::Lock; use rustc_macros::HashStable_Generic; @@ -2076,43 +2076,33 @@ impl ToStableHashKey for Symbol { } } -#[derive(Default)] pub(crate) struct Interner(Lock); // The `&'static str`s in this type actually point into the arena. // -// The `FxHashMap`+`Vec` pair could be replaced by `FxIndexSet`, but #75278 -// found that to regress performance up to 2% in some cases. This might be -// revisited after further improvements to `indexmap`. -// // This type is private to prevent accidentally constructing more than one // `Interner` on the same thread, which makes it easy to mix up `Symbol`s // between `Interner`s. -#[derive(Default)] struct InternerInner { arena: DroplessArena, - names: FxHashMap<&'static str, Symbol>, - strings: Vec<&'static str>, + strings: FxIndexSet<&'static str>, } impl Interner { fn prefill(init: &[&'static str]) -> Self { Interner(Lock::new(InternerInner { - strings: init.into(), - names: init.iter().copied().zip((0..).map(Symbol::new)).collect(), - ..Default::default() + arena: Default::default(), + strings: init.iter().copied().collect(), })) } #[inline] fn intern(&self, string: &str) -> Symbol { let mut inner = self.0.lock(); - if let Some(&name) = inner.names.get(string) { - return name; + if let Some(idx) = inner.strings.get_index_of(string) { + return Symbol::new(idx as u32); } - let name = Symbol::new(inner.strings.len() as u32); - // SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena, // and immediately convert the clone back to `&[u8]`, all because there // is no `inner.arena.alloc_str()` method. This is clearly safe. @@ -2122,20 +2112,21 @@ impl Interner { // SAFETY: we can extend the arena allocation to `'static` because we // only access these while the arena is still alive. let string: &'static str = unsafe { &*(string as *const str) }; - inner.strings.push(string); // This second hash table lookup can be avoided by using `RawEntryMut`, // but this code path isn't hot enough for it to be worth it. See // #91445 for details. - inner.names.insert(string, name); - name + let (idx, is_new) = inner.strings.insert_full(string); + debug_assert!(is_new); // due to the get_index_of check above + + Symbol::new(idx as u32) } /// Get the symbol as a string. /// /// [`Symbol::as_str()`] should be used in preference to this function. fn get(&self, symbol: Symbol) -> &str { - self.0.lock().strings[symbol.0.as_usize()] + self.0.lock().strings.get_index(symbol.0.as_usize()).unwrap() } } diff --git a/compiler/rustc_span/src/symbol/tests.rs b/compiler/rustc_span/src/symbol/tests.rs index 0958fce5fee30..4366c5a2c26d2 100644 --- a/compiler/rustc_span/src/symbol/tests.rs +++ b/compiler/rustc_span/src/symbol/tests.rs @@ -4,7 +4,7 @@ use crate::create_default_session_globals_then; #[test] fn interner_tests() { - let i = Interner::default(); + let i = Interner::prefill(&[]); // first one is zero: assert_eq!(i.intern("dog"), Symbol::new(0)); // re-use gets the same entry: