Skip to content

Commit e66b004

Browse files
committed
Auto merge of #157252 - heinwol:symbol-Interner-double-hashing, r=<try>
Rewrite `rustc_span::symbol::Interner` to avoid double hashing
2 parents 4804ad7 + 5c67961 commit e66b004

2 files changed

Lines changed: 65 additions & 34 deletions

File tree

compiler/rustc_span/src/symbol.rs

Lines changed: 54 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22
//! allows bidirectional lookup; i.e., given a value, one can easily find the
33
//! type, and vice versa.
44
5-
use std::hash::{Hash, Hasher};
5+
use std::hash::{BuildHasher, Hash, Hasher};
66
use std::{fmt, str};
77

88
use rustc_arena::DroplessArena;
9-
use rustc_data_structures::fx::{FxHashSet, FxIndexSet};
9+
use rustc_data_structures::fx::FxBuildHasher;
10+
use rustc_data_structures::hash_table::{Entry, HashTable};
1011
use rustc_data_structures::stable_hash::{StableCompare, StableHash, StableHashCtxt, StableHasher};
1112
use rustc_data_structures::sync::Lock;
1213
use rustc_macros::{Decodable, Encodable, StableHash, symbols};
@@ -2720,32 +2721,46 @@ pub(crate) struct Interner(Lock<InternerInner>);
27202721
// between `Interner`s.
27212722
struct InternerInner {
27222723
arena: DroplessArena,
2723-
byte_strs: FxIndexSet<&'static [u8]>,
2724+
byte_strs: HashTable<(&'static [u8], u32)>,
2725+
ids: Vec<&'static [u8]>,
27242726
}
27252727

27262728
impl Interner {
27272729
// These arguments are `&str`, but because of the sharing, we are
27282730
// effectively pre-interning all these strings for both `Symbol` and
27292731
// `ByteSymbol`.
27302732
fn prefill(init: &[&'static str], extra: &[&'static str]) -> Self {
2731-
let byte_strs = FxIndexSet::from_iter(
2732-
init.iter().copied().chain(extra.iter().copied()).map(|str| str.as_bytes()),
2733-
);
2733+
let values = init.iter().copied().chain(extra.iter().copied()).map(|str| str.as_bytes());
2734+
let (size_hint, _) = values.size_hint();
2735+
let mut conflicting_values: Vec<&[u8]> = Vec::new();
27342736

2735-
// The order in which duplicates are reported is irrelevant.
2736-
#[expect(rustc::potential_query_instability)]
2737-
if byte_strs.len() != init.len() + extra.len() {
2737+
let mut byte_strs: HashTable<(&'static [u8], u32)> = HashTable::with_capacity(size_hint);
2738+
let hasher = FxBuildHasher::default();
2739+
let hasher_func = |x: &(_, _)| hasher.hash_one(x.0);
2740+
2741+
let mut ids: Vec<&'static [u8]> = Vec::with_capacity(size_hint);
2742+
2743+
for v in values {
2744+
let entry = byte_strs.entry(hasher.hash_one(&v), |&x| x.0 == v, hasher_func);
2745+
match entry {
2746+
Entry::Occupied(v) => {
2747+
conflicting_values.push(v.get().0);
2748+
}
2749+
Entry::Vacant(view) => {
2750+
view.insert((v, ids.len() as u32));
2751+
ids.push(v);
2752+
}
2753+
}
2754+
}
2755+
2756+
if conflicting_values.len() != 0 {
27382757
panic!(
27392758
"duplicate symbols in the rustc symbol list and the extra symbols added by the driver: {:?}",
2740-
FxHashSet::intersection(
2741-
&init.iter().copied().collect(),
2742-
&extra.iter().copied().collect(),
2743-
)
2744-
.collect::<Vec<_>>()
2759+
conflicting_values
27452760
)
27462761
}
27472762

2748-
Interner(Lock::new(InternerInner { arena: Default::default(), byte_strs }))
2763+
Interner(Lock::new(InternerInner { arena: Default::default(), byte_strs, ids }))
27492764
}
27502765

27512766
fn intern_str(&self, str: &str) -> Symbol {
@@ -2758,24 +2773,29 @@ impl Interner {
27582773

27592774
#[inline]
27602775
fn intern_inner(&self, byte_str: &[u8]) -> u32 {
2761-
let mut inner = self.0.lock();
2762-
if let Some(idx) = inner.byte_strs.get_index_of(byte_str) {
2763-
return idx as u32;
2764-
}
2765-
2766-
let byte_str: &[u8] = inner.arena.alloc_slice(byte_str);
2767-
2768-
// SAFETY: we can extend the arena allocation to `'static` because we
2769-
// only access these while the arena is still alive.
2770-
let byte_str: &'static [u8] = unsafe { &*(byte_str as *const [u8]) };
2771-
2772-
// This second hash table lookup can be avoided by using `RawEntryMut`,
2773-
// but this code path isn't hot enough for it to be worth it. See
2774-
// #91445 for details.
2775-
let (idx, is_new) = inner.byte_strs.insert_full(byte_str);
2776-
debug_assert!(is_new); // due to the get_index_of check above
2777-
2778-
idx as u32
2776+
let hasher = FxBuildHasher::default();
2777+
let hasher_func = |x: &(_, _)| hasher.hash_one(x.0);
2778+
let hash_of_byte_str = hasher.hash_one(byte_str);
2779+
2780+
self.0.with_lock(|inner| {
2781+
let entry = inner.byte_strs.entry(hash_of_byte_str, |&x| x.0 == byte_str, hasher_func);
2782+
match entry {
2783+
Entry::Occupied(v) => {
2784+
return v.get().1;
2785+
}
2786+
Entry::Vacant(view) => {
2787+
let byte_str: &[u8] = inner.arena.alloc_slice(byte_str);
2788+
2789+
// SAFETY: we can extend the arena allocation to `'static` because we
2790+
// only access these while the arena is still alive.
2791+
let byte_str: &'static [u8] = unsafe { &*(byte_str as *const [u8]) };
2792+
let idx = inner.ids.len() as u32;
2793+
view.insert((byte_str, idx));
2794+
inner.ids.push(byte_str);
2795+
return idx;
2796+
}
2797+
};
2798+
})
27792799
}
27802800

27812801
/// Get the symbol as a string.
@@ -2795,7 +2815,7 @@ impl Interner {
27952815
}
27962816

27972817
fn get_inner(&self, index: usize) -> &[u8] {
2798-
self.0.lock().byte_strs.get_index(index).unwrap()
2818+
self.0.with_lock(|inner| inner.ids[index])
27992819
}
28002820
}
28012821

compiler/rustc_span/src/symbol/tests.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,17 @@ fn interner_tests() {
1515
assert_eq!(i.intern_str("dog"), Symbol::new(0));
1616
}
1717

18+
#[test]
19+
fn interner_get() {
20+
let i = Interner::prefill(&["chicken"], &["cow"]);
21+
let dog_idx = i.intern_str("dog"); // 2
22+
let cat_idx = i.intern_str("cat"); // 3
23+
assert_eq!(i.get_str(Symbol::new(0)), "chicken");
24+
assert_eq!(i.get_str(Symbol::new(1)), "cow");
25+
assert_eq!(i.get_str(cat_idx), "cat");
26+
assert_eq!(i.get_str(dog_idx), "dog");
27+
}
28+
1829
#[test]
1930
fn without_first_quote_test() {
2031
create_default_session_globals_then(|| {

0 commit comments

Comments
 (0)