Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 68 additions & 37 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
//! allows bidirectional lookup; i.e., given a value, one can easily find the
//! type, and vice versa.

use std::hash::{Hash, Hasher};
use std::hash::{BuildHasher, Hash, Hasher};
use std::{fmt, str};

use rustc_arena::DroplessArena;
use rustc_data_structures::fx::{FxHashSet, FxIndexSet};
use rustc_data_structures::fx::FxBuildHasher;
use rustc_data_structures::hash_table::{Entry, HashTable};
use rustc_data_structures::stable_hash::{StableCompare, StableHash, StableHashCtxt, StableHasher};
use rustc_data_structures::sync::Lock;
use rustc_data_structures::sync::{Lock, RwLock};
use rustc_macros::{Decodable, Encodable, StableHash, symbols};

use crate::edit_distance::find_best_match_for_name;
Expand Down Expand Up @@ -2703,41 +2704,57 @@ impl StableHash for ByteSymbol {
// string with identical contents (e.g. "foo" and b"foo") are both interned,
// only one copy will be stored and the resulting `Symbol` and `ByteSymbol`
// will have the same index.
// NOTE(review): two definitions of `Interner` are visible here; this looks
// like a diff rendering showing the previous tuple struct next to its
// replacement. Confirm which one belongs to the final revision.
pub(crate) struct Interner(Lock<InternerInner>);
pub(crate) struct Interner {
// Lookup tables (bytes -> index and index -> bytes) behind a reader-writer
// lock, so lookups of already-interned strings only need a read lock.
map: RwLock<InternerMap>,
// Arena that owns the bytes of strings interned after construction. It has
// its own lock, separate from the one guarding `map`.
arena: Lock<DroplessArena>,
}

// The `&'static [u8]`s in this type actually point into the arena.
//
// This type is private to prevent accidentally constructing more than one
// `Interner` on the same thread, which makes it easy to mix up `Symbol`s
// between `Interner`s.
// NOTE(review): the `InternerInner` header and its two fields appear to be the
// previous definition kept by the diff rendering; `InternerMap` with
// `byte_strs` and `ids` is what the rest of the visible code constructs.
// Confirm before compiling.
struct InternerInner {
arena: DroplessArena,
byte_strs: FxIndexSet<&'static [u8]>,
struct InternerMap {
// Bytes -> index direction: every entry is `(interned bytes, index)`.
byte_strs: HashTable<(&'static [u8], u32)>,
// Index -> bytes direction: `ids[index]` is the byte string that was
// interned under `index`. Entries are pushed in interning order.
ids: Vec<&'static [u8]>,
}

impl Interner {
// These arguments are `&str`, but because of the sharing, we are
// effectively pre-interning all these strings for both `Symbol` and
// `ByteSymbol`.
// Builds an interner that already contains every string of `init` followed by
// every string of `extra`. Each distinct string receives the next free index
// in that order. Panics if any string occurs more than once.
//
// NOTE(review): this listing interleaves superseded and replacement
// statements (two `byte_strs` bindings, two `if` conditions, two constructor
// expressions), as a diff rendering would. The comments below describe the
// `HashTable`-based statements; confirm which lines are part of the final
// revision.
fn prefill(init: &[&'static str], extra: &[&'static str]) -> Self {
let byte_strs = FxIndexSet::from_iter(
init.iter().copied().chain(extra.iter().copied()).map(|str| str.as_bytes()),
);
// All strings to pre-intern, as bytes: `init` first, then `extra`.
let values = init.iter().copied().chain(extra.iter().copied()).map(|str| str.as_bytes());
// Lower bound of the iterator's size hint; used only to pre-size the table
// and the index vector below.
let (size_hint, _) = values.size_hint();
// Strings that were already present when they were about to be inserted.
let mut conflicting_values: Vec<&[u8]> = Vec::new();

let mut byte_strs: HashTable<(&'static [u8], u32)> = HashTable::with_capacity(size_hint);
let hasher = FxBuildHasher::default();
// Hash callback handed to `entry`: it hashes a stored element by its byte
// string only, ignoring the index.
let hasher_func = |x: &(_, _)| hasher.hash_one(x.0);

let mut ids: Vec<&'static [u8]> = Vec::with_capacity(size_hint);

for v in values {
// `hash_one(&v)` hashes through the reference, so it produces the same
// value that `hasher_func` computes for a stored `(bytes, index)` entry.
let entry = byte_strs.entry(hasher.hash_one(&v), |&x| x.0 == v, hasher_func);
match entry {
// Already in the table: remember it for the panic message below.
Entry::Occupied(v) => {
conflicting_values.push(v.get().0);
}
// First occurrence: its index is the current length of `ids`, so the
// table entry and `ids[index]` stay in agreement.
Entry::Vacant(view) => {
view.insert((v, ids.len() as u32));
ids.push(v);
}
}
}

// The order in which duplicates are reported is irrelevant.
#[expect(rustc::potential_query_instability)]
if byte_strs.len() != init.len() + extra.len() {
// Abort when at least one duplicate was recorded in the loop above.
if conflicting_values.len() != 0 {
panic!(
"duplicate symbols in the rustc symbol list and the extra symbols added by the driver: {:?}",
FxHashSet::intersection(
&init.iter().copied().collect(),
&extra.iter().copied().collect(),
)
.collect::<Vec<_>>()
conflicting_values
)
}

Interner(Lock::new(InternerInner { arena: Default::default(), byte_strs }))
// The arena starts out empty: the pre-interned strings are `&'static str`s
// supplied by the caller and are stored without being copied.
Self { map: RwLock::new(InternerMap { byte_strs, ids }), arena: Default::default() }
}

fn intern_str(&self, str: &str) -> Symbol {
Expand All @@ -2750,24 +2767,37 @@ impl Interner {

// Returns the index of `byte_str`, interning it first when it is not in the
// table yet.
//
// NOTE(review): this listing interleaves the previous implementation (the
// statements using `inner` / `self.0`) with its replacement (the statements
// using `self.map` / `self.arena`), as a diff rendering would. The comments
// below describe the replacement; confirm which lines are part of the final
// revision.
#[inline]
fn intern_inner(&self, byte_str: &[u8]) -> u32 {
let mut inner = self.0.lock();
if let Some(idx) = inner.byte_strs.get_index_of(byte_str) {
return idx as u32;
let hasher = FxBuildHasher::default();
// Hash callback for `entry`: hashes a stored element by its byte string only.
let hasher_func = |x: &(_, _)| hasher.hash_one(x.0);
// Computed once and reused for both the read-locked and write-locked lookup.
let hash_of_byte_str = hasher.hash_one(byte_str);

// First lookup, under the read lock only: return the existing index without
// ever requesting the write lock.
let map_read_lock = self.map.read();
let entry = map_read_lock.byte_strs.find(hash_of_byte_str, |&x| x.0 == byte_str);
if let Some(&(_, idx)) = entry {
return idx;
}
// Release the read lock before requesting the write lock.
drop(map_read_lock);

let mut map_write_lock = self.map.write();
// Borrow both fields separately so the table entry and `ids` can be
// modified together.
let InternerMap { ref mut byte_strs, ref mut ids } = *map_write_lock;
// The lookup is repeated through `entry` because the table may have changed
// between releasing the read lock and acquiring the write lock.
let entry = byte_strs.entry(hash_of_byte_str, |&x| x.0 == byte_str, hasher_func);
match entry {
Entry::Occupied(v) => {
return v.get().1;
}
Entry::Vacant(view) => {
// Copy the bytes into the arena so the stored slice does not borrow from
// the caller's `byte_str`.
let arena_write_lock = self.arena.lock();
let byte_str: &[u8] = arena_write_lock.alloc_slice(byte_str);

// SAFETY: we can extend the arena allocation to `'static` because we
// only access these while the arena is still alive.
let byte_str: &'static [u8] = unsafe { &*(byte_str as *const [u8]) };
// The new index is the current length of `ids`; the table entry and the
// `ids` slot are written together while the write lock is held.
let idx = ids.len() as u32;
view.insert((byte_str, idx));
ids.push(byte_str);
return idx;
}
}

// NOTE(review): both arms of the `match` above return, so the statements
// from here to the end of the function look like leftovers of the previous
// single-lock implementation. Confirm and drop them from the final file.
let byte_str: &[u8] = inner.arena.alloc_slice(byte_str);

// SAFETY: we can extend the arena allocation to `'static` because we
// only access these while the arena is still alive.
let byte_str: &'static [u8] = unsafe { &*(byte_str as *const [u8]) };

// This second hash table lookup can be avoided by using `RawEntryMut`,
// but this code path isn't hot enough for it to be worth it. See
// #91445 for details.
let (idx, is_new) = inner.byte_strs.insert_full(byte_str);
debug_assert!(is_new); // due to the get_index_of check above

idx as u32
}

/// Get the symbol as a string.
Expand All @@ -2787,7 +2817,8 @@ impl Interner {
}

// Returns the bytes that were interned under `index`.
//
// NOTE(review): two bodies are visible here; the `self.0.lock()` line looks
// like the previous implementation kept by the diff rendering. Confirm which
// one belongs to the final revision.
fn get_inner(&self, index: usize) -> &[u8] {
self.0.lock().byte_strs.get_index(index).unwrap()
// Only a read lock is needed. `ids` stores `&'static [u8]`, so the slice
// copied out of it does not borrow from the guard.
let read_lock = self.map.read();
// Plain indexing: panics if `index` is out of range for `ids`.
read_lock.ids[index]
}
}

Expand Down
11 changes: 11 additions & 0 deletions compiler/rustc_span/src/symbol/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,17 @@ fn interner_tests() {
assert_eq!(i.intern_str("dog"), Symbol::new(0));
}

/// Checks both lookup directions of the interner: strings map to the expected
/// indices, and indices map back to the original strings, for prefilled as
/// well as freshly interned entries.
#[test]
fn interner_get() {
    // `prefill` numbers its arguments in order: "chicken" -> 0, "cow" -> 1.
    let i = Interner::prefill(&["chicken"], &["cow"]);

    // Newly interned strings continue the numbering after the prefilled ones.
    // Assert the indices instead of only mentioning them in comments.
    let dog_idx = i.intern_str("dog");
    let cat_idx = i.intern_str("cat");
    assert_eq!(dog_idx, Symbol::new(2));
    assert_eq!(cat_idx, Symbol::new(3));

    // Interning a string that is already present must return the existing
    // symbol rather than allocating a new index.
    assert_eq!(i.intern_str("dog"), dog_idx);
    assert_eq!(i.intern_str("chicken"), Symbol::new(0));

    // Index -> string lookups, deliberately not in insertion order.
    assert_eq!(i.get_str(Symbol::new(0)), "chicken");
    assert_eq!(i.get_str(Symbol::new(1)), "cow");
    assert_eq!(i.get_str(cat_idx), "cat");
    assert_eq!(i.get_str(dog_idx), "dog");
}

#[test]
fn without_first_quote_test() {
create_default_session_globals_then(|| {
Expand Down
Loading