Skip to content

Commit b62dfa6

Browse files
committed
Build the dep-graph reverse index lazily, per DepKind
Previously, decoding a SerializedDepGraph eagerly built a fingerprint-to-index map for every DepKind, covering every node. That inverse index is only consulted by node_to_index_opt, which runs for the nodes a session queries directly; the bulk of the graph is reached as edge targets by index and is never looked up by fingerprint, so most of those maps are never read. Replace the eager build with a counting sort that groups node indices into a contiguous range per DepKind, and build the fingerprint map for a kind only the first time a node of that kind is looked up. Decode no longer pays a hash-map insert per node, and kinds that are never looked up never build a map. The on-disk format is unchanged.
1 parent 1ce45a0 commit b62dfa6

2 files changed

Lines changed: 128 additions & 27 deletions

File tree

compiler/rustc_incremental/src/persist/load.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ fn load_dep_graph(sess: &Session) -> LoadResult {
112112
return LoadResult::DataOutOfDate;
113113
}
114114

115-
let prev_graph = SerializedDepGraph::decode(&mut decoder);
115+
let prev_graph = SerializedDepGraph::decode(&mut decoder, &sess.prof);
116116

117117
LoadResult::Ok { prev_graph, prev_work_products }
118118
}

compiler/rustc_middle/src/dep_graph/serialized.rs

Lines changed: 127 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@
4141
4242
use std::cell::RefCell;
4343
use std::cmp::max;
44-
use std::sync::Arc;
4544
use std::sync::atomic::Ordering;
45+
use std::sync::{Arc, OnceLock};
4646
use std::{iter, mem};
4747

4848
use rustc_data_structures::fingerprint::{Fingerprint, PackedFingerprint};
@@ -51,7 +51,7 @@ use rustc_data_structures::outline;
5151
use rustc_data_structures::profiling::SelfProfilerRef;
5252
use rustc_data_structures::sync::{AtomicU64, Lock, WorkerLocal, broadcast};
5353
use rustc_data_structures::unhash::UnhashMap;
54-
use rustc_index::IndexVec;
54+
use rustc_index::{IndexSlice, IndexVec};
5555
use rustc_serialize::opaque::mem_encoder::MemEncoder;
5656
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder, IntEncodedWithFixedSize, MemDecoder};
5757
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
@@ -95,7 +95,7 @@ const DEP_NODE_WIDTH_BITS: usize = DEP_NODE_SIZE / 2;
9595
///
9696
/// There may be unused indices with DepKind::Null in this graph due to batch allocation of
9797
/// indices to threads.
98-
#[derive(Debug, Default)]
98+
#[derive(Default)]
9999
pub struct SerializedDepGraph {
100100
/// The set of all DepNodes in the graph
101101
nodes: IndexVec<SerializedDepNodeIndex, DepNode>,
@@ -113,12 +113,95 @@ pub struct SerializedDepGraph {
113113
/// A flattened list of all edge targets in the graph, stored in the same
114114
/// varint encoding that we use on disk. Edge sources are implicit in edge_list_indices.
115115
edge_list_data: Vec<u8>,
116-
/// For each dep kind, stores a map from key fingerprints back to the index
117-
/// of the corresponding node. This is the inverse of `nodes`.
118-
index: Vec<UnhashMap<PackedFingerprint, SerializedDepNodeIndex>>,
116+
/// The lazily-built inverse of `nodes`: maps a [`DepNode`] back to its
117+
/// [`SerializedDepNodeIndex`] via the node's key fingerprint. See
118+
/// [`LazyNodeIndex`].
119+
reverse_index: LazyNodeIndex,
119120
/// The number of previous compilation sessions. This is used to generate
120121
/// unique anon dep nodes per session.
121122
session_count: u64,
123+
/// Used to time the lazy per-`DepKind` reverse-index build. `None` only for
124+
/// the empty default graph, which is never looked up.
125+
profiler: Option<SelfProfilerRef>,
126+
}
127+
128+
// `SelfProfilerRef` is not `Debug`, so we can't derive this.
129+
impl std::fmt::Debug for SerializedDepGraph {
130+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131+
f.debug_struct("SerializedDepGraph")
132+
.field("nodes", &self.nodes)
133+
.field("value_fingerprints", &self.value_fingerprints)
134+
.field("edge_list_indices", &self.edge_list_indices)
135+
.field("edge_list_data", &self.edge_list_data)
136+
.field("reverse_index", &self.reverse_index)
137+
.field("session_count", &self.session_count)
138+
.finish_non_exhaustive()
139+
}
140+
}
141+
142+
/// The inverse of [`SerializedDepGraph::nodes`], built lazily per [`DepKind`].
143+
///
144+
/// Only a few nodes are ever looked up here, and those cluster into a handful of
145+
/// `DepKind`s. Building a map for every kind up front would be wasted work.
146+
#[derive(Debug, Default)]
147+
struct LazyNodeIndex {
148+
/// All (non-`Null`) node indices, grouped into contiguous per-`DepKind`
149+
/// ranges described by `kinds`. For any non-`Null` `DepKind` `k`, all values in
150+
/// `nodes_by_kind[kinds[k].start..][..kinds[k].len]`
151+
/// must be `Some` and have kind `k`.
152+
nodes_by_kind: Vec<Option<SerializedDepNodeIndex>>,
153+
/// For each `DepKind`, the range of `nodes_by_kind` holding its node indices
154+
/// and the lazily-built fingerprint map over that range.
155+
kinds: Vec<LazyKindIndex>,
156+
}
157+
158+
#[derive(Debug, Default)]
159+
struct LazyKindIndex {
160+
/// Offset into `LazyNodeIndex::nodes_by_kind` of this kind's first node.
161+
start: u32,
162+
/// Number of nodes of this kind.
163+
len: u32,
164+
/// `key_fingerprint -> node index`, built from this kind's range on first
165+
/// lookup. Kinds never looked up never build a map; a looked-up empty kind gets an empty one.
166+
map: OnceLock<UnhashMap<PackedFingerprint, SerializedDepNodeIndex>>,
167+
}
168+
169+
impl LazyKindIndex {
170+
/// Returns this kind's `key_fingerprint -> node index` map.
171+
fn fingerprint_map(
172+
&self,
173+
kind: DepKind,
174+
nodes: &IndexSlice<SerializedDepNodeIndex, DepNode>,
175+
nodes_by_kind: &[Option<SerializedDepNodeIndex>],
176+
profiler: &Option<SelfProfilerRef>,
177+
) -> &UnhashMap<PackedFingerprint, SerializedDepNodeIndex> {
178+
self.map.get_or_init(|| {
179+
let _prof_timer = profiler
180+
.as_ref()
181+
.map(|p| p.generic_activity("incr_comp_load_dep_graph_reverse_index"));
182+
let range = (self.start as usize)..(self.start as usize + self.len as usize);
183+
let mut map =
184+
UnhashMap::with_capacity_and_hasher(self.len as usize, Default::default());
185+
for &idx in &nodes_by_kind[range] {
186+
let idx = idx.expect("counting sort fills every slot of a kind's range");
187+
let node = nodes[idx];
188+
debug_assert_eq!(node.kind, kind);
189+
if map.insert(node.key_fingerprint, idx).is_some()
190+
// Side effect nodes can legitimately share a fingerprint.
191+
&& node.kind != DepKind::SideEffect
192+
{
193+
panic!(
194+
"Error: A dep graph node ({kind:?}) does not have an unique index. \
195+
Running a clean build on a nightly compiler with \
196+
`-Z incremental-verify-ich` can help narrow down the issue for reporting. \
197+
A clean build may also work around the issue.\n
198+
DepNode: {node:?}"
199+
)
200+
}
201+
}
202+
map
203+
})
204+
}
122205
}
123206

124207
impl SerializedDepGraph {
@@ -151,7 +234,14 @@ impl SerializedDepGraph {
151234

152235
#[inline]
153236
pub fn node_to_index_opt(&self, dep_node: &DepNode) -> Option<SerializedDepNodeIndex> {
154-
self.index.get(dep_node.kind.as_usize())?.get(&dep_node.key_fingerprint).copied()
237+
let kind = self.reverse_index.kinds.get(dep_node.kind.as_usize())?;
238+
let map = kind.fingerprint_map(
239+
dep_node.kind,
240+
&self.nodes,
241+
&self.reverse_index.nodes_by_kind,
242+
&self.profiler,
243+
);
244+
map.get(&dep_node.key_fingerprint).copied()
155245
}
156246

157247
#[inline]
@@ -206,8 +296,8 @@ fn mask(bits: usize) -> usize {
206296
}
207297

208298
impl SerializedDepGraph {
209-
#[instrument(level = "debug", skip(d))]
210-
pub fn decode(d: &mut MemDecoder<'_>) -> Arc<SerializedDepGraph> {
299+
#[instrument(level = "debug", skip(d, profiler))]
300+
pub fn decode(d: &mut MemDecoder<'_>, profiler: &SelfProfilerRef) -> Arc<SerializedDepGraph> {
211301
// The last 16 bytes are the node count and edge count.
212302
debug!("position: {:?}", d.position());
213303

@@ -286,36 +376,47 @@ impl SerializedDepGraph {
286376
// end of the array. This padding ensures it doesn't.
287377
edge_list_data.extend(&[0u8; DEP_NODE_PAD]);
288378

289-
// Read the number of each dep kind and use it to create an hash map with a suitable size.
290-
let mut index: Vec<_> = (0..(DepKind::MAX + 1))
291-
.map(|_| UnhashMap::with_capacity_and_hasher(d.read_u32() as usize, Default::default()))
292-
.collect();
379+
// Read the number of nodes of each dep kind and compute each kind's
380+
// starting offset for the `LazyNodeIndex` counting sort below.
381+
let mut kinds = Vec::with_capacity(DepKind::MAX as usize + 1);
382+
let mut offset = 0u32;
383+
for _ in 0..(DepKind::MAX + 1) {
384+
let len = d.read_u32();
385+
kinds.push(LazyKindIndex { start: offset, len, map: OnceLock::new() });
386+
offset += len;
387+
}
388+
debug_assert_eq!(offset as usize, node_count);
293389

294390
let session_count = d.read_u64();
295391

392+
// Counting sort: place each node index into its kind's range. `fill[k]`
393+
// points at the next free slot in kind `k`'s range, so a kind's nodes end
394+
// up contiguous. Slots start as `None` and are each filled exactly once
395+
// (the counts sum to the number of non-`Null` nodes).
396+
let mut nodes_by_kind = vec![None; node_count];
397+
let mut fill: Vec<u32> = kinds.iter().map(|k| k.start).collect();
296398
for (idx, node) in nodes.iter_enumerated() {
297-
if index[node.kind.as_usize()].insert(node.key_fingerprint, idx).is_some() {
298-
// Empty nodes and side effect nodes can have duplicates
299-
if node.kind != DepKind::Null && node.kind != DepKind::SideEffect {
300-
let kind = node.kind;
301-
panic!(
302-
"Error: A dep graph node ({kind:?}) does not have an unique index. \
303-
Running a clean build on a nightly compiler with \
304-
`-Z incremental-verify-ich` can help narrow down the issue for reporting. \
305-
A clean build may also work around the issue.\n
306-
DepNode: {node:?}"
307-
)
308-
}
399+
// Unused indices from batch allocation stay `Null`; they carry no
400+
// encoded node and are never looked up by fingerprint, so skip them.
401+
if node.kind == DepKind::Null {
402+
continue;
309403
}
404+
let k = node.kind.as_usize();
405+
nodes_by_kind[fill[k] as usize] = Some(idx);
406+
fill[k] += 1;
310407
}
408+
// Each kind's range was filled exactly to its end.
409+
debug_assert!(kinds.iter().zip(&fill).all(|(k, &f)| f == k.start + k.len));
410+
let reverse_index = LazyNodeIndex { nodes_by_kind, kinds };
311411

312412
Arc::new(SerializedDepGraph {
313413
nodes,
314414
value_fingerprints,
315415
edge_list_indices,
316416
edge_list_data,
317-
index,
417+
reverse_index,
318418
session_count,
419+
profiler: Some(profiler.clone()),
319420
})
320421
}
321422
}

0 commit comments

Comments
 (0)