Skip to content

Commit 0f950ce

Browse files
Rollup merge of #156413 - fmease:rustdoc-ltd-improvs, r=GuillaumeGomez
rustdoc: Correctness & perf improvements to link-to-definition Rewrite the way we resolve type-dependent paths (incl. method calls) in `SpanMapVisitor`. Instead of trying to (re)find the enclosing body owner each time we encounter such a node, potentially calling `typeck_body` several times per body, keep track of the "active" `BodyId` in the visitor and cache the corresponding `TypeckResults`. The "active" `BodyId` is updated in `visit_nested_body` (add) and in `visit_item` (remove). This fixes a class of ICEs where we tried to look up the definition of type-relative paths located in non-body items nested in bodies, in the overarching body which is never correct. rustdoc@main has a hack to fix this issue for paths specifically found in assoc types in impls. Fixes #147882. Fixes #147057. Fixes #155327. Fixes #149089. Fixes #150153. Fixes #156418. Supersedes #147911. --- `--generate-link-to-definition` is not enabled by default, so we can't perf this as is. Instead, I'm benching them against PR #156348 in PR #156355 using the same parent commit for the try-builds. For context, LTD is *extremely* costly as the perf run for the baseline PR shows: #156348 (comment). "Absolute results": #156355 (comment). "Relative results": [https://perf.rust-lang.org/compare.html?...](https://perf.rust-lang.org/compare.html?start=68c2bff6cb08a87e59246064a6a9f37098e22c3f&end=78d15de5525370011388c8f63847e873c4de14ed&stat=instructions%3Au). Excerpt of the relative results: Primary: Benchmark | Profile | Scenario | Backend | Target | % Change | Significance Threshold | Significance Factor ---|---|---|---|---|---|---|--- nalgebra-0.33.0 | doc | full | llvm | x64 | -5.82% | 0.20% | 29.09x diesel-2.2.10 | doc | full | llvm | x64 | -3.41% | 0.20% | 17.05x image-0.25.6 | doc | full | llvm | x64 | -2.03% | 0.20% | 10.16x regex-automata-0.4.8 | doc | full | llvm | x64 | -1.94% | 0.20% | 9.71x cargo-0.87.1 | doc | full | llvm | x64 | -1.64% | 0.20% | 8.21x clap_derive-4.5.32 | doc | full | llvm | x64 | -1.63% | 0.20% | 8.14x cranelift-codegen-0.119.0 | doc | full | llvm | x64 | -1.48% | 0.20% | 7.41x syn-2.0.101 | doc | full | llvm | x64 | -1.07% | 0.20% | 5.35x ripgrep-14.1.1 | doc | full | llvm | x64 | -0.96% | 0.20% | 4.78x stm32f4-0.15.1 | doc | full | llvm | x64 | -0.80% | 0.20% | 4.01x serde-1.0.219 | doc | full | llvm | x64 | -0.67% | 0.20% | 3.33x hyper-1.6.0 | doc | full | llvm | x64 | -0.66% | 0.20% | 3.30x eza-0.21.2 | doc | full | llvm | x64 | -0.55% | 0.20% | 2.74x unicode-normalization-0.1.24 | doc | full | llvm | x64 | -0.49% | 0.20% | 2.45x serde_derive-1.0.219 | doc | full | llvm | x64 | -0.45% | 0.20% | 2.26x typenum-1.18.0 | doc | full | llvm | x64 | -0.42% | 0.20% | 2.08x bitmaps-3.2.1 | doc | full | llvm | x64 | -0.32% | 0.20% | 1.61x Secondary: Benchmark | Profile | Scenario | Backend | Target | % Change | Significance Threshold | Significance Factor ---|---|---|---|---|---|---|--- deep-vector | doc | full | llvm | x64 | -23.96% | 0.20% | 119.78x large-workspace | doc | full | llvm | x64 | -2.16% | 0.20% | 10.81x deeply-nested-multi | doc | full | llvm | x64 | -1.35% | 0.20% | 6.75x serde-1.0.219-threads4 | doc | full | llvm | x64 | -0.67% | 0.20% | 3.33x wg-grammar | doc | full | llvm | x64 | -0.32% | 0.20% | 1.62x tt-muncher | opt | full | llvm | x64 | -0.31% | 0.20% | 1.56x nalgebra-0.33.0: Query/Function | Time (%) | Time (s) | Time delta | Executions | Executions delta | Hits | Hits delta ---|---|---|---|---|---|---|--- Totals | 109.57% | 2.185 | -0.138 (-5.9%) | 1614146 | -5602 (-0.3%) | 16983840 | -1436455 (-7.8%) typeck_root | 15.75% | 0.344 | -0.105 (-23.4%) | 1704 | -954 (-35.9%) | 393 | -6758 (-94.5%) ...|...|...|...|...|...|...|...
2 parents 5d63c29 + e205784 commit 0f950ce

8 files changed

Lines changed: 272 additions & 182 deletions

File tree

src/doc/rustdoc/src/unstable-features.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,6 +653,15 @@ add the `--scrape-tests` flag.
653653
This flag enables the generation of links in the source code pages which allow the reader
654654
to jump to a type definition.
655655

656+
> [!WARNING]
657+
> In very specific scenarios, enabling this feature may lead to your program getting rejected if you
658+
> rely on rustdoc intentionally not running all semantic analysis passes on function bodies to aid
659+
> with documenting `cfg`-conditional items.
660+
>
661+
> More concretely, rustdoc may choose to type-check bodies if they contain type-dependent paths
662+
> including method calls. This may result in name resolution and type errors getting reported that
663+
> rustdoc would usually suppress.
664+
656665
### `--test-builder`: `rustc`-like program to build tests
657666

658667
* Tracking issue: [#102981](https://github.com/rust-lang/rust/issues/102981)

src/librustdoc/html/render/span_map.rs

Lines changed: 114 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
use std::path::{Path, PathBuf};
22

33
use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
4+
use rustc_hir as hir;
45
use rustc_hir::def::{DefKind, Res};
5-
use rustc_hir::def_id::{DefId, LOCAL_CRATE, LocalDefId};
6+
use rustc_hir::def_id::{DefId, LOCAL_CRATE};
67
use rustc_hir::intravisit::{self, Visitor, VisitorExt};
78
use rustc_hir::{ExprKind, HirId, Item, ItemKind, Mod, Node, QPath};
89
use rustc_middle::hir::nested_filter;
9-
use rustc_middle::ty::TyCtxt;
10+
use rustc_middle::ty::{self, TyCtxt};
1011
use rustc_span::{BytePos, ExpnKind};
1112

1213
use crate::clean::{self, PrimitiveType, rustc_span};
@@ -82,7 +83,8 @@ pub(crate) fn collect_spans_and_sources(
8283
generate_link_to_definition: bool,
8384
) -> (FxIndexMap<PathBuf, String>, FxHashMap<Span, LinkFromSrc>) {
8485
if include_sources {
85-
let mut visitor = SpanMapVisitor { tcx, matches: FxHashMap::default() };
86+
let mut visitor =
87+
SpanMapVisitor { tcx, maybe_typeck_results: None, matches: FxHashMap::default() };
8688

8789
if generate_link_to_definition {
8890
tcx.hir_walk_toplevel_module(&mut visitor);
@@ -96,49 +98,63 @@ pub(crate) fn collect_spans_and_sources(
9698

9799
struct SpanMapVisitor<'tcx> {
98100
pub(crate) tcx: TyCtxt<'tcx>,
101+
pub(crate) maybe_typeck_results: Option<LazyTypeckResults<'tcx>>,
99102
pub(crate) matches: FxHashMap<Span, LinkFromSrc>,
100103
}
101104

102-
impl SpanMapVisitor<'_> {
105+
impl<'tcx> SpanMapVisitor<'tcx> {
106+
/// Returns the typeck results of the current body if we're in one.
107+
///
108+
/// This will typeck the body if it hasn't been already. Since rustdoc intentionally doesn't run
109+
/// all semantic analysis passes on function bodies at the time of writing, this can lead to us
110+
/// "suddenly" rejecting the user's code under `--generate-link-to-definition` while accepting
111+
/// it if that flag isn't passed! So use this method sparingly and think about the consequences
112+
/// including performance!
113+
///
114+
/// This behavior is documented in the rustdoc book. Ideally, it wouldn't be that way but no
115+
/// good solution has been found so far. Don't think about adding some sort of flag to rustc to
116+
/// suppress diagnostic emission that would be unsound wrt. `ErrorGuaranteed`[^1] and generally
117+
/// be quite hacky!
118+
///
119+
/// [^1]: Historical context:
120+
/// <https://github.com/rust-lang/rust/issues/69426#issuecomment-1019412352>.
121+
fn maybe_typeck_results(&mut self) -> Option<&'tcx ty::TypeckResults<'tcx>> {
122+
let results = self.maybe_typeck_results.as_mut()?;
123+
let results = results.cache.get_or_insert_with(|| self.tcx.typeck_body(results.body_id));
124+
Some(results)
125+
}
126+
127+
fn link_for_def(&self, def_id: DefId) -> LinkFromSrc {
128+
if def_id.is_local() {
129+
LinkFromSrc::Local(rustc_span(def_id, self.tcx))
130+
} else {
131+
LinkFromSrc::External(def_id)
132+
}
133+
}
134+
103135
/// This function is where we handle `hir::Path` elements and add them into the "span map".
104-
fn handle_path(&mut self, path: &rustc_hir::Path<'_>, only_use_last_segment: bool) {
136+
fn handle_path(&mut self, path: &hir::Path<'_>, only_use_last_segment: bool) {
105137
match path.res {
106-
// FIXME: For now, we handle `DefKind` if it's not a `DefKind::TyParam`.
107-
// Would be nice to support them too alongside the other `DefKind`
108-
// (such as primitive types!).
109-
Res::Def(kind, def_id) if kind != DefKind::TyParam => {
110-
let link = if def_id.as_local().is_some() {
111-
LinkFromSrc::Local(rustc_span(def_id, self.tcx))
112-
} else {
113-
LinkFromSrc::External(def_id)
114-
};
138+
// FIXME: Properly support type parameters. Note they resolve just fine. The issue is
139+
// that our highlighter would then also linkify their *definition site* for some reason
140+
// linking them to themselves. Const parameters don't exhibit this issue.
141+
Res::Def(DefKind::TyParam, _) => {}
142+
Res::Def(_, def_id) => {
143+
// The segments can be empty for `use *;` in a non-crate-root scope in Rust 2015.
144+
let span = path.segments.last().map_or(path.span, |seg| seg.ident.span);
115145
// In case the path ends with generics, we remove them from the span.
116-
let span = if only_use_last_segment
117-
&& let Some(path_span) = path.segments.last().map(|segment| segment.ident.span)
118-
{
119-
path_span
146+
let span = if only_use_last_segment {
147+
span
120148
} else {
121-
path.segments
122-
.last()
123-
.map(|last| {
124-
// In `use` statements, the included item is not in the path segments.
125-
// However, it doesn't matter because you can't have generics on `use`
126-
// statements.
127-
if path.span.contains(last.ident.span) {
128-
path.span.with_hi(last.ident.span.hi())
129-
} else {
130-
path.span
131-
}
132-
})
133-
.unwrap_or(path.span)
149+
// In `use` statements, the included item is not in the path segments. However,
150+
// it doesn't matter because you can't have generics on `use` statements.
151+
if path.span.contains(span) { path.span.with_hi(span.hi()) } else { path.span }
134152
};
135-
self.matches.insert(span.into(), link);
153+
self.matches.insert(span.into(), self.link_for_def(def_id));
136154
}
137155
Res::Local(_) if let Some(span) = self.tcx.hir_res_span(path.res) => {
138-
let path_span = if only_use_last_segment
139-
&& let Some(path_span) = path.segments.last().map(|segment| segment.ident.span)
140-
{
141-
path_span
156+
let path_span = if only_use_last_segment {
157+
path.segments.last().unwrap().ident.span
142158
} else {
143159
path.span
144160
};
@@ -149,7 +165,6 @@ impl SpanMapVisitor<'_> {
149165
self.matches
150166
.insert(path.span.into(), LinkFromSrc::Primitive(PrimitiveType::from(p)));
151167
}
152-
Res::Err => {}
153168
_ => {}
154169
}
155170
}
@@ -216,43 +231,6 @@ impl SpanMapVisitor<'_> {
216231
self.matches.insert(new_span.into(), link_from_src);
217232
true
218233
}
219-
220-
fn infer_id(&mut self, hir_id: HirId, expr_hir_id: Option<HirId>, span: Span) {
221-
let tcx = self.tcx;
222-
let body_id = tcx.hir_enclosing_body_owner(hir_id);
223-
// FIXME: this is showing error messages for parts of the code that are not
224-
// compiled (because of cfg)!
225-
//
226-
// See discussion in https://github.com/rust-lang/rust/issues/69426#issuecomment-1019412352
227-
let typeck_results = tcx.typeck_body(tcx.hir_body_owned_by(body_id).id());
228-
// Interestingly enough, for method calls, we need the whole expression whereas for static
229-
// method/function calls, we need the call expression specifically.
230-
if let Some(def_id) = typeck_results.type_dependent_def_id(expr_hir_id.unwrap_or(hir_id)) {
231-
let link = if def_id.as_local().is_some() {
232-
LinkFromSrc::Local(rustc_span(def_id, tcx))
233-
} else {
234-
LinkFromSrc::External(def_id)
235-
};
236-
self.matches.insert(span, link);
237-
}
238-
}
239-
}
240-
241-
// This is a reimplementation of `hir_enclosing_body_owner` which allows to fail without
242-
// panicking.
243-
fn hir_enclosing_body_owner(tcx: TyCtxt<'_>, hir_id: HirId) -> Option<LocalDefId> {
244-
for (_, node) in tcx.hir_parent_iter(hir_id) {
245-
// FIXME: associated type impl items don't have an associated body, so we don't handle
246-
// them currently.
247-
if let Node::ImplItem(impl_item) = node
248-
&& matches!(impl_item.kind, rustc_hir::ImplItemKind::Type(_))
249-
{
250-
return None;
251-
} else if let Some((def_id, _)) = node.associated_body() {
252-
return Some(def_id);
253-
}
254-
}
255-
None
256234
}
257235

258236
impl<'tcx> Visitor<'tcx> for SpanMapVisitor<'tcx> {
@@ -262,7 +240,24 @@ impl<'tcx> Visitor<'tcx> for SpanMapVisitor<'tcx> {
262240
self.tcx
263241
}
264242

265-
fn visit_path(&mut self, path: &rustc_hir::Path<'tcx>, _id: HirId) {
243+
fn visit_nested_body(&mut self, body_id: hir::BodyId) -> Self::Result {
244+
let maybe_typeck_results =
245+
self.maybe_typeck_results.replace(LazyTypeckResults { body_id, cache: None });
246+
self.visit_body(self.tcx.hir_body(body_id));
247+
self.maybe_typeck_results = maybe_typeck_results;
248+
}
249+
250+
fn visit_anon_const(&mut self, ct: &'tcx hir::AnonConst) {
251+
// FIXME: Typeck'ing anon consts leads to ICEs in rustc if the parent body wasn't typeck'ed
252+
// yet. See #156418. Figure out what the best and proper solution for this is. Until
253+
// then, let's prevent `typeck` from being called on anon consts by not setting
254+
// `maybe_typeck_results` to `Some(_)`.
255+
let maybe_typeck_results = self.maybe_typeck_results.take();
256+
self.visit_body(self.tcx.hir_body(ct.body));
257+
self.maybe_typeck_results = maybe_typeck_results;
258+
}
259+
260+
fn visit_path(&mut self, path: &hir::Path<'tcx>, _id: HirId) {
266261
if self.handle_macro(path.span) {
267262
return;
268263
}
@@ -272,25 +267,32 @@ impl<'tcx> Visitor<'tcx> for SpanMapVisitor<'tcx> {
272267

273268
fn visit_qpath(&mut self, qpath: &QPath<'tcx>, id: HirId, _span: rustc_span::Span) {
274269
match *qpath {
275-
QPath::TypeRelative(qself, path) => {
276-
if matches!(path.res, Res::Err) {
277-
let tcx = self.tcx;
278-
if let Some(body_id) = hir_enclosing_body_owner(tcx, id) {
279-
let typeck_results = tcx.typeck_body(tcx.hir_body_owned_by(body_id).id());
280-
let path = rustc_hir::Path {
281-
// We change the span to not include parens.
282-
span: path.ident.span,
283-
res: typeck_results.qpath_res(qpath, id),
284-
segments: &[],
285-
};
286-
self.handle_path(&path, false);
287-
}
288-
} else {
289-
self.infer_id(path.hir_id, Some(id), path.ident.span.into());
270+
QPath::TypeRelative(qself, segment) => {
271+
// FIXME: This doesn't work for paths in *types* since HIR ty lowering currently
272+
// doesn't write back the resolution of type-relative paths. Updating it to
273+
// do so should be a simple fix.
274+
// FIXME: This obviously doesn't support item signatures / non-bodies. Sadly, rustc
275+
// currently doesn't keep around that information & thus can't provide an API
276+
// for it.
277+
// `ItemCtxt`s would need a place to write back the resolution of type-
278+
// dependent definitions. Ideally there was some sort of query keyed on the
279+
// `LocalDefId` of the owning item that returns some table with which we can
280+
// map the `HirId` to a `DefId`.
281+
// Of course, we could re-HIR-ty-lower such paths *here* if we were to extend
282+
// the public API of HIR analysis. However, I strongly advise against it as
283+
// it would be too much of a hack.
284+
if let Some(typeck_results) = self.maybe_typeck_results() {
285+
let path = hir::Path {
286+
// We change the span to not include parens.
287+
span: segment.ident.span,
288+
res: typeck_results.qpath_res(qpath, id),
289+
segments: std::slice::from_ref(segment),
290+
};
291+
self.handle_path(&path, false);
290292
}
291293

292294
rustc_ast::visit::try_visit!(self.visit_ty_unambig(qself));
293-
self.visit_path_segment(path);
295+
self.visit_path_segment(segment);
294296
}
295297
QPath::Resolved(maybe_qself, path) => {
296298
self.handle_path(path, true);
@@ -323,23 +325,27 @@ impl<'tcx> Visitor<'tcx> for SpanMapVisitor<'tcx> {
323325
intravisit::walk_mod(self, m);
324326
}
325327

326-
fn visit_expr(&mut self, expr: &'tcx rustc_hir::Expr<'tcx>) {
328+
fn visit_expr(&mut self, expr: &'tcx hir::Expr<'tcx>) {
327329
match expr.kind {
328330
ExprKind::MethodCall(segment, ..) => {
329-
self.infer_id(segment.hir_id, Some(expr.hir_id), segment.ident.span.into())
330-
}
331-
ExprKind::Call(call, ..) => self.infer_id(call.hir_id, None, call.span.into()),
332-
_ => {
333-
if self.handle_macro(expr.span) {
334-
// We don't want to go deeper into the macro.
335-
return;
331+
if let Some(typeck_results) = self.maybe_typeck_results()
332+
&& let Some(def_id) = typeck_results.type_dependent_def_id(expr.hir_id)
333+
{
334+
self.matches.insert(segment.ident.span.into(), self.link_for_def(def_id));
336335
}
337336
}
337+
// We don't want to go deeper into the macro.
338+
_ if self.handle_macro(expr.span) => return,
339+
_ => {}
338340
}
339341
intravisit::walk_expr(self, expr);
340342
}
341343

342344
fn visit_item(&mut self, item: &'tcx Item<'tcx>) {
345+
// We're no longer in a body since we've crossed an item boundary.
346+
// Temporarily take away the typeck results which are only valid in bodies.
347+
let maybe_typeck_results = self.maybe_typeck_results.take();
348+
343349
match item.kind {
344350
ItemKind::Static(..)
345351
| ItemKind::Const(..)
@@ -359,6 +365,15 @@ impl<'tcx> Visitor<'tcx> for SpanMapVisitor<'tcx> {
359365
// We already have "visit_mod" above so no need to check it here.
360366
| ItemKind::Mod(..) => {}
361367
}
368+
362369
intravisit::walk_item(self, item);
370+
371+
self.maybe_typeck_results = maybe_typeck_results;
363372
}
364373
}
374+
375+
/// Lazily computed & cached [`ty::TypeckResults`].
376+
struct LazyTypeckResults<'tcx> {
377+
body_id: hir::BodyId,
378+
cache: Option<&'tcx ty::TypeckResults<'tcx>>,
379+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Like test `assoc-items.rs` but now utilizing unstable features.
2+
// FIXME: Make use of (m)GCA assoc consts once they no longer ICE!
3+
//@ compile-flags: -Zunstable-options --generate-link-to-definition
4+
#![feature(return_type_notation)]
5+
6+
//@ has 'src/assoc_items_extra/assoc-items-extra.rs.html'
7+
8+
trait Trait0 {
9+
fn fn0() -> impl Sized;
10+
fn fn1() -> impl Sized;
11+
}
12+
13+
fn item<T: Trait0>()
14+
where
15+
//@ has - '//a[@href="#9"]' 'fn0'
16+
<T as Trait0>::fn0(..): Copy, // Item, AssocFn, Resolved
17+
// FIXME: Support this:
18+
//@ !has - '//a[@href="#10"]' 'fn1'
19+
T::fn1(..): Copy, // Item, AssocFn, TypeRelative
20+
{}

0 commit comments

Comments
 (0)