Skip to content

Commit 44860d3

Browse files
committed
Auto merge of #152277 - gurry:114865-ice-format-args, r=chenyukang
Validate source snippet when format input is raw string. Fixes #114865. The issue occurred because the user's proc macro respanned the format arg to an unrelated multi-byte string, and we ICE'd by landing in the middle of a multi-byte char. This PR adds validation that prevents the parser from trying to walk such obviously wrong snippets. Such validation already existed for non-raw strings; this PR adds it for raw strings as well.
2 parents a3e96d8 + 43c78ad commit 44860d3

4 files changed

Lines changed: 147 additions & 11 deletions

File tree

compiler/rustc_parse_format/src/lib.rs

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -310,24 +310,49 @@ impl<'input> Parser<'input> {
310310

311311
let (is_source_literal, end_of_snippet, pre_input_vec) = if let Some(snippet) = snippet {
312312
if let Some(nr_hashes) = style {
313-
// snippet is a raw string, which starts with 'r', a number of hashes, and a quote
314-
// and ends with a quote and the same number of hashes
315-
(true, snippet.len() - nr_hashes - 1, vec![])
313+
// snippet is a raw string
314+
315+
// validate snippet because a proc macro may have
316+
// respanned it to something completely different (fixes #114865)
317+
let prefix_len = nr_hashes + 2; // r + hashes + opening "
318+
let suffix_len = nr_hashes + 1; // closing " + hashes
319+
let snippet_bytes = snippet.as_bytes();
320+
let content_end = snippet.len() - suffix_len;
321+
if snippet.len() >= prefix_len + suffix_len // is sufficiently long
322+
&& snippet_bytes[0] == b'r'
323+
&& snippet_bytes[1..1 + nr_hashes].iter().all(|&c| c == b'#')
324+
&& snippet_bytes[1 + nr_hashes] == b'"'
325+
&& snippet_bytes[content_end] == b'"'
326+
&& snippet_bytes[content_end + 1..].iter().all(|&c| c == b'#')
327+
{
328+
let snippet_without_quotes = &snippet[prefix_len..content_end];
329+
let input_without_newline =
330+
if appended_newline { &input[..input.len() - 1] } else { input };
331+
if snippet_without_quotes == input_without_newline {
332+
(true, snippet.len() - suffix_len, vec![])
333+
} else {
334+
(false, snippet.len(), vec![])
335+
}
336+
} else {
337+
(false, snippet.len(), vec![])
338+
}
316339
} else {
317340
// snippet is not a raw string
318341
if snippet.starts_with('"') {
319342
// snippet looks like an ordinary string literal
320343
// check whether it is the escaped version of input
321-
let without_quotes = &snippet[1..snippet.len() - 1];
344+
let snippet_without_quotes = &snippet[1..snippet.len() - 1];
322345
let (mut ok, mut vec) = (true, vec![]);
323346
let mut chars = input.chars();
324-
rustc_literal_escaper::unescape_str(without_quotes, |range, res| match res {
325-
Ok(ch) if ok && chars.next().is_some_and(|c| ch == c) => {
326-
vec.push((range, ch));
327-
}
328-
_ => {
329-
ok = false;
330-
vec = vec![];
347+
rustc_literal_escaper::unescape_str(snippet_without_quotes, |range, res| {
348+
match res {
349+
Ok(ch) if ok && chars.next().is_some_and(|c| ch == c) => {
350+
vec.push((range, ch));
351+
}
352+
_ => {
353+
ok = false;
354+
vec = vec![];
355+
}
331356
}
332357
});
333358
let end = vec.last().map(|(r, _)| r.end).unwrap_or(0);
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#![feature(proc_macro_span)]
2+
3+
extern crate proc_macro;
4+
5+
use proc_macro::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree};
6+
use std::iter::FromIterator;
7+
8+
/// Builds a `println!(<fmt_str>)` token stream with the given span on the format string literal.
9+
fn make_println(fmt_str: &str, span: Span) -> TokenStream {
10+
let mut lit: Literal = fmt_str.parse().unwrap();
11+
lit.set_span(span);
12+
FromIterator::<TokenTree>::from_iter([
13+
Ident::new("println", Span::mixed_site()).into(),
14+
Punct::new('!', Spacing::Alone).into(),
15+
Group::new(Delimiter::Parenthesis, TokenTree::from(lit).into()).into(),
16+
])
17+
}
18+
19+
/// Expands to `println!(r"{}")` with the span of the first input token.
20+
#[proc_macro]
21+
pub fn foo(input: TokenStream) -> TokenStream {
22+
let span = input.into_iter().next().unwrap().span();
23+
make_println(r#"r"{}""#, span)
24+
}
25+
26+
/// Same as `foo` but with hashes: expands to `println!(r##"{}"##)`.
27+
#[proc_macro]
28+
pub fn foo2(input: TokenStream) -> TokenStream {
29+
let span = input.into_iter().next().unwrap().span();
30+
make_println(r###"r##"{}"##"###, span)
31+
}
32+
33+
/// Expands to `println!(r"{}")` with a span joining two input tokens,
34+
/// creating a span whose source text may not be a valid raw string.
35+
#[proc_macro]
36+
pub fn foo3(input: TokenStream) -> TokenStream {
37+
let mut iter = input.into_iter();
38+
let span = iter.next().unwrap().span().join(iter.next().unwrap().span()).unwrap();
39+
make_println(r#"r"{}""#, span)
40+
}
41+
42+
/// Same as `foo3` but with hashes: expands to `println!(r##"{}"##)`.
43+
#[proc_macro]
44+
pub fn foo4(input: TokenStream) -> TokenStream {
45+
let mut iter = input.into_iter();
46+
let span = iter.next().unwrap().span().join(iter.next().unwrap().span()).unwrap();
47+
make_println(r###"r##"{}"##"###, span)
48+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Regression test for ICE https://github.com/rust-lang/rust/issues/114865
2+
3+
// Tests that we do not ICE when a proc macro expands
4+
// to a string formatting macro (like println!) and respans
5+
// this formatting macro's arg to that of its own input which
6+
// happens to be a multi-byte string (see the auxiliary file
7+
// ice-wrong-span-114865.rs).
8+
9+
//@ proc-macro: ice-wrong-span-114865.rs
10+
11+
extern crate ice_wrong_span_114865;
12+
13+
use ice_wrong_span_114865::{foo, foo2, foo3, foo4};
14+
15+
fn main() {
16+
foo!("字"); //~ ERROR 1 positional argument in format string, but no arguments were given
17+
foo!("r字字"); //~ ERROR 1 positional argument in format string, but no arguments were given
18+
19+
foo2!("字"); //~ ERROR 1 positional argument in format string, but no arguments were given
20+
foo2!("r字字"); //~ ERROR 1 positional argument in format string, but no arguments were given
21+
22+
foo3!(r"abc" 字); //~ ERROR 1 positional argument in format string, but no arguments were given
23+
24+
foo4!(r##"abcd"## 字); //~ ERROR 1 positional argument in format string, but no arguments were given
25+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
error: 1 positional argument in format string, but no arguments were given
2+
--> $DIR/ice-wrong-span-114865.rs:16:10
3+
|
4+
LL | foo!("字");
5+
| ^^^^
6+
7+
error: 1 positional argument in format string, but no arguments were given
8+
--> $DIR/ice-wrong-span-114865.rs:17:10
9+
|
10+
LL | foo!("r字字");
11+
| ^^^^^^^
12+
13+
error: 1 positional argument in format string, but no arguments were given
14+
--> $DIR/ice-wrong-span-114865.rs:19:11
15+
|
16+
LL | foo2!("字");
17+
| ^^^^
18+
19+
error: 1 positional argument in format string, but no arguments were given
20+
--> $DIR/ice-wrong-span-114865.rs:20:11
21+
|
22+
LL | foo2!("r字字");
23+
| ^^^^^^^
24+
25+
error: 1 positional argument in format string, but no arguments were given
26+
--> $DIR/ice-wrong-span-114865.rs:22:11
27+
|
28+
LL | foo3!(r"abc" 字);
29+
| ^^^^^^^^^
30+
31+
error: 1 positional argument in format string, but no arguments were given
32+
--> $DIR/ice-wrong-span-114865.rs:24:11
33+
|
34+
LL | foo4!(r##"abcd"## 字);
35+
| ^^^^^^^^^^^^^^
36+
37+
error: aborting due to 6 previous errors
38+

0 commit comments

Comments
 (0)