Skip to content

Commit b89cf0b

Browse files
nsvke and P-E-P
authored and committed
Defer literal suffix validation to parser and preserve source fidelity
Number literal evaluation and suffix validation should be done after macro expansion, so we defer these to the parser phase. This preserves source fidelity for macro token trees. gcc/rust/ChangeLog: * ast/rust-ast-collector.cc (TokenCollector::visit): Update Token::make_int and Token::make_float calls to include suffix_start and IntegerLiteralBase::Decimal. * expand/rust-macro-builtins-location.cc (MacroBuiltin::column_handler): Pass string length and base to Token::make_int. (MacroBuiltin::line_handler): Likewise. * lex/rust-lex.cc (Lexer::parse_in_type_suffix): Rename to parse_in_suffix and return string instead of PrimitiveCoreType. (Lexer::parse_in_suffix): Remove underscore stripping to preserve source fidelity for macros. (Lexer::parse_in_exponent_part): Preserve '+' and '-' characters in the raw string. (Lexer::parse_in_decimal): Remove underscore stripping. (Lexer::parse_non_decimal_int_literal): Track suffix start index and pass literal base. (Lexer::parse_non_decimal_int_literals): Use IntegerLiteralBase enum values instead of raw integers. (Lexer::parse_decimal_int_or_float): Track suffix string length and pass base parameters to token creation. * lex/rust-lex.h: Update method signatures for suffix parsing. * lex/rust-token.h (enum class IntegerLiteralBase): New enum to represent numeric bases. * parse/rust-parse-impl-expr.hxx: Use LiteralResolve functions to evaluate raw token strings. * parse/rust-parse-impl-pattern.hxx: Use evaluated literal strings for INT and FLOAT tokens. * parse/rust-parse.cc (resolve_literal_suffix): Move suffix validation logic from lexer to parser. (evaluate_integer_literal): New function to strip underscores and convert to decimal via GMP. (evaluate_float_literal): New function to strip underscores from floats. * parse/rust-parse.h (evaluate_integer_literal): Declare in LiteralResolve namespace. (evaluate_float_literal): Likewise. (resolve_literal_suffix): Likewise. 
* util/rust-token-converter.cc (from_literal): Safely reconstruct raw text and suffix to dynamically determine base and suffix_start for ProcMacros. gcc/testsuite/ChangeLog: * rust/compile/deferred-suffix-validation.rs: New test. * rust/compile/evaluate-integer-or-float.rs: New test. * rust/compile/tuple-index.rs: New test. Signed-off-by: Enes Cevik <nsvke@proton.me>
1 parent f05c89b commit b89cf0b

13 files changed

Lines changed: 495 additions & 261 deletions

gcc/rust/ast/rust-ast-collector.cc

Lines changed: 38 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -397,13 +397,20 @@ TokenCollector::visit (Token &tok)
397397
push (Rust::Token::make_identifier (tok.get_locus (), std::move (data)));
398398
break;
399399
case INT_LITERAL:
400-
push (Rust::Token::make_int (tok.get_locus (), std::move (data),
401-
tok.get_type_hint ()));
402-
break;
403-
case FLOAT_LITERAL:
404-
push (Rust::Token::make_float (tok.get_locus (), std::move (data),
400+
{
401+
auto suffix_start = data.length ();
402+
push (Rust::Token::make_int (tok.get_locus (), std::move (data),
403+
suffix_start, IntegerLiteralBase::Decimal,
405404
tok.get_type_hint ()));
406-
break;
405+
break;
406+
}
407+
case FLOAT_LITERAL:
408+
{
409+
auto suffix_start = data.length ();
410+
push (Rust::Token::make_float (tok.get_locus (), std::move (data),
411+
suffix_start, tok.get_type_hint ()));
412+
break;
413+
}
407414
case STRING_LITERAL:
408415
push (Rust::Token::make_string (tok.get_locus (), std::move (data)));
409416
break;
@@ -857,13 +864,20 @@ TokenCollector::visit (Literal &lit, location_t locus)
857864
push (Rust::Token::make_raw_string (locus, std::move (value)));
858865
break;
859866
case Literal::LitType::INT:
860-
push (
861-
Rust::Token::make_int (locus, std::move (value), lit.get_type_hint ()));
862-
break;
863-
case Literal::LitType::FLOAT:
864-
push (Rust::Token::make_float (locus, std::move (value),
867+
{
868+
auto val_len = value.length ();
869+
push (Rust::Token::make_int (locus, std::move (value), val_len,
870+
IntegerLiteralBase::Decimal,
865871
lit.get_type_hint ()));
866-
break;
872+
break;
873+
}
874+
case Literal::LitType::FLOAT:
875+
{
876+
auto val_len = value.length ();
877+
push (Rust::Token::make_float (locus, std::move (value), val_len,
878+
lit.get_type_hint ()));
879+
break;
880+
}
867881
case Literal::LitType::BOOL:
868882
{
869883
if (value == Values::Keywords::FALSE_LITERAL)
@@ -1237,8 +1251,10 @@ TokenCollector::visit (TupleIndexExpr &expr)
12371251
describe_node (std::string ("TupleIndexExpr"), [this, &expr] () {
12381252
visit (expr.get_tuple_expr ());
12391253
push (Rust::Token::make (DOT, expr.get_locus ()));
1240-
push (Rust::Token::make_int (UNDEF_LOCATION,
1241-
std::to_string (expr.get_tuple_index ())));
1254+
auto str = std::to_string (expr.get_tuple_index ());
1255+
auto suffix_start = str.length ();
1256+
push (Rust::Token::make_int (UNDEF_LOCATION, str, suffix_start,
1257+
IntegerLiteralBase::Decimal));
12421258
});
12431259
}
12441260

@@ -1277,8 +1293,10 @@ TokenCollector::visit (StructExprFieldIndexValue &expr)
12771293
{
12781294
describe_node (std::string ("StructExprFieldIndexValue"), [this, &expr] () {
12791295
visit_items_as_lines (expr.get_outer_attrs ());
1280-
push (Rust::Token::make_int (expr.get_locus (),
1281-
std::to_string (expr.get_index ())));
1296+
auto str = std::to_string (expr.get_index ());
1297+
auto suffix_start = str.length ();
1298+
push (Rust::Token::make_int (expr.get_locus (), str, suffix_start,
1299+
IntegerLiteralBase::Decimal));
12821300
push (Rust::Token::make (COLON, UNDEF_LOCATION));
12831301
visit (expr.get_value ());
12841302
});
@@ -2885,8 +2903,10 @@ TokenCollector::visit (StructPatternFieldTuplePat &pattern)
28852903
describe_node (std::string ("StructPatternFieldTuplePat"), [this,
28862904
&pattern] () {
28872905
visit_items_as_lines (pattern.get_outer_attrs ());
2888-
push (Rust::Token::make_int (pattern.get_locus (),
2889-
std::to_string (pattern.get_index ())));
2906+
auto str = std::to_string (pattern.get_index ());
2907+
auto suffix_start = str.length ();
2908+
push (Rust::Token::make_int (pattern.get_locus (), str, suffix_start,
2909+
IntegerLiteralBase::Decimal));
28902910
push (Rust::Token::make (COLON, pattern.get_locus ()));
28912911
visit (pattern.get_index_pattern ());
28922912
});

gcc/rust/expand/rust-macro-builtins-location.cc

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,10 @@ MacroBuiltin::column_handler (location_t invoc_locus, AST::MacroInvocData &,
3939
{
4040
auto current_column = LOCATION_COLUMN (invoc_locus);
4141

42+
auto str = std::to_string (current_column);
43+
auto str_len = str.length ();
4244
auto column_tok = make_token (
43-
Token::make_int (invoc_locus, std::to_string (current_column)));
45+
Token::make_int (invoc_locus, str, str_len, IntegerLiteralBase::Decimal));
4446
auto column_no = AST::SingleASTNode (std::unique_ptr<AST::Expr> (
4547
new AST::LiteralExpr (std::to_string (current_column), AST::Literal::INT,
4648
PrimitiveCoreType::CORETYPE_U32, {}, invoc_locus)));
@@ -57,8 +59,10 @@ MacroBuiltin::line_handler (location_t invoc_locus, AST::MacroInvocData &,
5759
auto line_no = AST::SingleASTNode (std::unique_ptr<AST::Expr> (
5860
new AST::LiteralExpr (std::to_string (current_line), AST::Literal::INT,
5961
PrimitiveCoreType::CORETYPE_U32, {}, invoc_locus)));
60-
auto tok
61-
= make_token (Token::make_int (invoc_locus, std::to_string (current_line)));
62+
auto str = std::to_string (current_line);
63+
auto str_len = str.length ();
64+
auto tok = make_token (
65+
Token::make_int (invoc_locus, str, str_len, IntegerLiteralBase::Decimal));
6266

6367
return AST::Fragment ({line_no}, std::move (tok));
6468
}

0 commit comments

Comments
 (0)