Skip to content

Commit c03ff35

Browse files
committed
[FIRRTL] Allow keywords as identifiers in expressions
This change allows FIRRTL keywords like UInt, SInt, String, Integer, Bool, Double, and List to be used as identifiers in expression contexts when they are not followed by '(' or '<'.

The lexer now creates special tokens (lp_UInt, langle_UInt, etc.) when these keywords are immediately followed by '(' or '<', allowing the parser to distinguish between:
- 'UInt' as an identifier (e.g., wire UInt : UInt<8>)
- 'UInt<8>(42)' as an integer literal expression
- 'List<Integer>()' as a list creation expression

Both the type parser and the expression parser have been updated to handle these new tokens, ensuring that types like 'UInt<8>' and 'List<Integer>' work correctly in both type and expression contexts.

Fixes #7838

AI-assisted-by: Augment (Claude Sonnet 4.5)
Signed-off-by: Schuyler Eldridge <schuyler.eldridge@sifive.com>
1 parent 0f99432 commit c03ff35

5 files changed

Lines changed: 208 additions & 41 deletions

File tree

lib/Dialect/FIRRTL/Import/FIRLexer.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,19 @@ FIRToken FIRLexer::lexIdentifierOrKeyword(const char *tokStart) {
419419
}
420420
}
421421

422+
// Check to see if this is a keyword followed by '<' character.
423+
if (*curPtr == '<') {
424+
FIRToken::Kind kind = llvm::StringSwitch<FIRToken::Kind>(spelling)
425+
#define TOK_LESSKEYWORD(SPELLING) .Case(#SPELLING, FIRToken::langle_##SPELLING)
426+
#include "FIRTokenKinds.def"
427+
.Default(FIRToken::identifier);
428+
#undef TOK_LESSKEYWORD
429+
if (kind != FIRToken::identifier) {
430+
++curPtr;
431+
return formToken(kind, tokStart);
432+
}
433+
}
434+
422435
// See if the identifier is a keyword. By default, it is an identifier.
423436
FIRToken::Kind kind = llvm::StringSwitch<FIRToken::Kind>(spelling)
424437
#define TOK_KEYWORD(SPELLING) .Case(#SPELLING, FIRToken::kw_##SPELLING)

lib/Dialect/FIRRTL/Import/FIRLexer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class FIRToken {
3535
#define TOK_PUNCTUATION(NAME, SPELLING) NAME,
3636
#define TOK_KEYWORD(SPELLING) kw_##SPELLING,
3737
#define TOK_LPKEYWORD(SPELLING) lp_##SPELLING,
38+
#define TOK_LESSKEYWORD(SPELLING) langle_##SPELLING,
3839
#include "FIRTokenKinds.def"
3940
};
4041

lib/Dialect/FIRRTL/Import/FIRParser.cpp

Lines changed: 150 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,9 @@ struct FIRParser {
275275
template <typename T>
276276
ParseResult parseOptionalWidth(T &result);
277277

278+
// Parse 'intLit' '>' assuming '<' was already consumed.
279+
ParseResult parseWidth(int32_t &result);
280+
278281
// Parse the 'id' grammar, which is an identifier or an allowed keyword.
279282
ParseResult parseId(StringRef &result, const Twine &message);
280283
ParseResult parseId(StringAttr &result, const Twine &message);
@@ -702,6 +705,18 @@ ParseResult FIRParser::parseOptionalWidth(T &result) {
702705
return success();
703706
}
704707

708+
/// Parse a width specifier: intLit '>'
709+
/// This is used when the '<' has already been consumed.
710+
ParseResult FIRParser::parseWidth(int32_t &result) {
711+
auto widthLoc = getToken().getLoc();
712+
if (parseIntLit(result, "expected width") ||
713+
parseToken(FIRToken::greater, "expected '>'"))
714+
return failure();
715+
if (result < 0)
716+
return emitError(widthLoc, "invalid width specifier"), failure();
717+
return success();
718+
}
719+
705720
/// id ::= Id | keywordAsId
706721
///
707722
/// Parse the 'id' grammar, which is an identifier or an allowed keyword. On
@@ -1016,22 +1031,41 @@ ParseResult FIRParser::parseType(FIRRTLType &result, const Twine &message) {
10161031
break;
10171032

10181033
case FIRToken::kw_UInt:
1034+
consumeToken(FIRToken::kw_UInt);
1035+
// Width is not present since langle_UInt would have been lexed instead.
1036+
result = UIntType::get(getContext(), -1);
1037+
break;
1038+
10191039
case FIRToken::kw_SInt:
1020-
case FIRToken::kw_Analog: {
1040+
consumeToken(FIRToken::kw_SInt);
1041+
// Width is not present since langle_SInt would have been lexed instead.
1042+
result = SIntType::get(getContext(), -1);
1043+
break;
1044+
1045+
case FIRToken::kw_Analog:
1046+
consumeToken(FIRToken::kw_Analog);
1047+
// Width is not present since langle_Analog would have been lexed instead.
1048+
result = AnalogType::get(getContext(), -1);
1049+
break;
1050+
1051+
case FIRToken::langle_UInt:
1052+
case FIRToken::langle_SInt:
1053+
case FIRToken::langle_Analog: {
1054+
// The '<' has already been consumed by the lexer, so we need to parse
1055+
// the mandatory width and the trailing '>'.
10211056
auto kind = getToken().getKind();
10221057
consumeToken();
10231058

1024-
// Parse a width specifier if present.
10251059
int32_t width;
1026-
if (parseOptionalWidth(width))
1060+
if (parseWidth(width))
10271061
return failure();
10281062

1029-
if (kind == FIRToken::kw_SInt)
1063+
if (kind == FIRToken::langle_SInt)
10301064
result = SIntType::get(getContext(), width);
1031-
else if (kind == FIRToken::kw_UInt)
1065+
else if (kind == FIRToken::langle_UInt)
10321066
result = UIntType::get(getContext(), width);
10331067
else {
1034-
assert(kind == FIRToken::kw_Analog);
1068+
assert(kind == FIRToken::langle_Analog);
10351069
result = AnalogType::get(getContext(), width);
10361070
}
10371071
break;
@@ -1219,6 +1253,22 @@ ParseResult FIRParser::parseType(FIRRTLType &result, const Twine &message) {
12191253
if (requireFeature({4, 0, 0}, "Lists") || parseListType(result))
12201254
return failure();
12211255
break;
1256+
1257+
case FIRToken::langle_List: {
1258+
// The '<' has already been consumed by the lexer, so we need to parse
1259+
// the element type and the trailing '>'.
1260+
if (requireFeature({4, 0, 0}, "Lists"))
1261+
return failure();
1262+
consumeToken();
1263+
1264+
PropertyType elementType;
1265+
if (parsePropertyType(elementType, "expected List element type") ||
1266+
parseToken(FIRToken::greater, "expected '>' in List type"))
1267+
return failure();
1268+
1269+
result = ListType::get(getContext(), elementType);
1270+
break;
1271+
}
12221272
}
12231273

12241274
// Handle postfix vector sizes.
@@ -1959,7 +2009,9 @@ struct FIRStmtParser : public FIRParser {
19592009
ParseResult parsePostFixFieldId(Value &result);
19602010
ParseResult parsePostFixIntSubscript(Value &result);
19612011
ParseResult parsePostFixDynamicSubscript(Value &result);
1962-
ParseResult parseIntegerLiteralExp(Value &result);
2012+
ParseResult
2013+
parseIntegerLiteralExp(Value &result, std::optional<bool> isSigned = {},
2014+
std::optional<int32_t> allocatedWidth = {});
19632015
ParseResult parseListExp(Value &result);
19642016
ParseResult parseListConcatExp(Value &result);
19652017
ParseResult parseCatExp(Value &result);
@@ -2227,19 +2279,34 @@ ParseResult FIRStmtParser::parseExpImpl(Value &result, const Twine &message,
22272279
return failure();
22282280
break;
22292281

2230-
case FIRToken::kw_UInt:
2231-
case FIRToken::kw_SInt:
2282+
case FIRToken::langle_UInt:
2283+
case FIRToken::langle_SInt: {
2284+
// The '<' has already been consumed by the lexer, so we need to parse
2285+
// the mandatory width and '>'.
2286+
bool isSigned = getToken().is(FIRToken::langle_SInt);
2287+
consumeToken();
2288+
int32_t width;
2289+
if (parseWidth(width))
2290+
return failure();
2291+
2292+
// Now parse the '(' intLit ')' part.
2293+
if (parseIntegerLiteralExp(result, isSigned, width))
2294+
return failure();
2295+
break;
2296+
}
2297+
2298+
case FIRToken::lp_UInt:
2299+
case FIRToken::lp_SInt:
22322300
if (parseIntegerLiteralExp(result))
22332301
return failure();
22342302
break;
2235-
case FIRToken::kw_String: {
2303+
case FIRToken::lp_String: {
22362304
if (requireFeature({3, 1, 0}, "Strings"))
22372305
return failure();
22382306
locationProcessor.setLoc(getToken().getLoc());
2239-
consumeToken(FIRToken::kw_String);
2307+
consumeToken(FIRToken::lp_String);
22402308
StringRef spelling;
2241-
if (parseToken(FIRToken::l_paren, "expected '(' in String expression") ||
2242-
parseGetSpelling(spelling) ||
2309+
if (parseGetSpelling(spelling) ||
22432310
parseToken(FIRToken::string,
22442311
"expected string literal in String expression") ||
22452312
parseToken(FIRToken::r_paren, "expected ')' in String expression"))
@@ -2249,14 +2316,13 @@ ParseResult FIRStmtParser::parseExpImpl(Value &result, const Twine &message,
22492316
builder, attr, builder.getType<StringType>(), attr);
22502317
break;
22512318
}
2252-
case FIRToken::kw_Integer: {
2319+
case FIRToken::lp_Integer: {
22532320
if (requireFeature({3, 1, 0}, "Integers"))
22542321
return failure();
22552322
locationProcessor.setLoc(getToken().getLoc());
2256-
consumeToken(FIRToken::kw_Integer);
2323+
consumeToken(FIRToken::lp_Integer);
22572324
APInt value;
2258-
if (parseToken(FIRToken::l_paren, "expected '(' in Integer expression") ||
2259-
parseIntLit(value, "expected integer literal in Integer expression") ||
2325+
if (parseIntLit(value, "expected integer literal in Integer expression") ||
22602326
parseToken(FIRToken::r_paren, "expected ')' in Integer expression"))
22612327
return failure();
22622328
APSInt apint(value, /*isUnsigned=*/false);
@@ -2265,13 +2331,11 @@ ParseResult FIRStmtParser::parseExpImpl(Value &result, const Twine &message,
22652331
builder.getType<FIntegerType>(), apint);
22662332
break;
22672333
}
2268-
case FIRToken::kw_Bool: {
2334+
case FIRToken::lp_Bool: {
22692335
if (requireFeature(missingSpecFIRVersion, "Bools"))
22702336
return failure();
22712337
locationProcessor.setLoc(getToken().getLoc());
2272-
consumeToken(FIRToken::kw_Bool);
2273-
if (parseToken(FIRToken::l_paren, "expected '(' in Bool expression"))
2274-
return failure();
2338+
consumeToken(FIRToken::lp_Bool);
22752339
bool value;
22762340
if (consumeIf(FIRToken::kw_true))
22772341
value = true;
@@ -2286,13 +2350,11 @@ ParseResult FIRStmtParser::parseExpImpl(Value &result, const Twine &message,
22862350
builder, attr, builder.getType<BoolType>(), value);
22872351
break;
22882352
}
2289-
case FIRToken::kw_Double: {
2353+
case FIRToken::lp_Double: {
22902354
if (requireFeature(missingSpecFIRVersion, "Doubles"))
22912355
return failure();
22922356
locationProcessor.setLoc(getToken().getLoc());
2293-
consumeToken(FIRToken::kw_Double);
2294-
if (parseToken(FIRToken::l_paren, "expected '(' in Double expression"))
2295-
return failure();
2357+
consumeToken(FIRToken::lp_Double);
22962358
auto spelling = getTokenSpelling();
22972359
if (parseToken(FIRToken::floatingpoint,
22982360
"expected floating point in Double expression") ||
@@ -2308,7 +2370,8 @@ ParseResult FIRStmtParser::parseExpImpl(Value &result, const Twine &message,
23082370
builder, attr, builder.getType<DoubleType>(), attr);
23092371
break;
23102372
}
2311-
case FIRToken::kw_List: {
2373+
case FIRToken::lp_List:
2374+
case FIRToken::langle_List: {
23122375
if (requireFeature({4, 0, 0}, "Lists"))
23132376
return failure();
23142377
if (isLeadingStmt)
@@ -2359,6 +2422,13 @@ ParseResult FIRStmtParser::parseExpImpl(Value &result, const Twine &message,
23592422
// try them.
23602423
case FIRToken::identifier: // exp ::= id
23612424
case FIRToken::literal_identifier:
2425+
case FIRToken::kw_UInt:
2426+
case FIRToken::kw_SInt:
2427+
case FIRToken::kw_String:
2428+
case FIRToken::kw_Integer:
2429+
case FIRToken::kw_Bool:
2430+
case FIRToken::kw_Double:
2431+
case FIRToken::kw_List:
23622432
default: {
23632433
StringRef name;
23642434
auto loc = getToken().getLoc();
@@ -2596,17 +2666,48 @@ ParseResult FIRStmtParser::parsePostFixDynamicSubscript(Value &result) {
25962666

25972667
/// integer-literal-exp ::= 'UInt' optional-width '(' intLit ')'
25982668
/// ::= 'SInt' optional-width '(' intLit ')'
2599-
ParseResult FIRStmtParser::parseIntegerLiteralExp(Value &result) {
2600-
bool isSigned = getToken().is(FIRToken::kw_SInt);
2669+
///
2670+
/// If isSigned and allocatedWidth are provided, it means the type and width
2671+
/// were already parsed (e.g., from a langle_UInt token) and should be used
2672+
/// instead of parsing them from the token stream.
2673+
ParseResult
2674+
FIRStmtParser::parseIntegerLiteralExp(Value &result,
2675+
std::optional<bool> isSignedOpt,
2676+
std::optional<int32_t> allocatedWidth) {
26012677
auto loc = getToken().getLoc();
2602-
consumeToken();
26032678

2604-
// Parse a width specifier if present.
2679+
// Determine signedness and whether '(' was already consumed.
2680+
bool isSigned;
2681+
bool hasLParen;
2682+
if (isSignedOpt) {
2683+
// Signedness was provided by caller (from langle_ token).
2684+
isSigned = *isSignedOpt;
2685+
hasLParen = false;
2686+
} else {
2687+
// Determine from current token (lp_ token).
2688+
isSigned = getToken().is(FIRToken::lp_SInt);
2689+
hasLParen = getToken().isAny(FIRToken::lp_UInt, FIRToken::lp_SInt);
2690+
consumeToken();
2691+
}
2692+
2693+
// Parse a width specifier if not already provided.
26052694
int32_t width;
26062695
APInt value;
2607-
if (parseOptionalWidth(width) ||
2608-
parseToken(FIRToken::l_paren, "expected '(' in integer expression") ||
2609-
parseIntLit(value, "expected integer value") ||
2696+
2697+
if (allocatedWidth) {
2698+
width = *allocatedWidth;
2699+
} else {
2700+
if (parseOptionalWidth(width))
2701+
return failure();
2702+
}
2703+
2704+
// If we consumed an lp_ token, the '(' was already consumed by the lexer.
2705+
// Otherwise, we need to parse it.
2706+
if (!hasLParen &&
2707+
parseToken(FIRToken::l_paren, "expected '(' in integer expression"))
2708+
return failure();
2709+
2710+
if (parseIntLit(value, "expected integer value") ||
26102711
parseToken(FIRToken::r_paren, "expected ')' in integer expression"))
26112712
return failure();
26122713

@@ -2640,13 +2741,24 @@ ParseResult FIRStmtParser::parseIntegerLiteralExp(Value &result) {
26402741
/// list-exp ::= list-type '(' exp* ')'
26412742
ParseResult FIRStmtParser::parseListExp(Value &result) {
26422743
auto loc = getToken().getLoc();
2643-
FIRRTLType type;
2644-
if (parseListType(type))
2744+
bool hasLAngle = getToken().is(FIRToken::langle_List);
2745+
bool hasLParen = getToken().is(FIRToken::lp_List);
2746+
consumeToken();
2747+
2748+
PropertyType elementType;
2749+
// If we consumed a langle_ token, the '<' was already consumed by the lexer.
2750+
if (!hasLAngle && parseToken(FIRToken::less, "expected '<' in List type"))
26452751
return failure();
2646-
auto listType = type_cast<ListType>(type);
2647-
auto elementType = listType.getElementType();
26482752

2649-
if (parseToken(FIRToken::l_paren, "expected '(' in List expression"))
2753+
if (parsePropertyType(elementType, "expected List element type") ||
2754+
parseToken(FIRToken::greater, "expected '>' in List type"))
2755+
return failure();
2756+
2757+
auto listType = ListType::get(getContext(), elementType);
2758+
2759+
// If we consumed an lp_ token, the '(' was already consumed by the lexer.
2760+
if (!hasLParen &&
2761+
parseToken(FIRToken::l_paren, "expected '(' in List expression"))
26502762
return failure();
26512763

26522764
SmallVector<Value, 3> operands;

lib/Dialect/FIRRTL/Import/FIRTokenKinds.def

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#if !defined(TOK_MARKER) && !defined(TOK_IDENTIFIER) && \
1515
!defined(TOK_LITERAL) && !defined(TOK_PUNCTUATION) && \
1616
!defined(TOK_KEYWORD) && !defined(TOK_LPKEYWORD) && \
17-
!defined(TOK_LPKEYWORD_PRIM)
17+
!defined(TOK_LESSKEYWORD) && !defined(TOK_LPKEYWORD_PRIM)
1818
#error Must define one of the TOK_ macros.
1919
#endif
2020

@@ -41,6 +41,9 @@
4141
VERSION, FEATURE) \
4242
TOK_LPKEYWORD(SPELLING)
4343
#endif
44+
#ifndef TOK_LESSKEYWORD
45+
#define TOK_LESSKEYWORD(SPELLING)
46+
#endif
4447

4548
// Markers
4649
TOK_MARKER(eof)
@@ -199,6 +202,21 @@ TOK_LPKEYWORD(intrinsic)
199202
TOK_LPKEYWORD(cat)
200203
TOK_LPKEYWORD(unsafe_domain_cast)
201204

205+
TOK_LPKEYWORD(UInt)
206+
TOK_LPKEYWORD(SInt)
207+
TOK_LPKEYWORD(String)
208+
TOK_LPKEYWORD(Integer)
209+
TOK_LPKEYWORD(Bool)
210+
TOK_LPKEYWORD(Double)
211+
TOK_LPKEYWORD(List)
212+
213+
// Keywords when followed by a '<'. These turn "foo" into
214+
// FIRToken::langle_foo enums.
215+
TOK_LESSKEYWORD(UInt)
216+
TOK_LESSKEYWORD(SInt)
217+
TOK_LESSKEYWORD(Analog)
218+
TOK_LESSKEYWORD(List)
219+
202220
// These are for LPKEYWORD cases that correspond to a primitive operation.
203221
TOK_LPKEYWORD_PRIM(add, AddPrimOp, 2, 0, FIRVersion(0, 0, 0), "Base")
204222
TOK_LPKEYWORD_PRIM(and, AndPrimOp, 2, 0, FIRVersion(0, 0, 0), "Base")
@@ -253,4 +271,5 @@ TOK_LPKEYWORD_PRIM(integer_shl, IntegerShlOp, 2, 0, FIRVersion(3, 1, 0),
253271
#undef TOK_PUNCTUATION
254272
#undef TOK_KEYWORD
255273
#undef TOK_LPKEYWORD
274+
#undef TOK_LESSKEYWORD
256275
#undef TOK_LPKEYWORD_PRIM

0 commit comments

Comments
 (0)