Skip to content

Commit 2780fe2

Browse files
engalar and claude committed
fix: unify identifier rules to use full keyword list, add regression tests
Replace commonNameKeyword with keyword in all identifier rules (attributeName, enumValueName, indexColumnName, imageName, parameterName, selectAlias) so any lexer token can be used as an identifier. Remove the now-redundant commonNameKeyword rule. Add keyword-as-identifier.mdl regression test covering enum values, entity names, and attribute names that match lexer keywords. Add keyword_coverage_test.go to verify lexer/keyword sync. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 276c906 commit 2780fe2

File tree

7 files changed

+8821
-9278
lines changed

7 files changed

+8821
-9278
lines changed
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
-- ============================================================================
2+
-- Keyword-as-Identifier Regression Tests
3+
-- ============================================================================
4+
--
5+
-- Verifies that lexer keyword tokens can be used as identifiers (entity names,
6+
-- attribute names, enum values) via the parser's `keyword` rule.
7+
--
8+
-- Regression test for PR #186 (168 missing tokens) and PR #174 (OPEN fix).
9+
-- See: https://github.com/mendixlabs/mxcli/pull/186
10+
--
11+
-- Usage:
12+
-- mxcli check mdl-examples/doctype-tests/keyword-as-identifier.mdl
13+
--
14+
-- ============================================================================
15+
16+
CREATE MODULE KeywordTest;
17+
18+
-- MARK: Enum values that are lexer keywords
19+
20+
CREATE ENUMERATION KeywordTest.IssueStatus (
21+
Open,
22+
Closed,
23+
Data,
24+
Filter,
25+
Match,
26+
Empty,
27+
Container,
28+
Node
29+
);
30+
31+
CREATE ENUMERATION KeywordTest.HttpMethod (
32+
Get,
33+
Post,
34+
Put,
35+
Delete,
36+
Patch
37+
);
38+
39+
CREATE ENUMERATION KeywordTest.ActivityKind (
40+
Activity,
41+
Condition,
42+
Loop,
43+
Start,
44+
End,
45+
Split,
46+
Join,
47+
Merge
48+
);
49+
50+
CREATE ENUMERATION KeywordTest.LayoutKind (
51+
Layout,
52+
Header,
53+
Footer,
54+
Body,
55+
Content,
56+
Title,
57+
Text,
58+
Image
59+
);
60+
61+
CREATE ENUMERATION KeywordTest.MiscKeywords (
62+
Action,
63+
Source,
64+
Target,
65+
Owner,
66+
Type,
67+
Name,
68+
Value,
69+
Result,
70+
Object,
71+
Index,
72+
Input,
73+
Output,
74+
Sort,
75+
Order,
76+
Commit,
77+
Close,
78+
Refresh,
79+
Rollback
80+
);
81+
82+
-- MARK: Entity names and attributes that are keywords
83+
84+
CREATE ENTITY KeywordTest.Data (
85+
Filter : String(200),
86+
Match : String(200),
87+
Container : String(200),
88+
Source : String(200),
89+
Target : String(200),
90+
Action : String(200),
91+
Value : String(200),
92+
Status : String(200)
93+
);
94+
95+
CREATE ENTITY KeywordTest.Activity (
96+
Name : String(200),
97+
Type : String(200),
98+
Result : String(200)
99+
);
100+
101+
-- MARK: Microflow using keyword-named enum values in CREATE
102+
103+
CREATE OR REPLACE MICROFLOW KeywordTest.TestKeywordEnumValues ()
104+
RETURNS Nothing
105+
BEGIN
106+
$Issue = CREATE KeywordTest.Data (
107+
Filter = 'test',
108+
Match = 'test',
109+
Status = 'active'
110+
);
111+
112+
$Activity = CREATE KeywordTest.Activity (
113+
Name = 'test',
114+
Type = 'task',
115+
Result = 'ok'
116+
);
117+
END;

mdl/grammar/MDLParser.g4

Lines changed: 11 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -611,8 +611,7 @@ attributeDefinition
611611
attributeName
612612
: IDENTIFIER
613613
| QUOTED_IDENTIFIER // Escape any reserved word ("Range", `Order`)
614-
| commonNameKeyword
615-
| ATTRIBUTE // Allow 'Attribute' as attribute name
614+
| keyword
616615
;
617616

618617
attributeConstraint
@@ -724,7 +723,7 @@ indexAttribute
724723
indexColumnName
725724
: IDENTIFIER
726725
| QUOTED_IDENTIFIER // Escape any reserved word
727-
| commonNameKeyword
726+
| keyword
728727
;
729728

730729
createAssociationStatement
@@ -835,15 +834,12 @@ enumerationValue
835834
: docComment? enumValueName (CAPTION? STRING_LITERAL)?
836835
;
837836

838-
// Allow reserved keywords as enumeration value names
837+
// Allow reserved keywords as enumeration value names.
838+
// Uses the full `keyword` rule so any lexer token can appear as an enum value name.
839839
enumValueName
840840
: IDENTIFIER
841841
| QUOTED_IDENTIFIER // Escape any reserved word
842-
| commonNameKeyword
843-
| SERVICE | SERVICES // OData/auth keywords used as enum values
844-
| GUEST | SESSION | BASIC | CLIENT | CLIENTS
845-
| PUBLISH | EXPOSE | EXTERNAL | PAGING | HEADERS
846-
| DISPLAY | STRUCTURE // Layout/structure keywords used as enum values
842+
| keyword
847843
;
848844

849845
enumerationOptions
@@ -882,7 +878,7 @@ imageCollectionItem
882878
imageName
883879
: IDENTIFIER
884880
| QUOTED_IDENTIFIER
885-
| commonNameKeyword
881+
| keyword
886882
;
887883

888884
// =============================================================================
@@ -1139,7 +1135,7 @@ microflowParameter
11391135
parameterName
11401136
: IDENTIFIER
11411137
| QUOTED_IDENTIFIER // Escape any reserved word
1142-
| commonNameKeyword
1138+
| keyword
11431139
;
11441140

11451141
microflowReturnType
@@ -1778,7 +1774,7 @@ memberAttributeName
17781774
: qualifiedName
17791775
| IDENTIFIER
17801776
| QUOTED_IDENTIFIER // Escape any reserved word
1781-
| commonNameKeyword
1777+
| keyword
17821778
;
17831779

17841780
// Legacy changeList for backwards compatibility
@@ -3126,7 +3122,7 @@ selectItem
31263122
// Allow keywords as aliases in SELECT
31273123
selectAlias
31283124
: IDENTIFIER
3129-
| commonNameKeyword
3125+
| keyword
31303126
;
31313127

31323128
fromClause
@@ -3559,33 +3555,8 @@ annotationValue
35593555
| qualifiedName
35603556
;
35613557

3562-
/**
3563-
* Keywords commonly used as attribute, parameter, enum value, and column names.
3564-
* Excludes DDL keywords (CREATE, ALTER, DROP, ENTITY, etc.) and flow control
3565-
* keywords (BEGIN, END, IF, RETURN, etc.) that would cause parser ambiguity
3566-
* when used in entity/microflow body contexts.
3567-
*/
3568-
commonNameKeyword
3569-
: STATUS | TYPE | VALUE | INDEX // Common data keywords
3570-
| USERNAME | PASSWORD // User-related keywords
3571-
| COUNT | SUM | AVG | MIN | MAX // Aggregate function names
3572-
| ACTION | MESSAGE // Common entity attribute names
3573-
| OWNER | REFERENCE | CASCADE // Association keywords
3574-
| SUCCESS | ERROR | WARNING | INFO | DEBUG | CRITICAL // Log/status keywords
3575-
| DESCRIPTION | ROLE | LEVEL | ACCESS | USER // Security keywords
3576-
| CAPTION | CONTENT | LABEL | TITLE | TEXT // Display/UI keywords
3577-
| FORMAT | RANGE | SOURCE_KW | CHECK // Validation/data keywords
3578-
| FOLDER | NAVIGATION | HOME | VERSION | PRODUCTION // Structure/config keywords
3579-
| SELECTION | EDITABLE | VISIBLE | DATASOURCE // Widget property keywords
3580-
| TABLETWIDTH | PHONEWIDTH // Responsive width keywords
3581-
| WIDTH | HEIGHT | STYLE | CLASS // Styling keywords
3582-
| BOTH | SINGLE | MULTIPLE | NONE // Cardinality keywords
3583-
| PROTOTYPE | OFF // Security level keywords
3584-
| STORAGE | TABLE // Association storage keywords
3585-
| URL | POSITION | SORT // Common attribute names
3586-
;
3587-
3588-
/** Keywords that can be used as identifiers in certain contexts (module/entity names via qualifiedName).
3558+
/** Keywords that can be used as identifiers in certain contexts (module/entity names via qualifiedName,
3559+
* attribute names, enum values, parameter names, etc.).
35893560
* Every word-type lexer token must appear here so that user-defined names (entity, attribute,
35903561
* enum value, module) that happen to match a keyword can still be parsed.
35913562
* Maintain alphabetical order within each group for easy auditing.
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
package grammar
2+
3+
import (
4+
"bufio"
5+
"os"
6+
"regexp"
7+
"strings"
8+
"testing"
9+
)
10+
11+
// TestKeywordRuleCoverage verifies that every lexer token (except structural
12+
// ones like operators, punctuation, literals, and identifiers) is listed in
13+
// the parser's `keyword` rule. This catches the common mistake of adding a
14+
// new token to MDLLexer.g4 but forgetting to add it to the keyword rule,
15+
// which would prevent it from being used as an identifier.
16+
func TestKeywordRuleCoverage(t *testing.T) {
17+
allTokens := parseLexerTokens(t, "parser/MDLLexer.tokens")
18+
keywordTokens := parseKeywordRule(t, "MDLParser.g4")
19+
20+
// Structural tokens that should NOT be in the keyword rule.
21+
excluded := map[string]bool{
22+
// Whitespace & comments
23+
"WS": true, "DOC_COMMENT": true, "BLOCK_COMMENT": true, "LINE_COMMENT": true,
24+
// Identifiers & variables
25+
"IDENTIFIER": true, "HYPHENATED_ID": true, "QUOTED_IDENTIFIER": true, "VARIABLE": true,
26+
// Literals
27+
"STRING_LITERAL": true, "DOLLAR_STRING": true, "NUMBER_LITERAL": true, "MENDIX_TOKEN": true,
28+
// Punctuation
29+
"SEMICOLON": true, "COMMA": true, "DOT": true,
30+
"LPAREN": true, "RPAREN": true,
31+
"LBRACE": true, "RBRACE": true,
32+
"LBRACKET": true, "RBRACKET": true,
33+
"COLON": true, "AT": true, "PIPE": true,
34+
"DOUBLE_COLON": true, "ARROW": true, "QUESTION": true, "HASH": true,
35+
// Operators
36+
"NOT_EQUALS": true, "LESS_THAN_OR_EQUAL": true, "GREATER_THAN_OR_EQUAL": true,
37+
"EQUALS": true, "LESS_THAN": true, "GREATER_THAN": true,
38+
"PLUS": true, "MINUS": true, "STAR": true, "SLASH": true, "PERCENT": true,
39+
// Version marker (not an identifier)
40+
"V3": true,
41+
}
42+
43+
// Tokens missing from keyword rule (in lexer but not in keyword).
44+
var missing []string
45+
for _, tok := range allTokens {
46+
if excluded[tok] {
47+
continue
48+
}
49+
if !keywordTokens[tok] {
50+
missing = append(missing, tok)
51+
}
52+
}
53+
54+
// Tokens in keyword rule but not in lexer (typos or stale entries).
55+
var extra []string
56+
allSet := make(map[string]bool, len(allTokens))
57+
for _, tok := range allTokens {
58+
allSet[tok] = true
59+
}
60+
for tok := range keywordTokens {
61+
if !allSet[tok] {
62+
extra = append(extra, tok)
63+
}
64+
}
65+
66+
if len(missing) > 0 {
67+
t.Errorf("tokens in lexer but missing from keyword rule (%d):\n %s\n"+
68+
"Add them to the keyword rule in MDLParser.g4 or to the excluded set in this test.",
69+
len(missing), strings.Join(missing, ", "))
70+
}
71+
if len(extra) > 0 {
72+
t.Errorf("tokens in keyword rule but not in lexer (%d):\n %s",
73+
len(extra), strings.Join(extra, ", "))
74+
}
75+
}
76+
77+
// parseLexerTokens reads an ANTLR .tokens file and returns all symbolic token
// names (skipping literal aliases like '<='=515).
//
// File access and parsing are separated: this function only does the I/O and
// test plumbing, while symbolicTokenNames holds the pure parsing logic so it
// can be unit-tested without touching the filesystem. Reading the whole file
// with os.ReadFile also avoids bufio.Scanner's default 64 KiB per-line limit.
func parseLexerTokens(t *testing.T, path string) []string {
	t.Helper()
	data, err := os.ReadFile(path)
	if err != nil {
		t.Fatalf("read %s: %v", path, err)
	}
	return symbolicTokenNames(string(data))
}

// symbolicTokenNames extracts symbolic token names from the content of an
// ANTLR .tokens file. Each relevant line has the form NAME=123; blank lines
// and literal-alias lines (which start with a single quote, e.g. '<='=515)
// are skipped, as are malformed lines with no name before the '='.
func symbolicTokenNames(content string) []string {
	var tokens []string
	for _, raw := range strings.Split(content, "\n") {
		line := strings.TrimSpace(raw)
		if line == "" || strings.HasPrefix(line, "'") {
			continue
		}
		// strings.Cut splits on the first '='; require a non-empty name.
		if name, _, ok := strings.Cut(line, "="); ok && name != "" {
			tokens = append(tokens, name)
		}
	}
	return tokens
}
103+
104+
// parseKeywordRule reads MDLParser.g4, extracts the `keyword` rule body, and
// returns the set of token names referenced in it.
func parseKeywordRule(t *testing.T, path string) map[string]bool {
	t.Helper()
	data, err := os.ReadFile(path)
	if err != nil {
		t.Fatalf("read %s: %v", path, err)
	}
	tokens, ok := extractKeywordTokens(string(data))
	if !ok {
		t.Fatal("keyword rule not found in MDLParser.g4")
	}
	return tokens
}

// Compiled once at package scope instead of on every call.
var (
	// A // line comment (ANTLR grammars use C-style comments).
	grammarLineComment = regexp.MustCompile(`//[^\n]*`)
	// The keyword rule: starts with "keyword" at the beginning of a line,
	// ends with the first ";".
	keywordRuleBody = regexp.MustCompile(`(?m)^keyword\b([\s\S]*?);`)
	// An UPPERCASE token reference (e.g. ADD, ALTER, STRING_TYPE).
	upperTokenName = regexp.MustCompile(`\b([A-Z][A-Z0-9_]*)\b`)
)

// extractKeywordTokens finds the parser `keyword` rule in grammar text and
// returns the set of token names it references. ok is false when no keyword
// rule is present.
//
// Line comments are stripped BEFORE the rule body is delimited. Stripping
// them afterwards (as an earlier revision did) truncated the rule at the
// first ";" even when that semicolon sat inside a trailing comment, silently
// dropping every token listed after it.
func extractKeywordTokens(grammar string) (map[string]bool, bool) {
	stripped := grammarLineComment.ReplaceAllString(grammar, "")

	m := keywordRuleBody.FindStringSubmatch(stripped)
	if m == nil {
		return nil, false
	}

	matches := upperTokenName.FindAllStringSubmatch(m[1], -1)
	result := make(map[string]bool, len(matches))
	for _, match := range matches {
		result[match[1]] = true
	}
	return result, true
}

mdl/grammar/parser/MDLParser.interp

Lines changed: 1 addition & 2 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)