Skip to content

Commit f4d9459

Browse files
engalar and claude committed
fix: unify identifier rules to use full keyword list, add regression tests
Replace commonNameKeyword with keyword in all identifier rules (attributeName, enumValueName, indexColumnName, imageName, parameterName, selectAlias) so any lexer token can be used as an identifier. Remove the now-redundant commonNameKeyword rule. Add keyword-as-identifier.mdl regression test covering enum values, entity names, and attribute names that match lexer keywords. Add keyword_coverage_test.go to verify lexer/keyword sync. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent f3afec2 commit f4d9459

File tree

7 files changed

+8691
-9148
lines changed

7 files changed

+8691
-9148
lines changed
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
-- ============================================================================
2+
-- Keyword-as-Identifier Regression Tests
3+
-- ============================================================================
4+
--
5+
-- Verifies that lexer keyword tokens can be used as identifiers (entity names,
6+
-- attribute names, enum values) via the parser's `keyword` rule.
7+
--
8+
-- Regression test for PR #186 (168 missing tokens) and PR #174 (OPEN fix).
9+
-- See: https://github.com/mendixlabs/mxcli/pull/186
10+
--
11+
-- Usage:
12+
-- mxcli check mdl-examples/doctype-tests/keyword-as-identifier.mdl
13+
--
14+
-- ============================================================================
15+
16+
CREATE MODULE KeywordTest;
17+
18+
-- MARK: Enum values that are lexer keywords
19+
20+
CREATE ENUMERATION KeywordTest.IssueStatus (
21+
Open,
22+
Closed,
23+
Data,
24+
Filter,
25+
Match,
26+
Empty,
27+
Container,
28+
Node
29+
);
30+
31+
CREATE ENUMERATION KeywordTest.HttpMethod (
32+
Get,
33+
Post,
34+
Put,
35+
Delete,
36+
Patch
37+
);
38+
39+
CREATE ENUMERATION KeywordTest.ActivityKind (
40+
Activity,
41+
Condition,
42+
Loop,
43+
Start,
44+
End,
45+
Split,
46+
Join,
47+
Merge
48+
);
49+
50+
CREATE ENUMERATION KeywordTest.LayoutKind (
51+
Layout,
52+
Header,
53+
Footer,
54+
Body,
55+
Content,
56+
Title,
57+
Text,
58+
Image
59+
);
60+
61+
CREATE ENUMERATION KeywordTest.MiscKeywords (
62+
Action,
63+
Source,
64+
Target,
65+
Owner,
66+
Type,
67+
Name,
68+
Value,
69+
Result,
70+
Object,
71+
Index,
72+
Input,
73+
Output,
74+
Sort,
75+
Order,
76+
Commit,
77+
Close,
78+
Refresh,
79+
Rollback
80+
);
81+
82+
-- MARK: Entity names and attributes that are keywords
83+
84+
CREATE ENTITY KeywordTest.Data (
85+
Filter : String(200),
86+
Match : String(200),
87+
Container : String(200),
88+
Source : String(200),
89+
Target : String(200),
90+
Action : String(200),
91+
Value : String(200),
92+
Status : String(200)
93+
);
94+
95+
CREATE ENTITY KeywordTest.Activity (
96+
Name : String(200),
97+
Type : String(200),
98+
Result : String(200)
99+
);
100+
101+
-- MARK: Microflow using keyword-named enum values in CREATE
102+
103+
CREATE OR REPLACE MICROFLOW KeywordTest.TestKeywordEnumValues ()
104+
RETURNS Nothing
105+
BEGIN
106+
$Issue = CREATE KeywordTest.Data (
107+
Filter = 'test',
108+
Match = 'test',
109+
Status = 'active'
110+
);
111+
112+
$Activity = CREATE KeywordTest.Activity (
113+
Name = 'test',
114+
Type = 'task',
115+
Result = 'ok'
116+
);
117+
END;

mdl/grammar/MDLParser.g4

Lines changed: 11 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -575,8 +575,7 @@ attributeDefinition
575575
attributeName
576576
: IDENTIFIER
577577
| QUOTED_IDENTIFIER // Escape any reserved word ("Range", `Order`)
578-
| commonNameKeyword
579-
| ATTRIBUTE // Allow 'Attribute' as attribute name
578+
| keyword
580579
;
581580

582581
attributeConstraint
@@ -680,7 +679,7 @@ indexAttribute
680679
indexColumnName
681680
: IDENTIFIER
682681
| QUOTED_IDENTIFIER // Escape any reserved word
683-
| commonNameKeyword
682+
| keyword
684683
;
685684

686685
createAssociationStatement
@@ -790,15 +789,12 @@ enumerationValue
790789
: docComment? enumValueName (CAPTION? STRING_LITERAL)?
791790
;
792791

793-
// Allow reserved keywords as enumeration value names
792+
// Allow reserved keywords as enumeration value names.
793+
// Uses the full `keyword` rule so any lexer token can appear as an enum value name.
794794
enumValueName
795795
: IDENTIFIER
796796
| QUOTED_IDENTIFIER // Escape any reserved word
797-
| commonNameKeyword
798-
| SERVICE | SERVICES // OData/auth keywords used as enum values
799-
| GUEST | SESSION | BASIC | CLIENT | CLIENTS
800-
| PUBLISH | EXPOSE | EXTERNAL | PAGING | HEADERS
801-
| DISPLAY | STRUCTURE // Layout/structure keywords used as enum values
797+
| keyword
802798
;
803799

804800
enumerationOptions
@@ -837,7 +833,7 @@ imageCollectionItem
837833
imageName
838834
: IDENTIFIER
839835
| QUOTED_IDENTIFIER
840-
| commonNameKeyword
836+
| keyword
841837
;
842838

843839
// =============================================================================
@@ -1094,7 +1090,7 @@ microflowParameter
10941090
parameterName
10951091
: IDENTIFIER
10961092
| QUOTED_IDENTIFIER // Escape any reserved word
1097-
| commonNameKeyword
1093+
| keyword
10981094
;
10991095

11001096
microflowReturnType
@@ -1733,7 +1729,7 @@ memberAttributeName
17331729
: qualifiedName
17341730
| IDENTIFIER
17351731
| QUOTED_IDENTIFIER // Escape any reserved word
1736-
| commonNameKeyword
1732+
| keyword
17371733
;
17381734

17391735
// Legacy changeList for backwards compatibility
@@ -3081,7 +3077,7 @@ selectItem
30813077
// Allow keywords as aliases in SELECT
30823078
selectAlias
30833079
: IDENTIFIER
3084-
| commonNameKeyword
3080+
| keyword
30853081
;
30863082

30873083
fromClause
@@ -3514,33 +3510,8 @@ annotationValue
35143510
| qualifiedName
35153511
;
35163512

3517-
/**
3518-
* Keywords commonly used as attribute, parameter, enum value, and column names.
3519-
* Excludes DDL keywords (CREATE, ALTER, DROP, ENTITY, etc.) and flow control
3520-
* keywords (BEGIN, END, IF, RETURN, etc.) that would cause parser ambiguity
3521-
* when used in entity/microflow body contexts.
3522-
*/
3523-
commonNameKeyword
3524-
: STATUS | TYPE | VALUE | INDEX // Common data keywords
3525-
| USERNAME | PASSWORD // User-related keywords
3526-
| COUNT | SUM | AVG | MIN | MAX // Aggregate function names
3527-
| ACTION | MESSAGE // Common entity attribute names
3528-
| OWNER | REFERENCE | CASCADE // Association keywords
3529-
| SUCCESS | ERROR | WARNING | INFO | DEBUG | CRITICAL // Log/status keywords
3530-
| DESCRIPTION | ROLE | LEVEL | ACCESS | USER // Security keywords
3531-
| CAPTION | CONTENT | LABEL | TITLE | TEXT // Display/UI keywords
3532-
| FORMAT | RANGE | SOURCE_KW | CHECK // Validation/data keywords
3533-
| FOLDER | NAVIGATION | HOME | VERSION | PRODUCTION // Structure/config keywords
3534-
| SELECTION | EDITABLE | VISIBLE | DATASOURCE // Widget property keywords
3535-
| TABLETWIDTH | PHONEWIDTH // Responsive width keywords
3536-
| WIDTH | HEIGHT | STYLE | CLASS // Styling keywords
3537-
| BOTH | SINGLE | MULTIPLE | NONE // Cardinality keywords
3538-
| PROTOTYPE | OFF // Security level keywords
3539-
| STORAGE | TABLE // Association storage keywords
3540-
| URL | POSITION | SORT // Common attribute names
3541-
;
3542-
3543-
/** Keywords that can be used as identifiers in certain contexts (module/entity names via qualifiedName).
3513+
/** Keywords that can be used as identifiers in certain contexts (module/entity names via qualifiedName,
3514+
* attribute names, enum values, parameter names, etc.).
35443515
* Every word-type lexer token must appear here so that user-defined names (entity, attribute,
35453516
* enum value, module) that happen to match a keyword can still be parsed.
35463517
* Maintain alphabetical order within each group for easy auditing.
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
package grammar
2+
3+
import (
	"bufio"
	"os"
	"regexp"
	"sort"
	"strings"
	"testing"
)
10+
11+
// TestKeywordRuleCoverage verifies that every lexer token (except structural
12+
// ones like operators, punctuation, literals, and identifiers) is listed in
13+
// the parser's `keyword` rule. This catches the common mistake of adding a
14+
// new token to MDLLexer.g4 but forgetting to add it to the keyword rule,
15+
// which would prevent it from being used as an identifier.
16+
func TestKeywordRuleCoverage(t *testing.T) {
17+
allTokens := parseLexerTokens(t, "parser/MDLLexer.tokens")
18+
keywordTokens := parseKeywordRule(t, "MDLParser.g4")
19+
20+
// Structural tokens that should NOT be in the keyword rule.
21+
excluded := map[string]bool{
22+
// Whitespace & comments
23+
"WS": true, "DOC_COMMENT": true, "BLOCK_COMMENT": true, "LINE_COMMENT": true,
24+
// Identifiers & variables
25+
"IDENTIFIER": true, "HYPHENATED_ID": true, "QUOTED_IDENTIFIER": true, "VARIABLE": true,
26+
// Literals
27+
"STRING_LITERAL": true, "DOLLAR_STRING": true, "NUMBER_LITERAL": true, "MENDIX_TOKEN": true,
28+
// Punctuation
29+
"SEMICOLON": true, "COMMA": true, "DOT": true,
30+
"LPAREN": true, "RPAREN": true,
31+
"LBRACE": true, "RBRACE": true,
32+
"LBRACKET": true, "RBRACKET": true,
33+
"COLON": true, "AT": true, "PIPE": true,
34+
"DOUBLE_COLON": true, "ARROW": true, "QUESTION": true, "HASH": true,
35+
// Operators
36+
"NOT_EQUALS": true, "LESS_THAN_OR_EQUAL": true, "GREATER_THAN_OR_EQUAL": true,
37+
"EQUALS": true, "LESS_THAN": true, "GREATER_THAN": true,
38+
"PLUS": true, "MINUS": true, "STAR": true, "SLASH": true, "PERCENT": true,
39+
// Version marker (not an identifier)
40+
"V3": true,
41+
}
42+
43+
// Tokens missing from keyword rule (in lexer but not in keyword).
44+
var missing []string
45+
for _, tok := range allTokens {
46+
if excluded[tok] {
47+
continue
48+
}
49+
if !keywordTokens[tok] {
50+
missing = append(missing, tok)
51+
}
52+
}
53+
54+
// Tokens in keyword rule but not in lexer (typos or stale entries).
55+
var extra []string
56+
allSet := make(map[string]bool, len(allTokens))
57+
for _, tok := range allTokens {
58+
allSet[tok] = true
59+
}
60+
for tok := range keywordTokens {
61+
if !allSet[tok] {
62+
extra = append(extra, tok)
63+
}
64+
}
65+
66+
if len(missing) > 0 {
67+
t.Errorf("tokens in lexer but missing from keyword rule (%d):\n %s\n"+
68+
"Add them to the keyword rule in MDLParser.g4 or to the excluded set in this test.",
69+
len(missing), strings.Join(missing, ", "))
70+
}
71+
if len(extra) > 0 {
72+
t.Errorf("tokens in keyword rule but not in lexer (%d):\n %s",
73+
len(extra), strings.Join(extra, ", "))
74+
}
75+
}
76+
77+
// parseLexerTokens reads MDLLexer.tokens and returns all symbolic token names
// (skipping literal aliases like '<='=515).
func parseLexerTokens(t *testing.T, path string) []string {
	t.Helper()
	file, err := os.Open(path)
	if err != nil {
		t.Fatalf("open %s: %v", path, err)
	}
	defer file.Close()

	var names []string
	sc := bufio.NewScanner(file)
	for sc.Scan() {
		entry := strings.TrimSpace(sc.Text())
		// Blank lines carry nothing; literal aliases begin with a quote.
		if entry == "" || strings.HasPrefix(entry, "'") {
			continue
		}
		// Symbolic entries have the form NAME=<number>; the name must be
		// non-empty (an '=' at position 0 is not a token definition).
		if name, _, found := strings.Cut(entry, "="); found && name != "" {
			names = append(names, name)
		}
	}
	if err := sc.Err(); err != nil {
		t.Fatalf("scan %s: %v", path, err)
	}
	return names
}
103+
104+
// keywordRuleRe locates the parser's `keyword` rule: "keyword" at the start
// of a line, through the first ";". Compiled once at package scope.
var keywordRuleRe = regexp.MustCompile(`(?m)^keyword\b([\s\S]*?);`)

// lineCommentRe and blockCommentRe match ANTLR comments so their text is not
// mistaken for token references.
var (
	lineCommentRe  = regexp.MustCompile(`//[^\n]*`)
	blockCommentRe = regexp.MustCompile(`(?s)/\*.*?\*/`)
)

// tokenNameRe matches UPPERCASE token references (e.g., ADD, STRING_TYPE).
var tokenNameRe = regexp.MustCompile(`\b([A-Z][A-Z0-9_]*)\b`)

// parseKeywordRule reads MDLParser.g4, extracts the `keyword` rule body, and
// returns the set of token names referenced in it.
func parseKeywordRule(t *testing.T, path string) map[string]bool {
	t.Helper()
	data, err := os.ReadFile(path)
	if err != nil {
		t.Fatalf("read %s: %v", path, err)
	}

	// Strip comments BEFORE locating the rule: the non-greedy search for
	// ";" would otherwise stop at a semicolon inside a comment and silently
	// truncate the rule body. Line-comment removal keeps the newline, so
	// the (?m)^keyword anchor still works; block comments are replaced by
	// their newlines for the same reason.
	// NOTE(review): assumes "//" and "/*" never occur inside string
	// literals of the keyword rule itself — true for a plain list of token
	// alternatives; confirm if the rule ever gains literal alternatives.
	text := lineCommentRe.ReplaceAllString(string(data), "")
	text = blockCommentRe.ReplaceAllStringFunc(text, func(c string) string {
		return strings.Repeat("\n", strings.Count(c, "\n"))
	})

	m := keywordRuleRe.FindStringSubmatch(text)
	if m == nil {
		t.Fatal("keyword rule not found in MDLParser.g4")
	}

	matches := tokenNameRe.FindAllStringSubmatch(m[1], -1)
	result := make(map[string]bool, len(matches))
	for _, match := range matches {
		result[match[1]] = true
	}
	return result
}

mdl/grammar/parser/MDLParser.interp

Lines changed: 1 addition & 2 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)