Skip to content

Commit f636532

Browse files
authored
vscode: update syntax highlighting to support more operators (#1047)
1 parent 9d37508 commit f636532

7 files changed

Lines changed: 193 additions & 52 deletions

File tree

PLAN.md

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -190,6 +190,10 @@ sql for benchmarks maybe?
190190

191191
https://github.com/tanelpoder/tpt-postgres/blob/main/demos/pqrsafe.sql
192192

193+
- Absurd
194+
195+
https://github.com/earendil-works/absurd/blob/56500e5a23beca5e976f329475063f24692d99cc/sql/absurd.sql
196+
193197
### CLI
194198

195199
from `deno`

crates/squawk_parser/tests/data/ok/select_operators.sql

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -272,7 +272,7 @@ select 'null' is not null;
272272
select 1 isnull;
273273

274274
-- notnull (non-standard syntax)
275-
select 'foo' isnull;
275+
select 'foo' notnull;
276276

277277
-- is true
278278
select true is true;

crates/squawk_parser/tests/snapshots/tests__select_operators_ok.snap

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2661,7 +2661,7 @@ SOURCE_FILE
26612661
LITERAL
26622662
STRING "'foo'"
26632663
WHITESPACE " "
2664-
ISNULL_KW "isnull"
2664+
NOTNULL_KW "notnull"
26652665
SEMICOLON ";"
26662666
WHITESPACE "\n\n"
26672667
COMMENT "-- is true"

crates/squawk_syntax/src/ast/generated/nodes.rs

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/squawk_syntax/src/postgresql.ungram

Lines changed: 49 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -442,7 +442,55 @@ IsNot =
442442
'is' 'not'
443443

444444
Op =
445-
'or' | Gteq | '<' | '>' | FatArrow | '=' | 'in' | Neqb | Lteq | '+' | 'overlaps' | 'like' | 'ilike' | NotLike | NotIlike | NotIn | CustomOp | IsDistinctFrom | IsNotDistinctFrom | OperatorCall | 'is' | '^' | '%' | 'and' | '/' | Neq | 'collate' | '-' | ColonEq | ColonColon | 'value' | ':' | IsNot | SimilarTo | NotSimilarTo | AtTimeZone | IsJson | IsJsonValue | IsNotJson | IsJsonObject | IsJsonArray |IsJsonScalar | IsNotJsonValue | IsNotJsonObject | IsNotJsonArray | IsNotJsonScalar
445+
'or'
446+
| '-'
447+
| ':'
448+
| '/'
449+
| '%'
450+
| '^'
451+
| '+'
452+
| '<'
453+
| '='
454+
| '>'
455+
| 'and'
456+
| 'collate'
457+
| 'ilike'
458+
| 'in'
459+
| 'is'
460+
| 'isnull'
461+
| 'like'
462+
| 'overlaps'
463+
| 'value'
464+
| AtTimeZone
465+
| ColonColon
466+
| ColonEq
467+
| CustomOp
468+
| FatArrow
469+
| Gteq
470+
| IsDistinctFrom
471+
| IsJson
472+
| IsJsonArray
473+
| IsJsonObject
474+
| IsJsonScalar
475+
| IsJsonValue
476+
| IsNormalized
477+
| IsNot
478+
| IsNotDistinctFrom
479+
| IsNotJson
480+
| IsNotJsonArray
481+
| IsNotJsonObject
482+
| IsNotJsonScalar
483+
| IsNotJsonValue
484+
| IsNotNormalized
485+
| Lteq
486+
| Neq
487+
| Neqb
488+
| NotIlike
489+
| NotIn
490+
| NotLike
491+
| NotSimilarTo
492+
| OperatorCall
493+
| SimilarTo
446494

447495
IsJson =
448496
'is' 'json' JsonKeysUniqueClause?

crates/xtask/src/codegen.rs

Lines changed: 88 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -900,22 +900,70 @@ fn generate_nodes(nodes: &[AstNodeSrc], enums: &[AstEnumSrc]) -> String {
900900
format!("{PRELUDE}{output}")
901901
}
902902

903-
fn update_textmate_keywords(all_keywords: &[String]) -> Result<()> {
904-
let tmlanguage_path = project_root().join("squawk-vscode/syntaxes/pgsql.tmLanguage.json");
905-
let content = std::fs::read_to_string(&tmlanguage_path)?;
906-
let mut json: serde_json::Value = serde_json::from_str(&content)?;
903+
// Multi-word keyword phrases that should be highlighted as keywords, not
904+
// operators.
905+
const KEYWORD_PHRASES: &[&str] = &["if not exists", "if exists"];
906+
907+
// Multi-word entries must come before their single-word components so the
908+
// regex engine matches the longest form first.
909+
const KEYWORD_OPERATORS: &[&str] = &[
910+
"not between symmetric",
911+
"is not distinct from",
912+
"between symmetric",
913+
"is distinct from",
914+
"not similar to",
915+
"at time zone",
916+
"not between",
917+
"similar to",
918+
"not ilike",
919+
"not like",
920+
"overlaps",
921+
"between",
922+
"collate",
923+
"notnull",
924+
"is not",
925+
"not in",
926+
"isnull",
927+
"ilike",
928+
"like",
929+
"and",
930+
"not",
931+
"in",
932+
"is",
933+
"or",
934+
];
907935

908-
let mut keywords = all_keywords
936+
fn keyword_phrases_match() -> String {
937+
let patterns: Vec<String> = KEYWORD_PHRASES
909938
.iter()
910-
.map(|k| k.to_lowercase())
911-
.collect::<Vec<_>>();
912-
keywords.sort();
939+
.map(|p| p.replace(' ', "\\s+"))
940+
.collect();
941+
format!("(?i)\\b({})\\b", patterns.join("|"))
942+
}
913943

944+
fn operator_match() -> String {
945+
let operator_patterns: Vec<String> = KEYWORD_OPERATORS
946+
.iter()
947+
.map(|op| op.replace(' ', "\\s+"))
948+
.collect();
949+
format!("(?i)\\b({})\\b", operator_patterns.join("|"))
950+
}
951+
952+
fn keywords_match(all_keywords: &[String]) -> String {
953+
let mut keywords: Vec<String> = all_keywords.iter().map(|k| k.to_lowercase()).collect();
954+
keywords.sort();
914955
let keywords_joined = keywords.join("|");
915-
let match_pattern = format!("(?xi)\\b({keywords_joined})\\b");
956+
format!("(?xi)\\b({keywords_joined})\\b")
957+
}
916958

917-
json["repository"]["keywords"]["patterns"][0]["match"] =
918-
serde_json::Value::String(match_pattern);
959+
fn update_textmate_keywords(all_keywords: &[String]) -> Result<()> {
960+
let tmlanguage_path = project_root().join("squawk-vscode/syntaxes/pgsql.tmLanguage.json");
961+
let content = std::fs::read_to_string(&tmlanguage_path)?;
962+
let mut json: serde_json::Value = serde_json::from_str(&content)?;
963+
964+
json["repository"]["keywords"]["patterns"][0]["match"] = keyword_phrases_match().into();
965+
json["repository"]["keywords"]["patterns"][1]["match"] = operator_match().into();
966+
json["repository"]["keywords"]["patterns"][2]["match"] = keywords_match(all_keywords).into();
919967

920968
let output = serde_json::to_string_pretty(&json)?;
921969
std::fs::write(&tmlanguage_path, format!("{output}\n"))?;
@@ -964,3 +1012,32 @@ fn generate_tokens(tokens: &[(&'static str, &'static str)]) -> String {
9641012
let output = reformat(file.to_string()).replace("#[derive", "\n#[derive");
9651013
format!("{PRELUDE}{output}")
9661014
}
1015+
1016+
#[cfg(test)]
1017+
mod tests {
1018+
use super::*;
1019+
1020+
#[track_caller]
1021+
fn assert_sorted(list: &[&str], name: &str) {
1022+
for window in list.windows(2) {
1023+
let (a, b) = (window[0], window[1]);
1024+
assert!(
1025+
a.len() >= b.len(),
1026+
"{name} not sorted by length descending: \
1027+
{a:?} (len {}) comes before {b:?} (len {})",
1028+
a.len(),
1029+
b.len(),
1030+
);
1031+
}
1032+
}
1033+
1034+
#[test]
1035+
fn keyword_operators_sorted_by_length_desc() {
1036+
assert_sorted(KEYWORD_OPERATORS, "KEYWORD_OPERATORS");
1037+
}
1038+
1039+
#[test]
1040+
fn keyword_phrases_sorted_by_length_desc() {
1041+
assert_sorted(KEYWORD_PHRASES, "KEYWORD_PHRASES");
1042+
}
1043+
}

squawk-vscode/syntaxes/pgsql.tmLanguage.json

Lines changed: 38 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -52,10 +52,32 @@
5252
}
5353
],
5454
"repository": {
55+
"comments": {
56+
"patterns": [
57+
{
58+
"captures": {
59+
"1": {
60+
"name": "punctuation.definition.comment.pgsql"
61+
}
62+
},
63+
"match": "(--).*$\\n?",
64+
"name": "comment.line.double-dash.pgsql"
65+
},
66+
{
67+
"begin": "/\\*",
68+
"captures": {
69+
"0": {
70+
"name": "punctuation.definition.comment.pgsql"
71+
}
72+
},
73+
"end": "\\*/",
74+
"name": "comment.block.c"
75+
}
76+
]
77+
},
5578
"create_entities": {
5679
"patterns": [
5780
{
58-
"match": "(?i)\\b(create)\\s+(or\\s+replace\\s+)?(function|view)\\s+((?:(?:[\\w]+|\".+\")\\.){0,2}(?:[\\w]+|\".+\"))",
5981
"captures": {
6082
"1": {
6183
"name": "keyword.other.create.pgsql"
@@ -69,10 +91,10 @@
6991
"4": {
7092
"name": "entity.name.function.pgsql"
7193
}
72-
}
94+
},
95+
"match": "(?i)\\b(create)\\s+(or\\s+replace\\s+)?(function|view)\\s+((?:(?:[\\w]+|\".+\")\\.){0,2}(?:[\\w]+|\".+\"))"
7396
},
7497
{
75-
"match": "(?i)\\b(create)\\s+(aggregate|collation|conversion|database|domain|event\\s+trigger|group|(?:unique\\s+)?index|language|operator\\s+class|operator|rule|schema|sequence|(?:(?:global|local)\\s+)?(?:(?:temp|temporary|unlogged)\\s+)?table|tablespace|trigger|type|user|(?:materialized\\s+)?view)\\s+(if\\s+not\\s+exists\\s+)?((?:(?:[\\w]+|\".+\")\\.){0,2}(?:[\\w]+|\".+\"))",
7698
"captures": {
7799
"1": {
78100
"name": "keyword.other.create.pgsql"
@@ -86,30 +108,8 @@
86108
"4": {
87109
"name": "entity.name.function.pgsql"
88110
}
89-
}
90-
}
91-
]
92-
},
93-
"comments": {
94-
"patterns": [
95-
{
96-
"captures": {
97-
"1": {
98-
"name": "punctuation.definition.comment.pgsql"
99-
}
100111
},
101-
"match": "(--).*$\\n?",
102-
"name": "comment.line.double-dash.pgsql"
103-
},
104-
{
105-
"begin": "/\\*",
106-
"captures": {
107-
"0": {
108-
"name": "punctuation.definition.comment.pgsql"
109-
}
110-
},
111-
"end": "\\*/",
112-
"name": "comment.block.c"
112+
"match": "(?i)\\b(create)\\s+(aggregate|collation|conversion|database|domain|event\\s+trigger|group|(?:unique\\s+)?index|language|operator\\s+class|operator|rule|schema|sequence|(?:(?:global|local)\\s+)?(?:(?:temp|temporary|unlogged)\\s+)?table|tablespace|trigger|type|user|(?:materialized\\s+)?view)\\s+(if\\s+not\\s+exists\\s+)?((?:(?:[\\w]+|\".+\")\\.){0,2}(?:[\\w]+|\".+\"))"
113113
}
114114
]
115115
},
@@ -145,6 +145,14 @@
145145
},
146146
"keywords": {
147147
"patterns": [
148+
{
149+
"match": "(?i)\\b(if\\s+not\\s+exists|if\\s+exists)\\b",
150+
"name": "keyword.other.pgsql"
151+
},
152+
{
153+
"match": "(?i)\\b(not\\s+between\\s+symmetric|is\\s+not\\s+distinct\\s+from|between\\s+symmetric|is\\s+distinct\\s+from|not\\s+similar\\s+to|at\\s+time\\s+zone|not\\s+between|similar\\s+to|not\\s+ilike|not\\s+like|overlaps|between|collate|notnull|is\\s+not|not\\s+in|isnull|ilike|like|and|not|in|is|or)\\b",
154+
"name": "keyword.operator.pgsql"
155+
},
148156
{
149157
"captures": {
150158
"1": {
@@ -162,20 +170,12 @@
162170
"name": "constant.numeric.pgsql"
163171
},
164172
{
165-
"match": "\\*",
166-
"name": "keyword.operator.star.pgsql"
167-
},
168-
{
169-
"match": "[!<>]?=|<>|<|>",
170-
"name": "keyword.operator.comparison.pgsql"
171-
},
172-
{
173-
"match": "-|\\+|/",
174-
"name": "keyword.operator.math.pgsql"
173+
"match": "::",
174+
"name": "keyword.operator.cast.pgsql"
175175
},
176176
{
177-
"match": "\\|\\|",
178-
"name": "keyword.operator.concatenator.pgsql"
177+
"match": "[+\\-*/<>=~!@#%\\^&|`?]",
178+
"name": "keyword.operator.pgsql"
179179
}
180180
]
181181
},

0 commit comments

Comments
 (0)