Skip to content

Commit 4d8db71

Browse files
authored
parser: better unicode escape errors (#1128)
1 parent f93b801 commit 4d8db71

7 files changed

Lines changed: 402 additions & 249 deletions

File tree

crates/squawk_syntax/src/snapshots/squawk_syntax__test__unicode_escape_ident_validation.snap

Lines changed: 141 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
source: crates/squawk_syntax/src/test.rs
33
input_file: crates/squawk_syntax/test_data/validation/unicode_escape_ident.sql
44
---
5-
SOURCE_FILE@0..425
5+
SOURCE_FILE@0..489
66
COMMENT@0..5 "-- ok"
77
WHITESPACE@5..6 "\n"
88
SELECT@6..32
@@ -103,142 +103,178 @@ SOURCE_FILE@0..425
103103
STRING@223..226 "'!'"
104104
SEMICOLON@226..227 ";"
105105
WHITESPACE@227..228 "\n"
106-
SELECT@228..241
107-
SELECT_CLAUSE@228..241
106+
SELECT@228..262
107+
SELECT_CLAUSE@228..262
108108
SELECT_KW@228..234 "select"
109109
WHITESPACE@234..235 " "
110-
TARGET_LIST@235..241
111-
TARGET@235..241
112-
NAME_REF@235..241
113-
IDENT@235..241 "U&\" \\\""
114-
SEMICOLON@241..242 ";"
115-
WHITESPACE@242..243 "\n"
116-
SELECT@243..270
117-
SELECT_CLAUSE@243..270
118-
SELECT_KW@243..249 "select"
119-
WHITESPACE@249..250 " "
120-
TARGET_LIST@250..270
121-
TARGET@250..270
122-
NAME_REF@250..270
123-
IDENT@250..259 "U&\"error\""
124-
WHITESPACE@259..260 " "
125-
UESCAPE_KW@260..267 "UESCAPE"
126-
WHITESPACE@267..268 " "
127-
STRING@268..270 "''"
128-
SEMICOLON@270..271 ";"
129-
WHITESPACE@271..272 "\n"
130-
SELECT@272..300
131-
SELECT_CLAUSE@272..300
132-
SELECT_KW@272..278 "select"
133-
WHITESPACE@278..279 " "
134-
TARGET_LIST@279..300
135-
TARGET@279..300
136-
NAME_REF@279..300
137-
IDENT@279..288 "U&\"error\""
138-
WHITESPACE@288..289 " "
139-
UESCAPE_KW@289..296 "UESCAPE"
140-
WHITESPACE@296..297 " "
141-
STRING@297..300 "' '"
142-
SEMICOLON@300..301 ";"
143-
WHITESPACE@301..302 "\n"
144-
SELECT@302..330
145-
SELECT_CLAUSE@302..330
146-
SELECT_KW@302..308 "select"
147-
WHITESPACE@308..309 " "
148-
TARGET_LIST@309..330
149-
TARGET@309..330
150-
NAME_REF@309..330
151-
IDENT@309..318 "U&\"error\""
152-
WHITESPACE@318..319 " "
153-
UESCAPE_KW@319..326 "UESCAPE"
154-
WHITESPACE@326..327 " "
155-
STRING@327..330 "'+'"
156-
SEMICOLON@330..331 ";"
157-
WHITESPACE@331..332 "\n"
158-
SELECT@332..360
159-
SELECT_CLAUSE@332..360
160-
SELECT_KW@332..338 "select"
161-
WHITESPACE@338..339 " "
162-
TARGET_LIST@339..360
163-
TARGET@339..360
164-
NAME_REF@339..360
165-
IDENT@339..348 "U&\"error\""
166-
WHITESPACE@348..349 " "
167-
UESCAPE_KW@349..356 "UESCAPE"
168-
WHITESPACE@356..357 " "
169-
STRING@357..360 "'A'"
170-
SEMICOLON@360..361 ";"
171-
WHITESPACE@361..362 "\n"
172-
SELECT@362..391
173-
SELECT_CLAUSE@362..391
174-
SELECT_KW@362..368 "select"
175-
WHITESPACE@368..369 " "
176-
TARGET_LIST@369..391
177-
TARGET@369..391
178-
NAME_REF@369..391
179-
IDENT@369..378 "U&\"error\""
180-
WHITESPACE@378..379 " "
181-
UESCAPE_KW@379..386 "UESCAPE"
182-
WHITESPACE@386..387 " "
183-
STRING@387..391 "'é'"
184-
SEMICOLON@391..392 ";"
185-
WHITESPACE@392..393 "\n"
186-
SELECT@393..423
187-
SELECT_CLAUSE@393..423
188-
SELECT_KW@393..399 "select"
189-
WHITESPACE@399..400 " "
190-
TARGET_LIST@400..423
191-
TARGET@400..423
192-
NAME_REF@400..423
193-
IDENT@400..409 "U&\"error\""
194-
WHITESPACE@409..410 " "
195-
UESCAPE_KW@410..417 "UESCAPE"
196-
WHITESPACE@417..418 " "
197-
STRING@418..423 "'foo'"
198-
SEMICOLON@423..424 ";"
199-
WHITESPACE@424..425 "\n"
110+
TARGET_LIST@235..262
111+
TARGET@235..262
112+
NAME_REF@235..262
113+
IDENT@235..262 "U&\"many: \\061 \\+0061 ..."
114+
SEMICOLON@262..263 ";"
115+
WHITESPACE@263..264 "\n"
116+
SELECT@264..290
117+
SELECT_CLAUSE@264..290
118+
SELECT_KW@264..270 "select"
119+
WHITESPACE@270..271 " "
120+
TARGET_LIST@271..290
121+
TARGET@271..290
122+
NAME_REF@271..290
123+
IDENT@271..290 "U&\"\\D800\\D801\\DC00\""
124+
SEMICOLON@290..291 ";"
125+
WHITESPACE@291..292 "\n"
126+
SELECT@292..305
127+
SELECT_CLAUSE@292..305
128+
SELECT_KW@292..298 "select"
129+
WHITESPACE@298..299 " "
130+
TARGET_LIST@299..305
131+
TARGET@299..305
132+
NAME_REF@299..305
133+
IDENT@299..305 "U&\" \\\""
134+
SEMICOLON@305..306 ";"
135+
WHITESPACE@306..307 "\n"
136+
SELECT@307..334
137+
SELECT_CLAUSE@307..334
138+
SELECT_KW@307..313 "select"
139+
WHITESPACE@313..314 " "
140+
TARGET_LIST@314..334
141+
TARGET@314..334
142+
NAME_REF@314..334
143+
IDENT@314..323 "U&\"error\""
144+
WHITESPACE@323..324 " "
145+
UESCAPE_KW@324..331 "UESCAPE"
146+
WHITESPACE@331..332 " "
147+
STRING@332..334 "''"
148+
SEMICOLON@334..335 ";"
149+
WHITESPACE@335..336 "\n"
150+
SELECT@336..364
151+
SELECT_CLAUSE@336..364
152+
SELECT_KW@336..342 "select"
153+
WHITESPACE@342..343 " "
154+
TARGET_LIST@343..364
155+
TARGET@343..364
156+
NAME_REF@343..364
157+
IDENT@343..352 "U&\"error\""
158+
WHITESPACE@352..353 " "
159+
UESCAPE_KW@353..360 "UESCAPE"
160+
WHITESPACE@360..361 " "
161+
STRING@361..364 "' '"
162+
SEMICOLON@364..365 ";"
163+
WHITESPACE@365..366 "\n"
164+
SELECT@366..394
165+
SELECT_CLAUSE@366..394
166+
SELECT_KW@366..372 "select"
167+
WHITESPACE@372..373 " "
168+
TARGET_LIST@373..394
169+
TARGET@373..394
170+
NAME_REF@373..394
171+
IDENT@373..382 "U&\"error\""
172+
WHITESPACE@382..383 " "
173+
UESCAPE_KW@383..390 "UESCAPE"
174+
WHITESPACE@390..391 " "
175+
STRING@391..394 "'+'"
176+
SEMICOLON@394..395 ";"
177+
WHITESPACE@395..396 "\n"
178+
SELECT@396..424
179+
SELECT_CLAUSE@396..424
180+
SELECT_KW@396..402 "select"
181+
WHITESPACE@402..403 " "
182+
TARGET_LIST@403..424
183+
TARGET@403..424
184+
NAME_REF@403..424
185+
IDENT@403..412 "U&\"error\""
186+
WHITESPACE@412..413 " "
187+
UESCAPE_KW@413..420 "UESCAPE"
188+
WHITESPACE@420..421 " "
189+
STRING@421..424 "'A'"
190+
SEMICOLON@424..425 ";"
191+
WHITESPACE@425..426 "\n"
192+
SELECT@426..455
193+
SELECT_CLAUSE@426..455
194+
SELECT_KW@426..432 "select"
195+
WHITESPACE@432..433 " "
196+
TARGET_LIST@433..455
197+
TARGET@433..455
198+
NAME_REF@433..455
199+
IDENT@433..442 "U&\"error\""
200+
WHITESPACE@442..443 " "
201+
UESCAPE_KW@443..450 "UESCAPE"
202+
WHITESPACE@450..451 " "
203+
STRING@451..455 "'é'"
204+
SEMICOLON@455..456 ";"
205+
WHITESPACE@456..457 "\n"
206+
SELECT@457..487
207+
SELECT_CLAUSE@457..487
208+
SELECT_KW@457..463 "select"
209+
WHITESPACE@463..464 " "
210+
TARGET_LIST@464..487
211+
TARGET@464..487
212+
NAME_REF@464..487
213+
IDENT@464..473 "U&\"error\""
214+
WHITESPACE@473..474 " "
215+
UESCAPE_KW@474..481 "UESCAPE"
216+
WHITESPACE@481..482 " "
217+
STRING@482..487 "'foo'"
218+
SEMICOLON@487..488 ";"
219+
WHITESPACE@488..489 "\n"
200220

201221
error[syntax-error]: Unicode escape requires 4 hex digits: \XXXX
202222
╭▸
203223
8 │ select U&"\006";
204-
╰╴ ━━━━━━━━
224+
╰╴ ━━━━
205225
error[syntax-error]: Unicode escape requires 6 hex digits: \+XXXXXX
206226
╭▸
207227
9 │ select U&"\+0061";
208-
╰╴ ━━━━━━━━━━
228+
╰╴ ━━━━━━
209229
error[syntax-error]: Unicode escape requires 4 hex digits: \XXXX
210230
╭▸
211231
10 │ select U&"wrong: \06" UESCAPE '\';
212-
╰╴ ━━━━━━━━━━━━━━
232+
╰╴ ━━━
213233
error[syntax-error]: Unicode escape requires 4 hex digits: !XXXX
214234
╭▸
215235
11 │ select U&"wrong: !061" UESCAPE '!';
216-
╰╴ ━━━━━━━━━━━━━━━
236+
╰╴ ━━━━
237+
error[syntax-error]: Unicode escape requires 4 hex digits: \XXXX
238+
╭▸
239+
12 │ select U&"many: \061 \+0061 \zzzz";
240+
╰╴ ━━━━
241+
error[syntax-error]: Unicode escape requires 6 hex digits: \+XXXXXX
242+
╭▸
243+
12 │ select U&"many: \061 \+0061 \zzzz";
244+
╰╴ ━━━━━━
245+
error[syntax-error]: Invalid Unicode escape sequence
246+
╭▸
247+
12 │ select U&"many: \061 \+0061 \zzzz";
248+
╰╴ ━━
249+
error[syntax-error]: Invalid Unicode surrogate pair
250+
╭▸
251+
13 │ select U&"\D800\D801\DC00";
252+
╰╴ ━━━━━━━━━━
217253
error[syntax-error]: Invalid Unicode escape sequence
218254
╭▸
219-
12 │ select U&" \";
220-
╰╴ ━━━━━━
255+
14 │ select U&" \";
256+
╰╴ ━
221257
error[syntax-error]: Invalid unicode escape character
222258
╭▸
223-
13 │ select U&"error" UESCAPE '';
259+
15 │ select U&"error" UESCAPE '';
224260
╰╴ ━━
225261
error[syntax-error]: Invalid unicode escape character
226262
╭▸
227-
14 │ select U&"error" UESCAPE ' ';
263+
16 │ select U&"error" UESCAPE ' ';
228264
╰╴ ━━━
229265
error[syntax-error]: Invalid unicode escape character
230266
╭▸
231-
15 │ select U&"error" UESCAPE '+';
267+
17 │ select U&"error" UESCAPE '+';
232268
╰╴ ━━━
233269
error[syntax-error]: Invalid unicode escape character
234270
╭▸
235-
16 │ select U&"error" UESCAPE 'A';
271+
18 │ select U&"error" UESCAPE 'A';
236272
╰╴ ━━━
237273
error[syntax-error]: Invalid unicode escape character
238274
╭▸
239-
17 │ select U&"error" UESCAPE 'é';
275+
19 │ select U&"error" UESCAPE 'é';
240276
╰╴ ━━━
241277
error[syntax-error]: Invalid unicode escape character
242278
╭▸
243-
18 │ select U&"error" UESCAPE 'foo';
279+
20 │ select U&"error" UESCAPE 'foo';
244280
╰╴ ━━━━━

0 commit comments

Comments
(0)