Skip to content

Commit 849449c

Browse files
authored
parser: validate string continuations (#1137)
1 parent 980dc05 commit 849449c

11 files changed

Lines changed: 834 additions & 398 deletions

crates/squawk_ide/src/infer.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,11 @@ mod tests {
131131
assert_snapshot!(infer("select E'hello'"), @"text");
132132
}
133133

134+
#[test]
135+
fn unicode_escape_string() {
136+
assert_snapshot!(infer("select U&' \' UESCAPE '!'"), @"text");
137+
}
138+
134139
#[test]
135140
fn boolean_true() {
136141
assert_snapshot!(infer("select true"), @"boolean");
@@ -161,6 +166,11 @@ mod tests {
161166
assert_snapshot!(infer("select b'100'"), @"bit");
162167
}
163168

169+
#[test]
170+
fn byte_string() {
171+
assert_snapshot!(infer("select x'FF'"), @"bit");
172+
}
173+
164174
#[test]
165175
fn bit_varying() {
166176
assert_snapshot!(infer("select b'100'::bit varying"), @"bit");

crates/squawk_parser/src/lexed_str.rs

Lines changed: 3 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -255,25 +255,17 @@ impl<'a> Converter<'a> {
255255
"Missing trailing `'` symbol to terminate the hex bit string literal"
256256
.into(),
257257
);
258-
} else {
259-
let inside = &token_text[2..token_text.len() - 1];
260-
if let Some(c) = inside.chars().find(|c| !c.is_ascii_hexdigit()) {
261-
err = Some(format!("\"{c}\" is not a valid hexadecimal digit"));
262-
}
263258
}
259+
// digit validation in squawk_syntax
264260
SyntaxKind::BYTE_STRING
265261
}
266262
squawk_lexer::LiteralKind::BitStr { terminated } => {
267263
if !terminated {
268264
err = Some(
269265
"Missing trailing `'` symbol to terminate the bit string literal".into(),
270266
);
271-
} else {
272-
let inside = &token_text[2..token_text.len() - 1];
273-
if let Some(c) = inside.chars().find(|&c| c != '0' && c != '1') {
274-
err = Some(format!("\"{c}\" is not a valid binary digit"));
275-
}
276267
}
268+
// digit validation in squawk_syntax
277269
SyntaxKind::BIT_STRING
278270
}
279271
squawk_lexer::LiteralKind::DollarQuotedString { terminated } => {
@@ -298,9 +290,8 @@ impl<'a> Converter<'a> {
298290
err = Some(
299291
"Missing trailing `'` symbol to terminate the escape string literal".into(),
300292
);
301-
} else {
302-
err = validate_escape_string_unicode_escapes(token_text);
303293
}
294+
// unicode escape sequences validated in squawk_syntax
304295
SyntaxKind::ESC_STRING
305296
}
306297
};
@@ -309,32 +300,6 @@ impl<'a> Converter<'a> {
309300
}
310301
}
311302

312-
fn validate_escape_string_unicode_escapes(token_text: &str) -> Option<String> {
313-
let mut chars = token_text[2..token_text.len() - 1].chars();
314-
315-
while let Some(c) = chars.next() {
316-
if c != '\\' {
317-
continue;
318-
}
319-
320-
let (required, example) = match chars.next() {
321-
Some('u') => (4, r"\uXXXX"),
322-
Some('U') => (8, r"\UXXXXXXXX"),
323-
_ => continue,
324-
};
325-
326-
for _ in 0..required {
327-
if !chars.next().is_some_and(|c| c.is_ascii_hexdigit()) {
328-
return Some(format!(
329-
"Unicode escape requires {required} hex digits: {example}"
330-
));
331-
}
332-
}
333-
}
334-
335-
None
336-
}
337-
338303
#[cfg(test)]
339304
mod tests {
340305
use annotate_snippets::{AnnotationKind, Level, Renderer, Snippet, renderer::DecorStyle};
@@ -390,16 +355,6 @@ mod tests {
390355
");
391356
}
392357

393-
#[test]
394-
fn hex_invalid_digit() {
395-
assert_snapshot!(lex("select X'1FZ';"), @r#"
396-
error: "Z" is not a valid hexadecimal digit
397-
╭▸
398-
1 │ select X'1FZ';
399-
╰╴ ━━━━━━
400-
"#);
401-
}
402-
403358
#[test]
404359
fn unterminated_hex_bit_string_error() {
405360
assert_snapshot!(lex("select X'1F;"), @"
@@ -420,16 +375,6 @@ mod tests {
420375
");
421376
}
422377

423-
#[test]
424-
fn invalid_binary_digit_error() {
425-
assert_snapshot!(lex("select b'0 ';"), @r#"
426-
error: " " is not a valid binary digit
427-
╭▸
428-
1 │ select b'0 ';
429-
╰╴ ━━━━━
430-
"#);
431-
}
432-
433378
#[test]
434379
fn unterminated_dollar_quoted_string_error() {
435380
assert_snapshot!(lex("select $tag$hello;"), @"
@@ -459,24 +404,4 @@ mod tests {
459404
╰╴ ━━━━━━━━
460405
");
461406
}
462-
463-
#[test]
464-
fn invalid_unicode_escape_4_digits_error() {
465-
assert_snapshot!(lex(r"select E'\u00';"), @r"
466-
error: Unicode escape requires 4 hex digits: \uXXXX
467-
╭▸
468-
1 │ select E'\u00';
469-
╰╴ ━━━━━━━
470-
");
471-
}
472-
473-
#[test]
474-
fn invalid_unicode_escape_8_digits_error() {
475-
assert_snapshot!(lex(r"select E'\UFFFF';"), @r"
476-
error: Unicode escape requires 8 hex digits: \UXXXXXXXX
477-
╭▸
478-
1 │ select E'\UFFFF';
479-
╰╴ ━━━━━━━━━
480-
");
481-
}
482407
}
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
---
2+
source: crates/squawk_syntax/src/test.rs
3+
input_file: crates/squawk_syntax/test_data/validation/bit_string.sql
4+
---
5+
SOURCE_FILE@0..120
6+
COMMENT@0..5 "-- ok"
7+
WHITESPACE@5..6 "\n"
8+
SELECT@6..18
9+
SELECT_CLAUSE@6..18
10+
SELECT_KW@6..12 "select"
11+
WHITESPACE@12..13 " "
12+
TARGET_LIST@13..18
13+
TARGET@13..18
14+
LITERAL@13..18
15+
BIT_STRING@13..18 "b'01'"
16+
SEMICOLON@18..19 ";"
17+
WHITESPACE@19..20 "\n"
18+
SELECT@20..34
19+
SELECT_CLAUSE@20..34
20+
SELECT_KW@20..26 "select"
21+
WHITESPACE@26..27 " "
22+
TARGET_LIST@27..34
23+
TARGET@27..34
24+
LITERAL@27..34
25+
BIT_STRING@27..34 "B'1010'"
26+
SEMICOLON@34..35 ";"
27+
WHITESPACE@35..36 "\n"
28+
SELECT@36..46
29+
SELECT_CLAUSE@36..46
30+
SELECT_KW@36..42 "select"
31+
WHITESPACE@42..43 " "
32+
TARGET_LIST@43..46
33+
TARGET@43..46
34+
LITERAL@43..46
35+
BIT_STRING@43..46 "b''"
36+
SEMICOLON@46..47 ";"
37+
WHITESPACE@47..49 "\n\n"
38+
COMMENT@49..58 "-- errors"
39+
WHITESPACE@58..59 "\n"
40+
SELECT@59..72
41+
SELECT_CLAUSE@59..72
42+
SELECT_KW@59..65 "select"
43+
WHITESPACE@65..66 " "
44+
TARGET_LIST@66..72
45+
TARGET@66..72
46+
LITERAL@66..72
47+
BIT_STRING@66..72 "b'012'"
48+
SEMICOLON@72..73 ";"
49+
WHITESPACE@73..74 "\n"
50+
SELECT@74..87
51+
SELECT_CLAUSE@74..87
52+
SELECT_KW@74..80 "select"
53+
WHITESPACE@80..81 " "
54+
TARGET_LIST@81..87
55+
TARGET@81..87
56+
LITERAL@81..87
57+
BIT_STRING@81..87 "b'01A'"
58+
SEMICOLON@87..88 ";"
59+
WHITESPACE@88..89 "\n"
60+
SELECT@89..102
61+
SELECT_CLAUSE@89..102
62+
SELECT_KW@89..95 "select"
63+
WHITESPACE@95..96 " "
64+
TARGET_LIST@96..102
65+
TARGET@96..102
66+
LITERAL@96..102
67+
BIT_STRING@96..102 "b'0 1'"
68+
SEMICOLON@102..103 ";"
69+
WHITESPACE@103..104 "\n"
70+
SELECT@104..118
71+
SELECT_CLAUSE@104..118
72+
SELECT_KW@104..110 "select"
73+
WHITESPACE@110..111 " "
74+
TARGET_LIST@111..118
75+
TARGET@111..118
76+
LITERAL@111..118
77+
BIT_STRING@111..118 "B'2345'"
78+
SEMICOLON@118..119 ";"
79+
WHITESPACE@119..120 "\n"
80+
81+
error[syntax-error]: "2" is not a valid binary digit
82+
╭▸
83+
7 │ select b'012';
84+
╰╴ ━
85+
error[syntax-error]: "A" is not a valid binary digit
86+
╭▸
87+
8 │ select b'01A';
88+
╰╴ ━
89+
error[syntax-error]: " " is not a valid binary digit
90+
╭▸
91+
9 │ select b'0 1';
92+
╰╴ ━
93+
error[syntax-error]: "2" is not a valid binary digit
94+
╭▸
95+
10 │ select B'2345';
96+
╰╴ ━
97+
error[syntax-error]: "3" is not a valid binary digit
98+
╭▸
99+
10 │ select B'2345';
100+
╰╴ ━
101+
error[syntax-error]: "4" is not a valid binary digit
102+
╭▸
103+
10 │ select B'2345';
104+
╰╴ ━
105+
error[syntax-error]: "5" is not a valid binary digit
106+
╭▸
107+
10 │ select B'2345';
108+
╰╴ ━
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
---
2+
source: crates/squawk_syntax/src/test.rs
3+
input_file: crates/squawk_syntax/test_data/validation/byte_string.sql
4+
---
5+
SOURCE_FILE@0..124
6+
COMMENT@0..5 "-- ok"
7+
WHITESPACE@5..6 "\n"
8+
SELECT@6..18
9+
SELECT_CLAUSE@6..18
10+
SELECT_KW@6..12 "select"
11+
WHITESPACE@12..13 " "
12+
TARGET_LIST@13..18
13+
TARGET@13..18
14+
LITERAL@13..18
15+
BYTE_STRING@13..18 "x'1F'"
16+
SEMICOLON@18..19 ";"
17+
WHITESPACE@19..20 "\n"
18+
SELECT@20..38
19+
SELECT_CLAUSE@20..38
20+
SELECT_KW@20..26 "select"
21+
WHITESPACE@26..27 " "
22+
TARGET_LIST@27..38
23+
TARGET@27..38
24+
LITERAL@27..38
25+
BYTE_STRING@27..38 "X'deadBEEF'"
26+
SEMICOLON@38..39 ";"
27+
WHITESPACE@39..40 "\n"
28+
SELECT@40..50
29+
SELECT_CLAUSE@40..50
30+
SELECT_KW@40..46 "select"
31+
WHITESPACE@46..47 " "
32+
TARGET_LIST@47..50
33+
TARGET@47..50
34+
LITERAL@47..50
35+
BYTE_STRING@47..50 "x''"
36+
SEMICOLON@50..51 ";"
37+
WHITESPACE@51..53 "\n\n"
38+
COMMENT@53..62 "-- errors"
39+
WHITESPACE@62..63 "\n"
40+
SELECT@63..76
41+
SELECT_CLAUSE@63..76
42+
SELECT_KW@63..69 "select"
43+
WHITESPACE@69..70 " "
44+
TARGET_LIST@70..76
45+
TARGET@70..76
46+
LITERAL@70..76
47+
BYTE_STRING@70..76 "x'1FZ'"
48+
SEMICOLON@76..77 ";"
49+
WHITESPACE@77..78 "\n"
50+
SELECT@78..90
51+
SELECT_CLAUSE@78..90
52+
SELECT_KW@78..84 "select"
53+
WHITESPACE@84..85 " "
54+
TARGET_LIST@85..90
55+
TARGET@85..90
56+
LITERAL@85..90
57+
BYTE_STRING@85..90 "x'1G'"
58+
SEMICOLON@90..91 ";"
59+
WHITESPACE@91..92 "\n"
60+
SELECT@92..106
61+
SELECT_CLAUSE@92..106
62+
SELECT_KW@92..98 "select"
63+
WHITESPACE@98..99 " "
64+
TARGET_LIST@99..106
65+
TARGET@99..106
66+
LITERAL@99..106
67+
BYTE_STRING@99..106 "x'1G2H'"
68+
SEMICOLON@106..107 ";"
69+
WHITESPACE@107..108 "\n"
70+
SELECT@108..122
71+
SELECT_CLAUSE@108..122
72+
SELECT_KW@108..114 "select"
73+
WHITESPACE@114..115 " "
74+
TARGET_LIST@115..122
75+
TARGET@115..122
76+
LITERAL@115..122
77+
BYTE_STRING@115..122 "X'GHIJ'"
78+
SEMICOLON@122..123 ";"
79+
WHITESPACE@123..124 "\n"
80+
81+
error[syntax-error]: "Z" is not a valid hexadecimal digit
82+
╭▸
83+
7 │ select x'1FZ';
84+
╰╴ ━
85+
error[syntax-error]: "G" is not a valid hexadecimal digit
86+
╭▸
87+
8 │ select x'1G';
88+
╰╴ ━
89+
error[syntax-error]: "G" is not a valid hexadecimal digit
90+
╭▸
91+
9 │ select x'1G2H';
92+
╰╴ ━
93+
error[syntax-error]: "H" is not a valid hexadecimal digit
94+
╭▸
95+
9 │ select x'1G2H';
96+
╰╴ ━
97+
error[syntax-error]: "G" is not a valid hexadecimal digit
98+
╭▸
99+
10 │ select X'GHIJ';
100+
╰╴ ━
101+
error[syntax-error]: "H" is not a valid hexadecimal digit
102+
╭▸
103+
10 │ select X'GHIJ';
104+
╰╴ ━
105+
error[syntax-error]: "I" is not a valid hexadecimal digit
106+
╭▸
107+
10 │ select X'GHIJ';
108+
╰╴ ━
109+
error[syntax-error]: "J" is not a valid hexadecimal digit
110+
╭▸
111+
10 │ select X'GHIJ';
112+
╰╴ ━

0 commit comments

Comments
(0)