Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
source: crates/squawk_syntax/src/test.rs
input_file: crates/squawk_syntax/test_data/validation/unicode_escape_ident.sql
---
SOURCE_FILE@0..243
SOURCE_FILE@0..425
COMMENT@0..5 "-- ok"
WHITESPACE@5..6 "\n"
SELECT@6..32
Expand Down Expand Up @@ -113,6 +113,90 @@ SOURCE_FILE@0..243
IDENT@235..241 "U&\" \\\""
SEMICOLON@241..242 ";"
WHITESPACE@242..243 "\n"
SELECT@243..270
SELECT_CLAUSE@243..270
SELECT_KW@243..249 "select"
WHITESPACE@249..250 " "
TARGET_LIST@250..270
TARGET@250..270
NAME_REF@250..270
IDENT@250..259 "U&\"error\""
WHITESPACE@259..260 " "
UESCAPE_KW@260..267 "UESCAPE"
WHITESPACE@267..268 " "
STRING@268..270 "''"
SEMICOLON@270..271 ";"
WHITESPACE@271..272 "\n"
SELECT@272..300
SELECT_CLAUSE@272..300
SELECT_KW@272..278 "select"
WHITESPACE@278..279 " "
TARGET_LIST@279..300
TARGET@279..300
NAME_REF@279..300
IDENT@279..288 "U&\"error\""
WHITESPACE@288..289 " "
UESCAPE_KW@289..296 "UESCAPE"
WHITESPACE@296..297 " "
STRING@297..300 "' '"
SEMICOLON@300..301 ";"
WHITESPACE@301..302 "\n"
SELECT@302..330
SELECT_CLAUSE@302..330
SELECT_KW@302..308 "select"
WHITESPACE@308..309 " "
TARGET_LIST@309..330
TARGET@309..330
NAME_REF@309..330
IDENT@309..318 "U&\"error\""
WHITESPACE@318..319 " "
UESCAPE_KW@319..326 "UESCAPE"
WHITESPACE@326..327 " "
STRING@327..330 "'+'"
SEMICOLON@330..331 ";"
WHITESPACE@331..332 "\n"
SELECT@332..360
SELECT_CLAUSE@332..360
SELECT_KW@332..338 "select"
WHITESPACE@338..339 " "
TARGET_LIST@339..360
TARGET@339..360
NAME_REF@339..360
IDENT@339..348 "U&\"error\""
WHITESPACE@348..349 " "
UESCAPE_KW@349..356 "UESCAPE"
WHITESPACE@356..357 " "
STRING@357..360 "'A'"
SEMICOLON@360..361 ";"
WHITESPACE@361..362 "\n"
SELECT@362..391
SELECT_CLAUSE@362..391
SELECT_KW@362..368 "select"
WHITESPACE@368..369 " "
TARGET_LIST@369..391
TARGET@369..391
NAME_REF@369..391
IDENT@369..378 "U&\"error\""
WHITESPACE@378..379 " "
UESCAPE_KW@379..386 "UESCAPE"
WHITESPACE@386..387 " "
STRING@387..391 "'é'"
SEMICOLON@391..392 ";"
WHITESPACE@392..393 "\n"
SELECT@393..423
SELECT_CLAUSE@393..423
SELECT_KW@393..399 "select"
WHITESPACE@399..400 " "
TARGET_LIST@400..423
TARGET@400..423
NAME_REF@400..423
IDENT@400..409 "U&\"error\""
WHITESPACE@409..410 " "
UESCAPE_KW@410..417 "UESCAPE"
WHITESPACE@417..418 " "
STRING@418..423 "'foo'"
SEMICOLON@423..424 ";"
WHITESPACE@424..425 "\n"

error[syntax-error]: Unicode escape requires 4 hex digits: \XXXX
╭▸
Expand All @@ -134,3 +218,27 @@ error[syntax-error]: Invalid Unicode escape sequence
╭▸
12 │ select U&" \";
╰╴ ━━━━━━
error[syntax-error]: Invalid unicode escape character
╭▸
13 │ select U&"error" UESCAPE '';
╰╴ ━━
error[syntax-error]: Invalid unicode escape character
╭▸
14 │ select U&"error" UESCAPE ' ';
╰╴ ━━━
error[syntax-error]: Invalid unicode escape character
╭▸
15 │ select U&"error" UESCAPE '+';
╰╴ ━━━
error[syntax-error]: Invalid unicode escape character
╭▸
16 │ select U&"error" UESCAPE 'A';
╰╴ ━━━
error[syntax-error]: Invalid unicode escape character
╭▸
17 │ select U&"error" UESCAPE 'é';
╰╴ ━━━
error[syntax-error]: Invalid unicode escape character
╭▸
18 │ select U&"error" UESCAPE 'foo';
╰╴ ━━━━━
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
source: crates/squawk_syntax/src/test.rs
input_file: crates/squawk_syntax/test_data/validation/unicode_escape_string.sql
---
SOURCE_FILE@0..241
SOURCE_FILE@0..423
COMMENT@0..5 "-- ok"
WHITESPACE@5..6 "\n"
SELECT@6..30
Expand Down Expand Up @@ -113,6 +113,90 @@ SOURCE_FILE@0..241
UNICODE_ESC_STRING@233..239 "U&' \\'"
SEMICOLON@239..240 ";"
WHITESPACE@240..241 "\n"
SELECT@241..268
SELECT_CLAUSE@241..268
SELECT_KW@241..247 "select"
WHITESPACE@247..248 " "
TARGET_LIST@248..268
TARGET@248..268
LITERAL@248..268
UNICODE_ESC_STRING@248..257 "U&'error'"
WHITESPACE@257..258 " "
UESCAPE_KW@258..265 "UESCAPE"
WHITESPACE@265..266 " "
STRING@266..268 "''"
SEMICOLON@268..269 ";"
WHITESPACE@269..270 "\n"
SELECT@270..298
SELECT_CLAUSE@270..298
SELECT_KW@270..276 "select"
WHITESPACE@276..277 " "
TARGET_LIST@277..298
TARGET@277..298
LITERAL@277..298
UNICODE_ESC_STRING@277..286 "U&'error'"
WHITESPACE@286..287 " "
UESCAPE_KW@287..294 "UESCAPE"
WHITESPACE@294..295 " "
STRING@295..298 "' '"
SEMICOLON@298..299 ";"
WHITESPACE@299..300 "\n"
SELECT@300..328
SELECT_CLAUSE@300..328
SELECT_KW@300..306 "select"
WHITESPACE@306..307 " "
TARGET_LIST@307..328
TARGET@307..328
LITERAL@307..328
UNICODE_ESC_STRING@307..316 "U&'error'"
WHITESPACE@316..317 " "
UESCAPE_KW@317..324 "UESCAPE"
WHITESPACE@324..325 " "
STRING@325..328 "'+'"
SEMICOLON@328..329 ";"
WHITESPACE@329..330 "\n"
SELECT@330..358
SELECT_CLAUSE@330..358
SELECT_KW@330..336 "select"
WHITESPACE@336..337 " "
TARGET_LIST@337..358
TARGET@337..358
LITERAL@337..358
UNICODE_ESC_STRING@337..346 "U&'error'"
WHITESPACE@346..347 " "
UESCAPE_KW@347..354 "UESCAPE"
WHITESPACE@354..355 " "
STRING@355..358 "'A'"
SEMICOLON@358..359 ";"
WHITESPACE@359..360 "\n"
SELECT@360..389
SELECT_CLAUSE@360..389
SELECT_KW@360..366 "select"
WHITESPACE@366..367 " "
TARGET_LIST@367..389
TARGET@367..389
LITERAL@367..389
UNICODE_ESC_STRING@367..376 "U&'error'"
WHITESPACE@376..377 " "
UESCAPE_KW@377..384 "UESCAPE"
WHITESPACE@384..385 " "
STRING@385..389 "'é'"
SEMICOLON@389..390 ";"
WHITESPACE@390..391 "\n"
SELECT@391..421
SELECT_CLAUSE@391..421
SELECT_KW@391..397 "select"
WHITESPACE@397..398 " "
TARGET_LIST@398..421
TARGET@398..421
LITERAL@398..421
UNICODE_ESC_STRING@398..407 "U&'error'"
WHITESPACE@407..408 " "
UESCAPE_KW@408..415 "UESCAPE"
WHITESPACE@415..416 " "
STRING@416..421 "'foo'"
SEMICOLON@421..422 ";"
WHITESPACE@422..423 "\n"

error[syntax-error]: Unicode escape requires 4 hex digits: \XXXX
╭▸
Expand All @@ -134,3 +218,27 @@ error[syntax-error]: Invalid Unicode escape sequence
╭▸
12 │ select U&' \';
╰╴ ━━━━━━
error[syntax-error]: Invalid unicode escape character
╭▸
13 │ select U&'error' UESCAPE '';
╰╴ ━━
error[syntax-error]: Invalid unicode escape character
╭▸
14 │ select U&'error' UESCAPE ' ';
╰╴ ━━━
error[syntax-error]: Invalid unicode escape character
╭▸
15 │ select U&'error' UESCAPE '+';
╰╴ ━━━
error[syntax-error]: Invalid unicode escape character
╭▸
16 │ select U&'error' UESCAPE 'A';
╰╴ ━━━
error[syntax-error]: Invalid unicode escape character
╭▸
17 │ select U&'error' UESCAPE 'é';
╰╴ ━━━
error[syntax-error]: Invalid unicode escape character
╭▸
18 │ select U&'error' UESCAPE 'foo';
╰╴ ━━━━━
41 changes: 34 additions & 7 deletions crates/squawk_syntax/src/validation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,15 @@ fn validate_unicode_esc_string(lit: &ast::Literal) -> Option<SyntaxError> {
UNICODE_ESC_STRING => unicode_esc = Some(token),
UESCAPE_KW => seen_uescape = true,
STRING if seen_uescape => {
escape_char = uescape_char(&token).unwrap_or(escape_char);
escape_char = match uescape_char(&token) {
Some(ch) => ch,
None => {
return Some(SyntaxError::new(
"Invalid unicode escape character",
token.text_range(),
));
}
};
break;
}
_ => (),
Expand Down Expand Up @@ -224,7 +232,15 @@ fn validate_unicode_esc_ident(token: &SyntaxToken) -> Option<SyntaxError> {
UESCAPE_KW => seen_uescape = true,
STRING if seen_uescape => {
if let Some(string_token) = element.as_token() {
escape_char = uescape_char(string_token).unwrap_or(escape_char);
escape_char = match uescape_char(string_token) {
Some(ch) => ch,
None => {
return Some(SyntaxError::new(
"Invalid unicode escape character",
string_token.text_range(),
));
}
};
}
break;
}
Expand All @@ -237,14 +253,25 @@ fn validate_unicode_esc_ident(token: &SyntaxToken) -> Option<SyntaxError> {
Some(SyntaxError::new(err.to_string(), token.text_range()))
}

/// Reports whether `byte` is acceptable as a `UESCAPE` escape character.
///
/// Returns `false` for ASCII hex digits, `+`, `'`, `"`, and the whitespace
/// bytes space, tab, newline, carriage return, vertical tab, and form feed.
/// Every other byte value returns `true`.
///
/// Reference:
/// https://github.com/postgres/postgres/blob/228a1f9542792c6533ef74c2e7aefad0da1d9a7a/src/backend/parser/parser.c#L350
const fn is_valid_uescape_char(byte: u8) -> bool {
    match byte {
        // ASCII hex digits, either case.
        b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' => false,
        // Plus sign and both quote characters.
        b'+' | b'\'' | b'"' => false,
        // Whitespace; 0x0B is vertical tab and 0x0C is form feed, which
        // have no byte-literal escape in Rust.
        b' ' | b'\t' | b'\n' | b'\r' | 0x0B | 0x0C => false,
        _ => true,
    }
}

fn uescape_char(string_token: &SyntaxToken) -> Option<char> {
let text = string_token.text();
let inner = text.strip_prefix('\'')?.strip_suffix('\'')?;
let mut chars = inner.chars();
match (chars.next(), chars.next()) {
(Some(c), None) => Some(c),
_ => None,
}
let &[byte] = inner.as_bytes() else {
return None;
};
is_valid_uescape_char(byte).then(|| char::from(byte))
}

enum UnicodeEscapeKind {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,9 @@ select U&"\+0061";
select U&"wrong: \06" UESCAPE '\';
select U&"wrong: !061" UESCAPE '!';
select U&" \";
select U&"error" UESCAPE '';
select U&"error" UESCAPE ' ';
select U&"error" UESCAPE '+';
select U&"error" UESCAPE 'A';
select U&"error" UESCAPE 'é';
select U&"error" UESCAPE 'foo';
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,9 @@ select U&'\+0061';
select U&'wrong: \06' UESCAPE '\';
select U&'wrong: !061' UESCAPE '!';
select U&' \';
select U&'error' UESCAPE '';
select U&'error' UESCAPE ' ';
select U&'error' UESCAPE '+';
select U&'error' UESCAPE 'A';
select U&'error' UESCAPE 'é';
select U&'error' UESCAPE 'foo';
1 change: 1 addition & 0 deletions crates/xtask/src/sync_pg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ const IGNORED_LINES: &[&str] = &[
"CREATE SUBSCRIPTION regress_testsub CONNECTION 'foo';",
"CREATE SUBSCRIPTION regress_testsub PUBLICATION foo;",
"SELECT U&'wrong: +0061' UESCAPE +;",
"SELECT U&'wrong: +0061' UESCAPE '+';",
"CREATE STATISTICS tst;",
"CREATE STATISTICS tst ON a, b;",
"CREATE STATISTICS tst ON a FROM (VALUES (x)) AS foo;",
Expand Down
2 changes: 1 addition & 1 deletion postgres/regression_suite/strings.sql
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ SELECT 'tricky' AS U&"\" UESCAPE '!';
-- SELECT U&'wrong: \061';
-- SELECT U&'wrong: \+0061';
-- SELECT U&'wrong: +0061' UESCAPE +;
SELECT U&'wrong: +0061' UESCAPE '+';
-- SELECT U&'wrong: +0061' UESCAPE '+';

-- SELECT U&'wrong: \db99';
-- SELECT U&'wrong: \db99xy';
Expand Down
Loading