diff --git a/crates/squawk_ide/src/infer.rs b/crates/squawk_ide/src/infer.rs index 4478a1aa..756ffe39 100644 --- a/crates/squawk_ide/src/infer.rs +++ b/crates/squawk_ide/src/infer.rs @@ -131,6 +131,11 @@ mod tests { assert_snapshot!(infer("select E'hello'"), @"text"); } + #[test] + fn unicode_escape_string() { + assert_snapshot!(infer("select U&' \' UESCAPE '!'"), @"text"); + } + #[test] fn boolean_true() { assert_snapshot!(infer("select true"), @"boolean"); @@ -161,6 +166,11 @@ mod tests { assert_snapshot!(infer("select b'100'"), @"bit"); } + #[test] + fn byte_string() { + assert_snapshot!(infer("select x'FF'"), @"bit"); + } + #[test] fn bit_varying() { assert_snapshot!(infer("select b'100'::bit varying"), @"bit"); diff --git a/crates/squawk_parser/src/lexed_str.rs b/crates/squawk_parser/src/lexed_str.rs index 803117ec..8d377622 100644 --- a/crates/squawk_parser/src/lexed_str.rs +++ b/crates/squawk_parser/src/lexed_str.rs @@ -255,12 +255,8 @@ impl<'a> Converter<'a> { "Missing trailing `'` symbol to terminate the hex bit string literal" .into(), ); - } else { - let inside = &token_text[2..token_text.len() - 1]; - if let Some(c) = inside.chars().find(|c| !c.is_ascii_hexdigit()) { - err = Some(format!("\"{c}\" is not a valid hexadecimal digit")); - } } + // digit validation in squawk_syntax SyntaxKind::BYTE_STRING } squawk_lexer::LiteralKind::BitStr { terminated } => { @@ -268,12 +264,8 @@ impl<'a> Converter<'a> { err = Some( "Missing trailing `'` symbol to terminate the bit string literal".into(), ); - } else { - let inside = &token_text[2..token_text.len() - 1]; - if let Some(c) = inside.chars().find(|&c| c != '0' && c != '1') { - err = Some(format!("\"{c}\" is not a valid binary digit")); - } } + // digit validation in squawk_syntax SyntaxKind::BIT_STRING } squawk_lexer::LiteralKind::DollarQuotedString { terminated } => { @@ -298,9 +290,8 @@ impl<'a> Converter<'a> { err = Some( "Missing trailing `'` symbol to terminate the escape string literal".into(), ); - } else { - err = validate_escape_string_unicode_escapes(token_text); } + // unicode escape sequences validated in squawk_syntax SyntaxKind::ESC_STRING } }; @@ -309,32 +300,6 @@ impl<'a> Converter<'a> { } } -fn validate_escape_string_unicode_escapes(token_text: &str) -> Option { - let mut chars = token_text[2..token_text.len() - 1].chars(); - - while let Some(c) = chars.next() { - if c != '\\' { - continue; - } - - let (required, example) = match chars.next() { - Some('u') => (4, r"\uXXXX"), - Some('U') => (8, r"\UXXXXXXXX"), - _ => continue, - }; - - for _ in 0..required { - if !chars.next().is_some_and(|c| c.is_ascii_hexdigit()) { - return Some(format!( - "Unicode escape requires {required} hex digits: {example}" - )); - } - } - } - - None -} - #[cfg(test)] mod tests { use annotate_snippets::{AnnotationKind, Level, Renderer, Snippet, renderer::DecorStyle}; @@ -390,16 +355,6 @@ mod tests { "); } - #[test] - fn hex_invalid_digit() { - assert_snapshot!(lex("select X'1FZ';"), @r#" - error: "Z" is not a valid hexadecimal digit - ╭▸ - 1 │ select X'1FZ'; - ╰╴ ━━━━━━ - "#); - } - #[test] fn unterminated_hex_bit_string_error() { assert_snapshot!(lex("select X'1F;"), @" @@ -420,16 +375,6 @@ mod tests { "); } - #[test] - fn invalid_binary_digit_error() { - assert_snapshot!(lex("select b'0 ';"), @r#" - error: " " is not a valid binary digit - ╭▸ - 1 │ select b'0 '; - ╰╴ ━━━━━ - "#); - } - #[test] fn unterminated_dollar_quoted_string_error() { assert_snapshot!(lex("select $tag$hello;"), @" @@ -459,24 +404,4 @@ mod tests { ╰╴ ━━━━━━━━ "); } - - #[test] - fn invalid_unicode_escape_4_digits_error() { - assert_snapshot!(lex(r"select E'\u00';"), @r" - error: Unicode escape requires 4 hex digits: \uXXXX - ╭▸ - 1 │ select E'\u00'; - ╰╴ ━━━━━━━ - "); - } - - #[test] - fn invalid_unicode_escape_8_digits_error() { - assert_snapshot!(lex(r"select E'\UFFFF';"), @r" - error: Unicode escape requires 8 hex digits: \UXXXXXXXX - ╭▸ - 1 │ select E'\UFFFF'; - ╰╴ ━━━━━━━━━ - "); - } } diff --git a/crates/squawk_syntax/src/snapshots/squawk_syntax__test__bit_string_validation.snap b/crates/squawk_syntax/src/snapshots/squawk_syntax__test__bit_string_validation.snap new file mode 100644 index 00000000..3a315c49 --- /dev/null +++ b/crates/squawk_syntax/src/snapshots/squawk_syntax__test__bit_string_validation.snap @@ -0,0 +1,108 @@ +--- +source: crates/squawk_syntax/src/test.rs +input_file: crates/squawk_syntax/test_data/validation/bit_string.sql +--- +SOURCE_FILE@0..120 + COMMENT@0..5 "-- ok" + WHITESPACE@5..6 "\n" + SELECT@6..18 + SELECT_CLAUSE@6..18 + SELECT_KW@6..12 "select" + WHITESPACE@12..13 " " + TARGET_LIST@13..18 + TARGET@13..18 + LITERAL@13..18 + BIT_STRING@13..18 "b'01'" + SEMICOLON@18..19 ";" + WHITESPACE@19..20 "\n" + SELECT@20..34 + SELECT_CLAUSE@20..34 + SELECT_KW@20..26 "select" + WHITESPACE@26..27 " " + TARGET_LIST@27..34 + TARGET@27..34 + LITERAL@27..34 + BIT_STRING@27..34 "B'1010'" + SEMICOLON@34..35 ";" + WHITESPACE@35..36 "\n" + SELECT@36..46 + SELECT_CLAUSE@36..46 + SELECT_KW@36..42 "select" + WHITESPACE@42..43 " " + TARGET_LIST@43..46 + TARGET@43..46 + LITERAL@43..46 + BIT_STRING@43..46 "b''" + SEMICOLON@46..47 ";" + WHITESPACE@47..49 "\n\n" + COMMENT@49..58 "-- errors" + WHITESPACE@58..59 "\n" + SELECT@59..72 + SELECT_CLAUSE@59..72 + SELECT_KW@59..65 "select" + WHITESPACE@65..66 " " + TARGET_LIST@66..72 + TARGET@66..72 + LITERAL@66..72 + BIT_STRING@66..72 "b'012'" + SEMICOLON@72..73 ";" + WHITESPACE@73..74 "\n" + SELECT@74..87 + SELECT_CLAUSE@74..87 + SELECT_KW@74..80 "select" + WHITESPACE@80..81 " " + TARGET_LIST@81..87 + TARGET@81..87 + LITERAL@81..87 + BIT_STRING@81..87 "b'01A'" + SEMICOLON@87..88 ";" + WHITESPACE@88..89 "\n" + SELECT@89..102 + SELECT_CLAUSE@89..102 + SELECT_KW@89..95 "select" + WHITESPACE@95..96 " " + TARGET_LIST@96..102 + TARGET@96..102 + LITERAL@96..102 + BIT_STRING@96..102 "b'0 1'" + SEMICOLON@102..103 ";" + WHITESPACE@103..104 "\n" + SELECT@104..118 + SELECT_CLAUSE@104..118 + SELECT_KW@104..110 "select" + WHITESPACE@110..111 " " + TARGET_LIST@111..118 + TARGET@111..118 + LITERAL@111..118 + BIT_STRING@111..118 "B'2345'" + SEMICOLON@118..119 ";" + WHITESPACE@119..120 "\n" + +error[syntax-error]: "2" is not a valid binary digit + ╭▸ +7 │ select b'012'; + ╰╴ ━ +error[syntax-error]: "A" is not a valid binary digit + ╭▸ +8 │ select b'01A'; + ╰╴ ━ +error[syntax-error]: " " is not a valid binary digit + ╭▸ +9 │ select b'0 1'; + ╰╴ ━ +error[syntax-error]: "2" is not a valid binary digit + ╭▸ +10 │ select B'2345'; + ╰╴ ━ +error[syntax-error]: "3" is not a valid binary digit + ╭▸ +10 │ select B'2345'; + ╰╴ ━ +error[syntax-error]: "4" is not a valid binary digit + ╭▸ +10 │ select B'2345'; + ╰╴ ━ +error[syntax-error]: "5" is not a valid binary digit + ╭▸ +10 │ select B'2345'; + ╰╴ ━ diff --git a/crates/squawk_syntax/src/snapshots/squawk_syntax__test__byte_string_validation.snap b/crates/squawk_syntax/src/snapshots/squawk_syntax__test__byte_string_validation.snap new file mode 100644 index 00000000..a4530b6b --- /dev/null +++ b/crates/squawk_syntax/src/snapshots/squawk_syntax__test__byte_string_validation.snap @@ -0,0 +1,112 @@ +--- +source: crates/squawk_syntax/src/test.rs +input_file: crates/squawk_syntax/test_data/validation/byte_string.sql +--- +SOURCE_FILE@0..124 + COMMENT@0..5 "-- ok" + WHITESPACE@5..6 "\n" + SELECT@6..18 + SELECT_CLAUSE@6..18 + SELECT_KW@6..12 "select" + WHITESPACE@12..13 " " + TARGET_LIST@13..18 + TARGET@13..18 + LITERAL@13..18 + BYTE_STRING@13..18 "x'1F'" + SEMICOLON@18..19 ";" + WHITESPACE@19..20 "\n" + SELECT@20..38 + SELECT_CLAUSE@20..38 + SELECT_KW@20..26 "select" + WHITESPACE@26..27 " " + TARGET_LIST@27..38 + TARGET@27..38 + LITERAL@27..38 + BYTE_STRING@27..38 "X'deadBEEF'" + SEMICOLON@38..39 ";" + WHITESPACE@39..40 "\n" + SELECT@40..50 + SELECT_CLAUSE@40..50 + SELECT_KW@40..46 "select" + WHITESPACE@46..47 " " + TARGET_LIST@47..50 + TARGET@47..50 + LITERAL@47..50 + BYTE_STRING@47..50 "x''" + SEMICOLON@50..51 ";" + WHITESPACE@51..53 "\n\n" + COMMENT@53..62 "-- errors" + WHITESPACE@62..63 "\n" + SELECT@63..76 + SELECT_CLAUSE@63..76 + SELECT_KW@63..69 "select" + WHITESPACE@69..70 " " + TARGET_LIST@70..76 + TARGET@70..76 + LITERAL@70..76 + BYTE_STRING@70..76 "x'1FZ'" + SEMICOLON@76..77 ";" + WHITESPACE@77..78 "\n" + SELECT@78..90 + SELECT_CLAUSE@78..90 + SELECT_KW@78..84 "select" + WHITESPACE@84..85 " " + TARGET_LIST@85..90 + TARGET@85..90 + LITERAL@85..90 + BYTE_STRING@85..90 "x'1G'" + SEMICOLON@90..91 ";" + WHITESPACE@91..92 "\n" + SELECT@92..106 + SELECT_CLAUSE@92..106 + SELECT_KW@92..98 "select" + WHITESPACE@98..99 " " + TARGET_LIST@99..106 + TARGET@99..106 + LITERAL@99..106 + BYTE_STRING@99..106 "x'1G2H'" + SEMICOLON@106..107 ";" + WHITESPACE@107..108 "\n" + SELECT@108..122 + SELECT_CLAUSE@108..122 + SELECT_KW@108..114 "select" + WHITESPACE@114..115 " " + TARGET_LIST@115..122 + TARGET@115..122 + LITERAL@115..122 + BYTE_STRING@115..122 "X'GHIJ'" + SEMICOLON@122..123 ";" + WHITESPACE@123..124 "\n" + +error[syntax-error]: "Z" is not a valid hexadecimal digit + ╭▸ +7 │ select x'1FZ'; + ╰╴ ━ +error[syntax-error]: "G" is not a valid hexadecimal digit + ╭▸ +8 │ select x'1G'; + ╰╴ ━ +error[syntax-error]: "G" is not a valid hexadecimal digit + ╭▸ +9 │ select x'1G2H'; + ╰╴ ━ +error[syntax-error]: "H" is not a valid hexadecimal digit + ╭▸ +9 │ select x'1G2H'; + ╰╴ ━ +error[syntax-error]: "G" is not a valid hexadecimal digit + ╭▸ +10 │ select X'GHIJ'; + ╰╴ ━ +error[syntax-error]: "H" is not a valid hexadecimal digit + ╭▸ +10 │ select X'GHIJ'; + ╰╴ ━ +error[syntax-error]: "I" is not a valid hexadecimal digit + ╭▸ +10 │ select X'GHIJ'; + ╰╴ ━ +error[syntax-error]: "J" is not a valid hexadecimal digit + ╭▸ +10 │ select X'GHIJ'; + ╰╴ ━ diff --git a/crates/squawk_syntax/src/snapshots/squawk_syntax__test__escape_string_validation.snap b/crates/squawk_syntax/src/snapshots/squawk_syntax__test__escape_string_validation.snap new file mode 100644 index 00000000..f40d74a5 --- /dev/null +++ b/crates/squawk_syntax/src/snapshots/squawk_syntax__test__escape_string_validation.snap @@ -0,0 +1,82 @@ +--- +source: crates/squawk_syntax/src/test.rs +input_file: crates/squawk_syntax/test_data/validation/escape_string.sql +--- +SOURCE_FILE@0..148 + COMMENT@0..5 "-- ok" + WHITESPACE@5..6 "\n" + SELECT@6..27 + SELECT_CLAUSE@6..27 + SELECT_KW@6..12 "select" + WHITESPACE@12..13 " " + TARGET_LIST@13..27 + TARGET@13..27 + LITERAL@13..27 + ESC_STRING@13..27 "e'a\\U00000062'" + SEMICOLON@27..28 ";" + WHITESPACE@28..29 "\n" + SELECT@29..44 + SELECT_CLAUSE@29..44 + SELECT_KW@29..35 "select" + WHITESPACE@35..36 " " + TARGET_LIST@36..44 + TARGET@36..44 + LITERAL@36..44 + ESC_STRING@36..44 "e'\\\\u00'" + SEMICOLON@44..45 ";" + WHITESPACE@45..46 "\n" + SELECT@46..71 + SELECT_CLAUSE@46..71 + SELECT_KW@46..52 "select" + WHITESPACE@52..53 " " + TARGET_LIST@53..71 + TARGET@53..71 + LITERAL@53..71 + ESC_STRING@53..71 "e'no escapes here'" + SEMICOLON@71..72 ";" + WHITESPACE@72..74 "\n\n" + COMMENT@74..83 "-- errors" + WHITESPACE@83..84 "\n" + SELECT@84..98 + SELECT_CLAUSE@84..98 + SELECT_KW@84..90 "select" + WHITESPACE@90..91 " " + TARGET_LIST@91..98 + TARGET@91..98 + LITERAL@91..98 + ESC_STRING@91..98 "E'\\u00'" + SEMICOLON@98..99 ";" + WHITESPACE@99..100 "\n" + SELECT@100..116 + SELECT_CLAUSE@100..116 + SELECT_KW@100..106 "select" + WHITESPACE@106..107 " " + TARGET_LIST@107..116 + TARGET@107..116 + LITERAL@107..116 + ESC_STRING@107..116 "E'\\UFFFF'" + SEMICOLON@116..117 ";" + WHITESPACE@117..118 "\n" + SELECT@118..146 + SELECT_CLAUSE@118..146 + SELECT_KW@118..124 "select" + WHITESPACE@124..125 " " + TARGET_LIST@125..146 + TARGET@125..146 + LITERAL@125..146 + ESC_STRING@125..146 "E'hello \\UFGFF world'" + SEMICOLON@146..147 ";" + WHITESPACE@147..148 "\n" + +error[syntax-error]: Unicode escape requires 4 hex digits: \uXXXX + ╭▸ +7 │ select E'\u00'; + ╰╴ ━━━━ +error[syntax-error]: Unicode escape requires 8 hex digits: \UXXXXXXXX + ╭▸ +8 │ select E'\UFFFF'; + ╰╴ ━━━━━━ +error[syntax-error]: Unicode escape requires 8 hex digits: \UXXXXXXXX + ╭▸ +9 │ select E'hello \UFGFF world'; + ╰╴ ━━━ diff --git a/crates/squawk_syntax/src/snapshots/squawk_syntax__test__validate_string_continuation_validation.snap b/crates/squawk_syntax/src/snapshots/squawk_syntax__test__validate_string_continuation_validation.snap index 2fbdcfa1..87952cf1 100644 --- a/crates/squawk_syntax/src/snapshots/squawk_syntax__test__validate_string_continuation_validation.snap +++ b/crates/squawk_syntax/src/snapshots/squawk_syntax__test__validate_string_continuation_validation.snap @@ -2,7 +2,7 @@ source: crates/squawk_syntax/src/test.rs input_file: crates/squawk_syntax/test_data/validation/validate_string_continuation.sql --- -SOURCE_FILE@0..1140 +SOURCE_FILE@0..1228 COMMENT@0..28 "-- ok strings with ne ..." WHITESPACE@28..29 "\n" SELECT@29..48 @@ -212,259 +212,277 @@ SOURCE_FILE@0..1140 WHITESPACE@534..535 " " STRING@535..541 "'buzz'" SEMICOLON@541..542 ";" - WHITESPACE@542..544 "\n\n" - COMMENT@544..574 "-- error unicode esca ..." - WHITESPACE@574..575 "\n" - SELECT@575..594 - SELECT_CLAUSE@575..594 - SELECT_KW@575..581 "select" - WHITESPACE@581..582 " " - TARGET_LIST@582..594 - TARGET@582..594 - CAST_EXPR@582..594 - PATH_TYPE@582..583 - PATH@582..583 - PATH_SEGMENT@582..583 - NAME_REF@582..583 - IDENT@582..583 "u" - LITERAL@583..594 - STRING@583..588 "'foo'" - WHITESPACE@588..589 " " - STRING@589..594 "'bar'" - SEMICOLON@594..595 ";" - WHITESPACE@595..596 "\n" - SELECT@596..629 - SELECT_CLAUSE@596..629 - SELECT_KW@596..602 "select" - WHITESPACE@602..603 " " - TARGET_LIST@603..629 - TARGET@603..629 - CAST_EXPR@603..629 - PATH_TYPE@603..604 - PATH@603..604 - PATH_SEGMENT@603..604 - NAME_REF@603..604 - IDENT@603..604 "u" - LITERAL@604..629 - STRING@604..609 "'foo'" - WHITESPACE@609..610 " " - COMMENT@610..623 "/* comment */" - WHITESPACE@623..624 " " - STRING@624..629 "'bar'" - SEMICOLON@629..630 ";" - WHITESPACE@630..631 "\n" - SELECT@631..697 - SELECT_CLAUSE@631..697 - SELECT_KW@631..637 "select" - WHITESPACE@637..638 " " - TARGET_LIST@638..697 - TARGET@638..697 - CAST_EXPR@638..697 - PATH_TYPE@638..639 - PATH@638..639 - PATH_SEGMENT@638..639 - NAME_REF@638..639 - IDENT@638..639 "u" - LITERAL@639..697 - STRING@639..646 "'hello'" - WHITESPACE@646..647 " " - COMMENT@647..660 "/* comment */" - WHITESPACE@660..661 " " - STRING@661..666 "'bar'" - WHITESPACE@666..667 " " - COMMENT@667..688 "/* another comment */" - WHITESPACE@688..689 " " - STRING@689..697 "' world'" - SEMICOLON@697..698 ";" - WHITESPACE@698..699 "\n" - SELECT@699..733 - SELECT_CLAUSE@699..733 - SELECT_KW@699..705 "select" - WHITESPACE@705..706 " " - TARGET_LIST@706..733 - TARGET@706..733 - CAST_EXPR@706..733 - PATH_TYPE@706..707 - PATH@706..707 - PATH_SEGMENT@706..707 - NAME_REF@706..707 - IDENT@706..707 "u" - LITERAL@707..733 - STRING@707..714 "'hello'" - WHITESPACE@714..715 " " - COMMENT@715..725 "-- comment" - WHITESPACE@725..726 "\n" - STRING@726..733 "'world'" - SEMICOLON@733..734 ";" - WHITESPACE@734..735 "\n" - SELECT@735..761 - SELECT_CLAUSE@735..761 - SELECT_KW@735..741 "select" - WHITESPACE@741..742 " " - TARGET_LIST@742..761 - TARGET@742..761 - CAST_EXPR@742..761 - PATH_TYPE@742..743 - PATH@742..743 - PATH_SEGMENT@742..743 - NAME_REF@742..743 - IDENT@742..743 "u" - LITERAL@743..761 - STRING@743..748 "'foo'" - WHITESPACE@748..749 " " - STRING@749..754 "'bar'" - WHITESPACE@754..755 " " - STRING@755..761 "'buzz'" - SEMICOLON@761..762 ";" - WHITESPACE@762..764 "\n\n" - COMMENT@764..783 "-- error bit string" - WHITESPACE@783..784 "\n" - SELECT@784..801 - SELECT_CLAUSE@784..801 - SELECT_KW@784..790 "select" - WHITESPACE@790..791 " " - TARGET_LIST@791..801 - TARGET@791..801 - LITERAL@791..801 - BIT_STRING@791..796 "b'01'" - WHITESPACE@796..797 " " - STRING@797..801 "'10'" - SEMICOLON@801..802 ";" - WHITESPACE@802..803 "\n" - SELECT@803..834 - SELECT_CLAUSE@803..834 - SELECT_KW@803..809 "select" - WHITESPACE@809..810 " " - TARGET_LIST@810..834 - TARGET@810..834 - LITERAL@810..834 - BIT_STRING@810..815 "b'01'" - WHITESPACE@815..816 " " - COMMENT@816..829 "/* comment */" - WHITESPACE@829..830 " " - STRING@830..834 "'11'" - SEMICOLON@834..835 ";" - WHITESPACE@835..836 "\n" - SELECT@836..894 - SELECT_CLAUSE@836..894 - SELECT_KW@836..842 "select" - WHITESPACE@842..843 " " - TARGET_LIST@843..894 - TARGET@843..894 - LITERAL@843..894 - BIT_STRING@843..848 "b'01'" - WHITESPACE@848..849 " " - COMMENT@849..862 "/* comment */" - WHITESPACE@862..863 " " - STRING@863..867 "'11'" - WHITESPACE@867..868 " " - COMMENT@868..889 "/* another comment */" - WHITESPACE@889..890 " " - STRING@890..894 "'10'" - SEMICOLON@894..895 ";" - WHITESPACE@895..896 "\n" - SELECT@896..925 - SELECT_CLAUSE@896..925 - SELECT_KW@896..902 "select" - WHITESPACE@902..903 " " - TARGET_LIST@903..925 - TARGET@903..925 - LITERAL@903..925 - BIT_STRING@903..909 "b'111'" - WHITESPACE@909..910 " " - COMMENT@910..920 "-- comment" - WHITESPACE@920..921 "\n" - STRING@921..925 "'10'" - SEMICOLON@925..926 ";" - WHITESPACE@926..927 "\n" - SELECT@927..949 - SELECT_CLAUSE@927..949 - SELECT_KW@927..933 "select" - WHITESPACE@933..934 " " - TARGET_LIST@934..949 - TARGET@934..949 - LITERAL@934..949 - BIT_STRING@934..939 "b'10'" + WHITESPACE@542..543 "\n" + SELECT@543..563 + SELECT_CLAUSE@543..563 + SELECT_KW@543..549 "select" + WHITESPACE@549..550 " " + TARGET_LIST@550..563 + TARGET@550..563 + LITERAL@550..563 + ESC_STRING@550..556 "e'foo'" + WHITESPACE@556..557 "\n" + STRING@557..563 "'\\u00'" + SEMICOLON@563..564 ";" + WHITESPACE@564..566 "\n\n" + COMMENT@566..596 "-- error unicode esca ..." + WHITESPACE@596..597 "\n" + SELECT@597..617 + SELECT_CLAUSE@597..617 + SELECT_KW@597..603 "select" + WHITESPACE@603..604 " " + TARGET_LIST@604..617 + TARGET@604..617 + LITERAL@604..617 + UNICODE_ESC_STRING@604..611 "u&'foo'" + WHITESPACE@611..612 " " + STRING@612..617 "'bar'" + SEMICOLON@617..618 ";" + WHITESPACE@618..619 "\n" + SELECT@619..653 + SELECT_CLAUSE@619..653 + SELECT_KW@619..625 "select" + WHITESPACE@625..626 " " + TARGET_LIST@626..653 + TARGET@626..653 + LITERAL@626..653 + UNICODE_ESC_STRING@626..633 "u&'foo'" + WHITESPACE@633..634 " " + COMMENT@634..647 "/* comment */" + WHITESPACE@647..648 " " + STRING@648..653 "'bar'" + SEMICOLON@653..654 ";" + WHITESPACE@654..655 "\n" + SELECT@655..722 + SELECT_CLAUSE@655..722 + SELECT_KW@655..661 "select" + WHITESPACE@661..662 " " + TARGET_LIST@662..722 + TARGET@662..722 + LITERAL@662..722 + UNICODE_ESC_STRING@662..671 "u&'hello'" + WHITESPACE@671..672 " " + COMMENT@672..685 "/* comment */" + WHITESPACE@685..686 " " + STRING@686..691 "'bar'" + WHITESPACE@691..692 " " + COMMENT@692..713 "/* another comment */" + WHITESPACE@713..714 " " + STRING@714..722 "' world'" + SEMICOLON@722..723 ";" + WHITESPACE@723..724 "\n" + SELECT@724..759 + SELECT_CLAUSE@724..759 + SELECT_KW@724..730 "select" + WHITESPACE@730..731 " " + TARGET_LIST@731..759 + TARGET@731..759 + LITERAL@731..759 + UNICODE_ESC_STRING@731..740 "u&'hello'" + WHITESPACE@740..741 " " + COMMENT@741..751 "-- comment" + WHITESPACE@751..752 "\n" + STRING@752..759 "'world'" + SEMICOLON@759..760 ";" + WHITESPACE@760..761 "\n" + SELECT@761..788 + SELECT_CLAUSE@761..788 + SELECT_KW@761..767 "select" + WHITESPACE@767..768 " " + TARGET_LIST@768..788 + TARGET@768..788 + LITERAL@768..788 + UNICODE_ESC_STRING@768..775 "u&'foo'" + WHITESPACE@775..776 " " + STRING@776..781 "'bar'" + WHITESPACE@781..782 " " + STRING@782..788 "'buzz'" + SEMICOLON@788..789 ";" + WHITESPACE@789..790 "\n" + SELECT@790..811 + SELECT_CLAUSE@790..811 + SELECT_KW@790..796 "select" + WHITESPACE@796..797 " " + TARGET_LIST@797..811 + TARGET@797..811 + LITERAL@797..811 + UNICODE_ESC_STRING@797..804 "u&'foo'" + WHITESPACE@804..805 "\n" + STRING@805..811 "'\\010'" + SEMICOLON@811..812 ";" + WHITESPACE@812..814 "\n\n" + COMMENT@814..833 "-- error bit string" + WHITESPACE@833..834 "\n" + SELECT@834..851 + SELECT_CLAUSE@834..851 + SELECT_KW@834..840 "select" + WHITESPACE@840..841 " " + TARGET_LIST@841..851 + TARGET@841..851 + LITERAL@841..851 + BIT_STRING@841..846 "b'01'" + WHITESPACE@846..847 " " + STRING@847..851 "'10'" + SEMICOLON@851..852 ";" + WHITESPACE@852..853 "\n" + SELECT@853..884 + SELECT_CLAUSE@853..884 + SELECT_KW@853..859 "select" + WHITESPACE@859..860 " " + TARGET_LIST@860..884 + TARGET@860..884 + LITERAL@860..884 + BIT_STRING@860..865 "b'01'" + WHITESPACE@865..866 " " + COMMENT@866..879 "/* comment */" + WHITESPACE@879..880 " " + STRING@880..884 "'11'" + SEMICOLON@884..885 ";" + WHITESPACE@885..886 "\n" + SELECT@886..944 + SELECT_CLAUSE@886..944 + SELECT_KW@886..892 "select" + WHITESPACE@892..893 " " + TARGET_LIST@893..944 + TARGET@893..944 + LITERAL@893..944 + BIT_STRING@893..898 "b'01'" + WHITESPACE@898..899 " " + COMMENT@899..912 "/* comment */" + WHITESPACE@912..913 " " + STRING@913..917 "'11'" + WHITESPACE@917..918 " " + COMMENT@918..939 "/* another comment */" WHITESPACE@939..940 " " STRING@940..944 "'10'" - WHITESPACE@944..945 " " - STRING@945..949 "'11'" - SEMICOLON@949..950 ";" - WHITESPACE@950..952 "\n\n" - COMMENT@952..972 "-- error byte string" - WHITESPACE@972..973 "\n" - SELECT@973..990 - SELECT_CLAUSE@973..990 - SELECT_KW@973..979 "select" - WHITESPACE@979..980 " " - TARGET_LIST@980..990 - TARGET@980..990 - LITERAL@980..990 - BYTE_STRING@980..985 "x'0F'" - WHITESPACE@985..986 " " - STRING@986..990 "'10'" - SEMICOLON@990..991 ";" - WHITESPACE@991..992 "\n" - SELECT@992..1023 - SELECT_CLAUSE@992..1023 - SELECT_KW@992..998 "select" - WHITESPACE@998..999 " " - TARGET_LIST@999..1023 - TARGET@999..1023 - LITERAL@999..1023 - BYTE_STRING@999..1004 "x'01'" - WHITESPACE@1004..1005 " " - COMMENT@1005..1018 "/* comment */" - WHITESPACE@1018..1019 " " - STRING@1019..1023 "'1F'" - SEMICOLON@1023..1024 ";" - WHITESPACE@1024..1025 "\n" - SELECT@1025..1083 - SELECT_CLAUSE@1025..1083 - SELECT_KW@1025..1031 "select" - WHITESPACE@1031..1032 " " - TARGET_LIST@1032..1083 - TARGET@1032..1083 - LITERAL@1032..1083 - BYTE_STRING@1032..1037 "x'01'" - WHITESPACE@1037..1038 " " - COMMENT@1038..1051 "/* comment */" - WHITESPACE@1051..1052 " " - STRING@1052..1056 "'1F'" - WHITESPACE@1056..1057 " " - COMMENT@1057..1078 "/* another comment */" - WHITESPACE@1078..1079 " " - STRING@1079..1083 "'10'" - SEMICOLON@1083..1084 ";" - WHITESPACE@1084..1085 "\n" - SELECT@1085..1114 - SELECT_CLAUSE@1085..1114 - SELECT_KW@1085..1091 "select" - WHITESPACE@1091..1092 " " - TARGET_LIST@1092..1114 - TARGET@1092..1114 - LITERAL@1092..1114 - BYTE_STRING@1092..1098 "x'1F1'" - WHITESPACE@1098..1099 " " - COMMENT@1099..1109 "-- comment" - WHITESPACE@1109..1110 "\n" - STRING@1110..1114 "'10'" - SEMICOLON@1114..1115 ";" - WHITESPACE@1115..1116 "\n" - SELECT@1116..1138 - SELECT_CLAUSE@1116..1138 - SELECT_KW@1116..1122 "select" - WHITESPACE@1122..1123 " " - TARGET_LIST@1123..1138 - TARGET@1123..1138 - LITERAL@1123..1138 - BYTE_STRING@1123..1128 "x'1F'" - WHITESPACE@1128..1129 " " - STRING@1129..1133 "'F0'" - WHITESPACE@1133..1134 " " - STRING@1134..1138 "'11'" - SEMICOLON@1138..1139 ";" - WHITESPACE@1139..1140 "\n" + SEMICOLON@944..945 ";" + WHITESPACE@945..946 "\n" + SELECT@946..975 + SELECT_CLAUSE@946..975 + SELECT_KW@946..952 "select" + WHITESPACE@952..953 " " + TARGET_LIST@953..975 + TARGET@953..975 + LITERAL@953..975 + BIT_STRING@953..959 "b'111'" + WHITESPACE@959..960 " " + COMMENT@960..970 "-- comment" + WHITESPACE@970..971 "\n" + STRING@971..975 "'10'" + SEMICOLON@975..976 ";" + WHITESPACE@976..977 "\n" + SELECT@977..999 + SELECT_CLAUSE@977..999 + SELECT_KW@977..983 "select" + WHITESPACE@983..984 " " + TARGET_LIST@984..999 + TARGET@984..999 + LITERAL@984..999 + BIT_STRING@984..989 "b'10'" + WHITESPACE@989..990 " " + STRING@990..994 "'10'" + WHITESPACE@994..995 " " + STRING@995..999 "'11'" + SEMICOLON@999..1000 ";" + WHITESPACE@1000..1001 "\n" + SELECT@1001..1018 + SELECT_CLAUSE@1001..1018 + SELECT_KW@1001..1007 "select" + WHITESPACE@1007..1008 " " + TARGET_LIST@1008..1018 + TARGET@1008..1018 + LITERAL@1008..1018 + BIT_STRING@1008..1013 "b'10'" + WHITESPACE@1013..1014 "\n" + STRING@1014..1018 "'20'" + SEMICOLON@1018..1019 ";" + WHITESPACE@1019..1021 "\n\n" + COMMENT@1021..1041 "-- error byte string" + WHITESPACE@1041..1042 "\n" + SELECT@1042..1059 + SELECT_CLAUSE@1042..1059 + SELECT_KW@1042..1048 "select" + WHITESPACE@1048..1049 " " + TARGET_LIST@1049..1059 + TARGET@1049..1059 + LITERAL@1049..1059 + BYTE_STRING@1049..1054 "x'0F'" + WHITESPACE@1054..1055 " " + STRING@1055..1059 "'10'" + SEMICOLON@1059..1060 ";" + WHITESPACE@1060..1061 "\n" + SELECT@1061..1092 + SELECT_CLAUSE@1061..1092 + SELECT_KW@1061..1067 "select" + WHITESPACE@1067..1068 " " + TARGET_LIST@1068..1092 + TARGET@1068..1092 + LITERAL@1068..1092 + BYTE_STRING@1068..1073 "x'01'" + WHITESPACE@1073..1074 " " + COMMENT@1074..1087 "/* comment */" + WHITESPACE@1087..1088 " " + STRING@1088..1092 "'1F'" + SEMICOLON@1092..1093 ";" + WHITESPACE@1093..1094 "\n" + SELECT@1094..1152 + SELECT_CLAUSE@1094..1152 + SELECT_KW@1094..1100 "select" + WHITESPACE@1100..1101 " " + TARGET_LIST@1101..1152 + TARGET@1101..1152 + LITERAL@1101..1152 + BYTE_STRING@1101..1106 "x'01'" + WHITESPACE@1106..1107 " " + COMMENT@1107..1120 "/* comment */" + WHITESPACE@1120..1121 " " + STRING@1121..1125 "'1F'" + WHITESPACE@1125..1126 " " + COMMENT@1126..1147 "/* another comment */" + WHITESPACE@1147..1148 " " + STRING@1148..1152 "'10'" + SEMICOLON@1152..1153 ";" + WHITESPACE@1153..1154 "\n" + SELECT@1154..1183 + SELECT_CLAUSE@1154..1183 + SELECT_KW@1154..1160 "select" + WHITESPACE@1160..1161 " " + TARGET_LIST@1161..1183 + TARGET@1161..1183 + LITERAL@1161..1183 + BYTE_STRING@1161..1167 "x'1F1'" + WHITESPACE@1167..1168 " " + COMMENT@1168..1178 "-- comment" + WHITESPACE@1178..1179 "\n" + STRING@1179..1183 "'10'" + SEMICOLON@1183..1184 ";" + WHITESPACE@1184..1185 "\n" + SELECT@1185..1207 + SELECT_CLAUSE@1185..1207 + SELECT_KW@1185..1191 "select" + WHITESPACE@1191..1192 " " + TARGET_LIST@1192..1207 + TARGET@1192..1207 + LITERAL@1192..1207 + BYTE_STRING@1192..1197 "x'1F'" + WHITESPACE@1197..1198 " " + STRING@1198..1202 "'F0'" + WHITESPACE@1202..1203 " " + STRING@1203..1207 "'11'" + SEMICOLON@1207..1208 ";" + WHITESPACE@1208..1209 "\n" + SELECT@1209..1226 + SELECT_CLAUSE@1209..1226 + SELECT_KW@1209..1215 "select" + WHITESPACE@1215..1216 " " + TARGET_LIST@1216..1226 + TARGET@1216..1226 + LITERAL@1216..1226 + BYTE_STRING@1216..1221 "x'1F'" + WHITESPACE@1221..1222 "\n" + STRING@1222..1226 "'G0'" + SEMICOLON@1226..1227 ";" + WHITESPACE@1227..1228 "\n" error[syntax-error]: Expected new line or comma between string literals ╭▸ @@ -522,107 +540,103 @@ error[syntax-error]: Expected new line or comma between string literals ╭▸ 27 │ select e'foo' 'bar' 'buzz'; ╰╴ ━ -error[syntax-error]: unknown literal prefix +error[syntax-error]: Unicode escape requires 4 hex digits: \uXXXX ╭▸ -30 │ select u'foo' 'bar'; - ╰╴ ━ +29 │ '\u00'; + ╰╴ ━━━━ error[syntax-error]: Expected new line or comma between string literals ╭▸ -30 │ select u'foo' 'bar'; - ╰╴ ━ -error[syntax-error]: unknown literal prefix - ╭▸ -31 │ select u'foo' /* comment */ 'bar'; - ╰╴ ━ +32 │ select u&'foo' 'bar'; + ╰╴ ━ error[syntax-error]: Comments between string literals are not allowed. ╭▸ -31 │ select u'foo' /* comment */ 'bar'; - ╰╴ ━━━━━━━━━━━━━ -error[syntax-error]: unknown literal prefix - ╭▸ -32 │ select u'hello' /* comment */ 'bar' /* another comment */ ' world'; - ╰╴ ━ +33 │ select u&'foo' /* comment */ 'bar'; + ╰╴ ━━━━━━━━━━━━━ error[syntax-error]: Comments between string literals are not allowed. ╭▸ -32 │ select u'hello' /* comment */ 'bar' /* another comment */ ' world'; - ╰╴ ━━━━━━━━━━━━━ +34 │ select u&'hello' /* comment */ 'bar' /* another comment */ ' world'; + ╰╴ ━━━━━━━━━━━━━ error[syntax-error]: Comments between string literals are not allowed. ╭▸ -32 │ select u'hello' /* comment */ 'bar' /* another comment */ ' world'; - ╰╴ ━━━━━━━━━━━━━━━━━━━━━ -error[syntax-error]: unknown literal prefix - ╭▸ -33 │ select u'hello' -- comment - ╰╴ ━ +34 │ select u&'hello' /* comment */ 'bar' /* another comment */ ' world'; + ╰╴ ━━━━━━━━━━━━━━━━━━━━━ error[syntax-error]: Comments between string literals are not allowed. ╭▸ -33 │ select u'hello' -- comment - ╰╴ ━━━━━━━━━━ -error[syntax-error]: unknown literal prefix - ╭▸ -35 │ select u'foo' 'bar' 'buzz'; - ╰╴ ━ +35 │ select u&'hello' -- comment + ╰╴ ━━━━━━━━━━ error[syntax-error]: Expected new line or comma between string literals ╭▸ -35 │ select u'foo' 'bar' 'buzz'; - ╰╴ ━ +37 │ select u&'foo' 'bar' 'buzz'; + ╰╴ ━ error[syntax-error]: Expected new line or comma between string literals ╭▸ -35 │ select u'foo' 'bar' 'buzz'; - ╰╴ ━ +37 │ select u&'foo' 'bar' 'buzz'; + ╰╴ ━ +error[syntax-error]: Unicode escape requires 4 hex digits: \XXXX + ╭▸ +39 │ '\010'; + ╰╴ ━━━━ error[syntax-error]: Expected new line or comma between string literals ╭▸ -38 │ select b'01' '10'; +42 │ select b'01' '10'; ╰╴ ━ error[syntax-error]: Comments between string literals are not allowed. ╭▸ -39 │ select b'01' /* comment */ '11'; +43 │ select b'01' /* comment */ '11'; ╰╴ ━━━━━━━━━━━━━ error[syntax-error]: Comments between string literals are not allowed. ╭▸ -40 │ select b'01' /* comment */ '11' /* another comment */ '10'; +44 │ select b'01' /* comment */ '11' /* another comment */ '10'; ╰╴ ━━━━━━━━━━━━━ error[syntax-error]: Comments between string literals are not allowed. ╭▸ -40 │ select b'01' /* comment */ '11' /* another comment */ '10'; +44 │ select b'01' /* comment */ '11' /* another comment */ '10'; ╰╴ ━━━━━━━━━━━━━━━━━━━━━ error[syntax-error]: Comments between string literals are not allowed. ╭▸ -41 │ select b'111' -- comment +45 │ select b'111' -- comment ╰╴ ━━━━━━━━━━ error[syntax-error]: Expected new line or comma between string literals ╭▸ -43 │ select b'10' '10' '11'; +47 │ select b'10' '10' '11'; ╰╴ ━ error[syntax-error]: Expected new line or comma between string literals ╭▸ -43 │ select b'10' '10' '11'; +47 │ select b'10' '10' '11'; ╰╴ ━ +error[syntax-error]: "2" is not a valid binary digit + ╭▸ +49 │ '20'; + ╰╴ ━ error[syntax-error]: Expected new line or comma between string literals ╭▸ -46 │ select x'0F' '10'; +52 │ select x'0F' '10'; ╰╴ ━ error[syntax-error]: Comments between string literals are not allowed. ╭▸ -47 │ select x'01' /* comment */ '1F'; +53 │ select x'01' /* comment */ '1F'; ╰╴ ━━━━━━━━━━━━━ error[syntax-error]: Comments between string literals are not allowed. ╭▸ -48 │ select x'01' /* comment */ '1F' /* another comment */ '10'; +54 │ select x'01' /* comment */ '1F' /* another comment */ '10'; ╰╴ ━━━━━━━━━━━━━ error[syntax-error]: Comments between string literals are not allowed. ╭▸ -48 │ select x'01' /* comment */ '1F' /* another comment */ '10'; +54 │ select x'01' /* comment */ '1F' /* another comment */ '10'; ╰╴ ━━━━━━━━━━━━━━━━━━━━━ error[syntax-error]: Comments between string literals are not allowed. ╭▸ -49 │ select x'1F1' -- comment +55 │ select x'1F1' -- comment ╰╴ ━━━━━━━━━━ error[syntax-error]: Expected new line or comma between string literals ╭▸ -51 │ select x'1F' 'F0' '11'; +57 │ select x'1F' 'F0' '11'; ╰╴ ━ error[syntax-error]: Expected new line or comma between string literals ╭▸ -51 │ select x'1F' 'F0' '11'; +57 │ select x'1F' 'F0' '11'; ╰╴ ━ +error[syntax-error]: "G" is not a valid hexadecimal digit + ╭▸ +59 │ 'G0'; + ╰╴ ━ diff --git a/crates/squawk_syntax/src/validation.rs b/crates/squawk_syntax/src/validation.rs index d73519c2..af27fbff 100644 --- a/crates/squawk_syntax/src/validation.rs +++ b/crates/squawk_syntax/src/validation.rs @@ -178,10 +178,140 @@ fn validate_literal(lit: ast::Literal, acc: &mut Vec) { } validate_unicode_esc_string(&lit, acc); + validate_prefixed_strings(&lit, acc); +} + +#[derive(Clone, Copy)] +enum PrefixedKind { + Bit, + Byte, + Esc, +} + +fn validate_prefixed_strings(lit: &ast::Literal, acc: &mut Vec) { + let mut continuation: Option = None; + for e in lit.syntax().children_with_tokens() { + let Some(token) = e.into_token() else { + continue; + }; + match token.kind() { + ESC_STRING => { + let Some((inner, inner_start)) = prefixed_str_inner(&token, ['e', 'E']) else { + continue; + }; + validate_escape_string_content(inner, inner_start, acc); + continuation = Some(PrefixedKind::Esc); + } + BIT_STRING => { + let Some((inner, inner_start)) = prefixed_str_inner(&token, ['b', 'B']) else { + continue; + }; + validate_bit_string_content(inner, inner_start, acc); + continuation = Some(PrefixedKind::Bit); + } + BYTE_STRING => { + let Some((inner, inner_start)) = prefixed_str_inner(&token, ['x', 'X']) else { + continue; + }; + validate_byte_string_content(inner, inner_start, acc); + continuation = Some(PrefixedKind::Byte); + } + STRING => { + let Some(continuation) = continuation else { + continue; + }; + let Some(inner) = token + .text() + .strip_prefix('\'') + .and_then(|s| s.strip_suffix('\'')) + else { + continue; + }; + let inner_start = token.text_range().start() + TextSize::new(1); + match continuation { + PrefixedKind::Esc => validate_escape_string_content(inner, inner_start, acc), + PrefixedKind::Bit => validate_bit_string_content(inner, inner_start, acc), + PrefixedKind::Byte => validate_byte_string_content(inner, inner_start, acc), + }; + } + WHITESPACE | COMMENT => (), + _ => continuation = None, + } + } +} + +fn validate_bit_string_content(inner: &str, inner_start: TextSize, acc: &mut Vec) { + for (i, c) in inner.char_indices() { + if c != '0' && c != '1' { + acc.push(SyntaxError::new( + format!("\"{c}\" is not a valid binary digit"), + offset_range(inner_start, i..i + c.len_utf8()), + )); + } + } +} + +fn validate_byte_string_content(inner: &str, inner_start: TextSize, acc: &mut Vec) { + for (i, c) in inner.char_indices() { + if !c.is_ascii_hexdigit() { + acc.push(SyntaxError::new( + format!("\"{c}\" is not a valid hexadecimal digit"), + offset_range(inner_start, i..i + c.len_utf8()), + )); + } + } +} + +fn prefixed_str_inner(token: &SyntaxToken, prefix: [char; 2]) -> Option<(&str, TextSize)> { + let inner = token + .text() + .strip_prefix(prefix) + .and_then(|s| s.strip_prefix('\'')) + .and_then(|s| s.strip_suffix('\''))?; + let inner_start = token.text_range().start() + TextSize::new(2); + Some((inner, inner_start)) +} + +fn validate_escape_string_content(inner: &str, inner_start: TextSize, acc: &mut Vec) { + let mut chars = inner.char_indices().peekable(); + while let Some((esc_start, c)) = chars.next() { + if c != '\\' { + continue; + } + let Some((next_pos, next_c)) = chars.next() else { + return; + }; + let (required, example) = match next_c { + 'u' => (4usize, r"\uXXXX"), + 'U' => (8usize, r"\UXXXXXXXX"), + _ => continue, + }; + let mut end = next_pos + next_c.len_utf8(); + let mut got_all = true; + for _ in 0..required { + match chars.peek() { + Some(&(i, ch)) if ch.is_ascii_hexdigit() => { + end = i + ch.len_utf8(); + chars.next(); + } + _ => { + got_all = false; + break; + } + } + } + if !got_all { + acc.push(SyntaxError::new( + format!("Unicode escape requires {required} hex digits: {example}"), + offset_range(inner_start, esc_start..end), + )); + } + } } fn validate_unicode_esc_string(lit: &ast::Literal, acc: &mut Vec) { let mut unicode_esc = None; + let mut continuations: Vec = vec![]; let mut seen_uescape = false; let mut escape_char = '\\'; for e in lit.syntax().children_with_tokens() { @@ -204,36 +334,55 @@ fn validate_unicode_esc_string(lit: &ast::Literal, acc: &mut Vec) { }; break; } + STRING if unicode_esc.is_some() => continuations.push(token), _ => (), } } let Some(token) = unicode_esc else { return; }; - let text = token.text(); - let Some(inside) = text - .strip_prefix("U&'") - .or_else(|| text.strip_prefix("u&'")) + let Some(inner) = token + .text() + .strip_prefix(['u', 'U']) + .and_then(|s| s.strip_prefix("&'")) .and_then(|s| s.strip_suffix('\'')) else { return; }; - let inside_start = token.text_range().start() + TextSize::new(3); - escape_unicode_esc_str(inside, escape_char, |range, result| { + let inner_start = token.text_range().start() + TextSize::new(3); + escape_unicode_esc_str(inner, escape_char, |range, result| { if let Err(err) = result { acc.push(SyntaxError::new( err.to_string(), - offset_range(inside_start, range), + offset_range(inner_start, range), )); } }); + for cont in continuations { + let Some(cont_inner) = cont + .text() + .strip_prefix('\'') + .and_then(|s| s.strip_suffix('\'')) + else { + continue; + }; + let cont_start = cont.text_range().start() + TextSize::new(1); + escape_unicode_esc_str(cont_inner, escape_char, |range, result| { + if let Err(err) = result { + acc.push(SyntaxError::new( + err.to_string(), + offset_range(cont_start, range), + )); + } + }); + } } fn validate_unicode_esc_ident(token: &SyntaxToken, acc: &mut Vec) { - let text = token.text(); - let Some(inside) = text - .strip_prefix("U&\"") - .or_else(|| text.strip_prefix("u&\"")) + let Some(inner) = token + .text() + .strip_prefix(['u', 'U']) + .and_then(|s| s.strip_prefix("&\"")) .and_then(|s| s.strip_suffix('"')) else { return; @@ -266,12 +415,12 @@ fn validate_unicode_esc_ident(token: &SyntaxToken, acc: &mut Vec) { next = element.next_sibling_or_token(); } - let inside_start = token.text_range().start() + TextSize::new(3); - escape_unicode_esc_str(inside, escape_char, |range, result| { + let inner_start = token.text_range().start() + TextSize::new(3); + escape_unicode_esc_str(inner, escape_char, |range, result| { if let Err(err) = result { acc.push(SyntaxError::new( err.to_string(), - offset_range(inside_start, range), + offset_range(inner_start, range), )); } }); @@ -296,8 +445,7 @@ const fn is_valid_uescape_char(byte: u8) -> bool { } fn uescape_char(string_token: &SyntaxToken) -> Option { - let text = string_token.text(); - let inner = text.strip_prefix('\'')?.strip_suffix('\'')?; + let inner = string_token.text().strip_prefix('\'')?.strip_suffix('\'')?; let &[byte] = inner.as_bytes() else { return None; }; diff --git a/crates/squawk_syntax/test_data/validation/bit_string.sql b/crates/squawk_syntax/test_data/validation/bit_string.sql new file mode 100644 index 00000000..66b65eff --- /dev/null +++ b/crates/squawk_syntax/test_data/validation/bit_string.sql @@ -0,0 +1,10 @@ +-- ok +select b'01'; +select B'1010'; +select b''; + +-- errors +select b'012'; +select b'01A'; +select b'0 1'; +select B'2345'; diff --git a/crates/squawk_syntax/test_data/validation/byte_string.sql b/crates/squawk_syntax/test_data/validation/byte_string.sql new file mode 100644 index 00000000..4538b666 --- /dev/null +++ b/crates/squawk_syntax/test_data/validation/byte_string.sql @@ -0,0 +1,10 @@ +-- ok +select x'1F'; +select X'deadBEEF'; +select x''; + +-- errors +select x'1FZ'; +select x'1G'; +select x'1G2H'; +select X'GHIJ'; diff --git a/crates/squawk_syntax/test_data/validation/escape_string.sql b/crates/squawk_syntax/test_data/validation/escape_string.sql new file mode 100644 index 00000000..6c124078 --- /dev/null +++ b/crates/squawk_syntax/test_data/validation/escape_string.sql @@ -0,0 +1,9 @@ +-- ok +select e'a\U00000062'; +select e'\\u00'; +select e'no escapes here'; + +-- errors +select E'\u00'; +select E'\UFFFF'; +select E'hello \UFGFF world'; diff --git a/crates/squawk_syntax/test_data/validation/validate_string_continuation.sql b/crates/squawk_syntax/test_data/validation/validate_string_continuation.sql index 57f14ca3..ce4e4450 100644 --- a/crates/squawk_syntax/test_data/validation/validate_string_continuation.sql +++ b/crates/squawk_syntax/test_data/validation/validate_string_continuation.sql @@ -25,14 +25,18 @@ select e'hello' /* comment */ 'bar' /* another comment */ ' world'; select e'hello' -- comment 'world'; select e'foo' 'bar' 'buzz'; +select e'foo' +'\u00'; -- error unicode escape string -select u'foo' 'bar'; -select u'foo' /* comment */ 'bar'; -select u'hello' /* comment */ 'bar' /* another comment */ ' world'; -select u'hello' -- comment +select u&'foo' 'bar'; +select u&'foo' /* comment */ 'bar'; +select u&'hello' /* comment */ 'bar' /* another comment */ ' world'; +select u&'hello' -- comment 'world'; -select u'foo' 'bar' 'buzz'; +select u&'foo' 'bar' 'buzz'; +select u&'foo' +'\010'; -- error bit string select b'01' '10'; @@ -41,6 +45,8 @@ select b'01' /* comment */ '11' /* another comment */ '10'; select b'111' -- comment '10'; select b'10' '10' '11'; +select b'10' +'20'; -- error byte string select x'0F' '10'; @@ -49,3 +55,5 @@ select x'01' /* comment */ '1F' /* another comment */ '10'; select x'1F1' -- comment '10'; select x'1F' 'F0' '11'; +select x'1F' +'G0';