@@ -527,6 +527,36 @@ chc__strdup(const chc_alloc *al, const char *s, size_t n, chc_err *err)
527527 return p ;
528528}
529529
530+ /* Copy a quoted identifier body (between the outer quote chars), resolving
531+ * the two escape forms ClickHouse's lexer accepts: a doubled quote stands
532+ * for one literal quote (`` `` `` -> `` ` ``, `""` -> `"`), & `\X` keeps X
533+ * verbatim. n is an upper bound; the resolved length is returned via
534+ * *out_len. */
535+ static char *
536+ chc__strdup_unquote (const chc_alloc * al , const char * s , size_t n , char quote ,
537+ size_t * out_len , chc_err * err )
538+ {
539+ char * p = chc__alloc (al , n + 1 , err );
540+ if (!p ) return NULL ;
541+ size_t o = 0 ;
542+ for (size_t i = 0 ; i < n ; i ++ ) {
543+ char c = s [i ];
544+ if (c == '\\' && i + 1 < n ) {
545+ p [o ++ ] = s [++ i ];
546+ continue ;
547+ }
548+ if (c == quote && i + 1 < n && s [i + 1 ] == quote ) {
549+ p [o ++ ] = quote ;
550+ i ++ ;
551+ continue ;
552+ }
553+ p [o ++ ] = c ;
554+ }
555+ p [o ] = '\0' ;
556+ * out_len = o ;
557+ return p ;
558+ }
559+
530560#ifdef CHC_PROVIDE_STDLIB_ALLOC
531561#include <stdlib.h>
532562static void * chc__std_alloc (void * ud , size_t n )
@@ -852,6 +882,10 @@ typedef struct {
852882 chc__tok_kind kind ;
853883 const char * start ;
854884 size_t len ;
885+ /* For CHC__TOK_NAME: 0 = bare identifier; '`' or '"' = quoted, & start/len
886+ * span the body between the outer quotes (still raw -- doubled-quote &
887+ * backslash escapes are resolved when copied out). */
888+ char quote ;
855889} chc__tok ;
856890
857891typedef struct {
@@ -867,37 +901,65 @@ chc__next_tok(chc__lex *lx)
867901 char c = * lx -> cur ;
868902 if (c == ' ' || c == '\t' || c == '\n' || c == '\r' ) { lx -> cur ++ ; continue ; }
869903 const char * st = lx -> cur ;
870- if (c == '(' ) { lx -> cur ++ ; return (chc__tok ){CHC__TOK_LPAREN , st , 1 }; }
871- if (c == ')' ) { lx -> cur ++ ; return (chc__tok ){CHC__TOK_RPAREN , st , 1 }; }
872- if (c == ',' ) { lx -> cur ++ ; return (chc__tok ){CHC__TOK_COMMA , st , 1 }; }
873- if (c == '=' ) { lx -> cur ++ ; return (chc__tok ){CHC__TOK_EQ , st , 1 }; }
904+ if (c == '(' ) { lx -> cur ++ ; return (chc__tok ){CHC__TOK_LPAREN , st , 1 , 0 }; }
905+ if (c == ')' ) { lx -> cur ++ ; return (chc__tok ){CHC__TOK_RPAREN , st , 1 , 0 }; }
906+ if (c == ',' ) { lx -> cur ++ ; return (chc__tok ){CHC__TOK_COMMA , st , 1 , 0 }; }
907+ if (c == '=' ) { lx -> cur ++ ; return (chc__tok ){CHC__TOK_EQ , st , 1 , 0 }; }
874908 if (c == '\'' ) {
875909 /* single-quoted string; clickhouse-cpp does not escape, so we
876910 * accept anything up to the next unescaped quote. */
877911 lx -> cur ++ ;
878912 const char * body = lx -> cur ;
879913 while (lx -> cur < lx -> end && * lx -> cur != '\'' ) lx -> cur ++ ;
880- if (lx -> cur >= lx -> end ) return (chc__tok ){CHC__TOK_INVALID , st , 0 };
914+ if (lx -> cur >= lx -> end ) return (chc__tok ){CHC__TOK_INVALID , st , 0 , 0 };
881915 size_t blen = (size_t ) (lx -> cur - body );
882916 lx -> cur ++ ; /* eat closing ' */
883- return (chc__tok ){CHC__TOK_STRING , body , blen };
917+ return (chc__tok ){CHC__TOK_STRING , body , blen , 0 };
918+ }
919+ if (c == '`' || c == '"' ) {
920+ /* Quoted identifier, matching ClickHouse Lexer.cpp `quotedString`:
921+ * doubled quote (`` `` `` or `""`) & backslash-escapes are skipped
922+ * during scanning, resolved at copy time. */
923+ char q = c ;
924+ lx -> cur ++ ;
925+ const char * body = lx -> cur ;
926+ while (lx -> cur < lx -> end ) {
927+ char d = * lx -> cur ;
928+ if (d == '\\' ) {
929+ lx -> cur ++ ;
930+ if (lx -> cur < lx -> end ) lx -> cur ++ ;
931+ continue ;
932+ }
933+ if (d == q ) {
934+ if (lx -> cur + 1 < lx -> end && lx -> cur [1 ] == q ) {
935+ lx -> cur += 2 ;
936+ continue ;
937+ }
938+ break ;
939+ }
940+ lx -> cur ++ ;
941+ }
942+ if (lx -> cur >= lx -> end ) return (chc__tok ){CHC__TOK_INVALID , st , 0 , 0 };
943+ size_t blen = (size_t ) (lx -> cur - body );
944+ lx -> cur ++ ; /* eat closing quote */
945+ return (chc__tok ){CHC__TOK_NAME , body , blen , q };
884946 }
885947 if (isalpha ((unsigned char ) c ) || c == '_' ) {
886948 while (lx -> cur < lx -> end ) {
887949 char d = * lx -> cur ;
888950 if (!(isalnum ((unsigned char ) d ) || d == '_' )) break ;
889951 lx -> cur ++ ;
890952 }
891- return (chc__tok ){CHC__TOK_NAME , st , (size_t ) (lx -> cur - st )};
953+ return (chc__tok ){CHC__TOK_NAME , st , (size_t ) (lx -> cur - st ), 0 };
892954 }
893955 if (isdigit ((unsigned char ) c ) || c == '-' ) {
894956 lx -> cur ++ ;
895957 while (lx -> cur < lx -> end && isdigit ((unsigned char ) * lx -> cur )) lx -> cur ++ ;
896- return (chc__tok ){CHC__TOK_NUMBER , st , (size_t ) (lx -> cur - st )};
958+ return (chc__tok ){CHC__TOK_NUMBER , st , (size_t ) (lx -> cur - st ), 0 };
897959 }
898- return (chc__tok ){CHC__TOK_INVALID , st , 0 };
960+ return (chc__tok ){CHC__TOK_INVALID , st , 0 , 0 };
899961 }
900- return (chc__tok ){CHC__TOK_EOS , lx -> end , 0 };
962+ return (chc__tok ){CHC__TOK_EOS , lx -> end , 0 , 0 };
901963}
902964
903965static chc__tok
@@ -1255,7 +1317,7 @@ chc__parse_type(chc__lex *lx, const chc_alloc *al,
12551317 chc_type * * out , chc_err * err )
12561318{
12571319 chc__tok head = chc__eat_tok (lx );
1258- if (head .kind != CHC__TOK_NAME )
1320+ if (head .kind != CHC__TOK_NAME || head . quote )
12591321 return chc__err_set (err , CHC_ERR_TYPE , "expected type name" );
12601322
12611323 chc_type * t = chc__calloc (al , sizeof * t , err );
@@ -1397,19 +1459,26 @@ chc__parse_type(chc__lex *lx, const chc_alloc *al,
13971459 bool has_field = false;
13981460 if (is_tuple && la .kind == CHC__TOK_NAME ) {
13991461 chc__eat_tok (lx );
1400- chc__tok la2 = chc__peek_tok ( lx );
1401- if ( la2 . kind == CHC__TOK_NAME ) {
1402- /* `la` is a field-name; `la2` starts the type . */
1462+ if ( la . quote ) {
1463+ /* `\`x\`` or `"x"` is never a type head, so it must be
1464+ * the field label . */
14031465 field = la ;
14041466 has_field = true;
14051467 } else {
1406- /* `la` was the type's leading NAME (terminal or
1407- * parametric like `Tuple(LowCardinality(...))`).
1408- * Put it back & rewind cur to la2's start so the
1409- * next peek re-lexes la2. */
1410- lx -> peeked = la ;
1411- lx -> has_peek = true;
1412- lx -> cur = la2 .start ;
1468+ chc__tok la2 = chc__peek_tok (lx );
1469+ if (la2 .kind == CHC__TOK_NAME ) {
1470+ /* `la` is a field-name; `la2` starts the type. */
1471+ field = la ;
1472+ has_field = true;
1473+ } else {
1474+ /* `la` was the type's leading NAME (terminal or
1475+ * parametric like `Tuple(LowCardinality(...))`).
1476+ * Put it back & rewind cur to la2's start so the
1477+ * next peek re-lexes la2. */
1478+ lx -> peeked = la ;
1479+ lx -> has_peek = true;
1480+ lx -> cur = la2 .start ;
1481+ }
14131482 }
14141483 }
14151484
@@ -1450,15 +1519,22 @@ chc__parse_type(chc__lex *lx, const chc_alloc *al,
14501519 fn_lens [fn_cap ] = 0 ;
14511520 fn_cap = new_cap ;
14521521 if (has_field ) {
1453- fn_buf [fn_cap - 1 ] = chc__strdup (al , field .start , field .len , err );
1522+ size_t flen = field .len ;
1523+ if (field .quote )
1524+ fn_buf [fn_cap - 1 ] = chc__strdup_unquote (al , field .start ,
1525+ field .len , field .quote ,
1526+ & flen , err );
1527+ else
1528+ fn_buf [fn_cap - 1 ] = chc__strdup (al , field .start ,
1529+ field .len , err );
14541530 if (!fn_buf [fn_cap - 1 ]) {
14551531 for (size_t i = 0 ; i < fn_cap - 1 ; i ++ )
14561532 if (fn_buf [i ]) al -> free (al -> ud , fn_buf [i ], fn_lens [i ] + 1 );
14571533 al -> free (al -> ud , fn_buf , fn_cap * sizeof * fn_buf );
14581534 al -> free (al -> ud , fn_lens , fn_cap * sizeof * fn_lens );
14591535 chc_type_destroy (t , al ); return CHC_ERR_OOM ;
14601536 }
1461- fn_lens [fn_cap - 1 ] = field . len ;
1537+ fn_lens [fn_cap - 1 ] = flen ;
14621538 any_named = true;
14631539 }
14641540 }
0 commit comments