Skip to content

Commit e423be4

Browse files
committed
quoted identifier parsing
1 parent 097a423 commit e423be4

2 files changed

Lines changed: 100 additions & 24 deletions

File tree

test/expected/binary_queries_6.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ SELECT c3, c4 FROM ft1 ORDER BY c3, c1 LIMIT 1; -- should work
124124

125125
ALTER SERVER binary_queries_loopback OPTIONS (SET dbname 'no such database');
126126
SELECT c3, c4 FROM ft1 ORDER BY c3, c1 LIMIT 1; -- should fail
127-
ERROR: pg_clickhouse: DB::Exception: Database `no such database` doesn't exist
127+
ERROR: pg_clickhouse: connection error: DB::Exception: Database `no such database` doesn't exist
128128
ALTER USER MAPPING FOR CURRENT_USER SERVER binary_queries_loopback OPTIONS (ADD user 'no such user');
129129
SELECT c3, c4 FROM ft1 ORDER BY c3, c1 LIMIT 1; -- should fail
130130
ERROR: pg_clickhouse: connection error: DB::Exception: no such user: Authentication failed: password is incorrect, or there is no user with such name.

vendor/clickhouse-c/clickhouse.h

Lines changed: 99 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,36 @@ chc__strdup(const chc_alloc *al, const char *s, size_t n, chc_err *err)
527527
return p;
528528
}
529529

530+
/* Copy a quoted identifier body (between the outer quote chars), resolving
531+
* the two escape forms ClickHouse's lexer accepts: a doubled quote stands
532+
* for one literal quote (`` `` `` -> `` ` ``, `""` -> `"`), & `\X` keeps X
533+
* verbatim. n is an upper bound; the resolved length is returned via
534+
* *out_len. */
535+
static char *
536+
chc__strdup_unquote(const chc_alloc *al, const char *s, size_t n, char quote,
537+
size_t *out_len, chc_err *err)
538+
{
539+
char *p = chc__alloc(al, n + 1, err);
540+
if (!p) return NULL;
541+
size_t o = 0;
542+
for (size_t i = 0; i < n; i++) {
543+
char c = s[i];
544+
if (c == '\\' && i + 1 < n) {
545+
p[o++] = s[++i];
546+
continue;
547+
}
548+
if (c == quote && i + 1 < n && s[i + 1] == quote) {
549+
p[o++] = quote;
550+
i++;
551+
continue;
552+
}
553+
p[o++] = c;
554+
}
555+
p[o] = '\0';
556+
*out_len = o;
557+
return p;
558+
}
559+
530560
#ifdef CHC_PROVIDE_STDLIB_ALLOC
531561
#include <stdlib.h>
532562
static void *chc__std_alloc(void *ud, size_t n)
@@ -852,6 +882,10 @@ typedef struct {
852882
chc__tok_kind kind;
853883
const char *start;
854884
size_t len;
885+
/* For CHC__TOK_NAME: 0 = bare identifier; '`' or '"' = quoted, & start/len
886+
* span the body between the outer quotes (still raw -- doubled-quote &
887+
* backslash escapes are resolved when copied out). */
888+
char quote;
855889
} chc__tok;
856890

857891
typedef struct {
@@ -867,37 +901,65 @@ chc__next_tok(chc__lex *lx)
867901
char c = *lx->cur;
868902
if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { lx->cur++; continue; }
869903
const char *st = lx->cur;
870-
if (c == '(') { lx->cur++; return (chc__tok){CHC__TOK_LPAREN, st, 1}; }
871-
if (c == ')') { lx->cur++; return (chc__tok){CHC__TOK_RPAREN, st, 1}; }
872-
if (c == ',') { lx->cur++; return (chc__tok){CHC__TOK_COMMA, st, 1}; }
873-
if (c == '=') { lx->cur++; return (chc__tok){CHC__TOK_EQ, st, 1}; }
904+
if (c == '(') { lx->cur++; return (chc__tok){CHC__TOK_LPAREN, st, 1, 0}; }
905+
if (c == ')') { lx->cur++; return (chc__tok){CHC__TOK_RPAREN, st, 1, 0}; }
906+
if (c == ',') { lx->cur++; return (chc__tok){CHC__TOK_COMMA, st, 1, 0}; }
907+
if (c == '=') { lx->cur++; return (chc__tok){CHC__TOK_EQ, st, 1, 0}; }
874908
if (c == '\'') {
875909
/* single-quoted string; clickhouse-cpp does not escape, so we
876910
* accept anything up to the next unescaped quote. */
877911
lx->cur++;
878912
const char *body = lx->cur;
879913
while (lx->cur < lx->end && *lx->cur != '\'') lx->cur++;
880-
if (lx->cur >= lx->end) return (chc__tok){CHC__TOK_INVALID, st, 0};
914+
if (lx->cur >= lx->end) return (chc__tok){CHC__TOK_INVALID, st, 0, 0};
881915
size_t blen = (size_t) (lx->cur - body);
882916
lx->cur++; /* eat closing ' */
883-
return (chc__tok){CHC__TOK_STRING, body, blen};
917+
return (chc__tok){CHC__TOK_STRING, body, blen, 0};
918+
}
919+
if (c == '`' || c == '"') {
920+
/* Quoted identifier, matching ClickHouse Lexer.cpp `quotedString`:
921+
* doubled quote (`` `` `` or `""`) & backslash-escapes are skipped
922+
* during scanning, resolved at copy time. */
923+
char q = c;
924+
lx->cur++;
925+
const char *body = lx->cur;
926+
while (lx->cur < lx->end) {
927+
char d = *lx->cur;
928+
if (d == '\\') {
929+
lx->cur++;
930+
if (lx->cur < lx->end) lx->cur++;
931+
continue;
932+
}
933+
if (d == q) {
934+
if (lx->cur + 1 < lx->end && lx->cur[1] == q) {
935+
lx->cur += 2;
936+
continue;
937+
}
938+
break;
939+
}
940+
lx->cur++;
941+
}
942+
if (lx->cur >= lx->end) return (chc__tok){CHC__TOK_INVALID, st, 0, 0};
943+
size_t blen = (size_t) (lx->cur - body);
944+
lx->cur++; /* eat closing quote */
945+
return (chc__tok){CHC__TOK_NAME, body, blen, q};
884946
}
885947
if (isalpha((unsigned char) c) || c == '_') {
886948
while (lx->cur < lx->end) {
887949
char d = *lx->cur;
888950
if (!(isalnum((unsigned char) d) || d == '_')) break;
889951
lx->cur++;
890952
}
891-
return (chc__tok){CHC__TOK_NAME, st, (size_t) (lx->cur - st)};
953+
return (chc__tok){CHC__TOK_NAME, st, (size_t) (lx->cur - st), 0};
892954
}
893955
if (isdigit((unsigned char) c) || c == '-') {
894956
lx->cur++;
895957
while (lx->cur < lx->end && isdigit((unsigned char) *lx->cur)) lx->cur++;
896-
return (chc__tok){CHC__TOK_NUMBER, st, (size_t) (lx->cur - st)};
958+
return (chc__tok){CHC__TOK_NUMBER, st, (size_t) (lx->cur - st), 0};
897959
}
898-
return (chc__tok){CHC__TOK_INVALID, st, 0};
960+
return (chc__tok){CHC__TOK_INVALID, st, 0, 0};
899961
}
900-
return (chc__tok){CHC__TOK_EOS, lx->end, 0};
962+
return (chc__tok){CHC__TOK_EOS, lx->end, 0, 0};
901963
}
902964

903965
static chc__tok
@@ -1255,7 +1317,7 @@ chc__parse_type(chc__lex *lx, const chc_alloc *al,
12551317
chc_type **out, chc_err *err)
12561318
{
12571319
chc__tok head = chc__eat_tok(lx);
1258-
if (head.kind != CHC__TOK_NAME)
1320+
if (head.kind != CHC__TOK_NAME || head.quote)
12591321
return chc__err_set(err, CHC_ERR_TYPE, "expected type name");
12601322

12611323
chc_type *t = chc__calloc(al, sizeof *t, err);
@@ -1397,19 +1459,26 @@ chc__parse_type(chc__lex *lx, const chc_alloc *al,
13971459
bool has_field = false;
13981460
if (is_tuple && la.kind == CHC__TOK_NAME) {
13991461
chc__eat_tok(lx);
1400-
chc__tok la2 = chc__peek_tok(lx);
1401-
if (la2.kind == CHC__TOK_NAME) {
1402-
/* `la` is a field-name; `la2` starts the type. */
1462+
if (la.quote) {
1463+
/* `\`x\`` or `"x"` is never a type head, so it must be
1464+
* the field label. */
14031465
field = la;
14041466
has_field = true;
14051467
} else {
1406-
/* `la` was the type's leading NAME (terminal or
1407-
* parametric like `Tuple(LowCardinality(...))`).
1408-
* Put it back & rewind cur to la2's start so the
1409-
* next peek re-lexes la2. */
1410-
lx->peeked = la;
1411-
lx->has_peek = true;
1412-
lx->cur = la2.start;
1468+
chc__tok la2 = chc__peek_tok(lx);
1469+
if (la2.kind == CHC__TOK_NAME) {
1470+
/* `la` is a field-name; `la2` starts the type. */
1471+
field = la;
1472+
has_field = true;
1473+
} else {
1474+
/* `la` was the type's leading NAME (terminal or
1475+
* parametric like `Tuple(LowCardinality(...))`).
1476+
* Put it back & rewind cur to la2's start so the
1477+
* next peek re-lexes la2. */
1478+
lx->peeked = la;
1479+
lx->has_peek = true;
1480+
lx->cur = la2.start;
1481+
}
14131482
}
14141483
}
14151484

@@ -1450,15 +1519,22 @@ chc__parse_type(chc__lex *lx, const chc_alloc *al,
14501519
fn_lens[fn_cap] = 0;
14511520
fn_cap = new_cap;
14521521
if (has_field) {
1453-
fn_buf[fn_cap - 1] = chc__strdup(al, field.start, field.len, err);
1522+
size_t flen = field.len;
1523+
if (field.quote)
1524+
fn_buf[fn_cap - 1] = chc__strdup_unquote(al, field.start,
1525+
field.len, field.quote,
1526+
&flen, err);
1527+
else
1528+
fn_buf[fn_cap - 1] = chc__strdup(al, field.start,
1529+
field.len, err);
14541530
if (!fn_buf[fn_cap - 1]) {
14551531
for (size_t i = 0; i < fn_cap - 1; i++)
14561532
if (fn_buf[i]) al->free(al->ud, fn_buf[i], fn_lens[i] + 1);
14571533
al->free(al->ud, fn_buf, fn_cap * sizeof *fn_buf);
14581534
al->free(al->ud, fn_lens, fn_cap * sizeof *fn_lens);
14591535
chc_type_destroy(t, al); return CHC_ERR_OOM;
14601536
}
1461-
fn_lens[fn_cap - 1] = field.len;
1537+
fn_lens[fn_cap - 1] = flen;
14621538
any_named = true;
14631539
}
14641540
}

0 commit comments

Comments
 (0)