Skip to content

Commit dbf407d

Browse files
committed
INSERT multi-dimensional arrays
1 parent 6b4a573 commit dbf407d

11 files changed

Lines changed: 417 additions & 98 deletions

CHANGELOG.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@ ALTER EXTENSION pg_clickhouse UPDATE TO '0.3';
2727
* Added pushdown for [fuzzystrmatch] functions `soundex()` and
2828
`levenshtein()` (2-arg, mapped to `editDistance`). Thanks to
2929
Philip Dubé for the PR ([#210]).
30+
* Added multidimensional array support across SELECT and INSERT in both the
31+
binary and http engines. Rectangular ClickHouse `Array(Array(...))` values
32+
now map to PostgreSQL multidimensional arrays (jagged arrays are not supported),
33+
and PostgreSQL multidimensional arrays inserted into ClickHouse
34+
`Array(Array(...))` columns preserve their nesting. Thanks to Philip Dubé
35+
for the PR ([#233]).
3036

3137
### 🐞 Bug Fixes
3238

@@ -89,10 +95,10 @@ ALTER EXTENSION pg_clickhouse UPDATE TO '0.3';
8995
"ClickHouse/pg_clickhouse#235 Detect TSV NULL marker before unescaping"
9096
[#231]: https://github.com/ClickHouse/pg_clickhouse/pull/231
9197
"ClickHouse/pg_clickhouse#231 Fix column_name option not being respected by inserts"
92-
[#223]: https://github.com/ClickHouse/pg_clickhouse/pull/223
93-
"pg_clickhouse#223 Fix EXPLAIN (VERBOSE) for window functions"
9498
[#228]: https://github.com/ClickHouse/pg_clickhouse/pull/228
95-
"pg_clickhouse#228 Security: revoke PUBLIC execute on clickhouse_raw_query"
99+
"ClickHouse/pg_clickhouse#228 Security: revoke PUBLIC execute on clickhouse_raw_query"
100+
[#233]: https://github.com/ClickHouse/pg_clickhouse/pull/233
101+
"ClickHouse/pg_clickhouse#233 Support multidimensional arrays"
96102

97103
## [v0.2.0] — 2026-04-13
98104

src/binary.cpp

Lines changed: 56 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,57 @@ extern "C"
440440
state->insert_block = (ch_insert_block_h *)block;
441441
}
442442

443+
static void
444+
column_append(clickhouse::ColumnRef col, Datum val, Oid valtype, bool isnull);
445+
446+
/*
447+
* Build a single ColumnArray row's worth of data from a (possibly nested)
448+
* ch_binary_array_t. items_type is the per-row element type of the parent
449+
* ColumnArray: scalar T for Array(T), Array(T) for Array(Array(T)), etc.
450+
*
451+
* For nested types, recurses to build child ColumnArrays and stitches them
452+
* via AppendAsColumn so the outer column's offsets describe the row shape.
453+
*/
454+
static clickhouse::ColumnRef
455+
build_array_row_column(ch_binary_array_t *arr, clickhouse::TypeRef items_type)
456+
{
457+
using namespace clickhouse;
458+
459+
auto col = CreateColumnByType(items_type->GetName());
460+
461+
/* Empty postgres array fits any nesting depth: nothing to walk, so
462+
* skip the dim check and return an empty column at this level. */
463+
if (arr->len == 0)
464+
return col;
465+
466+
if (items_type->GetCode() == Type::Code::Array)
467+
{
468+
if (arr->ndim < 2)
469+
throw std::runtime_error(
470+
"pg_clickhouse: insert array has fewer dimensions than column type");
471+
472+
auto inner_arr = col->AsStrict<ColumnArray>();
473+
auto inner_items_t = items_type->As<clickhouse::ArrayType>()->GetItemType();
474+
475+
for (size_t i = 0; i < arr->len; i++)
476+
{
477+
auto child = (ch_binary_array_t *)DatumGetPointer(arr->datums[i]);
478+
auto sub = build_array_row_column(child, inner_items_t);
479+
480+
inner_arr->AppendAsColumn(sub);
481+
}
482+
return col;
483+
}
484+
485+
if (arr->ndim != 1)
486+
throw std::runtime_error(
487+
"pg_clickhouse: insert array has more dimensions than column type");
488+
489+
for (size_t i = 0; i < arr->len; i++)
490+
column_append(col, arr->datums[i], arr->item_type, arr->nulls[i]);
491+
return col;
492+
}
493+
443494
static void
444495
column_append(clickhouse::ColumnRef col, Datum val, Oid valtype, bool isnull)
445496
{
@@ -667,13 +718,12 @@ extern "C"
667718
case Type::Array:
668719
{
669720
auto arrcol = col->AsStrict<ColumnArray>();
670-
auto items
671-
= CreateColumnByType(arrcol->GetType().As<clickhouse::ArrayType>()->GetItemType()->GetName());
672-
auto arr = (ch_binary_array_t *)DatumGetPointer(val);
673-
for (size_t i = 0; i < arr->len; i++)
674-
column_append(items, arr->datums[i], arr->item_type, arr->nulls[i]);
721+
auto items_type
722+
= arrcol->GetType().As<clickhouse::ArrayType>()->GetItemType();
723+
auto arr = (ch_binary_array_t *)DatumGetPointer(val);
724+
auto one_row = build_array_row_column(arr, items_type);
675725

676-
arrcol->AppendAsColumn(items);
726+
arrcol->AppendAsColumn(one_row);
677727
break;
678728
}
679729
default:

src/convert.c

Lines changed: 86 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ convert_array(ch_convert_state * state, Datum val)
219219

220220
if (slot->len == 0)
221221
val = PointerGetDatum(construct_empty_array(slot->item_type));
222-
else if (slot->ndim == 1)
222+
else if (slot->ndim <= 1)
223223
{
224224
void *arrout = construct_array(slot->datums, slot->len, slot->item_type,
225225
state->typlen, state->typbyval, state->typalign);
@@ -611,6 +611,49 @@ ch_binary_make_tuple_map(TupleDesc indesc, TupleDesc outdesc, Oid relid)
611611
return states;
612612
}
613613

614+
/*
615+
* Chunk a flat postgres array (already extracted into `flat`/`flatnulls` in
616+
* row-major order) into the nested ch_binary_array_t tree the binary engine
617+
* expects for Array(Array(...)) columns. Each interior node carries
618+
* ndim>1 with datums[i] = PointerGetDatum(child); leaves carry ndim==1 with
619+
* scalar datums copied from the flat buffer.
620+
*/
621+
static ch_binary_array_t *
622+
build_nested_binary_array(int level, int ndim, int *dims, Oid item_type,
623+
Datum * flat, bool *flatnulls, size_t * idx)
624+
{
625+
ch_binary_array_t *arr = palloc(sizeof(ch_binary_array_t));
626+
627+
arr->len = dims[level];
628+
arr->ndim = ndim - level;
629+
arr->item_type = item_type;
630+
arr->array_type = InvalidOid;
631+
arr->datums = palloc(sizeof(Datum) * arr->len);
632+
arr->nulls = palloc0(sizeof(bool) * arr->len);
633+
634+
if (level + 1 == ndim)
635+
{
636+
for (size_t i = 0; i < arr->len; i++)
637+
{
638+
arr->datums[i] = flat[*idx];
639+
arr->nulls[i] = flatnulls[*idx];
640+
(*idx)++;
641+
}
642+
}
643+
else
644+
{
645+
for (size_t i = 0; i < arr->len; i++)
646+
{
647+
ch_binary_array_t *child = build_nested_binary_array(level + 1, ndim, dims,
648+
item_type, flat,
649+
flatnulls, idx);
650+
651+
arr->datums[i] = PointerGetDatum(child);
652+
}
653+
}
654+
return arr;
655+
}
656+
614657
void
615658
ch_binary_do_output_conversion(ch_binary_insert_state * insert_state,
616659
TupleTableSlot * slot)
@@ -633,24 +676,51 @@ ch_binary_do_output_conversion(ch_binary_insert_state * insert_state,
633676
AnyArrayType *v = DatumGetAnyArrayP(out_values[i]);
634677
ch_binary_array_t *arr;
635678
array_iter iter;
679+
int ndim = AARR_NDIM(v);
680+
int *dims = AARR_DIMS(v);
681+
size_t total = ArrayGetNItems(ndim, dims);
636682

637-
if (AARR_NDIM(v) > 1)
683+
if (ndim > MAXDIM)
638684
ereport(ERROR,
639-
(errcode(ERRCODE_DATATYPE_MISMATCH),
640-
errmsg("pg_clickhouse: inserted array should have one dimension")));
641-
642-
arr = palloc(sizeof(ch_binary_array_t));
643-
arr->len = ArrayGetNItems(AARR_NDIM(v), AARR_DIMS(v));
644-
arr->ndim = 1;
645-
arr->datums = palloc(sizeof(Datum) * arr->len);
646-
arr->nulls = palloc(sizeof(bool) * arr->len);
647-
arr->item_type = cstate->innertype;
648-
649-
array_iter_setup(&iter, v);
650-
for (size_t j = 0; j < arr->len; j++)
685+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
686+
errmsg("pg_clickhouse: inserted array depth %d exceeds maximum %d",
687+
ndim, MAXDIM)));
688+
689+
if (ndim <= 1)
690+
{
691+
arr = palloc(sizeof(ch_binary_array_t));
692+
arr->len = total;
693+
arr->ndim = 1;
694+
arr->item_type = cstate->innertype;
695+
arr->array_type = InvalidOid;
696+
arr->datums = total ? palloc(sizeof(Datum) * total) : NULL;
697+
arr->nulls = total ? palloc(sizeof(bool) * total) : NULL;
698+
699+
array_iter_setup(&iter, v);
700+
for (size_t j = 0; j < total; j++)
701+
{
702+
arr->datums[j] = array_iter_next(&iter, &arr->nulls[j], j,
703+
cstate->typlen, cstate->typbyval, cstate->typalign);
704+
}
705+
}
706+
else
651707
{
652-
arr->datums[j] = array_iter_next(&iter, &arr->nulls[j], i,
653-
cstate->typlen, cstate->typbyval, cstate->typalign);
708+
Datum *flat = palloc(sizeof(Datum) * total);
709+
bool *flatnulls = palloc0(sizeof(bool) * total);
710+
size_t idx = 0;
711+
712+
array_iter_setup(&iter, v);
713+
for (size_t j = 0; j < total; j++)
714+
{
715+
flat[j] = array_iter_next(&iter, &flatnulls[j], j,
716+
cstate->typlen, cstate->typbyval, cstate->typalign);
717+
}
718+
719+
arr = build_nested_binary_array(0, ndim, dims, cstate->innertype,
720+
flat, flatnulls, &idx);
721+
722+
pfree(flat);
723+
pfree(flatnulls);
654724
}
655725
out_values[i] = PointerGetDatum(arr);
656726

0 commit comments

Comments
 (0)