Skip to content

Commit 282eab0

Browse files
committed
INSERT multi-dimensional arrays
1 parent 2b3cdd7 commit 282eab0

11 files changed

Lines changed: 412 additions & 95 deletions

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@ ALTER EXTENSION pg_clickhouse UPDATE TO '0.3';
4343
* Added explicit mappings for `mod`, `pow`/`power`, `bit_count(bytea)`, and
4444
`reverse(text)` (→ `reverseUTF8`) to retain previously working pushdowns.
4545
Thanks to Philip Dubé for the PR ([#245]).
46+
* Added multidimensional array support across SELECT and INSERT in both the
47+
binary and http engines. Rectangular ClickHouse `Array(Array(...))` values
48+
now map to PostgreSQL multidimensional arrays (jagged arrays are not supported),
49+
and PostgreSQL multidimensional arrays inserted into ClickHouse
50+
`Array(Array(...))` columns preserve their nesting. Thanks to Philip Dubé
51+
for the PR ([#233]).
4652

4753
### 🐞 Bug Fixes
4854

@@ -129,6 +135,8 @@ ALTER EXTENSION pg_clickhouse UPDATE TO '0.3';
129135
"pg_clickhouse#228 Security: revoke PUBLIC execute on clickhouse_raw_query"
130136
[#245]: https://github.com/ClickHouse/pg_clickhouse/pull/245
131137
"ClickHouse/pg_clickhouse#245 Don't push down functions by default"
138+
[#233]: https://github.com/ClickHouse/pg_clickhouse/pull/233
139+
"ClickHouse/pg_clickhouse#233 Support multidimensional arrays"
132140

133141
## [v0.2.0] — 2026-04-13
134142

src/binary.cpp

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,54 @@ extern "C"
464464
* value appropriate to col, and append that value. Raises an exception if
465465
* valtype is not compatible with col's type.
466466
*/
467+
static void column_append(clickhouse::ColumnRef col, Datum val, Oid valtype, bool isnull);
468+
469+
/*
470+
* Build a single ColumnArray row's worth of data from a (possibly nested)
471+
* ch_binary_array_t. items_type is the per-row element type of the parent
472+
* ColumnArray: scalar T for Array(T), Array(T) for Array(Array(T)), etc.
473+
*
474+
* For nested types, recurses to build child ColumnArrays and stitches them
475+
* via AppendAsColumn so the outer column's offsets describe the row shape.
476+
*/
477+
static clickhouse::ColumnRef
478+
build_array_row_column(ch_binary_array_t *arr, clickhouse::TypeRef items_type)
479+
{
480+
using namespace clickhouse;
481+
482+
auto col = CreateColumnByType(items_type->GetName());
483+
484+
/* Empty postgres array fits any nesting depth: nothing to walk, so
485+
* skip the dim check and return an empty column at this level. */
486+
if (arr->len == 0)
487+
return col;
488+
489+
if (items_type->GetCode() == Type::Code::Array)
490+
{
491+
if (arr->ndim < 2)
492+
throw std::runtime_error("pg_clickhouse: insert array has fewer dimensions than column type");
493+
494+
auto inner_arr = col->AsStrict<ColumnArray>();
495+
auto inner_items_t = items_type->As<clickhouse::ArrayType>()->GetItemType();
496+
497+
for (size_t i = 0; i < arr->len; i++)
498+
{
499+
auto child = (ch_binary_array_t *)DatumGetPointer(arr->datums[i]);
500+
auto sub = build_array_row_column(child, inner_items_t);
501+
502+
inner_arr->AppendAsColumn(sub);
503+
}
504+
return col;
505+
}
506+
507+
if (arr->ndim != 1)
508+
throw std::runtime_error("pg_clickhouse: insert array has more dimensions than column type");
509+
510+
for (size_t i = 0; i < arr->len; i++)
511+
column_append(col, arr->datums[i], arr->item_type, arr->nulls[i]);
512+
return col;
513+
}
514+
467515
static void
468516
column_append(clickhouse::ColumnRef col, Datum val, Oid valtype, bool isnull)
469517
{
@@ -691,13 +739,11 @@ extern "C"
691739
case Type::Array:
692740
{
693741
auto arrcol = col->AsStrict<ColumnArray>();
694-
auto items
695-
= CreateColumnByType(arrcol->GetType().As<clickhouse::ArrayType>()->GetItemType()->GetName());
742+
auto items_type = arrcol->GetType().As<clickhouse::ArrayType>()->GetItemType();
696743
auto arr = (ch_binary_array_t *)DatumGetPointer(val);
697-
for (size_t i = 0; i < arr->len; i++)
698-
column_append(items, arr->datums[i], arr->item_type, arr->nulls[i]);
744+
auto one_row = build_array_row_column(arr, items_type);
699745

700-
arrcol->AppendAsColumn(items);
746+
arrcol->AppendAsColumn(one_row);
701747
break;
702748
}
703749
default:

src/convert.c

Lines changed: 87 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ convert_array(ch_convert_state * state, Datum val)
223223

224224
if (slot->len == 0)
225225
val = PointerGetDatum(construct_empty_array(slot->item_type));
226-
else if (slot->ndim == 1)
226+
else if (slot->ndim <= 1)
227227
{
228228
void *arrout = construct_array(slot->datums, slot->len, slot->item_type,
229229
state->typlen, state->typbyval, state->typalign);
@@ -628,14 +628,57 @@ ch_binary_make_tuple_map(TupleDesc indesc, TupleDesc outdesc, Oid relid)
628628
return states;
629629
}
630630

631+
/*
632+
* Chunk a flat postgres array (already extracted into `flat`/`flatnulls` in
633+
* row-major order) into the nested ch_binary_array_t tree the binary engine
634+
* expects for Array(Array(...)) columns. Each interior node carries
635+
* ndim>1 with datums[i] = PointerGetDatum(child); leaves carry ndim==1 with
636+
* scalar datums copied from the flat buffer.
637+
*/
638+
static ch_binary_array_t *
639+
build_nested_binary_array(int level, int ndim, int *dims, Oid item_type,
640+
Datum * flat, bool *flatnulls, size_t * idx)
641+
{
642+
ch_binary_array_t *arr = palloc(sizeof(ch_binary_array_t));
643+
644+
arr->len = dims[level];
645+
arr->ndim = ndim - level;
646+
arr->item_type = item_type;
647+
arr->array_type = InvalidOid;
648+
arr->datums = palloc(sizeof(Datum) * arr->len);
649+
arr->nulls = palloc0(sizeof(bool) * arr->len);
650+
651+
if (level + 1 == ndim)
652+
{
653+
for (size_t i = 0; i < arr->len; i++)
654+
{
655+
arr->datums[i] = flat[*idx];
656+
arr->nulls[i] = flatnulls[*idx];
657+
(*idx)++;
658+
}
659+
}
660+
else
661+
{
662+
for (size_t i = 0; i < arr->len; i++)
663+
{
664+
ch_binary_array_t *child = build_nested_binary_array(level + 1, ndim, dims,
665+
item_type, flat,
666+
flatnulls, idx);
667+
668+
arr->datums[i] = PointerGetDatum(child);
669+
}
670+
}
671+
return arr;
672+
}
673+
631674
/*
632675
* For each value to be output, convert it, if necessary, from the Postgres
633676
* Datum type defined for the foreign table to a Datum that column_append() in
634677
* binary.cpp knows how to convert to a ClickHouse type. No conversion for
635678
* binary-compatible types; other types require a CAST.
636679
* ch_binary_make_tuple_map() makes this determination for each type, stored
637680
* in insert_state->conversion_states.
638-
*/
681+
*/
639682
void
640683
ch_binary_do_output_conversion(ch_binary_insert_state * insert_state,
641684
TupleTableSlot * slot)
@@ -658,24 +701,51 @@ ch_binary_do_output_conversion(ch_binary_insert_state * insert_state,
658701
AnyArrayType *v = DatumGetAnyArrayP(out_values[i]);
659702
ch_binary_array_t *arr;
660703
array_iter iter;
704+
int ndim = AARR_NDIM(v);
705+
int *dims = AARR_DIMS(v);
706+
size_t total = ArrayGetNItems(ndim, dims);
661707

662-
if (AARR_NDIM(v) > 1)
708+
if (ndim > MAXDIM)
663709
ereport(ERROR,
664-
(errcode(ERRCODE_DATATYPE_MISMATCH),
665-
errmsg("pg_clickhouse: inserted array should have one dimension")));
666-
667-
arr = palloc(sizeof(ch_binary_array_t));
668-
arr->len = ArrayGetNItems(AARR_NDIM(v), AARR_DIMS(v));
669-
arr->ndim = 1;
670-
arr->datums = palloc(sizeof(Datum) * arr->len);
671-
arr->nulls = palloc(sizeof(bool) * arr->len);
672-
arr->item_type = cstate->innertype;
673-
674-
array_iter_setup(&iter, v);
675-
for (size_t j = 0; j < arr->len; j++)
710+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
711+
errmsg("pg_clickhouse: inserted array depth %d exceeds maximum %d",
712+
ndim, MAXDIM)));
713+
714+
if (ndim <= 1)
715+
{
716+
arr = palloc(sizeof(ch_binary_array_t));
717+
arr->len = total;
718+
arr->ndim = 1;
719+
arr->item_type = cstate->innertype;
720+
arr->array_type = InvalidOid;
721+
arr->datums = total ? palloc(sizeof(Datum) * total) : NULL;
722+
arr->nulls = total ? palloc(sizeof(bool) * total) : NULL;
723+
724+
array_iter_setup(&iter, v);
725+
for (size_t j = 0; j < total; j++)
726+
{
727+
arr->datums[j] = array_iter_next(&iter, &arr->nulls[j], j,
728+
cstate->typlen, cstate->typbyval, cstate->typalign);
729+
}
730+
}
731+
else
676732
{
677-
arr->datums[j] = array_iter_next(&iter, &arr->nulls[j], i,
678-
cstate->typlen, cstate->typbyval, cstate->typalign);
733+
Datum *flat = palloc(sizeof(Datum) * total);
734+
bool *flatnulls = palloc0(sizeof(bool) * total);
735+
size_t idx = 0;
736+
737+
array_iter_setup(&iter, v);
738+
for (size_t j = 0; j < total; j++)
739+
{
740+
flat[j] = array_iter_next(&iter, &flatnulls[j], j,
741+
cstate->typlen, cstate->typbyval, cstate->typalign);
742+
}
743+
744+
arr = build_nested_binary_array(0, ndim, dims, cstate->innertype,
745+
flat, flatnulls, &idx);
746+
747+
pfree(flat);
748+
pfree(flatnulls);
679749
}
680750
out_values[i] = PointerGetDatum(arr);
681751

0 commit comments

Comments
 (0)