Skip to content

Commit 60903e8

Browse files
authored
Merge pull request #3171 from perspective-dev/arrow-multi-batch
Fix multi-batch Arrow null validity bug
2 parents (f70d1be + 4c669fc), commit 60903e8

2 files changed

Lines changed: 58 additions & 5 deletions

File tree

rust/perspective-js/test/js/constructors/arrow.spec.ts

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,48 @@ test.describe("Arrow", function () {
7575
});
7676

7777
test.describe("regressions", () => {
78+
// https://github.com/perspective-dev/perspective/issues/3169
79+
test("null values are preserved across multi-batch Arrow IPC streams", async function () {
80+
function row(
81+
identifier: string,
82+
value: number | null,
83+
date: Date | null,
84+
) {
85+
return arrow.tableFromArrays({
86+
Identifier: arrow.vectorFromArray(
87+
[identifier],
88+
new arrow.Utf8(),
89+
),
90+
Value: arrow.vectorFromArray([value], new arrow.Float64()),
91+
Date: arrow.vectorFromArray([date], new arrow.DateDay()),
92+
});
93+
}
94+
95+
const t1 = row("A", null, null);
96+
const t2 = row("B", 5, null);
97+
const t3 = row("C", null, new Date(Date.UTC(2025, 5, 15)));
98+
99+
const multiBatchTable = new arrow.Table([
100+
...t1.batches,
101+
...t2.batches,
102+
...t3.batches,
103+
]);
104+
expect(multiBatchTable.batches.length).toEqual(3);
105+
106+
const ipc = arrow.tableToIPC(multiBatchTable, "stream");
107+
const table = await perspective.table(ipc.buffer as ArrayBuffer);
108+
const view = await table.view();
109+
const json = await view.to_json();
110+
await view.delete();
111+
await table.delete();
112+
113+
expect(json).toStrictEqual([
114+
{ Identifier: "A", Value: null, Date: null },
115+
{ Identifier: "B", Value: 5, Date: null },
116+
{ Identifier: "C", Value: null, Date: 1749945600000 },
117+
]);
118+
});
119+
78120
test("null equality works correctly in updates", async function () {
79121
async function write_to_json(
80122
buffer: ArrayBuffer,

rust/perspective-server/cpp/perspective/src/cpp/arrow_loader.cpp

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -907,19 +907,30 @@ ArrowLoader::fill_column(
907907
copy_array(col, array, offset, len);
908908
}
909909

910-
// Fill validity bitmap
910+
// Fill validity bitmap. Operate only on the current chunk's
911+
// range [offset, offset+len); a whole-column fill here would
912+
// clobber validity bits set by other chunks in a multi-batch
913+
// ChunkedArray.
911914
std::int64_t null_count = array->null_count();
912915

913916
if (null_count == 0) {
914-
col->valid_raw_fill();
917+
for (uint32_t i = 0; i < len; ++i) {
918+
col->set_valid(offset + i, true);
919+
}
915920
} else {
916921
const uint8_t* null_bitmap = array->null_bitmap_data();
917922

918923
// If the arrow column is of null type, the null
919-
// bitmap is a nullptr - so just mark everything as
920-
// invalid and move on.
924+
// bitmap is a nullptr - so just mark this chunk's rows
925+
// as invalid and move on.
921926
if (null_bitmap == nullptr) {
922-
col->invalid_raw_fill();
927+
for (uint32_t i = 0; i < len; ++i) {
928+
if (is_update) {
929+
col->unset(offset + i);
930+
} else {
931+
col->clear(offset + i);
932+
}
933+
}
923934
} else {
924935
// Read the null bitmap and set the correct rows
925936
// as valid

0 commit comments

Comments
 (0)