Skip to content

Commit 6a99163

Browse files
bluestreak01 and claude committed
Lazy null-index fill on the no-nulls fast path
parseNullSection now skips the per-row "nonNullIdx[i] = i" array fill when the column carries no nulls in this batch. This saves O(rowCount * columnCount) trivial assignments per batch on the most common decode path: for a 16K-row x 100-column wide result, that is 1.6M skipped iterations every batch. QwpColumnLayout gains a denseIndex(row) helper that returns row directly when nullBitmapAddr == 0, otherwise reads nonNullIdx[row]. All 13 typed accessors in QwpColumnBatch (getBool, getByteValue, getCharValue, getDecimal128High/Low, getDouble, getFloat, getIntValue, getLong, getLong256Word, getLongArray, getLongValue, getShortValue, getUuidHi/Lo), plus the private lookupBinaryBytes and lookupStringBytes helpers, now go through denseIndex. parseArrayColumn and parseSymbolColumn hoist the discriminator out of their per-row loops. The public raw-API nonNullIndex(col) lazily materialises the identity array on first call when the column has no nulls, so the existing fragmentation / compression / credit-flow tests that consume the raw API keep working, with the one-time fill cost amortised across repeated calls within the same batch. Also drops QwpSpscQueue.clearAndWakeConsumer: it had zero callers in the codebase, and its implementation wrote the consumer-only tail field even though its doc described the caller as the producer. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 2f4393a commit 6a99163

4 files changed

Lines changed: 70 additions & 42 deletions

File tree

core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpColumnBatch.java

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ public io.questdb.client.std.bytes.DirectByteSequence getBinaryB(int col, int ro
118118
public boolean getBool(int col, int row) {
119119
QwpColumnLayout l = columnLayouts.getQuick(col);
120120
if (isLayoutNull(l, row)) return false;
121-
int denseIdx = l.nonNullIdx[row];
121+
int denseIdx = l.denseIndex(row);
122122
// Bit-packed: 8 values per byte, LSB-first
123123
byte b = Unsafe.getUnsafe().getByte(l.valuesAddr + (denseIdx >>> 3));
124124
return (b & (1 << (denseIdx & 7))) != 0;
@@ -131,7 +131,7 @@ public boolean getBool(int col, int row) {
131131
public byte getByteValue(int col, int row) {
132132
QwpColumnLayout l = columnLayouts.getQuick(col);
133133
if (isLayoutNull(l, row)) return 0;
134-
return Unsafe.getUnsafe().getByte(l.valuesAddr + l.nonNullIdx[row]);
134+
return Unsafe.getUnsafe().getByte(l.valuesAddr + l.denseIndex(row));
135135
}
136136

137137
/**
@@ -140,7 +140,7 @@ public byte getByteValue(int col, int row) {
140140
public char getCharValue(int col, int row) {
141141
QwpColumnLayout l = columnLayouts.getQuick(col);
142142
if (isLayoutNull(l, row)) return 0;
143-
return (char) Unsafe.getUnsafe().getShort(l.valuesAddr + 2L * l.nonNullIdx[row]);
143+
return (char) Unsafe.getUnsafe().getShort(l.valuesAddr + 2L * l.denseIndex(row));
144144
}
145145

146146
public int getColumnCount() {
@@ -161,7 +161,7 @@ public byte getColumnWireType(int col) {
161161
public long getDecimal128High(int col, int row) {
162162
QwpColumnLayout l = columnLayouts.getQuick(col);
163163
if (isLayoutNull(l, row)) return 0L;
164-
return Unsafe.getUnsafe().getLong(l.valuesAddr + 16L * l.nonNullIdx[row] + 8L);
164+
return Unsafe.getUnsafe().getLong(l.valuesAddr + 16L * l.denseIndex(row) + 8L);
165165
}
166166

167167
/**
@@ -170,7 +170,7 @@ public long getDecimal128High(int col, int row) {
170170
public long getDecimal128Low(int col, int row) {
171171
QwpColumnLayout l = columnLayouts.getQuick(col);
172172
if (isLayoutNull(l, row)) return 0L;
173-
return Unsafe.getUnsafe().getLong(l.valuesAddr + 16L * l.nonNullIdx[row]);
173+
return Unsafe.getUnsafe().getLong(l.valuesAddr + 16L * l.denseIndex(row));
174174
}
175175

176176
public int getDecimalScale(int col) {
@@ -180,7 +180,7 @@ public int getDecimalScale(int col) {
180180
public double getDouble(int col, int row) {
181181
QwpColumnLayout l = columnLayouts.getQuick(col);
182182
if (isLayoutNull(l, row)) return Double.NaN;
183-
return Unsafe.getUnsafe().getDouble(l.valuesAddr + 8L * l.nonNullIdx[row]);
183+
return Unsafe.getUnsafe().getDouble(l.valuesAddr + 8L * l.denseIndex(row));
184184
}
185185

186186
/**
@@ -209,7 +209,7 @@ public double[] getDoubleArrayElements(int col, int row) {
209209
public float getFloat(int col, int row) {
210210
QwpColumnLayout l = columnLayouts.getQuick(col);
211211
if (isLayoutNull(l, row)) return Float.NaN;
212-
return Unsafe.getUnsafe().getFloat(l.valuesAddr + 4L * l.nonNullIdx[row]);
212+
return Unsafe.getUnsafe().getFloat(l.valuesAddr + 4L * l.denseIndex(row));
213213
}
214214

215215
public int getGeohashPrecisionBits(int col) {
@@ -223,7 +223,7 @@ public int getGeohashPrecisionBits(int col) {
223223
public int getIntValue(int col, int row) {
224224
QwpColumnLayout l = columnLayouts.getQuick(col);
225225
if (isLayoutNull(l, row)) return 0;
226-
return Unsafe.getUnsafe().getInt(l.valuesAddr + 4L * l.nonNullIdx[row]);
226+
return Unsafe.getUnsafe().getInt(l.valuesAddr + 4L * l.denseIndex(row));
227227
}
228228

229229
/**
@@ -238,7 +238,7 @@ public long getLong(int col, int row) {
238238
QwpColumnLayout l = columnLayouts.getQuick(col);
239239
if (isLayoutNull(l, row)) return 0L;
240240
byte wt = l.info.wireType;
241-
int denseIdx = l.nonNullIdx[row];
241+
int denseIdx = l.denseIndex(row);
242242
if (wt == QwpConstants.TYPE_LONG || wt == QwpConstants.TYPE_DATE
243243
|| wt == QwpConstants.TYPE_TIMESTAMP || wt == QwpConstants.TYPE_TIMESTAMP_NANOS
244244
|| wt == QwpConstants.TYPE_DECIMAL64) {
@@ -274,7 +274,7 @@ public long getLong(int col, int row) {
274274
public long getLong256Word(int col, int row, int wordIndex) {
275275
QwpColumnLayout l = columnLayouts.getQuick(col);
276276
if (isLayoutNull(l, row)) return 0L;
277-
return Unsafe.getUnsafe().getLong(l.valuesAddr + 32L * l.nonNullIdx[row] + 8L * wordIndex);
277+
return Unsafe.getUnsafe().getLong(l.valuesAddr + 32L * l.denseIndex(row) + 8L * wordIndex);
278278
}
279279

280280
// Raw column-address API -- for zero-branch hot inner loops.
@@ -299,7 +299,7 @@ public long[] getLongArray(int col, int row) {
299299
QwpColumnLayout l = columnLayouts.getQuick(col);
300300
if (isLayoutNull(l, row)) return null;
301301
byte wt = l.info.wireType;
302-
int denseIdx = l.nonNullIdx[row];
302+
int denseIdx = l.denseIndex(row);
303303
if (wt == QwpConstants.TYPE_UUID || wt == QwpConstants.TYPE_DECIMAL128) {
304304
long base = l.valuesAddr + 16L * denseIdx;
305305
return new long[]{Unsafe.getUnsafe().getLong(base), Unsafe.getUnsafe().getLong(base + 8)};
@@ -324,7 +324,7 @@ public long[] getLongArray(int col, int row) {
324324
public long getLongValue(int col, int row) {
325325
QwpColumnLayout l = columnLayouts.getQuick(col);
326326
if (isLayoutNull(l, row)) return 0L;
327-
return Unsafe.getUnsafe().getLong(l.valuesAddr + 8L * l.nonNullIdx[row]);
327+
return Unsafe.getUnsafe().getLong(l.valuesAddr + 8L * l.denseIndex(row));
328328
}
329329

330330
public int getRowCount() {
@@ -337,7 +337,7 @@ public int getRowCount() {
337337
public short getShortValue(int col, int row) {
338338
QwpColumnLayout l = columnLayouts.getQuick(col);
339339
if (isLayoutNull(l, row)) return 0;
340-
return Unsafe.getUnsafe().getShort(l.valuesAddr + 2L * l.nonNullIdx[row]);
340+
return Unsafe.getUnsafe().getShort(l.valuesAddr + 2L * l.denseIndex(row));
341341
}
342342

343343
/**
@@ -379,7 +379,7 @@ public String getString(int col, int row) {
379379
public long getUuidHi(int col, int row) {
380380
QwpColumnLayout l = columnLayouts.getQuick(col);
381381
if (isLayoutNull(l, row)) return 0L;
382-
return Unsafe.getUnsafe().getLong(l.valuesAddr + 16L * l.nonNullIdx[row] + 8L);
382+
return Unsafe.getUnsafe().getLong(l.valuesAddr + 16L * l.denseIndex(row) + 8L);
383383
}
384384

385385
/**
@@ -388,7 +388,7 @@ public long getUuidHi(int col, int row) {
388388
public long getUuidLo(int col, int row) {
389389
QwpColumnLayout l = columnLayouts.getQuick(col);
390390
if (isLayoutNull(l, row)) return 0L;
391-
return Unsafe.getUnsafe().getLong(l.valuesAddr + 16L * l.nonNullIdx[row]);
391+
return Unsafe.getUnsafe().getLong(l.valuesAddr + 16L * l.denseIndex(row));
392392
}
393393

394394
/**
@@ -468,9 +468,23 @@ public int nonNullCount(int col) {
468468
* column's non-null values, or -1 if the row is NULL. Array length equals
469469
* {@link #getRowCount()}. Valid only during the current {@code onBatch}
470470
* callback; do not retain.
471+
* <p>
472+
* For columns with no nulls in this batch the decoder skips populating
473+
* this array (saves O(rowCount * columnCount) per batch on the typical
474+
* no-nulls hot path). This accessor lazy-materialises an identity mapping
475+
* on demand for raw-API callers; the result is cached on the layout so
476+
* repeated calls within the same batch reuse the allocation. The typed
477+
* accessors ({@link #getLong}, {@link #getDouble}, etc.) avoid this
478+
* allocation entirely via {@link QwpColumnLayout#denseIndex}.
471479
*/
472480
public int[] nonNullIndex(int col) {
473-
return columnLayouts.getQuick(col).nonNullIdx;
481+
QwpColumnLayout l = columnLayouts.getQuick(col);
482+
if (l.nullBitmapAddr == 0 && l.nonNullIdx == null) {
483+
int[] arr = new int[rowCount];
484+
for (int i = 0; i < rowCount; i++) arr[i] = i;
485+
l.nonNullIdx = arr;
486+
}
487+
return l.nonNullIdx;
474488
}
475489

476490
/**
@@ -532,7 +546,7 @@ private io.questdb.client.std.bytes.DirectByteSequence lookupBinaryBytes(
532546
if (isNull(col, row)) return null;
533547
QwpColumnLayout l = columnLayouts.getQuick(col);
534548
if (l.info.wireType != QwpConstants.TYPE_BINARY) return null;
535-
int denseIdx = l.nonNullIdx[row];
549+
int denseIdx = l.denseIndex(row);
536550
int startOff = Unsafe.getUnsafe().getInt(l.valuesAddr + 4L * denseIdx);
537551
int endOff = Unsafe.getUnsafe().getInt(l.valuesAddr + 4L * (denseIdx + 1));
538552
return view.of(l.stringBytesAddr + startOff, endOff - startOff);
@@ -547,7 +561,7 @@ private DirectUtf8Sequence lookupStringBytes(int col, int row, DirectUtf8String
547561
if (isNull(col, row)) return null;
548562
QwpColumnLayout l = columnLayouts.getQuick(col);
549563
byte wt = l.info.wireType;
550-
int denseIdx = l.nonNullIdx[row];
564+
int denseIdx = l.denseIndex(row);
551565
if (wt == QwpConstants.TYPE_STRING || wt == QwpConstants.TYPE_VARCHAR) {
552566
int startOff = Unsafe.getUnsafe().getInt(l.valuesAddr + 4L * denseIdx);
553567
int endOff = Unsafe.getUnsafe().getInt(l.valuesAddr + 4L * (denseIdx + 1));

core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpColumnLayout.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,21 @@ public class QwpColumnLayout implements QuietCloseable {
117117
private long timestampDecodeAddr;
118118
private int timestampDecodeCapacity;
119119

120+
/**
121+
* Returns the dense index of {@code row} into the non-null values array.
122+
* For columns with no nulls in this batch ({@code nullBitmapAddr == 0}),
123+
* dense index equals row and {@link #nonNullIdx} is left unread (the
124+
* decoder skips the per-row array fill on this path). Otherwise the
125+
* pre-computed slot is returned.
126+
* <p>
127+
* Caller MUST have null-checked the cell first via the surrounding
128+
* {@code isNull} / {@code isLayoutNull} guard -- this method does not
129+
* detect null rows on its own.
130+
*/
131+
public int denseIndex(int row) {
132+
return nullBitmapAddr == 0 ? row : nonNullIdx[row];
133+
}
134+
120135
public void clear() {
121136
info = null;
122137
valuesAddr = 0;

core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpResultBatchDecoder.java

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -512,8 +512,13 @@ private long parseArrayColumn(QwpColumnLayout layout, int rowCount, long p, long
512512
layout.arrayRowAddr = ensureLongArray(layout.arrayRowAddr, rowCount);
513513
layout.arrayRowLen = ensureIntArray(layout.arrayRowLen, rowCount);
514514
layout.valuesAddr = p;
515+
// Hoist the no-nulls discriminator out of the per-row loop -- when the
516+
// column has no nulls in this batch, every row is non-null and the
517+
// null-skip branch is dead.
518+
boolean noNulls = layout.nullBitmapAddr == 0;
519+
int[] nonNullIdx = layout.nonNullIdx;
515520
for (int i = 0; i < rowCount; i++) {
516-
if (layout.nonNullIdx[i] < 0) {
521+
if (!noNulls && nonNullIdx[i] < 0) {
517522
layout.arrayRowAddr[i] = 0;
518523
layout.arrayRowLen[i] = 0;
519524
continue;
@@ -668,16 +673,25 @@ private long parseDeltaSymbolDict(long p, long limit) throws QwpDecodeException
668673
private long parseNullSection(QwpColumnLayout layout, int rowCount, long p, long limit) throws QwpDecodeException {
669674
if (p >= limit) throw new QwpDecodeException("truncated null flag");
670675
byte flag = Unsafe.getUnsafe().getByte(p++);
671-
layout.nonNullIdx = ensureIntArray(layout.nonNullIdx, rowCount);
672676
if (flag == 0) {
677+
// No nulls in this column -- skip the per-row "nonNullIdx[i] = i"
678+
// array fill entirely. Accessors detect the no-nulls case via
679+
// {@code nullBitmapAddr == 0} and treat dense-index == row directly
680+
// (see {@link QwpColumnLayout#denseIndex}), so the array is unread
681+
// on this path. For a 16K-row x 100-column wide result this saves
682+
// 1.6M trivial assignments per batch. nonNullIdx is nulled so a
683+
// raw-API caller of {@code QwpColumnBatch.nonNullIndex(col)} can
684+
// distinguish "needs identity-fill" from "fully populated by a
685+
// prior with-nulls batch"; that path lazy-materialises on demand.
673686
layout.nullBitmapAddr = 0;
687+
layout.nonNullIdx = null;
674688
layout.nonNullCount = rowCount;
675-
for (int i = 0; i < rowCount; i++) layout.nonNullIdx[i] = i;
676689
return p;
677690
}
678691
int bitmapBytes = (rowCount + 7) >>> 3;
679692
if (p + bitmapBytes > limit) throw new QwpDecodeException("truncated null bitmap");
680693
layout.nullBitmapAddr = p;
694+
layout.nonNullIdx = ensureIntArray(layout.nonNullIdx, rowCount);
681695
int denseIdx = 0;
682696
for (int i = 0; i < rowCount; i++) {
683697
int bi = i >>> 3;
@@ -732,9 +746,13 @@ private long parseSymbolColumn(QwpColumnLayout layout, int rowCount, long p, lon
732746
layout.symbolDictSize = dictSize;
733747
// Materialise per-row IDs into int[rowCount] so random access is O(1).
734748
layout.symbolRowIds = ensureIntArray(layout.symbolRowIds, rowCount);
749+
// Hoist the no-nulls discriminator out of the per-row loop -- when the
750+
// column has no nulls in this batch, every row carries an id and the
751+
// null-skip branch is dead.
752+
boolean noNulls = layout.nullBitmapAddr == 0;
753+
int[] nonNullIdx = layout.nonNullIdx;
735754
for (int i = 0; i < rowCount; i++) {
736-
int denseIdx = layout.nonNullIdx[i];
737-
if (denseIdx < 0) continue; // NULL row; leave slot stale
755+
if (!noNulls && nonNullIdx[i] < 0) continue; // NULL row; leave slot stale
738756
decodeVarint(p, limit);
739757
p = varintPos;
740758
int id = (int) varintValue;

core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpSpscQueue.java

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -69,25 +69,6 @@ public QwpSpscQueue(int capacity) {
6969
this.mask = pow2 - 1;
7070
}
7171

72-
/**
73-
* Drops any queued items and wakes the consumer if it is currently parked.
74-
* Safe to call concurrently with poll/take only if the caller is the
75-
* producer; otherwise external synchronisation is required.
76-
*/
77-
public void clearAndWakeConsumer() {
78-
long h = head;
79-
long t = tail;
80-
while (t < h) {
81-
slots[(int) (t & mask)] = null;
82-
t++;
83-
}
84-
tail = t;
85-
Thread consumer = consumerThread;
86-
if (consumer != null) {
87-
LockSupport.unpark(consumer);
88-
}
89-
}
90-
9172
/**
9273
* Publishes {@code value} to the consumer. Returns {@code false} when the
9374
* ring is full (caller may retry or spin externally). Never blocks.

0 commit comments

Comments
 (0)