
Commit 71afa21

bluestreak01 and claude committed
feat(ilp): connectionGeneration foundation + encode-mid-reconnect retry
Re-adds the volatile generation counter (and its companion retry loop in
flushPendingRows) that the cursor strip had removed. This is the foundation
the reconnect work (#20/#21) builds on — the producer needs a way to detect
that the wire-side actor has rotated state mid-encode so it can discard
now-poisoned schema-ID refs and re-encode with full schema definitions.

What lands here:

* QwpWebSocketSender: volatile connectionGeneration + lastSeenGeneration
  pair. Bumped on initial recovery from disk (the recovered FSNs were never
  seen by *this* server connection, so the first batch must re-publish full
  schemas). Reconnect path will bump in subsequent work.
* flushPendingRows: encode-mid-reconnect retry loop. Sample gen before
  encode + after finishMessage; if it changed, discard the encoded bytes
  (table buffers haven't been reset yet — source rows are intact) and retry
  with reset schema state. Bounded at MAX_SCHEMA_RACE_RETRIES = 10 so
  reconnect-faster-than-encode surfaces a hard error instead of spinning.
* CursorSendEngine.wasRecoveredFromDisk(): single-bit accessor the sender
  reads during ensureConnected to decide whether to bump.
* SegmentRing.openExisting: filter out empty hot-spare leftovers
  (frameCount=0) from prior sessions. Those carry the provisional baseSeq=0
  and would otherwise collide with the real baseSeq=0 segment and trip the
  contiguity check. Surfaced by the new recovery test — caught a real bug
  in the recovery scan.
* Test hooks bumpConnectionGenerationForTest / accessors for gen and
  maxSent*Id so reconnect-effect tests can run without spinning up the
  (still-not-implemented) reconnect path.

Tests cover: gen=0 for fresh connect, gen=1 after disk recovery, gen bump
triggers schema-state reset on the next encode and is sticky (further
flushes without bump don't re-reset).

Spec decisions #4 and #5 land here.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 3caa2d3 commit 71afa21

4 files changed

Lines changed: 369 additions & 23 deletions
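
The core pattern, condensed: a single wire-side writer bumps a volatile
generation counter; the producer samples it before and after each encode and
retries on divergence. A minimal self-contained sketch follows; all names
except connectionGeneration, lastSeenGeneration, and MAX_SCHEMA_RACE_RETRIES
are illustrative, not the repository's API.

// Sketch only: assumes a single wire-side writer thread and a single
// producer thread, as the commit message describes.
final class GenerationRetrySketch {
    private static final int MAX_SCHEMA_RACE_RETRIES = 10;

    // Written by one thread only (the wire-side actor); volatile gives the
    // producer visibility without locks.
    private volatile long connectionGeneration;
    // Producer-thread-only mirror; never touched by the wire side.
    private long lastSeenGeneration;

    // Wire-side actor: called after wire state has been reset.
    void onReconnected() {
        connectionGeneration++;
    }

    // Producer thread: encode one batch, discarding and retrying if the
    // generation rotated mid-encode.
    byte[] encodeBatch() {
        int retries = 0;
        while (true) {
            long genBefore = connectionGeneration;      // sample #1
            if (genBefore != lastSeenGeneration) {
                resetSchemaState();                     // new server knows none of our IDs
                lastSeenGeneration = genBefore;
            }
            byte[] encoded = encodeWithCurrentSchemaState();
            if (connectionGeneration == genBefore) {    // sample #2: no rotation mid-encode
                return encoded;
            }
            if (++retries >= MAX_SCHEMA_RACE_RETRIES) {
                throw new IllegalStateException("reconnects outpacing encode");
            }
            // Discard `encoded`; source rows are intact, so just loop.
        }
    }

    private void resetSchemaState() { /* forget schema/symbol IDs */ }

    private byte[] encodeWithCurrentSchemaState() { return new byte[0]; }
}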


core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpWebSocketSender.java

Lines changed: 114 additions & 22 deletions
@@ -164,6 +164,22 @@ public class QwpWebSocketSender implements Sender {
     // 0 or -1 means "fast close" (skip the drain); otherwise close blocks
     // up to this many millis for ackedFsn to catch up to publishedFsn.
     private long closeFlushTimeoutMillis = 5_000L;
+    // Single volatile counter, single writer (the wire-side actor that
+    // performs reconnect; for now: ensureConnected during recovery).
+    // Bumped on every successful reconnect AND on initial recovery from
+    // disk. Producer thread reads it inside flushPendingRows to decide
+    // whether to reset schema state (the new server has no memory of the
+    // old connection's schema IDs) and to detect the encode-mid-reconnect
+    // race. See design/qwp-cursor-durability.md "Schema state on reconnect".
+    private volatile long connectionGeneration;
+    // Producer-thread-only mirror of the last connectionGeneration value
+    // we encoded against. When connectionGeneration > lastSeenGeneration,
+    // the producer must reset schema state before the next encode.
+    private long lastSeenGeneration;
+    // Bound on the encode-retry loop in flushPendingRows. Reconnect
+    // firing 10x faster than the producer can encode a single batch is
+    // pathological — surface a hard error rather than spin.
+    private static final int MAX_SCHEMA_RACE_RETRIES = 10;

     private QwpWebSocketSender(
             String host,
@@ -842,6 +858,37 @@ public int getPendingRowCount() {
         return pendingRowCount;
     }

+    /**
+     * Test hook: simulate a wire-side reconnect by bumping the
+     * connectionGeneration counter. The next call into {@code flushPendingRows}
+     * will detect the divergence and reset schema state. Production wire
+     * code will call this from the I/O loop's reconnect path; tests use
+     * it to exercise the schema-reset machinery without spinning up a
+     * reconnect scenario.
+     */
+    @TestOnly
+    public void bumpConnectionGenerationForTest() {
+        connectionGeneration++;
+    }
+
+    /** Test accessor for the volatile generation counter. */
+    @TestOnly
+    public long getConnectionGenerationForTest() {
+        return connectionGeneration;
+    }
+
+    /** Test accessor: highest schema ID confirmed sent on the current connection. */
+    @TestOnly
+    public int getMaxSentSchemaIdForTest() {
+        return maxSentSchemaId;
+    }
+
+    /** Test accessor: highest symbol ID confirmed sent on the current connection. */
+    @TestOnly
+    public int getMaxSentSymbolIdForTest() {
+        return maxSentSymbolId;
+    }
+
     @TestOnly
     public QwpTableBuffer getTableBuffer(String tableName) {
         QwpTableBuffer buffer = tableBuffers.get(tableName);
@@ -1298,6 +1345,15 @@ private void ensureConnected() {
         // Server starts fresh on each connection — discard any schema IDs
         // retained from prior state.
         resetSchemaStateForNewConnection();
+        // If the cursor engine recovered an existing on-disk slot, the
+        // recovered FSNs were never seen by *this* server connection. Bump
+        // connectionGeneration so flushPendingRows treats the next batch as
+        // post-reconnect (full schema definitions, not refs). lastSeenGeneration
+        // stays at 0 — the divergence is what signals "reset needed" in the
+        // producer's retry loop.
+        if (cursorEngine != null && cursorEngine.wasRecoveredFromDisk()) {
+            connectionGeneration = 1L;
+        }
         connectionError.set(null);

         connected = true;
@@ -1344,36 +1400,72 @@ private void flushPendingRows() {
         }

         ensureActiveBufferReady();
-        int batchMaxSchemaId = maxSentSchemaId;
-        encoder.beginMessage(tableCount, globalSymbolDictionary, maxSentSymbolId, currentBatchMaxSymbolId);
-        for (int i = 0, n = keys.size(); i < n; i++) {
-            CharSequence tableName = keys.getQuick(i);
-            if (tableName == null) {
-                continue;
-            }
-            QwpTableBuffer tableBuffer = tableBuffers.get(tableName);
-            if (tableBuffer == null || tableBuffer.getRowCount() == 0) {
-                continue;
+        // Encode-mid-reconnect race retry loop. The wire-side actor (today
+        // the recovery startup; soon the I/O loop's reconnect path) bumps
+        // connectionGeneration after resetting wire state. If a bump fires
+        // while we're encoding, the bytes we're about to emit may carry
+        // schema-ID refs the new server has never assigned — the server
+        // would reject the batch and we'd lose data. Detect by sampling
+        // gen before encode and re-sampling after finishMessage; if it
+        // changed, discard the encoded bytes (table buffers are NOT yet
+        // reset, so source rows are intact) and retry. Bounded so
+        // reconnect-faster-than-encode surfaces a hard error.
+        int batchMaxSchemaId;
+        int messageSize;
+        QwpBufferWriter buffer;
+        int retries = 0;
+        while (true) {
+            long genBefore = connectionGeneration;
+            if (genBefore != lastSeenGeneration) {
+                resetSchemaStateForNewConnection();
+                lastSeenGeneration = genBefore;
             }
+            int currBatchMaxSchemaId = maxSentSchemaId;
+            encoder.beginMessage(tableCount, globalSymbolDictionary, maxSentSymbolId, currentBatchMaxSymbolId);
+            for (int i = 0, n = keys.size(); i < n; i++) {
+                CharSequence tableName = keys.getQuick(i);
+                if (tableName == null) {
+                    continue;
+                }
+                QwpTableBuffer tableBuffer = tableBuffers.get(tableName);
+                if (tableBuffer == null || tableBuffer.getRowCount() == 0) {
+                    continue;
+                }
+
+                if (tableBuffer.getSchemaId() < 0) {
+                    if (nextSchemaId >= maxSchemasPerConnection) {
+                        throw new LineSenderException("maximum schemas per connection exceeded")
+                                .put("[maxSchemasPerConnection=").put(maxSchemasPerConnection).put(']');
+                    }
+                    tableBuffer.setSchemaId(nextSchemaId++);
+                }
+                currBatchMaxSchemaId = Math.max(currBatchMaxSchemaId, tableBuffer.getSchemaId());
+                boolean useSchemaRef = tableBuffer.getSchemaId() <= maxSentSchemaId;

-            if (tableBuffer.getSchemaId() < 0) {
-                if (nextSchemaId >= maxSchemasPerConnection) {
-                    throw new LineSenderException("maximum schemas per connection exceeded")
-                            .put("[maxSchemasPerConnection=").put(maxSchemasPerConnection).put(']');
+                if (LOG.isDebugEnabled()) {
+                    LOG.debug("Encoding table [name={}, rows={}, maxSentSymbolId={}, batchMaxId={}, useSchemaRef={}]", tableName, tableBuffer.getRowCount(), maxSentSymbolId, currentBatchMaxSymbolId, useSchemaRef);
                 }
-                tableBuffer.setSchemaId(nextSchemaId++);
+
+                encoder.addTable(tableBuffer, useSchemaRef);
             }
-            batchMaxSchemaId = Math.max(batchMaxSchemaId, tableBuffer.getSchemaId());
-            boolean useSchemaRef = tableBuffer.getSchemaId() <= maxSentSchemaId;
+            messageSize = encoder.finishMessage();
+            buffer = encoder.getBuffer();

+            // Race detection: did the wire actor bump gen during encode?
+            if (connectionGeneration == genBefore) {
+                batchMaxSchemaId = currBatchMaxSchemaId;
+                break;
+            }
+            if (++retries >= MAX_SCHEMA_RACE_RETRIES) {
+                throw new LineSenderException(
+                        "schema-reset race exceeded retry limit [" + MAX_SCHEMA_RACE_RETRIES
+                                + "] — wire reconnects are firing faster than the user thread "
+                                + "can encode a single batch");
+            }
             if (LOG.isDebugEnabled()) {
-                LOG.debug("Encoding table [name={}, rows={}, maxSentSymbolId={}, batchMaxId={}, useSchemaRef={}]", tableName, tableBuffer.getRowCount(), maxSentSymbolId, currentBatchMaxSymbolId, useSchemaRef);
+                LOG.debug("Schema-reset race detected mid-encode; retrying [attempt={}]", retries);
             }
-
-            encoder.addTable(tableBuffer, useSchemaRef);
         }
-        int messageSize = encoder.finishMessage();
-        QwpBufferWriter buffer = encoder.getBuffer();

         activeBuffer.ensureCapacity(messageSize);
         activeBuffer.write(buffer.getBufferPtr(), messageSize);
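
A test against the hooks above might look roughly like the following sketch;
newConnectedSenderForTest and appendAndFlushOneRow are hypothetical stand-ins
for whatever the real test fixtures provide, and the assertions assume JUnit.

// Hypothetical test sketch: helper methods are assumptions, not the
// repository's actual fixtures.
@Test
public void testGenerationBumpIsStickyAndResetsOnce() {
    try (QwpWebSocketSender sender = newConnectedSenderForTest()) {
        appendAndFlushOneRow(sender);
        // Fresh connect: no recovery, no reconnect yet.
        assertEquals(0L, sender.getConnectionGenerationForTest());

        // Simulate a wire-side reconnect.
        sender.bumpConnectionGenerationForTest();
        assertEquals(1L, sender.getConnectionGenerationForTest());

        // The next flush detects the divergence and resets schema state...
        appendAndFlushOneRow(sender);
        int maxSchemaIdAfterReset = sender.getMaxSentSchemaIdForTest();

        // ...and the bump is sticky: a further flush without another bump
        // must NOT reset schema state again.
        appendAndFlushOneRow(sender);
        assertEquals(maxSchemaIdAfterReset, sender.getMaxSentSchemaIdForTest());
    }
}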

core/src/main/java/io/questdb/client/cutlass/qwp/client/sf/cursor/CursorSendEngine.java

Lines changed: 18 additions & 0 deletions
@@ -75,6 +75,12 @@ public final class CursorSendEngine implements QuietCloseable {
     private final SegmentRing ring;
     private final long segmentSizeBytes;
     private final long appendDeadlineNanos;
+    // True when the constructor recovered an existing on-disk slot rather
+    // than starting fresh. Read by QwpWebSocketSender during connect to
+    // decide whether to bump connectionGeneration so the first batch
+    // re-publishes schema definitions (the server has no memory of FSNs
+    // we recovered from disk).
+    private final boolean recoveredFromDisk;
     // Number of times appendBlocking observed BACKPRESSURE_NO_SPARE on its first
     // ring.appendOrFsn attempt. One increment per blocking-call that had to wait
     // for the manager (or for ACKs) — not one per spin-park. Producer-thread
@@ -150,6 +156,7 @@ private CursorSendEngine(String sfDir, long segmentSizeBytes, SegmentManager man
         // already on disk and corrupting ACK translation, trim, and replay.
         SegmentRing recovered = memoryMode ? null
                 : SegmentRing.openExisting(sfDir, segmentSizeBytes);
+        this.recoveredFromDisk = recovered != null;
         if (recovered != null) {
             this.ring = recovered;
         } else {
@@ -239,6 +246,17 @@ public void close() {
         ring.close();
     }

+    /**
+     * True when this engine opened against a pre-existing on-disk slot
+     * (i.e. {@code SegmentRing.openExisting} returned a non-null ring at
+     * construction). Memory-mode engines and fresh-disk engines return
+     * false. Used by the sender to decide whether to mark schema state as
+     * needing a reset before the first send.
+     */
+    public boolean wasRecoveredFromDisk() {
+        return recoveredFromDisk;
+    }
+
     /** I/O thread accessor: highest FSN whose frame is fully written. */
     public long publishedFsn() {
         return ring.publishedFsn();

core/src/main/java/io/questdb/client/cutlass/qwp/client/sf/cursor/SegmentRing.java

Lines changed: 13 additions & 1 deletion
@@ -154,7 +154,19 @@ public static SegmentRing openExisting(String sfDir, long maxBytesPerSegment) {
             if (name != null && name.endsWith(".sfa") && !".".equals(name) && !"..".equals(name)) {
                 String path = sfDir + "/" + name;
                 try {
-                    opened.add(MmapSegment.openExisting(path));
+                    MmapSegment seg = MmapSegment.openExisting(path);
+                    // Filter out empty leftovers — typically hot-spare
+                    // segments the manager pre-allocated for a prior
+                    // session that never got rotated into active. They
+                    // carry the provisional baseSeq=0 and frameCount=0,
+                    // which would otherwise collide with the real
+                    // baseSeq=0 segment and trip the contiguity check
+                    // below. No data to recover; close + skip.
+                    if (seg.frameCount() == 0) {
+                        seg.close();
+                    } else {
+                        opened.add(seg);
+                    }
                 } catch (MmapSegmentException ignored) {
                     // Stray file with the .sfa extension but bad header /
                     // unreadable: skip rather than fail the recovery.
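
To see why an empty leftover would trip recovery, here is a toy model of a
baseSeq contiguity check. The real check referenced in the comment above is
not shown in this hunk, so its exact shape here is an assumption.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

// Toy model: a segment is its base sequence number plus how many frames it holds.
record SegMeta(long baseSeq, int frameCount) {}

final class ContiguityCheckSketch {
    // Sorted by baseSeq, segments must be distinct and gap-free: each one
    // starts exactly where the previous ends. A leftover hot-spare with
    // (baseSeq=0, frameCount=0) duplicates the real baseSeq=0 segment and
    // fails the distinctness half of the check, which is why openExisting
    // now drops frameCount==0 segments before this point.
    static void checkContiguous(List<SegMeta> segs) {
        List<SegMeta> sorted = new ArrayList<>(segs);
        sorted.sort(Comparator.comparingLong(SegMeta::baseSeq));
        for (int i = 1; i < sorted.size(); i++) {
            SegMeta prev = sorted.get(i - 1);
            SegMeta cur = sorted.get(i);
            if (cur.baseSeq() == prev.baseSeq()) {
                throw new IllegalStateException("baseSeq collision at " + cur.baseSeq());
            }
            if (cur.baseSeq() != prev.baseSeq() + prev.frameCount()) {
                throw new IllegalStateException("gap before baseSeq=" + cur.baseSeq());
            }
        }
    }
}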
