Skip to content

Commit a38487c

Browse files
committed
remove blocking waits from websocket ACK drain
1 parent 491a029 commit a38487c

3 files changed

Lines changed: 163 additions & 23 deletions

File tree

core/src/main/java/io/questdb/client/cutlass/qwp/client/QwpWebSocketSender.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -400,8 +400,8 @@ public void close() {
400400
// Wait for all batches to be sent and acknowledged before closing
401401
if (sendQueue != null) {
402402
sendQueue.flush();
403-
}
404-
if (inFlightWindow != null) {
403+
sendQueue.awaitPendingAcks();
404+
} else if (inFlightWindow != null) {
405405
inFlightWindow.awaitEmpty();
406406
}
407407
} else {
@@ -581,7 +581,11 @@ public void flush() {
581581
sendQueue.flush();
582582

583583
// Wait for all in-flight batches to be acknowledged by the server
584-
inFlightWindow.awaitEmpty();
584+
if (sendQueue != null) {
585+
sendQueue.awaitPendingAcks();
586+
} else {
587+
inFlightWindow.awaitEmpty();
588+
}
585589

586590
LOG.debug("Flush complete [totalBatches={}, totalBytes={}, totalAcked={}]", sendQueue.getTotalBatchesSent(), sendQueue.getTotalBytesSent(), inFlightWindow.getTotalAcked());
587591
} else {

core/src/main/java/io/questdb/client/cutlass/qwp/client/WebSocketSendQueue.java

Lines changed: 49 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
*/
6565
public class WebSocketSendQueue implements QuietCloseable {
6666

67+
private static final int DRAIN_SPIN_TRIES = 16;
6768
public static final long DEFAULT_ENQUEUE_TIMEOUT_MS = 30_000;
6869
public static final long DEFAULT_SHUTDOWN_TIMEOUT_MS = 10_000;
6970
private static final Logger LOG = LoggerFactory.getLogger(WebSocketSendQueue.class);
@@ -264,7 +265,6 @@ public boolean enqueue(MicrobatchBuffer buffer) {
264265
}
265266
}
266267
}
267-
268268
LOG.debug("Enqueued batch [id={}, bytes={}, rows={}]", buffer.getBatchId(), buffer.getBufferPos(), buffer.getRowCount());
269269
return true;
270270
}
@@ -282,27 +282,27 @@ public void flush() {
282282

283283
long startTime = System.currentTimeMillis();
284284

285-
// Wait under lock - I/O thread will notify when processingCount decrements
285+
// Wait under lock until the queue becomes empty and no batch is being sent.
286286
synchronized (processingLock) {
287287
while (running) {
288288
// Atomically check: queue empty AND not processing
289289
if (isPendingEmpty() && processingCount.get() == 0) {
290290
break; // All done
291291
}
292292

293+
long remaining = enqueueTimeoutMs - (System.currentTimeMillis() - startTime);
294+
if (remaining <= 0) {
295+
throw new LineSenderException("Flush timeout after " + enqueueTimeoutMs + "ms, " +
296+
"queue=" + getPendingSize() + ", processing=" + processingCount.get());
297+
}
298+
293299
try {
294-
processingLock.wait(10);
300+
processingLock.wait(remaining);
295301
} catch (InterruptedException e) {
296302
Thread.currentThread().interrupt();
297303
throw new LineSenderException("Interrupted while flushing", e);
298304
}
299305

300-
// Check timeout
301-
if (System.currentTimeMillis() - startTime > enqueueTimeoutMs) {
302-
throw new LineSenderException("Flush timeout after " + enqueueTimeoutMs + "ms, " +
303-
"queue=" + getPendingSize() + ", processing=" + processingCount.get());
304-
}
305-
306306
// Check for errors
307307
checkError();
308308
}
@@ -314,6 +314,19 @@ public void flush() {
314314
LOG.debug("Flush complete");
315315
}
316316

317+
/**
318+
* Waits for all in-flight batches to be acknowledged.
319+
*/
320+
public void awaitPendingAcks() {
321+
if (inFlightWindow == null) {
322+
return;
323+
}
324+
325+
checkError();
326+
inFlightWindow.awaitEmpty();
327+
checkError();
328+
}
329+
317330
/**
318331
* Returns the last error that occurred in the I/O thread, or null if no error.
319332
*/
@@ -387,27 +400,39 @@ private int getPendingSize() {
387400
return pendingBuffer == null ? 0 : 1;
388401
}
389402

403+
private int idleDuringDrain(int idleCycles) {
404+
if (idleCycles < DRAIN_SPIN_TRIES) {
405+
Thread.onSpinWait();
406+
return idleCycles + 1;
407+
}
408+
Thread.yield();
409+
return DRAIN_SPIN_TRIES;
410+
}
411+
390412
/**
391413
* The main I/O loop that handles both sending batches and receiving ACKs.
392414
* <p>
393415
* Uses a state machine:
394416
* <ul>
395417
* <li>IDLE: block on processingLock.wait() until work arrives</li>
396418
* <li>ACTIVE: non-blocking poll queue, send batches, check for ACKs</li>
397-
* <li>DRAINING: no batches but ACKs pending - poll for ACKs with short wait</li>
419+
* <li>DRAINING: no batches but ACKs pending - poll for ACKs with non-blocking backoff</li>
398420
* </ul>
399421
*/
400422
private void ioLoop() {
401423
LOG.info("I/O loop started");
402424

403425
try {
426+
int drainIdleCycles = 0;
404427
while (running || !isPendingEmpty()) {
405428
MicrobatchBuffer batch = null;
406429
boolean hasInFlight = (inFlightWindow != null && inFlightWindow.getInFlightCount() > 0);
407430
IoState state = computeState(hasInFlight);
431+
boolean receivedAcks = false;
408432

409433
switch (state) {
410434
case IDLE:
435+
drainIdleCycles = 0;
411436
// Nothing to do - wait for work under lock
412437
synchronized (processingLock) {
413438
// Re-check under lock to avoid missed wakeup
@@ -425,7 +450,7 @@ private void ioLoop() {
425450
case DRAINING:
426451
// Try to receive any pending ACKs (non-blocking)
427452
if (hasInFlight && client.isConnected()) {
428-
tryReceiveAcks();
453+
receivedAcks = tryReceiveAcks();
429454
}
430455

431456
// Try to dequeue and send a batch
@@ -452,15 +477,16 @@ private void ioLoop() {
452477
}
453478
}
454479

455-
// In DRAINING state with no work, short wait to avoid busy loop
480+
// In DRAINING state with no work, stay non-blocking and use
481+
// a simple spin/yield backoff.
456482
if (state == IoState.DRAINING && batch == null) {
457-
synchronized (processingLock) {
458-
try {
459-
processingLock.wait(10);
460-
} catch (InterruptedException e) {
461-
if (!running) return;
462-
}
483+
if (receivedAcks) {
484+
drainIdleCycles = 0;
485+
} else {
486+
drainIdleCycles = idleDuringDrain(drainIdleCycles);
463487
}
488+
} else {
489+
drainIdleCycles = 0;
464490
}
465491
break;
466492
}
@@ -553,9 +579,11 @@ private void sendBatch(MicrobatchBuffer batch) {
553579
/**
554580
* Tries to receive ACKs from the server (non-blocking).
555581
*/
556-
private void tryReceiveAcks() {
582+
private boolean tryReceiveAcks() {
583+
boolean received = false;
557584
try {
558585
while (client.tryReceiveFrame(responseHandler)) {
586+
received = true;
559587
// Drain all buffered ACKs before returning to the I/O loop.
560588
}
561589
} catch (Exception e) {
@@ -564,14 +592,15 @@ private void tryReceiveAcks() {
564592
failTransport(new LineSenderException("Error receiving response: " + e.getMessage(), e));
565593
}
566594
}
595+
return received;
567596
}
568597

569598
/**
570599
* I/O loop states for the state machine.
571600
* <ul>
572601
* <li>IDLE: queue empty, no in-flight batches - can block waiting for work</li>
573602
* <li>ACTIVE: have batches to send - non-blocking loop</li>
574-
* <li>DRAINING: queue empty but ACKs pending - poll for ACKs, short wait</li>
603+
* <li>DRAINING: queue empty but ACKs pending - poll for ACKs with non-blocking backoff</li>
575604
* </ul>
576605
*/
577606
private enum IoState {

core/src/test/java/io/questdb/client/test/cutlass/qwp/client/WebSocketSendQueueTest.java

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import io.questdb.client.cutlass.line.LineSenderException;
3131
import io.questdb.client.cutlass.qwp.client.InFlightWindow;
3232
import io.questdb.client.cutlass.qwp.client.MicrobatchBuffer;
33+
import io.questdb.client.cutlass.qwp.client.WebSocketResponse;
3334
import io.questdb.client.cutlass.qwp.client.WebSocketSendQueue;
3435
import io.questdb.client.network.PlainSocketFactory;
3536
import io.questdb.client.std.MemoryTag;
@@ -40,6 +41,8 @@
4041
import java.util.concurrent.CountDownLatch;
4142
import java.util.concurrent.TimeUnit;
4243
import java.util.concurrent.atomic.AtomicBoolean;
44+
import java.util.concurrent.atomic.AtomicInteger;
45+
import java.util.concurrent.atomic.AtomicLong;
4346
import java.util.concurrent.atomic.AtomicReference;
4447

4548
import static org.junit.Assert.*;
@@ -266,6 +269,84 @@ public void testFlushFailsWhenServerClosesConnection() throws Exception {
266269
});
267270
}
268271

272+
@Test
273+
public void testAwaitPendingAcksKeepsDrainNonBlocking() throws Exception {
274+
assertMemoryLeak(() -> {
275+
InFlightWindow window = new InFlightWindow(8, 5_000);
276+
FakeWebSocketClient client = new FakeWebSocketClient();
277+
WebSocketSendQueue queue = null;
278+
MicrobatchBuffer batch0 = sealedBuffer((byte) 1);
279+
MicrobatchBuffer batch1 = sealedBuffer((byte) 2);
280+
CountDownLatch secondBatchSent = new CountDownLatch(1);
281+
AtomicBoolean deliverAcks = new AtomicBoolean(false);
282+
AtomicInteger tryReceivePolls = new AtomicInteger();
283+
AtomicLong highestSent = new AtomicLong(-1);
284+
AtomicReference<Throwable> errorRef = new AtomicReference<>();
285+
286+
try {
287+
client.setSendBehavior((dataPtr, length) -> {
288+
long sent = highestSent.incrementAndGet();
289+
if (sent == 1) {
290+
secondBatchSent.countDown();
291+
}
292+
});
293+
client.setReceiveBehavior((handler, timeout) -> {
294+
throw new AssertionError("receiveFrame() must not be used while draining ACKs");
295+
});
296+
client.setTryReceiveBehavior(handler -> {
297+
tryReceivePolls.incrementAndGet();
298+
if (deliverAcks.get()) {
299+
long sent = highestSent.get();
300+
if (sent >= 0 && window.getInFlightCount() > 0) {
301+
emitAck(handler, sent);
302+
return true;
303+
}
304+
}
305+
return false;
306+
});
307+
308+
queue = new WebSocketSendQueue(client, window, 1_000, 500);
309+
queue.enqueue(batch0);
310+
queue.flush();
311+
312+
CountDownLatch finished = new CountDownLatch(1);
313+
WebSocketSendQueue finalQueue = queue;
314+
Thread waiter = new Thread(() -> {
315+
try {
316+
finalQueue.awaitPendingAcks();
317+
} catch (Throwable t) {
318+
errorRef.set(t);
319+
} finally {
320+
finished.countDown();
321+
}
322+
});
323+
waiter.start();
324+
325+
long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(2);
326+
while (tryReceivePolls.get() == 0 && System.nanoTime() < deadline) {
327+
Thread.onSpinWait();
328+
}
329+
assertTrue("Expected non-blocking ACK polls while draining", tryReceivePolls.get() > 0);
330+
331+
queue.enqueue(batch1);
332+
assertTrue("I/O thread should still send new work while ACK drain is active",
333+
secondBatchSent.await(1, TimeUnit.SECONDS));
334+
335+
deliverAcks.set(true);
336+
337+
assertTrue("awaitPendingAcks should complete once ACK arrives",
338+
finished.await(2, TimeUnit.SECONDS));
339+
assertNull(errorRef.get());
340+
assertEquals(0, window.getInFlightCount());
341+
} finally {
342+
closeQuietly(queue);
343+
batch0.close();
344+
batch1.close();
345+
client.close();
346+
}
347+
});
348+
}
349+
269350
private static void awaitThreadBlocked(Thread thread) throws InterruptedException {
270351
long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(5);
271352
while (System.nanoTime() < deadline) {
@@ -296,6 +377,18 @@ private static void emitBinary(WebSocketFrameHandler handler, byte[] payload) {
296377
}
297378
}
298379

380+
private static void emitAck(WebSocketFrameHandler handler, long sequence) {
381+
WebSocketResponse response = WebSocketResponse.success(sequence);
382+
int size = response.serializedSize();
383+
long ptr = Unsafe.malloc(size, MemoryTag.NATIVE_DEFAULT);
384+
try {
385+
response.writeTo(ptr);
386+
handler.onBinaryMessage(ptr, size);
387+
} finally {
388+
Unsafe.free(ptr, size, MemoryTag.NATIVE_DEFAULT);
389+
}
390+
}
391+
299392
private static MicrobatchBuffer sealedBuffer(byte value) {
300393
MicrobatchBuffer buffer = new MicrobatchBuffer(64);
301394
buffer.writeByte(value);
@@ -312,9 +405,14 @@ private interface TryReceiveBehavior {
312405
boolean tryReceive(WebSocketFrameHandler handler);
313406
}
314407

408+
private interface ReceiveBehavior {
409+
boolean receive(WebSocketFrameHandler handler, int timeout);
410+
}
411+
315412
private static class FakeWebSocketClient extends WebSocketClient {
316413
private volatile TryReceiveBehavior behavior = handler -> false;
317414
private volatile boolean connected = true;
415+
private volatile ReceiveBehavior receiveBehavior = (handler, timeout) -> false;
318416
private volatile SendBehavior sendBehavior = (dataPtr, length) -> {
319417
};
320418

@@ -346,6 +444,15 @@ public void setTryReceiveBehavior(TryReceiveBehavior behavior) {
346444
this.behavior = behavior;
347445
}
348446

447+
public void setReceiveBehavior(ReceiveBehavior receiveBehavior) {
448+
this.receiveBehavior = receiveBehavior;
449+
}
450+
451+
@Override
452+
public boolean receiveFrame(WebSocketFrameHandler handler, int timeout) {
453+
return receiveBehavior.receive(handler, timeout);
454+
}
455+
349456
@Override
350457
public boolean tryReceiveFrame(WebSocketFrameHandler handler) {
351458
return behavior.tryReceive(handler);

0 commit comments

Comments
 (0)