|
| 1 | +# |
| 2 | +# Copyright (c) 2025 Airbyte, Inc., all rights reserved. |
| 3 | +# |
| 4 | + |
| 5 | +import threading |
| 6 | +from queue import Queue |
| 7 | + |
| 8 | +import pytest |
| 9 | + |
| 10 | +from airbyte_cdk.models import ( |
| 11 | + AirbyteControlConnectorConfigMessage, |
| 12 | + AirbyteControlMessage, |
| 13 | + AirbyteMessage, |
| 14 | + AirbyteStateMessage, |
| 15 | + AirbyteStateType, |
| 16 | + Level, |
| 17 | + OrchestratorType, |
| 18 | + Type, |
| 19 | +) |
| 20 | +from airbyte_cdk.sources.message.concurrent_repository import ConcurrentMessageRepository |
| 21 | +from airbyte_cdk.sources.message.repository import InMemoryMessageRepository |
| 22 | + |
| 23 | + |
def _make_state_message(stream_name: str = "test_stream") -> AirbyteMessage:
    """Build a STREAM-typed STATE message whose data payload names *stream_name*."""
    state = AirbyteStateMessage(
        type=AirbyteStateType.STREAM,
        data={"stream_name": stream_name},
    )
    return AirbyteMessage(type=Type.STATE, state=state)
| 29 | + |
| 30 | + |
def _make_control_message() -> AirbyteMessage:
    """Build a CONNECTOR_CONFIG control message carrying a fixed dummy config."""
    connector_config = AirbyteControlConnectorConfigMessage(config={"key": "value"})
    control = AirbyteControlMessage(
        type=OrchestratorType.CONNECTOR_CONFIG,
        emitted_at=0,
        connectorConfig=connector_config,
    )
    return AirbyteMessage(type=Type.CONTROL, control=control)
| 40 | + |
| 41 | + |
@pytest.fixture()
def small_queue() -> Queue:
    """A capacity-2 queue, small enough for tests to fill and overflow easily."""
    bounded: Queue = Queue(maxsize=2)
    return bounded
| 45 | + |
| 46 | + |
@pytest.fixture()
def repo(small_queue: Queue) -> ConcurrentMessageRepository:
    """A ConcurrentMessageRepository wired to the small bounded queue fixture."""
    decorated = InMemoryMessageRepository()
    return ConcurrentMessageRepository(small_queue, decorated)
| 50 | + |
| 51 | + |
def test_emit_message_puts_on_queue_when_space_available() -> None:
    """A message emitted while the queue has capacity lands on the queue itself."""
    roomy_queue: Queue = Queue(maxsize=100)
    repository = ConcurrentMessageRepository(roomy_queue, InMemoryMessageRepository())

    control_msg = _make_control_message()
    repository.emit_message(control_msg)

    assert roomy_queue.empty() is False
    assert roomy_queue.get_nowait() == control_msg
| 62 | + |
| 63 | + |
def test_emit_message_buffers_when_queue_full_on_consumer_thread(
    small_queue: Queue, repo: ConcurrentMessageRepository
) -> None:
    """On the consumer thread, a full queue routes the message into _pending."""
    for filler in ("filler_1", "filler_2"):
        small_queue.put(filler)
    assert small_queue.full()

    state_msg = _make_state_message()
    repo.emit_message(state_msg)

    assert len(repo._pending) == 1
    assert repo._pending[0] == state_msg
| 77 | + |
| 78 | + |
def test_consume_queue_drains_pending_buffer(
    small_queue: Queue, repo: ConcurrentMessageRepository
) -> None:
    """Overflow messages buffered on a full queue come back out of consume_queue in order."""
    for filler in ("filler_1", "filler_2"):
        small_queue.put(filler)

    first = _make_state_message("stream_1")
    second = _make_state_message("stream_2")
    for message in (first, second):
        repo.emit_message(message)

    assert list(repo.consume_queue()) == [first, second]
    assert len(repo._pending) == 0
| 94 | + |
| 95 | + |
def test_consume_queue_empty_when_no_pending(repo: ConcurrentMessageRepository) -> None:
    """With nothing buffered, consume_queue produces an empty iterator."""
    drained = [message for message in repo.consume_queue()]
    assert drained == []
| 99 | + |
| 100 | + |
def test_log_message_buffers_when_queue_full_on_consumer_thread(
    small_queue: Queue, repo: ConcurrentMessageRepository
) -> None:
    """log_message on the consumer thread must not block when the queue is full."""
    for filler in ("filler_1", "filler_2"):
        small_queue.put(filler)

    repo.log_message(Level.INFO, lambda: {"message": "test log"})

    assert len(repo._pending) == 1
| 111 | + |
| 112 | + |
def test_worker_thread_uses_blocking_put() -> None:
    """Worker (non-consumer) threads should block on a full queue for back-pressure.

    The worker emits into a size-1 queue that is already full; it must not
    complete until the main thread frees a slot by consuming the filler item.
    """
    queue: Queue = Queue(maxsize=1)
    repo = ConcurrentMessageRepository(queue, InMemoryMessageRepository())

    # Fill the queue so the worker's put() has no free slot.
    queue.put("filler")

    worker_started = threading.Event()
    worker_done = threading.Event()

    def worker_emit() -> None:
        worker_started.set()
        repo.emit_message(_make_state_message())  # expected to block here
        worker_done.set()

    t = threading.Thread(target=worker_emit, daemon=True)
    t.start()

    # Event.wait() returns False on timeout; assert the worker actually
    # started so a startup failure is reported here rather than as a
    # misleading "blocked worker" result below. (The original discarded
    # this return value.)
    assert worker_started.wait(timeout=1.0), "Worker thread never started"
    assert not worker_done.wait(timeout=0.5), "Worker should be blocked on full queue"

    # Free one slot; the blocked put() should now succeed.
    queue.get()
    assert worker_done.wait(timeout=2.0), "Worker should complete after queue space freed"
    t.join(timeout=2.0)
| 137 | + |
| 138 | + |
def test_main_thread_does_not_deadlock_on_full_queue(
    small_queue: Queue, repo: ConcurrentMessageRepository
) -> None:
    """Regression check for the consumer-thread deadlock.

    Without the fix, emitting on a full queue from the main thread (the sole
    consumer) would block forever on queue.put() with nobody left to drain it.
    """
    for record in ("record_1", "record_2"):
        small_queue.put(record)
    assert small_queue.full()

    state_msg = _make_state_message("contact_lists")
    # This call must return immediately instead of blocking.
    repo.emit_message(state_msg)

    buffered = list(repo.consume_queue())
    assert len(buffered) == 1
    assert buffered[0] == state_msg
| 157 | + |
| 158 | + |
def test_ordering_preserved_across_queue_and_pending(
    small_queue: Queue, repo: ConcurrentMessageRepository
) -> None:
    """Overall message order survives the split between the queue and _pending."""
    messages = [_make_state_message(f"stream_{i}") for i in (1, 2, 3)]

    repo.emit_message(messages[0])
    repo.emit_message(messages[1])
    assert small_queue.qsize() == 2

    repo.emit_message(messages[2])
    assert len(repo._pending) == 1

    direct = [small_queue.get_nowait() for _ in range(2)]
    overflow = list(repo.consume_queue())

    assert direct + overflow == messages
0 commit comments