|
20 | 20 | from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import ( |
21 | 21 | ParentStreamConfig, |
22 | 22 | SubstreamPartitionRouter, |
| 23 | + iterate_with_last_flag, |
23 | 24 | ) |
24 | 25 | from airbyte_cdk.sources.declarative.requesters.request_option import ( |
25 | 26 | RequestOption, |
@@ -611,7 +612,8 @@ def test_request_option( |
611 | 612 | ), |
612 | 613 | { |
613 | 614 | "first_stream": { |
614 | | - "lookback_window": 0, |
| 615 | + "lookback_window": 1, |
| 616 | + "state": {"cursor": "2022-01-01"}, |
615 | 617 | "states": [ |
616 | 618 | {"cursor": {"cursor": "2021-01-02"}, "partition": {"slice": "first"}}, |
617 | 619 | {"cursor": {"cursor": "2022-01-01"}, "partition": {"slice": "second"}}, |
@@ -1079,3 +1081,225 @@ def test_cartesian_product_stream_slicer_warning_log_message( |
1079 | 1081 | assert warning_message in logged_warnings |
1080 | 1082 | else: |
1081 | 1083 | assert warning_message not in logged_warnings |
| 1084 | + |
| 1085 | + |
@pytest.mark.parametrize(
    "input_iterable,expected_output",
    [
        # An empty input is expected to produce a single (None, True) pair.
        pytest.param(
            [],
            [(None, True)],
            id="empty_generator_yields_none_sentinel",
        ),
        pytest.param(
            [1],
            [(1, True)],
            id="single_item",
        ),
        pytest.param(
            [1, 2],
            [(1, False), (2, True)],
            id="two_items",
        ),
        pytest.param(
            [1, 2, 3],
            [(1, False), (2, False), (3, True)],
            id="three_items",
        ),
        pytest.param(
            ["a", "b"],
            [("a", False), ("b", True)],
            id="string_items",
        ),
    ],
)
def test_iterate_with_last_flag(input_iterable, expected_output):
    """Check the (item, is_last) pairs produced by ``iterate_with_last_flag``.

    Each case feeds a list to the helper and compares the fully consumed
    output with the expected pairs: the flag is ``True`` only on the final
    pair, and an empty input yields one ``(None, True)`` pair.
    """
    assert list(iterate_with_last_flag(input_iterable)) == expected_output
| 1099 | + |
| 1100 | + |
def test_substream_partition_router_no_cursor_update_when_partition_has_no_records():
    """
    Verify that a parent partition without records contributes no slices.

    The parent stream exposes two partitions: the first holds two records, the
    second holds none. Only the two records of the first partition are expected
    to surface as substream slices.
    """
    first_slice = StreamSlice(partition={"slice": "first"}, cursor_slice={})
    second_slice = StreamSlice(partition={"slice": "second"}, cursor_slice={})

    populated_partition = InMemoryPartition(
        "partition_1",
        "first_stream",
        first_slice,
        _build_records_for_slice([{"id": "record_1"}, {"id": "record_2"}], first_slice),
    )
    # Second partition deliberately carries an empty record list.
    empty_partition = InMemoryPartition("partition_2", "first_stream", second_slice, [])

    parent_config = ParentStreamConfig(
        stream=MockStream([populated_partition, empty_partition], "first_stream"),
        parent_key="id",
        partition_field="partition_field",
        parameters={},
        config={},
    )
    router = SubstreamPartitionRouter(
        parent_stream_configs=[parent_config],
        parameters={},
        config={},
    )

    expected_slices = [
        {"partition_field": "record_1", "parent_slice": {"slice": "first"}},
        {"partition_field": "record_2", "parent_slice": {"slice": "first"}},
    ]
    assert list(router.stream_slices()) == expected_slices
| 1150 | + |
| 1151 | + |
def test_substream_partition_router_handles_empty_parent_partitions():
    """
    Verify that a parent stream with no partitions at all yields no slices.

    The mock parent stream is built from an empty partition list; consuming
    ``stream_slices()`` must complete and produce an empty result.
    """
    parent_without_partitions = ParentStreamConfig(
        stream=MockStream([], "first_stream"),
        parent_key="id",
        partition_field="partition_field",
        parameters={},
        config={},
    )
    router = SubstreamPartitionRouter(
        parent_stream_configs=[parent_without_partitions],
        parameters={},
        config={},
    )

    assert list(router.stream_slices()) == []
| 1176 | + |
| 1177 | + |
def test_substream_partition_router_closes_all_partitions_even_when_no_records():
    """
    Verify that ``cursor.close_partition()`` is invoked once per parent partition.

    Three partitions are supplied; the middle one has no records. Slices are
    expected only for the first and third partitions, while the mocked cursor
    must see all three partitions closed, in order.
    """
    slice_first, slice_second, slice_third = (
        StreamSlice(partition={"slice": label}, cursor_slice={})
        for label in ("first", "second", "third")
    )

    leading_partition = InMemoryPartition(
        "partition_1",
        "first_stream",
        slice_first,
        _build_records_for_slice([{"id": "record_1"}], slice_first),
    )
    # The middle partition has no records at all.
    recordless_partition = InMemoryPartition("partition_2", "first_stream", slice_second, [])
    trailing_partition = InMemoryPartition(
        "partition_3",
        "first_stream",
        slice_third,
        _build_records_for_slice([{"id": "record_3"}], slice_third),
    )
    all_partitions = [leading_partition, recordless_partition, trailing_partition]

    parent_cursor = Mock()
    parent_cursor.stream_slices.return_value = []

    parent_config = ParentStreamConfig(
        stream=MockStream(all_partitions, "first_stream", cursor=parent_cursor),
        parent_key="id",
        partition_field="partition_field",
        parameters={},
        config={},
    )
    router = SubstreamPartitionRouter(
        parent_stream_configs=[parent_config],
        parameters={},
        config={},
    )

    emitted = list(router.stream_slices())

    assert emitted == [
        {"partition_field": "record_1", "parent_slice": {"slice": "first"}},
        {"partition_field": "record_3", "parent_slice": {"slice": "third"}},
    ]

    assert parent_cursor.close_partition.call_count == 3

    # First positional argument of each close_partition() call, in call order.
    closed_in_order = [
        recorded_call[0][0] for recorded_call in parent_cursor.close_partition.call_args_list
    ]
    assert closed_in_order == all_partitions
| 1243 | + |
| 1244 | + |
def test_substream_partition_router_closes_partition_even_when_parent_key_missing():
    """
    Verify that a partition is closed even if its record lacks the parent key.

    The first partition's record has the configured ``"id"`` key; the second
    partition's record does not. Only the first record is expected to become a
    slice, yet the mocked cursor must still see both partitions closed, in order.
    """
    slice_first = StreamSlice(partition={"slice": "first"}, cursor_slice={})
    slice_second = StreamSlice(partition={"slice": "second"}, cursor_slice={})

    # Record carries the expected "id" key.
    keyed_partition = InMemoryPartition(
        "partition_1",
        "first_stream",
        slice_first,
        _build_records_for_slice([{"id": "record_1"}], slice_first),
    )
    # Record has no "id" key, so no slice can be built from it.
    keyless_partition = InMemoryPartition(
        "partition_2",
        "first_stream",
        slice_second,
        _build_records_for_slice([{"other_field": "value"}], slice_second),
    )
    both_partitions = [keyed_partition, keyless_partition]

    parent_cursor = Mock()
    parent_cursor.stream_slices.return_value = []

    parent_config = ParentStreamConfig(
        stream=MockStream(both_partitions, "first_stream", cursor=parent_cursor),
        parent_key="id",
        partition_field="partition_field",
        parameters={},
        config={},
    )
    router = SubstreamPartitionRouter(
        parent_stream_configs=[parent_config],
        parameters={},
        config={},
    )

    emitted = list(router.stream_slices())

    # Only the record that has the parent key turns into a slice.
    assert emitted == [
        {"partition_field": "record_1", "parent_slice": {"slice": "first"}},
    ]

    # Both partitions must be closed, including the one whose record was unusable.
    assert parent_cursor.close_partition.call_count == 2

    closed_in_order = [
        recorded_call[0][0] for recorded_call in parent_cursor.close_partition.call_args_list
    ]
    assert closed_in_order == both_partitions
0 commit comments