|
8 | 8 |
|
9 | 9 | package org.opensearch.be.datafusion; |
10 | 10 |
|
11 | | -import org.apache.arrow.c.ArrowArray; |
12 | | -import org.apache.arrow.c.ArrowSchema; |
13 | 11 | import org.apache.arrow.c.CDataDictionaryProvider; |
14 | | -import org.apache.arrow.c.Data; |
15 | 12 | import org.apache.arrow.memory.BufferAllocator; |
16 | 13 | import org.apache.arrow.vector.VectorSchemaRoot; |
17 | | -import org.apache.arrow.vector.types.pojo.Field; |
18 | 14 | import org.apache.arrow.vector.types.pojo.Schema; |
19 | 15 | import org.opensearch.analytics.spi.ExchangeSink; |
20 | 16 | import org.opensearch.analytics.spi.ExchangeSinkContext; |
21 | | -import org.opensearch.be.datafusion.nativelib.NativeBridge; |
22 | 17 | import org.opensearch.be.datafusion.nativelib.StreamHandle; |
23 | 18 | import org.opensearch.core.action.ActionListener; |
24 | 19 |
|
|
28 | 23 | import java.util.concurrent.ExecutionException; |
29 | 24 | import java.util.function.Consumer; |
30 | 25 |
|
31 | | -import static org.apache.arrow.c.Data.importField; |
32 | | - |
33 | 26 | /** |
34 | 27 | * Shared lifecycle skeleton for coordinator-side {@link ExchangeSink}s backed by a native |
35 | 28 | * DataFusion local session. Subclasses customise per-batch handling and the close-time |
|
41 | 34 | * and always closes the supplied {@link VectorSchemaRoot} in {@code finally} regardless |
42 | 35 | * of whether {@link #feedBatchUnderLock} succeeds.</li> |
43 | 36 | * <li>{@link #close} flips {@link #closed} once under {@link #feedLock}, runs the |
44 | | - * subclass-specific {@link #closeUnderLock} hook, and unconditionally closes |
45 | | - * {@link #session} in {@code finally}, accumulating any failures and rethrowing.</li> |
| 37 | + * subclass-specific {@link #closeUnderLock} hook, and rethrows any accumulated |
| 38 | + * failure. Subclasses must close {@link #session} themselves inside |
| 39 | + * {@link #closeUnderLock} (typically last, after any owned native streams).</li> |
46 | 40 | * <li>The downstream from {@link ExchangeSinkContext#downstream()} is intentionally NOT |
47 | 41 | * closed here — it accumulates drained results consumed by the walker after the |
48 | 42 | * sink is done.</li> |
@@ -179,38 +173,30 @@ public final void close() { |
179 | 173 | protected abstract void feedBatchUnderLock(VectorSchemaRoot batch); |
180 | 174 |
|
181 | 175 | /** |
182 | | - * Subclass-specific shutdown. Runs after {@link #closed} is set and before |
183 | | - * {@link #session} is closed. Implementations should close their owned native resources |
184 | | - * (sender, output stream, accumulated FFI structs, …) and drain any pending output. |
| 176 | + * Subclass-specific shutdown. Runs after {@link #closed} is set. Implementations must |
| 177 | + * close all owned native resources including {@link #session} — close owned streams |
| 178 | + * before the session. |
185 | 179 | * |
186 | 180 | * @return the first failure encountered (use {@link #accumulate(Throwable, Throwable)} |
187 | 181 | * when multiple steps may fail), or {@code null} on clean shutdown. |
188 | 182 | */ |
189 | 183 | protected abstract Throwable closeUnderLock(); |
190 | 184 |
|
191 | 185 | /** |
192 | | - * Drains a native output stream into {@link ExchangeSinkContext#downstream()}, importing |
193 | | - * each {@link ArrowArray} into a fresh {@link VectorSchemaRoot} on the Java side. |
| 186 | + * Drains a native output stream into {@link ExchangeSinkContext#downstream()}, |
| 187 | + * importing each native batch into a fresh {@link VectorSchemaRoot}. |
| 188 | + * |
| 189 | + * <p>Uses {@link DatafusionResultStream.BatchIterator} directly (instead of |
| 190 | + * {@link DatafusionResultStream}) so the caller retains ownership of {@code outStream} — |
| 191 | + * the iterator handles schema and per-batch allocation with the dictionary provider this |
| 192 | + * method opens and closes, but does not close the underlying stream handle. |
194 | 193 | */ |
195 | 194 | protected final void drainOutputIntoDownstream(StreamHandle outStream) { |
196 | 195 | BufferAllocator alloc = ctx.allocator(); |
197 | 196 | try (CDataDictionaryProvider dictProvider = new CDataDictionaryProvider()) { |
198 | | - long schemaAddr = asyncCall(listener -> NativeBridge.streamGetSchema(outStream.getPointer(), listener)); |
199 | | - Schema outSchema; |
200 | | - try (ArrowSchema arrowSchema = ArrowSchema.wrap(schemaAddr)) { |
201 | | - Field structField = importField(alloc, arrowSchema, dictProvider); |
202 | | - outSchema = new Schema(structField.getChildren(), structField.getMetadata()); |
203 | | - } |
204 | | - while (true) { |
205 | | - long arrayAddr = asyncCall(listener -> NativeBridge.streamNext(runtimeHandle.get(), outStream.getPointer(), listener)); |
206 | | - if (arrayAddr == 0) { |
207 | | - break; |
208 | | - } |
209 | | - VectorSchemaRoot vsr = VectorSchemaRoot.create(outSchema, alloc); |
210 | | - try (ArrowArray arrowArray = ArrowArray.wrap(arrayAddr)) { |
211 | | - Data.importIntoVectorSchemaRoot(alloc, arrowArray, vsr, dictProvider); |
212 | | - } |
213 | | - ctx.downstream().feed(vsr); |
| 197 | + DatafusionResultStream.BatchIterator it = new DatafusionResultStream.BatchIterator(outStream, alloc, dictProvider); |
| 198 | + while (it.hasNext()) { |
| 199 | + ctx.downstream().feed(it.next().getArrowRoot()); |
214 | 200 | } |
215 | 201 | } |
216 | 202 | } |
|
0 commit comments