@@ -483,104 +483,112 @@ void DuckDBPyResult::PromoteMaterializedToArrow(idx_t batch_size) {
483483 result = std::move (new_result);
484484}
485485
486- // Re-feed a materialized result as a lazy stream on the user's own context. The
487- // StreamQueryResult co-owns the context, so conversion survives `del conn` and runs under a
488- // live transaction (geometry/extension correctness, #492).
489- void DuckDBPyResult::PromoteMaterializedToStream () {
490- D_ASSERT (result->type == QueryResultType::MATERIALIZED_RESULT );
491- auto client_context = result->client_properties .client_context ;
492- if (!client_context) {
493- throw InternalException (" Cannot promote result to an Arrow stream: the originating client context is gone" );
486+ template <typename T>
487+ T DuckDBPyResult::RunWithArrowSchema (const std::function<T(const ArrowSchema &)> &fun, bool dedup_col_names) {
488+ D_ASSERT (result);
489+ if (!result->client_properties .client_context ) {
490+ throw ConnectionException (" Cannot fetch arrow schema without a valid connection" );
494491 }
495- auto context = client_context->shared_from_this ();
496- auto &materialized = result->Cast <MaterializedQueryResult>();
497- auto names = result->names ;
498- auto select = MakeColumnDataScanStatement (materialized.TakeCollection (), names);
492+ auto ctx = result->client_properties .client_context ->shared_from_this ();
499493
500- unique_ptr<QueryResult> new_result;
501- {
502- D_ASSERT (py::gil_check ());
503- py::gil_scoped_release release;
504- auto pending_query = context->PendingQuery (std::move (select), QueryParameters (true ));
505- new_result = DuckDBPyConnection::CompletePendingQuery (*pending_query);
506- }
507- if (new_result->HasError ()) {
508- new_result->ThrowError ();
494+ auto names = result->names ;
495+ if (dedup_col_names) {
496+ QueryResult::DeduplicateColumns (names);
509497 }
510- new_result->names = std::move (names);
511- result = std::move (new_result);
498+
499+ ArrowSchema arrow_schema;
500+ ctx->RunFunctionInTransaction (
501+ [&] { ArrowConverter::ToArrowSchema (&arrow_schema, result->types , names, result->client_properties ); });
502+
503+ return fun (arrow_schema);
512504}
513505
514- duckdb::pyarrow::Table DuckDBPyResult::FetchArrowTable ( idx_t rows_per_batch, bool to_polars) {
515- if (!result ) {
516- throw InvalidInputException ( " There is no query result" );
517- }
518- // ARROW_RESULT: fresh collector output. MATERIALIZED: re-feed for parallel conversion.
519- // STREAM: a live result, converted directly below (never materialized to re-feed).
506+ duckdb::pyarrow::Table DuckDBPyResult::MaterializedResultToArrowTable ( const ArrowSchema &arrow_schema,
507+ const idx_t rows_per_batch ) {
508+ D_ASSERT ( result);
509+ D_ASSERT (result-> type == QueryResultType:: MATERIALIZED_RESULT || result-> type == QueryResultType:: ARROW_RESULT );
510+
511+ auto pyarrow_schema = pyarrow::ToPyArrowSchema (arrow_schema);
520512 if (result->type == QueryResultType::MATERIALIZED_RESULT ) {
521513 PromoteMaterializedToArrow (rows_per_batch);
522514 }
523-
524- auto names = result->names ;
525- if (to_polars) {
526- QueryResult::DeduplicateColumns (names);
515+ py::list batches;
516+ auto &arrow_result = result->Cast <ArrowQueryResult>();
517+ auto arrays = arrow_result.ConsumeArrays ();
518+ for (auto &array : arrays) {
519+ ArrowArray data = array->arrow_array ;
520+ array->arrow_array .release = nullptr ;
521+ TransformDuckToArrowChunk (pyarrow_schema, data, batches);
527522 }
523+ return pyarrow::ToArrowTable (std::move (batches), pyarrow_schema);
524+ }
528525
529- // Fetch the schema once
530- ArrowSchema arrow_schema;
531- ArrowConverter::ToArrowSchema (&arrow_schema, result->types , names, result->client_properties );
532- auto pyarrow_schema = pyarrow::ToPyArrowSchema (arrow_schema);
533-
534- py::list batches;
535- if (result->type == QueryResultType::ARROW_RESULT ) {
536- auto &arrow_result = result->Cast <ArrowQueryResult>();
537- auto arrays = arrow_result.ConsumeArrays ();
538- for (auto &array : arrays) {
539- ArrowArray data = array->arrow_array ;
540- array->arrow_array .release = nullptr ;
541- TransformDuckToArrowChunk (pyarrow_schema, data, batches);
542- }
543- } else {
544- // STREAM_RESULT: pull the live stream directly into Arrow batches.
545- QueryResultChunkScanState scan_state (*result);
546- while (true ) {
547- ArrowArray data;
548- idx_t count;
549- {
550- D_ASSERT (py::gil_check ());
551- py::gil_scoped_release release;
552- count = ArrowUtil::FetchChunk (scan_state, result->client_properties , rows_per_batch, &data,
553- ArrowTypeExtensionData::GetExtensionTypes (
554- *result->client_properties .client_context , result->types ));
555- }
556- if (count == 0 ) {
557- break ;
558- }
559- TransformDuckToArrowChunk (pyarrow_schema, data, batches);
560- }
526+ duckdb::pyarrow::Table DuckDBPyResult::FetchArrowTable (const idx_t rows_per_batch, const bool to_polars) {
527+ if (!result) {
528+ throw InvalidInputException (" There is no query result" );
561529 }
562530
563- return pyarrow::ToArrowTable (std::move (batches), pyarrow_schema);
531+ return RunWithArrowSchema<duckdb::pyarrow::Table>(
532+ [&](const ArrowSchema &schema) -> duckdb::pyarrow::Table {
533+ if (result->type == QueryResultType::MATERIALIZED_RESULT || result->type == QueryResultType::ARROW_RESULT ) {
534+ return MaterializedResultToArrowTable (schema, rows_per_batch);
535+ }
536+ if (result->type != QueryResultType::STREAM_RESULT ) {
537+ throw InternalException (" FetchArrowTable called with unsupported query result: %d" , result->type );
538+ }
539+ auto pyarrow_schema = pyarrow::ToPyArrowSchema (schema);
540+ py::list batches;
541+ QueryResultChunkScanState scan_state (*result);
542+ while (true ) {
543+ ArrowArray data;
544+ idx_t count;
545+ {
546+ D_ASSERT (py::gil_check ());
547+ py::gil_scoped_release release;
548+ count = ArrowUtil::FetchChunk (scan_state, result->client_properties , rows_per_batch, &data,
549+ ArrowTypeExtensionData::GetExtensionTypes (
550+ *result->client_properties .client_context , result->types ));
551+ }
552+ if (count == 0 ) {
553+ break ;
554+ }
555+ TransformDuckToArrowChunk (pyarrow_schema, data, batches);
556+ }
557+ return pyarrow::ToArrowTable (std::move (batches), pyarrow_schema);
558+ },
559+ to_polars);
564560}
565561
566562ArrowArrayStream DuckDBPyResult::FetchArrowArrayStream (idx_t rows_per_batch) {
567563 if (!result) {
568564 throw InvalidInputException (" There is no query result" );
569565 }
570- // Re-feed a materialized result to get a context-owning stream; a StreamQueryResult is
571- // wrapped directly (already has a live context).
572- if (result->type == QueryResultType::MATERIALIZED_RESULT ) {
573- PromoteMaterializedToStream ();
566+ if (result->type != QueryResultType::STREAM_RESULT ) {
567+ throw InternalException (" FetchArrowArrayStream called with unsupported query result: %d" , result->type );
574568 }
575569 // The wrapper is owned by the ArrowArrayStream's private_data (released with the stream).
576- ResultArrowArrayStreamWrapper * result_stream = new ResultArrowArrayStreamWrapper (std::move (result), rows_per_batch);
570+ const auto result_stream = new ResultArrowArrayStreamWrapper (std::move (result), rows_per_batch);
577571 return result_stream->stream ;
578572}
579573
580574duckdb::pyarrow::RecordBatchReader DuckDBPyResult::FetchRecordBatchReader (idx_t rows_per_batch) {
581575 if (!result) {
582576 throw InvalidInputException (" There is no query result" );
583577 }
578+
579+ if (result->type == QueryResultType::MATERIALIZED_RESULT || result->type == QueryResultType::ARROW_RESULT ) {
580+ constexpr bool dedup_column_names = false ;
581+ return RunWithArrowSchema<duckdb::pyarrow::RecordBatchReader>(
582+ [&](const ArrowSchema &schema) -> duckdb::pyarrow::RecordBatchReader {
583+ const auto table = MaterializedResultToArrowTable (schema, rows_per_batch);
584+ return py::cast<duckdb::pyarrow::RecordBatchReader>(
585+ table.attr (" to_reader" )(py::arg (" max_chunksize" ) = rows_per_batch));
586+ },
587+ dedup_column_names);
588+ }
589+ if (result->type != QueryResultType::STREAM_RESULT ) {
590+ throw InternalException (" FetchRecordBatchReader called with unsupported query result: %d" , result->type );
591+ }
584592 py::gil_scoped_acquire acquire;
585593 auto pyarrow_lib_module = py::module::import (" pyarrow" ).attr (" lib" );
586594 auto record_batch_reader_func = pyarrow_lib_module.attr (" RecordBatchReader" ).attr (" _import_from_c" );
@@ -601,11 +609,23 @@ static void ArrowArrayStreamPyCapsuleDestructor(PyObject *object) {
601609 delete stream;
602610}
603611
604- py::object DuckDBPyResult::FetchArrowCapsule (idx_t rows_per_batch) {
612+ py::object DuckDBPyResult::FetchArrowCapsule (const idx_t rows_per_batch) {
605613 if (!result) {
606614 throw InvalidInputException (" There is no query result" );
607615 }
608- // Lazy streaming capsule backed by a context-owning stream (see FetchArrowArrayStream).
616+
617+ constexpr bool dedup_column_names = false ;
618+ if (result->type == QueryResultType::MATERIALIZED_RESULT || result->type == QueryResultType::ARROW_RESULT ) {
619+ return RunWithArrowSchema<py::object>(
620+ [&](const ArrowSchema &schema) -> py::object {
621+ const auto table = MaterializedResultToArrowTable (schema, rows_per_batch);
622+ return table.attr (" __arrow_c_stream__" )();
623+ },
624+ dedup_column_names);
625+ }
626+ if (result->type != QueryResultType::STREAM_RESULT ) {
627+ throw InternalException (" FetchArrowCapsule called with unsupported query result: %d" , result->type );
628+ }
609629 auto inner_stream = FetchArrowArrayStream (rows_per_batch);
610630 auto stream = new ArrowArrayStream ();
611631 *stream = inner_stream;
0 commit comments