Skip to content

Commit 202917c

Browse files
authored
fix(sql): avoid pyarrow requirement for polars output in DuckDB engine (#9643)
Use `pl.DataFrame(relation)` (Arrow PyCapsule interface) instead of `relation.pl()`, which imports pyarrow. Fixes #9080
1 parent 5700859 commit 202917c

2 files changed

Lines changed: 66 additions & 3 deletions

File tree

marimo/_sql/engines/duckdb.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from collections.abc import Iterator
2323

2424
import duckdb
25+
import polars as pl
2526

2627
# Internal engine names
2728
INTERNAL_DUCKDB_ENGINE = cast(VariableName, "__marimo_duckdb")
@@ -83,9 +84,16 @@ def execute(self, query: str) -> Any:
8384

8485
sql_output_format = self.sql_output_format()
8586

87+
def to_polars() -> pl.DataFrame:
    """Build a polars DataFrame from the enclosing ``relation``.

    The relation is handed straight to the polars ``DataFrame``
    constructor (Arrow PyCapsule interface) rather than going through
    ``relation.pl()``, so that pyarrow is not required.
    """
    # Imported lazily so polars is only loaded when this output is requested.
    from polars import DataFrame

    return DataFrame(relation)
93+
8694
return convert_to_output(
8795
sql_output_format=sql_output_format,
88-
to_polars=lambda: relation.pl(),
96+
to_polars=to_polars,
8997
to_pandas=lambda: relation.df(),
9098
to_native=lambda: relation,
9199
)

tests/_sql/test_duckdb.py

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22

33
from __future__ import annotations
44

5+
import sys
56
from copy import deepcopy
67
from typing import TYPE_CHECKING
8+
from unittest import mock
79

810
import pytest
911

@@ -228,8 +230,6 @@ def test_duckdb_engine_sql_output_formats(
228230
duckdb_connection: duckdb.DuckDBPyConnection,
229231
) -> None:
230232
"""Test DuckDBEngine execute with different SQL output formats."""
231-
from unittest import mock
232-
233233
import pandas as pd
234234
import polars as pl
235235

@@ -290,3 +290,58 @@ def test_duckdb_engine_sql_output_formats(
290290
result = engine.execute("SELECT * FROM test ORDER BY id")
291291
assert isinstance(result, (pd.DataFrame, pl.DataFrame))
292292
assert len(result) == 4
293+
294+
295+
@pytest.mark.skipif(
    not (HAS_DUCKDB and HAS_POLARS),
    reason="DuckDB and Polars not installed",
)
@pytest.mark.parametrize(
    ("sql_output_format", "expected_type_name"),
    [
        ("polars", "DataFrame"),
        ("lazy-polars", "LazyFrame"),
        ("auto", "DataFrame"),
    ],
)
def test_duckdb_engine_polars_no_pyarrow(
    duckdb_connection: duckdb.DuckDBPyConnection,
    sql_output_format: str,
    expected_type_name: str,
) -> None:
    """Check that polars output works while pyarrow cannot be imported.

    The engine converts results with `pl.DataFrame(relation)` (Arrow
    PyCapsule interface) instead of `relation.pl()`. The parametrization
    covers each output format that goes through `to_polars()`: polars,
    lazy-polars, and auto when polars is installed.
    """
    import polars as pl

    # A `None` entry in sys.modules makes a fresh import of that name raise
    # ModuleNotFoundError. Hide `pyarrow` itself plus every `pyarrow.*`
    # submodule that has already been imported.
    hidden_modules = dict.fromkeys(
        module_name
        for module_name in list(sys.modules)
        if module_name == "pyarrow" or module_name.startswith("pyarrow.")
    )
    hidden_modules.setdefault("pyarrow", None)

    block_pyarrow = mock.patch.dict(sys.modules, hidden_modules)
    force_output_format = mock.patch.object(
        DuckDBEngine, "sql_output_format", return_value=sql_output_format
    )

    with block_pyarrow, force_output_format:
        engine = DuckDBEngine(
            duckdb_connection,
            engine_name=VariableName("test_duckdb"),
        )
        frame = engine.execute("SELECT * FROM test ORDER BY id")
        assert isinstance(frame, getattr(pl, expected_type_name))

        # Collect lazy frames so the full polars conversion path is exercised.
        if expected_type_name == "LazyFrame":
            frame = frame.collect()
        assert len(frame) == 4

0 commit comments

Comments
 (0)