Skip to content

Commit c42c9e7

Browse files
authored
Revert "fix: use PyArrowFileIO for S3 access in get_dbt_model_as_dataframe (#2218)" (#2222)
This reverts commit 4a31285.
1 parent 7be6cff commit c42c9e7

1 file changed

Lines changed: 7 additions & 18 deletions

File tree

packages/ol-orchestrate-lib/src/ol_orchestrate/lib/glue_helper.py

Lines changed: 7 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -90,34 +90,23 @@ def create_or_update_table(
9090

9191

9292
def get_dbt_model_as_dataframe(database_name: str, table_name: str) -> pl.LazyFrame:
93-
"""Retrieve a dbt model from AWS Glue as a Polars LazyFrame.
93+
"""Retrieve a dbt model from AWS Glue as a Polars DataFrame.
9494
9595
This function fetches table metadata from AWS Glue and loads the Iceberg
96-
table data into a Polars LazyFrame.
97-
98-
``PyArrowFileIO`` is used so that PyIceberg reads S3 data via PyArrow's
99-
native C++ S3 client instead of the default ``FsspecFileIO`` (which relies
100-
on aiobotocore / aiohttp). After the aiobotocore 3.4.0 → 3.5.0 bump
101-
deployed around 2026-04-27, botocore's lazy loader cache was populated
102-
inside aiobotocore's async event loop thread, blocking all pending S3
103-
coroutines and causing Dagster runs to hang indefinitely.
104-
``PyArrowFileIO`` bypasses aiobotocore entirely and is not affected.
96+
table data into a Polars DataFrame.
10597
10698
Args:
10799
database_name: The Glue database name containing the table
108100
table_name: The name of the table to retrieve
109101
110102
Returns:
111-
A Polars LazyFrame containing the table data
103+
A Polars DataFrame containing the table data
112104
113105
Raises:
114-
Exception: If loading the Iceberg table from Glue or converting it to
115-
a Polars LazyFrame fails.
106+
KeyError: If the table metadata doesn't contain the expected fields
107+
boto3 exceptions: If the AWS Glue API call fails
116108
"""
117-
glue = GlueCatalog(
118-
"default",
119-
client=boto3.client("glue", region_name="us-east-1"),
120-
**{"py-io-impl": "pyiceberg.io.pyarrow.PyArrowFileIO"},
121-
)
109+
glue = GlueCatalog("default", client=boto3.client("glue", region_name="us-east-1"))
122110
table = glue.load_table(f"{database_name}.{table_name}")
111+
123112
return table.to_polars()

0 commit comments

Comments
 (0)