@@ -90,34 +90,23 @@ def create_or_update_table(
 
 
 def get_dbt_model_as_dataframe(database_name: str, table_name: str) -> pl.LazyFrame:
-    """Retrieve a dbt model from AWS Glue as a Polars LazyFrame.
+    """Retrieve a dbt model from AWS Glue as a Polars DataFrame.
 
     This function fetches table metadata from AWS Glue and loads the Iceberg
-    table data into a Polars LazyFrame.
-
-    ``PyArrowFileIO`` is used so that PyIceberg reads S3 data via PyArrow's
-    native C++ S3 client instead of the default ``FsspecFileIO`` (which relies
-    on aiobotocore / aiohttp). After the aiobotocore 3.4.0 → 3.5.0 bump
-    deployed around 2026-04-27, botocore's lazy loader cache was populated
-    inside aiobotocore's async event loop thread, blocking all pending S3
-    coroutines and causing Dagster runs to hang indefinitely.
-    ``PyArrowFileIO`` bypasses aiobotocore entirely and is not affected.
+    table data into a Polars DataFrame.
 
     Args:
         database_name: The Glue database name containing the table
         table_name: The name of the table to retrieve
 
     Returns:
-        A Polars LazyFrame containing the table data
+        A Polars DataFrame containing the table data
 
     Raises:
-        Exception: If loading the Iceberg table from Glue or converting it to
-            a Polars LazyFrame fails.
+        KeyError: If the table metadata doesn't contain the expected fields
+        boto3 exceptions: If the AWS Glue API call fails
     """
-    glue = GlueCatalog(
-        "default",
-        client=boto3.client("glue", region_name="us-east-1"),
-        **{"py-io-impl": "pyiceberg.io.pyarrow.PyArrowFileIO"},
-    )
+    glue = GlueCatalog("default", client=boto3.client("glue", region_name="us-east-1"))
     table = glue.load_table(f"{database_name}.{table_name}")
+
     return table.to_polars()
0 commit comments