@@ -90,23 +90,34 @@ def create_or_update_table(
9090
9191
def get_dbt_model_as_dataframe(database_name: str, table_name: str) -> pl.LazyFrame:
    """Load a dbt model registered in AWS Glue as a Polars LazyFrame.

    The Iceberg table ``<database_name>.<table_name>`` is resolved through a
    Glue-backed PyIceberg catalog and converted with ``Table.to_polars()``.

    The catalog is explicitly configured with ``PyArrowFileIO`` so that
    PyIceberg reads S3 data through PyArrow's native C++ S3 client rather
    than the default ``FsspecFileIO`` (which relies on aiobotocore / aiohttp).
    After the aiobotocore 3.4.0 -> 3.5.0 bump deployed around 2026-04-27,
    botocore's lazy loader cache was populated inside aiobotocore's async
    event loop thread, which blocked all pending S3 coroutines and left
    Dagster runs hanging indefinitely. ``PyArrowFileIO`` bypasses aiobotocore
    entirely and is not affected.

    Args:
        database_name: The Glue database name containing the table.
        table_name: The name of the table to retrieve.

    Returns:
        A Polars LazyFrame containing the table data.

    Raises:
        Exception: If loading the Iceberg table from Glue or converting it
            to a Polars LazyFrame fails. Nothing is caught here, so the
            underlying error propagates to the caller unchanged.
    """
    glue_client = boto3.client("glue", region_name="us-east-1")

    # Select the PyArrow-based FileIO; see the docstring for why the default
    # fsspec/aiobotocore path must be avoided.
    catalog_properties = {"py-io-impl": "pyiceberg.io.pyarrow.PyArrowFileIO"}
    catalog = GlueCatalog("default", client=glue_client, **catalog_properties)

    # Glue identifiers are addressed as "<database>.<table>".
    identifier = f"{database_name}.{table_name}"
    return catalog.load_table(identifier).to_polars()
0 commit comments