1+ import ast
12import importlib .util
23import io
34import logging
1112from datetime import datetime
1213
1314import deltalake
15+ import numpy as np
1416import pyarrow as pa
1517import pandas as pd
1618import polars as pl
2123from pyspark .sql .functions import monotonically_increasing_id
2224from pyspark .sql .utils import AnalysisException
2325from pyspark .sql .types import (
26+ ArrayType ,
2427 IntegerType ,
2528 StructField ,
2629 StringType ,
@@ -371,8 +374,22 @@ def _recreate_blank_file(self):
371374 self ._s3_client .upload_file (blank_csv , self .BUCKET_NAME , self .RELATIVE_CSV_PATH )
372375
373376 def to_pandas_df (self , ** kwargs : Any ) -> pd .DataFrame | None :
377+ converters = {}
378+
379+ # Convert arrays from csv format to lists
380+ for col in self .STRUCTURE :
381+ if isinstance (col .dataType , ArrayType ):
382+ converters [col .name ] = ast .literal_eval
383+
374384 # Type Checker struggles with BytesIO and S3 Objects
375- return pd .read_csv (io .BytesIO (self ._s3_object ), ** kwargs ) if self .exists () else None # type: ignore
385+ df = pd .read_csv (io .BytesIO (self ._s3_object ), converters = converters , ** kwargs ) if self .exists () else None # type: ignore
386+
387+ # Convert lists in df to np.arrays
388+ for col in self .STRUCTURE :
389+ if isinstance (col .dataType , ArrayType ):
390+ df [col .name ] = df [col .name ].apply (np .array )
391+
392+ return df
376393
def to_polars_df(self, **kwargs: Any) -> pl.DataFrame | pl.Series | None:
    """Read the CSV at ``self.CSV_PATH`` with polars.

    Args:
        **kwargs: Extra keyword arguments forwarded to ``pl.read_csv``.

    Returns:
        The result of ``pl.read_csv``, or ``None`` when ``self.exists()``
        is falsy.
    """
    if not self.exists():
        return None
    return pl.read_csv(self.CSV_PATH, **kwargs)
@@ -394,7 +411,7 @@ def save(self, df: pd.DataFrame | pl.DataFrame) -> None:
394411
395412class LakeHouseCurrentMigration (CSVModel ):
396413 BUCKET_NAME = CONFIG .REFERENCE_S3_BUCKET
397- DATABASE_NAME = LakeHouseDatabase .BRONZE
414+ DATABASE_NAME = LakeHouseDatabase .GOLD
398415 TABLE_NAME = "migrations"
399416 DESCRIPTION = "Keeps track of migrations for all Lakehouse Models"
400417 CSV_NAME = "current_migrations.csv"
@@ -415,7 +432,7 @@ class LakeHouseCurrentMigration(CSVModel):
415432
416433class ExternalDataLoadDate (CSVModel ):
417434 BUCKET_NAME = CONFIG .REFERENCE_S3_BUCKET
418- DATABASE_NAME = LakeHouseDatabase .BRONZE
435+ DATABASE_NAME = LakeHouseDatabase .GOLD
419436 TABLE_NAME = "external_data_load_date"
420437 DESCRIPTION = "Keeps track of load dates of certain external data Lakehouse models"
421438 CSV_NAME = "external_load_date.csv"
0 commit comments