Skip to content

Commit 5560b38

Browse files
author
Michael Aydinbas
committed
Fix BigQuery loading: delete-before-recreate and update logs clustering
WRITE_TRUNCATE does not change a table's existing clustering, which causes a 400 error when the schema changes (e.g. after the R-to-Python migration). When replace=True, delete the table first so that its clustering and schema are always recreated cleanly. Update the logs clustering to match the Python fields: level, error_code, file_name, function. Add clinic_data_static to the auto-load mapping.
1 parent 38efab6 commit 5560b38

1 file changed

Lines changed: 12 additions & 1 deletion

File tree

a4d-python/src/a4d/gcp/bigquery.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from pathlib import Path
99

1010
from google.cloud import bigquery
11+
from google.api_core.exceptions import NotFound
1112
from loguru import logger
1213

1314
from a4d.config import settings
@@ -25,7 +26,7 @@
2526
"product_table_month",
2627
],
2728
"clinic_data_static": ["clinic_id"],
28-
"logs": ["level", "log_file", "file_name"],
29+
"logs": ["level", "error_code", "file_name", "function"],
2930
"tracker_metadata": ["file_name", "clinic_code"],
3031
}
3132

@@ -35,6 +36,7 @@
3536
"patient_data_static.parquet": "patient_data_static",
3637
"patient_data_monthly.parquet": "patient_data_monthly",
3738
"patient_data_annual.parquet": "patient_data_annual",
39+
"clinic_data_static.parquet": "clinic_data_static",
3840
"table_logs.parquet": "logs",
3941
}
4042

@@ -98,6 +100,15 @@ def load_table(
98100
table_ref = f"{project_id}.{dataset}.{table_name}"
99101
logger.info(f"Loading {parquet_path.name}{table_ref}")
100102

103+
# WRITE_TRUNCATE preserves existing clustering, so deleting first ensures
104+
# any schema or clustering changes (e.g. from R→Python migration) take effect.
105+
if replace:
106+
try:
107+
client.delete_table(table_ref)
108+
logger.info(f"Deleted existing table {table_ref} for fresh creation")
109+
except NotFound:
110+
pass
111+
101112
# Configure the load job
102113
job_config = bigquery.LoadJobConfig(
103114
source_format=bigquery.SourceFormat.PARQUET,

0 commit comments

Comments
 (0)