Skip to content

Commit 00547e0

Browse files
committed
feat: implemented caching mechanism
1 parent c746eef commit 00547e0

1 file changed

Lines changed: 12 additions & 6 deletions

File tree

src/webapp/databricks.py

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -76,10 +76,14 @@ def check_types(dict_values: list[list[SchemaType]], file_type: SchemaType) -> b
7676
return True
7777
return False
7878

79+
7980
def _sha256_json(obj: Any) -> str:
80-
return hashlib.sha256(
81-
json.dumps(obj, ensure_ascii=False, separators=(",", ":"), sort_keys=True).encode("utf-8")
82-
).hexdigest()
81+
return hashlib.sha256(
82+
json.dumps(
83+
obj, ensure_ascii=False, separators=(",", ":"), sort_keys=True
84+
).encode("utf-8")
85+
).hexdigest()
86+
8387

8488
# Wrapping the usages in a class makes it easier to unit test via mocks.
8589
class DatabricksControl(BaseModel):
@@ -334,7 +338,7 @@ def fetch_table_data(
334338
schema = databricksify_inst_name(inst_name)
335339
table_fqn = f"`{catalog_name}`.`{schema}_silver`.`{table_name}`"
336340
sql = f"SELECT * FROM {table_fqn}"
337-
341+
338342
try:
339343
ver_sql = f"DESCRIBE HISTORY {table_fqn} LIMIT 1"
340344
ver_resp = w.statement_execution.execute_statement(
@@ -357,7 +361,7 @@ def fetch_table_data(
357361

358362
sql_h = _sha256_json({"sql": sql})
359363
object_name = f"{warehouse_id}/{catalog_name}.{schema}.{table_name}/{sql_h}/{table_version}.json.gz"
360-
364+
361365
storage_client = storage.Client()
362366
bucket = storage_client.bucket(bucket_name)
363367
blob = bucket.blob(object_name)
@@ -448,7 +452,9 @@ def _consume_chunk(chunk_obj: Any) -> int | None:
448452

449453
if bucket_name and object_name and records:
450454
try:
451-
raw = json.dumps(records, ensure_ascii=False, separators=(",", ":")).encode("utf-8")
455+
raw = json.dumps(
456+
records, ensure_ascii=False, separators=(",", ":")
457+
).encode("utf-8")
452458
gz = gzip.compress(raw, compresslevel=6)
453459
storage_client = storage.Client()
454460
bucket = storage_client.bucket(bucket_name)

0 commit comments

Comments
 (0)