Skip to content

Commit 2c3ce4f

Browse files
authored
Merge pull request #143 from datakind/DeleteBatch
added delete batch endpoint
2 parents a027953 + 85341ec commit 2c3ce4f

2 files changed

Lines changed: 157 additions & 0 deletions

File tree

src/webapp/gcsutil.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,55 @@ def delete_file(self, bucket_name: str, file_name: str) -> None:
267267
raise ValueError(file_name + ": File not found.")
268268
blob.delete()
269269

270+
def delete_batch_files(
    self,
    bucket_name: str,
    batch_files: list[str],
) -> Any:
    """Delete every named file under the ``validated/`` prefix of a bucket.

    Each name is deleted independently through ``self.delete_file``; one
    failure does not stop the remaining deletions.

    Args:
        bucket_name: Bucket holding the files.
        batch_files: File names relative to the ``validated/`` prefix.

    Returns:
        A dict with three keys:

        * ``"deleted"``: one ``{"file", "path", "deleted_at"}`` dict per
          file removed. ``deleted_at`` is a timezone-aware UTC ISO-8601
          timestamp taken once, when the batch operation starts.
        * ``"not_found"``: names for which ``delete_file`` raised
          ``ValueError``.
        * ``"errors"``: one ``{"file", "path", "error"}`` dict per name that
          was invalid (non-string or blank) or failed with any other
          exception.
    """
    prefix = "validated/"

    # Timezone-aware so API consumers are not left guessing which local zone
    # a naive timestamp refers to.
    deleted_at = datetime.datetime.now(datetime.timezone.utc).isoformat()
    deleted: list[dict[str, str]] = []
    not_found: list[str] = []
    errors: list[dict[str, str]] = []

    for fname in batch_files:
        # Reject non-string and blank names up front instead of sending a
        # meaningless object path to storage.
        if not isinstance(fname, str) or not fname.strip():
            errors.append(
                {
                    "file": str(fname),
                    "path": f"{prefix}{fname}",
                    "error": "invalid filename",
                }
            )
            continue

        blob_path = f"{prefix}{fname}"
        try:
            logger.info("Attempting to delete gs://%s/%s", bucket_name, blob_path)
            # delete_file raises ValueError when the file does not exist.
            self.delete_file(bucket_name=bucket_name, file_name=blob_path)
        except ValueError:
            logger.warning(
                "Blob or bucket not found: gs://%s/%s", bucket_name, blob_path
            )
            not_found.append(fname)
        except Exception as e:  # network/other unexpected errors
            # Recorded rather than re-raised so the rest of the batch is
            # still attempted; the caller decides how to surface failures.
            logger.exception(
                "Unexpected error deleting gs://%s/%s", bucket_name, blob_path
            )
            errors.append({"file": fname, "path": blob_path, "error": str(e)})
        else:
            logger.info("Delete successful: gs://%s/%s", bucket_name, blob_path)
            deleted.append(
                {"file": fname, "path": blob_path, "deleted_at": deleted_at}
            )

    return {
        "deleted": deleted,
        "not_found": not_found,
        "errors": errors,
    }
318+
270319
def validate_file(
271320
self,
272321
bucket_name: str,

src/webapp/routers/data.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,114 @@ def update_batch(
684684
}
685685

686686

687+
@router.patch("/{inst_id}/delete-batch/{batch_id}", response_model=BatchInfo)
def delete_batch(
    inst_id: str,
    batch_id: str,
    current_user: Annotated[BaseUser, Depends(get_current_active_user)],
    sql_session: Annotated[Session, Depends(get_session)],
    storage_control: Annotated[StorageControl, Depends(StorageControl)],
) -> Any:
    """Delete a batch, its file rows, and the corresponding stored files.

    Access is checked first via ``has_access_to_inst_or_err`` and
    ``model_owner_and_higher_or_err``. The files are then removed from the
    institution's external bucket, the matching ``FileTable`` rows are
    deleted, and finally the batch row itself is deleted and committed.

    Files that storage reports as not found are treated like deleted files
    when cleaning up DB rows. This lets a request that failed part-way be
    retried without leaving rows that point at missing files.

    Args:
        inst_id: Institution id from the URL path.
        batch_id: Batch id from the URL path.
        current_user: The authenticated user making the request.
        sql_session: Request-scoped database session.
        storage_control: Storage helper used to delete the files.

    Returns:
        A dict with ``inst_id``, ``batch_id``, ``deleted``, ``not_found``,
        ``errors``, ``db_deleted_rows`` and ``batch_deleted``, plus a
        ``message`` entry when the batch had no files.

    Raises:
        HTTPException: 404 if the batch does not exist for this institution;
            500 if storage reports any deletion error or a database
            operation fails.
    """
    # NOTE(review): the dict returned below may not match the declared
    # ``response_model=BatchInfo`` -- BatchInfo is defined outside this view;
    # confirm the schemas agree.
    has_access_to_inst_or_err(inst_id, current_user)
    model_owner_and_higher_or_err(current_user, "modify batch")

    local_session.set(sql_session)
    sess = local_session.get()

    # Convert the path parameters once and reuse them in every query.
    batch_uuid = str_to_uuid(batch_id)
    inst_uuid = str_to_uuid(inst_id)

    # 1) Look up the batch, scoped to the institution.
    batch = sess.execute(
        select(BatchTable).where(
            BatchTable.id == batch_uuid,
            BatchTable.inst_id == inst_uuid,
        )
    ).scalar_one_or_none()
    if batch is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail="Batch not found."
        )

    # 2) Gather filenames to delete.
    # NOTE(review): this filters FileTable.id by the *batch* id, which only
    # matches if file rows share the batch's id -- confirm against the schema;
    # a batch-to-file relationship may be what is intended here.
    batch_files: list[str] = list(
        sess.execute(
            select(FileTable.name).where(
                FileTable.id == batch_uuid,
                FileTable.inst_id == inst_uuid,
            )
        )
        .scalars()
        .all()
    )

    # 3) Delete the files from storage (skipped when the batch has none).
    gcs_result: dict[str, Any] = {"deleted": [], "not_found": [], "errors": []}
    if batch_files:
        gcs_result = storage_control.delete_batch_files(
            bucket_name=get_external_bucket_name(inst_id), batch_files=batch_files
        )

    if gcs_result.get("errors"):
        # Stop before touching the DB. Files already removed from storage
        # are reported as not found on a retry and cleaned up then.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Unable to delete files {gcs_result['errors']}.",
        )

    # 4) Delete DB rows for files that were deleted *or* were already
    #    missing from storage; in both cases the row is now stale.
    deleted_names = {d["file"] for d in gcs_result.get("deleted", [])}
    not_found_names = set(gcs_result.get("not_found", []))
    target_names = {n for n in (deleted_names | not_found_names) if n}

    db_deleted_rows = 0
    if target_names:
        try:
            rows = (
                sess.execute(
                    select(FileTable).where(
                        FileTable.inst_id == inst_uuid,
                        FileTable.id == batch_uuid,
                        FileTable.name.in_(target_names),
                    )
                )
                .scalars()
                .all()
            )
            for r in rows:
                sess.delete(r)
            db_deleted_rows = len(rows)
        except Exception as e:
            # Drop any pending deletes so the session is not left half-modified.
            sess.rollback()
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Deleted in storage, but DB file-row cleanup failed: {e}",
            ) from e

    # 5) Delete the batch row and commit everything together. The empty-batch
    #    case goes through the same commit so its deletion is persisted too.
    try:
        sess.delete(batch)
        sess.commit()
    except Exception as e:
        sess.rollback()
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"DB batch delete failed after file cleanup: {e}",
        ) from e

    response: dict[str, Any] = {
        "inst_id": inst_id,
        "batch_id": batch_id,
        "deleted": gcs_result.get("deleted", []),  # [{file, path, deleted_at}, ...]
        "not_found": sorted(not_found_names),
        "errors": gcs_result.get("errors", []),
        "db_deleted_rows": db_deleted_rows,
        "batch_deleted": True,
    }
    if not batch_files:
        response["message"] = "No files associated with this batch id."
    return response
793+
794+
687795
@router.get("/{inst_id}/file-id/{file_id}", response_model=DataInfo)
688796
def read_file_id_info(
689797
inst_id: str,

0 commit comments

Comments
 (0)