Skip to content

Commit b939110

Browse files
author
Francisco
committed
feat(training): DiskPreflight + DatasetService class with hard-delete + race guards
Group B shipped:
- DiskPreflight class rejects training jobs with HTTP 507 when SHARED_PATH or SCRATCH_PATH is below its configured free-GB threshold. Env-configurable via MIN_SAMBA_FREE_GB (default 5) and MIN_SCRATCH_FREE_GB (default 10).
- DatasetService class replaces the module-level dataset functions. The delete endpoint now accepts ?hard=true for a DB cascade (Dataset row removed, backing File row marked for purge-daemon pickup). Both soft and hard delete share race guards: 409 if the dataset is mid-preparation, 409 if it is referenced by any non-terminal training job.
- TrainingService and datasets_router updated to use DatasetService directly; no shims.
1 parent 38bb9a4 commit b939110

3 files changed

Lines changed: 274 additions & 163 deletions

File tree

src/api/training/routers/datasets_router.py

Lines changed: 20 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1,29 +1,20 @@
11
# src/api/training/routers/datasets_router.py
22

3-
import os
43
from typing import Optional
54

6-
from fastapi import APIRouter, Depends, HTTPException
5+
from fastapi import APIRouter, Depends, HTTPException, Query
76
from projectdavid_common import UtilsInterface, ValidationInterface
87
from projectdavid_common.schemas.enums import StatusEnum
98
from sqlalchemy.orm import Session
109

1110
from src.api.training.db.database import get_db
1211
from src.api.training.dependencies import get_current_user_id
13-
from src.api.training.services.dataset_service import (
14-
create_dataset,
15-
delete_dataset,
16-
get_dataset,
17-
list_datasets,
18-
prepare_dataset,
19-
)
12+
from src.api.training.services.dataset_service import DatasetService
2013

2114
logging_utility = UtilsInterface.LoggingUtility()
2215

2316
router = APIRouter()
2417

25-
# Note: API_BASE_URL and WORKER_API_KEY removed as they are no longer
26-
# needed by the router for internal file fetching.
2718

2819
# ---------------------------------------------------------------------------
2920
# POST /v1/datasets
@@ -48,8 +39,7 @@ def create_dataset_endpoint(
4839
payload.format,
4940
payload.file_id,
5041
)
51-
dataset = create_dataset(
52-
db=db,
42+
dataset = DatasetService(db).create(
5343
user_id=user_id,
5444
name=payload.name,
5545
fmt=payload.format,
@@ -86,8 +76,8 @@ def list_datasets_endpoint(
8676
status_code=422, detail=f"Invalid status value '{status}'."
8777
)
8878

89-
datasets = list_datasets(
90-
db=db, user_id=user_id, status=status_filter, limit=limit, offset=offset
79+
datasets = DatasetService(db).list(
80+
user_id=user_id, status=status_filter, limit=limit, offset=offset
9181
)
9282
return ValidationInterface.DatasetList(
9383
data=[ValidationInterface.DatasetRead.model_validate(d) for d in datasets],
@@ -110,7 +100,7 @@ def get_dataset_endpoint(
110100
user_id: str = Depends(get_current_user_id),
111101
db: Session = Depends(get_db),
112102
):
113-
dataset = get_dataset(db=db, dataset_id=dataset_id, user_id=user_id)
103+
dataset = DatasetService(db).get(dataset_id=dataset_id, user_id=user_id)
114104
return ValidationInterface.DatasetRead.model_validate(dataset)
115105

116106

@@ -128,15 +118,7 @@ async def prepare_dataset_endpoint(
128118
user_id: str = Depends(get_current_user_id),
129119
db: Session = Depends(get_db),
130120
):
131-
"""
132-
Trigger background preparation.
133-
Service logic now uses direct Samba/Shared DB access.
134-
"""
135-
return prepare_dataset(
136-
db=db,
137-
dataset_id=dataset_id,
138-
user_id=user_id,
139-
)
121+
return DatasetService(db).prepare(dataset_id=dataset_id, user_id=user_id)
140122

141123

142124
# ---------------------------------------------------------------------------
@@ -147,12 +129,23 @@ async def prepare_dataset_endpoint(
147129
@router.delete(
148130
"/{dataset_id}",
149131
response_model=ValidationInterface.DatasetDeleted,
150-
summary="Soft delete a dataset",
132+
summary="Delete a dataset (soft by default; ?hard=true cascades to storage)",
151133
)
152134
def delete_dataset_endpoint(
153135
dataset_id: str,
136+
hard: bool = Query(
137+
False,
138+
description=(
139+
"If true, permanently remove the dataset, its File/FileStorage DB rows, "
140+
"and the physical .jsonl on Samba. Default is soft-delete (reversible)."
141+
),
142+
),
154143
user_id: str = Depends(get_current_user_id),
155144
db: Session = Depends(get_db),
156145
):
157-
result = delete_dataset(db=db, dataset_id=dataset_id, user_id=user_id)
146+
result = DatasetService(db).delete(
147+
dataset_id=dataset_id,
148+
user_id=user_id,
149+
hard=hard,
150+
)
158151
return ValidationInterface.DatasetDeleted(**result)

0 commit comments

Comments
 (0)