Skip to content

Commit c6888d8

Browse files
author
Francisco
committed
Merge branch 'dev'
2 parents 5f7afa9 + d0a6287 commit c6888d8

7 files changed

Lines changed: 504 additions & 166 deletions

File tree

.github/workflows/ci.yml

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -158,6 +158,15 @@ jobs:
158158
steps:
159159
- uses: actions/checkout@v4
160160

161+
- name: "🧹 Free up disk space on runner"
162+
run: |
163+
sudo rm -rf /usr/share/dotnet
164+
sudo rm -rf /usr/local/lib/android
165+
sudo rm -rf /opt/ghc
166+
sudo rm -rf /opt/hostedtoolcache/CodeQL
167+
sudo docker image prune --all --force
168+
df -h
169+
161170
- name: "⚙️ Setup Docker Buildx"
162171
uses: docker/setup-buildx-action@v3
163172

@@ -228,6 +237,15 @@ jobs:
228237
- name: "🕺 Checkout"
229238
uses: actions/checkout@v4
230239

240+
- name: "🧹 Free up disk space on runner"
241+
run: |
242+
sudo rm -rf /usr/share/dotnet
243+
sudo rm -rf /usr/local/lib/android
244+
sudo rm -rf /opt/ghc
245+
sudo rm -rf /opt/hostedtoolcache/CodeQL
246+
sudo docker image prune --all --force
247+
df -h
248+
231249
- name: "⚙️ Setup QEMU"
232250
uses: docker/setup-qemu-action@v3
233251

@@ -357,6 +375,15 @@ jobs:
357375
persist-credentials: true
358376
token: ${{ secrets.GITHUB_TOKEN }}
359377

378+
- name: "🧹 Free up disk space on runner"
379+
run: |
380+
sudo rm -rf /usr/share/dotnet
381+
sudo rm -rf /usr/local/lib/android
382+
sudo rm -rf /opt/ghc
383+
sudo rm -rf /opt/hostedtoolcache/CodeQL
384+
sudo docker image prune --all --force
385+
df -h
386+
360387
- name: "⚙️ Setup QEMU"
361388
uses: docker/setup-qemu-action@v3
362389

.gitignore

90 Bytes
Binary file not shown.

src/api/entities_api/db/database.py

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@
1010
logging_utility = UtilsInterface.LoggingUtility()
1111

1212
# --- ALL ENGINE AND SESSION LOGIC IS NOW CENTRALIZED HERE ---
13-
1413
DATABASE_URL = os.getenv("DATABASE_URL")
1514
SPECIAL_DB_URL = os.getenv("SPECIAL_DB_URL")
1615

@@ -21,7 +20,24 @@ def running_in_docker() -> bool:
2120

2221

2322
def resolve_special_db_runtime_url(special_raw: str | None) -> str | None:
23+
"""
24+
Resolve the effective SPECIAL_DB_URL for the current runtime.
25+
26+
SPECIAL_DB_URL is intended for host-shell use only (see .env.migrations).
27+
Its value typically points at localhost:3307, which is meaningless inside
28+
containers. If it leaks into container env via .env / env_file, we ignore
29+
it and fall back to DATABASE_URL — and log a warning so the leak is
30+
visible to the operator.
31+
"""
2432
if running_in_docker():
33+
if special_raw:
34+
logging_utility.warning(
35+
"SPECIAL_DB_URL is set inside a container — ignoring and "
36+
"falling back to DATABASE_URL. SPECIAL_DB_URL is host-shell-only; "
37+
"it should live in .env.migrations, not .env. "
38+
"See: https://github.com/project-david-ai/projectdavid-core "
39+
"(container hygiene section)."
40+
)
2541
return DATABASE_URL
2642
return special_raw or None
2743

@@ -71,8 +87,6 @@ def get_db():
7187

7288
# Optional: You can also move the wait logic here to keep all DB startup
7389
# code together, which makes app.py even cleaner.
74-
75-
7690
def _wait_for_engine(engine_to_check, db_name, logger, retries=30, delay=3):
7791
if not engine_to_check:
7892
return

src/api/training/routers/datasets_router.py

Lines changed: 20 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1,29 +1,20 @@
11
# src/api/training/routers/datasets_router.py
22

3-
import os
43
from typing import Optional
54

6-
from fastapi import APIRouter, Depends, HTTPException
5+
from fastapi import APIRouter, Depends, HTTPException, Query
76
from projectdavid_common import UtilsInterface, ValidationInterface
87
from projectdavid_common.schemas.enums import StatusEnum
98
from sqlalchemy.orm import Session
109

1110
from src.api.training.db.database import get_db
1211
from src.api.training.dependencies import get_current_user_id
13-
from src.api.training.services.dataset_service import (
14-
create_dataset,
15-
delete_dataset,
16-
get_dataset,
17-
list_datasets,
18-
prepare_dataset,
19-
)
12+
from src.api.training.services.dataset_service import DatasetService
2013

2114
logging_utility = UtilsInterface.LoggingUtility()
2215

2316
router = APIRouter()
2417

25-
# Note: API_BASE_URL and WORKER_API_KEY removed as they are no longer
26-
# needed by the router for internal file fetching.
2718

2819
# ---------------------------------------------------------------------------
2920
# POST /v1/datasets
@@ -48,8 +39,7 @@ def create_dataset_endpoint(
4839
payload.format,
4940
payload.file_id,
5041
)
51-
dataset = create_dataset(
52-
db=db,
42+
dataset = DatasetService(db).create(
5343
user_id=user_id,
5444
name=payload.name,
5545
fmt=payload.format,
@@ -86,8 +76,8 @@ def list_datasets_endpoint(
8676
status_code=422, detail=f"Invalid status value '{status}'."
8777
)
8878

89-
datasets = list_datasets(
90-
db=db, user_id=user_id, status=status_filter, limit=limit, offset=offset
79+
datasets = DatasetService(db).list(
80+
user_id=user_id, status=status_filter, limit=limit, offset=offset
9181
)
9282
return ValidationInterface.DatasetList(
9383
data=[ValidationInterface.DatasetRead.model_validate(d) for d in datasets],
@@ -110,7 +100,7 @@ def get_dataset_endpoint(
110100
user_id: str = Depends(get_current_user_id),
111101
db: Session = Depends(get_db),
112102
):
113-
dataset = get_dataset(db=db, dataset_id=dataset_id, user_id=user_id)
103+
dataset = DatasetService(db).get(dataset_id=dataset_id, user_id=user_id)
114104
return ValidationInterface.DatasetRead.model_validate(dataset)
115105

116106

@@ -128,15 +118,7 @@ async def prepare_dataset_endpoint(
128118
user_id: str = Depends(get_current_user_id),
129119
db: Session = Depends(get_db),
130120
):
131-
"""
132-
Trigger background preparation.
133-
Service logic now uses direct Samba/Shared DB access.
134-
"""
135-
return prepare_dataset(
136-
db=db,
137-
dataset_id=dataset_id,
138-
user_id=user_id,
139-
)
121+
return DatasetService(db).prepare(dataset_id=dataset_id, user_id=user_id)
140122

141123

142124
# ---------------------------------------------------------------------------
@@ -147,12 +129,23 @@ async def prepare_dataset_endpoint(
147129
@router.delete(
148130
"/{dataset_id}",
149131
response_model=ValidationInterface.DatasetDeleted,
150-
summary="Soft delete a dataset",
132+
summary="Delete a dataset (soft by default; ?hard=true cascades to storage)",
151133
)
152134
def delete_dataset_endpoint(
153135
dataset_id: str,
136+
hard: bool = Query(
137+
False,
138+
description=(
139+
"If true, permanently remove the dataset, its File/FileStorage DB rows, "
140+
"and the physical .jsonl on Samba. Default is soft-delete (reversible)."
141+
),
142+
),
154143
user_id: str = Depends(get_current_user_id),
155144
db: Session = Depends(get_db),
156145
):
157-
result = delete_dataset(db=db, dataset_id=dataset_id, user_id=user_id)
146+
result = DatasetService(db).delete(
147+
dataset_id=dataset_id,
148+
user_id=user_id,
149+
hard=hard,
150+
)
158151
return ValidationInterface.DatasetDeleted(**result)

0 commit comments

Comments
 (0)