|
24 | 24 | from typing import Literal |
25 | 25 |
|
26 | 26 | import pandas as pd |
27 | | -from anyio import to_thread |
28 | 27 | from pysus.api.client import PySUS |
29 | | -from pysus.api.ducklake.catalog import CatalogDataset, CatalogFile, DatasetGroup |
30 | 28 | from pysus.api.types import State |
31 | | -from sqlalchemy.orm import joinedload |
32 | 29 | from tqdm import tqdm |
33 | 30 |
|
34 | 31 |
|
@@ -302,66 +299,44 @@ def list_files( |
302 | 299 | "CNES", |
303 | 300 | "CIHA", |
304 | 301 | ], |
| 302 | + client: Literal["FTP", "DadosGov"] | None = None, |
305 | 303 | group: str | None = None, |
306 | 304 | state: str | None = None, |
307 | 305 | year: int | list[int] | None = None, |
308 | 306 | month: int | list[int] | None = None, |
309 | 307 | **kwargs, |
310 | 308 | ) -> pd.DataFrame: |
311 | | - """List catalog files for a dataset, filtered by group/state/year/month.""" |
| 309 | + """List catalog files filtered by client, group, state, year, and month.""" |
312 | 310 |
|
313 | 311 | async def _list(): |
314 | 312 | async with PySUS() as pysus: |
315 | | - ducklake = await pysus.get_ducklake() |
316 | | - if ducklake._Session is None: |
317 | | - await ducklake.connect() |
318 | | - |
319 | | - def _query(): |
320 | | - with ducklake._Session() as session: |
321 | | - q = session.query(CatalogFile).options( |
322 | | - joinedload(CatalogFile.dataset), |
323 | | - joinedload(CatalogFile.group), |
324 | | - ) |
325 | | - |
326 | | - if dataset: |
327 | | - q = q.join(CatalogDataset).filter( |
328 | | - CatalogDataset.name == dataset.lower() |
329 | | - ) |
| 313 | + years = [year] if isinstance(year, int) else (year or [None]) |
| 314 | + months = [month] if isinstance(month, int) else (month or [None]) |
330 | 315 |
|
331 | | - if group: |
332 | | - q = q.join(DatasetGroup).filter( |
333 | | - DatasetGroup.name == group |
| 316 | + records = [] |
| 317 | + for y in years: |
| 318 | + for m in months: |
| 319 | + records.extend( |
| 320 | + await pysus.query( |
| 321 | + client=client, |
| 322 | + dataset=dataset, |
| 323 | + group=group, |
| 324 | + state=state, |
| 325 | + year=y, |
| 326 | + month=m, |
334 | 327 | ) |
335 | | - |
336 | | - if state: |
337 | | - q = q.filter(CatalogFile.state == state.upper()) |
338 | | - |
339 | | - years = [year] if isinstance(year, int) else (year or []) |
340 | | - months = ( |
341 | | - [month] if isinstance(month, int) else (month or []) |
342 | 328 | ) |
343 | 329 |
|
344 | | - if years: |
345 | | - q = q.filter(CatalogFile.year.in_(years)) |
346 | | - if months: |
347 | | - q = q.filter(CatalogFile.month.in_(months)) |
348 | | - |
349 | | - results = q.all() |
350 | | - session.expunge_all() |
351 | | - return results |
352 | | - |
353 | | - records = await to_thread.run_sync(_query) |
354 | | - |
355 | 330 | return [ |
356 | 331 | { |
357 | | - "name": r.path.split("/")[-1], |
358 | | - "path": r.path, |
| 332 | + "name": str(r.path).split("/")[-1], |
| 333 | + "path": str(r.path), |
359 | 334 | "dataset": r.dataset.name if r.dataset else None, |
360 | 335 | "group": r.group.name if r.group else None, |
361 | | - "year": r.year, |
362 | | - "month": r.month, |
363 | | - "state": r.state, |
364 | | - "modify": r.origin_modified, |
| 336 | + "year": r.record.year, |
| 337 | + "month": r.record.month, |
| 338 | + "state": r.record.state, |
| 339 | + "modify": r.record.origin_modified, |
365 | 340 | } |
366 | 341 | for r in records |
367 | 342 | ] |
|
0 commit comments