Skip to content

Commit 414befc

Browse files
committed
tests: include more tests
1 parent 7a08061 commit 414befc

29 files changed

Lines changed: 5567 additions & 235 deletions

pysus/api/client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
from .ftp import FTPClient
2727
from .models import BaseLocalFile, BaseRemoteFile
2828

29-
if TYPE_CHECKING:
29+
if TYPE_CHECKING: # pragma: no cover
3030
from duckdb import DuckDBPyConnection
3131

3232

@@ -403,7 +403,7 @@ async def download_to_parquet(
403403

404404
if hasattr(local_file, "to_parquet"):
405405
original_path = local_file.path
406-
parquet_file = await local_file.to_parquet(callback=callback) # type: ignore
406+
parquet_file = await local_file.to_parquet(callback=callback)
407407
parquet_file.add_dv = add_dv
408408

409409
await self._update_state(

pysus/api/dadosgov/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from pysus.api.models import BaseRemoteClient, BaseRemoteFile
1414
from pysus.api.types import DADOSGOV
1515

16-
if TYPE_CHECKING:
16+
if TYPE_CHECKING: # pragma: no cover
1717
from .models import Dataset
1818

1919

pysus/api/dadosgov/databases.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ def formatter(self, filename: str) -> dict[str, Any]:
272272
}
273273

274274
m = re.search(r"_(\w{3})-out_(\d{4})_\.csv$", name)
275-
if m:
275+
if m: # pragma: no cover
276276
return {
277277
"state": None,
278278
"year": _parse_year(m.group(2)),

pysus/api/ducklake/catalog/orm/dataset.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,12 @@ class Dataset(Base):
7474
description = Column(String, nullable=True)
7575

7676
groups = relationship(
77-
"DatasetGroup",
77+
"Group",
7878
back_populates="dataset",
7979
cascade="all, delete-orphan",
8080
)
8181
files = relationship(
82-
"CatalogFile",
82+
"File",
8383
back_populates="dataset",
8484
cascade="all, delete-orphan",
8585
)
@@ -128,9 +128,9 @@ class ColumnDefinition(Base):
128128
description = Column(String, nullable=True)
129129
nullable = Column(Boolean, nullable=False, default=True)
130130

131-
dataset = relationship("CatalogDataset", back_populates="columns")
131+
dataset = relationship("Dataset", back_populates="columns")
132132
files = relationship(
133-
"CatalogFile",
133+
"File",
134134
secondary=file_columns,
135135
back_populates="columns",
136136
)
@@ -177,11 +177,11 @@ class Group(Base):
177177
description = Column(String, nullable=True)
178178

179179
dataset = relationship(
180-
"CatalogDataset",
180+
"Dataset",
181181
back_populates="groups",
182182
)
183183
files = relationship(
184-
"CatalogFile",
184+
"File",
185185
back_populates="group",
186186
cascade="all, delete-orphan",
187187
)
@@ -281,11 +281,11 @@ class File(Base):
281281
)
282282

283283
dataset: Mapped["Dataset"] = relationship(
284-
"CatalogDataset",
284+
"Dataset",
285285
back_populates="files",
286286
)
287287
group: Mapped[Optional["Group"]] = relationship(
288-
"DatasetGroup",
288+
"Group",
289289
back_populates="files",
290290
)
291291
columns: Mapped[list["ColumnDefinition"]] = relationship(

pysus/api/ducklake/catalog/orm/default.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ class Base(DeclarativeBase):
1515

1616

1717
class Dataset(Base):
18-
"""ORM model for the datasets table — central registry of available datasets.
18+
"""ORM model for the datasets table — central registry of available
19+
datasets.
1920
2021
Parameters
2122
----------

pysus/api/ducklake/client.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,8 @@ def __init__(self, engine=None, **data) -> None:
7777
self._engine = engine
7878
self._cache_dir: Path = Path(CACHEPATH) / "ducklake"
7979
self._cache_dir.mkdir(parents=True, exist_ok=True)
80-
self._catalog_local: Path = self._cache_dir / "catalog.db"
81-
self._catalog_remote: str = "public/catalog.db"
80+
self._catalog_local: Path = self._cache_dir / "catalog.duckdb"
81+
self._catalog_remote: str = "public/catalog.duckdb"
8282

8383
@property
8484
def name(self) -> str:
@@ -197,7 +197,8 @@ def _setup_engine(self, local_path: Path | None = None):
197197
Parameters
198198
----------
199199
local_path : Path, optional
200-
Path to the catalog database file. Defaults to the discovery catalog.
200+
Path to the catalog database file.
201+
Defaults to the discovery catalog.
201202
"""
202203
if local_path is None:
203204
local_path = self._catalog_local
@@ -210,7 +211,8 @@ def _setup_engine(self, local_path: Path | None = None):
210211
conn.exec_driver_sql("INSTALL ducklake; LOAD ducklake;")
211212

212213
has_pysus = conn.exec_driver_sql(
213-
"SELECT 1 FROM information_schema.schemata WHERE schema_name = 'pysus'"
214+
"SELECT 1 FROM information_schema.schemata"
215+
" WHERE schema_name = 'pysus'"
214216
).fetchone()
215217

216218
if has_pysus:
@@ -326,7 +328,9 @@ async def _download(
326328
else:
327329
raise e
328330

329-
async def _download_catalog(self, local_path: Path, remote_path: str) -> None:
331+
async def _download_catalog(
332+
self, local_path: Path, remote_path: str
333+
) -> None:
330334
"""Download a catalog database from remote storage with retries.
331335
332336
Parameters
@@ -380,7 +384,9 @@ def _get_s3_client(self):
380384
"s3",
381385
endpoint_url=f"https://{self.endpoint}",
382386
aws_access_key_id=self.credentials.access_key.get_secret_value(),
383-
aws_secret_access_key=(self.credentials.secret_key.get_secret_value()),
387+
aws_secret_access_key=(
388+
self.credentials.secret_key.get_secret_value()
389+
),
384390
region_name=self.region,
385391
config=Config(signature_version="s3v4"),
386392
)
@@ -400,7 +406,10 @@ async def _upload_catalog(self) -> None:
400406
if not ds._catalog_local.exists():
401407
continue
402408

403-
def _upload(local=str(ds._catalog_local), name=ds._catalog_name):
409+
_local = str(ds._catalog_local)
410+
_name = ds._catalog_name
411+
412+
def _upload(local=_local, name=_name):
404413
self._s3_client.upload_file(
405414
local,
406415
self.bucket,

pysus/api/ducklake/models.py

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,13 @@
1313
from anyio import to_thread
1414
from pydantic import Field, PrivateAttr
1515
from pysus import CACHEPATH
16+
from pysus.api.ducklake.catalog.orm.dataset import Dataset
17+
from pysus.api.ducklake.catalog.orm.dataset import File as CatalogFile
18+
from pysus.api.ducklake.catalog.orm.dataset import Group
1619
from pysus.api.models import BaseRemoteDataset, BaseRemoteFile, BaseRemoteGroup
1720
from sqlalchemy.orm import contains_eager, joinedload, sessionmaker
1821

19-
from pysus.api.ducklake.catalog.orm.dataset import (
20-
Dataset,
21-
File as CatalogFile,
22-
Group,
23-
)
24-
25-
if TYPE_CHECKING:
22+
if TYPE_CHECKING: # pragma: no cover
2623
from .client import DuckLake
2724

2825

@@ -41,23 +38,19 @@ class File(BaseRemoteFile):
4138
The parent group object, if any.
4239
"""
4340

44-
def __init__(
45-
self,
46-
dataset: "DuckDataset",
47-
record: CatalogFile,
48-
group: Optional["DuckGroup"] = None,
49-
) -> None:
41+
record: CatalogFile = Field(exclude=True)
42+
group: Optional["DuckGroup"] = Field(default=None, exclude=True)
43+
44+
def __init__(self, **data: Any) -> None:
45+
record = data.pop("record")
46+
group = data.pop("group", None)
5047
super().__init__(
5148
path=Path(record.path),
5249
type=record.type or "remote",
53-
dataset=dataset,
50+
record=record, # type: ignore[call-arg]
51+
group=group,
52+
**data,
5453
)
55-
self.record: CatalogFile = record
56-
self.group: Optional["DuckGroup"] = group
57-
58-
@property
59-
def path(self) -> Path:
60-
return Path(self.record.path)
6154

6255
@property
6356
def basename(self) -> str:
@@ -191,7 +184,7 @@ def __init__(self, **data) -> None:
191184
super().__init__(**data)
192185
self._cache_dir: Path = Path(CACHEPATH) / "ducklake"
193186
self._cache_dir.mkdir(parents=True, exist_ok=True)
194-
self._catalog_name: str = f"catalog_{self.record.name.lower()}.db"
187+
self._catalog_name: str = f"catalog_{self.record.name.lower()}.duckdb"
195188
self._catalog_local: Path = self._cache_dir / self._catalog_name
196189

197190
def __repr__(self) -> str:
@@ -306,7 +299,7 @@ def _upload():
306299
self.client._s3_client.upload_file(
307300
str(self._catalog_local),
308301
self.client.bucket,
309-
f"catalog_{self.record.name.lower()}.db",
302+
f"catalog_{self.record.name.lower()}.duckdb",
310303
)
311304

312305
await to_thread.run_sync(_upload)

pysus/api/extensions.py

Lines changed: 2 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from collections.abc import AsyncGenerator, Callable
1111
from datetime import datetime
1212
from pathlib import Path
13-
from typing import ClassVar
1413

1514
import chardet
1615
import pandas as pd
@@ -19,6 +18,7 @@
1918
from anyio import to_thread
2019
from dbfread import DBF as DBFReader
2120
from pydantic import Field, PrivateAttr
21+
from pyreaddbc import dbc2dbf
2222
from pysus import CACHEPATH
2323
from pysus.api.metadata.models import Column
2424
from pysus.api.models import BaseCompressedFile, BaseLocalFile, BaseTabularFile
@@ -60,14 +60,6 @@ def _map_dtype(raw: str) -> str:
6060
return "VARCHAR"
6161

6262

63-
try:
64-
from pyreaddbc import dbc2dbf
65-
66-
DBC_IMPORT = True
67-
except ImportError:
68-
DBC_IMPORT = False
69-
70-
7163
class File(BaseLocalFile):
7264
"""Represents a generic local file with no special handling."""
7365

@@ -839,76 +831,6 @@ def _extract():
839831
return list(await asyncio.gather(*tasks))
840832

841833

842-
class DBCNotImported(BaseTabularFile):
843-
"""Placeholder for DBC files when optional dependency is not installed."""
844-
845-
path: Path = Field(default_factory=lambda: Path("..."))
846-
type: str | FileType = Field(default="remote")
847-
import_err: ClassVar[
848-
str
849-
] = """
850-
run "pip install pysus[dbc]" to handle DBC files.
851-
Make sure you also have libffi installed on the system. It may not work
852-
on Windows
853-
"""
854-
855-
@property
856-
def name(self) -> str:
857-
"""Raise ImportError indicating the missing DBC dependency."""
858-
raise ImportError(self.import_err)
859-
860-
@property
861-
def extension(self) -> str:
862-
"""Return the .dbc extension."""
863-
return ".dbc"
864-
865-
@property
866-
def size(self) -> int:
867-
"""Raise ImportError indicating the missing DBC dependency."""
868-
raise ImportError(self.import_err)
869-
870-
@property
871-
def modify(self) -> datetime:
872-
"""Raise ImportError indicating the missing DBC dependency."""
873-
raise ImportError(self.import_err)
874-
875-
@property
876-
def columns(self) -> list["Column"]:
877-
"""Raise ImportError indicating the missing DBC dependency."""
878-
raise ImportError(self.import_err)
879-
880-
@property
881-
def rows(self) -> int:
882-
"""Raise ImportError indicating the missing DBC dependency."""
883-
raise ImportError(self.import_err)
884-
885-
async def load(self) -> pd.DataFrame:
886-
"""Raise ImportError indicating the missing DBC dependency."""
887-
raise ImportError(self.import_err)
888-
889-
def stream(
890-
self,
891-
chunk_size: int = 10000,
892-
) -> AsyncGenerator[pd.DataFrame, None]:
893-
"""Raise ImportError indicating the missing DBC dependency."""
894-
895-
async def _internal_gen():
896-
"""Yield nothing; always raises ImportError."""
897-
raise ImportError(self.import_err)
898-
yield pd.DataFrame()
899-
900-
return _internal_gen()
901-
902-
async def to_parquet(
903-
self,
904-
output_path: str | Path | None = None,
905-
chunk_size: int = 10000,
906-
callback: Callable[[int, int], None] | None = None,
907-
) -> Parquet:
908-
"""Raise ImportError indicating the missing DBC dependency."""
909-
raise ImportError(self.import_err)
910-
911-
912834
class ExtensionFactory:
913835
"""Factory that maps file extensions and MIME types to handler classes."""
914836

@@ -930,7 +852,7 @@ class ExtensionFactory:
930852
".csv": CSV,
931853
".parquet": Parquet,
932854
".dbf": DBF,
933-
".dbc": DBC if DBC_IMPORT else DBCNotImported, # type: ignore
855+
".dbc": DBC,
934856
".pdf": PDF,
935857
".json": JSON,
936858
}

pysus/api/ftp/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from pysus.api.models import BaseRemoteClient, BaseRemoteFile
1414
from pysus.api.types import FTP as FTP_STR
1515

16-
if TYPE_CHECKING:
16+
if TYPE_CHECKING: # pragma: no cover
1717
from pysus.api.ftp.models import Dataset
1818
from pysus.api.types import State
1919

pysus/api/metadata/models.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ class Column:
7171

7272
@classmethod
7373
def from_schema(cls, name: str, dtype: ColumnType) -> "Column":
74-
"""Create a Column from a file schema, looking up description from columns.py."""
74+
"""Create a Column from a file schema, looking up description from
75+
columns.py."""
7576
return cls(
7677
name=name,
7778
description=pick_description(lookup_column_meta(name)),

0 commit comments

Comments
 (0)