diff --git a/.idea/PySUS.iml b/.idea/PySUS.iml
deleted file mode 100644
index a17c85bf..00000000
--- a/.idea/PySUS.iml
+++ /dev/null
@@ -1,17 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/.idea/misc.xml b/.idea/misc.xml
deleted file mode 100644
index a3544b6c..00000000
--- a/.idea/misc.xml
+++ /dev/null
@@ -1,10 +0,0 @@
-
-
-
-
-
-
-
-
-
-
diff --git a/poetry.lock b/poetry.lock
index 076d6c5a..5ed79c08 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -4057,11 +4057,26 @@ description = "File type identification using libmagic"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
groups = ["main"]
+markers = "sys_platform != \"win32\""
files = [
{file = "python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b"},
{file = "python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3"},
]
+[[package]]
+name = "python-magic-bin"
+version = "0.4.14"
+description = "File type identification using libmagic binary package"
+optional = false
+python-versions = "*"
+groups = ["main"]
+markers = "sys_platform == \"win32\""
+files = [
+ {file = "python_magic_bin-0.4.14-py2.py3-none-macosx_10_6_intel.whl", hash = "sha256:7b1743b3dbf16601d6eedf4e7c2c9a637901b0faaf24ad4df4d4527e7d8f66a4"},
+ {file = "python_magic_bin-0.4.14-py2.py3-none-win32.whl", hash = "sha256:34a788c03adde7608028203e2dbb208f1f62225ad91518787ae26d603ae68892"},
+ {file = "python_magic_bin-0.4.14-py2.py3-none-win_amd64.whl", hash = "sha256:90be6206ad31071a36065a2fc169c5afb5e0355cbe6030e87641c6c62edc2b69"},
+]
+
[[package]]
name = "pytz"
version = "2026.2"
@@ -5909,4 +5924,4 @@ tui = ["humanize", "textual"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<3.14"
-content-hash = "61b754b1f0f7a86375c7a562f830223b029a2c5594747dc7082eb99b2da021b9"
+content-hash = "74c5be9c37a010fac0c9e37253c42bc1a9641d4bcd43e617bfcb3ce48e04ff09"
diff --git a/pyproject.toml b/pyproject.toml
index e91b6251..20771497 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,7 +31,8 @@ pydantic = "^2.12.5"
duckdb = "^1.4.4"
duckdb-engine = "^0.17.0"
sqlalchemy = "^2.0.48"
-python-magic = "^0.4.27"
+python-magic = { version = "*", platform = "!=win32" }
+python-magic-bin = { version = "*", platform = "win32" }
chardet = "^7.4.0.post2"
anyio = "^4.13.0"
httpx = ">=0.28.0"
diff --git a/pysus/api/README.md b/pysus/api/README.md
new file mode 100644
index 00000000..e7b83bb0
--- /dev/null
+++ b/pysus/api/README.md
@@ -0,0 +1 @@
+## Roadmap
diff --git a/pysus/api/_impl/databases.py b/pysus/api/_impl/databases.py
index be44cd3a..fa5a7a8c 100644
--- a/pysus/api/_impl/databases.py
+++ b/pysus/api/_impl/databases.py
@@ -24,8 +24,8 @@
from typing import Literal
import pandas as pd
+from pysus.api import types
from pysus.api.client import PySUS
-from pysus.api.types import State
from tqdm import tqdm
@@ -220,7 +220,7 @@ def sinan(
def sinasc(
- state: State,
+ state: types.State,
year: int | list[int],
group: str | None = None,
**kwargs,
@@ -232,7 +232,7 @@ def sinasc(
Parameters
----------
- state : State
+ state : types.State
Two-letter state abbreviation (e.g. ``"RJ"``).
year : int | list[int]
Year or list of years to fetch.
@@ -255,7 +255,7 @@ def sinasc(
def sim(
- state: State,
+ state: types.State,
year: int | list[int],
group: str | None = None,
**kwargs,
@@ -290,7 +290,7 @@ def sim(
def sih(
- state: State,
+ state: types.State,
year: int | list[int],
month: int | list[int],
group: str | None = None,
@@ -303,7 +303,7 @@ def sih(
Parameters
----------
- state : State
+ state : types.State
Two-letter state abbreviation (e.g. ``"RJ"``).
year : int | list[int]
Year or list of years to fetch.
@@ -329,7 +329,7 @@ def sih(
def sia(
- state: State,
+ state: types.State,
year: int | list[int],
month: int | list[int],
group: str | None = None,
@@ -342,7 +342,7 @@ def sia(
Parameters
----------
- state : State
+ state : types.State
Two-letter state abbreviation (e.g. ``"RJ"``).
year : int | list[int]
Year or list of years to fetch.
@@ -368,7 +368,7 @@ def sia(
def pni(
- state: State,
+ state: types.State,
year: int | list[int],
group: str | None = None,
**kwargs,
@@ -430,7 +430,7 @@ def ibge(
def cnes(
- state: State,
+ state: types.State,
year: int | list[int],
month: int | list[int],
group: str | None = None,
@@ -469,7 +469,7 @@ def cnes(
def ciha(
- state: State,
+ state: types.State,
year: int | list[int],
month: int | list[int],
group: str | None = "CIHA",
@@ -508,18 +508,8 @@ def ciha(
def list_files(
- dataset: Literal[
- "SINAN",
- "SINASC",
- "SIM",
- "SIH",
- "SIA",
- "PNI",
- "IBGE",
- "CNES",
- "CIHA",
- ],
- client: Literal["FTP", "DadosGov"] | None = None,
+ dataset: types.DatasetName,
+ client: types.Origin | None = None,
group: str | None = None,
state: str | None = None,
year: int | list[int] | None = None,
@@ -536,7 +526,7 @@ def list_files(
----------
dataset : Literal
Dataset name (e.g. ``"SINAN"``, ``"SINASC"``, etc.).
- client : Literal["FTP", "DadosGov"], optional
+ client : types.Origin, optional
Data source client to query.
group : str, optional
Group or disease code to filter by.
diff --git a/pysus/api/client.py b/pysus/api/client.py
index 70ba6bc9..28d486ea 100644
--- a/pysus/api/client.py
+++ b/pysus/api/client.py
@@ -13,7 +13,9 @@
import anyio
import duckdb
import pandas as pd
+from duckdb import func
from pysus import CACHEPATH
+from pysus.api.types import Origin
from sqlalchemy import DateTime, Enum, Integer, String, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, sessionmaker
from sqlalchemy.pool import NullPool
@@ -24,7 +26,7 @@
from .ftp import FTPClient
from .models import BaseLocalFile, BaseRemoteFile
-if TYPE_CHECKING:
+if TYPE_CHECKING: # pragma: no cover
from duckdb import DuckDBPyConnection
@@ -101,7 +103,7 @@ async def __aenter__(self):
"""Set up DuckLake catalog and return self as async context manager."""
self._ducklake = DuckLake()
- await self._ducklake._load_catalog()
+ await self._ducklake.connect()
self._attach_client_catalog(
"ducklake",
str(self._ducklake.catalog_path),
@@ -124,7 +126,7 @@ async def get_ducklake(self) -> DuckLake:
if self._ducklake is None:
self._ducklake = DuckLake()
- await self._ducklake._load_catalog()
+ await self._ducklake.connect()
self._attach_client_catalog(
"ducklake",
str(self._ducklake.catalog_path),
@@ -477,26 +479,72 @@ def get_completed_remote_paths(self) -> set[str]:
async def query(
self,
- client: Literal["DadosGov", "FTP"] | None = None,
+ client: Origin | None = None,
dataset: str | None = None,
group: str | None = None,
state: str | None = None,
year: int | None = None,
month: int | None = None,
):
- """Query available datasets through the DuckLake catalog."""
+ """Query available datasets through the DuckLake catalog.
+ Parameters
+ ----------
+ client : Origin, optional
+ Source client to filter by.
+ dataset : str, optional
+ Dataset name to filter by.
+ group : str, optional
+ Group name pattern to filter by (case-insensitive ILIKE).
+ state : str, optional
+ Two-letter state code to filter by.
+ year : int, optional
+ Year to filter by.
+ month : int, optional
+ Month to filter by.
+
+ Returns
+ -------
+ list
+ List of matching File objects.
+ """
if self._ducklake is None:
await self.get_ducklake()
- if self._ducklake is not None:
- return await self._ducklake.query(
- client=client,
- dataset=dataset,
+
+ if self._ducklake is None:
+ raise ConnectionError("Could not connect to PySUS s3 bucket")
+
+ all_datasets = await self._ducklake.datasets()
+
+ if dataset:
+ matching = [
+ d for d in all_datasets if d.name.lower() == dataset.lower()
+ ]
+ if not matching:
+ return []
+ target = matching[0]
+ files = await target.query(
group=group,
state=state,
year=year,
month=month,
)
+ else:
+ files = []
+ for ds in all_datasets:
+ ds_files = await ds.query(
+ group=group,
+ state=state,
+ year=year,
+ month=month,
+ )
+ files.extend(ds_files)
+
+ if not client:
+ return files
+
+ prefix = f"public/data/{client.lower()}/"
+ return [f for f in files if f.record.path.startswith(prefix)]
def read_parquet(
self,
@@ -595,8 +643,8 @@ def get_columns(path: Path) -> set[tuple[str, str]]:
duckdb.create_function(
"__pysus_add_dv",
_add_dv_fn,
- null_handling="special",
- )
+ null_handling=func.SPECIAL,
+ ) # type: ignore
except duckdb.NotImplementedException:
pass
selects = [
diff --git a/pysus/api/README.ipynb b/pysus/api/dadosgov/README.md
similarity index 100%
rename from pysus/api/README.ipynb
rename to pysus/api/dadosgov/README.md
diff --git a/pysus/api/dadosgov/client.py b/pysus/api/dadosgov/client.py
index 5487f800..6b8e7113 100644
--- a/pysus/api/dadosgov/client.py
+++ b/pysus/api/dadosgov/client.py
@@ -11,8 +11,9 @@
from pydantic import BaseModel, BeforeValidator, ConfigDict, Field, PrivateAttr
from pysus import __version__
from pysus.api.models import BaseRemoteClient, BaseRemoteFile
+from pysus.api.types import DADOSGOV
-if TYPE_CHECKING:
+if TYPE_CHECKING: # pragma: no cover
from .models import Dataset
@@ -89,7 +90,7 @@ def name(self) -> str:
str
The abbreviated client name ``"DadosGov"``.
"""
- return "DadosGov"
+ return DADOSGOV
@property
def long_name(self) -> str:
diff --git a/pysus/api/dadosgov/databases.py b/pysus/api/dadosgov/databases.py
index 07297095..8000d5f9 100644
--- a/pysus/api/dadosgov/databases.py
+++ b/pysus/api/dadosgov/databases.py
@@ -272,7 +272,7 @@ def formatter(self, filename: str) -> dict[str, Any]:
}
m = re.search(r"_(\w{3})-out_(\d{4})_\.csv$", name)
- if m:
+ if m: # pragma: no cover
return {
"state": None,
"year": _parse_year(m.group(2)),
diff --git a/pysus/api/dadosgov/models.py b/pysus/api/dadosgov/models.py
index 4a962ca8..bbe1d654 100644
--- a/pysus/api/dadosgov/models.py
+++ b/pysus/api/dadosgov/models.py
@@ -271,7 +271,8 @@ def __init__(
A callable that extracts metadata from filenames.
"""
super().__init__(
- record=record, dataset=dataset # type: ignore[call-arg]
+ record=record,
+ dataset=dataset, # type: ignore[call-arg]
)
self._formatter = formatter
@@ -354,7 +355,7 @@ class Dataset(BaseRemoteDataset):
"""
ids: list[str] = []
- client: "DadosGov"
+ client: DadosGov
group_aliases: dict[str, str] = {}
def __repr__(self):
@@ -369,7 +370,7 @@ def formatter(self, filename: str) -> dict[str, Any]:
async def _fetch_content(self) -> list[Group]:
"""Fetch all groups belonging to this dataset."""
items: list[Group] = []
- client: "DadosGov" = self.client
+ client: DadosGov = self.client
if self.ids:
for group_id in self.ids:
record = await client.get_dataset(group_id)
diff --git a/pysus/api/dadosgov/README.ipynb b/pysus/api/ducklake/README.md
similarity index 100%
rename from pysus/api/dadosgov/README.ipynb
rename to pysus/api/ducklake/README.md
diff --git a/pysus/api/ducklake/README.ipynb b/pysus/api/ducklake/catalog/__init__.py
similarity index 100%
rename from pysus/api/ducklake/README.ipynb
rename to pysus/api/ducklake/catalog/__init__.py
diff --git a/pysus/api/ducklake/catalog/columns.py b/pysus/api/ducklake/catalog/columns.py
new file mode 100644
index 00000000..b7cd3751
--- /dev/null
+++ b/pysus/api/ducklake/catalog/columns.py
@@ -0,0 +1,7235 @@
+"""Catalog column definitions extracted from catalog.db.
+
+Each constant is a column name bound to a {dataset_name: description} dict.
+"""
+
+ABAND = {"pni": ""}
+
+ABDOMINAL = {"sinan": ""}
+
+ABRANDAD = {"cnes": ""}
+
+AB_ANOACOM = {"sia": ""}
+
+AB_DTCIRG2 = {"sia": ""}
+
+AB_DTCIRUR = {"sia": ""}
+
+AB_IMC = {"sia": ""}
+
+AB_MESACOM = {"sia": ""}
+
+AB_NUMAIH = {"sia": ""}
+
+AB_NUMAIH2 = {"sia": ""}
+
+AB_PONTBAR = {"sia": ""}
+
+AB_PRCAIH2 = {"sia": ""}
+
+AB_PRCAIH3 = {"sia": ""}
+
+AB_PRCAIH4 = {"sia": ""}
+
+AB_PRCAIH5 = {"sia": ""}
+
+AB_PRCAIH6 = {"sia": ""}
+
+AB_PROCAIH = {"sia": ""}
+
+AB_TABBARR = {"sia": ""}
+
+AB_T_PRC2 = {"sia": ""}
+
+AB_T_PRC3 = {"sia": ""}
+
+AB_T_PRC4 = {"sia": ""}
+
+AB_T_PRC5 = {"sia": ""}
+
+AB_T_PRC6 = {"sia": ""}
+
+ACF_ARTDIA = {"sia": ""}
+
+ACF_DUPLEX = {"sia": ""}
+
+ACF_FLEBIT = {"sia": ""}
+
+ACF_FREMIT = {"sia": ""}
+
+ACF_HEMATO = {"sia": ""}
+
+ACF_PREFAV = {"sia": ""}
+
+ACF_PULSO = {"sia": ""}
+
+ACF_USOCAT = {"sia": ""}
+
+ACF_VEIAVI = {"sia": ""}
+
+ACF_VEIDIA = {"sia": ""}
+
+ACIDO_PEPT = {"sinan": ""}
+
+ACIDTRAB = {"sim": ""}
+
+ACONDIC = {"sinan": ""}
+
+ACUPUNTURA = {"sinan": ""}
+
+AEROFOBIA = {"sinan": ""}
+
+AFASTAMENT = {"sinan": ""}
+
+AFAST_DESG = {"sinan": ""}
+
+AFAST_RISC = {"sinan": ""}
+
+AFAST_TRAB = {"sinan": ""}
+
+AFIRMATIVO = {"sinan": ""}
+
+AGENTE = {"sinan": ""}
+
+AGENTE_1 = {"sinan": ""}
+
+AGENTE_2 = {"sinan": ""}
+
+AGENTE_3 = {"sinan": ""}
+
+AGENTE_DES = {"sinan": ""}
+
+AGENTE_ET0 = {"sinan": ""}
+
+AGENTE_ET1 = {"sinan": ""}
+
+AGENTE_ET2 = {"sinan": ""}
+
+AGENTE_ET3 = {"sinan": ""}
+
+AGENTE_ETI = {"sinan": ""}
+
+AGENTE_OUT = {"sinan": ""}
+
+AGENTE_TOX = {"sinan": ""}
+
+AGHBE = {"sinan": ""}
+
+AGHBS = {"sinan": ""}
+
+AGITACAO = {"sinan": ""}
+
+AGRAVAIDS = {"sinan": ""}
+
+AGRAVALCOO = {"sinan": ""}
+
+AGRAVDIABE = {"sinan": ""}
+
+AGRAVDOENC = {"sinan": ""}
+
+AGRAVDROGA = {"sinan": ""}
+
+AGRAVOUTDE = {"sinan": ""}
+
+AGRAVOUTRA = {"sinan": ""}
+
+AGRAVO_DES = {"sinan": ""}
+
+AGRAVTABAC = {"sinan": ""}
+
+AGRESSIVI = {"sinan": ""}
+
+AGUA_ALIME = {"sinan": ""}
+
+AG_AMEACA = {"sinan": ""}
+
+AG_CORTE = {"sinan": ""}
+
+AG_ENFOR = {"sinan": ""}
+
+AG_ENVEN = {"sinan": ""}
+
+AG_ESPEC = {"sinan": ""}
+
+AG_FOGO = {"sinan": ""}
+
+AG_FORCA = {"sinan": ""}
+
+AG_OBJETO = {"sinan": ""}
+
+AG_OUTROS = {"sinan": ""}
+
+AG_QUENTE = {"sinan": ""}
+
+AIH = {"sih": ""}
+
+ALCATRAO = {"sinan": ""}
+
+ALCOOL = {"sinan": ""}
+
+ALIMENTO_C = {"sinan": ""}
+
+ALRM_ABDOM = {"sinan": ""}
+
+ALRM_HEMAT = {"sinan": ""}
+
+ALRM_HEPAT = {"sinan": ""}
+
+ALRM_HIPOT = {"sinan": ""}
+
+ALRM_LETAR = {"sinan": ""}
+
+ALRM_LIQ = {"sinan": ""}
+
+ALRM_PLAQ = {"sinan": ""}
+
+ALRM_SANG = {"sinan": ""}
+
+ALRM_VOM = {"sinan": ""}
+
+ALTCAUSA = {"sim": ""}
+
+ALVARA = {"cnes": ""}
+
+AMALARIA = {"sinan": ""}
+
+AMBIENTE = {"sinan": ""}
+
+AMB_NSUS = {"cnes": ""}
+
+AMB_SUS = {"cnes": ""}
+
+AMINA = {"sinan": ""}
+
+AMOS_OUT = {"sinan": ""}
+
+AMOS_PCR = {"sinan": ""}
+
+AMPICILINA = {"sinan": ""}
+
+AMPOLAS = {"sinan": ""}
+
+AMP_ACEVAS = {"sia": ""}
+
+AMP_ALBUMI = {"sia": ""}
+
+AMP_CARACT = {"sia": ""}
+
+AMP_DTCLI = {"sia": ""}
+
+AMP_DTINI = {"sia": ""}
+
+AMP_FOSFOR = {"sia": ""}
+
+AMP_HB = {"sia": ""}
+
+AMP_HBSAG = {"sia": ""}
+
+AMP_HCV = {"sia": ""}
+
+AMP_HIV = {"sia": ""}
+
+AMP_INTERC = {"sia": ""}
+
+AMP_KTVSEM = {"sia": ""}
+
+AMP_MAISNE = {"sia": ""}
+
+AMP_PTH = {"sia": ""}
+
+AMP_SEAPTO = {"sia": ""}
+
+AMP_SEPERI = {"sia": ""}
+
+AMP_SITINI = {"sia": ""}
+
+AMP_SITTRA = {"sia": ""}
+
+AMP_TRU = {"sia": ""}
+
+AM_ALTURA = {"sia": ""}
+
+AM_GESTANT = {"sia": ""}
+
+AM_PESO = {"sia": ""}
+
+AM_QTDTRAN = {"sia": ""}
+
+AM_SANGUE = {"sinan": ""}
+
+AM_TRANSPL = {"sia": ""}
+
+ANIMAL = {"sinan": ""}
+
+ANIM_ESP = {"sinan": ""}
+
+ANI_ARANHA = {"sinan": ""}
+
+ANI_LAGART = {"sinan": ""}
+
+ANI_SERPEN = {"sinan": ""}
+
+ANI_TIPO_1 = {"sinan": ""}
+
+ANO = {
+ "ibge": "",
+ "pni": "",
+ "sih": "",
+ "sinan": "",
+}
+
+ANOMES = {"pni": ""}
+
+ANOREXIA = {"sinan": ""}
+
+ANO_CMPT = {
+ "ciha": "",
+ "sih": "",
+}
+
+ANO_DT_SIN = {"sinan": ""}
+
+ANO_NASC = {"sinan": ""}
+
+ANTDTTRANS = {"sinan": ""}
+
+ANTEC_POS = {"sinan": ""}
+
+ANTEC_PRE = {"sinan": ""}
+
+ANTIBIOTIC = {"sinan": ""}
+
+ANTIB_DES = {"sinan": ""}
+
+ANTIHAVIGM = {"sinan": ""}
+
+ANTIHBCIGM = {"sinan": ""}
+
+ANTIHBE = {"sinan": ""}
+
+ANTIHBS = {"sinan": ""}
+
+ANTIHCV = {"sinan": ""}
+
+ANTIHDV = {"sinan": ""}
+
+ANTIHDVIGM = {"sinan": ""}
+
+ANTIHEVIGM = {"sinan": ""}
+
+ANTI_HBS = {"sinan": ""}
+
+ANTI_HCV = {"sinan": ""}
+
+ANTI_HIV = {"sinan": ""}
+
+ANTI_RAB = {"sinan": ""}
+
+ANTMUNTRAN = {"sinan": ""}
+
+ANTRELSE_N = {"sinan": ""}
+
+ANTSIFIL_N = {"sinan": ""}
+
+ANTTRANS_M = {"sinan": ""}
+
+ANTUFTRANS = {"sinan": ""}
+
+ANT_30_DIA = {"sinan": ""}
+
+ANT_AC = {"sinan": ""}
+
+ANT_ACIDEN = {"sinan": ""}
+
+ANT_AIDS = {"sinan": ""}
+
+ANT_ANEMIA = {"sinan": ""}
+
+ANT_ANIMAI = {"sinan": ""}
+
+ANT_ARAGEM = {"sinan": ""}
+
+ANT_ARRANH = {"sinan": ""}
+
+ANT_ARRUMO = {"sinan": ""}
+
+ANT_ASTERI = {"sinan": ""}
+
+ANT_BC = {"sinan": ""}
+
+ANT_BCG = {"sinan": ""}
+
+ANT_CABECA = {"sinan": ""}
+
+ANT_CANCER = {"sinan": ""}
+
+ANT_CANDID = {"sinan": ""}
+
+ANT_CAQUEX = {"sinan": ""}
+
+ANT_CAT_EX = {"sinan": ""}
+
+ANT_CB_CAI = {"sinan": ""}
+
+ANT_CB_CAR = {"sinan": ""}
+
+ANT_CB_COR = {"sinan": ""}
+
+ANT_CB_CRI = {"sinan": ""}
+
+ANT_CB_FOS = {"sinan": ""}
+
+ANT_CB_GRA = {"sinan": ""}
+
+ANT_CB_LAM = {"sinan": ""}
+
+ANT_CB_LAV = {"sinan": ""}
+
+ANT_CB_LIM = {"sinan": ""}
+
+ANT_CB_LIX = {"sinan": ""}
+
+ANT_CB_OUT = {"sinan": ""}
+
+ANT_CB_PLA = {"sinan": ""}
+
+ANT_CB_ROE = {"sinan": ""}
+
+ANT_CB_SIN = {"sinan": ""}
+
+ANT_CB_TER = {"sinan": ""}
+
+ANT_CHAGAS = {"sinan": ""}
+
+ANT_CITO = {"sinan": ""}
+
+ANT_COLHEI = {"sinan": ""}
+
+ANT_CONJ_C = {"sinan": ""}
+
+ANT_CONTAG = {"sinan": ""}
+
+ANT_CONTAT = {"sinan": ""}
+
+ANT_CONT_N = {"sinan": ""}
+
+ANT_CORTE = {"sinan": ""}
+
+ANT_CRIPTO = {"sinan": ""}
+
+ANT_CRIP_1 = {"sinan": ""}
+
+ANT_DERMAT = {"sinan": ""}
+
+ANT_DESMAT = {"sinan": ""}
+
+ANT_DIARRE = {"sinan": ""}
+
+ANT_DILACE = {"sinan": ""}
+
+ANT_DISFUN = {"sinan": ""}
+
+ANT_DOSES = {"sinan": ""}
+
+ANT_DOSES_ = {"sinan": ""}
+
+ANT_DOSE_3 = {"sinan": ""}
+
+ANT_DOSE_4 = {"sinan": ""}
+
+ANT_DOSE_5 = {"sinan": ""}
+
+ANT_DOSE_7 = {"sinan": ""}
+
+ANT_DOSE_C = {"sinan": ""}
+
+ANT_DOSE_T = {"sinan": ""}
+
+ANT_DOS_N = {"sinan": ""}
+
+ANT_DROGA = {"sinan": ""}
+
+ANT_DTULT_ = {"sinan": ""}
+
+ANT_DTUL_3 = {"sinan": ""}
+
+ANT_DTUL_4 = {"sinan": ""}
+
+ANT_DTUL_5 = {"sinan": ""}
+
+ANT_DTUL_7 = {"sinan": ""}
+
+ANT_DTUL_8 = {"sinan": ""}
+
+ANT_DTUL_C = {"sinan": ""}
+
+ANT_DTUL_T = {"sinan": ""}
+
+ANT_DT_ACI = {"sinan": ""}
+
+ANT_DT_EXP = {"sinan": ""}
+
+ANT_DT_INV = {"sinan": ""}
+
+ANT_DT_VAC = {"sinan": ""}
+
+ANT_ESOF_N = {"sinan": ""}
+
+ANT_EVLABO = {"sinan": ""}
+
+ANT_EXPOSI = {"sinan": ""}
+
+ANT_FEBRE = {"sinan": ""}
+
+ANT_HEMOLF = {"sinan": ""}
+
+ANT_HEMO_T = {"sinan": ""}
+
+ANT_HERPES = {"sinan": ""}
+
+ANT_HISTO = {"sinan": ""}
+
+ANT_HUMANO = {"sinan": ""}
+
+ANT_H_SIMP = {"sinan": ""}
+
+ANT_IDADE = {"sinan": ""}
+
+ANT_IMUNO = {"sinan": ""}
+
+ANT_INF_HO = {"sinan": ""}
+
+ANT_INVEST = {"sinan": ""}
+
+ANT_IRA = {"sinan": ""}
+
+ANT_ISOPOR = {"sinan": ""}
+
+ANT_LAMBED = {"sinan": ""}
+
+ANT_LAZER = {"sinan": ""}
+
+ANT_LEUCO = {"sinan": ""}
+
+ANT_LIMPEZ = {"sinan": ""}
+
+ANT_LINFO = {"sinan": ""}
+
+ANT_LINFOM = {"sinan": ""}
+
+ANT_LINFO_ = {"sinan": ""}
+
+ANT_LOCA_1 = {"sinan": ""}
+
+ANT_MAOS = {"sinan": ""}
+
+ANT_MEMBRO = {"sinan": ""}
+
+ANT_MEMB_1 = {"sinan": ""}
+
+ANT_MICRO = {"sinan": ""}
+
+ANT_MOAGEM = {"sinan": ""}
+
+ANT_MORDED = {"sinan": ""}
+
+ANT_MUCOSA = {"sinan": ""}
+
+ANT_MUNIC_ = {"sinan": ""}
+
+ANT_MUNI_C = {"sinan": ""}
+
+ANT_OCUPAC = {"sinan": ""}
+
+ANT_OUTR = {"sinan": ""}
+
+ANT_OUTRA = {"sinan": ""}
+
+ANT_OUTRO = {"sinan": ""}
+
+ANT_OUTROS = {"sinan": ""}
+
+ANT_OUTRO_ = {"sinan": ""}
+
+ANT_OUTR_D = {"sinan": ""}
+
+ANT_OUT_D = {"sinan": ""}
+
+ANT_OU_DE = {"sinan": ""}
+
+ANT_OU_DES = {"sinan": ""}
+
+ANT_PAIS = {"sinan": ""}
+
+ANT_PERINA = {"sinan": ""}
+
+ANT_PLANTI = {"sinan": ""}
+
+ANT_PNEUMO = {"sinan": ""}
+
+ANT_PRE_NA = {"sinan": ""}
+
+ANT_PROFUN = {"sinan": ""}
+
+ANT_PULMON = {"sinan": ""}
+
+ANT_PULM_N = {"sinan": ""}
+
+ANT_RACA = {"sinan": ""}
+
+ANT_REL_CA = {"sinan": ""}
+
+ANT_REL_N = {"sinan": ""}
+
+ANT_RETRO = {"sinan": ""}
+
+ANT_ROEDOR = {"sinan": ""}
+
+ANT_SALMO = {"sinan": ""}
+
+ANT_SARCOM = {"sinan": ""}
+
+ANT_SECUND = {"sinan": ""}
+
+ANT_SENTIN = {"sinan": ""}
+
+ANT_SUPERF = {"sinan": ""}
+
+ANT_TEMPO_ = {"sinan": ""}
+
+ANT_TIPOCO = {"sinan": ""}
+
+ANT_TOSSE = {"sinan": ""}
+
+ANT_TOXO = {"sinan": ""}
+
+ANT_TRANS_ = {"sinan": ""}
+
+ANT_TRASMI = {"sinan": ""}
+
+ANT_TRATAD = {"sinan": ""}
+
+ANT_TRAUMA = {"sinan": ""}
+
+ANT_TRIPLI = {"sinan": ""}
+
+ANT_TRONCO = {"sinan": ""}
+
+ANT_TUBE = {"sinan": ""}
+
+ANT_TUBERC = {"sinan": ""}
+
+ANT_T_HEMO = {"sinan": ""}
+
+ANT_UF = {"sinan": ""}
+
+ANT_UF_1 = {"sinan": ""}
+
+ANT_UF_2 = {"sinan": ""}
+
+ANT_UF_3 = {"sinan": ""}
+
+ANT_UF_CRI = {"sinan": ""}
+
+ANT_ULTI_D = {"sinan": ""}
+
+ANT_VACINA = {"sinan": ""}
+
+AN_ACEVAS = {"sia": ""}
+
+AN_ALBUMI = {"sia": ""}
+
+AN_ALTURA = {"sia": ""}
+
+AN_CNCDO = {"sia": ""}
+
+AN_DIURES = {"sia": ""}
+
+AN_DTPDR = {"sia": ""}
+
+AN_GLICOS = {"sia": ""}
+
+AN_HB = {"sia": ""}
+
+AN_HBSAG = {"sia": ""}
+
+AN_HCV = {"sia": ""}
+
+AN_HIV = {"sia": ""}
+
+AN_INTFIS = {"sia": ""}
+
+AN_PESO = {"sia": ""}
+
+AN_QUALI = {"sinan": ""}
+
+AN_QUANT = {"sinan": ""}
+
+AN_TRU = {"sia": ""}
+
+AN_ULSOAB = {"sia": ""}
+
+AP01CV01 = {"cnes": ""}
+
+AP01CV02 = {"cnes": ""}
+
+AP01CV03 = {"cnes": ""}
+
+AP01CV04 = {"cnes": ""}
+
+AP01CV05 = {"cnes": ""}
+
+AP01CV06 = {"cnes": ""}
+
+AP01CV07 = {"cnes": ""}
+
+AP02CV01 = {"cnes": ""}
+
+AP02CV02 = {"cnes": ""}
+
+AP02CV03 = {"cnes": ""}
+
+AP02CV04 = {"cnes": ""}
+
+AP02CV05 = {"cnes": ""}
+
+AP02CV06 = {"cnes": ""}
+
+AP02CV07 = {"cnes": ""}
+
+AP03CV01 = {"cnes": ""}
+
+AP03CV02 = {"cnes": ""}
+
+AP03CV03 = {"cnes": ""}
+
+AP03CV04 = {"cnes": ""}
+
+AP03CV05 = {"cnes": ""}
+
+AP03CV06 = {"cnes": ""}
+
+AP03CV07 = {"cnes": ""}
+
+AP04CV01 = {"cnes": ""}
+
+AP04CV02 = {"cnes": ""}
+
+AP04CV03 = {"cnes": ""}
+
+AP04CV04 = {"cnes": ""}
+
+AP04CV05 = {"cnes": ""}
+
+AP04CV06 = {"cnes": ""}
+
+AP04CV07 = {"cnes": ""}
+
+AP05CV01 = {"cnes": ""}
+
+AP05CV02 = {"cnes": ""}
+
+AP05CV03 = {"cnes": ""}
+
+AP05CV04 = {"cnes": ""}
+
+AP05CV05 = {"cnes": ""}
+
+AP05CV06 = {"cnes": ""}
+
+AP05CV07 = {"cnes": ""}
+
+AP06CV01 = {"cnes": ""}
+
+AP06CV02 = {"cnes": ""}
+
+AP06CV03 = {"cnes": ""}
+
+AP06CV04 = {"cnes": ""}
+
+AP06CV05 = {"cnes": ""}
+
+AP06CV06 = {"cnes": ""}
+
+AP06CV07 = {"cnes": ""}
+
+AP07CV01 = {"cnes": ""}
+
+AP07CV02 = {"cnes": ""}
+
+AP07CV03 = {"cnes": ""}
+
+AP07CV04 = {"cnes": ""}
+
+AP07CV05 = {"cnes": ""}
+
+AP07CV06 = {"cnes": ""}
+
+AP07CV07 = {"cnes": ""}
+
+APGAR1 = {"sinasc": ""}
+
+APGAR5 = {"sinasc": ""}
+
+AP_ADESAO = {"sia": ""}
+
+AP_ALTA = {"sia": ""}
+
+AP_APACAN = {"sia": ""}
+
+AP_APACANT = {"sia": ""}
+
+AP_ATV_FIS = {"sia": ""}
+
+AP_AUTORIZ = {"sia": ""}
+
+AP_CATEND = {"sia": ""}
+
+AP_CEPPCN = {"sia": ""}
+
+AP_CIDCAS = {"sia": ""}
+
+AP_CIDPRI = {"sia": ""}
+
+AP_CIDSEC = {"sia": ""}
+
+AP_CID_C1 = {"sia": ""}
+
+AP_CID_C2 = {"sia": ""}
+
+AP_CID_C3 = {"sia": ""}
+
+AP_CID_C4 = {"sia": ""}
+
+AP_CID_C5 = {"sia": ""}
+
+AP_CID_CO = {"sia": ""}
+
+AP_CMP = {"sia": ""}
+
+AP_CNPJCPF = {"sia": ""}
+
+AP_CNPJMNT = {"sia": ""}
+
+AP_CNSPCN = {"sia": ""}
+
+AP_CODEMI = {"sia": ""}
+
+AP_CODUNI = {"sia": ""}
+
+AP_COIDADE = {"sia": ""}
+
+AP_COMORB = {"sia": ""}
+
+AP_CONDIC = {"sia": ""}
+
+AP_DTAUT = {"sia": ""}
+
+AP_DTFIM = {"sia": ""}
+
+AP_DTINIC = {"sia": ""}
+
+AP_DTOCOR = {"sia": ""}
+
+AP_DTOOCOR = {"sia": ""}
+
+AP_DTSOLIC = {"sia": ""}
+
+AP_ENCERR = {"sia": ""}
+
+AP_ETNIA = {"sia": ""}
+
+AP_GESTAO = {"sia": ""}
+
+AP_MEDICAM = {"sia": ""}
+
+AP_MNDIF = {"sia": ""}
+
+AP_MN_IND = {"sia": ""}
+
+AP_MOTSAI = {"sia": ""}
+
+AP_MUNPCN = {"sia": ""}
+
+AP_MVM = {"sia": ""}
+
+AP_NATJUR = {"sia": ""}
+
+AP_NUIDADE = {"sia": ""}
+
+AP_OBITO = {"sia": ""}
+
+AP_PERMAN = {"sia": ""}
+
+AP_POLIVIT = {"sia": ""}
+
+AP_PRIPAL = {"sia": ""}
+
+AP_RACACOR = {"sia": ""}
+
+AP_REG_PES = {"sia": ""}
+
+AP_SEXO = {"sia": ""}
+
+AP_TIPPRE = {"sia": ""}
+
+AP_TPAPAC = {"sia": ""}
+
+AP_TPATEN = {"sia": ""}
+
+AP_TPATEND = {"sia": ""}
+
+AP_TPPRE = {"sia": ""}
+
+AP_TPUPS = {"sia": ""}
+
+AP_TRANSF = {"sia": ""}
+
+AP_UFDIF = {"sia": ""}
+
+AP_UFMUN = {"sia": ""}
+
+AP_UFNACIO = {"sia": ""}
+
+AP_UNISOL = {"sia": ""}
+
+AP_VL_AP = {"sia": ""}
+
+AQ_CID10 = {"sia": ""}
+
+AQ_CIDINI1 = {"sia": ""}
+
+AQ_CIDINI2 = {"sia": ""}
+
+AQ_CIDINI3 = {"sia": ""}
+
+AQ_CONTTR = {"sia": ""}
+
+AQ_DTIDEN = {"sia": ""}
+
+AQ_DTINI1 = {"sia": ""}
+
+AQ_DTINI2 = {"sia": ""}
+
+AQ_DTINI3 = {"sia": ""}
+
+AQ_DTINTR = {"sia": ""}
+
+AQ_ESQU_P1 = {"sia": ""}
+
+AQ_ESQU_P2 = {"sia": ""}
+
+AQ_ESTADI = {"sia": ""}
+
+AQ_GRAHIS = {"sia": ""}
+
+AQ_LINFIN = {"sia": ""}
+
+AQ_MED01 = {"sia": ""}
+
+AQ_MED02 = {"sia": ""}
+
+AQ_MED03 = {"sia": ""}
+
+AQ_MED04 = {"sia": ""}
+
+AQ_MED05 = {"sia": ""}
+
+AQ_MED06 = {"sia": ""}
+
+AQ_MED07 = {"sia": ""}
+
+AQ_MED08 = {"sia": ""}
+
+AQ_MED09 = {"sia": ""}
+
+AQ_MED10 = {"sia": ""}
+
+AQ_TOTMAU = {"sia": ""}
+
+AQ_TOTMPL = {"sia": ""}
+
+AQ_TRANTE = {"sia": ""}
+
+AREA = {"sinasc": ""}
+
+AREARES = {"sim": ""}
+
+ARMAZ_FT = {"cnes": ""}
+
+ARRANHAO = {"sinan": ""}
+
+ARRITMIAS = {"sinan": ""}
+
+ARTEI = {"sinan": ""}
+
+ARTEM = {"sinan": ""}
+
+ARTEMI = {"sinan": ""}
+
+ARTESU = {"sinan": ""}
+
+ARTRALGIA = {"sinan": ""}
+
+ARTRITE = {"sinan": ""}
+
+AR_CID10 = {"sia": ""}
+
+AR_CIDINI1 = {"sia": ""}
+
+AR_CIDINI2 = {"sia": ""}
+
+AR_CIDINI3 = {"sia": ""}
+
+AR_CIDTR1 = {"sia": ""}
+
+AR_CIDTR2 = {"sia": ""}
+
+AR_CIDTR3 = {"sia": ""}
+
+AR_CONTTR = {"sia": ""}
+
+AR_DTIDEN = {"sia": ""}
+
+AR_DTINI1 = {"sia": ""}
+
+AR_DTINI2 = {"sia": ""}
+
+AR_DTINI3 = {"sia": ""}
+
+AR_DTINTR = {"sia": ""}
+
+AR_ESTADI = {"sia": ""}
+
+AR_FIMAR1 = {"sia": ""}
+
+AR_FIMAR2 = {"sia": ""}
+
+AR_FIMAR3 = {"sia": ""}
+
+AR_FINALI = {"sia": ""}
+
+AR_GRAHIS = {"sia": ""}
+
+AR_INIAR1 = {"sia": ""}
+
+AR_INIAR2 = {"sia": ""}
+
+AR_INIAR3 = {"sia": ""}
+
+AR_LINFIN = {"sia": ""}
+
+AR_NUMC1 = {"sia": ""}
+
+AR_NUMC2 = {"sia": ""}
+
+AR_NUMC3 = {"sia": ""}
+
+AR_SMRD = {"sia": ""}
+
+AR_TRANTE = {"sia": ""}
+
+ASBESTO = {"sinan": ""}
+
+ASCITE = {"sinan": ""}
+
+ASMA = {"sinan": ""}
+
+ASSENTAD = {"cnes": ""}
+
+ASSINTOM = {"sinan": ""}
+
+ASSINTOMA = {"sinan": ""}
+
+ASSINTOMAT = {"sinan": ""}
+
+ASSISTMED = {"sim": ""}
+
+ASSIST_SOC = {"sinan": ""}
+
+ASTENIA = {"sinan": ""}
+
+ATD_ACEVAS = {"sia": ""}
+
+ATD_ALBUMI = {"sia": ""}
+
+ATD_CARACT = {"sia": ""}
+
+ATD_DTCLI = {"sia": ""}
+
+ATD_DTPDR = {"sia": ""}
+
+ATD_FOSFOR = {"sia": ""}
+
+ATD_HB = {"sia": ""}
+
+ATD_HBSAG = {"sia": ""}
+
+ATD_HCV = {"sia": ""}
+
+ATD_HIV = {"sia": ""}
+
+ATD_INTERC = {"sia": ""}
+
+ATD_KTVSEM = {"sia": ""}
+
+ATD_MAISNE = {"sia": ""}
+
+ATD_PTH = {"sia": ""}
+
+ATD_SEAPTO = {"sia": ""}
+
+ATD_SEPERI = {"sia": ""}
+
+ATD_SITINI = {"sia": ""}
+
+ATD_SITTRA = {"sia": ""}
+
+ATD_TRU = {"sia": ""}
+
+ATENDAMB = {"cnes": ""}
+
+ATENDE_MED = {"sinan": ""}
+
+ATENDHOS = {"cnes": ""}
+
+ATENDIMENT = {"sinan": ""}
+
+ATEND_MULH = {"sinan": ""}
+
+ATEND_PR = {"cnes": ""}
+
+ATESTADO = {"sim": ""}
+
+ATESTANTE = {"sim": ""}
+
+ATE_DT_ALT = {"sinan": ""}
+
+ATE_DT_INT = {"sinan": ""}
+
+ATE_HIPOTE = {"sinan": ""}
+
+ATE_HOSP = {"sinan": ""}
+
+ATE_HOSPIT = {"sinan": ""}
+
+ATE_INTERN = {"sinan": ""}
+
+ATE_MUNICI = {"sinan": ""}
+
+ATE_UF = {"sinan": ""}
+
+ATE_UF_HOS = {"sinan": ""}
+
+ATE_UF_INT = {"sinan": ""}
+
+ATIVIDAD = {"cnes": ""}
+
+ATIVIDA_1 = {"sinan": ""}
+
+ATIVIDA_2 = {"sinan": ""}
+
+ATIVIDA_3 = {"sinan": ""}
+
+AT_ATIVIDA = {"sinan": ""}
+
+AT_LAMINA = {"sinan": ""}
+
+AT_SINTOMA = {"sinan": ""}
+
+AUDITIVA = {"sinan": ""}
+
+AUD_JUST = {"sih": ""}
+
+AUMENTO = {"sinan": ""}
+
+AUTORIZ = {"sia": ""}
+
+AUTOR_ALCO = {"sinan": ""}
+
+AUTOR_SEXO = {"sinan": ""}
+
+AUTO_IMUNE = {"sinan": ""}
+
+AVALIA_N = {"sinan": ""}
+
+AVAL_ATU_N = {"sinan": ""}
+
+AVENTAL = {"sinan": ""}
+
+AV_ACRED = {"cnes": ""}
+
+AV_PNASS = {"cnes": ""}
+
+AZT3TC = {"sinan": ""}
+
+AZT3TC_IND = {"sinan": ""}
+
+AZT3TC_NFV = {"sinan": ""}
+
+BACILOSCOP = {"sinan": ""}
+
+BACILOSC_1 = {"sinan": ""}
+
+BACILOSC_2 = {"sinan": ""}
+
+BACILOSC_3 = {"sinan": ""}
+
+BACILOSC_4 = {"sinan": ""}
+
+BACILOSC_5 = {"sinan": ""}
+
+BACILOSC_6 = {"sinan": ""}
+
+BACILOSC_E = {"sinan": ""}
+
+BACILOSC_O = {"sinan": ""}
+
+BACILOS_E2 = {"sinan": ""}
+
+BACO = {"sinan": ""}
+
+BACTERIA = {"sinan": ""}
+
+BAC_APOS_6 = {"sinan": ""}
+
+BAIRES = {"sim": ""}
+
+BAIRRO_MAE = {"sinasc": ""}
+
+BANCOSANGU = {"sinan": ""}
+
+BENEF_GOV = {"sinan": ""}
+
+BENZENO = {"sinan": ""}
+
+BERILIO = {"sinan": ""}
+
+BIOPSIA = {"sinan": ""}
+
+BIOSSEG = {"sinan": ""}
+
+BLOCOPER = {"cnes": ""}
+
+BLOQUEIO = {"sinan": ""}
+
+BOTA = {"sinan": ""}
+
+BOVINO = {"sinan": ""}
+
+BUSCA_ATIV = {"sinan": ""}
+
+CABECA = {"sinan": ""}
+
+CADMIO = {"sinan": ""}
+
+CALAFRIO = {"sinan": ""}
+
+CANCER = {"sinan": ""}
+
+CAO_GATO = {"sinan": ""}
+
+CAPES = {"sinan": ""}
+
+CAPIVARA = {"sinan": ""}
+
+CARACTER = {"cnes": ""}
+
+CARDIOPATI = {"sinan": ""}
+
+CARRAPATO = {"sinan": ""}
+
+CARTORIO = {
+ "sim": "",
+ "sinasc": "",
+}
+
+CARVAO = {"sinan": ""}
+
+CAR_INT = {
+ "ciha": "",
+ "sih": "",
+}
+
+CASO = {"sinan": ""}
+
+CASO_ISOLA = {"sinan": ""}
+
+CAT = {"sinan": ""}
+
+CATARATA = {"sinan": ""}
+
+CATEND = {"sia": ""}
+
+CAUSABAS = {"sim": ""}
+
+CAUSABAS_O = {"sim": ""}
+
+CAUSAMAT = {"sim": ""}
+
+CBO = {"cnes": ""}
+
+CBOPROF = {"sia": ""}
+
+CBOR = {"sih": ""}
+
+CBOUNICO = {"cnes": ""}
+
+CB_PRE = {"sim": ""}
+
+CD_OUTRO = {"sinan": ""}
+
+CEFALEIA = {"sinan": ""}
+
+CENTRCIR = {"cnes": ""}
+
+CENTRNEO = {"cnes": ""}
+
+CENTROBS = {"cnes": ""}
+
+CEP = {"sih": ""}
+
+CGC_CONSOR = {"ciha": ""}
+
+CGC_HOSP = {
+ "ciha": "",
+ "sih": "",
+}
+
+CGC_MANT = {"sih": ""}
+
+CHAGOMA = {"sinan": ""}
+
+CHOQUE = {"sinan": ""}
+
+CICL_VID = {"sinan": ""}
+
+CIDASSOC = {"sia": ""}
+
+CIDPRI = {"sia": ""}
+
+CID_ACID = {"sinan": ""}
+
+CID_ASSO = {"sih": ""}
+
+CID_LESAO = {"sinan": ""}
+
+CID_MORTE = {"sih": ""}
+
+CID_NOTIF = {"sih": ""}
+
+CIRCOBITO = {"sim": ""}
+
+CIRCUNSTAN = {"sinan": ""}
+
+CIRCUN_DES = {"sinan": ""}
+
+CIRC_LESAO = {"sinan": ""}
+
+CIRURGIA = {"sim": ""}
+
+CIRURGICO = {"sinan": ""}
+
+CLASAVAL = {"cnes": ""}
+
+CLASSATUAL = {"sinan": ""}
+
+CLASSI_FIN = {"sinan": ""}
+
+CLASSOPERA = {"sinan": ""}
+
+CLASS_SR = {"cnes": ""}
+
+CLAS_FORMA = {"sinan": ""}
+
+CLAS_TIPO_ = {"sinan": ""}
+
+CLA_ME_ASS = {"sinan": ""}
+
+CLA_ME_BAC = {"sinan": ""}
+
+CLA_ME_ETI = {"sinan": ""}
+
+CLA_SOROGR = {"sinan": ""}
+
+CLA_TIPO_N = {"sinan": ""}
+
+CLICDCCA_N = {"sinan": ""}
+
+CLIENTEL = {"cnes": ""}
+
+CLINC_CHIK = {"sinan": ""}
+
+CLIND = {"sinan": ""}
+
+CLINDI = {"sinan": ""}
+
+CLI_ABAULA = {"sinan": ""}
+
+CLI_ABDOMI = {"sinan": ""}
+
+CLI_AGUDA = {"sinan": ""}
+
+CLI_AMIGDA = {"sinan": ""}
+
+CLI_ANEMIA = {"sinan": ""}
+
+CLI_ANGUST = {"sinan": ""}
+
+CLI_AQ_D_N = {"sinan": ""}
+
+CLI_AQ_E_N = {"sinan": ""}
+
+CLI_ARRITM = {"sinan": ""}
+
+CLI_ASCEND = {"sinan": ""}
+
+CLI_ASSIME = {"sinan": ""}
+
+CLI_ASTENI = {"sinan": ""}
+
+CLI_A_FMID = {"sinan": ""}
+
+CLI_A_FMIE = {"sinan": ""}
+
+CLI_A_FMSD = {"sinan": ""}
+
+CLI_A_FMSE = {"sinan": ""}
+
+CLI_A_SMID = {"sinan": ""}
+
+CLI_A_SMIE = {"sinan": ""}
+
+CLI_A_SMSD = {"sinan": ""}
+
+CLI_A_SMSE = {"sinan": ""}
+
+CLI_A_S_FA = {"sinan": ""}
+
+CLI_A_TMID = {"sinan": ""}
+
+CLI_A_TMIE = {"sinan": ""}
+
+CLI_A_TMSD = {"sinan": ""}
+
+CLI_A_TMSE = {"sinan": ""}
+
+CLI_A_T_CE = {"sinan": ""}
+
+CLI_A_T_FA = {"sinan": ""}
+
+CLI_BICD_N = {"sinan": ""}
+
+CLI_BICE_N = {"sinan": ""}
+
+CLI_BRUDZ = {"sinan": ""}
+
+CLI_CANDIA = {"sinan": ""}
+
+CLI_CARDIA = {"sinan": ""}
+
+CLI_CAVIDA = {"sinan": ""}
+
+CLI_CDCCRE = {"sinan": ""}
+
+CLI_CDCLIH = {"sinan": ""}
+
+CLI_CDC_CI = {"sinan": ""}
+
+CLI_CDC_CR = {"sinan": ""}
+
+CLI_CDC_EN = {"sinan": ""}
+
+CLI_CDC_GE = {"sinan": ""}
+
+CLI_CDC_HE = {"sinan": ""}
+
+CLI_CDC_HI = {"sinan": ""}
+
+CLI_CDC_IN = {"sinan": ""}
+
+CLI_CDC_IS = {"sinan": ""}
+
+CLI_CDC_LE = {"sinan": ""}
+
+CLI_CDC_LI = {"sinan": ""}
+
+CLI_CDC_ME = {"sinan": ""}
+
+CLI_CDC_MI = {"sinan": ""}
+
+CLI_CDC_PC = {"sinan": ""}
+
+CLI_CDC_PN = {"sinan": ""}
+
+CLI_CDC_SA = {"sinan": ""}
+
+CLI_CDC_SI = {"sinan": ""}
+
+CLI_CDC_SK = {"sinan": ""}
+
+CLI_CDC_TO = {"sinan": ""}
+
+CLI_CEFALE = {"sinan": ""}
+
+CLI_CERVIC = {"sinan": ""}
+
+CLI_CHOQUE = {"sinan": ""}
+
+CLI_CICATR = {"sinan": ""}
+
+CLI_COMA = {"sinan": ""}
+
+CLI_CONDUT = {"sinan": ""}
+
+CLI_CONGES = {"sinan": ""}
+
+CLI_CONJUN = {"sinan": ""}
+
+CLI_CONTAT = {"sinan": ""}
+
+CLI_CONVUL = {"sinan": ""}
+
+CLI_CON_ES = {"sinan": ""}
+
+CLI_CORDAO = {"sinan": ""}
+
+CLI_CO_HIV = {"sinan": ""}
+
+CLI_CRONIC = {"sinan": ""}
+
+CLI_CUTANE = {"sinan": ""}
+
+CLI_CUT_DI = {"sinan": ""}
+
+CLI_DERMA = {"sinan": ""}
+
+CLI_DESCEN = {"sinan": ""}
+
+CLI_DESC_O = {"sinan": ""}
+
+CLI_DIARRE = {"sinan": ""}
+
+CLI_DISPNE = {"sinan": ""}
+
+CLI_DISSEM = {"sinan": ""}
+
+CLI_DOR = {"sinan": ""}
+
+CLI_DORES = {"sinan": ""}
+
+CLI_DT = {"sinan": ""}
+
+CLI_DT_ATE = {"sinan": ""}
+
+CLI_DT_EXA = {"sinan": ""}
+
+CLI_EDEMA = {"sinan": ""}
+
+CLI_EDEMAG = {"sinan": ""}
+
+CLI_EQUIMO = {"sinan": ""}
+
+CLI_ESPECI = {"sinan": ""}
+
+CLI_ESPLEN = {"sinan": ""}
+
+CLI_EXT_D = {"sinan": ""}
+
+CLI_EXT_E = {"sinan": ""}
+
+CLI_FACE = {"sinan": ""}
+
+CLI_FARING = {"sinan": ""}
+
+CLI_FEBRE = {"sinan": ""}
+
+CLI_FLACID = {"sinan": ""}
+
+CLI_FLE_D = {"sinan": ""}
+
+CLI_FLE_E = {"sinan": ""}
+
+CLI_F_MID = {"sinan": ""}
+
+CLI_F_MIE = {"sinan": ""}
+
+CLI_F_MSD = {"sinan": ""}
+
+CLI_F_MSE = {"sinan": ""}
+
+CLI_GARGAN = {"sinan": ""}
+
+CLI_H = {"sinan": ""}
+
+CLI_HEMO = {"sinan": ""}
+
+CLI_HEMOPU = {"sinan": ""}
+
+CLI_HEMORR = {"sinan": ""}
+
+CLI_HEPATI = {"sinan": ""}
+
+CLI_HEPATO = {"sinan": ""}
+
+CLI_HERPEG = {"sinan": ""}
+
+CLI_HERPES = {"sinan": ""}
+
+CLI_HIPOTE = {"sinan": ""}
+
+CLI_H_DESC = {"sinan": ""}
+
+CLI_ICTERI = {"sinan": ""}
+
+CLI_INFCIT = {"sinan": ""}
+
+CLI_INJECA = {"sinan": ""}
+
+CLI_KERNIG = {"sinan": ""}
+
+CLI_LARING = {"sinan": ""}
+
+CLI_LEIOMI = {"sinan": ""}
+
+CLI_LINFA = {"sinan": ""}
+
+CLI_LINFO = {"sinan": ""}
+
+CLI_LOCAL = {"sinan": ""}
+
+CLI_LOCAL_ = {"sinan": ""}
+
+CLI_LOCA_1 = {"sinan": ""}
+
+CLI_LOMBAR = {"sinan": ""}
+
+CLI_MENING = {"sinan": ""}
+
+CLI_MIALGI = {"sinan": ""}
+
+CLI_MIAL_D = {"sinan": ""}
+
+CLI_MIAL_G = {"sinan": ""}
+
+CLI_MIOCAR = {"sinan": ""}
+
+CLI_MIOLIT = {"sinan": ""}
+
+CLI_MUCOSA = {"sinan": ""}
+
+CLI_MUNICI = {"sinan": ""}
+
+CLI_NECROS = {"sinan": ""}
+
+CLI_NEFRIT = {"sinan": ""}
+
+CLI_NEFRO = {"sinan": ""}
+
+CLI_NEURO = {"sinan": ""}
+
+CLI_NEUROL = {"sinan": ""}
+
+CLI_NOCAR = {"sinan": ""}
+
+CLI_NUCA = {"sinan": ""}
+
+CLI_OBSTIP = {"sinan": ""}
+
+CLI_ORGAOS = {"sinan": ""}
+
+CLI_OSTEO = {"sinan": ""}
+
+CLI_OTITE = {"sinan": ""}
+
+CLI_OTRDES = {"sinan": ""}
+
+CLI_OUTRAS = {"sinan": ""}
+
+CLI_OUTRO = {"sinan": ""}
+
+CLI_OUTROS = {"sinan": ""}
+
+CLI_OUTR_2 = {"sinan": ""}
+
+CLI_OUTR_3 = {"sinan": ""}
+
+CLI_OUT_D = {"sinan": ""}
+
+CLI_PALATO = {"sinan": ""}
+
+CLI_PALIDE = {"sinan": ""}
+
+CLI_PANTUR = {"sinan": ""}
+
+CLI_PARALB = {"sinan": ""}
+
+CLI_PARALM = {"sinan": ""}
+
+CLI_PARALP = {"sinan": ""}
+
+CLI_PAROTI = {"sinan": ""}
+
+CLI_PATD_N = {"sinan": ""}
+
+CLI_PATE_N = {"sinan": ""}
+
+CLI_PELE = {"sinan": ""}
+
+CLI_PESCOC = {"sinan": ""}
+
+CLI_PETEQU = {"sinan": ""}
+
+CLI_PROGRE = {"sinan": ""}
+
+CLI_PROST = {"sinan": ""}
+
+CLI_PROSTR = {"sinan": ""}
+
+CLI_PSEUDO = {"sinan": ""}
+
+CLI_PULMAO = {"sinan": ""}
+
+CLI_RENAL = {"sinan": ""}
+
+CLI_RESPI = {"sinan": ""}
+
+CLI_RESPIR = {"sinan": ""}
+
+CLI_RIGIDE = {"sinan": ""}
+
+CLI_RINITE = {"sinan": ""}
+
+CLI_RINORR = {"sinan": ""}
+
+CLI_SINTOM = {"sinan": ""}
+
+CLI_TEMPER = {"sinan": ""}
+
+CLI_TEMPO_ = {"sinan": ""}
+
+CLI_TONTUR = {"sinan": ""}
+
+CLI_TORACI = {"sinan": ""}
+
+CLI_TOSSE = {"sinan": ""}
+
+CLI_TOX1M = {"sinan": ""}
+
+CLI_TRAQUE = {"sinan": ""}
+
+CLI_TRID_N = {"sinan": ""}
+
+CLI_TRIE_N = {"sinan": ""}
+
+CLI_TUBERC = {"sinan": ""}
+
+CLI_TUPULM = {"sinan": ""}
+
+CLI_VAGAIS = {"sinan": ""}
+
+CLI_VARICE = {"sinan": ""}
+
+CLI_VOMITO = {"sinan": ""}
+
+CLORAFEN = {"sinan": ""}
+
+CLOROQ = {"sinan": ""}
+
+CLOROQI = {"sinan": ""}
+
+CMPT = {"sih": ""}
+
+CMPT_FIM = {"cnes": ""}
+
+CMPT_INI = {"cnes": ""}
+
+CNAE = {"sinan": ""}
+
+CNAER = {"sih": ""}
+
+CNAE_PRIN = {"sinan": ""}
+
+CNES = {
+ "ciha": "",
+ "cnes": "",
+ "sih": "",
+}
+
+CNESTERC = {"cnes": ""}
+
+CNES_ESF = {"sia": ""}
+
+CNES_EXEC = {"sia": ""}
+
+CNPJCPF = {"sia": ""}
+
+CNPJMNT = {"sia": ""}
+
+CNPJ_CC = {"sia": ""}
+
+CNPJ_MAN = {"cnes": ""}
+
+CNPJ_MANT = {"sih": ""}
+
+CNSPROF = {"sia": ""}
+
+CNS_ADM = {"cnes": ""}
+
+CNS_CONC = {"cnes": ""}
+
+CNS_CRES = {"cnes": ""}
+
+CNS_FNUC = {"cnes": ""}
+
+CNS_HMTL = {"cnes": ""}
+
+CNS_HMTR = {"cnes": ""}
+
+CNS_MRAD = {"cnes": ""}
+
+CNS_NEFR = {"cnes": ""}
+
+CNS_OCLIN = {"cnes": ""}
+
+CNS_OPED = {"cnes": ""}
+
+CNS_PAC = {"sia": ""}
+
+CNS_PROF = {"cnes": ""}
+
+CNS_RTEC = {"cnes": ""}
+
+COAGTOXMA1 = {"sinan": ""}
+
+COAGTOXMA2 = {"sinan": ""}
+
+COAGTOXMA3 = {"sinan": ""}
+
+COBERT = {"pni": ""}
+
+COBRANCA = {
+ "ciha": "",
+ "sih": "",
+}
+
+COB_ESF = {"sia": ""}
+
+CODANOMAL = {"sinasc": ""}
+
+CODBAINASC = {"sinasc": ""}
+
+CODBAIOCOR = {"sim": ""}
+
+CODBAIRES = {
+ "sim": "",
+ "sinasc": "",
+}
+
+CODCART = {
+ "sim": "",
+ "sinasc": "",
+}
+
+CODEQUIP = {"cnes": ""}
+
+CODESTAB = {
+ "sim": "",
+ "sinasc": "",
+}
+
+CODIFICADO = {"sim": ""}
+
+CODIGO = {
+ "sim": "",
+ "sinasc": "",
+}
+
+CODINST = {"sinasc": ""}
+
+CODISINF = {"sinan": ""}
+
+CODLEITO = {"cnes": ""}
+
+CODMUNCART = {
+ "sim": "",
+ "sinasc": "",
+}
+
+CODMUNNASC = {"sinasc": ""}
+
+CODMUNNATU = {
+ "sim": "",
+ "sinasc": "",
+}
+
+CODMUNOCOR = {"sim": ""}
+
+CODMUNRES = {
+ "sim": "",
+ "sinasc": "",
+}
+
+CODOCUPMAE = {"sinasc": ""}
+
+CODPAISRES = {"sinasc": ""}
+
+CODUFMUN = {"cnes": ""}
+
+CODUFNATU = {"sinasc": ""}
+
+CODUNI = {"sia": ""}
+
+COD_ARQ = {"sih": ""}
+
+COD_CEP = {"cnes": ""}
+
+COD_IDADE = {
+ "ciha": "",
+ "sih": "",
+}
+
+COD_IR = {"cnes": ""}
+
+COD_MUN_HO = {"sinan": ""}
+
+COD_SEG = {"sih": ""}
+
+COD_UF_HOS = {"sinan": ""}
+
+COLETAMARC = {"sinan": ""}
+
+COLETIVA = {"sinan": ""}
+
+COLETRES = {"cnes": ""}
+
+COLET_COMU = {"sinan": ""}
+
+COMA = {"sinan": ""}
+
+COMISS01 = {"cnes": ""}
+
+COMISS02 = {"cnes": ""}
+
+COMISS03 = {"cnes": ""}
+
+COMISS04 = {"cnes": ""}
+
+COMISS05 = {"cnes": ""}
+
+COMISS06 = {"cnes": ""}
+
+COMISS07 = {"cnes": ""}
+
+COMISS08 = {"cnes": ""}
+
+COMISS09 = {"cnes": ""}
+
+COMISS10 = {"cnes": ""}
+
+COMISS11 = {"cnes": ""}
+
+COMISS12 = {"cnes": ""}
+
+COMISSAO = {"cnes": ""}
+
+COMPET = {"sih": ""}
+
+COMPETEN = {"cnes": ""}
+
+COMPLEX = {
+ "sia": "",
+ "sih": "",
+}
+
+COMPLICA = {"sinan": ""}
+
+COMP_OUT = {"sinan": ""}
+
+COMP_OUT_D = {"sinan": ""}
+
+COMUNHOSP = {"sinan": ""}
+
+COMUNINF = {"sinan": ""}
+
+COMUNSVOIM = {"sim": ""}
+
+COM_APUTAC = {"sinan": ""}
+
+COM_CHOQUE = {"sinan": ""}
+
+COM_COMPOR = {"sinan": ""}
+
+COM_DEFICT = {"sinan": ""}
+
+COM_EDEMA = {"sinan": ""}
+
+COM_LOC = {"sinan": ""}
+
+COM_NECROS = {"sinan": ""}
+
+COM_PEST = {"sinan": ""}
+
+COM_RENAL = {"sinan": ""}
+
+COM_SECUND = {"sinan": ""}
+
+COM_SEPTIC = {"sinan": ""}
+
+COM_SISTEM = {"sinan": ""}
+
+CONDIC = {"sia": ""}
+
+CONDIC_ANI = {"sinan": ""}
+
+CONDUTA = {"sinan": ""}
+
+CONDUTA_DE = {"sinan": ""}
+
+CONDUT_DES = {"sinan": ""}
+
+CONFIRMA = {"sinan": ""}
+
+CONFIRMAD = {"sinan": ""}
+
+CONFPESO = {"sinasc": ""}
+
+CONF_INF_M = {"sinan": ""}
+
+CONF_INF_U = {"sinan": ""}
+
+CONF_MAS = {"cnes": ""}
+
+CONJUNTVIT = {"sinan": ""}
+
+CONSELHO = {"cnes": ""}
+
+CONSPRENAT = {"sinasc": ""}
+
+CONSTIPA = {"sinan": ""}
+
+CONSULTAS = {"sinasc": ""}
+
+CONS_ABORT = {"sinan": ""}
+
+CONS_COMP = {"sinan": ""}
+
+CONS_DST = {"sinan": ""}
+
+CONS_ESPEC = {"sinan": ""}
+
+CONS_ESTRE = {"sinan": ""}
+
+CONS_GRAV = {"sinan": ""}
+
+CONS_IDO = {"sinan": ""}
+
+CONS_MENT = {"sinan": ""}
+
+CONS_OUTR = {"sinan": ""}
+
+CONS_SUIC = {"sinan": ""}
+
+CONS_TUTEL = {"sinan": ""}
+
+CONT = {"sih": ""}
+
+CONTADOR = {
+ "sim": "",
+ "sinasc": "",
+}
+
+CONTATO = {"sinan": ""}
+
+CONTEXAM = {"sinan": ""}
+
+CONTRACEP1 = {"sih": ""}
+
+CONTRACEP2 = {"sih": ""}
+
+CONTRATE = {"cnes": ""}
+
+CONTRATM = {"cnes": ""}
+
+CONTREG = {"sinan": ""}
+
+CONTROLE = {"sinan": ""}
+
+CONTSRVU = {"cnes": ""}
+
+CONT_OUT = {"sinan": ""}
+
+CONVULSAO = {"sinan": ""}
+
+CON_ALIMEN = {"sinan": ""}
+
+CON_AMBIEN = {"sinan": ""}
+
+CON_AMB_DE = {"sinan": ""}
+
+CON_ANIMAI = {"sinan": ""}
+
+CON_AREA = {"sinan": ""}
+
+CON_AUTOPS = {"sinan": ""}
+
+CON_AUTO_M = {"sinan": ""}
+
+CON_AUTO_U = {"sinan": ""}
+
+CON_CLASSI = {"sinan": ""}
+
+CON_CLASS_ = {"sinan": ""}
+
+CON_CLAS_E = {"sinan": ""}
+
+CON_CONFIR = {"sinan": ""}
+
+CON_CRITER = {"sinan": ""}
+
+CON_DESCAR = {"sinan": ""}
+
+CON_DIAGES = {"sinan": ""}
+
+CON_DIAGNO = {"sinan": ""}
+
+CON_DIAG_D = {"sinan": ""}
+
+CON_DOENCA = {"sinan": ""}
+
+CON_DT_ENC = {"sinan": ""}
+
+CON_DT_OBI = {"sinan": ""}
+
+CON_ENCHEN = {"sinan": ""}
+
+CON_ENTULH = {"sinan": ""}
+
+CON_ESGOTO = {"sinan": ""}
+
+CON_EVOLUC = {"sinan": ""}
+
+CON_FHD = {"sinan": ""}
+
+CON_FORMA = {"sinan": ""}
+
+CON_GRAVID = {"sinan": ""}
+
+CON_IMPORT = {"sinan": ""}
+
+CON_INFECC = {"sinan": ""}
+
+CON_INF_BA = {"sinan": ""}
+
+CON_INF_DI = {"sinan": ""}
+
+CON_INF_MU = {"sinan": ""}
+
+CON_INF_OU = {"sinan": ""}
+
+CON_INF_PA = {"sinan": ""}
+
+CON_INF_UF = {"sinan": ""}
+
+CON_LOCAL = {"sinan": ""}
+
+CON_LOCAL2 = {"sinan": ""}
+
+CON_LOCALI = {"sinan": ""}
+
+CON_MUNICI = {"sinan": ""}
+
+CON_OUTRA = {"sinan": ""}
+
+CON_PAIS = {"sinan": ""}
+
+CON_PROVAV = {"sinan": ""}
+
+CON_RIO = {"sinan": ""}
+
+CON_ROEDOR = {"sinan": ""}
+
+CON_SOROTE = {"sinan": ""}
+
+CON_TERREN = {"sinan": ""}
+
+CON_TRIAT = {"sinan": ""}
+
+CON_UF = {"sinan": ""}
+
+COPAISINF = {"sinan": ""}
+
+COPRO_D_1 = {"sinan": ""}
+
+COPRO_D_2 = {"sinan": ""}
+
+COPRO_D_3 = {"sinan": ""}
+
+COPRO_R1 = {"sinan": ""}
+
+COPRO_R2 = {"sinan": ""}
+
+COPRO_R3 = {"sinan": ""}
+
+CORRACA = {"ibge": ""}
+
+COUFHOSP = {"sinan": ""}
+
+COUFINF = {"sinan": ""}
+
+COUNIDINF = {"sinan": ""}
+
+CO_AGENC = {"cnes": ""}
+
+CO_BANCO = {"cnes": ""}
+
+CO_CIDPRIM = {"sia": ""}
+
+CO_CIDSEC = {"sia": ""}
+
+CO_ERRO = {"sih": ""}
+
+CO_FOCAL = {"sinan": ""}
+
+CO_INE = {"sia": ""}
+
+CO_MUN_EX2 = {"sinan": ""}
+
+CO_MUN_EX3 = {"sinan": ""}
+
+CO_MUN_EXP = {"sinan": ""}
+
+CO_MUN_R1 = {"sinan": ""}
+
+CO_MUN_R2 = {"sinan": ""}
+
+CO_MUN_R3 = {"sinan": ""}
+
+CO_MUN_R4 = {"sinan": ""}
+
+CO_PAIS_1 = {"sinan": ""}
+
+CO_PAIS_2 = {"sinan": ""}
+
+CO_PAIS_3 = {"sinan": ""}
+
+CO_RISCO = {"sinan": ""}
+
+CO_UF_1 = {"sinan": ""}
+
+CO_UF_2 = {"sinan": ""}
+
+CO_UF_3 = {"sinan": ""}
+
+CO_UF_DES1 = {"sinan": ""}
+
+CO_UF_DES2 = {"sinan": ""}
+
+CO_UF_DES3 = {"sinan": ""}
+
+CO_UF_EX2 = {"sinan": ""}
+
+CO_UF_EX3 = {"sinan": ""}
+
+CO_UF_EXP = {"sinan": ""}
+
+CO_UF_R1 = {"sinan": ""}
+
+CO_UF_R2 = {"sinan": ""}
+
+CO_UF_R3 = {"sinan": ""}
+
+CO_UF_R4 = {"sinan": ""}
+
+CPFUNICO = {"cnes": ""}
+
+CPF_AUT = {"sih": ""}
+
+CPF_CNPJ = {"cnes": ""}
+
+CPF_PROF = {"cnes": ""}
+
+CPF_UNICO = {"cnes": ""}
+
+CRITERIO = {"sinan": ""}
+
+CRITICA = {
+ "sim": "",
+ "sinasc": "",
+}
+
+CRI_1000 = {"sinan": ""}
+
+CRI_1500 = {"sinan": ""}
+
+CRI_500 = {"sinan": ""}
+
+CRM = {"sim": ""}
+
+CROMO = {"sinan": ""}
+
+CRSOCOR = {"sim": ""}
+
+CRSRES = {"sim": ""}
+
+CRS_MAE = {"sinasc": ""}
+
+CRS_OCOR = {"sinasc": ""}
+
+CS_ABDOMEN = {"sinan": ""}
+
+CS_ABDOMIN = {"sinan": ""}
+
+CS_ANALISE = {"sinan": ""}
+
+CS_ANTIB = {"sinan": ""}
+
+CS_ANTIBIO = {"sinan": ""}
+
+CS_ANTIB_T = {"sinan": ""}
+
+CS_APNEIA = {"sinan": ""}
+
+CS_ASSINTO = {"sinan": ""}
+
+CS_ATEND_N = {"sinan": ""}
+
+CS_BUSCAAT = {"sinan": ""}
+
+CS_CADASTR = {"sinan": ""}
+
+CS_CAIMBRA = {"sinan": ""}
+
+CS_CHOQUE = {"sinan": ""}
+
+CS_CHORO = {"sinan": ""}
+
+CS_CIANOSE = {"sinan": ""}
+
+CS_COBERTU = {"sinan": ""}
+
+CS_COLETA = {"sinan": ""}
+
+CS_CRISE = {"sinan": ""}
+
+CS_CRISES = {"sinan": ""}
+
+CS_CULTURA = {"sinan": ""}
+
+CS_DESCART = {"sinan": ""}
+
+CS_DESIT = {"sinan": ""}
+
+CS_DESITRA = {"sinan": ""}
+
+CS_DESNUTR = {"sinan": ""}
+
+CS_DIARRE = {"sinan": ""}
+
+CS_DIVULGA = {"sinan": ""}
+
+CS_DOR = {"sinan": ""}
+
+CS_ENCEFAL = {"sinan": ""}
+
+CS_ESCOLAR = {"sinan": ""}
+
+CS_ESCOL_N = {"sinan": ""}
+
+CS_FEBRE = {"sinan": ""}
+
+CS_FLXRET = {"sinan": ""}
+
+CS_FONTE = {"sinan": ""}
+
+CS_FREQUEN = {"sinan": ""}
+
+CS_GESTANT = {"sinan": ""}
+
+CS_HOSPITA = {"sinan": ""}
+
+CS_INF_COT = {"sinan": ""}
+
+CS_INQUERI = {"sinan": ""}
+
+CS_LIQUOR = {"sinan": ""}
+
+CS_LOCAL = {"sinan": ""}
+
+CS_MAMAR = {"sinan": ""}
+
+CS_MATERIA = {"sinan": ""}
+
+CS_MEMBROS = {"sinan": ""}
+
+CS_MENING = {"sinan": ""}
+
+CS_MUCO = {"sinan": ""}
+
+CS_NASCIDO = {"sinan": ""}
+
+CS_NEG_ESP = {"sinan": ""}
+
+CS_NUCA = {"sinan": ""}
+
+CS_OPISTOT = {"sinan": ""}
+
+CS_ORIENTA = {"sinan": ""}
+
+CS_ORIGEM = {"sinan": ""}
+
+CS_OTITE = {"sinan": ""}
+
+CS_OUTRAS = {"sinan": ""}
+
+CS_OUTROS = {"sinan": ""}
+
+CS_OUT_COM = {"sinan": ""}
+
+CS_OUT_SIN = {"sinan": ""}
+
+CS_PNEUMON = {"sinan": ""}
+
+CS_POSITIV = {"sinan": ""}
+
+CS_RACA = {"sinan": ""}
+
+CS_REIDRAT = {"sinan": ""}
+
+CS_RESULTA = {"sinan": ""}
+
+CS_RISO = {"sinan": ""}
+
+CS_SANGUE = {"sinan": ""}
+
+CS_SECRECA = {"sinan": ""}
+
+CS_SEXO = {"sinan": ""}
+
+CS_SIN_OUT = {"sinan": ""}
+
+CS_SUGOU = {"sinan": ""}
+
+CS_SUSPEIT = {"sinan": ""}
+
+CS_TEMP37 = {"sinan": ""}
+
+CS_TEMP_38 = {"sinan": ""}
+
+CS_TIPO = {"sinan": ""}
+
+CS_TOSSE_E = {"sinan": ""}
+
+CS_TOSSE_P = {"sinan": ""}
+
+CS_TRANS = {"sinan": ""}
+
+CS_TRISMO = {"sinan": ""}
+
+CS_URINA = {"sinan": ""}
+
+CS_VACINA = {"sinan": ""}
+
+CS_VACINAC = {"sinan": ""}
+
+CS_VACINAL = {"sinan": ""}
+
+CS_VACTETA = {"sinan": ""}
+
+CS_VAC_N = {"sinan": ""}
+
+CS_VOMITO = {"sinan": ""}
+
+CS_VOMITOS = {"sinan": ""}
+
+CS_ZONA = {"sinan": ""}
+
+CULTURA_ES = {"sinan": ""}
+
+CULTURA_OU = {"sinan": ""}
+
+C_CORREN = {"cnes": ""}
+
+C_D = {"sinan": ""}
+
+C_M = {"sinan": ""}
+
+DATANASC = {"sim": ""}
+
+DATAOBITO = {"sim": ""}
+
+DATAREG = {"sim": ""}
+
+DATA_CART = {"sinasc": ""}
+
+DATA_NASC = {"sinasc": ""}
+
+DE15A39ANO = {"sinan": ""}
+
+DE5A14ANOS = {"sinan": ""}
+
+DEFEN_PUBL = {"sinan": ""}
+
+DEF_AUDITI = {"sinan": ""}
+
+DEF_DIAGNO = {"sinan": ""}
+
+DEF_ESPEC = {"sinan": ""}
+
+DEF_FISICA = {"sinan": ""}
+
+DEF_MENTAL = {"sinan": ""}
+
+DEF_OUT = {"sinan": ""}
+
+DEF_TRANS = {"sinan": ""}
+
+DEF_VISUAL = {"sinan": ""}
+
+DEIONIZA = {"cnes": ""}
+
+DELEG = {"sinan": ""}
+
+DELEG_CRIA = {"sinan": ""}
+
+DELEG_IDOS = {"sinan": ""}
+
+DELEG_MULH = {"sinan": ""}
+
+DENCRIREND = {"ibge": ""}
+
+DENDESOCUP = {"ibge": ""}
+
+DENGUE = {"sinan": ""}
+
+DENRENDA = {"ibge": ""}
+
+DENTARIO = {"sinan": ""}
+
+DENTRABINF = {"ibge": ""}
+
+DESCSEGM = {"cnes": ""}
+
+DESMATA_N = {"sinan": ""}
+
+DESTINOPAC = {"sia": ""}
+
+DEXAME = {"sinan": ""}
+
+DG_OUT_N = {"sinan": ""}
+
+DIABETES = {"sinan": ""}
+
+DIAGNO_LAB = {"sinan": ""}
+
+DIAGSEC1 = {"sih": ""}
+
+DIAGSEC2 = {"sih": ""}
+
+DIAGSEC3 = {"sih": ""}
+
+DIAGSEC4 = {"sih": ""}
+
+DIAGSEC5 = {"sih": ""}
+
+DIAGSEC6 = {"sih": ""}
+
+DIAGSEC7 = {"sih": ""}
+
+DIAGSEC8 = {"sih": ""}
+
+DIAGSEC9 = {"sih": ""}
+
+DIAG_CONF = {"sinan": ""}
+
+DIAG_DESCA = {"sinan": ""}
+
+DIAG_ESP = {"sinan": ""}
+
+DIAG_MAE = {"sinan": ""}
+
+DIAG_PARA = {"sinan": ""}
+
+DIAG_PAR_N = {"sinan": ""}
+
+DIAG_PRINC = {
+ "ciha": "",
+ "sih": "",
+}
+
+DIAG_SEC = {"sih": ""}
+
+DIAG_SECUN = {
+ "ciha": "",
+ "sih": "",
+}
+
+DIALISE = {"cnes": ""}
+
+DIARREIA = {"sinan": ""}
+
+DIAR_ACOM = {"sih": ""}
+
+DIAS = {"sinan": ""}
+
+DIAS_PERM = {
+ "ciha": "",
+ "sih": "",
+}
+
+DIFDATA = {
+ "sim": "",
+ "sinasc": "",
+}
+
+DIFER = {"pni": ""}
+
+DILACERANT = {"sinan": ""}
+
+DINTERNA = {"sinan": ""}
+
+DIR_HUMAN = {"sinan": ""}
+
+DISFAGIA = {"sinan": ""}
+
+DISTRADM = {"cnes": ""}
+
+DISTRSAN = {"cnes": ""}
+
+DOENCA_TRA = {"sinan": ""}
+
+DOMICILI = {"sinan": ""}
+
+DOR = {"sinan": ""}
+
+DORMIU_N = {"sinan": ""}
+
+DOR_COSTAS = {"sinan": ""}
+
+DOR_RETRO = {"sinan": ""}
+
+DOSAGEM = {"sinan": ""}
+
+DOSE = {
+ "pni": "",
+ "sinan": "",
+}
+
+DOSE1 = {"pni": ""}
+
+DOSEN = {"pni": ""}
+
+DOSES = {"sinan": ""}
+
+DOSES_A = {"sinan": ""}
+
+DOSE_RECEB = {"sinan": ""}
+
+DOXOCI = {"sinan": ""}
+
+DROGA = {"sinan": ""}
+
+DROGAS = {"sinan": ""}
+
+DSALIMENTO = {"sinan": ""}
+
+DSCARDIOP = {"sinan": ""}
+
+DSCAUSALIM = {"sinan": ""}
+
+DSFONTE = {"sinan": ""}
+
+DSMOTIVO = {"sinan": ""}
+
+DSTITULO1 = {"sinan": ""}
+
+DSTRAESQUE = {"sinan": ""}
+
+DS_ALI1 = {"sinan": ""}
+
+DS_ALI1OUT = {"sinan": ""}
+
+DS_ALI2 = {"sinan": ""}
+
+DS_ALI2OUT = {"sinan": ""}
+
+DS_ESQUEMA = {"sinan": ""}
+
+DS_FIM_GES = {"sinan": ""}
+
+DS_FORMA = {"sinan": ""}
+
+DS_F_OUTRO = {"sinan": ""}
+
+DS_INDUS = {"sinan": ""}
+
+DS_INF_LOC = {"sinan": ""}
+
+DS_INF_OUT = {"sinan": ""}
+
+DS_INGEST = {"sinan": ""}
+
+DS_INI_GES = {"sinan": ""}
+
+DS_LOCAL1 = {"sinan": ""}
+
+DS_LOCAL2 = {"sinan": ""}
+
+DS_MUN_1 = {"sinan": ""}
+
+DS_MUN_2 = {"sinan": ""}
+
+DS_MUN_3 = {"sinan": ""}
+
+DS_OUTRO = {"sinan": ""}
+
+DS_OUTROSI = {"sinan": ""}
+
+DS_OUTR_LO = {"sinan": ""}
+
+DS_OUT_AMB = {"sinan": ""}
+
+DS_PARES = {"sinan": ""}
+
+DS_RESU_OU = {"sinan": ""}
+
+DS_TRANS1 = {"sinan": ""}
+
+DS_TRANS2 = {"sinan": ""}
+
+DS_TRANS3 = {"sinan": ""}
+
+DS_TRANS_1 = {"sinan": ""}
+
+DS_TRANS_2 = {"sinan": ""}
+
+DS_TRAT = {"sinan": ""}
+
+DTALTA = {"sinan": ""}
+
+DTALTA_N = {"sinan": ""}
+
+DTATEND = {"sinan": ""}
+
+DTATESTADO = {"sim": ""}
+
+DTCADASTRO = {
+ "sim": "",
+ "sinasc": "",
+}
+
+DTCADINF = {"sim": ""}
+
+DTCADINV = {"sim": ""}
+
+DTCONCASO = {"sim": ""}
+
+DTCONFIRMA = {"sinan": ""}
+
+DTCONINV = {"sim": ""}
+
+DTDECLARAC = {"sinasc": ""}
+
+DTDIASINAC = {"sinan": ""}
+
+DTELETRO = {"sinan": ""}
+
+DTFEZESCOL = {"sinan": ""}
+
+DTIMUNO = {"sinan": ""}
+
+DTINICTRAT = {"sinan": ""}
+
+DTINTERNA = {"sinan": ""}
+
+DTINVESTIG = {"sim": ""}
+
+DTISOLA = {"sinan": ""}
+
+DTMICRO1 = {"sinan": ""}
+
+DTMICRO2 = {"sinan": ""}
+
+DTMUDESQ = {"sinan": ""}
+
+DTNASC = {
+ "sia": "",
+ "sim": "",
+ "sinasc": "",
+}
+
+DTNASCMAE = {"sinasc": ""}
+
+DTOBITO = {"sim": ""}
+
+DTPORTAR = {"cnes": ""}
+
+DTPRICONS = {"sinan": ""}
+
+DTRAPIDO1 = {"sinan": ""}
+
+DTRATA = {"sinan": ""}
+
+DTRECEBIM = {
+ "sim": "",
+ "sinasc": "",
+}
+
+DTRECORIG = {
+ "sim": "",
+ "sinasc": "",
+}
+
+DTRECORIGA = {
+ "sim": "",
+ "sinasc": "",
+}
+
+DTREGCART = {
+ "sim": "",
+ "sinasc": "",
+}
+
+DTS1 = {"sinan": ""}
+
+DTS2 = {"sinan": ""}
+
+DTSORO = {"sinan": ""}
+
+DTSOROCOL = {"sinan": ""}
+
+DTSUSPEIC = {"sinan": ""}
+
+DTTESTE1 = {"sinan": ""}
+
+DTTRANSDM = {"sinan": ""}
+
+DTTRANSFU = {"sinan": ""}
+
+DTTRANSRM = {"sinan": ""}
+
+DTTRANSRS = {"sinan": ""}
+
+DTTRANSSE = {"sinan": ""}
+
+DTTRANSSM = {"sinan": ""}
+
+DTTRANSUS = {"sinan": ""}
+
+DTTRAT = {"sinan": ""}
+
+DTTRIAGEM = {"sinan": ""}
+
+DTULTCOMP = {"sinan": ""}
+
+DTULTMENST = {"sinasc": ""}
+
+DT_1VAC = {"sinan": ""}
+
+DT_1_DOSE = {"sinan": ""}
+
+DT_2VAC = {"sinan": ""}
+
+DT_2_DOSE = {"sinan": ""}
+
+DT_3_DOSE = {"sinan": ""}
+
+DT_ACID = {"sinan": ""}
+
+DT_ACIDENT = {"sinan": ""}
+
+DT_ACRED = {"cnes": ""}
+
+DT_ADM_ANT = {"sinan": ""}
+
+DT_ALI1COL = {"sinan": ""}
+
+DT_ALI2COL = {"sinan": ""}
+
+DT_ALRM = {"sinan": ""}
+
+DT_APLI_SO = {"sinan": ""}
+
+DT_ATEND = {
+ "ciha": "",
+ "sia": "",
+}
+
+DT_ATENDE = {"sinan": ""}
+
+DT_ATENDIM = {"sinan": ""}
+
+DT_ATIVA = {"cnes": ""}
+
+DT_ATUAL = {"cnes": ""}
+
+DT_CATARRA = {"sinan": ""}
+
+DT_CHIK_S1 = {"sinan": ""}
+
+DT_CHIK_S2 = {"sinan": ""}
+
+DT_CHOQUE = {"sinan": ""}
+
+DT_COL1 = {"sinan": ""}
+
+DT_COL2 = {"sinan": ""}
+
+DT_COL3 = {"sinan": ""}
+
+DT_COLETA = {"sinan": ""}
+
+DT_COLOUT = {"sinan": ""}
+
+DT_COL_1 = {"sinan": ""}
+
+DT_COL_2 = {"sinan": ""}
+
+DT_COL_DIR = {"sinan": ""}
+
+DT_COL_HE2 = {"sinan": ""}
+
+DT_COL_HEM = {"sinan": ""}
+
+DT_COL_IGM = {"sinan": ""}
+
+DT_COL_IND = {"sinan": ""}
+
+DT_COL_PL2 = {"sinan": ""}
+
+DT_COL_PLQ = {"sinan": ""}
+
+DT_COL_S1 = {"sinan": ""}
+
+DT_COL_S2 = {"sinan": ""}
+
+DT_CONFIRM = {"sinan": ""}
+
+DT_COPRO = {"sinan": ""}
+
+DT_COPRO1 = {"sinan": ""}
+
+DT_COPRO2 = {"sinan": ""}
+
+DT_COPRO3 = {"sinan": ""}
+
+DT_DESAT = {"cnes": ""}
+
+DT_DESC1 = {"sinan": ""}
+
+DT_DESC2 = {"sinan": ""}
+
+DT_DESC3 = {"sinan": ""}
+
+DT_DESLC1 = {"sinan": ""}
+
+DT_DESLC2 = {"sinan": ""}
+
+DT_DESLC3 = {"sinan": ""}
+
+DT_DIAG = {"sinan": ""}
+
+DT_DIGITA = {"sinan": ""}
+
+DT_DOSE = {"sinan": ""}
+
+DT_DOSE_1 = {"sinan": ""}
+
+DT_DOSE_2 = {"sinan": ""}
+
+DT_DOSE_3 = {"sinan": ""}
+
+DT_DOSE_4 = {"sinan": ""}
+
+DT_DOSE_5 = {"sinan": ""}
+
+DT_DOSE_N = {"sinan": ""}
+
+DT_ENCERRA = {"sinan": ""}
+
+DT_ENVIO = {"sinan": ""}
+
+DT_EVOLUC = {"sinan": ""}
+
+DT_EXPED = {"cnes": ""}
+
+DT_EXPO = {"sinan": ""}
+
+DT_FEBRE = {"sinan": ""}
+
+DT_FEZES = {"sinan": ""}
+
+DT_FIM = {"sia": ""}
+
+DT_GRAV = {"sinan": ""}
+
+DT_HEMO1 = {"sinan": ""}
+
+DT_HEMO2 = {"sinan": ""}
+
+DT_HEMO3 = {"sinan": ""}
+
+DT_INICIO = {"sia": ""}
+
+DT_INICIO_ = {"sinan": ""}
+
+DT_INIC_TR = {"sinan": ""}
+
+DT_INI_EPI = {"sinan": ""}
+
+DT_INTER = {"sih": ""}
+
+DT_INTERNA = {"sinan": ""}
+
+DT_INVEST = {"sinan": ""}
+
+DT_LIQUOR = {"sinan": ""}
+
+DT_MOTCOB = {"sia": ""}
+
+DT_MUDANCA = {"sinan": ""}
+
+DT_NASC = {"sinan": ""}
+
+DT_NOTIFIC = {"sinan": ""}
+
+DT_NOTI_AT = {"sinan": ""}
+
+DT_NS1 = {"sinan": ""}
+
+DT_OBITO = {"sinan": ""}
+
+DT_OCOR = {"sinan": ""}
+
+DT_OUTR1 = {"sinan": ""}
+
+DT_OUTR2 = {"sinan": ""}
+
+DT_OUTR3 = {"sinan": ""}
+
+DT_PCR = {"sinan": ""}
+
+DT_PCR_1 = {"sinan": ""}
+
+DT_PCR_2 = {"sinan": ""}
+
+DT_PCR_3 = {"sinan": ""}
+
+DT_PNASS = {"cnes": ""}
+
+DT_PRNT = {"sinan": ""}
+
+DT_PROCESS = {"sia": ""}
+
+DT_PUBLE = {"cnes": ""}
+
+DT_PUBLM = {"cnes": ""}
+
+DT_RAPIDO = {"sinan": ""}
+
+DT_REFORCO = {"sinan": ""}
+
+DT_RESU3 = {"sinan": ""}
+
+DT_RISCO1 = {"sinan": ""}
+
+DT_RISCO2 = {"sinan": ""}
+
+DT_RISCO3 = {"sinan": ""}
+
+DT_RISCO4 = {"sinan": ""}
+
+DT_RTPCR = {"sinan": ""}
+
+DT_R_TRA = {"sinan": ""}
+
+DT_S1 = {"sinan": ""}
+
+DT_S2 = {"sinan": ""}
+
+DT_SAIDA = {
+ "ciha": "",
+ "sih": "",
+}
+
+DT_SIN_PRI = {"sinan": ""}
+
+DT_SORO = {"sinan": ""}
+
+DT_SORO1 = {"sinan": ""}
+
+DT_SORO2 = {"sinan": ""}
+
+DT_SOROR1 = {"sinan": ""}
+
+DT_SOROR2 = {"sinan": ""}
+
+DT_TRANSDM = {"sinan": ""}
+
+DT_TRANSRM = {"sinan": ""}
+
+DT_TRANSRS = {"sinan": ""}
+
+DT_TRANSSE = {"sinan": ""}
+
+DT_TRANSSM = {"sinan": ""}
+
+DT_TRANSUS = {"sinan": ""}
+
+DT_TRIA_11 = {"sinan": ""}
+
+DT_TRISMO = {"sinan": ""}
+
+DT_TRNASRM = {"sinan": ""}
+
+DT_TRNASRS = {"sinan": ""}
+
+DT_TR_RAB = {"sinan": ""}
+
+DT_ULT_DOS = {"sinan": ""}
+
+DT_URO = {"sinan": ""}
+
+DT_URO2 = {"sinan": ""}
+
+DT_URO3 = {"sinan": ""}
+
+DT_VAC1 = {"sinan": ""}
+
+DT_VACINA = {"sinan": ""}
+
+DT_VAC_1 = {"sinan": ""}
+
+DT_VAC_2 = {"sinan": ""}
+
+DT_VAC_3 = {"sinan": ""}
+
+DT_VAC_4 = {"sinan": ""}
+
+DT_VAC_5 = {"sinan": ""}
+
+DT_VAC_ULT = {"sinan": ""}
+
+DT_VENCIM = {"sinan": ""}
+
+DT_VIRAL = {"sinan": ""}
+
+DT_VOP = {"sinan": ""}
+
+DURACAO = {"sinan": ""}
+
+D_DIAR = {"sinan": ""}
+
+D_VOMITO = {"sinan": ""}
+
+ECG = {"sinan": ""}
+
+ECG_RESULT = {"sinan": ""}
+
+EDEMA = {"sinan": ""}
+
+ELISA = {"sinan": ""}
+
+ELISA1 = {"sinan": ""}
+
+ELISA2 = {"sinan": ""}
+
+ELI_IGG_S1 = {"sinan": ""}
+
+ELI_IGG_S2 = {"sinan": ""}
+
+ELI_IGM_S1 = {"sinan": ""}
+
+ELI_IGM_S2 = {"sinan": ""}
+
+EMAGRA = {"sinan": ""}
+
+ENCAMINHA = {"sinan": ""}
+
+ENC_ABRIGO = {"sinan": ""}
+
+ENC_CREAS = {"sinan": ""}
+
+ENC_DEAM = {"sinan": ""}
+
+ENC_DELEG = {"sinan": ""}
+
+ENC_DPCA = {"sinan": ""}
+
+ENC_ESPEC = {"sinan": ""}
+
+ENC_IML = {"sinan": ""}
+
+ENC_MPU = {"sinan": ""}
+
+ENC_MULHER = {"sinan": ""}
+
+ENC_OUTR = {"sinan": ""}
+
+ENC_SAUDE = {"sinan": ""}
+
+ENC_SENTIN = {"sinan": ""}
+
+ENC_TUTELA = {"sinan": ""}
+
+ENC_VARA = {"sinan": ""}
+
+ENDEMICO = {"sinan": ""}
+
+ENDRES = {"sinasc": ""}
+
+ENTERO = {"sinan": ""}
+
+ENTO_ANIMA = {"sinan": ""}
+
+ENTO_CAO = {"sinan": ""}
+
+ENTO_CAPTU = {"sinan": ""}
+
+ENTO_EQUIN = {"sinan": ""}
+
+ENTO_EXIST = {"sinan": ""}
+
+ENTO_EXI_1 = {"sinan": ""}
+
+ENTO_EXI_2 = {"sinan": ""}
+
+ENTO_EXI_3 = {"sinan": ""}
+
+ENTO_EXI_4 = {"sinan": ""}
+
+ENTO_EXTRA = {"sinan": ""}
+
+ENTO_FLEBO = {"sinan": ""}
+
+ENTO_INSET = {"sinan": ""}
+
+ENTO_INTRA = {"sinan": ""}
+
+ENTO_LOCAL = {"sinan": ""}
+
+ENTO_OUTRO = {"sinan": ""}
+
+ENTO_PERID = {"sinan": ""}
+
+ENTO_PROXI = {"sinan": ""}
+
+ENTO_TRANS = {"sinan": ""}
+
+ENTRADA = {"sinan": ""}
+
+EPICUTA = {"sinan": ""}
+
+EPISTAXE = {"sinan": ""}
+
+EPIS_RACIO = {"sinan": ""}
+
+EPI_PESTE = {"sinan": ""}
+
+EQBRALTA = {"cnes": ""}
+
+EQBRBAIX = {"cnes": ""}
+
+EQBRMEDI = {"cnes": ""}
+
+EQDOSCLI = {"cnes": ""}
+
+EQFONSEL = {"cnes": ""}
+
+EQSISPLN = {"cnes": ""}
+
+EQUINOS = {"sinan": ""}
+
+EQ_MAREA = {"cnes": ""}
+
+EQ_MINDI = {"cnes": ""}
+
+ESC = {"sim": ""}
+
+ESC2010 = {"sim": ""}
+
+ESCFALAGR1 = {"sim": ""}
+
+ESCMAE = {
+ "sim": "",
+ "sinasc": "",
+}
+
+ESCMAE2010 = {
+ "sim": "",
+ "sinasc": "",
+}
+
+ESCMAEAGR1 = {
+ "sim": "",
+ "sinasc": "",
+}
+
+ESCOLA = {"cnes": ""}
+
+ESCOLARID = {"ibge": ""}
+
+ESCOLMAE = {"sinan": ""}
+
+ESCOLMAE_N = {"sinan": ""}
+
+ESC_MAE_N = {"sinan": ""}
+
+ESFERA_A = {"cnes": ""}
+
+ESPEC = {
+ "ciha": "",
+ "sih": "",
+}
+
+ESPECIE = {"sinan": ""}
+
+ESPECIE_N = {"sinan": ""}
+
+ESPECIFICO = {"sinan": ""}
+
+ESPLENO = {"sinan": ""}
+
+ESPLENOM = {"sinan": ""}
+
+ESP_OUT = {"sinan": ""}
+
+ESQ_ATU_N = {"sinan": ""}
+
+ESQ_INI_N = {"sinan": ""}
+
+ESTABDESCR = {"sim": ""}
+
+ESTAB_OCOR = {"sinasc": ""}
+
+ESTCIV = {"sim": ""}
+
+ESTCIVIL = {"sim": ""}
+
+ESTCIVMAE = {"sinasc": ""}
+
+ESTREPTOMI = {"sinan": ""}
+
+ETAMBUTOL = {"sinan": ""}
+
+ETIOL_OUTR = {"sinan": ""}
+
+ETIONAMIDA = {"sinan": ""}
+
+ETNIA = {
+ "sia": "",
+ "sih": "",
+ "sim": "",
+ "sinasc": "",
+}
+
+EVIDENCIA = {"sinan": ""}
+
+EVOLUCAO = {"sinan": ""}
+
+EVOL_AFAST = {"sinan": ""}
+
+EVOR1_DT_R = {"sinan": ""}
+
+EVOR_A_MID = {"sinan": ""}
+
+EVOR_A_MIE = {"sinan": ""}
+
+EVOR_A_MSD = {"sinan": ""}
+
+EVOR_A_MSE = {"sinan": ""}
+
+EVOR_DT_RE = {"sinan": ""}
+
+EVOR_F_MID = {"sinan": ""}
+
+EVOR_F_MIE = {"sinan": ""}
+
+EVOR_F_MSD = {"sinan": ""}
+
+EVOR_F_MSE = {"sinan": ""}
+
+EVOR_RC_ED = {"sinan": ""}
+
+EVOR_RC_EE = {"sinan": ""}
+
+EVOR_RC_FD = {"sinan": ""}
+
+EVOR_RC_FE = {"sinan": ""}
+
+EVOR_S_FAC = {"sinan": ""}
+
+EVOR_S_MID = {"sinan": ""}
+
+EVOR_S_MIE = {"sinan": ""}
+
+EVOR_S_MSD = {"sinan": ""}
+
+EVOR_S_MSE = {"sinan": ""}
+
+EVO_DIAG = {"sinan": ""}
+
+EVO_DIAG_N = {"sinan": ""}
+
+EVO_DT_OBI = {"sinan": ""}
+
+EVO_OUTR = {"sinan": ""}
+
+EXAME = {
+ "sim": "",
+ "sinan": "",
+}
+
+EXANTEMA = {"sinan": ""}
+
+EXPDIFDATA = {"sim": ""}
+
+EXPO_N = {"sinan": ""}
+
+EXTRAPU1_N = {"sinan": ""}
+
+EXTRAPU2_N = {"sinan": ""}
+
+EXTRAPUL_O = {"sinan": ""}
+
+FACIAL = {"sinan": ""}
+
+FAEC_TP = {"sih": ""}
+
+FALA = {"sinan": ""}
+
+FALENCIA = {"sinan": ""}
+
+FC_CONTATO = {"sinan": ""}
+
+FC_CONT_DE = {"sinan": ""}
+
+FEBRE = {"sinan": ""}
+
+FEN_HEMORR = {"sinan": ""}
+
+FERIMENTO = {"sinan": ""}
+
+FERIMENT_N = {"sinan": ""}
+
+FEZES = {"sinan": ""}
+
+FIGADO = {"sinan": ""}
+
+FILHMORT = {"sim": ""}
+
+FILHVIVOS = {"sim": ""}
+
+FIL_ABORT = {"sinasc": ""}
+
+FIL_MORTOS = {"sinasc": ""}
+
+FIL_VIVOS = {"sinasc": ""}
+
+FIM = {"sia": ""}
+
+FIM_ANIMAL = {"sinan": ""}
+
+FINANC = {"sih": ""}
+
+FISCALIZA = {"sinan": ""}
+
+FLOGISTICO = {"sinan": ""}
+
+FLUXO_AERE = {"sinan": ""}
+
+FLXRECEBI = {"sinan": ""}
+
+FOI_MATA = {"sinan": ""}
+
+FONTE = {
+ "ciha": "",
+ "sim": "",
+ "sinan": "",
+}
+
+FONTEINV = {"sim": ""}
+
+FONTES = {"sim": ""}
+
+FONTESINF = {"sim": ""}
+
+FONTE_ORC = {"sih": ""}
+
+FONTINFO = {"sim": ""}
+
+FORMA = {"sinan": ""}
+
+FORMACLINI = {"sinan": ""}
+
+FORMA_CO = {"sinan": ""}
+
+FORMA_TF = {"sinan": ""}
+
+FORMA_TI = {"sinan": ""}
+
+FORMA_TS = {"sinan": ""}
+
+FORMA_TT = {"sinan": ""}
+
+FO_ANT_HBC = {"sinan": ""}
+
+FO_ANT_HCV = {"sinan": ""}
+
+FO_ANT_HIV = {"sinan": ""}
+
+FO_HBSAG = {"sinan": ""}
+
+FRAQUEZA = {"sinan": ""}
+
+FUMA = {"sinan": ""}
+
+FXETARIA = {"ibge": ""}
+
+FX_ETARIA = {"pni": ""}
+
+F_AREIA = {"cnes": ""}
+
+F_CARVAO = {"cnes": ""}
+
+GANGLIOS = {"sinan": ""}
+
+GASES = {"sinan": ""}
+
+GENGIVO = {"sinan": ""}
+
+GENOT_G = {"sinan": ""}
+
+GENOT_P = {"sinan": ""}
+
+GEN_VHC = {"sinan": ""}
+
+GESPRG1E = {"cnes": ""}
+
+GESPRG1M = {"cnes": ""}
+
+GESPRG2E = {"cnes": ""}
+
+GESPRG2M = {"cnes": ""}
+
+GESPRG3E = {"cnes": ""}
+
+GESPRG3M = {"cnes": ""}
+
+GESPRG4E = {"cnes": ""}
+
+GESPRG4M = {"cnes": ""}
+
+GESPRG5E = {"cnes": ""}
+
+GESPRG5M = {"cnes": ""}
+
+GESPRG6E = {"cnes": ""}
+
+GESPRG6M = {"cnes": ""}
+
+GESTACAO = {
+ "sim": "",
+ "sinasc": "",
+}
+
+GESTANTE = {"sinan": ""}
+
+GESTAO = {
+ "ciha": "",
+ "sia": "",
+ "sih": "",
+}
+
+GESTOR_COD = {"sih": ""}
+
+GESTOR_CPF = {"sih": ""}
+
+GESTOR_DT = {"sih": ""}
+
+GESTOR_TP = {"sih": ""}
+
+GESTRISCO = {"sih": ""}
+
+GLAUCOMA = {"sinan": ""}
+
+GRAVIDEZ = {
+ "sim": "",
+ "sinasc": "",
+}
+
+GRAV_AST = {"sinan": ""}
+
+GRAV_CONSC = {"sinan": ""}
+
+GRAV_CONV = {"sinan": ""}
+
+GRAV_ENCH = {"sinan": ""}
+
+GRAV_EXTRE = {"sinan": ""}
+
+GRAV_HEMAT = {"sinan": ""}
+
+GRAV_HIPOT = {"sinan": ""}
+
+GRAV_INSUF = {"sinan": ""}
+
+GRAV_MELEN = {"sinan": ""}
+
+GRAV_METRO = {"sinan": ""}
+
+GRAV_MIOC = {"sinan": ""}
+
+GRAV_ORGAO = {"sinan": ""}
+
+GRAV_PULSO = {"sinan": ""}
+
+GRAV_SANG = {"sinan": ""}
+
+GRAV_TAQUI = {"sinan": ""}
+
+G_D = {"sinan": ""}
+
+G_M = {"sinan": ""}
+
+HANSENIASE = {"sinan": ""}
+
+HAV = {"sinan": ""}
+
+HA_PAUSA = {"sinan": ""}
+
+HBC_TOTAL = {"sinan": ""}
+
+HBSAG = {"sinan": ""}
+
+HBV = {"sinan": ""}
+
+HCV = {"sinan": ""}
+
+HDV = {"sinan": ""}
+
+HEMATOLOG = {"sinan": ""}
+
+HEMATURA = {"sinan": ""}
+
+HEMA_MAIOR = {"sinan": ""}
+
+HEMA_MENOR = {"sinan": ""}
+
+HEMO = {"sinan": ""}
+
+HEMOCULT = {"sinan": ""}
+
+HEMODIALIS = {"sinan": ""}
+
+HEMORRAG = {"sinan": ""}
+
+HEMORRAGI = {"sinan": ""}
+
+HEMOTERA = {"cnes": ""}
+
+HEMO_D_1 = {"sinan": ""}
+
+HEMO_D_2 = {"sinan": ""}
+
+HEMO_D_3 = {"sinan": ""}
+
+HEMO_IGG = {"sinan": ""}
+
+HEMO_IGM = {"sinan": ""}
+
+HEMO_R1 = {"sinan": ""}
+
+HEMO_R2 = {"sinan": ""}
+
+HEMO_R3 = {"sinan": ""}
+
+HEM_IGG_S1 = {"sinan": ""}
+
+HEM_IGG_S2 = {"sinan": ""}
+
+HEM_IGM_S1 = {"sinan": ""}
+
+HEM_IGM_S2 = {"sinan": ""}
+
+HEPAESPLE = {"sinan": ""}
+
+HEPATITA = {"sinan": ""}
+
+HEPATITB = {"sinan": ""}
+
+HEPATITE_N = {"sinan": ""}
+
+HEPATO = {"sinan": ""}
+
+HEPATOME = {"sinan": ""}
+
+HEPATOPAT = {"sinan": ""}
+
+HEPA_ESP = {"sinan": ""}
+
+HERBIV_DES = {"sinan": ""}
+
+HEV = {"sinan": ""}
+
+HIDROCARBO = {"sinan": ""}
+
+HIDROFOBI = {"sinan": ""}
+
+HIPEREMIA = {"sinan": ""}
+
+HIPERTEN = {"sinan": ""}
+
+HIPERTENSA = {"sinan": ""}
+
+HIPOREXIA = {"sinan": ""}
+
+HIPOTENSAO = {"sinan": ""}
+
+HISTOLOG_N = {"sinan": ""}
+
+HISTOPA = {"sinan": ""}
+
+HISTOPATO = {"sinan": ""}
+
+HISTOPATOL = {"sinan": ""}
+
+HISTOPA_N = {"sinan": ""}
+
+HISTORIA = {"sinan": ""}
+
+HIV = {"sinan": ""}
+
+HOMONIMO = {
+ "ciha": "",
+ "sih": "",
+}
+
+HORAHOSP = {"cnes": ""}
+
+HORANASC = {"sinasc": ""}
+
+HORAOBITO = {"sim": ""}
+
+HORAOUTR = {"cnes": ""}
+
+HORA_ACID = {"sinan": ""}
+
+HORA_AMB = {"cnes": ""}
+
+HORA_JOR = {"sinan": ""}
+
+HORA_OCOR = {"sinan": ""}
+
+HORMONIO = {"sinan": ""}
+
+HOSPITAL = {"sinan": ""}
+
+HOSPITALIZ = {"sinan": ""}
+
+HOSP_NSUS = {"cnes": ""}
+
+HOSP_SUS = {"cnes": ""}
+
+ICTERICIA = {"sinan": ""}
+
+IDADE = {
+ "ciha": "",
+ "ibge": "",
+ "sih": "",
+ "sim": "",
+}
+
+IDADEMAE = {
+ "sim": "",
+ "sinan": "",
+ "sinasc": "",
+}
+
+IDADEMAX = {"sia": ""}
+
+IDADEMIN = {"sia": ""}
+
+IDADEPAC = {"sia": ""}
+
+IDADEPAI = {"sinasc": ""}
+
+IDADE_MAE = {
+ "sinan": "",
+ "sinasc": "",
+}
+
+IDANOMAL = {"sinasc": ""}
+
+IDENT = {"sih": ""}
+
+IDENT_GEN = {"sinan": ""}
+
+IDENT_MICR = {"sinan": ""}
+
+IDEQUIPE = {"cnes": ""}
+
+ID_AGRAVO = {"sinan": ""}
+
+ID_AREA = {"cnes": ""}
+
+ID_ARTRALG = {"sinan": ""}
+
+ID_CNS_SUS = {"sinan": ""}
+
+ID_CONJUNT = {"sinan": ""}
+
+ID_CORIZA = {"sinan": ""}
+
+ID_DG_DES = {"sinan": ""}
+
+ID_DG_NOT = {"sinan": ""}
+
+ID_DT_RESI = {"sinan": ""}
+
+ID_ETIOLOG = {"sinan": ""}
+
+ID_EV_NOT = {"sinan": ""}
+
+ID_GANGLIO = {"sinan": ""}
+
+ID_HOSPIT = {"sinan": ""}
+
+ID_LIQUOR = {"sinan": ""}
+
+ID_MN_OCOR = {"sinan": ""}
+
+ID_MN_RESI = {"sinan": ""}
+
+ID_MUNICIP = {"sinan": ""}
+
+ID_MUNIC_2 = {"sinan": ""}
+
+ID_MUNIC_A = {"sinan": ""}
+
+ID_MUNI_AT = {"sinan": ""}
+
+ID_MUNI_RE = {"sinan": ""}
+
+ID_NOTIFIC = {"sinan": ""}
+
+ID_OCUPACA = {"sinan": ""}
+
+ID_OCUPA_N = {"sinan": ""}
+
+ID_OCUP_MA = {"sinan": ""}
+
+ID_PAIS = {"sinan": ""}
+
+ID_REGIONA = {"sinan": ""}
+
+ID_RETRO = {"sinan": ""}
+
+ID_RE_IGG = {"sinan": ""}
+
+ID_RE_IGG_ = {"sinan": ""}
+
+ID_RE_IGM = {"sinan": ""}
+
+ID_RE_IGM_ = {"sinan": ""}
+
+ID_RE_IG_1 = {"sinan": ""}
+
+ID_RE_IG_2 = {"sinan": ""}
+
+ID_RG_RESI = {"sinan": ""}
+
+ID_S1_IGG = {"sinan": ""}
+
+ID_S1_IGG_ = {"sinan": ""}
+
+ID_S1_IGM = {"sinan": ""}
+
+ID_S1_IGM_ = {"sinan": ""}
+
+ID_S1_IG_1 = {"sinan": ""}
+
+ID_S1_IG_2 = {"sinan": ""}
+
+ID_S2_IGG = {"sinan": ""}
+
+ID_S2_IGG_ = {"sinan": ""}
+
+ID_S2_IGM = {"sinan": ""}
+
+ID_S2_IGM_ = {"sinan": ""}
+
+ID_S2_IG_1 = {"sinan": ""}
+
+ID_S2_IG_2 = {"sinan": ""}
+
+ID_SANGUE = {"sinan": ""}
+
+ID_SECRECA = {"sinan": ""}
+
+ID_SEGM = {"cnes": ""}
+
+ID_TOSSE = {"sinan": ""}
+
+ID_UNIDADE = {"sinan": ""}
+
+ID_UNID_AT = {"sinan": ""}
+
+ID_URINA = {"sinan": ""}
+
+IFI = {"sinan": ""}
+
+IGG_S1 = {"sinan": ""}
+
+IGG_S2 = {"sinan": ""}
+
+IGG_T2 = {"sinan": ""}
+
+IGM_S1 = {"sinan": ""}
+
+IGM_S2 = {"sinan": ""}
+
+IGM_T1 = {"sinan": ""}
+
+IMPLANTA = {"sinan": ""}
+
+IMUNO = {
+ "pni": "",
+ "sinan": "",
+}
+
+IMUNOH = {"sinan": ""}
+
+IMUNOHIST = {"sinan": ""}
+
+IMUNOH_N = {"sinan": ""}
+
+IMUNO_DIRE = {"sinan": ""}
+
+IMUNO_INDI = {"sinan": ""}
+
+IMU_HEP_B = {"sinan": ""}
+
+IMU_IGG_S1 = {"sinan": ""}
+
+IMU_IGG_S2 = {"sinan": ""}
+
+IMU_IGM_S1 = {"sinan": ""}
+
+IMU_IGM_S2 = {"sinan": ""}
+
+INAL_CRACK = {"sinan": ""}
+
+INDIGENA = {"cnes": ""}
+
+INDIVIDUAL = {"sinan": ""}
+
+IND_NSUS = {"cnes": ""}
+
+IND_SUS = {"cnes": ""}
+
+IND_VDRL = {"sih": ""}
+
+INESPECIF = {"sinan": ""}
+
+INFAN_JUV = {"sinan": ""}
+
+INFECCIOSO = {"sinan": ""}
+
+INFEHOSP = {"sih": ""}
+
+INFERIORES = {"sinan": ""}
+
+INFILTRA = {"sinan": ""}
+
+INICIO = {"sia": ""}
+
+INJETAVEIS = {"sinan": ""}
+
+INSC_PN = {"sih": ""}
+
+INSTITUCIO = {"sinan": ""}
+
+INSTRMAE = {"sim": ""}
+
+INSTRPAI = {"sim": ""}
+
+INSTRU = {"sih": ""}
+
+INSTRUCAO = {"sim": ""}
+
+INSTR_MAE = {"sinasc": ""}
+
+INSUFICIEN = {"sinan": ""}
+
+INTOX_CHUM = {"sinan": ""}
+
+INTOX_MERC = {"sinan": ""}
+
+INTOX_META = {"sinan": ""}
+
+INT_TEMPO = {"sinan": ""}
+
+IN_AIDS = {"sinan": ""}
+
+IN_TP_VAL = {"sih": ""}
+
+IN_VINCULA = {"sinan": ""}
+
+IONIZANTES = {"sinan": ""}
+
+ISOLAMENTO = {"sinan": ""}
+
+ISONIAZIDA = {"sinan": ""}
+
+KOTELCHUCK = {"sinasc": ""}
+
+LABC_DT = {"sinan": ""}
+
+LABC_DT_1 = {"sinan": ""}
+
+LABC_DT_2 = {"sinan": ""}
+
+LABC_EVIDE = {"sinan": ""}
+
+LABC_IGG = {"sinan": ""}
+
+LABC_LIQUO = {"sinan": ""}
+
+LABC_LIQ_1 = {"sinan": ""}
+
+LABC_SANGU = {"sinan": ""}
+
+LABC_TITUL = {"sinan": ""}
+
+LABC_TIT_1 = {"sinan": ""}
+
+LABC_TIT_2 = {"sinan": ""}
+
+LAB_AGLIQU = {"sinan": ""}
+
+LAB_AGSANG = {"sinan": ""}
+
+LAB_ASPECT = {"sinan": ""}
+
+LAB_ATIPIC = {"sinan": ""}
+
+LAB_BCESCA = {"sinan": ""}
+
+LAB_BCLESA = {"sinan": ""}
+
+LAB_BCLIQU = {"sinan": ""}
+
+LAB_BCSANG = {"sinan": ""}
+
+LAB_BD = {"sinan": ""}
+
+LAB_BI = {"sinan": ""}
+
+LAB_BILATE = {"sinan": ""}
+
+LAB_BT = {"sinan": ""}
+
+LAB_CELEBR = {"sinan": ""}
+
+LAB_CILIQU = {"sinan": ""}
+
+LAB_CISANG = {"sinan": ""}
+
+LAB_CLOR = {"sinan": ""}
+
+LAB_COLHEU = {"sinan": ""}
+
+LAB_CONF = {"sinan": ""}
+
+LAB_CONFIR = {"sinan": ""}
+
+LAB_CON_F = {"sinan": ""}
+
+LAB_CREATI = {"sinan": ""}
+
+LAB_CTESCA = {"sinan": ""}
+
+LAB_CTLESA = {"sinan": ""}
+
+LAB_CTLIQU = {"sinan": ""}
+
+LAB_CTSANG = {"sinan": ""}
+
+LAB_CULTUR = {"sinan": ""}
+
+LAB_DATA_C = {"sinan": ""}
+
+LAB_DERRAM = {"sinan": ""}
+
+LAB_DIFUSO = {"sinan": ""}
+
+LAB_DT3 = {"sinan": ""}
+
+LAB_DTPUNC = {"sinan": ""}
+
+LAB_DT_1 = {"sinan": ""}
+
+LAB_DT_2 = {"sinan": ""}
+
+LAB_DT_3 = {"sinan": ""}
+
+LAB_DT_C1 = {"sinan": ""}
+
+LAB_DT_CEN = {"sinan": ""}
+
+LAB_DT_E_1 = {"sinan": ""}
+
+LAB_DT_F1 = {"sinan": ""}
+
+LAB_DT_L_1 = {"sinan": ""}
+
+LAB_DT_L_2 = {"sinan": ""}
+
+LAB_DT_NLE = {"sinan": ""}
+
+LAB_DT_R1 = {"sinan": ""}
+
+LAB_DT_RE1 = {"sinan": ""}
+
+LAB_ELIS_1 = {"sinan": ""}
+
+LAB_ELIS_2 = {"sinan": ""}
+
+LAB_EOSI = {"sinan": ""}
+
+LAB_ESFR = {"sinan": ""}
+
+LAB_E_D_1 = {"sinan": ""}
+
+LAB_GLICO = {"sinan": ""}
+
+LAB_HEMA = {"sinan": ""}
+
+LAB_HEMATO = {"sinan": ""}
+
+LAB_HEMA_N = {"sinan": ""}
+
+LAB_HEMO = {"sinan": ""}
+
+LAB_HISTOP = {"sinan": ""}
+
+LAB_IGG = {"sinan": ""}
+
+LAB_IGG_R = {"sinan": ""}
+
+LAB_IGM = {"sinan": ""}
+
+LAB_IGM_R = {"sinan": ""}
+
+LAB_IMUNO = {"sinan": ""}
+
+LAB_INTEST = {"sinan": ""}
+
+LAB_IRM = {"sinan": ""}
+
+LAB_ISFEZE = {"sinan": ""}
+
+LAB_ISLIQU = {"sinan": ""}
+
+LAB_LEUCO = {"sinan": ""}
+
+LAB_LEUC_N = {"sinan": ""}
+
+LAB_LINFO = {"sinan": ""}
+
+LAB_LOCAL = {"sinan": ""}
+
+LAB_L_CEL1 = {"sinan": ""}
+
+LAB_L_CEL2 = {"sinan": ""}
+
+LAB_L_CL1 = {"sinan": ""}
+
+LAB_L_CL2 = {"sinan": ""}
+
+LAB_L_C_DE = {"sinan": ""}
+
+LAB_L_GLI1 = {"sinan": ""}
+
+LAB_L_GLI2 = {"sinan": ""}
+
+LAB_L_LIN1 = {"sinan": ""}
+
+LAB_L_LIN2 = {"sinan": ""}
+
+LAB_L_OUT = {"sinan": ""}
+
+LAB_L_PRO1 = {"sinan": ""}
+
+LAB_L_PRO2 = {"sinan": ""}
+
+LAB_L_S_DE = {"sinan": ""}
+
+LAB_MACRO = {"sinan": ""}
+
+LAB_MATE_N = {"sinan": ""}
+
+LAB_MEDULA = {"sinan": ""}
+
+LAB_METODO = {"sinan": ""}
+
+LAB_MET_D = {"sinan": ""}
+
+LAB_MICRO = {"sinan": ""}
+
+LAB_MICRON = {"sinan": ""}
+
+LAB_MICR_1 = {"sinan": ""}
+
+LAB_MICR_2 = {"sinan": ""}
+
+LAB_MONO = {"sinan": ""}
+
+LAB_NEUTRO = {"sinan": ""}
+
+LAB_OUTRO = {"sinan": ""}
+
+LAB_OUT_D = {"sinan": ""}
+
+LAB_OUT_E = {"sinan": ""}
+
+LAB_PARASI = {"sinan": ""}
+
+LAB_PARTO = {"sinan": ""}
+
+LAB_PCESCA = {"sinan": ""}
+
+LAB_PCLESA = {"sinan": ""}
+
+LAB_PCLIQU = {"sinan": ""}
+
+LAB_PCR_1 = {"sinan": ""}
+
+LAB_PCR_2 = {"sinan": ""}
+
+LAB_PCR_3 = {"sinan": ""}
+
+LAB_PCSANG = {"sinan": ""}
+
+LAB_PLAQUE = {"sinan": ""}
+
+LAB_POTASS = {"sinan": ""}
+
+LAB_PROD1 = {"sinan": ""}
+
+LAB_PROD2 = {"sinan": ""}
+
+LAB_PROT = {"sinan": ""}
+
+LAB_PROVAS = {"sinan": ""}
+
+LAB_PUNCAO = {"sinan": ""}
+
+LAB_Q_F = {"sinan": ""}
+
+LAB_RADIOL = {"sinan": ""}
+
+LAB_REALIZ = {"sinan": ""}
+
+LAB_RESULT = {"sinan": ""}
+
+LAB_RES_B = {"sinan": ""}
+
+LAB_RES_F1 = {"sinan": ""}
+
+LAB_RES_F2 = {"sinan": ""}
+
+LAB_RES_F3 = {"sinan": ""}
+
+LAB_RTPCR = {"sinan": ""}
+
+LAB_R_1 = {"sinan": ""}
+
+LAB_R_2 = {"sinan": ""}
+
+LAB_SORO = {"sinan": ""}
+
+LAB_SOROAG = {"sinan": ""}
+
+LAB_SOR_DE = {"sinan": ""}
+
+LAB_S_1 = {"sinan": ""}
+
+LAB_S_2 = {"sinan": ""}
+
+LAB_S_3 = {"sinan": ""}
+
+LAB_S_4 = {"sinan": ""}
+
+LAB_S_5 = {"sinan": ""}
+
+LAB_TGO = {"sinan": ""}
+
+LAB_TGO_D = {"sinan": ""}
+
+LAB_TGP = {"sinan": ""}
+
+LAB_TGP_D = {"sinan": ""}
+
+LAB_TITU_2 = {"sinan": ""}
+
+LAB_TRIAGE = {"sinan": ""}
+
+LAB_TROMBO = {"sinan": ""}
+
+LAB_UF = {"sinan": ""}
+
+LAB_UREIA = {"sinan": ""}
+
+LAB_VACINA = {"sinan": ""}
+
+LAB_VAC_DE = {"sinan": ""}
+
+LACO = {"sinan": ""}
+
+LACO_N = {"sinan": ""}
+
+LAMBEDURA = {"sinan": ""}
+
+LAVOURA = {"sinan": ""}
+
+LEITE = {"sinan": ""}
+
+LEITHOSP = {"cnes": ""}
+
+LESAO = {"sinan": ""}
+
+LESAO_CORP = {"sinan": ""}
+
+LESAO_DES = {"sinan": ""}
+
+LESAO_ESPE = {"sinan": ""}
+
+LESAO_NAT = {"sinan": ""}
+
+LESOES = {"sinan": ""}
+
+LES_AUTOP = {"sinan": ""}
+
+LEUCOPENIA = {"sinan": ""}
+
+LIMITA_MOV = {"sinan": ""}
+
+LINFADENO = {"sinan": ""}
+
+LINHAA = {"sim": ""}
+
+LINHAB = {"sim": ""}
+
+LINHAC = {"sim": ""}
+
+LINHAD = {"sim": ""}
+
+LINHAII = {"sim": ""}
+
+LOCACID = {"sim": ""}
+
+LOCAL_ACID = {"sinan": ""}
+
+LOCAL_ESPE = {"sinan": ""}
+
+LOCAL_OCOR = {
+ "sinan": "",
+ "sinasc": "",
+}
+
+LOCA_MID_N = {"sinan": ""}
+
+LOCA_MIE_N = {"sinan": ""}
+
+LOCA_MSD_N = {"sinan": ""}
+
+LOCA_MSE_N = {"sinan": ""}
+
+LOCNASC = {"sinasc": ""}
+
+LOCOCOR = {"sim": ""}
+
+LOC_EXPO = {"sinan": ""}
+
+LOC_EXP_DE = {"sinan": ""}
+
+LOC_INF = {"sinan": ""}
+
+LOC_REALIZ = {"sia": ""}
+
+LOTE1 = {"sinan": ""}
+
+LOTE2 = {"sinan": ""}
+
+LOTE_VAC = {"sinan": ""}
+
+LUVA = {"sinan": ""}
+
+MAECHAGA = {"sinan": ""}
+
+MAIS_6HS = {"sinan": ""}
+
+MAIS_TRAB = {"sinan": ""}
+
+MANIFESTA = {"sinan": ""}
+
+MANIPULA = {"sinan": ""}
+
+MANI_HEMOR = {"sinan": ""}
+
+MAOS_N = {"sinan": ""}
+
+MAPORTAR = {"cnes": ""}
+
+MAQ_OUTR = {"cnes": ""}
+
+MAQ_PROP = {"cnes": ""}
+
+MARCA_UCI = {"sih": ""}
+
+MARCA_UTI = {"sih": ""}
+
+MASCARA = {"sinan": ""}
+
+MATBIOLOGI = {"sinan": ""}
+
+MATERIAL = {"sinan": ""}
+
+MAT_ORG = {"sinan": ""}
+
+MAT_ORG_DE = {"sinan": ""}
+
+MAX_INC = {"sinan": ""}
+
+MAX_ST_INC = {"sinan": ""}
+
+MCLI_LOCAL = {"sinan": ""}
+
+MCLI_SIST = {"sinan": ""}
+
+MEDICA = {"sinan": ""}
+
+MEDICAMENT = {"sinan": ""}
+
+MED_BLOQUE = {"sinan": ""}
+
+MED_CASO_S = {"sinan": ""}
+
+MED_CONTR = {"sinan": ""}
+
+MED_DT_EVO = {"sinan": ""}
+
+MED_DT_QUI = {"sinan": ""}
+
+MED_IDEN_C = {"sinan": ""}
+
+MED_MATERI = {"sinan": ""}
+
+MED_NUCOMU = {"sinan": ""}
+
+MED_OUTRO = {"sinan": ""}
+
+MED_PREVEN = {"sinan": ""}
+
+MED_QUAN_C = {"sinan": ""}
+
+MED_QUAN_M = {"sinan": ""}
+
+MED_QUAN_P = {"sinan": ""}
+
+MED_QUIMIO = {"sinan": ""}
+
+MEFLOQ = {"sinan": ""}
+
+MENINGO = {"sinan": ""}
+
+MENINGOE = {"sinan": ""}
+
+MENOR_5ANO = {"sinan": ""}
+
+MENOS_MOV = {"sinan": ""}
+
+MENTAL = {"sinan": ""}
+
+MES = {
+ "pni": "",
+ "sih": "",
+}
+
+MESPRENAT = {"sinasc": ""}
+
+MES_CMPT = {
+ "ciha": "",
+ "sih": "",
+}
+
+METAL = {"sinan": ""}
+
+METRO = {"sinan": ""}
+
+MIALGIA = {"sinan": ""}
+
+MICRO1_S1 = {"sinan": ""}
+
+MICRO1_S_2 = {"sinan": ""}
+
+MICRO1_T_1 = {"sinan": ""}
+
+MICRO1_T_2 = {"sinan": ""}
+
+MICRO2_S1 = {"sinan": ""}
+
+MICRO2_S_2 = {"sinan": ""}
+
+MICRO2_T_1 = {"sinan": ""}
+
+MICRO2_T_2 = {"sinan": ""}
+
+MICROCEFA = {"sinan": ""}
+
+MICRO_HEMA = {"sinan": ""}
+
+MICR_REG = {"cnes": ""}
+
+MIGRADO_W = {"sinan": ""}
+
+MINTERNA = {"sinan": ""}
+
+MIN_ACID = {"sinan": ""}
+
+MIN_JOR = {"sinan": ""}
+
+MIOCARDI = {"sinan": ""}
+
+MNDIF = {"sia": ""}
+
+MN_IND = {"sia": ""}
+
+MOAGEM_N = {"sinan": ""}
+
+MODALIDADE = {"ciha": ""}
+
+MODODETECT = {"sinan": ""}
+
+MODOENTR = {"sinan": ""}
+
+MORDEDURA = {"sinan": ""}
+
+MORTE = {
+ "ciha": "",
+ "sih": "",
+}
+
+MORTEPARTO = {"sim": ""}
+
+MOTDESAT = {"cnes": ""}
+
+MOT_COB = {"sia": ""}
+
+MPU = {"sinan": ""}
+
+MTRANSFU = {"sinan": ""}
+
+MUCOSA = {"sinan": ""}
+
+MUDA_TRAB = {"sinan": ""}
+
+MUNCOD = {"ibge": ""}
+
+MUNIC = {"pni": ""}
+
+MUNICIPIO = {"sinan": ""}
+
+MUNIC_LOC = {"sih": ""}
+
+MUNIC_MOV = {
+ "ciha": "",
+ "sih": "",
+}
+
+MUNIC_RES = {
+ "ciha": "",
+ "ibge": "",
+ "sih": "",
+}
+
+MUNIOCOR = {"sim": ""}
+
+MUNIRES = {"sim": ""}
+
+MUNIRESAT = {"sinan": ""}
+
+MUNI_MAE = {"sinasc": ""}
+
+MUNI_OCOR = {"sinasc": ""}
+
+MUNPAC = {"sia": ""}
+
+MUN_1 = {"sinan": ""}
+
+MUN_2 = {"sinan": ""}
+
+MUN_3 = {"sinan": ""}
+
+MUN_ACID = {"sinan": ""}
+
+MUN_ATENDE = {"sinan": ""}
+
+MUN_DES1 = {"sinan": ""}
+
+MUN_DES2 = {"sinan": ""}
+
+MUN_DES3 = {"sinan": ""}
+
+MUN_EMP = {"sinan": ""}
+
+MUN_HOSP = {"sinan": ""}
+
+MUN_ING = {"sinan": ""}
+
+MUN_MOV = {"sih": ""}
+
+MUN_PRE_NA = {"sinan": ""}
+
+MUN_RES = {"sih": ""}
+
+MUN_TRANSF = {"sinan": ""}
+
+MUSCULAR = {"sinan": ""}
+
+NACIONAL = {
+ "ciha": "",
+ "sih": "",
+}
+
+NACION_PAC = {"sia": ""}
+
+NAO_IONIZA = {"sinan": ""}
+
+NASC = {
+ "ciha": "",
+ "sih": "",
+}
+
+NATURAL = {"sim": ""}
+
+NATURALMAE = {"sinasc": ""}
+
+NATUREZA = {
+ "ciha": "",
+ "cnes": "",
+ "sih": "",
+}
+
+NAT_JUR = {
+ "cnes": "",
+ "sia": "",
+ "sih": "",
+}
+
+NAUSEA = {"sinan": ""}
+
+NAUSEAS = {"sinan": ""}
+
+NDUPLIC = {"sinan": ""}
+
+NDUPLIC_N = {"sinan": ""}
+
+NECROPSIA = {"sim": ""}
+
+NECROSE = {"sinan": ""}
+
+NENHUM = {"sinan": ""}
+
+NEOPLASICO = {"sinan": ""}
+
+NERVOSAFET = {"sinan": ""}
+
+NIQUEL = {"sinan": ""}
+
+NIVATE_A = {"cnes": ""}
+
+NIVATE_H = {"cnes": ""}
+
+NIV_DEP = {"cnes": ""}
+
+NIV_HIER = {"cnes": ""}
+
+NM_ANTIBIO = {"sinan": ""}
+
+NM_MUNIC_H = {"sinan": ""}
+
+NM_MUN_HOS = {"sinan": ""}
+
+NM_OUT_COM = {"sinan": ""}
+
+NM_OUT_SIN = {"sinan": ""}
+
+NM_SIN_OUT = {"sinan": ""}
+
+NOCOLINF = {"sinan": ""}
+
+NOMEAREA = {"cnes": ""}
+
+NOMEFANT = {"sih": ""}
+
+NOMEPROF = {"cnes": ""}
+
+NOME_BACT = {"sinan": ""}
+
+NOME_EQP = {"cnes": ""}
+
+NOME_PARAS = {"sinan": ""}
+
+NOME_VIRUS = {"sinan": ""}
+
+NOPROPIN = {"sinan": ""}
+
+NOVO = {"sinasc": ""}
+
+NO_ATENOUT = {"sinan": ""}
+
+NO_COBOUTR = {"sinan": ""}
+
+NO_OUPARTO = {"sinan": ""}
+
+NO_OUTRAS = {"sinan": ""}
+
+NU10_19_N = {"sinan": ""}
+
+NU1_4_F_NU = {"sinan": ""}
+
+NU5_9_F_NU = {"sinan": ""}
+
+NUATEND = {"sinan": ""}
+
+NUCONSOME = {"sinan": ""}
+
+NUDIASINF = {"sim": ""}
+
+NUDIASOBCO = {"sim": ""}
+
+NUDIASOBIN = {"sim": ""}
+
+NULEITOS = {"cnes": ""}
+
+NUMCRIPOB = {"ibge": ""}
+
+NUMCRIPOBX = {"ibge": ""}
+
+NUMDESOCUP = {"ibge": ""}
+
+NUMERODN = {
+ "sim": "",
+ "sinasc": "",
+}
+
+NUMERODV = {"sinasc": ""}
+
+NUMEROLOTE = {
+ "sim": "",
+ "sinasc": "",
+}
+
+NUMEXPORT = {"sim": ""}
+
+NUMPOBRES = {"ibge": ""}
+
+NUMPOBRESX = {"ibge": ""}
+
+NUMREGCART = {
+ "sim": "",
+ "sinasc": "",
+}
+
+NUMRENDA = {"ibge": ""}
+
+NUMTRABINF = {"ibge": ""}
+
+NUM_CON_N = {"sinan": ""}
+
+NUM_DOSES = {"sinan": ""}
+
+NUM_ENVOLV = {"sinan": ""}
+
+NUM_EXPORT = {"sinasc": ""}
+
+NUM_FILHOS = {"sih": ""}
+
+NUM_PROC = {"sih": ""}
+
+NUTEMPO = {"sinan": ""}
+
+NUTEMPORIS = {"sinan": ""}
+
+NU_10_19 = {"sinan": ""}
+
+NU_10_19IG = {"sinan": ""}
+
+NU_10_19_M = {"sinan": ""}
+
+NU_1_4_IGN = {"sinan": ""}
+
+NU_1_4_NU = {"sinan": ""}
+
+NU_1_4_TOT = {"sinan": ""}
+
+NU_1_F_NU = {"sinan": ""}
+
+NU_1_IGN = {"sinan": ""}
+
+NU_1_M_NU = {"sinan": ""}
+
+NU_1_TOT_N = {"sinan": ""}
+
+NU_20_49 = {"sinan": ""}
+
+NU_20_49IG = {"sinan": ""}
+
+NU_20_49_F = {"sinan": ""}
+
+NU_20_49_N = {"sinan": ""}
+
+NU_50_F_NU = {"sinan": ""}
+
+NU_50_IGN = {"sinan": ""}
+
+NU_50_M_NU = {"sinan": ""}
+
+NU_50_TOT = {"sinan": ""}
+
+NU_5_9_IGN = {"sinan": ""}
+
+NU_5_9_NU = {"sinan": ""}
+
+NU_5_9_TOT = {"sinan": ""}
+
+NU_ABDOM_N = {"sinan": ""}
+
+NU_AFAST = {"sinan": ""}
+
+NU_AMPOLAS = {"sinan": ""}
+
+NU_AMPOL_1 = {"sinan": ""}
+
+NU_AMPOL_3 = {"sinan": ""}
+
+NU_AMPOL_4 = {"sinan": ""}
+
+NU_AMPOL_6 = {"sinan": ""}
+
+NU_AMPOL_8 = {"sinan": ""}
+
+NU_AMPOL_9 = {"sinan": ""}
+
+NU_AMPO_5 = {"sinan": ""}
+
+NU_AMPO_7 = {"sinan": ""}
+
+NU_ANO = {"sinan": ""}
+
+NU_A_ALIM = {"sinan": ""}
+
+NU_A_CLINI = {"sinan": ""}
+
+NU_A_NUM_1 = {"sinan": ""}
+
+NU_A_NUM_2 = {"sinan": ""}
+
+NU_A_NUM_3 = {"sinan": ""}
+
+NU_CASO = {"sinan": ""}
+
+NU_CASOEXA = {"sinan": ""}
+
+NU_CASOPOS = {"sinan": ""}
+
+NU_CEFAL_N = {"sinan": ""}
+
+NU_CELULA = {"sinan": ""}
+
+NU_CLI_NUM = {"sinan": ""}
+
+NU_COMU_EX = {"sinan": ""}
+
+NU_CONTATO = {"sinan": ""}
+
+NU_DIARR_N = {"sinan": ""}
+
+NU_DOSE = {"sinan": ""}
+
+NU_ENTR = {"sinan": ""}
+
+NU_ENT_DOE = {"sinan": ""}
+
+NU_FEBRE_N = {"sinan": ""}
+
+NU_F_TOT = {"sinan": ""}
+
+NU_F_TOT_N = {"sinan": ""}
+
+NU_GESTA = {"sinan": ""}
+
+NU_IDADE = {"sinan": ""}
+
+NU_IDADE_N = {"sinan": ""}
+
+NU_IGN_NU = {"sinan": ""}
+
+NU_IGRA_NU = {"sinan": ""}
+
+NU_IG_F_NU = {"sinan": ""}
+
+NU_IG_IGN = {"sinan": ""}
+
+NU_INCUB_M = {"sinan": ""}
+
+NU_INC_ME = {"sinan": ""}
+
+NU_LESOES = {"sinan": ""}
+
+NU_LOTE = {"sinan": ""}
+
+NU_LOTE_H = {"sinan": ""}
+
+NU_LOTE_I = {"sinan": ""}
+
+NU_LOTE_IA = {"sinan": ""}
+
+NU_LOTE_V = {"sinan": ""}
+
+NU_NAUSE_P = {"sinan": ""}
+
+NU_NEURO_N = {"sinan": ""}
+
+NU_NOTIFIC = {"sinan": ""}
+
+NU_NUM_2 = {"sinan": ""}
+
+NU_NUM_3 = {"sinan": ""}
+
+NU_OBITO = {"sinan": ""}
+
+NU_OUTRO_N = {"sinan": ""}
+
+NU_PA_TOT = {"sia": ""}
+
+NU_PROTEI = {"sinan": ""}
+
+NU_RESU_3 = {"sinan": ""}
+
+NU_SEMA_EP = {"sinan": ""}
+
+NU_TOT = {"sinan": ""}
+
+NU_TOT_HOS = {"sinan": ""}
+
+NU_TOT_IGN = {"sinan": ""}
+
+NU_TO_F_NU = {"sinan": ""}
+
+NU_TRAB = {"sinan": ""}
+
+NU_VOMTO_N = {"sinan": ""}
+
+NU_VPA_TOT = {"sia": ""}
+
+N_AIH = {"sih": ""}
+
+N_DIAR = {"sinan": ""}
+
+N_VOMITO = {"sinan": ""}
+
+OBITOFE1 = {"sim": ""}
+
+OBITOFE2 = {"sim": ""}
+
+OBITOGRAV = {"sim": ""}
+
+OBITOPARTO = {"sim": ""}
+
+OBITOPUERP = {"sim": ""}
+
+OBSERVACAO = {"sinan": ""}
+
+OCULOS = {"sinan": ""}
+
+OCUP = {"sim": ""}
+
+OCUPACAO = {
+ "sim": "",
+ "sinan": "",
+}
+
+OCUPACIO = {"sinan": ""}
+
+OCUPMAE = {"sim": ""}
+
+OCUPPAI = {"sim": ""}
+
+OLEOS = {"sinan": ""}
+
+OLIGURIA = {"sinan": ""}
+
+ORAL = {"sinan": ""}
+
+ORGEXPED = {"cnes": ""}
+
+ORIENT_SEX = {"sinan": ""}
+
+ORIGEM = {
+ "sim": "",
+ "sinan": "",
+ "sinasc": "",
+}
+
+ORIGEM_PAC = {"sia": ""}
+
+ORTV1050 = {"cnes": ""}
+
+ORV50150 = {"cnes": ""}
+
+OSMOSE_R = {"cnes": ""}
+
+OSSEA = {"sinan": ""}
+
+OUTRAS = {"sinan": ""}
+
+OUTRAS_DES = {"sinan": ""}
+
+OUTRA_ATIV = {"sinan": ""}
+
+OUTRA_DST = {"sinan": ""}
+
+OUTRO = {"sinan": ""}
+
+OUTROANI = {"sinan": ""}
+
+OUTROS = {"sinan": ""}
+
+OUTROS_DES = {"sinan": ""}
+
+OUTROS_ESP = {"sinan": ""}
+
+OUTROS_M = {"sinan": ""}
+
+OUTROS_M_D = {"sinan": ""}
+
+OUTRO_ARV = {"sinan": ""}
+
+OUTRO_DES = {"sinan": ""}
+
+OUTRO_DOE = {"sinan": ""}
+
+OUTRO_ESP = {"sinan": ""}
+
+OUTRO_EX = {"sinan": ""}
+
+OUTRO_EXP = {"sinan": ""}
+
+OUTRO_S = {"sinan": ""}
+
+OUTRO_SIN = {"sinan": ""}
+
+OUTRO_S_D = {"sinan": ""}
+
+OUTR_ATI_D = {"sinan": ""}
+
+OUTR_D1 = {"sinan": ""}
+
+OUTR_D2 = {"sinan": ""}
+
+OUTR_D3 = {"sinan": ""}
+
+OUTR_R1 = {"sinan": ""}
+
+OUTR_R2 = {"sinan": ""}
+
+OUTR_R3 = {"sinan": ""}
+
+OUT_AGENTE = {"sinan": ""}
+
+OUT_AGRAVO = {"sinan": ""}
+
+OUT_ARV_ES = {"sinan": ""}
+
+OUT_CONTAT = {"sinan": ""}
+
+OUT_DOE_DE = {"sinan": ""}
+
+OUT_EXAME = {"sinan": ""}
+
+OUT_EXP_DE = {"sinan": ""}
+
+OUT_MEDIC = {"sinan": ""}
+
+OUT_TRAT = {"cnes": ""}
+
+OUT_VEZES = {"sinan": ""}
+
+OUT_VINCUL = {"sinan": ""}
+
+OUT_VIRUS = {"sinan": ""}
+
+OV150500 = {"cnes": ""}
+
+PAIS_EXP = {"sinan": ""}
+
+PALIDEZ = {"sinan": ""}
+
+PALQ_MAIOR = {"sinan": ""}
+
+PARALISIA = {"sinan": ""}
+
+PARASITA = {"sinan": ""}
+
+PARASITO = {"sinan": ""}
+
+PARESTESI = {"sinan": ""}
+
+PARIDADE = {"sinasc": ""}
+
+PARTO = {
+ "sim": "",
+ "sinasc": "",
+}
+
+PART_CORP1 = {"sinan": ""}
+
+PART_CORP2 = {"sinan": ""}
+
+PART_CORP3 = {"sinan": ""}
+
+PAR_ANTIDU = {"sinan": ""}
+
+PAR_DT_PAR = {"sinan": ""}
+
+PAR_EVOLUC = {"sinan": ""}
+
+PAR_INICPR = {"sinan": ""}
+
+PAR_TIPO = {"sinan": ""}
+
+PAR_UFPART = {"sinan": ""}
+
+PA_ALTA = {"sia": ""}
+
+PA_AUTORIZ = {"sia": ""}
+
+PA_CATEND = {"sia": ""}
+
+PA_CBOCOD = {"sia": ""}
+
+PA_CID = {"sia": ""}
+
+PA_CIDCAS = {"sia": ""}
+
+PA_CIDPRI = {"sia": ""}
+
+PA_CIDSEC = {"sia": ""}
+
+PA_CLASS_S = {"sia": ""}
+
+PA_CMP = {"sia": ""}
+
+PA_CNPJCPF = {"sia": ""}
+
+PA_CNPJMNT = {"sia": ""}
+
+PA_CNPJ_CC = {"sia": ""}
+
+PA_CNSMED = {"sia": ""}
+
+PA_CODESP = {"sia": ""}
+
+PA_CODOCO = {"sia": ""}
+
+PA_CODPRO = {"sia": ""}
+
+PA_CODUNI = {"sia": ""}
+
+PA_CONDIC = {"sia": ""}
+
+PA_DATPR = {"sia": ""}
+
+PA_DATREF = {"sia": ""}
+
+PA_DES1 = {"sinan": ""}
+
+PA_DES2 = {"sinan": ""}
+
+PA_DES3 = {"sinan": ""}
+
+PA_DIF_VAL = {"sia": ""}
+
+PA_DOCORIG = {"sia": ""}
+
+PA_ENCERR = {"sia": ""}
+
+PA_EQUIPE = {"sia": ""}
+
+PA_ETNIA = {"sia": ""}
+
+PA_FLER = {"sia": ""}
+
+PA_FLIDADE = {"sia": ""}
+
+PA_FLQT = {"sia": ""}
+
+PA_FNTORC = {"sia": ""}
+
+PA_FXETAR = {"sia": ""}
+
+PA_GESTAO = {"sia": ""}
+
+PA_IDADE = {"sia": ""}
+
+PA_INCOUT = {"sia": ""}
+
+PA_INCURG = {"sia": ""}
+
+PA_INDICA = {"sia": ""}
+
+PA_INE = {"sia": ""}
+
+PA_MNDIF = {"sia": ""}
+
+PA_MN_IND = {"sia": ""}
+
+PA_MORFOL = {"sia": ""}
+
+PA_MOTSAI = {"sia": ""}
+
+PA_MUNAT = {"sia": ""}
+
+PA_MUNPCN = {"sia": ""}
+
+PA_MVM = {"sia": ""}
+
+PA_NAT_JUR = {"sia": ""}
+
+PA_NH = {"sia": ""}
+
+PA_NIVCPL = {"sia": ""}
+
+PA_NUMAPA = {"sia": ""}
+
+PA_OBITO = {"sia": ""}
+
+PA_PERMAN = {"sia": ""}
+
+PA_PROC_ID = {"sia": ""}
+
+PA_QTDAPR = {"sia": ""}
+
+PA_QTDPRO = {"sia": ""}
+
+PA_RACACOR = {"sia": ""}
+
+PA_RCB = {"sia": ""}
+
+PA_RCBDF = {"sia": ""}
+
+PA_REGCT = {"sia": ""}
+
+PA_SEXO = {"sia": ""}
+
+PA_SRV = {"sia": ""}
+
+PA_SRV_C = {"sia": ""}
+
+PA_SUBFIN = {"sia": ""}
+
+PA_TIPATE = {"sia": ""}
+
+PA_TIPPRE = {"sia": ""}
+
+PA_TIPPRO = {"sia": ""}
+
+PA_TPFIN = {"sia": ""}
+
+PA_TPUPS = {"sia": ""}
+
+PA_TP_EQP = {"sia": ""}
+
+PA_TRANSF = {"sia": ""}
+
+PA_UFDIF = {"sia": ""}
+
+PA_UFMUN = {"sia": ""}
+
+PA_VALAPR = {"sia": ""}
+
+PA_VALPRO = {"sia": ""}
+
+PA_VL_CF = {"sia": ""}
+
+PA_VL_CL = {"sia": ""}
+
+PA_VL_INC = {"sia": ""}
+
+PCRUZ = {"sinan": ""}
+
+PELE_INTEG = {"sinan": ""}
+
+PELE_NAO_I = {"sinan": ""}
+
+PEN_ANAL = {"sinan": ""}
+
+PEN_ORAL = {"sinan": ""}
+
+PEN_VAGINA = {"sinan": ""}
+
+PERCUTANEA = {"sinan": ""}
+
+PERFURA = {"sinan": ""}
+
+PERICARDI = {"sinan": ""}
+
+PERIODO = {"sinan": ""}
+
+PERMANEN = {"sia": ""}
+
+PES = {"sinan": ""}
+
+PESCOU_N = {"sinan": ""}
+
+PESO = {
+ "sim": "",
+ "sinan": "",
+ "sinasc": "",
+}
+
+PESONASC = {"sim": ""}
+
+PETEQUIAS = {"sinan": ""}
+
+PETEQUIA_N = {"sinan": ""}
+
+PF_PJ = {"cnes": ""}
+
+PIRAZINAMI = {"sinan": ""}
+
+PLANJ_RD = {"cnes": ""}
+
+PLAQ_MENOR = {"sinan": ""}
+
+PLASMATICO = {"sinan": ""}
+
+PLEURAL = {"sinan": ""}
+
+PMALARIA = {"sinan": ""}
+
+PMM = {"sinan": ""}
+
+POEIRAS = {"sinan": ""}
+
+POE_ABRASI = {"sinan": ""}
+
+POE_MISTA = {"sinan": ""}
+
+POE_ORGANI = {"sinan": ""}
+
+POLIADENO = {"sinan": ""}
+
+POP = {"pni": ""}
+
+POPALFAB = {"ibge": ""}
+
+POPDEPEND = {"ibge": ""}
+
+POPGERAL = {"cnes": ""}
+
+POPNALFAB = {"ibge": ""}
+
+POPTOT = {"ibge": ""}
+
+POPULACAO = {"ibge": ""}
+
+POP_IMIG = {"sinan": ""}
+
+POP_LIBER = {"sinan": ""}
+
+POP_RUA = {"sinan": ""}
+
+POP_SAUDE = {"sinan": ""}
+
+PORTARIA = {"cnes": ""}
+
+POS_EXPOS = {"sinan": ""}
+
+PREFIXODN = {"sinasc": ""}
+
+PREMIOS = {"sinan": ""}
+
+PRESENCA = {"sinan": ""}
+
+PRE_ANTRET = {"sinan": ""}
+
+PRE_DT_RET = {"sinan": ""}
+
+PRE_EXPOS = {"sinan": ""}
+
+PRE_MUNIPA = {"sinan": ""}
+
+PRE_MUNIRE = {"sinan": ""}
+
+PRE_NATAL = {"sinasc": ""}
+
+PRE_PRENAT = {"sinan": ""}
+
+PRE_UFREL = {"sinan": ""}
+
+PRIMAQ = {"sinan": ""}
+
+PROC_ABORT = {"sinan": ""}
+
+PROC_CONTR = {"sinan": ""}
+
+PROC_DST = {"sinan": ""}
+
+PROC_HEPB = {"sinan": ""}
+
+PROC_HIV = {"sinan": ""}
+
+PROC_ID = {"sia": ""}
+
+PROC_REA = {
+ "ciha": "",
+ "sih": "",
+}
+
+PROC_SANG = {"sinan": ""}
+
+PROC_SEMEN = {"sinan": ""}
+
+PROC_SOLIC = {"sih": ""}
+
+PROC_VAGIN = {"sinan": ""}
+
+PROFNSUS = {"cnes": ""}
+
+PROFUNDO = {"sinan": ""}
+
+PROF_SUS = {"cnes": ""}
+
+PRONASCI = {"cnes": ""}
+
+PROSTACAO = {"sinan": ""}
+
+PROVA_BIOL = {"sinan": ""}
+
+PSICO_FARM = {"sinan": ""}
+
+PTRANSFU = {"sinan": ""}
+
+PULSO = {"sinan": ""}
+
+PURPURA = {"sinan": ""}
+
+PUSUARIO = {"sinan": ""}
+
+P_ATIVO_1 = {"sinan": ""}
+
+P_ATIVO_2 = {"sinan": ""}
+
+P_ATIVO_3 = {"sinan": ""}
+
+QTDATE = {"sia": ""}
+
+QTDFILMORT = {
+ "sim": "",
+ "sinasc": "",
+}
+
+QTDFILVIVO = {
+ "sim": "",
+ "sinasc": "",
+}
+
+QTDGESTANT = {"sinasc": ""}
+
+QTDPARTCES = {"sinasc": ""}
+
+QTDPARTNOR = {"sinasc": ""}
+
+QTDPCN = {"sia": ""}
+
+QTINST01 = {"cnes": ""}
+
+QTINST02 = {"cnes": ""}
+
+QTINST03 = {"cnes": ""}
+
+QTINST04 = {"cnes": ""}
+
+QTINST05 = {"cnes": ""}
+
+QTINST06 = {"cnes": ""}
+
+QTINST07 = {"cnes": ""}
+
+QTINST08 = {"cnes": ""}
+
+QTINST09 = {"cnes": ""}
+
+QTINST10 = {"cnes": ""}
+
+QTINST11 = {"cnes": ""}
+
+QTINST12 = {"cnes": ""}
+
+QTINST13 = {"cnes": ""}
+
+QTINST14 = {"cnes": ""}
+
+QTINST15 = {"cnes": ""}
+
+QTINST16 = {"cnes": ""}
+
+QTINST17 = {"cnes": ""}
+
+QTINST18 = {"cnes": ""}
+
+QTINST19 = {"cnes": ""}
+
+QTINST20 = {"cnes": ""}
+
+QTINST21 = {"cnes": ""}
+
+QTINST22 = {"cnes": ""}
+
+QTINST23 = {"cnes": ""}
+
+QTINST24 = {"cnes": ""}
+
+QTINST25 = {"cnes": ""}
+
+QTINST26 = {"cnes": ""}
+
+QTINST27 = {"cnes": ""}
+
+QTINST28 = {"cnes": ""}
+
+QTINST29 = {"cnes": ""}
+
+QTINST30 = {"cnes": ""}
+
+QTINST31 = {"cnes": ""}
+
+QTINST32 = {"cnes": ""}
+
+QTINST33 = {"cnes": ""}
+
+QTINST34 = {"cnes": ""}
+
+QTINST35 = {"cnes": ""}
+
+QTINST36 = {"cnes": ""}
+
+QTINST37 = {"cnes": ""}
+
+QTLEIT05 = {"cnes": ""}
+
+QTLEIT06 = {"cnes": ""}
+
+QTLEIT07 = {"cnes": ""}
+
+QTLEIT08 = {"cnes": ""}
+
+QTLEIT09 = {"cnes": ""}
+
+QTLEIT19 = {"cnes": ""}
+
+QTLEIT20 = {"cnes": ""}
+
+QTLEIT21 = {"cnes": ""}
+
+QTLEIT22 = {"cnes": ""}
+
+QTLEIT23 = {"cnes": ""}
+
+QTLEIT32 = {"cnes": ""}
+
+QTLEIT34 = {"cnes": ""}
+
+QTLEIT38 = {"cnes": ""}
+
+QTLEIT39 = {"cnes": ""}
+
+QTLEIT40 = {"cnes": ""}
+
+QTLEITP1 = {"cnes": ""}
+
+QTLEITP2 = {"cnes": ""}
+
+QTLEITP3 = {"cnes": ""}
+
+QT_AGIPL = {"cnes": ""}
+
+QT_AGLTN = {"cnes": ""}
+
+QT_APRES = {"sia": ""}
+
+QT_APROV = {"sia": ""}
+
+QT_CADRE = {"cnes": ""}
+
+QT_CAPFL = {"cnes": ""}
+
+QT_CENRE = {"cnes": ""}
+
+QT_CONRA = {"cnes": ""}
+
+QT_CONTR = {"cnes": ""}
+
+QT_DIARIAS = {"sih": ""}
+
+QT_DOSE = {"pni": ""}
+
+QT_EXIST = {"cnes": ""}
+
+QT_EXTPL = {"cnes": ""}
+
+QT_FRE18 = {"cnes": ""}
+
+QT_FRE30 = {"cnes": ""}
+
+QT_IRRHE = {"cnes": ""}
+
+QT_MAQAF = {"cnes": ""}
+
+QT_NSUS = {"cnes": ""}
+
+QT_PROC = {"ciha": ""}
+
+QT_REFAS = {"cnes": ""}
+
+QT_REFRE = {"cnes": ""}
+
+QT_REFSA = {"cnes": ""}
+
+QT_SELAD = {"cnes": ""}
+
+QT_SUS = {"cnes": ""}
+
+QT_TOTAL_C = {"sinan": ""}
+
+QT_USO = {"cnes": ""}
+
+QUANTID = {"sinan": ""}
+
+QUANTOS = {"sinan": ""}
+
+QUAN_COMUN = {"sinan": ""}
+
+QUAN_POSIT = {"sinan": ""}
+
+QUILOMBO = {"cnes": ""}
+
+QUIMRADI = {"cnes": ""}
+
+QUININO = {"sinan": ""}
+
+QUININOI = {"sinan": ""}
+
+QUINOLONA = {"sinan": ""}
+
+RACACOR = {
+ "sia": "",
+ "sim": "",
+ "sinasc": "",
+}
+
+RACACORMAE = {"sinasc": ""}
+
+RACACORN = {"sinasc": ""}
+
+RACACOR_RN = {"sinasc": ""}
+
+RACA_COR = {"sih": ""}
+
+RACA_MAE = {"sinan": ""}
+
+RACCOR = {"sinasc": ""}
+
+RAIOX = {"sinan": ""}
+
+RAIOX_TORA = {"sinan": ""}
+
+RAI_RESULT = {"sinan": ""}
+
+RAZAO = {"sih": ""}
+
+REACAO_SOR = {"sinan": ""}
+
+REACAO_VAC = {"sinan": ""}
+
+RECEMNASC = {"sinan": ""}
+
+RECEM_NASC = {"sinan": ""}
+
+RECUSA_QUI = {"sinan": ""}
+
+REDE_EDUCA = {"sinan": ""}
+
+REDE_SAU = {"sinan": ""}
+
+REFR_AQD_N = {"sinan": ""}
+
+REFR_AQE_N = {"sinan": ""}
+
+REFR_BID_N = {"sinan": ""}
+
+REFR_BIE_N = {"sinan": ""}
+
+REFR_PAD_N = {"sinan": ""}
+
+REFR_PAE_N = {"sinan": ""}
+
+REFR_TRD_N = {"sinan": ""}
+
+REFR_TRE_N = {"sinan": ""}
+
+REGCT = {"sih": ""}
+
+REGIME = {"sinan": ""}
+
+REGISTRO = {
+ "cnes": "",
+ "sim": "",
+}
+
+REGSAUDE = {"cnes": ""}
+
+REL_CAT = {"sinan": ""}
+
+REL_CONHEC = {"sinan": ""}
+
+REL_CONJ = {"sinan": ""}
+
+REL_CUIDA = {"sinan": ""}
+
+REL_DESCO = {"sinan": ""}
+
+REL_ESPEC = {"sinan": ""}
+
+REL_EXCON = {"sinan": ""}
+
+REL_EXNAM = {"sinan": ""}
+
+REL_FILHO = {"sinan": ""}
+
+REL_INST = {"sinan": ""}
+
+REL_IRMAO = {"sinan": ""}
+
+REL_MAD = {"sinan": ""}
+
+REL_MAE = {"sinan": ""}
+
+REL_NAMO = {"sinan": ""}
+
+REL_OUTROS = {"sinan": ""}
+
+REL_PAD = {"sinan": ""}
+
+REL_PAI = {"sinan": ""}
+
+REL_PATRAO = {"sinan": ""}
+
+REL_POL = {"sinan": ""}
+
+REL_PROPRI = {"sinan": ""}
+
+REL_SEXUAL = {"sinan": ""}
+
+REL_TRAB = {"sinan": ""}
+
+REMESSA = {"sih": ""}
+
+RENAL = {"sinan": ""}
+
+REPETITIVO = {"sinan": ""}
+
+RESALIM1 = {"sinan": ""}
+
+RESALIMOUT = {"sinan": ""}
+
+RESPIRATO = {"sinan": ""}
+
+RESULT = {"sinan": ""}
+
+RESUL_HIS = {"sinan": ""}
+
+RESUL_NS1 = {"sinan": ""}
+
+RESUL_OUT = {"sinan": ""}
+
+RESUL_PCR = {"sinan": ""}
+
+RESUL_PCR_ = {"sinan": ""}
+
+RESUL_PRNT = {"sinan": ""}
+
+RESUL_SORO = {"sinan": ""}
+
+RESUL_VIRA = {"sinan": ""}
+
+RESUL_VI_N = {"sinan": ""}
+
+RES_BIOL = {"cnes": ""}
+
+RES_CHIKS1 = {"sinan": ""}
+
+RES_CHIKS2 = {"sinan": ""}
+
+RES_COMU = {"cnes": ""}
+
+RES_HBSAG = {"sinan": ""}
+
+RES_HIST = {"sinan": ""}
+
+RES_IMUNO = {"sinan": ""}
+
+RES_ISOL = {"sinan": ""}
+
+RES_PCR = {"sinan": ""}
+
+RES_QUIM = {"cnes": ""}
+
+RES_RADI = {"cnes": ""}
+
+RETAR_PM = {"sinan": ""}
+
+RETENCAO = {"cnes": ""}
+
+RETINOPA = {"sinan": ""}
+
+RE_ANTIHBC = {"sinan": ""}
+
+RE_ANTIHCV = {"sinan": ""}
+
+RIFAMPICIN = {"sinan": ""}
+
+ROEDOR_N = {"sinan": ""}
+
+ROTA_R = {"sinan": ""}
+
+RUBRICA = {"sih": ""}
+
+RUIDO_OUT = {"sinan": ""}
+
+RUI_OUTDES = {"sinan": ""}
+
+S1_IGG = {"sinan": ""}
+
+S1_IGM = {"sinan": ""}
+
+S1_TIT1 = {"sinan": ""}
+
+S2_IGG = {"sinan": ""}
+
+S2_IGM = {"sinan": ""}
+
+S2_TIT1 = {"sinan": ""}
+
+S3_IGG = {"sinan": ""}
+
+S3_IGM = {"sinan": ""}
+
+SALA_MOL = {"cnes": ""}
+
+SANG = {"sinan": ""}
+
+SANGRAM = {"sinan": ""}
+
+SANGUE = {"sinan": ""}
+
+SEMAGESTAC = {
+ "sim": "",
+ "sinasc": "",
+}
+
+SEMANGEST = {"sim": ""}
+
+SEMIPLEN = {"sih": ""}
+
+SEM_ACID = {"sinan": ""}
+
+SEM_DIAG = {"sinan": ""}
+
+SEM_NOT = {"sinan": ""}
+
+SEM_PRI = {"sinan": ""}
+
+SEM_QUIMIO = {"sinan": ""}
+
+SENSIBILI = {"sinan": ""}
+
+SEQUENCIA = {"sih": ""}
+
+SEQ_AIH5 = {"sih": ""}
+
+SERAP01P = {"cnes": ""}
+
+SERAP01T = {"cnes": ""}
+
+SERAP02P = {"cnes": ""}
+
+SERAP02T = {"cnes": ""}
+
+SERAP03P = {"cnes": ""}
+
+SERAP03T = {"cnes": ""}
+
+SERAP04P = {"cnes": ""}
+
+SERAP04T = {"cnes": ""}
+
+SERAP05P = {"cnes": ""}
+
+SERAP05T = {"cnes": ""}
+
+SERAP06P = {"cnes": ""}
+
+SERAP06T = {"cnes": ""}
+
+SERAP07P = {"cnes": ""}
+
+SERAP07T = {"cnes": ""}
+
+SERAP08P = {"cnes": ""}
+
+SERAP08T = {"cnes": ""}
+
+SERAP09P = {"cnes": ""}
+
+SERAP09T = {"cnes": ""}
+
+SERAP10P = {"cnes": ""}
+
+SERAP10T = {"cnes": ""}
+
+SERAP11P = {"cnes": ""}
+
+SERAP11T = {"cnes": ""}
+
+SERAPOIO = {"cnes": ""}
+
+SERIESCFAL = {"sim": ""}
+
+SERIESCMAE = {
+ "sim": "",
+ "sinasc": "",
+}
+
+SERV_CLA = {"sih": ""}
+
+SERV_ESP = {"cnes": ""}
+
+SEXO = {
+ "ciha": "",
+ "ibge": "",
+ "sih": "",
+ "sim": "",
+ "sinasc": "",
+}
+
+SEXOPAC = {"sia": ""}
+
+SEXUAL = {"sinan": ""}
+
+SEX_ASSEDI = {"sinan": ""}
+
+SEX_ESPEC = {"sinan": ""}
+
+SEX_ESTUPR = {"sinan": ""}
+
+SEX_EXPLO = {"sinan": ""}
+
+SEX_OUTRO = {"sinan": ""}
+
+SEX_PORNO = {"sinan": ""}
+
+SEX_PUDOR = {"sinan": ""}
+
+SGRUPHAB = {"cnes": ""}
+
+SG_UF = {"sinan": ""}
+
+SG_UF_2 = {"sinan": ""}
+
+SG_UF_AT = {"sinan": ""}
+
+SG_UF_INTE = {"sinan": ""}
+
+SG_UF_NOT = {"sinan": ""}
+
+SG_UF_OCOR = {"sinan": ""}
+
+SILICA = {"sinan": ""}
+
+SIMUL_RD = {"cnes": ""}
+
+SINAIS = {"sinan": ""}
+
+SINAIS_ICC = {"sinan": ""}
+
+SINTOMATIC = {"sinan": ""}
+
+SINTO_DES = {"sinan": ""}
+
+SIN_GANG = {"sinan": ""}
+
+SIN_OUT = {"sinan": ""}
+
+SIN_OUTR_E = {"sinan": ""}
+
+SIN_PULM = {"sinan": ""}
+
+SIS_JUST = {"sih": ""}
+
+SITUACAO = {"ibge": ""}
+
+SITUA_12_M = {"sinan": ""}
+
+SITUA_9_M = {"sinan": ""}
+
+SITUA_ENCE = {"sinan": ""}
+
+SIT_CONJUG = {"sinan": ""}
+
+SIT_RUA = {"sia": ""}
+
+SIT_TRAB = {"sinan": ""}
+
+SOLVENTE = {"sinan": ""}
+
+SORO1 = {"sinan": ""}
+
+SORO2 = {"sinan": ""}
+
+SOROTIPO = {"sinan": ""}
+
+SOUTROS = {"sinan": ""}
+
+SP_AA = {"sih": ""}
+
+SP_ATOPROF = {"sih": ""}
+
+SP_CGCHOSP = {"sih": ""}
+
+SP_CIDPRI = {"sih": ""}
+
+SP_CIDSEC = {"sih": ""}
+
+SP_CNES = {"sih": ""}
+
+SP_COMPLEX = {"sih": ""}
+
+SP_CO_FAEC = {"sih": ""}
+
+SP_CPFCGC = {"sih": ""}
+
+SP_DES_HOS = {"sih": ""}
+
+SP_DES_PAC = {"sih": ""}
+
+SP_DTINTER = {"sih": ""}
+
+SP_DTSAIDA = {"sih": ""}
+
+SP_FINANC = {"sih": ""}
+
+SP_GESTOR = {"sih": ""}
+
+SP_MM = {"sih": ""}
+
+SP_M_HOSP = {"sih": ""}
+
+SP_M_PAC = {"sih": ""}
+
+SP_NAIH = {"sih": ""}
+
+SP_NF = {"sih": ""}
+
+SP_NUM_PR = {"sih": ""}
+
+SP_PF_CBO = {"sih": ""}
+
+SP_PF_DOC = {"sih": ""}
+
+SP_PJ_DOC = {"sih": ""}
+
+SP_PROCREA = {"sih": ""}
+
+SP_PTSP = {"sih": ""}
+
+SP_PTSP_NF = {"sih": ""}
+
+SP_QTD_ATO = {"sih": ""}
+
+SP_QT_PROC = {"sih": ""}
+
+SP_TIPO = {"sih": ""}
+
+SP_TP_ATO = {"sih": ""}
+
+SP_UF = {"sih": ""}
+
+SP_U_AIH = {"sih": ""}
+
+SP_VALATO = {"sih": ""}
+
+SRVUNICO = {"cnes": ""}
+
+STALIMENTO = {"sinan": ""}
+
+STANTIBIO = {"sinan": ""}
+
+STANTIBOTU = {"sinan": ""}
+
+STAVALIA = {"sinan": ""}
+
+STBOCA = {"sinan": ""}
+
+STBROMATO = {"sinan": ""}
+
+STBULBAR = {"sinan": ""}
+
+STCARDIACA = {"sinan": ""}
+
+STCASEIRA = {"sinan": ""}
+
+STCEFALEIA = {"sinan": ""}
+
+STCESPARTO = {"sinasc": ""}
+
+STCLINICA = {"sinan": ""}
+
+STCODIFICA = {"sim": ""}
+
+STCOMA = {"sinan": ""}
+
+STCOMERCIO = {"sinan": ""}
+
+STCONSTIPA = {"sinan": ""}
+
+STCURA1 = {"sinan": ""}
+
+STCURA2 = {"sinan": ""}
+
+STCURA3 = {"sinan": ""}
+
+STDESCENDE = {"sinan": ""}
+
+STDIARREIA = {"sinan": ""}
+
+STDIPLOPIA = {"sinan": ""}
+
+STDISARTRI = {"sinan": ""}
+
+STDISFAGIA = {"sinan": ""}
+
+STDISFONIA = {"sinan": ""}
+
+STDISPNEIA = {"sinan": ""}
+
+STDNEPIDEM = {"sinasc": ""}
+
+STDNNOVA = {"sinasc": ""}
+
+STDOEPIDEM = {"sim": ""}
+
+STDOMICILI = {"sinan": ""}
+
+STDONOVA = {"sim": ""}
+
+STELETRO = {"sinan": ""}
+
+STESCOLA = {"sinan": ""}
+
+STEXPALIM = {"sinan": ""}
+
+STFACIAL = {"sinan": ""}
+
+STFEBRE = {"sinan": ""}
+
+STFERIMENT = {"sinan": ""}
+
+STFESTA = {"sinan": ""}
+
+STFEZESMAT = {"sinan": ""}
+
+STFEZESRES = {"sinan": ""}
+
+STFLACIDEZ = {"sinan": ""}
+
+STHOSPITAL = {"sinan": ""}
+
+STMEMINF = {"sinan": ""}
+
+STMEMSUP = {"sinan": ""}
+
+STMIDRIASE = {"sinan": ""}
+
+STNAUSEA = {"sinan": ""}
+
+STOFTALMO = {"sinan": ""}
+
+STOUTROLOC = {"sinan": ""}
+
+STOUTROSIN = {"sinan": ""}
+
+STOUTROTRA = {"sinan": ""}
+
+STPARESTES = {"sinan": ""}
+
+STPTOSE = {"sinan": ""}
+
+STRESPIRA = {"sinan": ""}
+
+STRESS = {"sinan": ""}
+
+STRESTAURA = {"sinan": ""}
+
+STRESULTA = {"sinan": ""}
+
+STSENSIVEL = {"sinan": ""}
+
+STSIMETRIC = {"sinan": ""}
+
+STSORO = {"sinan": ""}
+
+STSOROMAT = {"sinan": ""}
+
+STSORORES = {"sinan": ""}
+
+STTONTURA = {"sinan": ""}
+
+STTRABALHO = {"sinan": ""}
+
+STTRABPART = {"sinasc": ""}
+
+STVENTILA = {"sinan": ""}
+
+STVISAO = {"sinan": ""}
+
+STVOMITO = {"sinan": ""}
+
+ST_ALI1COL = {"sinan": ""}
+
+ST_ALI2COL = {"sinan": ""}
+
+ST_ALI2RES = {"sinan": ""}
+
+ST_ALIMEN = {"sinan": ""}
+
+ST_A_CLINI = {"sinan": ""}
+
+ST_BLOQ = {"sih": ""}
+
+ST_F_OUTRO = {"sinan": ""}
+
+ST_IMPRO = {"sinan": ""}
+
+ST_IMPRO_ = {"sinan": ""}
+
+ST_INAD = {"sinan": ""}
+
+ST_INCUB_M = {"sinan": ""}
+
+ST_INC_ME = {"sinan": ""}
+
+ST_MANIP = {"sinan": ""}
+
+ST_MOT_BLO = {"sih": ""}
+
+ST_SITUAC = {"sih": ""}
+
+SUBFIN = {"sia": ""}
+
+SUDORESE = {"sinan": ""}
+
+SUGE_VINCU = {"sinan": ""}
+
+SULFA = {"sinan": ""}
+
+SUPERFICIA = {"sinan": ""}
+
+SUPERIORES = {"sinan": ""}
+
+SURTO = {"sinan": ""}
+
+SUSPEITOS = {"sinan": ""}
+
+S_ACELL6 = {"cnes": ""}
+
+S_AFERES = {"cnes": ""}
+
+S_ALCOME = {"cnes": ""}
+
+S_ALSEME = {"cnes": ""}
+
+S_ARMAZE = {"cnes": ""}
+
+S_BIOMOL = {"cnes": ""}
+
+S_COLETA = {"cnes": ""}
+
+S_CONTRQ = {"cnes": ""}
+
+S_CPFLUX = {"cnes": ""}
+
+S_DISTRI = {"cnes": ""}
+
+S_DPAC = {"cnes": ""}
+
+S_DPI = {"cnes": ""}
+
+S_ESTOQU = {"cnes": ""}
+
+S_HBSAGN = {"cnes": ""}
+
+S_HBSAGP = {"cnes": ""}
+
+S_HEMOST = {"cnes": ""}
+
+S_IMUNFE = {"cnes": ""}
+
+S_IMUNOH = {"cnes": ""}
+
+S_PREEST = {"cnes": ""}
+
+S_PREPAR = {"cnes": ""}
+
+S_PRETRA = {"cnes": ""}
+
+S_PROCES = {"cnes": ""}
+
+S_QCDURA = {"cnes": ""}
+
+S_QLDURA = {"cnes": ""}
+
+S_REAGN = {"cnes": ""}
+
+S_REAGP = {"cnes": ""}
+
+S_RECEPC = {"cnes": ""}
+
+S_REHCV = {"cnes": ""}
+
+S_SGDOAD = {"cnes": ""}
+
+S_SIMULA = {"cnes": ""}
+
+S_SOROLO = {"cnes": ""}
+
+S_TRANSF = {"cnes": ""}
+
+S_TRICLI = {"cnes": ""}
+
+S_TRIHMT = {"cnes": ""}
+
+TAREFAS = {"sinan": ""}
+
+TATU_PIER = {"sinan": ""}
+
+TECIDOS = {"sinan": ""}
+
+TECNICA = {"sinan": ""}
+
+TEMPO = {"sinan": ""}
+
+TEMPO_FUMA = {"sinan": ""}
+
+TERCEIRIZA = {"sinan": ""}
+
+TERCEIRO = {"cnes": ""}
+
+TESTE_TUBE = {"sinan": ""}
+
+TEST_MOLEC = {"sinan": ""}
+
+TEST_SENSI = {"sinan": ""}
+
+TETRAC = {"sinan": ""}
+
+TIFICA = {"sinan": ""}
+
+TIPEQUIP = {"cnes": ""}
+
+TIPOACID = {"sim": ""}
+
+TIPOBITO = {"sim": ""}
+
+TIPOGRAV = {"sim": ""}
+
+TIPOPARTO = {"sim": ""}
+
+TIPOSEGM = {"cnes": ""}
+
+TIPOVIOL = {"sim": ""}
+
+TIPO_ACID = {"sinan": ""}
+
+TIPO_EQP = {"cnes": ""}
+
+TIPO_GRAV = {"sinasc": ""}
+
+TIPO_INVES = {"sinan": ""}
+
+TIPO_LEITE = {"sinan": ""}
+
+TIPO_PARTO = {"sinasc": ""}
+
+TIPPRE = {"sia": ""}
+
+TIP_DIARRE = {"sinan": ""}
+
+TIP_SORO = {"sinan": ""}
+
+TIREOIDITE = {"sinan": ""}
+
+TIT_IGG_S1 = {"sinan": ""}
+
+TIT_IGG_S2 = {"sinan": ""}
+
+TIT_IGM_S1 = {"sinan": ""}
+
+TIT_IGM_S2 = {"sinan": ""}
+
+TOMOGRAFIA = {"sinan": ""}
+
+TONR_CER_N = {"sinan": ""}
+
+TONR_FAC_N = {"sinan": ""}
+
+TONR_MID_N = {"sinan": ""}
+
+TONR_MIE_N = {"sinan": ""}
+
+TONR_MSD_N = {"sinan": ""}
+
+TONR_MSE_N = {"sinan": ""}
+
+TONTURA = {"sinan": ""}
+
+TOSSE = {"sinan": ""}
+
+TOT_PT_SP = {"sih": ""}
+
+TPALTA_N = {"sinan": ""}
+
+TPAPRESENT = {"sinasc": ""}
+
+TPASSINA = {"sim": ""}
+
+TPATENDE = {"sinan": ""}
+
+TPAUTOCTO = {"sinan": ""}
+
+TPBOTULISM = {"sinan": ""}
+
+TPBROMATO = {"sinan": ""}
+
+TPCLINICA = {"sinan": ""}
+
+TPCONFIRMA = {"sinan": ""}
+
+TPDISEC1 = {"sih": ""}
+
+TPDISEC2 = {"sih": ""}
+
+TPDISEC3 = {"sih": ""}
+
+TPDISEC4 = {"sih": ""}
+
+TPDISEC5 = {"sih": ""}
+
+TPDISEC6 = {"sih": ""}
+
+TPDISEC7 = {"sih": ""}
+
+TPDISEC8 = {"sih": ""}
+
+TPDISEC9 = {"sih": ""}
+
+TPDOCRESP = {"sinasc": ""}
+
+TPESQPAR = {"sinan": ""}
+
+TPESQUEMA = {"sinan": ""}
+
+TPEVIDENCI = {"sinan": ""}
+
+TPEXANTE = {"sinan": ""}
+
+TPEXP = {"sinan": ""}
+
+TPFEZESTOX = {"sinan": ""}
+
+TPFIN = {"sia": ""}
+
+TPFUNCRESP = {"sinasc": ""}
+
+TPGESTAO = {"cnes": ""}
+
+TPIDADEPAC = {"sia": ""}
+
+TPMETESTIM = {"sinasc": ""}
+
+TPMORTEOCO = {"sim": ""}
+
+TPMOTPARC = {"sinan": ""}
+
+TPNASCASSI = {"sinasc": ""}
+
+TPNEURO = {"sinan": ""}
+
+TPNIVELINV = {"sim": ""}
+
+TPOBITOCOR = {"sim": ""}
+
+TPPOS = {"sim": ""}
+
+TPRAPIDO1 = {"sinan": ""}
+
+TPRAPIDO2 = {"sinan": ""}
+
+TPRAPIDO3 = {"sinan": ""}
+
+TPRESGINFO = {"sim": ""}
+
+TPROBSON = {"sinasc": ""}
+
+TPRUIDO = {"sinan": ""}
+
+TPSOROTOX = {"sinan": ""}
+
+TPTEMPO = {"sinan": ""}
+
+TPTEMPORIS = {"sinan": ""}
+
+TPTESTE1 = {"sinan": ""}
+
+TPUNINOT = {"sinan": ""}
+
+TPUPS = {"sia": ""}
+
+TP_ACIDENT = {"sinan": ""}
+
+TP_AFAST = {"sinan": ""}
+
+TP_ALI1TOX = {"sinan": ""}
+
+TP_ALI2TO = {"sinan": ""}
+
+TP_AMB_OCO = {"sinan": ""}
+
+TP_ANALISE = {"sinan": ""}
+
+TP_CAUSA = {"sinan": ""}
+
+TP_CAUSOUT = {"sinan": ""}
+
+TP_COLOUT = {"sinan": ""}
+
+TP_DESAT = {"cnes": ""}
+
+TP_DROGA = {"sia": ""}
+
+TP_IDENTFI = {"sinan": ""}
+
+TP_INDIRET = {"sinan": ""}
+
+TP_LEITO = {"cnes": ""}
+
+TP_LIQUOR = {"sinan": ""}
+
+TP_LOCAL = {"sinan": ""}
+
+TP_LOCALLE = {"sinan": ""}
+
+TP_MOTORA = {"sinan": ""}
+
+TP_NOT = {"sinan": ""}
+
+TP_ORIGEM = {"sinan": ""}
+
+TP_PREST = {"cnes": ""}
+
+TP_PROFILA = {"sinan": ""}
+
+TP_PRO_PRE = {"sinan": ""}
+
+TP_REPETE = {"sinan": ""}
+
+TP_SENSITI = {"sinan": ""}
+
+TP_SISTEMA = {"sinan": ""}
+
+TP_SOROHCV = {"sinan": ""}
+
+TP_TEMP_FU = {"sinan": ""}
+
+TP_TOXOUTR = {"sinan": ""}
+
+TP_UNID = {"cnes": ""}
+
+TP_VACINA = {"sinan": ""}
+
+TP_ZN_OCO = {"sinan": ""}
+
+TRAB_DESC = {"sinan": ""}
+
+TRAB_DOE = {"sinan": ""}
+
+TRANSF = {"sinan": ""}
+
+TRANSFU = {"sinan": ""}
+
+TRANSFUSAO = {"sinan": ""}
+
+TRANSPLA = {"sinan": ""}
+
+TRANSPO_N = {"sinan": ""}
+
+TRAN_COMP = {"sinan": ""}
+
+TRAN_MENT = {"sinan": ""}
+
+TRATADO = {"sinan": ""}
+
+TRATAM = {"sinan": ""}
+
+TRATAMENTO = {"sinan": ""}
+
+TRATANAO = {"sinan": ""}
+
+TRATPARC = {"sinan": ""}
+
+TRATSUP_AT = {"sinan": ""}
+
+TRAT_ATUAL = {"sinan": ""}
+
+TRAT_SUPER = {"sinan": ""}
+
+TRA_AMPOLA = {"sinan": ""}
+
+TRA_ANTIBI = {"sinan": ""}
+
+TRA_ANTIGO = {"sinan": ""}
+
+TRA_ANTIVI = {"sinan": ""}
+
+TRA_CLASSI = {"sinan": ""}
+
+TRA_CORTIC = {"sinan": ""}
+
+TRA_CPAP = {"sinan": ""}
+
+TRA_DATA_A = {"sinan": ""}
+
+TRA_DATA_S = {"sinan": ""}
+
+TRA_DIAG_C = {"sinan": ""}
+
+TRA_DIAG_T = {"sinan": ""}
+
+TRA_DOSE = {"sinan": ""}
+
+TRA_DROGA_ = {"sinan": ""}
+
+TRA_DT = {"sinan": ""}
+
+TRA_DT_ALT = {"sinan": ""}
+
+TRA_DT_INT = {"sinan": ""}
+
+TRA_ESPECI = {"sinan": ""}
+
+TRA_ESQUEM = {"sinan": ""}
+
+TRA_ESQU_1 = {"sinan": ""}
+
+TRA_HOSP = {"sinan": ""}
+
+TRA_INDI_N = {"sinan": ""}
+
+TRA_INFILT = {"sinan": ""}
+
+TRA_INFI_1 = {"sinan": ""}
+
+TRA_INTERR = {"sinan": ""}
+
+TRA_MECANI = {"sinan": ""}
+
+TRA_MOTIVO = {"sinan": ""}
+
+TRA_MUNICI = {"sinan": ""}
+
+TRA_NUM_PA = {"sinan": ""}
+
+TRA_OUTRA_ = {"sinan": ""}
+
+TRA_OUTR_N = {"sinan": ""}
+
+TRA_PESO = {"sinan": ""}
+
+TRA_QTD_SO = {"sinan": ""}
+
+TRA_SORO = {"sinan": ""}
+
+TRA_TRATAM = {"sinan": ""}
+
+TRA_UF = {"sinan": ""}
+
+TRA_VASOAT = {"sinan": ""}
+
+TREINA_MIL = {"sinan": ""}
+
+TRESMAIS = {"sinan": ""}
+
+TRONCO = {"sinan": ""}
+
+TUBE = {"sinan": ""}
+
+TURNO_AT = {"cnes": ""}
+
+T_FEBRE = {"sinan": ""}
+
+UF = {
+ "pni": "",
+ "sinan": "",
+}
+
+UFATUAL = {"sinan": ""}
+
+UFCOD = {"ibge": ""}
+
+UFDIF = {"sia": ""}
+
+UFINFORM = {
+ "sim": "",
+ "sinasc": "",
+}
+
+UFINTERNA = {"sinan": ""}
+
+UFMUN = {"sia": ""}
+
+UFMUNRES = {"cnes": ""}
+
+UFRESAT = {"sinan": ""}
+
+UFTRANSFU = {"sinan": ""}
+
+UF_ACID = {"sinan": ""}
+
+UF_ATENDE = {"sinan": ""}
+
+UF_EMP = {"sinan": ""}
+
+UF_H = {"sinan": ""}
+
+UF_HOSP = {"sinan": ""}
+
+UF_HOSPITA = {"sinan": ""}
+
+UF_ING = {"sinan": ""}
+
+UF_PRE_NAT = {"sinan": ""}
+
+UF_RES = {"sih": ""}
+
+UF_TRANSF = {"sinan": ""}
+
+UF_ZI = {"sih": ""}
+
+UNI_ATENDE = {"sinan": ""}
+
+UN_COBAL = {"cnes": ""}
+
+URGEMERG = {"cnes": ""}
+
+URINA = {"sinan": ""}
+
+URO_D = {"sinan": ""}
+
+URO_D_2 = {"sinan": ""}
+
+URO_D_3 = {"sinan": ""}
+
+URO_R1 = {"sinan": ""}
+
+URO_R2 = {"sinan": ""}
+
+URO_R3 = {"sinan": ""}
+
+US_ORTP = {"sih": ""}
+
+US_RN = {"sih": ""}
+
+US_SADT = {"sih": ""}
+
+US_SANGUE = {"sih": ""}
+
+US_SH = {"sih": ""}
+
+US_SP = {"sih": ""}
+
+US_TOT = {"sih": ""}
+
+UTILIZACAO = {"sinan": ""}
+
+UTIL_DESC = {"sinan": ""}
+
+UTI_INT_AL = {"sih": ""}
+
+UTI_INT_AN = {"sih": ""}
+
+UTI_INT_IN = {"sih": ""}
+
+UTI_INT_TO = {
+ "ciha": "",
+ "sih": "",
+}
+
+UTI_MES_AL = {"sih": ""}
+
+UTI_MES_AN = {"sih": ""}
+
+UTI_MES_IN = {"sih": ""}
+
+UTI_MES_TO = {
+ "ciha": "",
+ "sih": "",
+}
+
+UTI_TOTAL = {"sih": ""}
+
+UTRANSFU = {"sinan": ""}
+
+VACINA = {"sinan": ""}
+
+VACINACAO = {"sinan": ""}
+
+VACINAD = {"sinan": ""}
+
+VACINADO = {"sinan": ""}
+
+VACINADUPL = {"sinan": ""}
+
+VACINARUBE = {"sinan": ""}
+
+VAC_HEP_B = {"sinan": ""}
+
+VAL_ACOMP = {"sih": ""}
+
+VAL_OBSANG = {"sih": ""}
+
+VAL_ORTP = {"sih": ""}
+
+VAL_PED1AC = {"sih": ""}
+
+VAL_RN = {"sih": ""}
+
+VAL_SADT = {"sih": ""}
+
+VAL_SADTSR = {"sih": ""}
+
+VAL_SANG = {"sih": ""}
+
+VAL_SANGUE = {"sih": ""}
+
+VAL_SH = {"sih": ""}
+
+VAL_SH_FED = {"sih": ""}
+
+VAL_SH_GES = {"sih": ""}
+
+VAL_SP = {"sih": ""}
+
+VAL_SP_FED = {"sih": ""}
+
+VAL_SP_GES = {"sih": ""}
+
+VAL_TOT = {"sih": ""}
+
+VAL_TRANSP = {"sih": ""}
+
+VAL_UCI = {"sih": ""}
+
+VAL_UTI = {"sih": ""}
+
+VARIA_VIR = {"sinan": ""}
+
+VERSAOSCB = {"sim": ""}
+
+VERSAOSIST = {
+ "sim": "",
+ "sinasc": "",
+}
+
+VIA_1 = {"sinan": ""}
+
+VIA_2 = {"sinan": ""}
+
+VIA_3 = {"sinan": ""}
+
+VINCPREV = {"sih": ""}
+
+VINCULAC = {"cnes": ""}
+
+VINCULO = {"sinan": ""}
+
+VINCUL_A = {"cnes": ""}
+
+VINCUL_C = {"cnes": ""}
+
+VINCUL_N = {"cnes": ""}
+
+VINC_ESP = {"sinan": ""}
+
+VINC_OUT = {"sinan": ""}
+
+VINC_SUS = {"cnes": ""}
+
+VIOL_ESPEC = {"sinan": ""}
+
+VIOL_FINAN = {"sinan": ""}
+
+VIOL_FISIC = {"sinan": ""}
+
+VIOL_INFAN = {"sinan": ""}
+
+VIOL_LEGAL = {"sinan": ""}
+
+VIOL_MOTIV = {"sinan": ""}
+
+VIOL_NEGLI = {"sinan": ""}
+
+VIOL_OUTR = {"sinan": ""}
+
+VIOL_PSICO = {"sinan": ""}
+
+VIOL_SEXU = {"sinan": ""}
+
+VIOL_TORT = {"sinan": ""}
+
+VIOL_TRAF = {"sinan": ""}
+
+VL_APRES = {"sia": ""}
+
+VL_APROV = {"sia": ""}
+
+VOMITO = {"sinan": ""}
+
+VOMITOS = {"sinan": ""}
+
+VOP_VORH = {"sinan": ""}
+
+XENODIAG = {"sinan": ""}
+
+ZONA = {"sinan": ""}
+
+ZUMBIDO = {"sinan": ""}
+
+agravaids = {"sinan": ""}
+
+agravalcoo = {"sinan": ""}
+
+agravdiabe = {"sinan": ""}
+
+agravdoenc = {"sinan": ""}
+
+agravdroga = {"sinan": ""}
+
+agravoutra = {"sinan": ""}
+
+agravtabac = {"sinan": ""}
+
+ant_anemia = {"sinan": ""}
+
+ant_asteri = {"sinan": ""}
+
+ant_candid = {"sinan": ""}
+
+ant_caquex = {"sinan": ""}
+
+ant_contag = {"sinan": ""}
+
+ant_dermat = {"sinan": ""}
+
+ant_diarre = {"sinan": ""}
+
+ant_disfun = {"sinan": ""}
+
+ant_droga = {"sinan": ""}
+
+ant_esof_n = {"sinan": ""}
+
+ant_febre = {"sinan": ""}
+
+ant_herpes = {"sinan": ""}
+
+ant_linfo = {"sinan": ""}
+
+ant_pneumo = {"sinan": ""}
+
+ant_pulmon = {"sinan": ""}
+
+ant_rel_ca = {"sinan": ""}
+
+ant_tosse = {"sinan": ""}
+
+ant_toxo = {"sinan": ""}
+
+ant_trasmi = {"sinan": ""}
+
+ant_tuberc = {"sinan": ""}
+
+antrelse_n = {"sinan": ""}
+
+antsifil_n = {"sinan": ""}
+
+aval_atu_n = {"sinan": ""}
+
+avalia_n = {"sinan": ""}
+
+bacilosc_1 = {"sinan": ""}
+
+bacilosc_2 = {"sinan": ""}
+
+bacilosc_3 = {"sinan": ""}
+
+bacilosc_4 = {"sinan": ""}
+
+bacilosc_5 = {"sinan": ""}
+
+bacilosc_6 = {"sinan": ""}
+
+bacilosco = {"sinan": ""}
+
+cancro_mole = {"sinan": ""}
+
+caract_genomica = {"sinan": ""}
+
+clado = {"sinan": ""}
+
+clamidea = {"sinan": ""}
+
+classatual = {"sinan": ""}
+
+classi_fin = {"sinan": ""}
+
+classopera = {"sinan": ""}
+
+co_uf_res = {"sinan": ""}
+
+comp_sexual = {"sinan": ""}
+
+contador = {
+ "sim": "",
+ "sinasc": "",
+}
+
+contag_cd4 = {"sinan": ""}
+
+contat_animal = {"sinan": ""}
+
+contexam = {"sinan": ""}
+
+contreg = {"sinan": ""}
+
+criterio = {"sinan": ""}
+
+cs_escol_n = {"sinan": ""}
+
+cs_gestant = {"sinan": ""}
+
+cs_raca = {"sinan": ""}
+
+cs_sexo = {"sinan": ""}
+
+cs_zona = {"sinan": ""}
+
+cultura_es = {"sinan": ""}
+
+data_vacina = {"sinan": ""}
+
+def_diagno = {"sinan": ""}
+
+dip = {"sinan": ""}
+
+doenca_tra1 = {"sinan": ""}
+
+donovanose = {"sinan": ""}
+
+dose_receb = {"sinan": ""}
+
+dt_coleta = {"sinan": ""}
+
+dt_diag = {"sinan": ""}
+
+dt_encerra = {"sinan": ""}
+
+dt_evolucao = {"sinan": ""}
+
+dt_inic_tr = {"sinan": ""}
+
+dt_interna = {"sinan": ""}
+
+dt_nasc = {"sinan": ""}
+
+dt_noti_at = {"sinan": ""}
+
+dt_notific = {"sinan": ""}
+
+dt_obito = {"sinan": ""}
+
+dt_sin_pri = {"sinan": ""}
+
+dtalta_n = {"sinan": ""}
+
+dtinictrat = {"sinan": ""}
+
+dtultcomp = {"sinan": ""}
+
+esq_atu_n = {"sinan": ""}
+
+esq_ini_n = {"sinan": ""}
+
+estrangeiro = {"sinan": ""}
+
+evolucao = {"sinan": ""}
+
+forma = {"sinan": ""}
+
+formaclini = {"sinan": ""}
+
+gonorreia = {"sinan": ""}
+
+herpes_genital = {"sinan": ""}
+
+histopatol = {"sinan": ""}
+
+hiv = {"sinan": ""}
+
+hospital = {"sinan": ""}
+
+hpv = {"sinan": ""}
+
+htlv = {"sinan": ""}
+
+id_agravo = {"sinan": ""}
+
+id_mn_resi = {"sinan": ""}
+
+id_municip = {"sinan": ""}
+
+id_regiona = {"sinan": ""}
+
+id_rg_resi = {"sinan": ""}
+
+id_unidade = {"sinan": ""}
+
+ident_genero = {"sinan": ""}
+
+ist_ativa = {"sinan": ""}
+
+lab_triage = {"sinan": ""}
+
+labc_igg = {"sinan": ""}
+
+linfogranuloma = {"sinan": ""}
+
+local_cont = {"sinan": ""}
+
+met_lab = {"sinan": ""}
+
+mododetect = {"sinan": ""}
+
+modoentr = {"sinan": ""}
+
+mycoplasma_genital = {"sinan": ""}
+
+name = {"sinan": ""}
+
+nervosafet = {"sinan": ""}
+
+nu_ano = {"sinan": ""}
+
+nu_idade_n = {"sinan": ""}
+
+nu_lesoes = {"sinan": ""}
+
+orienta_sexual = {"sinan": ""}
+
+outro_des = {"sinan": ""}
+
+owner_org = {"sinan": ""}
+
+pac_imunossup = {"sinan": ""}
+
+pop_liber = {"sinan": ""}
+
+profile = {"sinan": ""}
+
+profis_saude = {"sinan": ""}
+
+raiox_tora = {"sinan": ""}
+
+resources = {"sinan": ""}
+
+resultado_exa_lab = {"sinan": ""}
+
+sg_uf = {"sinan": ""}
+
+sg_uf_not = {"sinan": ""}
+
+sifilis = {"sinan": ""}
+
+sintoma = {"sinan": ""}
+
+situa_ence = {"sinan": ""}
+
+test_molec = {"sinan": ""}
+
+test_sensi = {"sinan": ""}
+
+title = {"sinan": ""}
+
+tp_amost = {"sinan": ""}
+
+tpalta_n = {"sinan": ""}
+
+tpesquema = {"sinan": ""}
+
+tra_esquem = {"sinan": ""}
+
+transm = {"sinan": ""}
+
+tratamento = {"sinan": ""}
+
+tratamento_mpox = {"sinan": ""}
+
+tratparc = {"sinan": ""}
+
+tratsup_at = {"sinan": ""}
+
+trichomomas_vaginals = {"sinan": ""}
+
+uti = {"sinan": ""}
+
+vacina = {"sinan": ""}
+
+verruga_genital = {"sinan": ""}
+
+vinculo_epi = {"sinan": ""}
diff --git a/pysus/api/ftp/README.ipynb b/pysus/api/ducklake/catalog/orm/__init__.py
similarity index 100%
rename from pysus/api/ftp/README.ipynb
rename to pysus/api/ducklake/catalog/orm/__init__.py
diff --git a/pysus/api/ducklake/catalog.py b/pysus/api/ducklake/catalog/orm/dataset.py
similarity index 83%
rename from pysus/api/ducklake/catalog.py
rename to pysus/api/ducklake/catalog/orm/dataset.py
index a9c0fd05..687799ce 100644
--- a/pysus/api/ducklake/catalog.py
+++ b/pysus/api/ducklake/catalog/orm/dataset.py
@@ -1,18 +1,16 @@
-"""SQLAlchemy ORM models for the DuckLake catalog schema.
+"""Per-dataset catalog ORM models — stored in ``catalog_<dataset>.db``.
-Defines tables for datasets, groups, files, and columns stored
-in the pysus schema of the local DuckDB catalog.
+Defines tables for groups, files, and columns within a single dataset.
"""
-import enum
from datetime import datetime
from typing import Optional
from sqlalchemy import (
+ BigInteger,
Boolean,
Column,
DateTime,
- Enum,
ForeignKey,
Index,
Integer,
@@ -24,7 +22,7 @@
class Base(DeclarativeBase):
- """Base class for all DuckLake catalog ORM models."""
+ """Base class for per-dataset catalog ORM models."""
pass
@@ -48,29 +46,7 @@ class Base(DeclarativeBase):
)
-class CatalogTable(Base):
- """Abstract base for catalog tables sharing the pysus schema."""
-
- __abstract__ = True
- __table_args__: tuple = ({"schema": "pysus"},)
-
-
-class Origin(enum.Enum):
- """Origin type for a dataset.
-
- Attributes
- ----------
- FTP : str
- Dataset sourced from the FTP server.
- API : str
- Dataset sourced from an API.
- """
-
- FTP = "ftp"
- API = "api"
-
-
-class CatalogDataset(CatalogTable):
+class Dataset(Base):
"""ORM model for the datasets table, representing a dataset collection.
Parameters
@@ -83,11 +59,10 @@ class CatalogDataset(CatalogTable):
Human-readable full name.
description : str, optional
Optional description of the dataset contents.
- origin : Origin
- Whether the dataset originates from FTP or an API.
"""
__tablename__ = "datasets"
+ __table_args__: tuple = ({"schema": "pysus"},)
id = Column(
Integer,
@@ -97,15 +72,14 @@ class CatalogDataset(CatalogTable):
name = Column(String, nullable=False, unique=True, index=True)
long_name = Column(String, nullable=False)
description = Column(String, nullable=True)
- origin = Column(Enum(Origin), nullable=False)
groups = relationship(
- "DatasetGroup",
+ "Group",
back_populates="dataset",
cascade="all, delete-orphan",
)
files = relationship(
- "CatalogFile",
+ "File",
back_populates="dataset",
cascade="all, delete-orphan",
)
@@ -116,7 +90,7 @@ class CatalogDataset(CatalogTable):
)
-class ColumnDefinition(CatalogTable):
+class ColumnDefinition(Base):
"""ORM model for dataset column metadata.
Parameters
@@ -136,6 +110,7 @@ class ColumnDefinition(CatalogTable):
"""
__tablename__ = "dataset_columns"
+ __table_args__: tuple = ({"schema": "pysus"},)
id = Column(
Integer,
@@ -153,9 +128,9 @@ class ColumnDefinition(CatalogTable):
description = Column(String, nullable=True)
nullable = Column(Boolean, nullable=False, default=True)
- dataset = relationship("CatalogDataset", back_populates="columns")
+ dataset = relationship("Dataset", back_populates="columns")
files = relationship(
- "CatalogFile",
+ "File",
secondary=file_columns,
back_populates="columns",
)
@@ -166,7 +141,7 @@ class ColumnDefinition(CatalogTable):
)
-class DatasetGroup(CatalogTable):
+class Group(Base):
"""ORM model for dataset groups, grouping related files within a dataset.
Parameters
@@ -184,6 +159,7 @@ class DatasetGroup(CatalogTable):
"""
__tablename__ = "dataset_groups"
+ __table_args__: tuple = ({"schema": "pysus"},)
id = Column(
Integer,
@@ -200,9 +176,12 @@ class DatasetGroup(CatalogTable):
long_name = Column(String, nullable=False)
description = Column(String, nullable=True)
- dataset = relationship("CatalogDataset", back_populates="groups")
+ dataset = relationship(
+ "Dataset",
+ back_populates="groups",
+ )
files = relationship(
- "CatalogFile",
+ "File",
back_populates="group",
cascade="all, delete-orphan",
)
@@ -213,7 +192,7 @@ class DatasetGroup(CatalogTable):
)
-class CatalogFile(CatalogTable):
+class File(Base):
"""ORM model for the files table, representing individual data files.
Parameters
@@ -230,10 +209,14 @@ class CatalogFile(CatalogTable):
File size in bytes.
rows : int
Number of rows in the file.
+ type : str, optional
+ File type identifier.
modified : datetime
Timestamp of the last known modification.
origin_modified : datetime, optional
Original modification timestamp from the source.
+ origin_size : int
+ Original file size in bytes.
origin_path : str
Original source path of the file.
sha256 : str, optional
@@ -247,6 +230,7 @@ class CatalogFile(CatalogTable):
"""
__tablename__ = "files"
+ __table_args__: tuple = ({"schema": "pysus"},)
id: Mapped[int] = mapped_column(
Integer,
@@ -264,13 +248,15 @@ class CatalogFile(CatalogTable):
)
path: Mapped[str] = mapped_column(String, nullable=False, unique=True)
- size: Mapped[int] = mapped_column(Integer, nullable=False)
+ size: Mapped[int] = mapped_column(BigInteger, nullable=False)
rows: Mapped[int] = mapped_column(Integer, nullable=False)
+ type: Mapped[str | None] = mapped_column(String, nullable=True)
modified: Mapped[datetime] = mapped_column(DateTime, nullable=False)
origin_modified: Mapped[datetime | None] = mapped_column(
DateTime,
nullable=True,
)
+ origin_size: Mapped[int] = mapped_column(BigInteger, nullable=False)
origin_path: Mapped[str] = mapped_column(String, nullable=False)
sha256: Mapped[str | None] = mapped_column(
String(64),
@@ -294,12 +280,12 @@ class CatalogFile(CatalogTable):
index=True,
)
- dataset: Mapped["CatalogDataset"] = relationship(
- "CatalogDataset",
+ dataset: Mapped["Dataset"] = relationship(
+ "Dataset",
back_populates="files",
)
- group: Mapped[Optional["DatasetGroup"]] = relationship(
- "DatasetGroup",
+ group: Mapped[Optional["Group"]] = relationship(
+ "Group",
back_populates="files",
)
columns: Mapped[list["ColumnDefinition"]] = relationship(
diff --git a/pysus/api/ducklake/catalog/orm/default.py b/pysus/api/ducklake/catalog/orm/default.py
new file mode 100644
index 00000000..bd412080
--- /dev/null
+++ b/pysus/api/ducklake/catalog/orm/default.py
@@ -0,0 +1,43 @@
+"""Central discovery catalog ORM models — stored in ``catalog.db``.
+
+Tracks only available datasets. File-level metadata lives in
+per-dataset ``catalog_<dataset>.db`` files defined in ``.dataset``.
+"""
+
+from sqlalchemy import Column, Integer, Sequence, String
+from sqlalchemy.orm import DeclarativeBase
+
+
+class Base(DeclarativeBase):
+ """Declarative base for the central discovery catalog ORM models."""
+
+ # Intentionally empty: models in this module only need a common base.
+ pass
+
+
+class Dataset(Base):
+ """Central registry entry describing one available dataset.
+
+ Mapped to the ``datasets`` table in the ``pysus`` schema.
+
+ Parameters
+ ----------
+ id : int, optional
+ Primary key (auto-generated by sequence).
+ name : str
+ Unique short name for the dataset.
+ long_name : str
+ Human-readable full name.
+ description : str, optional
+ Optional description of the dataset contents.
+ """
+
+ __tablename__ = "datasets"
+ __table_args__: tuple = ({"schema": "pysus"},)
+
+ # Primary key drawn from the ``datasets_id_seq`` sequence (``pysus``).
+ id = Column(
+ Integer,
+ Sequence("datasets_id_seq", schema="pysus"),
+ primary_key=True,
+ )
+ # Required, unique and indexed.
+ name = Column(String, nullable=False, unique=True, index=True)
+ # Required.
+ long_name = Column(String, nullable=False)
+ # May be NULL.
+ description = Column(String, nullable=True)
diff --git a/pysus/api/ducklake/catalog/parsers.py b/pysus/api/ducklake/catalog/parsers.py
new file mode 100644
index 00000000..e69de29b
diff --git a/pysus/api/ducklake/client.py b/pysus/api/ducklake/client.py
index 21e9525b..f7339569 100644
--- a/pysus/api/ducklake/client.py
+++ b/pysus/api/ducklake/client.py
@@ -1,12 +1,12 @@
-"""High-level client for DuckLake S3-based dataset catalog.
+"""High-level client for DuckLake S3-based public health dataset catalog.
-Provides authentication, catalog synchronization, dataset querying,
-and file download capabilities backed by a local DuckDB engine.
+Provides authentication, dataset discovery, and file download
+capabilities backed by per-dataset DuckDB engines.
"""
from collections.abc import Callable
from pathlib import Path
-from typing import Any, Literal
+from typing import Any
import boto3
import httpx
@@ -15,103 +15,15 @@
from pydantic import BaseModel, PrivateAttr, SecretStr
from pysus import CACHEPATH
from pysus.api.models import BaseRemoteClient, BaseRemoteFile
+from pysus.api.types import DUCKLAKE
from sqlalchemy import create_engine
-from sqlalchemy.orm import contains_eager, joinedload, sessionmaker
+from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import StaticPool
-from .catalog import CatalogDataset, CatalogFile, DatasetGroup
+from .catalog.orm.default import Dataset
from .models import DuckDataset, File
-class CatalogDatasetAdapter:
- """Adapter wrapping a CatalogDataset ORM record for use by File objects.
-
- Parameters
- ----------
- catalog_dataset : CatalogDataset
- The ORM record to wrap.
- ducklake : DuckLake
- The parent DuckLake client instance.
- """
-
- def __init__(self, catalog_dataset: CatalogDataset, ducklake):
- self.name = catalog_dataset.name
- self.long_name = catalog_dataset.long_name or ""
- self.description = catalog_dataset.description or ""
- self.group_definitions: dict[str, str] = {}
- self.ducklake = ducklake
- self.client = ducklake
-
- @property
- def content(self):
- """Query the DuckLake client for files in this dataset.
-
- Returns
- -------
- list
- List of files belonging to this dataset.
- """
- return self.ducklake.query(dataset=self.name.upper())
-
-
-class DatasetGroupAdapter:
- """Adapter wrapping a DatasetGroup ORM record for use by File objects.
-
- Parameters
- ----------
- dataset_group : DatasetGroup
- The ORM record to wrap.
- dataset : CatalogDataset
- The parent dataset.
- """
-
- def __init__(self, dataset_group: DatasetGroup, dataset):
- self.name = dataset_group.name
- self.long_name = dataset_group.long_name or ""
- self.description = dataset_group.description or ""
- self.dataset = dataset
-
- def __str__(self):
- """Return the group name as its string representation.
-
- Returns
- -------
- str
- The short name of the group.
- """
- return self.name
-
- @property
- async def files(self):
- """Return the list of files in this group.
-
- Returns
- -------
- list
- List of file objects in this group.
- """
- return []
-
- async def _fetch_files(self):
- """Fetch files from the remote source for this group."""
- return []
-
- async def search(self, **kwargs):
- """Search for files within this group matching the given criteria.
-
- Parameters
- ----------
- ``**kwargs``
- Arbitrary filter criteria.
-
- Returns
- -------
- list
- List of matching file objects.
- """
- return []
-
-
class DuckLakeCredentials(BaseModel):
"""Credentials for authenticating with the S3-compatible object storage.
@@ -140,8 +52,6 @@ class DuckLake(BaseRemoteClient):
Bucket name containing the catalog.
credentials : DuckLakeCredentials, optional
Credentials for authenticated S3 operations.
- engine : object, optional
- Pre-configured SQLAlchemy engine to reuse.
"""
endpoint: str = "nbg1.your-objectstorage.com"
@@ -149,28 +59,26 @@ class DuckLake(BaseRemoteClient):
bucket: str = "pysus"
credentials: DuckLakeCredentials | None = None
- _cache_dir: Path = PrivateAttr()
- _catalog_local: Path = PrivateAttr()
- _catalog_remote: str = "public/catalog.db"
_s3_client: Any = PrivateAttr(default=None)
- _engine: Any = PrivateAttr(default=None)
_Session: Any = PrivateAttr(default=None)
+ _datasets: list = PrivateAttr(default_factory=list)
- def __init__(self, engine=None, **data):
- """Initialize the DuckLake client with an optional existing engine.
+ def __init__(self, engine=None, **data) -> None:
+ """Initialize the DuckLake client.
Parameters
----------
engine : object, optional
- Pre-configured SQLAlchemy engine instead of creating a new one.
+ Pre-configured SQLAlchemy engine for the discovery catalog.
``**data``
- Additional fields passed to the Pydantic base model.
+ Fields passed to the Pydantic base model.
"""
super().__init__(**data)
self._engine = engine
- self._cache_dir = Path(CACHEPATH) / "ducklake"
+ self._cache_dir: Path = Path(CACHEPATH) / "ducklake"
self._cache_dir.mkdir(parents=True, exist_ok=True)
- self._catalog_local = self._cache_dir / "catalog.db"
+ self._catalog_local: Path = self._cache_dir / "catalog.duckdb"
+ self._catalog_remote: str = "public/catalog.duckdb"
@property
def name(self) -> str:
@@ -181,7 +89,7 @@ def name(self) -> str:
str
The client short name.
"""
- return "DuckLake"
+ return DUCKLAKE
@property
def long_name(self) -> str:
@@ -207,18 +115,18 @@ def description(self) -> str:
@property
def catalog_path(self) -> Path:
- """Return the local path to the downloaded catalog database.
+ """Return the local path to the discovery catalog database.
Returns
-------
Path
- Filesystem path to the local catalog database file.
+ Filesystem path to the local discovery catalog file.
"""
return self._catalog_local
@property
def _catalog_url(self) -> str:
- """Return the remote URL of the catalog database file."""
+ """Return the remote URL of the discovery catalog."""
return f"https://{self.endpoint}/{self.bucket}/{self._catalog_remote}"
@property
@@ -244,16 +152,7 @@ async def datasets(self, **kwargs) -> list[DuckDataset]:
def _fetch():
with self._Session() as session:
- results = (
- session.query(CatalogDataset)
- .options(
- joinedload(CatalogDataset.groups).joinedload(
- DatasetGroup.files
- ),
- joinedload(CatalogDataset.files),
- )
- .all()
- )
+ results = session.query(Dataset).all()
session.expunge_all()
return results
@@ -292,10 +191,19 @@ async def login(
self._get_s3_client,
)
- def _setup_engine(self):
- """Create and configure the DuckDB engine with S3 settings."""
+ def _setup_engine(self, local_path: Path | None = None):
+ """Create and configure a DuckDB engine with S3 settings.
+
+ Parameters
+ ----------
+ local_path : Path, optional
+ Path to the catalog database file.
+ Defaults to the discovery catalog.
+ """
+ if local_path is None:
+ local_path = self._catalog_local
engine = create_engine(
- f"duckdb:///{self._catalog_local}",
+ f"duckdb:///{local_path}",
poolclass=StaticPool,
)
@@ -303,10 +211,8 @@ def _setup_engine(self):
conn.exec_driver_sql("INSTALL ducklake; LOAD ducklake;")
has_pysus = conn.exec_driver_sql(
- """
- SELECT 1 FROM information_schema.schemata WHERE
- schema_name = 'pysus'
- """
+ "SELECT 1 FROM information_schema.schemata"
+ " WHERE schema_name = 'pysus'"
).fetchone()
if has_pysus:
@@ -336,8 +242,8 @@ def _setup_engine(self):
return engine
- async def connect(self, force: bool = False):
- """Connect to the catalog, downloading it first if necessary.
+ async def connect(self, force: bool = False) -> None:
+ """Connect to the discovery catalog, downloading first if needed.
Parameters
----------
@@ -349,66 +255,72 @@ async def connect(self, force: bool = False):
self._Session = sessionmaker(bind=self._engine)
return
- await self._load_catalog()
+ await self._download_catalog(
+ self._catalog_local,
+ self._catalog_remote,
+ )
self._engine = await to_thread.run_sync(self._setup_engine)
self._Session = sessionmaker(bind=self._engine)
- async def close(self):
- """Dispose the engine, then upload the catalog if authenticated.
+ async def close(self, update_catalog: bool = False) -> None:
+ """Close all datasets and dispose the discovery engine.
- Raises
- ------
- PermissionError
- If the client is not authenticated but an upload is required.
+ Parameters
+ ----------
+ update_catalog : bool, optional
+ Whether to upload all per-dataset catalogs before closing.
+ Requires authenticated credentials.
"""
+ if update_catalog:
+ await self._upload_catalog()
+
+ datasets: list["DuckDataset"] = list(self._datasets)
+ for ds in datasets:
+ await ds.close(update_catalog=update_catalog)
+ self._datasets.clear()
+
if self._engine:
await to_thread.run_sync(self._engine.dispose)
-
self._engine = None
self._Session = None
-
- if self._is_authenticated:
- await self._upload_catalog()
-
self._s3_client = None
- async def _download_file(
+ async def _download(
self,
- file: BaseRemoteFile,
- output: Path,
+ remote_path: str,
+ local_path: Path,
+ *,
callback: Callable[[int, int], None] | None = None,
- ) -> Path:
- """Download a single file from object storage to the local path."""
- if not isinstance(file, File):
- raise ValueError("FTP File was not properly instantiated")
-
- url = f"https://{self.endpoint}/{self.bucket}/{file.record.path}"
- async with httpx.AsyncClient(follow_redirects=True) as client:
- async with client.stream("GET", url) as r:
- r.raise_for_status()
- total = int(r.headers.get("Content-Length", 0))
- downloaded = 0
- with open(output, "wb") as f:
- async for chunk in r.aiter_bytes(chunk_size=1024 * 1024):
- await to_thread.run_sync(f.write, chunk)
- downloaded += len(chunk)
- if callback:
- callback(downloaded, total)
- return output
+ ) -> None:
+ """Download *remote_path* to *local_path* with streaming and retries.
- async def _download_catalog(self, client: httpx.AsyncClient):
- """Download the catalog database from remote storage with retries."""
+ Parameters
+ ----------
+ remote_path : str
+ Object key within the bucket.
+ local_path : Path
+ Local destination path.
+ callback : Callable[[int, int], None], optional
+ Progress callback receiving ``(downloaded, total)`` bytes.
+ """
+ url = f"https://{self.endpoint}/{self.bucket}/{remote_path}"
max_retries = 5
for attempt in range(max_retries):
try:
- async with client.stream("GET", self._catalog_url) as r:
- r.raise_for_status()
- with open(self._catalog_local, "wb") as f:
- async for chunk in r.aiter_bytes(
- chunk_size=1024 * 1024,
- ):
- await to_thread.run_sync(f.write, chunk)
+ async with httpx.AsyncClient(follow_redirects=True) as client:
+ async with client.stream("GET", url) as r:
+ r.raise_for_status()
+ total = int(r.headers.get("Content-Length", 0))
+ downloaded = 0
+ with open(local_path, "wb") as f:
+ async for chunk in r.aiter_bytes(
+ chunk_size=1024 * 1024,
+ ):
+ await to_thread.run_sync(f.write, chunk)
+ downloaded += len(chunk)
+ if callback:
+ callback(downloaded, total)
return
except OSError as e:
if attempt < max_retries - 1:
@@ -416,6 +328,54 @@ async def _download_catalog(self, client: httpx.AsyncClient):
else:
raise e
+ async def _download_catalog(
+ self, local_path: Path, remote_path: str
+ ) -> None:
+ """Download a catalog database from remote storage with retries.
+
+ Parameters
+ ----------
+ local_path : Path
+ Local destination path for the catalog file.
+ remote_path : str
+ Remote object key within the bucket.
+ """
+ url = f"https://{self.endpoint}/{self.bucket}/{remote_path}"
+
+ if local_path.exists():
+ try:
+ local_size = local_path.stat().st_size
+ except OSError:
+ local_size = -1
+ else:
+ local_size = -1
+
+ async with httpx.AsyncClient(follow_redirects=True) as client:
+ try:
+ head = await client.head(url)
+ head.raise_for_status()
+ remote_size = int(head.headers.get("content-length", 0))
+ except Exception: # noqa: B902
+ remote_size = 0
+
+ if remote_size == local_size:
+ return
+
+ await self._download(remote_path, local_path)
+
+ async def _download_file(
+ self,
+ file: BaseRemoteFile,
+ output: Path,
+ callback: Callable[[int, int], None] | None = None,
+ ) -> Path:
+ """Download a single file from object storage to the local path."""
+ if not isinstance(file, File):
+ raise ValueError("FTP File was not properly instantiated")
+
+ await self._download(file.record.path, output, callback=callback)
+ return output
+
def _get_s3_client(self):
"""Create and return a boto3 S3 client for the configured endpoint."""
if not self.credentials:
@@ -431,142 +391,32 @@ def _get_s3_client(self):
config=Config(signature_version="s3v4"),
)
- async def _load_catalog(self):
- """Download remote catalog if the local copy is outdated or missing."""
- async with httpx.AsyncClient(follow_redirects=True) as client:
- local_size = -1
- if self._catalog_local.exists():
- try:
- local_size = self._catalog_local.stat().st_size
- except OSError:
- pass
- try:
- head = await client.head(self._catalog_url)
- head.raise_for_status()
- remote_size = int(head.headers.get("content-length", 0))
- except Exception: # noqa: B902
- remote_size = 0
- if remote_size != local_size:
- await self._download_catalog(client)
+ async def _upload_catalog(self) -> None:
+ """Upload all per-dataset catalogs to remote storage.
- async def _upload_catalog(self):
- """Upload the local catalog database to remote storage."""
- if not self._is_authenticated:
+ Requires authenticated credentials.
+ """
+ if not self.credentials:
raise PermissionError(
"Admin credentials required to upload catalog.",
)
- def _upload():
- self._s3_client.upload_file(
- str(self._catalog_local),
- self.bucket,
- self._catalog_remote,
- )
-
- await to_thread.run_sync(_upload)
-
- async def query(
- self,
- client: Literal["FTP", "DadosGov"] | None = None,
- dataset: str | None = None,
- group: str | None = None,
- state: str | None = None,
- year: int | None = None,
- month: int | None = None,
- ) -> list[File]:
- """Filter catalog files by client, dataset, group, state, year.
+ datasets = await self.datasets()
+ for ds in datasets:
+ if not ds._catalog_local.exists():
+ continue
- Parameters
- ----------
- client : Literal["FTP", "DadosGov"], optional
- Source client to filter by.
- dataset : str, optional
- Dataset name to filter by.
- group : str, optional
- Group name pattern to filter by (case-insensitive ILIKE).
- state : str, optional
- Two-letter state code to filter by.
- year : int, optional
- Year to filter by.
- month : int, optional
- Month to filter by.
+ _local = str(ds._catalog_local)
+ _name = ds._catalog_name
- Returns
- -------
- list[:class:`~pysus.api.ducklake.models.File`]
- List of matching file objects.
- """
- if not self._Session:
- await self.connect()
-
- def _query():
- with self._Session() as session:
- q = session.query(CatalogFile)
-
- if dataset:
- q = (
- q.join(CatalogFile.dataset)
- .options(contains_eager(CatalogFile.dataset))
- .filter(CatalogDataset.name == dataset.lower())
- )
- else:
- q = q.options(joinedload(CatalogFile.dataset))
-
- if group:
- q = (
- q.join(CatalogFile.group)
- .options(contains_eager(CatalogFile.group))
- .filter(DatasetGroup.name.ilike(group))
- )
- else:
- q = q.options(joinedload(CatalogFile.group))
-
- if state:
- q = q.filter(CatalogFile.state == state.upper())
-
- if year:
- q = q.filter(CatalogFile.year == year)
-
- if month:
- q = q.filter(CatalogFile.month == month)
+ def _upload(local=_local, name=_name):
+ self._s3_client.upload_file(
+ local,
+ self.bucket,
+ name,
+ )
- results = q.all()
- session.expunge_all()
- return results
+ await to_thread.run_sync(_upload)
- records = await to_thread.run_sync(_query)
- if client:
- prefix = f"public/data/{client.lower()}/"
- records = [r for r in records if r.path.startswith(prefix)]
- else:
- ftp = [r for r in records if r.path.startswith("public/data/ftp/")]
- dadosgov = [
- r for r in records if r.path.startswith("public/data/dadosgov/")
- ]
- ftp_keys = set()
- for r in ftp:
- stem = Path(r.path).stem
- key = (r.dataset_id, r.year, r.month, stem)
- ftp_keys.add(key)
-
- def has_ftp_match(r):
- stem = Path(r.path).stem
- if stem.endswith(".csv"):
- stem = stem[:-4]
- key = (r.dataset_id, r.year, r.month, stem)
- return key in ftp_keys
-
- records = ftp + [r for r in dadosgov if not has_ftp_match(r)]
-
- return [
- File(
- path=r.path,
- record=r,
- dataset=CatalogDatasetAdapter(r.dataset, self),
- group=(
- DatasetGroupAdapter(r.group, r.dataset) if r.group else None
- ),
- )
- for r in records
- ]
+DuckDataset.model_rebuild(_types_namespace={"DuckLake": DuckLake})
diff --git a/pysus/api/ducklake/models.py b/pysus/api/ducklake/models.py
index 306a96f6..c9c4d3e5 100644
--- a/pysus/api/ducklake/models.py
+++ b/pysus/api/ducklake/models.py
@@ -8,19 +8,19 @@
from collections.abc import Callable
from datetime import datetime
from pathlib import Path
-from typing import Any, Union
+from typing import TYPE_CHECKING, Any, Optional, Union
-import anyio
-from pydantic import Field
+from anyio import to_thread
+from pydantic import Field, PrivateAttr
from pysus import CACHEPATH
-from pysus.api.models import (
- BaseRemoteClient,
- BaseRemoteDataset,
- BaseRemoteFile,
- BaseRemoteGroup,
-)
+from pysus.api.ducklake.catalog.orm.dataset import Dataset
+from pysus.api.ducklake.catalog.orm.dataset import File as CatalogFile
+from pysus.api.ducklake.catalog.orm.dataset import Group
+from pysus.api.models import BaseRemoteDataset, BaseRemoteFile, BaseRemoteGroup
+from sqlalchemy.orm import contains_eager, joinedload, sessionmaker
-from .catalog import CatalogDataset, CatalogFile, DatasetGroup
+if TYPE_CHECKING: # pragma: no cover
+ from .client import DuckLake
class File(BaseRemoteFile):
@@ -39,9 +39,18 @@ class File(BaseRemoteFile):
"""
record: CatalogFile = Field(exclude=True)
- type: str = "remote"
- dataset: Any
- group: Any = None
+ group: Optional["DuckGroup"] = Field(default=None, exclude=True)
+
+ def __init__(self, **data: Any) -> None:
+ record = data.pop("record")
+ group = data.pop("group", None)
+ super().__init__(
+ path=Path(record.path),
+ type=record.type or "remote",
+ record=record, # type: ignore[call-arg]
+ group=group,
+ **data,
+ )
@property
def basename(self) -> str:
@@ -147,23 +156,36 @@ def _calculate():
sha256_hash.update(byte_block)
return sha256_hash.hexdigest()
- actual_hash = await anyio.to_thread.run_sync(_calculate)
+ actual_hash = await to_thread.run_sync(_calculate)
return actual_hash == self.sha256
class DuckDataset(BaseRemoteDataset):
"""A dataset from the DuckLake catalog, containing groups and files.
+ Each dataset manages its own DuckDB engine connected to a
+ per-dataset catalog file (``catalog_{name}.duckdb``).
+
Parameters
----------
- record : CatalogDataset
+ record : Dataset
The underlying ORM record.
client : BaseRemoteClient
The parent client instance.
"""
- record: CatalogDataset = Field(exclude=True)
- client: BaseRemoteClient = Field(exclude=True)
+ record: Dataset = Field(exclude=True)
+ client: "DuckLake" = Field(exclude=True)
+
+ _engine: Any = PrivateAttr(default=None)
+ _Session: Any = PrivateAttr(default=None)
+
+ def __init__(self, **data) -> None:
+ super().__init__(**data)
+ self._cache_dir: Path = Path(CACHEPATH) / "ducklake"
+ self._cache_dir.mkdir(parents=True, exist_ok=True)
+ self._catalog_name: str = f"catalog_{self.record.name.lower()}.duckdb"
+ self._catalog_local: Path = self._cache_dir / self._catalog_name
def __repr__(self) -> str:
"""Return a string representation of the dataset.
@@ -184,7 +206,7 @@ def name(self) -> str:
str
The dataset short name.
"""
- return self.record.name
+ return self.record.name # type: ignore
@property
def long_name(self) -> str:
@@ -195,11 +217,7 @@ def long_name(self) -> str:
str
The dataset display name, falling back to the short name.
"""
- return (
- self.record.dataset_metadata.long_name
- if self.record.dataset_metadata
- else self.name
- )
+ return "" # TODO: return the dataset's display name
@property
def description(self) -> str:
@@ -210,30 +228,171 @@ def description(self) -> str:
str
The dataset description, or an empty string if unavailable.
"""
- return (
- self.record.dataset_metadata.description
- if self.record.dataset_metadata
- else ""
+ return "" # TODO: return the dataset's description
+
+ @property
+ def catalog_path(self) -> Path:
+ """Return the local path to the downloaded catalog database.
+
+ Returns
+ -------
+ Path
+ Filesystem path to the local catalog database file.
+ """
+ return self._catalog_local
+
+ async def connect(
+ self,
+ force: bool = False,
+ callback: Callable[[int, int], None] | None = None,
+ ) -> None:
+ """Connect to the catalog, downloading it first if necessary.
+
+ Parameters
+ ----------
+ force : bool, optional
+ Whether to re-download and re-connect even if already connected.
+ """
+ if self._engine and not force:
+ if not self._Session:
+ self._Session = sessionmaker(bind=self._engine)
+ return
+
+ if self not in self.client._datasets:
+ self.client._datasets.append(self)
+
+ await self.client._download(
+ f"public/{self._catalog_name}",
+ self._catalog_local,
+ callback=callback,
)
+ self._engine = await to_thread.run_sync(
+ lambda: self.client._setup_engine(self._catalog_local)
+ )
+ self._Session = sessionmaker(bind=self._engine)
+
+ async def close(self, update_catalog: bool = False):
+ """Dispose the engine, optionally uploading the per-dataset catalog.
+
+ Parameters
+ ----------
+ update_catalog : bool, optional
+ Whether to upload the per-dataset catalog to remote storage.
+ Requires the parent client to be authenticated.
+ """
+ if self._engine:
+ await to_thread.run_sync(self._engine.dispose)
+ self._engine = None
+ self._Session = None
+
+ if update_catalog and self.client._is_authenticated:
+ await self._upload_catalog()
+
+ async def _upload_catalog(self):
+ """Upload the per-dataset catalog to remote storage."""
+ if not self.client.credentials:
+ raise PermissionError(
+ "Admin credentials required to upload catalog.",
+ )
+
+ def _upload():
+ self.client._s3_client.upload_file(
+ str(self._catalog_local),
+ self.client.bucket,
+ f"catalog_{self.record.name.lower()}.duckdb",
+ )
+
+ await to_thread.run_sync(_upload)
+
+ async def query(
+ self,
+ group: str | None = None,
+ state: str | None = None,
+ year: int | None = None,
+ month: int | None = None,
+ ) -> list[File]:
+ """Filter files in this dataset's catalog by group, state, year, month.
+
+ Parameters
+ ----------
+ group : str, optional
+ Group name pattern to filter by (case-insensitive ILIKE).
+ state : str, optional
+ Two-letter state code to filter by.
+ year : int, optional
+ Year to filter by.
+ month : int, optional
+ Month to filter by.
+
+ Returns
+ -------
+ list[File]
+ List of matching file objects.
+ """
+ if not self._Session:
+ await self.connect()
+
+ def _query() -> list[CatalogFile]:
+ with self._Session() as session:
+ q = session.query(CatalogFile).options(
+ joinedload(CatalogFile.group),
+ joinedload(CatalogFile.dataset),
+ )
+ if group:
+ q = (
+ q.join(CatalogFile.group)
+ .options(contains_eager(CatalogFile.group))
+ .filter(Group.name.ilike(group))
+ )
+ if state:
+ q = q.filter(CatalogFile.state == state.upper())
+ if year:
+ q = q.filter(CatalogFile.year == year)
+ if month:
+ q = q.filter(CatalogFile.month == month)
+ results = q.all()
+ session.expunge_all()
+ return results
+
+ records: list[CatalogFile] = await to_thread.run_sync(_query)
+ return [File(record=r, dataset=self) for r in records]
async def _fetch_content(self) -> list[Union["DuckGroup", File]]:
"""Fetch groups and files belonging to this dataset."""
+ if not self._Session:
+ await self.connect()
+
+ def _fetch():
+ with self._Session() as session:
+ dataset = (
+ session.query(Dataset)
+ .options(
+ joinedload(Dataset.groups).joinedload(Group.files),
+ joinedload(Dataset.files),
+ )
+ .filter(Dataset.name == self.record.name)
+ .first()
+ )
+ if not dataset:
+ return [], []
+ session.expunge_all()
+ return dataset.groups, dataset.files
+
+ groups, files = await to_thread.run_sync(_fetch)
+
items: list[Union["DuckGroup", File]] = []
- if self.record.groups:
- items.extend(
- [DuckGroup(record=g, dataset=self) for g in self.record.groups]
- )
+ if groups:
+ items.extend([DuckGroup(record=g, dataset=self) for g in groups])
- if self.record.files:
+ if files:
items.extend(
[
File(
- path=f.path,
record=f,
dataset=self,
)
- for f in self.record.files
+ for f in files
]
)
@@ -245,13 +404,13 @@ class DuckGroup(BaseRemoteGroup):
Parameters
----------
- record : DatasetGroup
+ record : Group
The underlying ORM record.
dataset : DuckDataset
The parent dataset instance.
"""
- record: DatasetGroup = Field(exclude=True)
+ record: Group = Field(exclude=True)
dataset: DuckDataset = Field(exclude=True)
@property
@@ -263,7 +422,7 @@ def name(self) -> str:
str
The group short name.
"""
- return self.record.name
+ return self.record.name # type: ignore
@property
def long_name(self) -> str:
@@ -274,11 +433,7 @@ def long_name(self) -> str:
str
The group display name, falling back to the short name.
"""
- return (
- self.record.group_metadata.long_name
- if self.record.group_metadata
- else self.name
- )
+ return self.record.long_name or self.name # type: ignore
@property
def description(self) -> str:
@@ -289,15 +444,12 @@ def description(self) -> str:
str
The group description, or an empty string if unavailable.
"""
- if self.record.group_metadata:
- return self.record.group_metadata.description
- return ""
+ return self.record.description # type: ignore
async def _fetch_files(self) -> list[BaseRemoteFile]:
"""Fetch the list of files belonging to this group."""
files: list[BaseRemoteFile] = [
File(
- path=f.path,
record=f,
group=self,
dataset=self.dataset,
diff --git a/pysus/api/extensions.py b/pysus/api/extensions.py
index 59de4dd7..bdbed991 100644
--- a/pysus/api/extensions.py
+++ b/pysus/api/extensions.py
@@ -10,7 +10,6 @@
from collections.abc import AsyncGenerator, Callable
from datetime import datetime
from pathlib import Path
-from typing import ClassVar
import chardet
import pandas as pd
@@ -19,17 +18,46 @@
from anyio import to_thread
from dbfread import DBF as DBFReader
from pydantic import Field, PrivateAttr
+from pyreaddbc import dbc2dbf
from pysus import CACHEPATH
+from pysus.api.metadata.models import Column
from pysus.api.models import BaseCompressedFile, BaseLocalFile, BaseTabularFile
from .types import FileType
-try:
- from pyreaddbc import dbc2dbf
-
- DBC_IMPORT = True
-except ImportError:
- DBC_IMPORT = False
+_DTYPE_MAP: dict[str, str] = {
+ "int8": "INTEGER",
+ "int16": "INTEGER",
+ "int32": "INTEGER",
+ "int64": "BIGINT",
+ "uint8": "INTEGER",
+ "uint16": "INTEGER",
+ "uint32": "INTEGER",
+ "uint64": "BIGINT",
+ "float": "FLOAT",
+ "float16": "FLOAT",
+ "float32": "FLOAT",
+ "float64": "DOUBLE",
+ "double": "DOUBLE",
+ "bool": "BOOLEAN",
+ "bool_": "BOOLEAN",
+ "date32": "DATE",
+ "date64": "DATE",
+ "date": "DATE",
+ "datetime64[ns]": "DATE",
+ "object": "VARCHAR",
+ "string": "VARCHAR",
+ "utf8": "VARCHAR",
+ "large_string": "VARCHAR",
+}
+
+
+def _map_dtype(raw: str) -> str:
+ raw_lower = raw.lower().split("[")[0].split("(")[0].strip()
+ for key, val in _DTYPE_MAP.items():
+ if raw_lower == key or raw_lower.startswith(key):
+ return val
+ return "VARCHAR"
class File(BaseLocalFile):
@@ -97,8 +125,9 @@ class CSV(BaseTabularFile):
_sep: str | None = PrivateAttr(default=None)
@property
- def columns(self) -> list[str]:
- """Return the column names from the CSV header row."""
+ def columns(self) -> list["Column"]:
+ """Return the column metadata from the CSV header row."""
+
if self._encoding is not None:
enc = self._encoding
else:
@@ -112,7 +141,10 @@ def columns(self) -> list[str]:
)
self._encoding = enc
df = pd.read_csv(self.path, sep=",", nrows=0, encoding=enc)
- return df.columns.tolist()
+ return [
+ Column.from_schema(name=col, dtype=_map_dtype(str(dt)))
+ for col, dt in zip(df.columns, df.dtypes)
+ ]
@property
def rows(self) -> int:
@@ -207,9 +239,16 @@ def schema(self) -> pa.Schema:
return pq.read_schema(self.path)
@property
- def columns(self) -> list[str]:
- """Return the column names from the Parquet schema."""
- return pq.read_schema(self.path).names
+ def columns(self) -> list["Column"]:
+ """Return the column metadata from the Parquet schema."""
+
+ schema = pq.read_schema(self.path)
+ return [
+ Column.from_schema(
+ name=field.name, dtype=_map_dtype(str(field.type))
+ )
+ for field in schema
+ ]
@property
def rows(self) -> int:
@@ -298,9 +337,24 @@ class DBF(BaseTabularFile):
type: FileType = Field("DBF")
@property
- def columns(self) -> list[str]:
- """Return the field names from the DBF file."""
- return DBFReader(self.path, load=False).field_names
+ def columns(self) -> list["Column"]:
+ """Return the column metadata from the DBF file."""
+
+ reader = DBFReader(self.path, load=False)
+ _DBF_DTYPE = {
+ "C": "VARCHAR",
+ "N": "INTEGER",
+ "F": "FLOAT",
+ "D": "DATE",
+ "L": "BOOLEAN",
+ "M": "VARCHAR",
+ }
+ return [
+ Column.from_schema(
+ name=f.name, dtype=_DBF_DTYPE.get(f.type, "VARCHAR")
+ )
+ for f in reader.fields
+ ]
@property
def rows(self) -> int:
@@ -440,7 +494,7 @@ class DBC(BaseTabularFile):
type: FileType = Field("DBC")
@property
- def columns(self) -> list[str]:
+ def columns(self) -> list["Column"]:
"""Not supported for DBC files. Convert to Parquet first."""
raise NotImplementedError(
"DBC metadata cannot be read directly. Convert to Parquet first."
@@ -473,7 +527,8 @@ async def to_parquet(
chunk_size: int = 30000,
callback: Callable[[int, int], None] | None = None,
) -> "Parquet":
- """Decompress DBC to DBF, then convert to Parquet."""
+ import gc
+
from pysus.api.extensions import ExtensionFactory
if output_path is None:
@@ -501,9 +556,21 @@ async def to_parquet(
chunk_size=chunk_size,
callback=callback,
)
+ except Exception as err: # noqa
+ if "dbf_ext" in locals():
+ del dbf_ext
+ gc.collect()
+ raise err
finally:
if tmp_dbf_path.exists():
- await to_thread.run_sync(tmp_dbf_path.unlink)
+ try:
+ await to_thread.run_sync(tmp_dbf_path.unlink)
+ except PermissionError:
+ gc.collect()
+ try:
+ await to_thread.run_sync(tmp_dbf_path.unlink)
+ except PermissionError:
+ pass
class JSON(BaseTabularFile):
@@ -512,14 +579,18 @@ class JSON(BaseTabularFile):
type: FileType = Field("JSON")
@property
- def columns(self) -> list[str]:
- """Return the column names from the JSON file."""
+ def columns(self) -> list["Column"]:
+ """Return the column metadata from the JSON file."""
+
df = (
pd.read_json(self.path, nrows=0)
if self.path.stat().st_size > 0
else pd.DataFrame()
)
- return df.columns.tolist()
+ return [
+ Column.from_schema(name=col, dtype=_map_dtype(str(dt)))
+ for col, dt in zip(df.columns, df.dtypes)
+ ]
@property
def rows(self) -> int:
@@ -773,76 +844,6 @@ def _extract():
return list(await asyncio.gather(*tasks))
-class DBCNotImported(BaseTabularFile):
- """Placeholder for DBC files when optional dependency is not installed."""
-
- path: Path = Field(default_factory=lambda: Path("..."))
- type: str | FileType = Field(default="remote")
- import_err: ClassVar[
- str
- ] = """
- run "pip install pysus[dbc]" to handle DBC files.
- Make sure you also have libffi installed on the system. It may not work
- on Windows
- """
-
- @property
- def name(self) -> str:
- """Raise ImportError indicating the missing DBC dependency."""
- raise ImportError(self.import_err)
-
- @property
- def extension(self) -> str:
- """Return the .dbc extension."""
- return ".dbc"
-
- @property
- def size(self) -> int:
- """Raise ImportError indicating the missing DBC dependency."""
- raise ImportError(self.import_err)
-
- @property
- def modify(self) -> datetime:
- """Raise ImportError indicating the missing DBC dependency."""
- raise ImportError(self.import_err)
-
- @property
- def columns(self) -> list[str]:
- """Raise ImportError indicating the missing DBC dependency."""
- raise ImportError(self.import_err)
-
- @property
- def rows(self) -> int:
- """Raise ImportError indicating the missing DBC dependency."""
- raise ImportError(self.import_err)
-
- async def load(self) -> pd.DataFrame:
- """Raise ImportError indicating the missing DBC dependency."""
- raise ImportError(self.import_err)
-
- def stream(
- self,
- chunk_size: int = 10000,
- ) -> AsyncGenerator[pd.DataFrame, None]:
- """Raise ImportError indicating the missing DBC dependency."""
-
- async def _internal_gen():
- """Yield nothing; always raises ImportError."""
- raise ImportError(self.import_err)
- yield pd.DataFrame()
-
- return _internal_gen()
-
- async def to_parquet(
- self,
- output_path: str | Path | None = None,
- chunk_size: int = 10000,
- callback: Callable[[int, int], None] | None = None,
- ) -> Parquet:
- """Raise ImportError indicating the missing DBC dependency."""
- raise ImportError(self.import_err)
-
-
class ExtensionFactory:
"""Factory that maps file extensions and MIME types to handler classes."""
@@ -864,7 +865,7 @@ class ExtensionFactory:
".csv": CSV,
".parquet": Parquet,
".dbf": DBF,
- ".dbc": DBC if DBC_IMPORT else DBCNotImported, # type: ignore
+ ".dbc": DBC,
".pdf": PDF,
".json": JSON,
}
diff --git a/pysus/api/ftp/README.md b/pysus/api/ftp/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/pysus/api/ftp/client.py b/pysus/api/ftp/client.py
index 76872e54..5c265d0a 100644
--- a/pysus/api/ftp/client.py
+++ b/pysus/api/ftp/client.py
@@ -11,8 +11,9 @@
from anyio import to_thread
from pydantic import PrivateAttr
from pysus.api.models import BaseRemoteClient, BaseRemoteFile
+from pysus.api.types import FTP as FTP_STR
-if TYPE_CHECKING:
+if TYPE_CHECKING: # pragma: no cover
from pysus.api.ftp.models import Dataset
from pysus.api.types import State
@@ -55,7 +56,7 @@ def name(self) -> str:
str
The client short name ("FTP").
"""
- return "FTP"
+ return FTP_STR
@property
def long_name(self) -> str:
diff --git a/pysus/api/metadata/__init__.py b/pysus/api/metadata/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/pysus/api/metadata/models.py b/pysus/api/metadata/models.py
new file mode 100644
index 00000000..62a20bc9
--- /dev/null
+++ b/pysus/api/metadata/models.py
@@ -0,0 +1,80 @@
+from dataclasses import dataclass, field
+
+from pysus.api.types import ColumnType, Origin
+
+
+def lookup_column_meta(name: str) -> dict[str, str] | None:
+ """Look up column metadata from the global columns.py constants.
+
+ Returns the {dataset: description} dict if the column name exists
+ as a constant in columns.py, or None if not found.
+ """
+ try:
+ from pysus.api.ducklake.catalog import columns as _cols
+
+ return getattr(_cols, name.upper(), None)
+ except ImportError:
+ return None
+
+
+def pick_description(meta: dict[str, str] | None) -> str:
+ """Pick the best description from a column metadata dict."""
+ if meta is None:
+ return ""
+ for desc in meta.values():
+ if desc:
+ return desc
+ return ""
+
+
+@dataclass
+class Dataset:
+ name: str
+ long_name: str
+ description: str
+
+
+@dataclass
+class DatasetGroup:
+ name: str
+ long_name: str
+ description: str
+
+
+@dataclass
+class FileMeta:
+ name: str
+ path: str
+ size: int
+ state: str | None = None
+ uf: str | None = None
+ year: int | None = None
+ month: int | None = None
+ origin_path: str | None = None
+ origin_size: int | None = None
+
+
+@dataclass
+class File:
+ origin: Origin
+ dataset: Dataset | None = None
+ group: DatasetGroup | None = None
+ columns: list["Column"] = field(default_factory=list)
+ _meta: FileMeta | None = None
+
+
+@dataclass
+class Column:
+ name: str
+ description: str
+ dtype: ColumnType
+
+ @classmethod
+ def from_schema(cls, name: str, dtype: ColumnType) -> "Column":
+ """Create a Column from a file schema, looking up description from
+ columns.py."""
+ return cls(
+ name=name,
+ description=pick_description(lookup_column_meta(name)),
+ dtype=dtype,
+ )
diff --git a/pysus/api/metadata/report.py b/pysus/api/metadata/report.py
new file mode 100644
index 00000000..1f1f0fe8
--- /dev/null
+++ b/pysus/api/metadata/report.py
@@ -0,0 +1,10 @@
+class Header:
+ """..."""
+
+
+class Columns:
+ """..."""
+
+
+class Footer:
+ """..."""
diff --git a/pysus/api/models.py b/pysus/api/models.py
index 6f9029a0..9f0c0967 100644
--- a/pysus/api/models.py
+++ b/pysus/api/models.py
@@ -27,8 +27,9 @@
from .types import FileType, State
-if TYPE_CHECKING:
+if TYPE_CHECKING: # pragma: no cover
from extensions import Parquet
+ from pysus.api.metadata.models import Column
class BaseFile(BaseModel, ABC):
@@ -151,8 +152,8 @@ class BaseTabularFile(BaseLocalFile, ABC):
@property
@abstractmethod
- def columns(self) -> list[str]:
- """Return the list of column names."""
+ def columns(self) -> list[Column]:
+ """Return the list of column metadata."""
@property
@abstractmethod
@@ -212,49 +213,52 @@ async def to_parquet(
)
try:
- async for chunk in self.stream(
- chunk_size=chunk_size,
- ): # type: ignore
- if chunk.empty:
- continue
-
- rows_in_chunk = len(chunk)
- current_rows += rows_in_chunk
-
- table = await to_thread.run_sync(
- pa.Table.from_pandas,
- chunk,
- )
-
- schema = table.schema
- if any(pa.types.is_null(f.type) for f in schema):
- new_fields = [
- (
- pa.field(f.name, pa.string(), nullable=True)
- if pa.types.is_null(f.type)
- else f
- )
- for f in schema
- ]
- table = table.cast(pa.schema(new_fields))
-
- if writer is None:
- writer = await to_thread.run_sync(
- pq.ParquetWriter, output_path, table.schema
+ try:
+ async for chunk in self.stream(
+ chunk_size=chunk_size,
+ ):
+ if chunk.empty:
+ continue
+
+ rows_in_chunk = len(chunk)
+ current_rows += rows_in_chunk
+
+ table = await to_thread.run_sync(
+ pa.Table.from_pandas,
+ chunk,
)
- await to_thread.run_sync(writer.write_table, table)
+ schema = table.schema
+ if any(pa.types.is_null(f.type) for f in schema):
+ new_fields = [
+ (
+ pa.field(f.name, pa.string(), nullable=True)
+ if pa.types.is_null(f.type)
+ else f
+ )
+ for f in schema
+ ]
+ table = table.cast(pa.schema(new_fields))
+
+ if writer is None:
+ writer = await to_thread.run_sync(
+ pq.ParquetWriter, output_path, table.schema
+ )
+
+ await to_thread.run_sync(writer.write_table, table)
- pbar.update(rows_in_chunk)
+ pbar.update(rows_in_chunk)
- if callback:
- callback(current_rows, total_rows)
+ if callback:
+ callback(current_rows, total_rows)
- await asyncio.sleep(0)
+ await asyncio.sleep(0)
+ finally:
+ if writer:
+ await to_thread.run_sync(writer.close)
+ writer = None
finally:
pbar.close()
- if writer:
- await to_thread.run_sync(writer.close)
output = await ExtensionFactory.instantiate(output_path)
if not isinstance(output, Parquet):
diff --git a/pysus/api/types.py b/pysus/api/types.py
index 2ed8f95e..2e3708a8 100644
--- a/pysus/api/types.py
+++ b/pysus/api/types.py
@@ -1,53 +1,128 @@
-"""Type aliases used across the PySUS API.
-
-FileType:
- Discriminated union of supported local file types
- (FILE, DIR, PARQUET, CSV, JSON, PDF, DBC, DBF, ZIP).
-
-State:
- Brazilian state abbreviations (AC, AL, AP, ..., DF).
-"""
-
-from typing import Literal
-
-FileType = Literal[
- "FILE",
- "DIR",
- "PARQUET",
- "CSV",
- "JSON",
- "PDF",
- "DBC",
- "DBF",
- "ZIP",
-]
-
-State = Literal[
- "AC",
- "AL",
- "AP",
- "AM",
- "BA",
- "CE",
- "ES",
- "GO",
- "MA",
- "MT",
- "MS",
- "MG",
- "PA",
- "PB",
- "PR",
- "PE",
- "PI",
- "RJ",
- "RN",
- "RS",
- "RO",
- "RR",
- "SC",
- "SP",
- "SE",
- "TO",
- "DF",
-]
+from typing import Annotated, TypeAlias
+
+from pydantic import AfterValidator
+
+
+def _validate_origin(v: str) -> str:
+ valid = (FTP, DADOSGOV, DUCKLAKE)
+ assert v in valid, f"Invalid origin: {v!r}"
+ return v
+
+
+def _validate_column_type(v: str) -> str:
+ valid = (
+ "VARCHAR",
+ "INTEGER",
+ "BIGINT",
+ "FLOAT",
+ "DOUBLE",
+ "BOOLEAN",
+ "DATE",
+ )
+ assert v in valid, f"Invalid column type: {v!r}"
+ return v
+
+
+def _validate_file_type(v: str) -> str:
+ valid = (
+ "FILE",
+ "DIR",
+ "PARQUET",
+ "CSV",
+ "JSON",
+ "PDF",
+ "DBC",
+ "DBF",
+ "ZIP",
+ )
+ assert v in valid, f"Invalid file type: {v!r}"
+ return v
+
+
+def _validate_dataset_name(v: str) -> str:
+ valid = (
+ "SINAN",
+ "SINASC",
+ "SIM",
+ "SIH",
+ "SIA",
+ "PNI",
+ "IBGE",
+ "CNES",
+ "CIHA",
+ )
+ assert v in valid, f"Invalid dataset name: {v!r}"
+ return v
+
+
+def _validate_state(v: str) -> str:
+ valid = (
+ "AC",
+ "AL",
+ "AP",
+ "AM",
+ "BA",
+ "CE",
+ "ES",
+ "GO",
+ "MA",
+ "MT",
+ "MS",
+ "MG",
+ "PA",
+ "PB",
+ "PR",
+ "PE",
+ "PI",
+ "RJ",
+ "RN",
+ "RS",
+ "RO",
+ "RR",
+ "SC",
+ "SP",
+ "SE",
+ "TO",
+ "DF",
+ )
+ assert v in valid, f"Invalid state: {v!r}"
+ return v
+
+
+FTP: Annotated[str, AfterValidator(_validate_origin)] = "FTP"
+DADOSGOV: Annotated[str, AfterValidator(_validate_origin)] = "DadosGov"
+DUCKLAKE: Annotated[str, AfterValidator(_validate_origin)] = "DuckLake"
+
+VARCHAR: Annotated[str, AfterValidator(_validate_column_type)] = "VARCHAR"
+INTEGER: Annotated[str, AfterValidator(_validate_column_type)] = "INTEGER"
+BIGINT: Annotated[str, AfterValidator(_validate_column_type)] = "BIGINT"
+FLOAT: Annotated[str, AfterValidator(_validate_column_type)] = "FLOAT"
+DOUBLE: Annotated[str, AfterValidator(_validate_column_type)] = "DOUBLE"
+BOOLEAN: Annotated[str, AfterValidator(_validate_column_type)] = "BOOLEAN"
+DATE: Annotated[str, AfterValidator(_validate_column_type)] = "DATE"
+
+FILE: Annotated[str, AfterValidator(_validate_file_type)] = "FILE"
+DIR: Annotated[str, AfterValidator(_validate_file_type)] = "DIR"
+PARQUET: Annotated[str, AfterValidator(_validate_file_type)] = "PARQUET"
+CSV: Annotated[str, AfterValidator(_validate_file_type)] = "CSV"
+JSON: Annotated[str, AfterValidator(_validate_file_type)] = "JSON"
+PDF: Annotated[str, AfterValidator(_validate_file_type)] = "PDF"
+DBC: Annotated[str, AfterValidator(_validate_file_type)] = "DBC"
+DBF: Annotated[str, AfterValidator(_validate_file_type)] = "DBF"
+ZIP: Annotated[str, AfterValidator(_validate_file_type)] = "ZIP"
+
+SINAN: Annotated[str, AfterValidator(_validate_dataset_name)] = "SINAN"
+SINASC: Annotated[str, AfterValidator(_validate_dataset_name)] = "SINASC"
+SIM: Annotated[str, AfterValidator(_validate_dataset_name)] = "SIM"
+SIH: Annotated[str, AfterValidator(_validate_dataset_name)] = "SIH"
+SIA: Annotated[str, AfterValidator(_validate_dataset_name)] = "SIA"
+PNI: Annotated[str, AfterValidator(_validate_dataset_name)] = "PNI"
+IBGE: Annotated[str, AfterValidator(_validate_dataset_name)] = "IBGE"
+CNES: Annotated[str, AfterValidator(_validate_dataset_name)] = "CNES"
+CIHA: Annotated[str, AfterValidator(_validate_dataset_name)] = "CIHA"
+
+Origin: TypeAlias = Annotated[str, AfterValidator(_validate_origin)]
+ColumnType: TypeAlias = Annotated[str, AfterValidator(_validate_column_type)]
+FileType: TypeAlias = Annotated[str, AfterValidator(_validate_file_type)]
+DatasetName: TypeAlias = Annotated[str, AfterValidator(_validate_dataset_name)]
+State: TypeAlias = Annotated[str, AfterValidator(_validate_state)]
diff --git a/pysus/tests/api/dadosgov/test_client.py b/pysus/tests/api/dadosgov/test_client.py
new file mode 100644
index 00000000..9c728040
--- /dev/null
+++ b/pysus/tests/api/dadosgov/test_client.py
@@ -0,0 +1,545 @@
+"""Tests for pysus.api.dadosgov.client."""
+
+from datetime import datetime
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import httpx
+import pytest
+from pysus import __version__
+from pysus.api.dadosgov.client import (
+ ConjuntoDados,
+ DadosGov,
+ Recurso,
+ to_bool,
+ to_datetime,
+)
+
+
+class TestToDatetime:  # day-first date strings parse; every other input -> None
+    def test_valid_datetime_string(self):
+        result = to_datetime("01/02/2024 10:30:00")
+        assert isinstance(result, datetime)
+        assert result.year == 2024
+        assert result.month == 2  # "01/02" is read day-first: 1 February
+        assert result.day == 1
+        assert result.hour == 10
+        assert result.minute == 30
+        assert result.second == 0
+
+    def test_valid_date_string(self):
+        result = to_datetime("15/03/2024")
+        assert isinstance(result, datetime)
+        assert result.year == 2024
+        assert result.month == 3
+        assert result.day == 15
+
+    def test_none_value(self):
+        assert to_datetime(None) is None
+
+    def test_empty_string(self):
+        assert to_datetime("") is None
+
+    def test_indisponivel_value(self):
+        assert to_datetime("Indisponível") is None
+
+    def test_indisponivel_with_accent(self):
+        assert to_datetime("Dado Indisponível") is None
+
+    def test_invalid_string(self):
+        assert to_datetime("not-a-date") is None
+
+    def test_non_string_non_none(self):
+        assert to_datetime(12345) is None
+
+
+class TestToBool:  # to_bool over bools, pt/en words, digit strings and ints
+    def test_true_bool(self):
+        assert to_bool(True) is True
+
+    def test_false_bool(self):
+        assert to_bool(False) is False
+
+    def test_sim_string(self):
+        assert to_bool("sim") is True
+
+    def test_nao_string(self):
+        assert to_bool("não") is False
+
+    def test_true_string(self):
+        assert to_bool("true") is True
+
+    def test_false_string(self):
+        assert to_bool("false") is False
+
+    def test_1_string(self):
+        assert to_bool("1") is True
+
+    def test_0_string(self):
+        assert to_bool("0") is False
+
+    def test_Sim_capitalized(self):
+        assert to_bool("Sim") is True  # capitalised form is accepted too
+
+    def test_TRUE_uppercase(self):
+        assert to_bool("TRUE") is True
+
+    def test_arbitrary_string(self):
+        assert to_bool("qualquer") is False  # unrecognised text is falsy
+
+    def test_integer_one(self):
+        assert to_bool(1) is True
+
+    def test_integer_zero(self):
+        assert to_bool(0) is False
+
+
+class TestRecurso:  # Recurso aliases, date coercion and get_size() probing
+    def test_fields_from_aliases(self):
+        r = Recurso(
+            id="r1",
+            titulo="Arquivo CSV",
+            link="https://example.com/file.csv",
+            tamanho=1024,
+            dataUltimaAtualizacaoArquivo="10/05/2024",
+            nomeArquivo="dados.csv",
+        )
+        assert r.id == "r1"
+        assert r.title == "Arquivo CSV"
+        assert r.url == "https://example.com/file.csv"
+        assert r.api_size == 1024
+        assert isinstance(r.last_modified, datetime)
+        assert r.file_name == "dados.csv"
+
+    def test_fields_from_names(self):
+        r = Recurso(
+            id="r2",
+            title="CSV File",
+            url="https://example.com/data.zip",
+            api_size=2048,
+            file_name="data.zip",
+        )
+        assert r.id == "r2"
+        assert r.title == "CSV File"
+        assert r.url == "https://example.com/data.zip"
+        assert r.api_size == 2048
+        assert r.file_name == "data.zip"
+        assert r.last_modified is None
+
+    def test_last_modified_none_when_indisponivel(self):
+        r = Recurso(
+            id="r3",
+            title="No Date",
+            url="https://example.com/file",
+            api_size=0,
+            dataUltimaAtualizacaoArquivo="Indisponível",
+        )
+        assert r.last_modified is None
+
+    @pytest.mark.asyncio
+    async def test_get_size_head_success(self):
+        r = Recurso(
+            id="r4",
+            title="Test",
+            url="https://example.com/file.csv",
+            api_size=0,
+        )
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.headers = {"Content-Length": "5000"}
+
+        mock_client = AsyncMock()
+        mock_client.head.return_value = mock_response
+        mock_client.__aenter__.return_value = mock_client
+
+        with patch("httpx.AsyncClient", return_value=mock_client):
+            size = await r.get_size()
+
+        assert size == 5000
+        mock_client.head.assert_called_once_with("https://example.com/file.csv")
+
+    @pytest.mark.asyncio
+    async def test_get_size_head_405_fallback_to_get(self):
+        r = Recurso(
+            id="r5",
+            title="Test",
+            url="https://example.com/file.csv",
+            api_size=0,
+        )
+        head_response = MagicMock()
+        head_response.status_code = 405  # HEAD refused; a ranged GET must follow
+
+        get_response = MagicMock()
+        get_response.headers = {"Content-Length": "3000"}
+
+        mock_client = AsyncMock()
+        mock_client.head.return_value = head_response
+        mock_client.get.return_value = get_response
+        mock_client.__aenter__.return_value = mock_client
+
+        with patch("httpx.AsyncClient", return_value=mock_client):
+            size = await r.get_size()
+
+        assert size == 3000
+        mock_client.head.assert_called_once()
+        mock_client.get.assert_called_once_with(
+            "https://example.com/file.csv", headers={"Range": "bytes=0-0"}
+        )
+
+    @pytest.mark.asyncio
+    async def test_get_size_no_content_length(self):
+        r = Recurso(
+            id="r6",
+            title="Test",
+            url="https://example.com/file.csv",
+            api_size=0,
+        )
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.headers = {}
+
+        mock_client = AsyncMock()
+        mock_client.head.return_value = mock_response
+        mock_client.__aenter__.return_value = mock_client
+
+        with patch("httpx.AsyncClient", return_value=mock_client):
+            size = await r.get_size()
+
+        assert size == 0  # a missing Content-Length header is reported as 0
+
+
+class TestConjuntoDados:  # construction via API aliases and via field names
+    def test_fields_from_aliases(self):
+        c = ConjuntoDados(
+            id="c1",
+            titulo="Dataset Teste",
+            nome="dataset-teste",
+            recursos=[
+                Recurso(
+                    id="r1",
+                    titulo="Resource",
+                    link="http://example.com",
+                    tamanho=100,
+                )
+            ],
+        )
+        assert c.id == "c1"
+        assert c.title == "Dataset Teste"
+        assert c.slug == "dataset-teste"
+        assert len(c.resources) == 1
+        assert c.resources[0].id == "r1"
+
+    def test_fields_from_names(self):
+        c = ConjuntoDados(id="c2", title="Dataset", slug="dataset")
+        assert c.id == "c2"
+        assert c.title == "Dataset"
+        assert c.slug == "dataset"
+        assert c.resources == []  # resources falls back to an empty list
+
+
+class TestDadosGov:  # metadata, connect/login/close, listing and download
+    def test_name(self):
+        client = DadosGov()
+        assert client.name == "DadosGov"
+
+    def test_long_name(self):
+        client = DadosGov()
+        assert client.long_name == "Portal Brasileiro de Dados Abertos"
+
+    def test_description(self):
+        client = DadosGov()
+        expected = "Interface de acesso ao API do Portal de Dados Abertos"
+        assert client.description == expected
+
+    @pytest.mark.asyncio
+    async def test_connect_with_token_creates_client(self):
+        client = DadosGov()
+        assert client._client is None
+
+        with patch("httpx.AsyncClient") as mock_async_client:
+            await client.connect(token="test-token-123")
+
+        assert client._token == "test-token-123"
+        assert client._client is not None
+        mock_async_client.assert_called_once_with(
+            base_url="https://dados.gov.br/dados/api",
+            headers={
+                "Accept": "application/json",
+                "User-Agent": f"PySUS/{__version__}",
+                "chave-api-dados-abertos": "test-token-123",
+            },
+            timeout=30.0,
+            follow_redirects=True,
+        )
+
+    @pytest.mark.asyncio
+    async def test_connect_without_token_raises_value_error(self):
+        client = DadosGov()
+        with pytest.raises(
+            ValueError, match="A token is required to connect to DadosGov"
+        ):
+            await client.connect(token=None)
+
+    @pytest.mark.asyncio
+    async def test_connect_with_existing_token(self):
+        client = DadosGov()
+        client._token = "existing-token"
+
+        with patch("httpx.AsyncClient") as mock_async_client:
+            await client.connect()  # no token argument: the stored one is used
+
+        assert client._token == "existing-token"
+        mock_async_client.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_connect_replaces_existing_client(self):
+        client = DadosGov()
+        old_close = AsyncMock()
+        old_client = AsyncMock()
+        old_client.aclose = old_close
+        client._client = old_client
+
+        with patch("httpx.AsyncClient"):
+            await client.connect(token="new-token")
+
+        old_close.assert_awaited_once()  # the previous client must be closed
+        assert client._token == "new-token"
+
+    @pytest.mark.asyncio
+    async def test_login_delegates_to_connect(self):
+        client = DadosGov()
+        with patch(
+            "pysus.api.dadosgov.client.DadosGov.connect"
+        ) as mock_connect:
+            mock_connect.return_value = None
+            await client.login(token="login-token")
+            mock_connect.assert_awaited_once_with(token="login-token")
+
+    @pytest.mark.asyncio
+    async def test_login_with_kwargs(self):
+        client = DadosGov()
+        with patch(
+            "pysus.api.dadosgov.client.DadosGov.connect"
+        ) as mock_connect:
+            mock_connect.return_value = None
+            await client.login(token="t", extra_param="ignored")
+            mock_connect.assert_awaited_once_with(token="t")  # extras dropped
+
+    @pytest.mark.asyncio
+    async def test_close_with_active_client(self):
+        client = DadosGov()
+        mock_http = AsyncMock()
+        client._client = mock_http
+
+        await client.close()
+
+        mock_http.aclose.assert_awaited_once()
+        assert client._client is None
+
+    @pytest.mark.asyncio
+    async def test_close_without_client(self):
+        client = DadosGov()
+        client._client = None
+        await client.close()  # must not raise when nothing is open
+        assert client._client is None
+
+    @pytest.mark.asyncio
+    async def test_datasets_returns_list(self):
+        client = DadosGov()
+        result = await client.datasets()
+        assert isinstance(result, list)
+        assert len(result) > 0
+        from pysus.api.dadosgov.databases import AVAILABLE_DATABASES
+
+        assert len(result) == len(AVAILABLE_DATABASES)
+        for ds in result:
+            assert ds.client is client  # each dataset is bound to this client
+
+    @pytest.mark.asyncio
+    async def test_list_datasets_connection_error(self):
+        client = DadosGov()
+        client._client = None
+        with pytest.raises(ConnectionError, match="Client not connected"):
+            await client.list_datasets()
+
+    @pytest.mark.asyncio
+    async def test_list_datasets_success(self):
+        client = DadosGov()
+        mock_http = AsyncMock(spec=httpx.AsyncClient)
+        mock_response = MagicMock()
+        mock_response.json.return_value = [
+            {
+                "id": "ds1",
+                "titulo": "Dataset 1",
+                "nome": "ds-1",
+                "recursos": [],
+            },
+            {
+                "id": "ds2",
+                "titulo": "Dataset 2",
+                "nome": "ds-2",
+                "recursos": [
+                    {
+                        "id": "r1",
+                        "titulo": "Resource",
+                        "link": "http://example.com",
+                        "tamanho": 50,
+                    }
+                ],
+            },
+        ]
+        mock_http.get.return_value = mock_response
+        client._client = mock_http
+
+        results = await client.list_datasets(
+            pagina=2,
+            nome_conjunto="teste",
+            dados_abertos=True,
+            id_organizacao="org1",
+        )
+
+        assert len(results) == 2
+        assert results[0].id == "ds1"
+        assert results[1].id == "ds2"
+        assert len(results[1].resources) == 1
+        mock_http.get.assert_awaited_once_with(
+            "publico/conjuntos-dados",
+            params={
+                "pagina": 2,
+                "nomeConjuntoDados": "teste",
+                "dadosAbertos": True,
+                "isPrivado": False,
+                "idOrganizacao": "org1",
+            },
+        )
+
+    @pytest.mark.asyncio
+    async def test_list_datasets_minimal_params(self):
+        client = DadosGov()
+        mock_http = AsyncMock(spec=httpx.AsyncClient)
+        mock_response = MagicMock()
+        mock_response.json.return_value = []
+        mock_http.get.return_value = mock_response
+        client._client = mock_http
+
+        results = await client.list_datasets()
+
+        assert results == []
+        mock_http.get.assert_awaited_once_with(
+            "publico/conjuntos-dados",
+            params={"pagina": 1, "isPrivado": False},  # sent by a bare call
+        )
+
+    @pytest.mark.asyncio
+    async def test_get_dataset_connection_error(self):
+        client = DadosGov()
+        client._client = None
+        with pytest.raises(ConnectionError, match="Client not connected"):
+            await client.get_dataset("some-id")
+
+    @pytest.mark.asyncio
+    async def test_get_dataset_success(self):
+        client = DadosGov()
+        mock_http = AsyncMock(spec=httpx.AsyncClient)
+        mock_response = MagicMock()
+        mock_response.json.return_value = {
+            "id": "ds1",
+            "titulo": "Single Dataset",
+            "nome": "single-ds",
+            "recursos": [],
+        }
+        mock_http.get.return_value = mock_response
+        client._client = mock_http
+
+        result = await client.get_dataset("ds1")
+
+        assert result.id == "ds1"
+        assert result.title == "Single Dataset"
+        mock_http.get.assert_awaited_once_with("publico/conjuntos-dados/ds1")
+
+    @pytest.mark.asyncio
+    async def test_download_file_connection_error(self):
+        client = DadosGov()
+        client._client = None
+        mock_file = MagicMock()
+        mock_file.path = "http://example.com/file.csv"
+        with pytest.raises(ConnectionError, match="Client not connected"):
+            await client._download_file(mock_file, Path("/tmp/out.csv"))
+
+    @pytest.mark.asyncio
+    async def test_download_file_success(self, tmp_path):
+        client = DadosGov()
+        mock_http = AsyncMock(spec=httpx.AsyncClient)
+
+        async def _aiter_bytes():  # two 5-byte chunks, 10 bytes in total
+            yield b"12345"
+            yield b"67890"
+
+        mock_response = MagicMock()
+        mock_response.headers = {"Content-Length": "10"}
+        mock_response.aiter_bytes = _aiter_bytes
+
+        cm = AsyncMock()
+        cm.__aenter__.return_value = mock_response
+        cm.__aexit__.return_value = None
+
+        mock_http.stream.return_value = cm
+        client._client = mock_http
+
+        mock_file = MagicMock()
+        mock_file.path = "https:/example.com/file.csv"  # one slash on purpose
+
+        output = tmp_path / "test_download.csv"
+        callback = MagicMock()
+
+        try:
+            result = await client._download_file(
+                mock_file, output, callback=callback
+            )
+
+            assert result == output
+            mock_http.stream.assert_called_once_with(
+                "GET", "https://example.com/file.csv"
+            )  # the client is expected to restore the "://" separator
+            assert output.read_bytes() == b"1234567890"
+            assert callback.call_count == 2
+            callback.assert_any_call(5, 10)  # (bytes so far, total) per chunk
+            callback.assert_any_call(10, 10)
+        finally:
+            if output.exists():
+                output.unlink()
+
+    @pytest.mark.asyncio
+    async def test_download_file_no_callback(self, tmp_path):
+        client = DadosGov()
+        mock_http = AsyncMock(spec=httpx.AsyncClient)
+
+        async def _aiter_bytes():
+            yield b"data"
+
+        mock_response = MagicMock()
+        mock_response.headers = {"Content-Length": "4"}
+        mock_response.aiter_bytes = _aiter_bytes
+
+        cm = AsyncMock()
+        cm.__aenter__.return_value = mock_response
+        cm.__aexit__.return_value = None
+
+        mock_http.stream.return_value = cm
+        client._client = mock_http
+
+        mock_file = MagicMock()
+        mock_file.path = "http:/example.com/file.csv"  # one slash on purpose
+
+        output = tmp_path / "test_download_nocb.csv"
+
+        try:
+            result = await client._download_file(mock_file, output)
+
+            assert result == output
+            mock_http.stream.assert_called_once_with(
+                "GET", "http://example.com/file.csv"
+            )  # the client is expected to restore the "://" separator
+        finally:
+            if output.exists():
+                output.unlink()
diff --git a/pysus/tests/api/dadosgov/test_databases.py b/pysus/tests/api/dadosgov/test_databases.py
new file mode 100644
index 00000000..698b723a
--- /dev/null
+++ b/pysus/tests/api/dadosgov/test_databases.py
@@ -0,0 +1,411 @@
+"""Tests for pysus.api.dadosgov.databases."""
+
+from typing import Any
+from unittest.mock import patch
+
+from pysus.api.dadosgov.client import DadosGov
+from pysus.api.dadosgov.databases import (
+ AVAILABLE_DATABASES,
+ CNES,
+ COVID19,
+ MONTHS,
+ PNI,
+ SIA,
+ SIM,
+ SINAN,
+ SINASC,
+ _parse_year,
+ _skip,
+)
+
+# ---------------------------------------------------------------------------
+# Helper
+# ---------------------------------------------------------------------------
+
+
+def db_instance(db_class):  # instantiate db_class with a fresh DadosGov client
+    return db_class(client=DadosGov())
+
+
+# ---------------------------------------------------------------------------
+# MONTHS
+# ---------------------------------------------------------------------------
+
+
+class TestMONTHS:  # MONTHS maps three-letter month abbreviations to 1..12
+    def test_all_months_present(self):
+        assert MONTHS == {
+            "jan": 1,
+            "fev": 2,
+            "mar": 3,
+            "abr": 4,
+            "mai": 5,
+            "jun": 6,
+            "jul": 7,
+            "ago": 8,
+            "set": 9,
+            "out": 10,
+            "nov": 11,
+            "dez": 12,
+        }
+
+
+# ---------------------------------------------------------------------------
+# _parse_year
+# ---------------------------------------------------------------------------
+
+
+class TestParseYear:  # these cases pin the accepted range to 1970..2100
+    def test_valid_year(self):
+        assert _parse_year("2024") == 2024
+
+    def test_year_below_range(self):
+        assert _parse_year("1969") is None
+
+    def test_year_above_range(self):
+        assert _parse_year("2101") is None
+
+    def test_non_numeric(self):
+        assert _parse_year("abcd") is None
+
+    def test_boundary_low(self):
+        assert _parse_year("1970") == 1970  # lower bound is inclusive
+
+    def test_boundary_high(self):
+        assert _parse_year("2100") == 2100  # upper bound is inclusive
+
+
+# ---------------------------------------------------------------------------
+# _skip
+# ---------------------------------------------------------------------------
+
+
+class TestSkip:  # _skip is True for "get_"-prefixed names and ".pdf" names
+    def test_get_prefix(self):
+        assert _skip("get_dados.csv") is True
+        assert _skip("get_.pdf") is True
+
+    def test_pdf_suffix(self):
+        assert _skip("documento.pdf") is True
+
+    def test_normal_file(self):
+        assert _skip("dados.csv") is False
+
+    def test_empty_string(self):
+        assert _skip("") is False
+
+
+# ---------------------------------------------------------------------------
+# Base dataset test helpers
+# ---------------------------------------------------------------------------
+
+
+class BaseDatasetMixin:  # shared checks; subclasses set db_class/expected_*
+    db_class: Any = None
+    expected_name = ""
+    expected_long_name = ""
+
+    def test_name(self):
+        ds = db_instance(self.db_class)
+        assert ds.name == self.expected_name
+
+    def test_long_name(self):
+        ds = db_instance(self.db_class)
+        assert ds.long_name == self.expected_long_name
+
+    def test_description_is_string(self):
+        ds = db_instance(self.db_class)
+        assert isinstance(ds.description, str)
+        assert len(ds.description) > 0
+
+    def test_ids_are_strings(self):
+        ds = db_instance(self.db_class)
+        assert isinstance(ds.ids, list)
+        for i in ds.ids:
+            assert isinstance(i, str)
+            assert len(i) > 0
+
+    def test_formatter_skip_pdf(self):
+        ds = db_instance(self.db_class)
+        assert ds.formatter("document.pdf") == {
+            "state": None,
+            "year": None,
+            "month": None,
+        }
+
+    def test_formatter_skip_get_prefix(self):
+        ds = db_instance(self.db_class)
+        assert ds.formatter("get_dados.csv") == {
+            "state": None,
+            "year": None,
+            "month": None,
+        }
+
+    def test_formatter_unrecognised(self):
+        ds = db_instance(self.db_class)
+        assert ds.formatter("random_file.xyz") == {
+            "state": None,
+            "year": None,
+            "month": None,
+        }
+
+    def test_formatter_exception_handler(self):
+        ds = db_instance(self.db_class)
+        with patch(  # force an error inside formatter; it must not propagate
+            "pysus.api.dadosgov.databases._skip", side_effect=ValueError("test")
+        ):
+            assert ds.formatter("anything.csv") == {
+                "state": None,
+                "year": None,
+                "month": None,
+            }
+
+
+# ---------------------------------------------------------------------------
+# CNES
+# ---------------------------------------------------------------------------
+
+
+class TestCNES(BaseDatasetMixin):  # filenames carry "<MM>-<YYYY>"
+    db_class = CNES
+    expected_name = "CNES"
+    expected_long_name = "Cadastro Nacional de Estabelecimentos de Saúde"
+
+    def test_formatter_valid_pattern(self):
+        ds = db_instance(CNES)
+        result = ds.formatter("arquivo_01-2024.csv")
+        assert result == {"state": None, "year": 2024, "month": 1}
+
+    def test_formatter_month_and_year(self):
+        ds = db_instance(CNES)
+        result = ds.formatter("dados_12-2023.csv")
+        assert result == {"state": None, "year": 2023, "month": 12}
+
+
+# ---------------------------------------------------------------------------
+# PNI
+# ---------------------------------------------------------------------------
+
+
+class TestPNI(BaseDatasetMixin):  # filenames carry "<mon>_<YYYY>"
+    db_class = PNI
+    expected_name = "PNI"
+    expected_long_name = "Programa Nacional de Imunizações"
+
+    def test_formatter_valid_pattern(self):
+        ds = db_instance(PNI)
+        result = ds.formatter("vacinacao_jan_2024_csv.zip")
+        assert result == {"state": None, "year": 2024, "month": 1}
+
+    def test_formatter_different_month(self):
+        ds = db_instance(PNI)
+        result = ds.formatter("vacinacao_dez_2023_csv.zip")
+        assert result == {"state": None, "year": 2023, "month": 12}
+
+    def test_formatter_invalid_month(self):
+        ds = db_instance(PNI)
+        result = ds.formatter("vacinacao_xxx_2024_csv.zip")  # unknown month
+        assert result == {"state": None, "year": 2024, "month": None}
+
+    def test_formatter_uppercase_filename(self):
+        ds = db_instance(PNI)
+        result = ds.formatter("VACINACAO_JAN_2024_CSV.ZIP")
+        assert result == {"state": None, "year": 2024, "month": 1}
+
+    def test_group_aliases(self):
+        ds = db_instance(PNI)
+        p = "doses-aplicadas-pelo-programa-de-nacional-de-imunizacoes-pni"
+        assert ds.group_aliases[p] == "DPNI"
+        assert ds.group_aliases[f"{p}-2020"] == "DPNI"
+        assert ds.group_aliases[f"dataset-{p}_2022"] == "DPNI"
+
+
+# ---------------------------------------------------------------------------
+# SIA
+# ---------------------------------------------------------------------------
+
+
+class TestSIA(BaseDatasetMixin):  # only the year is extracted in these cases
+    db_class = SIA
+    expected_name = "SIA"
+    expected_long_name = "Sistema de Informações Ambulatoriais"
+
+    def test_formatter_year_pattern(self):
+        ds = db_instance(SIA)
+        result = ds.formatter("arquivo_2024_.csv")
+        assert result == {"state": None, "year": 2024, "month": None}
+
+    def test_formatter_month_year_pattern(self):
+        ds = db_instance(SIA)
+        result = ds.formatter("arquivo_jun-out_2024_.csv")  # month range
+        assert result == {"state": None, "year": 2024, "month": None}
+
+    def test_formatter_month_year_upper(self):
+        ds = db_instance(SIA)
+        result = ds.formatter("ARQUIVO_JUN-OUT_2024_.CSV")
+        assert result == {"state": None, "year": 2024, "month": None}
+
+
+# ---------------------------------------------------------------------------
+# SINAN
+# ---------------------------------------------------------------------------
+
+
+class TestSINAN(BaseDatasetMixin):  # "<GROUP><UF><YY>" and MPX_<YYYY> names
+    db_class = SINAN
+    expected_name = "SINAN"
+    expected_long_name = "Sistema de Informação de Agravos de Notificação"
+
+    def test_formatter_dengue_pattern(self):
+        ds = db_instance(SINAN)
+        result = ds.formatter("DENGBR24.CSV.ZIP")  # two-digit 24 -> 2024
+        assert result == {"state": "BR", "year": 2024, "month": None}
+
+    def test_formatter_tuberculose_pattern(self):
+        ds = db_instance(SINAN)
+        result = ds.formatter("TUBEBR99.CSV.ZIP")  # two-digit 99 -> 1999
+        assert result == {"state": "BR", "year": 1999, "month": None}
+
+    def test_formatter_monkeypox_pattern(self):
+        ds = db_instance(SINAN)
+        result = ds.formatter("MPX_2023_OPENDATASUS.CSV.ZIP")
+        assert result == {"state": None, "year": 2023, "month": None}
+
+    def test_formatter_lowercase_gets_uppercased(self):
+        ds = db_instance(SINAN)
+        result = ds.formatter("dengbr24.csv.zip")
+        assert result == {"state": "BR", "year": 2024, "month": None}
+
+    def test_group_aliases(self):
+        ds = db_instance(SINAN)
+        aliases = ds.group_aliases
+        assert aliases["arboviroses-dengue"] == "DENG"
+        assert aliases["arboviroses-febre-de-chikungunya"] == "CHIK"
+        assert aliases["arboviroses-zika-virus"] == "ZIKA"
+        assert aliases["hanseniase"] == "HANS"
+        assert aliases["dados-tuberculose"] == "TUBE"
+        assert aliases["sifilis"] == "SIFA"
+
+
+# ---------------------------------------------------------------------------
+# SIM
+# ---------------------------------------------------------------------------
+
+
+class TestSIM(BaseDatasetMixin):  # Mortalidade_Geral_<YYYY> and DO<YY>OPEN
+    db_class = SIM
+    expected_name = "SIM"
+    expected_long_name = "Sistema de Informação sobre Mortalidade"
+
+    def test_formatter_mortalidade_geral(self):
+        ds = db_instance(SIM)
+        result = ds.formatter("Mortalidade_Geral_2024_csv.zip")
+        assert result == {"state": None, "year": 2024, "month": None}
+
+    def test_formatter_do_pattern(self):
+        ds = db_instance(SIM)
+        result = ds.formatter("DO24OPEN")  # two-digit 24 -> 2024
+        assert result == {"state": None, "year": 2024, "month": None}
+
+    def test_formatter_do_century_handling(self):
+        ds = db_instance(SIM)
+        result = ds.formatter("DO99OPEN")  # two-digit 99 -> 1999
+        assert result == {"state": None, "year": 1999, "month": None}
+
+    def test_group_aliases(self):
+        ds = db_instance(SIM)
+        assert ds.group_aliases["sim-1979-2019"] == "DO"
+
+
+# ---------------------------------------------------------------------------
+# SINASC
+# ---------------------------------------------------------------------------
+
+
+class TestSINASC(BaseDatasetMixin):  # SINASC_<YYYY> and DN<UF><YYYY> names
+    db_class = SINASC
+    expected_name = "SINASC"
+    expected_long_name = "Sistema de Informações sobre Nascidos Vivos"
+
+    def test_formatter_sinasc_pattern(self):
+        ds = db_instance(SINASC)
+        result = ds.formatter("SINASC_2024_csv.zip")
+        assert result == {"state": None, "year": 2024, "month": None}
+
+    def test_formatter_dnbr_pattern(self):
+        ds = db_instance(SINASC)
+        result = ds.formatter("DNBR2024_csv.zip")  # "BR" is reported as state
+        assert result == {"state": "BR", "year": 2024, "month": None}
+
+    def test_group_aliases(self):
+        ds = db_instance(SINASC)
+        key = "sistema-de-informacao-sobre-nascidos-vivos-sinasc-1996-a-20201"
+        assert ds.group_aliases[key] == "DN"
+
+
+# ---------------------------------------------------------------------------
+# COVID19
+# ---------------------------------------------------------------------------
+
+
+class TestCOVID19(BaseDatasetMixin):  # every case yields the all-None result
+    db_class = COVID19
+    expected_name = "COVID19"
+    expected_long_name = "Casos Confirmados de COVID-19"
+
+    def test_formatter_csv_file(self):
+        ds = db_instance(COVID19)
+        assert ds.formatter("casos_covid.csv") == {
+            "state": None,
+            "year": None,
+            "month": None,
+        }
+
+    def test_formatter_xlsx_file(self):
+        ds = db_instance(COVID19)
+        assert ds.formatter("casos_covid.xlsx") == {
+            "state": None,
+            "year": None,
+            "month": None,
+        }
+
+    def test_formatter_other_file(self):
+        ds = db_instance(COVID19)
+        assert ds.formatter("casos_covid.json") == {
+            "state": None,
+            "year": None,
+            "month": None,
+        }
+
+    def test_formatter_uppercase_xlsx(self):
+        ds = db_instance(COVID19)
+        assert ds.formatter("casos_covid.XLSX") == {
+            "state": None,
+            "year": None,
+            "month": None,
+        }
+
+    def test_formatter_xlsx_prefixed_with_get(self):
+        ds = db_instance(COVID19)
+        assert ds.formatter("get_casos.xlsx") == {
+            "state": None,
+            "year": None,
+            "month": None,
+        }
+
+
+# ---------------------------------------------------------------------------
+# AVAILABLE_DATABASES
+# ---------------------------------------------------------------------------
+
+
+class TestAVAILABLEDATABASES:  # registry holds exactly the seven dataset classes
+    def test_contains_all_databases(self):
+        expected = {CNES, PNI, SIA, SINAN, SIM, SINASC, COVID19}
+        assert set(AVAILABLE_DATABASES) == expected
+
+    def test_all_can_be_instantiated(self):
+        for db_class in AVAILABLE_DATABASES:
+            ds = db_class(client=DadosGov())
+            assert ds.name is not None
+            assert ds.long_name is not None
+ assert ds.long_name is not None
diff --git a/pysus/tests/api/dadosgov/test_models.py b/pysus/tests/api/dadosgov/test_models.py
new file mode 100644
index 00000000..dfbcc005
--- /dev/null
+++ b/pysus/tests/api/dadosgov/test_models.py
@@ -0,0 +1,772 @@
+"""Tests for pysus.api.dadosgov.models."""
+
+import asyncio
+from datetime import datetime
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from pysus import CACHEPATH
+from pysus.api.dadosgov.client import ConjuntoDados, DadosGov, Recurso
+from pysus.api.dadosgov.models import Dataset, File, Group, _dedup_entries
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def make_recurso(**overrides) -> Recurso:
+    """Build a ``Recurso`` for tests.
+
+    Starts from a fixed set of field values and lets keyword arguments
+    replace or extend them before the model is constructed.
+    """
+    defaults = {
+        "id": "r1",
+        "titulo": "Resource",
+        "link": "http://example.com/file.csv",
+        "tamanho": 100,
+        "dataUltimaAtualizacaoArquivo": "01/01/2024",
+    }
+    # Later keys win, so caller-supplied overrides replace the defaults.
+    return Recurso(**{**defaults, **overrides})  # type: ignore[arg-type]
+
+
+def make_conjunto(resources=None) -> ConjuntoDados:
+    """Build a ``ConjuntoDados`` for tests.
+
+    When *resources* is ``None`` a single default ``make_recurso()`` is
+    used; any other value (including an empty list) is passed through.
+    """
+    recursos = [make_recurso()] if resources is None else resources
+    return ConjuntoDados(
+        id="c1",
+        titulo="Conjunto Teste",
+        nome="conjunto-teste",
+        recursos=recursos,
+    )
+
+
+class MockDataset(Dataset):
+ # Concrete Dataset subclass used as a stand-in by the tests in this module.
+ # It returns fixed strings for the naming properties and an empty mapping
+ # from formatter().
+ # No aliases by default; individual tests assign their own mapping.
+ group_aliases: dict = {}
+
+ @property
+ def name(self) -> str:
+ # Fixed short name.
+ return "TestDS"
+
+ @property
+ def long_name(self) -> str:
+ # Fixed human-readable name.
+ return "Test Dataset"
+
+ @property
+ def description(self) -> str:
+ # Fixed description text.
+ return "A test dataset"
+
+ async def _fetch_content(self):
+ # Delegates unchanged to the parent implementation.
+ return await super()._fetch_content()
+
+ def formatter(self, filename: str) -> dict:
+ # Ignores the filename and reports no metadata.
+ return {}
+
+
+# ---------------------------------------------------------------------------
+# _dedup_entries
+# ---------------------------------------------------------------------------
+
+
+class TestDedupEntries:
+    """Behaviour of ``_dedup_entries`` on ``(filename, obj, meta)`` tuples."""
+
+    def test_prefers_csv_over_json_xml(self):
+        """With csv/json/xml variants of one stem only the csv is kept."""
+        kept = _dedup_entries(
+            [
+                ("data.csv", "csv_obj", {"fmt": "csv"}),
+                ("data.json", "json_obj", {"fmt": "json"}),
+                ("data.xml", "xml_obj", {"fmt": "xml"}),
+            ]
+        )
+        assert len(kept) == 1
+        assert kept[0][0] == "data.csv"
+
+    def test_multiple_stems(self):
+        """Each stem is reduced independently to its csv entry."""
+        kept = _dedup_entries(
+            [
+                ("a.csv", "a_csv", {}),
+                ("a.json", "a_json", {}),
+                ("b.csv", "b_csv", {}),
+            ]
+        )
+        assert len(kept) == 2
+        assert {entry[0] for entry in kept} == {"a.csv", "b.csv"}
+
+    def test_no_format_match_returns_all(self):
+        """Entries with other extensions are all returned."""
+        kept = _dedup_entries([("readme.txt", "t1", {}), ("notes.md", "t2", {})])
+        assert len(kept) == 2
+
+    def test_single_entry(self):
+        """A single entry comes back unchanged."""
+        entries = [("data.csv", "obj", {})]
+        assert _dedup_entries(entries) == entries
+
+    def test_zip_format_detection(self):
+        """The csv preference also applies to ``.csv.zip`` / ``.json.zip``."""
+        kept = _dedup_entries(
+            [("data.csv.zip", "cz", {}), ("data.json.zip", "jz", {})]
+        )
+        assert len(kept) == 1
+        assert kept[0][0] == "data.csv.zip"
+
+    def test_only_json_and_xml_no_csv(self):
+        """Without a csv variant both json and xml entries are kept."""
+        kept = _dedup_entries([("data.json", "j", {}), ("data.xml", "x", {})])
+        assert len(kept) == 2
+
+    def test_empty_list(self):
+        """An empty input produces an empty list."""
+        assert _dedup_entries([]) == []
+
+
+# ---------------------------------------------------------------------------
+# File
+# ---------------------------------------------------------------------------
+
+
+class TestFileInit:
+    """Construction and ``repr`` of ``File``."""
+
+    def test_init_with_metadata(self):
+        """Record, dataset, group and metadata-backed fields are exposed."""
+        resource = make_recurso()
+        dataset = MockDataset(client=DadosGov())
+        group = Group(record=make_conjunto([resource]), dataset=dataset)
+
+        file = File(
+            record=resource,
+            dataset=dataset,
+            group=group,
+            path=resource.url,
+            _metadata={"year": 2024, "month": 1, "state": "SP"},
+        )
+
+        assert file.record is resource
+        assert file.dataset is dataset
+        assert file.group is group
+        assert (file.year, file.month, file.state) == (2024, 1, "SP")
+
+    def test_init_without_metadata(self):
+        """Omitting ``_metadata`` leaves an empty mapping."""
+        resource = make_recurso()
+        dataset = MockDataset(client=DadosGov())
+        file = File(record=resource, dataset=dataset, path=resource.url)
+        assert file._metadata == {}
+
+    def test_repr_returns_basename(self):
+        """``repr`` is the last component of the path."""
+        resource = make_recurso()
+        dataset = MockDataset(client=DadosGov())
+        file = File(
+            record=resource,
+            dataset=dataset,
+            path="http://example.com/data.csv",
+        )
+        assert repr(file) == "data.csv"
+
+
+class TestFileModelPostInit:
+    """Whether constructing a ``File`` calls ``create_task`` on the loop."""
+
+    def test_with_api_size_and_last_modified_no_task(self):
+        """With size and date already known, no task is created."""
+        recurso = make_recurso(tamanho=100)
+        ds = MockDataset(client=DadosGov())
+
+        with patch.object(asyncio, "get_running_loop") as mock_get_loop:
+            File(record=recurso, dataset=ds, path=recurso.url)
+
+        # The loop is what the patched function *returns*; asserting on the
+        # function mock's own ``create_task`` attribute could never fail.
+        mock_get_loop.return_value.create_task.assert_not_called()
+
+    def test_without_api_size_creates_task(self):
+        """A zero size triggers exactly one ``create_task`` call."""
+        recurso = make_recurso(tamanho=0)
+        ds = MockDataset(client=DadosGov())
+        loop = MagicMock()
+
+        with patch.object(asyncio, "get_running_loop", return_value=loop):
+            File(record=recurso, dataset=ds, path=recurso.url)
+
+        loop.create_task.assert_called_once()
+
+    def test_without_last_modified_creates_task(self):
+        """An unavailable date triggers exactly one ``create_task`` call."""
+        recurso = make_recurso(dataUltimaAtualizacaoArquivo="Indisponível")
+        ds = MockDataset(client=DadosGov())
+        loop = MagicMock()
+
+        with patch.object(asyncio, "get_running_loop", return_value=loop):
+            File(record=recurso, dataset=ds, path=recurso.url)
+
+        loop.create_task.assert_called_once()
+
+    def test_no_event_loop_runtime_error_swallowed(self):
+        """A ``RuntimeError`` from ``get_running_loop`` does not propagate."""
+        recurso = make_recurso(tamanho=0)
+        ds = MockDataset(client=DadosGov())
+        no_loop = RuntimeError("No running event loop")
+
+        with patch.object(asyncio, "get_running_loop", side_effect=no_loop):
+            File(record=recurso, dataset=ds, path=recurso.url)
+
+
+class TestFileProperties:
+    """Derived ``File`` properties: extension, size, modify and metadata."""
+
+    @staticmethod
+    def _file_for(recurso, **file_kwargs):
+        """Wrap *recurso* in a ``File`` whose path is the resource URL."""
+        return File(
+            record=recurso,
+            dataset=MockDataset(client=DadosGov()),
+            path=recurso.url,
+            **file_kwargs,
+        )
+
+    def test_extension_from_file_name(self):
+        """The extension is taken from ``nomeArquivo`` when present."""
+        file = self._file_for(make_recurso(nomeArquivo="dados.csv"))
+        assert file.extension == ".csv"
+
+    def test_extension_from_url_when_no_file_name(self):
+        """Without ``nomeArquivo`` the extension comes from the link."""
+        recurso = make_recurso(
+            nomeArquivo=None, link="http://example.com/arquivo.zip"
+        )
+        assert self._file_for(recurso).extension == ".zip"
+
+    def test_extension_from_url_with_query_string(self):
+        """A query string on the link does not leak into the extension."""
+        recurso = make_recurso(
+            nomeArquivo=None, link="http://example.com/arquivo.csv?download=1"
+        )
+        assert self._file_for(recurso).extension == ".csv"
+
+    def test_size_from_api_size(self):
+        """``size`` reflects the resource's reported size."""
+        assert self._file_for(make_recurso(tamanho=500)).size == 500
+
+    def test_size_zero_when_no_api_size(self):
+        """``size`` is zero when the resource reports zero."""
+        assert self._file_for(make_recurso(tamanho=0)).size == 0
+
+    def test_modify_returns_datetime(self):
+        """``modify`` is a ``datetime`` for a resource with a valid date."""
+        assert isinstance(self._file_for(make_recurso()).modify, datetime)
+
+    def test_modify_raises_value_error_when_none(self):
+        """``modify`` raises ``ValueError`` when no date is available."""
+        file = self._file_for(
+            make_recurso(dataUltimaAtualizacaoArquivo="Indisponível")
+        )
+        with pytest.raises(ValueError, match="File requires a modify date"):
+            # Reading the property is what raises; bind it so the access
+            # is not a bare expression statement.
+            _ = file.modify
+
+    def test_year_month_state_from_metadata(self):
+        """``year``/``month``/``state`` are read from ``_metadata``."""
+        file = self._file_for(
+            make_recurso(),
+            _metadata={"year": 2023, "month": 6, "state": "RJ"},
+        )
+        assert file.year == 2023
+        assert file.month == 6
+        assert file.state == "RJ"
+
+    def test_year_month_state_defaults_to_none(self):
+        """Without metadata the three fields are ``None``."""
+        file = self._file_for(make_recurso())
+        assert file.year is None
+        assert file.month is None
+        assert file.state is None
+
+
+class TestFileFetchMetadata:
+    """``File.fetch_metadata`` against a mocked ``httpx.AsyncClient``."""
+
+    URL = "http://example.com/file.csv"
+
+    @staticmethod
+    def _async_client():
+        """Return an ``AsyncMock`` that yields itself from ``async with``."""
+        client = AsyncMock()
+        client.__aenter__.return_value = client
+        return client
+
+    def _file(self, **recurso_overrides):
+        """Build a ``File`` at ``URL``; the record reports size 0 by default."""
+        recurso = make_recurso(**{"tamanho": 0, **recurso_overrides})
+        return File(
+            record=recurso,
+            dataset=MockDataset(client=DadosGov()),
+            path=self.URL,
+        )
+
+    async def _run_with_failing_parse(self, error):
+        """Run ``fetch_metadata`` while ``parse`` raises *error*.
+
+        Asserts that the size is still taken from ``Content-Length`` and
+        that ``last_modified`` keeps the value the record was built with.
+        """
+        file = self._file()
+        client = self._async_client()
+        client.head.return_value = MagicMock(
+            status_code=200,
+            headers={
+                "Content-Length": "100",
+                "Last-Modified": "invalid-date-string",
+            },
+        )
+
+        with patch("httpx.AsyncClient", return_value=client), patch(
+            "pysus.api.dadosgov.models.parse", side_effect=error
+        ):
+            await file.fetch_metadata()
+
+        assert file.record.api_size == 100
+        assert file.record.last_modified == datetime(2024, 1, 1)
+
+    @pytest.mark.asyncio
+    async def test_head_success_updates_record(self):
+        """HEAD headers populate the record's size and modification date."""
+        file = self._file(dataUltimaAtualizacaoArquivo="Indisponível")
+        client = self._async_client()
+        client.head.return_value = MagicMock(
+            status_code=200,
+            headers={
+                "Content-Length": "999",
+                "Last-Modified": "Mon, 15 Jan 2024 10:30:00 GMT",
+            },
+        )
+
+        with patch("httpx.AsyncClient", return_value=client):
+            await file.fetch_metadata()
+
+        assert file.record.api_size == 999
+        assert file.record.last_modified is not None
+
+    @pytest.mark.asyncio
+    async def test_head_405_fallback_to_get(self):
+        """A 405 on HEAD falls back to a one-byte ranged GET."""
+        file = self._file()
+        client = self._async_client()
+        client.head.return_value = MagicMock(status_code=405)
+        client.get.return_value = MagicMock(
+            headers={
+                "Content-Length": "777",
+                "Last-Modified": "Thu, 01 Feb 2024 00:00:00 GMT",
+            }
+        )
+
+        with patch("httpx.AsyncClient", return_value=client):
+            await file.fetch_metadata()
+
+        assert file.record.api_size == 777
+
+        get_args, get_kwargs = client.get.call_args
+        # Round-tripping through ``Path`` may collapse ``//`` to ``/``, so
+        # both spellings of the URL are accepted.
+        requested_url = Path(get_args[0]).as_posix()
+        assert requested_url in (
+            "http:/example.com/file.csv",
+            "http://example.com/file.csv",
+        )
+        assert get_kwargs == {"headers": {"Range": "bytes=0-0"}}
+
+    @pytest.mark.asyncio
+    async def test_no_content_length_header(self):
+        """Without ``Content-Length`` the recorded size stays at zero."""
+        file = self._file()
+        client = self._async_client()
+        client.head.return_value = MagicMock(status_code=200, headers={})
+
+        with patch("httpx.AsyncClient", return_value=client):
+            await file.fetch_metadata()
+
+        assert file.record.api_size == 0
+
+    @pytest.mark.asyncio
+    async def test_exception_is_caught(self):
+        """An exception from HEAD is swallowed and the size stays zero."""
+        file = self._file()
+        client = self._async_client()
+        client.head.side_effect = Exception("Network error")
+
+        with patch("httpx.AsyncClient", return_value=client):
+            await file.fetch_metadata()
+
+        assert file.record.api_size == 0
+
+    @pytest.mark.asyncio
+    async def test_parse_typeerror_is_caught(self):
+        """A ``TypeError`` from ``parse`` does not abort the update."""
+        await self._run_with_failing_parse(TypeError)
+
+    @pytest.mark.asyncio
+    async def test_parse_valueerror_is_caught(self):
+        """A ``ValueError`` from ``parse`` does not abort the update."""
+        await self._run_with_failing_parse(ValueError)
+
+
+class TestFileDownload:
+    """``File._download`` in terms of the client's ``_download_file``."""
+
+    @pytest.mark.asyncio
+    async def test_download_delegates_to_client(self):
+        """Output and callback are forwarded; the client's result returns."""
+        resource = make_recurso()
+        dataset = MockDataset(client=DadosGov())
+        file = File(
+            record=resource, dataset=dataset, group=None, path=resource.url
+        )
+        target = Path("/tmp/test_out.csv")
+        on_progress = MagicMock()
+
+        with patch.object(
+            dataset.client, "_download_file", new_callable=AsyncMock
+        ) as download_mock:
+            download_mock.return_value = target
+            returned = await file._download(
+                output=target, callback=on_progress
+            )
+
+        assert returned == target
+        download_mock.assert_awaited_once_with(
+            file, target, callback=on_progress
+        )
+
+    @pytest.mark.asyncio
+    async def test_download_default_output(self):
+        """Called without arguments, the client's return value is passed on."""
+        resource = make_recurso()
+        dataset = MockDataset(client=DadosGov())
+        file = File(record=resource, dataset=dataset, path=resource.url)
+        cached_path = CACHEPATH / file.name
+
+        with patch.object(
+            dataset.client, "_download_file", new_callable=AsyncMock
+        ) as download_mock:
+            download_mock.return_value = cached_path
+            returned = await file._download()
+
+        assert returned == cached_path
+
+
+class TestFileFetchSize:
+    """``File.fetch_size`` against a mocked ``httpx.AsyncClient``."""
+
+    URL = "http://example.com/file.csv"
+
+    @staticmethod
+    def _async_client():
+        """Return an ``AsyncMock`` that yields itself from ``async with``."""
+        client = AsyncMock()
+        client.__aenter__.return_value = client
+        return client
+
+    def _sizeless_file(self):
+        """Build a ``File`` at ``URL`` whose record reports size 0."""
+        recurso = make_recurso(tamanho=0)
+        return File(
+            record=recurso,
+            dataset=MockDataset(client=DadosGov()),
+            path=self.URL,
+        )
+
+    @pytest.mark.asyncio
+    async def test_head_success_updates_and_returns_size(self):
+        """``Content-Length`` from HEAD is returned and stored."""
+        file = self._sizeless_file()
+        client = self._async_client()
+        client.head.return_value = MagicMock(
+            status_code=200, headers={"Content-Length": "1234"}
+        )
+
+        with patch("httpx.AsyncClient", return_value=client):
+            fetched = await file.fetch_size()
+
+        assert fetched == 1234
+        assert file.record.api_size == 1234
+
+    @pytest.mark.asyncio
+    async def test_head_405_fallback_to_get(self):
+        """A 405 on HEAD falls back to a one-byte ranged GET."""
+        file = self._sizeless_file()
+        client = self._async_client()
+        client.head.return_value = MagicMock(status_code=405)
+        client.get.return_value = MagicMock(
+            headers={"Content-Length": "5678"}
+        )
+
+        with patch("httpx.AsyncClient", return_value=client):
+            fetched = await file.fetch_size()
+
+        assert fetched == 5678
+        assert file.record.api_size == 5678
+
+        get_args, get_kwargs = client.get.call_args
+        requested_url = Path(get_args[0]).as_posix()
+
+        assert requested_url in (
+            "http:/example.com/file.csv",
+            "http://example.com/file.csv",
+        )
+        assert get_kwargs == {"headers": {"Range": "bytes=0-0"}}
+
+    @pytest.mark.asyncio
+    async def test_head_returns_zero_content_length(self):
+        """A ``Content-Length`` of ``"0"`` yields size zero."""
+        file = self._sizeless_file()
+        client = self._async_client()
+        client.head.return_value = MagicMock(
+            status_code=200, headers={"Content-Length": "0"}
+        )
+
+        with patch("httpx.AsyncClient", return_value=client):
+            fetched = await file.fetch_size()
+
+        assert fetched == 0
+        assert file.record.api_size == 0
+
+    @pytest.mark.asyncio
+    async def test_exception_returns_zero(self):
+        """An exception from HEAD results in a returned size of zero."""
+        file = self._sizeless_file()
+        client = self._async_client()
+        client.head.side_effect = Exception("Timeout")
+
+        with patch("httpx.AsyncClient", return_value=client):
+            fetched = await file.fetch_size()
+
+        assert fetched == 0
+
+
+# ---------------------------------------------------------------------------
+# Group
+# ---------------------------------------------------------------------------
+
+
+class TestGroupInit:
+    """Construction and ``repr`` of ``Group``."""
+
+    def test_init_with_formatter(self):
+        """Record, dataset and the given formatter are stored as passed."""
+        dataset = MockDataset(client=DadosGov())
+        conjunto = make_conjunto()
+
+        def formatter(fn):
+            return {"year": 2024}
+
+        group = Group(record=conjunto, dataset=dataset, formatter=formatter)
+
+        assert group.record is conjunto
+        assert group.dataset is dataset
+        assert group._formatter is formatter
+
+    def test_init_without_formatter(self):
+        """Without a formatter argument ``_formatter`` is ``None``."""
+        dataset = MockDataset(client=DadosGov())
+        group = Group(record=make_conjunto(), dataset=dataset)
+        assert group._formatter is None
+
+    def test_repr_returns_name(self):
+        """``repr`` equals the group's ``name``."""
+        dataset = MockDataset(client=DadosGov())
+        group = Group(record=make_conjunto(), dataset=dataset)
+        assert repr(group) == group.name
+
+
+class TestGroupProperties:
+    """``name``, ``long_name`` and ``description`` of ``Group``."""
+
+    def test_name_with_aliases(self):
+        """A matching entry in ``group_aliases`` replaces the record name."""
+        dataset = MockDataset(client=DadosGov())
+        dataset.group_aliases = {"conjunto-teste": "CT"}
+        group = Group(record=make_conjunto(), dataset=dataset)
+        assert group.name == "CT"
+
+    def test_name_without_aliases(self):
+        """With no aliases the record's own name is used."""
+        dataset = MockDataset(client=DadosGov())
+        dataset.group_aliases = {}
+        group = Group(record=make_conjunto(), dataset=dataset)
+        assert group.name == "conjunto-teste"
+
+    def test_long_name(self):
+        """``long_name`` is the record's title."""
+        dataset = MockDataset(client=DadosGov())
+        group = Group(record=make_conjunto(), dataset=dataset)
+        assert group.long_name == "Conjunto Teste"
+
+    def test_description(self):
+        """``description`` is an empty string for the default record."""
+        dataset = MockDataset(client=DadosGov())
+        group = Group(record=make_conjunto(), dataset=dataset)
+        assert group.description == ""
+
+
+class TestGroupFetchFiles:
+    """Filtering, de-duplication and formatting in ``Group._fetch_files``."""
+
+    @staticmethod
+    def _named(resource_id, filename):
+        """Build a resource whose link and ``nomeArquivo`` use *filename*."""
+        return make_recurso(
+            id=resource_id,
+            link=f"http://ex.com/{filename}",
+            nomeArquivo=filename,
+        )
+
+    @pytest.mark.asyncio
+    async def test_filters_pdf_and_get_prefix(self):
+        """PDF files and ``get_``-prefixed files are left out."""
+        conjunto = make_conjunto(
+            [
+                self._named("r1", "doc.pdf"),
+                self._named("r2", "get_data.csv"),
+                self._named("r3", "data.csv"),
+            ]
+        )
+        dataset = MockDataset(client=DadosGov())
+        group = Group(record=conjunto, dataset=dataset)
+
+        fetched = await group._fetch_files()
+
+        assert len(fetched) == 1
+        assert fetched[0].record.id == "r3"
+
+    @pytest.mark.asyncio
+    async def test_deduplicates_preferring_csv(self):
+        """Of csv/json/xml variants of one stem only the csv survives."""
+        conjunto = make_conjunto(
+            [
+                self._named("r1", "data.csv"),
+                self._named("r2", "data.json"),
+                self._named("r3", "data.xml"),
+            ]
+        )
+        dataset = MockDataset(client=DadosGov())
+        group = Group(record=conjunto, dataset=dataset)
+
+        fetched = await group._fetch_files()
+
+        assert len(fetched) == 1
+        assert fetched[0].record.id == "r1"
+
+    @pytest.mark.asyncio
+    async def test_formatter_applied(self):
+        """Values returned by the formatter show up on the file."""
+        conjunto = make_conjunto([self._named("r1", "SP2024.csv")])
+        dataset = MockDataset(client=DadosGov())
+
+        def formatter(fn):
+            return {"state": "SP", "year": 2024}
+
+        group = Group(record=conjunto, dataset=dataset, formatter=formatter)
+
+        fetched = await group._fetch_files()
+
+        assert len(fetched) == 1
+        assert fetched[0].state == "SP"
+        assert fetched[0].year == 2024
+
+    @pytest.mark.asyncio
+    async def test_formatter_not_implemented_error_caught(self):
+        """A formatter raising ``NotImplementedError`` still yields the file."""
+        conjunto = make_conjunto([self._named("r1", "data.csv")])
+        dataset = MockDataset(client=DadosGov())
+
+        def bad_formatter(fn):
+            raise NotImplementedError("not implemented")
+
+        group = Group(
+            record=conjunto, dataset=dataset, formatter=bad_formatter
+        )
+
+        fetched = await group._fetch_files()
+
+        assert len(fetched) == 1
+        assert fetched[0].state is None
+
+    @pytest.mark.asyncio
+    async def test_filename_from_url_when_no_file_name(self):
+        """Without ``nomeArquivo`` the file path is derived from the link."""
+        conjunto = make_conjunto(
+            [
+                make_recurso(
+                    id="r1",
+                    nomeArquivo=None,
+                    link="http://ex.com/download?file=data.csv",
+                ),
+            ]
+        )
+        dataset = MockDataset(client=DadosGov())
+        group = Group(record=conjunto, dataset=dataset)
+
+        fetched = await group._fetch_files()
+
+        assert len(fetched) == 1
+        assert "download" in str(fetched[0].path)
+
+
+# ---------------------------------------------------------------------------
+# Dataset
+# ---------------------------------------------------------------------------
+
+
+class TestDatasetContent:
+    """``Dataset._fetch_content``, ``repr`` and ``formatter`` behaviour."""
+
+    @pytest.mark.asyncio
+    async def test_fetch_content_with_ids(self):
+        """One group per id, in order, each wired to the dataset formatter."""
+        dataset = MockDataset(client=DadosGov())
+        dataset.ids = ["id1", "id2"]
+
+        first = make_conjunto([make_recurso(id="r1")])
+        second = make_conjunto([make_recurso(id="r2")])
+
+        with patch(
+            "pysus.api.dadosgov.client.DadosGov.get_dataset",
+            new_callable=AsyncMock,
+        ) as get_dataset:
+            get_dataset.side_effect = [first, second]
+            fetched = await dataset._fetch_content()
+
+        assert len(fetched) == 2
+        assert fetched[0].record is first
+        assert fetched[1].record is second
+        assert callable(fetched[0]._formatter)
+        assert fetched[0]._formatter("x") == dataset.formatter("x")
+        get_dataset.assert_any_call("id1")
+        get_dataset.assert_any_call("id2")
+
+    @pytest.mark.asyncio
+    async def test_fetch_content_empty_ids(self):
+        """With no ids nothing is fetched and the result is empty."""
+        dataset = MockDataset(client=DadosGov())
+        dataset.ids = []
+
+        with patch(
+            "pysus.api.dadosgov.client.DadosGov.get_dataset"
+        ) as get_dataset:
+            fetched = await dataset._fetch_content()
+
+        assert fetched == []
+        get_dataset.assert_not_called()
+
+    def test_repr_returns_name(self):
+        """``repr`` of a dataset is its ``name``."""
+        assert repr(MockDataset(client=DadosGov())) == "TestDS"
+
+    def test_abstract_formatter_pass(self):
+        """Calling the base ``Dataset.formatter`` body directly is harmless."""
+
+        class DirectDataset(Dataset):
+            ids: list[str] = ["abc"]
+
+            @property
+            def name(self):
+                return "test"
+
+            @property
+            def long_name(self):
+                return "Test"
+
+            @property
+            def description(self):
+                return "Test dataset"
+
+            def formatter(self, filename):
+                # Invoke the base implementation, then return our own value.
+                Dataset.formatter(self, filename)
+                return {}
+
+        direct = DirectDataset(client=DadosGov())
+        assert direct.formatter("x.csv") == {}
+
+    def test_formatter_not_abstract(self):
+        """The mock dataset's formatter returns an empty mapping."""
+        assert MockDataset(client=DadosGov()).formatter("any.csv") == {}
diff --git a/pysus/tests/api/ducklake/test_catalog.py b/pysus/tests/api/ducklake/test_catalog.py
index ffdde87f..a6d415f6 100644
--- a/pysus/tests/api/ducklake/test_catalog.py
+++ b/pysus/tests/api/ducklake/test_catalog.py
@@ -1,77 +1,112 @@
-from pysus.api.ducklake.catalog import (
- CatalogDataset,
- CatalogFile,
- CatalogTable,
+"""Tests for DuckLake catalog ORM models."""
+
+from pysus.api.ducklake.catalog.orm.dataset import (
ColumnDefinition,
- DatasetGroup,
- Origin,
+ Dataset,
+ File,
+ Group,
file_columns,
)
+from pysus.api.ducklake.catalog.orm.default import Dataset as DefaultDataset
+
+class TestDefaultDataset:
+ def test_tablename(self):
+ assert DefaultDataset.__tablename__ == "datasets"
-class TestOrigin:
- def test_origin_ftp(self):
- assert Origin.FTP.value == "ftp"
+ def test_columns(self):
+ cols = DefaultDataset.__table__.columns
+ assert "id" in cols
+ assert "name" in cols
+ assert "long_name" in cols
+ assert "description" in cols
- def test_origin_api(self):
- assert Origin.API.value == "api"
+ def test_schema(self):
+ assert DefaultDataset.__table_args__[0]["schema"] == "pysus"
-class TestCatalogTable:
- def test_catalog_table_is_abstract(self):
- assert CatalogTable.__abstract__ is True
+class TestDataset:
+ def test_tablename(self):
+ assert Dataset.__tablename__ == "datasets"
+ def test_columns(self):
+ cols = Dataset.__table__.columns
+ assert "id" in cols
+ assert "name" in cols
+ assert "long_name" in cols
+ assert "description" in cols
-class TestCatalogDataset:
- def test_catalog_dataset_tablename(self):
- assert CatalogDataset.__tablename__ == "datasets"
+ def test_schema(self):
+ assert Dataset.__table_args__[0]["schema"] == "pysus"
- def test_catalog_dataset_columns(self):
- assert "id" in CatalogDataset.__table__.columns
- assert "name" in CatalogDataset.__table__.columns
- assert "long_name" in CatalogDataset.__table__.columns
- assert "origin" in CatalogDataset.__table__.columns
+ def test_relationships(self):
+ assert hasattr(Dataset, "groups")
+ assert hasattr(Dataset, "files")
+ assert hasattr(Dataset, "columns")
class TestColumnDefinition:
- def test_column_definition_tablename(self):
+ def test_tablename(self):
assert ColumnDefinition.__tablename__ == "dataset_columns"
- def test_column_definition_columns(self):
- assert "id" in ColumnDefinition.__table__.columns
- assert "dataset_id" in ColumnDefinition.__table__.columns
- assert "name" in ColumnDefinition.__table__.columns
- assert "type" in ColumnDefinition.__table__.columns
-
-
-class TestDatasetGroup:
- def test_dataset_group_tablename(self):
- assert DatasetGroup.__tablename__ == "dataset_groups"
-
- def test_dataset_group_columns(self):
- assert "id" in DatasetGroup.__table__.columns
- assert "dataset_id" in DatasetGroup.__table__.columns
- assert "name" in DatasetGroup.__table__.columns
- assert "long_name" in DatasetGroup.__table__.columns
-
-
-class TestCatalogFile:
- def test_catalog_file_tablename(self):
- assert CatalogFile.__tablename__ == "files"
-
- def test_catalog_file_columns(self):
- assert "id" in CatalogFile.__table__.columns
- assert "dataset_id" in CatalogFile.__table__.columns
- assert "path" in CatalogFile.__table__.columns
- assert "size" in CatalogFile.__table__.columns
- assert "rows" in CatalogFile.__table__.columns
- assert "modified" in CatalogFile.__table__.columns
- assert "year" in CatalogFile.__table__.columns
- assert "month" in CatalogFile.__table__.columns
- assert "state" in CatalogFile.__table__.columns
+ def test_columns(self):
+ cols = ColumnDefinition.__table__.columns
+ assert "id" in cols
+ assert "dataset_id" in cols
+ assert "name" in cols
+ assert "type" in cols
+ assert "description" in cols
+ assert "nullable" in cols
+
+
+class TestGroup:
+    """Table name, columns and relationships of the ``Group`` ORM model."""
+
+    def test_tablename(self):
+        """The model maps to the ``dataset_groups`` table."""
+        assert Group.__tablename__ == "dataset_groups"
+
+    def test_columns(self):
+        """Every expected column is present on the table."""
+        table_columns = Group.__table__.columns
+        for column_name in (
+            "id",
+            "dataset_id",
+            "name",
+            "long_name",
+            "description",
+        ):
+            assert column_name in table_columns
+
+    def test_relationships(self):
+        """The model exposes ``dataset`` and ``files`` attributes."""
+        for attribute in ("dataset", "files"):
+            assert hasattr(Group, attribute)
+
+
+class TestFile:
+    """Table name, columns and relationships of the ``File`` ORM model."""
+
+    def test_tablename(self):
+        """The model maps to the ``files`` table."""
+        assert File.__tablename__ == "files"
+
+    def test_columns(self):
+        """Every expected column is present on the table."""
+        table_columns = File.__table__.columns
+        for column_name in (
+            "id",
+            "dataset_id",
+            "group_id",
+            "path",
+            "size",
+            "rows",
+            "type",
+            "modified",
+            "year",
+            "month",
+            "state",
+            "sha256",
+            "origin_size",
+            "origin_path",
+        ):
+            assert column_name in table_columns
+
+    def test_relationships(self):
+        """The model exposes ``dataset``, ``group`` and ``columns``."""
+        for attribute in ("dataset", "group", "columns"):
+            assert hasattr(File, attribute)
class TestFileColumns:
+ def test_file_columns_table_name(self):
+ assert file_columns.name == "file_columns"
+
def test_file_columns_primary_keys(self):
file_id_col = file_columns.c.file_id
column_id_col = file_columns.c.column_id
diff --git a/pysus/tests/api/ducklake/test_client.py b/pysus/tests/api/ducklake/test_client.py
index 244c22f5..e50a6b05 100644
--- a/pysus/tests/api/ducklake/test_client.py
+++ b/pysus/tests/api/ducklake/test_client.py
@@ -1,7 +1,15 @@
-from unittest.mock import MagicMock, patch
+"""Tests for DuckLake client module."""
+
+import errno
+from datetime import datetime
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
import pytest
+from pysus.api.ducklake.catalog.orm.dataset import Dataset as PerDataset
+from pysus.api.ducklake.catalog.orm.dataset import File as CatalogFile
from pysus.api.ducklake.client import DuckLake, DuckLakeCredentials
+from pysus.api.ducklake.models import DuckDataset, File
class TestDuckLakeCredentials:
@@ -23,16 +31,25 @@ async def test_ducklake_init(self):
assert client.endpoint == "nbg1.your-objectstorage.com"
assert client.bucket == "pysus"
+    @pytest.mark.asyncio
+    async def test_description(self):
+        """A freshly built client reports an empty description."""
+        assert DuckLake().description == ""
+
@pytest.mark.asyncio
async def test_ducklake_catalog_path(self, tmp_path):
with patch("pysus.api.ducklake.client.CACHEPATH", tmp_path):
client = DuckLake()
- assert client.catalog_path == tmp_path / "ducklake" / "catalog.db"
+ assert (
+ client.catalog_path == tmp_path / "ducklake" / "catalog.duckdb"
+ )
@pytest.mark.asyncio
async def test_ducklake_catalog_url(self):
client = DuckLake()
- expected = "https://nbg1.your-objectstorage.com/pysus/public/catalog.db"
+ expected = (
+ "https://nbg1.your-objectstorage.com/pysus/public/catalog.duckdb"
+ )
assert client._catalog_url == expected
@pytest.mark.asyncio
@@ -42,40 +59,68 @@ async def test_is_authenticated_false_no_credentials(self):
@pytest.mark.asyncio
async def test_is_authenticated_with_credentials(self):
- from unittest.mock import patch
-
client = DuckLake()
- with patch.object(client, "_load_catalog"):
+ with patch.object(client, "_download_catalog"):
await client.login(access_key="key", secret_key="secret")
assert client._is_authenticated is True
@pytest.mark.asyncio
async def test_login_sets_credentials(self):
- from unittest.mock import patch
-
client = DuckLake()
- with patch.object(client, "_load_catalog"):
+ with patch.object(client, "_download_catalog"):
await client.login(access_key="key", secret_key="secret")
assert client.credentials is not None
@pytest.mark.asyncio
async def test_login_creates_s3_client(self):
- from unittest.mock import patch
-
client = DuckLake()
- with patch.object(client, "_load_catalog"):
+ with patch.object(client, "_download_catalog"):
await client.login(access_key="key", secret_key="secret")
assert client._s3_client is not None
- client._s3_client = None
+
+    @pytest.mark.asyncio
+    async def test_login_clears_credentials(self):
+        """``login()`` without keys drops credentials and the S3 client."""
+        client = DuckLake()
+        stale = DuckLakeCredentials(
+            access_key="test_key",
+            secret_key="test_secret",
+        )
+        client.credentials = stale
+
+        with patch.object(client, "_download_catalog"):
+            await client.login()
+
+        assert client.credentials is None
+        assert client._s3_client is None
@pytest.mark.asyncio
async def test_close_clears_state(self):
client = DuckLake()
- await client.close()
+ client._engine = MagicMock()
+ with patch(
+ "pysus.api.ducklake.client.to_thread.run_sync",
+ side_effect=lambda fn, *a, **kw: fn(),
+ ):
+ await client.close()
assert client._engine is None
assert client._Session is None
assert client._s3_client is None
+    @pytest.mark.asyncio
+    async def test_close_with_datasets(self):
+        """``close`` closes each tracked dataset and empties the list."""
+        client = DuckLake()
+        tracked = AsyncMock(spec=DuckDataset)
+        client._datasets.append(tracked)
+
+        await client.close()
+
+        tracked.close.assert_awaited_once_with(update_catalog=False)
+        assert client._datasets == []
+
+    @pytest.mark.asyncio
+    async def test_close_with_update_catalog(self):
+        """``close(update_catalog=True)`` awaits ``_upload_catalog`` once."""
+        client = DuckLake()
+        tracked = AsyncMock(spec=DuckDataset)
+        client._datasets.append(tracked)
+
+        with patch.object(client, "_upload_catalog") as upload_mock:
+            await client.close(update_catalog=True)
+
+        upload_mock.assert_awaited_once()
+
@pytest.mark.asyncio
async def test_get_s3_client_requires_credentials(self):
client = DuckLake()
@@ -89,146 +134,517 @@ async def test_upload_catalog_requires_auth(self):
await client._upload_catalog()
-class TestDownloadFile:
- pass
+class TestDuckLakeDatasets:
+ @pytest.mark.asyncio
+ async def test_datasets_creates_session_and_returns_duckdatasets(
+ self, tmp_path
+ ):
+ with patch("pysus.api.ducklake.client.CACHEPATH", tmp_path):
+ client = DuckLake()
+ mock_session = MagicMock()
+ mock_session.__enter__.return_value = mock_session
-class TestLoadCatalog:
- pass
+ record = PerDataset(name="sinan", long_name="SINAN", description="Test")
+ mock_session.query.return_value.all.return_value = [record]
+ client._Session = MagicMock(return_value=mock_session)
-class TestUploadCatalog:
- @pytest.mark.asyncio
- async def test_upload_catalog_without_auth_raises(self):
- client = DuckLake()
- with pytest.raises(PermissionError):
- await client._upload_catalog()
+ def run_sync(fn, *args, **kwargs):
+ return fn()
+
+ with patch(
+ "pysus.api.ducklake.client.to_thread.run_sync",
+ side_effect=run_sync,
+ ):
+ result = await client.datasets()
+ assert len(result) == 1
+ assert isinstance(result[0], DuckDataset)
+ assert result[0].record.name == "sinan"
-class TestDuckLakeQuery:
@pytest.mark.asyncio
- async def test_query_filters_by_dataset(self):
- from pysus.api.ducklake.catalog import CatalogDataset, CatalogFile
+ async def test_datasets_connects_if_no_session(self, tmp_path):
+ with patch("pysus.api.ducklake.client.CACHEPATH", tmp_path):
+ client = DuckLake()
+
+ assert client._Session is None
- client = DuckLake()
mock_session = MagicMock()
+ mock_session.__enter__.return_value = mock_session
+ mock_session.query.return_value.all.return_value = []
- mock_catalog_file = MagicMock(spec=CatalogFile)
- mock_catalog_file.dataset = MagicMock(spec=CatalogDataset)
- mock_catalog_file.dataset.name = "sinan"
- mock_catalog_file.group = None
- mock_catalog_file.path = "test.parquet"
+ async def _connect(*args, **kwargs):
+ client._Session = MagicMock(return_value=mock_session)
- mock_query = MagicMock()
- mock_query.options.return_value.join.return_value.filter.return_value.all.return_value = [ # noqa: E501
- mock_catalog_file
- ]
- mock_session.query.return_value = mock_query
+ with patch.object(
+ DuckLake, "connect", new=AsyncMock(side_effect=_connect)
+ ):
- client._Session = MagicMock(return_value=mock_session)
+ def run_sync(fn, *args, **kwargs):
+ return fn()
+
+ with patch(
+ "pysus.api.ducklake.client.to_thread.run_sync",
+ side_effect=run_sync,
+ ):
+ await client.datasets()
+
+
+class TestDuckLakeSetupEngine:
+ def test_setup_engine_has_pysus_schema(self):
+ with patch("pysus.api.ducklake.client.create_engine") as mock_create:
+ mock_engine = MagicMock()
+ mock_conn = MagicMock()
+ mock_engine.connect.return_value.__enter__.return_value = mock_conn
+ mock_create.return_value = mock_engine
- result = await client.query(dataset="sinan")
- assert isinstance(result, list)
+ mock_conn.exec_driver_sql().fetchone.return_value = (1,)
+ client = DuckLake()
+ result = client._setup_engine()
+
+ calls = [str(c) for c in mock_conn.exec_driver_sql.call_args_list]
+ assert any(
+ "SET search_path" in c and "pysus,main" in c for c in calls
+ )
+ assert result is mock_engine
+
+ def test_setup_engine_no_pysus_schema(self):
+ with patch("pysus.api.ducklake.client.create_engine") as mock_create:
+ mock_engine = MagicMock()
+ mock_conn = MagicMock()
+ mock_engine.connect.return_value.__enter__.return_value = mock_conn
+ mock_create.return_value = mock_engine
+
+ mock_conn.exec_driver_sql().fetchone.return_value = None
+
+ client = DuckLake()
+ result = client._setup_engine()
+
+ calls = [str(c) for c in mock_conn.exec_driver_sql.call_args_list]
+ assert any("SET search_path" in c and "'main'" in c for c in calls)
+ assert result is mock_engine
+
+ def test_setup_engine_with_credentials(self):
+ with patch("pysus.api.ducklake.client.create_engine") as mock_create:
+ mock_engine = MagicMock()
+ mock_conn = MagicMock()
+ mock_engine.connect.return_value.__enter__.return_value = mock_conn
+ mock_create.return_value = mock_engine
+
+ mock_conn.exec_driver_sql().fetchone.return_value = None
+
+ client = DuckLake(
+ credentials=DuckLakeCredentials(
+ access_key="ak", secret_key="sk"
+ )
+ )
+ client._setup_engine()
+
+ calls = [str(c) for c in mock_conn.exec_driver_sql.call_args_list]
+ s3_access = any(
+ "s3_access_key_id" in c and "ak" in c for c in calls
+ )
+ s3_secret = any(
+ "s3_secret_access_key" in c and "sk" in c for c in calls
+ )
+ assert s3_access
+ assert s3_secret
+
+
+class TestDuckLakeConnect:
@pytest.mark.asyncio
- async def test_query_filters_by_group(self):
+ async def test_connect_already_connected_returns_early(self):
client = DuckLake()
client._engine = MagicMock()
- mock_session = MagicMock()
- mock_session.__enter__ = MagicMock(return_value=mock_session)
- mock_session.__exit__ = MagicMock(return_value=False)
+ client._Session = MagicMock()
+ with patch.object(client, "_download_catalog") as mock_dl:
+ await client.connect()
+ mock_dl.assert_not_called()
- mock_query = MagicMock()
- mock_query.options.return_value.join.return_value.filter.return_value.filter.return_value.all.return_value = ( # noqa: E501
- []
+ @pytest.mark.asyncio
+ async def test_connect_creates_session_if_missing(self):
+ client = DuckLake()
+ client._engine = MagicMock()
+ client._Session = None
+ with patch.object(client, "_download_catalog") as mock_dl:
+ await client.connect()
+ assert client._Session is not None
+ mock_dl.assert_not_called()
+
+ @pytest.mark.asyncio
+ async def test_connect_downloads_and_sets_up_engine(self, tmp_path):
+ with patch("pysus.api.ducklake.client.CACHEPATH", tmp_path):
+ client = DuckLake()
+
+ client._engine = None
+
+ def run_sync(fn, *args, **kwargs):
+ return fn()
+
+ with patch.object(client, "_download_catalog") as mock_dl:
+ with patch(
+ "pysus.api.ducklake.client.to_thread.run_sync",
+ side_effect=run_sync,
+ ):
+ with patch.object(
+ client, "_setup_engine", return_value=MagicMock()
+ ):
+ await client.connect()
+ mock_dl.assert_awaited_once_with(
+ client._catalog_local,
+ client._catalog_remote,
+ )
+ assert client._Session is not None
+ assert client._engine is not None
+
+
+class TestDuckLakeDownload:
+ @pytest.mark.asyncio
+ async def test_download_retry_then_success(self, tmp_path):
+ client = DuckLake()
+ local_path = tmp_path / "test.db"
+ remote_path = "public/test.db"
+
+ class FailingAsyncIter:
+ def __aiter__(self):
+ return self
+
+ async def __anext__(self):
+ raise OSError("Connection dropped")
+
+ mock_client = MagicMock()
+ mock_client.__aenter__.return_value = mock_client
+ httpx_patcher = patch(
+ "pysus.api.ducklake.client.httpx.AsyncClient",
+ return_value=mock_client,
+ )
+ sleep_patcher = patch(
+ "pysus.api.ducklake.client.sleep", new_callable=AsyncMock
)
- mock_session.query.return_value = mock_query
- client._Session = MagicMock(return_value=mock_session)
+ first_stream_cm = MagicMock()
+ first_resp = MagicMock()
+ first_stream_cm.__aenter__.return_value = first_resp
+ first_resp.raise_for_status = MagicMock()
+ first_resp.headers.get.return_value = "4"
+ first_resp.aiter_bytes.return_value = FailingAsyncIter()
+
+ second_stream_cm = MagicMock()
+
+ async def success_iter():
+ yield b"data"
- result = await client.query(group="DENGUE")
- assert isinstance(result, list)
+ second_resp = MagicMock()
+ second_stream_cm.__aenter__.return_value = second_resp
+ second_resp.raise_for_status = MagicMock()
+ second_resp.headers.get.return_value = "4"
+ second_resp.aiter_bytes.return_value = success_iter()
+
+ mock_client.stream.side_effect = [first_stream_cm, second_stream_cm]
+
+ with httpx_patcher, sleep_patcher as mock_sleep:
+ await client._download(remote_path, local_path)
+
+ assert local_path.exists()
+ assert local_path.read_bytes() == b"data"
+ assert mock_client.stream.call_count == 2
+ mock_sleep.assert_awaited_once_with(1)
@pytest.mark.asyncio
- async def test_query_filters_by_state(self):
+ async def test_download_retry_exhausted_raises(self, tmp_path):
client = DuckLake()
- client._engine = MagicMock()
- mock_session = MagicMock()
- mock_session.__enter__ = MagicMock(return_value=mock_session)
- mock_session.__exit__ = MagicMock(return_value=False)
+ local_path = tmp_path / "test.db"
+ remote_path = "public/test.db"
- mock_query = MagicMock()
- mock_query.options.return_value.join.return_value.filter.return_value.filter.return_value.all.return_value = ( # noqa: E501
- []
+ class FailingAsyncIter:
+ def __aiter__(self):
+ return self
+
+ async def __anext__(self):
+ raise OSError("Connection dropped")
+
+ mock_client = MagicMock()
+ mock_client.__aenter__.return_value = mock_client
+ httpx_patcher = patch(
+ "pysus.api.ducklake.client.httpx.AsyncClient",
+ return_value=mock_client,
+ )
+ sleep_patcher = patch(
+ "pysus.api.ducklake.client.sleep", new_callable=AsyncMock
)
- mock_session.query.return_value = mock_query
- client._Session = MagicMock(return_value=mock_session)
+ stream_cm = MagicMock()
+ resp = MagicMock()
+ stream_cm.__aenter__.return_value = resp
+ resp.raise_for_status = MagicMock()
+ resp.headers.get.return_value = "4"
+ resp.aiter_bytes.return_value = FailingAsyncIter()
- result = await client.query(state="SP")
- assert isinstance(result, list)
+ mock_client.stream.return_value = stream_cm
+
+ with httpx_patcher, sleep_patcher as mock_sleep:
+ with pytest.raises(OSError, match="Connection dropped"):
+ await client._download(remote_path, local_path)
+
+ assert mock_client.stream.call_count == 5
+ assert mock_sleep.await_count == 4
@pytest.mark.asyncio
- async def test_query_filters_by_year(self):
+ async def test_download_with_callback(self, tmp_path):
client = DuckLake()
- client._engine = MagicMock()
- mock_session = MagicMock()
- mock_session.__enter__ = MagicMock(return_value=mock_session)
- mock_session.__exit__ = MagicMock(return_value=False)
+ local_path = tmp_path / "test.db"
+ remote_path = "public/test.db"
- mock_query = MagicMock()
- mock_query.options.return_value.join.return_value.filter.return_value.filter.return_value.all.return_value = ( # noqa: E501
- []
- )
- mock_session.query.return_value = mock_query
+ mock_client = MagicMock()
+ mock_client.__aenter__.return_value = mock_client
- client._Session = MagicMock(return_value=mock_session)
+ stream_cm = MagicMock()
+
+ async def success_iter():
+ yield b"hello"
+ yield b"world"
+
+ resp = MagicMock()
+ stream_cm.__aenter__.return_value = resp
+ resp.raise_for_status = MagicMock()
+ resp.headers.get.return_value = "10"
+ resp.aiter_bytes.return_value = success_iter()
- result = await client.query(year=2024)
- assert isinstance(result, list)
+ mock_client.stream.return_value = stream_cm
+ callback = MagicMock()
+
+ with patch(
+ "pysus.api.ducklake.client.httpx.AsyncClient",
+ return_value=mock_client,
+ ):
+ await client._download(remote_path, local_path, callback=callback)
+
+ callback.assert_any_call(5, 10)
+ callback.assert_any_call(10, 10)
+
+
+class TestDuckLakeDownloadCatalog:
@pytest.mark.asyncio
- async def test_query_filters_by_month(self):
+ async def test_download_catalog_size_match_skips_download(self, tmp_path):
+ local_path = tmp_path / "catalog.duckdb"
+ local_path.write_text("test")
+ remote_path = "public/catalog.duckdb"
+
client = DuckLake()
- client._engine = MagicMock()
- mock_session = MagicMock()
- mock_session.__enter__ = MagicMock(return_value=mock_session)
- mock_session.__exit__ = MagicMock(return_value=False)
- mock_query = MagicMock()
- mock_query.options.return_value.join.return_value.filter.return_value.filter.return_value.filter.return_value.filter.return_value.all.return_value = ( # noqa: E501
- []
- )
- mock_session.query.return_value = mock_query
+ mock_http = MagicMock()
+ mock_resp = MagicMock()
+ mock_resp.headers = {"content-length": "4"}
+ mock_resp.raise_for_status = MagicMock()
+ mock_http.head = AsyncMock(return_value=mock_resp)
+ mock_http.__aenter__.return_value = mock_http
+
+ with patch(
+ "pysus.api.ducklake.client.httpx.AsyncClient",
+ return_value=mock_http,
+ ):
+ with patch.object(client, "_download") as mock_dl:
+ await client._download_catalog(local_path, remote_path)
+ mock_dl.assert_not_called()
- client._Session = MagicMock(return_value=mock_session)
+ @pytest.mark.asyncio
+ async def test_download_catalog_size_mismatch_downloads(self, tmp_path):
+ local_path = tmp_path / "catalog.duckdb"
+ local_path.write_text("test")
+ remote_path = "public/catalog.duckdb"
+
+ client = DuckLake()
- result = await client.query(month=1)
- assert isinstance(result, list)
+ mock_http = MagicMock()
+ mock_resp = MagicMock()
+ mock_resp.headers = {"content-length": "100"}
+ mock_resp.raise_for_status = MagicMock()
+ mock_http.head = AsyncMock(return_value=mock_resp)
+ mock_http.__aenter__.return_value = mock_http
+
+ with patch(
+ "pysus.api.ducklake.client.httpx.AsyncClient",
+ return_value=mock_http,
+ ):
+ with patch.object(client, "_download") as mock_dl:
+ await client._download_catalog(local_path, remote_path)
+ mock_dl.assert_awaited_once_with(remote_path, local_path)
@pytest.mark.asyncio
- async def test_query_no_filters(self):
- from pysus.api.ducklake.catalog import CatalogDataset, CatalogFile
+ async def test_download_catalog_local_not_exists(self, tmp_path):
+ local_path = tmp_path / "catalog.duckdb"
+ remote_path = "public/catalog.duckdb"
client = DuckLake()
- mock_session = MagicMock()
- mock_catalog_file = MagicMock(spec=CatalogFile)
- mock_catalog_file.path = "public/test.parquet"
- mock_catalog_file.dataset = MagicMock(spec=CatalogDataset)
- mock_catalog_file.dataset.name = "sinan"
- mock_catalog_file.group = None
+ mock_http = MagicMock()
+ mock_resp = MagicMock()
+ mock_resp.headers = {"content-length": "100"}
+ mock_resp.raise_for_status = MagicMock()
+ mock_http.head = AsyncMock(return_value=mock_resp)
+ mock_http.__aenter__.return_value = mock_http
+
+ with patch(
+ "pysus.api.ducklake.client.httpx.AsyncClient",
+ return_value=mock_http,
+ ):
+ with patch.object(client, "_download") as mock_dl:
+ await client._download_catalog(local_path, remote_path)
+ mock_dl.assert_awaited_once_with(remote_path, local_path)
- mock_query = MagicMock()
- mock_query.options.return_value.join.return_value.all.return_value = [
- mock_catalog_file
- ]
- mock_session.query.return_value = mock_query
+ @pytest.mark.asyncio
+ async def test_download_catalog_head_fails(self, tmp_path):
+ local_path = tmp_path / "catalog.duckdb"
+ remote_path = "public/catalog.duckdb"
- client._Session = MagicMock(return_value=mock_session)
+ client = DuckLake()
- try:
- result = await client.query(dataset="sinan")
- assert isinstance(result, list)
- except OSError:
- pass
+ mock_http = MagicMock()
+ mock_resp = MagicMock()
+ mock_resp.headers = {}
+ mock_http.head = AsyncMock(side_effect=Exception("HEAD failed"))
+ mock_http.__aenter__.return_value = mock_http
+
+ with patch(
+ "pysus.api.ducklake.client.httpx.AsyncClient",
+ return_value=mock_http,
+ ):
+ with patch.object(client, "_download") as mock_dl:
+ await client._download_catalog(local_path, remote_path)
+ mock_dl.assert_awaited_once_with(remote_path, local_path)
+
+ @pytest.mark.asyncio
+ async def test_download_catalog_head_no_content_length(self, tmp_path):
+ local_path = tmp_path / "catalog.duckdb"
+ local_path.write_text("test")
+ remote_path = "public/catalog.duckdb"
+
+ client = DuckLake()
+
+ mock_http = MagicMock()
+ mock_resp = MagicMock()
+ mock_resp.headers = {}
+ mock_resp.raise_for_status = MagicMock()
+ mock_http.head = AsyncMock(return_value=mock_resp)
+ mock_http.__aenter__.return_value = mock_http
+
+ with patch(
+ "pysus.api.ducklake.client.httpx.AsyncClient",
+ return_value=mock_http,
+ ):
+ with patch.object(client, "_download") as mock_dl:
+ await client._download_catalog(local_path, remote_path)
+ mock_dl.assert_awaited_once_with(remote_path, local_path)
+
+ @pytest.mark.asyncio
+ async def test_download_catalog_oserror_on_local_stat(self, tmp_path):
+ local_path = tmp_path / "catalog.duckdb"
+ local_path.write_text("test")
+ remote_path = "public/catalog.duckdb"
+
+ client = DuckLake()
+
+ mock_http = MagicMock()
+ mock_resp = MagicMock()
+ mock_resp.headers = {"content-length": "999"}
+ mock_resp.raise_for_status = MagicMock()
+ mock_http.head = AsyncMock(return_value=mock_resp)
+ mock_http.__aenter__.return_value = mock_http
+
+ stat_call_count = 0
+ original_stat = type(local_path).stat
+
+ def broken_stat(self, *args, **kwargs):
+ nonlocal stat_call_count
+ stat_call_count += 1
+ if stat_call_count == 2:
+ raise OSError(errno.EACCES, "permission denied")
+ return original_stat(self, *args, **kwargs)
+
+ with patch.object(type(local_path), "stat", broken_stat):
+ with patch(
+ "pysus.api.ducklake.client.httpx.AsyncClient",
+ return_value=mock_http,
+ ):
+ with patch.object(client, "_download") as mock_dl:
+ await client._download_catalog(local_path, remote_path)
+ mock_dl.assert_awaited_once_with(remote_path, local_path)
+
+
+class TestDuckLakeDownloadFile:
+ @pytest.mark.asyncio
+ async def test_download_file_invalid_type_raises(self):
+ client = DuckLake()
+ with pytest.raises(
+ ValueError, match="FTP File was not properly instantiated"
+ ):
+ await client._download_file(
+ "not-a-file", Path("/tmp/test")
+ ) # type: ignore
+
+ @pytest.mark.asyncio
+ async def test_download_file_valid(self, tmp_path):
+ client = DuckLake()
+
+ record = CatalogFile(
+ path="remote/path/file.csv",
+ type="csv",
+ size=100,
+ rows=10,
+ modified=datetime.now(),
+ origin_size=100,
+ origin_path="remote/path/file.csv",
+ )
+
+ dataset = MagicMock(spec=DuckDataset)
+ f = File(dataset=dataset, record=record) # type: ignore
+
+ output = tmp_path / "output.csv"
+ with patch.object(client, "_download") as mock_dl:
+ result = await client._download_file(f, output)
+ mock_dl.assert_awaited_once_with(record.path, output, callback=None)
+ assert result == output
+
+
+class TestDuckLakeUploadCatalog:
+ @pytest.mark.asyncio
+ async def test_upload_catalog_with_datasets(self, tmp_path):
+ client = DuckLake(
+ credentials=DuckLakeCredentials(access_key="ak", secret_key="sk")
+ )
+ client._s3_client = MagicMock()
+
+ ds = AsyncMock(spec=DuckDataset)
+ local_db = tmp_path / "catalog_test.duckdb"
+ local_db.write_text("data")
+ ds._catalog_local = local_db
+ ds._catalog_name = "catalog_test.duckdb"
+
+ with patch.object(
+ DuckLake, "datasets", new=AsyncMock(return_value=[ds])
+ ):
+ await client._upload_catalog()
+ client._s3_client.upload_file.assert_called_once_with(
+ str(local_db), client.bucket, ds._catalog_name
+ )
+
+ @pytest.mark.asyncio
+ async def test_upload_catalog_skips_missing_local(self, tmp_path):
+ client = DuckLake(
+ credentials=DuckLakeCredentials(access_key="ak", secret_key="sk")
+ )
+ client._s3_client = MagicMock()
+
+ ds = AsyncMock(spec=DuckDataset)
+ nonexistent = tmp_path / "nonexistent.duckdb"
+ ds._catalog_local = nonexistent
+ ds._catalog_name = "catalog_test.duckdb"
+
+ with patch.object(
+ DuckLake, "datasets", new=AsyncMock(return_value=[ds])
+ ):
+ await client._upload_catalog()
+ client._s3_client.upload_file.assert_not_called()
diff --git a/pysus/tests/api/ducklake/test_models.py b/pysus/tests/api/ducklake/test_models.py
new file mode 100644
index 00000000..2b38ea2b
--- /dev/null
+++ b/pysus/tests/api/ducklake/test_models.py
@@ -0,0 +1,635 @@
+"""Tests for DuckLake model wrappers (File, DuckDataset, DuckGroup)."""
+
+import hashlib
+from datetime import datetime
+from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, create_autospec, patch
+
+import pytest
+from pysus.api.ducklake.catalog.orm.dataset import Dataset
+from pysus.api.ducklake.catalog.orm.dataset import File as CatalogFile
+from pysus.api.ducklake.catalog.orm.dataset import Group
+from pysus.api.ducklake.models import DuckDataset, DuckGroup, File
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def catalog_file_record():
+ rec = CatalogFile(
+ path="remote/data/file.csv",
+ type="csv",
+ size=2048,
+ rows=500,
+ sha256="abc123deadbeef",
+ modified=datetime(2024, 6, 1, 12, 0, 0),
+ origin_size=2048,
+ origin_path="remote/data/file.csv",
+ )
+ return rec
+
+
+@pytest.fixture
+def record():
+ rec = Dataset(
+ name="sinan",
+ long_name="SINAN",
+ description="SINAN dataset",
+ )
+ return rec
+
+
+@pytest.fixture
+def group_record():
+ rec = Group(
+ name="acidentes",
+ long_name="Acidentes",
+ description="Acidentes de trânsito",
+ )
+ return rec
+
+
+@pytest.fixture
+def mock_client():
+ from pysus.api.ducklake.client import DuckLake
+
+ mc = create_autospec(DuckLake, instance=True)
+ mc._datasets = []
+ return mc
+
+
+@pytest.fixture
+def mock_dataset(mock_client, record):
+ with patch("pathlib.Path.mkdir"):
+ ds = DuckDataset(record=record, client=mock_client)
+ return ds
+
+
+@pytest.fixture
+def mock_group(group_record, mock_dataset):
+ with patch("pathlib.Path.mkdir"):
+ g = DuckGroup(record=group_record, dataset=mock_dataset)
+ return g
+
+
+# ---------------------------------------------------------------------------
+# File
+# ---------------------------------------------------------------------------
+
+
+class TestFile:
+ def test_init(self, catalog_file_record, mock_dataset):
+ f = File(dataset=mock_dataset, record=catalog_file_record)
+ assert f.record is catalog_file_record
+ assert f.dataset is mock_dataset
+ assert f.group is None
+
+ def test_init_with_group(
+ self, catalog_file_record, mock_dataset, mock_group
+ ):
+ f = File(
+ dataset=mock_dataset,
+ record=catalog_file_record,
+ group=mock_group,
+ )
+ assert f.group is mock_group
+
+ def test_path(self, catalog_file_record, mock_dataset):
+ f = File(dataset=mock_dataset, record=catalog_file_record)
+ assert f.path == Path("remote/data/file.csv")
+
+ def test_basename(self, catalog_file_record, mock_dataset):
+ f = File(dataset=mock_dataset, record=catalog_file_record)
+ assert f.basename == "file.csv"
+
+ def test_extension(self, catalog_file_record, mock_dataset):
+ f = File(dataset=mock_dataset, record=catalog_file_record)
+ assert f.extension == ".csv"
+
+ def test_size(self, catalog_file_record, mock_dataset):
+ f = File(dataset=mock_dataset, record=catalog_file_record)
+ assert f.size == 2048
+
+ def test_modify(self, catalog_file_record, mock_dataset):
+ f = File(dataset=mock_dataset, record=catalog_file_record)
+ assert f.modify == datetime(2024, 6, 1, 12, 0, 0)
+
+ def test_rows(self, catalog_file_record, mock_dataset):
+ f = File(dataset=mock_dataset, record=catalog_file_record)
+ assert f.rows == 500
+
+ def test_sha256(self, catalog_file_record, mock_dataset):
+ f = File(dataset=mock_dataset, record=catalog_file_record)
+ assert f.sha256 == "abc123deadbeef"
+
+ def test_sha256_none(self, catalog_file_record, mock_dataset):
+ catalog_file_record.sha256 = None
+ f = File(dataset=mock_dataset, record=catalog_file_record)
+ assert f.sha256 is None
+
+ def test_name_fallback(self, catalog_file_record, mock_dataset):
+ f = File(dataset=mock_dataset, record=catalog_file_record)
+ assert f.name == "file.csv"
+
+ @pytest.mark.asyncio
+ async def test_download_with_explicit_output(
+ self, catalog_file_record, mock_dataset
+ ):
+ f = File(dataset=mock_dataset, record=catalog_file_record)
+ output = Path("/tmp/out.csv")
+ cb = MagicMock()
+ mock_dataset.client._download_file.return_value = output
+ result = await f._download(output=output, callback=cb)
+ mock_dataset.client._download_file.assert_awaited_once_with(
+ f, output, callback=cb
+ )
+ assert result == output
+
+ @pytest.mark.asyncio
+ async def test_download_without_output(
+ self, catalog_file_record, mock_dataset
+ ):
+ from pysus import CACHEPATH
+
+ f = File(dataset=mock_dataset, record=catalog_file_record)
+ expected = CACHEPATH / f.name
+ mock_dataset.client._download_file.return_value = expected
+ result = await f._download()
+ mock_dataset.client._download_file.assert_awaited_once_with(
+ f, expected, callback=None
+ )
+ assert result == expected
+
+ @pytest.mark.asyncio
+ async def test_verify_no_hash_returns_true(
+ self, catalog_file_record, mock_dataset, tmp_path
+ ):
+ catalog_file_record.sha256 = None
+ f = File(dataset=mock_dataset, record=catalog_file_record)
+ result = await f.verify(tmp_path / "whatever")
+ assert result is True
+
+ @pytest.mark.asyncio
+ async def test_verify_matching_hash(self, mock_dataset, tmp_path):
+ content = b"hello world, this is test content"
+ expected_hash = hashlib.sha256(content).hexdigest()
+
+ record = CatalogFile(
+ path="remote/data/file.csv",
+ type="csv",
+ sha256=expected_hash,
+ size=len(content),
+ rows=1,
+ modified=datetime.now(),
+ origin_size=len(content),
+ origin_path="remote/data/file.csv",
+ )
+
+ file_path = tmp_path / "test_file.csv"
+ file_path.write_bytes(content)
+
+ f = File(dataset=mock_dataset, record=record)
+ assert await f.verify(file_path) is True
+
+ @pytest.mark.asyncio
+ async def test_verify_mismatching_hash(self, mock_dataset, tmp_path):
+ content = b"hello world, this is test content"
+ wrong_content = b"this content does not match"
+ expected_hash = hashlib.sha256(content).hexdigest()
+
+ record = CatalogFile(
+ path="remote/data/file.csv",
+ type="csv",
+ sha256=expected_hash,
+ size=len(wrong_content),
+ rows=1,
+ modified=datetime.now(),
+ origin_size=len(wrong_content),
+ origin_path="remote/data/file.csv",
+ )
+
+ file_path = tmp_path / "test_file.csv"
+ file_path.write_bytes(wrong_content)
+
+ f = File(dataset=mock_dataset, record=record)
+ assert await f.verify(file_path) is False
+
+
+# ---------------------------------------------------------------------------
+# DuckDataset
+# ---------------------------------------------------------------------------
+
+
+class TestDuckDataset:
+ def test_init(self, mock_client, record):
+ with patch("pathlib.Path.mkdir"):
+ ds = DuckDataset(record=record, client=mock_client)
+ assert ds.record is record
+ assert ds.client is mock_client
+ assert ds._catalog_name == "catalog_sinan.duckdb"
+
+ def test_repr(self, mock_dataset):
+ assert repr(mock_dataset) == "SINAN"
+
+ def test_name(self, mock_dataset):
+ assert mock_dataset.name == "sinan"
+
+ def test_long_name(self, mock_dataset):
+ assert mock_dataset.long_name == ""
+
+ def test_description(self, mock_dataset):
+ assert mock_dataset.description == ""
+
+ def test_catalog_path(self, mock_dataset):
+ from pysus import CACHEPATH
+
+ expected = Path(CACHEPATH) / "ducklake" / "catalog_sinan.duckdb"
+ assert mock_dataset.catalog_path == expected
+
+ @pytest.mark.asyncio
+ async def test_connect_already_connected(self, mock_dataset, mock_client):
+ mock_dataset._engine = MagicMock()
+ mock_dataset._Session = MagicMock()
+ await mock_dataset.connect(force=False)
+ mock_client._download.assert_not_called()
+
+ @pytest.mark.asyncio
+ async def test_connect_force_reconnects(self, mock_dataset, mock_client):
+ mock_dataset._engine = MagicMock()
+ mock_dataset._Session = MagicMock()
+
+ def run_sync(fn, *args, **kwargs):
+ return fn()
+
+ with patch(
+ "pysus.api.ducklake.models.to_thread.run_sync",
+ side_effect=run_sync,
+ ):
+ with patch.object(
+ mock_client, "_setup_engine", return_value=MagicMock()
+ ):
+ await mock_dataset.connect(force=True)
+
+ mock_client._download.assert_awaited_once()
+
+ @pytest.mark.asyncio
+ async def test_connect_creates_session_if_missing(
+ self, mock_dataset, mock_client
+ ):
+ mock_dataset._engine = MagicMock()
+ mock_dataset._Session = None
+ await mock_dataset.connect(force=False)
+ assert mock_dataset._Session is not None
+ mock_client._download.assert_not_called()
+
+ @pytest.mark.asyncio
+ async def test_connect_full_path(self, mock_dataset, mock_client):
+ mock_dataset._engine = None
+
+ def run_sync(fn, *args, **kwargs):
+ return fn()
+
+ with patch(
+ "pysus.api.ducklake.models.to_thread.run_sync",
+ side_effect=run_sync,
+ ):
+ with patch.object(
+ mock_client, "_setup_engine", return_value=MagicMock()
+ ):
+ await mock_dataset.connect()
+
+ mock_client._download.assert_awaited_once()
+ assert mock_dataset._engine is not None
+ assert mock_dataset._Session is not None
+ assert mock_dataset in mock_client._datasets
+
+ @pytest.mark.asyncio
+ async def test_close_disposes_engine(self, mock_dataset):
+ engine = MagicMock()
+ mock_dataset._engine = engine
+ with patch(
+ "pysus.api.ducklake.models.to_thread.run_sync",
+ side_effect=lambda fn, *a, **kw: fn(),
+ ):
+ await mock_dataset.close()
+ engine.dispose.assert_called_once()
+ assert mock_dataset._engine is None
+ assert mock_dataset._Session is None
+
+ @pytest.mark.asyncio
+ async def test_close_noop_when_no_engine(self, mock_dataset):
+ mock_dataset._engine = None
+ await mock_dataset.close()
+
+ @pytest.mark.asyncio
+ async def test_close_with_update_catalog(self, mock_dataset, mock_client):
+ engine = MagicMock()
+ mock_dataset._engine = engine
+ mock_client._is_authenticated = True
+
+ with patch.object(mock_dataset, "_upload_catalog") as mock_upload:
+ with patch(
+ "pysus.api.ducklake.models.to_thread.run_sync",
+ side_effect=lambda fn, *a, **kw: fn(),
+ ):
+ await mock_dataset.close(update_catalog=True)
+ engine.dispose.assert_called_once()
+ mock_upload.assert_awaited_once()
+
+ @pytest.mark.asyncio
+ async def test_upload_catalog_no_credentials_raises(self, mock_dataset):
+ mock_dataset.client.credentials = None
+ with pytest.raises(PermissionError, match="Admin credentials required"):
+ await mock_dataset._upload_catalog()
+
+ @pytest.mark.asyncio
+ async def test_upload_catalog_success(
+ self, mock_dataset, mock_client, tmp_path
+ ):
+ mock_client.credentials = MagicMock()
+ mock_client._s3_client = MagicMock()
+ mock_client.bucket = "pysus"
+ local_db = tmp_path / "catalog_sinan.duckdb"
+ local_db.write_text("data")
+ mock_dataset._catalog_local = local_db
+
+ def run_sync(fn, *args, **kwargs):
+ return fn()
+
+ with patch(
+ "pysus.api.ducklake.models.to_thread.run_sync",
+ side_effect=run_sync,
+ ):
+ await mock_dataset._upload_catalog()
+
+ mock_client._s3_client.upload_file.assert_called_once_with(
+ str(local_db),
+ mock_client.bucket,
+ f"catalog_{mock_dataset.record.name.lower()}.duckdb",
+ )
+
+ @pytest.mark.asyncio
+ async def test_query_no_filters(self, mock_dataset):
+ mock_session = MagicMock()
+ mock_session.__enter__.return_value = mock_session
+ mock_query = MagicMock()
+ mock_session.query.return_value = mock_query
+ mock_query.options.return_value = mock_query
+ mock_query.all.return_value = []
+
+ mock_dataset._Session = MagicMock(return_value=mock_session)
+
+ def run_sync(fn, *args, **kwargs):
+ return fn()
+
+ with patch(
+ "pysus.api.ducklake.models.to_thread.run_sync",
+ side_effect=run_sync,
+ ):
+ result = await mock_dataset.query()
+
+ assert result == []
+
+ @pytest.mark.asyncio
+ async def test_query_with_all_filters(self, mock_dataset):
+ mock_session = MagicMock()
+ mock_session.__enter__.return_value = mock_session
+ mock_query = MagicMock()
+ mock_session.query.return_value = mock_query
+ mock_query.options.return_value = mock_query
+ mock_query.join.return_value = mock_query
+ mock_query.filter.return_value = mock_query
+ mock_query.all.return_value = []
+
+ mock_dataset._Session = MagicMock(return_value=mock_session)
+
+ def run_sync(fn, *args, **kwargs):
+ return fn()
+
+ with patch(
+ "pysus.api.ducklake.models.to_thread.run_sync",
+ side_effect=run_sync,
+ ):
+ result = await mock_dataset.query(
+ group="acidentes%",
+ state="RJ",
+ year=2024,
+ month=6,
+ )
+
+ assert result == []
+
+ @pytest.mark.asyncio
+ async def test_query_connects_if_no_session(self, mock_dataset):
+ mock_dataset._Session = None
+
+ mock_session = MagicMock()
+ mock_session.__enter__.return_value = mock_session
+ mock_query = MagicMock()
+ mock_session.query.return_value = mock_query
+ mock_query.options.return_value = mock_query
+ mock_query.all.return_value = []
+
+ async def _connect(*args, **kwargs):
+ mock_dataset._Session = MagicMock(return_value=mock_session)
+
+ with patch.object(
+ DuckDataset, "connect", new=AsyncMock(side_effect=_connect)
+ ) as mock_connect:
+ with patch(
+ "pysus.api.ducklake.models.to_thread.run_sync",
+ side_effect=lambda fn, *a, **kw: fn(),
+ ):
+ await mock_dataset.query()
+ mock_connect.assert_awaited_once()
+
+ @pytest.mark.asyncio
+ async def test_fetch_content_with_groups_and_files(
+ self, mock_dataset, mock_client
+ ):
+ mock_session = MagicMock()
+ mock_session.__enter__.return_value = mock_session
+ mock_query = MagicMock()
+ mock_session.query.return_value = mock_query
+ mock_query.options.return_value = mock_query
+ mock_query.filter.return_value = mock_query
+
+ group_rec = Group(
+ name="dengue",
+ long_name="Dengue",
+ description="Dengue data",
+ )
+
+ file_rec = CatalogFile(
+ path="remote/dengue/data.csv",
+ type="csv",
+ sha256="hash123",
+ size=100,
+ rows=10,
+ modified=datetime.now(),
+ origin_size=100,
+ origin_path="remote/dengue/data.csv",
+ )
+
+ dataset_rec = Dataset(
+ name="sinan",
+ long_name="SINAN",
+ description="SINAN dataset",
+ )
+ dataset_rec.groups = [group_rec]
+ dataset_rec.files = [file_rec]
+
+ mock_query.first.return_value = dataset_rec
+
+ mock_dataset._Session = MagicMock(return_value=mock_session)
+
+ def run_sync(fn, *args, **kwargs):
+ return fn()
+
+ with patch(
+ "pysus.api.ducklake.models.to_thread.run_sync",
+ side_effect=run_sync,
+ ):
+ items = await mock_dataset._fetch_content()
+
+ assert len(items) == 2
+ assert isinstance(items[0], DuckGroup)
+ assert items[0].record is group_rec
+ assert isinstance(items[1], File)
+ assert items[1].record is file_rec
+
+ @pytest.mark.asyncio
+ async def test_fetch_content_no_dataset(self, mock_dataset):
+ mock_session = MagicMock()
+ mock_session.__enter__.return_value = mock_session
+ mock_query = MagicMock()
+ mock_session.query.return_value = mock_query
+ mock_query.options.return_value = mock_query
+ mock_query.filter.return_value = mock_query
+ mock_query.first.return_value = None
+
+ mock_dataset._Session = MagicMock(return_value=mock_session)
+
+ def run_sync(fn, *args, **kwargs):
+ return fn()
+
+ with patch(
+ "pysus.api.ducklake.models.to_thread.run_sync",
+ side_effect=run_sync,
+ ):
+ items = await mock_dataset._fetch_content()
+
+ assert items == []
+
+ @pytest.mark.asyncio
+ async def test_fetch_content_only_groups(self, mock_dataset):
+ mock_session = MagicMock()
+ mock_session.__enter__.return_value = mock_session
+ mock_query = MagicMock()
+ mock_session.query.return_value = mock_query
+ mock_query.options.return_value = mock_query
+ mock_query.filter.return_value = mock_query
+
+ group_rec = Group(
+ name="dengue",
+ long_name="Dengue",
+ description="Dengue data",
+ )
+
+ dataset_rec = Dataset(
+ name="sinan",
+ long_name="SINAN",
+ description="Test",
+ )
+ dataset_rec.groups = [group_rec]
+ dataset_rec.files = []
+
+ mock_query.first.return_value = dataset_rec
+
+ mock_dataset._Session = MagicMock(return_value=mock_session)
+
+ def run_sync(fn, *args, **kwargs):
+ return fn()
+
+ with patch(
+ "pysus.api.ducklake.models.to_thread.run_sync",
+ side_effect=run_sync,
+ ):
+ items = await mock_dataset._fetch_content()
+
+ assert len(items) == 1
+ assert isinstance(items[0], DuckGroup)
+
+ @pytest.mark.asyncio
+ async def test_fetch_content_connects_if_no_session(self, mock_dataset):
+ mock_dataset._Session = None
+
+ mock_session = MagicMock()
+ mock_session.__enter__.return_value = mock_session
+ mock_query = MagicMock()
+ mock_session.query.return_value = mock_query
+ mock_query.options.return_value = mock_query
+ mock_query.filter.return_value = mock_query
+ ds = Dataset(
+ name="sinan",
+ long_name="SINAN",
+ description="Test",
+ )
+ ds.groups = []
+ ds.files = []
+ mock_query.first.return_value = ds
+
+ async def _connect(*args, **kwargs):
+ mock_dataset._Session = MagicMock(return_value=mock_session)
+
+ with patch.object(
+ DuckDataset, "connect", new=AsyncMock(side_effect=_connect)
+ ) as mock_connect:
+ with patch(
+ "pysus.api.ducklake.models.to_thread.run_sync",
+ side_effect=lambda fn, *a, **kw: fn(),
+ ):
+ await mock_dataset._fetch_content()
+ mock_connect.assert_awaited_once()
+
+
+# ---------------------------------------------------------------------------
+# DuckGroup
+# ---------------------------------------------------------------------------
+
+
+class TestDuckGroup:
+ def test_name(self, mock_group):
+ assert mock_group.name == "acidentes"
+
+ def test_long_name(self, mock_group):
+ assert mock_group.long_name == "Acidentes"
+
+ def test_long_name_fallback(self, mock_group):
+ mock_group.record.long_name = None
+ assert mock_group.long_name == "acidentes"
+
+ def test_description(self, mock_group):
+ assert mock_group.description == "Acidentes de trânsito"
+
+ @pytest.mark.asyncio
+ async def test_fetch_files(self, mock_group, mock_dataset):
+ file_rec = CatalogFile(
+ path="remote/data/file.csv",
+ type="csv",
+ size=100,
+ rows=10,
+ modified=datetime.now(),
+ origin_size=100,
+ origin_path="remote/data/file.csv",
+ )
+ mock_group.record.files = [file_rec]
+
+ files = await mock_group._fetch_files()
+ assert len(files) == 1
+ assert isinstance(files[0], File)
+ assert files[0].record is file_rec
+ assert files[0].group is mock_group
+ assert files[0].dataset is mock_dataset
diff --git a/pysus/tests/api/ftp/test_client.py b/pysus/tests/api/ftp/test_client.py
index e3d6b999..3fb2bb51 100644
--- a/pysus/tests/api/ftp/test_client.py
+++ b/pysus/tests/api/ftp/test_client.py
@@ -12,6 +12,19 @@ def ftp_client():
return client
+def test_name_property(ftp_client):
+ assert ftp_client.name == "FTP"
+
+
+def test_long_name_property(ftp_client):
+ assert ftp_client.long_name == "Pysus FTP Client"
+
+
+def test_description_property(ftp_client):
+ assert isinstance(ftp_client.description, str)
+ assert len(ftp_client.description) > 0
+
+
def test_line_parser_file(ftp_client):
line = "03-09-26 04:30PM 12345 filename.dbc"
info = ftp_client._line_parser(line)
@@ -31,6 +44,17 @@ def test_line_parser_directory(ftp_client):
assert info["type"] == "dir"
+def test_line_parser_with_formatter_on_directory(ftp_client):
+ def mock_formatter(name):
+ return {"year": 2026, "state": "SC"}
+
+ line = "03-09-26 04:30PM DADOS"
+ info = ftp_client._line_parser(line, formatter=mock_formatter)
+
+ assert info["type"] == "dir"
+ assert info["year"] is None
+
+
def test_line_parser_with_formatter(ftp_client):
def mock_formatter(name):
return {"year": 2026, "state": "SC"}
@@ -42,6 +66,54 @@ def mock_formatter(name):
assert info["state"] == "SC"
+def test_line_parser_invalid_line(ftp_client):
+ with pytest.raises(ValueError, match="Invalid FTP line"):
+ ftp_client._line_parser("only three")
+
+
+def test_line_parser_invalid_date(ftp_client):
+ info = ftp_client._line_parser("invalid-date invalid-time DADOS")
+ assert info["name"] == "DADOS"
+ assert info["type"] == "dir"
+ assert isinstance(info["modify"], datetime)
+
+
+@pytest.mark.asyncio
+async def test_close_when_not_connected(ftp_client):
+ ftp_client._ftp = None
+ await ftp_client.close()
+ assert ftp_client._ftp is None
+
+
+@pytest.mark.asyncio
+async def test_connect_when_already_connected(ftp_client):
+ mock_ftp = MagicMock()
+ ftp_client._ftp = mock_ftp
+ await ftp_client.connect()
+ mock_ftp.quit.assert_not_called()
+ mock_ftp.close.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_close_normal(ftp_client):
+ mock_ftp = MagicMock()
+ ftp_client._ftp = mock_ftp
+ await ftp_client.close()
+ mock_ftp.quit.assert_called_once()
+ assert ftp_client._ftp is None
+
+
+@pytest.mark.asyncio
+async def test_close_quit_raises_exception(ftp_client):
+ mock_ftp = MagicMock()
+ mock_ftp.quit.side_effect = Exception("connection error")
+ ftp_client._ftp = mock_ftp
+ await ftp_client.close()
+ mock_ftp.quit.assert_called_once()
+ mock_ftp.close.assert_called_once()
+ assert ftp_client._ftp is None
+
+
@pytest.mark.asyncio
async def test_connect_and_login(ftp_client):
with patch("pysus.api.ftp.client.FTPLib") as mock_ftplib:
@@ -54,6 +126,13 @@ async def test_connect_and_login(ftp_client):
mock_instance.login.assert_called_once()
+@pytest.mark.asyncio
+async def test_datasets_raises_connection_error(ftp_client):
+ ftp_client._ftp = None
+ with pytest.raises(ConnectionError, match="not connected"):
+ await ftp_client.datasets()
+
+
@pytest.mark.asyncio
async def test_download_file_reconnects_on_failure(ftp_client):
mock_ftp_internal = MagicMock()
@@ -71,6 +150,45 @@ async def test_download_file_reconnects_on_failure(ftp_client):
assert mock_connect.call_count >= 1
+@pytest.mark.asyncio
+async def test_download_file_with_callback(ftp_client):
+ mock_ftp_internal = MagicMock()
+ ftp_client._ftp = mock_ftp_internal
+
+ mock_file = MagicMock()
+ mock_file.path = "remote/path.dbc"
+
+ callback = MagicMock()
+
+ def simulate_retrbinary(cmd, cb):
+ cb(b"chunk_data")
+
+ mock_ftp_internal.retrbinary.side_effect = simulate_retrbinary
+
+ with patch("builtins.open", MagicMock()):
+ await ftp_client._download_file(
+ mock_file, pathlib.Path("test.dbc"), callback=callback
+ )
+ callback.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_download_file_without_callback(ftp_client):
+    """Downloading without a callback must still call retrbinary once."""
+    mock_ftp_internal = MagicMock()
+    ftp_client._ftp = mock_ftp_internal
+    mock_file = MagicMock()
+    mock_file.path = "remote/path.dbc"
+
+    def simulate_retrbinary(cmd, cb):
+        cb(b"chunk_data")
+
+    mock_ftp_internal.retrbinary.side_effect = simulate_retrbinary
+    with patch("builtins.open", MagicMock()):
+        await ftp_client._download_file(mock_file, pathlib.Path("test.dbc"))
+    mock_ftp_internal.retrbinary.assert_called_once()
+
+
@pytest.mark.asyncio
async def test_list_directory_calls_ftp_methods(ftp_client):
mock_ftp_internal = MagicMock()
diff --git a/pysus/tests/api/ftp/test_databases.py b/pysus/tests/api/ftp/test_databases.py
index 7379f133..dfec8be6 100644
--- a/pysus/tests/api/ftp/test_databases.py
+++ b/pysus/tests/api/ftp/test_databases.py
@@ -2,7 +2,18 @@
import pytest
from pysus.api.ftp.client import FTP
-from pysus.api.ftp.databases import AVAILABLE_DATABASES
+from pysus.api.ftp.databases import (
+ AVAILABLE_DATABASES,
+ CIHA,
+ CNES,
+ IBGEDATASUS,
+ PNI,
+ SIA,
+ SIH,
+ SIM,
+ SINAN,
+ SINASC,
+)
@pytest.fixture
@@ -68,3 +79,95 @@ async def test_ciha_search_logic(mock_client):
assert res["year"] == 2011
assert res["month"] == 1
assert res["group"]["name"] == "CIHA"
+
+
+def test_ciha_formatter_exception(mock_client):
+ db = CIHA(client=mock_client)
+ result = db.formatter("A")
+ assert result == {"group": None, "state": None, "year": None, "month": None}
+
+
+def test_cnes_formatter_exception(mock_client):
+ db = CNES(client=mock_client)
+ result = db.formatter("A")
+ assert result == {"group": None, "state": None, "year": None, "month": None}
+
+
+def test_sinasc_formatter_exception(mock_client):
+ db = SINASC(client=mock_client)
+ result = db.formatter("A")
+ assert result == {"group": None, "state": None, "year": None}
+
+
+def test_sim_formatter_cid9(mock_client):
+ db = SIM(client=mock_client)
+ result = db.formatter("CID9DOAC96.dbc")
+ assert result["state"] == "AC"
+ assert result["year"] == 1996
+
+
+def test_sim_formatter_exception(mock_client):
+ db = SIM(client=mock_client)
+ result = db.formatter("A")
+ assert result == {"group": None, "state": None, "year": None}
+
+
+def test_pni_formatter_exception(mock_client):
+ db = PNI(client=mock_client)
+ result = db.formatter("A")
+ assert result == {"group": None, "state": None, "year": None}
+
+
+def test_ibge_formatter_proj(mock_client):
+ db = IBGEDATASUS(client=mock_client)
+ result = db.formatter("PROJBR00.zip")
+ assert result["year"] == 2000
+ assert result["group"]["name"] == "PROJ"
+
+
+def test_ibge_formatter_exception(mock_client):
+ db = IBGEDATASUS(client=mock_client)
+ result = db.formatter("A")
+ assert result == {"group": None, "year": None}
+
+
+def test_sia_formatter_group_not_in_definitions(mock_client):
+ db = SIA(client=mock_client)
+ result = db.formatter("ZZAC0001.dbc")
+ assert result["group"] is None
+
+
+def test_sia_formatter_exception(mock_client):
+ db = SIA(client=mock_client)
+ result = db.formatter("A")
+ assert result == {"group": None, "state": None, "year": None, "month": None}
+
+
+def test_sih_formatter_exception(mock_client):
+ db = SIH(client=mock_client)
+ result = db.formatter("A")
+ assert result == {"group": None, "state": None, "year": None, "month": None}
+
+
+def test_sinan_formatter_src(mock_client):
+ db = SINAN(client=mock_client)
+ result = db.formatter("SRCBR06.dbc")
+ assert result["group"]["name"] == "SRC"
+
+
+def test_sinan_formatter_leibr22(mock_client):
+ db = SINAN(client=mock_client)
+ result = db.formatter("LEIBR22.dbc")
+ assert result["group"]["name"] == "LEIV"
+
+
+def test_sinan_formatter_lerbr19(mock_client):
+ db = SINAN(client=mock_client)
+ result = db.formatter("LERBR19.dbc")
+ assert result["group"]["name"] == "LERD"
+
+
+def test_sinan_formatter_exception(mock_client):
+ db = SINAN(client=mock_client)
+ result = db.formatter("A")
+ assert result == {"group": None, "year": None}
diff --git a/pysus/tests/api/ftp/test_models.py b/pysus/tests/api/ftp/test_models.py
index 136577f0..5014e63c 100644
--- a/pysus/tests/api/ftp/test_models.py
+++ b/pysus/tests/api/ftp/test_models.py
@@ -1,6 +1,6 @@
from datetime import datetime
from pathlib import Path
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from pysus.api.ftp.client import FTP
@@ -52,6 +52,81 @@ async def test_file_properties(mock_dataset):
assert isinstance(file.modify, datetime)
+def test_file_init_path_from_info(mock_dataset):
+ info = {"path": "/root/test.dbc", "name": "test.dbc", "size": 1000}
+ file = File(
+ _info=info,
+ type="file",
+ dataset=mock_dataset,
+ )
+ assert file.path == Path("/root/test.dbc")
+
+
+def test_file_repr(mock_dataset):
+ file = File(
+ path="/root/test.dbc",
+ _info={"path": "/root/test.dbc", "name": "test.dbc"},
+ type="file",
+ dataset=mock_dataset,
+ )
+ assert repr(file) == "test.dbc"
+
+
+def test_file_month(mock_dataset):
+ info = {"path": "/root/test.dbc", "name": "test.dbc", "month": 6}
+ file = File(
+ path="/root/test.dbc",
+ _info=info,
+ type="file",
+ dataset=mock_dataset,
+ )
+ assert file.month == 6
+
+
+def test_file_modify_raises_value_error(mock_dataset):
+ info = {"path": "/root/test.dbc", "name": "test.dbc"}
+ file = File(
+ path="/root/test.dbc",
+ _info=info,
+ type="file",
+ dataset=mock_dataset,
+ )
+ with pytest.raises(ValueError, match="modify"):
+ _ = file.modify
+
+
+@pytest.mark.asyncio
+async def test_file_download_no_output(mock_client, mock_dataset, tmp_path):
+ file = File(
+ path="/root/test.dbc",
+ _info={"path": "/root/test.dbc", "name": "test.dbc"},
+ type="file",
+ dataset=mock_dataset,
+ )
+ cache_dir = tmp_path / "cache"
+ cache_dir.mkdir(parents=True, exist_ok=True)
+ with patch("pysus.api.ftp.models.CACHEPATH", cache_dir):
+ await file._download()
+ mock_client._download_file.assert_called_once()
+ args, _ = mock_client._download_file.call_args
+ assert args[1] == cache_dir / "test.dbc"
+
+
+@pytest.mark.asyncio
+async def test_file_download_calls_client(mock_client, mock_dataset, tmp_path):
+ file = File(
+ path="/root/test.dbc",
+ _info={"path": "/root/test.dbc", "name": "test.dbc"},
+ type="file",
+ dataset=mock_dataset,
+ )
+
+ dest = Path(tmp_path / "test.dbc")
+ await file._download(output=dest)
+
+ mock_client._download_file.assert_called_once_with(file, dest, None)
+
+
@pytest.mark.asyncio
async def test_directory_load(mock_client, mock_dataset):
mock_client._list_directory.return_value = [
@@ -75,6 +150,23 @@ async def test_directory_load(mock_client, mock_dataset):
assert Path(content[1].path) == Path("/root/file.dbc")
+@pytest.mark.asyncio
+async def test_directory_load_no_ftp_client():
+ dr = Directory(path="/root/test", client=MagicMock())
+ with pytest.raises(ValueError, match="no ftp client found"):
+ await dr.load()
+
+
+def test_directory_str():
+ dr = Directory(path="/root/test")
+ assert str(dr).replace("\\", "/") == "/root/test"
+
+
+def test_directory_repr():
+ dr = Directory(path="/root/test")
+ assert repr(dr).replace("\\", "/") == ""
+
+
@pytest.mark.asyncio
async def test_group_instantiation(mock_dataset):
group = Group(
@@ -90,6 +182,50 @@ async def test_group_instantiation(mock_dataset):
assert group.path == "/root/DC"
+def test_group_description(mock_dataset):
+ group = Group(
+ name="TEST",
+ path="/root/TEST",
+ dataset=mock_dataset,
+ long_name="Test Group",
+ description="A test group description",
+ )
+ assert group.description == "A test group description"
+
+
+@pytest.mark.asyncio
+async def test_group_content(mock_client, mock_dataset):
+ group = Group(
+ name="TEST",
+ path="/root/TEST",
+ dataset=mock_dataset,
+ long_name="Test Group",
+ description="Test",
+ )
+ group._dir._content = [MagicMock(spec=Directory), MagicMock(spec=File)]
+ group._dir.loaded = True
+ content = await group.content
+ assert len(content) == 2
+
+
+@pytest.mark.asyncio
+async def test_group_fetch_files(mock_client, mock_dataset):
+ group = Group(
+ name="TEST",
+ path="/root/TEST",
+ dataset=mock_dataset,
+ long_name="Test Group",
+ description="Test",
+ )
+ dir1 = MagicMock(spec=Directory)
+ file1 = MagicMock(spec=File)
+ group._dir._content = [dir1, file1]
+ group._dir.loaded = True
+ files = await group._fetch_files()
+ assert len(files) == 1
+ assert files[0] is file1
+
+
@pytest.mark.asyncio
async def test_dataset_fetch_content(mock_client):
class TestDB(Dataset):
@@ -128,15 +264,74 @@ def formatter(self, f):
@pytest.mark.asyncio
-async def test_file_download_calls_client(mock_client, mock_dataset, tmp_path):
- file = File(
- path="/root/test.dbc",
- _info={"path": "/root/test.dbc", "name": "test.dbc"},
- type="file",
- dataset=mock_dataset,
- )
+async def test_dataset_fetch_content_skips_non_file_non_dir(mock_client):
+ class TestDB(Dataset):
+ @property
+ def name(self):
+ return "TEST"
- dest = Path(tmp_path / "test.dbc")
- await file._download(output=dest)
+ @property
+ def long_name(self):
+ return "Test DB"
- mock_client._download_file.assert_called_once_with(file, dest, None)
+ @property
+ def description(self):
+ return "Testing"
+
+ def formatter(self, f):
+ return {}
+
+ db = TestDB(client=mock_client)
+ root = Directory(path="/root", client=mock_client, dataset=db)
+ db.paths = [root]
+ root._content = [MagicMock(spec=object)]
+ root.loaded = True
+
+ result = await db._fetch_content()
+ assert len(result) == 0
+
+
+@pytest.mark.asyncio
+async def test_dataset_fetch_content_raises_runtime_error(mock_client):
+ class TestDB(Dataset):
+ @property
+ def name(self):
+ return "TEST"
+
+ @property
+ def long_name(self):
+ return "Test DB"
+
+ @property
+ def description(self):
+ return "Testing"
+
+ def formatter(self, f):
+ return {}
+
+ db = TestDB(client=mock_client)
+ fake_dir = MagicMock()
+ db.paths = [fake_dir]
+ with pytest.raises(RuntimeError, match="not instantiated"):
+ await db._fetch_content()
+
+
+def test_dataset_repr(mock_client):
+ class TestDB(Dataset):
+ @property
+ def name(self):
+ return "TEST"
+
+ @property
+ def long_name(self):
+ return "Test DB"
+
+ @property
+ def description(self):
+ return "Testing"
+
+ def formatter(self, f):
+ return {}
+
+ db = TestDB(client=mock_client)
+ assert repr(db) == "TEST"
diff --git a/pysus/tests/api/test_client.py b/pysus/tests/api/test_client.py
index 0de234bc..32c48cc3 100644
--- a/pysus/tests/api/test_client.py
+++ b/pysus/tests/api/test_client.py
@@ -75,6 +75,23 @@ def test_download_status_values(self):
assert DownloadStatus.MISSING.value == "missing"
+class TestGetLocalFile:
+    @pytest.mark.asyncio
+    async def test_get_local_file_returns_none_when_no_records(
+        self, test_db_path
+    ):
+        """Expect None for a remote file this test never registered."""
+        client = PySUS(db_path=test_db_path)
+        mock_remote_file = MagicMock()
+        mock_remote_file.client.name = "FTP"
+        mock_remote_file.path = "/remote/nonexistent.dbc"
+        try:
+            result = await client.get_local_file(mock_remote_file)
+            assert result is None
+        finally:  # always release the client, even if the assert fails
+            await client.__aexit__(None, None, None)
+
+
class TestLocalFileState:
@pytest.mark.asyncio
async def test_update_state_creates_record(self, test_db_path, tmp_path):
@@ -149,7 +166,8 @@ async def test_get_local_file_finds_existing(self, test_db_path, tmp_path):
mock_remote_file.path = "/remote/test.dbc"
with patch(
- "pysus.api.extensions.ExtensionFactory.instantiate"
+ "pysus.api.extensions.ExtensionFactory.instantiate",
+ new_callable=AsyncMock,
) as mock_factory:
mock_factory.return_value = MagicMock()
await client.get_local_file(mock_remote_file)
@@ -186,10 +204,113 @@ async def test_get_completed_remote_paths(self, test_db_path, tmp_path):
await client.__aexit__(None, None, None)
+class TestGetLocalHierarchy:
+ @pytest.mark.asyncio
+ async def test_get_local_hierarchy_all_branches(
+ self, test_db_path, tmp_path
+ ):
+ client = PySUS(db_path=test_db_path)
+
+ file1 = (
+ tmp_path / "downloads" / "ftp" / "sinasc" / "DC" / "DNAC2024.dbc"
+ )
+ file1.parent.mkdir(parents=True, exist_ok=True)
+ file1.write_text("dummy")
+
+ file2 = tmp_path / "downloads" / "ftp" / "sinasc" / "DNAC2024.dbc"
+ file2.parent.mkdir(parents=True, exist_ok=True)
+ file2.write_text("dummy")
+
+ file3 = tmp_path / "short" / "path.dbc"
+ file3.parent.mkdir(parents=True, exist_ok=True)
+ file3.write_text("dummy")
+
+ dir_path = tmp_path / "downloads" / "ftp" / "sinasc" / "DC"
+ dir_path.mkdir(parents=True, exist_ok=True)
+
+ with client.Session() as session:
+ r1 = LocalFileState(
+ path=str(file1),
+ remote_path="/remote/file1.dbc",
+ client_name="ftp",
+ status=DownloadStatus.COMPLETED,
+ group="DC",
+ )
+ session.add(r1)
+
+ r2 = LocalFileState(
+ path=str(file2),
+ remote_path="/remote/file2.dbc",
+ client_name="ftp",
+ status=DownloadStatus.COMPLETED,
+ group=None,
+ )
+ session.add(r2)
+
+ r3 = LocalFileState(
+ path=str(file3),
+ remote_path="/remote/file3.dbc",
+ client_name="ftp",
+ status=DownloadStatus.PENDING,
+ group="X",
+ )
+ session.add(r3)
+
+ r4 = LocalFileState(
+ path=str(dir_path),
+ remote_path="/remote/dir.dbc",
+ client_name="ftp",
+ status=DownloadStatus.COMPLETED,
+ group="DC",
+ )
+ session.add(r4)
+
+ session.commit()
+
+ hierarchy = client.get_local_hierarchy()
+
+ assert "FTP" in hierarchy
+ ftp_dict = hierarchy["FTP"]
+
+ assert "DC" in ftp_dict
+ ds_dc = ftp_dict["DC"]
+ assert "DC" in ds_dc
+ assert len(ds_dc["DC"]) == 1
+ assert ds_dc["DC"][0]["name"] == "DNAC2024.dbc"
+ assert ds_dc["DC"][0]["status"] == DownloadStatus.COMPLETED
+
+ assert "ftp" in ftp_dict
+ ds_ftp = ftp_dict["ftp"]
+ assert "" in ds_ftp
+ assert len(ds_ftp[""]) == 1
+ assert ds_ftp[""][0]["name"] == "DNAC2024.dbc"
+
+ assert "sinasc" in ftp_dict
+ ds_sinasc = ftp_dict["sinasc"]
+ assert "DC" in ds_sinasc
+ assert ds_sinasc["DC"][0]["name"] == "DC"
+
+ dc_dict = ftp_dict.get("short")
+ assert dc_dict is not None
+ assert "X" in dc_dict
+ assert dc_dict["X"][0]["status"] == DownloadStatus.PENDING
+
+ await client.__aexit__(None, None, None)
+
+
class TestPySUSQuery:
+ @pytest.fixture
+ def mock_dataset(self):
+ ds = MagicMock()
+ ds.name = "sinan"
+ ds.query = AsyncMock(return_value=[])
+ return ds
+
@pytest.mark.asyncio
- async def test_query_with_dataset(self, test_db_path, tmp_path):
- from unittest.mock import AsyncMock, MagicMock
+ async def test_query_with_dataset(
+ self, test_db_path, tmp_path, mock_dataset
+ ):
+ from unittest.mock import MagicMock
from pysus.api.ducklake.client import DuckLake
@@ -198,16 +319,16 @@ async def test_query_with_dataset(self, test_db_path, tmp_path):
mock_ducklake = MagicMock(spec=DuckLake)
mock_file = MagicMock()
mock_file.path = tmp_path / "test.parquet"
- mock_ducklake.query = AsyncMock(return_value=[mock_file])
+ mock_dataset.query = AsyncMock(return_value=[mock_file])
+ mock_ducklake.datasets = AsyncMock(return_value=[mock_dataset])
client._ducklake = mock_ducklake
client._attach_client_catalog = MagicMock()
result = await client.query(dataset="sinan")
- mock_ducklake.query.assert_called_once_with(
- client=None,
- dataset="sinan",
+ mock_ducklake.datasets.assert_called_once()
+ mock_dataset.query.assert_called_once_with(
group=None,
state=None,
year=None,
@@ -217,24 +338,22 @@ async def test_query_with_dataset(self, test_db_path, tmp_path):
await client.__aexit__(None, None, None)
@pytest.mark.asyncio
- async def test_query_with_group(self, test_db_path):
- from unittest.mock import AsyncMock, MagicMock
+ async def test_query_with_group(self, test_db_path, mock_dataset):
+ from unittest.mock import MagicMock
from pysus.api.ducklake.client import DuckLake
client = PySUS(db_path=test_db_path)
mock_ducklake = MagicMock(spec=DuckLake)
- mock_ducklake.query = AsyncMock(return_value=[])
+ mock_ducklake.datasets = AsyncMock(return_value=[mock_dataset])
client._ducklake = mock_ducklake
client._attach_client_catalog = MagicMock()
await client.query(dataset="sinan", group="DENGUE")
- mock_ducklake.query.assert_called_once_with(
- client=None,
- dataset="sinan",
+ mock_dataset.query.assert_called_once_with(
group="DENGUE",
state=None,
year=None,
@@ -251,7 +370,10 @@ async def test_query_with_all_params(self, test_db_path):
client = PySUS(db_path=test_db_path)
mock_ducklake = MagicMock(spec=DuckLake)
- mock_ducklake.query = AsyncMock(return_value=[])
+ ds = MagicMock()
+ ds.name = "sinasc"
+ ds.query = AsyncMock(return_value=[])
+ mock_ducklake.datasets = AsyncMock(return_value=[ds])
client._ducklake = mock_ducklake
client._attach_client_catalog = MagicMock()
@@ -264,9 +386,7 @@ async def test_query_with_all_params(self, test_db_path):
month=1,
)
- mock_ducklake.query.assert_called_once_with(
- client=None,
- dataset="sinasc",
+ ds.query.assert_called_once_with(
group="DC",
state="SP",
year=2024,
@@ -275,7 +395,7 @@ async def test_query_with_all_params(self, test_db_path):
await client.__aexit__(None, None, None)
@pytest.mark.asyncio
- async def test_query_initializes_ducklake(self, test_db_path):
+ async def test_query_initializes_ducklake(self, test_db_path, mock_dataset):
from unittest.mock import AsyncMock, MagicMock, patch
import duckdb
@@ -285,8 +405,8 @@ async def test_query_initializes_ducklake(self, test_db_path):
assert client._ducklake is None
mock_ducklake_instance = MagicMock(spec=DuckLake)
- mock_ducklake_instance.query = AsyncMock(return_value=[])
- tmp_catalog_path = test_db_path.parent / "catalog.db"
+ mock_ducklake_instance.datasets = AsyncMock(return_value=[mock_dataset])
+ tmp_catalog_path = test_db_path.parent / "catalog.duckdb"
mock_ducklake_instance.catalog_path = tmp_catalog_path
# Create the catalog database
@@ -301,6 +421,112 @@ async def test_query_initializes_ducklake(self, test_db_path):
assert client._ducklake is not None
await client.__aexit__(None, None, None)
+ @pytest.mark.asyncio
+ async def test_query_raises_connection_error_when_ducklake_stays_none(
+ self, test_db_path
+ ):
+ client = PySUS(db_path=test_db_path)
+ client._ducklake = None
+
+ with patch.object(
+ client, "get_ducklake", new=AsyncMock(return_value=None)
+ ):
+ with pytest.raises(
+ ConnectionError, match="Could not connect to PySUS s3 bucket"
+ ):
+ await client.query(dataset="sinan")
+
+ await client.__aexit__(None, None, None)
+
+ @pytest.mark.asyncio
+ async def test_query_dataset_not_found_returns_empty(self, test_db_path):
+ from unittest.mock import AsyncMock, MagicMock
+
+ from pysus.api.ducklake.client import DuckLake
+
+ client = PySUS(db_path=test_db_path)
+
+ mock_ducklake = MagicMock(spec=DuckLake)
+ ds = MagicMock()
+ ds.name = "sinasc"
+ mock_ducklake.datasets = AsyncMock(return_value=[ds])
+
+ client._ducklake = mock_ducklake
+ client._attach_client_catalog = MagicMock()
+
+ result = await client.query(dataset="sinan")
+ assert result == []
+
+ await client.__aexit__(None, None, None)
+
+ @pytest.mark.asyncio
+ async def test_query_no_dataset_iterates_all(self, test_db_path):
+ from unittest.mock import AsyncMock, MagicMock
+
+ from pysus.api.ducklake.client import DuckLake
+
+ client = PySUS(db_path=test_db_path)
+
+ mock_ducklake = MagicMock(spec=DuckLake)
+ ds1 = MagicMock()
+ ds1.name = "sinan"
+ ds1.query = AsyncMock(return_value=["file1"])
+ ds2 = MagicMock()
+ ds2.name = "sinasc"
+ ds2.query = AsyncMock(return_value=["file2", "file3"])
+ mock_ducklake.datasets = AsyncMock(return_value=[ds1, ds2])
+
+ client._ducklake = mock_ducklake
+ client._attach_client_catalog = MagicMock()
+
+ result = await client.query()
+
+ ds1.query.assert_awaited_once_with(
+ group=None,
+ state=None,
+ year=None,
+ month=None,
+ )
+ ds2.query.assert_awaited_once_with(
+ group=None,
+ state=None,
+ year=None,
+ month=None,
+ )
+ assert result == ["file1", "file2", "file3"]
+
+ await client.__aexit__(None, None, None)
+
+ @pytest.mark.asyncio
+ async def test_query_with_client_filter(self, test_db_path):
+ from unittest.mock import AsyncMock, MagicMock
+
+ from pysus.api.ducklake.client import DuckLake
+ from pysus.api.types import FTP
+
+ client = PySUS(db_path=test_db_path)
+
+ mock_ducklake = MagicMock(spec=DuckLake)
+ ds = MagicMock()
+ ds.name = "sinan"
+
+ mock_file1 = MagicMock()
+ mock_file1.record.path = "public/data/ftp/somefile"
+ mock_file2 = MagicMock()
+ mock_file2.record.path = "public/data/dadosgov/otherfile"
+
+ ds.query = AsyncMock(return_value=[mock_file1, mock_file2])
+ mock_ducklake.datasets = AsyncMock(return_value=[ds])
+
+ client._ducklake = mock_ducklake
+ client._attach_client_catalog = MagicMock()
+
+ result = await client.query(dataset="sinan", client=FTP)
+
+ assert result == [mock_file1]
+
+ await client.__aexit__(None, None, None)
+
class TestDownload:
@pytest.mark.asyncio
@@ -365,7 +591,10 @@ async def test_download_re_fetches_when_size_differs(self, test_db_path):
get_ftp_patch,
):
with patch.object(
- ExtensionFactory, "instantiate", return_value=mock_local
+ ExtensionFactory,
+ "instantiate",
+ new_callable=AsyncMock,
+ return_value=mock_local,
):
mock_client = AsyncMock()
mock_client._download_file = AsyncMock()
@@ -406,7 +635,10 @@ async def _slow_download(*args, **kwargs):
),
patch.object(client, "_update_state", new=AsyncMock()),
patch.object(
- ExtensionFactory, "instantiate", return_value=mock_local
+ ExtensionFactory,
+ "instantiate",
+ new_callable=AsyncMock,
+ return_value=mock_local,
),
):
mock_client = AsyncMock()
@@ -418,6 +650,210 @@ async def _slow_download(*args, **kwargs):
):
await client.download(mock_file, timeout=0.001)
+    @pytest.mark.asyncio
+    async def test_download_with_ducklake_client(self, test_db_path):
+        """download() returns a result for a file owned by "ducklake"."""
+        from unittest.mock import AsyncMock, MagicMock, patch
+
+        from pysus.api.extensions import ExtensionFactory
+
+        client = PySUS(db_path=test_db_path)
+        mock_local = MagicMock()
+        mock_local.path.exists.return_value = False
+
+        mock_file = MagicMock()
+        mock_file.client.name = "ducklake"
+        mock_file.size = 1000
+        mock_file.path = test_db_path.parent / "remote.ducklake"
+        mock_file.basename = "remote.ducklake"
+        mock_file.year = None
+        mock_file.month = None
+        mock_file.state = None
+        # Attach the configured mock so that group.name really is None.
+        mock_group = MagicMock()
+        mock_group.name = None
+        mock_file.group = mock_group
+
+        with (
+            patch.object(
+                client, "get_local_file", new=AsyncMock(return_value=mock_local)
+            ),
+            patch.object(
+                client,
+                "_get_dest_path",
+                return_value=test_db_path.parent / "test.ducklake",
+            ),
+            patch.object(client, "_update_state", new=AsyncMock()),
+            patch.object(
+                ExtensionFactory,
+                "instantiate",
+                new_callable=AsyncMock,
+                return_value=mock_local,
+            ),
+        ):
+            mock_ducklake = AsyncMock()
+            mock_ducklake._download_file = AsyncMock()
+            client._ducklake = mock_ducklake
+
+            result = await client.download(mock_file)
+
+        assert result is not None
+        await client.__aexit__(None, None, None)
+
+ @pytest.mark.asyncio
+ async def test_download_with_dadosgov_client(self, test_db_path):
+ from unittest.mock import AsyncMock, MagicMock, patch
+
+ from pysus.api.extensions import ExtensionFactory
+
+ client = PySUS(db_path=test_db_path)
+
+ mock_local = MagicMock()
+ mock_local.path.exists.return_value = False
+
+ mock_file = MagicMock()
+ mock_file.client.name = "dadosgov"
+ mock_file.size = 1000
+ mock_file.path = test_db_path.parent / "remote.dadosgov"
+ mock_file.basename = "remote.dadosgov"
+ mock_file.year = None
+ mock_file.month = None
+ mock_file.state = None
+ mock_file.group = MagicMock()
+ mock_file.group.name = None
+
+ with (
+ patch.object(
+ client, "get_local_file", new=AsyncMock(return_value=mock_local)
+ ),
+ patch.object(
+ client,
+ "_get_dest_path",
+ return_value=test_db_path.parent / "test.dadosgov",
+ ),
+ patch.object(client, "_update_state", new=AsyncMock()),
+ patch.object(
+ ExtensionFactory,
+ "instantiate",
+ new_callable=AsyncMock,
+ return_value=mock_local,
+ ),
+ ):
+ mock_dadosgov = AsyncMock()
+ mock_dadosgov._download_file = AsyncMock()
+ client._dadosgov = mock_dadosgov
+
+ result = await client.download(mock_file, token="test_token")
+
+ assert result is not None
+
+ await client.__aexit__(None, None, None)
+
+    @pytest.mark.asyncio
+    async def test_download_with_unknown_client_raises_valueerror(
+        self, test_db_path
+    ):
+        """Expects RuntimeError, although the test name says valueerror."""
+        from unittest.mock import AsyncMock, MagicMock, patch
+
+        client = PySUS(db_path=test_db_path)
+        mock_local = MagicMock()
+        mock_local.path.exists.return_value = False
+
+        mock_file = MagicMock()
+        mock_file.client.name = "unknown"
+        mock_file.size = 1000
+        mock_file.basename = "test.unknown"
+        mock_file.path = test_db_path.parent / "test.unknown"
+
+        with (
+            patch.object(
+                client, "get_local_file", new=AsyncMock(return_value=mock_local)
+            ),
+            patch.object(
+                client,
+                "_get_dest_path",
+                return_value=test_db_path.parent / "test.unknown",
+            ),
+            patch.object(client, "_update_state", new=AsyncMock()),
+        ):
+            with pytest.raises(
+                RuntimeError,
+                match=(
+                    "Unexpected error downloading test.unknown:"
+                    " No download logic for client: unknown"
+                ),
+            ):
+                await client.download(mock_file)
+
+        await client.__aexit__(None, None, None)
+
+
+class TestDownloadToParquet:
+ @pytest.mark.asyncio
+ async def test_download_to_parquet_success(self, test_db_path, tmp_path):
+ from unittest.mock import AsyncMock, MagicMock, patch
+
+ client = PySUS(db_path=test_db_path)
+
+ original_path = tmp_path / "test.dbc"
+ original_path.write_text("dummy content")
+
+ parquet_path = tmp_path / "test.parquet"
+
+ mock_parquet_file = MagicMock()
+ mock_parquet_file.path = parquet_path
+
+ mock_local_file = MagicMock()
+ mock_local_file.path = original_path
+ mock_local_file.to_parquet = AsyncMock(return_value=mock_parquet_file)
+
+ mock_file = MagicMock()
+ mock_file.path = "/remote/test.dbc"
+ mock_file.client.name = "ftp"
+ mock_file.year = 2024
+ mock_file.month = 1
+ mock_file.state = "SP"
+ mock_file.group = MagicMock()
+ mock_file.group.name = "DC"
+
+ with (
+ patch.object(
+ client, "download", new=AsyncMock(return_value=mock_local_file)
+ ),
+ patch.object(client, "_update_state", new=AsyncMock()),
+ patch.object(client, "_delete_record", new=AsyncMock()),
+ ):
+ result = await client.download_to_parquet(mock_file)
+
+ assert result == mock_parquet_file
+ assert result.add_dv is True
+ mock_local_file.to_parquet.assert_awaited_once()
+
+ await client.__aexit__(None, None, None)
+
+ @pytest.mark.asyncio
+ async def test_download_to_parquet_not_tabular_raises(self, test_db_path):
+ from unittest.mock import AsyncMock, MagicMock, patch
+
+ client = PySUS(db_path=test_db_path)
+
+ mock_local_file = MagicMock(spec=[])
+
+ mock_file = MagicMock()
+ mock_file.path = "/remote/test.dbc"
+ mock_file.client.name = "ftp"
+
+ with patch.object(
+ client, "download", new=AsyncMock(return_value=mock_local_file)
+ ):
+ with pytest.raises(
+ NotImplementedError, match="can't be converted to Parquet"
+ ):
+ await client.download_to_parquet(mock_file)
+
+ await client.__aexit__(None, None, None)
+
class TestReadParquet:
def test_read_parquet_single_path(self, tmp_path):
@@ -474,6 +910,38 @@ def test_read_parquet_intersection_mode(self, tmp_path):
assert len(df) == 2
assert list(df.columns) == ["a"]
+ def test_read_parquet_intersection_no_common_columns(self, tmp_path):
+ import duckdb
+ import pandas as pd
+
+ parquet1 = tmp_path / "test1.parquet"
+ parquet2 = tmp_path / "test2.parquet"
+
+ pd.DataFrame({"a": [1], "b": [2]}).to_parquet(parquet1)
+ pd.DataFrame({"c": [3], "d": [4]}).to_parquet(parquet2)
+
+ from pysus.api.client import PySUS
+
+ client = PySUS(db_path=tmp_path / "config.db")
+
+ original_execute = duckdb.execute
+
+ def side_effect(sql, *args, **kwargs):
+ if sql == "SELECT * WHERE 1=0":
+ result = MagicMock()
+ result.description = []
+ result.df.return_value = pd.DataFrame()
+ result.fetchall.return_value = []
+ return result
+ return original_execute(sql, *args, **kwargs)
+
+ with patch.object(duckdb, "execute", side_effect=side_effect):
+ result = client.read_parquet(
+ [parquet1, parquet2], mode="intersection"
+ )
+ df = result.df()
+ assert len(df) == 0
+
def test_read_parquet_strict_mode_matching_schemas(self, tmp_path):
import pandas as pd
@@ -524,6 +992,22 @@ def test_read_parquet_with_sql(self, tmp_path):
assert len(df) == 2
assert list(df.columns) == ["a"]
+ def test_read_parquet_sql_not_select(self, tmp_path):
+ import pandas as pd
+
+ parquet_file = tmp_path / "test.parquet"
+ pd.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}).to_parquet(
+ parquet_file
+ )
+
+ from pysus.api.client import PySUS
+
+ client = PySUS(db_path=tmp_path / "config.db")
+ result = client.read_parquet([parquet_file], sql="a + b AS c")
+ df = result.df()
+
+ assert list(df.columns) == ["c"]
+
def test_read_parquet_no_paths_raises(self, tmp_path):
from pysus.api.client import PySUS
@@ -581,6 +1065,27 @@ def test_read_parquet_add_dv_false_returns_raw(self, tmp_path):
out = result.df()
assert out["ID_MUNICIP"].iloc[0] == "261160"
+ def test_read_parquet_add_dv_create_function_exception(self, tmp_path):
+ import duckdb
+ import pandas as pd
+
+ parquet_file = tmp_path / "test.parquet"
+ df = pd.DataFrame({"ID_MUNICIP": ["261160"], "value": [1]})
+ df.to_parquet(parquet_file)
+
+ from pysus.api.client import PySUS
+
+ client = PySUS(db_path=tmp_path / "config.db")
+
+ with patch.object(
+ duckdb,
+ "create_function",
+ side_effect=duckdb.NotImplementedException(),
+ ):
+ result = client.read_parquet([parquet_file], add_dv=True)
+ out = result.df()
+ assert out["ID_MUNICIP"].iloc[0] == "2611606"
+
class TestPySUSGetMethods:
@pytest.mark.asyncio
@@ -625,13 +1130,13 @@ async def test_aenter(self, test_db_path):
with (
patch.object(
- DuckLake, "_load_catalog", new_callable=AsyncMock
- ) as mock_load,
+ DuckLake, "_download_catalog", new_callable=AsyncMock
+ ) as mock_download,
patch.object(PySUS, "_attach_client_catalog") as mock_attach,
):
await client.__aenter__()
assert client._ducklake is not None
- mock_load.assert_called_once()
+ mock_download.assert_called_once()
mock_attach.assert_called_once()
await client.__aexit__(None, None, None)
diff --git a/pysus/tests/api/test_databases.py b/pysus/tests/api/test_databases.py
index 7bd37c4d..8a398038 100644
--- a/pysus/tests/api/test_databases.py
+++ b/pysus/tests/api/test_databases.py
@@ -1,5 +1,9 @@
+import asyncio
+import sys
from unittest.mock import AsyncMock, MagicMock, patch
+import pytest
+
class TestSinan:
def test_sinan_calls_fetch_data(self):
@@ -301,6 +305,95 @@ def test_fetch_data_no_files(self):
assert len(result) == 0
mock_pysus.download.assert_not_called()
+ def test_fetch_data_with_progress(self):
+ with (
+ patch("pysus.api._impl.databases.PySUS") as mock_pysus_class,
+ patch("pysus.api._impl.databases.tqdm", new=lambda x, **kw: x),
+ ):
+ mock_pysus = MagicMock()
+ enter_mock = AsyncMock(return_value=mock_pysus)
+ exit_mock = AsyncMock()
+ mock_pysus_class.return_value.__aenter__ = enter_mock
+ mock_pysus_class.return_value.__aexit__ = exit_mock
+
+ mock_file = MagicMock()
+ mock_file.path = "/tmp/test.parquet"
+ mock_pysus.query = AsyncMock(return_value=[mock_file, mock_file])
+ mock_pysus.download = AsyncMock(return_value=mock_file)
+ mock_pysus.read_parquet.return_value.df.return_value = MagicMock()
+
+ from pysus.api._impl.databases import _fetch_data
+
+ _fetch_data(dataset="sinan", year=2024, show_progress=True)
+
+ assert mock_pysus.download.call_count == 2
+
+
+class TestFetchDataRunningLoop:
+ def test_fetch_data_running_loop_no_nest_asyncio_raises(self):
+ saved = sys.modules.pop("nest_asyncio", None)
+ import builtins
+
+ real_import = builtins.__import__
+
+ def raising_import(name, *args, **kwargs):
+ if name == "nest_asyncio":
+ raise ImportError(f"No module named {name}")
+ return real_import(name, *args, **kwargs)
+
+ try:
+
+ async def _inner():
+ from pysus.api._impl.databases import _fetch_data
+
+ with patch("builtins.__import__", side_effect=raising_import):
+ with pytest.raises(
+ RuntimeError, match="nest_asyncio is required"
+ ):
+ _fetch_data(
+ dataset="sinan",
+ year=2024,
+ show_progress=False,
+ )
+
+ asyncio.run(_inner())
+ finally:
+ if saved is not None:
+ sys.modules["nest_asyncio"] = saved
+
+ def test_fetch_data_running_loop_with_nest_asyncio(self):
+ nest_mock = MagicMock()
+
+ async def _inner():
+ with (
+ patch("pysus.api._impl.databases.PySUS") as mock_pysus_class,
+ patch.dict("sys.modules", {"nest_asyncio": nest_mock}),
+ ):
+ mock_pysus = MagicMock()
+ mock_pysus_class.return_value.__aenter__ = AsyncMock(
+ return_value=mock_pysus
+ )
+ mock_pysus_class.return_value.__aexit__ = AsyncMock()
+ mock_pysus.query = AsyncMock(return_value=[])
+
+ from pysus.api._impl.databases import _fetch_data
+
+ loop = asyncio.get_running_loop()
+ expected = MagicMock()
+
+ with patch.object(
+ loop, "run_until_complete", return_value=expected
+ ):
+ result = _fetch_data(
+ dataset="sinan",
+ year=2024,
+ show_progress=False,
+ )
+ nest_mock.apply.assert_called_once()
+ assert result == expected
+
+ asyncio.run(_inner())
+
class TestListFiles:
def _mock_asyncio_run(self, return_value):
@@ -385,3 +478,91 @@ def test_list_files_empty_result(self):
assert isinstance(result, pd.DataFrame)
assert len(result) == 0
+
+ def test_list_files_with_real_coroutine(self):
+ import pandas as pd
+
+ mock_record = MagicMock()
+ mock_record.path = "/remote/sinan/dengue.parquet"
+ mock_record.dataset.name = "sinan"
+ mock_record.group.name = "DENGUE"
+ mock_record.record.year = 2024
+ mock_record.record.month = 1
+ mock_record.record.state = "SP"
+ mock_record.record.origin_modified = "2024-01-15"
+
+ with patch("pysus.api._impl.databases.PySUS") as mock_pysus_class:
+ mock_pysus = MagicMock()
+ mock_pysus_class.return_value.__aenter__ = AsyncMock(
+ return_value=mock_pysus
+ )
+ mock_pysus_class.return_value.__aexit__ = AsyncMock()
+ mock_pysus.query = AsyncMock(return_value=[mock_record])
+
+ from pysus.api._impl.databases import list_files
+
+ result = list_files(dataset="SINAN", year=2024, month=1)
+
+ assert isinstance(result, pd.DataFrame)
+ assert len(result) == 1
+ assert result.iloc[0]["name"] == "dengue.parquet"
+ assert result.iloc[0]["path"] == "/remote/sinan/dengue.parquet"
+ assert result.iloc[0]["dataset"] == "sinan"
+ assert result.iloc[0]["group"] == "DENGUE"
+ assert result.iloc[0]["year"] == 2024
+ assert result.iloc[0]["month"] == 1
+ assert result.iloc[0]["state"] == "SP"
+ assert result.iloc[0]["modify"] == "2024-01-15"
+
+ def test_list_files_with_none_fields(self):
+ mock_record = MagicMock()
+ mock_record.path = "/remote/sinan/dengue.parquet"
+ mock_record.dataset = None
+ mock_record.group = None
+ mock_record.record.year = 2024
+ mock_record.record.month = 1
+ mock_record.record.state = "SP"
+ mock_record.record.origin_modified = "2024-01-15"
+
+ with patch("pysus.api._impl.databases.PySUS") as mock_pysus_class:
+ mock_pysus = MagicMock()
+ mock_pysus_class.return_value.__aenter__ = AsyncMock(
+ return_value=mock_pysus
+ )
+ mock_pysus_class.return_value.__aexit__ = AsyncMock()
+ mock_pysus.query = AsyncMock(return_value=[mock_record])
+
+ from pysus.api._impl.databases import list_files
+
+ result = list_files(dataset="SINAN")
+
+ assert result.iloc[0]["dataset"] is None
+ assert result.iloc[0]["group"] is None
+
+ def test_list_files_with_multiple_records(self):
+ records = []
+ for i in range(3):
+ r = MagicMock()
+ r.path = f"/remote/sinan/file{i}.parquet"
+ r.dataset.name = "sinan"
+ r.group.name = "DENGUE"
+ r.record.year = 2024
+ r.record.month = i + 1
+ r.record.state = "SP"
+ r.record.origin_modified = "2024-01-15"
+ records.append(r)
+
+ with patch("pysus.api._impl.databases.PySUS") as mock_pysus_class:
+ mock_pysus = MagicMock()
+ mock_pysus_class.return_value.__aenter__ = AsyncMock(
+ return_value=mock_pysus
+ )
+ mock_pysus_class.return_value.__aexit__ = AsyncMock()
+ mock_pysus.query = AsyncMock(side_effect=[records[:2], records[2:]])
+
+ from pysus.api._impl.databases import list_files
+
+ result = list_files(dataset="SINAN", year=[2023, 2024])
+
+ assert len(result) == 3
+ assert mock_pysus.query.call_count == 2
diff --git a/pysus/tests/api/test_extensions.py b/pysus/tests/api/test_extensions.py
index f7fc9559..1e3e8458 100644
--- a/pysus/tests/api/test_extensions.py
+++ b/pysus/tests/api/test_extensions.py
@@ -1,15 +1,19 @@
+import builtins
import gzip
import json
+import struct
import tarfile
import zipfile
from pathlib import Path
+from unittest.mock import AsyncMock, MagicMock, patch
import pandas as pd
+import pyarrow as pa
+import pyarrow.parquet as pq
import pytest
from pysus.api.extensions import (
CSV,
DBC,
- DBC_IMPORT,
DBF,
JSON,
PDF,
@@ -20,7 +24,9 @@
Parquet,
Tar,
Zip,
+ _map_dtype,
)
+from pysus.api.models import BaseLocalFile
@pytest.fixture
@@ -35,6 +41,70 @@ async def collect_async(gen):
return out
+def _create_dbf(path, fields, records):
+ """Create a minimal valid DBF file at *path*.
+
+ Parameters
+ ----------
+ fields : list of (name, type, length, decimal)
+ records : list of tuples
+ """
+ from datetime import date
+
+ today = date.today()
+ num_records = len(records)
+ field_desc_len = 32 * len(fields)
+ header_len = 32 + field_desc_len + 1
+ record_len = 1 + sum(f[2] for f in fields)
+
+ buf = bytearray()
+ # Version (0x03 = FoxBASE)
+ buf.append(0x03)
+ # Last update date
+ buf.append(today.year - 1900)
+ buf.append(today.month)
+ buf.append(today.day)
+ # Number of records
+ buf.extend(struct.pack("= 1
+ assert chunks[0]["ID_MUNICIP"].iloc[0] == "2611606"
+ assert str(chunks[0]["DT_NOTIFIC"].iloc[0]) == "2023-01-01"
+
+
+# ---------------------------------------------------------------------------
+# New tests for lines 315, 324-326: parse_dftypes edge cases
+# ---------------------------------------------------------------------------
+
+
+def test_parse_dftypes_edge_cases():
+ df = pd.DataFrame(
+ {
+ "DT_NOTIFIC": [123, "not_a_date", "20230101"],
+ "CODMUNRES": [float("nan"), None, " 330455 "],
+ "IDADE": [None, float("nan"), "25"],
+ }
+ )
+ result = Parquet.parse_dftypes(df)
+
+ assert result["DT_NOTIFIC"].iloc[0] == 123
+ assert result["DT_NOTIFIC"].iloc[1] == "not_a_date"
+ assert str(result["DT_NOTIFIC"].iloc[2]) == "2023-01-01"
+
+ assert pd.isna(result["CODMUNRES"].iloc[0])
+ assert pd.isna(result["CODMUNRES"].iloc[1])
+ assert result["CODMUNRES"].iloc[2] == 330455
+
+ assert pd.isna(result["IDADE"].iloc[0])
+ assert pd.isna(result["IDADE"].iloc[1])
+ assert result["IDADE"].iloc[2] == 25
+
+
+# ---------------------------------------------------------------------------
+# New tests for lines 351-360, 370, 394, 402-403, 413-427: DBF
+# ---------------------------------------------------------------------------
+
+
+def test_dbf_columns(tmp_dir):
+ pytest.importorskip("dbfread")
+ dbf_path = tmp_dir / "test.dbf"
+ _create_dbf(
+ dbf_path,
+ [("NAME", "C", 20, 0), ("AGE", "N", 3, 0), ("SALARY", "F", 10, 2)],
+ [("Alice", 30, 5000.00)],
+ )
+ obj = DBF(path=dbf_path)
+ cols = obj.columns
+ assert len(cols) == 3
+ assert cols[0].dtype == "VARCHAR"
+ assert cols[1].dtype == "INTEGER"
+ assert cols[2].dtype == "FLOAT"
+
+
+def test_dbf_rows(tmp_dir):
+ pytest.importorskip("dbfread")
+ dbf_path = tmp_dir / "test.dbf"
+ _create_dbf(
+ dbf_path,
+ [("NAME", "C", 10, 0)],
+ [("Alice",), ("Bob",), ("Charlie",)],
+ )
+ obj = DBF(path=dbf_path)
+ assert obj.rows == 3
+
+
+def test_dbf_decode_column_non_string():
+ obj = DBF(path=Path("/dummy"))
+ assert obj.decode_column(123) == 123
+ assert obj.decode_column(45.6) == 45.6
+ assert obj.decode_column(None) is None
+
+
+@pytest.mark.asyncio
+async def test_dbf_load(tmp_dir):
+ pytest.importorskip("dbfread")
+ dbf_path = tmp_dir / "test.dbf"
+ _create_dbf(
+ dbf_path,
+ [("NAME", "C", 10, 0), ("AGE", "N", 3, 0)],
+ [("Alice", 30), ("Bob", 25)],
+ )
+ obj = DBF(path=dbf_path)
+ df = await obj.load()
+ assert len(df) == 2
+ assert list(df.columns) == ["NAME", "AGE"]
+ assert df["NAME"].iloc[0] == "Alice"
+
+
+@pytest.mark.asyncio
+async def test_dbf_stream(tmp_dir):
+ pytest.importorskip("dbfread")
+ dbf_path = tmp_dir / "test.dbf"
+ _create_dbf(
+ dbf_path,
+ [("VAL", "N", 5, 0)],
+ [(10,), (20,), (30,), (40,), (50,)],
+ )
+ obj = DBF(path=dbf_path)
+ chunks = await collect_async(obj.stream(chunk_size=2))
+ assert len(chunks) >= 2
+ assert all(isinstance(c, pd.DataFrame) for c in chunks)
+ assert len(chunks[0]) == 2
+
+
+# ---------------------------------------------------------------------------
+# New tests for lines 445-447, 452-490, 493-496: DBF.to_parquet
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_dbf_to_parquet(tmp_dir):
+ pytest.importorskip("dbfread")
+ dbf_path = tmp_dir / "test.dbf"
+ _create_dbf(
+ dbf_path,
+ [("NAME", "C", 10, 0)],
+ [("Alice",), ("Bob",), ("Charlie",)],
+ )
+ obj = DBF(path=dbf_path)
+
+ calls = []
+
+ def cb(current, total):
+ calls.append((current, total))
+
+ result = await obj.to_parquet(chunk_size=2, callback=cb)
+ assert isinstance(result, Parquet)
+ assert result.rows == 3
+ assert len(calls) >= 1
+
+
+@pytest.mark.asyncio
+async def test_dbf_to_parquet_empty(tmp_dir):
+ pytest.importorskip("dbfread")
+ dbf_path = tmp_dir / "test.dbf"
+ _create_dbf(dbf_path, [("NAME", "C", 10, 0)], [])
+ obj = DBF(path=dbf_path)
+ with pytest.raises(TypeError):
+ await obj.to_parquet()
+
+
+@pytest.mark.asyncio
+async def test_dbf_to_parquet_output_exists(tmp_dir):
+ pytest.importorskip("dbfread")
+ dbf_path = tmp_dir / "test.dbf"
+ _create_dbf(dbf_path, [("NAME", "C", 10, 0)], [("Alice",)])
+ obj = DBF(path=dbf_path)
+
+ out = tmp_dir / "existing.parquet"
+ pd.DataFrame({"x": [1]}).to_parquet(out)
+
+ result = await obj.to_parquet(output_path=out)
+ assert isinstance(result, Parquet)
+ assert result.rows == 1
+
+
+@pytest.mark.asyncio
+async def test_dbf_to_parquet_output_not_parquet(tmp_dir):
+ pytest.importorskip("dbfread")
+ dbf_path = tmp_dir / "test.dbf"
+ _create_dbf(dbf_path, [("NAME", "C", 10, 0)], [("Alice",)])
+ obj = DBF(path=dbf_path)
+
+ out = tmp_dir / "out.csv"
+ out.write_text("a,b\n1,2")
+
+ with pytest.raises(RuntimeError, match="Could not parse"):
+ await obj.to_parquet(output_path=out)
+
+
+@pytest.mark.asyncio
+async def test_dbf_to_parquet_non_parquet_extension(tmp_dir):
+ pytest.importorskip("dbfread")
+ dbf_path = tmp_dir / "test.dbf"
+ _create_dbf(dbf_path, [("NAME", "C", 10, 0)], [("Alice",)])
+ obj = DBF(path=dbf_path)
+
+ out = tmp_dir / "out.custom"
+ with pytest.raises(RuntimeError, match="Could not parse"):
+ await obj.to_parquet(output_path=out)
+
+
+# ---------------------------------------------------------------------------
+# New tests for lines 507, 514, 520-521, 528-530, 542, 546-549, 560: DBC
+# ---------------------------------------------------------------------------
+
+
+def test_dbc_columns_raises():
+ obj = DBC(path=Path("test.dbc"))
+ with pytest.raises(NotImplementedError):
+ _ = obj.columns
+
+
+def test_dbc_rows_raises():
+ obj = DBC(path=Path("test.dbc"))
+ with pytest.raises(NotImplementedError):
+ _ = obj.rows
+
+
+@pytest.mark.asyncio
+async def test_dbc_load_raises(tmp_dir):
+ path = tmp_dir / "test.dbc"
+ path.write_bytes(b"dummy")
+ obj = DBC(path=path)
+ with pytest.raises(struct.error):
+ await obj.load()
+
+
+@pytest.mark.asyncio
+async def test_dbc_stream_raises(tmp_dir):
+ path = tmp_dir / "test.dbc"
+ path.write_bytes(b"dummy")
+ obj = DBC(path=path)
+
+ try:
+ with pytest.raises(struct.error):
+ async for _ in obj.stream():
+ pass
+ finally:
+ import gc
+
+ gc.collect()
+
+
+@pytest.mark.asyncio
+async def test_dbc_to_parquet_output_exists_is_parquet(tmp_dir):
+ path = tmp_dir / "test.dbc"
+ path.write_bytes(b"dummy")
+ obj = DBC(path=path)
+
+ out = tmp_dir / "out.parquet"
+ pd.DataFrame({"x": [1]}).to_parquet(out)
+
+ result = await obj.to_parquet(output_path=out)
+ assert isinstance(result, Parquet)
+
+
+@pytest.mark.asyncio
+async def test_dbc_to_parquet_output_exists_not_parquet(tmp_dir):
+ path = tmp_dir / "test.dbc"
+ path.write_bytes(b"dummy")
+ obj = DBC(path=path)
+
+ out = tmp_dir / "out.csv"
+ out.write_text("a,b\n1,2")
+
+ with pytest.raises(RuntimeError, match="Could not parse"):
+ await obj.to_parquet(output_path=out)
+
+
+# ---------------------------------------------------------------------------
+# New tests for lines 580-585, 593: JSON.columns and JSON.rows
+# ---------------------------------------------------------------------------
+
+
+def test_json_columns(tmp_dir):
+ path = tmp_dir / "data.json"
+ path.write_text('[{"a": 1, "b": "x"}]')
+ obj = JSON(path=path)
+ with pytest.raises(ValueError, match="nrows can only be passed"):
+ _ = obj.columns
+
+
+def test_json_columns_empty(tmp_dir):
+ path = tmp_dir / "empty.json"
+ path.write_text("")
+ obj = JSON(path=path)
+ cols = obj.columns
+ assert cols == []
+
+
+def test_json_rows(tmp_dir):
+ path = tmp_dir / "data.json"
+ path.write_text('[{"a": 1}, {"a": 2}, {"a": 3}]')
+ obj = JSON(path=path)
+ assert obj.rows == 3
+
+
+# ---------------------------------------------------------------------------
+# New tests for line 628: PDF.stream without chunk_size
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_pdf_stream_no_chunk_size(tmp_dir):
+ path = tmp_dir / "file.pdf"
+ content = b"%PDF-1.4\n...content..."
+ path.write_bytes(content)
+
+ obj = PDF(path=path)
+ chunks = await collect_async(obj.stream())
+ assert b"".join(chunks) == content
+ assert len(chunks) == 1
+
+
+# ---------------------------------------------------------------------------
+# New tests for line 642: Zip.load
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_zip_load(tmp_dir):
+ zip_path = tmp_dir / "test.zip"
+ with zipfile.ZipFile(zip_path, "w") as z:
+ z.writestr("test.txt", "hello")
+
+ obj = Zip(path=zip_path)
+ result = await obj.load()
+ assert isinstance(result, zipfile.ZipFile)
+
+
+# ---------------------------------------------------------------------------
+# New tests for lines 692-718: Zip.to_parquet
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_zip_to_parquet(tmp_dir):
+ zip_path = tmp_dir / "data.zip"
+ csv_path = tmp_dir / "inner.csv"
+ pd.DataFrame({"x": [1, 2], "y": [3, 4]}).to_csv(csv_path, index=False)
+ with zipfile.ZipFile(zip_path, "w") as z:
+ z.write(csv_path, arcname="inner.csv")
+
+ obj = Zip(path=zip_path)
+ pq_obj = await obj.to_parquet()
+ assert isinstance(pq_obj, Parquet)
+ df = await pq_obj.load()
+ assert len(df) == 2
+
+ parquet_path = tmp_dir / "data.parquet"
+ assert parquet_path.exists()
+ temp_dir = tmp_dir / "data.tmp_extract"
+ assert not temp_dir.exists()
+
+
+# ---------------------------------------------------------------------------
+# New tests for lines 723-740: Zip._safe_cleanup
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_zip_safe_cleanup_nonexistent(tmp_dir):
+ obj = Zip(path=tmp_dir / "dummy.zip")
+ await obj._safe_cleanup(tmp_dir / "nonexistent")
+ # Should not raise
+
+
+@pytest.mark.asyncio
+async def test_zip_safe_cleanup_with_files(tmp_dir):
+ (tmp_dir / "f1.txt").write_text("a")
+ (tmp_dir / "f2.txt").write_text("b")
+ obj = Zip(path=tmp_dir / "dummy.zip")
+ await obj._safe_cleanup(tmp_dir)
+ assert not (tmp_dir / "f1.txt").exists()
+ assert not (tmp_dir / "f2.txt").exists()
+ assert not tmp_dir.exists()
+
+
+@pytest.mark.asyncio
+async def test_zip_safe_cleanup_with_subdir(tmp_dir):
+ sub = tmp_dir / "sub"
+ sub.mkdir()
+ (sub / "nested.txt").write_text("nested")
+ (tmp_dir / "top.txt").write_text("top")
+ obj = Zip(path=tmp_dir / "dummy.zip")
+ await obj._safe_cleanup(tmp_dir)
+ assert not tmp_dir.exists()
+
+
+# ---------------------------------------------------------------------------
+# New tests for line 764: GZip.open_member
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_gzip_open_member(tmp_dir):
+ path = tmp_dir / "data.csv.gz"
+ raw = b"a,b\n1,2"
+ with gzip.open(path, "wb") as f:
+ f.write(raw)
+
+ obj = GZip(path=path)
+ result = await obj.open_member("data.csv")
+ assert result == raw
+
+
+# ---------------------------------------------------------------------------
+# New tests for line 799: Tar.load
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_tar_load(tmp_dir):
+ tar_path = tmp_dir / "file.tar"
+ f = tmp_dir / "a.txt"
+ f.write_text("hello")
+ with tarfile.open(tar_path, "w") as t:
+ t.add(f, arcname="a.txt")
+
+ obj = Tar(path=tar_path)
+ result = await obj.load()
+ assert isinstance(result, tarfile.TarFile)
+
+
+# ---------------------------------------------------------------------------
+# New tests for ExtensionFactory._identify (lines 944, 947-949, 957-958)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_extension_factory_identify_magic_not_available(tmp_dir):
+ orig = ExtensionFactory._magic_available
+ ExtensionFactory._magic_available = False
+ try:
+ path = tmp_dir / "test.csv"
+ path.write_text("a,b\n1,2")
+ result = await ExtensionFactory._identify(path)
+ assert result is None
+ finally:
+ ExtensionFactory._magic_available = orig
+
+
+@pytest.mark.asyncio
+async def test_extension_factory_identify_magic_import_error(
+ monkeypatch, tmp_dir
+):
+ orig_available = ExtensionFactory._magic_available
+ ExtensionFactory._magic_available = True
+ try:
+ path = tmp_dir / "test.csv"
+ path.write_text("a,b\n1,2")
+
+ original_import = builtins.__import__
+
+ def mock_import(name, globals=None, locals=None, fromlist=(), level=0):
+ if name == "magic":
+ raise ImportError("Mock error")
+ return original_import(name, globals, locals, fromlist, level)
+
+ monkeypatch.setattr(builtins, "__import__", mock_import)
+
+ result = await ExtensionFactory._identify(path)
+ assert result is None
+ assert not ExtensionFactory._magic_available
+ finally:
+ ExtensionFactory._magic_available = orig_available
+
+
+@pytest.mark.asyncio
+async def test_extension_factory_identify_magic_exception(monkeypatch, tmp_dir):
+ magic = pytest.importorskip("magic")
+ orig_available = ExtensionFactory._magic_available
+ ExtensionFactory._magic_available = True
+ try:
+ path = tmp_dir / "test.csv"
+ path.write_text("a,b\n1,2")
+
+ def mock_from_file(*args, **kwargs):
+ raise magic.MagicException("Mock error")
+
+ monkeypatch.setattr(magic, "from_file", mock_from_file)
+
+ result = await ExtensionFactory._identify(path)
+ assert result is None
+ assert ExtensionFactory._magic_available
+ finally:
+ ExtensionFactory._magic_available = orig_available
+
+
+# ---------------------------------------------------------------------------
+# New tests for line 1010: ExtensionFactory.instantiate non-string file_type
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_instantiate_non_string_file_type(monkeypatch, tmp_dir):
+ path = tmp_dir / "test.custom"
+ path.write_text("data")
+
+ class CustomFile:
+ type = 42
+
+ def __init__(self, **kwargs):
+ for k, v in kwargs.items():
+ setattr(self, k, v)
+
+ monkeypatch.setitem(ExtensionFactory._extensions, ".custom", CustomFile)
+
+ obj = await ExtensionFactory.instantiate(path)
+ assert obj.type == "FILE"
+
+
+# ---------------------------------------------------------------------------
+# New tests for line 486: pq.ParquetWriter in empty DBF
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_dbf_to_parquet_empty_writer(tmp_dir):
+ dbf_path = tmp_dir / "empty.dbf"
+ _create_dbf(dbf_path, [("NAME", "C", 10, 0)], [])
+ out_path = tmp_dir / "empty_out.parquet"
+ db = DBF(path=dbf_path)
+
+ mock_table = MagicMock(spec=pa.Table)
+ mock_writer = MagicMock()
+ mock_parquet = MagicMock(spec=Parquet)
+
+ with (
+ patch("pysus.api.extensions.pa") as mock_pa,
+ patch(
+ "pysus.api.extensions.pq.ParquetWriter", return_value=mock_writer
+ ),
+ patch.object(
+ ExtensionFactory, "instantiate", return_value=mock_parquet
+ ),
+ ):
+ mock_pa.Table.from_pandas.return_value = mock_table
+ result = await db.to_parquet(out_path)
+
+ assert result is mock_parquet
+ mock_writer.close.assert_called_once()
+
+
+# ---------------------------------------------------------------------------
+# New tests for line 521: DBC.load success path
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_dbc_load_success(tmp_dir):
+ dbf_path = tmp_dir / "test.dbc"
+ dbf_path.touch()
+ obj = DBC(path=dbf_path)
+
+ def mock_dbc2dbf(infile, outfile):
+ _create_dbf(Path(outfile), [("NAME", "C", 10, 0)], [("NAME", b"Alice")])
+
+ mock_parquet = MagicMock(spec=Parquet)
+ mock_parquet.load = AsyncMock(return_value=pd.DataFrame({"x": [1]}))
+
+ with (
+ patch("pysus.api.extensions.dbc2dbf", side_effect=mock_dbc2dbf),
+ patch.object(DBF, "to_parquet", return_value=mock_parquet),
+ ):
+ df = await obj.load()
+
+ assert list(df.columns) == ["x"]
+ assert len(df) == 1
+
+
+# ---------------------------------------------------------------------------
+# New tests for lines 529-530: DBC.stream success path
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_dbc_stream_success(tmp_dir):
+ dbf_path = tmp_dir / "test.dbc"
+ dbf_path.touch()
+ obj = DBC(path=dbf_path)
+
+ def mock_dbc2dbf(infile, outfile):
+ _create_dbf(Path(outfile), [("NAME", "C", 10, 0)], [("NAME", b"Alice")])
+
+ async def _mock_stream(**kw):
+ yield pd.DataFrame({"x": [1]})
+
+ mock_parquet = MagicMock(spec=Parquet)
+ mock_parquet.stream = _mock_stream
+
+ with (
+ patch("pysus.api.extensions.dbc2dbf", side_effect=mock_dbc2dbf),
+ patch.object(DBF, "to_parquet", return_value=mock_parquet),
+ ):
+ chunks = [chunk async for chunk in obj.stream(chunk_size=100)]
+
+ assert len(chunks) == 1
+ assert list(chunks[0].columns) == ["x"]
+
+
+# ---------------------------------------------------------------------------
+# New tests for line 560: DBC.to_parquet non-BaseTabularFile after dbc2dbf
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_dbc_to_parquet_not_tabular(tmp_dir):
+ dbf_path = tmp_dir / "test.dbc"
+ dbf_path.touch()
+ obj = DBC(path=dbf_path)
+
+ def mock_dbc2dbf(infile, outfile):
+ pass
+
+ mock_non_tabular = MagicMock(spec=BaseLocalFile)
+
+ with (
+ patch("pysus.api.extensions.dbc2dbf", side_effect=mock_dbc2dbf),
+ patch.object(
+ ExtensionFactory, "instantiate", return_value=mock_non_tabular
+ ),
+ ):
+ with pytest.raises(RuntimeError, match="Not a DBF"):
+ await obj.to_parquet()
+
+
+# ---------------------------------------------------------------------------
+# New tests for line 708: Zip.to_parquet with no tabular file inside
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_zip_to_parquet_no_tabular(tmp_dir):
+ zip_path = tmp_dir / "data.zip"
+ text_path = tmp_dir / "readme.txt"
+ text_path.write_text("hello")
+ with zipfile.ZipFile(zip_path, "w") as z:
+ z.write(text_path, arcname="readme.txt")
+
+ obj = Zip(path=zip_path)
+ with pytest.raises(ValueError, match="No tabular file found"):
+ await obj.to_parquet()
diff --git a/pysus/tests/api/test_metadata.py b/pysus/tests/api/test_metadata.py
new file mode 100644
index 00000000..75329381
--- /dev/null
+++ b/pysus/tests/api/test_metadata.py
@@ -0,0 +1,109 @@
+import builtins
+from unittest.mock import patch
+
+from pysus.api.metadata.models import (
+ Column,
+ Dataset,
+ DatasetGroup,
+ File,
+ FileMeta,
+ lookup_column_meta,
+ pick_description,
+)
+from pysus.api.metadata.report import Columns, Footer, Header
+from pysus.api.types import VARCHAR
+
+
+class TestReportClasses:
+ def test_header_instantiation(self):
+ h = Header()
+ assert isinstance(h, Header)
+
+ def test_columns_instantiation(self):
+ c = Columns()
+ assert isinstance(c, Columns)
+
+ def test_footer_instantiation(self):
+ f = Footer()
+ assert isinstance(f, Footer)
+
+
+class TestLookupColumnMeta:
+ def test_found_returns_dict(self):
+ meta = lookup_column_meta("ABAND")
+ assert meta is not None
+ assert isinstance(meta, dict)
+
+ def test_not_found_returns_none(self):
+ meta = lookup_column_meta("NONEXISTENT_COLUMN_XYZ")
+ assert meta is None
+
+ def test_import_error_returns_none(self):
+ with patch.object(
+ builtins, "__import__", side_effect=ImportError("mock")
+ ):
+ result = lookup_column_meta("ABAND")
+ assert result is None
+
+
+class TestPickDescription:
+ def test_none_meta_returns_empty(self):
+ assert pick_description(None) == ""
+
+ def test_non_empty_value_returns_first_value(self):
+ meta = {"sinan": "Some description"}
+ assert pick_description(meta) == "Some description"
+
+ def test_empty_dict_returns_empty(self):
+ assert pick_description({}) == ""
+
+ def test_all_empty_values_returns_empty(self):
+ meta = {"sinan": "", "sih": ""}
+ assert pick_description(meta) == ""
+
+
+class TestColumnFromSchema:
+ def test_from_schema_creates_column(self):
+ col = Column.from_schema("ABAND", VARCHAR)
+ assert isinstance(col, Column)
+ assert col.name == "ABAND"
+ assert col.dtype == VARCHAR
+
+ def test_from_schema_unknown_column(self):
+ col = Column.from_schema("NONEXISTENT_COLUMN_XYZ", VARCHAR)
+ assert col.name == "NONEXISTENT_COLUMN_XYZ"
+ assert col.description == ""
+ assert col.dtype == VARCHAR
+
+
+class TestDataclassInstantiations:
+ def test_dataset(self):
+ d = Dataset(name="sinan", long_name="SINAN", description="Test")
+ assert d.name == "sinan"
+ assert d.long_name == "SINAN"
+ assert d.description == "Test"
+
+ def test_dataset_group(self):
+ dg = DatasetGroup(name="sinan", long_name="SINAN", description="Test")
+ assert dg.name == "sinan"
+ assert dg.long_name == "SINAN"
+ assert dg.description == "Test"
+
+ def test_file_meta(self):
+ fm = FileMeta(name="test", path="/tmp", size=100)
+ assert fm.name == "test"
+ assert fm.path == "/tmp"
+ assert fm.size == 100
+
+ def test_file(self):
+ f = File(origin="FTP")
+ assert f.origin == "FTP"
+ assert f.dataset is None
+ assert f.group is None
+ assert f.columns == []
+
+ def test_column(self):
+ col = Column(name="ABAND", description="Test", dtype=VARCHAR)
+ assert col.name == "ABAND"
+ assert col.description == "Test"
+ assert col.dtype == VARCHAR
diff --git a/pysus/tests/api/test_models.py b/pysus/tests/api/test_models.py
index f559b96a..1f61de5b 100644
--- a/pysus/tests/api/test_models.py
+++ b/pysus/tests/api/test_models.py
@@ -3,12 +3,23 @@
from datetime import datetime
from pathlib import Path
from typing import Any
-from unittest.mock import MagicMock
+from unittest.mock import AsyncMock, MagicMock, patch
+import pandas as pd
import pytest
from pydantic import ValidationError
+from pysus import CACHEPATH
+from pysus.api.extensions import Parquet
from pysus.api.models import BaseRemoteGroup # noqa
-from pysus.api.models import BaseLocalFile, BaseRemoteDataset, BaseRemoteFile
+from pysus.api.models import (
+ BaseCompressedFile,
+ BaseLocalFile,
+ BaseRemoteClient,
+ BaseRemoteDataset,
+ BaseRemoteFile,
+ BaseRemoteObject,
+ BaseTabularFile,
+)
class MockLocalFile(BaseLocalFile):
@@ -50,7 +61,468 @@ async def _download(
return output
+# Test double for BaseTabularFile. Columns, row count and streamed chunks are
+# read from optional private attributes (``_columns_val``, ``_rows_val``,
+# ``_chunks``) that individual tests set on the instance.
+class MockTabularFile(BaseTabularFile):
+ type: str = "tabular"
+
+ @property
+ def columns(self) -> list:
+ # Empty list when the test has not set ``_columns_val``.
+ return getattr(self, "_columns_val", [])
+
+ @property
+ def rows(self) -> int:
+ # Zero when the test has not set ``_rows_val``.
+ return getattr(self, "_rows_val", 0)
+
+ # Always returns a new, empty DataFrame.
+ async def load(self) -> pd.DataFrame:
+ return pd.DataFrame()
+
+ # Yields the frames stored in ``_chunks`` in order; ``chunk_size`` is
+ # accepted but not used by this mock.
+ async def stream(
+ self,
+ chunk_size: int = 10000,
+ ) -> AsyncGenerator[pd.DataFrame, None]:
+ for chunk in getattr(self, "_chunks", []):
+ yield chunk
+
+
+# Test double for BaseCompressedFile backed by two in-memory members.
+class MockCompressedFile(BaseCompressedFile):
+ type: str = "compressed"
+
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ # Member names returned by ``list_members``.
+ self._members_list = ["member1.txt", "member2.txt"]
+ # Member name -> bytes returned by ``open_member``.
+ self._member_data = {
+ "member1.txt": b"content1",
+ "member2.txt": b"content2",
+ }
+
+ # Always returns empty bytes.
+ async def load(self) -> bytes:
+ return b""
+
+ async def list_members(self) -> list[str]:
+ return self._members_list
+
+ # Unknown member names yield empty bytes instead of raising.
+ async def open_member(self, member_name: str) -> Any:
+ return self._member_data.get(member_name, b"")
+
+ # Extracts nothing: ``target_dir`` is ignored and the result is empty.
+ async def extract(
+ self,
+ target_dir: Path = CACHEPATH,
+ ) -> list[BaseLocalFile]:
+ return []
+
+
+# Concrete BaseRemoteGroup for the tests: fixed metadata strings and a file
+# list supplied through ``_mock_files``.
+class MockRemoteGroup(BaseRemoteGroup):
+ type: str = "group"
+
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ # Starts as None; tests also assign it directly.
+ # NOTE(review): presumably the cache slot read by the base class'
+ # ``files`` accessor - confirm against BaseRemoteGroup.
+ self._files = None
+ # Returned as-is by ``_fetch_files``.
+ self._mock_files = []
+ self._name_val = "test_group"
+
+ @property
+ def name(self) -> str:
+ return self._name_val
+
+ @property
+ def long_name(self) -> str:
+ return "Test Group"
+
+ @property
+ def description(self) -> str:
+ return "A test group"
+
+ # Returns whatever list the test stored in ``_mock_files``.
+ async def _fetch_files(self) -> list[BaseRemoteFile]:
+ return self._mock_files
+
+
+# Concrete BaseRemoteDataset for the tests: fixed metadata strings and
+# content supplied through ``_mock_content``.
+class MockRemoteDataset(BaseRemoteDataset):
+ type: str = "dataset"
+
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ # Starts as None; tests also reset it to None before searching.
+ # NOTE(review): presumably the cache slot used by the base class -
+ # confirm against BaseRemoteDataset.
+ self._content = None
+ # Returned as-is by ``_fetch_content``.
+ self._mock_content = []
+ self._name_val = "test_dataset"
+
+ @property
+ def name(self) -> str:
+ return self._name_val
+
+ @property
+ def long_name(self) -> str:
+ return "Test Dataset"
+
+ @property
+ def description(self) -> str:
+ return "A test dataset"
+
+ # Returns whatever list the test stored in ``_mock_content``.
+ async def _fetch_content(self):
+ return self._mock_content
+
+
MockRemoteFile.model_rebuild()
+# Call pydantic's ``model_rebuild()`` on each mock model defined in this
+# module. NOTE(review): presumably needed so annotations that reference other
+# models are resolved before the mocks are instantiated - confirm.
+MockTabularFile.model_rebuild()
+MockCompressedFile.model_rebuild()
+MockRemoteGroup.model_rebuild()
+MockRemoteDataset.model_rebuild()
+
+
+# --- BaseFile ---
+
+
+def test_base_file_str(tmp_path):
+    """``str()`` of a local file is its bare file name."""
+    target = tmp_path / "some_file.txt"
+    target.write_text("hello")
+    local = MockLocalFile(path=target)
+    assert str(local) == "some_file.txt"
+
+
+# --- BaseLocalFile ---
+
+
+def test_base_local_file_name(tmp_path):
+    """``name`` is the final path component, suffix included."""
+    target = tmp_path / "my_data.csv"
+    target.write_text("a,b\n1,2")
+    local = MockLocalFile(path=target)
+    assert local.name == "my_data.csv"
+
+
+def test_base_local_file_extension(tmp_path):
+    """``extension`` is the suffix with its leading dot."""
+    target = tmp_path / "data.csv"
+    target.write_text("a,b\n1,2")
+    local = MockLocalFile(path=target)
+    assert local.extension == ".csv"
+
+
+def test_base_local_file_size(tmp_path):
+    """``size`` equals the number of bytes written to the file."""
+    payload = b"hello"
+    target = tmp_path / "data.bin"
+    target.write_bytes(payload)
+    local = MockLocalFile(path=target)
+    assert local.size == len(payload)
+
+
+def test_base_local_file_modify(tmp_path):
+    """``modify`` is exposed as a ``datetime`` instance."""
+    target = tmp_path / "data.txt"
+    target.write_text("hello")
+    modified = MockLocalFile(path=target).modify
+    assert isinstance(modified, datetime)
+
+
+# --- BaseTabularFile.to_parquet ---
+
+
+@pytest.mark.asyncio
+async def test_to_parquet_no_output_path(tmp_path):
+    """``to_parquet()`` called without ``output_path`` still returns a mock."""
+    source = MockTabularFile(path=tmp_path / "source.csv")
+    source._chunks = [pd.DataFrame({"a": [1, 2, 3]})]
+    source._rows_val = 3
+
+    # The extension factory is stubbed so no real Parquet object is built.
+    factory_target = "pysus.api.extensions.ExtensionFactory.instantiate"
+    with patch(factory_target, return_value=MagicMock(spec=Parquet)):
+        converted = await source.to_parquet()
+
+    assert isinstance(converted, MagicMock)
+
+
+@pytest.mark.asyncio
+async def test_to_parquet_empty_chunk(tmp_path):
+    """An empty leading chunk does not stop the conversion from finishing."""
+    source = MockTabularFile(path=tmp_path / "source.csv")
+    source._chunks = [pd.DataFrame(), pd.DataFrame({"a": [1, 2, 3]})]
+    source._rows_val = 3
+    destination = tmp_path / "out.parquet"
+
+    factory_target = "pysus.api.extensions.ExtensionFactory.instantiate"
+    with patch(factory_target, return_value=MagicMock(spec=Parquet)):
+        converted = await source.to_parquet(output_path=destination)
+
+    assert isinstance(converted, MagicMock)
+
+
+@pytest.mark.asyncio
+async def test_to_parquet_null_schema(tmp_path):
+    """A chunk containing an all-``None`` column converts without error."""
+    source = MockTabularFile(path=tmp_path / "source.csv")
+    source._chunks = [pd.DataFrame({"a": [1], "b": [None]})]
+    source._rows_val = 1
+    destination = tmp_path / "out.parquet"
+
+    factory_target = "pysus.api.extensions.ExtensionFactory.instantiate"
+    with patch(factory_target, return_value=MagicMock(spec=Parquet)):
+        converted = await source.to_parquet(output_path=destination)
+
+    assert isinstance(converted, MagicMock)
+
+
+@pytest.mark.asyncio
+async def test_to_parquet_callback(tmp_path):
+    """The callback fires exactly once, with ``(3, 3)``, for one 3-row chunk."""
+    source = MockTabularFile(path=tmp_path / "source.csv")
+    source._chunks = [pd.DataFrame({"a": [1, 2, 3]})]
+    source._rows_val = 3
+    destination = tmp_path / "out.parquet"
+    progress = MagicMock()
+
+    factory_target = "pysus.api.extensions.ExtensionFactory.instantiate"
+    with patch(factory_target, return_value=MagicMock(spec=Parquet)):
+        await source.to_parquet(output_path=destination, callback=progress)
+
+    progress.assert_called_once_with(3, 3)
+
+
+@pytest.mark.asyncio
+async def test_to_parquet_cleanup(tmp_path):
+    """The Parquet writer is closed exactly once after conversion."""
+    source = MockTabularFile(path=tmp_path / "source.csv")
+    source._chunks = [pd.DataFrame({"a": [1, 2, 3]})]
+    source._rows_val = 3
+    destination = tmp_path / "out.parquet"
+
+    # Replace the writer class so the instance it hands out can be inspected.
+    writer = MagicMock()
+    writer_patch = patch("pyarrow.parquet.ParquetWriter", return_value=writer)
+    factory_patch = patch(
+        "pysus.api.extensions.ExtensionFactory.instantiate",
+        return_value=MagicMock(spec=Parquet),
+    )
+    with writer_patch, factory_patch:
+        await source.to_parquet(output_path=destination)
+
+    writer.close.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_to_parquet_value_error(tmp_path):
+    """A non-Parquet object from the factory raises ``ValueError``."""
+    source = MockTabularFile(path=tmp_path / "source.csv")
+    source._chunks = [pd.DataFrame({"a": [1]})]
+    source._rows_val = 1
+    destination = tmp_path / "out.parquet"
+
+    factory_target = "pysus.api.extensions.ExtensionFactory.instantiate"
+    # The factory hands back a plain string instead of a Parquet instance.
+    with patch(factory_target, return_value="not_a_parquet"):
+        with pytest.raises(ValueError, match="Could not parse"):
+            await source.to_parquet(output_path=destination)
+
+
+# --- BaseCompressedFile ---
+
+
+@pytest.mark.asyncio
+async def test_base_compressed_file_stream(tmp_path):
+    """``stream()`` yields each member's content in listing order."""
+    archive_path = tmp_path / "archive.zip"
+    archive_path.write_text("dummy")
+    archive = MockCompressedFile(path=archive_path)
+
+    streamed = [item async for item in archive.stream()]
+
+    assert streamed == [b"content1", b"content2"]
+
+
+# --- SearchableMixin ---
+
+
+def test_searchable_mixin_matches():
+    """Check ``_matches`` for a full match, a wrong value and an unset attr."""
+    candidate = MagicMock(year=2024, month=6)
+
+    # The mixin is not imported by name: it is taken as the second base class
+    # of BaseRemoteFile, so this lookup depends on the declared base order.
+    mixin_cls = BaseRemoteFile.__bases__[1]
+    matcher = mixin_cls()
+
+    assert matcher._matches(candidate, year=2024, month=6) is True
+    assert matcher._matches(candidate, year=2025) is False
+    # ``extra_attr`` was never set, so the mock returns an auto-created child.
+    assert matcher._matches(candidate, extra_attr="missing") is False
+
+
+# --- BaseRemoteFile ---
+
+
+def test_base_remote_file_name():
+    """``name`` is the last component of the remote path."""
+    dataset = MagicMock(spec=BaseRemoteDataset)
+    remote_file = MockRemoteFile(path="remote/path.txt", dataset=dataset)
+    assert remote_file.name == "path.txt"
+
+
+def test_base_remote_file_client():
+    """``client`` on a remote file is the very client of its dataset."""
+    client_stub = MagicMock(spec=BaseRemoteClient)
+    dataset = MagicMock(spec=BaseRemoteDataset)
+    dataset.client = client_stub
+    remote_file = MockRemoteFile(path="remote/path.txt", dataset=dataset)
+    assert remote_file.client is client_stub
+
+
+def test_base_remote_file_year():
+    """``year`` is ``None`` for the mock remote file."""
+    dataset = MagicMock(spec=BaseRemoteDataset)
+    remote_file = MockRemoteFile(path="r/p.txt", dataset=dataset)
+    assert remote_file.year is None
+
+
+def test_base_remote_file_month():
+    """``month`` is ``None`` for the mock remote file."""
+    dataset = MagicMock(spec=BaseRemoteDataset)
+    remote_file = MockRemoteFile(path="r/p.txt", dataset=dataset)
+    assert remote_file.month is None
+
+
+def test_base_remote_file_state():
+    """``state`` is ``None`` for the mock remote file."""
+    dataset = MagicMock(spec=BaseRemoteDataset)
+    remote_file = MockRemoteFile(path="r/p.txt", dataset=dataset)
+    assert remote_file.state is None
+
+
+@pytest.mark.asyncio
+async def test_remote_file_download_default_cache(tmp_path):
+    """Without ``output`` the file is written under the patched cache path."""
+    dataset = MagicMock(spec=BaseRemoteDataset)
+    remote_file = MockRemoteFile(path="remote/path.txt", dataset=dataset)
+    local_stub = MagicMock(spec=BaseLocalFile)
+
+    factory_patch = patch(
+        "pysus.api.extensions.ExtensionFactory.instantiate",
+        return_value=local_stub,
+    )
+    # Point the module-level cache directory at the temporary folder.
+    cache_patch = patch("pysus.api.models.CACHEPATH", tmp_path)
+    with factory_patch, cache_patch:
+        downloaded = await remote_file.download()
+
+    assert downloaded == local_stub
+    assert (tmp_path / "path.txt").exists()
+
+
+@pytest.mark.asyncio
+async def test_remote_file_download_output_dir(tmp_path):
+    """With a directory as ``output`` the file is written inside it."""
+    dataset = MagicMock(spec=BaseRemoteDataset)
+    remote_file = MockRemoteFile(path="remote/path.txt", dataset=dataset)
+    destination_dir = tmp_path / "outdir"
+    destination_dir.mkdir()
+    local_stub = MagicMock(spec=BaseLocalFile)
+
+    factory_target = "pysus.api.extensions.ExtensionFactory.instantiate"
+    with patch(factory_target, return_value=local_stub):
+        downloaded = await remote_file.download(output=destination_dir)
+
+    assert downloaded == local_stub
+    assert (destination_dir / "path.txt").exists()
+
+
+# --- BaseRemoteObject ---
+
+
+def test_base_remote_object_str():
+    """``str()`` of a remote object equals its ``name`` property."""
+
+    # Minimal subclass that supplies the three descriptive properties.
+    class NamedObj(BaseRemoteObject):
+        type: str = "test"
+
+        @property
+        def description(self) -> str:
+            return "Desc"
+
+        @property
+        def long_name(self) -> str:
+            return "My Name"
+
+        @property
+        def name(self) -> str:
+            return "my_name"
+
+    rendered = str(NamedObj())
+    assert rendered == "my_name"
+
+
+# --- BaseRemoteGroup ---
+
+
+@pytest.mark.asyncio
+async def test_base_remote_group_parent():
+    """``parent`` of a group is the dataset it was created with."""
+    dataset = MagicMock(spec=BaseRemoteDataset)
+    remote_group = MockRemoteGroup(dataset=dataset)
+    assert remote_group.parent is dataset
+
+
+@pytest.mark.asyncio
+async def test_base_remote_group_files(tmp_path):
+    """Awaiting ``files`` fetches the list and stores it on ``_files``."""
+    dataset = MagicMock(spec=BaseRemoteDataset)
+    fetched = [MagicMock(spec=BaseRemoteFile)]
+    remote_group = MockRemoteGroup(dataset=dataset)
+    remote_group._mock_files = fetched
+    remote_group._files = None
+
+    listed = await remote_group.files
+
+    assert listed == fetched
+    assert remote_group._files is fetched
+
+
+@pytest.mark.asyncio
+async def test_base_remote_group_files_cached():
+    """When ``_files`` is already set, ``files`` returns that same list."""
+    dataset = MagicMock(spec=BaseRemoteDataset)
+    preloaded = [MagicMock(spec=BaseRemoteFile)]
+    remote_group = MockRemoteGroup(dataset=dataset)
+    remote_group._files = preloaded
+
+    listed = await remote_group.files
+
+    assert listed is preloaded
+
+
+@pytest.mark.asyncio
+async def test_base_remote_group_search_all():
+    """``search()`` with no filters returns every file of the group."""
+    dataset = MagicMock(spec=BaseRemoteDataset)
+    members = [
+        MagicMock(spec=BaseRemoteFile, year=year) for year in (2024, 2025)
+    ]
+    remote_group = MockRemoteGroup(dataset=dataset)
+    remote_group._mock_files = members
+    remote_group._files = None
+
+    found = await remote_group.search()
+
+    assert found == members
+
+
+@pytest.mark.asyncio
+async def test_base_remote_group_search_with_kwargs():
+    """``search(year=2024)`` keeps only the file whose year is 2024."""
+    dataset = MagicMock(spec=BaseRemoteDataset)
+    wanted = MagicMock(spec=BaseRemoteFile)
+    wanted.year = 2024
+    other = MagicMock(spec=BaseRemoteFile)
+    other.year = 2025
+    remote_group = MockRemoteGroup(dataset=dataset)
+    remote_group._mock_files = [wanted, other]
+    remote_group._files = None
+
+    found = await remote_group.search(year=2024)
+
+    assert found == [wanted]
+
+
+# --- BaseRemoteDataset ---
+
+
+@pytest.mark.asyncio
+async def test_base_remote_dataset_search():
+    """A filtered search delegates to groups and drops non-matching files."""
+    client_stub = MagicMock(spec=BaseRemoteClient)
+    dataset = MockRemoteDataset(client=client_stub)
+    dataset._content = None
+
+    in_group = MagicMock(spec=BaseRemoteFile)
+    in_group.year = 2024
+    loose_file = MagicMock(spec=BaseRemoteFile)
+    loose_file.year = 2025
+
+    # The group answers the search itself; the loose file has year 2025.
+    group_stub = MagicMock(spec=BaseRemoteGroup)
+    group_stub.search = AsyncMock(return_value=[in_group])
+
+    dataset._mock_content = [group_stub, loose_file]
+
+    found = await dataset.search(year=2024)
+
+    assert found == [in_group]
+    group_stub.search.assert_called_once_with(year=2024)
+
+
+@pytest.mark.asyncio
+async def test_base_remote_dataset_search_no_kwargs():
+    """An unfiltered search returns all top-level files in order."""
+    client_stub = MagicMock(spec=BaseRemoteClient)
+    dataset = MockRemoteDataset(client=client_stub)
+    dataset._content = None
+
+    first = MagicMock(spec=BaseRemoteFile)
+    first.year = 2024
+    second = MagicMock(spec=BaseRemoteFile)
+    second.year = 2025
+
+    dataset._mock_content = [first, second]
+
+    found = await dataset.search()
+
+    assert found == [first, second]
+
+
+# --- Existing tests (unchanged) ---
@pytest.mark.asyncio
diff --git a/pysus/tests/api/test_types.py b/pysus/tests/api/test_types.py
index 775c9cfd..b659452d 100644
--- a/pysus/tests/api/test_types.py
+++ b/pysus/tests/api/test_types.py
@@ -1,25 +1,117 @@
-from pysus.api.types import FileType, State
+import pytest
+from pydantic import TypeAdapter, ValidationError
+from pysus.api.types import (
+ BIGINT,
+ BOOLEAN,
+ CIHA,
+ CNES,
+ CSV,
+ DADOSGOV,
+ DATE,
+ DBC,
+ DBF,
+ DIR,
+ DOUBLE,
+ DUCKLAKE,
+ FILE,
+ FLOAT,
+ FTP,
+ IBGE,
+ INTEGER,
+ JSON,
+ PARQUET,
+ PDF,
+ PNI,
+ SIA,
+ SIH,
+ SIM,
+ SINAN,
+ SINASC,
+ VARCHAR,
+ ZIP,
+ ColumnType,
+ DatasetName,
+ FileType,
+ Origin,
+ State,
+)
+
+
+class TestOrigin:
+    """Validation of the ``Origin`` type and its constants."""
+
+    def test_valid_origins(self):
+        """Each origin constant validates and comes back unchanged."""
+        validate = TypeAdapter(Origin).validate_python
+        for value in (FTP, DADOSGOV, DUCKLAKE):
+            validated = validate(value)
+            assert validated == value
+
+    def test_invalid_origin_raises(self):
+        """A string outside ``Origin`` is rejected with ``ValidationError``."""
+        adapter = TypeAdapter(Origin)
+        with pytest.raises(ValidationError):
+            adapter.validate_python("INVALID")
+
+    def test_origin_constants(self):
+        """The constants hold their documented literal strings."""
+        expected = [(FTP, "FTP"), (DADOSGOV, "DadosGov"), (DUCKLAKE, "DuckLake")]
+        for constant, literal in expected:
+            assert constant == literal
+
+
+class TestColumnType:
+    """Validation of the ``ColumnType`` type and its constants."""
+
+    def test_valid_column_types(self):
+        """Each column-type constant validates and comes back unchanged."""
+        validate = TypeAdapter(ColumnType).validate_python
+        for value in (VARCHAR, INTEGER, BIGINT, FLOAT, DOUBLE, BOOLEAN, DATE):
+            validated = validate(value)
+            assert validated == value
+
+    def test_invalid_column_type_raises(self):
+        """A string outside ``ColumnType`` raises ``ValidationError``."""
+        adapter = TypeAdapter(ColumnType)
+        with pytest.raises(ValidationError):
+            adapter.validate_python("INVALID")
+
+    def test_column_type_constants(self):
+        """Each constant equals the upper-case string of its own name."""
+        expected = [
+            (VARCHAR, "VARCHAR"),
+            (INTEGER, "INTEGER"),
+            (BIGINT, "BIGINT"),
+            (FLOAT, "FLOAT"),
+            (DOUBLE, "DOUBLE"),
+            (BOOLEAN, "BOOLEAN"),
+            (DATE, "DATE"),
+        ]
+        for constant, literal in expected:
+            assert constant == literal
+
+
+class TestDatasetName:
+    """Validation of the ``DatasetName`` type and its constants."""
+
+    def test_valid_dataset_names(self):
+        """Each dataset-name constant validates and comes back unchanged."""
+        validate = TypeAdapter(DatasetName).validate_python
+        names = (SINAN, SINASC, SIM, SIH, SIA, PNI, IBGE, CNES, CIHA)
+        for value in names:
+            validated = validate(value)
+            assert validated == value
+
+    def test_invalid_dataset_name_raises(self):
+        """A string outside ``DatasetName`` raises ``ValidationError``."""
+        adapter = TypeAdapter(DatasetName)
+        with pytest.raises(ValidationError):
+            adapter.validate_python("INVALID")
+
+    def test_dataset_name_constants(self):
+        """Each constant equals the upper-case string of its own name."""
+        expected = [
+            (SINAN, "SINAN"),
+            (SINASC, "SINASC"),
+            (SIM, "SIM"),
+            (SIH, "SIH"),
+            (SIA, "SIA"),
+            (PNI, "PNI"),
+            (IBGE, "IBGE"),
+            (CNES, "CNES"),
+            (CIHA, "CIHA"),
+        ]
+        for constant, literal in expected:
+            assert constant == literal
class TestFileType:
def test_file_types_are_valid(self):
- valid_types: list[FileType] = [
- "FILE",
- "DIR",
- "PARQUET",
- "CSV",
- "JSON",
- "PDF",
- "DBC",
- "DBF",
- "ZIP",
- ]
+ # Validate each FileType constant through a pydantic TypeAdapter and
+ # require the value to come back unchanged.
+ adapter = TypeAdapter(FileType)
+ valid_types = [FILE, DIR, PARQUET, CSV, JSON, PDF, DBC, DBF, ZIP]
for ft in valid_types:
- assert ft in FileType.__args__
+ assert adapter.validate_python(ft) == ft
+
+ # A string that is not one of the FileType values must be rejected.
+ def test_invalid_file_type_raises(self):
+ with pytest.raises(ValidationError):
+ TypeAdapter(FileType).validate_python("INVALID")
class TestState:
def test_all_brazilian_states_present(self):
+ adapter = TypeAdapter(State)
expected_states = {
"AC",
"AL",
@@ -49,5 +141,9 @@ def test_all_brazilian_states_present(self):
"TO",
"DF",
}
- actual_states = set(State.__args__) # type: ignore
- assert actual_states == expected_states
+ for state in expected_states:
+ adapter.validate_python(state)
+
+ def test_invalid_state_raises(self):
+ with pytest.raises(ValidationError):
+ TypeAdapter(State).validate_python("XX")
diff --git a/pysus/tests/api/test_utils.py b/pysus/tests/api/test_utils.py
index 93ebbdd6..f90f5c2d 100644
--- a/pysus/tests/api/test_utils.py
+++ b/pysus/tests/api/test_utils.py
@@ -42,3 +42,11 @@ def test_add_dv_empty():
def test_add_dv_non_digit():
assert add_dv("abc") == "abc"
+
+
+def test_add_dv_5digit_returns_as_is():
+    """A five-digit code is returned by ``add_dv`` unchanged."""
+    result = add_dv("12345")
+    assert result == "12345"
+
+
+def test_add_dv_8digit_returns_as_is():
+    """An eight-digit code is returned by ``add_dv`` unchanged."""
+    result = add_dv("12345678")
+    assert result == "12345678"
diff --git a/pysus/tests/conftest.py b/pysus/tests/conftest.py
new file mode 100644
index 00000000..c55637aa
--- /dev/null
+++ b/pysus/tests/conftest.py
@@ -0,0 +1,9 @@
+"""pytest configuration - mocks duckdb.functional before any other imports."""
+
+import sys
+from unittest.mock import MagicMock
+
+# Register a MagicMock under the module name "duckdb.functional" so that any
+# later import of that name resolves to the stub.
+# NOTE(review): the guard only checks whether the module has already been
+# imported, not whether the real one is installed, so an importable
+# duckdb.functional is still shadowed when this conftest runs first - confirm
+# that is intended.
+if "duckdb.functional" not in sys.modules:
+ _mock = MagicMock()
+ # The stub exposes SPECIAL as the plain string "SPECIAL".
+ _mock.SPECIAL = "SPECIAL"
+ sys.modules["duckdb.functional"] = _mock