diff --git a/.idea/PySUS.iml b/.idea/PySUS.iml deleted file mode 100644 index a17c85bf..00000000 --- a/.idea/PySUS.iml +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - - - - - - - - diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index a3544b6c..00000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - diff --git a/poetry.lock b/poetry.lock index 076d6c5a..5ed79c08 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4057,11 +4057,26 @@ description = "File type identification using libmagic" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" groups = ["main"] +markers = "sys_platform != \"win32\"" files = [ {file = "python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b"}, {file = "python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3"}, ] +[[package]] +name = "python-magic-bin" +version = "0.4.14" +description = "File type identification using libmagic binary package" +optional = false +python-versions = "*" +groups = ["main"] +markers = "sys_platform == \"win32\"" +files = [ + {file = "python_magic_bin-0.4.14-py2.py3-none-macosx_10_6_intel.whl", hash = "sha256:7b1743b3dbf16601d6eedf4e7c2c9a637901b0faaf24ad4df4d4527e7d8f66a4"}, + {file = "python_magic_bin-0.4.14-py2.py3-none-win32.whl", hash = "sha256:34a788c03adde7608028203e2dbb208f1f62225ad91518787ae26d603ae68892"}, + {file = "python_magic_bin-0.4.14-py2.py3-none-win_amd64.whl", hash = "sha256:90be6206ad31071a36065a2fc169c5afb5e0355cbe6030e87641c6c62edc2b69"}, +] + [[package]] name = "pytz" version = "2026.2" @@ -5909,4 +5924,4 @@ tui = ["humanize", "textual"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.14" -content-hash = "61b754b1f0f7a86375c7a562f830223b029a2c5594747dc7082eb99b2da021b9" +content-hash = "74c5be9c37a010fac0c9e37253c42bc1a9641d4bcd43e617bfcb3ce48e04ff09" diff --git a/pyproject.toml b/pyproject.toml index e91b6251..20771497 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,8 @@ pydantic = "^2.12.5" duckdb = "^1.4.4" duckdb-engine = "^0.17.0" sqlalchemy = "^2.0.48" -python-magic = "^0.4.27" +python-magic = { version = "*", platform = "!=win32" } +python-magic-bin = { version = "*", platform = "win32" } chardet = "^7.4.0.post2" anyio = "^4.13.0" httpx = ">=0.28.0" diff --git a/pysus/api/README.md b/pysus/api/README.md new file mode 100644 index 00000000..e7b83bb0 --- /dev/null +++ b/pysus/api/README.md @@ -0,0 +1 @@ +## Roadmap diff --git a/pysus/api/_impl/databases.py b/pysus/api/_impl/databases.py index be44cd3a..fa5a7a8c 100644 --- a/pysus/api/_impl/databases.py +++ b/pysus/api/_impl/databases.py @@ -24,8 +24,8 @@ from typing import Literal import pandas as pd +from pysus.api import types from pysus.api.client import PySUS -from pysus.api.types import State from tqdm import tqdm @@ -220,7 +220,7 @@ def sinan( def sinasc( - state: State, + state: types.State, year: int | list[int], group: str | None = None, **kwargs, @@ -232,7 +232,7 @@ def sinasc( Parameters ---------- - state : State + state : types.State Two-letter state abbreviation (e.g. ``"RJ"``). year : int | list[int] Year or list of years to fetch. @@ -255,7 +255,7 @@ def sinasc( def sim( - state: State, + state: types.State, year: int | list[int], group: str | None = None, **kwargs, @@ -290,7 +290,7 @@ def sim( def sih( - state: State, + state: types.State, year: int | list[int], month: int | list[int], group: str | None = None, @@ -303,7 +303,7 @@ def sih( Parameters ---------- - state : State + state : types.State Two-letter state abbreviation (e.g. ``"RJ"``). year : int | list[int] Year or list of years to fetch. @@ -329,7 +329,7 @@ def sih( def sia( - state: State, + state: types.State, year: int | list[int], month: int | list[int], group: str | None = None, @@ -342,7 +342,7 @@ def sia( Parameters ---------- - state : State + state : types.State Two-letter state abbreviation (e.g. ``"RJ"``). year : int | list[int] Year or list of years to fetch. @@ -368,7 +368,7 @@ def sia( def pni( - state: State, + state: types.State, year: int | list[int], group: str | None = None, **kwargs, @@ -430,7 +430,7 @@ def ibge( def cnes( - state: State, + state: types.State, year: int | list[int], month: int | list[int], group: str | None = None, @@ -469,7 +469,7 @@ def cnes( def ciha( - state: State, + state: types.State, year: int | list[int], month: int | list[int], group: str | None = "CIHA", @@ -508,18 +508,8 @@ def ciha( def list_files( - dataset: Literal[ - "SINAN", - "SINASC", - "SIM", - "SIH", - "SIA", - "PNI", - "IBGE", - "CNES", - "CIHA", - ], - client: Literal["FTP", "DadosGov"] | None = None, + dataset: types.DatasetName, + client: types.Origin | None = None, group: str | None = None, state: str | None = None, year: int | list[int] | None = None, @@ -536,7 +526,7 @@ def list_files( ---------- dataset : Literal Dataset name (e.g. ``"SINAN"``, ``"SINASC"``, etc.). - client : Literal["FTP", "DadosGov"], optional + client : Origin, optional Data source client to query. group : str, optional Group or disease code to filter by. diff --git a/pysus/api/client.py b/pysus/api/client.py index 70ba6bc9..28d486ea 100644 --- a/pysus/api/client.py +++ b/pysus/api/client.py @@ -13,7 +13,9 @@ import anyio import duckdb import pandas as pd +from duckdb import func from pysus import CACHEPATH +from pysus.api.types import Origin from sqlalchemy import DateTime, Enum, Integer, String, create_engine from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, sessionmaker from sqlalchemy.pool import NullPool @@ -24,7 +26,7 @@ from .ftp import FTPClient from .models import BaseLocalFile, BaseRemoteFile -if TYPE_CHECKING: +if TYPE_CHECKING: # pragma: no cover from duckdb import DuckDBPyConnection @@ -101,7 +103,7 @@ async def __aenter__(self): """Set up DuckLake catalog and return self as async context manager.""" self._ducklake = DuckLake() - await self._ducklake._load_catalog() + await self._ducklake.connect() self._attach_client_catalog( "ducklake", str(self._ducklake.catalog_path), @@ -124,7 +126,7 @@ async def get_ducklake(self) -> DuckLake: if self._ducklake is None: self._ducklake = DuckLake() - await self._ducklake._load_catalog() + await self._ducklake.connect() self._attach_client_catalog( "ducklake", str(self._ducklake.catalog_path), @@ -477,26 +479,72 @@ def get_completed_remote_paths(self) -> set[str]: async def query( self, - client: Literal["DadosGov", "FTP"] | None = None, + client: Origin | None = None, dataset: str | None = None, group: str | None = None, state: str | None = None, year: int | None = None, month: int | None = None, ): - """Query available datasets through the DuckLake catalog.""" + """Query available datasets through the DuckLake catalog. + Parameters + ---------- + client : Origin, optional + Source client to filter by. + dataset : str, optional + Dataset name to filter by. + group : str, optional + Group name pattern to filter by (case-insensitive ILIKE). + state : str, optional + Two-letter state code to filter by. + year : int, optional + Year to filter by. + month : int, optional + Month to filter by. + + Returns + ------- + list + List of matching File objects. + """ if self._ducklake is None: await self.get_ducklake() - if self._ducklake is not None: - return await self._ducklake.query( - client=client, - dataset=dataset, + + if self._ducklake is None: + raise ConnectionError("Could not connect to PySUS s3 bucket") + + all_datasets = await self._ducklake.datasets() + + if dataset: + matching = [ + d for d in all_datasets if d.name.lower() == dataset.lower() + ] + if not matching: + return [] + target = matching[0] + files = await target.query( group=group, state=state, year=year, month=month, ) + else: + files = [] + for ds in all_datasets: + ds_files = await ds.query( + group=group, + state=state, + year=year, + month=month, + ) + files.extend(ds_files) + + if not client: + return files + + prefix = f"public/data/{client.lower()}/" + return [f for f in files if f.record.path.startswith(prefix)] def read_parquet( self, @@ -595,8 +643,8 @@ def get_columns(path: Path) -> set[tuple[str, str]]: duckdb.create_function( "__pysus_add_dv", _add_dv_fn, - null_handling="special", - ) + null_handling=func.SPECIAL, + ) # type: ignore except duckdb.NotImplementedException: pass selects = [ diff --git a/pysus/api/README.ipynb b/pysus/api/dadosgov/README.md similarity index 100% rename from pysus/api/README.ipynb rename to pysus/api/dadosgov/README.md diff --git a/pysus/api/dadosgov/client.py b/pysus/api/dadosgov/client.py index 5487f800..6b8e7113 100644 --- a/pysus/api/dadosgov/client.py +++ b/pysus/api/dadosgov/client.py @@ -11,8 +11,9 @@ from pydantic import BaseModel, BeforeValidator, ConfigDict, Field, PrivateAttr from pysus import __version__ from pysus.api.models import BaseRemoteClient, BaseRemoteFile +from pysus.api.types import DADOSGOV -if TYPE_CHECKING: +if TYPE_CHECKING: # pragma: no cover from .models import Dataset @@ -89,7 +90,7 @@ def name(self) -> str: str The abbreviated client name ``"DadosGov"``. """ - return "DadosGov" + return DADOSGOV @property def long_name(self) -> str: diff --git a/pysus/api/dadosgov/databases.py b/pysus/api/dadosgov/databases.py index 07297095..8000d5f9 100644 --- a/pysus/api/dadosgov/databases.py +++ b/pysus/api/dadosgov/databases.py @@ -272,7 +272,7 @@ def formatter(self, filename: str) -> dict[str, Any]: } m = re.search(r"_(\w{3})-out_(\d{4})_\.csv$", name) - if m: + if m: # pragma: no cover return { "state": None, "year": _parse_year(m.group(2)), diff --git a/pysus/api/dadosgov/models.py b/pysus/api/dadosgov/models.py index 4a962ca8..bbe1d654 100644 --- a/pysus/api/dadosgov/models.py +++ b/pysus/api/dadosgov/models.py @@ -271,7 +271,8 @@ def __init__( A callable that extracts metadata from filenames. """ super().__init__( - record=record, dataset=dataset # type: ignore[call-arg] + record=record, + dataset=dataset, # type: ignore[call-arg] ) self._formatter = formatter @@ -354,7 +355,7 @@ class Dataset(BaseRemoteDataset): """ ids: list[str] = [] - client: "DadosGov" + client: DadosGov group_aliases: dict[str, str] = {} def __repr__(self): @@ -369,7 +370,7 @@ def formatter(self, filename: str) -> dict[str, Any]: async def _fetch_content(self) -> list[Group]: """Fetch all groups belonging to this dataset.""" items: list[Group] = [] - client: "DadosGov" = self.client + client: DadosGov = self.client if self.ids: for group_id in self.ids: record = await client.get_dataset(group_id) diff --git a/pysus/api/dadosgov/README.ipynb b/pysus/api/ducklake/README.md similarity index 100% rename from pysus/api/dadosgov/README.ipynb rename to pysus/api/ducklake/README.md diff --git a/pysus/api/ducklake/README.ipynb b/pysus/api/ducklake/catalog/__init__.py similarity index 100% rename from pysus/api/ducklake/README.ipynb rename to pysus/api/ducklake/catalog/__init__.py diff --git a/pysus/api/ducklake/catalog/columns.py b/pysus/api/ducklake/catalog/columns.py new file mode 100644 index 00000000..b7cd3751 --- /dev/null +++ b/pysus/api/ducklake/catalog/columns.py @@ -0,0 +1,7235 @@ +"""Catalog column definitions extracted from catalog.db. + +Maps every column name to a dict of {dataset_name: description}. +""" + +ABAND = {"pni": ""} + +ABDOMINAL = {"sinan": ""} + +ABRANDAD = {"cnes": ""} + +AB_ANOACOM = {"sia": ""} + +AB_DTCIRG2 = {"sia": ""} + +AB_DTCIRUR = {"sia": ""} + +AB_IMC = {"sia": ""} + +AB_MESACOM = {"sia": ""} + +AB_NUMAIH = {"sia": ""} + +AB_NUMAIH2 = {"sia": ""} + +AB_PONTBAR = {"sia": ""} + +AB_PRCAIH2 = {"sia": ""} + +AB_PRCAIH3 = {"sia": ""} + +AB_PRCAIH4 = {"sia": ""} + +AB_PRCAIH5 = {"sia": ""} + +AB_PRCAIH6 = {"sia": ""} + +AB_PROCAIH = {"sia": ""} + +AB_TABBARR = {"sia": ""} + +AB_T_PRC2 = {"sia": ""} + +AB_T_PRC3 = {"sia": ""} + +AB_T_PRC4 = {"sia": ""} + +AB_T_PRC5 = {"sia": ""} + +AB_T_PRC6 = {"sia": ""} + +ACF_ARTDIA = {"sia": ""} + +ACF_DUPLEX = {"sia": ""} + +ACF_FLEBIT = {"sia": ""} + +ACF_FREMIT = {"sia": ""} + +ACF_HEMATO = {"sia": ""} + +ACF_PREFAV = {"sia": ""} + +ACF_PULSO = {"sia": ""} + +ACF_USOCAT = {"sia": ""} + +ACF_VEIAVI = {"sia": ""} + +ACF_VEIDIA = {"sia": ""} + +ACIDO_PEPT = {"sinan": ""} + +ACIDTRAB = {"sim": ""} + +ACONDIC = {"sinan": ""} + +ACUPUNTURA = {"sinan": ""} + +AEROFOBIA = {"sinan": ""} + +AFASTAMENT = {"sinan": ""} + +AFAST_DESG = {"sinan": ""} + +AFAST_RISC = {"sinan": ""} + +AFAST_TRAB = {"sinan": ""} + +AFIRMATIVO = {"sinan": ""} + +AGENTE = {"sinan": ""} + +AGENTE_1 = {"sinan": ""} + +AGENTE_2 = {"sinan": ""} + +AGENTE_3 = {"sinan": ""} + +AGENTE_DES = {"sinan": ""} + +AGENTE_ET0 = {"sinan": ""} + +AGENTE_ET1 = {"sinan": ""} + +AGENTE_ET2 = {"sinan": ""} + +AGENTE_ET3 = {"sinan": ""} + +AGENTE_ETI = {"sinan": ""} + +AGENTE_OUT = {"sinan": ""} + +AGENTE_TOX = {"sinan": ""} + +AGHBE = {"sinan": ""} + +AGHBS = {"sinan": ""} + +AGITACAO = {"sinan": ""} + +AGRAVAIDS = {"sinan": ""} + +AGRAVALCOO = {"sinan": ""} + +AGRAVDIABE = {"sinan": ""} + +AGRAVDOENC = {"sinan": ""} + +AGRAVDROGA = {"sinan": ""} + +AGRAVOUTDE = {"sinan": ""} + +AGRAVOUTRA = {"sinan": ""} + +AGRAVO_DES = {"sinan": ""} + +AGRAVTABAC = {"sinan": ""} + +AGRESSIVI = {"sinan": ""} + +AGUA_ALIME = {"sinan": ""} + +AG_AMEACA = {"sinan": ""} + +AG_CORTE = {"sinan": ""} + +AG_ENFOR = {"sinan": ""} + +AG_ENVEN = {"sinan": ""} + +AG_ESPEC = {"sinan": ""} + +AG_FOGO = {"sinan": ""} + +AG_FORCA = {"sinan": ""} + +AG_OBJETO = {"sinan": ""} + +AG_OUTROS = {"sinan": ""} + +AG_QUENTE = {"sinan": ""} + +AIH = {"sih": ""} + +ALCATRAO = {"sinan": ""} + +ALCOOL = {"sinan": ""} + +ALIMENTO_C = {"sinan": ""} + +ALRM_ABDOM = {"sinan": ""} + +ALRM_HEMAT = {"sinan": ""} + +ALRM_HEPAT = {"sinan": ""} + +ALRM_HIPOT = {"sinan": ""} + +ALRM_LETAR = {"sinan": ""} + +ALRM_LIQ = {"sinan": ""} + +ALRM_PLAQ = {"sinan": ""} + +ALRM_SANG = {"sinan": ""} + +ALRM_VOM = {"sinan": ""} + +ALTCAUSA = {"sim": ""} + +ALVARA = {"cnes": ""} + +AMALARIA = {"sinan": ""} + +AMBIENTE = {"sinan": ""} + +AMB_NSUS = {"cnes": ""} + +AMB_SUS = {"cnes": ""} + +AMINA = {"sinan": ""} + +AMOS_OUT = {"sinan": ""} + +AMOS_PCR = {"sinan": ""} + +AMPICILINA = {"sinan": ""} + +AMPOLAS = {"sinan": ""} + +AMP_ACEVAS = {"sia": ""} + +AMP_ALBUMI = {"sia": ""} + +AMP_CARACT = {"sia": ""} + +AMP_DTCLI = {"sia": ""} + +AMP_DTINI = {"sia": ""} + +AMP_FOSFOR = {"sia": ""} + +AMP_HB = {"sia": ""} + +AMP_HBSAG = {"sia": ""} + +AMP_HCV = {"sia": ""} + +AMP_HIV = {"sia": ""} + +AMP_INTERC = {"sia": ""} + +AMP_KTVSEM = {"sia": ""} + +AMP_MAISNE = {"sia": ""} + +AMP_PTH = {"sia": ""} + +AMP_SEAPTO = {"sia": ""} + +AMP_SEPERI = {"sia": ""} + +AMP_SITINI = {"sia": ""} + +AMP_SITTRA = {"sia": ""} + +AMP_TRU = {"sia": ""} + +AM_ALTURA = {"sia": ""} + +AM_GESTANT = {"sia": ""} + +AM_PESO = {"sia": ""} + +AM_QTDTRAN = {"sia": ""} + +AM_SANGUE = {"sinan": ""} + +AM_TRANSPL = {"sia": ""} + +ANIMAL = {"sinan": ""} + +ANIM_ESP = {"sinan": ""} + +ANI_ARANHA = {"sinan": ""} + +ANI_LAGART = {"sinan": ""} + +ANI_SERPEN = {"sinan": ""} + +ANI_TIPO_1 = {"sinan": ""} + +ANO = { + "ibge": "", + "pni": "", + "sih": "", + "sinan": "", +} + +ANOMES = {"pni": ""} + +ANOREXIA = {"sinan": ""} + +ANO_CMPT = { + "ciha": "", + "sih": "", +} + +ANO_DT_SIN = {"sinan": ""} + +ANO_NASC = {"sinan": ""} + +ANTDTTRANS = {"sinan": ""} + +ANTEC_POS = {"sinan": ""} + +ANTEC_PRE = {"sinan": ""} + +ANTIBIOTIC = {"sinan": ""} + +ANTIB_DES = {"sinan": ""} + +ANTIHAVIGM = {"sinan": ""} + +ANTIHBCIGM = {"sinan": ""} + +ANTIHBE = {"sinan": ""} + +ANTIHBS = {"sinan": ""} + +ANTIHCV = {"sinan": ""} + +ANTIHDV = {"sinan": ""} + +ANTIHDVIGM = {"sinan": ""} + +ANTIHEVIGM = {"sinan": ""} + +ANTI_HBS = {"sinan": ""} + +ANTI_HCV = {"sinan": ""} + +ANTI_HIV = {"sinan": ""} + +ANTI_RAB = {"sinan": ""} + +ANTMUNTRAN = {"sinan": ""} + +ANTRELSE_N = {"sinan": ""} + +ANTSIFIL_N = {"sinan": ""} + +ANTTRANS_M = {"sinan": ""} + +ANTUFTRANS = {"sinan": ""} + +ANT_30_DIA = {"sinan": ""} + +ANT_AC = {"sinan": ""} + +ANT_ACIDEN = {"sinan": ""} + +ANT_AIDS = {"sinan": ""} + +ANT_ANEMIA = {"sinan": ""} + +ANT_ANIMAI = {"sinan": ""} + +ANT_ARAGEM = {"sinan": ""} + +ANT_ARRANH = {"sinan": ""} + +ANT_ARRUMO = {"sinan": ""} + +ANT_ASTERI = {"sinan": ""} + +ANT_BC = {"sinan": ""} + +ANT_BCG = {"sinan": ""} + +ANT_CABECA = {"sinan": ""} + +ANT_CANCER = {"sinan": ""} + +ANT_CANDID = {"sinan": ""} + +ANT_CAQUEX = {"sinan": ""} + +ANT_CAT_EX = {"sinan": ""} + +ANT_CB_CAI = {"sinan": ""} + +ANT_CB_CAR = {"sinan": ""} + +ANT_CB_COR = {"sinan": ""} + +ANT_CB_CRI = {"sinan": ""} + +ANT_CB_FOS = {"sinan": ""} + +ANT_CB_GRA = {"sinan": ""} + +ANT_CB_LAM = {"sinan": ""} + +ANT_CB_LAV = {"sinan": ""} + +ANT_CB_LIM = {"sinan": ""} + +ANT_CB_LIX = {"sinan": ""} + +ANT_CB_OUT = {"sinan": ""} + +ANT_CB_PLA = {"sinan": ""} + +ANT_CB_ROE = {"sinan": ""} + +ANT_CB_SIN = {"sinan": ""} + +ANT_CB_TER = {"sinan": ""} + +ANT_CHAGAS = {"sinan": ""} + +ANT_CITO = {"sinan": ""} + +ANT_COLHEI = {"sinan": ""} + +ANT_CONJ_C = {"sinan": ""} + +ANT_CONTAG = {"sinan": ""} + +ANT_CONTAT = {"sinan": ""} + +ANT_CONT_N = {"sinan": ""} + +ANT_CORTE = {"sinan": ""} + +ANT_CRIPTO = {"sinan": ""} + +ANT_CRIP_1 = {"sinan": ""} + +ANT_DERMAT = {"sinan": ""} + +ANT_DESMAT = {"sinan": ""} + +ANT_DIARRE = {"sinan": ""} + +ANT_DILACE = {"sinan": ""} + +ANT_DISFUN = {"sinan": ""} + +ANT_DOSES = {"sinan": ""} + +ANT_DOSES_ = {"sinan": ""} + +ANT_DOSE_3 = {"sinan": ""} + +ANT_DOSE_4 = {"sinan": ""} + +ANT_DOSE_5 = {"sinan": ""} + +ANT_DOSE_7 = {"sinan": ""} + +ANT_DOSE_C = {"sinan": ""} + +ANT_DOSE_T = {"sinan": ""} + +ANT_DOS_N = {"sinan": ""} + +ANT_DROGA = {"sinan": ""} + +ANT_DTULT_ = {"sinan": ""} + +ANT_DTUL_3 = {"sinan": ""} + +ANT_DTUL_4 = {"sinan": ""} + +ANT_DTUL_5 = {"sinan": ""} + +ANT_DTUL_7 = {"sinan": ""} + +ANT_DTUL_8 = {"sinan": ""} + +ANT_DTUL_C = {"sinan": ""} + +ANT_DTUL_T = {"sinan": ""} + +ANT_DT_ACI = {"sinan": ""} + +ANT_DT_EXP = {"sinan": ""} + +ANT_DT_INV = {"sinan": ""} + +ANT_DT_VAC = {"sinan": ""} + +ANT_ESOF_N = {"sinan": ""} + +ANT_EVLABO = {"sinan": ""} + +ANT_EXPOSI = {"sinan": ""} + +ANT_FEBRE = {"sinan": ""} + +ANT_HEMOLF = {"sinan": ""} + +ANT_HEMO_T = {"sinan": ""} + +ANT_HERPES = {"sinan": ""} + +ANT_HISTO = {"sinan": ""} + +ANT_HUMANO = {"sinan": ""} + +ANT_H_SIMP = {"sinan": ""} + +ANT_IDADE = {"sinan": ""} + +ANT_IMUNO = {"sinan": ""} + +ANT_INF_HO = {"sinan": ""} + +ANT_INVEST = {"sinan": ""} + +ANT_IRA = {"sinan": ""} + +ANT_ISOPOR = {"sinan": ""} + +ANT_LAMBED = {"sinan": ""} + +ANT_LAZER = {"sinan": ""} + +ANT_LEUCO = {"sinan": ""} + +ANT_LIMPEZ = {"sinan": ""} + +ANT_LINFO = {"sinan": ""} + +ANT_LINFOM = {"sinan": ""} + +ANT_LINFO_ = {"sinan": ""} + +ANT_LOCA_1 = {"sinan": ""} + +ANT_MAOS = {"sinan": ""} + +ANT_MEMBRO = {"sinan": ""} + +ANT_MEMB_1 = {"sinan": ""} + +ANT_MICRO = {"sinan": ""} + +ANT_MOAGEM = {"sinan": ""} + +ANT_MORDED = {"sinan": ""} + +ANT_MUCOSA = {"sinan": ""} + +ANT_MUNIC_ = {"sinan": ""} + +ANT_MUNI_C = {"sinan": ""} + +ANT_OCUPAC = {"sinan": ""} + +ANT_OUTR = {"sinan": ""} + +ANT_OUTRA = {"sinan": ""} + +ANT_OUTRO = {"sinan": ""} + +ANT_OUTROS = {"sinan": ""} + +ANT_OUTRO_ = {"sinan": ""} + +ANT_OUTR_D = {"sinan": ""} + +ANT_OUT_D = {"sinan": ""} + +ANT_OU_DE = {"sinan": ""} + +ANT_OU_DES = {"sinan": ""} + +ANT_PAIS = {"sinan": ""} + +ANT_PERINA = {"sinan": ""} + +ANT_PLANTI = {"sinan": ""} + +ANT_PNEUMO = {"sinan": ""} + +ANT_PRE_NA = {"sinan": ""} + +ANT_PROFUN = {"sinan": ""} + +ANT_PULMON = {"sinan": ""} + +ANT_PULM_N = {"sinan": ""} + +ANT_RACA = {"sinan": ""} + +ANT_REL_CA = {"sinan": ""} + +ANT_REL_N = {"sinan": ""} + +ANT_RETRO = {"sinan": ""} + +ANT_ROEDOR = {"sinan": ""} + +ANT_SALMO = {"sinan": ""} + +ANT_SARCOM = {"sinan": ""} + +ANT_SECUND = {"sinan": ""} + +ANT_SENTIN = {"sinan": ""} + +ANT_SUPERF = {"sinan": ""} + +ANT_TEMPO_ = {"sinan": ""} + +ANT_TIPOCO = {"sinan": ""} + +ANT_TOSSE = {"sinan": ""} + +ANT_TOXO = {"sinan": ""} + +ANT_TRANS_ = {"sinan": ""} + +ANT_TRASMI = {"sinan": ""} + +ANT_TRATAD = {"sinan": ""} + +ANT_TRAUMA = {"sinan": ""} + +ANT_TRIPLI = {"sinan": ""} + +ANT_TRONCO = {"sinan": ""} + +ANT_TUBE = {"sinan": ""} + +ANT_TUBERC = {"sinan": ""} + +ANT_T_HEMO = {"sinan": ""} + +ANT_UF = {"sinan": ""} + +ANT_UF_1 = {"sinan": ""} + +ANT_UF_2 = {"sinan": ""} + +ANT_UF_3 = {"sinan": ""} + +ANT_UF_CRI = {"sinan": ""} + +ANT_ULTI_D = {"sinan": ""} + +ANT_VACINA = {"sinan": ""} + +AN_ACEVAS = {"sia": ""} + +AN_ALBUMI = {"sia": ""} + +AN_ALTURA = {"sia": ""} + +AN_CNCDO = {"sia": ""} + +AN_DIURES = {"sia": ""} + +AN_DTPDR = {"sia": ""} + +AN_GLICOS = {"sia": ""} + +AN_HB = {"sia": ""} + +AN_HBSAG = {"sia": ""} + +AN_HCV = {"sia": ""} + +AN_HIV = {"sia": ""} + +AN_INTFIS = {"sia": ""} + +AN_PESO = {"sia": ""} + +AN_QUALI = {"sinan": ""} + +AN_QUANT = {"sinan": ""} + +AN_TRU = {"sia": ""} + +AN_ULSOAB = {"sia": ""} + +AP01CV01 = {"cnes": ""} + +AP01CV02 = {"cnes": ""} + +AP01CV03 = {"cnes": ""} + +AP01CV04 = {"cnes": ""} + +AP01CV05 = {"cnes": ""} + +AP01CV06 = {"cnes": ""} + +AP01CV07 = {"cnes": ""} + +AP02CV01 = {"cnes": ""} + +AP02CV02 = {"cnes": ""} + +AP02CV03 = {"cnes": ""} + +AP02CV04 = {"cnes": ""} + +AP02CV05 = {"cnes": ""} + +AP02CV06 = {"cnes": ""} + +AP02CV07 = {"cnes": ""} + +AP03CV01 = {"cnes": ""} + +AP03CV02 = {"cnes": ""} + +AP03CV03 = {"cnes": ""} + +AP03CV04 = {"cnes": ""} + +AP03CV05 = {"cnes": ""} + +AP03CV06 = {"cnes": ""} + +AP03CV07 = {"cnes": ""} + +AP04CV01 = {"cnes": ""} + +AP04CV02 = {"cnes": ""} + +AP04CV03 = {"cnes": ""} + +AP04CV04 = {"cnes": ""} + +AP04CV05 = {"cnes": ""} + +AP04CV06 = {"cnes": ""} + +AP04CV07 = {"cnes": ""} + +AP05CV01 = {"cnes": ""} + +AP05CV02 = {"cnes": ""} + +AP05CV03 = {"cnes": ""} + +AP05CV04 = {"cnes": ""} + +AP05CV05 = {"cnes": ""} + +AP05CV06 = {"cnes": ""} + +AP05CV07 = {"cnes": ""} + +AP06CV01 = {"cnes": ""} + +AP06CV02 = {"cnes": ""} + +AP06CV03 = {"cnes": ""} + +AP06CV04 = {"cnes": ""} + +AP06CV05 = {"cnes": ""} + +AP06CV06 = {"cnes": ""} + +AP06CV07 = {"cnes": ""} + +AP07CV01 = {"cnes": ""} + +AP07CV02 = {"cnes": ""} + +AP07CV03 = {"cnes": ""} + +AP07CV04 = {"cnes": ""} + +AP07CV05 = {"cnes": ""} + +AP07CV06 = {"cnes": ""} + +AP07CV07 = {"cnes": ""} + +APGAR1 = {"sinasc": ""} + +APGAR5 = {"sinasc": ""} + +AP_ADESAO = {"sia": ""} + +AP_ALTA = {"sia": ""} + +AP_APACAN = {"sia": ""} + +AP_APACANT = {"sia": ""} + +AP_ATV_FIS = {"sia": ""} + +AP_AUTORIZ = {"sia": ""} + +AP_CATEND = {"sia": ""} + +AP_CEPPCN = {"sia": ""} + +AP_CIDCAS = {"sia": ""} + +AP_CIDPRI = {"sia": ""} + +AP_CIDSEC = {"sia": ""} + +AP_CID_C1 = {"sia": ""} + +AP_CID_C2 = {"sia": ""} + +AP_CID_C3 = {"sia": ""} + +AP_CID_C4 = {"sia": ""} + +AP_CID_C5 = {"sia": ""} + +AP_CID_CO = {"sia": ""} + +AP_CMP = {"sia": ""} + +AP_CNPJCPF = {"sia": ""} + +AP_CNPJMNT = {"sia": ""} + +AP_CNSPCN = {"sia": ""} + +AP_CODEMI = {"sia": ""} + +AP_CODUNI = {"sia": ""} + +AP_COIDADE = {"sia": ""} + +AP_COMORB = {"sia": ""} + +AP_CONDIC = {"sia": ""} + +AP_DTAUT = {"sia": ""} + +AP_DTFIM = {"sia": ""} + +AP_DTINIC = {"sia": ""} + +AP_DTOCOR = {"sia": ""} + +AP_DTOOCOR = {"sia": ""} + +AP_DTSOLIC = {"sia": ""} + +AP_ENCERR = {"sia": ""} + +AP_ETNIA = {"sia": ""} + +AP_GESTAO = {"sia": ""} + +AP_MEDICAM = {"sia": ""} + +AP_MNDIF = {"sia": ""} + +AP_MN_IND = {"sia": ""} + +AP_MOTSAI = {"sia": ""} + +AP_MUNPCN = {"sia": ""} + +AP_MVM = {"sia": ""} + +AP_NATJUR = {"sia": ""} + +AP_NUIDADE = {"sia": ""} + +AP_OBITO = {"sia": ""} + +AP_PERMAN = {"sia": ""} + +AP_POLIVIT = {"sia": ""} + +AP_PRIPAL = {"sia": ""} + +AP_RACACOR = {"sia": ""} + +AP_REG_PES = {"sia": ""} + +AP_SEXO = {"sia": ""} + +AP_TIPPRE = {"sia": ""} + +AP_TPAPAC = {"sia": ""} + +AP_TPATEN = {"sia": ""} + +AP_TPATEND = {"sia": ""} + +AP_TPPRE = {"sia": ""} + +AP_TPUPS = {"sia": ""} + +AP_TRANSF = {"sia": ""} + +AP_UFDIF = {"sia": ""} + +AP_UFMUN = {"sia": ""} + +AP_UFNACIO = {"sia": ""} + +AP_UNISOL = {"sia": ""} + +AP_VL_AP = {"sia": ""} + +AQ_CID10 = {"sia": ""} + +AQ_CIDINI1 = {"sia": ""} + +AQ_CIDINI2 = {"sia": ""} + +AQ_CIDINI3 = {"sia": ""} + +AQ_CONTTR = {"sia": ""} + +AQ_DTIDEN = {"sia": ""} + +AQ_DTINI1 = {"sia": ""} + +AQ_DTINI2 = {"sia": ""} + +AQ_DTINI3 = {"sia": ""} + +AQ_DTINTR = {"sia": ""} + +AQ_ESQU_P1 = {"sia": ""} + +AQ_ESQU_P2 = {"sia": ""} + +AQ_ESTADI = {"sia": ""} + +AQ_GRAHIS = {"sia": ""} + +AQ_LINFIN = {"sia": ""} + +AQ_MED01 = {"sia": ""} + +AQ_MED02 = {"sia": ""} + +AQ_MED03 = {"sia": ""} + +AQ_MED04 = {"sia": ""} + +AQ_MED05 = {"sia": ""} + +AQ_MED06 = {"sia": ""} + +AQ_MED07 = {"sia": ""} + +AQ_MED08 = {"sia": ""} + +AQ_MED09 = {"sia": ""} + +AQ_MED10 = {"sia": ""} + +AQ_TOTMAU = {"sia": ""} + +AQ_TOTMPL = {"sia": ""} + +AQ_TRANTE = {"sia": ""} + +AREA = {"sinasc": ""} + +AREARES = {"sim": ""} + +ARMAZ_FT = {"cnes": ""} + +ARRANHAO = {"sinan": ""} + +ARRITMIAS = {"sinan": ""} + +ARTEI = {"sinan": ""} + +ARTEM = {"sinan": ""} + +ARTEMI = {"sinan": ""} + +ARTESU = {"sinan": ""} + +ARTRALGIA = {"sinan": ""} + +ARTRITE = {"sinan": ""} + +AR_CID10 = {"sia": ""} + +AR_CIDINI1 = {"sia": ""} + +AR_CIDINI2 = {"sia": ""} + +AR_CIDINI3 = {"sia": ""} + +AR_CIDTR1 = {"sia": ""} + +AR_CIDTR2 = {"sia": ""} + +AR_CIDTR3 = {"sia": ""} + +AR_CONTTR = {"sia": ""} + +AR_DTIDEN = {"sia": ""} + +AR_DTINI1 = {"sia": ""} + +AR_DTINI2 = {"sia": ""} + +AR_DTINI3 = {"sia": ""} + +AR_DTINTR = {"sia": ""} + +AR_ESTADI = {"sia": ""} + +AR_FIMAR1 = {"sia": ""} + +AR_FIMAR2 = {"sia": ""} + +AR_FIMAR3 = {"sia": ""} + +AR_FINALI = {"sia": ""} + +AR_GRAHIS = {"sia": ""} + +AR_INIAR1 = {"sia": ""} + +AR_INIAR2 = {"sia": ""} + +AR_INIAR3 = {"sia": ""} + +AR_LINFIN = {"sia": ""} + +AR_NUMC1 = {"sia": ""} + +AR_NUMC2 = {"sia": ""} + +AR_NUMC3 = {"sia": ""} + +AR_SMRD = {"sia": ""} + +AR_TRANTE = {"sia": ""} + +ASBESTO = {"sinan": ""} + +ASCITE = {"sinan": ""} + +ASMA = {"sinan": ""} + +ASSENTAD = {"cnes": ""} + +ASSINTOM = {"sinan": ""} + +ASSINTOMA = {"sinan": ""} + +ASSINTOMAT = {"sinan": ""} + +ASSISTMED = {"sim": ""} + +ASSIST_SOC = {"sinan": ""} + +ASTENIA = {"sinan": ""} + +ATD_ACEVAS = {"sia": ""} + +ATD_ALBUMI = {"sia": ""} + +ATD_CARACT = {"sia": ""} + +ATD_DTCLI = {"sia": ""} + +ATD_DTPDR = {"sia": ""} + +ATD_FOSFOR = {"sia": ""} + +ATD_HB = {"sia": ""} + +ATD_HBSAG = {"sia": ""} + +ATD_HCV = {"sia": ""} + +ATD_HIV = {"sia": ""} + +ATD_INTERC = {"sia": ""} + +ATD_KTVSEM = {"sia": ""} + +ATD_MAISNE = {"sia": ""} + +ATD_PTH = {"sia": ""} + +ATD_SEAPTO = {"sia": ""} + +ATD_SEPERI = {"sia": ""} + +ATD_SITINI = {"sia": ""} + +ATD_SITTRA = {"sia": ""} + +ATD_TRU = {"sia": ""} + +ATENDAMB = {"cnes": ""} + +ATENDE_MED = {"sinan": ""} + +ATENDHOS = {"cnes": ""} + +ATENDIMENT = {"sinan": ""} + +ATEND_MULH = {"sinan": ""} + +ATEND_PR = {"cnes": ""} + +ATESTADO = {"sim": ""} + +ATESTANTE = {"sim": ""} + +ATE_DT_ALT = {"sinan": ""} + +ATE_DT_INT = {"sinan": ""} + +ATE_HIPOTE = {"sinan": ""} + +ATE_HOSP = {"sinan": ""} + +ATE_HOSPIT = {"sinan": ""} + +ATE_INTERN = {"sinan": ""} + +ATE_MUNICI = {"sinan": ""} + +ATE_UF = {"sinan": ""} + +ATE_UF_HOS = {"sinan": ""} + +ATE_UF_INT = {"sinan": ""} + +ATIVIDAD = {"cnes": ""} + +ATIVIDA_1 = {"sinan": ""} + +ATIVIDA_2 = {"sinan": ""} + +ATIVIDA_3 = {"sinan": ""} + +AT_ATIVIDA = {"sinan": ""} + +AT_LAMINA = {"sinan": ""} + +AT_SINTOMA = {"sinan": ""} + +AUDITIVA = {"sinan": ""} + +AUD_JUST = {"sih": ""} + +AUMENTO = {"sinan": ""} + +AUTORIZ = {"sia": ""} + +AUTOR_ALCO = {"sinan": ""} + +AUTOR_SEXO = {"sinan": ""} + +AUTO_IMUNE = {"sinan": ""} + +AVALIA_N = {"sinan": ""} + +AVAL_ATU_N = {"sinan": ""} + +AVENTAL = {"sinan": ""} + +AV_ACRED = {"cnes": ""} + +AV_PNASS = {"cnes": ""} + +AZT3TC = {"sinan": ""} + +AZT3TC_IND = {"sinan": ""} + +AZT3TC_NFV = {"sinan": ""} + +BACILOSCOP = {"sinan": ""} + +BACILOSC_1 = {"sinan": ""} + +BACILOSC_2 = {"sinan": ""} + +BACILOSC_3 = {"sinan": ""} + +BACILOSC_4 = {"sinan": ""} + +BACILOSC_5 = {"sinan": ""} + +BACILOSC_6 = {"sinan": ""} + +BACILOSC_E = {"sinan": ""} + +BACILOSC_O = {"sinan": ""} + +BACILOS_E2 = {"sinan": ""} + +BACO = {"sinan": ""} + +BACTERIA = {"sinan": ""} + +BAC_APOS_6 = {"sinan": ""} + +BAIRES = {"sim": ""} + +BAIRRO_MAE = {"sinasc": ""} + +BANCOSANGU = {"sinan": ""} + +BENEF_GOV = {"sinan": ""} + +BENZENO = {"sinan": ""} + +BERILIO = {"sinan": ""} + +BIOPSIA = {"sinan": ""} + +BIOSSEG = {"sinan": ""} + +BLOCOPER = {"cnes": ""} + +BLOQUEIO = {"sinan": ""} + +BOTA = {"sinan": ""} + +BOVINO = {"sinan": ""} + +BUSCA_ATIV = {"sinan": ""} + +CABECA = {"sinan": ""} + +CADMIO = {"sinan": ""} + +CALAFRIO = {"sinan": ""} + +CANCER = {"sinan": ""} + +CAO_GATO = {"sinan": ""} + +CAPES = {"sinan": ""} + +CAPIVARA = {"sinan": ""} + +CARACTER = {"cnes": ""} + +CARDIOPATI = {"sinan": ""} + +CARRAPATO = {"sinan": ""} + +CARTORIO = { + "sim": "", + "sinasc": "", +} + +CARVAO = {"sinan": ""} + +CAR_INT = { + "ciha": "", + "sih": "", +} + +CASO = {"sinan": ""} + +CASO_ISOLA = {"sinan": ""} + +CAT = {"sinan": ""} + +CATARATA = {"sinan": ""} + +CATEND = {"sia": ""} + +CAUSABAS = {"sim": ""} + +CAUSABAS_O = {"sim": ""} + +CAUSAMAT = {"sim": ""} + +CBO = {"cnes": ""} + +CBOPROF = {"sia": ""} + +CBOR = {"sih": ""} + +CBOUNICO = {"cnes": ""} + +CB_PRE = {"sim": ""} + +CD_OUTRO = {"sinan": ""} + +CEFALEIA = {"sinan": ""} + +CENTRCIR = {"cnes": ""} + +CENTRNEO = {"cnes": ""} + +CENTROBS = {"cnes": ""} + +CEP = {"sih": ""} + +CGC_CONSOR = {"ciha": ""} + +CGC_HOSP = { + "ciha": "", + "sih": "", +} + +CGC_MANT = {"sih": ""} + +CHAGOMA = {"sinan": ""} + +CHOQUE = {"sinan": ""} + +CICL_VID = {"sinan": ""} + +CIDASSOC = {"sia": ""} + +CIDPRI = {"sia": ""} + +CID_ACID = {"sinan": ""} + +CID_ASSO = {"sih": ""} + +CID_LESAO = {"sinan": ""} + +CID_MORTE = {"sih": ""} + +CID_NOTIF = {"sih": ""} + +CIRCOBITO = {"sim": ""} + +CIRCUNSTAN = {"sinan": ""} + +CIRCUN_DES = {"sinan": ""} + +CIRC_LESAO = {"sinan": ""} + +CIRURGIA = {"sim": ""} + +CIRURGICO = {"sinan": ""} + +CLASAVAL = {"cnes": ""} + +CLASSATUAL = {"sinan": ""} + +CLASSI_FIN = {"sinan": ""} + +CLASSOPERA = {"sinan": ""} + +CLASS_SR = {"cnes": ""} + +CLAS_FORMA = {"sinan": ""} + +CLAS_TIPO_ = {"sinan": ""} + +CLA_ME_ASS = {"sinan": ""} + +CLA_ME_BAC = {"sinan": ""} + +CLA_ME_ETI = {"sinan": ""} + +CLA_SOROGR = {"sinan": ""} + +CLA_TIPO_N = {"sinan": ""} + +CLICDCCA_N = {"sinan": ""} + +CLIENTEL = {"cnes": ""} + +CLINC_CHIK = {"sinan": ""} + +CLIND = {"sinan": ""} + +CLINDI = {"sinan": ""} + +CLI_ABAULA = {"sinan": ""} + +CLI_ABDOMI = {"sinan": ""} + +CLI_AGUDA = {"sinan": ""} + +CLI_AMIGDA = {"sinan": ""} + +CLI_ANEMIA = {"sinan": ""} + +CLI_ANGUST = {"sinan": ""} + +CLI_AQ_D_N = {"sinan": ""} + +CLI_AQ_E_N = {"sinan": ""} + +CLI_ARRITM = {"sinan": ""} + +CLI_ASCEND = {"sinan": ""} + +CLI_ASSIME = {"sinan": ""} + +CLI_ASTENI = {"sinan": ""} + +CLI_A_FMID = {"sinan": ""} + +CLI_A_FMIE = {"sinan": ""} + +CLI_A_FMSD = {"sinan": ""} + +CLI_A_FMSE = {"sinan": ""} + +CLI_A_SMID = {"sinan": ""} + +CLI_A_SMIE = {"sinan": ""} + +CLI_A_SMSD = {"sinan": ""} + +CLI_A_SMSE = {"sinan": ""} + +CLI_A_S_FA = {"sinan": ""} + +CLI_A_TMID = {"sinan": ""} + +CLI_A_TMIE = {"sinan": ""} + +CLI_A_TMSD = {"sinan": ""} + +CLI_A_TMSE = {"sinan": ""} + +CLI_A_T_CE = {"sinan": ""} + +CLI_A_T_FA = {"sinan": ""} + +CLI_BICD_N = {"sinan": ""} + +CLI_BICE_N = {"sinan": ""} + +CLI_BRUDZ = {"sinan": ""} + +CLI_CANDIA = {"sinan": ""} + +CLI_CARDIA = {"sinan": ""} + +CLI_CAVIDA = {"sinan": ""} + +CLI_CDCCRE = {"sinan": ""} + +CLI_CDCLIH = {"sinan": ""} + +CLI_CDC_CI = {"sinan": ""} + +CLI_CDC_CR = {"sinan": ""} + +CLI_CDC_EN = {"sinan": ""} + +CLI_CDC_GE = {"sinan": ""} + +CLI_CDC_HE = {"sinan": ""} + +CLI_CDC_HI = {"sinan": ""} + +CLI_CDC_IN = {"sinan": ""} + +CLI_CDC_IS = {"sinan": ""} + +CLI_CDC_LE = {"sinan": ""} + +CLI_CDC_LI = {"sinan": ""} + +CLI_CDC_ME = {"sinan": ""} + +CLI_CDC_MI = {"sinan": ""} + +CLI_CDC_PC = {"sinan": ""} + +CLI_CDC_PN = {"sinan": ""} + +CLI_CDC_SA = {"sinan": ""} + +CLI_CDC_SI = {"sinan": ""} + +CLI_CDC_SK = {"sinan": ""} + +CLI_CDC_TO = {"sinan": ""} + +CLI_CEFALE = {"sinan": ""} + +CLI_CERVIC = {"sinan": ""} + +CLI_CHOQUE = {"sinan": ""} + +CLI_CICATR = {"sinan": ""} + +CLI_COMA = {"sinan": ""} + +CLI_CONDUT = {"sinan": ""} + +CLI_CONGES = {"sinan": ""} + +CLI_CONJUN = {"sinan": ""} + +CLI_CONTAT = {"sinan": ""} + +CLI_CONVUL = {"sinan": ""} + +CLI_CON_ES = {"sinan": ""} + +CLI_CORDAO = {"sinan": ""} + +CLI_CO_HIV = {"sinan": ""} + +CLI_CRONIC = {"sinan": ""} + +CLI_CUTANE = {"sinan": ""} + +CLI_CUT_DI = {"sinan": ""} + +CLI_DERMA = {"sinan": ""} + +CLI_DESCEN = {"sinan": ""} + +CLI_DESC_O = {"sinan": ""} + +CLI_DIARRE = {"sinan": ""} + +CLI_DISPNE = {"sinan": ""} + +CLI_DISSEM = {"sinan": ""} + +CLI_DOR = {"sinan": ""} + +CLI_DORES = {"sinan": ""} + +CLI_DT = {"sinan": ""} + +CLI_DT_ATE = {"sinan": ""} + +CLI_DT_EXA = {"sinan": ""} + +CLI_EDEMA = {"sinan": ""} + +CLI_EDEMAG = {"sinan": ""} + +CLI_EQUIMO = {"sinan": ""} + +CLI_ESPECI = {"sinan": ""} + +CLI_ESPLEN = {"sinan": ""} + +CLI_EXT_D = {"sinan": ""} + +CLI_EXT_E = {"sinan": ""} + +CLI_FACE = {"sinan": ""} + +CLI_FARING = {"sinan": ""} + +CLI_FEBRE = {"sinan": ""} + +CLI_FLACID = {"sinan": ""} + +CLI_FLE_D = {"sinan": ""} + +CLI_FLE_E = {"sinan": ""} + +CLI_F_MID = {"sinan": ""} + +CLI_F_MIE = {"sinan": ""} + +CLI_F_MSD = {"sinan": ""} + +CLI_F_MSE = {"sinan": ""} + +CLI_GARGAN = {"sinan": ""} + +CLI_H = {"sinan": ""} + +CLI_HEMO = {"sinan": ""} + +CLI_HEMOPU = {"sinan": ""} + +CLI_HEMORR = {"sinan": ""} + +CLI_HEPATI = {"sinan": ""} + +CLI_HEPATO = {"sinan": ""} + +CLI_HERPEG = {"sinan": ""} + +CLI_HERPES = {"sinan": ""} + +CLI_HIPOTE = {"sinan": ""} + +CLI_H_DESC = {"sinan": ""} + +CLI_ICTERI = {"sinan": ""} + +CLI_INFCIT = {"sinan": ""} + +CLI_INJECA = {"sinan": ""} + +CLI_KERNIG = {"sinan": ""} + +CLI_LARING = {"sinan": ""} + +CLI_LEIOMI = {"sinan": ""} + +CLI_LINFA = {"sinan": ""} + +CLI_LINFO = {"sinan": ""} + +CLI_LOCAL = {"sinan": ""} + +CLI_LOCAL_ = {"sinan": ""} + +CLI_LOCA_1 = {"sinan": ""} + +CLI_LOMBAR = {"sinan": ""} + +CLI_MENING = {"sinan": ""} + +CLI_MIALGI = {"sinan": ""} + +CLI_MIAL_D = {"sinan": ""} + +CLI_MIAL_G = {"sinan": ""} + +CLI_MIOCAR = {"sinan": ""} + +CLI_MIOLIT = {"sinan": ""} + +CLI_MUCOSA = {"sinan": ""} + +CLI_MUNICI = {"sinan": ""} + +CLI_NECROS = {"sinan": ""} + +CLI_NEFRIT = {"sinan": ""} + +CLI_NEFRO = {"sinan": ""} + +CLI_NEURO = {"sinan": ""} + +CLI_NEUROL = {"sinan": ""} + +CLI_NOCAR = {"sinan": ""} + +CLI_NUCA = {"sinan": ""} + +CLI_OBSTIP = {"sinan": ""} + +CLI_ORGAOS = {"sinan": ""} + +CLI_OSTEO = {"sinan": ""} + +CLI_OTITE = {"sinan": ""} + +CLI_OTRDES = {"sinan": ""} + +CLI_OUTRAS = {"sinan": ""} + +CLI_OUTRO = {"sinan": ""} + +CLI_OUTROS = {"sinan": ""} + +CLI_OUTR_2 = {"sinan": ""} + +CLI_OUTR_3 = {"sinan": ""} + +CLI_OUT_D = {"sinan": ""} + +CLI_PALATO = {"sinan": ""} + +CLI_PALIDE = {"sinan": ""} + +CLI_PANTUR = {"sinan": ""} + +CLI_PARALB = {"sinan": ""} + +CLI_PARALM = {"sinan": ""} + +CLI_PARALP = {"sinan": ""} + +CLI_PAROTI = {"sinan": ""} + +CLI_PATD_N = {"sinan": ""} + +CLI_PATE_N = {"sinan": ""} + +CLI_PELE = {"sinan": ""} + +CLI_PESCOC = {"sinan": ""} + +CLI_PETEQU = {"sinan": ""} + +CLI_PROGRE = {"sinan": ""} + +CLI_PROST = {"sinan": ""} + +CLI_PROSTR = {"sinan": ""} + +CLI_PSEUDO = {"sinan": ""} + +CLI_PULMAO = {"sinan": ""} + +CLI_RENAL = {"sinan": ""} + +CLI_RESPI = {"sinan": ""} + +CLI_RESPIR = {"sinan": ""} + +CLI_RIGIDE = {"sinan": ""} + +CLI_RINITE = {"sinan": ""} + +CLI_RINORR = {"sinan": ""} + +CLI_SINTOM = {"sinan": ""} + +CLI_TEMPER = {"sinan": ""} + +CLI_TEMPO_ = {"sinan": ""} + +CLI_TONTUR = {"sinan": ""} + +CLI_TORACI = {"sinan": ""} + +CLI_TOSSE = {"sinan": ""} + +CLI_TOX1M = {"sinan": ""} + +CLI_TRAQUE = {"sinan": ""} + +CLI_TRID_N = {"sinan": ""} + +CLI_TRIE_N = {"sinan": ""} + +CLI_TUBERC = {"sinan": ""} + +CLI_TUPULM = {"sinan": ""} + +CLI_VAGAIS = {"sinan": ""} + +CLI_VARICE = {"sinan": ""} + +CLI_VOMITO = {"sinan": ""} + +CLORAFEN = {"sinan": ""} + +CLOROQ = {"sinan": ""} + +CLOROQI = {"sinan": ""} + +CMPT = {"sih": ""} + +CMPT_FIM = {"cnes": ""} + +CMPT_INI = {"cnes": ""} + +CNAE = {"sinan": ""} + +CNAER = {"sih": ""} + +CNAE_PRIN = {"sinan": ""} + +CNES = { + "ciha": "", + "cnes": "", + "sih": "", +} + +CNESTERC = {"cnes": ""} + +CNES_ESF = {"sia": ""} + +CNES_EXEC = {"sia": ""} + +CNPJCPF = {"sia": ""} + +CNPJMNT = {"sia": ""} + +CNPJ_CC = {"sia": ""} + +CNPJ_MAN = {"cnes": ""} + +CNPJ_MANT = {"sih": ""} + +CNSPROF = {"sia": ""} + +CNS_ADM = {"cnes": ""} + +CNS_CONC = {"cnes": ""} + +CNS_CRES = {"cnes": ""} + +CNS_FNUC = {"cnes": ""} + +CNS_HMTL = {"cnes": ""} + +CNS_HMTR = {"cnes": ""} + +CNS_MRAD = {"cnes": ""} + +CNS_NEFR = {"cnes": ""} + +CNS_OCLIN = {"cnes": ""} + +CNS_OPED = {"cnes": ""} + +CNS_PAC = {"sia": ""} + +CNS_PROF = {"cnes": ""} + +CNS_RTEC = {"cnes": ""} + +COAGTOXMA1 = {"sinan": ""} + +COAGTOXMA2 = {"sinan": ""} + +COAGTOXMA3 = {"sinan": ""} + +COBERT = {"pni": ""} + +COBRANCA = { + "ciha": "", + "sih": "", +} + +COB_ESF = {"sia": ""} + +CODANOMAL = {"sinasc": ""} + +CODBAINASC = {"sinasc": ""} + +CODBAIOCOR = {"sim": ""} + +CODBAIRES = { + "sim": "", + "sinasc": "", +} + +CODCART = { + "sim": "", + "sinasc": "", +} + +CODEQUIP = {"cnes": ""} + +CODESTAB = { + "sim": "", + "sinasc": "", +} + +CODIFICADO = {"sim": ""} + +CODIGO = { + "sim": "", + "sinasc": "", +} + +CODINST = {"sinasc": ""} + +CODISINF = {"sinan": ""} + +CODLEITO = {"cnes": ""} + +CODMUNCART = { + "sim": "", + "sinasc": "", +} + +CODMUNNASC = {"sinasc": ""} + +CODMUNNATU = { + "sim": "", + "sinasc": "", +} + +CODMUNOCOR = {"sim": ""} + +CODMUNRES = { + "sim": "", + "sinasc": "", +} + +CODOCUPMAE = {"sinasc": ""} + +CODPAISRES = {"sinasc": ""} + +CODUFMUN = {"cnes": ""} + +CODUFNATU = {"sinasc": ""} + +CODUNI = {"sia": ""} + +COD_ARQ = {"sih": ""} + +COD_CEP = {"cnes": ""} + +COD_IDADE = { + "ciha": "", + "sih": "", +} + +COD_IR = {"cnes": ""} + +COD_MUN_HO = {"sinan": ""} + +COD_SEG = {"sih": ""} + +COD_UF_HOS = {"sinan": ""} + +COLETAMARC = {"sinan": ""} + +COLETIVA = {"sinan": ""} + +COLETRES = {"cnes": ""} + +COLET_COMU = {"sinan": ""} + +COMA = {"sinan": ""} + +COMISS01 = {"cnes": ""} + +COMISS02 = {"cnes": ""} + +COMISS03 = {"cnes": ""} + +COMISS04 = {"cnes": ""} + +COMISS05 = {"cnes": ""} + +COMISS06 = {"cnes": ""} + +COMISS07 = {"cnes": ""} + +COMISS08 = {"cnes": ""} + +COMISS09 = {"cnes": ""} + +COMISS10 = {"cnes": ""} + +COMISS11 = {"cnes": ""} + +COMISS12 = {"cnes": ""} + +COMISSAO = {"cnes": ""} + +COMPET = {"sih": ""} + +COMPETEN = {"cnes": ""} + +COMPLEX = { + "sia": "", + "sih": "", +} + +COMPLICA = {"sinan": ""} + +COMP_OUT = {"sinan": ""} + +COMP_OUT_D = {"sinan": ""} + +COMUNHOSP = {"sinan": ""} + +COMUNINF = {"sinan": ""} + +COMUNSVOIM = {"sim": ""} + +COM_APUTAC = {"sinan": ""} + +COM_CHOQUE = {"sinan": ""} + +COM_COMPOR = {"sinan": ""} + +COM_DEFICT = {"sinan": ""} + +COM_EDEMA = {"sinan": ""} + +COM_LOC = {"sinan": ""} + +COM_NECROS = {"sinan": ""} + +COM_PEST = {"sinan": ""} + +COM_RENAL = {"sinan": ""} + +COM_SECUND = {"sinan": ""} + +COM_SEPTIC = {"sinan": ""} + +COM_SISTEM = {"sinan": ""} + +CONDIC = {"sia": ""} + +CONDIC_ANI = {"sinan": ""} + +CONDUTA = {"sinan": ""} + +CONDUTA_DE = {"sinan": ""} + +CONDUT_DES = {"sinan": ""} + +CONFIRMA = {"sinan": ""} + +CONFIRMAD = {"sinan": ""} + +CONFPESO = {"sinasc": ""} + +CONF_INF_M = {"sinan": ""} + +CONF_INF_U = {"sinan": ""} + +CONF_MAS = {"cnes": ""} + +CONJUNTVIT = {"sinan": ""} + +CONSELHO = {"cnes": ""} + +CONSPRENAT = {"sinasc": ""} + +CONSTIPA = {"sinan": ""} + +CONSULTAS = {"sinasc": ""} + +CONS_ABORT = {"sinan": ""} + +CONS_COMP = {"sinan": ""} + +CONS_DST = {"sinan": ""} + +CONS_ESPEC = {"sinan": ""} + +CONS_ESTRE = {"sinan": ""} + +CONS_GRAV = {"sinan": ""} + +CONS_IDO = {"sinan": ""} + +CONS_MENT = {"sinan": ""} + +CONS_OUTR = {"sinan": ""} + +CONS_SUIC = {"sinan": ""} + +CONS_TUTEL = {"sinan": ""} + +CONT = {"sih": ""} + +CONTADOR = { + "sim": "", + "sinasc": "", +} + +CONTATO = {"sinan": ""} + +CONTEXAM = {"sinan": ""} + +CONTRACEP1 = {"sih": ""} + +CONTRACEP2 = {"sih": ""} + +CONTRATE = {"cnes": ""} + +CONTRATM = {"cnes": ""} + +CONTREG = {"sinan": ""} + +CONTROLE = {"sinan": ""} + +CONTSRVU = {"cnes": ""} + +CONT_OUT = {"sinan": ""} + +CONVULSAO = {"sinan": ""} + +CON_ALIMEN = {"sinan": ""} + +CON_AMBIEN = {"sinan": ""} + +CON_AMB_DE = {"sinan": ""} + +CON_ANIMAI = {"sinan": ""} + +CON_AREA = {"sinan": ""} + +CON_AUTOPS = {"sinan": ""} + +CON_AUTO_M = {"sinan": ""} + +CON_AUTO_U = {"sinan": ""} + +CON_CLASSI = {"sinan": ""} + +CON_CLASS_ = {"sinan": ""} + +CON_CLAS_E = {"sinan": ""} + +CON_CONFIR = {"sinan": ""} + +CON_CRITER = {"sinan": ""} + +CON_DESCAR = {"sinan": ""} + +CON_DIAGES = {"sinan": ""} + +CON_DIAGNO = {"sinan": ""} + +CON_DIAG_D = {"sinan": ""} + +CON_DOENCA = {"sinan": ""} + +CON_DT_ENC = {"sinan": ""} + +CON_DT_OBI = {"sinan": ""} + +CON_ENCHEN = {"sinan": ""} + +CON_ENTULH = {"sinan": ""} + +CON_ESGOTO = {"sinan": ""} + +CON_EVOLUC = {"sinan": ""} + +CON_FHD = {"sinan": ""} + +CON_FORMA = {"sinan": ""} + +CON_GRAVID = {"sinan": ""} + +CON_IMPORT = {"sinan": ""} + +CON_INFECC = {"sinan": ""} + +CON_INF_BA = {"sinan": ""} + +CON_INF_DI = {"sinan": ""} + +CON_INF_MU = {"sinan": ""} + +CON_INF_OU = {"sinan": ""} + +CON_INF_PA = {"sinan": ""} + +CON_INF_UF = {"sinan": ""} + +CON_LOCAL = {"sinan": ""} + +CON_LOCAL2 = {"sinan": ""} + +CON_LOCALI = {"sinan": ""} + +CON_MUNICI = {"sinan": ""} + +CON_OUTRA = {"sinan": ""} + +CON_PAIS = {"sinan": ""} + +CON_PROVAV = {"sinan": ""} + +CON_RIO = {"sinan": ""} + +CON_ROEDOR = {"sinan": ""} + +CON_SOROTE = {"sinan": ""} + +CON_TERREN = {"sinan": ""} + +CON_TRIAT = {"sinan": ""} + +CON_UF = {"sinan": ""} + +COPAISINF = {"sinan": ""} + +COPRO_D_1 = {"sinan": ""} + +COPRO_D_2 = {"sinan": ""} + +COPRO_D_3 = {"sinan": ""} + +COPRO_R1 = {"sinan": ""} + +COPRO_R2 = {"sinan": ""} + +COPRO_R3 = {"sinan": ""} + +CORRACA = {"ibge": ""} + +COUFHOSP = {"sinan": ""} + +COUFINF = {"sinan": ""} + +COUNIDINF = {"sinan": ""} + +CO_AGENC = {"cnes": ""} + +CO_BANCO = {"cnes": ""} + +CO_CIDPRIM = {"sia": ""} + +CO_CIDSEC = {"sia": ""} + +CO_ERRO = {"sih": ""} + +CO_FOCAL = {"sinan": ""} + +CO_INE = {"sia": ""} + +CO_MUN_EX2 = {"sinan": ""} + +CO_MUN_EX3 = {"sinan": ""} + +CO_MUN_EXP = {"sinan": ""} + +CO_MUN_R1 = {"sinan": ""} + +CO_MUN_R2 = {"sinan": ""} + +CO_MUN_R3 = {"sinan": ""} + +CO_MUN_R4 = {"sinan": ""} + +CO_PAIS_1 = {"sinan": ""} + +CO_PAIS_2 = {"sinan": ""} + +CO_PAIS_3 = {"sinan": ""} + +CO_RISCO = {"sinan": ""} + +CO_UF_1 = {"sinan": ""} + +CO_UF_2 = {"sinan": ""} + +CO_UF_3 = {"sinan": ""} + +CO_UF_DES1 = {"sinan": ""} + +CO_UF_DES2 = {"sinan": ""} + +CO_UF_DES3 = {"sinan": ""} + +CO_UF_EX2 = {"sinan": ""} + +CO_UF_EX3 = {"sinan": ""} + +CO_UF_EXP = {"sinan": ""} + +CO_UF_R1 = {"sinan": ""} + +CO_UF_R2 = {"sinan": ""} + +CO_UF_R3 = {"sinan": ""} + +CO_UF_R4 = {"sinan": ""} + +CPFUNICO = {"cnes": ""} + +CPF_AUT = {"sih": ""} + +CPF_CNPJ = {"cnes": ""} + +CPF_PROF = {"cnes": ""} + +CPF_UNICO = {"cnes": ""} + +CRITERIO = {"sinan": ""} + +CRITICA = { + "sim": "", + "sinasc": "", +} + +CRI_1000 = {"sinan": ""} + +CRI_1500 = {"sinan": ""} + +CRI_500 = {"sinan": ""} + +CRM = {"sim": ""} + +CROMO = {"sinan": ""} + +CRSOCOR = {"sim": ""} + +CRSRES = {"sim": ""} + +CRS_MAE = {"sinasc": ""} + +CRS_OCOR = {"sinasc": ""} + +CS_ABDOMEN = {"sinan": ""} + +CS_ABDOMIN = {"sinan": ""} + +CS_ANALISE = {"sinan": ""} + +CS_ANTIB = {"sinan": ""} + +CS_ANTIBIO = {"sinan": ""} + +CS_ANTIB_T = {"sinan": ""} + +CS_APNEIA = {"sinan": ""} + +CS_ASSINTO = {"sinan": ""} + +CS_ATEND_N = {"sinan": ""} + +CS_BUSCAAT = {"sinan": ""} + +CS_CADASTR = {"sinan": ""} + +CS_CAIMBRA = {"sinan": ""} + +CS_CHOQUE = {"sinan": ""} + +CS_CHORO = {"sinan": ""} + +CS_CIANOSE = {"sinan": ""} + +CS_COBERTU = {"sinan": ""} + +CS_COLETA = {"sinan": ""} + +CS_CRISE = {"sinan": ""} + +CS_CRISES = {"sinan": ""} + +CS_CULTURA = {"sinan": ""} + +CS_DESCART = {"sinan": ""} + +CS_DESIT = {"sinan": ""} + +CS_DESITRA = {"sinan": ""} + +CS_DESNUTR = {"sinan": ""} + +CS_DIARRE = {"sinan": ""} + +CS_DIVULGA = {"sinan": ""} + +CS_DOR = {"sinan": ""} + +CS_ENCEFAL = {"sinan": ""} + +CS_ESCOLAR = {"sinan": ""} + +CS_ESCOL_N = {"sinan": ""} + +CS_FEBRE = {"sinan": ""} + +CS_FLXRET = {"sinan": ""} + +CS_FONTE = {"sinan": ""} + +CS_FREQUEN = {"sinan": ""} + +CS_GESTANT = {"sinan": ""} + +CS_HOSPITA = {"sinan": ""} + +CS_INF_COT = {"sinan": ""} + +CS_INQUERI = {"sinan": ""} + +CS_LIQUOR = {"sinan": ""} + +CS_LOCAL = {"sinan": ""} + +CS_MAMAR = {"sinan": ""} + +CS_MATERIA = {"sinan": ""} + +CS_MEMBROS = {"sinan": ""} + +CS_MENING = {"sinan": ""} + +CS_MUCO = {"sinan": ""} + +CS_NASCIDO = {"sinan": ""} + +CS_NEG_ESP = {"sinan": ""} + +CS_NUCA = {"sinan": ""} + +CS_OPISTOT = {"sinan": ""} + +CS_ORIENTA = {"sinan": ""} + +CS_ORIGEM = {"sinan": ""} + +CS_OTITE = {"sinan": ""} + +CS_OUTRAS = {"sinan": ""} + +CS_OUTROS = {"sinan": ""} + +CS_OUT_COM = {"sinan": ""} + +CS_OUT_SIN = {"sinan": ""} + +CS_PNEUMON = {"sinan": ""} + +CS_POSITIV = {"sinan": ""} + +CS_RACA = {"sinan": ""} + +CS_REIDRAT = {"sinan": ""} + +CS_RESULTA = {"sinan": ""} + +CS_RISO = {"sinan": ""} + +CS_SANGUE = {"sinan": ""} + +CS_SECRECA = {"sinan": ""} + +CS_SEXO = {"sinan": ""} + +CS_SIN_OUT = {"sinan": ""} + +CS_SUGOU = {"sinan": ""} + +CS_SUSPEIT = {"sinan": ""} + +CS_TEMP37 = {"sinan": ""} + +CS_TEMP_38 = {"sinan": ""} + +CS_TIPO = {"sinan": ""} + +CS_TOSSE_E = {"sinan": ""} + +CS_TOSSE_P = {"sinan": ""} + +CS_TRANS = {"sinan": ""} + +CS_TRISMO = {"sinan": ""} + +CS_URINA = {"sinan": ""} + +CS_VACINA = {"sinan": ""} + +CS_VACINAC = {"sinan": ""} + +CS_VACINAL = {"sinan": ""} + +CS_VACTETA = {"sinan": ""} + +CS_VAC_N = {"sinan": ""} + +CS_VOMITO = {"sinan": ""} + +CS_VOMITOS = {"sinan": ""} + +CS_ZONA = {"sinan": ""} + +CULTURA_ES = {"sinan": ""} + +CULTURA_OU = {"sinan": ""} + +C_CORREN = {"cnes": ""} + +C_D = {"sinan": ""} + +C_M = {"sinan": ""} + +DATANASC = {"sim": ""} + +DATAOBITO = {"sim": ""} + +DATAREG = {"sim": ""} + +DATA_CART = {"sinasc": ""} + +DATA_NASC = {"sinasc": ""} + +DE15A39ANO = {"sinan": ""} + +DE5A14ANOS = {"sinan": ""} + +DEFEN_PUBL = {"sinan": ""} + +DEF_AUDITI = {"sinan": ""} + +DEF_DIAGNO = {"sinan": ""} + +DEF_ESPEC = {"sinan": ""} + +DEF_FISICA = {"sinan": ""} + +DEF_MENTAL = {"sinan": ""} + +DEF_OUT = {"sinan": ""} + +DEF_TRANS = {"sinan": ""} + +DEF_VISUAL = {"sinan": ""} + +DEIONIZA = {"cnes": ""} + +DELEG = {"sinan": ""} + +DELEG_CRIA = {"sinan": ""} + +DELEG_IDOS = {"sinan": ""} + +DELEG_MULH = {"sinan": ""} + +DENCRIREND = {"ibge": ""} + +DENDESOCUP = {"ibge": ""} + +DENGUE = {"sinan": ""} + +DENRENDA = {"ibge": ""} + +DENTARIO = {"sinan": ""} + +DENTRABINF = {"ibge": ""} + +DESCSEGM = {"cnes": ""} + +DESMATA_N = {"sinan": ""} + +DESTINOPAC = {"sia": ""} + +DEXAME = {"sinan": ""} + +DG_OUT_N = {"sinan": ""} + +DIABETES = {"sinan": ""} + +DIAGNO_LAB = {"sinan": ""} + +DIAGSEC1 = {"sih": ""} + +DIAGSEC2 = {"sih": ""} + +DIAGSEC3 = {"sih": ""} + +DIAGSEC4 = {"sih": ""} + +DIAGSEC5 = {"sih": ""} + +DIAGSEC6 = {"sih": ""} + +DIAGSEC7 = {"sih": ""} + +DIAGSEC8 = {"sih": ""} + +DIAGSEC9 = {"sih": ""} + +DIAG_CONF = {"sinan": ""} + +DIAG_DESCA = {"sinan": ""} + +DIAG_ESP = {"sinan": ""} + +DIAG_MAE = {"sinan": ""} + +DIAG_PARA = {"sinan": ""} + +DIAG_PAR_N = {"sinan": ""} + +DIAG_PRINC = { + "ciha": "", + "sih": "", +} + +DIAG_SEC = {"sih": ""} + +DIAG_SECUN = { + "ciha": "", + "sih": "", +} + +DIALISE = {"cnes": ""} + +DIARREIA = {"sinan": ""} + +DIAR_ACOM = {"sih": ""} + +DIAS = {"sinan": ""} + +DIAS_PERM = { + "ciha": "", + "sih": "", +} + +DIFDATA = { + "sim": "", + "sinasc": "", +} + +DIFER = {"pni": ""} + +DILACERANT = {"sinan": ""} + +DINTERNA = {"sinan": ""} + +DIR_HUMAN = {"sinan": ""} + +DISFAGIA = {"sinan": ""} + +DISTRADM = {"cnes": ""} + +DISTRSAN = {"cnes": ""} + +DOENCA_TRA = {"sinan": ""} + +DOMICILI = {"sinan": ""} + +DOR = {"sinan": ""} + +DORMIU_N = {"sinan": ""} + +DOR_COSTAS = {"sinan": ""} + +DOR_RETRO = {"sinan": ""} + +DOSAGEM = {"sinan": ""} + +DOSE = { + "pni": "", + "sinan": "", +} + +DOSE1 = {"pni": ""} + +DOSEN = {"pni": ""} + +DOSES = {"sinan": ""} + +DOSES_A = {"sinan": ""} + +DOSE_RECEB = {"sinan": ""} + +DOXOCI = {"sinan": ""} + +DROGA = {"sinan": ""} + +DROGAS = {"sinan": ""} + +DSALIMENTO = {"sinan": ""} + +DSCARDIOP = {"sinan": ""} + +DSCAUSALIM = {"sinan": ""} + +DSFONTE = {"sinan": ""} + +DSMOTIVO = {"sinan": ""} + +DSTITULO1 = {"sinan": ""} + +DSTRAESQUE = {"sinan": ""} + +DS_ALI1 = {"sinan": ""} + +DS_ALI1OUT = {"sinan": ""} + +DS_ALI2 = {"sinan": ""} + +DS_ALI2OUT = {"sinan": ""} + +DS_ESQUEMA = {"sinan": ""} + +DS_FIM_GES = {"sinan": ""} + +DS_FORMA = {"sinan": ""} + +DS_F_OUTRO = {"sinan": ""} + +DS_INDUS = {"sinan": ""} + +DS_INF_LOC = {"sinan": ""} + +DS_INF_OUT = {"sinan": ""} + +DS_INGEST = {"sinan": ""} + +DS_INI_GES = {"sinan": ""} + +DS_LOCAL1 = {"sinan": ""} + +DS_LOCAL2 = {"sinan": ""} + +DS_MUN_1 = {"sinan": ""} + +DS_MUN_2 = {"sinan": ""} + +DS_MUN_3 = {"sinan": ""} + +DS_OUTRO = {"sinan": ""} + +DS_OUTROSI = {"sinan": ""} + +DS_OUTR_LO = {"sinan": ""} + +DS_OUT_AMB = {"sinan": ""} + +DS_PARES = {"sinan": ""} + +DS_RESU_OU = {"sinan": ""} + +DS_TRANS1 = {"sinan": ""} + +DS_TRANS2 = {"sinan": ""} + +DS_TRANS3 = {"sinan": ""} + +DS_TRANS_1 = {"sinan": ""} + +DS_TRANS_2 = {"sinan": ""} + +DS_TRAT = {"sinan": ""} + +DTALTA = {"sinan": ""} + +DTALTA_N = {"sinan": ""} + +DTATEND = {"sinan": ""} + +DTATESTADO = {"sim": ""} + +DTCADASTRO = { + "sim": "", + "sinasc": "", +} + +DTCADINF = {"sim": ""} + +DTCADINV = {"sim": ""} + +DTCONCASO = {"sim": ""} + +DTCONFIRMA = {"sinan": ""} + +DTCONINV = {"sim": ""} + +DTDECLARAC = {"sinasc": ""} + +DTDIASINAC = {"sinan": ""} + +DTELETRO = {"sinan": ""} + +DTFEZESCOL = {"sinan": ""} + +DTIMUNO = {"sinan": ""} + +DTINICTRAT = {"sinan": ""} + +DTINTERNA = {"sinan": ""} + +DTINVESTIG = {"sim": ""} + +DTISOLA = {"sinan": ""} + +DTMICRO1 = {"sinan": ""} + +DTMICRO2 = {"sinan": ""} + +DTMUDESQ = {"sinan": ""} + +DTNASC = { + "sia": "", + "sim": "", + "sinasc": "", +} + +DTNASCMAE = {"sinasc": ""} + +DTOBITO = {"sim": ""} + +DTPORTAR = {"cnes": ""} + +DTPRICONS = {"sinan": ""} + +DTRAPIDO1 = {"sinan": ""} + +DTRATA = {"sinan": ""} + +DTRECEBIM = { + "sim": "", + "sinasc": "", +} + +DTRECORIG = { + "sim": "", + "sinasc": "", +} + +DTRECORIGA = { + "sim": "", + "sinasc": "", +} + +DTREGCART = { + "sim": "", + "sinasc": "", +} + +DTS1 = {"sinan": ""} + +DTS2 = {"sinan": ""} + +DTSORO = {"sinan": ""} + +DTSOROCOL = {"sinan": ""} + +DTSUSPEIC = {"sinan": ""} + +DTTESTE1 = {"sinan": ""} + +DTTRANSDM = {"sinan": ""} + +DTTRANSFU = {"sinan": ""} + +DTTRANSRM = {"sinan": ""} + +DTTRANSRS = {"sinan": ""} + +DTTRANSSE = {"sinan": ""} + +DTTRANSSM = {"sinan": ""} + +DTTRANSUS = {"sinan": ""} + +DTTRAT = {"sinan": ""} + +DTTRIAGEM = {"sinan": ""} + +DTULTCOMP = {"sinan": ""} + +DTULTMENST = {"sinasc": ""} + +DT_1VAC = {"sinan": ""} + +DT_1_DOSE = {"sinan": ""} + +DT_2VAC = {"sinan": ""} + +DT_2_DOSE = {"sinan": ""} + +DT_3_DOSE = {"sinan": ""} + +DT_ACID = {"sinan": ""} + +DT_ACIDENT = {"sinan": ""} + +DT_ACRED = {"cnes": ""} + +DT_ADM_ANT = {"sinan": ""} + +DT_ALI1COL = {"sinan": ""} + +DT_ALI2COL = {"sinan": ""} + +DT_ALRM = {"sinan": ""} + +DT_APLI_SO = {"sinan": ""} + +DT_ATEND = { + "ciha": "", + "sia": "", +} + +DT_ATENDE = {"sinan": ""} + +DT_ATENDIM = {"sinan": ""} + +DT_ATIVA = {"cnes": ""} + +DT_ATUAL = {"cnes": ""} + +DT_CATARRA = {"sinan": ""} + +DT_CHIK_S1 = {"sinan": ""} + +DT_CHIK_S2 = {"sinan": ""} + +DT_CHOQUE = {"sinan": ""} + +DT_COL1 = {"sinan": ""} + +DT_COL2 = {"sinan": ""} + +DT_COL3 = {"sinan": ""} + +DT_COLETA = {"sinan": ""} + +DT_COLOUT = {"sinan": ""} + +DT_COL_1 = {"sinan": ""} + +DT_COL_2 = {"sinan": ""} + +DT_COL_DIR = {"sinan": ""} + +DT_COL_HE2 = {"sinan": ""} + +DT_COL_HEM = {"sinan": ""} + +DT_COL_IGM = {"sinan": ""} + +DT_COL_IND = {"sinan": ""} + +DT_COL_PL2 = {"sinan": ""} + +DT_COL_PLQ = {"sinan": ""} + +DT_COL_S1 = {"sinan": ""} + +DT_COL_S2 = {"sinan": ""} + +DT_CONFIRM = {"sinan": ""} + +DT_COPRO = {"sinan": ""} + +DT_COPRO1 = {"sinan": ""} + +DT_COPRO2 = {"sinan": ""} + +DT_COPRO3 = {"sinan": ""} + +DT_DESAT = {"cnes": ""} + +DT_DESC1 = {"sinan": ""} + +DT_DESC2 = {"sinan": ""} + +DT_DESC3 = {"sinan": ""} + +DT_DESLC1 = {"sinan": ""} + +DT_DESLC2 = {"sinan": ""} + +DT_DESLC3 = {"sinan": ""} + +DT_DIAG = {"sinan": ""} + +DT_DIGITA = {"sinan": ""} + +DT_DOSE = {"sinan": ""} + +DT_DOSE_1 = {"sinan": ""} + +DT_DOSE_2 = {"sinan": ""} + +DT_DOSE_3 = {"sinan": ""} + +DT_DOSE_4 = {"sinan": ""} + +DT_DOSE_5 = {"sinan": ""} + +DT_DOSE_N = {"sinan": ""} + +DT_ENCERRA = {"sinan": ""} + +DT_ENVIO = {"sinan": ""} + +DT_EVOLUC = {"sinan": ""} + +DT_EXPED = {"cnes": ""} + +DT_EXPO = {"sinan": ""} + +DT_FEBRE = {"sinan": ""} + +DT_FEZES = {"sinan": ""} + +DT_FIM = {"sia": ""} + +DT_GRAV = {"sinan": ""} + +DT_HEMO1 = {"sinan": ""} + +DT_HEMO2 = {"sinan": ""} + +DT_HEMO3 = {"sinan": ""} + +DT_INICIO = {"sia": ""} + +DT_INICIO_ = {"sinan": ""} + +DT_INIC_TR = {"sinan": ""} + +DT_INI_EPI = {"sinan": ""} + +DT_INTER = {"sih": ""} + +DT_INTERNA = {"sinan": ""} + +DT_INVEST = {"sinan": ""} + +DT_LIQUOR = {"sinan": ""} + +DT_MOTCOB = {"sia": ""} + +DT_MUDANCA = {"sinan": ""} + +DT_NASC = {"sinan": ""} + +DT_NOTIFIC = {"sinan": ""} + +DT_NOTI_AT = {"sinan": ""} + +DT_NS1 = {"sinan": ""} + +DT_OBITO = {"sinan": ""} + +DT_OCOR = {"sinan": ""} + +DT_OUTR1 = {"sinan": ""} + +DT_OUTR2 = {"sinan": ""} + +DT_OUTR3 = {"sinan": ""} + +DT_PCR = {"sinan": ""} + +DT_PCR_1 = {"sinan": ""} + +DT_PCR_2 = {"sinan": ""} + +DT_PCR_3 = {"sinan": ""} + +DT_PNASS = {"cnes": ""} + +DT_PRNT = {"sinan": ""} + +DT_PROCESS = {"sia": ""} + +DT_PUBLE = {"cnes": ""} + +DT_PUBLM = {"cnes": ""} + +DT_RAPIDO = {"sinan": ""} + +DT_REFORCO = {"sinan": ""} + +DT_RESU3 = {"sinan": ""} + +DT_RISCO1 = {"sinan": ""} + +DT_RISCO2 = {"sinan": ""} + +DT_RISCO3 = {"sinan": ""} + +DT_RISCO4 = {"sinan": ""} + +DT_RTPCR = {"sinan": ""} + +DT_R_TRA = {"sinan": ""} + +DT_S1 = {"sinan": ""} + +DT_S2 = {"sinan": ""} + +DT_SAIDA = { + "ciha": "", + "sih": "", +} + +DT_SIN_PRI = {"sinan": ""} + +DT_SORO = {"sinan": ""} + +DT_SORO1 = {"sinan": ""} + +DT_SORO2 = {"sinan": ""} + +DT_SOROR1 = {"sinan": ""} + +DT_SOROR2 = {"sinan": ""} + +DT_TRANSDM = {"sinan": ""} + +DT_TRANSRM = {"sinan": ""} + +DT_TRANSRS = {"sinan": ""} + +DT_TRANSSE = {"sinan": ""} + +DT_TRANSSM = {"sinan": ""} + +DT_TRANSUS = {"sinan": ""} + +DT_TRIA_11 = {"sinan": ""} + +DT_TRISMO = {"sinan": ""} + +DT_TRNASRM = {"sinan": ""} + +DT_TRNASRS = {"sinan": ""} + +DT_TR_RAB = {"sinan": ""} + +DT_ULT_DOS = {"sinan": ""} + +DT_URO = {"sinan": ""} + +DT_URO2 = {"sinan": ""} + +DT_URO3 = {"sinan": ""} + +DT_VAC1 = {"sinan": ""} + +DT_VACINA = {"sinan": ""} + +DT_VAC_1 = {"sinan": ""} + +DT_VAC_2 = {"sinan": ""} + +DT_VAC_3 = {"sinan": ""} + +DT_VAC_4 = {"sinan": ""} + +DT_VAC_5 = {"sinan": ""} + +DT_VAC_ULT = {"sinan": ""} + +DT_VENCIM = {"sinan": ""} + +DT_VIRAL = {"sinan": ""} + +DT_VOP = {"sinan": ""} + +DURACAO = {"sinan": ""} + +D_DIAR = {"sinan": ""} + +D_VOMITO = {"sinan": ""} + +ECG = {"sinan": ""} + +ECG_RESULT = {"sinan": ""} + +EDEMA = {"sinan": ""} + +ELISA = {"sinan": ""} + +ELISA1 = {"sinan": ""} + +ELISA2 = {"sinan": ""} + +ELI_IGG_S1 = {"sinan": ""} + +ELI_IGG_S2 = {"sinan": ""} + +ELI_IGM_S1 = {"sinan": ""} + +ELI_IGM_S2 = {"sinan": ""} + +EMAGRA = {"sinan": ""} + +ENCAMINHA = {"sinan": ""} + +ENC_ABRIGO = {"sinan": ""} + +ENC_CREAS = {"sinan": ""} + +ENC_DEAM = {"sinan": ""} + +ENC_DELEG = {"sinan": ""} + +ENC_DPCA = {"sinan": ""} + +ENC_ESPEC = {"sinan": ""} + +ENC_IML = {"sinan": ""} + +ENC_MPU = {"sinan": ""} + +ENC_MULHER = {"sinan": ""} + +ENC_OUTR = {"sinan": ""} + +ENC_SAUDE = {"sinan": ""} + +ENC_SENTIN = {"sinan": ""} + +ENC_TUTELA = {"sinan": ""} + +ENC_VARA = {"sinan": ""} + +ENDEMICO = {"sinan": ""} + +ENDRES = {"sinasc": ""} + +ENTERO = {"sinan": ""} + +ENTO_ANIMA = {"sinan": ""} + +ENTO_CAO = {"sinan": ""} + +ENTO_CAPTU = {"sinan": ""} + +ENTO_EQUIN = {"sinan": ""} + +ENTO_EXIST = {"sinan": ""} + +ENTO_EXI_1 = {"sinan": ""} + +ENTO_EXI_2 = {"sinan": ""} + +ENTO_EXI_3 = {"sinan": ""} + +ENTO_EXI_4 = {"sinan": ""} + +ENTO_EXTRA = {"sinan": ""} + +ENTO_FLEBO = {"sinan": ""} + +ENTO_INSET = {"sinan": ""} + +ENTO_INTRA = {"sinan": ""} + +ENTO_LOCAL = {"sinan": ""} + +ENTO_OUTRO = {"sinan": ""} + +ENTO_PERID = {"sinan": ""} + +ENTO_PROXI = {"sinan": ""} + +ENTO_TRANS = {"sinan": ""} + +ENTRADA = {"sinan": ""} + +EPICUTA = {"sinan": ""} + +EPISTAXE = {"sinan": ""} + +EPIS_RACIO = {"sinan": ""} + +EPI_PESTE = {"sinan": ""} + +EQBRALTA = {"cnes": ""} + +EQBRBAIX = {"cnes": ""} + +EQBRMEDI = {"cnes": ""} + +EQDOSCLI = {"cnes": ""} + +EQFONSEL = {"cnes": ""} + +EQSISPLN = {"cnes": ""} + +EQUINOS = {"sinan": ""} + +EQ_MAREA = {"cnes": ""} + +EQ_MINDI = {"cnes": ""} + +ESC = {"sim": ""} + +ESC2010 = {"sim": ""} + +ESCFALAGR1 = {"sim": ""} + +ESCMAE = { + "sim": "", + "sinasc": "", +} + +ESCMAE2010 = { + "sim": "", + "sinasc": "", +} + +ESCMAEAGR1 = { + "sim": "", + "sinasc": "", +} + +ESCOLA = {"cnes": ""} + +ESCOLARID = {"ibge": ""} + +ESCOLMAE = {"sinan": ""} + +ESCOLMAE_N = {"sinan": ""} + +ESC_MAE_N = {"sinan": ""} + +ESFERA_A = {"cnes": ""} + +ESPEC = { + "ciha": "", + "sih": "", +} + +ESPECIE = {"sinan": ""} + +ESPECIE_N = {"sinan": ""} + +ESPECIFICO = {"sinan": ""} + +ESPLENO = {"sinan": ""} + +ESPLENOM = {"sinan": ""} + +ESP_OUT = {"sinan": ""} + +ESQ_ATU_N = {"sinan": ""} + +ESQ_INI_N = {"sinan": ""} + +ESTABDESCR = {"sim": ""} + +ESTAB_OCOR = {"sinasc": ""} + +ESTCIV = {"sim": ""} + +ESTCIVIL = {"sim": ""} + +ESTCIVMAE = {"sinasc": ""} + +ESTREPTOMI = {"sinan": ""} + +ETAMBUTOL = {"sinan": ""} + +ETIOL_OUTR = {"sinan": ""} + +ETIONAMIDA = {"sinan": ""} + +ETNIA = { + "sia": "", + "sih": "", + "sim": "", + "sinasc": "", +} + +EVIDENCIA = {"sinan": ""} + +EVOLUCAO = {"sinan": ""} + +EVOL_AFAST = {"sinan": ""} + +EVOR1_DT_R = {"sinan": ""} + +EVOR_A_MID = {"sinan": ""} + +EVOR_A_MIE = {"sinan": ""} + +EVOR_A_MSD = {"sinan": ""} + +EVOR_A_MSE = {"sinan": ""} + +EVOR_DT_RE = {"sinan": ""} + +EVOR_F_MID = {"sinan": ""} + +EVOR_F_MIE = {"sinan": ""} + +EVOR_F_MSD = {"sinan": ""} + +EVOR_F_MSE = {"sinan": ""} + +EVOR_RC_ED = {"sinan": ""} + +EVOR_RC_EE = {"sinan": ""} + +EVOR_RC_FD = {"sinan": ""} + +EVOR_RC_FE = {"sinan": ""} + +EVOR_S_FAC = {"sinan": ""} + +EVOR_S_MID = {"sinan": ""} + +EVOR_S_MIE = {"sinan": ""} + +EVOR_S_MSD = {"sinan": ""} + +EVOR_S_MSE = {"sinan": ""} + +EVO_DIAG = {"sinan": ""} + +EVO_DIAG_N = {"sinan": ""} + +EVO_DT_OBI = {"sinan": ""} + +EVO_OUTR = {"sinan": ""} + +EXAME = { + "sim": "", + "sinan": "", +} + +EXANTEMA = {"sinan": ""} + +EXPDIFDATA = {"sim": ""} + +EXPO_N = {"sinan": ""} + +EXTRAPU1_N = {"sinan": ""} + +EXTRAPU2_N = {"sinan": ""} + +EXTRAPUL_O = {"sinan": ""} + +FACIAL = {"sinan": ""} + +FAEC_TP = {"sih": ""} + +FALA = {"sinan": ""} + +FALENCIA = {"sinan": ""} + +FC_CONTATO = {"sinan": ""} + +FC_CONT_DE = {"sinan": ""} + +FEBRE = {"sinan": ""} + +FEN_HEMORR = {"sinan": ""} + +FERIMENTO = {"sinan": ""} + +FERIMENT_N = {"sinan": ""} + +FEZES = {"sinan": ""} + +FIGADO = {"sinan": ""} + +FILHMORT = {"sim": ""} + +FILHVIVOS = {"sim": ""} + +FIL_ABORT = {"sinasc": ""} + +FIL_MORTOS = {"sinasc": ""} + +FIL_VIVOS = {"sinasc": ""} + +FIM = {"sia": ""} + +FIM_ANIMAL = {"sinan": ""} + +FINANC = {"sih": ""} + +FISCALIZA = {"sinan": ""} + +FLOGISTICO = {"sinan": ""} + +FLUXO_AERE = {"sinan": ""} + +FLXRECEBI = {"sinan": ""} + +FOI_MATA = {"sinan": ""} + +FONTE = { + "ciha": "", + "sim": "", + "sinan": "", +} + +FONTEINV = {"sim": ""} + +FONTES = {"sim": ""} + +FONTESINF = {"sim": ""} + +FONTE_ORC = {"sih": ""} + +FONTINFO = {"sim": ""} + +FORMA = {"sinan": ""} + +FORMACLINI = {"sinan": ""} + +FORMA_CO = {"sinan": ""} + +FORMA_TF = {"sinan": ""} + +FORMA_TI = {"sinan": ""} + +FORMA_TS = {"sinan": ""} + +FORMA_TT = {"sinan": ""} + +FO_ANT_HBC = {"sinan": ""} + +FO_ANT_HCV = {"sinan": ""} + +FO_ANT_HIV = {"sinan": ""} + +FO_HBSAG = {"sinan": ""} + +FRAQUEZA = {"sinan": ""} + +FUMA = {"sinan": ""} + +FXETARIA = {"ibge": ""} + +FX_ETARIA = {"pni": ""} + +F_AREIA = {"cnes": ""} + +F_CARVAO = {"cnes": ""} + +GANGLIOS = {"sinan": ""} + +GASES = {"sinan": ""} + +GENGIVO = {"sinan": ""} + +GENOT_G = {"sinan": ""} + +GENOT_P = {"sinan": ""} + +GEN_VHC = {"sinan": ""} + +GESPRG1E = {"cnes": ""} + +GESPRG1M = {"cnes": ""} + +GESPRG2E = {"cnes": ""} + +GESPRG2M = {"cnes": ""} + +GESPRG3E = {"cnes": ""} + +GESPRG3M = {"cnes": ""} + +GESPRG4E = {"cnes": ""} + +GESPRG4M = {"cnes": ""} + +GESPRG5E = {"cnes": ""} + +GESPRG5M = {"cnes": ""} + +GESPRG6E = {"cnes": ""} + +GESPRG6M = {"cnes": ""} + +GESTACAO = { + "sim": "", + "sinasc": "", +} + +GESTANTE = {"sinan": ""} + +GESTAO = { + "ciha": "", + "sia": "", + "sih": "", +} + +GESTOR_COD = {"sih": ""} + +GESTOR_CPF = {"sih": ""} + +GESTOR_DT = {"sih": ""} + +GESTOR_TP = {"sih": ""} + +GESTRISCO = {"sih": ""} + +GLAUCOMA = {"sinan": ""} + +GRAVIDEZ = { + "sim": "", + "sinasc": "", +} + +GRAV_AST = {"sinan": ""} + +GRAV_CONSC = {"sinan": ""} + +GRAV_CONV = {"sinan": ""} + +GRAV_ENCH = {"sinan": ""} + +GRAV_EXTRE = {"sinan": ""} + +GRAV_HEMAT = {"sinan": ""} + +GRAV_HIPOT = {"sinan": ""} + +GRAV_INSUF = {"sinan": ""} + +GRAV_MELEN = {"sinan": ""} + +GRAV_METRO = {"sinan": ""} + +GRAV_MIOC = {"sinan": ""} + +GRAV_ORGAO = {"sinan": ""} + +GRAV_PULSO = {"sinan": ""} + +GRAV_SANG = {"sinan": ""} + +GRAV_TAQUI = {"sinan": ""} + +G_D = {"sinan": ""} + +G_M = {"sinan": ""} + +HANSENIASE = {"sinan": ""} + +HAV = {"sinan": ""} + +HA_PAUSA = {"sinan": ""} + +HBC_TOTAL = {"sinan": ""} + +HBSAG = {"sinan": ""} + +HBV = {"sinan": ""} + +HCV = {"sinan": ""} + +HDV = {"sinan": ""} + +HEMATOLOG = {"sinan": ""} + +HEMATURA = {"sinan": ""} + +HEMA_MAIOR = {"sinan": ""} + +HEMA_MENOR = {"sinan": ""} + +HEMO = {"sinan": ""} + +HEMOCULT = {"sinan": ""} + +HEMODIALIS = {"sinan": ""} + +HEMORRAG = {"sinan": ""} + +HEMORRAGI = {"sinan": ""} + +HEMOTERA = {"cnes": ""} + +HEMO_D_1 = {"sinan": ""} + +HEMO_D_2 = {"sinan": ""} + +HEMO_D_3 = {"sinan": ""} + +HEMO_IGG = {"sinan": ""} + +HEMO_IGM = {"sinan": ""} + +HEMO_R1 = {"sinan": ""} + +HEMO_R2 = {"sinan": ""} + +HEMO_R3 = {"sinan": ""} + +HEM_IGG_S1 = {"sinan": ""} + +HEM_IGG_S2 = {"sinan": ""} + +HEM_IGM_S1 = {"sinan": ""} + +HEM_IGM_S2 = {"sinan": ""} + +HEPAESPLE = {"sinan": ""} + +HEPATITA = {"sinan": ""} + +HEPATITB = {"sinan": ""} + +HEPATITE_N = {"sinan": ""} + +HEPATO = {"sinan": ""} + +HEPATOME = {"sinan": ""} + +HEPATOPAT = {"sinan": ""} + +HEPA_ESP = {"sinan": ""} + +HERBIV_DES = {"sinan": ""} + +HEV = {"sinan": ""} + +HIDROCARBO = {"sinan": ""} + +HIDROFOBI = {"sinan": ""} + +HIPEREMIA = {"sinan": ""} + +HIPERTEN = {"sinan": ""} + +HIPERTENSA = {"sinan": ""} + +HIPOREXIA = {"sinan": ""} + +HIPOTENSAO = {"sinan": ""} + +HISTOLOG_N = {"sinan": ""} + +HISTOPA = {"sinan": ""} + +HISTOPATO = {"sinan": ""} + +HISTOPATOL = {"sinan": ""} + +HISTOPA_N = {"sinan": ""} + +HISTORIA = {"sinan": ""} + +HIV = {"sinan": ""} + +HOMONIMO = { + "ciha": "", + "sih": "", +} + +HORAHOSP = {"cnes": ""} + +HORANASC = {"sinasc": ""} + +HORAOBITO = {"sim": ""} + +HORAOUTR = {"cnes": ""} + +HORA_ACID = {"sinan": ""} + +HORA_AMB = {"cnes": ""} + +HORA_JOR = {"sinan": ""} + +HORA_OCOR = {"sinan": ""} + +HORMONIO = {"sinan": ""} + +HOSPITAL = {"sinan": ""} + +HOSPITALIZ = {"sinan": ""} + +HOSP_NSUS = {"cnes": ""} + +HOSP_SUS = {"cnes": ""} + +ICTERICIA = {"sinan": ""} + +IDADE = { + "ciha": "", + "ibge": "", + "sih": "", + "sim": "", +} + +IDADEMAE = { + "sim": "", + "sinan": "", + "sinasc": "", +} + +IDADEMAX = {"sia": ""} + +IDADEMIN = {"sia": ""} + +IDADEPAC = {"sia": ""} + +IDADEPAI = {"sinasc": ""} + +IDADE_MAE = { + "sinan": "", + "sinasc": "", +} + +IDANOMAL = {"sinasc": ""} + +IDENT = {"sih": ""} + +IDENT_GEN = {"sinan": ""} + +IDENT_MICR = {"sinan": ""} + +IDEQUIPE = {"cnes": ""} + +ID_AGRAVO = {"sinan": ""} + +ID_AREA = {"cnes": ""} + +ID_ARTRALG = {"sinan": ""} + +ID_CNS_SUS = {"sinan": ""} + +ID_CONJUNT = {"sinan": ""} + +ID_CORIZA = {"sinan": ""} + +ID_DG_DES = {"sinan": ""} + +ID_DG_NOT = {"sinan": ""} + +ID_DT_RESI = {"sinan": ""} + +ID_ETIOLOG = {"sinan": ""} + +ID_EV_NOT = {"sinan": ""} + +ID_GANGLIO = {"sinan": ""} + +ID_HOSPIT = {"sinan": ""} + +ID_LIQUOR = {"sinan": ""} + +ID_MN_OCOR = {"sinan": ""} + +ID_MN_RESI = {"sinan": ""} + +ID_MUNICIP = {"sinan": ""} + +ID_MUNIC_2 = {"sinan": ""} + +ID_MUNIC_A = {"sinan": ""} + +ID_MUNI_AT = {"sinan": ""} + +ID_MUNI_RE = {"sinan": ""} + +ID_NOTIFIC = {"sinan": ""} + +ID_OCUPACA = {"sinan": ""} + +ID_OCUPA_N = {"sinan": ""} + +ID_OCUP_MA = {"sinan": ""} + +ID_PAIS = {"sinan": ""} + +ID_REGIONA = {"sinan": ""} + +ID_RETRO = {"sinan": ""} + +ID_RE_IGG = {"sinan": ""} + +ID_RE_IGG_ = {"sinan": ""} + +ID_RE_IGM = {"sinan": ""} + +ID_RE_IGM_ = {"sinan": ""} + +ID_RE_IG_1 = {"sinan": ""} + +ID_RE_IG_2 = {"sinan": ""} + +ID_RG_RESI = {"sinan": ""} + +ID_S1_IGG = {"sinan": ""} + +ID_S1_IGG_ = {"sinan": ""} + +ID_S1_IGM = {"sinan": ""} + +ID_S1_IGM_ = {"sinan": ""} + +ID_S1_IG_1 = {"sinan": ""} + +ID_S1_IG_2 = {"sinan": ""} + +ID_S2_IGG = {"sinan": ""} + +ID_S2_IGG_ = {"sinan": ""} + +ID_S2_IGM = {"sinan": ""} + +ID_S2_IGM_ = {"sinan": ""} + +ID_S2_IG_1 = {"sinan": ""} + +ID_S2_IG_2 = {"sinan": ""} + +ID_SANGUE = {"sinan": ""} + +ID_SECRECA = {"sinan": ""} + +ID_SEGM = {"cnes": ""} + +ID_TOSSE = {"sinan": ""} + +ID_UNIDADE = {"sinan": ""} + +ID_UNID_AT = {"sinan": ""} + +ID_URINA = {"sinan": ""} + +IFI = {"sinan": ""} + +IGG_S1 = {"sinan": ""} + +IGG_S2 = {"sinan": ""} + +IGG_T2 = {"sinan": ""} + +IGM_S1 = {"sinan": ""} + +IGM_S2 = {"sinan": ""} + +IGM_T1 = {"sinan": ""} + +IMPLANTA = {"sinan": ""} + +IMUNO = { + "pni": "", + "sinan": "", +} + +IMUNOH = {"sinan": ""} + +IMUNOHIST = {"sinan": ""} + +IMUNOH_N = {"sinan": ""} + +IMUNO_DIRE = {"sinan": ""} + +IMUNO_INDI = {"sinan": ""} + +IMU_HEP_B = {"sinan": ""} + +IMU_IGG_S1 = {"sinan": ""} + +IMU_IGG_S2 = {"sinan": ""} + +IMU_IGM_S1 = {"sinan": ""} + +IMU_IGM_S2 = {"sinan": ""} + +INAL_CRACK = {"sinan": ""} + +INDIGENA = {"cnes": ""} + +INDIVIDUAL = {"sinan": ""} + +IND_NSUS = {"cnes": ""} + +IND_SUS = {"cnes": ""} + +IND_VDRL = {"sih": ""} + +INESPECIF = {"sinan": ""} + +INFAN_JUV = {"sinan": ""} + +INFECCIOSO = {"sinan": ""} + +INFEHOSP = {"sih": ""} + +INFERIORES = {"sinan": ""} + +INFILTRA = {"sinan": ""} + +INICIO = {"sia": ""} + +INJETAVEIS = {"sinan": ""} + +INSC_PN = {"sih": ""} + +INSTITUCIO = {"sinan": ""} + +INSTRMAE = {"sim": ""} + +INSTRPAI = {"sim": ""} + +INSTRU = {"sih": ""} + +INSTRUCAO = {"sim": ""} + +INSTR_MAE = {"sinasc": ""} + +INSUFICIEN = {"sinan": ""} + +INTOX_CHUM = {"sinan": ""} + +INTOX_MERC = {"sinan": ""} + +INTOX_META = {"sinan": ""} + +INT_TEMPO = {"sinan": ""} + +IN_AIDS = {"sinan": ""} + +IN_TP_VAL = {"sih": ""} + +IN_VINCULA = {"sinan": ""} + +IONIZANTES = {"sinan": ""} + +ISOLAMENTO = {"sinan": ""} + +ISONIAZIDA = {"sinan": ""} + +KOTELCHUCK = {"sinasc": ""} + +LABC_DT = {"sinan": ""} + +LABC_DT_1 = {"sinan": ""} + +LABC_DT_2 = {"sinan": ""} + +LABC_EVIDE = {"sinan": ""} + +LABC_IGG = {"sinan": ""} + +LABC_LIQUO = {"sinan": ""} + +LABC_LIQ_1 = {"sinan": ""} + +LABC_SANGU = {"sinan": ""} + +LABC_TITUL = {"sinan": ""} + +LABC_TIT_1 = {"sinan": ""} + +LABC_TIT_2 = {"sinan": ""} + +LAB_AGLIQU = {"sinan": ""} + +LAB_AGSANG = {"sinan": ""} + +LAB_ASPECT = {"sinan": ""} + +LAB_ATIPIC = {"sinan": ""} + +LAB_BCESCA = {"sinan": ""} + +LAB_BCLESA = {"sinan": ""} + +LAB_BCLIQU = {"sinan": ""} + +LAB_BCSANG = {"sinan": ""} + +LAB_BD = {"sinan": ""} + +LAB_BI = {"sinan": ""} + +LAB_BILATE = {"sinan": ""} + +LAB_BT = {"sinan": ""} + +LAB_CELEBR = {"sinan": ""} + +LAB_CILIQU = {"sinan": ""} + +LAB_CISANG = {"sinan": ""} + +LAB_CLOR = {"sinan": ""} + +LAB_COLHEU = {"sinan": ""} + +LAB_CONF = {"sinan": ""} + +LAB_CONFIR = {"sinan": ""} + +LAB_CON_F = {"sinan": ""} + +LAB_CREATI = {"sinan": ""} + +LAB_CTESCA = {"sinan": ""} + +LAB_CTLESA = {"sinan": ""} + +LAB_CTLIQU = {"sinan": ""} + +LAB_CTSANG = {"sinan": ""} + +LAB_CULTUR = {"sinan": ""} + +LAB_DATA_C = {"sinan": ""} + +LAB_DERRAM = {"sinan": ""} + +LAB_DIFUSO = {"sinan": ""} + +LAB_DT3 = {"sinan": ""} + +LAB_DTPUNC = {"sinan": ""} + +LAB_DT_1 = {"sinan": ""} + +LAB_DT_2 = {"sinan": ""} + +LAB_DT_3 = {"sinan": ""} + +LAB_DT_C1 = {"sinan": ""} + +LAB_DT_CEN = {"sinan": ""} + +LAB_DT_E_1 = {"sinan": ""} + +LAB_DT_F1 = {"sinan": ""} + +LAB_DT_L_1 = {"sinan": ""} + +LAB_DT_L_2 = {"sinan": ""} + +LAB_DT_NLE = {"sinan": ""} + +LAB_DT_R1 = {"sinan": ""} + +LAB_DT_RE1 = {"sinan": ""} + +LAB_ELIS_1 = {"sinan": ""} + +LAB_ELIS_2 = {"sinan": ""} + +LAB_EOSI = {"sinan": ""} + +LAB_ESFR = {"sinan": ""} + +LAB_E_D_1 = {"sinan": ""} + +LAB_GLICO = {"sinan": ""} + +LAB_HEMA = {"sinan": ""} + +LAB_HEMATO = {"sinan": ""} + +LAB_HEMA_N = {"sinan": ""} + +LAB_HEMO = {"sinan": ""} + +LAB_HISTOP = {"sinan": ""} + +LAB_IGG = {"sinan": ""} + +LAB_IGG_R = {"sinan": ""} + +LAB_IGM = {"sinan": ""} + +LAB_IGM_R = {"sinan": ""} + +LAB_IMUNO = {"sinan": ""} + +LAB_INTEST = {"sinan": ""} + +LAB_IRM = {"sinan": ""} + +LAB_ISFEZE = {"sinan": ""} + +LAB_ISLIQU = {"sinan": ""} + +LAB_LEUCO = {"sinan": ""} + +LAB_LEUC_N = {"sinan": ""} + +LAB_LINFO = {"sinan": ""} + +LAB_LOCAL = {"sinan": ""} + +LAB_L_CEL1 = {"sinan": ""} + +LAB_L_CEL2 = {"sinan": ""} + +LAB_L_CL1 = {"sinan": ""} + +LAB_L_CL2 = {"sinan": ""} + +LAB_L_C_DE = {"sinan": ""} + +LAB_L_GLI1 = {"sinan": ""} + +LAB_L_GLI2 = {"sinan": ""} + +LAB_L_LIN1 = {"sinan": ""} + +LAB_L_LIN2 = {"sinan": ""} + +LAB_L_OUT = {"sinan": ""} + +LAB_L_PRO1 = {"sinan": ""} + +LAB_L_PRO2 = {"sinan": ""} + +LAB_L_S_DE = {"sinan": ""} + +LAB_MACRO = {"sinan": ""} + +LAB_MATE_N = {"sinan": ""} + +LAB_MEDULA = {"sinan": ""} + +LAB_METODO = {"sinan": ""} + +LAB_MET_D = {"sinan": ""} + +LAB_MICRO = {"sinan": ""} + +LAB_MICRON = {"sinan": ""} + +LAB_MICR_1 = {"sinan": ""} + +LAB_MICR_2 = {"sinan": ""} + +LAB_MONO = {"sinan": ""} + +LAB_NEUTRO = {"sinan": ""} + +LAB_OUTRO = {"sinan": ""} + +LAB_OUT_D = {"sinan": ""} + +LAB_OUT_E = {"sinan": ""} + +LAB_PARASI = {"sinan": ""} + +LAB_PARTO = {"sinan": ""} + +LAB_PCESCA = {"sinan": ""} + +LAB_PCLESA = {"sinan": ""} + +LAB_PCLIQU = {"sinan": ""} + +LAB_PCR_1 = {"sinan": ""} + +LAB_PCR_2 = {"sinan": ""} + +LAB_PCR_3 = {"sinan": ""} + +LAB_PCSANG = {"sinan": ""} + +LAB_PLAQUE = {"sinan": ""} + +LAB_POTASS = {"sinan": ""} + +LAB_PROD1 = {"sinan": ""} + +LAB_PROD2 = {"sinan": ""} + +LAB_PROT = {"sinan": ""} + +LAB_PROVAS = {"sinan": ""} + +LAB_PUNCAO = {"sinan": ""} + +LAB_Q_F = {"sinan": ""} + +LAB_RADIOL = {"sinan": ""} + +LAB_REALIZ = {"sinan": ""} + +LAB_RESULT = {"sinan": ""} + +LAB_RES_B = {"sinan": ""} + +LAB_RES_F1 = {"sinan": ""} + +LAB_RES_F2 = {"sinan": ""} + +LAB_RES_F3 = {"sinan": ""} + +LAB_RTPCR = {"sinan": ""} + +LAB_R_1 = {"sinan": ""} + +LAB_R_2 = {"sinan": ""} + +LAB_SORO = {"sinan": ""} + +LAB_SOROAG = {"sinan": ""} + +LAB_SOR_DE = {"sinan": ""} + +LAB_S_1 = {"sinan": ""} + +LAB_S_2 = {"sinan": ""} + +LAB_S_3 = {"sinan": ""} + +LAB_S_4 = {"sinan": ""} + +LAB_S_5 = {"sinan": ""} + +LAB_TGO = {"sinan": ""} + +LAB_TGO_D = {"sinan": ""} + +LAB_TGP = {"sinan": ""} + +LAB_TGP_D = {"sinan": ""} + +LAB_TITU_2 = {"sinan": ""} + +LAB_TRIAGE = {"sinan": ""} + +LAB_TROMBO = {"sinan": ""} + +LAB_UF = {"sinan": ""} + +LAB_UREIA = {"sinan": ""} + +LAB_VACINA = {"sinan": ""} + +LAB_VAC_DE = {"sinan": ""} + +LACO = {"sinan": ""} + +LACO_N = {"sinan": ""} + +LAMBEDURA = {"sinan": ""} + +LAVOURA = {"sinan": ""} + +LEITE = {"sinan": ""} + +LEITHOSP = {"cnes": ""} + +LESAO = {"sinan": ""} + +LESAO_CORP = {"sinan": ""} + +LESAO_DES = {"sinan": ""} + +LESAO_ESPE = {"sinan": ""} + +LESAO_NAT = {"sinan": ""} + +LESOES = {"sinan": ""} + +LES_AUTOP = {"sinan": ""} + +LEUCOPENIA = {"sinan": ""} + +LIMITA_MOV = {"sinan": ""} + +LINFADENO = {"sinan": ""} + +LINHAA = {"sim": ""} + +LINHAB = {"sim": ""} + +LINHAC = {"sim": ""} + +LINHAD = {"sim": ""} + +LINHAII = {"sim": ""} + +LOCACID = {"sim": ""} + +LOCAL_ACID = {"sinan": ""} + +LOCAL_ESPE = {"sinan": ""} + +LOCAL_OCOR = { + "sinan": "", + "sinasc": "", +} + +LOCA_MID_N = {"sinan": ""} + +LOCA_MIE_N = {"sinan": ""} + +LOCA_MSD_N = {"sinan": ""} + +LOCA_MSE_N = {"sinan": ""} + +LOCNASC = {"sinasc": ""} + +LOCOCOR = {"sim": ""} + +LOC_EXPO = {"sinan": ""} + +LOC_EXP_DE = {"sinan": ""} + +LOC_INF = {"sinan": ""} + +LOC_REALIZ = {"sia": ""} + +LOTE1 = {"sinan": ""} + +LOTE2 = {"sinan": ""} + +LOTE_VAC = {"sinan": ""} + +LUVA = {"sinan": ""} + +MAECHAGA = {"sinan": ""} + +MAIS_6HS = {"sinan": ""} + +MAIS_TRAB = {"sinan": ""} + +MANIFESTA = {"sinan": ""} + +MANIPULA = {"sinan": ""} + +MANI_HEMOR = {"sinan": ""} + +MAOS_N = {"sinan": ""} + +MAPORTAR = {"cnes": ""} + +MAQ_OUTR = {"cnes": ""} + +MAQ_PROP = {"cnes": ""} + +MARCA_UCI = {"sih": ""} + +MARCA_UTI = {"sih": ""} + +MASCARA = {"sinan": ""} + +MATBIOLOGI = {"sinan": ""} + +MATERIAL = {"sinan": ""} + +MAT_ORG = {"sinan": ""} + +MAT_ORG_DE = {"sinan": ""} + +MAX_INC = {"sinan": ""} + +MAX_ST_INC = {"sinan": ""} + +MCLI_LOCAL = {"sinan": ""} + +MCLI_SIST = {"sinan": ""} + +MEDICA = {"sinan": ""} + +MEDICAMENT = {"sinan": ""} + +MED_BLOQUE = {"sinan": ""} + +MED_CASO_S = {"sinan": ""} + +MED_CONTR = {"sinan": ""} + +MED_DT_EVO = {"sinan": ""} + +MED_DT_QUI = {"sinan": ""} + +MED_IDEN_C = {"sinan": ""} + +MED_MATERI = {"sinan": ""} + +MED_NUCOMU = {"sinan": ""} + +MED_OUTRO = {"sinan": ""} + +MED_PREVEN = {"sinan": ""} + +MED_QUAN_C = {"sinan": ""} + +MED_QUAN_M = {"sinan": ""} + +MED_QUAN_P = {"sinan": ""} + +MED_QUIMIO = {"sinan": ""} + +MEFLOQ = {"sinan": ""} + +MENINGO = {"sinan": ""} + +MENINGOE = {"sinan": ""} + +MENOR_5ANO = {"sinan": ""} + +MENOS_MOV = {"sinan": ""} + +MENTAL = {"sinan": ""} + +MES = { + "pni": "", + "sih": "", +} + +MESPRENAT = {"sinasc": ""} + +MES_CMPT = { + "ciha": "", + "sih": "", +} + +METAL = {"sinan": ""} + +METRO = {"sinan": ""} + +MIALGIA = {"sinan": ""} + +MICRO1_S1 = {"sinan": ""} + +MICRO1_S_2 = {"sinan": ""} + +MICRO1_T_1 = {"sinan": ""} + +MICRO1_T_2 = {"sinan": ""} + +MICRO2_S1 = {"sinan": ""} + +MICRO2_S_2 = {"sinan": ""} + +MICRO2_T_1 = {"sinan": ""} + +MICRO2_T_2 = {"sinan": ""} + +MICROCEFA = {"sinan": ""} + +MICRO_HEMA = {"sinan": ""} + +MICR_REG = {"cnes": ""} + +MIGRADO_W = {"sinan": ""} + +MINTERNA = {"sinan": ""} + +MIN_ACID = {"sinan": ""} + +MIN_JOR = {"sinan": ""} + +MIOCARDI = {"sinan": ""} + +MNDIF = {"sia": ""} + +MN_IND = {"sia": ""} + +MOAGEM_N = {"sinan": ""} + +MODALIDADE = {"ciha": ""} + +MODODETECT = {"sinan": ""} + +MODOENTR = {"sinan": ""} + +MORDEDURA = {"sinan": ""} + +MORTE = { + "ciha": "", + "sih": "", +} + +MORTEPARTO = {"sim": ""} + +MOTDESAT = {"cnes": ""} + +MOT_COB = {"sia": ""} + +MPU = {"sinan": ""} + +MTRANSFU = {"sinan": ""} + +MUCOSA = {"sinan": ""} + +MUDA_TRAB = {"sinan": ""} + +MUNCOD = {"ibge": ""} + +MUNIC = {"pni": ""} + +MUNICIPIO = {"sinan": ""} + +MUNIC_LOC = {"sih": ""} + +MUNIC_MOV = { + "ciha": "", + "sih": "", +} + +MUNIC_RES = { + "ciha": "", + "ibge": "", + "sih": "", +} + +MUNIOCOR = {"sim": ""} + +MUNIRES = {"sim": ""} + +MUNIRESAT = {"sinan": ""} + +MUNI_MAE = {"sinasc": ""} + +MUNI_OCOR = {"sinasc": ""} + +MUNPAC = {"sia": ""} + +MUN_1 = {"sinan": ""} + +MUN_2 = {"sinan": ""} + +MUN_3 = {"sinan": ""} + +MUN_ACID = {"sinan": ""} + +MUN_ATENDE = {"sinan": ""} + +MUN_DES1 = {"sinan": ""} + +MUN_DES2 = {"sinan": ""} + +MUN_DES3 = {"sinan": ""} + +MUN_EMP = {"sinan": ""} + +MUN_HOSP = {"sinan": ""} + +MUN_ING = {"sinan": ""} + +MUN_MOV = {"sih": ""} + +MUN_PRE_NA = {"sinan": ""} + +MUN_RES = {"sih": ""} + +MUN_TRANSF = {"sinan": ""} + +MUSCULAR = {"sinan": ""} + +NACIONAL = { + "ciha": "", + "sih": "", +} + +NACION_PAC = {"sia": ""} + +NAO_IONIZA = {"sinan": ""} + +NASC = { + "ciha": "", + "sih": "", +} + +NATURAL = {"sim": ""} + +NATURALMAE = {"sinasc": ""} + +NATUREZA = { + "ciha": "", + "cnes": "", + "sih": "", +} + +NAT_JUR = { + "cnes": "", + "sia": "", + "sih": "", +} + +NAUSEA = {"sinan": ""} + +NAUSEAS = {"sinan": ""} + +NDUPLIC = {"sinan": ""} + +NDUPLIC_N = {"sinan": ""} + +NECROPSIA = {"sim": ""} + +NECROSE = {"sinan": ""} + +NENHUM = {"sinan": ""} + +NEOPLASICO = {"sinan": ""} + +NERVOSAFET = {"sinan": ""} + +NIQUEL = {"sinan": ""} + +NIVATE_A = {"cnes": ""} + +NIVATE_H = {"cnes": ""} + +NIV_DEP = {"cnes": ""} + +NIV_HIER = {"cnes": ""} + +NM_ANTIBIO = {"sinan": ""} + +NM_MUNIC_H = {"sinan": ""} + +NM_MUN_HOS = {"sinan": ""} + +NM_OUT_COM = {"sinan": ""} + +NM_OUT_SIN = {"sinan": ""} + +NM_SIN_OUT = {"sinan": ""} + +NOCOLINF = {"sinan": ""} + +NOMEAREA = {"cnes": ""} + +NOMEFANT = {"sih": ""} + +NOMEPROF = {"cnes": ""} + +NOME_BACT = {"sinan": ""} + +NOME_EQP = {"cnes": ""} + +NOME_PARAS = {"sinan": ""} + +NOME_VIRUS = {"sinan": ""} + +NOPROPIN = {"sinan": ""} + +NOVO = {"sinasc": ""} + +NO_ATENOUT = {"sinan": ""} + +NO_COBOUTR = {"sinan": ""} + +NO_OUPARTO = {"sinan": ""} + +NO_OUTRAS = {"sinan": ""} + +NU10_19_N = {"sinan": ""} + +NU1_4_F_NU = {"sinan": ""} + +NU5_9_F_NU = {"sinan": ""} + +NUATEND = {"sinan": ""} + +NUCONSOME = {"sinan": ""} + +NUDIASINF = {"sim": ""} + +NUDIASOBCO = {"sim": ""} + +NUDIASOBIN = {"sim": ""} + +NULEITOS = {"cnes": ""} + +NUMCRIPOB = {"ibge": ""} + +NUMCRIPOBX = {"ibge": ""} + +NUMDESOCUP = {"ibge": ""} + +NUMERODN = { + "sim": "", + "sinasc": "", +} + +NUMERODV = {"sinasc": ""} + +NUMEROLOTE = { + "sim": "", + "sinasc": "", +} + +NUMEXPORT = {"sim": ""} + +NUMPOBRES = {"ibge": ""} + +NUMPOBRESX = {"ibge": ""} + +NUMREGCART = { + "sim": "", + "sinasc": "", +} + +NUMRENDA = {"ibge": ""} + +NUMTRABINF = {"ibge": ""} + +NUM_CON_N = {"sinan": ""} + +NUM_DOSES = {"sinan": ""} + +NUM_ENVOLV = {"sinan": ""} + +NUM_EXPORT = {"sinasc": ""} + +NUM_FILHOS = {"sih": ""} + +NUM_PROC = {"sih": ""} + +NUTEMPO = {"sinan": ""} + +NUTEMPORIS = {"sinan": ""} + +NU_10_19 = {"sinan": ""} + +NU_10_19IG = {"sinan": ""} + +NU_10_19_M = {"sinan": ""} + +NU_1_4_IGN = {"sinan": ""} + +NU_1_4_NU = {"sinan": ""} + +NU_1_4_TOT = {"sinan": ""} + +NU_1_F_NU = {"sinan": ""} + +NU_1_IGN = {"sinan": ""} + +NU_1_M_NU = {"sinan": ""} + +NU_1_TOT_N = {"sinan": ""} + +NU_20_49 = {"sinan": ""} + +NU_20_49IG = {"sinan": ""} + +NU_20_49_F = {"sinan": ""} + +NU_20_49_N = {"sinan": ""} + +NU_50_F_NU = {"sinan": ""} + +NU_50_IGN = {"sinan": ""} + +NU_50_M_NU = {"sinan": ""} + +NU_50_TOT = {"sinan": ""} + +NU_5_9_IGN = {"sinan": ""} + +NU_5_9_NU = {"sinan": ""} + +NU_5_9_TOT = {"sinan": ""} + +NU_ABDOM_N = {"sinan": ""} + +NU_AFAST = {"sinan": ""} + +NU_AMPOLAS = {"sinan": ""} + +NU_AMPOL_1 = {"sinan": ""} + +NU_AMPOL_3 = {"sinan": ""} + +NU_AMPOL_4 = {"sinan": ""} + +NU_AMPOL_6 = {"sinan": ""} + +NU_AMPOL_8 = {"sinan": ""} + +NU_AMPOL_9 = {"sinan": ""} + +NU_AMPO_5 = {"sinan": ""} + +NU_AMPO_7 = {"sinan": ""} + +NU_ANO = {"sinan": ""} + +NU_A_ALIM = {"sinan": ""} + +NU_A_CLINI = {"sinan": ""} + +NU_A_NUM_1 = {"sinan": ""} + +NU_A_NUM_2 = {"sinan": ""} + +NU_A_NUM_3 = {"sinan": ""} + +NU_CASO = {"sinan": ""} + +NU_CASOEXA = {"sinan": ""} + +NU_CASOPOS = {"sinan": ""} + +NU_CEFAL_N = {"sinan": ""} + +NU_CELULA = {"sinan": ""} + +NU_CLI_NUM = {"sinan": ""} + +NU_COMU_EX = {"sinan": ""} + +NU_CONTATO = {"sinan": ""} + +NU_DIARR_N = {"sinan": ""} + +NU_DOSE = {"sinan": ""} + +NU_ENTR = {"sinan": ""} + +NU_ENT_DOE = {"sinan": ""} + +NU_FEBRE_N = {"sinan": ""} + +NU_F_TOT = {"sinan": ""} + +NU_F_TOT_N = {"sinan": ""} + +NU_GESTA = {"sinan": ""} + +NU_IDADE = {"sinan": ""} + +NU_IDADE_N = {"sinan": ""} + +NU_IGN_NU = {"sinan": ""} + +NU_IGRA_NU = {"sinan": ""} + +NU_IG_F_NU = {"sinan": ""} + +NU_IG_IGN = {"sinan": ""} + +NU_INCUB_M = {"sinan": ""} + +NU_INC_ME = {"sinan": ""} + +NU_LESOES = {"sinan": ""} + +NU_LOTE = {"sinan": ""} + +NU_LOTE_H = {"sinan": ""} + +NU_LOTE_I = {"sinan": ""} + +NU_LOTE_IA = {"sinan": ""} + +NU_LOTE_V = {"sinan": ""} + +NU_NAUSE_P = {"sinan": ""} + +NU_NEURO_N = {"sinan": ""} + +NU_NOTIFIC = {"sinan": ""} + +NU_NUM_2 = {"sinan": ""} + +NU_NUM_3 = {"sinan": ""} + +NU_OBITO = {"sinan": ""} + +NU_OUTRO_N = {"sinan": ""} + +NU_PA_TOT = {"sia": ""} + +NU_PROTEI = {"sinan": ""} + +NU_RESU_3 = {"sinan": ""} + +NU_SEMA_EP = {"sinan": ""} + +NU_TOT = {"sinan": ""} + +NU_TOT_HOS = {"sinan": ""} + +NU_TOT_IGN = {"sinan": ""} + +NU_TO_F_NU = {"sinan": ""} + +NU_TRAB = {"sinan": ""} + +NU_VOMTO_N = {"sinan": ""} + +NU_VPA_TOT = {"sia": ""} + +N_AIH = {"sih": ""} + +N_DIAR = {"sinan": ""} + +N_VOMITO = {"sinan": ""} + +OBITOFE1 = {"sim": ""} + +OBITOFE2 = {"sim": ""} + +OBITOGRAV = {"sim": ""} + +OBITOPARTO = {"sim": ""} + +OBITOPUERP = {"sim": ""} + +OBSERVACAO = {"sinan": ""} + +OCULOS = {"sinan": ""} + +OCUP = {"sim": ""} + +OCUPACAO = { + "sim": "", + "sinan": "", +} + +OCUPACIO = {"sinan": ""} + +OCUPMAE = {"sim": ""} + +OCUPPAI = {"sim": ""} + +OLEOS = {"sinan": ""} + +OLIGURIA = {"sinan": ""} + +ORAL = {"sinan": ""} + +ORGEXPED = {"cnes": ""} + +ORIENT_SEX = {"sinan": ""} + +ORIGEM = { + "sim": "", + "sinan": "", + "sinasc": "", +} + +ORIGEM_PAC = {"sia": ""} + +ORTV1050 = {"cnes": ""} + +ORV50150 = {"cnes": ""} + +OSMOSE_R = {"cnes": ""} + +OSSEA = {"sinan": ""} + +OUTRAS = {"sinan": ""} + +OUTRAS_DES = {"sinan": ""} + +OUTRA_ATIV = {"sinan": ""} + +OUTRA_DST = {"sinan": ""} + +OUTRO = {"sinan": ""} + +OUTROANI = {"sinan": ""} + +OUTROS = {"sinan": ""} + +OUTROS_DES = {"sinan": ""} + +OUTROS_ESP = {"sinan": ""} + +OUTROS_M = {"sinan": ""} + +OUTROS_M_D = {"sinan": ""} + +OUTRO_ARV = {"sinan": ""} + +OUTRO_DES = {"sinan": ""} + +OUTRO_DOE = {"sinan": ""} + +OUTRO_ESP = {"sinan": ""} + +OUTRO_EX = {"sinan": ""} + +OUTRO_EXP = {"sinan": ""} + +OUTRO_S = {"sinan": ""} + +OUTRO_SIN = {"sinan": ""} + +OUTRO_S_D = {"sinan": ""} + +OUTR_ATI_D = {"sinan": ""} + +OUTR_D1 = {"sinan": ""} + +OUTR_D2 = {"sinan": ""} + +OUTR_D3 = {"sinan": ""} + +OUTR_R1 = {"sinan": ""} + +OUTR_R2 = {"sinan": ""} + +OUTR_R3 = {"sinan": ""} + +OUT_AGENTE = {"sinan": ""} + +OUT_AGRAVO = {"sinan": ""} + +OUT_ARV_ES = {"sinan": ""} + +OUT_CONTAT = {"sinan": ""} + +OUT_DOE_DE = {"sinan": ""} + +OUT_EXAME = {"sinan": ""} + +OUT_EXP_DE = {"sinan": ""} + +OUT_MEDIC = {"sinan": ""} + +OUT_TRAT = {"cnes": ""} + +OUT_VEZES = {"sinan": ""} + +OUT_VINCUL = {"sinan": ""} + +OUT_VIRUS = {"sinan": ""} + +OV150500 = {"cnes": ""} + +PAIS_EXP = {"sinan": ""} + +PALIDEZ = {"sinan": ""} + +PALQ_MAIOR = {"sinan": ""} + +PARALISIA = {"sinan": ""} + +PARASITA = {"sinan": ""} + +PARASITO = {"sinan": ""} + +PARESTESI = {"sinan": ""} + +PARIDADE = {"sinasc": ""} + +PARTO = { + "sim": "", + "sinasc": "", +} + +PART_CORP1 = {"sinan": ""} + +PART_CORP2 = {"sinan": ""} + +PART_CORP3 = {"sinan": ""} + +PAR_ANTIDU = {"sinan": ""} + +PAR_DT_PAR = {"sinan": ""} + +PAR_EVOLUC = {"sinan": ""} + +PAR_INICPR = {"sinan": ""} + +PAR_TIPO = {"sinan": ""} + +PAR_UFPART = {"sinan": ""} + +PA_ALTA = {"sia": ""} + +PA_AUTORIZ = {"sia": ""} + +PA_CATEND = {"sia": ""} + +PA_CBOCOD = {"sia": ""} + +PA_CID = {"sia": ""} + +PA_CIDCAS = {"sia": ""} + +PA_CIDPRI = {"sia": ""} + +PA_CIDSEC = {"sia": ""} + +PA_CLASS_S = {"sia": ""} + +PA_CMP = {"sia": ""} + +PA_CNPJCPF = {"sia": ""} + +PA_CNPJMNT = {"sia": ""} + +PA_CNPJ_CC = {"sia": ""} + +PA_CNSMED = {"sia": ""} + +PA_CODESP = {"sia": ""} + +PA_CODOCO = {"sia": ""} + +PA_CODPRO = {"sia": ""} + +PA_CODUNI = {"sia": ""} + +PA_CONDIC = {"sia": ""} + +PA_DATPR = {"sia": ""} + +PA_DATREF = {"sia": ""} + +PA_DES1 = {"sinan": ""} + +PA_DES2 = {"sinan": ""} + +PA_DES3 = {"sinan": ""} + +PA_DIF_VAL = {"sia": ""} + +PA_DOCORIG = {"sia": ""} + +PA_ENCERR = {"sia": ""} + +PA_EQUIPE = {"sia": ""} + +PA_ETNIA = {"sia": ""} + +PA_FLER = {"sia": ""} + +PA_FLIDADE = {"sia": ""} + +PA_FLQT = {"sia": ""} + +PA_FNTORC = {"sia": ""} + +PA_FXETAR = {"sia": ""} + +PA_GESTAO = {"sia": ""} + +PA_IDADE = {"sia": ""} + +PA_INCOUT = {"sia": ""} + +PA_INCURG = {"sia": ""} + +PA_INDICA = {"sia": ""} + +PA_INE = {"sia": ""} + +PA_MNDIF = {"sia": ""} + +PA_MN_IND = {"sia": ""} + +PA_MORFOL = {"sia": ""} + +PA_MOTSAI = {"sia": ""} + +PA_MUNAT = {"sia": ""} + +PA_MUNPCN = {"sia": ""} + +PA_MVM = {"sia": ""} + +PA_NAT_JUR = {"sia": ""} + +PA_NH = {"sia": ""} + +PA_NIVCPL = {"sia": ""} + +PA_NUMAPA = {"sia": ""} + +PA_OBITO = {"sia": ""} + +PA_PERMAN = {"sia": ""} + +PA_PROC_ID = {"sia": ""} + +PA_QTDAPR = {"sia": ""} + +PA_QTDPRO = {"sia": ""} + +PA_RACACOR = {"sia": ""} + +PA_RCB = {"sia": ""} + +PA_RCBDF = {"sia": ""} + +PA_REGCT = {"sia": ""} + +PA_SEXO = {"sia": ""} + +PA_SRV = {"sia": ""} + +PA_SRV_C = {"sia": ""} + +PA_SUBFIN = {"sia": ""} + +PA_TIPATE = {"sia": ""} + +PA_TIPPRE = {"sia": ""} + +PA_TIPPRO = {"sia": ""} + +PA_TPFIN = {"sia": ""} + +PA_TPUPS = {"sia": ""} + +PA_TP_EQP = {"sia": ""} + +PA_TRANSF = {"sia": ""} + +PA_UFDIF = {"sia": ""} + +PA_UFMUN = {"sia": ""} + +PA_VALAPR = {"sia": ""} + +PA_VALPRO = {"sia": ""} + +PA_VL_CF = {"sia": ""} + +PA_VL_CL = {"sia": ""} + +PA_VL_INC = {"sia": ""} + +PCRUZ = {"sinan": ""} + +PELE_INTEG = {"sinan": ""} + +PELE_NAO_I = {"sinan": ""} + +PEN_ANAL = {"sinan": ""} + +PEN_ORAL = {"sinan": ""} + +PEN_VAGINA = {"sinan": ""} + +PERCUTANEA = {"sinan": ""} + +PERFURA = {"sinan": ""} + +PERICARDI = {"sinan": ""} + +PERIODO = {"sinan": ""} + +PERMANEN = {"sia": ""} + +PES = {"sinan": ""} + +PESCOU_N = {"sinan": ""} + +PESO = { + "sim": "", + "sinan": "", + "sinasc": "", +} + +PESONASC = {"sim": ""} + +PETEQUIAS = {"sinan": ""} + +PETEQUIA_N = {"sinan": ""} + +PF_PJ = {"cnes": ""} + +PIRAZINAMI = {"sinan": ""} + +PLANJ_RD = {"cnes": ""} + +PLAQ_MENOR = {"sinan": ""} + +PLASMATICO = {"sinan": ""} + +PLEURAL = {"sinan": ""} + +PMALARIA = {"sinan": ""} + +PMM = {"sinan": ""} + +POEIRAS = {"sinan": ""} + +POE_ABRASI = {"sinan": ""} + +POE_MISTA = {"sinan": ""} + +POE_ORGANI = {"sinan": ""} + +POLIADENO = {"sinan": ""} + +POP = {"pni": ""} + +POPALFAB = {"ibge": ""} + +POPDEPEND = {"ibge": ""} + +POPGERAL = {"cnes": ""} + +POPNALFAB = {"ibge": ""} + +POPTOT = {"ibge": ""} + +POPULACAO = {"ibge": ""} + +POP_IMIG = {"sinan": ""} + +POP_LIBER = {"sinan": ""} + +POP_RUA = {"sinan": ""} + +POP_SAUDE = {"sinan": ""} + +PORTARIA = {"cnes": ""} + +POS_EXPOS = {"sinan": ""} + +PREFIXODN = {"sinasc": ""} + +PREMIOS = {"sinan": ""} + +PRESENCA = {"sinan": ""} + +PRE_ANTRET = {"sinan": ""} + +PRE_DT_RET = {"sinan": ""} + +PRE_EXPOS = {"sinan": ""} + +PRE_MUNIPA = {"sinan": ""} + +PRE_MUNIRE = {"sinan": ""} + +PRE_NATAL = {"sinasc": ""} + +PRE_PRENAT = {"sinan": ""} + +PRE_UFREL = {"sinan": ""} + +PRIMAQ = {"sinan": ""} + +PROC_ABORT = {"sinan": ""} + +PROC_CONTR = {"sinan": ""} + +PROC_DST = {"sinan": ""} + +PROC_HEPB = {"sinan": ""} + +PROC_HIV = {"sinan": ""} + +PROC_ID = {"sia": ""} + +PROC_REA = { + "ciha": "", + "sih": "", +} + +PROC_SANG = {"sinan": ""} + +PROC_SEMEN = {"sinan": ""} + +PROC_SOLIC = {"sih": ""} + +PROC_VAGIN = {"sinan": ""} + +PROFNSUS = {"cnes": ""} + +PROFUNDO = {"sinan": ""} + +PROF_SUS = {"cnes": ""} + +PRONASCI = {"cnes": ""} + +PROSTACAO = {"sinan": ""} + +PROVA_BIOL = {"sinan": ""} + +PSICO_FARM = {"sinan": ""} + +PTRANSFU = {"sinan": ""} + +PULSO = {"sinan": ""} + +PURPURA = {"sinan": ""} + +PUSUARIO = {"sinan": ""} + +P_ATIVO_1 = {"sinan": ""} + +P_ATIVO_2 = {"sinan": ""} + +P_ATIVO_3 = {"sinan": ""} + +QTDATE = {"sia": ""} + +QTDFILMORT = { + "sim": "", + "sinasc": "", +} + +QTDFILVIVO = { + "sim": "", + "sinasc": "", +} + +QTDGESTANT = {"sinasc": ""} + +QTDPARTCES = {"sinasc": ""} + +QTDPARTNOR = {"sinasc": ""} + +QTDPCN = {"sia": ""} + +QTINST01 = {"cnes": ""} + +QTINST02 = {"cnes": ""} + +QTINST03 = {"cnes": ""} + +QTINST04 = {"cnes": ""} + +QTINST05 = {"cnes": ""} + +QTINST06 = {"cnes": ""} + +QTINST07 = {"cnes": ""} + +QTINST08 = {"cnes": ""} + +QTINST09 = {"cnes": ""} + +QTINST10 = {"cnes": ""} + +QTINST11 = {"cnes": ""} + +QTINST12 = {"cnes": ""} + +QTINST13 = {"cnes": ""} + +QTINST14 = {"cnes": ""} + +QTINST15 = {"cnes": ""} + +QTINST16 = {"cnes": ""} + +QTINST17 = {"cnes": ""} + +QTINST18 = {"cnes": ""} + +QTINST19 = {"cnes": ""} + +QTINST20 = {"cnes": ""} + +QTINST21 = {"cnes": ""} + +QTINST22 = {"cnes": ""} + +QTINST23 = {"cnes": ""} + +QTINST24 = {"cnes": ""} + +QTINST25 = {"cnes": ""} + +QTINST26 = {"cnes": ""} + +QTINST27 = {"cnes": ""} + +QTINST28 = {"cnes": ""} + +QTINST29 = {"cnes": ""} + +QTINST30 = {"cnes": ""} + +QTINST31 = {"cnes": ""} + +QTINST32 = {"cnes": ""} + +QTINST33 = {"cnes": ""} + +QTINST34 = {"cnes": ""} + +QTINST35 = {"cnes": ""} + +QTINST36 = {"cnes": ""} + +QTINST37 = {"cnes": ""} + +QTLEIT05 = {"cnes": ""} + +QTLEIT06 = {"cnes": ""} + +QTLEIT07 = {"cnes": ""} + +QTLEIT08 = {"cnes": ""} + +QTLEIT09 = {"cnes": ""} + +QTLEIT19 = {"cnes": ""} + +QTLEIT20 = {"cnes": ""} + +QTLEIT21 = {"cnes": ""} + +QTLEIT22 = {"cnes": ""} + +QTLEIT23 = {"cnes": ""} + +QTLEIT32 = {"cnes": ""} + +QTLEIT34 = {"cnes": ""} + +QTLEIT38 = {"cnes": ""} + +QTLEIT39 = {"cnes": ""} + +QTLEIT40 = {"cnes": ""} + +QTLEITP1 = {"cnes": ""} + +QTLEITP2 = {"cnes": ""} + +QTLEITP3 = {"cnes": ""} + +QT_AGIPL = {"cnes": ""} + +QT_AGLTN = {"cnes": ""} + +QT_APRES = {"sia": ""} + +QT_APROV = {"sia": ""} + +QT_CADRE = {"cnes": ""} + +QT_CAPFL = {"cnes": ""} + +QT_CENRE = {"cnes": ""} + +QT_CONRA = {"cnes": ""} + +QT_CONTR = {"cnes": ""} + +QT_DIARIAS = {"sih": ""} + +QT_DOSE = {"pni": ""} + +QT_EXIST = {"cnes": ""} + +QT_EXTPL = {"cnes": ""} + +QT_FRE18 = {"cnes": ""} + +QT_FRE30 = {"cnes": ""} + +QT_IRRHE = {"cnes": ""} + +QT_MAQAF = {"cnes": ""} + +QT_NSUS = {"cnes": ""} + +QT_PROC = {"ciha": ""} + +QT_REFAS = {"cnes": ""} + +QT_REFRE = {"cnes": ""} + +QT_REFSA = {"cnes": ""} + +QT_SELAD = {"cnes": ""} + +QT_SUS = {"cnes": ""} + +QT_TOTAL_C = {"sinan": ""} + +QT_USO = {"cnes": ""} + +QUANTID = {"sinan": ""} + +QUANTOS = {"sinan": ""} + +QUAN_COMUN = {"sinan": ""} + +QUAN_POSIT = {"sinan": ""} + +QUILOMBO = {"cnes": ""} + +QUIMRADI = {"cnes": ""} + +QUININO = {"sinan": ""} + +QUININOI = {"sinan": ""} + +QUINOLONA = {"sinan": ""} + +RACACOR = { + "sia": "", + "sim": "", + "sinasc": "", +} + +RACACORMAE = {"sinasc": ""} + +RACACORN = {"sinasc": ""} + +RACACOR_RN = {"sinasc": ""} + +RACA_COR = {"sih": ""} + +RACA_MAE = {"sinan": ""} + +RACCOR = {"sinasc": ""} + +RAIOX = {"sinan": ""} + +RAIOX_TORA = {"sinan": ""} + +RAI_RESULT = {"sinan": ""} + +RAZAO = {"sih": ""} + +REACAO_SOR = {"sinan": ""} + +REACAO_VAC = {"sinan": ""} + +RECEMNASC = {"sinan": ""} + +RECEM_NASC = {"sinan": ""} + +RECUSA_QUI = {"sinan": ""} + +REDE_EDUCA = {"sinan": ""} + +REDE_SAU = {"sinan": ""} + +REFR_AQD_N = {"sinan": ""} + +REFR_AQE_N = {"sinan": ""} + +REFR_BID_N = {"sinan": ""} + +REFR_BIE_N = {"sinan": ""} + +REFR_PAD_N = {"sinan": ""} + +REFR_PAE_N = {"sinan": ""} + +REFR_TRD_N = {"sinan": ""} + +REFR_TRE_N = {"sinan": ""} + +REGCT = {"sih": ""} + +REGIME = {"sinan": ""} + +REGISTRO = { + "cnes": "", + "sim": "", +} + +REGSAUDE = {"cnes": ""} + +REL_CAT = {"sinan": ""} + +REL_CONHEC = {"sinan": ""} + +REL_CONJ = {"sinan": ""} + +REL_CUIDA = {"sinan": ""} + +REL_DESCO = {"sinan": ""} + +REL_ESPEC = {"sinan": ""} + +REL_EXCON = {"sinan": ""} + +REL_EXNAM = {"sinan": ""} + +REL_FILHO = {"sinan": ""} + +REL_INST = {"sinan": ""} + +REL_IRMAO = {"sinan": ""} + +REL_MAD = {"sinan": ""} + +REL_MAE = {"sinan": ""} + +REL_NAMO = {"sinan": ""} + +REL_OUTROS = {"sinan": ""} + +REL_PAD = {"sinan": ""} + +REL_PAI = {"sinan": ""} + +REL_PATRAO = {"sinan": ""} + +REL_POL = {"sinan": ""} + +REL_PROPRI = {"sinan": ""} + +REL_SEXUAL = {"sinan": ""} + +REL_TRAB = {"sinan": ""} + +REMESSA = {"sih": ""} + +RENAL = {"sinan": ""} + +REPETITIVO = {"sinan": ""} + +RESALIM1 = {"sinan": ""} + +RESALIMOUT = {"sinan": ""} + +RESPIRATO = {"sinan": ""} + +RESULT = {"sinan": ""} + +RESUL_HIS = {"sinan": ""} + +RESUL_NS1 = {"sinan": ""} + +RESUL_OUT = {"sinan": ""} + +RESUL_PCR = {"sinan": ""} + +RESUL_PCR_ = {"sinan": ""} + +RESUL_PRNT = {"sinan": ""} + +RESUL_SORO = {"sinan": ""} + +RESUL_VIRA = {"sinan": ""} + +RESUL_VI_N = {"sinan": ""} + +RES_BIOL = {"cnes": ""} + +RES_CHIKS1 = {"sinan": ""} + +RES_CHIKS2 = {"sinan": ""} + +RES_COMU = {"cnes": ""} + +RES_HBSAG = {"sinan": ""} + +RES_HIST = {"sinan": ""} + +RES_IMUNO = {"sinan": ""} + +RES_ISOL = {"sinan": ""} + +RES_PCR = {"sinan": ""} + +RES_QUIM = {"cnes": ""} + +RES_RADI = {"cnes": ""} + +RETAR_PM = {"sinan": ""} + +RETENCAO = {"cnes": ""} + +RETINOPA = {"sinan": ""} + +RE_ANTIHBC = {"sinan": ""} + +RE_ANTIHCV = {"sinan": ""} + +RIFAMPICIN = {"sinan": ""} + +ROEDOR_N = {"sinan": ""} + +ROTA_R = {"sinan": ""} + +RUBRICA = {"sih": ""} + +RUIDO_OUT = {"sinan": ""} + +RUI_OUTDES = {"sinan": ""} + +S1_IGG = {"sinan": ""} + +S1_IGM = {"sinan": ""} + +S1_TIT1 = {"sinan": ""} + +S2_IGG = {"sinan": ""} + +S2_IGM = {"sinan": ""} + +S2_TIT1 = {"sinan": ""} + +S3_IGG = {"sinan": ""} + +S3_IGM = {"sinan": ""} + +SALA_MOL = {"cnes": ""} + +SANG = {"sinan": ""} + +SANGRAM = {"sinan": ""} + +SANGUE = {"sinan": ""} + +SEMAGESTAC = { + "sim": "", + "sinasc": "", +} + +SEMANGEST = {"sim": ""} + +SEMIPLEN = {"sih": ""} + +SEM_ACID = {"sinan": ""} + +SEM_DIAG = {"sinan": ""} + +SEM_NOT = {"sinan": ""} + +SEM_PRI = {"sinan": ""} + +SEM_QUIMIO = {"sinan": ""} + +SENSIBILI = {"sinan": ""} + +SEQUENCIA = {"sih": ""} + +SEQ_AIH5 = {"sih": ""} + +SERAP01P = {"cnes": ""} + +SERAP01T = {"cnes": ""} + +SERAP02P = {"cnes": ""} + +SERAP02T = {"cnes": ""} + +SERAP03P = {"cnes": ""} + +SERAP03T = {"cnes": ""} + +SERAP04P = {"cnes": ""} + +SERAP04T = {"cnes": ""} + +SERAP05P = {"cnes": ""} + +SERAP05T = {"cnes": ""} + +SERAP06P = {"cnes": ""} + +SERAP06T = {"cnes": ""} + +SERAP07P = {"cnes": ""} + +SERAP07T = {"cnes": ""} + +SERAP08P = {"cnes": ""} + +SERAP08T = {"cnes": ""} + +SERAP09P = {"cnes": ""} + +SERAP09T = {"cnes": ""} + +SERAP10P = {"cnes": ""} + +SERAP10T = {"cnes": ""} + +SERAP11P = {"cnes": ""} + +SERAP11T = {"cnes": ""} + +SERAPOIO = {"cnes": ""} + +SERIESCFAL = {"sim": ""} + +SERIESCMAE = { + "sim": "", + "sinasc": "", +} + +SERV_CLA = {"sih": ""} + +SERV_ESP = {"cnes": ""} + +SEXO = { + "ciha": "", + "ibge": "", + "sih": "", + "sim": "", + "sinasc": "", +} + +SEXOPAC = {"sia": ""} + +SEXUAL = {"sinan": ""} + +SEX_ASSEDI = {"sinan": ""} + +SEX_ESPEC = {"sinan": ""} + +SEX_ESTUPR = {"sinan": ""} + +SEX_EXPLO = {"sinan": ""} + +SEX_OUTRO = {"sinan": ""} + +SEX_PORNO = {"sinan": ""} + +SEX_PUDOR = {"sinan": ""} + +SGRUPHAB = {"cnes": ""} + +SG_UF = {"sinan": ""} + +SG_UF_2 = {"sinan": ""} + +SG_UF_AT = {"sinan": ""} + +SG_UF_INTE = {"sinan": ""} + +SG_UF_NOT = {"sinan": ""} + +SG_UF_OCOR = {"sinan": ""} + +SILICA = {"sinan": ""} + +SIMUL_RD = {"cnes": ""} + +SINAIS = {"sinan": ""} + +SINAIS_ICC = {"sinan": ""} + +SINTOMATIC = {"sinan": ""} + +SINTO_DES = {"sinan": ""} + +SIN_GANG = {"sinan": ""} + +SIN_OUT = {"sinan": ""} + +SIN_OUTR_E = {"sinan": ""} + +SIN_PULM = {"sinan": ""} + +SIS_JUST = {"sih": ""} + +SITUACAO = {"ibge": ""} + +SITUA_12_M = {"sinan": ""} + +SITUA_9_M = {"sinan": ""} + +SITUA_ENCE = {"sinan": ""} + +SIT_CONJUG = {"sinan": ""} + +SIT_RUA = {"sia": ""} + +SIT_TRAB = {"sinan": ""} + +SOLVENTE = {"sinan": ""} + +SORO1 = {"sinan": ""} + +SORO2 = {"sinan": ""} + +SOROTIPO = {"sinan": ""} + +SOUTROS = {"sinan": ""} + +SP_AA = {"sih": ""} + +SP_ATOPROF = {"sih": ""} + +SP_CGCHOSP = {"sih": ""} + +SP_CIDPRI = {"sih": ""} + +SP_CIDSEC = {"sih": ""} + +SP_CNES = {"sih": ""} + +SP_COMPLEX = {"sih": ""} + +SP_CO_FAEC = {"sih": ""} + +SP_CPFCGC = {"sih": ""} + +SP_DES_HOS = {"sih": ""} + +SP_DES_PAC = {"sih": ""} + +SP_DTINTER = {"sih": ""} + +SP_DTSAIDA = {"sih": ""} + +SP_FINANC = {"sih": ""} + +SP_GESTOR = {"sih": ""} + +SP_MM = {"sih": ""} + +SP_M_HOSP = {"sih": ""} + +SP_M_PAC = {"sih": ""} + +SP_NAIH = {"sih": ""} + +SP_NF = {"sih": ""} + +SP_NUM_PR = {"sih": ""} + +SP_PF_CBO = {"sih": ""} + +SP_PF_DOC = {"sih": ""} + +SP_PJ_DOC = {"sih": ""} + +SP_PROCREA = {"sih": ""} + +SP_PTSP = {"sih": ""} + +SP_PTSP_NF = {"sih": ""} + +SP_QTD_ATO = {"sih": ""} + +SP_QT_PROC = {"sih": ""} + +SP_TIPO = {"sih": ""} + +SP_TP_ATO = {"sih": ""} + +SP_UF = {"sih": ""} + +SP_U_AIH = {"sih": ""} + +SP_VALATO = {"sih": ""} + +SRVUNICO = {"cnes": ""} + +STALIMENTO = {"sinan": ""} + +STANTIBIO = {"sinan": ""} + +STANTIBOTU = {"sinan": ""} + +STAVALIA = {"sinan": ""} + +STBOCA = {"sinan": ""} + +STBROMATO = {"sinan": ""} + +STBULBAR = {"sinan": ""} + +STCARDIACA = {"sinan": ""} + +STCASEIRA = {"sinan": ""} + +STCEFALEIA = {"sinan": ""} + +STCESPARTO = {"sinasc": ""} + +STCLINICA = {"sinan": ""} + +STCODIFICA = {"sim": ""} + +STCOMA = {"sinan": ""} + +STCOMERCIO = {"sinan": ""} + +STCONSTIPA = {"sinan": ""} + +STCURA1 = {"sinan": ""} + +STCURA2 = {"sinan": ""} + +STCURA3 = {"sinan": ""} + +STDESCENDE = {"sinan": ""} + +STDIARREIA = {"sinan": ""} + +STDIPLOPIA = {"sinan": ""} + +STDISARTRI = {"sinan": ""} + +STDISFAGIA = {"sinan": ""} + +STDISFONIA = {"sinan": ""} + +STDISPNEIA = {"sinan": ""} + +STDNEPIDEM = {"sinasc": ""} + +STDNNOVA = {"sinasc": ""} + +STDOEPIDEM = {"sim": ""} + +STDOMICILI = {"sinan": ""} + +STDONOVA = {"sim": ""} + +STELETRO = {"sinan": ""} + +STESCOLA = {"sinan": ""} + +STEXPALIM = {"sinan": ""} + +STFACIAL = {"sinan": ""} + +STFEBRE = {"sinan": ""} + +STFERIMENT = {"sinan": ""} + +STFESTA = {"sinan": ""} + +STFEZESMAT = {"sinan": ""} + +STFEZESRES = {"sinan": ""} + +STFLACIDEZ = {"sinan": ""} + +STHOSPITAL = {"sinan": ""} + +STMEMINF = {"sinan": ""} + +STMEMSUP = {"sinan": ""} + +STMIDRIASE = {"sinan": ""} + +STNAUSEA = {"sinan": ""} + +STOFTALMO = {"sinan": ""} + +STOUTROLOC = {"sinan": ""} + +STOUTROSIN = {"sinan": ""} + +STOUTROTRA = {"sinan": ""} + +STPARESTES = {"sinan": ""} + +STPTOSE = {"sinan": ""} + +STRESPIRA = {"sinan": ""} + +STRESS = {"sinan": ""} + +STRESTAURA = {"sinan": ""} + +STRESULTA = {"sinan": ""} + +STSENSIVEL = {"sinan": ""} + +STSIMETRIC = {"sinan": ""} + +STSORO = {"sinan": ""} + +STSOROMAT = {"sinan": ""} + +STSORORES = {"sinan": ""} + +STTONTURA = {"sinan": ""} + +STTRABALHO = {"sinan": ""} + +STTRABPART = {"sinasc": ""} + +STVENTILA = {"sinan": ""} + +STVISAO = {"sinan": ""} + +STVOMITO = {"sinan": ""} + +ST_ALI1COL = {"sinan": ""} + +ST_ALI2COL = {"sinan": ""} + +ST_ALI2RES = {"sinan": ""} + +ST_ALIMEN = {"sinan": ""} + +ST_A_CLINI = {"sinan": ""} + +ST_BLOQ = {"sih": ""} + +ST_F_OUTRO = {"sinan": ""} + +ST_IMPRO = {"sinan": ""} + +ST_IMPRO_ = {"sinan": ""} + +ST_INAD = {"sinan": ""} + +ST_INCUB_M = {"sinan": ""} + +ST_INC_ME = {"sinan": ""} + +ST_MANIP = {"sinan": ""} + +ST_MOT_BLO = {"sih": ""} + +ST_SITUAC = {"sih": ""} + +SUBFIN = {"sia": ""} + +SUDORESE = {"sinan": ""} + +SUGE_VINCU = {"sinan": ""} + +SULFA = {"sinan": ""} + +SUPERFICIA = {"sinan": ""} + +SUPERIORES = {"sinan": ""} + +SURTO = {"sinan": ""} + +SUSPEITOS = {"sinan": ""} + +S_ACELL6 = {"cnes": ""} + +S_AFERES = {"cnes": ""} + +S_ALCOME = {"cnes": ""} + +S_ALSEME = {"cnes": ""} + +S_ARMAZE = {"cnes": ""} + +S_BIOMOL = {"cnes": ""} + +S_COLETA = {"cnes": ""} + +S_CONTRQ = {"cnes": ""} + +S_CPFLUX = {"cnes": ""} + +S_DISTRI = {"cnes": ""} + +S_DPAC = {"cnes": ""} + +S_DPI = {"cnes": ""} + +S_ESTOQU = {"cnes": ""} + +S_HBSAGN = {"cnes": ""} + +S_HBSAGP = {"cnes": ""} + +S_HEMOST = {"cnes": ""} + +S_IMUNFE = {"cnes": ""} + +S_IMUNOH = {"cnes": ""} + +S_PREEST = {"cnes": ""} + +S_PREPAR = {"cnes": ""} + +S_PRETRA = {"cnes": ""} + +S_PROCES = {"cnes": ""} + +S_QCDURA = {"cnes": ""} + +S_QLDURA = {"cnes": ""} + +S_REAGN = {"cnes": ""} + +S_REAGP = {"cnes": ""} + +S_RECEPC = {"cnes": ""} + +S_REHCV = {"cnes": ""} + +S_SGDOAD = {"cnes": ""} + +S_SIMULA = {"cnes": ""} + +S_SOROLO = {"cnes": ""} + +S_TRANSF = {"cnes": ""} + +S_TRICLI = {"cnes": ""} + +S_TRIHMT = {"cnes": ""} + +TAREFAS = {"sinan": ""} + +TATU_PIER = {"sinan": ""} + +TECIDOS = {"sinan": ""} + +TECNICA = {"sinan": ""} + +TEMPO = {"sinan": ""} + +TEMPO_FUMA = {"sinan": ""} + +TERCEIRIZA = {"sinan": ""} + +TERCEIRO = {"cnes": ""} + +TESTE_TUBE = {"sinan": ""} + +TEST_MOLEC = {"sinan": ""} + +TEST_SENSI = {"sinan": ""} + +TETRAC = {"sinan": ""} + +TIFICA = {"sinan": ""} + +TIPEQUIP = {"cnes": ""} + +TIPOACID = {"sim": ""} + +TIPOBITO = {"sim": ""} + +TIPOGRAV = {"sim": ""} + +TIPOPARTO = {"sim": ""} + +TIPOSEGM = {"cnes": ""} + +TIPOVIOL = {"sim": ""} + +TIPO_ACID = {"sinan": ""} + +TIPO_EQP = {"cnes": ""} + +TIPO_GRAV = {"sinasc": ""} + +TIPO_INVES = {"sinan": ""} + +TIPO_LEITE = {"sinan": ""} + +TIPO_PARTO = {"sinasc": ""} + +TIPPRE = {"sia": ""} + +TIPPRE = {"sia": ""} + +TIP_DIARRE = {"sinan": ""} + +TIP_SORO = {"sinan": ""} + +TIREOIDITE = {"sinan": ""} + +TIT_IGG_S1 = {"sinan": ""} + +TIT_IGG_S2 = {"sinan": ""} + +TIT_IGM_S1 = {"sinan": ""} + +TIT_IGM_S2 = {"sinan": ""} + +TOMOGRAFIA = {"sinan": ""} + +TONR_CER_N = {"sinan": ""} + +TONR_FAC_N = {"sinan": ""} + +TONR_MID_N = {"sinan": ""} + +TONR_MIE_N = {"sinan": ""} + +TONR_MSD_N = {"sinan": ""} + +TONR_MSE_N = {"sinan": ""} + +TONTURA = {"sinan": ""} + +TOSSE = {"sinan": ""} + +TOT_PT_SP = {"sih": ""} + +TPALTA_N = {"sinan": ""} + +TPAPRESENT = {"sinasc": ""} + +TPASSINA = {"sim": ""} + +TPATENDE = {"sinan": ""} + +TPAUTOCTO = {"sinan": ""} + +TPBOTULISM = {"sinan": ""} + +TPBROMATO = {"sinan": ""} + +TPCLINICA = {"sinan": ""} + +TPCONFIRMA = {"sinan": ""} + +TPDISEC1 = {"sih": ""} + +TPDISEC2 = {"sih": ""} + +TPDISEC3 = {"sih": ""} + +TPDISEC4 = {"sih": ""} + +TPDISEC5 = {"sih": ""} + +TPDISEC6 = {"sih": ""} + +TPDISEC7 = {"sih": ""} + +TPDISEC8 = {"sih": ""} + +TPDISEC9 = {"sih": ""} + +TPDOCRESP = {"sinasc": ""} + +TPESQPAR = {"sinan": ""} + +TPESQUEMA = {"sinan": ""} + +TPEVIDENCI = {"sinan": ""} + +TPEXANTE = {"sinan": ""} + +TPEXP = {"sinan": ""} + +TPFEZESTOX = {"sinan": ""} + +TPFIN = {"sia": ""} + +TPFUNCRESP = {"sinasc": ""} + +TPGESTAO = {"cnes": ""} + +TPIDADEPAC = {"sia": ""} + +TPMETESTIM = {"sinasc": ""} + +TPMORTEOCO = {"sim": ""} + +TPMOTPARC = {"sinan": ""} + +TPNASCASSI = {"sinasc": ""} + +TPNEURO = {"sinan": ""} + +TPNIVELINV = {"sim": ""} + +TPOBITOCOR = {"sim": ""} + +TPPOS = {"sim": ""} + +TPRAPIDO1 = {"sinan": ""} + +TPRAPIDO2 = {"sinan": ""} + +TPRAPIDO3 = {"sinan": ""} + +TPRESGINFO = {"sim": ""} + +TPROBSON = {"sinasc": ""} + +TPRUIDO = {"sinan": ""} + +TPSOROTOX = {"sinan": ""} + +TPTEMPO = {"sinan": ""} + +TPTEMPORIS = {"sinan": ""} + +TPTESTE1 = {"sinan": ""} + +TPUNINOT = {"sinan": ""} + +TPUPS = {"sia": ""} + +TP_ACIDENT = {"sinan": ""} + +TP_AFAST = {"sinan": ""} + +TP_ALI1TOX = {"sinan": ""} + +TP_ALI2TO = {"sinan": ""} + +TP_AMB_OCO = {"sinan": ""} + +TP_ANALISE = {"sinan": ""} + +TP_CAUSA = {"sinan": ""} + +TP_CAUSOUT = {"sinan": ""} + +TP_COLOUT = {"sinan": ""} + +TP_DESAT = {"cnes": ""} + +TP_DROGA = {"sia": ""} + +TP_IDENTFI = {"sinan": ""} + +TP_INDIRET = {"sinan": ""} + +TP_LEITO = {"cnes": ""} + +TP_LIQUOR = {"sinan": ""} + +TP_LOCAL = {"sinan": ""} + +TP_LOCALLE = {"sinan": ""} + +TP_MOTORA = {"sinan": ""} + +TP_NOT = {"sinan": ""} + +TP_ORIGEM = {"sinan": ""} + +TP_PREST = {"cnes": ""} + +TP_PROFILA = {"sinan": ""} + +TP_PRO_PRE = {"sinan": ""} + +TP_REPETE = {"sinan": ""} + +TP_SENSITI = {"sinan": ""} + +TP_SISTEMA = {"sinan": ""} + +TP_SOROHCV = {"sinan": ""} + +TP_TEMP_FU = {"sinan": ""} + +TP_TOXOUTR = {"sinan": ""} + +TP_UNID = {"cnes": ""} + +TP_VACINA = {"sinan": ""} + +TP_ZN_OCO = {"sinan": ""} + +TRAB_DESC = {"sinan": ""} + +TRAB_DOE = {"sinan": ""} + +TRANSF = {"sinan": ""} + +TRANSFU = {"sinan": ""} + +TRANSFUSAO = {"sinan": ""} + +TRANSPLA = {"sinan": ""} + +TRANSPO_N = {"sinan": ""} + +TRAN_COMP = {"sinan": ""} + +TRAN_MENT = {"sinan": ""} + +TRATADO = {"sinan": ""} + +TRATAM = {"sinan": ""} + +TRATAMENTO = {"sinan": ""} + +TRATANAO = {"sinan": ""} + +TRATPARC = {"sinan": ""} + +TRATSUP_AT = {"sinan": ""} + +TRAT_ATUAL = {"sinan": ""} + +TRAT_SUPER = {"sinan": ""} + +TRA_AMPOLA = {"sinan": ""} + +TRA_ANTIBI = {"sinan": ""} + +TRA_ANTIGO = {"sinan": ""} + +TRA_ANTIVI = {"sinan": ""} + +TRA_CLASSI = {"sinan": ""} + +TRA_CORTIC = {"sinan": ""} + +TRA_CPAP = {"sinan": ""} + +TRA_DATA_A = {"sinan": ""} + +TRA_DATA_S = {"sinan": ""} + +TRA_DIAG_C = {"sinan": ""} + +TRA_DIAG_T = {"sinan": ""} + +TRA_DOSE = {"sinan": ""} + +TRA_DROGA_ = {"sinan": ""} + +TRA_DT = {"sinan": ""} + +TRA_DT_ALT = {"sinan": ""} + +TRA_DT_INT = {"sinan": ""} + +TRA_ESPECI = {"sinan": ""} + +TRA_ESQUEM = {"sinan": ""} + +TRA_ESQU_1 = {"sinan": ""} + +TRA_HOSP = {"sinan": ""} + +TRA_INDI_N = {"sinan": ""} + +TRA_INFILT = {"sinan": ""} + +TRA_INFI_1 = {"sinan": ""} + +TRA_INTERR = {"sinan": ""} + +TRA_MECANI = {"sinan": ""} + +TRA_MOTIVO = {"sinan": ""} + +TRA_MUNICI = {"sinan": ""} + +TRA_NUM_PA = {"sinan": ""} + +TRA_OUTRA_ = {"sinan": ""} + +TRA_OUTR_N = {"sinan": ""} + +TRA_PESO = {"sinan": ""} + +TRA_QTD_SO = {"sinan": ""} + +TRA_SORO = {"sinan": ""} + +TRA_TRATAM = {"sinan": ""} + +TRA_UF = {"sinan": ""} + +TRA_VASOAT = {"sinan": ""} + +TREINA_MIL = {"sinan": ""} + +TRESMAIS = {"sinan": ""} + +TRONCO = {"sinan": ""} + +TUBE = {"sinan": ""} + +TURNO_AT = {"cnes": ""} + +T_FEBRE = {"sinan": ""} + +UF = { + "pni": "", + "sinan": "", +} + +UFATUAL = {"sinan": ""} + +UFCOD = {"ibge": ""} + +UFDIF = {"sia": ""} + +UFINFORM = { + "sim": "", + "sinasc": "", +} + +UFINTERNA = {"sinan": ""} + +UFMUN = {"sia": ""} + +UFMUNRES = {"cnes": ""} + +UFRESAT = {"sinan": ""} + +UFTRANSFU = {"sinan": ""} + +UF_ACID = {"sinan": ""} + +UF_ATENDE = {"sinan": ""} + +UF_EMP = {"sinan": ""} + +UF_H = {"sinan": ""} + +UF_HOSP = {"sinan": ""} + +UF_HOSPITA = {"sinan": ""} + +UF_ING = {"sinan": ""} + +UF_PRE_NAT = {"sinan": ""} + +UF_RES = {"sih": ""} + +UF_TRANSF = {"sinan": ""} + +UF_ZI = {"sih": ""} + +UNI_ATENDE = {"sinan": ""} + +UN_COBAL = {"cnes": ""} + +URGEMERG = {"cnes": ""} + +URINA = {"sinan": ""} + +URO_D = {"sinan": ""} + +URO_D_2 = {"sinan": ""} + +URO_D_3 = {"sinan": ""} + +URO_R1 = {"sinan": ""} + +URO_R2 = {"sinan": ""} + +URO_R3 = {"sinan": ""} + +US_ORTP = {"sih": ""} + +US_RN = {"sih": ""} + +US_SADT = {"sih": ""} + +US_SANGUE = {"sih": ""} + +US_SH = {"sih": ""} + +US_SP = {"sih": ""} + +US_TOT = {"sih": ""} + +UTILIZACAO = {"sinan": ""} + +UTIL_DESC = {"sinan": ""} + +UTI_INT_AL = {"sih": ""} + +UTI_INT_AN = {"sih": ""} + +UTI_INT_IN = {"sih": ""} + +UTI_INT_TO = { + "ciha": "", + "sih": "", +} + +UTI_MES_AL = {"sih": ""} + +UTI_MES_AN = {"sih": ""} + +UTI_MES_IN = {"sih": ""} + +UTI_MES_TO = { + "ciha": "", + "sih": "", +} + +UTI_TOTAL = {"sih": ""} + +UTRANSFU = {"sinan": ""} + +VACINA = {"sinan": ""} + +VACINACAO = {"sinan": ""} + +VACINAD = {"sinan": ""} + +VACINADO = {"sinan": ""} + +VACINADUPL = {"sinan": ""} + +VACINARUBE = {"sinan": ""} + +VAC_HEP_B = {"sinan": ""} + +VAL_ACOMP = {"sih": ""} + +VAL_OBSANG = {"sih": ""} + +VAL_ORTP = {"sih": ""} + +VAL_PED1AC = {"sih": ""} + +VAL_RN = {"sih": ""} + +VAL_SADT = {"sih": ""} + +VAL_SADTSR = {"sih": ""} + +VAL_SANG = {"sih": ""} + +VAL_SANGUE = {"sih": ""} + +VAL_SH = {"sih": ""} + +VAL_SH_FED = {"sih": ""} + +VAL_SH_GES = {"sih": ""} + +VAL_SP = {"sih": ""} + +VAL_SP_FED = {"sih": ""} + +VAL_SP_GES = {"sih": ""} + +VAL_TOT = {"sih": ""} + +VAL_TRANSP = {"sih": ""} + +VAL_UCI = {"sih": ""} + +VAL_UTI = {"sih": ""} + +VARIA_VIR = {"sinan": ""} + +VERSAOSCB = {"sim": ""} + +VERSAOSIST = { + "sim": "", + "sinasc": "", +} + +VIA_1 = {"sinan": ""} + +VIA_2 = {"sinan": ""} + +VIA_3 = {"sinan": ""} + +VINCPREV = {"sih": ""} + +VINCULAC = {"cnes": ""} + +VINCULO = {"sinan": ""} + +VINCUL_A = {"cnes": ""} + +VINCUL_C = {"cnes": ""} + +VINCUL_N = {"cnes": ""} + +VINC_ESP = {"sinan": ""} + +VINC_OUT = {"sinan": ""} + +VINC_SUS = {"cnes": ""} + +VIOL_ESPEC = {"sinan": ""} + +VIOL_FINAN = {"sinan": ""} + +VIOL_FISIC = {"sinan": ""} + +VIOL_INFAN = {"sinan": ""} + +VIOL_LEGAL = {"sinan": ""} + +VIOL_MOTIV = {"sinan": ""} + +VIOL_NEGLI = {"sinan": ""} + +VIOL_OUTR = {"sinan": ""} + +VIOL_PSICO = {"sinan": ""} + +VIOL_SEXU = {"sinan": ""} + +VIOL_TORT = {"sinan": ""} + +VIOL_TRAF = {"sinan": ""} + +VL_APRES = {"sia": ""} + +VL_APROV = {"sia": ""} + +VOMITO = {"sinan": ""} + +VOMITOS = {"sinan": ""} + +VOP_VORH = {"sinan": ""} + +XENODIAG = {"sinan": ""} + +ZONA = {"sinan": ""} + +ZUMBIDO = {"sinan": ""} + +agravaids = {"sinan": ""} + +agravalcoo = {"sinan": ""} + +agravdiabe = {"sinan": ""} + +agravdoenc = {"sinan": ""} + +agravdroga = {"sinan": ""} + +agravoutra = {"sinan": ""} + +agravtabac = {"sinan": ""} + +ant_anemia = {"sinan": ""} + +ant_asteri = {"sinan": ""} + +ant_candid = {"sinan": ""} + +ant_caquex = {"sinan": ""} + +ant_contag = {"sinan": ""} + +ant_dermat = {"sinan": ""} + +ant_diarre = {"sinan": ""} + +ant_disfun = {"sinan": ""} + +ant_droga = {"sinan": ""} + +ant_esof_n = {"sinan": ""} + +ant_febre = {"sinan": ""} + +ant_herpes = {"sinan": ""} + +ant_linfo = {"sinan": ""} + +ant_pneumo = {"sinan": ""} + +ant_pulmon = {"sinan": ""} + +ant_rel_ca = {"sinan": ""} + +ant_tosse = {"sinan": ""} + +ant_toxo = {"sinan": ""} + +ant_trasmi = {"sinan": ""} + +ant_tuberc = {"sinan": ""} + +antrelse_n = {"sinan": ""} + +antsifil_n = {"sinan": ""} + +aval_atu_n = {"sinan": ""} + +avalia_n = {"sinan": ""} + +bacilosc_1 = {"sinan": ""} + +bacilosc_2 = {"sinan": ""} + +bacilosc_3 = {"sinan": ""} + +bacilosc_4 = {"sinan": ""} + +bacilosc_5 = {"sinan": ""} + +bacilosc_6 = {"sinan": ""} + +bacilosco = {"sinan": ""} + +cancro_mole = {"sinan": ""} + +caract_genomica = {"sinan": ""} + +clado = {"sinan": ""} + +clamidea = {"sinan": ""} + +classatual = {"sinan": ""} + +classi_fin = {"sinan": ""} + +classopera = {"sinan": ""} + +co_uf_res = {"sinan": ""} + +comp_sexual = {"sinan": ""} + +contador = { + "sim": "", + "sinasc": "", +} + +contag_cd4 = {"sinan": ""} + +contat_animal = {"sinan": ""} + +contexam = {"sinan": ""} + +contreg = {"sinan": ""} + +criterio = {"sinan": ""} + +cs_escol_n = {"sinan": ""} + +cs_gestant = {"sinan": ""} + +cs_raca = {"sinan": ""} + +cs_sexo = {"sinan": ""} + +cs_zona = {"sinan": ""} + +cultura_es = {"sinan": ""} + +data_vacina = {"sinan": ""} + +def_diagno = {"sinan": ""} + +dip = {"sinan": ""} + +doenca_tra1 = {"sinan": ""} + +donovanose = {"sinan": ""} + +dose_receb = {"sinan": ""} + +dt_coleta = {"sinan": ""} + +dt_diag = {"sinan": ""} + +dt_encerra = {"sinan": ""} + +dt_evolucao = {"sinan": ""} + +dt_inic_tr = {"sinan": ""} + +dt_interna = {"sinan": ""} + +dt_nasc = {"sinan": ""} + +dt_noti_at = {"sinan": ""} + +dt_notific = {"sinan": ""} + +dt_obito = {"sinan": ""} + +dt_sin_pri = {"sinan": ""} + +dtalta_n = {"sinan": ""} + +dtinictrat = {"sinan": ""} + +dtultcomp = {"sinan": ""} + +esq_atu_n = {"sinan": ""} + +esq_ini_n = {"sinan": ""} + +estrangeiro = {"sinan": ""} + +evolucao = {"sinan": ""} + +forma = {"sinan": ""} + +formaclini = {"sinan": ""} + +gonorreia = {"sinan": ""} + +herpes_genital = {"sinan": ""} + +histopatol = {"sinan": ""} + +hiv = {"sinan": ""} + +hospital = {"sinan": ""} + +hpv = {"sinan": ""} + +htlv = {"sinan": ""} + +id_agravo = {"sinan": ""} + +id_mn_resi = {"sinan": ""} + +id_municip = {"sinan": ""} + +id_regiona = {"sinan": ""} + +id_rg_resi = {"sinan": ""} + +id_unidade = {"sinan": ""} + +ident_genero = {"sinan": ""} + +ist_ativa = {"sinan": ""} + +lab_triage = {"sinan": ""} + +labc_igg = {"sinan": ""} + +linfogranuloma = {"sinan": ""} + +local_cont = {"sinan": ""} + +met_lab = {"sinan": ""} + +mododetect = {"sinan": ""} + +modoentr = {"sinan": ""} + +mycoplasma_genital = {"sinan": ""} + +name = {"sinan": ""} + +nervosafet = {"sinan": ""} + +nu_ano = {"sinan": ""} + +nu_idade_n = {"sinan": ""} + +nu_lesoes = {"sinan": ""} + +orienta_sexual = {"sinan": ""} + +outro_des = {"sinan": ""} + +owner_org = {"sinan": ""} + +pac_imunossup = {"sinan": ""} + +pop_liber = {"sinan": ""} + +profile = {"sinan": ""} + +profis_saude = {"sinan": ""} + +raiox_tora = {"sinan": ""} + +resources = {"sinan": ""} + +resultado_exa_lab = {"sinan": ""} + +sg_uf = {"sinan": ""} + +sg_uf_not = {"sinan": ""} + +sifilis = {"sinan": ""} + +sintoma = {"sinan": ""} + +situa_ence = {"sinan": ""} + +test_molec = {"sinan": ""} + +test_sensi = {"sinan": ""} + +title = {"sinan": ""} + +tp_amost = {"sinan": ""} + +tpalta_n = {"sinan": ""} + +tpesquema = {"sinan": ""} + +tra_esquem = {"sinan": ""} + +transm = {"sinan": ""} + +tratamento = {"sinan": ""} + +tratamento_mpox = {"sinan": ""} + +tratparc = {"sinan": ""} + +tratsup_at = {"sinan": ""} + +trichomomas_vaginals = {"sinan": ""} + +uti = {"sinan": ""} + +vacina = {"sinan": ""} + +verruga_genital = {"sinan": ""} + +vinculo_epi = {"sinan": ""} diff --git a/pysus/api/ftp/README.ipynb b/pysus/api/ducklake/catalog/orm/__init__.py similarity index 100% rename from pysus/api/ftp/README.ipynb rename to pysus/api/ducklake/catalog/orm/__init__.py diff --git a/pysus/api/ducklake/catalog.py b/pysus/api/ducklake/catalog/orm/dataset.py similarity index 83% rename from pysus/api/ducklake/catalog.py rename to pysus/api/ducklake/catalog/orm/dataset.py index a9c0fd05..687799ce 100644 --- a/pysus/api/ducklake/catalog.py +++ b/pysus/api/ducklake/catalog/orm/dataset.py @@ -1,18 +1,16 @@ -"""SQLAlchemy ORM models for the DuckLake catalog schema. +"""Per-dataset catalog ORM models — stored in ``catalog_.db``. -Defines tables for datasets, groups, files, and columns stored -in the pysus schema of the local DuckDB catalog. +Defines tables for groups, files, and columns within a single dataset. """ -import enum from datetime import datetime from typing import Optional from sqlalchemy import ( + BigInteger, Boolean, Column, DateTime, - Enum, ForeignKey, Index, Integer, @@ -24,7 +22,7 @@ class Base(DeclarativeBase): - """Base class for all DuckLake catalog ORM models.""" + """Base class for per-dataset catalog ORM models.""" pass @@ -48,29 +46,7 @@ class Base(DeclarativeBase): ) -class CatalogTable(Base): - """Abstract base for catalog tables sharing the pysus schema.""" - - __abstract__ = True - __table_args__: tuple = ({"schema": "pysus"},) - - -class Origin(enum.Enum): - """Origin type for a dataset. - - Attributes - ---------- - FTP : str - Dataset sourced from the FTP server. - API : str - Dataset sourced from an API. - """ - - FTP = "ftp" - API = "api" - - -class CatalogDataset(CatalogTable): +class Dataset(Base): """ORM model for the datasets table, representing a dataset collection. Parameters @@ -83,11 +59,10 @@ class CatalogDataset(CatalogTable): Human-readable full name. description : str, optional Optional description of the dataset contents. - origin : Origin - Whether the dataset originates from FTP or an API. """ __tablename__ = "datasets" + __table_args__: tuple = ({"schema": "pysus"},) id = Column( Integer, @@ -97,15 +72,14 @@ class CatalogDataset(CatalogTable): name = Column(String, nullable=False, unique=True, index=True) long_name = Column(String, nullable=False) description = Column(String, nullable=True) - origin = Column(Enum(Origin), nullable=False) groups = relationship( - "DatasetGroup", + "Group", back_populates="dataset", cascade="all, delete-orphan", ) files = relationship( - "CatalogFile", + "File", back_populates="dataset", cascade="all, delete-orphan", ) @@ -116,7 +90,7 @@ class CatalogDataset(CatalogTable): ) -class ColumnDefinition(CatalogTable): +class ColumnDefinition(Base): """ORM model for dataset column metadata. Parameters @@ -136,6 +110,7 @@ class ColumnDefinition(CatalogTable): """ __tablename__ = "dataset_columns" + __table_args__: tuple = ({"schema": "pysus"},) id = Column( Integer, @@ -153,9 +128,9 @@ class ColumnDefinition(CatalogTable): description = Column(String, nullable=True) nullable = Column(Boolean, nullable=False, default=True) - dataset = relationship("CatalogDataset", back_populates="columns") + dataset = relationship("Dataset", back_populates="columns") files = relationship( - "CatalogFile", + "File", secondary=file_columns, back_populates="columns", ) @@ -166,7 +141,7 @@ class ColumnDefinition(CatalogTable): ) -class DatasetGroup(CatalogTable): +class Group(Base): """ORM model for dataset groups, grouping related files within a dataset. Parameters @@ -184,6 +159,7 @@ class DatasetGroup(CatalogTable): """ __tablename__ = "dataset_groups" + __table_args__: tuple = ({"schema": "pysus"},) id = Column( Integer, @@ -200,9 +176,12 @@ class DatasetGroup(CatalogTable): long_name = Column(String, nullable=False) description = Column(String, nullable=True) - dataset = relationship("CatalogDataset", back_populates="groups") + dataset = relationship( + "Dataset", + back_populates="groups", + ) files = relationship( - "CatalogFile", + "File", back_populates="group", cascade="all, delete-orphan", ) @@ -213,7 +192,7 @@ class DatasetGroup(CatalogTable): ) -class CatalogFile(CatalogTable): +class File(Base): """ORM model for the files table, representing individual data files. Parameters @@ -230,10 +209,14 @@ class CatalogFile(CatalogTable): File size in bytes. rows : int Number of rows in the file. + type : str, optional + File type identifier. modified : datetime Timestamp of the last known modification. origin_modified : datetime, optional Original modification timestamp from the source. + origin_size : int + Original file size in bytes. origin_path : str Original source path of the file. sha256 : str, optional @@ -247,6 +230,7 @@ class CatalogFile(CatalogTable): """ __tablename__ = "files" + __table_args__: tuple = ({"schema": "pysus"},) id: Mapped[int] = mapped_column( Integer, @@ -264,13 +248,15 @@ class CatalogFile(CatalogTable): ) path: Mapped[str] = mapped_column(String, nullable=False, unique=True) - size: Mapped[int] = mapped_column(Integer, nullable=False) + size: Mapped[int] = mapped_column(BigInteger, nullable=False) rows: Mapped[int] = mapped_column(Integer, nullable=False) + type: Mapped[str] = mapped_column(String, nullable=True) modified: Mapped[datetime] = mapped_column(DateTime, nullable=False) origin_modified: Mapped[datetime | None] = mapped_column( DateTime, nullable=True, ) + origin_size: Mapped[int] = mapped_column(BigInteger, nullable=False) origin_path: Mapped[str] = mapped_column(String, nullable=False) sha256: Mapped[str | None] = mapped_column( String(64), @@ -294,12 +280,12 @@ class CatalogFile(CatalogTable): index=True, ) - dataset: Mapped["CatalogDataset"] = relationship( - "CatalogDataset", + dataset: Mapped["Dataset"] = relationship( + "Dataset", back_populates="files", ) - group: Mapped[Optional["DatasetGroup"]] = relationship( - "DatasetGroup", + group: Mapped[Optional["Group"]] = relationship( + "Group", back_populates="files", ) columns: Mapped[list["ColumnDefinition"]] = relationship( diff --git a/pysus/api/ducklake/catalog/orm/default.py b/pysus/api/ducklake/catalog/orm/default.py new file mode 100644 index 00000000..bd412080 --- /dev/null +++ b/pysus/api/ducklake/catalog/orm/default.py @@ -0,0 +1,43 @@ +"""Central discovery catalog ORM models — stored in ``catalog.db``. + +Tracks only available datasets. File-level metadata lives in +per-dataset ``catalog_.db`` files defined in ``.dataset``. +""" + +from sqlalchemy import Column, Integer, Sequence, String +from sqlalchemy.orm import DeclarativeBase + + +class Base(DeclarativeBase): + """Base class for central catalog ORM models.""" + + pass + + +class Dataset(Base): + """ORM model for the datasets table — central registry of available + datasets. + + Parameters + ---------- + id : int, optional + Primary key (auto-generated by sequence). + name : str + Unique short name for the dataset. + long_name : str + Human-readable full name. + description : str, optional + Optional description of the dataset contents. + """ + + __tablename__ = "datasets" + __table_args__: tuple = ({"schema": "pysus"},) + + id = Column( + Integer, + Sequence("datasets_id_seq", schema="pysus"), + primary_key=True, + ) + name = Column(String, nullable=False, unique=True, index=True) + long_name = Column(String, nullable=False) + description = Column(String, nullable=True) diff --git a/pysus/api/ducklake/catalog/parsers.py b/pysus/api/ducklake/catalog/parsers.py new file mode 100644 index 00000000..e69de29b diff --git a/pysus/api/ducklake/client.py b/pysus/api/ducklake/client.py index 21e9525b..f7339569 100644 --- a/pysus/api/ducklake/client.py +++ b/pysus/api/ducklake/client.py @@ -1,12 +1,12 @@ -"""High-level client for DuckLake S3-based dataset catalog. +"""High-level client for DuckLake S3-based public health dataset catalog. -Provides authentication, catalog synchronization, dataset querying, -and file download capabilities backed by a local DuckDB engine. +Provides authentication, dataset discovery, and file download +capabilities backed by per-dataset DuckDB engines. """ from collections.abc import Callable from pathlib import Path -from typing import Any, Literal +from typing import Any import boto3 import httpx @@ -15,103 +15,15 @@ from pydantic import BaseModel, PrivateAttr, SecretStr from pysus import CACHEPATH from pysus.api.models import BaseRemoteClient, BaseRemoteFile +from pysus.api.types import DUCKLAKE from sqlalchemy import create_engine -from sqlalchemy.orm import contains_eager, joinedload, sessionmaker +from sqlalchemy.orm import sessionmaker from sqlalchemy.pool import StaticPool -from .catalog import CatalogDataset, CatalogFile, DatasetGroup +from .catalog.orm.default import Dataset from .models import DuckDataset, File -class CatalogDatasetAdapter: - """Adapter wrapping a CatalogDataset ORM record for use by File objects. - - Parameters - ---------- - catalog_dataset : CatalogDataset - The ORM record to wrap. - ducklake : DuckLake - The parent DuckLake client instance. - """ - - def __init__(self, catalog_dataset: CatalogDataset, ducklake): - self.name = catalog_dataset.name - self.long_name = catalog_dataset.long_name or "" - self.description = catalog_dataset.description or "" - self.group_definitions: dict[str, str] = {} - self.ducklake = ducklake - self.client = ducklake - - @property - def content(self): - """Query the DuckLake client for files in this dataset. - - Returns - ------- - list - List of files belonging to this dataset. - """ - return self.ducklake.query(dataset=self.name.upper()) - - -class DatasetGroupAdapter: - """Adapter wrapping a DatasetGroup ORM record for use by File objects. - - Parameters - ---------- - dataset_group : DatasetGroup - The ORM record to wrap. - dataset : CatalogDataset - The parent dataset. - """ - - def __init__(self, dataset_group: DatasetGroup, dataset): - self.name = dataset_group.name - self.long_name = dataset_group.long_name or "" - self.description = dataset_group.description or "" - self.dataset = dataset - - def __str__(self): - """Return the group name as its string representation. - - Returns - ------- - str - The short name of the group. - """ - return self.name - - @property - async def files(self): - """Return the list of files in this group. - - Returns - ------- - list - List of file objects in this group. - """ - return [] - - async def _fetch_files(self): - """Fetch files from the remote source for this group.""" - return [] - - async def search(self, **kwargs): - """Search for files within this group matching the given criteria. - - Parameters - ---------- - ``**kwargs`` - Arbitrary filter criteria. - - Returns - ------- - list - List of matching file objects. - """ - return [] - - class DuckLakeCredentials(BaseModel): """Credentials for authenticating with the S3-compatible object storage. @@ -140,8 +52,6 @@ class DuckLake(BaseRemoteClient): Bucket name containing the catalog. credentials : DuckLakeCredentials, optional Credentials for authenticated S3 operations. - engine : object, optional - Pre-configured SQLAlchemy engine to reuse. """ endpoint: str = "nbg1.your-objectstorage.com" @@ -149,28 +59,26 @@ class DuckLake(BaseRemoteClient): bucket: str = "pysus" credentials: DuckLakeCredentials | None = None - _cache_dir: Path = PrivateAttr() - _catalog_local: Path = PrivateAttr() - _catalog_remote: str = "public/catalog.db" _s3_client: Any = PrivateAttr(default=None) - _engine: Any = PrivateAttr(default=None) _Session: Any = PrivateAttr(default=None) + _datasets: list = PrivateAttr(default_factory=list) - def __init__(self, engine=None, **data): - """Initialize the DuckLake client with an optional existing engine. + def __init__(self, engine=None, **data) -> None: + """Initialize the DuckLake client. Parameters ---------- engine : object, optional - Pre-configured SQLAlchemy engine instead of creating a new one. + Pre-configured SQLAlchemy engine for the discovery catalog. ``**data`` - Additional fields passed to the Pydantic base model. + Fields passed to the Pydantic base model. """ super().__init__(**data) self._engine = engine - self._cache_dir = Path(CACHEPATH) / "ducklake" + self._cache_dir: Path = Path(CACHEPATH) / "ducklake" self._cache_dir.mkdir(parents=True, exist_ok=True) - self._catalog_local = self._cache_dir / "catalog.db" + self._catalog_local: Path = self._cache_dir / "catalog.duckdb" + self._catalog_remote: str = "public/catalog.duckdb" @property def name(self) -> str: @@ -181,7 +89,7 @@ def name(self) -> str: str The client short name. """ - return "DuckLake" + return DUCKLAKE @property def long_name(self) -> str: @@ -207,18 +115,18 @@ def description(self) -> str: @property def catalog_path(self) -> Path: - """Return the local path to the downloaded catalog database. + """Return the local path to the discovery catalog database. Returns ------- Path - Filesystem path to the local catalog database file. + Filesystem path to the local discovery catalog file. """ return self._catalog_local @property def _catalog_url(self) -> str: - """Return the remote URL of the catalog database file.""" + """Return the remote URL of the discovery catalog.""" return f"https://{self.endpoint}/{self.bucket}/{self._catalog_remote}" @property @@ -244,16 +152,7 @@ async def datasets(self, **kwargs) -> list[DuckDataset]: def _fetch(): with self._Session() as session: - results = ( - session.query(CatalogDataset) - .options( - joinedload(CatalogDataset.groups).joinedload( - DatasetGroup.files - ), - joinedload(CatalogDataset.files), - ) - .all() - ) + results = session.query(Dataset).all() session.expunge_all() return results @@ -292,10 +191,19 @@ async def login( self._get_s3_client, ) - def _setup_engine(self): - """Create and configure the DuckDB engine with S3 settings.""" + def _setup_engine(self, local_path: Path | None = None): + """Create and configure a DuckDB engine with S3 settings. + + Parameters + ---------- + local_path : Path, optional + Path to the catalog database file. + Defaults to the discovery catalog. + """ + if local_path is None: + local_path = self._catalog_local engine = create_engine( - f"duckdb:///{self._catalog_local}", + f"duckdb:///{local_path}", poolclass=StaticPool, ) @@ -303,10 +211,8 @@ def _setup_engine(self): conn.exec_driver_sql("INSTALL ducklake; LOAD ducklake;") has_pysus = conn.exec_driver_sql( - """ - SELECT 1 FROM information_schema.schemata WHERE - schema_name = 'pysus' - """ + "SELECT 1 FROM information_schema.schemata" + " WHERE schema_name = 'pysus'" ).fetchone() if has_pysus: @@ -336,8 +242,8 @@ def _setup_engine(self): return engine - async def connect(self, force: bool = False): - """Connect to the catalog, downloading it first if necessary. + async def connect(self, force: bool = False) -> None: + """Connect to the discovery catalog, downloading first if needed. Parameters ---------- @@ -349,66 +255,72 @@ async def connect(self, force: bool = False): self._Session = sessionmaker(bind=self._engine) return - await self._load_catalog() + await self._download_catalog( + self._catalog_local, + self._catalog_remote, + ) self._engine = await to_thread.run_sync(self._setup_engine) self._Session = sessionmaker(bind=self._engine) - async def close(self): - """Dispose the engine, then upload the catalog if authenticated. + async def close(self, update_catalog: bool = False) -> None: + """Close all datasets and dispose the discovery engine. - Raises - ------ - PermissionError - If the client is not authenticated but an upload is required. + Parameters + ---------- + update_catalog : bool, optional + Whether to upload all per-dataset catalogs before closing. + Requires authenticated credentials. """ + if update_catalog: + await self._upload_catalog() + + datasets: list["DuckDataset"] = list(self._datasets) + for ds in datasets: + await ds.close(update_catalog=update_catalog) + self._datasets.clear() + if self._engine: await to_thread.run_sync(self._engine.dispose) - self._engine = None self._Session = None - - if self._is_authenticated: - await self._upload_catalog() - self._s3_client = None - async def _download_file( + async def _download( self, - file: BaseRemoteFile, - output: Path, + remote_path: str, + local_path: Path, + *, callback: Callable[[int, int], None] | None = None, - ) -> Path: - """Download a single file from object storage to the local path.""" - if not isinstance(file, File): - raise ValueError("FTP File was not properly instantiated") - - url = f"https://{self.endpoint}/{self.bucket}/{file.record.path}" - async with httpx.AsyncClient(follow_redirects=True) as client: - async with client.stream("GET", url) as r: - r.raise_for_status() - total = int(r.headers.get("Content-Length", 0)) - downloaded = 0 - with open(output, "wb") as f: - async for chunk in r.aiter_bytes(chunk_size=1024 * 1024): - await to_thread.run_sync(f.write, chunk) - downloaded += len(chunk) - if callback: - callback(downloaded, total) - return output + ) -> None: + """Download *remote_path* to *local_path* with streaming and retries. - async def _download_catalog(self, client: httpx.AsyncClient): - """Download the catalog database from remote storage with retries.""" + Parameters + ---------- + remote_path : str + Object key within the bucket. + local_path : Path + Local destination path. + callback : Callable[[int, int], None], optional + Progress callback receiving ``(downloaded, total)`` bytes. + """ + url = f"https://{self.endpoint}/{self.bucket}/{remote_path}" max_retries = 5 for attempt in range(max_retries): try: - async with client.stream("GET", self._catalog_url) as r: - r.raise_for_status() - with open(self._catalog_local, "wb") as f: - async for chunk in r.aiter_bytes( - chunk_size=1024 * 1024, - ): - await to_thread.run_sync(f.write, chunk) + async with httpx.AsyncClient(follow_redirects=True) as client: + async with client.stream("GET", url) as r: + r.raise_for_status() + total = int(r.headers.get("Content-Length", 0)) + downloaded = 0 + with open(local_path, "wb") as f: + async for chunk in r.aiter_bytes( + chunk_size=1024 * 1024, + ): + await to_thread.run_sync(f.write, chunk) + downloaded += len(chunk) + if callback: + callback(downloaded, total) return except OSError as e: if attempt < max_retries - 1: @@ -416,6 +328,54 @@ async def _download_catalog(self, client: httpx.AsyncClient): else: raise e + async def _download_catalog( + self, local_path: Path, remote_path: str + ) -> None: + """Download a catalog database from remote storage with retries. + + Parameters + ---------- + local_path : Path + Local destination path for the catalog file. + remote_path : str + Remote object key within the bucket. + """ + url = f"https://{self.endpoint}/{self.bucket}/{remote_path}" + + if local_path.exists(): + try: + local_size = local_path.stat().st_size + except OSError: + local_size = -1 + else: + local_size = -1 + + async with httpx.AsyncClient(follow_redirects=True) as client: + try: + head = await client.head(url) + head.raise_for_status() + remote_size = int(head.headers.get("content-length", 0)) + except Exception: # noqa: B902 + remote_size = 0 + + if remote_size == local_size: + return + + await self._download(remote_path, local_path) + + async def _download_file( + self, + file: BaseRemoteFile, + output: Path, + callback: Callable[[int, int], None] | None = None, + ) -> Path: + """Download a single file from object storage to the local path.""" + if not isinstance(file, File): + raise ValueError("FTP File was not properly instantiated") + + await self._download(file.record.path, output, callback=callback) + return output + def _get_s3_client(self): """Create and return a boto3 S3 client for the configured endpoint.""" if not self.credentials: @@ -431,142 +391,32 @@ def _get_s3_client(self): config=Config(signature_version="s3v4"), ) - async def _load_catalog(self): - """Download remote catalog if the local copy is outdated or missing.""" - async with httpx.AsyncClient(follow_redirects=True) as client: - local_size = -1 - if self._catalog_local.exists(): - try: - local_size = self._catalog_local.stat().st_size - except OSError: - pass - try: - head = await client.head(self._catalog_url) - head.raise_for_status() - remote_size = int(head.headers.get("content-length", 0)) - except Exception: # noqa: B902 - remote_size = 0 - if remote_size != local_size: - await self._download_catalog(client) + async def _upload_catalog(self) -> None: + """Upload all per-dataset catalogs to remote storage. - async def _upload_catalog(self): - """Upload the local catalog database to remote storage.""" - if not self._is_authenticated: + Requires authenticated credentials. + """ + if not self.credentials: raise PermissionError( "Admin credentials required to upload catalog.", ) - def _upload(): - self._s3_client.upload_file( - str(self._catalog_local), - self.bucket, - self._catalog_remote, - ) - - await to_thread.run_sync(_upload) - - async def query( - self, - client: Literal["FTP", "DadosGov"] | None = None, - dataset: str | None = None, - group: str | None = None, - state: str | None = None, - year: int | None = None, - month: int | None = None, - ) -> list[File]: - """Filter catalog files by client, dataset, group, state, year. + datasets = await self.datasets() + for ds in datasets: + if not ds._catalog_local.exists(): + continue - Parameters - ---------- - client : Literal["FTP", "DadosGov"], optional - Source client to filter by. - dataset : str, optional - Dataset name to filter by. - group : str, optional - Group name pattern to filter by (case-insensitive ILIKE). - state : str, optional - Two-letter state code to filter by. - year : int, optional - Year to filter by. - month : int, optional - Month to filter by. + _local = str(ds._catalog_local) + _name = ds._catalog_name - Returns - ------- - list[:class:`~pysus.api.ducklake.models.File`] - List of matching file objects. - """ - if not self._Session: - await self.connect() - - def _query(): - with self._Session() as session: - q = session.query(CatalogFile) - - if dataset: - q = ( - q.join(CatalogFile.dataset) - .options(contains_eager(CatalogFile.dataset)) - .filter(CatalogDataset.name == dataset.lower()) - ) - else: - q = q.options(joinedload(CatalogFile.dataset)) - - if group: - q = ( - q.join(CatalogFile.group) - .options(contains_eager(CatalogFile.group)) - .filter(DatasetGroup.name.ilike(group)) - ) - else: - q = q.options(joinedload(CatalogFile.group)) - - if state: - q = q.filter(CatalogFile.state == state.upper()) - - if year: - q = q.filter(CatalogFile.year == year) - - if month: - q = q.filter(CatalogFile.month == month) + def _upload(local=_local, name=_name): + self._s3_client.upload_file( + local, + self.bucket, + name, + ) - results = q.all() - session.expunge_all() - return results + await to_thread.run_sync(_upload) - records = await to_thread.run_sync(_query) - if client: - prefix = f"public/data/{client.lower()}/" - records = [r for r in records if r.path.startswith(prefix)] - else: - ftp = [r for r in records if r.path.startswith("public/data/ftp/")] - dadosgov = [ - r for r in records if r.path.startswith("public/data/dadosgov/") - ] - ftp_keys = set() - for r in ftp: - stem = Path(r.path).stem - key = (r.dataset_id, r.year, r.month, stem) - ftp_keys.add(key) - - def has_ftp_match(r): - stem = Path(r.path).stem - if stem.endswith(".csv"): - stem = stem[:-4] - key = (r.dataset_id, r.year, r.month, stem) - return key in ftp_keys - - records = ftp + [r for r in dadosgov if not has_ftp_match(r)] - - return [ - File( - path=r.path, - record=r, - dataset=CatalogDatasetAdapter(r.dataset, self), - group=( - DatasetGroupAdapter(r.group, r.dataset) if r.group else None - ), - ) - for r in records - ] +DuckDataset.model_rebuild(_types_namespace={"DuckLake": DuckLake}) diff --git a/pysus/api/ducklake/models.py b/pysus/api/ducklake/models.py index 306a96f6..c9c4d3e5 100644 --- a/pysus/api/ducklake/models.py +++ b/pysus/api/ducklake/models.py @@ -8,19 +8,19 @@ from collections.abc import Callable from datetime import datetime from pathlib import Path -from typing import Any, Union +from typing import TYPE_CHECKING, Any, Optional, Union -import anyio -from pydantic import Field +from anyio import to_thread +from pydantic import Field, PrivateAttr from pysus import CACHEPATH -from pysus.api.models import ( - BaseRemoteClient, - BaseRemoteDataset, - BaseRemoteFile, - BaseRemoteGroup, -) +from pysus.api.ducklake.catalog.orm.dataset import Dataset +from pysus.api.ducklake.catalog.orm.dataset import File as CatalogFile +from pysus.api.ducklake.catalog.orm.dataset import Group +from pysus.api.models import BaseRemoteDataset, BaseRemoteFile, BaseRemoteGroup +from sqlalchemy.orm import contains_eager, joinedload, sessionmaker -from .catalog import CatalogDataset, CatalogFile, DatasetGroup +if TYPE_CHECKING: # pragma: no cover + from .client import DuckLake class File(BaseRemoteFile): @@ -39,9 +39,18 @@ class File(BaseRemoteFile): """ record: CatalogFile = Field(exclude=True) - type: str = "remote" - dataset: Any - group: Any = None + group: Optional["DuckGroup"] = Field(default=None, exclude=True) + + def __init__(self, **data: Any) -> None: + record = data.pop("record") + group = data.pop("group", None) + super().__init__( + path=Path(record.path), + type=record.type or "remote", + record=record, # type: ignore[call-arg] + group=group, + **data, + ) @property def basename(self) -> str: @@ -147,23 +156,36 @@ def _calculate(): sha256_hash.update(byte_block) return sha256_hash.hexdigest() - actual_hash = await anyio.to_thread.run_sync(_calculate) + actual_hash = await to_thread.run_sync(_calculate) return actual_hash == self.sha256 class DuckDataset(BaseRemoteDataset): """A dataset from the DuckLake catalog, containing groups and files. + Each dataset manages its own DuckDB engine connected to a + per-dataset catalog file (``catalog_.db``). + Parameters ---------- - record : CatalogDataset + record : Dataset The underlying ORM record. client : BaseRemoteClient The parent client instance. """ - record: CatalogDataset = Field(exclude=True) - client: BaseRemoteClient = Field(exclude=True) + record: Dataset = Field(exclude=True) + client: "DuckLake" = Field(exclude=True) + + _engine: Any = PrivateAttr(default=None) + _Session: Any = PrivateAttr(default=None) + + def __init__(self, **data) -> None: + super().__init__(**data) + self._cache_dir: Path = Path(CACHEPATH) / "ducklake" + self._cache_dir.mkdir(parents=True, exist_ok=True) + self._catalog_name: str = f"catalog_{self.record.name.lower()}.duckdb" + self._catalog_local: Path = self._cache_dir / self._catalog_name def __repr__(self) -> str: """Return a string representation of the dataset. @@ -184,7 +206,7 @@ def name(self) -> str: str The dataset short name. """ - return self.record.name + return self.record.name # type: ignore @property def long_name(self) -> str: @@ -195,11 +217,7 @@ def long_name(self) -> str: str The dataset display name, falling back to the short name. """ - return ( - self.record.dataset_metadata.long_name - if self.record.dataset_metadata - else self.name - ) + return "" # TODO: @property def description(self) -> str: @@ -210,30 +228,171 @@ def description(self) -> str: str The dataset description, or an empty string if unavailable. """ - return ( - self.record.dataset_metadata.description - if self.record.dataset_metadata - else "" + return "" # TODO: + + @property + def catalog_path(self) -> Path: + """Return the local path to the downloaded catalog database. + + Returns + ------- + Path + Filesystem path to the local catalog database file. + """ + return self._catalog_local + + async def connect( + self, + force: bool = False, + callback: Callable[[int, int], None] | None = None, + ) -> None: + """Connect to the catalog, downloading it first if necessary. + + Parameters + ---------- + force : bool, optional + Whether to re-download and re-connect even if already connected. + """ + if self._engine and not force: + if not self._Session: + self._Session = sessionmaker(bind=self._engine) + return + + if self not in self.client._datasets: + self.client._datasets.append(self) + + await self.client._download( + f"public/{self._catalog_name}", + self._catalog_local, + callback=callback, ) + self._engine = await to_thread.run_sync( + lambda: self.client._setup_engine(self._catalog_local) + ) + self._Session = sessionmaker(bind=self._engine) + + async def close(self, update_catalog: bool = False): + """Dispose the engine, optionally uploading the per-dataset catalog. + + Parameters + ---------- + update_catalog : bool, optional + Whether to upload the per-dataset catalog to remote storage. + Requires the parent client to be authenticated. + """ + if self._engine: + await to_thread.run_sync(self._engine.dispose) + self._engine = None + self._Session = None + + if update_catalog and self.client._is_authenticated: + await self._upload_catalog() + + async def _upload_catalog(self): + """Upload the per-dataset catalog to remote storage.""" + if not self.client.credentials: + raise PermissionError( + "Admin credentials required to upload catalog.", + ) + + def _upload(): + self.client._s3_client.upload_file( + str(self._catalog_local), + self.client.bucket, + f"catalog_{self.record.name.lower()}.duckdb", + ) + + await to_thread.run_sync(_upload) + + async def query( + self, + group: str | None = None, + state: str | None = None, + year: int | None = None, + month: int | None = None, + ) -> list[File]: + """Filter files in this dataset's catalog by group, state, year, month. + + Parameters + ---------- + group : str, optional + Group name pattern to filter by (case-insensitive ILIKE). + state : str, optional + Two-letter state code to filter by. + year : int, optional + Year to filter by. + month : int, optional + Month to filter by. + + Returns + ------- + list[File] + List of matching file objects. + """ + if not self._Session: + await self.connect() + + def _query() -> list[CatalogFile]: + with self._Session() as session: + q = session.query(CatalogFile).options( + joinedload(CatalogFile.group), + joinedload(CatalogFile.dataset), + ) + if group: + q = ( + q.join(CatalogFile.group) + .options(contains_eager(CatalogFile.group)) + .filter(Group.name.ilike(group)) + ) + if state: + q = q.filter(CatalogFile.state == state.upper()) + if year: + q = q.filter(CatalogFile.year == year) + if month: + q = q.filter(CatalogFile.month == month) + results = q.all() + session.expunge_all() + return results + + records: list[CatalogFile] = await to_thread.run_sync(_query) + return [File(record=r, dataset=self) for r in records] async def _fetch_content(self) -> list[Union["DuckGroup", File]]: """Fetch groups and files belonging to this dataset.""" + if not self._Session: + await self.connect() + + def _fetch(): + with self._Session() as session: + dataset = ( + session.query(Dataset) + .options( + joinedload(Dataset.groups).joinedload(Group.files), + joinedload(Dataset.files), + ) + .filter(Dataset.name == self.record.name) + .first() + ) + if not dataset: + return [], [] + session.expunge_all() + return dataset.groups, dataset.files + + groups, files = await to_thread.run_sync(_fetch) + items: list[Union["DuckGroup", File]] = [] - if self.record.groups: - items.extend( - [DuckGroup(record=g, dataset=self) for g in self.record.groups] - ) + if groups: + items.extend([DuckGroup(record=g, dataset=self) for g in groups]) - if self.record.files: + if files: items.extend( [ File( - path=f.path, record=f, dataset=self, ) - for f in self.record.files + for f in files ] ) @@ -245,13 +404,13 @@ class DuckGroup(BaseRemoteGroup): Parameters ---------- - record : DatasetGroup + record : Group The underlying ORM record. dataset : DuckDataset The parent dataset instance. """ - record: DatasetGroup = Field(exclude=True) + record: Group = Field(exclude=True) dataset: DuckDataset = Field(exclude=True) @property @@ -263,7 +422,7 @@ def name(self) -> str: str The group short name. """ - return self.record.name + return self.record.name # type: ignore @property def long_name(self) -> str: @@ -274,11 +433,7 @@ def long_name(self) -> str: str The group display name, falling back to the short name. """ - return ( - self.record.group_metadata.long_name - if self.record.group_metadata - else self.name - ) + return self.record.long_name or self.name # type: ignore @property def description(self) -> str: @@ -289,15 +444,12 @@ def description(self) -> str: str The group description, or an empty string if unavailable. """ - if self.record.group_metadata: - return self.record.group_metadata.description - return "" + return self.record.description # type: ignore async def _fetch_files(self) -> list[BaseRemoteFile]: """Fetch the list of files belonging to this group.""" files: list[BaseRemoteFile] = [ File( - path=f.path, record=f, group=self, dataset=self.dataset, diff --git a/pysus/api/extensions.py b/pysus/api/extensions.py index 59de4dd7..bdbed991 100644 --- a/pysus/api/extensions.py +++ b/pysus/api/extensions.py @@ -10,7 +10,6 @@ from collections.abc import AsyncGenerator, Callable from datetime import datetime from pathlib import Path -from typing import ClassVar import chardet import pandas as pd @@ -19,17 +18,46 @@ from anyio import to_thread from dbfread import DBF as DBFReader from pydantic import Field, PrivateAttr +from pyreaddbc import dbc2dbf from pysus import CACHEPATH +from pysus.api.metadata.models import Column from pysus.api.models import BaseCompressedFile, BaseLocalFile, BaseTabularFile from .types import FileType -try: - from pyreaddbc import dbc2dbf - - DBC_IMPORT = True -except ImportError: - DBC_IMPORT = False +_DTYPE_MAP: dict[str, str] = { + "int8": "INTEGER", + "int16": "INTEGER", + "int32": "INTEGER", + "int64": "BIGINT", + "uint8": "INTEGER", + "uint16": "INTEGER", + "uint32": "INTEGER", + "uint64": "BIGINT", + "float": "FLOAT", + "float16": "FLOAT", + "float32": "FLOAT", + "float64": "DOUBLE", + "double": "DOUBLE", + "bool": "BOOLEAN", + "bool_": "BOOLEAN", + "date32": "DATE", + "date64": "DATE", + "date": "DATE", + "datetime64[ns]": "DATE", + "object": "VARCHAR", + "string": "VARCHAR", + "utf8": "VARCHAR", + "large_string": "VARCHAR", +} + + +def _map_dtype(raw: str) -> str: + raw_lower = raw.lower().split("[")[0].split("(")[0].strip() + for key, val in _DTYPE_MAP.items(): + if raw_lower == key or raw_lower.startswith(key): + return val + return "VARCHAR" class File(BaseLocalFile): @@ -97,8 +125,9 @@ class CSV(BaseTabularFile): _sep: str | None = PrivateAttr(default=None) @property - def columns(self) -> list[str]: - """Return the column names from the CSV header row.""" + def columns(self) -> list["Column"]: + """Return the column metadata from the CSV header row.""" + if self._encoding is not None: enc = self._encoding else: @@ -112,7 +141,10 @@ def columns(self) -> list[str]: ) self._encoding = enc df = pd.read_csv(self.path, sep=",", nrows=0, encoding=enc) - return df.columns.tolist() + return [ + Column.from_schema(name=col, dtype=_map_dtype(str(dt))) + for col, dt in zip(df.columns, df.dtypes) + ] @property def rows(self) -> int: @@ -207,9 +239,16 @@ def schema(self) -> pa.Schema: return pq.read_schema(self.path) @property - def columns(self) -> list[str]: - """Return the column names from the Parquet schema.""" - return pq.read_schema(self.path).names + def columns(self) -> list["Column"]: + """Return the column metadata from the Parquet schema.""" + + schema = pq.read_schema(self.path) + return [ + Column.from_schema( + name=field.name, dtype=_map_dtype(str(field.type)) + ) + for field in schema + ] @property def rows(self) -> int: @@ -298,9 +337,24 @@ class DBF(BaseTabularFile): type: FileType = Field("DBF") @property - def columns(self) -> list[str]: - """Return the field names from the DBF file.""" - return DBFReader(self.path, load=False).field_names + def columns(self) -> list["Column"]: + """Return the column metadata from the DBF file.""" + + reader = DBFReader(self.path, load=False) + _DBF_DTYPE = { + "C": "VARCHAR", + "N": "INTEGER", + "F": "FLOAT", + "D": "DATE", + "L": "BOOLEAN", + "M": "VARCHAR", + } + return [ + Column.from_schema( + name=f.name, dtype=_DBF_DTYPE.get(f.type, "VARCHAR") + ) + for f in reader.fields + ] @property def rows(self) -> int: @@ -440,7 +494,7 @@ class DBC(BaseTabularFile): type: FileType = Field("DBC") @property - def columns(self) -> list[str]: + def columns(self) -> list["Column"]: """Not supported for DBC files. Convert to Parquet first.""" raise NotImplementedError( "DBC metadata cannot be read directly. Convert to Parquet first." @@ -473,7 +527,8 @@ async def to_parquet( chunk_size: int = 30000, callback: Callable[[int, int], None] | None = None, ) -> "Parquet": - """Decompress DBC to DBF, then convert to Parquet.""" + import gc + from pysus.api.extensions import ExtensionFactory if output_path is None: @@ -501,9 +556,21 @@ async def to_parquet( chunk_size=chunk_size, callback=callback, ) + except Exception as err: # noqa + if "dbf_ext" in locals(): + del dbf_ext + gc.collect() + raise err finally: if tmp_dbf_path.exists(): - await to_thread.run_sync(tmp_dbf_path.unlink) + try: + await to_thread.run_sync(tmp_dbf_path.unlink) + except PermissionError: + gc.collect() + try: + await to_thread.run_sync(tmp_dbf_path.unlink) + except PermissionError: + pass class JSON(BaseTabularFile): @@ -512,14 +579,18 @@ class JSON(BaseTabularFile): type: FileType = Field("JSON") @property - def columns(self) -> list[str]: - """Return the column names from the JSON file.""" + def columns(self) -> list["Column"]: + """Return the column metadata from the JSON file.""" + df = ( pd.read_json(self.path, nrows=0) if self.path.stat().st_size > 0 else pd.DataFrame() ) - return df.columns.tolist() + return [ + Column.from_schema(name=col, dtype=_map_dtype(str(dt))) + for col, dt in zip(df.columns, df.dtypes) + ] @property def rows(self) -> int: @@ -773,76 +844,6 @@ def _extract(): return list(await asyncio.gather(*tasks)) -class DBCNotImported(BaseTabularFile): - """Placeholder for DBC files when optional dependency is not installed.""" - - path: Path = Field(default_factory=lambda: Path("...")) - type: str | FileType = Field(default="remote") - import_err: ClassVar[ - str - ] = """ - run "pip install pysus[dbc]" to handle DBC files. - Make sure you also have libffi installed on the system. It may not work - on Windows - """ - - @property - def name(self) -> str: - """Raise ImportError indicating the missing DBC dependency.""" - raise ImportError(self.import_err) - - @property - def extension(self) -> str: - """Return the .dbc extension.""" - return ".dbc" - - @property - def size(self) -> int: - """Raise ImportError indicating the missing DBC dependency.""" - raise ImportError(self.import_err) - - @property - def modify(self) -> datetime: - """Raise ImportError indicating the missing DBC dependency.""" - raise ImportError(self.import_err) - - @property - def columns(self) -> list[str]: - """Raise ImportError indicating the missing DBC dependency.""" - raise ImportError(self.import_err) - - @property - def rows(self) -> int: - """Raise ImportError indicating the missing DBC dependency.""" - raise ImportError(self.import_err) - - async def load(self) -> pd.DataFrame: - """Raise ImportError indicating the missing DBC dependency.""" - raise ImportError(self.import_err) - - def stream( - self, - chunk_size: int = 10000, - ) -> AsyncGenerator[pd.DataFrame, None]: - """Raise ImportError indicating the missing DBC dependency.""" - - async def _internal_gen(): - """Yield nothing; always raises ImportError.""" - raise ImportError(self.import_err) - yield pd.DataFrame() - - return _internal_gen() - - async def to_parquet( - self, - output_path: str | Path | None = None, - chunk_size: int = 10000, - callback: Callable[[int, int], None] | None = None, - ) -> Parquet: - """Raise ImportError indicating the missing DBC dependency.""" - raise ImportError(self.import_err) - - class ExtensionFactory: """Factory that maps file extensions and MIME types to handler classes.""" @@ -864,7 +865,7 @@ class ExtensionFactory: ".csv": CSV, ".parquet": Parquet, ".dbf": DBF, - ".dbc": DBC if DBC_IMPORT else DBCNotImported, # type: ignore + ".dbc": DBC, ".pdf": PDF, ".json": JSON, } diff --git a/pysus/api/ftp/README.md b/pysus/api/ftp/README.md new file mode 100644 index 00000000..e69de29b diff --git a/pysus/api/ftp/client.py b/pysus/api/ftp/client.py index 76872e54..5c265d0a 100644 --- a/pysus/api/ftp/client.py +++ b/pysus/api/ftp/client.py @@ -11,8 +11,9 @@ from anyio import to_thread from pydantic import PrivateAttr from pysus.api.models import BaseRemoteClient, BaseRemoteFile +from pysus.api.types import FTP as FTP_STR -if TYPE_CHECKING: +if TYPE_CHECKING: # pragma: no cover from pysus.api.ftp.models import Dataset from pysus.api.types import State @@ -55,7 +56,7 @@ def name(self) -> str: str The client short name ("FTP"). """ - return "FTP" + return FTP_STR @property def long_name(self) -> str: diff --git a/pysus/api/metadata/__init__.py b/pysus/api/metadata/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pysus/api/metadata/models.py b/pysus/api/metadata/models.py new file mode 100644 index 00000000..62a20bc9 --- /dev/null +++ b/pysus/api/metadata/models.py @@ -0,0 +1,80 @@ +from dataclasses import dataclass, field + +from pysus.api.types import ColumnType, Origin + + +def lookup_column_meta(name: str) -> dict[str, str] | None: + """Look up column metadata from the global columns.py constants. + + Returns the {dataset: description} dict if the column name exists + as a constant in columns.py, or None if not found. + """ + try: + from pysus.api.ducklake.catalog import columns as _cols + + return getattr(_cols, name.upper(), None) + except ImportError: + return None + + +def pick_description(meta: dict[str, str] | None) -> str: + """Pick the best description from a column metadata dict.""" + if meta is None: + return "" + for desc in meta.values(): + if desc: + return desc + return "" + + +@dataclass +class Dataset: + name: str + long_name: str + description: str + + +@dataclass +class DatasetGroup: + name: str + long_name: str + description: str + + +@dataclass +class FileMeta: + name: str + path: str + size: int + state: str | None = None + uf: str | None = None + year: int | None = None + month: int | None = None + origin_path: str | None = None + origin_size: int | None = None + + +@dataclass +class File: + origin: Origin + dataset: Dataset | None = None + group: DatasetGroup | None = None + columns: list["Column"] = field(default_factory=list) + _meta: FileMeta | None = None + + +@dataclass +class Column: + name: str + description: str + dtype: ColumnType + + @classmethod + def from_schema(cls, name: str, dtype: ColumnType) -> "Column": + """Create a Column from a file schema, looking up description from + columns.py.""" + return cls( + name=name, + description=pick_description(lookup_column_meta(name)), + dtype=dtype, + ) diff --git a/pysus/api/metadata/report.py b/pysus/api/metadata/report.py new file mode 100644 index 00000000..1f1f0fe8 --- /dev/null +++ b/pysus/api/metadata/report.py @@ -0,0 +1,10 @@ +class Header: + """...""" + + +class Columns: + """...""" + + +class Footer: + """...""" diff --git a/pysus/api/models.py b/pysus/api/models.py index 6f9029a0..9f0c0967 100644 --- a/pysus/api/models.py +++ b/pysus/api/models.py @@ -27,8 +27,9 @@ from .types import FileType, State -if TYPE_CHECKING: +if TYPE_CHECKING: # pragma: no cover from extensions import Parquet + from pysus.api.metadata.models import Column class BaseFile(BaseModel, ABC): @@ -151,8 +152,8 @@ class BaseTabularFile(BaseLocalFile, ABC): @property @abstractmethod - def columns(self) -> list[str]: - """Return the list of column names.""" + def columns(self) -> list[Column]: + """Return the list of column metadata.""" @property @abstractmethod @@ -212,49 +213,52 @@ async def to_parquet( ) try: - async for chunk in self.stream( - chunk_size=chunk_size, - ): # type: ignore - if chunk.empty: - continue - - rows_in_chunk = len(chunk) - current_rows += rows_in_chunk - - table = await to_thread.run_sync( - pa.Table.from_pandas, - chunk, - ) - - schema = table.schema - if any(pa.types.is_null(f.type) for f in schema): - new_fields = [ - ( - pa.field(f.name, pa.string(), nullable=True) - if pa.types.is_null(f.type) - else f - ) - for f in schema - ] - table = table.cast(pa.schema(new_fields)) - - if writer is None: - writer = await to_thread.run_sync( - pq.ParquetWriter, output_path, table.schema + try: + async for chunk in self.stream( + chunk_size=chunk_size, + ): + if chunk.empty: + continue + + rows_in_chunk = len(chunk) + current_rows += rows_in_chunk + + table = await to_thread.run_sync( + pa.Table.from_pandas, + chunk, ) - await to_thread.run_sync(writer.write_table, table) + schema = table.schema + if any(pa.types.is_null(f.type) for f in schema): + new_fields = [ + ( + pa.field(f.name, pa.string(), nullable=True) + if pa.types.is_null(f.type) + else f + ) + for f in schema + ] + table = table.cast(pa.schema(new_fields)) + + if writer is None: + writer = await to_thread.run_sync( + pq.ParquetWriter, output_path, table.schema + ) + + await to_thread.run_sync(writer.write_table, table) - pbar.update(rows_in_chunk) + pbar.update(rows_in_chunk) - if callback: - callback(current_rows, total_rows) + if callback: + callback(current_rows, total_rows) - await asyncio.sleep(0) + await asyncio.sleep(0) + finally: + if writer: + await to_thread.run_sync(writer.close) + writer = None finally: pbar.close() - if writer: - await to_thread.run_sync(writer.close) output = await ExtensionFactory.instantiate(output_path) if not isinstance(output, Parquet): diff --git a/pysus/api/types.py b/pysus/api/types.py index 2ed8f95e..2e3708a8 100644 --- a/pysus/api/types.py +++ b/pysus/api/types.py @@ -1,53 +1,128 @@ -"""Type aliases used across the PySUS API. - -FileType: - Discriminated union of supported local file types - (FILE, DIR, PARQUET, CSV, JSON, PDF, DBC, DBF, ZIP). - -State: - Brazilian state abbreviations (AC, AL, AP, ..., DF). -""" - -from typing import Literal - -FileType = Literal[ - "FILE", - "DIR", - "PARQUET", - "CSV", - "JSON", - "PDF", - "DBC", - "DBF", - "ZIP", -] - -State = Literal[ - "AC", - "AL", - "AP", - "AM", - "BA", - "CE", - "ES", - "GO", - "MA", - "MT", - "MS", - "MG", - "PA", - "PB", - "PR", - "PE", - "PI", - "RJ", - "RN", - "RS", - "RO", - "RR", - "SC", - "SP", - "SE", - "TO", - "DF", -] +from typing import Annotated, TypeAlias + +from pydantic import AfterValidator + + +def _validate_origin(v: str) -> str: + valid = (FTP, DADOSGOV, DUCKLAKE) + assert v in valid, f"Invalid origin: {v!r}" + return v + + +def _validate_column_type(v: str) -> str: + valid = ( + "VARCHAR", + "INTEGER", + "BIGINT", + "FLOAT", + "DOUBLE", + "BOOLEAN", + "DATE", + ) + assert v in valid, f"Invalid column type: {v!r}" + return v + + +def _validate_file_type(v: str) -> str: + valid = ( + "FILE", + "DIR", + "PARQUET", + "CSV", + "JSON", + "PDF", + "DBC", + "DBF", + "ZIP", + ) + assert v in valid, f"Invalid file type: {v!r}" + return v + + +def _validate_dataset_name(v: str) -> str: + valid = ( + "SINAN", + "SINASC", + "SIM", + "SIH", + "SIA", + "PNI", + "IBGE", + "CNES", + "CIHA", + ) + assert v in valid, f"Invalid dataset name: {v!r}" + return v + + +def _validate_state(v: str) -> str: + valid = ( + "AC", + "AL", + "AP", + "AM", + "BA", + "CE", + "ES", + "GO", + "MA", + "MT", + "MS", + "MG", + "PA", + "PB", + "PR", + "PE", + "PI", + "RJ", + "RN", + "RS", + "RO", + "RR", + "SC", + "SP", + "SE", + "TO", + "DF", + ) + assert v in valid, f"Invalid state: {v!r}" + return v + + +FTP: Annotated[str, AfterValidator(_validate_origin)] = "FTP" +DADOSGOV: Annotated[str, AfterValidator(_validate_origin)] = "DadosGov" +DUCKLAKE: Annotated[str, AfterValidator(_validate_origin)] = "DuckLake" + +VARCHAR: Annotated[str, AfterValidator(_validate_column_type)] = "VARCHAR" +INTEGER: Annotated[str, AfterValidator(_validate_column_type)] = "INTEGER" +BIGINT: Annotated[str, AfterValidator(_validate_column_type)] = "BIGINT" +FLOAT: Annotated[str, AfterValidator(_validate_column_type)] = "FLOAT" +DOUBLE: Annotated[str, AfterValidator(_validate_column_type)] = "DOUBLE" +BOOLEAN: Annotated[str, AfterValidator(_validate_column_type)] = "BOOLEAN" +DATE: Annotated[str, AfterValidator(_validate_column_type)] = "DATE" + +FILE: Annotated[str, AfterValidator(_validate_file_type)] = "FILE" +DIR: Annotated[str, AfterValidator(_validate_file_type)] = "DIR" +PARQUET: Annotated[str, AfterValidator(_validate_file_type)] = "PARQUET" +CSV: Annotated[str, AfterValidator(_validate_file_type)] = "CSV" +JSON: Annotated[str, AfterValidator(_validate_file_type)] = "JSON" +PDF: Annotated[str, AfterValidator(_validate_file_type)] = "PDF" +DBC: Annotated[str, AfterValidator(_validate_file_type)] = "DBC" +DBF: Annotated[str, AfterValidator(_validate_file_type)] = "DBF" +ZIP: Annotated[str, AfterValidator(_validate_file_type)] = "ZIP" + +SINAN: Annotated[str, AfterValidator(_validate_dataset_name)] = "SINAN" +SINASC: Annotated[str, AfterValidator(_validate_dataset_name)] = "SINASC" +SIM: Annotated[str, AfterValidator(_validate_dataset_name)] = "SIM" +SIH: Annotated[str, AfterValidator(_validate_dataset_name)] = "SIH" +SIA: Annotated[str, AfterValidator(_validate_dataset_name)] = "SIA" +PNI: Annotated[str, AfterValidator(_validate_dataset_name)] = "PNI" +IBGE: Annotated[str, AfterValidator(_validate_dataset_name)] = "IBGE" +CNES: Annotated[str, AfterValidator(_validate_dataset_name)] = "CNES" +CIHA: Annotated[str, AfterValidator(_validate_dataset_name)] = "CIHA" + +Origin: TypeAlias = Annotated[str, AfterValidator(_validate_origin)] +ColumnType: TypeAlias = Annotated[str, AfterValidator(_validate_column_type)] +FileType: TypeAlias = Annotated[str, AfterValidator(_validate_file_type)] +DatasetName: TypeAlias = Annotated[str, AfterValidator(_validate_dataset_name)] +State: TypeAlias = Annotated[str, AfterValidator(_validate_state)] diff --git a/pysus/tests/api/dadosgov/test_client.py b/pysus/tests/api/dadosgov/test_client.py new file mode 100644 index 00000000..9c728040 --- /dev/null +++ b/pysus/tests/api/dadosgov/test_client.py @@ -0,0 +1,545 @@ +"""Tests for pysus.api.dadosgov.client.""" + +from datetime import datetime +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx +import pytest +from pysus import __version__ +from pysus.api.dadosgov.client import ( + ConjuntoDados, + DadosGov, + Recurso, + to_bool, + to_datetime, +) + + +class TestToDatetime: + def test_valid_datetime_string(self): + result = to_datetime("01/02/2024 10:30:00") + assert isinstance(result, datetime) + assert result.year == 2024 + assert result.month == 2 + assert result.day == 1 + assert result.hour == 10 + assert result.minute == 30 + assert result.second == 0 + + def test_valid_date_string(self): + result = to_datetime("15/03/2024") + assert isinstance(result, datetime) + assert result.year == 2024 + assert result.month == 3 + assert result.day == 15 + + def test_none_value(self): + assert to_datetime(None) is None + + def test_empty_string(self): + assert to_datetime("") is None + + def test_indisponivel_value(self): + assert to_datetime("Indisponível") is None + + def test_indisponivel_with_accent(self): + assert to_datetime("Dado Indisponível") is None + + def test_invalid_string(self): + assert to_datetime("not-a-date") is None + + def test_non_string_non_none(self): + assert to_datetime(12345) is None + + +class TestToBool: + def test_true_bool(self): + assert to_bool(True) is True + + def test_false_bool(self): + assert to_bool(False) is False + + def test_sim_string(self): + assert to_bool("sim") is True + + def test_nao_string(self): + assert to_bool("não") is False + + def test_true_string(self): + assert to_bool("true") is True + + def test_false_string(self): + assert to_bool("false") is False + + def test_1_string(self): + assert to_bool("1") is True + + def test_0_string(self): + assert to_bool("0") is False + + def test_Sim_capitalized(self): + assert to_bool("Sim") is True + + def test_TRUE_uppercase(self): + assert to_bool("TRUE") is True + + def test_arbitrary_string(self): + assert to_bool("qualquer") is False + + def test_integer_one(self): + assert to_bool(1) is True + + def test_integer_zero(self): + assert to_bool(0) is False + + +class TestRecurso: + def test_fields_from_aliases(self): + r = Recurso( + id="r1", + titulo="Arquivo CSV", + link="https://example.com/file.csv", + tamanho=1024, + dataUltimaAtualizacaoArquivo="10/05/2024", + nomeArquivo="dados.csv", + ) + assert r.id == "r1" + assert r.title == "Arquivo CSV" + assert r.url == "https://example.com/file.csv" + assert r.api_size == 1024 + assert isinstance(r.last_modified, datetime) + assert r.file_name == "dados.csv" + + def test_fields_from_names(self): + r = Recurso( + id="r2", + title="CSV File", + url="https://example.com/data.zip", + api_size=2048, + file_name="data.zip", + ) + assert r.id == "r2" + assert r.title == "CSV File" + assert r.url == "https://example.com/data.zip" + assert r.api_size == 2048 + assert r.file_name == "data.zip" + assert r.last_modified is None + + def test_last_modified_none_when_indisponivel(self): + r = Recurso( + id="r3", + title="No Date", + url="https://example.com/file", + api_size=0, + dataUltimaAtualizacaoArquivo="Indisponível", + ) + assert r.last_modified is None + + @pytest.mark.asyncio + async def test_get_size_head_success(self): + r = Recurso( + id="r4", + title="Test", + url="https://example.com/file.csv", + api_size=0, + ) + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"Content-Length": "5000"} + + mock_client = AsyncMock() + mock_client.head.return_value = mock_response + mock_client.__aenter__.return_value = mock_client + + with patch("httpx.AsyncClient", return_value=mock_client): + size = await r.get_size() + + assert size == 5000 + mock_client.head.assert_called_once_with("https://example.com/file.csv") + + @pytest.mark.asyncio + async def test_get_size_head_405_fallback_to_get(self): + r = Recurso( + id="r5", + title="Test", + url="https://example.com/file.csv", + api_size=0, + ) + head_response = MagicMock() + head_response.status_code = 405 + + get_response = MagicMock() + get_response.headers = {"Content-Length": "3000"} + + mock_client = AsyncMock() + mock_client.head.return_value = head_response + mock_client.get.return_value = get_response + mock_client.__aenter__.return_value = mock_client + + with patch("httpx.AsyncClient", return_value=mock_client): + size = await r.get_size() + + assert size == 3000 + mock_client.head.assert_called_once() + mock_client.get.assert_called_once_with( + "https://example.com/file.csv", headers={"Range": "bytes=0-0"} + ) + + @pytest.mark.asyncio + async def test_get_size_no_content_length(self): + r = Recurso( + id="r6", + title="Test", + url="https://example.com/file.csv", + api_size=0, + ) + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {} + + mock_client = AsyncMock() + mock_client.head.return_value = mock_response + mock_client.__aenter__.return_value = mock_client + + with patch("httpx.AsyncClient", return_value=mock_client): + size = await r.get_size() + + assert size == 0 + + +class TestConjuntoDados: + def test_fields_from_aliases(self): + c = ConjuntoDados( + id="c1", + titulo="Dataset Teste", + nome="dataset-teste", + recursos=[ + Recurso( + id="r1", + titulo="Resource", + link="http://example.com", + tamanho=100, + ) + ], + ) + assert c.id == "c1" + assert c.title == "Dataset Teste" + assert c.slug == "dataset-teste" + assert len(c.resources) == 1 + assert c.resources[0].id == "r1" + + def test_fields_from_names(self): + c = ConjuntoDados(id="c2", title="Dataset", slug="dataset") + assert c.id == "c2" + assert c.title == "Dataset" + assert c.slug == "dataset" + assert c.resources == [] + + +class TestDadosGov: + def test_name(self): + client = DadosGov() + assert client.name == "DadosGov" + + def test_long_name(self): + client = DadosGov() + assert client.long_name == "Portal Brasileiro de Dados Abertos" + + def test_description(self): + client = DadosGov() + expected = "Interface de acesso ao API do Portal de Dados Abertos" + assert client.description == expected + + @pytest.mark.asyncio + async def test_connect_with_token_creates_client(self): + client = DadosGov() + assert client._client is None + + with patch("httpx.AsyncClient") as mock_async_client: + await client.connect(token="test-token-123") + + assert client._token == "test-token-123" + assert client._client is not None + mock_async_client.assert_called_once_with( + base_url="https://dados.gov.br/dados/api", + headers={ + "Accept": "application/json", + "User-Agent": f"PySUS/{__version__}", + "chave-api-dados-abertos": "test-token-123", + }, + timeout=30.0, + follow_redirects=True, + ) + + @pytest.mark.asyncio + async def test_connect_without_token_raises_value_error(self): + client = DadosGov() + with pytest.raises( + ValueError, match="A token is required to connect to DadosGov" + ): + await client.connect(token=None) + + @pytest.mark.asyncio + async def test_connect_with_existing_token(self): + client = DadosGov() + client._token = "existing-token" + + with patch("httpx.AsyncClient") as mock_async_client: + await client.connect() + + assert client._token == "existing-token" + mock_async_client.assert_called_once() + + @pytest.mark.asyncio + async def test_connect_replaces_existing_client(self): + client = DadosGov() + old_close = AsyncMock() + old_client = AsyncMock() + old_client.aclose = old_close + client._client = old_client + + with patch("httpx.AsyncClient"): + await client.connect(token="new-token") + + old_close.assert_awaited_once() + assert client._token == "new-token" + + @pytest.mark.asyncio + async def test_login_delegates_to_connect(self): + client = DadosGov() + with patch( + "pysus.api.dadosgov.client.DadosGov.connect" + ) as mock_connect: + mock_connect.return_value = None + await client.login(token="login-token") + mock_connect.assert_awaited_once_with(token="login-token") + + @pytest.mark.asyncio + async def test_login_with_kwargs(self): + client = DadosGov() + with patch( + "pysus.api.dadosgov.client.DadosGov.connect" + ) as mock_connect: + mock_connect.return_value = None + await client.login(token="t", extra_param="ignored") + mock_connect.assert_awaited_once_with(token="t") + + @pytest.mark.asyncio + async def test_close_with_active_client(self): + client = DadosGov() + mock_http = AsyncMock() + client._client = mock_http + + await client.close() + + mock_http.aclose.assert_awaited_once() + assert client._client is None + + @pytest.mark.asyncio + async def test_close_without_client(self): + client = DadosGov() + client._client = None + await client.close() + assert client._client is None + + @pytest.mark.asyncio + async def test_datasets_returns_list(self): + client = DadosGov() + result = await client.datasets() + assert isinstance(result, list) + assert len(result) > 0 + from pysus.api.dadosgov.databases import AVAILABLE_DATABASES + + assert len(result) == len(AVAILABLE_DATABASES) + for ds in result: + assert ds.client is client + + @pytest.mark.asyncio + async def test_list_datasets_connection_error(self): + client = DadosGov() + client._client = None + with pytest.raises(ConnectionError, match="Client not connected"): + await client.list_datasets() + + @pytest.mark.asyncio + async def test_list_datasets_success(self): + client = DadosGov() + mock_http = AsyncMock(spec=httpx.AsyncClient) + mock_response = MagicMock() + mock_response.json.return_value = [ + { + "id": "ds1", + "titulo": "Dataset 1", + "nome": "ds-1", + "recursos": [], + }, + { + "id": "ds2", + "titulo": "Dataset 2", + "nome": "ds-2", + "recursos": [ + { + "id": "r1", + "titulo": "Resource", + "link": "http://example.com", + "tamanho": 50, + } + ], + }, + ] + mock_http.get.return_value = mock_response + client._client = mock_http + + results = await client.list_datasets( + pagina=2, + nome_conjunto="teste", + dados_abertos=True, + id_organizacao="org1", + ) + + assert len(results) == 2 + assert results[0].id == "ds1" + assert results[1].id == "ds2" + assert len(results[1].resources) == 1 + mock_http.get.assert_awaited_once_with( + "publico/conjuntos-dados", + params={ + "pagina": 2, + "nomeConjuntoDados": "teste", + "dadosAbertos": True, + "isPrivado": False, + "idOrganizacao": "org1", + }, + ) + + @pytest.mark.asyncio + async def test_list_datasets_minimal_params(self): + client = DadosGov() + mock_http = AsyncMock(spec=httpx.AsyncClient) + mock_response = MagicMock() + mock_response.json.return_value = [] + mock_http.get.return_value = mock_response + client._client = mock_http + + results = await client.list_datasets() + + assert results == [] + mock_http.get.assert_awaited_once_with( + "publico/conjuntos-dados", + params={"pagina": 1, "isPrivado": False}, + ) + + @pytest.mark.asyncio + async def test_get_dataset_connection_error(self): + client = DadosGov() + client._client = None + with pytest.raises(ConnectionError, match="Client not connected"): + await client.get_dataset("some-id") + + @pytest.mark.asyncio + async def test_get_dataset_success(self): + client = DadosGov() + mock_http = AsyncMock(spec=httpx.AsyncClient) + mock_response = MagicMock() + mock_response.json.return_value = { + "id": "ds1", + "titulo": "Single Dataset", + "nome": "single-ds", + "recursos": [], + } + mock_http.get.return_value = mock_response + client._client = mock_http + + result = await client.get_dataset("ds1") + + assert result.id == "ds1" + assert result.title == "Single Dataset" + mock_http.get.assert_awaited_once_with("publico/conjuntos-dados/ds1") + + @pytest.mark.asyncio + async def test_download_file_connection_error(self): + client = DadosGov() + client._client = None + mock_file = MagicMock() + mock_file.path = "http://example.com/file.csv" + with pytest.raises(ConnectionError, match="Client not connected"): + await client._download_file(mock_file, Path("/tmp/out.csv")) + + @pytest.mark.asyncio + async def test_download_file_success(self, tmp_path): + client = DadosGov() + mock_http = AsyncMock(spec=httpx.AsyncClient) + + async def _aiter_bytes(): + yield b"12345" + yield b"67890" + + mock_response = MagicMock() + mock_response.headers = {"Content-Length": "10"} + mock_response.aiter_bytes = _aiter_bytes + + cm = AsyncMock() + cm.__aenter__.return_value = mock_response + cm.__aexit__.return_value = None + + mock_http.stream.return_value = cm + client._client = mock_http + + mock_file = MagicMock() + mock_file.path = "https:/example.com/file.csv" + + output = tmp_path / "test_download.csv" + callback = MagicMock() + + try: + result = await client._download_file( + mock_file, output, callback=callback + ) + + assert result == output + mock_http.stream.assert_called_once_with( + "GET", "https://example.com/file.csv" + ) + assert output.read_bytes() == b"1234567890" + assert callback.call_count == 2 + callback.assert_any_call(5, 10) + callback.assert_any_call(10, 10) + finally: + if output.exists(): + output.unlink() + + @pytest.mark.asyncio + async def test_download_file_no_callback(self, tmp_path): + client = DadosGov() + mock_http = AsyncMock(spec=httpx.AsyncClient) + + async def _aiter_bytes(): + yield b"data" + + mock_response = MagicMock() + mock_response.headers = {"Content-Length": "4"} + mock_response.aiter_bytes = _aiter_bytes + + cm = AsyncMock() + cm.__aenter__.return_value = mock_response + cm.__aexit__.return_value = None + + mock_http.stream.return_value = cm + client._client = mock_http + + mock_file = MagicMock() + mock_file.path = "http:/example.com/file.csv" + + output = tmp_path / "test_download_nocb.csv" + + try: + result = await client._download_file(mock_file, output) + + assert result == output + mock_http.stream.assert_called_once_with( + "GET", "http://example.com/file.csv" + ) + finally: + if output.exists(): + output.unlink() diff --git a/pysus/tests/api/dadosgov/test_databases.py b/pysus/tests/api/dadosgov/test_databases.py new file mode 100644 index 00000000..698b723a --- /dev/null +++ b/pysus/tests/api/dadosgov/test_databases.py @@ -0,0 +1,411 @@ +"""Tests for pysus.api.dadosgov.databases.""" + +from typing import Any +from unittest.mock import patch + +from pysus.api.dadosgov.client import DadosGov +from pysus.api.dadosgov.databases import ( + AVAILABLE_DATABASES, + CNES, + COVID19, + MONTHS, + PNI, + SIA, + SIM, + SINAN, + SINASC, + _parse_year, + _skip, +) + +# --------------------------------------------------------------------------- +# Helper +# --------------------------------------------------------------------------- + + +def db_instance(db_class): + return db_class(client=DadosGov()) + + +# --------------------------------------------------------------------------- +# MONTHS +# --------------------------------------------------------------------------- + + +class TestMONTHS: + def test_all_months_present(self): + assert MONTHS == { + "jan": 1, + "fev": 2, + "mar": 3, + "abr": 4, + "mai": 5, + "jun": 6, + "jul": 7, + "ago": 8, + "set": 9, + "out": 10, + "nov": 11, + "dez": 12, + } + + +# --------------------------------------------------------------------------- +# _parse_year +# --------------------------------------------------------------------------- + + +class TestParseYear: + def test_valid_year(self): + assert _parse_year("2024") == 2024 + + def test_year_below_range(self): + assert _parse_year("1969") is None + + def test_year_above_range(self): + assert _parse_year("2101") is None + + def test_non_numeric(self): + assert _parse_year("abcd") is None + + def test_boundary_low(self): + assert _parse_year("1970") == 1970 + + def test_boundary_high(self): + assert _parse_year("2100") == 2100 + + +# --------------------------------------------------------------------------- +# _skip +# --------------------------------------------------------------------------- + + +class TestSkip: + def test_get_prefix(self): + assert _skip("get_dados.csv") is True + assert _skip("get_.pdf") is True + + def test_pdf_suffix(self): + assert _skip("documento.pdf") is True + + def test_normal_file(self): + assert _skip("dados.csv") is False + + def test_empty_string(self): + assert _skip("") is False + + +# --------------------------------------------------------------------------- +# Base dataset test helpers +# --------------------------------------------------------------------------- + + +class BaseDatasetMixin: + db_class: Any = None + expected_name = "" + expected_long_name = "" + + def test_name(self): + ds = db_instance(self.db_class) + assert ds.name == self.expected_name + + def test_long_name(self): + ds = db_instance(self.db_class) + assert ds.long_name == self.expected_long_name + + def test_description_is_string(self): + ds = db_instance(self.db_class) + assert isinstance(ds.description, str) + assert len(ds.description) > 0 + + def test_ids_are_strings(self): + ds = db_instance(self.db_class) + assert isinstance(ds.ids, list) + for i in ds.ids: + assert isinstance(i, str) + assert len(i) > 0 + + def test_formatter_skip_pdf(self): + ds = db_instance(self.db_class) + assert ds.formatter("document.pdf") == { + "state": None, + "year": None, + "month": None, + } + + def test_formatter_skip_get_prefix(self): + ds = db_instance(self.db_class) + assert ds.formatter("get_dados.csv") == { + "state": None, + "year": None, + "month": None, + } + + def test_formatter_unrecognised(self): + ds = db_instance(self.db_class) + assert ds.formatter("random_file.xyz") == { + "state": None, + "year": None, + "month": None, + } + + def test_formatter_exception_handler(self): + ds = db_instance(self.db_class) + with patch( + "pysus.api.dadosgov.databases._skip", side_effect=ValueError("test") + ): + assert ds.formatter("anything.csv") == { + "state": None, + "year": None, + "month": None, + } + + +# --------------------------------------------------------------------------- +# CNES +# --------------------------------------------------------------------------- + + +class TestCNES(BaseDatasetMixin): + db_class = CNES + expected_name = "CNES" + expected_long_name = "Cadastro Nacional de Estabelecimentos de Saúde" + + def test_formatter_valid_pattern(self): + ds = db_instance(CNES) + result = ds.formatter("arquivo_01-2024.csv") + assert result == {"state": None, "year": 2024, "month": 1} + + def test_formatter_month_and_year(self): + ds = db_instance(CNES) + result = ds.formatter("dados_12-2023.csv") + assert result == {"state": None, "year": 2023, "month": 12} + + +# --------------------------------------------------------------------------- +# PNI +# --------------------------------------------------------------------------- + + +class TestPNI(BaseDatasetMixin): + db_class = PNI + expected_name = "PNI" + expected_long_name = "Programa Nacional de Imunizações" + + def test_formatter_valid_pattern(self): + ds = db_instance(PNI) + result = ds.formatter("vacinacao_jan_2024_csv.zip") + assert result == {"state": None, "year": 2024, "month": 1} + + def test_formatter_different_month(self): + ds = db_instance(PNI) + result = ds.formatter("vacinacao_dez_2023_csv.zip") + assert result == {"state": None, "year": 2023, "month": 12} + + def test_formatter_invalid_month(self): + ds = db_instance(PNI) + result = ds.formatter("vacinacao_xxx_2024_csv.zip") + assert result == {"state": None, "year": 2024, "month": None} + + def test_formatter_uppercase_filename(self): + ds = db_instance(PNI) + result = ds.formatter("VACINACAO_JAN_2024_CSV.ZIP") + assert result == {"state": None, "year": 2024, "month": 1} + + def test_group_aliases(self): + ds = db_instance(PNI) + p = "doses-aplicadas-pelo-programa-de-nacional-de-imunizacoes-pni" + assert ds.group_aliases[p] == "DPNI" + assert ds.group_aliases[f"{p}-2020"] == "DPNI" + assert ds.group_aliases[f"dataset-{p}_2022"] == "DPNI" + + +# --------------------------------------------------------------------------- +# SIA +# --------------------------------------------------------------------------- + + +class TestSIA(BaseDatasetMixin): + db_class = SIA + expected_name = "SIA" + expected_long_name = "Sistema de Informações Ambulatoriais" + + def test_formatter_year_pattern(self): + ds = db_instance(SIA) + result = ds.formatter("arquivo_2024_.csv") + assert result == {"state": None, "year": 2024, "month": None} + + def test_formatter_month_year_pattern(self): + ds = db_instance(SIA) + result = ds.formatter("arquivo_jun-out_2024_.csv") + assert result == {"state": None, "year": 2024, "month": None} + + def test_formatter_month_year_upper(self): + ds = db_instance(SIA) + result = ds.formatter("ARQUIVO_JUN-OUT_2024_.CSV") + assert result == {"state": None, "year": 2024, "month": None} + + +# --------------------------------------------------------------------------- +# SINAN +# --------------------------------------------------------------------------- + + +class TestSINAN(BaseDatasetMixin): + db_class = SINAN + expected_name = "SINAN" + expected_long_name = "Sistema de Informação de Agravos de Notificação" + + def test_formatter_dengue_pattern(self): + ds = db_instance(SINAN) + result = ds.formatter("DENGBR24.CSV.ZIP") + assert result == {"state": "BR", "year": 2024, "month": None} + + def test_formatter_tuberculose_pattern(self): + ds = db_instance(SINAN) + result = ds.formatter("TUBEBR99.CSV.ZIP") + assert result == {"state": "BR", "year": 1999, "month": None} + + def test_formatter_monkeypox_pattern(self): + ds = db_instance(SINAN) + result = ds.formatter("MPX_2023_OPENDATASUS.CSV.ZIP") + assert result == {"state": None, "year": 2023, "month": None} + + def test_formatter_lowercase_gets_uppercased(self): + ds = db_instance(SINAN) + result = ds.formatter("dengbr24.csv.zip") + assert result == {"state": "BR", "year": 2024, "month": None} + + def test_group_aliases(self): + ds = db_instance(SINAN) + aliases = ds.group_aliases + assert aliases["arboviroses-dengue"] == "DENG" + assert aliases["arboviroses-febre-de-chikungunya"] == "CHIK" + assert aliases["arboviroses-zika-virus"] == "ZIKA" + assert aliases["hanseniase"] == "HANS" + assert aliases["dados-tuberculose"] == "TUBE" + assert aliases["sifilis"] == "SIFA" + + +# --------------------------------------------------------------------------- +# SIM +# --------------------------------------------------------------------------- + + +class TestSIM(BaseDatasetMixin): + db_class = SIM + expected_name = "SIM" + expected_long_name = "Sistema de Informação sobre Mortalidade" + + def test_formatter_mortalidade_geral(self): + ds = db_instance(SIM) + result = ds.formatter("Mortalidade_Geral_2024_csv.zip") + assert result == {"state": None, "year": 2024, "month": None} + + def test_formatter_do_pattern(self): + ds = db_instance(SIM) + result = ds.formatter("DO24OPEN") + assert result == {"state": None, "year": 2024, "month": None} + + def test_formatter_do_century_handling(self): + ds = db_instance(SIM) + result = ds.formatter("DO99OPEN") + assert result == {"state": None, "year": 1999, "month": None} + + def test_group_aliases(self): + ds = db_instance(SIM) + assert ds.group_aliases["sim-1979-2019"] == "DO" + + +# --------------------------------------------------------------------------- +# SINASC +# --------------------------------------------------------------------------- + + +class TestSINASC(BaseDatasetMixin): + db_class = SINASC + expected_name = "SINASC" + expected_long_name = "Sistema de Informações sobre Nascidos Vivos" + + def test_formatter_sinasc_pattern(self): + ds = db_instance(SINASC) + result = ds.formatter("SINASC_2024_csv.zip") + assert result == {"state": None, "year": 2024, "month": None} + + def test_formatter_dnbr_pattern(self): + ds = db_instance(SINASC) + result = ds.formatter("DNBR2024_csv.zip") + assert result == {"state": "BR", "year": 2024, "month": None} + + def test_group_aliases(self): + ds = db_instance(SINASC) + key = "sistema-de-informacao-sobre-nascidos-vivos-sinasc-1996-a-20201" + assert ds.group_aliases[key] == "DN" + + +# --------------------------------------------------------------------------- +# COVID19 +# --------------------------------------------------------------------------- + + +class TestCOVID19(BaseDatasetMixin): + db_class = COVID19 + expected_name = "COVID19" + expected_long_name = "Casos Confirmados de COVID-19" + + def test_formatter_csv_file(self): + ds = db_instance(COVID19) + assert ds.formatter("casos_covid.csv") == { + "state": None, + "year": None, + "month": None, + } + + def test_formatter_xlsx_file(self): + ds = db_instance(COVID19) + assert ds.formatter("casos_covid.xlsx") == { + "state": None, + "year": None, + "month": None, + } + + def test_formatter_other_file(self): + ds = db_instance(COVID19) + assert ds.formatter("casos_covid.json") == { + "state": None, + "year": None, + "month": None, + } + + def test_formatter_uppercase_xlsx(self): + ds = db_instance(COVID19) + assert ds.formatter("casos_covid.XLSX") == { + "state": None, + "year": None, + "month": None, + } + + def test_formatter_xlsx_prefixed_with_get(self): + ds = db_instance(COVID19) + assert ds.formatter("get_casos.xlsx") == { + "state": None, + "year": None, + "month": None, + } + + +# --------------------------------------------------------------------------- +# AVAILABLE_DATABASES +# --------------------------------------------------------------------------- + + +class TestAVAILABLEDATABASES: + def test_contains_all_databases(self): + expected = {CNES, PNI, SIA, SINAN, SIM, SINASC, COVID19} + assert set(AVAILABLE_DATABASES) == expected + + def test_all_can_be_instantiated(self): + for db_class in AVAILABLE_DATABASES: + ds = db_class(client=DadosGov()) + assert ds.name is not None + assert ds.long_name is not None diff --git a/pysus/tests/api/dadosgov/test_models.py b/pysus/tests/api/dadosgov/test_models.py new file mode 100644 index 00000000..dfbcc005 --- /dev/null +++ b/pysus/tests/api/dadosgov/test_models.py @@ -0,0 +1,772 @@ +"""Tests for pysus.api.dadosgov.models.""" + +import asyncio +from datetime import datetime +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from pysus import CACHEPATH +from pysus.api.dadosgov.client import ConjuntoDados, DadosGov, Recurso +from pysus.api.dadosgov.models import Dataset, File, Group, _dedup_entries + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def make_recurso(**overrides) -> Recurso: + kwargs = { + "id": "r1", + "titulo": "Resource", + "link": "http://example.com/file.csv", + "tamanho": 100, + "dataUltimaAtualizacaoArquivo": "01/01/2024", + } + kwargs.update(overrides) + return Recurso(**kwargs) # type: ignore[arg-type] + + +def make_conjunto(resources=None) -> ConjuntoDados: + if resources is None: + resources = [make_recurso()] + return ConjuntoDados( + id="c1", + titulo="Conjunto Teste", + nome="conjunto-teste", + recursos=resources, + ) + + +class MockDataset(Dataset): + group_aliases: dict = {} + + @property + def name(self) -> str: + return "TestDS" + + @property + def long_name(self) -> str: + return "Test Dataset" + + @property + def description(self) -> str: + return "A test dataset" + + async def _fetch_content(self): + return await super()._fetch_content() + + def formatter(self, filename: str) -> dict: + return {} + + +# --------------------------------------------------------------------------- +# _dedup_entries +# --------------------------------------------------------------------------- + + +class TestDedupEntries: + def test_prefers_csv_over_json_xml(self): + entries = [ + ("data.csv", "csv_obj", {"fmt": "csv"}), + ("data.json", "json_obj", {"fmt": "json"}), + ("data.xml", "xml_obj", {"fmt": "xml"}), + ] + result = _dedup_entries(entries) + assert len(result) == 1 + assert result[0][0] == "data.csv" + + def test_multiple_stems(self): + entries = [ + ("a.csv", "a_csv", {}), + ("a.json", "a_json", {}), + ("b.csv", "b_csv", {}), + ] + result = _dedup_entries(entries) + assert len(result) == 2 + filenames = {r[0] for r in result} + assert filenames == {"a.csv", "b.csv"} + + def test_no_format_match_returns_all(self): + entries = [("readme.txt", "t1", {}), ("notes.md", "t2", {})] + result = _dedup_entries(entries) + assert len(result) == 2 + + def test_single_entry(self): + entries = [("data.csv", "obj", {})] + result = _dedup_entries(entries) + assert result == entries + + def test_zip_format_detection(self): + entries = [("data.csv.zip", "cz", {}), ("data.json.zip", "jz", {})] + result = _dedup_entries(entries) + assert len(result) == 1 + assert result[0][0] == "data.csv.zip" + + def test_only_json_and_xml_no_csv(self): + entries = [("data.json", "j", {}), ("data.xml", "x", {})] + result = _dedup_entries(entries) + assert len(result) == 2 + + def test_empty_list(self): + assert _dedup_entries([]) == [] + + +# --------------------------------------------------------------------------- +# File +# --------------------------------------------------------------------------- + + +class TestFileInit: + def test_init_with_metadata(self): + recurso = make_recurso() + ds = MockDataset(client=DadosGov()) + conj = make_conjunto([recurso]) + group = Group(record=conj, dataset=ds) + f = File( + record=recurso, + dataset=ds, + group=group, + path=recurso.url, + _metadata={"year": 2024, "month": 1, "state": "SP"}, + ) + assert f.record is recurso + assert f.dataset is ds + assert f.group is group + assert f.year == 2024 + assert f.month == 1 + assert f.state == "SP" + + def test_init_without_metadata(self): + recurso = make_recurso() + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path=recurso.url) + assert f._metadata == {} + + def test_repr_returns_basename(self): + recurso = make_recurso() + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path="http://example.com/data.csv") + assert repr(f) == "data.csv" + + +class TestFileModelPostInit: + def test_with_api_size_and_last_modified_no_task(self): + recurso = make_recurso(tamanho=100) + ds = MockDataset(client=DadosGov()) + + with patch.object(asyncio, "get_running_loop") as mock_loop: + File(record=recurso, dataset=ds, path=recurso.url) + mock_loop.create_task.assert_not_called() + + def test_without_api_size_creates_task(self): + recurso = make_recurso(tamanho=0) + ds = MockDataset(client=DadosGov()) + mock_loop = MagicMock() + + with patch.object(asyncio, "get_running_loop", return_value=mock_loop): + File(record=recurso, dataset=ds, path=recurso.url) + mock_loop.create_task.assert_called_once() + + def test_without_last_modified_creates_task(self): + recurso = make_recurso(dataUltimaAtualizacaoArquivo="Indisponível") + ds = MockDataset(client=DadosGov()) + mock_loop = MagicMock() + + with patch.object(asyncio, "get_running_loop", return_value=mock_loop): + File(record=recurso, dataset=ds, path=recurso.url) + mock_loop.create_task.assert_called_once() + + def test_no_event_loop_runtime_error_swallowed(self): + recurso = make_recurso(tamanho=0) + ds = MockDataset(client=DadosGov()) + + def _raise(): + raise RuntimeError("No running event loop") + + with patch.object(asyncio, "get_running_loop", side_effect=_raise): + File(record=recurso, dataset=ds, path=recurso.url) + + +class TestFileProperties: + def test_extension_from_file_name(self): + recurso = make_recurso(nomeArquivo="dados.csv") + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path=recurso.url) + assert f.extension == ".csv" + + def test_extension_from_url_when_no_file_name(self): + recurso = make_recurso( + nomeArquivo=None, link="http://example.com/arquivo.zip" + ) + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path=recurso.url) + assert f.extension == ".zip" + + def test_extension_from_url_with_query_string(self): + recurso = make_recurso( + nomeArquivo=None, link="http://example.com/arquivo.csv?download=1" + ) + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path=recurso.url) + assert f.extension == ".csv" + + def test_size_from_api_size(self): + recurso = make_recurso(tamanho=500) + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path=recurso.url) + assert f.size == 500 + + def test_size_zero_when_no_api_size(self): + recurso = make_recurso(tamanho=0) + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path=recurso.url) + assert f.size == 0 + + def test_modify_returns_datetime(self): + recurso = make_recurso() + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path=recurso.url) + assert isinstance(f.modify, datetime) + + def test_modify_raises_value_error_when_none(self): + recurso = make_recurso(dataUltimaAtualizacaoArquivo="Indisponível") + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path=recurso.url) + with pytest.raises(ValueError, match="File requires a modify date"): + f.modify + + def test_year_month_state_from_metadata(self): + recurso = make_recurso() + ds = MockDataset(client=DadosGov()) + f = File( + record=recurso, + dataset=ds, + path=recurso.url, + _metadata={"year": 2023, "month": 6, "state": "RJ"}, + ) + assert f.year == 2023 + assert f.month == 6 + assert f.state == "RJ" + + def test_year_month_state_defaults_to_none(self): + recurso = make_recurso() + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path=recurso.url) + assert f.year is None + assert f.month is None + assert f.state is None + + +class TestFileFetchMetadata: + @pytest.mark.asyncio + async def test_head_success_updates_record(self): + recurso = make_recurso( + tamanho=0, dataUltimaAtualizacaoArquivo="Indisponível" + ) + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path="http://example.com/file.csv") + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = { + "Content-Length": "999", + "Last-Modified": "Mon, 15 Jan 2024 10:30:00 GMT", + } + + mock_client = AsyncMock() + mock_client.head.return_value = mock_response + mock_client.__aenter__.return_value = mock_client + + with patch("httpx.AsyncClient", return_value=mock_client): + await f.fetch_metadata() + + assert f.record.api_size == 999 + assert f.record.last_modified is not None + + @pytest.mark.asyncio + async def test_head_405_fallback_to_get(self): + recurso = make_recurso(tamanho=0) + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path="http://example.com/file.csv") + + head_response = MagicMock() + head_response.status_code = 405 + + get_response = MagicMock() + get_response.headers = { + "Content-Length": "777", + "Last-Modified": "Tue, 01 Feb 2024 00:00:00 GMT", + } + + mock_client = AsyncMock() + mock_client.head.return_value = head_response + mock_client.get.return_value = get_response + mock_client.__aenter__.return_value = mock_client + + with patch("httpx.AsyncClient", return_value=mock_client): + await f.fetch_metadata() + + assert f.record.api_size == 777 + + called_args, called_kwargs = mock_client.get.call_args + actual_url = Path(called_args[0]).as_posix() + + assert actual_url in ( + "http:/example.com/file.csv", + "http://example.com/file.csv", + ) + assert called_kwargs == {"headers": {"Range": "bytes=0-0"}} + + @pytest.mark.asyncio + async def test_no_content_length_header(self): + recurso = make_recurso(tamanho=0) + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path="http://example.com/file.csv") + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {} + + mock_client = AsyncMock() + mock_client.head.return_value = mock_response + mock_client.__aenter__.return_value = mock_client + + with patch("httpx.AsyncClient", return_value=mock_client): + await f.fetch_metadata() + + assert f.record.api_size == 0 + + @pytest.mark.asyncio + async def test_exception_is_caught(self): + recurso = make_recurso(tamanho=0) + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path="http://example.com/file.csv") + + mock_client = AsyncMock() + mock_client.head.side_effect = Exception("Network error") + mock_client.__aenter__.return_value = mock_client + + with patch("httpx.AsyncClient", return_value=mock_client): + await f.fetch_metadata() + + assert f.record.api_size == 0 + + @pytest.mark.asyncio + async def test_parse_typeerror_is_caught(self): + recurso = make_recurso(tamanho=0) + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path="http://example.com/file.csv") + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = { + "Content-Length": "100", + "Last-Modified": "invalid-date-string", + } + + mock_client = AsyncMock() + mock_client.head.return_value = mock_response + mock_client.__aenter__.return_value = mock_client + + with patch("httpx.AsyncClient", return_value=mock_client): + with patch( + "pysus.api.dadosgov.models.parse", side_effect=TypeError + ): + await f.fetch_metadata() + + assert f.record.api_size == 100 + assert f.record.last_modified == datetime(2024, 1, 1) + + @pytest.mark.asyncio + async def test_parse_valueerror_is_caught(self): + recurso = make_recurso(tamanho=0) + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path="http://example.com/file.csv") + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = { + "Content-Length": "100", + "Last-Modified": "invalid-date-string", + } + + mock_client = AsyncMock() + mock_client.head.return_value = mock_response + mock_client.__aenter__.return_value = mock_client + + with patch("httpx.AsyncClient", return_value=mock_client): + with patch( + "pysus.api.dadosgov.models.parse", side_effect=ValueError + ): + await f.fetch_metadata() + + assert f.record.api_size == 100 + assert f.record.last_modified == datetime(2024, 1, 1) + + +class TestFileDownload: + @pytest.mark.asyncio + async def test_download_delegates_to_client(self): + recurso = make_recurso() + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, group=None, path=recurso.url) + + output = Path("/tmp/test_out.csv") + callback = MagicMock() + + with patch.object( + ds.client, "_download_file", new_callable=AsyncMock + ) as mock_dl: + mock_dl.return_value = output + result = await f._download(output=output, callback=callback) + + assert result == output + mock_dl.assert_awaited_once_with(f, output, callback=callback) + + @pytest.mark.asyncio + async def test_download_default_output(self): + recurso = make_recurso() + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path=recurso.url) + + expected = CACHEPATH / f.name + + with patch.object( + ds.client, "_download_file", new_callable=AsyncMock + ) as mock_dl: + mock_dl.return_value = expected + result = await f._download() + + assert result == expected + + +class TestFileFetchSize: + @pytest.mark.asyncio + async def test_head_success_updates_and_returns_size(self): + recurso = make_recurso(tamanho=0) + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path="http://example.com/file.csv") + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"Content-Length": "1234"} + + mock_client = AsyncMock() + mock_client.head.return_value = mock_response + mock_client.__aenter__.return_value = mock_client + + with patch("httpx.AsyncClient", return_value=mock_client): + size = await f.fetch_size() + + assert size == 1234 + assert f.record.api_size == 1234 + + @pytest.mark.asyncio + async def test_head_405_fallback_to_get(self): + recurso = make_recurso(tamanho=0) + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path="http://example.com/file.csv") + + head_response = MagicMock() + head_response.status_code = 405 + + get_response = MagicMock() + get_response.headers = {"Content-Length": "5678"} + + mock_client = AsyncMock() + mock_client.head.return_value = head_response + mock_client.get.return_value = get_response + mock_client.__aenter__.return_value = mock_client + + with patch("httpx.AsyncClient", return_value=mock_client): + size = await f.fetch_size() + + assert size == 5678 + assert f.record.api_size == 5678 + + called_args, called_kwargs = mock_client.get.call_args + actual_url = Path(called_args[0]).as_posix() + + assert actual_url in ( + "http:/example.com/file.csv", + "http://example.com/file.csv", + ) + assert called_kwargs == {"headers": {"Range": "bytes=0-0"}} + + @pytest.mark.asyncio + async def test_head_returns_zero_content_length(self): + recurso = make_recurso(tamanho=0) + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path="http://example.com/file.csv") + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"Content-Length": "0"} + + mock_client = AsyncMock() + mock_client.head.return_value = mock_response + mock_client.__aenter__.return_value = mock_client + + with patch("httpx.AsyncClient", return_value=mock_client): + size = await f.fetch_size() + + assert size == 0 + assert f.record.api_size == 0 + + @pytest.mark.asyncio + async def test_exception_returns_zero(self): + recurso = make_recurso(tamanho=0) + ds = MockDataset(client=DadosGov()) + f = File(record=recurso, dataset=ds, path="http://example.com/file.csv") + + mock_client = AsyncMock() + mock_client.head.side_effect = Exception("Timeout") + mock_client.__aenter__.return_value = mock_client + + with patch("httpx.AsyncClient", return_value=mock_client): + size = await f.fetch_size() + + assert size == 0 + + +# --------------------------------------------------------------------------- +# Group +# --------------------------------------------------------------------------- + + +class TestGroupInit: + def test_init_with_formatter(self): + ds = MockDataset(client=DadosGov()) + conj = make_conjunto() + + def formatter(fn): + return {"year": 2024} + + g = Group(record=conj, dataset=ds, formatter=formatter) + assert g.record is conj + assert g.dataset is ds + assert g._formatter is formatter + + def test_init_without_formatter(self): + ds = MockDataset(client=DadosGov()) + conj = make_conjunto() + g = Group(record=conj, dataset=ds) + assert g._formatter is None + + def test_repr_returns_name(self): + ds = MockDataset(client=DadosGov()) + conj = make_conjunto() + g = Group(record=conj, dataset=ds) + assert repr(g) == g.name + + +class TestGroupProperties: + def test_name_with_aliases(self): + ds = MockDataset(client=DadosGov()) + ds.group_aliases = {"conjunto-teste": "CT"} + conj = make_conjunto() + g = Group(record=conj, dataset=ds) + assert g.name == "CT" + + def test_name_without_aliases(self): + ds = MockDataset(client=DadosGov()) + ds.group_aliases = {} + conj = make_conjunto() + g = Group(record=conj, dataset=ds) + assert g.name == "conjunto-teste" + + def test_long_name(self): + ds = MockDataset(client=DadosGov()) + conj = make_conjunto() + g = Group(record=conj, dataset=ds) + assert g.long_name == "Conjunto Teste" + + def test_description(self): + ds = MockDataset(client=DadosGov()) + conj = make_conjunto() + g = Group(record=conj, dataset=ds) + assert g.description == "" + + +class TestGroupFetchFiles: + @pytest.mark.asyncio + async def test_filters_pdf_and_get_prefix(self): + resources = [ + make_recurso( + id="r1", link="http://ex.com/doc.pdf", nomeArquivo="doc.pdf" + ), + make_recurso( + id="r2", + link="http://ex.com/get_data.csv", + nomeArquivo="get_data.csv", + ), + make_recurso( + id="r3", link="http://ex.com/data.csv", nomeArquivo="data.csv" + ), + ] + conj = make_conjunto(resources) + ds = MockDataset(client=DadosGov()) + g = Group(record=conj, dataset=ds) + + files = await g._fetch_files() + + assert len(files) == 1 + assert files[0].record.id == "r3" + + @pytest.mark.asyncio + async def test_deduplicates_preferring_csv(self): + resources = [ + make_recurso( + id="r1", link="http://ex.com/data.csv", nomeArquivo="data.csv" + ), + make_recurso( + id="r2", link="http://ex.com/data.json", nomeArquivo="data.json" + ), + make_recurso( + id="r3", link="http://ex.com/data.xml", nomeArquivo="data.xml" + ), + ] + conj = make_conjunto(resources) + ds = MockDataset(client=DadosGov()) + g = Group(record=conj, dataset=ds) + + files = await g._fetch_files() + + assert len(files) == 1 + assert files[0].record.id == "r1" + + @pytest.mark.asyncio + async def test_formatter_applied(self): + resources = [ + make_recurso( + id="r1", + link="http://ex.com/SP2024.csv", + nomeArquivo="SP2024.csv", + ), + ] + conj = make_conjunto(resources) + ds = MockDataset(client=DadosGov()) + + def formatter(fn): + return {"state": "SP", "year": 2024} + + g = Group(record=conj, dataset=ds, formatter=formatter) + + files = await g._fetch_files() + + assert len(files) == 1 + assert files[0].state == "SP" + assert files[0].year == 2024 + + @pytest.mark.asyncio + async def test_formatter_not_implemented_error_caught(self): + resources = [ + make_recurso( + id="r1", link="http://ex.com/data.csv", nomeArquivo="data.csv" + ), + ] + conj = make_conjunto(resources) + ds = MockDataset(client=DadosGov()) + + def bad_formatter(fn): + raise NotImplementedError("not implemented") + + g = Group(record=conj, dataset=ds, formatter=bad_formatter) + + files = await g._fetch_files() + + assert len(files) == 1 + assert files[0].state is None + + @pytest.mark.asyncio + async def test_filename_from_url_when_no_file_name(self): + resources = [ + make_recurso( + id="r1", + nomeArquivo=None, + link="http://ex.com/download?file=data.csv", + ), + ] + conj = make_conjunto(resources) + ds = MockDataset(client=DadosGov()) + g = Group(record=conj, dataset=ds) + + files = await g._fetch_files() + assert len(files) == 1 + assert "download" in str(files[0].path) + + +# --------------------------------------------------------------------------- +# Dataset +# --------------------------------------------------------------------------- + + +class TestDatasetContent: + @pytest.mark.asyncio + async def test_fetch_content_with_ids(self): + client = DadosGov() + ds = MockDataset(client=client) + ds.ids = ["id1", "id2"] + + conj1 = make_conjunto([make_recurso(id="r1")]) + conj2 = make_conjunto([make_recurso(id="r2")]) + + with patch( + "pysus.api.dadosgov.client.DadosGov.get_dataset", + new_callable=AsyncMock, + ) as mock_get: + mock_get.side_effect = [conj1, conj2] + groups = await ds._fetch_content() + + assert len(groups) == 2 + assert groups[0].record is conj1 + assert groups[1].record is conj2 + assert callable(groups[0]._formatter) + assert groups[0]._formatter("x") == ds.formatter("x") + mock_get.assert_any_call("id1") + mock_get.assert_any_call("id2") + + @pytest.mark.asyncio + async def test_fetch_content_empty_ids(self): + ds = MockDataset(client=DadosGov()) + ds.ids = [] + + with patch( + "pysus.api.dadosgov.client.DadosGov.get_dataset" + ) as mock_get: + groups = await ds._fetch_content() + + assert groups == [] + mock_get.assert_not_called() + + def test_repr_returns_name(self): + ds = MockDataset(client=DadosGov()) + assert repr(ds) == "TestDS" + + def test_abstract_formatter_pass(self): + class DirectDataset(Dataset): + ids: list[str] = ["abc"] + + @property + def name(self): + return "test" + + @property + def long_name(self): + return "Test" + + @property + def description(self): + return "Test dataset" + + def formatter(self, filename): + Dataset.formatter(self, filename) + return {} + + ds = DirectDataset(client=DadosGov()) + assert ds.formatter("x.csv") == {} + + def test_formatter_not_abstract(self): + ds = MockDataset(client=DadosGov()) + assert ds.formatter("any.csv") == {} diff --git a/pysus/tests/api/ducklake/test_catalog.py b/pysus/tests/api/ducklake/test_catalog.py index ffdde87f..a6d415f6 100644 --- a/pysus/tests/api/ducklake/test_catalog.py +++ b/pysus/tests/api/ducklake/test_catalog.py @@ -1,77 +1,112 @@ -from pysus.api.ducklake.catalog import ( - CatalogDataset, - CatalogFile, - CatalogTable, +"""Tests for DuckLake catalog ORM models.""" + +from pysus.api.ducklake.catalog.orm.dataset import ( ColumnDefinition, - DatasetGroup, - Origin, + Dataset, + File, + Group, file_columns, ) +from pysus.api.ducklake.catalog.orm.default import Dataset as DefaultDataset + +class TestDefaultDataset: + def test_tablename(self): + assert DefaultDataset.__tablename__ == "datasets" -class TestOrigin: - def test_origin_ftp(self): - assert Origin.FTP.value == "ftp" + def test_columns(self): + cols = DefaultDataset.__table__.columns + assert "id" in cols + assert "name" in cols + assert "long_name" in cols + assert "description" in cols - def test_origin_api(self): - assert Origin.API.value == "api" + def test_schema(self): + assert DefaultDataset.__table_args__[0]["schema"] == "pysus" -class TestCatalogTable: - def test_catalog_table_is_abstract(self): - assert CatalogTable.__abstract__ is True +class TestDataset: + def test_tablename(self): + assert Dataset.__tablename__ == "datasets" + def test_columns(self): + cols = Dataset.__table__.columns + assert "id" in cols + assert "name" in cols + assert "long_name" in cols + assert "description" in cols -class TestCatalogDataset: - def test_catalog_dataset_tablename(self): - assert CatalogDataset.__tablename__ == "datasets" + def test_schema(self): + assert Dataset.__table_args__[0]["schema"] == "pysus" - def test_catalog_dataset_columns(self): - assert "id" in CatalogDataset.__table__.columns - assert "name" in CatalogDataset.__table__.columns - assert "long_name" in CatalogDataset.__table__.columns - assert "origin" in CatalogDataset.__table__.columns + def test_relationships(self): + assert hasattr(Dataset, "groups") + assert hasattr(Dataset, "files") + assert hasattr(Dataset, "columns") class TestColumnDefinition: - def test_column_definition_tablename(self): + def test_tablename(self): assert ColumnDefinition.__tablename__ == "dataset_columns" - def test_column_definition_columns(self): - assert "id" in ColumnDefinition.__table__.columns - assert "dataset_id" in ColumnDefinition.__table__.columns - assert "name" in ColumnDefinition.__table__.columns - assert "type" in ColumnDefinition.__table__.columns - - -class TestDatasetGroup: - def test_dataset_group_tablename(self): - assert DatasetGroup.__tablename__ == "dataset_groups" - - def test_dataset_group_columns(self): - assert "id" in DatasetGroup.__table__.columns - assert "dataset_id" in DatasetGroup.__table__.columns - assert "name" in DatasetGroup.__table__.columns - assert "long_name" in DatasetGroup.__table__.columns - - -class TestCatalogFile: - def test_catalog_file_tablename(self): - assert CatalogFile.__tablename__ == "files" - - def test_catalog_file_columns(self): - assert "id" in CatalogFile.__table__.columns - assert "dataset_id" in CatalogFile.__table__.columns - assert "path" in CatalogFile.__table__.columns - assert "size" in CatalogFile.__table__.columns - assert "rows" in CatalogFile.__table__.columns - assert "modified" in CatalogFile.__table__.columns - assert "year" in CatalogFile.__table__.columns - assert "month" in CatalogFile.__table__.columns - assert "state" in CatalogFile.__table__.columns + def test_columns(self): + cols = ColumnDefinition.__table__.columns + assert "id" in cols + assert "dataset_id" in cols + assert "name" in cols + assert "type" in cols + assert "description" in cols + assert "nullable" in cols + + +class TestGroup: + def test_tablename(self): + assert Group.__tablename__ == "dataset_groups" + + def test_columns(self): + cols = Group.__table__.columns + assert "id" in cols + assert "dataset_id" in cols + assert "name" in cols + assert "long_name" in cols + assert "description" in cols + + def test_relationships(self): + assert hasattr(Group, "dataset") + assert hasattr(Group, "files") + + +class TestFile: + def test_tablename(self): + assert File.__tablename__ == "files" + + def test_columns(self): + cols = File.__table__.columns + assert "id" in cols + assert "dataset_id" in cols + assert "group_id" in cols + assert "path" in cols + assert "size" in cols + assert "rows" in cols + assert "type" in cols + assert "modified" in cols + assert "year" in cols + assert "month" in cols + assert "state" in cols + assert "sha256" in cols + assert "origin_size" in cols + assert "origin_path" in cols + + def test_relationships(self): + assert hasattr(File, "dataset") + assert hasattr(File, "group") + assert hasattr(File, "columns") class TestFileColumns: + def test_file_columns_table_name(self): + assert file_columns.name == "file_columns" + def test_file_columns_primary_keys(self): file_id_col = file_columns.c.file_id column_id_col = file_columns.c.column_id diff --git a/pysus/tests/api/ducklake/test_client.py b/pysus/tests/api/ducklake/test_client.py index 244c22f5..e50a6b05 100644 --- a/pysus/tests/api/ducklake/test_client.py +++ b/pysus/tests/api/ducklake/test_client.py @@ -1,7 +1,15 @@ -from unittest.mock import MagicMock, patch +"""Tests for DuckLake client module.""" + +import errno +from datetime import datetime +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch import pytest +from pysus.api.ducklake.catalog.orm.dataset import Dataset as PerDataset +from pysus.api.ducklake.catalog.orm.dataset import File as CatalogFile from pysus.api.ducklake.client import DuckLake, DuckLakeCredentials +from pysus.api.ducklake.models import DuckDataset, File class TestDuckLakeCredentials: @@ -23,16 +31,25 @@ async def test_ducklake_init(self): assert client.endpoint == "nbg1.your-objectstorage.com" assert client.bucket == "pysus" + @pytest.mark.asyncio + async def test_description(self): + client = DuckLake() + assert client.description == "" + @pytest.mark.asyncio async def test_ducklake_catalog_path(self, tmp_path): with patch("pysus.api.ducklake.client.CACHEPATH", tmp_path): client = DuckLake() - assert client.catalog_path == tmp_path / "ducklake" / "catalog.db" + assert ( + client.catalog_path == tmp_path / "ducklake" / "catalog.duckdb" + ) @pytest.mark.asyncio async def test_ducklake_catalog_url(self): client = DuckLake() - expected = "https://nbg1.your-objectstorage.com/pysus/public/catalog.db" + expected = ( + "https://nbg1.your-objectstorage.com/pysus/public/catalog.duckdb" + ) assert client._catalog_url == expected @pytest.mark.asyncio @@ -42,40 +59,68 @@ async def test_is_authenticated_false_no_credentials(self): @pytest.mark.asyncio async def test_is_authenticated_with_credentials(self): - from unittest.mock import patch - client = DuckLake() - with patch.object(client, "_load_catalog"): + with patch.object(client, "_download_catalog"): await client.login(access_key="key", secret_key="secret") assert client._is_authenticated is True @pytest.mark.asyncio async def test_login_sets_credentials(self): - from unittest.mock import patch - client = DuckLake() - with patch.object(client, "_load_catalog"): + with patch.object(client, "_download_catalog"): await client.login(access_key="key", secret_key="secret") assert client.credentials is not None @pytest.mark.asyncio async def test_login_creates_s3_client(self): - from unittest.mock import patch - client = DuckLake() - with patch.object(client, "_load_catalog"): + with patch.object(client, "_download_catalog"): await client.login(access_key="key", secret_key="secret") assert client._s3_client is not None - client._s3_client = None + + @pytest.mark.asyncio + async def test_login_clears_credentials(self): + client = DuckLake() + client.credentials = DuckLakeCredentials( + access_key="test_key", + secret_key="test_secret", + ) + with patch.object(client, "_download_catalog"): + await client.login() + assert client.credentials is None + assert client._s3_client is None @pytest.mark.asyncio async def test_close_clears_state(self): client = DuckLake() - await client.close() + client._engine = MagicMock() + with patch( + "pysus.api.ducklake.client.to_thread.run_sync", + side_effect=lambda fn, *a, **kw: fn(), + ): + await client.close() assert client._engine is None assert client._Session is None assert client._s3_client is None + @pytest.mark.asyncio + async def test_close_with_datasets(self): + client = DuckLake() + ds = AsyncMock(spec=DuckDataset) + client._datasets.append(ds) + await client.close() + ds.close.assert_awaited_once_with(update_catalog=False) + assert client._datasets == [] + + @pytest.mark.asyncio + async def test_close_with_update_catalog(self): + client = DuckLake() + ds = AsyncMock(spec=DuckDataset) + client._datasets.append(ds) + with patch.object(client, "_upload_catalog") as mock_upload: + await client.close(update_catalog=True) + mock_upload.assert_awaited_once() + @pytest.mark.asyncio async def test_get_s3_client_requires_credentials(self): client = DuckLake() @@ -89,146 +134,517 @@ async def test_upload_catalog_requires_auth(self): await client._upload_catalog() -class TestDownloadFile: - pass +class TestDuckLakeDatasets: + @pytest.mark.asyncio + async def test_datasets_creates_session_and_returns_duckdatasets( + self, tmp_path + ): + with patch("pysus.api.ducklake.client.CACHEPATH", tmp_path): + client = DuckLake() + mock_session = MagicMock() + mock_session.__enter__.return_value = mock_session -class TestLoadCatalog: - pass + record = PerDataset(name="sinan", long_name="SINAN", description="Test") + mock_session.query.return_value.all.return_value = [record] + client._Session = MagicMock(return_value=mock_session) -class TestUploadCatalog: - @pytest.mark.asyncio - async def test_upload_catalog_without_auth_raises(self): - client = DuckLake() - with pytest.raises(PermissionError): - await client._upload_catalog() + def run_sync(fn, *args, **kwargs): + return fn() + + with patch( + "pysus.api.ducklake.client.to_thread.run_sync", + side_effect=run_sync, + ): + result = await client.datasets() + assert len(result) == 1 + assert isinstance(result[0], DuckDataset) + assert result[0].record.name == "sinan" -class TestDuckLakeQuery: @pytest.mark.asyncio - async def test_query_filters_by_dataset(self): - from pysus.api.ducklake.catalog import CatalogDataset, CatalogFile + async def test_datasets_connects_if_no_session(self, tmp_path): + with patch("pysus.api.ducklake.client.CACHEPATH", tmp_path): + client = DuckLake() + + assert client._Session is None - client = DuckLake() mock_session = MagicMock() + mock_session.__enter__.return_value = mock_session + mock_session.query.return_value.all.return_value = [] - mock_catalog_file = MagicMock(spec=CatalogFile) - mock_catalog_file.dataset = MagicMock(spec=CatalogDataset) - mock_catalog_file.dataset.name = "sinan" - mock_catalog_file.group = None - mock_catalog_file.path = "test.parquet" + async def _connect(*args, **kwargs): + client._Session = MagicMock(return_value=mock_session) - mock_query = MagicMock() - mock_query.options.return_value.join.return_value.filter.return_value.all.return_value = [ # noqa: E501 - mock_catalog_file - ] - mock_session.query.return_value = mock_query + with patch.object( + DuckLake, "connect", new=AsyncMock(side_effect=_connect) + ): - client._Session = MagicMock(return_value=mock_session) + def run_sync(fn, *args, **kwargs): + return fn() + + with patch( + "pysus.api.ducklake.client.to_thread.run_sync", + side_effect=run_sync, + ): + await client.datasets() + + +class TestDuckLakeSetupEngine: + def test_setup_engine_has_pysus_schema(self): + with patch("pysus.api.ducklake.client.create_engine") as mock_create: + mock_engine = MagicMock() + mock_conn = MagicMock() + mock_engine.connect.return_value.__enter__.return_value = mock_conn + mock_create.return_value = mock_engine - result = await client.query(dataset="sinan") - assert isinstance(result, list) + mock_conn.exec_driver_sql().fetchone.return_value = (1,) + client = DuckLake() + result = client._setup_engine() + + calls = [str(c) for c in mock_conn.exec_driver_sql.call_args_list] + assert any( + "SET search_path" in c and "pysus,main" in c for c in calls + ) + assert result is mock_engine + + def test_setup_engine_no_pysus_schema(self): + with patch("pysus.api.ducklake.client.create_engine") as mock_create: + mock_engine = MagicMock() + mock_conn = MagicMock() + mock_engine.connect.return_value.__enter__.return_value = mock_conn + mock_create.return_value = mock_engine + + mock_conn.exec_driver_sql().fetchone.return_value = None + + client = DuckLake() + result = client._setup_engine() + + calls = [str(c) for c in mock_conn.exec_driver_sql.call_args_list] + assert any("SET search_path" in c and "'main'" in c for c in calls) + assert result is mock_engine + + def test_setup_engine_with_credentials(self): + with patch("pysus.api.ducklake.client.create_engine") as mock_create: + mock_engine = MagicMock() + mock_conn = MagicMock() + mock_engine.connect.return_value.__enter__.return_value = mock_conn + mock_create.return_value = mock_engine + + mock_conn.exec_driver_sql().fetchone.return_value = None + + client = DuckLake( + credentials=DuckLakeCredentials( + access_key="ak", secret_key="sk" + ) + ) + client._setup_engine() + + calls = [str(c) for c in mock_conn.exec_driver_sql.call_args_list] + s3_access = any( + "s3_access_key_id" in c and "ak" in c for c in calls + ) + s3_secret = any( + "s3_secret_access_key" in c and "sk" in c for c in calls + ) + assert s3_access + assert s3_secret + + +class TestDuckLakeConnect: @pytest.mark.asyncio - async def test_query_filters_by_group(self): + async def test_connect_already_connected_returns_early(self): client = DuckLake() client._engine = MagicMock() - mock_session = MagicMock() - mock_session.__enter__ = MagicMock(return_value=mock_session) - mock_session.__exit__ = MagicMock(return_value=False) + client._Session = MagicMock() + with patch.object(client, "_download_catalog") as mock_dl: + await client.connect() + mock_dl.assert_not_called() - mock_query = MagicMock() - mock_query.options.return_value.join.return_value.filter.return_value.filter.return_value.all.return_value = ( # noqa: E501 - [] + @pytest.mark.asyncio + async def test_connect_creates_session_if_missing(self): + client = DuckLake() + client._engine = MagicMock() + client._Session = None + with patch.object(client, "_download_catalog") as mock_dl: + await client.connect() + assert client._Session is not None + mock_dl.assert_not_called() + + @pytest.mark.asyncio + async def test_connect_downloads_and_sets_up_engine(self, tmp_path): + with patch("pysus.api.ducklake.client.CACHEPATH", tmp_path): + client = DuckLake() + + client._engine = None + + def run_sync(fn, *args, **kwargs): + return fn() + + with patch.object(client, "_download_catalog") as mock_dl: + with patch( + "pysus.api.ducklake.client.to_thread.run_sync", + side_effect=run_sync, + ): + with patch.object( + client, "_setup_engine", return_value=MagicMock() + ): + await client.connect() + mock_dl.assert_awaited_once_with( + client._catalog_local, + client._catalog_remote, + ) + assert client._Session is not None + assert client._engine is not None + + +class TestDuckLakeDownload: + @pytest.mark.asyncio + async def test_download_retry_then_success(self, tmp_path): + client = DuckLake() + local_path = tmp_path / "test.db" + remote_path = "public/test.db" + + class FailingAsyncIter: + def __aiter__(self): + return self + + async def __anext__(self): + raise OSError("Connection dropped") + + mock_client = MagicMock() + mock_client.__aenter__.return_value = mock_client + httpx_patcher = patch( + "pysus.api.ducklake.client.httpx.AsyncClient", + return_value=mock_client, + ) + sleep_patcher = patch( + "pysus.api.ducklake.client.sleep", new_callable=AsyncMock ) - mock_session.query.return_value = mock_query - client._Session = MagicMock(return_value=mock_session) + first_stream_cm = MagicMock() + first_resp = MagicMock() + first_stream_cm.__aenter__.return_value = first_resp + first_resp.raise_for_status = MagicMock() + first_resp.headers.get.return_value = "4" + first_resp.aiter_bytes.return_value = FailingAsyncIter() + + second_stream_cm = MagicMock() + + async def success_iter(): + yield b"data" - result = await client.query(group="DENGUE") - assert isinstance(result, list) + second_resp = MagicMock() + second_stream_cm.__aenter__.return_value = second_resp + second_resp.raise_for_status = MagicMock() + second_resp.headers.get.return_value = "4" + second_resp.aiter_bytes.return_value = success_iter() + + mock_client.stream.side_effect = [first_stream_cm, second_stream_cm] + + with httpx_patcher, sleep_patcher as mock_sleep: + await client._download(remote_path, local_path) + + assert local_path.exists() + assert local_path.read_bytes() == b"data" + assert mock_client.stream.call_count == 2 + mock_sleep.assert_awaited_once_with(1) @pytest.mark.asyncio - async def test_query_filters_by_state(self): + async def test_download_retry_exhausted_raises(self, tmp_path): client = DuckLake() - client._engine = MagicMock() - mock_session = MagicMock() - mock_session.__enter__ = MagicMock(return_value=mock_session) - mock_session.__exit__ = MagicMock(return_value=False) + local_path = tmp_path / "test.db" + remote_path = "public/test.db" - mock_query = MagicMock() - mock_query.options.return_value.join.return_value.filter.return_value.filter.return_value.all.return_value = ( # noqa: E501 - [] + class FailingAsyncIter: + def __aiter__(self): + return self + + async def __anext__(self): + raise OSError("Connection dropped") + + mock_client = MagicMock() + mock_client.__aenter__.return_value = mock_client + httpx_patcher = patch( + "pysus.api.ducklake.client.httpx.AsyncClient", + return_value=mock_client, + ) + sleep_patcher = patch( + "pysus.api.ducklake.client.sleep", new_callable=AsyncMock ) - mock_session.query.return_value = mock_query - client._Session = MagicMock(return_value=mock_session) + stream_cm = MagicMock() + resp = MagicMock() + stream_cm.__aenter__.return_value = resp + resp.raise_for_status = MagicMock() + resp.headers.get.return_value = "4" + resp.aiter_bytes.return_value = FailingAsyncIter() - result = await client.query(state="SP") - assert isinstance(result, list) + mock_client.stream.return_value = stream_cm + + with httpx_patcher, sleep_patcher as mock_sleep: + with pytest.raises(OSError, match="Connection dropped"): + await client._download(remote_path, local_path) + + assert mock_client.stream.call_count == 5 + assert mock_sleep.await_count == 4 @pytest.mark.asyncio - async def test_query_filters_by_year(self): + async def test_download_with_callback(self, tmp_path): client = DuckLake() - client._engine = MagicMock() - mock_session = MagicMock() - mock_session.__enter__ = MagicMock(return_value=mock_session) - mock_session.__exit__ = MagicMock(return_value=False) + local_path = tmp_path / "test.db" + remote_path = "public/test.db" - mock_query = MagicMock() - mock_query.options.return_value.join.return_value.filter.return_value.filter.return_value.all.return_value = ( # noqa: E501 - [] - ) - mock_session.query.return_value = mock_query + mock_client = MagicMock() + mock_client.__aenter__.return_value = mock_client - client._Session = MagicMock(return_value=mock_session) + stream_cm = MagicMock() + + async def success_iter(): + yield b"hello" + yield b"world" + + resp = MagicMock() + stream_cm.__aenter__.return_value = resp + resp.raise_for_status = MagicMock() + resp.headers.get.return_value = "10" + resp.aiter_bytes.return_value = success_iter() - result = await client.query(year=2024) - assert isinstance(result, list) + mock_client.stream.return_value = stream_cm + callback = MagicMock() + + with patch( + "pysus.api.ducklake.client.httpx.AsyncClient", + return_value=mock_client, + ): + await client._download(remote_path, local_path, callback=callback) + + callback.assert_any_call(5, 10) + callback.assert_any_call(10, 10) + + +class TestDuckLakeDownloadCatalog: @pytest.mark.asyncio - async def test_query_filters_by_month(self): + async def test_download_catalog_size_match_skips_download(self, tmp_path): + local_path = tmp_path / "catalog.duckdb" + local_path.write_text("test") + remote_path = "public/catalog.duckdb" + client = DuckLake() - client._engine = MagicMock() - mock_session = MagicMock() - mock_session.__enter__ = MagicMock(return_value=mock_session) - mock_session.__exit__ = MagicMock(return_value=False) - mock_query = MagicMock() - mock_query.options.return_value.join.return_value.filter.return_value.filter.return_value.filter.return_value.filter.return_value.all.return_value = ( # noqa: E501 - [] - ) - mock_session.query.return_value = mock_query + mock_http = MagicMock() + mock_resp = MagicMock() + mock_resp.headers = {"content-length": "4"} + mock_resp.raise_for_status = MagicMock() + mock_http.head = AsyncMock(return_value=mock_resp) + mock_http.__aenter__.return_value = mock_http + + with patch( + "pysus.api.ducklake.client.httpx.AsyncClient", + return_value=mock_http, + ): + with patch.object(client, "_download") as mock_dl: + await client._download_catalog(local_path, remote_path) + mock_dl.assert_not_called() - client._Session = MagicMock(return_value=mock_session) + @pytest.mark.asyncio + async def test_download_catalog_size_mismatch_downloads(self, tmp_path): + local_path = tmp_path / "catalog.duckdb" + local_path.write_text("test") + remote_path = "public/catalog.duckdb" + + client = DuckLake() - result = await client.query(month=1) - assert isinstance(result, list) + mock_http = MagicMock() + mock_resp = MagicMock() + mock_resp.headers = {"content-length": "100"} + mock_resp.raise_for_status = MagicMock() + mock_http.head = AsyncMock(return_value=mock_resp) + mock_http.__aenter__.return_value = mock_http + + with patch( + "pysus.api.ducklake.client.httpx.AsyncClient", + return_value=mock_http, + ): + with patch.object(client, "_download") as mock_dl: + await client._download_catalog(local_path, remote_path) + mock_dl.assert_awaited_once_with(remote_path, local_path) @pytest.mark.asyncio - async def test_query_no_filters(self): - from pysus.api.ducklake.catalog import CatalogDataset, CatalogFile + async def test_download_catalog_local_not_exists(self, tmp_path): + local_path = tmp_path / "catalog.duckdb" + remote_path = "public/catalog.duckdb" client = DuckLake() - mock_session = MagicMock() - mock_catalog_file = MagicMock(spec=CatalogFile) - mock_catalog_file.path = "public/test.parquet" - mock_catalog_file.dataset = MagicMock(spec=CatalogDataset) - mock_catalog_file.dataset.name = "sinan" - mock_catalog_file.group = None + mock_http = MagicMock() + mock_resp = MagicMock() + mock_resp.headers = {"content-length": "100"} + mock_resp.raise_for_status = MagicMock() + mock_http.head = AsyncMock(return_value=mock_resp) + mock_http.__aenter__.return_value = mock_http + + with patch( + "pysus.api.ducklake.client.httpx.AsyncClient", + return_value=mock_http, + ): + with patch.object(client, "_download") as mock_dl: + await client._download_catalog(local_path, remote_path) + mock_dl.assert_awaited_once_with(remote_path, local_path) - mock_query = MagicMock() - mock_query.options.return_value.join.return_value.all.return_value = [ - mock_catalog_file - ] - mock_session.query.return_value = mock_query + @pytest.mark.asyncio + async def test_download_catalog_head_fails(self, tmp_path): + local_path = tmp_path / "catalog.duckdb" + remote_path = "public/catalog.duckdb" - client._Session = MagicMock(return_value=mock_session) + client = DuckLake() - try: - result = await client.query(dataset="sinan") - assert isinstance(result, list) - except OSError: - pass + mock_http = MagicMock() + mock_resp = MagicMock() + mock_resp.headers = {} + mock_http.head = AsyncMock(side_effect=Exception("HEAD failed")) + mock_http.__aenter__.return_value = mock_http + + with patch( + "pysus.api.ducklake.client.httpx.AsyncClient", + return_value=mock_http, + ): + with patch.object(client, "_download") as mock_dl: + await client._download_catalog(local_path, remote_path) + mock_dl.assert_awaited_once_with(remote_path, local_path) + + @pytest.mark.asyncio + async def test_download_catalog_head_no_content_length(self, tmp_path): + local_path = tmp_path / "catalog.duckdb" + local_path.write_text("test") + remote_path = "public/catalog.duckdb" + + client = DuckLake() + + mock_http = MagicMock() + mock_resp = MagicMock() + mock_resp.headers = {} + mock_resp.raise_for_status = MagicMock() + mock_http.head = AsyncMock(return_value=mock_resp) + mock_http.__aenter__.return_value = mock_http + + with patch( + "pysus.api.ducklake.client.httpx.AsyncClient", + return_value=mock_http, + ): + with patch.object(client, "_download") as mock_dl: + await client._download_catalog(local_path, remote_path) + mock_dl.assert_awaited_once_with(remote_path, local_path) + + @pytest.mark.asyncio + async def test_download_catalog_oserror_on_local_stat(self, tmp_path): + local_path = tmp_path / "catalog.duckdb" + local_path.write_text("test") + remote_path = "public/catalog.duckdb" + + client = DuckLake() + + mock_http = MagicMock() + mock_resp = MagicMock() + mock_resp.headers = {"content-length": "999"} + mock_resp.raise_for_status = MagicMock() + mock_http.head = AsyncMock(return_value=mock_resp) + mock_http.__aenter__.return_value = mock_http + + stat_call_count = 0 + original_stat = type(local_path).stat + + def broken_stat(self, *args, **kwargs): + nonlocal stat_call_count + stat_call_count += 1 + if stat_call_count == 2: + raise OSError(errno.EACCES, "permission denied") + return original_stat(self, *args, **kwargs) + + with patch.object(type(local_path), "stat", broken_stat): + with patch( + "pysus.api.ducklake.client.httpx.AsyncClient", + return_value=mock_http, + ): + with patch.object(client, "_download") as mock_dl: + await client._download_catalog(local_path, remote_path) + mock_dl.assert_awaited_once_with(remote_path, local_path) + + +class TestDuckLakeDownloadFile: + @pytest.mark.asyncio + async def test_download_file_invalid_type_raises(self): + client = DuckLake() + with pytest.raises( + ValueError, match="FTP File was not properly instantiated" + ): + await client._download_file( + "not-a-file", Path("/tmp/test") + ) # type: ignore + + @pytest.mark.asyncio + async def test_download_file_valid(self, tmp_path): + client = DuckLake() + + record = CatalogFile( + path="remote/path/file.csv", + type="csv", + size=100, + rows=10, + modified=datetime.now(), + origin_size=100, + origin_path="remote/path/file.csv", + ) + + dataset = MagicMock(spec=DuckDataset) + f = File(dataset=dataset, record=record) # type: ignore + + output = tmp_path / "output.csv" + with patch.object(client, "_download") as mock_dl: + result = await client._download_file(f, output) + mock_dl.assert_awaited_once_with(record.path, output, callback=None) + assert result == output + + +class TestDuckLakeUploadCatalog: + @pytest.mark.asyncio + async def test_upload_catalog_with_datasets(self, tmp_path): + client = DuckLake( + credentials=DuckLakeCredentials(access_key="ak", secret_key="sk") + ) + client._s3_client = MagicMock() + + ds = AsyncMock(spec=DuckDataset) + local_db = tmp_path / "catalog_test.duckdb" + local_db.write_text("data") + ds._catalog_local = local_db + ds._catalog_name = "catalog_test.duckdb" + + with patch.object( + DuckLake, "datasets", new=AsyncMock(return_value=[ds]) + ): + await client._upload_catalog() + client._s3_client.upload_file.assert_called_once_with( + str(local_db), client.bucket, ds._catalog_name + ) + + @pytest.mark.asyncio + async def test_upload_catalog_skips_missing_local(self, tmp_path): + client = DuckLake( + credentials=DuckLakeCredentials(access_key="ak", secret_key="sk") + ) + client._s3_client = MagicMock() + + ds = AsyncMock(spec=DuckDataset) + nonexistent = tmp_path / "nonexistent.duckdb" + ds._catalog_local = nonexistent + ds._catalog_name = "catalog_test.duckdb" + + with patch.object( + DuckLake, "datasets", new=AsyncMock(return_value=[ds]) + ): + await client._upload_catalog() + client._s3_client.upload_file.assert_not_called() diff --git a/pysus/tests/api/ducklake/test_models.py b/pysus/tests/api/ducklake/test_models.py new file mode 100644 index 00000000..2b38ea2b --- /dev/null +++ b/pysus/tests/api/ducklake/test_models.py @@ -0,0 +1,635 @@ +"""Tests for DuckLake model wrappers (File, DuckDataset, DuckGroup).""" + +import hashlib +from datetime import datetime +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, create_autospec, patch + +import pytest +from pysus.api.ducklake.catalog.orm.dataset import Dataset +from pysus.api.ducklake.catalog.orm.dataset import File as CatalogFile +from pysus.api.ducklake.catalog.orm.dataset import Group +from pysus.api.ducklake.models import DuckDataset, DuckGroup, File + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def catalog_file_record(): + rec = CatalogFile( + path="remote/data/file.csv", + type="csv", + size=2048, + rows=500, + sha256="abc123deadbeef", + modified=datetime(2024, 6, 1, 12, 0, 0), + origin_size=2048, + origin_path="remote/data/file.csv", + ) + return rec + + +@pytest.fixture +def record(): + rec = Dataset( + name="sinan", + long_name="SINAN", + description="SINAN dataset", + ) + return rec + + +@pytest.fixture +def group_record(): + rec = Group( + name="acidentes", + long_name="Acidentes", + description="Acidentes de trânsito", + ) + return rec + + +@pytest.fixture +def mock_client(): + from pysus.api.ducklake.client import DuckLake + + mc = create_autospec(DuckLake, instance=True) + mc._datasets = [] + return mc + + +@pytest.fixture +def mock_dataset(mock_client, record): + with patch("pathlib.Path.mkdir"): + ds = DuckDataset(record=record, client=mock_client) + return ds + + +@pytest.fixture +def mock_group(group_record, mock_dataset): + with patch("pathlib.Path.mkdir"): + g = DuckGroup(record=group_record, dataset=mock_dataset) + return g + + +# --------------------------------------------------------------------------- +# File +# --------------------------------------------------------------------------- + + +class TestFile: + def test_init(self, catalog_file_record, mock_dataset): + f = File(dataset=mock_dataset, record=catalog_file_record) + assert f.record is catalog_file_record + assert f.dataset is mock_dataset + assert f.group is None + + def test_init_with_group( + self, catalog_file_record, mock_dataset, mock_group + ): + f = File( + dataset=mock_dataset, + record=catalog_file_record, + group=mock_group, + ) + assert f.group is mock_group + + def test_path(self, catalog_file_record, mock_dataset): + f = File(dataset=mock_dataset, record=catalog_file_record) + assert f.path == Path("remote/data/file.csv") + + def test_basename(self, catalog_file_record, mock_dataset): + f = File(dataset=mock_dataset, record=catalog_file_record) + assert f.basename == "file.csv" + + def test_extension(self, catalog_file_record, mock_dataset): + f = File(dataset=mock_dataset, record=catalog_file_record) + assert f.extension == ".csv" + + def test_size(self, catalog_file_record, mock_dataset): + f = File(dataset=mock_dataset, record=catalog_file_record) + assert f.size == 2048 + + def test_modify(self, catalog_file_record, mock_dataset): + f = File(dataset=mock_dataset, record=catalog_file_record) + assert f.modify == datetime(2024, 6, 1, 12, 0, 0) + + def test_rows(self, catalog_file_record, mock_dataset): + f = File(dataset=mock_dataset, record=catalog_file_record) + assert f.rows == 500 + + def test_sha256(self, catalog_file_record, mock_dataset): + f = File(dataset=mock_dataset, record=catalog_file_record) + assert f.sha256 == "abc123deadbeef" + + def test_sha256_none(self, catalog_file_record, mock_dataset): + catalog_file_record.sha256 = None + f = File(dataset=mock_dataset, record=catalog_file_record) + assert f.sha256 is None + + def test_name_fallback(self, catalog_file_record, mock_dataset): + f = File(dataset=mock_dataset, record=catalog_file_record) + assert f.name == "file.csv" + + @pytest.mark.asyncio + async def test_download_with_explicit_output( + self, catalog_file_record, mock_dataset + ): + f = File(dataset=mock_dataset, record=catalog_file_record) + output = Path("/tmp/out.csv") + cb = MagicMock() + mock_dataset.client._download_file.return_value = output + result = await f._download(output=output, callback=cb) + mock_dataset.client._download_file.assert_awaited_once_with( + f, output, callback=cb + ) + assert result == output + + @pytest.mark.asyncio + async def test_download_without_output( + self, catalog_file_record, mock_dataset + ): + from pysus import CACHEPATH + + f = File(dataset=mock_dataset, record=catalog_file_record) + expected = CACHEPATH / f.name + mock_dataset.client._download_file.return_value = expected + result = await f._download() + mock_dataset.client._download_file.assert_awaited_once_with( + f, expected, callback=None + ) + assert result == expected + + @pytest.mark.asyncio + async def test_verify_no_hash_returns_true( + self, catalog_file_record, mock_dataset, tmp_path + ): + catalog_file_record.sha256 = None + f = File(dataset=mock_dataset, record=catalog_file_record) + result = await f.verify(tmp_path / "whatever") + assert result is True + + @pytest.mark.asyncio + async def test_verify_matching_hash(self, mock_dataset, tmp_path): + content = b"hello world, this is test content" + expected_hash = hashlib.sha256(content).hexdigest() + + record = CatalogFile( + path="remote/data/file.csv", + type="csv", + sha256=expected_hash, + size=len(content), + rows=1, + modified=datetime.now(), + origin_size=len(content), + origin_path="remote/data/file.csv", + ) + + file_path = tmp_path / "test_file.csv" + file_path.write_bytes(content) + + f = File(dataset=mock_dataset, record=record) + assert await f.verify(file_path) is True + + @pytest.mark.asyncio + async def test_verify_mismatching_hash(self, mock_dataset, tmp_path): + content = b"hello world, this is test content" + wrong_content = b"this content does not match" + expected_hash = hashlib.sha256(content).hexdigest() + + record = CatalogFile( + path="remote/data/file.csv", + type="csv", + sha256=expected_hash, + size=len(wrong_content), + rows=1, + modified=datetime.now(), + origin_size=len(wrong_content), + origin_path="remote/data/file.csv", + ) + + file_path = tmp_path / "test_file.csv" + file_path.write_bytes(wrong_content) + + f = File(dataset=mock_dataset, record=record) + assert await f.verify(file_path) is False + + +# --------------------------------------------------------------------------- +# DuckDataset +# --------------------------------------------------------------------------- + + +class TestDuckDataset: + def test_init(self, mock_client, record): + with patch("pathlib.Path.mkdir"): + ds = DuckDataset(record=record, client=mock_client) + assert ds.record is record + assert ds.client is mock_client + assert ds._catalog_name == "catalog_sinan.duckdb" + + def test_repr(self, mock_dataset): + assert repr(mock_dataset) == "SINAN" + + def test_name(self, mock_dataset): + assert mock_dataset.name == "sinan" + + def test_long_name(self, mock_dataset): + assert mock_dataset.long_name == "" + + def test_description(self, mock_dataset): + assert mock_dataset.description == "" + + def test_catalog_path(self, mock_dataset): + from pysus import CACHEPATH + + expected = Path(CACHEPATH) / "ducklake" / "catalog_sinan.duckdb" + assert mock_dataset.catalog_path == expected + + @pytest.mark.asyncio + async def test_connect_already_connected(self, mock_dataset, mock_client): + mock_dataset._engine = MagicMock() + mock_dataset._Session = MagicMock() + await mock_dataset.connect(force=False) + mock_client._download.assert_not_called() + + @pytest.mark.asyncio + async def test_connect_force_reconnects(self, mock_dataset, mock_client): + mock_dataset._engine = MagicMock() + mock_dataset._Session = MagicMock() + + def run_sync(fn, *args, **kwargs): + return fn() + + with patch( + "pysus.api.ducklake.models.to_thread.run_sync", + side_effect=run_sync, + ): + with patch.object( + mock_client, "_setup_engine", return_value=MagicMock() + ): + await mock_dataset.connect(force=True) + + mock_client._download.assert_awaited_once() + + @pytest.mark.asyncio + async def test_connect_creates_session_if_missing( + self, mock_dataset, mock_client + ): + mock_dataset._engine = MagicMock() + mock_dataset._Session = None + await mock_dataset.connect(force=False) + assert mock_dataset._Session is not None + mock_client._download.assert_not_called() + + @pytest.mark.asyncio + async def test_connect_full_path(self, mock_dataset, mock_client): + mock_dataset._engine = None + + def run_sync(fn, *args, **kwargs): + return fn() + + with patch( + "pysus.api.ducklake.models.to_thread.run_sync", + side_effect=run_sync, + ): + with patch.object( + mock_client, "_setup_engine", return_value=MagicMock() + ): + await mock_dataset.connect() + + mock_client._download.assert_awaited_once() + assert mock_dataset._engine is not None + assert mock_dataset._Session is not None + assert mock_dataset in mock_client._datasets + + @pytest.mark.asyncio + async def test_close_disposes_engine(self, mock_dataset): + engine = MagicMock() + mock_dataset._engine = engine + with patch( + "pysus.api.ducklake.models.to_thread.run_sync", + side_effect=lambda fn, *a, **kw: fn(), + ): + await mock_dataset.close() + engine.dispose.assert_called_once() + assert mock_dataset._engine is None + assert mock_dataset._Session is None + + @pytest.mark.asyncio + async def test_close_noop_when_no_engine(self, mock_dataset): + mock_dataset._engine = None + await mock_dataset.close() + + @pytest.mark.asyncio + async def test_close_with_update_catalog(self, mock_dataset, mock_client): + engine = MagicMock() + mock_dataset._engine = engine + mock_client._is_authenticated = True + + with patch.object(mock_dataset, "_upload_catalog") as mock_upload: + with patch( + "pysus.api.ducklake.models.to_thread.run_sync", + side_effect=lambda fn, *a, **kw: fn(), + ): + await mock_dataset.close(update_catalog=True) + engine.dispose.assert_called_once() + mock_upload.assert_awaited_once() + + @pytest.mark.asyncio + async def test_upload_catalog_no_credentials_raises(self, mock_dataset): + mock_dataset.client.credentials = None + with pytest.raises(PermissionError, match="Admin credentials required"): + await mock_dataset._upload_catalog() + + @pytest.mark.asyncio + async def test_upload_catalog_success( + self, mock_dataset, mock_client, tmp_path + ): + mock_client.credentials = MagicMock() + mock_client._s3_client = MagicMock() + mock_client.bucket = "pysus" + local_db = tmp_path / "catalog_sinan.duckdb" + local_db.write_text("data") + mock_dataset._catalog_local = local_db + + def run_sync(fn, *args, **kwargs): + return fn() + + with patch( + "pysus.api.ducklake.models.to_thread.run_sync", + side_effect=run_sync, + ): + await mock_dataset._upload_catalog() + + mock_client._s3_client.upload_file.assert_called_once_with( + str(local_db), + mock_client.bucket, + f"catalog_{mock_dataset.record.name.lower()}.duckdb", + ) + + @pytest.mark.asyncio + async def test_query_no_filters(self, mock_dataset): + mock_session = MagicMock() + mock_session.__enter__.return_value = mock_session + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.options.return_value = mock_query + mock_query.all.return_value = [] + + mock_dataset._Session = MagicMock(return_value=mock_session) + + def run_sync(fn, *args, **kwargs): + return fn() + + with patch( + "pysus.api.ducklake.models.to_thread.run_sync", + side_effect=run_sync, + ): + result = await mock_dataset.query() + + assert result == [] + + @pytest.mark.asyncio + async def test_query_with_all_filters(self, mock_dataset): + mock_session = MagicMock() + mock_session.__enter__.return_value = mock_session + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.options.return_value = mock_query + mock_query.join.return_value = mock_query + mock_query.filter.return_value = mock_query + mock_query.all.return_value = [] + + mock_dataset._Session = MagicMock(return_value=mock_session) + + def run_sync(fn, *args, **kwargs): + return fn() + + with patch( + "pysus.api.ducklake.models.to_thread.run_sync", + side_effect=run_sync, + ): + result = await mock_dataset.query( + group="acidentes%", + state="RJ", + year=2024, + month=6, + ) + + assert result == [] + + @pytest.mark.asyncio + async def test_query_connects_if_no_session(self, mock_dataset): + mock_dataset._Session = None + + mock_session = MagicMock() + mock_session.__enter__.return_value = mock_session + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.options.return_value = mock_query + mock_query.all.return_value = [] + + async def _connect(*args, **kwargs): + mock_dataset._Session = MagicMock(return_value=mock_session) + + with patch.object( + DuckDataset, "connect", new=AsyncMock(side_effect=_connect) + ) as mock_connect: + with patch( + "pysus.api.ducklake.models.to_thread.run_sync", + side_effect=lambda fn, *a, **kw: fn(), + ): + await mock_dataset.query() + mock_connect.assert_awaited_once() + + @pytest.mark.asyncio + async def test_fetch_content_with_groups_and_files( + self, mock_dataset, mock_client + ): + mock_session = MagicMock() + mock_session.__enter__.return_value = mock_session + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.options.return_value = mock_query + mock_query.filter.return_value = mock_query + + group_rec = Group( + name="dengue", + long_name="Dengue", + description="Dengue data", + ) + + file_rec = CatalogFile( + path="remote/dengue/data.csv", + type="csv", + sha256="hash123", + size=100, + rows=10, + modified=datetime.now(), + origin_size=100, + origin_path="remote/dengue/data.csv", + ) + + dataset_rec = Dataset( + name="sinan", + long_name="SINAN", + description="SINAN dataset", + ) + dataset_rec.groups = [group_rec] + dataset_rec.files = [file_rec] + + mock_query.first.return_value = dataset_rec + + mock_dataset._Session = MagicMock(return_value=mock_session) + + def run_sync(fn, *args, **kwargs): + return fn() + + with patch( + "pysus.api.ducklake.models.to_thread.run_sync", + side_effect=run_sync, + ): + items = await mock_dataset._fetch_content() + + assert len(items) == 2 + assert isinstance(items[0], DuckGroup) + assert items[0].record is group_rec + assert isinstance(items[1], File) + assert items[1].record is file_rec + + @pytest.mark.asyncio + async def test_fetch_content_no_dataset(self, mock_dataset): + mock_session = MagicMock() + mock_session.__enter__.return_value = mock_session + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.options.return_value = mock_query + mock_query.filter.return_value = mock_query + mock_query.first.return_value = None + + mock_dataset._Session = MagicMock(return_value=mock_session) + + def run_sync(fn, *args, **kwargs): + return fn() + + with patch( + "pysus.api.ducklake.models.to_thread.run_sync", + side_effect=run_sync, + ): + items = await mock_dataset._fetch_content() + + assert items == [] + + @pytest.mark.asyncio + async def test_fetch_content_only_groups(self, mock_dataset): + mock_session = MagicMock() + mock_session.__enter__.return_value = mock_session + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.options.return_value = mock_query + mock_query.filter.return_value = mock_query + + group_rec = Group( + name="dengue", + long_name="Dengue", + description="Dengue data", + ) + + dataset_rec = Dataset( + name="sinan", + long_name="SINAN", + description="Test", + ) + dataset_rec.groups = [group_rec] + dataset_rec.files = [] + + mock_query.first.return_value = dataset_rec + + mock_dataset._Session = MagicMock(return_value=mock_session) + + def run_sync(fn, *args, **kwargs): + return fn() + + with patch( + "pysus.api.ducklake.models.to_thread.run_sync", + side_effect=run_sync, + ): + items = await mock_dataset._fetch_content() + + assert len(items) == 1 + assert isinstance(items[0], DuckGroup) + + @pytest.mark.asyncio + async def test_fetch_content_connects_if_no_session(self, mock_dataset): + mock_dataset._Session = None + + mock_session = MagicMock() + mock_session.__enter__.return_value = mock_session + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.options.return_value = mock_query + mock_query.filter.return_value = mock_query + ds = Dataset( + name="sinan", + long_name="SINAN", + description="Test", + ) + ds.groups = [] + ds.files = [] + mock_query.first.return_value = ds + + async def _connect(*args, **kwargs): + mock_dataset._Session = MagicMock(return_value=mock_session) + + with patch.object( + DuckDataset, "connect", new=AsyncMock(side_effect=_connect) + ) as mock_connect: + with patch( + "pysus.api.ducklake.models.to_thread.run_sync", + side_effect=lambda fn, *a, **kw: fn(), + ): + await mock_dataset._fetch_content() + mock_connect.assert_awaited_once() + + +# --------------------------------------------------------------------------- +# DuckGroup +# --------------------------------------------------------------------------- + + +class TestDuckGroup: + def test_name(self, mock_group): + assert mock_group.name == "acidentes" + + def test_long_name(self, mock_group): + assert mock_group.long_name == "Acidentes" + + def test_long_name_fallback(self, mock_group): + mock_group.record.long_name = None + assert mock_group.long_name == "acidentes" + + def test_description(self, mock_group): + assert mock_group.description == "Acidentes de trânsito" + + @pytest.mark.asyncio + async def test_fetch_files(self, mock_group, mock_dataset): + file_rec = CatalogFile( + path="remote/data/file.csv", + type="csv", + size=100, + rows=10, + modified=datetime.now(), + origin_size=100, + origin_path="remote/data/file.csv", + ) + mock_group.record.files = [file_rec] + + files = await mock_group._fetch_files() + assert len(files) == 1 + assert isinstance(files[0], File) + assert files[0].record is file_rec + assert files[0].group is mock_group + assert files[0].dataset is mock_dataset diff --git a/pysus/tests/api/ftp/test_client.py b/pysus/tests/api/ftp/test_client.py index e3d6b999..3fb2bb51 100644 --- a/pysus/tests/api/ftp/test_client.py +++ b/pysus/tests/api/ftp/test_client.py @@ -12,6 +12,19 @@ def ftp_client(): return client +def test_name_property(ftp_client): + assert ftp_client.name == "FTP" + + +def test_long_name_property(ftp_client): + assert ftp_client.long_name == "Pysus FTP Client" + + +def test_description_property(ftp_client): + assert isinstance(ftp_client.description, str) + assert len(ftp_client.description) > 0 + + def test_line_parser_file(ftp_client): line = "03-09-26 04:30PM 12345 filename.dbc" info = ftp_client._line_parser(line) @@ -31,6 +44,17 @@ def test_line_parser_directory(ftp_client): assert info["type"] == "dir" +def test_line_parser_with_formatter_on_directory(ftp_client): + def mock_formatter(name): + return {"year": 2026, "state": "SC"} + + line = "03-09-26 04:30PM DADOS" + info = ftp_client._line_parser(line, formatter=mock_formatter) + + assert info["type"] == "dir" + assert info["year"] is None + + def test_line_parser_with_formatter(ftp_client): def mock_formatter(name): return {"year": 2026, "state": "SC"} @@ -42,6 +66,54 @@ def mock_formatter(name): assert info["state"] == "SC" +def test_line_parser_invalid_line(ftp_client): + with pytest.raises(ValueError, match="Invalid FTP line"): + ftp_client._line_parser("only three") + + +def test_line_parser_invalid_date(ftp_client): + info = ftp_client._line_parser("invalid-date invalid-time DADOS") + assert info["name"] == "DADOS" + assert info["type"] == "dir" + assert isinstance(info["modify"], datetime) + + +@pytest.mark.asyncio +async def test_close_when_not_connected(ftp_client): + ftp_client._ftp = None + await ftp_client.close() + assert ftp_client._ftp is None + + +@pytest.mark.asyncio +async def test_connect_when_already_connected(ftp_client): + mock_ftp = MagicMock() + ftp_client._ftp = mock_ftp + await ftp_client.connect() + mock_ftp.quit.assert_not_called() + mock_ftp.close.assert_not_called() + + +@pytest.mark.asyncio +async def test_close_normal(ftp_client): + mock_ftp = MagicMock() + ftp_client._ftp = mock_ftp + await ftp_client.close() + mock_ftp.quit.assert_called_once() + assert ftp_client._ftp is None + + +@pytest.mark.asyncio +async def test_close_quit_raises_exception(ftp_client): + mock_ftp = MagicMock() + mock_ftp.quit.side_effect = Exception("connection error") + ftp_client._ftp = mock_ftp + await ftp_client.close() + mock_ftp.quit.assert_called_once() + mock_ftp.close.assert_called_once() + assert ftp_client._ftp is None + + @pytest.mark.asyncio async def test_connect_and_login(ftp_client): with patch("pysus.api.ftp.client.FTPLib") as mock_ftplib: @@ -54,6 +126,13 @@ async def test_connect_and_login(ftp_client): mock_instance.login.assert_called_once() +@pytest.mark.asyncio +async def test_datasets_raises_connection_error(ftp_client): + ftp_client._ftp = None + with pytest.raises(ConnectionError, match="not connected"): + await ftp_client.datasets() + + @pytest.mark.asyncio async def test_download_file_reconnects_on_failure(ftp_client): mock_ftp_internal = MagicMock() @@ -71,6 +150,45 @@ async def test_download_file_reconnects_on_failure(ftp_client): assert mock_connect.call_count >= 1 +@pytest.mark.asyncio +async def test_download_file_with_callback(ftp_client): + mock_ftp_internal = MagicMock() + ftp_client._ftp = mock_ftp_internal + + mock_file = MagicMock() + mock_file.path = "remote/path.dbc" + + callback = MagicMock() + + def simulate_retrbinary(cmd, cb): + cb(b"chunk_data") + + mock_ftp_internal.retrbinary.side_effect = simulate_retrbinary + + with patch("builtins.open", MagicMock()): + await ftp_client._download_file( + mock_file, pathlib.Path("test.dbc"), callback=callback + ) + callback.assert_called_once() + + +@pytest.mark.asyncio +async def test_download_file_without_callback(ftp_client): + mock_ftp_internal = MagicMock() + ftp_client._ftp = mock_ftp_internal + + mock_file = MagicMock() + mock_file.path = "remote/path.dbc" + + def simulate_retrbinary(cmd, cb): + cb(b"chunk_data") + + mock_ftp_internal.retrbinary.side_effect = simulate_retrbinary + + with patch("builtins.open", MagicMock()): + await ftp_client._download_file(mock_file, pathlib.Path("test.dbc")) + + @pytest.mark.asyncio async def test_list_directory_calls_ftp_methods(ftp_client): mock_ftp_internal = MagicMock() diff --git a/pysus/tests/api/ftp/test_databases.py b/pysus/tests/api/ftp/test_databases.py index 7379f133..dfec8be6 100644 --- a/pysus/tests/api/ftp/test_databases.py +++ b/pysus/tests/api/ftp/test_databases.py @@ -2,7 +2,18 @@ import pytest from pysus.api.ftp.client import FTP -from pysus.api.ftp.databases import AVAILABLE_DATABASES +from pysus.api.ftp.databases import ( + AVAILABLE_DATABASES, + CIHA, + CNES, + IBGEDATASUS, + PNI, + SIA, + SIH, + SIM, + SINAN, + SINASC, +) @pytest.fixture @@ -68,3 +79,95 @@ async def test_ciha_search_logic(mock_client): assert res["year"] == 2011 assert res["month"] == 1 assert res["group"]["name"] == "CIHA" + + +def test_ciha_formatter_exception(mock_client): + db = CIHA(client=mock_client) + result = db.formatter("A") + assert result == {"group": None, "state": None, "year": None, "month": None} + + +def test_cnes_formatter_exception(mock_client): + db = CNES(client=mock_client) + result = db.formatter("A") + assert result == {"group": None, "state": None, "year": None, "month": None} + + +def test_sinasc_formatter_exception(mock_client): + db = SINASC(client=mock_client) + result = db.formatter("A") + assert result == {"group": None, "state": None, "year": None} + + +def test_sim_formatter_cid9(mock_client): + db = SIM(client=mock_client) + result = db.formatter("CID9DOAC96.dbc") + assert result["state"] == "AC" + assert result["year"] == 1996 + + +def test_sim_formatter_exception(mock_client): + db = SIM(client=mock_client) + result = db.formatter("A") + assert result == {"group": None, "state": None, "year": None} + + +def test_pni_formatter_exception(mock_client): + db = PNI(client=mock_client) + result = db.formatter("A") + assert result == {"group": None, "state": None, "year": None} + + +def test_ibge_formatter_proj(mock_client): + db = IBGEDATASUS(client=mock_client) + result = db.formatter("PROJBR00.zip") + assert result["year"] == 2000 + assert result["group"]["name"] == "PROJ" + + +def test_ibge_formatter_exception(mock_client): + db = IBGEDATASUS(client=mock_client) + result = db.formatter("A") + assert result == {"group": None, "year": None} + + +def test_sia_formatter_group_not_in_definitions(mock_client): + db = SIA(client=mock_client) + result = db.formatter("ZZAC0001.dbc") + assert result["group"] is None + + +def test_sia_formatter_exception(mock_client): + db = SIA(client=mock_client) + result = db.formatter("A") + assert result == {"group": None, "state": None, "year": None, "month": None} + + +def test_sih_formatter_exception(mock_client): + db = SIH(client=mock_client) + result = db.formatter("A") + assert result == {"group": None, "state": None, "year": None, "month": None} + + +def test_sinan_formatter_src(mock_client): + db = SINAN(client=mock_client) + result = db.formatter("SRCBR06.dbc") + assert result["group"]["name"] == "SRC" + + +def test_sinan_formatter_leibr22(mock_client): + db = SINAN(client=mock_client) + result = db.formatter("LEIBR22.dbc") + assert result["group"]["name"] == "LEIV" + + +def test_sinan_formatter_lerbr19(mock_client): + db = SINAN(client=mock_client) + result = db.formatter("LERBR19.dbc") + assert result["group"]["name"] == "LERD" + + +def test_sinan_formatter_exception(mock_client): + db = SINAN(client=mock_client) + result = db.formatter("A") + assert result == {"group": None, "year": None} diff --git a/pysus/tests/api/ftp/test_models.py b/pysus/tests/api/ftp/test_models.py index 136577f0..5014e63c 100644 --- a/pysus/tests/api/ftp/test_models.py +++ b/pysus/tests/api/ftp/test_models.py @@ -1,6 +1,6 @@ from datetime import datetime from pathlib import Path -from unittest.mock import AsyncMock, MagicMock +from unittest.mock import AsyncMock, MagicMock, patch import pytest from pysus.api.ftp.client import FTP @@ -52,6 +52,81 @@ async def test_file_properties(mock_dataset): assert isinstance(file.modify, datetime) +def test_file_init_path_from_info(mock_dataset): + info = {"path": "/root/test.dbc", "name": "test.dbc", "size": 1000} + file = File( + _info=info, + type="file", + dataset=mock_dataset, + ) + assert file.path == Path("/root/test.dbc") + + +def test_file_repr(mock_dataset): + file = File( + path="/root/test.dbc", + _info={"path": "/root/test.dbc", "name": "test.dbc"}, + type="file", + dataset=mock_dataset, + ) + assert repr(file) == "test.dbc" + + +def test_file_month(mock_dataset): + info = {"path": "/root/test.dbc", "name": "test.dbc", "month": 6} + file = File( + path="/root/test.dbc", + _info=info, + type="file", + dataset=mock_dataset, + ) + assert file.month == 6 + + +def test_file_modify_raises_value_error(mock_dataset): + info = {"path": "/root/test.dbc", "name": "test.dbc"} + file = File( + path="/root/test.dbc", + _info=info, + type="file", + dataset=mock_dataset, + ) + with pytest.raises(ValueError, match="modify"): + _ = file.modify + + +@pytest.mark.asyncio +async def test_file_download_no_output(mock_client, mock_dataset, tmp_path): + file = File( + path="/root/test.dbc", + _info={"path": "/root/test.dbc", "name": "test.dbc"}, + type="file", + dataset=mock_dataset, + ) + cache_dir = tmp_path / "cache" + cache_dir.mkdir(parents=True, exist_ok=True) + with patch("pysus.api.ftp.models.CACHEPATH", cache_dir): + await file._download() + mock_client._download_file.assert_called_once() + args, _ = mock_client._download_file.call_args + assert args[1] == cache_dir / "test.dbc" + + +@pytest.mark.asyncio +async def test_file_download_calls_client(mock_client, mock_dataset, tmp_path): + file = File( + path="/root/test.dbc", + _info={"path": "/root/test.dbc", "name": "test.dbc"}, + type="file", + dataset=mock_dataset, + ) + + dest = Path(tmp_path / "test.dbc") + await file._download(output=dest) + + mock_client._download_file.assert_called_once_with(file, dest, None) + + @pytest.mark.asyncio async def test_directory_load(mock_client, mock_dataset): mock_client._list_directory.return_value = [ @@ -75,6 +150,23 @@ async def test_directory_load(mock_client, mock_dataset): assert Path(content[1].path) == Path("/root/file.dbc") +@pytest.mark.asyncio +async def test_directory_load_no_ftp_client(): + dr = Directory(path="/root/test", client=MagicMock()) + with pytest.raises(ValueError, match="no ftp client found"): + await dr.load() + + +def test_directory_str(): + dr = Directory(path="/root/test") + assert str(dr).replace("\\", "/") == "/root/test" + + +def test_directory_repr(): + dr = Directory(path="/root/test") + assert repr(dr).replace("\\", "/") == "" + + @pytest.mark.asyncio async def test_group_instantiation(mock_dataset): group = Group( @@ -90,6 +182,50 @@ async def test_group_instantiation(mock_dataset): assert group.path == "/root/DC" +def test_group_description(mock_dataset): + group = Group( + name="TEST", + path="/root/TEST", + dataset=mock_dataset, + long_name="Test Group", + description="A test group description", + ) + assert group.description == "A test group description" + + +@pytest.mark.asyncio +async def test_group_content(mock_client, mock_dataset): + group = Group( + name="TEST", + path="/root/TEST", + dataset=mock_dataset, + long_name="Test Group", + description="Test", + ) + group._dir._content = [MagicMock(spec=Directory), MagicMock(spec=File)] + group._dir.loaded = True + content = await group.content + assert len(content) == 2 + + +@pytest.mark.asyncio +async def test_group_fetch_files(mock_client, mock_dataset): + group = Group( + name="TEST", + path="/root/TEST", + dataset=mock_dataset, + long_name="Test Group", + description="Test", + ) + dir1 = MagicMock(spec=Directory) + file1 = MagicMock(spec=File) + group._dir._content = [dir1, file1] + group._dir.loaded = True + files = await group._fetch_files() + assert len(files) == 1 + assert files[0] is file1 + + @pytest.mark.asyncio async def test_dataset_fetch_content(mock_client): class TestDB(Dataset): @@ -128,15 +264,74 @@ def formatter(self, f): @pytest.mark.asyncio -async def test_file_download_calls_client(mock_client, mock_dataset, tmp_path): - file = File( - path="/root/test.dbc", - _info={"path": "/root/test.dbc", "name": "test.dbc"}, - type="file", - dataset=mock_dataset, - ) +async def test_dataset_fetch_content_skips_non_file_non_dir(mock_client): + class TestDB(Dataset): + @property + def name(self): + return "TEST" - dest = Path(tmp_path / "test.dbc") - await file._download(output=dest) + @property + def long_name(self): + return "Test DB" - mock_client._download_file.assert_called_once_with(file, dest, None) + @property + def description(self): + return "Testing" + + def formatter(self, f): + return {} + + db = TestDB(client=mock_client) + root = Directory(path="/root", client=mock_client, dataset=db) + db.paths = [root] + root._content = [MagicMock(spec=object)] + root.loaded = True + + result = await db._fetch_content() + assert len(result) == 0 + + +@pytest.mark.asyncio +async def test_dataset_fetch_content_raises_runtime_error(mock_client): + class TestDB(Dataset): + @property + def name(self): + return "TEST" + + @property + def long_name(self): + return "Test DB" + + @property + def description(self): + return "Testing" + + def formatter(self, f): + return {} + + db = TestDB(client=mock_client) + fake_dir = MagicMock() + db.paths = [fake_dir] + with pytest.raises(RuntimeError, match="not instantiated"): + await db._fetch_content() + + +def test_dataset_repr(mock_client): + class TestDB(Dataset): + @property + def name(self): + return "TEST" + + @property + def long_name(self): + return "Test DB" + + @property + def description(self): + return "Testing" + + def formatter(self, f): + return {} + + db = TestDB(client=mock_client) + assert repr(db) == "TEST" diff --git a/pysus/tests/api/test_client.py b/pysus/tests/api/test_client.py index 0de234bc..32c48cc3 100644 --- a/pysus/tests/api/test_client.py +++ b/pysus/tests/api/test_client.py @@ -75,6 +75,23 @@ def test_download_status_values(self): assert DownloadStatus.MISSING.value == "missing" +class TestGetLocalFile: + @pytest.mark.asyncio + async def test_get_local_file_returns_none_when_no_records( + self, test_db_path + ): + client = PySUS(db_path=test_db_path) + + mock_remote_file = MagicMock() + mock_remote_file.client.name = "FTP" + mock_remote_file.path = "/remote/nonexistent.dbc" + + result = await client.get_local_file(mock_remote_file) + assert result is None + + await client.__aexit__(None, None, None) + + class TestLocalFileState: @pytest.mark.asyncio async def test_update_state_creates_record(self, test_db_path, tmp_path): @@ -149,7 +166,8 @@ async def test_get_local_file_finds_existing(self, test_db_path, tmp_path): mock_remote_file.path = "/remote/test.dbc" with patch( - "pysus.api.extensions.ExtensionFactory.instantiate" + "pysus.api.extensions.ExtensionFactory.instantiate", + new_callable=AsyncMock, ) as mock_factory: mock_factory.return_value = MagicMock() await client.get_local_file(mock_remote_file) @@ -186,10 +204,113 @@ async def test_get_completed_remote_paths(self, test_db_path, tmp_path): await client.__aexit__(None, None, None) +class TestGetLocalHierarchy: + @pytest.mark.asyncio + async def test_get_local_hierarchy_all_branches( + self, test_db_path, tmp_path + ): + client = PySUS(db_path=test_db_path) + + file1 = ( + tmp_path / "downloads" / "ftp" / "sinasc" / "DC" / "DNAC2024.dbc" + ) + file1.parent.mkdir(parents=True, exist_ok=True) + file1.write_text("dummy") + + file2 = tmp_path / "downloads" / "ftp" / "sinasc" / "DNAC2024.dbc" + file2.parent.mkdir(parents=True, exist_ok=True) + file2.write_text("dummy") + + file3 = tmp_path / "short" / "path.dbc" + file3.parent.mkdir(parents=True, exist_ok=True) + file3.write_text("dummy") + + dir_path = tmp_path / "downloads" / "ftp" / "sinasc" / "DC" + dir_path.mkdir(parents=True, exist_ok=True) + + with client.Session() as session: + r1 = LocalFileState( + path=str(file1), + remote_path="/remote/file1.dbc", + client_name="ftp", + status=DownloadStatus.COMPLETED, + group="DC", + ) + session.add(r1) + + r2 = LocalFileState( + path=str(file2), + remote_path="/remote/file2.dbc", + client_name="ftp", + status=DownloadStatus.COMPLETED, + group=None, + ) + session.add(r2) + + r3 = LocalFileState( + path=str(file3), + remote_path="/remote/file3.dbc", + client_name="ftp", + status=DownloadStatus.PENDING, + group="X", + ) + session.add(r3) + + r4 = LocalFileState( + path=str(dir_path), + remote_path="/remote/dir.dbc", + client_name="ftp", + status=DownloadStatus.COMPLETED, + group="DC", + ) + session.add(r4) + + session.commit() + + hierarchy = client.get_local_hierarchy() + + assert "FTP" in hierarchy + ftp_dict = hierarchy["FTP"] + + assert "DC" in ftp_dict + ds_dc = ftp_dict["DC"] + assert "DC" in ds_dc + assert len(ds_dc["DC"]) == 1 + assert ds_dc["DC"][0]["name"] == "DNAC2024.dbc" + assert ds_dc["DC"][0]["status"] == DownloadStatus.COMPLETED + + assert "ftp" in ftp_dict + ds_ftp = ftp_dict["ftp"] + assert "" in ds_ftp + assert len(ds_ftp[""]) == 1 + assert ds_ftp[""][0]["name"] == "DNAC2024.dbc" + + assert "sinasc" in ftp_dict + ds_sinasc = ftp_dict["sinasc"] + assert "DC" in ds_sinasc + assert ds_sinasc["DC"][0]["name"] == "DC" + + dc_dict = ftp_dict.get("short") + assert dc_dict is not None + assert "X" in dc_dict + assert dc_dict["X"][0]["status"] == DownloadStatus.PENDING + + await client.__aexit__(None, None, None) + + class TestPySUSQuery: + @pytest.fixture + def mock_dataset(self): + ds = MagicMock() + ds.name = "sinan" + ds.query = AsyncMock(return_value=[]) + return ds + @pytest.mark.asyncio - async def test_query_with_dataset(self, test_db_path, tmp_path): - from unittest.mock import AsyncMock, MagicMock + async def test_query_with_dataset( + self, test_db_path, tmp_path, mock_dataset + ): + from unittest.mock import MagicMock from pysus.api.ducklake.client import DuckLake @@ -198,16 +319,16 @@ async def test_query_with_dataset(self, test_db_path, tmp_path): mock_ducklake = MagicMock(spec=DuckLake) mock_file = MagicMock() mock_file.path = tmp_path / "test.parquet" - mock_ducklake.query = AsyncMock(return_value=[mock_file]) + mock_dataset.query = AsyncMock(return_value=[mock_file]) + mock_ducklake.datasets = AsyncMock(return_value=[mock_dataset]) client._ducklake = mock_ducklake client._attach_client_catalog = MagicMock() result = await client.query(dataset="sinan") - mock_ducklake.query.assert_called_once_with( - client=None, - dataset="sinan", + mock_ducklake.datasets.assert_called_once() + mock_dataset.query.assert_called_once_with( group=None, state=None, year=None, @@ -217,24 +338,22 @@ async def test_query_with_dataset(self, test_db_path, tmp_path): await client.__aexit__(None, None, None) @pytest.mark.asyncio - async def test_query_with_group(self, test_db_path): - from unittest.mock import AsyncMock, MagicMock + async def test_query_with_group(self, test_db_path, mock_dataset): + from unittest.mock import MagicMock from pysus.api.ducklake.client import DuckLake client = PySUS(db_path=test_db_path) mock_ducklake = MagicMock(spec=DuckLake) - mock_ducklake.query = AsyncMock(return_value=[]) + mock_ducklake.datasets = AsyncMock(return_value=[mock_dataset]) client._ducklake = mock_ducklake client._attach_client_catalog = MagicMock() await client.query(dataset="sinan", group="DENGUE") - mock_ducklake.query.assert_called_once_with( - client=None, - dataset="sinan", + mock_dataset.query.assert_called_once_with( group="DENGUE", state=None, year=None, @@ -251,7 +370,10 @@ async def test_query_with_all_params(self, test_db_path): client = PySUS(db_path=test_db_path) mock_ducklake = MagicMock(spec=DuckLake) - mock_ducklake.query = AsyncMock(return_value=[]) + ds = MagicMock() + ds.name = "sinasc" + ds.query = AsyncMock(return_value=[]) + mock_ducklake.datasets = AsyncMock(return_value=[ds]) client._ducklake = mock_ducklake client._attach_client_catalog = MagicMock() @@ -264,9 +386,7 @@ async def test_query_with_all_params(self, test_db_path): month=1, ) - mock_ducklake.query.assert_called_once_with( - client=None, - dataset="sinasc", + ds.query.assert_called_once_with( group="DC", state="SP", year=2024, @@ -275,7 +395,7 @@ async def test_query_with_all_params(self, test_db_path): await client.__aexit__(None, None, None) @pytest.mark.asyncio - async def test_query_initializes_ducklake(self, test_db_path): + async def test_query_initializes_ducklake(self, test_db_path, mock_dataset): from unittest.mock import AsyncMock, MagicMock, patch import duckdb @@ -285,8 +405,8 @@ async def test_query_initializes_ducklake(self, test_db_path): assert client._ducklake is None mock_ducklake_instance = MagicMock(spec=DuckLake) - mock_ducklake_instance.query = AsyncMock(return_value=[]) - tmp_catalog_path = test_db_path.parent / "catalog.db" + mock_ducklake_instance.datasets = AsyncMock(return_value=[mock_dataset]) + tmp_catalog_path = test_db_path.parent / "catalog.duckdb" mock_ducklake_instance.catalog_path = tmp_catalog_path # Create the catalog database @@ -301,6 +421,112 @@ async def test_query_initializes_ducklake(self, test_db_path): assert client._ducklake is not None await client.__aexit__(None, None, None) + @pytest.mark.asyncio + async def test_query_raises_connection_error_when_ducklake_stays_none( + self, test_db_path + ): + client = PySUS(db_path=test_db_path) + client._ducklake = None + + with patch.object( + client, "get_ducklake", new=AsyncMock(return_value=None) + ): + with pytest.raises( + ConnectionError, match="Could not connect to PySUS s3 bucket" + ): + await client.query(dataset="sinan") + + await client.__aexit__(None, None, None) + + @pytest.mark.asyncio + async def test_query_dataset_not_found_returns_empty(self, test_db_path): + from unittest.mock import AsyncMock, MagicMock + + from pysus.api.ducklake.client import DuckLake + + client = PySUS(db_path=test_db_path) + + mock_ducklake = MagicMock(spec=DuckLake) + ds = MagicMock() + ds.name = "sinasc" + mock_ducklake.datasets = AsyncMock(return_value=[ds]) + + client._ducklake = mock_ducklake + client._attach_client_catalog = MagicMock() + + result = await client.query(dataset="sinan") + assert result == [] + + await client.__aexit__(None, None, None) + + @pytest.mark.asyncio + async def test_query_no_dataset_iterates_all(self, test_db_path): + from unittest.mock import AsyncMock, MagicMock + + from pysus.api.ducklake.client import DuckLake + + client = PySUS(db_path=test_db_path) + + mock_ducklake = MagicMock(spec=DuckLake) + ds1 = MagicMock() + ds1.name = "sinan" + ds1.query = AsyncMock(return_value=["file1"]) + ds2 = MagicMock() + ds2.name = "sinasc" + ds2.query = AsyncMock(return_value=["file2", "file3"]) + mock_ducklake.datasets = AsyncMock(return_value=[ds1, ds2]) + + client._ducklake = mock_ducklake + client._attach_client_catalog = MagicMock() + + result = await client.query() + + ds1.query.assert_awaited_once_with( + group=None, + state=None, + year=None, + month=None, + ) + ds2.query.assert_awaited_once_with( + group=None, + state=None, + year=None, + month=None, + ) + assert result == ["file1", "file2", "file3"] + + await client.__aexit__(None, None, None) + + @pytest.mark.asyncio + async def test_query_with_client_filter(self, test_db_path): + from unittest.mock import AsyncMock, MagicMock + + from pysus.api.ducklake.client import DuckLake + from pysus.api.types import FTP + + client = PySUS(db_path=test_db_path) + + mock_ducklake = MagicMock(spec=DuckLake) + ds = MagicMock() + ds.name = "sinan" + + mock_file1 = MagicMock() + mock_file1.record.path = "public/data/ftp/somefile" + mock_file2 = MagicMock() + mock_file2.record.path = "public/data/dadosgov/otherfile" + + ds.query = AsyncMock(return_value=[mock_file1, mock_file2]) + mock_ducklake.datasets = AsyncMock(return_value=[ds]) + + client._ducklake = mock_ducklake + client._attach_client_catalog = MagicMock() + + result = await client.query(dataset="sinan", client=FTP) + + assert result == [mock_file1] + + await client.__aexit__(None, None, None) + class TestDownload: @pytest.mark.asyncio @@ -365,7 +591,10 @@ async def test_download_re_fetches_when_size_differs(self, test_db_path): get_ftp_patch, ): with patch.object( - ExtensionFactory, "instantiate", return_value=mock_local + ExtensionFactory, + "instantiate", + new_callable=AsyncMock, + return_value=mock_local, ): mock_client = AsyncMock() mock_client._download_file = AsyncMock() @@ -406,7 +635,10 @@ async def _slow_download(*args, **kwargs): ), patch.object(client, "_update_state", new=AsyncMock()), patch.object( - ExtensionFactory, "instantiate", return_value=mock_local + ExtensionFactory, + "instantiate", + new_callable=AsyncMock, + return_value=mock_local, ), ): mock_client = AsyncMock() @@ -418,6 +650,210 @@ async def _slow_download(*args, **kwargs): ): await client.download(mock_file, timeout=0.001) + @pytest.mark.asyncio + async def test_download_with_ducklake_client(self, test_db_path): + from unittest.mock import AsyncMock, MagicMock, patch + + from pysus.api.extensions import ExtensionFactory + + client = PySUS(db_path=test_db_path) + + mock_local = MagicMock() + mock_local.path.exists.return_value = False + + mock_file = MagicMock() + mock_file.client.name = "ducklake" + mock_file.size = 1000 + mock_file.path = test_db_path.parent / "remote.ducklake" + mock_file.basename = "remote.ducklake" + mock_file.year = None + mock_file.month = None + mock_file.state = None + mock_group = MagicMock() + mock_group.name = None + mock_file.group = MagicMock() + + with ( + patch.object( + client, "get_local_file", new=AsyncMock(return_value=mock_local) + ), + patch.object( + client, + "_get_dest_path", + return_value=test_db_path.parent / "test.ducklake", + ), + patch.object(client, "_update_state", new=AsyncMock()), + patch.object( + ExtensionFactory, + "instantiate", + new_callable=AsyncMock, + return_value=mock_local, + ), + ): + mock_ducklake = AsyncMock() + mock_ducklake._download_file = AsyncMock() + client._ducklake = mock_ducklake + + result = await client.download(mock_file) + + assert result is not None + + await client.__aexit__(None, None, None) + + @pytest.mark.asyncio + async def test_download_with_dadosgov_client(self, test_db_path): + from unittest.mock import AsyncMock, MagicMock, patch + + from pysus.api.extensions import ExtensionFactory + + client = PySUS(db_path=test_db_path) + + mock_local = MagicMock() + mock_local.path.exists.return_value = False + + mock_file = MagicMock() + mock_file.client.name = "dadosgov" + mock_file.size = 1000 + mock_file.path = test_db_path.parent / "remote.dadosgov" + mock_file.basename = "remote.dadosgov" + mock_file.year = None + mock_file.month = None + mock_file.state = None + mock_file.group = MagicMock() + mock_file.group.name = None + + with ( + patch.object( + client, "get_local_file", new=AsyncMock(return_value=mock_local) + ), + patch.object( + client, + "_get_dest_path", + return_value=test_db_path.parent / "test.dadosgov", + ), + patch.object(client, "_update_state", new=AsyncMock()), + patch.object( + ExtensionFactory, + "instantiate", + new_callable=AsyncMock, + return_value=mock_local, + ), + ): + mock_dadosgov = AsyncMock() + mock_dadosgov._download_file = AsyncMock() + client._dadosgov = mock_dadosgov + + result = await client.download(mock_file, token="test_token") + + assert result is not None + + await client.__aexit__(None, None, None) + + @pytest.mark.asyncio + async def test_download_with_unknown_client_raises_valueerror( + self, test_db_path + ): + from unittest.mock import AsyncMock, MagicMock, patch + + client = PySUS(db_path=test_db_path) + + mock_local = MagicMock() + mock_local.path.exists.return_value = False + + mock_file = MagicMock() + mock_file.client.name = "unknown" + mock_file.size = 1000 + mock_file.basename = "test.unknown" + mock_file.path = test_db_path.parent / "test.unknown" + + with ( + patch.object( + client, "get_local_file", new=AsyncMock(return_value=mock_local) + ), + patch.object( + client, + "_get_dest_path", + return_value=test_db_path.parent / "test.unknown", + ), + patch.object(client, "_update_state", new=AsyncMock()), + ): + with pytest.raises( + RuntimeError, + match=( + "Unexpected error downloading test.unknown:" + " No download logic for client: unknown" + ), + ): + await client.download(mock_file) + + await client.__aexit__(None, None, None) + + +class TestDownloadToParquet: + @pytest.mark.asyncio + async def test_download_to_parquet_success(self, test_db_path, tmp_path): + from unittest.mock import AsyncMock, MagicMock, patch + + client = PySUS(db_path=test_db_path) + + original_path = tmp_path / "test.dbc" + original_path.write_text("dummy content") + + parquet_path = tmp_path / "test.parquet" + + mock_parquet_file = MagicMock() + mock_parquet_file.path = parquet_path + + mock_local_file = MagicMock() + mock_local_file.path = original_path + mock_local_file.to_parquet = AsyncMock(return_value=mock_parquet_file) + + mock_file = MagicMock() + mock_file.path = "/remote/test.dbc" + mock_file.client.name = "ftp" + mock_file.year = 2024 + mock_file.month = 1 + mock_file.state = "SP" + mock_file.group = MagicMock() + mock_file.group.name = "DC" + + with ( + patch.object( + client, "download", new=AsyncMock(return_value=mock_local_file) + ), + patch.object(client, "_update_state", new=AsyncMock()), + patch.object(client, "_delete_record", new=AsyncMock()), + ): + result = await client.download_to_parquet(mock_file) + + assert result == mock_parquet_file + assert result.add_dv is True + mock_local_file.to_parquet.assert_awaited_once() + + await client.__aexit__(None, None, None) + + @pytest.mark.asyncio + async def test_download_to_parquet_not_tabular_raises(self, test_db_path): + from unittest.mock import AsyncMock, MagicMock, patch + + client = PySUS(db_path=test_db_path) + + mock_local_file = MagicMock(spec=[]) + + mock_file = MagicMock() + mock_file.path = "/remote/test.dbc" + mock_file.client.name = "ftp" + + with patch.object( + client, "download", new=AsyncMock(return_value=mock_local_file) + ): + with pytest.raises( + NotImplementedError, match="can't be converted to Parquet" + ): + await client.download_to_parquet(mock_file) + + await client.__aexit__(None, None, None) + class TestReadParquet: def test_read_parquet_single_path(self, tmp_path): @@ -474,6 +910,38 @@ def test_read_parquet_intersection_mode(self, tmp_path): assert len(df) == 2 assert list(df.columns) == ["a"] + def test_read_parquet_intersection_no_common_columns(self, tmp_path): + import duckdb + import pandas as pd + + parquet1 = tmp_path / "test1.parquet" + parquet2 = tmp_path / "test2.parquet" + + pd.DataFrame({"a": [1], "b": [2]}).to_parquet(parquet1) + pd.DataFrame({"c": [3], "d": [4]}).to_parquet(parquet2) + + from pysus.api.client import PySUS + + client = PySUS(db_path=tmp_path / "config.db") + + original_execute = duckdb.execute + + def side_effect(sql, *args, **kwargs): + if sql == "SELECT * WHERE 1=0": + result = MagicMock() + result.description = [] + result.df.return_value = pd.DataFrame() + result.fetchall.return_value = [] + return result + return original_execute(sql, *args, **kwargs) + + with patch.object(duckdb, "execute", side_effect=side_effect): + result = client.read_parquet( + [parquet1, parquet2], mode="intersection" + ) + df = result.df() + assert len(df) == 0 + def test_read_parquet_strict_mode_matching_schemas(self, tmp_path): import pandas as pd @@ -524,6 +992,22 @@ def test_read_parquet_with_sql(self, tmp_path): assert len(df) == 2 assert list(df.columns) == ["a"] + def test_read_parquet_sql_not_select(self, tmp_path): + import pandas as pd + + parquet_file = tmp_path / "test.parquet" + pd.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}).to_parquet( + parquet_file + ) + + from pysus.api.client import PySUS + + client = PySUS(db_path=tmp_path / "config.db") + result = client.read_parquet([parquet_file], sql="a + b AS c") + df = result.df() + + assert list(df.columns) == ["c"] + def test_read_parquet_no_paths_raises(self, tmp_path): from pysus.api.client import PySUS @@ -581,6 +1065,27 @@ def test_read_parquet_add_dv_false_returns_raw(self, tmp_path): out = result.df() assert out["ID_MUNICIP"].iloc[0] == "261160" + def test_read_parquet_add_dv_create_function_exception(self, tmp_path): + import duckdb + import pandas as pd + + parquet_file = tmp_path / "test.parquet" + df = pd.DataFrame({"ID_MUNICIP": ["261160"], "value": [1]}) + df.to_parquet(parquet_file) + + from pysus.api.client import PySUS + + client = PySUS(db_path=tmp_path / "config.db") + + with patch.object( + duckdb, + "create_function", + side_effect=duckdb.NotImplementedException(), + ): + result = client.read_parquet([parquet_file], add_dv=True) + out = result.df() + assert out["ID_MUNICIP"].iloc[0] == "2611606" + class TestPySUSGetMethods: @pytest.mark.asyncio @@ -625,13 +1130,13 @@ async def test_aenter(self, test_db_path): with ( patch.object( - DuckLake, "_load_catalog", new_callable=AsyncMock - ) as mock_load, + DuckLake, "_download_catalog", new_callable=AsyncMock + ) as mock_download, patch.object(PySUS, "_attach_client_catalog") as mock_attach, ): await client.__aenter__() assert client._ducklake is not None - mock_load.assert_called_once() + mock_download.assert_called_once() mock_attach.assert_called_once() await client.__aexit__(None, None, None) diff --git a/pysus/tests/api/test_databases.py b/pysus/tests/api/test_databases.py index 7bd37c4d..8a398038 100644 --- a/pysus/tests/api/test_databases.py +++ b/pysus/tests/api/test_databases.py @@ -1,5 +1,9 @@ +import asyncio +import sys from unittest.mock import AsyncMock, MagicMock, patch +import pytest + class TestSinan: def test_sinan_calls_fetch_data(self): @@ -301,6 +305,95 @@ def test_fetch_data_no_files(self): assert len(result) == 0 mock_pysus.download.assert_not_called() + def test_fetch_data_with_progress(self): + with ( + patch("pysus.api._impl.databases.PySUS") as mock_pysus_class, + patch("pysus.api._impl.databases.tqdm", new=lambda x, **kw: x), + ): + mock_pysus = MagicMock() + enter_mock = AsyncMock(return_value=mock_pysus) + exit_mock = AsyncMock() + mock_pysus_class.return_value.__aenter__ = enter_mock + mock_pysus_class.return_value.__aexit__ = exit_mock + + mock_file = MagicMock() + mock_file.path = "/tmp/test.parquet" + mock_pysus.query = AsyncMock(return_value=[mock_file, mock_file]) + mock_pysus.download = AsyncMock(return_value=mock_file) + mock_pysus.read_parquet.return_value.df.return_value = MagicMock() + + from pysus.api._impl.databases import _fetch_data + + _fetch_data(dataset="sinan", year=2024, show_progress=True) + + assert mock_pysus.download.call_count == 2 + + +class TestFetchDataRunningLoop: + def test_fetch_data_running_loop_no_nest_asyncio_raises(self): + saved = sys.modules.pop("nest_asyncio", None) + import builtins + + real_import = builtins.__import__ + + def raising_import(name, *args, **kwargs): + if name == "nest_asyncio": + raise ImportError(f"No module named {name}") + return real_import(name, *args, **kwargs) + + try: + + async def _inner(): + from pysus.api._impl.databases import _fetch_data + + with patch("builtins.__import__", side_effect=raising_import): + with pytest.raises( + RuntimeError, match="nest_asyncio is required" + ): + _fetch_data( + dataset="sinan", + year=2024, + show_progress=False, + ) + + asyncio.run(_inner()) + finally: + if saved is not None: + sys.modules["nest_asyncio"] = saved + + def test_fetch_data_running_loop_with_nest_asyncio(self): + nest_mock = MagicMock() + + async def _inner(): + with ( + patch("pysus.api._impl.databases.PySUS") as mock_pysus_class, + patch.dict("sys.modules", {"nest_asyncio": nest_mock}), + ): + mock_pysus = MagicMock() + mock_pysus_class.return_value.__aenter__ = AsyncMock( + return_value=mock_pysus + ) + mock_pysus_class.return_value.__aexit__ = AsyncMock() + mock_pysus.query = AsyncMock(return_value=[]) + + from pysus.api._impl.databases import _fetch_data + + loop = asyncio.get_running_loop() + expected = MagicMock() + + with patch.object( + loop, "run_until_complete", return_value=expected + ): + result = _fetch_data( + dataset="sinan", + year=2024, + show_progress=False, + ) + nest_mock.apply.assert_called_once() + assert result == expected + + asyncio.run(_inner()) + class TestListFiles: def _mock_asyncio_run(self, return_value): @@ -385,3 +478,91 @@ def test_list_files_empty_result(self): assert isinstance(result, pd.DataFrame) assert len(result) == 0 + + def test_list_files_with_real_coroutine(self): + import pandas as pd + + mock_record = MagicMock() + mock_record.path = "/remote/sinan/dengue.parquet" + mock_record.dataset.name = "sinan" + mock_record.group.name = "DENGUE" + mock_record.record.year = 2024 + mock_record.record.month = 1 + mock_record.record.state = "SP" + mock_record.record.origin_modified = "2024-01-15" + + with patch("pysus.api._impl.databases.PySUS") as mock_pysus_class: + mock_pysus = MagicMock() + mock_pysus_class.return_value.__aenter__ = AsyncMock( + return_value=mock_pysus + ) + mock_pysus_class.return_value.__aexit__ = AsyncMock() + mock_pysus.query = AsyncMock(return_value=[mock_record]) + + from pysus.api._impl.databases import list_files + + result = list_files(dataset="SINAN", year=2024, month=1) + + assert isinstance(result, pd.DataFrame) + assert len(result) == 1 + assert result.iloc[0]["name"] == "dengue.parquet" + assert result.iloc[0]["path"] == "/remote/sinan/dengue.parquet" + assert result.iloc[0]["dataset"] == "sinan" + assert result.iloc[0]["group"] == "DENGUE" + assert result.iloc[0]["year"] == 2024 + assert result.iloc[0]["month"] == 1 + assert result.iloc[0]["state"] == "SP" + assert result.iloc[0]["modify"] == "2024-01-15" + + def test_list_files_with_none_fields(self): + mock_record = MagicMock() + mock_record.path = "/remote/sinan/dengue.parquet" + mock_record.dataset = None + mock_record.group = None + mock_record.record.year = 2024 + mock_record.record.month = 1 + mock_record.record.state = "SP" + mock_record.record.origin_modified = "2024-01-15" + + with patch("pysus.api._impl.databases.PySUS") as mock_pysus_class: + mock_pysus = MagicMock() + mock_pysus_class.return_value.__aenter__ = AsyncMock( + return_value=mock_pysus + ) + mock_pysus_class.return_value.__aexit__ = AsyncMock() + mock_pysus.query = AsyncMock(return_value=[mock_record]) + + from pysus.api._impl.databases import list_files + + result = list_files(dataset="SINAN") + + assert result.iloc[0]["dataset"] is None + assert result.iloc[0]["group"] is None + + def test_list_files_with_multiple_records(self): + records = [] + for i in range(3): + r = MagicMock() + r.path = f"/remote/sinan/file{i}.parquet" + r.dataset.name = "sinan" + r.group.name = "DENGUE" + r.record.year = 2024 + r.record.month = i + 1 + r.record.state = "SP" + r.record.origin_modified = "2024-01-15" + records.append(r) + + with patch("pysus.api._impl.databases.PySUS") as mock_pysus_class: + mock_pysus = MagicMock() + mock_pysus_class.return_value.__aenter__ = AsyncMock( + return_value=mock_pysus + ) + mock_pysus_class.return_value.__aexit__ = AsyncMock() + mock_pysus.query = AsyncMock(side_effect=[records[:2], records[2:]]) + + from pysus.api._impl.databases import list_files + + result = list_files(dataset="SINAN", year=[2023, 2024]) + + assert len(result) == 3 + assert mock_pysus.query.call_count == 2 diff --git a/pysus/tests/api/test_extensions.py b/pysus/tests/api/test_extensions.py index f7fc9559..1e3e8458 100644 --- a/pysus/tests/api/test_extensions.py +++ b/pysus/tests/api/test_extensions.py @@ -1,15 +1,19 @@ +import builtins import gzip import json +import struct import tarfile import zipfile from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch import pandas as pd +import pyarrow as pa +import pyarrow.parquet as pq import pytest from pysus.api.extensions import ( CSV, DBC, - DBC_IMPORT, DBF, JSON, PDF, @@ -20,7 +24,9 @@ Parquet, Tar, Zip, + _map_dtype, ) +from pysus.api.models import BaseLocalFile @pytest.fixture @@ -35,6 +41,70 @@ async def collect_async(gen): return out +def _create_dbf(path, fields, records): + """Create a minimal valid DBF file at *path*. + + Parameters + ---------- + fields : list of (name, type, length, decimal) + records : list of tuples + """ + from datetime import date + + today = date.today() + num_records = len(records) + field_desc_len = 32 * len(fields) + header_len = 32 + field_desc_len + 1 + record_len = 1 + sum(f[2] for f in fields) + + buf = bytearray() + # Version (0x03 = FoxBASE) + buf.append(0x03) + # Last update date + buf.append(today.year - 1900) + buf.append(today.month) + buf.append(today.day) + # Number of records + buf.extend(struct.pack("= 1 + assert chunks[0]["ID_MUNICIP"].iloc[0] == "2611606" + assert str(chunks[0]["DT_NOTIFIC"].iloc[0]) == "2023-01-01" + + +# --------------------------------------------------------------------------- +# New tests for lines 315, 324-326: parse_dftypes edge cases +# --------------------------------------------------------------------------- + + +def test_parse_dftypes_edge_cases(): + df = pd.DataFrame( + { + "DT_NOTIFIC": [123, "not_a_date", "20230101"], + "CODMUNRES": [float("nan"), None, " 330455 "], + "IDADE": [None, float("nan"), "25"], + } + ) + result = Parquet.parse_dftypes(df) + + assert result["DT_NOTIFIC"].iloc[0] == 123 + assert result["DT_NOTIFIC"].iloc[1] == "not_a_date" + assert str(result["DT_NOTIFIC"].iloc[2]) == "2023-01-01" + + assert pd.isna(result["CODMUNRES"].iloc[0]) + assert pd.isna(result["CODMUNRES"].iloc[1]) + assert result["CODMUNRES"].iloc[2] == 330455 + + assert pd.isna(result["IDADE"].iloc[0]) + assert pd.isna(result["IDADE"].iloc[1]) + assert result["IDADE"].iloc[2] == 25 + + +# --------------------------------------------------------------------------- +# New tests for lines 351-360, 370, 394, 402-403, 413-427: DBF +# --------------------------------------------------------------------------- + + +def test_dbf_columns(tmp_dir): + pytest.importorskip("dbfread") + dbf_path = tmp_dir / "test.dbf" + _create_dbf( + dbf_path, + [("NAME", "C", 20, 0), ("AGE", "N", 3, 0), ("SALARY", "F", 10, 2)], + [("Alice", 30, 5000.00)], + ) + obj = DBF(path=dbf_path) + cols = obj.columns + assert len(cols) == 3 + assert cols[0].dtype == "VARCHAR" + assert cols[1].dtype == "INTEGER" + assert cols[2].dtype == "FLOAT" + + +def test_dbf_rows(tmp_dir): + pytest.importorskip("dbfread") + dbf_path = tmp_dir / "test.dbf" + _create_dbf( + dbf_path, + [("NAME", "C", 10, 0)], + [("Alice",), ("Bob",), ("Charlie",)], + ) + obj = DBF(path=dbf_path) + assert obj.rows == 3 + + +def test_dbf_decode_column_non_string(): + obj = DBF(path=Path("/dummy")) + assert obj.decode_column(123) == 123 + assert obj.decode_column(45.6) == 45.6 + assert obj.decode_column(None) is None + + +@pytest.mark.asyncio +async def test_dbf_load(tmp_dir): + pytest.importorskip("dbfread") + dbf_path = tmp_dir / "test.dbf" + _create_dbf( + dbf_path, + [("NAME", "C", 10, 0), ("AGE", "N", 3, 0)], + [("Alice", 30), ("Bob", 25)], + ) + obj = DBF(path=dbf_path) + df = await obj.load() + assert len(df) == 2 + assert list(df.columns) == ["NAME", "AGE"] + assert df["NAME"].iloc[0] == "Alice" + + +@pytest.mark.asyncio +async def test_dbf_stream(tmp_dir): + pytest.importorskip("dbfread") + dbf_path = tmp_dir / "test.dbf" + _create_dbf( + dbf_path, + [("VAL", "N", 5, 0)], + [(10,), (20,), (30,), (40,), (50,)], + ) + obj = DBF(path=dbf_path) + chunks = await collect_async(obj.stream(chunk_size=2)) + assert len(chunks) >= 2 + assert all(isinstance(c, pd.DataFrame) for c in chunks) + assert len(chunks[0]) == 2 + + +# --------------------------------------------------------------------------- +# New tests for lines 445-447, 452-490, 493-496: DBF.to_parquet +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_dbf_to_parquet(tmp_dir): + pytest.importorskip("dbfread") + dbf_path = tmp_dir / "test.dbf" + _create_dbf( + dbf_path, + [("NAME", "C", 10, 0)], + [("Alice",), ("Bob",), ("Charlie",)], + ) + obj = DBF(path=dbf_path) + + calls = [] + + def cb(current, total): + calls.append((current, total)) + + result = await obj.to_parquet(chunk_size=2, callback=cb) + assert isinstance(result, Parquet) + assert result.rows == 3 + assert len(calls) >= 1 + + +@pytest.mark.asyncio +async def test_dbf_to_parquet_empty(tmp_dir): + pytest.importorskip("dbfread") + dbf_path = tmp_dir / "test.dbf" + _create_dbf(dbf_path, [("NAME", "C", 10, 0)], []) + obj = DBF(path=dbf_path) + with pytest.raises(TypeError): + await obj.to_parquet() + + +@pytest.mark.asyncio +async def test_dbf_to_parquet_output_exists(tmp_dir): + pytest.importorskip("dbfread") + dbf_path = tmp_dir / "test.dbf" + _create_dbf(dbf_path, [("NAME", "C", 10, 0)], [("Alice",)]) + obj = DBF(path=dbf_path) + + out = tmp_dir / "existing.parquet" + pd.DataFrame({"x": [1]}).to_parquet(out) + + result = await obj.to_parquet(output_path=out) + assert isinstance(result, Parquet) + assert result.rows == 1 + + +@pytest.mark.asyncio +async def test_dbf_to_parquet_output_not_parquet(tmp_dir): + pytest.importorskip("dbfread") + dbf_path = tmp_dir / "test.dbf" + _create_dbf(dbf_path, [("NAME", "C", 10, 0)], [("Alice",)]) + obj = DBF(path=dbf_path) + + out = tmp_dir / "out.csv" + out.write_text("a,b\n1,2") + + with pytest.raises(RuntimeError, match="Could not parse"): + await obj.to_parquet(output_path=out) + + +@pytest.mark.asyncio +async def test_dbf_to_parquet_non_parquet_extension(tmp_dir): + pytest.importorskip("dbfread") + dbf_path = tmp_dir / "test.dbf" + _create_dbf(dbf_path, [("NAME", "C", 10, 0)], [("Alice",)]) + obj = DBF(path=dbf_path) + + out = tmp_dir / "out.custom" + with pytest.raises(RuntimeError, match="Could not parse"): + await obj.to_parquet(output_path=out) + + +# --------------------------------------------------------------------------- +# New tests for lines 507, 514, 520-521, 528-530, 542, 546-549, 560: DBC +# --------------------------------------------------------------------------- + + +def test_dbc_columns_raises(): + obj = DBC(path=Path("test.dbc")) + with pytest.raises(NotImplementedError): + _ = obj.columns + + +def test_dbc_rows_raises(): + obj = DBC(path=Path("test.dbc")) + with pytest.raises(NotImplementedError): + _ = obj.rows + + +@pytest.mark.asyncio +async def test_dbc_load_raises(tmp_dir): + path = tmp_dir / "test.dbc" + path.write_bytes(b"dummy") + obj = DBC(path=path) + with pytest.raises(struct.error): + await obj.load() + + +@pytest.mark.asyncio +async def test_dbc_stream_raises(tmp_dir): + path = tmp_dir / "test.dbc" + path.write_bytes(b"dummy") + obj = DBC(path=path) + + try: + with pytest.raises(struct.error): + async for _ in obj.stream(): + pass + finally: + import gc + + gc.collect() + + +@pytest.mark.asyncio +async def test_dbc_to_parquet_output_exists_is_parquet(tmp_dir): + path = tmp_dir / "test.dbc" + path.write_bytes(b"dummy") + obj = DBC(path=path) + + out = tmp_dir / "out.parquet" + pd.DataFrame({"x": [1]}).to_parquet(out) + + result = await obj.to_parquet(output_path=out) + assert isinstance(result, Parquet) + + +@pytest.mark.asyncio +async def test_dbc_to_parquet_output_exists_not_parquet(tmp_dir): + path = tmp_dir / "test.dbc" + path.write_bytes(b"dummy") + obj = DBC(path=path) + + out = tmp_dir / "out.csv" + out.write_text("a,b\n1,2") + + with pytest.raises(RuntimeError, match="Could not parse"): + await obj.to_parquet(output_path=out) + + +# --------------------------------------------------------------------------- +# New tests for lines 580-585, 593: JSON.columns and JSON.rows +# --------------------------------------------------------------------------- + + +def test_json_columns(tmp_dir): + path = tmp_dir / "data.json" + path.write_text('[{"a": 1, "b": "x"}]') + obj = JSON(path=path) + with pytest.raises(ValueError, match="nrows can only be passed"): + _ = obj.columns + + +def test_json_columns_empty(tmp_dir): + path = tmp_dir / "empty.json" + path.write_text("") + obj = JSON(path=path) + cols = obj.columns + assert cols == [] + + +def test_json_rows(tmp_dir): + path = tmp_dir / "data.json" + path.write_text('[{"a": 1}, {"a": 2}, {"a": 3}]') + obj = JSON(path=path) + assert obj.rows == 3 + + +# --------------------------------------------------------------------------- +# New tests for line 628: PDF.stream without chunk_size +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_pdf_stream_no_chunk_size(tmp_dir): + path = tmp_dir / "file.pdf" + content = b"%PDF-1.4\n...content..." + path.write_bytes(content) + + obj = PDF(path=path) + chunks = await collect_async(obj.stream()) + assert b"".join(chunks) == content + assert len(chunks) == 1 + + +# --------------------------------------------------------------------------- +# New tests for line 642: Zip.load +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_zip_load(tmp_dir): + zip_path = tmp_dir / "test.zip" + with zipfile.ZipFile(zip_path, "w") as z: + z.writestr("test.txt", "hello") + + obj = Zip(path=zip_path) + result = await obj.load() + assert isinstance(result, zipfile.ZipFile) + + +# --------------------------------------------------------------------------- +# New tests for lines 692-718: Zip.to_parquet +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_zip_to_parquet(tmp_dir): + zip_path = tmp_dir / "data.zip" + csv_path = tmp_dir / "inner.csv" + pd.DataFrame({"x": [1, 2], "y": [3, 4]}).to_csv(csv_path, index=False) + with zipfile.ZipFile(zip_path, "w") as z: + z.write(csv_path, arcname="inner.csv") + + obj = Zip(path=zip_path) + pq_obj = await obj.to_parquet() + assert isinstance(pq_obj, Parquet) + df = await pq_obj.load() + assert len(df) == 2 + + parquet_path = tmp_dir / "data.parquet" + assert parquet_path.exists() + temp_dir = tmp_dir / "data.tmp_extract" + assert not temp_dir.exists() + + +# --------------------------------------------------------------------------- +# New tests for lines 723-740: Zip._safe_cleanup +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_zip_safe_cleanup_nonexistent(tmp_dir): + obj = Zip(path=tmp_dir / "dummy.zip") + await obj._safe_cleanup(tmp_dir / "nonexistent") + # Should not raise + + +@pytest.mark.asyncio +async def test_zip_safe_cleanup_with_files(tmp_dir): + (tmp_dir / "f1.txt").write_text("a") + (tmp_dir / "f2.txt").write_text("b") + obj = Zip(path=tmp_dir / "dummy.zip") + await obj._safe_cleanup(tmp_dir) + assert not (tmp_dir / "f1.txt").exists() + assert not (tmp_dir / "f2.txt").exists() + assert not tmp_dir.exists() + + +@pytest.mark.asyncio +async def test_zip_safe_cleanup_with_subdir(tmp_dir): + sub = tmp_dir / "sub" + sub.mkdir() + (sub / "nested.txt").write_text("nested") + (tmp_dir / "top.txt").write_text("top") + obj = Zip(path=tmp_dir / "dummy.zip") + await obj._safe_cleanup(tmp_dir) + assert not tmp_dir.exists() + + +# --------------------------------------------------------------------------- +# New tests for line 764: GZip.open_member +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_gzip_open_member(tmp_dir): + path = tmp_dir / "data.csv.gz" + raw = b"a,b\n1,2" + with gzip.open(path, "wb") as f: + f.write(raw) + + obj = GZip(path=path) + result = await obj.open_member("data.csv") + assert result == raw + + +# --------------------------------------------------------------------------- +# New tests for line 799: Tar.load +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_tar_load(tmp_dir): + tar_path = tmp_dir / "file.tar" + f = tmp_dir / "a.txt" + f.write_text("hello") + with tarfile.open(tar_path, "w") as t: + t.add(f, arcname="a.txt") + + obj = Tar(path=tar_path) + result = await obj.load() + assert isinstance(result, tarfile.TarFile) + + +# --------------------------------------------------------------------------- +# New tests for ExtensionFactory._identify (lines 944, 947-949, 957-958) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_extension_factory_identify_magic_not_available(tmp_dir): + orig = ExtensionFactory._magic_available + ExtensionFactory._magic_available = False + try: + path = tmp_dir / "test.csv" + path.write_text("a,b\n1,2") + result = await ExtensionFactory._identify(path) + assert result is None + finally: + ExtensionFactory._magic_available = orig + + +@pytest.mark.asyncio +async def test_extension_factory_identify_magic_import_error( + monkeypatch, tmp_dir +): + orig_available = ExtensionFactory._magic_available + ExtensionFactory._magic_available = True + try: + path = tmp_dir / "test.csv" + path.write_text("a,b\n1,2") + + original_import = builtins.__import__ + + def mock_import(name, globals=None, locals=None, fromlist=(), level=0): + if name == "magic": + raise ImportError("Mock error") + return original_import(name, globals, locals, fromlist, level) + + monkeypatch.setattr(builtins, "__import__", mock_import) + + result = await ExtensionFactory._identify(path) + assert result is None + assert not ExtensionFactory._magic_available + finally: + ExtensionFactory._magic_available = orig_available + + +@pytest.mark.asyncio +async def test_extension_factory_identify_magic_exception(monkeypatch, tmp_dir): + magic = pytest.importorskip("magic") + orig_available = ExtensionFactory._magic_available + ExtensionFactory._magic_available = True + try: + path = tmp_dir / "test.csv" + path.write_text("a,b\n1,2") + + def mock_from_file(*args, **kwargs): + raise magic.MagicException("Mock error") + + monkeypatch.setattr(magic, "from_file", mock_from_file) + + result = await ExtensionFactory._identify(path) + assert result is None + assert ExtensionFactory._magic_available + finally: + ExtensionFactory._magic_available = orig_available + + +# --------------------------------------------------------------------------- +# New tests for line 1010: ExtensionFactory.instantiate non-string file_type +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_instantiate_non_string_file_type(monkeypatch, tmp_dir): + path = tmp_dir / "test.custom" + path.write_text("data") + + class CustomFile: + type = 42 + + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + monkeypatch.setitem(ExtensionFactory._extensions, ".custom", CustomFile) + + obj = await ExtensionFactory.instantiate(path) + assert obj.type == "FILE" + + +# --------------------------------------------------------------------------- +# New tests for line 486: pq.ParquetWriter in empty DBF +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_dbf_to_parquet_empty_writer(tmp_dir): + dbf_path = tmp_dir / "empty.dbf" + _create_dbf(dbf_path, [("NAME", "C", 10, 0)], []) + out_path = tmp_dir / "empty_out.parquet" + db = DBF(path=dbf_path) + + mock_table = MagicMock(spec=pa.Table) + mock_writer = MagicMock() + mock_parquet = MagicMock(spec=Parquet) + + with ( + patch("pysus.api.extensions.pa") as mock_pa, + patch( + "pysus.api.extensions.pq.ParquetWriter", return_value=mock_writer + ), + patch.object( + ExtensionFactory, "instantiate", return_value=mock_parquet + ), + ): + mock_pa.Table.from_pandas.return_value = mock_table + result = await db.to_parquet(out_path) + + assert result is mock_parquet + mock_writer.close.assert_called_once() + + +# --------------------------------------------------------------------------- +# New tests for line 521: DBC.load success path +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_dbc_load_success(tmp_dir): + dbf_path = tmp_dir / "test.dbc" + dbf_path.touch() + obj = DBC(path=dbf_path) + + def mock_dbc2dbf(infile, outfile): + _create_dbf(Path(outfile), [("NAME", "C", 10, 0)], [("NAME", b"Alice")]) + + mock_parquet = MagicMock(spec=Parquet) + mock_parquet.load = AsyncMock(return_value=pd.DataFrame({"x": [1]})) + + with ( + patch("pysus.api.extensions.dbc2dbf", side_effect=mock_dbc2dbf), + patch.object(DBF, "to_parquet", return_value=mock_parquet), + ): + df = await obj.load() + + assert list(df.columns) == ["x"] + assert len(df) == 1 + + +# --------------------------------------------------------------------------- +# New tests for lines 529-530: DBC.stream success path +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_dbc_stream_success(tmp_dir): + dbf_path = tmp_dir / "test.dbc" + dbf_path.touch() + obj = DBC(path=dbf_path) + + def mock_dbc2dbf(infile, outfile): + _create_dbf(Path(outfile), [("NAME", "C", 10, 0)], [("NAME", b"Alice")]) + + async def _mock_stream(**kw): + yield pd.DataFrame({"x": [1]}) + + mock_parquet = MagicMock(spec=Parquet) + mock_parquet.stream = _mock_stream + + with ( + patch("pysus.api.extensions.dbc2dbf", side_effect=mock_dbc2dbf), + patch.object(DBF, "to_parquet", return_value=mock_parquet), + ): + chunks = [chunk async for chunk in obj.stream(chunk_size=100)] + + assert len(chunks) == 1 + assert list(chunks[0].columns) == ["x"] + + +# --------------------------------------------------------------------------- +# New tests for line 560: DBC.to_parquet non-BaseTabularFile after dbc2dbf +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_dbc_to_parquet_not_tabular(tmp_dir): + dbf_path = tmp_dir / "test.dbc" + dbf_path.touch() + obj = DBC(path=dbf_path) + + def mock_dbc2dbf(infile, outfile): + pass + + mock_non_tabular = MagicMock(spec=BaseLocalFile) + + with ( + patch("pysus.api.extensions.dbc2dbf", side_effect=mock_dbc2dbf), + patch.object( + ExtensionFactory, "instantiate", return_value=mock_non_tabular + ), + ): + with pytest.raises(RuntimeError, match="Not a DBF"): + await obj.to_parquet() + + +# --------------------------------------------------------------------------- +# New tests for line 708: Zip.to_parquet with no tabular file inside +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_zip_to_parquet_no_tabular(tmp_dir): + zip_path = tmp_dir / "data.zip" + text_path = tmp_dir / "readme.txt" + text_path.write_text("hello") + with zipfile.ZipFile(zip_path, "w") as z: + z.write(text_path, arcname="readme.txt") + + obj = Zip(path=zip_path) + with pytest.raises(ValueError, match="No tabular file found"): + await obj.to_parquet() diff --git a/pysus/tests/api/test_metadata.py b/pysus/tests/api/test_metadata.py new file mode 100644 index 00000000..75329381 --- /dev/null +++ b/pysus/tests/api/test_metadata.py @@ -0,0 +1,109 @@ +import builtins +from unittest.mock import patch + +from pysus.api.metadata.models import ( + Column, + Dataset, + DatasetGroup, + File, + FileMeta, + lookup_column_meta, + pick_description, +) +from pysus.api.metadata.report import Columns, Footer, Header +from pysus.api.types import VARCHAR + + +class TestReportClasses: + def test_header_instantiation(self): + h = Header() + assert isinstance(h, Header) + + def test_columns_instantiation(self): + c = Columns() + assert isinstance(c, Columns) + + def test_footer_instantiation(self): + f = Footer() + assert isinstance(f, Footer) + + +class TestLookupColumnMeta: + def test_found_returns_dict(self): + meta = lookup_column_meta("ABAND") + assert meta is not None + assert isinstance(meta, dict) + + def test_not_found_returns_none(self): + meta = lookup_column_meta("NONEXISTENT_COLUMN_XYZ") + assert meta is None + + def test_import_error_returns_none(self): + with patch.object( + builtins, "__import__", side_effect=ImportError("mock") + ): + result = lookup_column_meta("ABAND") + assert result is None + + +class TestPickDescription: + def test_none_meta_returns_empty(self): + assert pick_description(None) == "" + + def test_non_empty_value_returns_first_value(self): + meta = {"sinan": "Some description"} + assert pick_description(meta) == "Some description" + + def test_empty_dict_returns_empty(self): + assert pick_description({}) == "" + + def test_all_empty_values_returns_empty(self): + meta = {"sinan": "", "sih": ""} + assert pick_description(meta) == "" + + +class TestColumnFromSchema: + def test_from_schema_creates_column(self): + col = Column.from_schema("ABAND", VARCHAR) + assert isinstance(col, Column) + assert col.name == "ABAND" + assert col.dtype == VARCHAR + + def test_from_schema_unknown_column(self): + col = Column.from_schema("NONEXISTENT_COLUMN_XYZ", VARCHAR) + assert col.name == "NONEXISTENT_COLUMN_XYZ" + assert col.description == "" + assert col.dtype == VARCHAR + + +class TestDataclassInstantiations: + def test_dataset(self): + d = Dataset(name="sinan", long_name="SINAN", description="Test") + assert d.name == "sinan" + assert d.long_name == "SINAN" + assert d.description == "Test" + + def test_dataset_group(self): + dg = DatasetGroup(name="sinan", long_name="SINAN", description="Test") + assert dg.name == "sinan" + assert dg.long_name == "SINAN" + assert dg.description == "Test" + + def test_file_meta(self): + fm = FileMeta(name="test", path="/tmp", size=100) + assert fm.name == "test" + assert fm.path == "/tmp" + assert fm.size == 100 + + def test_file(self): + f = File(origin="FTP") + assert f.origin == "FTP" + assert f.dataset is None + assert f.group is None + assert f.columns == [] + + def test_column(self): + col = Column(name="ABAND", description="Test", dtype=VARCHAR) + assert col.name == "ABAND" + assert col.description == "Test" + assert col.dtype == VARCHAR diff --git a/pysus/tests/api/test_models.py b/pysus/tests/api/test_models.py index f559b96a..1f61de5b 100644 --- a/pysus/tests/api/test_models.py +++ b/pysus/tests/api/test_models.py @@ -3,12 +3,23 @@ from datetime import datetime from pathlib import Path from typing import Any -from unittest.mock import MagicMock +from unittest.mock import AsyncMock, MagicMock, patch +import pandas as pd import pytest from pydantic import ValidationError +from pysus import CACHEPATH +from pysus.api.extensions import Parquet from pysus.api.models import BaseRemoteGroup # noqa -from pysus.api.models import BaseLocalFile, BaseRemoteDataset, BaseRemoteFile +from pysus.api.models import ( + BaseCompressedFile, + BaseLocalFile, + BaseRemoteClient, + BaseRemoteDataset, + BaseRemoteFile, + BaseRemoteObject, + BaseTabularFile, +) class MockLocalFile(BaseLocalFile): @@ -50,7 +61,468 @@ async def _download( return output +class MockTabularFile(BaseTabularFile): + type: str = "tabular" + + @property + def columns(self) -> list: + return getattr(self, "_columns_val", []) + + @property + def rows(self) -> int: + return getattr(self, "_rows_val", 0) + + async def load(self) -> pd.DataFrame: + return pd.DataFrame() + + async def stream( + self, + chunk_size: int = 10000, + ) -> AsyncGenerator[pd.DataFrame, None]: + for chunk in getattr(self, "_chunks", []): + yield chunk + + +class MockCompressedFile(BaseCompressedFile): + type: str = "compressed" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self._members_list = ["member1.txt", "member2.txt"] + self._member_data = { + "member1.txt": b"content1", + "member2.txt": b"content2", + } + + async def load(self) -> bytes: + return b"" + + async def list_members(self) -> list[str]: + return self._members_list + + async def open_member(self, member_name: str) -> Any: + return self._member_data.get(member_name, b"") + + async def extract( + self, + target_dir: Path = CACHEPATH, + ) -> list[BaseLocalFile]: + return [] + + +class MockRemoteGroup(BaseRemoteGroup): + type: str = "group" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self._files = None + self._mock_files = [] + self._name_val = "test_group" + + @property + def name(self) -> str: + return self._name_val + + @property + def long_name(self) -> str: + return "Test Group" + + @property + def description(self) -> str: + return "A test group" + + async def _fetch_files(self) -> list[BaseRemoteFile]: + return self._mock_files + + +class MockRemoteDataset(BaseRemoteDataset): + type: str = "dataset" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self._content = None + self._mock_content = [] + self._name_val = "test_dataset" + + @property + def name(self) -> str: + return self._name_val + + @property + def long_name(self) -> str: + return "Test Dataset" + + @property + def description(self) -> str: + return "A test dataset" + + async def _fetch_content(self): + return self._mock_content + + MockRemoteFile.model_rebuild() +MockTabularFile.model_rebuild() +MockCompressedFile.model_rebuild() +MockRemoteGroup.model_rebuild() +MockRemoteDataset.model_rebuild() + + +# --- BaseFile --- + + +def test_base_file_str(tmp_path): + path = tmp_path / "some_file.txt" + path.write_text("hello") + f = MockLocalFile(path=path) + assert str(f) == "some_file.txt" + + +# --- BaseLocalFile --- + + +def test_base_local_file_name(tmp_path): + path = tmp_path / "my_data.csv" + path.write_text("a,b\n1,2") + f = MockLocalFile(path=path) + assert f.name == "my_data.csv" + + +def test_base_local_file_extension(tmp_path): + path = tmp_path / "data.csv" + path.write_text("a,b\n1,2") + f = MockLocalFile(path=path) + assert f.extension == ".csv" + + +def test_base_local_file_size(tmp_path): + path = tmp_path / "data.bin" + content = b"hello" + path.write_bytes(content) + f = MockLocalFile(path=path) + assert f.size == len(content) + + +def test_base_local_file_modify(tmp_path): + path = tmp_path / "data.txt" + path.write_text("hello") + f = MockLocalFile(path=path) + assert isinstance(f.modify, datetime) + + +# --- BaseTabularFile.to_parquet --- + + +@pytest.mark.asyncio +async def test_to_parquet_no_output_path(tmp_path): + tabular = MockTabularFile(path=tmp_path / "source.csv") + tabular._chunks = [pd.DataFrame({"a": [1, 2, 3]})] + tabular._rows_val = 3 + + with patch( + "pysus.api.extensions.ExtensionFactory.instantiate" + ) as mock_inst: + mock_inst.return_value = MagicMock(spec=Parquet) + result = await tabular.to_parquet() + assert isinstance(result, MagicMock) + + +@pytest.mark.asyncio +async def test_to_parquet_empty_chunk(tmp_path): + tabular = MockTabularFile(path=tmp_path / "source.csv") + tabular._chunks = [pd.DataFrame(), pd.DataFrame({"a": [1, 2, 3]})] + tabular._rows_val = 3 + out = tmp_path / "out.parquet" + + with patch( + "pysus.api.extensions.ExtensionFactory.instantiate" + ) as mock_inst: + mock_inst.return_value = MagicMock(spec=Parquet) + result = await tabular.to_parquet(output_path=out) + assert isinstance(result, MagicMock) + + +@pytest.mark.asyncio +async def test_to_parquet_null_schema(tmp_path): + tabular = MockTabularFile(path=tmp_path / "source.csv") + tabular._chunks = [pd.DataFrame({"a": [1], "b": [None]})] + tabular._rows_val = 1 + out = tmp_path / "out.parquet" + + with patch( + "pysus.api.extensions.ExtensionFactory.instantiate" + ) as mock_inst: + mock_inst.return_value = MagicMock(spec=Parquet) + result = await tabular.to_parquet(output_path=out) + assert isinstance(result, MagicMock) + + +@pytest.mark.asyncio +async def test_to_parquet_callback(tmp_path): + tabular = MockTabularFile(path=tmp_path / "source.csv") + tabular._chunks = [pd.DataFrame({"a": [1, 2, 3]})] + tabular._rows_val = 3 + out = tmp_path / "out.parquet" + callback = MagicMock() + + with patch( + "pysus.api.extensions.ExtensionFactory.instantiate" + ) as mock_inst: + mock_inst.return_value = MagicMock(spec=Parquet) + await tabular.to_parquet(output_path=out, callback=callback) + callback.assert_called_once_with(3, 3) + + +@pytest.mark.asyncio +async def test_to_parquet_cleanup(tmp_path): + tabular = MockTabularFile(path=tmp_path / "source.csv") + tabular._chunks = [pd.DataFrame({"a": [1, 2, 3]})] + tabular._rows_val = 3 + out = tmp_path / "out.parquet" + + with patch("pyarrow.parquet.ParquetWriter") as mock_writer_cls: + mock_writer = MagicMock() + mock_writer_cls.return_value = mock_writer + with patch( + "pysus.api.extensions.ExtensionFactory.instantiate" + ) as mock_inst: + mock_inst.return_value = MagicMock(spec=Parquet) + await tabular.to_parquet(output_path=out) + mock_writer.close.assert_called_once() + + +@pytest.mark.asyncio +async def test_to_parquet_value_error(tmp_path): + tabular = MockTabularFile(path=tmp_path / "source.csv") + tabular._chunks = [pd.DataFrame({"a": [1]})] + tabular._rows_val = 1 + out = tmp_path / "out.parquet" + + with patch( + "pysus.api.extensions.ExtensionFactory.instantiate" + ) as mock_inst: + mock_inst.return_value = "not_a_parquet" + with pytest.raises(ValueError, match="Could not parse"): + await tabular.to_parquet(output_path=out) + + +# --- BaseCompressedFile --- + + +@pytest.mark.asyncio +async def test_base_compressed_file_stream(tmp_path): + path = tmp_path / "archive.zip" + path.write_text("dummy") + comp = MockCompressedFile(path=path) + results = [] + async for member in comp.stream(): + results.append(member) + assert results == [b"content1", b"content2"] + + +# --- SearchableMixin --- + + +def test_searchable_mixin_matches(): + obj = MagicMock() + obj.year = 2024 + obj.month = 6 + mixin = BaseRemoteFile.__bases__[1]() + assert mixin._matches(obj, year=2024, month=6) is True + assert mixin._matches(obj, year=2025) is False + assert mixin._matches(obj, extra_attr="missing") is False + + +# --- BaseRemoteFile --- + + +def test_base_remote_file_name(): + ds = MagicMock(spec=BaseRemoteDataset) + f = MockRemoteFile(path="remote/path.txt", dataset=ds) + assert f.name == "path.txt" + + +def test_base_remote_file_client(): + fake_client = MagicMock(spec=BaseRemoteClient) + ds = MagicMock(spec=BaseRemoteDataset) + ds.client = fake_client + f = MockRemoteFile(path="remote/path.txt", dataset=ds) + assert f.client is fake_client + + +def test_base_remote_file_year(): + ds = MagicMock(spec=BaseRemoteDataset) + f = MockRemoteFile(path="r/p.txt", dataset=ds) + assert f.year is None + + +def test_base_remote_file_month(): + ds = MagicMock(spec=BaseRemoteDataset) + f = MockRemoteFile(path="r/p.txt", dataset=ds) + assert f.month is None + + +def test_base_remote_file_state(): + ds = MagicMock(spec=BaseRemoteDataset) + f = MockRemoteFile(path="r/p.txt", dataset=ds) + assert f.state is None + + +@pytest.mark.asyncio +async def test_remote_file_download_default_cache(tmp_path): + ds = MagicMock(spec=BaseRemoteDataset) + remote = MockRemoteFile(path="remote/path.txt", dataset=ds) + + with patch("pysus.api.extensions.ExtensionFactory.instantiate") as mi: + mock_local = MagicMock(spec=BaseLocalFile) + mi.return_value = mock_local + with patch("pysus.api.models.CACHEPATH", tmp_path): + result = await remote.download() + assert result == mock_local + assert (tmp_path / "path.txt").exists() + + +@pytest.mark.asyncio +async def test_remote_file_download_output_dir(tmp_path): + ds = MagicMock(spec=BaseRemoteDataset) + remote = MockRemoteFile(path="remote/path.txt", dataset=ds) + out_dir = tmp_path / "outdir" + out_dir.mkdir() + + with patch("pysus.api.extensions.ExtensionFactory.instantiate") as mi: + mock_local = MagicMock(spec=BaseLocalFile) + mi.return_value = mock_local + result = await remote.download(output=out_dir) + assert result == mock_local + assert (out_dir / "path.txt").exists() + + +# --- BaseRemoteObject --- + + +def test_base_remote_object_str(): + class NamedObj(BaseRemoteObject): + type: str = "test" + + @property + def name(self) -> str: + return "my_name" + + @property + def long_name(self) -> str: + return "My Name" + + @property + def description(self) -> str: + return "Desc" + + obj = NamedObj() + assert str(obj) == "my_name" + + +# --- BaseRemoteGroup --- + + +@pytest.mark.asyncio +async def test_base_remote_group_parent(): + ds = MagicMock(spec=BaseRemoteDataset) + group = MockRemoteGroup(dataset=ds) + assert group.parent is ds + + +@pytest.mark.asyncio +async def test_base_remote_group_files(tmp_path): + ds = MagicMock(spec=BaseRemoteDataset) + mock_files = [MagicMock(spec=BaseRemoteFile)] + group = MockRemoteGroup(dataset=ds) + group._mock_files = mock_files + group._files = None + + files = await group.files + assert files == mock_files + assert group._files is mock_files + + +@pytest.mark.asyncio +async def test_base_remote_group_files_cached(): + ds = MagicMock(spec=BaseRemoteDataset) + cached = [MagicMock(spec=BaseRemoteFile)] + group = MockRemoteGroup(dataset=ds) + group._files = cached + + files = await group.files + assert files is cached + + +@pytest.mark.asyncio +async def test_base_remote_group_search_all(): + ds = MagicMock(spec=BaseRemoteDataset) + f1 = MagicMock(spec=BaseRemoteFile, year=2024) + f2 = MagicMock(spec=BaseRemoteFile, year=2025) + group = MockRemoteGroup(dataset=ds) + group._mock_files = [f1, f2] + group._files = None + + result = await group.search() + assert result == [f1, f2] + + +@pytest.mark.asyncio +async def test_base_remote_group_search_with_kwargs(): + ds = MagicMock(spec=BaseRemoteDataset) + f1 = MagicMock(spec=BaseRemoteFile) + f1.year = 2024 + f2 = MagicMock(spec=BaseRemoteFile) + f2.year = 2025 + group = MockRemoteGroup(dataset=ds) + group._mock_files = [f1, f2] + group._files = None + + result = await group.search(year=2024) + assert result == [f1] + + +# --- BaseRemoteDataset --- + + +@pytest.mark.asyncio +async def test_base_remote_dataset_search(): + client = MagicMock(spec=BaseRemoteClient) + ds = MockRemoteDataset(client=client) + ds._content = None + + f1 = MagicMock(spec=BaseRemoteFile) + f1.year = 2024 + f2 = MagicMock(spec=BaseRemoteFile) + f2.year = 2025 + + group = MagicMock(spec=BaseRemoteGroup) + group.search = AsyncMock(return_value=[f1]) + + ds._mock_content = [group, f2] + + result = await ds.search(year=2024) + assert result == [f1] + group.search.assert_called_once_with(year=2024) + + +@pytest.mark.asyncio +async def test_base_remote_dataset_search_no_kwargs(): + client = MagicMock(spec=BaseRemoteClient) + ds = MockRemoteDataset(client=client) + ds._content = None + + f1 = MagicMock(spec=BaseRemoteFile) + f1.year = 2024 + f2 = MagicMock(spec=BaseRemoteFile) + f2.year = 2025 + + ds._mock_content = [f1, f2] + + result = await ds.search() + assert result == [f1, f2] + + +# --- Existing tests (unchanged) --- @pytest.mark.asyncio diff --git a/pysus/tests/api/test_types.py b/pysus/tests/api/test_types.py index 775c9cfd..b659452d 100644 --- a/pysus/tests/api/test_types.py +++ b/pysus/tests/api/test_types.py @@ -1,25 +1,117 @@ -from pysus.api.types import FileType, State +import pytest +from pydantic import TypeAdapter, ValidationError +from pysus.api.types import ( + BIGINT, + BOOLEAN, + CIHA, + CNES, + CSV, + DADOSGOV, + DATE, + DBC, + DBF, + DIR, + DOUBLE, + DUCKLAKE, + FILE, + FLOAT, + FTP, + IBGE, + INTEGER, + JSON, + PARQUET, + PDF, + PNI, + SIA, + SIH, + SIM, + SINAN, + SINASC, + VARCHAR, + ZIP, + ColumnType, + DatasetName, + FileType, + Origin, + State, +) + + +class TestOrigin: + def test_valid_origins(self): + adapter = TypeAdapter(Origin) + for origin in (FTP, DADOSGOV, DUCKLAKE): + assert adapter.validate_python(origin) == origin + + def test_invalid_origin_raises(self): + with pytest.raises(ValidationError): + TypeAdapter(Origin).validate_python("INVALID") + + def test_origin_constants(self): + assert FTP == "FTP" + assert DADOSGOV == "DadosGov" + assert DUCKLAKE == "DuckLake" + + +class TestColumnType: + def test_valid_column_types(self): + adapter = TypeAdapter(ColumnType) + valid = (VARCHAR, INTEGER, BIGINT, FLOAT, DOUBLE, BOOLEAN, DATE) + for ct in valid: + assert adapter.validate_python(ct) == ct + + def test_invalid_column_type_raises(self): + with pytest.raises(ValidationError): + TypeAdapter(ColumnType).validate_python("INVALID") + + def test_column_type_constants(self): + assert VARCHAR == "VARCHAR" + assert INTEGER == "INTEGER" + assert BIGINT == "BIGINT" + assert FLOAT == "FLOAT" + assert DOUBLE == "DOUBLE" + assert BOOLEAN == "BOOLEAN" + assert DATE == "DATE" + + +class TestDatasetName: + def test_valid_dataset_names(self): + adapter = TypeAdapter(DatasetName) + valid = (SINAN, SINASC, SIM, SIH, SIA, PNI, IBGE, CNES, CIHA) + for dn in valid: + assert adapter.validate_python(dn) == dn + + def test_invalid_dataset_name_raises(self): + with pytest.raises(ValidationError): + TypeAdapter(DatasetName).validate_python("INVALID") + + def test_dataset_name_constants(self): + assert SINAN == "SINAN" + assert SINASC == "SINASC" + assert SIM == "SIM" + assert SIH == "SIH" + assert SIA == "SIA" + assert PNI == "PNI" + assert IBGE == "IBGE" + assert CNES == "CNES" + assert CIHA == "CIHA" class TestFileType: def test_file_types_are_valid(self): - valid_types: list[FileType] = [ - "FILE", - "DIR", - "PARQUET", - "CSV", - "JSON", - "PDF", - "DBC", - "DBF", - "ZIP", - ] + adapter = TypeAdapter(FileType) + valid_types = [FILE, DIR, PARQUET, CSV, JSON, PDF, DBC, DBF, ZIP] for ft in valid_types: - assert ft in FileType.__args__ + assert adapter.validate_python(ft) == ft + + def test_invalid_file_type_raises(self): + with pytest.raises(ValidationError): + TypeAdapter(FileType).validate_python("INVALID") class TestState: def test_all_brazilian_states_present(self): + adapter = TypeAdapter(State) expected_states = { "AC", "AL", @@ -49,5 +141,9 @@ def test_all_brazilian_states_present(self): "TO", "DF", } - actual_states = set(State.__args__) # type: ignore - assert actual_states == expected_states + for state in expected_states: + adapter.validate_python(state) + + def test_invalid_state_raises(self): + with pytest.raises(ValidationError): + TypeAdapter(State).validate_python("XX") diff --git a/pysus/tests/api/test_utils.py b/pysus/tests/api/test_utils.py index 93ebbdd6..f90f5c2d 100644 --- a/pysus/tests/api/test_utils.py +++ b/pysus/tests/api/test_utils.py @@ -42,3 +42,11 @@ def test_add_dv_empty(): def test_add_dv_non_digit(): assert add_dv("abc") == "abc" + + +def test_add_dv_5digit_returns_as_is(): + assert add_dv("12345") == "12345" + + +def test_add_dv_8digit_returns_as_is(): + assert add_dv("12345678") == "12345678" diff --git a/pysus/tests/conftest.py b/pysus/tests/conftest.py new file mode 100644 index 00000000..c55637aa --- /dev/null +++ b/pysus/tests/conftest.py @@ -0,0 +1,9 @@ +"""pytest configuration - mocks duckdb.functional before any other imports.""" + +import sys +from unittest.mock import MagicMock + +if "duckdb.functional" not in sys.modules: + _mock = MagicMock() + _mock.SPECIAL = "SPECIAL" + sys.modules["duckdb.functional"] = _mock