11"""Pre-configured health database definitions accessible via dados.gov.br."""
22
3+ import re
34from typing import Any
45
6+ from pysus .utils import zfill_year
7+
58from .models import Dataset
69
# Portuguese three-letter month abbreviations mapped to their month number
# (1-12). The tuple is in calendar order, so position + 1 is the number.
MONTHS: dict[str, int] = {
    abbreviation: number
    for number, abbreviation in enumerate(
        (
            "jan",
            "fev",
            "mar",
            "abr",
            "mai",
            "jun",
            "jul",
            "ago",
            "set",
            "out",
            "nov",
            "dez",
        ),
        start=1,
    )
}
24+
25+
26+ def _parse_year (val : str ) -> int | None :
27+ try :
28+ y = int (val )
29+ return y if 1970 <= y <= 2100 else None
30+ except ValueError :
31+ return None
32+
33+
34+ def _skip (name : str ) -> bool :
35+ return name .startswith ("get_" ) or name .lower ().endswith (".pdf" )
36+
737
838class CNES (Dataset ):
939 """Cadastro Nacional de Estabelecimentos de Saúde (CNES)."""
@@ -32,8 +62,23 @@ def description(self) -> str:
3262 )
3363
3464 def formatter (self , filename : str ) -> dict [str , Any ]:
35- """Extract metadata from a filename (not yet implemented)."""
36- raise NotImplementedError ()
65+ """Parse a CNES filename and extract metadata."""
66+ try :
67+ name = filename .strip ()
68+ if _skip (name ):
69+ return {"state" : None , "year" : None , "month" : None }
70+
71+ m = re .search (r"_(\d{2})-(\d{4})\.csv$" , name )
72+ if m :
73+ return {
74+ "state" : None ,
75+ "year" : _parse_year (m .group (2 )),
76+ "month" : int (m .group (1 )),
77+ }
78+
79+ return {"state" : None , "year" : None , "month" : None }
80+ except (IndexError , ValueError ):
81+ return {"state" : None , "year" : None , "month" : None }
3782
3883
3984class PNI (Dataset ):
@@ -49,6 +94,18 @@ class PNI(Dataset):
4994 "9a25b796-80e3-444a-a4e7-405f5596d8ab" ,
5095 ]
5196
    # Slug stem shared by the alias keys below.
    _PNI_PREFIX = "doses-aplicadas-pelo-programa-de-nacional-de-imunizacoes-pni"

    # Every key here maps to the same short code, "DPNI".
    # Presumably the keys are dados.gov.br dataset slugs -- TODO confirm
    # against how Dataset consumes group_aliases.
    # NOTE(review): the 2022 key uses a "dataset-" prefix and a "_2022"
    # suffix unlike the other years, and there is no 2024 key; confirm that
    # this mirrors the published slugs.
    group_aliases: dict[str, str] = {
        _PNI_PREFIX: "DPNI",
        f"{_PNI_PREFIX}-2020": "DPNI",
        f"{_PNI_PREFIX}-2021": "DPNI",
        f"dataset-{_PNI_PREFIX}_2022": "DPNI",
        f"{_PNI_PREFIX}-2023": "DPNI",
        f"{_PNI_PREFIX}-2025": "DPNI",
        f"{_PNI_PREFIX}-2026": "DPNI",
    }
108+
52109 @property
53110 def name (self ) -> str :
54111 """Return the short name."""
@@ -64,8 +121,21 @@ def description(self) -> str:
64121 return "O PNI monitora a cobertura vacinal e doses aplicadas no Brasil."
65122
66123 def formatter (self , filename : str ) -> dict [str , Any ]:
67- """Extract metadata from a filename (not yet implemented)."""
68- raise NotImplementedError ()
124+ """Parse a PNI vaccination filename into month and year."""
125+ try :
126+ name = filename .strip ().lower ()
127+ if _skip (name ):
128+ return {"state" : None , "year" : None , "month" : None }
129+
130+ m = re .match (r"vacinacao_(\w{3})_(\d{4})_csv\.zip" , name )
131+ if m :
132+ month = MONTHS .get (m .group (1 ))
133+ year = _parse_year (m .group (2 ))
134+ return {"state" : None , "year" : year , "month" : month }
135+
136+ return {"state" : None , "year" : None , "month" : None }
137+ except (IndexError , ValueError ):
138+ return {"state" : None , "year" : None , "month" : None }
69139
70140
71141class SIA (Dataset ):
@@ -92,8 +162,31 @@ def description(self) -> str:
92162 """
93163
94164 def formatter (self , filename : str ) -> dict [str , Any ]:
95- """Extract metadata from a filename (not yet implemented)."""
96- raise NotImplementedError ()
165+ """Parse an SIA filename into year."""
166+ try :
167+ name = filename .strip ().lower ()
168+ if _skip (name ):
169+ return {"state" : None , "year" : None , "month" : None }
170+
171+ m = re .search (r"_(\d{4})_\.csv$" , name )
172+ if m :
173+ return {
174+ "state" : None ,
175+ "year" : _parse_year (m .group (1 )),
176+ "month" : None ,
177+ }
178+
179+ m = re .search (r"_(\w{3})-out_(\d{4})_\.csv$" , name )
180+ if m :
181+ return {
182+ "state" : None ,
183+ "year" : _parse_year (m .group (2 )),
184+ "month" : None ,
185+ }
186+
187+ return {"state" : None , "year" : None , "month" : None }
188+ except (IndexError , ValueError ):
189+ return {"state" : None , "year" : None , "month" : None }
97190
98191
99192class SINAN (Dataset ):
@@ -104,8 +197,21 @@ class SINAN(Dataset):
104197 "5699abe0-0510-4da8-b47d-209b3bb32b34" ,
105198 "4557ba96-7d52-4a56-bd6f-f99a5af09f77" ,
106199 "740ce8f4-7a5d-4351-aad4-7623f2490ada" ,
200+ "cf044c1b-b966-4d0e-bab0-f3aa65897b7d" ,
201+ "2d4997fb-cd11-4ce2-b217-09cd50e3151f" ,
202+ "8a585222-4c2e-43b7-807d-59355ee79c48" ,
203+ "527e8665-de64-4f81-b7c3-40b59c7d1d3c" ,
107204 ]
108205
    # Maps a group key to a four-letter short code.
    # Presumably the keys are dados.gov.br dataset slugs and the values the
    # disease codes used in SINAN file names -- TODO confirm against how
    # Dataset consumes group_aliases.
    group_aliases: dict[str, str] = {
        "arboviroses-dengue": "DENG",
        "arboviroses-febre-de-chikungunya": "CHIK",
        "arboviroses-zika-virus": "ZIKA",
        "hanseniase": "HANS",
        "dados-tuberculose": "TUBE",
        "sifilis": "SIFA",
    }
214+
109215 @property
110216 def name (self ) -> str :
111217 """Return the short name."""
@@ -124,8 +230,31 @@ def description(self) -> str:
124230 """
125231
126232 def formatter (self , filename : str ) -> dict [str , Any ]:
127- """Extract metadata from a filename (not yet implemented)."""
128- raise NotImplementedError ()
233+ """Parse a SINAN filename into state and year."""
234+ try :
235+ name = filename .strip ().upper ()
236+ if _skip (name ):
237+ return {"state" : None , "year" : None , "month" : None }
238+
239+ m = re .match (r"(\w{4})(BR)(\d{2})\.CSV\.ZIP" , name )
240+ if m :
241+ return {
242+ "state" : m .group (2 ),
243+ "year" : zfill_year (m .group (3 )),
244+ "month" : None ,
245+ }
246+
247+ m = re .match (r"MPX_(\d{4})_OPENDATASUS\.CSV\.ZIP" , name )
248+ if m :
249+ return {
250+ "state" : None ,
251+ "year" : _parse_year (m .group (1 )),
252+ "month" : None ,
253+ }
254+
255+ return {"state" : None , "year" : None , "month" : None }
256+ except (IndexError , ValueError ):
257+ return {"state" : None , "year" : None , "month" : None }
129258
130259
131260class SIM (Dataset ):
@@ -135,6 +264,10 @@ class SIM(Dataset):
135264 "5f121f4d-47c6-428e-8ec6-e8ec56417172" ,
136265 ]
137266
    # Maps the key "sim-1979-2019" to the short code "DO".
    # Presumably the key is a dados.gov.br dataset slug -- TODO confirm
    # against how Dataset consumes group_aliases.
    group_aliases: dict[str, str] = {
        "sim-1979-2019": "DO",
    }
270+
138271 @property
139272 def name (self ) -> str :
140273 """Return the short name."""
@@ -152,8 +285,31 @@ def description(self) -> str:
152285 """
153286
154287 def formatter (self , filename : str ) -> dict [str , Any ]:
155- """Extract metadata from a filename (not yet implemented)."""
156- raise NotImplementedError ()
288+ """Parse a SIM filename into year."""
289+ try :
290+ name = filename .strip ()
291+ if _skip (name ):
292+ return {"state" : None , "year" : None , "month" : None }
293+
294+ m = re .search (r"Mortalidade_Geral_(\d{4})_csv\.zip" , name )
295+ if m :
296+ return {
297+ "state" : None ,
298+ "year" : _parse_year (m .group (1 )),
299+ "month" : None ,
300+ }
301+
302+ m = re .match (r"DO(\d{2})OPEN" , name )
303+ if m :
304+ return {
305+ "state" : None ,
306+ "year" : zfill_year (m .group (1 )),
307+ "month" : None ,
308+ }
309+
310+ return {"state" : None , "year" : None , "month" : None }
311+ except (IndexError , ValueError ):
312+ return {"state" : None , "year" : None , "month" : None }
157313
158314
159315class SINASC (Dataset ):
@@ -163,6 +319,10 @@ class SINASC(Dataset):
163319 "441cc6bd-684a-4afd-a88b-ba4734c9e83e" ,
164320 ]
165321
    # Maps one group key to the short code "DN".
    # Presumably the key is a dados.gov.br dataset slug -- TODO confirm
    # against how Dataset consumes group_aliases.
    # NOTE(review): the key ends in "20201"; verify that this is the
    # published slug and not a typo for "2020".
    group_aliases: dict[str, str] = {
        "sistema-de-informacao-sobre-nascidos-vivos-sinasc-1996-a-20201": "DN",
    }
325+
166326 @property
167327 def name (self ) -> str :
168328 """Return the short name."""
@@ -181,8 +341,67 @@ def description(self) -> str:
181341 """
182342
183343 def formatter (self , filename : str ) -> dict [str , Any ]:
184- """Extract metadata from a filename (not yet implemented)."""
185- raise NotImplementedError ()
344+ """Parse a SINASC filename into year."""
345+ try :
346+ name = filename .strip ()
347+ if _skip (name ):
348+ return {"state" : None , "year" : None , "month" : None }
349+
350+ m = re .search (r"SINASC_(\d{4})_csv\.zip" , name )
351+ if m :
352+ return {
353+ "state" : None ,
354+ "year" : _parse_year (m .group (1 )),
355+ "month" : None ,
356+ }
357+
358+ m = re .search (r"DNBR(\d{4})_csv\.zip" , name )
359+ if m :
360+ return {
361+ "state" : "BR" ,
362+ "year" : _parse_year (m .group (1 )),
363+ "month" : None ,
364+ }
365+
366+ return {"state" : None , "year" : None , "month" : None }
367+ except (IndexError , ValueError ):
368+ return {"state" : None , "year" : None , "month" : None }
369+
370+
class COVID19(Dataset):
    """Confirmed COVID-19 cases (Casos Confirmados de COVID-19)."""

    # Identifier(s) for this dataset; presumably dados.gov.br dataset
    # UUIDs -- TODO confirm against how Dataset consumes ids.
    ids: list[str] = [
        "1ba1801e-aec0-4dba-ae2a-7732f0a0c9f7",
    ]

    @property
    def name(self) -> str:
        """Return the short name."""
        return "COVID19"

    @property
    def long_name(self) -> str:
        """Return the human-readable name."""
        return "Casos Confirmados de COVID-19"

    @property
    def description(self) -> str:
        """Return a one-sentence description of the dataset."""
        return "Dados anonimizados de casos confirmados de COVID-19."

    def formatter(self, filename: str) -> dict[str, Any]:
        """Return the metadata for a COVID-19 filename.

        No state, year or month is extracted from COVID-19 file names, so
        the result is the same for every input.

        Args:
            filename: Resource file name (not inspected).

        Returns:
            A dict with keys ``"state"``, ``"year"`` and ``"month"``, all
            ``None``.
        """
        return {"state": None, "year": None, "month": None}
186405
187406
188407AVAILABLE_DATABASES : list [type [Dataset ]] = [
@@ -192,4 +411,5 @@ def formatter(self, filename: str) -> dict[str, Any]:
192411 SIM ,
193412 SINAN ,
194413 SINASC ,
414+ COVID19 ,
195415]
0 commit comments