Skip to content

Commit 001af1b

Browse files
committed
update tqdm
1 parent cd723c5 commit 001af1b

15 files changed

Lines changed: 403 additions & 245 deletions

File tree

.github/workflows/python-package.yml

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,21 @@ jobs:
3131
auto-update-conda: true
3232
conda-solver: libmamba
3333

34-
- name: Run jupyterlab with PySUS
35-
run: |
36-
make run-jupyter-pysus
34+
# - name: Run jupyterlab with PySUS
35+
# run: |
36+
# make run-jupyter-pysus
3737
# make test-jupyter-pysus ## takes too long
3838

3939
- name: Linting & Tests
4040
run: |
41-
export CI=1
42-
poetry install
43-
pre-commit run --all-files
41+
pip install poetry poetry-plugin-export
42+
43+
poetry config virtualenvs.create false
44+
45+
poetry export --with dev --format requirements.txt --output reqs.txt --without-hashes
46+
pip install -r reqs.txt
47+
pip install -e .
48+
49+
# pre-commit run --all-files
50+
4451
make test-pysus

conda/dev.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,4 @@ dependencies:
77
- python>=3.10,<3.14
88
- jupyter
99
- make
10-
- poetry
1110
- pip

poetry.lock

Lines changed: 7 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ python-dateutil = "2.8.2"
2020
fastparquet = ">=2023.10.1,<=2024.11.0"
2121
pyarrow = ">=11.0.0"
2222
numpy = ">1,<3"
23-
tqdm = "4.64.0"
23+
tqdm = ">=4.67.0"
2424
wget = "^3.2"
2525
loguru = "^0.6.0"
2626
Unidecode = "^1.3.6"

pysus/api/dadosgov/client.py

Lines changed: 121 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,134 @@
1-
from typing import List, Optional
1+
from __future__ import annotations
22

3-
import requests
4-
from pydantic import TypeAdapter
3+
import pathlib
4+
from typing import Callable, Dict, List, Optional
5+
6+
import httpx
7+
from pydantic import PrivateAttr
58
from pysus import __version__
6-
from pysus.api.dadosgov.models import Dataset, DatasetSummary
7-
from pysus.api.models import BaseRemoteClient
9+
from pysus.api.models import BaseRemoteClient, BaseRemoteDataset, BaseRemoteFile
10+
11+
from .models import Dataset
812

913

1014
class DadosGov(BaseRemoteClient):
    """Asynchronous client for the dados.gov.br open-data API.

    The client keeps an API token and a lazily created ``httpx.AsyncClient``
    as private attributes. Call :meth:`connect` (or :meth:`login`) with a
    token before issuing any request, and :meth:`close` when done.
    """

    base_url: str = "https://dados.gov.br/dados/api"

    # Private state declared with PrivateAttr, so it is presumably excluded
    # from the pydantic model fields of BaseRemoteClient (base class is not
    # visible here -- confirm).
    _token: Optional[str] = PrivateAttr(default=None)
    _client: Optional[httpx.AsyncClient] = PrivateAttr(default=None)

    def __init__(self, **data):
        """Forward every keyword argument to the base-class initializer."""
        super().__init__(**data)

    @property
    def name(self) -> str:
        """Short identifier of this client."""
        return "DadosGov"

    @property
    def long_name(self) -> str:
        """Human-readable name of the remote service."""
        return "Portal Brasileiro de Dados Abertos"

    @property
    def description(self) -> str:
        """One-line description of what this client gives access to."""
        return "Interface de acesso ao API do Portal de Dados Abertos"

    def _require_client(self) -> httpx.AsyncClient:
        """Return the open HTTP client, or raise if not connected.

        Raises:
            ConnectionError: if :meth:`connect` / :meth:`login` has not been
                called yet, or the client was closed.
        """
        if self._client is None:
            raise ConnectionError(
                "Client not connected. Call login(token=...) first.",
            )
        return self._client

    async def connect(self, token: Optional[str] = None) -> None:
        """Open a new HTTP client authenticated with ``token``.

        Args:
            token: API key for dados.gov.br. When omitted, the token stored
                by a previous successful call is reused.

        Raises:
            ValueError: if no token is given and none was stored before.
        """
        _token = token or self._token

        if not _token:
            raise ValueError(
                "A token is required to connect to DadosGov. "
                "Pass it to connect(token=...) or login(token=...)."
            )

        self._token = _token

        # Reconnecting: release the previous client before replacing it.
        if self._client:
            await self.close()

        headers = {
            "Accept": "application/json",
            "User-Agent": f"PySUS/{__version__}",
            "chave-api-dados-abertos": self._token,
        }

        # NOTE(review): these headers are client-wide defaults, so the API
        # key is presumably also sent when a redirect (or an absolute URL
        # passed to _download_file) targets another host -- confirm whether
        # that is acceptable.
        self._client = httpx.AsyncClient(
            base_url=self.base_url,
            headers=headers,
            timeout=60.0,
            follow_redirects=True,
        )

    async def login(self, token: Optional[str] = None, **kwargs) -> None:
        """Alias of :meth:`connect`; extra keyword arguments are ignored."""
        await self.connect(token=token)

    async def close(self) -> None:
        """Close the HTTP client if one is open; safe to call repeatedly."""
        if self._client:
            await self._client.aclose()
            self._client = None

    async def datasets(self, **kwargs) -> List[Dataset]:
        """Instantiate every class in ``AVAILABLE_DATABASES`` for this client.

        No request is made here and ``kwargs`` is not used.
        """
        # Imported at call time rather than module level -- presumably to
        # avoid a circular import with ``.databases`` (confirm).
        from .databases import AVAILABLE_DATABASES

        return [db_class(client=self) for db_class in AVAILABLE_DATABASES]

    async def list_datasets(self, **kwargs) -> List[Dataset]:
        """Query the public dataset listing endpoint.

        Keyword Args:
            pagina: page number (default ``1``).
            nome_conjunto: sent as ``nomeConjuntoDados`` when given.
            dados_abertos: sent as ``dadosAbertos`` when given.
            is_privado: sent as ``isPrivado`` (default ``False``).
            id_organizacao: sent as ``idOrganizacao`` when given.

        Any other keyword argument is ignored.

        Returns:
            One ``Dataset`` per item of the JSON response, bound to this
            client.

        Raises:
            ConnectionError: if the client is not connected.
            httpx.HTTPStatusError: if the server answers with an error status.
        """
        client = self._require_client()

        params = {
            "pagina": kwargs.get("pagina", 1),
            "nomeConjuntoDados": kwargs.get("nome_conjunto"),
            "dadosAbertos": kwargs.get("dados_abertos"),
            "isPrivado": kwargs.get("is_privado", False),
            "idOrganizacao": kwargs.get("id_organizacao"),
        }
        # Filters left as None are dropped instead of being sent.
        params = {k: v for k, v in params.items() if v is not None}

        response = await client.get(
            "publico/conjuntos-dados",
            params=params,
        )
        response.raise_for_status()

        data = response.json()
        return [Dataset(**item, client=self) for item in data]

    async def get_dataset(
        self, id: str, group_definitions: Optional[Dict[str, str]] = None
    ) -> Dataset:
        """Fetch a single dataset by its identifier.

        Args:
            id: dataset identifier; it is percent-encoded before being placed
                in the URL path.
            group_definitions: passed through to ``Dataset``; an empty dict
                is used when omitted.

        Raises:
            ConnectionError: if the client is not connected.
            httpx.HTTPStatusError: if the server answers with an error status.
        """
        from urllib.parse import quote

        client = self._require_client()

        # Encode the id as one path segment so characters such as "/", "?"
        # or "#" cannot redirect the request to a different endpoint.
        safe_id = quote(str(id), safe="")
        response = await client.get(f"publico/conjuntos-dados/{safe_id}")
        response.raise_for_status()

        return Dataset(
            **response.json(),
            client=self,
            group_definitions=group_definitions or {},
        )

    async def _download_file(
        self,
        file: BaseRemoteFile,
        output: pathlib.Path,
        callback: Optional[Callable[[int], None]] = None,
    ) -> pathlib.Path:
        """Stream ``file.path`` into ``output``.

        Args:
            file: remote file whose ``path`` is requested with GET.
            output: local destination, opened in binary write mode.
            callback: if given, called after each chunk is written with the
                size of that chunk in bytes.

        Returns:
            ``output``, unchanged.

        Raises:
            ConnectionError: if the client is not connected.
            httpx.HTTPStatusError: if the server answers with an error status.
        """
        client = self._require_client()

        async with client.stream("GET", str(file.path)) as response:
            # Check the status before creating the local file.
            response.raise_for_status()
            with open(output, "wb") as f:
                async for chunk in response.aiter_bytes():
                    f.write(chunk)
                    if callback:
                        callback(len(chunk))
        return output

0 commit comments

Comments
 (0)