Skip to content

Commit 8b63729

Browse files
committed
chore: fix tests & add a semaphore to async downloads, which were being throttled when too many files downloaded at the same time
1 parent 7c9bfbb commit 8b63729

23 files changed

Lines changed: 811 additions & 1058 deletions

.github/workflows/python-package.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,10 @@ jobs:
7171
- uses: actions/checkout@v4
7272

7373
- name: Build Docker image
74-
run: docker compose -f docker/docker-compose.yaml build
74+
run: docker compose build
7575

7676
- name: Start container
77-
run: docker compose -f docker/docker-compose.yaml up -d
77+
run: docker compose up -d
7878

7979
- name: Wait for Jupyter
8080
run: |
@@ -84,8 +84,8 @@ jobs:
8484
done
8585
8686
- name: Run tests inside container
87-
run: docker compose -f docker/docker-compose.yaml exec -T -w /usr/src jupyter python3 -m pytest -vv pysus/tests/ --retries 3 --retry-delay 15 -x -o cache_dir=/tmp/.pytest_cache
87+
run: docker compose exec -T -w /usr/src jupyter python3 -m pytest -vv pysus/tests/ --retries 3 --retry-delay 15 -x -o cache_dir=/tmp/.pytest_cache
8888

8989
- name: Cleanup
9090
if: always()
91-
run: docker compose -f docker/docker-compose.yaml down -v
91+
run: docker compose down -v

.github/workflows/release.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,11 @@ jobs:
9191
with:
9292
python-version: "3.12"
9393

94+
- name: Install system dependencies
95+
run: |
96+
sudo apt-get update
97+
sudo apt-get install -y pandoc
98+
9499
- name: Install dependencies
95100
run: |
96101
pip install poetry wget

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ export PRINT_HELP_PYSCRIPT
2222
help:
2323
@python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
2424

25-
DOCKER = docker compose -p pysus -f docker/docker-compose.yaml
25+
DOCKER = docker compose -p pysus
2626
SERVICE :=
2727
SEMANTIC_RELEASE = npx --yes \
2828
-p semantic-release \
@@ -56,7 +56,7 @@ test-pysus: ## run tests quickly with the default Python
5656

5757
.PHONY: test-pysus-with-coverage
5858
test-pysus-with-coverage: ## run tests with coverage report
59-
poetry run pytest -vv pysus/tests/ --retries 3 --retry-delay 15 --cov=pysus --cov-report=xml:coverage.xml --cov-report=term-missing
59+
poetry run pytest -vv pysus/tests/ --cov=pysus --cov-report=xml:coverage.xml --cov-report=term-missing
6060

6161
.PHONY: lint
6262
lint:

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,15 @@ docker run -p 8888:8888 alertadengue/pysus
3737
Or build locally and start the container:
3838

3939
```bash
40-
docker compose -f docker/docker-compose.yaml up --build
40+
docker compose up --build
4141
```
4242

4343
Then open [http://127.0.0.1:8888/lab](http://127.0.0.1:8888/lab) in your browser.
4444

4545
Stop the container:
4646

4747
```bash
48-
docker compose -f docker/docker-compose.yaml down
48+
docker compose down
4949
```
5050

5151
## Quick Start
@@ -268,7 +268,7 @@ pytest tests/
268268
Run tests inside the Docker container:
269269

270270
```bash
271-
docker compose -f docker/docker-compose.yaml exec -T -w /usr/src jupyter python3 -m pytest pysus/tests/
271+
docker compose exec -T -w /usr/src jupyter python3 -m pytest pysus/tests/
272272
```
273273

274274
## License

docs/source/conf.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@
2121
}
2222

2323
templates_path = ["_templates"]
24+
25+
# Only .rst is declared as a source suffix here; notebooks (.ipynb) are left to the nbsphinx extension
2426
source_suffix = ".rst"
27+
2528
master_doc = "index"
2629

2730
project = "PySUS"

docs/source/databases/getting_started_pysus.ipynb

Lines changed: 382 additions & 370 deletions
Large diffs are not rendered by default.

docs/source/installation.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,15 @@ Or build locally and start the container:
3434

3535
.. code-block:: bash
3636
37-
docker compose -f docker/docker-compose.yaml up --build
37+
docker compose up --build
3838
3939
Then open http://127.0.0.1:8888/lab in your browser.
4040

4141
Stop the container with:
4242

4343
.. code-block:: bash
4444
45-
docker compose -f docker/docker-compose.yaml down
45+
docker compose down
4646
4747
Development
4848
-----------

pysus/api/_impl/databases.py

Lines changed: 34 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@
88
"""
99

1010
import asyncio
11-
from typing import Literal
11+
from typing import Literal, cast
1212

1313
import pandas as pd
1414
from pysus.api import types
1515
from pysus.api.client import PySUS
16-
from tqdm import tqdm
16+
from tqdm.asyncio import tqdm
1717

1818
__all__ = [
1919
"sinan",
@@ -57,10 +57,9 @@ def _fetch_data(
5757
month : int | list[int], optional
5858
Month or list of months to fetch.
5959
show_progress : bool, optional
60-
Whether to display a tqdm progress bar during download. Default is True.
60+
Whether to display a tqdm progress bar during download.
6161
as_dataframe : bool, optional
6262
Whether to concatenate and return the data as a pandas DataFrame.
63-
Default is False.
6463
**kwargs
6564
Additional arguments forwarded to :meth:`PySUS.read_parquet`.
6665
@@ -71,48 +70,41 @@ def _fetch_data(
7170
as_dataframe is True, returns a concatenated DataFrame.
7271
"""
7372

74-
async def _fetch():
75-
73+
async def _fetch() -> list[str] | pd.DataFrame:
7674
async with PySUS() as pysus:
77-
years = [year] if isinstance(year, int) else (year or [None])
78-
months = [month] if isinstance(month, int) else (month or [None])
75+
files = await pysus.query(
76+
dataset=dataset,
77+
group=group,
78+
state=state,
79+
year=year,
80+
month=month,
81+
)
7982

80-
files = []
81-
for y in years:
82-
for m in months:
83-
files.extend(
84-
await pysus.query(
85-
dataset=dataset,
86-
group=group,
87-
state=state,
88-
year=y,
89-
month=m,
90-
)
91-
)
83+
if not files:
84+
return pd.DataFrame() if as_dataframe else cast(list[str], [])
85+
86+
sem = asyncio.Semaphore(3)
87+
88+
async def _throttled_download(f):
89+
async with sem:
90+
return await pysus.download(f)
91+
92+
tasks = [_throttled_download(f) for f in files]
9293

93-
paths = []
9494
if show_progress:
95-
for file in tqdm(
96-
files,
95+
downloaded_files = await tqdm.gather(
96+
*tasks,
9797
desc=f"Downloading {dataset}",
9898
unit="file",
99-
):
100-
f = await pysus.download(file)
101-
paths.append(str(f.path))
99+
)
102100
else:
103-
for file in files:
104-
f = await pysus.download(file)
105-
paths.append(str(f.path))
101+
downloaded_files = await asyncio.gather(*tasks)
102+
103+
paths: list[str] = [str(f.path) for f in downloaded_files]
106104

107105
if as_dataframe:
108-
return (
109-
pysus.read_parquet(
110-
paths,
111-
**kwargs,
112-
).df()
113-
if paths
114-
else pd.DataFrame()
115-
)
106+
res = pysus.read_parquet(paths, **kwargs).df()
107+
return cast(pd.DataFrame, res)
116108

117109
return paths
118110

@@ -132,9 +124,11 @@ async def _fetch():
132124
"Install it with: pip install nest_asyncio"
133125
)
134126
raise RuntimeError(msg) from None
135-
return loop.run_until_complete(_fetch())
136-
else:
137-
return asyncio.run(_fetch())
127+
result = loop.run_until_complete(_fetch())
128+
return cast(list[str] | pd.DataFrame, result)
129+
130+
result = asyncio.run(_fetch())
131+
return cast(list[str] | pd.DataFrame, result)
138132

139133

140134
def sinan(

pysus/api/client.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,11 @@ async def download(
314314
)
315315
return await ExtensionFactory.instantiate(local_path)
316316

317-
except Exception as e: # noqa: B902
317+
except Exception as e: # noqa
318+
import traceback
319+
320+
traceback.print_exc()
321+
318322
await self._update_state(
319323
local_path,
320324
str(remote_path),

pysus/api/ducklake/catalog/adapters.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ def __init__(self, name: str, dataset_id: int, engine=None, **data) -> None:
239239
super().__init__(engine=engine, **data)
240240
self.dataset_name: str = name
241241
self.db_local: Path = self.cache_dir / f"catalog_{name}.duckdb"
242-
self.db_remote: Path = Path(f"datasets/catalog_{name}.duckdb")
242+
self.db_remote: Path = Path(f"public/catalog_{name}.duckdb")
243243
self.dataset_id = dataset_id
244244

245245

0 commit comments

Comments
 (0)