Skip to content

Commit 9730011

Browse files
authored
chore(tests): use download_models.py in tests data preparation (#432)
* fix(tests): update dataset url in pre-commits test preparation * Use download_models.py to fetch test data
1 parent da1fcea commit 9730011

9 files changed

Lines changed: 65 additions & 295 deletions

File tree

.github/workflows/test_accuracy.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
uv sync --locked --extra tests --extra-index-url https://download.pytorch.org/whl/cpu
2626
- name: Prepare test data
2727
run: |
28-
uv run python tests/accuracy/prepare_data.py -d data
28+
uv run python tests/accuracy/download_models.py -d data -j tests/accuracy/public_scope.json -l
2929
- name: Run Python Test
3030
run: |
3131
uv run pytest --data=./data tests/accuracy/test_accuracy.py

.github/workflows/test_precommit.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
uv sync --locked --extra tests --extra-index-url https://download.pytorch.org/whl/cpu
2828
- name: Prepare test data
2929
run: |
30-
uv run python tests/precommit/prepare_data.py -d data -p tests/precommit/public_scope.json
30+
uv run python tests/accuracy/download_models.py -d data -j tests/precommit/public_scope.json -l
3131
- name: Run test
3232
run: |
3333
uv run pytest --data=./data tests/functional

CONTRIBUTING.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,9 @@ Set up your development environment to start contributing. This involves install
5656
```bash
5757
uv run pre-commit run --all-files
5858
uv run pytest tests/unit
59-
uv run python tests/precommit/prepare_data.py -d data -p tests/precommit/public_scope.json
59+
uv run python tests/accuracy/download_models.py -d data -j tests/precommit/public_scope.json -l
6060
uv run pytest --data=./data tests/functional
61-
uv run python tests/accuracy/prepare_data.py -d data
61+
uv run python tests/accuracy/download_models.py -d data -j tests/accuracy/public_scope.json -l
6262
uv run pytest --data=./data tests/accuracy/test_accuracy.py
6363
```
6464

examples/visual_prompting/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ To run the pipeline out-of-the box you can download the test data by running the
2525

2626
```bash
2727
pip install httpx
28-
python tests/accuracy/prepare_data.py -d data
28+
python tests/accuracy/download_models.py -d data -j tests/accuracy/public_scope.json -l
2929
```
3030

3131
and then run

examples/zsl_visual_prompting/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ To run the pipeline out-of-the box you can download the test data by running the
2828

2929
```bash
3030
pip install httpx
31-
python tests/accuracy/prepare_data.py -d data
31+
python tests/accuracy/download_models.py -d data -j tests/accuracy/public_scope.json -l
3232
```
3333

3434
and then run

tests/accuracy/download_models.py

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#!/usr/bin/env -S uv run --script
12
#
23
# Copyright (C) 2025 Intel Corporation
34
# SPDX-License-Identifier: Apache-2.0
@@ -6,12 +7,18 @@
67
import asyncio
78
import json
89
import time
10+
from io import BytesIO
911
from pathlib import Path
12+
from zipfile import ZipFile
1013

1114
import httpx
1215

1316

1417
async def stream_file(client, url, filename, semaphore):
18+
if Path(filename).exists():
19+
print(f"Skipping already downloaded {filename}")
20+
return
21+
1522
async with semaphore:
1623
start_time = time.time()
1724
total_bytes = 0
@@ -28,6 +35,30 @@ async def stream_file(client, url, filename, semaphore):
2835
print(f"Downloaded {url} - {total_bytes:.2f} MB in {download_time:.2f}s ({speed_mbps:.2f} MB/s)")
2936

3037

38+
async def download_single_image(client, url, filename):
    """Download a single image and store it on disk.

    Args:
        client: HTTP client exposing an awaitable ``get(url, ...)`` whose
            result has ``raise_for_status()`` and ``content`` (an
            ``httpx.AsyncClient`` in this script).
        url: Address of the image to fetch.
        filename: Destination path; an existing file is overwritten.

    Raises:
        Whatever ``raise_for_status()`` raises for a 4xx/5xx response. The
        destination file is not created in that case.
    """
    # Follow redirects for consistency with the archive download, which
    # fetches from the same storage host with follow_redirects=True.
    response = await client.get(url, follow_redirects=True)
    # Fail loudly instead of silently saving an HTML error page as an image.
    response.raise_for_status()
    Path(filename).write_bytes(response.content)
42+
43+
44+
async def download_images(data_dir):
    """Fetch the test images into ``data_dir``.

    Downloads the COCO128 archive and extracts it into ``data_dir``, then
    downloads two standalone sample images next to it.

    Args:
        data_dir: Target directory (``str`` or ``Path``).

    Raises:
        httpx.HTTPStatusError: If the archive request returns a 4xx/5xx status.
    """
    # Accept plain strings as well; the "/" joins below need a Path.
    data_dir = Path(data_dir)
    coco128_url = "https://storage.geti.intel.com/geti_predict/test/images/coco128.zip"
    image_downloads = [
        (
            "https://storage.geti.intel.com/geti_predict/test/images/BloodImage_00007.jpg",
            data_dir / "BloodImage_00007.jpg",
        ),
        ("https://storage.geti.intel.com/geti_predict/test/images/cards.png", data_dir / "cards.png"),
    ]

    async with httpx.AsyncClient(timeout=20.0) as client:
        archive = await client.get(coco128_url, follow_redirects=True)
        # Surface HTTP errors directly rather than as a BadZipFile raised
        # while trying to parse an error page.
        archive.raise_for_status()
        with ZipFile(BytesIO(archive.content)) as zfile:
            zfile.extractall(data_dir)

        await asyncio.gather(*[download_single_image(client, url, filename) for url, filename in image_downloads])
60+
61+
3162
async def main():
3263
parser = argparse.ArgumentParser()
3364
parser.add_argument(
@@ -44,6 +75,12 @@ async def main():
4475
required=True,
4576
help="Path to the JSON file with model information",
4677
)
78+
parser.add_argument(
79+
"-l",
80+
"--legacy",
81+
action="store_true",
82+
help="Download models using legacy directory structure (used in public_scope.json",
83+
)
4784
args = parser.parse_args()
4885

4986
with args.json_path.open("r") as f:
@@ -54,18 +91,34 @@ async def main():
5491
args.data_dir.mkdir(parents=True, exist_ok=True)
5592
async with httpx.AsyncClient(timeout=60.0) as client:
5693
tasks = []
57-
for model_entry in models_data:
58-
model_name = model_entry["name"]
94+
95+
model_names = []
96+
for model_data in models_data:
97+
model_names.append(model_data["name"])
98+
if args.legacy and "encoder" in model_data:
99+
model_names.append(model_data["encoder"])
100+
if args.legacy and "extra_model" in model_data:
101+
model_names.append(model_data["extra_model"])
102+
103+
for model_name in model_names:
59104
download_url = base_path + model_name
105+
if args.legacy:
106+
if model_name.endswith(".onnx"):
107+
download_url = base_path + model_name.replace(".", "/model.")
108+
else:
109+
download_url = base_path + model_name.replace(".", "/openvino.")
60110
save_path = args.data_dir / model_name
61111
save_path.parent.mkdir(parents=True, exist_ok=True)
112+
62113
tasks.append(stream_file(client, download_url, save_path, semaphore))
63114

64115
if model_name.endswith(".xml"):
65116
tasks.append(
66117
stream_file(client, download_url.replace(".xml", ".bin"), save_path.with_suffix(".bin"), semaphore),
67118
)
68119

120+
tasks.append(download_images(args.data_dir))
121+
69122
print(f"Starting download of {len(tasks)} files with max 10 concurrent downloads...")
70123
await asyncio.gather(*tasks)
71124
print(f"All {len(tasks)} files downloaded successfully!")

tests/accuracy/prepare_data.py

Lines changed: 0 additions & 177 deletions
This file was deleted.

0 commit comments

Comments
 (0)