From 1111816dfc8571bcd672441f763a4facd427510c Mon Sep 17 00:00:00 2001 From: "Tybulewicz, Tomasz" Date: Wed, 5 Nov 2025 09:40:45 +0100 Subject: [PATCH 1/2] fix(tests): update dataset url in pre-commits test preparation --- tests/precommit/prepare_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/precommit/prepare_data.py b/tests/precommit/prepare_data.py index bfcde9f9..c56ef6a3 100644 --- a/tests/precommit/prepare_data.py +++ b/tests/precommit/prepare_data.py @@ -70,7 +70,7 @@ def prepare_data(data_dir="./data"): from io import BytesIO from zipfile import ZipFile - COCO128_URL = "https://ultralytics.com/assets/coco128.zip" + COCO128_URL = "https://storage.geti.intel.com/geti_predict/test/images/coco128.zip" with urlopen(COCO128_URL) as zipresp, ZipFile(BytesIO(zipresp.read())) as zfile: # noqa: S310 zfile.extractall(data_dir) From 1a364885d099f5b0ebb829a2430e35d6627fd3b0 Mon Sep 17 00:00:00 2001 From: "Tybulewicz, Tomasz" Date: Wed, 5 Nov 2025 12:20:23 +0100 Subject: [PATCH 2/2] Use download_models.py to fetch test data --- .github/workflows/test_accuracy.yml | 2 +- .github/workflows/test_precommit.yml | 2 +- CONTRIBUTING.md | 4 +- examples/visual_prompting/README.md | 2 +- examples/zsl_visual_prompting/README.md | 2 +- tests/accuracy/download_models.py | 57 +++++++- tests/accuracy/prepare_data.py | 177 ------------------------ tests/precommit/prepare_data.py | 110 --------------- tests/precommit/public_scope.json | 4 + 9 files changed, 65 insertions(+), 295 deletions(-) delete mode 100644 tests/accuracy/prepare_data.py delete mode 100644 tests/precommit/prepare_data.py diff --git a/.github/workflows/test_accuracy.yml b/.github/workflows/test_accuracy.yml index d80c2ce5..beceda5d 100644 --- a/.github/workflows/test_accuracy.yml +++ b/.github/workflows/test_accuracy.yml @@ -25,7 +25,7 @@ jobs: uv sync --locked --extra tests --extra-index-url https://download.pytorch.org/whl/cpu - name: Prepare test data run: | - uv run python 
tests/accuracy/prepare_data.py -d data + uv run python tests/accuracy/download_models.py -d data -j tests/accuracy/public_scope.json -l - name: Run Python Test run: | uv run pytest --data=./data tests/accuracy/test_accuracy.py diff --git a/.github/workflows/test_precommit.yml b/.github/workflows/test_precommit.yml index 0d7a1d43..b1175617 100644 --- a/.github/workflows/test_precommit.yml +++ b/.github/workflows/test_precommit.yml @@ -27,7 +27,7 @@ jobs: uv sync --locked --extra tests --extra-index-url https://download.pytorch.org/whl/cpu - name: Prepare test data run: | - uv run python tests/precommit/prepare_data.py -d data -p tests/precommit/public_scope.json + uv run python tests/accuracy/download_models.py -d data -j tests/precommit/public_scope.json -l - name: Run test run: | uv run pytest --data=./data tests/functional diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 27a656ca..3b8323d4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -56,9 +56,9 @@ Set up your development environment to start contributing. 
This involves install ```bash uv run pre-commit run --all-files uv run pytest tests/unit - uv run python tests/precommit/prepare_data.py -d data -p tests/precommit/public_scope.json + uv run python tests/accuracy/download_models.py -d data -j tests/precommit/public_scope.json -l uv run pytest --data=./data tests/functional - uv run python tests/accuracy/prepare_data.py -d data + uv run python tests/accuracy/download_models.py -d data -j tests/accuracy/public_scope.json -l uv run pytest --data=./data tests/accuracy/test_accuracy.py ``` diff --git a/examples/visual_prompting/README.md b/examples/visual_prompting/README.md index 0809bbc3..9eed1bb7 100644 --- a/examples/visual_prompting/README.md +++ b/examples/visual_prompting/README.md @@ -25,7 +25,7 @@ To run the pipeline out-of-the box you can download the test data by running the ```bash pip install httpx -python tests/accuracy/prepare_data.py -d data +python tests/accuracy/download_models.py -d data -j tests/accuracy/public_scope.json -l ``` and then run diff --git a/examples/zsl_visual_prompting/README.md b/examples/zsl_visual_prompting/README.md index b43519a3..9e1af365 100644 --- a/examples/zsl_visual_prompting/README.md +++ b/examples/zsl_visual_prompting/README.md @@ -28,7 +28,7 @@ To run the pipeline out-of-the box you can download the test data by running the ```bash pip install httpx -python tests/accuracy/prepare_data.py -d data +python tests/accuracy/download_models.py -d data -j tests/accuracy/public_scope.json -l ``` and then run diff --git a/tests/accuracy/download_models.py b/tests/accuracy/download_models.py index 7b152847..5ed55eba 100644 --- a/tests/accuracy/download_models.py +++ b/tests/accuracy/download_models.py @@ -1,3 +1,4 @@ +#!/usr/bin/env -S uv run --script # # Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -6,12 +7,18 @@ import asyncio import json import time +from io import BytesIO from pathlib import Path +from zipfile import ZipFile import httpx async 
def stream_file(client, url, filename, semaphore): + if Path(filename).exists(): + print(f"Skipping already downloaded {filename}") + return + async with semaphore: start_time = time.time() total_bytes = 0 @@ -28,6 +35,30 @@ async def stream_file(client, url, filename, semaphore): print(f"Downloaded {url} - {total_bytes:.2f} MB in {download_time:.2f}s ({speed_mbps:.2f} MB/s)") +async def download_single_image(client, url, filename): + image = await client.get(url) + with Path(filename).open("wb") as im: + im.write(image.content) + + +async def download_images(data_dir): + async with httpx.AsyncClient(timeout=20.0) as client: + COCO128_URL = "https://storage.geti.intel.com/geti_predict/test/images/coco128.zip" + archive = await client.get(COCO128_URL, follow_redirects=True) + with ZipFile(BytesIO(archive.content)) as zfile: + zfile.extractall(data_dir) + + image_downloads = [ + ( + "https://storage.geti.intel.com/geti_predict/test/images/BloodImage_00007.jpg", + data_dir / "BloodImage_00007.jpg", + ), + ("https://storage.geti.intel.com/geti_predict/test/images/cards.png", data_dir / "cards.png"), + ] + + await asyncio.gather(*[download_single_image(client, url, filename) for url, filename in image_downloads]) + + async def main(): parser = argparse.ArgumentParser() parser.add_argument( @@ -44,6 +75,12 @@ async def main(): required=True, help="Path to the JSON file with model information", ) + parser.add_argument( + "-l", + "--legacy", + action="store_true", + help="Download models using legacy directory structure (used in public_scope.json)", + ) args = parser.parse_args() with args.json_path.open("r") as f: @@ -54,11 +91,25 @@ async def main(): args.data_dir.mkdir(parents=True, exist_ok=True) async with httpx.AsyncClient(timeout=60.0) as client: tasks = [] - for model_entry in models_data: - model_name = model_entry["name"] + + model_names = [] + for model_data in models_data: + model_names.append(model_data["name"]) + if args.legacy and "encoder" in model_data: + 
model_names.append(model_data["encoder"]) + if args.legacy and "extra_model" in model_data: + model_names.append(model_data["extra_model"]) + + for model_name in model_names: download_url = base_path + model_name + if args.legacy: + if model_name.endswith(".onnx"): + download_url = base_path + model_name.replace(".", "/model.") + else: + download_url = base_path + model_name.replace(".", "/openvino.") save_path = args.data_dir / model_name save_path.parent.mkdir(parents=True, exist_ok=True) + tasks.append(stream_file(client, download_url, save_path, semaphore)) if model_name.endswith(".xml"): @@ -66,6 +117,8 @@ async def main(): stream_file(client, download_url.replace(".xml", ".bin"), save_path.with_suffix(".bin"), semaphore), ) + tasks.append(download_images(args.data_dir)) + print(f"Starting download of {len(tasks)} files with max 10 concurrent downloads...") await asyncio.gather(*tasks) print(f"All {len(tasks)} files downloaded successfully!") diff --git a/tests/accuracy/prepare_data.py b/tests/accuracy/prepare_data.py deleted file mode 100644 index 7cd3d3a7..00000000 --- a/tests/accuracy/prepare_data.py +++ /dev/null @@ -1,177 +0,0 @@ -# -# Copyright (C) 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# -import argparse -import asyncio -from io import BytesIO -from pathlib import Path -from zipfile import ZipFile - -import httpx - - -async def download_single_image(client, url, filename): - image = await client.get(url) - with Path(filename).open("wb") as im: - im.write(image.content) - - -async def download_images(data_dir): - async with httpx.AsyncClient(timeout=20.0) as client: - COCO128_URL = "https://storage.geti.intel.com/geti_predict/test/images/coco128.zip" - archive = await client.get(COCO128_URL, follow_redirects=True) - with ZipFile(BytesIO(archive.content)) as zfile: - zfile.extractall(data_dir) - - image_downloads = [ - ( - "https://storage.geti.intel.com/geti_predict/test/images/BloodImage_00007.jpg", - data_dir / 
"BloodImage_00007.jpg", - ), - ("https://storage.geti.intel.com/geti_predict/test/images/cards.png", data_dir / "cards.png"), - ] - - await asyncio.gather(*[download_single_image(client, url, filename) for url, filename in image_downloads]) - - -async def stream_file(client, url, filename): - async with client.stream("GET", url) as stream: - with Path(filename).open("wb") as file: - async for data in stream.aiter_bytes(): - file.write(data) - - -async def download_otx_model(client, otx_models_dir, model_name, format="xml"): - if format == "onnx": - await stream_file( - client, - f"https://storage.geti.intel.com/geti_predict/test/otx_models/{model_name}/model.onnx", - f"{otx_models_dir}/{model_name}.onnx", - ) - else: - await asyncio.gather( - stream_file( - client, - f"https://storage.geti.intel.com/geti_predict/test/otx_models/{model_name}/openvino.xml", - f"{otx_models_dir}/{model_name}.xml", - ), - stream_file( - client, - f"https://storage.geti.intel.com/geti_predict/test/otx_models/{model_name}/openvino.bin", - f"{otx_models_dir}/{model_name}.bin", - ), - ) - - -async def download_anomalib_model(client, models_dir, model_name): - await asyncio.gather( - stream_file( - client, - f"https://storage.geti.intel.com/geti_predict/test/anomalib_models/{model_name}/openvino.xml", - f"{models_dir}/{model_name}.xml", - ), - stream_file( - client, - f"https://storage.geti.intel.com/geti_predict/test/anomalib_models/{model_name}/openvino.bin", - f"{models_dir}/{model_name}.bin", - ), - ) - - -async def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - "-d", - "--data_dir", - type=Path, - required=True, - help="Directory to store downloaded models and datasets", - ) - args = parser.parse_args() - - otx_models_dir = args.data_dir / "otx_models" - otx_models_dir.mkdir(parents=True, exist_ok=True) - anomalib_models_dir = args.data_dir / "anomalib_models" - anomalib_models_dir.mkdir(parents=True, exist_ok=True) - async with httpx.AsyncClient(timeout=20.0) as 
client: - await asyncio.gather( - download_images(args.data_dir), - download_otx_model(client, otx_models_dir, "mlc_mobilenetv3_large_voc"), - download_otx_model(client, otx_models_dir, "mlc_efficient_b0_voc"), - download_otx_model(client, otx_models_dir, "mlc_efficient_v2s_voc"), - download_otx_model(client, otx_models_dir, "det_mobilenetv2_atss_bccd"), - download_otx_model( - client, - otx_models_dir, - "det_mobilenetv2_atss_bccd_onnx", - "onnx", - ), - download_otx_model(client, otx_models_dir, "cls_mobilenetv3_large_cars"), - download_otx_model( - client, - otx_models_dir, - "cls_mobilenetv3_large_cars", - "onnx", - ), - download_otx_model(client, otx_models_dir, "cls_efficient_b0_cars"), - download_otx_model(client, otx_models_dir, "cls_efficient_v2s_cars"), - download_otx_model(client, otx_models_dir, "tinynet_imagenet"), - download_otx_model(client, otx_models_dir, "Lite-hrnet-18"), - download_otx_model(client, otx_models_dir, "Lite-hrnet-18_mod2"), - download_otx_model(client, otx_models_dir, "Lite-hrnet-s_mod2"), - download_otx_model(client, otx_models_dir, "Lite-hrnet-s_mod2", "onnx"), - download_otx_model(client, otx_models_dir, "Lite-hrnet-x-mod3"), - download_otx_model( - client, - otx_models_dir, - "is_efficientnetb2b_maskrcnn_coco_reduced", - ), - download_otx_model( - client, - otx_models_dir, - "is_efficientnetb2b_maskrcnn_coco_reduced_onnx", - "onnx", - ), - download_otx_model( - client, - otx_models_dir, - "is_resnet50_maskrcnn_coco_reduced", - ), - download_otx_model(client, otx_models_dir, "mobilenet_v3_large_hc_cf"), - download_otx_model( - client, - otx_models_dir, - "classification_model_with_xai_head", - ), - download_otx_model(client, otx_models_dir, "detection_model_with_xai_head"), - download_otx_model( - client, - otx_models_dir, - "segmentation_model_with_xai_head", - ), - download_otx_model(client, otx_models_dir, "maskrcnn_model_with_xai_head"), - download_otx_model(client, otx_models_dir, "maskrcnn_xai_tiling"), - 
download_otx_model(client, otx_models_dir, "tile_classifier"), - download_otx_model(client, otx_models_dir, "anomaly_padim_bottle_mvtec"), - download_otx_model(client, otx_models_dir, "anomaly_stfpm_bottle_mvtec"), - download_otx_model(client, otx_models_dir, "deit-tiny"), - download_otx_model( - client, - otx_models_dir, - "cls_efficient_b0_shuffled_outputs", - ), - download_otx_model(client, otx_models_dir, "action_cls_xd3_kinetic"), - download_otx_model(client, otx_models_dir, "sam_vit_b_zsl_encoder"), - download_otx_model(client, otx_models_dir, "sam_vit_b_zsl_decoder"), - download_otx_model(client, otx_models_dir, "rtmpose_tiny"), - download_otx_model(client, otx_models_dir, "segnext_t_tiling"), - download_otx_model(client, otx_models_dir, "ssd-card-detection"), - download_anomalib_model(client, anomalib_models_dir, "padim"), - download_anomalib_model(client, anomalib_models_dir, "stfpm"), - download_anomalib_model(client, anomalib_models_dir, "uflow"), - ) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/tests/precommit/prepare_data.py b/tests/precommit/prepare_data.py deleted file mode 100644 index c56ef6a3..00000000 --- a/tests/precommit/prepare_data.py +++ /dev/null @@ -1,110 +0,0 @@ -# -# Copyright (C) 2020-2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# -import argparse -import json -import os -from pathlib import Path -from urllib.request import urlopen, urlretrieve - - -def retrieve_otx_model(data_dir, model_name, format="xml"): - destination_folder = Path(data_dir) / "otx_models" - destination_folder.mkdir(parents=True, exist_ok=True) - if format == "onnx": - urlretrieve( - f"https://storage.geti.intel.com/geti_predict/test/otx_models/{model_name}/model.onnx", - destination_folder / f"{model_name}.onnx", - ) - else: - urlretrieve( - f"https://storage.geti.intel.com/geti_predict/test/otx_models/{model_name}/openvino.xml", - destination_folder / f"{model_name}.xml", - ) - urlretrieve( - 
f"https://storage.geti.intel.com/geti_predict/test/otx_models/{model_name}/openvino.bin", - f"{destination_folder}/{model_name}.bin", - ) - - -def retrieve_anomalib_model(data_dir, model_name, format="xml"): - destination_folder = Path(data_dir) / "anomalib_models" - destination_folder.mkdir(parents=True, exist_ok=True) - urlretrieve( - f"https://storage.geti.intel.com/geti_predict/test/anomalib_models/{model_name}/openvino.xml", - destination_folder / f"{model_name}.xml", - ) - urlretrieve( - f"https://storage.geti.intel.com/geti_predict/test/anomalib_models/{model_name}/openvino.bin", - f"{destination_folder}/{model_name}.bin", - ) - - -def prepare_model( - data_dir="./data", - public_scope=Path(__file__).resolve().parent / "public_scope.json", -): - # TODO refactor this test so that it does not use eval - # flake8: noqa: F401 - from model_api.models import AnomalyDetection, ClassificationModel, DetectionModel, SegmentationModel - - # Mapping of model type strings to actual classes for security - MODEL_TYPE_MAPPING = { - "AnomalyDetection": AnomalyDetection, - "ClassificationModel": ClassificationModel, - "DetectionModel": DetectionModel, - "SegmentationModel": SegmentationModel, - } - - with Path(public_scope).open("r") as f: - public_scope = json.load(f) - - for model in public_scope: - if model["name"].endswith(".xml") or model["name"].endswith(".onnx"): - continue - model = MODEL_TYPE_MAPPING[model["type"]].create_model(model["name"], download_dir=data_dir) - - -def prepare_data(data_dir="./data"): - from io import BytesIO - from zipfile import ZipFile - - COCO128_URL = "https://storage.geti.intel.com/geti_predict/test/images/coco128.zip" - - with urlopen(COCO128_URL) as zipresp, ZipFile(BytesIO(zipresp.read())) as zfile: # noqa: S310 - zfile.extractall(data_dir) - - urlretrieve( - "https://raw.githubusercontent.com/Shenggan/BCCD_Dataset/master/BCCD/JPEGImages/BloodImage_00007.jpg", - Path(data_dir) / "BloodImage_00007.jpg", - ) - - -if __name__ == 
"__main__": - parser = argparse.ArgumentParser(description="Data and model preparate script") - parser.add_argument( - "-d", - dest="data_dir", - default="./data", - help="Directory to store downloaded models and datasets", - ) - parser.add_argument( - "-p", - dest="public_scope", - default=Path(__file__).resolve().parent / "public_scope.json", - help="JSON file with public model description", - ) - - args = parser.parse_args() - - prepare_model(args.data_dir, args.public_scope) - prepare_data(args.data_dir) - retrieve_otx_model(args.data_dir, "mlc_mobilenetv3_large_voc") - retrieve_otx_model(args.data_dir, "detection_model_with_xai_head") - retrieve_otx_model(args.data_dir, "Lite-hrnet-18_mod2") - retrieve_otx_model(args.data_dir, "tinynet_imagenet") - retrieve_otx_model(args.data_dir, "cls_mobilenetv3_large_cars", "onnx") - retrieve_anomalib_model(args.data_dir, "padim") - retrieve_anomalib_model(args.data_dir, "stfpm") - retrieve_anomalib_model(args.data_dir, "uflow") diff --git a/tests/precommit/public_scope.json b/tests/precommit/public_scope.json index 76cfb6e8..2c15784b 100644 --- a/tests/precommit/public_scope.json +++ b/tests/precommit/public_scope.json @@ -11,6 +11,10 @@ "name": "otx_models/Lite-hrnet-18_mod2.xml", "type": "SegmentationModel" }, + { + "name": "otx_models/cls_mobilenetv3_large_cars.onnx", + "type": "ClassificationModel" + }, { "name": "otx_models/tinynet_imagenet.xml", "type": "ClassificationModel"