#!/usr/bin/env -S uv run --script
12#
23# Copyright (C) 2025 Intel Corporation
34# SPDX-License-Identifier: Apache-2.0
67import asyncio
78import json
89import time
10+ from io import BytesIO
911from pathlib import Path
12+ from zipfile import ZipFile
1013
1114import httpx
1215
1316
1417async def stream_file (client , url , filename , semaphore ):
18+ if Path (filename ).exists ():
19+ print (f"Skipping already downloaded { filename } " )
20+ return
21+
1522 async with semaphore :
1623 start_time = time .time ()
1724 total_bytes = 0
@@ -28,6 +35,30 @@ async def stream_file(client, url, filename, semaphore):
2835 print (f"Downloaded { url } - { total_bytes :.2f} MB in { download_time :.2f} s ({ speed_mbps :.2f} MB/s)" )
2936
3037
async def download_single_image(client, url, filename):
    """Download a single file from ``url`` and store it at ``filename``.

    Args:
        client: Async HTTP client (an ``httpx.AsyncClient`` in this script)
            whose ``get`` coroutine returns a response exposing
            ``raise_for_status()`` and ``content``.
        url: Address of the file to fetch.
        filename: Destination path (``str`` or ``Path``); overwritten if it
            already exists.

    Raises:
        Whatever ``response.raise_for_status()`` raises for a non-success
        status (``httpx.HTTPStatusError`` with httpx).
    """
    # Follow redirects just like the archive download in this script does.
    response = await client.get(url, follow_redirects=True)
    # Fail before touching the disk so an HTTP error page is never saved
    # under the image's file name.
    response.raise_for_status()
    Path(filename).write_bytes(response.content)
42+
43+
async def download_images(data_dir):
    """Fetch the test images into ``data_dir``.

    Downloads the ``coco128.zip`` archive and extracts it into ``data_dir``,
    then concurrently downloads two standalone images
    (``BloodImage_00007.jpg`` and ``cards.png``) into the same directory.

    Args:
        data_dir: Target directory (``str`` or ``Path``). It is not created
            here; the caller is expected to have created it.

    Raises:
        httpx.HTTPStatusError: If the archive request returns an error status.
    """
    data_dir = Path(data_dir)
    coco128_url = "https://storage.geti.intel.com/geti_predict/test/images/coco128.zip"
    image_downloads = [
        (
            "https://storage.geti.intel.com/geti_predict/test/images/BloodImage_00007.jpg",
            data_dir / "BloodImage_00007.jpg",
        ),
        ("https://storage.geti.intel.com/geti_predict/test/images/cards.png", data_dir / "cards.png"),
    ]

    async with httpx.AsyncClient(timeout=20.0) as client:
        archive = await client.get(coco128_url, follow_redirects=True)
        # Surface HTTP failures directly instead of letting an error page
        # reach ZipFile and fail there with an unrelated BadZipFile.
        archive.raise_for_status()
        with ZipFile(BytesIO(archive.content)) as zfile:
            zfile.extractall(data_dir)

        await asyncio.gather(*(download_single_image(client, url, filename) for url, filename in image_downloads))
60+
61+
3162async def main ():
3263 parser = argparse .ArgumentParser ()
3364 parser .add_argument (
@@ -44,6 +75,12 @@ async def main():
4475 required = True ,
4576 help = "Path to the JSON file with model information" ,
4677 )
78+ parser .add_argument (
79+ "-l" ,
80+ "--legacy" ,
81+ action = "store_true" ,
82+ help = "Download models using legacy directory structure (used in public_scope.json" ,
83+ )
4784 args = parser .parse_args ()
4885
4986 with args .json_path .open ("r" ) as f :
@@ -54,18 +91,34 @@ async def main():
5491 args .data_dir .mkdir (parents = True , exist_ok = True )
5592 async with httpx .AsyncClient (timeout = 60.0 ) as client :
5693 tasks = []
57- for model_entry in models_data :
58- model_name = model_entry ["name" ]
94+
95+ model_names = []
96+ for model_data in models_data :
97+ model_names .append (model_data ["name" ])
98+ if args .legacy and "encoder" in model_data :
99+ model_names .append (model_data ["encoder" ])
100+ if args .legacy and "extra_model" in model_data :
101+ model_names .append (model_data ["extra_model" ])
102+
103+ for model_name in model_names :
59104 download_url = base_path + model_name
105+ if args .legacy :
106+ if model_name .endswith (".onnx" ):
107+ download_url = base_path + model_name .replace ("." , "/model." )
108+ else :
109+ download_url = base_path + model_name .replace ("." , "/openvino." )
60110 save_path = args .data_dir / model_name
61111 save_path .parent .mkdir (parents = True , exist_ok = True )
112+
62113 tasks .append (stream_file (client , download_url , save_path , semaphore ))
63114
64115 if model_name .endswith (".xml" ):
65116 tasks .append (
66117 stream_file (client , download_url .replace (".xml" , ".bin" ), save_path .with_suffix (".bin" ), semaphore ),
67118 )
68119
120+ tasks .append (download_images (args .data_dir ))
121+
69122 print (f"Starting download of { len (tasks )} files with max 10 concurrent downloads..." )
70123 await asyncio .gather (* tasks )
71124 print (f"All { len (tasks )} files downloaded successfully!" )
0 commit comments