Skip to content

Commit acfbcea

Browse files
authored
Merge pull request #23 from BioContainers/optimze_quay
use specific tag endpoint, reduce the load on quay.io site
2 parents ab5cfc4 + ae943ec commit acfbcea

1 file changed

66 additions & 30 deletions

File tree

populate_build.py

Lines changed: 66 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222

2323

2424
logger = logging.getLogger()
25+
QUAY_REGISTRY_TAG_PAGE_SIZE = 100
26+
QUAY_TAG_FETCH_CONCURRENCY = 20
27+
QUAY_TAG_FETCH_PER_SECOND = 20
2528

2629

2730
class RepositoryKind(str, Enum):
@@ -53,16 +56,11 @@ class RepositoryListResponse(pydantic.BaseModel):
5356
next_page: Optional[str] = None
5457

5558

56-
class RepositoryTag(pydantic.BaseModel):
57-
"""Define the repository tag data of interest."""
59+
class TagListResponse(pydantic.BaseModel):
60+
"""Define the repository tag list data of interest."""
5861

5962
name: str
60-
61-
62-
class SingleRepositoryResponse(Repository):
63-
"""Define the single repository data of interest."""
64-
65-
tags: Dict[str, RepositoryTag]
63+
tags: List[str]
6664

6765

6866
class ContainerImageParser(HTMLParser):
@@ -111,14 +109,33 @@ async def fetch_all(
111109
params["namespace"] = repository
112110
async with httpx.AsyncClient(
113111
base_url=api_url, headers=headers, timeout=httpx.Timeout(12)
114-
) as client:
115-
names = await cls._fetch_names(client=client, params=params)
112+
) as api_client, httpx.AsyncClient(
113+
base_url=cls._registry_url(api_url),
114+
headers=headers,
115+
timeout=httpx.Timeout(12),
116+
) as registry_client:
117+
names = await cls._fetch_names(client=api_client, params=params)
116118
images = await cls._fetch_tags(
117-
client=client, repository=repository, names=names
119+
client=registry_client, repository=repository, names=names
118120
)
119121
log_images(log_file=log_file, images=images)
120122
return images
121123

124+
@staticmethod
125+
def _registry_url(api_url: str) -> str:
126+
"""Derive the registry API root from the Quay API root."""
127+
url = httpx.URL(api_url)
128+
api_path = url.path
129+
if api_path.endswith("/api/v1/"):
130+
registry_prefix = api_path[:-len("/api/v1/")]
131+
registry_path = f"{registry_prefix}/v2/"
132+
elif api_path.endswith("/api/v1"):
133+
registry_prefix = api_path[:-len("/api/v1")]
134+
registry_path = f"{registry_prefix}/v2/"
135+
else:
136+
registry_path = "/v2/"
137+
return str(url.copy_with(path=registry_path, query=None, fragment=None))
138+
122139
@classmethod
123140
async def _fetch_names(
124141
cls, client: httpx.AsyncClient, params: Dict[str, str]
@@ -171,8 +188,8 @@ async def _fetch_tags(
171188
client: httpx.AsyncClient,
172189
repository: str,
173190
names: List[str],
174-
max_concurrency: int = 10,
175-
max_per_second: int = 10,
191+
max_concurrency: int = QUAY_TAG_FETCH_CONCURRENCY,
192+
max_per_second: int = QUAY_TAG_FETCH_PER_SECOND,
176193
) -> List[str]:
177194
"""
178195
Fetch the image tags for each given container image.
@@ -181,21 +198,17 @@ async def _fetch_tags(
181198
limits.
182199
183200
"""
184-
requests = [
185-
client.build_request(method="GET", url=f"repository/{repository}/{name}")
186-
for name in names
187-
]
188201
images = []
189202
with cls._progress_bar() as pbar:
190-
task = pbar.add_task(description="Image Tags", total=len(requests))
203+
task = pbar.add_task(description="Image Tags", total=len(names))
191204
async with aiometer.amap(
192-
partial(cls._fetch_single_repository, client),
193-
requests,
205+
partial(cls._fetch_single_repository_tags, client, repository),
206+
names,
194207
max_at_once=max_concurrency,
195208
max_per_second=max_per_second,
196209
) as results:
197-
async for repo in results: # type: SingleRepositoryResponse
198-
images.extend((f"{repo.name}:{tag}" for tag in repo.tags))
210+
async for repo_images in results:
211+
images.extend(repo_images)
199212
pbar.update(task, advance=1)
200213
return images
201214

@@ -220,13 +233,30 @@ def _progress_bar(cls) -> rprog.Progress:
220233
retry=tenacity.retry_if_exception_type(httpx.HTTPError),
221234
before=tenacity.before_log(logger, logging.DEBUG),
222235
)
223-
async def _fetch_single_repository(
224-
client: httpx.AsyncClient, request: httpx.Request
225-
) -> SingleRepositoryResponse:
226-
"""Fetch a single repository resource and parse the response."""
227-
response = await client.send(request=request)
228-
response.raise_for_status()
229-
return SingleRepositoryResponse.parse_obj(response.json())
236+
async def _fetch_single_repository_tags(
237+
client: httpx.AsyncClient, repository: str, name: str
238+
) -> List[str]:
239+
"""Fetch all tags for a single repository from the registry tag endpoint."""
240+
images = []
241+
next_url = f"{repository}/{name}/tags/list"
242+
params: Optional[Dict[str, int]] = {"n": QUAY_REGISTRY_TAG_PAGE_SIZE}
243+
while next_url:
244+
response = await client.get(next_url, params=params)
245+
response.raise_for_status()
246+
payload = TagListResponse.parse_obj(response.json())
247+
images.extend((f"{name}:{tag}" for tag in payload.tags))
248+
next_link = response.links.get("next")
249+
next_url = QuayImageFetcher._normalize_next_link(client, next_link["url"]) if next_link else ""
250+
params = None
251+
return images
252+
253+
@staticmethod
254+
def _normalize_next_link(client: httpx.AsyncClient, next_url: str) -> str:
255+
"""Normalize relative Quay pagination links against the registry client base path."""
256+
base_path = client.base_url.path
257+
if next_url.startswith(base_path):
258+
return next_url[len(base_path):]
259+
return next_url.lstrip("/")
230260

231261

232262
class SingularityImageFetcher:
@@ -309,8 +339,14 @@ def get_new_images(
309339

310340
def parse_denylist(filename: Path) -> List[str]:
311341
"""Parse the list of images to skip."""
342+
denylist = list()
312343
with filename.open() as handle:
313-
return [entry for line in handle.readlines() if (entry := line.strip()) and not line.startswith('#')]
344+
for line in handle.readlines():
345+
entry = line.strip()
346+
if entry and not line.startswith('#'):
347+
denylist.append(entry)
348+
logger.info(f"{len(denylist):,} entries found on the skip-list")
349+
return denylist
314350

315351

316352
def generate_build_script(filename: Path, images: List[str]) -> None:

0 commit comments

Comments (0)