Skip to content

Commit 1be3ddf

Browse files
authored
Merge pull request #565 from laurelmay/fix-caching-urls
fix: hashlint caches wrong URL when redirected
2 parents 7977de3 + 6ef5f3e commit 1be3ddf

1 file changed

Lines changed: 52 additions & 17 deletions

File tree

scripts/hashlint.py

Lines changed: 52 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,7 @@
1212
from collections import namedtuple
1313
from pathlib import Path
1414

15-
# Importing these types using in the string type hints is helpful for some
16-
# editors to actually support hinting for these types.
17-
# pylint: disable=unused-import
18-
from typing import Any, Dict, List, Set, Tuple, Type
15+
from typing import Any, Dict, List, Set, Tuple, Self
1916

2017
import aiohttp
2118
import jinja2
@@ -38,7 +35,12 @@ class CacheItem:
3835
An item stored in the URL/ETag cache.
3936
"""
4037

41-
def __init__(self, url, etag, last_modified, file_hash):
38+
url: str
39+
etag: str
40+
last_modified: str
41+
hash: str
42+
43+
def __init__(self, url: str, etag: str, last_modified: str, file_hash: str):
4244
self.url = url
4345
self.etag = etag
4446
self.last_modified = last_modified
@@ -63,7 +65,7 @@ def to_json(self) -> Dict[str, Dict[str, str]]:
6365
}
6466

6567
@classmethod
66-
def from_json(cls, data) -> "List[Type[CacheItem]]":
68+
def from_json(cls, data) -> List[Self]:
6769
"""
6870
Load an item from the cache.
6971
"""
@@ -76,13 +78,13 @@ def from_json(cls, data) -> "List[Type[CacheItem]]":
7678

7779
@classmethod
7880
async def from_http_response(
79-
cls, response: aiohttp.ClientResponse
80-
) -> "Type[CacheItem]":
81+
cls, response: aiohttp.ClientResponse, request_url: str
82+
) -> Self:
8183
"""
8284
Parse a cache item from an HTTP response
8385
"""
8486
headers = response.headers
85-
url = response.url
87+
url = request_url
8688
data = await response.read()
8789
file_hash = hashlib.sha1(data).hexdigest()
8890

@@ -94,6 +96,8 @@ class Cache:
9496
A cache of all downloaded items.
9597
"""
9698

99+
_items: List[CacheItem]
100+
97101
def __init__(self):
98102
self._items = []
99103

@@ -106,6 +110,11 @@ def __setitem__(self, url, item):
106110
self._items.remove(match)
107111
self._items.append(item)
108112

113+
def __delitem__(self, url):
    """
    Remove every cached item stored for the given URL.

    Deleting a URL that is not in the cache is a no-op; no ``KeyError`` is
    raised.
    """
    # Filter in place (slice assignment) so the existing list object is
    # kept. A single pass replaces the repeated ``list.remove`` scans and
    # drops entries strictly by URL rather than by item equality.
    self._items[:] = [cached for cached in self._items if cached.url != url]
117+
109118
def __bool__(self):
110119
return bool(self._items)
111120

@@ -119,7 +128,7 @@ def __repr__(self):
119128
return f"<Cache items={self._items!r}>"
120129

121130
@classmethod
122-
def from_json(cls, data: Dict[str, Dict[str, str]]) -> 'Type[Cache]':
131+
def from_json(cls, data: Dict[str, Dict[str, str]]) -> Self:
123132
"""
124133
Load a cache from a dictionary.
125134
"""
@@ -160,6 +169,7 @@ async def check_software_hash(
160169
"""
161170

162171
headers = {}
172+
cache_item = None
163173
if check_data.url in cache:
164174
cache_item = cache[check_data.url]
165175
if cache_item.etag:
@@ -169,14 +179,27 @@ async def check_software_hash(
169179

170180
try:
171181
async with session.get(
172-
check_data.url, headers=headers, timeout=600
182+
check_data.url,
183+
headers=headers,
184+
timeout=aiohttp.ClientTimeout(total=600),
173185
) as response:
174186
if response.status == 200:
175-
cache_item = await CacheItem.from_http_response(response)
187+
cache_item = await CacheItem.from_http_response(
188+
response, check_data.url
189+
)
190+
cache[check_data.url] = cache_item
191+
elif response.status == 304:
192+
# The cached data matched (Not Modified) so there's nothing to do
176193
cache[check_data.url] = cache_item
177-
except aiohttp.ClientError:
194+
else:
195+
print(
196+
f"{check_data.source_file}: Fetch failed {check_data.url} ({response.status})",
197+
file=sys.stderr,
198+
)
199+
return False
200+
except aiohttp.ClientError as client_error:
178201
print(
179-
f"{check_data.source_file}: Unable to download {check_data.url}",
202+
f"{check_data.source_file}: Unable to download {check_data.url} ({client_error})",
180203
file=sys.stderr,
181204
)
182205
return False
@@ -205,7 +228,9 @@ def process_variable(source: str, variable: str, value: str) -> str:
205228
return template.render(**{variable: value})
206229

207230

208-
def urls_for_file(file: str, ansible_data: Dict[str, Any], lookup_data: Dict[str, Any]):
231+
def urls_for_file(
232+
file: str, ansible_data: Dict[str, Any], lookup_data: Dict[str, Any]
233+
) -> set[CheckData]:
209234
"""
210235
Return a set of all URLs in the given file key in the URLs mapping.
211236
"""
@@ -240,6 +265,15 @@ def load_cache() -> Cache:
240265
return Cache()
241266

242267

268+
def trim_cache(cache, urls):
    """
    Trim the cache to only keep current URLs.

    Every cached entry whose URL is not in ``urls`` is deleted from ``cache``
    in place.

    :param cache: The cache to prune. It must be iterable and support
        ``del cache[url]``.
    :param urls: An iterable of the URLs that should be kept.
    """
    # Membership is tested once per cached entry, so build the set up front.
    current = set(urls)

    # Iterate over a snapshot: deleting from the cache while walking it
    # directly can skip entries (list-backed container) or raise
    # RuntimeError (dict-backed container).
    for item in list(cache):
        # NOTE(review): what iterating the cache yields is not visible from
        # here. Deletion is keyed by URL, so accept either a bare URL or an
        # object exposing a ``url`` attribute -- confirm against the
        # cache's ``__iter__``.
        url = getattr(item, "url", item)
        if url not in current:
            del cache[url]
275+
276+
243277
def write_cache(cache: Cache):
244278
"""
245279
Save the cache to disk.
@@ -249,7 +283,7 @@ def write_cache(cache: Cache):
249283
json.dump(cache.to_json(), cache_file, indent=4)
250284

251285

252-
def get_urls() -> Tuple[Set[str], int]:
286+
def get_urls() -> Tuple[Set[CheckData], int]:
253287
"""
254288
Load the list of URLs to validate hashes for as well as the number of errors
255289
encountered parsing the list.
@@ -292,9 +326,10 @@ async def main():
292326
for result in await asyncio.gather(*tasks):
293327
# This relies on the fact that True gets coerced to 1 and that False gets
294328
# coerced to 0 when converted to an integer. The check method returns True
295-
# on success and we need to count failures.
329+
# on success, and we need to count failures.
296330
errors += not result
297331

332+
trim_cache(cache, [check_data.url for check_data in to_check])
298333
write_cache(cache)
299334
print(f"Wrote cache: {cache}")
300335
return errors

0 commit comments

Comments
 (0)