@@ -118,10 +118,8 @@ def __init__(self, observed: str, expected: str):
118118retry_decorator = retry (
119119 exceptions = ( # pyright: ignore[reportArgumentType]
120120 httpx .HTTPError ,
121- TimeoutError ,
122121 OSError ,
123122 WrongChecksum ,
124- WorkflowError ,
125123 ),
126124 tries = 5 ,
127125 delay = 3 ,
@@ -264,19 +262,23 @@ def _get_rate_limit_wait_time(self, headers: httpx.Headers) -> float | None:
264262 return wait_seconds
265263
266264 @asynccontextmanager
267- async def httpr (self , method : str , url : str ):
265+ async def httpr (self , method : str , url : str , headers : dict [ str , str ] | None = None ):
268266 """
269267 HTTP request wrapper with rate limiting and exception logging.
270268
271269 Args:
272270 method: HTTP method (e.g., "get", "post")
273271 url: URL to request
272+ headers: Optional additional HTTP headers
274273
275274 Yields:
276275 httpx.Response object
277276 """
278277 try :
279- async with self .client () as client , client .stream (method , url ) as response :
278+ async with (
279+ self .client () as client ,
280+ client .stream (method , url , headers = headers ) as response ,
281+ ):
280282 wait_time = self ._get_rate_limit_wait_time (response .headers )
281283 if wait_time is not None :
282284 logger .info (
@@ -340,10 +342,7 @@ async def get_http_metadata(self, parsed: ParseResult) -> FileMetadata | None:
340342 if response .status_code == 405 :
341343 # HEAD not supported; assume file exists with unknown size/mtime
342344 return FileMetadata (checksum = None , size = 0 , mtime = 0.0 )
343- if response .status_code != 200 :
344- raise WorkflowError (
345- f"Failed to fetch HTTP metadata: HTTP { response .status_code } ({ url } )"
346- )
345+ response .raise_for_status ()
347346
348347 size = int (response .headers .get ("content-length" , 0 ))
349348
@@ -391,10 +390,7 @@ async def get_zenodo_metadata(self, url: ParseResult) -> FileMetadata | None:
391390 api_url = f"https://{ netloc } /api/records/{ record_id } "
392391
393392 async with self .httpr ("get" , api_url ) as response :
394- if response .status_code != 200 :
395- raise WorkflowError (
396- f"Failed to fetch Zenodo record metadata: HTTP { response .status_code } ({ api_url } )"
397- )
393+ response .raise_for_status ()
398394
399395 # Read the full response body
400396 content = await response .aread ()
@@ -516,10 +512,7 @@ async def get_gcs_metadata(self, url: ParseResult) -> FileMetadata | None:
516512 async with self .httpr ("get" , api_url ) as response :
517513 if response .status_code == 404 :
518514 return None
519- if response .status_code != 200 :
520- raise WorkflowError (
521- f"Failed to fetch GCS object metadata: HTTP { response .status_code } ({ api_url } )"
522- )
515+ response .raise_for_status ()
523516
524517 content = await response .aread ()
525518 data = json .loads (content )
@@ -728,20 +721,34 @@ async def managed_retrieve(self):
728721 return
729722
730723 try :
724+ # Check for existing partial file to resume
725+ offset = local_path .stat ().st_size if local_path .exists () else 0
726+ headers = {"Range" : f"bytes={ offset } -" } if offset > 0 else None
727+
731728 # Download using a get request, rate limit errors are detected and raise
732729 # WorkflowError to trigger a retry
733- async with self .provider .httpr ("get" , query ) as response :
734- if response .status_code != 200 :
735- raise WorkflowError (
736- f"Failed to download: HTTP { response .status_code } ({ query } )"
730+ async with self .provider .httpr ("get" , query , headers = headers ) as response :
731+ if response .status_code == 206 :
732+ # Server supports resume - append to existing partial file
733+ mode = "ab"
734+ logger .info (f"Resuming { filename } from byte { offset } " )
735+ elif response .status_code == 200 :
736+ # Server doesn't support Range - discard partial and restart
737+ mode = "wb"
738+ offset = 0
739+ else :
740+ response .raise_for_status ()
741+ raise AssertionError (
742+ f"Unhandled status code: { response .status_code } "
737743 )
738744
739- total_size = int (response .headers .get ("content-length" , 0 ))
745+ total_size = int (response .headers .get ("content-length" , 0 )) + offset
740746
741747 # Download to local path with progress bar
742- with local_path .open (mode = "wb" ) as f :
748+ with local_path .open (mode = mode ) as f :
743749 with tqdm (
744750 total = total_size ,
751+ initial = offset ,
745752 unit = "B" ,
746753 unit_scale = True ,
747754 desc = filename ,
@@ -758,7 +765,11 @@ async def managed_retrieve(self):
758765 if self .provider .cache :
759766 self .provider .cache .put (query , local_path )
760767
761- except :
768+ except httpx .TransportError :
769+ # Mid-transfer interruption - keep partial file for resume on next retry
770+ raise
771+ except : # noqa: E722
772+ # Any other error (wrong checksum, HTTP error, KeyboardInterrupt) - delete and maybe restart
762773 if local_path .exists ():
763774 local_path .unlink ()
764775 raise
0 commit comments