@@ -113,14 +113,11 @@ class RecordLinks(TypedDict):
113113 reserve_doi : str
114114
115115
116- # AWS S3 multipart limits (used by Invenio RDM)
117- MIN_UPLOAD_PART_SIZE = 50 * 1024 * 1024 # 50 MiB
116+ # AWS S3 multipart default limits (used by Invenio RDM)
117+ MIN_UPLOAD_PART_SIZE = 5 * 1024 * 1024 # 5 MiB
118118MAX_UPLOAD_PART_SIZE = 5 * 1024 ** 3 # 5 GiB
119119MAX_UPLOAD_PARTS = 10_000
120120
121- # Default threshold for using multipart upload (100 MiB)
122- DEFAULT_MULTIPART_THRESHOLD = 100 * 1024 * 1024
123-
124121
125122def calculate_multipart_params (file_size : int , preferred_part_size : int | None = None ) -> tuple [int , int ]:
126123 """Calculate optimal parts count and part size for multipart upload.
@@ -391,16 +388,12 @@ def upload_file_to_draft_container(
391388 context : FilesSourceRuntimeContext [RDMFileSourceConfiguration ],
392389 ):
393390 file_size = os .path .getsize (file_path )
394- threshold = context .config .multipart_threshold
395-
396- # Use default threshold if not configured
397- if threshold is None or threshold <= 0 :
398- threshold = DEFAULT_MULTIPART_THRESHOLD
399-
400- use_multipart = file_size >= threshold
401391
392+ threshold_mb = context .config .multipart_threshold
393+ # Convert threshold from MB to bytes (config value is always in MB; 1 MB is treated as 1024 * 1024 bytes)
394+ threshold_bytes = threshold_mb * 1024 * 1024 if threshold_mb else None
395+ use_multipart = file_size >= threshold_bytes if threshold_bytes else False
402396 if use_multipart :
403- log .info (f"Using multipart upload for file '{ filename } ' ({ file_size } bytes >= threshold { threshold } )" )
404397 self ._upload_file_multipart (record_id , filename , file_path , file_size , context )
405398 else :
406399 self ._upload_file_single (record_id , filename , file_path , context , file_size )
@@ -434,11 +427,10 @@ def _upload_file_single(
434427 response = requests .put (upload_file_content_url , data = file , headers = headers )
435428 # Handle 413 (Payload Too Large) - suggest using multipart upload
436429 if response .status_code == 413 :
437- threshold_mb = DEFAULT_MULTIPART_THRESHOLD / (1024 * 1024 )
438430 raise Exception (
439431 f"Failed to upload file '{ filename } ' ({ file_size } bytes): HTTP 413 Payload Too Large. "
440432 f"The server rejected the upload because the file is too large for a single request. "
441- f"Please configure 'multipart_threshold' to { threshold_mb } MB or lower to enable multipart upload for files of this size."
433+ f"Please configure 'multipart_threshold' in the file source configuration to enable multipart upload for files of this size."
442434 )
443435 self ._ensure_response_has_expected_status_code (response , 200 )
444436
@@ -463,7 +455,9 @@ def _upload_file_multipart(
463455 4. Upload parts (parallel for > 2 parts)
464456 5. POST to commit URL
465457 """
466- preferred_part_size = context .config .multipart_chunk_size
458+ preferred_part_size_mb = context .config .multipart_chunk_size
459+ # Convert chunk size from MB to bytes (config value is always in MB; 1 MB is treated as 1024 * 1024 bytes)
460+ preferred_part_size = preferred_part_size_mb * 1024 * 1024 if preferred_part_size_mb else None
467461 num_parts , part_size = calculate_multipart_params (file_size , preferred_part_size )
468462
469463 log .info (f"Multipart upload: { num_parts } parts of { part_size } bytes each for '{ filename } '" )
@@ -472,7 +466,6 @@ def _upload_file_multipart(
472466 upload_file_url = record ["links" ]["files" ]
473467 headers = self ._get_request_headers (context , auth_required = True )
474468
475- # Initialize multipart upload with transfer metadata
476469 file_metadata = {
477470 "key" : filename ,
478471 "size" : file_size ,
@@ -497,13 +490,8 @@ def _upload_file_multipart(
497490 )
498491
499492 # Sort part links by part number to ensure correct ordering
500- # Invenio uses 'part' key, not 'part_number'
501493 part_links = sorted (part_links , key = lambda p : p .get ("part" , 0 ))
502-
503- # Upload parts
504494 self ._upload_parts (file_path , file_size , part_size , part_links , headers )
505-
506- # Commit multipart upload
507495 response = requests .post (commit_url , json = {}, headers = headers )
508496 self ._ensure_response_has_expected_status_code (response , 200 )
509497 log .info (f"Multipart upload completed for '{ filename } '" )
0 commit comments