Skip to content

Commit 1ecb8aa

Browse files
yarikoptic and claude committed
ENH: Reject zarr chunks >5 GiB and improve upload failure logging
- Add S3_MAX_SINGLE_PART_UPLOAD (5 GiB) constant and check file size in UploadItem.from_entry() before attempting upload. S3 rejects single-part PUTs larger than 5 GiB, and since the server mints presigned URLs without knowing the file size, the client must guard against this. Raises ValueError with a clear message about the multipart upload limitation.
- Log file size alongside filepath in all upload failure paths: _upload_zarr_file now logs at WARNING level for both HTTPError (non-403) and generic Exception cases, making it immediately clear from logs which file failed and how large it was.
- Include file size in _handle_failed_items_and_raise per-item error log lines.

Motivated by #1821 where 2.6 GiB level-0 zarr chunks failed with ConnectionAbortedError but the logs didn't show the file sizes, making it hard to diagnose the size-related nature of the failure.

Co-Authored-By: Claude Code 2.1.81 / Claude Opus 4.6 <noreply@anthropic.com>
1 parent a682735 commit 1ecb8aa

2 files changed

Lines changed: 31 additions & 5 deletions

File tree

dandi/consts.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -193,6 +193,11 @@ def urls(self) -> Iterator[str]:
193193
#: MIME type assigned to & used to identify Zarr assets
194194
ZARR_MIME_TYPE = "application/x-zarr"
195195

196+
#: Maximum file size for a single S3 PUT upload (5 GiB).
197+
#: S3 rejects single-part PUTs larger than this; such files would need
198+
#: multipart upload which is not yet supported for zarr chunks.
199+
S3_MAX_SINGLE_PART_UPLOAD = 5 * 1024**3
200+
196201
#: Maximum number of Zarr directory entries to upload at once
197202
ZARR_UPLOAD_BATCH_SIZE = 255
198203

dandi/files/zarr.py

Lines changed: 26 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
from dandi import get_logger
2828
from dandi.consts import (
2929
MAX_ZARR_DEPTH,
30+
S3_MAX_SINGLE_PART_UPLOAD,
3031
ZARR_DELETE_BATCH_SIZE,
3132
ZARR_MIME_TYPE,
3233
ZARR_UPLOAD_BATCH_SIZE,
@@ -910,7 +911,7 @@ def _handle_failed_items_and_raise(
910911

911912
# Log all failures
912913
for item, error in failed_items:
913-
lgr.error("Failed to upload %s: %s", item.filepath, error)
914+
lgr.error("Failed to upload %s (%d bytes): %s", item.filepath, item.size, error)
914915

915916
# Summary diagnostics
916917
exc_counts = Counter(type(error).__name__ for _, error in failed_items)
@@ -976,17 +977,29 @@ def _upload_zarr_file(
976977
# Check if this is a 403 error that we should retry with a new URL
977978
if e.response is not None and e.response.status_code == 403:
978979
lgr.debug(
979-
"Got 403 error uploading %s, will retry with new URL: %s",
980+
"Got 403 error uploading %s (%d bytes), will retry with new URL: %s",
980981
item.filepath,
982+
item.size,
981983
str(e),
982984
)
983985
return UploadResult(item=item, status=UploadStatus.RETRY_NEEDED, error=e)
984986
else:
985-
# Other HTTP error - don't retry
987+
lgr.warning(
988+
"HTTP error uploading %s (%d bytes): %s",
989+
item.filepath,
990+
item.size,
991+
e,
992+
)
986993
return UploadResult(item=item, status=UploadStatus.FAILED, error=e)
987994
except Exception as e:
988995
post_upload_size_check(item.filepath, item.size, True)
989-
# Non-HTTP error - don't retry
996+
lgr.warning(
997+
"Error uploading %s (%d bytes): %s: %s",
998+
item.filepath,
999+
item.size,
1000+
type(e).__name__,
1001+
e,
1002+
)
9901003
return UploadResult(item=item, status=UploadStatus.FAILED, error=e)
9911004
else:
9921005
post_upload_size_check(item.filepath, item.size, False)
@@ -1081,11 +1094,19 @@ def from_entry(cls, e: LocalZarrEntry, digest: str) -> UploadItem:
10811094
content_type = "application/json"
10821095
else:
10831096
content_type = None
1097+
size = pre_upload_size_check(e.filepath)
1098+
if size > S3_MAX_SINGLE_PART_UPLOAD:
1099+
raise ValueError(
1100+
f"Zarr chunk {e.filepath} is {size / 1024**3:.2f} GiB,"
1101+
f" exceeding the S3 single-part upload limit of"
1102+
f" {S3_MAX_SINGLE_PART_UPLOAD / 1024**3:.0f} GiB."
1103+
f" Multipart upload for zarr chunks is not yet supported."
1104+
)
10841105
return cls(
10851106
entry_path=str(e),
10861107
filepath=e.filepath,
10871108
digest=digest,
1088-
size=pre_upload_size_check(e.filepath),
1109+
size=size,
10891110
content_type=content_type,
10901111
)
10911112

0 commit comments

Comments
 (0)