feat: Add arm64 support, raise default chunk size to 512 MB, fall back to extracting the filename from the URL

roblOnTour · roblOnTour · commit 25fd15eaa4b5 · 2026-02-16T19:12:51.000+01:00
diff --git a/lambda-s3-download/README.md b/lambda-s3-download/README.md
@@ -45,7 +45,7 @@ The Lambda function:
 
 1. Receives a download URL and filename via the event payload
 2. Initiates an S3 multipart upload with SHA256 checksums
-3. Streams the file from the URL in chunks (default 128 MB), writing each chunk to `/tmp` and uploading it as a multipart part
+3. Streams the file from the URL in chunks (default 512 MB), writing each chunk to `/tmp` and uploading it as a multipart part
 4. Cleans up each chunk from `/tmp` after uploading to stay within the 10 GB ephemeral storage limit
 5. Completes the multipart upload and returns the S3 object checksum
 6. If any step fails, aborts the multipart upload to avoid orphaned parts
@@ -73,12 +73,13 @@ Optional event parameters:
 |---|---|---|
 | `target_bucket` | S3 bucket name (overrides the deployed parameter) | Value from template parameter |
 | `target_bucket_region` | S3 bucket region | Lambda's region |
-| `chunk_size_mb` | Size of each download chunk in MB (clamped between 5 and 5120) | 128 |
+| `chunk_size_mb` | Size of each download chunk in MB (clamped between 5 and 5120) | 512 |
 
 ## Known Limitations
 
 - The Lambda function has a 15-minute maximum timeout. If the download and upload combined take longer than that, the function will be killed mid-stream and the multipart upload will be left incomplete. Consider setting an [S3 lifecycle rule](https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpu-abort-incomplete-mpu-lifecycle-config.html) on the target bucket to auto-clean incomplete multipart uploads.
 - The `download_filename` should be a flat filename (e.g. `file.zip`). If it contains slashes (e.g. `path/to/file.zip`), the temporary file path in `/tmp` will include subdirectories that may not exist, causing a write failure.
+- The maximum downloadable file size is limited in practice by the 15-minute Lambda timeout, not by S3 (which supports objects of up to 5 TB via multipart upload with at most 10,000 parts). Lambda can usually download roughly 55-110 GB in 15 minutes, depending on the network throughput between Lambda and the source URL, so actual results will vary. At the default chunk size of 512 MB, the 10,000-part limit allows up to ~5 TB, so the part count is not the limiting factor.
 
 ## Cleanup
 
diff --git a/lambda-s3-download/example-pattern.json b/lambda-s3-download/example-pattern.json
@@ -8,7 +8,7 @@
     "headline": "How it works",
     "text": [
       "This pattern deploys a Lambda function that streams a file from a URL and uploads it to an S3 bucket using multipart upload.",
-      "The file is downloaded in configurable chunks (default 128 MB, clamped between 5 MB and 5 GB) and written to /tmp before being uploaded as individual parts. Each chunk is cleaned up from /tmp after upload, allowing the function to handle files larger than Lambda's memory or ephemeral storage limits.",
+      "The file is downloaded in configurable chunks (default 512 MB, clamped between 5 MB and 5 GB) and written to /tmp before being uploaded as individual parts. Each chunk is cleaned up from /tmp after upload, allowing the function to handle files larger than Lambda's memory or ephemeral storage limits.",
       "SHA256 checksums are calculated for each part and verified on completion. If any step fails, the multipart upload is automatically aborted to avoid orphaned parts."
     ]
   },
diff --git a/lambda-s3-download/src/app.py b/lambda-s3-download/src/app.py
@@ -3,6 +3,7 @@
 import json
 import os
 from pathlib import Path
+from urllib.parse import urlparse
 
 
 def lambda_handler(event, context):
@@ -11,11 +12,11 @@ def lambda_handler(event, context):
     target_bucket_region = event.get("target_bucket_region", os.environ.get("AWS_REGION"))
     
     download_url = event["download_url"]
-    download_filename = event["download_filename"]
+    download_filename = event.get("download_filename", urlparse(download_url).path.split("/")[-1])
 
     # Cap chunk size under 5 GB to be inside S3 max part size and not exhaust max Lambda memory
     # Floor chunk size at 5 MB to fit the S3 minimum part size
-    chunk_size_mb = min(max(int(event.get("chunk_size_mb", 128)), 5), 5120)
+    chunk_size_mb = min(max(int(event.get("chunk_size_mb", 512)), 5), 5120)
 
     # open a multipart s3 upload request.
     s3 = boto3.client("s3", region_name = target_bucket_region)
diff --git a/lambda-s3-download/src/requirements.txt b/lambda-s3-download/src/requirements.txt
@@ -1,6 +1 @@
-boto3
-json
-os
-Path
 requests
-
diff --git a/lambda-s3-download/template.yaml b/lambda-s3-download/template.yaml
@@ -14,6 +14,8 @@ Resources:
       Handler: app.lambda_handler
       Runtime: python3.12
       CodeUri: src/
+      Architectures:
+        - arm64
       Timeout: 900
       MemorySize: 1024
       EphemeralStorage:

Original file line number	Diff line number	Diff line change
`@@ -8,7 +8,7 @@`
`8`	`8`	`"headline": "How it works",`
`9`	`9`	`"text": [`
`10`	`10`	`"This pattern deploys a Lambda function that streams a file from a URL and uploads it to an S3 bucket using multipart upload.",`
`11`		`- "The file is downloaded in configurable chunks (default 128 MB, clamped between 5 MB and 5 GB) and written to /tmp before being uploaded as individual parts. Each chunk is cleaned up from /tmp after upload, allowing the function to handle files larger than Lambda's memory or ephemeral storage limits.",`
	`11`	`+ "The file is downloaded in configurable chunks (default 512 MB, clamped between 5 MB and 5 GB) and written to /tmp before being uploaded as individual parts. Each chunk is cleaned up from /tmp after upload, allowing the function to handle files larger than Lambda's memory or ephemeral storage limits.",`
`12`	`12`	`"SHA256 checksums are calculated for each part and verified on completion. If any step fails, the multipart upload is automatically aborted to avoid orphaned parts."`
`13`	`13`	`]`
`14`	`14`	`},`
-Original file line number
+Diff line change
@@ @@ -1,6 +1 @@ @@
 -boto3
 -json
 -os
 -Path
 requests
+-