Skip to content

Commit 1675a43

Browse files
committed
resolved aws cli request format
1 parent 8160590 commit 1675a43

5 files changed

Lines changed: 282 additions & 3 deletions

File tree

crates/fula-cli/src/handlers/multipart.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use crate::pinning::{check_can_upload, pin_for_user};
55
use crate::state::UserSession;
66
use crate::multipart::UploadPart;
77
use crate::xml;
8+
use super::object::try_decode_chunked;
89
use axum::{
910
extract::{Extension, Path, Query, State},
1011
http::{HeaderMap, StatusCode},
@@ -85,6 +86,7 @@ pub async fn upload_part(
8586
Extension(session): Extension<UserSession>,
8687
Path((bucket, key)): Path<(String, String)>,
8788
Query(params): Query<MultipartParams>,
89+
headers: HeaderMap,
8890
body: Bytes,
8991
) -> Result<Response, ApiError> {
9092
if !session.can_write() {
@@ -93,7 +95,7 @@ pub async fn upload_part(
9395

9496
let upload_id = params.upload_id
9597
.ok_or_else(|| ApiError::s3(S3ErrorCode::InvalidArgument, "Missing uploadId"))?;
96-
98+
9799
let part_number = params.part_number
98100
.ok_or_else(|| ApiError::s3(S3ErrorCode::InvalidArgument, "Missing partNumber"))?;
99101

@@ -113,6 +115,12 @@ pub async fn upload_part(
113115
return Err(ApiError::s3(S3ErrorCode::InvalidArgument, "Bucket/key mismatch"));
114116
}
115117

118+
// Decode chunked encoding if present (same as put_object)
119+
let body = match try_decode_chunked(&headers, &body) {
120+
Some(decoded) => decoded,
121+
None => body,
122+
};
123+
116124
// Store part data
117125
let cid = state.block_store.put_block(&body).await?;
118126

crates/fula-cli/src/handlers/object.rs

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,14 @@ pub async fn put_object(
4343
));
4444
}
4545

46+
// Decode HTTP chunked transfer encoding if present in the body.
47+
// AWS CLI and other S3 clients may send chunked-encoded bodies that a
48+
// reverse proxy (e.g., nginx) forwards without decoding.
49+
let body = match try_decode_chunked(&headers, &body) {
50+
Some(decoded) => decoded,
51+
None => body,
52+
};
53+
4654
// Store the data
4755
let cid = state.block_store.put_block(&body).await?;
4856

@@ -265,6 +273,115 @@ pub async fn get_object(
265273
Ok(response.body(Body::from(body_data)).unwrap())
266274
}
267275

276+
/// Attempt to decode HTTP chunked transfer encoding from a request body.
277+
/// Returns Some(decoded) if the body was chunked-encoded, None otherwise.
278+
///
279+
/// This handles two cases:
280+
/// 1. AWS `Content-Encoding: aws-chunked` with streaming SigV4 signatures
281+
/// 2. Plain HTTP chunked TE where a reverse proxy stripped the Transfer-Encoding header
282+
pub(crate) fn try_decode_chunked(headers: &HeaderMap, body: &Bytes) -> Option<Bytes> {
283+
let has_decoded_len = headers.get("x-amz-decoded-content-length").is_some();
284+
let has_aws_chunked = headers
285+
.get(header::CONTENT_ENCODING)
286+
.and_then(|v| v.to_str().ok())
287+
.map(|v| v.contains("aws-chunked"))
288+
.unwrap_or(false);
289+
290+
if !has_decoded_len && !has_aws_chunked && !looks_like_chunked(body) {
291+
return None;
292+
}
293+
294+
decode_chunked_body(body).map(|decoded| {
295+
tracing::info!(
296+
original_len = body.len(),
297+
decoded_len = decoded.len(),
298+
has_decoded_len,
299+
has_aws_chunked,
300+
"Decoded chunked request body"
301+
);
302+
decoded
303+
})
304+
}
305+
306+
/// Heuristically decide whether `body` begins with an HTTP chunked-transfer frame.
///
/// This is used to sniff bodies that arrive without any header announcing
/// chunked framing, so it is deliberately strict to avoid rewriting ordinary
/// uploads. The body is accepted only when all of the following hold:
///
/// 1. A `\r\n` appears within the first 100 bytes, and not at offset 0.
/// 2. The text before it (up to an optional `;extension`) consists solely of
///    hexadecimal digits and parses to a non-zero chunk size.
/// 3. That many bytes of chunk data are present after the size line.
/// 4. The chunk data is immediately followed by `\r\n`, as the chunked format
///    requires. Without this check a plain text file whose first line happens
///    to be a short hex number (e.g. `a\r\n0123456789`) would be misclassified.
///
/// Returns `true` only when every check passes.
fn looks_like_chunked(body: &[u8]) -> bool {
    // Longest chunk-size line (size plus extensions) that is considered plausible.
    const MAX_SIZE_LINE_LEN: usize = 100;

    if body.len() < 4 {
        return false;
    }

    // Only the head of the body can contain an acceptable size line, so avoid
    // scanning a large binary payload end to end.
    let scan_end = body.len().min(MAX_SIZE_LINE_LEN + 2);
    let crlf_pos = match body[..scan_end].windows(2).position(|w| w == b"\r\n") {
        Some(pos) if pos > 0 => pos,
        _ => return false,
    };

    let size_line = match std::str::from_utf8(&body[..crlf_pos]) {
        Ok(line) => line,
        Err(_) => return false,
    };

    // Drop chunk extensions such as ";chunk-signature=...".
    let size_hex = size_line.split(';').next().unwrap_or("").trim();

    // `from_str_radix` also accepts a leading '+', which is not a valid chunk size.
    if size_hex.is_empty() || !size_hex.bytes().all(|b| b.is_ascii_hexdigit()) {
        return false;
    }

    let chunk_size = match usize::from_str_radix(size_hex, 16) {
        Ok(size) if size > 0 => size,
        _ => return false,
    };

    // The chunk data must fit within the remaining body...
    let data_start = crlf_pos + 2;
    if chunk_size > body.len().saturating_sub(data_start) {
        return false;
    }

    // ...and must be terminated by CRLF. The addition cannot overflow because
    // `chunk_size` was just bounded by the remaining body length.
    let data_end = data_start + chunk_size;
    body[data_end..].starts_with(b"\r\n")
}
334+
335+
/// Decode HTTP chunked transfer encoding from raw bytes.
336+
/// Handles both plain chunked and aws-chunked (ignoring chunk extensions).
337+
fn decode_chunked_body(body: &[u8]) -> Option<Bytes> {
338+
let mut decoded = Vec::new();
339+
let mut pos = 0;
340+
341+
while pos < body.len() {
342+
let remaining = &body[pos..];
343+
344+
// Find the \r\n ending the chunk-size line
345+
let crlf_pos = remaining.windows(2).position(|w| w == b"\r\n")?;
346+
347+
if crlf_pos == 0 {
348+
// Empty line — skip
349+
pos += 2;
350+
continue;
351+
}
352+
353+
// Parse chunk size (ignore extensions after ';')
354+
let size_line = std::str::from_utf8(&remaining[..crlf_pos]).ok()?;
355+
let size_hex = size_line.split(';').next()?;
356+
let chunk_size = usize::from_str_radix(size_hex.trim(), 16).ok()?;
357+
358+
if chunk_size == 0 {
359+
break; // Terminal chunk
360+
}
361+
362+
let data_start = pos + crlf_pos + 2;
363+
let data_end = data_start + chunk_size;
364+
365+
if data_end > body.len() {
366+
return None; // Truncated — probably not chunked after all
367+
}
368+
369+
decoded.extend_from_slice(&body[data_start..data_end]);
370+
371+
// Skip the \r\n after chunk data
372+
pos = data_end;
373+
if pos + 2 <= body.len() && body[pos] == b'\r' && body[pos + 1] == b'\n' {
374+
pos += 2;
375+
}
376+
}
377+
378+
if decoded.is_empty() {
379+
None
380+
} else {
381+
Some(Bytes::from(decoded))
382+
}
383+
}
384+
268385
/// Parse Range header (e.g., "bytes=0-1023" or "bytes=500-" or "bytes=-500")
269386
fn parse_range_header(range: &str, total_size: usize) -> Result<(usize, usize), ()> {
270387
let range = range.strip_prefix("bytes=").ok_or(())?;

crates/fula-cli/src/routes.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ async fn object_put_handler(
170170
part_number: query.part_number,
171171
uploads: query.uploads.clone(),
172172
};
173-
handlers::upload_part(state, session, path, axum::extract::Query(mp_params), body).await
173+
handlers::upload_part(state, session, path, axum::extract::Query(mp_params), headers, body).await
174174
} else if query.tagging.is_some() {
175175
handlers::put_object_tagging(state, session, path, body).await
176176
} else if headers.contains_key("x-amz-copy-source") {

target/.rustc_info.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"rustc_fingerprint":7713099781947737724,"outputs":{"17747080675513052775":{"success":true,"status":"","code":0,"stdout":"rustc 1.94.0-nightly (ba86c0460 2025-12-06)\nbinary: rustc\ncommit-hash: ba86c0460b0233319e01fd789a42a7276eade805\ncommit-date: 2025-12-06\nhost: x86_64-pc-windows-msvc\nrelease: 1.94.0-nightly\nLLVM version: 21.1.5\n","stderr":""},"7971740275564407648":{"success":true,"status":"","code":0,"stdout":"___.exe\nlib___.rlib\n___.dll\n___.dll\n___.lib\n___.dll\nC:\\Users\\ehsan\\.rustup\\toolchains\\nightly-x86_64-pc-windows-msvc\npacked\n___\ndebug_assertions\nemscripten_wasm_eh\nfmt_debug=\"full\"\noverflow_checks\npanic=\"unwind\"\nproc_macro\nrelocation_model=\"pic\"\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"msvc\"\ntarget_family=\"windows\"\ntarget_feature=\"cmpxchg16b\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"lahfsahf\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_feature=\"sse3\"\ntarget_feature=\"x87\"\ntarget_has_atomic\ntarget_has_atomic=\"128\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_has_atomic_equal_alignment=\"128\"\ntarget_has_atomic_equal_alignment=\"16\"\ntarget_has_atomic_equal_alignment=\"32\"\ntarget_has_atomic_equal_alignment=\"64\"\ntarget_has_atomic_equal_alignment=\"8\"\ntarget_has_atomic_equal_alignment=\"ptr\"\ntarget_has_atomic_load_store\ntarget_has_atomic_load_store=\"128\"\ntarget_has_atomic_load_store=\"16\"\ntarget_has_atomic_load_store=\"32\"\ntarget_has_atomic_load_store=\"64\"\ntarget_has_atomic_load_store=\"8\"\ntarget_has_atomic_load_store=\"ptr\"\ntarget_has_reliable_f128\ntarget_has_reliable_f16\ntarget_has_reliable_f16_math\ntarget_os=\"windows\"\ntarget_pointer_width=\"64\"\ntarget_thread_local\ntarget_vendor=\"pc\"\nub_checks\nwindows\n","stderr":""}},"successes":{}}
1+
{"rustc_fingerprint":16197101729860176073,"outputs":{"7971740275564407648":{"success":true,"status":"","code":0,"stdout":"___\nlib___.rlib\nlib___.so\nlib___.so\nlib___.a\nlib___.so\n/root/.rustup/toolchains/stable-x86_64-unknown-linux-gnu\noff\npacked\nunpacked\n___\ndebug_assertions\npanic=\"unwind\"\nproc_macro\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"gnu\"\ntarget_family=\"unix\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_os=\"linux\"\ntarget_pointer_width=\"64\"\ntarget_vendor=\"unknown\"\nunix\n","stderr":""},"17747080675513052775":{"success":true,"status":"","code":0,"stdout":"rustc 1.92.0 (ded5c06cf 2025-12-08)\nbinary: rustc\ncommit-hash: ded5c06cf21d2b93bffd5d884aa6e96934ee4234\ncommit-date: 2025-12-08\nhost: x86_64-unknown-linux-gnu\nrelease: 1.92.0\nLLVM version: 21.1.3\n","stderr":""}},"successes":{}}

tests/api_tests.rs

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -898,6 +898,160 @@ async fn test_large_compressible_object_no_compression() {
898898
assert_eq!(body, content);
899899
}
900900

901+
/// Regression test: PUT with HTTP chunked transfer-encoded body must be decoded.
902+
///
903+
/// AWS CLI sends request bodies with chunked transfer encoding that a reverse
904+
/// proxy (nginx) may forward without decoding. Without decoding, the chunked
905+
/// framing (e.g., "100000\r\n") gets stored as part of the object data,
906+
/// corrupting the file.
907+
#[tokio::test]
908+
async fn test_put_object_decodes_chunked_body() {
909+
let (base_url, _) = spawn_server(false).await;
910+
let client = Client::builder()
911+
.no_gzip()
912+
.no_brotli()
913+
.no_deflate()
914+
.build()
915+
.unwrap();
916+
917+
let bucket = "chunked-decode-bucket";
918+
let key = "photo.jpg";
919+
920+
// Simulate a JPEG file (starts with FF D8 FF)
921+
let original_data: Vec<u8> = {
922+
let mut v = vec![0xFF, 0xD8, 0xFF, 0xE0];
923+
v.extend_from_slice(&[0x42; 2044]); // 2048 bytes total
924+
v
925+
};
926+
927+
// Wrap the data in HTTP chunked transfer encoding framing
928+
// This simulates what happens when nginx strips Transfer-Encoding header
929+
// but forwards the raw chunked body
930+
let chunk_size_hex = format!("{:x}", original_data.len());
931+
let mut chunked_body: Vec<u8> = Vec::new();
932+
chunked_body.extend_from_slice(chunk_size_hex.as_bytes()); // "800"
933+
chunked_body.extend_from_slice(b"\r\n");
934+
chunked_body.extend_from_slice(&original_data);
935+
chunked_body.extend_from_slice(b"\r\n");
936+
chunked_body.extend_from_slice(b"0\r\n\r\n"); // Terminal chunk
937+
938+
assert_ne!(chunked_body.len(), original_data.len(), "Chunked body should be larger");
939+
940+
// Create bucket
941+
client.put(&format!("{}/{}", base_url, bucket)).send().await.unwrap();
942+
943+
// Upload the chunked-encoded body (simulating broken proxy behavior)
944+
let res = client.put(&format!("{}/{}/{}", base_url, bucket, key))
945+
.body(chunked_body)
946+
.header("Content-Type", "image/jpeg")
947+
.send()
948+
.await
949+
.unwrap();
950+
assert_eq!(res.status(), StatusCode::OK);
951+
952+
// Download and verify the gateway decoded the chunked framing
953+
let res = client.get(&format!("{}/{}/{}", base_url, bucket, key))
954+
.send()
955+
.await
956+
.unwrap();
957+
assert_eq!(res.status(), StatusCode::OK);
958+
959+
let body = res.bytes().await.unwrap();
960+
assert_eq!(
961+
body.len(), original_data.len(),
962+
"Downloaded size ({}) should match original ({}), not chunked body",
963+
body.len(), original_data.len()
964+
);
965+
assert_eq!(
966+
&body[..4], &[0xFF, 0xD8, 0xFF, 0xE0],
967+
"Body must start with JPEG magic, not chunked framing"
968+
);
969+
assert_eq!(body.as_ref(), original_data.as_slice(), "Decoded body must match original");
970+
}
971+
972+
/// Test that chunked decoding handles aws-chunked format (with chunk-signature extensions)
973+
#[tokio::test]
974+
async fn test_put_object_decodes_aws_chunked_body() {
975+
let (base_url, _) = spawn_server(false).await;
976+
let client = Client::builder()
977+
.no_gzip()
978+
.no_brotli()
979+
.no_deflate()
980+
.build()
981+
.unwrap();
982+
983+
let bucket = "aws-chunked-bucket";
984+
let key = "data.bin";
985+
let original_data = b"Hello, this is test data for aws-chunked decoding!";
986+
987+
// Build aws-chunked encoded body:
988+
// <hex-size>;chunk-signature=<fake-sig>\r\n<data>\r\n0;chunk-signature=<fake-sig>\r\n\r\n
989+
let fake_sig = "a".repeat(64);
990+
let mut chunked_body: Vec<u8> = Vec::new();
991+
chunked_body.extend_from_slice(format!("{:x};chunk-signature={}\r\n", original_data.len(), fake_sig).as_bytes());
992+
chunked_body.extend_from_slice(original_data);
993+
chunked_body.extend_from_slice(b"\r\n");
994+
chunked_body.extend_from_slice(format!("0;chunk-signature={}\r\n\r\n", fake_sig).as_bytes());
995+
996+
// Create bucket
997+
client.put(&format!("{}/{}", base_url, bucket)).send().await.unwrap();
998+
999+
// Upload with x-amz-decoded-content-length header (AWS streaming signature signal)
1000+
let res = client.put(&format!("{}/{}/{}", base_url, bucket, key))
1001+
.body(chunked_body)
1002+
.header("Content-Encoding", "aws-chunked")
1003+
.header("x-amz-decoded-content-length", original_data.len().to_string())
1004+
.send()
1005+
.await
1006+
.unwrap();
1007+
assert_eq!(res.status(), StatusCode::OK);
1008+
1009+
// Download and verify
1010+
let res = client.get(&format!("{}/{}/{}", base_url, bucket, key))
1011+
.send()
1012+
.await
1013+
.unwrap();
1014+
assert_eq!(res.status(), StatusCode::OK);
1015+
1016+
let body = res.bytes().await.unwrap();
1017+
assert_eq!(body.as_ref(), original_data, "Body must be decoded aws-chunked content");
1018+
}
1019+
1020+
/// Test that normal (non-chunked) uploads are NOT affected by chunked decoding
1021+
#[tokio::test]
1022+
async fn test_put_object_normal_body_unaffected() {
1023+
let (base_url, _) = spawn_server(false).await;
1024+
let client = Client::new();
1025+
1026+
let bucket = "normal-body-bucket";
1027+
let key = "binary.dat";
1028+
1029+
// Binary data that could theoretically look like a chunk header if we're not careful
1030+
// (starts with bytes that are valid hex chars in ASCII)
1031+
let original_data: Vec<u8> = vec![0x41, 0x42, 0x43, 0x44, 0x45, 0x46]; // "ABCDEF" in ASCII
1032+
1033+
// Create bucket
1034+
client.put(&format!("{}/{}", base_url, bucket)).send().await.unwrap();
1035+
1036+
// Upload normally
1037+
let res = client.put(&format!("{}/{}/{}", base_url, bucket, key))
1038+
.body(original_data.clone())
1039+
.send()
1040+
.await
1041+
.unwrap();
1042+
assert_eq!(res.status(), StatusCode::OK);
1043+
1044+
// Download - should be identical
1045+
let res = client.get(&format!("{}/{}/{}", base_url, bucket, key))
1046+
.send()
1047+
.await
1048+
.unwrap();
1049+
assert_eq!(res.status(), StatusCode::OK);
1050+
1051+
let body = res.bytes().await.unwrap();
1052+
assert_eq!(body.as_ref(), original_data.as_slice(), "Normal upload must not be modified");
1053+
}
1054+
9011055
/// Test duplicate bucket creation for same user fails
9021056
#[tokio::test]
9031057
async fn test_multitenant_duplicate_bucket_same_user() {

0 commit comments

Comments
 (0)