From 1e4dcd07992932231f6ebe2a744d46c44a04eecb Mon Sep 17 00:00:00 2001 From: Nicolas Dreno Date: Thu, 2 Jul 2026 14:26:44 +0200 Subject: [PATCH] security(data-plane): canonical percent-decode, raw header limits, admin exposure warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the data-plane hardening items (area 3, issue #9). - DP-4: replace the per-byte `byte as char` query decoder (which corrupted multi-byte UTF-8, e.g. %C3%A9) with one canonical UTF-8-correct `percent_decode`, and apply it to captured path-parameter values too — so routing, validation, and dispatch all agree on the decoded value. Path segments are still matched raw, so `%2F` stays a literal slash within its segment rather than acting as a separator. - DP-5: enforce header limits on the raw HeaderMap. `HeaderMap::len()` counts every header line, so duplicate names can no longer undercount the header limit, and each value is size-checked (not just the last for a given name). - DP-8: the unauthenticated admin port (/health, /metrics, /provenance) now logs a startup warning when bound to a non-loopback address, since /provenance and /metrics expose build/operational metadata. Documented. Also updates the validation integration test to expect 413 (payload-too-large) for oversized bodies, matching the RFC-correct status introduced in PR #94. AR rebinding residual (pin the vetted IP via a custom DNS resolver to close the resolve-then-connect TOCTOU) is deferred: it is a cross-cutting refactor of the WASM HTTP client's DNS path. Left tracked in #9. --- CHANGELOG.md | 1 + crates/barbacane-test/tests/validation.rs | 6 +- crates/barbacane/src/admin.rs | 12 +++ crates/barbacane/src/main.rs | 32 +++++- crates/barbacane/src/validator.rs | 119 ++++++++++++++++++---- docs/reference/configuration.md | 10 ++ 6 files changed, 155 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b2d98c5..55f9905 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **plugin**: `jwt-auth` and `oidc-auth` now declare the `log` capability they use, so they load under WASM capability enforcement (they emit a one-time warning when no `audience` is configured). - **ci**: the adversarial security suite (`crates/barbacane-test/tests/security/`) now runs in CI. - **security (control plane)**: request-body size limits — 1 MiB default for JSON endpoints, 32 MiB for spec/plugin uploads — bound in-memory buffering; database errors are routed through the generic error mapper (no schema disclosure); upload filenames are sanitized to a safe basename (defeating path traversal into the compile temp dir and CRLF/quote injection in `Content-Disposition`); and concurrent WebSocket sessions are capped so unauthenticated sockets can't pile up during the registration window. +- **security (data plane)**: a single canonical, UTF-8-correct percent-decoder is applied to query and path parameters before validation and dispatch (the previous per-byte decode corrupted multi-byte sequences); request header limits are enforced on the raw header map so duplicate header names can't undercount the header limit or skip per-value size checks; and the unauthenticated admin port (`/health`, `/metrics`, `/provenance`) logs a startup warning when bound to a non-loopback address. - **deps**: bump `anyhow` to 1.0.103 (RUSTSEC-2026-0190). ## [0.7.0] - 2026-05-05 diff --git a/crates/barbacane-test/tests/validation.rs b/crates/barbacane-test/tests/validation.rs index 744686a..80ba417 100644 --- a/crates/barbacane-test/tests/validation.rs +++ b/crates/barbacane-test/tests/validation.rs @@ -236,14 +236,14 @@ async fn test_limits_body_size_exceeds_limit() { ); // Server may either: - // 1. Return 400 before the client finishes sending + // 1. Return 413 Payload Too Large before the client finishes sending // 2. Close the connection early (connection reset, broken pipe, etc.) // Both are valid behaviors for rejecting oversized bodies match gateway.post("/users", &large_body).await { Ok(resp) => { - assert_eq!(resp.status(), 400); + assert_eq!(resp.status(), 413); let body: serde_json::Value = resp.json().await.unwrap(); - assert_eq!(body["type"], "urn:barbacane:error:validation-failed"); + assert_eq!(body["type"], "urn:barbacane:error:payload-too-large"); } Err(_) => { // Any connection error is acceptable when the server rejects a large body diff --git a/crates/barbacane/src/admin.rs b/crates/barbacane/src/admin.rs index 3b89128..85e49ba 100644 --- a/crates/barbacane/src/admin.rs +++ b/crates/barbacane/src/admin.rs @@ -42,6 +42,18 @@ pub async fn start_admin_server( .await .map_err(|e| format!("admin: failed to bind to {}: {}", addr, e))?; + // The admin endpoints (/health, /metrics, /provenance) are unauthenticated + // by design (metrics scraping). /provenance and /metrics expose build and + // operational metadata, so warn when the port is reachable off-host. + if !addr.ip().is_loopback() { + tracing::warn!( + %addr, + "admin API (incl. /provenance, /metrics) is bound to a non-loopback \ + address and is UNAUTHENTICATED; restrict it to loopback or place it \ + behind a trusted network boundary" + ); + } + loop { tokio::select! { _ = shutdown_rx.changed() => { diff --git a/crates/barbacane/src/main.rs b/crates/barbacane/src/main.rs index 697e0c1..91831d9 100644 --- a/crates/barbacane/src/main.rs +++ b/crates/barbacane/src/main.rs @@ -1099,7 +1099,26 @@ impl Gateway { ))); } - // Extract headers for validation + // Enforce header limits on the RAW header map first: `HeaderMap::len()` + // counts every header line (including repeated names), so a flood of + // duplicate-named headers can't slip under the count limit, and each + // value is size-checked (not just the last one for a given name). + let raw_headers = req.headers(); + let header_limit_error = self + .limits + .validate_header_count(raw_headers.len()) + .err() + .or_else(|| { + raw_headers.iter().find_map(|(name, value)| { + self.limits + .validate_header_size(name.as_str(), name.as_str().len() + value.len()) + .err() + }) + }); + + // Extract headers for downstream validation and plugin forwarding. This + // collapses duplicate names (last value wins), which is fine now that the + // limit checks above ran against the raw map. let headers: HashMap = req .headers() .iter() @@ -1107,7 +1126,7 @@ impl Gateway { .collect(); // Check header limits - if let Err(e) = self.limits.validate_headers(&headers) { + if let Some(e) = header_limit_error { let response = self.validation_error_response(&[e]); self.record_request_metrics( &method_str, @@ -1148,7 +1167,14 @@ impl Gateway { // Route lookup match self.router.lookup(&path, &method_str) { - RouteMatch::Found { entry, params } => { + RouteMatch::Found { entry, mut params } => { + // Canonically percent-decode captured path-parameter values once, + // so routing, validation, and the dispatcher all see the same + // decoded value (segments were matched raw; `%2F` stays a literal + // slash within its segment rather than acting as a separator). + for (_, value) in params.iter_mut() { + *value = barbacane_lib::validator::percent_decode(value); + } let operation = &self.operations[entry.operation_index]; let validator = &self.validators[entry.operation_index]; let route_path = operation.path.clone(); diff --git a/crates/barbacane/src/validator.rs b/crates/barbacane/src/validator.rs index 2f860e3..ccf31d3 100644 --- a/crates/barbacane/src/validator.rs +++ b/crates/barbacane/src/validator.rs @@ -243,6 +243,32 @@ impl RequestLimits { Ok(()) } + /// Validate the raw header count. Unlike [`validate_headers`], which operates + /// on a name→value map that collapses duplicate names, this counts the total + /// number of header lines so repeated names cannot undercount the limit. + pub fn validate_header_count(&self, count: usize) -> Result<(), ValidationError2> { + if count > self.max_headers { + return Err(ValidationError2::TooManyHeaders { + count, + limit: self.max_headers, + }); + } + Ok(()) + } + + /// Validate a single header line's size (name bytes + value bytes). Applied + /// per raw header so a duplicate name's value isn't skipped. + pub fn validate_header_size(&self, name: &str, size: usize) -> Result<(), ValidationError2> { + if size > self.max_header_size { + return Err(ValidationError2::HeaderTooLarge { + name: name.to_string(), + size, + limit: self.max_header_size, + }); + } + Ok(()) + } + /// Validate body size. pub fn validate_body_size(&self, body_len: usize) -> Result<(), ValidationError2> { if body_len > self.max_body_size { @@ -437,7 +463,7 @@ impl OperationValidator { let mut parts = pair.splitn(2, '='); let key = parts.next()?; let value = parts.next().unwrap_or(""); - Some((urlencoding_decode(key), urlencoding_decode(value))) + Some((percent_decode(key), percent_decode(value))) }) .collect(); @@ -582,28 +608,49 @@ impl OperationValidator { } } -/// Simple URL decoding (handles %XX escapes). -fn urlencoding_decode(input: &str) -> String { - let mut result = String::with_capacity(input.len()); - let mut chars = input.chars().peekable(); - - while let Some(c) = chars.next() { - if c == '%' { - let hex: String = chars.by_ref().take(2).collect(); - if let Ok(byte) = u8::from_str_radix(&hex, 16) { - result.push(byte as char); - } else { - result.push('%'); - result.push_str(&hex); +/// Canonical percent-decoding of a URL component (handles `%XX` escapes and `+`). +/// +/// Decodes into a byte buffer and interprets the result as UTF-8, so multi-byte +/// sequences like `%C3%A9` decode to `é` rather than being corrupted by a naive +/// per-byte `as char` cast. This is the single decoder used for both query and +/// path parameters so routing and validation agree on the decoded value. +pub fn percent_decode(input: &str) -> String { + let bytes = input.as_bytes(); + let mut out: Vec = Vec::with_capacity(bytes.len()); + let mut i = 0; + while i < bytes.len() { + match bytes[i] { + b'%' if i + 2 < bytes.len() => match (hex_val(bytes[i + 1]), hex_val(bytes[i + 2])) { + (Some(h), Some(l)) => { + out.push((h << 4) | l); + i += 3; + } + _ => { + out.push(b'%'); + i += 1; + } + }, + b'+' => { + out.push(b' '); + i += 1; + } + b => { + out.push(b); + i += 1; } - } else if c == '+' { - result.push(' '); - } else { - result.push(c); } } + String::from_utf8_lossy(&out).into_owned() +} - result +/// Parse a single ASCII hex digit into its 0..=15 value. +fn hex_val(b: u8) -> Option { + match b { + b'0'..=b'9' => Some(b - b'0'), + b'a'..=b'f' => Some(b - b'a' + 10), + b'A'..=b'F' => Some(b - b'A' + 10), + _ => None, + } } #[cfg(test)] @@ -1313,4 +1360,38 @@ mod tests { let result = validator.validate_querystring(Some("anything")); assert!(result.is_ok()); } + + #[test] + fn percent_decode_handles_multibyte_utf8() { + // %C3%A9 is UTF-8 for é; the old per-byte `as char` cast corrupted this. + assert_eq!(percent_decode("caf%C3%A9"), "café"); + // %E2%82%AC is the euro sign €. + assert_eq!(percent_decode("%E2%82%AC"), "€"); + // ASCII, plus-as-space, and passthrough. + assert_eq!(percent_decode("a%20b+c"), "a b c"); + // A malformed escape is left intact rather than dropped. + assert_eq!(percent_decode("100%"), "100%"); + assert_eq!(percent_decode("%zz"), "%zz"); + } + + #[test] + fn header_count_counts_duplicates() { + let limits = RequestLimits { + max_headers: 2, + ..Default::default() + }; + // A map collapses duplicate names, but the raw count is what matters. + assert!(limits.validate_header_count(2).is_ok()); + assert!(limits.validate_header_count(3).is_err()); + } + + #[test] + fn header_size_is_checked_per_entry() { + let limits = RequestLimits { + max_header_size: 10, + ..Default::default() + }; + assert!(limits.validate_header_size("x", 10).is_ok()); + assert!(limits.validate_header_size("x-big", 11).is_err()); + } } diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md index c7b185f..719efb8 100644 --- a/docs/reference/configuration.md +++ b/docs/reference/configuration.md @@ -47,6 +47,16 @@ These changes are intentional secure defaults. Adopt them as follows: capabilities; custom plugins must list theirs under `[capabilities] host_functions = [...]`. +## Admin endpoints (loopback by default) + +The data plane serves `/health`, `/metrics`, and `/provenance` on a dedicated +admin port (`--admin-bind`). These endpoints are **unauthenticated** so metrics +scrapers can reach them; `/provenance` and `/metrics` expose build and +operational metadata. Keep the admin port bound to loopback (the default) or +behind a trusted network boundary. Binding it to a non-loopback address (e.g. +`--admin-bind 0.0.0.0:...`) logs a startup warning because it exposes that +metadata off-host. + ## Artifact signing quickstart ```bash