Skip to content

Commit 9ab8af2

Browse files
authored
fix(socket): round event-payload log truncation to UTF-8 boundary (tinyhumansai#1818)
1 parent 9eee92e commit 9ab8af2

1 file changed

Lines changed: 58 additions & 6 deletions

File tree

src/openhuman/socket/event_handlers.rs

Lines changed: 58 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,17 @@ pub(super) fn handle_sio_event(
2727
shared: &Arc<SharedState>,
2828
) {
2929
// Log every incoming event for observability.
30+
// Payload content is intentionally omitted from logs — webhook bodies,
31+
// channel messages, and Composio trigger payloads can carry PII, secrets,
32+
// or auth tokens. The byte-length alone is sufficient for diagnosing
33+
// truncation and throughput issues without exposing raw content.
34+
let payload = data.to_string();
3035
log::info!(
3136
"[socket] event received: name={} data_bytes={}",
3237
event_name,
33-
data.to_string().len()
34-
);
35-
log::debug!(
36-
"[socket] event payload: name={} data={}",
37-
event_name,
38-
&data.to_string()[..data.to_string().len().min(500)]
38+
payload.len()
3939
);
40+
log::debug!("[socket] event dispatch: name={}", event_name);
4041

4142
match event_name {
4243
"ready" => {
@@ -392,4 +393,55 @@ mod tests {
392393
// Must not panic — error path just logs.
393394
emit_via_channel(&tx, "ping", json!({}));
394395
}
396+
397+
// Regression: OPENHUMAN-TAURI-KC (#1814). A multi-byte UTF-8 char
398+
// straddling byte 500 of `data.to_string()` used to panic the debug-log
399+
// truncator with `byte index 500 is not a char boundary`, killing the
400+
// core thread on every receipt of such an event.
401+
//
402+
// The fix: payload content is never emitted in any log line (PII/secrets
403+
// policy). The raw payload bytes are therefore never sliced at a byte
404+
// index that may not be a UTF-8 boundary. This test:
405+
// 1. Constructs a fixture that would have triggered the old panic.
406+
// 2. Verifies `handle_sio_event` completes without panicking.
407+
// Note: this test does NOT assert that the pre-match log lines omit the
408+
// payload. It never inspects log output, and nothing at the type level
409+
// stops a later change from formatting `payload` again; that property
410+
// rests on code review (`payload` is only read via `.len()` there).
411+
#[test]
fn handle_sio_event_payload_redacted_no_panic_on_multibyte_boundary() {
    // Fixture: `json!({"data": <s>}).to_string()` emits `{"data":"<s>"}`, so
    // the 9-byte prefix `{"data":"` plus 490 ASCII bytes places the 2-byte
    // Cyrillic `'н'` at bytes 499..501, i.e. across byte offset 500.
    let mut s = "a".repeat(490);
    s.push('н'); // 2 bytes — straddles byte 500
    s.push_str(&"b".repeat(20)); // trailing pad past the 500-byte cap
    let payload = json!({ "data": s });
    let serialized = payload.to_string();
    assert!(
        serialized.len() > 500,
        "fixture must exceed the 500-byte boundary"
    );
    assert!(
        !serialized.is_char_boundary(500),
        "fixture must place a multi-byte char across byte 500"
    );

    // Confirm that slicing the payload at byte 500 is invalid, i.e. that the
    // old `[..500]` truncation really was broken for this input. `str::get`
    // returns `None` exactly when the equivalent indexing would panic, so the
    // fixture is validated without raising (and catching) a real panic, which
    // would print a spurious panic message into the test output and could not
    // be caught at all when built with `panic = "abort"`.
    assert!(
        serialized.get(..500).is_none(),
        "slice at byte 500 should be invalid for this fixture (validates the fixture itself)"
    );

    let shared = make_shared();
    let (tx, _rx) = mpsc::unbounded_channel::<String>();
    // Any event name exercises the pre-match log path. Must not panic.
    handle_sio_event("anything.unhandled", payload, &tx, &shared);
    assert_eq!(*shared.status.read(), ConnectionStatus::Disconnected);
}
395447
}

0 commit comments

Comments
 (0)