
Commit 58ddf4a

fix(inference): sanitize pytorch kv truncate io errors

Route PyTorch KV-cache truncate temp-file read and write failures through
the canonical pytorch_worker_kv_truncate_failed error path. Generate the
truncate request id before temp-file IO so local IO failures preserve
request correlation, and use the shared worker-message sanitizer instead
of ad hoc inference errors. Update the backend README and the inference
execution boundary plan with the KV truncate temp-file hygiene rule.

Validation:
- cargo fmt --all
- cargo test -p inference --features backend-pytorch pytorch_kv
- cargo test -p inference --features backend-pytorch test_pytorch_worker_kv_truncate
- cargo check -p inference --features backend-pytorch
- cargo check -p inference --no-default-features
- git diff --check
1 parent 3e974b4 · commit 58ddf4a

4 files changed · 49 additions & 12 deletions

crates/inference/src/backend/README.md

Lines changed: 3 additions & 0 deletions
```diff
@@ -187,6 +187,9 @@ fn create_backend() {
   malformed loaded-model/live-KV metadata through the canonical
   `pytorch_worker_kv_*_failed` paths so request ids and bounded diagnostics are
   preserved even when Python returns an unexpected shape.
+- PyTorch KV-cache truncate temp-file read/write failures must use the
+  canonical `pytorch_worker_kv_truncate_failed` path so local temp paths are
+  sanitized before they can reach backend or workflow diagnostics.
 - Backend-native generation fields and kwargs must stay inside backend-local
   mapping helpers. PyTorch maps canonical generation options to
   Transformers-style kwargs, while llama.cpp maps them to bounded
```
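
This hygiene rule leans on the shared worker-message sanitizer. As a rough sketch of the behavior the rule requires (the helper below is hypothetical, not the crate's actual sanitizer), replacing absolute-path tokens with the `[local-path]` marker keeps temp-file locations out of diagnostics:

```rust
// Hypothetical sketch of the path scrubbing the hygiene rule requires; the
// crate's real shared sanitizer may differ in name and implementation.
fn sanitize_local_paths(message: &str) -> String {
    message
        .split_whitespace()
        .map(|token| {
            // Replace any token that looks like an absolute filesystem path
            // so raw temp-file locations never reach diagnostics.
            if token.starts_with('/') || token.contains(":\\") {
                "[local-path]"
            } else {
                token
            }
        })
        .collect::<Vec<_>>()
        .join(" ")
}
```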

crates/inference/src/backend/pytorch.rs

Lines changed: 20 additions & 9 deletions
```diff
@@ -144,6 +144,10 @@ fn kv_worker_failure_from_message(
         .into_backend_error()
 }
 
+fn kv_truncate_worker_failure_from_message(request_id: &str, message: String) -> BackendError {
+    kv_worker_failure_from_message(request_id, "pytorch_worker_kv_truncate_failed", message)
+}
+
 fn kv_loaded_info_unavailable_error(request_id: &str) -> BackendError {
     kv_worker_failure_from_message(
         request_id,
@@ -2106,21 +2110,25 @@ impl InferenceBackend for PyTorchBackend {
         token_position: usize,
         _active_config: Option<&BackendConfig>,
     ) -> Result<Vec<u8>, BackendError> {
+        let request_id = format!("pytorch-kv-truncate-{}", Uuid::new_v4().simple());
         let temp_path = std::env::temp_dir().join(format!(
             "pantograph-pytorch-kv-truncate-{}.bin",
             uuid::Uuid::new_v4()
         ));
-        std::fs::write(&temp_path, data)
-            .map_err(|e| BackendError::Inference(format!("Failed to write KV temp file: {}", e)))?;
+        std::fs::write(&temp_path, data).map_err(|e| {
+            kv_truncate_worker_failure_from_message(
+                &request_id,
+                format!("Failed to write KV temp file: {e}"),
+            )
+        })?;
         let truncate_result = tokio::task::spawn_blocking({
             let temp_path = temp_path.clone();
-            let request_id = format!("pytorch-kv-truncate-{}", Uuid::new_v4().simple());
+            let request_id = request_id.clone();
             move || {
                 Python::with_gil(|py| -> Result<(), BackendError> {
                     let worker = pytorch_worker::worker_module(py).map_err(|e| {
-                        kv_worker_failure_from_message(
+                        kv_truncate_worker_failure_from_message(
                             &request_id,
-                            "pytorch_worker_kv_truncate_failed",
                             format!("Failed to get worker module: {}", e),
                         )
                     })?;
@@ -2130,9 +2138,8 @@ impl InferenceBackend for PyTorchBackend {
                         (temp_path.to_string_lossy().to_string(), token_position),
                     )
                     .map_err(|e| {
-                        kv_worker_failure_from_message(
+                        kv_truncate_worker_failure_from_message(
                             &request_id,
-                            "pytorch_worker_kv_truncate_failed",
                             format!("PyTorch KV truncate failed: {}", e),
                         )
                     })?;
@@ -2142,8 +2149,12 @@ impl InferenceBackend for PyTorchBackend {
         })
         .await
         .map_err(|e| BackendError::Inference(task_join_error_message(e)))?;
-        let read_result = std::fs::read(&temp_path)
-            .map_err(|e| BackendError::Inference(format!("Failed to read KV temp file: {}", e)));
+        let read_result = std::fs::read(&temp_path).map_err(|e| {
+            kv_truncate_worker_failure_from_message(
+                &request_id,
+                format!("Failed to read KV temp file: {e}"),
+            )
+        });
         let _ = std::fs::remove_file(&temp_path);
         truncate_result?;
         read_result
```
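
Building on the `sanitize_local_paths` sketch above, every failure in this method now funnels into one shape: a canonical code and a request id wrapped around a scrubbed message. The toy `BackendError` and the message format below are assumptions; the commit pins only the substrings asserted in the tests that follow.

```rust
// Sketch continued from above (reuses the hypothetical sanitize_local_paths).
// A toy stand-in for the crate's BackendError plus an assembly helper in the
// spirit of kv_truncate_worker_failure_from_message; the exact message
// format is an assumption.
#[derive(Debug)]
enum BackendError {
    Inference(String),
}

fn kv_truncate_failure_sketch(request_id: &str, message: &str) -> BackendError {
    // Canonical code first, then the request id for correlation, then the
    // sanitized detail, so raw local paths never enter the error string.
    BackendError::Inference(format!(
        "pytorch_worker_kv_truncate_failed (request_id={request_id}): {}",
        sanitize_local_paths(message)
    ))
}
```

Note the ordering in the diff: the request id now exists before the first `std::fs::write`, so even a purely local write failure carries the same id the worker call would have used.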

crates/inference/src/backend/pytorch_tests.rs

Lines changed: 18 additions & 2 deletions
```diff
@@ -2486,9 +2486,8 @@ fn test_pytorch_kv_live_info_malformed_result_normalizes_to_backend_error() {
 
 #[test]
 fn test_pytorch_worker_kv_truncate_transport_error_normalizes_to_backend_error() {
-    match kv_worker_failure_from_message(
+    match kv_truncate_worker_failure_from_message(
         "req-kv-truncate",
-        "pytorch_worker_kv_truncate_failed",
         "PyTorch KV truncate failed: invalid marker.".to_string(),
     ) {
         BackendError::Inference(message) => {
@@ -2500,6 +2499,23 @@ fn test_pytorch_worker_kv_truncate_transport_error_normalizes_to_backend_error()
         }
     }
 }
 
+#[test]
+fn test_pytorch_worker_kv_truncate_temp_file_errors_strip_local_paths() {
+    match kv_truncate_worker_failure_from_message(
+        "req-kv-truncate-temp",
+        "Failed to write KV temp file: Permission denied at /tmp/pantograph-pytorch-kv-truncate-private.bin".to_string(),
+    ) {
+        BackendError::Inference(message) => {
+            assert!(message.contains("pytorch_worker_kv_truncate_failed"));
+            assert!(message.contains("req-kv-truncate-temp"));
+            assert!(message.contains("Failed to write KV temp file"));
+            assert!(message.contains("[local-path]"));
+            assert!(!message.contains("/tmp/pantograph-pytorch-kv-truncate-private.bin"));
+        }
+        other => panic!("expected Inference error, got {other:?}"),
+    }
+}
+
 #[test]
 fn test_pytorch_worker_envelope_rejects_missing_required_fields() {
     let fixture = include_str!(
```
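
Fed the new test's input, the sketch above yields a message that passes the same assertions (the output format remains an assumption):

```rust
fn main() {
    let err = kv_truncate_failure_sketch(
        "req-kv-truncate-temp",
        "Failed to write KV temp file: Permission denied at \
         /tmp/pantograph-pytorch-kv-truncate-private.bin",
    );
    match err {
        // Single-variant toy enum, so one arm is exhaustive.
        BackendError::Inference(message) => {
            assert!(message.contains("pytorch_worker_kv_truncate_failed"));
            assert!(message.contains("req-kv-truncate-temp"));
            assert!(message.contains("[local-path]"));
            assert!(!message.contains("/tmp/"));
        }
    }
}
```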

docs/plans/inference-execution-boundary-contracts/plan.md

Lines changed: 8 additions & 1 deletion
```diff
@@ -1852,7 +1852,10 @@ using Python Transformers behind the boundary for broad HF-compatible support.
   codes without exposing cache bytes or file paths in the canonical code.
   PyTorch live-KV loaded-model and live-KV metadata extraction failures now
   use the same canonical KV worker failure shape when Python returns no active
-  model or malformed KV metadata.
+  model or malformed KV metadata. PyTorch KV-cache truncation temp-file
+  read/write failures now also use the canonical
+  `pytorch_worker_kv_truncate_failed` path so local temp paths are sanitized
+  before backend errors can become workflow diagnostics.
   PyTorch backend trait KV slot save/restore/clear/truncate worker failures
   now use the same canonical KV worker failure shape.
   Non-streaming PyTorch generate-text worker transport failures now also retain
@@ -3834,6 +3837,10 @@ Update during implementation:
   envelope and typed response decoder with request-id correlation,
   malformed-response rejection, and canonical sanitized unload errors before
   clearing Rust-side loaded-model state.
+- 2026-05-06: PyTorch KV-cache truncation temp-file read/write failures now
+  route through canonical `pytorch_worker_kv_truncate_failed` errors with a
+  generated request id and shared path sanitizer instead of ad hoc inference
+  errors that could expose local temp paths.
 - 2026-05-05: Added append-only `ChatChunk.cache_handle_id` stream metadata and
   threaded terminal text/chat cache-handle ids through typed gateway results,
   backend-execution lifecycle completion events, and `llm-inference.kv_cache_out`
```
