From 84353d7282d416b05db584e9f45bd2417dd8acc7 Mon Sep 17 00:00:00 2001 From: Pratyush Sharma <56130065+pratyush618@users.noreply.github.com> Date: Fri, 24 Apr 2026 15:19:10 +0530 Subject: [PATCH 1/2] fix(formats): bound ZIP entry reads and preallocations on untrusted input EPUB, PPTX, and DOCX are ZIP archives, and the format crates were reading every entry with unbounded `read_to_string` / `read_to_end`. A crafted archive could declare a tiny compressed size that expands to multi-GB, exhausting memory. Each zip-based crate now has a `safe_read` helper (or inline equivalent in DOCX's single metadata reader) that: - rejects entries whose declared size exceeds a per-entry cap (100 MB for EPUB/PPTX, 16 MB for DOCX's tiny `core.xml`) - uses `Read::take(cap + 1)` as defense in depth against archives that lie about their declared size XLSX and PPTX also sized `Vec::with_capacity` directly from attacker-controlled counts (workbook sheet list, archive slide listing); both now cap the preallocation at a sane ceiling. Tests cover the normal path and the oversized-entry rejection in paperjam-epub. --- crates/paperjam-docx/src/metadata.rs | 20 ++++- crates/paperjam-epub/src/error.rs | 3 + crates/paperjam-epub/src/lib.rs | 1 + crates/paperjam-epub/src/parser.rs | 38 ++------- crates/paperjam-epub/src/safe_read.rs | 115 ++++++++++++++++++++++++++ crates/paperjam-pptx/src/error.rs | 4 + crates/paperjam-pptx/src/lib.rs | 1 + crates/paperjam-pptx/src/metadata.rs | 12 ++- crates/paperjam-pptx/src/parser.rs | 22 ++--- crates/paperjam-pptx/src/safe_read.rs | 40 +++++++++ crates/paperjam-xlsx/src/reader.rs | 4 +- 11 files changed, 203 insertions(+), 57 deletions(-) create mode 100644 crates/paperjam-epub/src/safe_read.rs create mode 100644 crates/paperjam-pptx/src/safe_read.rs diff --git a/crates/paperjam-docx/src/metadata.rs b/crates/paperjam-docx/src/metadata.rs index 1ddb5eb..ed277f7 100644 --- a/crates/paperjam-docx/src/metadata.rs +++ b/crates/paperjam-docx/src/metadata.rs @@ -34,13 +34,29 @@ impl DocxDocument { } } -/// Read `docProps/core.xml` from the DOCX ZIP archive. +/// Per-entry decompressed byte limit when reading the DOCX archive. +/// `docProps/core.xml` is a tiny metadata file; anything claiming more than +/// this is almost certainly malicious. +const MAX_ENTRY_BYTES: u64 = 16 * 1024 * 1024; + +/// Read `docProps/core.xml` from the DOCX ZIP archive with a size cap. fn read_core_xml_from_zip(bytes: &[u8]) -> Option { let cursor = Cursor::new(bytes); let mut archive = zip::ZipArchive::new(cursor).ok()?; let mut file = archive.by_name("docProps/core.xml").ok()?; + + if file.size() > MAX_ENTRY_BYTES { + return None; + } + let mut contents = String::new(); - file.read_to_string(&mut contents).ok()?; + let read = (&mut file) + .take(MAX_ENTRY_BYTES + 1) + .read_to_string(&mut contents) + .ok()?; + if read as u64 > MAX_ENTRY_BYTES { + return None; + } Some(contents) } diff --git a/crates/paperjam-epub/src/error.rs b/crates/paperjam-epub/src/error.rs index 230b967..6cbb9c5 100644 --- a/crates/paperjam-epub/src/error.rs +++ b/crates/paperjam-epub/src/error.rs @@ -17,6 +17,9 @@ pub enum EpubError { #[error("invalid EPUB structure: {0}")] InvalidStructure(String), + + #[error("EPUB entry `{name}` is too large ({size} bytes, limit {limit})")] + EntryTooLarge { name: String, size: u64, limit: u64 }, } impl From for EpubError { diff --git a/crates/paperjam-epub/src/lib.rs b/crates/paperjam-epub/src/lib.rs index f222c6e..d85af44 100644 --- a/crates/paperjam-epub/src/lib.rs +++ b/crates/paperjam-epub/src/lib.rs @@ -4,6 +4,7 @@ mod image; mod markdown; mod metadata; pub mod parser; +mod safe_read; mod structure; mod table; mod text; diff --git a/crates/paperjam-epub/src/parser.rs b/crates/paperjam-epub/src/parser.rs index 0c621de..932e08b 100644 --- a/crates/paperjam-epub/src/parser.rs +++ b/crates/paperjam-epub/src/parser.rs @@ -1,11 +1,11 @@ use std::collections::HashMap; -use std::io::Read; use quick_xml::events::Event; use quick_xml::Reader; use crate::document::{ChapterData, EpubDocument, OpfMetadata, TocEntry}; use crate::error::{EpubError, Result}; +use crate::safe_read::{read_entry_bytes, read_entry_string}; use crate::toc; /// Parse an EPUB document from raw bytes. @@ -14,7 +14,7 @@ pub fn parse_epub(bytes: &[u8]) -> Result { let mut archive = zip::ZipArchive::new(cursor)?; // 1. Find the OPF path from container.xml. - let container_xml = read_zip_entry_string(&mut archive, "META-INF/container.xml")?; + let container_xml = read_entry_string(&mut archive, "META-INF/container.xml")?; let opf_path = parse_container_xml(&container_xml)?; let opf_base_dir = opf_path .rsplit_once('/') @@ -22,7 +22,7 @@ pub fn parse_epub(bytes: &[u8]) -> Result { .unwrap_or_default(); // 2. Parse OPF: metadata, manifest, spine. - let opf_xml = read_zip_entry_string(&mut archive, &opf_path)?; + let opf_xml = read_entry_string(&mut archive, &opf_path)?; let (opf_metadata, manifest, spine) = parse_opf(&opf_xml)?; // 3. Parse TOC. @@ -33,7 +33,7 @@ pub fn parse_epub(bytes: &[u8]) -> Result { for (idx, spine_idref) in spine.iter().enumerate() { if let Some(href) = manifest.get(spine_idref) { let full_path = resolve_path(&opf_base_dir, href); - match read_zip_entry_bytes(&mut archive, &full_path) { + match read_entry_bytes(&mut archive, &full_path) { Ok(html_bytes) => { let html_doc = paperjam_html::HtmlDocument::from_bytes(&html_bytes)?; let title = find_toc_title(&toc_entries, href); @@ -67,30 +67,6 @@ pub fn parse_epub(bytes: &[u8]) -> Result { // Helpers // --------------------------------------------------------------------------- -fn read_zip_entry_string( - archive: &mut zip::ZipArchive>, - name: &str, -) -> Result { - let mut file = archive - .by_name(name) - .map_err(|_| EpubError::MissingEntry(name.to_string()))?; - let mut buf = String::new(); - file.read_to_string(&mut buf)?; - Ok(buf) -} - -fn read_zip_entry_bytes( - archive: &mut zip::ZipArchive>, - name: &str, -) -> Result> { - let mut file = archive - .by_name(name) - .map_err(|_| EpubError::MissingEntry(name.to_string()))?; - let mut buf = Vec::new(); - file.read_to_end(&mut buf)?; - Ok(buf) -} - fn resolve_path(base_dir: &str, href: &str) -> String { if base_dir.is_empty() { href.to_string() @@ -284,7 +260,7 @@ fn parse_toc_from_manifest( for (id, href) in manifest { if id == "ncx" || href.ends_with(".ncx") { let full_path = resolve_path(opf_base_dir, href); - if let Ok(xml) = read_zip_entry_string(archive, &full_path) { + if let Ok(xml) = read_entry_string(archive, &full_path) { let entries = toc::parse_ncx(&xml); if !entries.is_empty() { return entries; @@ -297,7 +273,7 @@ fn parse_toc_from_manifest( for href in manifest.values() { if href.contains("nav") && (href.ends_with(".xhtml") || href.ends_with(".html")) { let full_path = resolve_path(opf_base_dir, href); - if let Ok(html_bytes) = read_zip_entry_bytes(archive, &full_path) { + if let Ok(html_bytes) = read_entry_bytes(archive, &full_path) { let entries = toc::parse_nav_xhtml(&html_bytes); if !entries.is_empty() { return entries; @@ -322,7 +298,7 @@ fn collect_images( let lower = href.to_ascii_lowercase(); if image_extensions.iter().any(|ext| lower.ends_with(ext)) { let full_path = resolve_path(opf_base_dir, href); - if let Ok(data) = read_zip_entry_bytes(archive, &full_path) { + if let Ok(data) = read_entry_bytes(archive, &full_path) { images.push((href.clone(), data)); } } diff --git a/crates/paperjam-epub/src/safe_read.rs b/crates/paperjam-epub/src/safe_read.rs new file mode 100644 index 0000000..167800d --- /dev/null +++ b/crates/paperjam-epub/src/safe_read.rs @@ -0,0 +1,115 @@ +//! Bounded readers for ZIP entries to mitigate decompression-bomb attacks. +//! +//! EPUB files are ZIP archives; malicious inputs can declare tiny compressed +//! sizes that expand to gigabytes. We cap the decompressed length on every +//! entry we pull out of the archive. + +use std::io::Read; + +use crate::error::{EpubError, Result}; + +/// Per-entry decompressed byte limit. Normal EPUB entries (XHTML chapters, +/// cover images, fonts) are comfortably under this. A document that exceeds +/// it is either pathological or malicious. +pub const MAX_ENTRY_BYTES: u64 = 100 * 1024 * 1024; + +pub fn read_entry_string( + archive: &mut zip::ZipArchive>, + name: &str, +) -> Result { + let mut entry = archive + .by_name(name) + .map_err(|_| EpubError::MissingEntry(name.to_string()))?; + + let declared = entry.size(); + if declared > MAX_ENTRY_BYTES { + return Err(EpubError::EntryTooLarge { + name: name.to_string(), + size: declared, + limit: MAX_ENTRY_BYTES, + }); + } + + let mut buf = String::new(); + let read = (&mut entry) + .take(MAX_ENTRY_BYTES + 1) + .read_to_string(&mut buf)?; + if read as u64 > MAX_ENTRY_BYTES { + return Err(EpubError::EntryTooLarge { + name: name.to_string(), + size: read as u64, + limit: MAX_ENTRY_BYTES, + }); + } + Ok(buf) +} + +pub fn read_entry_bytes( + archive: &mut zip::ZipArchive>, + name: &str, +) -> Result> { + let mut entry = archive + .by_name(name) + .map_err(|_| EpubError::MissingEntry(name.to_string()))?; + + let declared = entry.size(); + if declared > MAX_ENTRY_BYTES { + return Err(EpubError::EntryTooLarge { + name: name.to_string(), + size: declared, + limit: MAX_ENTRY_BYTES, + }); + } + + let mut buf = Vec::new(); + let read = (&mut entry) + .take(MAX_ENTRY_BYTES + 1) + .read_to_end(&mut buf)?; + if read as u64 > MAX_ENTRY_BYTES { + return Err(EpubError::EntryTooLarge { + name: name.to_string(), + size: read as u64, + limit: MAX_ENTRY_BYTES, + }); + } + Ok(buf) +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::io::{Cursor, Write}; + + fn build_archive_with_entry(name: &str, contents: &[u8]) -> Vec { + let mut buf = Vec::new(); + { + let mut w = zip::ZipWriter::new(Cursor::new(&mut buf)); + w.start_file::<_, ()>(name, zip::write::SimpleFileOptions::default()) + .unwrap(); + w.write_all(contents).unwrap(); + w.finish().unwrap(); + } + buf + } + + #[test] + fn small_entry_reads_normally() { + let bytes = build_archive_with_entry("hello.txt", b"hello world"); + let bytes_slice: &[u8] = &bytes; + let mut archive = zip::ZipArchive::new(Cursor::new(bytes_slice)).unwrap(); + let s = read_entry_string(&mut archive, "hello.txt").unwrap(); + assert_eq!(s, "hello world"); + } + + #[test] + fn oversized_entry_is_rejected_by_declared_size() { + // Build an archive whose single entry exceeds the per-entry cap. + let blob = vec![b'a'; (MAX_ENTRY_BYTES as usize) + 1]; + let bytes = build_archive_with_entry("big.bin", &blob); + let bytes_slice: &[u8] = &bytes; + let mut archive = zip::ZipArchive::new(Cursor::new(bytes_slice)).unwrap(); + let err = read_entry_bytes(&mut archive, "big.bin").unwrap_err(); + assert!(matches!(err, EpubError::EntryTooLarge { .. })); + } +} diff --git a/crates/paperjam-pptx/src/error.rs b/crates/paperjam-pptx/src/error.rs index f07b261..4217ad9 100644 --- a/crates/paperjam-pptx/src/error.rs +++ b/crates/paperjam-pptx/src/error.rs @@ -20,6 +20,10 @@ pub enum PptxError { /// The PPTX structure is invalid or unsupported. #[error("invalid PPTX structure: {0}")] InvalidStructure(String), + + /// A ZIP entry exceeded the per-entry decompressed byte limit. + #[error("PPTX entry `{name}` is too large ({size} bytes, limit {limit})")] + EntryTooLarge { name: String, size: u64, limit: u64 }, } impl From for PptxError { diff --git a/crates/paperjam-pptx/src/lib.rs b/crates/paperjam-pptx/src/lib.rs index 7f81a03..7fe2f6b 100644 --- a/crates/paperjam-pptx/src/lib.rs +++ b/crates/paperjam-pptx/src/lib.rs @@ -3,6 +3,7 @@ pub mod error; pub mod markdown; pub mod metadata; pub mod parser; +mod safe_read; pub mod structure; pub mod table; pub mod text; diff --git a/crates/paperjam-pptx/src/metadata.rs b/crates/paperjam-pptx/src/metadata.rs index bef6632..4be3080 100644 --- a/crates/paperjam-pptx/src/metadata.rs +++ b/crates/paperjam-pptx/src/metadata.rs @@ -1,4 +1,5 @@ use crate::error::{PptxError, Result}; +use crate::safe_read::read_entry_string; use quick_xml::events::Event; use quick_xml::Reader; use std::io::Read; @@ -38,13 +39,10 @@ impl PptxMetadata { pub fn parse_metadata( archive: &mut ZipArchive, ) -> Result { - let xml = match archive.by_name("docProps/core.xml") { - Ok(mut entry) => { - let mut buf = String::new(); - entry.read_to_string(&mut buf)?; - buf - } - Err(_) => return Ok(PptxMetadata::default()), + let xml = match read_entry_string(archive, "docProps/core.xml") { + Ok(buf) => buf, + Err(PptxError::MissingEntry(_)) => return Ok(PptxMetadata::default()), + Err(e) => return Err(e), }; let mut reader = Reader::from_str(&xml); diff --git a/crates/paperjam-pptx/src/parser.rs b/crates/paperjam-pptx/src/parser.rs index 4f6d73f..acd7f01 100644 --- a/crates/paperjam-pptx/src/parser.rs +++ b/crates/paperjam-pptx/src/parser.rs @@ -1,6 +1,7 @@ use crate::document::{PptxDocument, SlideData, TextBlock}; use crate::error::{PptxError, Result}; use crate::metadata; +use crate::safe_read::read_entry_string; use paperjam_model::table::{Cell, Row, Table, TableStrategy}; use quick_xml::events::Event; use quick_xml::Reader; @@ -15,11 +16,13 @@ pub fn parse_pptx(bytes: &[u8]) -> Result { let meta = metadata::parse_metadata(&mut archive)?; let slide_names = find_slide_files(&archive); - let mut slides = Vec::with_capacity(slide_names.len()); + // Cap the initial allocation — slide_names comes from the archive + // listing and is attacker-controlled. + let mut slides = Vec::with_capacity(slide_names.len().min(4096)); for (idx, name) in slide_names.iter().enumerate() { - let slide_xml = read_zip_entry(&mut archive, name)?; + let slide_xml = read_entry_string(&mut archive, name)?; let notes_path = format!("ppt/notesSlides/notesSlide{}.xml", idx + 1); - let notes_xml = read_zip_entry(&mut archive, ¬es_path).ok(); + let notes_xml = read_entry_string(&mut archive, ¬es_path).ok(); let slide = parse_slide(&slide_xml, idx + 1, notes_xml.as_deref())?; slides.push(slide); } @@ -59,19 +62,6 @@ fn extract_slide_number(name: &str) -> usize { stem.parse::().unwrap_or(usize::MAX) } -/// Read a ZIP entry by path and return its contents as a UTF-8 string. -fn read_zip_entry( - archive: &mut ZipArchive, - path: &str, -) -> Result { - let mut entry = archive - .by_name(path) - .map_err(|_| PptxError::MissingEntry(path.to_string()))?; - let mut buf = String::new(); - entry.read_to_string(&mut buf)?; - Ok(buf) -} - // --------------------------------------------------------------------------- // Slide XML parsing // --------------------------------------------------------------------------- diff --git a/crates/paperjam-pptx/src/safe_read.rs b/crates/paperjam-pptx/src/safe_read.rs new file mode 100644 index 0000000..2a2a3b0 --- /dev/null +++ b/crates/paperjam-pptx/src/safe_read.rs @@ -0,0 +1,40 @@ +//! Bounded readers for ZIP entries to mitigate decompression-bomb attacks. + +use std::io::{Read, Seek}; + +use crate::error::{PptxError, Result}; + +/// Per-entry decompressed byte limit. Typical slide XML is tens of KB; even +/// large decks rarely exceed a few MB per entry. +pub const MAX_ENTRY_BYTES: u64 = 100 * 1024 * 1024; + +pub fn read_entry_string( + archive: &mut zip::ZipArchive, + name: &str, +) -> Result { + let mut entry = archive + .by_name(name) + .map_err(|_| PptxError::MissingEntry(name.to_string()))?; + + let declared = entry.size(); + if declared > MAX_ENTRY_BYTES { + return Err(PptxError::EntryTooLarge { + name: name.to_string(), + size: declared, + limit: MAX_ENTRY_BYTES, + }); + } + + let mut buf = String::new(); + let read = (&mut entry) + .take(MAX_ENTRY_BYTES + 1) + .read_to_string(&mut buf)?; + if read as u64 > MAX_ENTRY_BYTES { + return Err(PptxError::EntryTooLarge { + name: name.to_string(), + size: read as u64, + limit: MAX_ENTRY_BYTES, + }); + } + Ok(buf) +} diff --git a/crates/paperjam-xlsx/src/reader.rs b/crates/paperjam-xlsx/src/reader.rs index 863ac91..3bd69b8 100644 --- a/crates/paperjam-xlsx/src/reader.rs +++ b/crates/paperjam-xlsx/src/reader.rs @@ -9,7 +9,9 @@ pub fn read_xlsx(bytes: &[u8]) -> Result { let mut workbook: Xlsx<_> = Xlsx::new(cursor)?; let sheet_names = workbook.sheet_names().to_vec(); - let mut sheets = Vec::with_capacity(sheet_names.len()); + // Cap the initial allocation — sheet_names comes from the workbook + // header and is attacker-controlled. + let mut sheets = Vec::with_capacity(sheet_names.len().min(1024)); for name in &sheet_names { if let Ok(range) = workbook.worksheet_range(name) { From 5c4b7621065d78bf794ef68a81167f2ee4f237cf Mon Sep 17 00:00:00 2001 From: Pratyush Sharma <56130065+pratyush618@users.noreply.github.com> Date: Fri, 24 Apr 2026 15:19:27 +0530 Subject: [PATCH 2/2] feat(mcp): sandbox resolved paths to the working directory by default `resolve_path` previously accepted any absolute path and did no `..` normalisation, so an MCP client could read or write anywhere the server process had access. The server is typically launched by a local assistant with the caller's full user privileges, so this was an open-ended filesystem primitive. resolve_path now: - canonicalises both the working directory and the candidate path (falling back to canonicalising the nearest existing ancestor so that save operations to new files still work) - rejects any resolved path that is not contained within the working directory, with a structured `McpError::PathEscapesSandbox` error The CLI gains `--allow-absolute-paths` for users who relied on the old permissive behaviour, paired with a new `ServerConfig` / `PaperjamServer::with_config` constructor so callers can make the same choice programmatically. Defaults are safe; opting out is explicit. Tests cover the allowed, rejected, traversal, non-existent-child, and opt-out cases. --- crates/paperjam-mcp/src/error.rs | 3 + crates/paperjam-mcp/src/lib.rs | 163 +++++++++++++++++++++++++++++-- crates/paperjam-mcp/src/main.rs | 16 ++- 3 files changed, 168 insertions(+), 14 deletions(-) diff --git a/crates/paperjam-mcp/src/error.rs b/crates/paperjam-mcp/src/error.rs index 0dcd3ba..0dc3e50 100644 --- a/crates/paperjam-mcp/src/error.rs +++ b/crates/paperjam-mcp/src/error.rs @@ -15,6 +15,9 @@ pub enum McpError { #[error("pipeline error: {0}")] Pipeline(#[from] paperjam_pipeline::PipelineError), + #[error("path `{0}` escapes the working directory; pass --allow-absolute-paths to the server to opt out of sandboxing")] + PathEscapesSandbox(String), + #[error("{0}")] Other(String), } diff --git a/crates/paperjam-mcp/src/lib.rs b/crates/paperjam-mcp/src/lib.rs index cd8790f..c65a1c7 100644 --- a/crates/paperjam-mcp/src/lib.rs +++ b/crates/paperjam-mcp/src/lib.rs @@ -1,7 +1,7 @@ pub mod error; pub mod session; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; use rmcp::handler::server::router::tool::ToolRouter; @@ -11,30 +11,115 @@ use rmcp::{tool, tool_handler, tool_router, ServerHandler}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use crate::error::McpError; use crate::session::SessionManager; +/// Configuration for the MCP server. +#[derive(Debug, Clone)] +pub struct ServerConfig { + /// Root directory for relative paths. Also acts as the sandbox root unless + /// `allow_absolute_paths` is set. + pub working_dir: PathBuf, + /// If true, disable sandbox containment and allow paths that point outside + /// `working_dir`. Default: false. + pub allow_absolute_paths: bool, +} + /// The paperjam MCP server. pub struct PaperjamServer { sessions: Arc>, working_dir: PathBuf, + working_dir_canonical: Option, + allow_absolute_paths: bool, tool_router: ToolRouter, } impl PaperjamServer { + /// Construct a server rooted at `working_dir` with sandboxing enabled. pub fn new(working_dir: PathBuf) -> Self { + Self::with_config(ServerConfig { + working_dir, + allow_absolute_paths: false, + }) + } + + pub fn with_config(config: ServerConfig) -> Self { + let working_dir_canonical = config.working_dir.canonicalize().ok(); Self { sessions: Arc::new(Mutex::new(SessionManager::new())), - working_dir, + working_dir: config.working_dir, + working_dir_canonical, + allow_absolute_paths: config.allow_absolute_paths, tool_router: Self::tool_router(), } } - fn resolve_path(&self, path: &str) -> PathBuf { - let p = PathBuf::from(path); - if p.is_absolute() { - p - } else { - self.working_dir.join(p) + /// Resolve a user-supplied path against the server's working directory. + /// + /// Unless `allow_absolute_paths` is set, the result must be contained + /// within `working_dir`. A non-existent target (common for save + /// operations) is accepted if its nearest existing ancestor is within + /// the sandbox. + fn resolve_path(&self, path: &str) -> Result { + let candidate = { + let p = PathBuf::from(path); + if p.is_absolute() { + p + } else { + self.working_dir.join(p) + } + }; + + if self.allow_absolute_paths { + return Ok(candidate); + } + + let root = self + .working_dir_canonical + .as_deref() + .unwrap_or(self.working_dir.as_path()); + + // Canonicalize as much of the candidate as exists on disk, then + // append any non-existent tail so that `..` components are resolved + // before the containment check. + let checked = canonicalize_with_fallback(&candidate) + .ok_or_else(|| McpError::PathEscapesSandbox(path.to_string()))?; + + if !checked.starts_with(root) { + return Err(McpError::PathEscapesSandbox(path.to_string())); + } + + Ok(checked) + } +} + +/// Walk up `path` until an existing ancestor is found, canonicalize it, then +/// re-append the non-existent tail. Returns `None` if no ancestor exists. +fn canonicalize_with_fallback(path: &Path) -> Option { + if let Ok(c) = path.canonicalize() { + return Some(c); + } + let mut tail: Vec<&std::ffi::OsStr> = Vec::new(); + let mut cur = path; + loop { + if let Some(name) = cur.file_name() { + tail.push(name); + } + match cur.parent() { + Some(parent) => { + if let Ok(canon) = parent.canonicalize() { + let mut out = canon; + for name in tail.iter().rev() { + out.push(name); + } + return Some(out); + } + cur = parent; + if cur.as_os_str().is_empty() { + return None; + } + } + None => return None, } } } @@ -127,7 +212,10 @@ impl PaperjamServer { description = "Open a document from a file path. Supports PDF, DOCX, XLSX, PPTX, HTML, EPUB. Returns a session ID for subsequent operations." )] async fn open_document(&self, params: Parameters) -> String { - let path = self.resolve_path(¶ms.0.path); + let path = match self.resolve_path(¶ms.0.path) { + Ok(p) => p, + Err(e) => return format!("Error: {}", e), + }; match self.sessions.lock().unwrap().open_from_path(&path) { Ok(session_id) => { let sessions = self.sessions.lock().unwrap(); @@ -428,7 +516,10 @@ impl PaperjamServer { } }; - let path = self.resolve_path(¶ms.0.output_path); + let path = match self.resolve_path(¶ms.0.output_path) { + Ok(p) => p, + Err(e) => return format!("Error: {}", e), + }; if let Some(parent) = path.parent() { if let Err(e) = std::fs::create_dir_all(parent) { return format!("Error: {}", e); @@ -469,3 +560,55 @@ impl PaperjamServer { #[tool_handler(router = self.tool_router)] impl ServerHandler for PaperjamServer {} + +#[cfg(test)] +mod tests { + use super::*; + + fn server(tmp: &Path, allow_absolute: bool) -> PaperjamServer { + PaperjamServer::with_config(ServerConfig { + working_dir: tmp.to_path_buf(), + allow_absolute_paths: allow_absolute, + }) + } + + #[test] + fn relative_path_inside_sandbox_is_allowed() { + let tmp = std::env::temp_dir().canonicalize().unwrap(); + let s = server(&tmp, false); + let resolved = s.resolve_path("some_file.pdf").unwrap(); + assert!(resolved.starts_with(&tmp)); + } + + #[test] + fn absolute_path_outside_sandbox_is_rejected() { + let tmp = std::env::temp_dir().canonicalize().unwrap(); + let s = server(&tmp, false); + let err = s.resolve_path("/etc/passwd").unwrap_err(); + assert!(matches!(err, McpError::PathEscapesSandbox(_))); + } + + #[test] + fn parent_traversal_is_rejected() { + let tmp = std::env::temp_dir().canonicalize().unwrap(); + let s = server(&tmp, false); + let err = s.resolve_path("../../../etc/passwd").unwrap_err(); + assert!(matches!(err, McpError::PathEscapesSandbox(_))); + } + + #[test] + fn nonexistent_child_path_is_accepted() { + let tmp = std::env::temp_dir().canonicalize().unwrap(); + let s = server(&tmp, false); + let resolved = s.resolve_path("does_not_exist_yet.pdf").unwrap(); + assert!(resolved.starts_with(&tmp)); + } + + #[test] + fn allow_absolute_flag_bypasses_containment_check() { + let tmp = std::env::temp_dir().canonicalize().unwrap(); + let s = server(&tmp, true); + let resolved = s.resolve_path("/etc/passwd").unwrap(); + assert_eq!(resolved, PathBuf::from("/etc/passwd")); + } +} diff --git a/crates/paperjam-mcp/src/main.rs b/crates/paperjam-mcp/src/main.rs index 212ddf5..f223b31 100644 --- a/crates/paperjam-mcp/src/main.rs +++ b/crates/paperjam-mcp/src/main.rs @@ -2,14 +2,22 @@ use rmcp::ServiceExt; #[tokio::main] async fn main() -> Result<(), Box> { + let args: Vec = std::env::args().collect(); + // Determine working directory from args or current directory. - let working_dir = std::env::args() - .skip_while(|a| a != "--working-dir") - .nth(1) + let working_dir = args + .iter() + .position(|a| a == "--working-dir") + .and_then(|i| args.get(i + 1)) .map(std::path::PathBuf::from) .unwrap_or_else(|| std::env::current_dir().unwrap_or_default()); - let server = paperjam_mcp::PaperjamServer::new(working_dir); + let allow_absolute_paths = args.iter().any(|a| a == "--allow-absolute-paths"); + + let server = paperjam_mcp::PaperjamServer::with_config(paperjam_mcp::ServerConfig { + working_dir, + allow_absolute_paths, + }); // Start stdio transport. let transport = (tokio::io::stdin(), tokio::io::stdout());