Skip to content

Commit f311d36

Browse files
fix(cache-memory): reject symlinks in agent memory to prevent Stage 3 credential theft (#524)
* Initial plan * fix(cache-memory): reject symlinks in agent memory to prevent Stage 3 credential theft Three-part defense against the symlink-following attack described in the security audit: 1. collect_files: use entry.file_type() (does NOT follow symlinks) instead of path.is_dir() when deciding whether to recurse. Both file symlinks and directory symlinks are now skipped with a warning, so they are never added to the file list at all. 2. process_agent_memory: canonicalize the memory source base directory once before the per-file loop, then canonicalize each source file path and verify it starts_with the canonical base. This TOCTOU guard catches any symlink that slips through the collection phase (e.g., via a race condition) before any read or copy takes place. 3. Tests (Unix-only): added four new #[cfg(unix)] async tests that create real symlinks on disk and confirm: - collect_files skips file symlinks - collect_files skips directory symlinks (no recursion) - process_agent_memory skips file symlinks pointing outside the base - process_agent_memory skips directory symlinks pointing outside the base Agent-Logs-Url: https://github.com/githubnext/ado-aw/sessions/efdc2afe-1050-4068-8433-cd9a60762981 Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> * fix(cache-memory): address code review feedback on symlink fix - Add clarifying comment on else-if branch in collect_files explaining it is only reached for real (non-symlink) directories - Improve canonicalize error message to explain the security purpose - Add test_collect_files_skips_relative_symlinks covering the relative symlink attack vector (e.g. ../secret.txt) - Fix assertion style: 'must NOT' -> 'must not' Agent-Logs-Url: https://github.com/githubnext/ado-aw/sessions/efdc2afe-1050-4068-8433-cd9a60762981 Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> * fix(cache-memory): reject symlink at agent_memory base directory level The existing fix guarded against symlinks *inside* agent_memory, but if the agent_memory entry itself is a directory symlink the old `is_dir()` check follows it (returning true), and canonicalize() resolves relative to the symlink target — so every collected file passes the starts_with guard and sensitive files are copied. Fix: replace `exists()/is_dir()` with `symlink_metadata()` (lstat) on memory_source and explicitly reject the symlink case before any further processing. Also adds test_process_memory_rejects_base_directory_symlink which plants a `agent_memory -> /sensitive/dir` symlink and asserts nothing is copied to the output. Agent-Logs-Url: https://github.com/githubnext/ado-aw/sessions/971a1892-4711-4142-98ad-d85574576689 Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com>
1 parent 13bfdeb commit f311d36

1 file changed

Lines changed: 259 additions & 8 deletions

File tree

src/tools/cache_memory/execute.rs

Lines changed: 259 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
//! During Stage 3 execution, this module validates and copies sanitized files
66
//! to the final safe_outputs artifact for pickup by the next run.
77
8-
use anyhow::{Result, ensure};
8+
use anyhow::{Context, Result, ensure};
99
use log::{debug, info, warn};
1010
use serde::Deserialize;
1111
use std::path::{Path, PathBuf};
@@ -124,13 +124,22 @@ async fn validate_memory_file_content(path: &Path) -> Result<bool> {
124124
}
125125
}
126126

127-
/// Recursively collect all files under a directory with their relative paths
127+
/// Recursively collect all files under a directory with their relative paths.
128+
///
129+
/// Symlinks (both file and directory) are skipped with a warning to prevent
130+
/// symlink-following attacks that could expose files outside the memory directory.
128131
async fn collect_files(base: &Path, current: &Path) -> Result<Vec<PathBuf>> {
129132
let mut files = Vec::new();
130133
let mut entries = tokio::fs::read_dir(current).await?;
131134
while let Some(entry) = entries.next_entry().await? {
132135
let path = entry.path();
133-
if path.is_dir() {
136+
// Use file_type() which does NOT follow symlinks (unlike path.is_dir()).
137+
let file_type = entry.file_type().await?;
138+
if file_type.is_symlink() {
139+
warn!("Skipping symlink in memory directory: {}", path.display());
140+
} else if file_type.is_dir() {
141+
// This branch is only reached when file_type.is_symlink() is false,
142+
// so `path` is guaranteed to be a real directory (not a symlink-to-dir).
134143
let mut sub_files = Box::pin(collect_files(base, &path)).await?;
135144
files.append(&mut sub_files);
136145
} else {
@@ -152,11 +161,26 @@ pub async fn process_agent_memory(
152161
) -> Result<ExecutionResult> {
153162
let memory_source = source_dir.join(AGENT_MEMORY_DIR);
154163

155-
if !memory_source.exists() || !memory_source.is_dir() {
156-
info!("No agent_memory directory found, skipping memory processing");
157-
return Ok(ExecutionResult::success(
158-
"No agent memory to process",
159-
));
164+
// Use symlink_metadata (lstat) so we do NOT follow a symlink at the base
165+
// directory level — a top-level `agent_memory -> /sensitive/dir` symlink
166+
// in the artifact would otherwise bypass all per-file containment checks.
167+
match tokio::fs::symlink_metadata(&memory_source).await {
168+
Err(_) => {
169+
info!("No agent_memory directory found, skipping memory processing");
170+
return Ok(ExecutionResult::success("No agent memory to process"));
171+
}
172+
Ok(m) if m.is_symlink() => {
173+
warn!(
174+
"agent_memory is a symlink — skipping to prevent directory escape: {}",
175+
memory_source.display()
176+
);
177+
return Ok(ExecutionResult::success("No agent memory to process"));
178+
}
179+
Ok(m) if !m.is_dir() => {
180+
info!("No agent_memory directory found, skipping memory processing");
181+
return Ok(ExecutionResult::success("No agent memory to process"));
182+
}
183+
Ok(_) => {} // real directory, proceed
160184
}
161185

162186
info!(
@@ -175,6 +199,16 @@ pub async fn process_agent_memory(
175199

176200
info!("Found {} file(s) in agent_memory", files.len());
177201

202+
// Canonicalize the base directory once for containment checks below.
203+
// This is required for the symlink-following defense: we compare canonical
204+
// resolved paths to ensure no file escapes the memory directory boundary.
205+
let canonical_base = tokio::fs::canonicalize(&memory_source).await.with_context(|| {
206+
format!(
207+
"Cannot verify memory directory containment (symlink defense requires canonical path): {}",
208+
memory_source.display()
209+
)
210+
})?;
211+
178212
let memory_output = output_dir.join(AGENT_MEMORY_DIR);
179213
let mut total_size: u64 = 0;
180214
let mut copied_count = 0;
@@ -184,6 +218,39 @@ pub async fn process_agent_memory(
184218
for relative_path in &files {
185219
let source_file = memory_source.join(relative_path);
186220

221+
// Defense-in-depth: verify the fully-resolved source path is still within
222+
// the memory base directory, guarding against TOCTOU symlink races.
223+
match tokio::fs::canonicalize(&source_file).await {
224+
Ok(canonical_source) => {
225+
if !canonical_source.starts_with(&canonical_base) {
226+
warn!(
227+
"Skipping path that escapes memory directory (possible symlink attack): {}",
228+
relative_path.display()
229+
);
230+
skipped_count += 1;
231+
skipped_reasons.push(format!(
232+
"{}: resolved path escapes memory directory",
233+
relative_path.display()
234+
));
235+
continue;
236+
}
237+
}
238+
Err(e) => {
239+
warn!(
240+
"Skipping unresolvable path '{}': {}",
241+
relative_path.display(),
242+
e
243+
);
244+
skipped_count += 1;
245+
skipped_reasons.push(format!(
246+
"{}: cannot resolve path ({})",
247+
relative_path.display(),
248+
e
249+
));
250+
continue;
251+
}
252+
}
253+
187254
// Validate path safety
188255
if let Err(e) = validate_memory_path(relative_path) {
189256
warn!("Skipping unsafe path '{}': {}", relative_path.display(), e);
@@ -485,4 +552,188 @@ allowed-extensions:
485552
let config: MemoryConfig = serde_yaml::from_str(yaml).unwrap();
486553
assert!(config.allowed_extensions.is_empty());
487554
}
555+
556+
// --- Symlink security tests (Unix only) ---
557+
558+
#[cfg(unix)]
559+
#[tokio::test]
560+
async fn test_collect_files_skips_file_symlinks() {
561+
let temp = tempfile::tempdir().unwrap();
562+
let mem_dir = temp.path().join(AGENT_MEMORY_DIR);
563+
std::fs::create_dir(&mem_dir).unwrap();
564+
565+
// A real file that should be collected
566+
std::fs::write(mem_dir.join("real.md"), "real content").unwrap();
567+
568+
// A target file sitting outside the memory directory
569+
let target = temp.path().join("secret.txt");
570+
std::fs::write(&target, "secret data").unwrap();
571+
572+
// Symlink inside mem_dir pointing to the outside target
573+
std::os::unix::fs::symlink(&target, mem_dir.join("symlink.txt")).unwrap();
574+
575+
let files = collect_files(&mem_dir, &mem_dir).await.unwrap();
576+
577+
// Only the real file should be collected; the symlink must be skipped
578+
assert_eq!(files.len(), 1, "symlink should be skipped");
579+
assert_eq!(files[0], std::path::PathBuf::from("real.md"));
580+
}
581+
582+
#[cfg(unix)]
583+
#[tokio::test]
584+
async fn test_collect_files_skips_relative_symlinks() {
585+
// Relative symlinks (e.g., ../../etc/passwd) are just as dangerous as
586+
// absolute ones and must also be rejected.
587+
let temp = tempfile::tempdir().unwrap();
588+
let mem_dir = temp.path().join(AGENT_MEMORY_DIR);
589+
std::fs::create_dir(&mem_dir).unwrap();
590+
591+
// A real file
592+
std::fs::write(mem_dir.join("real.md"), "real content").unwrap();
593+
594+
// Relative symlink: within mem_dir, "../secret.txt" points one level up
595+
let target = temp.path().join("secret.txt");
596+
std::fs::write(&target, "secret data").unwrap();
597+
std::os::unix::fs::symlink("../secret.txt", mem_dir.join("relative_link.txt")).unwrap();
598+
599+
let files = collect_files(&mem_dir, &mem_dir).await.unwrap();
600+
601+
// Only the real file should be collected; the relative symlink must be skipped
602+
assert_eq!(files.len(), 1, "relative symlink should be skipped");
603+
assert_eq!(files[0], std::path::PathBuf::from("real.md"));
604+
}
605+
606+
#[cfg(unix)]
607+
#[tokio::test]
608+
async fn test_collect_files_skips_directory_symlinks() {
609+
let temp = tempfile::tempdir().unwrap();
610+
let mem_dir = temp.path().join(AGENT_MEMORY_DIR);
611+
std::fs::create_dir(&mem_dir).unwrap();
612+
613+
// A target directory outside the memory dir containing a sensitive file
614+
let target_dir = temp.path().join("outside_dir");
615+
std::fs::create_dir(&target_dir).unwrap();
616+
std::fs::write(target_dir.join("secret.md"), "secret data").unwrap();
617+
618+
// Directory symlink inside mem_dir pointing to the outside directory
619+
std::os::unix::fs::symlink(&target_dir, mem_dir.join("linked_dir")).unwrap();
620+
621+
let files = collect_files(&mem_dir, &mem_dir).await.unwrap();
622+
623+
// The directory symlink must not be recursed into; nothing should be collected
624+
assert_eq!(files.len(), 0, "directory symlink should not be followed");
625+
}
626+
627+
#[cfg(unix)]
628+
#[tokio::test]
629+
async fn test_process_memory_skips_file_symlinks() {
630+
let temp = tempfile::tempdir().unwrap();
631+
let output = tempfile::tempdir().unwrap();
632+
let mem_dir = temp.path().join(AGENT_MEMORY_DIR);
633+
std::fs::create_dir(&mem_dir).unwrap();
634+
635+
// A legitimate file
636+
std::fs::write(mem_dir.join("notes.md"), "safe notes").unwrap();
637+
638+
// A sensitive file outside the memory dir (simulating /proc/self/environ)
639+
let sensitive = temp.path().join("environ");
640+
std::fs::write(&sensitive, "SECRET_TOKEN=hunter2\nOTHER=val").unwrap();
641+
642+
// Symlink inside the memory dir pointing at the sensitive file
643+
std::os::unix::fs::symlink(&sensitive, mem_dir.join("env.txt")).unwrap();
644+
645+
let config = MemoryConfig::default();
646+
let result = process_agent_memory(temp.path(), output.path(), &config)
647+
.await
648+
.unwrap();
649+
650+
assert!(result.success);
651+
// Only notes.md should be copied; the symlink is silently dropped by collect_files
652+
assert!(
653+
result.message.contains("1 file(s) copied"),
654+
"unexpected message: {}",
655+
result.message
656+
);
657+
658+
let out_memory = output.path().join(AGENT_MEMORY_DIR);
659+
assert!(out_memory.join("notes.md").exists(), "notes.md should be copied");
660+
assert!(!out_memory.join("env.txt").exists(), "symlink target must not be copied");
661+
662+
// The sensitive contents must not appear in the output at all
663+
let out_notes = std::fs::read_to_string(out_memory.join("notes.md")).unwrap();
664+
assert!(!out_notes.contains("SECRET_TOKEN"));
665+
}
666+
667+
#[cfg(unix)]
668+
#[tokio::test]
669+
async fn test_process_memory_skips_directory_symlinks() {
670+
let temp = tempfile::tempdir().unwrap();
671+
let output = tempfile::tempdir().unwrap();
672+
let mem_dir = temp.path().join(AGENT_MEMORY_DIR);
673+
std::fs::create_dir(&mem_dir).unwrap();
674+
675+
// A legitimate file
676+
std::fs::write(mem_dir.join("notes.md"), "safe notes").unwrap();
677+
678+
// A directory outside the memory dir with sensitive contents
679+
let outside_dir = temp.path().join("outside");
680+
std::fs::create_dir(&outside_dir).unwrap();
681+
std::fs::write(outside_dir.join("passwd"), "root:x:0:0").unwrap();
682+
683+
// Directory symlink inside the memory dir pointing outside
684+
std::os::unix::fs::symlink(&outside_dir, mem_dir.join("etc_backup")).unwrap();
685+
686+
let config = MemoryConfig::default();
687+
let result = process_agent_memory(temp.path(), output.path(), &config)
688+
.await
689+
.unwrap();
690+
691+
assert!(result.success);
692+
// Only notes.md should be copied
693+
assert!(
694+
result.message.contains("1 file(s) copied"),
695+
"unexpected message: {}",
696+
result.message
697+
);
698+
699+
let out_memory = output.path().join(AGENT_MEMORY_DIR);
700+
assert!(out_memory.join("notes.md").exists());
701+
// The outside directory must not have been recursed into
702+
assert!(!out_memory.join("etc_backup").exists());
703+
assert!(!out_memory.join("etc_backup").join("passwd").exists());
704+
}
705+
706+
#[cfg(unix)]
707+
#[tokio::test]
708+
async fn test_process_memory_rejects_base_directory_symlink() {
709+
// If the entire agent_memory entry is a symlink to an outside directory,
710+
// all per-file containment checks would resolve relative to that target,
711+
// so every file inside would "pass" the starts_with guard. The fix is to
712+
// reject agent_memory itself when it is a symlink (lstat check).
713+
let temp = tempfile::tempdir().unwrap();
714+
let output = tempfile::tempdir().unwrap();
715+
716+
// Sensitive directory outside the staging area
717+
let outside_dir = temp.path().join("sensitive");
718+
std::fs::create_dir(&outside_dir).unwrap();
719+
std::fs::write(outside_dir.join("environ"), "SC_WRITE_TOKEN=secret").unwrap();
720+
721+
// The whole agent_memory entry is a symlink to the sensitive directory
722+
std::os::unix::fs::symlink(&outside_dir, temp.path().join(AGENT_MEMORY_DIR)).unwrap();
723+
724+
let config = MemoryConfig::default();
725+
let result = process_agent_memory(temp.path(), output.path(), &config)
726+
.await
727+
.unwrap();
728+
729+
assert!(result.success);
730+
// Should be treated as "no memory" — nothing copied
731+
assert!(
732+
result.message.contains("No agent memory"),
733+
"unexpected message: {}",
734+
result.message
735+
);
736+
// Sensitive file must not appear in output
737+
assert!(!output.path().join(AGENT_MEMORY_DIR).join("environ").exists());
738+
}
488739
}

0 commit comments

Comments
 (0)