Skip to content

Commit 1b953bf

Browse files
jamesadevineCopilot
andcommitted
feat(safeoutputs): surface skipped symlinks in PR description and tighten patch check
Addresses PR #549 review feedback ("Address bugs and suggestions"): 1. Skipped symlinks are no longer invisible to the PR author. collect_changes_from_worktree and collect_changes_from_diff_tree now return (Vec<changes>, Vec<skipped_symlink_paths>). When non-empty, an explicit `> [!WARNING]` block listing each skipped path is appended to the PR description before posting, so the agent can see that some intended file content was deliberately dropped for safety. The Stage 3 warn! log is also retained for infrastructure observability. 2. Tightened patch-validation against suffix bypass. validate_patch_paths now compares the trimmed line for exact equality with "new file mode 120000" / "new mode 120000", rather than using starts_with. This eliminates any ambiguity from hypothetical future mode strings sharing the 120000 prefix, while still catching the real attack vectors. A new test exercises trailing whitespace and patch-body content containing '120000' to confirm neither bypasses nor false-rejects occur. 3. New tests: - test_validate_patch_paths_symlink_mode_suffix_not_bypass - test_append_skipped_symlink_notice_empty_is_passthrough - test_append_skipped_symlink_notice_lists_paths All 48 create_pull_request tests pass; full bin suite (1510 tests) green; cargo clippy clean. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 14ab2c1 commit 1b953bf

1 file changed

Lines changed: 155 additions & 20 deletions

File tree

src/safeoutputs/create_pull_request.rs

Lines changed: 155 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -955,12 +955,19 @@ impl Executor for CreatePrResult {
955955
};
956956

957957
debug!("Change detection output:\n{}", status_str);
958-
let changes = if use_diff_tree {
958+
let (changes, skipped_symlinks) = if use_diff_tree {
959959
collect_changes_from_diff_tree(&worktree_path, &status_str).await?
960960
} else {
961961
collect_changes_from_worktree(&worktree_path, &status_str).await?
962962
};
963963
debug!("Collected {} file changes for push", changes.len());
964+
if !skipped_symlinks.is_empty() {
965+
warn!(
966+
"Skipped {} symlink(s) when collecting PR file changes: {}",
967+
skipped_symlinks.len(),
968+
skipped_symlinks.join(", ")
969+
);
970+
}
964971

965972
if changes.is_empty() {
966973
// Handle no-changes based on config
@@ -1189,12 +1196,19 @@ impl Executor for CreatePrResult {
11891196

11901197
// Append agent stats then provenance footer to description.
11911198
// Footer goes last as the final unambiguous provenance marker.
1199+
// If any symlinks were skipped during file collection, surface that in the
1200+
// PR description so the agent/PR author can see that some intended file
1201+
// content was dropped for safety (otherwise the warning only appears in
1202+
// Stage 3 infrastructure logs).
11921203
let description_with_stats = crate::agent_stats::append_stats_to_body(
11931204
&self.description,
11941205
ctx,
11951206
config.include_stats,
11961207
);
1197-
let description_final = format!("{}{}", description_with_stats, generate_pr_footer());
1208+
let description_with_symlink_notice =
1209+
append_skipped_symlink_notice(&description_with_stats, &skipped_symlinks);
1210+
let description_final =
1211+
format!("{}{}", description_with_symlink_notice, generate_pr_footer());
11981212

11991213
// Create the pull request via REST API
12001214
info!("Creating pull request");
@@ -1623,8 +1637,9 @@ async fn add_reviewers_to_pr(
16231637
async fn collect_changes_from_worktree(
16241638
worktree_path: &std::path::Path,
16251639
status_output: &str,
1626-
) -> anyhow::Result<Vec<serde_json::Value>> {
1640+
) -> anyhow::Result<(Vec<serde_json::Value>, Vec<String>)> {
16271641
let mut changes = Vec::new();
1642+
let mut skipped_symlinks: Vec<String> = Vec::new();
16281643

16291644
for line in status_output.lines() {
16301645
if line.len() < 3 {
@@ -1656,13 +1671,25 @@ async fn collect_changes_from_worktree(
16561671
}
16571672
// New/untracked files
16581673
"??" | "A " | " A" | "AM" => {
1659-
push_file_change_skipping_symlinks(&mut changes, "add", file_path, &full_path)
1660-
.await?;
1674+
push_file_change_skipping_symlinks(
1675+
&mut changes,
1676+
&mut skipped_symlinks,
1677+
"add",
1678+
file_path,
1679+
&full_path,
1680+
)
1681+
.await?;
16611682
}
16621683
// Modified files
16631684
" M" | "M " | "MM" => {
1664-
push_file_change_skipping_symlinks(&mut changes, "edit", file_path, &full_path)
1665-
.await?;
1685+
push_file_change_skipping_symlinks(
1686+
&mut changes,
1687+
&mut skipped_symlinks,
1688+
"edit",
1689+
file_path,
1690+
&full_path,
1691+
)
1692+
.await?;
16661693
}
16671694
// Renamed files - format is "R old_path -> new_path"
16681695
// For "RM" (renamed + modified), we emit both a rename and an edit change.
@@ -1688,6 +1715,7 @@ async fn collect_changes_from_worktree(
16881715
let new_full_path = worktree_path.join(new_path_trimmed);
16891716
push_file_change_skipping_symlinks(
16901717
&mut changes,
1718+
&mut skipped_symlinks,
16911719
"edit",
16921720
new_path_trimmed,
16931721
&new_full_path,
@@ -1698,13 +1726,19 @@ async fn collect_changes_from_worktree(
16981726
}
16991727
// Other statuses - try to handle as edit if file exists
17001728
_ => {
1701-
push_file_change_skipping_symlinks(&mut changes, "edit", file_path, &full_path)
1702-
.await?;
1729+
push_file_change_skipping_symlinks(
1730+
&mut changes,
1731+
&mut skipped_symlinks,
1732+
"edit",
1733+
file_path,
1734+
&full_path,
1735+
)
1736+
.await?;
17031737
}
17041738
}
17051739
}
17061740

1707-
Ok(changes)
1741+
Ok((changes, skipped_symlinks))
17081742
}
17091743

17101744
/// Collect file changes from a git diff-tree --name-status output.
@@ -1714,8 +1748,9 @@ async fn collect_changes_from_worktree(
17141748
async fn collect_changes_from_diff_tree(
17151749
worktree_path: &std::path::Path,
17161750
diff_tree_output: &str,
1717-
) -> anyhow::Result<Vec<serde_json::Value>> {
1751+
) -> anyhow::Result<(Vec<serde_json::Value>, Vec<String>)> {
17181752
let mut changes = Vec::new();
1753+
let mut skipped_symlinks: Vec<String> = Vec::new();
17191754

17201755
for line in diff_tree_output.lines() {
17211756
let line = line.trim();
@@ -1746,7 +1781,14 @@ async fn collect_changes_from_diff_tree(
17461781
}));
17471782
} else if status_code == "A" {
17481783
// Added file
1749-
push_file_change_skipping_symlinks(&mut changes, "add", file_path, &full_path).await?;
1784+
push_file_change_skipping_symlinks(
1785+
&mut changes,
1786+
&mut skipped_symlinks,
1787+
"add",
1788+
file_path,
1789+
&full_path,
1790+
)
1791+
.await?;
17501792
} else if status_code.starts_with('R') && parts.len() >= 3 {
17511793
// Renamed file: R100\told_path\tnew_path
17521794
let old_path = file_path;
@@ -1768,6 +1810,7 @@ async fn collect_changes_from_diff_tree(
17681810
if status_code != "R100" {
17691811
push_file_change_skipping_symlinks(
17701812
&mut changes,
1813+
&mut skipped_symlinks,
17711814
"edit",
17721815
new_path,
17731816
&new_full_path,
@@ -1780,16 +1823,28 @@ async fn collect_changes_from_diff_tree(
17801823
validate_single_path(dest_path)?;
17811824

17821825
let dest_full_path = worktree_path.join(dest_path);
1783-
push_file_change_skipping_symlinks(&mut changes, "add", dest_path, &dest_full_path)
1784-
.await?;
1826+
push_file_change_skipping_symlinks(
1827+
&mut changes,
1828+
&mut skipped_symlinks,
1829+
"add",
1830+
dest_path,
1831+
&dest_full_path,
1832+
)
1833+
.await?;
17851834
} else {
17861835
// Modified or other — read current content
1787-
push_file_change_skipping_symlinks(&mut changes, "edit", file_path, &full_path)
1788-
.await?;
1836+
push_file_change_skipping_symlinks(
1837+
&mut changes,
1838+
&mut skipped_symlinks,
1839+
"edit",
1840+
file_path,
1841+
&full_path,
1842+
)
1843+
.await?;
17891844
}
17901845
}
17911846

1792-
Ok(changes)
1847+
Ok((changes, skipped_symlinks))
17931848
}
17941849

17951850
/// Push a file change into `changes`, skipping symlinks with a warning.
@@ -1798,8 +1853,13 @@ async fn collect_changes_from_diff_tree(
17981853
/// logic used in multiple places when collecting changes from a worktree or diff tree.
17991854
/// Uses `symlink_metadata` so symlinks are detected without being followed — this is
18001855
/// the primary defense against symlink-following exfiltration attacks in Stage 3.
1856+
///
1857+
/// Skipped symlink paths are appended to `skipped_symlinks` so the caller can surface
1858+
/// them in the PR description (the agent that produced the PR would otherwise have no
1859+
/// way to see that some of its intended file content was dropped).
18011860
async fn push_file_change_skipping_symlinks(
18021861
changes: &mut Vec<serde_json::Value>,
1862+
skipped_symlinks: &mut Vec<String>,
18031863
change_type: &str,
18041864
file_path: &str,
18051865
full_path: &std::path::Path,
@@ -1813,6 +1873,7 @@ async fn push_file_change_skipping_symlinks(
18131873
"Skipping symlink in worktree: {} (symlink-following attack prevention)",
18141874
file_path
18151875
);
1876+
skipped_symlinks.push(file_path.to_string());
18161877
}
18171878
Ok(_) => {
18181879
// Not a regular file (e.g. directory, fifo, socket) — silently skip.
@@ -1827,6 +1888,29 @@ async fn push_file_change_skipping_symlinks(
18271888
Ok(())
18281889
}
18291890

1891+
/// If any symlinks were skipped during PR file collection, append a clearly
1892+
/// marked notice to the PR description so the agent/author can see that some
1893+
/// intended content was deliberately dropped.
1894+
fn append_skipped_symlink_notice(body: &str, skipped_symlinks: &[String]) -> String {
1895+
if skipped_symlinks.is_empty() {
1896+
return body.to_string();
1897+
}
1898+
let mut notice = String::from(
1899+
"\n\n> [!WARNING]\n\
1900+
> **Symbolic links omitted from this pull request**\n\
1901+
>\n\
1902+
> The following symlinked paths were detected in the worktree and skipped\n\
1903+
> when uploading file changes (symlinks are never followed for safety):\n\
1904+
>\n",
1905+
);
1906+
for path in skipped_symlinks {
1907+
notice.push_str("> - `");
1908+
notice.push_str(path);
1909+
notice.push_str("`\n");
1910+
}
1911+
format!("{}{}", body, notice)
1912+
}
1913+
18301914
/// Read a file and produce an ADO push change entry.
18311915
/// Handles both text (rawtext) and binary (base64encoded) content.
18321916
async fn read_file_change(
@@ -1912,9 +1996,10 @@ fn validate_patch_paths(patch_content: &str) -> anyhow::Result<()> {
19121996
{
19131997
let path = line.splitn(3, ' ').nth(2).unwrap_or("").trim_matches('"');
19141998
validate_single_path(path)?;
1915-
} else if line.starts_with("new file mode 120000")
1916-
|| line.starts_with("new mode 120000")
1917-
{
1999+
} else if {
2000+
let trimmed = line.trim_end();
2001+
trimmed == "new file mode 120000" || trimmed == "new mode 120000"
2002+
} {
19182003
// Reject patch lines that INTRODUCE a symlink (git mode 120000).
19192004
// Either of these lines means the resulting tree contains a symlink:
19202005
// - "new file mode 120000" — a freshly added symlink
@@ -1927,6 +2012,10 @@ fn validate_patch_paths(patch_content: &str) -> anyhow::Result<()> {
19272012
// with "old mode 120000" + "new mode 100644" converts a symlink into a
19282013
// regular file, which is a legitimate cleanup operation and produces a
19292014
// safe worktree.
2015+
//
2016+
// The match is on the exact trimmed line (rather than `starts_with`) so
2017+
// we don't accidentally reject hypothetical future mode strings that
2018+
// happen to share the "120000" prefix.
19302019
anyhow::bail!(
19312020
"Patch introduces a symlink (mode 120000), which is not allowed"
19322021
);
@@ -2348,6 +2437,52 @@ index 0000000..abcdefg
23482437
);
23492438
}
23502439

2440+
#[test]
2441+
fn test_validate_patch_paths_symlink_mode_suffix_not_bypass() {
2442+
// The mode check uses exact line equality (after trim_end), so a fake
2443+
// mode string with extra digits like "120000 1" or "1200001" must NOT
2444+
// be misclassified — but we still want any genuine "new file mode 120000"
2445+
// followed by trailing whitespace to be rejected.
2446+
let patch_trailing_ws = "diff --git a/x b/x\nnew file mode 120000 \n";
2447+
assert!(
2448+
validate_patch_paths(patch_trailing_ws).is_err(),
2449+
"trailing whitespace must not let a symlink mode line bypass the check"
2450+
);
2451+
2452+
// A line that merely happens to share a "120000" prefix segment must
2453+
// not pass the check accidentally either — but it's also not a real
2454+
// git mode line. We just want to make sure exact-match doesn't reject
2455+
// benign content that contains "120000" in a non-mode context.
2456+
let patch_benign = "diff --git a/x b/x\n\
2457+
--- a/x\n\
2458+
+++ b/x\n\
2459+
@@ -1 +1 @@\n\
2460+
-count: 1200000\n\
2461+
+count: 1200001\n";
2462+
assert!(
2463+
validate_patch_paths(patch_benign).is_ok(),
2464+
"patch body lines containing '120000' as data must not be rejected"
2465+
);
2466+
}
2467+
2468+
#[test]
2469+
fn test_append_skipped_symlink_notice_empty_is_passthrough() {
2470+
let body = "Some PR description";
2471+
let out = append_skipped_symlink_notice(body, &[]);
2472+
assert_eq!(out, body);
2473+
}
2474+
2475+
#[test]
2476+
fn test_append_skipped_symlink_notice_lists_paths() {
2477+
let body = "Some PR description";
2478+
let skipped = vec!["secrets.txt".to_string(), "subdir/leak".to_string()];
2479+
let out = append_skipped_symlink_notice(body, &skipped);
2480+
assert!(out.starts_with(body));
2481+
assert!(out.contains("Symbolic links omitted"));
2482+
assert!(out.contains("`secrets.txt`"));
2483+
assert!(out.contains("`subdir/leak`"));
2484+
}
2485+
23512486
#[test]
23522487
fn test_validate_single_path_valid() {
23532488
assert!(validate_single_path("src/main.rs").is_ok());

0 commit comments

Comments
 (0)