fix(audit): address PR review feedback

jamesadevine · Copilot · jamesadevine · commit c07ea298e12c · 2026-06-01T22:08:24.000+01:00
Three issues raised by the Rust PR Reviewer on #691: 1. **Lexicographic sort wrong for multi-digit run IDs.** Previously `find_artifact_dir` and `find_verdict_path` picked the "lexicographically last" `<prefix>_<id>` directory, and `top_level_dirs_with_prefix` ordered its matches lexicographically; a lexicographic comparison sorts `_9` after `_10` (because `'9' > '1'`). On a build retry that produced both `analyzed_outputs_9` and `analyzed_outputs_10`, the older verdict would be read and the run could be mis-classified as safe. New `crate::audit::cmp_numeric_suffix` extracts the trailing token after the final `_`, parses it as `u64`, and compares numerically with a lexicographic tie-breaker for non-numeric suffixes. All three call sites now use it. Regression tests added in mod.rs, detection.rs, and cli.rs. 2. **Security: `ADO_AW_TEST_ORG_URL` was always active in production.** The override was `#[doc(hidden)]` but not gated by build mode, so a stray env var (debugging leftover, hostile CI environment) could silently redirect ADO REST calls to an attacker-controlled URL in a release binary. Gated on `cfg(debug_assertions)`: debug builds (`cargo test`, `cargo run`) keep the override AND emit a loud `warn!` on every invocation; release builds (all published artifacts via `cargo build --release`) replace the body with a no-op so a stray env var has no effect. The integration test in `tests/audit_it.rs` continues to work because `cargo test` builds in debug mode. 3. **Blocking `std::fs::read_dir` in async context.** `safe_outputs.rs` had two helpers (`top_level_dirs_with_prefix`, `collect_named_files`) using sync I/O from inside `async fn analyze_safe_outputs`. On a Tokio multi-thread runtime this blocks an executor thread for the duration of the directory walk. Both helpers converted to `async fn` using `tokio::fs::read_dir`. The recursive `collect_named_files` uses `Box::pin` to satisfy the async-recursion shape (consistent with the existing pattern in `crate::detect::scan_directory`). 
Tests: 1745 unit tests + 3 integration tests pass (up from 1740 — 5 new regression tests for the numeric-suffix bug). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
diff --git a/src/ado/mod.rs b/src/ado/mod.rs
@@ -829,16 +829,42 @@ pub async fn resolve_ado_context(
     Ok(ctx)
 }
 
-#[doc(hidden)]
+/// Test-only override that lets the integration tests in `tests/audit_it.rs`
+/// redirect ADO REST calls at a mock server via the `ADO_AW_TEST_ORG_URL`
+/// environment variable.
+///
+/// **Compiled out of release builds.** All published artifacts ship with
+/// `cargo build --release`, whose default profile disables `debug_assertions` and
+/// replaces the body of this function with a no-op via the
+/// `#[cfg(not(debug_assertions))]` branch below. This prevents an
+/// attacker-controlled env var (a leftover from a debugging session, a
+/// hostile CI environment, etc.) from silently redirecting production
+/// ADO API calls. Debug builds — used by `cargo test`, integration
+/// tests, and `cargo run` during development — keep the override
+/// available, and emit a `warn!` on every invocation so the override is
+/// loud and obvious in logs.
+#[cfg(debug_assertions)]
 fn apply_test_org_url_override(ctx: &mut AdoContext) {
     if let Ok(org_url) = std::env::var("ADO_AW_TEST_ORG_URL") {
         let org_url = org_url.trim().trim_end_matches('/');
         if !org_url.is_empty() {
+            log::warn!(
+                "ADO_AW_TEST_ORG_URL test override active: redirecting ADO REST calls \
+                 from {} to {} (this branch is compiled out of release builds)",
+                ctx.org_url,
+                org_url
+            );
             ctx.org_url = org_url.to_string();
         }
     }
 }
 
+#[cfg(not(debug_assertions))]
+fn apply_test_org_url_override(_: &mut AdoContext) {
+    // Release builds intentionally ignore ADO_AW_TEST_ORG_URL so that a
+    // stray env var cannot redirect production ADO API calls.
+}
+
 /// Builds the list of definitions to update from explicit IDs or auto-detection.
 /// Returns `None` when auto-detection finds no agentic pipelines (caller should exit cleanly).
 pub async fn resolve_definitions(
diff --git a/src/audit/analyzers/detection.rs b/src/audit/analyzers/detection.rs
@@ -115,7 +115,9 @@ async fn find_verdict_path(download_root: &Path) -> Option<PathBuf> {
 
         let path = entry.path();
         match &latest_dir {
-            Some((current_name, _)) if name <= *current_name => {}
+            Some((current_name, _))
+                if crate::audit::cmp_numeric_suffix(&name, current_name)
+                    != std::cmp::Ordering::Greater => {}
             _ => latest_dir = Some((name, path)),
         }
     }
@@ -293,7 +295,7 @@ mod tests {
     }
 
     #[tokio::test]
-    async fn uses_lexicographically_last_analyzed_outputs_directory() {
+    async fn uses_highest_numbered_analyzed_outputs_directory() {
         let temp_dir = TempDir::new().unwrap();
         write_verdict(
             &temp_dir,
@@ -317,4 +319,31 @@ mod tests {
             Some(expected_verdict_path("analyzed_outputs_42"))
         );
     }
+
+    /// Regression: lexicographic sort would pick `analyzed_outputs_9`
+    /// here. Numeric-suffix sort must pick `analyzed_outputs_10`.
+    #[tokio::test]
+    async fn picks_highest_numeric_suffix_not_lexicographic() {
+        let temp_dir = TempDir::new().unwrap();
+        write_verdict(
+            &temp_dir,
+            "analyzed_outputs_9",
+            r#"{"prompt_injection":false,"secret_leak":false,"malicious_patch":false,"reasons":[]}"#,
+        )
+        .await;
+        write_verdict(
+            &temp_dir,
+            "analyzed_outputs_10",
+            r#"{"prompt_injection":true,"secret_leak":false,"malicious_patch":false,"reasons":["newer verdict"]}"#,
+        )
+        .await;
+
+        let analysis = analyze_detection(temp_dir.path()).await.unwrap().unwrap();
+
+        assert!(analysis.threats.prompt_injection);
+        assert_eq!(
+            analysis.verdict_path,
+            Some(expected_verdict_path("analyzed_outputs_10"))
+        );
+    }
 }
diff --git a/src/audit/analyzers/safe_outputs.rs b/src/audit/analyzers/safe_outputs.rs
@@ -4,9 +4,9 @@ use anyhow::Context;
 use serde::Deserialize;
 use serde_json::Value;
 use std::collections::{BTreeMap, VecDeque};
-use std::fs;
 use std::io::ErrorKind;
 use std::path::{Path, PathBuf};
+use tokio::fs;
 
 use crate::audit::model::{
     CreatedItemReport, Finding, RejectedSafeOutputsRollup, SafeOutputExecution,
@@ -63,9 +63,9 @@ struct IndexedExecutionRecord {
 pub async fn analyze_safe_outputs(
     download_root: &std::path::Path,
 ) -> anyhow::Result<SafeOutputAnalysis> {
-    let proposals_path = find_proposals_file(download_root)?;
-    let detection_path = find_detection_file(download_root)?;
-    let executions_path = find_execution_file(download_root)?;
+    let proposals_path = find_proposals_file(download_root).await?;
+    let detection_path = find_detection_file(download_root).await?;
+    let executions_path = find_execution_file(download_root).await?;
 
     let proposals = load_proposals(proposals_path.as_deref()).await?;
     let detection = load_detection_verdict(detection_path.as_deref()).await?;
@@ -526,46 +526,58 @@ fn truncate_reason(reason: String, max_chars: usize) -> String {
     }
 }
 
-fn find_proposals_file(download_root: &Path) -> anyhow::Result<Option<PathBuf>> {
-    for directory in top_level_dirs_with_prefix(download_root, "agent_outputs_")? {
+async fn find_proposals_file(download_root: &Path) -> anyhow::Result<Option<PathBuf>> {
+    for directory in top_level_dirs_with_prefix(download_root, "agent_outputs_").await? {
         for candidate in [
             directory.join("staging").join(SAFE_OUTPUT_FILENAME),
             directory.join(SAFE_OUTPUT_FILENAME),
         ] {
-            if candidate.is_file() {
+            if fs::metadata(&candidate)
+                .await
+                .map(|m| m.is_file())
+                .unwrap_or(false)
+            {
                 return Ok(Some(candidate));
             }
         }
     }
     Ok(None)
 }
 
-fn find_detection_file(download_root: &Path) -> anyhow::Result<Option<PathBuf>> {
-    for directory in top_level_dirs_with_prefix(download_root, "analyzed_outputs_")? {
+async fn find_detection_file(download_root: &Path) -> anyhow::Result<Option<PathBuf>> {
+    for directory in top_level_dirs_with_prefix(download_root, "analyzed_outputs_").await? {
         let candidate = directory.join("threat-analysis.json");
-        if candidate.is_file() {
+        if fs::metadata(&candidate)
+            .await
+            .map(|m| m.is_file())
+            .unwrap_or(false)
+        {
             return Ok(Some(candidate));
         }
     }
     Ok(None)
 }
 
-fn find_execution_file(download_root: &Path) -> anyhow::Result<Option<PathBuf>> {
+async fn find_execution_file(download_root: &Path) -> anyhow::Result<Option<PathBuf>> {
     let preferred = download_root
         .join("safe_outputs")
         .join(EXECUTED_NDJSON_FILENAME);
-    if preferred.is_file() {
+    if fs::metadata(&preferred)
+        .await
+        .map(|m| m.is_file())
+        .unwrap_or(false)
+    {
         return Ok(Some(preferred));
     }
 
     let mut matches = Vec::new();
-    collect_named_files(download_root, EXECUTED_NDJSON_FILENAME, &mut matches)?;
+    collect_named_files(download_root, EXECUTED_NDJSON_FILENAME, &mut matches).await?;
     matches.sort();
     Ok(matches.into_iter().next())
 }
 
-fn top_level_dirs_with_prefix(root: &Path, prefix: &str) -> anyhow::Result<Vec<PathBuf>> {
-    let entries = match fs::read_dir(root) {
+async fn top_level_dirs_with_prefix(root: &Path, prefix: &str) -> anyhow::Result<Vec<PathBuf>> {
+    let mut entries = match fs::read_dir(root).await {
         Ok(entries) => entries,
         Err(error) if error.kind() == ErrorKind::NotFound => return Ok(Vec::new()),
         Err(error) => {
@@ -574,11 +586,20 @@ fn top_level_dirs_with_prefix(root: &Path, prefix: &str) -> anyhow::Result<Vec<P
         }
     };
 
-    let mut matches = Vec::new();
-    for entry in entries {
-        let entry = entry.with_context(|| format!("Failed to iterate {}", root.display()))?;
+    let mut matches: Vec<(String, PathBuf)> = Vec::new();
+    loop {
+        let entry = match entries.next_entry().await {
+            Ok(Some(entry)) => entry,
+            Ok(None) => break,
+            Err(error) => {
+                return Err(error)
+                    .with_context(|| format!("Failed to iterate {}", root.display()));
+            }
+        };
+
         let file_type = entry
             .file_type()
+            .await
             .with_context(|| format!("Failed to inspect {}", entry.path().display()))?;
         if !file_type.is_dir() {
             continue;
@@ -588,43 +609,55 @@ fn top_level_dirs_with_prefix(root: &Path, prefix: &str) -> anyhow::Result<Vec<P
             continue;
         };
         if name.starts_with(prefix) {
-            matches.push(entry.path());
+            matches.push((name, entry.path()));
         }
     }
-    matches.sort();
-    Ok(matches)
+    // Sort ascending by numeric suffix so `agent_outputs_9` comes before
+    // `agent_outputs_10` (lexicographic sort gets this wrong).
+    matches.sort_by(|(a, _), (b, _)| crate::audit::cmp_numeric_suffix(a, b));
+    Ok(matches.into_iter().map(|(_, path)| path).collect())
 }
 
-fn collect_named_files(
-    root: &Path,
-    file_name: &str,
-    matches: &mut Vec<PathBuf>,
-) -> anyhow::Result<()> {
-    let entries = match fs::read_dir(root) {
-        Ok(entries) => entries,
-        Err(error) if error.kind() == ErrorKind::NotFound => return Ok(()),
-        Err(error) => {
-            return Err(error)
-                .with_context(|| format!("Failed to read directory {}", root.display()));
-        }
-    };
+fn collect_named_files<'a>(
+    root: &'a Path,
+    file_name: &'a str,
+    matches: &'a mut Vec<PathBuf>,
+) -> std::pin::Pin<Box<dyn std::future::Future<Output = anyhow::Result<()>> + Send + 'a>> {
+    Box::pin(async move {
+        let mut entries = match fs::read_dir(root).await {
+            Ok(entries) => entries,
+            Err(error) if error.kind() == ErrorKind::NotFound => return Ok(()),
+            Err(error) => {
+                return Err(error)
+                    .with_context(|| format!("Failed to read directory {}", root.display()));
+            }
+        };
 
-    for entry in entries {
-        let entry = entry.with_context(|| format!("Failed to iterate {}", root.display()))?;
-        let path = entry.path();
-        let file_type = entry
-            .file_type()
-            .with_context(|| format!("Failed to inspect {}", path.display()))?;
-        if file_type.is_dir() {
-            collect_named_files(&path, file_name, matches)?;
-        } else if file_type.is_file()
-            && path.file_name().and_then(|name| name.to_str()) == Some(file_name)
-        {
-            matches.push(path);
+        loop {
+            let entry = match entries.next_entry().await {
+                Ok(Some(entry)) => entry,
+                Ok(None) => break,
+                Err(error) => {
+                    return Err(error)
+                        .with_context(|| format!("Failed to iterate {}", root.display()));
+                }
+            };
+            let path = entry.path();
+            let file_type = entry
+                .file_type()
+                .await
+                .with_context(|| format!("Failed to inspect {}", path.display()))?;
+            if file_type.is_dir() {
+                collect_named_files(&path, file_name, matches).await?;
+            } else if file_type.is_file()
+                && path.file_name().and_then(|name| name.to_str()) == Some(file_name)
+            {
+                matches.push(path);
+            }
         }
-    }
 
-    Ok(())
+        Ok(())
+    })
 }
 
 #[cfg(test)]
diff --git a/src/audit/cli.rs b/src/audit/cli.rs
@@ -611,17 +611,19 @@ async fn collect_files_under(run_dir: &Path, start_dir: &Path) -> Result<Vec<Fil
 
 async fn find_artifact_dir(run_dir: &Path, prefix: &str) -> Option<PathBuf> {
     let mut entries = tokio::fs::read_dir(run_dir).await.ok()?;
-    let mut hits = Vec::new();
+    let mut hits: Vec<(String, PathBuf)> = Vec::new();
     while let Ok(Some(entry)) = entries.next_entry().await {
         if entry.file_type().await.map(|t| t.is_dir()).unwrap_or(false)
             && let Some(name) = entry.file_name().to_str()
             && (name == prefix || name.starts_with(&format!("{}_", prefix)))
         {
-            hits.push(entry.path());
+            hits.push((name.to_string(), entry.path()));
         }
     }
-    hits.sort();
-    hits.pop()
+    // Numeric-suffix sort so `agent_outputs_10` outranks
+    // `agent_outputs_9` (lexicographic sort gets this wrong).
+    hits.sort_by(|(a, _), (b, _)| crate::audit::cmp_numeric_suffix(a, b));
+    hits.pop().map(|(_, path)| path)
 }
 
 fn is_authz_error(error: &anyhow::Error) -> bool {
@@ -670,7 +672,7 @@ mod tests {
     }
 
     #[tokio::test]
-    async fn find_artifact_dir_picks_lexicographically_last_match() {
+    async fn find_artifact_dir_picks_highest_numbered_match() {
         let temp_dir = tempfile::tempdir().expect("tempdir");
         tokio::fs::create_dir_all(temp_dir.path().join("agent_outputs_001"))
             .await
@@ -692,6 +694,28 @@ mod tests {
         );
     }
 
+    /// Regression test: lexicographic sort would pick `agent_outputs_9`
+    /// here (because `'9' > '1'`); numeric-suffix sort must pick
+    /// `agent_outputs_10` instead.
+    #[tokio::test]
+    async fn find_artifact_dir_orders_multi_digit_suffixes_numerically() {
+        let temp_dir = tempfile::tempdir().expect("tempdir");
+        for suffix in ["1", "2", "9", "10", "100"] {
+            tokio::fs::create_dir_all(temp_dir.path().join(format!("agent_outputs_{suffix}")))
+                .await
+                .expect("create dir");
+        }
+
+        let found = find_artifact_dir(temp_dir.path(), "agent_outputs")
+            .await
+            .expect("find artifact dir");
+
+        assert_eq!(
+            found.file_name().and_then(|name| name.to_str()),
+            Some("agent_outputs_100")
+        );
+    }
+
     #[test]
     fn artifact_filter_mapping_matches_expected_sets() {
         let filters = vec![
diff --git a/src/audit/mod.rs b/src/audit/mod.rs