Skip to content

Commit 29b221e

Browse files
jamesadevineCopilotCopilot
authored
feat(safeoutputs): add daily smoke suite and reject unknown safe-output keys (#563)
* feat(safeoutputs): add daily smoke suite and reject unknown safe-output keys * Add tests/safe-outputs/ — 26 daily-scheduled agentic-pipeline fixtures covering all 22 production safe outputs plus a janitor, a queue-build target, and a smoke-failure-reporter that uses ado-aw-debug.create-issue (PR #492). * Add validate_safe_outputs_keys to fail compile on unrecognised safe-outputs keys (previously silently dropped with only a console warning). Includes a related_safe_output_names helper that lists known tools sharing the typo's first hyphen-segment as a hint. * Rename create_pr.rs -> create_pull_request.rs so the file name matches the YAML key. Fix the long-standing reply_to_pr_comment.rs name mismatch: the MCP tool was registered as reply-to-pr-review-comment while every user-facing doc, README, prompt, and site page said reply-to-pr-comment. Tool name now matches the docs across mcp.rs tool registration, execute.rs dispatch, and unit tests. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * fix(compile): address code review suggestions in validate_safe_outputs_keys Agent-Logs-Url: https://github.com/githubnext/ado-aw/sessions/3358604b-8ec8-4b6e-918b-81f8b68ef58f Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> * fix(mcp): use wire name in create-pull-request log string Agent-Logs-Url: https://github.com/githubnext/ado-aw/sessions/b3784ea7-b6cf-4671-9d6c-2371a37962e6 Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
1 parent f8ec8c0 commit 29b221e

61 files changed

Lines changed: 22901 additions & 23 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitattributes

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,29 @@
1-
.github/workflows/*.lock.yml linguist-generated=true merge=ours
1+
.github/workflows/*.lock.yml linguist-generated=true merge=ours
2+
# BEGIN ado-aw managed (do not edit)
3+
tests/safe-outputs/add-build-tag.lock.yml linguist-generated=true merge=ours text eol=lf
4+
tests/safe-outputs/add-pr-comment.lock.yml linguist-generated=true merge=ours text eol=lf
5+
tests/safe-outputs/comment-on-work-item.lock.yml linguist-generated=true merge=ours text eol=lf
6+
tests/safe-outputs/create-branch.lock.yml linguist-generated=true merge=ours text eol=lf
7+
tests/safe-outputs/create-git-tag.lock.yml linguist-generated=true merge=ours text eol=lf
8+
tests/safe-outputs/create-pull-request.lock.yml linguist-generated=true merge=ours text eol=lf
9+
tests/safe-outputs/create-wiki-page.lock.yml linguist-generated=true merge=ours text eol=lf
10+
tests/safe-outputs/create-work-item.lock.yml linguist-generated=true merge=ours text eol=lf
11+
tests/safe-outputs/janitor.lock.yml linguist-generated=true merge=ours text eol=lf
12+
tests/safe-outputs/link-work-items.lock.yml linguist-generated=true merge=ours text eol=lf
13+
tests/safe-outputs/missing-data.lock.yml linguist-generated=true merge=ours text eol=lf
14+
tests/safe-outputs/missing-tool.lock.yml linguist-generated=true merge=ours text eol=lf
15+
tests/safe-outputs/noop-target.lock.yml linguist-generated=true merge=ours text eol=lf
16+
tests/safe-outputs/noop.lock.yml linguist-generated=true merge=ours text eol=lf
17+
tests/safe-outputs/queue-build.lock.yml linguist-generated=true merge=ours text eol=lf
18+
tests/safe-outputs/reply-to-pr-comment.lock.yml linguist-generated=true merge=ours text eol=lf
19+
tests/safe-outputs/report-incomplete.lock.yml linguist-generated=true merge=ours text eol=lf
20+
tests/safe-outputs/resolve-pr-thread.lock.yml linguist-generated=true merge=ours text eol=lf
21+
tests/safe-outputs/smoke-failure-reporter.lock.yml linguist-generated=true merge=ours text eol=lf
22+
tests/safe-outputs/submit-pr-review.lock.yml linguist-generated=true merge=ours text eol=lf
23+
tests/safe-outputs/update-pr.lock.yml linguist-generated=true merge=ours text eol=lf
24+
tests/safe-outputs/update-wiki-page.lock.yml linguist-generated=true merge=ours text eol=lf
25+
tests/safe-outputs/update-work-item.lock.yml linguist-generated=true merge=ours text eol=lf
26+
tests/safe-outputs/upload-build-attachment.lock.yml linguist-generated=true merge=ours text eol=lf
27+
tests/safe-outputs/upload-pipeline-artifact.lock.yml linguist-generated=true merge=ours text eol=lf
28+
tests/safe-outputs/upload-workitem-attachment.lock.yml linguist-generated=true merge=ours text eol=lf
29+
# END ado-aw managed

src/compile/common.rs

Lines changed: 270 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1633,11 +1633,10 @@ pub fn generate_enabled_tools_args(front_matter: &FrontMatter) -> String {
16331633
);
16341634
continue;
16351635
}
1636+
// Unreachable in practice: validate_safe_outputs_keys bails before
1637+
// the pipeline reaches this point. The check is kept as a defensive
1638+
// guard for callers that bypass the validation phase.
16361639
if !ALL_KNOWN_SAFE_OUTPUTS.contains(&key.as_str()) {
1637-
eprintln!(
1638-
"Warning: unrecognized safe-output tool '{}' — skipping (no registered tool matches this name)",
1639-
key
1640-
);
16411640
continue;
16421641
}
16431642
effective_mcp_tool_count += 1;
@@ -1654,8 +1653,9 @@ pub fn generate_enabled_tools_args(front_matter: &FrontMatter) -> String {
16541653
}
16551654

16561655
if effective_mcp_tool_count == 0 {
1657-
// Every user-specified key was either invalid or unrecognized.
1658-
// Return empty to keep all tools available (backward compat).
1656+
// Every user-specified key was either a non-MCP key or a guard path
1657+
// from the defensive check above. Return empty to keep all tools
1658+
// available (backward compat).
16591659
return String::new();
16601660
}
16611661

@@ -1874,6 +1874,117 @@ pub fn validate_resolve_pr_thread_statuses(front_matter: &FrontMatter) -> Result
18741874
Ok(())
18751875
}
18761876

1877+
/// Validate that every key under `safe-outputs:` is a known tool name.
1878+
///
1879+
/// Unknown keys (typos, stale renamed tools, debug-only tools used in the
1880+
/// regular safe-output surface) used to be silently dropped with a warning
1881+
/// in `generate_enabled_tools_args`, which made user-visible failures hide
1882+
/// as "the tool just didn't run" at Stage 1. This validator promotes the
1883+
/// warning to a hard compile error and points at candidates that share the
1884+
/// typo's first hyphen-segment so users can spot the rename.
1885+
///
1886+
/// Special-cases preserved as warnings (with `continue` in
1887+
/// `generate_enabled_tools_args`):
1888+
///
1889+
/// * `memory` — migrated to `tools: cache-memory:`. Surfaces as a warning
1890+
/// in `generate_enabled_tools_args` for back-compat; this validator
1891+
/// skips it so the migration path stays soft.
1892+
///
1893+
/// `DEBUG_ONLY_TOOLS` keys are independently rejected by
1894+
/// `validate_ado_aw_debug_config` with a more specific error message;
1895+
/// this validator skips them so the operator gets that better message.
1896+
pub fn validate_safe_outputs_keys(front_matter: &FrontMatter) -> Result<()> {
1897+
use crate::safeoutputs::{
1898+
ALL_KNOWN_SAFE_OUTPUTS, DEBUG_ONLY_TOOLS, NON_MCP_SAFE_OUTPUT_KEYS,
1899+
};
1900+
1901+
let mut unknown: Vec<(String, Vec<&'static str>)> = Vec::new();
1902+
let mut invalid_names: Vec<String> = Vec::new();
1903+
1904+
for key in front_matter.safe_outputs.keys() {
1905+
if !validate::is_safe_tool_name(key) {
1906+
invalid_names.push(key.clone());
1907+
continue;
1908+
}
1909+
if NON_MCP_SAFE_OUTPUT_KEYS.contains(&key.as_str()) {
1910+
continue;
1911+
}
1912+
// `memory` is a known migration path — left as a warning in
1913+
// generate_enabled_tools_args. Don't promote it to an error.
1914+
if key == "memory" {
1915+
continue;
1916+
}
1917+
// Debug-only tools get a more specific error from
1918+
// validate_ado_aw_debug_config, so skip them here.
1919+
if DEBUG_ONLY_TOOLS.contains(&key.as_str()) {
1920+
continue;
1921+
}
1922+
if !ALL_KNOWN_SAFE_OUTPUTS.contains(&key.as_str()) {
1923+
let related = related_safe_output_names(key);
1924+
unknown.push((key.clone(), related));
1925+
}
1926+
}
1927+
1928+
if !invalid_names.is_empty() {
1929+
invalid_names.sort();
1930+
let list = invalid_names
1931+
.iter()
1932+
.map(|n| format!(" - {n}"))
1933+
.collect::<Vec<_>>()
1934+
.join("\n");
1935+
anyhow::bail!(
1936+
"safe-outputs contains tool name(s) with invalid characters:\n{list}\n\n\
1937+
Tool names must contain only ASCII letters, digits, and hyphens. Example:\n\n \
1938+
safe-outputs:\n create-work-item: {{}}\n",
1939+
);
1940+
}
1941+
1942+
if !unknown.is_empty() {
1943+
// Stable order for deterministic error messages.
1944+
unknown.sort_by(|a, b| a.0.cmp(&b.0));
1945+
let mut msg = String::from("safe-outputs contains unrecognised tool name(s):\n");
1946+
for (name, related) in &unknown {
1947+
if related.is_empty() {
1948+
msg.push_str(&format!(" - {name}\n"));
1949+
} else {
1950+
msg.push_str(&format!(
1951+
" - {name} (similar known tools: {})\n",
1952+
related.join(", ")
1953+
));
1954+
}
1955+
}
1956+
msg.push_str(
1957+
"\nValid safe-output keys are listed in docs/safe-outputs.md. \
1958+
Each key must match exactly the kebab-case name registered by a \
1959+
tool in src/safeoutputs/ (e.g. `create-pull-request`, not \
1960+
`create-pr`).",
1961+
);
1962+
anyhow::bail!("{}", msg);
1963+
}
1964+
1965+
Ok(())
1966+
}
1967+
1968+
/// Return all known safe-output names that share `key`'s first
1969+
/// hyphen-separated segment (e.g. `create-pr` → all `create-*` tools).
1970+
/// If no candidate shares the head, returns an empty vec — better to give
1971+
/// no suggestion than a misleading one (`update-pr` for `create-pr`).
1972+
fn related_safe_output_names(key: &str) -> Vec<&'static str> {
1973+
use crate::safeoutputs::ALL_KNOWN_SAFE_OUTPUTS;
1974+
1975+
let head = key.split('-').next().unwrap_or_default();
1976+
if head.is_empty() {
1977+
return Vec::new();
1978+
}
1979+
let mut matches: Vec<&'static str> = ALL_KNOWN_SAFE_OUTPUTS
1980+
.iter()
1981+
.copied()
1982+
.filter(|name| name.split('-').next() == Some(head))
1983+
.collect();
1984+
matches.sort();
1985+
matches
1986+
}
1987+
18771988
/// Generate the setup job YAML.
18781989
///
18791990
/// Extension `setup_steps()` are injected first (download + gate steps for
@@ -2897,6 +3008,7 @@ pub async fn compile_shared(
28973008

28983009
// 10. Validations
28993010
validate_write_permissions(front_matter)?;
3011+
validate_safe_outputs_keys(front_matter)?;
29003012
validate_comment_target(front_matter)?;
29013013
validate_update_work_item_target(front_matter)?;
29023014
validate_submit_pr_review_events(front_matter)?;
@@ -4771,6 +4883,158 @@ ado-aw-debug:
47714883
assert!(result.is_err());
47724884
}
47734885

4886+
#[test]
4887+
fn test_validate_safe_outputs_keys_accepts_known_keys() {
4888+
let yaml = r#"---
4889+
name: test
4890+
description: test
4891+
safe-outputs:
4892+
noop: {}
4893+
create-pull-request:
4894+
target-branch: main
4895+
create-work-item: {}
4896+
---
4897+
"#;
4898+
let (fm, _) = parse_markdown(yaml).unwrap();
4899+
assert!(validate_safe_outputs_keys(&fm).is_ok());
4900+
}
4901+
4902+
#[test]
4903+
fn test_validate_safe_outputs_keys_accepts_empty_section() {
4904+
let (fm, _) = parse_markdown("---\nname: test\ndescription: test\n---\n").unwrap();
4905+
assert!(validate_safe_outputs_keys(&fm).is_ok());
4906+
}
4907+
4908+
#[test]
4909+
fn test_validate_safe_outputs_keys_rejects_unknown_typo_with_suggestion() {
4910+
// Common past-and-current bug: tool was renamed from `create-pr` to
4911+
// `create-pull-request` but a user (or our own smoke fixtures, before
4912+
// the rename) still references the old name. The compiler used to
4913+
// silently drop the key with only a warning.
4914+
let yaml = r#"---
4915+
name: test
4916+
description: test
4917+
safe-outputs:
4918+
create-pr:
4919+
target-branch: main
4920+
---
4921+
"#;
4922+
let (fm, _) = parse_markdown(yaml).unwrap();
4923+
let result = validate_safe_outputs_keys(&fm);
4924+
assert!(result.is_err());
4925+
let msg = result.unwrap_err().to_string();
4926+
assert!(msg.contains("create-pr"), "msg: {}", msg);
4927+
// The validator lists all `create-*` tools as hints, so the actual
4928+
// renamed-from match must appear among them.
4929+
assert!(
4930+
msg.contains("create-pull-request"),
4931+
"expected create-pull-request to appear in similar-tools list, got: {}",
4932+
msg
4933+
);
4934+
}
4935+
4936+
#[test]
4937+
fn test_validate_safe_outputs_keys_rejects_unknown_no_close_match() {
4938+
let yaml = r#"---
4939+
name: test
4940+
description: test
4941+
safe-outputs:
4942+
fabricated-tool-name: {}
4943+
---
4944+
"#;
4945+
let (fm, _) = parse_markdown(yaml).unwrap();
4946+
let result = validate_safe_outputs_keys(&fm);
4947+
assert!(result.is_err());
4948+
let msg = result.unwrap_err().to_string();
4949+
assert!(msg.contains("fabricated-tool-name"), "msg: {}", msg);
4950+
// No similar-tools line for keys that don't share a prefix with anything real.
4951+
assert!(!msg.contains("similar known tools"), "msg: {}", msg);
4952+
}
4953+
4954+
#[test]
4955+
fn test_validate_safe_outputs_keys_does_not_double_report_debug_only_tool() {
4956+
// create-issue is in DEBUG_ONLY_TOOLS — validate_ado_aw_debug_config
4957+
// gives a better error for it. This validator should skip rather
4958+
// than redundantly flag it as "unknown".
4959+
let yaml = r#"---
4960+
name: test
4961+
description: test
4962+
safe-outputs:
4963+
create-issue:
4964+
target-repo: githubnext/ado-aw
4965+
---
4966+
"#;
4967+
let (fm, _) = parse_markdown(yaml).unwrap();
4968+
assert!(validate_safe_outputs_keys(&fm).is_ok());
4969+
}
4970+
4971+
#[test]
4972+
fn test_validate_safe_outputs_keys_allows_memory_migration_key() {
4973+
// `memory` was migrated to `tools: cache-memory:`. The compiler
4974+
// emits a soft warning for it during enabled-tools generation,
4975+
// so this strict validator must not promote it to an error.
4976+
let yaml = r#"---
4977+
name: test
4978+
description: test
4979+
safe-outputs:
4980+
memory: {}
4981+
---
4982+
"#;
4983+
let (fm, _) = parse_markdown(yaml).unwrap();
4984+
assert!(validate_safe_outputs_keys(&fm).is_ok());
4985+
}
4986+
4987+
#[test]
4988+
fn test_validate_safe_outputs_keys_rejects_invalid_characters() {
4989+
let yaml = r#"---
4990+
name: test
4991+
description: test
4992+
safe-outputs:
4993+
bad name with spaces: {}
4994+
---
4995+
"#;
4996+
let (fm, _) = parse_markdown(yaml).unwrap();
4997+
let result = validate_safe_outputs_keys(&fm);
4998+
assert!(result.is_err());
4999+
let msg = result.unwrap_err().to_string();
5000+
assert!(
5001+
msg.contains("ASCII letters, digits, and hyphens"),
5002+
"msg: {}",
5003+
msg
5004+
);
5005+
}
5006+
5007+
#[test]
5008+
fn test_validate_safe_outputs_keys_reports_all_invalid_characters() {
5009+
// Both invalid keys should appear in the single error (not just the first).
5010+
let yaml = "---\nname: test\ndescription: test\nsafe-outputs:\n bad key: {}\n also bad!: {}\n---\n";
5011+
let (fm, _) = parse_markdown(yaml).unwrap();
5012+
let result = validate_safe_outputs_keys(&fm);
5013+
assert!(result.is_err());
5014+
let msg = result.unwrap_err().to_string();
5015+
assert!(
5016+
msg.contains("bad key") && msg.contains("also bad!"),
5017+
"expected both keys in error, got: {}",
5018+
msg
5019+
);
5020+
}
5021+
5022+
#[test]
5023+
fn test_related_safe_output_names_returns_all_create_tools_for_create_pr() {
5024+
let related = related_safe_output_names("create-pr");
5025+
assert!(related.contains(&"create-pull-request"));
5026+
assert!(related.contains(&"create-branch"));
5027+
assert!(related.contains(&"create-work-item"));
5028+
// Sanity check: nothing from a different first segment should leak in.
5029+
assert!(!related.contains(&"update-pr"));
5030+
}
5031+
5032+
#[test]
5033+
fn test_related_safe_output_names_returns_empty_for_distant_string() {
5034+
let related = related_safe_output_names("fabricated-tool-name");
5035+
assert!(related.is_empty());
5036+
}
5037+
47745038
#[test]
47755039
fn test_validate_rejects_create_issue_under_safe_outputs() {
47765040
// Defence-in-depth: `create-issue` MUST NOT appear under

src/execute.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ async fn dispatch_pr_tools(
337337
"add-pr-comment" => AddPrCommentResult,
338338
"update-pr" => UpdatePrResult,
339339
"submit-pr-review" => SubmitPrReviewResult,
340-
"reply-to-pr-review-comment" => ReplyToPrCommentResult,
340+
"reply-to-pr-comment" => ReplyToPrCommentResult,
341341
"resolve-pr-thread" => ResolvePrThreadResult,
342342
})
343343
}

src/mcp.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -711,7 +711,7 @@ Use 'self' for the pipeline's own repository, or a repository alias from the che
711711
&self,
712712
params: Parameters<CreatePrParams>,
713713
) -> Result<CallToolResult, McpError> {
714-
info!("Tool called: create_pr - '{}'", params.0.title);
714+
info!("Tool called: create-pull-request - '{}'", params.0.title);
715715
// Sanitize untrusted agent-provided text fields (IS-01)
716716
let mut sanitized = params.0;
717717
sanitized.title = sanitize_text(&sanitized.title);
@@ -1352,7 +1352,7 @@ submitted during safe output processing. Requires 'allowed-events' to be configu
13521352
}
13531353

13541354
#[tool(
1355-
name = "reply-to-pr-review-comment",
1355+
name = "reply-to-pr-comment",
13561356
description = "Reply to an existing review comment thread on an Azure DevOps pull request. \
13571357
Provide the PR ID, thread ID, and reply content. The reply will be posted during safe output processing."
13581358
)]
@@ -1361,7 +1361,7 @@ Provide the PR ID, thread ID, and reply content. The reply will be posted during
13611361
params: Parameters<ReplyToPrCommentParams>,
13621362
) -> Result<CallToolResult, McpError> {
13631363
info!(
1364-
"Tool called: reply-to-pr-review-comment - PR #{} thread #{}",
1364+
"Tool called: reply-to-pr-comment - PR #{} thread #{}",
13651365
params.0.pull_request_id, params.0.thread_id
13661366
);
13671367
let mut sanitized = params.0;
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ pub enum ProtectedFiles {
379379
Allowed,
380380
}
381381

382-
/// Configuration for the create_pr tool (specified in front matter)
382+
/// Configuration for the create-pull-request tool (specified in front matter)
383383
///
384384
/// Example front matter:
385385
/// ```yaml

0 commit comments

Comments
 (0)