Skip to content

Commit e88d8da

Browse files
feat: apply max budget enforcement to all safe-output tools (#91)
* refactor: raise max budget to ToolResult trait level Move the max safe-output budget from per-config-struct fields to the ToolResult trait as DEFAULT_MAX, making it a first-class concept that each tool declares at the type level. - Add DEFAULT_MAX associated constant (default: 1) to ToolResult trait - Extend tool_result! macro with optional default_max parameter - Remove max field and default_max() from all 6 config structs - Replace MaxConfig deserialization with resolve_max() that reads the operator's frontmatter override, falling back to T::DEFAULT_MAX - Use register_budgets! macro to build budget map from concrete types - Simplify config structs to derive(Default) where possible Adding a new budgeted tool now only requires: 1. Set default_max in the tool_result! macro (or override DEFAULT_MAX) 2. Add the type to register_budgets!() in execute.rs Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * fix: address PR review feedback - Fix UTF-8 panic in extract_entry_context: use char_indices() for safe truncation instead of byte-offset slicing - Add safety comment above register_budgets! noting the manual sync requirement when adding new ToolResult implementors - Expand tool_result! macro doc comment to explain both arms (with and without default_max) - Add test for multi-byte UTF-8 title truncation Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * fix: sanitize control chars in log context before sanitization pass extract_entry_context runs before execute_sanitized, so raw NDJSON strings could contain newlines or other control characters that produce misleading log entries. Strip control chars from title and path fields before formatting into log messages. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent d1180b0 commit e88d8da

7 files changed

Lines changed: 337 additions & 120 deletions

File tree

AGENTS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -800,6 +800,7 @@ Creates an Azure DevOps work item.
800800
- `assignee` - User to assign (email or display name)
801801
- `tags` - List of tags to apply
802802
- `custom-fields` - Map of custom field reference names to values (e.g., `Custom.MyField: "value"`)
803+
- `max` - Maximum number of create-work-item outputs allowed per run (default: 1)
803804
- `artifact-link` - Configuration for GitHub Copilot artifact linking:
804805
- `enabled` - Whether to add an artifact link (default: false)
805806
- `repository` - Repository name override (defaults to BUILD_REPOSITORY_NAME)
@@ -897,6 +898,7 @@ Note: The source branch name is auto-generated from a sanitized version of the P
897898
- `reviewers` - List of reviewer emails to add
898899
- `labels` - List of labels to apply
899900
- `work-items` - List of work item IDs to link
901+
- `max` - Maximum number of create-pull-request outputs allowed per run (default: 1)
900902
901903
**Multi-repository support:**
902904
When `workspace: root` and multiple repositories are checked out, agents can create PRs for any allowed repository:
@@ -974,6 +976,7 @@ safe-outputs:
974976
path-prefix: "/agent-output" # Optional — prepended to the agent-supplied path (restricts write scope)
975977
title-prefix: "[Agent] " # Optional — prepended to the last path segment (the page title)
976978
comment: "Created by agent" # Optional — default commit comment when agent omits one
979+
max: 1 # Maximum number of create-wiki-page outputs allowed per run (default: 1)
977980
```
978981

979982
Note: `wiki-name` is required. If it is not set, execution fails with an explicit error message.
@@ -995,6 +998,7 @@ safe-outputs:
995998
path-prefix: "/agent-output" # Optional — prepended to the agent-supplied path (restricts write scope)
996999
title-prefix: "[Agent] " # Optional — prepended to the last path segment (the page title)
9971000
comment: "Updated by agent" # Optional — default commit comment when agent omits one
1001+
max: 1 # Maximum number of update-wiki-page outputs allowed per run (default: 1)
9981002
```
9991003

10001004
Note: `wiki-name` is required. If it is not set, execution fails with an explicit error message.

src/execute.rs

Lines changed: 267 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,14 @@
66
use anyhow::{Result, bail};
77
use log::{debug, error, info, warn};
88
use serde_json::Value;
9+
use std::collections::HashMap;
910
use std::path::Path;
1011

1112
use crate::ndjson::{self, SAFE_OUTPUT_FILENAME};
1213
use crate::tools::{
13-
CommentOnWorkItemConfig, CommentOnWorkItemResult, CreatePrResult, CreateWikiPageResult, CreateWorkItemResult, ExecutionContext, ExecutionResult,
14-
Executor, UpdateWikiPageResult, UpdateWorkItemConfig, UpdateWorkItemResult,
14+
CreatePrResult, CreateWikiPageResult, CreateWorkItemResult, CommentOnWorkItemResult,
15+
ExecutionContext, ExecutionResult, Executor, ToolResult,
16+
UpdateWikiPageResult, UpdateWorkItemResult,
1517
};
1618

1719
// Re-export memory types for use by main.rs
@@ -87,15 +89,31 @@ pub async fn execute_safe_outputs(
8789
}
8890
}
8991

90-
// Fetch the update-work-item max once; used to skip excess entries without aborting the batch
91-
let update_wi_config: UpdateWorkItemConfig = ctx.get_tool_config("update-work-item");
92-
let max_update_wi = update_wi_config.max as usize;
93-
let mut update_wi_executed: usize = 0;
94-
95-
// Fetch the comment-on-work-item max once; same skip-and-continue pattern
96-
let comment_wi_config: CommentOnWorkItemConfig = ctx.get_tool_config("comment-on-work-item");
97-
let max_comment_wi = comment_wi_config.max as usize;
98-
let mut comment_wi_executed: usize = 0;
92+
// Build budget map: tool_name → (executed_count, max_allowed).
93+
// Each tool declares its DEFAULT_MAX via the ToolResult trait; the operator can
94+
// override it with `max` in the front-matter config JSON.
95+
//
96+
// IMPORTANT: When adding a new ToolResult implementor, also register it here
97+
// so its budget is enforced. There is no compile-time guard for this.
98+
let mut budgets: HashMap<&str, (usize, usize)> = HashMap::new();
99+
macro_rules! register_budgets {
100+
($($tool:ty),+ $(,)?) => {
101+
$({
102+
let name = <$tool>::NAME;
103+
let default = <$tool>::DEFAULT_MAX;
104+
let max = resolve_max(ctx, name, default);
105+
budgets.insert(name, (0, max));
106+
})+
107+
};
108+
}
109+
register_budgets!(
110+
CreateWorkItemResult,
111+
CreatePrResult,
112+
UpdateWorkItemResult,
113+
CommentOnWorkItemResult,
114+
CreateWikiPageResult,
115+
UpdateWikiPageResult,
116+
);
99117

100118
let mut results = Vec::new();
101119
for (i, entry) in entries.iter().enumerate() {
@@ -107,35 +125,18 @@ pub async fn execute_safe_outputs(
107125
entry_json
108126
);
109127

110-
// Enforce update-work-item max: skip excess entries rather than aborting the whole batch.
128+
// Generic budget enforcement: skip excess entries rather than aborting the whole batch.
111129
// Budget is consumed before execution so that failed attempts (target policy rejection,
112130
// network errors) still count — this prevents unbounded retries against a failing endpoint.
113-
if entry.get("name").and_then(|n| n.as_str()) == Some("update-work-item") {
114-
let wi_id = entry
115-
.get("id")
116-
.and_then(|v| v.as_u64())
117-
.map(|id| format!(" (work item #{})", id))
118-
.unwrap_or_default();
119-
if let Some(result) = check_budget(entries.len(), i, "update-work-item", &wi_id, update_wi_executed, max_update_wi) {
120-
results.push(result);
121-
continue;
122-
}
123-
update_wi_executed += 1;
124-
}
125-
126-
// Enforce comment-on-work-item max: same skip-and-continue pattern as update-work-item.
127-
// Budget is consumed before execution so that failed attempts still count.
128-
if entry.get("name").and_then(|n| n.as_str()) == Some("comment-on-work-item") {
129-
let wi_id = entry
130-
.get("work_item_id")
131-
.and_then(|v| v.as_i64())
132-
.map(|id| format!(" (work item #{})", id))
133-
.unwrap_or_default();
134-
if let Some(result) = check_budget(entries.len(), i, "comment-on-work-item", &wi_id, comment_wi_executed, max_comment_wi) {
135-
results.push(result);
136-
continue;
131+
if let Some(tool_name) = entry.get("name").and_then(|n| n.as_str()) {
132+
if let Some((executed, max)) = budgets.get_mut(tool_name) {
133+
let context_id = extract_entry_context(entry);
134+
if let Some(result) = check_budget(entries.len(), i, tool_name, &context_id, *executed, *max) {
135+
results.push(result);
136+
continue;
137+
}
138+
*executed += 1;
137139
}
138-
comment_wi_executed += 1;
139140
}
140141

141142
match execute_safe_output(entry, ctx).await {
@@ -284,6 +285,43 @@ pub async fn execute_safe_output(
284285
Ok((tool_name.to_string(), result))
285286
}
286287

288+
/// Read the operator's `max` override from the tool's config JSON, falling back to the
289+
/// tool's `DEFAULT_MAX` (declared on the `ToolResult` trait) when not configured.
290+
fn resolve_max(ctx: &ExecutionContext, tool_name: &str, default_max: u32) -> usize {
291+
ctx.tool_configs
292+
.get(tool_name)
293+
.and_then(|v| v.get("max"))
294+
.and_then(|v| v.as_u64())
295+
.map(|v| v as usize)
296+
.unwrap_or(default_max as usize)
297+
}
298+
299+
/// Extract a human-readable context identifier from a safe-output entry for log messages.
300+
/// Called before sanitization, so all string values are stripped of control characters
301+
/// to prevent log injection.
302+
fn extract_entry_context(entry: &Value) -> String {
303+
if let Some(id) = entry.get("id").and_then(|v| v.as_u64()) {
304+
return format!(" (work item #{})", id);
305+
}
306+
if let Some(id) = entry.get("work_item_id").and_then(|v| v.as_i64()) {
307+
return format!(" (work item #{})", id);
308+
}
309+
if let Some(title) = entry.get("title").and_then(|v| v.as_str()) {
310+
let clean: String = title.chars().filter(|c| !c.is_control()).collect();
311+
let truncated: &str = if clean.chars().count() > 40 {
312+
&clean[..clean.char_indices().nth(40).map(|(i, _)| i).unwrap_or(clean.len())]
313+
} else {
314+
&clean
315+
};
316+
return format!(" (\"{}\")", truncated);
317+
}
318+
if let Some(path) = entry.get("path").and_then(|v| v.as_str()) {
319+
let clean: String = path.chars().filter(|c| !c.is_control()).collect();
320+
return format!(" (path: {})", clean);
321+
}
322+
String::new()
323+
}
324+
287325
/// Returns `Some(result)` when the budget for `tool_name` is exhausted so the caller can push the
288326
/// result and `continue` to the next entry. Returns `None` when a budget slot is still available
289327
/// and the caller should proceed with execution.
@@ -735,4 +773,196 @@ mod tests {
735773
let r = result.unwrap();
736774
assert!(r.message.contains("(work item #42)"));
737775
}
776+
777+
// --- extract_entry_context unit tests ---
778+
779+
#[test]
780+
fn test_extract_entry_context_with_id() {
781+
let entry = serde_json::json!({"name": "update-work-item", "id": 42});
782+
assert_eq!(extract_entry_context(&entry), " (work item #42)");
783+
}
784+
785+
#[test]
786+
fn test_extract_entry_context_with_work_item_id() {
787+
let entry = serde_json::json!({"name": "comment-on-work-item", "work_item_id": 99});
788+
assert_eq!(extract_entry_context(&entry), " (work item #99)");
789+
}
790+
791+
#[test]
792+
fn test_extract_entry_context_with_title() {
793+
let entry = serde_json::json!({"name": "create-work-item", "title": "Fix the bug"});
794+
assert_eq!(extract_entry_context(&entry), " (\"Fix the bug\")");
795+
}
796+
797+
#[test]
798+
fn test_extract_entry_context_with_path() {
799+
let entry = serde_json::json!({"name": "create-wiki-page", "path": "/Overview/NewPage"});
800+
assert_eq!(extract_entry_context(&entry), " (path: /Overview/NewPage)");
801+
}
802+
803+
#[test]
804+
fn test_extract_entry_context_truncates_long_title_utf8_safe() {
805+
// 41 emoji characters — each is 4 bytes, so naive &title[..40] would panic
806+
let title = "🔥".repeat(41);
807+
let entry = serde_json::json!({"name": "create-work-item", "title": title});
808+
let ctx = extract_entry_context(&entry);
809+
assert!(ctx.starts_with(" (\""));
810+
assert!(ctx.ends_with("\")"));
811+
// Should contain exactly 40 emoji chars (not panic)
812+
let inner = &ctx[3..ctx.len() - 2];
813+
assert_eq!(inner.chars().count(), 40);
814+
}
815+
816+
#[test]
817+
fn test_extract_entry_context_empty() {
818+
let entry = serde_json::json!({"name": "noop"});
819+
assert_eq!(extract_entry_context(&entry), "");
820+
}
821+
822+
#[test]
823+
fn test_extract_entry_context_strips_control_chars() {
824+
let entry = serde_json::json!({"name": "create-work-item", "title": "Good\ntitle\r\nhere"});
825+
assert_eq!(extract_entry_context(&entry), " (\"Goodtitlehere\")");
826+
}
827+
828+
#[test]
829+
fn test_extract_entry_context_strips_control_chars_from_path() {
830+
let entry = serde_json::json!({"name": "create-wiki-page", "path": "/Page\n/Injected"});
831+
assert_eq!(extract_entry_context(&entry), " (path: /Page/Injected)");
832+
}
833+
834+
// --- resolve_max and DEFAULT_MAX unit tests ---
835+
836+
#[test]
837+
fn test_default_max_trait_constant() {
838+
assert_eq!(CreateWorkItemResult::DEFAULT_MAX, 1);
839+
assert_eq!(CreatePrResult::DEFAULT_MAX, 1);
840+
assert_eq!(UpdateWorkItemResult::DEFAULT_MAX, 1);
841+
assert_eq!(CommentOnWorkItemResult::DEFAULT_MAX, 1);
842+
assert_eq!(CreateWikiPageResult::DEFAULT_MAX, 1);
843+
assert_eq!(UpdateWikiPageResult::DEFAULT_MAX, 1);
844+
}
845+
846+
#[test]
847+
fn test_resolve_max_uses_config_override() {
848+
let mut tool_configs = HashMap::new();
849+
tool_configs.insert("test-tool".to_string(), serde_json::json!({"max": 5}));
850+
let ctx = ExecutionContext {
851+
tool_configs,
852+
..ExecutionContext::default()
853+
};
854+
assert_eq!(resolve_max(&ctx, "test-tool", 1), 5);
855+
}
856+
857+
#[test]
858+
fn test_resolve_max_falls_back_to_default() {
859+
let ctx = ExecutionContext::default();
860+
assert_eq!(resolve_max(&ctx, "nonexistent-tool", 3), 3);
861+
}
862+
863+
#[test]
864+
fn test_resolve_max_uses_default_when_no_max_in_config() {
865+
let mut tool_configs = HashMap::new();
866+
tool_configs.insert("test-tool".to_string(), serde_json::json!({"other": true}));
867+
let ctx = ExecutionContext {
868+
tool_configs,
869+
..ExecutionContext::default()
870+
};
871+
assert_eq!(resolve_max(&ctx, "test-tool", 7), 7);
872+
}
873+
874+
// --- Generic budget enforcement for all tool types ---
875+
876+
#[tokio::test]
877+
async fn test_budget_enforcement_create_work_item_max() {
878+
let temp_dir = tempfile::tempdir().unwrap();
879+
let safe_output_path = temp_dir.path().join(SAFE_OUTPUT_FILENAME);
880+
881+
// Write 3 create-work-item entries + 1 noop; max set to 2
882+
let ndjson = r#"{"name":"create-work-item","title":"First item","description":"A description that is definitely longer than thirty characters."}
883+
{"name":"create-work-item","title":"Second item","description":"A description that is definitely longer than thirty characters."}
884+
{"name":"create-work-item","title":"Third item","description":"A description that is definitely longer than thirty characters."}
885+
{"name":"noop","context":"still runs"}
886+
"#;
887+
tokio::fs::write(&safe_output_path, ndjson).await.unwrap();
888+
889+
let mut tool_configs = HashMap::new();
890+
tool_configs.insert("create-work-item".to_string(), serde_json::json!({"max": 2}));
891+
892+
let ctx = ExecutionContext {
893+
ado_org_url: Some("https://dev.azure.com/org".to_string()),
894+
ado_organization: Some("org".to_string()),
895+
ado_project: Some("Proj".to_string()),
896+
access_token: Some("token".to_string()),
897+
working_directory: PathBuf::from("."),
898+
source_directory: PathBuf::from("."),
899+
tool_configs,
900+
repository_id: None,
901+
repository_name: None,
902+
allowed_repositories: HashMap::new(),
903+
};
904+
905+
let results = execute_safe_outputs(temp_dir.path(), &ctx).await;
906+
assert!(results.is_ok(), "Batch should not abort when max is exceeded");
907+
let results = results.unwrap();
908+
assert_eq!(results.len(), 4, "Expected 4 results");
909+
910+
// Only 1 should be skipped (max=2 allows first 2, third is skipped)
911+
let skipped: Vec<_> = results
912+
.iter()
913+
.filter(|r| r.message.contains("maximum create-work-item count"))
914+
.collect();
915+
assert_eq!(skipped.len(), 1, "Expected 1 skipped entry, got: {:?}", skipped);
916+
917+
// noop still runs
918+
assert!(results[3].success, "noop should still succeed");
919+
}
920+
921+
#[tokio::test]
922+
async fn test_budget_enforcement_mixed_tools_independent_budgets() {
923+
let temp_dir = tempfile::tempdir().unwrap();
924+
let safe_output_path = temp_dir.path().join(SAFE_OUTPUT_FILENAME);
925+
926+
// Mix of tools: each has max=1 (default), so only the first of each type should pass budget
927+
let ndjson = r#"{"name":"create-work-item","title":"WI 1","description":"A description that is definitely longer than thirty characters."}
928+
{"name":"create-work-item","title":"WI 2","description":"A description that is definitely longer than thirty characters."}
929+
{"name":"create-wiki-page","path":"/Page1","content":"Some valid wiki content here."}
930+
{"name":"create-wiki-page","path":"/Page2","content":"Some valid wiki content here."}
931+
{"name":"noop","context":"always runs"}
932+
"#;
933+
tokio::fs::write(&safe_output_path, ndjson).await.unwrap();
934+
935+
let ctx = ExecutionContext {
936+
ado_org_url: Some("https://dev.azure.com/org".to_string()),
937+
ado_organization: Some("org".to_string()),
938+
ado_project: Some("Proj".to_string()),
939+
access_token: Some("token".to_string()),
940+
working_directory: PathBuf::from("."),
941+
source_directory: PathBuf::from("."),
942+
tool_configs: HashMap::new(), // defaults: max=1 for all
943+
repository_id: None,
944+
repository_name: None,
945+
allowed_repositories: HashMap::new(),
946+
};
947+
948+
let results = execute_safe_outputs(temp_dir.path(), &ctx).await.unwrap();
949+
assert_eq!(results.len(), 5);
950+
951+
// Second create-work-item should be skipped
952+
let cwi_skipped: Vec<_> = results
953+
.iter()
954+
.filter(|r| r.message.contains("maximum create-work-item count"))
955+
.collect();
956+
assert_eq!(cwi_skipped.len(), 1, "Expected 1 skipped create-work-item");
957+
958+
// Second create-wiki-page should be skipped
959+
let cwp_skipped: Vec<_> = results
960+
.iter()
961+
.filter(|r| r.message.contains("maximum create-wiki-page count"))
962+
.collect();
963+
assert_eq!(cwp_skipped.len(), 1, "Expected 1 skipped create-wiki-page");
964+
965+
// noop always runs
966+
assert!(results[4].success, "noop should still succeed");
967+
}
738968
}

0 commit comments

Comments
 (0)