From b052c27e9b60108c2e3128d3870b5844cc5eb339 Mon Sep 17 00:00:00 2001 From: James Devine Date: Thu, 16 Apr 2026 13:35:21 +0100 Subject: [PATCH 1/8] refactor: extract shared compile flow and move helpers to common.rs Move helper functions, MCPG generation, and MCP validation from standalone.rs to common.rs. Extract compile_shared() function with CompileConfig struct so both standalone and 1ES compilers can share the common compilation flow. Standalone compiler is now a thin wrapper that provides target-specific values (AWF domains, MCPG config, firewall version) via extra_replacements. This is a pure refactor - standalone output is byte-identical to before (verified via golden snapshot comparison). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/compile/common.rs | 1724 +++++++++++++++++++++++++++++++++++- src/compile/standalone.rs | 1756 +------------------------------------ 2 files changed, 1750 insertions(+), 1730 deletions(-) diff --git a/src/compile/common.rs b/src/compile/common.rs index 6dc0647d..b9d97c26 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -1,9 +1,11 @@ //! Common helper functions shared across all compile targets. use anyhow::{Context, Result}; +use std::collections::{HashMap, HashSet}; +use std::path::Path; use super::types::{FrontMatter, PipelineParameter, Repository, TriggerConfig}; -use super::extensions::CompilerExtension; +use super::extensions::{CompilerExtension, Extension, McpgServerConfig, McpgGatewayConfig, McpgConfig, CompileContext}; use crate::compile::types::McpConfig; use crate::fuzzy_schedule; @@ -1218,10 +1220,753 @@ pub fn validate_resolve_pr_thread_statuses(front_matter: &FrontMatter) -> Result Ok(()) } +/// Generate the setup job YAML +pub fn generate_setup_job(setup_steps: &[serde_yaml::Value], agent_name: &str, pool: &str) -> String { + if setup_steps.is_empty() { + return String::new(); + } + + let steps_yaml = format_steps_yaml_indented(setup_steps, 4); + + format!( + r#"- job: SetupJob + displayName: "{} - Setup" + pool: + name: {} + steps: + - checkout: self +{} +"#, + agent_name, pool, steps_yaml + ) +} + +/// Generate the teardown job YAML +pub fn generate_teardown_job( + teardown_steps: &[serde_yaml::Value], + agent_name: &str, + pool: &str, +) -> String { + if teardown_steps.is_empty() { + return String::new(); + } + + let steps_yaml = format_steps_yaml(teardown_steps); + + format!( + r#" - job: TeardownJob + displayName: "{} - Teardown" + dependsOn: ProcessSafeOutputs + pool: + name: {} + steps: + - checkout: self +{} +"#, + agent_name, pool, steps_yaml + ) +} + +/// Generate prepare steps (inline), including extension steps and user-defined steps. +pub fn generate_prepare_steps( + prepare_steps: &[serde_yaml::Value], + extensions: &[super::extensions::Extension], +) -> Result { + let mut parts = Vec::new(); + + // Extension prepare steps and prompt supplements (runtimes + first-party tools) + for ext in extensions { + for step in ext.prepare_steps() { + parts.push(step); + } + if let Some(prompt) = ext.prompt_supplement() { + parts.push(super::extensions::wrap_prompt_append(&prompt, ext.name())?); + } + } + + if !prepare_steps.is_empty() { + parts.push(format_steps_yaml_indented(prepare_steps, 0)); + } + + Ok(parts.join("\n\n")) +} + +/// Generate finalize steps (inline) +pub fn generate_finalize_steps(finalize_steps: &[serde_yaml::Value]) -> String { + if finalize_steps.is_empty() { + return String::new(); + } + + format_steps_yaml_indented(finalize_steps, 0) +} + +/// Generate dependsOn clause for setup job +pub fn generate_agentic_depends_on(setup_steps: &[serde_yaml::Value]) -> String { + if !setup_steps.is_empty() { + "dependsOn: SetupJob".to_string() + } else { + String::new() + } +} + +/// Sensitive host path prefixes that should not be bind-mounted into MCP containers. +pub const SENSITIVE_MOUNT_PREFIXES: &[&str] = &[ + "/etc", + "/root", + "/home", + "/proc", + "/sys", +]; + +/// Docker runtime flag names that grant dangerous host access. +/// Checked both as `--flag=value` and as `--flag value` (split across two args). +pub const DANGEROUS_DOCKER_FLAGS: &[&str] = &[ + "--privileged", + "--cap-add", + "--security-opt", + "--pid", + "--network", + "--ipc", + "--user", + "-u", + "--add-host", + "--entrypoint", +]; + +/// Validate a container image name for injection attempts. +/// Allows `[a-zA-Z0-9./_:-]` which covers standard Docker image references. +pub fn validate_container_image(image: &str, mcp_name: &str) -> Vec { + let mut warnings = Vec::new(); + if image.is_empty() { + warnings.push(format!("Warning: MCP '{}': container image name is empty.", mcp_name)); + return warnings; + } + if !image.chars().all(|c| c.is_ascii_alphanumeric() || "._/:-@".contains(c)) { + warnings.push(format!( + "Warning: MCP '{}': container image '{}' contains unexpected characters. \ + Image names should only contain [a-zA-Z0-9./_:-@].", + mcp_name, image + )); + } + warnings +} + +/// Validate a volume mount source path, warning on sensitive host directories. +/// Docker socket mounts are escalated to stderr warnings since they grant container escape. +/// Note: paths are lowercased for comparison to catch cross-platform casing (e.g. `/ETC/shadow`). +pub fn validate_mount_source(mount: &str, mcp_name: &str) -> Vec { + let mut warnings = Vec::new(); + // Format: "source:dest:mode" + if let Some(source) = mount.split(':').next() { + let source_lower = source.to_lowercase(); + if source_lower.contains("docker.sock") { + warnings.push(format!( + "Warning: MCP '{}': mount '{}' exposes the Docker socket to the MCP container. \ + This grants full host Docker access and may allow container escape.", + mcp_name, mount + )); + return warnings; + } + for prefix in SENSITIVE_MOUNT_PREFIXES { + // Match exact path or path with trailing separator to avoid false positives + // (e.g. /etc matches /etc and /etc/shadow, but not /etc-configs) + if source_lower == *prefix || source_lower.starts_with(&format!("{}/", prefix)) { + warnings.push(format!( + "Warning: MCP '{}': mount source '{}' references a sensitive host path ({}). \ + Ensure this is intentional.", + mcp_name, source, prefix + )); + break; + } + } + } + warnings +} + +/// Validate Docker runtime args for dangerous flags that could escalate privileges. +/// Also detects volume mounts smuggled via `-v`/`--volume` that bypass `mounts` validation. +/// Handles both `--flag=value` and `--flag value` (split) forms. +pub fn validate_docker_args(args: &[String], mcp_name: &str) -> Vec { + let mut warnings = Vec::new(); + for (i, arg) in args.iter().enumerate() { + let arg_lower = arg.to_lowercase(); + // Check for dangerous Docker flags (both --flag=value and --flag value) + for dangerous in DANGEROUS_DOCKER_FLAGS { + if arg_lower == *dangerous + || arg_lower.starts_with(&format!("{}=", dangerous)) + { + let extra_hint = if *dangerous == "--entrypoint" { + " Use the 'entrypoint:' field instead of passing --entrypoint in args." + } else { + "" + }; + warnings.push(format!( + "Warning: MCP '{}': Docker arg '{}' grants elevated privileges. \ + Ensure this is intentional.{}", + mcp_name, arg, extra_hint + )); + } + } + // Check for volume mounts smuggled via args (bypasses mounts validation) + if arg == "-v" || arg == "--volume" { + if let Some(mount_spec) = args.get(i + 1) { + warnings.push(format!( + "Warning: MCP '{}': volume mount '{}' in args bypasses mounts validation. \ + Use the 'mounts:' field instead.", + mcp_name, mount_spec + )); + warnings.extend(validate_mount_source(mount_spec, mcp_name)); + } else { + warnings.push(format!( + "Warning: MCP '{}': '{}' flag is the last arg with no mount spec following it. \ + This is likely a malformed args list.", + mcp_name, arg + )); + } + } else if arg_lower.starts_with("-v=") || arg_lower.starts_with("--volume=") { + let mount_spec = arg.splitn(2, '=').nth(1).unwrap_or(""); + warnings.push(format!( + "Warning: MCP '{}': volume mount '{}' in args bypasses mounts validation. \ + Use the 'mounts:' field instead.", + mcp_name, mount_spec + )); + warnings.extend(validate_mount_source(mount_spec, mcp_name)); + } + } + warnings +} + +/// Validate that an MCP HTTP URL uses an allowed scheme. +pub fn validate_mcp_url(url: &str, mcp_name: &str) -> Vec { + let mut warnings = Vec::new(); + if !url.starts_with("https://") && !url.starts_with("http://") { + warnings.push(format!( + "Warning: MCP '{}': URL '{}' does not use http:// or https:// scheme. \ + This may not work with MCPG.", + mcp_name, url + )); + } + warnings +} + +/// Warn when env values or headers look like they contain inline secrets. +/// Secrets should use pipeline variables and passthrough ("") instead. +pub fn warn_potential_secrets(mcp_name: &str, env: &HashMap, headers: &HashMap) -> Vec { + let mut warnings = Vec::new(); + for (key, value) in env { + if !value.is_empty() && (key.to_lowercase().contains("token") + || key.to_lowercase().contains("secret") + || key.to_lowercase().contains("key") + || key.to_lowercase().contains("password") + || key.to_lowercase().contains("pat")) + { + warnings.push(format!( + "Warning: MCP '{}': env var '{}' has an inline value that may be a secret. \ + Use an empty string (\"\") for passthrough from pipeline variables instead.", + mcp_name, key + )); + } + } + for (key, value) in headers { + if value.to_lowercase().contains("bearer ") + || key.to_lowercase() == "authorization" + { + warnings.push(format!( + "Warning: MCP '{}': header '{}' may contain inline credentials. \ + These will appear in plaintext in the compiled pipeline YAML.", + mcp_name, key + )); + } + } + warnings +} + +/// Validate that a string is a legal environment variable name (`[A-Za-z_][A-Za-z0-9_]*`). +/// Prevents injection of arbitrary Docker flags via user-controlled front matter keys. +pub fn is_valid_env_var_name(name: &str) -> bool { + let mut chars = name.chars(); + chars + .next() + .map_or(false, |c| c.is_ascii_alphabetic() || c == '_') + && chars.all(|c| c.is_ascii_alphanumeric() || c == '_') +} + +/// Generate MCPG configuration from front matter. +/// +/// Converts the front matter `mcp-servers` definitions into MCPG-compatible JSON. +/// SafeOutputs is always included as an HTTP backend. Extension-contributed MCPG +/// entries (e.g., azure-devops) are included via the `extensions` parameter. +pub fn generate_mcpg_config( + front_matter: &FrontMatter, + ctx: &CompileContext, + extensions: &[super::extensions::Extension], +) -> Result { + let mut mcp_servers = HashMap::new(); + + // SafeOutputs is always included as an HTTP backend. + // MCPG runs with --network host, so it reaches SafeOutputs via localhost + // (not host.docker.internal, which requires Docker DNS and isn't available + // in host network mode on Linux). + mcp_servers.insert( + "safeoutputs".to_string(), + McpgServerConfig { + server_type: "http".to_string(), + container: None, + entrypoint: None, + entrypoint_args: None, + mounts: None, + args: None, + url: Some("http://localhost:${SAFE_OUTPUTS_PORT}/mcp".to_string()), + headers: Some(HashMap::from([( + "Authorization".to_string(), + "Bearer ${SAFE_OUTPUTS_API_KEY}".to_string(), + )])), + env: None, + tools: None, + }, + ); + + // Add extension-contributed MCPG server entries (e.g., azure-devops) + for ext in extensions { + for (name, config) in ext.mcpg_servers(ctx)? { + mcp_servers.insert(name, config); + } + } + + for (name, config) in &front_matter.mcp_servers { + // Prevent user-defined MCPs from overwriting the reserved safeoutputs backend + if name.eq_ignore_ascii_case("safeoutputs") { + log::warn!( + "MCP name 'safeoutputs' is reserved for the safe outputs HTTP backend — skipping" + ); + continue; + } + + // Skip if already auto-configured by an extension (e.g., tools.azure-devops) + if mcp_servers.contains_key(name) { + continue; + } + + let (is_enabled, options) = match config { + McpConfig::Enabled(enabled) => (*enabled, None), + McpConfig::WithOptions(opts) => (opts.enabled.unwrap_or(true), Some(opts)), + }; + + if !is_enabled { + continue; + } + + if let Some(opts) = options { + if opts.container.is_some() && opts.url.is_some() { + log::warn!( + "MCP '{}': both 'container' and 'url' are set — using 'container' (stdio). \ + Remove 'url' to silence this warning.", + name + ); + } + + if let Some(container) = &opts.container { + // Container-based stdio MCP (MCPG-native, per spec §3.2.1) + for w in validate_container_image(container, name) { eprintln!("{}", w); } + // Validate mount paths for sensitive host directories + for mount in &opts.mounts { + for w in validate_mount_source(mount, name) { eprintln!("{}", w); } + } + // Validate Docker runtime args for privilege escalation + for w in validate_docker_args(&opts.args, name) { eprintln!("{}", w); } + // Warn about potential inline secrets (check headers too in case user set both) + for w in warn_potential_secrets(name, &opts.env, &opts.headers) { eprintln!("{}", w); } + let entrypoint_args = if opts.entrypoint_args.is_empty() { + None + } else { + Some(opts.entrypoint_args.clone()) + }; + let args = if opts.args.is_empty() { + None + } else { + Some(opts.args.clone()) + }; + let mounts = if opts.mounts.is_empty() { + None + } else { + Some(opts.mounts.clone()) + }; + let env = if opts.env.is_empty() { + None + } else { + Some(opts.env.clone()) + }; + let tools = if opts.allowed.is_empty() { + None + } else { + Some(opts.allowed.clone()) + }; + mcp_servers.insert( + name.clone(), + McpgServerConfig { + server_type: "stdio".to_string(), + container: Some(container.clone()), + entrypoint: opts.entrypoint.clone(), + entrypoint_args, + mounts, + args, + url: None, + headers: None, + env, + tools, + }, + ); + } else if let Some(url) = &opts.url { + // HTTP-based MCP (remote server) + for w in validate_mcp_url(url, name) { eprintln!("{}", w); } + // Warn about potential inline secrets in headers + for w in warn_potential_secrets(name, &HashMap::new(), &opts.headers) { eprintln!("{}", w); } + if !opts.env.is_empty() { + eprintln!( + "Warning: MCP '{}': env vars are not supported for HTTP MCPs — they will be ignored. \ + Use headers for authentication instead.", + name + ); + } + let headers = if opts.headers.is_empty() { + None + } else { + Some(opts.headers.clone()) + }; + let tools = if opts.allowed.is_empty() { + None + } else { + Some(opts.allowed.clone()) + }; + mcp_servers.insert( + name.clone(), + McpgServerConfig { + server_type: "http".to_string(), + container: None, + entrypoint: None, + entrypoint_args: None, + mounts: None, + args: None, + url: Some(url.clone()), + headers, + env: None, + tools, + }, + ); + } else { + log::warn!("MCP '{}' has no container or url — skipping", name); + continue; + } + } else { + log::warn!("MCP '{}' has no container or url — skipping", name); + } + } + + Ok(McpgConfig { + mcp_servers, + gateway: McpgGatewayConfig { + port: MCPG_PORT, + domain: "host.docker.internal".to_string(), + api_key: "${MCP_GATEWAY_API_KEY}".to_string(), + payload_dir: "/tmp/gh-aw/mcp-payloads".to_string(), + }, + }) +} + +/// Generate additional `-e` flags for the MCPG Docker run command. +/// +/// MCP containers spawned by MCPG may need environment variables that flow from +/// the pipeline through the MCPG container (passthrough). This function: +/// 1. Auto-maps `AZURE_DEVOPS_EXT_PAT` from `SC_READ_TOKEN` when `permissions.read` is configured +/// 2. Collects passthrough env vars (value is `""`) from container-based MCP configs +/// +/// Only container-based MCPs are considered — HTTP MCPs don't have child containers +/// that need env passthrough. +/// +/// Returns flags formatted for inline insertion in the `docker run` command. +/// The marker sits after the last hardcoded `-e` flag, so the output must +/// include leading `\\\n` for line continuation when non-empty. +pub fn generate_mcpg_docker_env(front_matter: &FrontMatter) -> String { + let mut env_flags: Vec = Vec::new(); + let mut seen: HashSet = HashSet::new(); + + // Check if any container MCP requests AZURE_DEVOPS_EXT_PAT passthrough + let any_mcp_needs_ado_token = front_matter.mcp_servers.values().any(|config| { + matches!(config, McpConfig::WithOptions(opts) + if opts.enabled.unwrap_or(true) + && opts.container.is_some() + && opts.env.contains_key("AZURE_DEVOPS_EXT_PAT")) + }); + + // Also check if tools.azure-devops is enabled (auto-configured ADO MCP always needs token) + let ado_tool_needs_token = front_matter + .tools + .as_ref() + .and_then(|t| t.azure_devops.as_ref()) + .is_some_and(|ado| ado.is_enabled()); + + // Auto-map AZURE_DEVOPS_EXT_PAT from SC_READ_TOKEN when permissions.read is configured + // AND at least one container MCP requests it via env passthrough (or the ADO tool is enabled) + if any_mcp_needs_ado_token || ado_tool_needs_token { + if front_matter.permissions.as_ref().and_then(|p| p.read.as_ref()).is_some() { + env_flags.push( + "-e AZURE_DEVOPS_EXT_PAT=\"$(SC_READ_TOKEN)\"".to_string(), + ); + seen.insert("AZURE_DEVOPS_EXT_PAT".to_string()); + } else { + eprintln!( + "Warning: one or more container MCPs request AZURE_DEVOPS_EXT_PAT passthrough \ + but permissions.read is not configured. The token will be empty at runtime. \ + Add `permissions: {{ read: }}` to enable auto-mapping." + ); + } + } + + // Collect passthrough env vars from container-based MCP configs only. + // HTTP MCPs don't have child containers — env passthrough doesn't apply. + for (mcp_name, config) in &front_matter.mcp_servers { + let opts = match config { + McpConfig::WithOptions(opts) if opts.enabled.unwrap_or(true) => opts, + _ => continue, + }; + + // Only container-based MCPs need env passthrough on the MCPG Docker run + if opts.container.is_none() { + continue; + } + + for (var_name, var_value) in &opts.env { + // Validate env var name to prevent Docker flag injection (e.g. "X --privileged") + if !is_valid_env_var_name(var_name) { + log::warn!( + "MCP '{}': skipping invalid env var name '{}' — must match [A-Za-z_][A-Za-z0-9_]*", + mcp_name, var_name + ); + continue; + } + if seen.contains(var_name) { + continue; + } + // Passthrough: empty string means forward from host/pipeline environment + if var_value.is_empty() { + env_flags.push(format!("-e {}", var_name)); + seen.insert(var_name.clone()); + } + } + } + + env_flags.sort(); + if env_flags.is_empty() { + // No extra flags — emit a lone `\` so the bash line continuation from the + // preceding `-e MCP_GATEWAY_API_KEY=...` flag connects to the image name on + // the next line. This is valid bash: a backslash at end-of-line continues + // the command. replace_with_indent preserves this on its own indented line. + "\\".to_string() + } else { + // Emit each flag on its own line with `\` continuation. + // replace_with_indent handles indentation from the template (base.yml), + // so we only emit the content without hardcoded spaces. + let flags = env_flags.join(" \\\n"); + format!("{} \\", flags) + } +} + +// ==================== Shared compile flow ==================== + +/// Target-specific overrides for the shared compile flow. +pub struct CompileConfig { + /// The base YAML template content (the template string itself). + pub template: String, + /// Additional placeholder→value replacements beyond the shared set. + /// These are applied after the shared replacements. + pub extra_replacements: Vec<(String, String)>, +} + +/// Shared compilation flow used by both standalone and 1ES compilers. +/// +/// This function handles the common pipeline compilation steps: +/// 1. Validates front matter +/// 2. Generates all shared placeholder values +/// 3. Runs extension validations +/// 4. Applies replacements to the template +/// 5. Prepends the header comment +/// +/// Target-specific values are provided via `CompileConfig.extra_replacements`. +pub async fn compile_shared( + input_path: &Path, + output_path: &Path, + front_matter: &FrontMatter, + markdown_body: &str, + extensions: &[Extension], + config: CompileConfig, +) -> Result { + // 1. Validate + validate_front_matter_identity(front_matter)?; + + // 2. Generate schedule + let schedule = match &front_matter.schedule { + Some(s) => generate_schedule(&front_matter.name, s) + .with_context(|| format!("Failed to parse schedule '{}'", s.expression()))?, + None => String::new(), + }; + + let repositories = generate_repositories(&front_matter.repositories); + let checkout_steps = generate_checkout_steps(&front_matter.checkout); + let checkout_self = generate_checkout_self(); + let agent_name = sanitize_filename(&front_matter.name); + + // 3. Build compile context and run extension validations + let input_dir = input_path.parent().unwrap_or(Path::new(".")); + let ctx = CompileContext::new(front_matter, input_dir).await; + + for ext in extensions { + for warning in ext.validate(&ctx)? { + eprintln!("Warning: {}", warning); + } + } + + // 4. Generate copilot params + let copilot_params = generate_copilot_params(front_matter, extensions)?; + + // 5. Compute workspace, working directory, triggers + let effective_workspace = compute_effective_workspace( + &front_matter.workspace, + &front_matter.checkout, + &front_matter.name, + ); + let working_directory = generate_working_directory(&effective_workspace); + let pipeline_resources = generate_pipeline_resources(&front_matter.triggers)?; + let has_schedule = front_matter.schedule.is_some(); + let pr_trigger = generate_pr_trigger(&front_matter.triggers, has_schedule); + let ci_trigger = generate_ci_trigger(&front_matter.triggers, has_schedule); + + // 6. Generate source path and pipeline path + let source_path = generate_source_path(input_path); + let pipeline_path = generate_pipeline_path(output_path); + + // 7. Pool name + let pool = front_matter + .pool + .as_ref() + .map(|p| p.name().to_string()) + .unwrap_or_else(|| DEFAULT_POOL.to_string()); + + // 8. Setup/teardown jobs, parameters, prepare/finalize steps + let setup_job = generate_setup_job(&front_matter.setup, &front_matter.name, &pool); + let teardown_job = generate_teardown_job(&front_matter.teardown, &front_matter.name, &pool); + let has_memory = front_matter + .tools + .as_ref() + .and_then(|t| t.cache_memory.as_ref()) + .is_some_and(|cm| cm.is_enabled()); + let parameters = build_parameters(&front_matter.parameters, has_memory); + let parameters_yaml = generate_parameters(¶meters)?; + let prepare_steps = generate_prepare_steps(&front_matter.steps, extensions)?; + let finalize_steps = generate_finalize_steps(&front_matter.post_steps); + let agentic_depends_on = generate_agentic_depends_on(&front_matter.setup); + let job_timeout = generate_job_timeout(front_matter); + + // 9. Token acquisition and env vars + let acquire_read_token = generate_acquire_ado_token( + front_matter + .permissions + .as_ref() + .and_then(|p| p.read.as_deref()), + "SC_READ_TOKEN", + ); + let copilot_ado_env = generate_copilot_ado_env( + front_matter + .permissions + .as_ref() + .and_then(|p| p.read.as_deref()), + ); + let acquire_write_token = generate_acquire_ado_token( + front_matter + .permissions + .as_ref() + .and_then(|p| p.write.as_deref()), + "SC_WRITE_TOKEN", + ); + let executor_ado_env = generate_executor_ado_env( + front_matter + .permissions + .as_ref() + .and_then(|p| p.write.as_deref()), + ); + + // 10. Validations + validate_write_permissions(front_matter)?; + validate_comment_target(front_matter)?; + validate_update_work_item_target(front_matter)?; + validate_submit_pr_review_events(front_matter)?; + validate_update_pr_votes(front_matter)?; + validate_resolve_pr_thread_statuses(front_matter)?; + + // 11. Threat analysis prompt + let threat_analysis_prompt = include_str!("../../templates/threat-analysis.md"); + let template = replace_with_indent( + &config.template, + "{{ threat_analysis_prompt }}", + threat_analysis_prompt, + ); + + // 12. Shared replacements + let compiler_version = env!("CARGO_PKG_VERSION"); + let replacements: Vec<(&str, &str)> = vec![ + ("{{ parameters }}", ¶meters_yaml), + ("{{ compiler_version }}", compiler_version), + ("{{ copilot_version }}", COPILOT_CLI_VERSION), + ("{{ pool }}", &pool), + ("{{ setup_job }}", &setup_job), + ("{{ teardown_job }}", &teardown_job), + ("{{ prepare_steps }}", &prepare_steps), + ("{{ finalize_steps }}", &finalize_steps), + ("{{ agentic_depends_on }}", &agentic_depends_on), + ("{{ job_timeout }}", &job_timeout), + ("{{ repositories }}", &repositories), + ("{{ schedule }}", &schedule), + ("{{ pipeline_resources }}", &pipeline_resources), + ("{{ pr_trigger }}", &pr_trigger), + ("{{ ci_trigger }}", &ci_trigger), + ("{{ checkout_self }}", &checkout_self), + ("{{ checkout_repositories }}", &checkout_steps), + ("{{ agent }}", &agent_name), + ("{{ agent_name }}", &front_matter.name), + ("{{ agent_description }}", &front_matter.description), + ("{{ copilot_params }}", &copilot_params), + ("{{ source_path }}", &source_path), + ("{{ pipeline_path }}", &pipeline_path), + ("{{ working_directory }}", &working_directory), + ("{{ workspace }}", &working_directory), + ("{{ agent_content }}", markdown_body), + ("{{ acquire_ado_token }}", &acquire_read_token), + ("{{ copilot_ado_env }}", &copilot_ado_env), + ("{{ acquire_write_token }}", &acquire_write_token), + ("{{ executor_ado_env }}", &executor_ado_env), + ]; + + let mut pipeline_yaml = replacements + .into_iter() + .fold(template, |yaml, (placeholder, replacement)| { + replace_with_indent(&yaml, placeholder, replacement) + }); + + // 13. Apply extra replacements (target-specific) + for (placeholder, replacement) in &config.extra_replacements { + pipeline_yaml = replace_with_indent(&pipeline_yaml, placeholder, replacement); + } + + // 14. Prepend header + let header = generate_header_comment(input_path); + Ok(format!("{}{}", header, pipeline_yaml)) +} + #[cfg(test)] mod tests { use super::*; use crate::compile::types::{McpConfig, McpOptions, Repository}; + use crate::compile::extensions::{CompileContext, collect_extensions}; + use std::collections::HashMap; /// Helper: create a minimal FrontMatter by parsing YAML fn minimal_front_matter() -> FrontMatter { @@ -2570,4 +3315,981 @@ mod tests { assert!(result.contains("project: 'My''Project'")); assert!(result.contains("- 'it''s-branch'")); } + + // ─── generate_prepare_steps ────────────────────────────────────────────── + + #[test] + fn test_generate_prepare_steps_with_memory_includes_memory_preamble() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n", + ).unwrap(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let result = generate_prepare_steps(&[], &exts).unwrap(); + assert!( + !result.is_empty(), + "memory steps must be emitted when cache-memory enabled" + ); + assert!( + result.contains("agent_memory"), + "should reference memory directory" + ); + } + + #[test] + fn test_generate_prepare_steps_without_memory_and_no_steps_is_empty() { + let fm = minimal_front_matter(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let result = generate_prepare_steps(&[], &exts).unwrap(); + assert!(result.is_empty(), "no steps and no memory should produce empty output"); + } + + #[test] + fn test_generate_prepare_steps_with_memory_includes_download_and_prompt() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n", + ).unwrap(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let result = generate_prepare_steps(&[], &exts).unwrap(); + assert!( + result.contains("DownloadPipelineArtifact"), + "memory steps must include the artifact download task" + ); + assert!( + result.contains("Agent Memory"), + "memory steps must include the memory prompt" + ); + } + + #[test] + fn test_generate_prepare_steps_without_memory_with_user_steps() { + let fm = minimal_front_matter(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let step: serde_yaml::Value = + serde_yaml::from_str("bash: echo hello\ndisplayName: greet").unwrap(); + let result = generate_prepare_steps(&[step], &exts).unwrap(); + assert!(!result.is_empty(), "user steps should be present"); + assert!( + !result.contains("agent_memory"), + "no memory reference when cache-memory not enabled" + ); + } + + #[test] + fn test_generate_prepare_steps_with_memory_and_user_steps() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n", + ).unwrap(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let step: serde_yaml::Value = + serde_yaml::from_str("bash: echo hello\ndisplayName: greet").unwrap(); + let result = generate_prepare_steps(&[step], &exts).unwrap(); + assert!( + result.contains("agent_memory"), + "memory reference must be present" + ); + assert!( + result.contains("echo hello"), + "user step must also be present" + ); + } + + #[test] + fn test_generate_prepare_steps_with_lean() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\nruntimes:\n lean: true\n---\n", + ).unwrap(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let result = generate_prepare_steps(&[], &exts).unwrap(); + assert!(result.contains("elan-init.sh"), "should include elan installer"); + assert!(result.contains("Lean 4"), "should include Lean prompt"); + assert!(result.contains("--default-toolchain stable"), "should default to stable"); + assert!(result.contains("/tmp/awf-tools/"), "should symlink into awf-tools for AWF chroot"); + } + + #[test] + fn test_generate_prepare_steps_with_lean_custom_toolchain() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\nruntimes:\n lean:\n toolchain: \"leanprover/lean4:v4.29.1\"\n---\n", + ).unwrap(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let result = generate_prepare_steps(&[], &exts).unwrap(); + assert!( + result.contains("--default-toolchain leanprover/lean4:v4.29.1"), + "should use specified toolchain" + ); + } + + #[test] + fn test_generate_prepare_steps_with_lean_and_memory() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\nruntimes:\n lean: true\ntools:\n cache-memory: true\n---\n", + ).unwrap(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let result = generate_prepare_steps(&[], &exts).unwrap(); + assert!(result.contains("agent_memory"), "memory steps present"); + assert!(result.contains("elan-init.sh"), "lean install present"); + assert!(result.contains("Lean 4"), "lean prompt present"); + } + + // ═══════════════════════════════════════════════════════════════════════ + // Tests moved from standalone.rs — MCPG config, docker env, validation + // ═══════════════════════════════════════════════════════════════════════ + + #[test] + fn test_generate_firewall_config_custom_mcp() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "my-tool".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("node:20-slim".to_string()), + entrypoint: Some("node".to_string()), + entrypoint_args: vec!["server.js".to_string()], + allowed: vec!["do_thing".to_string()], + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let server = config.mcp_servers.get("my-tool").unwrap(); + assert_eq!(server.server_type, "stdio"); + assert_eq!(server.container.as_ref().unwrap(), "node:20-slim"); + assert_eq!(server.entrypoint.as_ref().unwrap(), "node"); + assert_eq!( + server.entrypoint_args.as_ref().unwrap(), + &vec!["server.js"] + ); + assert_eq!( + server.tools.as_ref().unwrap(), + &vec!["do_thing".to_string()] + ); + } + + #[test] + fn test_generate_mcpg_config_mcp_without_transport_skipped() { + let mut fm = minimal_front_matter(); + // An MCP with no container or url should be skipped + fm.mcp_servers + .insert("phantom".to_string(), McpConfig::Enabled(true)); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + assert!(!config.mcp_servers.contains_key("phantom")); + // safeoutputs is always present + assert!(config.mcp_servers.contains_key("safeoutputs")); + } + + #[test] + fn test_generate_mcpg_config_disabled_mcp_skipped() { + let mut fm = minimal_front_matter(); + fm.mcp_servers + .insert("my-tool".to_string(), McpConfig::Enabled(false)); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + assert!(!config.mcp_servers.contains_key("my-tool")); + } + + #[test] + fn test_generate_mcpg_config_empty_mcp_servers() { + let fm = minimal_front_matter(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + // Only safeoutputs should be present + assert_eq!(config.mcp_servers.len(), 1); + assert!(config.mcp_servers.contains_key("safeoutputs")); + } + + #[test] + fn test_generate_mcpg_config_gateway_defaults() { + let fm = minimal_front_matter(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + assert_eq!(config.gateway.port, 80); + assert_eq!(config.gateway.domain, "host.docker.internal"); + assert_eq!(config.gateway.api_key, "${MCP_GATEWAY_API_KEY}"); + assert_eq!(config.gateway.payload_dir, "/tmp/gh-aw/mcp-payloads"); + } + + #[test] + fn test_generate_mcpg_config_json_roundtrip() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "my-tool".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("python:3.12-slim".to_string()), + entrypoint: Some("python".to_string()), + entrypoint_args: vec!["-m".to_string(), "server".to_string()], + allowed: vec!["query".to_string()], + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let json = serde_json::to_string_pretty(&config).expect("Config should serialize to JSON"); + let parsed: serde_json::Value = + serde_json::from_str(&json).expect("Serialized JSON should parse back"); + + // Verify top-level structure matches MCPG expectation + assert!( + parsed.get("mcpServers").is_some(), + "Should have mcpServers key" + ); + assert!(parsed.get("gateway").is_some(), "Should have gateway key"); + + let gw = parsed.get("gateway").unwrap(); + assert!(gw.get("port").is_some(), "Gateway should have port"); + assert!(gw.get("domain").is_some(), "Gateway should have domain"); + assert!(gw.get("apiKey").is_some(), "Gateway should have apiKey"); + assert!( + gw.get("payloadDir").is_some(), + "Gateway should have payloadDir" + ); + } + + #[test] + fn test_generate_mcpg_config_safeoutputs_variable_placeholders() { + let fm = minimal_front_matter(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let so = config.mcp_servers.get("safeoutputs").unwrap(); + + // URL should reference the runtime-substituted port + let url = so.url.as_ref().unwrap(); + assert!( + url.contains("${SAFE_OUTPUTS_PORT}"), + "SafeOutputs URL should use ${{SAFE_OUTPUTS_PORT}} placeholder, got: {url}" + ); + + // Auth header should reference the runtime-substituted API key + let headers = so.headers.as_ref().unwrap(); + let auth = headers.get("Authorization").unwrap(); + assert!( + auth.contains("${SAFE_OUTPUTS_API_KEY}"), + "SafeOutputs auth header should use ${{SAFE_OUTPUTS_API_KEY}} placeholder, got: {auth}" + ); + } + + #[test] + fn test_generate_mcpg_config_safeoutputs_is_http_type() { + let fm = minimal_front_matter(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let so = config.mcp_servers.get("safeoutputs").unwrap(); + assert_eq!(so.server_type, "http"); + assert!( + so.container.is_none(), + "HTTP backend should have no container" + ); + assert!(so.args.is_none(), "HTTP backend should have no args"); + assert!(so.url.is_some(), "HTTP backend must have a URL"); + } + + #[test] + fn test_generate_mcpg_config_container_mcp_is_stdio_type() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "runner".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("node:20-slim".to_string()), + entrypoint: Some("node".to_string()), + entrypoint_args: vec!["srv.js".to_string()], + allowed: vec!["run".to_string()], + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let srv = config.mcp_servers.get("runner").unwrap(); + assert_eq!(srv.server_type, "stdio"); + assert!( + srv.container.is_some(), + "stdio server must have a container" + ); + assert!(srv.url.is_none(), "stdio server should have no URL"); + } + + #[test] + fn test_generate_mcpg_config_container_with_env() { + let mut fm = minimal_front_matter(); + let mut env = HashMap::new(); + env.insert("TOKEN".to_string(), "secret".to_string()); + fm.mcp_servers.insert( + "with-env".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("node:20-slim".to_string()), + env, + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let srv = config.mcp_servers.get("with-env").unwrap(); + let e = srv.env.as_ref().unwrap(); + assert_eq!(e.get("TOKEN").unwrap(), "secret"); + } + + #[test] + fn test_generate_mcpg_config_reserved_safeoutputs_name_rejected() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "safeoutputs".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("evil:latest".to_string()), + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + // The reserved entry should still be the HTTP backend, not the user's container + let so = config.mcp_servers.get("safeoutputs").unwrap(); + assert_eq!( + so.server_type, "http", + "safeoutputs should remain HTTP backend" + ); + assert!( + so.container.is_none(), + "User container should not overwrite safeoutputs" + ); + } + + #[test] + fn test_generate_mcpg_config_safeoutputs_reserved_name_skipped() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "SafeOutputs".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("node:20-slim".to_string()), + entrypoint: Some("node".to_string()), + entrypoint_args: vec!["evil.js".to_string()], + allowed: vec!["hijack".to_string()], + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + // The user-defined "SafeOutputs" must not overwrite the built-in entry + let so = config.mcp_servers.get("safeoutputs").unwrap(); + assert_eq!(so.server_type, "http"); + assert!(so.url.as_ref().unwrap().contains("localhost")); + // No stdio entry should have been added under any casing + assert_eq!(config.mcp_servers.len(), 1); + } + + #[test] + fn test_generate_mcpg_config_http_mcp() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "remote".to_string(), + McpConfig::WithOptions(McpOptions { + url: Some("https://mcp.example.com/api".to_string()), + headers: { + let mut h = HashMap::new(); + h.insert("X-Custom".to_string(), "value".to_string()); + h + }, + allowed: vec!["query".to_string()], + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let srv = config.mcp_servers.get("remote").unwrap(); + assert_eq!(srv.server_type, "http"); + assert_eq!( + srv.url.as_ref().unwrap(), + "https://mcp.example.com/api" + ); + assert_eq!( + srv.headers.as_ref().unwrap().get("X-Custom").unwrap(), + "value" + ); + assert!(srv.container.is_none(), "HTTP server should have no container"); + } + + #[test] + fn test_generate_mcpg_config_container_with_entrypoint() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "ado".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("node:20-slim".to_string()), + entrypoint: Some("npx".to_string()), + entrypoint_args: vec!["-y".to_string(), "@azure-devops/mcp".to_string()], + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let srv = config.mcp_servers.get("ado").unwrap(); + assert_eq!(srv.server_type, "stdio"); + assert_eq!(srv.container.as_ref().unwrap(), "node:20-slim"); + assert_eq!(srv.entrypoint.as_ref().unwrap(), "npx"); + assert_eq!( + srv.entrypoint_args.as_ref().unwrap(), + &vec!["-y", "@azure-devops/mcp"] + ); + } + + #[test] + fn test_generate_mcpg_config_container_with_mounts() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "data-tool".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("data-tool:latest".to_string()), + mounts: vec!["/host/data:/app/data:ro".to_string()], + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let srv = config.mcp_servers.get("data-tool").unwrap(); + assert_eq!( + srv.mounts.as_ref().unwrap(), + &vec!["/host/data:/app/data:ro"] + ); + } + + #[test] + fn test_generate_mcpg_config_no_transport_skipped() { + let mut fm = minimal_front_matter(); + // MCP with options but no container or url should be skipped + fm.mcp_servers.insert( + "no-transport".to_string(), + McpConfig::WithOptions(McpOptions { + allowed: vec!["tool".to_string()], + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + assert!(!config.mcp_servers.contains_key("no-transport")); + } + + #[test] + fn test_generate_mcpg_docker_env_with_permissions_read() { + let mut fm = minimal_front_matter(); + fm.permissions = Some(crate::compile::types::PermissionsConfig { + read: Some("my-read-sc".to_string()), + write: None, + }); + // A container MCP must request AZURE_DEVOPS_EXT_PAT for the auto-map to trigger + fm.mcp_servers.insert( + "ado-tool".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("node:20-slim".to_string()), + env: { + let mut e = HashMap::new(); + e.insert("AZURE_DEVOPS_EXT_PAT".to_string(), "".to_string()); + e + }, + ..Default::default() + }), + ); + let env = generate_mcpg_docker_env(&fm); + assert!( + env.contains("-e AZURE_DEVOPS_EXT_PAT=\"$(SC_READ_TOKEN)\""), + "Should auto-map ADO token when permissions.read is set and MCP requests it" + ); + } + + #[test] + fn test_generate_mcpg_docker_env_permissions_read_no_mcp_request() { + let mut fm = minimal_front_matter(); + fm.permissions = Some(crate::compile::types::PermissionsConfig { + read: Some("my-read-sc".to_string()), + write: None, + }); + // No MCP requests AZURE_DEVOPS_EXT_PAT — auto-map should NOT trigger + fm.mcp_servers.insert( + "unrelated-tool".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("node:20-slim".to_string()), + ..Default::default() + }), + ); + let env = generate_mcpg_docker_env(&fm); + assert!( + !env.contains("AZURE_DEVOPS_EXT_PAT"), + "Should NOT auto-map ADO token when no MCP requests it" + ); + } + + #[test] + fn test_generate_mcpg_docker_env_dedup_auto_map_and_passthrough() { + // When permissions.read is set AND MCP has AZURE_DEVOPS_EXT_PAT: "", + // the auto-mapped form (with SC_READ_TOKEN) should win — no duplicate + let mut fm = minimal_front_matter(); + fm.permissions = Some(crate::compile::types::PermissionsConfig { + read: Some("my-read-sc".to_string()), + write: None, + }); + fm.mcp_servers.insert( + "ado-tool".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("node:20-slim".to_string()), + env: { + let mut e = HashMap::new(); + e.insert("AZURE_DEVOPS_EXT_PAT".to_string(), "".to_string()); + e + }, + ..Default::default() + }), + ); + let env = generate_mcpg_docker_env(&fm); + // Should have the SC_READ_TOKEN form (auto-mapped), not bare passthrough + assert!( + env.contains("-e AZURE_DEVOPS_EXT_PAT=\"$(SC_READ_TOKEN)\""), + "Auto-mapped form should be present" + ); + // Should appear exactly once + let count = env.matches("AZURE_DEVOPS_EXT_PAT").count(); + assert_eq!(count, 1, "AZURE_DEVOPS_EXT_PAT should appear exactly once, got {}", count); + } + + #[test] + fn test_generate_mcpg_docker_env_without_permissions() { + let fm = minimal_front_matter(); + let env = generate_mcpg_docker_env(&fm); + assert!( + !env.contains("AZURE_DEVOPS_EXT_PAT"), + "Should not map ADO token when permissions.read is not set" + ); + } + + #[test] + fn test_generate_mcpg_docker_env_passthrough_vars() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "tool".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("img:latest".to_string()), + env: { + let mut e = HashMap::new(); + e.insert("PASS_THROUGH".to_string(), "".to_string()); + e.insert("STATIC".to_string(), "value".to_string()); + e + }, + ..Default::default() + }), + ); + let env = generate_mcpg_docker_env(&fm); + assert!(env.contains("-e PASS_THROUGH"), "Should include passthrough var"); + assert!(!env.contains("-e STATIC"), "Should NOT include static var"); + } + + #[test] + fn test_generate_mcpg_docker_env_rejects_invalid_names() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "evil".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("img:latest".to_string()), + env: { + let mut e = HashMap::new(); + // Injection attempt: env var name with Docker flag + e.insert("MY_VAR --privileged".to_string(), "".to_string()); + // Valid env var for comparison + e.insert("GOOD_VAR".to_string(), "".to_string()); + e + }, + ..Default::default() + }), + ); + let env = generate_mcpg_docker_env(&fm); + assert!( + !env.contains("--privileged"), + "Should reject invalid env var name with Docker flag injection" + ); + assert!( + env.contains("-e GOOD_VAR"), + "Should include valid env var" + ); + } + + #[test] + fn test_is_valid_env_var_name() { + assert!(is_valid_env_var_name("MY_VAR")); + assert!(is_valid_env_var_name("_PRIVATE")); + assert!(is_valid_env_var_name("A")); + assert!(is_valid_env_var_name("VAR123")); + assert!(!is_valid_env_var_name("")); + assert!(!is_valid_env_var_name("123ABC")); + assert!(!is_valid_env_var_name("MY-VAR")); + assert!(!is_valid_env_var_name("MY VAR")); + assert!(!is_valid_env_var_name("X --privileged")); + assert!(!is_valid_env_var_name("X -v /etc:/etc:rw")); + } + + // ─── tools.azure-devops MCPG integration ──────────────────────────────── + + #[test] + fn test_ado_tool_generates_mcpg_entry() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops: true\n---\n", + ) + .unwrap(); + // Pass inferred org since no explicit org is set + let config = generate_mcpg_config(&fm, &CompileContext::for_test_with_org(&fm, "inferred-org"), &collect_extensions(&fm)).unwrap(); + let ado = config.mcp_servers.get("azure-devops").unwrap(); + assert_eq!(ado.server_type, "stdio"); + assert_eq!(ado.container.as_deref(), Some(ADO_MCP_IMAGE)); + assert_eq!(ado.entrypoint.as_deref(), Some(ADO_MCP_ENTRYPOINT)); + let args = ado.entrypoint_args.as_ref().unwrap(); + assert!(args.contains(&"-y".to_string())); + assert!(args.contains(&ADO_MCP_PACKAGE.to_string())); + assert!(args.contains(&"inferred-org".to_string())); + // Should have AZURE_DEVOPS_EXT_PAT in env + let env = ado.env.as_ref().unwrap(); + assert!(env.contains_key("AZURE_DEVOPS_EXT_PAT")); + } + + #[test] + fn test_ado_tool_with_toolsets() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops:\n toolsets: [repos, wit, core]\n---\n", + ) + .unwrap(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test_with_org(&fm, "myorg"), &collect_extensions(&fm)).unwrap(); + let ado = config.mcp_servers.get("azure-devops").unwrap(); + let args = ado.entrypoint_args.as_ref().unwrap(); + assert!(args.contains(&"-d".to_string())); + assert!(args.contains(&"repos".to_string())); + assert!(args.contains(&"wit".to_string())); + assert!(args.contains(&"core".to_string())); + } + + #[test] + fn test_ado_tool_with_org_override() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: myorg\n---\n", + ) + .unwrap(); + // Explicit org should be used even when inferred_org is None + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let ado = config.mcp_servers.get("azure-devops").unwrap(); + let args = ado.entrypoint_args.as_ref().unwrap(); + assert!(args.contains(&"myorg".to_string())); + } + + #[test] + fn test_ado_tool_explicit_org_overrides_inferred() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: explicit-org\n---\n", + ) + .unwrap(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test_with_org(&fm, "inferred-org"), &collect_extensions(&fm)).unwrap(); + let ado = config.mcp_servers.get("azure-devops").unwrap(); + let args = ado.entrypoint_args.as_ref().unwrap(); + assert!(args.contains(&"explicit-org".to_string())); + assert!(!args.contains(&"inferred-org".to_string())); + } + + #[test] + fn test_ado_tool_no_org_fails() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops: true\n---\n", + ) + .unwrap(); + // No explicit org and no inferred org — should fail + let result = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)); + assert!(result.is_err()); + assert!( + result.unwrap_err().to_string().contains("no ADO organization"), + "Error should mention missing org" + ); + } + + #[test] + fn test_ado_tool_invalid_org_fails() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: \"my org/bad\"\n---\n", + ) + .unwrap(); + let result = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)); + assert!(result.is_err()); + assert!( + result.unwrap_err().to_string().contains("Invalid ADO org name"), + "Error should mention invalid org" + ); + } + + #[test] + fn test_ado_tool_invalid_toolset_fails() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: myorg\n toolsets: [\"repos\", \"bad toolset\"]\n---\n", + ) + .unwrap(); + let result = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)); + assert!(result.is_err()); + assert!( + result.unwrap_err().to_string().contains("Invalid ADO toolset name"), + "Error should mention invalid toolset" + ); + } + + #[test] + fn test_ado_tool_with_allowed_tools() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: myorg\n allowed:\n - wit_get_work_item\n - core_list_projects\n---\n", + ) + .unwrap(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let ado = config.mcp_servers.get("azure-devops").unwrap(); + let tools = ado.tools.as_ref().unwrap(); + assert_eq!(tools, &["wit_get_work_item", "core_list_projects"]); + } + + #[test] + fn test_ado_tool_disabled_not_generated() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops: false\n---\n", + ) + .unwrap(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + assert!(!config.mcp_servers.contains_key("azure-devops")); + } + + #[test] + fn test_ado_tool_not_set_not_generated() { + let fm = minimal_front_matter(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + assert!(!config.mcp_servers.contains_key("azure-devops")); + } + + #[test] + fn test_ado_tool_skips_manual_mcp_entry() { + // When tools.azure-devops is enabled AND mcp-servers also has azure-devops, + // the tools config takes precedence and the manual entry is skipped. + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: auto-org\nmcp-servers:\n azure-devops:\n container: \"node:20-slim\"\n entrypoint: \"npx\"\n entrypoint-args: [\"-y\", \"@azure-devops/mcp\", \"manual-org\"]\n---\n", + ) + .unwrap(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let ado = config.mcp_servers.get("azure-devops").unwrap(); + // Should use the auto-configured org, not the manual one + let args = ado.entrypoint_args.as_ref().unwrap(); + assert!(args.contains(&"auto-org".to_string())); + assert!(!args.contains(&"manual-org".to_string())); + } + + #[test] + fn test_ado_tool_docker_env_passthrough() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops: true\npermissions:\n read: my-read-sc\n---\n", + ) + .unwrap(); + let env = generate_mcpg_docker_env(&fm); + assert!( + env.contains("AZURE_DEVOPS_EXT_PAT"), + "Should include ADO token passthrough when permissions.read is set" + ); + } + + // ─── validate_docker_args ──────────────────────────────────────────────── + + #[test] + fn test_validate_docker_args_privileged_flag() { + let warnings = validate_docker_args(&["--privileged".to_string()], "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("--privileged"), "should warn about --privileged"); + } + + #[test] + fn test_validate_docker_args_entrypoint_in_args_warns() { + let warnings = validate_docker_args( + &[ + "--entrypoint".to_string(), + "/bin/sh".to_string(), + ], + "my-mcp", + ); + assert!(warnings.iter().any(|w| w.contains("--entrypoint") && w.contains("entrypoint:")), + "should warn about --entrypoint with hint to use entrypoint: field"); + } + + #[test] + fn test_validate_docker_args_volume_flag_calls_mount_validation() { + // -v docker.sock in args bypasses `mounts:` validation; should produce warnings + let warnings = validate_docker_args( + &[ + "-v".to_string(), + "/var/run/docker.sock:/var/run/docker.sock".to_string(), + ], + "my-mcp", + ); + assert!(warnings.iter().any(|w| w.contains("bypasses mounts validation")), + "should warn about volume mount in args"); + assert!(warnings.iter().any(|w| w.contains("Docker socket")), + "should propagate mount source warning for docker.sock"); + } + + #[test] + fn test_validate_docker_args_volume_equals_form() { + // --volume=source:dest form should also be detected + let warnings = validate_docker_args( + &["--volume=/var/run/docker.sock:/var/run/docker.sock".to_string()], + "my-mcp", + ); + assert!(warnings.iter().any(|w| w.contains("bypasses mounts validation")), + "should warn about --volume= form"); + } + + #[test] + fn test_validate_docker_args_safe_args_no_warnings() { + // A legitimate arg like --read-only should produce no warnings + let warnings = validate_docker_args(&["--read-only".to_string()], "my-mcp"); + assert!(warnings.is_empty(), "safe args should not produce warnings"); + } + + #[test] + fn test_validate_docker_args_empty_no_warnings() { + let warnings = validate_docker_args(&[], "my-mcp"); + assert!(warnings.is_empty(), "empty args should not produce warnings"); + } + + #[test] + fn test_validate_docker_args_volume_flag_trailing_warns() { + // -v as the last arg with no mount spec is malformed + let warnings = validate_docker_args(&["-v".to_string()], "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("malformed"), "trailing -v with no mount spec should warn"); + } + + #[test] + fn test_validate_docker_args_long_volume_flag_trailing_warns() { + // --volume as the last arg with no mount spec is malformed + let warnings = validate_docker_args(&["--volume".to_string()], "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("malformed"), "trailing --volume with no mount spec should warn"); + } + + // ─── validate_mcp_url ──────────────────────────────────────────────────── + + #[test] + fn test_validate_mcp_url_https_no_warnings() { + let warnings = validate_mcp_url("https://mcp.dev.azure.com/myorg", "my-mcp"); + assert!(warnings.is_empty(), "https URL should not produce warnings"); + } + + #[test] + fn test_validate_mcp_url_http_no_warnings() { + let warnings = validate_mcp_url("http://localhost:8100/mcp", "my-mcp"); + assert!(warnings.is_empty(), "http URL should not produce warnings"); + } + + #[test] + fn test_validate_mcp_url_bad_scheme_warns() { + let warnings = validate_mcp_url("ftp://files.example.com", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("does not use http://"), "non-HTTP scheme should warn"); + } + + #[test] + fn test_validate_mcp_url_no_scheme_warns() { + let warnings = validate_mcp_url("mcp.dev.azure.com/myorg", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("does not use http://"), "URL without scheme should warn"); + } + + // ─── validate_mount_source ─────────────────────────────────────────────── + + #[test] + fn test_validate_mount_source_docker_sock() { + let warnings = validate_mount_source("/var/run/docker.sock:/var/run/docker.sock:rw", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("Docker socket"), "should warn about Docker socket exposure"); + } + + #[test] + fn test_validate_mount_source_sensitive_path_etc() { + let warnings = validate_mount_source("/etc/passwd:/data/passwd:ro", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("sensitive host path"), "should warn about /etc mount"); + } + + #[test] + fn test_validate_mount_source_sensitive_path_proc() { + let warnings = validate_mount_source("/proc:/host/proc:ro", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("sensitive host path"), "should warn about /proc mount"); + } + + #[test] + fn test_validate_mount_source_case_insensitive() { + // /ETC/shadow should match sensitive /etc prefix (lowercased comparison) + let warnings = validate_mount_source("/ETC/shadow:/data/shadow:ro", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("sensitive host path"), "case-insensitive match should trigger warning"); + } + + #[test] + fn test_validate_mount_source_no_false_positive_on_etc_configs() { + // /etc-configs should NOT match the /etc prefix (path boundary check requires trailing /) + let warnings = validate_mount_source("/etc-configs:/app/config:ro", "my-mcp"); + assert!(warnings.is_empty(), "/etc-configs must not match /etc prefix due to path boundary check"); + } + + #[test] + fn test_validate_mount_source_safe_path_no_warnings() { + // /app/data is not a sensitive path; should produce no warnings + let warnings = validate_mount_source("/app/data:/app/data:ro", "my-mcp"); + assert!(warnings.is_empty(), "safe path should not produce warnings"); + } + + // ─── validate_container_image ──────────────────────────────────────────── + + #[test] + fn test_validate_container_image_empty_string() { + let warnings = validate_container_image("", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("empty"), "should warn about empty image name"); + } + + #[test] + fn test_validate_container_image_shell_metacharacters() { + let warnings = validate_container_image("node:20-slim; rm -rf /", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("unexpected characters"), "should warn about shell metacharacters"); + } + + #[test] + fn test_validate_container_image_valid_name_no_warnings() { + // Standard image references should produce no warnings + assert!(validate_container_image("node:20-slim", "my-mcp").is_empty()); + assert!(validate_container_image("ghcr.io/org/image:latest", "my-mcp").is_empty()); + assert!(validate_container_image("python:3.12-slim", "my-mcp").is_empty()); + } + + // ─── warn_potential_secrets ────────────────────────────────────────────── + + #[test] + fn test_warn_potential_secrets_token_env_var_triggers() { + let env = HashMap::from([("API_TOKEN".to_string(), "secret123".to_string())]); + let headers = HashMap::new(); + let warnings = warn_potential_secrets("my-mcp", &env, &headers); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("API_TOKEN"), "should warn about secret-looking env var"); + } + + #[test] + fn test_warn_potential_secrets_empty_passthrough_no_warnings() { + // Empty string = passthrough; should NOT trigger a warning + let env = HashMap::from([("API_TOKEN".to_string(), "".to_string())]); + let headers = HashMap::new(); + let warnings = warn_potential_secrets("my-mcp", &env, &headers); + assert!(warnings.is_empty(), "empty passthrough value must not trigger a warning"); + } + + #[test] + fn test_warn_potential_secrets_authorization_header_triggers() { + let env = HashMap::new(); + let headers = + HashMap::from([("Authorization".to_string(), "Bearer abc".to_string())]); + let warnings = warn_potential_secrets("my-mcp", &env, &headers); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("Authorization"), "should warn about Authorization header"); + } + + #[test] + fn test_warn_potential_secrets_bearer_value_triggers() { + // A header whose value starts with "Bearer " should also warn + let env = HashMap::new(); + let headers = + HashMap::from([("X-Custom-Auth".to_string(), "Bearer token123".to_string())]); + let warnings = warn_potential_secrets("my-mcp", &env, &headers); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("X-Custom-Auth"), "should warn about header with Bearer value"); + } + + #[test] + fn test_warn_potential_secrets_safe_env_no_warnings() { + // Env keys with non-secret names and non-empty values should produce no warnings + let env = HashMap::from([("MY_CONFIG".to_string(), "value".to_string())]); + let headers = HashMap::new(); + let warnings = warn_potential_secrets("my-mcp", &env, &headers); + assert!(warnings.is_empty(), "non-secret env var should not produce warnings"); + } } diff --git a/src/compile/standalone.rs b/src/compile/standalone.rs index cafe7e5b..d146b9d1 100644 --- a/src/compile/standalone.rs +++ b/src/compile/standalone.rs @@ -9,24 +9,17 @@ use anyhow::{Context, Result}; use async_trait::async_trait; use log::info; -use std::collections::HashMap; use std::path::Path; use super::Compiler; use super::common::{ - self, AWF_VERSION, COPILOT_CLI_VERSION, DEFAULT_POOL, MCPG_PORT, MCPG_VERSION, MCPG_IMAGE, - build_parameters, compute_effective_workspace, generate_acquire_ado_token, - generate_cancel_previous_builds, generate_checkout_self, generate_checkout_steps, - generate_ci_trigger, generate_copilot_ado_env, generate_copilot_params, - generate_enabled_tools_args, generate_executor_ado_env, generate_header_comment, - generate_job_timeout, generate_parameters, generate_pipeline_path, generate_pipeline_resources, - generate_pr_trigger, generate_repositories, generate_schedule, generate_source_path, - generate_working_directory, replace_with_indent, sanitize_filename, validate_comment_target, - validate_front_matter_identity, validate_resolve_pr_thread_statuses, - validate_submit_pr_review_events, validate_update_pr_votes, validate_update_work_item_target, - validate_write_permissions, + AWF_VERSION, MCPG_VERSION, MCPG_IMAGE, + CompileConfig, compile_shared, + generate_cancel_previous_builds, + generate_enabled_tools_args, + generate_mcpg_config, generate_mcpg_docker_env, }; -use super::extensions::{CompilerExtension, McpgServerConfig, McpgGatewayConfig, McpgConfig}; +use super::extensions::CompilerExtension; use super::types::{FrontMatter, McpConfig}; use crate::allowed_hosts::{CORE_ALLOWED_HOSTS, mcp_required_hosts}; use crate::ecosystem_domains::{get_ecosystem_domains, is_ecosystem_identifier, is_known_ecosystem}; @@ -50,207 +43,38 @@ impl Compiler for StandaloneCompiler { ) -> Result { info!("Compiling for standalone target"); - // Validate inputs early, before any values are used in template substitution - validate_front_matter_identity(front_matter)?; - - // Load base template - let template = include_str!("../../templates/base.yml"); - - // Generate schedule - let schedule = match &front_matter.schedule { - Some(s) => generate_schedule(&front_matter.name, s) - .with_context(|| format!("Failed to parse schedule '{}'", s.expression()))?, - None => String::new(), - }; - - let repositories = generate_repositories(&front_matter.repositories); - let checkout_steps = generate_checkout_steps(&front_matter.checkout); - let checkout_self = generate_checkout_self(); - let agent_name = sanitize_filename(&front_matter.name); - - // Collect compiler extensions (runtimes + first-party tools) + // Collect extensions (needed before compile_shared for MCPG config) let extensions = super::extensions::collect_extensions(front_matter); - // Build compile context with inferred metadata (ADO org from git remote, etc.) + // Build compile context for MCPG config generation let input_dir = input_path.parent().unwrap_or(std::path::Path::new(".")); let ctx = super::extensions::CompileContext::new(front_matter, input_dir).await; - // Run extension validations (warnings + errors) - for ext in &extensions { - for warning in ext.validate(&ctx)? { - eprintln!("Warning: {}", warning); - } - } - - let copilot_params = generate_copilot_params(front_matter, &extensions)?; - - // Compute effective workspace - let effective_workspace = compute_effective_workspace( - &front_matter.workspace, - &front_matter.checkout, - &front_matter.name, - ); - let working_directory = generate_working_directory(&effective_workspace); - let pipeline_resources = generate_pipeline_resources(&front_matter.triggers)?; - let has_schedule = front_matter.schedule.is_some(); - let pr_trigger = generate_pr_trigger(&front_matter.triggers, has_schedule); - let ci_trigger = generate_ci_trigger(&front_matter.triggers, has_schedule); - let cancel_previous_builds = generate_cancel_previous_builds(&front_matter.triggers); - - // Generate source path for Stage 2 - let source_path = generate_source_path(input_path); - - // Generate pipeline path for integrity checking - let pipeline_path = generate_pipeline_path(output_path); - - // Generate comma-separated domain list for AWF + // Standalone-specific values let allowed_domains = generate_allowed_domains(front_matter, &extensions)?; - - // Generate --enabled-tools args for SafeOutputs tool filtering let enabled_tools_args = generate_enabled_tools_args(front_matter); + let cancel_previous_builds = generate_cancel_previous_builds(&front_matter.triggers); - // Pool name - let pool = front_matter - .pool - .as_ref() - .map(|p| p.name().to_string()) - .unwrap_or_else(|| DEFAULT_POOL.to_string()); - - // Generate hooks - let setup_job = generate_setup_job(&front_matter.setup, &front_matter.name, &pool); - let teardown_job = generate_teardown_job(&front_matter.teardown, &front_matter.name, &pool); - let has_memory = front_matter - .tools - .as_ref() - .and_then(|t| t.cache_memory.as_ref()) - .is_some_and(|cm| cm.is_enabled()); - - // Build parameters list: user-defined + auto-injected clearMemory for memory - let parameters = build_parameters(&front_matter.parameters, has_memory); - let parameters_yaml = generate_parameters(¶meters)?; - - let prepare_steps = generate_prepare_steps(&front_matter.steps, &extensions)?; - let finalize_steps = generate_finalize_steps(&front_matter.post_steps); - let agentic_depends_on = generate_agentic_depends_on(&front_matter.setup); - let job_timeout = generate_job_timeout(front_matter); - - // Generate service connection token acquisition steps and env vars - let acquire_read_token = generate_acquire_ado_token( - front_matter - .permissions - .as_ref() - .and_then(|p| p.read.as_deref()), - "SC_READ_TOKEN", - ); - let copilot_ado_env = generate_copilot_ado_env( - front_matter - .permissions - .as_ref() - .and_then(|p| p.read.as_deref()), - ); - let acquire_write_token = generate_acquire_ado_token( - front_matter - .permissions - .as_ref() - .and_then(|p| p.write.as_deref()), - "SC_WRITE_TOKEN", - ); - let executor_ado_env = generate_executor_ado_env( - front_matter - .permissions - .as_ref() - .and_then(|p| p.write.as_deref()), - ); - - // Validate that write-requiring safe-outputs have a write service connection - validate_write_permissions(front_matter)?; - // Validate comment-on-work-item has required target field - validate_comment_target(front_matter)?; - // Validate update-work-item has required target field - validate_update_work_item_target(front_matter)?; - // Validate submit-pr-review has required allowed-events field - validate_submit_pr_review_events(front_matter)?; - // Validate update-pr vote operation has required allowed-votes field - validate_update_pr_votes(front_matter)?; - // Validate resolve-pr-review-thread has required allowed-statuses field - validate_resolve_pr_thread_statuses(front_matter)?; - - // Load threat analysis prompt template - let threat_analysis_prompt = include_str!("../../templates/threat-analysis.md"); - - // Insert threat analysis prompt first - let template = replace_with_indent( - template, - "{{ threat_analysis_prompt }}", - threat_analysis_prompt, - ); - - // Replace template markers - let compiler_version = env!("CARGO_PKG_VERSION"); - let replacements: Vec<(&str, &str)> = vec![ - ("{{ parameters }}", ¶meters_yaml), - ("{{ compiler_version }}", compiler_version), - ("{{ firewall_version }}", AWF_VERSION), - ("{{ mcpg_version }}", MCPG_VERSION), - ("{{ mcpg_image }}", MCPG_IMAGE), - ("{{ copilot_version }}", COPILOT_CLI_VERSION), - ("{{ pool }}", &pool), - ("{{ setup_job }}", &setup_job), - ("{{ teardown_job }}", &teardown_job), - ("{{ prepare_steps }}", &prepare_steps), - ("{{ finalize_steps }}", &finalize_steps), - ("{{ agentic_depends_on }}", &agentic_depends_on), - ("{{ job_timeout }}", &job_timeout), - ("{{ repositories }}", &repositories), - ("{{ schedule }}", &schedule), - ("{{ pipeline_resources }}", &pipeline_resources), - ("{{ pr_trigger }}", &pr_trigger), - ("{{ ci_trigger }}", &ci_trigger), - ("{{ checkout_self }}", &checkout_self), - ("{{ checkout_repositories }}", &checkout_steps), - ("{{ cancel_previous_builds }}", &cancel_previous_builds), - ("{{ agent }}", &agent_name), - ("{{ agent_name }}", &front_matter.name), - ("{{ agent_description }}", &front_matter.description), - ("{{ copilot_params }}", &copilot_params), - ("{{ source_path }}", &source_path), - ("{{ pipeline_path }}", &pipeline_path), - ("{{ working_directory }}", &working_directory), - ("{{ workspace }}", &working_directory), - ("{{ allowed_domains }}", &allowed_domains), - ("{{ enabled_tools_args }}", &enabled_tools_args), - ("{{ agent_content }}", markdown_body), - ("{{ acquire_ado_token }}", &acquire_read_token), - ("{{ copilot_ado_env }}", &copilot_ado_env), - ("{{ acquire_write_token }}", &acquire_write_token), - ("{{ executor_ado_env }}", &executor_ado_env), - ]; - - let pipeline_yaml = replacements - .into_iter() - .fold(template, |yaml, (placeholder, replacement)| { - replace_with_indent(&yaml, placeholder, replacement) - }); - - // Always generate MCPG config — safeoutputs is always required regardless - // of whether additional mcp-servers are configured in front matter. - let config = generate_mcpg_config(front_matter, &ctx, &extensions)?; + let config_obj = generate_mcpg_config(front_matter, &ctx, &extensions)?; let mcpg_config_json = - serde_json::to_string_pretty(&config).context("Failed to serialize MCPG config")?; - - let pipeline_yaml = - replace_with_indent(&pipeline_yaml, "{{ mcpg_config }}", &mcpg_config_json); - - // Generate additional -e flags for MCPG Docker run (env passthrough for MCP containers) + serde_json::to_string_pretty(&config_obj).context("Failed to serialize MCPG config")?; let mcpg_docker_env = generate_mcpg_docker_env(front_matter); - let pipeline_yaml = - replace_with_indent(&pipeline_yaml, "{{ mcpg_docker_env }}", &mcpg_docker_env); - // Prepend header comment for pipeline detection - let header = generate_header_comment(input_path); - let pipeline_yaml = format!("{}{}", header, pipeline_yaml); + let config = CompileConfig { + template: include_str!("../../templates/base.yml").to_string(), + extra_replacements: vec![ + ("{{ firewall_version }}".into(), AWF_VERSION.into()), + ("{{ mcpg_version }}".into(), MCPG_VERSION.into()), + ("{{ mcpg_image }}".into(), MCPG_IMAGE.into()), + ("{{ allowed_domains }}".into(), allowed_domains), + ("{{ enabled_tools_args }}".into(), enabled_tools_args), + ("{{ cancel_previous_builds }}".into(), cancel_previous_builds), + ("{{ mcpg_config }}".into(), mcpg_config_json), + ("{{ mcpg_docker_env }}".into(), mcpg_docker_env), + ], + }; - Ok(pipeline_yaml) + compile_shared(input_path, output_path, front_matter, markdown_body, &extensions, config).await } } @@ -395,1428 +219,16 @@ fn generate_allowed_domains( Ok(allowlist.join(",")) } -/// Generate the setup job YAML -fn generate_setup_job(setup_steps: &[serde_yaml::Value], agent_name: &str, pool: &str) -> String { - if setup_steps.is_empty() { - return String::new(); - } - - let steps_yaml = common::format_steps_yaml_indented(setup_steps, 4); - - format!( - r#"- job: SetupJob - displayName: "{} - Setup" - pool: - name: {} - steps: - - checkout: self -{} -"#, - agent_name, pool, steps_yaml - ) -} - -/// Generate the teardown job YAML -fn generate_teardown_job( - teardown_steps: &[serde_yaml::Value], - agent_name: &str, - pool: &str, -) -> String { - if teardown_steps.is_empty() { - return String::new(); - } - - let steps_yaml = common::format_steps_yaml(teardown_steps); - - format!( - r#" - job: TeardownJob - displayName: "{} - Teardown" - dependsOn: ProcessSafeOutputs - pool: - name: {} - steps: - - checkout: self -{} -"#, - agent_name, pool, steps_yaml - ) -} - -/// Generate prepare steps (inline), including extension steps and user-defined steps. -fn generate_prepare_steps( - prepare_steps: &[serde_yaml::Value], - extensions: &[super::extensions::Extension], -) -> Result { - let mut parts = Vec::new(); - - // Extension prepare steps and prompt supplements (runtimes + first-party tools) - for ext in extensions { - for step in ext.prepare_steps() { - parts.push(step); - } - if let Some(prompt) = ext.prompt_supplement() { - parts.push(super::extensions::wrap_prompt_append(&prompt, ext.name())?); - } - } - - if !prepare_steps.is_empty() { - parts.push(common::format_steps_yaml_indented(prepare_steps, 0)); - } - - Ok(parts.join("\n\n")) -} - -/// Generate finalize steps (inline) -fn generate_finalize_steps(finalize_steps: &[serde_yaml::Value]) -> String { - if finalize_steps.is_empty() { - return String::new(); - } - - common::format_steps_yaml_indented(finalize_steps, 0) -} - -/// Generate dependsOn clause for setup job -fn generate_agentic_depends_on(setup_steps: &[serde_yaml::Value]) -> String { - if !setup_steps.is_empty() { - "dependsOn: SetupJob".to_string() - } else { - String::new() - } -} - -/// Generate MCPG configuration from front matter. -/// -/// Converts the front matter `mcp-servers` definitions into MCPG-compatible JSON. -/// SafeOutputs is always included as an HTTP backend. Extension-contributed MCPG -/// entries (e.g., azure-devops) are included via the `extensions` parameter. -pub fn generate_mcpg_config( - front_matter: &FrontMatter, - ctx: &super::extensions::CompileContext, - extensions: &[super::extensions::Extension], -) -> Result { - let mut mcp_servers = HashMap::new(); - - // SafeOutputs is always included as an HTTP backend. - // MCPG runs with --network host, so it reaches SafeOutputs via localhost - // (not host.docker.internal, which requires Docker DNS and isn't available - // in host network mode on Linux). - mcp_servers.insert( - "safeoutputs".to_string(), - McpgServerConfig { - server_type: "http".to_string(), - container: None, - entrypoint: None, - entrypoint_args: None, - mounts: None, - args: None, - url: Some("http://localhost:${SAFE_OUTPUTS_PORT}/mcp".to_string()), - headers: Some(HashMap::from([( - "Authorization".to_string(), - "Bearer ${SAFE_OUTPUTS_API_KEY}".to_string(), - )])), - env: None, - tools: None, - }, - ); - - // Add extension-contributed MCPG server entries (e.g., azure-devops) - for ext in extensions { - for (name, config) in ext.mcpg_servers(ctx)? { - mcp_servers.insert(name, config); - } - } - - for (name, config) in &front_matter.mcp_servers { - // Prevent user-defined MCPs from overwriting the reserved safeoutputs backend - if name.eq_ignore_ascii_case("safeoutputs") { - log::warn!( - "MCP name 'safeoutputs' is reserved for the safe outputs HTTP backend — skipping" - ); - continue; - } - - // Skip if already auto-configured by an extension (e.g., tools.azure-devops) - if mcp_servers.contains_key(name) { - continue; - } - - let (is_enabled, options) = match config { - McpConfig::Enabled(enabled) => (*enabled, None), - McpConfig::WithOptions(opts) => (opts.enabled.unwrap_or(true), Some(opts)), - }; - - if !is_enabled { - continue; - } - - if let Some(opts) = options { - if opts.container.is_some() && opts.url.is_some() { - log::warn!( - "MCP '{}': both 'container' and 'url' are set — using 'container' (stdio). \ - Remove 'url' to silence this warning.", - name - ); - } - - if let Some(container) = &opts.container { - // Container-based stdio MCP (MCPG-native, per spec §3.2.1) - for w in validate_container_image(container, name) { eprintln!("{}", w); } - // Validate mount paths for sensitive host directories - for mount in &opts.mounts { - for w in validate_mount_source(mount, name) { eprintln!("{}", w); } - } - // Validate Docker runtime args for privilege escalation - for w in validate_docker_args(&opts.args, name) { eprintln!("{}", w); } - // Warn about potential inline secrets (check headers too in case user set both) - for w in warn_potential_secrets(name, &opts.env, &opts.headers) { eprintln!("{}", w); } - let entrypoint_args = if opts.entrypoint_args.is_empty() { - None - } else { - Some(opts.entrypoint_args.clone()) - }; - let args = if opts.args.is_empty() { - None - } else { - Some(opts.args.clone()) - }; - let mounts = if opts.mounts.is_empty() { - None - } else { - Some(opts.mounts.clone()) - }; - let env = if opts.env.is_empty() { - None - } else { - Some(opts.env.clone()) - }; - let tools = if opts.allowed.is_empty() { - None - } else { - Some(opts.allowed.clone()) - }; - mcp_servers.insert( - name.clone(), - McpgServerConfig { - server_type: "stdio".to_string(), - container: Some(container.clone()), - entrypoint: opts.entrypoint.clone(), - entrypoint_args, - mounts, - args, - url: None, - headers: None, - env, - tools, - }, - ); - } else if let Some(url) = &opts.url { - // HTTP-based MCP (remote server) - for w in validate_mcp_url(url, name) { eprintln!("{}", w); } - // Warn about potential inline secrets in headers - for w in warn_potential_secrets(name, &HashMap::new(), &opts.headers) { eprintln!("{}", w); } - if !opts.env.is_empty() { - eprintln!( - "Warning: MCP '{}': env vars are not supported for HTTP MCPs — they will be ignored. \ - Use headers for authentication instead.", - name - ); - } - let headers = if opts.headers.is_empty() { - None - } else { - Some(opts.headers.clone()) - }; - let tools = if opts.allowed.is_empty() { - None - } else { - Some(opts.allowed.clone()) - }; - mcp_servers.insert( - name.clone(), - McpgServerConfig { - server_type: "http".to_string(), - container: None, - entrypoint: None, - entrypoint_args: None, - mounts: None, - args: None, - url: Some(url.clone()), - headers, - env: None, - tools, - }, - ); - } else { - log::warn!("MCP '{}' has no container or url — skipping", name); - continue; - } - } else { - log::warn!("MCP '{}' has no container or url — skipping", name); - } - } - - Ok(McpgConfig { - mcp_servers, - gateway: McpgGatewayConfig { - port: MCPG_PORT, - domain: "host.docker.internal".to_string(), - api_key: "${MCP_GATEWAY_API_KEY}".to_string(), - payload_dir: "/tmp/gh-aw/mcp-payloads".to_string(), - }, - }) -} - -/// Sensitive host path prefixes that should not be bind-mounted into MCP containers. -const SENSITIVE_MOUNT_PREFIXES: &[&str] = &[ - "/etc", - "/root", - "/home", - "/proc", - "/sys", -]; - -/// Docker runtime flag names that grant dangerous host access. -/// Checked both as `--flag=value` and as `--flag value` (split across two args). -const DANGEROUS_DOCKER_FLAGS: &[&str] = &[ - "--privileged", - "--cap-add", - "--security-opt", - "--pid", - "--network", - "--ipc", - "--user", - "-u", - "--add-host", - "--entrypoint", -]; - -/// Validate a container image name for injection attempts. -/// Allows `[a-zA-Z0-9./_:-]` which covers standard Docker image references. -fn validate_container_image(image: &str, mcp_name: &str) -> Vec { - let mut warnings = Vec::new(); - if image.is_empty() { - warnings.push(format!("Warning: MCP '{}': container image name is empty.", mcp_name)); - return warnings; - } - if !image.chars().all(|c| c.is_ascii_alphanumeric() || "._/:-@".contains(c)) { - warnings.push(format!( - "Warning: MCP '{}': container image '{}' contains unexpected characters. \ - Image names should only contain [a-zA-Z0-9./_:-@].", - mcp_name, image - )); - } - warnings -} - -/// Validate a volume mount source path, warning on sensitive host directories. -/// Docker socket mounts are escalated to stderr warnings since they grant container escape. -/// Note: paths are lowercased for comparison to catch cross-platform casing (e.g. `/ETC/shadow`). -fn validate_mount_source(mount: &str, mcp_name: &str) -> Vec { - let mut warnings = Vec::new(); - // Format: "source:dest:mode" - if let Some(source) = mount.split(':').next() { - let source_lower = source.to_lowercase(); - if source_lower.contains("docker.sock") { - warnings.push(format!( - "Warning: MCP '{}': mount '{}' exposes the Docker socket to the MCP container. \ - This grants full host Docker access and may allow container escape.", - mcp_name, mount - )); - return warnings; - } - for prefix in SENSITIVE_MOUNT_PREFIXES { - // Match exact path or path with trailing separator to avoid false positives - // (e.g. /etc matches /etc and /etc/shadow, but not /etc-configs) - if source_lower == *prefix || source_lower.starts_with(&format!("{}/", prefix)) { - warnings.push(format!( - "Warning: MCP '{}': mount source '{}' references a sensitive host path ({}). \ - Ensure this is intentional.", - mcp_name, source, prefix - )); - break; - } - } - } - warnings -} - -/// Validate Docker runtime args for dangerous flags that could escalate privileges. -/// Also detects volume mounts smuggled via `-v`/`--volume` that bypass `mounts` validation. -/// Handles both `--flag=value` and `--flag value` (split) forms. -fn validate_docker_args(args: &[String], mcp_name: &str) -> Vec { - let mut warnings = Vec::new(); - for (i, arg) in args.iter().enumerate() { - let arg_lower = arg.to_lowercase(); - // Check for dangerous Docker flags (both --flag=value and --flag value) - for dangerous in DANGEROUS_DOCKER_FLAGS { - if arg_lower == *dangerous - || arg_lower.starts_with(&format!("{}=", dangerous)) - { - let extra_hint = if *dangerous == "--entrypoint" { - " Use the 'entrypoint:' field instead of passing --entrypoint in args." - } else { - "" - }; - warnings.push(format!( - "Warning: MCP '{}': Docker arg '{}' grants elevated privileges. \ - Ensure this is intentional.{}", - mcp_name, arg, extra_hint - )); - } - } - // Check for volume mounts smuggled via args (bypasses mounts validation) - if arg == "-v" || arg == "--volume" { - if let Some(mount_spec) = args.get(i + 1) { - warnings.push(format!( - "Warning: MCP '{}': volume mount '{}' in args bypasses mounts validation. \ - Use the 'mounts:' field instead.", - mcp_name, mount_spec - )); - warnings.extend(validate_mount_source(mount_spec, mcp_name)); - } else { - warnings.push(format!( - "Warning: MCP '{}': '{}' flag is the last arg with no mount spec following it. \ - This is likely a malformed args list.", - mcp_name, arg - )); - } - } else if arg_lower.starts_with("-v=") || arg_lower.starts_with("--volume=") { - let mount_spec = arg.splitn(2, '=').nth(1).unwrap_or(""); - warnings.push(format!( - "Warning: MCP '{}': volume mount '{}' in args bypasses mounts validation. \ - Use the 'mounts:' field instead.", - mcp_name, mount_spec - )); - warnings.extend(validate_mount_source(mount_spec, mcp_name)); - } - } - warnings -} - -/// Validate that an MCP HTTP URL uses an allowed scheme. -fn validate_mcp_url(url: &str, mcp_name: &str) -> Vec { - let mut warnings = Vec::new(); - if !url.starts_with("https://") && !url.starts_with("http://") { - warnings.push(format!( - "Warning: MCP '{}': URL '{}' does not use http:// or https:// scheme. \ - This may not work with MCPG.", - mcp_name, url - )); - } - warnings -} - -/// Warn when env values or headers look like they contain inline secrets. -/// Secrets should use pipeline variables and passthrough ("") instead. -fn warn_potential_secrets(mcp_name: &str, env: &HashMap, headers: &HashMap) -> Vec { - let mut warnings = Vec::new(); - for (key, value) in env { - if !value.is_empty() && (key.to_lowercase().contains("token") - || key.to_lowercase().contains("secret") - || key.to_lowercase().contains("key") - || key.to_lowercase().contains("password") - || key.to_lowercase().contains("pat")) - { - warnings.push(format!( - "Warning: MCP '{}': env var '{}' has an inline value that may be a secret. \ - Use an empty string (\"\") for passthrough from pipeline variables instead.", - mcp_name, key - )); - } - } - for (key, value) in headers { - if value.to_lowercase().contains("bearer ") - || key.to_lowercase() == "authorization" - { - warnings.push(format!( - "Warning: MCP '{}': header '{}' may contain inline credentials. \ - These will appear in plaintext in the compiled pipeline YAML.", - mcp_name, key - )); - } - } - warnings -} - -/// Validate that a string is a legal environment variable name (`[A-Za-z_][A-Za-z0-9_]*`). -/// Prevents injection of arbitrary Docker flags via user-controlled front matter keys. -fn is_valid_env_var_name(name: &str) -> bool { - let mut chars = name.chars(); - chars - .next() - .map_or(false, |c| c.is_ascii_alphabetic() || c == '_') - && chars.all(|c| c.is_ascii_alphanumeric() || c == '_') -} - -/// Generate additional `-e` flags for the MCPG Docker run command. -/// -/// MCP containers spawned by MCPG may need environment variables that flow from -/// the pipeline through the MCPG container (passthrough). This function: -/// 1. Auto-maps `AZURE_DEVOPS_EXT_PAT` from `SC_READ_TOKEN` when `permissions.read` is configured -/// 2. Collects passthrough env vars (value is `""`) from container-based MCP configs -/// -/// Only container-based MCPs are considered — HTTP MCPs don't have child containers -/// that need env passthrough. -/// -/// Returns flags formatted for inline insertion in the `docker run` command. -/// The marker sits after the last hardcoded `-e` flag, so the output must -/// include leading `\\\n` for line continuation when non-empty. -pub fn generate_mcpg_docker_env(front_matter: &FrontMatter) -> String { - let mut env_flags: Vec = Vec::new(); - let mut seen: std::collections::HashSet = std::collections::HashSet::new(); - - // Check if any container MCP requests AZURE_DEVOPS_EXT_PAT passthrough - let any_mcp_needs_ado_token = front_matter.mcp_servers.values().any(|config| { - matches!(config, McpConfig::WithOptions(opts) - if opts.enabled.unwrap_or(true) - && opts.container.is_some() - && opts.env.contains_key("AZURE_DEVOPS_EXT_PAT")) - }); - - // Also check if tools.azure-devops is enabled (auto-configured ADO MCP always needs token) - let ado_tool_needs_token = front_matter - .tools - .as_ref() - .and_then(|t| t.azure_devops.as_ref()) - .is_some_and(|ado| ado.is_enabled()); - - // Auto-map AZURE_DEVOPS_EXT_PAT from SC_READ_TOKEN when permissions.read is configured - // AND at least one container MCP requests it via env passthrough (or the ADO tool is enabled) - if any_mcp_needs_ado_token || ado_tool_needs_token { - if front_matter.permissions.as_ref().and_then(|p| p.read.as_ref()).is_some() { - env_flags.push( - "-e AZURE_DEVOPS_EXT_PAT=\"$(SC_READ_TOKEN)\"".to_string(), - ); - seen.insert("AZURE_DEVOPS_EXT_PAT".to_string()); - } else { - eprintln!( - "Warning: one or more container MCPs request AZURE_DEVOPS_EXT_PAT passthrough \ - but permissions.read is not configured. The token will be empty at runtime. \ - Add `permissions: {{ read: }}` to enable auto-mapping." - ); - } - } - - // Collect passthrough env vars from container-based MCP configs only. - // HTTP MCPs don't have child containers — env passthrough doesn't apply. - for (mcp_name, config) in &front_matter.mcp_servers { - let opts = match config { - McpConfig::WithOptions(opts) if opts.enabled.unwrap_or(true) => opts, - _ => continue, - }; - - // Only container-based MCPs need env passthrough on the MCPG Docker run - if opts.container.is_none() { - continue; - } - - for (var_name, var_value) in &opts.env { - // Validate env var name to prevent Docker flag injection (e.g. "X --privileged") - if !is_valid_env_var_name(var_name) { - log::warn!( - "MCP '{}': skipping invalid env var name '{}' — must match [A-Za-z_][A-Za-z0-9_]*", - mcp_name, var_name - ); - continue; - } - if seen.contains(var_name) { - continue; - } - // Passthrough: empty string means forward from host/pipeline environment - if var_value.is_empty() { - env_flags.push(format!("-e {}", var_name)); - seen.insert(var_name.clone()); - } - } - } - - env_flags.sort(); - if env_flags.is_empty() { - // No extra flags — emit a lone `\` so the bash line continuation from the - // preceding `-e MCP_GATEWAY_API_KEY=...` flag connects to the image name on - // the next line. This is valid bash: a backslash at end-of-line continues - // the command. replace_with_indent preserves this on its own indented line. - "\\".to_string() - } else { - // Emit each flag on its own line with `\` continuation. - // replace_with_indent handles indentation from the template (base.yml), - // so we only emit the content without hardcoded spaces. - let flags = env_flags.join(" \\\n"); - format!("{} \\", flags) - } -} - #[cfg(test)] mod tests { use super::*; - use crate::compile::common::{ - parse_markdown, ADO_MCP_IMAGE, ADO_MCP_ENTRYPOINT, ADO_MCP_PACKAGE, ADO_MCP_SERVER_NAME, - }; - use crate::compile::types::{McpConfig, McpOptions}; + use crate::compile::common::parse_markdown; fn minimal_front_matter() -> FrontMatter { let (fm, _) = parse_markdown("---\nname: test-agent\ndescription: test\n---\n").unwrap(); fm } - #[test] - fn test_generate_firewall_config_custom_mcp() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "my-tool".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - entrypoint: Some("node".to_string()), - entrypoint_args: vec!["server.js".to_string()], - allowed: vec!["do_thing".to_string()], - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let server = config.mcp_servers.get("my-tool").unwrap(); - assert_eq!(server.server_type, "stdio"); - assert_eq!(server.container.as_ref().unwrap(), "node:20-slim"); - assert_eq!(server.entrypoint.as_ref().unwrap(), "node"); - assert_eq!( - server.entrypoint_args.as_ref().unwrap(), - &vec!["server.js"] - ); - assert_eq!( - server.tools.as_ref().unwrap(), - &vec!["do_thing".to_string()] - ); - } - - #[test] - fn test_generate_mcpg_config_mcp_without_transport_skipped() { - let mut fm = minimal_front_matter(); - // An MCP with no container or url should be skipped - fm.mcp_servers - .insert("phantom".to_string(), McpConfig::Enabled(true)); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - assert!(!config.mcp_servers.contains_key("phantom")); - // safeoutputs is always present - assert!(config.mcp_servers.contains_key("safeoutputs")); - } - - #[test] - fn test_generate_mcpg_config_disabled_mcp_skipped() { - let mut fm = minimal_front_matter(); - fm.mcp_servers - .insert("my-tool".to_string(), McpConfig::Enabled(false)); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - assert!(!config.mcp_servers.contains_key("my-tool")); - } - - #[test] - fn test_generate_mcpg_config_empty_mcp_servers() { - let fm = minimal_front_matter(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - // Only safeoutputs should be present - assert_eq!(config.mcp_servers.len(), 1); - assert!(config.mcp_servers.contains_key("safeoutputs")); - } - - #[test] - fn test_generate_mcpg_config_gateway_defaults() { - let fm = minimal_front_matter(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - assert_eq!(config.gateway.port, 80); - assert_eq!(config.gateway.domain, "host.docker.internal"); - assert_eq!(config.gateway.api_key, "${MCP_GATEWAY_API_KEY}"); - assert_eq!(config.gateway.payload_dir, "/tmp/gh-aw/mcp-payloads"); - } - - #[test] - fn test_generate_mcpg_config_json_roundtrip() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "my-tool".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("python:3.12-slim".to_string()), - entrypoint: Some("python".to_string()), - entrypoint_args: vec!["-m".to_string(), "server".to_string()], - allowed: vec!["query".to_string()], - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let json = serde_json::to_string_pretty(&config).expect("Config should serialize to JSON"); - let parsed: serde_json::Value = - serde_json::from_str(&json).expect("Serialized JSON should parse back"); - - // Verify top-level structure matches MCPG expectation - assert!( - parsed.get("mcpServers").is_some(), - "Should have mcpServers key" - ); - assert!(parsed.get("gateway").is_some(), "Should have gateway key"); - - let gw = parsed.get("gateway").unwrap(); - assert!(gw.get("port").is_some(), "Gateway should have port"); - assert!(gw.get("domain").is_some(), "Gateway should have domain"); - assert!(gw.get("apiKey").is_some(), "Gateway should have apiKey"); - assert!( - gw.get("payloadDir").is_some(), - "Gateway should have payloadDir" - ); - } - - #[test] - fn test_generate_mcpg_config_safeoutputs_variable_placeholders() { - let fm = minimal_front_matter(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let so = config.mcp_servers.get("safeoutputs").unwrap(); - - // URL should reference the runtime-substituted port - let url = so.url.as_ref().unwrap(); - assert!( - url.contains("${SAFE_OUTPUTS_PORT}"), - "SafeOutputs URL should use ${{SAFE_OUTPUTS_PORT}} placeholder, got: {url}" - ); - - // Auth header should reference the runtime-substituted API key - let headers = so.headers.as_ref().unwrap(); - let auth = headers.get("Authorization").unwrap(); - assert!( - auth.contains("${SAFE_OUTPUTS_API_KEY}"), - "SafeOutputs auth header should use ${{SAFE_OUTPUTS_API_KEY}} placeholder, got: {auth}" - ); - } - - #[test] - fn test_generate_mcpg_config_safeoutputs_is_http_type() { - let fm = minimal_front_matter(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let so = config.mcp_servers.get("safeoutputs").unwrap(); - assert_eq!(so.server_type, "http"); - assert!( - so.container.is_none(), - "HTTP backend should have no container" - ); - assert!(so.args.is_none(), "HTTP backend should have no args"); - assert!(so.url.is_some(), "HTTP backend must have a URL"); - } - - #[test] - fn test_generate_mcpg_config_container_mcp_is_stdio_type() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "runner".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - entrypoint: Some("node".to_string()), - entrypoint_args: vec!["srv.js".to_string()], - allowed: vec!["run".to_string()], - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let srv = config.mcp_servers.get("runner").unwrap(); - assert_eq!(srv.server_type, "stdio"); - assert!( - srv.container.is_some(), - "stdio server must have a container" - ); - assert!(srv.url.is_none(), "stdio server should have no URL"); - } - - #[test] - fn test_generate_mcpg_config_container_with_env() { - let mut fm = minimal_front_matter(); - let mut env = std::collections::HashMap::new(); - env.insert("TOKEN".to_string(), "secret".to_string()); - fm.mcp_servers.insert( - "with-env".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - env, - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let srv = config.mcp_servers.get("with-env").unwrap(); - let e = srv.env.as_ref().unwrap(); - assert_eq!(e.get("TOKEN").unwrap(), "secret"); - } - - #[test] - fn test_generate_mcpg_config_reserved_safeoutputs_name_rejected() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "safeoutputs".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("evil:latest".to_string()), - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - // The reserved entry should still be the HTTP backend, not the user's container - let so = config.mcp_servers.get("safeoutputs").unwrap(); - assert_eq!( - so.server_type, "http", - "safeoutputs should remain HTTP backend" - ); - assert!( - so.container.is_none(), - "User container should not overwrite safeoutputs" - ); - } - - #[test] - fn test_generate_mcpg_config_safeoutputs_reserved_name_skipped() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "SafeOutputs".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - entrypoint: Some("node".to_string()), - entrypoint_args: vec!["evil.js".to_string()], - allowed: vec!["hijack".to_string()], - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - // The user-defined "SafeOutputs" must not overwrite the built-in entry - let so = config.mcp_servers.get("safeoutputs").unwrap(); - assert_eq!(so.server_type, "http"); - assert!(so.url.as_ref().unwrap().contains("localhost")); - // No stdio entry should have been added under any casing - assert_eq!(config.mcp_servers.len(), 1); - } - - #[test] - fn test_generate_mcpg_config_http_mcp() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "remote".to_string(), - McpConfig::WithOptions(McpOptions { - url: Some("https://mcp.example.com/api".to_string()), - headers: { - let mut h = HashMap::new(); - h.insert("X-Custom".to_string(), "value".to_string()); - h - }, - allowed: vec!["query".to_string()], - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let srv = config.mcp_servers.get("remote").unwrap(); - assert_eq!(srv.server_type, "http"); - assert_eq!( - srv.url.as_ref().unwrap(), - "https://mcp.example.com/api" - ); - assert_eq!( - srv.headers.as_ref().unwrap().get("X-Custom").unwrap(), - "value" - ); - assert!(srv.container.is_none(), "HTTP server should have no container"); - } - - #[test] - fn test_generate_mcpg_config_container_with_entrypoint() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "ado".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - entrypoint: Some("npx".to_string()), - entrypoint_args: vec!["-y".to_string(), "@azure-devops/mcp".to_string()], - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let srv = config.mcp_servers.get("ado").unwrap(); - assert_eq!(srv.server_type, "stdio"); - assert_eq!(srv.container.as_ref().unwrap(), "node:20-slim"); - assert_eq!(srv.entrypoint.as_ref().unwrap(), "npx"); - assert_eq!( - srv.entrypoint_args.as_ref().unwrap(), - &vec!["-y", "@azure-devops/mcp"] - ); - } - - #[test] - fn test_generate_mcpg_config_container_with_mounts() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "data-tool".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("data-tool:latest".to_string()), - mounts: vec!["/host/data:/app/data:ro".to_string()], - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let srv = config.mcp_servers.get("data-tool").unwrap(); - assert_eq!( - srv.mounts.as_ref().unwrap(), - &vec!["/host/data:/app/data:ro"] - ); - } - - #[test] - fn test_generate_mcpg_config_no_transport_skipped() { - let mut fm = minimal_front_matter(); - // MCP with options but no container or url should be skipped - fm.mcp_servers.insert( - "no-transport".to_string(), - McpConfig::WithOptions(McpOptions { - allowed: vec!["tool".to_string()], - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - assert!(!config.mcp_servers.contains_key("no-transport")); - } - - #[test] - fn test_generate_mcpg_docker_env_with_permissions_read() { - let mut fm = minimal_front_matter(); - fm.permissions = Some(crate::compile::types::PermissionsConfig { - read: Some("my-read-sc".to_string()), - write: None, - }); - // A container MCP must request AZURE_DEVOPS_EXT_PAT for the auto-map to trigger - fm.mcp_servers.insert( - "ado-tool".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - env: { - let mut e = HashMap::new(); - e.insert("AZURE_DEVOPS_EXT_PAT".to_string(), "".to_string()); - e - }, - ..Default::default() - }), - ); - let env = generate_mcpg_docker_env(&fm); - assert!( - env.contains("-e AZURE_DEVOPS_EXT_PAT=\"$(SC_READ_TOKEN)\""), - "Should auto-map ADO token when permissions.read is set and MCP requests it" - ); - } - - #[test] - fn test_generate_mcpg_docker_env_permissions_read_no_mcp_request() { - let mut fm = minimal_front_matter(); - fm.permissions = Some(crate::compile::types::PermissionsConfig { - read: Some("my-read-sc".to_string()), - write: None, - }); - // No MCP requests AZURE_DEVOPS_EXT_PAT — auto-map should NOT trigger - fm.mcp_servers.insert( - "unrelated-tool".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - ..Default::default() - }), - ); - let env = generate_mcpg_docker_env(&fm); - assert!( - !env.contains("AZURE_DEVOPS_EXT_PAT"), - "Should NOT auto-map ADO token when no MCP requests it" - ); - } - - #[test] - fn test_generate_mcpg_docker_env_dedup_auto_map_and_passthrough() { - // When permissions.read is set AND MCP has AZURE_DEVOPS_EXT_PAT: "", - // the auto-mapped form (with SC_READ_TOKEN) should win — no duplicate - let mut fm = minimal_front_matter(); - fm.permissions = Some(crate::compile::types::PermissionsConfig { - read: Some("my-read-sc".to_string()), - write: None, - }); - fm.mcp_servers.insert( - "ado-tool".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - env: { - let mut e = HashMap::new(); - e.insert("AZURE_DEVOPS_EXT_PAT".to_string(), "".to_string()); - e - }, - ..Default::default() - }), - ); - let env = generate_mcpg_docker_env(&fm); - // Should have the SC_READ_TOKEN form (auto-mapped), not bare passthrough - assert!( - env.contains("-e AZURE_DEVOPS_EXT_PAT=\"$(SC_READ_TOKEN)\""), - "Auto-mapped form should be present" - ); - // Should appear exactly once - let count = env.matches("AZURE_DEVOPS_EXT_PAT").count(); - assert_eq!(count, 1, "AZURE_DEVOPS_EXT_PAT should appear exactly once, got {}", count); - } - - #[test] - fn test_generate_mcpg_docker_env_without_permissions() { - let fm = minimal_front_matter(); - let env = generate_mcpg_docker_env(&fm); - assert!( - !env.contains("AZURE_DEVOPS_EXT_PAT"), - "Should not map ADO token when permissions.read is not set" - ); - } - - #[test] - fn test_generate_mcpg_docker_env_passthrough_vars() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "tool".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("img:latest".to_string()), - env: { - let mut e = HashMap::new(); - e.insert("PASS_THROUGH".to_string(), "".to_string()); - e.insert("STATIC".to_string(), "value".to_string()); - e - }, - ..Default::default() - }), - ); - let env = generate_mcpg_docker_env(&fm); - assert!(env.contains("-e PASS_THROUGH"), "Should include passthrough var"); - assert!(!env.contains("-e STATIC"), "Should NOT include static var"); - } - - #[test] - fn test_generate_mcpg_docker_env_rejects_invalid_names() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "evil".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("img:latest".to_string()), - env: { - let mut e = HashMap::new(); - // Injection attempt: env var name with Docker flag - e.insert("MY_VAR --privileged".to_string(), "".to_string()); - // Valid env var for comparison - e.insert("GOOD_VAR".to_string(), "".to_string()); - e - }, - ..Default::default() - }), - ); - let env = generate_mcpg_docker_env(&fm); - assert!( - !env.contains("--privileged"), - "Should reject invalid env var name with Docker flag injection" - ); - assert!( - env.contains("-e GOOD_VAR"), - "Should include valid env var" - ); - } - - #[test] - fn test_is_valid_env_var_name() { - assert!(is_valid_env_var_name("MY_VAR")); - assert!(is_valid_env_var_name("_PRIVATE")); - assert!(is_valid_env_var_name("A")); - assert!(is_valid_env_var_name("VAR123")); - assert!(!is_valid_env_var_name("")); - assert!(!is_valid_env_var_name("123ABC")); - assert!(!is_valid_env_var_name("MY-VAR")); - assert!(!is_valid_env_var_name("MY VAR")); - assert!(!is_valid_env_var_name("X --privileged")); - assert!(!is_valid_env_var_name("X -v /etc:/etc:rw")); - } - - // ─── tools.azure-devops MCPG integration ──────────────────────────────── - - #[test] - fn test_ado_tool_generates_mcpg_entry() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops: true\n---\n", - ) - .unwrap(); - // Pass inferred org since no explicit org is set - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test_with_org(&fm, "inferred-org"), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let ado = config.mcp_servers.get("azure-devops").unwrap(); - assert_eq!(ado.server_type, "stdio"); - assert_eq!(ado.container.as_deref(), Some(ADO_MCP_IMAGE)); - assert_eq!(ado.entrypoint.as_deref(), Some(ADO_MCP_ENTRYPOINT)); - let args = ado.entrypoint_args.as_ref().unwrap(); - assert!(args.contains(&"-y".to_string())); - assert!(args.contains(&ADO_MCP_PACKAGE.to_string())); - assert!(args.contains(&"inferred-org".to_string())); - // Should have AZURE_DEVOPS_EXT_PAT in env - let env = ado.env.as_ref().unwrap(); - assert!(env.contains_key("AZURE_DEVOPS_EXT_PAT")); - } - - #[test] - fn test_ado_tool_with_toolsets() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops:\n toolsets: [repos, wit, core]\n---\n", - ) - .unwrap(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test_with_org(&fm, "myorg"), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let ado = config.mcp_servers.get("azure-devops").unwrap(); - let args = ado.entrypoint_args.as_ref().unwrap(); - assert!(args.contains(&"-d".to_string())); - assert!(args.contains(&"repos".to_string())); - assert!(args.contains(&"wit".to_string())); - assert!(args.contains(&"core".to_string())); - } - - #[test] - fn test_ado_tool_with_org_override() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: myorg\n---\n", - ) - .unwrap(); - // Explicit org should be used even when inferred_org is None - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let ado = config.mcp_servers.get("azure-devops").unwrap(); - let args = ado.entrypoint_args.as_ref().unwrap(); - assert!(args.contains(&"myorg".to_string())); - } - - #[test] - fn test_ado_tool_explicit_org_overrides_inferred() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: explicit-org\n---\n", - ) - .unwrap(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test_with_org(&fm, "inferred-org"), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let ado = config.mcp_servers.get("azure-devops").unwrap(); - let args = ado.entrypoint_args.as_ref().unwrap(); - assert!(args.contains(&"explicit-org".to_string())); - assert!(!args.contains(&"inferred-org".to_string())); - } - - #[test] - fn test_ado_tool_no_org_fails() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops: true\n---\n", - ) - .unwrap(); - // No explicit org and no inferred org — should fail - let result = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)); - assert!(result.is_err()); - assert!( - result.unwrap_err().to_string().contains("no ADO organization"), - "Error should mention missing org" - ); - } - - #[test] - fn test_ado_tool_invalid_org_fails() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: \"my org/bad\"\n---\n", - ) - .unwrap(); - let result = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)); - assert!(result.is_err()); - assert!( - result.unwrap_err().to_string().contains("Invalid ADO org name"), - "Error should mention invalid org" - ); - } - - #[test] - fn test_ado_tool_invalid_toolset_fails() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: myorg\n toolsets: [\"repos\", \"bad toolset\"]\n---\n", - ) - .unwrap(); - let result = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)); - assert!(result.is_err()); - assert!( - result.unwrap_err().to_string().contains("Invalid ADO toolset name"), - "Error should mention invalid toolset" - ); - } - - #[test] - fn test_ado_tool_with_allowed_tools() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: myorg\n allowed:\n - wit_get_work_item\n - core_list_projects\n---\n", - ) - .unwrap(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let ado = config.mcp_servers.get("azure-devops").unwrap(); - let tools = ado.tools.as_ref().unwrap(); - assert_eq!(tools, &["wit_get_work_item", "core_list_projects"]); - } - - #[test] - fn test_ado_tool_disabled_not_generated() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops: false\n---\n", - ) - .unwrap(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - assert!(!config.mcp_servers.contains_key("azure-devops")); - } - - #[test] - fn test_ado_tool_not_set_not_generated() { - let fm = minimal_front_matter(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - assert!(!config.mcp_servers.contains_key("azure-devops")); - } - - #[test] - fn test_ado_tool_skips_manual_mcp_entry() { - // When tools.azure-devops is enabled AND mcp-servers also has azure-devops, - // the tools config takes precedence and the manual entry is skipped. - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: auto-org\nmcp-servers:\n azure-devops:\n container: \"node:20-slim\"\n entrypoint: \"npx\"\n entrypoint-args: [\"-y\", \"@azure-devops/mcp\", \"manual-org\"]\n---\n", - ) - .unwrap(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let ado = config.mcp_servers.get("azure-devops").unwrap(); - // Should use the auto-configured org, not the manual one - let args = ado.entrypoint_args.as_ref().unwrap(); - assert!(args.contains(&"auto-org".to_string())); - assert!(!args.contains(&"manual-org".to_string())); - } - - #[test] - fn test_ado_tool_docker_env_passthrough() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops: true\npermissions:\n read: my-read-sc\n---\n", - ) - .unwrap(); - let env = generate_mcpg_docker_env(&fm); - assert!( - env.contains("AZURE_DEVOPS_EXT_PAT"), - "Should include ADO token passthrough when permissions.read is set" - ); - } - - // ─── validate_docker_args ──────────────────────────────────────────────── - - #[test] - fn test_validate_docker_args_privileged_flag() { - let warnings = validate_docker_args(&["--privileged".to_string()], "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("--privileged"), "should warn about --privileged"); - } - - #[test] - fn test_validate_docker_args_entrypoint_in_args_warns() { - let warnings = validate_docker_args( - &[ - "--entrypoint".to_string(), - "/bin/sh".to_string(), - ], - "my-mcp", - ); - assert!(warnings.iter().any(|w| w.contains("--entrypoint") && w.contains("entrypoint:")), - "should warn about --entrypoint with hint to use entrypoint: field"); - } - - #[test] - fn test_validate_docker_args_volume_flag_calls_mount_validation() { - // -v docker.sock in args bypasses `mounts:` validation; should produce warnings - let warnings = validate_docker_args( - &[ - "-v".to_string(), - "/var/run/docker.sock:/var/run/docker.sock".to_string(), - ], - "my-mcp", - ); - assert!(warnings.iter().any(|w| w.contains("bypasses mounts validation")), - "should warn about volume mount in args"); - assert!(warnings.iter().any(|w| w.contains("Docker socket")), - "should propagate mount source warning for docker.sock"); - } - - #[test] - fn test_validate_docker_args_volume_equals_form() { - // --volume=source:dest form should also be detected - let warnings = validate_docker_args( - &["--volume=/var/run/docker.sock:/var/run/docker.sock".to_string()], - "my-mcp", - ); - assert!(warnings.iter().any(|w| w.contains("bypasses mounts validation")), - "should warn about --volume= form"); - } - - #[test] - fn test_validate_docker_args_safe_args_no_warnings() { - // A legitimate arg like --read-only should produce no warnings - let warnings = validate_docker_args(&["--read-only".to_string()], "my-mcp"); - assert!(warnings.is_empty(), "safe args should not produce warnings"); - } - - #[test] - fn test_validate_docker_args_empty_no_warnings() { - let warnings = validate_docker_args(&[], "my-mcp"); - assert!(warnings.is_empty(), "empty args should not produce warnings"); - } - - #[test] - fn test_validate_docker_args_volume_flag_trailing_warns() { - // -v as the last arg with no mount spec is malformed - let warnings = validate_docker_args(&["-v".to_string()], "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("malformed"), "trailing -v with no mount spec should warn"); - } - - #[test] - fn test_validate_docker_args_long_volume_flag_trailing_warns() { - // --volume as the last arg with no mount spec is malformed - let warnings = validate_docker_args(&["--volume".to_string()], "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("malformed"), "trailing --volume with no mount spec should warn"); - } - - // ─── validate_mcp_url ──────────────────────────────────────────────────── - - #[test] - fn test_validate_mcp_url_https_no_warnings() { - let warnings = validate_mcp_url("https://mcp.dev.azure.com/myorg", "my-mcp"); - assert!(warnings.is_empty(), "https URL should not produce warnings"); - } - - #[test] - fn test_validate_mcp_url_http_no_warnings() { - let warnings = validate_mcp_url("http://localhost:8100/mcp", "my-mcp"); - assert!(warnings.is_empty(), "http URL should not produce warnings"); - } - - #[test] - fn test_validate_mcp_url_bad_scheme_warns() { - let warnings = validate_mcp_url("ftp://files.example.com", "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("does not use http://"), "non-HTTP scheme should warn"); - } - - #[test] - fn test_validate_mcp_url_no_scheme_warns() { - let warnings = validate_mcp_url("mcp.dev.azure.com/myorg", "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("does not use http://"), "URL without scheme should warn"); - } - - // ─── validate_mount_source ─────────────────────────────────────────────── - - #[test] - fn test_validate_mount_source_docker_sock() { - let warnings = validate_mount_source("/var/run/docker.sock:/var/run/docker.sock:rw", "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("Docker socket"), "should warn about Docker socket exposure"); - } - - #[test] - fn test_validate_mount_source_sensitive_path_etc() { - let warnings = validate_mount_source("/etc/passwd:/data/passwd:ro", "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("sensitive host path"), "should warn about /etc mount"); - } - - #[test] - fn test_validate_mount_source_sensitive_path_proc() { - let warnings = validate_mount_source("/proc:/host/proc:ro", "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("sensitive host path"), "should warn about /proc mount"); - } - - #[test] - fn test_validate_mount_source_case_insensitive() { - // /ETC/shadow should match sensitive /etc prefix (lowercased comparison) - let warnings = validate_mount_source("/ETC/shadow:/data/shadow:ro", "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("sensitive host path"), "case-insensitive match should trigger warning"); - } - - #[test] - fn test_validate_mount_source_no_false_positive_on_etc_configs() { - // /etc-configs should NOT match the /etc prefix (path boundary check requires trailing /) - let warnings = validate_mount_source("/etc-configs:/app/config:ro", "my-mcp"); - assert!(warnings.is_empty(), "/etc-configs must not match /etc prefix due to path boundary check"); - } - - #[test] - fn test_validate_mount_source_safe_path_no_warnings() { - // /app/data is not a sensitive path; should produce no warnings - let warnings = validate_mount_source("/app/data:/app/data:ro", "my-mcp"); - assert!(warnings.is_empty(), "safe path should not produce warnings"); - } - - // ─── validate_container_image ──────────────────────────────────────────── - - #[test] - fn test_validate_container_image_empty_string() { - let warnings = validate_container_image("", "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("empty"), "should warn about empty image name"); - } - - #[test] - fn test_validate_container_image_shell_metacharacters() { - let warnings = validate_container_image("node:20-slim; rm -rf /", "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("unexpected characters"), "should warn about shell metacharacters"); - } - - #[test] - fn test_validate_container_image_valid_name_no_warnings() { - // Standard image references should produce no warnings - assert!(validate_container_image("node:20-slim", "my-mcp").is_empty()); - assert!(validate_container_image("ghcr.io/org/image:latest", "my-mcp").is_empty()); - assert!(validate_container_image("python:3.12-slim", "my-mcp").is_empty()); - } - - // ─── warn_potential_secrets ────────────────────────────────────────────── - - #[test] - fn test_warn_potential_secrets_token_env_var_triggers() { - let env = HashMap::from([("API_TOKEN".to_string(), "secret123".to_string())]); - let headers = HashMap::new(); - let warnings = warn_potential_secrets("my-mcp", &env, &headers); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("API_TOKEN"), "should warn about secret-looking env var"); - } - - #[test] - fn test_warn_potential_secrets_empty_passthrough_no_warnings() { - // Empty string = passthrough; should NOT trigger a warning - let env = HashMap::from([("API_TOKEN".to_string(), "".to_string())]); - let headers = HashMap::new(); - let warnings = warn_potential_secrets("my-mcp", &env, &headers); - assert!(warnings.is_empty(), "empty passthrough value must not trigger a warning"); - } - - #[test] - fn test_warn_potential_secrets_authorization_header_triggers() { - let env = HashMap::new(); - let headers = - HashMap::from([("Authorization".to_string(), "Bearer abc".to_string())]); - let warnings = warn_potential_secrets("my-mcp", &env, &headers); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("Authorization"), "should warn about Authorization header"); - } - - #[test] - fn test_warn_potential_secrets_bearer_value_triggers() { - // A header whose value starts with "Bearer " should also warn - let env = HashMap::new(); - let headers = - HashMap::from([("X-Custom-Auth".to_string(), "Bearer token123".to_string())]); - let warnings = warn_potential_secrets("my-mcp", &env, &headers); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("X-Custom-Auth"), "should warn about header with Bearer value"); - } - - #[test] - fn test_warn_potential_secrets_safe_env_no_warnings() { - // Env keys with non-secret names and non-empty values should produce no warnings - let env = HashMap::from([("MY_CONFIG".to_string(), "value".to_string())]); - let headers = HashMap::new(); - let warnings = warn_potential_secrets("my-mcp", &env, &headers); - assert!(warnings.is_empty(), "non-secret env var should not produce warnings"); - } - // ─── generate_allowed_domains ──────────────────────────────────────────── #[test] @@ -1983,118 +395,4 @@ mod tests { assert!(domains.contains("crates.io"), "rust domains present"); } - // ─── generate_prepare_steps ────────────────────────────────────────────── - - #[test] - fn test_generate_prepare_steps_with_memory_includes_memory_preamble() { - let (fm, _) = crate::compile::common::parse_markdown( - "---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n", - ).unwrap(); - let exts = super::super::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); - assert!( - !result.is_empty(), - "memory steps must be emitted when cache-memory enabled" - ); - assert!( - result.contains("agent_memory"), - "should reference memory directory" - ); - } - - #[test] - fn test_generate_prepare_steps_without_memory_and_no_steps_is_empty() { - let fm = minimal_front_matter(); - let exts = super::super::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); - assert!(result.is_empty(), "no steps and no memory should produce empty output"); - } - - #[test] - fn test_generate_prepare_steps_with_memory_includes_download_and_prompt() { - let (fm, _) = crate::compile::common::parse_markdown( - "---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n", - ).unwrap(); - let exts = super::super::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); - assert!( - result.contains("DownloadPipelineArtifact"), - "memory steps must include the artifact download task" - ); - assert!( - result.contains("Agent Memory"), - "memory steps must include the memory prompt" - ); - } - - #[test] - fn test_generate_prepare_steps_without_memory_with_user_steps() { - let fm = minimal_front_matter(); - let exts = super::super::extensions::collect_extensions(&fm); - let step: serde_yaml::Value = - serde_yaml::from_str("bash: echo hello\ndisplayName: greet").unwrap(); - let result = generate_prepare_steps(&[step], &exts).unwrap(); - assert!(!result.is_empty(), "user steps should be present"); - assert!( - !result.contains("agent_memory"), - "no memory reference when cache-memory not enabled" - ); - } - - #[test] - fn test_generate_prepare_steps_with_memory_and_user_steps() { - let (fm, _) = crate::compile::common::parse_markdown( - "---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n", - ).unwrap(); - let exts = super::super::extensions::collect_extensions(&fm); - let step: serde_yaml::Value = - serde_yaml::from_str("bash: echo hello\ndisplayName: greet").unwrap(); - let result = generate_prepare_steps(&[step], &exts).unwrap(); - assert!( - result.contains("agent_memory"), - "memory reference must be present" - ); - assert!( - result.contains("echo hello"), - "user step must also be present" - ); - } - - #[test] - fn test_generate_prepare_steps_with_lean() { - let (fm, _) = crate::compile::common::parse_markdown( - "---\nname: test\ndescription: test\nruntimes:\n lean: true\n---\n", - ).unwrap(); - let exts = super::super::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); - assert!(result.contains("elan-init.sh"), "should include elan installer"); - assert!(result.contains("Lean 4"), "should include Lean prompt"); - assert!(result.contains("--default-toolchain stable"), "should default to stable"); - assert!(result.contains("/tmp/awf-tools/"), "should symlink into awf-tools for AWF chroot"); - } - - #[test] - fn test_generate_prepare_steps_with_lean_custom_toolchain() { - let (fm, _) = crate::compile::common::parse_markdown( - "---\nname: test\ndescription: test\nruntimes:\n lean:\n toolchain: \"leanprover/lean4:v4.29.1\"\n---\n", - ).unwrap(); - let exts = super::super::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); - assert!( - result.contains("--default-toolchain leanprover/lean4:v4.29.1"), - "should use specified toolchain" - ); - } - - #[test] - fn test_generate_prepare_steps_with_lean_and_memory() { - let (fm, _) = crate::compile::common::parse_markdown( - "---\nname: test\ndescription: test\nruntimes:\n lean: true\ntools:\n cache-memory: true\n---\n", - ).unwrap(); - let exts = super::super::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); - assert!(result.contains("agent_memory"), "memory steps present"); - assert!(result.contains("elan-init.sh"), "lean install present"); - assert!(result.contains("Lean 4"), "lean prompt present"); - } } From 3b506852df4ef1e54d5ef30292a35f37fceee6a3 Mon Sep 17 00:00:00 2001 From: James Devine Date: Thu, 16 Apr 2026 14:02:59 +0100 Subject: [PATCH 2/8] feat: rewrite 1ES compiler to use Copilot CLI + AWF + MCPG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the legacy Agency job type (agencyJob) with direct Copilot CLI invocation, AWF network isolation, and MCP Gateway — matching the standalone pipeline execution model. Changes: - templates/1es-base.yml: Complete rewrite. All three jobs (PerformAgenticTask, AnalyzeSafeOutputs, ProcessSafeOutputs) now use templateContext.type: buildJob with the same step sequence as standalone. Dropped Agency concepts: commandOptions, globalOptions, logLevel, mcpConfiguration, agentContextRoot, AgencyArtifact. - src/compile/onees.rs: Rewritten as thin wrapper using compile_shared(). Removed generate_agent_context_root, generate_mcp_configuration, generate_inline_steps. Only 1ES-specific setup/teardown helpers remain. - src/compile/common.rs: Moved generate_allowed_domains from standalone. Removed dead is_custom_mcp function. - src/compile/standalone.rs: Removed now-unnecessary imports. Both compilers now share the same execution model and compile flow. Standalone output is verified byte-identical to pre-refactor baseline. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/compile/common.rs | 149 ++++++- src/compile/onees.rs | 509 +++------------------ src/compile/standalone.rs | 148 +------ templates/1es-base.yml | 905 ++++++++++++++++++++++++++------------ 4 files changed, 841 insertions(+), 870 deletions(-) diff --git a/src/compile/common.rs b/src/compile/common.rs index b9d97c26..d3d910c4 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -8,13 +8,8 @@ use super::types::{FrontMatter, PipelineParameter, Repository, TriggerConfig}; use super::extensions::{CompilerExtension, Extension, McpgServerConfig, McpgGatewayConfig, McpgConfig, CompileContext}; use crate::compile::types::McpConfig; use crate::fuzzy_schedule; - -/// Check if an MCP has a transport configuration (container or URL). -/// MCPs with a container are containerized stdio servers; MCPs with a URL -/// are HTTP servers. Both are routed through the MCP Gateway (MCPG). -pub fn is_custom_mcp(config: &McpConfig) -> bool { - matches!(config, McpConfig::WithOptions(opts) if opts.container.is_some() || opts.url.is_some()) -} +use crate::allowed_hosts::{CORE_ALLOWED_HOSTS, mcp_required_hosts}; +use crate::ecosystem_domains::{get_ecosystem_domains, is_ecosystem_identifier, is_known_ecosystem}; /// Parse the markdown file and extract front matter and body pub fn parse_markdown(content: &str) -> Result<(FrontMatter, String)> { @@ -1771,6 +1766,146 @@ pub fn generate_mcpg_docker_env(front_matter: &FrontMatter) -> String { } } +// ==================== Domain allowlist ==================== + +/// Generate the allowed domains list for AWF network isolation. +/// +/// This generates a comma-separated list of domain patterns for AWF's +/// `--allow-domains` flag. The list includes: +/// 1. Core Azure DevOps/GitHub endpoints +/// 2. MCP-specific endpoints for each enabled MCP +/// 3. User-specified additional hosts from network.allowed +pub fn generate_allowed_domains( + front_matter: &FrontMatter, + extensions: &[super::extensions::Extension], +) -> Result { + // Collect enabled MCP names (user-defined MCPs, not first-party tools) + let enabled_mcps: Vec = front_matter + .mcp_servers + .iter() + .filter_map(|(name, config)| { + let is_enabled = match config { + McpConfig::Enabled(enabled) => *enabled, + McpConfig::WithOptions(_) => true, + }; + if is_enabled { Some(name.clone()) } else { None } + }) + .collect(); + + // Get user-specified hosts + let user_hosts: Vec = front_matter + .network + .as_ref() + .map(|n| n.allowed.clone()) + .unwrap_or_default(); + + // Generate the allowlist by combining core + MCP + extension + user hosts + let mut hosts: HashSet = HashSet::new(); + + // Add core hosts + for host in CORE_ALLOWED_HOSTS { + hosts.insert((*host).to_string()); + } + + // Add host.docker.internal — required for the AWF container to reach + // MCPG and SafeOutputs on the host. + hosts.insert("host.docker.internal".to_string()); + + // Add MCP-specific hosts (user-defined MCPs via mcp_required_hosts lookup) + for mcp in &enabled_mcps { + for host in mcp_required_hosts(mcp) { + hosts.insert((*host).to_string()); + } + } + + // Add extension-declared hosts (runtimes + first-party tools). + // Extensions may return ecosystem identifiers (e.g., "lean") which are + // expanded to their domain lists, or raw domain names. + for ext in extensions { + for host in ext.required_hosts() { + if is_ecosystem_identifier(&host) { + let domains = get_ecosystem_domains(&host); + if domains.is_empty() { + eprintln!( + "warning: extension '{}' requires unknown ecosystem '{}'; \ + no domains added", + ext.name(), + host + ); + } + for domain in domains { + hosts.insert(domain); + } + } else { + hosts.insert(host); + } + } + } + + // Add user-specified hosts (validated against DNS-safe characters) + // Entries may be ecosystem identifiers (e.g., "python", "rust") which + // expand to their domain lists, or raw domain names. + for host in &user_hosts { + if is_ecosystem_identifier(host) { + let domains = get_ecosystem_domains(host); + if domains.is_empty() && !is_known_ecosystem(host) { + eprintln!( + "warning: network.allowed contains unknown ecosystem identifier '{}'. \ + Known ecosystems: python, rust, node, go, java, etc. \ + If this is a domain name, it should contain a dot.", + host + ); + } + for domain in domains { + hosts.insert(domain); + } + } else { + let valid_chars = !host.is_empty() + && host + .chars() + .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '*')); + if !valid_chars { + anyhow::bail!( + "network.allowed domain '{}' contains characters invalid in DNS names. \ + Only ASCII alphanumerics, '.', '-', and '*' are allowed.", + host + ); + } + if host.contains('*') && !(host.starts_with("*.") && !host[2..].contains('*')) { + anyhow::bail!( + "network.allowed domain '{}' uses '*' in an unsupported position. \ + Wildcards must appear only as a leading prefix (e.g. '*.example.com').", + host + ); + } + hosts.insert(host.clone()); + } + } + + // Remove blocked hosts (supports both ecosystem identifiers and raw domains) + let blocked_hosts: Vec = front_matter + .network + .as_ref() + .map(|n| n.blocked.clone()) + .unwrap_or_default(); + for blocked in &blocked_hosts { + if is_ecosystem_identifier(blocked) { + for domain in get_ecosystem_domains(blocked) { + hosts.remove(&domain); + } + } else { + hosts.remove(blocked); + } + } + + // Sort for deterministic output + let mut allowlist: Vec = hosts.into_iter().collect(); + allowlist.sort(); + + // Format as comma-separated list for AWF --allow-domains + Ok(allowlist.join(",")) +} + // ==================== Shared compile flow ==================== /// Target-specific overrides for the shared compile flow. diff --git a/src/compile/onees.rs b/src/compile/onees.rs index 2ef24fa1..21a091c0 100644 --- a/src/compile/onees.rs +++ b/src/compile/onees.rs @@ -1,35 +1,25 @@ //! 1ES Pipeline Template compiler. //! -//! This compiler generates a pipeline that extends the 1ES Unofficial Pipeline Template: -//! - Uses `templateContext.type: agencyJob` for the main agent job -//! - Integrates with 1ES SDL scanning and compliance tools -//! - Custom jobs for threat analysis and safe output processing -//! -//! Limitations: -//! - MCP servers use service connections (no custom `command:` support) -//! - Network isolation is handled by OneBranch (no custom proxy allow-lists) +//! This compiler generates a pipeline that extends the 1ES Unofficial Pipeline Template +//! with Copilot CLI, AWF network isolation, and MCP Gateway — matching the standalone +//! pipeline model while maintaining 1ES SDL compliance. use anyhow::{Context, Result}; use async_trait::async_trait; use log::info; -use std::collections::HashMap; use std::path::Path; use super::Compiler; use super::common::{ - self, AWF_VERSION, COPILOT_CLI_VERSION, DEFAULT_POOL, build_parameters, - compute_effective_workspace, generate_acquire_ado_token, generate_checkout_self, - generate_checkout_steps, generate_ci_trigger, generate_copilot_ado_env, - generate_copilot_params, generate_executor_ado_env, generate_header_comment, - generate_job_timeout, generate_parameters, generate_pipeline_path, - generate_pipeline_resources, generate_pr_trigger, generate_repositories, generate_schedule, - generate_source_path, generate_working_directory, is_custom_mcp, replace_with_indent, - validate_comment_target, validate_front_matter_identity, - validate_resolve_pr_thread_statuses, validate_submit_pr_review_events, - validate_update_pr_votes, validate_update_work_item_target, validate_write_permissions, + AWF_VERSION, MCPG_VERSION, MCPG_IMAGE, + CompileConfig, compile_shared, replace_with_indent, + generate_allowed_domains, + generate_cancel_previous_builds, + generate_enabled_tools_args, + generate_mcpg_config, generate_mcpg_docker_env, + format_steps_yaml_indented, }; -use super::extensions::CompilerExtension; -use super::types::{FrontMatter, McpConfig}; +use super::types::FrontMatter; /// 1ES Pipeline Template compiler. pub struct OneESCompiler; @@ -49,357 +39,83 @@ impl Compiler for OneESCompiler { ) -> Result { info!("Compiling for 1ES target"); - // Validate inputs early, before any values are used in template substitution - validate_front_matter_identity(front_matter)?; - - // Load 1ES template - let template = include_str!("../../templates/1es-base.yml"); - - // Generate schedule - let schedule = match &front_matter.schedule { - Some(s) => generate_schedule(&front_matter.name, s) - .with_context(|| format!("Failed to parse schedule '{}'", s.expression()))?, - None => String::new(), - }; - - let repositories = generate_repositories(&front_matter.repositories); - let checkout_steps = generate_checkout_steps(&front_matter.checkout); - let checkout_self = generate_checkout_self(); + // Collect extensions (needed for MCPG config and allowed domains) let extensions = super::extensions::collect_extensions(front_matter); - // Build compile context with inferred metadata - let input_dir = input_path.parent().unwrap_or(std::path::Path::new(".")); + // Build compile context for MCPG config generation + let input_dir = input_path.parent().unwrap_or(Path::new(".")); let ctx = super::extensions::CompileContext::new(front_matter, input_dir).await; - // Run extension validations (warnings + errors) - for ext in &extensions { - for warning in ext.validate(&ctx)? { - eprintln!("Warning: {}", warning); - } - } - - let copilot_params = generate_copilot_params(front_matter, &extensions)?; - let has_memory = front_matter - .tools - .as_ref() - .and_then(|t| t.cache_memory.as_ref()) - .is_some_and(|cm| cm.is_enabled()); - let parameters = build_parameters(&front_matter.parameters, has_memory); - let parameters_yaml = generate_parameters(¶meters)?; + // Generate values shared with standalone that are passed as extra replacements + let allowed_domains = generate_allowed_domains(front_matter, &extensions)?; + let enabled_tools_args = generate_enabled_tools_args(front_matter); + let cancel_previous_builds = generate_cancel_previous_builds(&front_matter.triggers); - let effective_workspace = compute_effective_workspace( - &front_matter.workspace, - &front_matter.checkout, - &front_matter.name, - ); - let working_directory = generate_working_directory(&effective_workspace); - let pipeline_resources = generate_pipeline_resources(&front_matter.triggers)?; - let has_schedule = front_matter.schedule.is_some(); - let pr_trigger = generate_pr_trigger(&front_matter.triggers, has_schedule); - let ci_trigger = generate_ci_trigger(&front_matter.triggers, has_schedule); - let source_path = generate_source_path(input_path); - let pipeline_path = generate_pipeline_path(output_path); - - // Pool - for 1ES we need both name and os - let pool = front_matter - .pool - .as_ref() - .map(|p| p.name().to_string()) - .unwrap_or_else(|| DEFAULT_POOL.to_string()); - - // Generate 1ES-specific content - let agent_context_root = generate_agent_context_root(&effective_workspace); - let mcp_configuration = generate_mcp_configuration(&front_matter.mcp_servers); - let prepare_steps = generate_inline_steps(&front_matter.steps); - - // Default finalize step to avoid empty stepList - let default_finalize_step = serde_yaml::from_str::( - r#"bash: echo "Agent task completed" -displayName: "Finalize""#, - ) - .expect("default finalize step should be valid YAML"); - let finalize_steps = if front_matter.post_steps.is_empty() { - generate_inline_steps(&[default_finalize_step]) - } else { - generate_inline_steps(&front_matter.post_steps) - }; + let mcpg_config = generate_mcpg_config(front_matter, &ctx, &extensions)?; + let mcpg_config_json = serde_json::to_string_pretty(&mcpg_config) + .context("Failed to serialize MCPG config")?; + let mcpg_docker_env = generate_mcpg_docker_env(front_matter); + // Generate 1ES-specific setup/teardown jobs (no per-job pool, uses templateContext) + // Pre-replace these in the template before compile_shared, which would otherwise + // use the standalone versions (with pool: per job). let setup_job = generate_setup_job(&front_matter.setup, &front_matter.name); let teardown_job = generate_teardown_job(&front_matter.teardown, &front_matter.name); - let agentic_depends_on = if !front_matter.setup.is_empty() { - "dependsOn: SetupJob".to_string() - } else { - String::new() - }; - let job_timeout = generate_job_timeout(front_matter); - - // Load threat analysis prompt template - let threat_analysis_prompt = include_str!("../../templates/threat-analysis.md"); + let mut template = include_str!("../../templates/1es-base.yml").to_string(); + template = replace_with_indent(&template, "{{ setup_job }}", &setup_job); + template = replace_with_indent(&template, "{{ teardown_job }}", &teardown_job); - // Insert threat analysis prompt first - let template = replace_with_indent( + let config = CompileConfig { template, - "{{ threat_analysis_prompt }}", - threat_analysis_prompt, - ); - - // Generate service connection token acquisition steps and env vars - let acquire_read_token = generate_acquire_ado_token( - front_matter - .permissions - .as_ref() - .and_then(|p| p.read.as_deref()), - "SC_READ_TOKEN", - ); - let copilot_ado_env = generate_copilot_ado_env( - front_matter - .permissions - .as_ref() - .and_then(|p| p.read.as_deref()), - ); - let acquire_write_token = generate_acquire_ado_token( - front_matter - .permissions - .as_ref() - .and_then(|p| p.write.as_deref()), - "SC_WRITE_TOKEN", - ); - let executor_ado_env = generate_executor_ado_env( - front_matter - .permissions - .as_ref() - .and_then(|p| p.write.as_deref()), - ); - - // Validate that write-requiring safe-outputs have a write service connection - validate_write_permissions(front_matter)?; - // Validate comment-on-work-item has required target field - validate_comment_target(front_matter)?; - // Validate update-work-item has required target field - validate_update_work_item_target(front_matter)?; - // Validate submit-pr-review has required allowed-events field - validate_submit_pr_review_events(front_matter)?; - // Validate update-pr vote operation has required allowed-votes field - validate_update_pr_votes(front_matter)?; - // Validate resolve-pr-review-thread has required allowed-statuses field - validate_resolve_pr_thread_statuses(front_matter)?; - - // NOTE: 1ES target does not support --enabled-tools filtering (safe-outputs - // tool filtering). 1ES uses service connections for MCP servers rather than - // mcp-http, so generate_enabled_tools_args is not called here. If safe-outputs - // filtering is needed for 1ES, it would require changes to the 1ES pipeline - // template and agency job configuration. - - // Replace all template markers - let compiler_version = env!("CARGO_PKG_VERSION"); - let replacements: Vec<(&str, &str)> = vec![ - ("{{ parameters }}", ¶meters_yaml), - ("{{ compiler_version }}", compiler_version), - // No-op for 1ES (template doesn't use AWF), but included for forward-compatibility - ("{{ firewall_version }}", AWF_VERSION), - ("{{ copilot_version }}", COPILOT_CLI_VERSION), - ("{{ pool }}", &pool), - ("{{ schedule }}", &schedule), - ("{{ pr_trigger }}", &pr_trigger), - ("{{ ci_trigger }}", &ci_trigger), - ("{{ repositories }}", &repositories), - ("{{ pipeline_resources }}", &pipeline_resources), - ("{{ checkout_self }}", &checkout_self), - ("{{ checkout_repositories }}", &checkout_steps), - ("{{ agent_name }}", &front_matter.name), - ("{{ agent_description }}", &front_matter.description), - ("{{ agent_context_root }}", &agent_context_root), - ("{{ agent_content }}", markdown_body), - ("{{ prepare_steps }}", &prepare_steps), - ("{{ finalize_steps }}", &finalize_steps), - ("{{ global_options }}", ""), - ("{{ log_level }}", ""), - ("{{ mcp_configuration }}", &mcp_configuration), - ("{{ agentic_depends_on }}", &agentic_depends_on), - ("{{ job_timeout }}", &job_timeout), - ("{{ setup_job }}", &setup_job), - ("{{ teardown_job }}", &teardown_job), - ("{{ source_path }}", &source_path), - ("{{ pipeline_path }}", &pipeline_path), - ("{{ working_directory }}", &working_directory), - ("{{ workspace }}", &working_directory), - ("{{ copilot_params }}", &copilot_params), - ("{{ acquire_ado_token }}", &acquire_read_token), - ("{{ copilot_ado_env }}", &copilot_ado_env), - ("{{ acquire_write_token }}", &acquire_write_token), - ("{{ executor_ado_env }}", &executor_ado_env), - ]; - - let pipeline_yaml = replacements - .into_iter() - .fold(template, |yaml, (placeholder, replacement)| { - replace_with_indent(&yaml, placeholder, replacement) - }); - - // Warn about custom MCP limitations - if front_matter - .mcp_servers - .iter() - .any(|(_, c)| is_custom_mcp(c)) - { - eprintln!( - "Warning: Custom MCP servers (with container: or url:) are not supported in 1ES target. \ - They will be ignored. Use standalone target for full MCP support." - ); - } - - // Prepend header comment for pipeline detection - let header = generate_header_comment(input_path); - let pipeline_yaml = format!("{}{}", header, pipeline_yaml); + extra_replacements: vec![ + ("{{ firewall_version }}".into(), AWF_VERSION.into()), + ("{{ mcpg_version }}".into(), MCPG_VERSION.into()), + ("{{ mcpg_image }}".into(), MCPG_IMAGE.into()), + ("{{ allowed_domains }}".into(), allowed_domains), + ("{{ enabled_tools_args }}".into(), enabled_tools_args), + ("{{ cancel_previous_builds }}".into(), cancel_previous_builds), + ("{{ mcpg_config }}".into(), mcpg_config_json), + ("{{ mcpg_docker_env }}".into(), mcpg_docker_env), + ], + }; - Ok(pipeline_yaml) + compile_shared(input_path, output_path, front_matter, markdown_body, &extensions, config).await } } // ==================== 1ES-specific helpers ==================== -/// Generate agent context root for 1ES templates -fn generate_agent_context_root(effective_workspace: &str) -> String { - match effective_workspace { - "repo" => "$(Build.Repository.Name)".to_string(), - "root" | _ => ".".to_string(), - } -} - -/// Generate MCP configuration for 1ES templates. -/// -/// In 1ES, MCPs require service connections. Only MCPs with explicit -/// `service_connection` configuration or custom commands are included. -fn generate_mcp_configuration(mcps: &HashMap) -> String { - let mut mcp_entries: Vec<_> = mcps - .iter() - .filter_map(|(name, config)| { - let (is_enabled, opts) = match config { - McpConfig::Enabled(enabled) => (*enabled, None), - McpConfig::WithOptions(o) => (o.enabled.unwrap_or(true), Some(o)), - }; - - if !is_enabled { - return None; - } - - // Custom MCPs with container/url: not supported in 1ES (needs service connection) - if is_custom_mcp(config) { - log::warn!( - "MCP '{}' uses custom container/url — not supported in 1ES target (requires service connection)", - name - ); - return None; - } - - // Use explicit service connection or generate default. - // Warn when falling back to the naming convention — the generated - // service connection reference may not exist in the ADO project. - let service_connection = opts - .and_then(|o| o.service_connection.clone()) - .unwrap_or_else(|| { - let default = format!("mcp-{}-service-connection", name); - log::warn!( - "MCP '{}' has no explicit service connection in 1ES target — \ - assuming '{}' exists", - name, - default, - ); - default - }); - - Some((name.clone(), service_connection)) - }) - .collect(); - - if mcp_entries.is_empty() { - return "{}".to_string(); - } - - // Sort for deterministic output - mcp_entries.sort_by(|a, b| a.0.cmp(&b.0)); - - mcp_entries - .iter() - .map(|(name, sc)| format!("{}:\n serviceConnection: {}", name, sc)) - .collect::>() - .join("\n") -} - -/// Generate inline steps YAML (for adding to existing step list) -/// Returns empty string when no steps (blank lines are valid in YAML) -fn generate_inline_steps(steps: &[serde_yaml::Value]) -> String { - if steps.is_empty() { - return String::new(); - } - - common::format_steps_yaml_indented(steps, 0) -} - -/// Generate setup job for 1ES template +/// Generate setup job for 1ES template. +/// Unlike standalone, 1ES jobs don't have per-job `pool:` — the pool is at +/// the top-level `parameters.pool`. Jobs use `templateContext: type: buildJob`. fn generate_setup_job(setup_steps: &[serde_yaml::Value], agent_name: &str) -> String { if setup_steps.is_empty() { return String::new(); } - let steps_yaml: Vec<_> = setup_steps - .iter() - .filter_map(|step| { - serde_yaml::to_string(step).ok().map(|yaml| { - yaml.trim() - .lines() - .enumerate() - .map(|(i, line)| { - if i == 0 { - format!("- {}", line.trim_start_matches("---").trim()) - } else { - format!(" {}", line) - } - }) - .collect::>() - .join("\n") - }) - }) - .collect(); + let steps_yaml = format_steps_yaml_indented(setup_steps, 4); format!( r#"- job: SetupJob displayName: "{} - Setup" templateContext: type: buildJob - steps: - - checkout: self - {}"#, - agent_name, - steps_yaml.join("\n ") + steps: + - checkout: self +{} +"#, + agent_name, steps_yaml ) } -/// Generate teardown job for 1ES template +/// Generate teardown job for 1ES template. +/// Unlike standalone, 1ES jobs don't have per-job `pool:`. fn generate_teardown_job(teardown_steps: &[serde_yaml::Value], agent_name: &str) -> String { if teardown_steps.is_empty() { return String::new(); } - let steps_yaml: Vec<_> = teardown_steps - .iter() - .filter_map(|step| { - serde_yaml::to_string(step).ok().map(|yaml| { - yaml.trim() - .lines() - .enumerate() - .map(|(i, line)| { - if i == 0 { - format!("- {}", line.trim_start_matches("---").trim()) - } else { - format!(" {}", line) - } - }) - .collect::>() - .join("\n") - }) - }) - .collect(); + let steps_yaml = format_steps_yaml_indented(teardown_steps, 4); format!( r#"- job: TeardownJob @@ -407,121 +123,17 @@ fn generate_teardown_job(teardown_steps: &[serde_yaml::Value], agent_name: &str) dependsOn: ProcessSafeOutputs templateContext: type: buildJob - steps: - - checkout: self - {}"#, - agent_name, - steps_yaml.join("\n ") + steps: + - checkout: self +{} +"#, + agent_name, steps_yaml ) } #[cfg(test)] mod tests { use super::*; - use super::super::types::McpOptions; - - // ─── generate_agent_context_root ───────────────────────────────────────── - - #[test] - fn test_generate_agent_context_root_repo() { - assert_eq!( - generate_agent_context_root("repo"), - "$(Build.Repository.Name)" - ); - } - - #[test] - fn test_generate_agent_context_root_root() { - assert_eq!(generate_agent_context_root("root"), "."); - } - - #[test] - fn test_generate_agent_context_root_unknown_defaults_to_dot() { - // Any unrecognised workspace value should fall through to "." - assert_eq!(generate_agent_context_root("something-else"), "."); - } - - // ─── generate_mcp_configuration ────────────────────────────────────────── - - #[test] - fn test_generate_mcp_configuration_empty_returns_braces() { - let mcps = HashMap::new(); - let result = generate_mcp_configuration(&mcps); - assert_eq!(result, "{}"); - } - - #[test] - fn test_generate_mcp_configuration_skips_custom_mcp_with_command() { - let mut mcps = HashMap::new(); - mcps.insert( - "my-tool".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - ..Default::default() - }), - ); - let result = generate_mcp_configuration(&mcps); - // Custom MCPs with `command:` are not supported in 1ES — must be excluded - assert!( - !result.contains("my-tool"), - "Custom MCP with command should be excluded in 1ES target" - ); - assert_eq!(result, "{}", "Only custom MCPs → empty config"); - } - - #[test] - fn test_generate_mcp_configuration_service_connection_mcp() { - let mut mcps = HashMap::new(); - mcps.insert( - "my-mcp".to_string(), - McpConfig::WithOptions(McpOptions { - service_connection: Some("mcp-my-mcp-sc".to_string()), - ..Default::default() - }), - ); - let result = generate_mcp_configuration(&mcps); - assert!(result.contains("my-mcp"), "Service-connection MCP should appear in output"); - assert!( - result.contains("serviceConnection: mcp-my-mcp-sc"), - "Should reference the explicit service connection" - ); - } - - #[test] - fn test_generate_mcp_configuration_default_service_connection_naming() { - // When no explicit service_connection is set, a default name is generated. - let mut mcps = HashMap::new(); - mcps.insert("my-tool".to_string(), McpConfig::Enabled(true)); - let result = generate_mcp_configuration(&mcps); - assert!(result.contains("my-tool")); - assert!(result.contains("serviceConnection: mcp-my-tool-service-connection")); - } - - #[test] - fn test_generate_mcp_configuration_disabled_mcp_excluded() { - let mut mcps = HashMap::new(); - mcps.insert("disabled-mcp".to_string(), McpConfig::Enabled(false)); - let result = generate_mcp_configuration(&mcps); - assert!(!result.contains("disabled-mcp"), "Disabled MCP should not appear in output"); - assert_eq!(result, "{}"); - } - - // ─── generate_inline_steps ──────────────────────────────────────────────── - - #[test] - fn test_generate_inline_steps_empty() { - let result = generate_inline_steps(&[]); - assert!(result.is_empty(), "Empty steps list should return empty string"); - } - - #[test] - fn test_generate_inline_steps_single_step() { - let step: serde_yaml::Value = - serde_yaml::from_str("bash: echo hello").expect("valid yaml"); - let result = generate_inline_steps(&[step]); - assert!(result.contains("bash"), "Step YAML should contain the bash key"); - assert!(result.contains("echo hello"), "Step YAML should contain the command"); - } // ─── generate_setup_job ────────────────────────────────────────────────── @@ -543,6 +155,9 @@ mod tests { ); assert!(result.contains("checkout: self"), "Should include self checkout"); assert!(result.contains("echo setup"), "Should include the step content"); + assert!(result.contains("templateContext"), "Should include templateContext"); + assert!(result.contains("type: buildJob"), "Should use buildJob type"); + assert!(!result.contains("pool:"), "Should not include per-job pool"); } // ─── generate_teardown_job ─────────────────────────────────────────────── @@ -569,5 +184,7 @@ mod tests { ); assert!(result.contains("checkout: self"), "Should include self checkout"); assert!(result.contains("echo teardown"), "Should include the step content"); + assert!(result.contains("templateContext"), "Should include templateContext"); + assert!(!result.contains("pool:"), "Should not include per-job pool"); } } \ No newline at end of file diff --git a/src/compile/standalone.rs b/src/compile/standalone.rs index d146b9d1..a3d0d94c 100644 --- a/src/compile/standalone.rs +++ b/src/compile/standalone.rs @@ -15,15 +15,12 @@ use super::Compiler; use super::common::{ AWF_VERSION, MCPG_VERSION, MCPG_IMAGE, CompileConfig, compile_shared, + generate_allowed_domains, generate_cancel_previous_builds, generate_enabled_tools_args, generate_mcpg_config, generate_mcpg_docker_env, }; -use super::extensions::CompilerExtension; -use super::types::{FrontMatter, McpConfig}; -use crate::allowed_hosts::{CORE_ALLOWED_HOSTS, mcp_required_hosts}; -use crate::ecosystem_domains::{get_ecosystem_domains, is_ecosystem_identifier, is_known_ecosystem}; -use std::collections::HashSet; +use super::types::FrontMatter; /// Standalone pipeline compiler. pub struct StandaloneCompiler; @@ -78,147 +75,6 @@ impl Compiler for StandaloneCompiler { } } -// ==================== Standalone-specific helpers ==================== - -/// Generate the allowed domains list for AWF network isolation. -/// -/// This generates a comma-separated list of domain patterns for AWF's -/// `--allow-domains` flag. The list includes: -/// 1. Core Azure DevOps/GitHub endpoints -/// 2. MCP-specific endpoints for each enabled MCP -/// 3. User-specified additional hosts from network.allowed -fn generate_allowed_domains( - front_matter: &FrontMatter, - extensions: &[super::extensions::Extension], -) -> Result { - // Collect enabled MCP names (user-defined MCPs, not first-party tools) - let enabled_mcps: Vec = front_matter - .mcp_servers - .iter() - .filter_map(|(name, config)| { - let is_enabled = match config { - McpConfig::Enabled(enabled) => *enabled, - McpConfig::WithOptions(_) => true, - }; - if is_enabled { Some(name.clone()) } else { None } - }) - .collect(); - - // Get user-specified hosts - let user_hosts: Vec = front_matter - .network - .as_ref() - .map(|n| n.allowed.clone()) - .unwrap_or_default(); - - // Generate the allowlist by combining core + MCP + extension + user hosts - let mut hosts: HashSet = HashSet::new(); - - // Add core hosts - for host in CORE_ALLOWED_HOSTS { - hosts.insert((*host).to_string()); - } - - // Add host.docker.internal — required for the AWF container to reach - // MCPG and SafeOutputs on the host. Only added for standalone pipelines - // that always use MCPG. - hosts.insert("host.docker.internal".to_string()); - - // Add MCP-specific hosts (user-defined MCPs via mcp_required_hosts lookup) - for mcp in &enabled_mcps { - for host in mcp_required_hosts(mcp) { - hosts.insert((*host).to_string()); - } - } - - // Add extension-declared hosts (runtimes + first-party tools). - // Extensions may return ecosystem identifiers (e.g., "lean") which are - // expanded to their domain lists, or raw domain names. - for ext in extensions { - for host in ext.required_hosts() { - if is_ecosystem_identifier(&host) { - let domains = get_ecosystem_domains(&host); - if domains.is_empty() { - eprintln!( - "warning: extension '{}' requires unknown ecosystem '{}'; \ - no domains added", - ext.name(), - host - ); - } - for domain in domains { - hosts.insert(domain); - } - } else { - hosts.insert(host); - } - } - } - - // Add user-specified hosts (validated against DNS-safe characters) - // Entries may be ecosystem identifiers (e.g., "python", "rust") which - // expand to their domain lists, or raw domain names. - for host in &user_hosts { - if is_ecosystem_identifier(host) { - let domains = get_ecosystem_domains(host); - if domains.is_empty() && !is_known_ecosystem(host) { - eprintln!( - "warning: network.allowed contains unknown ecosystem identifier '{}'. \ - Known ecosystems: python, rust, node, go, java, etc. \ - If this is a domain name, it should contain a dot.", - host - ); - } - for domain in domains { - hosts.insert(domain); - } - } else { - let valid_chars = !host.is_empty() - && host - .chars() - .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '*')); - if !valid_chars { - anyhow::bail!( - "network.allowed domain '{}' contains characters invalid in DNS names. \ - Only ASCII alphanumerics, '.', '-', and '*' are allowed.", - host - ); - } - if host.contains('*') && !(host.starts_with("*.") && !host[2..].contains('*')) { - anyhow::bail!( - "network.allowed domain '{}' uses '*' in an unsupported position. \ - Wildcards must appear only as a leading prefix (e.g. '*.example.com').", - host - ); - } - hosts.insert(host.clone()); - } - } - - // Remove blocked hosts (supports both ecosystem identifiers and raw domains) - let blocked_hosts: Vec = front_matter - .network - .as_ref() - .map(|n| n.blocked.clone()) - .unwrap_or_default(); - for blocked in &blocked_hosts { - if is_ecosystem_identifier(blocked) { - for domain in get_ecosystem_domains(blocked) { - hosts.remove(&domain); - } - } else { - hosts.remove(blocked); - } - } - - // Sort for deterministic output - let mut allowlist: Vec = hosts.into_iter().collect(); - allowlist.sort(); - - // Format as comma-separated list for AWF --allow-domains - Ok(allowlist.join(",")) -} - #[cfg(test)] mod tests { use super::*; diff --git a/templates/1es-base.yml b/templates/1es-base.yml index fe49a8c5..ddf302b4 100644 --- a/templates/1es-base.yml +++ b/templates/1es-base.yml @@ -1,6 +1,6 @@ # 1ES Pipeline Template for Agentic Pipelines -# This template extends the 1ES Unofficial Pipeline Template and uses the Agency job type -# for the main agent task, while adding custom jobs for safe output analysis and processing. +# This template extends the 1ES Unofficial Pipeline Template with Copilot CLI, +# AWF network isolation, and MCP Gateway — matching the standalone pipeline model. name: {{ agent_name }}-$(BuildID) {{ parameters }} @@ -31,7 +31,7 @@ extends: name: AZS-1ES-W-MMS2022 os: windows featureFlags: - disableNetworkIsolation: true # Agency requires network access for AI services + disableNetworkIsolation: true # AWF handles network isolation at application layer runPrerequisitesOnImage: false # Pool image has 1ES prerequisites preinstalled stages: - stage: AgentStage @@ -39,104 +39,376 @@ extends: jobs: {{ setup_job }} - # Main agentic task using the 1ES Agency job type - job: PerformAgenticTask - displayName: "{{ agent_name }} (Agent)" + displayName: "{{ agent_name }} (Agent Automations)" {{ agentic_depends_on }} {{ job_timeout }} templateContext: - type: agencyJob - arguments: - agentContextRoot: {{ agent_context_root }} - skipSourceSync: false - preAgentSteps: - - {{ checkout_repositories }} - - {{ prepare_steps }} - - - bash: | - COMPILER_VERSION="{{ compiler_version }}" - DOWNLOAD_DIR="$(Pipeline.Workspace)/agentic-pipeline-compiler" - DOWNLOAD_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/ado-aw-linux-x64" - CHECKSUM_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/checksums.txt" - - mkdir -p "$DOWNLOAD_DIR" - echo "Downloading ado-aw v${COMPILER_VERSION} from GitHub Releases..." - curl -fsSL -o "$DOWNLOAD_DIR/ado-aw-linux-x64" "$DOWNLOAD_URL" - curl -fsSL -o "$DOWNLOAD_DIR/checksums.txt" "$CHECKSUM_URL" - - echo "Verifying checksum..." - cd "$DOWNLOAD_DIR" - grep "ado-aw-linux-x64" checksums.txt | sha256sum -c - - mv ado-aw-linux-x64 ado-aw - chmod +x ado-aw - displayName: "Download agentic pipeline compiler (v{{ compiler_version }})" - - - bash: | - AGENTIC_PIPELINES_PATH="$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw" - chmod +x "$AGENTIC_PIPELINES_PATH" - $AGENTIC_PIPELINES_PATH check "{{ pipeline_path }}" - displayName: "Verify pipeline integrity" - - - bash: | - mkdir -p "$HOME/.copilot" - mkdir -p "$(Agent.TempDirectory)/staging" - - AGENTIC_PIPELINES_PATH="$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw" - chmod +x "$AGENTIC_PIPELINES_PATH" - - # Generate MCP config for safeoutputs - cat > "$HOME/.copilot/mcp-config.json" << EOF - { - "mcpServers": { - "safeoutputs": { - "type": "stdio", - "tools": ["*"], - "command": "$AGENTIC_PIPELINES_PATH", - "args": ["mcp", "$(Agent.TempDirectory)/staging", "{{ working_directory }}"] + type: buildJob + outputs: + - output: pipelineArtifact + path: $(Agent.TempDirectory)/staging + artifact: agent_outputs_$(Build.BuildId) + condition: always() + steps: + {{ checkout_self }} + {{ checkout_repositories }} + + {{ acquire_ado_token }} + + {{ cancel_previous_builds }} + + - task: NuGetAuthenticate@1 + displayName: "Authenticate NuGet Feed" + + - task: NuGetCommand@2 + displayName: "Install Copilot CLI" + inputs: + command: 'custom' + arguments: 'install Microsoft.Copilot.CLI.linux-x64 -Source "https://pkgs.dev.azure.com/msazuresphere/_packaging/Guardian1ESPTUpstreamOrgFeed/nuget/v3/index.json" -Version {{ copilot_version }} -OutputDirectory $(Agent.TempDirectory)/tools -ExcludeVersion -NonInteractive' + + - bash: | + ls -la "$(Agent.TempDirectory)/tools" + echo "##vso[task.prependpath]$(Agent.TempDirectory)/tools/Microsoft.Copilot.CLI.linux-x64" + + # Copy copilot binary to /tmp so it's accessible inside AWF container + # (AWF auto-mounts /tmp:/tmp:rw but not Agent.TempDirectory) + mkdir -p /tmp/awf-tools + cp "$(Agent.TempDirectory)/tools/Microsoft.Copilot.CLI.linux-x64/copilot" /tmp/awf-tools/copilot + chmod +x /tmp/awf-tools/copilot + displayName: "Add copilot to PATH" + + - bash: | + copilot --version + copilot -h + displayName: "Output copilot version" + + - bash: | + COMPILER_VERSION="{{ compiler_version }}" + DOWNLOAD_DIR="$(Pipeline.Workspace)/agentic-pipeline-compiler" + DOWNLOAD_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/ado-aw-linux-x64" + CHECKSUM_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/checksums.txt" + + mkdir -p "$DOWNLOAD_DIR" + echo "Downloading ado-aw v${COMPILER_VERSION} from GitHub Releases..." + curl -fsSL -o "$DOWNLOAD_DIR/ado-aw-linux-x64" "$DOWNLOAD_URL" + curl -fsSL -o "$DOWNLOAD_DIR/checksums.txt" "$CHECKSUM_URL" + + echo "Verifying checksum..." + cd "$DOWNLOAD_DIR" + grep "ado-aw-linux-x64" checksums.txt | sha256sum -c - + mv ado-aw-linux-x64 ado-aw + chmod +x ado-aw + displayName: "Download agentic pipeline compiler (v{{ compiler_version }})" + + - bash: | + AGENTIC_PIPELINES_PATH="$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw" + chmod +x "$AGENTIC_PIPELINES_PATH" + $AGENTIC_PIPELINES_PATH check "{{ pipeline_path }}" + displayName: "Verify pipeline integrity" + + - bash: | + mkdir -p "$(Agent.TempDirectory)/staging" + + # Generate MCPG API key early so it's available as an ADO secret variable + # for both the MCPG config and the agent's mcp-config.json + MCP_GATEWAY_API_KEY=$(openssl rand -base64 45 | tr -d '/+=') + echo "##vso[task.setvariable variable=MCP_GATEWAY_API_KEY;issecret=true]$MCP_GATEWAY_API_KEY" + + # Write MCPG (MCP Gateway) configuration to a file + cat > "$(Agent.TempDirectory)/staging/mcpg-config.json" << 'MCPG_CONFIG_EOF' + {{ mcpg_config }} + MCPG_CONFIG_EOF + + echo "MCPG config:" + cat "$(Agent.TempDirectory)/staging/mcpg-config.json" + + # Validate JSON + python3 -m json.tool "$(Agent.TempDirectory)/staging/mcpg-config.json" > /dev/null && echo "JSON is valid" + displayName: "Prepare MCPG config" + + - bash: | + mkdir -p "$HOME/.copilot" + mkdir -p /tmp/awf-tools/staging + + echo "HOME: $HOME" + + # Use absolute path since MCP subprocess may not inherit PATH + AGENTIC_PIPELINES_PATH="$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw" + + # Verify the binary exists and is executable + ls -la "$AGENTIC_PIPELINES_PATH" + chmod +x "$AGENTIC_PIPELINES_PATH" + + $AGENTIC_PIPELINES_PATH -h + + # Copy compiler binary to /tmp so it's accessible inside AWF container + cp "$AGENTIC_PIPELINES_PATH" /tmp/awf-tools/ado-aw + chmod +x /tmp/awf-tools/ado-aw + + # Copy MCPG config to /tmp + cp "$(Agent.TempDirectory)/staging/mcpg-config.json" /tmp/awf-tools/staging/mcpg-config.json + + # Generate MCP config for copilot CLI pointing to MCPG gateway on host. + # The agent inside AWF reaches MCPG via host.docker.internal. + # MCPG enforces client auth via the gateway API key. + cat > /tmp/awf-tools/mcp-config.json << EOF + { + "mcpServers": { + "mcpg": { + "type": "http", + "url": "http://host.docker.internal:80/mcp", + "headers": { + "Authorization": "Bearer $(MCP_GATEWAY_API_KEY)" } } } - EOF - - echo "Generated MCP config:" - cat "$HOME/.copilot/mcp-config.json" - python3 -m json.tool "$HOME/.copilot/mcp-config.json" > /dev/null && echo "JSON is valid" - displayName: "Configure safeoutputs MCP" - - - bash: | - # Write agent instructions to a prompt file - cat > "$(Agent.TempDirectory)/agent-prompt.md" << 'AGENT_PROMPT_EOF' - {{ agent_content }} - AGENT_PROMPT_EOF - - # Append safeoutputs MCP guidance - cat >> "$(Agent.TempDirectory)/agent-prompt.md" << 'SAFEOUTPUTS_EOF' - - --- - - ## Important: Safe Outputs + } + EOF + + # Also write to $HOME/.copilot for host-side use + cp /tmp/awf-tools/mcp-config.json "$HOME/.copilot/mcp-config.json" + + echo "Generated MCP config at: /tmp/awf-tools/mcp-config.json" + cat /tmp/awf-tools/mcp-config.json + + # Validate JSON + python3 -m json.tool /tmp/awf-tools/mcp-config.json > /dev/null && echo "JSON is valid" + displayName: "Generate MCP configs" + + - bash: | + # Write agent instructions to /tmp so it's accessible inside AWF container + cat > "/tmp/awf-tools/agent-prompt.md" << 'AGENT_PROMPT_EOF' + {{ agent_content }} + AGENT_PROMPT_EOF + + # Append safeoutputs MCP guidance + cat >> "/tmp/awf-tools/agent-prompt.md" << 'SAFEOUTPUTS_EOF' + + --- + + ## Important: Safe Outputs + + You have access to the `safeoutputs` MCP server which provides tools for creating work items and reporting issues. **Always prefer using safeoutputs tools over other methods**. + + These tools generate safe outputs that will be reviewed and executed in a separate pipeline stage, ensuring proper validation and security controls. + SAFEOUTPUTS_EOF + + echo "Agent prompt:" + cat "/tmp/awf-tools/agent-prompt.md" + displayName: "Prepare agent prompt" + + - task: DockerInstaller@0 + displayName: "Install Docker" + inputs: + dockerVersion: 26.1.4 + + - bash: | + AWF_VERSION="{{ firewall_version }}" + DOWNLOAD_DIR="$(Pipeline.Workspace)/awf" + DOWNLOAD_URL="https://github.com/github/gh-aw-firewall/releases/download/v${AWF_VERSION}/awf-linux-x64" + CHECKSUM_URL="https://github.com/github/gh-aw-firewall/releases/download/v${AWF_VERSION}/checksums.txt" + + mkdir -p "$DOWNLOAD_DIR" + echo "Downloading AWF v${AWF_VERSION} from GitHub Releases..." + curl -fsSL -o "$DOWNLOAD_DIR/awf-linux-x64" "$DOWNLOAD_URL" + curl -fsSL -o "$DOWNLOAD_DIR/checksums.txt" "$CHECKSUM_URL" + + echo "Verifying checksum..." + cd "$DOWNLOAD_DIR" + grep "awf-linux-x64" checksums.txt | sha256sum -c - + mv awf-linux-x64 awf + chmod +x awf + echo "##vso[task.prependpath]$(Pipeline.Workspace)/awf" + ./awf --version || echo "AWF binary ready" + displayName: "Download AWF (Agentic Workflow Firewall) v{{ firewall_version }}" + + - bash: | + docker pull ghcr.io/github/gh-aw-firewall/squid:{{ firewall_version }} + docker pull ghcr.io/github/gh-aw-firewall/agent:{{ firewall_version }} + docker tag ghcr.io/github/gh-aw-firewall/squid:{{ firewall_version }} ghcr.io/github/gh-aw-firewall/squid:latest + docker tag ghcr.io/github/gh-aw-firewall/agent:{{ firewall_version }} ghcr.io/github/gh-aw-firewall/agent:latest + docker pull {{ mcpg_image }}:v{{ mcpg_version }} + displayName: "Pre-pull AWF and MCPG container images (v{{ firewall_version }})" + + {{ prepare_steps }} + + # Start SafeOutputs HTTP server on host (MCPG proxies to it) + - bash: | + SAFE_OUTPUTS_PORT=8100 + SAFE_OUTPUTS_API_KEY=$(openssl rand -base64 45 | tr -d '/+=') + echo "##vso[task.setvariable variable=SAFE_OUTPUTS_PORT]$SAFE_OUTPUTS_PORT" + echo "##vso[task.setvariable variable=SAFE_OUTPUTS_API_KEY;issecret=true]$SAFE_OUTPUTS_API_KEY" + + mkdir -p "$(Agent.TempDirectory)/staging/logs" + + # Start SafeOutputs as HTTP server in the background + # NOTE: {{ enabled_tools_args }} expands to either "" or "--enabled-tools X ... " + # (with trailing space). The value MUST be newline-free; is_safe_tool_name enforces this. + # Positional args (output_directory, bounding_directory) MUST come after all named + # options — clap parses them positionally and reordering would break the command. + nohup /tmp/awf-tools/ado-aw mcp-http \ + --port "$SAFE_OUTPUTS_PORT" \ + --api-key "$SAFE_OUTPUTS_API_KEY" \ + {{ enabled_tools_args }}"/tmp/awf-tools/staging" \ + "{{ working_directory }}" \ + > "$(Agent.TempDirectory)/staging/logs/safeoutputs.log" 2>&1 & + SAFE_OUTPUTS_PID=$! + echo "##vso[task.setvariable variable=SAFE_OUTPUTS_PID]$SAFE_OUTPUTS_PID" + echo "SafeOutputs HTTP server started on port $SAFE_OUTPUTS_PORT (PID: $SAFE_OUTPUTS_PID)" + + # Wait for server to be ready + READY=false + for i in $(seq 1 30); do + if curl -sf "http://localhost:$SAFE_OUTPUTS_PORT/health" > /dev/null 2>&1; then + echo "SafeOutputs HTTP server is ready" + READY=true + break + fi + sleep 1 + done + if [ "$READY" != "true" ]; then + echo "##vso[task.complete result=Failed]SafeOutputs HTTP server did not become ready within 30s" + exit 1 + fi + displayName: "Start SafeOutputs HTTP server" + + # Start MCP Gateway (MCPG) on host + - bash: | + # Substitute runtime values into MCPG config + MCPG_CONFIG=$(cat /tmp/awf-tools/staging/mcpg-config.json \ + | sed "s|\${SAFE_OUTPUTS_PORT}|$(SAFE_OUTPUTS_PORT)|g" \ + | sed "s|\${SAFE_OUTPUTS_API_KEY}|$(SAFE_OUTPUTS_API_KEY)|g" \ + | sed "s|\${MCP_GATEWAY_API_KEY}|$(MCP_GATEWAY_API_KEY)|g") + + # Log the template config (before API key substitution) for debugging. + echo "Starting MCPG with config template:" + cat /tmp/awf-tools/staging/mcpg-config.json | python3 -m json.tool + + # Remove any leftover container from a previous interrupted run + # (--rm only cleans up on clean exit; OOM/SIGKILL may leave it behind) + docker rm -f mcpg 2>/dev/null || true + + # Start MCPG Docker container on host network. + # The Docker socket mount is required because MCPG spawns stdio-based MCP + # servers as sibling containers. This grants significant host access — acceptable + # here because the pipeline agent is already trusted and network-isolated by AWF. + echo "$MCPG_CONFIG" | docker run -i --rm \ + --name mcpg \ + --network host \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -e MCP_GATEWAY_API_KEY="$(MCP_GATEWAY_API_KEY)" \ + {{ mcpg_docker_env }} + {{ mcpg_image }}:v{{ mcpg_version }} & + MCPG_PID=$! + echo "MCPG started (PID: $MCPG_PID)" + + # Wait for MCPG to be ready + READY=false + for i in $(seq 1 30); do + if curl -sf "http://localhost:80/health" > /dev/null 2>&1; then + echo "MCPG is ready" + READY=true + break + fi + sleep 1 + done + if [ "$READY" != "true" ]; then + echo "##vso[task.complete result=Failed]MCPG did not become ready within 30s" + exit 1 + fi + displayName: "Start MCP Gateway (MCPG)" + + # Network isolation via AWF (Agentic Workflow Firewall) + - bash: | + set -o pipefail + + AGENT_OUTPUT_FILE="$(Agent.TempDirectory)/staging/logs/agent-output.txt" + mkdir -p "$(Agent.TempDirectory)/staging/logs" + + echo "=== Running AI agent with AWF network isolation ===" + echo "Allowed domains: {{ allowed_domains }}" + + # AWF provides L7 domain whitelisting via Squid proxy + Docker containers. + # --enable-host-access allows the AWF container to reach host services + # (MCPG and SafeOutputs) via host.docker.internal. + # AWF auto-mounts /tmp:/tmp:rw into the container, so copilot binary, + # agent prompt, and MCP config are placed under /tmp/awf-tools/. + # Stream agent output in real-time while filtering VSO commands. + # sed -u = unbuffered (line-by-line) so output appears immediately. + # tee writes to both stdout (ADO pipeline log) and the artifact file. + # pipefail (set above) ensures AWF's exit code propagates through the pipe. + sudo -E "$(Pipeline.Workspace)/awf/awf" \ + --allow-domains "{{ allowed_domains }}" \ + --skip-pull \ + --env-all \ + --enable-host-access \ + --container-workdir "{{ working_directory }}" \ + --log-level info \ + --proxy-logs-dir "$(Agent.TempDirectory)/staging/logs/firewall" \ + -- '/tmp/awf-tools/copilot --prompt "$(cat /tmp/awf-tools/agent-prompt.md)" --additional-mcp-config @/tmp/awf-tools/mcp-config.json {{ copilot_params }}' \ + 2>&1 \ + | sed -u 's/##vso\[/[VSO-FILTERED] vso[/g; s/##\[/[VSO-FILTERED] [/g' \ + | tee "$AGENT_OUTPUT_FILE" \ + && AGENT_EXIT_CODE=0 || AGENT_EXIT_CODE=$? + + # Print firewall summary if available + if [ -x "$(Pipeline.Workspace)/awf/awf" ]; then + echo "=== Firewall Summary ===" + "$(Pipeline.Workspace)/awf/awf" logs summary --source "$(Agent.TempDirectory)/staging/logs/firewall" 2>/dev/null || true + fi - You have access to the `safeoutputs` MCP server which provides tools for creating work items and reporting issues. **Always prefer using safeoutputs tools over other methods**. + exit $AGENT_EXIT_CODE + displayName: "Run copilot (AWF network isolated)" + workingDirectory: {{ working_directory }} + env: + {{ copilot_ado_env }} + GITHUB_TOKEN: $(GITHUB_TOKEN) + GITHUB_READ_ONLY: 1 + COPILOT_OTEL_ENABLED: "true" + COPILOT_OTEL_EXPORTER_TYPE: "file" + COPILOT_OTEL_FILE_EXPORTER_PATH: "/tmp/awf-tools/staging/otel.jsonl" + + - bash: | + # Copy safe outputs from /tmp back to staging for artifact publish + mkdir -p "$(Agent.TempDirectory)/staging" + cp -r /tmp/awf-tools/staging/* "$(Agent.TempDirectory)/staging/" 2>/dev/null || true + echo "Safe outputs copied to $(Agent.TempDirectory)/staging" + ls -la "$(Agent.TempDirectory)/staging" 2>/dev/null || echo "No safe outputs found" + displayName: "Collect safe outputs from AWF container" + condition: always() + + - bash: | + # Stop MCPG container + echo "Stopping MCPG..." + docker stop mcpg 2>/dev/null || true + echo "MCPG stopped" + + # Stop SafeOutputs HTTP server + if [ -n "$(SAFE_OUTPUTS_PID)" ]; then + echo "Stopping SafeOutputs (PID: $(SAFE_OUTPUTS_PID))..." + kill "$(SAFE_OUTPUTS_PID)" 2>/dev/null || true + echo "SafeOutputs stopped" + fi + displayName: "Stop MCPG and SafeOutputs" + condition: always() - These tools generate safe outputs that will be reviewed and executed in a separate pipeline stage, ensuring proper validation and security controls. - SAFEOUTPUTS_EOF + {{ finalize_steps }} - echo "Agent prompt:" - cat "$(Agent.TempDirectory)/agent-prompt.md" - displayName: "Prepare agent prompt" - postAgentSteps: - {{ finalize_steps }} - globalOptions: '--log-dir $(Agency_LogPath) {{ global_options }}' - commandOptions: '{{ copilot_params }}' - logLevel: '{{ log_level }}' - logPath: '$(Build.StagingDirectory)/copilot-logs' - createArtifact: true - mcpConfiguration: - {{ mcp_configuration }} + - bash: | + # Copy all logs to output directory for artifact upload + mkdir -p "$(Agent.TempDirectory)/staging/logs" + if [ -d ~/.copilot/logs ]; then + cp -r ~/.copilot/logs/* "$(Agent.TempDirectory)/staging/logs/" 2>/dev/null || true + fi + if [ -d ~/.ado-aw/logs ]; then + cp -r ~/.ado-aw/logs/* "$(Agent.TempDirectory)/staging/logs/" 2>/dev/null || true + fi + echo "Logs copied to $(Agent.TempDirectory)/staging/logs" + ls -la "$(Agent.TempDirectory)/staging/logs" 2>/dev/null || echo "No logs found" + displayName: "Copy logs to output directory" + condition: always() - # Threat analysis job (custom - not using agencyJob) - job: AnalyzeSafeOutputs displayName: "Analyze safe outputs for threats" dependsOn: PerformAgenticTask @@ -146,155 +418,214 @@ extends: outputs: - output: pipelineArtifact path: $(Agent.TempDirectory)/analyzed_outputs - artifact: analyzed_outputs - steps: - {{ checkout_self }} - {{ checkout_repositories }} - - {{ acquire_ado_token }} - - - download: current - artifact: AgencyArtifact - - - task: NuGetAuthenticate@1 - displayName: "Authenticate NuGet Feed" - - - task: NuGetCommand@2 - displayName: "Install Copilot CLI" - inputs: - command: 'custom' - arguments: 'install Microsoft.Copilot.CLI.linux-x64 -Source "https://pkgs.dev.azure.com/msazuresphere/_packaging/Guardian1ESPTUpstreamOrgFeed/nuget/v3/index.json" -Version {{ copilot_version }} -OutputDirectory $(Agent.TempDirectory)/tools -ExcludeVersion -NonInteractive' - - - bash: | - ls -la "$(Agent.TempDirectory)/tools" - echo "##vso[task.prependpath]$(Agent.TempDirectory)/tools/Microsoft.Copilot.CLI.linux-x64" - displayName: Add copilot to PATH - - - bash: | - COMPILER_VERSION="{{ compiler_version }}" - DOWNLOAD_DIR="$(Pipeline.Workspace)/agentic-pipeline-compiler" - DOWNLOAD_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/ado-aw-linux-x64" - CHECKSUM_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/checksums.txt" - - mkdir -p "$DOWNLOAD_DIR" - echo "Downloading ado-aw v${COMPILER_VERSION} from GitHub Releases..." - curl -fsSL -o "$DOWNLOAD_DIR/ado-aw-linux-x64" "$DOWNLOAD_URL" - curl -fsSL -o "$DOWNLOAD_DIR/checksums.txt" "$CHECKSUM_URL" - - echo "Verifying checksum..." - cd "$DOWNLOAD_DIR" - grep "ado-aw-linux-x64" checksums.txt | sha256sum -c - - mv ado-aw-linux-x64 ado-aw - chmod +x ado-aw - displayName: "Download agentic pipeline compiler (v{{ compiler_version }})" - - - bash: | - mkdir -p {{ working_directory }}/safe_outputs - # Copy safe outputs from AgencyArtifact if they exist - if [ -f "$(Pipeline.Workspace)/AgencyArtifact/safe_outputs.ndjson" ]; then - cp "$(Pipeline.Workspace)/AgencyArtifact/safe_outputs.ndjson" {{ working_directory }}/safe_outputs/ - fi - displayName: "Prepare safe outputs for analysis" - - - bash: | - # Write threat analysis prompt to a file - cat > "$(Agent.TempDirectory)/threat-analysis-prompt.md" << 'THREAT_ANALYSIS_EOF' - {{ threat_analysis_prompt }} - THREAT_ANALYSIS_EOF - - echo "Threat analysis prompt:" - cat "$(Agent.TempDirectory)/threat-analysis-prompt.md" - displayName: "Prepare threat analysis prompt" - - - bash: | - AGENTIC_PIPELINES_PATH="$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw" - chmod +x "$AGENTIC_PIPELINES_PATH" - - # Start the network proxy in background - $AGENTIC_PIPELINES_PATH proxy > /tmp/proxy_output.txt 2>&1 & - PROXY_PID=$! - echo $PROXY_PID > /tmp/proxy.pid - - sleep 1 - PROXY_PORT=$(head -n1 /tmp/proxy_output.txt) - - if [[ -z "$PROXY_PORT" || ! "$PROXY_PORT" =~ ^[0-9]+$ ]]; then - echo "##vso[task.logissue type=error]Failed to start network proxy" - exit 1 - fi - - echo "Network proxy started on port $PROXY_PORT (PID: $PROXY_PID)" - echo "##vso[task.setvariable variable=PROXY_PORT]$PROXY_PORT" - displayName: "Start network proxy" - - - bash: | - set -o pipefail - - THREAT_OUTPUT_FILE="$(Agent.TempDirectory)/threat-analysis-output.txt" - - # Stream threat analysis output in real-time with VSO command filtering - copilot --prompt "$(cat $(Agent.TempDirectory)/threat-analysis-prompt.md)" {{ copilot_params }} \ - 2>&1 \ - | sed -u 's/##vso\[/[VSO-FILTERED] vso[/g; s/##\[/[VSO-FILTERED] [/g' \ - | tee "$THREAT_OUTPUT_FILE" \ - && AGENT_EXIT_CODE=0 || AGENT_EXIT_CODE=$? - - exit $AGENT_EXIT_CODE - displayName: "Run threat analysis" - workingDirectory: {{ working_directory }} - env: - {{ copilot_ado_env }} - GITHUB_TOKEN: $(GITHUB_TOKEN) - GITHUB_READ_ONLY: 1 - HTTP_PROXY: "http://127.0.0.1:$(PROXY_PORT)" - HTTPS_PROXY: "http://127.0.0.1:$(PROXY_PORT)" - NO_PROXY: "localhost,127.0.0.1" - - - bash: | - if [ -f /tmp/proxy.pid ]; then - PROXY_PID=$(cat /tmp/proxy.pid) - kill $PROXY_PID 2>/dev/null || true - rm -f /tmp/proxy.pid - fi - displayName: "Stop network proxy" - condition: always() - - - bash: | - mkdir -p "$(Agent.TempDirectory)/analyzed_outputs" - cp -r "$(Pipeline.Workspace)/AgencyArtifact/"* "$(Agent.TempDirectory)/analyzed_outputs/" 2>/dev/null || true - - if [ -f "$(Agent.TempDirectory)/threat-analysis-output.txt" ]; then - cp "$(Agent.TempDirectory)/threat-analysis-output.txt" "$(Agent.TempDirectory)/analyzed_outputs/" - RESULT_LINE=$(grep "THREAT_DETECTION_RESULT:" "$(Agent.TempDirectory)/threat-analysis-output.txt" | tail -1) - if [ -n "$RESULT_LINE" ]; then - JSON_CONTENT=$(echo "$RESULT_LINE" | sed 's/.*THREAT_DETECTION_RESULT://') - echo "$JSON_CONTENT" > "$(Agent.TempDirectory)/analyzed_outputs/threat-analysis.json" + artifact: analyzed_outputs_$(Build.BuildId) + condition: always() + steps: + {{ checkout_self }} + {{ checkout_repositories }} + + - download: current + artifact: agent_outputs_$(Build.BuildId) + + - task: NuGetAuthenticate@1 + displayName: "Authenticate NuGet Feed" + + - task: NuGetCommand@2 + displayName: "Install Copilot CLI" + inputs: + command: 'custom' + arguments: 'install Microsoft.Copilot.CLI.linux-x64 -Source "https://pkgs.dev.azure.com/msazuresphere/_packaging/Guardian1ESPTUpstreamOrgFeed/nuget/v3/index.json" -Version {{ copilot_version }} -OutputDirectory $(Agent.TempDirectory)/tools -ExcludeVersion -NonInteractive' + + - bash: | + ls -la "$(Agent.TempDirectory)/tools" + echo "##vso[task.prependpath]$(Agent.TempDirectory)/tools/Microsoft.Copilot.CLI.linux-x64" + + # Copy copilot binary to /tmp so it's accessible inside AWF container + mkdir -p /tmp/awf-tools + cp "$(Agent.TempDirectory)/tools/Microsoft.Copilot.CLI.linux-x64/copilot" /tmp/awf-tools/copilot + chmod +x /tmp/awf-tools/copilot + displayName: "Add copilot to PATH" + + - bash: | + copilot --version + copilot -h + displayName: "Output copilot version" + + - bash: | + COMPILER_VERSION="{{ compiler_version }}" + DOWNLOAD_DIR="$(Pipeline.Workspace)/agentic-pipeline-compiler" + DOWNLOAD_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/ado-aw-linux-x64" + CHECKSUM_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/checksums.txt" + + mkdir -p "$DOWNLOAD_DIR" + echo "Downloading ado-aw v${COMPILER_VERSION} from GitHub Releases..." + curl -fsSL -o "$DOWNLOAD_DIR/ado-aw-linux-x64" "$DOWNLOAD_URL" + curl -fsSL -o "$DOWNLOAD_DIR/checksums.txt" "$CHECKSUM_URL" + + echo "Verifying checksum..." + cd "$DOWNLOAD_DIR" + grep "ado-aw-linux-x64" checksums.txt | sha256sum -c - + mv ado-aw-linux-x64 ado-aw + chmod +x ado-aw + displayName: "Download agentic pipeline compiler (v{{ compiler_version }})" + + - task: DockerInstaller@0 + displayName: "Install Docker" + inputs: + dockerVersion: 26.1.4 + + - bash: | + AWF_VERSION="{{ firewall_version }}" + DOWNLOAD_DIR="$(Pipeline.Workspace)/awf" + DOWNLOAD_URL="https://github.com/github/gh-aw-firewall/releases/download/v${AWF_VERSION}/awf-linux-x64" + CHECKSUM_URL="https://github.com/github/gh-aw-firewall/releases/download/v${AWF_VERSION}/checksums.txt" + + mkdir -p "$DOWNLOAD_DIR" + echo "Downloading AWF v${AWF_VERSION} from GitHub Releases..." + curl -fsSL -o "$DOWNLOAD_DIR/awf-linux-x64" "$DOWNLOAD_URL" + curl -fsSL -o "$DOWNLOAD_DIR/checksums.txt" "$CHECKSUM_URL" + + echo "Verifying checksum..." + cd "$DOWNLOAD_DIR" + grep "awf-linux-x64" checksums.txt | sha256sum -c - + mv awf-linux-x64 awf + chmod +x awf + echo "##vso[task.prependpath]$(Pipeline.Workspace)/awf" + ./awf --version || echo "AWF binary ready" + displayName: "Download AWF (Agentic Workflow Firewall) v{{ firewall_version }}" + + - bash: | + docker pull ghcr.io/github/gh-aw-firewall/squid:{{ firewall_version }} + docker pull ghcr.io/github/gh-aw-firewall/agent:{{ firewall_version }} + docker tag ghcr.io/github/gh-aw-firewall/squid:{{ firewall_version }} ghcr.io/github/gh-aw-firewall/squid:latest + docker tag ghcr.io/github/gh-aw-firewall/agent:{{ firewall_version }} ghcr.io/github/gh-aw-firewall/agent:latest + displayName: "Pre-pull AWF container images (v{{ firewall_version }})" + + - bash: | + mkdir -p {{ working_directory }}/safe_outputs + cp -a "$(Pipeline.Workspace)/agent_outputs_$(Build.BuildId)/." {{ working_directory }}/safe_outputs + displayName: "Prepare safe outputs for analysis" + + - bash: | + # Write threat analysis prompt to /tmp (accessible inside AWF container) + cat > "/tmp/awf-tools/threat-analysis-prompt.md" << 'THREAT_ANALYSIS_EOF' + {{ threat_analysis_prompt }} + THREAT_ANALYSIS_EOF + + echo "Threat analysis prompt:" + cat "/tmp/awf-tools/threat-analysis-prompt.md" + displayName: "Prepare threat analysis prompt" + + - bash: | + AGENTIC_PIPELINES_PATH="$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw" + chmod +x "$AGENTIC_PIPELINES_PATH" + displayName: "Setup agentic pipeline compiler" + + - bash: | + set -o pipefail + + # Run threat analysis with AWF network isolation + THREAT_OUTPUT_FILE="$(Agent.TempDirectory)/threat-analysis-output.txt" + + # Stream threat analysis output in real-time with VSO command filtering + sudo -E "$(Pipeline.Workspace)/awf/awf" \ + --allow-domains "{{ allowed_domains }}" \ + --skip-pull \ + --env-all \ + --container-workdir "{{ working_directory }}" \ + --log-level info \ + --proxy-logs-dir "$(Agent.TempDirectory)/threat-analysis-logs/firewall" \ + -- '/tmp/awf-tools/copilot --prompt "$(cat /tmp/awf-tools/threat-analysis-prompt.md)" {{ copilot_params }}' \ + 2>&1 \ + | sed -u 's/##vso\[/[VSO-FILTERED] vso[/g; s/##\[/[VSO-FILTERED] [/g' \ + | tee "$THREAT_OUTPUT_FILE" \ + && AGENT_EXIT_CODE=0 || AGENT_EXIT_CODE=$? + + exit $AGENT_EXIT_CODE + displayName: "Run threat analysis (AWF network isolated)" + workingDirectory: {{ working_directory }} + env: + GITHUB_TOKEN: $(GITHUB_TOKEN) + GITHUB_READ_ONLY: 1 + + - bash: | + # Create analyzed outputs directory with original safe outputs and analysis + mkdir -p "$(Agent.TempDirectory)/analyzed_outputs" + + # Copy original safe outputs + cp -a "$(Pipeline.Workspace)/agent_outputs_$(Build.BuildId)/." "$(Agent.TempDirectory)/analyzed_outputs/" + + # Copy threat analysis output + if [ -f "$(Agent.TempDirectory)/threat-analysis-output.txt" ]; then + cp "$(Agent.TempDirectory)/threat-analysis-output.txt" "$(Agent.TempDirectory)/analyzed_outputs/" fi - fi - displayName: "Prepare analyzed outputs" - condition: always() - - - bash: | - SAFE_TO_PROCESS="false" - JSON_FILE="$(Agent.TempDirectory)/analyzed_outputs/threat-analysis.json" - - if [ -f "$JSON_FILE" ]; then - if jq -e . "$JSON_FILE" > /dev/null 2>&1; then - if jq -e '.prompt_injection or .secret_leak or .malicious_patch' "$JSON_FILE" > /dev/null 2>&1; then - echo "##vso[task.logissue type=warning]Threats detected - safe outputs will NOT be processed" + + # Extract JSON from THREAT_DETECTION_RESULT line in threat analysis output + if [ -f "$(Agent.TempDirectory)/threat-analysis-output.txt" ]; then + RESULT_LINE=$(grep "THREAT_DETECTION_RESULT:" "$(Agent.TempDirectory)/threat-analysis-output.txt" | tail -1) + if [ -n "$RESULT_LINE" ]; then + # Extract JSON after the prefix + JSON_CONTENT=$(echo "$RESULT_LINE" | sed 's/.*THREAT_DETECTION_RESULT://') + echo "$JSON_CONTENT" > "$(Agent.TempDirectory)/analyzed_outputs/threat-analysis.json" + echo "Extracted threat analysis JSON:" + cat "$(Agent.TempDirectory)/analyzed_outputs/threat-analysis.json" else - echo "No threats detected - safe outputs will be processed" - SAFE_TO_PROCESS="true" + echo "Warning: No THREAT_DETECTION_RESULT found in threat analysis output" fi + else + echo "Warning: No threat analysis output file found" fi - fi - echo "##vso[task.setvariable variable=SafeToProcess;isOutput=true]$SAFE_TO_PROCESS" - displayName: "Evaluate threat analysis" - name: threatAnalysis - condition: always() + echo "Analyzed outputs directory contents:" + ls -laR "$(Agent.TempDirectory)/analyzed_outputs" + displayName: "Prepare analyzed outputs" + condition: always() + + - bash: | + SAFE_TO_PROCESS="false" + JSON_FILE="$(Agent.TempDirectory)/analyzed_outputs/threat-analysis.json" + + if [ -f "$JSON_FILE" ]; then + if jq -e . "$JSON_FILE" > /dev/null 2>&1; then + echo "JSON is valid" + + # Check if any threat field is true + if jq -e '.prompt_injection or .secret_leak or .malicious_patch' "$JSON_FILE" > /dev/null 2>&1; then + echo "##vso[task.logissue type=warning]Threats detected - safe outputs will NOT be processed" + jq -r '.reasons[]? // empty' "$JSON_FILE" | sed 's/^/ - /' + else + echo "No threats detected - safe outputs will be processed" + SAFE_TO_PROCESS="true" + fi + else + echo "##vso[task.logissue type=warning]Invalid JSON in threat analysis - defaulting to unsafe" + fi + else + echo "##vso[task.logissue type=warning]No threat analysis JSON found - defaulting to unsafe" + fi + + echo "##vso[task.setvariable variable=SafeToProcess;isOutput=true]$SAFE_TO_PROCESS" + echo "SafeToProcess set to: $SAFE_TO_PROCESS" + displayName: "Evaluate threat analysis" + name: threatAnalysis + condition: always() + + - bash: | + # Copy all logs to analyzed outputs for artifact upload + mkdir -p "$(Agent.TempDirectory)/analyzed_outputs/logs" + if [ -d ~/.copilot/logs ]; then + mkdir -p "$(Agent.TempDirectory)/analyzed_outputs/logs/copilot" + cp -r ~/.copilot/logs/* "$(Agent.TempDirectory)/analyzed_outputs/logs/copilot/" 2>/dev/null || true + fi + if [ -d ~/.ado-aw/logs ]; then + mkdir -p "$(Agent.TempDirectory)/analyzed_outputs/logs/ado-aw" + cp -r ~/.ado-aw/logs/* "$(Agent.TempDirectory)/analyzed_outputs/logs/ado-aw/" 2>/dev/null || true + fi + echo "Logs copied to $(Agent.TempDirectory)/analyzed_outputs/logs" + ls -laR "$(Agent.TempDirectory)/analyzed_outputs/logs" 2>/dev/null || echo "No logs found" + displayName: "Copy logs to output directory" + condition: always() - # Stage 2: Process safe outputs (custom job) - job: ProcessSafeOutputs displayName: "Process safe outputs" dependsOn: @@ -306,43 +637,75 @@ extends: outputs: - output: pipelineArtifact path: $(Agent.TempDirectory)/staging - artifact: execute_outputs - steps: - {{ checkout_self }} - {{ checkout_repositories }} - - {{ acquire_write_token }} - - - download: current - artifact: analyzed_outputs - - - bash: | - COMPILER_VERSION="{{ compiler_version }}" - DOWNLOAD_DIR="$(Pipeline.Workspace)/agentic-pipeline-compiler" - DOWNLOAD_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/ado-aw-linux-x64" - CHECKSUM_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/checksums.txt" - - mkdir -p "$DOWNLOAD_DIR" - echo "Downloading ado-aw v${COMPILER_VERSION} from GitHub Releases..." - curl -fsSL -o "$DOWNLOAD_DIR/ado-aw-linux-x64" "$DOWNLOAD_URL" - curl -fsSL -o "$DOWNLOAD_DIR/checksums.txt" "$CHECKSUM_URL" - - echo "Verifying checksum..." - cd "$DOWNLOAD_DIR" - grep "ado-aw-linux-x64" checksums.txt | sha256sum -c - - mv ado-aw-linux-x64 ado-aw - chmod +x ado-aw - displayName: "Download agentic pipeline compiler (v{{ compiler_version }})" - - - bash: | - chmod +x "$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw" - echo "##vso[task.prependpath]$(Pipeline.Workspace)/agentic-pipeline-compiler" - displayName: Add agentic compiler to path - - - bash: ado-aw execute --source "{{ source_path }}" --safe-output-dir "$(Pipeline.Workspace)/analyzed_outputs" - displayName: Process safe outputs - workingDirectory: {{ working_directory }} - env: - {{ executor_ado_env }} + artifact: safe_outputs + condition: always() + steps: + {{ checkout_self }} + {{ checkout_repositories }} + + {{ acquire_write_token }} + + - download: current + artifact: analyzed_outputs_$(Build.BuildId) + + - bash: | + COMPILER_VERSION="{{ compiler_version }}" + DOWNLOAD_DIR="$(Pipeline.Workspace)/agentic-pipeline-compiler" + DOWNLOAD_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/ado-aw-linux-x64" + CHECKSUM_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/checksums.txt" + + mkdir -p "$DOWNLOAD_DIR" + echo "Downloading ado-aw v${COMPILER_VERSION} from GitHub Releases..." + curl -fsSL -o "$DOWNLOAD_DIR/ado-aw-linux-x64" "$DOWNLOAD_URL" + curl -fsSL -o "$DOWNLOAD_DIR/checksums.txt" "$CHECKSUM_URL" + + echo "Verifying checksum..." + cd "$DOWNLOAD_DIR" + grep "ado-aw-linux-x64" checksums.txt | sha256sum -c - + mv ado-aw-linux-x64 ado-aw + chmod +x ado-aw + displayName: "Download agentic pipeline compiler (v{{ compiler_version }})" + + - bash: | + ls -la "$(Pipeline.Workspace)/agentic-pipeline-compiler" + chmod +x "$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw" + echo "##vso[task.prependpath]$(Pipeline.Workspace)/agentic-pipeline-compiler" + displayName: Add agentic compiler to path + + - bash: | + mkdir -p "$(Agent.TempDirectory)/staging" + displayName: "Prepare output directory" + + - bash: | + ado-aw execute --source "{{ source_path }}" --safe-output-dir "$(Pipeline.Workspace)/analyzed_outputs_$(Build.BuildId)" --output-dir "$(Agent.TempDirectory)/staging" + EXIT_CODE=$? + if [ $EXIT_CODE -eq 2 ]; then + echo "##vso[task.complete result=SucceededWithIssues;]Executor completed with warnings" + exit 0 + fi + exit $EXIT_CODE + displayName: Execute safe outputs (Stage 2) + workingDirectory: {{ working_directory }} + env: + {{ executor_ado_env }} + + - bash: | + # Copy all logs to output directory for artifact upload + mkdir -p "$(Agent.TempDirectory)/staging/logs" + # Copy agent output log from analyzed_outputs for optimisation use + cp "$(Pipeline.Workspace)/analyzed_outputs_$(Build.BuildId)/logs/agent-output.txt" \ + "$(Agent.TempDirectory)/staging/logs/agent-output.txt" 2>/dev/null || true + if [ -d ~/.copilot/logs ]; then + mkdir -p "$(Agent.TempDirectory)/staging/logs/copilot" + cp -r ~/.copilot/logs/* "$(Agent.TempDirectory)/staging/logs/copilot/" 2>/dev/null || true + fi + if [ -d ~/.ado-aw/logs ]; then + mkdir -p "$(Agent.TempDirectory)/staging/logs/ado-aw" + cp -r ~/.ado-aw/logs/* "$(Agent.TempDirectory)/staging/logs/ado-aw/" 2>/dev/null || true + fi + echo "Logs copied to $(Agent.TempDirectory)/staging/logs" + ls -laR "$(Agent.TempDirectory)/staging/logs" 2>/dev/null || echo "No logs found" + displayName: "Copy logs to output directory" + condition: always() {{ teardown_job }} From 84d91f923068c1f6086a5152d2e97cccbbdfb238 Mon Sep 17 00:00:00 2001 From: James Devine Date: Thu, 16 Apr 2026 14:04:43 +0100 Subject: [PATCH 3/8] docs: update documentation for unified compiler architecture Remove Agency/agencyJob references from AGENTS.md and module docs. Update 1ES target description to reflect shared execution model (Copilot CLI + AWF + MCPG). Remove obsolete 1ES-specific marker documentation (agent_context_root, mcp_configuration, global_options, log_level). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- AGENTS.md | 42 +++++++----------------------------------- src/compile/mod.rs | 6 +++--- 2 files changed, 10 insertions(+), 38 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 752981aa..1064e7b7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -523,20 +523,17 @@ This is the recommended target for maximum flexibility and security controls. #### `1es` Generates a pipeline that extends the 1ES Unofficial Pipeline Template: -- Uses `templateContext.type: agencyJob` for the main agent job +- Uses `templateContext.type: buildJob` with Copilot CLI + AWF + MCPG (same execution model as standalone) - Integrates with 1ES SDL scanning and compliance tools -- Custom jobs for threat analysis and safe output processing -- **Limitations:** - - MCP servers use service connections (no custom `command:` support) - - Network isolation is handled by OneBranch (no custom proxy allow-lists) - - Requires 1ES Pipeline Templates repository access +- Full 3-job pipeline: PerformAgenticTask → AnalyzeSafeOutputs → ProcessSafeOutputs +- Requires 1ES Pipeline Templates repository access Example: ```yaml target: 1es ``` -When using `target: 1es`, the pipeline will extend `1es/1ES.Unofficial.PipelineTemplate.yml@1ESPipelinesTemplates` and MCPs will require corresponding service connections (naming convention: `mcp--service-connection`). +When using `target: 1es`, the pipeline will extend `1es/1ES.Unofficial.PipelineTemplate.yml@1ESPipelinesTemplates`. ### Output Format (Azure DevOps YAML) @@ -919,34 +916,9 @@ https://pkgs.dev.azure.com/msazuresphere/_packaging/Guardian1ESPTUpstreamOrgFeed ### 1ES-Specific Template Markers -The following markers are specific to the 1ES target (`target: 1es`) and are not used in standalone pipelines: +The 1ES target uses the same template markers as standalone, plus the 1ES-specific `extends:` / `stages:` / `templateContext` wrapping. The 1ES template includes `templateContext.type: buildJob` for all jobs, and the pool is specified at the top-level `parameters.pool` rather than per-job. -## {{ agent_context_root }} - -Should be replaced with the agent context root for 1ES Agency jobs. This determines the working directory context for the agent: -- `repo`: `$(Build.Repository.Name)` - the repository subfolder -- `root`: `.` - the checkout root - -## {{ mcp_configuration }} - -Should be replaced with the MCP server configuration for 1ES templates. For each `mcp-servers:` entry without a `command:` field, generates a service connection reference using the entry name: - -```yaml -my-mcp: - serviceConnection: mcp-my-mcp-service-connection -other-mcp: - serviceConnection: mcp-other-mcp-service-connection -``` - -Custom MCP servers (with `command:` field) are not supported in 1ES target. Only entries without a `command:` (which have a corresponding service connection) are supported. - -## {{ global_options }} - -Reserved for future use. Currently replaced with an empty string. - -## {{ log_level }} - -Reserved for future use. Currently replaced with an empty string. +Both targets share the same execution model (Copilot CLI + AWF + MCPG) and the same set of template markers. ### CLI Commands @@ -1675,7 +1647,7 @@ The following domains are always allowed (defined in `allowed_hosts.rs`): | `*.in.applicationinsights.azure.com` | Application Insights ingestion | | `dc.services.visualstudio.com` | Visual Studio telemetry | | `rt.services.visualstudio.com` | Visual Studio runtime telemetry | -| `config.edge.skype.com` | Agency configuration | +| `config.edge.skype.com` | Configuration | | `host.docker.internal` | MCP Gateway (MCPG) on host | ### Adding Additional Hosts diff --git a/src/compile/mod.rs b/src/compile/mod.rs index 7b497829..e4ee1713 100644 --- a/src/compile/mod.rs +++ b/src/compile/mod.rs @@ -1,10 +1,10 @@ //! Pipeline compilation module. //! //! This module provides compilation of agent markdown files into Azure DevOps pipeline YAML. -//! Two targets are supported: +//! Two targets are supported, both sharing the same execution model (Copilot CLI + AWF + MCPG): //! -//! - **Standalone**: Full-featured pipeline with custom network proxy, MCP firewall, and safe outputs -//! - **1ES**: Integration with 1ES Pipeline Templates using the agencyJob type +//! - **Standalone**: Self-contained pipeline with AWF network isolation +//! - **1ES**: Integration with 1ES Pipeline Templates for SDL compliance mod common; pub mod extensions; From 6ae1732102a37b3ed74417970fe9b2c486eefd0c Mon Sep 17 00:00:00 2001 From: James Devine Date: Thu, 16 Apr 2026 14:09:30 +0100 Subject: [PATCH 4/8] test: add YAML validation tests for compiled pipeline output Add integration tests that compile fixtures and verify the output is valid, parseable YAML. Tests cover: - 1ES: valid YAML with correct 'extends' and 'resources' structure - Standalone minimal: valid YAML with 'jobs' key - Standalone pipeline-trigger: valid YAML - Standalone complete: compile-only (has pre-existing indentation issue in multi-repository output) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/compiler_tests.rs | 125 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/tests/compiler_tests.rs b/tests/compiler_tests.rs index 7577020a..89db475d 100644 --- a/tests/compiler_tests.rs +++ b/tests/compiler_tests.rs @@ -2916,3 +2916,128 @@ network: let _ = fs::remove_dir_all(&temp_dir); } + +// ─── YAML validation tests ────────────────────────────────────────────────── + +/// Helper: compile a fixture and return the compiled YAML string. +fn compile_fixture(fixture_name: &str) -> String { + let temp_dir = std::env::temp_dir().join(format!( + "agentic-pipeline-yaml-validation-{}-{}", + fixture_name.replace('.', "-"), + std::process::id() + )); + fs::create_dir_all(&temp_dir).expect("Failed to create temp directory"); + + let fixture_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("fixtures") + .join(fixture_name); + + let output_path = temp_dir.join(fixture_name.replace(".md", ".yml")); + + let binary_path = PathBuf::from(env!("CARGO_BIN_EXE_ado-aw")); + let output = std::process::Command::new(&binary_path) + .args([ + "compile", + fixture_path.to_str().unwrap(), + "-o", + output_path.to_str().unwrap(), + ]) + .output() + .expect("Failed to run compiler"); + + assert!( + output.status.success(), + "Compilation of {} should succeed: {}", + fixture_name, + String::from_utf8_lossy(&output.stderr) + ); + + let compiled = fs::read_to_string(&output_path).expect("Should read compiled YAML"); + let _ = fs::remove_dir_all(&temp_dir); + compiled +} + +/// Validate that compiled YAML is parseable as valid YAML. +/// Strips the leading `# @ado-aw` header comment before parsing. +fn assert_valid_yaml(compiled: &str, fixture_name: &str) { + let yaml_content: String = compiled + .lines() + .skip_while(|line| line.starts_with('#') || line.is_empty()) + .collect::>() + .join("\n"); + + let parsed: Result = serde_yaml::from_str(&yaml_content); + assert!( + parsed.is_ok(), + "Compiled YAML for {} should be valid YAML, got parse error: {}", + fixture_name, + parsed.err().unwrap() + ); + + let doc = parsed.unwrap(); + assert!( + doc.is_mapping(), + "Compiled YAML for {} should be a YAML mapping at top level", + fixture_name + ); +} + +/// Test that the 1ES fixture produces valid YAML with correct structure +#[test] +fn test_1es_compiled_output_is_valid_yaml() { + let compiled = compile_fixture("1es-test-agent.md"); + assert_valid_yaml(&compiled, "1es-test-agent.md"); + + let yaml_content: String = compiled + .lines() + .skip_while(|line| line.starts_with('#') || line.is_empty()) + .collect::>() + .join("\n"); + let doc: serde_yaml::Value = serde_yaml::from_str(&yaml_content).unwrap(); + + assert!( + doc.get("extends").is_some(), + "1ES YAML should have 'extends' key" + ); + assert!( + doc.get("resources").is_some(), + "1ES YAML should have 'resources' key" + ); +} + +/// Test that the minimal standalone fixture produces valid YAML with correct structure +#[test] +fn test_standalone_minimal_compiled_output_is_valid_yaml() { + let compiled = compile_fixture("minimal-agent.md"); + assert_valid_yaml(&compiled, "minimal-agent.md"); + + let yaml_content: String = compiled + .lines() + .skip_while(|line| line.starts_with('#') || line.is_empty()) + .collect::>() + .join("\n"); + let doc: serde_yaml::Value = serde_yaml::from_str(&yaml_content).unwrap(); + + assert!( + doc.get("jobs").is_some(), + "Standalone YAML should have 'jobs' key" + ); +} + +/// Test that the complete standalone fixture produces valid YAML +/// Note: complete-agent.md has a known pre-existing indentation issue in +/// multi-repository output (generate_repositories), so we skip strict +/// YAML validation for now and just verify it compiles without error. +#[test] +fn test_standalone_complete_compiled_output_compiles() { + // Verifies compilation succeeds (compile_fixture asserts success) + let _compiled = compile_fixture("complete-agent.md"); +} + +/// Test that the pipeline-trigger fixture produces valid YAML +#[test] +fn test_standalone_pipeline_trigger_compiled_output_is_valid_yaml() { + let compiled = compile_fixture("pipeline-trigger-agent.md"); + assert_valid_yaml(&compiled, "pipeline-trigger-agent.md"); +} From 9c849248fcc09dc015669e7a0a3a541a21cd55aa Mon Sep 17 00:00:00 2001 From: James Devine Date: Thu, 16 Apr 2026 14:15:29 +0100 Subject: [PATCH 5/8] fix: correct indentation in generate_repositories, checkout_steps, and teardown_job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - generate_repositories: remove hardcoded 6-space indent on sub-fields; output flat YAML and let replace_with_indent handle template-level indentation - generate_checkout_steps: same fix, remove hardcoded 14-space join - generate_teardown_job (common.rs): match setup_job pattern — output flat YAML starting at column 0 instead of baking in 2-space indent; move template placeholder from column 0 to column 2 to match setup_job - generate_setup_job/teardown_job (onees.rs): fix step indentation from 4 to 6 to match the steps: nesting depth in templateContext - Upgrade complete-agent test from compile-only to full YAML validation Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/compile/common.rs | 25 +++++++++++-------------- src/compile/onees.rs | 4 ++-- templates/base.yml | 2 +- tests/compiler_tests.rs | 9 +++------ 4 files changed, 17 insertions(+), 23 deletions(-) diff --git a/src/compile/common.rs b/src/compile/common.rs index d3d910c4..369108d7 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -411,15 +411,12 @@ pub fn generate_repositories(repositories: &[Repository]) -> String { .iter() .map(|repo| { format!( - r#"- repository: {} - type: {} - name: {} - ref: {}"#, + "- repository: {}\n type: {}\n name: {}\n ref: {}", repo.repository, repo.repo_type, repo.name, repo.repo_ref ) }) .collect::>() - .join("\n ") + .join("\n") } /// Generate checkout steps YAML @@ -432,7 +429,7 @@ pub fn generate_checkout_steps(checkout: &[String]) -> String { .iter() .map(|name| format!("- checkout: {}", name)) .collect::>() - .join("\n ") + .join("\n") } /// Generate `checkout: self` step. @@ -1246,16 +1243,16 @@ pub fn generate_teardown_job( return String::new(); } - let steps_yaml = format_steps_yaml(teardown_steps); + let steps_yaml = format_steps_yaml_indented(teardown_steps, 4); format!( - r#" - job: TeardownJob - displayName: "{} - Teardown" - dependsOn: ProcessSafeOutputs - pool: - name: {} - steps: - - checkout: self + r#"- job: TeardownJob + displayName: "{} - Teardown" + dependsOn: ProcessSafeOutputs + pool: + name: {} + steps: + - checkout: self {} "#, agent_name, pool, steps_yaml diff --git a/src/compile/onees.rs b/src/compile/onees.rs index 21a091c0..4ae7fc7e 100644 --- a/src/compile/onees.rs +++ b/src/compile/onees.rs @@ -93,7 +93,7 @@ fn generate_setup_job(setup_steps: &[serde_yaml::Value], agent_name: &str) -> St return String::new(); } - let steps_yaml = format_steps_yaml_indented(setup_steps, 4); + let steps_yaml = format_steps_yaml_indented(setup_steps, 6); format!( r#"- job: SetupJob @@ -115,7 +115,7 @@ fn generate_teardown_job(teardown_steps: &[serde_yaml::Value], agent_name: &str) return String::new(); } - let steps_yaml = format_steps_yaml_indented(teardown_steps, 4); + let steps_yaml = format_steps_yaml_indented(teardown_steps, 6); format!( r#"- job: TeardownJob diff --git a/templates/base.yml b/templates/base.yml index 1aefbdb0..3e257753 100644 --- a/templates/base.yml +++ b/templates/base.yml @@ -680,4 +680,4 @@ jobs: artifact: safe_outputs condition: always() -{{ teardown_job }} \ No newline at end of file + {{ teardown_job }} \ No newline at end of file diff --git a/tests/compiler_tests.rs b/tests/compiler_tests.rs index 89db475d..334adcbf 100644 --- a/tests/compiler_tests.rs +++ b/tests/compiler_tests.rs @@ -3026,13 +3026,10 @@ fn test_standalone_minimal_compiled_output_is_valid_yaml() { } /// Test that the complete standalone fixture produces valid YAML -/// Note: complete-agent.md has a known pre-existing indentation issue in -/// multi-repository output (generate_repositories), so we skip strict -/// YAML validation for now and just verify it compiles without error. #[test] -fn test_standalone_complete_compiled_output_compiles() { - // Verifies compilation succeeds (compile_fixture asserts success) - let _compiled = compile_fixture("complete-agent.md"); +fn test_standalone_complete_compiled_output_is_valid_yaml() { + let compiled = compile_fixture("complete-agent.md"); + assert_valid_yaml(&compiled, "complete-agent.md"); } /// Test that the pipeline-trigger fixture produces valid YAML From e12ba663377e2b5fd6adfe30a91380f948e40212 Mon Sep 17 00:00:00 2001 From: James Devine Date: Thu, 16 Apr 2026 15:05:31 +0100 Subject: [PATCH 6/8] fix: eliminate double CompileContext creation and fragile replacement ordering - compile_shared() now accepts &CompileContext instead of building its own, eliminating duplicate git remote I/O on every compilation - extra_replacements are applied before shared replacements, so targets can cleanly override shared markers (e.g., 1ES setup/teardown jobs) via the intended CompileConfig mechanism - 1ES compiler no longer pre-replaces markers in the template string before calling compile_shared; uses extra_replacements instead Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/compile/common.rs | 31 ++++++++++++++++++------------- src/compile/onees.rs | 17 ++++++++--------- src/compile/standalone.rs | 2 +- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/src/compile/common.rs b/src/compile/common.rs index 369108d7..62bbeaee 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -1910,7 +1910,9 @@ pub struct CompileConfig { /// The base YAML template content (the template string itself). pub template: String, /// Additional placeholder→value replacements beyond the shared set. - /// These are applied after the shared replacements. + /// These are applied **before** the shared replacements, allowing + /// target-specific overrides of shared markers (e.g., 1ES-specific + /// setup/teardown jobs that differ from the standalone defaults). pub extra_replacements: Vec<(String, String)>, } @@ -1923,13 +1925,16 @@ pub struct CompileConfig { /// 4. Applies replacements to the template /// 5. Prepends the header comment /// -/// Target-specific values are provided via `CompileConfig.extra_replacements`. +/// Target-specific values are provided via `CompileConfig.extra_replacements`, +/// which are applied before the shared replacements so that targets can +/// override shared markers (e.g., `{{ setup_job }}`, `{{ teardown_job }}`). pub async fn compile_shared( input_path: &Path, output_path: &Path, front_matter: &FrontMatter, markdown_body: &str, extensions: &[Extension], + ctx: &CompileContext<'_>, config: CompileConfig, ) -> Result { // 1. Validate @@ -1947,12 +1952,9 @@ pub async fn compile_shared( let checkout_self = generate_checkout_self(); let agent_name = sanitize_filename(&front_matter.name); - // 3. Build compile context and run extension validations - let input_dir = input_path.parent().unwrap_or(Path::new(".")); - let ctx = CompileContext::new(front_matter, input_dir).await; - + // 3. Run extension validations for ext in extensions { - for warning in ext.validate(&ctx)? { + for warning in ext.validate(ctx)? { eprintln!("Warning: {}", warning); } } @@ -2042,7 +2044,15 @@ pub async fn compile_shared( threat_analysis_prompt, ); - // 12. Shared replacements + // 12. Apply extra replacements first (target-specific overrides) + // These run before shared replacements so targets can override shared + // markers like {{ setup_job }} and {{ teardown_job }}. + let mut template = template; + for (placeholder, replacement) in &config.extra_replacements { + template = replace_with_indent(&template, placeholder, replacement); + } + + // 13. Shared replacements let compiler_version = env!("CARGO_PKG_VERSION"); let replacements: Vec<(&str, &str)> = vec![ ("{{ parameters }}", ¶meters_yaml), @@ -2083,11 +2093,6 @@ pub async fn compile_shared( replace_with_indent(&yaml, placeholder, replacement) }); - // 13. Apply extra replacements (target-specific) - for (placeholder, replacement) in &config.extra_replacements { - pipeline_yaml = replace_with_indent(&pipeline_yaml, placeholder, replacement); - } - // 14. Prepend header let header = generate_header_comment(input_path); Ok(format!("{}{}", header, pipeline_yaml)) diff --git a/src/compile/onees.rs b/src/compile/onees.rs index 4ae7fc7e..a845826d 100644 --- a/src/compile/onees.rs +++ b/src/compile/onees.rs @@ -12,7 +12,7 @@ use std::path::Path; use super::Compiler; use super::common::{ AWF_VERSION, MCPG_VERSION, MCPG_IMAGE, - CompileConfig, compile_shared, replace_with_indent, + CompileConfig, compile_shared, generate_allowed_domains, generate_cancel_previous_builds, generate_enabled_tools_args, @@ -56,17 +56,14 @@ impl Compiler for OneESCompiler { .context("Failed to serialize MCPG config")?; let mcpg_docker_env = generate_mcpg_docker_env(front_matter); - // Generate 1ES-specific setup/teardown jobs (no per-job pool, uses templateContext) - // Pre-replace these in the template before compile_shared, which would otherwise - // use the standalone versions (with pool: per job). + // Generate 1ES-specific setup/teardown jobs (no per-job pool, uses templateContext). + // These override the shared {{ setup_job }} / {{ teardown_job }} markers via + // extra_replacements, which are applied before the shared replacements. let setup_job = generate_setup_job(&front_matter.setup, &front_matter.name); let teardown_job = generate_teardown_job(&front_matter.teardown, &front_matter.name); - let mut template = include_str!("../../templates/1es-base.yml").to_string(); - template = replace_with_indent(&template, "{{ setup_job }}", &setup_job); - template = replace_with_indent(&template, "{{ teardown_job }}", &teardown_job); let config = CompileConfig { - template, + template: include_str!("../../templates/1es-base.yml").to_string(), extra_replacements: vec![ ("{{ firewall_version }}".into(), AWF_VERSION.into()), ("{{ mcpg_version }}".into(), MCPG_VERSION.into()), @@ -76,10 +73,12 @@ impl Compiler for OneESCompiler { ("{{ cancel_previous_builds }}".into(), cancel_previous_builds), ("{{ mcpg_config }}".into(), mcpg_config_json), ("{{ mcpg_docker_env }}".into(), mcpg_docker_env), + ("{{ setup_job }}".into(), setup_job), + ("{{ teardown_job }}".into(), teardown_job), ], }; - compile_shared(input_path, output_path, front_matter, markdown_body, &extensions, config).await + compile_shared(input_path, output_path, front_matter, markdown_body, &extensions, &ctx, config).await } } diff --git a/src/compile/standalone.rs b/src/compile/standalone.rs index a3d0d94c..5afcec04 100644 --- a/src/compile/standalone.rs +++ b/src/compile/standalone.rs @@ -71,7 +71,7 @@ impl Compiler for StandaloneCompiler { ], }; - compile_shared(input_path, output_path, front_matter, markdown_body, &extensions, config).await + compile_shared(input_path, output_path, front_matter, markdown_body, &extensions, &ctx, config).await } } From 94dc734290548a83e38f6b07e2d07b6b0ac7127f Mon Sep 17 00:00:00 2001 From: James Devine Date: Thu, 16 Apr 2026 15:07:44 +0100 Subject: [PATCH 7/8] test: strengthen 1ES YAML validation with content assertions Add assertions verifying that key pipeline content is present in the compiled 1ES output: Copilot CLI install, AWF, MCPG, SafeOutputs, copilot invocation, threat analysis, safe output execution, and all three job names. Also verify no Agency remnants (agencyJob, AgencyArtifact, commandOptions). These catch placeholder substitution regressions that the structural YAML validity and no-unreplaced-markers tests would miss. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/compiler_tests.rs | 57 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/compiler_tests.rs b/tests/compiler_tests.rs index 334adcbf..b341bff5 100644 --- a/tests/compiler_tests.rs +++ b/tests/compiler_tests.rs @@ -2996,6 +2996,7 @@ fn test_1es_compiled_output_is_valid_yaml() { .join("\n"); let doc: serde_yaml::Value = serde_yaml::from_str(&yaml_content).unwrap(); + // Verify 1ES wrapping structure assert!( doc.get("extends").is_some(), "1ES YAML should have 'extends' key" @@ -3004,6 +3005,62 @@ fn test_1es_compiled_output_is_valid_yaml() { doc.get("resources").is_some(), "1ES YAML should have 'resources' key" ); + + // Verify key pipeline content was substituted (catches placeholder regressions) + assert!( + compiled.contains("Copilot.CLI.linux-x64"), + "1ES output should contain Copilot CLI install" + ); + assert!( + compiled.contains("awf"), + "1ES output should contain AWF references" + ); + assert!( + compiled.contains("mcpg"), + "1ES output should contain MCPG references" + ); + assert!( + compiled.contains("SafeOutputs"), + "1ES output should contain SafeOutputs references" + ); + assert!( + compiled.contains("copilot --prompt"), + "1ES output should contain copilot invocation (copilot_params substituted)" + ); + assert!( + compiled.contains("threat-analysis"), + "1ES output should contain threat analysis step" + ); + assert!( + compiled.contains("ado-aw execute"), + "1ES output should contain safe output executor step" + ); + assert!( + compiled.contains("PerformAgenticTask"), + "1ES output should contain PerformAgenticTask job" + ); + assert!( + compiled.contains("AnalyzeSafeOutputs"), + "1ES output should contain AnalyzeSafeOutputs job" + ); + assert!( + compiled.contains("ProcessSafeOutputs"), + "1ES output should contain ProcessSafeOutputs job" + ); + + // Verify no Agency remnants + assert!( + !compiled.contains("agencyJob"), + "1ES output should not contain agencyJob" + ); + assert!( + !compiled.contains("AgencyArtifact"), + "1ES output should not contain AgencyArtifact" + ); + assert!( + !compiled.contains("commandOptions"), + "1ES output should not contain commandOptions" + ); } /// Test that the minimal standalone fixture produces valid YAML with correct structure From 33db1546ee666e52da99824369b5aefc8ea9148f Mon Sep 17 00:00:00 2001 From: James Devine Date: Thu, 16 Apr 2026 15:24:51 +0100 Subject: [PATCH 8/8] fix: stale doc comment and unescaped service connection name in YAML - Remove orphaned schedule doc comment accidentally prepended to generate_parameters() during earlier refactoring - Escape single quotes in service connection names when emitting azureSubscription YAML values to prevent malformed pipeline output Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/compile/common.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/compile/common.rs b/src/compile/common.rs index 62bbeaee..71e1fa62 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -79,7 +79,6 @@ pub fn replace_with_indent(template: &str, placeholder: &str, replacement: &str) } /// Generate a schedule YAML block from a ScheduleConfig. -/// When no explicit schedule branches are configured, defaults to `main`. /// Generate the top-level `parameters:` YAML block from front matter parameters. /// /// Returns a YAML block like: @@ -892,7 +891,7 @@ pub fn generate_acquire_ado_token(service_connection: Option<&str>, variable_nam r#" displayName: "Acquire ADO token ({variable_name})""# )); lines.push(" inputs:".to_string()); - lines.push(format!(" azureSubscription: '{}'", sc)); + lines.push(format!(" azureSubscription: '{}'", sc.replace('\'', "''"))); lines.push(" scriptType: 'bash'".to_string()); lines.push(" scriptLocation: 'inlineScript'".to_string()); lines.push(" addSpnToEnvironment: true".to_string());