diff --git a/AGENTS.md b/AGENTS.md index 752981aa..1064e7b7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -523,20 +523,17 @@ This is the recommended target for maximum flexibility and security controls. #### `1es` Generates a pipeline that extends the 1ES Unofficial Pipeline Template: -- Uses `templateContext.type: agencyJob` for the main agent job +- Uses `templateContext.type: buildJob` with Copilot CLI + AWF + MCPG (same execution model as standalone) - Integrates with 1ES SDL scanning and compliance tools -- Custom jobs for threat analysis and safe output processing -- **Limitations:** - - MCP servers use service connections (no custom `command:` support) - - Network isolation is handled by OneBranch (no custom proxy allow-lists) - - Requires 1ES Pipeline Templates repository access +- Full 3-job pipeline: PerformAgenticTask → AnalyzeSafeOutputs → ProcessSafeOutputs +- Requires 1ES Pipeline Templates repository access Example: ```yaml target: 1es ``` -When using `target: 1es`, the pipeline will extend `1es/1ES.Unofficial.PipelineTemplate.yml@1ESPipelinesTemplates` and MCPs will require corresponding service connections (naming convention: `mcp--service-connection`). +When using `target: 1es`, the pipeline will extend `1es/1ES.Unofficial.PipelineTemplate.yml@1ESPipelinesTemplates`. ### Output Format (Azure DevOps YAML) @@ -919,34 +916,9 @@ https://pkgs.dev.azure.com/msazuresphere/_packaging/Guardian1ESPTUpstreamOrgFeed ### 1ES-Specific Template Markers -The following markers are specific to the 1ES target (`target: 1es`) and are not used in standalone pipelines: +The 1ES target uses the same template markers as standalone, plus the 1ES-specific `extends:` / `stages:` / `templateContext` wrapping. The 1ES template includes `templateContext.type: buildJob` for all jobs, and the pool is specified at the top-level `parameters.pool` rather than per-job. -## {{ agent_context_root }} - -Should be replaced with the agent context root for 1ES Agency jobs. 
This determines the working directory context for the agent: -- `repo`: `$(Build.Repository.Name)` - the repository subfolder -- `root`: `.` - the checkout root - -## {{ mcp_configuration }} - -Should be replaced with the MCP server configuration for 1ES templates. For each `mcp-servers:` entry without a `command:` field, generates a service connection reference using the entry name: - -```yaml -my-mcp: - serviceConnection: mcp-my-mcp-service-connection -other-mcp: - serviceConnection: mcp-other-mcp-service-connection -``` - -Custom MCP servers (with `command:` field) are not supported in 1ES target. Only entries without a `command:` (which have a corresponding service connection) are supported. - -## {{ global_options }} - -Reserved for future use. Currently replaced with an empty string. - -## {{ log_level }} - -Reserved for future use. Currently replaced with an empty string. +Both targets share the same execution model (Copilot CLI + AWF + MCPG) and the same set of template markers. ### CLI Commands @@ -1675,7 +1647,7 @@ The following domains are always allowed (defined in `allowed_hosts.rs`): | `*.in.applicationinsights.azure.com` | Application Insights ingestion | | `dc.services.visualstudio.com` | Visual Studio telemetry | | `rt.services.visualstudio.com` | Visual Studio runtime telemetry | -| `config.edge.skype.com` | Agency configuration | +| `config.edge.skype.com` | Configuration | | `host.docker.internal` | MCP Gateway (MCPG) on host | ### Adding Additional Hosts diff --git a/src/compile/common.rs b/src/compile/common.rs index 6dc0647d..71e1fa62 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -1,18 +1,15 @@ //! Common helper functions shared across all compile targets. 
use anyhow::{Context, Result}; +use std::collections::{HashMap, HashSet}; +use std::path::Path; use super::types::{FrontMatter, PipelineParameter, Repository, TriggerConfig}; -use super::extensions::CompilerExtension; +use super::extensions::{CompilerExtension, Extension, McpgServerConfig, McpgGatewayConfig, McpgConfig, CompileContext}; use crate::compile::types::McpConfig; use crate::fuzzy_schedule; - -/// Check if an MCP has a transport configuration (container or URL). -/// MCPs with a container are containerized stdio servers; MCPs with a URL -/// are HTTP servers. Both are routed through the MCP Gateway (MCPG). -pub fn is_custom_mcp(config: &McpConfig) -> bool { - matches!(config, McpConfig::WithOptions(opts) if opts.container.is_some() || opts.url.is_some()) -} +use crate::allowed_hosts::{CORE_ALLOWED_HOSTS, mcp_required_hosts}; +use crate::ecosystem_domains::{get_ecosystem_domains, is_ecosystem_identifier, is_known_ecosystem}; /// Parse the markdown file and extract front matter and body pub fn parse_markdown(content: &str) -> Result<(FrontMatter, String)> { @@ -82,7 +79,6 @@ pub fn replace_with_indent(template: &str, placeholder: &str, replacement: &str) } /// Generate a schedule YAML block from a ScheduleConfig. -/// When no explicit schedule branches are configured, defaults to `main`. /// Generate the top-level `parameters:` YAML block from front matter parameters. 
/// /// Returns a YAML block like: @@ -414,15 +410,12 @@ pub fn generate_repositories(repositories: &[Repository]) -> String { .iter() .map(|repo| { format!( - r#"- repository: {} - type: {} - name: {} - ref: {}"#, + "- repository: {}\n type: {}\n name: {}\n ref: {}", repo.repository, repo.repo_type, repo.name, repo.repo_ref ) }) .collect::>() - .join("\n ") + .join("\n") } /// Generate checkout steps YAML @@ -435,7 +428,7 @@ pub fn generate_checkout_steps(checkout: &[String]) -> String { .iter() .map(|name| format!("- checkout: {}", name)) .collect::>() - .join("\n ") + .join("\n") } /// Generate `checkout: self` step. @@ -898,7 +891,7 @@ pub fn generate_acquire_ado_token(service_connection: Option<&str>, variable_nam r#" displayName: "Acquire ADO token ({variable_name})""# )); lines.push(" inputs:".to_string()); - lines.push(format!(" azureSubscription: '{}'", sc)); + lines.push(format!(" azureSubscription: '{}'", sc.replace('\'', "''"))); lines.push(" scriptType: 'bash'".to_string()); lines.push(" scriptLocation: 'inlineScript'".to_string()); lines.push(" addSpnToEnvironment: true".to_string()); @@ -1218,10 +1211,898 @@ pub fn validate_resolve_pr_thread_statuses(front_matter: &FrontMatter) -> Result Ok(()) } +/// Generate the setup job YAML +pub fn generate_setup_job(setup_steps: &[serde_yaml::Value], agent_name: &str, pool: &str) -> String { + if setup_steps.is_empty() { + return String::new(); + } + + let steps_yaml = format_steps_yaml_indented(setup_steps, 4); + + format!( + r#"- job: SetupJob + displayName: "{} - Setup" + pool: + name: {} + steps: + - checkout: self +{} +"#, + agent_name, pool, steps_yaml + ) +} + +/// Generate the teardown job YAML +pub fn generate_teardown_job( + teardown_steps: &[serde_yaml::Value], + agent_name: &str, + pool: &str, +) -> String { + if teardown_steps.is_empty() { + return String::new(); + } + + let steps_yaml = format_steps_yaml_indented(teardown_steps, 4); + + format!( + r#"- job: TeardownJob + displayName: "{} - 
Teardown" + dependsOn: ProcessSafeOutputs + pool: + name: {} + steps: + - checkout: self +{} +"#, + agent_name, pool, steps_yaml + ) +} + +/// Generate prepare steps (inline), including extension steps and user-defined steps. +pub fn generate_prepare_steps( + prepare_steps: &[serde_yaml::Value], + extensions: &[super::extensions::Extension], +) -> Result { + let mut parts = Vec::new(); + + // Extension prepare steps and prompt supplements (runtimes + first-party tools) + for ext in extensions { + for step in ext.prepare_steps() { + parts.push(step); + } + if let Some(prompt) = ext.prompt_supplement() { + parts.push(super::extensions::wrap_prompt_append(&prompt, ext.name())?); + } + } + + if !prepare_steps.is_empty() { + parts.push(format_steps_yaml_indented(prepare_steps, 0)); + } + + Ok(parts.join("\n\n")) +} + +/// Generate finalize steps (inline) +pub fn generate_finalize_steps(finalize_steps: &[serde_yaml::Value]) -> String { + if finalize_steps.is_empty() { + return String::new(); + } + + format_steps_yaml_indented(finalize_steps, 0) +} + +/// Generate dependsOn clause for setup job +pub fn generate_agentic_depends_on(setup_steps: &[serde_yaml::Value]) -> String { + if !setup_steps.is_empty() { + "dependsOn: SetupJob".to_string() + } else { + String::new() + } +} + +/// Sensitive host path prefixes that should not be bind-mounted into MCP containers. +pub const SENSITIVE_MOUNT_PREFIXES: &[&str] = &[ + "/etc", + "/root", + "/home", + "/proc", + "/sys", +]; + +/// Docker runtime flag names that grant dangerous host access. +/// Checked both as `--flag=value` and as `--flag value` (split across two args). +pub const DANGEROUS_DOCKER_FLAGS: &[&str] = &[ + "--privileged", + "--cap-add", + "--security-opt", + "--pid", + "--network", + "--ipc", + "--user", + "-u", + "--add-host", + "--entrypoint", +]; + +/// Validate a container image name for injection attempts. +/// Allows `[a-zA-Z0-9./_:-]` which covers standard Docker image references. 
+pub fn validate_container_image(image: &str, mcp_name: &str) -> Vec { + let mut warnings = Vec::new(); + if image.is_empty() { + warnings.push(format!("Warning: MCP '{}': container image name is empty.", mcp_name)); + return warnings; + } + if !image.chars().all(|c| c.is_ascii_alphanumeric() || "._/:-@".contains(c)) { + warnings.push(format!( + "Warning: MCP '{}': container image '{}' contains unexpected characters. \ + Image names should only contain [a-zA-Z0-9./_:-@].", + mcp_name, image + )); + } + warnings +} + +/// Validate a volume mount source path, warning on sensitive host directories. +/// Docker socket mounts are escalated to stderr warnings since they grant container escape. +/// Note: paths are lowercased for comparison to catch cross-platform casing (e.g. `/ETC/shadow`). +pub fn validate_mount_source(mount: &str, mcp_name: &str) -> Vec { + let mut warnings = Vec::new(); + // Format: "source:dest:mode" + if let Some(source) = mount.split(':').next() { + let source_lower = source.to_lowercase(); + if source_lower.contains("docker.sock") { + warnings.push(format!( + "Warning: MCP '{}': mount '{}' exposes the Docker socket to the MCP container. \ + This grants full host Docker access and may allow container escape.", + mcp_name, mount + )); + return warnings; + } + for prefix in SENSITIVE_MOUNT_PREFIXES { + // Match exact path or path with trailing separator to avoid false positives + // (e.g. /etc matches /etc and /etc/shadow, but not /etc-configs) + if source_lower == *prefix || source_lower.starts_with(&format!("{}/", prefix)) { + warnings.push(format!( + "Warning: MCP '{}': mount source '{}' references a sensitive host path ({}). \ + Ensure this is intentional.", + mcp_name, source, prefix + )); + break; + } + } + } + warnings +} + +/// Validate Docker runtime args for dangerous flags that could escalate privileges. +/// Also detects volume mounts smuggled via `-v`/`--volume` that bypass `mounts` validation. 
+/// Handles both `--flag=value` and `--flag value` (split) forms. +pub fn validate_docker_args(args: &[String], mcp_name: &str) -> Vec { + let mut warnings = Vec::new(); + for (i, arg) in args.iter().enumerate() { + let arg_lower = arg.to_lowercase(); + // Check for dangerous Docker flags (both --flag=value and --flag value) + for dangerous in DANGEROUS_DOCKER_FLAGS { + if arg_lower == *dangerous + || arg_lower.starts_with(&format!("{}=", dangerous)) + { + let extra_hint = if *dangerous == "--entrypoint" { + " Use the 'entrypoint:' field instead of passing --entrypoint in args." + } else { + "" + }; + warnings.push(format!( + "Warning: MCP '{}': Docker arg '{}' grants elevated privileges. \ + Ensure this is intentional.{}", + mcp_name, arg, extra_hint + )); + } + } + // Check for volume mounts smuggled via args (bypasses mounts validation) + if arg == "-v" || arg == "--volume" { + if let Some(mount_spec) = args.get(i + 1) { + warnings.push(format!( + "Warning: MCP '{}': volume mount '{}' in args bypasses mounts validation. \ + Use the 'mounts:' field instead.", + mcp_name, mount_spec + )); + warnings.extend(validate_mount_source(mount_spec, mcp_name)); + } else { + warnings.push(format!( + "Warning: MCP '{}': '{}' flag is the last arg with no mount spec following it. \ + This is likely a malformed args list.", + mcp_name, arg + )); + } + } else if arg_lower.starts_with("-v=") || arg_lower.starts_with("--volume=") { + let mount_spec = arg.splitn(2, '=').nth(1).unwrap_or(""); + warnings.push(format!( + "Warning: MCP '{}': volume mount '{}' in args bypasses mounts validation. \ + Use the 'mounts:' field instead.", + mcp_name, mount_spec + )); + warnings.extend(validate_mount_source(mount_spec, mcp_name)); + } + } + warnings +} + +/// Validate that an MCP HTTP URL uses an allowed scheme. 
+pub fn validate_mcp_url(url: &str, mcp_name: &str) -> Vec { + let mut warnings = Vec::new(); + if !url.starts_with("https://") && !url.starts_with("http://") { + warnings.push(format!( + "Warning: MCP '{}': URL '{}' does not use http:// or https:// scheme. \ + This may not work with MCPG.", + mcp_name, url + )); + } + warnings +} + +/// Warn when env values or headers look like they contain inline secrets. +/// Secrets should use pipeline variables and passthrough ("") instead. +pub fn warn_potential_secrets(mcp_name: &str, env: &HashMap, headers: &HashMap) -> Vec { + let mut warnings = Vec::new(); + for (key, value) in env { + if !value.is_empty() && (key.to_lowercase().contains("token") + || key.to_lowercase().contains("secret") + || key.to_lowercase().contains("key") + || key.to_lowercase().contains("password") + || key.to_lowercase().contains("pat")) + { + warnings.push(format!( + "Warning: MCP '{}': env var '{}' has an inline value that may be a secret. \ + Use an empty string (\"\") for passthrough from pipeline variables instead.", + mcp_name, key + )); + } + } + for (key, value) in headers { + if value.to_lowercase().contains("bearer ") + || key.to_lowercase() == "authorization" + { + warnings.push(format!( + "Warning: MCP '{}': header '{}' may contain inline credentials. \ + These will appear in plaintext in the compiled pipeline YAML.", + mcp_name, key + )); + } + } + warnings +} + +/// Validate that a string is a legal environment variable name (`[A-Za-z_][A-Za-z0-9_]*`). +/// Prevents injection of arbitrary Docker flags via user-controlled front matter keys. +pub fn is_valid_env_var_name(name: &str) -> bool { + let mut chars = name.chars(); + chars + .next() + .map_or(false, |c| c.is_ascii_alphabetic() || c == '_') + && chars.all(|c| c.is_ascii_alphanumeric() || c == '_') +} + +/// Generate MCPG configuration from front matter. +/// +/// Converts the front matter `mcp-servers` definitions into MCPG-compatible JSON. 
+/// SafeOutputs is always included as an HTTP backend. Extension-contributed MCPG +/// entries (e.g., azure-devops) are included via the `extensions` parameter. +pub fn generate_mcpg_config( + front_matter: &FrontMatter, + ctx: &CompileContext, + extensions: &[super::extensions::Extension], +) -> Result { + let mut mcp_servers = HashMap::new(); + + // SafeOutputs is always included as an HTTP backend. + // MCPG runs with --network host, so it reaches SafeOutputs via localhost + // (not host.docker.internal, which requires Docker DNS and isn't available + // in host network mode on Linux). + mcp_servers.insert( + "safeoutputs".to_string(), + McpgServerConfig { + server_type: "http".to_string(), + container: None, + entrypoint: None, + entrypoint_args: None, + mounts: None, + args: None, + url: Some("http://localhost:${SAFE_OUTPUTS_PORT}/mcp".to_string()), + headers: Some(HashMap::from([( + "Authorization".to_string(), + "Bearer ${SAFE_OUTPUTS_API_KEY}".to_string(), + )])), + env: None, + tools: None, + }, + ); + + // Add extension-contributed MCPG server entries (e.g., azure-devops) + for ext in extensions { + for (name, config) in ext.mcpg_servers(ctx)? 
{ + mcp_servers.insert(name, config); + } + } + + for (name, config) in &front_matter.mcp_servers { + // Prevent user-defined MCPs from overwriting the reserved safeoutputs backend + if name.eq_ignore_ascii_case("safeoutputs") { + log::warn!( + "MCP name 'safeoutputs' is reserved for the safe outputs HTTP backend — skipping" + ); + continue; + } + + // Skip if already auto-configured by an extension (e.g., tools.azure-devops) + if mcp_servers.contains_key(name) { + continue; + } + + let (is_enabled, options) = match config { + McpConfig::Enabled(enabled) => (*enabled, None), + McpConfig::WithOptions(opts) => (opts.enabled.unwrap_or(true), Some(opts)), + }; + + if !is_enabled { + continue; + } + + if let Some(opts) = options { + if opts.container.is_some() && opts.url.is_some() { + log::warn!( + "MCP '{}': both 'container' and 'url' are set — using 'container' (stdio). \ + Remove 'url' to silence this warning.", + name + ); + } + + if let Some(container) = &opts.container { + // Container-based stdio MCP (MCPG-native, per spec §3.2.1) + for w in validate_container_image(container, name) { eprintln!("{}", w); } + // Validate mount paths for sensitive host directories + for mount in &opts.mounts { + for w in validate_mount_source(mount, name) { eprintln!("{}", w); } + } + // Validate Docker runtime args for privilege escalation + for w in validate_docker_args(&opts.args, name) { eprintln!("{}", w); } + // Warn about potential inline secrets (check headers too in case user set both) + for w in warn_potential_secrets(name, &opts.env, &opts.headers) { eprintln!("{}", w); } + let entrypoint_args = if opts.entrypoint_args.is_empty() { + None + } else { + Some(opts.entrypoint_args.clone()) + }; + let args = if opts.args.is_empty() { + None + } else { + Some(opts.args.clone()) + }; + let mounts = if opts.mounts.is_empty() { + None + } else { + Some(opts.mounts.clone()) + }; + let env = if opts.env.is_empty() { + None + } else { + Some(opts.env.clone()) + }; + let tools = if 
opts.allowed.is_empty() { + None + } else { + Some(opts.allowed.clone()) + }; + mcp_servers.insert( + name.clone(), + McpgServerConfig { + server_type: "stdio".to_string(), + container: Some(container.clone()), + entrypoint: opts.entrypoint.clone(), + entrypoint_args, + mounts, + args, + url: None, + headers: None, + env, + tools, + }, + ); + } else if let Some(url) = &opts.url { + // HTTP-based MCP (remote server) + for w in validate_mcp_url(url, name) { eprintln!("{}", w); } + // Warn about potential inline secrets in headers + for w in warn_potential_secrets(name, &HashMap::new(), &opts.headers) { eprintln!("{}", w); } + if !opts.env.is_empty() { + eprintln!( + "Warning: MCP '{}': env vars are not supported for HTTP MCPs — they will be ignored. \ + Use headers for authentication instead.", + name + ); + } + let headers = if opts.headers.is_empty() { + None + } else { + Some(opts.headers.clone()) + }; + let tools = if opts.allowed.is_empty() { + None + } else { + Some(opts.allowed.clone()) + }; + mcp_servers.insert( + name.clone(), + McpgServerConfig { + server_type: "http".to_string(), + container: None, + entrypoint: None, + entrypoint_args: None, + mounts: None, + args: None, + url: Some(url.clone()), + headers, + env: None, + tools, + }, + ); + } else { + log::warn!("MCP '{}' has no container or url — skipping", name); + continue; + } + } else { + log::warn!("MCP '{}' has no container or url — skipping", name); + } + } + + Ok(McpgConfig { + mcp_servers, + gateway: McpgGatewayConfig { + port: MCPG_PORT, + domain: "host.docker.internal".to_string(), + api_key: "${MCP_GATEWAY_API_KEY}".to_string(), + payload_dir: "/tmp/gh-aw/mcp-payloads".to_string(), + }, + }) +} + +/// Generate additional `-e` flags for the MCPG Docker run command. +/// +/// MCP containers spawned by MCPG may need environment variables that flow from +/// the pipeline through the MCPG container (passthrough). This function: +/// 1. 
Auto-maps `AZURE_DEVOPS_EXT_PAT` from `SC_READ_TOKEN` when `permissions.read` is configured +/// 2. Collects passthrough env vars (value is `""`) from container-based MCP configs +/// +/// Only container-based MCPs are considered — HTTP MCPs don't have child containers +/// that need env passthrough. +/// +/// Returns flags formatted for inline insertion in the `docker run` command. +/// The marker sits after the last hardcoded `-e` flag, so the output must +/// include leading `\\\n` for line continuation when non-empty. +pub fn generate_mcpg_docker_env(front_matter: &FrontMatter) -> String { + let mut env_flags: Vec = Vec::new(); + let mut seen: HashSet = HashSet::new(); + + // Check if any container MCP requests AZURE_DEVOPS_EXT_PAT passthrough + let any_mcp_needs_ado_token = front_matter.mcp_servers.values().any(|config| { + matches!(config, McpConfig::WithOptions(opts) + if opts.enabled.unwrap_or(true) + && opts.container.is_some() + && opts.env.contains_key("AZURE_DEVOPS_EXT_PAT")) + }); + + // Also check if tools.azure-devops is enabled (auto-configured ADO MCP always needs token) + let ado_tool_needs_token = front_matter + .tools + .as_ref() + .and_then(|t| t.azure_devops.as_ref()) + .is_some_and(|ado| ado.is_enabled()); + + // Auto-map AZURE_DEVOPS_EXT_PAT from SC_READ_TOKEN when permissions.read is configured + // AND at least one container MCP requests it via env passthrough (or the ADO tool is enabled) + if any_mcp_needs_ado_token || ado_tool_needs_token { + if front_matter.permissions.as_ref().and_then(|p| p.read.as_ref()).is_some() { + env_flags.push( + "-e AZURE_DEVOPS_EXT_PAT=\"$(SC_READ_TOKEN)\"".to_string(), + ); + seen.insert("AZURE_DEVOPS_EXT_PAT".to_string()); + } else { + eprintln!( + "Warning: one or more container MCPs request AZURE_DEVOPS_EXT_PAT passthrough \ + but permissions.read is not configured. The token will be empty at runtime. \ + Add `permissions: {{ read: }}` to enable auto-mapping." 
+ ); + } + } + + // Collect passthrough env vars from container-based MCP configs only. + // HTTP MCPs don't have child containers — env passthrough doesn't apply. + for (mcp_name, config) in &front_matter.mcp_servers { + let opts = match config { + McpConfig::WithOptions(opts) if opts.enabled.unwrap_or(true) => opts, + _ => continue, + }; + + // Only container-based MCPs need env passthrough on the MCPG Docker run + if opts.container.is_none() { + continue; + } + + for (var_name, var_value) in &opts.env { + // Validate env var name to prevent Docker flag injection (e.g. "X --privileged") + if !is_valid_env_var_name(var_name) { + log::warn!( + "MCP '{}': skipping invalid env var name '{}' — must match [A-Za-z_][A-Za-z0-9_]*", + mcp_name, var_name + ); + continue; + } + if seen.contains(var_name) { + continue; + } + // Passthrough: empty string means forward from host/pipeline environment + if var_value.is_empty() { + env_flags.push(format!("-e {}", var_name)); + seen.insert(var_name.clone()); + } + } + } + + env_flags.sort(); + if env_flags.is_empty() { + // No extra flags — emit a lone `\` so the bash line continuation from the + // preceding `-e MCP_GATEWAY_API_KEY=...` flag connects to the image name on + // the next line. This is valid bash: a backslash at end-of-line continues + // the command. replace_with_indent preserves this on its own indented line. + "\\".to_string() + } else { + // Emit each flag on its own line with `\` continuation. + // replace_with_indent handles indentation from the template (base.yml), + // so we only emit the content without hardcoded spaces. + let flags = env_flags.join(" \\\n"); + format!("{} \\", flags) + } +} + +// ==================== Domain allowlist ==================== + +/// Generate the allowed domains list for AWF network isolation. +/// +/// This generates a comma-separated list of domain patterns for AWF's +/// `--allow-domains` flag. The list includes: +/// 1. Core Azure DevOps/GitHub endpoints +/// 2. 
MCP-specific endpoints for each enabled MCP +/// 3. User-specified additional hosts from network.allowed +pub fn generate_allowed_domains( + front_matter: &FrontMatter, + extensions: &[super::extensions::Extension], +) -> Result { + // Collect enabled MCP names (user-defined MCPs, not first-party tools) + let enabled_mcps: Vec = front_matter + .mcp_servers + .iter() + .filter_map(|(name, config)| { + let is_enabled = match config { + McpConfig::Enabled(enabled) => *enabled, + McpConfig::WithOptions(_) => true, + }; + if is_enabled { Some(name.clone()) } else { None } + }) + .collect(); + + // Get user-specified hosts + let user_hosts: Vec = front_matter + .network + .as_ref() + .map(|n| n.allowed.clone()) + .unwrap_or_default(); + + // Generate the allowlist by combining core + MCP + extension + user hosts + let mut hosts: HashSet = HashSet::new(); + + // Add core hosts + for host in CORE_ALLOWED_HOSTS { + hosts.insert((*host).to_string()); + } + + // Add host.docker.internal — required for the AWF container to reach + // MCPG and SafeOutputs on the host. + hosts.insert("host.docker.internal".to_string()); + + // Add MCP-specific hosts (user-defined MCPs via mcp_required_hosts lookup) + for mcp in &enabled_mcps { + for host in mcp_required_hosts(mcp) { + hosts.insert((*host).to_string()); + } + } + + // Add extension-declared hosts (runtimes + first-party tools). + // Extensions may return ecosystem identifiers (e.g., "lean") which are + // expanded to their domain lists, or raw domain names. 
+ for ext in extensions { + for host in ext.required_hosts() { + if is_ecosystem_identifier(&host) { + let domains = get_ecosystem_domains(&host); + if domains.is_empty() { + eprintln!( + "warning: extension '{}' requires unknown ecosystem '{}'; \ + no domains added", + ext.name(), + host + ); + } + for domain in domains { + hosts.insert(domain); + } + } else { + hosts.insert(host); + } + } + } + + // Add user-specified hosts (validated against DNS-safe characters) + // Entries may be ecosystem identifiers (e.g., "python", "rust") which + // expand to their domain lists, or raw domain names. + for host in &user_hosts { + if is_ecosystem_identifier(host) { + let domains = get_ecosystem_domains(host); + if domains.is_empty() && !is_known_ecosystem(host) { + eprintln!( + "warning: network.allowed contains unknown ecosystem identifier '{}'. \ + Known ecosystems: python, rust, node, go, java, etc. \ + If this is a domain name, it should contain a dot.", + host + ); + } + for domain in domains { + hosts.insert(domain); + } + } else { + let valid_chars = !host.is_empty() + && host + .chars() + .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '*')); + if !valid_chars { + anyhow::bail!( + "network.allowed domain '{}' contains characters invalid in DNS names. \ + Only ASCII alphanumerics, '.', '-', and '*' are allowed.", + host + ); + } + if host.contains('*') && !(host.starts_with("*.") && !host[2..].contains('*')) { + anyhow::bail!( + "network.allowed domain '{}' uses '*' in an unsupported position. \ + Wildcards must appear only as a leading prefix (e.g. 
'*.example.com').", + host + ); + } + hosts.insert(host.clone()); + } + } + + // Remove blocked hosts (supports both ecosystem identifiers and raw domains) + let blocked_hosts: Vec = front_matter + .network + .as_ref() + .map(|n| n.blocked.clone()) + .unwrap_or_default(); + for blocked in &blocked_hosts { + if is_ecosystem_identifier(blocked) { + for domain in get_ecosystem_domains(blocked) { + hosts.remove(&domain); + } + } else { + hosts.remove(blocked); + } + } + + // Sort for deterministic output + let mut allowlist: Vec = hosts.into_iter().collect(); + allowlist.sort(); + + // Format as comma-separated list for AWF --allow-domains + Ok(allowlist.join(",")) +} + +// ==================== Shared compile flow ==================== + +/// Target-specific overrides for the shared compile flow. +pub struct CompileConfig { + /// The base YAML template content (the template string itself). + pub template: String, + /// Additional placeholder→value replacements beyond the shared set. + /// These are applied **before** the shared replacements, allowing + /// target-specific overrides of shared markers (e.g., 1ES-specific + /// setup/teardown jobs that differ from the standalone defaults). + pub extra_replacements: Vec<(String, String)>, +} + +/// Shared compilation flow used by both standalone and 1ES compilers. +/// +/// This function handles the common pipeline compilation steps: +/// 1. Validates front matter +/// 2. Generates all shared placeholder values +/// 3. Runs extension validations +/// 4. Applies replacements to the template +/// 5. Prepends the header comment +/// +/// Target-specific values are provided via `CompileConfig.extra_replacements`, +/// which are applied before the shared replacements so that targets can +/// override shared markers (e.g., `{{ setup_job }}`, `{{ teardown_job }}`). 
+pub async fn compile_shared( + input_path: &Path, + output_path: &Path, + front_matter: &FrontMatter, + markdown_body: &str, + extensions: &[Extension], + ctx: &CompileContext<'_>, + config: CompileConfig, +) -> Result { + // 1. Validate + validate_front_matter_identity(front_matter)?; + + // 2. Generate schedule + let schedule = match &front_matter.schedule { + Some(s) => generate_schedule(&front_matter.name, s) + .with_context(|| format!("Failed to parse schedule '{}'", s.expression()))?, + None => String::new(), + }; + + let repositories = generate_repositories(&front_matter.repositories); + let checkout_steps = generate_checkout_steps(&front_matter.checkout); + let checkout_self = generate_checkout_self(); + let agent_name = sanitize_filename(&front_matter.name); + + // 3. Run extension validations + for ext in extensions { + for warning in ext.validate(ctx)? { + eprintln!("Warning: {}", warning); + } + } + + // 4. Generate copilot params + let copilot_params = generate_copilot_params(front_matter, extensions)?; + + // 5. Compute workspace, working directory, triggers + let effective_workspace = compute_effective_workspace( + &front_matter.workspace, + &front_matter.checkout, + &front_matter.name, + ); + let working_directory = generate_working_directory(&effective_workspace); + let pipeline_resources = generate_pipeline_resources(&front_matter.triggers)?; + let has_schedule = front_matter.schedule.is_some(); + let pr_trigger = generate_pr_trigger(&front_matter.triggers, has_schedule); + let ci_trigger = generate_ci_trigger(&front_matter.triggers, has_schedule); + + // 6. Generate source path and pipeline path + let source_path = generate_source_path(input_path); + let pipeline_path = generate_pipeline_path(output_path); + + // 7. Pool name + let pool = front_matter + .pool + .as_ref() + .map(|p| p.name().to_string()) + .unwrap_or_else(|| DEFAULT_POOL.to_string()); + + // 8. 
Setup/teardown jobs, parameters, prepare/finalize steps + let setup_job = generate_setup_job(&front_matter.setup, &front_matter.name, &pool); + let teardown_job = generate_teardown_job(&front_matter.teardown, &front_matter.name, &pool); + let has_memory = front_matter + .tools + .as_ref() + .and_then(|t| t.cache_memory.as_ref()) + .is_some_and(|cm| cm.is_enabled()); + let parameters = build_parameters(&front_matter.parameters, has_memory); + let parameters_yaml = generate_parameters(¶meters)?; + let prepare_steps = generate_prepare_steps(&front_matter.steps, extensions)?; + let finalize_steps = generate_finalize_steps(&front_matter.post_steps); + let agentic_depends_on = generate_agentic_depends_on(&front_matter.setup); + let job_timeout = generate_job_timeout(front_matter); + + // 9. Token acquisition and env vars + let acquire_read_token = generate_acquire_ado_token( + front_matter + .permissions + .as_ref() + .and_then(|p| p.read.as_deref()), + "SC_READ_TOKEN", + ); + let copilot_ado_env = generate_copilot_ado_env( + front_matter + .permissions + .as_ref() + .and_then(|p| p.read.as_deref()), + ); + let acquire_write_token = generate_acquire_ado_token( + front_matter + .permissions + .as_ref() + .and_then(|p| p.write.as_deref()), + "SC_WRITE_TOKEN", + ); + let executor_ado_env = generate_executor_ado_env( + front_matter + .permissions + .as_ref() + .and_then(|p| p.write.as_deref()), + ); + + // 10. Validations + validate_write_permissions(front_matter)?; + validate_comment_target(front_matter)?; + validate_update_work_item_target(front_matter)?; + validate_submit_pr_review_events(front_matter)?; + validate_update_pr_votes(front_matter)?; + validate_resolve_pr_thread_statuses(front_matter)?; + + // 11. Threat analysis prompt + let threat_analysis_prompt = include_str!("../../templates/threat-analysis.md"); + let template = replace_with_indent( + &config.template, + "{{ threat_analysis_prompt }}", + threat_analysis_prompt, + ); + + // 12. 
Apply extra replacements first (target-specific overrides) + // These run before shared replacements so targets can override shared + // markers like {{ setup_job }} and {{ teardown_job }}. + let mut template = template; + for (placeholder, replacement) in &config.extra_replacements { + template = replace_with_indent(&template, placeholder, replacement); + } + + // 13. Shared replacements + let compiler_version = env!("CARGO_PKG_VERSION"); + let replacements: Vec<(&str, &str)> = vec![ + ("{{ parameters }}", &parameters_yaml), + ("{{ compiler_version }}", compiler_version), + ("{{ copilot_version }}", COPILOT_CLI_VERSION), + ("{{ pool }}", &pool), + ("{{ setup_job }}", &setup_job), + ("{{ teardown_job }}", &teardown_job), + ("{{ prepare_steps }}", &prepare_steps), + ("{{ finalize_steps }}", &finalize_steps), + ("{{ agentic_depends_on }}", &agentic_depends_on), + ("{{ job_timeout }}", &job_timeout), + ("{{ repositories }}", &repositories), + ("{{ schedule }}", &schedule), + ("{{ pipeline_resources }}", &pipeline_resources), + ("{{ pr_trigger }}", &pr_trigger), + ("{{ ci_trigger }}", &ci_trigger), + ("{{ checkout_self }}", &checkout_self), + ("{{ checkout_repositories }}", &checkout_steps), + ("{{ agent }}", &agent_name), + ("{{ agent_name }}", &front_matter.name), + ("{{ agent_description }}", &front_matter.description), + ("{{ copilot_params }}", &copilot_params), + ("{{ source_path }}", &source_path), + ("{{ pipeline_path }}", &pipeline_path), + ("{{ working_directory }}", &working_directory), + ("{{ workspace }}", &working_directory), + ("{{ agent_content }}", markdown_body), + ("{{ acquire_ado_token }}", &acquire_read_token), + ("{{ copilot_ado_env }}", &copilot_ado_env), + ("{{ acquire_write_token }}", &acquire_write_token), + ("{{ executor_ado_env }}", &executor_ado_env), + ]; + + let mut pipeline_yaml = replacements + .into_iter() + .fold(template, |yaml, (placeholder, replacement)| { + replace_with_indent(&yaml, placeholder, replacement) + }); + + // 14.
Prepend header + let header = generate_header_comment(input_path); + Ok(format!("{}{}", header, pipeline_yaml)) +} + #[cfg(test)] mod tests { use super::*; use crate::compile::types::{McpConfig, McpOptions, Repository}; + use crate::compile::extensions::{CompileContext, collect_extensions}; + use std::collections::HashMap; /// Helper: create a minimal FrontMatter by parsing YAML fn minimal_front_matter() -> FrontMatter { @@ -2570,4 +3451,981 @@ mod tests { assert!(result.contains("project: 'My''Project'")); assert!(result.contains("- 'it''s-branch'")); } + + // ─── generate_prepare_steps ────────────────────────────────────────────── + + #[test] + fn test_generate_prepare_steps_with_memory_includes_memory_preamble() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n", + ).unwrap(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let result = generate_prepare_steps(&[], &exts).unwrap(); + assert!( + !result.is_empty(), + "memory steps must be emitted when cache-memory enabled" + ); + assert!( + result.contains("agent_memory"), + "should reference memory directory" + ); + } + + #[test] + fn test_generate_prepare_steps_without_memory_and_no_steps_is_empty() { + let fm = minimal_front_matter(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let result = generate_prepare_steps(&[], &exts).unwrap(); + assert!(result.is_empty(), "no steps and no memory should produce empty output"); + } + + #[test] + fn test_generate_prepare_steps_with_memory_includes_download_and_prompt() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n", + ).unwrap(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let result = generate_prepare_steps(&[], &exts).unwrap(); + assert!( + result.contains("DownloadPipelineArtifact"), + "memory steps must include the artifact download task" + ); + assert!( + result.contains("Agent 
Memory"), + "memory steps must include the memory prompt" + ); + } + + #[test] + fn test_generate_prepare_steps_without_memory_with_user_steps() { + let fm = minimal_front_matter(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let step: serde_yaml::Value = + serde_yaml::from_str("bash: echo hello\ndisplayName: greet").unwrap(); + let result = generate_prepare_steps(&[step], &exts).unwrap(); + assert!(!result.is_empty(), "user steps should be present"); + assert!( + !result.contains("agent_memory"), + "no memory reference when cache-memory not enabled" + ); + } + + #[test] + fn test_generate_prepare_steps_with_memory_and_user_steps() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n", + ).unwrap(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let step: serde_yaml::Value = + serde_yaml::from_str("bash: echo hello\ndisplayName: greet").unwrap(); + let result = generate_prepare_steps(&[step], &exts).unwrap(); + assert!( + result.contains("agent_memory"), + "memory reference must be present" + ); + assert!( + result.contains("echo hello"), + "user step must also be present" + ); + } + + #[test] + fn test_generate_prepare_steps_with_lean() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\nruntimes:\n lean: true\n---\n", + ).unwrap(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let result = generate_prepare_steps(&[], &exts).unwrap(); + assert!(result.contains("elan-init.sh"), "should include elan installer"); + assert!(result.contains("Lean 4"), "should include Lean prompt"); + assert!(result.contains("--default-toolchain stable"), "should default to stable"); + assert!(result.contains("/tmp/awf-tools/"), "should symlink into awf-tools for AWF chroot"); + } + + #[test] + fn test_generate_prepare_steps_with_lean_custom_toolchain() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\nruntimes:\n lean:\n 
toolchain: \"leanprover/lean4:v4.29.1\"\n---\n", + ).unwrap(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let result = generate_prepare_steps(&[], &exts).unwrap(); + assert!( + result.contains("--default-toolchain leanprover/lean4:v4.29.1"), + "should use specified toolchain" + ); + } + + #[test] + fn test_generate_prepare_steps_with_lean_and_memory() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\nruntimes:\n lean: true\ntools:\n cache-memory: true\n---\n", + ).unwrap(); + let exts = crate::compile::extensions::collect_extensions(&fm); + let result = generate_prepare_steps(&[], &exts).unwrap(); + assert!(result.contains("agent_memory"), "memory steps present"); + assert!(result.contains("elan-init.sh"), "lean install present"); + assert!(result.contains("Lean 4"), "lean prompt present"); + } + + // ═══════════════════════════════════════════════════════════════════════ + // Tests moved from standalone.rs — MCPG config, docker env, validation + // ═══════════════════════════════════════════════════════════════════════ + + #[test] + fn test_generate_firewall_config_custom_mcp() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "my-tool".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("node:20-slim".to_string()), + entrypoint: Some("node".to_string()), + entrypoint_args: vec!["server.js".to_string()], + allowed: vec!["do_thing".to_string()], + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let server = config.mcp_servers.get("my-tool").unwrap(); + assert_eq!(server.server_type, "stdio"); + assert_eq!(server.container.as_ref().unwrap(), "node:20-slim"); + assert_eq!(server.entrypoint.as_ref().unwrap(), "node"); + assert_eq!( + server.entrypoint_args.as_ref().unwrap(), + &vec!["server.js"] + ); + assert_eq!( + server.tools.as_ref().unwrap(), + &vec!["do_thing".to_string()] + ); + } + + 
#[test] + fn test_generate_mcpg_config_mcp_without_transport_skipped() { + let mut fm = minimal_front_matter(); + // An MCP with no container or url should be skipped + fm.mcp_servers + .insert("phantom".to_string(), McpConfig::Enabled(true)); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + assert!(!config.mcp_servers.contains_key("phantom")); + // safeoutputs is always present + assert!(config.mcp_servers.contains_key("safeoutputs")); + } + + #[test] + fn test_generate_mcpg_config_disabled_mcp_skipped() { + let mut fm = minimal_front_matter(); + fm.mcp_servers + .insert("my-tool".to_string(), McpConfig::Enabled(false)); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + assert!(!config.mcp_servers.contains_key("my-tool")); + } + + #[test] + fn test_generate_mcpg_config_empty_mcp_servers() { + let fm = minimal_front_matter(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + // Only safeoutputs should be present + assert_eq!(config.mcp_servers.len(), 1); + assert!(config.mcp_servers.contains_key("safeoutputs")); + } + + #[test] + fn test_generate_mcpg_config_gateway_defaults() { + let fm = minimal_front_matter(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + assert_eq!(config.gateway.port, 80); + assert_eq!(config.gateway.domain, "host.docker.internal"); + assert_eq!(config.gateway.api_key, "${MCP_GATEWAY_API_KEY}"); + assert_eq!(config.gateway.payload_dir, "/tmp/gh-aw/mcp-payloads"); + } + + #[test] + fn test_generate_mcpg_config_json_roundtrip() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "my-tool".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("python:3.12-slim".to_string()), + entrypoint: Some("python".to_string()), + entrypoint_args: vec!["-m".to_string(), 
"server".to_string()], + allowed: vec!["query".to_string()], + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let json = serde_json::to_string_pretty(&config).expect("Config should serialize to JSON"); + let parsed: serde_json::Value = + serde_json::from_str(&json).expect("Serialized JSON should parse back"); + + // Verify top-level structure matches MCPG expectation + assert!( + parsed.get("mcpServers").is_some(), + "Should have mcpServers key" + ); + assert!(parsed.get("gateway").is_some(), "Should have gateway key"); + + let gw = parsed.get("gateway").unwrap(); + assert!(gw.get("port").is_some(), "Gateway should have port"); + assert!(gw.get("domain").is_some(), "Gateway should have domain"); + assert!(gw.get("apiKey").is_some(), "Gateway should have apiKey"); + assert!( + gw.get("payloadDir").is_some(), + "Gateway should have payloadDir" + ); + } + + #[test] + fn test_generate_mcpg_config_safeoutputs_variable_placeholders() { + let fm = minimal_front_matter(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let so = config.mcp_servers.get("safeoutputs").unwrap(); + + // URL should reference the runtime-substituted port + let url = so.url.as_ref().unwrap(); + assert!( + url.contains("${SAFE_OUTPUTS_PORT}"), + "SafeOutputs URL should use ${{SAFE_OUTPUTS_PORT}} placeholder, got: {url}" + ); + + // Auth header should reference the runtime-substituted API key + let headers = so.headers.as_ref().unwrap(); + let auth = headers.get("Authorization").unwrap(); + assert!( + auth.contains("${SAFE_OUTPUTS_API_KEY}"), + "SafeOutputs auth header should use ${{SAFE_OUTPUTS_API_KEY}} placeholder, got: {auth}" + ); + } + + #[test] + fn test_generate_mcpg_config_safeoutputs_is_http_type() { + let fm = minimal_front_matter(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), 
&collect_extensions(&fm)).unwrap(); + let so = config.mcp_servers.get("safeoutputs").unwrap(); + assert_eq!(so.server_type, "http"); + assert!( + so.container.is_none(), + "HTTP backend should have no container" + ); + assert!(so.args.is_none(), "HTTP backend should have no args"); + assert!(so.url.is_some(), "HTTP backend must have a URL"); + } + + #[test] + fn test_generate_mcpg_config_container_mcp_is_stdio_type() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "runner".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("node:20-slim".to_string()), + entrypoint: Some("node".to_string()), + entrypoint_args: vec!["srv.js".to_string()], + allowed: vec!["run".to_string()], + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let srv = config.mcp_servers.get("runner").unwrap(); + assert_eq!(srv.server_type, "stdio"); + assert!( + srv.container.is_some(), + "stdio server must have a container" + ); + assert!(srv.url.is_none(), "stdio server should have no URL"); + } + + #[test] + fn test_generate_mcpg_config_container_with_env() { + let mut fm = minimal_front_matter(); + let mut env = HashMap::new(); + env.insert("TOKEN".to_string(), "secret".to_string()); + fm.mcp_servers.insert( + "with-env".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("node:20-slim".to_string()), + env, + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let srv = config.mcp_servers.get("with-env").unwrap(); + let e = srv.env.as_ref().unwrap(); + assert_eq!(e.get("TOKEN").unwrap(), "secret"); + } + + #[test] + fn test_generate_mcpg_config_reserved_safeoutputs_name_rejected() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "safeoutputs".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("evil:latest".to_string()), + 
..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + // The reserved entry should still be the HTTP backend, not the user's container + let so = config.mcp_servers.get("safeoutputs").unwrap(); + assert_eq!( + so.server_type, "http", + "safeoutputs should remain HTTP backend" + ); + assert!( + so.container.is_none(), + "User container should not overwrite safeoutputs" + ); + } + + #[test] + fn test_generate_mcpg_config_safeoutputs_reserved_name_skipped() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "SafeOutputs".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("node:20-slim".to_string()), + entrypoint: Some("node".to_string()), + entrypoint_args: vec!["evil.js".to_string()], + allowed: vec!["hijack".to_string()], + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + // The user-defined "SafeOutputs" must not overwrite the built-in entry + let so = config.mcp_servers.get("safeoutputs").unwrap(); + assert_eq!(so.server_type, "http"); + assert!(so.url.as_ref().unwrap().contains("localhost")); + // No stdio entry should have been added under any casing + assert_eq!(config.mcp_servers.len(), 1); + } + + #[test] + fn test_generate_mcpg_config_http_mcp() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "remote".to_string(), + McpConfig::WithOptions(McpOptions { + url: Some("https://mcp.example.com/api".to_string()), + headers: { + let mut h = HashMap::new(); + h.insert("X-Custom".to_string(), "value".to_string()); + h + }, + allowed: vec!["query".to_string()], + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let srv = config.mcp_servers.get("remote").unwrap(); + assert_eq!(srv.server_type, "http"); + assert_eq!( + srv.url.as_ref().unwrap(), 
+ "https://mcp.example.com/api" + ); + assert_eq!( + srv.headers.as_ref().unwrap().get("X-Custom").unwrap(), + "value" + ); + assert!(srv.container.is_none(), "HTTP server should have no container"); + } + + #[test] + fn test_generate_mcpg_config_container_with_entrypoint() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "ado".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("node:20-slim".to_string()), + entrypoint: Some("npx".to_string()), + entrypoint_args: vec!["-y".to_string(), "@azure-devops/mcp".to_string()], + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let srv = config.mcp_servers.get("ado").unwrap(); + assert_eq!(srv.server_type, "stdio"); + assert_eq!(srv.container.as_ref().unwrap(), "node:20-slim"); + assert_eq!(srv.entrypoint.as_ref().unwrap(), "npx"); + assert_eq!( + srv.entrypoint_args.as_ref().unwrap(), + &vec!["-y", "@azure-devops/mcp"] + ); + } + + #[test] + fn test_generate_mcpg_config_container_with_mounts() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "data-tool".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("data-tool:latest".to_string()), + mounts: vec!["/host/data:/app/data:ro".to_string()], + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let srv = config.mcp_servers.get("data-tool").unwrap(); + assert_eq!( + srv.mounts.as_ref().unwrap(), + &vec!["/host/data:/app/data:ro"] + ); + } + + #[test] + fn test_generate_mcpg_config_no_transport_skipped() { + let mut fm = minimal_front_matter(); + // MCP with options but no container or url should be skipped + fm.mcp_servers.insert( + "no-transport".to_string(), + McpConfig::WithOptions(McpOptions { + allowed: vec!["tool".to_string()], + ..Default::default() + }), + ); + let config = generate_mcpg_config(&fm, 
&CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + assert!(!config.mcp_servers.contains_key("no-transport")); + } + + #[test] + fn test_generate_mcpg_docker_env_with_permissions_read() { + let mut fm = minimal_front_matter(); + fm.permissions = Some(crate::compile::types::PermissionsConfig { + read: Some("my-read-sc".to_string()), + write: None, + }); + // A container MCP must request AZURE_DEVOPS_EXT_PAT for the auto-map to trigger + fm.mcp_servers.insert( + "ado-tool".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("node:20-slim".to_string()), + env: { + let mut e = HashMap::new(); + e.insert("AZURE_DEVOPS_EXT_PAT".to_string(), "".to_string()); + e + }, + ..Default::default() + }), + ); + let env = generate_mcpg_docker_env(&fm); + assert!( + env.contains("-e AZURE_DEVOPS_EXT_PAT=\"$(SC_READ_TOKEN)\""), + "Should auto-map ADO token when permissions.read is set and MCP requests it" + ); + } + + #[test] + fn test_generate_mcpg_docker_env_permissions_read_no_mcp_request() { + let mut fm = minimal_front_matter(); + fm.permissions = Some(crate::compile::types::PermissionsConfig { + read: Some("my-read-sc".to_string()), + write: None, + }); + // No MCP requests AZURE_DEVOPS_EXT_PAT — auto-map should NOT trigger + fm.mcp_servers.insert( + "unrelated-tool".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("node:20-slim".to_string()), + ..Default::default() + }), + ); + let env = generate_mcpg_docker_env(&fm); + assert!( + !env.contains("AZURE_DEVOPS_EXT_PAT"), + "Should NOT auto-map ADO token when no MCP requests it" + ); + } + + #[test] + fn test_generate_mcpg_docker_env_dedup_auto_map_and_passthrough() { + // When permissions.read is set AND MCP has AZURE_DEVOPS_EXT_PAT: "", + // the auto-mapped form (with SC_READ_TOKEN) should win — no duplicate + let mut fm = minimal_front_matter(); + fm.permissions = Some(crate::compile::types::PermissionsConfig { + read: Some("my-read-sc".to_string()), + write: None, + }); + 
fm.mcp_servers.insert( + "ado-tool".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("node:20-slim".to_string()), + env: { + let mut e = HashMap::new(); + e.insert("AZURE_DEVOPS_EXT_PAT".to_string(), "".to_string()); + e + }, + ..Default::default() + }), + ); + let env = generate_mcpg_docker_env(&fm); + // Should have the SC_READ_TOKEN form (auto-mapped), not bare passthrough + assert!( + env.contains("-e AZURE_DEVOPS_EXT_PAT=\"$(SC_READ_TOKEN)\""), + "Auto-mapped form should be present" + ); + // Should appear exactly once + let count = env.matches("AZURE_DEVOPS_EXT_PAT").count(); + assert_eq!(count, 1, "AZURE_DEVOPS_EXT_PAT should appear exactly once, got {}", count); + } + + #[test] + fn test_generate_mcpg_docker_env_without_permissions() { + let fm = minimal_front_matter(); + let env = generate_mcpg_docker_env(&fm); + assert!( + !env.contains("AZURE_DEVOPS_EXT_PAT"), + "Should not map ADO token when permissions.read is not set" + ); + } + + #[test] + fn test_generate_mcpg_docker_env_passthrough_vars() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "tool".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("img:latest".to_string()), + env: { + let mut e = HashMap::new(); + e.insert("PASS_THROUGH".to_string(), "".to_string()); + e.insert("STATIC".to_string(), "value".to_string()); + e + }, + ..Default::default() + }), + ); + let env = generate_mcpg_docker_env(&fm); + assert!(env.contains("-e PASS_THROUGH"), "Should include passthrough var"); + assert!(!env.contains("-e STATIC"), "Should NOT include static var"); + } + + #[test] + fn test_generate_mcpg_docker_env_rejects_invalid_names() { + let mut fm = minimal_front_matter(); + fm.mcp_servers.insert( + "evil".to_string(), + McpConfig::WithOptions(McpOptions { + container: Some("img:latest".to_string()), + env: { + let mut e = HashMap::new(); + // Injection attempt: env var name with Docker flag + e.insert("MY_VAR --privileged".to_string(), "".to_string()); 
+ // Valid env var for comparison + e.insert("GOOD_VAR".to_string(), "".to_string()); + e + }, + ..Default::default() + }), + ); + let env = generate_mcpg_docker_env(&fm); + assert!( + !env.contains("--privileged"), + "Should reject invalid env var name with Docker flag injection" + ); + assert!( + env.contains("-e GOOD_VAR"), + "Should include valid env var" + ); + } + + #[test] + fn test_is_valid_env_var_name() { + assert!(is_valid_env_var_name("MY_VAR")); + assert!(is_valid_env_var_name("_PRIVATE")); + assert!(is_valid_env_var_name("A")); + assert!(is_valid_env_var_name("VAR123")); + assert!(!is_valid_env_var_name("")); + assert!(!is_valid_env_var_name("123ABC")); + assert!(!is_valid_env_var_name("MY-VAR")); + assert!(!is_valid_env_var_name("MY VAR")); + assert!(!is_valid_env_var_name("X --privileged")); + assert!(!is_valid_env_var_name("X -v /etc:/etc:rw")); + } + + // ─── tools.azure-devops MCPG integration ──────────────────────────────── + + #[test] + fn test_ado_tool_generates_mcpg_entry() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops: true\n---\n", + ) + .unwrap(); + // Pass inferred org since no explicit org is set + let config = generate_mcpg_config(&fm, &CompileContext::for_test_with_org(&fm, "inferred-org"), &collect_extensions(&fm)).unwrap(); + let ado = config.mcp_servers.get("azure-devops").unwrap(); + assert_eq!(ado.server_type, "stdio"); + assert_eq!(ado.container.as_deref(), Some(ADO_MCP_IMAGE)); + assert_eq!(ado.entrypoint.as_deref(), Some(ADO_MCP_ENTRYPOINT)); + let args = ado.entrypoint_args.as_ref().unwrap(); + assert!(args.contains(&"-y".to_string())); + assert!(args.contains(&ADO_MCP_PACKAGE.to_string())); + assert!(args.contains(&"inferred-org".to_string())); + // Should have AZURE_DEVOPS_EXT_PAT in env + let env = ado.env.as_ref().unwrap(); + assert!(env.contains_key("AZURE_DEVOPS_EXT_PAT")); + } + + #[test] + fn test_ado_tool_with_toolsets() { + let (fm, _) = parse_markdown( + "---\nname: 
test\ndescription: test\ntools:\n azure-devops:\n toolsets: [repos, wit, core]\n---\n", + ) + .unwrap(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test_with_org(&fm, "myorg"), &collect_extensions(&fm)).unwrap(); + let ado = config.mcp_servers.get("azure-devops").unwrap(); + let args = ado.entrypoint_args.as_ref().unwrap(); + assert!(args.contains(&"-d".to_string())); + assert!(args.contains(&"repos".to_string())); + assert!(args.contains(&"wit".to_string())); + assert!(args.contains(&"core".to_string())); + } + + #[test] + fn test_ado_tool_with_org_override() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: myorg\n---\n", + ) + .unwrap(); + // Explicit org should be used even when inferred_org is None + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let ado = config.mcp_servers.get("azure-devops").unwrap(); + let args = ado.entrypoint_args.as_ref().unwrap(); + assert!(args.contains(&"myorg".to_string())); + } + + #[test] + fn test_ado_tool_explicit_org_overrides_inferred() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: explicit-org\n---\n", + ) + .unwrap(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test_with_org(&fm, "inferred-org"), &collect_extensions(&fm)).unwrap(); + let ado = config.mcp_servers.get("azure-devops").unwrap(); + let args = ado.entrypoint_args.as_ref().unwrap(); + assert!(args.contains(&"explicit-org".to_string())); + assert!(!args.contains(&"inferred-org".to_string())); + } + + #[test] + fn test_ado_tool_no_org_fails() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops: true\n---\n", + ) + .unwrap(); + // No explicit org and no inferred org — should fail + let result = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)); + assert!(result.is_err()); + assert!( + 
result.unwrap_err().to_string().contains("no ADO organization"), + "Error should mention missing org" + ); + } + + #[test] + fn test_ado_tool_invalid_org_fails() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: \"my org/bad\"\n---\n", + ) + .unwrap(); + let result = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)); + assert!(result.is_err()); + assert!( + result.unwrap_err().to_string().contains("Invalid ADO org name"), + "Error should mention invalid org" + ); + } + + #[test] + fn test_ado_tool_invalid_toolset_fails() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: myorg\n toolsets: [\"repos\", \"bad toolset\"]\n---\n", + ) + .unwrap(); + let result = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)); + assert!(result.is_err()); + assert!( + result.unwrap_err().to_string().contains("Invalid ADO toolset name"), + "Error should mention invalid toolset" + ); + } + + #[test] + fn test_ado_tool_with_allowed_tools() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: myorg\n allowed:\n - wit_get_work_item\n - core_list_projects\n---\n", + ) + .unwrap(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let ado = config.mcp_servers.get("azure-devops").unwrap(); + let tools = ado.tools.as_ref().unwrap(); + assert_eq!(tools, &["wit_get_work_item", "core_list_projects"]); + } + + #[test] + fn test_ado_tool_disabled_not_generated() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops: false\n---\n", + ) + .unwrap(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + assert!(!config.mcp_servers.contains_key("azure-devops")); + } + + #[test] + fn test_ado_tool_not_set_not_generated() { + let 
fm = minimal_front_matter(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + assert!(!config.mcp_servers.contains_key("azure-devops")); + } + + #[test] + fn test_ado_tool_skips_manual_mcp_entry() { + // When tools.azure-devops is enabled AND mcp-servers also has azure-devops, + // the tools config takes precedence and the manual entry is skipped. + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: auto-org\nmcp-servers:\n azure-devops:\n container: \"node:20-slim\"\n entrypoint: \"npx\"\n entrypoint-args: [\"-y\", \"@azure-devops/mcp\", \"manual-org\"]\n---\n", + ) + .unwrap(); + let config = generate_mcpg_config(&fm, &CompileContext::for_test(&fm), &collect_extensions(&fm)).unwrap(); + let ado = config.mcp_servers.get("azure-devops").unwrap(); + // Should use the auto-configured org, not the manual one + let args = ado.entrypoint_args.as_ref().unwrap(); + assert!(args.contains(&"auto-org".to_string())); + assert!(!args.contains(&"manual-org".to_string())); + } + + #[test] + fn test_ado_tool_docker_env_passthrough() { + let (fm, _) = parse_markdown( + "---\nname: test\ndescription: test\ntools:\n azure-devops: true\npermissions:\n read: my-read-sc\n---\n", + ) + .unwrap(); + let env = generate_mcpg_docker_env(&fm); + assert!( + env.contains("AZURE_DEVOPS_EXT_PAT"), + "Should include ADO token passthrough when permissions.read is set" + ); + } + + // ─── validate_docker_args ──────────────────────────────────────────────── + + #[test] + fn test_validate_docker_args_privileged_flag() { + let warnings = validate_docker_args(&["--privileged".to_string()], "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("--privileged"), "should warn about --privileged"); + } + + #[test] + fn test_validate_docker_args_entrypoint_in_args_warns() { + let warnings = validate_docker_args( + &[ + "--entrypoint".to_string(), + "/bin/sh".to_string(), + ], + 
"my-mcp", + ); + assert!(warnings.iter().any(|w| w.contains("--entrypoint") && w.contains("entrypoint:")), + "should warn about --entrypoint with hint to use entrypoint: field"); + } + + #[test] + fn test_validate_docker_args_volume_flag_calls_mount_validation() { + // -v docker.sock in args bypasses `mounts:` validation; should produce warnings + let warnings = validate_docker_args( + &[ + "-v".to_string(), + "/var/run/docker.sock:/var/run/docker.sock".to_string(), + ], + "my-mcp", + ); + assert!(warnings.iter().any(|w| w.contains("bypasses mounts validation")), + "should warn about volume mount in args"); + assert!(warnings.iter().any(|w| w.contains("Docker socket")), + "should propagate mount source warning for docker.sock"); + } + + #[test] + fn test_validate_docker_args_volume_equals_form() { + // --volume=source:dest form should also be detected + let warnings = validate_docker_args( + &["--volume=/var/run/docker.sock:/var/run/docker.sock".to_string()], + "my-mcp", + ); + assert!(warnings.iter().any(|w| w.contains("bypasses mounts validation")), + "should warn about --volume= form"); + } + + #[test] + fn test_validate_docker_args_safe_args_no_warnings() { + // A legitimate arg like --read-only should produce no warnings + let warnings = validate_docker_args(&["--read-only".to_string()], "my-mcp"); + assert!(warnings.is_empty(), "safe args should not produce warnings"); + } + + #[test] + fn test_validate_docker_args_empty_no_warnings() { + let warnings = validate_docker_args(&[], "my-mcp"); + assert!(warnings.is_empty(), "empty args should not produce warnings"); + } + + #[test] + fn test_validate_docker_args_volume_flag_trailing_warns() { + // -v as the last arg with no mount spec is malformed + let warnings = validate_docker_args(&["-v".to_string()], "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("malformed"), "trailing -v with no mount spec should warn"); + } + + #[test] + fn 
test_validate_docker_args_long_volume_flag_trailing_warns() { + // --volume as the last arg with no mount spec is malformed + let warnings = validate_docker_args(&["--volume".to_string()], "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("malformed"), "trailing --volume with no mount spec should warn"); + } + + // ─── validate_mcp_url ──────────────────────────────────────────────────── + + #[test] + fn test_validate_mcp_url_https_no_warnings() { + let warnings = validate_mcp_url("https://mcp.dev.azure.com/myorg", "my-mcp"); + assert!(warnings.is_empty(), "https URL should not produce warnings"); + } + + #[test] + fn test_validate_mcp_url_http_no_warnings() { + let warnings = validate_mcp_url("http://localhost:8100/mcp", "my-mcp"); + assert!(warnings.is_empty(), "http URL should not produce warnings"); + } + + #[test] + fn test_validate_mcp_url_bad_scheme_warns() { + let warnings = validate_mcp_url("ftp://files.example.com", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("does not use http://"), "non-HTTP scheme should warn"); + } + + #[test] + fn test_validate_mcp_url_no_scheme_warns() { + let warnings = validate_mcp_url("mcp.dev.azure.com/myorg", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("does not use http://"), "URL without scheme should warn"); + } + + // ─── validate_mount_source ─────────────────────────────────────────────── + + #[test] + fn test_validate_mount_source_docker_sock() { + let warnings = validate_mount_source("/var/run/docker.sock:/var/run/docker.sock:rw", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("Docker socket"), "should warn about Docker socket exposure"); + } + + #[test] + fn test_validate_mount_source_sensitive_path_etc() { + let warnings = validate_mount_source("/etc/passwd:/data/passwd:ro", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("sensitive host path"), "should warn about /etc mount"); 
+ } + + #[test] + fn test_validate_mount_source_sensitive_path_proc() { + let warnings = validate_mount_source("/proc:/host/proc:ro", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("sensitive host path"), "should warn about /proc mount"); + } + + #[test] + fn test_validate_mount_source_case_insensitive() { + // /ETC/shadow should match sensitive /etc prefix (lowercased comparison) + let warnings = validate_mount_source("/ETC/shadow:/data/shadow:ro", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("sensitive host path"), "case-insensitive match should trigger warning"); + } + + #[test] + fn test_validate_mount_source_no_false_positive_on_etc_configs() { + // /etc-configs should NOT match the /etc prefix (path boundary check requires trailing /) + let warnings = validate_mount_source("/etc-configs:/app/config:ro", "my-mcp"); + assert!(warnings.is_empty(), "/etc-configs must not match /etc prefix due to path boundary check"); + } + + #[test] + fn test_validate_mount_source_safe_path_no_warnings() { + // /app/data is not a sensitive path; should produce no warnings + let warnings = validate_mount_source("/app/data:/app/data:ro", "my-mcp"); + assert!(warnings.is_empty(), "safe path should not produce warnings"); + } + + // ─── validate_container_image ──────────────────────────────────────────── + + #[test] + fn test_validate_container_image_empty_string() { + let warnings = validate_container_image("", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("empty"), "should warn about empty image name"); + } + + #[test] + fn test_validate_container_image_shell_metacharacters() { + let warnings = validate_container_image("node:20-slim; rm -rf /", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("unexpected characters"), "should warn about shell metacharacters"); + } + + #[test] + fn test_validate_container_image_valid_name_no_warnings() { + // Standard image references 
should produce no warnings + assert!(validate_container_image("node:20-slim", "my-mcp").is_empty()); + assert!(validate_container_image("ghcr.io/org/image:latest", "my-mcp").is_empty()); + assert!(validate_container_image("python:3.12-slim", "my-mcp").is_empty()); + } + + // ─── warn_potential_secrets ────────────────────────────────────────────── + + #[test] + fn test_warn_potential_secrets_token_env_var_triggers() { + let env = HashMap::from([("API_TOKEN".to_string(), "secret123".to_string())]); + let headers = HashMap::new(); + let warnings = warn_potential_secrets("my-mcp", &env, &headers); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("API_TOKEN"), "should warn about secret-looking env var"); + } + + #[test] + fn test_warn_potential_secrets_empty_passthrough_no_warnings() { + // Empty string = passthrough; should NOT trigger a warning + let env = HashMap::from([("API_TOKEN".to_string(), "".to_string())]); + let headers = HashMap::new(); + let warnings = warn_potential_secrets("my-mcp", &env, &headers); + assert!(warnings.is_empty(), "empty passthrough value must not trigger a warning"); + } + + #[test] + fn test_warn_potential_secrets_authorization_header_triggers() { + let env = HashMap::new(); + let headers = + HashMap::from([("Authorization".to_string(), "Bearer abc".to_string())]); + let warnings = warn_potential_secrets("my-mcp", &env, &headers); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("Authorization"), "should warn about Authorization header"); + } + + #[test] + fn test_warn_potential_secrets_bearer_value_triggers() { + // A header whose value starts with "Bearer " should also warn + let env = HashMap::new(); + let headers = + HashMap::from([("X-Custom-Auth".to_string(), "Bearer token123".to_string())]); + let warnings = warn_potential_secrets("my-mcp", &env, &headers); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("X-Custom-Auth"), "should warn about header with Bearer value"); + } + + #[test] 
+ fn test_warn_potential_secrets_safe_env_no_warnings() { + // Env keys with non-secret names and non-empty values should produce no warnings + let env = HashMap::from([("MY_CONFIG".to_string(), "value".to_string())]); + let headers = HashMap::new(); + let warnings = warn_potential_secrets("my-mcp", &env, &headers); + assert!(warnings.is_empty(), "non-secret env var should not produce warnings"); + } } diff --git a/src/compile/mod.rs b/src/compile/mod.rs index 7b497829..e4ee1713 100644 --- a/src/compile/mod.rs +++ b/src/compile/mod.rs @@ -1,10 +1,10 @@ //! Pipeline compilation module. //! //! This module provides compilation of agent markdown files into Azure DevOps pipeline YAML. -//! Two targets are supported: +//! Two targets are supported, both sharing the same execution model (Copilot CLI + AWF + MCPG): //! -//! - **Standalone**: Full-featured pipeline with custom network proxy, MCP firewall, and safe outputs -//! - **1ES**: Integration with 1ES Pipeline Templates using the agencyJob type +//! - **Standalone**: Self-contained pipeline with AWF network isolation +//! - **1ES**: Integration with 1ES Pipeline Templates for SDL compliance mod common; pub mod extensions; diff --git a/src/compile/onees.rs b/src/compile/onees.rs index 2ef24fa1..a845826d 100644 --- a/src/compile/onees.rs +++ b/src/compile/onees.rs @@ -1,35 +1,25 @@ //! 1ES Pipeline Template compiler. //! -//! This compiler generates a pipeline that extends the 1ES Unofficial Pipeline Template: -//! - Uses `templateContext.type: agencyJob` for the main agent job -//! - Integrates with 1ES SDL scanning and compliance tools -//! - Custom jobs for threat analysis and safe output processing -//! -//! Limitations: -//! - MCP servers use service connections (no custom `command:` support) -//! - Network isolation is handled by OneBranch (no custom proxy allow-lists) +//! This compiler generates a pipeline that extends the 1ES Unofficial Pipeline Template +//! 
with Copilot CLI, AWF network isolation, and MCP Gateway — matching the standalone +//! pipeline model while maintaining 1ES SDL compliance. use anyhow::{Context, Result}; use async_trait::async_trait; use log::info; -use std::collections::HashMap; use std::path::Path; use super::Compiler; use super::common::{ - self, AWF_VERSION, COPILOT_CLI_VERSION, DEFAULT_POOL, build_parameters, - compute_effective_workspace, generate_acquire_ado_token, generate_checkout_self, - generate_checkout_steps, generate_ci_trigger, generate_copilot_ado_env, - generate_copilot_params, generate_executor_ado_env, generate_header_comment, - generate_job_timeout, generate_parameters, generate_pipeline_path, - generate_pipeline_resources, generate_pr_trigger, generate_repositories, generate_schedule, - generate_source_path, generate_working_directory, is_custom_mcp, replace_with_indent, - validate_comment_target, validate_front_matter_identity, - validate_resolve_pr_thread_statuses, validate_submit_pr_review_events, - validate_update_pr_votes, validate_update_work_item_target, validate_write_permissions, + AWF_VERSION, MCPG_VERSION, MCPG_IMAGE, + CompileConfig, compile_shared, + generate_allowed_domains, + generate_cancel_previous_builds, + generate_enabled_tools_args, + generate_mcpg_config, generate_mcpg_docker_env, + format_steps_yaml_indented, }; -use super::extensions::CompilerExtension; -use super::types::{FrontMatter, McpConfig}; +use super::types::FrontMatter; /// 1ES Pipeline Template compiler. 
pub struct OneESCompiler; @@ -49,357 +39,82 @@ impl Compiler for OneESCompiler { ) -> Result { info!("Compiling for 1ES target"); - // Validate inputs early, before any values are used in template substitution - validate_front_matter_identity(front_matter)?; - - // Load 1ES template - let template = include_str!("../../templates/1es-base.yml"); - - // Generate schedule - let schedule = match &front_matter.schedule { - Some(s) => generate_schedule(&front_matter.name, s) - .with_context(|| format!("Failed to parse schedule '{}'", s.expression()))?, - None => String::new(), - }; - - let repositories = generate_repositories(&front_matter.repositories); - let checkout_steps = generate_checkout_steps(&front_matter.checkout); - let checkout_self = generate_checkout_self(); + // Collect extensions (needed for MCPG config and allowed domains) let extensions = super::extensions::collect_extensions(front_matter); - // Build compile context with inferred metadata - let input_dir = input_path.parent().unwrap_or(std::path::Path::new(".")); + // Build compile context for MCPG config generation + let input_dir = input_path.parent().unwrap_or(Path::new(".")); let ctx = super::extensions::CompileContext::new(front_matter, input_dir).await; - // Run extension validations (warnings + errors) - for ext in &extensions { - for warning in ext.validate(&ctx)? 
{ - eprintln!("Warning: {}", warning); - } - } - - let copilot_params = generate_copilot_params(front_matter, &extensions)?; - let has_memory = front_matter - .tools - .as_ref() - .and_then(|t| t.cache_memory.as_ref()) - .is_some_and(|cm| cm.is_enabled()); - let parameters = build_parameters(&front_matter.parameters, has_memory); - let parameters_yaml = generate_parameters(¶meters)?; + // Generate values shared with standalone that are passed as extra replacements + let allowed_domains = generate_allowed_domains(front_matter, &extensions)?; + let enabled_tools_args = generate_enabled_tools_args(front_matter); + let cancel_previous_builds = generate_cancel_previous_builds(&front_matter.triggers); - let effective_workspace = compute_effective_workspace( - &front_matter.workspace, - &front_matter.checkout, - &front_matter.name, - ); - let working_directory = generate_working_directory(&effective_workspace); - let pipeline_resources = generate_pipeline_resources(&front_matter.triggers)?; - let has_schedule = front_matter.schedule.is_some(); - let pr_trigger = generate_pr_trigger(&front_matter.triggers, has_schedule); - let ci_trigger = generate_ci_trigger(&front_matter.triggers, has_schedule); - let source_path = generate_source_path(input_path); - let pipeline_path = generate_pipeline_path(output_path); - - // Pool - for 1ES we need both name and os - let pool = front_matter - .pool - .as_ref() - .map(|p| p.name().to_string()) - .unwrap_or_else(|| DEFAULT_POOL.to_string()); - - // Generate 1ES-specific content - let agent_context_root = generate_agent_context_root(&effective_workspace); - let mcp_configuration = generate_mcp_configuration(&front_matter.mcp_servers); - let prepare_steps = generate_inline_steps(&front_matter.steps); - - // Default finalize step to avoid empty stepList - let default_finalize_step = serde_yaml::from_str::( - r#"bash: echo "Agent task completed" -displayName: "Finalize""#, - ) - .expect("default finalize step should be valid YAML"); - let 
finalize_steps = if front_matter.post_steps.is_empty() { - generate_inline_steps(&[default_finalize_step]) - } else { - generate_inline_steps(&front_matter.post_steps) - }; + let mcpg_config = generate_mcpg_config(front_matter, &ctx, &extensions)?; + let mcpg_config_json = serde_json::to_string_pretty(&mcpg_config) + .context("Failed to serialize MCPG config")?; + let mcpg_docker_env = generate_mcpg_docker_env(front_matter); + // Generate 1ES-specific setup/teardown jobs (no per-job pool, uses templateContext). + // These override the shared {{ setup_job }} / {{ teardown_job }} markers via + // extra_replacements, which are applied before the shared replacements. let setup_job = generate_setup_job(&front_matter.setup, &front_matter.name); let teardown_job = generate_teardown_job(&front_matter.teardown, &front_matter.name); - let agentic_depends_on = if !front_matter.setup.is_empty() { - "dependsOn: SetupJob".to_string() - } else { - String::new() - }; - let job_timeout = generate_job_timeout(front_matter); - - // Load threat analysis prompt template - let threat_analysis_prompt = include_str!("../../templates/threat-analysis.md"); - - // Insert threat analysis prompt first - let template = replace_with_indent( - template, - "{{ threat_analysis_prompt }}", - threat_analysis_prompt, - ); - - // Generate service connection token acquisition steps and env vars - let acquire_read_token = generate_acquire_ado_token( - front_matter - .permissions - .as_ref() - .and_then(|p| p.read.as_deref()), - "SC_READ_TOKEN", - ); - let copilot_ado_env = generate_copilot_ado_env( - front_matter - .permissions - .as_ref() - .and_then(|p| p.read.as_deref()), - ); - let acquire_write_token = generate_acquire_ado_token( - front_matter - .permissions - .as_ref() - .and_then(|p| p.write.as_deref()), - "SC_WRITE_TOKEN", - ); - let executor_ado_env = generate_executor_ado_env( - front_matter - .permissions - .as_ref() - .and_then(|p| p.write.as_deref()), - ); - - // Validate that 
write-requiring safe-outputs have a write service connection - validate_write_permissions(front_matter)?; - // Validate comment-on-work-item has required target field - validate_comment_target(front_matter)?; - // Validate update-work-item has required target field - validate_update_work_item_target(front_matter)?; - // Validate submit-pr-review has required allowed-events field - validate_submit_pr_review_events(front_matter)?; - // Validate update-pr vote operation has required allowed-votes field - validate_update_pr_votes(front_matter)?; - // Validate resolve-pr-review-thread has required allowed-statuses field - validate_resolve_pr_thread_statuses(front_matter)?; - - // NOTE: 1ES target does not support --enabled-tools filtering (safe-outputs - // tool filtering). 1ES uses service connections for MCP servers rather than - // mcp-http, so generate_enabled_tools_args is not called here. If safe-outputs - // filtering is needed for 1ES, it would require changes to the 1ES pipeline - // template and agency job configuration. 
- // Replace all template markers - let compiler_version = env!("CARGO_PKG_VERSION"); - let replacements: Vec<(&str, &str)> = vec![ - ("{{ parameters }}", ¶meters_yaml), - ("{{ compiler_version }}", compiler_version), - // No-op for 1ES (template doesn't use AWF), but included for forward-compatibility - ("{{ firewall_version }}", AWF_VERSION), - ("{{ copilot_version }}", COPILOT_CLI_VERSION), - ("{{ pool }}", &pool), - ("{{ schedule }}", &schedule), - ("{{ pr_trigger }}", &pr_trigger), - ("{{ ci_trigger }}", &ci_trigger), - ("{{ repositories }}", &repositories), - ("{{ pipeline_resources }}", &pipeline_resources), - ("{{ checkout_self }}", &checkout_self), - ("{{ checkout_repositories }}", &checkout_steps), - ("{{ agent_name }}", &front_matter.name), - ("{{ agent_description }}", &front_matter.description), - ("{{ agent_context_root }}", &agent_context_root), - ("{{ agent_content }}", markdown_body), - ("{{ prepare_steps }}", &prepare_steps), - ("{{ finalize_steps }}", &finalize_steps), - ("{{ global_options }}", ""), - ("{{ log_level }}", ""), - ("{{ mcp_configuration }}", &mcp_configuration), - ("{{ agentic_depends_on }}", &agentic_depends_on), - ("{{ job_timeout }}", &job_timeout), - ("{{ setup_job }}", &setup_job), - ("{{ teardown_job }}", &teardown_job), - ("{{ source_path }}", &source_path), - ("{{ pipeline_path }}", &pipeline_path), - ("{{ working_directory }}", &working_directory), - ("{{ workspace }}", &working_directory), - ("{{ copilot_params }}", &copilot_params), - ("{{ acquire_ado_token }}", &acquire_read_token), - ("{{ copilot_ado_env }}", &copilot_ado_env), - ("{{ acquire_write_token }}", &acquire_write_token), - ("{{ executor_ado_env }}", &executor_ado_env), - ]; - - let pipeline_yaml = replacements - .into_iter() - .fold(template, |yaml, (placeholder, replacement)| { - replace_with_indent(&yaml, placeholder, replacement) - }); - - // Warn about custom MCP limitations - if front_matter - .mcp_servers - .iter() - .any(|(_, c)| is_custom_mcp(c)) - { 
- eprintln!( - "Warning: Custom MCP servers (with container: or url:) are not supported in 1ES target. \ - They will be ignored. Use standalone target for full MCP support." - ); - } - - // Prepend header comment for pipeline detection - let header = generate_header_comment(input_path); - let pipeline_yaml = format!("{}{}", header, pipeline_yaml); + let config = CompileConfig { + template: include_str!("../../templates/1es-base.yml").to_string(), + extra_replacements: vec![ + ("{{ firewall_version }}".into(), AWF_VERSION.into()), + ("{{ mcpg_version }}".into(), MCPG_VERSION.into()), + ("{{ mcpg_image }}".into(), MCPG_IMAGE.into()), + ("{{ allowed_domains }}".into(), allowed_domains), + ("{{ enabled_tools_args }}".into(), enabled_tools_args), + ("{{ cancel_previous_builds }}".into(), cancel_previous_builds), + ("{{ mcpg_config }}".into(), mcpg_config_json), + ("{{ mcpg_docker_env }}".into(), mcpg_docker_env), + ("{{ setup_job }}".into(), setup_job), + ("{{ teardown_job }}".into(), teardown_job), + ], + }; - Ok(pipeline_yaml) + compile_shared(input_path, output_path, front_matter, markdown_body, &extensions, &ctx, config).await } } // ==================== 1ES-specific helpers ==================== -/// Generate agent context root for 1ES templates -fn generate_agent_context_root(effective_workspace: &str) -> String { - match effective_workspace { - "repo" => "$(Build.Repository.Name)".to_string(), - "root" | _ => ".".to_string(), - } -} - -/// Generate MCP configuration for 1ES templates. -/// -/// In 1ES, MCPs require service connections. Only MCPs with explicit -/// `service_connection` configuration or custom commands are included. 
-fn generate_mcp_configuration(mcps: &HashMap) -> String { - let mut mcp_entries: Vec<_> = mcps - .iter() - .filter_map(|(name, config)| { - let (is_enabled, opts) = match config { - McpConfig::Enabled(enabled) => (*enabled, None), - McpConfig::WithOptions(o) => (o.enabled.unwrap_or(true), Some(o)), - }; - - if !is_enabled { - return None; - } - - // Custom MCPs with container/url: not supported in 1ES (needs service connection) - if is_custom_mcp(config) { - log::warn!( - "MCP '{}' uses custom container/url — not supported in 1ES target (requires service connection)", - name - ); - return None; - } - - // Use explicit service connection or generate default. - // Warn when falling back to the naming convention — the generated - // service connection reference may not exist in the ADO project. - let service_connection = opts - .and_then(|o| o.service_connection.clone()) - .unwrap_or_else(|| { - let default = format!("mcp-{}-service-connection", name); - log::warn!( - "MCP '{}' has no explicit service connection in 1ES target — \ - assuming '{}' exists", - name, - default, - ); - default - }); - - Some((name.clone(), service_connection)) - }) - .collect(); - - if mcp_entries.is_empty() { - return "{}".to_string(); - } - - // Sort for deterministic output - mcp_entries.sort_by(|a, b| a.0.cmp(&b.0)); - - mcp_entries - .iter() - .map(|(name, sc)| format!("{}:\n serviceConnection: {}", name, sc)) - .collect::>() - .join("\n") -} - -/// Generate inline steps YAML (for adding to existing step list) -/// Returns empty string when no steps (blank lines are valid in YAML) -fn generate_inline_steps(steps: &[serde_yaml::Value]) -> String { - if steps.is_empty() { - return String::new(); - } - - common::format_steps_yaml_indented(steps, 0) -} - -/// Generate setup job for 1ES template +/// Generate setup job for 1ES template. +/// Unlike standalone, 1ES jobs don't have per-job `pool:` — the pool is at +/// the top-level `parameters.pool`. 
Jobs use `templateContext: type: buildJob`. fn generate_setup_job(setup_steps: &[serde_yaml::Value], agent_name: &str) -> String { if setup_steps.is_empty() { return String::new(); } - let steps_yaml: Vec<_> = setup_steps - .iter() - .filter_map(|step| { - serde_yaml::to_string(step).ok().map(|yaml| { - yaml.trim() - .lines() - .enumerate() - .map(|(i, line)| { - if i == 0 { - format!("- {}", line.trim_start_matches("---").trim()) - } else { - format!(" {}", line) - } - }) - .collect::>() - .join("\n") - }) - }) - .collect(); + let steps_yaml = format_steps_yaml_indented(setup_steps, 6); format!( r#"- job: SetupJob displayName: "{} - Setup" templateContext: type: buildJob - steps: - - checkout: self - {}"#, - agent_name, - steps_yaml.join("\n ") + steps: + - checkout: self +{} +"#, + agent_name, steps_yaml ) } -/// Generate teardown job for 1ES template +/// Generate teardown job for 1ES template. +/// Unlike standalone, 1ES jobs don't have per-job `pool:`. fn generate_teardown_job(teardown_steps: &[serde_yaml::Value], agent_name: &str) -> String { if teardown_steps.is_empty() { return String::new(); } - let steps_yaml: Vec<_> = teardown_steps - .iter() - .filter_map(|step| { - serde_yaml::to_string(step).ok().map(|yaml| { - yaml.trim() - .lines() - .enumerate() - .map(|(i, line)| { - if i == 0 { - format!("- {}", line.trim_start_matches("---").trim()) - } else { - format!(" {}", line) - } - }) - .collect::>() - .join("\n") - }) - }) - .collect(); + let steps_yaml = format_steps_yaml_indented(teardown_steps, 6); format!( r#"- job: TeardownJob @@ -407,121 +122,17 @@ fn generate_teardown_job(teardown_steps: &[serde_yaml::Value], agent_name: &str) dependsOn: ProcessSafeOutputs templateContext: type: buildJob - steps: - - checkout: self - {}"#, - agent_name, - steps_yaml.join("\n ") + steps: + - checkout: self +{} +"#, + agent_name, steps_yaml ) } #[cfg(test)] mod tests { use super::*; - use super::super::types::McpOptions; - - // ─── generate_agent_context_root 
───────────────────────────────────────── - - #[test] - fn test_generate_agent_context_root_repo() { - assert_eq!( - generate_agent_context_root("repo"), - "$(Build.Repository.Name)" - ); - } - - #[test] - fn test_generate_agent_context_root_root() { - assert_eq!(generate_agent_context_root("root"), "."); - } - - #[test] - fn test_generate_agent_context_root_unknown_defaults_to_dot() { - // Any unrecognised workspace value should fall through to "." - assert_eq!(generate_agent_context_root("something-else"), "."); - } - - // ─── generate_mcp_configuration ────────────────────────────────────────── - - #[test] - fn test_generate_mcp_configuration_empty_returns_braces() { - let mcps = HashMap::new(); - let result = generate_mcp_configuration(&mcps); - assert_eq!(result, "{}"); - } - - #[test] - fn test_generate_mcp_configuration_skips_custom_mcp_with_command() { - let mut mcps = HashMap::new(); - mcps.insert( - "my-tool".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - ..Default::default() - }), - ); - let result = generate_mcp_configuration(&mcps); - // Custom MCPs with `command:` are not supported in 1ES — must be excluded - assert!( - !result.contains("my-tool"), - "Custom MCP with command should be excluded in 1ES target" - ); - assert_eq!(result, "{}", "Only custom MCPs → empty config"); - } - - #[test] - fn test_generate_mcp_configuration_service_connection_mcp() { - let mut mcps = HashMap::new(); - mcps.insert( - "my-mcp".to_string(), - McpConfig::WithOptions(McpOptions { - service_connection: Some("mcp-my-mcp-sc".to_string()), - ..Default::default() - }), - ); - let result = generate_mcp_configuration(&mcps); - assert!(result.contains("my-mcp"), "Service-connection MCP should appear in output"); - assert!( - result.contains("serviceConnection: mcp-my-mcp-sc"), - "Should reference the explicit service connection" - ); - } - - #[test] - fn test_generate_mcp_configuration_default_service_connection_naming() { - 
// When no explicit service_connection is set, a default name is generated. - let mut mcps = HashMap::new(); - mcps.insert("my-tool".to_string(), McpConfig::Enabled(true)); - let result = generate_mcp_configuration(&mcps); - assert!(result.contains("my-tool")); - assert!(result.contains("serviceConnection: mcp-my-tool-service-connection")); - } - - #[test] - fn test_generate_mcp_configuration_disabled_mcp_excluded() { - let mut mcps = HashMap::new(); - mcps.insert("disabled-mcp".to_string(), McpConfig::Enabled(false)); - let result = generate_mcp_configuration(&mcps); - assert!(!result.contains("disabled-mcp"), "Disabled MCP should not appear in output"); - assert_eq!(result, "{}"); - } - - // ─── generate_inline_steps ──────────────────────────────────────────────── - - #[test] - fn test_generate_inline_steps_empty() { - let result = generate_inline_steps(&[]); - assert!(result.is_empty(), "Empty steps list should return empty string"); - } - - #[test] - fn test_generate_inline_steps_single_step() { - let step: serde_yaml::Value = - serde_yaml::from_str("bash: echo hello").expect("valid yaml"); - let result = generate_inline_steps(&[step]); - assert!(result.contains("bash"), "Step YAML should contain the bash key"); - assert!(result.contains("echo hello"), "Step YAML should contain the command"); - } // ─── generate_setup_job ────────────────────────────────────────────────── @@ -543,6 +154,9 @@ mod tests { ); assert!(result.contains("checkout: self"), "Should include self checkout"); assert!(result.contains("echo setup"), "Should include the step content"); + assert!(result.contains("templateContext"), "Should include templateContext"); + assert!(result.contains("type: buildJob"), "Should use buildJob type"); + assert!(!result.contains("pool:"), "Should not include per-job pool"); } // ─── generate_teardown_job ─────────────────────────────────────────────── @@ -569,5 +183,7 @@ mod tests { ); assert!(result.contains("checkout: self"), "Should include self 
checkout"); assert!(result.contains("echo teardown"), "Should include the step content"); + assert!(result.contains("templateContext"), "Should include templateContext"); + assert!(!result.contains("pool:"), "Should not include per-job pool"); } } \ No newline at end of file diff --git a/src/compile/standalone.rs b/src/compile/standalone.rs index cafe7e5b..5afcec04 100644 --- a/src/compile/standalone.rs +++ b/src/compile/standalone.rs @@ -9,28 +9,18 @@ use anyhow::{Context, Result}; use async_trait::async_trait; use log::info; -use std::collections::HashMap; use std::path::Path; use super::Compiler; use super::common::{ - self, AWF_VERSION, COPILOT_CLI_VERSION, DEFAULT_POOL, MCPG_PORT, MCPG_VERSION, MCPG_IMAGE, - build_parameters, compute_effective_workspace, generate_acquire_ado_token, - generate_cancel_previous_builds, generate_checkout_self, generate_checkout_steps, - generate_ci_trigger, generate_copilot_ado_env, generate_copilot_params, - generate_enabled_tools_args, generate_executor_ado_env, generate_header_comment, - generate_job_timeout, generate_parameters, generate_pipeline_path, generate_pipeline_resources, - generate_pr_trigger, generate_repositories, generate_schedule, generate_source_path, - generate_working_directory, replace_with_indent, sanitize_filename, validate_comment_target, - validate_front_matter_identity, validate_resolve_pr_thread_statuses, - validate_submit_pr_review_events, validate_update_pr_votes, validate_update_work_item_target, - validate_write_permissions, + AWF_VERSION, MCPG_VERSION, MCPG_IMAGE, + CompileConfig, compile_shared, + generate_allowed_domains, + generate_cancel_previous_builds, + generate_enabled_tools_args, + generate_mcpg_config, generate_mcpg_docker_env, }; -use super::extensions::{CompilerExtension, McpgServerConfig, McpgGatewayConfig, McpgConfig}; -use super::types::{FrontMatter, McpConfig}; -use crate::allowed_hosts::{CORE_ALLOWED_HOSTS, mcp_required_hosts}; -use crate::ecosystem_domains::{get_ecosystem_domains, 
is_ecosystem_identifier, is_known_ecosystem}; -use std::collections::HashSet; +use super::types::FrontMatter; /// Standalone pipeline compiler. pub struct StandaloneCompiler; @@ -50,1773 +40,51 @@ impl Compiler for StandaloneCompiler { ) -> Result { info!("Compiling for standalone target"); - // Validate inputs early, before any values are used in template substitution - validate_front_matter_identity(front_matter)?; - - // Load base template - let template = include_str!("../../templates/base.yml"); - - // Generate schedule - let schedule = match &front_matter.schedule { - Some(s) => generate_schedule(&front_matter.name, s) - .with_context(|| format!("Failed to parse schedule '{}'", s.expression()))?, - None => String::new(), - }; - - let repositories = generate_repositories(&front_matter.repositories); - let checkout_steps = generate_checkout_steps(&front_matter.checkout); - let checkout_self = generate_checkout_self(); - let agent_name = sanitize_filename(&front_matter.name); - - // Collect compiler extensions (runtimes + first-party tools) + // Collect extensions (needed before compile_shared for MCPG config) let extensions = super::extensions::collect_extensions(front_matter); - // Build compile context with inferred metadata (ADO org from git remote, etc.) + // Build compile context for MCPG config generation let input_dir = input_path.parent().unwrap_or(std::path::Path::new(".")); let ctx = super::extensions::CompileContext::new(front_matter, input_dir).await; - // Run extension validations (warnings + errors) - for ext in &extensions { - for warning in ext.validate(&ctx)? 
{ - eprintln!("Warning: {}", warning); - } - } - - let copilot_params = generate_copilot_params(front_matter, &extensions)?; - - // Compute effective workspace - let effective_workspace = compute_effective_workspace( - &front_matter.workspace, - &front_matter.checkout, - &front_matter.name, - ); - let working_directory = generate_working_directory(&effective_workspace); - let pipeline_resources = generate_pipeline_resources(&front_matter.triggers)?; - let has_schedule = front_matter.schedule.is_some(); - let pr_trigger = generate_pr_trigger(&front_matter.triggers, has_schedule); - let ci_trigger = generate_ci_trigger(&front_matter.triggers, has_schedule); - let cancel_previous_builds = generate_cancel_previous_builds(&front_matter.triggers); - - // Generate source path for Stage 2 - let source_path = generate_source_path(input_path); - - // Generate pipeline path for integrity checking - let pipeline_path = generate_pipeline_path(output_path); - - // Generate comma-separated domain list for AWF + // Standalone-specific values let allowed_domains = generate_allowed_domains(front_matter, &extensions)?; - - // Generate --enabled-tools args for SafeOutputs tool filtering let enabled_tools_args = generate_enabled_tools_args(front_matter); + let cancel_previous_builds = generate_cancel_previous_builds(&front_matter.triggers); - // Pool name - let pool = front_matter - .pool - .as_ref() - .map(|p| p.name().to_string()) - .unwrap_or_else(|| DEFAULT_POOL.to_string()); - - // Generate hooks - let setup_job = generate_setup_job(&front_matter.setup, &front_matter.name, &pool); - let teardown_job = generate_teardown_job(&front_matter.teardown, &front_matter.name, &pool); - let has_memory = front_matter - .tools - .as_ref() - .and_then(|t| t.cache_memory.as_ref()) - .is_some_and(|cm| cm.is_enabled()); - - // Build parameters list: user-defined + auto-injected clearMemory for memory - let parameters = build_parameters(&front_matter.parameters, has_memory); - let parameters_yaml = 
generate_parameters(¶meters)?; - - let prepare_steps = generate_prepare_steps(&front_matter.steps, &extensions)?; - let finalize_steps = generate_finalize_steps(&front_matter.post_steps); - let agentic_depends_on = generate_agentic_depends_on(&front_matter.setup); - let job_timeout = generate_job_timeout(front_matter); - - // Generate service connection token acquisition steps and env vars - let acquire_read_token = generate_acquire_ado_token( - front_matter - .permissions - .as_ref() - .and_then(|p| p.read.as_deref()), - "SC_READ_TOKEN", - ); - let copilot_ado_env = generate_copilot_ado_env( - front_matter - .permissions - .as_ref() - .and_then(|p| p.read.as_deref()), - ); - let acquire_write_token = generate_acquire_ado_token( - front_matter - .permissions - .as_ref() - .and_then(|p| p.write.as_deref()), - "SC_WRITE_TOKEN", - ); - let executor_ado_env = generate_executor_ado_env( - front_matter - .permissions - .as_ref() - .and_then(|p| p.write.as_deref()), - ); - - // Validate that write-requiring safe-outputs have a write service connection - validate_write_permissions(front_matter)?; - // Validate comment-on-work-item has required target field - validate_comment_target(front_matter)?; - // Validate update-work-item has required target field - validate_update_work_item_target(front_matter)?; - // Validate submit-pr-review has required allowed-events field - validate_submit_pr_review_events(front_matter)?; - // Validate update-pr vote operation has required allowed-votes field - validate_update_pr_votes(front_matter)?; - // Validate resolve-pr-review-thread has required allowed-statuses field - validate_resolve_pr_thread_statuses(front_matter)?; - - // Load threat analysis prompt template - let threat_analysis_prompt = include_str!("../../templates/threat-analysis.md"); - - // Insert threat analysis prompt first - let template = replace_with_indent( - template, - "{{ threat_analysis_prompt }}", - threat_analysis_prompt, - ); - - // Replace template markers - let 
compiler_version = env!("CARGO_PKG_VERSION"); - let replacements: Vec<(&str, &str)> = vec![ - ("{{ parameters }}", &parameters_yaml), - ("{{ compiler_version }}", compiler_version), - ("{{ firewall_version }}", AWF_VERSION), - ("{{ mcpg_version }}", MCPG_VERSION), - ("{{ mcpg_image }}", MCPG_IMAGE), - ("{{ copilot_version }}", COPILOT_CLI_VERSION), - ("{{ pool }}", &pool), - ("{{ setup_job }}", &setup_job), - ("{{ teardown_job }}", &teardown_job), - ("{{ prepare_steps }}", &prepare_steps), - ("{{ finalize_steps }}", &finalize_steps), - ("{{ agentic_depends_on }}", &agentic_depends_on), - ("{{ job_timeout }}", &job_timeout), - ("{{ repositories }}", &repositories), - ("{{ schedule }}", &schedule), - ("{{ pipeline_resources }}", &pipeline_resources), - ("{{ pr_trigger }}", &pr_trigger), - ("{{ ci_trigger }}", &ci_trigger), - ("{{ checkout_self }}", &checkout_self), - ("{{ checkout_repositories }}", &checkout_steps), - ("{{ cancel_previous_builds }}", &cancel_previous_builds), - ("{{ agent }}", &agent_name), - ("{{ agent_name }}", &front_matter.name), - ("{{ agent_description }}", &front_matter.description), - ("{{ copilot_params }}", &copilot_params), - ("{{ source_path }}", &source_path), - ("{{ pipeline_path }}", &pipeline_path), - ("{{ working_directory }}", &working_directory), - ("{{ workspace }}", &working_directory), - ("{{ allowed_domains }}", &allowed_domains), - ("{{ enabled_tools_args }}", &enabled_tools_args), - ("{{ agent_content }}", markdown_body), - ("{{ acquire_ado_token }}", &acquire_read_token), - ("{{ copilot_ado_env }}", &copilot_ado_env), - ("{{ acquire_write_token }}", &acquire_write_token), - ("{{ executor_ado_env }}", &executor_ado_env), - ]; - - let pipeline_yaml = replacements - .into_iter() - .fold(template, |yaml, (placeholder, replacement)| { - replace_with_indent(&yaml, placeholder, replacement) - }); - - // Always generate MCPG config — safeoutputs is always required regardless - // of whether additional mcp-servers are configured in front 
matter. - let config = generate_mcpg_config(front_matter, &ctx, &extensions)?; + let config_obj = generate_mcpg_config(front_matter, &ctx, &extensions)?; let mcpg_config_json = - serde_json::to_string_pretty(&config).context("Failed to serialize MCPG config")?; - - let pipeline_yaml = - replace_with_indent(&pipeline_yaml, "{{ mcpg_config }}", &mcpg_config_json); - - // Generate additional -e flags for MCPG Docker run (env passthrough for MCP containers) + serde_json::to_string_pretty(&config_obj).context("Failed to serialize MCPG config")?; let mcpg_docker_env = generate_mcpg_docker_env(front_matter); - let pipeline_yaml = - replace_with_indent(&pipeline_yaml, "{{ mcpg_docker_env }}", &mcpg_docker_env); - - // Prepend header comment for pipeline detection - let header = generate_header_comment(input_path); - let pipeline_yaml = format!("{}{}", header, pipeline_yaml); - - Ok(pipeline_yaml) - } -} - -// ==================== Standalone-specific helpers ==================== - -/// Generate the allowed domains list for AWF network isolation. -/// -/// This generates a comma-separated list of domain patterns for AWF's -/// `--allow-domains` flag. The list includes: -/// 1. Core Azure DevOps/GitHub endpoints -/// 2. MCP-specific endpoints for each enabled MCP -/// 3. 
User-specified additional hosts from network.allowed -fn generate_allowed_domains( - front_matter: &FrontMatter, - extensions: &[super::extensions::Extension], -) -> Result<String> { - // Collect enabled MCP names (user-defined MCPs, not first-party tools) - let enabled_mcps: Vec<String> = front_matter - .mcp_servers - .iter() - .filter_map(|(name, config)| { - let is_enabled = match config { - McpConfig::Enabled(enabled) => *enabled, - McpConfig::WithOptions(_) => true, - }; - if is_enabled { Some(name.clone()) } else { None } - }) - .collect(); - - // Get user-specified hosts - let user_hosts: Vec<String> = front_matter - .network - .as_ref() - .map(|n| n.allowed.clone()) - .unwrap_or_default(); - - // Generate the allowlist by combining core + MCP + extension + user hosts - let mut hosts: HashSet<String> = HashSet::new(); - - // Add core hosts - for host in CORE_ALLOWED_HOSTS { - hosts.insert((*host).to_string()); - } - - // Add host.docker.internal — required for the AWF container to reach - // MCPG and SafeOutputs on the host. Only added for standalone pipelines - // that always use MCPG. - hosts.insert("host.docker.internal".to_string()); - - // Add MCP-specific hosts (user-defined MCPs via mcp_required_hosts lookup) - for mcp in &enabled_mcps { - for host in mcp_required_hosts(mcp) { - hosts.insert((*host).to_string()); - } - } - - // Add extension-declared hosts (runtimes + first-party tools). - // Extensions may return ecosystem identifiers (e.g., "lean") which are - // expanded to their domain lists, or raw domain names. 
- for ext in extensions { - for host in ext.required_hosts() { - if is_ecosystem_identifier(&host) { - let domains = get_ecosystem_domains(&host); - if domains.is_empty() { - eprintln!( - "warning: extension '{}' requires unknown ecosystem '{}'; \ - no domains added", - ext.name(), - host - ); - } - for domain in domains { - hosts.insert(domain); - } - } else { - hosts.insert(host); - } - } - } - - // Add user-specified hosts (validated against DNS-safe characters) - // Entries may be ecosystem identifiers (e.g., "python", "rust") which - // expand to their domain lists, or raw domain names. - for host in &user_hosts { - if is_ecosystem_identifier(host) { - let domains = get_ecosystem_domains(host); - if domains.is_empty() && !is_known_ecosystem(host) { - eprintln!( - "warning: network.allowed contains unknown ecosystem identifier '{}'. \ - Known ecosystems: python, rust, node, go, java, etc. \ - If this is a domain name, it should contain a dot.", - host - ); - } - for domain in domains { - hosts.insert(domain); - } - } else { - let valid_chars = !host.is_empty() - && host - .chars() - .all(|c| c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '*')); - if !valid_chars { - anyhow::bail!( - "network.allowed domain '{}' contains characters invalid in DNS names. \ - Only ASCII alphanumerics, '.', '-', and '*' are allowed.", - host - ); - } - if host.contains('*') && !(host.starts_with("*.") && !host[2..].contains('*')) { - anyhow::bail!( - "network.allowed domain '{}' uses '*' in an unsupported position. \ - Wildcards must appear only as a leading prefix (e.g. 
'*.example.com').", - host - ); - } - hosts.insert(host.clone()); - } - } - - // Remove blocked hosts (supports both ecosystem identifiers and raw domains) - let blocked_hosts: Vec<String> = front_matter - .network - .as_ref() - .map(|n| n.blocked.clone()) - .unwrap_or_default(); - for blocked in &blocked_hosts { - if is_ecosystem_identifier(blocked) { - for domain in get_ecosystem_domains(blocked) { - hosts.remove(&domain); - } - } else { - hosts.remove(blocked); - } - } - - // Sort for deterministic output - let mut allowlist: Vec<String> = hosts.into_iter().collect(); - allowlist.sort(); - - // Format as comma-separated list for AWF --allow-domains - Ok(allowlist.join(",")) -} - -/// Generate the setup job YAML -fn generate_setup_job(setup_steps: &[serde_yaml::Value], agent_name: &str, pool: &str) -> String { - if setup_steps.is_empty() { - return String::new(); - } - - let steps_yaml = common::format_steps_yaml_indented(setup_steps, 4); - - format!( - r#"- job: SetupJob - displayName: "{} - Setup" - pool: - name: {} - steps: - - checkout: self -{} -"#, - agent_name, pool, steps_yaml - ) -} - -/// Generate the teardown job YAML -fn generate_teardown_job( - teardown_steps: &[serde_yaml::Value], - agent_name: &str, - pool: &str, -) -> String { - if teardown_steps.is_empty() { - return String::new(); - } - - let steps_yaml = common::format_steps_yaml(teardown_steps); - - format!( - r#" - job: TeardownJob - displayName: "{} - Teardown" - dependsOn: ProcessSafeOutputs - pool: - name: {} - steps: - - checkout: self -{} -"#, - agent_name, pool, steps_yaml - ) -} - -/// Generate prepare steps (inline), including extension steps and user-defined steps. 
-fn generate_prepare_steps( - prepare_steps: &[serde_yaml::Value], - extensions: &[super::extensions::Extension], -) -> Result<String> { - let mut parts = Vec::new(); - - // Extension prepare steps and prompt supplements (runtimes + first-party tools) - for ext in extensions { - for step in ext.prepare_steps() { - parts.push(step); - } - if let Some(prompt) = ext.prompt_supplement() { - parts.push(super::extensions::wrap_prompt_append(&prompt, ext.name())?); - } - } - - if !prepare_steps.is_empty() { - parts.push(common::format_steps_yaml_indented(prepare_steps, 0)); - } - - Ok(parts.join("\n\n")) -} - -/// Generate finalize steps (inline) -fn generate_finalize_steps(finalize_steps: &[serde_yaml::Value]) -> String { - if finalize_steps.is_empty() { - return String::new(); - } - - common::format_steps_yaml_indented(finalize_steps, 0) -} - -/// Generate dependsOn clause for setup job -fn generate_agentic_depends_on(setup_steps: &[serde_yaml::Value]) -> String { - if !setup_steps.is_empty() { - "dependsOn: SetupJob".to_string() - } else { - String::new() - } -} - -/// Generate MCPG configuration from front matter. -/// -/// Converts the front matter `mcp-servers` definitions into MCPG-compatible JSON. -/// SafeOutputs is always included as an HTTP backend. Extension-contributed MCPG -/// entries (e.g., azure-devops) are included via the `extensions` parameter. -pub fn generate_mcpg_config( - front_matter: &FrontMatter, - ctx: &super::extensions::CompileContext, - extensions: &[super::extensions::Extension], -) -> Result<McpgConfig> { - let mut mcp_servers = HashMap::new(); - - // SafeOutputs is always included as an HTTP backend. - // MCPG runs with --network host, so it reaches SafeOutputs via localhost - // (not host.docker.internal, which requires Docker DNS and isn't available - // in host network mode on Linux). 
- mcp_servers.insert( - "safeoutputs".to_string(), - McpgServerConfig { - server_type: "http".to_string(), - container: None, - entrypoint: None, - entrypoint_args: None, - mounts: None, - args: None, - url: Some("http://localhost:${SAFE_OUTPUTS_PORT}/mcp".to_string()), - headers: Some(HashMap::from([( - "Authorization".to_string(), - "Bearer ${SAFE_OUTPUTS_API_KEY}".to_string(), - )])), - env: None, - tools: None, - }, - ); - - // Add extension-contributed MCPG server entries (e.g., azure-devops) - for ext in extensions { - for (name, config) in ext.mcpg_servers(ctx)? { - mcp_servers.insert(name, config); - } - } - - for (name, config) in &front_matter.mcp_servers { - // Prevent user-defined MCPs from overwriting the reserved safeoutputs backend - if name.eq_ignore_ascii_case("safeoutputs") { - log::warn!( - "MCP name 'safeoutputs' is reserved for the safe outputs HTTP backend — skipping" - ); - continue; - } - - // Skip if already auto-configured by an extension (e.g., tools.azure-devops) - if mcp_servers.contains_key(name) { - continue; - } - - let (is_enabled, options) = match config { - McpConfig::Enabled(enabled) => (*enabled, None), - McpConfig::WithOptions(opts) => (opts.enabled.unwrap_or(true), Some(opts)), - }; - - if !is_enabled { - continue; - } - - if let Some(opts) = options { - if opts.container.is_some() && opts.url.is_some() { - log::warn!( - "MCP '{}': both 'container' and 'url' are set — using 'container' (stdio). 
\ - Remove 'url' to silence this warning.", - name - ); - } - - if let Some(container) = &opts.container { - // Container-based stdio MCP (MCPG-native, per spec §3.2.1) - for w in validate_container_image(container, name) { eprintln!("{}", w); } - // Validate mount paths for sensitive host directories - for mount in &opts.mounts { - for w in validate_mount_source(mount, name) { eprintln!("{}", w); } - } - // Validate Docker runtime args for privilege escalation - for w in validate_docker_args(&opts.args, name) { eprintln!("{}", w); } - // Warn about potential inline secrets (check headers too in case user set both) - for w in warn_potential_secrets(name, &opts.env, &opts.headers) { eprintln!("{}", w); } - let entrypoint_args = if opts.entrypoint_args.is_empty() { - None - } else { - Some(opts.entrypoint_args.clone()) - }; - let args = if opts.args.is_empty() { - None - } else { - Some(opts.args.clone()) - }; - let mounts = if opts.mounts.is_empty() { - None - } else { - Some(opts.mounts.clone()) - }; - let env = if opts.env.is_empty() { - None - } else { - Some(opts.env.clone()) - }; - let tools = if opts.allowed.is_empty() { - None - } else { - Some(opts.allowed.clone()) - }; - mcp_servers.insert( - name.clone(), - McpgServerConfig { - server_type: "stdio".to_string(), - container: Some(container.clone()), - entrypoint: opts.entrypoint.clone(), - entrypoint_args, - mounts, - args, - url: None, - headers: None, - env, - tools, - }, - ); - } else if let Some(url) = &opts.url { - // HTTP-based MCP (remote server) - for w in validate_mcp_url(url, name) { eprintln!("{}", w); } - // Warn about potential inline secrets in headers - for w in warn_potential_secrets(name, &HashMap::new(), &opts.headers) { eprintln!("{}", w); } - if !opts.env.is_empty() { - eprintln!( - "Warning: MCP '{}': env vars are not supported for HTTP MCPs — they will be ignored. 
\ - Use headers for authentication instead.", - name - ); - } - let headers = if opts.headers.is_empty() { - None - } else { - Some(opts.headers.clone()) - }; - let tools = if opts.allowed.is_empty() { - None - } else { - Some(opts.allowed.clone()) - }; - mcp_servers.insert( - name.clone(), - McpgServerConfig { - server_type: "http".to_string(), - container: None, - entrypoint: None, - entrypoint_args: None, - mounts: None, - args: None, - url: Some(url.clone()), - headers, - env: None, - tools, - }, - ); - } else { - log::warn!("MCP '{}' has no container or url — skipping", name); - continue; - } - } else { - log::warn!("MCP '{}' has no container or url — skipping", name); - } - } - - Ok(McpgConfig { - mcp_servers, - gateway: McpgGatewayConfig { - port: MCPG_PORT, - domain: "host.docker.internal".to_string(), - api_key: "${MCP_GATEWAY_API_KEY}".to_string(), - payload_dir: "/tmp/gh-aw/mcp-payloads".to_string(), - }, - }) -} - -/// Sensitive host path prefixes that should not be bind-mounted into MCP containers. -const SENSITIVE_MOUNT_PREFIXES: &[&str] = &[ - "/etc", - "/root", - "/home", - "/proc", - "/sys", -]; - -/// Docker runtime flag names that grant dangerous host access. -/// Checked both as `--flag=value` and as `--flag value` (split across two args). -const DANGEROUS_DOCKER_FLAGS: &[&str] = &[ - "--privileged", - "--cap-add", - "--security-opt", - "--pid", - "--network", - "--ipc", - "--user", - "-u", - "--add-host", - "--entrypoint", -]; - -/// Validate a container image name for injection attempts. -/// Allows `[a-zA-Z0-9./_:-]` which covers standard Docker image references. 
-fn validate_container_image(image: &str, mcp_name: &str) -> Vec<String> { - let mut warnings = Vec::new(); - if image.is_empty() { - warnings.push(format!("Warning: MCP '{}': container image name is empty.", mcp_name)); - return warnings; - } - if !image.chars().all(|c| c.is_ascii_alphanumeric() || "._/:-@".contains(c)) { - warnings.push(format!( - "Warning: MCP '{}': container image '{}' contains unexpected characters. \ - Image names should only contain [a-zA-Z0-9./_:-@].", - mcp_name, image - )); - } - warnings -} - -/// Validate a volume mount source path, warning on sensitive host directories. -/// Docker socket mounts are escalated to stderr warnings since they grant container escape. -/// Note: paths are lowercased for comparison to catch cross-platform casing (e.g. `/ETC/shadow`). -fn validate_mount_source(mount: &str, mcp_name: &str) -> Vec<String> { - let mut warnings = Vec::new(); - // Format: "source:dest:mode" - if let Some(source) = mount.split(':').next() { - let source_lower = source.to_lowercase(); - if source_lower.contains("docker.sock") { - warnings.push(format!( - "Warning: MCP '{}': mount '{}' exposes the Docker socket to the MCP container. \ - This grants full host Docker access and may allow container escape.", - mcp_name, mount - )); - return warnings; - } - for prefix in SENSITIVE_MOUNT_PREFIXES { - // Match exact path or path with trailing separator to avoid false positives - // (e.g. /etc matches /etc and /etc/shadow, but not /etc-configs) - if source_lower == *prefix || source_lower.starts_with(&format!("{}/", prefix)) { - warnings.push(format!( - "Warning: MCP '{}': mount source '{}' references a sensitive host path ({}). \ - Ensure this is intentional.", - mcp_name, source, prefix - )); - break; - } - } - } - warnings -} - -/// Validate Docker runtime args for dangerous flags that could escalate privileges. -/// Also detects volume mounts smuggled via `-v`/`--volume` that bypass `mounts` validation. 
-/// Handles both `--flag=value` and `--flag value` (split) forms. -fn validate_docker_args(args: &[String], mcp_name: &str) -> Vec<String> { - let mut warnings = Vec::new(); - for (i, arg) in args.iter().enumerate() { - let arg_lower = arg.to_lowercase(); - // Check for dangerous Docker flags (both --flag=value and --flag value) - for dangerous in DANGEROUS_DOCKER_FLAGS { - if arg_lower == *dangerous - || arg_lower.starts_with(&format!("{}=", dangerous)) - { - let extra_hint = if *dangerous == "--entrypoint" { - " Use the 'entrypoint:' field instead of passing --entrypoint in args." - } else { - "" - }; - warnings.push(format!( - "Warning: MCP '{}': Docker arg '{}' grants elevated privileges. \ - Ensure this is intentional.{}", - mcp_name, arg, extra_hint - )); - } - } - // Check for volume mounts smuggled via args (bypasses mounts validation) - if arg == "-v" || arg == "--volume" { - if let Some(mount_spec) = args.get(i + 1) { - warnings.push(format!( - "Warning: MCP '{}': volume mount '{}' in args bypasses mounts validation. \ - Use the 'mounts:' field instead.", - mcp_name, mount_spec - )); - warnings.extend(validate_mount_source(mount_spec, mcp_name)); - } else { - warnings.push(format!( - "Warning: MCP '{}': '{}' flag is the last arg with no mount spec following it. \ - This is likely a malformed args list.", - mcp_name, arg - )); - } - } else if arg_lower.starts_with("-v=") || arg_lower.starts_with("--volume=") { - let mount_spec = arg.splitn(2, '=').nth(1).unwrap_or(""); - warnings.push(format!( - "Warning: MCP '{}': volume mount '{}' in args bypasses mounts validation. \ - Use the 'mounts:' field instead.", - mcp_name, mount_spec - )); - warnings.extend(validate_mount_source(mount_spec, mcp_name)); - } - } - warnings -} - -/// Validate that an MCP HTTP URL uses an allowed scheme. 
-fn validate_mcp_url(url: &str, mcp_name: &str) -> Vec<String> { - let mut warnings = Vec::new(); - if !url.starts_with("https://") && !url.starts_with("http://") { - warnings.push(format!( - "Warning: MCP '{}': URL '{}' does not use http:// or https:// scheme. \ - This may not work with MCPG.", - mcp_name, url - )); - } - warnings -} - -/// Warn when env values or headers look like they contain inline secrets. -/// Secrets should use pipeline variables and passthrough ("") instead. -fn warn_potential_secrets(mcp_name: &str, env: &HashMap<String, String>, headers: &HashMap<String, String>) -> Vec<String> { - let mut warnings = Vec::new(); - for (key, value) in env { - if !value.is_empty() && (key.to_lowercase().contains("token") - || key.to_lowercase().contains("secret") - || key.to_lowercase().contains("key") - || key.to_lowercase().contains("password") - || key.to_lowercase().contains("pat")) - { - warnings.push(format!( - "Warning: MCP '{}': env var '{}' has an inline value that may be a secret. \ - Use an empty string (\"\") for passthrough from pipeline variables instead.", - mcp_name, key - )); - } - } - for (key, value) in headers { - if value.to_lowercase().contains("bearer ") - || key.to_lowercase() == "authorization" - { - warnings.push(format!( - "Warning: MCP '{}': header '{}' may contain inline credentials. \ - These will appear in plaintext in the compiled pipeline YAML.", - mcp_name, key - )); - } - } - warnings -} - -/// Validate that a string is a legal environment variable name (`[A-Za-z_][A-Za-z0-9_]*`). -/// Prevents injection of arbitrary Docker flags via user-controlled front matter keys. -fn is_valid_env_var_name(name: &str) -> bool { - let mut chars = name.chars(); - chars - .next() - .map_or(false, |c| c.is_ascii_alphabetic() || c == '_') - && chars.all(|c| c.is_ascii_alphanumeric() || c == '_') -} - -/// Generate additional `-e` flags for the MCPG Docker run command. 
-/// -/// MCP containers spawned by MCPG may need environment variables that flow from -/// the pipeline through the MCPG container (passthrough). This function: -/// 1. Auto-maps `AZURE_DEVOPS_EXT_PAT` from `SC_READ_TOKEN` when `permissions.read` is configured -/// 2. Collects passthrough env vars (value is `""`) from container-based MCP configs -/// -/// Only container-based MCPs are considered — HTTP MCPs don't have child containers -/// that need env passthrough. -/// -/// Returns flags formatted for inline insertion in the `docker run` command. -/// The marker sits after the last hardcoded `-e` flag, so the output must -/// include leading `\\\n` for line continuation when non-empty. -pub fn generate_mcpg_docker_env(front_matter: &FrontMatter) -> String { - let mut env_flags: Vec<String> = Vec::new(); - let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new(); - // Check if any container MCP requests AZURE_DEVOPS_EXT_PAT passthrough - let any_mcp_needs_ado_token = front_matter.mcp_servers.values().any(|config| { - matches!(config, McpConfig::WithOptions(opts) - if opts.enabled.unwrap_or(true) - && opts.container.is_some() - && opts.env.contains_key("AZURE_DEVOPS_EXT_PAT")) - }); - - // Also check if tools.azure-devops is enabled (auto-configured ADO MCP always needs token) - let ado_tool_needs_token = front_matter - .tools - .as_ref() - .and_then(|t| t.azure_devops.as_ref()) - .is_some_and(|ado| ado.is_enabled()); - - // Auto-map AZURE_DEVOPS_EXT_PAT from SC_READ_TOKEN when permissions.read is configured - // AND at least one container MCP requests it via env passthrough (or the ADO tool is enabled) - if any_mcp_needs_ado_token || ado_tool_needs_token { - if front_matter.permissions.as_ref().and_then(|p| p.read.as_ref()).is_some() { - env_flags.push( - "-e AZURE_DEVOPS_EXT_PAT=\"$(SC_READ_TOKEN)\"".to_string(), - ); - seen.insert("AZURE_DEVOPS_EXT_PAT".to_string()); - } else { - eprintln!( - "Warning: one or more container MCPs request 
AZURE_DEVOPS_EXT_PAT passthrough \ - but permissions.read is not configured. The token will be empty at runtime. \ - Add `permissions: {{ read: }}` to enable auto-mapping." - ); - } - } - - // Collect passthrough env vars from container-based MCP configs only. - // HTTP MCPs don't have child containers — env passthrough doesn't apply. - for (mcp_name, config) in &front_matter.mcp_servers { - let opts = match config { - McpConfig::WithOptions(opts) if opts.enabled.unwrap_or(true) => opts, - _ => continue, + let config = CompileConfig { + template: include_str!("../../templates/base.yml").to_string(), + extra_replacements: vec![ + ("{{ firewall_version }}".into(), AWF_VERSION.into()), + ("{{ mcpg_version }}".into(), MCPG_VERSION.into()), + ("{{ mcpg_image }}".into(), MCPG_IMAGE.into()), + ("{{ allowed_domains }}".into(), allowed_domains), + ("{{ enabled_tools_args }}".into(), enabled_tools_args), + ("{{ cancel_previous_builds }}".into(), cancel_previous_builds), + ("{{ mcpg_config }}".into(), mcpg_config_json), + ("{{ mcpg_docker_env }}".into(), mcpg_docker_env), + ], }; - // Only container-based MCPs need env passthrough on the MCPG Docker run - if opts.container.is_none() { - continue; - } - - for (var_name, var_value) in &opts.env { - // Validate env var name to prevent Docker flag injection (e.g. "X --privileged") - if !is_valid_env_var_name(var_name) { - log::warn!( - "MCP '{}': skipping invalid env var name '{}' — must match [A-Za-z_][A-Za-z0-9_]*", - mcp_name, var_name - ); - continue; - } - if seen.contains(var_name) { - continue; - } - // Passthrough: empty string means forward from host/pipeline environment - if var_value.is_empty() { - env_flags.push(format!("-e {}", var_name)); - seen.insert(var_name.clone()); - } - } - } - - env_flags.sort(); - if env_flags.is_empty() { - // No extra flags — emit a lone `\` so the bash line continuation from the - // preceding `-e MCP_GATEWAY_API_KEY=...` flag connects to the image name on - // the next line. 
This is valid bash: a backslash at end-of-line continues - // the command. replace_with_indent preserves this on its own indented line. - "\\".to_string() - } else { - // Emit each flag on its own line with `\` continuation. - // replace_with_indent handles indentation from the template (base.yml), - // so we only emit the content without hardcoded spaces. - let flags = env_flags.join(" \\\n"); - format!("{} \\", flags) + compile_shared(input_path, output_path, front_matter, markdown_body, &extensions, &ctx, config).await } } #[cfg(test)] mod tests { use super::*; - use crate::compile::common::{ - parse_markdown, ADO_MCP_IMAGE, ADO_MCP_ENTRYPOINT, ADO_MCP_PACKAGE, ADO_MCP_SERVER_NAME, - }; - use crate::compile::types::{McpConfig, McpOptions}; + use crate::compile::common::parse_markdown; fn minimal_front_matter() -> FrontMatter { let (fm, _) = parse_markdown("---\nname: test-agent\ndescription: test\n---\n").unwrap(); fm } - #[test] - fn test_generate_firewall_config_custom_mcp() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "my-tool".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - entrypoint: Some("node".to_string()), - entrypoint_args: vec!["server.js".to_string()], - allowed: vec!["do_thing".to_string()], - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let server = config.mcp_servers.get("my-tool").unwrap(); - assert_eq!(server.server_type, "stdio"); - assert_eq!(server.container.as_ref().unwrap(), "node:20-slim"); - assert_eq!(server.entrypoint.as_ref().unwrap(), "node"); - assert_eq!( - server.entrypoint_args.as_ref().unwrap(), - &vec!["server.js"] - ); - assert_eq!( - server.tools.as_ref().unwrap(), - &vec!["do_thing".to_string()] - ); - } - - #[test] - fn test_generate_mcpg_config_mcp_without_transport_skipped() { - let mut fm = 
minimal_front_matter(); - // An MCP with no container or url should be skipped - fm.mcp_servers - .insert("phantom".to_string(), McpConfig::Enabled(true)); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - assert!(!config.mcp_servers.contains_key("phantom")); - // safeoutputs is always present - assert!(config.mcp_servers.contains_key("safeoutputs")); - } - - #[test] - fn test_generate_mcpg_config_disabled_mcp_skipped() { - let mut fm = minimal_front_matter(); - fm.mcp_servers - .insert("my-tool".to_string(), McpConfig::Enabled(false)); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - assert!(!config.mcp_servers.contains_key("my-tool")); - } - - #[test] - fn test_generate_mcpg_config_empty_mcp_servers() { - let fm = minimal_front_matter(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - // Only safeoutputs should be present - assert_eq!(config.mcp_servers.len(), 1); - assert!(config.mcp_servers.contains_key("safeoutputs")); - } - - #[test] - fn test_generate_mcpg_config_gateway_defaults() { - let fm = minimal_front_matter(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - assert_eq!(config.gateway.port, 80); - assert_eq!(config.gateway.domain, "host.docker.internal"); - assert_eq!(config.gateway.api_key, "${MCP_GATEWAY_API_KEY}"); - assert_eq!(config.gateway.payload_dir, "/tmp/gh-aw/mcp-payloads"); - } - - #[test] - fn test_generate_mcpg_config_json_roundtrip() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "my-tool".to_string(), - McpConfig::WithOptions(McpOptions { - container: 
Some("python:3.12-slim".to_string()), - entrypoint: Some("python".to_string()), - entrypoint_args: vec!["-m".to_string(), "server".to_string()], - allowed: vec!["query".to_string()], - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let json = serde_json::to_string_pretty(&config).expect("Config should serialize to JSON"); - let parsed: serde_json::Value = - serde_json::from_str(&json).expect("Serialized JSON should parse back"); - - // Verify top-level structure matches MCPG expectation - assert!( - parsed.get("mcpServers").is_some(), - "Should have mcpServers key" - ); - assert!(parsed.get("gateway").is_some(), "Should have gateway key"); - - let gw = parsed.get("gateway").unwrap(); - assert!(gw.get("port").is_some(), "Gateway should have port"); - assert!(gw.get("domain").is_some(), "Gateway should have domain"); - assert!(gw.get("apiKey").is_some(), "Gateway should have apiKey"); - assert!( - gw.get("payloadDir").is_some(), - "Gateway should have payloadDir" - ); - } - - #[test] - fn test_generate_mcpg_config_safeoutputs_variable_placeholders() { - let fm = minimal_front_matter(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let so = config.mcp_servers.get("safeoutputs").unwrap(); - - // URL should reference the runtime-substituted port - let url = so.url.as_ref().unwrap(); - assert!( - url.contains("${SAFE_OUTPUTS_PORT}"), - "SafeOutputs URL should use ${{SAFE_OUTPUTS_PORT}} placeholder, got: {url}" - ); - - // Auth header should reference the runtime-substituted API key - let headers = so.headers.as_ref().unwrap(); - let auth = headers.get("Authorization").unwrap(); - assert!( - auth.contains("${SAFE_OUTPUTS_API_KEY}"), - "SafeOutputs auth header should use ${{SAFE_OUTPUTS_API_KEY}} placeholder, 
got: {auth}" - ); - } - - #[test] - fn test_generate_mcpg_config_safeoutputs_is_http_type() { - let fm = minimal_front_matter(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let so = config.mcp_servers.get("safeoutputs").unwrap(); - assert_eq!(so.server_type, "http"); - assert!( - so.container.is_none(), - "HTTP backend should have no container" - ); - assert!(so.args.is_none(), "HTTP backend should have no args"); - assert!(so.url.is_some(), "HTTP backend must have a URL"); - } - - #[test] - fn test_generate_mcpg_config_container_mcp_is_stdio_type() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "runner".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - entrypoint: Some("node".to_string()), - entrypoint_args: vec!["srv.js".to_string()], - allowed: vec!["run".to_string()], - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let srv = config.mcp_servers.get("runner").unwrap(); - assert_eq!(srv.server_type, "stdio"); - assert!( - srv.container.is_some(), - "stdio server must have a container" - ); - assert!(srv.url.is_none(), "stdio server should have no URL"); - } - - #[test] - fn test_generate_mcpg_config_container_with_env() { - let mut fm = minimal_front_matter(); - let mut env = std::collections::HashMap::new(); - env.insert("TOKEN".to_string(), "secret".to_string()); - fm.mcp_servers.insert( - "with-env".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - env, - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let srv = 
config.mcp_servers.get("with-env").unwrap(); - let e = srv.env.as_ref().unwrap(); - assert_eq!(e.get("TOKEN").unwrap(), "secret"); - } - - #[test] - fn test_generate_mcpg_config_reserved_safeoutputs_name_rejected() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "safeoutputs".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("evil:latest".to_string()), - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - // The reserved entry should still be the HTTP backend, not the user's container - let so = config.mcp_servers.get("safeoutputs").unwrap(); - assert_eq!( - so.server_type, "http", - "safeoutputs should remain HTTP backend" - ); - assert!( - so.container.is_none(), - "User container should not overwrite safeoutputs" - ); - } - - #[test] - fn test_generate_mcpg_config_safeoutputs_reserved_name_skipped() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "SafeOutputs".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - entrypoint: Some("node".to_string()), - entrypoint_args: vec!["evil.js".to_string()], - allowed: vec!["hijack".to_string()], - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - // The user-defined "SafeOutputs" must not overwrite the built-in entry - let so = config.mcp_servers.get("safeoutputs").unwrap(); - assert_eq!(so.server_type, "http"); - assert!(so.url.as_ref().unwrap().contains("localhost")); - // No stdio entry should have been added under any casing - assert_eq!(config.mcp_servers.len(), 1); - } - - #[test] - fn test_generate_mcpg_config_http_mcp() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "remote".to_string(), - 
McpConfig::WithOptions(McpOptions { - url: Some("https://mcp.example.com/api".to_string()), - headers: { - let mut h = HashMap::new(); - h.insert("X-Custom".to_string(), "value".to_string()); - h - }, - allowed: vec!["query".to_string()], - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let srv = config.mcp_servers.get("remote").unwrap(); - assert_eq!(srv.server_type, "http"); - assert_eq!( - srv.url.as_ref().unwrap(), - "https://mcp.example.com/api" - ); - assert_eq!( - srv.headers.as_ref().unwrap().get("X-Custom").unwrap(), - "value" - ); - assert!(srv.container.is_none(), "HTTP server should have no container"); - } - - #[test] - fn test_generate_mcpg_config_container_with_entrypoint() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "ado".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - entrypoint: Some("npx".to_string()), - entrypoint_args: vec!["-y".to_string(), "@azure-devops/mcp".to_string()], - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let srv = config.mcp_servers.get("ado").unwrap(); - assert_eq!(srv.server_type, "stdio"); - assert_eq!(srv.container.as_ref().unwrap(), "node:20-slim"); - assert_eq!(srv.entrypoint.as_ref().unwrap(), "npx"); - assert_eq!( - srv.entrypoint_args.as_ref().unwrap(), - &vec!["-y", "@azure-devops/mcp"] - ); - } - - #[test] - fn test_generate_mcpg_config_container_with_mounts() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "data-tool".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("data-tool:latest".to_string()), - mounts: vec!["/host/data:/app/data:ro".to_string()], - ..Default::default() - }), - ); - let config = 
generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let srv = config.mcp_servers.get("data-tool").unwrap(); - assert_eq!( - srv.mounts.as_ref().unwrap(), - &vec!["/host/data:/app/data:ro"] - ); - } - - #[test] - fn test_generate_mcpg_config_no_transport_skipped() { - let mut fm = minimal_front_matter(); - // MCP with options but no container or url should be skipped - fm.mcp_servers.insert( - "no-transport".to_string(), - McpConfig::WithOptions(McpOptions { - allowed: vec!["tool".to_string()], - ..Default::default() - }), - ); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - assert!(!config.mcp_servers.contains_key("no-transport")); - } - - #[test] - fn test_generate_mcpg_docker_env_with_permissions_read() { - let mut fm = minimal_front_matter(); - fm.permissions = Some(crate::compile::types::PermissionsConfig { - read: Some("my-read-sc".to_string()), - write: None, - }); - // A container MCP must request AZURE_DEVOPS_EXT_PAT for the auto-map to trigger - fm.mcp_servers.insert( - "ado-tool".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - env: { - let mut e = HashMap::new(); - e.insert("AZURE_DEVOPS_EXT_PAT".to_string(), "".to_string()); - e - }, - ..Default::default() - }), - ); - let env = generate_mcpg_docker_env(&fm); - assert!( - env.contains("-e AZURE_DEVOPS_EXT_PAT=\"$(SC_READ_TOKEN)\""), - "Should auto-map ADO token when permissions.read is set and MCP requests it" - ); - } - - #[test] - fn test_generate_mcpg_docker_env_permissions_read_no_mcp_request() { - let mut fm = minimal_front_matter(); - fm.permissions = Some(crate::compile::types::PermissionsConfig { - read: Some("my-read-sc".to_string()), - write: None, - }); - // No MCP requests AZURE_DEVOPS_EXT_PAT — auto-map should NOT trigger - 
fm.mcp_servers.insert( - "unrelated-tool".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - ..Default::default() - }), - ); - let env = generate_mcpg_docker_env(&fm); - assert!( - !env.contains("AZURE_DEVOPS_EXT_PAT"), - "Should NOT auto-map ADO token when no MCP requests it" - ); - } - - #[test] - fn test_generate_mcpg_docker_env_dedup_auto_map_and_passthrough() { - // When permissions.read is set AND MCP has AZURE_DEVOPS_EXT_PAT: "", - // the auto-mapped form (with SC_READ_TOKEN) should win — no duplicate - let mut fm = minimal_front_matter(); - fm.permissions = Some(crate::compile::types::PermissionsConfig { - read: Some("my-read-sc".to_string()), - write: None, - }); - fm.mcp_servers.insert( - "ado-tool".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("node:20-slim".to_string()), - env: { - let mut e = HashMap::new(); - e.insert("AZURE_DEVOPS_EXT_PAT".to_string(), "".to_string()); - e - }, - ..Default::default() - }), - ); - let env = generate_mcpg_docker_env(&fm); - // Should have the SC_READ_TOKEN form (auto-mapped), not bare passthrough - assert!( - env.contains("-e AZURE_DEVOPS_EXT_PAT=\"$(SC_READ_TOKEN)\""), - "Auto-mapped form should be present" - ); - // Should appear exactly once - let count = env.matches("AZURE_DEVOPS_EXT_PAT").count(); - assert_eq!(count, 1, "AZURE_DEVOPS_EXT_PAT should appear exactly once, got {}", count); - } - - #[test] - fn test_generate_mcpg_docker_env_without_permissions() { - let fm = minimal_front_matter(); - let env = generate_mcpg_docker_env(&fm); - assert!( - !env.contains("AZURE_DEVOPS_EXT_PAT"), - "Should not map ADO token when permissions.read is not set" - ); - } - - #[test] - fn test_generate_mcpg_docker_env_passthrough_vars() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "tool".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("img:latest".to_string()), - env: { - let mut e = HashMap::new(); - 
e.insert("PASS_THROUGH".to_string(), "".to_string()); - e.insert("STATIC".to_string(), "value".to_string()); - e - }, - ..Default::default() - }), - ); - let env = generate_mcpg_docker_env(&fm); - assert!(env.contains("-e PASS_THROUGH"), "Should include passthrough var"); - assert!(!env.contains("-e STATIC"), "Should NOT include static var"); - } - - #[test] - fn test_generate_mcpg_docker_env_rejects_invalid_names() { - let mut fm = minimal_front_matter(); - fm.mcp_servers.insert( - "evil".to_string(), - McpConfig::WithOptions(McpOptions { - container: Some("img:latest".to_string()), - env: { - let mut e = HashMap::new(); - // Injection attempt: env var name with Docker flag - e.insert("MY_VAR --privileged".to_string(), "".to_string()); - // Valid env var for comparison - e.insert("GOOD_VAR".to_string(), "".to_string()); - e - }, - ..Default::default() - }), - ); - let env = generate_mcpg_docker_env(&fm); - assert!( - !env.contains("--privileged"), - "Should reject invalid env var name with Docker flag injection" - ); - assert!( - env.contains("-e GOOD_VAR"), - "Should include valid env var" - ); - } - - #[test] - fn test_is_valid_env_var_name() { - assert!(is_valid_env_var_name("MY_VAR")); - assert!(is_valid_env_var_name("_PRIVATE")); - assert!(is_valid_env_var_name("A")); - assert!(is_valid_env_var_name("VAR123")); - assert!(!is_valid_env_var_name("")); - assert!(!is_valid_env_var_name("123ABC")); - assert!(!is_valid_env_var_name("MY-VAR")); - assert!(!is_valid_env_var_name("MY VAR")); - assert!(!is_valid_env_var_name("X --privileged")); - assert!(!is_valid_env_var_name("X -v /etc:/etc:rw")); - } - - // ─── tools.azure-devops MCPG integration ──────────────────────────────── - - #[test] - fn test_ado_tool_generates_mcpg_entry() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops: true\n---\n", - ) - .unwrap(); - // Pass inferred org since no explicit org is set - let config = generate_mcpg_config(&fm, 
&super::super::extensions::CompileContext::for_test_with_org(&fm, "inferred-org"), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let ado = config.mcp_servers.get("azure-devops").unwrap(); - assert_eq!(ado.server_type, "stdio"); - assert_eq!(ado.container.as_deref(), Some(ADO_MCP_IMAGE)); - assert_eq!(ado.entrypoint.as_deref(), Some(ADO_MCP_ENTRYPOINT)); - let args = ado.entrypoint_args.as_ref().unwrap(); - assert!(args.contains(&"-y".to_string())); - assert!(args.contains(&ADO_MCP_PACKAGE.to_string())); - assert!(args.contains(&"inferred-org".to_string())); - // Should have AZURE_DEVOPS_EXT_PAT in env - let env = ado.env.as_ref().unwrap(); - assert!(env.contains_key("AZURE_DEVOPS_EXT_PAT")); - } - - #[test] - fn test_ado_tool_with_toolsets() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops:\n toolsets: [repos, wit, core]\n---\n", - ) - .unwrap(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test_with_org(&fm, "myorg"), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let ado = config.mcp_servers.get("azure-devops").unwrap(); - let args = ado.entrypoint_args.as_ref().unwrap(); - assert!(args.contains(&"-d".to_string())); - assert!(args.contains(&"repos".to_string())); - assert!(args.contains(&"wit".to_string())); - assert!(args.contains(&"core".to_string())); - } - - #[test] - fn test_ado_tool_with_org_override() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: myorg\n---\n", - ) - .unwrap(); - // Explicit org should be used even when inferred_org is None - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let ado = config.mcp_servers.get("azure-devops").unwrap(); - let args = ado.entrypoint_args.as_ref().unwrap(); - assert!(args.contains(&"myorg".to_string())); - } - - #[test] - fn 
test_ado_tool_explicit_org_overrides_inferred() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: explicit-org\n---\n", - ) - .unwrap(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test_with_org(&fm, "inferred-org"), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let ado = config.mcp_servers.get("azure-devops").unwrap(); - let args = ado.entrypoint_args.as_ref().unwrap(); - assert!(args.contains(&"explicit-org".to_string())); - assert!(!args.contains(&"inferred-org".to_string())); - } - - #[test] - fn test_ado_tool_no_org_fails() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops: true\n---\n", - ) - .unwrap(); - // No explicit org and no inferred org — should fail - let result = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)); - assert!(result.is_err()); - assert!( - result.unwrap_err().to_string().contains("no ADO organization"), - "Error should mention missing org" - ); - } - - #[test] - fn test_ado_tool_invalid_org_fails() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: \"my org/bad\"\n---\n", - ) - .unwrap(); - let result = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)); - assert!(result.is_err()); - assert!( - result.unwrap_err().to_string().contains("Invalid ADO org name"), - "Error should mention invalid org" - ); - } - - #[test] - fn test_ado_tool_invalid_toolset_fails() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: myorg\n toolsets: [\"repos\", \"bad toolset\"]\n---\n", - ) - .unwrap(); - let result = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), 
&super::super::extensions::collect_extensions(&fm)); - assert!(result.is_err()); - assert!( - result.unwrap_err().to_string().contains("Invalid ADO toolset name"), - "Error should mention invalid toolset" - ); - } - - #[test] - fn test_ado_tool_with_allowed_tools() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: myorg\n allowed:\n - wit_get_work_item\n - core_list_projects\n---\n", - ) - .unwrap(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let ado = config.mcp_servers.get("azure-devops").unwrap(); - let tools = ado.tools.as_ref().unwrap(); - assert_eq!(tools, &["wit_get_work_item", "core_list_projects"]); - } - - #[test] - fn test_ado_tool_disabled_not_generated() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops: false\n---\n", - ) - .unwrap(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - assert!(!config.mcp_servers.contains_key("azure-devops")); - } - - #[test] - fn test_ado_tool_not_set_not_generated() { - let fm = minimal_front_matter(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - assert!(!config.mcp_servers.contains_key("azure-devops")); - } - - #[test] - fn test_ado_tool_skips_manual_mcp_entry() { - // When tools.azure-devops is enabled AND mcp-servers also has azure-devops, - // the tools config takes precedence and the manual entry is skipped. 
- let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops:\n org: auto-org\nmcp-servers:\n azure-devops:\n container: \"node:20-slim\"\n entrypoint: \"npx\"\n entrypoint-args: [\"-y\", \"@azure-devops/mcp\", \"manual-org\"]\n---\n", - ) - .unwrap(); - let config = generate_mcpg_config(&fm, &super::super::extensions::CompileContext::for_test(&fm), &super::super::extensions::collect_extensions(&fm)).unwrap(); - let ado = config.mcp_servers.get("azure-devops").unwrap(); - // Should use the auto-configured org, not the manual one - let args = ado.entrypoint_args.as_ref().unwrap(); - assert!(args.contains(&"auto-org".to_string())); - assert!(!args.contains(&"manual-org".to_string())); - } - - #[test] - fn test_ado_tool_docker_env_passthrough() { - let (fm, _) = parse_markdown( - "---\nname: test\ndescription: test\ntools:\n azure-devops: true\npermissions:\n read: my-read-sc\n---\n", - ) - .unwrap(); - let env = generate_mcpg_docker_env(&fm); - assert!( - env.contains("AZURE_DEVOPS_EXT_PAT"), - "Should include ADO token passthrough when permissions.read is set" - ); - } - - // ─── validate_docker_args ──────────────────────────────────────────────── - - #[test] - fn test_validate_docker_args_privileged_flag() { - let warnings = validate_docker_args(&["--privileged".to_string()], "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("--privileged"), "should warn about --privileged"); - } - - #[test] - fn test_validate_docker_args_entrypoint_in_args_warns() { - let warnings = validate_docker_args( - &[ - "--entrypoint".to_string(), - "/bin/sh".to_string(), - ], - "my-mcp", - ); - assert!(warnings.iter().any(|w| w.contains("--entrypoint") && w.contains("entrypoint:")), - "should warn about --entrypoint with hint to use entrypoint: field"); - } - - #[test] - fn test_validate_docker_args_volume_flag_calls_mount_validation() { - // -v docker.sock in args bypasses `mounts:` validation; should produce warnings - let 
warnings = validate_docker_args( - &[ - "-v".to_string(), - "/var/run/docker.sock:/var/run/docker.sock".to_string(), - ], - "my-mcp", - ); - assert!(warnings.iter().any(|w| w.contains("bypasses mounts validation")), - "should warn about volume mount in args"); - assert!(warnings.iter().any(|w| w.contains("Docker socket")), - "should propagate mount source warning for docker.sock"); - } - - #[test] - fn test_validate_docker_args_volume_equals_form() { - // --volume=source:dest form should also be detected - let warnings = validate_docker_args( - &["--volume=/var/run/docker.sock:/var/run/docker.sock".to_string()], - "my-mcp", - ); - assert!(warnings.iter().any(|w| w.contains("bypasses mounts validation")), - "should warn about --volume= form"); - } - - #[test] - fn test_validate_docker_args_safe_args_no_warnings() { - // A legitimate arg like --read-only should produce no warnings - let warnings = validate_docker_args(&["--read-only".to_string()], "my-mcp"); - assert!(warnings.is_empty(), "safe args should not produce warnings"); - } - - #[test] - fn test_validate_docker_args_empty_no_warnings() { - let warnings = validate_docker_args(&[], "my-mcp"); - assert!(warnings.is_empty(), "empty args should not produce warnings"); - } - - #[test] - fn test_validate_docker_args_volume_flag_trailing_warns() { - // -v as the last arg with no mount spec is malformed - let warnings = validate_docker_args(&["-v".to_string()], "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("malformed"), "trailing -v with no mount spec should warn"); - } - - #[test] - fn test_validate_docker_args_long_volume_flag_trailing_warns() { - // --volume as the last arg with no mount spec is malformed - let warnings = validate_docker_args(&["--volume".to_string()], "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("malformed"), "trailing --volume with no mount spec should warn"); - } - - // ─── validate_mcp_url 
──────────────────────────────────────────────────── - - #[test] - fn test_validate_mcp_url_https_no_warnings() { - let warnings = validate_mcp_url("https://mcp.dev.azure.com/myorg", "my-mcp"); - assert!(warnings.is_empty(), "https URL should not produce warnings"); - } - - #[test] - fn test_validate_mcp_url_http_no_warnings() { - let warnings = validate_mcp_url("http://localhost:8100/mcp", "my-mcp"); - assert!(warnings.is_empty(), "http URL should not produce warnings"); - } - - #[test] - fn test_validate_mcp_url_bad_scheme_warns() { - let warnings = validate_mcp_url("ftp://files.example.com", "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("does not use http://"), "non-HTTP scheme should warn"); - } - - #[test] - fn test_validate_mcp_url_no_scheme_warns() { - let warnings = validate_mcp_url("mcp.dev.azure.com/myorg", "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("does not use http://"), "URL without scheme should warn"); - } - - // ─── validate_mount_source ─────────────────────────────────────────────── - - #[test] - fn test_validate_mount_source_docker_sock() { - let warnings = validate_mount_source("/var/run/docker.sock:/var/run/docker.sock:rw", "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("Docker socket"), "should warn about Docker socket exposure"); - } - - #[test] - fn test_validate_mount_source_sensitive_path_etc() { - let warnings = validate_mount_source("/etc/passwd:/data/passwd:ro", "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("sensitive host path"), "should warn about /etc mount"); - } - - #[test] - fn test_validate_mount_source_sensitive_path_proc() { - let warnings = validate_mount_source("/proc:/host/proc:ro", "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("sensitive host path"), "should warn about /proc mount"); - } - - #[test] - fn test_validate_mount_source_case_insensitive() { - // /ETC/shadow should 
match sensitive /etc prefix (lowercased comparison) - let warnings = validate_mount_source("/ETC/shadow:/data/shadow:ro", "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("sensitive host path"), "case-insensitive match should trigger warning"); - } - - #[test] - fn test_validate_mount_source_no_false_positive_on_etc_configs() { - // /etc-configs should NOT match the /etc prefix (path boundary check requires trailing /) - let warnings = validate_mount_source("/etc-configs:/app/config:ro", "my-mcp"); - assert!(warnings.is_empty(), "/etc-configs must not match /etc prefix due to path boundary check"); - } - - #[test] - fn test_validate_mount_source_safe_path_no_warnings() { - // /app/data is not a sensitive path; should produce no warnings - let warnings = validate_mount_source("/app/data:/app/data:ro", "my-mcp"); - assert!(warnings.is_empty(), "safe path should not produce warnings"); - } - - // ─── validate_container_image ──────────────────────────────────────────── - - #[test] - fn test_validate_container_image_empty_string() { - let warnings = validate_container_image("", "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("empty"), "should warn about empty image name"); - } - - #[test] - fn test_validate_container_image_shell_metacharacters() { - let warnings = validate_container_image("node:20-slim; rm -rf /", "my-mcp"); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("unexpected characters"), "should warn about shell metacharacters"); - } - - #[test] - fn test_validate_container_image_valid_name_no_warnings() { - // Standard image references should produce no warnings - assert!(validate_container_image("node:20-slim", "my-mcp").is_empty()); - assert!(validate_container_image("ghcr.io/org/image:latest", "my-mcp").is_empty()); - assert!(validate_container_image("python:3.12-slim", "my-mcp").is_empty()); - } - - // ─── warn_potential_secrets ────────────────────────────────────────────── - - #[test] 
- fn test_warn_potential_secrets_token_env_var_triggers() { - let env = HashMap::from([("API_TOKEN".to_string(), "secret123".to_string())]); - let headers = HashMap::new(); - let warnings = warn_potential_secrets("my-mcp", &env, &headers); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("API_TOKEN"), "should warn about secret-looking env var"); - } - - #[test] - fn test_warn_potential_secrets_empty_passthrough_no_warnings() { - // Empty string = passthrough; should NOT trigger a warning - let env = HashMap::from([("API_TOKEN".to_string(), "".to_string())]); - let headers = HashMap::new(); - let warnings = warn_potential_secrets("my-mcp", &env, &headers); - assert!(warnings.is_empty(), "empty passthrough value must not trigger a warning"); - } - - #[test] - fn test_warn_potential_secrets_authorization_header_triggers() { - let env = HashMap::new(); - let headers = - HashMap::from([("Authorization".to_string(), "Bearer abc".to_string())]); - let warnings = warn_potential_secrets("my-mcp", &env, &headers); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("Authorization"), "should warn about Authorization header"); - } - - #[test] - fn test_warn_potential_secrets_bearer_value_triggers() { - // A header whose value starts with "Bearer " should also warn - let env = HashMap::new(); - let headers = - HashMap::from([("X-Custom-Auth".to_string(), "Bearer token123".to_string())]); - let warnings = warn_potential_secrets("my-mcp", &env, &headers); - assert_eq!(warnings.len(), 1); - assert!(warnings[0].contains("X-Custom-Auth"), "should warn about header with Bearer value"); - } - - #[test] - fn test_warn_potential_secrets_safe_env_no_warnings() { - // Env keys with non-secret names and non-empty values should produce no warnings - let env = HashMap::from([("MY_CONFIG".to_string(), "value".to_string())]); - let headers = HashMap::new(); - let warnings = warn_potential_secrets("my-mcp", &env, &headers); - assert!(warnings.is_empty(), "non-secret 
env var should not produce warnings"); - } - // ─── generate_allowed_domains ──────────────────────────────────────────── #[test] @@ -1983,118 +251,4 @@ mod tests { assert!(domains.contains("crates.io"), "rust domains present"); } - // ─── generate_prepare_steps ────────────────────────────────────────────── - - #[test] - fn test_generate_prepare_steps_with_memory_includes_memory_preamble() { - let (fm, _) = crate::compile::common::parse_markdown( - "---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n", - ).unwrap(); - let exts = super::super::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); - assert!( - !result.is_empty(), - "memory steps must be emitted when cache-memory enabled" - ); - assert!( - result.contains("agent_memory"), - "should reference memory directory" - ); - } - - #[test] - fn test_generate_prepare_steps_without_memory_and_no_steps_is_empty() { - let fm = minimal_front_matter(); - let exts = super::super::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); - assert!(result.is_empty(), "no steps and no memory should produce empty output"); - } - - #[test] - fn test_generate_prepare_steps_with_memory_includes_download_and_prompt() { - let (fm, _) = crate::compile::common::parse_markdown( - "---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n", - ).unwrap(); - let exts = super::super::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); - assert!( - result.contains("DownloadPipelineArtifact"), - "memory steps must include the artifact download task" - ); - assert!( - result.contains("Agent Memory"), - "memory steps must include the memory prompt" - ); - } - - #[test] - fn test_generate_prepare_steps_without_memory_with_user_steps() { - let fm = minimal_front_matter(); - let exts = super::super::extensions::collect_extensions(&fm); - let step: serde_yaml::Value = - 
serde_yaml::from_str("bash: echo hello\ndisplayName: greet").unwrap(); - let result = generate_prepare_steps(&[step], &exts).unwrap(); - assert!(!result.is_empty(), "user steps should be present"); - assert!( - !result.contains("agent_memory"), - "no memory reference when cache-memory not enabled" - ); - } - - #[test] - fn test_generate_prepare_steps_with_memory_and_user_steps() { - let (fm, _) = crate::compile::common::parse_markdown( - "---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n", - ).unwrap(); - let exts = super::super::extensions::collect_extensions(&fm); - let step: serde_yaml::Value = - serde_yaml::from_str("bash: echo hello\ndisplayName: greet").unwrap(); - let result = generate_prepare_steps(&[step], &exts).unwrap(); - assert!( - result.contains("agent_memory"), - "memory reference must be present" - ); - assert!( - result.contains("echo hello"), - "user step must also be present" - ); - } - - #[test] - fn test_generate_prepare_steps_with_lean() { - let (fm, _) = crate::compile::common::parse_markdown( - "---\nname: test\ndescription: test\nruntimes:\n lean: true\n---\n", - ).unwrap(); - let exts = super::super::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); - assert!(result.contains("elan-init.sh"), "should include elan installer"); - assert!(result.contains("Lean 4"), "should include Lean prompt"); - assert!(result.contains("--default-toolchain stable"), "should default to stable"); - assert!(result.contains("/tmp/awf-tools/"), "should symlink into awf-tools for AWF chroot"); - } - - #[test] - fn test_generate_prepare_steps_with_lean_custom_toolchain() { - let (fm, _) = crate::compile::common::parse_markdown( - "---\nname: test\ndescription: test\nruntimes:\n lean:\n toolchain: \"leanprover/lean4:v4.29.1\"\n---\n", - ).unwrap(); - let exts = super::super::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); - assert!( - 
result.contains("--default-toolchain leanprover/lean4:v4.29.1"), - "should use specified toolchain" - ); - } - - #[test] - fn test_generate_prepare_steps_with_lean_and_memory() { - let (fm, _) = crate::compile::common::parse_markdown( - "---\nname: test\ndescription: test\nruntimes:\n lean: true\ntools:\n cache-memory: true\n---\n", - ).unwrap(); - let exts = super::super::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); - assert!(result.contains("agent_memory"), "memory steps present"); - assert!(result.contains("elan-init.sh"), "lean install present"); - assert!(result.contains("Lean 4"), "lean prompt present"); - } } diff --git a/templates/1es-base.yml b/templates/1es-base.yml index fe49a8c5..ddf302b4 100644 --- a/templates/1es-base.yml +++ b/templates/1es-base.yml @@ -1,6 +1,6 @@ # 1ES Pipeline Template for Agentic Pipelines -# This template extends the 1ES Unofficial Pipeline Template and uses the Agency job type -# for the main agent task, while adding custom jobs for safe output analysis and processing. +# This template extends the 1ES Unofficial Pipeline Template with Copilot CLI, +# AWF network isolation, and MCP Gateway — matching the standalone pipeline model. 
name: {{ agent_name }}-$(BuildID) {{ parameters }} @@ -31,7 +31,7 @@ extends: name: AZS-1ES-W-MMS2022 os: windows featureFlags: - disableNetworkIsolation: true # Agency requires network access for AI services + disableNetworkIsolation: true # AWF handles network isolation at application layer runPrerequisitesOnImage: false # Pool image has 1ES prerequisites preinstalled stages: - stage: AgentStage @@ -39,104 +39,376 @@ extends: jobs: {{ setup_job }} - # Main agentic task using the 1ES Agency job type - job: PerformAgenticTask - displayName: "{{ agent_name }} (Agent)" + displayName: "{{ agent_name }} (Agent Automations)" {{ agentic_depends_on }} {{ job_timeout }} templateContext: - type: agencyJob - arguments: - agentContextRoot: {{ agent_context_root }} - skipSourceSync: false - preAgentSteps: - - {{ checkout_repositories }} - - {{ prepare_steps }} - - - bash: | - COMPILER_VERSION="{{ compiler_version }}" - DOWNLOAD_DIR="$(Pipeline.Workspace)/agentic-pipeline-compiler" - DOWNLOAD_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/ado-aw-linux-x64" - CHECKSUM_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/checksums.txt" - - mkdir -p "$DOWNLOAD_DIR" - echo "Downloading ado-aw v${COMPILER_VERSION} from GitHub Releases..." - curl -fsSL -o "$DOWNLOAD_DIR/ado-aw-linux-x64" "$DOWNLOAD_URL" - curl -fsSL -o "$DOWNLOAD_DIR/checksums.txt" "$CHECKSUM_URL" - - echo "Verifying checksum..." 
- cd "$DOWNLOAD_DIR" - grep "ado-aw-linux-x64" checksums.txt | sha256sum -c - - mv ado-aw-linux-x64 ado-aw - chmod +x ado-aw - displayName: "Download agentic pipeline compiler (v{{ compiler_version }})" - - - bash: | - AGENTIC_PIPELINES_PATH="$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw" - chmod +x "$AGENTIC_PIPELINES_PATH" - $AGENTIC_PIPELINES_PATH check "{{ pipeline_path }}" - displayName: "Verify pipeline integrity" - - - bash: | - mkdir -p "$HOME/.copilot" - mkdir -p "$(Agent.TempDirectory)/staging" - - AGENTIC_PIPELINES_PATH="$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw" - chmod +x "$AGENTIC_PIPELINES_PATH" - - # Generate MCP config for safeoutputs - cat > "$HOME/.copilot/mcp-config.json" << EOF - { - "mcpServers": { - "safeoutputs": { - "type": "stdio", - "tools": ["*"], - "command": "$AGENTIC_PIPELINES_PATH", - "args": ["mcp", "$(Agent.TempDirectory)/staging", "{{ working_directory }}"] + type: buildJob + outputs: + - output: pipelineArtifact + path: $(Agent.TempDirectory)/staging + artifact: agent_outputs_$(Build.BuildId) + condition: always() + steps: + {{ checkout_self }} + {{ checkout_repositories }} + + {{ acquire_ado_token }} + + {{ cancel_previous_builds }} + + - task: NuGetAuthenticate@1 + displayName: "Authenticate NuGet Feed" + + - task: NuGetCommand@2 + displayName: "Install Copilot CLI" + inputs: + command: 'custom' + arguments: 'install Microsoft.Copilot.CLI.linux-x64 -Source "https://pkgs.dev.azure.com/msazuresphere/_packaging/Guardian1ESPTUpstreamOrgFeed/nuget/v3/index.json" -Version {{ copilot_version }} -OutputDirectory $(Agent.TempDirectory)/tools -ExcludeVersion -NonInteractive' + + - bash: | + ls -la "$(Agent.TempDirectory)/tools" + echo "##vso[task.prependpath]$(Agent.TempDirectory)/tools/Microsoft.Copilot.CLI.linux-x64" + + # Copy copilot binary to /tmp so it's accessible inside AWF container + # (AWF auto-mounts /tmp:/tmp:rw but not Agent.TempDirectory) + mkdir -p /tmp/awf-tools + cp 
"$(Agent.TempDirectory)/tools/Microsoft.Copilot.CLI.linux-x64/copilot" /tmp/awf-tools/copilot + chmod +x /tmp/awf-tools/copilot + displayName: "Add copilot to PATH" + + - bash: | + copilot --version + copilot -h + displayName: "Output copilot version" + + - bash: | + COMPILER_VERSION="{{ compiler_version }}" + DOWNLOAD_DIR="$(Pipeline.Workspace)/agentic-pipeline-compiler" + DOWNLOAD_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/ado-aw-linux-x64" + CHECKSUM_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/checksums.txt" + + mkdir -p "$DOWNLOAD_DIR" + echo "Downloading ado-aw v${COMPILER_VERSION} from GitHub Releases..." + curl -fsSL -o "$DOWNLOAD_DIR/ado-aw-linux-x64" "$DOWNLOAD_URL" + curl -fsSL -o "$DOWNLOAD_DIR/checksums.txt" "$CHECKSUM_URL" + + echo "Verifying checksum..." + cd "$DOWNLOAD_DIR" + grep "ado-aw-linux-x64" checksums.txt | sha256sum -c - + mv ado-aw-linux-x64 ado-aw + chmod +x ado-aw + displayName: "Download agentic pipeline compiler (v{{ compiler_version }})" + + - bash: | + AGENTIC_PIPELINES_PATH="$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw" + chmod +x "$AGENTIC_PIPELINES_PATH" + $AGENTIC_PIPELINES_PATH check "{{ pipeline_path }}" + displayName: "Verify pipeline integrity" + + - bash: | + mkdir -p "$(Agent.TempDirectory)/staging" + + # Generate MCPG API key early so it's available as an ADO secret variable + # for both the MCPG config and the agent's mcp-config.json + MCP_GATEWAY_API_KEY=$(openssl rand -base64 45 | tr -d '/+=') + echo "##vso[task.setvariable variable=MCP_GATEWAY_API_KEY;issecret=true]$MCP_GATEWAY_API_KEY" + + # Write MCPG (MCP Gateway) configuration to a file + cat > "$(Agent.TempDirectory)/staging/mcpg-config.json" << 'MCPG_CONFIG_EOF' + {{ mcpg_config }} + MCPG_CONFIG_EOF + + echo "MCPG config:" + cat "$(Agent.TempDirectory)/staging/mcpg-config.json" + + # Validate JSON + python3 -m json.tool "$(Agent.TempDirectory)/staging/mcpg-config.json" > /dev/null 
&& echo "JSON is valid" + displayName: "Prepare MCPG config" + + - bash: | + mkdir -p "$HOME/.copilot" + mkdir -p /tmp/awf-tools/staging + + echo "HOME: $HOME" + + # Use absolute path since MCP subprocess may not inherit PATH + AGENTIC_PIPELINES_PATH="$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw" + + # Verify the binary exists and is executable + ls -la "$AGENTIC_PIPELINES_PATH" + chmod +x "$AGENTIC_PIPELINES_PATH" + + $AGENTIC_PIPELINES_PATH -h + + # Copy compiler binary to /tmp so it's accessible inside AWF container + cp "$AGENTIC_PIPELINES_PATH" /tmp/awf-tools/ado-aw + chmod +x /tmp/awf-tools/ado-aw + + # Copy MCPG config to /tmp + cp "$(Agent.TempDirectory)/staging/mcpg-config.json" /tmp/awf-tools/staging/mcpg-config.json + + # Generate MCP config for copilot CLI pointing to MCPG gateway on host. + # The agent inside AWF reaches MCPG via host.docker.internal. + # MCPG enforces client auth via the gateway API key. + cat > /tmp/awf-tools/mcp-config.json << EOF + { + "mcpServers": { + "mcpg": { + "type": "http", + "url": "http://host.docker.internal:80/mcp", + "headers": { + "Authorization": "Bearer $(MCP_GATEWAY_API_KEY)" } } } - EOF - - echo "Generated MCP config:" - cat "$HOME/.copilot/mcp-config.json" - python3 -m json.tool "$HOME/.copilot/mcp-config.json" > /dev/null && echo "JSON is valid" - displayName: "Configure safeoutputs MCP" - - - bash: | - # Write agent instructions to a prompt file - cat > "$(Agent.TempDirectory)/agent-prompt.md" << 'AGENT_PROMPT_EOF' - {{ agent_content }} - AGENT_PROMPT_EOF - - # Append safeoutputs MCP guidance - cat >> "$(Agent.TempDirectory)/agent-prompt.md" << 'SAFEOUTPUTS_EOF' - - --- - - ## Important: Safe Outputs + } + EOF + + # Also write to $HOME/.copilot for host-side use + cp /tmp/awf-tools/mcp-config.json "$HOME/.copilot/mcp-config.json" + + echo "Generated MCP config at: /tmp/awf-tools/mcp-config.json" + cat /tmp/awf-tools/mcp-config.json + + # Validate JSON + python3 -m json.tool 
/tmp/awf-tools/mcp-config.json > /dev/null && echo "JSON is valid" + displayName: "Generate MCP configs" + + - bash: | + # Write agent instructions to /tmp so it's accessible inside AWF container + cat > "/tmp/awf-tools/agent-prompt.md" << 'AGENT_PROMPT_EOF' + {{ agent_content }} + AGENT_PROMPT_EOF + + # Append safeoutputs MCP guidance + cat >> "/tmp/awf-tools/agent-prompt.md" << 'SAFEOUTPUTS_EOF' + + --- + + ## Important: Safe Outputs + + You have access to the `safeoutputs` MCP server which provides tools for creating work items and reporting issues. **Always prefer using safeoutputs tools over other methods**. + + These tools generate safe outputs that will be reviewed and executed in a separate pipeline stage, ensuring proper validation and security controls. + SAFEOUTPUTS_EOF + + echo "Agent prompt:" + cat "/tmp/awf-tools/agent-prompt.md" + displayName: "Prepare agent prompt" + + - task: DockerInstaller@0 + displayName: "Install Docker" + inputs: + dockerVersion: 26.1.4 + + - bash: | + AWF_VERSION="{{ firewall_version }}" + DOWNLOAD_DIR="$(Pipeline.Workspace)/awf" + DOWNLOAD_URL="https://github.com/github/gh-aw-firewall/releases/download/v${AWF_VERSION}/awf-linux-x64" + CHECKSUM_URL="https://github.com/github/gh-aw-firewall/releases/download/v${AWF_VERSION}/checksums.txt" + + mkdir -p "$DOWNLOAD_DIR" + echo "Downloading AWF v${AWF_VERSION} from GitHub Releases..." + curl -fsSL -o "$DOWNLOAD_DIR/awf-linux-x64" "$DOWNLOAD_URL" + curl -fsSL -o "$DOWNLOAD_DIR/checksums.txt" "$CHECKSUM_URL" + + echo "Verifying checksum..." 
+ cd "$DOWNLOAD_DIR" + grep "awf-linux-x64" checksums.txt | sha256sum -c - + mv awf-linux-x64 awf + chmod +x awf + echo "##vso[task.prependpath]$(Pipeline.Workspace)/awf" + ./awf --version || echo "AWF binary ready" + displayName: "Download AWF (Agentic Workflow Firewall) v{{ firewall_version }}" + + - bash: | + docker pull ghcr.io/github/gh-aw-firewall/squid:{{ firewall_version }} + docker pull ghcr.io/github/gh-aw-firewall/agent:{{ firewall_version }} + docker tag ghcr.io/github/gh-aw-firewall/squid:{{ firewall_version }} ghcr.io/github/gh-aw-firewall/squid:latest + docker tag ghcr.io/github/gh-aw-firewall/agent:{{ firewall_version }} ghcr.io/github/gh-aw-firewall/agent:latest + docker pull {{ mcpg_image }}:v{{ mcpg_version }} + displayName: "Pre-pull AWF and MCPG container images (v{{ firewall_version }})" + + {{ prepare_steps }} + + # Start SafeOutputs HTTP server on host (MCPG proxies to it) + - bash: | + SAFE_OUTPUTS_PORT=8100 + SAFE_OUTPUTS_API_KEY=$(openssl rand -base64 45 | tr -d '/+=') + echo "##vso[task.setvariable variable=SAFE_OUTPUTS_PORT]$SAFE_OUTPUTS_PORT" + echo "##vso[task.setvariable variable=SAFE_OUTPUTS_API_KEY;issecret=true]$SAFE_OUTPUTS_API_KEY" + + mkdir -p "$(Agent.TempDirectory)/staging/logs" + + # Start SafeOutputs as HTTP server in the background + # NOTE: {{ enabled_tools_args }} expands to either "" or "--enabled-tools X ... " + # (with trailing space). The value MUST be newline-free; is_safe_tool_name enforces this. + # Positional args (output_directory, bounding_directory) MUST come after all named + # options — clap parses them positionally and reordering would break the command. + nohup /tmp/awf-tools/ado-aw mcp-http \ + --port "$SAFE_OUTPUTS_PORT" \ + --api-key "$SAFE_OUTPUTS_API_KEY" \ + {{ enabled_tools_args }}"/tmp/awf-tools/staging" \ + "{{ working_directory }}" \ + > "$(Agent.TempDirectory)/staging/logs/safeoutputs.log" 2>&1 & + SAFE_OUTPUTS_PID=$! 
+ echo "##vso[task.setvariable variable=SAFE_OUTPUTS_PID]$SAFE_OUTPUTS_PID" + echo "SafeOutputs HTTP server started on port $SAFE_OUTPUTS_PORT (PID: $SAFE_OUTPUTS_PID)" + + # Wait for server to be ready + READY=false + for i in $(seq 1 30); do + if curl -sf "http://localhost:$SAFE_OUTPUTS_PORT/health" > /dev/null 2>&1; then + echo "SafeOutputs HTTP server is ready" + READY=true + break + fi + sleep 1 + done + if [ "$READY" != "true" ]; then + echo "##vso[task.complete result=Failed]SafeOutputs HTTP server did not become ready within 30s" + exit 1 + fi + displayName: "Start SafeOutputs HTTP server" + + # Start MCP Gateway (MCPG) on host + - bash: | + # Substitute runtime values into MCPG config + MCPG_CONFIG=$(cat /tmp/awf-tools/staging/mcpg-config.json \ + | sed "s|\${SAFE_OUTPUTS_PORT}|$(SAFE_OUTPUTS_PORT)|g" \ + | sed "s|\${SAFE_OUTPUTS_API_KEY}|$(SAFE_OUTPUTS_API_KEY)|g" \ + | sed "s|\${MCP_GATEWAY_API_KEY}|$(MCP_GATEWAY_API_KEY)|g") + + # Log the template config (before API key substitution) for debugging. + echo "Starting MCPG with config template:" + cat /tmp/awf-tools/staging/mcpg-config.json | python3 -m json.tool + + # Remove any leftover container from a previous interrupted run + # (--rm only cleans up on clean exit; OOM/SIGKILL may leave it behind) + docker rm -f mcpg 2>/dev/null || true + + # Start MCPG Docker container on host network. + # The Docker socket mount is required because MCPG spawns stdio-based MCP + # servers as sibling containers. This grants significant host access — acceptable + # here because the pipeline agent is already trusted and network-isolated by AWF. + echo "$MCPG_CONFIG" | docker run -i --rm \ + --name mcpg \ + --network host \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -e MCP_GATEWAY_API_KEY="$(MCP_GATEWAY_API_KEY)" \ + {{ mcpg_docker_env }} + {{ mcpg_image }}:v{{ mcpg_version }} & + MCPG_PID=$! 
+ echo "MCPG started (PID: $MCPG_PID)" + + # Wait for MCPG to be ready + READY=false + for i in $(seq 1 30); do + if curl -sf "http://localhost:80/health" > /dev/null 2>&1; then + echo "MCPG is ready" + READY=true + break + fi + sleep 1 + done + if [ "$READY" != "true" ]; then + echo "##vso[task.complete result=Failed]MCPG did not become ready within 30s" + exit 1 + fi + displayName: "Start MCP Gateway (MCPG)" + + # Network isolation via AWF (Agentic Workflow Firewall) + - bash: | + set -o pipefail + + AGENT_OUTPUT_FILE="$(Agent.TempDirectory)/staging/logs/agent-output.txt" + mkdir -p "$(Agent.TempDirectory)/staging/logs" + + echo "=== Running AI agent with AWF network isolation ===" + echo "Allowed domains: {{ allowed_domains }}" + + # AWF provides L7 domain whitelisting via Squid proxy + Docker containers. + # --enable-host-access allows the AWF container to reach host services + # (MCPG and SafeOutputs) via host.docker.internal. + # AWF auto-mounts /tmp:/tmp:rw into the container, so copilot binary, + # agent prompt, and MCP config are placed under /tmp/awf-tools/. + # Stream agent output in real-time while filtering VSO commands. + # sed -u = unbuffered (line-by-line) so output appears immediately. + # tee writes to both stdout (ADO pipeline log) and the artifact file. + # pipefail (set above) ensures AWF's exit code propagates through the pipe. + sudo -E "$(Pipeline.Workspace)/awf/awf" \ + --allow-domains "{{ allowed_domains }}" \ + --skip-pull \ + --env-all \ + --enable-host-access \ + --container-workdir "{{ working_directory }}" \ + --log-level info \ + --proxy-logs-dir "$(Agent.TempDirectory)/staging/logs/firewall" \ + -- '/tmp/awf-tools/copilot --prompt "$(cat /tmp/awf-tools/agent-prompt.md)" --additional-mcp-config @/tmp/awf-tools/mcp-config.json {{ copilot_params }}' \ + 2>&1 \ + | sed -u 's/##vso\[/[VSO-FILTERED] vso[/g; s/##\[/[VSO-FILTERED] [/g' \ + | tee "$AGENT_OUTPUT_FILE" \ + && AGENT_EXIT_CODE=0 || AGENT_EXIT_CODE=$? 
+ + # Print firewall summary if available + if [ -x "$(Pipeline.Workspace)/awf/awf" ]; then + echo "=== Firewall Summary ===" + "$(Pipeline.Workspace)/awf/awf" logs summary --source "$(Agent.TempDirectory)/staging/logs/firewall" 2>/dev/null || true + fi - You have access to the `safeoutputs` MCP server which provides tools for creating work items and reporting issues. **Always prefer using safeoutputs tools over other methods**. + exit $AGENT_EXIT_CODE + displayName: "Run copilot (AWF network isolated)" + workingDirectory: {{ working_directory }} + env: + {{ copilot_ado_env }} + GITHUB_TOKEN: $(GITHUB_TOKEN) + GITHUB_READ_ONLY: 1 + COPILOT_OTEL_ENABLED: "true" + COPILOT_OTEL_EXPORTER_TYPE: "file" + COPILOT_OTEL_FILE_EXPORTER_PATH: "/tmp/awf-tools/staging/otel.jsonl" + + - bash: | + # Copy safe outputs from /tmp back to staging for artifact publish + mkdir -p "$(Agent.TempDirectory)/staging" + cp -r /tmp/awf-tools/staging/* "$(Agent.TempDirectory)/staging/" 2>/dev/null || true + echo "Safe outputs copied to $(Agent.TempDirectory)/staging" + ls -la "$(Agent.TempDirectory)/staging" 2>/dev/null || echo "No safe outputs found" + displayName: "Collect safe outputs from AWF container" + condition: always() + + - bash: | + # Stop MCPG container + echo "Stopping MCPG..." + docker stop mcpg 2>/dev/null || true + echo "MCPG stopped" + + # Stop SafeOutputs HTTP server + if [ -n "$(SAFE_OUTPUTS_PID)" ]; then + echo "Stopping SafeOutputs (PID: $(SAFE_OUTPUTS_PID))..." + kill "$(SAFE_OUTPUTS_PID)" 2>/dev/null || true + echo "SafeOutputs stopped" + fi + displayName: "Stop MCPG and SafeOutputs" + condition: always() - These tools generate safe outputs that will be reviewed and executed in a separate pipeline stage, ensuring proper validation and security controls. 
- SAFEOUTPUTS_EOF + {{ finalize_steps }} - echo "Agent prompt:" - cat "$(Agent.TempDirectory)/agent-prompt.md" - displayName: "Prepare agent prompt" - postAgentSteps: - {{ finalize_steps }} - globalOptions: '--log-dir $(Agency_LogPath) {{ global_options }}' - commandOptions: '{{ copilot_params }}' - logLevel: '{{ log_level }}' - logPath: '$(Build.StagingDirectory)/copilot-logs' - createArtifact: true - mcpConfiguration: - {{ mcp_configuration }} + - bash: | + # Copy all logs to output directory for artifact upload + mkdir -p "$(Agent.TempDirectory)/staging/logs" + if [ -d ~/.copilot/logs ]; then + cp -r ~/.copilot/logs/* "$(Agent.TempDirectory)/staging/logs/" 2>/dev/null || true + fi + if [ -d ~/.ado-aw/logs ]; then + cp -r ~/.ado-aw/logs/* "$(Agent.TempDirectory)/staging/logs/" 2>/dev/null || true + fi + echo "Logs copied to $(Agent.TempDirectory)/staging/logs" + ls -la "$(Agent.TempDirectory)/staging/logs" 2>/dev/null || echo "No logs found" + displayName: "Copy logs to output directory" + condition: always() - # Threat analysis job (custom - not using agencyJob) - job: AnalyzeSafeOutputs displayName: "Analyze safe outputs for threats" dependsOn: PerformAgenticTask @@ -146,155 +418,214 @@ extends: outputs: - output: pipelineArtifact path: $(Agent.TempDirectory)/analyzed_outputs - artifact: analyzed_outputs - steps: - {{ checkout_self }} - {{ checkout_repositories }} - - {{ acquire_ado_token }} - - - download: current - artifact: AgencyArtifact - - - task: NuGetAuthenticate@1 - displayName: "Authenticate NuGet Feed" - - - task: NuGetCommand@2 - displayName: "Install Copilot CLI" - inputs: - command: 'custom' - arguments: 'install Microsoft.Copilot.CLI.linux-x64 -Source "https://pkgs.dev.azure.com/msazuresphere/_packaging/Guardian1ESPTUpstreamOrgFeed/nuget/v3/index.json" -Version {{ copilot_version }} -OutputDirectory $(Agent.TempDirectory)/tools -ExcludeVersion -NonInteractive' - - - bash: | - ls -la "$(Agent.TempDirectory)/tools" - echo 
"##vso[task.prependpath]$(Agent.TempDirectory)/tools/Microsoft.Copilot.CLI.linux-x64" - displayName: Add copilot to PATH - - - bash: | - COMPILER_VERSION="{{ compiler_version }}" - DOWNLOAD_DIR="$(Pipeline.Workspace)/agentic-pipeline-compiler" - DOWNLOAD_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/ado-aw-linux-x64" - CHECKSUM_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/checksums.txt" - - mkdir -p "$DOWNLOAD_DIR" - echo "Downloading ado-aw v${COMPILER_VERSION} from GitHub Releases..." - curl -fsSL -o "$DOWNLOAD_DIR/ado-aw-linux-x64" "$DOWNLOAD_URL" - curl -fsSL -o "$DOWNLOAD_DIR/checksums.txt" "$CHECKSUM_URL" - - echo "Verifying checksum..." - cd "$DOWNLOAD_DIR" - grep "ado-aw-linux-x64" checksums.txt | sha256sum -c - - mv ado-aw-linux-x64 ado-aw - chmod +x ado-aw - displayName: "Download agentic pipeline compiler (v{{ compiler_version }})" - - - bash: | - mkdir -p {{ working_directory }}/safe_outputs - # Copy safe outputs from AgencyArtifact if they exist - if [ -f "$(Pipeline.Workspace)/AgencyArtifact/safe_outputs.ndjson" ]; then - cp "$(Pipeline.Workspace)/AgencyArtifact/safe_outputs.ndjson" {{ working_directory }}/safe_outputs/ - fi - displayName: "Prepare safe outputs for analysis" - - - bash: | - # Write threat analysis prompt to a file - cat > "$(Agent.TempDirectory)/threat-analysis-prompt.md" << 'THREAT_ANALYSIS_EOF' - {{ threat_analysis_prompt }} - THREAT_ANALYSIS_EOF - - echo "Threat analysis prompt:" - cat "$(Agent.TempDirectory)/threat-analysis-prompt.md" - displayName: "Prepare threat analysis prompt" - - - bash: | - AGENTIC_PIPELINES_PATH="$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw" - chmod +x "$AGENTIC_PIPELINES_PATH" - - # Start the network proxy in background - $AGENTIC_PIPELINES_PATH proxy > /tmp/proxy_output.txt 2>&1 & - PROXY_PID=$! - echo $PROXY_PID > /tmp/proxy.pid - - sleep 1 - PROXY_PORT=$(head -n1 /tmp/proxy_output.txt) - - if [[ -z "$PROXY_PORT" || ! 
"$PROXY_PORT" =~ ^[0-9]+$ ]]; then - echo "##vso[task.logissue type=error]Failed to start network proxy" - exit 1 - fi - - echo "Network proxy started on port $PROXY_PORT (PID: $PROXY_PID)" - echo "##vso[task.setvariable variable=PROXY_PORT]$PROXY_PORT" - displayName: "Start network proxy" - - - bash: | - set -o pipefail - - THREAT_OUTPUT_FILE="$(Agent.TempDirectory)/threat-analysis-output.txt" - - # Stream threat analysis output in real-time with VSO command filtering - copilot --prompt "$(cat $(Agent.TempDirectory)/threat-analysis-prompt.md)" {{ copilot_params }} \ - 2>&1 \ - | sed -u 's/##vso\[/[VSO-FILTERED] vso[/g; s/##\[/[VSO-FILTERED] [/g' \ - | tee "$THREAT_OUTPUT_FILE" \ - && AGENT_EXIT_CODE=0 || AGENT_EXIT_CODE=$? - - exit $AGENT_EXIT_CODE - displayName: "Run threat analysis" - workingDirectory: {{ working_directory }} - env: - {{ copilot_ado_env }} - GITHUB_TOKEN: $(GITHUB_TOKEN) - GITHUB_READ_ONLY: 1 - HTTP_PROXY: "http://127.0.0.1:$(PROXY_PORT)" - HTTPS_PROXY: "http://127.0.0.1:$(PROXY_PORT)" - NO_PROXY: "localhost,127.0.0.1" - - - bash: | - if [ -f /tmp/proxy.pid ]; then - PROXY_PID=$(cat /tmp/proxy.pid) - kill $PROXY_PID 2>/dev/null || true - rm -f /tmp/proxy.pid - fi - displayName: "Stop network proxy" - condition: always() - - - bash: | - mkdir -p "$(Agent.TempDirectory)/analyzed_outputs" - cp -r "$(Pipeline.Workspace)/AgencyArtifact/"* "$(Agent.TempDirectory)/analyzed_outputs/" 2>/dev/null || true - - if [ -f "$(Agent.TempDirectory)/threat-analysis-output.txt" ]; then - cp "$(Agent.TempDirectory)/threat-analysis-output.txt" "$(Agent.TempDirectory)/analyzed_outputs/" - RESULT_LINE=$(grep "THREAT_DETECTION_RESULT:" "$(Agent.TempDirectory)/threat-analysis-output.txt" | tail -1) - if [ -n "$RESULT_LINE" ]; then - JSON_CONTENT=$(echo "$RESULT_LINE" | sed 's/.*THREAT_DETECTION_RESULT://') - echo "$JSON_CONTENT" > "$(Agent.TempDirectory)/analyzed_outputs/threat-analysis.json" + artifact: analyzed_outputs_$(Build.BuildId) + condition: always() + steps: + 
{{ checkout_self }} + {{ checkout_repositories }} + + - download: current + artifact: agent_outputs_$(Build.BuildId) + + - task: NuGetAuthenticate@1 + displayName: "Authenticate NuGet Feed" + + - task: NuGetCommand@2 + displayName: "Install Copilot CLI" + inputs: + command: 'custom' + arguments: 'install Microsoft.Copilot.CLI.linux-x64 -Source "https://pkgs.dev.azure.com/msazuresphere/_packaging/Guardian1ESPTUpstreamOrgFeed/nuget/v3/index.json" -Version {{ copilot_version }} -OutputDirectory $(Agent.TempDirectory)/tools -ExcludeVersion -NonInteractive' + + - bash: | + ls -la "$(Agent.TempDirectory)/tools" + echo "##vso[task.prependpath]$(Agent.TempDirectory)/tools/Microsoft.Copilot.CLI.linux-x64" + + # Copy copilot binary to /tmp so it's accessible inside AWF container + mkdir -p /tmp/awf-tools + cp "$(Agent.TempDirectory)/tools/Microsoft.Copilot.CLI.linux-x64/copilot" /tmp/awf-tools/copilot + chmod +x /tmp/awf-tools/copilot + displayName: "Add copilot to PATH" + + - bash: | + copilot --version + copilot -h + displayName: "Output copilot version" + + - bash: | + COMPILER_VERSION="{{ compiler_version }}" + DOWNLOAD_DIR="$(Pipeline.Workspace)/agentic-pipeline-compiler" + DOWNLOAD_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/ado-aw-linux-x64" + CHECKSUM_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/checksums.txt" + + mkdir -p "$DOWNLOAD_DIR" + echo "Downloading ado-aw v${COMPILER_VERSION} from GitHub Releases..." + curl -fsSL -o "$DOWNLOAD_DIR/ado-aw-linux-x64" "$DOWNLOAD_URL" + curl -fsSL -o "$DOWNLOAD_DIR/checksums.txt" "$CHECKSUM_URL" + + echo "Verifying checksum..." 
+ cd "$DOWNLOAD_DIR" + grep "ado-aw-linux-x64" checksums.txt | sha256sum -c - + mv ado-aw-linux-x64 ado-aw + chmod +x ado-aw + displayName: "Download agentic pipeline compiler (v{{ compiler_version }})" + + - task: DockerInstaller@0 + displayName: "Install Docker" + inputs: + dockerVersion: 26.1.4 + + - bash: | + AWF_VERSION="{{ firewall_version }}" + DOWNLOAD_DIR="$(Pipeline.Workspace)/awf" + DOWNLOAD_URL="https://github.com/github/gh-aw-firewall/releases/download/v${AWF_VERSION}/awf-linux-x64" + CHECKSUM_URL="https://github.com/github/gh-aw-firewall/releases/download/v${AWF_VERSION}/checksums.txt" + + mkdir -p "$DOWNLOAD_DIR" + echo "Downloading AWF v${AWF_VERSION} from GitHub Releases..." + curl -fsSL -o "$DOWNLOAD_DIR/awf-linux-x64" "$DOWNLOAD_URL" + curl -fsSL -o "$DOWNLOAD_DIR/checksums.txt" "$CHECKSUM_URL" + + echo "Verifying checksum..." + cd "$DOWNLOAD_DIR" + grep "awf-linux-x64" checksums.txt | sha256sum -c - + mv awf-linux-x64 awf + chmod +x awf + echo "##vso[task.prependpath]$(Pipeline.Workspace)/awf" + ./awf --version || echo "AWF binary ready" + displayName: "Download AWF (Agentic Workflow Firewall) v{{ firewall_version }}" + + - bash: | + docker pull ghcr.io/github/gh-aw-firewall/squid:{{ firewall_version }} + docker pull ghcr.io/github/gh-aw-firewall/agent:{{ firewall_version }} + docker tag ghcr.io/github/gh-aw-firewall/squid:{{ firewall_version }} ghcr.io/github/gh-aw-firewall/squid:latest + docker tag ghcr.io/github/gh-aw-firewall/agent:{{ firewall_version }} ghcr.io/github/gh-aw-firewall/agent:latest + displayName: "Pre-pull AWF container images (v{{ firewall_version }})" + + - bash: | + mkdir -p {{ working_directory }}/safe_outputs + cp -a "$(Pipeline.Workspace)/agent_outputs_$(Build.BuildId)/." 
{{ working_directory }}/safe_outputs + displayName: "Prepare safe outputs for analysis" + + - bash: | + # Write threat analysis prompt to /tmp (accessible inside AWF container) + cat > "/tmp/awf-tools/threat-analysis-prompt.md" << 'THREAT_ANALYSIS_EOF' + {{ threat_analysis_prompt }} + THREAT_ANALYSIS_EOF + + echo "Threat analysis prompt:" + cat "/tmp/awf-tools/threat-analysis-prompt.md" + displayName: "Prepare threat analysis prompt" + + - bash: | + AGENTIC_PIPELINES_PATH="$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw" + chmod +x "$AGENTIC_PIPELINES_PATH" + displayName: "Setup agentic pipeline compiler" + + - bash: | + set -o pipefail + + # Run threat analysis with AWF network isolation + THREAT_OUTPUT_FILE="$(Agent.TempDirectory)/threat-analysis-output.txt" + + # Stream threat analysis output in real-time with VSO command filtering + sudo -E "$(Pipeline.Workspace)/awf/awf" \ + --allow-domains "{{ allowed_domains }}" \ + --skip-pull \ + --env-all \ + --container-workdir "{{ working_directory }}" \ + --log-level info \ + --proxy-logs-dir "$(Agent.TempDirectory)/threat-analysis-logs/firewall" \ + -- '/tmp/awf-tools/copilot --prompt "$(cat /tmp/awf-tools/threat-analysis-prompt.md)" {{ copilot_params }}' \ + 2>&1 \ + | sed -u 's/##vso\[/[VSO-FILTERED] vso[/g; s/##\[/[VSO-FILTERED] [/g' \ + | tee "$THREAT_OUTPUT_FILE" \ + && AGENT_EXIT_CODE=0 || AGENT_EXIT_CODE=$? + + exit $AGENT_EXIT_CODE + displayName: "Run threat analysis (AWF network isolated)" + workingDirectory: {{ working_directory }} + env: + GITHUB_TOKEN: $(GITHUB_TOKEN) + GITHUB_READ_ONLY: 1 + + - bash: | + # Create analyzed outputs directory with original safe outputs and analysis + mkdir -p "$(Agent.TempDirectory)/analyzed_outputs" + + # Copy original safe outputs + cp -a "$(Pipeline.Workspace)/agent_outputs_$(Build.BuildId)/." 
"$(Agent.TempDirectory)/analyzed_outputs/" + + # Copy threat analysis output + if [ -f "$(Agent.TempDirectory)/threat-analysis-output.txt" ]; then + cp "$(Agent.TempDirectory)/threat-analysis-output.txt" "$(Agent.TempDirectory)/analyzed_outputs/" fi - fi - displayName: "Prepare analyzed outputs" - condition: always() - - - bash: | - SAFE_TO_PROCESS="false" - JSON_FILE="$(Agent.TempDirectory)/analyzed_outputs/threat-analysis.json" - - if [ -f "$JSON_FILE" ]; then - if jq -e . "$JSON_FILE" > /dev/null 2>&1; then - if jq -e '.prompt_injection or .secret_leak or .malicious_patch' "$JSON_FILE" > /dev/null 2>&1; then - echo "##vso[task.logissue type=warning]Threats detected - safe outputs will NOT be processed" + + # Extract JSON from THREAT_DETECTION_RESULT line in threat analysis output + if [ -f "$(Agent.TempDirectory)/threat-analysis-output.txt" ]; then + RESULT_LINE=$(grep "THREAT_DETECTION_RESULT:" "$(Agent.TempDirectory)/threat-analysis-output.txt" | tail -1) + if [ -n "$RESULT_LINE" ]; then + # Extract JSON after the prefix + JSON_CONTENT=$(echo "$RESULT_LINE" | sed 's/.*THREAT_DETECTION_RESULT://') + echo "$JSON_CONTENT" > "$(Agent.TempDirectory)/analyzed_outputs/threat-analysis.json" + echo "Extracted threat analysis JSON:" + cat "$(Agent.TempDirectory)/analyzed_outputs/threat-analysis.json" else - echo "No threats detected - safe outputs will be processed" - SAFE_TO_PROCESS="true" + echo "Warning: No THREAT_DETECTION_RESULT found in threat analysis output" fi + else + echo "Warning: No threat analysis output file found" fi - fi - echo "##vso[task.setvariable variable=SafeToProcess;isOutput=true]$SAFE_TO_PROCESS" - displayName: "Evaluate threat analysis" - name: threatAnalysis - condition: always() + echo "Analyzed outputs directory contents:" + ls -laR "$(Agent.TempDirectory)/analyzed_outputs" + displayName: "Prepare analyzed outputs" + condition: always() + + - bash: | + SAFE_TO_PROCESS="false" + 
JSON_FILE="$(Agent.TempDirectory)/analyzed_outputs/threat-analysis.json" + + if [ -f "$JSON_FILE" ]; then + if jq -e . "$JSON_FILE" > /dev/null 2>&1; then + echo "JSON is valid" + + # Check if any threat field is true + if jq -e '.prompt_injection or .secret_leak or .malicious_patch' "$JSON_FILE" > /dev/null 2>&1; then + echo "##vso[task.logissue type=warning]Threats detected - safe outputs will NOT be processed" + jq -r '.reasons[]? // empty' "$JSON_FILE" | sed 's/^/ - /' + else + echo "No threats detected - safe outputs will be processed" + SAFE_TO_PROCESS="true" + fi + else + echo "##vso[task.logissue type=warning]Invalid JSON in threat analysis - defaulting to unsafe" + fi + else + echo "##vso[task.logissue type=warning]No threat analysis JSON found - defaulting to unsafe" + fi + + echo "##vso[task.setvariable variable=SafeToProcess;isOutput=true]$SAFE_TO_PROCESS" + echo "SafeToProcess set to: $SAFE_TO_PROCESS" + displayName: "Evaluate threat analysis" + name: threatAnalysis + condition: always() + + - bash: | + # Copy all logs to analyzed outputs for artifact upload + mkdir -p "$(Agent.TempDirectory)/analyzed_outputs/logs" + if [ -d ~/.copilot/logs ]; then + mkdir -p "$(Agent.TempDirectory)/analyzed_outputs/logs/copilot" + cp -r ~/.copilot/logs/* "$(Agent.TempDirectory)/analyzed_outputs/logs/copilot/" 2>/dev/null || true + fi + if [ -d ~/.ado-aw/logs ]; then + mkdir -p "$(Agent.TempDirectory)/analyzed_outputs/logs/ado-aw" + cp -r ~/.ado-aw/logs/* "$(Agent.TempDirectory)/analyzed_outputs/logs/ado-aw/" 2>/dev/null || true + fi + echo "Logs copied to $(Agent.TempDirectory)/analyzed_outputs/logs" + ls -laR "$(Agent.TempDirectory)/analyzed_outputs/logs" 2>/dev/null || echo "No logs found" + displayName: "Copy logs to output directory" + condition: always() - # Stage 2: Process safe outputs (custom job) - job: ProcessSafeOutputs displayName: "Process safe outputs" dependsOn: @@ -306,43 +637,75 @@ extends: outputs: - output: pipelineArtifact path: 
$(Agent.TempDirectory)/staging - artifact: execute_outputs - steps: - {{ checkout_self }} - {{ checkout_repositories }} - - {{ acquire_write_token }} - - - download: current - artifact: analyzed_outputs - - - bash: | - COMPILER_VERSION="{{ compiler_version }}" - DOWNLOAD_DIR="$(Pipeline.Workspace)/agentic-pipeline-compiler" - DOWNLOAD_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/ado-aw-linux-x64" - CHECKSUM_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/checksums.txt" - - mkdir -p "$DOWNLOAD_DIR" - echo "Downloading ado-aw v${COMPILER_VERSION} from GitHub Releases..." - curl -fsSL -o "$DOWNLOAD_DIR/ado-aw-linux-x64" "$DOWNLOAD_URL" - curl -fsSL -o "$DOWNLOAD_DIR/checksums.txt" "$CHECKSUM_URL" - - echo "Verifying checksum..." - cd "$DOWNLOAD_DIR" - grep "ado-aw-linux-x64" checksums.txt | sha256sum -c - - mv ado-aw-linux-x64 ado-aw - chmod +x ado-aw - displayName: "Download agentic pipeline compiler (v{{ compiler_version }})" - - - bash: | - chmod +x "$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw" - echo "##vso[task.prependpath]$(Pipeline.Workspace)/agentic-pipeline-compiler" - displayName: Add agentic compiler to path - - - bash: ado-aw execute --source "{{ source_path }}" --safe-output-dir "$(Pipeline.Workspace)/analyzed_outputs" - displayName: Process safe outputs - workingDirectory: {{ working_directory }} - env: - {{ executor_ado_env }} + artifact: safe_outputs + condition: always() + steps: + {{ checkout_self }} + {{ checkout_repositories }} + + {{ acquire_write_token }} + + - download: current + artifact: analyzed_outputs_$(Build.BuildId) + + - bash: | + COMPILER_VERSION="{{ compiler_version }}" + DOWNLOAD_DIR="$(Pipeline.Workspace)/agentic-pipeline-compiler" + DOWNLOAD_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/ado-aw-linux-x64" + CHECKSUM_URL="https://github.com/githubnext/ado-aw/releases/download/v${COMPILER_VERSION}/checksums.txt" + + mkdir 
-p "$DOWNLOAD_DIR" + echo "Downloading ado-aw v${COMPILER_VERSION} from GitHub Releases..." + curl -fsSL -o "$DOWNLOAD_DIR/ado-aw-linux-x64" "$DOWNLOAD_URL" + curl -fsSL -o "$DOWNLOAD_DIR/checksums.txt" "$CHECKSUM_URL" + + echo "Verifying checksum..." + cd "$DOWNLOAD_DIR" + grep "ado-aw-linux-x64" checksums.txt | sha256sum -c - + mv ado-aw-linux-x64 ado-aw + chmod +x ado-aw + displayName: "Download agentic pipeline compiler (v{{ compiler_version }})" + + - bash: | + ls -la "$(Pipeline.Workspace)/agentic-pipeline-compiler" + chmod +x "$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw" + echo "##vso[task.prependpath]$(Pipeline.Workspace)/agentic-pipeline-compiler" + displayName: Add agentic compiler to path + + - bash: | + mkdir -p "$(Agent.TempDirectory)/staging" + displayName: "Prepare output directory" + + - bash: | + ado-aw execute --source "{{ source_path }}" --safe-output-dir "$(Pipeline.Workspace)/analyzed_outputs_$(Build.BuildId)" --output-dir "$(Agent.TempDirectory)/staging" + EXIT_CODE=$? 
+ if [ $EXIT_CODE -eq 2 ]; then + echo "##vso[task.complete result=SucceededWithIssues;]Executor completed with warnings" + exit 0 + fi + exit $EXIT_CODE + displayName: Execute safe outputs (Stage 2) + workingDirectory: {{ working_directory }} + env: + {{ executor_ado_env }} + + - bash: | + # Copy all logs to output directory for artifact upload + mkdir -p "$(Agent.TempDirectory)/staging/logs" + # Copy agent output log from analyzed_outputs for optimisation use + cp "$(Pipeline.Workspace)/analyzed_outputs_$(Build.BuildId)/logs/agent-output.txt" \ + "$(Agent.TempDirectory)/staging/logs/agent-output.txt" 2>/dev/null || true + if [ -d ~/.copilot/logs ]; then + mkdir -p "$(Agent.TempDirectory)/staging/logs/copilot" + cp -r ~/.copilot/logs/* "$(Agent.TempDirectory)/staging/logs/copilot/" 2>/dev/null || true + fi + if [ -d ~/.ado-aw/logs ]; then + mkdir -p "$(Agent.TempDirectory)/staging/logs/ado-aw" + cp -r ~/.ado-aw/logs/* "$(Agent.TempDirectory)/staging/logs/ado-aw/" 2>/dev/null || true + fi + echo "Logs copied to $(Agent.TempDirectory)/staging/logs" + ls -laR "$(Agent.TempDirectory)/staging/logs" 2>/dev/null || echo "No logs found" + displayName: "Copy logs to output directory" + condition: always() {{ teardown_job }} diff --git a/templates/base.yml b/templates/base.yml index 1aefbdb0..3e257753 100644 --- a/templates/base.yml +++ b/templates/base.yml @@ -680,4 +680,4 @@ jobs: artifact: safe_outputs condition: always() -{{ teardown_job }} \ No newline at end of file + {{ teardown_job }} \ No newline at end of file diff --git a/tests/compiler_tests.rs b/tests/compiler_tests.rs index 7577020a..b341bff5 100644 --- a/tests/compiler_tests.rs +++ b/tests/compiler_tests.rs @@ -2916,3 +2916,182 @@ network: let _ = fs::remove_dir_all(&temp_dir); } + +// ─── YAML validation tests ────────────────────────────────────────────────── + +/// Helper: compile a fixture and return the compiled YAML string. 
+fn compile_fixture(fixture_name: &str) -> String { + let temp_dir = std::env::temp_dir().join(format!( + "agentic-pipeline-yaml-validation-{}-{}", + fixture_name.replace('.', "-"), + std::process::id() + )); + fs::create_dir_all(&temp_dir).expect("Failed to create temp directory"); + + let fixture_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("fixtures") + .join(fixture_name); + + let output_path = temp_dir.join(fixture_name.replace(".md", ".yml")); + + let binary_path = PathBuf::from(env!("CARGO_BIN_EXE_ado-aw")); + let output = std::process::Command::new(&binary_path) + .args([ + "compile", + fixture_path.to_str().unwrap(), + "-o", + output_path.to_str().unwrap(), + ]) + .output() + .expect("Failed to run compiler"); + + assert!( + output.status.success(), + "Compilation of {} should succeed: {}", + fixture_name, + String::from_utf8_lossy(&output.stderr) + ); + + let compiled = fs::read_to_string(&output_path).expect("Should read compiled YAML"); + let _ = fs::remove_dir_all(&temp_dir); + compiled +} + +/// Validate that compiled YAML is parseable as valid YAML. +/// Strips the leading `# @ado-aw` header comment before parsing. 
+fn assert_valid_yaml(compiled: &str, fixture_name: &str) { + let yaml_content: String = compiled + .lines() + .skip_while(|line| line.starts_with('#') || line.is_empty()) + .collect::>() + .join("\n"); + + let parsed: Result = serde_yaml::from_str(&yaml_content); + assert!( + parsed.is_ok(), + "Compiled YAML for {} should be valid YAML, got parse error: {}", + fixture_name, + parsed.err().unwrap() + ); + + let doc = parsed.unwrap(); + assert!( + doc.is_mapping(), + "Compiled YAML for {} should be a YAML mapping at top level", + fixture_name + ); +} + +/// Test that the 1ES fixture produces valid YAML with correct structure +#[test] +fn test_1es_compiled_output_is_valid_yaml() { + let compiled = compile_fixture("1es-test-agent.md"); + assert_valid_yaml(&compiled, "1es-test-agent.md"); + + let yaml_content: String = compiled + .lines() + .skip_while(|line| line.starts_with('#') || line.is_empty()) + .collect::>() + .join("\n"); + let doc: serde_yaml::Value = serde_yaml::from_str(&yaml_content).unwrap(); + + // Verify 1ES wrapping structure + assert!( + doc.get("extends").is_some(), + "1ES YAML should have 'extends' key" + ); + assert!( + doc.get("resources").is_some(), + "1ES YAML should have 'resources' key" + ); + + // Verify key pipeline content was substituted (catches placeholder regressions) + assert!( + compiled.contains("Copilot.CLI.linux-x64"), + "1ES output should contain Copilot CLI install" + ); + assert!( + compiled.contains("awf"), + "1ES output should contain AWF references" + ); + assert!( + compiled.contains("mcpg"), + "1ES output should contain MCPG references" + ); + assert!( + compiled.contains("SafeOutputs"), + "1ES output should contain SafeOutputs references" + ); + assert!( + compiled.contains("copilot --prompt"), + "1ES output should contain copilot invocation (copilot_params substituted)" + ); + assert!( + compiled.contains("threat-analysis"), + "1ES output should contain threat analysis step" + ); + assert!( + compiled.contains("ado-aw 
execute"), + "1ES output should contain safe output executor step" + ); + assert!( + compiled.contains("PerformAgenticTask"), + "1ES output should contain PerformAgenticTask job" + ); + assert!( + compiled.contains("AnalyzeSafeOutputs"), + "1ES output should contain AnalyzeSafeOutputs job" + ); + assert!( + compiled.contains("ProcessSafeOutputs"), + "1ES output should contain ProcessSafeOutputs job" + ); + + // Verify no Agency remnants + assert!( + !compiled.contains("agencyJob"), + "1ES output should not contain agencyJob" + ); + assert!( + !compiled.contains("AgencyArtifact"), + "1ES output should not contain AgencyArtifact" + ); + assert!( + !compiled.contains("commandOptions"), + "1ES output should not contain commandOptions" + ); +} + +/// Test that the minimal standalone fixture produces valid YAML with correct structure +#[test] +fn test_standalone_minimal_compiled_output_is_valid_yaml() { + let compiled = compile_fixture("minimal-agent.md"); + assert_valid_yaml(&compiled, "minimal-agent.md"); + + let yaml_content: String = compiled + .lines() + .skip_while(|line| line.starts_with('#') || line.is_empty()) + .collect::>() + .join("\n"); + let doc: serde_yaml::Value = serde_yaml::from_str(&yaml_content).unwrap(); + + assert!( + doc.get("jobs").is_some(), + "Standalone YAML should have 'jobs' key" + ); +} + +/// Test that the complete standalone fixture produces valid YAML +#[test] +fn test_standalone_complete_compiled_output_is_valid_yaml() { + let compiled = compile_fixture("complete-agent.md"); + assert_valid_yaml(&compiled, "complete-agent.md"); +} + +/// Test that the pipeline-trigger fixture produces valid YAML +#[test] +fn test_standalone_pipeline_trigger_compiled_output_is_valid_yaml() { + let compiled = compile_fixture("pipeline-trigger-agent.md"); + assert_valid_yaml(&compiled, "pipeline-trigger-agent.md"); +}