diff --git a/AGENTS.md b/AGENTS.md index 61b4173e..3b8dd5d5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1406,6 +1406,17 @@ network: All hosts (core + MCP-specific + user-specified) are combined into a comma-separated domain list passed to AWF's `--allow-domains` flag. +#### Blocking Hosts + +The `network.blocked` field removes hosts from the combined allowlist using **exact-string matching**. Blocking `"github.com"` removes only that exact entry — it does **not** remove wildcard variants like `"*.github.com"`. To fully block a domain and its subdomains, list both the exact host and the wildcard pattern: + +```yaml +network: + blocked: + - "github.com" + - "*.github.com" +``` + ### Permissions (ADO Access Tokens) ADO does not support fine-grained permissions — there are two access levels: blanket read and blanket write. Tokens are minted from ARM service connections; `System.AccessToken` is never used for agent or executor operations. diff --git a/src/compile/standalone.rs b/src/compile/standalone.rs index 1f603e37..d28fe8d7 100644 --- a/src/compile/standalone.rs +++ b/src/compile/standalone.rs @@ -669,15 +669,15 @@ pub fn generate_mcpg_config(front_matter: &FrontMatter, inferred_org: Option<&st if let Some(container) = &opts.container { // Container-based stdio MCP (MCPG-native, per spec §3.2.1) - validate_container_image(container, name); + for w in validate_container_image(container, name) { eprintln!("{}", w); } // Validate mount paths for sensitive host directories for mount in &opts.mounts { - validate_mount_source(mount, name); + for w in validate_mount_source(mount, name) { eprintln!("{}", w); } } // Validate Docker runtime args for privilege escalation - validate_docker_args(&opts.args, name); + for w in validate_docker_args(&opts.args, name) { eprintln!("{}", w); } // Warn about potential inline secrets (check headers too in case user set both) - warn_potential_secrets(name, &opts.env, &opts.headers); + for w in warn_potential_secrets(name, &opts.env, &opts.headers) { eprintln!("{}", w); } let entrypoint_args = if opts.entrypoint_args.is_empty() { None } else { @@ -720,9 +720,9 @@ pub fn generate_mcpg_config(front_matter: &FrontMatter, inferred_org: Option<&st ); } else if let Some(url) = &opts.url { // HTTP-based MCP (remote server) - validate_mcp_url(url, name); + for w in validate_mcp_url(url, name) { eprintln!("{}", w); } // Warn about potential inline secrets in headers - warn_potential_secrets(name, &HashMap::new(), &opts.headers); + for w in warn_potential_secrets(name, &HashMap::new(), &opts.headers) { eprintln!("{}", w); } if !opts.env.is_empty() { eprintln!( "Warning: MCP '{}': env vars are not supported for HTTP MCPs — they will be ignored. \ @@ -801,54 +801,59 @@ const DANGEROUS_DOCKER_FLAGS: &[&str] = &[ /// Validate a container image name for injection attempts. /// Allows `[a-zA-Z0-9./_:-]` which covers standard Docker image references. -fn validate_container_image(image: &str, mcp_name: &str) { +fn validate_container_image(image: &str, mcp_name: &str) -> Vec { + let mut warnings = Vec::new(); if image.is_empty() { - eprintln!("Warning: MCP '{}': container image name is empty.", mcp_name); - return; + warnings.push(format!("Warning: MCP '{}': container image name is empty.", mcp_name)); + return warnings; } if !image.chars().all(|c| c.is_ascii_alphanumeric() || "._/:-@".contains(c)) { - eprintln!( + warnings.push(format!( "Warning: MCP '{}': container image '{}' contains unexpected characters. \ Image names should only contain [a-zA-Z0-9./_:-@].", mcp_name, image - ); + )); } + warnings } /// Validate a volume mount source path, warning on sensitive host directories. /// Docker socket mounts are escalated to stderr warnings since they grant container escape. /// Note: paths are lowercased for comparison to catch cross-platform casing (e.g. `/ETC/shadow`). -fn validate_mount_source(mount: &str, mcp_name: &str) { +fn validate_mount_source(mount: &str, mcp_name: &str) -> Vec { + let mut warnings = Vec::new(); // Format: "source:dest:mode" if let Some(source) = mount.split(':').next() { let source_lower = source.to_lowercase(); if source_lower.contains("docker.sock") { - eprintln!( + warnings.push(format!( "Warning: MCP '{}': mount '{}' exposes the Docker socket to the MCP container. \ This grants full host Docker access and may allow container escape.", mcp_name, mount - ); - return; + )); + return warnings; } for prefix in SENSITIVE_MOUNT_PREFIXES { // Match exact path or path with trailing separator to avoid false positives // (e.g. /etc matches /etc and /etc/shadow, but not /etc-configs) if source_lower == *prefix || source_lower.starts_with(&format!("{}/", prefix)) { - eprintln!( + warnings.push(format!( "Warning: MCP '{}': mount source '{}' references a sensitive host path ({}). \ Ensure this is intentional.", mcp_name, source, prefix - ); + )); break; } } } + warnings } /// Validate Docker runtime args for dangerous flags that could escalate privileges. /// Also detects volume mounts smuggled via `-v`/`--volume` that bypass `mounts` validation. /// Handles both `--flag=value` and `--flag value` (split) forms. -fn validate_docker_args(args: &[String], mcp_name: &str) { +fn validate_docker_args(args: &[String], mcp_name: &str) -> Vec { + let mut warnings = Vec::new(); for (i, arg) in args.iter().enumerate() { let arg_lower = arg.to_lowercase(); // Check for dangerous Docker flags (both --flag=value and --flag value) @@ -861,49 +866,59 @@ fn validate_docker_args(args: &[String], mcp_name: &str) { } else { "" }; - eprintln!( + warnings.push(format!( "Warning: MCP '{}': Docker arg '{}' grants elevated privileges. \ Ensure this is intentional.{}", mcp_name, arg, extra_hint - ); + )); } } // Check for volume mounts smuggled via args (bypasses mounts validation) if arg == "-v" || arg == "--volume" { if let Some(mount_spec) = args.get(i + 1) { - eprintln!( + warnings.push(format!( "Warning: MCP '{}': volume mount '{}' in args bypasses mounts validation. \ Use the 'mounts:' field instead.", mcp_name, mount_spec - ); - validate_mount_source(mount_spec, mcp_name); + )); + warnings.extend(validate_mount_source(mount_spec, mcp_name)); + } else { + warnings.push(format!( + "Warning: MCP '{}': '{}' flag is the last arg with no mount spec following it. \ + This is likely a malformed args list.", + mcp_name, arg + )); } } else if arg_lower.starts_with("-v=") || arg_lower.starts_with("--volume=") { let mount_spec = arg.splitn(2, '=').nth(1).unwrap_or(""); - eprintln!( + warnings.push(format!( "Warning: MCP '{}': volume mount '{}' in args bypasses mounts validation. \ Use the 'mounts:' field instead.", mcp_name, mount_spec - ); - validate_mount_source(mount_spec, mcp_name); + )); + warnings.extend(validate_mount_source(mount_spec, mcp_name)); } } + warnings } /// Validate that an MCP HTTP URL uses an allowed scheme. -fn validate_mcp_url(url: &str, mcp_name: &str) { +fn validate_mcp_url(url: &str, mcp_name: &str) -> Vec { + let mut warnings = Vec::new(); if !url.starts_with("https://") && !url.starts_with("http://") { - eprintln!( + warnings.push(format!( "Warning: MCP '{}': URL '{}' does not use http:// or https:// scheme. \ This may not work with MCPG.", mcp_name, url - ); + )); } + warnings } /// Warn when env values or headers look like they contain inline secrets. /// Secrets should use pipeline variables and passthrough ("") instead. -fn warn_potential_secrets(mcp_name: &str, env: &HashMap, headers: &HashMap) { +fn warn_potential_secrets(mcp_name: &str, env: &HashMap, headers: &HashMap) -> Vec { + let mut warnings = Vec::new(); for (key, value) in env { if !value.is_empty() && (key.to_lowercase().contains("token") || key.to_lowercase().contains("secret") @@ -911,24 +926,25 @@ fn warn_potential_secrets(mcp_name: &str, env: &HashMap, headers || key.to_lowercase().contains("password") || key.to_lowercase().contains("pat")) { - eprintln!( + warnings.push(format!( "Warning: MCP '{}': env var '{}' has an inline value that may be a secret. \ Use an empty string (\"\") for passthrough from pipeline variables instead.", mcp_name, key - ); + )); } } for (key, value) in headers { if value.to_lowercase().contains("bearer ") || key.to_lowercase() == "authorization" { - eprintln!( + warnings.push(format!( "Warning: MCP '{}': header '{}' may contain inline credentials. \ These will appear in plaintext in the compiled pipeline YAML.", mcp_name, key - ); + )); } } + warnings } /// Validate that a string is a legal environment variable name (`[A-Za-z_][A-Za-z0-9_]*`). @@ -1746,4 +1762,361 @@ mod tests { "Should include ADO token passthrough when permissions.read is set" ); } + + // ─── validate_docker_args ──────────────────────────────────────────────── + + #[test] + fn test_validate_docker_args_privileged_flag() { + let warnings = validate_docker_args(&["--privileged".to_string()], "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("--privileged"), "should warn about --privileged"); + } + + #[test] + fn test_validate_docker_args_entrypoint_in_args_warns() { + let warnings = validate_docker_args( + &[ + "--entrypoint".to_string(), + "/bin/sh".to_string(), + ], + "my-mcp", + ); + assert!(warnings.iter().any(|w| w.contains("--entrypoint") && w.contains("entrypoint:")), + "should warn about --entrypoint with hint to use entrypoint: field"); + } + + #[test] + fn test_validate_docker_args_volume_flag_calls_mount_validation() { + // -v docker.sock in args bypasses `mounts:` validation; should produce warnings + let warnings = validate_docker_args( + &[ + "-v".to_string(), + "/var/run/docker.sock:/var/run/docker.sock".to_string(), + ], + "my-mcp", + ); + assert!(warnings.iter().any(|w| w.contains("bypasses mounts validation")), + "should warn about volume mount in args"); + assert!(warnings.iter().any(|w| w.contains("Docker socket")), + "should propagate mount source warning for docker.sock"); + } + + #[test] + fn test_validate_docker_args_volume_equals_form() { + // --volume=source:dest form should also be detected + let warnings = validate_docker_args( + &["--volume=/var/run/docker.sock:/var/run/docker.sock".to_string()], + "my-mcp", + ); + assert!(warnings.iter().any(|w| w.contains("bypasses mounts validation")), + "should warn about --volume= form"); + } + + #[test] + fn test_validate_docker_args_safe_args_no_warnings() { + // A legitimate arg like --read-only should produce no warnings + let warnings = validate_docker_args(&["--read-only".to_string()], "my-mcp"); + assert!(warnings.is_empty(), "safe args should not produce warnings"); + } + + #[test] + fn test_validate_docker_args_empty_no_warnings() { + let warnings = validate_docker_args(&[], "my-mcp"); + assert!(warnings.is_empty(), "empty args should not produce warnings"); + } + + #[test] + fn test_validate_docker_args_volume_flag_trailing_warns() { + // -v as the last arg with no mount spec is malformed + let warnings = validate_docker_args(&["-v".to_string()], "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("malformed"), "trailing -v with no mount spec should warn"); + } + + #[test] + fn test_validate_docker_args_long_volume_flag_trailing_warns() { + // --volume as the last arg with no mount spec is malformed + let warnings = validate_docker_args(&["--volume".to_string()], "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("malformed"), "trailing --volume with no mount spec should warn"); + } + + // ─── validate_mcp_url ──────────────────────────────────────────────────── + + #[test] + fn test_validate_mcp_url_https_no_warnings() { + let warnings = validate_mcp_url("https://mcp.dev.azure.com/myorg", "my-mcp"); + assert!(warnings.is_empty(), "https URL should not produce warnings"); + } + + #[test] + fn test_validate_mcp_url_http_no_warnings() { + let warnings = validate_mcp_url("http://localhost:8100/mcp", "my-mcp"); + assert!(warnings.is_empty(), "http URL should not produce warnings"); + } + + #[test] + fn test_validate_mcp_url_bad_scheme_warns() { + let warnings = validate_mcp_url("ftp://files.example.com", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("does not use http://"), "non-HTTP scheme should warn"); + } + + #[test] + fn test_validate_mcp_url_no_scheme_warns() { + let warnings = validate_mcp_url("mcp.dev.azure.com/myorg", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("does not use http://"), "URL without scheme should warn"); + } + + // ─── validate_mount_source ─────────────────────────────────────────────── + + #[test] + fn test_validate_mount_source_docker_sock() { + let warnings = validate_mount_source("/var/run/docker.sock:/var/run/docker.sock:rw", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("Docker socket"), "should warn about Docker socket exposure"); + } + + #[test] + fn test_validate_mount_source_sensitive_path_etc() { + let warnings = validate_mount_source("/etc/passwd:/data/passwd:ro", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("sensitive host path"), "should warn about /etc mount"); + } + + #[test] + fn test_validate_mount_source_sensitive_path_proc() { + let warnings = validate_mount_source("/proc:/host/proc:ro", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("sensitive host path"), "should warn about /proc mount"); + } + + #[test] + fn test_validate_mount_source_case_insensitive() { + // /ETC/shadow should match sensitive /etc prefix (lowercased comparison) + let warnings = validate_mount_source("/ETC/shadow:/data/shadow:ro", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("sensitive host path"), "case-insensitive match should trigger warning"); + } + + #[test] + fn test_validate_mount_source_no_false_positive_on_etc_configs() { + // /etc-configs should NOT match the /etc prefix (path boundary check requires trailing /) + let warnings = validate_mount_source("/etc-configs:/app/config:ro", "my-mcp"); + assert!(warnings.is_empty(), "/etc-configs must not match /etc prefix due to path boundary check"); + } + + #[test] + fn test_validate_mount_source_safe_path_no_warnings() { + // /app/data is not a sensitive path; should produce no warnings + let warnings = validate_mount_source("/app/data:/app/data:ro", "my-mcp"); + assert!(warnings.is_empty(), "safe path should not produce warnings"); + } + + // ─── validate_container_image ──────────────────────────────────────────── + + #[test] + fn test_validate_container_image_empty_string() { + let warnings = validate_container_image("", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("empty"), "should warn about empty image name"); + } + + #[test] + fn test_validate_container_image_shell_metacharacters() { + let warnings = validate_container_image("node:20-slim; rm -rf /", "my-mcp"); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("unexpected characters"), "should warn about shell metacharacters"); + } + + #[test] + fn test_validate_container_image_valid_name_no_warnings() { + // Standard image references should produce no warnings + assert!(validate_container_image("node:20-slim", "my-mcp").is_empty()); + assert!(validate_container_image("ghcr.io/org/image:latest", "my-mcp").is_empty()); + assert!(validate_container_image("python:3.12-slim", "my-mcp").is_empty()); + } + + // ─── warn_potential_secrets ────────────────────────────────────────────── + + #[test] + fn test_warn_potential_secrets_token_env_var_triggers() { + let env = HashMap::from([("API_TOKEN".to_string(), "secret123".to_string())]); + let headers = HashMap::new(); + let warnings = warn_potential_secrets("my-mcp", &env, &headers); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("API_TOKEN"), "should warn about secret-looking env var"); + } + + #[test] + fn test_warn_potential_secrets_empty_passthrough_no_warnings() { + // Empty string = passthrough; should NOT trigger a warning + let env = HashMap::from([("API_TOKEN".to_string(), "".to_string())]); + let headers = HashMap::new(); + let warnings = warn_potential_secrets("my-mcp", &env, &headers); + assert!(warnings.is_empty(), "empty passthrough value must not trigger a warning"); + } + + #[test] + fn test_warn_potential_secrets_authorization_header_triggers() { + let env = HashMap::new(); + let headers = + HashMap::from([("Authorization".to_string(), "Bearer abc".to_string())]); + let warnings = warn_potential_secrets("my-mcp", &env, &headers); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("Authorization"), "should warn about Authorization header"); + } + + #[test] + fn test_warn_potential_secrets_bearer_value_triggers() { + // A header whose value starts with "Bearer " should also warn + let env = HashMap::new(); + let headers = + HashMap::from([("X-Custom-Auth".to_string(), "Bearer token123".to_string())]); + let warnings = warn_potential_secrets("my-mcp", &env, &headers); + assert_eq!(warnings.len(), 1); + assert!(warnings[0].contains("X-Custom-Auth"), "should warn about header with Bearer value"); + } + + #[test] + fn test_warn_potential_secrets_safe_env_no_warnings() { + // Env keys with non-secret names and non-empty values should produce no warnings + let env = HashMap::from([("MY_CONFIG".to_string(), "value".to_string())]); + let headers = HashMap::new(); + let warnings = warn_potential_secrets("my-mcp", &env, &headers); + assert!(warnings.is_empty(), "non-secret env var should not produce warnings"); + } + + // ─── generate_allowed_domains ──────────────────────────────────────────── + + #[test] + fn test_generate_allowed_domains_blocked_takes_precedence_over_allow() { + let mut fm = minimal_front_matter(); + fm.network = Some(crate::compile::types::NetworkConfig { + allow: vec!["evil.example.com".to_string()], + blocked: vec!["evil.example.com".to_string()], + }); + let domains = generate_allowed_domains(&fm).unwrap(); + assert!( + !domains.contains("evil.example.com"), + "blocked host must be excluded even if also in allow" + ); + } + + #[test] + fn test_generate_allowed_domains_host_docker_internal_always_present() { + let fm = minimal_front_matter(); + let domains = generate_allowed_domains(&fm).unwrap(); + assert!( + domains.contains("host.docker.internal"), + "host.docker.internal must always be in the allowlist" + ); + } + + #[test] + fn test_generate_allowed_domains_user_allow_host_included() { + let mut fm = minimal_front_matter(); + fm.network = Some(crate::compile::types::NetworkConfig { + allow: vec!["api.mycompany.com".to_string()], + blocked: vec![], + }); + let domains = generate_allowed_domains(&fm).unwrap(); + assert!( + domains.contains("api.mycompany.com"), + "user-specified allow host must be present in the allowlist" + ); + } + + #[test] + fn test_generate_allowed_domains_blocked_core_host_removed() { + // Blocking a core host (e.g. github.com) via network.blocked removes it. + // Note: blocking uses exact-string removal — blocking "github.com" does NOT + // also remove wildcard variants like "*.github.com". This is intentional. + let mut fm = minimal_front_matter(); + fm.network = Some(crate::compile::types::NetworkConfig { + allow: vec![], + blocked: vec!["github.com".to_string()], + }); + let domains = generate_allowed_domains(&fm).unwrap(); + let domain_list: Vec<&str> = domains.split(',').collect(); + assert!( + !domain_list.contains(&"github.com"), + "blocked host must be removed even if it is in the core allowlist" + ); + } + + #[test] + fn test_generate_allowed_domains_invalid_host_returns_error() { + let mut fm = minimal_front_matter(); + fm.network = Some(crate::compile::types::NetworkConfig { + allow: vec!["bad host!".to_string()], + blocked: vec![], + }); + let result = generate_allowed_domains(&fm); + assert!(result.is_err(), "invalid DNS characters should return an error"); + } + + // ─── generate_prepare_steps ────────────────────────────────────────────── + + #[test] + fn test_generate_prepare_steps_with_memory_includes_memory_preamble() { + let result = generate_prepare_steps(&[], true); + assert!( + !result.is_empty(), + "memory steps must be emitted when has_memory=true" + ); + assert!( + result.contains("agent_memory"), + "should reference memory directory" + ); + } + + #[test] + fn test_generate_prepare_steps_without_memory_and_no_steps_is_empty() { + let result = generate_prepare_steps(&[], false); + assert!(result.is_empty(), "no steps and no memory should produce empty output"); + } + + #[test] + fn test_generate_prepare_steps_with_memory_includes_download_and_prompt() { + let result = generate_prepare_steps(&[], true); + assert!( + result.contains("DownloadPipelineArtifact"), + "memory steps must include the artifact download task" + ); + assert!( + result.contains("Agent Memory"), + "memory steps must include the memory prompt" + ); + } + + #[test] + fn test_generate_prepare_steps_without_memory_with_user_steps() { + // User-supplied prepare steps without memory should be included as-is + let step: serde_yaml::Value = + serde_yaml::from_str("bash: echo hello\ndisplayName: greet").unwrap(); + let result = generate_prepare_steps(&[step], false); + assert!(!result.is_empty(), "user steps should be present"); + assert!( + !result.contains("agent_memory"), + "no memory reference when has_memory=false" + ); + } + + #[test] + fn test_generate_prepare_steps_with_memory_and_user_steps() { + // When memory is enabled AND user steps are present, both should appear + let step: serde_yaml::Value = + serde_yaml::from_str("bash: echo hello\ndisplayName: greet").unwrap(); + let result = generate_prepare_steps(&[step], true); + assert!( + result.contains("agent_memory"), + "memory reference must be present" + ); + assert!( + result.contains("echo hello"), + "user step must also be present" + ); + } }