From d0148925dcd372d5ede2753a32a4a0d5664dc80b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 8 May 2026 19:57:07 +0000 Subject: [PATCH 01/12] feat(compile): add compact `repos:` front-matter syntax Replace the verbose `repositories:` + `checkout:` pair with a single `repos:` block. Each entry can be a string shorthand ("org/repo" or "alias=org/repo") or an object with name/alias/type/ref/checkout fields. Checkout defaults to true. Legacy fields remain supported with a deprecation warning; mixing the two is rejected at compile time. Agent-Logs-Url: https://github.com/githubnext/ado-aw/sessions/c9ae0fbb-1f1c-4874-b990-16fd3b4bc3e3 Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> --- docs/front-matter.md | 95 +++++++++--- src/compile/common.rs | 353 +++++++++++++++++++++++++++++++++++++++++- src/compile/mod.rs | 13 +- src/compile/types.rs | 104 ++++++++++++- src/main.rs | 8 +- 5 files changed, 550 insertions(+), 23 deletions(-) diff --git a/docs/front-matter.md b/docs/front-matter.md index cdcf9eb0..5f90c971 100644 --- a/docs/front-matter.md +++ b/docs/front-matter.md @@ -16,20 +16,17 @@ engine: copilot # Engine identifier. Defaults to copilot. Currently only 'copilo # id: copilot # model: claude-opus-4.7 # timeout-minutes: 30 -workspace: repo # Optional: "root", "repo" (alias: "self"), or a checked-out repository alias. If not specified, defaults to "root" when no additional repositories are listed in `checkout:`, and to "repo" when one or more additional repos are checked out. See "Workspace Defaults" below. +workspace: repo # Optional: "root", "repo" (alias: "self"), or a checked-out repository alias. If not specified, defaults to "root" when no additional repositories are listed in `repos:`, and to "repo" when one or more additional repos are checked out. See "Workspace Defaults" below. pool: AZS-1ES-L-MMS-ubuntu-22.04 # Agent pool name (string format). Defaults to AZS-1ES-L-MMS-ubuntu-22.04. # pool: # Alternative object format (required for 1ES if specifying os) # name: AZS-1ES-L-MMS-ubuntu-22.04 # os: linux # Operating system: "linux" or "windows". Defaults to "linux". -repositories: # a list of repository resources available to the pipeline (for pre/post jobs, templates, etc.) - - repository: reponame - type: git - name: my-org/my-repo - - repository: another-repo - type: git - name: my-org/another-repo -checkout: # optional list of repository aliases for the agent to checkout and work with (must be subset of repositories) - - reponame # only checkout reponame, not another-repo +repos: # compact repository declarations (replaces repositories: + checkout:) + - my-org/my-repo # shorthand: alias="my-repo", type=git, ref=refs/heads/main, checkout=true + - reponame=my-org/another-repo # shorthand with explicit alias + - name: my-org/templates # object form for full control + ref: refs/heads/release/2.x + checkout: false # declared as resource only, not checked out by the agent tools: # optional tool configuration bash: ["cat", "ls", "grep"] # bash command allow-list (defaults to safe built-in list) edit: true # enable file editing tool (default: true) @@ -152,19 +149,81 @@ Build the project and run all tests... ## Workspace Defaults The `workspace:` field controls which directory the agent runs in. When it is -not set explicitly, the compiler chooses a default based on the `checkout:` -list: +not set explicitly, the compiler chooses a default based on which repositories +are checked out (entries in `repos:` with `checkout: true`, which is the +default): -- If `checkout:` is empty (i.e. only the pipeline's own repository is checked - out via the implicit `self`), `workspace:` defaults to **`root`** — the - agent runs in the pipeline's working directory root. -- If `checkout:` lists one or more additional repository aliases, - `workspace:` defaults to **`repo`** — the agent runs inside the first - checked-out repository's directory. +- If no additional repositories are checked out (i.e. only the pipeline's own + repository is checked out via the implicit `self`), `workspace:` defaults to + **`root`** — the agent runs in the pipeline's working directory root. +- If one or more additional repositories are checked out, `workspace:` defaults + to **`repo`** — the agent runs inside the trigger repository's directory. Set `workspace:` explicitly to `root`, `repo` (alias `self`), or a specific checked-out repository alias to override this behavior. +## Repositories (`repos:`) + +The `repos:` field provides a compact way to declare additional repository +resources and control which ones the agent checks out. It replaces the legacy +`repositories:` + `checkout:` pair. + +Each entry can be: + +| Form | Syntax | Description | +|------|--------|-------------| +| **Shorthand** | `- org/repo` | Alias derived from last segment, type=git, ref=refs/heads/main, checkout=true | +| **Shorthand with alias** | `- alias=org/repo` | Explicit alias before `=` | +| **Object** | `- name: org/repo` | Full control over all fields | + +Object fields: + +| Field | Default | Description | +|------------|------------------------|-------------| +| `name` | *(required)* | Full `org/repo` name (maps to ADO `name:`) | +| `alias` | last segment of `name` | Repository alias (maps to ADO `repository:`) | +| `type` | `git` | ADO repository resource type | +| `ref` | `refs/heads/main` | Branch or tag reference | +| `checkout` | `true` | Whether the agent job clones this repo | + +### Examples + +Three repos, all checked out (most common case): + +```yaml +repos: + - my-org/tools + - my-org/schemas + - my-org/docs +``` + +Mixed: two checked out, one resource-only (used by templates): + +```yaml +repos: + - my-org/tools + - my-org/schemas + - name: my-org/pipeline-templates + checkout: false +``` + +Custom ref and explicit alias: + +```yaml +repos: + - name: my-org/docs + alias: docs-v2 + ref: refs/heads/release/2.x +``` + +### Legacy syntax (deprecated) + +The legacy `repositories:` + `checkout:` fields still work but emit a +deprecation warning. They cannot be mixed with `repos:`. Migrate by +converting each `repositories:` entry into a `repos:` entry — any entry +that appeared in `checkout:` keeps the default `checkout: true`; any that +did not should set `checkout: false`. + ## Filter Validation The compiler validates filter configurations at compile time and will emit diff --git a/src/compile/common.rs b/src/compile/common.rs index b64590a1..aa0b1d50 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -4,7 +4,7 @@ use anyhow::{Context, Result}; use std::collections::{HashMap, HashSet}; use std::path::Path; -use super::types::{FrontMatter, OnConfig, PipelineParameter, Repository}; +use super::types::{FrontMatter, OnConfig, PipelineParameter, Repository, ReposItem}; use super::extensions::{CompilerExtension, Extension, McpgServerConfig, McpgGatewayConfig, McpgConfig, CompileContext}; use crate::compile::types::McpConfig; use crate::fuzzy_schedule; @@ -333,6 +333,140 @@ pub fn generate_checkout_self() -> String { "- checkout: self".to_string() } +// ────────────────────────────────────────────────────────────────────────────── +// Compact `repos:` lowering +// ────────────────────────────────────────────────────────────────────────────── + +/// Lower a `repos:` list into the internal `(Vec, Vec)` pair +/// consumed by the rest of the compiler. Also validates aliases for collisions. +pub fn lower_repos(items: &[ReposItem]) -> Result<(Vec, Vec)> { + let mut repositories: Vec = Vec::new(); + let mut checkout: Vec = Vec::new(); + let mut seen_aliases: HashSet = HashSet::new(); + + for item in items { + let (name, alias, repo_type, repo_ref, do_checkout) = match item { + ReposItem::Shorthand(s) => { + let (alias, name) = parse_shorthand(s)?; + (name, alias, "git".to_string(), "refs/heads/main".to_string(), true) + } + ReposItem::Full(entry) => { + let alias = match &entry.alias { + Some(a) => a.clone(), + None => derive_alias(&entry.name)?, + }; + ( + entry.name.clone(), + alias, + entry.repo_type.clone(), + entry.repo_ref.clone(), + entry.checkout, + ) + } + }; + + // Reject duplicate aliases + if !seen_aliases.insert(alias.clone()) { + anyhow::bail!( + "Duplicate repository alias '{}' in repos. \ + Use the `alias` field (or `alias=org/repo` shorthand) to disambiguate.", + alias + ); + } + + // Reject reserved names + if RESERVED_WORKSPACE_NAMES.contains(&alias.as_str()) { + anyhow::bail!( + "Repository alias '{}' is reserved by the 'workspace:' resolver ({:?}). \ + Rename the alias to avoid ambiguity.", + alias, + RESERVED_WORKSPACE_NAMES + ); + } + + repositories.push(Repository { + repository: alias.clone(), + repo_type, + name, + repo_ref, + }); + + if do_checkout { + checkout.push(alias); + } + } + + Ok((repositories, checkout)) +} + +/// Parse a shorthand string: `"org/repo"` → (derived alias, name), or +/// `"alias=org/repo"` → (alias, name). +fn parse_shorthand(s: &str) -> Result<(String, String)> { + if let Some((alias, name)) = s.split_once('=') { + let alias = alias.trim().to_string(); + let name = name.trim().to_string(); + if alias.is_empty() { + anyhow::bail!("repos shorthand '{}' has an empty alias before '='", s); + } + if name.is_empty() { + anyhow::bail!("repos shorthand '{}' has an empty name after '='", s); + } + Ok((alias, name)) + } else { + let alias = derive_alias(s)?; + Ok((alias, s.to_string())) + } +} + +/// Derive the alias from a full `org/repo` name (last path segment). +fn derive_alias(name: &str) -> Result { + let alias = name + .rsplit('/') + .next() + .unwrap_or(name) + .to_string(); + if alias.is_empty() { + anyhow::bail!( + "Cannot derive a repository alias from '{}'. \ + Provide an explicit `alias` field.", + name + ); + } + Ok(alias) +} + +/// Resolve the `repos:` / legacy `repositories:` + `checkout:` fields in a +/// `FrontMatter` into the canonical `(Vec, Vec)` pair. +/// +/// - If `repos:` is non-empty, the legacy fields must be empty (mixing is rejected). +/// - If `repos:` is empty, legacy fields are used as-is (with a deprecation warning +/// when they are non-empty). +/// - If both are empty, returns `(vec![], vec![])`. +pub fn resolve_repos(front_matter: &FrontMatter) -> Result<(Vec, Vec)> { + let has_new = !front_matter.repos.is_empty(); + let has_legacy = !front_matter.repositories.is_empty() || !front_matter.checkout.is_empty(); + + if has_new && has_legacy { + anyhow::bail!( + "Cannot mix `repos:` with legacy `repositories:` / `checkout:`. \ + Migrate to `repos:` or remove it to keep using the legacy fields." + ); + } + + if has_new { + lower_repos(&front_matter.repos) + } else { + if has_legacy { + eprintln!( + "Warning: `repositories:` and `checkout:` are deprecated. \ + Use the compact `repos:` syntax instead. \ + See docs/front-matter.md for migration guidance." + ); + } + Ok((front_matter.repositories.clone(), front_matter.checkout.clone())) + } +} + /// Names that are reserved by the `workspace:` resolver and therefore cannot /// be used as repository aliases / `checkout:` entries. If a user defines a /// repository named `repo` and writes `workspace: repo`, the special-cased @@ -5493,4 +5627,221 @@ mod tests { assert!(out.contains("name: org/repo-a"), "out: {out}"); assert!(out.contains("ref: refs/heads/develop"), "out: {out}"); } + + // ────────────────────────────────────────────────────────────────────── + // Tests for compact `repos:` lowering + // ────────────────────────────────────────────────────────────────────── + + use super::{lower_repos, resolve_repos, parse_shorthand, derive_alias}; + use crate::compile::types::{ReposItem, RepoEntry}; + + #[test] + fn test_repos_shorthand_simple() { + let items = vec![ReposItem::Shorthand("my-org/my-repo".to_string())]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos.len(), 1); + assert_eq!(repos[0].repository, "my-repo"); + assert_eq!(repos[0].name, "my-org/my-repo"); + assert_eq!(repos[0].repo_type, "git"); + assert_eq!(repos[0].repo_ref, "refs/heads/main"); + assert_eq!(checkout, vec!["my-repo"]); + } + + #[test] + fn test_repos_shorthand_with_alias() { + let items = vec![ReposItem::Shorthand("schemas=my-org/internal-schemas".to_string())]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos.len(), 1); + assert_eq!(repos[0].repository, "schemas"); + assert_eq!(repos[0].name, "my-org/internal-schemas"); + assert_eq!(checkout, vec!["schemas"]); + } + + #[test] + fn test_repos_object_form_defaults() { + let items = vec![ReposItem::Full(RepoEntry { + name: "my-org/docs".to_string(), + alias: None, + repo_type: "git".to_string(), + repo_ref: "refs/heads/main".to_string(), + checkout: true, + })]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos[0].repository, "docs"); + assert_eq!(repos[0].name, "my-org/docs"); + assert_eq!(checkout, vec!["docs"]); + } + + #[test] + fn test_repos_object_form_no_checkout() { + let items = vec![ReposItem::Full(RepoEntry { + name: "my-org/big-monorepo".to_string(), + alias: None, + repo_type: "git".to_string(), + repo_ref: "refs/heads/main".to_string(), + checkout: false, + })]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos.len(), 1); + assert_eq!(repos[0].repository, "big-monorepo"); + assert!(checkout.is_empty()); + } + + #[test] + fn test_repos_object_form_custom_ref() { + let items = vec![ReposItem::Full(RepoEntry { + name: "my-org/docs".to_string(), + alias: Some("docs-v2".to_string()), + repo_type: "git".to_string(), + repo_ref: "refs/heads/release/2.x".to_string(), + checkout: true, + })]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos[0].repository, "docs-v2"); + assert_eq!(repos[0].repo_ref, "refs/heads/release/2.x"); + assert_eq!(checkout, vec!["docs-v2"]); + } + + #[test] + fn test_repos_rejects_duplicate_aliases() { + let items = vec![ + ReposItem::Shorthand("org/tools".to_string()), + ReposItem::Shorthand("other-org/tools".to_string()), + ]; + let err = lower_repos(&items).unwrap_err(); + assert!(err.to_string().contains("Duplicate repository alias 'tools'"), "{err}"); + } + + #[test] + fn test_repos_rejects_reserved_alias() { + let items = vec![ReposItem::Shorthand("org/self".to_string())]; + let err = lower_repos(&items).unwrap_err(); + assert!(err.to_string().contains("reserved"), "{err}"); + + let items = vec![ReposItem::Shorthand("org/repo".to_string())]; + let err = lower_repos(&items).unwrap_err(); + assert!(err.to_string().contains("reserved"), "{err}"); + } + + #[test] + fn test_repos_multiple_mixed() { + let items = vec![ + ReposItem::Shorthand("my-org/tools".to_string()), + ReposItem::Shorthand("schemas=my-org/internal-schemas".to_string()), + ReposItem::Full(RepoEntry { + name: "my-org/templates".to_string(), + alias: None, + repo_type: "git".to_string(), + repo_ref: "refs/heads/main".to_string(), + checkout: false, + }), + ]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos.len(), 3); + assert_eq!(checkout, vec!["tools", "schemas"]); + assert_eq!(repos[2].repository, "templates"); + } + + #[test] + fn test_parse_shorthand_simple() { + let (alias, name) = parse_shorthand("org/my-repo").unwrap(); + assert_eq!(alias, "my-repo"); + assert_eq!(name, "org/my-repo"); + } + + #[test] + fn test_parse_shorthand_with_equals() { + let (alias, name) = parse_shorthand("custom=org/my-repo").unwrap(); + assert_eq!(alias, "custom"); + assert_eq!(name, "org/my-repo"); + } + + #[test] + fn test_parse_shorthand_empty_alias_rejected() { + let err = parse_shorthand("=org/repo").unwrap_err(); + assert!(err.to_string().contains("empty alias"), "{err}"); + } + + #[test] + fn test_derive_alias_basic() { + assert_eq!(derive_alias("org/my-repo").unwrap(), "my-repo"); + assert_eq!(derive_alias("my-repo").unwrap(), "my-repo"); + assert_eq!(derive_alias("a/b/c").unwrap(), "c"); + } + + #[test] + fn test_resolve_repos_rejects_mixing() { + use crate::compile::types::FrontMatter; + let yaml = r#" +name: "test" +description: "test" +repos: + - my-org/tools +repositories: + - repository: foo + type: git + name: org/foo +"#; + let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); + let err = resolve_repos(&fm).unwrap_err(); + assert!(err.to_string().contains("Cannot mix"), "{err}"); + } + + #[test] + fn test_resolve_repos_empty() { + use crate::compile::types::FrontMatter; + let yaml = r#" +name: "test" +description: "test" +"#; + let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); + let (repos, checkout) = resolve_repos(&fm).unwrap(); + assert!(repos.is_empty()); + assert!(checkout.is_empty()); + } + + #[test] + fn test_resolve_repos_compact_syntax() { + use crate::compile::types::FrontMatter; + let yaml = r#" +name: "test" +description: "test" +repos: + - my-org/tools + - schemas=my-org/internal-schemas + - name: my-org/docs + ref: refs/heads/v2 + checkout: false +"#; + let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); + let (repos, checkout) = resolve_repos(&fm).unwrap(); + assert_eq!(repos.len(), 3); + assert_eq!(repos[0].repository, "tools"); + assert_eq!(repos[0].name, "my-org/tools"); + assert_eq!(repos[1].repository, "schemas"); + assert_eq!(repos[1].name, "my-org/internal-schemas"); + assert_eq!(repos[2].repository, "docs"); + assert_eq!(repos[2].repo_ref, "refs/heads/v2"); + assert_eq!(checkout, vec!["tools", "schemas"]); + } + + #[test] + fn test_resolve_repos_legacy_compat() { + use crate::compile::types::FrontMatter; + let yaml = r#" +name: "test" +description: "test" +repositories: + - repository: tools + type: git + name: my-org/tools +checkout: + - tools +"#; + let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); + let (repos, checkout) = resolve_repos(&fm).unwrap(); + assert_eq!(repos.len(), 1); + assert_eq!(repos[0].repository, "tools"); + assert_eq!(checkout, vec!["tools"]); + } } diff --git a/src/compile/mod.rs b/src/compile/mod.rs index bad700eb..a6a1ea33 100644 --- a/src/compile/mod.rs +++ b/src/compile/mod.rs @@ -7,6 +7,7 @@ //! - **1ES**: Integration with 1ES Pipeline Templates for SDL compliance mod common; +pub(crate) use common::resolve_repos; pub mod extensions; pub(crate) mod filter_ir; mod gitattributes; @@ -95,9 +96,14 @@ async fn compile_pipeline_inner( debug!("Target: {:?}", front_matter.target); debug!("Engine: {} (model: {})", front_matter.engine.engine_id(), front_matter.engine.model().unwrap_or("default")); debug!("Schedule: {:?}", front_matter.schedule()); - debug!("Repositories: {}", front_matter.repositories.len()); debug!("MCP servers configured: {}", front_matter.mcp_servers.len()); + // Resolve repos: new compact syntax or legacy repositories: + checkout: + let (resolved_repos, resolved_checkout) = common::resolve_repos(&front_matter)?; + front_matter.repositories = resolved_repos; + front_matter.checkout = resolved_checkout; + debug!("Repositories: {}", front_matter.repositories.len()); + // Validate checkout list against repositories common::validate_checkout_list(&front_matter.repositories, &front_matter.checkout)?; @@ -354,6 +360,11 @@ pub async fn check_pipeline(pipeline_path: &str) -> Result<()> { use crate::sanitize::SanitizeConfig; front_matter.sanitize_config_fields(); + // Resolve repos (compact or legacy) + let (resolved_repos, resolved_checkout) = common::resolve_repos(&front_matter)?; + front_matter.repositories = resolved_repos; + front_matter.checkout = resolved_checkout; + common::validate_checkout_list(&front_matter.repositories, &front_matter.checkout)?; let compiler: Box = match front_matter.target { diff --git a/src/compile/types.rs b/src/compile/types.rs index 46ddb8c0..66594225 100644 --- a/src/compile/types.rs +++ b/src/compile/types.rs @@ -602,10 +602,14 @@ pub struct FrontMatter { /// Runtime configuration for language environments (e.g., Lean 4) #[serde(default)] pub runtimes: Option, - /// Additional repository resources + /// Compact repository declarations (new unified syntax). + /// Each entry declares a repository resource and optionally whether to check it out. + #[serde(default)] + pub repos: Vec, + /// Additional repository resources (legacy — use `repos:` instead) #[serde(default)] pub repositories: Vec, - /// Repositories to checkout (subset of repositories) + /// Repositories to checkout (legacy — use `repos:` instead; subset of repositories) #[serde(default)] pub checkout: Vec, /// MCP server configurations @@ -697,6 +701,9 @@ impl SanitizeConfigTrait for FrontMatter { if let Some(ref mut r) = self.runtimes { r.sanitize_config_fields(); } + for item in &mut self.repos { + item.sanitize(); + } for repo in &mut self.repositories { repo.sanitize_config_fields(); } @@ -795,6 +802,99 @@ fn default_ref() -> String { "refs/heads/main".to_string() } +// ────────────────────────────────────────────────────────────────────────────── +// Compact `repos:` syntax — a single block that replaces both `repositories:` +// and `checkout:` with sensible defaults. +// ────────────────────────────────────────────────────────────────────────────── + +/// Object form for a `repos:` entry. +#[derive(Debug, Deserialize, Clone)] +pub struct RepoEntry { + /// Full repo name in the form `org/repo` (maps to ADO `name:`). + pub name: String, + /// Optional alias (maps to ADO `repository:`). Defaults to the last segment of `name`. + #[serde(default)] + pub alias: Option, + /// ADO repository resource type. Defaults to `"git"`. + #[serde(default = "default_repo_type", rename = "type")] + pub repo_type: String, + /// Branch/tag ref. Defaults to `"refs/heads/main"`. + #[serde(default = "default_ref", rename = "ref")] + pub repo_ref: String, + /// Whether the agent job checks out this repository. Defaults to `true`. + #[serde(default = "default_checkout")] + pub checkout: bool, +} + +fn default_repo_type() -> String { + "git".to_string() +} + +fn default_checkout() -> bool { + true +} + +/// A single item in the `repos:` list — either a string shorthand or an object. +#[derive(Debug, Clone)] +pub enum ReposItem { + /// String shorthand: `"org/repo"` or `"alias=org/repo"`. + Shorthand(String), + /// Full object form with explicit fields. + Full(RepoEntry), +} + +impl<'de> Deserialize<'de> for ReposItem { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + use serde::de; + + struct ReposItemVisitor; + + impl<'de> de::Visitor<'de> for ReposItemVisitor { + type Value = ReposItem; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("a string shorthand (\"org/repo\" or \"alias=org/repo\") or an object with at least a `name` field") + } + + fn visit_str(self, value: &str) -> std::result::Result { + Ok(ReposItem::Shorthand(value.to_string())) + } + + fn visit_map(self, map: M) -> std::result::Result + where + M: de::MapAccess<'de>, + { + let entry = RepoEntry::deserialize(de::value::MapAccessDeserializer::new(map))?; + Ok(ReposItem::Full(entry)) + } + } + + deserializer.deserialize_any(ReposItemVisitor) + } +} + +impl ReposItem { + /// Sanitize all user-provided fields through `sanitize_config`. + pub fn sanitize(&mut self) { + match self { + ReposItem::Shorthand(s) => { + *s = crate::sanitize::sanitize_config(s); + } + ReposItem::Full(entry) => { + entry.name = crate::sanitize::sanitize_config(&entry.name); + if let Some(ref mut a) = entry.alias { + *a = crate::sanitize::sanitize_config(a); + } + entry.repo_type = crate::sanitize::sanitize_config(&entry.repo_type); + entry.repo_ref = crate::sanitize::sanitize_config(&entry.repo_ref); + } + } + } +} + /// MCP configuration - can be `true` for simple enablement or an object with options #[derive(Debug, Deserialize, Clone)] #[serde(untagged)] diff --git a/src/main.rs b/src/main.rs index 583ecbc5..a6f48184 100644 --- a/src/main.rs +++ b/src/main.rs @@ -229,9 +229,15 @@ async fn run_execute( .await .with_context(|| format!("Failed to read source file: {}", source.display()))?; - let (front_matter, _) = compile::parse_markdown(&content) + let (mut front_matter, _) = compile::parse_markdown(&content) .with_context(|| format!("Failed to parse source file: {}", source.display()))?; + // Resolve compact repos: syntax into the legacy fields for execution + let (resolved_repos, resolved_checkout) = compile::resolve_repos(&front_matter) + .with_context(|| "Failed to resolve repository configuration")?; + front_matter.repositories = resolved_repos; + front_matter.checkout = resolved_checkout; + println!("Loaded tool configs from: {}", source.display()); // Extract tools config before moving front_matter into build_execution_context From ed25d43640f1fe90266d478e09f68b85d45f5c61 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 8 May 2026 20:01:39 +0000 Subject: [PATCH 02/12] fix(compile): address review feedback for repos: syntax - Handle trailing slash in derive_alias (trim before splitting) - Add tests for reserved alias via explicit alias (shorthand & object) - Add test for empty name after '=' in shorthand - Add test for trailing slash alias derivation Agent-Logs-Url: https://github.com/githubnext/ado-aw/sessions/c9ae0fbb-1f1c-4874-b990-16fd3b4bc3e3 Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> --- src/compile/common.rs | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/src/compile/common.rs b/src/compile/common.rs index aa0b1d50..1b9ba467 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -420,10 +420,12 @@ fn parse_shorthand(s: &str) -> Result<(String, String)> { /// Derive the alias from a full `org/repo` name (last path segment). fn derive_alias(name: &str) -> Result { - let alias = name + // Trim trailing slashes to handle "org/repo/" gracefully + let trimmed = name.trim_end_matches('/'); + let alias = trimmed .rsplit('/') .next() - .unwrap_or(name) + .unwrap_or(trimmed) .to_string(); if alias.is_empty() { anyhow::bail!( @@ -5721,6 +5723,22 @@ mod tests { let items = vec![ReposItem::Shorthand("org/repo".to_string())]; let err = lower_repos(&items).unwrap_err(); assert!(err.to_string().contains("reserved"), "{err}"); + + // Reserved via explicit alias in shorthand form + let items = vec![ReposItem::Shorthand("self=org/some-repo".to_string())]; + let err = lower_repos(&items).unwrap_err(); + assert!(err.to_string().contains("reserved"), "{err}"); + + // Reserved via explicit alias in object form + let items = vec![ReposItem::Full(RepoEntry { + name: "org/fine-repo".to_string(), + alias: Some("root".to_string()), + repo_type: "git".to_string(), + repo_ref: "refs/heads/main".to_string(), + checkout: true, + })]; + let err = lower_repos(&items).unwrap_err(); + assert!(err.to_string().contains("reserved"), "{err}"); } #[test] @@ -5762,6 +5780,12 @@ mod tests { assert!(err.to_string().contains("empty alias"), "{err}"); } + #[test] + fn test_parse_shorthand_empty_name_rejected() { + let err = parse_shorthand("alias=").unwrap_err(); + assert!(err.to_string().contains("empty name"), "{err}"); + } + #[test] fn test_derive_alias_basic() { assert_eq!(derive_alias("org/my-repo").unwrap(), "my-repo"); @@ -5769,6 +5793,12 @@ mod tests { assert_eq!(derive_alias("a/b/c").unwrap(), "c"); } + #[test] + fn test_derive_alias_trailing_slash() { + // Trailing slash should be trimmed gracefully + assert_eq!(derive_alias("org/repo/").unwrap(), "repo"); + } + #[test] fn test_resolve_repos_rejects_mixing() { use crate::compile::types::FrontMatter; From 4c31cbe27b8febc3524e9c6cbbdfefa0a847ae7b Mon Sep 17 00:00:00 2001 From: James Devine Date: Fri, 8 May 2026 21:53:13 +0100 Subject: [PATCH 03/12] feat(compile): autorewrite front matter via versioned migration chain Introduces a Django/Rails-style schema-version field on FrontMatter and a registered migration framework so breaking grammar changes auto-migrate existing user sources during compile rather than hard-failing. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> --- AGENTS.md | 7 + docs/extending.md | 3 + docs/front-matter.md | 1 + docs/migrations.md | 481 ++++++++++++++++++ src/compile/common.rs | 368 +++++++++++++- src/compile/migration_integration_test.rs | 231 +++++++++ src/compile/migrations/helpers.rs | 198 ++++++++ src/compile/migrations/mod.rs | 567 ++++++++++++++++++++++ src/compile/mod.rs | 208 +++++++- src/compile/types.rs | 46 ++ src/main.rs | 6 +- tests/migration_tests.rs | 382 +++++++++++++++ 12 files changed, 2472 insertions(+), 26 deletions(-) create mode 100644 docs/migrations.md create mode 100644 src/compile/migration_integration_test.rs create mode 100644 src/compile/migrations/helpers.rs create mode 100644 src/compile/migrations/mod.rs create mode 100644 tests/migration_tests.rs diff --git a/AGENTS.md b/AGENTS.md index 02de6d1f..3070a06b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -61,6 +61,10 @@ Every compiled pipeline runs as three sequential jobs: │ │ │ ├── safe_outputs.rs # Always-on SafeOutputs MCP extension │ │ │ ├── trigger_filters.rs # Trigger filter extension (gate evaluator delivery) │ │ │ └── tests.rs # Extension integration tests +│ │ ├── migrations/ # Front-matter migrations (one file per version step) +│ │ │ ├── mod.rs # Migration struct, MIGRATIONS registry, runner, CURRENT_SCHEMA_VERSION +│ │ │ └── helpers.rs # take_key, insert_no_overwrite, rename_key, ConflictPolicy +│ │ ├── migration_integration_test.rs # White-box rewrite-path tests (stub registry injection) │ │ └── types.rs # Front matter grammar and types │ ├── init.rs # Repository initialization for AI-first authoring │ ├── execute.rs # Stage 3 safe output execution @@ -192,6 +196,9 @@ index to jump to the right page. - [`docs/filter-ir.md`](docs/filter-ir.md) — filter expression IR specification: `Fact`/`Predicate` types, three-pass compilation (lower → validate → codegen), gate step generation, adding new filter types. +- [`docs/migrations.md`](docs/migrations.md) — front-matter migration + framework: schema-version field, automatic source rewrite on + breaking-change updates, contributor workflow for adding migrations. - [`docs/local-development.md`](docs/local-development.md) — local development setup notes. diff --git a/docs/extending.md b/docs/extending.md index f4a235f2..bed8e450 100644 --- a/docs/extending.md +++ b/docs/extending.md @@ -9,6 +9,9 @@ When extending the compiler: 1. **New CLI commands**: Add variants to the `Commands` enum in `main.rs` 2. **New compile targets**: Implement the `Compiler` trait in a new file under `src/compile/` 3. **New front matter fields**: Add fields to `FrontMatter` in `src/compile/types.rs` + - **Breaking changes** (renames, removals, type changes, added required fields) + require adding a migration under `src/compile/migrations/` in the same PR. + See [`docs/migrations.md`](migrations.md). 4. **New template markers**: Handle replacements in the target-specific compiler (e.g., `standalone.rs` or `onees.rs`) 5. **New safe-output tools**: Add to `src/safeoutputs/` — implement `ToolResult`, `Executor`, register in `mod.rs`, `mcp.rs`, `execute.rs` 6. **New first-class tools**: Create `src/tools//` with `mod.rs` and `extension.rs` (CompilerExtension impl). Add `execute.rs` if the tool has Stage 3 runtime logic. Extend `ToolsConfig` in `types.rs`, add collection in `collect_extensions()` diff --git a/docs/front-matter.md b/docs/front-matter.md index 5f90c971..21caf6ef 100644 --- a/docs/front-matter.md +++ b/docs/front-matter.md @@ -8,6 +8,7 @@ The compiler expects markdown files with YAML front matter similar to gh-aw: ```markdown --- +schema-version: 1 # Optional: front-matter schema version (defaults to 1). The compiler bumps this in place when migrations apply during compilation. See docs/migrations.md. name: "name for this agent" description: "One line description for this agent" target: standalone # Optional: "standalone" (default) or "1es". See docs/targets.md. diff --git a/docs/migrations.md b/docs/migrations.md new file mode 100644 index 00000000..4045cdaa --- /dev/null +++ b/docs/migrations.md @@ -0,0 +1,481 @@ +# Front-matter Migrations + +_Part of the [ado-aw documentation](../AGENTS.md)._ + +The `ado-aw` compiler maintains a versioned schema for the front-matter +grammar. When a breaking change is introduced (a renamed field, a removed +field, a type change), the compiler ships a **migration** that rewrites +older sources to the new shape automatically. Users see a warning during +compilation; their source files are updated in place; the build moves on. + +This page is the reference for both users (what migrations mean for me) +and contributors (how to add one). + +## How it works + +### `schema-version` field + +Every front-matter file carries an optional `schema-version: ` +field. When the field is missing, it defaults to `1`. The compiler bumps +this in place after running migrations. + +```yaml +--- +schema-version: 1 +name: my-agent +description: my agent +--- +``` + +End users typically don't write this field by hand. The compiler keeps +it accurate when migrations apply. + +### The compile flow + +1. `ado-aw compile` reads the source `.md` and parses the front matter + as an **untyped** `serde_yaml::Mapping`. This step never trips on + removed/renamed fields. +2. The migration runner walks the registered chain from the source's + `schema-version` up to the compiler's current version, applying each + migration's transformation in order. +3. The compiler runs all the usual validation and codegen against the + migrated, typed `FrontMatter`. +4. **Only on a fully successful compile**, the source `.md` is + atomically rewritten with the migrated front matter and a warning + prints to stderr. A failed compile leaves the source untouched. +5. The `.lock.yml` is written atomically last. + +### What gets preserved on rewrite + +- **Body markdown** is preserved byte-for-byte (everything after the + closing `---`). +- **Front-matter key order** is preserved by `serde_yaml`'s + insertion-ordered mapping. +- **Front-matter comments** are NOT preserved. `serde_yaml` round-trip + drops them. The warning emitted on rewrite calls this out so it isn't + a surprise. If you have important context in a front-matter comment, + move it into the markdown body before running compile. +- **Quote and scalar styles** in YAML may be normalized. This is + cosmetic. + +### Atomicity and lost-update protection + +The rewrite uses `tempfile + rename` for atomicity (no torn writes). +Before the rename, the runner re-reads the source and compares its +SHA-256 to the snapshot taken at parse time. If the file changed +between parse and rewrite, the runner aborts with a clear error +("source file ... changed during compilation; refusing to migrate") +rather than clobbering whoever wrote the file. + +### `check` command behavior + +`ado-aw check` exits non-zero when migrations are pending — there is no +opt-in flag and no warning-only mode. Rationale: compiled pipelines +download the **same** `ado-aw` version that produced them +(`src/data/base.yml`, `src/data/1es-base.yml`), so the in-pipeline +integrity check is internally consistent by construction. The only time +`check` sees a pending migration is when a developer runs a newer +`ado-aw` locally against an older source — exactly when we want to +fail loudly. The fix is `ado-aw compile`, which migrates the source in +place. + +### `execute` command behavior + +The Stage 3 executor runs migrations in memory only. It never rewrites +the source (the executor's working tree is not the source-of-truth +tree). When a migration applies, it logs a warning and continues. + +## Adding a migration + +You need a migration whenever you introduce a breaking change to the +front-matter grammar: + +- Renaming a field +- Removing a field +- Changing a field's type or shape +- Adding a required field that didn't exist before +- Changing the meaning of an existing field + +Non-breaking changes (adding an optional field, accepting a new +variant) do **not** need a migration. + +### File layout + +Migrations live in `src/compile/migrations/`: + +``` +src/compile/migrations/ +├── mod.rs # Framework + MIGRATIONS registry +├── helpers.rs # take_key, insert_no_overwrite, rename_key, ConflictPolicy +├── 0001_engine_id_split.rs +├── 0002_permissions_field.rs +└── 0003_safeoutput_renames.rs +``` + +The filename prefix is the zero-padded `from_version`. Files sort +naturally in the directory listing in chain order. + +### Anatomy of a migration + +```rust +// src/compile/migrations/0001_engine_id_split.rs + +use anyhow::Result; +use serde_yaml::Mapping; + +use super::{ConflictPolicy, Migration, MigrationContext}; + +pub static MIGRATION: Migration = Migration { + from_version: 1, + to_version: 2, + id: "engine_id_split", + summary: "engine: -> engine: { id: copilot, model: }", + introduced_in: "0.27.0", + apply: apply_migration, +}; + +fn apply_migration(fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { + // ... your transformation ... + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn upgrades_engine_string_to_object() { + // before / after fixture pair + } + + #[test] + fn already_current_shape_is_preserved() { + // defensive: every from_version=1 migration runs on every + // unstamped source in the wild, including ones that were + // authored *yesterday* against the current shape. + } +} +``` + +### Registry append + +Two edits in `src/compile/migrations/mod.rs`: + +```rust +mod m0001_engine_id_split; // <-- add module declaration + +pub static MIGRATIONS: &[&'static Migration] = &[ + &m0001_engine_id_split::MIGRATION, // <-- append at the end +]; +``` + +`CURRENT_SCHEMA_VERSION` is computed automatically from the registry +length. Tests in `mod.rs` enforce that the registry is contiguous, that +migration ids are unique, and that filenames match `from_version`. A +malformed registry fails fast at runtime via `ensure!()`. + +### Use the helpers + +Migrations should prefer `helpers::*` over raw `Mapping` manipulation: + +- `take_key(map, "old")` — remove and return. +- `insert_no_overwrite(map, "new", value)` — error on conflict. +- `rename_key(map, "old", "new", ConflictPolicy::Error)` — default + policy is **error**, never silent overwrite. + +Silent overwrite is almost always a bug when transforming user data. + +### Defensive predicates for `from_version: 1` + +Because the `schema-version` field doesn't exist in any source written +before this framework shipped, every existing file looks like v1 — +including files that were authored yesterday against the current +shape. Migrations from `from_version: 1` must be **shape-detecting and +conflict-aware**: + +- If both old and new keys are present → error rather than silent + overwrite (default `ConflictPolicy::Error`). +- If the old key has the new shape already → no-op (don't migrate). + +Once we ship v2, this concern goes away for v2 → v3 etc. — v2 sources +have an explicit `schema-version: 2` stamp, so the framework only runs +migrations against actual v2 documents. + +### Concurrent PRs + +If two PRs each add a migration `N → N+1`, the second one to merge +must rebase. The rebase is mechanical: + +1. Renumber the file prefix: `0003_…rs` → `0004_…rs`. +2. Bump `from_version` and `to_version` in the static. +3. Update any per-migration test fixtures that stamped the old + version. +4. Re-append your migration to `MIGRATIONS` after the freshly merged + one. + +The contiguity tests fail loudly if a rebase leaves a gap, so this is +hard to get wrong silently. + +### What if my change can't be expressed as a `Mapping` rewrite? + +The migration `apply` function receives only the front-matter mapping. +It cannot inspect the markdown body, the file path, the lock file, or +git state. If your change requires that information, do not write a +migration that guesses. Instead, add a migration that **errors** with +an actionable "manual migration required: " message so +the user knows exactly what to fix. + +## Worked example: `engine_id_split` + +This is the migration that would have caught the `0.17.0` breaking +change (`engine: ` → `engine: { id: copilot, model: }`). +It's a complete file you could drop into `src/compile/migrations/` +and register today. Read it as a template for your own migration. + +```rust +// src/compile/migrations/0001_engine_id_split.rs + +//! engine: -> engine: { id: copilot, model: } +//! +//! Before 0.17.0 the `engine` field was a model name (e.g. +//! `engine: claude-opus-4.5`). The grammar changed to use engine +//! identifiers (`engine: copilot`), with the model nested in an object +//! form (`engine: { id: copilot, model: }`). +//! +//! This migration detects the old shape and rewrites it. + +use anyhow::{bail, Result}; +use serde_yaml::{Mapping, Value}; + +use super::{Migration, MigrationContext}; + +pub static MIGRATION: Migration = Migration { + from_version: 1, + to_version: 2, + id: "engine_id_split", + summary: "engine: -> engine: { id: copilot, model: }", + introduced_in: "0.27.0", + apply: apply_migration, +}; + +/// Engine identifiers that are valid as the simple-form string. When +/// `engine` is a string equal to one of these, the source is already +/// using the current grammar and we leave it alone. +const KNOWN_ENGINE_IDS: &[&str] = &["copilot"]; + +fn apply_migration(fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { + let key = Value::String("engine".to_string()); + + // No `engine` field at all — the front matter relies on the + // default. Default is `copilot` in both old and new grammars, so + // there's nothing to migrate. + let Some(engine) = fm.get(&key) else { + return Ok(()); + }; + + match engine { + // Already-object form. Either authored against the new + // grammar from day one, or migrated by an earlier run. No-op. + Value::Mapping(_) => Ok(()), + + // String form. Two cases: + // - The string is a known engine identifier (`copilot`): + // the source is already using the current simple form. + // No-op. + // - Anything else: the string is a *model name* from the + // old grammar. Wrap it in the object form. + Value::String(s) => { + if KNOWN_ENGINE_IDS.contains(&s.as_str()) { + return Ok(()); + } + let model = s.clone(); + let mut object = Mapping::new(); + object.insert( + Value::String("id".to_string()), + Value::String("copilot".to_string()), + ); + object.insert( + Value::String("model".to_string()), + Value::String(model), + ); + fm.insert(key, Value::Mapping(object)); + Ok(()) + } + + // Unexpected shape (number, bool, sequence, …). Refuse rather + // than guess — the user needs to fix this by hand. + other => bail!( + "engine field has unexpected shape (expected string or mapping, \ + got {}); manual migration required", + describe(other) + ), + } +} + +fn describe(v: &Value) -> &'static str { + match v { + Value::Null => "null", + Value::Bool(_) => "bool", + Value::Number(_) => "number", + Value::String(_) => "string", + Value::Sequence(_) => "sequence", + Value::Mapping(_) => "mapping", + Value::Tagged(_) => "tagged", + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn run(input: &str) -> Mapping { + let mut m: Mapping = serde_yaml::from_str(input).unwrap(); + apply_migration(&mut m, &MigrationContext {}).expect("apply"); + m + } + + fn run_err(input: &str) -> String { + let mut m: Mapping = serde_yaml::from_str(input).unwrap(); + format!( + "{}", + apply_migration(&mut m, &MigrationContext {}).unwrap_err() + ) + } + + #[test] + fn rewrites_legacy_model_string_into_object_form() { + let after = run("name: x\nengine: claude-opus-4.5\n"); + let engine = after + .get(Value::String("engine".into())) + .unwrap() + .as_mapping() + .expect("engine should now be a mapping"); + assert_eq!( + engine.get(Value::String("id".into())), + Some(&Value::String("copilot".into())) + ); + assert_eq!( + engine.get(Value::String("model".into())), + Some(&Value::String("claude-opus-4.5".into())) + ); + } + + #[test] + fn already_current_simple_form_is_noop() { + // Defensive: every from_version=1 migration must be safe on + // sources that look like they're already current. Here the + // user explicitly wrote the new simple form `engine: copilot`. + let after = run("name: x\nengine: copilot\n"); + assert_eq!( + after.get(Value::String("engine".into())), + Some(&Value::String("copilot".into())), + "string `copilot` should be left untouched" + ); + } + + #[test] + fn already_object_form_is_noop() { + let input = "name: x\nengine:\n id: copilot\n model: claude-opus-4.7\n"; + let mut original: Mapping = serde_yaml::from_str(input).unwrap(); + let after = run(input); + assert_eq!( + after.get(Value::String("engine".into())), + original + .remove(Value::String("engine".into())) + .as_ref() + ); + } + + #[test] + fn missing_engine_field_is_noop() { + let after = run("name: x\ndescription: y\n"); + assert!(!after.contains_key(Value::String("engine".into()))); + } + + #[test] + fn unexpected_engine_shape_is_rejected() { + let err = run_err("name: x\nengine: 42\n"); + assert!( + err.contains("manual migration required"), + "expected actionable error, got: {}", + err + ); + } +} +``` + +### What this example illustrates + +1. **The data model does the work.** `Migration` is a static — + every field is set once at the top of the file. The runner reads + `from_version`/`to_version` to walk the chain. `apply` is a plain + `fn` pointer, so the registry is `&'static [&'static Migration]` + — zero allocation, zero indirection beyond the function call. + +2. **`Value` pattern-matching beats `if let` ladders.** The + `match engine { Mapping(_) => …, String(s) => …, other => … }` + shape is the natural way to express "I support these shapes; + reject others." It catches numeric/boolean/null engines that + someone might have written by mistake. + +3. **Defensive predicates for `from_version: 1`.** Three of the five + tests exercise no-op cases (already-object, already-simple + `copilot`, missing field). These matter because **every existing + `.md` file in the wild looks like v1** — they don't carry + `schema-version`. The migration runs on all of them, so it has + to be safe on already-current shapes. Once we ship v2 the + defensiveness requirement drops for v2 → v3 migrations: v2 + sources have an explicit stamp. + +4. **Hard error on unexpected shapes.** The `other => bail!(…)` + arm is the "manual migration required" escape hatch. We don't + try to guess what `engine: 42` means — we surface a clear error + so the user fixes it by hand. + +5. **Registering it is two lines** in `src/compile/migrations/mod.rs`: + + ```rust + mod m0001_engine_id_split; + + pub static MIGRATIONS: &[&'static Migration] = &[ + &m0001_engine_id_split::MIGRATION, + ]; + ``` + + `CURRENT_SCHEMA_VERSION` automatically becomes 2. The + registry-contiguity test, the filename-prefix test, and the + id-uniqueness test all keep passing. + +## Tests + +The migration framework is covered by three layers of tests: + +- **Unit tests** in `src/compile/migrations/{mod.rs,helpers.rs}` + cover registry contiguity, helper edge cases, and individual + migration apply functions. +- **White-box integration tests** in + `src/compile/migration_integration_test.rs` exercise the rewrite + path end-to-end (parse → migrate → compile → atomic source rewrite + → lock-file write) using a stub migration registry injected via the + crate-private `compile_pipeline_with_registry` and + `parse_markdown_detailed_with_registry` hooks. They live inside + `src/` because the production registry is empty and integration + tests in `tests/` cannot link against crate internals. +- **Black-box CLI tests** in `tests/migration_tests.rs` spawn the + compiled `ado-aw` binary as a subprocess and assert on the + user-visible behavior of `compile` and `check` for sources with + various `schema-version` shapes (future versions, non-integer values, + healthy round-trip, etc.). + +When you add a real migration, ship the per-migration before/after +fixtures alongside it in the migration's own file +(`src/compile/migrations/_.rs`). + +## See also + +- [`docs/front-matter.md`](front-matter.md) — the full front-matter + grammar (and where the `schema-version` field is documented for end + users). +- [`docs/extending.md`](extending.md) — broader guidance for adding + features to the compiler, including the requirement to add a + migration alongside any breaking front-matter change. diff --git a/src/compile/common.rs b/src/compile/common.rs index 1b9ba467..eb345de5 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -12,27 +12,216 @@ use crate::allowed_hosts::{CORE_ALLOWED_HOSTS, mcp_required_hosts}; use crate::ecosystem_domains::{get_ecosystem_domains, is_ecosystem_identifier, is_known_ecosystem}; use crate::validate; -/// Parse the markdown file and extract front matter and body -pub fn parse_markdown(content: &str) -> Result<(FrontMatter, String)> { - let content = content.trim(); +/// Atomically write `contents` to `path`. +/// +/// Uses [`tempfile::NamedTempFile`] in the destination's parent +/// directory so the final `persist` is a same-filesystem rename. This +/// guarantees readers either see the old file or the new file in full — +/// never a half-written state. +/// +/// Behavior: +/// +/// - Creates the tempfile in `path.parent()` (falls back to `.` when +/// the parent is empty, matching `tokio::fs::write` semantics). +/// - On Unix, preserves the existing file's mode if the target exists. +/// Otherwise the tempfile keeps its default mode (0o600 from +/// `tempfile`'s implementation). +/// - When the destination is a symlink, the rename replaces the +/// symlink with a regular file (matches `tokio::fs::write`; the +/// symlink target is *not* followed). +pub async fn atomic_write(path: &Path, contents: &str) -> Result<()> { + let path = path.to_path_buf(); + let owned_contents = contents.to_string(); + // tempfile is sync; do the whole thing on a blocking task so we + // don't block the async runtime on large writes / fsync. + tokio::task::spawn_blocking(move || atomic_write_blocking(&path, &owned_contents)) + .await + .context("atomic_write task panicked")? +} + +fn atomic_write_blocking(path: &Path, contents: &str) -> Result<()> { + use std::io::Write; + + let parent = path.parent().filter(|p| !p.as_os_str().is_empty()); + let mut tmp = match parent { + Some(dir) => tempfile::NamedTempFile::new_in(dir).with_context(|| { + format!( + "failed to create temporary file in {}", + dir.display() + ) + })?, + None => tempfile::NamedTempFile::new() + .context("failed to create temporary file in current directory")?, + }; + + tmp.write_all(contents.as_bytes()) + .with_context(|| format!("failed to write temporary file for {}", path.display()))?; + tmp.as_file() + .sync_all() + .with_context(|| format!("failed to fsync temporary file for {}", path.display()))?; + + // On Unix, copy the existing file's mode onto the tempfile so + // permissions are preserved across the atomic rename. + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + if let Ok(meta) = std::fs::metadata(path) { + let mode = meta.permissions().mode(); + std::fs::set_permissions( + tmp.path(), + std::fs::Permissions::from_mode(mode), + ) + .with_context(|| { + format!( + "failed to copy permissions from {} to temp file", + path.display() + ) + })?; + } + } + + tmp.persist(path) + .with_context(|| format!("failed to atomically rename into {}", path.display()))?; + Ok(()) +} - if !content.starts_with("---") { +/// Detailed parse result. Holds enough information to rewrite the +/// source on disk byte-faithfully when migrations apply. +/// +/// See [`parse_markdown_detailed`]. +#[derive(Debug)] +pub struct ParsedSource { + /// Typed front matter, after migrations have been applied to the + /// underlying mapping. + pub front_matter: FrontMatter, + /// Body for compilation, with leading/trailing whitespace trimmed + /// (matches the legacy `parse_markdown` second tuple element). + pub markdown_body: String, + /// Migration outcome. + pub migrations: super::migrations::MigrationReport, + /// The migrated front-matter mapping, with `schema-version` + /// bumped. Used to reconstruct the source for rewrite. + pub front_matter_mapping: serde_yaml::Mapping, + /// The body region exactly as it appeared after the closing `---`, + /// byte-for-byte (no trim). Includes any leading newline. + pub body_raw: String, + /// SHA-256 of the original source bytes (lost-update protection). + pub source_sha256: [u8; 32], +} + +/// Parse the markdown file, run the migration chain on the front matter +/// in memory, and return both the typed `FrontMatter` and the raw +/// fragments needed to rewrite the source on disk byte-faithfully. +/// +/// Use this from callers that may rewrite the source (the `compile` +/// command). Callers that only want the typed view of the front matter +/// should use the backward-compatible [`parse_markdown`] wrapper. +pub fn parse_markdown_detailed(content: &str) -> Result { + parse_markdown_detailed_with_registry(content, super::migrations::MIGRATIONS) +} + +/// Variant of [`parse_markdown_detailed`] that allows injecting an +/// explicit migration registry. Used by tests; production callers go +/// through the no-arg version that reads the global +/// [`super::migrations::MIGRATIONS`]. +pub(crate) fn parse_markdown_detailed_with_registry( + content: &str, + registry: &[&'static super::migrations::Migration], +) -> Result { + use sha2::Digest; + + // Lost-update protection: hash the raw input as it was provided, so + // a rewrite path can later re-read the file and compare. + let mut hasher = sha2::Sha256::new(); + hasher.update(content.as_bytes()); + let source_sha256: [u8; 32] = hasher.finalize().into(); + + // Allow leading whitespace before the opening fence (preserves + // historical leniency). We compute a byte offset into `content` so + // that `body_raw` extraction is purely byte-faithful. + let leading_ws = content.bytes().take_while(|b| b.is_ascii_whitespace()).count(); + let after_lead = &content[leading_ws..]; + if !after_lead.starts_with("---") { anyhow::bail!("Markdown file must start with YAML front matter (---)"); } - // Find the closing --- - let rest = &content[3..]; - let end_idx = rest + let after_open = &after_lead[3..]; + let end_idx = after_open .find("\n---") .context("Could not find closing --- for front matter")?; - let yaml_content = &rest[..end_idx]; - let markdown_body = rest[end_idx + 4..].trim(); + let yaml_str = &after_open[..end_idx]; + let body_raw_slice = &after_open[end_idx + 4..]; + let body_raw = body_raw_slice.to_string(); + let markdown_body = body_raw_slice.trim().to_string(); + // Stage 1: parse to untyped Value, reject non-mapping at top level. + let parsed_value: serde_yaml::Value = + serde_yaml::from_str(yaml_str).context("Failed to parse YAML front matter")?; + let mut mapping = match parsed_value { + serde_yaml::Value::Mapping(m) => m, + other => { + anyhow::bail!( + "YAML front matter must be a mapping/object, got {}", + yaml_value_kind(&other) + ); + } + }; + + // Stage 2: run the migration chain on the untyped mapping. + let report = super::migrations::migrate_front_matter_with(&mut mapping, registry) + .context("Failed to migrate front matter")?; + + // Stage 3: deserialize the (possibly migrated) mapping into the + // typed FrontMatter. Errors here mean either the user wrote an + // unsupported shape or a migration produced invalid output. let front_matter: FrontMatter = - serde_yaml::from_str(yaml_content).context("Failed to parse YAML front matter")?; + serde_yaml::from_value(serde_yaml::Value::Mapping(mapping.clone())) + .context("Failed to parse YAML front matter")?; + + Ok(ParsedSource { + front_matter, + markdown_body, + migrations: report, + front_matter_mapping: mapping, + body_raw, + source_sha256, + }) +} + +/// Reconstruct full source content from a migrated [`ParsedSource`]. +/// +/// Output shape: +/// - `---\n` +/// - the migrated YAML mapping (`serde_yaml::to_string` always ends +/// with `\n`) +/// - `---` +/// - the original body region byte-for-byte (`body_raw`) +pub fn reconstruct_source(parsed: &ParsedSource) -> Result { + let yaml_serialized = serde_yaml::to_string(&parsed.front_matter_mapping) + .context("Failed to serialize migrated front matter")?; + Ok(format!("---\n{}---{}", yaml_serialized, parsed.body_raw)) +} + +fn yaml_value_kind(v: &serde_yaml::Value) -> &'static str { + match v { + serde_yaml::Value::Null => "null", + serde_yaml::Value::Bool(_) => "bool", + serde_yaml::Value::Number(_) => "number", + serde_yaml::Value::String(_) => "string", + serde_yaml::Value::Sequence(_) => "sequence", + serde_yaml::Value::Mapping(_) => "mapping", + serde_yaml::Value::Tagged(_) => "tagged", + } +} - Ok((front_matter, markdown_body.to_string())) +/// Backward-compatible parse: returns the typed front matter and the +/// trimmed body. New callers that may rewrite the source on disk +/// should use [`parse_markdown_detailed`] instead. +#[allow(dead_code)] +pub fn parse_markdown(content: &str) -> Result<(FrontMatter, String)> { + let parsed = parse_markdown_detailed(content)?; + Ok((parsed.front_matter, parsed.markdown_body)) } /// Replace a placeholder in the template, preserving the indentation for multi-line content. @@ -2499,6 +2688,163 @@ mod tests { fm } + // ─── atomic_write ───────────────────────────────────────────────────────── + + #[tokio::test] + async fn atomic_write_creates_new_file() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("a.txt"); + atomic_write(&path, "hello\n").await.unwrap(); + let read = tokio::fs::read_to_string(&path).await.unwrap(); + assert_eq!(read, "hello\n"); + } + + #[tokio::test] + async fn atomic_write_overwrites_existing_file() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("a.txt"); + tokio::fs::write(&path, "old contents").await.unwrap(); + atomic_write(&path, "new contents").await.unwrap(); + let read = tokio::fs::read_to_string(&path).await.unwrap(); + assert_eq!(read, "new contents"); + } + + #[cfg(unix)] + #[tokio::test] + async fn atomic_write_preserves_unix_mode() { + use std::os::unix::fs::PermissionsExt; + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("a.txt"); + tokio::fs::write(&path, "old").await.unwrap(); + std::fs::set_permissions(&path, std::fs::Permissions::from_mode(0o640)).unwrap(); + atomic_write(&path, "new").await.unwrap(); + let mode = std::fs::metadata(&path).unwrap().permissions().mode(); + assert_eq!(mode & 0o777, 0o640, "expected mode 0o640, got {:o}", mode & 0o777); + } + + #[cfg(unix)] + #[tokio::test] + async fn atomic_write_replaces_symlink_with_regular_file() { + let dir = tempfile::tempdir().unwrap(); + let target = dir.path().join("target.txt"); + let link = dir.path().join("link.txt"); + tokio::fs::write(&target, "target").await.unwrap(); + std::os::unix::fs::symlink(&target, &link).unwrap(); + + atomic_write(&link, "via-link").await.unwrap(); + + // Link is now a regular file; target is unchanged. + let link_meta = std::fs::symlink_metadata(&link).unwrap(); + assert!( + !link_meta.file_type().is_symlink(), + "symlink should have been replaced" + ); + assert_eq!(tokio::fs::read_to_string(&link).await.unwrap(), "via-link"); + assert_eq!(tokio::fs::read_to_string(&target).await.unwrap(), "target"); + } + + // ─── parse_markdown_detailed ────────────────────────────────────────────── + + #[test] + fn parse_markdown_detailed_preserves_body_byte_for_byte() { + // Case 1: trailing newline. + let original = "---\nname: x\ndescription: y\n---\nbody line\n"; + let parsed = parse_markdown_detailed(original).unwrap(); + assert_eq!(parsed.body_raw, "\nbody line\n"); + let reconstructed = reconstruct_source(&parsed).unwrap(); + // No migrations ran, so the YAML round-trip is the only + // structural change. The body region is byte-faithful. + assert!(reconstructed.ends_with("---\nbody line\n")); + + // Case 2: no trailing newline at all. + let original = "---\nname: x\ndescription: y\n---\nbody"; + let parsed = parse_markdown_detailed(original).unwrap(); + assert_eq!(parsed.body_raw, "\nbody"); + let reconstructed = reconstruct_source(&parsed).unwrap(); + assert!(reconstructed.ends_with("---\nbody")); + + // Case 3: empty body, but trailing newline. + let original = "---\nname: x\ndescription: y\n---\n"; + let parsed = parse_markdown_detailed(original).unwrap(); + assert_eq!(parsed.body_raw, "\n"); + let reconstructed = reconstruct_source(&parsed).unwrap(); + assert!(reconstructed.ends_with("---\n")); + + // Case 4: blank lines between closing fence and content are + // preserved as-is in body_raw. + let original = "---\nname: x\ndescription: y\n---\n\n\n## heading\n\nbody.\n"; + let parsed = parse_markdown_detailed(original).unwrap(); + assert_eq!(parsed.body_raw, "\n\n\n## heading\n\nbody.\n"); + } + + #[test] + fn parse_markdown_detailed_byte_faithful_when_no_migration_runs() { + // With the registry empty, parsing a v1 source and reconstructing + // it should produce a byte-identical document apart from + // serde_yaml's canonical formatting of the YAML mapping. We + // assert the body region matches exactly. + let original = "---\nname: x\ndescription: y\n---\n## body\n"; + let parsed = parse_markdown_detailed(original).unwrap(); + assert!(!parsed.migrations.changed()); + let reconstructed = reconstruct_source(&parsed).unwrap(); + // Find the closing fence in both and compare the suffix. + let orig_suffix = &original[original.find("\n---\n").unwrap()..]; + let recon_suffix = &reconstructed[reconstructed.find("\n---\n").unwrap()..]; + assert_eq!(orig_suffix, recon_suffix, "body region must be byte-identical"); + } + + #[test] + fn parse_markdown_detailed_allows_leading_whitespace() { + let original = "\n \n---\nname: x\ndescription: y\n---\nbody\n"; + let parsed = parse_markdown_detailed(original).unwrap(); + assert_eq!(parsed.front_matter.name, "x"); + } + + #[test] + fn parse_markdown_detailed_rejects_missing_open_fence() { + let original = "name: x\ndescription: y\nbody\n"; + let err = parse_markdown_detailed(original).unwrap_err(); + assert!( + format!("{}", err).contains("must start with YAML front matter"), + "got: {}", + err + ); + } + + #[test] + fn parse_markdown_detailed_rejects_non_mapping_top_level() { + let original = "---\n- a\n- b\n---\nbody\n"; + let err = parse_markdown_detailed(original).unwrap_err(); + assert!( + format!("{}", err).contains("must be a mapping"), + "got: {}", + err + ); + } + + #[test] + fn parse_markdown_detailed_rejects_missing_close_fence() { + let original = "---\nname: x\nno-fence"; + let err = parse_markdown_detailed(original).unwrap_err(); + assert!( + format!("{}", err).contains("Could not find closing"), + "got: {}", + err + ); + } + + #[test] + fn parse_markdown_detailed_records_source_hash() { + let a = "---\nname: x\ndescription: y\n---\n"; + let b = "---\nname: x\ndescription: y\n---\nextra\n"; + let pa = parse_markdown_detailed(a).unwrap(); + let pb = parse_markdown_detailed(b).unwrap(); + assert_ne!(pa.source_sha256, pb.source_sha256); + // Hashing is stable over re-parses of the same input. + let pa2 = parse_markdown_detailed(a).unwrap(); + assert_eq!(pa.source_sha256, pa2.source_sha256); + } + // ─── compute_effective_workspace ───────────────────────────────────────── #[test] diff --git a/src/compile/migration_integration_test.rs b/src/compile/migration_integration_test.rs new file mode 100644 index 00000000..7e086059 --- /dev/null +++ b/src/compile/migration_integration_test.rs @@ -0,0 +1,231 @@ +//! White-box integration tests for the front-matter migration framework. +//! +//! These tests exercise the rewrite path end-to-end (parse → migrate → +//! compile → atomic source rewrite → lock-file write) using a stub +//! migration registry. They live inside `src/` because they need +//! access to crate-private hooks (`compile_pipeline_with_registry`, +//! `perform_source_rewrite_if_needed`, +//! `common::parse_markdown_detailed_with_registry`) that the empty +//! production [`super::migrations::MIGRATIONS`] registry cannot +//! exercise. +//! +//! Black-box subprocess tests of the user-visible CLI behavior live in +//! [`tests/migration_tests.rs`](../../tests/migration_tests.rs). + +#![cfg(test)] + +use super::common; +use super::migrations::{ConflictPolicy, Migration, MigrationContext}; +use super::{compile_pipeline, compile_pipeline_with_registry, perform_source_rewrite_if_needed}; +use anyhow::Result; +use serde_yaml::Mapping; + +// ─── Stub registry ────────────────────────────────────────────────────────── + +/// Stub migration: renames the `legacy-name` top-level key to `name`. +/// Drives end-to-end rewrite tests without registering a real migration. +static TEST_RENAME_LEGACY_NAME: Migration = Migration { + from_version: 1, + to_version: 2, + id: "test_rename_legacy_name", + summary: "rename `legacy-name` -> `name`", + introduced_in: "test", + apply: rename_legacy_name, +}; + +fn rename_legacy_name(fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { + super::migrations::rename_key(fm, "legacy-name", "name", ConflictPolicy::Error)?; + Ok(()) +} + +/// Source whose typed deserialization would fail without a migration: +/// it has `legacy-name` instead of the required `name` field. +fn stale_source() -> &'static str { + "---\nlegacy-name: my-agent\ndescription: test description\n---\n## Body\nHello.\n" +} + +fn write_temp_md(name: &str, content: &str) -> (tempfile::TempDir, std::path::PathBuf) { + let dir = tempfile::tempdir().expect("tempdir"); + let path = dir.path().join(name); + std::fs::write(&path, content).expect("write temp md"); + (dir, path) +} + +// ─── End-to-end rewrite path ──────────────────────────────────────────────── + +#[tokio::test] +async fn migration_rewrites_stale_source_and_preserves_body() { + let (dir, source_path) = write_temp_md("agent.md", stale_source()); + + let registry: &[&'static Migration] = &[&TEST_RENAME_LEGACY_NAME]; + let migrated = compile_pipeline_with_registry( + &source_path.to_string_lossy(), + None, + true, // skip_integrity + false, // debug_pipeline + registry, + ) + .await + .expect("compile_pipeline_with_registry should succeed"); + + assert!(migrated, "expected compile to report a source migration"); + + // Source file rewritten: contains `name: my-agent`, no + // `legacy-name`, has `schema-version: 2`. + let rewritten = std::fs::read_to_string(&source_path).expect("read rewritten"); + assert!( + rewritten.contains("name: my-agent"), + "rewritten source missing `name: my-agent`:\n{}", + rewritten + ); + assert!( + !rewritten.contains("legacy-name"), + "rewritten source still contains legacy-name:\n{}", + rewritten + ); + assert!( + rewritten.contains("schema-version: 2"), + "rewritten source missing `schema-version: 2`:\n{}", + rewritten + ); + + // Body region byte-identical to the original (everything after + // the closing `---`). + let orig = stale_source(); + let orig_body = &orig[orig.find("\n---\n").unwrap() + 5..]; + let new_body = &rewritten[rewritten.find("\n---\n").unwrap() + 5..]; + assert_eq!(orig_body, new_body, "body region not preserved byte-for-byte"); + + // Lock file generated. + let lock = source_path.with_extension("lock.yml"); + assert!(lock.exists(), "expected {} to exist", lock.display()); + + // Re-running parse with the same stub registry produces no + // further migration — the rewrite moved the source to current. + let after = std::fs::read_to_string(&source_path).unwrap(); + let parsed_again = + common::parse_markdown_detailed_with_registry(&after, registry).unwrap(); + assert!( + !parsed_again.migrations.changed(), + "expected post-rewrite source to be at current schema-version, but \ + {} more migration(s) ran", + parsed_again.migrations.applied.len() + ); + + drop(dir); +} + +#[tokio::test] +async fn migration_skip_when_no_stub_registry_runs() { + // With the empty production registry, a healthy v1 source must + // compile without rewriting anything. + let healthy = "---\nname: x\ndescription: y\n---\nbody\n"; + let (dir, source_path) = write_temp_md("agent.md", healthy); + let registry: &[&'static Migration] = &[]; + let migrated = compile_pipeline_with_registry( + &source_path.to_string_lossy(), + None, + true, + false, + registry, + ) + .await + .expect("compile should succeed"); + assert!(!migrated, "expected no rewrite for already-current source"); + let after = std::fs::read_to_string(&source_path).unwrap(); + assert_eq!(after, healthy, "source must be byte-identical"); + drop(dir); +} + +#[tokio::test] +async fn migration_with_only_version_bump_still_writes() { + fn noop(_fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { + Ok(()) + } + static NOOP_MIG: Migration = Migration { + from_version: 1, + to_version: 2, + id: "test_noop_v1_to_v2", + summary: "no-op", + introduced_in: "test", + apply: noop, + }; + + // Even a no-op migration changes bytes on disk because the + // schema-version field is added. Verify we DO rewrite. + let healthy = "---\nname: x\ndescription: y\n---\nbody\n"; + let (dir, source_path) = write_temp_md("agent.md", healthy); + let registry: &[&'static Migration] = &[&NOOP_MIG]; + let migrated = compile_pipeline_with_registry( + &source_path.to_string_lossy(), + None, + true, + false, + registry, + ) + .await + .expect("compile should succeed"); + assert!( + migrated, + "expected rewrite because schema-version field was added" + ); + let after = std::fs::read_to_string(&source_path).unwrap(); + assert!(after.contains("schema-version: 2")); + drop(dir); +} + +// ─── Lost-update guard ────────────────────────────────────────────────────── + +#[tokio::test] +async fn perform_source_rewrite_lost_update_guard() { + // Construct a parsed-source pointing at a file whose hash will + // not match (we mutate it after computing the hash). This + // exercises the lost-update guard directly without needing a + // full compile. + let (dir, source_path) = write_temp_md("agent.md", stale_source()); + let original = std::fs::read_to_string(&source_path).unwrap(); + let registry: &[&'static Migration] = &[&TEST_RENAME_LEGACY_NAME]; + let parsed = + common::parse_markdown_detailed_with_registry(&original, registry).unwrap(); + assert!(parsed.migrations.changed()); + + // Mutate the source after parse (simulates editor / concurrent + // tool overwriting). + std::fs::write(&source_path, "different bytes\n").unwrap(); + + // Rewrite must refuse. + let result = + perform_source_rewrite_if_needed(&source_path, &original, &parsed).await; + let err = result.expect_err("expected lost-update guard to fire"); + assert!( + format!("{:#}", err).contains("changed during compilation"), + "unexpected error: {:#}", + err + ); + + // The source is left as the interloper wrote it (we did not + // clobber); it is *not* the migrated version. + let after = std::fs::read_to_string(&source_path).unwrap(); + assert_eq!(after, "different bytes\n"); + + drop(dir); +} + +// ─── Future-version safety net ────────────────────────────────────────────── + +#[tokio::test] +async fn check_pipeline_fails_on_future_schema_version() { + // The real registry is empty (CURRENT=1), so a source claiming + // schema-version: 2 is a future version. compile should reject it + // loudly through the public entry point. + let future = "---\nschema-version: 2\nname: x\ndescription: y\n---\n"; + let (dir, source_path) = write_temp_md("agent.md", future); + let result = compile_pipeline(&source_path.to_string_lossy(), None, true, false).await; + let err = result.expect_err("future-version source should fail compile"); + assert!( + format!("{:#}", err).contains("only supports up to"), + "unexpected error: {:#}", + err + ); + drop(dir); +} diff --git a/src/compile/migrations/helpers.rs b/src/compile/migrations/helpers.rs new file mode 100644 index 00000000..20987346 --- /dev/null +++ b/src/compile/migrations/helpers.rs @@ -0,0 +1,198 @@ +//! Helpers for writing migrations against `serde_yaml::Mapping`. +//! +//! Migrations should prefer these over raw `Mapping` manipulation so that +//! conflicts (e.g. both old and new keys present) are surfaced rather than +//! silently overwritten. + +use anyhow::{bail, Result}; +use serde_yaml::{Mapping, Value}; + +/// Conflict policy used by [`rename_key`] when the destination key is +/// already present. +#[allow(dead_code)] +#[derive(Debug, Clone, Copy)] +pub enum ConflictPolicy { + /// Default: error if `new` already exists when renaming `old → new`. + Error, + /// Keep the existing `new` value, drop `old`. + PreferNew, + /// Overwrite `new` with the value from `old`. + PreferOld, +} + +/// Remove and return the value at `key`, or `None` if absent. +#[allow(dead_code)] +pub fn take_key(m: &mut Mapping, key: &str) -> Option { + m.remove(Value::String(key.to_string())) +} + +/// Insert `value` at `key`, returning `Err` if the key already exists. +/// +/// Prefer this over `Mapping::insert` in migrations: silent overwrite is +/// almost always a bug when transforming user data. +#[allow(dead_code)] +pub fn insert_no_overwrite( + m: &mut Mapping, + key: &str, + value: Value, +) -> Result<()> { + if m.contains_key(Value::String(key.to_string())) { + bail!( + "refusing to overwrite existing key `{}` in front matter", + key + ); + } + m.insert(Value::String(key.to_string()), value); + Ok(()) +} + +/// Rename `old` → `new` according to `policy`. +/// +/// Returns `Ok(true)` when the rename happened (regardless of policy +/// branch), `Ok(false)` when `old` was absent (no-op). +#[allow(dead_code)] +pub fn rename_key( + m: &mut Mapping, + old: &str, + new: &str, + policy: ConflictPolicy, +) -> Result { + let Some(old_value) = take_key(m, old) else { + return Ok(false); + }; + let new_present = m.contains_key(Value::String(new.to_string())); + match (new_present, policy) { + (false, _) => { + m.insert(Value::String(new.to_string()), old_value); + Ok(true) + } + (true, ConflictPolicy::Error) => { + bail!( + "refusing to rename `{}` -> `{}`: destination key already exists \ + (set both old and new keys at once is ambiguous)", + old, + new + ); + } + (true, ConflictPolicy::PreferNew) => Ok(true), + (true, ConflictPolicy::PreferOld) => { + m.insert(Value::String(new.to_string()), old_value); + Ok(true) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn map_with(pairs: &[(&str, &str)]) -> Mapping { + let mut m = Mapping::new(); + for (k, v) in pairs { + m.insert( + Value::String((*k).to_string()), + Value::String((*v).to_string()), + ); + } + m + } + + #[test] + fn take_key_present_returns_value_and_removes_it() { + let mut m = map_with(&[("a", "1"), ("b", "2")]); + let v = take_key(&mut m, "a").unwrap(); + assert_eq!(v, Value::String("1".to_string())); + assert!(!m.contains_key(Value::String("a".to_string()))); + assert!(m.contains_key(Value::String("b".to_string()))); + } + + #[test] + fn take_key_absent_returns_none() { + let mut m = map_with(&[("a", "1")]); + assert!(take_key(&mut m, "missing").is_none()); + assert!(m.contains_key(Value::String("a".to_string()))); + } + + #[test] + fn insert_no_overwrite_inserts_when_absent() { + let mut m = Mapping::new(); + insert_no_overwrite(&mut m, "k", Value::String("v".into())).unwrap(); + assert_eq!( + m.get(Value::String("k".into())), + Some(&Value::String("v".into())) + ); + } + + #[test] + fn insert_no_overwrite_errors_on_conflict() { + let mut m = map_with(&[("k", "v1")]); + let err = insert_no_overwrite(&mut m, "k", Value::String("v2".into())) + .unwrap_err(); + assert!( + format!("{}", err).contains("refusing to overwrite"), + "unexpected error message: {}", + err + ); + assert_eq!( + m.get(Value::String("k".into())), + Some(&Value::String("v1".into())) + ); + } + + #[test] + fn rename_key_old_absent_is_noop() { + let mut m = map_with(&[("b", "2")]); + let renamed = rename_key(&mut m, "a", "z", ConflictPolicy::Error).unwrap(); + assert!(!renamed); + assert!(!m.contains_key(Value::String("z".into()))); + } + + #[test] + fn rename_key_new_absent_moves_value() { + let mut m = map_with(&[("old", "v")]); + let renamed = rename_key(&mut m, "old", "new", ConflictPolicy::Error).unwrap(); + assert!(renamed); + assert!(!m.contains_key(Value::String("old".into()))); + assert_eq!( + m.get(Value::String("new".into())), + Some(&Value::String("v".into())) + ); + } + + #[test] + fn rename_key_new_present_with_error_policy_fails() { + let mut m = map_with(&[("old", "v_old"), ("new", "v_new")]); + let err = rename_key(&mut m, "old", "new", ConflictPolicy::Error).unwrap_err(); + assert!( + format!("{}", err).contains("destination key already exists"), + "unexpected error message: {}", + err + ); + } + + #[test] + fn rename_key_new_present_with_prefer_new_drops_old() { + let mut m = map_with(&[("old", "v_old"), ("new", "v_new")]); + let renamed = + rename_key(&mut m, "old", "new", ConflictPolicy::PreferNew).unwrap(); + assert!(renamed); + assert!(!m.contains_key(Value::String("old".into()))); + assert_eq!( + m.get(Value::String("new".into())), + Some(&Value::String("v_new".into())) + ); + } + + #[test] + fn rename_key_new_present_with_prefer_old_overwrites() { + let mut m = map_with(&[("old", "v_old"), ("new", "v_new")]); + let renamed = + rename_key(&mut m, "old", "new", ConflictPolicy::PreferOld).unwrap(); + assert!(renamed); + assert!(!m.contains_key(Value::String("old".into()))); + assert_eq!( + m.get(Value::String("new".into())), + Some(&Value::String("v_old".into())) + ); + } +} diff --git a/src/compile/migrations/mod.rs b/src/compile/migrations/mod.rs new file mode 100644 index 00000000..0de2a531 --- /dev/null +++ b/src/compile/migrations/mod.rs @@ -0,0 +1,567 @@ +//! Front-matter migration framework. +//! +//! A version-stamped, append-only chain of transformations that rewrites +//! older source front-matter shapes into the current one before typed +//! deserialization. Modeled on Django/Rails-style schema migrations. +//! +//! See `docs/migrations.md` for the full contributor reference. In +//! short: +//! +//! - Each migration goes from `from_version: N` to `to_version: N + 1`. +//! - Migrations live in `src/compile/migrations/_.rs` and are +//! appended to [`MIGRATIONS`] in strict ascending order. +//! - [`CURRENT_SCHEMA_VERSION`] is `1 + MIGRATIONS.len()`. +//! - Each source `.md` carries a `schema-version: ` field (missing +//! = 1). The compiler bumps it in place when migrations apply. +//! - Migrations operate on the **untyped** `serde_yaml::Mapping` +//! representation so they can rewrite shapes that no longer match the +//! typed `FrontMatter` (renamed/removed fields, +//! `deny_unknown_fields`). +//! +//! The runner is intentionally simple: read current version, validate +//! the registry is contiguous at runtime, and walk the chain. + +use anyhow::{bail, ensure, Context, Result}; +use serde_yaml::{Mapping, Value}; + +mod helpers; +#[allow(unused_imports)] +pub use helpers::{insert_no_overwrite, rename_key, take_key, ConflictPolicy}; + +/// Front-matter key that holds the schema version. Kebab-case to match +/// the rest of the front-matter grammar. +#[allow(dead_code)] +pub const SCHEMA_VERSION_KEY: &str = "schema-version"; + +/// Forward-compatible context passed to every migration. Currently +/// empty; we keep it in the signature so future migrations can be given +/// (e.g.) the source path without breaking the function pointer type. +#[non_exhaustive] +pub struct MigrationContext {} + +/// A single front-matter migration step. +/// +/// Migrations are pure functions that mutate the front-matter mapping +/// in place. They must NOT bump [`SCHEMA_VERSION_KEY`] themselves — the +/// runner does that after each successful step. They must be +/// deterministic and side-effect-free (documentation convention; not +/// enforced by the type system). +pub struct Migration { + /// Source schema version this migration consumes. + pub from_version: u32, + /// Target schema version produced (always `from_version + 1`). + pub to_version: u32, + /// Stable snake_case identifier used in logs, errors, and tests. + pub id: &'static str, + /// Human-facing one-line summary, surfaced in the compile warning. + pub summary: &'static str, + /// Compiler version that introduced this migration (e.g. "0.27.0"). + /// Currently used only for human-readable provenance; not consumed + /// by the runner. + #[allow(dead_code)] + pub introduced_in: &'static str, + /// The transformation. See type-level docs for invariants. + pub apply: fn(&mut Mapping, &MigrationContext) -> Result<()>, +} + +/// The fixed registry of migrations, in strict ascending `from_version` +/// order. Append-only — never reorder, never delete entries. +/// +/// Adding a migration is two edits: +/// +/// 1. Create `src/compile/migrations/_.rs` with a +/// `pub static MIGRATION: Migration` and register the module here. +/// 2. Append `&::MIGRATION` to this slice. +pub static MIGRATIONS: &[&'static Migration] = &[]; + +/// Total number of schema versions known to this compiler. +/// +/// Computed from the registry length so it can never drift. +#[allow(dead_code)] +pub const CURRENT_SCHEMA_VERSION: u32 = 1 + MIGRATIONS.len() as u32; + +/// Result of running the migration chain on a single front-matter +/// mapping. +#[derive(Debug, Clone)] +pub struct MigrationReport { + /// The schema version present in the source before migration. + pub from_version: u32, + /// The schema version after migration (always + /// [`CURRENT_SCHEMA_VERSION`] when the runner returns Ok). + pub to_version: u32, + /// Ordered list of migrations that ran, with id + summary so + /// callers (warnings, logs) don't need to re-query the registry. + pub applied: Vec, +} + +/// Record of a single migration that ran. Carries enough info for +/// warning emission and tests without re-looking up the registry. +#[derive(Debug, Clone)] +pub struct AppliedMigration { + pub id: &'static str, + pub summary: &'static str, +} + +impl MigrationReport { + /// Build a no-migration report for a source already at `version`. + pub fn unchanged(version: u32) -> Self { + Self { + from_version: version, + to_version: version, + applied: Vec::new(), + } + } + + /// Returns true when at least one migration ran. + pub fn changed(&self) -> bool { + !self.applied.is_empty() + } + + /// IDs of migrations that ran, in order. + #[allow(dead_code)] + pub fn applied_ids(&self) -> Vec<&'static str> { + self.applied.iter().map(|a| a.id).collect() + } +} + +/// Read [`SCHEMA_VERSION_KEY`] from the mapping. Missing field → 1. +/// Returns Err on any non-positive-integer value (zero, negative, +/// float, string, sequence, null). +pub fn read_schema_version(fm: &Mapping) -> Result { + let Some(value) = fm.get(Value::String(SCHEMA_VERSION_KEY.to_string())) else { + return Ok(1); + }; + let n = value.as_u64().ok_or_else(|| { + anyhow::anyhow!( + "front matter `schema-version` must be a positive integer (>= 1), got: {}", + describe_yaml_value(value) + ) + })?; + if n == 0 { + bail!( + "front matter `schema-version` must be a positive integer (>= 1), got: 0" + ); + } + if n > u32::MAX as u64 { + bail!( + "front matter `schema-version` must be a positive integer (>= 1), got: {} (exceeds u32::MAX)", + n + ); + } + Ok(n as u32) +} + +/// Set [`SCHEMA_VERSION_KEY`] on the mapping. Inserts at the end if +/// absent, updates in place otherwise. +pub fn set_schema_version(fm: &mut Mapping, version: u32) { + fm.insert( + Value::String(SCHEMA_VERSION_KEY.to_string()), + Value::Number(serde_yaml::Number::from(version as u64)), + ); +} + +/// Apply the registered migration chain to `fm`. +/// +/// Equivalent to [`migrate_front_matter_with`] called with the global +/// [`MIGRATIONS`] registry. +#[allow(dead_code)] +pub fn migrate_front_matter(fm: &mut Mapping) -> Result { + migrate_front_matter_with(fm, MIGRATIONS) +} + +/// Apply an explicit migration chain. Used by tests with a stub +/// registry; production code calls [`migrate_front_matter`]. +pub fn migrate_front_matter_with( + fm: &mut Mapping, + registry: &[&'static Migration], +) -> Result { + let target_version = 1 + registry.len() as u32; + let mut current = read_schema_version(fm)?; + let from_version = current; + + if current > target_version { + bail!( + "source `schema-version` is {}, but this compiler only supports up to {}. Upgrade ado-aw.", + current, + target_version + ); + } + + if current == target_version { + return Ok(MigrationReport::unchanged(current)); + } + + let mut applied: Vec = Vec::new(); + + while current < target_version { + let idx = (current - 1) as usize; + let m = registry.get(idx).ok_or_else(|| { + anyhow::anyhow!( + "migration registry corrupt: expected entry at index {} for from_version={}", + idx, + current + ) + })?; + ensure!( + m.from_version == current && m.to_version == current + 1, + "migration registry corrupt: expected from_version={} at index {}, found from_version={} to_version={}", + current, + idx, + m.from_version, + m.to_version + ); + + let ctx = MigrationContext {}; + (m.apply)(fm, &ctx).with_context(|| { + format!( + "migration {} ({} -> {}) failed", + m.id, m.from_version, m.to_version + ) + })?; + + set_schema_version(fm, m.to_version); + applied.push(AppliedMigration { + id: m.id, + summary: m.summary, + }); + current = m.to_version; + } + + Ok(MigrationReport { + from_version, + to_version: current, + applied, + }) +} + +fn describe_yaml_value(v: &Value) -> String { + match v { + Value::Null => "null".to_string(), + Value::Bool(b) => format!("bool ({})", b), + Value::Number(n) => format!("number ({})", n), + Value::String(s) => format!("string ({:?})", s), + Value::Sequence(_) => "sequence".to_string(), + Value::Mapping(_) => "mapping".to_string(), + Value::Tagged(_) => "tagged".to_string(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // ─── Registry health (operates on the real MIGRATIONS slice) ────────── + + #[test] + fn registry_is_contiguous_and_starts_at_one() { + // Vacuously true with an empty registry; the assertions still + // exercise the loop machinery so this test guards against future + // regressions when migrations are added. + for (i, m) in MIGRATIONS.iter().enumerate() { + assert_eq!( + m.from_version, + (i + 1) as u32, + "migration at index {} has from_version={}; expected {}", + i, + m.from_version, + i + 1 + ); + assert_eq!( + m.to_version, + (i + 2) as u32, + "migration at index {} has to_version={}; expected {}", + i, + m.to_version, + i + 2 + ); + } + } + + #[test] + fn registry_ids_are_unique() { + let mut seen = std::collections::BTreeSet::new(); + for m in MIGRATIONS { + assert!( + seen.insert(m.id), + "duplicate migration id `{}` in MIGRATIONS", + m.id + ); + } + } + + #[test] + fn migration_filenames_match_from_version() { + // Scan src/compile/migrations/*.rs and check that each numeric + // migration file's prefix matches its from_version. + let dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR")) + .join("src/compile/migrations"); + let mut numeric_files: Vec<(u32, String)> = Vec::new(); + for entry in std::fs::read_dir(&dir).expect("read migrations dir") { + let entry = entry.expect("dir entry"); + let path = entry.path(); + if path.extension().and_then(|e| e.to_str()) != Some("rs") { + continue; + } + let stem = path + .file_stem() + .and_then(|s| s.to_str()) + .expect("utf8 stem") + .to_string(); + if stem == "mod" || stem == "helpers" { + continue; + } + // Expect `_` where NNNN is the zero-padded + // from_version. Files that don't match this shape are tests + // or fixtures and should not exist in this directory. + let (prefix, _rest) = stem.split_once('_').unwrap_or_else(|| { + panic!( + "migration file {:?} does not match `_.rs`", + path + ) + }); + let n: u32 = prefix.parse().unwrap_or_else(|_| { + panic!( + "migration file {:?} has non-numeric prefix `{}`", + path, prefix + ) + }); + numeric_files.push((n, stem)); + } + numeric_files.sort(); + assert_eq!( + numeric_files.len(), + MIGRATIONS.len(), + "number of `_.rs` files ({}) does not match \ + MIGRATIONS.len() ({}); files: {:?}", + numeric_files.len(), + MIGRATIONS.len(), + numeric_files + ); + for (i, (n, stem)) in numeric_files.iter().enumerate() { + assert_eq!( + *n, + (i + 1) as u32, + "migration file `{}` has from_version prefix {} but is at index {}; expected {}", + stem, + n, + i, + i + 1 + ); + } + } + + // ─── read_schema_version ────────────────────────────────────────────── + + fn map_with_version(version: Option<&str>) -> Mapping { + let mut m = Mapping::new(); + if let Some(v) = version { + // Insert as raw YAML so we can test integer/string/etc. + let parsed: Value = serde_yaml::from_str(v).unwrap(); + m.insert(Value::String(SCHEMA_VERSION_KEY.to_string()), parsed); + } + m + } + + #[test] + fn read_schema_version_missing_returns_one() { + let m = Mapping::new(); + assert_eq!(read_schema_version(&m).unwrap(), 1); + } + + #[test] + fn read_schema_version_integer_is_returned() { + let m = map_with_version(Some("5")); + assert_eq!(read_schema_version(&m).unwrap(), 5); + } + + #[test] + fn read_schema_version_zero_rejected() { + let m = map_with_version(Some("0")); + let err = read_schema_version(&m).unwrap_err(); + assert!( + format!("{}", err).contains("must be a positive integer"), + "got: {}", + err + ); + } + + #[test] + fn read_schema_version_negative_rejected() { + let m = map_with_version(Some("-1")); + let err = read_schema_version(&m).unwrap_err(); + assert!( + format!("{}", err).contains("must be a positive integer"), + "got: {}", + err + ); + } + + #[test] + fn read_schema_version_string_rejected() { + let m = map_with_version(Some("\"five\"")); + let err = read_schema_version(&m).unwrap_err(); + assert!( + format!("{}", err).contains("must be a positive integer"), + "got: {}", + err + ); + } + + #[test] + fn read_schema_version_float_rejected() { + let m = map_with_version(Some("2.5")); + let err = read_schema_version(&m).unwrap_err(); + assert!( + format!("{}", err).contains("must be a positive integer"), + "got: {}", + err + ); + } + + #[test] + fn read_schema_version_null_rejected() { + let m = map_with_version(Some("null")); + let err = read_schema_version(&m).unwrap_err(); + assert!( + format!("{}", err).contains("must be a positive integer"), + "got: {}", + err + ); + } + + // ─── set_schema_version ─────────────────────────────────────────────── + + #[test] + fn set_schema_version_inserts_when_absent() { + let mut m = Mapping::new(); + set_schema_version(&mut m, 3); + assert_eq!(read_schema_version(&m).unwrap(), 3); + } + + #[test] + fn set_schema_version_updates_in_place() { + let mut m = map_with_version(Some("1")); + set_schema_version(&mut m, 4); + assert_eq!(read_schema_version(&m).unwrap(), 4); + } + + // ─── migrate_front_matter_with ──────────────────────────────────────── + + #[test] + fn migrate_empty_registry_is_noop_for_v1() { + let mut m: Mapping = serde_yaml::from_str("name: x\n").unwrap(); + let report = migrate_front_matter_with(&mut m, &[]).unwrap(); + assert!(!report.changed()); + assert_eq!(report.from_version, 1); + assert_eq!(report.to_version, 1); + } + + #[test] + fn migrate_already_current_is_noop() { + // Empty registry → CURRENT == 1. Source explicitly stamped 1. + let mut m: Mapping = + serde_yaml::from_str("schema-version: 1\nname: x\n").unwrap(); + let report = migrate_front_matter_with(&mut m, &[]).unwrap(); + assert!(!report.changed()); + } + + #[test] + fn migrate_future_version_rejected() { + // Empty registry → CURRENT == 1. Source claims version 2. + let mut m: Mapping = + serde_yaml::from_str("schema-version: 2\nname: x\n").unwrap(); + let err = migrate_front_matter_with(&mut m, &[]).unwrap_err(); + assert!( + format!("{}", err).contains("only supports up to"), + "got: {}", + err + ); + } + + fn noop_apply(_fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { + Ok(()) + } + + static TEST_MIG_1_TO_2: Migration = Migration { + from_version: 1, + to_version: 2, + id: "test_v1_to_v2", + summary: "test migration", + introduced_in: "test", + apply: noop_apply, + }; + + static TEST_MIG_2_TO_3: Migration = Migration { + from_version: 2, + to_version: 3, + id: "test_v2_to_v3", + summary: "test migration", + introduced_in: "test", + apply: rename_a_to_b, + }; + + fn rename_a_to_b(fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { + rename_key(fm, "a", "b", ConflictPolicy::Error)?; + Ok(()) + } + + #[test] + fn migrate_with_test_registry_chains_migrations() { + let registry: &[&'static Migration] = + &[&TEST_MIG_1_TO_2, &TEST_MIG_2_TO_3]; + let mut m: Mapping = serde_yaml::from_str("a: 1\n").unwrap(); + let report = migrate_front_matter_with(&mut m, registry).unwrap(); + assert!(report.changed()); + assert_eq!(report.from_version, 1); + assert_eq!(report.to_version, 3); + assert_eq!(report.applied_ids(), vec!["test_v1_to_v2", "test_v2_to_v3"]); + assert_eq!(read_schema_version(&m).unwrap(), 3); + // The rename in the second migration moved a → b. + assert!(!m.contains_key(Value::String("a".into()))); + assert!(m.contains_key(Value::String("b".into()))); + } + + #[test] + fn migrate_starting_partway_through_chain() { + let registry: &[&'static Migration] = + &[&TEST_MIG_1_TO_2, &TEST_MIG_2_TO_3]; + // Source is already at v2, only the second migration should run. + let mut m: Mapping = + serde_yaml::from_str("schema-version: 2\na: 1\n").unwrap(); + let report = migrate_front_matter_with(&mut m, registry).unwrap(); + assert_eq!(report.from_version, 2); + assert_eq!(report.to_version, 3); + assert_eq!(report.applied_ids(), vec!["test_v2_to_v3"]); + } + + fn failing_apply(_fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { + bail!("intentional test failure"); + } + + static TEST_MIG_FAIL: Migration = Migration { + from_version: 1, + to_version: 2, + id: "test_fail", + summary: "always fails", + introduced_in: "test", + apply: failing_apply, + }; + + #[test] + fn migrate_aborts_on_step_failure_with_context() { + let registry: &[&'static Migration] = &[&TEST_MIG_FAIL]; + let mut m: Mapping = serde_yaml::from_str("a: 1\n").unwrap(); + let err = migrate_front_matter_with(&mut m, registry).unwrap_err(); + let chain = format!("{:#}", err); + assert!( + chain.contains("test_fail (1 -> 2) failed"), + "expected migration context, got: {}", + chain + ); + assert!( + chain.contains("intentional test failure"), + "expected inner error, got: {}", + chain + ); + } +} diff --git a/src/compile/mod.rs b/src/compile/mod.rs index a6a1ea33..16dc71c1 100644 --- a/src/compile/mod.rs +++ b/src/compile/mod.rs @@ -11,6 +11,9 @@ pub(crate) use common::resolve_repos; pub mod extensions; pub(crate) mod filter_ir; mod gitattributes; +#[cfg(test)] +mod migration_integration_test; +pub(crate) mod migrations; mod onees; pub(crate) mod pr_filters; mod standalone; @@ -21,7 +24,10 @@ use async_trait::async_trait; use log::{debug, info}; use std::path::{Path, PathBuf}; +#[allow(unused_imports)] pub use common::parse_markdown; +#[allow(unused_imports)] +pub use common::{atomic_write, parse_markdown_detailed, reconstruct_source, ParsedSource}; pub use common::HEADER_MARKER; pub use common::ADO_MCP_ENTRYPOINT; pub use common::ADO_MCP_IMAGE; @@ -58,7 +64,40 @@ pub async fn compile_pipeline( skip_integrity: bool, debug_pipeline: bool, ) -> Result<()> { - compile_pipeline_inner(input_path, output_path, skip_integrity, debug_pipeline, true).await + compile_pipeline_inner( + input_path, + output_path, + skip_integrity, + debug_pipeline, + true, + migrations::MIGRATIONS, + ) + .await + .map(|_migrated| ()) +} + +/// Test-only entry point that injects a custom migration registry. +/// +/// Production code goes through [`compile_pipeline`]. Tests use this +/// to drive end-to-end migration scenarios without polluting the real +/// [`migrations::MIGRATIONS`] slice. +#[cfg(test)] +async fn compile_pipeline_with_registry( + input_path: &str, + output_path: Option<&str>, + skip_integrity: bool, + debug_pipeline: bool, + registry: &[&'static migrations::Migration], +) -> Result { + compile_pipeline_inner( + input_path, + output_path, + skip_integrity, + debug_pipeline, + false, + registry, + ) + .await } /// Internal compile entry point that lets the caller opt out of the @@ -66,13 +105,17 @@ pub async fn compile_pipeline( /// (`compile_all_pipelines`) skip the per-pipeline sync to avoid an /// O(N²)-ish series of full-tree scans and instead perform a single sync /// after the whole batch completes. +/// +/// Returns whether the source `.md` was rewritten by the migration +/// pass. Batch callers use this to surface an aggregate count. async fn compile_pipeline_inner( input_path: &str, output_path: Option<&str>, skip_integrity: bool, debug_pipeline: bool, sync_gitattributes: bool, -) -> Result<()> { + registry: &[&'static migrations::Migration], +) -> Result { let input_path = Path::new(input_path); info!("Compiling pipeline from: {}", input_path.display()); @@ -83,11 +126,22 @@ async fn compile_pipeline_inner( .with_context(|| format!("Failed to read input file: {}", input_path.display()))?; debug!("Input file size: {} bytes", content.len()); - let (mut front_matter, markdown_body) = parse_markdown(&content)?; + let parsed = common::parse_markdown_detailed_with_registry(&content, registry)?; + let mut front_matter = parsed.front_matter; + let markdown_body = parsed.markdown_body.clone(); + let migrations = parsed.migrations.clone(); + let front_matter_mapping = parsed.front_matter_mapping.clone(); + let body_raw = parsed.body_raw.clone(); + let source_sha256 = parsed.source_sha256; // Sanitize all front matter text fields before any further processing. // This neutralizes pipeline command injection (##vso[), strips control // characters, and enforces content limits across all config values. + // + // Note: sanitization mutates the typed FrontMatter in memory but does + // NOT touch the `front_matter_mapping` we keep around for source + // rewrite. That is intentional — we don't want to silently rewrite + // user source bytes via sanitize on disk. use crate::sanitize::SanitizeConfig; front_matter.sanitize_config_fields(); @@ -140,7 +194,8 @@ async fn compile_pipeline_inner( info!("Using {} compiler", compiler.target_name()); - // Compile + // Compile (no source mutation yet — a failure here must leave the + // source byte-identical). let pipeline_yaml = compiler .compile(input_path, &yaml_output_path, &front_matter, &markdown_body, skip_integrity, debug_pipeline) .await?; @@ -148,8 +203,34 @@ async fn compile_pipeline_inner( // Clean up spacing artifacts from empty placeholder replacements let pipeline_yaml = clean_generated_yaml(&pipeline_yaml); - // Write output - tokio::fs::write(&yaml_output_path, &pipeline_yaml) + // Source rewrite: only after a fully-successful compile. + // + // We perform this BEFORE writing the .lock.yml so that a partial + // state (rewritten source + stale lock file, or rewritten lock file + // pointing at unmigrated source) cannot escape: if rewrite fails, + // we abort before the lock file ever gets touched. + let mut migrated = false; + if migrations.changed() { + // Rebuild the parsed handle (the typed front_matter has been + // sanitized and consumed; rewrite needs the unmodified mapping). + let parsed_for_rewrite = common::ParsedSource { + front_matter: serde_yaml::from_value(serde_yaml::Value::Mapping( + front_matter_mapping.clone(), + )) + .context("Failed to round-trip migrated front matter for rewrite")?, + markdown_body: markdown_body.clone(), + migrations: migrations.clone(), + front_matter_mapping: front_matter_mapping.clone(), + body_raw: body_raw.clone(), + source_sha256, + }; + migrated = perform_source_rewrite_if_needed(input_path, &content, &parsed_for_rewrite) + .await?; + } + + // Write output via atomic_write so a crash mid-write cannot leave a + // half-written .lock.yml on disk. + common::atomic_write(&yaml_output_path, &pipeline_yaml) .await .with_context(|| { format!( @@ -175,7 +256,71 @@ async fn compile_pipeline_inner( } } - Ok(()) + Ok(migrated) +} + +/// Reconstruct the migrated source, run the lost-update guard, and +/// atomically rewrite the source `.md` if the content actually +/// changed. Returns whether a write happened. +/// +/// On success, emits the documented stderr warning so users always see +/// when their source was migrated. This warning is *not* gated by +/// `--verbose`/`--debug`. +async fn perform_source_rewrite_if_needed( + input_path: &Path, + original_content: &str, + parsed: &common::ParsedSource, +) -> Result { + let new_content = common::reconstruct_source(parsed)?; + if new_content == original_content { + // Migrations ran but their net effect is a no-op on disk — + // skip the rewrite to avoid gratuitous diffs. + return Ok(false); + } + + // Lost-update guard: re-read the source and compare its hash to + // the snapshot we took when parsing. If anyone else mutated the + // file in the meantime, refuse to clobber their changes. + use sha2::Digest; + let current_bytes = tokio::fs::read(input_path).await.with_context(|| { + format!( + "Failed to re-read source for migration safety check: {}", + input_path.display() + ) + })?; + let mut hasher = sha2::Sha256::new(); + hasher.update(¤t_bytes); + let current_hash: [u8; 32] = hasher.finalize().into(); + if current_hash != parsed.source_sha256 { + anyhow::bail!( + "source file {} changed during compilation; refusing to migrate. Re-run `ado-aw compile`.", + input_path.display() + ); + } + + common::atomic_write(input_path, &new_content) + .await + .with_context(|| { + format!( + "Failed to atomically rewrite migrated source: {}", + input_path.display() + ) + })?; + + eprintln!( + "warning: migrated front matter in {}: schema-version {} -> {}", + input_path.display(), + parsed.migrations.from_version, + parsed.migrations.to_version + ); + for applied in &parsed.migrations.applied { + eprintln!(" - {}: {}", applied.id, applied.summary); + } + eprintln!( + "note: front-matter comments are not preserved when migrations rewrite the source." + ); + + Ok(true) } /// Locate the repo root containing `output_path`, scan it for all compiled @@ -227,6 +372,7 @@ pub async fn compile_all_pipelines(skip_integrity: bool, debug_pipeline: bool) - let mut success_count = 0; let mut skip_count = 0; let mut fail_count = 0; + let mut migrated_count = 0; for pipeline in &detected { let source_path = root.join(&pipeline.source); @@ -245,8 +391,13 @@ pub async fn compile_all_pipelines(skip_integrity: bool, debug_pipeline: bool) - let source_str = source_path.to_string_lossy(); let output_str = yaml_output_path.to_string_lossy(); - match compile_pipeline_inner(&source_str, Some(&output_str), skip_integrity, debug_pipeline, false).await { - Ok(()) => success_count += 1, + match compile_pipeline_inner(&source_str, Some(&output_str), skip_integrity, debug_pipeline, false, migrations::MIGRATIONS).await { + Ok(migrated) => { + success_count += 1; + if migrated { + migrated_count += 1; + } + } Err(e) => { eprintln!( " Error compiling '{}': {:#}", @@ -269,10 +420,17 @@ pub async fn compile_all_pipelines(skip_integrity: bool, debug_pipeline: bool) - } println!(); - println!( - "Done: {} compiled, {} skipped, {} failed.", - success_count, skip_count, fail_count - ); + if migrated_count > 0 { + println!( + "Done: {} compiled, {} skipped, {} failed; {} source file(s) migrated.", + success_count, skip_count, fail_count, migrated_count + ); + } else { + println!( + "Done: {} compiled, {} skipped, {} failed.", + success_count, skip_count, fail_count + ); + } if fail_count > 0 { anyhow::bail!("{} pipeline(s) failed to compile", fail_count); @@ -355,7 +513,29 @@ pub async fn check_pipeline(pipeline_path: &str) -> Result<()> { ) })?; - let (mut front_matter, markdown_body) = parse_markdown(&content)?; + let parsed = parse_markdown_detailed(&content)?; + + // Pending-migration enforcement: `check` MUST NOT silently let + // a stale source pass. The runtime integrity check inside + // compiled pipelines uses the same ado-aw version that produced + // the pipeline (see src/data/base.yml / 1es-base.yml — they + // download `ado-aw v${COMPILER_VERSION}`), so a check failure + // here only happens locally / in CI when a developer runs a + // newer ado-aw against an older source. That is exactly when + // we want to fail loudly so `ado-aw compile` is run. + if parsed.migrations.changed() { + anyhow::bail!( + "error: {} is at schema-version {}; this compiler requires schema-version {}.\n\ + hint: run `ado-aw compile {}` to migrate the source in place.", + source_path.display(), + parsed.migrations.from_version, + parsed.migrations.to_version, + source_path.display(), + ); + } + + let mut front_matter = parsed.front_matter; + let markdown_body = parsed.markdown_body; use crate::sanitize::SanitizeConfig; front_matter.sanitize_config_fields(); diff --git a/src/compile/types.rs b/src/compile/types.rs index 66594225..f68bbc9d 100644 --- a/src/compile/types.rs +++ b/src/compile/types.rs @@ -7,6 +7,15 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; use crate::sanitize::SanitizeConfig as SanitizeConfigTrait; +/// Default `schema-version` used when the field is absent from the +/// front matter. Defined here so serde's `#[serde(default = "...")]` +/// can refer to it. Note: source files that simply omit the field +/// continue to behave as schema-version 1; this default is not the +/// "current" schema version. +fn default_schema_version() -> u32 { + 1 +} + /// Target platform for compiled pipeline #[derive(Debug, Deserialize, Clone, Default, PartialEq)] #[serde(rename_all = "lowercase")] @@ -580,6 +589,13 @@ pub struct PipelineParameter { #[derive(Debug, Deserialize)] #[serde(deny_unknown_fields)] pub struct FrontMatter { + /// Schema version of the front-matter grammar. Missing field defaults + /// to 1. The compiler bumps this in place when migrations apply + /// during compilation; users typically don't write it by hand. + /// See [`crate::compile::migrations`] for the migration framework. + #[serde(default = "default_schema_version", rename = "schema-version")] + #[allow(dead_code)] + pub schema_version: u32, /// Agent name (required) pub name: String, /// One-line description (required) @@ -1243,6 +1259,36 @@ impl SanitizeConfigTrait for LabelFilter { mod tests { use super::*; + // ─── schema_version field ─────────────────────────────────────────────── + + #[test] + fn schema_version_defaults_to_one_when_absent() { + let yaml = "name: x\ndescription: y\n"; + let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); + assert_eq!(fm.schema_version, 1); + } + + #[test] + fn schema_version_accepts_explicit_integer() { + let yaml = "schema-version: 5\nname: x\ndescription: y\n"; + let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); + assert_eq!(fm.schema_version, 5); + } + + #[test] + fn schema_version_string_value_fails_typed_deserialize() { + // The migration runner is the first line of defense (rejects with a + // friendly message before typed deserialization). This test is a + // defense-in-depth check that the typed FrontMatter would also + // reject a non-integer `schema-version`. + let yaml = "schema-version: notanumber\nname: x\ndescription: y\n"; + let result: Result = serde_yaml::from_str(yaml); + assert!( + result.is_err(), + "expected non-integer schema-version to fail typed deserialize" + ); + } + // ─── PoolConfig deserialization ────────────────────────────────────────── #[test] diff --git a/src/main.rs b/src/main.rs index a6f48184..23618619 100644 --- a/src/main.rs +++ b/src/main.rs @@ -224,7 +224,11 @@ async fn run_execute( ado_project: Option, dry_run: bool, ) -> Result<()> { - // Read and parse source markdown to get tool configs + // Read and parse source markdown to get tool configs. + // Use parse_markdown_detailed so Stage 3 benefits from in-memory + // migration when a source is mid-migration. Stage 3 must NOT + // rewrite the source file (the executor's working tree is not the + // source-of-truth tree), so we just emit a log warning. let content = tokio::fs::read_to_string(&source) .await .with_context(|| format!("Failed to read source file: {}", source.display()))?; diff --git a/tests/migration_tests.rs b/tests/migration_tests.rs new file mode 100644 index 00000000..75d4f2e9 --- /dev/null +++ b/tests/migration_tests.rs @@ -0,0 +1,382 @@ +//! Integration tests for the front-matter migration framework. +//! +//! These tests spawn the compiled `ado-aw` binary as a subprocess +//! (matching the pattern used in `tests/compiler_tests.rs`) and assert +//! on the user-visible behavior of `compile` and `check` for sources +//! with various `schema-version` shapes. +//! +//! The migration registry shipped with this binary is intentionally +//! empty (`CURRENT_SCHEMA_VERSION == 1`); the rewrite path is exercised +//! by the white-box tests in `src/compile/migrations/integration_test.rs`, +//! which can inject a stub registry. These tests cover the user-facing +//! CLI behaviors that don't require migration registration: +//! +//! - Future-version sources are rejected with a clear message. +//! - Non-integer / zero / negative `schema-version` is rejected. +//! - Healthy v1 sources (no `schema-version` field, or explicit `1`) +//! compile and `check` cleanly without rewriting the source. +//! - The full `compile` -> `check` round-trip succeeds. + +use std::fs; +use std::path::PathBuf; +use std::process::Command; + +/// Set up a unique temp directory for each test run. +fn fresh_temp_dir(label: &str) -> PathBuf { + let dir = std::env::temp_dir().join(format!( + "ado-aw-migration-tests-{}-{}-{}", + label, + std::process::id(), + // Add a thread-local counter to avoid intra-process collisions + // when multiple tests run in parallel. + rand_suffix(), + )); + fs::create_dir_all(&dir).expect("create temp dir"); + dir +} + +/// Same as [`fresh_temp_dir`] but also creates an empty `.git/` +/// directory at the root so `ado-aw check` (which walks up to the +/// repo root) can resolve a source path from the compiled lock file's +/// `@ado-aw` header. +fn fresh_git_temp_dir(label: &str) -> PathBuf { + let dir = fresh_temp_dir(label); + fs::create_dir(dir.join(".git")).expect("create .git dir"); + dir +} + +/// Lightweight randomness for test temp dir uniqueness — no crate dep. +fn rand_suffix() -> String { + use std::time::{SystemTime, UNIX_EPOCH}; + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + format!("{:x}", nanos) +} + +fn ado_aw_binary() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_ado-aw")) +} + +/// Run `ado-aw compile `, returning the captured output. +fn run_compile(source: &PathBuf) -> std::process::Output { + Command::new(ado_aw_binary()) + .args(["compile", source.to_str().unwrap()]) + .output() + .expect("Failed to run ado-aw compile") +} + +/// Run `ado-aw check `, returning the captured output. +fn run_check(pipeline: &PathBuf) -> std::process::Output { + Command::new(ado_aw_binary()) + .args(["check", pipeline.to_str().unwrap()]) + .output() + .expect("Failed to run ado-aw check") +} + +/// Write a source file to `dir/agent.md` and return its path. +fn write_source(dir: &PathBuf, content: &str) -> PathBuf { + let path = dir.join("agent.md"); + fs::write(&path, content).expect("write source"); + path +} + +// ─── Future-version rejection ────────────────────────────────────────────── + +#[test] +fn compile_rejects_future_schema_version() { + let dir = fresh_temp_dir("future-version"); + let source = write_source( + &dir, + "---\nschema-version: 99\nname: x\ndescription: y\n---\nbody\n", + ); + + let output = run_compile(&source); + + assert!( + !output.status.success(), + "compile should fail on future schema-version: stdout={:?} stderr={:?}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("only supports up to"), + "stderr should mention compiler's supported schema-version range, got: {}", + stderr + ); + assert!( + stderr.contains("99"), + "stderr should mention the offending version 99, got: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} + +// ─── Invalid schema-version values ───────────────────────────────────────── + +#[test] +fn compile_rejects_zero_schema_version() { + let dir = fresh_temp_dir("zero-version"); + let source = write_source( + &dir, + "---\nschema-version: 0\nname: x\ndescription: y\n---\nbody\n", + ); + + let output = run_compile(&source); + + assert!( + !output.status.success(), + "compile should fail on zero schema-version" + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("must be a positive integer"), + "stderr should reject zero with `must be a positive integer`, got: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} + +#[test] +fn compile_rejects_negative_schema_version() { + let dir = fresh_temp_dir("negative-version"); + let source = write_source( + &dir, + "---\nschema-version: -1\nname: x\ndescription: y\n---\nbody\n", + ); + + let output = run_compile(&source); + + assert!( + !output.status.success(), + "compile should fail on negative schema-version" + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("must be a positive integer"), + "stderr should reject negative with `must be a positive integer`, got: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} + +#[test] +fn compile_rejects_non_integer_schema_version() { + let dir = fresh_temp_dir("non-integer-version"); + let source = write_source( + &dir, + "---\nschema-version: \"not-a-number\"\nname: x\ndescription: y\n---\nbody\n", + ); + + let output = run_compile(&source); + + assert!( + !output.status.success(), + "compile should fail on non-integer schema-version" + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("must be a positive integer"), + "stderr should reject non-integer with `must be a positive integer`, got: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} + +#[test] +fn compile_rejects_float_schema_version() { + let dir = fresh_temp_dir("float-version"); + let source = write_source( + &dir, + "---\nschema-version: 1.5\nname: x\ndescription: y\n---\nbody\n", + ); + + let output = run_compile(&source); + + assert!( + !output.status.success(), + "compile should fail on float schema-version" + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("must be a positive integer"), + "stderr should reject float with `must be a positive integer`, got: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} + +// ─── Healthy v1 round-trip ───────────────────────────────────────────────── + +#[test] +fn compile_succeeds_on_unstamped_v1_source() { + let dir = fresh_temp_dir("unstamped-v1"); + let original = "---\nname: smoketest\ndescription: smoketest description\n---\n## Body\n\nHello.\n"; + let source = write_source(&dir, original); + + let output = run_compile(&source); + + assert!( + output.status.success(), + "compile should succeed on healthy unstamped source: stdout={:?} stderr={:?}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + + // Lock file must be generated. + let lock = source.with_extension("lock.yml"); + assert!( + lock.exists(), + "expected compiled YAML at {}", + lock.display() + ); + + // With CURRENT_SCHEMA_VERSION == 1 and no migrations registered, + // the source must NOT be rewritten — verify byte-identity. + let after = fs::read_to_string(&source).expect("re-read source"); + assert_eq!( + after, original, + "source must be byte-identical after compile when no migrations apply" + ); + + // Stderr should NOT contain a migration warning. + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + !stderr.contains("warning: migrated front matter"), + "no migration warning expected, got stderr: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} + +#[test] +fn compile_succeeds_on_explicitly_stamped_v1_source() { + let dir = fresh_temp_dir("stamped-v1"); + let original = "---\nschema-version: 1\nname: x\ndescription: y\n---\n## Body\n"; + let source = write_source(&dir, original); + + let output = run_compile(&source); + + assert!( + output.status.success(), + "compile should succeed on explicitly stamped v1 source: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let after = fs::read_to_string(&source).expect("re-read source"); + assert_eq!( + after, original, + "explicitly stamped v1 source must be byte-identical" + ); + + let _ = fs::remove_dir_all(&dir); +} + +#[test] +fn compile_then_check_round_trip_passes() { + let dir = fresh_git_temp_dir("round-trip"); + let source = write_source( + &dir, + "---\nname: round-trip-agent\ndescription: round-trip\n---\n## Body\n", + ); + + let compile_output = run_compile(&source); + assert!( + compile_output.status.success(), + "compile should succeed: {}", + String::from_utf8_lossy(&compile_output.stderr) + ); + + let lock = source.with_extension("lock.yml"); + assert!(lock.exists(), "expected lock file at {}", lock.display()); + + // `check` reads the source path from the lock file's @ado-aw header. + // The header records an absolute or relative path from the compile + // invocation; since we passed an absolute path, that's what we get. + let check_output = run_check(&lock); + assert!( + check_output.status.success(), + "check should succeed: stdout={:?} stderr={:?}", + String::from_utf8_lossy(&check_output.stdout), + String::from_utf8_lossy(&check_output.stderr) + ); + + let _ = fs::remove_dir_all(&dir); +} + +// ─── check rejects future-version sources ────────────────────────────────── + +#[test] +fn check_rejects_future_schema_version() { + // Setup: compile a healthy source so we have a valid lock file with + // a header pointing at our source path. Then mutate the source to + // claim a future schema version, and confirm `check` fails. + let dir = fresh_git_temp_dir("check-future"); + let source = write_source( + &dir, + "---\nname: x\ndescription: y\n---\n## Body\n", + ); + + let compile_output = run_compile(&source); + assert!( + compile_output.status.success(), + "initial compile should succeed: {}", + String::from_utf8_lossy(&compile_output.stderr) + ); + + // Mutate the source to claim a future version. Note: this mutation + // would also fail the lock-file integrity check, but the migration + // runner runs first so we observe the schema-version error. + fs::write( + &source, + "---\nschema-version: 99\nname: x\ndescription: y\n---\n## Body\n", + ) + .expect("rewrite source"); + + let lock = source.with_extension("lock.yml"); + let check_output = run_check(&lock); + assert!( + !check_output.status.success(), + "check should fail when source is at a future schema-version" + ); + let stderr = String::from_utf8_lossy(&check_output.stderr); + assert!( + stderr.contains("only supports up to") || stderr.contains("Failed to migrate"), + "stderr should report the future-version error, got: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} + +// ─── Non-mapping front matter ────────────────────────────────────────────── + +#[test] +fn compile_rejects_non_mapping_top_level_yaml() { + let dir = fresh_temp_dir("non-mapping"); + let source = write_source(&dir, "---\n- a\n- b\n---\nbody\n"); + + let output = run_compile(&source); + + assert!( + !output.status.success(), + "compile should fail when front matter is a sequence not a mapping" + ); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("must be a mapping"), + "stderr should report non-mapping error, got: {}", + stderr + ); + + let _ = fs::remove_dir_all(&dir); +} From c5638221c6d17914d1f8ceefcdf6544ebd9a74b4 Mon Sep 17 00:00:00 2001 From: James Devine Date: Fri, 8 May 2026 22:03:51 +0100 Subject: [PATCH 04/12] fix(compile): use checked u32 arithmetic in migration runner Address code review finding: the runner did `1 + registry.len() as u32` and `m.to_version == current + 1` without overflow checks. With realistic registries this is unreachable, but rust panics on overflow in debug mode and wraps in release. Switch to checked_add so we surface a clear error either way; preserves existing behavior on all realistic inputs. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> --- src/compile/migrations/mod.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/compile/migrations/mod.rs b/src/compile/migrations/mod.rs index 0de2a531..0f8d3e5b 100644 --- a/src/compile/migrations/mod.rs +++ b/src/compile/migrations/mod.rs @@ -175,7 +175,17 @@ pub fn migrate_front_matter_with( fm: &mut Mapping, registry: &[&'static Migration], ) -> Result { - let target_version = 1 + registry.len() as u32; + // Use checked arithmetic so we surface a clear error rather than + // panic-or-wrap if a registry ever grows past u32::MAX. Realistic + // registries are tiny — this is a "no panic in library code" guard. + let registry_len: u32 = registry + .len() + .try_into() + .ok() + .context("migration registry has more than u32::MAX entries")?; + let target_version = 1u32 + .checked_add(registry_len) + .context("migration registry too large: target_version would overflow u32")?; let mut current = read_schema_version(fm)?; let from_version = current; @@ -202,8 +212,11 @@ pub fn migrate_front_matter_with( current ) })?; + let next_version = current + .checked_add(1) + .context("migration version overflow: current + 1 exceeds u32::MAX")?; ensure!( - m.from_version == current && m.to_version == current + 1, + m.from_version == current && m.to_version == next_version, "migration registry corrupt: expected from_version={} at index {}, found from_version={} to_version={}", current, idx, From 3804230d01b1213c008b5e37bb3657a28acc445a Mon Sep 17 00:00:00 2001 From: James Devine Date: Fri, 8 May 2026 22:14:13 +0100 Subject: [PATCH 05/12] fix(compile): address PR review feedback on migration framework Four findings from the Rust PR Reviewer bot: 1. check_pipeline's bail message had a redundant "error:" prefix that produced "Error: error: ..." once anyhow's top-level handler added its own "Error:" wrapper. Drop the prefix and reformat the hint onto an indented continuation line. 2. compile_pipeline_inner did a redundant serde_yaml::from_value round-trip just to satisfy ParsedSource.front_matter, even though perform_source_rewrite_if_needed never reads that field. Refactor the helper to take the four primitive fields it actually uses (mapping, body_raw, source_sha256, migrations) instead of the full struct. reconstruct_source likewise takes individual fields now. 3. Misleading comment in tests/migration_tests.rs claimed a "thread-local counter" was used; only a nanosecond timestamp was. Replaced rand_suffix() with timestamp + AtomicU64 monotonic seq so parallel tests scheduled in the same nanosecond always get distinct dirs. 4. Leading whitespace before the opening `---` was silently stripped on migration rewrite. parse_markdown_detailed already tolerates it on read; capture it as a new ParsedSource.leading_whitespace field and emit it from reconstruct_source so byte-faithful preservation extends to whitespace prefixes (BOM-stripped editor blank lines, etc.). All 1369 tests still pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> --- src/compile/common.rs | 66 +++++++++++++++++++---- src/compile/migration_integration_test.rs | 12 ++++- src/compile/mod.rs | 53 +++++++++--------- tests/migration_tests.rs | 12 +++-- 4 files changed, 102 insertions(+), 41 deletions(-) diff --git a/src/compile/common.rs b/src/compile/common.rs index eb345de5..4807fe54 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -102,6 +102,10 @@ pub struct ParsedSource { /// The migrated front-matter mapping, with `schema-version` /// bumped. Used to reconstruct the source for rewrite. pub front_matter_mapping: serde_yaml::Mapping, + /// Whitespace bytes that appeared before the opening `---` fence, + /// preserved verbatim so that source rewrite is byte-faithful. + /// Empty in the typical case where the file starts with `---`. + pub leading_whitespace: String, /// The body region exactly as it appeared after the closing `---`, /// byte-for-byte (no trim). Includes any leading newline. pub body_raw: String, @@ -138,8 +142,12 @@ pub(crate) fn parse_markdown_detailed_with_registry( // Allow leading whitespace before the opening fence (preserves // historical leniency). We compute a byte offset into `content` so - // that `body_raw` extraction is purely byte-faithful. + // that `body_raw` extraction is purely byte-faithful, and we keep + // the whitespace prefix around so that source rewrites preserve + // anything the user (or their editor) put before the opening + // fence. let leading_ws = content.bytes().take_while(|b| b.is_ascii_whitespace()).count(); + let leading_whitespace = content[..leading_ws].to_string(); let after_lead = &content[leading_ws..]; if !after_lead.starts_with("---") { anyhow::bail!("Markdown file must start with YAML front matter (---)"); @@ -184,23 +192,37 @@ pub(crate) fn parse_markdown_detailed_with_registry( markdown_body, migrations: report, front_matter_mapping: mapping, + leading_whitespace, body_raw, source_sha256, }) } -/// Reconstruct full source content from a migrated [`ParsedSource`]. +/// Reconstruct full source content from migration outputs. +/// +/// Takes the individual fragments rather than the full +/// [`ParsedSource`] so callers that have already destructured the +/// parse don't have to round-trip a fresh `front_matter` through +/// serde just to satisfy the typed field. /// /// Output shape: +/// - `leading_whitespace` (typically empty) /// - `---\n` /// - the migrated YAML mapping (`serde_yaml::to_string` always ends /// with `\n`) /// - `---` /// - the original body region byte-for-byte (`body_raw`) -pub fn reconstruct_source(parsed: &ParsedSource) -> Result { - let yaml_serialized = serde_yaml::to_string(&parsed.front_matter_mapping) +pub fn reconstruct_source( + leading_whitespace: &str, + front_matter_mapping: &serde_yaml::Mapping, + body_raw: &str, +) -> Result { + let yaml_serialized = serde_yaml::to_string(front_matter_mapping) .context("Failed to serialize migrated front matter")?; - Ok(format!("---\n{}---{}", yaml_serialized, parsed.body_raw)) + Ok(format!( + "{}---\n{}---{}", + leading_whitespace, yaml_serialized, body_raw + )) } fn yaml_value_kind(v: &serde_yaml::Value) -> &'static str { @@ -2745,13 +2767,22 @@ mod tests { // ─── parse_markdown_detailed ────────────────────────────────────────────── + fn reconstruct(parsed: &ParsedSource) -> String { + reconstruct_source( + &parsed.leading_whitespace, + &parsed.front_matter_mapping, + &parsed.body_raw, + ) + .unwrap() + } + #[test] fn parse_markdown_detailed_preserves_body_byte_for_byte() { // Case 1: trailing newline. let original = "---\nname: x\ndescription: y\n---\nbody line\n"; let parsed = parse_markdown_detailed(original).unwrap(); assert_eq!(parsed.body_raw, "\nbody line\n"); - let reconstructed = reconstruct_source(&parsed).unwrap(); + let reconstructed = reconstruct(&parsed); // No migrations ran, so the YAML round-trip is the only // structural change. The body region is byte-faithful. assert!(reconstructed.ends_with("---\nbody line\n")); @@ -2760,14 +2791,14 @@ mod tests { let original = "---\nname: x\ndescription: y\n---\nbody"; let parsed = parse_markdown_detailed(original).unwrap(); assert_eq!(parsed.body_raw, "\nbody"); - let reconstructed = reconstruct_source(&parsed).unwrap(); + let reconstructed = reconstruct(&parsed); assert!(reconstructed.ends_with("---\nbody")); // Case 3: empty body, but trailing newline. let original = "---\nname: x\ndescription: y\n---\n"; let parsed = parse_markdown_detailed(original).unwrap(); assert_eq!(parsed.body_raw, "\n"); - let reconstructed = reconstruct_source(&parsed).unwrap(); + let reconstructed = reconstruct(&parsed); assert!(reconstructed.ends_with("---\n")); // Case 4: blank lines between closing fence and content are @@ -2786,13 +2817,30 @@ mod tests { let original = "---\nname: x\ndescription: y\n---\n## body\n"; let parsed = parse_markdown_detailed(original).unwrap(); assert!(!parsed.migrations.changed()); - let reconstructed = reconstruct_source(&parsed).unwrap(); + let reconstructed = reconstruct(&parsed); // Find the closing fence in both and compare the suffix. let orig_suffix = &original[original.find("\n---\n").unwrap()..]; let recon_suffix = &reconstructed[reconstructed.find("\n---\n").unwrap()..]; assert_eq!(orig_suffix, recon_suffix, "body region must be byte-identical"); } + #[test] + fn parse_markdown_detailed_preserves_leading_whitespace() { + // Leading blank lines / spaces (e.g. from editor BOM-strippers + // adding a blank line) must round-trip through reconstruction + // so migration rewrites don't silently normalize them away. + let original = "\n \n---\nname: x\ndescription: y\n---\nbody\n"; + let parsed = parse_markdown_detailed(original).unwrap(); + assert_eq!(parsed.leading_whitespace, "\n \n"); + let reconstructed = reconstruct(&parsed); + assert!( + reconstructed.starts_with("\n \n---\n"), + "expected leading whitespace preserved, got: {:?}", + &reconstructed[..20.min(reconstructed.len())] + ); + assert!(reconstructed.ends_with("---\nbody\n")); + } + #[test] fn parse_markdown_detailed_allows_leading_whitespace() { let original = "\n \n---\nname: x\ndescription: y\n---\nbody\n"; diff --git a/src/compile/migration_integration_test.rs b/src/compile/migration_integration_test.rs index 7e086059..e7bb5737 100644 --- a/src/compile/migration_integration_test.rs +++ b/src/compile/migration_integration_test.rs @@ -194,8 +194,16 @@ async fn perform_source_rewrite_lost_update_guard() { std::fs::write(&source_path, "different bytes\n").unwrap(); // Rewrite must refuse. - let result = - perform_source_rewrite_if_needed(&source_path, &original, &parsed).await; + let result = perform_source_rewrite_if_needed( + &source_path, + &original, + &parsed.leading_whitespace, + &parsed.front_matter_mapping, + &parsed.body_raw, + &parsed.source_sha256, + &parsed.migrations, + ) + .await; let err = result.expect_err("expected lost-update guard to fire"); assert!( format!("{:#}", err).contains("changed during compilation"), diff --git a/src/compile/mod.rs b/src/compile/mod.rs index 16dc71c1..43aa654e 100644 --- a/src/compile/mod.rs +++ b/src/compile/mod.rs @@ -128,10 +128,11 @@ async fn compile_pipeline_inner( let parsed = common::parse_markdown_detailed_with_registry(&content, registry)?; let mut front_matter = parsed.front_matter; - let markdown_body = parsed.markdown_body.clone(); - let migrations = parsed.migrations.clone(); - let front_matter_mapping = parsed.front_matter_mapping.clone(); - let body_raw = parsed.body_raw.clone(); + let markdown_body = parsed.markdown_body; + let migrations = parsed.migrations; + let front_matter_mapping = parsed.front_matter_mapping; + let leading_whitespace = parsed.leading_whitespace; + let body_raw = parsed.body_raw; let source_sha256 = parsed.source_sha256; // Sanitize all front matter text fields before any further processing. @@ -211,21 +212,16 @@ async fn compile_pipeline_inner( // we abort before the lock file ever gets touched. let mut migrated = false; if migrations.changed() { - // Rebuild the parsed handle (the typed front_matter has been - // sanitized and consumed; rewrite needs the unmodified mapping). - let parsed_for_rewrite = common::ParsedSource { - front_matter: serde_yaml::from_value(serde_yaml::Value::Mapping( - front_matter_mapping.clone(), - )) - .context("Failed to round-trip migrated front matter for rewrite")?, - markdown_body: markdown_body.clone(), - migrations: migrations.clone(), - front_matter_mapping: front_matter_mapping.clone(), - body_raw: body_raw.clone(), - source_sha256, - }; - migrated = perform_source_rewrite_if_needed(input_path, &content, &parsed_for_rewrite) - .await?; + migrated = perform_source_rewrite_if_needed( + input_path, + &content, + &leading_whitespace, + &front_matter_mapping, + &body_raw, + &source_sha256, + &migrations, + ) + .await?; } // Write output via atomic_write so a crash mid-write cannot leave a @@ -269,9 +265,14 @@ async fn compile_pipeline_inner( async fn perform_source_rewrite_if_needed( input_path: &Path, original_content: &str, - parsed: &common::ParsedSource, + leading_whitespace: &str, + front_matter_mapping: &serde_yaml::Mapping, + body_raw: &str, + source_sha256: &[u8; 32], + migrations: &migrations::MigrationReport, ) -> Result { - let new_content = common::reconstruct_source(parsed)?; + let new_content = + common::reconstruct_source(leading_whitespace, front_matter_mapping, body_raw)?; if new_content == original_content { // Migrations ran but their net effect is a no-op on disk — // skip the rewrite to avoid gratuitous diffs. @@ -291,7 +292,7 @@ async fn perform_source_rewrite_if_needed( let mut hasher = sha2::Sha256::new(); hasher.update(¤t_bytes); let current_hash: [u8; 32] = hasher.finalize().into(); - if current_hash != parsed.source_sha256 { + if ¤t_hash != source_sha256 { anyhow::bail!( "source file {} changed during compilation; refusing to migrate. Re-run `ado-aw compile`.", input_path.display() @@ -310,10 +311,10 @@ async fn perform_source_rewrite_if_needed( eprintln!( "warning: migrated front matter in {}: schema-version {} -> {}", input_path.display(), - parsed.migrations.from_version, - parsed.migrations.to_version + migrations.from_version, + migrations.to_version ); - for applied in &parsed.migrations.applied { + for applied in &migrations.applied { eprintln!(" - {}: {}", applied.id, applied.summary); } eprintln!( @@ -525,7 +526,7 @@ pub async fn check_pipeline(pipeline_path: &str) -> Result<()> { // we want to fail loudly so `ado-aw compile` is run. if parsed.migrations.changed() { anyhow::bail!( - "error: {} is at schema-version {}; this compiler requires schema-version {}.\n\ + "{} is at schema-version {}; this compiler requires schema-version {}.\n \ hint: run `ado-aw compile {}` to migrate the source in place.", source_path.display(), parsed.migrations.from_version, diff --git a/tests/migration_tests.rs b/tests/migration_tests.rs index 75d4f2e9..c7deeb2f 100644 --- a/tests/migration_tests.rs +++ b/tests/migration_tests.rs @@ -27,8 +27,6 @@ fn fresh_temp_dir(label: &str) -> PathBuf { "ado-aw-migration-tests-{}-{}-{}", label, std::process::id(), - // Add a thread-local counter to avoid intra-process collisions - // when multiple tests run in parallel. rand_suffix(), )); fs::create_dir_all(&dir).expect("create temp dir"); @@ -45,14 +43,20 @@ fn fresh_git_temp_dir(label: &str) -> PathBuf { dir } -/// Lightweight randomness for test temp dir uniqueness — no crate dep. +/// Suffix that's unique within the process for the lifetime of a +/// single test binary run. Uses a wall-clock nanosecond timestamp +/// combined with a monotonic atomic counter so two parallel tests +/// scheduled in the same nanosecond still get distinct directories. fn rand_suffix() -> String { + use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{SystemTime, UNIX_EPOCH}; + static SEQ: AtomicU64 = AtomicU64::new(0); let nanos = SystemTime::now() .duration_since(UNIX_EPOCH) .map(|d| d.as_nanos()) .unwrap_or(0); - format!("{:x}", nanos) + let seq = SEQ.fetch_add(1, Ordering::Relaxed); + format!("{:x}-{:x}", nanos, seq) } fn ado_aw_binary() -> PathBuf { From b733af1958b8106228935637f8921ede21c4b3f6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 8 May 2026 21:38:08 +0000 Subject: [PATCH 06/12] feat(compile): migrate `repositories:`+`checkout:` -> `repos:` via migration framework Rebased on feat/frontmatter-migrations and added 0001_repos_unified migration so existing sources are auto-rewritten to the new compact `repos:` shape on compile. With the migration in place, the legacy fields are removed from FrontMatter; the typed deserializer now only accepts `repos:`. The migration runner ensures any source that still declares `repositories:`/`checkout:` is converted (and stamped to schema-version 2) before typed deserialization runs. Agent-Logs-Url: https://github.com/githubnext/ado-aw/sessions/c4473df6-4e6d-4a84-9fa7-74f55749dc78 Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> --- docs/front-matter.md | 25 +- src/compile/common.rs | 108 ++--- src/compile/migration_integration_test.rs | 8 +- src/compile/migrations/0001_repos_unified.rs | 398 +++++++++++++++++++ src/compile/migrations/mod.rs | 7 +- src/compile/mod.rs | 12 +- src/compile/types.rs | 15 +- tests/compiler_tests.rs | 4 +- tests/fixtures/1es-test-agent.md | 7 +- tests/fixtures/azure-devops-mcp-agent.md | 17 +- tests/fixtures/comment-on-work-item-agent.md | 7 +- tests/fixtures/complete-agent.md | 65 +-- tests/fixtures/minimal-agent.md | 5 +- tests/fixtures/pipeline-filter-agent.md | 20 +- tests/fixtures/pipeline-trigger-agent.md | 11 +- tests/fixtures/pr-filter-tier1-agent.md | 26 +- tests/fixtures/pr-filter-tier2-agent.md | 21 +- tests/migration_tests.rs | 32 +- 18 files changed, 594 insertions(+), 194 deletions(-) create mode 100644 src/compile/migrations/0001_repos_unified.rs diff --git a/docs/front-matter.md b/docs/front-matter.md index 21caf6ef..d6300f8e 100644 --- a/docs/front-matter.md +++ b/docs/front-matter.md @@ -217,13 +217,24 @@ repos: ref: refs/heads/release/2.x ``` -### Legacy syntax (deprecated) - -The legacy `repositories:` + `checkout:` fields still work but emit a -deprecation warning. They cannot be mixed with `repos:`. Migrate by -converting each `repositories:` entry into a `repos:` entry — any entry -that appeared in `checkout:` keeps the default `checkout: true`; any that -did not should set `checkout: false`. +### Migration from legacy syntax + +The previous front-matter grammar used two separate fields, +`repositories:` (the resource list) and `checkout:` (the alias list of +which to clone). On compile, the [front-matter migration +framework](migrations.md) automatically rewrites old sources to the +new `repos:` shape — you don't have to do it by hand. The first time +you `ado-aw compile` an older file, you'll see a warning and the file +will be updated in place. + +The mapping is: +- Each `repositories:` entry becomes a `repos:` entry + (`repository:` → `alias:`, the rest is preserved). +- Any entry whose alias appeared in `checkout:` keeps the default + `checkout: true` (and the field is omitted from the rewritten YAML + since it's the default). +- Any entry whose alias did NOT appear in `checkout:` gets an + explicit `checkout: false` (resource-only). ## Filter Validation diff --git a/src/compile/common.rs b/src/compile/common.rs index 4807fe54..9dcaa69f 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -648,38 +648,6 @@ fn derive_alias(name: &str) -> Result { Ok(alias) } -/// Resolve the `repos:` / legacy `repositories:` + `checkout:` fields in a -/// `FrontMatter` into the canonical `(Vec, Vec)` pair. -/// -/// - If `repos:` is non-empty, the legacy fields must be empty (mixing is rejected). -/// - If `repos:` is empty, legacy fields are used as-is (with a deprecation warning -/// when they are non-empty). -/// - If both are empty, returns `(vec![], vec![])`. -pub fn resolve_repos(front_matter: &FrontMatter) -> Result<(Vec, Vec)> { - let has_new = !front_matter.repos.is_empty(); - let has_legacy = !front_matter.repositories.is_empty() || !front_matter.checkout.is_empty(); - - if has_new && has_legacy { - anyhow::bail!( - "Cannot mix `repos:` with legacy `repositories:` / `checkout:`. \ - Migrate to `repos:` or remove it to keep using the legacy fields." - ); - } - - if has_new { - lower_repos(&front_matter.repos) - } else { - if has_legacy { - eprintln!( - "Warning: `repositories:` and `checkout:` are deprecated. \ - Use the compact `repos:` syntax instead. \ - See docs/front-matter.md for migration guidance." - ); - } - Ok((front_matter.repositories.clone(), front_matter.checkout.clone())) - } -} - /// Names that are reserved by the `workspace:` resolver and therefore cannot /// be used as repository aliases / `checkout:` entries. If a user defines a /// repository named `repo` and writes `workspace: repo`, the special-cased @@ -2810,11 +2778,9 @@ mod tests { #[test] fn parse_markdown_detailed_byte_faithful_when_no_migration_runs() { - // With the registry empty, parsing a v1 source and reconstructing - // it should produce a byte-identical document apart from - // serde_yaml's canonical formatting of the YAML mapping. We - // assert the body region matches exactly. - let original = "---\nname: x\ndescription: y\n---\n## body\n"; + // A source already at the current schema version goes through + // unchanged (no migration applied → byte-identical body). + let original = "---\nschema-version: 2\nname: x\ndescription: y\n---\n## body\n"; let parsed = parse_markdown_detailed(original).unwrap(); assert!(!parsed.migrations.changed()); let reconstructed = reconstruct(&parsed); @@ -6028,7 +5994,7 @@ mod tests { // Tests for compact `repos:` lowering // ────────────────────────────────────────────────────────────────────── - use super::{lower_repos, resolve_repos, parse_shorthand, derive_alias}; + use super::{lower_repos, parse_shorthand, derive_alias}; use crate::compile::types::{ReposItem, RepoEntry}; #[test] @@ -6194,38 +6160,7 @@ mod tests { } #[test] - fn test_resolve_repos_rejects_mixing() { - use crate::compile::types::FrontMatter; - let yaml = r#" -name: "test" -description: "test" -repos: - - my-org/tools -repositories: - - repository: foo - type: git - name: org/foo -"#; - let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); - let err = resolve_repos(&fm).unwrap_err(); - assert!(err.to_string().contains("Cannot mix"), "{err}"); - } - - #[test] - fn test_resolve_repos_empty() { - use crate::compile::types::FrontMatter; - let yaml = r#" -name: "test" -description: "test" -"#; - let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); - let (repos, checkout) = resolve_repos(&fm).unwrap(); - assert!(repos.is_empty()); - assert!(checkout.is_empty()); - } - - #[test] - fn test_resolve_repos_compact_syntax() { + fn test_repos_via_front_matter_compact_syntax() { use crate::compile::types::FrontMatter; let yaml = r#" name: "test" @@ -6238,7 +6173,7 @@ repos: checkout: false "#; let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); - let (repos, checkout) = resolve_repos(&fm).unwrap(); + let (repos, checkout) = lower_repos(&fm.repos).unwrap(); assert_eq!(repos.len(), 3); assert_eq!(repos[0].repository, "tools"); assert_eq!(repos[0].name, "my-org/tools"); @@ -6250,8 +6185,26 @@ repos: } #[test] - fn test_resolve_repos_legacy_compat() { + fn test_repos_via_front_matter_empty() { + use crate::compile::types::FrontMatter; + let yaml = r#" +name: "test" +description: "test" +"#; + let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); + let (repos, checkout) = lower_repos(&fm.repos).unwrap(); + assert!(repos.is_empty()); + assert!(checkout.is_empty()); + } + + #[test] + fn test_repos_legacy_fields_rejected_by_deny_unknown() { use crate::compile::types::FrontMatter; + // After migration framework rewrites old sources, the typed + // FrontMatter must not accept the legacy keys directly. The + // `deny_unknown_fields` derive ensures unmigrated sources fail + // typed deserialization (the migration runs first and converts + // them). let yaml = r#" name: "test" description: "test" @@ -6262,10 +6215,11 @@ repositories: checkout: - tools "#; - let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); - let (repos, checkout) = resolve_repos(&fm).unwrap(); - assert_eq!(repos.len(), 1); - assert_eq!(repos[0].repository, "tools"); - assert_eq!(checkout, vec!["tools"]); + let err = serde_yaml::from_str::(yaml).unwrap_err(); + let msg = err.to_string(); + assert!( + msg.contains("repositories") || msg.contains("checkout") || msg.contains("unknown field"), + "expected unknown-field error, got: {msg}" + ); } } diff --git a/src/compile/migration_integration_test.rs b/src/compile/migration_integration_test.rs index e7bb5737..6c297f56 100644 --- a/src/compile/migration_integration_test.rs +++ b/src/compile/migration_integration_test.rs @@ -223,10 +223,10 @@ async fn perform_source_rewrite_lost_update_guard() { #[tokio::test] async fn check_pipeline_fails_on_future_schema_version() { - // The real registry is empty (CURRENT=1), so a source claiming - // schema-version: 2 is a future version. compile should reject it - // loudly through the public entry point. - let future = "---\nschema-version: 2\nname: x\ndescription: y\n---\n"; + // CURRENT_SCHEMA_VERSION is 2 (one registered migration), so a + // source claiming schema-version: 3 is a future version. compile + // should reject it loudly through the public entry point. + let future = "---\nschema-version: 3\nname: x\ndescription: y\n---\n"; let (dir, source_path) = write_temp_md("agent.md", future); let result = compile_pipeline(&source_path.to_string_lossy(), None, true, false).await; let err = result.expect_err("future-version source should fail compile"); diff --git a/src/compile/migrations/0001_repos_unified.rs b/src/compile/migrations/0001_repos_unified.rs new file mode 100644 index 00000000..b75efc2c --- /dev/null +++ b/src/compile/migrations/0001_repos_unified.rs @@ -0,0 +1,398 @@ +//! `repositories:` + `checkout:` → unified `repos:` +//! +//! Before this migration, additional repository resources had to be +//! declared twice: once under `repositories:` (mirroring ADO's +//! `resources.repositories` schema) and again under `checkout:` (the +//! alias list deciding which to actually clone). This migration folds +//! both into a single `repos:` block where each entry carries its own +//! `checkout: true|false` flag (defaulting to `true`). +//! +//! Conversion rules: +//! - Each `repositories:` entry becomes a `repos:` entry preserving +//! `repository` (→ `alias`), `name`, `type`, and `ref`. +//! - Entries listed in `checkout:` keep `checkout: true` (the default, +//! so the field is omitted on output). +//! - Entries NOT listed in `checkout:` get an explicit `checkout: false`. +//! - `checkout:` aliases that don't match any `repositories:` entry are +//! rejected (the legacy compiler also rejected these via +//! `validate_checkout_list`). +//! - Mixing the legacy fields with an already-present `repos:` is +//! rejected — the user must pick one shape. +//! - Sources with neither legacy field are no-ops. + +use anyhow::{bail, Result}; +use serde_yaml::{Mapping, Value}; + +use super::{take_key, Migration, MigrationContext}; + +pub static MIGRATION: Migration = Migration { + from_version: 1, + to_version: 2, + id: "repos_unified", + summary: "repositories: + checkout: -> unified repos:", + introduced_in: "0.28.0", + apply: apply_migration, +}; + +fn apply_migration(fm: &mut Mapping, _ctx: &MigrationContext) -> Result<()> { + let has_repos = fm.contains_key(Value::String("repos".to_string())); + let has_repositories = fm.contains_key(Value::String("repositories".to_string())); + let has_checkout = fm.contains_key(Value::String("checkout".to_string())); + + // No legacy fields → already in the new shape (or doesn't use any + // additional repos at all). No-op. + if !has_repositories && !has_checkout { + return Ok(()); + } + + // Mixing the legacy fields with the new `repos:` is ambiguous — + // refuse rather than guess which one wins. + if has_repos { + bail!( + "front matter has both the new `repos:` field and the legacy \ + `repositories:`/`checkout:` fields. Pick one: remove the \ + legacy fields to use `repos:`, or remove `repos:` to let \ + this migration convert the legacy fields." + ); + } + + // `checkout:` without any `repositories:` is incoherent — the + // aliases would have nothing to refer to. The legacy compiler + // would have failed `validate_checkout_list` for the same reason. + if has_checkout && !has_repositories { + bail!( + "front matter has `checkout:` but no `repositories:`. \ + Either remove `checkout:` or add the corresponding \ + `repositories:` entries (then re-run compile to migrate \ + to `repos:`)." + ); + } + + let repositories = take_key(fm, "repositories").expect("checked above"); + let checkout = take_key(fm, "checkout"); + + let repositories_seq = match repositories { + Value::Sequence(s) => s, + Value::Null => Vec::new(), + other => bail!( + "front matter `repositories:` must be a sequence, got {}", + describe(&other) + ), + }; + + // Collect the checkout alias allow-list. Order doesn't matter for + // membership; we preserve `repositories:` order in the output. + let checkout_aliases: Vec = match checkout { + None | Some(Value::Null) => Vec::new(), + Some(Value::Sequence(s)) => { + let mut out = Vec::with_capacity(s.len()); + for v in s { + match v { + Value::String(name) => out.push(name), + other => bail!( + "front matter `checkout:` entries must be strings, got {}", + describe(&other) + ), + } + } + out + } + Some(other) => bail!( + "front matter `checkout:` must be a sequence of strings, got {}", + describe(&other) + ), + }; + + // Track which checkout aliases we've matched so we can flag + // dangling references (alias listed in checkout but absent from + // repositories). + let mut matched: std::collections::BTreeSet = + std::collections::BTreeSet::new(); + + let mut repos_seq: Vec = Vec::with_capacity(repositories_seq.len()); + for repo in repositories_seq { + let mut repo_map = match repo { + Value::Mapping(m) => m, + other => bail!( + "front matter `repositories:` entries must be mappings, got {}", + describe(&other) + ), + }; + + // The legacy `repository:` key becomes the new `alias:` key. + let alias_value = repo_map.remove(Value::String("repository".to_string())); + let alias_str = match alias_value.as_ref() { + Some(Value::String(s)) => Some(s.clone()), + Some(other) => bail!( + "front matter `repositories:` entry has non-string `repository:` field ({}); \ + manual migration required", + describe(other) + ), + None => None, + }; + + // Build the new entry. Preserve insertion order: + // alias, type, name, ref, checkout. + let mut new_entry = Mapping::new(); + if let Some(alias) = alias_str.as_deref() { + new_entry.insert( + Value::String("alias".to_string()), + Value::String(alias.to_string()), + ); + } + if let Some(v) = repo_map.remove(Value::String("type".to_string())) { + new_entry.insert(Value::String("type".to_string()), v); + } + if let Some(v) = repo_map.remove(Value::String("name".to_string())) { + new_entry.insert(Value::String("name".to_string()), v); + } else { + bail!( + "front matter `repositories:` entry is missing the required `name:` field; \ + manual migration required" + ); + } + if let Some(v) = repo_map.remove(Value::String("ref".to_string())) { + new_entry.insert(Value::String("ref".to_string()), v); + } + // Carry over any unknown keys verbatim so the migration is + // forward-compatible with future ADO `resources.repositories` + // fields we don't yet model. + for (k, v) in repo_map { + new_entry.insert(k, v); + } + + // Determine checkout flag. If `checkout:` was absent entirely, + // legacy semantics treat all repositories as resource-only + // (the agent job didn't clone any). If `checkout:` was + // present, only listed aliases get cloned. + let do_checkout = if !has_checkout { + // Legacy: no `checkout:` block at all means nothing was + // cloned by the agent. + false + } else if let Some(alias) = alias_str.as_deref() { + let listed = checkout_aliases.iter().any(|a| a == alias); + if listed { + matched.insert(alias.to_string()); + } + listed + } else { + // Anonymous entry (no `repository:` alias) cannot be + // referenced from `checkout:` — treat as resource-only. + false + }; + + // Only emit the `checkout` field when it deviates from the + // default of `true`. Keeps migrated output compact. + if !do_checkout { + new_entry.insert( + Value::String("checkout".to_string()), + Value::Bool(false), + ); + } + + repos_seq.push(Value::Mapping(new_entry)); + } + + // Surface dangling checkout aliases (listed but no matching repo). + for alias in &checkout_aliases { + if !matched.contains(alias) { + bail!( + "front matter `checkout:` references alias `{}` that does not appear \ + in `repositories:`; manual migration required", + alias + ); + } + } + + // Insert `repos:` only when we actually have entries; an empty + // `repositories:` should not produce an empty `repos:` block. + if !repos_seq.is_empty() { + fm.insert( + Value::String("repos".to_string()), + Value::Sequence(repos_seq), + ); + } + + Ok(()) +} + +fn describe(v: &Value) -> &'static str { + match v { + Value::Null => "null", + Value::Bool(_) => "bool", + Value::Number(_) => "number", + Value::String(_) => "string", + Value::Sequence(_) => "sequence", + Value::Mapping(_) => "mapping", + Value::Tagged(_) => "tagged", + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn run(input: &str) -> Mapping { + let mut m: Mapping = serde_yaml::from_str(input).unwrap(); + apply_migration(&mut m, &MigrationContext {}).expect("apply"); + m + } + + fn run_err(input: &str) -> String { + let mut m: Mapping = serde_yaml::from_str(input).unwrap(); + format!( + "{}", + apply_migration(&mut m, &MigrationContext {}).unwrap_err() + ) + } + + fn repos(m: &Mapping) -> &Vec { + m.get(Value::String("repos".into())) + .expect("repos key") + .as_sequence() + .expect("repos sequence") + } + + #[test] + fn converts_full_legacy_block_with_checkout_subset() { + let after = run( + "name: x\n\ + repositories:\n\ + - repository: tools\n type: git\n name: my-org/tools\n\ + - repository: schemas\n type: git\n name: my-org/schemas\n\ + - repository: docs\n type: git\n name: my-org/docs\n\ + checkout:\n- tools\n- schemas\n", + ); + // Legacy keys removed + assert!(!after.contains_key(Value::String("repositories".into()))); + assert!(!after.contains_key(Value::String("checkout".into()))); + + let r = repos(&after); + assert_eq!(r.len(), 3); + + let r0 = r[0].as_mapping().unwrap(); + assert_eq!(r0.get(Value::String("alias".into())).unwrap().as_str(), Some("tools")); + assert_eq!(r0.get(Value::String("name".into())).unwrap().as_str(), Some("my-org/tools")); + assert_eq!(r0.get(Value::String("type".into())).unwrap().as_str(), Some("git")); + // checked out -> default, no explicit `checkout:` key. + assert!(r0.get(Value::String("checkout".into())).is_none()); + + let r2 = r[2].as_mapping().unwrap(); + // `docs` is NOT in checkout list -> explicit `checkout: false`. + assert_eq!( + r2.get(Value::String("checkout".into())), + Some(&Value::Bool(false)) + ); + } + + #[test] + fn converts_repositories_only_to_resource_only_entries() { + // `repositories:` without `checkout:` means no entry was + // cloned by the agent in the legacy semantics. + let after = run( + "name: x\n\ + repositories:\n\ + - repository: tpl\n type: git\n name: org/tpl\n", + ); + let r = repos(&after); + assert_eq!(r.len(), 1); + assert_eq!( + r[0].as_mapping().unwrap().get(Value::String("checkout".into())), + Some(&Value::Bool(false)), + "without an explicit checkout list, repos default to resource-only" + ); + } + + #[test] + fn preserves_ref_field() { + let after = run( + "name: x\n\ + repositories:\n\ + - repository: docs\n type: git\n name: org/docs\n ref: refs/heads/release/2.x\n\ + checkout: [docs]\n", + ); + let r = repos(&after); + let entry = r[0].as_mapping().unwrap(); + assert_eq!( + entry.get(Value::String("ref".into())).unwrap().as_str(), + Some("refs/heads/release/2.x") + ); + } + + #[test] + fn rejects_mixing_repos_and_legacy_fields() { + let err = run_err( + "name: x\n\ + repos:\n- org/foo\n\ + repositories:\n- repository: bar\n type: git\n name: org/bar\n", + ); + assert!(err.contains("Pick one"), "got: {}", err); + } + + #[test] + fn rejects_checkout_without_repositories() { + let err = run_err("name: x\ncheckout: [foo]\n"); + assert!(err.contains("`checkout:` but no `repositories:`"), "got: {}", err); + } + + #[test] + fn rejects_dangling_checkout_alias() { + let err = run_err( + "name: x\n\ + repositories:\n- repository: a\n type: git\n name: org/a\n\ + checkout: [b]\n", + ); + assert!(err.contains("does not appear in `repositories:`"), "got: {}", err); + } + + #[test] + fn no_legacy_fields_is_noop() { + let after = run("name: x\ndescription: y\n"); + assert!(!after.contains_key(Value::String("repos".into()))); + assert!(!after.contains_key(Value::String("repositories".into()))); + assert!(!after.contains_key(Value::String("checkout".into()))); + } + + #[test] + fn already_using_repos_alone_is_noop() { + // Defensive: every from_version=1 migration must be safe on + // already-current sources. A file with only `repos:` (no legacy + // fields) goes through unchanged. + let after = run( + "name: x\nrepos:\n- my-org/tools\n", + ); + let r = repos(&after); + assert_eq!(r.len(), 1); + assert_eq!(r[0].as_str(), Some("my-org/tools")); + } + + #[test] + fn empty_repositories_sequence_does_not_emit_repos_key() { + let after = run("name: x\nrepositories: []\n"); + assert!(!after.contains_key(Value::String("repos".into()))); + } + + #[test] + fn rejects_non_mapping_repository_entry() { + let err = run_err( + "name: x\nrepositories:\n- a-string-not-a-mapping\n", + ); + assert!(err.contains("must be mappings"), "got: {}", err); + } + + #[test] + fn carries_over_unknown_repository_keys() { + // Forward-compat: don't drop fields we don't yet model. + let after = run( + "name: x\n\ + repositories:\n- repository: a\n type: git\n name: org/a\n trigger: none\n\ + checkout: [a]\n", + ); + let r = repos(&after); + let entry = r[0].as_mapping().unwrap(); + assert_eq!( + entry.get(Value::String("trigger".into())).unwrap().as_str(), + Some("none") + ); + } +} diff --git a/src/compile/migrations/mod.rs b/src/compile/migrations/mod.rs index 0f8d3e5b..2cd79f81 100644 --- a/src/compile/migrations/mod.rs +++ b/src/compile/migrations/mod.rs @@ -28,6 +28,9 @@ mod helpers; #[allow(unused_imports)] pub use helpers::{insert_no_overwrite, rename_key, take_key, ConflictPolicy}; +#[path = "0001_repos_unified.rs"] +mod m0001_repos_unified; + /// Front-matter key that holds the schema version. Kebab-case to match /// the rest of the front-matter grammar. #[allow(dead_code)] @@ -72,7 +75,9 @@ pub struct Migration { /// 1. Create `src/compile/migrations/_.rs` with a /// `pub static MIGRATION: Migration` and register the module here. /// 2. Append `&::MIGRATION` to this slice. -pub static MIGRATIONS: &[&'static Migration] = &[]; +pub static MIGRATIONS: &[&'static Migration] = &[ + &m0001_repos_unified::MIGRATION, +]; /// Total number of schema versions known to this compiler. /// diff --git a/src/compile/mod.rs b/src/compile/mod.rs index 43aa654e..dbf0f332 100644 --- a/src/compile/mod.rs +++ b/src/compile/mod.rs @@ -7,7 +7,7 @@ //! - **1ES**: Integration with 1ES Pipeline Templates for SDL compliance mod common; -pub(crate) use common::resolve_repos; +pub(crate) use common::lower_repos; pub mod extensions; pub(crate) mod filter_ir; mod gitattributes; @@ -153,8 +153,10 @@ async fn compile_pipeline_inner( debug!("Schedule: {:?}", front_matter.schedule()); debug!("MCP servers configured: {}", front_matter.mcp_servers.len()); - // Resolve repos: new compact syntax or legacy repositories: + checkout: - let (resolved_repos, resolved_checkout) = common::resolve_repos(&front_matter)?; + // Lower compact `repos:` into the internal Repository/checkout pair. + // The migration framework has already converted any legacy + // `repositories:` + `checkout:` shapes into `repos:`. + let (resolved_repos, resolved_checkout) = common::lower_repos(&front_matter.repos)?; front_matter.repositories = resolved_repos; front_matter.checkout = resolved_checkout; debug!("Repositories: {}", front_matter.repositories.len()); @@ -541,8 +543,8 @@ pub async fn check_pipeline(pipeline_path: &str) -> Result<()> { use crate::sanitize::SanitizeConfig; front_matter.sanitize_config_fields(); - // Resolve repos (compact or legacy) - let (resolved_repos, resolved_checkout) = common::resolve_repos(&front_matter)?; + // Lower compact `repos:` into the internal Repository/checkout pair. + let (resolved_repos, resolved_checkout) = common::lower_repos(&front_matter.repos)?; front_matter.repositories = resolved_repos; front_matter.checkout = resolved_checkout; diff --git a/src/compile/types.rs b/src/compile/types.rs index f68bbc9d..bba0acb0 100644 --- a/src/compile/types.rs +++ b/src/compile/types.rs @@ -618,15 +618,20 @@ pub struct FrontMatter { /// Runtime configuration for language environments (e.g., Lean 4) #[serde(default)] pub runtimes: Option, - /// Compact repository declarations (new unified syntax). + /// Compact repository declarations. /// Each entry declares a repository resource and optionally whether to check it out. #[serde(default)] pub repos: Vec, - /// Additional repository resources (legacy — use `repos:` instead) - #[serde(default)] + /// Lowered `Vec` form, populated by `lower_repos()` in + /// `compile/common.rs` after the migration framework has converted + /// the source to the unified `repos:` shape. Not deserialized from + /// YAML directly — sources use `repos:`. + #[serde(skip)] pub repositories: Vec, - /// Repositories to checkout (legacy — use `repos:` instead; subset of repositories) - #[serde(default)] + /// Lowered checkout-alias list, populated by `lower_repos()` from + /// `repos:` entries with `checkout: true`. Not deserialized from + /// YAML directly. + #[serde(skip)] pub checkout: Vec, /// MCP server configurations #[serde(default, rename = "mcp-servers")] diff --git a/tests/compiler_tests.rs b/tests/compiler_tests.rs index 5f62a873..13cb3cbc 100644 --- a/tests/compiler_tests.rs +++ b/tests/compiler_tests.rs @@ -324,8 +324,8 @@ fn test_fixture_complete_agent() { assert!(content.contains("description:"), "Should have description"); assert!(content.contains("schedule:"), "Should have schedule"); assert!( - content.contains("repositories:"), - "Should have repositories" + content.contains("repos:"), + "Should have repos block" ); assert!( content.contains("mcp-servers:"), diff --git a/tests/fixtures/1es-test-agent.md b/tests/fixtures/1es-test-agent.md index a4a83c34..837ce3df 100644 --- a/tests/fixtures/1es-test-agent.md +++ b/tests/fixtures/1es-test-agent.md @@ -1,12 +1,13 @@ --- -name: "1ES Test Agent" -description: "Test agent for 1ES target compilation" +name: 1ES Test Agent +description: Test agent for 1ES target compilation target: 1es mcp-servers: ado: true icm: allowed: - - create_incident + - create_incident +schema-version: 2 --- ## Instructions diff --git a/tests/fixtures/azure-devops-mcp-agent.md b/tests/fixtures/azure-devops-mcp-agent.md index 7c2b2f24..dc435197 100644 --- a/tests/fixtures/azure-devops-mcp-agent.md +++ b/tests/fixtures/azure-devops-mcp-agent.md @@ -1,21 +1,24 @@ --- -name: "Azure DevOps MCP Agent" -description: "Agent with Azure DevOps MCP via first-class tool integration" +name: Azure DevOps MCP Agent +description: Agent with Azure DevOps MCP via first-class tool integration tools: azure-devops: org: myorg - toolsets: [core, work-items] + toolsets: + - core + - work-items allowed: - - core_list_projects - - wit_get_work_item - - wit_create_work_item - - wit_my_work_items + - core_list_projects + - wit_get_work_item + - wit_create_work_item + - wit_my_work_items permissions: read: my-read-arm-connection write: my-write-arm-connection safe-outputs: create-work-item: work-item-type: Task +schema-version: 2 --- ## Azure DevOps MCP Integration Test diff --git a/tests/fixtures/comment-on-work-item-agent.md b/tests/fixtures/comment-on-work-item-agent.md index 811945e1..6b547f59 100644 --- a/tests/fixtures/comment-on-work-item-agent.md +++ b/tests/fixtures/comment-on-work-item-agent.md @@ -1,12 +1,13 @@ --- -name: "Comment on Work Item Agent" -description: "Agent that comments on work items with area path scoping" +name: Comment on Work Item Agent +description: Agent that comments on work items with area path scoping permissions: write: my-write-sc safe-outputs: comment-on-work-item: - target: "MyProject\\Backend" + target: MyProject\Backend max: 3 +schema-version: 2 --- ## Comment on Work Item Agent diff --git a/tests/fixtures/complete-agent.md b/tests/fixtures/complete-agent.md index 44b497f9..04186c18 100644 --- a/tests/fixtures/complete-agent.md +++ b/tests/fixtures/complete-agent.md @@ -1,39 +1,35 @@ --- -name: "Complete Test Agent" -description: "A complete test agent with all features enabled" +name: Complete Test Agent +description: A complete test agent with all features enabled on: schedule: daily around 14:00 -repositories: - - repository: test-repo-1 - type: git - name: test-org/test-repo-1 - ref: refs/heads/main - - repository: test-repo-2 - type: git - name: test-org/test-repo-2 - ref: refs/heads/develop -checkout: - - test-repo-1 +teardown: +- bash: echo "Teardown step" + displayName: Teardown +setup: +- bash: echo "Setup step" + displayName: Setup mcp-servers: ado: true es-chat: true bluebird: true icm: allowed: - - create_incident - - get_incident + - create_incident + - get_incident kusto: allowed: - - query + - query custom-tool: - container: "node:20-slim" - entrypoint: "node" - entrypoint-args: ["server.js"] + container: node:20-slim + entrypoint: node + entrypoint-args: + - server.js allowed: - - custom_function_1 - - custom_function_2 + - custom_function_1 + - custom_function_2 env: - NODE_ENV: "test" + NODE_ENV: test permissions: read: my-read-arm-connection write: my-write-arm-connection @@ -43,17 +39,22 @@ safe-outputs: create-work-item: work-item-type: Task steps: - - bash: echo "Preparing context" - displayName: "Prepare context" +- bash: echo "Preparing context" + displayName: Prepare context post-steps: - - bash: echo "Finalizing" - displayName: "Finalize" -setup: - - bash: echo "Setup step" - displayName: "Setup" -teardown: - - bash: echo "Teardown step" - displayName: "Teardown" +- bash: echo "Finalizing" + displayName: Finalize +repos: +- alias: test-repo-1 + type: git + name: test-org/test-repo-1 + ref: refs/heads/main +- alias: test-repo-2 + type: git + name: test-org/test-repo-2 + ref: refs/heads/develop + checkout: false +schema-version: 2 --- ## Complete Test Agent diff --git a/tests/fixtures/minimal-agent.md b/tests/fixtures/minimal-agent.md index 9fda8eb5..f9115bb4 100644 --- a/tests/fixtures/minimal-agent.md +++ b/tests/fixtures/minimal-agent.md @@ -1,6 +1,7 @@ --- -name: "Minimal Test Agent" -description: "A minimal agent for testing basic functionality" +name: Minimal Test Agent +description: A minimal agent for testing basic functionality +schema-version: 2 --- ## Minimal Agent diff --git a/tests/fixtures/pipeline-filter-agent.md b/tests/fixtures/pipeline-filter-agent.md index 6c002b0b..66c4d10f 100644 --- a/tests/fixtures/pipeline-filter-agent.md +++ b/tests/fixtures/pipeline-filter-agent.md @@ -1,19 +1,21 @@ --- -name: "Pipeline Filter Agent" -description: "Agent triggered by upstream pipeline with filters" +name: Pipeline Filter Agent +description: Agent triggered by upstream pipeline with filters on: pipeline: - name: "Build Pipeline" - project: "OtherProject" + name: Build Pipeline + project: OtherProject branches: - - main + - main filters: - source-pipeline: "Build*" + source-pipeline: Build* time-window: - start: "08:00" - end: "20:00" + start: 08:00 + end: 20:00 build-reason: - include: [ResourceTrigger] + include: + - ResourceTrigger +schema-version: 2 --- ## Pipeline Filter Agent diff --git a/tests/fixtures/pipeline-trigger-agent.md b/tests/fixtures/pipeline-trigger-agent.md index 70b02490..dc0cd128 100644 --- a/tests/fixtures/pipeline-trigger-agent.md +++ b/tests/fixtures/pipeline-trigger-agent.md @@ -1,12 +1,13 @@ --- -name: "Pipeline Trigger Agent" -description: "Agent triggered by an upstream pipeline" +name: Pipeline Trigger Agent +description: Agent triggered by an upstream pipeline on: pipeline: - name: "Build Pipeline" - project: "OtherProject" + name: Build Pipeline + project: OtherProject branches: - - main + - main +schema-version: 2 --- ## Task diff --git a/tests/fixtures/pr-filter-tier1-agent.md b/tests/fixtures/pr-filter-tier1-agent.md index 2fb42338..c8cbc7ef 100644 --- a/tests/fixtures/pr-filter-tier1-agent.md +++ b/tests/fixtures/pr-filter-tier1-agent.md @@ -1,20 +1,26 @@ --- -name: "PR Filter Tier 1 Agent" -description: "Agent with Tier 1 PR filters (pipeline variables only, no evaluator needed)" +name: PR Filter Tier 1 Agent +description: Agent with Tier 1 PR filters (pipeline variables only, no evaluator needed) on: pr: branches: - include: [main] + include: + - main filters: - title: "*[agent]*" + title: '*[agent]*' author: - include: ["dev@corp.com"] - exclude: ["bot@noreply.com"] - source-branch: "feature/*" - target-branch: "main" - commit-message: "*[skip-agent]*" + include: + - dev@corp.com + exclude: + - bot@noreply.com + source-branch: feature/* + target-branch: main + commit-message: '*[skip-agent]*' build-reason: - include: [PullRequest, Manual] + include: + - PullRequest + - Manual +schema-version: 2 --- ## Tier 1 Filter Agent diff --git a/tests/fixtures/pr-filter-tier2-agent.md b/tests/fixtures/pr-filter-tier2-agent.md index c07972d5..415311db 100644 --- a/tests/fixtures/pr-filter-tier2-agent.md +++ b/tests/fixtures/pr-filter-tier2-agent.md @@ -1,21 +1,26 @@ --- -name: "PR Filter Tier 2 Agent" -description: "Agent with Tier 2/3 PR filters (requires evaluator extension)" +name: PR Filter Tier 2 Agent +description: Agent with Tier 2/3 PR filters (requires evaluator extension) on: pr: branches: - include: [main] + include: + - main filters: - title: "*[review]*" + title: '*[review]*' labels: - any-of: ["run-agent", "needs-review"] - none-of: ["do-not-run"] + any-of: + - run-agent + - needs-review + none-of: + - do-not-run draft: false time-window: - start: "09:00" - end: "17:00" + start: 09:00 + end: 17:00 min-changes: 1 max-changes: 500 +schema-version: 2 --- ## Tier 2 Filter Agent diff --git a/tests/migration_tests.rs b/tests/migration_tests.rs index c7deeb2f..e20ae670 100644 --- a/tests/migration_tests.rs +++ b/tests/migration_tests.rs @@ -243,20 +243,18 @@ fn compile_succeeds_on_unstamped_v1_source() { lock.display() ); - // With CURRENT_SCHEMA_VERSION == 1 and no migrations registered, - // the source must NOT be rewritten — verify byte-identity. + // With at least one registered migration (the repos_unified + // migration), every unstamped v1 source is rewritten to add a + // `schema-version` stamp at the latest known version. The body + // must still be preserved. let after = fs::read_to_string(&source).expect("re-read source"); - assert_eq!( - after, original, - "source must be byte-identical after compile when no migrations apply" + assert!( + after.contains("schema-version:"), + "rewritten source should carry a `schema-version` stamp, got: {after}" ); - - // Stderr should NOT contain a migration warning. - let stderr = String::from_utf8_lossy(&output.stderr); assert!( - !stderr.contains("warning: migrated front matter"), - "no migration warning expected, got stderr: {}", - stderr + after.contains("## Body"), + "body must be preserved across migration, got: {after}" ); let _ = fs::remove_dir_all(&dir); @@ -276,10 +274,16 @@ fn compile_succeeds_on_explicitly_stamped_v1_source() { String::from_utf8_lossy(&output.stderr) ); + // Source is rewritten by the migration framework (version stamp + // bumps to the current version), but the body must be preserved. let after = fs::read_to_string(&source).expect("re-read source"); - assert_eq!( - after, original, - "explicitly stamped v1 source must be byte-identical" + assert!( + !after.contains("schema-version: 1\n"), + "schema-version should have been bumped past 1, got: {after}" + ); + assert!( + after.contains("## Body"), + "body must be preserved across migration, got: {after}" ); let _ = fs::remove_dir_all(&dir); From 9c0c7bae56c880e433d2b36c8607b50e77f061be Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 8 May 2026 21:39:36 +0000 Subject: [PATCH 07/12] fix(main): use parse_markdown_detailed + lower_repos in execute path Agent-Logs-Url: https://github.com/githubnext/ado-aw/sessions/c4473df6-4e6d-4a84-9fa7-74f55749dc78 Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> --- src/main.rs | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/main.rs b/src/main.rs index 23618619..99e42b68 100644 --- a/src/main.rs +++ b/src/main.rs @@ -233,11 +233,25 @@ async fn run_execute( .await .with_context(|| format!("Failed to read source file: {}", source.display()))?; - let (mut front_matter, _) = compile::parse_markdown(&content) + let parsed = compile::parse_markdown_detailed(&content) .with_context(|| format!("Failed to parse source file: {}", source.display()))?; - // Resolve compact repos: syntax into the legacy fields for execution - let (resolved_repos, resolved_checkout) = compile::resolve_repos(&front_matter) + if parsed.migrations.changed() { + log::warn!( + "front matter at {} is at schema-version {}; running with in-memory migration to {}. Run `ado-aw compile {}` to update the source.", + source.display(), + parsed.migrations.from_version, + parsed.migrations.to_version, + source.display(), + ); + } + + let mut front_matter = parsed.front_matter; + + // Lower compact repos: into the internal Repository/checkout pair used by + // build_execution_context. The migration framework has already converted + // any legacy `repositories:` + `checkout:` shapes into `repos:`. + let (resolved_repos, resolved_checkout) = compile::lower_repos(&front_matter.repos) .with_context(|| "Failed to resolve repository configuration")?; front_matter.repositories = resolved_repos; front_matter.checkout = resolved_checkout; From 4d994b75865d839b7ed9f8846f1bd83ae48e716d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 8 May 2026 19:57:07 +0000 Subject: [PATCH 08/12] feat(compile): add compact `repos:` front-matter syntax Replace the verbose `repositories:` + `checkout:` pair with a single `repos:` block. Each entry can be a string shorthand ("org/repo" or "alias=org/repo") or an object with name/alias/type/ref/checkout fields. Checkout defaults to true. Legacy fields remain supported with a deprecation warning; mixing the two is rejected at compile time. Agent-Logs-Url: https://github.com/githubnext/ado-aw/sessions/c9ae0fbb-1f1c-4874-b990-16fd3b4bc3e3 Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> --- docs/front-matter.md | 95 +++++++++--- src/compile/common.rs | 353 +++++++++++++++++++++++++++++++++++++++++- src/compile/mod.rs | 13 +- src/compile/types.rs | 104 ++++++++++++- src/main.rs | 8 +- 5 files changed, 550 insertions(+), 23 deletions(-) diff --git a/docs/front-matter.md b/docs/front-matter.md index cdcf9eb0..5f90c971 100644 --- a/docs/front-matter.md +++ b/docs/front-matter.md @@ -16,20 +16,17 @@ engine: copilot # Engine identifier. Defaults to copilot. Currently only 'copilo # id: copilot # model: claude-opus-4.7 # timeout-minutes: 30 -workspace: repo # Optional: "root", "repo" (alias: "self"), or a checked-out repository alias. If not specified, defaults to "root" when no additional repositories are listed in `checkout:`, and to "repo" when one or more additional repos are checked out. See "Workspace Defaults" below. +workspace: repo # Optional: "root", "repo" (alias: "self"), or a checked-out repository alias. If not specified, defaults to "root" when no additional repositories are listed in `repos:`, and to "repo" when one or more additional repos are checked out. See "Workspace Defaults" below. pool: AZS-1ES-L-MMS-ubuntu-22.04 # Agent pool name (string format). Defaults to AZS-1ES-L-MMS-ubuntu-22.04. # pool: # Alternative object format (required for 1ES if specifying os) # name: AZS-1ES-L-MMS-ubuntu-22.04 # os: linux # Operating system: "linux" or "windows". Defaults to "linux". -repositories: # a list of repository resources available to the pipeline (for pre/post jobs, templates, etc.) - - repository: reponame - type: git - name: my-org/my-repo - - repository: another-repo - type: git - name: my-org/another-repo -checkout: # optional list of repository aliases for the agent to checkout and work with (must be subset of repositories) - - reponame # only checkout reponame, not another-repo +repos: # compact repository declarations (replaces repositories: + checkout:) + - my-org/my-repo # shorthand: alias="my-repo", type=git, ref=refs/heads/main, checkout=true + - reponame=my-org/another-repo # shorthand with explicit alias + - name: my-org/templates # object form for full control + ref: refs/heads/release/2.x + checkout: false # declared as resource only, not checked out by the agent tools: # optional tool configuration bash: ["cat", "ls", "grep"] # bash command allow-list (defaults to safe built-in list) edit: true # enable file editing tool (default: true) @@ -152,19 +149,81 @@ Build the project and run all tests... ## Workspace Defaults The `workspace:` field controls which directory the agent runs in. When it is -not set explicitly, the compiler chooses a default based on the `checkout:` -list: +not set explicitly, the compiler chooses a default based on which repositories +are checked out (entries in `repos:` with `checkout: true`, which is the +default): -- If `checkout:` is empty (i.e. only the pipeline's own repository is checked - out via the implicit `self`), `workspace:` defaults to **`root`** — the - agent runs in the pipeline's working directory root. -- If `checkout:` lists one or more additional repository aliases, - `workspace:` defaults to **`repo`** — the agent runs inside the first - checked-out repository's directory. +- If no additional repositories are checked out (i.e. only the pipeline's own + repository is checked out via the implicit `self`), `workspace:` defaults to + **`root`** — the agent runs in the pipeline's working directory root. +- If one or more additional repositories are checked out, `workspace:` defaults + to **`repo`** — the agent runs inside the trigger repository's directory. Set `workspace:` explicitly to `root`, `repo` (alias `self`), or a specific checked-out repository alias to override this behavior. +## Repositories (`repos:`) + +The `repos:` field provides a compact way to declare additional repository +resources and control which ones the agent checks out. It replaces the legacy +`repositories:` + `checkout:` pair. + +Each entry can be: + +| Form | Syntax | Description | +|------|--------|-------------| +| **Shorthand** | `- org/repo` | Alias derived from last segment, type=git, ref=refs/heads/main, checkout=true | +| **Shorthand with alias** | `- alias=org/repo` | Explicit alias before `=` | +| **Object** | `- name: org/repo` | Full control over all fields | + +Object fields: + +| Field | Default | Description | +|------------|------------------------|-------------| +| `name` | *(required)* | Full `org/repo` name (maps to ADO `name:`) | +| `alias` | last segment of `name` | Repository alias (maps to ADO `repository:`) | +| `type` | `git` | ADO repository resource type | +| `ref` | `refs/heads/main` | Branch or tag reference | +| `checkout` | `true` | Whether the agent job clones this repo | + +### Examples + +Three repos, all checked out (most common case): + +```yaml +repos: + - my-org/tools + - my-org/schemas + - my-org/docs +``` + +Mixed: two checked out, one resource-only (used by templates): + +```yaml +repos: + - my-org/tools + - my-org/schemas + - name: my-org/pipeline-templates + checkout: false +``` + +Custom ref and explicit alias: + +```yaml +repos: + - name: my-org/docs + alias: docs-v2 + ref: refs/heads/release/2.x +``` + +### Legacy syntax (deprecated) + +The legacy `repositories:` + `checkout:` fields still work but emit a +deprecation warning. They cannot be mixed with `repos:`. Migrate by +converting each `repositories:` entry into a `repos:` entry — any entry +that appeared in `checkout:` keeps the default `checkout: true`; any that +did not should set `checkout: false`. + ## Filter Validation The compiler validates filter configurations at compile time and will emit diff --git a/src/compile/common.rs b/src/compile/common.rs index a79b8589..338824b1 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -4,7 +4,7 @@ use anyhow::{Context, Result}; use std::collections::{HashMap, HashSet}; use std::path::Path; -use super::types::{FrontMatter, OnConfig, PipelineParameter, Repository}; +use super::types::{FrontMatter, OnConfig, PipelineParameter, Repository, ReposItem}; use super::extensions::{CompilerExtension, Extension, McpgServerConfig, McpgGatewayConfig, McpgConfig, CompileContext}; use crate::compile::types::McpConfig; use crate::fuzzy_schedule; @@ -574,6 +574,140 @@ pub fn generate_checkout_self() -> String { "- checkout: self".to_string() } +// ────────────────────────────────────────────────────────────────────────────── +// Compact `repos:` lowering +// ────────────────────────────────────────────────────────────────────────────── + +/// Lower a `repos:` list into the internal `(Vec, Vec)` pair +/// consumed by the rest of the compiler. Also validates aliases for collisions. +pub fn lower_repos(items: &[ReposItem]) -> Result<(Vec, Vec)> { + let mut repositories: Vec = Vec::new(); + let mut checkout: Vec = Vec::new(); + let mut seen_aliases: HashSet = HashSet::new(); + + for item in items { + let (name, alias, repo_type, repo_ref, do_checkout) = match item { + ReposItem::Shorthand(s) => { + let (alias, name) = parse_shorthand(s)?; + (name, alias, "git".to_string(), "refs/heads/main".to_string(), true) + } + ReposItem::Full(entry) => { + let alias = match &entry.alias { + Some(a) => a.clone(), + None => derive_alias(&entry.name)?, + }; + ( + entry.name.clone(), + alias, + entry.repo_type.clone(), + entry.repo_ref.clone(), + entry.checkout, + ) + } + }; + + // Reject duplicate aliases + if !seen_aliases.insert(alias.clone()) { + anyhow::bail!( + "Duplicate repository alias '{}' in repos. \ + Use the `alias` field (or `alias=org/repo` shorthand) to disambiguate.", + alias + ); + } + + // Reject reserved names + if RESERVED_WORKSPACE_NAMES.contains(&alias.as_str()) { + anyhow::bail!( + "Repository alias '{}' is reserved by the 'workspace:' resolver ({:?}). \ + Rename the alias to avoid ambiguity.", + alias, + RESERVED_WORKSPACE_NAMES + ); + } + + repositories.push(Repository { + repository: alias.clone(), + repo_type, + name, + repo_ref, + }); + + if do_checkout { + checkout.push(alias); + } + } + + Ok((repositories, checkout)) +} + +/// Parse a shorthand string: `"org/repo"` → (derived alias, name), or +/// `"alias=org/repo"` → (alias, name). +fn parse_shorthand(s: &str) -> Result<(String, String)> { + if let Some((alias, name)) = s.split_once('=') { + let alias = alias.trim().to_string(); + let name = name.trim().to_string(); + if alias.is_empty() { + anyhow::bail!("repos shorthand '{}' has an empty alias before '='", s); + } + if name.is_empty() { + anyhow::bail!("repos shorthand '{}' has an empty name after '='", s); + } + Ok((alias, name)) + } else { + let alias = derive_alias(s)?; + Ok((alias, s.to_string())) + } +} + +/// Derive the alias from a full `org/repo` name (last path segment). +fn derive_alias(name: &str) -> Result { + let alias = name + .rsplit('/') + .next() + .unwrap_or(name) + .to_string(); + if alias.is_empty() { + anyhow::bail!( + "Cannot derive a repository alias from '{}'. \ + Provide an explicit `alias` field.", + name + ); + } + Ok(alias) +} + +/// Resolve the `repos:` / legacy `repositories:` + `checkout:` fields in a +/// `FrontMatter` into the canonical `(Vec, Vec)` pair. +/// +/// - If `repos:` is non-empty, the legacy fields must be empty (mixing is rejected). +/// - If `repos:` is empty, legacy fields are used as-is (with a deprecation warning +/// when they are non-empty). +/// - If both are empty, returns `(vec![], vec![])`. +pub fn resolve_repos(front_matter: &FrontMatter) -> Result<(Vec, Vec)> { + let has_new = !front_matter.repos.is_empty(); + let has_legacy = !front_matter.repositories.is_empty() || !front_matter.checkout.is_empty(); + + if has_new && has_legacy { + anyhow::bail!( + "Cannot mix `repos:` with legacy `repositories:` / `checkout:`. \ + Migrate to `repos:` or remove it to keep using the legacy fields." + ); + } + + if has_new { + lower_repos(&front_matter.repos) + } else { + if has_legacy { + eprintln!( + "Warning: `repositories:` and `checkout:` are deprecated. \ + Use the compact `repos:` syntax instead. \ + See docs/front-matter.md for migration guidance." + ); + } + Ok((front_matter.repositories.clone(), front_matter.checkout.clone())) + } +} + /// Names that are reserved by the `workspace:` resolver and therefore cannot /// be used as repository aliases / `checkout:` entries. If a user defines a /// repository named `repo` and writes `workspace: repo`, the special-cased @@ -5942,4 +6076,221 @@ mod tests { assert!(out.contains("name: org/repo-a"), "out: {out}"); assert!(out.contains("ref: refs/heads/develop"), "out: {out}"); } + + // ────────────────────────────────────────────────────────────────────── + // Tests for compact `repos:` lowering + // ────────────────────────────────────────────────────────────────────── + + use super::{lower_repos, resolve_repos, parse_shorthand, derive_alias}; + use crate::compile::types::{ReposItem, RepoEntry}; + + #[test] + fn test_repos_shorthand_simple() { + let items = vec![ReposItem::Shorthand("my-org/my-repo".to_string())]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos.len(), 1); + assert_eq!(repos[0].repository, "my-repo"); + assert_eq!(repos[0].name, "my-org/my-repo"); + assert_eq!(repos[0].repo_type, "git"); + assert_eq!(repos[0].repo_ref, "refs/heads/main"); + assert_eq!(checkout, vec!["my-repo"]); + } + + #[test] + fn test_repos_shorthand_with_alias() { + let items = vec![ReposItem::Shorthand("schemas=my-org/internal-schemas".to_string())]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos.len(), 1); + assert_eq!(repos[0].repository, "schemas"); + assert_eq!(repos[0].name, "my-org/internal-schemas"); + assert_eq!(checkout, vec!["schemas"]); + } + + #[test] + fn test_repos_object_form_defaults() { + let items = vec![ReposItem::Full(RepoEntry { + name: "my-org/docs".to_string(), + alias: None, + repo_type: "git".to_string(), + repo_ref: "refs/heads/main".to_string(), + checkout: true, + })]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos[0].repository, "docs"); + assert_eq!(repos[0].name, "my-org/docs"); + assert_eq!(checkout, vec!["docs"]); + } + + #[test] + fn test_repos_object_form_no_checkout() { + let items = vec![ReposItem::Full(RepoEntry { + name: "my-org/big-monorepo".to_string(), + alias: None, + repo_type: "git".to_string(), + repo_ref: "refs/heads/main".to_string(), + checkout: false, + })]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos.len(), 1); + assert_eq!(repos[0].repository, "big-monorepo"); + assert!(checkout.is_empty()); + } + + #[test] + fn test_repos_object_form_custom_ref() { + let items = vec![ReposItem::Full(RepoEntry { + name: "my-org/docs".to_string(), + alias: Some("docs-v2".to_string()), + repo_type: "git".to_string(), + repo_ref: "refs/heads/release/2.x".to_string(), + checkout: true, + })]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos[0].repository, "docs-v2"); + assert_eq!(repos[0].repo_ref, "refs/heads/release/2.x"); + assert_eq!(checkout, vec!["docs-v2"]); + } + + #[test] + fn test_repos_rejects_duplicate_aliases() { + let items = vec![ + ReposItem::Shorthand("org/tools".to_string()), + ReposItem::Shorthand("other-org/tools".to_string()), + ]; + let err = lower_repos(&items).unwrap_err(); + assert!(err.to_string().contains("Duplicate repository alias 'tools'"), "{err}"); + } + + #[test] + fn test_repos_rejects_reserved_alias() { + let items = vec![ReposItem::Shorthand("org/self".to_string())]; + let err = lower_repos(&items).unwrap_err(); + assert!(err.to_string().contains("reserved"), "{err}"); + + let items = vec![ReposItem::Shorthand("org/repo".to_string())]; + let err = lower_repos(&items).unwrap_err(); + assert!(err.to_string().contains("reserved"), "{err}"); + } + + #[test] + fn test_repos_multiple_mixed() { + let items = vec![ + ReposItem::Shorthand("my-org/tools".to_string()), + ReposItem::Shorthand("schemas=my-org/internal-schemas".to_string()), + ReposItem::Full(RepoEntry { + name: "my-org/templates".to_string(), + alias: None, + repo_type: "git".to_string(), + repo_ref: "refs/heads/main".to_string(), + checkout: false, + }), + ]; + let (repos, checkout) = lower_repos(&items).unwrap(); + assert_eq!(repos.len(), 3); + assert_eq!(checkout, vec!["tools", "schemas"]); + assert_eq!(repos[2].repository, "templates"); + } + + #[test] + fn test_parse_shorthand_simple() { + let (alias, name) = parse_shorthand("org/my-repo").unwrap(); + assert_eq!(alias, "my-repo"); + assert_eq!(name, "org/my-repo"); + } + + #[test] + fn test_parse_shorthand_with_equals() { + let (alias, name) = parse_shorthand("custom=org/my-repo").unwrap(); + assert_eq!(alias, "custom"); + assert_eq!(name, "org/my-repo"); + } + + #[test] + fn test_parse_shorthand_empty_alias_rejected() { + let err = parse_shorthand("=org/repo").unwrap_err(); + assert!(err.to_string().contains("empty alias"), "{err}"); + } + + #[test] + fn test_derive_alias_basic() { + assert_eq!(derive_alias("org/my-repo").unwrap(), "my-repo"); + assert_eq!(derive_alias("my-repo").unwrap(), "my-repo"); + assert_eq!(derive_alias("a/b/c").unwrap(), "c"); + } + + #[test] + fn test_resolve_repos_rejects_mixing() { + use crate::compile::types::FrontMatter; + let yaml = r#" +name: "test" +description: "test" +repos: + - my-org/tools +repositories: + - repository: foo + type: git + name: org/foo +"#; + let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); + let err = resolve_repos(&fm).unwrap_err(); + assert!(err.to_string().contains("Cannot mix"), "{err}"); + } + + #[test] + fn test_resolve_repos_empty() { + use crate::compile::types::FrontMatter; + let yaml = r#" +name: "test" +description: "test" +"#; + let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); + let (repos, checkout) = resolve_repos(&fm).unwrap(); + assert!(repos.is_empty()); + assert!(checkout.is_empty()); + } + + #[test] + fn test_resolve_repos_compact_syntax() { + use crate::compile::types::FrontMatter; + let yaml = r#" +name: "test" +description: "test" +repos: + - my-org/tools + - schemas=my-org/internal-schemas + - name: my-org/docs + ref: refs/heads/v2 + checkout: false +"#; + let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); + let (repos, checkout) = resolve_repos(&fm).unwrap(); + assert_eq!(repos.len(), 3); + assert_eq!(repos[0].repository, "tools"); + assert_eq!(repos[0].name, "my-org/tools"); + assert_eq!(repos[1].repository, "schemas"); + assert_eq!(repos[1].name, "my-org/internal-schemas"); + assert_eq!(repos[2].repository, "docs"); + assert_eq!(repos[2].repo_ref, "refs/heads/v2"); + assert_eq!(checkout, vec!["tools", "schemas"]); + } + + #[test] + fn test_resolve_repos_legacy_compat() { + use crate::compile::types::FrontMatter; + let yaml = r#" +name: "test" +description: "test" +repositories: + - repository: tools + type: git + name: my-org/tools +checkout: + - tools +"#; + let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); + let (repos, checkout) = resolve_repos(&fm).unwrap(); + assert_eq!(repos.len(), 1); + assert_eq!(repos[0].repository, "tools"); + assert_eq!(checkout, vec!["tools"]); + } } diff --git a/src/compile/mod.rs b/src/compile/mod.rs index 63bfa15a..58cbfbf7 100644 --- a/src/compile/mod.rs +++ b/src/compile/mod.rs @@ -7,6 +7,7 @@ //! - **1ES**: Integration with 1ES Pipeline Templates for SDL compliance mod common; +pub(crate) use common::resolve_repos; pub mod extensions; pub(crate) mod filter_ir; mod gitattributes; @@ -150,9 +151,14 @@ async fn compile_pipeline_inner( debug!("Target: {:?}", front_matter.target); debug!("Engine: {} (model: {})", front_matter.engine.engine_id(), front_matter.engine.model().unwrap_or("default")); debug!("Schedule: {:?}", front_matter.schedule()); - debug!("Repositories: {}", front_matter.repositories.len()); debug!("MCP servers configured: {}", front_matter.mcp_servers.len()); + // Resolve repos: new compact syntax or legacy repositories: + checkout: + let (resolved_repos, resolved_checkout) = common::resolve_repos(&front_matter)?; + front_matter.repositories = resolved_repos; + front_matter.checkout = resolved_checkout; + debug!("Repositories: {}", front_matter.repositories.len()); + // Validate checkout list against repositories common::validate_checkout_list(&front_matter.repositories, &front_matter.checkout)?; @@ -529,6 +535,11 @@ pub async fn check_pipeline(pipeline_path: &str) -> Result<()> { use crate::sanitize::SanitizeConfig; front_matter.sanitize_config_fields(); + // Resolve repos (compact or legacy) + let (resolved_repos, resolved_checkout) = common::resolve_repos(&front_matter)?; + front_matter.repositories = resolved_repos; + front_matter.checkout = resolved_checkout; + common::validate_checkout_list(&front_matter.repositories, &front_matter.checkout)?; let compiler: Box = match front_matter.target { diff --git a/src/compile/types.rs b/src/compile/types.rs index 46ddb8c0..66594225 100644 --- a/src/compile/types.rs +++ b/src/compile/types.rs @@ -602,10 +602,14 @@ pub struct FrontMatter { /// Runtime configuration for language environments (e.g., Lean 4) #[serde(default)] pub runtimes: Option, - /// Additional repository resources + /// Compact repository declarations (new unified syntax). + /// Each entry declares a repository resource and optionally whether to check it out. + #[serde(default)] + pub repos: Vec, + /// Additional repository resources (legacy — use `repos:` instead) #[serde(default)] pub repositories: Vec, - /// Repositories to checkout (subset of repositories) + /// Repositories to checkout (legacy — use `repos:` instead; subset of repositories) #[serde(default)] pub checkout: Vec, /// MCP server configurations @@ -697,6 +701,9 @@ impl SanitizeConfigTrait for FrontMatter { if let Some(ref mut r) = self.runtimes { r.sanitize_config_fields(); } + for item in &mut self.repos { + item.sanitize(); + } for repo in &mut self.repositories { repo.sanitize_config_fields(); } @@ -795,6 +802,99 @@ fn default_ref() -> String { "refs/heads/main".to_string() } +// ────────────────────────────────────────────────────────────────────────────── +// Compact `repos:` syntax — a single block that replaces both `repositories:` +// and `checkout:` with sensible defaults. +// ────────────────────────────────────────────────────────────────────────────── + +/// Object form for a `repos:` entry. +#[derive(Debug, Deserialize, Clone)] +pub struct RepoEntry { + /// Full repo name in the form `org/repo` (maps to ADO `name:`). + pub name: String, + /// Optional alias (maps to ADO `repository:`). Defaults to the last segment of `name`. + #[serde(default)] + pub alias: Option, + /// ADO repository resource type. Defaults to `"git"`. + #[serde(default = "default_repo_type", rename = "type")] + pub repo_type: String, + /// Branch/tag ref. Defaults to `"refs/heads/main"`. + #[serde(default = "default_ref", rename = "ref")] + pub repo_ref: String, + /// Whether the agent job checks out this repository. Defaults to `true`. + #[serde(default = "default_checkout")] + pub checkout: bool, +} + +fn default_repo_type() -> String { + "git".to_string() +} + +fn default_checkout() -> bool { + true +} + +/// A single item in the `repos:` list — either a string shorthand or an object. +#[derive(Debug, Clone)] +pub enum ReposItem { + /// String shorthand: `"org/repo"` or `"alias=org/repo"`. + Shorthand(String), + /// Full object form with explicit fields. + Full(RepoEntry), +} + +impl<'de> Deserialize<'de> for ReposItem { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + use serde::de; + + struct ReposItemVisitor; + + impl<'de> de::Visitor<'de> for ReposItemVisitor { + type Value = ReposItem; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("a string shorthand (\"org/repo\" or \"alias=org/repo\") or an object with at least a `name` field") + } + + fn visit_str(self, value: &str) -> std::result::Result { + Ok(ReposItem::Shorthand(value.to_string())) + } + + fn visit_map(self, map: M) -> std::result::Result + where + M: de::MapAccess<'de>, + { + let entry = RepoEntry::deserialize(de::value::MapAccessDeserializer::new(map))?; + Ok(ReposItem::Full(entry)) + } + } + + deserializer.deserialize_any(ReposItemVisitor) + } +} + +impl ReposItem { + /// Sanitize all user-provided fields through `sanitize_config`. + pub fn sanitize(&mut self) { + match self { + ReposItem::Shorthand(s) => { + *s = crate::sanitize::sanitize_config(s); + } + ReposItem::Full(entry) => { + entry.name = crate::sanitize::sanitize_config(&entry.name); + if let Some(ref mut a) = entry.alias { + *a = crate::sanitize::sanitize_config(a); + } + entry.repo_type = crate::sanitize::sanitize_config(&entry.repo_type); + entry.repo_ref = crate::sanitize::sanitize_config(&entry.repo_ref); + } + } + } +} + /// MCP configuration - can be `true` for simple enablement or an object with options #[derive(Debug, Deserialize, Clone)] #[serde(untagged)] diff --git a/src/main.rs b/src/main.rs index fa6253c8..1ee74b93 100644 --- a/src/main.rs +++ b/src/main.rs @@ -244,7 +244,13 @@ async fn run_execute( ); } - let front_matter = parsed.front_matter; + let mut front_matter = parsed.front_matter; + + // Resolve compact repos: syntax into the legacy fields for execution + let (resolved_repos, resolved_checkout) = compile::resolve_repos(&front_matter) + .with_context(|| "Failed to resolve repository configuration")?; + front_matter.repositories = resolved_repos; + front_matter.checkout = resolved_checkout; println!("Loaded tool configs from: {}", source.display()); From 046775fa1ad9f97542042dda273bd3c5183a60b7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 8 May 2026 20:01:39 +0000 Subject: [PATCH 09/12] fix(compile): address review feedback for repos: syntax - Handle trailing slash in derive_alias (trim before splitting) - Add tests for reserved alias via explicit alias (shorthand & object) - Add test for empty name after '=' in shorthand - Add test for trailing slash alias derivation Agent-Logs-Url: https://github.com/githubnext/ado-aw/sessions/c9ae0fbb-1f1c-4874-b990-16fd3b4bc3e3 Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> --- src/compile/common.rs | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/src/compile/common.rs b/src/compile/common.rs index 338824b1..895bfc81 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -661,10 +661,12 @@ fn parse_shorthand(s: &str) -> Result<(String, String)> { /// Derive the alias from a full `org/repo` name (last path segment). fn derive_alias(name: &str) -> Result { - let alias = name + // Trim trailing slashes to handle "org/repo/" gracefully + let trimmed = name.trim_end_matches('/'); + let alias = trimmed .rsplit('/') .next() - .unwrap_or(name) + .unwrap_or(trimmed) .to_string(); if alias.is_empty() { anyhow::bail!( @@ -6170,6 +6172,22 @@ mod tests { let items = vec![ReposItem::Shorthand("org/repo".to_string())]; let err = lower_repos(&items).unwrap_err(); assert!(err.to_string().contains("reserved"), "{err}"); + + // Reserved via explicit alias in shorthand form + let items = vec![ReposItem::Shorthand("self=org/some-repo".to_string())]; + let err = lower_repos(&items).unwrap_err(); + assert!(err.to_string().contains("reserved"), "{err}"); + + // Reserved via explicit alias in object form + let items = vec![ReposItem::Full(RepoEntry { + name: "org/fine-repo".to_string(), + alias: Some("root".to_string()), + repo_type: "git".to_string(), + repo_ref: "refs/heads/main".to_string(), + checkout: true, + })]; + let err = lower_repos(&items).unwrap_err(); + assert!(err.to_string().contains("reserved"), "{err}"); } #[test] @@ -6211,6 +6229,12 @@ mod tests { assert!(err.to_string().contains("empty alias"), "{err}"); } + #[test] + fn test_parse_shorthand_empty_name_rejected() { + let err = parse_shorthand("alias=").unwrap_err(); + assert!(err.to_string().contains("empty name"), "{err}"); + } + #[test] fn test_derive_alias_basic() { assert_eq!(derive_alias("org/my-repo").unwrap(), "my-repo"); @@ -6218,6 +6242,12 @@ mod tests { assert_eq!(derive_alias("a/b/c").unwrap(), "c"); } + #[test] + fn test_derive_alias_trailing_slash() { + // Trailing slash should be trimmed gracefully + assert_eq!(derive_alias("org/repo/").unwrap(), "repo"); + } + #[test] fn test_resolve_repos_rejects_mixing() { use crate::compile::types::FrontMatter; From 6058baaa409ab960998e11f16b95b9ad537ca2ae Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 9 May 2026 07:27:23 +0000 Subject: [PATCH 10/12] feat(compile): add repos_unified codemod for legacy repositories: + checkout: rewrite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the previous front-matter migration framework approach with the codemod framework merged in #476. The new 0001_repos_unified codemod detects the legacy repositories: + checkout: shape in raw YAML and rewrites it in place to the unified repos: shape before typed deserialization. Idempotent on already-current sources. Drops the typed repositories: / checkout: fields from FrontMatter (now #[serde(skip)] Rust-only fields populated by lower_repos); the codemod runs first so user YAML never reaches typed deserialization with legacy keys. resolve_repos is simplified accordingly — the codemod owns the mixing/legacy concerns. Updates docs/front-matter.md to point at docs/codemods.md. Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> --- docs/front-matter.md | 16 +- src/compile/codemods/0001_repos_unified.rs | 421 +++++++++++++++++++++ src/compile/codemods/helpers.rs | 5 - src/compile/codemods/mod.rs | 22 +- src/compile/common.rs | 79 +--- src/compile/types.rs | 15 +- tests/compiler_tests.rs | 4 +- tests/fixtures/complete-agent.md | 64 ++-- 8 files changed, 502 insertions(+), 124 deletions(-) create mode 100644 src/compile/codemods/0001_repos_unified.rs diff --git a/docs/front-matter.md b/docs/front-matter.md index 5f90c971..aa81206a 100644 --- a/docs/front-matter.md +++ b/docs/front-matter.md @@ -216,13 +216,15 @@ repos: ref: refs/heads/release/2.x ``` -### Legacy syntax (deprecated) - -The legacy `repositories:` + `checkout:` fields still work but emit a -deprecation warning. They cannot be mixed with `repos:`. Migrate by -converting each `repositories:` entry into a `repos:` entry — any entry -that appeared in `checkout:` keeps the default `checkout: true`; any that -did not should set `checkout: false`. +### Legacy syntax (auto-rewritten) + +The legacy `repositories:` + `checkout:` fields are auto-converted to +`repos:` by the [`repos_unified` codemod](codemods.md). On the next +`ado-aw compile`, any source that still uses the legacy fields is +rewritten in place to the new shape — each `repositories:` entry +becomes a `repos:` entry, with `checkout: false` added for entries +that weren't listed under `checkout:`. Mixing the legacy fields with +an existing `repos:` block is rejected; pick one shape. ## Filter Validation diff --git a/src/compile/codemods/0001_repos_unified.rs b/src/compile/codemods/0001_repos_unified.rs new file mode 100644 index 00000000..8fcc220c --- /dev/null +++ b/src/compile/codemods/0001_repos_unified.rs @@ -0,0 +1,421 @@ +//! `repositories:` + `checkout:` → unified `repos:` +//! +//! Before this codemod, additional repository resources had to be +//! declared twice: once under `repositories:` (mirroring ADO's +//! `resources.repositories` schema) and again under `checkout:` (the +//! alias list deciding which to actually clone). This codemod folds +//! both into a single `repos:` block where each entry carries its own +//! `checkout: true|false` flag (defaulting to `true`). +//! +//! Conversion rules: +//! - Each `repositories:` entry becomes a `repos:` entry preserving +//! `repository` (→ `alias`), `name`, `type`, and `ref`. +//! - Entries listed in `checkout:` keep `checkout: true` (the default, +//! so the field is omitted on output). +//! - Entries NOT listed in `checkout:` get an explicit `checkout: false`. +//! - `checkout:` aliases that don't match any `repositories:` entry are +//! rejected (the legacy compiler also rejected these via +//! `validate_checkout_list`). +//! - Mixing the legacy fields with an already-present `repos:` is +//! rejected — the user must pick one shape. +//! - Sources with neither legacy field are no-ops (idempotent). + +use anyhow::{bail, Result}; +use serde_yaml::{Mapping, Value}; + +use super::{take_key, Codemod, CodemodContext}; + +pub static CODEMOD: Codemod = Codemod { + id: "repos_unified", + summary: "repositories: + checkout: -> unified repos:", + introduced_in: "0.28.0", + apply: apply_codemod, +}; + +fn apply_codemod(fm: &mut Mapping, _ctx: &CodemodContext) -> Result { + let has_repos = fm.contains_key(Value::String("repos".to_string())); + let has_repositories = fm.contains_key(Value::String("repositories".to_string())); + let has_checkout = fm.contains_key(Value::String("checkout".to_string())); + + // No legacy fields → already in the new shape (or doesn't use any + // additional repos at all). No-op (idempotent). + if !has_repositories && !has_checkout { + return Ok(false); + } + + // Mixing the legacy fields with the new `repos:` is ambiguous — + // refuse rather than guess which one wins. + if has_repos { + bail!( + "front matter has both the new `repos:` field and the legacy \ + `repositories:`/`checkout:` fields. Pick one: remove the \ + legacy fields to use `repos:`, or remove `repos:` to let \ + this codemod convert the legacy fields." + ); + } + + // `checkout:` without any `repositories:` is incoherent — the + // aliases would have nothing to refer to. The legacy compiler + // would have failed `validate_checkout_list` for the same reason. + if has_checkout && !has_repositories { + bail!( + "front matter has `checkout:` but no `repositories:`. \ + Either remove `checkout:` or add the corresponding \ + `repositories:` entries (then re-run compile to migrate \ + to `repos:`)." + ); + } + + let repositories = take_key(fm, "repositories").expect("checked above"); + let checkout = take_key(fm, "checkout"); + + let repositories_seq = match repositories { + Value::Sequence(s) => s, + Value::Null => Vec::new(), + other => bail!( + "front matter `repositories:` must be a sequence, got {}", + describe(&other) + ), + }; + + // Collect the checkout alias allow-list. Order doesn't matter for + // membership; we preserve `repositories:` order in the output. + let checkout_aliases: Vec = match checkout { + None | Some(Value::Null) => Vec::new(), + Some(Value::Sequence(s)) => { + let mut out = Vec::with_capacity(s.len()); + for v in s { + match v { + Value::String(name) => out.push(name), + other => bail!( + "front matter `checkout:` entries must be strings, got {}", + describe(&other) + ), + } + } + out + } + Some(other) => bail!( + "front matter `checkout:` must be a sequence of strings, got {}", + describe(&other) + ), + }; + + // Track which checkout aliases we've matched so we can flag + // dangling references (alias listed in checkout but absent from + // repositories). + let mut matched: std::collections::BTreeSet = + std::collections::BTreeSet::new(); + + let mut repos_seq: Vec = Vec::with_capacity(repositories_seq.len()); + for repo in repositories_seq { + let mut repo_map = match repo { + Value::Mapping(m) => m, + other => bail!( + "front matter `repositories:` entries must be mappings, got {}", + describe(&other) + ), + }; + + // The legacy `repository:` key becomes the new `alias:` key. + let alias_value = repo_map.remove(Value::String("repository".to_string())); + let alias_str = match alias_value.as_ref() { + Some(Value::String(s)) => Some(s.clone()), + Some(other) => bail!( + "front matter `repositories:` entry has non-string `repository:` field ({}); \ + manual migration required", + describe(other) + ), + None => None, + }; + + // Build the new entry. Preserve insertion order: + // alias, type, name, ref, checkout. + let mut new_entry = Mapping::new(); + if let Some(alias) = alias_str.as_deref() { + new_entry.insert( + Value::String("alias".to_string()), + Value::String(alias.to_string()), + ); + } + if let Some(v) = repo_map.remove(Value::String("type".to_string())) { + new_entry.insert(Value::String("type".to_string()), v); + } + if let Some(v) = repo_map.remove(Value::String("name".to_string())) { + new_entry.insert(Value::String("name".to_string()), v); + } else { + bail!( + "front matter `repositories:` entry is missing the required `name:` field; \ + manual migration required" + ); + } + if let Some(v) = repo_map.remove(Value::String("ref".to_string())) { + new_entry.insert(Value::String("ref".to_string()), v); + } + // Carry over any unknown keys verbatim so the codemod is + // forward-compatible with future ADO `resources.repositories` + // fields we don't yet model. + for (k, v) in repo_map { + new_entry.insert(k, v); + } + + // Determine checkout flag. If `checkout:` was absent entirely, + // legacy semantics treat all repositories as resource-only + // (the agent job didn't clone any). If `checkout:` was + // present, only listed aliases get cloned. + let do_checkout = if !has_checkout { + // Legacy: no `checkout:` block at all means nothing was + // cloned by the agent. + false + } else if let Some(alias) = alias_str.as_deref() { + let listed = checkout_aliases.iter().any(|a| a == alias); + if listed { + matched.insert(alias.to_string()); + } + listed + } else { + // Anonymous entry (no `repository:` alias) cannot be + // referenced from `checkout:` — treat as resource-only. + false + }; + + // Only emit the `checkout` field when it deviates from the + // default of `true`. Keeps rewritten output compact. + if !do_checkout { + new_entry.insert( + Value::String("checkout".to_string()), + Value::Bool(false), + ); + } + + repos_seq.push(Value::Mapping(new_entry)); + } + + // Surface dangling checkout aliases (listed but no matching repo). + for alias in &checkout_aliases { + if !matched.contains(alias) { + bail!( + "front matter `checkout:` references alias `{}` that does not appear \ + in `repositories:`; manual migration required", + alias + ); + } + } + + // Insert `repos:` only when we actually have entries; an empty + // `repositories:` should not produce an empty `repos:` block. + if !repos_seq.is_empty() { + fm.insert( + Value::String("repos".to_string()), + Value::Sequence(repos_seq), + ); + } + + Ok(true) +} + +fn describe(v: &Value) -> &'static str { + match v { + Value::Null => "null", + Value::Bool(_) => "bool", + Value::Number(_) => "number", + Value::String(_) => "string", + Value::Sequence(_) => "sequence", + Value::Mapping(_) => "mapping", + Value::Tagged(_) => "tagged", + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn run(input: &str) -> Mapping { + let mut m: Mapping = serde_yaml::from_str(input).unwrap(); + let changed = apply_codemod(&mut m, &CodemodContext {}).expect("apply"); + assert!(changed, "expected codemod to fire on input:\n{}", input); + m + } + + fn run_noop(input: &str) -> Mapping { + let mut m: Mapping = serde_yaml::from_str(input).unwrap(); + let changed = apply_codemod(&mut m, &CodemodContext {}).expect("apply"); + assert!(!changed, "expected codemod to be a no-op on input:\n{}", input); + m + } + + fn run_err(input: &str) -> String { + let mut m: Mapping = serde_yaml::from_str(input).unwrap(); + format!( + "{}", + apply_codemod(&mut m, &CodemodContext {}).unwrap_err() + ) + } + + fn repos(m: &Mapping) -> &Vec { + m.get(Value::String("repos".into())) + .expect("repos key") + .as_sequence() + .expect("repos sequence") + } + + #[test] + fn converts_full_legacy_block_with_checkout_subset() { + let after = run( + "name: x\n\ + repositories:\n\ + - repository: tools\n type: git\n name: my-org/tools\n\ + - repository: schemas\n type: git\n name: my-org/schemas\n\ + - repository: docs\n type: git\n name: my-org/docs\n\ + checkout:\n- tools\n- schemas\n", + ); + // Legacy keys removed + assert!(!after.contains_key(Value::String("repositories".into()))); + assert!(!after.contains_key(Value::String("checkout".into()))); + + let r = repos(&after); + assert_eq!(r.len(), 3); + + let r0 = r[0].as_mapping().unwrap(); + assert_eq!(r0.get(Value::String("alias".into())).unwrap().as_str(), Some("tools")); + assert_eq!(r0.get(Value::String("name".into())).unwrap().as_str(), Some("my-org/tools")); + assert_eq!(r0.get(Value::String("type".into())).unwrap().as_str(), Some("git")); + // checked out -> default, no explicit `checkout:` key. + assert!(r0.get(Value::String("checkout".into())).is_none()); + + let r2 = r[2].as_mapping().unwrap(); + // `docs` is NOT in checkout list -> explicit `checkout: false`. + assert_eq!( + r2.get(Value::String("checkout".into())), + Some(&Value::Bool(false)) + ); + } + + #[test] + fn converts_repositories_only_to_resource_only_entries() { + // `repositories:` without `checkout:` means no entry was + // cloned by the agent in the legacy semantics. + let after = run( + "name: x\n\ + repositories:\n\ + - repository: tpl\n type: git\n name: org/tpl\n", + ); + let r = repos(&after); + assert_eq!(r.len(), 1); + assert_eq!( + r[0].as_mapping().unwrap().get(Value::String("checkout".into())), + Some(&Value::Bool(false)), + "without an explicit checkout list, repos default to resource-only" + ); + } + + #[test] + fn preserves_ref_field() { + let after = run( + "name: x\n\ + repositories:\n\ + - repository: docs\n type: git\n name: org/docs\n ref: refs/heads/release/2.x\n\ + checkout: [docs]\n", + ); + let r = repos(&after); + let entry = r[0].as_mapping().unwrap(); + assert_eq!( + entry.get(Value::String("ref".into())).unwrap().as_str(), + Some("refs/heads/release/2.x") + ); + } + + #[test] + fn rejects_mixing_repos_and_legacy_fields() { + let err = run_err( + "name: x\n\ + repos:\n- org/foo\n\ + repositories:\n- repository: bar\n type: git\n name: org/bar\n", + ); + assert!(err.contains("Pick one"), "got: {}", err); + } + + #[test] + fn rejects_checkout_without_repositories() { + let err = run_err("name: x\ncheckout: [foo]\n"); + assert!(err.contains("`checkout:` but no `repositories:`"), "got: {}", err); + } + + #[test] + fn rejects_dangling_checkout_alias() { + let err = run_err( + "name: x\n\ + repositories:\n- repository: a\n type: git\n name: org/a\n\ + checkout: [b]\n", + ); + assert!(err.contains("does not appear in `repositories:`"), "got: {}", err); + } + + #[test] + fn no_legacy_fields_is_noop() { + let after = run_noop("name: x\ndescription: y\n"); + assert!(!after.contains_key(Value::String("repos".into()))); + assert!(!after.contains_key(Value::String("repositories".into()))); + assert!(!after.contains_key(Value::String("checkout".into()))); + } + + #[test] + fn already_using_repos_alone_is_noop() { + // Idempotency: a file with only `repos:` (no legacy fields) is + // a no-op. + let after = run_noop( + "name: x\nrepos:\n- my-org/tools\n", + ); + let r = repos(&after); + assert_eq!(r.len(), 1); + assert_eq!(r[0].as_str(), Some("my-org/tools")); + } + + #[test] + fn empty_repositories_sequence_does_not_emit_repos_key() { + let after = run("name: x\nrepositories: []\n"); + assert!(!after.contains_key(Value::String("repos".into()))); + } + + #[test] + fn rejects_non_mapping_repository_entry() { + let err = run_err( + "name: x\nrepositories:\n- a-string-not-a-mapping\n", + ); + assert!(err.contains("must be mappings"), "got: {}", err); + } + + #[test] + fn carries_over_unknown_repository_keys() { + // Forward-compat: don't drop fields we don't yet model. + let after = run( + "name: x\n\ + repositories:\n- repository: a\n type: git\n name: org/a\n trigger: none\n\ + checkout: [a]\n", + ); + let r = repos(&after); + let entry = r[0].as_mapping().unwrap(); + assert_eq!( + entry.get(Value::String("trigger".into())).unwrap().as_str(), + Some("none") + ); + } + + #[test] + fn idempotent_when_run_twice() { + // Critical codemod invariant: running twice produces the same + // final state as running once. + let mut m: Mapping = serde_yaml::from_str( + "name: x\n\ + repositories:\n- repository: a\n type: git\n name: org/a\n\ + checkout: [a]\n", + ) + .unwrap(); + let first = apply_codemod(&mut m, &CodemodContext {}).expect("first"); + assert!(first, "first run should fire"); + let snapshot = m.clone(); + let second = apply_codemod(&mut m, &CodemodContext {}).expect("second"); + assert!(!second, "second run should be a no-op"); + assert_eq!(m, snapshot, "second run must not mutate"); + } +} diff --git a/src/compile/codemods/helpers.rs b/src/compile/codemods/helpers.rs index 59331fa2..fa02350d 100644 --- a/src/compile/codemods/helpers.rs +++ b/src/compile/codemods/helpers.rs @@ -9,7 +9,6 @@ use serde_yaml::{Mapping, Value}; /// Conflict policy used by [`rename_key`] when the destination key is /// already present. -// TODO(codemods): remove when the first real codemod is registered. #[allow(dead_code)] #[derive(Debug, Clone, Copy)] pub enum ConflictPolicy { @@ -22,8 +21,6 @@ pub enum ConflictPolicy { } /// Remove and return the value at `key`, or `None` if absent. -// TODO(codemods): remove when the first real codemod is registered. -#[allow(dead_code)] pub fn take_key(m: &mut Mapping, key: &str) -> Option { m.remove(Value::String(key.to_string())) } @@ -32,7 +29,6 @@ pub fn take_key(m: &mut Mapping, key: &str) -> Option { /// /// Prefer this over `Mapping::insert` in migrations: silent overwrite is /// almost always a bug when transforming user data. -// TODO(codemods): remove when the first real codemod is registered. #[allow(dead_code)] pub fn insert_no_overwrite( m: &mut Mapping, @@ -68,7 +64,6 @@ pub fn insert_no_overwrite( /// rely on this invariant when chaining migrations: a failed rename /// won't leave the mapping in a half-mutated state for the next call /// to inspect. -// TODO(codemods): remove when the first real codemod is registered. #[allow(dead_code)] pub fn rename_key( m: &mut Mapping, diff --git a/src/compile/codemods/mod.rs b/src/compile/codemods/mod.rs index 6e308eca..b952ac28 100644 --- a/src/compile/codemods/mod.rs +++ b/src/compile/codemods/mod.rs @@ -33,15 +33,10 @@ use anyhow::{Context, Result}; use serde_yaml::Mapping; mod helpers; -// `#[allow(unused_imports)]` here mirrors the per-item -// `#[allow(dead_code)]` annotations in `helpers.rs`. While the -// CODEMODS registry is empty, no in-crate caller exercises these -// re-exports — the lint warns. Once the first real codemod lands -// and uses one of these helpers, both the re-export attribute and -// the per-item `dead_code` allows on `helpers.rs` should be -// removed. -// TODO(codemods): remove when the first real codemod is registered. -#[allow(unused_imports)] +#[path = "0001_repos_unified.rs"] +mod m0001_repos_unified; + +#[allow(unused_imports)] // Re-exported for future codemods; only `take_key` is in-tree use. pub use helpers::{insert_no_overwrite, rename_key, take_key, ConflictPolicy}; /// Forward-compatible context passed to every codemod. Currently @@ -74,7 +69,6 @@ pub struct Codemod { pub summary: &'static str, /// Compiler version that introduced this codemod (e.g. "0.27.0"). /// Provenance only; not consumed by the runner. - // TODO(codemods): remove when the first real codemod is registered. #[allow(dead_code)] pub introduced_in: &'static str, /// The transformation. Returns `Ok(true)` when the codemod @@ -89,7 +83,9 @@ pub struct Codemod { /// having run first (e.g. A renames `foo` → `bar`, B operates on /// `bar`); idempotency means any codemod can re-run on any source /// without harm. -pub static CODEMODS: &[&'static Codemod] = &[]; +pub static CODEMODS: &[&'static Codemod] = &[ + &m0001_repos_unified::CODEMOD, +]; /// Result of running the codemod registry on a single front-matter /// mapping. @@ -109,7 +105,6 @@ pub struct AppliedCodemod { impl CodemodReport { /// An empty report (no codemod fired). - // TODO(codemods): remove when the first real codemod is registered. #[allow(dead_code)] pub fn empty() -> Self { Self { @@ -123,8 +118,6 @@ impl CodemodReport { } /// IDs of codemods that ran, in order. Helpful for tests. - // TODO(codemods): remove when the first real codemod is registered. - #[allow(dead_code)] pub fn applied_ids(&self) -> Vec<&'static str> { self.applied.iter().map(|a| a.id).collect() } @@ -134,7 +127,6 @@ impl CodemodReport { /// /// Equivalent to [`apply_codemods_with`] called with the global /// [`CODEMODS`] registry. -// TODO(codemods): remove when the first real codemod is registered. #[allow(dead_code)] pub fn apply_codemods(fm: &mut Mapping) -> Result { apply_codemods_with(fm, CODEMODS) diff --git a/src/compile/common.rs b/src/compile/common.rs index 895bfc81..9aed8350 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -678,36 +678,18 @@ fn derive_alias(name: &str) -> Result { Ok(alias) } -/// Resolve the `repos:` / legacy `repositories:` + `checkout:` fields in a -/// `FrontMatter` into the canonical `(Vec, Vec)` pair. +/// Resolve the `repos:` field in a `FrontMatter` into the canonical +/// `(Vec, Vec)` pair consumed by the rest of the compiler. /// -/// - If `repos:` is non-empty, the legacy fields must be empty (mixing is rejected). -/// - If `repos:` is empty, legacy fields are used as-is (with a deprecation warning -/// when they are non-empty). -/// - If both are empty, returns `(vec![], vec![])`. +/// The legacy `repositories:` + `checkout:` fields are converted to `repos:` +/// by the `repos_unified` codemod (`src/compile/codemods/0001_repos_unified.rs`) +/// before typed deserialization, so by the time this function runs the only +/// shape it sees is `repos:`. pub fn resolve_repos(front_matter: &FrontMatter) -> Result<(Vec, Vec)> { - let has_new = !front_matter.repos.is_empty(); - let has_legacy = !front_matter.repositories.is_empty() || !front_matter.checkout.is_empty(); - - if has_new && has_legacy { - anyhow::bail!( - "Cannot mix `repos:` with legacy `repositories:` / `checkout:`. \ - Migrate to `repos:` or remove it to keep using the legacy fields." - ); - } - - if has_new { - lower_repos(&front_matter.repos) - } else { - if has_legacy { - eprintln!( - "Warning: `repositories:` and `checkout:` are deprecated. \ - Use the compact `repos:` syntax instead. \ - See docs/front-matter.md for migration guidance." - ); - } - Ok((front_matter.repositories.clone(), front_matter.checkout.clone())) + if front_matter.repos.is_empty() { + return Ok((Vec::new(), Vec::new())); } + lower_repos(&front_matter.repos) } /// Names that are reserved by the `workspace:` resolver and therefore cannot @@ -6248,24 +6230,6 @@ mod tests { assert_eq!(derive_alias("org/repo/").unwrap(), "repo"); } - #[test] - fn test_resolve_repos_rejects_mixing() { - use crate::compile::types::FrontMatter; - let yaml = r#" -name: "test" -description: "test" -repos: - - my-org/tools -repositories: - - repository: foo - type: git - name: org/foo -"#; - let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); - let err = resolve_repos(&fm).unwrap_err(); - assert!(err.to_string().contains("Cannot mix"), "{err}"); - } - #[test] fn test_resolve_repos_empty() { use crate::compile::types::FrontMatter; @@ -6305,19 +6269,18 @@ repos: } #[test] - fn test_resolve_repos_legacy_compat() { - use crate::compile::types::FrontMatter; - let yaml = r#" -name: "test" -description: "test" -repositories: - - repository: tools - type: git - name: my-org/tools -checkout: - - tools -"#; - let fm: FrontMatter = serde_yaml::from_str(yaml).unwrap(); + fn test_resolve_repos_legacy_via_codemod() { + // Legacy `repositories:` + `checkout:` now flow through the + // `repos_unified` codemod and arrive at typed deserialization + // already in the unified `repos:` shape. + use crate::compile::parse_markdown; + let source = "---\n\ + name: test\n\ + description: test\n\ + repositories:\n - repository: tools\n type: git\n name: my-org/tools\n\ + checkout:\n - tools\n\ + ---\nbody\n"; + let (fm, _) = parse_markdown(source).unwrap(); let (repos, checkout) = resolve_repos(&fm).unwrap(); assert_eq!(repos.len(), 1); assert_eq!(repos[0].repository, "tools"); diff --git a/src/compile/types.rs b/src/compile/types.rs index 66594225..0cac5d3a 100644 --- a/src/compile/types.rs +++ b/src/compile/types.rs @@ -602,15 +602,20 @@ pub struct FrontMatter { /// Runtime configuration for language environments (e.g., Lean 4) #[serde(default)] pub runtimes: Option, - /// Compact repository declarations (new unified syntax). + /// Compact repository declarations. /// Each entry declares a repository resource and optionally whether to check it out. #[serde(default)] pub repos: Vec, - /// Additional repository resources (legacy — use `repos:` instead) - #[serde(default)] + /// Lowered `Vec` form, populated by `lower_repos()` in + /// `compile/common.rs` after the codemod registry has converted any + /// legacy `repositories:` + `checkout:` shape into the unified + /// `repos:` shape. Not deserialized from YAML directly. + #[serde(skip)] pub repositories: Vec, - /// Repositories to checkout (legacy — use `repos:` instead; subset of repositories) - #[serde(default)] + /// Lowered checkout-alias list, populated by `lower_repos()` from + /// `repos:` entries with `checkout: true`. Not deserialized from + /// YAML directly. + #[serde(skip)] pub checkout: Vec, /// MCP server configurations #[serde(default, rename = "mcp-servers")] diff --git a/tests/compiler_tests.rs b/tests/compiler_tests.rs index 5f62a873..7d3a55c4 100644 --- a/tests/compiler_tests.rs +++ b/tests/compiler_tests.rs @@ -324,8 +324,8 @@ fn test_fixture_complete_agent() { assert!(content.contains("description:"), "Should have description"); assert!(content.contains("schedule:"), "Should have schedule"); assert!( - content.contains("repositories:"), - "Should have repositories" + content.contains("repos:"), + "Should have repos" ); assert!( content.contains("mcp-servers:"), diff --git a/tests/fixtures/complete-agent.md b/tests/fixtures/complete-agent.md index 44b497f9..8e38d797 100644 --- a/tests/fixtures/complete-agent.md +++ b/tests/fixtures/complete-agent.md @@ -1,39 +1,35 @@ --- -name: "Complete Test Agent" -description: "A complete test agent with all features enabled" +name: Complete Test Agent +description: A complete test agent with all features enabled on: schedule: daily around 14:00 -repositories: - - repository: test-repo-1 - type: git - name: test-org/test-repo-1 - ref: refs/heads/main - - repository: test-repo-2 - type: git - name: test-org/test-repo-2 - ref: refs/heads/develop -checkout: - - test-repo-1 +teardown: +- bash: echo "Teardown step" + displayName: Teardown +setup: +- bash: echo "Setup step" + displayName: Setup mcp-servers: ado: true es-chat: true bluebird: true icm: allowed: - - create_incident - - get_incident + - create_incident + - get_incident kusto: allowed: - - query + - query custom-tool: - container: "node:20-slim" - entrypoint: "node" - entrypoint-args: ["server.js"] + container: node:20-slim + entrypoint: node + entrypoint-args: + - server.js allowed: - - custom_function_1 - - custom_function_2 + - custom_function_1 + - custom_function_2 env: - NODE_ENV: "test" + NODE_ENV: test permissions: read: my-read-arm-connection write: my-write-arm-connection @@ -43,17 +39,21 @@ safe-outputs: create-work-item: work-item-type: Task steps: - - bash: echo "Preparing context" - displayName: "Prepare context" +- bash: echo "Preparing context" + displayName: Prepare context post-steps: - - bash: echo "Finalizing" - displayName: "Finalize" -setup: - - bash: echo "Setup step" - displayName: "Setup" -teardown: - - bash: echo "Teardown step" - displayName: "Teardown" +- bash: echo "Finalizing" + displayName: Finalize +repos: +- alias: test-repo-1 + type: git + name: test-org/test-repo-1 + ref: refs/heads/main +- alias: test-repo-2 + type: git + name: test-org/test-repo-2 + ref: refs/heads/develop + checkout: false --- ## Complete Test Agent From 1a5af1e12d38adcda01f2359aab6b8f251eea622 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 9 May 2026 07:32:26 +0000 Subject: [PATCH 11/12] docs(codemods): fix helpers.rs doc strings to refer to codemods (was migrations) Agent-Logs-Url: https://github.com/githubnext/ado-aw/sessions/32568271-c9c7-4a9c-a0f8-37d10b112446 Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> --- src/compile/codemods/helpers.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/compile/codemods/helpers.rs b/src/compile/codemods/helpers.rs index fa02350d..7485f27a 100644 --- a/src/compile/codemods/helpers.rs +++ b/src/compile/codemods/helpers.rs @@ -1,6 +1,6 @@ -//! Helpers for writing migrations against `serde_yaml::Mapping`. +//! Helpers for writing codemods against `serde_yaml::Mapping`. //! -//! Migrations should prefer these over raw `Mapping` manipulation so that +//! Codemods should prefer these over raw `Mapping` manipulation so that //! conflicts (e.g. both old and new keys present) are surfaced rather than //! silently overwritten. @@ -27,7 +27,7 @@ pub fn take_key(m: &mut Mapping, key: &str) -> Option { /// Insert `value` at `key`, returning `Err` if the key already exists. /// -/// Prefer this over `Mapping::insert` in migrations: silent overwrite is +/// Prefer this over `Mapping::insert` in codemods: silent overwrite is /// almost always a bug when transforming user data. #[allow(dead_code)] pub fn insert_no_overwrite( @@ -61,7 +61,7 @@ pub fn insert_no_overwrite( /// Returns `Ok(false)` when `old` was absent (no-op). /// /// The mapping is left **unchanged** on the error path. Callers can -/// rely on this invariant when chaining migrations: a failed rename +/// rely on this invariant when chaining codemods: a failed rename /// won't leave the mapping in a half-mutated state for the next call /// to inspect. #[allow(dead_code)] From da9d474bd1899c19d9b3a227248e215774839949 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 9 May 2026 07:50:20 +0000 Subject: [PATCH 12/12] fix(compile): address rust-review bug findings (sanitize before lowering, no-op on empty repositories:) Agent-Logs-Url: https://github.com/githubnext/ado-aw/sessions/512194bf-d86b-457b-b919-752efb1eff3d Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> --- src/compile/codemods/0001_repos_unified.rs | 16 +++++++++++++++- src/main.rs | 6 ++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/compile/codemods/0001_repos_unified.rs b/src/compile/codemods/0001_repos_unified.rs index 8fcc220c..88511c32 100644 --- a/src/compile/codemods/0001_repos_unified.rs +++ b/src/compile/codemods/0001_repos_unified.rs @@ -101,6 +101,16 @@ fn apply_codemod(fm: &mut Mapping, _ctx: &CodemodContext) -> Result { ), }; + // Trivially-empty sources: an empty `repositories:` (and either no + // `checkout:` or an empty one) carries no semantic content. We + // already removed the vacuous keys from the mapping above, but + // report this as a no-op so the caller doesn't surface a + // "deprecated shapes" warning or rewrite the file just to drop + // empty stubs. + if repositories_seq.is_empty() && checkout_aliases.is_empty() { + return Ok(false); + } + // Track which checkout aliases we've matched so we can flag // dangling references (alias listed in checkout but absent from // repositories). @@ -373,7 +383,11 @@ mod tests { #[test] fn empty_repositories_sequence_does_not_emit_repos_key() { - let after = run("name: x\nrepositories: []\n"); + // A trivially-empty `repositories: []` carries no semantic + // content — the codemod cleans up the stub key but reports + // changed=false so the caller doesn't surface a rewrite + // warning. + let after = run_noop("name: x\nrepositories: []\n"); assert!(!after.contains_key(Value::String("repos".into()))); } diff --git a/src/main.rs b/src/main.rs index 1ee74b93..6dcb07c6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -246,6 +246,12 @@ async fn run_execute( let mut front_matter = parsed.front_matter; + // Sanitize before lowering repos, mirroring compile_pipeline_inner + // and check_pipeline so unsanitized fields never flow into the + // execution context. + use crate::sanitize::SanitizeConfig; + front_matter.sanitize_config_fields(); + // Resolve compact repos: syntax into the legacy fields for execution let (resolved_repos, resolved_checkout) = compile::resolve_repos(&front_matter) .with_context(|| "Failed to resolve repository configuration")?;