From 17dabc55f45ad4333fa0a5a67f335b2272ce02ff Mon Sep 17 00:00:00 2001 From: James Devine Date: Wed, 6 May 2026 22:57:40 +0100 Subject: [PATCH 1/2] fix(safeoutputs): rewrite upload-pipeline-artifact flow to fix HTTP 405 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prior implementation tried to create a per-artifact file container via `POST /_apis/resources/containers`, which is not a real ADO endpoint (only GET is registered for the unscoped path). The earlier #421 fix adding `scopeIdentifier` did not help because no POST handler exists on the unscoped route. Rewrite to use the canonical 2-step flow that all official Azure DevOps tools use: upload bytes to the agent's own pre-existing build container (exposed via `BUILD_CONTAINERID`), then associate an artifact record on the target build with `resource.data` pointing back at our container. Cross-build publishing now works as a side-effect: the artifact record on the target build holds a pointer; ADO does not require the container to belong to the target build (this is exactly how `DownloadBuildArtifacts@1 buildType=specific` already works in the wild). The artifact bytes physically live in the agent's own build's container, so cross-published artifacts share the agent build's retention — documented in docs/safe-outputs.md. Avoid silent byte-overwrites in the agent's shared container when multiple calls in one run share an artifact name (e.g. publishing the same `TriageSummary` to many failing builds at once) by inserting a 6-character hash discriminator into the internal container folder name. The hash is invisible in standard download paths (web UI zip wrapper, DownloadBuildArtifacts@1, DownloadPipelineArtifact@2 — all strip the prefix) and the user-visible `artifact_name` is unaffected. Add a `require-unique-names: true` config opt-out that uses a clean folder name and rejects in-run reuse of `(effective_build_id, artifact_name)` early. The dedupe state lives on `ExecutionContext` in an `Arc>` so all calls in one Stage 3 run share it. Other fixes: - Switch upload query param `scopeIdentifier` -> `scope` (canonical name; all SDKs use `scope`; `scopeIdentifier` is a response model field name, not a query key). - Fix `resource.type` casing: `Container` (was lowercase `container`). - Improve associate-POST error messages on 401/403/404/409 with actionable hints. Manual smoke test recommended before relying on this in production: 1. Cross-build publish to a *completed* target build — Microsoft does not document a build-state restriction on the artifact-record POST, but it is the riskiest unverified piece of the flow. 2. Confirm the container folder hash suffix does not surface prominently in the inline 'Browse' view of the ADO Artifacts tab. If either fails, follow-ups are: switch to build attachments for the completed-build case (loses Artifacts-tab visibility), or use `require-unique-names: true` to remove the suffix. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/safe-outputs.md | 45 ++- src/safeoutputs/create_pr.rs | 4 + src/safeoutputs/result.rs | 45 ++- src/safeoutputs/upload_build_attachment.rs | 4 + src/safeoutputs/upload_pipeline_artifact.rs | 313 +++++++++++++++----- 5 files changed, 339 insertions(+), 72 deletions(-) diff --git a/docs/safe-outputs.md b/docs/safe-outputs.md index bf4ce835..67ae95d5 100644 --- a/docs/safe-outputs.md +++ b/docs/safe-outputs.md @@ -480,15 +480,26 @@ artifacts instead. Publishes a workspace file as an Azure DevOps **pipeline artifact** that appears in the **Artifacts tab** of the build summary page. Uses the ADO build artifacts -REST API (container creation + file upload + artifact association). +REST API in two steps: + +1. **Upload bytes** to the agent's own per-build file container (Azure DevOps + creates one container per build and exposes its ID via `BUILD_CONTAINERID`). +2. **Associate** the artifact record (`name = artifact_name`) with the target + build via `POST /{project}/_apis/build/builds/{effective_build_id}/artifacts`. **Omit `build_id` to target the current pipeline run** — the executor resolves the build ID from the `BUILD_BUILDID` environment variable automatically. When -`build_id` is provided, the artifact is published to that specific build. +`build_id` is provided, the artifact record is published to that specific build +("cross-build publishing"). The artifact bytes still live in the agent's own +build container; only the record's pointer is associated with the target build. +This means cross-published artifacts share the agent build's retention — if the +agent's build is purged, the cross-referenced artifact stops being downloadable. +Cross-project publishing is not supported (the associate POST uses the current +pipeline's project). The tool stages the file during Stage 1 (MCP) by copying it into the -safe-outputs directory; Stage 3 reads the staged copy and executes the three-step -REST API flow to create the artifact. +safe-outputs directory; Stage 3 reads the staged copy and executes the two-step +REST flow. **Agent parameters:** - `build_id` *(optional)* - Target build ID. Omit to publish to the current pipeline run. Must be positive when specified. @@ -504,13 +515,37 @@ safe-outputs: allowed-artifact-names: [] # Optional — restrict names (suffix `*` = prefix match) allowed-build-ids: [] # Optional — restrict target builds (skipped when targeting current build) name-prefix: "" # Optional — prepended to the agent-supplied artifact name + require-unique-names: false # Optional — see "Reusing artifact names" below max: 3 # Maximum per run (default: 3) ``` +**Reusing artifact names within one agent run:** +By default, the same `artifact_name` may be reused across multiple +`upload-pipeline-artifact` calls in one run (e.g. publishing a `TriageSummary` +to many failing builds at once). The executor inserts a short hash suffix +(`{artifact_name}__{6 hex}`) into the **internal container folder name** so +the calls don't silently overwrite each other's bytes in the agent's shared +build container. The hash lives only in internal addressing — it does not +appear in the `record.name` your downstream consumers query for, in the web UI +"Download as zip" filename, or in the contents of files extracted by the +`DownloadBuildArtifacts@1` / `DownloadPipelineArtifact@2` tasks (all of which +strip the container folder prefix). + +Set `require-unique-names: true` to use a clean container folder +(`{artifact_name}` only, no suffix) and reject in-run reuse of +`(effective_build_id, artifact_name)` with a clear early error before any HTTP +call. Use this when you guarantee one artifact per name per run and want the +shortest possible internal addressing. + +Two records with the same `name` on the **same** target build still collide at +the record level (ADO returns 409 from the associate call) regardless of this +setting; use distinct `artifact_name` values when targeting one build with +multiple uploads. + **Notes:** - Single-file only; directory uploads are not supported. - When `build_id` is omitted and `allowed-build-ids` is configured, the allow-list check is skipped — the current build is implicitly trusted. -- Requires `SYSTEM_TEAMPROJECTID` to be available in the execution environment (set automatically by Azure DevOps). +- Requires `BUILD_CONTAINERID`, `BUILD_BUILDID`, and `SYSTEM_TEAMPROJECTID` (all set automatically inside an Azure DevOps pipeline job) and `vso.build_execute` scope on the executor's token (the existing write service connection provides this). ### cache-memory (moved to `tools:`) Memory is now configured as a first-class tool under `tools: cache-memory:` instead of `safe-outputs: memory:`. See the [Cache Memory section](./tools.md#cache-memory-cache-memory) in `docs/tools.md` for details. diff --git a/src/safeoutputs/create_pr.rs b/src/safeoutputs/create_pr.rs index f571892e..4b26613d 100644 --- a/src/safeoutputs/create_pr.rs +++ b/src/safeoutputs/create_pr.rs @@ -2468,6 +2468,10 @@ new file mode 100755 pull_request_id: None, pull_request_source_branch: None, pull_request_target_branch: None, + build_container_id: None, + uploaded_pipeline_artifact_keys: std::sync::Arc::new(std::sync::Mutex::new( + std::collections::HashSet::new(), + )), }; let outcome = result.execute_impl(&ctx).await.unwrap(); assert!(!outcome.success); diff --git a/src/safeoutputs/result.rs b/src/safeoutputs/result.rs index d01cd2e2..6096d08b 100644 --- a/src/safeoutputs/result.rs +++ b/src/safeoutputs/result.rs @@ -3,7 +3,8 @@ use rmcp::ErrorData as McpError; use rmcp::model::ErrorCode; use serde::Serialize; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; +use std::sync::{Arc, Mutex}; use crate::sanitize::{SanitizeConfig, SanitizeContent}; @@ -67,6 +68,11 @@ pub struct ExecutionContext { // ── ADO build variables (from BUILD_*/SYSTEM_*) ─────────────────────── /// Numeric build ID (`BUILD_BUILDID`) pub build_id: Option, + /// Numeric file-container ID for the current build (`BUILD_CONTAINERID`). + /// Azure DevOps pre-creates one container per build at job initialization; + /// all artifacts in the build share this container, differentiated by item path. + /// Required by `upload-pipeline-artifact` to know where to upload bytes. + pub build_container_id: Option, /// Human-readable build number (`BUILD_BUILDNUMBER`) #[allow(dead_code)] pub build_number: Option, @@ -110,6 +116,19 @@ pub struct ExecutionContext { /// PR target branch (`SYSTEM_PULLREQUEST_TARGETBRANCH`) #[allow(dead_code)] pub pull_request_target_branch: Option, + + /// Per-run dedupe set for `upload-pipeline-artifact` when the + /// `require-unique-names` config is set. Stores `format!("{}/{}", + /// effective_build_id, final_name)` keys; the executor checks-and-inserts + /// before any HTTP call so a second call with the same target build / + /// artifact name fails fast instead of silently overwriting bytes in + /// the agent's shared file container. + /// + /// Wrapped in `Arc>` so all calls in one Stage 3 run see the + /// same set even though `ExecutionContext` is shared by reference and + /// the `Clone` semantics need to share state. Each `Default` instance + /// gets its own fresh empty set, which is correct for tests. + pub uploaded_pipeline_artifact_keys: Arc>>, } impl ExecutionContext { @@ -182,6 +201,7 @@ impl ExecutionContext { // Build identification build_id: env("BUILD_BUILDID").and_then(|s| s.parse().ok()), + build_container_id: env("BUILD_CONTAINERID").and_then(|s| s.parse().ok()), build_number: env("BUILD_BUILDNUMBER"), build_reason: env("BUILD_REASON"), definition_name: env("BUILD_DEFINITIONNAME"), @@ -199,6 +219,9 @@ impl ExecutionContext { pull_request_id: env("SYSTEM_PULLREQUEST_PULLREQUESTID"), pull_request_source_branch: env("SYSTEM_PULLREQUEST_SOURCEBRANCH"), pull_request_target_branch: env("SYSTEM_PULLREQUEST_TARGETBRANCH"), + + // Per-run state for upload-pipeline-artifact dedupe. + uploaded_pipeline_artifact_keys: Arc::new(Mutex::new(HashSet::new())), } } } @@ -729,6 +752,26 @@ mod tests { assert!(ctx.build_id.is_none()); } + #[test] + fn test_from_env_lookup_build_container_id_parses_numeric() { + let ctx = + ExecutionContext::from_env_lookup(env_from(&[("BUILD_CONTAINERID", "112233")])); + assert_eq!(ctx.build_container_id, Some(112233)); + } + + #[test] + fn test_from_env_lookup_build_container_id_none_for_non_numeric() { + let ctx = + ExecutionContext::from_env_lookup(env_from(&[("BUILD_CONTAINERID", "not-numeric")])); + assert!(ctx.build_container_id.is_none()); + } + + #[test] + fn test_from_env_lookup_build_container_id_none_when_unset() { + let ctx = ExecutionContext::from_env_lookup(env_from(&[])); + assert!(ctx.build_container_id.is_none()); + } + #[test] fn test_from_env_lookup_populates_triggered_by_fields() { let ctx = ExecutionContext::from_env_lookup(env_from(&[ diff --git a/src/safeoutputs/upload_build_attachment.rs b/src/safeoutputs/upload_build_attachment.rs index 6b80dc44..9cc12eab 100644 --- a/src/safeoutputs/upload_build_attachment.rs +++ b/src/safeoutputs/upload_build_attachment.rs @@ -817,6 +817,10 @@ attachment-type: "agent-artifact" pull_request_id: None, pull_request_source_branch: None, pull_request_target_branch: None, + build_container_id: None, + uploaded_pipeline_artifact_keys: std::sync::Arc::new(std::sync::Mutex::new( + std::collections::HashSet::new(), + )), } } diff --git a/src/safeoutputs/upload_pipeline_artifact.rs b/src/safeoutputs/upload_pipeline_artifact.rs index 5be08a18..93f1ecc9 100644 --- a/src/safeoutputs/upload_pipeline_artifact.rs +++ b/src/safeoutputs/upload_pipeline_artifact.rs @@ -6,14 +6,28 @@ //! build artifacts published through this tool appear in the **Artifacts tab** //! of the build summary page in Azure DevOps. //! -//! The upload is a three-step REST flow: +//! The upload is a two-step REST flow that always reuses the agent's own +//! pre-existing build container (Azure DevOps creates one container per build +//! at job initialization and exposes its ID via `BUILD_CONTAINERID`): //! -//! 1. **Create container** — `POST /_apis/resources/containers` returns a -//! `containerId`. -//! 2. **Upload file** — `PUT /_apis/resources/containers/{containerId}` sends -//! the file bytes with `itemPath` and `scope` query params. -//! 3. **Associate artifact** — `POST /_apis/build/builds/{buildId}/artifacts` -//! links the container to the build as a named artifact. +//! 1. **Upload bytes** — `PUT /_apis/resources/Containers/{BUILD_CONTAINERID}?itemPath={folder}/{file}&scope={projectId}` +//! sends the file body into the agent's own build container. +//! 2. **Associate artifact** — `POST /{project}/_apis/build/builds/{effective_build_id}/artifacts` +//! registers a record with `resource.type = "Container"` and +//! `resource.data = "#/{BUILD_CONTAINERID}/{folder}"`. `effective_build_id` +//! is the current build by default, or an agent-supplied `build_id` for +//! cross-build publishing (the artifact record points at the agent's +//! container; ADO does not require the container to belong to the target +//! build — that is how `DownloadBuildArtifacts@1 buildType=specific` works +//! in the wild). +//! +//! By default the container `folder` is `{artifact_name}__{6 hex hash}` so +//! that two calls with the same `artifact_name` (e.g. publishing +//! `TriageSummary` to many failing builds in one run) never silently overwrite +//! each other's bytes. The hash lives only in internal addressing — the +//! user-visible `artifact_name` your downstream consumers query is unaffected. +//! Set `safe-outputs.upload-pipeline-artifact.require-unique-names: true` to +//! disable the suffix and reject in-run reuse with a clear early error. //! //! The flow mirrors `upload-build-attachment`: //! @@ -25,7 +39,7 @@ //! `ctx.working_directory.join(staged_file)`, applies operator-supplied //! limits (`max-file-size`, `allowed-extensions`, `allowed-artifact-names`, //! `allowed-build-ids`, `name-prefix`), resolves the target build ID, and -//! executes the three-step upload flow. +//! executes the two-step upload flow. use ado_aw_derive::SanitizeConfig; use log::{debug, info, warn}; @@ -47,6 +61,15 @@ pub struct UploadPipelineArtifactParams { /// pipeline run** — the executor resolves the build ID from the /// `BUILD_BUILDID` environment variable automatically. When provided, /// must be a positive integer. + /// + /// **Cross-build behavior:** when set to a build other than the current + /// run, the artifact bytes still live in the agent's own build container; + /// only the artifact *record* (`name`, `data` pointer) is associated with + /// the target build. This means cross-published artifacts share the + /// agent build's retention policy — if the agent's build is purged, the + /// cross-referenced artifact on the target build stops being downloadable. + /// Cross-project publishing is not supported (the associate POST uses + /// the current pipeline's project). pub build_id: Option, /// The artifact name shown in the Artifacts tab. ADO requires a non-empty @@ -194,6 +217,20 @@ pub struct UploadPipelineArtifactConfig { /// Prefix prepended to the agent-supplied artifact name before publishing. #[serde(default, rename = "name-prefix")] pub name_prefix: Option, + + /// When `false` (default), the executor inserts a short hash suffix into + /// the internal container folder so multiple calls in one agent run with + /// the same `artifact_name` (e.g. publishing `TriageSummary` to many + /// failing builds at once) do not silently overwrite each other's bytes + /// in the agent's shared file container. The suffix lives only in + /// internal addressing — the user-visible `artifact_name` your downstream + /// consumers query is unaffected. + /// + /// Set to `true` to use a clean folder name (`{artifact_name}` exactly) + /// and reject any in-run reuse of `(effective_build_id, artifact_name)` + /// with a clear error before any HTTP call. + #[serde(default, rename = "require-unique-names")] + pub require_unique_names: bool, } fn default_pipeline_max_file_size() -> u64 { @@ -208,6 +245,7 @@ impl Default for UploadPipelineArtifactConfig { allowed_artifact_names: Vec::new(), allowed_build_ids: Vec::new(), name_prefix: None, + require_unique_names: false, } } } @@ -394,6 +432,52 @@ impl Executor for UploadPipelineArtifactResult { .as_ref() .context("No access token available (SYSTEM_ACCESSTOKEN or AZURE_DEVOPS_EXT_PAT)")?; + // Resolve the agent's own build container ID (Azure DevOps pre-creates + // one container per build at job initialization and exposes it via + // BUILD_CONTAINERID). All artifacts in the build share this container, + // including those whose record we associate with a different build. + let container_id = ctx.build_container_id.context( + "BUILD_CONTAINERID not set or invalid — required to publish a \ + pipeline artifact; this tool must run inside an Azure DevOps \ + pipeline job", + )?; + + // ── Per-run dedupe (when require-unique-names is set) ──────────── + // Reject reuse of (effective_build_id, final_name) before any HTTP + // call so two cross-build calls sharing a name don't silently + // overwrite each other's bytes in the shared container. + let dedupe_key = format!("{}/{}", effective_build_id, final_name); + if config.require_unique_names { + let mut seen = ctx + .uploaded_pipeline_artifact_keys + .lock() + .expect("uploaded_pipeline_artifact_keys mutex poisoned"); + if seen.contains(&dedupe_key) { + return Ok(ExecutionResult::failure(format!( + "upload-pipeline-artifact: artifact_name '{}' was already used \ + on build #{} in this run; require-unique-names is configured \ + to reject reuse", + final_name, effective_build_id + ))); + } + seen.insert(dedupe_key); + } + + // Internal container folder. The user-visible artifact name is + // `final_name`; the folder name carries an optional discriminator so + // multiple calls in one run (different target builds, same name) + // don't collide on the shared container path. The discriminator is + // invisible in standard download paths (web UI zip wrapper, + // DownloadBuildArtifacts@1, DownloadPipelineArtifact@2 — all strip + // the prefix) and is only seen by callers that hit + // `GET /_apis/resources/Containers/{id}?itemPath=...` directly. + let container_folder = if config.require_unique_names { + final_name.clone() + } else { + let disc = &crate::hash::sha256_hex(self.staged_file.as_bytes())[..6]; + format!("{}__{}", final_name, disc) + }; + let client = reqwest::Client::new(); // Derive a filename from the original file path for use as the @@ -403,59 +487,15 @@ impl Executor for UploadPipelineArtifactResult { .and_then(|n| n.to_str()) .unwrap_or(&self.staged_file); - // ── Step 1: Create container ───────────────────────────────────── - // The `scopeIdentifier` query parameter (project GUID) is required for - // the POST to route correctly in ADO. Omitting it causes a 405 because - // the unscoped `_apis/resources/containers` collection does not support - // POST. The body only needs the container name; the project scope must - // be in the query string. - let container_url = format!( - "{}/_apis/resources/containers?scopeIdentifier={}&api-version=7.1-preview.4", - org_url.trim_end_matches('/'), - utf8_percent_encode(project_id, PATH_SEGMENT), - ); - debug!("Creating container for artifact '{}': {}", final_name, container_url); - - let container_body = serde_json::json!({ - "name": final_name, - }); - let container_resp = client - .post(&container_url) - .header("Content-Type", "application/json") - .basic_auth("", Some(token)) - .json(&container_body) - .send() - .await - .context("Failed to create artifact container")?; - - if !container_resp.status().is_success() { - let status = container_resp.status(); - let error_body = container_resp - .text() - .await - .unwrap_or_else(|_| "Unknown error".to_string()); - return Ok(ExecutionResult::failure(format!( - "Failed to create artifact container (HTTP {}): {}", - status, error_body - ))); - } - let container_json: serde_json::Value = container_resp - .json() - .await - .context("Failed to parse container creation response")?; - let container_id = container_json - .get("containerId") - .and_then(|v| v.as_u64()) - .context("Container creation response missing 'containerId'")?; - debug!("Container created: id={}", container_id); - - // ── Step 2: Upload file to container ───────────────────────────── - // Use `scopeIdentifier` (not `scope`) to match the ADO containers API. + // ── Step 1: Upload file to the agent's own build container ────── + // The canonical query parameter is `scope` (not `scopeIdentifier`, + // which is the response model field name); all official ADO SDKs + // (Node, Python, Extension API) use `scope=`. let upload_url = format!( - "{}/_apis/resources/containers/{}?itemPath={}/{}&scopeIdentifier={}&api-version=7.1-preview.4", + "{}/_apis/resources/Containers/{}?itemPath={}/{}&scope={}&api-version=7.1-preview.4", org_url.trim_end_matches('/'), container_id, - utf8_percent_encode(&final_name, PATH_SEGMENT), + utf8_percent_encode(&container_folder, PATH_SEGMENT), utf8_percent_encode(filename, PATH_SEGMENT), utf8_percent_encode(project_id, PATH_SEGMENT), ); @@ -477,26 +517,35 @@ impl Executor for UploadPipelineArtifactResult { .await .unwrap_or_else(|_| "Unknown error".to_string()); return Ok(ExecutionResult::failure(format!( - "Failed to upload file to container (HTTP {}): {}", - status, error_body + "Failed to upload file to container #{} (HTTP {}): {}", + container_id, status, error_body ))); } debug!("File uploaded to container {}", container_id); - // ── Step 3: Associate artifact with build ──────────────────────── + // ── Step 2: Associate artifact record with the target build ───── + // For cross-build publishing the artifact bytes physically live in + // the agent's own container; the record on the target build holds a + // pointer (`resource.data = "#/{containerId}/{folder}"`). ADO does + // not require the container to belong to the target build — that is + // exactly how `DownloadBuildArtifacts@1 buildType=specific` already + // works (it follows cross-build container pointers). let artifact_url = format!( "{}/{}/_apis/build/builds/{}/artifacts?api-version=7.1", org_url.trim_end_matches('/'), utf8_percent_encode(project, PATH_SEGMENT), effective_build_id, ); - debug!("Associating artifact '{}' with build #{}: {}", final_name, effective_build_id, artifact_url); + debug!( + "Associating artifact '{}' (folder '{}') with build #{}: {}", + final_name, container_folder, effective_build_id, artifact_url + ); let artifact_body = serde_json::json!({ "name": final_name, "resource": { - "data": format!("#/{}/{}", container_id, final_name), - "type": "container", + "data": format!("#/{}/{}", container_id, container_folder), + "type": "Container", }, }); let artifact_resp = client @@ -538,6 +587,8 @@ impl Executor for UploadPipelineArtifactResult { "size_bytes": file_size, "download_url": download_url, "project": project, + "container_id": container_id, + "container_folder": container_folder, }), )) } else { @@ -546,9 +597,16 @@ impl Executor for UploadPipelineArtifactResult { .text() .await .unwrap_or_else(|_| "Unknown error".to_string()); + // Best-effort hint when the most common failure modes show up. + let hint = match status.as_u16() { + 401 | 403 => " — token may lack 'Build (Read & Execute)' scope on the target build's project", + 404 => " — target build does not exist or is in a different project (cross-project publishing is not supported)", + 409 => " — an artifact with this name already exists on the target build", + _ => "", + }; Ok(ExecutionResult::failure(format!( - "Failed to associate artifact with build #{} (HTTP {}): {}", - effective_build_id, status, error_body + "Failed to associate artifact with build #{} (HTTP {}){}: {}", + effective_build_id, status, hint, error_body ))) } } @@ -858,4 +916,127 @@ name-prefix: "ci-" assert!(!exec_result.success); assert!(exec_result.message.contains("not in the allowed list")); } + + /// Non-dry-run with no `BUILD_CONTAINERID` must fail with a clear message + /// before any HTTP call, rather than silently doing nothing. + #[tokio::test] + async fn test_fails_when_build_container_id_missing() { + let dir = tempfile::tempdir().unwrap(); + let staged = dir.path().join("staged-file.pdf"); + let content = b"test content"; + std::fs::write(&staged, content).unwrap(); + let hash = crate::hash::sha256_hex(content); + + let result = UploadPipelineArtifactResult::new( + None, + "agent-report".to_string(), + "out/report.pdf".to_string(), + "staged-file.pdf".to_string(), + content.len() as u64, + hash, + ); + + let ctx = ExecutionContext { + working_directory: dir.path().to_path_buf(), + build_id: Some(123), + dry_run: false, + ado_org_url: Some("https://dev.azure.com/test".to_string()), + ado_project: Some("TestProject".to_string()), + ado_project_id: Some("proj-guid".to_string()), + access_token: Some("fake-token".to_string()), + // Intentionally no build_container_id. + ..Default::default() + }; + + // execute_impl returns Err for missing required env (consistent with + // the rest of the file); execute_safe_outputs converts it to a failure. + let err = result.execute_impl(&ctx).await.unwrap_err(); + assert!( + err.to_string().contains("BUILD_CONTAINERID"), + "expected BUILD_CONTAINERID error, got: {}", + err + ); + } + + /// The default container folder embeds a 6-hex hash discriminator derived + /// from the staged file name; two calls with the same artifact_name but + /// different staged_file values produce different folders so their bytes + /// can't collide in the shared build container. + #[test] + fn test_default_container_folder_has_hash_suffix() { + // Same logic the executor uses inline; mirror it here so changing the + // discriminator scheme is a single-place refactor caught by this test. + fn folder_for(final_name: &str, staged_file: &str) -> String { + let disc = &crate::hash::sha256_hex(staged_file.as_bytes())[..6]; + format!("{}__{}", final_name, disc) + } + + let a = folder_for("TriageSummary", "staged-abc.md"); + let b = folder_for("TriageSummary", "staged-def.md"); + + assert!(a.starts_with("TriageSummary__")); + assert_eq!(a.len(), "TriageSummary".len() + 2 + 6); + assert_ne!(a, b, "different staged_file must produce different folders"); + // Determinism: same inputs yield same folder. + assert_eq!(a, folder_for("TriageSummary", "staged-abc.md")); + } + + /// When `require-unique-names` is set, the executor uses the clean folder + /// name (no discriminator suffix) — and the per-run dedupe set on the + /// ExecutionContext rejects a second call with the same + /// (effective_build_id, final_name) before any HTTP call is made. + #[tokio::test] + async fn test_require_unique_names_rejects_in_run_reuse() { + let dir = tempfile::tempdir().unwrap(); + let staged_a = dir.path().join("staged-a.md"); + let staged_b = dir.path().join("staged-b.md"); + std::fs::write(&staged_a, b"first").unwrap(); + std::fs::write(&staged_b, b"second").unwrap(); + + let mut ctx = ExecutionContext { + working_directory: dir.path().to_path_buf(), + build_id: Some(100), + dry_run: false, + ado_org_url: Some("https://dev.azure.com/test".to_string()), + ado_project: Some("TestProject".to_string()), + ado_project_id: Some("proj-guid".to_string()), + access_token: Some("fake-token".to_string()), + build_container_id: Some(42), + ..Default::default() + }; + ctx.tool_configs.insert( + "upload-pipeline-artifact".to_string(), + serde_json::json!({"require-unique-names": true}), + ); + + // First call: simulate the dedupe set being seeded by an earlier + // successful publish — we insert the key directly so we don't need + // a live HTTP server to run the upload. + { + let mut seen = ctx.uploaded_pipeline_artifact_keys.lock().unwrap(); + seen.insert("100/TriageSummary".to_string()); + } + + let second = UploadPipelineArtifactResult::new( + Some(100), + "TriageSummary".to_string(), + "out/triage.md".to_string(), + "staged-b.md".to_string(), + b"second".len() as u64, + crate::hash::sha256_hex(b"second"), + ); + + let exec_result = second.execute_impl(&ctx).await.unwrap(); + assert!(!exec_result.success, "second call should be rejected"); + assert!( + exec_result.message.contains("already used"), + "expected dedupe error, got: {}", + exec_result.message + ); + assert!( + exec_result.message.contains("require-unique-names"), + "error should reference the config key, got: {}", + exec_result.message + ); + } } From bfb45352fa6cc1102e275129227ada3e95f375b6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 7 May 2026 06:54:04 +0000 Subject: [PATCH 2/2] fix(safeoutputs): address review bugs and suggestions in upload-pipeline-artifact - Bug fix: move seen.insert(dedupe_key) to after successful HTTP execution so transient errors don't permanently block retries of the same artifact - Switch discriminator from staged_file name bytes to content hash (live_hash) so distinct content always maps to a distinct container folder regardless of filename; identical content maps to the same folder (idempotent PUT) - Replace .expect() on mutex lock with map_err(|e| anyhow!(...))? to stay consistent with the error-propagation style used throughout the executor - Update test to mirror new content-hash-based discriminator logic Agent-Logs-Url: https://github.com/githubnext/ado-aw/sessions/1b52c2a1-cca5-48be-b6ae-8bb0c68d0f97 Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com> --- src/safeoutputs/upload_pipeline_artifact.rs | 60 ++++++++++++++------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/src/safeoutputs/upload_pipeline_artifact.rs b/src/safeoutputs/upload_pipeline_artifact.rs index 93f1ecc9..c100b42c 100644 --- a/src/safeoutputs/upload_pipeline_artifact.rs +++ b/src/safeoutputs/upload_pipeline_artifact.rs @@ -448,10 +448,10 @@ impl Executor for UploadPipelineArtifactResult { // overwrite each other's bytes in the shared container. let dedupe_key = format!("{}/{}", effective_build_id, final_name); if config.require_unique_names { - let mut seen = ctx + let seen = ctx .uploaded_pipeline_artifact_keys .lock() - .expect("uploaded_pipeline_artifact_keys mutex poisoned"); + .map_err(|e| anyhow::anyhow!("uploaded_pipeline_artifact_keys mutex poisoned: {}", e))?; if seen.contains(&dedupe_key) { return Ok(ExecutionResult::failure(format!( "upload-pipeline-artifact: artifact_name '{}' was already used \ @@ -460,21 +460,26 @@ impl Executor for UploadPipelineArtifactResult { final_name, effective_build_id ))); } - seen.insert(dedupe_key); + // Note: the key is inserted only after the HTTP calls succeed below, + // so a transient network failure doesn't permanently block retries. } // Internal container folder. The user-visible artifact name is // `final_name`; the folder name carries an optional discriminator so - // multiple calls in one run (different target builds, same name) - // don't collide on the shared container path. The discriminator is - // invisible in standard download paths (web UI zip wrapper, - // DownloadBuildArtifacts@1, DownloadPipelineArtifact@2 — all strip - // the prefix) and is only seen by callers that hit - // `GET /_apis/resources/Containers/{id}?itemPath=...` directly. + // multiple calls in one run sharing the same name but uploading + // different content don't overwrite each other's bytes in the shared + // container. The discriminator is derived from the file content hash + // (already computed above) so distinct content always maps to a + // distinct folder — identical content maps to the same folder, which + // is safe (idempotent PUT). The discriminator is invisible in standard + // download paths (web UI zip wrapper, DownloadBuildArtifacts@1, + // DownloadPipelineArtifact@2 — all strip the prefix) and is only seen + // by callers that hit `GET /_apis/resources/Containers/{id}?itemPath=…` + // directly. let container_folder = if config.require_unique_names { final_name.clone() } else { - let disc = &crate::hash::sha256_hex(self.staged_file.as_bytes())[..6]; + let disc = &live_hash[..6]; format!("{}__{}", final_name, disc) }; @@ -575,6 +580,16 @@ impl Executor for UploadPipelineArtifactResult { self.file_path, final_name, effective_build_id ); + // Record the successful publish in the dedupe set now that both + // HTTP steps have completed — done here (not before the calls) so + // a transient failure doesn't permanently block retries. + if config.require_unique_names { + ctx.uploaded_pipeline_artifact_keys + .lock() + .map_err(|e| anyhow::anyhow!("uploaded_pipeline_artifact_keys mutex poisoned: {}", e))? + .insert(dedupe_key); + } + Ok(ExecutionResult::success_with_data( format!( "Published '{}' as pipeline artifact '{}' on build #{}", @@ -959,26 +974,33 @@ name-prefix: "ci-" } /// The default container folder embeds a 6-hex hash discriminator derived - /// from the staged file name; two calls with the same artifact_name but - /// different staged_file values produce different folders so their bytes - /// can't collide in the shared build container. + /// from the file content hash; two calls uploading different content + /// (regardless of staged file name) produce different folders so their + /// bytes can't collide in the shared build container. Identical content + /// maps to the same folder (idempotent PUT — safe). #[test] fn test_default_container_folder_has_hash_suffix() { // Same logic the executor uses inline; mirror it here so changing the // discriminator scheme is a single-place refactor caught by this test. - fn folder_for(final_name: &str, staged_file: &str) -> String { - let disc = &crate::hash::sha256_hex(staged_file.as_bytes())[..6]; + fn folder_for(final_name: &str, content: &[u8]) -> String { + let disc = &crate::hash::sha256_hex(content)[..6]; format!("{}__{}", final_name, disc) } - let a = folder_for("TriageSummary", "staged-abc.md"); - let b = folder_for("TriageSummary", "staged-def.md"); + let a = folder_for("TriageSummary", b"content-alpha"); + let b = folder_for("TriageSummary", b"content-beta"); assert!(a.starts_with("TriageSummary__")); assert_eq!(a.len(), "TriageSummary".len() + 2 + 6); - assert_ne!(a, b, "different staged_file must produce different folders"); + assert_ne!(a, b, "different content must produce different folders"); // Determinism: same inputs yield same folder. - assert_eq!(a, folder_for("TriageSummary", "staged-abc.md")); + assert_eq!(a, folder_for("TriageSummary", b"content-alpha")); + // Identical content → same folder regardless of call order (idempotent). + assert_eq!( + folder_for("TriageSummary", b"same-content"), + folder_for("TriageSummary", b"same-content"), + "same content must always map to the same folder" + ); } /// When `require-unique-names` is set, the executor uses the clean folder