Skip to content

Commit 709fa2c

Browse files
authored
fix(secrets): address fourth-round Rust PR review feedback on #624
- `normalize_repo_url`: percent-decode before comparing, so a project named "My Project" matches whether ADO returns the encoded form (`My%20Project`) or the decoded form. The previous implementation assumed ADO always returns percent-encoded URLs; that assumption is documented in code now and the comparison is encoding-independent. New unit tests cover the encoded/decoded equivalence and the case-insensitive/trailing-slash behaviour. - `discovered_to_matched`: stop silently truncating consumers that include multiple ado-aw templates. The `yaml_path` field used by `print_matched_summary` now joins every marker source with `, ` so e.g. `agents/a.md, agents/b.md` shows up honestly in the CLI summary. New unit test asserts both sources are surfaced. - `##vso[` defence-in-depth: the marker step's runtime echo already neutralises `##vso[` and `##[` prefixes, but the same raw source string was flowing through `MarkerMetadata` -> `MatchedDefinition::yaml_path` -> `print_matched_summary` (which writes to stdout). When the CLI is invoked from inside an ADO pipeline step, the agent's stdout scanner would still pick up an attacker-controlled `##vso[...]` payload. New `sanitize_for_vso_logging` helper in the discovery module applies the same convention (`##vso[` -> `[vso-filtered][`, `##[` -> `[filtered][`) when building the `yaml_path`. New unit test asserts the sanitisation. - `ADO_AW_PREVIEW_CONCURRENCY=0` now emits a `warn!` before clamping to 1, instead of silently masking the typo. Operators who set `=0` will see the warning and can correct the env value rather than wondering why their concurrency tuning had no effect. - New unit test for the `--source` + no-git-remote bail in `secrets::resolve_for_command`: previously the helpful "no Azure DevOps git remote was detected; try --all-repos" error path was untested. Now asserted via a `tokio::test` that constructs an empty AdoContext and verifies the error message contains both the cause and the suggested mitigation. All 1753 tests pass; clippy clean on touched files. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: jamesadevine <4742697+jamesadevine@users.noreply.github.com>
1 parent d3c23c3 commit 709fa2c

2 files changed

Lines changed: 204 additions & 14 deletions

File tree

src/ado/discovery.rs

Lines changed: 162 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -241,10 +241,24 @@ pub async fn discover_ado_aw_pipelines(
241241
// `match_definitions_in`.
242242
let lock_map = build_lock_path_map(local_lock_paths);
243243

244-
let permits = std::env::var("ADO_AW_PREVIEW_CONCURRENCY")
244+
// Resolve concurrency from env; warn (not silently clamp) when an
245+
// operator sets `=0`, since the deadlock-avoidance `.max(1)` would
246+
// mask the typo and leave the user wondering why throughput hasn't
247+
// changed.
248+
let raw_permits = std::env::var("ADO_AW_PREVIEW_CONCURRENCY")
245249
.ok()
246-
.and_then(|v| v.parse().ok())
247-
.unwrap_or(DEFAULT_PREVIEW_CONCURRENCY);
250+
.and_then(|v| v.parse::<usize>().ok());
251+
let permits = match raw_permits {
252+
Some(0) => {
253+
warn!(
254+
"ADO_AW_PREVIEW_CONCURRENCY=0 would deadlock the Preview semaphore; \
255+
clamping to 1. Set a positive integer to control concurrency.",
256+
);
257+
1
258+
}
259+
Some(n) => n,
260+
None => DEFAULT_PREVIEW_CONCURRENCY,
261+
};
248262
let semaphore = Arc::new(Semaphore::new(permits.max(1)));
249263

250264
let mut handles = Vec::with_capacity(filtered.len());
@@ -310,11 +324,28 @@ fn apply_scope_filter(
310324
}
311325
}
312326

313-
/// Normalize a repo URL for equality comparison. Strips trailing slash
314-
/// and lowercases the scheme/host portion (ADO is case-insensitive on
315-
/// org/project/repo names).
327+
/// Normalize a repo URL for equality comparison.
328+
///
329+
/// Two normalizations are applied so the comparison is robust to the
330+
/// shape ADO returns:
331+
///
332+
/// 1. **Percent-decode** the URL so a project named e.g. `My Project`
333+
/// compares equal whether ADO returned `My%20Project` or the (rare
334+
/// but legal) decoded `My Project`. Lossy decoding on invalid UTF-8
335+
/// keeps us forward-compatible — anything ADO can return, we can
336+
/// compare.
337+
/// 2. **ASCII-lowercase** because ADO is case-insensitive on org /
338+
/// project / repo identifiers, and trim any trailing `/`.
339+
///
340+
/// Without (1), the comparison would silently fail for any project
341+
/// name containing a percent-reserved character if `ado_ctx.repo_url()`
342+
/// emitted the encoded form and `repository.url` returned the decoded
343+
/// form (or vice-versa).
316344
fn normalize_repo_url(url: &str) -> String {
317-
url.trim_end_matches('/').to_ascii_lowercase()
345+
let decoded = percent_encoding::percent_decode_str(url)
346+
.decode_utf8_lossy()
347+
.into_owned();
348+
decoded.trim_end_matches('/').to_ascii_lowercase()
318349
}
319350

320351
/// Build a `(normalized yamlFilename → local lock path)` lookup table
@@ -518,21 +549,47 @@ pub fn discovered_to_matched(d: &DiscoveredPipeline) -> Option<MatchedDefinition
518549
| DiscoveryStatus::PreviewFailed(_) => return None,
519550
}
520551

552+
// Join every marker's source path so consumers that include
553+
// multiple templates show up honestly in the CLI summary instead
554+
// of silently truncating to whichever marker happened to be
555+
// first. Also apply `sanitize_for_vso_logging` here: the
556+
// `yaml_path` ends up in `print_matched_summary` (which writes to
557+
// stdout), and if `ado-aw secrets set --all-repos` is ever invoked
558+
// from inside an ADO pipeline step, an attacker-controlled marker
559+
// source path containing `##vso[` would otherwise be processed by
560+
// the agent's logging-command scanner.
561+
let yaml_path = if d.markers.is_empty() {
562+
String::new()
563+
} else {
564+
let joined = d
565+
.markers
566+
.iter()
567+
.map(|m| m.source.as_str())
568+
.collect::<Vec<_>>()
569+
.join(", ");
570+
sanitize_for_vso_logging(&joined)
571+
};
572+
521573
Some(MatchedDefinition {
522574
id: d.definition_id,
523575
name: d.definition_name.clone(),
524576
match_method: MatchMethod::Discovery,
525-
// Prefer the first marker's source path so downstream summaries
526-
// can show "→ agents/foo.md" without further lookup.
527-
yaml_path: d
528-
.markers
529-
.first()
530-
.map(|m| m.source.clone())
531-
.unwrap_or_default(),
577+
yaml_path,
532578
queue_status: d.queue_status.clone(),
533579
})
534580
}
535581

582+
/// Neutralise ADO build-agent logging-command prefixes (`##vso[`,
583+
/// `##[`). Mirrors `crate::compile::extensions::ado_aw_marker` and
584+
/// `crate::agent_stats::sanitize_for_markdown` so that data flowing
585+
/// from a Preview-discovered marker through the CLI's own stdout
586+
/// cannot smuggle a `task.setvariable` (or similar) when the CLI is
587+
/// invoked from inside an ADO pipeline step.
588+
fn sanitize_for_vso_logging(s: &str) -> String {
589+
s.replace("##vso[", "[vso-filtered][")
590+
.replace("##[", "[filtered][")
591+
}
592+
536593
/// CLI-facing wrapper: run Preview-driven discovery with the given
537594
/// scope, optionally filter to consumers of a single template source,
538595
/// and return the result as `Vec<MatchedDefinition>`.
@@ -915,4 +972,95 @@ mod tests {
915972
assert!(discovered_to_matched(&discovered(DiscoveryStatus::Direct)).is_some());
916973
assert!(discovered_to_matched(&discovered(DiscoveryStatus::Consumer)).is_some());
917974
}
975+
976+
#[test]
977+
fn discovered_to_matched_joins_multiple_marker_sources() {
978+
// A consumer that includes two templates must surface both
979+
// sources in the yaml_path summary, not silently truncate to
980+
// whichever happened to be first.
981+
let mut d = discovered(DiscoveryStatus::Consumer);
982+
d.markers = vec![
983+
MarkerMetadata {
984+
schema: 1,
985+
source: "agents/a.md".to_string(),
986+
version: "1.0".to_string(),
987+
target: "job".to_string(),
988+
},
989+
MarkerMetadata {
990+
schema: 1,
991+
source: "agents/b.md".to_string(),
992+
version: "1.0".to_string(),
993+
target: "stage".to_string(),
994+
},
995+
];
996+
let matched = discovered_to_matched(&d).expect("Consumer kept");
997+
assert!(
998+
matched.yaml_path.contains("agents/a.md")
999+
&& matched.yaml_path.contains("agents/b.md"),
1000+
"expected both marker sources in yaml_path, got: {}",
1001+
matched.yaml_path
1002+
);
1003+
}
1004+
1005+
#[test]
1006+
fn discovered_to_matched_sanitises_vso_in_yaml_path() {
1007+
// The yaml_path ends up in stdout via print_matched_summary.
1008+
// If the CLI is invoked from inside an ADO pipeline step, an
1009+
// attacker-controlled marker source path containing `##vso[`
1010+
// would otherwise be processed by the agent's logging-command
1011+
// scanner.
1012+
let mut d = discovered(DiscoveryStatus::Consumer);
1013+
d.markers = vec![MarkerMetadata {
1014+
schema: 1,
1015+
source: "agents/##vso[task.setvariable variable=X]value.md".to_string(),
1016+
version: "1.0".to_string(),
1017+
target: "job".to_string(),
1018+
}];
1019+
let matched = discovered_to_matched(&d).expect("Consumer kept");
1020+
assert!(
1021+
!matched.yaml_path.contains("##vso["),
1022+
"raw ##vso[ leaked into yaml_path: {}",
1023+
matched.yaml_path,
1024+
);
1025+
assert!(
1026+
matched.yaml_path.contains("[vso-filtered]["),
1027+
"expected `##vso[` neutralised to `[vso-filtered][`: {}",
1028+
matched.yaml_path,
1029+
);
1030+
}
1031+
1032+
// ── normalize_repo_url ───────────────────────────────────────────
1033+
1034+
#[test]
1035+
fn normalize_repo_url_is_encoding_independent() {
1036+
// ADO usually returns percent-encoded URLs (`My%20Project`),
1037+
// but the comparison must work whichever shape both sides
1038+
// happen to be in.
1039+
let encoded = "https://dev.azure.com/Org/My%20Project/_git/Repo";
1040+
let decoded = "https://dev.azure.com/Org/My Project/_git/Repo";
1041+
assert_eq!(normalize_repo_url(encoded), normalize_repo_url(decoded));
1042+
}
1043+
1044+
#[test]
1045+
fn normalize_repo_url_is_case_insensitive_and_trims_trailing_slash() {
1046+
assert_eq!(
1047+
normalize_repo_url("https://dev.azure.com/Org/P/_git/Repo/"),
1048+
normalize_repo_url("https://dev.azure.com/org/p/_git/repo")
1049+
);
1050+
}
1051+
1052+
// ── sanitize_for_vso_logging ─────────────────────────────────────
1053+
1054+
#[test]
1055+
fn discovery_sanitize_for_vso_logging_neutralises_prefixes() {
1056+
assert_eq!(
1057+
sanitize_for_vso_logging("##vso[task.setvariable variable=X]value"),
1058+
"[vso-filtered][task.setvariable variable=X]value"
1059+
);
1060+
assert_eq!(
1061+
sanitize_for_vso_logging("##[warning]ignore me"),
1062+
"[filtered][warning]ignore me"
1063+
);
1064+
assert_eq!(sanitize_for_vso_logging("agents/foo.md"), "agents/foo.md");
1065+
}
9181066
}

src/secrets.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,48 @@ mod tests {
704704
assert_eq!(out["variables"]["FOO"]["allowOverride"], false);
705705
}
706706

707+
// ============ resolve_for_command precondition ============
708+
709+
#[tokio::test]
710+
async fn source_without_all_repos_bails_when_no_git_remote() {
711+
// CurrentRepo scope + empty repo_name = no git remote was
712+
// detected. `--source` users hitting this case must get a
713+
// targeted error mentioning `--all-repos`, not a generic
714+
// "no pipelines found" further down the pipeline.
715+
let ctx = AdoContext {
716+
org_url: "https://dev.azure.com/example".to_string(),
717+
project: "p".to_string(),
718+
repo_name: String::new(),
719+
};
720+
let auth = AdoAuth::Pat("token".to_string());
721+
let client = reqwest::Client::builder()
722+
.build()
723+
.expect("client builds");
724+
let tmp = tempfile::tempdir().unwrap();
725+
726+
let err = resolve_for_command(
727+
&client,
728+
&ctx,
729+
&auth,
730+
None,
731+
false,
732+
Some("agents/foo.md"),
733+
tmp.path(),
734+
)
735+
.await
736+
.expect_err("expected bail");
737+
738+
let msg = format!("{err}");
739+
assert!(
740+
msg.contains("--all-repos"),
741+
"error should suggest --all-repos; got: {msg}"
742+
);
743+
assert!(
744+
msg.contains("no Azure DevOps git remote"),
745+
"error should explain the root cause; got: {msg}"
746+
);
747+
}
748+
707749
#[test]
708750
fn set_preserves_other_variables() {
709751
let def = serde_json::json!({

0 commit comments

Comments
 (0)