-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcommon.rs
More file actions
5339 lines (4825 loc) · 214 KB
/
common.rs
File metadata and controls
5339 lines (4825 loc) · 214 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//! Common helper functions shared across all compile targets.
use anyhow::{Context, Result};
use std::collections::{HashMap, HashSet};
use std::path::Path;
use super::types::{FrontMatter, OnConfig, PipelineParameter, Repository};
use super::extensions::{CompilerExtension, Extension, McpgServerConfig, McpgGatewayConfig, McpgConfig, CompileContext};
use crate::compile::types::McpConfig;
use crate::fuzzy_schedule;
use crate::allowed_hosts::{CORE_ALLOWED_HOSTS, mcp_required_hosts};
use crate::ecosystem_domains::{get_ecosystem_domains, is_ecosystem_identifier, is_known_ecosystem};
use crate::validate;
/// Parse the markdown file and extract front matter and body
pub fn parse_markdown(content: &str) -> Result<(FrontMatter, String)> {
let content = content.trim();
if !content.starts_with("---") {
anyhow::bail!("Markdown file must start with YAML front matter (---)");
}
// Find the closing ---
let rest = &content[3..];
let end_idx = rest
.find("\n---")
.context("Could not find closing --- for front matter")?;
let yaml_content = &rest[..end_idx];
let markdown_body = rest[end_idx + 4..].trim();
let front_matter: FrontMatter =
serde_yaml::from_str(yaml_content).context("Failed to parse YAML front matter")?;
Ok((front_matter, markdown_body.to_string()))
}
/// Replace every occurrence of `placeholder` in `template` with `replacement`,
/// carrying the placeholder's indentation over to multi-line replacements.
///
/// For each occurrence, the text between the start of the placeholder's line
/// and the placeholder itself is inspected. If it consists solely of
/// whitespace, it is treated as the indentation and is prepended to the second
/// and every later line of `replacement`. The first line needs no prefix: the
/// indentation already sits in the template ahead of the placeholder. When
/// anything other than whitespace precedes the placeholder on its line, no
/// indentation is added.
///
/// Lines are split with [`str::lines`], so `\r\n` line endings inside
/// `replacement` are emitted as `\n`. A trailing newline on `replacement` is
/// preserved.
///
/// An empty `placeholder` returns `template` unchanged. Without that guard
/// the search would match at offset 0 forever and the function would never
/// return.
pub fn replace_with_indent(template: &str, placeholder: &str, replacement: &str) -> String {
    // `find("")` always succeeds at offset 0 without consuming any input.
    if placeholder.is_empty() {
        return template.to_string();
    }

    let mut result = String::with_capacity(template.len() + replacement.len());
    let mut remaining = template;

    while let Some(pos) = remaining.find(placeholder) {
        let (before, after) = remaining.split_at(pos);

        // Text between the start of the current line and the placeholder.
        let line_start = before.rfind('\n').map_or(0, |i| i + 1);
        let prefix = &before[line_start..];
        // Only reuse the prefix as indentation when it is pure whitespace.
        let indent = if prefix.chars().all(char::is_whitespace) {
            prefix
        } else {
            ""
        };

        // Everything ahead of the placeholder is copied verbatim.
        result.push_str(before);

        // First line goes in as-is; later lines are re-indented.
        for (index, line) in replacement.lines().enumerate() {
            if index > 0 {
                result.push('\n');
                result.push_str(indent);
            }
            result.push_str(line);
        }

        // `lines()` drops a trailing newline; restore it.
        if replacement.ends_with('\n') {
            result.push('\n');
        }

        remaining = &after[placeholder.len()..];
    }

    result.push_str(remaining);
    result
}
/// Render the top-level `parameters:` YAML block for a pipeline.
///
/// The result is the literal line `parameters:` followed by the serde_yaml
/// serialization of the parameter list, for example:
///
/// ```yaml
/// parameters:
/// - name: clearMemory
///   displayName: "Clear agent memory"
///   type: boolean
///   default: false
/// ```
///
/// An empty `parameters` slice yields an empty string.
///
/// # Errors
///
/// Returns an error when a parameter name is rejected by
/// `validate::is_valid_parameter_name`, when `displayName`, `default` or any
/// entry of `values` is rejected by the ADO-expression checks, or when
/// serialization fails.
pub fn generate_parameters(parameters: &[PipelineParameter]) -> Result<String> {
    if parameters.is_empty() {
        return Ok(String::new());
    }

    // All parameters are validated before anything is serialized.
    for param in parameters {
        let name = &param.name;

        // Names must be plain ADO identifiers to rule out YAML or template
        // expression injection.
        if !validate::is_valid_parameter_name(name) {
            anyhow::bail!(
                "Invalid parameter name '{}': must match [A-Za-z_][A-Za-z0-9_]* (ADO identifier)",
                name
            );
        }

        // The remaining fields must be literals, never ADO expressions.
        if let Some(display_name) = &param.display_name {
            validate::reject_ado_expressions(display_name, name, "displayName")?;
        }
        if let Some(default) = &param.default {
            validate::reject_ado_expressions_in_value(default, name, "default")?;
        }
        for value in param.values.iter().flatten() {
            validate::reject_ado_expressions_in_value(value, name, "values")?;
        }
    }

    let entries = parameters
        .iter()
        .map(|param| {
            serde_yaml::to_value(param).context("Failed to serialize pipeline parameter")
        })
        .collect::<Result<Vec<_>>>()?;
    let rendered = serde_yaml::to_string(&serde_yaml::Value::Sequence(entries))
        .context("Failed to serialize parameters to YAML")?;

    // serde_yaml emits a bare sequence; nest it under the `parameters:` key.
    Ok(format!("parameters:\n{}", rendered))
}
/// Check the front matter fields that are substituted into the pipeline YAML.
///
/// Every value below is passed through `validate::reject_pipeline_injection`,
/// which guards against ADO expressions (`${{`, `$(`, `$[`), the compiler's
/// own `{{` template delimiter, and newlines, any of which could leak secrets
/// or alter pipeline logic through second-order injection:
///
/// - `name` and `description`
/// - `on.pipeline.name`, `on.pipeline.project`, and each `on.pipeline.branches` entry
/// - each include/exclude entry of `on.pr.branches` and `on.pr.paths`
///
/// # Errors
///
/// Returns the first validation failure encountered.
pub fn validate_front_matter_identity(front_matter: &FrontMatter) -> Result<()> {
    validate::reject_pipeline_injection(&front_matter.name, "name")?;
    validate::reject_pipeline_injection(&front_matter.description, "description")?;

    // Without an `on:` section there is nothing further to check.
    let Some(on_config) = &front_matter.on_config else {
        return Ok(());
    };

    // Upstream pipeline trigger fields.
    if let Some(upstream) = &on_config.pipeline {
        validate::reject_pipeline_injection(&upstream.name, "on.pipeline.name")?;
        if let Some(project) = &upstream.project {
            validate::reject_pipeline_injection(project, "on.pipeline.project")?;
        }
        for branch in &upstream.branches {
            let label = format!("on.pipeline.branches entry {:?}", branch);
            validate::reject_pipeline_injection(branch, &label)?;
        }
    }

    // PR trigger branch and path filters.
    let Some(pr) = &on_config.pr else {
        return Ok(());
    };
    if let Some(branches) = &pr.branches {
        for entry in &branches.include {
            let label = format!("on.pr.branches.include entry {:?}", entry);
            validate::reject_pipeline_injection(entry, &label)?;
        }
        for entry in &branches.exclude {
            let label = format!("on.pr.branches.exclude entry {:?}", entry);
            validate::reject_pipeline_injection(entry, &label)?;
        }
    }
    if let Some(paths) = &pr.paths {
        for entry in &paths.include {
            let label = format!("on.pr.paths.include entry {:?}", entry);
            validate::reject_pipeline_injection(entry, &label)?;
        }
        for entry in &paths.exclude {
            let label = format!("on.pr.paths.exclude entry {:?}", entry);
            validate::reject_pipeline_injection(entry, &label)?;
        }
    }

    Ok(())
}
/// Assemble the final parameter list: user-defined parameters plus any
/// parameters the compiler injects automatically.
///
/// When `has_memory` is true and the user has not declared a parameter named
/// `clearMemory`, a boolean `clearMemory` parameter (default `false`) is
/// placed at the front of the list. Otherwise the user parameters are
/// returned as a plain copy.
pub fn build_parameters(user_params: &[PipelineParameter], has_memory: bool) -> Vec<PipelineParameter> {
    const CLEAR_MEMORY: &str = "clearMemory";

    // A user-supplied `clearMemory` always wins over the injected one.
    let user_defined = user_params.iter().any(|param| param.name == CLEAR_MEMORY);
    if !has_memory || user_defined {
        return user_params.to_vec();
    }

    let injected = PipelineParameter {
        name: CLEAR_MEMORY.to_string(),
        display_name: Some("Clear agent memory".to_string()),
        param_type: Some("boolean".to_string()),
        default: Some(serde_yaml::Value::Bool(false)),
        values: None,
    };

    let mut combined = Vec::with_capacity(user_params.len() + 1);
    combined.push(injected);
    combined.extend_from_slice(user_params);
    combined
}
/// Render the schedule YAML block for a fuzzy schedule expression.
///
/// The branches come from `config.branches()`; when that list is empty the
/// schedule is generated for the single branch `main`. The actual rendering
/// is delegated to `fuzzy_schedule::generate_schedule_yaml`.
pub fn generate_schedule(name: &str, config: &super::types::ScheduleConfig) -> Result<String> {
    // Declared up front so a borrow of the fallback outlives the `if`.
    let default_branches;
    let configured = config.branches();
    let targets = if !configured.is_empty() {
        configured
    } else {
        default_branches = vec!["main".to_string()];
        &default_branches
    };
    fuzzy_schedule::generate_schedule_yaml(config.expression(), name, targets)
}
/// Render the `pr:` trigger section of the pipeline.
///
/// - If the `on:` config has an explicit `pr` entry, the native PR trigger
///   from `pr_filters::generate_native_pr_trigger` is returned, regardless of
///   any schedule or upstream pipeline trigger.
/// - Otherwise, if a schedule and/or an upstream pipeline trigger is present,
///   PR triggers are disabled with `pr: none`, preceded by an explanatory
///   comment.
/// - Otherwise an empty string is returned.
pub fn generate_pr_trigger(on_config: &Option<OnConfig>, has_schedule: bool) -> String {
    let config = on_config.as_ref();

    // An explicit PR configuration overrides schedule/pipeline suppression.
    if let Some(pr) = config.and_then(|c| c.pr.as_ref()) {
        return super::pr_filters::generate_native_pr_trigger(pr);
    }

    let has_pipeline_trigger = config.and_then(|c| c.pipeline.as_ref()).is_some();
    let rendered: &str = match (has_pipeline_trigger, has_schedule) {
        (false, false) => "",
        (false, true) => "# Disable PR triggers - only run on schedule\npr: none",
        (true, false) => "# Disable PR triggers - only run when upstream pipeline completes\npr: none",
        (true, true) => "# Disable PR triggers - only run on schedule or when upstream pipeline completes\npr: none",
    };
    rendered.to_string()
}
/// Render the CI `trigger:` section of the pipeline.
///
/// Returns `trigger: none` when the pipeline runs on a schedule or on
/// completion of an upstream pipeline; otherwise returns an empty string.
pub fn generate_ci_trigger(on_config: &Option<OnConfig>, has_schedule: bool) -> String {
    let has_pipeline_trigger = matches!(on_config, Some(cfg) if cfg.pipeline.is_some());
    if !(has_pipeline_trigger || has_schedule) {
        return String::new();
    }
    "trigger: none".to_string()
}
/// Render the `pipelines:` resource YAML for a pipeline-completion trigger.
///
/// Returns an empty string when there is no `on:` config or it has no
/// `pipeline` entry. Otherwise the block contains:
///
/// - a resource identifier derived from the pipeline name (lower-cased, with
///   every non-alphanumeric character replaced by `_`),
/// - the source pipeline name and optional project as single-quoted YAML
///   scalars (embedded `'` doubled),
/// - `trigger: true` when no branches are configured, or a branch include
///   list otherwise.
pub fn generate_pipeline_resources(on_config: &Option<OnConfig>) -> Result<String> {
    let Some(pipeline) = on_config.as_ref().and_then(|cfg| cfg.pipeline.as_ref()) else {
        return Ok(String::new());
    };

    // Escape a value for use inside a single-quoted YAML scalar.
    let quote = |raw: &str| raw.replace('\'', "''");

    // Build a snake_case-style identifier from the pipeline name.
    let mut resource_id = String::new();
    for ch in pipeline.name.to_lowercase().chars() {
        resource_id.push(if ch.is_alphanumeric() { ch } else { '_' });
    }

    let mut yaml = String::from("pipelines:\n");
    yaml.push_str(&format!(" - pipeline: {}\n", resource_id));
    yaml.push_str(&format!(" source: '{}'\n", quote(&pipeline.name)));
    if let Some(project) = &pipeline.project {
        yaml.push_str(&format!(" project: '{}'\n", quote(project)));
    }

    if !pipeline.branches.is_empty() {
        yaml.push_str(" trigger:\n");
        yaml.push_str(" branches:\n");
        yaml.push_str(" include:\n");
        for branch in &pipeline.branches {
            yaml.push_str(&format!(" - '{}'\n", quote(branch)));
        }
    } else {
        // No branch filter: trigger on any branch.
        yaml.push_str(" trigger: true\n");
    }

    Ok(yaml)
}
/// Render the repository resource entries as YAML.
///
/// Each repository becomes one `- repository:` entry listing its `type`,
/// `name` and `ref`; entries are separated by a newline. An empty slice
/// yields an empty string.
pub fn generate_repositories(repositories: &[Repository]) -> String {
    let mut entries = Vec::with_capacity(repositories.len());
    for repo in repositories {
        entries.push(format!(
            "- repository: {}\n type: {}\n name: {}\n ref: {}",
            repo.repository, repo.repo_type, repo.name, repo.repo_ref
        ));
    }
    // Joining an empty list produces the empty string.
    entries.join("\n")
}
/// Render one `- checkout: <alias>` step per entry, separated by newlines.
///
/// An empty `checkout` slice yields an empty string.
pub fn generate_checkout_steps(checkout: &[String]) -> String {
    let mut yaml = String::new();
    for (index, alias) in checkout.iter().enumerate() {
        if index > 0 {
            yaml.push('\n');
        }
        yaml.push_str(&format!("- checkout: {}", alias));
    }
    yaml
}
/// Render the step that checks out the trigger repository (`checkout: self`).
pub fn generate_checkout_self() -> String {
    String::from("- checkout: self")
}
/// Names that are reserved by the `workspace:` resolver and therefore cannot
/// be used as repository aliases / `checkout:` entries. If a user defines a
/// repository named `repo` and writes `workspace: repo`, the special-cased
/// reserved arm would silently win over the alias resolution, producing the
/// wrong working directory. We reject this at compile time instead
/// (see `validate_checkout_list`, which bails on any checkout entry found in
/// this list).
const RESERVED_WORKSPACE_NAMES: &[&str] = &["root", "repo", "self"];
/// Validate the `checkout:` list against the declared repositories.
///
/// Every checkout entry must match the `repository` alias of one of
/// `repositories`, and must not be one of [`RESERVED_WORKSPACE_NAMES`]. An
/// empty checkout list is always valid.
///
/// # Errors
///
/// Returns an error naming the first entry that is unknown or reserved.
pub fn validate_checkout_list(repositories: &[Repository], checkout: &[String]) -> Result<()> {
    if checkout.is_empty() {
        return Ok(());
    }

    let mut repo_names = std::collections::HashSet::new();
    for repo in repositories {
        repo_names.insert(repo.repository.as_str());
    }

    for name in checkout {
        let alias = name.as_str();

        if !repo_names.contains(alias) {
            anyhow::bail!(
                "Checkout entry '{}' not found in repositories. Available: {:?}",
                name,
                repo_names
            );
        }

        if RESERVED_WORKSPACE_NAMES.contains(&alias) {
            anyhow::bail!(
                "Checkout entry '{}' uses a name reserved by the 'workspace:' resolver \
                 ({:?}). Rename the repository alias to avoid ambiguity with \
                 'workspace: {}'.",
                name,
                RESERVED_WORKSPACE_NAMES,
                name
            );
        }
    }

    Ok(())
}
/// Sentinel prefix used to encode a repository-alias workspace selection
/// in the string returned by [`compute_effective_workspace`]. The prefix is
/// only ever produced internally by `compute_effective_workspace` from a
/// user-supplied alias that has just been checked against the `checkout:`
/// list, so the encoded value never round-trips back through user input.
/// [`generate_working_directory`] strips the prefix again to recover the alias.
const WORKSPACE_ALIAS_PREFIX: &str = "alias:";
/// Resolve the `workspace:` setting into an encoded workspace selector.
///
/// Accepted values for `explicit_workspace`:
/// - `"root"`: the checkout root; encoded as `"root"`.
/// - `"repo"` or `"self"`: the trigger repository's subfolder; encoded as
///   `"repo"`. A warning is printed to stderr when `checkout` is empty,
///   because the setting has no effect in that layout.
/// - any alias listed in `checkout`: encoded as
///   [`WORKSPACE_ALIAS_PREFIX`] followed by the alias.
///
/// With no explicit setting the result is `"repo"` when additional
/// repositories are checked out and `"root"` otherwise.
///
/// The returned string is meant to be passed to
/// [`generate_working_directory`], which turns it into the ADO path expression.
///
/// # Errors
///
/// Fails when an alias is not a safe path segment (per
/// `validate::is_safe_path_segment`) or does not appear in `checkout`.
pub fn compute_effective_workspace(
    explicit_workspace: &Option<String>,
    checkout: &[String],
    agent_name: &str,
) -> Result<String> {
    let has_additional_checkouts = !checkout.is_empty();

    // No explicit setting: choose based on the checkout layout.
    let Some(requested) = explicit_workspace.as_deref() else {
        let implied = if has_additional_checkouts { "repo" } else { "root" };
        return Ok(implied.to_string());
    };

    if requested == "root" {
        return Ok("root".to_string());
    }

    if requested == "repo" || requested == "self" {
        if !has_additional_checkouts {
            eprintln!(
                "Warning: Agent '{}' has workspace: {} but no additional repositories in checkout. \
                 When only 'self' is checked out, $(Build.SourcesDirectory) already contains the repository content. \
                 The workspace setting has no effect in this case.",
                agent_name, requested
            );
        }
        return Ok("repo".to_string());
    }

    // Anything else is treated as a repository alias.
    let alias = requested;

    // Defense in depth: refuse anything that could escape the workspace root
    // once it is embedded in the working directory path.
    if !validate::is_safe_path_segment(alias) {
        anyhow::bail!(
            "Agent '{}' has workspace: '{}' which is not a safe path \
             segment. Repository aliases must not be empty, contain '..', \
             '/', '\\\\' or start with '.'.",
            agent_name,
            alias
        );
    }

    if checkout.iter().any(|c| c == alias) {
        return Ok(format!("{}{}", WORKSPACE_ALIAS_PREFIX, alias));
    }

    // The alias is not checked out; pick the more helpful of two messages.
    if checkout.is_empty() {
        anyhow::bail!(
            "Agent '{}' has workspace: '{}' but no additional repositories are checked out. \
             A repository alias for workspace is only valid when at least one repository appears in 'checkout:'. \
             Use 'root', 'repo' (or 'self'), or add the repository to the 'checkout:' list.",
            agent_name,
            alias
        );
    }
    anyhow::bail!(
        "Agent '{}' has workspace: '{}' which does not match any checked-out repository. \
         Valid values: 'root', 'repo' (or 'self'), or one of {:?}",
        agent_name,
        alias,
        checkout
    );
}
/// Return the directory into which the trigger ("self") repository is
/// checked out.
///
/// The result depends only on whether additional repositories are checked
/// out, not on `workspace:`:
/// - empty `checkout`: `$(Build.SourcesDirectory)`, since `self` is checked
///   out into the root;
/// - otherwise: `$(Build.SourcesDirectory)/$(Build.Repository.Name)`, since
///   each checked-out repository, `self` included, gets its own subfolder.
///
/// Used to anchor paths to files that live in the trigger repository (such as
/// the agent markdown source and the compiled pipeline YAML), wherever
/// `workspace:` points the agent.
pub fn generate_trigger_repo_directory(checkout: &[String]) -> String {
    let directory = match checkout {
        [] => "$(Build.SourcesDirectory)",
        _ => "$(Build.SourcesDirectory)/$(Build.Repository.Name)",
    };
    directory.to_string()
}
/// Turn an encoded workspace selector into the ADO working-directory path.
///
/// - `alias:<name>` (see [`WORKSPACE_ALIAS_PREFIX`]) maps to
///   `$(Build.SourcesDirectory)/<name>`
/// - `"repo"` maps to `$(Build.SourcesDirectory)/$(Build.Repository.Name)`
/// - `"root"` maps to `$(Build.SourcesDirectory)`
///
/// Any other value is unexpected: it trips a `debug_assert!` and otherwise
/// falls back to `$(Build.SourcesDirectory)`.
pub fn generate_working_directory(effective_workspace: &str) -> String {
    match effective_workspace.strip_prefix(WORKSPACE_ALIAS_PREFIX) {
        Some(alias) => format!("$(Build.SourcesDirectory)/{}", alias),
        None if effective_workspace == "repo" => {
            "$(Build.SourcesDirectory)/$(Build.Repository.Name)".to_string()
        }
        None if effective_workspace == "root" => "$(Build.SourcesDirectory)".to_string(),
        None => {
            // compute_effective_workspace only produces "root", "repo" or an
            // "alias:<name>" sentinel, so reaching this arm is a programming
            // error rather than bad user input. Fall back to the root path.
            let other = effective_workspace;
            debug_assert!(false, "unexpected effective workspace value: {other}");
            "$(Build.SourcesDirectory)".to_string()
        }
    }
}
/// Render the `timeoutInMinutes` job property from the engine's configured
/// timeout.
///
/// Returns an empty string when `front_matter.engine.timeout_minutes()` is
/// `None`.
pub fn generate_job_timeout(front_matter: &FrontMatter) -> String {
    front_matter
        .engine
        .timeout_minutes()
        .map(|minutes| format!("timeoutInMinutes: {}", minutes))
        .unwrap_or_default()
}
/// Format one step's YAML text as a sequence item.
///
/// The input is trimmed. Its first line loses any leading `---` document
/// marker and surrounding whitespace and is emitted after a `- ` item marker;
/// each later line is emitted with a continuation prefix. Empty input yields
/// an empty string.
#[allow(dead_code)]
pub fn format_step_yaml(step_yaml: &str) -> String {
    let mut lines = step_yaml.trim().lines();
    let mut formatted = String::new();

    if let Some(head) = lines.next() {
        formatted.push_str(&format!(" - {}", head.trim_start_matches("---").trim()));
    }
    for rest in lines {
        formatted.push('\n');
        formatted.push_str(&format!(" {}", rest));
    }

    formatted
}
/// Format one step's YAML text as a sequence item at a chosen indentation.
///
/// The input is trimmed. The first line loses any leading `---` document
/// marker and surrounding whitespace and is emitted as
/// `<base_indent spaces>- <line>`; every later line is prefixed with
/// `base_indent + 2` spaces so it lines up under the item's content. Empty
/// input yields an empty string.
pub fn format_step_yaml_indented(step_yaml: &str, base_indent: usize) -> String {
    let indent = " ".repeat(base_indent);
    let cont_indent = " ".repeat(base_indent + 2);

    let mut rendered: Vec<String> = Vec::new();
    for (index, line) in step_yaml.trim().lines().enumerate() {
        let formatted = match index {
            0 => format!("{}- {}", indent, line.trim_start_matches("---").trim()),
            _ => format!("{}{}", cont_indent, line),
        };
        rendered.push(formatted);
    }
    rendered.join("\n")
}
/// Format a list of step values as YAML sequence items, one per step,
/// joined by newlines.
///
/// Each step is serialized with serde_yaml and passed through
/// [`format_step_yaml`]. Steps that fail to serialize are silently skipped.
#[allow(dead_code)]
pub fn format_steps_yaml(steps: &[serde_yaml::Value]) -> String {
    let mut rendered = Vec::with_capacity(steps.len());
    for step in steps {
        // Serialization failures are dropped on purpose (best effort).
        if let Ok(text) = serde_yaml::to_string(step) {
            rendered.push(format_step_yaml(&text));
        }
    }
    rendered.join("\n")
}
/// Format a list of step values as YAML sequence items at a chosen
/// indentation, one per step, joined by newlines.
///
/// Each step is serialized with serde_yaml and passed through
/// [`format_step_yaml_indented`] with `base_indent`. Steps that fail to
/// serialize are silently skipped.
pub fn format_steps_yaml_indented(steps: &[serde_yaml::Value], base_indent: usize) -> String {
    let mut rendered = Vec::with_capacity(steps.len());
    for step in steps {
        // Serialization failures are dropped on purpose (best effort).
        if let Ok(text) = serde_yaml::to_string(step) {
            rendered.push(format_step_yaml_indented(&text, base_indent));
        }
    }
    rendered.join("\n")
}
/// Turn an arbitrary name into a filename-friendly slug.
///
/// The name is lower-cased, every run of non-alphanumeric characters becomes
/// a single `-`, and no dash is left at the start or end of the result.
pub fn sanitize_filename(name: &str) -> String {
    let lowered = name.to_lowercase();
    let mut slug = String::with_capacity(lowered.len());
    // Set when separators were seen since the last kept character.
    let mut pending_dash = false;

    for ch in lowered.chars() {
        if !ch.is_alphanumeric() {
            pending_dash = true;
            continue;
        }
        // Emit one dash between alphanumeric runs, never at the very start.
        if pending_dash && !slug.is_empty() {
            slug.push('-');
        }
        pending_dash = false;
        slug.push(ch);
    }

    slug
}
/// Default pool name.
pub const DEFAULT_POOL: &str = "AZS-1ES-L-MMS-ubuntu-22.04";
/// Version of the AWF (Agentic Workflow Firewall) binary to download from GitHub Releases.
/// Update this when upgrading to a new AWF release.
/// See: https://github.com/github/gh-aw-firewall/releases
pub const AWF_VERSION: &str = "0.25.33";
/// Prefix used to identify agentic pipeline YAML files generated by ado-aw.
/// The header produced by `generate_header_comment` contains a line that
/// starts with this marker.
pub const HEADER_MARKER: &str = "# @ado-aw";
/// Build the header comment block that is prepended to every compiled
/// pipeline YAML file.
///
/// The header has two lines:
/// - a human-readable "do not edit" warning, and
/// - a machine-readable `@ado-aw` marker carrying the source path and the
///   compiler version (`CARGO_PKG_VERSION`).
///
/// The source path is the input path as given to the compiler, cleaned up so
/// it is safe and stable inside the quoted marker: backslashes become forward
/// slashes, CR/LF characters are removed, double quotes are backslash-escaped,
/// and redundant leading `./` prefixes are stripped. Stripping `./` keeps the
/// prefix from accumulating when paths are re-joined through
/// `Path::new(".").join(source)`.
pub fn generate_header_comment(input_path: &std::path::Path) -> String {
    let cleaned = input_path
        .to_string_lossy()
        .replace('\\', "/")
        .replace('\n', "")
        .replace('\r', "")
        .replace('"', "\\\"");
    // Removes every repeated leading "./".
    let source_path = cleaned.trim_start_matches("./");

    format!(
        "# This file is auto-generated by ado-aw. Do not edit manually.\n\
         # @ado-aw source=\"{}\" version={}\n",
        source_path,
        env!("CARGO_PKG_VERSION")
    )
}
/// Version of the MCP Gateway (gh-aw-mcpg); the Docker image itself is named
/// by [`MCPG_IMAGE`].
/// Update this when upgrading to a new MCPG release.
/// See: https://github.com/github/gh-aw-mcpg/releases
pub const MCPG_VERSION: &str = "0.3.3";
/// Docker image for the MCPG container.
pub const MCPG_IMAGE: &str = "ghcr.io/github/gh-aw-mcpg";
/// Default port MCPG listens on inside the container (host network mode).
pub const MCPG_PORT: u16 = 80;
/// Domain that the AWF-sandboxed agent uses to reach MCPG on the host.
/// Docker's `host.docker.internal` resolves to the host loopback from
/// inside containers running with `--network host` or via Docker DNS.
pub const MCPG_DOMAIN: &str = "host.docker.internal";
/// Docker base image for the Azure DevOps MCP container.
pub const ADO_MCP_IMAGE: &str = "node:20-slim";
/// Default entrypoint for the Azure DevOps MCP container.
pub const ADO_MCP_ENTRYPOINT: &str = "npx";
/// Name of the Azure DevOps MCP npm package, used as the default entrypoint
/// argument.
pub const ADO_MCP_PACKAGE: &str = "@azure-devops/mcp";
/// Reserved MCPG server name for the auto-configured ADO MCP.
pub const ADO_MCP_SERVER_NAME: &str = "azure-devops";
/// Build the runtime path of the agent markdown source for Stage 3 execution.
///
/// The path is anchored at the `{{ trigger_repo_directory }}` template
/// marker. The agent markdown lives in the trigger ("self") repository, so
/// this anchor does not depend on the user's `workspace:` setting, which may
/// point at a different checked-out repository.
///
/// The relative part comes from `normalize_relative_path`, which keeps the
/// directory components of the input so that agents compiled from
/// subdirectories (e.g. `ado-aw compile agents/ctf.md`) get a correct runtime
/// path. When that helper returns `None`, only the file name is used (or
/// `agent.md` if there is no usable file name), so that no machine-specific
/// absolute path ends up in the generated pipeline.
pub fn generate_source_path(input_path: &std::path::Path) -> String {
    let relative = match normalize_relative_path(input_path) {
        Some(path) => path,
        None => input_path
            .file_name()
            .and_then(|name| name.to_str())
            .unwrap_or("agent.md")
            .to_string(),
    };
    format!("{{{{ trigger_repo_directory }}}}/{}", relative)
}
/// Build the "Verify pipeline integrity" step of the pipeline YAML.
///
/// With `skip == false` (the default) this returns a bash step that makes
/// the ado-aw binary under
/// `$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw` executable and
/// runs `ado-aw check` against `{{ pipeline_path }}`.
///
/// The step sets `workingDirectory: {{ trigger_repo_directory }}` so that:
/// 1. the relative `{{ pipeline_path }}` argument resolves correctly when
///    `checkout:` produces a multi-repo `$(Build.SourcesDirectory)` layout;
/// 2. the recompile performed by `ado-aw check` can see the trigger repo's
///    `.git` directory, which is needed to infer the ADO org from the git
///    remote (used by `tools.azure-devops`).
///
/// With `skip == true` (developer builds using `--skip-integrity`) an empty
/// string is returned and the step is left out of the pipeline.
pub fn generate_integrity_check(skip: bool) -> String {
    // Indentation is applied by replace_with_indent at the call site.
    const STEP: &str = r#"- bash: |
AGENTIC_PIPELINES_PATH="$(Pipeline.Workspace)/agentic-pipeline-compiler/ado-aw"
chmod +x "$AGENTIC_PIPELINES_PATH"
$AGENTIC_PIPELINES_PATH check "{{ pipeline_path }}"
workingDirectory: {{ trigger_repo_directory }}
displayName: "Verify pipeline integrity""#;

    if skip {
        String::new()
    } else {
        STEP.to_string()
    }
}
/// Generate debug pipeline replacement values for template markers.
///
/// Returns `(marker, replacement)` pairs for two template markers:
/// - `{{ mcpg_debug_flags }}`
/// - `{{ verify_mcp_backends }}`
///
/// When `debug` is `true`:
/// - `{{ mcpg_debug_flags }}` becomes `-e DEBUG="*" \` (an extra environment
///   flag followed by a bash line continuation).
/// - `{{ verify_mcp_backends }}` becomes a full pipeline step that probes each
///   MCPG backend listed in `/tmp/awf-tools/mcp-config.json` with an MCP
///   `initialize` request followed by `tools/list`, logging a warning for
///   every backend that fails.
///
/// When `debug` is `false`:
/// - `{{ mcpg_debug_flags }}` becomes a lone `\`, which keeps the surrounding
///   bash line continuation intact.
/// - `{{ verify_mcp_backends }}` becomes an empty string.
///
/// The probe step itself still contains a `{{ mcpg_port }}` marker; this
/// function does not substitute it.
pub fn generate_debug_pipeline_replacements(debug: bool) -> Vec<(String, String)> {
if !debug {
return vec![
// Emit `\` to maintain bash line continuation (same pattern as
// generate_mcpg_docker_env when no env flags are needed).
("{{ mcpg_debug_flags }}".into(), "\\".into()),
("{{ verify_mcp_backends }}".into(), String::new()),
];
}
// Extra docker flag enabling debug output; ends with a bash line continuation.
let mcpg_debug_flags = r##"-e DEBUG="*" \"##.to_string();
// Complete pipeline step, emitted verbatim. Everything inside the raw string
// (including the `#` lines) is part of the generated YAML, not Rust comments.
let verify_mcp_backends = r###"# Probe all MCPG backends to force eager launch and surface failures.
# MCPG lazily starts stdio backends on first tool call — without this
# step, a broken backend (e.g., npx timeout) only surfaces as a silent
# missing-tool error during the agent run.
- bash: |
echo "=== Probing MCP backends ==="
PROBE_FAILED=false
for server in $(jq -r '.mcpServers | keys[]' /tmp/awf-tools/mcp-config.json); do
echo ""
echo "--- Probing: $server ---"
# MCP requires initialize handshake before tools/list.
# Send initialize first, then tools/list in a second request
# using the session ID from the initialize response.
INIT_RESPONSE=$(curl -s -D /tmp/probe-headers.txt -o /tmp/probe-init.json -w "%{http_code}" --max-time 120 -X POST \
-H "Authorization: $MCPG_API_KEY" \
-H "Content-Type: application/json" \
-H "Accept: application/json, text/event-stream" \
-d '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2025-03-26","capabilities":{},"clientInfo":{"name":"ado-aw-probe","version":"1.0"}}}' \
"http://localhost:{{ mcpg_port }}/mcp/$server" 2>&1)
SESSION_ID=$(grep -i "mcp-session-id" /tmp/probe-headers.txt 2>/dev/null | tr -d '\r' | awk '{print $2}')
echo "Initialize: HTTP $INIT_RESPONSE, session=$SESSION_ID"
if [ -z "$SESSION_ID" ]; then
echo "##vso[task.logissue type=warning]MCP backend '$server' did not return a session ID"
cat /tmp/probe-init.json 2>/dev/null || true
PROBE_FAILED=true
continue
fi
# Now send tools/list with the session
HTTP_CODE=$(curl -s -o /tmp/probe-response.json -w "%{http_code}" --max-time 120 -X POST \
-H "Authorization: $MCPG_API_KEY" \
-H "Content-Type: application/json" \
-H "Accept: application/json, text/event-stream" \
-H "Mcp-Session-Id: $SESSION_ID" \
-d '{"jsonrpc":"2.0","id":2,"method":"tools/list"}' \
"http://localhost:{{ mcpg_port }}/mcp/$server" 2>&1)
BODY=$(cat /tmp/probe-response.json 2>/dev/null || echo "(empty)")
# Extract tool count from SSE data line
TOOL_COUNT=$(echo "$BODY" | grep '^data:' | sed 's/^data: //' | jq -r '.result.tools | length' 2>/dev/null || echo "?")
echo "tools/list: HTTP $HTTP_CODE"
if [ "$HTTP_CODE" -ge 200 ] && [ "$HTTP_CODE" -lt 300 ] && [ "$TOOL_COUNT" != "?" ]; then
echo "✓ $server: $TOOL_COUNT tools available"
else
echo "##vso[task.logissue type=warning]MCP backend '$server' tools/list returned HTTP $HTTP_CODE"
echo "Response: $BODY"
PROBE_FAILED=true
fi
done
echo ""
echo "=== MCPG health after probes ==="
curl -sf "http://localhost:{{ mcpg_port }}/health" | jq . || true
if [ "$PROBE_FAILED" = "true" ]; then
echo "##vso[task.logissue type=warning]One or more MCP backends failed to initialize — check logs above"
fi
displayName: "Verify MCP backends"
env:
MCPG_API_KEY: $(MCP_GATEWAY_API_KEY)"###
.to_string();
vec![
("{{ mcpg_debug_flags }}".into(), mcpg_debug_flags),
("{{ verify_mcp_backends }}".into(), verify_mcp_backends),
]
}
/// Build the pipeline YAML path used by the integrity check at ADO runtime.
///
/// The returned path is **relative** to the trigger repository root. The
/// integrity check step sets `workingDirectory: {{ trigger_repo_directory }}`
/// so that this path resolves correctly and so that the recompile done by
/// `ado-aw check` can see the trigger repo's `.git` directory (needed to
/// infer the ADO org for `tools.azure-devops`).
///
/// The relative part comes from `normalize_relative_path`, which keeps the
/// directory components so that pipelines compiled into subdirectories (e.g.
/// `agents/ctf.yml`) get a correct runtime path. When that helper returns
/// `None`, only the file name is used (or `pipeline.yml` if there is no
/// usable file name), so that no machine-specific absolute path ends up in
/// the generated pipeline.
pub fn generate_pipeline_path(output_path: &std::path::Path) -> String {
    match normalize_relative_path(output_path) {
        Some(relative) => relative,
        None => output_path
            .file_name()
            .and_then(|name| name.to_str())
            .unwrap_or("pipeline.yml")
            .to_string(),
    }
}
/// Turn `path` into a form suitable for embedding in a generated pipeline.
///
/// **Relative paths** always produce `Some`, after:
/// - converting backslashes to forward slashes, and
/// - removing every redundant leading `./` prefix.
///
/// **Absolute paths** are rewritten relative to the nearest git repository
/// root (located by walking up the directory tree looking for a `.git` entry),
/// with backslashes converted to forward slashes. This keeps the directory
/// structure when the caller passes an absolute path, e.g.
/// `/home/user/repo/agents/ctf.md` → `agents/ctf.md`.
///
/// Returns `None` for an absolute path when no git root is found (or the
/// prefix cannot be stripped); callers then fall back to the bare file name
/// so that no machine-specific absolute path is written into the pipeline
/// YAML.
///
/// Note: `..` components in relative paths are passed through unchanged.
/// Callers are responsible for ensuring the path does not traverse outside the
/// repository checkout.
fn normalize_relative_path(path: &std::path::Path) -> Option<String> {
    if !path.is_absolute() {
        let unified = path.to_string_lossy().replace('\\', "/");
        // `trim_start_matches` removes the prefix repeatedly, so `././a` → `a`.
        return Some(unified.trim_start_matches("./").to_string());
    }

    // Absolute input: express it relative to the enclosing git checkout so the
    // directory structure (e.g. `agents/ctf.md`) survives.
    let repo_root = find_git_root(path)?;
    path.strip_prefix(&repo_root)
        .ok()
        .map(|relative| relative.to_string_lossy().replace('\\', "/"))
}
/// Locate the nearest enclosing directory that holds a `.git` entry.
///
/// The search begins at `path` itself when it is an existing directory, and at
/// its parent otherwise (returning `None` right away if there is no parent).
/// Each ancestor is then checked in turn, closest first.
///
/// Returns the first directory containing `.git`, or `None` once the
/// ancestors are exhausted without a match.
fn find_git_root(path: &std::path::Path) -> Option<std::path::PathBuf> {
    let origin = if path.is_dir() { path } else { path.parent()? };
    // `ancestors()` yields `origin`, then its parent, and so on until the
    // path has no parent left.
    origin
        .ancestors()
        .find(|dir| dir.join(".git").exists())
        .map(|dir| dir.to_path_buf())
}
// ==================== Permission helpers ====================
/// ADO resource ID passed to `az account get-access-token --resource` when
/// minting ADO-scoped tokens via Azure CLI.
const ADO_RESOURCE_ID: &str = "499b84ac-1321-427f-aa17-267ca6975798";

/// Build an `AzureCLI@2` step that acquires an ADO-scoped token through an ARM
/// service connection and stores it in a secret pipeline variable.
///
/// * `service_connection` - name of the service connection to use; `None`
///   means no step is generated and an empty string is returned.
/// * `variable_name` - pipeline variable that receives the token
///   (e.g. "SC_READ_TOKEN" for the agent, "SC_WRITE_TOKEN" for the executor).
///
/// The returned text is the step's lines joined with `\n`, without a trailing
/// newline.
pub fn generate_acquire_ado_token(service_connection: Option<&str>, variable_name: &str) -> String {
    service_connection
        .map(|connection| {
            // Double any single quote so the name stays valid inside the
            // single-quoted YAML scalar.
            let subscription = connection.replace('\'', "''");
            [
                "- task: AzureCLI@2".to_string(),
                format!(r#" displayName: "Acquire ADO token ({variable_name})""#),
                " inputs:".to_string(),
                format!(" azureSubscription: '{}'", subscription),
                " scriptType: 'bash'".to_string(),
                " scriptLocation: 'inlineScript'".to_string(),
                " addSpnToEnvironment: true".to_string(),
                " inlineScript: |".to_string(),
                " ADO_TOKEN=$(az account get-access-token \\".to_string(),
                format!(" --resource {} \\", ADO_RESOURCE_ID),
                " --query accessToken -o tsv)".to_string(),
                // Logging command that publishes the token as a secret variable.
                format!(
                    " echo \"##vso[task.setvariable variable={variable_name};issecret=true]$ADO_TOKEN\""
                ),
            ]
            .join("\n")
        })
        .unwrap_or_default()
}
/// Produce the `env:` entry for the executor step (Stage 3 Execution).
///
/// When a write service connection is configured, the step's
/// `SYSTEM_ACCESSTOKEN` is mapped to the `SC_WRITE_TOKEN` pipeline variable.
/// Only the presence of the connection matters; its name is not used here.
/// Without one, an empty string is returned so that no ADO access token is
/// exposed at all.
pub fn generate_executor_ado_env(write_service_connection: Option<&str>) -> String {
    if write_service_connection.is_some() {
        String::from("SYSTEM_ACCESSTOKEN: $(SC_WRITE_TOKEN)")
    } else {
        String::new()
    }
}
/// Generate `--enabled-tools` CLI args for the SafeOutputs MCP server.
///
/// Derives the tool list from `safe-outputs:` front matter keys plus always-on
/// diagnostic tools. If `safe-outputs:` is empty, returns an empty string
/// (all tools enabled for backward compatibility).
///
/// Tool names are validated to contain only ASCII alphanumerics and hyphens
/// to prevent shell injection when the args are embedded in bash commands.
/// Unrecognized tool names emit a compile-time warning and are skipped.
pub fn generate_enabled_tools_args(front_matter: &FrontMatter) -> String {
use crate::safeoutputs::{ALL_KNOWN_SAFE_OUTPUTS, ALWAYS_ON_TOOLS, NON_MCP_SAFE_OUTPUT_KEYS};
use std::collections::HashSet;
if front_matter.safe_outputs.is_empty() {
return String::new();
}
// `seen` deduplicates across user keys and ALWAYS_ON_TOOLS (e.g. if the user
// configures `noop` explicitly, it shouldn't appear twice in the output).
let mut seen = HashSet::new();
let mut tools: Vec<String> = Vec::new();
let mut effective_mcp_tool_count = 0usize;
for key in front_matter.safe_outputs.keys() {
if !validate::is_safe_tool_name(key) {
eprintln!(
"Warning: skipping invalid safe-output tool name '{}' (must be ASCII alphanumeric/hyphens only)",
key
);
continue;
}
if NON_MCP_SAFE_OUTPUT_KEYS.contains(&key.as_str()) {
continue;
}
if key == "memory" {
eprintln!(
"Warning: Agent '{}': 'safe-outputs: memory:' has moved to \
'tools: cache-memory:'. Update your front matter to restore memory support.",
front_matter.name
);
continue;
}
if !ALL_KNOWN_SAFE_OUTPUTS.contains(&key.as_str()) {
eprintln!(
"Warning: unrecognized safe-output tool '{}' — skipping (no registered tool matches this name)",
key
);
continue;
}
effective_mcp_tool_count += 1;
if seen.insert(key.clone()) {
tools.push(key.clone());
}
}
if effective_mcp_tool_count == 0 {
// Every user-specified key was either invalid or unrecognized.
// Return empty to keep all tools available (backward compat).
return String::new();
}
// Always include diagnostic tools
for tool in ALWAYS_ON_TOOLS {
let name = tool.to_string();
if seen.insert(name.clone()) {
tools.push(name);
}
}
tools.sort();
let args = tools
.iter()
.map(|t| format!("--enabled-tools {}", t))
.collect::<Vec<_>>()
.join(" ");
// Trailing space so the args don't concatenate with the next positional
// argument when embedded inline in the shell template.
// `args` is never empty here because ALWAYS_ON_TOOLS always contributes entries.