-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcommon.rs
More file actions
7354 lines (6683 loc) · 292 KB
/
common.rs
File metadata and controls
7354 lines (6683 loc) · 292 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//! Common helper functions shared across all compile targets.
use anyhow::{Context, Result};
use std::collections::{HashMap, HashSet};
use std::path::Path;
use super::types::{CompileTarget, FrontMatter, OnConfig, PipelineParameter, PoolConfig, Repository, ReposItem};
use super::extensions::{CompilerExtension, Extension, McpgServerConfig, McpgGatewayConfig, McpgConfig, CompileContext};
use crate::compile::types::McpConfig;
use crate::fuzzy_schedule;
use crate::allowed_hosts::{CORE_ALLOWED_HOSTS, mcp_required_hosts};
use crate::ecosystem_domains::{get_ecosystem_domains, is_ecosystem_identifier, is_known_ecosystem};
use crate::validate;
/// Write `contents` to `path` atomically.
///
/// The data goes into a [`tempfile::NamedTempFile`] created in the
/// destination's parent directory and is then renamed over the
/// destination, so the final `persist` is a same-filesystem rename.
/// Readers therefore see either the old file or the complete new file —
/// never a half-written state.
///
/// Behavior:
///
/// - The tempfile is created in `path.parent()`; an empty parent falls
///   back to `.` (matching `tokio::fs::write` semantics).
/// - On Unix, the existing file's mode is preserved when the target
///   exists. Otherwise the tempfile keeps its default mode (0o600 from
///   `tempfile`'s implementation).
/// - When the destination is a symlink, the rename replaces the symlink
///   with a regular file (matches `tokio::fs::write`; the symlink target
///   is *not* followed).
///
/// # Errors
///
/// Fails when the blocking task cannot be joined, or when any step of
/// `atomic_write_blocking` (create, write, fsync, chmod, rename) fails.
pub async fn atomic_write(path: &Path, contents: &str) -> Result<()> {
    // The blocking closure must own its inputs, so copy both up front.
    let target = path.to_path_buf();
    let payload = contents.to_string();
    // tempfile is a synchronous API; run the whole write on a blocking
    // task so large writes / fsync do not stall the async runtime.
    let joined =
        tokio::task::spawn_blocking(move || atomic_write_blocking(&target, &payload)).await;
    joined.context("atomic_write task panicked")?
}
/// Synchronous implementation behind [`atomic_write`].
///
/// Steps, in order: create a named tempfile next to `path`, write
/// `contents` into it, fsync the tempfile, (Unix only) copy the mode of
/// an existing `path` onto the tempfile, then `persist` (rename) the
/// tempfile onto `path`.
///
/// # Errors
///
/// Each step attaches its own context message; any failure aborts the
/// write before the rename, so `path` is only replaced on full success.
fn atomic_write_blocking(path: &Path, contents: &str) -> Result<()> {
use std::io::Write;
// Determine the directory to create the tempfile in. We MUST use
// a path on the same filesystem as the destination so that the
// final `persist` rename is atomic (otherwise it fails with
// EXDEV on Linux when /tmp is a separate tmpfs mount).
//
// - `path.parent() == Some(non-empty)` -> use that parent.
// - `path.parent() == Some("")` (bare filename like "agent.md")
// or `None` -> use the current directory ("."), which is the
// same filesystem as where the file will land.
let parent = path.parent().filter(|p| !p.as_os_str().is_empty());
let parent_dir: &Path = parent.unwrap_or_else(|| Path::new("."));
let mut tmp = tempfile::NamedTempFile::new_in(parent_dir).with_context(|| {
format!(
"failed to create temporary file in {}",
parent_dir.display()
)
})?;
tmp.write_all(contents.as_bytes())
.with_context(|| format!("failed to write temporary file for {}", path.display()))?;
// Flush data and metadata to disk before the rename makes the file
// visible under its final name.
tmp.as_file()
.sync_all()
.with_context(|| format!("failed to fsync temporary file for {}", path.display()))?;
// On Unix, copy the existing file's mode onto the tempfile so
// permissions are preserved across the atomic rename.
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
// A metadata failure (e.g. the target does not exist yet) is not an
// error here: the tempfile simply keeps its default mode.
if let Ok(meta) = std::fs::metadata(path) {
let mode = meta.permissions().mode();
std::fs::set_permissions(
tmp.path(),
std::fs::Permissions::from_mode(mode),
)
.with_context(|| {
format!(
"failed to copy permissions from {} to temp file",
path.display()
)
})?;
}
}
// Rename the tempfile onto the destination; this is the only step that
// changes what readers of `path` observe.
tmp.persist(path)
.with_context(|| format!("failed to atomically rename into {}", path.display()))?;
Ok(())
}
/// Detailed parse result. Holds enough information to rewrite the
/// source on disk byte-faithfully when codemods apply.
///
/// Produced by [`parse_markdown_detailed`]; the raw fragments
/// (`leading_whitespace`, `front_matter_mapping`, `body_raw`) are the
/// inputs expected by [`reconstruct_source`].
#[derive(Debug)]
pub struct ParsedSource {
/// Typed front matter, after codemods have been applied to the
/// underlying mapping.
pub front_matter: FrontMatter,
/// Body for compilation, with leading/trailing whitespace trimmed
/// (matches the legacy `parse_markdown` second tuple element).
pub markdown_body: String,
/// Codemod outcome (report returned by the codemod registry run).
pub codemods: super::codemods::CodemodReport,
/// The codemod-rewritten front-matter mapping. Used to
/// reconstruct the source for the on-disk rewrite.
pub front_matter_mapping: serde_yaml::Mapping,
/// ASCII whitespace bytes that appeared before the opening `---` fence,
/// preserved verbatim so that source rewrite is byte-faithful.
/// Empty in the typical case where the file starts with `---`.
pub leading_whitespace: String,
/// The body region exactly as it appeared after the closing `---`,
/// byte-for-byte (no trim). Includes any leading newline.
pub body_raw: String,
/// SHA-256 of the original source bytes (lost-update protection):
/// a rewrite path can re-read the file and compare hashes.
pub source_sha256: [u8; 32],
}
/// Parse the markdown file, run the codemod registry on the front
/// matter in memory, and return both the typed `FrontMatter` and the
/// raw fragments needed to rewrite the source on disk byte-faithfully.
///
/// Use this from callers that may rewrite the source (the `compile`
/// command). Callers that only want the typed view of the front matter
/// should use the backward-compatible [`parse_markdown`] wrapper.
///
/// This is a thin wrapper that forwards to
/// `parse_markdown_detailed_with_registry` with the global
/// [`super::codemods::CODEMODS`] registry.
///
/// # Errors
///
/// Propagates every error of `parse_markdown_detailed_with_registry`
/// (missing fences, invalid YAML, codemod failure, typed deserialization
/// failure).
pub fn parse_markdown_detailed(content: &str) -> Result<ParsedSource> {
parse_markdown_detailed_with_registry(content, super::codemods::CODEMODS)
}
/// Variant of [`parse_markdown_detailed`] that allows injecting an
/// explicit codemod registry. Used by tests; production callers go
/// through the no-arg version that reads the global
/// [`super::codemods::CODEMODS`].
///
/// Processing order:
/// 1. Hash the raw `content` with SHA-256.
/// 2. Skip leading ASCII whitespace, require an opening `---`, and take
///    everything up to the first `"\n---"` as the YAML text.
/// 3. Parse the YAML into an untyped mapping.
/// 4. Apply `registry` to that mapping.
/// 5. Deserialize the (possibly rewritten) mapping into `FrontMatter`.
///
/// # Errors
///
/// Fails when the opening or closing fence is missing, when the YAML is
/// invalid or not a mapping at top level, when a codemod fails, or when
/// the mapping cannot be deserialized into `FrontMatter`.
pub(crate) fn parse_markdown_detailed_with_registry(
content: &str,
registry: &[&'static super::codemods::Codemod],
) -> Result<ParsedSource> {
use sha2::Digest;
// Lost-update protection: hash the raw input as it was provided, so
// a rewrite path can later re-read the file and compare.
let mut hasher = sha2::Sha256::new();
hasher.update(content.as_bytes());
let source_sha256: [u8; 32] = hasher.finalize().into();
// Allow leading whitespace before the opening fence (preserves
// historical leniency). We compute a byte offset into `content` so
// that `body_raw` extraction is purely byte-faithful, and we keep
// the whitespace prefix around so that source rewrites preserve
// anything the user (or their editor) put before the opening
// fence.
// Only ASCII whitespace bytes are counted, so `leading_ws` always
// falls on a UTF-8 character boundary and the slices below cannot panic.
let leading_ws = content.bytes().take_while(|b| b.is_ascii_whitespace()).count();
let leading_whitespace = content[..leading_ws].to_string();
let after_lead = &content[leading_ws..];
if !after_lead.starts_with("---") {
anyhow::bail!("Markdown file must start with YAML front matter (---)");
}
// Skip the three bytes of the opening `---`.
let after_open = &after_lead[3..];
// NOTE(review): `"\n---"` also matches a line that merely *starts* with
// three dashes (e.g. `----` or `---foo`); confirm this leniency is
// intended.
let end_idx = after_open
.find("\n---")
.context("Could not find closing --- for front matter")?;
let yaml_str = &after_open[..end_idx];
// `+ 4` skips the matched `"\n---"` itself (1 newline + 3 dashes).
let body_raw_slice = &after_open[end_idx + 4..];
let body_raw = body_raw_slice.to_string();
let markdown_body = body_raw_slice.trim().to_string();
// Stage 1: parse to untyped Value, reject non-mapping at top level.
let parsed_value: serde_yaml::Value =
serde_yaml::from_str(yaml_str).context("Failed to parse YAML front matter")?;
let mut mapping = match parsed_value {
serde_yaml::Value::Mapping(m) => m,
other => {
anyhow::bail!(
"YAML front matter must be a mapping/object, got {}",
yaml_value_kind(&other)
);
}
};
// Stage 2: run the codemod registry against the untyped mapping.
let report = super::codemods::apply_codemods_with(&mut mapping, registry)
.context("Failed to apply codemods")?;
// Stage 3: deserialize the (possibly modified) mapping into the
// typed FrontMatter. Errors here mean either the user wrote an
// unsupported shape or a codemod produced invalid output. The
// error context differs by case so the user can tell which.
// The mapping is cloned because the original is returned to the caller
// in `front_matter_mapping` for a later source rewrite.
let front_matter: FrontMatter = serde_yaml::from_value(
serde_yaml::Value::Mapping(mapping.clone()),
)
.with_context(|| {
if report.changed() {
let ids = report.applied_ids().join(", ");
format!(
"Failed to parse YAML front matter after applying codemods ({}); \
a codemod likely produced an invalid shape",
ids
)
} else {
"Failed to parse YAML front matter".to_string()
}
})?;
Ok(ParsedSource {
front_matter,
markdown_body,
codemods: report,
front_matter_mapping: mapping,
leading_whitespace,
body_raw,
source_sha256,
})
}
/// Rebuild the complete source text from the fragments of a parse.
///
/// The fragments are passed individually (rather than as a whole
/// [`ParsedSource`]) so callers that already destructured the parse do
/// not need to supply a typed `front_matter` they no longer hold.
///
/// The output is the concatenation of:
/// - `leading_whitespace` (typically empty)
/// - `---\n`
/// - `front_matter_mapping` serialized with `serde_yaml::to_string`
///   (required to end with `\n`)
/// - `---`
/// - `body_raw`, byte-for-byte
///
/// # Errors
///
/// Fails when the mapping cannot be serialized, or when the serialized
/// YAML does not end with a newline (the closing fence would otherwise
/// land on the same line as the last YAML entry).
pub fn reconstruct_source(
    leading_whitespace: &str,
    front_matter_mapping: &serde_yaml::Mapping,
    body_raw: &str,
) -> Result<String> {
    let yaml_block = serde_yaml::to_string(front_matter_mapping)
        .context("Failed to serialize codemod-rewritten front matter")?;

    // Defensive guard: fail loudly rather than emit a malformed
    // front-matter block if the serializer ever stops terminating its
    // output with a newline.
    if !yaml_block.ends_with('\n') {
        anyhow::bail!(
            "serde_yaml::to_string produced output without trailing newline; \
             cannot reconstruct front-matter block safely"
        );
    }

    // 7 == "---\n".len() + "---".len()
    let mut rebuilt =
        String::with_capacity(leading_whitespace.len() + yaml_block.len() + body_raw.len() + 7);
    rebuilt.push_str(leading_whitespace);
    rebuilt.push_str("---\n");
    rebuilt.push_str(&yaml_block);
    rebuilt.push_str("---");
    rebuilt.push_str(body_raw);
    Ok(rebuilt)
}
fn yaml_value_kind(v: &serde_yaml::Value) -> &'static str {
match v {
serde_yaml::Value::Null => "null",
serde_yaml::Value::Bool(_) => "bool",
serde_yaml::Value::Number(_) => "number",
serde_yaml::Value::String(_) => "string",
serde_yaml::Value::Sequence(_) => "sequence",
serde_yaml::Value::Mapping(_) => "mapping",
serde_yaml::Value::Tagged(_) => "tagged",
}
}
/// Backward-compatible parse: returns the typed front matter and the
/// trimmed body. New callers that may rewrite the source on disk
/// should use [`parse_markdown_detailed`] instead.
#[allow(dead_code)]
pub fn parse_markdown(content: &str) -> Result<(FrontMatter, String)> {
let parsed = parse_markdown_detailed(content)?;
Ok((parsed.front_matter, parsed.markdown_body))
}
/// Replace every occurrence of `placeholder` in `template` with
/// `replacement`, re-indenting the continuation lines of a multi-line
/// replacement.
///
/// For each occurrence, the text between the start of the current line
/// (or the end of the previous occurrence, whichever comes later) and the
/// placeholder is inspected: if it consists solely of whitespace it is
/// used as the indent and is prepended to every replacement line after
/// the first. Otherwise continuation lines are not indented.
///
/// Notes:
/// - Replacement lines are split with [`str::lines`], so `\r\n` line
///   endings in `replacement` are emitted as `\n`.
/// - A single trailing newline in `replacement` is preserved.
/// - An empty `placeholder` matches nothing: `template` is returned
///   unchanged. (Searching for an empty needle always succeeds at offset
///   0 without consuming input, which previously made this function loop
///   forever while growing its output.)
pub fn replace_with_indent(template: &str, placeholder: &str, replacement: &str) -> String {
    // Guard against the non-advancing match described above.
    if placeholder.is_empty() {
        return template.to_string();
    }

    let mut result = String::with_capacity(template.len());
    let mut remaining = template;

    while let Some(pos) = remaining.find(placeholder) {
        let (before, rest) = remaining.split_at(pos);

        // Text on the placeholder's line that precedes it; only usable
        // as indentation when it is pure whitespace.
        let line_start = before.rfind('\n').map_or(0, |i| i + 1);
        let prefix = &before[line_start..];
        let indent = if prefix.chars().all(char::is_whitespace) {
            prefix
        } else {
            ""
        };

        // Everything before the placeholder is copied through verbatim;
        // it already carries the indent of the first replacement line.
        result.push_str(before);

        for (idx, line) in replacement.lines().enumerate() {
            if idx > 0 {
                result.push('\n');
                result.push_str(indent);
            }
            result.push_str(line);
        }

        // `lines()` drops a trailing newline; put it back.
        if replacement.ends_with('\n') {
            result.push('\n');
        }

        remaining = &rest[placeholder.len()..];
    }

    result.push_str(remaining);
    result
}
/// Generate the top-level `parameters:` YAML block from front matter parameters.
///
/// The output is the line `parameters:` followed by the parameter list
/// serialized as a YAML sequence by `serde_yaml`, roughly:
/// ```yaml
/// parameters:
/// - name: clearMemory
///   displayName: Clear agent memory
///   type: boolean
///   default: false
/// ```
///
/// Returns an empty string if the parameters list is empty.
///
/// # Errors
///
/// Fails if any parameter name is not a valid ADO identifier, if a
/// `displayName`, `default`, or `values` entry contains an ADO
/// expression, or if serialization fails.
pub fn generate_parameters(parameters: &[PipelineParameter]) -> Result<String> {
    if parameters.is_empty() {
        return Ok(String::new());
    }

    // Validate before emitting anything: names must be plain ADO
    // identifiers and every string-ish field must be a literal, so that
    // neither YAML nor template expressions can be injected.
    for param in parameters {
        let name = &param.name;
        if !validate::is_valid_parameter_name(name) {
            anyhow::bail!(
                "Invalid parameter name '{}': must match [A-Za-z_][A-Za-z0-9_]* (ADO identifier)",
                name
            );
        }
        if let Some(display_name) = &param.display_name {
            validate::reject_ado_expressions(display_name, name, "displayName")?;
        }
        if let Some(default) = &param.default {
            validate::reject_ado_expressions_in_value(default, name, "default")?;
        }
        for allowed in param.values.iter().flatten() {
            validate::reject_ado_expressions_in_value(allowed, name, "values")?;
        }
    }

    let items = parameters
        .iter()
        .map(|p| serde_yaml::to_value(p).context("Failed to serialize pipeline parameter"))
        .collect::<Result<Vec<_>>>()?;
    let sequence_yaml = serde_yaml::to_string(&serde_yaml::Value::Sequence(items))
        .context("Failed to serialize parameters to YAML")?;

    // The serialized sequence has no key of its own; nest it under
    // `parameters:`.
    Ok(format!("parameters:\n{}", sequence_yaml))
}
/// Reject injection-prone content in the front matter fields that are
/// substituted into the pipeline YAML template.
///
/// Checked fields: `name`, `description`, and — when present —
/// `on.pipeline.name`, `on.pipeline.project`, every `on.pipeline.branches`
/// entry, and every include/exclude entry of `on.pr.branches` and
/// `on.pr.paths`. Each value is passed to
/// `validate::reject_pipeline_injection`, which is expected to refuse ADO
/// expressions (`${{`, `$(`, `$[`), the compiler's own template marker
/// delimiter (`{{`), and newlines.
///
/// # Errors
///
/// Returns the first rejection reported by the validator.
pub fn validate_front_matter_identity(front_matter: &FrontMatter) -> Result<()> {
    validate::reject_pipeline_injection(&front_matter.name, "name")?;
    validate::reject_pipeline_injection(&front_matter.description, "description")?;

    // Everything below concerns trigger configuration only.
    let Some(on) = &front_matter.on_config else {
        return Ok(());
    };

    // Pipeline-completion trigger: name, optional project, branch filters.
    if let Some(pipeline) = &on.pipeline {
        validate::reject_pipeline_injection(&pipeline.name, "on.pipeline.name")?;
        if let Some(project) = &pipeline.project {
            validate::reject_pipeline_injection(project, "on.pipeline.project")?;
        }
        for branch in &pipeline.branches {
            let label = format!("on.pipeline.branches entry {:?}", branch);
            validate::reject_pipeline_injection(branch, &label)?;
        }
    }

    // PR trigger: branch and path include/exclude filters.
    if let Some(pr) = &on.pr {
        if let Some(branches) = &pr.branches {
            for entry in &branches.include {
                let label = format!("on.pr.branches.include entry {:?}", entry);
                validate::reject_pipeline_injection(entry, &label)?;
            }
            for entry in &branches.exclude {
                let label = format!("on.pr.branches.exclude entry {:?}", entry);
                validate::reject_pipeline_injection(entry, &label)?;
            }
        }
        if let Some(paths) = &pr.paths {
            for entry in &paths.include {
                let label = format!("on.pr.paths.include entry {:?}", entry);
                validate::reject_pipeline_injection(entry, &label)?;
            }
            for entry in &paths.exclude {
                let label = format!("on.pr.paths.exclude entry {:?}", entry);
                validate::reject_pipeline_injection(entry, &label)?;
            }
        }
    }

    Ok(())
}
/// Combine user-defined parameters with auto-injected ones.
///
/// When `has_memory` is true and the user has not already declared a
/// parameter named `clearMemory`, a boolean `clearMemory` parameter
/// (default `false`) is placed at the front of the list. In every other
/// case the result is a plain copy of `user_params`.
pub fn build_parameters(user_params: &[PipelineParameter], has_memory: bool) -> Vec<PipelineParameter> {
    let user_defines_clear_memory = user_params.iter().any(|p| p.name == "clearMemory");
    if !has_memory || user_defines_clear_memory {
        return user_params.to_vec();
    }

    // Injected parameter goes first, followed by the user's own list in
    // its original order.
    let mut params = Vec::with_capacity(user_params.len() + 1);
    params.push(PipelineParameter {
        name: "clearMemory".to_string(),
        display_name: Some("Clear agent memory".to_string()),
        param_type: Some("boolean".to_string()),
        default: Some(serde_yaml::Value::Bool(false)),
        values: None,
    });
    params.extend_from_slice(user_params);
    params
}
/// Generate a schedule YAML block from a fuzzy schedule expression.
///
/// The branches configured on `config` are used as-is; when none are
/// configured the schedule is generated for `main` only. The actual YAML
/// is produced by `fuzzy_schedule::generate_schedule_yaml`, whose errors
/// are returned unchanged.
pub fn generate_schedule(name: &str, config: &super::types::ScheduleConfig) -> Result<String> {
    // Declared up front so a reference to it can outlive the `if` below.
    let default_branches = vec!["main".to_string()];
    let configured = config.branches();
    let effective_branches = if configured.is_empty() {
        &default_branches
    } else {
        configured
    };
    fuzzy_schedule::generate_schedule_yaml(config.expression(), name, effective_branches)
}
/// Generate the PR trigger section of the pipeline.
///
/// - If a `pr` trigger is explicitly configured on `on_config`, the
///   result of `pr_filters::generate_native_pr_trigger` is returned,
///   regardless of schedule or pipeline triggers.
/// - Otherwise, if a schedule or a pipeline-completion trigger exists, PR
///   triggers are disabled (`pr: none`) with a comment naming the reason.
/// - Otherwise an empty string is returned.
pub fn generate_pr_trigger(on_config: &Option<OnConfig>, has_schedule: bool) -> String {
    let on = on_config.as_ref();

    // An explicit PR trigger wins over schedule/pipeline suppression.
    if let Some(pr) = on.and_then(|o| o.pr.as_ref()) {
        return super::pr_filters::generate_native_pr_trigger(pr);
    }

    let has_pipeline_trigger = on.and_then(|o| o.pipeline.as_ref()).is_some();
    let reason = match (has_pipeline_trigger, has_schedule) {
        (false, false) => return String::new(),
        (true, true) => "only run on schedule or when upstream pipeline completes",
        (true, false) => "only run when upstream pipeline completes",
        (false, true) => "only run on schedule",
    };
    format!("# Disable PR triggers - {}\npr: none", reason)
}
/// Generate the CI trigger section of the pipeline.
///
/// Returns `trigger: none` when the pipeline runs on a schedule or on
/// completion of an upstream pipeline; otherwise returns an empty string.
pub fn generate_ci_trigger(on_config: &Option<OnConfig>, has_schedule: bool) -> String {
    let has_pipeline_trigger = matches!(on_config, Some(cfg) if cfg.pipeline.is_some());
    if !(has_pipeline_trigger || has_schedule) {
        return String::new();
    }
    String::from("trigger: none")
}
/// Generate the `pipelines:` resource YAML for a pipeline-completion
/// trigger.
///
/// Returns an empty string when `on_config` is `None` or has no
/// `pipeline` entry. Otherwise emits one pipeline resource with:
/// - `pipeline:` — an identifier derived from the pipeline name by
///   lowercasing it and replacing every character that is not an ASCII
///   letter or digit with `_`. Only ASCII is kept because ADO resource
///   identifiers accept `[-_A-Za-z0-9]` only; non-ASCII letters (which
///   `char::is_alphanumeric` would let through) are therefore mapped to
///   `_` as well.
/// - `source:` — the pipeline name as a single-quoted YAML scalar
///   (embedded `'` doubled).
/// - `project:` — only when configured, quoted the same way.
/// - `trigger:` — `true` when no branches are configured, otherwise a
///   `branches.include` list with each branch single-quoted.
///
/// The visible implementation never returns `Err`; the `Result` return
/// type is kept for interface compatibility.
pub fn generate_pipeline_resources(on_config: &Option<OnConfig>) -> Result<String> {
    let Some(trigger_config) = on_config else {
        return Ok(String::new());
    };
    let Some(pipeline) = &trigger_config.pipeline else {
        return Ok(String::new());
    };

    // Generate a valid resource identifier (snake_case) from the pipeline
    // name. ASCII-only on purpose — see the doc comment.
    let resource_id: String = pipeline
        .name
        .to_lowercase()
        .chars()
        .map(|c| if c.is_ascii_alphanumeric() { c } else { '_' })
        .collect();

    let mut yaml = String::from("pipelines:\n");
    yaml.push_str(&format!(" - pipeline: {}\n", resource_id));
    yaml.push_str(&format!(" source: '{}'\n", pipeline.name.replace('\'', "''")));
    if let Some(project) = &pipeline.project {
        yaml.push_str(&format!(" project: '{}'\n", project.replace('\'', "''")));
    }

    // If no branches specified, trigger on any branch
    if pipeline.branches.is_empty() {
        yaml.push_str(" trigger: true\n");
    } else {
        yaml.push_str(" trigger:\n");
        yaml.push_str(" branches:\n");
        yaml.push_str(" include:\n");
        for branch in &pipeline.branches {
            yaml.push_str(&format!(" - '{}'\n", branch.replace('\'', "''")));
        }
    }
    Ok(yaml)
}
/// Generate the repository resources YAML.
///
/// Emits one `- repository:` entry (with `type`, `name` and `ref`) per
/// item, separated by newlines, with no trailing newline. An empty input
/// yields an empty string. Values are interpolated as-is, without
/// quoting or escaping.
pub fn generate_repositories(repositories: &[Repository]) -> String {
    let mut yaml = String::new();
    for (idx, repo) in repositories.iter().enumerate() {
        if idx > 0 {
            yaml.push('\n');
        }
        yaml.push_str(&format!(
            "- repository: {}\n type: {}\n name: {}\n ref: {}",
            repo.repository, repo.repo_type, repo.name, repo.repo_ref
        ));
    }
    yaml
}
/// Generate the checkout steps YAML.
///
/// Emits one `- checkout: <name>` line per entry, joined with newlines
/// and without a trailing newline. An empty list yields an empty string.
pub fn generate_checkout_steps(checkout: &[String]) -> String {
    let mut steps = String::new();
    for (idx, name) in checkout.iter().enumerate() {
        if idx > 0 {
            steps.push('\n');
        }
        steps.push_str("- checkout: ");
        steps.push_str(name);
    }
    steps
}
/// Generate the step that checks out the trigger repository (`self`).
pub fn generate_checkout_self() -> String {
    String::from("- checkout: self")
}
// ──────────────────────────────────────────────────────────────────────────────
// Compact `repos:` lowering
// ──────────────────────────────────────────────────────────────────────────────
/// Lower a `repos:` list into the internal `(Vec<Repository>, Vec<String>)`
/// pair consumed by the rest of the compiler.
///
/// - A shorthand item is parsed with `parse_shorthand` and defaults to
///   type `git`, ref `refs/heads/main`, and is always checked out.
/// - A full item uses its explicit `alias` when set, otherwise the alias
///   is derived from its `name`; type, ref and the checkout flag come
///   from the entry.
///
/// The second element of the result lists the aliases to check out, in
/// input order.
///
/// # Errors
///
/// Fails when a shorthand cannot be parsed, when no alias can be derived,
/// when two items resolve to the same alias, or when an alias is one of
/// the names reserved by the `workspace:` resolver.
pub fn lower_repos(items: &[ReposItem]) -> Result<(Vec<Repository>, Vec<String>)> {
    let mut repositories: Vec<Repository> = Vec::with_capacity(items.len());
    let mut checkout: Vec<String> = Vec::new();
    let mut seen_aliases: HashSet<String> = HashSet::new();

    for item in items {
        let (alias, name, repo_type, repo_ref, do_checkout) = match item {
            ReposItem::Shorthand(spec) => {
                let (alias, name) = parse_shorthand(spec)?;
                (
                    alias,
                    name,
                    String::from("git"),
                    String::from("refs/heads/main"),
                    true,
                )
            }
            ReposItem::Full(entry) => {
                let alias = entry
                    .alias
                    .clone()
                    .map_or_else(|| derive_alias(&entry.name), Ok)?;
                (
                    alias,
                    entry.name.clone(),
                    entry.repo_type.clone(),
                    entry.repo_ref.clone(),
                    entry.checkout,
                )
            }
        };

        // Duplicates are reported before reserved names.
        let is_new = seen_aliases.insert(alias.clone());
        if !is_new {
            anyhow::bail!(
                "Duplicate repository alias '{}' in repos. \
                 Use the `alias` field (or `alias=org/repo` shorthand) to disambiguate.",
                alias
            );
        }

        if RESERVED_WORKSPACE_NAMES.contains(&alias.as_str()) {
            anyhow::bail!(
                "Repository alias '{}' is reserved by the 'workspace:' resolver ({:?}). \
                 Rename the alias to avoid ambiguity.",
                alias,
                RESERVED_WORKSPACE_NAMES
            );
        }

        repositories.push(Repository {
            repository: alias.clone(),
            repo_type,
            name,
            repo_ref,
        });
        if do_checkout {
            checkout.push(alias);
        }
    }

    Ok((repositories, checkout))
}
/// Parse a `repos:` shorthand string into `(alias, name)`.
///
/// - `"alias=org/repo"` → both sides of the first `=` are trimmed and
///   must be non-empty.
/// - `"org/repo"` (no `=`) → the alias is derived from the last path
///   segment via `derive_alias`; the name is the input string unchanged.
///
/// # Errors
///
/// Fails on an empty alias or name around `=`, or when no alias can be
/// derived from a plain name.
fn parse_shorthand(s: &str) -> Result<(String, String)> {
    let Some((raw_alias, raw_name)) = s.split_once('=') else {
        return Ok((derive_alias(s)?, s.to_string()));
    };

    let alias = raw_alias.trim();
    let name = raw_name.trim();
    if alias.is_empty() {
        anyhow::bail!("repos shorthand '{}' has an empty alias before '='", s);
    }
    if name.is_empty() {
        anyhow::bail!("repos shorthand '{}' has an empty name after '='", s);
    }
    Ok((alias.to_string(), name.to_string()))
}
/// Derive a repository alias from a full `org/repo` name.
///
/// Trailing slashes are ignored, then the text after the last remaining
/// `/` is taken (or the whole string when there is no `/`).
///
/// # Errors
///
/// Fails when the resulting alias is empty, i.e. when `name` is empty or
/// consists only of slashes.
fn derive_alias(name: &str) -> Result<String> {
    // "org/repo/" should behave like "org/repo".
    let without_trailing = name.trim_end_matches('/');
    let last_segment = match without_trailing.rfind('/') {
        Some(slash) => &without_trailing[slash + 1..],
        None => without_trailing,
    };

    if last_segment.is_empty() {
        anyhow::bail!(
            "Cannot derive a repository alias from '{}'. \
             Provide an explicit `alias` field.",
            name
        );
    }
    Ok(last_segment.to_string())
}
/// Resolve the `repos:` field in a `FrontMatter` into the canonical
/// `(Vec<Repository>, Vec<String>)` pair consumed by the rest of the compiler.
///
/// The legacy `repositories:` + `checkout:` fields are converted to `repos:`
/// by the `repos_unified` codemod (`src/compile/codemods/0001_repos_unified.rs`)
/// before typed deserialization, so by the time this function runs the only
/// shape it sees is `repos:`.
pub fn resolve_repos(front_matter: &FrontMatter) -> Result<(Vec<Repository>, Vec<String>)> {
if front_matter.repos.is_empty() {
return Ok((Vec::new(), Vec::new()));
}
lower_repos(&front_matter.repos)
}
/// Names that are reserved by the `workspace:` resolver and therefore cannot
/// be used as repository aliases / `checkout:` entries. If a user defines a
/// repository named `repo` and writes `workspace: repo`, the special-cased
/// reserved arm would silently win over the alias resolution, producing the
/// wrong working directory. We reject this at compile time instead.
///
/// Enforced in `lower_repos` (alias check) and `validate_checkout_list`
/// (checkout entry check); the same three names are the special-cased
/// values matched by `compute_effective_workspace`.
const RESERVED_WORKSPACE_NAMES: &[&str] = &["root", "repo", "self"];
/// Ensure no `checkout` entry lands in the same on-disk directory as the
/// `self` checkout.
///
/// In ADO multi-repo checkout, both `checkout: self` and an additional
/// `checkout: <alias>` land in `s/<RepositoryName>`, where
/// `<RepositoryName>` is `Build.Repository.Name` for `self` and the
/// trailing path segment of the `name:` field for each repository entry.
/// When these collide, the second checkout runs `git clean -ffdx` and
/// resets to its configured ref, silently wiping files that exist on the
/// trigger branch but not on the workspace ref. Failing at compile time
/// is far easier to diagnose than the resulting runtime "file not found"
/// errors.
///
/// `self_repo_name` is the trigger repo's `Build.Repository.Name` —
/// usually the trailing segment of the trigger repo's full name, inferred
/// from the local git remote. When it is `None` (e.g. compiling outside
/// an ADO clone, or in unit tests) the check is skipped because there is
/// no reliable identity for `self`.
///
/// Checkout entries with no matching repository are ignored here; they
/// are reported by `validate_checkout_list`.
///
/// # Errors
///
/// Fails on the first checkout entry whose repository name's last path
/// segment equals `self_repo_name`, compared ASCII case-insensitively.
pub fn validate_checkout_self_collision(
    repositories: &[Repository],
    checkout: &[String],
    self_repo_name: Option<&str>,
) -> Result<()> {
    let Some(self_name) = self_repo_name else {
        return Ok(());
    };

    for alias in checkout {
        let matching_repo = repositories.iter().find(|r| r.repository == *alias);
        if let Some(repo) = matching_repo {
            // `rsplit` always yields at least one item, so the fallback
            // to the whole name is never actually taken.
            let last_segment = repo
                .name
                .rsplit('/')
                .next()
                .unwrap_or(repo.name.as_str());

            // ADO is case-insensitive on Windows agents and
            // case-sensitive on Linux. Comparing case-insensitively
            // catches the collision regardless of agent OS — the
            // pipeline would break on at least one platform either way.
            if last_segment.eq_ignore_ascii_case(self_name) {
                anyhow::bail!(
                    "Checkout entry '{}' (repository name '{}') resolves to the same \
                     directory ('s/{}') as the trigger repository checked out as 'self'. \
                     The second checkout would overwrite the first, replacing files \
                     from the trigger branch with the workspace ref. Remove '{}' from \
                     'checkout:' — the 'self' checkout already provides access to this \
                     repository.",
                    alias,
                    repo.name,
                    self_name,
                    alias,
                );
            }
        }
    }

    Ok(())
}
/// Validate that every entry in `checkout` names a known repository alias
/// and does not use a name reserved by the `workspace:` resolver.
///
/// An empty `checkout` list is always valid.
///
/// # Errors
///
/// - An entry that matches no `repository` alias fails with a message
///   listing the available aliases. The list is sorted so the message is
///   identical from run to run (iterating a `HashSet` directly has an
///   unspecified order).
/// - An entry equal to one of `RESERVED_WORKSPACE_NAMES` fails with a
///   message asking for the alias to be renamed.
///
/// Entries are checked in order; the first failure is returned.
pub fn validate_checkout_list(repositories: &[Repository], checkout: &[String]) -> Result<()> {
    if checkout.is_empty() {
        return Ok(());
    }

    let repo_names: std::collections::HashSet<&str> =
        repositories.iter().map(|r| r.repository.as_str()).collect();

    for name in checkout {
        if !repo_names.contains(name.as_str()) {
            // Sort only on the error path; the set itself stays the
            // O(1) membership structure for the common case.
            let mut available: Vec<&str> = repo_names.iter().copied().collect();
            available.sort_unstable();
            anyhow::bail!(
                "Checkout entry '{}' not found in repositories. Available: {:?}",
                name,
                available
            );
        }
        if RESERVED_WORKSPACE_NAMES.contains(&name.as_str()) {
            anyhow::bail!(
                "Checkout entry '{}' uses a name reserved by the 'workspace:' resolver \
                 ({:?}). Rename the repository alias to avoid ambiguity with \
                 'workspace: {}'.",
                name,
                RESERVED_WORKSPACE_NAMES,
                name
            );
        }
    }
    Ok(())
}
/// Sentinel prefix used to encode a repository-alias workspace selection
/// in the string returned by [`compute_effective_workspace`]. The prefix is
/// only ever produced internally by `compute_effective_workspace` from a
/// user-supplied alias that has just been checked against the `checkout:`
/// list, so the encoded value never round-trips back through user input.
///
/// The encoded form is the prefix immediately followed by the alias,
/// e.g. `alias:tools` for the alias `tools`.
const WORKSPACE_ALIAS_PREFIX: &str = "alias:";
/// Compute the effective workspace based on explicit setting and checkout configuration.
///
/// Accepted values for `explicit_workspace`:
/// - `"root"` — `$(Build.SourcesDirectory)` (the checkout root)
/// - `"repo"` or `"self"` — the trigger repository's subfolder
/// - any repository alias listed in `checkout` — that repository's subfolder
///
/// When `explicit_workspace` is `None`, the result is `"repo"` if
/// `checkout` is non-empty and `"root"` otherwise.
///
/// Returns an encoded string (`"root"`, `"repo"`, or `"alias:<name>"`)
/// that [`generate_working_directory`] resolves to the actual ADO path
/// expression.
///
/// Side effect: `"repo"`/`"self"` with an empty `checkout` prints a
/// warning to stderr (the setting is still accepted). `agent_name` is
/// used only in that warning and in error messages.
///
/// # Errors
///
/// Fails when the value is not `root`/`repo`/`self` and either is not a
/// safe path segment or does not appear in `checkout`.
pub fn compute_effective_workspace(
explicit_workspace: &Option<String>,
checkout: &[String],
agent_name: &str,
) -> Result<String> {
let has_additional_checkouts = !checkout.is_empty();
match explicit_workspace {
Some(ws) => {
let ws = ws.as_str();
match ws {
"root" => Ok("root".to_string()),
// `self` is accepted as a synonym and normalized to "repo".
"repo" | "self" => {
if !has_additional_checkouts {
eprintln!(
"Warning: Agent '{}' has workspace: {} but no additional repositories in checkout. \
When only 'self' is checked out, $(Build.SourcesDirectory) already contains the repository content. \
The workspace setting has no effect in this case.",
agent_name, ws
);
}
Ok("repo".to_string())
}
alias => {
// Defense in depth: even though aliases are constrained
// by `validate_checkout_list` to match a `repository:`
// name, refuse anything that could escape the workspace
// root once embedded into the working directory path.
if !validate::is_safe_path_segment(alias) {
anyhow::bail!(
"Agent '{}' has workspace: '{}' which is not a safe path \
segment. Repository aliases must not be empty, contain '..', \
'/', '\\\\' or start with '.'.",
agent_name,
alias
);
}
// A single contains() check covers both "alias not in
// checkout" and "checkout is empty"; the empty case then
// gets its own, more specific message, and the general
// message lists what would have been valid.
if !checkout.iter().any(|c| c == alias) {
if checkout.is_empty() {
anyhow::bail!(
"Agent '{}' has workspace: '{}' but no additional repositories are checked out. \
A repository alias for workspace is only valid when at least one repository appears in 'checkout:'. \
Use 'root', 'repo' (or 'self'), or add the repository to the 'checkout:' list.",
agent_name,
alias
);
}
anyhow::bail!(
"Agent '{}' has workspace: '{}' which does not match any checked-out repository. \
Valid values: 'root', 'repo' (or 'self'), or one of {:?}",
agent_name,
alias,
checkout
);
}
Ok(format!("{}{}", WORKSPACE_ALIAS_PREFIX, alias))
}
}
}
// No explicit setting: default depends on whether extra repositories
// are checked out.
None if has_additional_checkouts => Ok("repo".to_string()),
None => Ok("root".to_string()),
}
}
/// Return the directory that holds the trigger ("self") repository.
///
/// The answer does not depend on `workspace:`; only the presence of
/// additional checkouts matters:
/// - empty `checkout` → `$(Build.SourcesDirectory)`, because ADO checks
///   `self` out directly into the root;
/// - non-empty `checkout` → `$(Build.SourcesDirectory)/$(Build.Repository.Name)`,
///   because ADO then places every checked-out repository, `self` included,
///   in a subfolder named after the repository.
///
/// Use this to anchor paths to files that ship in the trigger repository
/// (for example the agent markdown source and the compiled pipeline yaml),
/// no matter where `workspace:` points the agent.
pub fn generate_trigger_repo_directory(checkout: &[String]) -> String {
    let directory = match checkout {
        [] => "$(Build.SourcesDirectory)",
        _ => "$(Build.SourcesDirectory)/$(Build.Repository.Name)",
    };
    directory.to_string()
}
/// Translate an encoded workspace value into the working directory path expression.
///
/// `effective_workspace` is expected to be a value produced by
/// [`compute_effective_workspace`]:
/// - `"<WORKSPACE_ALIAS_PREFIX><alias>"` → `$(Build.SourcesDirectory)/<alias>`
/// - `"repo"` → `$(Build.SourcesDirectory)/$(Build.Repository.Name)`
/// - `"root"` → `$(Build.SourcesDirectory)`
///
/// Any other value trips a `debug_assert!` (a panic in builds with debug
/// assertions enabled) and otherwise falls back to `$(Build.SourcesDirectory)`.
pub fn generate_working_directory(effective_workspace: &str) -> String {
    match effective_workspace.strip_prefix(WORKSPACE_ALIAS_PREFIX) {
        // Alias sentinel: the remainder is the repository subfolder name.
        Some(alias) => format!("$(Build.SourcesDirectory)/{}", alias),
        None => match effective_workspace {
            "repo" => "$(Build.SourcesDirectory)/$(Build.Repository.Name)".to_string(),
            "root" => "$(Build.SourcesDirectory)".to_string(),
            // Only "root", "repo" and the alias sentinel are ever produced
            // upstream, so reaching this arm means a programming error, not
            // bad user input. Fall back to the safest path.
            other => {
                debug_assert!(false, "unexpected effective workspace value: {other}");
                "$(Build.SourcesDirectory)".to_string()
            }
        },
    }
}
/// Render the `timeoutInMinutes` job property from `engine.timeout-minutes`.
///
/// Yields `timeoutInMinutes: <minutes>` when the engine reports a timeout and
/// an empty string when none is configured.
pub fn generate_job_timeout(front_matter: &FrontMatter) -> String {
    front_matter
        .engine
        .timeout_minutes()
        .map(|minutes| format!("timeoutInMinutes: {}", minutes))
        .unwrap_or_default()
}
/// Format a single step's YAML string as a list item with a one-space base indent.
///
/// The input is trimmed, then:
/// - the first line has any leading `---` document marker removed, is trimmed,
///   and is emitted as `" - <content>"`;
/// - every following line is emitted verbatim behind a three-space prefix so
///   that it lines up with the content of the first line (the column right
///   after `"- "`). This mirrors [`format_step_yaml_indented`], which indents
///   continuation lines by `base_indent + 2`.
///
/// Lines are joined with `\n`; there is no trailing newline. An empty (or
/// whitespace-only) input yields an empty string.
#[allow(dead_code)]
pub fn format_step_yaml(step_yaml: &str) -> String {
    // Prefix of the list item itself: one space of indent plus the "- " marker.
    const ITEM_PREFIX: &str = " - ";
    // Continuation lines must start where the first line's content starts,
    // otherwise later keys of a multi-line step fall at the dash's column and
    // no longer belong to the same list item.
    const CONTINUATION_PREFIX: &str = "   ";

    step_yaml
        .trim()
        .lines()
        .enumerate()
        .map(|(i, line)| {
            if i == 0 {
                format!("{}{}", ITEM_PREFIX, line.trim_start_matches("---").trim())
            } else {
                format!("{}{}", CONTINUATION_PREFIX, line)
            }
        })
        .collect::<Vec<_>>()
        .join("\n")
}
/// Format a single step's YAML string as a list item at a caller-chosen indent.
///
/// The input is trimmed first. Its first line loses any leading `---` marker,
/// is trimmed, and is written as `<base_indent spaces>- <content>`. Each later
/// line is written unchanged behind `base_indent + 2` spaces, which places it
/// under the first line's content. Lines are separated by `\n` with no
/// trailing newline; empty input produces an empty string.
pub fn format_step_yaml_indented(step_yaml: &str, base_indent: usize) -> String {
    let item_prefix = format!("{}- ", " ".repeat(base_indent));
    let continuation_prefix = " ".repeat(base_indent + 2);

    let mut rows = step_yaml.trim().lines();
    let mut rendered = String::new();

    // Head line: becomes the "- " list item.
    if let Some(head) = rows.next() {
        rendered.push_str(&item_prefix);
        rendered.push_str(head.trim_start_matches("---").trim());
    }

    // Remaining lines: kept verbatim, shifted under the item's content.
    for row in rows {
        rendered.push('\n');
        rendered.push_str(&continuation_prefix);
        rendered.push_str(row);
    }

    rendered
}
/// Render a list of steps as YAML list items using [`format_step_yaml`].
///
/// Each step is serialized with `serde_yaml::to_string` and then formatted;
/// steps that fail to serialize are silently left out. The formatted steps
/// are joined with `\n`.
#[allow(dead_code)]
pub fn format_steps_yaml(steps: &[serde_yaml::Value]) -> String {
    let mut rendered: Vec<String> = Vec::with_capacity(steps.len());
    for step in steps {
        // Serialization failures are skipped rather than reported.
        if let Ok(yaml) = serde_yaml::to_string(step) {
            rendered.push(format_step_yaml(&yaml));
        }
    }
    rendered.join("\n")
}
/// Render a list of steps as YAML list items at a caller-chosen indent.
///
/// Each step is serialized with `serde_yaml::to_string` and passed to
/// [`format_step_yaml_indented`] together with `base_indent`; steps that fail
/// to serialize are silently left out. The formatted steps are joined with
/// `\n`.
pub fn format_steps_yaml_indented(steps: &[serde_yaml::Value], base_indent: usize) -> String {
    let mut rendered: Vec<String> = Vec::with_capacity(steps.len());
    for step in steps {
        // Serialization failures are skipped rather than reported.
        if let Ok(yaml) = serde_yaml::to_string(step) {
            rendered.push(format_step_yaml_indented(&yaml, base_indent));
        }
    }
    rendered.join("\n")
}
/// Turn an arbitrary string into a dash-separated, lowercase filename stem.
///
/// The input is lowercased; every run of non-alphanumeric characters becomes
/// a single `-`; runs at the very start or end are dropped entirely, so the
/// result never begins or ends with a dash. Alphanumeric is judged by
/// `char::is_alphanumeric`, so non-ASCII letters and digits are kept. Input
/// without any alphanumeric character yields an empty string.
pub fn sanitize_filename(name: &str) -> String {
    let mut slug = String::with_capacity(name.len());
    // Set when at least one separator character has been seen since the last
    // kept character; the dash is only written once another kept character
    // follows, which collapses runs and trims the tail.
    let mut separator_pending = false;

    for ch in name.to_lowercase().chars() {
        if !ch.is_alphanumeric() {
            separator_pending = true;
            continue;
        }
        // Skip the dash when nothing has been written yet (leading run).
        if separator_pending && !slug.is_empty() {
            slug.push('-');
        }
        separator_pending = false;
        slug.push(ch);
    }

    slug
}
/// Emit `s` as a YAML double-quoted scalar (always quoted, never plain).
///