-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcreate_pull_request.rs
More file actions
2525 lines (2296 loc) · 93 KB
/
create_pull_request.rs
File metadata and controls
2525 lines (2296 loc) · 93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//! Create pull request safe output tool
use log::{debug, info, warn};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tokio::process::Command;
use ado_aw_derive::SanitizeConfig;
use crate::safeoutputs::{ExecutionContext, ExecutionResult, Executor, ToolResult, Validate};
use crate::sanitize::{SanitizeContent, sanitize as sanitize_text, sanitize_config};
use crate::tool_result;
use crate::validate::reject_pipeline_injection;
use anyhow::{Context, ensure};
/// Maximum allowed patch file size in bytes: 5 MiB (5 * 1024 * 1024 = 5,242,880).
/// A patch whose on-disk size exceeds this is rejected before its content is read.
const MAX_PATCH_SIZE_BYTES: u64 = 5 * 1024 * 1024;
/// Default maximum number of files allowed in a single PR; used when the
/// `max-files` option is not set in the tool configuration.
const DEFAULT_MAX_FILES: usize = 100;
/// Basenames of runtime manifest, lock, build and container files that are
/// protected by default. Every entry is lowercase so that a lowercased file name
/// can be compared against the list case-insensitively.
///
/// These are dependency/build files that could be modified to introduce supply
/// chain attacks.
// NOTE(review): the matching code is outside this section — presumably it lowercases
// each patched file's basename before looking it up here; confirm against the caller.
const PROTECTED_MANIFEST_BASENAMES: &[&str] = &[
// npm / Node.js
"package.json",
"package-lock.json",
"yarn.lock",
"pnpm-lock.yaml",
"npm-shrinkwrap.json",
// Go
"go.mod",
"go.sum",
// Python
"requirements.txt",
"pipfile",
"pipfile.lock",
"pyproject.toml",
"setup.py",
"setup.cfg",
"poetry.lock",
// Ruby
"gemfile",
"gemfile.lock",
// Java / Kotlin / Gradle
"pom.xml",
"build.gradle",
"build.gradle.kts",
"settings.gradle",
"settings.gradle.kts",
"gradle.properties",
// .NET / C#
"directory.build.props",
"directory.build.targets",
"global.json",
// Rust
"cargo.toml",
"cargo.lock",
// Bun
"bun.lockb",
"bunfig.toml",
// Deno
"deno.json",
"deno.jsonc",
"deno.lock",
// Elixir
"mix.exs",
"mix.lock",
// Haskell
"stack.yaml",
"stack.yaml.lock",
// Python (uv)
"uv.lock",
// .NET (additional)
"nuget.config",
"directory.packages.props",
// Docker / container
"dockerfile",
"docker-compose.yml",
"docker-compose.yaml",
"compose.yml",
"compose.yaml",
];
/// Path prefixes that are protected by default.
/// Files under these paths are blocked unless protected-files is set to "allowed".
///
/// Every prefix ends with `/`, i.e. each entry names a directory. The list covers
/// CI/pipeline definition directories and agent-tooling configuration directories.
// NOTE(review): unlike the manifest basenames, these entries carry no note about
// case-insensitive matching — confirm how the caller compares them.
const PROTECTED_PATH_PREFIXES: &[&str] = &[
".github/",
".pipelines/",
".azure-pipelines/",
".agents/",
".claude/",
".codex/",
".copilot/",
];
/// Exact repository-relative paths that are protected by default.
///
/// Note that only `CODEOWNERS` is at the repo root; `docs/CODEOWNERS` lives in a
/// subdirectory. `.github/CODEOWNERS` is not listed because everything under
/// `.github/` is already covered by `PROTECTED_PATH_PREFIXES`.
const PROTECTED_EXACT_PATHS: &[&str] = &["CODEOWNERS", "docs/CODEOWNERS"];
/// Resolve a reviewer identifier (email, display name, or ID) to an Azure DevOps identity ID.
///
/// If the input is already a GUID in the canonical `8-4-4-4-12` hexadecimal layout it is
/// returned unchanged and no network request is made. Otherwise the Azure DevOps Identity
/// Picker API on `vssps.dev.azure.com` is queried with the reviewer string.
///
/// # Arguments
/// * `client` - HTTP client used for the lookup request.
/// * `organization` - Azure DevOps organization name; interpolated into the lookup URL.
/// * `token` - Access token, sent as the password of HTTP basic auth with an empty user name.
/// * `reviewer` - Email address, display name, or identity GUID to resolve.
///
/// # Returns
/// `Some(id)` with the GUID itself or the `localId` of the selected identity. `None` when
/// the request fails, the service answers with a non-success status, the body is not valid
/// JSON, or no identity carrying a `localId` is found; each of those cases logs a warning.
async fn resolve_reviewer_identity(
    client: &reqwest::Client,
    organization: &str,
    token: &str,
    reviewer: &str,
) -> Option<String> {
    // Only a well-formed GUID may bypass the lookup; anything else (including 36-char
    // strings with misplaced hyphens) is treated as an email / display name.
    if reviewer_is_canonical_guid(reviewer) {
        debug!("Reviewer '{}' is already a GUID", reviewer);
        return Some(reviewer.to_string());
    }

    // Use Identity Picker API on vssps.dev.azure.com to resolve email or display name
    let identity_url = format!(
        "https://vssps.dev.azure.com/{}/_apis/identitypicker/identities?api-version=7.1-preview.1",
        organization
    );
    debug!("Identity lookup URL: {}", identity_url);

    let query_body = serde_json::json!({
        "query": reviewer,
        "identityTypes": ["user"],
        "operationScopes": ["ims", "source"],
        "properties": ["DisplayName", "Mail", "SubjectDescriptor"],
        "filterByAncestorEntityIds": [],
        "filterByEntityIds": [],
        "options": {
            "MinResults": 1,
            "MaxResults": 5
        }
    });

    let response = match client
        .post(&identity_url)
        .basic_auth("", Some(token))
        .json(&query_body)
        .send()
        .await
    {
        Ok(response) => response,
        Err(e) => {
            warn!("Identity lookup request failed for '{}': {}", reviewer, e);
            return None;
        }
    };

    if !response.status().is_success() {
        warn!(
            "Identity lookup failed for '{}': {}",
            reviewer,
            response.status()
        );
        return None;
    }

    let data = match response.json::<serde_json::Value>().await {
        Ok(data) => data,
        Err(e) => {
            warn!(
                "Failed to parse identity response for '{}': {}",
                reviewer, e
            );
            return None;
        }
    };

    let resolved = select_reviewer_local_id(&data, reviewer);
    if resolved.is_none() {
        warn!("No identity found for reviewer '{}'", reviewer);
    }
    resolved
}

/// Return `true` when `candidate` is a GUID in canonical textual form: exactly 36 ASCII
/// characters, hyphens at byte offsets 8, 13, 18 and 23, hexadecimal digits elsewhere.
fn reviewer_is_canonical_guid(candidate: &str) -> bool {
    let bytes = candidate.as_bytes();
    bytes.len() == 36
        && bytes.iter().enumerate().all(|(index, byte)| match index {
            8 | 13 | 18 | 23 => *byte == b'-',
            _ => byte.is_ascii_hexdigit(),
        })
}

/// Read a string property of an identity object, lowercased; a missing or non-string
/// property yields the empty string.
fn reviewer_identity_field_lower(identity: &serde_json::Value, key: &str) -> String {
    identity
        .get(key)
        .and_then(|value| value.as_str())
        .unwrap_or_default()
        .to_lowercase()
}

/// Extract the `localId` string of an identity object, if present.
fn reviewer_identity_local_id(identity: &serde_json::Value) -> Option<String> {
    identity
        .get("localId")
        .and_then(|id| id.as_str())
        .map(str::to_owned)
}

/// Pick an identity ID out of an Identity Picker response body.
///
/// Navigates `results[0].identities`, prefers an identity whose `displayName` or `mail`
/// equals `reviewer` case-insensitively, and otherwise falls back to the first identity.
fn select_reviewer_local_id(data: &serde_json::Value, reviewer: &str) -> Option<String> {
    let identities = data
        .get("results")?
        .as_array()?
        .first()?
        .get("identities")?
        .as_array()?;

    // Try to find exact match first (by email or display name)
    let wanted = reviewer.to_lowercase();
    let exact_match = identities
        .iter()
        .filter(|identity| {
            reviewer_identity_field_lower(identity, "displayName") == wanted
                || reviewer_identity_field_lower(identity, "mail") == wanted
        })
        .find_map(reviewer_identity_local_id);
    if let Some(local_id) = exact_match {
        debug!("Resolved reviewer '{}' to ID '{}'", reviewer, local_id);
        return Some(local_id);
    }

    // Fall back to first result if no exact match.
    // NOTE(review): this can select an identity that merely resembles the query;
    // confirm that adding such a user as reviewer is acceptable.
    let local_id = reviewer_identity_local_id(identities.first()?)?;
    debug!(
        "Resolved reviewer '{}' to first match ID '{}'",
        reviewer, local_id
    );
    Some(local_id)
}
/// Parameters for creating a pull request
// NOTE(review): this struct derives `JsonSchema`; schemars normally copies `///` doc
// comments into the generated schema as descriptions, so the `///` wording below is
// presumably visible to whoever calls the tool — confirm before rewording it.
// Maintainer notes are therefore written as plain `//` comments.
#[derive(Deserialize, JsonSchema)]
pub struct CreatePrParams {
/// Title for the pull request; should be concise and descriptive
// Length bounds are enforced by the `Validate` impl for this struct.
pub title: String,
/// Description of the changes in the pull request. Use markdown formatting.
/// Explain what changes were made and why.
// A minimum length is enforced by the `Validate` impl for this struct.
pub description: String,
/// Repository to create the PR in. Use "self" for the pipeline's own repository,
/// or a repository alias from the checkout list for other repositories.
/// Required when multiple repositories are checked out.
// Optional on the wire (`#[serde(default)]` → `None`); when present it is passed
// through `reject_pipeline_injection` during validation.
#[serde(default)]
pub repository: Option<String>,
/// Labels to add to the PR for categorization.
/// These may be subject to an operator-configured allowlist.
// Optional on the wire (`#[serde(default)]` → empty list). The `Validate` impl for
// this struct does not inspect the labels.
#[serde(default)]
pub labels: Vec<String>,
}
impl Validate for CreatePrParams {
    /// Check the agent-supplied PR parameters before they are accepted.
    ///
    /// Lengths are measured in characters (Unicode scalar values), not bytes, so the
    /// limits agree with the error messages and with the character-based title limit
    /// applied later in `execute_impl`.
    ///
    /// # Errors
    /// Returns an error when the title has fewer than 5 or more than 200 characters,
    /// when the description has fewer than 10 characters, or when `repository` is
    /// present and rejected by `reject_pipeline_injection`.
    fn validate(&self) -> anyhow::Result<()> {
        let title_chars = self.title.chars().count();
        ensure!(title_chars >= 5, "PR title must be at least 5 characters");
        ensure!(
            title_chars <= 200,
            "PR title must be at most 200 characters"
        );

        let description_chars = self.description.chars().count();
        ensure!(
            description_chars >= 10,
            "PR description must be at least 10 characters"
        );

        if let Some(repository) = &self.repository {
            reject_pipeline_injection(repository, "repository")?;
        }
        Ok(())
    }
}
/// Internal params struct mirroring CreatePrResult fields for the tool_result! macro.
/// The actual MCP parameters come from CreatePrParams; this struct enables the macro's
/// TryFrom generation while CreatePrResult is constructed via CreatePrResult::new().
// NOTE(review): derives `JsonSchema`, so `///` comments here presumably end up in a
// generated schema; maintainer notes are kept as `//` comments for that reason.
// The field list must stay in step with the struct declared inside `tool_result!`.
#[derive(Deserialize, JsonSchema)]
struct CreatePrResultFields {
title: String,
description: String,
source_branch: String,
patch_file: String,
repository: String,
// Missing in the input → empty list.
#[serde(default)]
agent_labels: Vec<String>,
#[serde(skip_serializing_if = "Option::is_none")]
base_commit: Option<String>,
/// SHA-256 hex digest of the patch file, recorded at staging time.
patch_sha256: String,
}
// Empty impl: no method is overridden, so this type uses whatever default behavior
// the `Validate` trait (defined outside this file section) provides.
impl Validate for CreatePrResultFields {}
// Declares `CreatePrResult`, the record stored as the safe output of the
// "create-pull-request" tool, through the project's `tool_result!` macro.
// NOTE(review): the macro is defined elsewhere; judging from `CreatePrResult::new` it
// appears to add a `name` field and a `NAME` constant to the struct — confirm in
// `crate::tool_result`. Only `//` comments are added here so the token stream the
// macro receives is unchanged.
tool_result! {
name = "create-pull-request",
write = true,
params = CreatePrResultFields,
/// Result of creating a pull request - stored as safe output
pub struct CreatePrResult {
/// Title for the pull request
title: String,
/// Description/body of the pull request (markdown)
description: String,
/// Source branch name (generated or provided)
source_branch: String,
/// Path to the patch file in the safe outputs directory
patch_file: String,
/// Repository alias ("self" or alias from checkout list)
repository: String,
/// Agent-provided labels (validated against allowed-labels at execution time)
#[serde(default)]
agent_labels: Vec<String>,
/// Base commit SHA recorded at patch generation time (merge-base of HEAD and
/// the upstream branch). When present, Stage 3 uses this as the parent commit
/// for the ADO Push API, ensuring the patch applies cleanly even if the target
/// branch has advanced since the agent ran. Falls back to resolving the live
/// target branch HEAD via the ADO refs API when absent (backward compatibility).
///
/// Note: this is the merge-base, not the target branch HEAD. The PR diff in ADO
/// compares file states and displays correctly regardless; however, the branch
/// history shows a parent older than current main. This is normal for topic
/// branches and is resolved when the PR is merged.
#[serde(skip_serializing_if = "Option::is_none")]
base_commit: Option<String>,
/// SHA-256 hex digest of the patch file recorded at Stage 1.
/// Stage 3 re-hashes the file and rejects mismatches — catches
/// patch file tampering between stages.
patch_sha256: String,
}
}
impl SanitizeContent for CreatePrResult {
    /// Sanitize the free-form fields of this result in place.
    ///
    /// `title` and `description` go through the text sanitizer; `repository` and every
    /// entry of `agent_labels` go through the config sanitizer.
    // NOTE(review): `source_branch`, `patch_file`, `base_commit` and `patch_sha256` are
    // left untouched here — presumably because they are produced by the tool rather
    // than the agent; confirm.
    fn sanitize_content_fields(&mut self) {
        self.title = sanitize_text(&self.title);
        self.description = sanitize_text(&self.description);
        self.repository = sanitize_config(&self.repository);
        self.agent_labels
            .iter_mut()
            .for_each(|label| *label = sanitize_config(label));
    }
}
impl CreatePrResult {
    /// Assemble a `CreatePrResult` from its individual parts.
    ///
    /// The `name` field is not a parameter: it is always filled from `Self::NAME`.
    /// Every argument is moved unchanged into the field of the same name.
    pub fn new(
        title: String,
        description: String,
        source_branch: String,
        patch_file: String,
        repository: String,
        agent_labels: Vec<String>,
        base_commit: Option<String>,
        patch_sha256: String,
    ) -> Self {
        let name = Self::NAME.to_string();
        Self {
            name,
            // What the PR says
            title,
            description,
            agent_labels,
            // Where it goes
            repository,
            source_branch,
            // What it contains
            patch_file,
            patch_sha256,
            base_commit,
        }
    }
}
/// Behavior when the patch is empty or all files were excluded, i.e. when no file
/// changes are collected after the patch has been applied.
///
/// Serialized in kebab-case: `warn`, `error`, `ignore`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum IfNoChanges {
/// Succeed with a warning (default): the step returns a warning result and a
/// warning is logged.
Warn,
/// Fail the pipeline step: the step returns a failure result.
Error,
/// Succeed silently: the step returns a success result; only an info-level
/// message is logged.
Ignore,
}
/// File protection policy controlling whether manifest/CI files can be modified
///
/// Serialized in kebab-case: `blocked`, `allowed`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum ProtectedFiles {
/// Block modifications to protected files (default): a patch that touches any
/// protected file is rejected with a failure result listing the offending paths.
Blocked,
/// Allow modifications to all files: the protected-file check is skipped.
Allowed,
}
/// Configuration for the create-pull-request tool (specified in front matter)
///
/// Every key is optional; a missing key takes the serde default noted on the field,
/// which is the same value `CreatePrConfig::default()` produces.
///
/// Example front matter:
/// ```yaml
/// safe-outputs:
/// create-pull-request:
/// target-branch: main
/// draft: true
/// auto-complete: true
/// delete-source-branch: true
/// squash-merge: true
/// title-prefix: "[Bot] "
/// if-no-changes: warn
/// max-files: 100
/// protected-files: blocked
/// excluded-files:
/// - "*.lock"
/// allowed-labels:
/// - "automated"
/// reviewers:
/// - "user@example.com"
/// labels:
/// - "automated"
/// - "agent-created"
/// ```
#[derive(Debug, Clone, SanitizeConfig, Serialize, Deserialize)]
pub struct CreatePrConfig {
/// Target branch to merge into (default: "main").
/// The worktree used for applying the patch is created from `origin/<target-branch>`,
/// falling back to the plain branch name.
#[serde(default = "default_target_branch", rename = "target-branch")]
pub target_branch: String,
/// Whether to create the PR as a draft (default: true)
#[serde(default = "default_draft")]
pub draft: bool,
/// Whether to set auto-complete on the PR (default: false).
/// Auto-complete cannot be set on a draft PR; a warning is logged when both
/// `draft` and `auto-complete` are true.
#[serde(default, rename = "auto-complete")]
pub auto_complete: bool,
/// Whether to delete source branch after merge (default: true)
#[serde(default = "default_true", rename = "delete-source-branch")]
pub delete_source_branch: bool,
/// Whether to squash commits on merge (default: true)
#[serde(default = "default_true", rename = "squash-merge")]
pub squash_merge: bool,
/// Prefix to prepend to all PR titles (default: none).
/// The prefixed title must not exceed 400 characters, otherwise execution fails.
#[serde(default, rename = "title-prefix")]
pub title_prefix: Option<String>,
/// Behavior when the patch is empty: "warn" (default), "error", "ignore"
#[serde(default = "default_if_no_changes", rename = "if-no-changes")]
pub if_no_changes: IfNoChanges,
/// Maximum number of files allowed in a single PR (default: 100).
/// Files are counted per diff block in the patch; exceeding the limit fails execution.
#[serde(default = "default_max_files", rename = "max-files")]
pub max_files: usize,
/// File protection policy: "blocked" (default) or "allowed"
/// Controls whether manifest/CI files can be modified
#[serde(default = "default_protected_files", rename = "protected-files")]
pub protected_files: ProtectedFiles,
/// Glob patterns for files to exclude from the patch (default: none).
/// Each pattern is passed to git as `--exclude=<pattern>` when the patch is applied,
/// and matching paths are also skipped by the protected-files check.
#[serde(default, rename = "excluded-files")]
pub excluded_files: Vec<String>,
/// Allowlist of labels the agent is permitted to use.
/// If empty, any labels are accepted.
#[serde(default, rename = "allowed-labels")]
pub allowed_labels: Vec<String>,
/// Reviewers to add to the PR (email addresses or user IDs)
// NOTE(review): `resolve_reviewer_identity` in this file also accepts display
// names — presumably these entries are resolved through it; confirm at the call site.
#[serde(default)]
pub reviewers: Vec<String>,
/// Labels to add to the PR
#[serde(default)]
pub labels: Vec<String>,
/// Work item IDs to link to the PR
#[serde(default, rename = "work-items")]
pub work_items: Vec<i32>,
/// Whether to record branch info in failure data when PR creation fails (default: true).
/// When enabled, the failure response includes the pushed branch name and target branch
/// so operators can manually create the PR. No work item is created automatically.
#[serde(default = "default_true", rename = "fallback-record-branch")]
pub fallback_record_branch: bool,
/// Whether to include agent execution stats in the PR description (default: true).
#[serde(default = "default_true", rename = "include-stats")]
pub include_stats: bool,
}
/// Serde default for `target-branch`: the branch name `"main"`.
fn default_target_branch() -> String {
    String::from("main")
}

/// Serde default for `draft`: PRs are created as drafts unless configured otherwise.
fn default_draft() -> bool {
    true
}

/// Shared serde default for boolean options that are switched on unless the
/// configuration turns them off.
fn default_true() -> bool {
    true
}
/// Serde default for `if-no-changes`: `IfNoChanges::Warn`.
fn default_if_no_changes() -> IfNoChanges {
IfNoChanges::Warn
}
/// Serde default for `max-files`: `DEFAULT_MAX_FILES`.
fn default_max_files() -> usize {
DEFAULT_MAX_FILES
}
/// Serde default for `protected-files`: `ProtectedFiles::Blocked`.
fn default_protected_files() -> ProtectedFiles {
ProtectedFiles::Blocked
}
impl Default for CreatePrConfig {
fn default() -> Self {
Self {
target_branch: default_target_branch(),
draft: true,
auto_complete: false,
delete_source_branch: true,
squash_merge: true,
title_prefix: None,
if_no_changes: default_if_no_changes(),
max_files: default_max_files(),
protected_files: default_protected_files(),
excluded_files: Vec::new(),
allowed_labels: Vec::new(),
reviewers: Vec::new(),
labels: Vec::new(),
work_items: Vec::new(),
fallback_record_branch: true,
include_stats: true,
}
}
}
/// Guard to ensure git worktree is cleaned up on drop
///
/// Dropping the guard runs `git worktree remove --force <worktree_path>` with
/// `repo_dir` as the working directory. Cleanup is best effort: any failure to
/// spawn or run git is ignored.
struct WorktreeGuard {
    /// Directory of the repository that owns the worktree; git is run from here.
    repo_dir: std::path::PathBuf,
    /// Location of the worktree to remove.
    worktree_path: std::path::PathBuf,
}

impl WorktreeGuard {
    /// Build the cleanup command without running it.
    ///
    /// The worktree path is handed to git as an OS string rather than through a
    /// lossy UTF-8 conversion, so a path containing non-UTF-8 bytes still names
    /// the directory that was actually created.
    fn cleanup_command(&self) -> std::process::Command {
        let mut command = std::process::Command::new("git");
        command
            .args(["worktree", "remove", "--force"])
            .arg(&self.worktree_path)
            .current_dir(&self.repo_dir);
        command
    }
}

impl Drop for WorktreeGuard {
    fn drop(&mut self) {
        // Best effort cleanup - ignore errors. This uses the blocking
        // `std::process::Command` because `drop` cannot await.
        let _ = self.cleanup_command().output();
    }
}
#[async_trait::async_trait]
impl Executor for CreatePrResult {
fn dry_run_summary(&self) -> String {
format!("create PR: '{}' in repo '{}'", self.title, self.repository)
}
async fn execute_impl(&self, ctx: &ExecutionContext) -> anyhow::Result<ExecutionResult> {
info!(
"Creating PR: '{}' in repository '{}'",
self.title, self.repository
);
debug!(
"create-pull-request: title='{}', repo='{}', branch='{}', patch='{}'",
self.title, self.repository, self.source_branch, self.patch_file
);
debug!("PR description length: {} chars", self.description.len());
debug!("Source branch: {}", self.source_branch);
debug!("Patch file: {}", self.patch_file);
let config: CreatePrConfig = ctx.get_tool_config("create-pull-request");
debug!("Target branch from config: {}", config.target_branch);
debug!("Draft: {}", config.draft);
debug!("Auto-complete: {}", config.auto_complete);
debug!("Squash merge: {}", config.squash_merge);
if config.draft && config.auto_complete {
warn!(
"auto-complete cannot be set on a draft PR; set draft: false to enable auto-complete"
);
}
// Apply title prefix if configured
let effective_title = if let Some(ref prefix) = config.title_prefix {
format!("{}{}", prefix, self.title)
} else {
self.title.clone()
};
// ADO PR titles have a 400-character limit
let title_char_count = effective_title.chars().count();
if title_char_count > 400 {
return Ok(ExecutionResult::failure(format!(
"PR title too long after applying title-prefix ({} chars, max 400)",
title_char_count
)));
}
// Validate repository against allowed list
debug!(
"Validating repository '{}' against allowed list",
self.repository
);
let repo_id = if crate::safeoutputs::input_refers_to_self(&self.repository, ctx) {
// "self" or a name match against the pipeline's own repository
debug!("Using 'self' repository (matched '{}')", self.repository);
ctx.repository_id
.as_ref()
.or(ctx.repository_name.as_ref())
.context("Repository ID not configured for 'self'")?
.clone()
} else if let Some(ado_repo_name) =
crate::safeoutputs::lookup_allowed_repository(&self.repository, &ctx.allowed_repositories)
{
// Matched against allowed list (by alias, full value, or trailing name)
debug!(
"Repository '{}' resolved to '{}'",
self.repository, ado_repo_name
);
ado_repo_name.clone()
} else if ctx.allowed_repositories.is_empty() {
// No allowed_repositories configured - fall back to default repo (backward compat)
debug!("No allowed_repositories configured, using default repo");
ctx.repository_id
.as_ref()
.or(ctx.repository_name.as_ref())
.context("Repository ID not configured")?
.clone()
} else {
// Repository not in allowed list
warn!(
"Repository '{}' not in allowed list: {:?}",
self.repository,
ctx.allowed_repositories.keys().collect::<Vec<_>>()
);
return Ok(ExecutionResult::failure(format!(
"Repository '{}' is not in the allowed list. Allowed: self, {}",
self.repository,
ctx.allowed_repositories
.keys()
.cloned()
.collect::<Vec<_>>()
.join(", ")
)));
};
debug!("Resolved repository ID: {}", repo_id);
// Get ADO configuration
let org_url = ctx
.ado_org_url
.as_ref()
.context("Azure DevOps organization URL not configured")?;
let organization = ctx
.ado_organization
.as_ref()
.context("Azure DevOps organization name not configured")?;
let project = ctx
.ado_project
.as_ref()
.context("Azure DevOps project not configured")?;
let token = ctx
.access_token
.as_ref()
.context("Access token not configured")?;
debug!(
"ADO org: {}, organization: {}, project: {}",
org_url, organization, project
);
// Validate and read the patch file
let patch_path = ctx.working_directory.join(&self.patch_file);
if !patch_path.exists() {
return Ok(ExecutionResult::failure(format!(
"Patch file not found: {}",
self.patch_file
)));
}
// Security: Enforce patch file size limit
let metadata = tokio::fs::metadata(&patch_path)
.await
.context("Failed to get patch file metadata")?;
if metadata.len() > MAX_PATCH_SIZE_BYTES {
return Ok(ExecutionResult::failure(format!(
"Patch file exceeds maximum size of {} bytes (got {} bytes)",
MAX_PATCH_SIZE_BYTES,
metadata.len()
)));
}
// Read patch content for validation
debug!("Reading patch file content");
let patch_content = tokio::fs::read_to_string(&patch_path)
.await
.context("Failed to read patch file")?;
debug!("Patch content size: {} bytes", patch_content.len());
// SHA-256 integrity check: verify the patch file hasn't been tampered
// with between Stage 1 and Stage 3.
let live_hash =
crate::hash::sha256_hex(patch_content.as_bytes());
if live_hash != self.patch_sha256 {
return Ok(ExecutionResult::failure(format!(
"Patch file SHA-256 mismatch: expected {}, got {} — \
the file may have been tampered with between stages",
self.patch_sha256, live_hash
)));
}
debug!("Patch file SHA-256 verified: {}", live_hash);
// Excluded files are handled via --exclude flags on git am / git apply,
// which filters them at the git level rather than post-processing patch content.
// This is the same approach used by gh-aw (via :(exclude) pathspecs).
// Note: Exclusion happens during patch application (before the protection check).
// If a protected file matches an excluded-files pattern, it is silently dropped
// from the patch rather than triggering a protection error.
let exclude_args: Vec<String> = config
.excluded_files
.iter()
.map(|p| format!("--exclude={}", p))
.collect();
if !exclude_args.is_empty() {
debug!(
"Will apply {} excluded-files patterns via --exclude flags",
exclude_args.len()
);
}
// Security: Validate patch paths before applying
debug!("Validating patch paths for security");
if let Err(e) = validate_patch_paths(&patch_content) {
warn!("Patch path validation failed: {}", e);
return Ok(ExecutionResult::failure(format!(
"Patch validation failed: {}",
e
)));
}
debug!("Patch path validation passed");
// Extract file paths from patch for validation.
// Filter out excluded files before the protection check — if a protected file
// matches an excluded-files pattern, it will be excluded from the patch by
// git am/apply --exclude and should not trigger a protection error.
let patch_paths: Vec<String> = extract_paths_from_patch(&patch_content)
.into_iter()
.filter(|p| {
!config.excluded_files.iter().any(|pat| glob_match_simple(pat, p))
})
.collect();
// Security: File protection check
if config.protected_files != ProtectedFiles::Allowed {
let protected = find_protected_files(&patch_paths);
if !protected.is_empty() {
warn!(
"Patch modifies {} protected file(s): {:?}",
protected.len(),
protected
);
return Ok(ExecutionResult::failure(format!(
"Patch modifies protected files (set protected-files: allowed to override): {}",
protected.join(", ")
)));
}
}
// Security: Max files per PR check (count diff blocks, not paths, to avoid
// double-counting renames which appear in both --- and +++ lines)
let file_count = count_patch_files(&patch_content);
if file_count > config.max_files {
warn!(
"Patch contains {} files, exceeding max of {}",
file_count,
config.max_files
);
return Ok(ExecutionResult::failure(format!(
"Patch contains {} files, exceeding maximum of {} files per PR",
file_count,
config.max_files
)));
}
// Use target branch from config
let target_branch = &config.target_branch;
let mut source_branch = self.source_branch.clone();
let mut source_ref = format!("refs/heads/{}", source_branch);
let target_ref = format!("refs/heads/{}", target_branch);
debug!("Source ref: {}, Target ref: {}", source_ref, target_ref);
// Determine the git repository directory from the source checkout
// For "self", use the source directory; for other repos, use the subdirectory
let repo_git_dir = if self.repository == "self" {
ctx.source_directory.clone()
} else {
ctx.source_directory.join(&self.repository)
};
debug!("Git repository directory: {}", repo_git_dir.display());
// Verify this is a git repository
debug!("Verifying git repository");
let git_check = Command::new("git")
.args(["rev-parse", "--git-dir"])
.current_dir(&repo_git_dir)
.output()
.await
.context("Failed to verify git repository")?;
if !git_check.status.success() {
warn!("Not a git repository: {}", repo_git_dir.display());
return Ok(ExecutionResult::failure(format!(
"Not a git repository: {}",
repo_git_dir.display()
)));
}
debug!("Git repository verified");
// Create a temporary directory for the worktree
let temp_dir = tempfile::tempdir().context("Failed to create temp directory")?;
let worktree_path = temp_dir.path().join("worktree");
debug!("Creating worktree at: {}", worktree_path.display());
// Create a worktree at the target branch
let worktree_output = Command::new("git")
.args([
"worktree",
"add",
&worktree_path.to_string_lossy(),
&format!("origin/{}", target_branch),
])
.current_dir(&repo_git_dir)
.output()
.await
.context("Failed to create git worktree")?;
if !worktree_output.status.success() {
debug!(
"Worktree creation with origin/ prefix failed, trying without: {}",
String::from_utf8_lossy(&worktree_output.stderr)
);
// Try with just the branch name if origin/ prefix fails
let worktree_output = Command::new("git")
.args([
"worktree",
"add",
&worktree_path.to_string_lossy(),
target_branch,
])
.current_dir(&repo_git_dir)
.output()
.await
.context("Failed to create git worktree")?;
if !worktree_output.status.success() {
warn!(
"Failed to create worktree: {}",
String::from_utf8_lossy(&worktree_output.stderr)
);
return Ok(ExecutionResult::failure(format!(
"Failed to create worktree: {}",
String::from_utf8_lossy(&worktree_output.stderr)
)));
}
}
debug!("Worktree created successfully");
// Ensure worktree cleanup on exit
let _worktree_guard = WorktreeGuard {
repo_dir: repo_git_dir.clone(),
worktree_path: worktree_path.clone(),
};
// Create and checkout a local branch in the worktree for patch application.
// Note: this local branch name may differ from the final remote branch name
// if a collision is detected later — the ADO push is REST-only, so the local
// branch name is not used for the remote ref.
debug!("Creating source branch: {}", source_branch);
let checkout_output = Command::new("git")
.args(["checkout", "-b", &source_branch])
.current_dir(&worktree_path)
.output()
.await
.context("Failed to create source branch")?;
if !checkout_output.status.success() {
warn!(
"Failed to create source branch: {}",
String::from_utf8_lossy(&checkout_output.stderr)
);
return Ok(ExecutionResult::failure(format!(
"Failed to create source branch: {}",
String::from_utf8_lossy(&checkout_output.stderr)
)));
}
debug!("Source branch created");
// Record the worktree HEAD before applying the patch so we can diff against
// it later. For multi-commit patches, git am creates N commits and diff-tree HEAD
// alone only shows the last commit's changes — we need base_sha..HEAD.
let base_sha_output = Command::new("git")
.args(["rev-parse", "HEAD"])
.current_dir(&worktree_path)
.output()
.await
.context("Failed to get worktree HEAD SHA")?;
let base_sha = String::from_utf8_lossy(&base_sha_output.stdout).trim().to_string();
debug!("Worktree base SHA before patch: {}", base_sha);
// Apply the patch. Strategy depends on whether excluded-files are configured:
// - Without exclusions: prefer git am --3way (preserves commit metadata)
// with git apply --3way as fallback
// - With exclusions: use git apply --3way directly (git am does not support
// --exclude flags; git apply does)
let patch_committed = match apply_patch_to_worktree(&worktree_path, &patch_path, &exclude_args).await? {
Ok(committed) => committed,
Err(result) => return Ok(result),
};
// Collect changed files. The method depends on how the patch was applied:
// - git am: changes are committed → use git diff-tree to compare base_sha..HEAD
// (covers all commits in multi-commit patches, not just the last one)
// - git apply: changes are in working tree → use git status --porcelain
debug!("Getting list of changed files");
let (status_str, use_diff_tree) = if patch_committed {
let diff_tree_output = Command::new("git")
.args(["diff-tree", "-r", "--name-status", &base_sha, "HEAD"])
.current_dir(&worktree_path)
.output()
.await
.context("Failed to run git diff-tree")?;
if !diff_tree_output.status.success() {
warn!(
"Failed to get diff-tree: {}",
String::from_utf8_lossy(&diff_tree_output.stderr)
);
return Ok(ExecutionResult::failure(format!(
"Failed to get diff-tree: {}",
String::from_utf8_lossy(&diff_tree_output.stderr)
)));
}
(String::from_utf8_lossy(&diff_tree_output.stdout).to_string(), true)
} else {
let status_output = Command::new("git")
.args(["status", "--porcelain"])
.current_dir(&worktree_path)
.output()
.await
.context("Failed to run git status")?;
if !status_output.status.success() {
warn!(
"Failed to get git status: {}",
String::from_utf8_lossy(&status_output.stderr)
);
return Ok(ExecutionResult::failure(format!(
"Failed to get git status: {}",
String::from_utf8_lossy(&status_output.stderr)
)));
}
(String::from_utf8_lossy(&status_output.stdout).to_string(), false)
};
debug!("Change detection output:\n{}", status_str);
let changes = if use_diff_tree {
collect_changes_from_diff_tree(&worktree_path, &status_str).await?
} else {
collect_changes_from_worktree(&worktree_path, &status_str).await?
};
debug!("Collected {} file changes for push", changes.len());
if changes.is_empty() {
// Handle no-changes based on config
match config.if_no_changes {
IfNoChanges::Error => {
warn!("No changes detected after applying patch (if-no-changes: error)");
return Ok(ExecutionResult::failure(
"No changes detected after applying patch".to_string(),
));
}
IfNoChanges::Ignore => {
info!("No changes detected after applying patch (if-no-changes: ignore)");
return Ok(ExecutionResult::success(
"No changes detected — nothing to do".to_string(),
));
}
IfNoChanges::Warn => {
warn!("No changes detected after applying patch (if-no-changes: warn)");
return Ok(ExecutionResult::warning(
"No changes detected after applying patch".to_string(),
));
}
}
}
// Use ADO REST API to create branch and push changes
let client = reqwest::Client::new();
// Get the target branch ref to find the base commit
debug!("Getting target branch ref from ADO");
let refs_url = format!(
"{}{}/_apis/git/repositories/{}/refs?filter=heads/{}&api-version=7.1",
org_url, project, repo_id, target_branch
);
debug!("Refs URL: {}", refs_url);
// Resolve the base commit for the push.