Skip to content

Commit 72e97c7

Browse files
jamesadevineCopilot
andcommitted
fix(compile): address PR review feedback on migration framework
Four findings from the Rust PR Reviewer bot: 1. check_pipeline's bail message had a redundant "error:" prefix that produced "Error: error: ..." once anyhow's top-level handler added its own "Error:" wrapper. Drop the prefix and reformat the hint onto an indented continuation line. 2. compile_pipeline_inner did a redundant serde_yaml::from_value round-trip just to satisfy ParsedSource.front_matter, even though perform_source_rewrite_if_needed never reads that field. Refactor the helper to take the four primitive fields it actually uses (mapping, body_raw, source_sha256, migrations) instead of the full struct. reconstruct_source likewise takes individual fields now. 3. Misleading comment in tests/migration_tests.rs claimed a "thread-local counter" was used; only a nanosecond timestamp was. Replaced rand_suffix() with timestamp + AtomicU64 monotonic seq so parallel tests scheduled in the same nanosecond always get distinct dirs. 4. Leading whitespace before the opening `---` was silently stripped on migration rewrite. parse_markdown_detailed already tolerates it on read; capture it as a new ParsedSource.leading_whitespace field and emit it from reconstruct_source so byte-faithful preservation extends to whitespace prefixes (BOM-stripped editor blank lines, etc.). All 1369 tests still pass. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent a90f543 commit 72e97c7

4 files changed

Lines changed: 102 additions & 41 deletions

File tree

src/compile/common.rs

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,10 @@ pub struct ParsedSource {
102102
/// The migrated front-matter mapping, with `schema-version`
103103
/// bumped. Used to reconstruct the source for rewrite.
104104
pub front_matter_mapping: serde_yaml::Mapping,
105+
/// Whitespace bytes that appeared before the opening `---` fence,
106+
/// preserved verbatim so that source rewrite is byte-faithful.
107+
/// Empty in the typical case where the file starts with `---`.
108+
pub leading_whitespace: String,
105109
/// The body region exactly as it appeared after the closing `---`,
106110
/// byte-for-byte (no trim). Includes any leading newline.
107111
pub body_raw: String,
@@ -138,8 +142,12 @@ pub(crate) fn parse_markdown_detailed_with_registry(
138142

139143
// Allow leading whitespace before the opening fence (preserves
140144
// historical leniency). We compute a byte offset into `content` so
141-
// that `body_raw` extraction is purely byte-faithful.
145+
// that `body_raw` extraction is purely byte-faithful, and we keep
146+
// the whitespace prefix around so that source rewrites preserve
147+
// anything the user (or their editor) put before the opening
148+
// fence.
142149
let leading_ws = content.bytes().take_while(|b| b.is_ascii_whitespace()).count();
150+
let leading_whitespace = content[..leading_ws].to_string();
143151
let after_lead = &content[leading_ws..];
144152
if !after_lead.starts_with("---") {
145153
anyhow::bail!("Markdown file must start with YAML front matter (---)");
@@ -184,23 +192,37 @@ pub(crate) fn parse_markdown_detailed_with_registry(
184192
markdown_body,
185193
migrations: report,
186194
front_matter_mapping: mapping,
195+
leading_whitespace,
187196
body_raw,
188197
source_sha256,
189198
})
190199
}
191200

192-
/// Reconstruct full source content from a migrated [`ParsedSource`].
201+
/// Reconstruct full source content from migration outputs.
202+
///
203+
/// Takes the individual fragments rather than the full
204+
/// [`ParsedSource`] so callers that have already destructured the
205+
/// parse don't have to round-trip a fresh `front_matter` through
206+
/// serde just to satisfy the typed field.
193207
///
194208
/// Output shape:
209+
/// - `leading_whitespace` (typically empty)
195210
/// - `---\n`
196211
/// - the migrated YAML mapping (`serde_yaml::to_string` always ends
197212
/// with `\n`)
198213
/// - `---`
199214
/// - the original body region byte-for-byte (`body_raw`)
200-
pub fn reconstruct_source(parsed: &ParsedSource) -> Result<String> {
201-
let yaml_serialized = serde_yaml::to_string(&parsed.front_matter_mapping)
215+
pub fn reconstruct_source(
216+
leading_whitespace: &str,
217+
front_matter_mapping: &serde_yaml::Mapping,
218+
body_raw: &str,
219+
) -> Result<String> {
220+
let yaml_serialized = serde_yaml::to_string(front_matter_mapping)
202221
.context("Failed to serialize migrated front matter")?;
203-
Ok(format!("---\n{}---{}", yaml_serialized, parsed.body_raw))
222+
Ok(format!(
223+
"{}---\n{}---{}",
224+
leading_whitespace, yaml_serialized, body_raw
225+
))
204226
}
205227

206228
fn yaml_value_kind(v: &serde_yaml::Value) -> &'static str {
@@ -2539,13 +2561,22 @@ mod tests {
25392561

25402562
// ─── parse_markdown_detailed ──────────────────────────────────────────────
25412563

2564+
fn reconstruct(parsed: &ParsedSource) -> String {
2565+
reconstruct_source(
2566+
&parsed.leading_whitespace,
2567+
&parsed.front_matter_mapping,
2568+
&parsed.body_raw,
2569+
)
2570+
.unwrap()
2571+
}
2572+
25422573
#[test]
25432574
fn parse_markdown_detailed_preserves_body_byte_for_byte() {
25442575
// Case 1: trailing newline.
25452576
let original = "---\nname: x\ndescription: y\n---\nbody line\n";
25462577
let parsed = parse_markdown_detailed(original).unwrap();
25472578
assert_eq!(parsed.body_raw, "\nbody line\n");
2548-
let reconstructed = reconstruct_source(&parsed).unwrap();
2579+
let reconstructed = reconstruct(&parsed);
25492580
// No migrations ran, so the YAML round-trip is the only
25502581
// structural change. The body region is byte-faithful.
25512582
assert!(reconstructed.ends_with("---\nbody line\n"));
@@ -2554,14 +2585,14 @@ mod tests {
25542585
let original = "---\nname: x\ndescription: y\n---\nbody";
25552586
let parsed = parse_markdown_detailed(original).unwrap();
25562587
assert_eq!(parsed.body_raw, "\nbody");
2557-
let reconstructed = reconstruct_source(&parsed).unwrap();
2588+
let reconstructed = reconstruct(&parsed);
25582589
assert!(reconstructed.ends_with("---\nbody"));
25592590

25602591
// Case 3: empty body, but trailing newline.
25612592
let original = "---\nname: x\ndescription: y\n---\n";
25622593
let parsed = parse_markdown_detailed(original).unwrap();
25632594
assert_eq!(parsed.body_raw, "\n");
2564-
let reconstructed = reconstruct_source(&parsed).unwrap();
2595+
let reconstructed = reconstruct(&parsed);
25652596
assert!(reconstructed.ends_with("---\n"));
25662597

25672598
// Case 4: blank lines between closing fence and content are
@@ -2580,13 +2611,30 @@ mod tests {
25802611
let original = "---\nname: x\ndescription: y\n---\n## body\n";
25812612
let parsed = parse_markdown_detailed(original).unwrap();
25822613
assert!(!parsed.migrations.changed());
2583-
let reconstructed = reconstruct_source(&parsed).unwrap();
2614+
let reconstructed = reconstruct(&parsed);
25842615
// Find the closing fence in both and compare the suffix.
25852616
let orig_suffix = &original[original.find("\n---\n").unwrap()..];
25862617
let recon_suffix = &reconstructed[reconstructed.find("\n---\n").unwrap()..];
25872618
assert_eq!(orig_suffix, recon_suffix, "body region must be byte-identical");
25882619
}
25892620

2621+
#[test]
2622+
fn parse_markdown_detailed_preserves_leading_whitespace() {
2623+
// Leading blank lines / spaces (e.g. from editor BOM-strippers
2624+
// adding a blank line) must round-trip through reconstruction
2625+
// so migration rewrites don't silently normalize them away.
2626+
let original = "\n \n---\nname: x\ndescription: y\n---\nbody\n";
2627+
let parsed = parse_markdown_detailed(original).unwrap();
2628+
assert_eq!(parsed.leading_whitespace, "\n \n");
2629+
let reconstructed = reconstruct(&parsed);
2630+
assert!(
2631+
reconstructed.starts_with("\n \n---\n"),
2632+
"expected leading whitespace preserved, got: {:?}",
2633+
&reconstructed[..20.min(reconstructed.len())]
2634+
);
2635+
assert!(reconstructed.ends_with("---\nbody\n"));
2636+
}
2637+
25902638
#[test]
25912639
fn parse_markdown_detailed_allows_leading_whitespace() {
25922640
let original = "\n \n---\nname: x\ndescription: y\n---\nbody\n";

src/compile/migration_integration_test.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,16 @@ async fn perform_source_rewrite_lost_update_guard() {
194194
std::fs::write(&source_path, "different bytes\n").unwrap();
195195

196196
// Rewrite must refuse.
197-
let result =
198-
perform_source_rewrite_if_needed(&source_path, &original, &parsed).await;
197+
let result = perform_source_rewrite_if_needed(
198+
&source_path,
199+
&original,
200+
&parsed.leading_whitespace,
201+
&parsed.front_matter_mapping,
202+
&parsed.body_raw,
203+
&parsed.source_sha256,
204+
&parsed.migrations,
205+
)
206+
.await;
199207
let err = result.expect_err("expected lost-update guard to fire");
200208
assert!(
201209
format!("{:#}", err).contains("changed during compilation"),

src/compile/mod.rs

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,11 @@ async fn compile_pipeline_inner(
127127

128128
let parsed = common::parse_markdown_detailed_with_registry(&content, registry)?;
129129
let mut front_matter = parsed.front_matter;
130-
let markdown_body = parsed.markdown_body.clone();
131-
let migrations = parsed.migrations.clone();
132-
let front_matter_mapping = parsed.front_matter_mapping.clone();
133-
let body_raw = parsed.body_raw.clone();
130+
let markdown_body = parsed.markdown_body;
131+
let migrations = parsed.migrations;
132+
let front_matter_mapping = parsed.front_matter_mapping;
133+
let leading_whitespace = parsed.leading_whitespace;
134+
let body_raw = parsed.body_raw;
134135
let source_sha256 = parsed.source_sha256;
135136

136137
// Sanitize all front matter text fields before any further processing.
@@ -205,21 +206,16 @@ async fn compile_pipeline_inner(
205206
// we abort before the lock file ever gets touched.
206207
let mut migrated = false;
207208
if migrations.changed() {
208-
// Rebuild the parsed handle (the typed front_matter has been
209-
// sanitized and consumed; rewrite needs the unmodified mapping).
210-
let parsed_for_rewrite = common::ParsedSource {
211-
front_matter: serde_yaml::from_value(serde_yaml::Value::Mapping(
212-
front_matter_mapping.clone(),
213-
))
214-
.context("Failed to round-trip migrated front matter for rewrite")?,
215-
markdown_body: markdown_body.clone(),
216-
migrations: migrations.clone(),
217-
front_matter_mapping: front_matter_mapping.clone(),
218-
body_raw: body_raw.clone(),
219-
source_sha256,
220-
};
221-
migrated = perform_source_rewrite_if_needed(input_path, &content, &parsed_for_rewrite)
222-
.await?;
209+
migrated = perform_source_rewrite_if_needed(
210+
input_path,
211+
&content,
212+
&leading_whitespace,
213+
&front_matter_mapping,
214+
&body_raw,
215+
&source_sha256,
216+
&migrations,
217+
)
218+
.await?;
223219
}
224220

225221
// Write output via atomic_write so a crash mid-write cannot leave a
@@ -263,9 +259,14 @@ async fn compile_pipeline_inner(
263259
async fn perform_source_rewrite_if_needed(
264260
input_path: &Path,
265261
original_content: &str,
266-
parsed: &common::ParsedSource,
262+
leading_whitespace: &str,
263+
front_matter_mapping: &serde_yaml::Mapping,
264+
body_raw: &str,
265+
source_sha256: &[u8; 32],
266+
migrations: &migrations::MigrationReport,
267267
) -> Result<bool> {
268-
let new_content = common::reconstruct_source(parsed)?;
268+
let new_content =
269+
common::reconstruct_source(leading_whitespace, front_matter_mapping, body_raw)?;
269270
if new_content == original_content {
270271
// Migrations ran but their net effect is a no-op on disk —
271272
// skip the rewrite to avoid gratuitous diffs.
@@ -285,7 +286,7 @@ async fn perform_source_rewrite_if_needed(
285286
let mut hasher = sha2::Sha256::new();
286287
hasher.update(&current_bytes);
287288
let current_hash: [u8; 32] = hasher.finalize().into();
288-
if current_hash != parsed.source_sha256 {
289+
if &current_hash != source_sha256 {
289290
anyhow::bail!(
290291
"source file {} changed during compilation; refusing to migrate. Re-run `ado-aw compile`.",
291292
input_path.display()
@@ -304,10 +305,10 @@ async fn perform_source_rewrite_if_needed(
304305
eprintln!(
305306
"warning: migrated front matter in {}: schema-version {} -> {}",
306307
input_path.display(),
307-
parsed.migrations.from_version,
308-
parsed.migrations.to_version
308+
migrations.from_version,
309+
migrations.to_version
309310
);
310-
for applied in &parsed.migrations.applied {
311+
for applied in &migrations.applied {
311312
eprintln!(" - {}: {}", applied.id, applied.summary);
312313
}
313314
eprintln!(
@@ -519,7 +520,7 @@ pub async fn check_pipeline(pipeline_path: &str) -> Result<()> {
519520
// we want to fail loudly so `ado-aw compile` is run.
520521
if parsed.migrations.changed() {
521522
anyhow::bail!(
522-
"error: {} is at schema-version {}; this compiler requires schema-version {}.\n\
523+
"{} is at schema-version {}; this compiler requires schema-version {}.\n \
523524
hint: run `ado-aw compile {}` to migrate the source in place.",
524525
source_path.display(),
525526
parsed.migrations.from_version,

tests/migration_tests.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,6 @@ fn fresh_temp_dir(label: &str) -> PathBuf {
2727
"ado-aw-migration-tests-{}-{}-{}",
2828
label,
2929
std::process::id(),
30-
// Add a thread-local counter to avoid intra-process collisions
31-
// when multiple tests run in parallel.
3230
rand_suffix(),
3331
));
3432
fs::create_dir_all(&dir).expect("create temp dir");
@@ -45,14 +43,20 @@ fn fresh_git_temp_dir(label: &str) -> PathBuf {
4543
dir
4644
}
4745

48-
/// Lightweight randomness for test temp dir uniqueness — no crate dep.
46+
/// Suffix that's unique within the process for the lifetime of a
47+
/// single test binary run. Uses a wall-clock nanosecond timestamp
48+
/// combined with a monotonic atomic counter so two parallel tests
49+
/// scheduled in the same nanosecond still get distinct directories.
4950
fn rand_suffix() -> String {
51+
use std::sync::atomic::{AtomicU64, Ordering};
5052
use std::time::{SystemTime, UNIX_EPOCH};
53+
static SEQ: AtomicU64 = AtomicU64::new(0);
5154
let nanos = SystemTime::now()
5255
.duration_since(UNIX_EPOCH)
5356
.map(|d| d.as_nanos())
5457
.unwrap_or(0);
55-
format!("{:x}", nanos)
58+
let seq = SEQ.fetch_add(1, Ordering::Relaxed);
59+
format!("{:x}-{:x}", nanos, seq)
5660
}
5761

5862
fn ado_aw_binary() -> PathBuf {

0 commit comments

Comments
 (0)