Skip to content

Commit f0c7c13

Browse files
authored
Merge pull request #284 from egohygiene/copilot/implement-file-based-caching
Extend cache hashing to cover config file and template file contents
2 parents 399e93e + f0c28e3 commit f0c7c13

2 files changed

Lines changed: 157 additions & 43 deletions

File tree

src/cache.rs

Lines changed: 73 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,26 @@ impl OutputCache {
3838
/// * transformed document content (post-transform pipeline)
3939
/// * output type string (e.g. `"html"`, `"pdf"`, `"docx"`)
4040
/// * optional template name (empty string when absent)
41+
/// * optional template file content (empty string when absent or unreadable)
4142
///
4243
/// A change to any of these fields produces a different hash, causing a cache
43-
/// miss and triggering a fresh pandoc run.
44-
pub fn compute_output_hash(transformed_content: &str, output_type: &str, template: Option<&str>) -> String {
44+
/// miss and triggering a fresh pandoc run. Including the template file content
45+
/// (not just its name) means that editing a template file invalidates cached
46+
/// render outputs even when the template path has not changed.
47+
pub fn compute_output_hash(
48+
transformed_content: &str,
49+
output_type: &str,
50+
template: Option<&str>,
51+
template_content: Option<&str>,
52+
) -> String {
4553
let mut hasher = Sha256::new();
4654
hasher.update(transformed_content.as_bytes());
4755
hasher.update(b"\x00output-type\x00");
4856
hasher.update(output_type.as_bytes());
4957
hasher.update(b"\x00template\x00");
5058
hasher.update(template.unwrap_or("").as_bytes());
59+
hasher.update(b"\x00template-content\x00");
60+
hasher.update(template_content.unwrap_or("").as_bytes());
5161
format!("{:x}", hasher.finalize())
5262
}
5363

@@ -116,13 +126,22 @@ impl TransformCache {
116126
///
117127
/// The hash covers:
118128
/// * normalized input file content
129+
/// * config file content (raw YAML bytes)
119130
/// * variables (sorted for determinism, regardless of map insertion order)
120131
///
121132
/// This hash uniquely identifies a combination of inputs so that a cache hit
122-
/// guarantees the transform pipeline would produce the same output.
123-
pub fn compute_input_hash(content: &str, variables: &HashMap<String, String>) -> String {
133+
/// guarantees the transform pipeline would produce the same output. Including
134+
/// the raw config file content means that any change to the config (not only
135+
/// to `variables`) invalidates the transform cache.
136+
pub fn compute_input_hash(
137+
content: &str,
138+
config_content: &str,
139+
variables: &HashMap<String, String>,
140+
) -> String {
124141
let mut hasher = Sha256::new();
125142
hasher.update(content.as_bytes());
143+
hasher.update(b"\x00config\x00");
144+
hasher.update(config_content.as_bytes());
126145

127146
// Sort entries so the hash is stable regardless of HashMap iteration order.
128147
let mut sorted: Vec<(&String, &String)> = variables.iter().collect();
@@ -324,46 +343,60 @@ mod tests {
324343

325344
#[test]
326345
fn test_same_inputs_produce_same_hash() {
327-
let h1 = compute_input_hash("hello world", &vars(&[("key", "val")]));
328-
let h2 = compute_input_hash("hello world", &vars(&[("key", "val")]));
346+
let h1 = compute_input_hash("hello world", "", &vars(&[("key", "val")]));
347+
let h2 = compute_input_hash("hello world", "", &vars(&[("key", "val")]));
329348
assert_eq!(h1, h2);
330349
}
331350

332351
#[test]
333352
fn test_different_content_produces_different_hash() {
334-
let h1 = compute_input_hash("content A", &vars(&[]));
335-
let h2 = compute_input_hash("content B", &vars(&[]));
353+
let h1 = compute_input_hash("content A", "", &vars(&[]));
354+
let h2 = compute_input_hash("content B", "", &vars(&[]));
336355
assert_ne!(h1, h2);
337356
}
338357

339358
#[test]
340359
fn test_different_variables_produce_different_hash() {
341-
let h1 = compute_input_hash("same content", &vars(&[("k", "v1")]));
342-
let h2 = compute_input_hash("same content", &vars(&[("k", "v2")]));
360+
let h1 = compute_input_hash("same content", "", &vars(&[("k", "v1")]));
361+
let h2 = compute_input_hash("same content", "", &vars(&[("k", "v2")]));
343362
assert_ne!(h1, h2);
344363
}
345364

346365
#[test]
347366
fn test_variable_order_does_not_affect_hash() {
348-
let h1 = compute_input_hash("content", &vars(&[("a", "1"), ("b", "2")]));
349-
let h2 = compute_input_hash("content", &vars(&[("b", "2"), ("a", "1")]));
367+
let h1 = compute_input_hash("content", "", &vars(&[("a", "1"), ("b", "2")]));
368+
let h2 = compute_input_hash("content", "", &vars(&[("b", "2"), ("a", "1")]));
350369
assert_eq!(h1, h2);
351370
}
352371

353372
#[test]
354373
fn test_empty_variables_produces_stable_hash() {
355-
let h1 = compute_input_hash("content", &vars(&[]));
356-
let h2 = compute_input_hash("content", &vars(&[]));
374+
let h1 = compute_input_hash("content", "", &vars(&[]));
375+
let h2 = compute_input_hash("content", "", &vars(&[]));
357376
assert_eq!(h1, h2);
358377
}
359378

360379
#[test]
361380
fn test_hash_is_hex_string() {
362-
let h = compute_input_hash("test", &vars(&[]));
381+
let h = compute_input_hash("test", "", &vars(&[]));
363382
assert!(h.chars().all(|c| c.is_ascii_hexdigit()), "hash must be hex: {h}");
364383
assert_eq!(h.len(), 64, "SHA-256 hex must be 64 chars");
365384
}
366385

386+
#[test]
387+
fn test_different_config_content_produces_different_hash() {
388+
let h1 = compute_input_hash("same content", "config: a", &vars(&[]));
389+
let h2 = compute_input_hash("same content", "config: b", &vars(&[]));
390+
assert_ne!(h1, h2);
391+
}
392+
393+
#[test]
394+
fn test_empty_config_content_differs_from_nonempty() {
395+
let h1 = compute_input_hash("content", "", &vars(&[]));
396+
let h2 = compute_input_hash("content", "outputs:\n - type: html\n", &vars(&[]));
397+
assert_ne!(h1, h2);
398+
}
399+
367400
// ── TransformCache ───────────────────────────────────────────────────────
368401

369402
#[test]
@@ -438,46 +471,60 @@ mod tests {
438471

439472
#[test]
440473
fn test_output_hash_same_inputs_stable() {
441-
let h1 = compute_output_hash("content", "html", None);
442-
let h2 = compute_output_hash("content", "html", None);
474+
let h1 = compute_output_hash("content", "html", None, None);
475+
let h2 = compute_output_hash("content", "html", None, None);
443476
assert_eq!(h1, h2);
444477
}
445478

446479
#[test]
447480
fn test_output_hash_different_output_type_differs() {
448-
let h1 = compute_output_hash("content", "html", None);
449-
let h2 = compute_output_hash("content", "pdf", None);
481+
let h1 = compute_output_hash("content", "html", None, None);
482+
let h2 = compute_output_hash("content", "pdf", None, None);
450483
assert_ne!(h1, h2);
451484
}
452485

453486
#[test]
454487
fn test_output_hash_different_template_differs() {
455-
let h1 = compute_output_hash("content", "html", Some("a.html"));
456-
let h2 = compute_output_hash("content", "html", Some("b.html"));
488+
let h1 = compute_output_hash("content", "html", Some("a.html"), None);
489+
let h2 = compute_output_hash("content", "html", Some("b.html"), None);
457490
assert_ne!(h1, h2);
458491
}
459492

460493
#[test]
461494
fn test_output_hash_no_template_differs_from_with_template() {
462-
let h1 = compute_output_hash("content", "html", None);
463-
let h2 = compute_output_hash("content", "html", Some("tmpl.html"));
495+
let h1 = compute_output_hash("content", "html", None, None);
496+
let h2 = compute_output_hash("content", "html", Some("tmpl.html"), None);
464497
assert_ne!(h1, h2);
465498
}
466499

467500
#[test]
468501
fn test_output_hash_different_content_differs() {
469-
let h1 = compute_output_hash("content A", "html", None);
470-
let h2 = compute_output_hash("content B", "html", None);
502+
let h1 = compute_output_hash("content A", "html", None, None);
503+
let h2 = compute_output_hash("content B", "html", None, None);
471504
assert_ne!(h1, h2);
472505
}
473506

474507
#[test]
475508
fn test_output_hash_is_hex_string() {
476-
let h = compute_output_hash("content", "html", None);
509+
let h = compute_output_hash("content", "html", None, None);
477510
assert!(h.chars().all(|c| c.is_ascii_hexdigit()), "hash must be hex: {h}");
478511
assert_eq!(h.len(), 64, "SHA-256 hex must be 64 chars");
479512
}
480513

514+
#[test]
515+
fn test_output_hash_different_template_content_differs() {
516+
let h1 = compute_output_hash("content", "html", Some("t.html"), Some("<html>v1</html>"));
517+
let h2 = compute_output_hash("content", "html", Some("t.html"), Some("<html>v2</html>"));
518+
assert_ne!(h1, h2);
519+
}
520+
521+
#[test]
522+
fn test_output_hash_no_template_content_differs_from_some() {
523+
let h1 = compute_output_hash("content", "html", Some("t.html"), None);
524+
let h2 = compute_output_hash("content", "html", Some("t.html"), Some("<html></html>"));
525+
assert_ne!(h1, h2);
526+
}
527+
481528
// ── OutputCache ──────────────────────────────────────────────────────────
482529

483530
#[test]

src/commands/build.rs

Lines changed: 84 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@ fn run_impl(config_path: &str, dry_run: bool, resilient: bool, optimization: Opt
4545
let config = load_config(config_path)?;
4646
info!("Loaded config successfully");
4747

48+
// Read the raw config file content so it can be included in the transform
49+
// cache hash. Any change to the config file (not only to `variables`) will
50+
// then invalidate the cached transform results and force a fresh pipeline run.
51+
let config_content = fs::read_to_string(config_path)
52+
.with_context(|| format!("Failed to re-read config file for hashing: {}", config_path))?;
53+
4854
// CLI flag takes precedence over config file; fall back to config value.
4955
let opt_mode = optimization.unwrap_or(config.optimization);
5056
info!(optimization = %opt_mode, "Using optimization mode");
@@ -115,7 +121,7 @@ fn run_impl(config_path: &str, dry_run: bool, resilient: bool, optimization: Opt
115121
// because some transforms are format-specific. In particular, EmojiTransform skips
116122
// replacement for HTML (which renders emoji natively) but applies it for PDF, DOCX,
117123
// and other formats. A format-keyed cache avoids redundant work across builds.
118-
let base_input_hash = compute_input_hash(&normalized_content, &config.variables);
124+
let base_input_hash = compute_input_hash(&normalized_content, &config_content, &config.variables);
119125
let cache_path = output_dir.join(".renderflow-cache.json");
120126
// Always attempt to read the cache; load_cache handles missing/corrupt files
121127
// gracefully. Only write back to disk in non-dry-run mode.
@@ -217,8 +223,30 @@ fn run_impl(config_path: &str, dry_run: bool, resilient: bool, optimization: Opt
217223
} else {
218224
// Compute a hash of all inputs that determine this output's content.
219225
// If the stored hash matches and the output file already exists, pandoc
220-
// can be skipped entirely.
221-
let output_hash = compute_output_hash(&transformed, &format_str, output.template.as_deref());
226+
// can be skipped entirely. The template file content (not just its
227+
// name) is included so that edits to a template file invalidate the
228+
// output cache even when the template path is unchanged.
229+
let template_content = output.template.as_deref().and_then(|name| {
230+
let path = Path::new("templates").join(name);
231+
match fs::read_to_string(&path) {
232+
Ok(content) => Some(content),
233+
Err(e) => {
234+
warn!(
235+
template = %name,
236+
path = %path.display(),
237+
error = %e,
238+
"Failed to read template file for cache hash; template changes may not invalidate cache"
239+
);
240+
None
241+
}
242+
}
243+
});
244+
let output_hash = compute_output_hash(
245+
&transformed,
246+
&format_str,
247+
output.template.as_deref(),
248+
template_content.as_deref(),
249+
);
222250

223251
if Path::new(&output_path).exists()
224252
&& output_cache.get(&output_path) == Some(output_hash.as_str())
@@ -503,26 +531,29 @@ mod tests {
503531
#[test]
504532
fn test_cache_hit_uses_pre_populated_cache() {
505533
// Pre-populate the cache with the exact hash that the build would
506-
// compute for the input file + format, then run a dry-run. The build
507-
// should detect the cache hit and skip the transform phase for that format.
534+
// compute for the input file + config + format, then run a dry-run.
535+
// The build should detect the cache hit and skip the transform phase
536+
// for that format.
508537
let dir = tempfile::tempdir().expect("failed to create temp dir");
509538
let input_content = "# Test\n";
510539
let input_path = dir.path().join("input.md");
511540
fs::write(&input_path, input_content).expect("failed to write input file");
512541
let output_dir = dir.path().join("dist");
513542

543+
let config_content = format!(
544+
"outputs:\n - type: html\ninput: \"{}\"\noutput_dir: \"{}\"\n",
545+
input_path.display(),
546+
output_dir.display()
547+
);
548+
514549
// Compute the hash the same way the build command will: base hash + "-html".
550+
// The hash now also incorporates the config file content.
515551
let variables = std::collections::HashMap::new();
516-
let base_hash = crate::cache::compute_input_hash(input_content, &variables);
552+
let base_hash = crate::cache::compute_input_hash(input_content, &config_content, &variables);
517553
let hash_key = format!("{base_hash}-html");
518554
let cached_transform = "# Test (from cache)\n";
519555
write_cache_file(&output_dir, &hash_key, cached_transform);
520556

521-
let config_content = format!(
522-
"outputs:\n - type: html\ninput: \"{}\"\noutput_dir: \"{}\"\n",
523-
input_path.display(),
524-
output_dir.display()
525-
);
526557
let mut f = NamedTempFile::new().expect("failed to create temp file");
527558
f.write_all(config_content.as_bytes()).expect("failed to write config");
528559

@@ -541,27 +572,63 @@ mod tests {
541572
fs::write(&input_path, original_content).expect("failed to write input file");
542573
let output_dir = dir.path().join("dist");
543574

544-
// Cache is keyed on the *original* content + format.
575+
let config_content = format!(
576+
"outputs:\n - type: html\ninput: \"{}\"\noutput_dir: \"{}\"\n",
577+
input_path.display(),
578+
output_dir.display()
579+
);
580+
581+
// Cache is keyed on the *original* content + config + format.
545582
let variables = std::collections::HashMap::new();
546-
let old_base_hash = crate::cache::compute_input_hash(original_content, &variables);
583+
let old_base_hash = crate::cache::compute_input_hash(original_content, &config_content, &variables);
547584
let old_hash_key = format!("{old_base_hash}-html");
548585
write_cache_file(&output_dir, &old_hash_key, "cached result");
549586

550587
// Now change the input file — the hash will be different.
551588
let new_content = "# Changed\n";
552589
fs::write(&input_path, new_content).expect("failed to write updated input");
553590

554-
let config_content = format!(
591+
let mut f = NamedTempFile::new().expect("failed to create temp file");
592+
f.write_all(config_content.as_bytes()).expect("failed to write config");
593+
594+
// Dry-run still succeeds; it runs transforms because the hash misses.
595+
let result = run(f.path().to_str().unwrap(), true, None);
596+
assert!(result.is_ok(), "dry-run with cache miss should still succeed: {:?}", result);
597+
}
598+
599+
#[test]
600+
fn test_cache_miss_when_config_changed() {
601+
// After changing the config content the hash changes, so the previously
602+
// cached entry should not match and transforms must run again.
603+
let dir = tempfile::tempdir().expect("failed to create temp dir");
604+
let input_content = "# Hello\n";
605+
let input_path = dir.path().join("input.md");
606+
fs::write(&input_path, input_content).expect("failed to write input file");
607+
let output_dir = dir.path().join("dist");
608+
609+
// Original config — used to seed the cache.
610+
let original_config = format!(
555611
"outputs:\n - type: html\ninput: \"{}\"\noutput_dir: \"{}\"\n",
556612
input_path.display(),
557613
output_dir.display()
558614
);
615+
let variables = std::collections::HashMap::new();
616+
let old_base_hash = crate::cache::compute_input_hash(input_content, &original_config, &variables);
617+
let old_hash_key = format!("{old_base_hash}-html");
618+
write_cache_file(&output_dir, &old_hash_key, "cached result");
619+
620+
// Updated config (adds a variable) — the hash must differ from the original.
621+
let updated_config = format!(
622+
"outputs:\n - type: html\ninput: \"{}\"\noutput_dir: \"{}\"\nvariables:\n title: changed\n",
623+
input_path.display(),
624+
output_dir.display()
625+
);
559626
let mut f = NamedTempFile::new().expect("failed to create temp file");
560-
f.write_all(config_content.as_bytes()).expect("failed to write config");
627+
f.write_all(updated_config.as_bytes()).expect("failed to write config");
561628

562-
// Dry-run still succeeds; it runs transforms because the hash misses.
629+
// Dry-run succeeds; transforms run because the config hash misses.
563630
let result = run(f.path().to_str().unwrap(), true, None);
564-
assert!(result.is_ok(), "dry-run with cache miss should still succeed: {:?}", result);
631+
assert!(result.is_ok(), "dry-run with changed config should succeed: {:?}", result);
565632
}
566633

567634
#[test]

0 commit comments

Comments
 (0)