|
1 | 1 | mod format; |
| 2 | +mod pathfinding; |
2 | 3 | mod transform_edge; |
3 | 4 |
|
4 | 5 | pub use format::Format; |
| 6 | +pub use pathfinding::TransformPath; |
5 | 7 | pub use transform_edge::TransformEdge; |
6 | 8 |
|
7 | 9 | use petgraph::graph::{DiGraph, NodeIndex}; |
| 10 | +use petgraph::visit::EdgeRef; |
8 | 11 | use std::collections::HashMap; |
9 | 12 |
|
10 | 13 | /// A directed graph of document format transformations. |
@@ -100,6 +103,92 @@ impl TransformGraph { |
100 | 103 | }; |
101 | 104 | self.graph.contains_edge(from_idx, to_idx) |
102 | 105 | } |
| 106 | + |
| 107 | + /// Reconstruct the cheapest directed edge between each consecutive pair of |
| 108 | + /// nodes in `node_path` and return them as an ordered `Vec<TransformEdge>`. |
| 109 | + /// |
| 110 | + /// When multiple parallel edges connect the same pair of nodes the one with |
| 111 | + /// the lowest cost is chosen, which is consistent with the cost function |
| 112 | + /// used by the pathfinding algorithms. |
| 113 | + fn edges_from_node_path(&self, node_path: &[NodeIndex]) -> Vec<TransformEdge> { |
| 114 | + node_path |
| 115 | + .windows(2) |
| 116 | + .map(|w| { |
| 117 | + let (a, b) = (w[0], w[1]); |
| 118 | + self.graph |
| 119 | + .edges(a) |
| 120 | + .filter(|e| e.target() == b) |
| 121 | + .min_by(|x, y| { |
| 122 | + x.weight() |
| 123 | + .cost |
| 124 | + .partial_cmp(&y.weight().cost) |
| 125 | + .unwrap_or(std::cmp::Ordering::Equal) |
| 126 | + }) |
| 127 | + .expect("node path contains a pair with no connecting edge") |
| 128 | + .weight() |
| 129 | + .clone() |
| 130 | + }) |
| 131 | + .collect() |
| 132 | + } |
| 133 | + |
| 134 | + /// Find the lowest-cost path from `from` to `to` using Dijkstra's |
| 135 | + /// algorithm. |
| 136 | + /// |
| 137 | + /// Cost is treated as additive (sum of edge costs) and the path is |
| 138 | + /// selected to minimise total cost. Quality is computed multiplicatively |
| 139 | + /// along the chosen path and stored in the returned [`TransformPath`]. |
| 140 | + /// |
| 141 | + /// Returns `None` when no path exists between the two formats. |
| 142 | + pub fn find_path(&self, from: Format, to: Format) -> Option<TransformPath> { |
| 143 | + use petgraph::algo::astar; |
| 144 | + |
| 145 | + let (&from_idx, &to_idx) = |
| 146 | + match (self.nodes.get(&from), self.nodes.get(&to)) { |
| 147 | + (Some(f), Some(t)) => (f, t), |
| 148 | + _ => return None, |
| 149 | + }; |
| 150 | + |
| 151 | + let (_cost, node_path) = astar( |
| 152 | + &self.graph, |
| 153 | + from_idx, |
| 154 | + |n| n == to_idx, |
| 155 | + |e| e.weight().cost, |
| 156 | + |_| 0.0_f32, |
| 157 | + )?; |
| 158 | + |
| 159 | + Some(TransformPath::from_steps(self.edges_from_node_path(&node_path))) |
| 160 | + } |
| 161 | + |
| 162 | + /// Return all simple paths (no repeated nodes) from `from` to `to`. |
| 163 | + /// |
| 164 | + /// The returned [`Vec`] is sorted by `total_cost` ascending so callers can |
| 165 | + /// easily compare candidate pipelines. An empty `Vec` is returned when no |
| 166 | + /// path exists. |
| 167 | + pub fn find_all_paths(&self, from: Format, to: Format) -> Vec<TransformPath> { |
| 168 | + use petgraph::algo::all_simple_paths; |
| 169 | + |
| 170 | + let (&from_idx, &to_idx) = |
| 171 | + match (self.nodes.get(&from), self.nodes.get(&to)) { |
| 172 | + (Some(f), Some(t)) => (f, t), |
| 173 | + _ => return Vec::new(), |
| 174 | + }; |
| 175 | + |
| 176 | + let mut paths: Vec<TransformPath> = |
| 177 | + all_simple_paths::<Vec<_>, _, std::collections::hash_map::RandomState>( |
| 178 | + &self.graph, from_idx, to_idx, 0, None, |
| 179 | + ) |
| 180 | + .map(|node_path: Vec<NodeIndex>| { |
| 181 | + TransformPath::from_steps(self.edges_from_node_path(&node_path)) |
| 182 | + }) |
| 183 | + .collect(); |
| 184 | + |
| 185 | + paths.sort_by(|a, b| { |
| 186 | + a.total_cost |
| 187 | + .partial_cmp(&b.total_cost) |
| 188 | + .unwrap_or(std::cmp::Ordering::Equal) |
| 189 | + }); |
| 190 | + paths |
| 191 | + } |
103 | 192 | } |
104 | 193 |
|
105 | 194 | impl Default for TransformGraph { |
@@ -317,4 +406,163 @@ mod tests { |
317 | 406 | ); |
318 | 407 | } |
319 | 408 | } |
| 409 | + |
| 410 | + // ── find_path ───────────────────────────────────────────────────────────── |
| 411 | + |
/// A graph with one edge yields a one-step path that carries that edge's
/// endpoints, cost and quality.
#[test]
fn test_find_path_direct_single_hop() {
    let mut g = TransformGraph::new();
    g.add_transform(markdown_to_pdf());

    let found = g.find_path(Format::Markdown, Format::Pdf).unwrap();
    assert_eq!(found.steps.len(), 1);

    let only_step = &found.steps[0];
    assert_eq!(only_step.from, Format::Markdown);
    assert_eq!(only_step.to, Format::Pdf);

    // Expected metrics of the `markdown_to_pdf` fixture edge.
    assert!((found.total_cost - 1.0).abs() < 1e-5);
    assert!((found.total_quality - 0.9).abs() < 1e-5);
}
| 424 | + |
/// With no direct edge, the path is routed through the intermediate format
/// and its metrics combine both edges.
#[test]
fn test_find_path_multi_hop() {
    let mut g = TransformGraph::new();
    // Fixture values: Markdown → Html is cost 0.5 / quality 1.0,
    // Html → Pdf is cost 0.8 / quality 0.85.
    g.add_transform(markdown_to_html());
    g.add_transform(html_to_pdf());

    let found = g.find_path(Format::Markdown, Format::Pdf).unwrap();
    assert_eq!(found.steps.len(), 2);

    let (first, second) = (&found.steps[0], &found.steps[1]);
    assert_eq!(first.to, Format::Html);
    assert_eq!(second.to, Format::Pdf);

    assert!((found.total_cost - 1.3).abs() < 1e-5);
    assert!((found.total_quality - 0.85).abs() < 1e-5);
}
| 438 | + |
/// A cheaper two-hop route must win over a more expensive direct edge.
#[test]
fn test_find_path_prefers_lower_cost() {
    let mut g = TransformGraph::new();
    // Expensive direct edge (cost 5.0).
    g.add_transform(TransformEdge::new(Format::Markdown, Format::Pdf, 5.0, 0.9));
    // Detour through HTML, cheaper in total (0.5 + 0.8 = 1.3).
    g.add_transform(markdown_to_html());
    g.add_transform(html_to_pdf());

    let found = g.find_path(Format::Markdown, Format::Pdf).unwrap();

    // Two steps and a total of 1.3 identify the detour rather than the
    // direct edge.
    assert_eq!(found.steps.len(), 2);
    assert!((found.total_cost - 1.3).abs() < 1e-5);
}
| 454 | + |
/// Both formats are known to the graph, but nothing leads into Pdf, so the
/// search must report `None`.
#[test]
fn test_find_path_returns_none_when_no_path() {
    let mut g = TransformGraph::new();
    // Only Markdown → Html is registered; there is no Html → Pdf edge.
    g.add_transform(markdown_to_html());

    let result = g.find_path(Format::Markdown, Format::Pdf);
    assert!(result.is_none());
}
| 462 | + |
/// On a graph with no transforms registered, the lookup yields `None`
/// instead of panicking.
#[test]
fn test_find_path_returns_none_for_unknown_format() {
    let empty = TransformGraph::new();
    let result = empty.find_path(Format::Markdown, Format::Pdf);
    assert!(result.is_none());
}
| 468 | + |
/// The total cost of a multi-hop path is the sum of its edge costs.
#[test]
fn test_find_path_cost_additive() {
    let mut graph = TransformGraph::new();
    // Two hops across three formats: Markdown → Html (1.0) → Pdf (2.0),
    // total 3.0.
    graph.add_transform(TransformEdge::new(Format::Markdown, Format::Html, 1.0, 1.0));
    graph.add_transform(TransformEdge::new(Format::Html, Format::Pdf, 2.0, 1.0));

    let path = graph.find_path(Format::Markdown, Format::Pdf).unwrap();
    assert_eq!(path.steps.len(), 2);
    assert!((path.total_cost - 3.0).abs() < 1e-5);
}
| 479 | + |
/// The total quality of a multi-hop path is the product of its edge
/// qualities.
#[test]
fn test_find_path_quality_multiplicative() {
    let mut g = TransformGraph::new();
    g.add_transform(TransformEdge::new(Format::Markdown, Format::Html, 1.0, 0.9));
    g.add_transform(TransformEdge::new(Format::Html, Format::Pdf, 1.0, 0.8));

    let found = g.find_path(Format::Markdown, Format::Pdf).unwrap();

    // 0.9 * 0.8 = 0.72
    assert!((found.total_quality - 0.72).abs() < 1e-5);
}
| 490 | + |
/// When two edges join the same pair of formats, the reported path uses the
/// cheaper one.
#[test]
fn test_find_path_chooses_cheapest_parallel_edge() {
    let mut g = TransformGraph::new();
    // Parallel Markdown → Pdf edges: cost 3.0 and cost 1.0.
    g.add_transform(TransformEdge::new(Format::Markdown, Format::Pdf, 3.0, 0.9));
    g.add_transform(TransformEdge::new(Format::Markdown, Format::Pdf, 1.0, 0.7));

    let found = g.find_path(Format::Markdown, Format::Pdf).unwrap();
    assert!((found.total_cost - 1.0).abs() < 1e-5);
}
| 501 | + |
| 502 | + // ── find_all_paths ──────────────────────────────────────────────────────── |
| 503 | + |
/// A graph with a single edge has exactly one path, and that path has one
/// step.
#[test]
fn test_find_all_paths_single_path() {
    let mut g = TransformGraph::new();
    g.add_transform(markdown_to_pdf());

    let candidates = g.find_all_paths(Format::Markdown, Format::Pdf);
    assert_eq!(candidates.len(), 1);

    let only = &candidates[0];
    assert_eq!(only.steps.len(), 1);
}
| 513 | + |
/// A direct edge and a detour through Html are both enumerated.
#[test]
fn test_find_all_paths_returns_both_direct_and_indirect() {
    let mut g = TransformGraph::new();
    // Direct route.
    g.add_transform(markdown_to_pdf());
    // Indirect route via Html.
    g.add_transform(markdown_to_html());
    g.add_transform(html_to_pdf());

    let candidates = g.find_all_paths(Format::Markdown, Format::Pdf);
    assert_eq!(candidates.len(), 2);
}
| 524 | + |
/// The enumerated paths come back ordered from cheapest to most expensive.
#[test]
fn test_find_all_paths_sorted_by_cost_ascending() {
    let mut g = TransformGraph::new();
    // Direct edge costs 5.0; the detour via Html costs 0.5 + 0.8 = 1.3.
    g.add_transform(TransformEdge::new(Format::Markdown, Format::Pdf, 5.0, 0.9));
    g.add_transform(markdown_to_html());
    g.add_transform(html_to_pdf());

    let candidates = g.find_all_paths(Format::Markdown, Format::Pdf);
    assert_eq!(candidates.len(), 2);

    let (cheapest, priciest) = (&candidates[0], &candidates[1]);
    // The cheaper path is listed first.
    assert!(cheapest.total_cost <= priciest.total_cost);
    assert!((cheapest.total_cost - 1.3).abs() < 1e-5);
    assert!((priciest.total_cost - 5.0).abs() < 1e-5);
}
| 540 | + |
/// With only Markdown → Html registered there is no route to Pdf, so the
/// result is empty.
#[test]
fn test_find_all_paths_empty_when_no_path() {
    let mut g = TransformGraph::new();
    g.add_transform(markdown_to_html());

    let candidates = g.find_all_paths(Format::Markdown, Format::Pdf);
    assert!(candidates.is_empty());
}
| 549 | + |
/// On a graph with no transforms registered, the enumeration is empty
/// rather than a panic.
#[test]
fn test_find_all_paths_empty_for_unknown_format() {
    let empty = TransformGraph::new();
    let candidates = empty.find_all_paths(Format::Markdown, Format::Pdf);
    assert!(candidates.is_empty());
}
| 555 | + |
/// The single enumerated path carries the summed cost and multiplied quality
/// of its two edges.
#[test]
fn test_find_all_paths_metrics_correct() {
    let mut g = TransformGraph::new();
    // Fixture values: cost 0.5 / quality 1.0, then cost 0.8 / quality 0.85.
    g.add_transform(markdown_to_html());
    g.add_transform(html_to_pdf());

    let candidates = g.find_all_paths(Format::Markdown, Format::Pdf);
    assert_eq!(candidates.len(), 1);

    let only = &candidates[0];
    assert!((only.total_cost - 1.3).abs() < 1e-5);
    assert!((only.total_quality - 0.85).abs() < 1e-5);
}
320 | 568 | } |
0 commit comments