Skip to content

Commit 515c7e4

Browse files
authored
Merge pull request #248 from egohygiene/copilot/build-minimal-dag-multi-target
feat: Build minimal DAG for multi-target execution
2 parents df729dd + d051394 commit 515c7e4

2 files changed

Lines changed: 375 additions & 0 deletions

File tree

src/graph/mod.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
mod format;
2+
mod multi_target;
23
mod pathfinding;
34
mod transform_edge;
45

56
pub use format::Format;
7+
pub use multi_target::MultiTargetDag;
68
pub use pathfinding::TransformPath;
79
pub use transform_edge::TransformEdge;
810

@@ -189,6 +191,52 @@ impl TransformGraph {
189191
});
190192
paths
191193
}
194+
195+
/// Build a minimal DAG that covers all `targets` reachable from `from`.
196+
///
197+
/// For each target the cheapest path is computed independently via
198+
/// [`find_path`](Self::find_path). All resulting edges are then merged
199+
/// into a single [`MultiTargetDag`], deduplicating any edges that are
200+
/// shared across paths. When two paths contribute an edge for the same
201+
/// `(from, to)` pair the cheaper edge is kept.
202+
///
203+
/// Returns `None` when at least one target is unreachable from `from`.
204+
/// Returns `Some` with an empty DAG when `targets` is empty.
205+
///
206+
/// # Example
207+
///
208+
/// ```rust
209+
/// use renderflow::graph::{Format, TransformEdge, TransformGraph};
210+
///
211+
/// let mut graph = TransformGraph::new();
212+
/// graph.add_transform(TransformEdge::new(Format::Markdown, Format::Html, 0.5, 1.0));
213+
/// graph.add_transform(TransformEdge::new(Format::Html, Format::Pdf, 0.8, 0.85));
214+
/// graph.add_transform(TransformEdge::new(Format::Html, Format::Docx, 0.6, 0.90));
215+
///
216+
/// let dag = graph
217+
/// .build_multi_target_dag(Format::Markdown, &[Format::Pdf, Format::Docx])
218+
/// .expect("all targets must be reachable");
219+
///
220+
/// // Markdown → Html is shared: 3 unique edges, not 4.
221+
/// assert_eq!(dag.edge_count(), 3);
222+
///
223+
/// let order = dag.execution_order();
224+
/// assert_eq!(order.len(), 3);
225+
/// ```
226+
pub fn build_multi_target_dag(
227+
&self,
228+
from: Format,
229+
targets: &[Format],
230+
) -> Option<MultiTargetDag> {
231+
let mut dag = MultiTargetDag::new();
232+
for &target in targets {
233+
let path = self.find_path(from, target)?;
234+
for edge in path.steps {
235+
dag.merge_edge(edge);
236+
}
237+
}
238+
Some(dag)
239+
}
192240
}
193241

194242
impl Default for TransformGraph {

src/graph/multi_target.rs

Lines changed: 327 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,327 @@
1+
use super::{Format, TransformEdge};
2+
3+
use petgraph::graph::{DiGraph, NodeIndex};
4+
use petgraph::visit::EdgeRef;
5+
use std::collections::HashMap;
6+
7+
/// A minimal directed acyclic graph that merges the cheapest transformation
8+
/// paths for several output targets from a single source format.
9+
///
10+
/// Intermediate nodes (e.g. `Html` when producing both `Pdf` and `Docx` from
11+
/// `Markdown`) are reused across paths so that each transformation step is
12+
/// represented exactly once in the DAG.
13+
///
14+
/// # Example
15+
///
16+
/// ```rust
17+
/// use renderflow::graph::{Format, TransformEdge, TransformGraph};
18+
///
19+
/// let mut graph = TransformGraph::new();
20+
/// graph.add_transform(TransformEdge::new(Format::Markdown, Format::Html, 0.5, 1.0));
21+
/// graph.add_transform(TransformEdge::new(Format::Html, Format::Pdf, 0.8, 0.85));
22+
/// graph.add_transform(TransformEdge::new(Format::Html, Format::Docx, 0.6, 0.90));
23+
///
24+
/// let dag = graph
25+
/// .build_multi_target_dag(Format::Markdown, &[Format::Pdf, Format::Docx])
26+
/// .expect("all targets must be reachable");
27+
///
28+
/// // Markdown → Html is shared: only 3 unique edges total.
29+
/// assert_eq!(dag.edge_count(), 3);
30+
/// assert!(dag.contains_edge(Format::Markdown, Format::Html));
31+
/// assert!(dag.contains_edge(Format::Html, Format::Pdf));
32+
/// assert!(dag.contains_edge(Format::Html, Format::Docx));
33+
///
34+
/// // Execution order respects dependencies.
35+
/// let order = dag.execution_order();
36+
/// assert_eq!(order.len(), 3);
37+
/// ```
38+
pub struct MultiTargetDag {
39+
/// Directed graph whose nodes are formats and whose edge weights are the
/// transformations between them.
pub(super) graph: DiGraph<Format, TransformEdge>,
40+
/// Index of every format already inserted into `graph`, used to reuse the
/// existing node instead of adding a second one for the same format.
pub(super) nodes: HashMap<Format, NodeIndex>,
41+
}
42+
43+
impl MultiTargetDag {
44+
pub(super) fn new() -> Self {
45+
Self {
46+
graph: DiGraph::new(),
47+
nodes: HashMap::new(),
48+
}
49+
}
50+
51+
/// Return the [`NodeIndex`] for `format`, inserting a node when one does
52+
/// not already exist.
53+
pub(super) fn get_or_insert_node(&mut self, format: Format) -> NodeIndex {
54+
if let Some(&idx) = self.nodes.get(&format) {
55+
idx
56+
} else {
57+
let idx = self.graph.add_node(format);
58+
self.nodes.insert(format, idx);
59+
idx
60+
}
61+
}
62+
63+
/// Merge `edge` into the DAG.
64+
///
65+
/// If an edge between the same `(from, to)` pair already exists, the one
66+
/// with the lower cost is kept and the other is discarded. This ensures
67+
/// that a shared intermediate step always executes via the cheapest
68+
/// available transformation.
69+
pub(super) fn merge_edge(&mut self, edge: TransformEdge) {
70+
let from_idx = self.get_or_insert_node(edge.from);
71+
let to_idx = self.get_or_insert_node(edge.to);
72+
73+
// Check whether an edge for this (from, to) pair is already present.
74+
let existing_id = self
75+
.graph
76+
.edges(from_idx)
77+
.find(|e| e.target() == to_idx)
78+
.map(|e| e.id());
79+
80+
if let Some(id) = existing_id {
81+
// Keep the cheaper edge.
82+
if edge.cost < self.graph[id].cost {
83+
self.graph[id] = edge;
84+
}
85+
} else {
86+
self.graph.add_edge(from_idx, to_idx, edge);
87+
}
88+
}
89+
90+
/// Return the edges in a valid topological execution order.
91+
///
92+
/// Each [`TransformEdge`] in the returned `Vec` is guaranteed to appear
93+
/// only after all edges that produce its source format. An empty `Vec` is
94+
/// returned when the graph contains a cycle (which should never occur for a
95+
/// well-formed transformation DAG).
96+
pub fn execution_order(&self) -> Vec<&TransformEdge> {
97+
use petgraph::algo::toposort;
98+
99+
let sorted_nodes = match toposort(&self.graph, None) {
100+
Ok(nodes) => nodes,
101+
Err(_) => return Vec::new(),
102+
};
103+
104+
let mut result = Vec::new();
105+
for node in &sorted_nodes {
106+
for edge_ref in self.graph.edges(*node) {
107+
result.push(edge_ref.weight());
108+
}
109+
}
110+
result
111+
}
112+
113+
/// Return all edges stored in the DAG (in arbitrary order).
114+
pub fn all_edges(&self) -> Vec<&TransformEdge> {
115+
self.graph.edge_weights().collect()
116+
}
117+
118+
/// Return `true` when a direct edge from `from` to `to` exists in the DAG.
119+
pub fn contains_edge(&self, from: Format, to: Format) -> bool {
120+
let (Some(&fi), Some(&ti)) = (self.nodes.get(&from), self.nodes.get(&to)) else {
121+
return false;
122+
};
123+
self.graph.contains_edge(fi, ti)
124+
}
125+
126+
/// Return the number of unique edges in the DAG.
127+
pub fn edge_count(&self) -> usize {
128+
self.graph.edge_count()
129+
}
130+
131+
/// Return the number of unique format nodes in the DAG.
132+
pub fn node_count(&self) -> usize {
133+
self.graph.node_count()
134+
}
135+
}
136+
137+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::TransformGraph;

    /// Fixture graph: Markdown → Html, then Html → Pdf and Html → Docx.
    fn build_graph() -> TransformGraph {
        let mut g = TransformGraph::new();
        for edge in [
            // Markdown → Html (0.5 / 1.0)
            TransformEdge::new(Format::Markdown, Format::Html, 0.5, 1.0),
            // Html → Pdf (0.8 / 0.85)
            TransformEdge::new(Format::Html, Format::Pdf, 0.8, 0.85),
            // Html → Docx (0.6 / 0.90)
            TransformEdge::new(Format::Html, Format::Docx, 0.6, 0.90),
        ] {
            g.add_transform(edge);
        }
        g
    }

    /// DAG for Markdown → {Pdf, Docx} built on top of the fixture graph.
    fn pdf_docx_dag() -> MultiTargetDag {
        build_graph()
            .build_multi_target_dag(Format::Markdown, &[Format::Pdf, Format::Docx])
            .unwrap()
    }

    // ── build_multi_target_dag ────────────────────────────────────────────────

    #[test]
    fn test_dag_shared_intermediate_not_duplicated() {
        // Markdown → Html is shared; only 3 unique edges total.
        assert_eq!(pdf_docx_dag().edge_count(), 3);
    }

    #[test]
    fn test_dag_contains_expected_edges() {
        let dag = pdf_docx_dag();

        for (src, dst) in [
            (Format::Markdown, Format::Html),
            (Format::Html, Format::Pdf),
            (Format::Html, Format::Docx),
        ] {
            assert!(dag.contains_edge(src, dst));
        }
    }

    #[test]
    fn test_dag_single_target_equals_path() {
        let dag = build_graph()
            .build_multi_target_dag(Format::Markdown, &[Format::Pdf])
            .unwrap();

        assert_eq!(dag.edge_count(), 2);
        assert!(dag.contains_edge(Format::Markdown, Format::Html));
        assert!(dag.contains_edge(Format::Html, Format::Pdf));
    }

    #[test]
    fn test_dag_empty_targets_returns_empty_dag() {
        let dag = build_graph()
            .build_multi_target_dag(Format::Markdown, &[])
            .unwrap();

        assert_eq!(dag.edge_count(), 0);
        assert_eq!(dag.node_count(), 0);
    }

    #[test]
    fn test_dag_unreachable_target_returns_none() {
        let mut g = TransformGraph::new();
        g.add_transform(TransformEdge::new(Format::Markdown, Format::Html, 0.5, 1.0));

        // Epub is not reachable.
        let outcome = g.build_multi_target_dag(Format::Markdown, &[Format::Html, Format::Epub]);
        assert!(outcome.is_none());
    }

    // ── execution_order ───────────────────────────────────────────────────────

    #[test]
    fn test_execution_order_length() {
        let dag = pdf_docx_dag();
        assert_eq!(dag.execution_order().len(), 3);
    }

    #[test]
    fn test_execution_order_source_before_dependents() {
        let dag = pdf_docx_dag();
        let order = dag.execution_order();

        // Locate an edge in the execution order by its endpoints.
        let position_of =
            |src: Format, dst: Format| order.iter().position(|e| e.from == src && e.to == dst);

        let md_html_pos = position_of(Format::Markdown, Format::Html)
            .expect("Markdown→Html edge must be present");
        let html_pdf_pos =
            position_of(Format::Html, Format::Pdf).expect("Html→Pdf edge must be present");
        let html_docx_pos =
            position_of(Format::Html, Format::Docx).expect("Html→Docx edge must be present");

        assert!(
            md_html_pos < html_pdf_pos,
            "Markdown→Html must precede Html→Pdf"
        );
        assert!(
            md_html_pos < html_docx_pos,
            "Markdown→Html must precede Html→Docx"
        );
    }

    #[test]
    fn test_execution_order_no_duplicates() {
        let dag = pdf_docx_dag();

        // Every (from, to) pair may show up at most once.
        let mut seen = std::collections::HashSet::new();
        for edge in dag.execution_order() {
            let pair = (edge.from, edge.to);
            assert!(seen.insert(pair), "duplicate edge in execution order: {:?}", pair);
        }
    }

    // ── edge deduplication (cheaper edge wins) ────────────────────────────────

    #[test]
    fn test_merge_edge_deduplicates_and_keeps_cheaper() {
        let mut dag = MultiTargetDag::new();
        // Same (from, to) pair twice: expensive first, then cheaper.
        dag.merge_edge(TransformEdge::new(Format::Markdown, Format::Html, 2.0, 0.9));
        dag.merge_edge(TransformEdge::new(Format::Markdown, Format::Html, 1.0, 0.95));

        // Only one edge should be stored.
        assert_eq!(dag.edge_count(), 1);
        // The cheaper edge (cost 1.0) must be kept.
        let kept = dag.all_edges();
        assert!((kept[0].cost - 1.0).abs() < 1e-5);
    }

    #[test]
    fn test_merge_edge_retains_existing_when_new_is_more_expensive() {
        let mut dag = MultiTargetDag::new();
        // Cheaper edge first; the more expensive one must not replace it.
        dag.merge_edge(TransformEdge::new(Format::Markdown, Format::Html, 1.0, 0.95));
        dag.merge_edge(TransformEdge::new(Format::Markdown, Format::Html, 2.0, 0.9));

        assert_eq!(dag.edge_count(), 1);
        let kept = dag.all_edges();
        assert!((kept[0].cost - 1.0).abs() < 1e-5);
    }

    #[test]
    fn test_node_count_shared_intermediate() {
        // Nodes: Markdown, Html, Pdf, Docx = 4
        assert_eq!(pdf_docx_dag().node_count(), 4);
    }

    // ── all_edges ─────────────────────────────────────────────────────────────

    #[test]
    fn test_all_edges_count_matches_edge_count() {
        let dag = pdf_docx_dag();
        assert_eq!(dag.all_edges().len(), dag.edge_count());
    }
}

0 commit comments

Comments
 (0)