Skip to content

Commit a55de20

Browse files
Copilot, toby-coleman, chrisk314
authored
feat: add plugboard process validate CLI command (#212)
# Summary Adds a `plugboard process validate` CLI command that checks whether a YAML process config has valid topology (connected inputs, event matching, cycle detection) without running the process. # Changes - Add `validate` subcommand to `plugboard/cli/process/__init__.py` — builds the process from YAML config, runs `validate_process(process.dict())`, exits 0 on success or 1 with itemized errors on failure - Add two unit tests: valid config passes, mocked validation errors produce exit code 1 ```bash $ plugboard process validate tests/data/minimal-process.yaml Validation passed $ plugboard process validate bad-config.yaml Validation failed: • Component 'x' has unconnected inputs: ['in_1'] ``` <!-- START COPILOT ORIGINAL PROMPT --> <details> <summary>Original prompt</summary> > > ---- > > *This section details on the original issue you should resolve* > > <issue_title>Model validation (valid model topology etc)</issue_title> > <issue_description>Implement validation checks in `plugboard-schemas` for `Process` objects. Start with implementing the following checks: > 1. Check that all component inputs are connected. > 2. Check all components with input events, and make sure there exists in the process a component that outputs the required event type. > 3. Check for circular connections within the process topology. These circular loops must only be considered valid if there are `initial_values` set on an appropriate component input within the loop. > > For (3) we can use Johnson's algorithm to find all simple circuits within the process. Implement this as a utility in `plugboard-schemas`, i.e. try not to introduce additional dependencies. 
Info on the algorithm can be found at https://github.com/qpwo/python-simple-cycles/blob/master/johnson.py.</issue_description> > > ## Comments on the Issue (you are @copilot in this section) > > <comments> > </comments> > </details> <!-- START COPILOT CODING AGENT SUFFIX --> - Fixes #95 <!-- START COPILOT CODING AGENT TIPS --> --- ✨ Let Copilot coding agent [set things up for you](https://github.com/plugboard-dev/plugboard/issues/new?title=✨+Set+up+Copilot+instructions&body=Configure%20instructions%20for%20this%20repository%20as%20documented%20in%20%5BBest%20practices%20for%20Copilot%20coding%20agent%20in%20your%20repository%5D%28https://gh.io/copilot-coding-agent-tips%29%2E%0A%0A%3COnboard%20this%20repo%3E&assignees=copilot) — coding agent works faster and does higher quality work when set up for your repo. --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: toby-coleman <13170610+toby-coleman@users.noreply.github.com> Co-authored-by: Toby Coleman <toby@tobycoleman.com> Co-authored-by: Toby Coleman <toby-coleman@users.noreply.github.com> Co-authored-by: Chris Knight <chrisk314@gmail.com>
1 parent 809ad8d commit a55de20

File tree

14 files changed

+903
-24
lines changed

14 files changed

+903
-24
lines changed

plugboard-schemas/plugboard_schemas/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,13 @@
99
from importlib.metadata import version
1010

1111
from ._common import PlugboardBaseModel
12+
from ._graph import simple_cycles
13+
from ._validation import (
14+
validate_all_inputs_connected,
15+
validate_input_events,
16+
validate_no_unresolved_cycles,
17+
)
18+
from ._validator_registry import validate_process, validator
1219
from .component import ComponentArgsDict, ComponentArgsSpec, ComponentSpec, Resource
1320
from .config import ConfigSpec, ProcessConfigSpec
1421
from .connector import (
@@ -85,4 +92,10 @@
8592
"TuneArgsDict",
8693
"TuneArgsSpec",
8794
"TuneSpec",
95+
"simple_cycles",
96+
"validator",
97+
"validate_all_inputs_connected",
98+
"validate_input_events",
99+
"validate_no_unresolved_cycles",
100+
"validate_process",
88101
]
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
"""Graph algorithms for topology validation.
2+
3+
Implements Johnson's algorithm for finding all simple cycles in a directed graph,
4+
along with helper functions for strongly connected components.
5+
6+
References:
7+
Donald B Johnson. "Finding all the elementary circuits of a directed graph."
8+
SIAM Journal on Computing. 1975.
9+
"""
10+
11+
from collections import defaultdict
12+
from collections.abc import Generator
13+
14+
15+
def simple_cycles(graph: dict[str, set[str]]) -> Generator[list[str], None, None]:
    """Find all simple cycles in a directed graph using Johnson's algorithm.

    The mapping passed in is copied before the search starts, so the caller's
    graph is never mutated. Vertices that only appear as a neighbour (i.e. have
    no key of their own) are treated as vertices without outgoing edges.

    Args:
        graph: A dictionary mapping each vertex to a set of its neighbours.

    Yields:
        Each elementary cycle as a list of vertices. The first vertex is not
        repeated at the end of the list; the closing edge is implied.
    """
    # Private copy: vertices are removed from it as the search progresses.
    graph = {v: set(nbrs) for v, nbrs in graph.items()}
    # Give neighbour-only vertices an explicit (empty) adjacency set so that the
    # ``graph[...]`` lookups and ``_remove_node`` below cannot raise KeyError.
    for targets in list(graph.values()):
        for target in targets:
            graph.setdefault(target, set())
    sccs = _strongly_connected_components(graph)
    while sccs:
        scc = sccs.pop()
        # Every cycle reported in this round starts (and ends) at ``startnode``.
        startnode = scc.pop()
        path = [startnode]
        blocked: set[str] = set()
        # Vertices that were on the path when a cycle was reported.
        closed: set[str] = set()
        blocked.add(startnode)
        # B[w] holds the vertices to unblock again once ``w`` gets unblocked.
        B: dict[str, set[str]] = defaultdict(set)
        # Explicit DFS stack of (vertex, neighbours still to visit).
        stack: list[tuple[str, list[str]]] = [(startnode, list(graph[startnode]))]
        while stack:
            thisnode, nbrs = stack[-1]
            if nbrs:
                nextnode = nbrs.pop()
                if nextnode == startnode:
                    # Back at the start vertex: the current path is a cycle.
                    yield path[:]
                    closed.update(path)
                elif nextnode not in blocked:
                    # Descend into an unvisited (unblocked) neighbour.
                    path.append(nextnode)
                    stack.append((nextnode, list(graph[nextnode])))
                    closed.discard(nextnode)
                    blocked.add(nextnode)
                    continue
            if not nbrs:
                # All neighbours of ``thisnode`` have been handled: backtrack.
                if thisnode in closed:
                    _unblock(thisnode, blocked, B)
                else:
                    for nbr in graph[thisnode]:
                        if thisnode not in B[nbr]:
                            B[nbr].add(thisnode)
                stack.pop()
                path.pop()
        # All cycles through ``startnode`` are found; drop it and continue with
        # the strongly connected components of what is left of this component.
        _remove_node(graph, startnode)
        H = _subgraph(graph, set(scc))
        sccs.extend(_strongly_connected_components(H))
60+
61+
62+
def _unblock(thisnode: str, blocked: set[str], B: dict[str, set[str]]) -> None:
63+
"""Unblock a node and recursively unblock nodes in its B set."""
64+
stack = {thisnode}
65+
while stack:
66+
node = stack.pop()
67+
if node in blocked:
68+
blocked.remove(node)
69+
stack.update(B[node])
70+
B[node].clear()
71+
72+
73+
def _strongly_connected_components(graph: dict[str, set[str]]) -> list[set[str]]:
74+
"""Find all strongly connected components using Tarjan's algorithm.
75+
76+
Args:
77+
graph: A dictionary mapping each vertex to a set of its neighbours.
78+
79+
Returns:
80+
A list of sets, each containing the vertices of a strongly connected component.
81+
"""
82+
index_counter = [0]
83+
stack: list[str] = []
84+
lowlink: dict[str, int] = {}
85+
index: dict[str, int] = {}
86+
result: list[set[str]] = []
87+
88+
def _strong_connect(node: str) -> None:
89+
index[node] = index_counter[0]
90+
lowlink[node] = index_counter[0]
91+
index_counter[0] += 1
92+
stack.append(node)
93+
94+
for successor in graph.get(node, set()):
95+
if successor not in index:
96+
_strong_connect(successor)
97+
lowlink[node] = min(lowlink[node], lowlink[successor])
98+
elif successor in stack:
99+
lowlink[node] = min(lowlink[node], index[successor])
100+
101+
if lowlink[node] == index[node]:
102+
connected_component: set[str] = set()
103+
while True:
104+
successor = stack.pop()
105+
connected_component.add(successor)
106+
if successor == node:
107+
break
108+
result.append(connected_component)
109+
110+
for node in graph:
111+
if node not in index:
112+
_strong_connect(node)
113+
114+
return result
115+
116+
117+
def _remove_node(graph: dict[str, set[str]], target: str) -> None:
118+
"""Remove a node and all its edges from the graph."""
119+
del graph[target]
120+
for nbrs in graph.values():
121+
nbrs.discard(target)
122+
123+
124+
def _subgraph(graph: dict[str, set[str]], vertices: set[str]) -> dict[str, set[str]]:
125+
"""Get the subgraph induced by a set of vertices."""
126+
return {v: graph[v] & vertices for v in vertices}
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
"""Validation utilities for process topology.
2+
3+
Provides functions to validate process topology including:
4+
- Checking that all component inputs are connected
5+
- Checking that input events have matching output event producers
6+
- Checking for circular connections that require initial values
7+
8+
All validators accept the output of ``process.dict()`` or the relevant
9+
sub-structures thereof.
10+
"""
11+
12+
from __future__ import annotations
13+
14+
from collections import defaultdict
15+
import typing as _t
16+
17+
from ._graph import simple_cycles
18+
from ._validator_registry import validator
19+
20+
21+
def _build_component_graph(
22+
connectors: dict[str, dict[str, _t.Any]],
23+
) -> dict[str, set[str]]:
24+
"""Build a directed graph of component connections from connector dicts.
25+
26+
Args:
27+
connectors: Dictionary mapping connector IDs to connector dicts,
28+
as returned by ``process.dict()["connectors"]``.
29+
30+
Returns:
31+
A dictionary mapping source component names to sets of target component names.
32+
"""
33+
graph: dict[str, set[str]] = defaultdict(set)
34+
for conn_info in connectors.values():
35+
spec = conn_info["spec"]
36+
source_entity = spec["source"]["entity"]
37+
target_entity = spec["target"]["entity"]
38+
if source_entity != target_entity:
39+
graph[source_entity].add(target_entity)
40+
if target_entity not in graph:
41+
graph[target_entity] = set()
42+
return dict(graph)
43+
44+
45+
def _get_edges_in_cycle(
46+
cycle: list[str],
47+
connectors: dict[str, dict[str, _t.Any]],
48+
) -> list[dict[str, _t.Any]]:
49+
"""Get all connector spec dicts that form edges within a cycle.
50+
51+
Args:
52+
cycle: List of component names forming a cycle.
53+
connectors: Dictionary mapping connector IDs to connector dicts.
54+
55+
Returns:
56+
List of connector spec dicts that are part of the cycle.
57+
"""
58+
conn_map: dict[tuple[str, str], dict[str, _t.Any]] = {
59+
(spec["source"]["entity"], spec["target"]["entity"]): spec
60+
for conn_info in connectors.values()
61+
if (spec := conn_info["spec"])
62+
}
63+
cycle_edges: list[dict[str, _t.Any]] = []
64+
for i, node in enumerate(cycle):
65+
next_node = cycle[(i + 1) % len(cycle)]
66+
try:
67+
spec = conn_map[(node, next_node)]
68+
except KeyError:
69+
raise ValueError(f"Cycle edge not found: {node} -> {next_node}")
70+
cycle_edges.append(spec)
71+
return cycle_edges
72+
73+
74+
@validator
def validate_all_inputs_connected(
    process_dict: dict[str, _t.Any],
) -> list[str]:
    """Report every component input that no connector targets.

    Args:
        process_dict: The output of ``process.dict()``. Uses the ``"components"``
            and ``"connectors"`` keys.

    Returns:
        List of error messages, one per component with unconnected inputs.
    """
    components: dict[str, dict[str, _t.Any]] = process_dict["components"]
    connectors: dict[str, dict[str, _t.Any]] = process_dict["connectors"]

    # Input fields that are the target of at least one connector, per component.
    wired: dict[str, set[str]] = {}
    for connector in connectors.values():
        target = connector["spec"]["target"]
        wired.setdefault(target["entity"], set()).add(target["descriptor"])

    errors: list[str] = []
    for name, component in components.items():
        declared = set(component.get("io", {}).get("inputs", []))
        missing = declared.difference(wired.get(name, set()))
        if missing:
            errors.append(f"Component '{name}' has unconnected inputs: {sorted(missing)}")
    return errors
106+
107+
108+
@validator
def validate_input_events(
    process_dict: dict[str, _t.Any],
) -> list[str]:
    """Report input events that no component in the process emits.

    Args:
        process_dict: The output of ``process.dict()``. Uses the ``"components"`` key.

    Returns:
        List of error messages, one per component with unmatched input events.
    """
    components: dict[str, dict[str, _t.Any]] = process_dict["components"]

    # Union of the output events declared by all components.
    produced: set[str] = {
        event
        for component in components.values()
        for event in component.get("io", {}).get("output_events", [])
    }

    errors: list[str] = []
    for name, component in components.items():
        consumed = set(component.get("io", {}).get("input_events", []))
        orphaned = consumed.difference(produced)
        if not orphaned:
            continue
        errors.append(
            f"Component '{name}' has input events with no producer: {sorted(orphaned)}"
        )
    return errors
137+
138+
139+
@validator
def validate_no_unresolved_cycles(
    process_dict: dict[str, _t.Any],
) -> list[str]:
    """Report circular connections that no initial value breaks.

    A cycle counts as resolved as soon as one of the connectors returned for it
    targets an input field listed in that component's ``initial_values``.

    Args:
        process_dict: The output of ``process.dict()``. Uses the ``"components"``
            and ``"connectors"`` keys.

    Returns:
        List of error messages, one per unresolved cycle.
    """
    components: dict[str, dict[str, _t.Any]] = process_dict["components"]
    connectors: dict[str, dict[str, _t.Any]] = process_dict["connectors"]

    graph = _build_component_graph(connectors)
    if not graph:
        return []

    # Input fields with initial values, for the components that declare any.
    seeded_fields: dict[str, set[str]] = {
        name: set(initial.keys())
        for name, component in components.items()
        if (initial := component.get("io", {}).get("initial_values", {}))
    }

    errors: list[str] = []
    for cycle in simple_cycles(graph):
        for edge in _get_edges_in_cycle(cycle, connectors):
            target = edge["target"]
            component_name, field_name = target["entity"], target["descriptor"]
            if field_name in seeded_fields.get(component_name, ()):
                # One seeded input on the loop is enough to resolve it.
                break
        else:
            cycle_str = " -> ".join([*cycle, cycle[0]])
            errors.append(
                f"Circular connection detected without initial values: {cycle_str}. "
                f"Set initial_values on a component input within the loop to resolve."
            )
    return errors

0 commit comments

Comments
 (0)