diff --git a/Dockerfile.jupyter b/Dockerfile.jupyter
index d5138c6..271bfdc 100644
--- a/Dockerfile.jupyter
+++ b/Dockerfile.jupyter
@@ -1,13 +1,15 @@
 FROM python:3.12-slim
 ENV PATH="/root/.local/bin:$PATH"
+# Used for the notebook server
+ENV PYTHONPATH="/app/flightpaths/flights"
 
 WORKDIR /app
 
 # pipx needed for uv installation script
 # ssh client needed for installing private modelbench dependencies
 # git needed for dvc
-RUN apt-get update && apt-get install -y pipx openssh-client git && \
+RUN apt-get update && apt-get install -y pipx openssh-client git graphviz && \
     pipx install uv
 
 COPY pyproject.toml uv.lock README.md ./
diff --git a/pyproject.toml b/pyproject.toml
index 305a024..e9b4f4a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,6 +23,7 @@ dependencies = [
     "scikit-learn>=1.5.0,<2.0.0",
     "pandas>=2.2.2,<4",
     "modelbench @ git+https://github.com/mlcommons/modelbench.git",
+    "graphviz>=0.20,<1",
 ]
 
 [project.scripts]
diff --git a/src/modelplane/evaluator/annotator.py b/src/modelplane/evaluator/annotator.py
new file mode 100644
index 0000000..4371a77
--- /dev/null
+++ b/src/modelplane/evaluator/annotator.py
@@ -0,0 +1,32 @@
+from modelgauge.annotator import Annotator
+from modelgauge.prompt import ChatPrompt, TextPrompt
+from modelgauge.prompt_formatting import format_chat
+from modelgauge.sut import SUTResponse
+
+from modelplane.evaluator.context import EvalContext
+from modelplane.evaluator.dag import EvaluatorDAG
+from modelplane.evaluator.outputs import Output
+
+
+class DAGAnnotator(Annotator):
+    """Annotator that executes a DAG."""
+
+    def __init__(self, uid: str, dag: EvaluatorDAG) -> None:
+        super().__init__(uid)
+        self.dag = dag
+
+    def translate_prompt(
+        self,
+        prompt: TextPrompt | ChatPrompt,
+        response: SUTResponse,
+    ) -> EvalContext:
+        prompt_str = (
+            prompt.text if isinstance(prompt, TextPrompt) else format_chat(prompt)
+        )
+        return EvalContext(
+            prompt=prompt_str,
+            response=response.text,
+        )
+
+    def annotate(self, annotation_request: EvalContext) -> Output:
+        return self.dag.run(annotation_request)
diff --git a/src/modelplane/evaluator/context.py b/src/modelplane/evaluator/context.py
new file mode 100644
index 0000000..c40c148
--- /dev/null
+++ b/src/modelplane/evaluator/context.py
@@ -0,0 +1,20 @@
+from typing import Any, Optional
+
+
+class EvalContext:
+    """Context state passed around during DAG execution."""
+
+    def __init__(
+        self, prompt: str, response: str, metadata: Optional[dict[str, Any]] = None
+    ) -> None:
+        self.prompt = prompt
+        self.response = response
+        self.metadata = metadata or {}
+        self._parent_outputs: dict[str, Any] = {}
+
+    def set_parent_outputs(self, outputs: dict[str, Any]) -> None:
+        self._parent_outputs = outputs
+
+    def parent_outputs(self) -> list[Any]:
+        """Return the outputs of all parent nodes that ran, in insertion order."""
+        return list(self._parent_outputs.values())
diff --git a/src/modelplane/evaluator/dag.py b/src/modelplane/evaluator/dag.py
new file mode 100644
index 0000000..b6f093b
--- /dev/null
+++ b/src/modelplane/evaluator/dag.py
@@ -0,0 +1,547 @@
+"""EvaluatorDAG implementation."""
+
+import collections
+import functools
+import os
+from concurrent.futures import ThreadPoolExecutor
+from itertools import product
+from typing import Any, Optional
+
+import pandas as pd
+
+from modelplane.evaluator.context import EvalContext
+from modelplane.evaluator.nodes import Arbiter, EvaluatorDAGNode, Gate, Output
+
+
+def requires_validate_and_build(method):
+    @functools.wraps(method)
+    def wrapper(self, *args, **kwargs):
+        self._validate_and_build()
+        return method(self, *args, **kwargs)
+
+    return wrapper
+
+
+class EvaluatorDAG:
+    """DAG of EvaluatorDAGNodes.
+
+    Usage:
+
+        refusal_gate = MyRefusalGate(
+            "RefusalGate",
+            routes_true=[Safety(is_safe=True)],
+            routes_false=["NonRefusal"],
+        )
+        eval_non_refusal = MyNonRefusalEvaluator("NonRefusal", routes=["Arbiter"])
+        arbiter = MyArbiter("Arbiter")
+
+        dag = (
+            EvaluatorDAG("refusal_gated_safety_evaluator", output_type=Safety)
+            .add_node(refusal_gate)
+            .add_node(eval_non_refusal)
+            .add_node(arbiter)
+        )
+
+        # run a single prompt/response pair
+        result = dag.run(EvalContext(prompt="...", response="..."))
+        # run a batch
+        results_df = dag.run_dataframe(df)
+    """
+
+    DATAFRAME_OUTPUT_COL = "output"
+
+    def __init__(self, name: str, output_type: type) -> None:
+        self.name = name
+        self._nodes: dict[str, EvaluatorDAGNode] = {}
+        self._root_nodes: list[str] = []
+        self._ordered: list[str] = []
+        self._validated: bool = False
+        self._predecessors: dict[str, list[str]] = collections.defaultdict(list)
+        if not issubclass(output_type, Output):
+            raise ValueError("output_type must be a subclass of Output.")
+        self._output_type = output_type
+
+    @property
+    def output_type(self) -> type:
+        return self._output_type
+
+    def add_node(
+        self,
+        node: EvaluatorDAGNode,
+    ) -> "EvaluatorDAG":
+        """Register a node with its routes."""
+
+        if node.name in self._nodes:
+            raise ValueError(
+                f"A node named {node.name} is already registered."
+            )
+        self._nodes[node.name] = node
+        self._validated = False
+        return self
+
+    def _validate_and_build(self) -> None:
+        """
+        Validate the DAG:
+        - All routes reference registered nodes or instances of the output type.
+        - No cycles.
+        - All paths lead to an instance of the output type.
+
+        Build:
+        - _predecessors: dict mapping node name to list of parent node names (for context during execution)
+        - _root_nodes: list of node names with no incoming routes (starting points)
+        - _ordered: list of node names in topological order (valid execution order)
+        """
+        # skip validation if we've already done it and the DAG hasn't changed
+        if self._validated:
+            return
+
+        # check that all route targets reference registered nodes or instances
+        # of the output type, and that all Arbiters have compatible output types
+        for node_name, node in self._nodes.items():
+            if isinstance(node, Arbiter):
+                if not issubclass(node.output_type, self.output_type):
+                    raise ValueError(
+                        f"Node {node_name} is an Arbiter with output_type {node.output_type.__name__}, which is not compatible with the DAG's output_type {self.output_type.__name__}."
+                    )
+            for target in node.all_routes():
+                if target not in self._nodes and not isinstance(
+                    target, self.output_type
+                ):
+                    raise ValueError(
+                        f"Node {node_name} routes to unregistered node {target} or incompatible output."
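+                        # e.g. a node constructed with routes=["typo"] when no
+                        # node named "typo" was ever add_node()'d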
+                    )
+
+        # check for cycles (Kahn's algorithm)
+        all_routes = {name: node.all_routes() for name, node in self._nodes.items()}
+        in_degree: dict[str, int] = {n: 0 for n in self._nodes}
+        for routes in all_routes.values():
+            for route in routes:
+                if isinstance(route, Output):
+                    continue
+                in_degree[route] += 1
+
+        root_nodes = [n for n in self._nodes if in_degree[n] == 0]
+        queue = collections.deque(root_nodes)
+        ordered: list[str] = []
+        while queue:
+            current = queue.popleft()
+            ordered.append(current)
+            for child in all_routes.get(current, []):
+                if isinstance(child, Output):
+                    continue
+                in_degree[child] -= 1
+                if in_degree[child] == 0:
+                    queue.append(child)
+
+        if len(ordered) != len(self._nodes):
+            nodes_in_cycle = set(self._nodes) - set(ordered)
+            raise ValueError(f"DAG contains a cycle. Nodes in cycle: {nodes_in_cycle}")
+
+        # build predecessors
+        for name, node in self._nodes.items():
+            for target in node.all_routes():
+                if isinstance(target, Output):
+                    continue
+                self._predecessors[target].append(name)
+
+        self._validated = True
+        self._root_nodes = root_nodes
+        self._ordered = ordered
+
+    def _run_traced(
+        self, ctx: EvalContext
+    ) -> tuple[Output, dict[str, Any], set[tuple[str, str]]]:
+        """Execute the DAG and return (final output, node outputs, traversed edges)."""
+        node_outputs: dict[str, Any] = {}
+        traversed_edges: set[tuple[str, str]] = set()
+        reachable: set[str] = set(self._root_nodes)
+        for node_name in self._ordered:
+            if node_name not in reachable:
+                continue
+            ctx.set_parent_outputs(
+                {
+                    pred: node_outputs[pred]
+                    for pred in self._predecessors[node_name]
+                    if pred in node_outputs
+                }
+            )
+            node = self._nodes[node_name]
+            output = node.run(ctx)
+            node_outputs[node_name] = output
+            if isinstance(output, Output):
+                traversed_edges.add((node_name, output.name))
+                return output, node_outputs, traversed_edges
+            for target in node.next_nodes(output):
+                t = target if isinstance(target, str) else target.name
+                traversed_edges.add((node_name, t))
+                if isinstance(target, Output):
+                    return target, node_outputs, traversed_edges
+                reachable.add(t)
+        raise ValueError("DAG execution completed without reaching an Output node.")
+
+    @requires_validate_and_build
+    def run(
+        self,
+        ctx: EvalContext,
+    ) -> Output:
+        """Execute the DAG on a single prompt/response."""
+        output, _, _ = self._run_traced(ctx)
+        return output
+
+    @requires_validate_and_build
+    def run_dataframe(
+        self,
+        df: pd.DataFrame,
+        prompt_col: str = "prompt",
+        response_col: str = "response",
+        n_jobs: int = 1,
+    ) -> pd.DataFrame:
+        """Run the DAG over every row of a DataFrame."""
+
+        def _run_row(row: Any) -> Output:
+            ctx = EvalContext(
+                prompt=str(row[prompt_col]),
+                response=str(row[response_col]),
+            )
+            return self.run(ctx)
+
+        rows = [row for _, row in df.iterrows()]
+
+        if n_jobs == 1:
+            records = [_run_row(row) for row in rows]
+        else:
+            max_workers = os.cpu_count() if n_jobs == -1 else n_jobs
+            with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                records = list(executor.map(_run_row, rows))
+
+        result_df = pd.DataFrame(
+            {self.DATAFRAME_OUTPUT_COL: [r.name for r in records]}, index=df.index
+        )
+        return pd.concat([df, result_df], axis=1)
+
+    @requires_validate_and_build
+    def total_cost(self, ctx: Optional[EvalContext] = None) -> float:
+        """Run the DAG on ctx and return the total cost of the executed path."""
+        if ctx is None:
+            ctx = EvalContext(prompt="", response="")
+        _, node_outputs, _ = self._run_traced(ctx)
+        total = 0.0
+        for node_name in node_outputs:
+            node = self._nodes[node_name]
+            total += node.cost(ctx)
+        return total
+
+    @requires_validate_and_build
+    def total_costs(self) -> dict[str, float]:
+        """Enumerate every combination of gate outcomes and report the total
+        cost of each resulting terminal path."""
+        ctx = EvalContext(prompt="", response="")
+        gates = [name for name, node in self._nodes.items() if isinstance(node, Gate)]
+        path_costs: dict[str, float] = {}
+
+        for combo in product([True, False], repeat=len(gates)):
+            gate_outcomes = dict(zip(gates, combo))
+            reachable: set[str] = set(self._root_nodes)
+            path: list[str] = []
+            total = 0.0
+
+            for node_name in self._ordered:
+                if node_name not in reachable:
+                    continue
+                node = self._nodes[node_name]
+                total += node.cost(ctx)
+                path.append(node_name)
+                if isinstance(node, Gate):
+                    targets = (
+                        node.routes_true
+                        if gate_outcomes[node_name]
+                        else node.routes_false
+                    )
+                elif isinstance(node, Arbiter):
+                    targets = []
+                else:
+                    targets = node.routes
+                for target in targets:
+                    if not isinstance(target, Output):
+                        reachable.add(
+                            target if isinstance(target, str) else target.name
+                        )
+
+            base_path = " -> ".join(path)
+            path_costs[f"{base_path} -> Out ({self.output_type.__name__})"] = total
+
+        return path_costs
+
+    def _visualize(
+        self,
+        node_outputs: Optional[dict[str, Any]] = None,
+        traversed_edges: Optional[set[tuple[str, str]]] = None,
+        final_output: Optional[Output] = None,
+        ctx: Optional[EvalContext] = None,
+    ):
+        """Render the DAG as a PNG image. In a Jupyter notebook the image is displayed inline.
+
+        When node_outputs/traversed_edges/final_output are provided (via visualize_run),
+        the hot path is highlighted and each node shows its output value.
+
+        NOTE: this helper method is vibe-coded and provided as-is.
+        """
+        import graphviz
+        from IPython.display import Image
+
+        traced = node_outputs is not None
+
+        _NODE_STYLES: dict[type, dict] = {
+            Gate: {"shape": "diamond", "style": "filled", "fillcolor": "#ffe082"},
+            Arbiter: {"shape": "hexagon", "style": "filled", "fillcolor": "#e1bee7"},
+            Output: {
+                "shape": "rectangle",
+                "style": "filled,rounded",
+                "fillcolor": "#dcedc8",
+            },
+        }
+        _OUTPUT_TYPE_STYLE = {
+            "shape": "rectangle",
+            "style": "filled,rounded,dashed",
+            "fillcolor": "#dcedc8",
+        }
+        _DEFAULT_STYLE = {
+            "shape": "rectangle",
+            "style": "filled",
+            "fillcolor": "#eeeeee",
+        }
+        _DIM = {
+            "style": "filled",
+            "fillcolor": "#f0f0f0",
+            "color": "#bbbbbb",
+            "fontcolor": "#aaaaaa",
+        }
+
+        _NODE_W, _NODE_H = 1.5, 0.5  # inches, fixed for all nodes
+
+        def _fontsize(
+            label: str, max_fs: float = 11.0, min_fs: float = 7.0, fill: float = 0.8
+        ) -> str:
+            """Scale font size so the longest line fits within _NODE_W.
+
+            fill: fraction of the node width usable for text. Shapes like diamonds,
+            hexagons, and parallelograms have less usable area than rectangles, so
+            pass a smaller fill value for those.
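+
+            For example, a 17-character label at the default fill=0.8 yields
+            (1.5 * 72 * 0.8) / (17 * 0.55) ≈ 9.2pt, inside the [7, 11] clamp.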
+ """ + longest = max((len(line) for line in label.split("\n")), default=1) + # approx: each char ≈ 0.55 × fontsize points + fs = (_NODE_W * 72 * fill) / (longest * 0.55) + return f"{max(min_fs, min(max_fs, fs)):.1f}" + + dot = graphviz.Digraph(name=self.name) + dot.attr( + label=self.name, + labelloc="t", + fontsize="13", + fontname="Helvetica", + rankdir="LR", + ranksep="0.5", + nodesep="0.4", + ) + dot.attr( + "node", + fontname="Helvetica", + fontsize="11", + width=str(_NODE_W), + height=str(_NODE_H), + fixedsize="true", + ) + dot.attr("edge", fontname="Helvetica", fontsize="9") + + # implicit input node pinned to the left + top = graphviz.Digraph() + top.attr(rank="min") + + def _truncate(s: str, n: int = 24) -> str: + return s if len(s) <= n else s[: n - 1] + "…" + + if ctx is not None: + input_label = f"p: {_truncate(ctx.prompt)}\nr: {_truncate(ctx.response)}" + else: + input_label = "prompt\nresponse" + top.node( + "__input__", + input_label, + shape="parallelogram", + style="filled", + fillcolor="#b2dfdb", + color="#4db6ac", + fontcolor="#00695c", + fontsize=_fontsize(input_label, fill=0.45), + ) + dot.subgraph(top) + + # collect Output instances directly referenced in routes (from non-Arbiter nodes) + direct_outputs: dict[str, Output] = {} + has_arbiter = any(isinstance(n, Arbiter) for n in self._nodes.values()) + for node in self._nodes.values(): + if not isinstance(node, Arbiter): + for target in node.all_routes(): + if isinstance(target, Output): + direct_outputs[target.name] = target + + # whether the final output came from a direct route or an arbiter + final_from_direct = traced and final_output in direct_outputs.values() + + bottom = graphviz.Digraph() + bottom.attr(rank="max") + + # individual nodes for directly-routed Output instances, shown with their repr + for out_name, out_inst in direct_outputs.items(): + attrs = dict(_NODE_STYLES[Output]) + if traced: + if out_inst is final_output: + attrs["penwidth"] = "2.5" + else: + attrs = dict(_DIM, shape="rectangle", style="filled,rounded") + bottom.node( + out_name, repr(out_inst), fontsize=_fontsize(repr(out_inst)), **attrs + ) + + # synthetic output type node for Arbiters + if has_arbiter: + output_node_id = f"__output_{self.output_type.__name__}__" + output_label = f"{self.output_type.__name__} (?)" + attrs = dict(_OUTPUT_TYPE_STYLE) + if traced: + if not final_from_direct and final_output is not None: + attrs = dict(_NODE_STYLES[Output]) + attrs["penwidth"] = "2.5" + output_label = repr(final_output) + elif final_from_direct: + attrs = dict(_DIM, shape="rectangle", style="filled,rounded") + bottom.node( + output_node_id, output_label, fontsize=_fontsize(output_label), **attrs + ) + + dot.subgraph(bottom) + + # processing nodes + for node_name, node in self._nodes.items(): + base_style = next( + (s for t, s in _NODE_STYLES.items() if isinstance(node, t)), + _DEFAULT_STYLE, + ) + node_was_active = ( + node_outputs is not None and node_name in node_outputs + ) or ( + traversed_edges is not None + and any(src == node_name for src, _ in traversed_edges) + ) + if traced and not node_was_active: + attrs = dict(_DIM, shape=base_style.get("shape", "box")) + label = node_name + else: + attrs = dict(base_style) + if traced: + raw = node_outputs[node_name] # type: ignore[index] + label = f"{node_name}\n{node.format_output(raw)}" + attrs["penwidth"] = "2.5" + else: + label = node_name + _fill = ( + 0.45 + if isinstance(node, Gate) + else 0.65 if isinstance(node, Arbiter) else 0.8 + ) + dot.node(node_name, label, 
+                fontsize=_fontsize(label, fill=_fill), **attrs)
+
+        # edges from implicit input to root nodes
+        for root in self._root_nodes:
+            dot.edge("__input__", root, color="#888888")
+
+        # edges between processing nodes
+        for node_name, node in self._nodes.items():
+            if isinstance(node, Gate):
+                for target in node.routes_true:
+                    t = target if isinstance(target, str) else target.name
+                    hot = not traced or (node_name, t) in traversed_edges  # type: ignore[operator]
+                    dot.edge(
+                        node_name,
+                        t,
+                        label=" True",
+                        color="#2e7d32" if hot else "#cccccc",
+                        fontcolor="#2e7d32" if hot else "#cccccc",
+                        penwidth="2" if hot and traced else "1",
+                    )
+                for target in node.routes_false:
+                    t = target if isinstance(target, str) else target.name
+                    hot = not traced or (node_name, t) in traversed_edges  # type: ignore[operator]
+                    dot.edge(
+                        node_name,
+                        t,
+                        label=" False",
+                        color="#c62828" if hot else "#cccccc",
+                        fontcolor="#c62828" if hot else "#cccccc",
+                        penwidth="2" if hot and traced else "1",
+                    )
+            elif isinstance(node, Arbiter):
+                output_node_id = f"__output_{self.output_type.__name__}__"
+                hot = not traced or node_name in (node_outputs or {})
+                dot.edge(
+                    node_name,
+                    output_node_id,
+                    color="#555555" if hot else "#cccccc",
+                    penwidth="2" if hot and traced else "1",
+                )
+            else:
+                for target in node.routes:
+                    t = target if isinstance(target, str) else target.name
+                    hot = not traced or (node_name, t) in traversed_edges  # type: ignore[operator]
+                    edge_label = ""
+                    if traced and hot and node_name in (node_outputs or {}):
+                        edge_label = f" {node.format_output(node_outputs[node_name])}"  # type: ignore[index]
+                    dot.edge(
+                        node_name,
+                        t,
+                        label=edge_label,
+                        color="#555555" if hot else "#cccccc",
+                        fontcolor="#555555" if hot else "#cccccc",
+                        penwidth="2" if hot and traced else "1",
+                    )
+
+        try:
+            return Image(dot.pipe(format="png"))
+        except graphviz.ExecutableNotFound as e:
+            raise RuntimeError(
+                "Graphviz system binaries not found. Install them with:\n"
+                "  macOS: brew install graphviz\n"
+                "  Ubuntu: apt-get install graphviz\n"
+                "  conda: conda install graphviz"
+            ) from e
+
+    @requires_validate_and_build
+    def visualize(self):
+        """Render the DAG structure as a PNG image (inline in Jupyter notebooks).
+
+        The graph flows left to right. Node shapes and colors:
+        - Input — teal parallelogram (implicit; represents the prompt/response pair)
+        - Gate — amber diamond; edges labelled "True" (green) / "False" (red)
+        - Enricher — light grey rectangle; edges are unlabelled
+        - Arbiter — light purple hexagon; its edge leads to the output-type placeholder
+        - Output (direct instance) — soft green rounded rectangle, solid border;
+          label is repr(output)
+        - Output (type placeholder) — soft green rounded rectangle, dashed border;
+          label is the class name; shown when the DAG contains
+          an Arbiter whose concrete value is only known at runtime
+
+        Raises:
+            RuntimeError: if the Graphviz system binaries are not installed.
+        """
+        return self._visualize()
+
+    @requires_validate_and_build
+    def visualize_run(self, ctx: EvalContext):
+        """Run the DAG on ctx and return a visualization with the executed path highlighted.
+
+        Identical layout to visualize(), with the following additions:
+        - Active nodes are bolded and show their output value beneath the node name.
+        - Inactive nodes are greyed out.
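+
+        Example (hypothetical ctx values):
+            ctx = EvalContext(prompt="What's 2+2?", response="4")
+            dag.visualize_run(ctx)  # inline PNG with the executed path in bold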
+ """ + final_output, node_outputs, traversed_edges = self._run_traced(ctx) + return self._visualize( + node_outputs=node_outputs, + traversed_edges=traversed_edges, + final_output=final_output, + ctx=ctx, + ) diff --git a/src/modelplane/evaluator/nodes.py b/src/modelplane/evaluator/nodes.py new file mode 100644 index 0000000..7b0d58a --- /dev/null +++ b/src/modelplane/evaluator/nodes.py @@ -0,0 +1,150 @@ +""" +Node types for the EvaluatorDAG pipeline. + +Class hierarchy: + + EvaluatorNode (ABC) + ├── Gate (binary test; routes on True/False) + ├── Enricher (produces arbitary output; routes forward unconditionally) + ├── Arbiter (produces output; routes to outputs only) + └── Output (terminal node; carries a verdict value) +""" + +from abc import ABC, abstractmethod +from typing import Any, Optional, Sequence + +from modelplane.evaluator.context import EvalContext +from modelplane.evaluator.outputs import Output + + +class EvaluatorDAGNode(ABC): + def __init__( + self, + name: str, + routes_true: Optional[Sequence[str | Output]] = None, + routes_false: Optional[Sequence[str | Output]] = None, + routes: Optional[Sequence[str | Output]] = None, + ) -> None: + self.name = name + self._routes_true: tuple[str | Output, ...] = tuple(routes_true or []) + self._routes_false: tuple[str | Output, ...] = tuple(routes_false or []) + self._routes: tuple[str | Output, ...] = tuple(routes or []) + self.validate() + + @property + def routes_true(self) -> tuple[str | Output, ...]: + return self._routes_true + + @property + def routes_false(self) -> tuple[str | Output, ...]: + return self._routes_false + + @property + def routes(self) -> tuple[str | Output, ...]: + return self._routes + + @abstractmethod + def run(self, ctx: EvalContext) -> Any: + """Execute the node and return its output.""" + raise NotImplementedError + + def cost(self, ctx: EvalContext) -> float: + """Return the estimated cost of running this node. Default is 0.0; + override for LLM calls or other expensive operations.""" + return 0.0 + + def __repr__(self) -> str: + return f"{self.name!r}: ({self.__class__.__name__})" + + def format_output(self, output: Any) -> str: + """Convenience method to format the node's output for debugging/visualization.""" + if isinstance(output, float): + return f"{output:.3g}" + s = str(output) + return s if len(s) <= 30 else s[:27] + "..." + + def all_routes(self) -> list[str | Output]: + """Return a list of all route targets from this node.""" + return [*self.routes_true, *self.routes_false, *self.routes] + + def next_nodes(self, output: Any) -> tuple[str | Output, ...]: + """Given the node's output value, return the tuple of next node names to activate.""" + if isinstance(self, Gate): + return self.routes_true if output else self.routes_false + else: + return self.routes + + def validate(self) -> None: + """Validate that the node's routing configuration is consistent with its type.""" + # validate that routes with Outputs only have one Output + for route_list in [self.routes_true, self.routes_false, self.routes]: + output_routes = [r for r in route_list if isinstance(r, Output)] + if len(output_routes) > 1: + raise ValueError( + f"{self!r} has multiple Output routes {output_routes}, which is not allowed." 
+                )
+
+
+def _validate_binary_routes(node: EvaluatorDAGNode) -> None:
+    if not node.routes_true or not node.routes_false:
+        raise ValueError(f"{node!r} requires both routes_true and routes_false")
+    if node.routes:
+        raise ValueError(
+            f"{node!r} should not have routes= (use routes_true= / routes_false=)"
+        )
+
+
+def _validate_unary_routes(node: EvaluatorDAGNode) -> None:
+    if not node.routes:
+        raise ValueError(f"{node!r} requires routes=")
+    if node.routes_true or node.routes_false:
+        raise ValueError(
+            f"{node!r} should not have routes_true= / routes_false= (use routes=)"
+        )
+
+
+def _validate_terminal(node: EvaluatorDAGNode) -> None:
+    if node.routes_true or node.routes_false or node.routes:
+        raise ValueError(f"{node!r} is terminal and cannot have routing kwargs")
+
+
+class Gate(EvaluatorDAGNode):
+    """Binary test node."""
+
+    @abstractmethod
+    def run(self, ctx: EvalContext) -> bool:
+        """Return True or False to indicate which route to take from this gate."""
+
+    def validate(self) -> None:
+        super().validate()
+        _validate_binary_routes(self)
+
+
+class Enricher(EvaluatorDAGNode):
+    """Context transformation node."""
+
+    @abstractmethod
+    def run(self, ctx: EvalContext) -> Any:
+        """Return data representing the enriched context."""
+
+    def validate(self) -> None:
+        super().validate()
+        _validate_unary_routes(self)
+
+
+class Arbiter(EvaluatorDAGNode):
+    """Takes context (including parent outputs) and returns an Output carrying the final verdict."""
+
+    @abstractmethod
+    def run(self, ctx: EvalContext) -> Output:
+        """Return an Output indicating the final verdict."""
+
+    def validate(self) -> None:
+        super().validate()
+        _validate_terminal(self)
+
+    @property
+    @abstractmethod
+    def output_type(self) -> type:
+        """Return the concrete Output subclass this arbiter produces, for DAG validation."""
+        raise NotImplementedError
diff --git a/src/modelplane/evaluator/outputs.py b/src/modelplane/evaluator/outputs.py
new file mode 100644
index 0000000..9810492
--- /dev/null
+++ b/src/modelplane/evaluator/outputs.py
@@ -0,0 +1,11 @@
+from abc import ABC, abstractmethod
+
+
+class Output(ABC):
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Return a string name for this output, used for routing and debugging."""
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__} ({self.name})"
diff --git a/src/modelplane/evaluator/safety.py b/src/modelplane/evaluator/safety.py
new file mode 100644
index 0000000..0bf5dbf
--- /dev/null
+++ b/src/modelplane/evaluator/safety.py
@@ -0,0 +1,55 @@
+from modelgauge.annotation import SafetyAnnotation
+from modelgauge.annotator import Annotator
+from modelgauge.prompt import TextPrompt
+from modelgauge.sut import SUTResponse
+
+from modelplane.evaluator.annotator import DAGAnnotator
+from modelplane.evaluator.context import EvalContext
+from modelplane.evaluator.dag import EvaluatorDAG
+from modelplane.evaluator.nodes import Arbiter
+from modelplane.evaluator.outputs import Output
+
+
+class Safety(Output):
+    def __init__(self, is_safe: bool) -> None:
+        self.is_safe = is_safe
+
+    @property
+    def name(self) -> str:
+        return "SAFE" if self.is_safe else "UNSAFE"
+
+
+class SafetyArbiter(Arbiter):
+    @property
+    def output_type(self) -> type:
+        return Safety
+
+
+class SafetyDAGAnnotator(DAGAnnotator):
+    """Implementation of DAGAnnotator that produces a SafetyAnnotation."""
+
+    def __init__(self, uid: str, dag: EvaluatorDAG) -> None:
+        super().__init__(uid, dag)
+        if not issubclass(dag.output_type, Safety):
+            raise ValueError("All outputs of the DAG must be of type Safety.")
+
+    def translate_response(
+        self,
+        request: EvalContext,
+        response: Safety,
+    ) -> SafetyAnnotation:
+        """Map the DAG's Safety output to a SafetyAnnotation (is_safe bool)."""
+        return SafetyAnnotation(is_safe=response.is_safe)
+
+
+class AnnotatorArbiter(SafetyArbiter):
+    """Arbiter that outputs SAFE or UNSAFE based on the output of a (safety) Annotator."""
+
+    def __init__(self, name: str, annotator: Annotator) -> None:
+        super().__init__(name=name)
+        self.annotator = annotator
+
+    def run(self, ctx: EvalContext) -> Safety:
+        prompt = TextPrompt(text=ctx.prompt)
+        response = SUTResponse(text=ctx.response)
+        annotation = self.annotator.process(prompt, response)
+        return Safety(is_safe=annotation.is_safe)
diff --git a/tests/unit/evaluator/__init__.py b/tests/unit/evaluator/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/unit/evaluator/conftest.py b/tests/unit/evaluator/conftest.py
new file mode 100644
index 0000000..2da1064
--- /dev/null
+++ b/tests/unit/evaluator/conftest.py
@@ -0,0 +1,201 @@
+"""Shared mock node implementations and helpers for evaluator tests."""
+
+import os
+
+import pytest
+
+from modelplane.evaluator.context import EvalContext
+from modelplane.evaluator.dag import EvaluatorDAG
+from modelplane.evaluator.outputs import Output
+from modelplane.evaluator.safety import Safety
+
+from .mocks import (
+    AlwaysFalse,
+    AlwaysSafe,
+    AlwaysTrue,
+    AlwaysUnsafe,
+    BadArbiter,
+    FixedScorer,
+    LLMEnricher,
+    LowerCaser,
+    LowerCaseScorer,
+    PromptLengthGate,
+    ThresholdArbiter,
+    UnexpectedArbiter,
+    UnexpectedOutput,
+    UpperCaser,
+    UpperCaseScorer,
+)
+
+TRUE_BRANCH: tuple[str | Output] = ("true_branch",)
+FALSE_BRANCH: tuple[str | Output] = ("false_branch",)
+DEFAULT_BRANCH: tuple[str | Output] = ("next_node",)
+BAD_BRANCH: tuple[str | Output] = ("undefined_node",)
+SCORE1 = 1.0
+SCORE2 = 2.0
+
+skip_in_ci = pytest.mark.skipif(os.getenv("CI") == "true", reason="skipped in CI")
+
+
+@pytest.fixture
+def always_true_gate() -> AlwaysTrue:
+    return AlwaysTrue(
+        name="always_true", routes_true=TRUE_BRANCH, routes_false=FALSE_BRANCH
+    )
+
+
+@pytest.fixture
+def bad_gate() -> AlwaysTrue:
+    return AlwaysTrue(
+        name="bad_gate", routes_true=BAD_BRANCH, routes_false=FALSE_BRANCH
+    )
+
+
+@pytest.fixture
+def always_false_gate() -> AlwaysFalse:
+    return AlwaysFalse(
+        name="always_false", routes_true=TRUE_BRANCH, routes_false=FALSE_BRANCH
+    )
+
+
+@pytest.fixture
+def lower_caser() -> LowerCaser:
+    return LowerCaser(name="lower_caser", routes=DEFAULT_BRANCH)
+
+
+@pytest.fixture
+def score_1() -> FixedScorer:
+    return FixedScorer(name="score_1", value=SCORE1, routes=DEFAULT_BRANCH)
+
+
+@pytest.fixture
+def score_2() -> FixedScorer:
+    return FixedScorer(name="score_2", value=SCORE2, routes=DEFAULT_BRANCH)
+
+
+@pytest.fixture
+def costly_enricher() -> LLMEnricher:
+    return LLMEnricher(name="costly_enricher", routes=DEFAULT_BRANCH)
+
+
+@pytest.fixture
+def sample_ctx() -> EvalContext:
+    return EvalContext(prompt="Hello, world", response="This is a response.")
+
+
+@pytest.fixture
+def always_unsafe() -> AlwaysUnsafe:
+    return AlwaysUnsafe(name="always_unsafe")
+
+
+@pytest.fixture
+def always_safe() -> AlwaysSafe:
+    return AlwaysSafe(name="always_safe")
+
+
+@pytest.fixture
+def threshold_arbiter() -> ThresholdArbiter:
+    return ThresholdArbiter(name="threshold_arbiter", threshold=1.5)
+
+
+@pytest.fixture
+def one_step_dag():
+    return (
+        EvaluatorDAG("one_step", output_type=Safety)
+        .add_node(
+            AlwaysFalse(
+                name="gate",
+                routes_true=[Safety(is_safe=True)],
+                routes_false=["always_unsafe"],
+            )
+        )
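+        # the gate always returns False, so every run routes to the
+        # arbiter below and yields UNSAFE
+        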
.add_node(AlwaysUnsafe(name="always_unsafe")) + ) + + +@pytest.fixture +def simple_dag(): + return ( + EvaluatorDAG("simple", output_type=Safety) + .add_node( + AlwaysTrue( + name="always_true", + routes_true=["lower_caser", "prompt_parity"], + routes_false=["always_safe"], + ) + ) + .add_node(AlwaysSafe(name="always_safe")) + .add_node( + PromptLengthGate( + name="prompt_parity", + routes_true=[Safety(is_safe=False)], + routes_false=["upper_caser"], + ) + ) + .add_node( + LowerCaser(name="lower_caser", routes=["lower_scorer", "upper_scorer"]) + ) + .add_node( + UpperCaser(name="upper_caser", routes=["lower_scorer", "upper_scorer"]) + ) + .add_node(LowerCaseScorer(name="lower_scorer", routes=["threshold_arbiter"])) + .add_node(UpperCaseScorer(name="upper_scorer", routes=["threshold_arbiter"])) + .add_node(ThresholdArbiter(name="threshold_arbiter", threshold=0.5)) + ) + + +@pytest.fixture() +def bad_dag_with_cycle(): + return ( + EvaluatorDAG("cyclic", output_type=Safety) + .add_node( + AlwaysTrue( + name="node1", + routes_true=["node2"], + routes_false=["node3"], + ) + ) + .add_node( + AlwaysTrue( + name="node2", + routes_true=["node3"], + routes_false=["node1"], + ) + ) + .add_node( + AlwaysTrue( + name="node3", + routes_true=[Safety(is_safe=True)], + routes_false=[Safety(is_safe=False)], + ) + ) + ) + + +@pytest.fixture +def bad_dag_with_undefined_output(simple_dag): + bad_arbiter = UnexpectedArbiter(name="arbiter") + simple_dag.add_node(bad_arbiter) + return simple_dag + + +@pytest.fixture +def bad_dag_with_bad_arbiter(): + dag = EvaluatorDAG("test", output_type=Safety) + dag.add_node(BadArbiter(name="bad_arbiter")) + return dag + + +@pytest.fixture +def bad_one_step_dag(): + return ( + EvaluatorDAG("one_step", output_type=Safety) + .add_node( + AlwaysFalse( + name="gate", + routes_true=[UnexpectedOutput()], + routes_false=["always_unsafe"], + ) + ) + .add_node(AlwaysUnsafe(name="always_unsafe")) + ) diff --git a/tests/unit/evaluator/mocks.py b/tests/unit/evaluator/mocks.py new file mode 100644 index 0000000..0e1307d --- /dev/null +++ b/tests/unit/evaluator/mocks.py @@ -0,0 +1,178 @@ +from modelplane.evaluator.context import EvalContext +from modelplane.evaluator.nodes import Arbiter, Enricher, Gate +from modelplane.evaluator.outputs import Output +from modelplane.evaluator.safety import Safety + + +class PassthroughGate(Gate): + ROUTE_TO_TAKE: bool + + def run(self, ctx: EvalContext) -> bool: + return self.ROUTE_TO_TAKE + + +class AlwaysTrue(PassthroughGate): + ROUTE_TO_TAKE = True + + def cost(self, ctx: EvalContext) -> float: + return 0.1 + + +class AlwaysFalse(PassthroughGate): + ROUTE_TO_TAKE = False + + def cost(self, ctx: EvalContext) -> float: + return 0.2 + + +class PromptLengthGate(Gate): + def run(self, ctx: EvalContext) -> bool: + return len(ctx.prompt) % 2 == 0 + + def cost(self, ctx: EvalContext) -> float: + return 0.3 + + +class LowerCaser(Enricher): + """Enriches by returning the response lowercased.""" + + def run(self, ctx: EvalContext) -> str: + return ctx.response.lower() + + def cost(self, ctx: EvalContext) -> float: + return 0.4 + + +class UpperCaser(Enricher): + """Enriches by returning the response uppercased.""" + + def run(self, ctx: EvalContext) -> str: + return ctx.response.upper() + + def cost(self, ctx: EvalContext) -> float: + return 0.5 + + +class LLMEnricher(Enricher): + + def run(self, ctx: EvalContext) -> str: + return ctx.response + + def cost(self, ctx: EvalContext) -> float: + return 0.6 + + +class FixedScorer(Enricher): + """Returns a fixed float 
score regardless of context."""
+
+    def __init__(self, name: str, value: float, **kwargs):
+        super().__init__(name, **kwargs)
+        self.value = value
+
+    def run(self, ctx: EvalContext) -> float:
+        return self.value
+
+    def cost(self, ctx: EvalContext) -> float:
+        return 0.7
+
+
+class LowerCaseScorer(Enricher):
+    """Scores based on the percentage of lowercase characters in the response."""
+
+    def run(self, ctx: EvalContext) -> float:
+        if not ctx.response:
+            return 0.0
+        num_lower = sum(1 for c in ctx.response if c.islower())
+        return num_lower / len(ctx.response)
+
+    def cost(self, ctx: EvalContext) -> float:
+        return 0.8
+
+
+class UpperCaseScorer(Enricher):
+    """Scores based on the percentage of uppercase characters in the response."""
+
+    def run(self, ctx: EvalContext) -> float:
+        if not ctx.response:
+            return 0.0
+        num_upper = sum(1 for c in ctx.response if c.isupper())
+        return num_upper / len(ctx.response)
+
+    def cost(self, ctx: EvalContext) -> float:
+        return 0.9
+
+
+class AlwaysUnsafe(Arbiter):
+    def run(self, ctx: EvalContext) -> Output:
+        return Safety(is_safe=False)
+
+    def cost(self, ctx: EvalContext) -> float:
+        return 1.0
+
+    @property
+    def output_type(self) -> type:
+        return Safety
+
+
+class AlwaysSafe(Arbiter):
+    def run(self, ctx: EvalContext) -> Output:
+        return Safety(is_safe=True)
+
+    def cost(self, ctx: EvalContext) -> float:
+        return 1.1
+
+    @property
+    def output_type(self) -> type:
+        return Safety
+
+
+class ThresholdArbiter(Arbiter):
+    def __init__(self, name: str, threshold: float, **kwargs):
+        super().__init__(name, **kwargs)
+        self.threshold = threshold
+
+    def run(self, ctx: EvalContext) -> Output:
+        scores = ctx.parent_outputs()
+        score = sum(scores) / len(scores)
+        return Safety(is_safe=score < self.threshold)
+
+    def cost(self, ctx: EvalContext) -> float:
+        return 1.2
+
+    @property
+    def output_type(self) -> type:
+        return Safety
+
+
+class UnexpectedOutput(Output):
+    @property
+    def name(self) -> str:
+        return "UNEXPECTED_OUTPUT"
+
+
+class UnexpectedArbiter(Arbiter):
+    """An arbiter whose output_type is incompatible with the test DAG's output type."""
+
+    def run(self, ctx: EvalContext) -> Output:
+        return UnexpectedOutput()
+
+    def cost(self, ctx: EvalContext) -> float:
+        return 1.3
+
+    @property
+    def output_type(self) -> type:
+        return UnexpectedOutput
+
+
+class BadArbiter(Arbiter):
+    """An arbiter that violates the contract by returning a non-Output value."""
+
+    def run(self, ctx: EvalContext) -> str:
+        return "safe"
+
+    def cost(self, ctx: EvalContext) -> float:
+        return 1.4
+
+    @property
+    def output_type(self) -> type:
+        return Safety
diff --git a/tests/unit/evaluator/test_dag.py b/tests/unit/evaluator/test_dag.py
new file mode 100644
index 0000000..0158e04
--- /dev/null
+++ b/tests/unit/evaluator/test_dag.py
@@ -0,0 +1,147 @@
+"""Unit tests for EvaluatorDAG construction, validation, execution, and visualization."""
+
+from unittest.mock import patch
+
+import pandas as pd
+import pytest
+
+from modelplane.evaluator.dag import EvaluatorDAG
+from modelplane.evaluator.safety import Safety
+
+from .conftest import skip_in_ci
+
+
+def test_dag_outputs(simple_dag):
+    assert simple_dag.output_type == Safety
+
+
+def test_dag_with_bad_output_type():
+    with pytest.raises(
+        ValueError,
+        match="output_type must be a subclass of Output",
+    ):
+        EvaluatorDAG(name="bad_dag", output_type=str)
+
+
+def test_add_node_with_same_name_as_existing_node(simple_dag, always_true_gate):
+    always_true_gate.name = next(iter(simple_dag._nodes))
+    with pytest.raises(ValueError, 
match="is already registered"): + simple_dag.add_node(always_true_gate) # same name as existing node + + +def test_add_node_with_undefined_target_node(simple_dag, bad_gate): + simple_dag.add_node(bad_gate) + with pytest.raises(ValueError, match="routes to unregistered node"): + simple_dag._validate_and_build() + + +def test_dag_with_cycle(bad_dag_with_cycle): + with pytest.raises(ValueError, match="DAG contains a cycle"): + bad_dag_with_cycle._validate_and_build() + + +def test_dag_with_undefined_output(bad_dag_with_undefined_output): + with pytest.raises( + ValueError, match=r"which is not compatible with the DAG\'s output_type" + ): + bad_dag_with_undefined_output._validate_and_build() + + +def test_dag_with_bad_arbiter(bad_dag_with_bad_arbiter, sample_ctx): + with pytest.raises( + ValueError, + match=r"DAG execution completed without reaching an Output node", + ): + bad_dag_with_bad_arbiter.run(sample_ctx) + + +def test_dag_with_bad_output_route(bad_one_step_dag, sample_ctx): + with pytest.raises( + ValueError, + match=r"incompatible output", + ): + bad_one_step_dag.run(sample_ctx) + + +def test_dag_run(simple_dag, sample_ctx): + result = simple_dag.run(sample_ctx) + assert result.name == "UNSAFE" + + +def test_dag_run_with_dataframe(simple_dag): + # "hello world" (space lowers avg below threshold) → safe + # "helloworld" (no space, avg = 0.5 = threshold) → unsafe + # Alternate even/odd prompt lengths to exercise both enricher paths. + df = pd.DataFrame( + { + "prompt": ["a", "ab", "abc", "abcd"], # odd, even, odd, even + "response": ["hello world", "helloworld", "hello world", "helloworld"], + } + ) + result_df = simple_dag.run_dataframe(df) + + assert len(result_df) == len(df) + assert "prompt" in result_df.columns + assert "response" in result_df.columns + verdicts = result_df[simple_dag.DATAFRAME_OUTPUT_COL].tolist() + expected_verdicts = ["SAFE", "UNSAFE", "SAFE", "UNSAFE"] + assert verdicts == expected_verdicts + + +def test_dag_run_with_dataframe_parallel(simple_dag): + df = pd.DataFrame( + { + "prompt": ["a", "ab", "abc", "abcd"], # odd, even, odd, even + "response": ["hello world", "helloworld", "hello world", "helloworld"], + } + ) + result_df = simple_dag.run_dataframe(df, n_jobs=-1) + + assert len(result_df) == len(df) + assert "prompt" in result_df.columns + assert "response" in result_df.columns + verdicts = result_df[simple_dag.DATAFRAME_OUTPUT_COL].tolist() + expected_verdicts = ["SAFE", "UNSAFE", "SAFE", "UNSAFE"] + assert verdicts == expected_verdicts + + +def test_dag_cost_one_path(simple_dag, sample_ctx): + cost = simple_dag.total_cost(sample_ctx) + # lower_caser and prompt_parity are at the same level from always_true + assert cost == 0.8 + cost = simple_dag.total_cost() + assert cost == 0.8 + + +def test_dag_cost_all_paths(simple_dag): + costs = simple_dag.total_costs() + assert costs == pytest.approx( + { + "always_true -> always_safe -> Out (Safety)": 1.2, + "always_true -> lower_caser -> prompt_parity -> lower_scorer -> upper_scorer -> threshold_arbiter -> Out (Safety)": 3.7, + "always_true -> lower_caser -> prompt_parity -> upper_caser -> lower_scorer -> upper_scorer -> threshold_arbiter -> Out (Safety)": 4.2, + } + ) + + +@skip_in_ci +def test_dag_visualize_runs(simple_dag, one_step_dag, sample_ctx): + simple_dag.visualize() + simple_dag.visualize_run(sample_ctx) + one_step_dag.visualize() + one_step_dag.visualize_run(sample_ctx) + + +def test_visualize_raises_when_graphviz_binary_missing(simple_dag): + import graphviz + + with patch.object( + 
graphviz.Digraph, + "pipe", + side_effect=graphviz.ExecutableNotFound(["dot"]), + ): + with pytest.raises( + RuntimeError, + match="Graphviz system binaries not found", + ): + simple_dag.visualize() diff --git a/tests/unit/evaluator/test_nodes.py b/tests/unit/evaluator/test_nodes.py new file mode 100644 index 0000000..10725a9 --- /dev/null +++ b/tests/unit/evaluator/test_nodes.py @@ -0,0 +1,106 @@ +"""Unit tests for individual EvaluatorDAGNode subclasses.""" + +import pytest + +from modelplane.evaluator.safety import Safety + +from .conftest import DEFAULT_BRANCH, FALSE_BRANCH, SCORE1, SCORE2, TRUE_BRANCH +from .mocks import AlwaysTrue, AlwaysUnsafe, LowerCaser + + +def test_true_routes_to_true_branch(sample_ctx, always_true_gate): + output = always_true_gate.run(sample_ctx) + assert output + assert always_true_gate.next_nodes(output) == TRUE_BRANCH + + +def test_false_routes_to_false_branch(sample_ctx, always_false_gate): + output = always_false_gate.run(sample_ctx) + assert not output + assert always_false_gate.next_nodes(output) == FALSE_BRANCH + + +def test_lower_caser(sample_ctx, lower_caser): + output = lower_caser.run(sample_ctx) + assert output == sample_ctx.response.lower() + assert lower_caser.next_nodes(output) == DEFAULT_BRANCH + + +def test_fixed_scorer(sample_ctx, score_1): + output = score_1.run(sample_ctx) + assert output == SCORE1 + assert score_1.next_nodes(output) == DEFAULT_BRANCH + + +def test_consistent_arbiters(sample_ctx, score_1, score_2, always_unsafe, always_safe): + parent_outputs = {score_1.name: SCORE1, score_2.name: SCORE2} + sample_ctx.set_parent_outputs(parent_outputs) + output = always_unsafe.run(sample_ctx) + assert output.name == "UNSAFE" + output = always_safe.run(sample_ctx) + assert output.name == "SAFE" + + +def test_threshold_arbiter_true(sample_ctx, threshold_arbiter): + sample_ctx.set_parent_outputs({"parent0": SCORE2, "parent1": SCORE2}) + output = threshold_arbiter.run(sample_ctx) + assert output.name == "UNSAFE" + + +def test_threshold_arbiter_false(sample_ctx, threshold_arbiter): + sample_ctx.set_parent_outputs({"parent0": SCORE1, "parent1": SCORE1}) + output = threshold_arbiter.run(sample_ctx) + assert output.name == "SAFE" + + +def test_gate_with_two_outputs(): + with pytest.raises(ValueError, match="has multiple Output routes"): + AlwaysTrue( + name="bad_gate", + routes_true=[Safety(is_safe=True), Safety(is_safe=False)], + routes_false=FALSE_BRANCH, + ) + + +def test_gate_with_no_true_route(): + with pytest.raises(ValueError, match="requires both routes_true and routes_false"): + AlwaysTrue( + name="bad_gate", + routes_false=FALSE_BRANCH, + ) + + +def test_gate_with_routes(): + with pytest.raises(ValueError, match="should not have routes"): + AlwaysTrue( + name="bad_gate", + routes_true=TRUE_BRANCH, + routes_false=FALSE_BRANCH, + routes=DEFAULT_BRANCH, + ) + + +def test_enricher_with_binary_routes(): + with pytest.raises( + ValueError, match="should not have routes_true= / routes_false=" + ): + LowerCaser( + name="bad_enricher", + routes_true=TRUE_BRANCH, + routes=DEFAULT_BRANCH, + ) + + +def test_enricher_with_no_routes(): + with pytest.raises(ValueError, match="requires routes="): + LowerCaser( + name="bad_enricher", + ) + + +def test_arbiter_with_routes(): + with pytest.raises(ValueError, match="is terminal and cannot have routing kwargs"): + AlwaysUnsafe( + name="bad_arbiter", + routes=DEFAULT_BRANCH, + ) diff --git a/tests/unit/evaluator/test_safety.py b/tests/unit/evaluator/test_safety.py new file mode 100644 index 0000000..2999166 
--- /dev/null +++ b/tests/unit/evaluator/test_safety.py @@ -0,0 +1,25 @@ +from modelgauge.annotation import SafetyAnnotation +from modelgauge.annotators.demo_annotator import DemoYBadAnnotator +from modelgauge.prompt import TextPrompt +from modelgauge.sut import SUTResponse + +from modelplane.evaluator.safety import AnnotatorArbiter, Safety, SafetyDAGAnnotator + + +def test_safety_annotator_arbiter(sample_ctx): + annotator = DemoYBadAnnotator("demo_annotator") + arbiter = AnnotatorArbiter(name="demo_arbiter", annotator=annotator) + output = arbiter.run(sample_ctx) + assert output.is_safe + assert isinstance(output, Safety) + assert arbiter.output_type == Safety + + +def test_safety_dag_run(simple_dag, sample_ctx): + safety_annotator = SafetyDAGAnnotator("safety", simple_dag) + output = safety_annotator.process( + prompt=TextPrompt(text=sample_ctx.prompt), + response=SUTResponse(text=sample_ctx.response), + ) + assert not output.is_safe + assert isinstance(output, SafetyAnnotation) diff --git a/uv.lock b/uv.lock index 7b2de6e..8864835 100644 --- a/uv.lock +++ b/uv.lock @@ -1975,6 +1975,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/74/16/a4cf06adbc711bd364a73ce043b0b08d8fa5aae3df11b6ee4248bcdad2e0/graphql_relay-3.2.0-py3-none-any.whl", hash = "sha256:c9b22bd28b170ba1fe674c74384a8ff30a76c8e26f88ac3aa1584dd3179953e5", size = 16940, upload-time = "2022-04-16T11:03:43.895Z" }, ] +[[package]] +name = "graphviz" +version = "0.21" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/b3/3ac91e9be6b761a4b30d66ff165e54439dcd48b83f4e20d644867215f6ca/graphviz-0.21.tar.gz", hash = "sha256:20743e7183be82aaaa8ad6c93f8893c923bd6658a04c32ee115edb3c8a835f78", size = 200434, upload-time = "2025-06-15T09:35:05.824Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/4c/e0ce1ef95d4000ebc1c11801f9b944fa5910ecc15b5e351865763d8657f8/graphviz-0.21-py3-none-any.whl", hash = "sha256:54f33de9f4f911d7e84e4191749cac8cc5653f815b06738c54db9a15ab8b1e42", size = 47300, upload-time = "2025-06-15T09:35:04.433Z" }, +] + [[package]] name = "greenlet" version = "3.3.1" @@ -3266,6 +3275,7 @@ source = { editable = "." } dependencies = [ { name = "click" }, { name = "dvc", extra = ["gs"] }, + { name = "graphviz" }, { name = "jsonlines" }, { name = "jupyter" }, { name = "jupyterlab-git" }, @@ -3298,6 +3308,7 @@ test = [ requires-dist = [ { name = "click", specifier = ">=8,<9" }, { name = "dvc", extras = ["gs"], specifier = ">=3.60,<4" }, + { name = "graphviz", specifier = ">=0.20,<1" }, { name = "jsonlines", specifier = ">=4,<5" }, { name = "jupyter", specifier = ">=1,<2" }, { name = "jupyterlab-git" },