Skip to content

Commit cb1ffda

Browse files
Merge pull request #816 from Pipelex/release/v0.23.7
Release/v0.23.7
2 parents 6692e89 + b749d60 commit cb1ffda

23 files changed

Lines changed: 318 additions & 10 deletions

File tree

.pipelex/pipelex.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ stuff_json_content = true
4242
stuff_text_content = true
4343
stuff_html_content = true
4444
error_stack_traces = true
45+
pipe_and_concept_registry = true
4546

4647
[pipelex.pipeline_execution_config.graph_config.graphs_inclusion]
4748
# Control which graph outputs are generated

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Changelog
22

3+
## [v0.23.7] - 2026-04-06
4+
5+
### Added
6+
7+
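- **Graph tracing for pipe run data**: Pipe and concept data are now included in the flowchart graph spec (as pipe and concept registries, plus per-node execution data), enabling richer visualization of pipe execution results across all pipe types (LLM, extract, compose, search, image gen, sequence, condition, batch, parallel). The registries are controlled by the new `pipe_and_concept_registry` data-inclusion setting.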
8+
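- **Assignment pattern**: New `PipeAssignment` base model (`pipe_assignment`) that carries pre-resolved runtime data (resolved models, rendered prompts) and collected execution metadata through the pre_run/run/post_run phases of pipe execution.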
9+
310
## [v0.23.6] - 2026-04-06
411

512
### Changed

pipelex/core/pipes/pipe_abstract.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,37 @@ def concept_dependencies(self) -> list[Concept]:
8484

8585
return unique_concepts
8686

87+
def _register_execution_data(self, job_metadata: JobMetadata, execution_data: dict[str, Any]) -> None:
    """Register execution metadata with the graph tracer.

    Called by pipe subclasses during execution to capture runtime-resolved data
    (rendered prompts, resolved models, etc.) for the GraphSpec.

    This is a silent no-op when the job carries no graph context, when no
    tracer manager instance is available, or when the graph context has no
    parent node to attach the data to.

    Args:
        job_metadata: Metadata of the running job; its ``graph_context`` supplies
            the graph id and the node id the data is attached to.
        execution_data: Execution metadata to attach to the node.
    """
    graph_context = job_metadata.graph_context
    if graph_context is None:
        return
    tracer_manager = GraphTracerManager.get_instance()
    if tracer_manager is None or graph_context.parent_node_id is None:
        return
    # The data is attached to the node referenced by the context's parent_node_id.
    tracer_manager.register_execution_data(
        graph_id=graph_context.graph_id,
        node_id=graph_context.parent_node_id,
        execution_data=execution_data,
    )
104+
105+
def _make_single_concept_data_for_registry(self, concept: Concept) -> dict[str, Any]:
    """Serialize a single concept for the graph registry, including its JSON Schema.

    Args:
        concept: The concept to serialize.

    Returns:
        The concept dumped in JSON mode, with an extra ``"json_schema"`` key holding
        the JSON Schema of the concept's structure class, or ``None`` when that
        schema could not be produced.
    """
    concept_dict = concept.model_dump(mode="json")
    try:
        concept_dict["json_schema"] = concept.get_structure_class().model_json_schema()
    except (TypeError, ValueError):
        # Best effort: a concept whose schema cannot be generated is still registered,
        # just without a schema.
        concept_dict["json_schema"] = None
    return concept_dict
113+
114+
def _make_concept_data_for_registry(self) -> list[dict[str, Any]]:
115+
"""Serialize all unique concepts from this pipe for the graph registry."""
116+
return [self._make_single_concept_data_for_registry(concept) for concept in self.concept_dependencies]
117+
87118
@field_validator("code", mode="before")
88119
@classmethod
89120
def validate_pipe_code_syntax(cls, code: str) -> str:
@@ -411,13 +442,22 @@ async def run_pipe(
411442
)
412443
input_specs.append(input_spec)
413444

445+
# Serialize pipe and concept data for registries if enabled
446+
pipe_data: dict[str, Any] | None = None
447+
concept_data: list[dict[str, Any]] | None = None
448+
if parent_graph_context.data_inclusion.pipe_and_concept_registry:
449+
pipe_data = self.model_dump(mode="json")
450+
concept_data = self._make_concept_data_for_registry()
451+
414452
graph_node_id, child_graph_context = tracer_manager.on_pipe_start(
415453
graph_context=parent_graph_context,
416454
pipe_code=self.code,
417455
pipe_type=self.type,
418456
node_kind=node_kind,
419457
started_at=started_at,
420458
input_specs=input_specs or None,
459+
pipe_data=pipe_data,
460+
concept_data=concept_data,
421461
)
422462
# Update job metadata with child graph context for nested pipes
423463
if child_graph_context is not None:
@@ -480,11 +520,17 @@ async def run_pipe(
480520
data_html=main_stuff.content.rendered_pretty_html() if parent_graph_context.data_inclusion.stuff_html_content else None,
481521
)
482522

523+
# Serialize output concept for registry if enabled
524+
output_concept_data: dict[str, Any] | None = None
525+
if parent_graph_context.data_inclusion.pipe_and_concept_registry and main_stuff is not None:
526+
output_concept_data = self._make_single_concept_data_for_registry(main_stuff.concept)
527+
483528
tracer_manager.on_pipe_end_success(
484529
graph_id=parent_graph_context.graph_id,
485530
node_id=graph_node_id,
486531
ended_at=datetime.now(timezone.utc),
487532
output_spec=output_spec,
533+
output_concept_data=output_concept_data,
488534
)
489535

490536
pipe_run_params.pop_pipe_from_stack(pipe_code=self.code)
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
"""Base Assignment class for the pipe execution pre_run/run/post_run pattern.
2+
3+
An Assignment carries resolved runtime data through the execution lifecycle:
4+
- Created in prepare_assignment() (pre_run phase): models resolved, prompts rendered
5+
- Consumed in execute() (run phase): pure execution using pre-resolved data
6+
- Finalized in finalize_assignment() (post_run phase): execution metadata captured
7+
8+
Each pipe subclass defines its own Assignment type with operator-specific fields.
9+
"""
10+
11+
from typing import Any
12+
13+
from pydantic import BaseModel, ConfigDict, Field
14+
15+
16+
class PipeAssignment(BaseModel):
    """Base Assignment for pipe execution.

    Subclasses add operator-specific fields (rendered prompts, resolved models, etc.).
    The execution_data dict collects metadata for the GraphSpec's execution_data field.
    """

    # Subclass fields may hold arbitrary (non-pydantic) types.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Fresh dict per instance (default_factory), so assignments never share metadata.
    execution_data: dict[str, Any] = Field(default_factory=dict)

pipelex/graph/graph_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ class DataInclusionConfig(ConfigModel):
1010
stuff_text_content: bool
1111
stuff_html_content: bool
1212
error_stack_traces: bool
13+
pipe_and_concept_registry: bool
1314

1415

1516
class GraphsInclusionConfig(ConfigModel):

pipelex/graph/graph_tracer.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""GraphTracer implementation that builds GraphSpec during pipeline execution."""
22

33
from datetime import datetime, timezone
4+
from typing import Any
45

56
from typing_extensions import override
67

@@ -48,6 +49,7 @@ def __init__(
4849
self.error: ErrorSpec | None = None
4950
self.input_specs: list[IOSpec] = input_specs or []
5051
self.output_specs: list[IOSpec] = []
52+
self.execution_data: dict[str, Any] = {}
5153

5254
def to_node_spec(self) -> NodeSpec:
5355
"""Convert to immutable NodeSpec."""
@@ -76,6 +78,7 @@ def to_node_spec(self) -> NodeSpec:
7678
node_io=node_io,
7779
error=self.error,
7880
metrics=self.metrics,
81+
execution_data=self.execution_data,
7982
)
8083

8184

@@ -113,6 +116,9 @@ def __init__(self) -> None:
113116
# The branch_producer_node_id is snapshotted at registration time, before register_controller_output
114117
# overrides _stuff_producer_map to point branch stuff codes to the controller node
115118
self._parallel_combine_map: dict[str, tuple[str, list[tuple[str, str]]]] = {}
119+
# Registries for pipe and concept data (keyed by pipe_ref and concept_ref)
120+
self._pipe_registry: dict[str, dict[str, Any]] = {}
121+
self._concept_registry: dict[str, dict[str, Any]] = {}
116122

117123
@property
118124
def is_active(self) -> bool:
@@ -143,6 +149,8 @@ def setup(
143149
self._batch_item_map = {}
144150
self._batch_aggregate_map = {}
145151
self._parallel_combine_map = {}
152+
self._pipe_registry = {}
153+
self._concept_registry = {}
146154

147155
return GraphContext(
148156
graph_id=graph_id,
@@ -181,6 +189,8 @@ def teardown(self) -> GraphSpec | None:
181189
pipeline_ref=self._pipeline_ref or PipelineRef(),
182190
nodes=nodes,
183191
edges=self._edges,
192+
pipe_registry=dict(self._pipe_registry),
193+
concept_registry=dict(self._concept_registry),
184194
)
185195

186196
# Reset internal state
@@ -193,6 +203,8 @@ def teardown(self) -> GraphSpec | None:
193203
self._batch_item_map = {}
194204
self._batch_aggregate_map = {}
195205
self._parallel_combine_map = {}
206+
self._pipe_registry = {}
207+
self._concept_registry = {}
196208

197209
return graph
198210

@@ -409,6 +421,8 @@ def on_pipe_start(
409421
node_kind: NodeKind,
410422
started_at: datetime,
411423
input_specs: list[IOSpec] | None = None,
424+
pipe_data: dict[str, Any] | None = None,
425+
concept_data: list[dict[str, Any]] | None = None,
412426
) -> tuple[str, GraphContext]:
413427
"""Record the start of a pipe execution."""
414428
if not self._is_active:
@@ -433,6 +447,18 @@ def on_pipe_start(
433447
)
434448
self._nodes[node_id] = node_data
435449

450+
# Accumulate pipe and concept registry data (deduplicated)
451+
if graph_context.data_inclusion.pipe_and_concept_registry:
452+
if pipe_data is not None:
453+
pipe_ref = f"{pipe_data.get('domain_code', '')}.{pipe_data.get('code', '')}"
454+
if pipe_ref not in self._pipe_registry:
455+
self._pipe_registry[pipe_ref] = pipe_data
456+
if concept_data is not None:
457+
for concept_item in concept_data:
458+
concept_ref = f"{concept_item.get('domain_code', '')}.{concept_item.get('code', '')}"
459+
if concept_ref not in self._concept_registry:
460+
self._concept_registry[concept_ref] = concept_item
461+
436462
# Add containment edge from parent if this is a child pipe
437463
if graph_context.parent_node_id is not None:
438464
self.add_edge(
@@ -449,6 +475,20 @@ def on_pipe_start(
449475

450476
return node_id, child_context
451477

478+
@override
def register_execution_data(
    self,
    node_id: str,
    execution_data: dict[str, Any],
) -> None:
    """Register execution metadata for a node.

    The given entries are merged into the node's existing ``execution_data``;
    keys already present are overwritten. Nothing happens when the tracer is
    not active or when ``node_id`` is not a known node.

    Args:
        node_id: ID of the node to attach the data to.
        execution_data: Execution metadata to merge into the node.
    """
    if not self._is_active:
        return
    node_data = self._nodes.get(node_id)
    if node_data is None:
        return
    node_data.execution_data.update(execution_data)
491+
452492
@override
453493
def on_pipe_end_success(
454494
self,
@@ -457,6 +497,7 @@ def on_pipe_end_success(
457497
output_preview: str | None = None,
458498
metrics: dict[str, float] | None = None,
459499
output_spec: IOSpec | None = None,
500+
output_concept_data: dict[str, Any] | None = None,
460501
) -> None:
461502
"""Record successful completion of a pipe execution."""
462503
if not self._is_active:
@@ -472,6 +513,12 @@ def on_pipe_end_success(
472513
if metrics:
473514
node_data.metrics = metrics
474515

516+
# Accumulate output concept data (deduplicated)
517+
if output_concept_data is not None:
518+
concept_ref = f"{output_concept_data.get('domain_code', '')}.{output_concept_data.get('code', '')}"
519+
if concept_ref not in self._concept_registry:
520+
self._concept_registry[concept_ref] = output_concept_data
521+
475522
# Store output spec and register in producer map for data flow tracking
476523
if output_spec is not None:
477524
# Skip pass-through outputs: if the output digest matches one of the node's

pipelex/graph/graph_tracer_manager.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Graph tracer manager with singleton pattern for access from PipeAbstract without hub imports."""
22

33
from datetime import datetime
4+
from typing import Any
45

56
from pipelex.graph.graph_config import DataInclusionConfig
67
from pipelex.graph.graph_context import GraphContext
@@ -175,6 +176,8 @@ def on_pipe_start(
175176
node_kind: NodeKind,
176177
started_at: datetime,
177178
input_specs: list[IOSpec] | None = None,
179+
pipe_data: dict[str, Any] | None = None,
180+
concept_data: list[dict[str, Any]] | None = None,
178181
) -> tuple[str | None, GraphContext | None]:
179182
"""Record the start of a pipe execution.
180183
@@ -185,6 +188,8 @@ def on_pipe_start(
185188
node_kind: The kind of node (controller, operator, etc.).
186189
started_at: When the pipe started executing.
187190
input_specs: Optional list of IOSpec describing the inputs consumed.
191+
pipe_data: Optional serialized pipe instance for the pipe registry.
192+
concept_data: Optional list of serialized concept dicts for the concept registry.
188193
189194
Returns:
190195
Tuple of (node_id, child_graph_context) if tracing is active, (None, None) otherwise.
@@ -200,6 +205,8 @@ def on_pipe_start(
200205
node_kind=node_kind,
201206
started_at=started_at,
202207
input_specs=input_specs,
208+
pipe_data=pipe_data,
209+
concept_data=concept_data,
203210
)
204211

205212
def on_pipe_end_success(
@@ -210,6 +217,7 @@ def on_pipe_end_success(
210217
output_preview: str | None = None,
211218
metrics: dict[str, float] | None = None,
212219
output_spec: IOSpec | None = None,
220+
output_concept_data: dict[str, Any] | None = None,
213221
) -> None:
214222
"""Record successful completion of a pipe execution.
215223
@@ -220,6 +228,7 @@ def on_pipe_end_success(
220228
output_preview: Optional truncated preview of the output.
221229
metrics: Optional metrics (e.g., token counts).
222230
output_spec: Optional IOSpec describing the output produced.
231+
output_concept_data: Optional serialized concept dict for the actual output concept.
223232
"""
224233
if node_id is None:
225234
return
@@ -234,8 +243,29 @@ def on_pipe_end_success(
234243
output_preview=output_preview,
235244
metrics=metrics,
236245
output_spec=output_spec,
246+
output_concept_data=output_concept_data,
237247
)
238248

249+
def register_execution_data(
250+
self,
251+
graph_id: str,
252+
node_id: str | None,
253+
execution_data: dict[str, Any],
254+
) -> None:
255+
"""Register execution metadata for a node.
256+
257+
Args:
258+
graph_id: The graph identifier.
259+
node_id: The node ID to attach execution data to.
260+
execution_data: Dictionary of execution metadata.
261+
"""
262+
if node_id is None:
263+
return
264+
tracer = self._get_tracer(graph_id)
265+
if tracer is None:
266+
return
267+
tracer.register_execution_data(node_id=node_id, execution_data=execution_data)
268+
239269
def on_pipe_end_error(
240270
self,
241271
graph_id: str,

0 commit comments

Comments
 (0)