Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ class DerivedComponent(BaseComponent):
super(DerivedComponent, self).__init__()


## ...
# ...

dc = DerivedComponent() # ok
```
Expand Down
6 changes: 3 additions & 3 deletions docs-website/docs/concepts/components/supercomponents.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ With this decorator, the `to_dict` and `from_dict` serialization is optional, as
The custom HybridRetriever example SuperComponent below turns your query into embeddings, then runs both a BM25 search and an embedding-based search at the same time. It finally merges those two result sets and returns the combined documents.

```python
## pip install haystack-ai datasets "sentence-transformers>=3.0.0"
# pip install haystack-ai datasets "sentence-transformers>=3.0.0"

from haystack import Document, Pipeline, super_component
from haystack.components.joiners import DocumentJoiner
Expand Down Expand Up @@ -145,7 +145,7 @@ pipeline.add_component("llm", OpenAIChatGenerator())
pipeline.connect("retriever.documents", "prompt_builder.documents")
pipeline.connect("prompt_builder.prompt", "llm.messages")

## Create a super component with simplified input/output mapping
# Create a super component with simplified input/output mapping
wrapper = SuperComponent(
pipeline=pipeline,
input_mapping={
Expand All @@ -157,7 +157,7 @@ wrapper = SuperComponent(
}
)

## Run the pipeline with simplified interface
# Run the pipeline with simplified interface
result = wrapper.run(query="What is the capital of France?")
print(result)
{'replies': [ChatMessage(_role=<ChatRole.ASSISTANT: 'assistant'>,
Expand Down
6 changes: 3 additions & 3 deletions docs-website/docs/concepts/data-classes.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -193,14 +193,14 @@ class StreamingChunk:
```python
from haystack.dataclasses import StreamingChunk, ToolCallDelta, ReasoningContent

## Basic text chunk
# Basic text chunk
chunk = StreamingChunk(
content="Hello world",
start=True,
meta={"model": "gpt-5-mini"},
)

## Tool call chunk
# Tool call chunk
tool_chunk = StreamingChunk(
content="",
tool_calls=[
Expand All @@ -215,7 +215,7 @@ tool_chunk = StreamingChunk(
finish_reason="tool_calls",
)

## Reasoning chunk
# Reasoning chunk
reasoning_chunk = StreamingChunk(
content="",
reasoning=ReasoningContent(
Expand Down
8 changes: 4 additions & 4 deletions docs-website/docs/concepts/data-classes/chatmessage.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -380,10 +380,10 @@ You can no longer directly initialize `ChatMessage` using `role`, `content`, and
```python
from haystack.dataclasses import ChatMessage

## LEGACY - DOES NOT WORK IN 2.9.0
# LEGACY - DOES NOT WORK IN 2.9.0
message = ChatMessage(role=ChatRole.USER, content="Hello!")

## Use the class method instead
# Use the class method instead
message = ChatMessage.from_user("Hello!")
```

Expand All @@ -405,9 +405,9 @@ from haystack.dataclasses import ChatMessage

message = ChatMessage.from_user("Hello!")

## LEGACY - DOES NOT WORK IN 2.9.0
# LEGACY - DOES NOT WORK IN 2.9.0
print(message.content)

## Use the appropriate property instead
# Use the appropriate property instead
print(message.text)
```
10 changes: 5 additions & 5 deletions docs-website/docs/concepts/device-management.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ To use a single device for inference, use either the `ComponentDevice.from_singl
from haystack.utils import ComponentDevice, Device

device = ComponentDevice.from_single(Device.gpu(id=1))
## Alternatively, use a PyTorch device string
# Alternatively, use a PyTorch device string
device = ComponentDevice.from_str("cuda:1")
generator = HuggingFaceLocalGenerator(model="llama2", device=device)
```
Expand Down Expand Up @@ -98,16 +98,16 @@ class MyComponent(Component):
init_params["device"] = ComponentDevice.from_dict(init_params["device"])
return default_from_dict(cls, data)

## Automatically selects a device.
# Automatically selects a device.
c = MyComponent(device=None)

## Uses the first GPU available.
# Uses the first GPU available.
c = MyComponent(device=ComponentDevice.from_str("cuda:0"))

## Uses the CPU.
# Uses the CPU.
c = MyComponent(device=ComponentDevice.from_single(Device.cpu()))

## Allow the component to use multiple devices using a device map.
# Allow the component to use multiple devices using a device map.
c = MyComponent(device=ComponentDevice.from_multiple(DeviceMap({
"layer1": Device.cpu(),
"layer2": Device.gpu(1),
Expand Down
4 changes: 2 additions & 2 deletions docs-website/docs/concepts/experimental-package.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@ c.run([ChatMessage.from_user("What's an experiment? Be brief.")])
Experiments can also override existing Haystack features. For example, you can opt into an experimental type of `Pipeline` by changing the usual import:

```python
## from haystack import Pipeline
# from haystack import Pipeline
from haystack_experimental import Pipeline

pipe = Pipeline()
## ...
# ...
pipe.run(...)
```

Expand Down
2 changes: 1 addition & 1 deletion docs-website/docs/concepts/jinja-templates.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ template = """
Language: {{ language }}
Question: {{ question }}
"""
## pass both variables when rendering
# pass both variables when rendering
```

If you need to use an f‑string (escape braces):
Expand Down
54 changes: 27 additions & 27 deletions docs-website/docs/concepts/pipelines/creating-pipelines.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,11 @@ Add components to the pipeline one by one. The order in which you do this doesn'
```python
query_pipeline.add_component("component_name", component_type)

## Here is an example of how you'd add the components initialized in step 2 above:
# Here is an example of how you'd add the components initialized in step 2 above:
query_pipeline.add_component("text_embedder", text_embedder)
query_pipeline.add_component("retriever", retriever)

## You could also add components without initializing them before:
# You could also add components without initializing them before:
query_pipeline.add_component("text_embedder", SentenceTransformersTextEmbedder())
query_pipeline.add_component(
"retriever",
Expand All @@ -77,30 +77,30 @@ To understand what inputs are expected to run your pipeline, use an `.inputs()`
Here's a more visual explanation within the code:

```python
## This is the syntax to connect components. Here you're connecting output1 of component1 to input1 of component2:
# This is the syntax to connect components. Here you're connecting output1 of component1 to input1 of component2:
pipeline.connect("component1.output1", "component2.input1")

## If both components have only one output and input, you can just pass their names:
# If both components have only one output and input, you can just pass their names:
pipeline.connect("component1", "component2")

## If one of the components has only one output but the other has multiple inputs,
## you can pass just the name of the component with a single output, but for the component with
## multiple inputs, you must specify which input you want to connect
# If one of the components has only one output but the other has multiple inputs,
# you can pass just the name of the component with a single output, but for the component with
# multiple inputs, you must specify which input you want to connect

## Here, component1 has only one output, but component2 has multiple inputs:
# Here, component1 has only one output, but component2 has multiple inputs:
pipeline.connect("component1", "component2.input1")

## And here's what it should look like for the semantic document search pipeline we're using as an example:
# And here's what it should look like for the semantic document search pipeline we're using as an example:
pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
## Because the InMemoryEmbeddingRetriever only has one input, this is also correct:
# Because the InMemoryEmbeddingRetriever only has one input, this is also correct:
pipeline.connect("text_embedder.embedding", "retriever")
```

You need to link all the components together, connecting them gradually in pairs. Here's an explicit example for the pipeline we're assembling:

```python
## Imagine this pipeline has four components: text_embedder, retriever, prompt_builder and llm.
## Here's how you would connect them into a pipeline:
# Imagine this pipeline has four components: text_embedder, retriever, prompt_builder and llm.
# Here's how you would connect them into a pipeline:

query_pipeline.connect("text_embedder.embedding", "retriever")
query_pipeline.connect("retriever", "prompt_builder.documents")
Expand All @@ -112,13 +112,13 @@ query_pipeline.connect("prompt_builder", "llm")
Wait for the pipeline to validate the components and connections. If everything is OK, you can now run the pipeline. `Pipeline.run()` can be called in two ways, either passing a dictionary of the component names and their inputs, or by directly passing just the inputs. When passed directly, the pipeline resolves inputs to the correct components.

```python
## Here's one way of calling the run() method
# Here's one way of calling the run() method
results = pipeline.run({"component1": {"input1_value": value1, "input2_value": value2}})

## The inputs can also be passed directly without specifying component names
# The inputs can also be passed directly without specifying component names
results = pipeline.run({"input1_value": value1, "input2_value": value2})

## This is how you'd run the semantic document search pipeline we're using as an example:
# This is how you'd run the semantic document search pipeline we're using as an example:
query = "Here comes the query text"
results = query_pipeline.run({"text_embedder": {"text": query}})
```
Expand All @@ -130,7 +130,7 @@ If you need to understand what component inputs are expected to run your pipelin
This is how it works:

```python
## A short pipeline example that converts webpages into documents
# A short pipeline example that converts webpages into documents
from haystack import Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.fetchers import LinkContentFetcher
Expand All @@ -150,19 +150,19 @@ pipeline.add_component(instance=writer, name="writer")
pipeline.connect("fetcher.streams", "converter.sources")
pipeline.connect("converter.documents", "writer.documents")

## Requesting a list of required inputs
# Requesting a list of required inputs
pipeline.inputs()

## {'fetcher': {'urls': {'type': typing.List[str], 'is_mandatory': True}},
## 'converter': {'meta': {'type': typing.Union[typing.Dict[str, typing.Any], typing.List[typing.Dict[str, typing.Any]], NoneType],
## 'is_mandatory': False,
## 'default_value': None},
## 'extraction_kwargs': {'type': typing.Optional[typing.Dict[str, typing.Any]],
## 'is_mandatory': False,
## 'default_value': None}},
## 'writer': {'policy': {'type': typing.Optional[haystack.document_stores.types.policy.DuplicatePolicy],
## 'is_mandatory': False,
## 'default_value': None}}}
# {'fetcher': {'urls': {'type': typing.List[str], 'is_mandatory': True}},
# 'converter': {'meta': {'type': typing.Union[typing.Dict[str, typing.Any], typing.List[typing.Dict[str, typing.Any]], NoneType],
# 'is_mandatory': False,
# 'default_value': None},
# 'extraction_kwargs': {'type': typing.Optional[typing.Dict[str, typing.Any]],
# 'is_mandatory': False,
# 'default_value': None}},
# 'writer': {'policy': {'type': typing.Optional[haystack.document_stores.types.policy.DuplicatePolicy],
# 'is_mandatory': False,
# 'default_value': None}}}
```

From the above response, you can see that the `urls` input is mandatory for `LinkContentFetcher`. This is how you would then run this pipeline:
Expand Down
12 changes: 6 additions & 6 deletions docs-website/docs/concepts/pipelines/debugging-pipelines.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.builders.chat_prompt_builder import ChatPromptBuilder
from haystack.dataclasses import ChatMessage

## Documents
# Documents
documents = [
Document(content="Joe lives in Berlin"),
Document(content="Joe is a software engineer"),
]

## Define prompt template
# Define prompt template
prompt_template = [
ChatMessage.from_system("You are a helpful assistant."),
ChatMessage.from_user(
Expand All @@ -47,7 +47,7 @@ prompt_template = [
),
]

## Define pipeline
# Define pipeline
p = Pipeline()
p.add_component(
instance=ChatPromptBuilder(
Expand All @@ -62,16 +62,16 @@ p.add_component(
)
p.connect("prompt_builder", "llm.messages")

## Define question
# Define question
question = "Where does Joe live?"

## Execute pipeline
# Execute pipeline
result = p.run(
{"prompt_builder": {"documents": documents, "query": question}},
include_outputs_from="prompt_builder",
)

## Print result
# Print result
print(result)
```

Expand Down
16 changes: 8 additions & 8 deletions docs-website/docs/concepts/pipelines/pipeline-breakpoints.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ Create a `Breakpoint` by specifying the component name and the visit count at wh
from haystack.dataclasses.breakpoints import Breakpoint
from haystack.core.errors import BreakpointException

## Create a breakpoint that triggers on the first visit to the "llm" component
# Create a breakpoint that triggers on the first visit to the "llm" component
break_point = Breakpoint(
component_name="llm",
visit_count=0, # 0 = first visit, 1 = second visit, etc.
snapshot_file_path="/path/to/snapshots", # Optional: save snapshot to file
)

## Run pipeline with breakpoint
# Run pipeline with breakpoint
try:
result = pipeline.run(data=input_data, break_point=break_point)
except BreakpointException as e:
Expand Down Expand Up @@ -103,10 +103,10 @@ Use the `load_pipeline_snapshot()` to first load the JSON and then pass it to th
```python
from haystack.core.pipeline.breakpoint import load_pipeline_snapshot

## Load the snapshot
# Load the snapshot
snapshot = load_pipeline_snapshot("llm_2025_05_03_11_23_23.json")

## Resume execution from the snapshot
# Resume execution from the snapshot
result = pipeline.run(data={}, pipeline_snapshot=snapshot)
print(result["llm"]["replies"])
```
Expand All @@ -123,7 +123,7 @@ A `ChatGenerator` breakpoint is defined as shown below. You need to define a `Br
```python
from haystack.dataclasses.breakpoints import AgentBreakpoint, Breakpoint, ToolBreakpoint

## Break at chat generator (LLM calls)
# Break at chat generator (LLM calls)
chat_bp = Breakpoint(component_name="chat_generator", visit_count=0)
agent_breakpoint = AgentBreakpoint(break_point=chat_bp, agent_name="my_agent")
```
Expand All @@ -137,7 +137,7 @@ Then, define an `AgentBreakpoint` passing the `ToolBreakpoint` defined before as
```python
from haystack.dataclasses.breakpoints import AgentBreakpoint, Breakpoint, ToolBreakpoint

## Break at tool invoker (tool calls)
# Break at tool invoker (tool calls)
tool_bp = ToolBreakpoint(
component_name="tool_invoker",
visit_count=0,
Expand All @@ -153,11 +153,11 @@ When an Agent breakpoint is triggered, you can resume execution using the sa
```python
from haystack.core.pipeline.breakpoint import load_pipeline_snapshot

## Load the snapshot
# Load the snapshot
snapshot_file = "./agent_debug/agent_chat_generator_2025_07_11_23_23.json"
snapshot = load_pipeline_snapshot(snapshot_file)

## Resume pipeline execution
# Resume pipeline execution
result = pipeline.run(data={}, pipeline_snapshot=snapshot)
print("Pipeline resumed successfully")
print(f"Final result: {result}")
Expand Down
Loading
Loading