Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
9a638ae
improve breakpoints
Amnah199 Jun 5, 2025
189a0e5
updates
Amnah199 Jun 5, 2025
e79a5be
Fixes
Amnah199 Jun 6, 2025
87e3cf7
Remove extra methods
Amnah199 Jun 6, 2025
cc098a7
Update init params
Amnah199 Jun 10, 2025
ca7275d
Merge branch 'main' into fix-breakpoints
Amnah199 Jun 10, 2025
b01ffaf
type fix
Amnah199 Jun 10, 2025
684b584
Add GeneratedAnswer to experimental
Amnah199 Jun 10, 2025
577837d
Add answer builder
Amnah199 Jun 10, 2025
35414dc
Typing fix
Amnah199 Jun 10, 2025
8e82612
PR comments
Amnah199 Jun 11, 2025
0d54ed8
Small fix to PR
Amnah199 Jun 11, 2025
8ecb77d
Fix linting
Amnah199 Jun 11, 2025
e4985da
FIx for consume inputs
Amnah199 Jun 11, 2025
83e4188
Fix linting
Amnah199 Jun 11, 2025
a08bb16
Fix typing
Amnah199 Jun 11, 2025
cd90b63
Remove check_breakpoint
Amnah199 Jun 11, 2025
a02feb3
PR comments
Amnah199 Jun 12, 2025
3b6a8a0
Merge branch 'main' into fix-breakpoints
Amnah199 Jun 12, 2025
3663984
Fix merge
Amnah199 Jun 12, 2025
56e6c5b
Remove init params
Amnah199 Jun 12, 2025
9ae69ff
Create breakpoint.py
Amnah199 Jun 12, 2025
3f6c6bb
PR comments
Amnah199 Jun 12, 2025
8ee3eee
Linting
Amnah199 Jun 12, 2025
65c5d71
Linting
Amnah199 Jun 12, 2025
1c2b4f4
Update haystack_experimental/core/pipeline/breakpoint.py
Amnah199 Jun 12, 2025
018dbea
Update haystack_experimental/core/pipeline/breakpoint.py
Amnah199 Jun 12, 2025
bb149d4
Update haystack_experimental/core/pipeline/breakpoint.py
Amnah199 Jun 12, 2025
faaf049
Update haystack_experimental/core/pipeline/breakpoint.py
Amnah199 Jun 12, 2025
d5441a2
PR comments
Amnah199 Jun 12, 2025
073cb40
Improve docs
Amnah199 Jun 12, 2025
cdf9022
Improve docs
Amnah199 Jun 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 130 additions & 0 deletions haystack_experimental/components/builders/answer_builder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from typing import Any, Dict, List, Optional, Union

from haystack import Document, component, logging
from haystack.components.builders.answer_builder import AnswerBuilder as HaystackAnswerBuilder
from haystack.dataclasses.chat_message import ChatMessage

from haystack_experimental.dataclasses import GeneratedAnswer

# Module-level logger created through the `logging` object imported from `haystack` above.
logger = logging.getLogger(__name__)


@component
class AnswerBuilder(HaystackAnswerBuilder):
    """
    Converts a query and Generator replies into a `GeneratedAnswer` object.

    AnswerBuilder parses Generator replies using custom regular expressions.
    Check out the usage example below to see how it works.
    Optionally, it can also take documents and metadata from the Generator to add to the `GeneratedAnswer` object.
    AnswerBuilder works with both non-chat and chat Generators.

    This experimental variant returns the experimental `GeneratedAnswer` dataclass and stores the full list of
    replies passed to `run` under the `all_messages` key of every answer's metadata.

    ### Usage example

    ```python
    from haystack_experimental.components.builders.answer_builder import AnswerBuilder

    builder = AnswerBuilder(pattern="Answer: (.*)")
    builder.run(query="What's the answer?", replies=["This is an argument. Answer: This is the answer."])
    ```
    """

    @component.output_types(answers=List[GeneratedAnswer])
    def run(  # pylint: disable=too-many-positional-arguments
        self,
        query: str,
        replies: Union[List[str], List[ChatMessage]],
        meta: Optional[List[Dict[str, Any]]] = None,
        documents: Optional[List[Document]] = None,
        pattern: Optional[str] = None,
        reference_pattern: Optional[str] = None,
    ) -> Dict[str, List[GeneratedAnswer]]:
        """
        Turns the output of a Generator into `GeneratedAnswer` objects using regular expressions.

        :param query:
            The input query used as the Generator prompt.
        :param replies:
            The output of the Generator. Can be a list of strings or a list of `ChatMessage` objects.
        :param meta:
            The metadata returned by the Generator. If not specified, the generated answer will contain no metadata
            other than the metadata carried by a `ChatMessage` reply and the `all_messages` entry.
        :param documents:
            The documents used as the Generator inputs. If specified, they are added to
            the `GeneratedAnswer` objects.
            If both `documents` and `reference_pattern` are specified, the documents referenced in the
            Generator output are extracted from the input documents and added to the `GeneratedAnswer` objects.
        :param pattern:
            The regular expression pattern to extract the answer text from the Generator.
            If not specified, the pattern configured on the component is used; if that is not set either,
            the entire response is used as the answer.
            The regular expression can have one capture group at most.
            If present, the capture group text
            is used as the answer. If no capture group is present, the whole match is used as the answer.
            Examples:
                `[^\\n]+$` finds "this is an answer" in a string "this is an argument.\\nthis is an answer".
                `Answer: (.*)` finds "this is an answer" in a string
                "this is an argument. Answer: this is an answer".
        :param reference_pattern:
            The regular expression pattern used for parsing the document references.
            If not specified, the reference pattern configured on the component is used; if that is not set
            either, no parsing is done, and all documents are referenced.
            References need to be specified as indices of the input documents and start at [1].
            A reference outside `1..len(documents)` (including `[0]`) is skipped with a warning.
            Example: `\\[(\\d+)\\]` finds "1" in a string "this is an answer[1]".

        :returns: A dictionary with the following keys:
            - `answers`: The answers received from the output of the Generator.

        :raises ValueError:
            If `meta` is given and its length differs from the number of replies.
        """
        if not meta:
            # The placeholder dicts are only read (merged into a fresh dict below), never mutated.
            meta = [{}] * len(replies)
        elif len(replies) != len(meta):
            raise ValueError(f"Number of replies ({len(replies)}), and metadata ({len(meta)}) must match.")

        # Only a run-time pattern is validated here; presumably the instance-level pattern was
        # already checked when the component was constructed -- TODO confirm in the parent class.
        if pattern:
            AnswerBuilder._check_num_groups_in_regex(pattern)

        # Run-time arguments take precedence over the values configured on the instance.
        pattern = pattern or self.pattern
        reference_pattern = reference_pattern or self.reference_pattern
        all_answers: List[GeneratedAnswer] = []

        replies_to_iterate = replies
        meta_to_iterate = meta

        if self.last_message_only and replies:
            # Slices keep both sequences aligned and list-typed.
            replies_to_iterate = replies[-1:]
            meta_to_iterate = meta[-1:]

        for reply, given_metadata in zip(replies_to_iterate, meta_to_iterate):
            # Extract content from a ChatMessage object if the reply is one, else use the string as is.
            if isinstance(reply, ChatMessage):
                extracted_reply = reply.text or ""
                extracted_metadata = reply.meta
            else:
                extracted_reply = str(reply)
                extracted_metadata = {}

            # Build a new dict so neither the reply's metadata nor the caller's `meta` is modified;
            # explicitly given metadata wins over metadata carried by the reply.
            extracted_metadata = {**extracted_metadata, **given_metadata}
            # Always the complete `replies` list, even when only the last message is processed.
            extracted_metadata["all_messages"] = replies

            referenced_docs = []
            if documents:
                if reference_pattern:
                    reference_idxs = AnswerBuilder._extract_reference_idxs(extracted_reply, reference_pattern)
                else:
                    reference_idxs = list(range(len(documents)))

                for idx in reference_idxs:
                    # Explicit bounds check instead of catching IndexError: a negative index
                    # (e.g. produced by a "[0]" reference) would otherwise wrap around and
                    # silently select a document from the end of the list.
                    if 0 <= idx < len(documents):
                        referenced_docs.append(documents[idx])
                    else:
                        logger.warning(
                            "Document index '{index}' referenced in Generator output is out of range. ", index=idx + 1
                        )

            answer_string = AnswerBuilder._extract_answer_string(extracted_reply, pattern)
            answer = GeneratedAnswer(
                data=answer_string, query=query, documents=referenced_docs, meta=extracted_metadata
            )
            all_answers.append(answer)

        return {"answers": all_answers}
9 changes: 8 additions & 1 deletion haystack_experimental/core/pipeline/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@

class PipelineBase(HaystackPipelineBase):
@staticmethod
def _consume_component_inputs(component_name: str, component: Dict, inputs: Dict) -> Dict[str, Any]:
def _consume_component_inputs(
component_name: str, component: Dict, inputs: Dict, is_resume: bool = False
) -> Dict[str, Any]:
"""
Extracts the inputs needed to run for the component and removes them from the global inputs state.

Expand All @@ -28,6 +30,11 @@ def _consume_component_inputs(component_name: str, component: Dict, inputs: Dict
for socket_name, socket in component["input_sockets"].items():
socket_inputs = component_inputs.get(socket_name, [])
socket_inputs = [sock["value"] for sock in socket_inputs if sock["value"] is not _NO_OUTPUT_PRODUCED]

# if we are resuming a component, the inputs are already consumed, so we just return the first input
if is_resume:
consumed_inputs[socket_name] = socket_inputs[0]
continue
if socket_inputs:
if not socket.is_variadic:
# We only care about the first input provided to the socket.
Expand Down
Loading