Skip to content

Commit 6be854d

Browse files
Amnah199 and sjrl authored
fix: refactor code and update init_params in debug_state (#317)
* improve breakpoints * updates * Fixes * Remove extra methods * Update init params * type fix * Add GeneratedAnswer to experimental * Add answer builder * Typing fix * PR comments * Small fix to PR * Fix linting * FIx for consume inputs * Fix linting * Fix typing * Remove check_breakpoint * PR comments * Fix merge * Remove init params * Create breakpoint.py * PR comments * Linting * Linting * Update haystack_experimental/core/pipeline/breakpoint.py Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com> * Update haystack_experimental/core/pipeline/breakpoint.py Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com> * Update haystack_experimental/core/pipeline/breakpoint.py Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com> * Update haystack_experimental/core/pipeline/breakpoint.py Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com> * PR comments * Improve docs * Improve docs --------- Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
1 parent 05b92ee commit 6be854d

15 files changed

Lines changed: 920 additions & 741 deletions
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
from typing import Any, Dict, List, Optional, Union
6+
7+
from haystack import Document, component, logging
8+
from haystack.components.builders.answer_builder import AnswerBuilder as HaystackAnswerBuilder
9+
from haystack.dataclasses.chat_message import ChatMessage
10+
11+
from haystack_experimental.dataclasses import GeneratedAnswer
12+
13+
logger = logging.getLogger(__name__)
14+
15+
16+
@component
class AnswerBuilder(HaystackAnswerBuilder):
    """
    Converts a query and Generator replies into a `GeneratedAnswer` object.

    AnswerBuilder parses Generator replies using custom regular expressions.
    Check out the usage example below to see how it works.
    Optionally, it can also take documents and metadata from the Generator to add to the `GeneratedAnswer` object.
    AnswerBuilder works with both non-chat and chat Generators.

    ### Usage example

    ```python
    from haystack.components.builders import AnswerBuilder

    builder = AnswerBuilder(pattern="Answer: (.*)")
    builder.run(query="What's the answer?", replies=["This is an argument. Answer: This is the answer."])
    ```
    """

    @component.output_types(answers=List[GeneratedAnswer])
    def run(  # pylint: disable=too-many-positional-arguments
        self,
        query: str,
        replies: Union[List[str], List[ChatMessage]],
        meta: Optional[List[Dict[str, Any]]] = None,
        documents: Optional[List[Document]] = None,
        pattern: Optional[str] = None,
        reference_pattern: Optional[str] = None,
    ) -> Dict[str, List[GeneratedAnswer]]:
        """
        Turns the output of a Generator into `GeneratedAnswer` objects using regular expressions.

        The metadata of every produced answer is the reply's own metadata (for `ChatMessage` replies)
        overlaid with the matching entry of `meta`, plus an `all_messages` key that holds the full
        `replies` list. If the component's `last_message_only` attribute is set, only the last reply
        (and its metadata) is converted into an answer.

        :param query:
            The input query used as the Generator prompt.
        :param replies:
            The output of the Generator. Can be a list of strings or a list of `ChatMessage` objects.
        :param meta:
            The metadata returned by the Generator. If not specified, the generated answer will contain no metadata.
        :param documents:
            The documents used as the Generator inputs. If specified, they are added to
            the `GeneratedAnswer` objects.
            If both `documents` and `reference_pattern` are specified, the documents referenced in the
            Generator output are extracted from the input documents and added to the `GeneratedAnswer` objects.
        :param pattern:
            The regular expression pattern to extract the answer text from the Generator.
            If not specified, the pattern configured on the component (`self.pattern`) is used;
            if that is not set either, the entire response is used as the answer.
            The regular expression can have one capture group at most.
            If present, the capture group text
            is used as the answer. If no capture group is present, the whole match is used as the answer.
                Examples:
                    `[^\\n]+$` finds "this is an answer" in a string "this is an argument.\\nthis is an answer".
                    `Answer: (.*)` finds "this is an answer" in a string
                    "this is an argument. Answer: this is an answer".
        :param reference_pattern:
            The regular expression pattern used for parsing the document references.
            If not specified, the pattern configured on the component (`self.reference_pattern`) is used;
            if that is not set either, no parsing is done, and all documents are referenced.
            References need to be specified as indices of the input documents and start at [1].
            Example: `\\[(\\d+)\\]` finds "1" in a string "this is an answer[1]".

        :returns: A dictionary with the following keys:
            - `answers`: The answers received from the output of the Generator.

        :raises ValueError:
            If `meta` is provided and its length differs from the number of `replies`.
        """
        if not meta:
            # One independent dict per reply so the entries never alias each other.
            meta = [{} for _ in replies]
        elif len(replies) != len(meta):
            raise ValueError(f"Number of replies ({len(replies)}), and metadata ({len(meta)}) must match.")

        # Only a pattern passed at run time needs validating here.
        if pattern:
            AnswerBuilder._check_num_groups_in_regex(pattern)

        # Run-time arguments take precedence over the values configured on the component.
        pattern = pattern or self.pattern
        reference_pattern = reference_pattern or self.reference_pattern
        all_answers = []

        replies_to_iterate = replies
        meta_to_iterate = meta

        if self.last_message_only and replies:
            replies_to_iterate = replies[-1:]
            meta_to_iterate = meta[-1:]

        for reply, given_metadata in zip(replies_to_iterate, meta_to_iterate):
            # Extract content from ChatMessage objects if reply is a ChatMessage, else use the string as is
            if isinstance(reply, ChatMessage):
                extracted_reply = reply.text or ""
            else:
                extracted_reply = str(reply)
            extracted_metadata = reply.meta if isinstance(reply, ChatMessage) else {}

            # Explicitly passed metadata overrides the metadata carried by the reply itself.
            extracted_metadata = {**extracted_metadata, **given_metadata}
            extracted_metadata["all_messages"] = replies

            referenced_docs = []
            if documents:
                if reference_pattern:
                    reference_idxs = AnswerBuilder._extract_reference_idxs(extracted_reply, reference_pattern)
                else:
                    reference_idxs = [doc_idx for doc_idx, _ in enumerate(documents)]

                for idx in reference_idxs:
                    # Indices are zero-based here (the warning below reports idx + 1). An explicit bounds
                    # check is used instead of catching IndexError because a negative index (e.g. from a
                    # "[0]" reference) would otherwise silently wrap around to a document from the end.
                    if 0 <= idx < len(documents):
                        referenced_docs.append(documents[idx])
                    else:
                        logger.warning(
                            "Document index '{index}' referenced in Generator output is out of range. ", index=idx + 1
                        )

            answer_string = AnswerBuilder._extract_answer_string(extracted_reply, pattern)
            answer = GeneratedAnswer(
                data=answer_string, query=query, documents=referenced_docs, meta=extracted_metadata
            )
            all_answers.append(answer)

        return {"answers": all_answers}

haystack_experimental/core/pipeline/base.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313

1414
class PipelineBase(HaystackPipelineBase):
1515
@staticmethod
16-
def _consume_component_inputs(component_name: str, component: Dict, inputs: Dict) -> Dict[str, Any]:
16+
def _consume_component_inputs(
17+
component_name: str, component: Dict, inputs: Dict, is_resume: bool = False
18+
) -> Dict[str, Any]:
1719
"""
1820
Extracts the inputs needed to run for the component and removes them from the global inputs state.
1921
@@ -28,6 +30,11 @@ def _consume_component_inputs(component_name: str, component: Dict, inputs: Dict
2830
for socket_name, socket in component["input_sockets"].items():
2931
socket_inputs = component_inputs.get(socket_name, [])
3032
socket_inputs = [sock["value"] for sock in socket_inputs if sock["value"] is not _NO_OUTPUT_PRODUCED]
33+
34+
# if we are resuming a component, the inputs are already consumed, so we just return the first input
35+
if is_resume:
36+
consumed_inputs[socket_name] = socket_inputs[0]
37+
continue
3138
if socket_inputs:
3239
if not socket.is_variadic:
3340
# We only care about the first input provided to the socket.

0 commit comments

Comments
 (0)