From 49606b257f22c60bd6f19153285a701eb500117a Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Tue, 10 Feb 2026 22:36:09 +0100 Subject: [PATCH 01/28] Draft DSPy generators implementation --- integrations/dspy/CHANGELOG.md | 13 ++ integrations/dspy/LICENSE.txt | 201 +++++++++++++++++ .../dspy/examples/chat_generator_example.py | 55 +++++ integrations/dspy/pyproject.toml | 152 +++++++++++++ .../src/haystack_integrations/__init__.py | 3 + .../components/__init__.py | 3 + .../components/generators/__init__.py | 3 + .../components/generators/dspy/__init__.py | 5 + .../generators/dspy/chat/__init__.py | 0 .../generators/dspy/chat/chat_generator.py | 140 ++++++++++++ .../components/generators/dspy/generator.py | 213 ++++++++++++++++++ integrations/dspy/tests/__init__.py | 3 + 12 files changed, 791 insertions(+) create mode 100644 integrations/dspy/CHANGELOG.md create mode 100644 integrations/dspy/LICENSE.txt create mode 100644 integrations/dspy/examples/chat_generator_example.py create mode 100644 integrations/dspy/pyproject.toml create mode 100644 integrations/dspy/src/haystack_integrations/__init__.py create mode 100644 integrations/dspy/src/haystack_integrations/components/__init__.py create mode 100644 integrations/dspy/src/haystack_integrations/components/generators/__init__.py create mode 100644 integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py create mode 100644 integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/__init__.py create mode 100644 integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py create mode 100644 integrations/dspy/src/haystack_integrations/components/generators/dspy/generator.py create mode 100644 integrations/dspy/tests/__init__.py diff --git a/integrations/dspy/CHANGELOG.md b/integrations/dspy/CHANGELOG.md new file mode 100644 index 0000000000..7350ecaed9 --- /dev/null +++ b/integrations/dspy/CHANGELOG.md @@ -0,0 +1,13 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Added + +- Initial implementation of `DSPyChatGenerator` component +- Initial implementation of `DSPyProgramRunner` component diff --git a/integrations/dspy/LICENSE.txt b/integrations/dspy/LICENSE.txt new file mode 100644 index 0000000000..3d4485bce6 --- /dev/null +++ b/integrations/dspy/LICENSE.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2024-present deepset GmbH + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/integrations/dspy/examples/chat_generator_example.py b/integrations/dspy/examples/chat_generator_example.py new file mode 100644 index 0000000000..d815916ab5 --- /dev/null +++ b/integrations/dspy/examples/chat_generator_example.py @@ -0,0 +1,55 @@ +import dspy +from haystack import Pipeline +from haystack.dataclasses import ChatMessage +from haystack_integrations.components.generators.dspy import DSPyChatGenerator + + +class QASignature(dspy.Signature): + """Answer questions accurately and concisely.""" + + question = dspy.InputField(desc="The user's question") + answer = dspy.OutputField(desc="A clear, concise answer") + + +def basic_qa_example(): + """Simple question-answering with Chain-of-Thought reasoning.""" + + generator = DSPyChatGenerator( + model="openai/gpt-4o-mini", + signature=QASignature, + module_type="ChainOfThought", + output_field="answer", + ) + + pipeline = Pipeline() + pipeline.add_component("llm", generator) + + messages = [ChatMessage.from_user("What causes rainbows to appear?")] + result = pipeline.run({"llm": {"messages": messages}}) + + print(f"Question: {messages[0].text}") + print(f"Answer: {result['llm']['replies'][0].text}\n") + + +def string_signature_example(): + """Using a simple string signature instead of a class.""" + generator = DSPyChatGenerator( + model="openai/gpt-4o-mini", + signature="question -> answer", + module_type="Predict", + output_field="answer", + ) + + pipeline = Pipeline() + pipeline.add_component("llm", generator) + + messages = [ChatMessage.from_user("What is the capital of Japan?")] + result = pipeline.run({"llm": {"messages": messages}}) + + print(f"Question: {messages[0].text}") + print(f"Answer: {result['llm']['replies'][0].text}\n") + + +if __name__ == "__main__": + basic_qa_example() + string_signature_example() \ No newline at end of file diff --git a/integrations/dspy/pyproject.toml b/integrations/dspy/pyproject.toml new file mode 100644 index 0000000000..fa89baae4f --- /dev/null +++ b/integrations/dspy/pyproject.toml @@ -0,0 +1,152 @@ +[build-system] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + +[project] +name = "dspy-haystack" +dynamic = ["version"] +description = 'An integration between DSPy and Haystack for prompt optimization and structured generation' +readme = "README.md" +requires-python = ">=3.10" +license = "Apache-2.0" +keywords = ["haystack", "dspy", "prompt-optimization", "llm", "nlp"] +authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }] +classifiers = [ + "License :: OSI Approved :: Apache Software License", + "Development Status :: 4 - Beta", + "Programming Language :: Python", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = ["haystack-ai>=2.0.0", "dspy>=3.0.0"] + +[project.urls] +Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/dspy#readme" +Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues" +Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/dspy" + +[tool.hatch.build.targets.wheel] +packages = ["src/haystack_integrations"] + +[tool.hatch.version] +source = "vcs" +tag-pattern = 'integrations\/dspy-v(?P.*)' + +[tool.hatch.version.raw-options] +root = "../.." 
+git_describe_command = 'git describe --tags --match="integrations/dspy-v[0-9]*"' + +[tool.hatch.envs.default] +installer = "uv" +dependencies = ["haystack-pydoc-tools", "ruff"] + +[tool.hatch.envs.default.scripts] +docs = ["pydoc-markdown pydoc/config_docusaurus.yml"] +fmt = "ruff check --fix {args}; ruff format {args}" +fmt-check = "ruff check {args} && ruff format --check {args}" + +[tool.hatch.envs.test] +dependencies = [ + "pytest", + "pytest-asyncio", + "pytest-cov", + "pytest-rerunfailures", + "mypy", + "pip", +] + +[tool.hatch.envs.test.scripts] +unit = 'pytest -m "not integration" {args:tests}' +integration = 'pytest -m "integration" {args:tests}' +all = 'pytest {args:tests}' +cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x {args:tests}' + +types = """mypy -p haystack_integrations.components.generators.dspy \ +-p haystack_integrations.utils.dspy {args}""" + +[tool.mypy] +install_types = true +non_interactive = true +check_untyped_defs = true +disallow_incomplete_defs = true + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.ruff.lint.isort] +known-first-party = ["haystack_integrations"] + +[tool.ruff] +line-length = 120 + +[tool.ruff.lint] +select = [ + "A", + "ARG", + "B", + "C", + "DTZ", + "E", + "EM", + "F", + "I", + "ICN", + "ISC", + "N", + "PLC", + "PLE", + "PLR", + "PLW", + "Q", + "RUF", + "S", + "T", + "TID", + "UP", + "W", + "YTT", +] +ignore = [ + # Allow non-abstract empty methods in abstract base classes + "B027", + # Ignore checks for possible passwords + "S105", + "S106", + "S107", + # Ignore complexity + "C901", + "PLR0911", + "PLR0912", + "PLR0913", + "PLR0915", +] + +[tool.ruff.lint.flake8-tidy-imports] +ban-relative-imports = "parents" + +[tool.ruff.lint.per-file-ignores] +# Tests can use magic values, assertions, and relative imports +"tests/**/*" = ["PLR2004", "S101", "TID252"] +# Examples can print their output +"examples/**" = ["T201"] + +[tool.coverage.run] +source = ["haystack_integrations"] +branch = true +parallel = false + +[tool.coverage.report] +omit = ["*/tests/*", "*/__init__.py"] +show_missing = true +exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] + +[tool.pytest.ini_options] +markers = ["integration: integration tests"] +log_cli = true +addopts = ["--import-mode=importlib"] +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "class" diff --git a/integrations/dspy/src/haystack_integrations/__init__.py b/integrations/dspy/src/haystack_integrations/__init__.py new file mode 100644 index 0000000000..6b5e14dc19 --- /dev/null +++ b/integrations/dspy/src/haystack_integrations/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/dspy/src/haystack_integrations/components/__init__.py b/integrations/dspy/src/haystack_integrations/components/__init__.py new file mode 100644 index 0000000000..6b5e14dc19 --- /dev/null +++ b/integrations/dspy/src/haystack_integrations/components/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/dspy/src/haystack_integrations/components/generators/__init__.py b/integrations/dspy/src/haystack_integrations/components/generators/__init__.py new file mode 100644 index 0000000000..6b5e14dc19 --- /dev/null +++ b/integrations/dspy/src/haystack_integrations/components/generators/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# 
+# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py new file mode 100644 index 0000000000..68317e34e6 --- /dev/null +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py @@ -0,0 +1,5 @@ +from haystack_integrations.components.generators.dspy.chat.chat_generator import DSPyChatGenerator +from haystack_integrations.components.generators.dspy.generator import DSPyGenerator +from haystack_integrations.components.generators.dspy.program_runner import DSPyProgramRunner + +__all__ = ["DSPyChatGenerator", "DSPyGenerator", "DSPyProgramRunner"] diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/__init__.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py new file mode 100644 index 0000000000..3719d9269c --- /dev/null +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py @@ -0,0 +1,140 @@ +from typing import Any, Callable, Dict, List, Optional, Type, Union + +import dspy +from haystack import component +from haystack.dataclasses import ChatMessage, ChatRole +from haystack.utils import Secret + +from haystack_integrations.components.generators.dspy.generator import DSPyGenerator + + +@component +class DSPyChatGenerator(DSPyGenerator): + """ + A Haystack chat generator component that uses DSPy signatures and modules + for structured generation. + + Accepts and returns ``ChatMessage`` objects, making it compatible with + Haystack chat pipelines. + + Usage example: + + ```python + from haystack.dataclasses import ChatMessage + from haystack_integrations.components.generators.dspy import DSPyChatGenerator + import dspy + + class QASignature(dspy.Signature): + question = dspy.InputField(desc="The user's question") + answer = dspy.OutputField(desc="A clear, concise answer") + + generator = DSPyChatGenerator( + model="openai/gpt-5-mini", + signature=QASignature, + module_type="ChainOfThought", + ) + + messages = [ChatMessage.from_user("What is the capital of France?")] + result = generator.run(messages=messages) + print(result["replies"][0].text) + ``` + """ + + def __init__( + self, + signature: Union[str, Type[dspy.Signature]], + model: str = "openai/gpt-5-mini", + api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"), + module_type: str = "ChainOfThought", + output_field: str = "answer", + generation_kwargs: Optional[Dict[str, Any]] = None, + input_mapping: Optional[Dict[str, str]] = None, + streaming_callback: Optional[Callable] = None, + ): + """ + Initialize the DSPyChatGenerator. + + :param signature: DSPy signature defining I/O structure. Can be a string + like ``"question -> answer"`` or a ``dspy.Signature`` subclass. + :param model: Model identifier (e.g. ``"openai/gpt-5-mini"``). + :param api_key: API key for the LLM provider. + :param module_type: DSPy module type: ``"Predict"``, ``"ChainOfThought"``, or ``"ReAct"``. + :param output_field: Which signature output field to use as the reply. + :param generation_kwargs: Additional generation parameters (temperature, max_tokens, etc.). 
+ :param input_mapping: Maps DSPy signature input field names to run kwarg names. + :param streaming_callback: Callback for streaming responses. + """ + super().__init__( + signature=signature, + model=model, + api_key=api_key, + module_type=module_type, + output_field=output_field, + generation_kwargs=generation_kwargs, + input_mapping=input_mapping, + streaming_callback=streaming_callback, + ) + + @component.output_types(replies=List[ChatMessage]) + def run( + self, + messages: List[ChatMessage], + generation_kwargs: Optional[Dict[str, Any]] = None, + **kwargs, + ) -> Dict[str, Any]: + """ + Run the DSPy module on the given messages. + + :param messages: List of chat messages. The last user message is used as input. + :param generation_kwargs: Optional runtime generation parameters. + :param kwargs: Additional keyword arguments mapped to signature input fields. + :returns: A dictionary with ``replies`` (list of ChatMessage) and ``meta`` (list of dicts). + """ + if not messages: + msg = "The 'messages' parameter cannot be empty." + raise ValueError(msg) + + prompt = self._extract_last_user_message(messages) + result = DSPyGenerator.run(self, prompt=prompt, generation_kwargs=generation_kwargs, **kwargs) + + replies = [ChatMessage.from_assistant(text=text) for text in result["replies"]] + + return {"replies": replies, "meta": result["meta"]} + + @component.output_types(replies=List[ChatMessage]) + async def run_async( + self, + messages: List[ChatMessage], + generation_kwargs: Optional[Dict[str, Any]] = None, + **kwargs, + ) -> Dict[str, Any]: + """ + Asynchronously run the DSPy module on the given messages. + + Uses DSPy's native ``acall`` for true async I/O. + + :param messages: List of chat messages. The last user message is used as input. + :param generation_kwargs: Optional runtime generation parameters. + :param kwargs: Additional keyword arguments mapped to signature input fields. + :returns: A dictionary with ``replies`` (list of ChatMessage) and ``meta`` (list of dicts). + """ + if not messages: + msg = "The 'messages' parameter cannot be empty." + raise ValueError(msg) + + prompt = self._extract_last_user_message(messages) + result = await DSPyGenerator.run_async(self, prompt=prompt, generation_kwargs=generation_kwargs, **kwargs) + + replies = [ChatMessage.from_assistant(text=text) for text in result["replies"]] + + return {"replies": replies, "meta": result["meta"]} + + @staticmethod + def _extract_last_user_message(messages: List[ChatMessage]) -> str: + """Extract the text of the last user message from a list of chat messages.""" + for msg in reversed(messages): + if msg.role == ChatRole.USER: + return msg.text + + # Fallback to last message if no user message found + return messages[-1].text diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/generator.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/generator.py new file mode 100644 index 0000000000..21047843cf --- /dev/null +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/generator.py @@ -0,0 +1,213 @@ +from typing import Any, Callable, Dict, List, Optional, Type, Union + +import dspy +from haystack import component, default_from_dict, default_to_dict +from haystack.utils import Secret, deserialize_secrets_inplace + + +@component +class DSPyGenerator: + """ + A Haystack generator component that uses DSPy signatures and modules + for structured generation. + + Takes a string prompt and returns string replies. 
+ + Usage example: + + ```python + from haystack_integrations.components.generators.dspy import DSPyGenerator + import dspy + + class QASignature(dspy.Signature): + question = dspy.InputField(desc="The user's question") + answer = dspy.OutputField(desc="A clear, concise answer") + + generator = DSPyGenerator( + model="openai/gpt-5-mini", + signature=QASignature, + module_type="ChainOfThought", + ) + + result = generator.run(prompt="What is the capital of France?") + print(result["replies"][0]) + ``` + """ + + VALID_MODULE_TYPES = {"Predict", "ChainOfThought", "ReAct"} + + def __init__( + self, + signature: Union[str, Type[dspy.Signature]], + model: str = "openai/gpt-5-mini", + api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"), + module_type: str = "ChainOfThought", + output_field: str = "answer", + generation_kwargs: Optional[Dict[str, Any]] = None, + input_mapping: Optional[Dict[str, str]] = None, + streaming_callback: Optional[Callable] = None, + ): + """ + Initialize the DSPyGenerator. + + :param signature: DSPy signature defining I/O structure. Can be a string + like ``"question -> answer"`` or a ``dspy.Signature`` subclass. + :param model: Model identifier (e.g. ``"openai/gpt-5-mini"``). + :param api_key: API key for the LLM provider. + :param module_type: DSPy module type: ``"Predict"``, ``"ChainOfThought"``, or ``"ReAct"``. + :param output_field: Which signature output field to use as the reply. + :param generation_kwargs: Additional generation parameters (temperature, max_tokens, etc.). + :param input_mapping: Maps DSPy signature input field names to run kwarg names. + :param streaming_callback: Callback for streaming responses. + """ + if module_type not in self.VALID_MODULE_TYPES: + msg = f"Invalid module_type '{module_type}'. Must be one of {sorted(self.VALID_MODULE_TYPES)}" + raise ValueError(msg) + + self.signature = signature + self.model = model + self.api_key = api_key + self.module_type = module_type + self.output_field = output_field + self.generation_kwargs = generation_kwargs or {} + self.input_mapping = input_mapping + self.streaming_callback = streaming_callback + + self._lm = dspy.LM( + model=self.model, + api_key=self.api_key.resolve_value(), + **self.generation_kwargs, + ) + dspy.configure(lm=self._lm) + + module_class = self._get_module_class(self.module_type) + self._module = module_class(self.signature) + + @component.output_types(replies=List[str]) + def run( + self, + prompt: str, + generation_kwargs: Optional[Dict[str, Any]] = None, + **kwargs, + ) -> Dict[str, Any]: + """ + Run the DSPy module on the given prompt. + + :param prompt: The input prompt string. + :param generation_kwargs: Optional runtime generation parameters that override + the defaults for this call only. + :param kwargs: Additional keyword arguments mapped to signature input fields. + :returns: A dictionary with ``replies`` (list of strings) and ``meta`` (list of dicts). 
+ """ + dspy_inputs = self._build_dspy_inputs(prompt, **kwargs) + + if generation_kwargs: + prediction = self._module(**dspy_inputs, config=generation_kwargs) + else: + prediction = self._module(**dspy_inputs) + + output_text = getattr(prediction, self.output_field, str(prediction)) + meta = {"model": self.model, "module_type": self.module_type} + + return {"replies": [output_text], "meta": [meta]} + + @component.output_types(replies=List[str]) + async def run_async( + self, + prompt: str, + generation_kwargs: Optional[Dict[str, Any]] = None, + **kwargs, + ) -> Dict[str, Any]: + """ + Asynchronously run the DSPy module on the given prompt. + + Uses DSPy's native ``acall`` for true async I/O. + + :param prompt: The input prompt string. + :param generation_kwargs: Optional runtime generation parameters that override + the defaults for this call only. + :param kwargs: Additional keyword arguments mapped to signature input fields. + :returns: A dictionary with ``replies`` (list of strings) and ``meta`` (list of dicts). + """ + dspy_inputs = self._build_dspy_inputs(prompt, **kwargs) + + if generation_kwargs: + prediction = await self._module.acall(**dspy_inputs, config=generation_kwargs) + else: + prediction = await self._module.acall(**dspy_inputs) + + output_text = getattr(prediction, self.output_field, str(prediction)) + meta = {"model": self.model, "module_type": self.module_type} + + return {"replies": [output_text], "meta": [meta]} + + def _build_dspy_inputs(self, prompt: str, **kwargs) -> Dict[str, Any]: + """Build the input dict for the DSPy module call.""" + if self.input_mapping: + dspy_inputs = {} + for sig_field, source in self.input_mapping.items(): + if source in kwargs: + dspy_inputs[sig_field] = kwargs[source] + else: + dspy_inputs[sig_field] = prompt + return dspy_inputs + + # Default: map prompt to the first input field + input_fields = self._get_input_field_names() + dspy_inputs = {input_fields[0]: prompt} + + # Pass any additional kwargs that match remaining input fields + for field in input_fields[1:]: + if field in kwargs: + dspy_inputs[field] = kwargs[field] + + return dspy_inputs + + def _get_input_field_names(self) -> List[str]: + """Get input field names from the signature.""" + if isinstance(self.signature, str): + input_part = self.signature.split("->")[0].strip() + return [f.strip() for f in input_part.split(",")] + return list(self.signature.input_fields.keys()) + + @staticmethod + def _get_module_class(module_type: str): + """Map a module type string to the corresponding DSPy module class.""" + mapping = { + "Predict": dspy.Predict, + "ChainOfThought": dspy.ChainOfThought, + "ReAct": dspy.ReAct, + } + return mapping[module_type] + + def _signature_to_string(self) -> str: + """Convert the signature to a string representation for serialization.""" + if isinstance(self.signature, str): + return self.signature + input_names = list(self.signature.input_fields.keys()) + output_names = list(self.signature.output_fields.keys()) + return ", ".join(input_names) + " -> " + ", ".join(output_names) + + def to_dict(self) -> Dict[str, Any]: + """Serialize this component to a dictionary.""" + kwargs: Dict[str, Any] = { + "signature": self._signature_to_string(), + "model": self.model, + "module_type": self.module_type, + "output_field": self.output_field, + "generation_kwargs": self.generation_kwargs, + "input_mapping": self.input_mapping, + } + try: + kwargs["api_key"] = self.api_key.to_dict() + except ValueError: + # Token-based secrets cannot be serialized; omit them. 
+ pass + return default_to_dict(self, **kwargs) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "DSPyGenerator": + """Deserialize a component from a dictionary.""" + init_params = data.get("init_parameters", {}) + deserialize_secrets_inplace(init_params, ["api_key"]) + return default_from_dict(cls, data) diff --git a/integrations/dspy/tests/__init__.py b/integrations/dspy/tests/__init__.py new file mode 100644 index 0000000000..6b5e14dc19 --- /dev/null +++ b/integrations/dspy/tests/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 From 9ddd7371ae528f624b8861dc952751cf6000ab8c Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Wed, 11 Feb 2026 23:05:37 +0100 Subject: [PATCH 02/28] Addded tests --- .../src/haystack_integrations/__init__.py | 3 - .../components/__init__.py | 3 - .../components/generators/__init__.py | 3 - .../generators/dspy/chat/chat_generator.py | 3 +- .../dspy/tests/test_chat_generator.py | 262 +++++++++++++++++ .../dspy/tests/test_chat_generator_async.py | 89 ++++++ integrations/dspy/tests/test_generator.py | 274 ++++++++++++++++++ 7 files changed, 627 insertions(+), 10 deletions(-) create mode 100644 integrations/dspy/tests/test_chat_generator.py create mode 100644 integrations/dspy/tests/test_chat_generator_async.py create mode 100644 integrations/dspy/tests/test_generator.py diff --git a/integrations/dspy/src/haystack_integrations/__init__.py b/integrations/dspy/src/haystack_integrations/__init__.py index 6b5e14dc19..e69de29bb2 100644 --- a/integrations/dspy/src/haystack_integrations/__init__.py +++ b/integrations/dspy/src/haystack_integrations/__init__.py @@ -1,3 +0,0 @@ -# SPDX-FileCopyrightText: 2024-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/dspy/src/haystack_integrations/components/__init__.py b/integrations/dspy/src/haystack_integrations/components/__init__.py index 6b5e14dc19..e69de29bb2 100644 --- a/integrations/dspy/src/haystack_integrations/components/__init__.py +++ b/integrations/dspy/src/haystack_integrations/components/__init__.py @@ -1,3 +0,0 @@ -# SPDX-FileCopyrightText: 2024-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/dspy/src/haystack_integrations/components/generators/__init__.py b/integrations/dspy/src/haystack_integrations/components/generators/__init__.py index 6b5e14dc19..e69de29bb2 100644 --- a/integrations/dspy/src/haystack_integrations/components/generators/__init__.py +++ b/integrations/dspy/src/haystack_integrations/components/generators/__init__.py @@ -1,3 +0,0 @@ -# SPDX-FileCopyrightText: 2024-present deepset GmbH -# -# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py index 3719d9269c..affce09ef7 100644 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py @@ -64,7 +64,8 @@ def __init__( :param input_mapping: Maps DSPy signature input field names to run kwarg names. :param streaming_callback: Callback for streaming responses. 
""" - super().__init__( + DSPyGenerator.__init__( + self, signature=signature, model=model, api_key=api_key, diff --git a/integrations/dspy/tests/test_chat_generator.py b/integrations/dspy/tests/test_chat_generator.py new file mode 100644 index 0000000000..2c4a15bfd5 --- /dev/null +++ b/integrations/dspy/tests/test_chat_generator.py @@ -0,0 +1,262 @@ +import os +from unittest.mock import MagicMock, patch + +import pytest +from haystack.dataclasses import ChatMessage +from haystack.utils.auth import Secret + +from haystack_integrations.components.generators.dspy.chat.chat_generator import DSPyChatGenerator + + +@pytest.fixture +def mock_dspy_module(): + """ + Mock DSPy LM, configure, and module classes to avoid real API calls. + """ + with patch("dspy.LM") as mock_lm_class, \ + patch("dspy.configure"), \ + patch("dspy.ChainOfThought") as mock_cot_class, \ + patch("dspy.Predict") as mock_predict_class, \ + patch("dspy.ReAct") as mock_react_class: + mock_lm = MagicMock() + mock_lm_class.return_value = mock_lm + + mock_module = MagicMock() + mock_module.return_value = MagicMock(answer="Hello world!") + mock_cot_class.return_value = mock_module + mock_predict_class.return_value = mock_module + mock_react_class.return_value = mock_module + + yield mock_module + + +@pytest.fixture +def chat_messages(): + return [ + ChatMessage.from_system("You are a helpful assistant"), + ChatMessage.from_user("What's the capital of France"), + ] + + +class TestDSPyChatGenerator: + def test_init_default(self, monkeypatch, mock_dspy_module): + monkeypatch.setenv("OPENAI_API_KEY", "test-api-key") + component = DSPyChatGenerator(signature="question -> answer") + assert component.model == "openai/gpt-5-mini" + assert component.signature == "question -> answer" + assert component.module_type == "ChainOfThought" + assert component.output_field == "answer" + assert component.streaming_callback is None + assert not component.generation_kwargs + assert component.input_mapping is None + + def test_init_fail_wo_api_key(self, monkeypatch): + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + with pytest.raises(ValueError, match=r"None of the .* environment variables are set"): + DSPyChatGenerator(signature="question -> answer") + + def test_init_with_parameters(self, mock_dspy_module): + component = DSPyChatGenerator( + signature="context, question -> answer", + model="openai/gpt-4o", + api_key=Secret.from_token("test-api-key"), + module_type="Predict", + output_field="response", + generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"}, + input_mapping={"context": "context", "question": "question"}, + ) + assert component.model == "openai/gpt-4o" + assert component.signature == "context, question -> answer" + assert component.module_type == "Predict" + assert component.output_field == "response" + assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} + assert component.input_mapping == {"context": "context", "question": "question"} + + def test_init_with_invalid_module_type(self, mock_dspy_module): + with pytest.raises(ValueError, match="Invalid module_type"): + DSPyChatGenerator( + signature="question -> answer", + api_key=Secret.from_token("test-api-key"), + module_type="InvalidModule", + ) + + def test_to_dict_default(self, monkeypatch, mock_dspy_module): + monkeypatch.setenv("OPENAI_API_KEY", "test-api-key") + component = DSPyChatGenerator( + signature="question -> answer", + api_key=Secret.from_env_var("OPENAI_API_KEY"), + ) + data = component.to_dict() + assert 
data == { + "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPyChatGenerator", + "init_parameters": { + "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, + "signature": "question -> answer", + "model": "openai/gpt-5-mini", + "module_type": "ChainOfThought", + "output_field": "answer", + "generation_kwargs": {}, + "input_mapping": None, + }, + } + + def test_to_dict_with_parameters(self, monkeypatch, mock_dspy_module): + monkeypatch.setenv("OPENAI_API_KEY", "test-api-key") + component = DSPyChatGenerator( + signature="context, question -> answer", + model="openai/gpt-4o", + api_key=Secret.from_env_var("OPENAI_API_KEY"), + module_type="Predict", + output_field="response", + generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"}, + input_mapping={"context": "context", "question": "question"}, + ) + data = component.to_dict() + assert data == { + "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPyChatGenerator", + "init_parameters": { + "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, + "signature": "context, question -> answer", + "model": "openai/gpt-4o", + "module_type": "Predict", + "output_field": "response", + "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, + "input_mapping": {"context": "context", "question": "question"}, + }, + } + + def test_from_dict(self, monkeypatch, mock_dspy_module): + monkeypatch.setenv("OPENAI_API_KEY", "fake-api-key") + data = { + "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPyChatGenerator", + "init_parameters": { + "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, + "signature": "question -> answer", + "model": "openai/gpt-4o", + "module_type": "Predict", + "output_field": "response", + "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, + "input_mapping": None, + }, + } + component = DSPyChatGenerator.from_dict(data) + assert component.model == "openai/gpt-4o" + assert component.signature == "question -> answer" + assert component.module_type == "Predict" + assert component.output_field == "response" + assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} + assert component.api_key == Secret.from_env_var("OPENAI_API_KEY") + assert component.input_mapping is None + + def test_from_dict_fail_wo_env_var(self, monkeypatch): + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + data = { + "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPyChatGenerator", + "init_parameters": { + "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, + "signature": "question -> answer", + "model": "openai/gpt-4o", + "module_type": "Predict", + "output_field": "response", + "generation_kwargs": {}, + "input_mapping": None, + }, + } + with pytest.raises(ValueError, match=r"None of the .* environment variables are set"): + DSPyChatGenerator.from_dict(data) + + def test_run(self, chat_messages, mock_dspy_module): + component = DSPyChatGenerator( + signature="question -> answer", + api_key=Secret.from_token("test-api-key"), + ) + response = component.run(chat_messages) + + # Verify the mock was called + mock_dspy_module.assert_called_once() + + # Check that the component returns the correct ChatMessage response + assert isinstance(response, dict) + assert "replies" in response + assert isinstance(response["replies"], list) + assert 
len(response["replies"]) == 1 + assert all(isinstance(reply, ChatMessage) for reply in response["replies"]) + + def test_run_with_params(self, chat_messages, mock_dspy_module): + component = DSPyChatGenerator( + signature="question -> answer", + api_key=Secret.from_token("test-api-key"), + generation_kwargs={"max_tokens": 10, "temperature": 0.5}, + ) + response = component.run(chat_messages, generation_kwargs={"temperature": 0.9}) + + # Check that the component calls the DSPy module with the correct parameters + _, kwargs = mock_dspy_module.call_args + assert kwargs["config"] == {"temperature": 0.9} + + # Check that the component returns the correct response + assert isinstance(response, dict) + assert "replies" in response + assert isinstance(response["replies"], list) + assert len(response["replies"]) == 1 + assert all(isinstance(reply, ChatMessage) for reply in response["replies"]) + + def test_run_with_multiple_messages(self, mock_dspy_module): + component = DSPyChatGenerator( + signature="question -> answer", + api_key=Secret.from_token("test-api-key"), + ) + messages = [ + ChatMessage.from_user("Hello"), + ChatMessage.from_assistant("Hi there!"), + ChatMessage.from_user("What is the capital of Germany?"), + ] + response = component.run(messages=messages) + + # Verify the last user message was used as input + args, _ = mock_dspy_module.call_args + # The first positional kwarg should be the question from the last user message + call_kwargs = mock_dspy_module.call_args.kwargs + assert call_kwargs.get("question") == "What is the capital of Germany?" + + assert "replies" in response + assert len(response["replies"]) == 1 + assert isinstance(response["replies"][0], ChatMessage) + + def test_run_with_empty_messages(self, mock_dspy_module): + component = DSPyChatGenerator( + signature="question -> answer", + api_key=Secret.from_token("test-api-key"), + ) + with pytest.raises(ValueError, match="messages"): + component.run(messages=[]) + + def test_run_with_wrong_model(self, mock_dspy_module): + mock_dspy_module.side_effect = Exception("Invalid model name") + + generator = DSPyChatGenerator( + signature="question -> answer", + api_key=Secret.from_token("test-api-key"), + model="something-obviously-wrong", + ) + + with pytest.raises(Exception, match="Invalid model name"): + generator.run(messages=[ChatMessage.from_user("Whatever")]) + + @pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", + ) + @pytest.mark.integration + def test_live_run(self): + chat_messages = [ChatMessage.from_user("What's the capital of France")] + component = DSPyChatGenerator(signature="question -> answer") + results = component.run(chat_messages) + assert len(results["replies"]) == 1 + message: ChatMessage = results["replies"][0] + assert "Paris" in message.text + + metadata = results["meta"][0] + assert metadata["model"] == "openai/gpt-5-mini" + assert metadata["module_type"] == "ChainOfThought" + diff --git a/integrations/dspy/tests/test_chat_generator_async.py b/integrations/dspy/tests/test_chat_generator_async.py new file mode 100644 index 0000000000..2c803353cb --- /dev/null +++ b/integrations/dspy/tests/test_chat_generator_async.py @@ -0,0 +1,89 @@ +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from haystack.dataclasses import ChatMessage +from haystack.utils.auth import Secret + +from haystack_integrations.components.generators.dspy.chat.chat_generator import 
DSPyChatGenerator + + +@pytest.fixture +def mock_dspy_module(): + """ + Mock DSPy LM, configure, and module classes to avoid real API calls. + """ + with patch("dspy.LM") as mock_lm_class, \ + patch("dspy.configure"), \ + patch("dspy.ChainOfThought") as mock_cot_class, \ + patch("dspy.Predict") as mock_predict_class, \ + patch("dspy.ReAct") as mock_react_class: + mock_lm = MagicMock() + mock_lm_class.return_value = mock_lm + + mock_module = MagicMock() + mock_module.return_value = MagicMock(answer="Hello world!") + mock_module.acall = AsyncMock(return_value=MagicMock(answer="Hello world!")) + + mock_cot_class.return_value = mock_module + mock_predict_class.return_value = mock_module + mock_react_class.return_value = mock_module + + yield mock_module + + +@pytest.fixture +def chat_messages(): + return [ + ChatMessage.from_system("You are a helpful assistant"), + ChatMessage.from_user("What's the capital of France"), + ] + + +class TestDSPyChatGeneratorAsync: + @pytest.mark.asyncio + async def test_run_async(self, chat_messages, mock_dspy_module): + component = DSPyChatGenerator( + signature="question -> answer", + api_key=Secret.from_token("test-api-key"), + ) + response = await component.run_async(messages=chat_messages) + + # Verify the async mock was called + mock_dspy_module.acall.assert_called_once() + + # Check that the component returns the correct ChatMessage response + assert isinstance(response, dict) + assert "replies" in response + assert isinstance(response["replies"], list) + assert len(response["replies"]) == 1 + assert all(isinstance(reply, ChatMessage) for reply in response["replies"]) + + @pytest.mark.asyncio + async def test_run_async_with_params(self, chat_messages, mock_dspy_module): + component = DSPyChatGenerator( + signature="question -> answer", + api_key=Secret.from_token("test-api-key"), + ) + response = await component.run_async( + messages=chat_messages, + generation_kwargs={"temperature": 0.9}, + ) + + # Check that acall was called with the correct parameters + _, kwargs = mock_dspy_module.acall.call_args + assert kwargs["config"] == {"temperature": 0.9} + + # Check that the component returns the correct response + assert isinstance(response, dict) + assert "replies" in response + assert len(response["replies"]) == 1 + assert all(isinstance(reply, ChatMessage) for reply in response["replies"]) + + @pytest.mark.asyncio + async def test_run_async_with_empty_messages(self, mock_dspy_module): + component = DSPyChatGenerator( + signature="question -> answer", + api_key=Secret.from_token("test-api-key"), + ) + with pytest.raises(ValueError, match="messages"): + await component.run_async(messages=[]) diff --git a/integrations/dspy/tests/test_generator.py b/integrations/dspy/tests/test_generator.py new file mode 100644 index 0000000000..d1e52ae1e3 --- /dev/null +++ b/integrations/dspy/tests/test_generator.py @@ -0,0 +1,274 @@ +import os +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from haystack.utils.auth import Secret + +from haystack_integrations.components.generators.dspy.generator import DSPyGenerator + + +@pytest.fixture +def mock_dspy_module(): + """ + Mock DSPy LM, configure, and module classes to avoid real API calls. 
+ """ + with patch("dspy.LM") as mock_lm_class, \ + patch("dspy.configure"), \ + patch("dspy.ChainOfThought") as mock_cot_class, \ + patch("dspy.Predict") as mock_predict_class, \ + patch("dspy.ReAct") as mock_react_class: + mock_lm = MagicMock() + mock_lm_class.return_value = mock_lm + + mock_module = MagicMock() + mock_module.return_value = MagicMock(answer="Hello world!") + mock_module.acall = AsyncMock(return_value=MagicMock(answer="Hello world!")) + + mock_cot_class.return_value = mock_module + mock_predict_class.return_value = mock_module + mock_react_class.return_value = mock_module + + yield mock_module + + +class TestDSPyGenerator: + def test_init_default(self, monkeypatch, mock_dspy_module): + monkeypatch.setenv("OPENAI_API_KEY", "test-api-key") + component = DSPyGenerator(signature="question -> answer") + assert component.model == "openai/gpt-5-mini" + assert component.signature == "question -> answer" + assert component.module_type == "ChainOfThought" + assert component.output_field == "answer" + assert component.streaming_callback is None + assert not component.generation_kwargs + assert component.input_mapping is None + + def test_init_fail_wo_api_key(self, monkeypatch): + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + with pytest.raises(ValueError, match=r"None of the .* environment variables are set"): + DSPyGenerator(signature="question -> answer") + + def test_init_with_parameters(self, mock_dspy_module): + component = DSPyGenerator( + signature="context, question -> answer", + model="openai/gpt-5-mini", + api_key=Secret.from_token("test-api-key"), + module_type="Predict", + output_field="response", + generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"}, + input_mapping={"context": "context", "question": "question"}, + ) + assert component.model == "openai/gpt-5-mini" + assert component.signature == "context, question -> answer" + assert component.module_type == "Predict" + assert component.output_field == "response" + assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} + assert component.input_mapping == {"context": "context", "question": "question"} + + def test_init_with_invalid_module_type(self, mock_dspy_module): + with pytest.raises(ValueError, match="Invalid module_type"): + DSPyGenerator( + signature="question -> answer", + api_key=Secret.from_token("test-api-key"), + module_type="InvalidModule", + ) + + def test_to_dict_default(self, monkeypatch, mock_dspy_module): + monkeypatch.setenv("OPENAI_API_KEY", "test-api-key") + component = DSPyGenerator( + signature="question -> answer", + api_key=Secret.from_env_var("OPENAI_API_KEY"), + ) + data = component.to_dict() + assert data == { + "type": "haystack_integrations.components.generators.dspy.generator.DSPyGenerator", + "init_parameters": { + "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, + "signature": "question -> answer", + "model": "openai/gpt-5-mini", + "module_type": "ChainOfThought", + "output_field": "answer", + "generation_kwargs": {}, + "input_mapping": None, + }, + } + + def test_to_dict_with_parameters(self, monkeypatch, mock_dspy_module): + monkeypatch.setenv("OPENAI_API_KEY", "test-api-key") + component = DSPyGenerator( + signature="context, question -> answer", + model="openai/gpt-5-mini", + api_key=Secret.from_env_var("OPENAI_API_KEY"), + module_type="Predict", + output_field="response", + generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"}, + input_mapping={"context": "context", 
"question": "question"}, + ) + data = component.to_dict() + assert data == { + "type": "haystack_integrations.components.generators.dspy.generator.DSPyGenerator", + "init_parameters": { + "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, + "signature": "context, question -> answer", + "model": "openai/gpt-5-mini", + "module_type": "Predict", + "output_field": "response", + "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, + "input_mapping": {"context": "context", "question": "question"}, + }, + } + + def test_from_dict(self, monkeypatch, mock_dspy_module): + monkeypatch.setenv("OPENAI_API_KEY", "fake-api-key") + data = { + "type": "haystack_integrations.components.generators.dspy.generator.DSPyGenerator", + "init_parameters": { + "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, + "signature": "question -> answer", + "model": "openai/gpt-5-mini", + "module_type": "Predict", + "output_field": "response", + "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, + "input_mapping": None, + }, + } + component = DSPyGenerator.from_dict(data) + assert component.model == "openai/gpt-5-mini" + assert component.signature == "question -> answer" + assert component.module_type == "Predict" + assert component.output_field == "response" + assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} + assert component.api_key == Secret.from_env_var("OPENAI_API_KEY") + assert component.input_mapping is None + + def test_from_dict_fail_wo_env_var(self, monkeypatch): + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + data = { + "type": "haystack_integrations.components.generators.dspy.generator.DSPyGenerator", + "init_parameters": { + "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, + "signature": "question -> answer", + "model": "openai/gpt-5-mini", + "module_type": "Predict", + "output_field": "response", + "generation_kwargs": {}, + "input_mapping": None, + }, + } + with pytest.raises(ValueError, match=r"None of the .* environment variables are set"): + DSPyGenerator.from_dict(data) + + def test_run(self, mock_dspy_module): + component = DSPyGenerator( + signature="question -> answer", + api_key=Secret.from_token("test-api-key"), + ) + response = component.run(prompt="What's Natural Language Processing?") + + # Verify the mock was called + mock_dspy_module.assert_called_once() + + # Check that the component returns the correct response + assert isinstance(response, dict) + assert "replies" in response + assert isinstance(response["replies"], list) + assert len(response["replies"]) == 1 + assert isinstance(response["replies"][0], str) + + def test_run_with_params(self, mock_dspy_module): + component = DSPyGenerator( + signature="question -> answer", + api_key=Secret.from_token("test-api-key"), + generation_kwargs={"max_tokens": 10, "temperature": 0.5}, + ) + response = component.run( + prompt="What's Natural Language Processing?", + generation_kwargs={"temperature": 0.9}, + ) + + # Check that the component calls the DSPy module with the correct parameters + _, kwargs = mock_dspy_module.call_args + assert kwargs["config"] == {"temperature": 0.9} + + # Check that the component returns the correct response + assert isinstance(response, dict) + assert "replies" in response + assert isinstance(response["replies"], list) + assert len(response["replies"]) == 1 + assert isinstance(response["replies"][0], str) + + def 
test_run_with_wrong_model(self, mock_dspy_module): + mock_dspy_module.side_effect = Exception("Invalid model name") + + generator = DSPyGenerator( + signature="question -> answer", + api_key=Secret.from_token("test-api-key"), + model="something-obviously-wrong", + ) + + with pytest.raises(Exception, match="Invalid model name"): + generator.run(prompt="Whatever") + + @pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", + ) + @pytest.mark.integration + def test_live_run(self): + component = DSPyGenerator(signature="question -> answer") + results = component.run(prompt="What's the capital of France?") + assert len(results["replies"]) == 1 + assert len(results["meta"]) == 1 + response: str = results["replies"][0] + assert "Paris" in response + + metadata = results["meta"][0] + assert metadata["model"] == "openai/gpt-5-mini" + assert metadata["module_type"] == "ChainOfThought" + + @pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", + ) + @pytest.mark.asyncio + async def test_run_async(self, mock_dspy_module): + component = DSPyGenerator( + signature="question -> answer", + api_key=Secret.from_token("test-api-key"), + ) + response = await component.run_async(prompt="What's Natural Language Processing?") + + # Verify the async mock was called + mock_dspy_module.acall.assert_called_once() + + # Check that the component returns the correct response + assert isinstance(response, dict) + assert "replies" in response + assert isinstance(response["replies"], list) + assert len(response["replies"]) == 1 + assert isinstance(response["replies"][0], str) + + @pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", + ) + @pytest.mark.asyncio + async def test_run_async_with_params(self, mock_dspy_module): + component = DSPyGenerator( + signature="question -> answer", + api_key=Secret.from_token("test-api-key"), + ) + response = await component.run_async( + prompt="What's Natural Language Processing?", + generation_kwargs={"temperature": 0.9}, + ) + + # Check that acall was called with the correct parameters + _, kwargs = mock_dspy_module.acall.call_args + assert kwargs["config"] == {"temperature": 0.9} + + # Check that the component returns the correct response + assert isinstance(response, dict) + assert "replies" in response + assert len(response["replies"]) == 1 + assert isinstance(response["replies"][0], str) From 27fe361a6564ab075481322c4313abf3f9f74543 Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Mon, 23 Feb 2026 14:37:14 +0100 Subject: [PATCH 03/28] Remove generator, tests, and updates as discussed --- integrations/dspy/README.md | 65 +++++ .../dspy/examples/chat_generator_example.py | 4 +- integrations/dspy/pyproject.toml | 3 +- .../components/generators/dspy/__init__.py | 4 +- .../generators/dspy/chat/__init__.py | 3 + .../generators/dspy/chat/chat_generator.py | 169 +++++++++-- .../components/generators/dspy/generator.py | 213 -------------- .../dspy/tests/test_chat_generator.py | 126 ++++++-- .../dspy/tests/test_chat_generator_async.py | 16 +- integrations/dspy/tests/test_generator.py | 274 ------------------ 10 files changed, 325 insertions(+), 552 deletions(-) create mode 100644 integrations/dspy/README.md delete mode 100644 
integrations/dspy/src/haystack_integrations/components/generators/dspy/generator.py delete mode 100644 integrations/dspy/tests/test_generator.py diff --git a/integrations/dspy/README.md b/integrations/dspy/README.md new file mode 100644 index 0000000000..27fc2675cd --- /dev/null +++ b/integrations/dspy/README.md @@ -0,0 +1,65 @@ +# dspy-haystack + +[![PyPI - Version](https://img.shields.io/pypi/v/dspy-haystack.svg)](https://pypi.org/project/dspy-haystack) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/dspy-haystack.svg)](https://pypi.org/project/dspy-haystack) + +An integration between [DSPy](https://github.com/stanfordnlp/dspy) and [Haystack](https://haystack.deepset.ai/). + +DSPy is a framework for algorithmically optimizing prompts for Language Models by applying classical machine learning concepts (training data, evaluation metrics, optimization). + +This integration provides: +- **DSPyChatGenerator** — a Haystack ChatGenerator component that uses DSPy signatures and modules for structured generation + +## Installation + +```bash +pip install dspy-haystack +``` + +## Quick Start + +### DSPyChatGenerator + +A Haystack chat generator that uses DSPy signatures for structured generation with built-in reasoning patterns (Chain-of-Thought, Predict, ReAct). + +```python +from haystack import Pipeline +from haystack.dataclasses import ChatMessage +from haystack_integrations.components.generators.dspy import DSPyChatGenerator +import dspy + +# Define a DSPy signature +class QASignature(dspy.Signature): + """Answer questions accurately and concisely.""" + question = dspy.InputField(desc="The user's question") + answer = dspy.OutputField(desc="A clear, concise answer") + +# Create the generator +generator = DSPyChatGenerator( + model="openai/gpt-5-mini", + signature=QASignature, + module_type="ChainOfThought" +) + +# Use in pipeline +pipeline = Pipeline() +pipeline.add_component("llm", generator) + +messages = [ChatMessage.from_user("What is the capital of France?")] +result = pipeline.run({"llm": {"messages": messages}}) +print(result["llm"]["replies"][0].text) +``` + +You can also use string signatures for quick prototyping: + +```python +generator = DSPyChatGenerator( + model="openai/gpt-5-mini", + signature="question -> answer", + module_type="Predict" +) +``` + +## License + +`dspy-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license. 
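+## Multiple input fields
+
+When the signature declares more than one input field, extra keyword arguments passed to
+`run` can be routed to those fields via `input_mapping`, while the last user message still
+fills the remaining field. This is a minimal sketch based on the component's behaviour in
+this patch; the field names and values are illustrative:
+
+```python
+from haystack.dataclasses import ChatMessage
+from haystack_integrations.components.generators.dspy import DSPyChatGenerator
+
+generator = DSPyChatGenerator(
+    model="openai/gpt-5-mini",
+    signature="context, question -> answer",
+    module_type="Predict",
+    input_mapping={"context": "context", "question": "question"},
+)
+
+messages = [ChatMessage.from_user("What is ML?")]
+# "context" is taken from the matching run kwarg; "question" has no matching kwarg,
+# so it falls back to the text of the last user message.
+result = generator.run(messages=messages, context="Machine learning is a subset of AI.")
+print(result["replies"][0].text)
+```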
diff --git a/integrations/dspy/examples/chat_generator_example.py b/integrations/dspy/examples/chat_generator_example.py index d815916ab5..1af6d0ca1f 100644 --- a/integrations/dspy/examples/chat_generator_example.py +++ b/integrations/dspy/examples/chat_generator_example.py @@ -15,7 +15,7 @@ def basic_qa_example(): """Simple question-answering with Chain-of-Thought reasoning.""" generator = DSPyChatGenerator( - model="openai/gpt-4o-mini", + model="openai/gpt-5-mini", signature=QASignature, module_type="ChainOfThought", output_field="answer", @@ -34,7 +34,7 @@ def basic_qa_example(): def string_signature_example(): """Using a simple string signature instead of a class.""" generator = DSPyChatGenerator( - model="openai/gpt-4o-mini", + model="openai/gpt-5-mini", signature="question -> answer", module_type="Predict", output_field="answer", diff --git a/integrations/dspy/pyproject.toml b/integrations/dspy/pyproject.toml index fa89baae4f..92e94af522 100644 --- a/integrations/dspy/pyproject.toml +++ b/integrations/dspy/pyproject.toml @@ -65,8 +65,7 @@ integration = 'pytest -m "integration" {args:tests}' all = 'pytest {args:tests}' cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x {args:tests}' -types = """mypy -p haystack_integrations.components.generators.dspy \ --p haystack_integrations.utils.dspy {args}""" +types = "mypy -p haystack_integrations.components.generators.dspy {args}" [tool.mypy] install_types = true diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py index 68317e34e6..5418b2abf2 100644 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py @@ -1,5 +1,3 @@ from haystack_integrations.components.generators.dspy.chat.chat_generator import DSPyChatGenerator -from haystack_integrations.components.generators.dspy.generator import DSPyGenerator -from haystack_integrations.components.generators.dspy.program_runner import DSPyProgramRunner -__all__ = ["DSPyChatGenerator", "DSPyGenerator", "DSPyProgramRunner"] +__all__ = ["DSPyChatGenerator"] diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/__init__.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/__init__.py index e69de29bb2..5418b2abf2 100644 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/__init__.py +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/__init__.py @@ -0,0 +1,3 @@ +from haystack_integrations.components.generators.dspy.chat.chat_generator import DSPyChatGenerator + +__all__ = ["DSPyChatGenerator"] diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py index affce09ef7..7c590e6122 100644 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py @@ -1,15 +1,48 @@ from typing import Any, Callable, Dict, List, Optional, Type, Union import dspy -from haystack import component +from haystack import component, default_from_dict, default_to_dict from haystack.dataclasses import ChatMessage, ChatRole -from haystack.utils import Secret 
+from haystack.utils import Secret, deserialize_secrets_inplace -from haystack_integrations.components.generators.dspy.generator import DSPyGenerator +VALID_MODULE_TYPES = {"Predict", "ChainOfThought", "ReAct"} + + +def configure_dspy_lm(model: str, api_key: str, **kwargs: Any) -> dspy.LM: + """ + Create and configure a DSPy language model. + + :param model: Model identifier (e.g. ``"openai/gpt-5-mini"``). + :param api_key: Resolved API key string. + :param kwargs: Additional keyword arguments passed to ``dspy.LM``. + :returns: The configured ``dspy.LM`` instance. + """ + lm = dspy.LM(model=model, api_key=api_key, **kwargs) + dspy.configure(lm=lm) + return lm + + +def get_dspy_module_class(module_type: str): + """ + Map a module type string to the corresponding DSPy module class. + + :param module_type: One of ``"Predict"``, ``"ChainOfThought"``, or ``"ReAct"``. + :returns: The DSPy module class. + :raises ValueError: If the module type is not recognized. + """ + mapping = { + "Predict": dspy.Predict, + "ChainOfThought": dspy.ChainOfThought, + "ReAct": dspy.ReAct, + } + if module_type not in mapping: + msg = f"Invalid module_type '{module_type}'. Must be one of {sorted(VALID_MODULE_TYPES)}" + raise ValueError(msg) + return mapping[module_type] @component -class DSPyChatGenerator(DSPyGenerator): +class DSPyChatGenerator: """ A Haystack chat generator component that uses DSPy signatures and modules for structured generation. @@ -64,18 +97,94 @@ def __init__( :param input_mapping: Maps DSPy signature input field names to run kwarg names. :param streaming_callback: Callback for streaming responses. """ - DSPyGenerator.__init__( - self, - signature=signature, - model=model, - api_key=api_key, - module_type=module_type, - output_field=output_field, - generation_kwargs=generation_kwargs, - input_mapping=input_mapping, - streaming_callback=streaming_callback, + if module_type not in VALID_MODULE_TYPES: + msg = f"Invalid module_type '{module_type}'. 
Must be one of {sorted(VALID_MODULE_TYPES)}" + raise ValueError(msg) + + self.signature = signature + self.model = model + self.api_key = api_key + self.module_type = module_type + self.output_field = output_field + self.generation_kwargs = generation_kwargs or {} + self.input_mapping = input_mapping + self.streaming_callback = streaming_callback + + self._lm = configure_dspy_lm( + model=self.model, + api_key=self.api_key.resolve_value(), + **self.generation_kwargs, ) + module_class = get_dspy_module_class(self.module_type) + self._module = module_class(self.signature) + + def _build_dspy_inputs(self, prompt: str, **kwargs) -> Dict[str, Any]: + """Build the input dict for the DSPy module call.""" + if self.input_mapping: + dspy_inputs = {} + for sig_field, source in self.input_mapping.items(): + if source in kwargs: + dspy_inputs[sig_field] = kwargs[source] + else: + dspy_inputs[sig_field] = prompt + return dspy_inputs + + input_fields = self._get_input_field_names() + dspy_inputs = {input_fields[0]: prompt} + + for field in input_fields[1:]: + if field in kwargs: + dspy_inputs[field] = kwargs[field] + + return dspy_inputs + + def _get_input_field_names(self) -> List[str]: + """Get input field names from the signature.""" + if isinstance(self.signature, str): + input_part = self.signature.split("->")[0].strip() + return [f.strip() for f in input_part.split(",")] + return list(self.signature.input_fields.keys()) + + @staticmethod + def _extract_last_user_message(messages: List[ChatMessage]) -> str: + """Extract the text of the last user message from a list of chat messages.""" + for msg in reversed(messages): + if msg.role == ChatRole.USER: + return msg.text + return messages[-1].text + + def _signature_to_string(self) -> str: + """Convert the signature to a string representation for serialization.""" + if isinstance(self.signature, str): + return self.signature + input_names = list(self.signature.input_fields.keys()) + output_names = list(self.signature.output_fields.keys()) + return ", ".join(input_names) + " -> " + ", ".join(output_names) + + def to_dict(self) -> Dict[str, Any]: + """Serialize this component to a dictionary.""" + kwargs: Dict[str, Any] = { + "signature": self._signature_to_string(), + "model": self.model, + "module_type": self.module_type, + "output_field": self.output_field, + "generation_kwargs": self.generation_kwargs, + "input_mapping": self.input_mapping, + } + try: + kwargs["api_key"] = self.api_key.to_dict() + except ValueError: + pass + return default_to_dict(self, **kwargs) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "DSPyChatGenerator": + """Deserialize a component from a dictionary.""" + init_params = data.get("init_parameters", {}) + deserialize_secrets_inplace(init_params, ["api_key"]) + return default_from_dict(cls, data) + @component.output_types(replies=List[ChatMessage]) def run( self, @@ -96,11 +205,17 @@ def run( raise ValueError(msg) prompt = self._extract_last_user_message(messages) - result = DSPyGenerator.run(self, prompt=prompt, generation_kwargs=generation_kwargs, **kwargs) + dspy_inputs = self._build_dspy_inputs(prompt, **kwargs) - replies = [ChatMessage.from_assistant(text=text) for text in result["replies"]] + if generation_kwargs: + prediction = self._module(**dspy_inputs, config=generation_kwargs) + else: + prediction = self._module(**dspy_inputs) - return {"replies": replies, "meta": result["meta"]} + output_text = getattr(prediction, self.output_field, str(prediction)) + + replies = 
[ChatMessage.from_assistant(text=output_text)] + return {"replies": replies} @component.output_types(replies=List[ChatMessage]) async def run_async( @@ -124,18 +239,14 @@ async def run_async( raise ValueError(msg) prompt = self._extract_last_user_message(messages) - result = await DSPyGenerator.run_async(self, prompt=prompt, generation_kwargs=generation_kwargs, **kwargs) + dspy_inputs = self._build_dspy_inputs(prompt, **kwargs) - replies = [ChatMessage.from_assistant(text=text) for text in result["replies"]] + if generation_kwargs: + prediction = await self._module.acall(**dspy_inputs, config=generation_kwargs) + else: + prediction = await self._module.acall(**dspy_inputs) - return {"replies": replies, "meta": result["meta"]} + output_text = getattr(prediction, self.output_field, str(prediction)) - @staticmethod - def _extract_last_user_message(messages: List[ChatMessage]) -> str: - """Extract the text of the last user message from a list of chat messages.""" - for msg in reversed(messages): - if msg.role == ChatRole.USER: - return msg.text - - # Fallback to last message if no user message found - return messages[-1].text + replies = [ChatMessage.from_assistant(text=output_text)] + return {"replies": replies} diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/generator.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/generator.py deleted file mode 100644 index 21047843cf..0000000000 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/generator.py +++ /dev/null @@ -1,213 +0,0 @@ -from typing import Any, Callable, Dict, List, Optional, Type, Union - -import dspy -from haystack import component, default_from_dict, default_to_dict -from haystack.utils import Secret, deserialize_secrets_inplace - - -@component -class DSPyGenerator: - """ - A Haystack generator component that uses DSPy signatures and modules - for structured generation. - - Takes a string prompt and returns string replies. - - Usage example: - - ```python - from haystack_integrations.components.generators.dspy import DSPyGenerator - import dspy - - class QASignature(dspy.Signature): - question = dspy.InputField(desc="The user's question") - answer = dspy.OutputField(desc="A clear, concise answer") - - generator = DSPyGenerator( - model="openai/gpt-5-mini", - signature=QASignature, - module_type="ChainOfThought", - ) - - result = generator.run(prompt="What is the capital of France?") - print(result["replies"][0]) - ``` - """ - - VALID_MODULE_TYPES = {"Predict", "ChainOfThought", "ReAct"} - - def __init__( - self, - signature: Union[str, Type[dspy.Signature]], - model: str = "openai/gpt-5-mini", - api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"), - module_type: str = "ChainOfThought", - output_field: str = "answer", - generation_kwargs: Optional[Dict[str, Any]] = None, - input_mapping: Optional[Dict[str, str]] = None, - streaming_callback: Optional[Callable] = None, - ): - """ - Initialize the DSPyGenerator. - - :param signature: DSPy signature defining I/O structure. Can be a string - like ``"question -> answer"`` or a ``dspy.Signature`` subclass. - :param model: Model identifier (e.g. ``"openai/gpt-5-mini"``). - :param api_key: API key for the LLM provider. - :param module_type: DSPy module type: ``"Predict"``, ``"ChainOfThought"``, or ``"ReAct"``. - :param output_field: Which signature output field to use as the reply. - :param generation_kwargs: Additional generation parameters (temperature, max_tokens, etc.). 
- :param input_mapping: Maps DSPy signature input field names to run kwarg names. - :param streaming_callback: Callback for streaming responses. - """ - if module_type not in self.VALID_MODULE_TYPES: - msg = f"Invalid module_type '{module_type}'. Must be one of {sorted(self.VALID_MODULE_TYPES)}" - raise ValueError(msg) - - self.signature = signature - self.model = model - self.api_key = api_key - self.module_type = module_type - self.output_field = output_field - self.generation_kwargs = generation_kwargs or {} - self.input_mapping = input_mapping - self.streaming_callback = streaming_callback - - self._lm = dspy.LM( - model=self.model, - api_key=self.api_key.resolve_value(), - **self.generation_kwargs, - ) - dspy.configure(lm=self._lm) - - module_class = self._get_module_class(self.module_type) - self._module = module_class(self.signature) - - @component.output_types(replies=List[str]) - def run( - self, - prompt: str, - generation_kwargs: Optional[Dict[str, Any]] = None, - **kwargs, - ) -> Dict[str, Any]: - """ - Run the DSPy module on the given prompt. - - :param prompt: The input prompt string. - :param generation_kwargs: Optional runtime generation parameters that override - the defaults for this call only. - :param kwargs: Additional keyword arguments mapped to signature input fields. - :returns: A dictionary with ``replies`` (list of strings) and ``meta`` (list of dicts). - """ - dspy_inputs = self._build_dspy_inputs(prompt, **kwargs) - - if generation_kwargs: - prediction = self._module(**dspy_inputs, config=generation_kwargs) - else: - prediction = self._module(**dspy_inputs) - - output_text = getattr(prediction, self.output_field, str(prediction)) - meta = {"model": self.model, "module_type": self.module_type} - - return {"replies": [output_text], "meta": [meta]} - - @component.output_types(replies=List[str]) - async def run_async( - self, - prompt: str, - generation_kwargs: Optional[Dict[str, Any]] = None, - **kwargs, - ) -> Dict[str, Any]: - """ - Asynchronously run the DSPy module on the given prompt. - - Uses DSPy's native ``acall`` for true async I/O. - - :param prompt: The input prompt string. - :param generation_kwargs: Optional runtime generation parameters that override - the defaults for this call only. - :param kwargs: Additional keyword arguments mapped to signature input fields. - :returns: A dictionary with ``replies`` (list of strings) and ``meta`` (list of dicts). 
- """ - dspy_inputs = self._build_dspy_inputs(prompt, **kwargs) - - if generation_kwargs: - prediction = await self._module.acall(**dspy_inputs, config=generation_kwargs) - else: - prediction = await self._module.acall(**dspy_inputs) - - output_text = getattr(prediction, self.output_field, str(prediction)) - meta = {"model": self.model, "module_type": self.module_type} - - return {"replies": [output_text], "meta": [meta]} - - def _build_dspy_inputs(self, prompt: str, **kwargs) -> Dict[str, Any]: - """Build the input dict for the DSPy module call.""" - if self.input_mapping: - dspy_inputs = {} - for sig_field, source in self.input_mapping.items(): - if source in kwargs: - dspy_inputs[sig_field] = kwargs[source] - else: - dspy_inputs[sig_field] = prompt - return dspy_inputs - - # Default: map prompt to the first input field - input_fields = self._get_input_field_names() - dspy_inputs = {input_fields[0]: prompt} - - # Pass any additional kwargs that match remaining input fields - for field in input_fields[1:]: - if field in kwargs: - dspy_inputs[field] = kwargs[field] - - return dspy_inputs - - def _get_input_field_names(self) -> List[str]: - """Get input field names from the signature.""" - if isinstance(self.signature, str): - input_part = self.signature.split("->")[0].strip() - return [f.strip() for f in input_part.split(",")] - return list(self.signature.input_fields.keys()) - - @staticmethod - def _get_module_class(module_type: str): - """Map a module type string to the corresponding DSPy module class.""" - mapping = { - "Predict": dspy.Predict, - "ChainOfThought": dspy.ChainOfThought, - "ReAct": dspy.ReAct, - } - return mapping[module_type] - - def _signature_to_string(self) -> str: - """Convert the signature to a string representation for serialization.""" - if isinstance(self.signature, str): - return self.signature - input_names = list(self.signature.input_fields.keys()) - output_names = list(self.signature.output_fields.keys()) - return ", ".join(input_names) + " -> " + ", ".join(output_names) - - def to_dict(self) -> Dict[str, Any]: - """Serialize this component to a dictionary.""" - kwargs: Dict[str, Any] = { - "signature": self._signature_to_string(), - "model": self.model, - "module_type": self.module_type, - "output_field": self.output_field, - "generation_kwargs": self.generation_kwargs, - "input_mapping": self.input_mapping, - } - try: - kwargs["api_key"] = self.api_key.to_dict() - except ValueError: - # Token-based secrets cannot be serialized; omit them. 
- pass - return default_to_dict(self, **kwargs) - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "DSPyGenerator": - """Deserialize a component from a dictionary.""" - init_params = data.get("init_parameters", {}) - deserialize_secrets_inplace(init_params, ["api_key"]) - return default_from_dict(cls, data) diff --git a/integrations/dspy/tests/test_chat_generator.py b/integrations/dspy/tests/test_chat_generator.py index 2c4a15bfd5..033eb3ef5f 100644 --- a/integrations/dspy/tests/test_chat_generator.py +++ b/integrations/dspy/tests/test_chat_generator.py @@ -1,11 +1,17 @@ import os from unittest.mock import MagicMock, patch +import dspy import pytest from haystack.dataclasses import ChatMessage from haystack.utils.auth import Secret -from haystack_integrations.components.generators.dspy.chat.chat_generator import DSPyChatGenerator +from haystack_integrations.components.generators.dspy.chat.chat_generator import ( + VALID_MODULE_TYPES, + DSPyChatGenerator, + configure_dspy_lm, + get_dspy_module_class, +) @pytest.fixture @@ -13,11 +19,13 @@ def mock_dspy_module(): """ Mock DSPy LM, configure, and module classes to avoid real API calls. """ - with patch("dspy.LM") as mock_lm_class, \ - patch("dspy.configure"), \ - patch("dspy.ChainOfThought") as mock_cot_class, \ - patch("dspy.Predict") as mock_predict_class, \ - patch("dspy.ReAct") as mock_react_class: + with ( + patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.LM") as mock_lm_class, + patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.configure"), + patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.ChainOfThought") as mock_cot_class, + patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.Predict") as mock_predict_class, + patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.ReAct") as mock_react_class, + ): mock_lm = MagicMock() mock_lm_class.return_value = mock_lm @@ -38,6 +46,65 @@ def chat_messages(): ] +@pytest.fixture +def sample_qa_signature(): + class QASignature(dspy.Signature): + question: str = dspy.InputField() + answer: str = dspy.OutputField() + + return QASignature + + +class TestValidModuleTypes: + def test_contains_expected_types(self): + assert VALID_MODULE_TYPES == {"Predict", "ChainOfThought", "ReAct"} + + +class TestGetDspyModuleClass: + def test_predict(self): + assert get_dspy_module_class("Predict") is dspy.Predict + + def test_chain_of_thought(self): + assert get_dspy_module_class("ChainOfThought") is dspy.ChainOfThought + + def test_react(self): + assert get_dspy_module_class("ReAct") is dspy.ReAct + + def test_invalid_type_raises(self): + with pytest.raises(ValueError, match="Invalid module_type 'Unknown'"): + get_dspy_module_class("Unknown") + + def test_invalid_type_lists_valid_options(self): + with pytest.raises(ValueError, match="ChainOfThought"): + get_dspy_module_class("BadType") + + +class TestConfigureDspyLm: + @patch("dspy.configure") + @patch("dspy.LM") + def test_creates_lm_and_configures(self, mock_lm_class, mock_configure): + mock_lm = MagicMock() + mock_lm_class.return_value = mock_lm + + result = configure_dspy_lm(model="openai/gpt-5-mini", api_key="test-key") + + mock_lm_class.assert_called_once_with(model="openai/gpt-5-mini", api_key="test-key") + mock_configure.assert_called_once_with(lm=mock_lm) + assert result is mock_lm + + @patch("dspy.configure") + @patch("dspy.LM") + def test_passes_extra_kwargs(self, mock_lm_class, mock_configure): + 
mock_lm = MagicMock() + mock_lm_class.return_value = mock_lm + + configure_dspy_lm(model="openai/gpt-5-mini", api_key="test-key", temperature=0.7, max_tokens=100) + + mock_lm_class.assert_called_once_with( + model="openai/gpt-5-mini", api_key="test-key", temperature=0.7, max_tokens=100 + ) + + class TestDSPyChatGenerator: def test_init_default(self, monkeypatch, mock_dspy_module): monkeypatch.setenv("OPENAI_API_KEY", "test-api-key") @@ -80,6 +147,13 @@ def test_init_with_invalid_module_type(self, mock_dspy_module): module_type="InvalidModule", ) + def test_init_with_signature_class(self, mock_dspy_module, sample_qa_signature): + component = DSPyChatGenerator( + signature=sample_qa_signature, + api_key=Secret.from_token("test-api-key"), + ) + assert component.signature is sample_qa_signature + def test_to_dict_default(self, monkeypatch, mock_dspy_module): monkeypatch.setenv("OPENAI_API_KEY", "test-api-key") component = DSPyChatGenerator( @@ -172,17 +246,15 @@ def test_run(self, chat_messages, mock_dspy_module): ) response = component.run(chat_messages) - # Verify the mock was called mock_dspy_module.assert_called_once() - # Check that the component returns the correct ChatMessage response assert isinstance(response, dict) assert "replies" in response assert isinstance(response["replies"], list) assert len(response["replies"]) == 1 assert all(isinstance(reply, ChatMessage) for reply in response["replies"]) - def test_run_with_params(self, chat_messages, mock_dspy_module): + def test_run_with_generation_kwargs(self, chat_messages, mock_dspy_module): component = DSPyChatGenerator( signature="question -> answer", api_key=Secret.from_token("test-api-key"), @@ -190,14 +262,11 @@ def test_run_with_params(self, chat_messages, mock_dspy_module): ) response = component.run(chat_messages, generation_kwargs={"temperature": 0.9}) - # Check that the component calls the DSPy module with the correct parameters _, kwargs = mock_dspy_module.call_args assert kwargs["config"] == {"temperature": 0.9} - # Check that the component returns the correct response assert isinstance(response, dict) assert "replies" in response - assert isinstance(response["replies"], list) assert len(response["replies"]) == 1 assert all(isinstance(reply, ChatMessage) for reply in response["replies"]) @@ -213,9 +282,6 @@ def test_run_with_multiple_messages(self, mock_dspy_module): ] response = component.run(messages=messages) - # Verify the last user message was used as input - args, _ = mock_dspy_module.call_args - # The first positional kwarg should be the question from the last user message call_kwargs = mock_dspy_module.call_args.kwargs assert call_kwargs.get("question") == "What is the capital of Germany?" 
@@ -231,6 +297,31 @@ def test_run_with_empty_messages(self, mock_dspy_module): with pytest.raises(ValueError, match="messages"): component.run(messages=[]) + def test_run_with_custom_output_field(self, mock_dspy_module): + mock_dspy_module.return_value = MagicMock(summary="This is a summary") + component = DSPyChatGenerator( + signature="text -> summary", + api_key=Secret.from_token("test-api-key"), + output_field="summary", + ) + messages = [ChatMessage.from_user("Summarize this text")] + response = component.run(messages=messages) + + assert response["replies"][0].text == "This is a summary" + + def test_run_with_input_mapping(self, mock_dspy_module): + component = DSPyChatGenerator( + signature="context, question -> answer", + api_key=Secret.from_token("test-api-key"), + input_mapping={"context": "context", "question": "question"}, + ) + messages = [ChatMessage.from_user("What is ML?")] + response = component.run(messages=messages, context="Machine learning is a subset of AI.") + + call_kwargs = mock_dspy_module.call_args.kwargs + assert call_kwargs.get("context") == "Machine learning is a subset of AI." + assert call_kwargs.get("question") == "What is ML?" + def test_run_with_wrong_model(self, mock_dspy_module): mock_dspy_module.side_effect = Exception("Invalid model name") @@ -255,8 +346,3 @@ def test_live_run(self): assert len(results["replies"]) == 1 message: ChatMessage = results["replies"][0] assert "Paris" in message.text - - metadata = results["meta"][0] - assert metadata["model"] == "openai/gpt-5-mini" - assert metadata["module_type"] == "ChainOfThought" - diff --git a/integrations/dspy/tests/test_chat_generator_async.py b/integrations/dspy/tests/test_chat_generator_async.py index 2c803353cb..87a8931c87 100644 --- a/integrations/dspy/tests/test_chat_generator_async.py +++ b/integrations/dspy/tests/test_chat_generator_async.py @@ -12,11 +12,13 @@ def mock_dspy_module(): """ Mock DSPy LM, configure, and module classes to avoid real API calls. 
""" - with patch("dspy.LM") as mock_lm_class, \ - patch("dspy.configure"), \ - patch("dspy.ChainOfThought") as mock_cot_class, \ - patch("dspy.Predict") as mock_predict_class, \ - patch("dspy.ReAct") as mock_react_class: + with ( + patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.LM") as mock_lm_class, + patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.configure"), + patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.ChainOfThought") as mock_cot_class, + patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.Predict") as mock_predict_class, + patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.ReAct") as mock_react_class, + ): mock_lm = MagicMock() mock_lm_class.return_value = mock_lm @@ -48,10 +50,8 @@ async def test_run_async(self, chat_messages, mock_dspy_module): ) response = await component.run_async(messages=chat_messages) - # Verify the async mock was called mock_dspy_module.acall.assert_called_once() - # Check that the component returns the correct ChatMessage response assert isinstance(response, dict) assert "replies" in response assert isinstance(response["replies"], list) @@ -69,11 +69,9 @@ async def test_run_async_with_params(self, chat_messages, mock_dspy_module): generation_kwargs={"temperature": 0.9}, ) - # Check that acall was called with the correct parameters _, kwargs = mock_dspy_module.acall.call_args assert kwargs["config"] == {"temperature": 0.9} - # Check that the component returns the correct response assert isinstance(response, dict) assert "replies" in response assert len(response["replies"]) == 1 diff --git a/integrations/dspy/tests/test_generator.py b/integrations/dspy/tests/test_generator.py deleted file mode 100644 index d1e52ae1e3..0000000000 --- a/integrations/dspy/tests/test_generator.py +++ /dev/null @@ -1,274 +0,0 @@ -import os -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest -from haystack.utils.auth import Secret - -from haystack_integrations.components.generators.dspy.generator import DSPyGenerator - - -@pytest.fixture -def mock_dspy_module(): - """ - Mock DSPy LM, configure, and module classes to avoid real API calls. 
- """ - with patch("dspy.LM") as mock_lm_class, \ - patch("dspy.configure"), \ - patch("dspy.ChainOfThought") as mock_cot_class, \ - patch("dspy.Predict") as mock_predict_class, \ - patch("dspy.ReAct") as mock_react_class: - mock_lm = MagicMock() - mock_lm_class.return_value = mock_lm - - mock_module = MagicMock() - mock_module.return_value = MagicMock(answer="Hello world!") - mock_module.acall = AsyncMock(return_value=MagicMock(answer="Hello world!")) - - mock_cot_class.return_value = mock_module - mock_predict_class.return_value = mock_module - mock_react_class.return_value = mock_module - - yield mock_module - - -class TestDSPyGenerator: - def test_init_default(self, monkeypatch, mock_dspy_module): - monkeypatch.setenv("OPENAI_API_KEY", "test-api-key") - component = DSPyGenerator(signature="question -> answer") - assert component.model == "openai/gpt-5-mini" - assert component.signature == "question -> answer" - assert component.module_type == "ChainOfThought" - assert component.output_field == "answer" - assert component.streaming_callback is None - assert not component.generation_kwargs - assert component.input_mapping is None - - def test_init_fail_wo_api_key(self, monkeypatch): - monkeypatch.delenv("OPENAI_API_KEY", raising=False) - with pytest.raises(ValueError, match=r"None of the .* environment variables are set"): - DSPyGenerator(signature="question -> answer") - - def test_init_with_parameters(self, mock_dspy_module): - component = DSPyGenerator( - signature="context, question -> answer", - model="openai/gpt-5-mini", - api_key=Secret.from_token("test-api-key"), - module_type="Predict", - output_field="response", - generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"}, - input_mapping={"context": "context", "question": "question"}, - ) - assert component.model == "openai/gpt-5-mini" - assert component.signature == "context, question -> answer" - assert component.module_type == "Predict" - assert component.output_field == "response" - assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} - assert component.input_mapping == {"context": "context", "question": "question"} - - def test_init_with_invalid_module_type(self, mock_dspy_module): - with pytest.raises(ValueError, match="Invalid module_type"): - DSPyGenerator( - signature="question -> answer", - api_key=Secret.from_token("test-api-key"), - module_type="InvalidModule", - ) - - def test_to_dict_default(self, monkeypatch, mock_dspy_module): - monkeypatch.setenv("OPENAI_API_KEY", "test-api-key") - component = DSPyGenerator( - signature="question -> answer", - api_key=Secret.from_env_var("OPENAI_API_KEY"), - ) - data = component.to_dict() - assert data == { - "type": "haystack_integrations.components.generators.dspy.generator.DSPyGenerator", - "init_parameters": { - "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, - "signature": "question -> answer", - "model": "openai/gpt-5-mini", - "module_type": "ChainOfThought", - "output_field": "answer", - "generation_kwargs": {}, - "input_mapping": None, - }, - } - - def test_to_dict_with_parameters(self, monkeypatch, mock_dspy_module): - monkeypatch.setenv("OPENAI_API_KEY", "test-api-key") - component = DSPyGenerator( - signature="context, question -> answer", - model="openai/gpt-5-mini", - api_key=Secret.from_env_var("OPENAI_API_KEY"), - module_type="Predict", - output_field="response", - generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"}, - input_mapping={"context": "context", 
"question": "question"}, - ) - data = component.to_dict() - assert data == { - "type": "haystack_integrations.components.generators.dspy.generator.DSPyGenerator", - "init_parameters": { - "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, - "signature": "context, question -> answer", - "model": "openai/gpt-5-mini", - "module_type": "Predict", - "output_field": "response", - "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, - "input_mapping": {"context": "context", "question": "question"}, - }, - } - - def test_from_dict(self, monkeypatch, mock_dspy_module): - monkeypatch.setenv("OPENAI_API_KEY", "fake-api-key") - data = { - "type": "haystack_integrations.components.generators.dspy.generator.DSPyGenerator", - "init_parameters": { - "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, - "signature": "question -> answer", - "model": "openai/gpt-5-mini", - "module_type": "Predict", - "output_field": "response", - "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, - "input_mapping": None, - }, - } - component = DSPyGenerator.from_dict(data) - assert component.model == "openai/gpt-5-mini" - assert component.signature == "question -> answer" - assert component.module_type == "Predict" - assert component.output_field == "response" - assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} - assert component.api_key == Secret.from_env_var("OPENAI_API_KEY") - assert component.input_mapping is None - - def test_from_dict_fail_wo_env_var(self, monkeypatch): - monkeypatch.delenv("OPENAI_API_KEY", raising=False) - data = { - "type": "haystack_integrations.components.generators.dspy.generator.DSPyGenerator", - "init_parameters": { - "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, - "signature": "question -> answer", - "model": "openai/gpt-5-mini", - "module_type": "Predict", - "output_field": "response", - "generation_kwargs": {}, - "input_mapping": None, - }, - } - with pytest.raises(ValueError, match=r"None of the .* environment variables are set"): - DSPyGenerator.from_dict(data) - - def test_run(self, mock_dspy_module): - component = DSPyGenerator( - signature="question -> answer", - api_key=Secret.from_token("test-api-key"), - ) - response = component.run(prompt="What's Natural Language Processing?") - - # Verify the mock was called - mock_dspy_module.assert_called_once() - - # Check that the component returns the correct response - assert isinstance(response, dict) - assert "replies" in response - assert isinstance(response["replies"], list) - assert len(response["replies"]) == 1 - assert isinstance(response["replies"][0], str) - - def test_run_with_params(self, mock_dspy_module): - component = DSPyGenerator( - signature="question -> answer", - api_key=Secret.from_token("test-api-key"), - generation_kwargs={"max_tokens": 10, "temperature": 0.5}, - ) - response = component.run( - prompt="What's Natural Language Processing?", - generation_kwargs={"temperature": 0.9}, - ) - - # Check that the component calls the DSPy module with the correct parameters - _, kwargs = mock_dspy_module.call_args - assert kwargs["config"] == {"temperature": 0.9} - - # Check that the component returns the correct response - assert isinstance(response, dict) - assert "replies" in response - assert isinstance(response["replies"], list) - assert len(response["replies"]) == 1 - assert isinstance(response["replies"][0], str) - - def 
test_run_with_wrong_model(self, mock_dspy_module): - mock_dspy_module.side_effect = Exception("Invalid model name") - - generator = DSPyGenerator( - signature="question -> answer", - api_key=Secret.from_token("test-api-key"), - model="something-obviously-wrong", - ) - - with pytest.raises(Exception, match="Invalid model name"): - generator.run(prompt="Whatever") - - @pytest.mark.skipif( - not os.environ.get("OPENAI_API_KEY", None), - reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", - ) - @pytest.mark.integration - def test_live_run(self): - component = DSPyGenerator(signature="question -> answer") - results = component.run(prompt="What's the capital of France?") - assert len(results["replies"]) == 1 - assert len(results["meta"]) == 1 - response: str = results["replies"][0] - assert "Paris" in response - - metadata = results["meta"][0] - assert metadata["model"] == "openai/gpt-5-mini" - assert metadata["module_type"] == "ChainOfThought" - - @pytest.mark.skipif( - not os.environ.get("OPENAI_API_KEY", None), - reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", - ) - @pytest.mark.asyncio - async def test_run_async(self, mock_dspy_module): - component = DSPyGenerator( - signature="question -> answer", - api_key=Secret.from_token("test-api-key"), - ) - response = await component.run_async(prompt="What's Natural Language Processing?") - - # Verify the async mock was called - mock_dspy_module.acall.assert_called_once() - - # Check that the component returns the correct response - assert isinstance(response, dict) - assert "replies" in response - assert isinstance(response["replies"], list) - assert len(response["replies"]) == 1 - assert isinstance(response["replies"][0], str) - - @pytest.mark.skipif( - not os.environ.get("OPENAI_API_KEY", None), - reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", - ) - @pytest.mark.asyncio - async def test_run_async_with_params(self, mock_dspy_module): - component = DSPyGenerator( - signature="question -> answer", - api_key=Secret.from_token("test-api-key"), - ) - response = await component.run_async( - prompt="What's Natural Language Processing?", - generation_kwargs={"temperature": 0.9}, - ) - - # Check that acall was called with the correct parameters - _, kwargs = mock_dspy_module.acall.call_args - assert kwargs["config"] == {"temperature": 0.9} - - # Check that the component returns the correct response - assert isinstance(response, dict) - assert "replies" in response - assert len(response["replies"]) == 1 - assert isinstance(response["replies"][0], str) From a03bce308a82c788082e7f0a496f20661151a7a6 Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Mon, 23 Feb 2026 14:41:55 +0100 Subject: [PATCH 04/28] Applied formatting --- .../dspy/examples/chat_generator_example.py | 3 +- .../generators/dspy/chat/chat_generator.py | 39 ++++++++++--------- .../dspy/tests/test_chat_generator.py | 8 +++- .../dspy/tests/test_chat_generator_async.py | 8 +++- 4 files changed, 34 insertions(+), 24 deletions(-) diff --git a/integrations/dspy/examples/chat_generator_example.py b/integrations/dspy/examples/chat_generator_example.py index 1af6d0ca1f..922d5430e2 100644 --- a/integrations/dspy/examples/chat_generator_example.py +++ b/integrations/dspy/examples/chat_generator_example.py @@ -1,6 +1,7 @@ import dspy from haystack import Pipeline from haystack.dataclasses import ChatMessage + from 
haystack_integrations.components.generators.dspy import DSPyChatGenerator @@ -52,4 +53,4 @@ def string_signature_example(): if __name__ == "__main__": basic_qa_example() - string_signature_example() \ No newline at end of file + string_signature_example() diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py index 7c590e6122..1fe8af906a 100644 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py @@ -1,4 +1,5 @@ -from typing import Any, Callable, Dict, List, Optional, Type, Union +from collections.abc import Callable +from typing import Any import dspy from haystack import component, default_from_dict, default_to_dict @@ -75,14 +76,14 @@ class QASignature(dspy.Signature): def __init__( self, - signature: Union[str, Type[dspy.Signature]], + signature: str | type[dspy.Signature], model: str = "openai/gpt-5-mini", api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"), module_type: str = "ChainOfThought", output_field: str = "answer", - generation_kwargs: Optional[Dict[str, Any]] = None, - input_mapping: Optional[Dict[str, str]] = None, - streaming_callback: Optional[Callable] = None, + generation_kwargs: dict[str, Any] | None = None, + input_mapping: dict[str, str] | None = None, + streaming_callback: Callable | None = None, ): """ Initialize the DSPyChatGenerator. @@ -119,7 +120,7 @@ def __init__( module_class = get_dspy_module_class(self.module_type) self._module = module_class(self.signature) - def _build_dspy_inputs(self, prompt: str, **kwargs) -> Dict[str, Any]: + def _build_dspy_inputs(self, prompt: str, **kwargs) -> dict[str, Any]: """Build the input dict for the DSPy module call.""" if self.input_mapping: dspy_inputs = {} @@ -139,7 +140,7 @@ def _build_dspy_inputs(self, prompt: str, **kwargs) -> Dict[str, Any]: return dspy_inputs - def _get_input_field_names(self) -> List[str]: + def _get_input_field_names(self) -> list[str]: """Get input field names from the signature.""" if isinstance(self.signature, str): input_part = self.signature.split("->")[0].strip() @@ -147,7 +148,7 @@ def _get_input_field_names(self) -> List[str]: return list(self.signature.input_fields.keys()) @staticmethod - def _extract_last_user_message(messages: List[ChatMessage]) -> str: + def _extract_last_user_message(messages: list[ChatMessage]) -> str: """Extract the text of the last user message from a list of chat messages.""" for msg in reversed(messages): if msg.role == ChatRole.USER: @@ -162,9 +163,9 @@ def _signature_to_string(self) -> str: output_names = list(self.signature.output_fields.keys()) return ", ".join(input_names) + " -> " + ", ".join(output_names) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: """Serialize this component to a dictionary.""" - kwargs: Dict[str, Any] = { + kwargs: dict[str, Any] = { "signature": self._signature_to_string(), "model": self.model, "module_type": self.module_type, @@ -179,19 +180,19 @@ def to_dict(self) -> Dict[str, Any]: return default_to_dict(self, **kwargs) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "DSPyChatGenerator": + def from_dict(cls, data: dict[str, Any]) -> "DSPyChatGenerator": """Deserialize a component from a dictionary.""" init_params = data.get("init_parameters", {}) deserialize_secrets_inplace(init_params, 
["api_key"]) return default_from_dict(cls, data) - @component.output_types(replies=List[ChatMessage]) + @component.output_types(replies=list[ChatMessage]) def run( self, - messages: List[ChatMessage], - generation_kwargs: Optional[Dict[str, Any]] = None, + messages: list[ChatMessage], + generation_kwargs: dict[str, Any] | None = None, **kwargs, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """ Run the DSPy module on the given messages. @@ -217,13 +218,13 @@ def run( replies = [ChatMessage.from_assistant(text=output_text)] return {"replies": replies} - @component.output_types(replies=List[ChatMessage]) + @component.output_types(replies=list[ChatMessage]) async def run_async( self, - messages: List[ChatMessage], - generation_kwargs: Optional[Dict[str, Any]] = None, + messages: list[ChatMessage], + generation_kwargs: dict[str, Any] | None = None, **kwargs, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """ Asynchronously run the DSPy module on the given messages. diff --git a/integrations/dspy/tests/test_chat_generator.py b/integrations/dspy/tests/test_chat_generator.py index 033eb3ef5f..61d7c4cf63 100644 --- a/integrations/dspy/tests/test_chat_generator.py +++ b/integrations/dspy/tests/test_chat_generator.py @@ -22,8 +22,12 @@ def mock_dspy_module(): with ( patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.LM") as mock_lm_class, patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.configure"), - patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.ChainOfThought") as mock_cot_class, - patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.Predict") as mock_predict_class, + patch( + "haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.ChainOfThought" + ) as mock_cot_class, + patch( + "haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.Predict" + ) as mock_predict_class, patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.ReAct") as mock_react_class, ): mock_lm = MagicMock() diff --git a/integrations/dspy/tests/test_chat_generator_async.py b/integrations/dspy/tests/test_chat_generator_async.py index 87a8931c87..239059a63b 100644 --- a/integrations/dspy/tests/test_chat_generator_async.py +++ b/integrations/dspy/tests/test_chat_generator_async.py @@ -15,8 +15,12 @@ def mock_dspy_module(): with ( patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.LM") as mock_lm_class, patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.configure"), - patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.ChainOfThought") as mock_cot_class, - patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.Predict") as mock_predict_class, + patch( + "haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.ChainOfThought" + ) as mock_cot_class, + patch( + "haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.Predict" + ) as mock_predict_class, patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.ReAct") as mock_react_class, ): mock_lm = MagicMock() From a8c6ce1174d38e688c0c85b873b550cbeb2f4846 Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Mon, 23 Feb 2026 14:48:01 +0100 Subject: [PATCH 05/28] Add pydoc --- integrations/dspy/pydoc/config_docusaurus.yml | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 
integrations/dspy/pydoc/config_docusaurus.yml diff --git a/integrations/dspy/pydoc/config_docusaurus.yml b/integrations/dspy/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..3bbd9aecb4 --- /dev/null +++ b/integrations/dspy/pydoc/config_docusaurus.yml @@ -0,0 +1,28 @@ +loaders: +- ignore_when_discovered: + - __init__ + modules: + - haystack_integrations.components.generators.dspy.chat.chat_generator + search_path: + - ../src + type: haystack_pydoc_tools.loaders.CustomPythonLoader +processors: +- do_not_filter_modules: false + documented_only: true + expression: null + skip_empty_modules: true + type: filter +- type: smart +- type: crossref +renderer: + description: DSPy integration for Haystack + id: integrations-dspy + markdown: + add_member_class_prefix: false + add_method_class_prefix: true + classdef_code_block: false + descriptive_class_title: false + descriptive_module_title: true + filename: dspy.md + title: DSPy + type: haystack_pydoc_tools.renderers.DocusaurusRenderer From 0d951e06297ed472a17eb59e28ee6a12faf08185 Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Tue, 24 Feb 2026 18:17:41 +0100 Subject: [PATCH 06/28] Updates after review: imports, tests, project file --- integrations/dspy/CHANGELOG.md | 13 ---- integrations/dspy/README.md | 61 +-------------- integrations/dspy/pydoc/config_docusaurus.yml | 29 ++----- integrations/dspy/pyproject.toml | 4 +- .../components/generators/dspy/__init__.py | 2 +- .../generators/dspy/chat/__init__.py | 3 - .../generators/dspy/chat/chat_generator.py | 8 +- .../dspy/tests/test_chat_generator.py | 77 ++++++++++++++++--- 8 files changed, 85 insertions(+), 112 deletions(-) delete mode 100644 integrations/dspy/CHANGELOG.md delete mode 100644 integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/__init__.py diff --git a/integrations/dspy/CHANGELOG.md b/integrations/dspy/CHANGELOG.md deleted file mode 100644 index 7350ecaed9..0000000000 --- a/integrations/dspy/CHANGELOG.md +++ /dev/null @@ -1,13 +0,0 @@ -# Changelog - -All notable changes to this project will be documented in this file. - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - -## [Unreleased] - -### Added - -- Initial implementation of `DSPyChatGenerator` component -- Initial implementation of `DSPyProgramRunner` component diff --git a/integrations/dspy/README.md b/integrations/dspy/README.md index 27fc2675cd..7f0b8051fc 100644 --- a/integrations/dspy/README.md +++ b/integrations/dspy/README.md @@ -3,63 +3,8 @@ [![PyPI - Version](https://img.shields.io/pypi/v/dspy-haystack.svg)](https://pypi.org/project/dspy-haystack) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/dspy-haystack.svg)](https://pypi.org/project/dspy-haystack) -An integration between [DSPy](https://github.com/stanfordnlp/dspy) and [Haystack](https://haystack.deepset.ai/). +--- -DSPy is a framework for algorithmically optimizing prompts for Language Models by applying classical machine learning concepts (training data, evaluation metrics, optimization). 
+## Contributing -This integration provides: -- **DSPyChatGenerator** — a Haystack ChatGenerator component that uses DSPy signatures and modules for structured generation - -## Installation - -```bash -pip install dspy-haystack -``` - -## Quick Start - -### DSPyChatGenerator - -A Haystack chat generator that uses DSPy signatures for structured generation with built-in reasoning patterns (Chain-of-Thought, Predict, ReAct). - -```python -from haystack import Pipeline -from haystack.dataclasses import ChatMessage -from haystack_integrations.components.generators.dspy import DSPyChatGenerator -import dspy - -# Define a DSPy signature -class QASignature(dspy.Signature): - """Answer questions accurately and concisely.""" - question = dspy.InputField(desc="The user's question") - answer = dspy.OutputField(desc="A clear, concise answer") - -# Create the generator -generator = DSPyChatGenerator( - model="openai/gpt-5-mini", - signature=QASignature, - module_type="ChainOfThought" -) - -# Use in pipeline -pipeline = Pipeline() -pipeline.add_component("llm", generator) - -messages = [ChatMessage.from_user("What is the capital of France?")] -result = pipeline.run({"llm": {"messages": messages}}) -print(result["llm"]["replies"][0].text) -``` - -You can also use string signatures for quick prototyping: - -```python -generator = DSPyChatGenerator( - model="openai/gpt-5-mini", - signature="question -> answer", - module_type="Predict" -) -``` - -## License - -`dspy-haystack` is distributed under the terms of the [Apache-2.0](https://spdx.org/licenses/Apache-2.0.html) license. +Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md). diff --git a/integrations/dspy/pydoc/config_docusaurus.yml b/integrations/dspy/pydoc/config_docusaurus.yml index 3bbd9aecb4..13c624885f 100644 --- a/integrations/dspy/pydoc/config_docusaurus.yml +++ b/integrations/dspy/pydoc/config_docusaurus.yml @@ -1,28 +1,13 @@ loaders: -- ignore_when_discovered: - - __init__ - modules: - - haystack_integrations.components.generators.dspy.chat.chat_generator - search_path: - - ../src - type: haystack_pydoc_tools.loaders.CustomPythonLoader + - modules: + - haystack_integrations.components.generators.dspy.chat.chat_generator + search_path: [../src] processors: -- do_not_filter_modules: false - documented_only: true - expression: null - skip_empty_modules: true - type: filter -- type: smart -- type: crossref + - type: filter + documented_only: true + skip_empty_modules: true renderer: description: DSPy integration for Haystack id: integrations-dspy - markdown: - add_member_class_prefix: false - add_method_class_prefix: true - classdef_code_block: false - descriptive_class_title: false - descriptive_module_title: true - filename: dspy.md + filename: dspy.md title: DSPy - type: haystack_pydoc_tools.renderers.DocusaurusRenderer diff --git a/integrations/dspy/pyproject.toml b/integrations/dspy/pyproject.toml index 92e94af522..2cc285c2dc 100644 --- a/integrations/dspy/pyproject.toml +++ b/integrations/dspy/pyproject.toml @@ -45,7 +45,7 @@ installer = "uv" dependencies = ["haystack-pydoc-tools", "ruff"] [tool.hatch.envs.default.scripts] -docs = ["pydoc-markdown pydoc/config_docusaurus.yml"] +docs = ["haystack-pydoc pydoc/config_docusaurus.yml"] fmt = "ruff check --fix {args}; ruff format {args}" fmt-check = "ruff check {args} && ruff format --check {args}" @@ -146,6 +146,6 @@ exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] [tool.pytest.ini_options] 
markers = ["integration: integration tests"] log_cli = true -addopts = ["--import-mode=importlib"] +addopts = ["--strict-markers"] asyncio_mode = "auto" asyncio_default_fixture_loop_scope = "class" diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py index 5418b2abf2..68cdcadc0d 100644 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py @@ -1,3 +1,3 @@ -from haystack_integrations.components.generators.dspy.chat.chat_generator import DSPyChatGenerator +from .chat.chat_generator import DSPyChatGenerator __all__ = ["DSPyChatGenerator"] diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/__init__.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/__init__.py deleted file mode 100644 index 5418b2abf2..0000000000 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from haystack_integrations.components.generators.dspy.chat.chat_generator import DSPyChatGenerator - -__all__ = ["DSPyChatGenerator"] diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py index 1fe8af906a..78b0bb4759 100644 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py @@ -9,7 +9,7 @@ VALID_MODULE_TYPES = {"Predict", "ChainOfThought", "ReAct"} -def configure_dspy_lm(model: str, api_key: str, **kwargs: Any) -> dspy.LM: +def _configure_dspy_lm(model: str, api_key: str, **kwargs: Any) -> dspy.LM: """ Create and configure a DSPy language model. @@ -23,7 +23,7 @@ def configure_dspy_lm(model: str, api_key: str, **kwargs: Any) -> dspy.LM: return lm -def get_dspy_module_class(module_type: str): +def _get_dspy_module_class(module_type: str): """ Map a module type string to the corresponding DSPy module class. 
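For reference, the helper renamed here is a thin lookup from the `module_type` string to the corresponding DSPy module class. A minimal sketch of that mapping, illustrative only and inferred from `VALID_MODULE_TYPES` and the error message asserted in the tests further down:

```python
import dspy

VALID_MODULE_TYPES = {"Predict", "ChainOfThought", "ReAct"}


def _get_dspy_module_class(module_type: str):
    # Map the module_type string to the DSPy module class; reject anything else.
    mapping = {
        "Predict": dspy.Predict,
        "ChainOfThought": dspy.ChainOfThought,
        "ReAct": dspy.ReAct,
    }
    if module_type not in mapping:
        msg = f"Invalid module_type '{module_type}'. Valid options are: {sorted(VALID_MODULE_TYPES)}"
        raise ValueError(msg)
    return mapping[module_type]
```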
@@ -111,13 +111,13 @@ def __init__( self.input_mapping = input_mapping self.streaming_callback = streaming_callback - self._lm = configure_dspy_lm( + self._lm = _configure_dspy_lm( model=self.model, api_key=self.api_key.resolve_value(), **self.generation_kwargs, ) - module_class = get_dspy_module_class(self.module_type) + module_class = _get_dspy_module_class(self.module_type) self._module = module_class(self.signature) def _build_dspy_inputs(self, prompt: str, **kwargs) -> dict[str, Any]: diff --git a/integrations/dspy/tests/test_chat_generator.py b/integrations/dspy/tests/test_chat_generator.py index 61d7c4cf63..18819f0ac5 100644 --- a/integrations/dspy/tests/test_chat_generator.py +++ b/integrations/dspy/tests/test_chat_generator.py @@ -9,8 +9,8 @@ from haystack_integrations.components.generators.dspy.chat.chat_generator import ( VALID_MODULE_TYPES, DSPyChatGenerator, - configure_dspy_lm, - get_dspy_module_class, + _configure_dspy_lm, + _get_dspy_module_class, ) @@ -66,21 +66,21 @@ def test_contains_expected_types(self): class TestGetDspyModuleClass: def test_predict(self): - assert get_dspy_module_class("Predict") is dspy.Predict + assert _get_dspy_module_class("Predict") is dspy.Predict def test_chain_of_thought(self): - assert get_dspy_module_class("ChainOfThought") is dspy.ChainOfThought + assert _get_dspy_module_class("ChainOfThought") is dspy.ChainOfThought def test_react(self): - assert get_dspy_module_class("ReAct") is dspy.ReAct + assert _get_dspy_module_class("ReAct") is dspy.ReAct def test_invalid_type_raises(self): with pytest.raises(ValueError, match="Invalid module_type 'Unknown'"): - get_dspy_module_class("Unknown") + _get_dspy_module_class("Unknown") def test_invalid_type_lists_valid_options(self): with pytest.raises(ValueError, match="ChainOfThought"): - get_dspy_module_class("BadType") + _get_dspy_module_class("BadType") class TestConfigureDspyLm: @@ -90,7 +90,7 @@ def test_creates_lm_and_configures(self, mock_lm_class, mock_configure): mock_lm = MagicMock() mock_lm_class.return_value = mock_lm - result = configure_dspy_lm(model="openai/gpt-5-mini", api_key="test-key") + result = _configure_dspy_lm(model="openai/gpt-5-mini", api_key="test-key") mock_lm_class.assert_called_once_with(model="openai/gpt-5-mini", api_key="test-key") mock_configure.assert_called_once_with(lm=mock_lm) @@ -102,7 +102,7 @@ def test_passes_extra_kwargs(self, mock_lm_class, mock_configure): mock_lm = MagicMock() mock_lm_class.return_value = mock_lm - configure_dspy_lm(model="openai/gpt-5-mini", api_key="test-key", temperature=0.7, max_tokens=100) + _configure_dspy_lm(model="openai/gpt-5-mini", api_key="test-key", temperature=0.7, max_tokens=100) mock_lm_class.assert_called_once_with( model="openai/gpt-5-mini", api_key="test-key", temperature=0.7, max_tokens=100 @@ -350,3 +350,62 @@ def test_live_run(self): assert len(results["replies"]) == 1 message: ChatMessage = results["replies"][0] assert "Paris" in message.text + + @pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", + ) + @pytest.mark.integration + def test_live_run_with_predict_module(self): + """Test using the Predict module type with a string signature.""" + chat_messages = [ChatMessage.from_user("What is 2 + 2?")] + component = DSPyChatGenerator( + signature="question -> answer", + module_type="Predict", + ) + results = component.run(chat_messages) + assert len(results["replies"]) == 1 + assert "4" in 
results["replies"][0].text + + @pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", + ) + @pytest.mark.integration + def test_live_run_with_signature_class(self): + """Test using a dspy.Signature class instead of a string signature.""" + + class QASignature(dspy.Signature): + """Answer questions accurately and concisely.""" + + question = dspy.InputField(desc="The user's question") + answer = dspy.OutputField(desc="A clear, concise answer") + + chat_messages = [ChatMessage.from_user("What language is spoken in Brazil?")] + component = DSPyChatGenerator( + signature=QASignature, + module_type="ChainOfThought", + ) + results = component.run(chat_messages) + assert len(results["replies"]) == 1 + assert "Portuguese" in results["replies"][0].text + + @pytest.mark.skipif( + not os.environ.get("OPENAI_API_KEY", None), + reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.", + ) + @pytest.mark.integration + def test_live_run_with_multi_field_signature(self): + """Test using a multi-input signature with input_mapping.""" + chat_messages = [ChatMessage.from_user("What is the main topic?")] + component = DSPyChatGenerator( + signature="context, question -> answer", + module_type="Predict", + input_mapping={"context": "context", "question": "question"}, + ) + results = component.run( + chat_messages, + context="Python is a popular programming language created by Guido van Rossum.", + ) + assert len(results["replies"]) == 1 + assert results["replies"][0].text From 3d0c273babb23f7ce7d368c656c9170a806f27e8 Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Wed, 4 Mar 2026 18:46:34 +0100 Subject: [PATCH 07/28] PR Review fixes --- .../generators/dspy/chat/chat_generator.py | 104 +++++++--- .../dspy/tests/test_chat_generator.py | 190 ++++++++++++++---- .../dspy/tests/test_chat_generator_async.py | 15 +- 3 files changed, 231 insertions(+), 78 deletions(-) diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py index 78b0bb4759..df2408a720 100644 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py @@ -1,24 +1,28 @@ -from collections.abc import Callable +import importlib from typing import Any import dspy from haystack import component, default_from_dict, default_to_dict from haystack.dataclasses import ChatMessage, ChatRole -from haystack.utils import Secret, deserialize_secrets_inplace +from haystack.dataclasses.streaming_chunk import StreamingCallbackT +from haystack.utils import deserialize_callable, serialize_callable VALID_MODULE_TYPES = {"Predict", "ChainOfThought", "ReAct"} -def _configure_dspy_lm(model: str, api_key: str, **kwargs: Any) -> dspy.LM: +def _configure_dspy_lm(model: str, api_base: str | None = None, **kwargs: Any) -> dspy.LM: """ Create and configure a DSPy language model. :param model: Model identifier (e.g. ``"openai/gpt-5-mini"``). - :param api_key: Resolved API key string. + :param api_base: Optional base URL for the API (useful for local models). :param kwargs: Additional keyword arguments passed to ``dspy.LM``. :returns: The configured ``dspy.LM`` instance. 
""" - lm = dspy.LM(model=model, api_key=api_key, **kwargs) + lm_kwargs: dict[str, Any] = {"model": model, **kwargs} + if api_base is not None: + lm_kwargs["api_base"] = api_base + lm = dspy.LM(**lm_kwargs) dspy.configure(lm=lm) return lm @@ -42,6 +46,24 @@ def _get_dspy_module_class(module_type: str): return mapping[module_type] +def _resolve_signature(signature: str | type[dspy.Signature]) -> str | type[dspy.Signature]: + """ + Resolve a signature that may be a fully qualified class path string. + + If the string contains a dot, it is treated as a fully qualified class path + (e.g. ``"mymodule.QASignature"``) and imported. Otherwise, it is returned as-is + (a DSPy shorthand like ``"question -> answer"``). + + :param signature: A string or dspy.Signature subclass. + :returns: The resolved signature. + """ + if isinstance(signature, str) and "." in signature and "->" not in signature: + module_path, class_name = signature.rsplit(".", 1) + module = importlib.import_module(module_path) + return getattr(module, class_name) + return signature + + @component class DSPyChatGenerator: """ @@ -51,6 +73,9 @@ class DSPyChatGenerator: Accepts and returns ``ChatMessage`` objects, making it compatible with Haystack chat pipelines. + The API key is read automatically from environment variables by DSPy/litellm + (e.g. ``OPENAI_API_KEY``). Use ``api_base`` for local or self-hosted models. + Usage example: ```python @@ -78,12 +103,13 @@ def __init__( self, signature: str | type[dspy.Signature], model: str = "openai/gpt-5-mini", - api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"), + api_base: str | None = None, module_type: str = "ChainOfThought", output_field: str = "answer", generation_kwargs: dict[str, Any] | None = None, + module_kwargs: dict[str, Any] | None = None, input_mapping: dict[str, str] | None = None, - streaming_callback: Callable | None = None, + streaming_callback: StreamingCallbackT | None = None, ): """ Initialize the DSPyChatGenerator. @@ -91,11 +117,17 @@ def __init__( :param signature: DSPy signature defining I/O structure. Can be a string like ``"question -> answer"`` or a ``dspy.Signature`` subclass. :param model: Model identifier (e.g. ``"openai/gpt-5-mini"``). - :param api_key: API key for the LLM provider. + :param api_base: Optional base URL for the API (useful for local models). :param module_type: DSPy module type: ``"Predict"``, ``"ChainOfThought"``, or ``"ReAct"``. :param output_field: Which signature output field to use as the reply. :param generation_kwargs: Additional generation parameters (temperature, max_tokens, etc.). - :param input_mapping: Maps DSPy signature input field names to run kwarg names. + :param module_kwargs: Additional keyword arguments passed to the DSPy module constructor. + For example, use ``{"tools": [tool1, tool2]}`` when using the ``"ReAct"`` module type. + :param input_mapping: Maps DSPy signature input field names to ``run()`` kwarg names. + For example, if your signature has an input field ``"context"`` but your pipeline + provides it as ``"documents"``, use ``{"context": "documents"}``. When not provided, + the first input field receives the last user message text, and remaining fields + are matched by name from ``**kwargs``. :param streaming_callback: Callback for streaming responses. 
""" if module_type not in VALID_MODULE_TYPES: @@ -104,21 +136,22 @@ def __init__( self.signature = signature self.model = model - self.api_key = api_key + self.api_base = api_base self.module_type = module_type self.output_field = output_field self.generation_kwargs = generation_kwargs or {} + self.module_kwargs = module_kwargs or {} self.input_mapping = input_mapping self.streaming_callback = streaming_callback self._lm = _configure_dspy_lm( model=self.model, - api_key=self.api_key.resolve_value(), + api_base=self.api_base, **self.generation_kwargs, ) module_class = _get_dspy_module_class(self.module_type) - self._module = module_class(self.signature) + self._module = module_class(self.signature, **self.module_kwargs) def _build_dspy_inputs(self, prompt: str, **kwargs) -> dict[str, Any]: """Build the input dict for the DSPy module call.""" @@ -156,34 +189,49 @@ def _extract_last_user_message(messages: list[ChatMessage]) -> str: return messages[-1].text def _signature_to_string(self) -> str: - """Convert the signature to a string representation for serialization.""" + """ + Convert the signature to a serializable string representation. + + For string signatures, returns the string as-is. + For ``dspy.Signature`` subclasses, returns the fully qualified class path + (e.g. ``"mymodule.QASignature"``), which can be imported back during + deserialization. + """ if isinstance(self.signature, str): return self.signature - input_names = list(self.signature.input_fields.keys()) - output_names = list(self.signature.output_fields.keys()) - return ", ".join(input_names) + " -> " + ", ".join(output_names) + return f"{self.signature.__module__}.{self.signature.__qualname__}" def to_dict(self) -> dict[str, Any]: """Serialize this component to a dictionary.""" + callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None kwargs: dict[str, Any] = { "signature": self._signature_to_string(), "model": self.model, + "api_base": self.api_base, "module_type": self.module_type, "output_field": self.output_field, "generation_kwargs": self.generation_kwargs, + "module_kwargs": self.module_kwargs, "input_mapping": self.input_mapping, + "streaming_callback": callback_name, } - try: - kwargs["api_key"] = self.api_key.to_dict() - except ValueError: - pass return default_to_dict(self, **kwargs) @classmethod def from_dict(cls, data: dict[str, Any]) -> "DSPyChatGenerator": """Deserialize a component from a dictionary.""" init_params = data.get("init_parameters", {}) - deserialize_secrets_inplace(init_params, ["api_key"]) + + # Resolve signature class path if needed + signature = init_params.get("signature") + if signature: + init_params["signature"] = _resolve_signature(signature) + + # Deserialize streaming callback + serialized_callback = init_params.get("streaming_callback") + if serialized_callback: + init_params["streaming_callback"] = deserialize_callable(serialized_callback) + return default_from_dict(cls, data) @component.output_types(replies=list[ChatMessage]) @@ -199,7 +247,7 @@ def run( :param messages: List of chat messages. The last user message is used as input. :param generation_kwargs: Optional runtime generation parameters. :param kwargs: Additional keyword arguments mapped to signature input fields. - :returns: A dictionary with ``replies`` (list of ChatMessage) and ``meta`` (list of dicts). + :returns: A dictionary with ``replies`` (list of ChatMessage). """ if not messages: msg = "The 'messages' parameter cannot be empty." 
@@ -208,10 +256,7 @@ def run( prompt = self._extract_last_user_message(messages) dspy_inputs = self._build_dspy_inputs(prompt, **kwargs) - if generation_kwargs: - prediction = self._module(**dspy_inputs, config=generation_kwargs) - else: - prediction = self._module(**dspy_inputs) + prediction = self._module(**dspy_inputs, config=generation_kwargs or {}) output_text = getattr(prediction, self.output_field, str(prediction)) @@ -233,7 +278,7 @@ async def run_async( :param messages: List of chat messages. The last user message is used as input. :param generation_kwargs: Optional runtime generation parameters. :param kwargs: Additional keyword arguments mapped to signature input fields. - :returns: A dictionary with ``replies`` (list of ChatMessage) and ``meta`` (list of dicts). + :returns: A dictionary with ``replies`` (list of ChatMessage). """ if not messages: msg = "The 'messages' parameter cannot be empty." @@ -242,10 +287,7 @@ async def run_async( prompt = self._extract_last_user_message(messages) dspy_inputs = self._build_dspy_inputs(prompt, **kwargs) - if generation_kwargs: - prediction = await self._module.acall(**dspy_inputs, config=generation_kwargs) - else: - prediction = await self._module.acall(**dspy_inputs) + prediction = await self._module.acall(**dspy_inputs, config=generation_kwargs or {}) output_text = getattr(prediction, self.output_field, str(prediction)) diff --git a/integrations/dspy/tests/test_chat_generator.py b/integrations/dspy/tests/test_chat_generator.py index 18819f0ac5..e200f778fe 100644 --- a/integrations/dspy/tests/test_chat_generator.py +++ b/integrations/dspy/tests/test_chat_generator.py @@ -4,16 +4,20 @@ import dspy import pytest from haystack.dataclasses import ChatMessage -from haystack.utils.auth import Secret from haystack_integrations.components.generators.dspy.chat.chat_generator import ( VALID_MODULE_TYPES, DSPyChatGenerator, _configure_dspy_lm, _get_dspy_module_class, + _resolve_signature, ) +def _sample_streaming_callback(chunk): + """Module-level callback for serialization tests.""" + + @pytest.fixture def mock_dspy_module(): """ @@ -90,9 +94,9 @@ def test_creates_lm_and_configures(self, mock_lm_class, mock_configure): mock_lm = MagicMock() mock_lm_class.return_value = mock_lm - result = _configure_dspy_lm(model="openai/gpt-5-mini", api_key="test-key") + result = _configure_dspy_lm(model="openai/gpt-5-mini") - mock_lm_class.assert_called_once_with(model="openai/gpt-5-mini", api_key="test-key") + mock_lm_class.assert_called_once_with(model="openai/gpt-5-mini") mock_configure.assert_called_once_with(lm=mock_lm) assert result is mock_lm @@ -102,16 +106,54 @@ def test_passes_extra_kwargs(self, mock_lm_class, mock_configure): mock_lm = MagicMock() mock_lm_class.return_value = mock_lm - _configure_dspy_lm(model="openai/gpt-5-mini", api_key="test-key", temperature=0.7, max_tokens=100) + _configure_dspy_lm(model="openai/gpt-5-mini", temperature=0.7, max_tokens=100) - mock_lm_class.assert_called_once_with( - model="openai/gpt-5-mini", api_key="test-key", temperature=0.7, max_tokens=100 - ) + mock_lm_class.assert_called_once_with(model="openai/gpt-5-mini", temperature=0.7, max_tokens=100) + + @patch("dspy.configure") + @patch("dspy.LM") + def test_passes_api_base(self, mock_lm_class, mock_configure): + mock_lm = MagicMock() + mock_lm_class.return_value = mock_lm + + _configure_dspy_lm(model="openai/local-model", api_base="http://localhost:8000") + + mock_lm_class.assert_called_once_with(model="openai/local-model", api_base="http://localhost:8000") + + 
@patch("dspy.configure") + @patch("dspy.LM") + def test_omits_api_base_when_none(self, mock_lm_class, mock_configure): + mock_lm = MagicMock() + mock_lm_class.return_value = mock_lm + + _configure_dspy_lm(model="openai/gpt-5-mini") + + mock_lm_class.assert_called_once_with(model="openai/gpt-5-mini") + + +class TestResolveSignature: + def test_string_signature_passthrough(self): + sig = "question -> answer" + assert _resolve_signature(sig) == sig + + def test_signature_class_passthrough(self): + class MySig(dspy.Signature): + question: str = dspy.InputField() + answer: str = dspy.OutputField() + + assert _resolve_signature(MySig) is MySig + + def test_resolves_dotted_class_path(self): + result = _resolve_signature("dspy.Signature") + assert result is dspy.Signature + + def test_invalid_dotted_path_raises(self): + with pytest.raises((ImportError, ModuleNotFoundError)): + _resolve_signature("nonexistent.module.ClassName") class TestDSPyChatGenerator: - def test_init_default(self, monkeypatch, mock_dspy_module): - monkeypatch.setenv("OPENAI_API_KEY", "test-api-key") + def test_init_default(self, mock_dspy_module): component = DSPyChatGenerator(signature="question -> answer") assert component.model == "openai/gpt-5-mini" assert component.signature == "question -> answer" @@ -120,20 +162,18 @@ def test_init_default(self, monkeypatch, mock_dspy_module): assert component.streaming_callback is None assert not component.generation_kwargs assert component.input_mapping is None - - def test_init_fail_wo_api_key(self, monkeypatch): - monkeypatch.delenv("OPENAI_API_KEY", raising=False) - with pytest.raises(ValueError, match=r"None of the .* environment variables are set"): - DSPyChatGenerator(signature="question -> answer") + assert component.api_base is None + assert not component.module_kwargs def test_init_with_parameters(self, mock_dspy_module): component = DSPyChatGenerator( signature="context, question -> answer", model="openai/gpt-4o", - api_key=Secret.from_token("test-api-key"), + api_base="http://localhost:8000", module_type="Predict", output_field="response", generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"}, + module_kwargs={"some_param": "value"}, input_mapping={"context": "context", "question": "question"}, ) assert component.model == "openai/gpt-4o" @@ -141,49 +181,66 @@ def test_init_with_parameters(self, mock_dspy_module): assert component.module_type == "Predict" assert component.output_field == "response" assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} + assert component.module_kwargs == {"some_param": "value"} assert component.input_mapping == {"context": "context", "question": "question"} + assert component.api_base == "http://localhost:8000" def test_init_with_invalid_module_type(self, mock_dspy_module): with pytest.raises(ValueError, match="Invalid module_type"): DSPyChatGenerator( signature="question -> answer", - api_key=Secret.from_token("test-api-key"), module_type="InvalidModule", ) def test_init_with_signature_class(self, mock_dspy_module, sample_qa_signature): component = DSPyChatGenerator( signature=sample_qa_signature, - api_key=Secret.from_token("test-api-key"), ) assert component.signature is sample_qa_signature - def test_to_dict_default(self, monkeypatch, mock_dspy_module): - monkeypatch.setenv("OPENAI_API_KEY", "test-api-key") + def test_init_with_module_kwargs(self, mock_dspy_module): + """Test that module_kwargs are passed to the DSPy module constructor.""" + tools = [MagicMock(), MagicMock()] + 
component = DSPyChatGenerator( + signature="question -> answer", + module_type="ReAct", + module_kwargs={"tools": tools}, + ) + assert component.module_kwargs == {"tools": tools} + + def test_init_with_api_base(self, mock_dspy_module): + """Test initialization with api_base for local models.""" + component = DSPyChatGenerator( + signature="question -> answer", + api_base="http://localhost:11434/v1", + ) + assert component.api_base == "http://localhost:11434/v1" + + def test_to_dict_default(self, mock_dspy_module): component = DSPyChatGenerator( signature="question -> answer", - api_key=Secret.from_env_var("OPENAI_API_KEY"), ) data = component.to_dict() assert data == { "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPyChatGenerator", "init_parameters": { - "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, "signature": "question -> answer", "model": "openai/gpt-5-mini", + "api_base": None, "module_type": "ChainOfThought", "output_field": "answer", "generation_kwargs": {}, + "module_kwargs": {}, "input_mapping": None, + "streaming_callback": None, }, } - def test_to_dict_with_parameters(self, monkeypatch, mock_dspy_module): - monkeypatch.setenv("OPENAI_API_KEY", "test-api-key") + def test_to_dict_with_parameters(self, mock_dspy_module): component = DSPyChatGenerator( signature="context, question -> answer", model="openai/gpt-4o", - api_key=Secret.from_env_var("OPENAI_API_KEY"), + api_base="http://localhost:8000", module_type="Predict", output_field="response", generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"}, @@ -193,28 +250,52 @@ def test_to_dict_with_parameters(self, monkeypatch, mock_dspy_module): assert data == { "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPyChatGenerator", "init_parameters": { - "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, "signature": "context, question -> answer", "model": "openai/gpt-4o", + "api_base": "http://localhost:8000", "module_type": "Predict", "output_field": "response", "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, + "module_kwargs": {}, "input_mapping": {"context": "context", "question": "question"}, + "streaming_callback": None, }, } - def test_from_dict(self, monkeypatch, mock_dspy_module): - monkeypatch.setenv("OPENAI_API_KEY", "fake-api-key") + def test_to_dict_with_signature_class(self, mock_dspy_module, sample_qa_signature): + """Test that signature classes are serialized as fully qualified class paths.""" + component = DSPyChatGenerator( + signature=sample_qa_signature, + ) + data = component.to_dict() + sig_value = data["init_parameters"]["signature"] + # Should be a dotted path like "test_chat_generator.TestDSPyChatGenerator...QASignature" + assert "QASignature" in sig_value + assert "." 
in sig_value + + def test_to_dict_with_streaming_callback(self, mock_dspy_module): + """Test that streaming_callback is serialized.""" + component = DSPyChatGenerator( + signature="question -> answer", + streaming_callback=_sample_streaming_callback, + ) + data = component.to_dict() + assert data["init_parameters"]["streaming_callback"] is not None + assert "streaming_callback" in data["init_parameters"]["streaming_callback"] + + def test_from_dict(self, mock_dspy_module): data = { "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPyChatGenerator", "init_parameters": { - "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, "signature": "question -> answer", "model": "openai/gpt-4o", + "api_base": None, "module_type": "Predict", "output_field": "response", "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, + "module_kwargs": {}, "input_mapping": None, + "streaming_callback": None, }, } component = DSPyChatGenerator.from_dict(data) @@ -223,30 +304,49 @@ def test_from_dict(self, monkeypatch, mock_dspy_module): assert component.module_type == "Predict" assert component.output_field == "response" assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} - assert component.api_key == Secret.from_env_var("OPENAI_API_KEY") assert component.input_mapping is None + assert component.api_base is None - def test_from_dict_fail_wo_env_var(self, monkeypatch): - monkeypatch.delenv("OPENAI_API_KEY", raising=False) + def test_from_dict_with_api_base(self, mock_dspy_module): + """Test deserialization with api_base.""" data = { "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPyChatGenerator", "init_parameters": { - "api_key": {"env_vars": ["OPENAI_API_KEY"], "strict": True, "type": "env_var"}, "signature": "question -> answer", - "model": "openai/gpt-4o", + "model": "openai/local-model", + "api_base": "http://localhost:8000", "module_type": "Predict", - "output_field": "response", + "output_field": "answer", + "generation_kwargs": {}, + "module_kwargs": {}, + "input_mapping": None, + "streaming_callback": None, + }, + } + component = DSPyChatGenerator.from_dict(data) + assert component.api_base == "http://localhost:8000" + + def test_from_dict_resolves_signature_class_path(self, mock_dspy_module): + """Test that from_dict resolves a dotted signature class path.""" + data = { + "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPyChatGenerator", + "init_parameters": { + "signature": "dspy.Signature", + "model": "openai/gpt-5-mini", + "module_type": "Predict", + "output_field": "answer", "generation_kwargs": {}, + "module_kwargs": {}, "input_mapping": None, + "streaming_callback": None, }, } - with pytest.raises(ValueError, match=r"None of the .* environment variables are set"): - DSPyChatGenerator.from_dict(data) + component = DSPyChatGenerator.from_dict(data) + assert component.signature is dspy.Signature def test_run(self, chat_messages, mock_dspy_module): component = DSPyChatGenerator( signature="question -> answer", - api_key=Secret.from_token("test-api-key"), ) response = component.run(chat_messages) @@ -258,10 +358,19 @@ def test_run(self, chat_messages, mock_dspy_module): assert len(response["replies"]) == 1 assert all(isinstance(reply, ChatMessage) for reply in response["replies"]) + def test_run_always_passes_config(self, chat_messages, mock_dspy_module): + """Test that config is always passed (even as empty dict) - simplified 
call.""" + component = DSPyChatGenerator( + signature="question -> answer", + ) + component.run(chat_messages) + + _, kwargs = mock_dspy_module.call_args + assert kwargs["config"] == {} + def test_run_with_generation_kwargs(self, chat_messages, mock_dspy_module): component = DSPyChatGenerator( signature="question -> answer", - api_key=Secret.from_token("test-api-key"), generation_kwargs={"max_tokens": 10, "temperature": 0.5}, ) response = component.run(chat_messages, generation_kwargs={"temperature": 0.9}) @@ -277,7 +386,6 @@ def test_run_with_generation_kwargs(self, chat_messages, mock_dspy_module): def test_run_with_multiple_messages(self, mock_dspy_module): component = DSPyChatGenerator( signature="question -> answer", - api_key=Secret.from_token("test-api-key"), ) messages = [ ChatMessage.from_user("Hello"), @@ -296,7 +404,6 @@ def test_run_with_multiple_messages(self, mock_dspy_module): def test_run_with_empty_messages(self, mock_dspy_module): component = DSPyChatGenerator( signature="question -> answer", - api_key=Secret.from_token("test-api-key"), ) with pytest.raises(ValueError, match="messages"): component.run(messages=[]) @@ -305,7 +412,6 @@ def test_run_with_custom_output_field(self, mock_dspy_module): mock_dspy_module.return_value = MagicMock(summary="This is a summary") component = DSPyChatGenerator( signature="text -> summary", - api_key=Secret.from_token("test-api-key"), output_field="summary", ) messages = [ChatMessage.from_user("Summarize this text")] @@ -316,7 +422,6 @@ def test_run_with_custom_output_field(self, mock_dspy_module): def test_run_with_input_mapping(self, mock_dspy_module): component = DSPyChatGenerator( signature="context, question -> answer", - api_key=Secret.from_token("test-api-key"), input_mapping={"context": "context", "question": "question"}, ) messages = [ChatMessage.from_user("What is ML?")] @@ -331,7 +436,6 @@ def test_run_with_wrong_model(self, mock_dspy_module): generator = DSPyChatGenerator( signature="question -> answer", - api_key=Secret.from_token("test-api-key"), model="something-obviously-wrong", ) diff --git a/integrations/dspy/tests/test_chat_generator_async.py b/integrations/dspy/tests/test_chat_generator_async.py index 239059a63b..55dac80c80 100644 --- a/integrations/dspy/tests/test_chat_generator_async.py +++ b/integrations/dspy/tests/test_chat_generator_async.py @@ -2,7 +2,6 @@ import pytest from haystack.dataclasses import ChatMessage -from haystack.utils.auth import Secret from haystack_integrations.components.generators.dspy.chat.chat_generator import DSPyChatGenerator @@ -50,7 +49,6 @@ class TestDSPyChatGeneratorAsync: async def test_run_async(self, chat_messages, mock_dspy_module): component = DSPyChatGenerator( signature="question -> answer", - api_key=Secret.from_token("test-api-key"), ) response = await component.run_async(messages=chat_messages) @@ -62,11 +60,21 @@ async def test_run_async(self, chat_messages, mock_dspy_module): assert len(response["replies"]) == 1 assert all(isinstance(reply, ChatMessage) for reply in response["replies"]) + @pytest.mark.asyncio + async def test_run_async_always_passes_config(self, chat_messages, mock_dspy_module): + """Test that config is always passed (even as empty dict) in async mode.""" + component = DSPyChatGenerator( + signature="question -> answer", + ) + await component.run_async(messages=chat_messages) + + _, kwargs = mock_dspy_module.acall.call_args + assert kwargs["config"] == {} + @pytest.mark.asyncio async def test_run_async_with_params(self, chat_messages, mock_dspy_module): 
component = DSPyChatGenerator( signature="question -> answer", - api_key=Secret.from_token("test-api-key"), ) response = await component.run_async( messages=chat_messages, @@ -85,7 +93,6 @@ async def test_run_async_with_params(self, chat_messages, mock_dspy_module): async def test_run_async_with_empty_messages(self, mock_dspy_module): component = DSPyChatGenerator( signature="question -> answer", - api_key=Secret.from_token("test-api-key"), ) with pytest.raises(ValueError, match="messages"): await component.run_async(messages=[]) From 98b74c44c6e291de138b9e5521346a4fd9b6dd49 Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Wed, 4 Mar 2026 18:46:54 +0100 Subject: [PATCH 08/28] ReAct and Serialization/Deserializaion examples --- .../dspy/examples/react_agent_example.py | 76 ++++++++++++++++ .../signature_serialization_example.py | 86 +++++++++++++++++++ 2 files changed, 162 insertions(+) create mode 100644 integrations/dspy/examples/react_agent_example.py create mode 100644 integrations/dspy/examples/signature_serialization_example.py diff --git a/integrations/dspy/examples/react_agent_example.py b/integrations/dspy/examples/react_agent_example.py new file mode 100644 index 0000000000..a8b9f91791 --- /dev/null +++ b/integrations/dspy/examples/react_agent_example.py @@ -0,0 +1,76 @@ +import dspy +from haystack import Pipeline +from haystack.dataclasses import ChatMessage + +from haystack_integrations.components.generators.dspy import DSPyChatGenerator + + +def get_weather(city: str) -> str: + """Return the current weather for a city (stub).""" + weather_data = { + "paris": "15°C, partly cloudy", + "tokyo": "22°C, sunny", + "new york": "8°C, rainy", + } + return weather_data.get(city.lower(), f"No weather data available for {city}") + + +def get_population(city: str) -> str: + """Return the population of a city (stub).""" + population_data = { + "paris": "2.1 million (city proper), 12.4 million (metro)", + "tokyo": "13.9 million (city proper), 37.4 million (metro)", + "new york": "8.3 million (city proper), 19.8 million (metro)", + } + return population_data.get(city.lower(), f"No population data available for {city}") + + +class CityInfoSignature(dspy.Signature): + """Answer questions about cities using available tools.""" + + question = dspy.InputField(desc="A question about a city") + answer = dspy.OutputField(desc="A detailed answer based on tool results") + + +def react_agent_example(): + """Use ReAct to answer a question that requires tool calls.""" + + generator = DSPyChatGenerator( + model="openai/gpt-5-mini", + signature=CityInfoSignature, + module_type="ReAct", + output_field="answer", + module_kwargs={"tools": [get_weather, get_population]}, + ) + + pipeline = Pipeline() + pipeline.add_component("agent", generator) + + messages = [ChatMessage.from_user("What is the weather and population of Tokyo?")] + result = pipeline.run({"agent": {"messages": messages}}) + + print(f"Question: {messages[0].text}") + print(f"Answer : {result['agent']['replies'][0].text}\n") + + +def react_string_signature_example(): + """ReAct with a string signature and tools.""" + + generator = DSPyChatGenerator( + model="openai/gpt-5-mini", + signature="question -> answer", + module_type="ReAct", + output_field="answer", + module_kwargs={"tools": [get_weather]}, + ) + + messages = [ChatMessage.from_user("What's the weather like in Paris?")] + result = generator.run(messages=messages) + + print(f"Question: {messages[0].text}") + print(f"Answer : {result['replies'][0].text}\n") + + +if __name__ == 
"__main__": + react_agent_example() + react_string_signature_example() diff --git a/integrations/dspy/examples/signature_serialization_example.py b/integrations/dspy/examples/signature_serialization_example.py new file mode 100644 index 0000000000..480e48bc77 --- /dev/null +++ b/integrations/dspy/examples/signature_serialization_example.py @@ -0,0 +1,86 @@ +from typing import Literal + +import dspy +import pydantic +from haystack.dataclasses import ChatMessage + +from haystack_integrations.components.generators.dspy import DSPyChatGenerator + +class Source(pydantic.BaseModel): + """A single cited source.""" + title: str + url: str + relevance: float + + +class StructuredAnswer(pydantic.BaseModel): + """A rich answer with metadata.""" + summary: str + confidence: Literal["low", "medium", "high"] + sources: list[Source] + key_facts: list[str] + + +class ResearchSignature(dspy.Signature): + """Research a topic and return a structured, cited answer.""" + question: str = dspy.InputField(desc="The research question") + context: str = dspy.InputField(desc="Background material or documents to ground the answer") + answer: StructuredAnswer = dspy.OutputField(desc="A structured answer with sources and confidence") + follow_up_questions: list[str] = dspy.OutputField(desc="Suggested follow-up questions for deeper research") + + +def print_signature_fields(sig): + """Pretty-print input/output fields and their types.""" + print(" Input fields:") + for name, field in sig.input_fields.items(): + annotation = field.annotation if hasattr(field, "annotation") else "str" + print(f" {name}: {annotation}") + print(" Output fields:") + for name, field in sig.output_fields.items(): + annotation = field.annotation if hasattr(field, "annotation") else "str" + print(f" {name}: {annotation}") + + +def main(): + generator = DSPyChatGenerator( + model="openai/gpt-5-mini", + signature=ResearchSignature, + module_type="ChainOfThought", + output_field="answer", + input_mapping={"question": "question", "context": "context"}, + ) + + print("=== Original generator ===") + print(f" signature class: {generator.signature.__name__}") + print_signature_fields(generator.signature) + + data = generator.to_dict() + sig_value = data["init_parameters"]["signature"] + print(f"\n=== Serialized ===") + print(f" signature value: {sig_value}") + print(f" all init params: {list(data['init_parameters'].keys())}") + + restored = DSPyChatGenerator.from_dict(data) + print(f"\n=== Restored generator ===") + print(f" signature class: {restored.signature.__name__}") + print(f" same class? : {restored.signature is ResearchSignature}") + print_signature_fields(restored.signature) + + messages = [ChatMessage.from_user("What are the main causes of coral reef bleaching?")] + result = restored.run( + messages=messages, + context=( + "Coral bleaching occurs when corals expel their symbiotic algae (zooxanthellae) " + "due to stress. Major stressors include elevated sea surface temperatures, ocean " + "acidification from increased CO2 absorption, pollution runoff, and overexposure " + "to sunlight. The Great Barrier Reef experienced mass bleaching events in 2016, " + "2017, 2020, and 2022, primarily driven by marine heatwaves." 
+ ), + ) + print(f"\n=== Inference ===") + print(f" Question: {messages[0].text}") + print(f" Answer : {result['replies'][0].text}") + + +if __name__ == "__main__": + main() From 631882386fd370c30f0b0f49af0d7b25268b2dfb Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Wed, 4 Mar 2026 18:50:35 +0100 Subject: [PATCH 09/28] Renamed DSPyChatGenerator into DSPySignatureChatGenerator --- .../dspy/examples/chat_generator_example.py | 6 +- .../dspy/examples/react_agent_example.py | 6 +- .../signature_serialization_example.py | 6 +- .../components/generators/dspy/__init__.py | 4 +- .../generators/dspy/chat/chat_generator.py | 10 +-- .../dspy/tests/test_chat_generator.py | 66 +++++++++---------- .../dspy/tests/test_chat_generator_async.py | 12 ++-- 7 files changed, 55 insertions(+), 55 deletions(-) diff --git a/integrations/dspy/examples/chat_generator_example.py b/integrations/dspy/examples/chat_generator_example.py index 922d5430e2..66af376a21 100644 --- a/integrations/dspy/examples/chat_generator_example.py +++ b/integrations/dspy/examples/chat_generator_example.py @@ -2,7 +2,7 @@ from haystack import Pipeline from haystack.dataclasses import ChatMessage -from haystack_integrations.components.generators.dspy import DSPyChatGenerator +from haystack_integrations.components.generators.dspy import DSPySignatureChatGenerator class QASignature(dspy.Signature): @@ -15,7 +15,7 @@ class QASignature(dspy.Signature): def basic_qa_example(): """Simple question-answering with Chain-of-Thought reasoning.""" - generator = DSPyChatGenerator( + generator = DSPySignatureChatGenerator( model="openai/gpt-5-mini", signature=QASignature, module_type="ChainOfThought", @@ -34,7 +34,7 @@ def basic_qa_example(): def string_signature_example(): """Using a simple string signature instead of a class.""" - generator = DSPyChatGenerator( + generator = DSPySignatureChatGenerator( model="openai/gpt-5-mini", signature="question -> answer", module_type="Predict", diff --git a/integrations/dspy/examples/react_agent_example.py b/integrations/dspy/examples/react_agent_example.py index a8b9f91791..8cd98b989b 100644 --- a/integrations/dspy/examples/react_agent_example.py +++ b/integrations/dspy/examples/react_agent_example.py @@ -2,7 +2,7 @@ from haystack import Pipeline from haystack.dataclasses import ChatMessage -from haystack_integrations.components.generators.dspy import DSPyChatGenerator +from haystack_integrations.components.generators.dspy import DSPySignatureChatGenerator def get_weather(city: str) -> str: @@ -35,7 +35,7 @@ class CityInfoSignature(dspy.Signature): def react_agent_example(): """Use ReAct to answer a question that requires tool calls.""" - generator = DSPyChatGenerator( + generator = DSPySignatureChatGenerator( model="openai/gpt-5-mini", signature=CityInfoSignature, module_type="ReAct", @@ -56,7 +56,7 @@ def react_agent_example(): def react_string_signature_example(): """ReAct with a string signature and tools.""" - generator = DSPyChatGenerator( + generator = DSPySignatureChatGenerator( model="openai/gpt-5-mini", signature="question -> answer", module_type="ReAct", diff --git a/integrations/dspy/examples/signature_serialization_example.py b/integrations/dspy/examples/signature_serialization_example.py index 480e48bc77..cbac28e1be 100644 --- a/integrations/dspy/examples/signature_serialization_example.py +++ b/integrations/dspy/examples/signature_serialization_example.py @@ -4,7 +4,7 @@ import pydantic from haystack.dataclasses import ChatMessage -from haystack_integrations.components.generators.dspy import 
DSPyChatGenerator +from haystack_integrations.components.generators.dspy import DSPySignatureChatGenerator class Source(pydantic.BaseModel): """A single cited source.""" @@ -42,7 +42,7 @@ def print_signature_fields(sig): def main(): - generator = DSPyChatGenerator( + generator = DSPySignatureChatGenerator( model="openai/gpt-5-mini", signature=ResearchSignature, module_type="ChainOfThought", @@ -60,7 +60,7 @@ def main(): print(f" signature value: {sig_value}") print(f" all init params: {list(data['init_parameters'].keys())}") - restored = DSPyChatGenerator.from_dict(data) + restored = DSPySignatureChatGenerator.from_dict(data) print(f"\n=== Restored generator ===") print(f" signature class: {restored.signature.__name__}") print(f" same class? : {restored.signature is ResearchSignature}") diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py index 68cdcadc0d..96d6a9dc81 100644 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py @@ -1,3 +1,3 @@ -from .chat.chat_generator import DSPyChatGenerator +from .chat.chat_generator import DSPySignatureChatGenerator -__all__ = ["DSPyChatGenerator"] +__all__ = ["DSPySignatureChatGenerator"] diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py index df2408a720..17d46bba39 100644 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py @@ -65,7 +65,7 @@ def _resolve_signature(signature: str | type[dspy.Signature]) -> str | type[dspy @component -class DSPyChatGenerator: +class DSPySignatureChatGenerator: """ A Haystack chat generator component that uses DSPy signatures and modules for structured generation. @@ -80,14 +80,14 @@ class DSPyChatGenerator: ```python from haystack.dataclasses import ChatMessage - from haystack_integrations.components.generators.dspy import DSPyChatGenerator + from haystack_integrations.components.generators.dspy import DSPySignatureChatGenerator import dspy class QASignature(dspy.Signature): question = dspy.InputField(desc="The user's question") answer = dspy.OutputField(desc="A clear, concise answer") - generator = DSPyChatGenerator( + generator = DSPySignatureChatGenerator( model="openai/gpt-5-mini", signature=QASignature, module_type="ChainOfThought", @@ -112,7 +112,7 @@ def __init__( streaming_callback: StreamingCallbackT | None = None, ): """ - Initialize the DSPyChatGenerator. + Initialize the DSPySignatureChatGenerator. :param signature: DSPy signature defining I/O structure. Can be a string like ``"question -> answer"`` or a ``dspy.Signature`` subclass. 
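Because this patch renames the public class, both the import path and the serialized `type` string change, so previously serialized pipelines would need a one-line update. A minimal before/after sketch:

```python
# Before this patch:
# from haystack_integrations.components.generators.dspy import DSPyChatGenerator
# generator = DSPyChatGenerator(signature="question -> answer")

# After this patch:
from haystack_integrations.components.generators.dspy import DSPySignatureChatGenerator

generator = DSPySignatureChatGenerator(signature="question -> answer")

# Serialized components now carry the new class path:
# "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator"
```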
@@ -218,7 +218,7 @@ def to_dict(self) -> dict[str, Any]: return default_to_dict(self, **kwargs) @classmethod - def from_dict(cls, data: dict[str, Any]) -> "DSPyChatGenerator": + def from_dict(cls, data: dict[str, Any]) -> "DSPySignatureChatGenerator": """Deserialize a component from a dictionary.""" init_params = data.get("init_parameters", {}) diff --git a/integrations/dspy/tests/test_chat_generator.py b/integrations/dspy/tests/test_chat_generator.py index e200f778fe..04ecdc3a35 100644 --- a/integrations/dspy/tests/test_chat_generator.py +++ b/integrations/dspy/tests/test_chat_generator.py @@ -7,7 +7,7 @@ from haystack_integrations.components.generators.dspy.chat.chat_generator import ( VALID_MODULE_TYPES, - DSPyChatGenerator, + DSPySignatureChatGenerator, _configure_dspy_lm, _get_dspy_module_class, _resolve_signature, @@ -152,9 +152,9 @@ def test_invalid_dotted_path_raises(self): _resolve_signature("nonexistent.module.ClassName") -class TestDSPyChatGenerator: +class TestDSPySignatureChatGenerator: def test_init_default(self, mock_dspy_module): - component = DSPyChatGenerator(signature="question -> answer") + component = DSPySignatureChatGenerator(signature="question -> answer") assert component.model == "openai/gpt-5-mini" assert component.signature == "question -> answer" assert component.module_type == "ChainOfThought" @@ -166,7 +166,7 @@ def test_init_default(self, mock_dspy_module): assert not component.module_kwargs def test_init_with_parameters(self, mock_dspy_module): - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="context, question -> answer", model="openai/gpt-4o", api_base="http://localhost:8000", @@ -187,13 +187,13 @@ def test_init_with_parameters(self, mock_dspy_module): def test_init_with_invalid_module_type(self, mock_dspy_module): with pytest.raises(ValueError, match="Invalid module_type"): - DSPyChatGenerator( + DSPySignatureChatGenerator( signature="question -> answer", module_type="InvalidModule", ) def test_init_with_signature_class(self, mock_dspy_module, sample_qa_signature): - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature=sample_qa_signature, ) assert component.signature is sample_qa_signature @@ -201,7 +201,7 @@ def test_init_with_signature_class(self, mock_dspy_module, sample_qa_signature): def test_init_with_module_kwargs(self, mock_dspy_module): """Test that module_kwargs are passed to the DSPy module constructor.""" tools = [MagicMock(), MagicMock()] - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="question -> answer", module_type="ReAct", module_kwargs={"tools": tools}, @@ -210,19 +210,19 @@ def test_init_with_module_kwargs(self, mock_dspy_module): def test_init_with_api_base(self, mock_dspy_module): """Test initialization with api_base for local models.""" - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="question -> answer", api_base="http://localhost:11434/v1", ) assert component.api_base == "http://localhost:11434/v1" def test_to_dict_default(self, mock_dspy_module): - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="question -> answer", ) data = component.to_dict() assert data == { - "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPyChatGenerator", + "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", "init_parameters": { "signature": "question -> answer", "model": "openai/gpt-5-mini", @@ 
-237,7 +237,7 @@ def test_to_dict_default(self, mock_dspy_module): } def test_to_dict_with_parameters(self, mock_dspy_module): - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="context, question -> answer", model="openai/gpt-4o", api_base="http://localhost:8000", @@ -248,7 +248,7 @@ def test_to_dict_with_parameters(self, mock_dspy_module): ) data = component.to_dict() assert data == { - "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPyChatGenerator", + "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", "init_parameters": { "signature": "context, question -> answer", "model": "openai/gpt-4o", @@ -264,18 +264,18 @@ def test_to_dict_with_parameters(self, mock_dspy_module): def test_to_dict_with_signature_class(self, mock_dspy_module, sample_qa_signature): """Test that signature classes are serialized as fully qualified class paths.""" - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature=sample_qa_signature, ) data = component.to_dict() sig_value = data["init_parameters"]["signature"] - # Should be a dotted path like "test_chat_generator.TestDSPyChatGenerator...QASignature" + # Should be a dotted path like "test_chat_generator.TestDSPySignatureChatGenerator...QASignature" assert "QASignature" in sig_value assert "." in sig_value def test_to_dict_with_streaming_callback(self, mock_dspy_module): """Test that streaming_callback is serialized.""" - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="question -> answer", streaming_callback=_sample_streaming_callback, ) @@ -285,7 +285,7 @@ def test_to_dict_with_streaming_callback(self, mock_dspy_module): def test_from_dict(self, mock_dspy_module): data = { - "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPyChatGenerator", + "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", "init_parameters": { "signature": "question -> answer", "model": "openai/gpt-4o", @@ -298,7 +298,7 @@ def test_from_dict(self, mock_dspy_module): "streaming_callback": None, }, } - component = DSPyChatGenerator.from_dict(data) + component = DSPySignatureChatGenerator.from_dict(data) assert component.model == "openai/gpt-4o" assert component.signature == "question -> answer" assert component.module_type == "Predict" @@ -310,7 +310,7 @@ def test_from_dict(self, mock_dspy_module): def test_from_dict_with_api_base(self, mock_dspy_module): """Test deserialization with api_base.""" data = { - "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPyChatGenerator", + "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", "init_parameters": { "signature": "question -> answer", "model": "openai/local-model", @@ -323,13 +323,13 @@ def test_from_dict_with_api_base(self, mock_dspy_module): "streaming_callback": None, }, } - component = DSPyChatGenerator.from_dict(data) + component = DSPySignatureChatGenerator.from_dict(data) assert component.api_base == "http://localhost:8000" def test_from_dict_resolves_signature_class_path(self, mock_dspy_module): """Test that from_dict resolves a dotted signature class path.""" data = { - "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPyChatGenerator", + "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", 
"init_parameters": { "signature": "dspy.Signature", "model": "openai/gpt-5-mini", @@ -341,11 +341,11 @@ def test_from_dict_resolves_signature_class_path(self, mock_dspy_module): "streaming_callback": None, }, } - component = DSPyChatGenerator.from_dict(data) + component = DSPySignatureChatGenerator.from_dict(data) assert component.signature is dspy.Signature def test_run(self, chat_messages, mock_dspy_module): - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="question -> answer", ) response = component.run(chat_messages) @@ -360,7 +360,7 @@ def test_run(self, chat_messages, mock_dspy_module): def test_run_always_passes_config(self, chat_messages, mock_dspy_module): """Test that config is always passed (even as empty dict) - simplified call.""" - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="question -> answer", ) component.run(chat_messages) @@ -369,7 +369,7 @@ def test_run_always_passes_config(self, chat_messages, mock_dspy_module): assert kwargs["config"] == {} def test_run_with_generation_kwargs(self, chat_messages, mock_dspy_module): - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="question -> answer", generation_kwargs={"max_tokens": 10, "temperature": 0.5}, ) @@ -384,7 +384,7 @@ def test_run_with_generation_kwargs(self, chat_messages, mock_dspy_module): assert all(isinstance(reply, ChatMessage) for reply in response["replies"]) def test_run_with_multiple_messages(self, mock_dspy_module): - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="question -> answer", ) messages = [ @@ -402,7 +402,7 @@ def test_run_with_multiple_messages(self, mock_dspy_module): assert isinstance(response["replies"][0], ChatMessage) def test_run_with_empty_messages(self, mock_dspy_module): - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="question -> answer", ) with pytest.raises(ValueError, match="messages"): @@ -410,7 +410,7 @@ def test_run_with_empty_messages(self, mock_dspy_module): def test_run_with_custom_output_field(self, mock_dspy_module): mock_dspy_module.return_value = MagicMock(summary="This is a summary") - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="text -> summary", output_field="summary", ) @@ -420,7 +420,7 @@ def test_run_with_custom_output_field(self, mock_dspy_module): assert response["replies"][0].text == "This is a summary" def test_run_with_input_mapping(self, mock_dspy_module): - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="context, question -> answer", input_mapping={"context": "context", "question": "question"}, ) @@ -434,7 +434,7 @@ def test_run_with_input_mapping(self, mock_dspy_module): def test_run_with_wrong_model(self, mock_dspy_module): mock_dspy_module.side_effect = Exception("Invalid model name") - generator = DSPyChatGenerator( + generator = DSPySignatureChatGenerator( signature="question -> answer", model="something-obviously-wrong", ) @@ -449,7 +449,7 @@ def test_run_with_wrong_model(self, mock_dspy_module): @pytest.mark.integration def test_live_run(self): chat_messages = [ChatMessage.from_user("What's the capital of France")] - component = DSPyChatGenerator(signature="question -> answer") + component = DSPySignatureChatGenerator(signature="question -> answer") results = component.run(chat_messages) assert len(results["replies"]) == 1 message: ChatMessage = results["replies"][0] @@ -463,7 +463,7 @@ def 
test_live_run(self): def test_live_run_with_predict_module(self): """Test using the Predict module type with a string signature.""" chat_messages = [ChatMessage.from_user("What is 2 + 2?")] - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="question -> answer", module_type="Predict", ) @@ -486,7 +486,7 @@ class QASignature(dspy.Signature): answer = dspy.OutputField(desc="A clear, concise answer") chat_messages = [ChatMessage.from_user("What language is spoken in Brazil?")] - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature=QASignature, module_type="ChainOfThought", ) @@ -502,7 +502,7 @@ class QASignature(dspy.Signature): def test_live_run_with_multi_field_signature(self): """Test using a multi-input signature with input_mapping.""" chat_messages = [ChatMessage.from_user("What is the main topic?")] - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="context, question -> answer", module_type="Predict", input_mapping={"context": "context", "question": "question"}, diff --git a/integrations/dspy/tests/test_chat_generator_async.py b/integrations/dspy/tests/test_chat_generator_async.py index 55dac80c80..48848da4b3 100644 --- a/integrations/dspy/tests/test_chat_generator_async.py +++ b/integrations/dspy/tests/test_chat_generator_async.py @@ -3,7 +3,7 @@ import pytest from haystack.dataclasses import ChatMessage -from haystack_integrations.components.generators.dspy.chat.chat_generator import DSPyChatGenerator +from haystack_integrations.components.generators.dspy.chat.chat_generator import DSPySignatureChatGenerator @pytest.fixture @@ -44,10 +44,10 @@ def chat_messages(): ] -class TestDSPyChatGeneratorAsync: +class TestDSPySignatureChatGeneratorAsync: @pytest.mark.asyncio async def test_run_async(self, chat_messages, mock_dspy_module): - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="question -> answer", ) response = await component.run_async(messages=chat_messages) @@ -63,7 +63,7 @@ async def test_run_async(self, chat_messages, mock_dspy_module): @pytest.mark.asyncio async def test_run_async_always_passes_config(self, chat_messages, mock_dspy_module): """Test that config is always passed (even as empty dict) in async mode.""" - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="question -> answer", ) await component.run_async(messages=chat_messages) @@ -73,7 +73,7 @@ async def test_run_async_always_passes_config(self, chat_messages, mock_dspy_mod @pytest.mark.asyncio async def test_run_async_with_params(self, chat_messages, mock_dspy_module): - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="question -> answer", ) response = await component.run_async( @@ -91,7 +91,7 @@ async def test_run_async_with_params(self, chat_messages, mock_dspy_module): @pytest.mark.asyncio async def test_run_async_with_empty_messages(self, mock_dspy_module): - component = DSPyChatGenerator( + component = DSPySignatureChatGenerator( signature="question -> answer", ) with pytest.raises(ValueError, match="messages"): From 1fcbc7fccf89672debefd76737240f4619b151bc Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Wed, 4 Mar 2026 18:52:39 +0100 Subject: [PATCH 10/28] Add author --- integrations/dspy/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/dspy/pyproject.toml b/integrations/dspy/pyproject.toml index 2cc285c2dc..6005df44cf 100644 --- 
a/integrations/dspy/pyproject.toml +++ b/integrations/dspy/pyproject.toml @@ -10,7 +10,7 @@ readme = "README.md" requires-python = ">=3.10" license = "Apache-2.0" keywords = ["haystack", "dspy", "prompt-optimization", "llm", "nlp"] -authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }] +authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }, { name = "Arsenii Shkunkov", email = "shkunkov.as@gmail.com" }] classifiers = [ "License :: OSI Approved :: Apache Software License", "Development Status :: 4 - Beta", From 2233a833634187d014be5f750bde8f7dcb848e8b Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Wed, 4 Mar 2026 18:53:52 +0100 Subject: [PATCH 11/28] Fmt lint --- .../dspy/examples/signature_serialization_example.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/integrations/dspy/examples/signature_serialization_example.py b/integrations/dspy/examples/signature_serialization_example.py index cbac28e1be..4edb66ae32 100644 --- a/integrations/dspy/examples/signature_serialization_example.py +++ b/integrations/dspy/examples/signature_serialization_example.py @@ -6,8 +6,10 @@ from haystack_integrations.components.generators.dspy import DSPySignatureChatGenerator + class Source(pydantic.BaseModel): """A single cited source.""" + title: str url: str relevance: float @@ -15,6 +17,7 @@ class Source(pydantic.BaseModel): class StructuredAnswer(pydantic.BaseModel): """A rich answer with metadata.""" + summary: str confidence: Literal["low", "medium", "high"] sources: list[Source] @@ -23,6 +26,7 @@ class StructuredAnswer(pydantic.BaseModel): class ResearchSignature(dspy.Signature): """Research a topic and return a structured, cited answer.""" + question: str = dspy.InputField(desc="The research question") context: str = dspy.InputField(desc="Background material or documents to ground the answer") answer: StructuredAnswer = dspy.OutputField(desc="A structured answer with sources and confidence") @@ -56,12 +60,12 @@ def main(): data = generator.to_dict() sig_value = data["init_parameters"]["signature"] - print(f"\n=== Serialized ===") + print("\n=== Serialized ===") print(f" signature value: {sig_value}") print(f" all init params: {list(data['init_parameters'].keys())}") restored = DSPySignatureChatGenerator.from_dict(data) - print(f"\n=== Restored generator ===") + print("\n=== Restored generator ===") print(f" signature class: {restored.signature.__name__}") print(f" same class? : {restored.signature is ResearchSignature}") print_signature_fields(restored.signature) @@ -77,7 +81,7 @@ def main(): "2017, 2020, and 2022, primarily driven by marine heatwaves." 
), ) - print(f"\n=== Inference ===") + print("\n=== Inference ===") print(f" Question: {messages[0].text}") print(f" Answer : {result['replies'][0].text}") From ab591b6d3ce7aafc8845422a49033fdf2a5d822b Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Sun, 8 Mar 2026 13:17:18 +0100 Subject: [PATCH 12/28] Remove streaming_callback logic for now --- .../generators/dspy/chat/chat_generator.py | 12 ----------- .../dspy/tests/test_chat_generator.py | 20 ------------------- 2 files changed, 32 deletions(-) diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py index 17d46bba39..ea9fb228c6 100644 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py @@ -4,8 +4,6 @@ import dspy from haystack import component, default_from_dict, default_to_dict from haystack.dataclasses import ChatMessage, ChatRole -from haystack.dataclasses.streaming_chunk import StreamingCallbackT -from haystack.utils import deserialize_callable, serialize_callable VALID_MODULE_TYPES = {"Predict", "ChainOfThought", "ReAct"} @@ -109,7 +107,6 @@ def __init__( generation_kwargs: dict[str, Any] | None = None, module_kwargs: dict[str, Any] | None = None, input_mapping: dict[str, str] | None = None, - streaming_callback: StreamingCallbackT | None = None, ): """ Initialize the DSPySignatureChatGenerator. @@ -128,7 +125,6 @@ def __init__( provides it as ``"documents"``, use ``{"context": "documents"}``. When not provided, the first input field receives the last user message text, and remaining fields are matched by name from ``**kwargs``. - :param streaming_callback: Callback for streaming responses. """ if module_type not in VALID_MODULE_TYPES: msg = f"Invalid module_type '{module_type}'. 
Must be one of {sorted(VALID_MODULE_TYPES)}" @@ -142,7 +138,6 @@ def __init__( self.generation_kwargs = generation_kwargs or {} self.module_kwargs = module_kwargs or {} self.input_mapping = input_mapping - self.streaming_callback = streaming_callback self._lm = _configure_dspy_lm( model=self.model, @@ -203,7 +198,6 @@ def _signature_to_string(self) -> str: def to_dict(self) -> dict[str, Any]: """Serialize this component to a dictionary.""" - callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None kwargs: dict[str, Any] = { "signature": self._signature_to_string(), "model": self.model, @@ -213,7 +207,6 @@ def to_dict(self) -> dict[str, Any]: "generation_kwargs": self.generation_kwargs, "module_kwargs": self.module_kwargs, "input_mapping": self.input_mapping, - "streaming_callback": callback_name, } return default_to_dict(self, **kwargs) @@ -227,11 +220,6 @@ def from_dict(cls, data: dict[str, Any]) -> "DSPySignatureChatGenerator": if signature: init_params["signature"] = _resolve_signature(signature) - # Deserialize streaming callback - serialized_callback = init_params.get("streaming_callback") - if serialized_callback: - init_params["streaming_callback"] = deserialize_callable(serialized_callback) - return default_from_dict(cls, data) @component.output_types(replies=list[ChatMessage]) diff --git a/integrations/dspy/tests/test_chat_generator.py b/integrations/dspy/tests/test_chat_generator.py index 04ecdc3a35..800b354ca5 100644 --- a/integrations/dspy/tests/test_chat_generator.py +++ b/integrations/dspy/tests/test_chat_generator.py @@ -14,10 +14,6 @@ ) -def _sample_streaming_callback(chunk): - """Module-level callback for serialization tests.""" - - @pytest.fixture def mock_dspy_module(): """ @@ -159,7 +155,6 @@ def test_init_default(self, mock_dspy_module): assert component.signature == "question -> answer" assert component.module_type == "ChainOfThought" assert component.output_field == "answer" - assert component.streaming_callback is None assert not component.generation_kwargs assert component.input_mapping is None assert component.api_base is None @@ -232,7 +227,6 @@ def test_to_dict_default(self, mock_dspy_module): "generation_kwargs": {}, "module_kwargs": {}, "input_mapping": None, - "streaming_callback": None, }, } @@ -258,7 +252,6 @@ def test_to_dict_with_parameters(self, mock_dspy_module): "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, "module_kwargs": {}, "input_mapping": {"context": "context", "question": "question"}, - "streaming_callback": None, }, } @@ -273,16 +266,6 @@ def test_to_dict_with_signature_class(self, mock_dspy_module, sample_qa_signatur assert "QASignature" in sig_value assert "." 
in sig_value - def test_to_dict_with_streaming_callback(self, mock_dspy_module): - """Test that streaming_callback is serialized.""" - component = DSPySignatureChatGenerator( - signature="question -> answer", - streaming_callback=_sample_streaming_callback, - ) - data = component.to_dict() - assert data["init_parameters"]["streaming_callback"] is not None - assert "streaming_callback" in data["init_parameters"]["streaming_callback"] - def test_from_dict(self, mock_dspy_module): data = { "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", @@ -295,7 +278,6 @@ def test_from_dict(self, mock_dspy_module): "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, "module_kwargs": {}, "input_mapping": None, - "streaming_callback": None, }, } component = DSPySignatureChatGenerator.from_dict(data) @@ -320,7 +302,6 @@ def test_from_dict_with_api_base(self, mock_dspy_module): "generation_kwargs": {}, "module_kwargs": {}, "input_mapping": None, - "streaming_callback": None, }, } component = DSPySignatureChatGenerator.from_dict(data) @@ -338,7 +319,6 @@ def test_from_dict_resolves_signature_class_path(self, mock_dspy_module): "generation_kwargs": {}, "module_kwargs": {}, "input_mapping": None, - "streaming_callback": None, }, } component = DSPySignatureChatGenerator.from_dict(data) From 207ac4f7cf255dd2f4975dde537fff8689d5fc31 Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Sun, 8 Mar 2026 13:19:15 +0100 Subject: [PATCH 13/28] Remove init files --- integrations/dspy/src/haystack_integrations/__init__.py | 0 .../dspy/src/haystack_integrations/components/__init__.py | 0 .../src/haystack_integrations/components/generators/__init__.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 integrations/dspy/src/haystack_integrations/__init__.py delete mode 100644 integrations/dspy/src/haystack_integrations/components/__init__.py delete mode 100644 integrations/dspy/src/haystack_integrations/components/generators/__init__.py diff --git a/integrations/dspy/src/haystack_integrations/__init__.py b/integrations/dspy/src/haystack_integrations/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/integrations/dspy/src/haystack_integrations/components/__init__.py b/integrations/dspy/src/haystack_integrations/components/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/integrations/dspy/src/haystack_integrations/components/generators/__init__.py b/integrations/dspy/src/haystack_integrations/components/generators/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 From ef27e6a3541e41ad2f962fed6ac2f814497a4b01 Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Sun, 8 Mar 2026 13:21:38 +0100 Subject: [PATCH 14/28] Added an empty py.typed file --- .../dspy/src/haystack_integrations/components/generators/py.typed | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 integrations/dspy/src/haystack_integrations/components/generators/py.typed diff --git a/integrations/dspy/src/haystack_integrations/components/generators/py.typed b/integrations/dspy/src/haystack_integrations/components/generators/py.typed new file mode 100644 index 0000000000..e69de29bb2 From a8bac2ceaa095cccfdb588c6387175a219c3f643 Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Sun, 8 Mar 2026 13:24:26 +0100 Subject: [PATCH 15/28] Add Github workflow --- .github/workflows/dspy.yml | 74 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 
100644 .github/workflows/dspy.yml diff --git a/.github/workflows/dspy.yml b/.github/workflows/dspy.yml new file mode 100644 index 0000000000..4ab4ad8b01 --- /dev/null +++ b/.github/workflows/dspy.yml @@ -0,0 +1,74 @@ +# This workflow comes from https://github.com/ofek/hatch-mypyc +# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml +name: Test / dspy + +on: + schedule: + - cron: "0 0 * * *" + pull_request: + paths: + - "integrations/dspy/**" + - "!integrations/dspy/*.md" + - ".github/workflows/dspy.yml" + +concurrency: + group: dspy-${{ github.head_ref }} + cancel-in-progress: true + +env: + PYTHONUNBUFFERED: "1" + FORCE_COLOR: "1" + +defaults: + run: + working-directory: integrations/dspy + +jobs: + run: + name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + python-version: ["3.10", "3.13"] + + steps: + - uses: actions/checkout@v6 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Hatch + run: pip install hatch "virtualenv<21.0.0" + + - name: Lint + if: matrix.python-version == '3.10' && runner.os == 'Linux' + run: hatch run fmt-check && hatch run test:types + + - name: Run tests + run: hatch run test:cov-retry + + - name: Run unit tests with lowest direct dependencies + run: | + hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt + hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt + hatch run test:unit + + - name: Nightly - run unit tests with Haystack main branch + if: github.event_name == 'schedule' + run: | + hatch env prune + hatch -e test env run -- uv pip install git+https://github.com/deepset-ai/haystack.git@main + hatch run test:unit + + notify-slack-on-failure: + needs: run + if: failure() && github.event_name == 'schedule' + runs-on: ubuntu-slim + steps: + - uses: deepset-ai/notify-slack-action@v1 + with: + slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL_NOTIFICATIONS }} From 8aecd852ac2f086979ba02c25007212e1c90c145 Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Wed, 11 Mar 2026 11:17:55 +0100 Subject: [PATCH 16/28] Fix linter warnings and errors --- integrations/dspy/pyproject.toml | 8 ++++++-- .../generators/dspy/chat/chat_generator.py | 12 ++++++------ integrations/dspy/tests/test_chat_generator.py | 2 +- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/integrations/dspy/pyproject.toml b/integrations/dspy/pyproject.toml index 6005df44cf..23d90bdf9b 100644 --- a/integrations/dspy/pyproject.toml +++ b/integrations/dspy/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dependencies = ["haystack-ai>=2.0.0", "dspy>=3.0.0"] +dependencies = ["haystack-ai>=2.22.0", "dspy>=3.0.0"] [project.urls] Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/dspy#readme" @@ -73,6 +73,10 @@ non_interactive = true check_untyped_defs = true disallow_incomplete_defs = true +[[tool.mypy.overrides]] +module = "dspy.*" +ignore_missing_imports = true + [tool.hatch.metadata] allow-direct-references = true @@ -129,7 +133,7 @@ ban-relative-imports = "parents" 
[tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252"] +"tests/**/*" = ["PLR2004", "S101", "TID252", "ARG002"] # Examples can print their output "examples/**" = ["T201"] diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py index ea9fb228c6..aa404be1cc 100644 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py @@ -25,7 +25,7 @@ def _configure_dspy_lm(model: str, api_base: str | None = None, **kwargs: Any) - return lm -def _get_dspy_module_class(module_type: str): +def _get_dspy_module_class(module_type: str) -> type: """ Map a module type string to the corresponding DSPy module class. @@ -148,7 +148,7 @@ def __init__( module_class = _get_dspy_module_class(self.module_type) self._module = module_class(self.signature, **self.module_kwargs) - def _build_dspy_inputs(self, prompt: str, **kwargs) -> dict[str, Any]: + def _build_dspy_inputs(self, prompt: str, **kwargs: Any) -> dict[str, Any]: """Build the input dict for the DSPy module call.""" if self.input_mapping: dspy_inputs = {} @@ -180,8 +180,8 @@ def _extract_last_user_message(messages: list[ChatMessage]) -> str: """Extract the text of the last user message from a list of chat messages.""" for msg in reversed(messages): if msg.role == ChatRole.USER: - return msg.text - return messages[-1].text + return msg.text or "" + return messages[-1].text or "" def _signature_to_string(self) -> str: """ @@ -227,7 +227,7 @@ def run( self, messages: list[ChatMessage], generation_kwargs: dict[str, Any] | None = None, - **kwargs, + **kwargs: Any, ) -> dict[str, Any]: """ Run the DSPy module on the given messages. @@ -256,7 +256,7 @@ async def run_async( self, messages: list[ChatMessage], generation_kwargs: dict[str, Any] | None = None, - **kwargs, + **kwargs: Any, ) -> dict[str, Any]: """ Asynchronously run the DSPy module on the given messages. diff --git a/integrations/dspy/tests/test_chat_generator.py b/integrations/dspy/tests/test_chat_generator.py index 800b354ca5..d89f365939 100644 --- a/integrations/dspy/tests/test_chat_generator.py +++ b/integrations/dspy/tests/test_chat_generator.py @@ -405,7 +405,7 @@ def test_run_with_input_mapping(self, mock_dspy_module): input_mapping={"context": "context", "question": "question"}, ) messages = [ChatMessage.from_user("What is ML?")] - response = component.run(messages=messages, context="Machine learning is a subset of AI.") + component.run(messages=messages, context="Machine learning is a subset of AI.") call_kwargs = mock_dspy_module.call_args.kwargs assert call_kwargs.get("context") == "Machine learning is a subset of AI." 
From 17b3a5a10d4b1ce4dc8a8bec0a51dd3f47218612 Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Tue, 17 Mar 2026 18:05:37 +0100 Subject: [PATCH 17/28] Remove double backticks --- .../generators/dspy/chat/chat_generator.py | 92 +++++++++---------- 1 file changed, 44 insertions(+), 48 deletions(-) diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py index aa404be1cc..512849125e 100644 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py @@ -12,10 +12,10 @@ def _configure_dspy_lm(model: str, api_base: str | None = None, **kwargs: Any) - """ Create and configure a DSPy language model. - :param model: Model identifier (e.g. ``"openai/gpt-5-mini"``). + :param model: Model identifier (e.g. `"openai/gpt-5-mini"`). :param api_base: Optional base URL for the API (useful for local models). - :param kwargs: Additional keyword arguments passed to ``dspy.LM``. - :returns: The configured ``dspy.LM`` instance. + :param kwargs: Additional keyword arguments passed to `dspy.LM`. + :returns: The configured `dspy.LM` instance. """ lm_kwargs: dict[str, Any] = {"model": model, **kwargs} if api_base is not None: @@ -29,7 +29,7 @@ def _get_dspy_module_class(module_type: str) -> type: """ Map a module type string to the corresponding DSPy module class. - :param module_type: One of ``"Predict"``, ``"ChainOfThought"``, or ``"ReAct"``. + :param module_type: One of `"Predict"`, `"ChainOfThought"`, or `"ReAct"`. :returns: The DSPy module class. :raises ValueError: If the module type is not recognized. """ @@ -44,35 +44,17 @@ def _get_dspy_module_class(module_type: str) -> type: return mapping[module_type] -def _resolve_signature(signature: str | type[dspy.Signature]) -> str | type[dspy.Signature]: - """ - Resolve a signature that may be a fully qualified class path string. - - If the string contains a dot, it is treated as a fully qualified class path - (e.g. ``"mymodule.QASignature"``) and imported. Otherwise, it is returned as-is - (a DSPy shorthand like ``"question -> answer"``). - - :param signature: A string or dspy.Signature subclass. - :returns: The resolved signature. - """ - if isinstance(signature, str) and "." in signature and "->" not in signature: - module_path, class_name = signature.rsplit(".", 1) - module = importlib.import_module(module_path) - return getattr(module, class_name) - return signature - - @component class DSPySignatureChatGenerator: """ A Haystack chat generator component that uses DSPy signatures and modules for structured generation. - Accepts and returns ``ChatMessage`` objects, making it compatible with + Accepts and returns `ChatMessage` objects, making it compatible with Haystack chat pipelines. The API key is read automatically from environment variables by DSPy/litellm - (e.g. ``OPENAI_API_KEY``). Use ``api_base`` for local or self-hosted models. + (e.g. `OPENAI_API_KEY`). Use `api_base` for local or self-hosted models. Usage example: @@ -112,19 +94,19 @@ def __init__( Initialize the DSPySignatureChatGenerator. :param signature: DSPy signature defining I/O structure. Can be a string - like ``"question -> answer"`` or a ``dspy.Signature`` subclass. - :param model: Model identifier (e.g. ``"openai/gpt-5-mini"``). + like `"question -> answer"` or a `dspy.Signature` subclass. 
+ :param model: Model identifier (e.g. `"openai/gpt-5-mini"`). :param api_base: Optional base URL for the API (useful for local models). - :param module_type: DSPy module type: ``"Predict"``, ``"ChainOfThought"``, or ``"ReAct"``. + :param module_type: DSPy module type: `"Predict"`, `"ChainOfThought"`, or `"ReAct"`. :param output_field: Which signature output field to use as the reply. :param generation_kwargs: Additional generation parameters (temperature, max_tokens, etc.). :param module_kwargs: Additional keyword arguments passed to the DSPy module constructor. - For example, use ``{"tools": [tool1, tool2]}`` when using the ``"ReAct"`` module type. - :param input_mapping: Maps DSPy signature input field names to ``run()`` kwarg names. - For example, if your signature has an input field ``"context"`` but your pipeline - provides it as ``"documents"``, use ``{"context": "documents"}``. When not provided, + For example, use `{"tools": [tool1, tool2]}` when using the `"ReAct"` module type. + :param input_mapping: Maps DSPy signature input field names to `run()` kwarg names. + For example, if your signature has an input field `"context"` but your pipeline + provides it as `"documents"`, use `{"context": "documents"}`. When not provided, the first input field receives the last user message text, and remaining fields - are matched by name from ``**kwargs``. + are matched by name from `**kwargs`. """ if module_type not in VALID_MODULE_TYPES: msg = f"Invalid module_type '{module_type}'. Must be one of {sorted(VALID_MODULE_TYPES)}" @@ -183,23 +165,38 @@ def _extract_last_user_message(messages: list[ChatMessage]) -> str: return msg.text or "" return messages[-1].text or "" - def _signature_to_string(self) -> str: + @staticmethod + def _serialize_signature(signature: str | type[dspy.Signature]) -> dict[str, str]: """ - Convert the signature to a serializable string representation. + Serialize the signature to a dictionary. - For string signatures, returns the string as-is. - For ``dspy.Signature`` subclasses, returns the fully qualified class path - (e.g. ``"mymodule.QASignature"``), which can be imported back during - deserialization. + String signatures are stored as `{"str": "question -> answer"}`. + `dspy.Signature` subclasses are stored as + `{"class": "mymodule.QASignature"}`. """ - if isinstance(self.signature, str): - return self.signature - return f"{self.signature.__module__}.{self.signature.__qualname__}" + if isinstance(signature, str): + return {"str": signature} + return {"class": f"{signature.__module__}.{signature.__qualname__}"} + + @staticmethod + def _deserialize_signature(data: dict[str, str]) -> str | type[dspy.Signature]: + """ + Deserialize a signature from a dictionary. + + Accepts `{"str": "question -> answer"}` or + `{"class": "mymodule.QASignature"}`. 
+ """ + if "str" in data: + return data["str"] + class_path = data["class"] + module_path, class_name = class_path.rsplit(".", 1) + module = importlib.import_module(module_path) + return getattr(module, class_name) def to_dict(self) -> dict[str, Any]: """Serialize this component to a dictionary.""" kwargs: dict[str, Any] = { - "signature": self._signature_to_string(), + "signature": self._serialize_signature(self.signature), "model": self.model, "api_base": self.api_base, "module_type": self.module_type, @@ -215,10 +212,9 @@ def from_dict(cls, data: dict[str, Any]) -> "DSPySignatureChatGenerator": """Deserialize a component from a dictionary.""" init_params = data.get("init_parameters", {}) - # Resolve signature class path if needed signature = init_params.get("signature") - if signature: - init_params["signature"] = _resolve_signature(signature) + if isinstance(signature, dict): + init_params["signature"] = cls._deserialize_signature(signature) return default_from_dict(cls, data) @@ -235,7 +231,7 @@ def run( :param messages: List of chat messages. The last user message is used as input. :param generation_kwargs: Optional runtime generation parameters. :param kwargs: Additional keyword arguments mapped to signature input fields. - :returns: A dictionary with ``replies`` (list of ChatMessage). + :returns: A dictionary with `replies` (list of ChatMessage). """ if not messages: msg = "The 'messages' parameter cannot be empty." @@ -261,12 +257,12 @@ async def run_async( """ Asynchronously run the DSPy module on the given messages. - Uses DSPy's native ``acall`` for true async I/O. + Uses DSPy's native `acall` for true async I/O. :param messages: List of chat messages. The last user message is used as input. :param generation_kwargs: Optional runtime generation parameters. :param kwargs: Additional keyword arguments mapped to signature input fields. - :returns: A dictionary with ``replies`` (list of ChatMessage). + :returns: A dictionary with `replies` (list of ChatMessage). """ if not messages: msg = "The 'messages' parameter cannot be empty." 
From 3113afd4ea0d1735328d80fc56bf0c0b07844b1c Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Tue, 17 Mar 2026 18:59:57 +0100 Subject: [PATCH 18/28] Update github workflow --- .github/workflows/dspy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dspy.yml b/.github/workflows/dspy.yml index 4ab4ad8b01..0f7e1c2c61 100644 --- a/.github/workflows/dspy.yml +++ b/.github/workflows/dspy.yml @@ -42,7 +42,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install Hatch - run: pip install hatch "virtualenv<21.0.0" + run: pip install hatch - name: Lint if: matrix.python-version == '3.10' && runner.os == 'Linux' From a1029cdb2ddae3bfa0255b368c147f775dfdba81 Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Mon, 23 Mar 2026 14:08:46 +0100 Subject: [PATCH 19/28] Explicit type serialization --- .../generators/dspy/chat/chat_generator.py | 23 ++++++----- .../dspy/tests/test_chat_generator.py | 40 +++++-------------- 2 files changed, 23 insertions(+), 40 deletions(-) diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py index 512849125e..18da4a6d4a 100644 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py @@ -170,25 +170,30 @@ def _serialize_signature(signature: str | type[dspy.Signature]) -> dict[str, str """ Serialize the signature to a dictionary. - String signatures are stored as `{"str": "question -> answer"}`. + String signatures are stored as + `{"type": "str", "value": "question -> answer"}`. `dspy.Signature` subclasses are stored as - `{"class": "mymodule.QASignature"}`. + `{"type": "class", "value": "mymodule.QASignature"}`. """ if isinstance(signature, str): - return {"str": signature} - return {"class": f"{signature.__module__}.{signature.__qualname__}"} + return {"type": "str", "value": signature} + return {"type": "class", "value": f"{signature.__module__}.{signature.__qualname__}"} @staticmethod def _deserialize_signature(data: dict[str, str]) -> str | type[dspy.Signature]: """ Deserialize a signature from a dictionary. - Accepts `{"str": "question -> answer"}` or - `{"class": "mymodule.QASignature"}`. + Accepts `{"type": "str", "value": "question -> answer"}` or + `{"type": "class", "value": "mymodule.QASignature"}`. 
""" - if "str" in data: - return data["str"] - class_path = data["class"] + signature_type = data["type"] + value = data["value"] + + if signature_type == "str": + return value + + class_path = value module_path, class_name = class_path.rsplit(".", 1) module = importlib.import_module(module_path) return getattr(module, class_name) diff --git a/integrations/dspy/tests/test_chat_generator.py b/integrations/dspy/tests/test_chat_generator.py index d89f365939..247dae58a8 100644 --- a/integrations/dspy/tests/test_chat_generator.py +++ b/integrations/dspy/tests/test_chat_generator.py @@ -10,7 +10,6 @@ DSPySignatureChatGenerator, _configure_dspy_lm, _get_dspy_module_class, - _resolve_signature, ) @@ -127,27 +126,6 @@ def test_omits_api_base_when_none(self, mock_lm_class, mock_configure): mock_lm_class.assert_called_once_with(model="openai/gpt-5-mini") -class TestResolveSignature: - def test_string_signature_passthrough(self): - sig = "question -> answer" - assert _resolve_signature(sig) == sig - - def test_signature_class_passthrough(self): - class MySig(dspy.Signature): - question: str = dspy.InputField() - answer: str = dspy.OutputField() - - assert _resolve_signature(MySig) is MySig - - def test_resolves_dotted_class_path(self): - result = _resolve_signature("dspy.Signature") - assert result is dspy.Signature - - def test_invalid_dotted_path_raises(self): - with pytest.raises((ImportError, ModuleNotFoundError)): - _resolve_signature("nonexistent.module.ClassName") - - class TestDSPySignatureChatGenerator: def test_init_default(self, mock_dspy_module): component = DSPySignatureChatGenerator(signature="question -> answer") @@ -219,7 +197,7 @@ def test_to_dict_default(self, mock_dspy_module): assert data == { "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", "init_parameters": { - "signature": "question -> answer", + "signature": {"type": "str", "value": "question -> answer"}, "model": "openai/gpt-5-mini", "api_base": None, "module_type": "ChainOfThought", @@ -244,7 +222,7 @@ def test_to_dict_with_parameters(self, mock_dspy_module): assert data == { "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", "init_parameters": { - "signature": "context, question -> answer", + "signature": {"type": "str", "value": "context, question -> answer"}, "model": "openai/gpt-4o", "api_base": "http://localhost:8000", "module_type": "Predict", @@ -261,16 +239,16 @@ def test_to_dict_with_signature_class(self, mock_dspy_module, sample_qa_signatur signature=sample_qa_signature, ) data = component.to_dict() - sig_value = data["init_parameters"]["signature"] - # Should be a dotted path like "test_chat_generator.TestDSPySignatureChatGenerator...QASignature" - assert "QASignature" in sig_value - assert "." in sig_value + signature_value = data["init_parameters"]["signature"] + assert signature_value["type"] == "class" + assert "QASignature" in signature_value["value"] + assert "." 
in signature_value["value"] def test_from_dict(self, mock_dspy_module): data = { "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", "init_parameters": { - "signature": "question -> answer", + "signature": {"type": "str", "value": "question -> answer"}, "model": "openai/gpt-4o", "api_base": None, "module_type": "Predict", @@ -294,7 +272,7 @@ def test_from_dict_with_api_base(self, mock_dspy_module): data = { "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", "init_parameters": { - "signature": "question -> answer", + "signature": {"type": "str", "value": "question -> answer"}, "model": "openai/local-model", "api_base": "http://localhost:8000", "module_type": "Predict", @@ -312,7 +290,7 @@ def test_from_dict_resolves_signature_class_path(self, mock_dspy_module): data = { "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", "init_parameters": { - "signature": "dspy.Signature", + "signature": {"type": "class", "value": "dspy.Signature"}, "model": "openai/gpt-5-mini", "module_type": "Predict", "output_field": "answer", From 100497a5c5c3018a32b1c4fb7be5e02c8e72bf62 Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Mon, 23 Mar 2026 14:21:39 +0100 Subject: [PATCH 20/28] Fix silent fallbacks and added tests for such cases --- .../generators/dspy/chat/chat_generator.py | 31 +++++++++---- .../dspy/tests/test_chat_generator.py | 45 +++++++++++++++++++ 2 files changed, 68 insertions(+), 8 deletions(-) diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py index 18da4a6d4a..6ae3f0c22e 100644 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py @@ -162,8 +162,12 @@ def _extract_last_user_message(messages: list[ChatMessage]) -> str: """Extract the text of the last user message from a list of chat messages.""" for msg in reversed(messages): if msg.role == ChatRole.USER: - return msg.text or "" - return messages[-1].text or "" + if not msg.text: + err = "The last user message has no text content." + raise ValueError(err) + return msg.text + err = "No user message found in 'messages'." + raise ValueError(err) @staticmethod def _serialize_signature(signature: str | type[dspy.Signature]) -> dict[str, str]: @@ -193,10 +197,13 @@ def _deserialize_signature(data: dict[str, str]) -> str | type[dspy.Signature]: if signature_type == "str": return value - class_path = value - module_path, class_name = class_path.rsplit(".", 1) - module = importlib.import_module(module_path) - return getattr(module, class_name) + if signature_type == "class": + module_path, class_name = value.rsplit(".", 1) + module = importlib.import_module(module_path) + return getattr(module, class_name) + + msg = f"Unknown signature type '{signature_type}'. Must be 'str' or 'class'." 
+ raise ValueError(msg) def to_dict(self) -> dict[str, Any]: """Serialize this component to a dictionary.""" @@ -247,7 +254,11 @@ def run( prediction = self._module(**dspy_inputs, config=generation_kwargs or {}) - output_text = getattr(prediction, self.output_field, str(prediction)) + if not hasattr(prediction, self.output_field): + available = list(prediction.keys()) + msg = f"Output field '{self.output_field}' not found in prediction. Available fields: {available}" + raise ValueError(msg) + output_text = getattr(prediction, self.output_field) replies = [ChatMessage.from_assistant(text=output_text)] return {"replies": replies} @@ -278,7 +289,11 @@ async def run_async( prediction = await self._module.acall(**dspy_inputs, config=generation_kwargs or {}) - output_text = getattr(prediction, self.output_field, str(prediction)) + if not hasattr(prediction, self.output_field): + available = list(prediction.keys()) + msg = f"Output field '{self.output_field}' not found in prediction. Available fields: {available}" + raise ValueError(msg) + output_text = getattr(prediction, self.output_field) replies = [ChatMessage.from_assistant(text=output_text)] return {"replies": replies} diff --git a/integrations/dspy/tests/test_chat_generator.py b/integrations/dspy/tests/test_chat_generator.py index 247dae58a8..fd7d2e19ba 100644 --- a/integrations/dspy/tests/test_chat_generator.py +++ b/integrations/dspy/tests/test_chat_generator.py @@ -302,6 +302,23 @@ def test_from_dict_resolves_signature_class_path(self, mock_dspy_module): component = DSPySignatureChatGenerator.from_dict(data) assert component.signature is dspy.Signature + def test_from_dict_with_unknown_signature_type(self, mock_dspy_module): + """Test that from_dict raises an error for unknown signature types.""" + data = { + "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", + "init_parameters": { + "signature": {"type": "unknown", "value": "question -> answer"}, + "model": "openai/gpt-5-mini", + "module_type": "Predict", + "output_field": "answer", + "generation_kwargs": {}, + "module_kwargs": {}, + "input_mapping": None, + }, + } + with pytest.raises(ValueError, match="Unknown signature type 'unknown'"): + DSPySignatureChatGenerator.from_dict(data) + def test_run(self, chat_messages, mock_dspy_module): component = DSPySignatureChatGenerator( signature="question -> answer", @@ -366,6 +383,34 @@ def test_run_with_empty_messages(self, mock_dspy_module): with pytest.raises(ValueError, match="messages"): component.run(messages=[]) + def test_run_with_no_user_message(self, mock_dspy_module): + component = DSPySignatureChatGenerator( + signature="question -> answer", + ) + messages = [ChatMessage.from_assistant("I'm an assistant")] + with pytest.raises(ValueError, match="No user message found"): + component.run(messages=messages) + + def test_run_with_empty_user_message(self, mock_dspy_module): + component = DSPySignatureChatGenerator( + signature="question -> answer", + ) + messages = [ChatMessage.from_user("")] + with pytest.raises(ValueError, match="no text content"): + component.run(messages=messages) + + def test_run_with_wrong_output_field(self, mock_dspy_module): + prediction = MagicMock(spec=["answer", "keys"]) + prediction.keys.return_value = ["answer"] + mock_dspy_module.return_value = prediction + component = DSPySignatureChatGenerator( + signature="question -> answer", + output_field="nonexistent", + ) + messages = [ChatMessage.from_user("Hello")] + with pytest.raises(ValueError, 
match="Output field 'nonexistent' not found"): + component.run(messages=messages) + def test_run_with_custom_output_field(self, mock_dspy_module): mock_dspy_module.return_value = MagicMock(summary="This is a summary") component = DSPySignatureChatGenerator( From 87bb606cfab74d50dd8e519e7c96aff2a6931f99 Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci Date: Thu, 26 Mar 2026 11:10:25 +0100 Subject: [PATCH 21/28] Update .github/workflows/dspy.yml --- .github/workflows/dspy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dspy.yml b/.github/workflows/dspy.yml index 0f7e1c2c61..6b511a33e4 100644 --- a/.github/workflows/dspy.yml +++ b/.github/workflows/dspy.yml @@ -31,7 +31,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - python-version: ["3.10", "3.13"] + python-version: ["3.10", "3.14"] steps: - uses: actions/checkout@v6 From 237356e640ea915184a96baf5319df5c45885eef Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci Date: Thu, 26 Mar 2026 11:25:11 +0100 Subject: [PATCH 22/28] Update .github/workflows/dspy.yml --- .github/workflows/dspy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/dspy.yml b/.github/workflows/dspy.yml index 6b511a33e4..ae74cb2b33 100644 --- a/.github/workflows/dspy.yml +++ b/.github/workflows/dspy.yml @@ -18,6 +18,7 @@ concurrency: env: PYTHONUNBUFFERED: "1" FORCE_COLOR: "1" + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} defaults: run: From 7481d6dd3a2498fc59a9f4e2080943e18ba803e4 Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Wed, 1 Apr 2026 22:37:30 +0200 Subject: [PATCH 23/28] Removed signature serialization example --- .../signature_serialization_example.py | 90 ------------------- 1 file changed, 90 deletions(-) delete mode 100644 integrations/dspy/examples/signature_serialization_example.py diff --git a/integrations/dspy/examples/signature_serialization_example.py b/integrations/dspy/examples/signature_serialization_example.py deleted file mode 100644 index 4edb66ae32..0000000000 --- a/integrations/dspy/examples/signature_serialization_example.py +++ /dev/null @@ -1,90 +0,0 @@ -from typing import Literal - -import dspy -import pydantic -from haystack.dataclasses import ChatMessage - -from haystack_integrations.components.generators.dspy import DSPySignatureChatGenerator - - -class Source(pydantic.BaseModel): - """A single cited source.""" - - title: str - url: str - relevance: float - - -class StructuredAnswer(pydantic.BaseModel): - """A rich answer with metadata.""" - - summary: str - confidence: Literal["low", "medium", "high"] - sources: list[Source] - key_facts: list[str] - - -class ResearchSignature(dspy.Signature): - """Research a topic and return a structured, cited answer.""" - - question: str = dspy.InputField(desc="The research question") - context: str = dspy.InputField(desc="Background material or documents to ground the answer") - answer: StructuredAnswer = dspy.OutputField(desc="A structured answer with sources and confidence") - follow_up_questions: list[str] = dspy.OutputField(desc="Suggested follow-up questions for deeper research") - - -def print_signature_fields(sig): - """Pretty-print input/output fields and their types.""" - print(" Input fields:") - for name, field in sig.input_fields.items(): - annotation = field.annotation if hasattr(field, "annotation") else "str" - print(f" {name}: {annotation}") - print(" Output fields:") - for name, field in sig.output_fields.items(): - annotation = field.annotation if hasattr(field, "annotation") else "str" - print(f" {name}: 
{annotation}") - - -def main(): - generator = DSPySignatureChatGenerator( - model="openai/gpt-5-mini", - signature=ResearchSignature, - module_type="ChainOfThought", - output_field="answer", - input_mapping={"question": "question", "context": "context"}, - ) - - print("=== Original generator ===") - print(f" signature class: {generator.signature.__name__}") - print_signature_fields(generator.signature) - - data = generator.to_dict() - sig_value = data["init_parameters"]["signature"] - print("\n=== Serialized ===") - print(f" signature value: {sig_value}") - print(f" all init params: {list(data['init_parameters'].keys())}") - - restored = DSPySignatureChatGenerator.from_dict(data) - print("\n=== Restored generator ===") - print(f" signature class: {restored.signature.__name__}") - print(f" same class? : {restored.signature is ResearchSignature}") - print_signature_fields(restored.signature) - - messages = [ChatMessage.from_user("What are the main causes of coral reef bleaching?")] - result = restored.run( - messages=messages, - context=( - "Coral bleaching occurs when corals expel their symbiotic algae (zooxanthellae) " - "due to stress. Major stressors include elevated sea surface temperatures, ocean " - "acidification from increased CO2 absorption, pollution runoff, and overexposure " - "to sunlight. The Great Barrier Reef experienced mass bleaching events in 2016, " - "2017, 2020, and 2022, primarily driven by marine heatwaves." - ), - ) - print("\n=== Inference ===") - print(f" Question: {messages[0].text}") - print(f" Answer : {result['replies'][0].text}") - - -if __name__ == "__main__": - main() From ba3c490af9d99d3bf8bda355baa7a500d57ed697 Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Wed, 1 Apr 2026 22:39:04 +0200 Subject: [PATCH 24/28] Added ruff rules related to docstrings --- integrations/dspy/pyproject.toml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/integrations/dspy/pyproject.toml b/integrations/dspy/pyproject.toml index 23d90bdf9b..ff9a23bebc 100644 --- a/integrations/dspy/pyproject.toml +++ b/integrations/dspy/pyproject.toml @@ -92,6 +92,13 @@ select = [ "ARG", "B", "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty "DTZ", "E", "EM", From 8075a22cf1778003694eb56fd6e77b15818e7f3d Mon Sep 17 00:00:00 2001 From: Arsenii Shkunkov Date: Thu, 2 Apr 2026 10:03:25 +0200 Subject: [PATCH 25/28] Fix dspy version and avoid malicious litellm versions --- integrations/dspy/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/dspy/pyproject.toml b/integrations/dspy/pyproject.toml index ff9a23bebc..50b9e7299a 100644 --- a/integrations/dspy/pyproject.toml +++ b/integrations/dspy/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dependencies = ["haystack-ai>=2.22.0", "dspy>=3.0.0"] +dependencies = ["haystack-ai>=2.22.0", "dspy>=3.1.3", "litellm!=1.82.7,!=1.82.8"] [project.urls] Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/dspy#readme" From d5d9ebaef47e4918ed01f1e1f6d130bf7dddcaff Mon Sep 17 00:00:00 2001 From: 
Arsenii Shkunkov Date: Thu, 2 Apr 2026 10:32:06 +0200 Subject: [PATCH 26/28] Added "D102", "D103" to the test file ignores --- integrations/dspy/pyproject.toml | 2 +- .../components/generators/dspy/chat/chat_generator.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/integrations/dspy/pyproject.toml b/integrations/dspy/pyproject.toml index 50b9e7299a..f127326e73 100644 --- a/integrations/dspy/pyproject.toml +++ b/integrations/dspy/pyproject.toml @@ -140,7 +140,7 @@ ban-relative-imports = "parents" [tool.ruff.lint.per-file-ignores] # Tests can use magic values, assertions, and relative imports -"tests/**/*" = ["PLR2004", "S101", "TID252", "ARG002"] +"tests/**/*" = ["PLR2004", "S101", "TID252", "ARG002", "D102", "D103"] # Examples can print their output "examples/**" = ["T201"] diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py index 6ae3f0c22e..f7ea6f612f 100644 --- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py @@ -47,8 +47,7 @@ def _get_dspy_module_class(module_type: str) -> type: @component class DSPySignatureChatGenerator: """ - A Haystack chat generator component that uses DSPy signatures and modules - for structured generation. + A Haystack chat generator component that uses DSPy signatures and modules for structured generation. Accepts and returns `ChatMessage` objects, making it compatible with Haystack chat pipelines. From 4004d6c8b9ee2a0cf2b8e463080aa80eb6016a00 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Thu, 2 Apr 2026 11:34:44 +0200 Subject: [PATCH 27/28] test coverage support --- .github/workflows/CI_coverage_comment.yml | 1 + .github/workflows/dspy.yml | 83 +++++++++++++++++++---- README.md | 1 + integrations/dspy/README.md | 2 + integrations/dspy/pyproject.toml | 4 +- 5 files changed, 76 insertions(+), 15 deletions(-) diff --git a/.github/workflows/CI_coverage_comment.yml b/.github/workflows/CI_coverage_comment.yml index c786bf6698..f4b83385a5 100644 --- a/.github/workflows/CI_coverage_comment.yml +++ b/.github/workflows/CI_coverage_comment.yml @@ -15,6 +15,7 @@ on: - "Test / cohere" - "Test / cometapi" - "Test / deepeval" + - "Test / dspy" - "Test / elasticsearch" - "Test / faiss" - "Test / fastembed" diff --git a/.github/workflows/dspy.yml b/.github/workflows/dspy.yml index ae74cb2b33..b246bcfc59 100644 --- a/.github/workflows/dspy.yml +++ b/.github/workflows/dspy.yml @@ -10,35 +10,67 @@ on: - "integrations/dspy/**" - "!integrations/dspy/*.md" - ".github/workflows/dspy.yml" + push: + branches: + - main + paths: + - "integrations/dspy/**" + - "!integrations/dspy/*.md" + - ".github/workflows/dspy.yml" + +defaults: + run: + working-directory: integrations/dspy concurrency: - group: dspy-${{ github.head_ref }} + group: dspy-${{ github.head_ref || github.sha }} cancel-in-progress: true env: PYTHONUNBUFFERED: "1" FORCE_COLOR: "1" - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - -defaults: - run: - working-directory: integrations/dspy + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + TEST_MATRIX_OS: '["ubuntu-latest", "windows-latest", "macos-latest"]' + TEST_MATRIX_PYTHON: '["3.10", "3.14"]' jobs: + compute-test-matrix: + runs-on: ubuntu-slim + defaults: + run: + working-directory: . 
+ outputs: + os: ${{ steps.set.outputs.os }} + python-version: ${{ steps.set.outputs.python-version }} + steps: + - id: set + run: | + echo 'os=${{ github.event_name == 'push' && '["ubuntu-latest"]' || env.TEST_MATRIX_OS }}' >> "$GITHUB_OUTPUT" + echo 'python-version=${{ github.event_name == 'push' && '["3.10"]' || env.TEST_MATRIX_PYTHON }}' >> "$GITHUB_OUTPUT" + run: name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }} + needs: compute-test-matrix runs-on: ${{ matrix.os }} + permissions: + contents: write + pull-requests: write strategy: fail-fast: false matrix: - os: [ubuntu-latest] - python-version: ["3.10", "3.14"] + os: ${{ fromJSON(needs.compute-test-matrix.outputs.os) }} + python-version: ${{ fromJSON(needs.compute-test-matrix.outputs.python-version) }} steps: - - uses: actions/checkout@v6 + - name: Support longpaths + if: matrix.os == 'windows-latest' + working-directory: . + run: git config --system core.longpaths true + + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v6 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: ${{ matrix.python-version }} @@ -46,13 +78,36 @@ jobs: run: pip install hatch - name: Lint - if: matrix.python-version == '3.10' && runner.os == 'Linux' + if: github.event_name != 'push' && matrix.python-version == '3.10' && runner.os == 'Linux' run: hatch run fmt-check && hatch run test:types - - name: Run tests - run: hatch run test:cov-retry + - name: Run unit tests + run: hatch run test:unit-cov-retry + + # On PR: generates coverage comment artifact. On push to main: stores coverage baseline on data branch. 
+      - name: Store unit tests coverage
+        if: matrix.python-version == '3.10' && runner.os == 'Linux' && github.event_name != 'schedule'
+        uses: py-cov-action/python-coverage-comment-action@7188638f871f721a365d644f505d1ff3df20d683 # v3.40
+        with:
+          GITHUB_TOKEN: ${{ github.token }}
+          COVERAGE_PATH: integrations/dspy
+          SUBPROJECT_ID: dspy
+          COMMENT_ARTIFACT_NAME: coverage-comment-dspy
+
+      - name: Run integration tests
+        run: hatch run test:integration-cov-append-retry
+
+      - name: Store combined coverage
+        if: github.event_name == 'push'
+        uses: py-cov-action/python-coverage-comment-action@7188638f871f721a365d644f505d1ff3df20d683 # v3.40
+        with:
+          GITHUB_TOKEN: ${{ github.token }}
+          COVERAGE_PATH: integrations/dspy
+          SUBPROJECT_ID: dspy-combined
+          COMMENT_ARTIFACT_NAME: coverage-comment-dspy-combined
 
       - name: Run unit tests with lowest direct dependencies
+        if: github.event_name != 'push'
         run: |
           hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt
           hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt
@@ -70,6 +125,6 @@ jobs:
     if: failure() && github.event_name == 'schedule'
     runs-on: ubuntu-slim
     steps:
-      - uses: deepset-ai/notify-slack-action@v1
+      - uses: deepset-ai/notify-slack-action@3cda73b77a148f16f703274198e7771340cf862b # v1
        with:
          slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL_NOTIFICATIONS }}
diff --git a/README.md b/README.md
index 609955c351..f52e01b852 100644
--- a/README.md
+++ b/README.md
@@ -37,6 +37,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta
 | [cohere-haystack](integrations/cohere/) | Embedder, Generator, Ranker | [![PyPI - Version](https://img.shields.io/pypi/v/cohere-haystack.svg)](https://pypi.org/project/cohere-haystack) | [![Test / cohere](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/cohere.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/cohere.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-cohere/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-cohere/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-cohere-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-cohere-combined/htmlcov/index.html) |
 | [cometapi-haystack](integrations/cometapi/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/cometapi-haystack.svg)](https://pypi.org/project/cometapi-haystack) | [![Test / cometapi](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/cometapi.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/cometapi.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-cometapi/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-cometapi/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-cometapi-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-cometapi-combined/htmlcov/index.html) |
 | [deepeval-haystack](integrations/deepeval/) | Evaluator | [![PyPI - Version](https://img.shields.io/pypi/v/deepeval-haystack.svg)](https://pypi.org/project/deepeval-haystack) | [![Test / deepeval](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/deepeval.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/deepeval.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-deepeval/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-deepeval/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-deepeval-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-deepeval-combined/htmlcov/index.html) |
+| [dspy-haystack](integrations/dspy/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/dspy-haystack.svg)](https://pypi.org/project/dspy-haystack) | [![Test / dspy](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/dspy.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/dspy.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-dspy/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-dspy/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-dspy-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-dspy-combined/htmlcov/index.html) |
 | [elasticsearch-haystack](integrations/elasticsearch/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/elasticsearch-haystack.svg)](https://pypi.org/project/elasticsearch-haystack) | [![Test / elasticsearch](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-elasticsearch/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-elasticsearch/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-elasticsearch-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-elasticsearch-combined/htmlcov/index.html) |
 | [faiss-haystack](integrations/faiss/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/faiss-haystack.svg)](https://pypi.org/project/faiss-haystack) | [![Test / faiss](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/faiss.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/faiss.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-faiss/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-faiss/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-faiss-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-faiss-combined/htmlcov/index.html) |
 | [fastembed-haystack](integrations/fastembed/) | Embedder, Ranker | [![PyPI - Version](https://img.shields.io/pypi/v/fastembed-haystack.svg)](https://pypi.org/project/fastembed-haystack/) | [![Test / fastembed](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/fastembed.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/fastembed.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-fastembed/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-fastembed/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-fastembed-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-fastembed-combined/htmlcov/index.html) |
diff --git a/integrations/dspy/README.md b/integrations/dspy/README.md
index 7f0b8051fc..9cec6ccd63 100644
--- a/integrations/dspy/README.md
+++ b/integrations/dspy/README.md
@@ -8,3 +8,5 @@
 ## Contributing
 
 Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md).
+
+To run integration tests locally, you need to export the `OPENAI_API_KEY` environment variable.
\ No newline at end of file
diff --git a/integrations/dspy/pyproject.toml b/integrations/dspy/pyproject.toml
index 23d90bdf9b..2ae916b068 100644
--- a/integrations/dspy/pyproject.toml
+++ b/integrations/dspy/pyproject.toml
@@ -63,7 +63,8 @@ dependencies = [
 unit = 'pytest -m "not integration" {args:tests}'
 integration = 'pytest -m "integration" {args:tests}'
 all = 'pytest {args:tests}'
-cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x {args:tests}'
+unit-cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x -m "not integration" {args:tests}'
+integration-cov-append-retry = 'pytest --cov=haystack_integrations --cov-append --reruns 3 --reruns-delay 30 -x -m "integration" {args:tests}'
 
 types = "mypy -p haystack_integrations.components.generators.dspy {args}"
 
@@ -141,6 +142,7 @@ ban-relative-imports = "parents"
 source = ["haystack_integrations"]
 branch = true
 parallel = false
+relative_files = true
 
 [tool.coverage.report]
 omit = ["*/tests/*", "*/__init__.py"]

From 1e52b707b2e08f01323e1df3da0421b3aae8ed82 Mon Sep 17 00:00:00 2001
From: anakin87
Date: Thu, 2 Apr 2026 11:49:18 +0200
Subject: [PATCH 28/28] llm set on the module level, not global

---
 .../generators/dspy/chat/chat_generator.py  | 11 ++++----
 .../dspy/tests/test_chat_generator.py       | 26 +++++++------------
 .../dspy/tests/test_chat_generator_async.py |  1 -
 3 files changed, 15 insertions(+), 23 deletions(-)
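
Note (reviewer sketch, not part of the patch): this commit swaps the global `dspy.configure(lm=...)` call for a per-module `module.set_lm(lm)`, so each generator owns its LM instead of mutating process-wide DSPy state. A minimal standalone illustration of that difference follows; the model name and signature are placeholders, and running it needs `dspy` installed plus an `OPENAI_API_KEY` in the environment.

    import dspy

    # Global configuration (the behaviour this patch moves away from):
    # every DSPy module in the process would pick up this LM.
    # dspy.configure(lm=dspy.LM(model="openai/gpt-5-mini"))

    # Module-level configuration (what the patch adopts): only this module
    # uses the LM, so generators with different models can coexist.
    lm = dspy.LM(model="openai/gpt-5-mini")
    module = dspy.ChainOfThought("question -> answer")
    module.set_lm(lm)

    prediction = module(question="What is the capital of France?")
    print(prediction.answer)

Keeping the LM on the module should also mean that creating or reconfiguring one generator no longer disturbs the settings of other DSPy components in the same process.
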
diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py
index f7ea6f612f..26464a42b8 100644
--- a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py
+++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py
@@ -8,9 +8,9 @@
 VALID_MODULE_TYPES = {"Predict", "ChainOfThought", "ReAct"}
 
 
-def _configure_dspy_lm(model: str, api_base: str | None = None, **kwargs: Any) -> dspy.LM:
+def _create_dspy_lm(model: str, api_base: str | None = None, **kwargs: Any) -> dspy.LM:
     """
-    Create and configure a DSPy language model.
+    Create a DSPy language model instance.
 
     :param model: Model identifier (e.g. `"openai/gpt-5-mini"`).
     :param api_base: Optional base URL for the API (useful for local models).
@@ -20,9 +20,7 @@ def _configure_dspy_lm(model: str, api_base: str | None = None, **kwargs: Any) -
     lm_kwargs: dict[str, Any] = {"model": model, **kwargs}
     if api_base is not None:
         lm_kwargs["api_base"] = api_base
-    lm = dspy.LM(**lm_kwargs)
-    dspy.configure(lm=lm)
-    return lm
+    return dspy.LM(**lm_kwargs)
 
 
 def _get_dspy_module_class(module_type: str) -> type:
@@ -120,7 +118,7 @@ def __init__(
         self.module_kwargs = module_kwargs or {}
         self.input_mapping = input_mapping
 
-        self._lm = _configure_dspy_lm(
+        self._lm = _create_dspy_lm(
             model=self.model,
             api_base=self.api_base,
             **self.generation_kwargs,
@@ -128,6 +126,7 @@ def __init__(
 
         module_class = _get_dspy_module_class(self.module_type)
         self._module = module_class(self.signature, **self.module_kwargs)
+        self._module.set_lm(self._lm)
 
     def _build_dspy_inputs(self, prompt: str, **kwargs: Any) -> dict[str, Any]:
         """Build the input dict for the DSPy module call."""
diff --git a/integrations/dspy/tests/test_chat_generator.py b/integrations/dspy/tests/test_chat_generator.py
index fd7d2e19ba..477b9ff9c9 100644
--- a/integrations/dspy/tests/test_chat_generator.py
+++ b/integrations/dspy/tests/test_chat_generator.py
@@ -8,7 +8,7 @@
 from haystack_integrations.components.generators.dspy.chat.chat_generator import (
     VALID_MODULE_TYPES,
     DSPySignatureChatGenerator,
-    _configure_dspy_lm,
+    _create_dspy_lm,
     _get_dspy_module_class,
 )
 
@@ -20,7 +20,6 @@ def mock_dspy_module():
     """
     with (
         patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.LM") as mock_lm_class,
-        patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.configure"),
         patch(
             "haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.ChainOfThought"
         ) as mock_cot_class,
@@ -82,46 +81,41 @@ def test_invalid_type_lists_valid_options(self):
             _get_dspy_module_class("BadType")
 
 
-class TestConfigureDspyLm:
-    @patch("dspy.configure")
+class TestCreateDspyLm:
     @patch("dspy.LM")
-    def test_creates_lm_and_configures(self, mock_lm_class, mock_configure):
+    def test_creates_lm(self, mock_lm_class):
         mock_lm = MagicMock()
         mock_lm_class.return_value = mock_lm
 
-        result = _configure_dspy_lm(model="openai/gpt-5-mini")
+        result = _create_dspy_lm(model="openai/gpt-5-mini")
 
         mock_lm_class.assert_called_once_with(model="openai/gpt-5-mini")
-        mock_configure.assert_called_once_with(lm=mock_lm)
         assert result is mock_lm
 
-    @patch("dspy.configure")
     @patch("dspy.LM")
-    def test_passes_extra_kwargs(self, mock_lm_class, mock_configure):
+    def test_passes_extra_kwargs(self, mock_lm_class):
         mock_lm = MagicMock()
         mock_lm_class.return_value = mock_lm
 
-        _configure_dspy_lm(model="openai/gpt-5-mini", temperature=0.7, max_tokens=100)
+        _create_dspy_lm(model="openai/gpt-5-mini", temperature=0.7, max_tokens=100)
 
         mock_lm_class.assert_called_once_with(model="openai/gpt-5-mini", temperature=0.7, max_tokens=100)
 
-    @patch("dspy.configure")
     @patch("dspy.LM")
-    def test_passes_api_base(self, mock_lm_class, mock_configure):
+    def test_passes_api_base(self, mock_lm_class):
         mock_lm = MagicMock()
         mock_lm_class.return_value = mock_lm
 
-        _configure_dspy_lm(model="openai/local-model", api_base="http://localhost:8000")
+        _create_dspy_lm(model="openai/local-model", api_base="http://localhost:8000")
 
         mock_lm_class.assert_called_once_with(model="openai/local-model", api_base="http://localhost:8000")
 
-    @patch("dspy.configure")
     @patch("dspy.LM")
-    def test_omits_api_base_when_none(self, mock_lm_class, mock_configure):
+    def test_omits_api_base_when_none(self, mock_lm_class):
         mock_lm = MagicMock()
         mock_lm_class.return_value = mock_lm
 
-        _configure_dspy_lm(model="openai/gpt-5-mini")
+        _create_dspy_lm(model="openai/gpt-5-mini")
 
         mock_lm_class.assert_called_once_with(model="openai/gpt-5-mini")
diff --git a/integrations/dspy/tests/test_chat_generator_async.py b/integrations/dspy/tests/test_chat_generator_async.py
index 48848da4b3..e83a8b1255 100644
--- a/integrations/dspy/tests/test_chat_generator_async.py
+++ b/integrations/dspy/tests/test_chat_generator_async.py
@@ -13,7 +13,6 @@ def mock_dspy_module():
     """
     with (
         patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.LM") as mock_lm_class,
-        patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.configure"),
         patch(
             "haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.ChainOfThought"
         ) as mock_cot_class,