diff --git a/.github/workflows/CI_coverage_comment.yml b/.github/workflows/CI_coverage_comment.yml index c786bf6698..f4b83385a5 100644 --- a/.github/workflows/CI_coverage_comment.yml +++ b/.github/workflows/CI_coverage_comment.yml @@ -15,6 +15,7 @@ on: - "Test / cohere" - "Test / cometapi" - "Test / deepeval" + - "Test / dspy" - "Test / elasticsearch" - "Test / faiss" - "Test / fastembed" diff --git a/.github/workflows/dspy.yml b/.github/workflows/dspy.yml new file mode 100644 index 0000000000..b246bcfc59 --- /dev/null +++ b/.github/workflows/dspy.yml @@ -0,0 +1,130 @@ +# This workflow comes from https://github.com/ofek/hatch-mypyc +# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml +name: Test / dspy + +on: + schedule: + - cron: "0 0 * * *" + pull_request: + paths: + - "integrations/dspy/**" + - "!integrations/dspy/*.md" + - ".github/workflows/dspy.yml" + push: + branches: + - main + paths: + - "integrations/dspy/**" + - "!integrations/dspy/*.md" + - ".github/workflows/dspy.yml" + +defaults: + run: + working-directory: integrations/dspy + +concurrency: + group: dspy-${{ github.head_ref || github.sha }} + cancel-in-progress: true + +env: + PYTHONUNBUFFERED: "1" + FORCE_COLOR: "1" + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + TEST_MATRIX_OS: '["ubuntu-latest", "windows-latest", "macos-latest"]' + TEST_MATRIX_PYTHON: '["3.10", "3.14"]' + +jobs: + compute-test-matrix: + runs-on: ubuntu-slim + defaults: + run: + working-directory: . 
+ outputs: + os: ${{ steps.set.outputs.os }} + python-version: ${{ steps.set.outputs.python-version }} + steps: + - id: set + run: | + echo 'os=${{ github.event_name == 'push' && '["ubuntu-latest"]' || env.TEST_MATRIX_OS }}' >> "$GITHUB_OUTPUT" + echo 'python-version=${{ github.event_name == 'push' && '["3.10"]' || env.TEST_MATRIX_PYTHON }}' >> "$GITHUB_OUTPUT" + + run: + name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }} + needs: compute-test-matrix + runs-on: ${{ matrix.os }} + permissions: + contents: write + pull-requests: write + strategy: + fail-fast: false + matrix: + os: ${{ fromJSON(needs.compute-test-matrix.outputs.os) }} + python-version: ${{ fromJSON(needs.compute-test-matrix.outputs.python-version) }} + + steps: + - name: Support longpaths + if: matrix.os == 'windows-latest' + working-directory: . + run: git config --system core.longpaths true + + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Hatch + run: pip install hatch + + - name: Lint + if: github.event_name != 'push' && matrix.python-version == '3.10' && runner.os == 'Linux' + run: hatch run fmt-check && hatch run test:types + + - name: Run unit tests + run: hatch run test:unit-cov-retry + + # On PR: generates coverage comment artifact. On push to main: stores coverage baseline on data branch. 
+ - name: Store unit tests coverage + if: matrix.python-version == '3.10' && runner.os == 'Linux' && github.event_name != 'schedule' + uses: py-cov-action/python-coverage-comment-action@7188638f871f721a365d644f505d1ff3df20d683 # v3.40 + with: + GITHUB_TOKEN: ${{ github.token }} + COVERAGE_PATH: integrations/dspy + SUBPROJECT_ID: dspy + COMMENT_ARTIFACT_NAME: coverage-comment-dspy + + - name: Run integration tests + run: hatch run test:integration-cov-append-retry + + - name: Store combined coverage + if: github.event_name == 'push' + uses: py-cov-action/python-coverage-comment-action@7188638f871f721a365d644f505d1ff3df20d683 # v3.40 + with: + GITHUB_TOKEN: ${{ github.token }} + COVERAGE_PATH: integrations/dspy + SUBPROJECT_ID: dspy-combined + COMMENT_ARTIFACT_NAME: coverage-comment-dspy-combined + + - name: Run unit tests with lowest direct dependencies + if: github.event_name != 'push' + run: | + hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt + hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt + hatch run test:unit + + - name: Nightly - run unit tests with Haystack main branch + if: github.event_name == 'schedule' + run: | + hatch env prune + hatch -e test env run -- uv pip install git+https://github.com/deepset-ai/haystack.git@main + hatch run test:unit + + notify-slack-on-failure: + needs: run + if: failure() && github.event_name == 'schedule' + runs-on: ubuntu-slim + steps: + - uses: deepset-ai/notify-slack-action@3cda73b77a148f16f703274198e7771340cf862b # v1 + with: + slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL_NOTIFICATIONS }} diff --git a/README.md b/README.md index 609955c351..f52e01b852 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta | [cohere-haystack](integrations/cohere/) | Embedder, Generator, Ranker | [![PyPI - 
Version](https://img.shields.io/pypi/v/cohere-haystack.svg)](https://pypi.org/project/cohere-haystack) | [![Test / cohere](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/cohere.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/cohere.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-cohere/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-cohere/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-cohere-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-cohere-combined/htmlcov/index.html) | | [cometapi-haystack](integrations/cometapi/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/cometapi-haystack.svg)](https://pypi.org/project/cometapi-haystack) | [![Test / cometapi](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/cometapi.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/cometapi.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-cometapi/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-cometapi/htmlcov/index.html) | [![Coverage 
badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-cometapi-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-cometapi-combined/htmlcov/index.html) | | [deepeval-haystack](integrations/deepeval/) | Evaluator | [![PyPI - Version](https://img.shields.io/pypi/v/deepeval-haystack.svg)](https://pypi.org/project/deepeval-haystack) | [![Test / deepeval](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/deepeval.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/deepeval.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-deepeval/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-deepeval/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-deepeval-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-deepeval-combined/htmlcov/index.html) | +| [dspy-haystack](integrations/dspy/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/dspy-haystack.svg)](https://pypi.org/project/dspy-haystack) | [![Test / dspy](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/dspy.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/dspy.yml) | [![Coverage 
badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-dspy/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-dspy/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-dspy-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-dspy-combined/htmlcov/index.html) | | [elasticsearch-haystack](integrations/elasticsearch/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/elasticsearch-haystack.svg)](https://pypi.org/project/elasticsearch-haystack) | [![Test / elasticsearch](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/elasticsearch.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-elasticsearch/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-elasticsearch/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-elasticsearch-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-elasticsearch-combined/htmlcov/index.html) | | [faiss-haystack](integrations/faiss/) | Document Store | [![PyPI - 
Version](https://img.shields.io/pypi/v/faiss-haystack.svg)](https://pypi.org/project/faiss-haystack) | [![Test / faiss](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/faiss.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/faiss.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-faiss/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-faiss/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-faiss-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-faiss-combined/htmlcov/index.html) | | [fastembed-haystack](integrations/fastembed/) | Embedder, Ranker | [![PyPI - Version](https://img.shields.io/pypi/v/fastembed-haystack.svg)](https://pypi.org/project/fastembed-haystack/) | [![Test / fastembed](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/fastembed.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/fastembed.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-fastembed/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-fastembed/htmlcov/index.html) | [![Coverage 
badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-fastembed-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-fastembed-combined/htmlcov/index.html) | diff --git a/integrations/dspy/LICENSE.txt b/integrations/dspy/LICENSE.txt new file mode 100644 index 0000000000..3d4485bce6 --- /dev/null +++ b/integrations/dspy/LICENSE.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2024-present deepset GmbH + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/integrations/dspy/README.md b/integrations/dspy/README.md new file mode 100644 index 0000000000..9cec6ccd63 --- /dev/null +++ b/integrations/dspy/README.md @@ -0,0 +1,12 @@ +# dspy-haystack + +[![PyPI - Version](https://img.shields.io/pypi/v/dspy-haystack.svg)](https://pypi.org/project/dspy-haystack) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/dspy-haystack.svg)](https://pypi.org/project/dspy-haystack) + +--- + +## Contributing + +Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md). + +To run integration tests locally, you need to export the `OPENAI_API_KEY` environment variable. 
\ No newline at end of file diff --git a/integrations/dspy/examples/chat_generator_example.py b/integrations/dspy/examples/chat_generator_example.py new file mode 100644 index 0000000000..66af376a21 --- /dev/null +++ b/integrations/dspy/examples/chat_generator_example.py @@ -0,0 +1,56 @@ +import dspy +from haystack import Pipeline +from haystack.dataclasses import ChatMessage + +from haystack_integrations.components.generators.dspy import DSPySignatureChatGenerator + + +class QASignature(dspy.Signature): + """Answer questions accurately and concisely.""" + + question = dspy.InputField(desc="The user's question") + answer = dspy.OutputField(desc="A clear, concise answer") + + +def basic_qa_example(): + """Simple question-answering with Chain-of-Thought reasoning.""" + + generator = DSPySignatureChatGenerator( + model="openai/gpt-5-mini", + signature=QASignature, + module_type="ChainOfThought", + output_field="answer", + ) + + pipeline = Pipeline() + pipeline.add_component("llm", generator) + + messages = [ChatMessage.from_user("What causes rainbows to appear?")] + result = pipeline.run({"llm": {"messages": messages}}) + + print(f"Question: {messages[0].text}") + print(f"Answer: {result['llm']['replies'][0].text}\n") + + +def string_signature_example(): + """Using a simple string signature instead of a class.""" + generator = DSPySignatureChatGenerator( + model="openai/gpt-5-mini", + signature="question -> answer", + module_type="Predict", + output_field="answer", + ) + + pipeline = Pipeline() + pipeline.add_component("llm", generator) + + messages = [ChatMessage.from_user("What is the capital of Japan?")] + result = pipeline.run({"llm": {"messages": messages}}) + + print(f"Question: {messages[0].text}") + print(f"Answer: {result['llm']['replies'][0].text}\n") + + +if __name__ == "__main__": + basic_qa_example() + string_signature_example() diff --git a/integrations/dspy/examples/react_agent_example.py b/integrations/dspy/examples/react_agent_example.py new file mode 
100644 index 0000000000..8cd98b989b --- /dev/null +++ b/integrations/dspy/examples/react_agent_example.py @@ -0,0 +1,76 @@ +import dspy +from haystack import Pipeline +from haystack.dataclasses import ChatMessage + +from haystack_integrations.components.generators.dspy import DSPySignatureChatGenerator + + +def get_weather(city: str) -> str: + """Return the current weather for a city (stub).""" + weather_data = { + "paris": "15°C, partly cloudy", + "tokyo": "22°C, sunny", + "new york": "8°C, rainy", + } + return weather_data.get(city.lower(), f"No weather data available for {city}") + + +def get_population(city: str) -> str: + """Return the population of a city (stub).""" + population_data = { + "paris": "2.1 million (city proper), 12.4 million (metro)", + "tokyo": "13.9 million (city proper), 37.4 million (metro)", + "new york": "8.3 million (city proper), 19.8 million (metro)", + } + return population_data.get(city.lower(), f"No population data available for {city}") + + +class CityInfoSignature(dspy.Signature): + """Answer questions about cities using available tools.""" + + question = dspy.InputField(desc="A question about a city") + answer = dspy.OutputField(desc="A detailed answer based on tool results") + + +def react_agent_example(): + """Use ReAct to answer a question that requires tool calls.""" + + generator = DSPySignatureChatGenerator( + model="openai/gpt-5-mini", + signature=CityInfoSignature, + module_type="ReAct", + output_field="answer", + module_kwargs={"tools": [get_weather, get_population]}, + ) + + pipeline = Pipeline() + pipeline.add_component("agent", generator) + + messages = [ChatMessage.from_user("What is the weather and population of Tokyo?")] + result = pipeline.run({"agent": {"messages": messages}}) + + print(f"Question: {messages[0].text}") + print(f"Answer : {result['agent']['replies'][0].text}\n") + + +def react_string_signature_example(): + """ReAct with a string signature and tools.""" + + generator = DSPySignatureChatGenerator( + 
model="openai/gpt-5-mini", + signature="question -> answer", + module_type="ReAct", + output_field="answer", + module_kwargs={"tools": [get_weather]}, + ) + + messages = [ChatMessage.from_user("What's the weather like in Paris?")] + result = generator.run(messages=messages) + + print(f"Question: {messages[0].text}") + print(f"Answer : {result['replies'][0].text}\n") + + +if __name__ == "__main__": + react_agent_example() + react_string_signature_example() diff --git a/integrations/dspy/pydoc/config_docusaurus.yml b/integrations/dspy/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..13c624885f --- /dev/null +++ b/integrations/dspy/pydoc/config_docusaurus.yml @@ -0,0 +1,13 @@ +loaders: + - modules: + - haystack_integrations.components.generators.dspy.chat.chat_generator + search_path: [../src] +processors: + - type: filter + documented_only: true + skip_empty_modules: true +renderer: + description: DSPy integration for Haystack + id: integrations-dspy + filename: dspy.md + title: DSPy diff --git a/integrations/dspy/pyproject.toml b/integrations/dspy/pyproject.toml new file mode 100644 index 0000000000..cae40fd3bb --- /dev/null +++ b/integrations/dspy/pyproject.toml @@ -0,0 +1,164 @@ +[build-system] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + +[project] +name = "dspy-haystack" +dynamic = ["version"] +description = 'An integration between DSPy and Haystack for prompt optimization and structured generation' +readme = "README.md" +requires-python = ">=3.10" +license = "Apache-2.0" +keywords = ["haystack", "dspy", "prompt-optimization", "llm", "nlp"] +authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }, { name = "Arsenii Shkunkov", email = "shkunkov.as@gmail.com" }] +classifiers = [ + "License :: OSI Approved :: Apache Software License", + "Development Status :: 4 - Beta", + "Programming Language :: Python", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming 
Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = ["haystack-ai>=2.22.0", "dspy>=3.1.3", "litellm!=1.82.7,!=1.82.8"] + +[project.urls] +Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/dspy#readme" +Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues" +Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/dspy" + +[tool.hatch.build.targets.wheel] +packages = ["src/haystack_integrations"] + +[tool.hatch.version] +source = "vcs" +tag-pattern = 'integrations\/dspy-v(?P<version>.*)' + +[tool.hatch.version.raw-options] +root = "../.." +git_describe_command = 'git describe --tags --match="integrations/dspy-v[0-9]*"' + +[tool.hatch.envs.default] +installer = "uv" +dependencies = ["haystack-pydoc-tools", "ruff"] + +[tool.hatch.envs.default.scripts] +docs = ["haystack-pydoc pydoc/config_docusaurus.yml"] +fmt = "ruff check --fix {args}; ruff format {args}" +fmt-check = "ruff check {args} && ruff format --check {args}" + +[tool.hatch.envs.test] +dependencies = [ + "pytest", + "pytest-asyncio", + "pytest-cov", + "pytest-rerunfailures", + "mypy", + "pip", +] + +[tool.hatch.envs.test.scripts] +unit = 'pytest -m "not integration" {args:tests}' +integration = 'pytest -m "integration" {args:tests}' +all = 'pytest {args:tests}' +unit-cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x -m "not integration" {args:tests}' +integration-cov-append-retry = 'pytest --cov=haystack_integrations --cov-append --reruns 3 --reruns-delay 30 -x -m "integration" {args:tests}' + +types = "mypy -p haystack_integrations.components.generators.dspy {args}" + +[tool.mypy] +install_types = true +non_interactive = true +check_untyped_defs = true +disallow_incomplete_defs = true + +[[tool.mypy.overrides]] +module = 
"dspy.*" +ignore_missing_imports = true + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.ruff.lint.isort] +known-first-party = ["haystack_integrations"] + +[tool.ruff] +line-length = 120 + +[tool.ruff.lint] +select = [ + "A", + "ARG", + "B", + "C", + "D102", # Missing docstring in public method + "D103", # Missing docstring in public function + "D205", # 1 blank line required between summary line and description + "D209", # Closing triple quotes go to new line + "D213", # summary lines must be positioned on the second physical line of the docstring + "D417", # Missing argument descriptions in the docstring + "D419", # Docstring is empty + "DTZ", + "E", + "EM", + "F", + "I", + "ICN", + "ISC", + "N", + "PLC", + "PLE", + "PLR", + "PLW", + "Q", + "RUF", + "S", + "T", + "TID", + "UP", + "W", + "YTT", +] +ignore = [ + # Allow non-abstract empty methods in abstract base classes + "B027", + # Ignore checks for possible passwords + "S105", + "S106", + "S107", + # Ignore complexity + "C901", + "PLR0911", + "PLR0912", + "PLR0913", + "PLR0915", +] + +[tool.ruff.lint.flake8-tidy-imports] +ban-relative-imports = "parents" + +[tool.ruff.lint.per-file-ignores] +# Tests can use magic values, assertions, and relative imports +"tests/**/*" = ["PLR2004", "S101", "TID252", "ARG002", "D102", "D103"] +# Examples can print their output +"examples/**" = ["T201"] + +[tool.coverage.run] +source = ["haystack_integrations"] +branch = true +parallel = false +relative_files = true + +[tool.coverage.report] +omit = ["*/tests/*", "*/__init__.py"] +show_missing = true +exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] + +[tool.pytest.ini_options] +markers = ["integration: integration tests"] +log_cli = true +addopts = ["--strict-markers"] +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "class" diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py 
b/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py new file mode 100644 index 0000000000..96d6a9dc81 --- /dev/null +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/__init__.py @@ -0,0 +1,3 @@ +from .chat.chat_generator import DSPySignatureChatGenerator + +__all__ = ["DSPySignatureChatGenerator"] diff --git a/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py new file mode 100644 index 0000000000..26464a42b8 --- /dev/null +++ b/integrations/dspy/src/haystack_integrations/components/generators/dspy/chat/chat_generator.py @@ -0,0 +1,297 @@ +import importlib +from typing import Any + +import dspy +from haystack import component, default_from_dict, default_to_dict +from haystack.dataclasses import ChatMessage, ChatRole + +VALID_MODULE_TYPES = {"Predict", "ChainOfThought", "ReAct"} + + +def _create_dspy_lm(model: str, api_base: str | None = None, **kwargs: Any) -> dspy.LM: + """ + Create a DSPy language model instance. + + :param model: Model identifier (e.g. `"openai/gpt-5-mini"`). + :param api_base: Optional base URL for the API (useful for local models). + :param kwargs: Additional keyword arguments passed to `dspy.LM`. + :returns: The configured `dspy.LM` instance. + """ + lm_kwargs: dict[str, Any] = {"model": model, **kwargs} + if api_base is not None: + lm_kwargs["api_base"] = api_base + return dspy.LM(**lm_kwargs) + + +def _get_dspy_module_class(module_type: str) -> type: + """ + Map a module type string to the corresponding DSPy module class. + + :param module_type: One of `"Predict"`, `"ChainOfThought"`, or `"ReAct"`. + :returns: The DSPy module class. + :raises ValueError: If the module type is not recognized. 
+ """ + mapping = { + "Predict": dspy.Predict, + "ChainOfThought": dspy.ChainOfThought, + "ReAct": dspy.ReAct, + } + if module_type not in mapping: + msg = f"Invalid module_type '{module_type}'. Must be one of {sorted(VALID_MODULE_TYPES)}" + raise ValueError(msg) + return mapping[module_type] + + +@component +class DSPySignatureChatGenerator: + """ + A Haystack chat generator component that uses DSPy signatures and modules for structured generation. + + Accepts and returns `ChatMessage` objects, making it compatible with + Haystack chat pipelines. + + The API key is read automatically from environment variables by DSPy/litellm + (e.g. `OPENAI_API_KEY`). Use `api_base` for local or self-hosted models. + + Usage example: + + ```python + from haystack.dataclasses import ChatMessage + from haystack_integrations.components.generators.dspy import DSPySignatureChatGenerator + import dspy + + class QASignature(dspy.Signature): + question = dspy.InputField(desc="The user's question") + answer = dspy.OutputField(desc="A clear, concise answer") + + generator = DSPySignatureChatGenerator( + model="openai/gpt-5-mini", + signature=QASignature, + module_type="ChainOfThought", + ) + + messages = [ChatMessage.from_user("What is the capital of France?")] + result = generator.run(messages=messages) + print(result["replies"][0].text) + ``` + """ + + def __init__( + self, + signature: str | type[dspy.Signature], + model: str = "openai/gpt-5-mini", + api_base: str | None = None, + module_type: str = "ChainOfThought", + output_field: str = "answer", + generation_kwargs: dict[str, Any] | None = None, + module_kwargs: dict[str, Any] | None = None, + input_mapping: dict[str, str] | None = None, + ): + """ + Initialize the DSPySignatureChatGenerator. + + :param signature: DSPy signature defining I/O structure. Can be a string + like `"question -> answer"` or a `dspy.Signature` subclass. + :param model: Model identifier (e.g. `"openai/gpt-5-mini"`). 
+ :param api_base: Optional base URL for the API (useful for local models). + :param module_type: DSPy module type: `"Predict"`, `"ChainOfThought"`, or `"ReAct"`. + :param output_field: Which signature output field to use as the reply. + :param generation_kwargs: Additional generation parameters (temperature, max_tokens, etc.). + :param module_kwargs: Additional keyword arguments passed to the DSPy module constructor. + For example, use `{"tools": [tool1, tool2]}` when using the `"ReAct"` module type. + :param input_mapping: Maps DSPy signature input field names to `run()` kwarg names. + For example, if your signature has an input field `"context"` but your pipeline + provides it as `"documents"`, use `{"context": "documents"}`. When not provided, + the first input field receives the last user message text, and remaining fields + are matched by name from `**kwargs`. + """ + if module_type not in VALID_MODULE_TYPES: + msg = f"Invalid module_type '{module_type}'. Must be one of {sorted(VALID_MODULE_TYPES)}" + raise ValueError(msg) + + self.signature = signature + self.model = model + self.api_base = api_base + self.module_type = module_type + self.output_field = output_field + self.generation_kwargs = generation_kwargs or {} + self.module_kwargs = module_kwargs or {} + self.input_mapping = input_mapping + + self._lm = _create_dspy_lm( + model=self.model, + api_base=self.api_base, + **self.generation_kwargs, + ) + + module_class = _get_dspy_module_class(self.module_type) + self._module = module_class(self.signature, **self.module_kwargs) + self._module.set_lm(self._lm) + + def _build_dspy_inputs(self, prompt: str, **kwargs: Any) -> dict[str, Any]: + """Build the input dict for the DSPy module call.""" + if self.input_mapping: + dspy_inputs = {} + for sig_field, source in self.input_mapping.items(): + if source in kwargs: + dspy_inputs[sig_field] = kwargs[source] + else: + dspy_inputs[sig_field] = prompt + return dspy_inputs + + input_fields = 
self._get_input_field_names()
+        dspy_inputs = {input_fields[0]: prompt}
+
+        for field in input_fields[1:]:
+            if field in kwargs:
+                dspy_inputs[field] = kwargs[field]
+
+        return dspy_inputs
+
+    def _get_input_field_names(self) -> list[str]:
+        """
+        Get input field names from the signature.
+
+        String signatures may carry type annotations, e.g.
+        `"question: str, scores: dict[str, int] -> answer"`. Only the bare field
+        names are returned, because the names are used as keyword arguments
+        when the DSPy module is called.
+
+        :returns: Input field names in declaration order.
+        """
+        if isinstance(self.signature, str):
+            input_part = self.signature.split("->")[0].strip()
+            names: list[str] = []
+            # Bracket nesting level: commas inside `dict[str, int]` are part of a type, not separators.
+            depth = 0
+            start = 0
+            # The appended "," is a sentinel that flushes the last field.
+            for index, char in enumerate(input_part + ","):
+                if char == "[":
+                    depth += 1
+                elif char == "]":
+                    depth -= 1
+                elif char == "," and depth == 0:
+                    # Keep only what precedes the optional `: type` annotation.
+                    names.append(input_part[start:index].split(":", 1)[0].strip())
+                    start = index + 1
+            return names
+        return list(self.signature.input_fields.keys())
+
+    @staticmethod
+    def _extract_last_user_message(messages: list[ChatMessage]) -> str:
+        """Extract the text of the last user message from a list of chat messages."""
+        for msg in reversed(messages):
+            if msg.role == ChatRole.USER:
+                if not msg.text:
+                    err = "The last user message has no text content."
+                    raise ValueError(err)
+                return msg.text
+        err = "No user message found in 'messages'."
+        raise ValueError(err)
+
+    @staticmethod
+    def _serialize_signature(signature: str | type[dspy.Signature]) -> dict[str, str]:
+        """
+        Serialize the signature to a dictionary.
+
+        String signatures are stored as
+        `{"type": "str", "value": "question -> answer"}`.
+        `dspy.Signature` subclasses are stored as
+        `{"type": "class", "value": "mymodule.QASignature"}`.
+        """
+        if isinstance(signature, str):
+            return {"type": "str", "value": signature}
+        return {"type": "class", "value": f"{signature.__module__}.{signature.__qualname__}"}
+
+    @staticmethod
+    def _deserialize_signature(data: dict[str, str]) -> str | type[dspy.Signature]:
+        """
+        Deserialize a signature from a dictionary.
+
+        Accepts `{"type": "str", "value": "question -> answer"}` or
+        `{"type": "class", "value": "mymodule.QASignature"}`.
+ """ + signature_type = data["type"] + value = data["value"] + + if signature_type == "str": + return value + + if signature_type == "class": + module_path, class_name = value.rsplit(".", 1) + module = importlib.import_module(module_path) + return getattr(module, class_name) + + msg = f"Unknown signature type '{signature_type}'. Must be 'str' or 'class'." + raise ValueError(msg) + + def to_dict(self) -> dict[str, Any]: + """Serialize this component to a dictionary.""" + kwargs: dict[str, Any] = { + "signature": self._serialize_signature(self.signature), + "model": self.model, + "api_base": self.api_base, + "module_type": self.module_type, + "output_field": self.output_field, + "generation_kwargs": self.generation_kwargs, + "module_kwargs": self.module_kwargs, + "input_mapping": self.input_mapping, + } + return default_to_dict(self, **kwargs) + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "DSPySignatureChatGenerator": + """Deserialize a component from a dictionary.""" + init_params = data.get("init_parameters", {}) + + signature = init_params.get("signature") + if isinstance(signature, dict): + init_params["signature"] = cls._deserialize_signature(signature) + + return default_from_dict(cls, data) + + @component.output_types(replies=list[ChatMessage]) + def run( + self, + messages: list[ChatMessage], + generation_kwargs: dict[str, Any] | None = None, + **kwargs: Any, + ) -> dict[str, Any]: + """ + Run the DSPy module on the given messages. + + :param messages: List of chat messages. The last user message is used as input. + :param generation_kwargs: Optional runtime generation parameters. + :param kwargs: Additional keyword arguments mapped to signature input fields. + :returns: A dictionary with `replies` (list of ChatMessage). + """ + if not messages: + msg = "The 'messages' parameter cannot be empty." 
+ raise ValueError(msg) + + prompt = self._extract_last_user_message(messages) + dspy_inputs = self._build_dspy_inputs(prompt, **kwargs) + + prediction = self._module(**dspy_inputs, config=generation_kwargs or {}) + + if not hasattr(prediction, self.output_field): + available = list(prediction.keys()) + msg = f"Output field '{self.output_field}' not found in prediction. Available fields: {available}" + raise ValueError(msg) + output_text = getattr(prediction, self.output_field) + + replies = [ChatMessage.from_assistant(text=output_text)] + return {"replies": replies} + + @component.output_types(replies=list[ChatMessage]) + async def run_async( + self, + messages: list[ChatMessage], + generation_kwargs: dict[str, Any] | None = None, + **kwargs: Any, + ) -> dict[str, Any]: + """ + Asynchronously run the DSPy module on the given messages. + + Uses DSPy's native `acall` for true async I/O. + + :param messages: List of chat messages. The last user message is used as input. + :param generation_kwargs: Optional runtime generation parameters. + :param kwargs: Additional keyword arguments mapped to signature input fields. + :returns: A dictionary with `replies` (list of ChatMessage). + """ + if not messages: + msg = "The 'messages' parameter cannot be empty." + raise ValueError(msg) + + prompt = self._extract_last_user_message(messages) + dspy_inputs = self._build_dspy_inputs(prompt, **kwargs) + + prediction = await self._module.acall(**dspy_inputs, config=generation_kwargs or {}) + + if not hasattr(prediction, self.output_field): + available = list(prediction.keys()) + msg = f"Output field '{self.output_field}' not found in prediction. 
Available fields: {available}" + raise ValueError(msg) + output_text = getattr(prediction, self.output_field) + + replies = [ChatMessage.from_assistant(text=output_text)] + return {"replies": replies} diff --git a/integrations/dspy/src/haystack_integrations/components/generators/py.typed b/integrations/dspy/src/haystack_integrations/components/generators/py.typed new file mode 100644 index 0000000000..e69de29bb2 diff --git a/integrations/dspy/tests/__init__.py b/integrations/dspy/tests/__init__.py new file mode 100644 index 0000000000..6b5e14dc19 --- /dev/null +++ b/integrations/dspy/tests/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2024-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 diff --git a/integrations/dspy/tests/test_chat_generator.py b/integrations/dspy/tests/test_chat_generator.py new file mode 100644 index 0000000000..477b9ff9c9 --- /dev/null +++ b/integrations/dspy/tests/test_chat_generator.py @@ -0,0 +1,512 @@ +import os +from unittest.mock import MagicMock, patch + +import dspy +import pytest +from haystack.dataclasses import ChatMessage + +from haystack_integrations.components.generators.dspy.chat.chat_generator import ( + VALID_MODULE_TYPES, + DSPySignatureChatGenerator, + _create_dspy_lm, + _get_dspy_module_class, +) + + +@pytest.fixture +def mock_dspy_module(): + """ + Mock DSPy LM, configure, and module classes to avoid real API calls. 
+ """ + with ( + patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.LM") as mock_lm_class, + patch( + "haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.ChainOfThought" + ) as mock_cot_class, + patch( + "haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.Predict" + ) as mock_predict_class, + patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.ReAct") as mock_react_class, + ): + mock_lm = MagicMock() + mock_lm_class.return_value = mock_lm + + mock_module = MagicMock() + mock_module.return_value = MagicMock(answer="Hello world!") + mock_cot_class.return_value = mock_module + mock_predict_class.return_value = mock_module + mock_react_class.return_value = mock_module + + yield mock_module + + +@pytest.fixture +def chat_messages(): + return [ + ChatMessage.from_system("You are a helpful assistant"), + ChatMessage.from_user("What's the capital of France"), + ] + + +@pytest.fixture +def sample_qa_signature(): + class QASignature(dspy.Signature): + question: str = dspy.InputField() + answer: str = dspy.OutputField() + + return QASignature + + +class TestValidModuleTypes: + def test_contains_expected_types(self): + assert VALID_MODULE_TYPES == {"Predict", "ChainOfThought", "ReAct"} + + +class TestGetDspyModuleClass: + def test_predict(self): + assert _get_dspy_module_class("Predict") is dspy.Predict + + def test_chain_of_thought(self): + assert _get_dspy_module_class("ChainOfThought") is dspy.ChainOfThought + + def test_react(self): + assert _get_dspy_module_class("ReAct") is dspy.ReAct + + def test_invalid_type_raises(self): + with pytest.raises(ValueError, match="Invalid module_type 'Unknown'"): + _get_dspy_module_class("Unknown") + + def test_invalid_type_lists_valid_options(self): + with pytest.raises(ValueError, match="ChainOfThought"): + _get_dspy_module_class("BadType") + + +class TestCreateDspyLm: + @patch("dspy.LM") + def test_creates_lm(self, mock_lm_class): + 
mock_lm = MagicMock() + mock_lm_class.return_value = mock_lm + + result = _create_dspy_lm(model="openai/gpt-5-mini") + + mock_lm_class.assert_called_once_with(model="openai/gpt-5-mini") + assert result is mock_lm + + @patch("dspy.LM") + def test_passes_extra_kwargs(self, mock_lm_class): + mock_lm = MagicMock() + mock_lm_class.return_value = mock_lm + + _create_dspy_lm(model="openai/gpt-5-mini", temperature=0.7, max_tokens=100) + + mock_lm_class.assert_called_once_with(model="openai/gpt-5-mini", temperature=0.7, max_tokens=100) + + @patch("dspy.LM") + def test_passes_api_base(self, mock_lm_class): + mock_lm = MagicMock() + mock_lm_class.return_value = mock_lm + + _create_dspy_lm(model="openai/local-model", api_base="http://localhost:8000") + + mock_lm_class.assert_called_once_with(model="openai/local-model", api_base="http://localhost:8000") + + @patch("dspy.LM") + def test_omits_api_base_when_none(self, mock_lm_class): + mock_lm = MagicMock() + mock_lm_class.return_value = mock_lm + + _create_dspy_lm(model="openai/gpt-5-mini") + + mock_lm_class.assert_called_once_with(model="openai/gpt-5-mini") + + +class TestDSPySignatureChatGenerator: + def test_init_default(self, mock_dspy_module): + component = DSPySignatureChatGenerator(signature="question -> answer") + assert component.model == "openai/gpt-5-mini" + assert component.signature == "question -> answer" + assert component.module_type == "ChainOfThought" + assert component.output_field == "answer" + assert not component.generation_kwargs + assert component.input_mapping is None + assert component.api_base is None + assert not component.module_kwargs + + def test_init_with_parameters(self, mock_dspy_module): + component = DSPySignatureChatGenerator( + signature="context, question -> answer", + model="openai/gpt-4o", + api_base="http://localhost:8000", + module_type="Predict", + output_field="response", + generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"}, + module_kwargs={"some_param": "value"}, 
+ input_mapping={"context": "context", "question": "question"}, + ) + assert component.model == "openai/gpt-4o" + assert component.signature == "context, question -> answer" + assert component.module_type == "Predict" + assert component.output_field == "response" + assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} + assert component.module_kwargs == {"some_param": "value"} + assert component.input_mapping == {"context": "context", "question": "question"} + assert component.api_base == "http://localhost:8000" + + def test_init_with_invalid_module_type(self, mock_dspy_module): + with pytest.raises(ValueError, match="Invalid module_type"): + DSPySignatureChatGenerator( + signature="question -> answer", + module_type="InvalidModule", + ) + + def test_init_with_signature_class(self, mock_dspy_module, sample_qa_signature): + component = DSPySignatureChatGenerator( + signature=sample_qa_signature, + ) + assert component.signature is sample_qa_signature + + def test_init_with_module_kwargs(self, mock_dspy_module): + """Test that module_kwargs are passed to the DSPy module constructor.""" + tools = [MagicMock(), MagicMock()] + component = DSPySignatureChatGenerator( + signature="question -> answer", + module_type="ReAct", + module_kwargs={"tools": tools}, + ) + assert component.module_kwargs == {"tools": tools} + + def test_init_with_api_base(self, mock_dspy_module): + """Test initialization with api_base for local models.""" + component = DSPySignatureChatGenerator( + signature="question -> answer", + api_base="http://localhost:11434/v1", + ) + assert component.api_base == "http://localhost:11434/v1" + + def test_to_dict_default(self, mock_dspy_module): + component = DSPySignatureChatGenerator( + signature="question -> answer", + ) + data = component.to_dict() + assert data == { + "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", + "init_parameters": { + "signature": {"type": 
"str", "value": "question -> answer"}, + "model": "openai/gpt-5-mini", + "api_base": None, + "module_type": "ChainOfThought", + "output_field": "answer", + "generation_kwargs": {}, + "module_kwargs": {}, + "input_mapping": None, + }, + } + + def test_to_dict_with_parameters(self, mock_dspy_module): + component = DSPySignatureChatGenerator( + signature="context, question -> answer", + model="openai/gpt-4o", + api_base="http://localhost:8000", + module_type="Predict", + output_field="response", + generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"}, + input_mapping={"context": "context", "question": "question"}, + ) + data = component.to_dict() + assert data == { + "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", + "init_parameters": { + "signature": {"type": "str", "value": "context, question -> answer"}, + "model": "openai/gpt-4o", + "api_base": "http://localhost:8000", + "module_type": "Predict", + "output_field": "response", + "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, + "module_kwargs": {}, + "input_mapping": {"context": "context", "question": "question"}, + }, + } + + def test_to_dict_with_signature_class(self, mock_dspy_module, sample_qa_signature): + """Test that signature classes are serialized as fully qualified class paths.""" + component = DSPySignatureChatGenerator( + signature=sample_qa_signature, + ) + data = component.to_dict() + signature_value = data["init_parameters"]["signature"] + assert signature_value["type"] == "class" + assert "QASignature" in signature_value["value"] + assert "." 
in signature_value["value"] + + def test_from_dict(self, mock_dspy_module): + data = { + "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", + "init_parameters": { + "signature": {"type": "str", "value": "question -> answer"}, + "model": "openai/gpt-4o", + "api_base": None, + "module_type": "Predict", + "output_field": "response", + "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, + "module_kwargs": {}, + "input_mapping": None, + }, + } + component = DSPySignatureChatGenerator.from_dict(data) + assert component.model == "openai/gpt-4o" + assert component.signature == "question -> answer" + assert component.module_type == "Predict" + assert component.output_field == "response" + assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"} + assert component.input_mapping is None + assert component.api_base is None + + def test_from_dict_with_api_base(self, mock_dspy_module): + """Test deserialization with api_base.""" + data = { + "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", + "init_parameters": { + "signature": {"type": "str", "value": "question -> answer"}, + "model": "openai/local-model", + "api_base": "http://localhost:8000", + "module_type": "Predict", + "output_field": "answer", + "generation_kwargs": {}, + "module_kwargs": {}, + "input_mapping": None, + }, + } + component = DSPySignatureChatGenerator.from_dict(data) + assert component.api_base == "http://localhost:8000" + + def test_from_dict_resolves_signature_class_path(self, mock_dspy_module): + """Test that from_dict resolves a dotted signature class path.""" + data = { + "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", + "init_parameters": { + "signature": {"type": "class", "value": "dspy.Signature"}, + "model": "openai/gpt-5-mini", + "module_type": "Predict", + "output_field": 
"answer", + "generation_kwargs": {}, + "module_kwargs": {}, + "input_mapping": None, + }, + } + component = DSPySignatureChatGenerator.from_dict(data) + assert component.signature is dspy.Signature + + def test_from_dict_with_unknown_signature_type(self, mock_dspy_module): + """Test that from_dict raises an error for unknown signature types.""" + data = { + "type": "haystack_integrations.components.generators.dspy.chat.chat_generator.DSPySignatureChatGenerator", + "init_parameters": { + "signature": {"type": "unknown", "value": "question -> answer"}, + "model": "openai/gpt-5-mini", + "module_type": "Predict", + "output_field": "answer", + "generation_kwargs": {}, + "module_kwargs": {}, + "input_mapping": None, + }, + } + with pytest.raises(ValueError, match="Unknown signature type 'unknown'"): + DSPySignatureChatGenerator.from_dict(data) + + def test_run(self, chat_messages, mock_dspy_module): + component = DSPySignatureChatGenerator( + signature="question -> answer", + ) + response = component.run(chat_messages) + + mock_dspy_module.assert_called_once() + + assert isinstance(response, dict) + assert "replies" in response + assert isinstance(response["replies"], list) + assert len(response["replies"]) == 1 + assert all(isinstance(reply, ChatMessage) for reply in response["replies"]) + + def test_run_always_passes_config(self, chat_messages, mock_dspy_module): + """Test that config is always passed (even as empty dict) - simplified call.""" + component = DSPySignatureChatGenerator( + signature="question -> answer", + ) + component.run(chat_messages) + + _, kwargs = mock_dspy_module.call_args + assert kwargs["config"] == {} + + def test_run_with_generation_kwargs(self, chat_messages, mock_dspy_module): + component = DSPySignatureChatGenerator( + signature="question -> answer", + generation_kwargs={"max_tokens": 10, "temperature": 0.5}, + ) + response = component.run(chat_messages, generation_kwargs={"temperature": 0.9}) + + _, kwargs = mock_dspy_module.call_args + 
assert kwargs["config"] == {"temperature": 0.9} + + assert isinstance(response, dict) + assert "replies" in response + assert len(response["replies"]) == 1 + assert all(isinstance(reply, ChatMessage) for reply in response["replies"]) + + def test_run_with_multiple_messages(self, mock_dspy_module): + component = DSPySignatureChatGenerator( + signature="question -> answer", + ) + messages = [ + ChatMessage.from_user("Hello"), + ChatMessage.from_assistant("Hi there!"), + ChatMessage.from_user("What is the capital of Germany?"), + ] + response = component.run(messages=messages) + + call_kwargs = mock_dspy_module.call_args.kwargs + assert call_kwargs.get("question") == "What is the capital of Germany?" + + assert "replies" in response + assert len(response["replies"]) == 1 + assert isinstance(response["replies"][0], ChatMessage) + + def test_run_with_empty_messages(self, mock_dspy_module): + component = DSPySignatureChatGenerator( + signature="question -> answer", + ) + with pytest.raises(ValueError, match="messages"): + component.run(messages=[]) + + def test_run_with_no_user_message(self, mock_dspy_module): + component = DSPySignatureChatGenerator( + signature="question -> answer", + ) + messages = [ChatMessage.from_assistant("I'm an assistant")] + with pytest.raises(ValueError, match="No user message found"): + component.run(messages=messages) + + def test_run_with_empty_user_message(self, mock_dspy_module): + component = DSPySignatureChatGenerator( + signature="question -> answer", + ) + messages = [ChatMessage.from_user("")] + with pytest.raises(ValueError, match="no text content"): + component.run(messages=messages) + + def test_run_with_wrong_output_field(self, mock_dspy_module): + prediction = MagicMock(spec=["answer", "keys"]) + prediction.keys.return_value = ["answer"] + mock_dspy_module.return_value = prediction + component = DSPySignatureChatGenerator( + signature="question -> answer", + output_field="nonexistent", + ) + messages = 
[ChatMessage.from_user("Hello")] + with pytest.raises(ValueError, match="Output field 'nonexistent' not found"): + component.run(messages=messages) + + def test_run_with_custom_output_field(self, mock_dspy_module): + mock_dspy_module.return_value = MagicMock(summary="This is a summary") + component = DSPySignatureChatGenerator( + signature="text -> summary", + output_field="summary", + ) + messages = [ChatMessage.from_user("Summarize this text")] + response = component.run(messages=messages) + + assert response["replies"][0].text == "This is a summary" + + def test_run_with_input_mapping(self, mock_dspy_module): + component = DSPySignatureChatGenerator( + signature="context, question -> answer", + input_mapping={"context": "context", "question": "question"}, + ) + messages = [ChatMessage.from_user("What is ML?")] + component.run(messages=messages, context="Machine learning is a subset of AI.") + + call_kwargs = mock_dspy_module.call_args.kwargs + assert call_kwargs.get("context") == "Machine learning is a subset of AI." + assert call_kwargs.get("question") == "What is ML?" 
+
+    def test_run_with_wrong_model(self, mock_dspy_module):  # unit test: an error raised by the mocked DSPy module must surface from run()
+        mock_dspy_module.side_effect = Exception("Invalid model name")  # presumably the fixture yields the mocked module instance, so calling it raises; fixture is defined outside this chunk, verify
+
+        generator = DSPySignatureChatGenerator(
+            signature="question -> answer",
+            model="something-obviously-wrong",
+        )
+
+        with pytest.raises(Exception, match="Invalid model name"):  # the raised error's message must contain the injected text
+            generator.run(messages=[ChatMessage.from_user("Whatever")])
+
+    @pytest.mark.skipif(
+        not os.environ.get("OPENAI_API_KEY", None),  # true when the variable is unset or empty, which skips the test
+        reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
+    )
+    @pytest.mark.integration  # tagged with the `integration` marker
+    def test_live_run(self):  # live run with only a signature given, so the component's own defaults apply for everything else
+        chat_messages = [ChatMessage.from_user("What's the capital of France")]
+        component = DSPySignatureChatGenerator(signature="question -> answer")
+        results = component.run(chat_messages)
+        assert len(results["replies"]) == 1  # exactly one reply is expected
+        message: ChatMessage = results["replies"][0]
+        assert "Paris" in message.text  # content check on the returned answer
+
+    @pytest.mark.skipif(
+        not os.environ.get("OPENAI_API_KEY", None),  # same guard as above: skip unless an API key is exported
+        reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
+    )
+    @pytest.mark.integration
+    def test_live_run_with_predict_module(self):
+        """Test using the Predict module type with a string signature."""
+        chat_messages = [ChatMessage.from_user("What is 2 + 2?")]
+        component = DSPySignatureChatGenerator(
+            signature="question -> answer",
+            module_type="Predict",  # module type is selected by its name as a string
+        )
+        results = component.run(chat_messages)
+        assert len(results["replies"]) == 1
+        assert "4" in results["replies"][0].text  # substring check on the single reply
+
+    @pytest.mark.skipif(
+        not os.environ.get("OPENAI_API_KEY", None),  # same guard as above: skip unless an API key is exported
+        reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
+    )
+    @pytest.mark.integration
+    def test_live_run_with_signature_class(self):
+        """Test using a dspy.Signature class instead of a string signature."""
+
+        class QASignature(dspy.Signature):  # signature declared locally; NOTE(review): DSPy presumably uses the class docstring as task instructions, confirm
+            """Answer questions accurately and concisely."""
+
+            question = dspy.InputField(desc="The user's question")  # single input field
+            answer = dspy.OutputField(desc="A clear, concise answer")  # single output field
+
+        chat_messages = [ChatMessage.from_user("What language is spoken in Brazil?")]
+        component = DSPySignatureChatGenerator(
+            signature=QASignature,  # the class itself is passed, not a string
+            module_type="ChainOfThought",
+        )
+        results = component.run(chat_messages)
+        assert len(results["replies"]) == 1
+        assert "Portuguese" in results["replies"][0].text  # content check on the returned answer
+
+    @pytest.mark.skipif(
+        not os.environ.get("OPENAI_API_KEY", None),  # same guard as above: skip unless an API key is exported
+        reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",
+    )
+    @pytest.mark.integration
+    def test_live_run_with_multi_field_signature(self):
+        """Test using a multi-input signature with input_mapping."""
+        chat_messages = [ChatMessage.from_user("What is the main topic?")]
+        component = DSPySignatureChatGenerator(
+            signature="context, question -> answer",  # two inputs, one output
+            module_type="Predict",
+            input_mapping={"context": "context", "question": "question"},  # identity mapping for both inputs; NOTE(review): mapping direction is not visible in this chunk, confirm against the component
+        )
+        results = component.run(
+            chat_messages,
+            context="Python is a popular programming language created by Guido van Rossum.",  # extra keyword argument passed alongside the messages
+        )
+        assert len(results["replies"]) == 1
+        assert results["replies"][0].text  # only checks that the reply text is non-empty, not what it says
diff --git a/integrations/dspy/tests/test_chat_generator_async.py b/integrations/dspy/tests/test_chat_generator_async.py
new file mode 100644
index 0000000000..e83a8b1255
--- /dev/null
+++ b/integrations/dspy/tests/test_chat_generator_async.py
@@ -0,0 +1,97 @@
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from haystack.dataclasses import ChatMessage
+
+from haystack_integrations.components.generators.dspy.chat.chat_generator import DSPySignatureChatGenerator
+
+
+@pytest.fixture
+def mock_dspy_module():
+    """
+    Mock DSPy LM, configure, and module classes to avoid real API calls.
+    """
+    with (  # patches LM, ChainOfThought, Predict and ReAct; NOTE(review): nothing here patches dspy.configure although the docstring mentions it
+        patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.LM") as mock_lm_class,
+        patch(
+            "haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.ChainOfThought"
+        ) as mock_cot_class,
+        patch(
+            "haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.Predict"
+        ) as mock_predict_class,
+        patch("haystack_integrations.components.generators.dspy.chat.chat_generator.dspy.ReAct") as mock_react_class,
+    ):
+        mock_lm = MagicMock()
+        mock_lm_class.return_value = mock_lm  # constructing the patched LM class returns this stub
+
+        mock_module = MagicMock()
+        mock_module.return_value = MagicMock(answer="Hello world!")  # result of calling the module synchronously
+        mock_module.acall = AsyncMock(return_value=MagicMock(answer="Hello world!"))  # awaitable counterpart inspected by the async tests
+
+        mock_cot_class.return_value = mock_module  # all three module classes hand back the same mock instance
+        mock_predict_class.return_value = mock_module
+        mock_react_class.return_value = mock_module
+
+        yield mock_module  # yielded inside the `with`, so the patches stay active while the test runs
+
+
+@pytest.fixture
+def chat_messages():  # one system message followed by one user message
+    return [
+        ChatMessage.from_system("You are a helpful assistant"),
+        ChatMessage.from_user("What's the capital of France"),
+    ]
+
+
+class TestDSPySignatureChatGeneratorAsync:  # run_async tests; every test here uses the mocked DSPy module fixture
+    @pytest.mark.asyncio  # NOTE(review): relies on an asyncio pytest plugin (e.g. pytest-asyncio) being installed, confirm
+    async def test_run_async(self, chat_messages, mock_dspy_module):  # basic shape check of the run_async result
+        component = DSPySignatureChatGenerator(
+            signature="question -> answer",
+        )
+        response = await component.run_async(messages=chat_messages)
+
+        mock_dspy_module.acall.assert_called_once()  # the async path must call acall exactly once
+
+        assert isinstance(response, dict)
+        assert "replies" in response
+        assert isinstance(response["replies"], list)
+        assert len(response["replies"]) == 1
+        assert all(isinstance(reply, ChatMessage) for reply in response["replies"])  # every reply is a ChatMessage
+
+    @pytest.mark.asyncio
+    async def test_run_async_always_passes_config(self, chat_messages, mock_dspy_module):
+        """Test that config is always passed (even as empty dict) in async mode."""
+        component = DSPySignatureChatGenerator(
+            signature="question -> answer",
+        )
+        await component.run_async(messages=chat_messages)
+
+        _, kwargs = mock_dspy_module.acall.call_args  # keep only the keyword arguments of the acall invocation
+        assert kwargs["config"] == {}  # no generation_kwargs were given, yet `config` is still present and empty
+
+    @pytest.mark.asyncio
+    async def test_run_async_with_params(self, chat_messages, mock_dspy_module):  # generation_kwargs must reach acall as `config`
+        component = DSPySignatureChatGenerator(
+            signature="question -> answer",
+        )
+        response = await component.run_async(
+            messages=chat_messages,
+            generation_kwargs={"temperature": 0.9},
+        )
+
+        _, kwargs = mock_dspy_module.acall.call_args  # keep only the keyword arguments of the acall invocation
+        assert kwargs["config"] == {"temperature": 0.9}  # the run-time generation_kwargs arrive unchanged
+
+        assert isinstance(response, dict)
+        assert "replies" in response
+        assert len(response["replies"]) == 1
+        assert all(isinstance(reply, ChatMessage) for reply in response["replies"])
+
+    @pytest.mark.asyncio
+    async def test_run_async_with_empty_messages(self, mock_dspy_module):  # an empty message list must be rejected
+        component = DSPySignatureChatGenerator(
+            signature="question -> answer",
+        )
+        with pytest.raises(ValueError, match="messages"):  # the error text must mention "messages"
+            await component.run_async(messages=[])