diff --git a/.github/labeler.yml b/.github/labeler.yml index e16d1c6ad3..6961f53505 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -228,6 +228,11 @@ integration:snowflake: - any-glob-to-any-file: "integrations/snowflake/**/*" - any-glob-to-any-file: ".github/workflows/snowflake.yml" +integration:sqlalchemy: + - changed-files: + - any-glob-to-any-file: "integrations/sqlalchemy/**/*" + - any-glob-to-any-file: ".github/workflows/sqlalchemy.yml" + integration:stackit: - changed-files: - any-glob-to-any-file: "integrations/stackit/**/*" diff --git a/.github/workflows/CI_coverage_comment.yml b/.github/workflows/CI_coverage_comment.yml index f4b83385a5..9a50afb708 100644 --- a/.github/workflows/CI_coverage_comment.yml +++ b/.github/workflows/CI_coverage_comment.yml @@ -47,6 +47,7 @@ on: - "Test / qdrant" - "Test / ragas" - "Test / snowflake" + - "Test / sqlalchemy" - "Test / stackit" - "Test / tavily" - "Test / togetherai" diff --git a/.github/workflows/sqlalchemy.yml b/.github/workflows/sqlalchemy.yml new file mode 100644 index 0000000000..0916fcec21 --- /dev/null +++ b/.github/workflows/sqlalchemy.yml @@ -0,0 +1,119 @@ +# This workflow comes from https://github.com/ofek/hatch-mypyc +# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml +name: Test / sqlalchemy + +on: + schedule: + - cron: "0 0 * * *" + pull_request: + paths: + - "integrations/sqlalchemy/**" + - "!integrations/sqlalchemy/*.md" + - ".github/workflows/sqlalchemy.yml" + push: + branches: + - main + paths: + - "integrations/sqlalchemy/**" + - "!integrations/sqlalchemy/*.md" + - ".github/workflows/sqlalchemy.yml" + +defaults: + run: + working-directory: integrations/sqlalchemy + +concurrency: + group: sqlalchemy-${{ github.head_ref || github.sha }} + cancel-in-progress: true + +env: + PYTHONUNBUFFERED: "1" + FORCE_COLOR: "1" + TEST_MATRIX_OS: '["ubuntu-latest", "windows-latest", "macos-latest"]' + TEST_MATRIX_PYTHON: '["3.10", "3.14"]' + 
+jobs: + compute-test-matrix: + runs-on: ubuntu-slim + defaults: + run: + working-directory: . + outputs: + os: ${{ steps.set.outputs.os }} + python-version: ${{ steps.set.outputs.python-version }} + steps: + - id: set + run: | + echo 'os=${{ github.event_name == 'push' && '["ubuntu-latest"]' || env.TEST_MATRIX_OS }}' >> "$GITHUB_OUTPUT" + echo 'python-version=${{ github.event_name == 'push' && '["3.10"]' || env.TEST_MATRIX_PYTHON }}' >> "$GITHUB_OUTPUT" + + run: + name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }} + needs: compute-test-matrix + permissions: + contents: write + pull-requests: write + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ${{ fromJSON(needs.compute-test-matrix.outputs.os) }} + python-version: ${{ fromJSON(needs.compute-test-matrix.outputs.python-version) }} + + steps: + - name: Support longpaths + if: matrix.os == 'windows-latest' + working-directory: . + run: git config --system core.longpaths true + + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Hatch + run: pip install hatch + + - name: Lint + if: matrix.python-version == '3.10' && runner.os == 'Linux' + run: hatch run fmt-check && hatch run test:types + + - name: Run unit tests + run: hatch run test:unit-cov-retry + + # On PR: generates coverage comment artifact. On push to main: stores coverage baseline on data branch. 
+ - name: Store unit tests coverage + if: matrix.python-version == '3.10' && runner.os == 'Linux' && github.event_name != 'schedule' + uses: py-cov-action/python-coverage-comment-action@7188638f871f721a365d644f505d1ff3df20d683 # v3.40 + with: + GITHUB_TOKEN: ${{ github.token }} + COVERAGE_PATH: integrations/sqlalchemy + SUBPROJECT_ID: sqlalchemy + COMMENT_ARTIFACT_NAME: coverage-comment-sqlalchemy + MINIMUM_GREEN: 90 + MINIMUM_ORANGE: 60 + + - name: Run unit tests with lowest direct dependencies + if: github.event_name != 'push' + run: | + hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt + hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt + hatch run test:unit + + - name: Nightly - run unit tests with Haystack main branch + if: github.event_name == 'schedule' + run: | + hatch env prune + hatch -e test env run -- uv pip install git+https://github.com/deepset-ai/haystack.git@main + hatch run test:unit + + notify-slack-on-failure: + needs: run + if: failure() && github.event_name == 'schedule' + runs-on: ubuntu-slim + steps: + - uses: deepset-ai/notify-slack-action@3cda73b77a148f16f703274198e7771340cf862b # v1 + with: + slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL_NOTIFICATIONS }} diff --git a/README.md b/README.md index 146ccb334d..cbeabec5e6 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta | [qdrant-haystack](integrations/qdrant/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/qdrant-haystack.svg?color=orange)](https://pypi.org/project/qdrant-haystack) | [![Test / qdrant](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/qdrant.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/qdrant.yml) | [![Coverage 
badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-qdrant/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-qdrant/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-qdrant-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-qdrant-combined/htmlcov/index.html) | | [ragas-haystack](integrations/ragas/) | Evaluator | [![PyPI - Version](https://img.shields.io/pypi/v/ragas-haystack.svg)](https://pypi.org/project/ragas-haystack) | [![Test / ragas](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/ragas.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/ragas.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-ragas/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-ragas/htmlcov/index.html) | | | [snowflake-haystack](integrations/snowflake/) | Retriever | [![PyPI - Version](https://img.shields.io/pypi/v/snowflake-haystack.svg)](https://pypi.org/project/snowflake-haystack) | [![Test / snowflake](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/snowflake.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/snowflake.yml) | [![Coverage 
badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-snowflake/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-snowflake/htmlcov/index.html) | | +| [sqlalchemy-haystack](integrations/sqlalchemy/) | Retriever | [![PyPI - Version](https://img.shields.io/pypi/v/sqlalchemy-haystack.svg)](https://pypi.org/project/sqlalchemy-haystack) | [![Test / sqlalchemy](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/sqlalchemy.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/sqlalchemy.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-sqlalchemy/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-sqlalchemy/htmlcov/index.html) | | | [stackit-haystack](integrations/stackit/) | Embedder, Generator | [![PyPI - Version](https://img.shields.io/pypi/v/stackit-haystack.svg)](https://pypi.org/project/stackit-haystack) | [![Test / stackit](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/stackit.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/stackit.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-stackit/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-stackit/htmlcov/index.html) | [![Coverage 
badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-stackit-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-stackit-combined/htmlcov/index.html) | | [tavily-haystack](integrations/tavily/) | Websearch | [![PyPI - Version](https://img.shields.io/pypi/v/tavily-haystack.svg)](https://pypi.org/project/tavily-haystack) | [![Test / tavily](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/tavily.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/tavily.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-tavily/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-tavily/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-tavily-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-tavily-combined/htmlcov/index.html) | | [togetherai-haystack](integrations/togetherai/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/togetherai-haystack.svg)](https://pypi.org/project/togetherai-haystack) | [![Test / togetherai](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/togetherai.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/togetherai.yml) | [![Coverage 
badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-togetherai/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-togetherai/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-togetherai-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-togetherai-combined/htmlcov/index.html) | diff --git a/integrations/sqlalchemy/CHANGELOG.md b/integrations/sqlalchemy/CHANGELOG.md new file mode 100644 index 0000000000..c61ffd77cb --- /dev/null +++ b/integrations/sqlalchemy/CHANGELOG.md @@ -0,0 +1,7 @@ +# Changelog + +## [integrations/sqlalchemy-v0.1.0] - 2026-04-02 + +### 🚀 Features + +- Initial release of `sqlalchemy-haystack` integration (#3077) diff --git a/integrations/sqlalchemy/LICENSE.txt b/integrations/sqlalchemy/LICENSE.txt new file mode 100644 index 0000000000..35ac8a29ee --- /dev/null +++ b/integrations/sqlalchemy/LICENSE.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2025 deepset GmbH + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/integrations/sqlalchemy/README.md b/integrations/sqlalchemy/README.md new file mode 100644 index 0000000000..5b45fde384 --- /dev/null +++ b/integrations/sqlalchemy/README.md @@ -0,0 +1,12 @@ +# sqlalchemy-haystack + +[![PyPI - Version](https://img.shields.io/pypi/v/sqlalchemy-haystack.svg)](https://pypi.org/project/sqlalchemy-haystack) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/sqlalchemy-haystack.svg)](https://pypi.org/project/sqlalchemy-haystack) + +- [Changelog](https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/sqlalchemy/CHANGELOG.md) + +--- + +## Contributing + +Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md). 
diff --git a/integrations/sqlalchemy/pydoc/config_docusaurus.yml b/integrations/sqlalchemy/pydoc/config_docusaurus.yml new file mode 100644 index 0000000000..5e5d24f6b2 --- /dev/null +++ b/integrations/sqlalchemy/pydoc/config_docusaurus.yml @@ -0,0 +1,13 @@ +loaders: + - modules: + - haystack_integrations.components.retrievers.sqlalchemy.sqlalchemy_table_retriever + search_path: [../src] +processors: + - type: filter + documented_only: true + skip_empty_modules: true +renderer: + description: SQLAlchemy integration for Haystack + id: integrations-sqlalchemy + filename: sqlalchemy.md + title: SQLAlchemy diff --git a/integrations/sqlalchemy/pyproject.toml b/integrations/sqlalchemy/pyproject.toml new file mode 100644 index 0000000000..132baad8bf --- /dev/null +++ b/integrations/sqlalchemy/pyproject.toml @@ -0,0 +1,161 @@ +[build-system] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + +[project] +name = "sqlalchemy-haystack" +dynamic = ["version"] +description = 'A SQLAlchemy integration for the Haystack framework.' 
+readme = "README.md" +requires-python = ">=3.10" +license = "Apache-2.0" +keywords = [] +authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }] +classifiers = [ + "License :: OSI Approved :: Apache Software License", + "Development Status :: 4 - Beta", + "Programming Language :: Python", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = [ + "haystack-ai>=2.12.0", + "sqlalchemy>=2.0.36", + "pandas>=2.2.3; python_version < '3.11'", + "pandas>=3.0.0; python_version >= '3.11'", +] + +[project.urls] +Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/sqlalchemy#readme" +Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues" +Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/sqlalchemy" + +[tool.hatch.build.targets.wheel] +packages = ["src/haystack_integrations"] + +[tool.hatch.version] +source = "vcs" +tag-pattern = 'integrations\/sqlalchemy-v(?P<version>.*)' + +[tool.hatch.version.raw-options] +root = "../.." 
+git_describe_command = 'git describe --tags --match="integrations/sqlalchemy-v[0-9]*"' + +[tool.hatch.envs.default] +installer = "uv" +dependencies = ["haystack-pydoc-tools", "ruff"] + +[tool.hatch.envs.default.scripts] +docs = ["haystack-pydoc pydoc/config_docusaurus.yml"] +fmt = "ruff check --fix {args}; ruff format {args}" +fmt-check = "ruff check {args} && ruff format --check {args}" + +[tool.hatch.envs.test] +dependencies = [ + "pytest", + "pytest-asyncio", + "pytest-cov", + "pytest-rerunfailures", + "mypy", + "pip", + "pandas-stubs", +] + +[tool.hatch.envs.test.scripts] +unit = 'pytest -m "not integration" {args:tests}' +integration = 'pytest -m "integration" {args:tests}' +all = 'pytest {args:tests}' +unit-cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x -m "not integration" {args:tests}' +integration-cov-append-retry = 'pytest --cov=haystack_integrations --cov-append --reruns 3 --reruns-delay 30 -x -m "integration" {args:tests}' + +types = "mypy -p haystack_integrations.components.retrievers.sqlalchemy {args}" + +[tool.mypy] +install_types = true +non_interactive = true +check_untyped_defs = true +disallow_incomplete_defs = true + +[tool.ruff] +line-length = 120 + +[tool.ruff.lint] +select = [ + "A", + "ANN", + "ARG", + "B", + "C", + "D102", + "D103", + "D205", + "D209", + "D213", + "D417", + "D419", + "DTZ", + "E", + "EM", + "F", + "I", + "ICN", + "ISC", + "N", + "PLC", + "PLE", + "PLR", + "PLW", + "Q", + "RUF", + "S", + "T", + "TID", + "UP", + "W", + "YTT", +] +ignore = [ + # Allow non-abstract empty methods in abstract base classes + "B027", + # Allow Any in type annotations at dynamic boundaries + "ANN401", + # Ignore checks for possible passwords + "S105", + "S106", + "S107", + # Ignore complexity + "C901", + "PLR0911", + "PLR0912", + "PLR0913", + "PLR0915", + # Ignore SQL injection warning (user-provided queries are expected) + "S608", + # Unused method argument + "ARG002", +] + +[tool.ruff.lint.isort] 
+known-first-party = ["haystack_integrations"]
+
+[tool.ruff.lint.flake8-tidy-imports]
+ban-relative-imports = "parents"
+
+[tool.ruff.lint.per-file-ignores]
+"tests/**/*" = ["D", "PLR2004", "S101", "TID252", "ANN"]
+
+[tool.coverage.run]
+source = ["haystack_integrations"]
+branch = true
+relative_files = true
+parallel = false
+
+[tool.coverage.report]
+omit = ["*/tests/*", "*/__init__.py"]
+show_missing = true
+exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
diff --git a/integrations/sqlalchemy/src/haystack_integrations/components/retrievers/py.typed b/integrations/sqlalchemy/src/haystack_integrations/components/retrievers/py.typed
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/integrations/sqlalchemy/src/haystack_integrations/components/retrievers/sqlalchemy/__init__.py b/integrations/sqlalchemy/src/haystack_integrations/components/retrievers/sqlalchemy/__init__.py
new file mode 100644
index 0000000000..0bf2dab6cb
--- /dev/null
+++ b/integrations/sqlalchemy/src/haystack_integrations/components/retrievers/sqlalchemy/__init__.py
@@ -0,0 +1,7 @@
+# SPDX-FileCopyrightText: 2025-present deepset GmbH
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from .sqlalchemy_table_retriever import SQLAlchemyTableRetriever
+
+__all__ = ["SQLAlchemyTableRetriever"]
diff --git a/integrations/sqlalchemy/src/haystack_integrations/components/retrievers/sqlalchemy/sqlalchemy_table_retriever.py b/integrations/sqlalchemy/src/haystack_integrations/components/retrievers/sqlalchemy/sqlalchemy_table_retriever.py
new file mode 100644
index 0000000000..02ef784255
--- /dev/null
+++ b/integrations/sqlalchemy/src/haystack_integrations/components/retrievers/sqlalchemy/sqlalchemy_table_retriever.py
@@ -0,0 +1,209 @@
+# SPDX-FileCopyrightText: 2025-present deepset GmbH
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Any
+
+from haystack import component, default_from_dict, default_to_dict, logging
+from haystack.utils import Secret, deserialize_secrets_inplace
+from pandas import DataFrame
+
+from sqlalchemy import create_engine, text
+from sqlalchemy.engine import URL, Engine
+from sqlalchemy.exc import SQLAlchemyError
+from sqlalchemy.pool import StaticPool
+
+logger = logging.getLogger(__name__)
+
+# Upper bound on the number of rows ``run()`` fetches for a single query.
+MAX_SYS_ROWS = 10_000
+
+
+@component
+class SQLAlchemyTableRetriever:
+    """
+    Connects to any SQLAlchemy-supported database and executes a SQL query.
+
+    Returns results as a Pandas DataFrame and a Markdown-formatted table string.
+    Supports any database backend that SQLAlchemy supports, including PostgreSQL, MySQL,
+    SQLite, and MSSQL. At most ``MAX_SYS_ROWS`` rows are fetched per query.
+
+    ### Usage example:
+
+    ```python
+    from haystack_integrations.components.retrievers.sqlalchemy import SQLAlchemyTableRetriever
+
+    retriever = SQLAlchemyTableRetriever(drivername="sqlite", database=":memory:")
+    retriever.warm_up()
+    result = retriever.run(query="SELECT 1 AS value")
+    print(result["dataframe"])
+    print(result["table"])
+    ```
+    """
+
+    def __init__(
+        self,
+        drivername: str,
+        username: str | None = None,
+        password: Secret | None = None,
+        host: str | None = None,
+        port: int | None = None,
+        database: str | None = None,
+        init_script: str | None = None,
+    ) -> None:
+        """
+        Initialize SQLAlchemyTableRetriever.
+
+        :param drivername: The SQLAlchemy driver name (e.g., ``"sqlite"``,
+            ``"postgresql+psycopg2"``).
+        :param username: Database username.
+        :param password: Database password as a Haystack ``Secret``.
+        :param host: Database host.
+        :param port: Database port.
+        :param database: Database name or path (e.g., ``":memory:"`` for SQLite in-memory).
+        :param init_script: Optional SQL statements executed once on ``warm_up()``
+            (e.g., to create tables or insert seed data). Multiple statements should be
+            separated by semicolons. The script is split on every ``;``, so a semicolon
+            inside a string literal or comment is not supported.
+        """
+        self.drivername = drivername
+        self.username = username
+        self.password = password
+        self.host = host
+        self.port = port
+        self.database = database
+        self.init_script = init_script
+        self._engine: Engine | None = None
+        self._warmed_up = False
+
+    def warm_up(self) -> None:
+        """
+        Initialize the database engine and execute ``init_script`` if provided.
+
+        Called automatically by ``run()`` on first invocation if not already warmed up.
+        If ``init_script`` fails, the new engine is disposed and the error is re-raised,
+        leaving the component not warmed up.
+        """
+        if self._warmed_up:
+            return
+
+        url = URL.create(
+            drivername=self.drivername,
+            username=self.username,
+            password=self.password.resolve_value() if self.password else None,
+            host=self.host,
+            port=self.port,
+            database=self.database,
+        )
+
+        engine_kwargs: dict[str, Any] = {}
+        if url.drivername.startswith("sqlite") and url.database in (":memory:", "", None):
+            # StaticPool keeps one shared connection, so the in-memory database (and
+            # whatever ``init_script`` created) is the same one later queries see.
+            engine_kwargs["connect_args"] = {"check_same_thread": False}
+            engine_kwargs["poolclass"] = StaticPool
+
+        engine = create_engine(url, **engine_kwargs)
+
+        if self.init_script:
+            try:
+                with engine.connect() as conn:
+                    for stmt in self.init_script.split(";"):
+                        stripped = stmt.strip()
+                        if stripped:
+                            conn.execute(text(stripped))
+                    conn.commit()
+            except Exception:
+                # Only a fully initialized engine is kept; release this one so a
+                # retry does not leak its pooled connections.
+                engine.dispose()
+                raise
+
+        self._engine = engine
+        self._warmed_up = True
+
+    def to_dict(self) -> dict[str, Any]:
+        """
+        Serialize the component to a dictionary.
+
+        :returns: Dictionary with serialized data.
+        """
+        return default_to_dict(
+            self,
+            drivername=self.drivername,
+            username=self.username,
+            password=self.password.to_dict() if self.password else None,
+            host=self.host,
+            port=self.port,
+            database=self.database,
+            init_script=self.init_script,
+        )
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "SQLAlchemyTableRetriever":
+        """
+        Deserialize the component from a dictionary.
+
+        :param data: Dictionary to deserialize from.
+        :returns: Deserialized component.
+        """
+        deserialize_secrets_inplace(data.get("init_parameters", {}), ["password"])
+        return default_from_dict(cls, data)
+
+    @staticmethod
+    def _df_to_markdown(df: DataFrame) -> str:
+        """
+        Render a DataFrame as a Markdown pipe table.
+
+        :param df: The DataFrame to render.
+        :returns: The Markdown table, or an empty string if ``df`` is empty.
+        """
+        if df.empty:
+            return ""
+
+        def cell(value: Any) -> str:
+            # A raw "|" or line break inside a cell would break the table row.
+            escaped = str(value).replace("|", "\\|")
+            return escaped.replace("\r\n", "<br>").replace("\n", "<br>").replace("\r", "<br>")
+
+        header = "| " + " | ".join(cell(c) for c in df.columns) + " |"
+        separator = "| " + " | ".join("---" for _ in df.columns) + " |"
+        rows = ["| " + " | ".join(cell(v) for v in row) + " |" for row in df.itertuples(index=False)]
+        return "\n".join([header, separator, *rows])
+
+    @component.output_types(dataframe=DataFrame, table=str, error=str)
+    def run(self, query: str) -> dict[str, Any]:
+        """
+        Execute a SQL query and return the results.
+
+        :param query: The SQL query to execute. An empty or whitespace-only query is
+            rejected without touching the database.
+        :returns: A dictionary with:
+
+        - ``dataframe``: A Pandas DataFrame with the query results (at most ``MAX_SYS_ROWS`` rows).
+        - ``table``: A Markdown-formatted string of the results.
+        - ``error``: An error message if the query failed, otherwise an empty string.
+
+        :raises RuntimeError: If the engine is still not initialized after warm-up.
+        """
+        if not query or not query.strip():
+            return {"dataframe": DataFrame(), "table": "", "error": "empty query"}
+
+        if not self._warmed_up:
+            self.warm_up()
+
+        if self._engine is None:
+            msg = "Engine is not initialized. Call warm_up() first."
+            raise RuntimeError(msg)
+
+        try:
+            with self._engine.connect() as conn:
+                result = conn.execute(text(query))
+                # Cap the fetch so a broad query cannot load an unbounded result set.
+                rows = result.fetchmany(MAX_SYS_ROWS)
+                columns = list(result.keys())
+            df = DataFrame(rows, columns=columns)
+            return {"dataframe": df, "table": self._df_to_markdown(df), "error": ""}
+        except SQLAlchemyError as e:
+            logger.warning("Error executing query: {error}", error=str(e))
+            return {"dataframe": DataFrame(), "table": "", "error": str(e)}
diff --git a/integrations/sqlalchemy/tests/__init__.py b/integrations/sqlalchemy/tests/__init__.py
new file mode 100644
index 0000000000..d391382c6b
--- /dev/null
+++ b/integrations/sqlalchemy/tests/__init__.py
@@ -0,0 +1,3 @@
+# SPDX-FileCopyrightText: 2025-present deepset GmbH
+#
+# SPDX-License-Identifier: Apache-2.0
diff --git a/integrations/sqlalchemy/tests/test_sqlalchemy_table_retriever.py b/integrations/sqlalchemy/tests/test_sqlalchemy_table_retriever.py
new file mode 100644
index 0000000000..69ad4ac5d7
--- /dev/null
+++ b/integrations/sqlalchemy/tests/test_sqlalchemy_table_retriever.py
@@ -0,0 +1,168 @@
+# SPDX-FileCopyrightText: 2025-present deepset GmbH
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+from haystack.utils import Secret
+
+import haystack_integrations.components.retrievers.sqlalchemy.sqlalchemy_table_retriever as module
+from haystack_integrations.components.retrievers.sqlalchemy import SQLAlchemyTableRetriever
+from sqlalchemy.exc import SQLAlchemyError
+
+
+class TestSQLAlchemyTableRetrieverInit:
+    def test_init_defaults(self):
+        retriever = SQLAlchemyTableRetriever(drivername="sqlite")
+        assert retriever.drivername == "sqlite"
+        assert retriever.username is None
+        assert retriever.password is None
+        assert retriever.host is None
+        assert retriever.port is None
+        assert retriever.database is None
+        assert retriever.init_script is None
+
+    def test_init_all_params(self):
+        password = Secret.from_token("secret")
+        retriever = SQLAlchemyTableRetriever(
+            drivername="postgresql+psycopg2",
+            username="user",
+            password=password,
+            host="localhost",
+            port=5432,
+            database="mydb",
+            init_script="CREATE TABLE t (x INTEGER)",
+        )
+        assert retriever.drivername == "postgresql+psycopg2"
+        assert retriever.username == "user"
+        assert retriever.password is password
+        assert retriever.host == "localhost"
+        assert retriever.port == 5432
+        assert retriever.database == "mydb"
+        assert retriever.init_script == "CREATE TABLE t (x INTEGER)"
+
+
+class TestSQLAlchemyTableRetrieverSerialization:
+    def test_to_dict(self):
+        password = Secret.from_env_var("DB_PASSWORD")
+        retriever = SQLAlchemyTableRetriever(
+            drivername="sqlite",
+            database=":memory:",
+            password=password,
+        )
+        d = retriever.to_dict()
+        expected_type = (
+            "haystack_integrations.components.retrievers.sqlalchemy.sqlalchemy_table_retriever.SQLAlchemyTableRetriever"
+        )
+        assert d["type"] == expected_type
+        params = d["init_parameters"]
+        assert params["drivername"] == "sqlite"
+        assert params["database"] == ":memory:"
+        assert params["password"]["type"] == "env_var"
+
+    def test_from_dict(self, monkeypatch):
+        monkeypatch.setenv("DB_PASSWORD", "secret")
+        password = Secret.from_env_var("DB_PASSWORD")
+        retriever = SQLAlchemyTableRetriever(
+            drivername="sqlite",
+            database=":memory:",
+            password=password,
+        )
+        d = retriever.to_dict()
+        restored = SQLAlchemyTableRetriever.from_dict(d)
+        assert restored.drivername == "sqlite"
+        assert restored.database == ":memory:"
+        assert restored.password is not None
+        assert restored.password.resolve_value() == "secret"
+
+
+class TestSQLAlchemyTableRetrieverRun:
+    def test_run_empty_query(self):
+        retriever = SQLAlchemyTableRetriever(drivername="sqlite", database=":memory:")
+        result = retriever.run(query="")
+        assert result["error"] == "empty query"
+        assert result["dataframe"].empty
+        assert result["table"] == ""
+
+    def test_run_whitespace_query(self):
+        retriever = SQLAlchemyTableRetriever(drivername="sqlite", database=":memory:")
+        result = retriever.run(query="  \n\t ")
+        assert result["error"] == "empty query"
+        assert result["dataframe"].empty
+        assert not retriever._warmed_up
+
+    def test_run_returns_dataframe(self):
+        retriever = SQLAlchemyTableRetriever(drivername="sqlite", database=":memory:")
+        retriever.warm_up()
+        result = retriever.run(query="SELECT 1 AS value")
+        assert not result["dataframe"].empty
+        assert result["error"] == ""
+
+    def test_run_returns_markdown(self):
+        retriever = SQLAlchemyTableRetriever(drivername="sqlite", database=":memory:")
+        retriever.warm_up()
+        result = retriever.run(query="SELECT 1 AS value")
+        assert "|" in result["table"]
+
+    def test_run_sql_error(self):
+        retriever = SQLAlchemyTableRetriever(drivername="sqlite", database=":memory:")
+        retriever.warm_up()
+        result = retriever.run(query="SELECT * FROM nonexistent_table_xyz")
+        assert result["error"] != ""
+        assert result["dataframe"].empty
+
+    def test_run_escapes_markdown_cells(self):
+        retriever = SQLAlchemyTableRetriever(drivername="sqlite", database=":memory:")
+        result = retriever.run(query="SELECT 'a|b' AS v1, 'c' || char(10) || 'd' AS v2")
+        assert result["error"] == ""
+        assert result["table"] == "| v1 | v2 |\n| --- | --- |\n| a\\|b | c<br>d |"
+
+    def test_max_row_limit(self, monkeypatch):
+        init_sql = (
+            "CREATE TABLE t (x INTEGER);INSERT INTO t VALUES (1);INSERT INTO t VALUES (2);INSERT INTO t VALUES (3)"
+        )
+        retriever = SQLAlchemyTableRetriever(
+            drivername="sqlite",
+            database=":memory:",
+            init_script=init_sql,
+        )
+        monkeypatch.setattr(module, "MAX_SYS_ROWS", 2)
+        retriever.warm_up()
+        result = retriever.run(query="SELECT * FROM t")
+        assert len(result["dataframe"]) == 2
+
+    def test_warm_up_with_init_script(self):
+        init_sql = "CREATE TABLE greetings (msg TEXT); INSERT INTO greetings VALUES ('hello')"
+        retriever = SQLAlchemyTableRetriever(
+            drivername="sqlite",
+            database=":memory:",
+            init_script=init_sql,
+        )
+        retriever.warm_up()
+        result = retriever.run(query="SELECT * FROM greetings")
+        assert not result["dataframe"].empty
+        assert result["dataframe"].iloc[0]["msg"] == "hello"
+
+    def test_warm_up_failed_init_script(self):
+        retriever = SQLAlchemyTableRetriever(
+            drivername="sqlite",
+            database=":memory:",
+            init_script="THIS IS NOT SQL",
+        )
+        with pytest.raises(SQLAlchemyError):
+            retriever.warm_up()
+        assert retriever._engine is None
+        assert not retriever._warmed_up
+
+    def test_warm_up_idempotent(self):
+        retriever = SQLAlchemyTableRetriever(drivername="sqlite", database=":memory:")
+        retriever.warm_up()
+        engine = retriever._engine
+        retriever.warm_up()
+        assert retriever._engine is engine
+
+    def test_run_calls_warm_up_automatically(self):
+        retriever = SQLAlchemyTableRetriever(drivername="sqlite", database=":memory:")
+        assert not retriever._warmed_up
+        result = retriever.run(query="SELECT 1 AS x")
+        assert retriever._warmed_up
+        assert not result["dataframe"].empty