diff --git a/.github/labeler.yml b/.github/labeler.yml
index c9cd838a2e..143181b1a9 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -213,6 +213,11 @@ integration:ollama:
- any-glob-to-any-file: "integrations/ollama/**/*"
- any-glob-to-any-file: ".github/workflows/ollama.yml"
+integration:olostep:
+ - changed-files:
+ - any-glob-to-any-file: "integrations/olostep/**/*"
+ - any-glob-to-any-file: ".github/workflows/olostep.yml"
+
integration:openrouter:
- changed-files:
- any-glob-to-any-file: "integrations/openrouter/**/*"
diff --git a/.github/workflows/CI_coverage_comment.yml b/.github/workflows/CI_coverage_comment.yml
index 73c3dbabee..ed1728855d 100644
--- a/.github/workflows/CI_coverage_comment.yml
+++ b/.github/workflows/CI_coverage_comment.yml
@@ -42,6 +42,7 @@ on:
- "Test / mongodb_atlas"
- "Test / nvidia"
- "Test / ollama"
+ - "Test / olostep"
- "Test / openrouter"
- "Test / opensearch"
- "Test / optimum"
diff --git a/.github/workflows/olostep.yml b/.github/workflows/olostep.yml
new file mode 100644
index 0000000000..eb8dee2906
--- /dev/null
+++ b/.github/workflows/olostep.yml
@@ -0,0 +1,144 @@
+# This workflow comes from https://github.com/ofek/hatch-mypyc
+# https://github.com/ofek/hatch-mypyc/blob/5a198c0ba8660494d02716cfc9d79ce4adfb1442/.github/workflows/test.yml
+name: Test / olostep
+
+on:
+ schedule:
+ - cron: "0 0 * * *"
+ pull_request:
+ paths:
+ - "integrations/olostep/**"
+ - "!integrations/olostep/*.md"
+ - ".github/workflows/olostep.yml"
+ push:
+ branches:
+ - main
+ paths:
+ - "integrations/olostep/**"
+ - "!integrations/olostep/*.md"
+ - ".github/workflows/olostep.yml"
+
+defaults:
+ run:
+ working-directory: integrations/olostep
+
+concurrency:
+ group: olostep-${{ github.head_ref || github.sha }}
+ cancel-in-progress: true
+
+env:
+ PYTHONUNBUFFERED: "1"
+ FORCE_COLOR: "1"
+ TEST_MATRIX_OS: '["ubuntu-latest", "windows-latest", "macos-latest"]'
+ TEST_MATRIX_PYTHON: '["3.10", "3.14"]'
+
+jobs:
+ compute-test-matrix:
+ runs-on: ubuntu-slim
+ defaults:
+ run:
+ working-directory: .
+ outputs:
+ os: ${{ steps.set.outputs.os }}
+ python-version: ${{ steps.set.outputs.python-version }}
+ steps:
+ - id: set
+ run: |
+ if [ "${{ github.event_name }}" = "push" ]; then
+ echo 'os=["ubuntu-latest"]' >> "$GITHUB_OUTPUT"
+ echo 'python-version=["3.10"]' >> "$GITHUB_OUTPUT"
+ else
+ echo "os=${TEST_MATRIX_OS}" >> "$GITHUB_OUTPUT"
+ echo "python-version=${TEST_MATRIX_PYTHON}" >> "$GITHUB_OUTPUT"
+ fi
+
+ run:
+ name: Python ${{ matrix.python-version }} on ${{ startsWith(matrix.os, 'macos-') && 'macOS' || startsWith(matrix.os, 'windows-') && 'Windows' || 'Linux' }}
+ needs: compute-test-matrix
+ permissions:
+ contents: write
+ pull-requests: write
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false
+ matrix:
+ os: ${{ fromJSON(needs.compute-test-matrix.outputs.os) }}
+ python-version: ${{ fromJSON(needs.compute-test-matrix.outputs.python-version) }}
+
+ steps:
+ - name: Support longpaths
+ if: matrix.os == 'windows-latest'
+ working-directory: .
+ run: git config --system core.longpaths true
+
+ - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install Hatch
+ run: pip install --upgrade hatch
+ - name: Lint
+ if: matrix.python-version == '3.10' && runner.os == 'Linux'
+ run: hatch run fmt-check && hatch run test:types
+
+ - name: Run unit tests
+ run: hatch run test:unit-cov-retry
+
+ # On PR: posts coverage comment (directly on same-repo PRs; via artifact for fork PRs). On push to main: stores coverage baseline on data branch.
+ - name: Store unit tests coverage
+ id: coverage_comment
+ if: matrix.python-version == '3.10' && runner.os == 'Linux' && github.event_name != 'schedule'
+ uses: py-cov-action/python-coverage-comment-action@7188638f871f721a365d644f505d1ff3df20d683 # v3.40
+ with:
+ GITHUB_TOKEN: ${{ github.token }}
+ COVERAGE_PATH: integrations/olostep
+ SUBPROJECT_ID: olostep
+ MINIMUM_GREEN: 90
+ MINIMUM_ORANGE: 60
+
+ - name: Upload coverage comment to be posted
+ if: matrix.python-version == '3.10' && runner.os == 'Linux' && github.event_name == 'pull_request' && steps.coverage_comment.outputs.COMMENT_FILE_WRITTEN == 'true'
+ uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+ with:
+ name: coverage-comment-olostep
+ path: python-coverage-comment-action-olostep.txt
+
+ - name: Run integration tests
+ run: hatch run test:integration-cov-append-retry
+
+ - name: Store combined coverage
+ if: github.event_name == 'push'
+ uses: py-cov-action/python-coverage-comment-action@7188638f871f721a365d644f505d1ff3df20d683 # v3.40
+ with:
+ GITHUB_TOKEN: ${{ github.token }}
+ COVERAGE_PATH: integrations/olostep
+ SUBPROJECT_ID: olostep-combined
+ MINIMUM_GREEN: 90
+ MINIMUM_ORANGE: 60
+
+ - name: Run unit tests with lowest direct dependencies
+ if: github.event_name != 'push'
+ run: |
+ hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt
+ hatch -e test env run -- uv pip install -r requirements_lowest_direct.txt
+ hatch run test:unit
+
+ - name: Nightly - run unit tests with Haystack main branch
+ if: github.event_name == 'schedule'
+ run: |
+ hatch env prune
+ hatch -e test env run -- uv pip install git+https://github.com/deepset-ai/haystack.git@main
+ hatch run test:unit
+
+
+ notify-slack-on-failure:
+ needs: run
+ if: failure() && github.event_name == 'schedule'
+ runs-on: ubuntu-slim
+ steps:
+ - uses: deepset-ai/notify-slack-action@3cda73b77a148f16f703274198e7771340cf862b # v1
+ with:
+ slack-webhook-url: ${{ secrets.SLACK_WEBHOOK_URL_NOTIFICATIONS }}
diff --git a/README.md b/README.md
index be79b323e5..018bba5d32 100644
--- a/README.md
+++ b/README.md
@@ -68,6 +68,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta
| [mongodb-atlas-haystack](integrations/mongodb_atlas/) | Document Store | [](https://pypi.org/project/mongodb-atlas-haystack) | [](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/mongodb_atlas.yml) | [](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-mongodb_atlas/htmlcov/index.html) | [](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-mongodb_atlas-combined/htmlcov/index.html) |
| [nvidia-haystack](integrations/nvidia/) | Embedder, Generator, Ranker | [](https://pypi.org/project/nvidia-haystack) | [](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/nvidia.yml) | [](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-nvidia/htmlcov/index.html) | [](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-nvidia-combined/htmlcov/index.html) |
| [ollama-haystack](integrations/ollama/) | Embedder, Generator | [](https://pypi.org/project/ollama-haystack) | [](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/ollama.yml) | [](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-ollama/htmlcov/index.html) | [](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-ollama-combined/htmlcov/index.html) |
+| [olostep-haystack](integrations/olostep/) | Tool | [](https://pypi.org/project/olostep-haystack) | [](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/olostep.yml) | [](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-olostep/htmlcov/index.html) | [](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-olostep-combined/htmlcov/index.html) |
| [openrouter-haystack](integrations/openrouter/) | Generator | [](https://pypi.org/project/openrouter-haystack) | [](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/openrouter.yml) | [](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-openrouter/htmlcov/index.html) | [](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-openrouter-combined/htmlcov/index.html) |
| [opensearch-haystack](integrations/opensearch/) | Document Store | [](https://pypi.org/project/opensearch-haystack) | [](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/opensearch.yml) | [](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-opensearch/htmlcov/index.html) | [](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-opensearch-combined/htmlcov/index.html) |
| [optimum-haystack](integrations/optimum/) | Embedder | [](https://pypi.org/project/optimum-haystack) | [](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/optimum.yml) | [](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-optimum/htmlcov/index.html) | [](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-optimum-combined/htmlcov/index.html) |
diff --git a/integrations/olostep/LICENSE.txt b/integrations/olostep/LICENSE.txt
new file mode 100644
index 0000000000..6134ab324f
--- /dev/null
+++ b/integrations/olostep/LICENSE.txt
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2023-present deepset GmbH
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/integrations/olostep/README.md b/integrations/olostep/README.md
new file mode 100644
index 0000000000..82edc25144
--- /dev/null
+++ b/integrations/olostep/README.md
@@ -0,0 +1,12 @@
+# olostep-haystack
+
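+[![PyPI - Version](https://img.shields.io/pypi/v/olostep-haystack.svg)](https://pypi.org/project/olostep-haystack)
+[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/olostep-haystack.svg)](https://pypi.org/project/olostep-haystack)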
+
+- [Changelog](https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/olostep/CHANGELOG.md)
+
+---
+
+## Contributing
+
+Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md).
diff --git a/integrations/olostep/olostep.md b/integrations/olostep/olostep.md
new file mode 100644
index 0000000000..a2628a7e7d
--- /dev/null
+++ b/integrations/olostep/olostep.md
@@ -0,0 +1,119 @@
+---
+title: "Olostep"
+id: integrations-olostep
+description: "Olostep integration for Haystack"
+slug: "/integrations-olostep"
+---
+
+
+## olostep_haystack.fetcher
+
+### OlostepFetcherError
+
+Bases: Exception
+
+Raised when Olostep fetching fails.
+
+### OlostepFetcher
+
+Fetch and convert web pages to Markdown using Olostep's scrape API.
+
+Uses SyncOlostepClient (the current Olostep Python SDK).
+Do NOT use the legacy Olostep class with client.scrapes.create().
+
+Usage:
+from olostep_haystack import OlostepFetcher
+fetcher = OlostepFetcher(api_key=Secret.from_env_var("OLOSTEP_API_KEY"))
+result = fetcher.run(urls=["https://example.com"])
+\# result["documents"] -> List[Document]
+
+#### run
+
+```python
+run(urls: list[str]) -> dict[str, Any]
+```
+
+Fetch one or more URLs and return their content as Documents.
+
+**Parameters:**
+
+- **urls** (list\[str\]) – list of URLs to scrape
+
+**Returns:**
+
+- dict\[str, Any\] – dict with 'documents' (List[Document])
+
+**Raises:**
+
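+- OlostepFetcherError – if the API key is missing or authentication fails (other per-URL Olostep errors are logged and the URL is skipped)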
+
+#### to_dict
+
+```python
+to_dict() -> dict[str, Any]
+```
+
+Serialize the component to a dictionary.
+
+#### from_dict
+
+```python
+from_dict(data: dict[str, Any]) -> OlostepFetcher
+```
+
+Deserialize a component from a dictionary.
+
+## olostep_haystack.web_search
+
+### OlostepSearchError
+
+Bases: Exception
+
+Raised when Olostep search fails.
+
+### OlostepWebSearch
+
+Search the web using Olostep's /searches endpoint.
+
+Usage:
+from olostep_haystack import OlostepWebSearch
+search = OlostepWebSearch(api_key=Secret.from_env_var("OLOSTEP_API_KEY"))
+result = search.run(query="what is haystack?")
+\# result["documents"] -> List[Document]
+\# result["links"] -> List[str]
+
+#### run
+
+```python
+run(query: str) -> dict[str, Any]
+```
+
+Search the web using Olostep.
+
+**Parameters:**
+
+- **query** (str) – the search query string
+
+**Returns:**
+
+- dict\[str, Any\] – dict with 'documents' (List[Document]) and 'links' (List[str])
+
+**Raises:**
+
+- OlostepSearchError – on API failure
+
+#### to_dict
+
+```python
+to_dict() -> dict[str, Any]
+```
+
+Serialize the component to a dictionary.
+
+#### from_dict
+
+```python
+from_dict(data: dict[str, Any]) -> OlostepWebSearch
+```
+
+Deserialize a component from a dictionary.
diff --git a/integrations/olostep/pydoc/config_docusaurus.yml b/integrations/olostep/pydoc/config_docusaurus.yml
new file mode 100644
index 0000000000..160c195852
--- /dev/null
+++ b/integrations/olostep/pydoc/config_docusaurus.yml
@@ -0,0 +1,14 @@
+loaders:
+ - modules:
+ - olostep_haystack.web_search
+ - olostep_haystack.fetcher
+ search_path: [../src]
+processors:
+ - type: filter
+ documented_only: true
+ skip_empty_modules: true
+renderer:
+ description: Olostep integration for Haystack
+ id: integrations-olostep
+ filename: olostep.md
+ title: Olostep
diff --git a/integrations/olostep/pyproject.toml b/integrations/olostep/pyproject.toml
new file mode 100644
index 0000000000..a9a832705d
--- /dev/null
+++ b/integrations/olostep/pyproject.toml
@@ -0,0 +1,166 @@
+[build-system]
+requires = ["hatchling", "hatch-vcs"]
+build-backend = "hatchling.build"
+
+[project]
+name = "olostep-haystack"
+dynamic = ["version"]
+description = "Haystack components for Olostep web search and URL scraping"
+readme = "README.md"
+# Must stay in sync with .github/workflows/olostep.yml, which lints and tests on Python 3.10.
+requires-python = ">=3.10"
+license = "Apache-2.0"
+keywords = ["olostep", "web-search", "scraping", "haystack"]
+authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }]
+classifiers = [
+ "License :: OSI Approved :: Apache Software License",
+ "Development Status :: 4 - Beta",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: 3.13",
+ "Programming Language :: Python :: 3.14",
+ "Programming Language :: Python :: Implementation :: CPython",
+ "Programming Language :: Python :: Implementation :: PyPy",
+]
+dependencies = ["haystack-ai>=2.24.1", "olostep>=0.1.0", "requests>=2.28.0"]
+
+[project.urls]
+Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/olostep#readme"
+Issues = "https://github.com/deepset-ai/haystack-core-integrations/issues"
+Source = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/olostep"
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/haystack_integrations", "src/olostep_haystack"]
+
+[tool.hatch.version]
+source = "vcs"
+tag-pattern = 'integrations\/olostep-v(?P<version>.*)'  # the named group "version" is the part used as the package version
+
+[tool.hatch.version.raw-options]
+root = "../.."
+git_describe_command = 'git describe --tags --match="integrations/olostep-v[0-9]*"'
+
+[tool.hatch.envs.default]
+installer = "uv"
+dependencies = ["haystack-pydoc-tools", "ruff"]
+
+[tool.hatch.envs.default.scripts]
+docs = ["haystack-pydoc pydoc/config_docusaurus.yml"]
+fmt = "ruff check --fix {args}; ruff format {args}"
+fmt-check = "ruff check {args} && ruff format --check {args}"
+
+[tool.hatch.envs.test]
+dependencies = [
+ "pytest",
+ "pytest-asyncio",
+ "pytest-cov",
+ "pytest-rerunfailures",
+ "mypy",
+ "pip",
+]
+
+[tool.hatch.envs.test.scripts]
+unit = 'pytest -m "not integration" {args:tests}'
+integration = 'pytest -m "integration" {args:tests}'
+all = 'pytest {args:tests}'
+unit-cov-retry = 'pytest --cov=olostep_haystack --reruns 3 --reruns-delay 30 -x -m "not integration" {args:tests}'
+integration-cov-append-retry = 'pytest --cov=olostep_haystack --cov-append --reruns 3 --reruns-delay 30 -x -m "integration" {args:tests}'
+types = "mypy -p olostep_haystack {args}"
+
+[tool.mypy]
+install_types = true
+non_interactive = true
+check_untyped_defs = true
+disallow_incomplete_defs = true
+
+[[tool.mypy.overrides]]
+module = ["olostep.*"]
+ignore_missing_imports = true
+
+[tool.ruff]
+line-length = 120
+
+[tool.ruff.lint]
+select = [
+ "A",
+ "ANN",
+ "ARG",
+ "B",
+ "C",
+ "D102", # Missing docstring in public method
+ "D103", # Missing docstring in public function
+ "D205", # 1 blank line required between summary line and description
+ "D209", # Closing triple quotes go to new line
+ "D213", # summary lines must be positioned on the second physical line of the docstring
+ "D417", # Missing argument descriptions in the docstring
+ "D419", # Docstring is empty
+ "DTZ",
+ "E",
+ "EM",
+ "F",
+ "I",
+ "ICN",
+ "ISC",
+ "N",
+ "PLC",
+ "PLE",
+ "PLR",
+ "PLW",
+ "Q",
+ "RUF",
+ "S",
+ "T",
+ "TID",
+ "UP",
+ "W",
+ "YTT",
+]
+ignore = [
+ # Allow non-abstract empty methods in abstract base classes
+ "B027",
+ # Allow function calls in argument defaults (common Haystack pattern for Secret.from_env_var)
+ "B008",
+ # Ignore checks for possible passwords
+ "S105",
+ "S106",
+ "S107",
+ # Ignore complexity
+ "C901",
+ "PLR0911",
+ "PLR0912",
+ "PLR0913",
+ "PLR0915",
+ # Allow `Any` type - used legitimately for dynamic types and SDK boundaries
+ "ANN401",
+]
+
+[tool.ruff.lint.isort]
+known-first-party = ["haystack_integrations"]
+
+[tool.ruff.lint.flake8-tidy-imports]
+ban-relative-imports = "parents"
+
+[tool.ruff.lint.per-file-ignores]
+# Tests can use magic values, assertions, relative imports, and don't need type annotations
+"tests/**/*" = ["PLR2004", "S101", "TID252", "D", "ANN"]
+
+[tool.coverage.run]
+source = ["olostep_haystack"]
+branch = true
+parallel = false
+relative_files = true
+
+[tool.coverage.report]
+omit = ["*/tests/*", "*/__init__.py"]
+show_missing = true
+exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"]
+
+[tool.pytest.ini_options]
+addopts = "--strict-markers"
+markers = [
+ "integration: integration tests",
+]
+log_cli = true
+asyncio_default_fixture_loop_scope = "function"
diff --git a/integrations/olostep/src/haystack_integrations/components/tools/olostep/__init__.py b/integrations/olostep/src/haystack_integrations/components/tools/olostep/__init__.py
new file mode 100644
index 0000000000..c1764a6e03
--- /dev/null
+++ b/integrations/olostep/src/haystack_integrations/components/tools/olostep/__init__.py
@@ -0,0 +1,3 @@
+# SPDX-FileCopyrightText: 2022-present deepset GmbH
+#
+# SPDX-License-Identifier: Apache-2.0
diff --git a/integrations/olostep/src/haystack_integrations/components/tools/py.typed b/integrations/olostep/src/haystack_integrations/components/tools/py.typed
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/integrations/olostep/src/olostep_haystack/__init__.py b/integrations/olostep/src/olostep_haystack/__init__.py
new file mode 100644
index 0000000000..9041e260ca
--- /dev/null
+++ b/integrations/olostep/src/olostep_haystack/__init__.py
@@ -0,0 +1,8 @@
+# SPDX-FileCopyrightText: 2024-present Olostep
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from olostep_haystack.fetcher import OlostepFetcher
+from olostep_haystack.web_search import OlostepWebSearch
+
+__all__ = ["OlostepFetcher", "OlostepWebSearch"]
diff --git a/integrations/olostep/src/olostep_haystack/fetcher.py b/integrations/olostep/src/olostep_haystack/fetcher.py
new file mode 100644
index 0000000000..45082b2c53
--- /dev/null
+++ b/integrations/olostep/src/olostep_haystack/fetcher.py
@@ -0,0 +1,93 @@
+# SPDX-FileCopyrightText: 2024-present Olostep
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import logging
+from typing import Any
+
+from haystack import Document, component, default_from_dict, default_to_dict
+from haystack.utils import Secret, deserialize_secrets_inplace
+
+logger = logging.getLogger(__name__)
+
+
+class OlostepFetcherError(Exception):
+ """Raised when Olostep fetching fails."""
+
+
+@component
+class OlostepFetcher:
+ """
+ Fetch and convert web pages to Markdown using Olostep's scrape API.
+
+ Uses SyncOlostepClient (the current Olostep Python SDK).
+ Do NOT use the legacy Olostep class with client.scrapes.create().
+
+ Usage:
+ from olostep_haystack import OlostepFetcher
+ fetcher = OlostepFetcher(api_key=Secret.from_env_var("OLOSTEP_API_KEY"))
+ result = fetcher.run(urls=["https://example.com"])
+ # result["documents"] -> List[Document]
+ """
+
+ def __init__(
+ self,
+ api_key: Secret = Secret.from_env_var("OLOSTEP_API_KEY"),
+ format: str = "markdown", # noqa: A002
+ ) -> None:
+ if format not in ("markdown", "html"):
+ msg = "format must be 'markdown' or 'html'"
+ raise ValueError(msg)
+ self.api_key = api_key
+ self.format = format
+
+ @component.output_types(documents=list[Document])
+ def run(self, urls: list[str]) -> dict[str, Any]:
+ """
+ Fetch one or more URLs and return their content as Documents.
+
+ :param urls: list of URLs to scrape
+ :returns: dict with 'documents' (List[Document])
+ :raises OlostepFetcherError: on API failure
+ """
+ from olostep import SyncOlostepClient # noqa: PLC0415
+ from olostep.errors import Olostep_BaseError, OlostepServerError_AuthFailed # noqa: PLC0415
+
+ resolved_key = self.api_key.resolve_value()
+ if not resolved_key:
+ msg = "OLOSTEP_API_KEY is not set. Set it in your environment or pass it explicitly."
+ raise OlostepFetcherError(msg)
+
+ client = SyncOlostepClient(api_key=resolved_key)
+ documents: list[Document] = []
+
+ for url in urls:
+ try:
+ scrape_result = client.scrape(url)
+ content_obj = scrape_result.retrieve([self.format])
+ content = content_obj.markdown_content if self.format == "markdown" else content_obj.html_content
+ if content:
+ documents.append(Document(content=content, meta={"url": url}))
+ else:
+ logger.warning("Olostep returned no %s content for %s", self.format, url)
+ except OlostepServerError_AuthFailed as e:
+ msg = "Olostep authentication failed — check your API key."
+ raise OlostepFetcherError(msg) from e
+ except Olostep_BaseError as e:
+ logger.warning("Olostep error for %s: %s", url, e)
+
+ return {"documents": documents}
+
+ def to_dict(self) -> dict[str, Any]:
+ """Serialize the component to a dictionary."""
+ return default_to_dict(
+ self,
+ api_key=self.api_key.to_dict(),
+ format=self.format,
+ )
+
+ @classmethod
+ def from_dict(cls, data: dict[str, Any]) -> "OlostepFetcher":
+ """Deserialize a component from a dictionary."""
+ deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
+ return default_from_dict(cls, data)
diff --git a/integrations/olostep/src/olostep_haystack/py.typed b/integrations/olostep/src/olostep_haystack/py.typed
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/integrations/olostep/src/olostep_haystack/web_search.py b/integrations/olostep/src/olostep_haystack/web_search.py
new file mode 100644
index 0000000000..1bc8dfaebb
--- /dev/null
+++ b/integrations/olostep/src/olostep_haystack/web_search.py
@@ -0,0 +1,114 @@
+# SPDX-FileCopyrightText: 2024-present Olostep
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import logging
+from typing import Any
+from urllib.parse import urlparse
+
+import requests
+from haystack import Document, component, default_from_dict, default_to_dict
+from haystack.utils import Secret, deserialize_secrets_inplace
+
+logger = logging.getLogger(__name__)
+
+
+class OlostepSearchError(Exception):
+    """
+    Error raised by `OlostepWebSearch` when a search cannot be completed.
+
+    Raised when the API key cannot be resolved or when the request to the
+    Olostep `/searches` endpoint fails.
+    """
+
+
+@component
+class OlostepWebSearch:
+    """
+    Search the web using Olostep's `/searches` endpoint.
+
+    Every link returned by the API becomes a `Document` whose content is the link
+    description and whose meta carries the `title` and the `link` (URL).
+
+    Usage:
+    ```python
+    from haystack.utils import Secret
+    from olostep_haystack import OlostepWebSearch
+
+    search = OlostepWebSearch(api_key=Secret.from_env_var("OLOSTEP_API_KEY"))
+    result = search.run(query="what is haystack?")
+    # result["documents"] -> List[Document]
+    # result["links"] -> List[str]
+    ```
+    """
+
+    def __init__(
+        self,
+        api_key: Secret = Secret.from_env_var("OLOSTEP_API_KEY"),
+        top_k: int = 5,
+        allowed_domains: list[str] | None = None,
+        search_params: dict[str, Any] | None = None,
+    ) -> None:
+        """
+        Create the component.
+
+        :param api_key: Olostep API key; read from the `OLOSTEP_API_KEY` environment variable by default.
+        :param top_k: maximum number of results returned by `run`.
+        :param allowed_domains: if non-empty, only results hosted on one of these domains
+            (or on a subdomain of one) are kept.
+        :param search_params: extra fields merged into the JSON body sent to `/searches`.
+        """
+        self.api_key = api_key
+        self.top_k = top_k
+        self.allowed_domains = allowed_domains or []
+        self.search_params = search_params or {}
+
+    @staticmethod
+    def _hostname(value: str) -> str:
+        """Return the lower-cased host of a URL or bare domain, or "" if none can be extracted."""
+        value = value.strip()
+        try:
+            # A bare domain ("example.com/path") has no netloc unless it is prefixed with "//".
+            parsed = urlparse(value if "://" in value else f"//{value}")
+        except ValueError:
+            # urlparse rejects some malformed hosts (e.g. unbalanced IPv6 brackets).
+            return ""
+        return (parsed.hostname or "").lower().strip(".")
+
+    @classmethod
+    def _is_allowed(cls, url: str, allowed_domains: list[str]) -> bool:
+        """Return True if the host of `url` equals, or is a subdomain of, one of `allowed_domains`."""
+        host = cls._hostname(url)
+        if not host:
+            return False
+        for entry in allowed_domains:
+            domain = cls._hostname(entry)
+            # Compare hosts rather than substrings: "example.com" must not admit
+            # "notexample.com" or a URL that merely mentions the domain in its path or query.
+            if domain and (host == domain or host.endswith(f".{domain}")):
+                return True
+        return False
+
+    @component.output_types(documents=list[Document], links=list[str])
+    def run(self, query: str) -> dict[str, Any]:
+        """
+        Search the web using Olostep.
+
+        :param query: the search query string
+        :returns: dict with 'documents' (List[Document]) and 'links' (List[str]); at most `top_k`
+            entries each, restricted to `allowed_domains` when that list is non-empty
+        :raises OlostepSearchError: if the API key is not set, the request fails, or the
+            response body is not valid JSON
+        """
+        resolved_key = self.api_key.resolve_value()
+        if not resolved_key:
+            msg = "OLOSTEP_API_KEY is not set. Set it in your environment or pass it explicitly."
+            raise OlostepSearchError(msg)
+
+        try:
+            response = requests.post(
+                "https://api.olostep.com/v1/searches",
+                headers={
+                    "Authorization": f"Bearer {resolved_key}",
+                    "Content-Type": "application/json",
+                },
+                # `query` goes last so an entry in `search_params` cannot silently override it.
+                json={**self.search_params, "query": query},
+                timeout=30,
+            )
+            response.raise_for_status()
+        except requests.HTTPError as e:
+            msg = f"Olostep /searches request failed: {e.response.status_code} {e.response.text}"
+            raise OlostepSearchError(msg) from e
+        except requests.RequestException as e:
+            msg = f"Olostep /searches network error: {e}"
+            raise OlostepSearchError(msg) from e
+
+        try:
+            data = response.json()
+        except ValueError as e:
+            # A 2xx response with a non-JSON body must surface as the documented error type.
+            msg = f"Olostep /searches returned a response that is not valid JSON: {e}"
+            raise OlostepSearchError(msg) from e
+
+        # Treat a missing or null "result"/"links" entry as "no results" instead of crashing.
+        result = data.get("result") if isinstance(data, dict) else None
+        raw_links = result.get("links") if isinstance(result, dict) else None
+        if not isinstance(raw_links, list):
+            raw_links = []
+        links_data = [link for link in raw_links if isinstance(link, dict)]
+
+        if self.allowed_domains:
+            links_data = [
+                link for link in links_data if self._is_allowed(link.get("url") or "", self.allowed_domains)
+            ]
+
+        # Truncate after filtering so `top_k` counts only results that passed the domain filter.
+        links_data = links_data[: self.top_k]
+
+        documents: list[Document] = []
+        links: list[str] = []
+        for link in links_data:
+            url = link.get("url") or ""
+            documents.append(
+                Document(
+                    content=link.get("description", ""),
+                    meta={"title": link.get("title", ""), "link": url},
+                )
+            )
+            links.append(url)
+
+        return {"documents": documents, "links": links}
+
+    def to_dict(self) -> dict[str, Any]:
+        """
+        Serialize the component to a dictionary.
+
+        :returns: dictionary with the serialized API key secret and all init parameters.
+        """
+        return default_to_dict(
+            self,
+            api_key=self.api_key.to_dict(),
+            top_k=self.top_k,
+            allowed_domains=self.allowed_domains,
+            search_params=self.search_params,
+        )
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "OlostepWebSearch":
+        """
+        Deserialize a component from a dictionary.
+
+        :param data: dictionary with an `init_parameters` entry holding the serialized `api_key`.
+        :returns: the deserialized component.
+        """
+        deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
+        return default_from_dict(cls, data)
diff --git a/integrations/olostep/tests/__init__.py b/integrations/olostep/tests/__init__.py
new file mode 100644
index 0000000000..c1764a6e03
--- /dev/null
+++ b/integrations/olostep/tests/__init__.py
@@ -0,0 +1,3 @@
+# SPDX-FileCopyrightText: 2022-present deepset GmbH
+#
+# SPDX-License-Identifier: Apache-2.0
diff --git a/integrations/olostep/tests/test_olostep_fetcher.py b/integrations/olostep/tests/test_olostep_fetcher.py
new file mode 100644
index 0000000000..092e73c469
--- /dev/null
+++ b/integrations/olostep/tests/test_olostep_fetcher.py
@@ -0,0 +1,76 @@
+# SPDX-FileCopyrightText: 2024-present Olostep
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import logging
+import os
+from unittest.mock import MagicMock, patch
+
+import pytest
+from haystack.utils import Secret
+from olostep.errors import OlostepServerError_AuthFailed
+
+from olostep_haystack.fetcher import OlostepFetcher, OlostepFetcherError
+
+
+class TestOlostepFetcher:
+    """Unit and integration tests for `OlostepFetcher`."""
+
+    @staticmethod
+    def _client_with_markdown(markdown):
+        """Build a mocked Olostep client whose scrape result exposes `markdown` as markdown content."""
+        retrieved = MagicMock()
+        retrieved.markdown_content = markdown
+        scraped = MagicMock()
+        scraped.retrieve.return_value = retrieved
+        client = MagicMock()
+        client.scrape.return_value = scraped
+        return client
+
+    def test_run_returns_documents(self):
+        client = self._client_with_markdown("# Hello World")
+
+        with patch("olostep.SyncOlostepClient", return_value=client, create=True):
+            fetcher = OlostepFetcher(api_key=Secret.from_token("test-key"))
+            output = fetcher.run(urls=["https://example.com"])
+
+        docs = output["documents"]
+        assert len(docs) == 1
+        assert docs[0].content == "# Hello World"
+        assert docs[0].meta["url"] == "https://example.com"
+
+    def test_run_empty_content_logs_warning(self, caplog):
+        client = self._client_with_markdown(None)
+
+        with patch("olostep.SyncOlostepClient", return_value=client, create=True):
+            fetcher = OlostepFetcher(api_key=Secret.from_token("test-key"))
+            with caplog.at_level(logging.WARNING, logger="olostep_haystack.fetcher"):
+                output = fetcher.run(urls=["https://example.com"])
+
+        # No content means no document, but the skipped URL must be reported.
+        assert output["documents"] == []
+        assert "Olostep returned no markdown content for https://example.com" in caplog.text
+
+    def test_auth_error_raises(self):
+        failing_client = MagicMock()
+        failing_client.scrape.side_effect = OlostepServerError_AuthFailed("auth failed")
+
+        with patch("olostep.SyncOlostepClient", return_value=failing_client, create=True):
+            fetcher = OlostepFetcher(api_key=Secret.from_token("test-key"))
+            with pytest.raises(OlostepFetcherError, match="authentication failed"):
+                fetcher.run(urls=["https://example.com"])
+
+    def test_to_dict_from_dict_round_trip(self, monkeypatch):
+        monkeypatch.setenv("OLOSTEP_API_KEY", "test-key")
+        original = OlostepFetcher(api_key=Secret.from_env_var("OLOSTEP_API_KEY"), format="markdown")
+
+        restored = OlostepFetcher.from_dict(original.to_dict())
+
+        assert restored.format == "markdown"
+        assert restored.api_key.resolve_value() == "test-key"
+
+    @pytest.mark.skipif(
+        not os.environ.get("OLOSTEP_API_KEY"),
+        reason="Export OLOSTEP_API_KEY to run integration tests.",
+    )
+    @pytest.mark.integration
+    def test_run_integration(self):
+        fetcher = OlostepFetcher(api_key=Secret.from_env_var("OLOSTEP_API_KEY"), format="markdown")
+        output = fetcher.run(urls=["https://example.com"])
+        assert len(output["documents"]) > 0
diff --git a/integrations/olostep/tests/test_olostep_web_search.py b/integrations/olostep/tests/test_olostep_web_search.py
new file mode 100644
index 0000000000..1b6b70afb4
--- /dev/null
+++ b/integrations/olostep/tests/test_olostep_web_search.py
@@ -0,0 +1,86 @@
+# SPDX-FileCopyrightText: 2024-present Olostep
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+from unittest.mock import MagicMock, patch
+
+import pytest
+from haystack import Document
+from haystack.utils import Secret
+
+from olostep_haystack.web_search import OlostepSearchError, OlostepWebSearch
+
+# Canned payload with the `result.links` layout that `OlostepWebSearch.run` reads.
+_MOCK_LINKS = [
+    {"url": "https://example.com", "title": "Example", "description": "An example site"},
+    {"url": "https://another.com", "title": "Another", "description": "Another site"},
+]
+MOCK_RESPONSE = {"result": {"links": _MOCK_LINKS}}
+
+
+class TestOlostepWebSearch:
+    """Unit and integration tests for `OlostepWebSearch`."""
+
+    def test_run_returns_documents_and_links(self):
+        ws = OlostepWebSearch(api_key=Secret.from_token("test-key"), top_k=5)
+
+        mock_response = MagicMock()
+        mock_response.json.return_value = MOCK_RESPONSE
+
+        with patch("olostep_haystack.web_search.requests.post", return_value=mock_response):
+            result = ws.run(query="test")
+
+        assert len(result["documents"]) == 2
+        assert isinstance(result["documents"][0], Document)
+        assert result["documents"][0].content == "An example site"
+        assert result["documents"][0].meta["title"] == "Example"
+        assert result["documents"][0].meta["link"] == "https://example.com"
+        assert result["links"] == ["https://example.com", "https://another.com"]
+
+    def test_run_top_k_limits_results(self):
+        ws = OlostepWebSearch(api_key=Secret.from_token("test-key"), top_k=1)
+
+        mock_response = MagicMock()
+        mock_response.json.return_value = MOCK_RESPONSE
+
+        with patch("olostep_haystack.web_search.requests.post", return_value=mock_response):
+            result = ws.run(query="test")
+
+        assert len(result["documents"]) == 1
+        assert result["links"] == ["https://example.com"]
+
+    def test_run_allowed_domains_filters_results(self):
+        ws = OlostepWebSearch(api_key=Secret.from_token("test-key"), allowed_domains=["another.com"])
+
+        mock_response = MagicMock()
+        mock_response.json.return_value = MOCK_RESPONSE
+
+        with patch("olostep_haystack.web_search.requests.post", return_value=mock_response):
+            result = ws.run(query="test")
+
+        # Only the link hosted on the allowed domain survives, in both outputs.
+        assert result["links"] == ["https://another.com"]
+        assert [doc.meta["link"] for doc in result["documents"]] == ["https://another.com"]
+
+    def test_missing_api_key_raises(self, monkeypatch):
+        # Clear the variable explicitly: when OLOSTEP_API_KEY is exported (e.g. to run the
+        # integration tests) the secret would otherwise resolve and no error would be raised.
+        monkeypatch.delenv("OLOSTEP_API_KEY", raising=False)
+        ws = OlostepWebSearch(api_key=Secret.from_env_var("OLOSTEP_API_KEY", strict=False))
+
+        with (
+            patch("olostep_haystack.web_search.requests.post") as mock_post,
+            pytest.raises(OlostepSearchError, match="OLOSTEP_API_KEY is not set"),
+        ):
+            ws.run(query="test")
+
+        # The key check must short-circuit before any HTTP request is attempted.
+        mock_post.assert_not_called()
+
+    def test_to_dict_from_dict_round_trip(self, monkeypatch):
+        monkeypatch.setenv("OLOSTEP_API_KEY", "test-key")
+        ws = OlostepWebSearch(
+            api_key=Secret.from_env_var("OLOSTEP_API_KEY"),
+            top_k=3,
+            allowed_domains=["example.com"],
+            search_params={"foo": "bar"},
+        )
+
+        data = ws.to_dict()
+        restored = OlostepWebSearch.from_dict(data)
+
+        assert restored.top_k == 3
+        assert restored.allowed_domains == ["example.com"]
+        assert restored.search_params == {"foo": "bar"}
+        assert restored.api_key.resolve_value() == "test-key"
+
+    @pytest.mark.skipif(
+        not os.environ.get("OLOSTEP_API_KEY"),
+        reason="Export OLOSTEP_API_KEY to run integration tests.",
+    )
+    @pytest.mark.integration
+    def test_run_integration(self):
+        ws = OlostepWebSearch(api_key=Secret.from_env_var("OLOSTEP_API_KEY"), top_k=3)
+        result = ws.run(query="What is Haystack by deepset?")
+        assert len(result["documents"]) > 0
+        assert len(result["links"]) > 0
+        assert isinstance(result["documents"][0], Document)