Skip to content

Commit 9cc4fc5

Browse files
committed
Address Vespa integration review feedback
1 parent 553ac99 commit 9cc4fc5

23 files changed

Lines changed: 848 additions & 383 deletions

.github/workflows/vespa.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ concurrency:
2929
env:
3030
PYTHONUNBUFFERED: "1"
3131
FORCE_COLOR: "1"
32-
TEST_MATRIX_OS: '["ubuntu-latest", "windows-latest", "macos-latest"]'
32+
TEST_MATRIX_OS: '["ubuntu-latest"]'
3333
TEST_MATRIX_PYTHON: '["3.10", "3.14"]'
3434

3535
jobs:
@@ -82,6 +82,9 @@ jobs:
8282
- name: Run unit tests
8383
run: hatch run test:unit-cov-retry
8484

85+
- name: Run Vespa container
86+
run: docker compose up -d --wait
87+
8588
# On PR: generates coverage comment artifact. On push to main: stores coverage baseline on data branch.
8689
- name: Store unit tests coverage
8790
if: matrix.python-version == '3.10' && runner.os == 'Linux' && github.event_name != 'schedule'
@@ -95,6 +98,9 @@ jobs:
9598
MINIMUM_ORANGE: 60
9699

97100
- name: Run integration tests
101+
env:
102+
VESPA_RUN_INTEGRATION_TESTS: "1"
103+
VESPA_URL: "http://localhost"
98104
run: hatch run test:integration-cov-append-retry
99105

100106
- name: Store combined coverage

integrations/vespa/CHANGELOG.md

Lines changed: 0 additions & 5 deletions
This file was deleted.

integrations/vespa/README.md

Lines changed: 4 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3,41 +3,12 @@
33
[![PyPI - Version](https://img.shields.io/pypi/v/vespa-haystack.svg)](https://pypi.org/project/vespa-haystack)
44
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/vespa-haystack.svg)](https://pypi.org/project/vespa-haystack)
55

6-
- [Changelog](https://github.com/deepset-ai/haystack-core-integrations/blob/main/integrations/vespa/CHANGELOG.md)
7-
6+
- [Integration page](https://haystack.deepset.ai/integrations/vespa)
87
---
98

10-
`vespa-haystack` provides a Haystack `DocumentStore` plus keyword and embedding retrievers for
11-
[Vespa](https://vespa.ai/).
12-
13-
This integration assumes you already have a Vespa application and schema running. The document store
14-
connects to that existing setup and lets you write documents and query them from Haystack pipelines.
15-
16-
## Examples
17-
18-
- [Keyword retrieval example](examples/keyword_retrieval.py)
19-
- [Embedding retrieval example](examples/embedding_retrieval.py)
20-
21-
## Local Smoke Test
22-
23-
To verify the integration against a real local Vespa instance, start Docker Desktop and run:
24-
25-
```bash
26-
hatch run python scripts/local_keyword_smoke_test.py
27-
```
28-
29-
This deploys a minimal Vespa application locally, writes three documents, runs a direct filter query,
30-
and checks keyword retrieval through `VespaKeywordRetriever`.
31-
32-
## Notes
33-
34-
- Set `VESPA_URL` to your Vespa endpoint before running the examples.
35-
- Make sure your Vespa schema field names match the ones you pass into `VespaDocumentStore`.
36-
- Vespa document IDs are written through the Vespa document path (`data_id`). The optional `id_field`
37-
is only used when a query response also exposes an explicit id field.
38-
- For embedding retrieval, your Vespa schema must already include a tensor field and a ranking profile
39-
compatible with nearest-neighbor search. The example assumes a ranking profile named `semantic`.
40-
419
## Contributing
4210

4311
Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md).
12+
13+
To run integration tests locally, you need a Docker container running Vespa with the bundled application deployed.
14+
Use the provided `docker-compose.yml` file to start the container: `docker compose up -d`.
integrations/vespa/docker-compose.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
services:
2+
vespa:
3+
image: vespaengine/vespa:latest
4+
container_name: vespa
5+
hostname: vespa
6+
ports:
7+
- "8080:8080"
8+
- "19071:19071"
9+
volumes:
10+
- ./vespa_app:/vespa_app:ro
11+
healthcheck:
12+
test: ["CMD", "curl", "-f", "http://localhost:19071/ApplicationStatus"]
13+
interval: 10s
14+
timeout: 5s
15+
retries: 30

integrations/vespa/examples/embedding_retrieval.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@
5454
VespaEmbeddingRetriever(
5555
document_store=document_store,
5656
top_k=2,
57-
ranking="semantic",
5857
query_tensor_name="query_embedding",
5958
),
6059
)

integrations/vespa/examples/keyword_retrieval.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
# This example assumes your Vespa schema already exists and contains:
2222
# - a text field named `content`
2323
# - metadata fields `category` and `author`
24+
# - a ranking profile named `bm25`
2425
document_store = VespaDocumentStore(
2526
schema="doc",
2627
namespace="doc",

integrations/vespa/pydoc/config_docusaurus.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
loaders:
22
- modules:
3+
- haystack_integrations.components.retrievers.vespa.keyword_retriever
4+
- haystack_integrations.components.retrievers.vespa.embedding_retriever
35
- haystack_integrations.document_stores.vespa.document_store
6+
- haystack_integrations.document_stores.vespa.filters
47
search_path: [../src]
58
processors:
69
- type: filter

integrations/vespa/pyproject.toml

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,20 @@ build-backend = "hatchling.build"
55
[project]
66
name = "vespa-haystack"
77
dynamic = ["version"]
8-
description = "Haystack integration for vespa"
8+
description = "Haystack integration for Vespa"
99
readme = "README.md"
1010
requires-python = ">=3.10"
1111
license = "Apache-2.0"
12-
keywords = []
12+
keywords = [
13+
"haystack",
14+
"vespa",
15+
"document-store",
16+
"retriever",
17+
"search",
18+
"retrieval",
19+
"rag",
20+
"vector-search",
21+
]
1322
authors = [{ name = "deepset GmbH", email = "info@deepset.ai" }]
1423
classifiers = [
1524
"License :: OSI Approved :: Apache Software License",
@@ -23,7 +32,7 @@ classifiers = [
2332
"Programming Language :: Python :: Implementation :: CPython",
2433
"Programming Language :: Python :: Implementation :: PyPy",
2534
]
26-
dependencies = ["haystack-ai", "pyvespa"]
35+
dependencies = ["haystack-ai>=2.28.0", "pyvespa>=0.58.0"]
2736

2837
[project.urls]
2938
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/vespa#readme"
@@ -119,7 +128,7 @@ select = [
119128
ignore = [
120129
# Allow non-abstract empty methods in abstract base classes
121130
"B027",
122-
# Allow function calls in argument defaults (common Haystack pattern for Secret.from_env_var)
131+
# Allow function calls in argument defaults
123132
"B008",
124133
# Ignore checks for possible passwords
125134
"S105",

integrations/vespa/scripts/local_keyword_smoke_test.py

Lines changed: 0 additions & 110 deletions
This file was deleted.

0 commit comments

Comments
 (0)