Skip to content

Commit a8a385f

Browse files
Merge origin/main into feat/docling-serve-integration
2 parents b3d3f95 + f5ede63 commit a8a385f

466 files changed

Lines changed: 39797 additions & 5927 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/labeler.yml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
# Integrations
2+
integration:alloydb:
3+
- changed-files:
4+
- any-glob-to-any-file: "integrations/alloydb/**/*"
5+
- any-glob-to-any-file: ".github/workflows/alloydb.yml"
6+
27
integration:aimlapi:
38
- changed-files:
49
- any-glob-to-any-file: "integrations/aimlapi/**/*"
@@ -39,6 +44,11 @@ integration:azure-doc-intelligence:
3944
- any-glob-to-any-file: "integrations/azure_doc_intelligence/**/*"
4045
- any-glob-to-any-file: ".github/workflows/azure_doc_intelligence.yml"
4146

47+
integration:chonkie:
48+
- changed-files:
49+
- any-glob-to-any-file: "integrations/chonkie/**/*"
50+
- any-glob-to-any-file: ".github/workflows/chonkie.yml"
51+
4252
integration:chroma:
4353
- changed-files:
4454
- any-glob-to-any-file: "integrations/chroma/**/*"
@@ -69,6 +79,16 @@ integration:docling-serve:
6979
- any-glob-to-any-file: "integrations/docling_serve/**/*"
7080
- any-glob-to-any-file: ".github/workflows/docling_serve.yml"
7181

82+
integration:dspy:
83+
- changed-files:
84+
- any-glob-to-any-file: "integrations/dspy/**/*"
85+
- any-glob-to-any-file: ".github/workflows/dspy.yml"
86+
87+
integration:e2b:
88+
- changed-files:
89+
- any-glob-to-any-file: "integrations/e2b/**/*"
90+
- any-glob-to-any-file: ".github/workflows/e2b.yml"
91+
7292
integration:elasticsearch:
7393
- changed-files:
7494
- any-glob-to-any-file: "integrations/elasticsearch/**/*"
@@ -80,6 +100,11 @@ integration:faiss:
80100
- any-glob-to-any-file: ".github/workflows/faiss.yml"
81101

82102

103+
integration:falkordb:
104+
- changed-files:
105+
- any-glob-to-any-file: "integrations/falkordb/**/*"
106+
- any-glob-to-any-file: ".github/workflows/falkordb.yml"
107+
83108
integration:fastembed:
84109
- changed-files:
85110
- any-glob-to-any-file: "integrations/fastembed/**/*"
@@ -208,6 +233,11 @@ integration:pgvector:
208233
- any-glob-to-any-file: "integrations/pgvector/**/*"
209234
- any-glob-to-any-file: ".github/workflows/pgvector.yml"
210235

236+
integration:presidio:
237+
- changed-files:
238+
- any-glob-to-any-file: "integrations/presidio/**/*"
239+
- any-glob-to-any-file: ".github/workflows/presidio.yml"
240+
211241
integration:pinecone:
212242
- changed-files:
213243
- any-glob-to-any-file: "integrations/pinecone/**/*"
@@ -233,11 +263,21 @@ integration:snowflake:
233263
- any-glob-to-any-file: "integrations/snowflake/**/*"
234264
- any-glob-to-any-file: ".github/workflows/snowflake.yml"
235265

266+
integration:sqlalchemy:
267+
- changed-files:
268+
- any-glob-to-any-file: "integrations/sqlalchemy/**/*"
269+
- any-glob-to-any-file: ".github/workflows/sqlalchemy.yml"
270+
236271
integration:stackit:
237272
- changed-files:
238273
- any-glob-to-any-file: "integrations/stackit/**/*"
239274
- any-glob-to-any-file: ".github/workflows/stackit.yml"
240275

276+
integration:supabase:
277+
- changed-files:
278+
- any-glob-to-any-file: "integrations/supabase/**/*"
279+
- any-glob-to-any-file: ".github/workflows/supabase.yml"
280+
241281
integration:tavily:
242282
- changed-files:
243283
- any-glob-to-any-file: "integrations/tavily/**/*"
@@ -258,6 +298,11 @@ integration:valkey:
258298
- any-glob-to-any-file: "integrations/valkey/**/*"
259299
- any-glob-to-any-file: ".github/workflows/valkey.yml"
260300

301+
integration:vllm:
302+
- changed-files:
303+
- any-glob-to-any-file: "integrations/vllm/**/*"
304+
- any-glob-to-any-file: ".github/workflows/vllm.yml"
305+
261306
integration:watsonx:
262307
- changed-files:
263308
- any-glob-to-any-file: "integrations/watsonx/**/*"

.github/utils/validate_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
# * integrations/<INTEGRATION_FOLDER_NAME>-v1.0.0
55
# * integrations/<INTEGRATION_FOLDER_NAME>-v1.0.0.post0 (for post-releases)
6-
INTEGRATION_VERSION_REGEX = r"integrations/([a-zA-Z_]+)-v([0-9]+\.[0-9]+\.[0-9]+(?:\.post[0-9]+)?)"
6+
INTEGRATION_VERSION_REGEX = r"integrations/([a-zA-Z0-9_]+)-v([0-9]+\.[0-9]+\.[0-9]+(?:\.post[0-9]+)?)"
77

88

99
def validate_version_number(tag: str):

.github/workflows/CI_check_api_ref.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ jobs:
101101
102102
- name: Set up Node.js
103103
if: steps.changed.outputs.integrations != '[]'
104-
uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0
104+
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
105105
with:
106106
node-version: "22"
107107

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
name: Core / Notify maintainers on fork PRs not running integration tests
2+
3+
on:
4+
pull_request_target:
5+
types: [opened, reopened, synchronize]
6+
7+
permissions:
8+
contents: read
9+
pull-requests: write
10+
11+
env:
12+
NON_TEST_SECRETS: "SLACK_WEBHOOK_URL_NOTIFICATIONS"
13+
14+
jobs:
15+
notify:
16+
if: github.event.pull_request.head.repo.fork == true
17+
runs-on: ubuntu-slim
18+
steps:
19+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
20+
with:
21+
ref: ${{ github.event.pull_request.base.sha }}
22+
23+
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
24+
with:
25+
python-version: "3.13"
26+
27+
- name: Detect integrations requiring API keys for integration tests
28+
id: affected
29+
shell: python
30+
env:
31+
GH_TOKEN: ${{ github.token }}
32+
PR_NUMBER: ${{ github.event.pull_request.number }}
33+
run: |
34+
import os
35+
import re
36+
import subprocess
37+
from pathlib import Path
38+
39+
WORKFLOWS_DIR = Path(".github/workflows")
40+
NON_TEST_SECRETS = set(os.environ["NON_TEST_SECRETS"].split())
41+
SECRET_REF = re.compile(r"secrets\.([A-Z0-9_]+)")
42+
43+
44+
def needs_api_key(integration: str) -> bool:
45+
wf = WORKFLOWS_DIR / f"{integration}.yml"
46+
if not wf.exists():
47+
return False
48+
referenced = set(SECRET_REF.findall(wf.read_text()))
49+
return bool(referenced - NON_TEST_SECRETS)
50+
51+
52+
# 1. PR file list
53+
paths = subprocess.check_output(
54+
[
55+
"gh", "pr", "view", os.environ["PR_NUMBER"],
56+
"--json", "files", "-q", ".files[].path",
57+
],
58+
text=True,
59+
).splitlines()
60+
61+
# 2. Integrations touched by this PR
62+
touched = set()
63+
for p in paths:
64+
parts = Path(p).parts
65+
if len(parts) >= 2 and parts[0] == "integrations":
66+
touched.add(parts[1])
67+
elif (
68+
len(parts) == 3
69+
and parts[0] == ".github"
70+
and parts[1] == "workflows"
71+
and parts[2].endswith(".yml")
72+
and not parts[2].startswith("CI_")
73+
):
74+
touched.add(parts[2].removesuffix(".yml"))
75+
76+
# 3. Of those, which need API keys
77+
affected = sorted(t for t in touched if needs_api_key(t))
78+
print(f"touched = {sorted(touched)}")
79+
print(f"affected = {affected}")
80+
81+
list_value = "\n".join(f"- {name}" for name in affected)
82+
with open(os.environ["GITHUB_OUTPUT"], "a") as f:
83+
f.write(f"list<<EOF\n{list_value}\nEOF\n")
84+
85+
- name: Post or update sticky comment
86+
if: steps.affected.outputs.list != ''
87+
uses: marocchino/sticky-pull-request-comment@0ea0beb66eb9baf113663a64ec522f60e49231c0 # v3.0.4
88+
with:
89+
header: fork-pr-api-keys
90+
number: ${{ github.event.pull_request.number }}
91+
message: |
92+
**Heads-up for maintainers**
93+
94+
This PR is from a fork and touches integrations whose integration tests require API keys.
95+
Those tests are **skipped** in CI because fork PRs don't have access to repo secrets for security reasons.
96+
97+
Affected integrations:
98+
${{ steps.affected.outputs.list }}
99+
100+
Please run the integration tests locally (`hatch run test:integration` inside each folder) before approving.
101+
102+
- name: Remove stale comment
103+
if: steps.affected.outputs.list == ''
104+
uses: marocchino/sticky-pull-request-comment@0ea0beb66eb9baf113663a64ec522f60e49231c0 # v3.0.4
105+
with:
106+
header: fork-pr-api-keys
107+
number: ${{ github.event.pull_request.number }}
108+
delete: true

.github/workflows/CI_coverage_comment.yml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Add comment about test coverage to PRs
1+
name: Core / Add comment about test coverage to PRs
22

33
on:
44
workflow_run:
@@ -11,14 +11,17 @@ on:
1111
- "Test / astra"
1212
- "Test / azure_ai_search"
1313
- "Test / azure_doc_intelligence"
14+
- "Test / chonkie"
1415
- "Test / chroma"
1516
- "Test / cohere"
1617
- "Test / cometapi"
1718
- "Test / deepeval"
1819
- "Test / docling_serve"
1920
- "Test / dspy"
21+
- "Test / e2b"
2022
- "Test / elasticsearch"
2123
- "Test / faiss"
24+
- "Test / falkordb"
2225
- "Test / fastembed"
2326
- "Test / firecrawl"
2427
- "Test / github"
@@ -44,15 +47,19 @@ on:
4447
- "Test / paddleocr"
4548
- "Test / pgvector"
4649
- "Test / pinecone"
50+
- "Test / presidio"
4751
- "Test / pyversity"
4852
- "Test / qdrant"
4953
- "Test / ragas"
5054
- "Test / snowflake"
55+
- "Test / sqlalchemy"
5156
- "Test / stackit"
57+
- "Test / supabase"
5258
- "Test / tavily"
5359
- "Test / togetherai"
5460
- "Test / unstructured"
5561
- "Test / valkey"
62+
- "Test / vllm"
5663
- "Test / watsonx"
5764
- "Test / weave"
5865
- "Test / weaviate"
@@ -71,7 +78,7 @@ jobs:
7178
# Workflow names follow "Test / <name>" convention; normalize hyphens to underscores to match directory names
7279
echo "name=$(echo '${{ github.event.workflow_run.name }}' | sed 's/Test \/ //' | tr '-' '_')" >> "$GITHUB_OUTPUT"
7380
74-
- uses: py-cov-action/python-coverage-comment-action@7188638f871f721a365d644f505d1ff3df20d683 # v3.40
81+
- uses: py-cov-action/python-coverage-comment-action@63f52f4fbbffada6e8dee8ec432de7e01df9ba79 # v3.41
7582
with:
7683
GITHUB_TOKEN: ${{ github.token }}
7784
GITHUB_PR_RUN_ID: ${{ github.event.workflow_run.id }}

.github/workflows/CI_docusaurus_sync.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ jobs:
5151
run: hatch run docs
5252

5353
- name: Upload API reference artifact
54-
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
54+
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
5555
with:
5656
name: ${{ steps.pathfinder.outputs.integration_name }}
5757
path: ${{ steps.pathfinder.outputs.project_path }}/${{ steps.pathfinder.outputs.integration_name }}.md
@@ -105,7 +105,7 @@ jobs:
105105
os.remove(artifact_filename)
106106
107107
- name: Create Pull Request
108-
uses: peter-evans/create-pull-request@c0f553fe549906ede9cf27b5156039d195d2ece0 # v8.1.0
108+
uses: peter-evans/create-pull-request@5f6978faf089d4d20b00c7766989d076bb2fc7f1 # v8.1.1
109109
env:
110110
INTEGRATION_NAME: ${{ needs.generate-api-reference.outputs.integration_name }}
111111
with:

.github/workflows/CI_license_compliance.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,22 @@ name: Core / License Compliance
33
on:
44
pull_request:
55
paths:
6-
- "integrations/**/pyproject.toml"
7-
- ".github/workflows/CI_license_compliance.yml"
6+
- "integrations/**/pyproject.toml"
7+
- ".github/workflows/CI_license_compliance.yml"
88
# Since we test PRs, there is no need to run the workflow at each
99
# merge on `main`. Let's use a cron job instead.
1010
schedule:
1111
- cron: "0 0 * * *" # every day at midnight UTC
1212

1313
env:
1414
PYTHON_VERSION: "3.10"
15-
EXCLUDE_PACKAGES: "(?i)^(azure-identity|fastembed|ragas|tqdm|psycopg|mistralai|pgvector).*"
15+
EXCLUDE_PACKAGES: "(?i)^(azure-identity|azure-search-documents|fastembed|tqdm|psycopg|mistralai|pgvector).*"
1616

1717
# Exclusions must be explicitly motivated
1818
#
1919
# - azure-identity is MIT but the license is not available on PyPI
20+
# - azure-search-documents is MIT but the license is not available on PyPI
2021
# - fastembed is Apache 2.0 but the license on PyPI is unclear ("Other/Proprietary License (Apache License)")
21-
# - ragas is Apache 2.0 but the license is not available on PyPI
2222
# - mistralai is Apache 2.0 but the license is not available on PyPI
2323
# - pgvector is MIT but the license is not available on PyPI
2424

@@ -43,7 +43,7 @@ jobs:
4343
- name: Get changed files (for pull requests only)
4444
if: ${{ github.event_name == 'pull_request'}}
4545
id: changed-files
46-
uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5
46+
uses: tj-actions/changed-files@9426d40962ed5378910ee2e21d5f8c6fcbf2dd96 # v47.0.6
4747
with:
4848
files_yaml: |
4949
pyproject:

.github/workflows/CI_project.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ jobs:
1010
name: Add new issues to project for triage
1111
runs-on: ubuntu-slim
1212
steps:
13-
- uses: actions/add-to-project@244f685bbc3b7adfa8466e08b698b5577571133e # v1.0.2
13+
- uses: actions/add-to-project@5afcf98fcd03f1c2f92c3c83f58ae24323cc57fd # v2.0.0
1414
with:
1515
project-url: https://github.com/orgs/deepset-ai/projects/5
1616
github-token: ${{ secrets.GH_PROJECT_PAT }}

.github/workflows/CI_pypi_release.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,12 +57,12 @@ jobs:
5757
run: hatch build
5858

5959
- name: Publish on PyPi
60-
uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0
60+
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
6161
with:
6262
packages-dir: ${{ steps.pathfinder.outputs.project_path }}/dist
6363

6464
- name: Generate changelog
65-
uses: orhun/git-cliff-action@c93ef52f3d0ddcdcc9bd5447d98d458a11cd4f72 # v4.7.1
65+
uses: orhun/git-cliff-action@f50e11560dce63f7c33227798f90b924471a88b5 # v4.8.0
6666
env:
6767
OUTPUT: "${{ steps.pathfinder.outputs.project_path }}/CHANGELOG.md"
6868
with:

.github/workflows/CI_workflows_linting.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Github workflows linter
1+
name: Core / Github workflows linter
22

33
on:
44
pull_request:

0 commit comments

Comments
 (0)