Skip to content

Commit 3ed018f

Browse files
Merge branch 'main' into feat/supabase-bucket-downloader
2 parents 948330f + cfa014f commit 3ed018f

308 files changed

Lines changed: 21711 additions & 2603 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/dependabot.yml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,3 +4,5 @@ updates:
44
directory: '/'
55
schedule:
66
interval: 'daily'
7+
cooldown:
8+
default-days: 1

.github/labeler.yml

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,9 @@
11
# Integrations
2+
integration:alloydb:
3+
- changed-files:
4+
- any-glob-to-any-file: "integrations/alloydb/**/*"
5+
- any-glob-to-any-file: ".github/workflows/alloydb.yml"
6+
27
integration:aimlapi:
38
- changed-files:
49
- any-glob-to-any-file: "integrations/aimlapi/**/*"
@@ -29,6 +34,11 @@ integration:astra:
2934
- any-glob-to-any-file: "integrations/astra/**/*"
3035
- any-glob-to-any-file: ".github/workflows/astra.yml"
3136

37+
integration:brave:
38+
- changed-files:
39+
- any-glob-to-any-file: "integrations/brave/**/*"
40+
- any-glob-to-any-file: ".github/workflows/brave.yml"
41+
3242
integration:azure-ai-search:
3343
- changed-files:
3444
- any-glob-to-any-file: "integrations/azure_ai_search/**/*"
@@ -39,6 +49,11 @@ integration:azure-doc-intelligence:
3949
- any-glob-to-any-file: "integrations/azure_doc_intelligence/**/*"
4050
- any-glob-to-any-file: ".github/workflows/azure_doc_intelligence.yml"
4151

52+
integration:chonkie:
53+
- changed-files:
54+
- any-glob-to-any-file: "integrations/chonkie/**/*"
55+
- any-glob-to-any-file: ".github/workflows/chonkie.yml"
56+
4257
integration:chroma:
4358
- changed-files:
4459
- any-glob-to-any-file: "integrations/chroma/**/*"
@@ -64,11 +79,21 @@ integration:docling:
6479
- any-glob-to-any-file: "integrations/docling/**/*"
6580
- any-glob-to-any-file: ".github/workflows/docling.yml"
6681

82+
integration:docling-serve:
83+
- changed-files:
84+
- any-glob-to-any-file: "integrations/docling_serve/**/*"
85+
- any-glob-to-any-file: ".github/workflows/docling_serve.yml"
86+
6787
integration:dspy:
6888
- changed-files:
6989
- any-glob-to-any-file: "integrations/dspy/**/*"
7090
- any-glob-to-any-file: ".github/workflows/dspy.yml"
7191

92+
integration:e2b:
93+
- changed-files:
94+
- any-glob-to-any-file: "integrations/e2b/**/*"
95+
- any-glob-to-any-file: ".github/workflows/e2b.yml"
96+
7297
integration:elasticsearch:
7398
- changed-files:
7499
- any-glob-to-any-file: "integrations/elasticsearch/**/*"
@@ -79,6 +104,12 @@ integration:faiss:
79104
- any-glob-to-any-file: "integrations/faiss/**/*"
80105
- any-glob-to-any-file: ".github/workflows/faiss.yml"
81106

107+
108+
integration:falkordb:
109+
- changed-files:
110+
- any-glob-to-any-file: "integrations/falkordb/**/*"
111+
- any-glob-to-any-file: ".github/workflows/falkordb.yml"
112+
82113
integration:fastembed:
83114
- changed-files:
84115
- any-glob-to-any-file: "integrations/fastembed/**/*"
@@ -202,6 +233,11 @@ integration:paddleocr:
202233
- any-glob-to-any-file: "integrations/paddleocr/**/*"
203234
- any-glob-to-any-file: ".github/workflows/paddleocr.yml"
204235

236+
integration:perplexity:
237+
- changed-files:
238+
- any-glob-to-any-file: "integrations/perplexity/**/*"
239+
- any-glob-to-any-file: ".github/workflows/perplexity.yml"
240+
205241
integration:pgvector:
206242
- changed-files:
207243
- any-glob-to-any-file: "integrations/pgvector/**/*"

.github/utils/validate_version.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
# * integrations/<INTEGRATION_FOLDER_NAME>-v1.0.0
55
# * integrations/<INTEGRATION_FOLDER_NAME>-v1.0.0.post0 (for post-releases)
6-
INTEGRATION_VERSION_REGEX = r"integrations/([a-zA-Z_]+)-v([0-9]+\.[0-9]+\.[0-9]+(?:\.post[0-9]+)?)"
6+
INTEGRATION_VERSION_REGEX = r"integrations/([a-zA-Z0-9_]+)-v([0-9]+\.[0-9]+\.[0-9]+(?:\.post[0-9]+)?)"
77

88

99
def validate_version_number(tag: str):

.github/workflows/CI_check_api_ref.yml

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -81,7 +81,9 @@ jobs:
8181
8282
- name: Install Hatch
8383
if: steps.changed.outputs.integrations != '[]'
84-
run: pip install hatch
84+
run: |
85+
python -m pip install --upgrade pip
86+
pip install hatch --uploaded-prior-to=P1D
8587
8688
- name: Generate API references
8789
if: steps.changed.outputs.integrations != '[]'
Lines changed: 108 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,108 @@
1+
name: Core / Notify maintainers on fork PRs not running integration tests
2+
3+
on:
4+
pull_request_target:
5+
types: [opened, reopened, synchronize]
6+
7+
permissions:
8+
contents: read
9+
pull-requests: write
10+
11+
env:
12+
NON_TEST_SECRETS: "SLACK_WEBHOOK_URL_NOTIFICATIONS"
13+
14+
jobs:
15+
notify:
16+
if: github.event.pull_request.head.repo.fork == true
17+
runs-on: ubuntu-slim
18+
steps:
19+
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
20+
with:
21+
ref: ${{ github.event.pull_request.base.sha }}
22+
23+
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
24+
with:
25+
python-version: "3.13"
26+
27+
- name: Detect integrations requiring API keys for integration tests
28+
id: affected
29+
shell: python
30+
env:
31+
GH_TOKEN: ${{ github.token }}
32+
PR_NUMBER: ${{ github.event.pull_request.number }}
33+
run: |
34+
import os
35+
import re
36+
import subprocess
37+
from pathlib import Path
38+
39+
WORKFLOWS_DIR = Path(".github/workflows")
40+
NON_TEST_SECRETS = set(os.environ["NON_TEST_SECRETS"].split())
41+
SECRET_REF = re.compile(r"secrets\.([A-Z0-9_]+)")
42+
43+
44+
def needs_api_key(integration: str) -> bool:
45+
wf = WORKFLOWS_DIR / f"{integration}.yml"
46+
if not wf.exists():
47+
return False
48+
referenced = set(SECRET_REF.findall(wf.read_text()))
49+
return bool(referenced - NON_TEST_SECRETS)
50+
51+
52+
# 1. PR file list
53+
paths = subprocess.check_output(
54+
[
55+
"gh", "pr", "view", os.environ["PR_NUMBER"],
56+
"--json", "files", "-q", ".files[].path",
57+
],
58+
text=True,
59+
).splitlines()
60+
61+
# 2. Integrations touched by this PR
62+
touched = set()
63+
for p in paths:
64+
parts = Path(p).parts
65+
if len(parts) >= 2 and parts[0] == "integrations":
66+
touched.add(parts[1])
67+
elif (
68+
len(parts) == 3
69+
and parts[0] == ".github"
70+
and parts[1] == "workflows"
71+
and parts[2].endswith(".yml")
72+
and not parts[2].startswith("CI_")
73+
):
74+
touched.add(parts[2].removesuffix(".yml"))
75+
76+
# 3. Of those, which need API keys
77+
affected = sorted(t for t in touched if needs_api_key(t))
78+
print(f"touched = {sorted(touched)}")
79+
print(f"affected = {affected}")
80+
81+
list_value = "\n".join(f"- {name}" for name in affected)
82+
with open(os.environ["GITHUB_OUTPUT"], "a") as f:
83+
f.write(f"list<<EOF\n{list_value}\nEOF\n")
84+
85+
- name: Post or update sticky comment
86+
if: steps.affected.outputs.list != ''
87+
uses: marocchino/sticky-pull-request-comment@0ea0beb66eb9baf113663a64ec522f60e49231c0 # v3.0.4
88+
with:
89+
header: fork-pr-api-keys
90+
number: ${{ github.event.pull_request.number }}
91+
message: |
92+
**Heads-up for maintainers**
93+
94+
This PR is from a fork and touches integrations whose integration tests require API keys.
95+
Those tests are **skipped** in CI because fork PRs don't have access to repo secrets for security reasons.
96+
97+
Affected integrations:
98+
${{ steps.affected.outputs.list }}
99+
100+
Please run the integration tests locally (`hatch run test:integration` inside each folder) before approving.
101+
102+
- name: Remove stale comment
103+
if: steps.affected.outputs.list == ''
104+
uses: marocchino/sticky-pull-request-comment@0ea0beb66eb9baf113663a64ec522f60e49231c0 # v3.0.4
105+
with:
106+
header: fork-pr-api-keys
107+
number: ${{ github.event.pull_request.number }}
108+
delete: true

.github/workflows/CI_coverage_comment.yml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,15 +9,20 @@ on:
99
- "Test / anthropic"
1010
- "Test / arcadedb"
1111
- "Test / astra"
12+
- "Test / brave"
1213
- "Test / azure_ai_search"
1314
- "Test / azure_doc_intelligence"
15+
- "Test / chonkie"
1416
- "Test / chroma"
1517
- "Test / cohere"
1618
- "Test / cometapi"
1719
- "Test / deepeval"
20+
- "Test / docling_serve"
1821
- "Test / dspy"
22+
- "Test / e2b"
1923
- "Test / elasticsearch"
2024
- "Test / faiss"
25+
- "Test / falkordb"
2126
- "Test / fastembed"
2227
- "Test / firecrawl"
2328
- "Test / github"
@@ -41,6 +46,7 @@ on:
4146
- "Test / opensearch"
4247
- "Test / optimum"
4348
- "Test / paddleocr"
49+
- "Test / perplexity"
4450
- "Test / pgvector"
4551
- "Test / pinecone"
4652
- "Test / presidio"

.github/workflows/CI_docusaurus_sync.yml

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,9 @@ jobs:
3232
python-version: "3.10"
3333

3434
- name: Install Hatch
35-
run: pip install hatch
35+
run: |
36+
python -m pip install --upgrade pip
37+
pip install hatch --uploaded-prior-to=P1D
3638
3739
- name: Get project folder
3840
id: pathfinder

.github/workflows/CI_labeler.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,6 @@ jobs:
1010
triage:
1111
runs-on: ubuntu-slim
1212
steps:
13-
- uses: actions/labeler@634933edcd8ababfe52f92936142cc22ac488b1b # v6.0.1
13+
- uses: actions/labeler@f27b608878404679385c85cfa523b85ccb86e213 # v6.1.0
1414
with:
1515
repo-token: "${{ secrets.GITHUB_TOKEN }}"

.github/workflows/CI_license_compliance.yml

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3,22 +3,22 @@ name: Core / License Compliance
33
on:
44
pull_request:
55
paths:
6-
- "integrations/**/pyproject.toml"
7-
- ".github/workflows/CI_license_compliance.yml"
6+
- "integrations/**/pyproject.toml"
7+
- ".github/workflows/CI_license_compliance.yml"
88
# Since we test PRs, there is no need to run the workflow at each
99
# merge on `main`. Let's use a cron job instead.
1010
schedule:
1111
- cron: "0 0 * * *" # every day at midnight
1212

1313
env:
1414
PYTHON_VERSION: "3.10"
15-
EXCLUDE_PACKAGES: "(?i)^(azure-identity|fastembed|ragas|tqdm|psycopg|mistralai|pgvector).*"
15+
EXCLUDE_PACKAGES: "(?i)^(azure-identity|azure-search-documents|fastembed|tqdm|psycopg|mistralai|pgvector).*"
1616

1717
# Exclusions must be explicitly motivated
1818
#
1919
# - azure-identity is MIT but the license is not available on PyPI
20+
# - azure-search-documents is MIT but the license is not available on PyPI
2021
# - fastembed is Apache 2.0 but the license on PyPI is unclear ("Other/Proprietary License (Apache License)")
21-
# - ragas is Apache 2.0 but the license is not available on PyPI
2222
# - mistralai is Apache 2.0 but the license is not available on PyPI
2323
# - pgvector is MIT but the license is not available on PyPI
2424

@@ -53,7 +53,8 @@ jobs:
5353
5454
- name: Get direct dependencies from pyproject.toml files
5555
run: |
56-
pip install toml
56+
python -m pip install --upgrade pip
57+
pip install toml --uploaded-prior-to=P1D
5758
5859
# Determine the list of pyproject.toml files to process
5960
if [ "${{ github.event_name }}" = "schedule" ] || [ "${{ steps.changed-files.outputs.workflow_any_changed }}" = "true" ]; then

.github/workflows/CI_project.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ jobs:
1010
name: Add new issues to project for triage
1111
runs-on: ubuntu-slim
1212
steps:
13-
- uses: actions/add-to-project@244f685bbc3b7adfa8466e08b698b5577571133e # v1.0.2
13+
- uses: actions/add-to-project@5afcf98fcd03f1c2f92c3c83f58ae24323cc57fd # v2.0.0
1414
with:
1515
project-url: https://github.com/orgs/deepset-ai/projects/5
1616
github-token: ${{ secrets.GH_PROJECT_PAT }}

0 commit comments

Comments
 (0)