Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions .github/utils/docstrings_checksum.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,6 @@ def docstrings_checksum(python_files: Iterator[Path]):
# Get all Haystack and rest_api python files
root: Path = args.root.absolute()
haystack_files = root.glob("haystack/**/*.py")
rest_api_files = root.glob("rest_api/**/*.py")

import itertools

python_files = itertools.chain(haystack_files, rest_api_files)

md5 = docstrings_checksum(python_files)
md5 = docstrings_checksum(haystack_files)
print(md5)
93 changes: 93 additions & 0 deletions .github/workflows/check_api_ref.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Builds the API reference for a pull request and runs the Docusaurus md/mdx checker on the result.
name: Check API reference changes

# Triggered only by pull requests that touch Haystack Python sources or top-level pydoc YAML configs.
on:
pull_request:
paths:
- "haystack/**/*.py"
- "pydoc/*.yml"

jobs:
test-api-reference-build:
runs-on: ubuntu-slim
steps:
- uses: actions/checkout@v6
with:
# Fetch the full history: the detection step below resolves HEAD^1 and diffs against it.
fetch-depth: 0

- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: "3.13"

# Decides whether the (slower) build-and-check steps need to run at all.
# Exposes the result as the step output `needs_check` ('true' / 'false'),
# which the following steps read through `steps.changed.outputs.needs_check`.
- name: Detect API reference changes
id: changed
# The run body is executed as a Python script rather than a shell script.
shell: python
run: |
import os
import subprocess
import sys
from pathlib import Path

# The checksum helper is imported from .github/utils, so that directory is added to sys.path first.
sys.path.insert(0, ".github/utils")
from docstrings_checksum import docstrings_checksum


def git(*args):
    """
    Run `git` with the given arguments.

    :param args: Command-line arguments passed to git verbatim.
    :returns: Tuple of (stripped stdout, process return code).
    """
    result = subprocess.run(["git", *args], capture_output=True, text=True)
    if result.returncode != 0:
        # Surface git's own error message in the job log instead of discarding it.
        print(f"git {' '.join(args)} failed: {result.stderr.strip()}", file=sys.stderr)
    return result.stdout.strip(), result.returncode


# HEAD^1 (first parent of the checked-out commit) is used as the comparison base.
base_sha, rev_parse_rc = git("rev-parse", "HEAD^1")

changed_files = set()
diff_rc = rev_parse_rc
if rev_parse_rc == 0:
    diff_output, diff_rc = git("diff", "--name-only", f"{base_sha}...HEAD")
    changed_files = set(diff_output.splitlines())

needs_check = False

if diff_rc != 0:
    # Without a reliable list of changed files there is no way to show that the API reference
    # is unaffected, so run the check instead of silently skipping it.
    print("Could not determine the changed files; forcing the API reference check.")
    needs_check = True
elif any(f.startswith("pydoc/") and f.endswith(".yml") for f in changed_files):
    # If any pydoc config changed, always rebuild
    needs_check = True
elif any(f.startswith("haystack/") and f.endswith(".py") for f in changed_files):
    # If Python files changed, compare docstring checksums of the PR tree and the base tree
    runner_temp = os.environ["RUNNER_TEMP"]
    base_worktree = os.path.join(runner_temp, "base")
    _, worktree_rc = git("worktree", "add", base_worktree, base_sha)

    pr_checksum = docstrings_checksum(Path(".").glob("haystack/**/*.py"))
    # If the base worktree could not be created, the empty placeholder is compared instead;
    # it is expected to differ from the PR checksum and therefore trigger the check.
    base_checksum = ""
    if worktree_rc == 0:
        base_checksum = docstrings_checksum(Path(base_worktree).glob("haystack/**/*.py"))

    if pr_checksum != base_checksum:
        needs_check = True

print(f"API reference check needed: {needs_check}")
# Publish the decision as the step output `needs_check` ("true" / "false").
with open(os.environ["GITHUB_OUTPUT"], "a") as f:
    f.write(f"needs_check={str(needs_check).lower()}\n")

# Every step below is skipped unless the detection step (id: changed) set needs_check to 'true'.
- name: Install Hatch
if: steps.changed.outputs.needs_check == 'true'
run: pip install hatch

# Runs the `docs` script through Hatch.
# NOTE(review): presumably this writes the generated references to tmp_api_reference,
# the directory the checker step runs in — confirm against the Hatch configuration.
- name: Generate API references
if: steps.changed.outputs.needs_check == 'true'
run: hatch run docs

# Node.js is needed for the npx invocation in the checker step.
- name: Set up Node.js
if: steps.changed.outputs.needs_check == 'true'
uses: actions/setup-node@v6
with:
node-version: "22"

- name: Run Docusaurus md/mdx checker
if: steps.changed.outputs.needs_check == 'true'
# The checker is executed from inside tmp_api_reference.
working-directory: tmp_api_reference
run: |
# docusaurus-mdx-checker is rarely updated, and its katex dependency occasionally publishes a broken ESM
# build in which a __VERSION__ placeholder is never substituted, raising a ReferenceError at import time.
# Because Node 22+ picks ESM whenever it is offered, resolution is pinned to CJS (CommonJS) so that the
# working katex build gets loaded. docusaurus-mdx-checker and its dependencies ship CJS builds, so this
# should be safe.
export NODE_OPTIONS="--conditions=require"
if ! npx docusaurus-mdx-checker -v; then
  # Point contributors at the upstream troubleshooting guide before failing the step.
  echo ""
  echo "For common MDX problems, see https://docusaurus.io/blog/preparing-your-site-for-docusaurus-v3#common-mdx-problems"
  exit 1
fi
Loading