diff --git a/.github/utils/docstrings_checksum.py b/.github/utils/docstrings_checksum.py
index 5b5bef2f08..684854019e 100644
--- a/.github/utils/docstrings_checksum.py
+++ b/.github/utils/docstrings_checksum.py
@@ -41,11 +41,6 @@ def docstrings_checksum(python_files: Iterator[Path]):
     # Get all Haystack and rest_api python files
     root: Path = args.root.absolute()
     haystack_files = root.glob("haystack/**/*.py")
-    rest_api_files = root.glob("rest_api/**/*.py")
 
-    import itertools
-
-    python_files = itertools.chain(haystack_files, rest_api_files)
-
-    md5 = docstrings_checksum(python_files)
+    md5 = docstrings_checksum(haystack_files)
     print(md5)
diff --git a/.github/workflows/check_api_ref.yml b/.github/workflows/check_api_ref.yml
new file mode 100644
index 0000000000..fd2eb019cb
--- /dev/null
+++ b/.github/workflows/check_api_ref.yml
@@ -0,0 +1,95 @@
+name: Check API reference changes
+
+on:
+  pull_request:
+    paths:
+      - "haystack/**/*.py"
+      - "pydoc/*.yml"
+
+jobs:
+  test-api-reference-build:
+    runs-on: ubuntu-slim
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.13"
+
+      - name: Detect API reference changes
+        id: changed
+        shell: python
+        run: |
+          import os
+          import subprocess
+          from pathlib import Path
+
+          import sys
+          sys.path.insert(0, ".github/utils")
+          from docstrings_checksum import docstrings_checksum
+
+          def git(*args):
+              # Never raises on a non-zero exit: returns (stdout, returncode) so each caller decides how to react
+              result = subprocess.run(["git", *args], capture_output=True, text=True)
+              return result.stdout.strip(), result.returncode
+
+          base_sha, base_rc = git("rev-parse", "HEAD^1")
+          diff_output, diff_rc = git("diff", "--name-only", f"{base_sha}...HEAD")
+          changed_files = set(diff_output.splitlines())
+
+          # Fail safe: if the base commit or the diff cannot be resolved, run the check instead of skipping it
+          needs_check = base_rc != 0 or diff_rc != 0
+
+          # If any pydoc config changed, always rebuild
+          if any(f.startswith("pydoc/") and f.endswith(".yml") for f in changed_files):
+              needs_check = True
+
+          # If Python files changed, compare docstring checksums
+          if not needs_check and any(f.startswith("haystack/") and f.endswith(".py") for f in changed_files):
+              runner_temp = os.environ["RUNNER_TEMP"]
+              base_worktree = os.path.join(runner_temp, "base")
+              _, rc = git("worktree", "add", base_worktree, base_sha)
+
+              pr_checksum = docstrings_checksum(Path(".").glob("haystack/**/*.py"))
+              base_checksum = ""
+              if rc == 0:
+                  base_checksum = docstrings_checksum(Path(base_worktree).glob("haystack/**/*.py"))
+
+              if pr_checksum != base_checksum:
+                  needs_check = True
+
+          print(f"API reference check needed: {needs_check}")
+          with open(os.environ["GITHUB_OUTPUT"], "a") as f:
+              f.write(f"needs_check={str(needs_check).lower()}\n")
+
+      - name: Install Hatch
+        if: steps.changed.outputs.needs_check == 'true'
+        run: pip install hatch
+
+      - name: Generate API references
+        if: steps.changed.outputs.needs_check == 'true'
+        run: hatch run docs
+
+      - name: Set up Node.js
+        if: steps.changed.outputs.needs_check == 'true'
+        uses: actions/setup-node@v6
+        with:
+          node-version: "22"
+
+      - name: Run Docusaurus md/mdx checker
+        if: steps.changed.outputs.needs_check == 'true'
+        working-directory: tmp_api_reference
+        run: |
+          # docusaurus-mdx-checker is a package that is not frequently updated. Its dependency katex sometimes ships a
+          # broken ESM build, where a __VERSION__ placeholder is left unresolved, causing a ReferenceError at import time.
+          # Node 22+ prefers ESM when available. We force CJS (CommonJS) resolution to use the working katex build.
+          # This should be safe because docusaurus-mdx-checker and its dependencies provide CJS builds.
+          export NODE_OPTIONS="--conditions=require"
+          npx docusaurus-mdx-checker -v || {
+            echo ""
+            echo "For common MDX problems, see https://docusaurus.io/blog/preparing-your-site-for-docusaurus-v3#common-mdx-problems"
+            exit 1
+          }