From 3404266a1de3bc79f45d958372df46dc550d0b20 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Mon, 16 Mar 2026 18:22:02 +0100 Subject: [PATCH 1/3] ci: add workflow to check correct API reference generation --- .github/utils/docstrings_checksum.py | 7 +-- .github/workflows/check_api_ref.yml | 93 ++++++++++++++++++++++++++++ pydoc/agents_api.yml | 2 + 3 files changed, 96 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/check_api_ref.yml diff --git a/.github/utils/docstrings_checksum.py b/.github/utils/docstrings_checksum.py index 5b5bef2f08..684854019e 100644 --- a/.github/utils/docstrings_checksum.py +++ b/.github/utils/docstrings_checksum.py @@ -41,11 +41,6 @@ def docstrings_checksum(python_files: Iterator[Path]): # Get all Haystack and rest_api python files root: Path = args.root.absolute() haystack_files = root.glob("haystack/**/*.py") - rest_api_files = root.glob("rest_api/**/*.py") - import itertools - - python_files = itertools.chain(haystack_files, rest_api_files) - - md5 = docstrings_checksum(python_files) + md5 = docstrings_checksum(haystack_files) print(md5) diff --git a/.github/workflows/check_api_ref.yml b/.github/workflows/check_api_ref.yml new file mode 100644 index 0000000000..fd2eb019cb --- /dev/null +++ b/.github/workflows/check_api_ref.yml @@ -0,0 +1,93 @@ +name: Check API reference changes + +on: + pull_request: + paths: + - "haystack/**/*.py" + - "pydoc/*.yml" + +jobs: + test-api-reference-build: + runs-on: ubuntu-slim + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.13" + + - name: Detect API reference changes + id: changed + shell: python + run: | + import os + import subprocess + from pathlib import Path + + import sys + sys.path.insert(0, ".github/utils") + from docstrings_checksum import docstrings_checksum + + def git(*args): + result = subprocess.run(["git", *args], capture_output=True, text=True) + return result.stdout.strip(), result.returncode + + base_sha, _ = git("rev-parse", "HEAD^1") + diff_output, _ = git("diff", "--name-only", f"{base_sha}...HEAD") + changed_files = set(diff_output.splitlines()) + + needs_check = False + + # If any pydoc config changed, always rebuild + if any(f.startswith("pydoc/") and f.endswith(".yml") for f in changed_files): + needs_check = True + + # If Python files changed, compare docstring checksums + if not needs_check and any(f.startswith("haystack/") and f.endswith(".py") for f in changed_files): + runner_temp = os.environ["RUNNER_TEMP"] + base_worktree = os.path.join(runner_temp, "base") + _, rc = git("worktree", "add", base_worktree, base_sha) + + pr_checksum = docstrings_checksum(Path(".").glob("haystack/**/*.py")) + base_checksum = "" + if rc == 0: + base_checksum = docstrings_checksum(Path(base_worktree).glob("haystack/**/*.py")) + + if pr_checksum != base_checksum: + needs_check = True + + print(f"API reference check needed: {needs_check}") + with open(os.environ["GITHUB_OUTPUT"], "a") as f: + f.write(f"needs_check={str(needs_check).lower()}\n") + + - name: Install Hatch + if: steps.changed.outputs.needs_check == 'true' + run: pip install hatch + + - name: Generate API references + if: steps.changed.outputs.needs_check == 'true' + run: hatch run docs + + - name: Set up Node.js + if: steps.changed.outputs.needs_check == 'true' + uses: actions/setup-node@v6 + with: + node-version: "22" + + - name: Run Docusaurus md/mdx checker + if: steps.changed.outputs.needs_check == 'true' + working-directory: tmp_api_reference + run: | + # docusaurus-mdx-checker is a package that is not frequently updated. Its dependency katex sometimes ships a + # broken ESM build, where a __VERSION__ placeholder is left unresolved, causing a ReferenceError at import time. + # Node 22+ prefers ESM when available. We force CJS (CommonJS) resolution to use the working katex build. + # This should be safe because docusaurus-mdx-checker and its dependencies provide CJS builds. + export NODE_OPTIONS="--conditions=require" + npx docusaurus-mdx-checker -v || { + echo "" + echo "For common MDX problems, see https://docusaurus.io/blog/preparing-your-site-for-docusaurus-v3#common-mdx-problems" + exit 1 + } diff --git a/pydoc/agents_api.yml b/pydoc/agents_api.yml index c553c6d6f7..1c51d07e9b 100644 --- a/pydoc/agents_api.yml +++ b/pydoc/agents_api.yml @@ -10,3 +10,5 @@ renderer: id: agents-api description: Tool-using agents with provider-agnostic chat model support. filename: agents_api.md + +# Trigger From 9380d32ebfa36a763e970c06373cc167483fc191 Mon Sep 17 00:00:00 2001 From: anakin87 Date: Mon, 16 Mar 2026 18:25:22 +0100 Subject: [PATCH 2/3] trigger in docstring --- haystack/components/builders/answer_builder.py | 2 +- pydoc/agents_api.yml | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/haystack/components/builders/answer_builder.py b/haystack/components/builders/answer_builder.py index 3a315907ec..9154400a15 100644 --- a/haystack/components/builders/answer_builder.py +++ b/haystack/components/builders/answer_builder.py @@ -15,7 +15,7 @@ @component class AnswerBuilder: """ - Converts a query and Generator replies into a `GeneratedAnswer` object. + Converts a query and Generator replies into a `GeneratedAnswer` object. TRIGGER!! AnswerBuilder parses Generator replies using custom regular expressions. Check out the usage example below to see how it works. diff --git a/pydoc/agents_api.yml b/pydoc/agents_api.yml index 1c51d07e9b..c553c6d6f7 100644 --- a/pydoc/agents_api.yml +++ b/pydoc/agents_api.yml @@ -10,5 +10,3 @@ renderer: id: agents-api description: Tool-using agents with provider-agnostic chat model support. filename: agents_api.md - -# Trigger From f94930a22db3ed49c978d583787b5343c0e1e9ea Mon Sep 17 00:00:00 2001 From: anakin87 Date: Mon, 16 Mar 2026 18:28:17 +0100 Subject: [PATCH 3/3] rm trigger --- haystack/components/builders/answer_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/components/builders/answer_builder.py b/haystack/components/builders/answer_builder.py index 9154400a15..3a315907ec 100644 --- a/haystack/components/builders/answer_builder.py +++ b/haystack/components/builders/answer_builder.py @@ -15,7 +15,7 @@ @component class AnswerBuilder: """ - Converts a query and Generator replies into a `GeneratedAnswer` object. TRIGGER!! + Converts a query and Generator replies into a `GeneratedAnswer` object. AnswerBuilder parses Generator replies using custom regular expressions. Check out the usage example below to see how it works.