feat: merge-train/fairies (#23055) #566

Workflow file for this run

.github/workflows/docs-typesense.yml at f23aa82

	# Rebuilds the Typesense search index for the docs site and fails the run when
	# the resulting index is suspiciously small.
	name: Docs Scraper

	on:
	  workflow_dispatch:
	  schedule:
	    # Run the workflow every night at 5:00 AM UTC, after nightly release and docs update
	    - cron: "0 5 * * *"
	  push:
	    branches:
	      - next
	    paths:
	      - "docs/**"

	jobs:
	  docs-scraper:
	    runs-on: ubuntu-latest
	    # NOTE(review): none of the steps visible in this workflow read the AWS_*,
	    # NETLIFY_* or GITHUB_TOKEN variables below - confirm they are still needed
	    # before keeping these credentials exposed to every step.
	    env:
	      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
	      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
	      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	      NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }}
	      NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }}
	    steps:
	      - name: Checkout code
	        # Pinned to a full commit SHA rather than a mutable tag.
	        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
	        with:
	          # 0 = fetch the complete history instead of a shallow clone.
	          fetch-depth: 0

	      - name: Reindex with Typesense docsearch-scraper
	        env:
	          # Fail the run if the scraper indexes fewer than this many records.
	          # The docsearch-scraper container exits 0 even when its config is broken
	          # and the index ends up nearly empty, so this guard turns a silent
	          # regression (which happened with #22861 dropping the index from
	          # ~12k to 48 records) into a loud CI failure.
	          MIN_HITS: "5000"
	          # Secrets are handed to the script through the environment instead of
	          # being spliced into the shell text, so quotes, `$` or backticks inside
	          # a secret value cannot be re-interpreted by bash.
	          TYPESENSE_API_KEY: ${{ secrets.TYPESENSE_API_KEY }}
	          TYPESENSE_HOST: ${{ secrets.TYPESENSE_HOST }}
	        run: |
	          set -euo pipefail

	          # Derive the version-specific docusaurus_tag values from the docs version
	          # configs and append them to the api-nr start_url's docusaurus_tag array.
	          # Each plugin instance produces a tag of the form `docs-${pluginId}-${versionName}`.
	          # The unversioned tags (participate, root, default) are already in the static
	          # config; this step adds entries for `developer` and `network` which bump on
	          # release. Without this, the api-nr records would lose contextual search
	          # visibility every time mainnet/testnet versions change.
	          #
	          # A missing mainnet/testnet key collapses to the bare prefix, which the
	          # select() below drops; `unique` de-duplicates identical versions.
	          extra_tags=$(jq -nc \
	            --slurpfile dev docs/developer_version_config.json \
	            --slurpfile net docs/network_version_config.json \
	            '[
	              ("docs-developer-" + ($dev[0].mainnet // "")),
	              ("docs-developer-" + ($dev[0].testnet // "")),
	              ("docs-network-" + ($net[0].mainnet // "")),
	              ("docs-network-" + ($net[0].testnet // ""))
	            ] | map(select(. != "docs-developer-" and . != "docs-network-")) | unique')
	          echo "Derived docusaurus_tag values: $extra_tags"

	          # Merge the derived tags into every start_url whose selectors_key is
	          # "api-nr"; all other start_urls pass through unchanged.
	          config_json=$(jq -c --argjson extra "$extra_tags" '
	            .start_urls |= map(
	              if .selectors_key == "api-nr"
	              then .extra_attributes.docusaurus_tag = ((.extra_attributes.docusaurus_tag // []) + $extra | unique)
	              else .
	              end
	            )
	          ' docs/typesense.config.json)

	          # `-e NAME` (no value) forwards the variable from this step's environment
	          # into the container. Output is tee'd so the hit count can be parsed below;
	          # pipefail keeps a non-zero docker exit status from being masked by tee.
	          docker run \
	            -e TYPESENSE_API_KEY \
	            -e TYPESENSE_HOST \
	            -e "TYPESENSE_PORT=443" \
	            -e "TYPESENSE_PROTOCOL=https" \
	            -e "CONFIG=$config_json" \
	            typesense/docsearch-scraper:0.11.0 2>&1 | tee scraper.log

	          # Take the last "Nb hits: N" line from the log. `|| true` keeps a
	          # no-match grep from aborting under `set -e`, so the explicit empty
	          # check below can report a readable error instead.
	          nb_hits=$(grep -oE 'Nb hits: *[0-9]+' scraper.log | tail -1 | grep -oE '[0-9]+' || true)
	          if [ -z "$nb_hits" ]; then
	            echo "::error::Could not parse 'Nb hits' from scraper output — assuming index is broken."
	            exit 1
	          fi
	          echo "Indexed $nb_hits records (threshold: $MIN_HITS)"
	          if [ "$nb_hits" -lt "$MIN_HITS" ]; then
	            echo "::error::Indexed only $nb_hits records (expected at least $MIN_HITS). Search index is likely broken."
	            exit 1
	          fi

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

feat: merge-train/fairies (#23055) #566

Workflow file

feat: merge-train/fairies (#23055) #566

Uh oh!

Workflow file for this run