-
Notifications
You must be signed in to change notification settings - Fork 596
76 lines (70 loc) · 3.13 KB
/
docs-typesense.yml
File metadata and controls
76 lines (70 loc) · 3.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
name: Docs Scraper

# Triggers: manual dispatch, a nightly schedule, and pushes to `next` that
# modify anything under docs/.
on:
  workflow_dispatch:
  schedule:
    # Nightly at 05:00 UTC, i.e. after the nightly release and docs update.
    - cron: '0 5 * * *'
  push:
    branches: [next]
    paths: ['docs/**']
jobs:
  # Single job: check out the repository, rebuild the Typesense search index
  # with docsearch-scraper, and fail loudly if the index looks too small.
  docs-scraper:
    runs-on: ubuntu-latest
    env:
      # NOTE(review): no step in this job references the AWS_*, NETLIFY_* or
      # GITHUB_TOKEN values below. Confirm whether they are still needed and
      # drop them if not, so the job is not handed unused secrets.
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      NETLIFY_SITE_ID: ${{ secrets.NETLIFY_SITE_ID }}
      NETLIFY_AUTH_TOKEN: ${{ secrets.NETLIFY_AUTH_TOKEN }}
    steps:
      - name: Checkout code
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          # 0 = full history rather than a shallow clone.
          fetch-depth: 0

      - name: Reindex with Typesense docsearch-scraper
        env:
          # Fail the run if the scraper indexes fewer than this many records.
          # The docsearch-scraper container exits 0 even when its config is
          # broken and the index ends up nearly empty, so this guard turns a
          # silent regression (which happened with #22861 dropping the index
          # from ~12k to 48 records) into a loud CI failure.
          MIN_HITS: "5000"
          TYPESENSE_API_KEY: ${{ secrets.TYPESENSE_API_KEY }}
          TYPESENSE_HOST: ${{ secrets.TYPESENSE_HOST }}
        run: |
          set -euo pipefail

          # Run the scraper and mirror its output into scraper.log so the
          # record count can be parsed afterwards. With pipefail, a non-zero
          # exit from docker is not masked by tee.
          docker run \
            -e "TYPESENSE_API_KEY=$TYPESENSE_API_KEY" \
            -e "TYPESENSE_HOST=$TYPESENSE_HOST" \
            -e "TYPESENSE_PORT=443" \
            -e "TYPESENSE_PROTOCOL=https" \
            -e "CONFIG=$(cat docs/typesense.config.json)" \
            typesense/docsearch-scraper:0.11.0 2>&1 | tee scraper.log

          # Use the last "Nb hits: N" line of the log. `|| true` keeps a
          # missing match from aborting here, so the explicit error below is
          # what gets reported.
          nb_hits=$(grep -oE 'Nb hits: *[0-9]+' scraper.log | tail -1 | grep -oE '[0-9]+' || true)
          if [ -z "$nb_hits" ]; then
            echo "::error::Could not parse 'Nb hits' from scraper output, assuming index is broken."
            exit 1
          fi

          echo "Indexed $nb_hits records (threshold: $MIN_HITS)"
          if [ "$nb_hits" -lt "$MIN_HITS" ]; then
            echo "::error::Indexed only $nb_hits records (expected at least $MIN_HITS). Search index is likely broken."
            exit 1
          fi

          # Log how many api-nr records are visible in the live index. The
          # docusaurus theme always prepends `default` to its contextual
          # docusaurus_tag filter, and no docusaurus page is stamped with
          # `default` (each carries its plugin-context tag instead), so this
          # facet count is effectively the count of indexed api-nr records.
          # Informational only: the count varies with aztec-nr content size.
          #
          # Because it is informational, the query runs inside an `if` so a
          # curl/jq failure (network blip, HTTP error, malformed response)
          # produces a warning instead of failing a reindex that has already
          # passed the MIN_HITS guard. --max-time bounds a hung connection.
          if api_hits=$(curl -fsS --max-time 30 \
            "https://$TYPESENSE_HOST/collections/aztec-docs/documents/search" \
            -H "X-TYPESENSE-API-KEY: $TYPESENSE_API_KEY" \
            -G \
            --data-urlencode "q=*" \
            --data-urlencode "query_by=hierarchy.lvl0" \
            --data-urlencode "filter_by=docusaurus_tag:=[default]&&language:=en" \
            --data-urlencode "per_page=1" \
            | jq -r '.found'); then
            echo "api-nr records visible under docusaurus_tag:=[default]: $api_hits"
          else
            echo "::warning::Could not query the live index for the api-nr record count; skipping this informational check."
          fi