|
1 | 1 | #!/usr/bin/env bash |
2 | 2 | # fix-haddock-links.sh |
3 | 3 | # |
4 | | -# Post-processes Haddock HTML generated by `cabal haddock-project` to fix |
5 | | -# cross-package links. Without this, links to external dependencies are |
6 | | -# broken 404s because the hosted site only contains docs for packages in |
7 | | -# this repo. |
| 4 | +# Usage: ./scripts/fix-haddock-links.sh <website-directory> |
8 | 5 | # |
9 | | -# Pipeline: |
10 | | -# 1. Prepare: scan filesystem, create symlinks for versioned directories, |
11 | | -# fetch the CHaP package index, grep HTML for cross-package link |
12 | | -# targets. |
13 | | -# 2. Resolve + rewrite: for each discovered target, probe candidate |
14 | | -# doc-site URLs and rewrite HTML links in place (or mark unmapped |
15 | | -# CHaP packages as unclickable). |
16 | | -# 2b. Re-export rewrite: extract (type, defining package, module) triples |
17 | | -# from each local page's Haddock-emitted "Source" cabal-store link, |
18 | | -# then rewrite the local re-export hrefs (e.g. a reference to |
19 | | -# cardano-ledger-byron's `Address` via `Cardano.Api.Byron`) to point |
20 | | -# at the upstream doc site instead of the local re-export page. |
21 | | -# 3. Validate + annotate: HEAD each rewritten URL; for dead URLs try to |
22 | | -# rescue by probing doc-site subdirectories (api/, protocols/, |
23 | | -# framework/) and parent modules with #t: fragment reconstruction. |
24 | | -# URLs that can't be rescued become annotated plain-text spans so |
25 | | -# there are zero clickable 404s. |
| 6 | +# ─── Why this script exists ────────────────────────────────────────── |
26 | 7 | # |
27 | | -# When dead links are found (unmapped CHaP package or module-level 404), |
28 | | -# the script prints an actionable summary — upstream module name, symbols |
29 | | -# we're linking to, and the referring pages in our own docs — and exits 0 |
30 | | -# by default so the docs site can still deploy. Set |
31 | | -# FIX_HADDOCK_LINKS_STRICT=1 to make the script fail instead. |
32 | | -# In GitHub Actions, each dead link also produces a `::warning::` |
33 | | -# annotation visible in the job summary. |
| 8 | +# `cabal haddock-project` emits cross-package hrefs as relative paths |
| 9 | +# (e.g. href="../cardano-ledger-api-1.2.3-hash/Foo.html") that don't |
| 10 | +# resolve on the published docs site — we only host cardano-api's own |
| 11 | +# output, not its dependencies. Every cross-package reference is a 404 |
| 12 | +# by default. |
34 | 13 | # |
35 | | -# Usage: ./scripts/fix-haddock-links.sh <website-directory> |
| 14 | +# This script replaces each such href with one of: |
| 15 | +# |
| 16 | +# • an absolute URL on the upstream doc site, when the package is |
| 17 | +# on CHaP and we find a valid hosted module page for it; or |
| 18 | +# • a tooltip-annotated, unclickable <span>, when (a) the package |
| 19 | +# is not on CHaP — we deliberately don't link bootlibs like base, |
| 20 | +# bytestring, time, etc. because Haddock's URL shapes don't match |
| 21 | +# Hackage's and readers rarely follow those links anyway; or (b) |
| 22 | +# the package is on CHaP but the probe finds no doc site for it; |
| 23 | +# or (c) the upstream doc site is valid but the specific module |
| 24 | +# page 404s (upstream only publishes umbrella modules, our |
| 25 | +# Haddock asks for the defining sub-module). |
| 26 | +# |
| 27 | +# The published site thus has zero clickable 404s. |
| 28 | +# |
| 29 | +# ─── Pipeline ──────────────────────────────────────────────────────── |
| 30 | +# |
| 31 | +# Phase 1 Scan filesystem, symlink versioned dirs, fetch the CHaP |
| 32 | +# index, grep HTML for cross-package link targets. |
| 33 | +# Phase 2 For each discovered target, probe candidate doc-site URLs |
| 34 | +# and rewrite links (or mark unclickable if unresolvable). |
| 35 | +# Phase 2b Rewrite local re-export pages to point at the defining |
| 36 | +# upstream package, using Haddock's "Source" cabal-store |
| 37 | +# link as ground truth for which package the type lives in. |
| 38 | +# Phase 3 HEAD-validate rewritten URLs; rescue dead ones by probing |
| 39 | +# doc-site subdirs (api/, protocols/, framework/) and parent |
| 40 | +# modules with #t: fragment reconstruction. What can't be |
| 41 | +# rescued becomes an annotated <span>. |
| 42 | +# |
| 43 | +# ─── Doc-site resolution (Phase 2) ─────────────────────────────────── |
| 44 | +# |
| 45 | +# For each CHaP package, try two things in order, first hit wins: |
| 46 | +# |
| 47 | +# 1. Name-prefix heuristic under *.cardano.intersectmbo.org — strip
| 48 | +# trailing "-token" segments of the package name and HEAD-probe
| 49 | +# each candidate's doc-index.html. Covers cardano-ledger-*,
| 50 | +# plutus-*, ouroboros-*, etc.
| 51 | +# 2. Fixed fallback against IOG_DOC_BASES below — covers packages
| 52 | +# whose subdomain isn't a prefix of the package name (e.g.
| 53 | +# cardano-base lives at base.cardano.intersectmbo.org).
| 54 | +# |
| 55 | +# Misses fall through to "Unmapped CHaP" — see the CI policy below. |
| 56 | +# |
| 57 | +# ─── Why non-CHaP packages are NOT linked ──────────────────────────── |
| 58 | +# |
| 59 | +# Bootlibs (base, bytestring, time, the transformers stack, etc.) are |
| 60 | +# on Hackage. Rewriting their hrefs to Hackage URLs almost always |
| 61 | +# produced 404s — Haddock's per-module URL structure doesn't line up |
| 62 | +# cleanly with Hackage's (src/ source views, -inplace suffixes from |
| 63 | +# local rebuilds), and readers of cardano-api docs rarely click into |
| 64 | +# bootlib internals anyway. We skip them entirely: rendered as |
| 65 | +# unclickable <span>s, no outbound link, no validation, no noise. |
| 66 | +# |
| 67 | +# ─── Dead-link CI policy ───────────────────────────────────────────── |
| 68 | +# |
| 69 | +# Every dead link falls into one of two buckets: |
| 70 | +# |
| 71 | +# Actionable (FAILS CI) |
| 72 | +# A CHaP package the probe couldn't resolve to any doc site. This |
| 73 | +# is usually a gap in IOG_DOC_BASES — add the package's upstream |
| 74 | +# doc base URL, or add the package to KNOWN_UNDOCUMENTED if it |
| 75 | +# genuinely has no published Haddocks anywhere. |
| 76 | +# |
| 77 | +# Unfixable (does NOT fail CI, logged for visibility) |
| 78 | +# Three sub-causes, all outside this repo: |
| 79 | +# a. Module-level 404s on an otherwise-valid upstream doc site. |
| 80 | +# Empirically the cause is upstream publishing only their |
| 81 | +# umbrella "exposed-modules" (e.g. Cardano-Binary.html) while |
| 82 | +# Haddock generates hrefs to the defining sub-module (e.g. |
| 83 | +# Cardano-Binary-FromCBOR.html). We can't make upstream teams |
| 84 | +# publish their internal modules. (It's plausible that our |
| 85 | +# --internal flag in cabal haddock-project contributes; we |
| 86 | +# have not verified. Dropping --internal would cost us our |
| 87 | +# own internal-module pages on the published site, so we |
| 88 | +# leave it on and accept the span noise.) |
| 89 | +# b. Packages in KNOWN_UNDOCUMENTED (no published Haddocks |
| 90 | +# anywhere, only source on GitHub — e.g. kes-agent). |
| 91 | +# c. Haddock-emitted absolute Hackage URLs that lack a package |
| 92 | +# version (Hackage's routing requires one). A handful, out |
| 93 | +# of our control, treated as noise. |
| 94 | +# |
| 95 | +# Escape hatch: FIX_HADDOCK_LINKS_ALLOW_DEAD=1 exits 0 even if there |
| 96 | +# are actionable entries, e.g. to deploy while investigating. Under |
| 97 | +# GitHub Actions, actionable entries emit ::warning:: annotations so |
| 98 | +# they surface in the job UI; unfixable ones are plain log lines. |
36 | 99 |
|
37 | 100 | set -euo pipefail |
38 | 101 |
|
@@ -74,6 +137,18 @@ IOG_DOC_BASES=( |
74 | 137 | "https://ouroboros-consensus.cardano.intersectmbo.org/haddocks" |
75 | 138 | "https://ouroboros-network.cardano.intersectmbo.org" |
76 | 139 | "https://input-output-hk.github.io/io-sim" |
| 140 | + "https://input-output-hk.github.io/typed-protocols" |
| 141 | +) |
| 142 | + |
| 143 | +# CHaP packages we've confirmed have no public Haddocks anywhere (no |
| 144 | +# gh-pages branch, no CloudFront site). Listed here so the script can |
| 145 | +# classify them as "known unfixable" rather than "actionable gap in config" |
| 146 | +# — they still appear as annotated dead-link spans in the output, but do |
| 147 | +# NOT fail CI. Prefer adding a doc site to IOG_DOC_BASES where possible; |
| 148 | +# this list is a last resort. |
| 149 | +KNOWN_UNDOCUMENTED=( |
| 150 | + "kes-agent" |
| 151 | + "kes-agent-crypto" |
77 | 152 | ) |
78 | 153 |
|
79 | 154 | # Some doc sites organise module pages into subdirectories beneath the |
@@ -569,69 +644,105 @@ echo " Dead links rescued: $rescued_count" |
569 | 644 | echo " Dead links annotated: $dead_remaining" |
570 | 645 | echo "=================================" |
571 | 646 |
|
572 | | -total_dead=$(( ${#UNMAPPED_CHAP[@]} + dead_remaining )) |
573 | | -if [[ $total_dead -eq 0 ]]; then |
| 647 | +# Partition Unmapped CHaP into actionable (a gap in IOG_DOC_BASES that we
| 648 | +# can probably fix) vs known-unfixable (the package has no published docs
| 649 | +# anywhere and is listed in KNOWN_UNDOCUMENTED). Module-level 404s are
| 650 | +# always classified as unfixable: they come from upstream sites publishing
| 651 | +# only their exposed modules (our --internal flag may contribute; this is
| 652 | +# unverified) or from version skew, neither of which is actionable here.
| 653 | +ACTIONABLE_UNMAPPED=() |
| 654 | +UNFIXABLE_UNMAPPED=() |
| 655 | +for pkg in "${UNMAPPED_CHAP[@]}"; do |
| 656 | + is_known_undocumented="no" |
| 657 | + for u in "${KNOWN_UNDOCUMENTED[@]}"; do |
| 658 | + [[ "$pkg" == "$u" ]] && { is_known_undocumented="yes"; break; } |
| 659 | + done |
| 660 | + if [[ "$is_known_undocumented" == "yes" ]]; then |
| 661 | + UNFIXABLE_UNMAPPED+=("$pkg") |
| 662 | + else |
| 663 | + ACTIONABLE_UNMAPPED+=("$pkg") |
| 664 | + fi |
| 665 | +done |
| 666 | + |
| 667 | +actionable_count=${#ACTIONABLE_UNMAPPED[@]} |
| 668 | +unfixable_count=$(( ${#UNFIXABLE_UNMAPPED[@]} + dead_remaining )) |
| 669 | + |
| 670 | +if [[ $actionable_count -eq 0 && $unfixable_count -eq 0 ]]; then |
574 | 671 | exit 0 |
575 | 672 | fi |
576 | 673 |
|
577 | | -echo "" |
578 | | -echo "=== fix-haddock-links: dead links detected ===" |
579 | | - |
580 | | -if [[ ${#UNMAPPED_CHAP[@]} -gt 0 ]]; then |
| 674 | +# Actionable section — emit GH Actions ::warning:: annotations for UI visibility. |
| 675 | +if [[ $actionable_count -gt 0 ]]; then |
| 676 | + echo "" |
| 677 | + echo "=== Actionable — fix these (${actionable_count}) ===" |
581 | 678 | echo "" |
582 | | - echo "Unresolvable CHaP packages (${#UNMAPPED_CHAP[@]}):" |
583 | | - for pkg in "${UNMAPPED_CHAP[@]}"; do |
| 679 | + echo "CHaP packages the probe could not resolve to any known doc site:" |
| 680 | + for pkg in "${ACTIONABLE_UNMAPPED[@]}"; do |
584 | 681 | echo " - $pkg" |
585 | 682 | [[ -n "${GITHUB_ACTIONS:-}" ]] && \ |
586 | | - echo "::warning title=Unmapped CHaP package::$pkg has no known doc site. Add its base URL to IOG_DOC_BASES in scripts/fix-haddock-links.sh." |
| 683 | + echo "::warning title=Unmapped CHaP package::${pkg} has no known doc site. Add its base URL to IOG_DOC_BASES in scripts/fix-haddock-links.sh, or add ${pkg} to KNOWN_UNDOCUMENTED if it has no published Haddocks." |
587 | 684 | done |
588 | 685 | echo "" |
589 | | - echo " Fix: find the package's published Haddocks (check its source repo for" |
590 | | - echo " a gh-pages or CloudFront deployment), then append the base URL" |
591 | | - echo " to IOG_DOC_BASES in scripts/fix-haddock-links.sh and re-run." |
| 686 | + echo " To fix each:" |
| 687 | + echo " 1. Check the package's source repo for a gh-pages or CloudFront" |
| 688 | + echo " deployment of its Haddocks." |
| 689 | + echo " 2. If published: append the base URL to IOG_DOC_BASES in" |
| 690 | + echo " scripts/fix-haddock-links.sh and re-run." |
| 691 | + echo " 3. If genuinely unpublished: add the package name to" |
| 692 | + echo " KNOWN_UNDOCUMENTED in scripts/fix-haddock-links.sh so future" |
| 693 | + echo " runs classify it as known-unfixable instead of failing CI." |
592 | 694 | fi |
593 | 695 |
|
594 | | -if [[ $dead_remaining -gt 0 ]]; then |
| 696 | +# Unfixable section — plain log lines, no ::warning:: flood in the UI. |
| 697 | +if [[ $unfixable_count -gt 0 ]]; then |
595 | 698 | echo "" |
596 | | - echo "Module-level 404s (${dead_remaining}):" |
597 | | - while IFS= read -r dead_url; do |
598 | | - [ -z "$dead_url" ] && continue |
599 | | - # Skip rescued URLs — they were rewritten, not annotated. |
600 | | - [[ -v "DEAD_TO_RESCUE[$dead_url]" ]] && continue |
601 | | - echo "" |
602 | | - echo " $dead_url" |
603 | | - echo " Upstream: ${DEAD_MODULE[$dead_url]} (in package ${DEAD_PKG[$dead_url]})" |
604 | | - [[ -n "${DEAD_SYMBOLS[$dead_url]:-}" ]] && \ |
605 | | - echo " Symbols we link to: ${DEAD_SYMBOLS[$dead_url]}" |
606 | | - if [[ -n "${DEAD_REFS[$dead_url]:-}" ]]; then |
607 | | - echo " Linked from our docs:" |
608 | | - while IFS= read -r ref; do |
609 | | - [ -z "$ref" ] && continue |
610 | | - echo " - $ref" |
611 | | - done <<< "${DEAD_REFS[$dead_url]}" |
612 | | - fi |
613 | | - [[ -n "${GITHUB_ACTIONS:-}" ]] && \ |
614 | | - echo "::warning title=Dead haddock link::${DEAD_MODULE[$dead_url]} (in ${DEAD_PKG[$dead_url]}) returned 404 at $dead_url" |
615 | | - done < "$DEAD_URLS_FILE" |
| 699 | + echo "=== Known unfixable — not blocking (${unfixable_count}) ===" |
616 | 700 | echo "" |
617 | | - echo " Fix:" |
618 | | - echo " 1. Visit the upstream doc site and search for the module name." |
619 | | - echo " If it's been renamed, moved, or removed, our re-exports or" |
620 | | - echo " dependency bounds are out of date." |
621 | | - echo " 2. The 'Linked from' HTML paths mirror our Haskell module layout" |
622 | | - echo " (e.g. cardano-api/Cardano-Api-Ledger.html corresponds to the" |
623 | | - echo " source module Cardano.Api.Ledger). Open those .hs files and" |
624 | | - echo " grep for the listed symbols to find the re-export site." |
625 | | - echo " 3. If a dep bound is pinned to a version whose module layout" |
626 | | - echo " doesn't match the published docs, bump it." |
627 | | - echo " 4. Otherwise the links are already annotated as greyed-out spans" |
628 | | - echo " with tooltips — no action needed if that's acceptable." |
| 701 | + echo "These are annotated as greyed-out spans in the published docs and" |
| 702 | + echo "do not fail CI. They stem from causes outside this repo:" |
| 703 | + echo " - Haddock generates cross-package hrefs to internal modules via" |
| 704 | + echo " the --internal flag, but upstream doc sites only publish" |
| 705 | + echo " exposed modules." |
| 706 | + echo " - Some upstream packages have no published Haddocks at all." |
| 707 | + echo " - Haddock occasionally emits absolute Hackage URLs without a" |
| 708 | + echo " package version, which Hackage's routing doesn't accept." |
| 709 | + echo "" |
| 710 | + if [[ ${#UNFIXABLE_UNMAPPED[@]} -gt 0 ]]; then |
| 711 | + echo "CHaP packages with no published docs (${#UNFIXABLE_UNMAPPED[@]}, from KNOWN_UNDOCUMENTED):" |
| 712 | + for pkg in "${UNFIXABLE_UNMAPPED[@]}"; do |
| 713 | + echo " - $pkg" |
| 714 | + done |
| 715 | + echo "" |
| 716 | + fi |
| 717 | + if [[ $dead_remaining -gt 0 ]]; then |
| 718 | + echo "Module-level 404s (${dead_remaining}):" |
| 719 | + while IFS= read -r dead_url; do |
| 720 | + [ -z "$dead_url" ] && continue |
| 721 | + [[ -v "DEAD_TO_RESCUE[$dead_url]" ]] && continue |
| 722 | + echo "" |
| 723 | + echo " $dead_url" |
| 724 | + echo " Upstream: ${DEAD_MODULE[$dead_url]} (in package ${DEAD_PKG[$dead_url]})" |
| 725 | + [[ -n "${DEAD_SYMBOLS[$dead_url]:-}" ]] && \ |
| 726 | + echo " Symbols we link to: ${DEAD_SYMBOLS[$dead_url]}" |
| 727 | + if [[ -n "${DEAD_REFS[$dead_url]:-}" ]]; then |
| 728 | + echo " Linked from our docs:" |
| 729 | + while IFS= read -r ref; do |
| 730 | + [ -z "$ref" ] && continue |
| 731 | + echo " - $ref" |
| 732 | + done <<< "${DEAD_REFS[$dead_url]}" |
| 733 | + fi |
| 734 | + done < "$DEAD_URLS_FILE" |
| 735 | + fi |
629 | 736 | fi |
630 | 737 |
|
631 | 738 | echo "" |
632 | | -if [[ "${FIX_HADDOCK_LINKS_STRICT:-0}" == "1" ]]; then |
633 | | - echo "FIX_HADDOCK_LINKS_STRICT=1 — exiting 1." |
634 | | - exit 1 |
| 739 | +if [[ $actionable_count -eq 0 ]]; then |
| 740 | + echo "No actionable dead links; exiting 0." |
| 741 | + exit 0 |
| 742 | +fi |
| 743 | +if [[ "${FIX_HADDOCK_LINKS_ALLOW_DEAD:-0}" == "1" ]]; then |
| 744 | + echo "FIX_HADDOCK_LINKS_ALLOW_DEAD=1 — accepting actionable dead links, exiting 0." |
| 745 | + exit 0 |
635 | 746 | fi |
636 | | -echo "Exiting 0 (warn-only default). Set FIX_HADDOCK_LINKS_STRICT=1 to fail the build." |
637 | | -exit 0 |
| 747 | +echo "Actionable dead links found — failing the build. Set FIX_HADDOCK_LINKS_ALLOW_DEAD=1 to accept them." |
| 748 | +exit 1 |
0 commit comments