From 69bc7c70b14ebb731618f1cd5793e56e01e799e3 Mon Sep 17 00:00:00 2001 From: sarahxsanders Date: Wed, 15 Apr 2026 18:29:04 -0400 Subject: [PATCH 1/4] draft: docs-as-skillz --- .github/workflows/sync-docs-skills.yml | 184 +++++++++++++ README.md | 54 ++++ package.json | 1 + scripts/build-docs-skills.js | 344 +++++++++++++++++++++++++ 4 files changed, 583 insertions(+) create mode 100644 .github/workflows/sync-docs-skills.yml create mode 100644 scripts/build-docs-skills.js diff --git a/.github/workflows/sync-docs-skills.yml b/.github/workflows/sync-docs-skills.yml new file mode 100644 index 0000000..520da87 --- /dev/null +++ b/.github/workflows/sync-docs-skills.yml @@ -0,0 +1,184 @@ +name: Sync PostHog Docs Skills + +# Triggered by posthog.com after a successful deploy, nightly as a fallback, +# or manually. Fetches posthog.com/llms.txt, regenerates the posthog-docs +# skill directories and docs-skill-menu.json, and cuts a new release if anything changed + +on: + repository_dispatch: + types: [posthog-docs-deployed] + schedule: + - cron: '0 2 * * *' + workflow_dispatch: + +# One sync at a time — if posthog.com deploys rapidly, cancel the queued run +# and let the latest trigger win. 
+concurrency: + group: sync-docs-skills + cancel-in-progress: true + +jobs: + sync: + runs-on: ubuntu-latest + permissions: + contents: write + + steps: + - name: Checkout repository + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + fetch-depth: 0 # Full history needed for version tag lookup + + - name: Setup Node.js + uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 + with: + node-version: 'lts/*' + + - name: Setup pnpm + uses: pnpm/action-setup@b906affcce14559ad1aafd4ab0e942779e9f58b1 # v4 + with: + version: 9 + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Determine next version + id: version + run: | + LATEST_TAG=$(git tag -l | grep -v '^latest$' | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+$' | sort -V | tail -n 1) + if [ -z "$LATEST_TAG" ]; then + LATEST_TAG="v0.0.0" + fi + echo "Latest semver tag: ${LATEST_TAG}" + LATEST_VERSION=${LATEST_TAG#v} + IFS='.' read -r -a VERSION_PARTS <<< "$LATEST_VERSION" + MAJOR=${VERSION_PARTS[0]:-0} + MINOR=${VERSION_PARTS[1]:-0} + PATCH=${VERSION_PARTS[2]:-0} + PATCH=$((PATCH + 1)) + VERSION="${MAJOR}.${MINOR}.${PATCH}" + echo "version=${VERSION}" >> $GITHUB_OUTPUT + echo "tag=v${VERSION}" >> $GITHUB_OUTPUT + echo "Next version: ${VERSION}" + + - name: Build docs skills + run: pnpm run build:docs-skills + + # Compare only name fields in posthog docs entries (not install commands, + # which are stable but we want to catch additions/removals/renames). 
+ - name: Check for changes + id: diff + env: + PREV_MENU_URL: ${{ github.server_url }}/${{ github.repository }}/releases/latest/download/docs-skill-menu.json + run: | + if curl -sf -o /tmp/prev-skill-menu.json "$PREV_MENU_URL" 2>/dev/null; then + jq '[.categories["posthog-docs"] // [] | .[] | .name] | sort' \ + dist/skills/docs-skill-menu.json > /tmp/new-names.json + jq '[.categories["posthog-docs"] // [] | .[] | .name] | sort' \ + /tmp/prev-skill-menu.json > /tmp/prev-names.json + if diff -q /tmp/prev-names.json /tmp/new-names.json > /dev/null 2>&1; then + echo "posthog-docs skills unchanged — skipping release" + echo "changed=false" >> $GITHUB_OUTPUT + else + echo "posthog-docs skills changed" + diff /tmp/prev-names.json /tmp/new-names.json || true + echo "changed=true" >> $GITHUB_OUTPUT + fi + else + echo "No previous release found — treating as changed" + echo "changed=true" >> $GITHUB_OUTPUT + fi + + # dist/ is gitignored; force-add skill-menu.json so there's a committed + # record of each sync. Tags are pushed AFTER the release succeeds so that + # "latest" never points at a commit whose ZIPs don't exist yet. 
+ - name: Commit updated docs-skill-menu.json + if: steps.diff.outputs.changed == 'true' + env: + TAG: ${{ steps.version.outputs.tag }} + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add -f dist/skills/docs-skill-menu.json + git commit -m "chore: sync posthog-docs skills (${TAG})" + git push + + - name: Zip skill directories and create release + if: steps.diff.outputs.changed == 'true' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + TAG: ${{ steps.version.outputs.tag }} + VERSION: ${{ steps.version.outputs.version }} + EVENT_NAME: ${{ github.event_name }} + run: | + SKILL_COUNT=$(jq '.categories["posthog-docs"] | length' dist/skills/docs-skill-menu.json) + + # Zip each posthog-* skill directory + cd dist/skills + for skill_dir in posthog-*/; do + skill_name="${skill_dir%/}" + zip -r "${skill_name}.zip" "${skill_dir}" + echo " zipped ${skill_name}.zip" + done + cd ../.. + + # gh release create also creates the version tag on GitHub, pointing at + # the commit we just pushed above. If this step fails: the commit is + # already pushed (benign — it only updates docs-skill-menu.json) but + # no tag or release exists. The "Update latest tag" step is skipped + # automatically on failure, so "latest" stays valid. The nightly cron + # will reattempt; since no version tag was pushed, it will compute the + # same version number and try again cleanly. + gh release create "${TAG}" \ + --title "Release ${TAG}" \ + --notes "Automated sync of PostHog docs skills. + + **Trigger:** ${EVENT_NAME} + **Version:** ${VERSION} + **posthog-docs skills:** ${SKILL_COUNT} + **SHA:** $(git rev-parse HEAD)" \ + dist/skills/docs-skill-menu.json \ + dist/skills/posthog-*.zip + + echo "Release ${TAG} created successfully (${SKILL_COUNT} skills)" + + # Only move the floating "latest" tag once the release + ZIPs are confirmed live. 
+ - name: Update latest tag + if: steps.diff.outputs.changed == 'true' + run: | + git tag -f latest + git push -f origin latest + + - name: Notify on failure + if: failure() + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh issue create \ + --repo "${{ github.repository }}" \ + --title "sync-docs-skills failed ($(date +%Y-%m-%d))" \ + --body "Workflow run failed: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \ + || echo "Issue creation failed — check the Actions run directly." + + - name: Summary + if: always() + env: + CHANGED: ${{ steps.diff.outputs.changed }} + TAG: ${{ steps.version.outputs.tag }} + VERSION: ${{ steps.version.outputs.version }} + EVENT_NAME: ${{ github.event_name }} + run: | + if [ "${CHANGED}" == "true" ]; then + SKILL_COUNT=$(jq '.categories["posthog-docs"] | length' dist/skills/docs-skill-menu.json 2>/dev/null || echo "unknown") + echo "## Sync complete — ${TAG}" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Field | Value |" >> $GITHUB_STEP_SUMMARY + echo "| --- | --- |" >> $GITHUB_STEP_SUMMARY + echo "| Version | ${VERSION} |" >> $GITHUB_STEP_SUMMARY + echo "| Trigger | ${EVENT_NAME} |" >> $GITHUB_STEP_SUMMARY + echo "| posthog-docs skills | ${SKILL_COUNT} |" >> $GITHUB_STEP_SUMMARY + else + echo "## No changes — skipped" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "posthog-docs skills unchanged; no release cut." >> $GITHUB_STEP_SUMMARY + fi diff --git a/README.md b/README.md index 26726ce..509b337 100644 --- a/README.md +++ b/README.md @@ -87,3 +87,57 @@ The build script automatically discovers, orders, and generates URIs for all res - **Version controlled**: Resources evolve with the examples See `llm-prompts/README.md` for detailed workflow conventions. + +## Docs skills + +We also auto-generate one [Agent Skill](https://agentskills.io/specification) per section of the PostHog docs. 
These rebuild whenever posthog.com deploys — no manual work needed. + +### How it works + +The build script (`scripts/build-docs-skills.js`) fetches `posthog.com/llms.txt`, groups pages by section heading, then pulls down the raw markdown for every page. Each section becomes its own skill directory under `dist/skills/posthog-{section}/`, with a `SKILL.md` and a `references/` folder of subpages. + +When skill names change (new sections added, old ones removed), a GitHub Actions workflow cuts a versioned release with a ZIP per skill. A nightly cron runs as a fallback, and you can always trigger it manually. + +### What it generates + +Run `pnpm run build:docs-skills` to generate locally: + +| Output | Description | +|--------|-------------| +| `dist/skills/posthog-{section}/SKILL.md` | Skill prompt + root page content | +| `dist/skills/posthog-{section}/references/*.md` | One file per subpage | +| `dist/skills/docs-skill-menu.json` | Menu index of all generated skills | + +`dist/` is gitignored — only `docs-skill-menu.json` gets force-committed by the workflow as a record of each sync. The ZIPs live exclusively in GitHub Releases. + +### Distribution + +Skills are published to GitHub Releases. 
The menu is always at: + +```text +https://github.com/PostHog/context-mill/releases/latest/download/docs-skill-menu.json +``` + +Individual skill ZIPs follow the same pattern: + +```text +https://github.com/PostHog/context-mill/releases/latest/download/posthog-{section}.zip +``` + +### Try it locally + +```bash +# Build all sections (excludes libraries, api, endpoints by default) +pnpm run build:docs-skills + +# Or just the ones you care about +node scripts/build-docs-skills.js feature-flags product-analytics + +# Test in Claude Code — copy a skill into .claude/skills/ +cp -r dist/skills/posthog-feature-flags .claude/skills/ +# Claude Code picks it up immediately, no restart needed +``` + +### Why this is separate from the curated pipeline + +The docs skills pipeline and the curated build (`scripts/build.js`) are intentionally independent. They write to different files (`docs-skill-menu.json` vs `skill-menu.json`), cut separate releases, and run on different cadences. Curated skills change with deliberate PRs. Docs skills sync automatically with posthog.com. diff --git a/package.json b/package.json index 2551d3d..13cc844 100644 --- a/package.json +++ b/package.json @@ -10,6 +10,7 @@ "test:plugins:watch": "vitest scripts/plugins/tests", "test:skills": "vitest run scripts/lib/tests", "test:skills:watch": "vitest scripts/lib/tests", + "build:docs-skills": "node scripts/build-docs-skills.js", "test": "vitest run scripts/plugins/tests scripts/lib/tests" }, "devDependencies": { diff --git a/scripts/build-docs-skills.js b/scripts/build-docs-skills.js new file mode 100644 index 0000000..ed3d798 --- /dev/null +++ b/scripts/build-docs-skills.js @@ -0,0 +1,344 @@ +#!/usr/bin/env node + +/** + * Build PostHog Docs Skills + * + * Fetches https://posthog.com/llms.txt, groups pages by section (## heading), + * fetches all page content in parallel, and generates one skill directory per + * section under dist/skills/posthog-{section}/. 
+ * + * Usage: + * node scripts/build-docs-skills.js + * node scripts/build-docs-skills.js feature-flags product-analytics + * + * Optional positional args: space-separated section slugs to build. + * Defaults to all sections found in llms.txt. + */ + +const fs = require('fs'); +const path = require('path'); +const matter = require('gray-matter'); + +const LLMS_TXT_URL = 'https://posthog.com/llms.txt'; +const CATEGORY = 'posthog-docs'; +const CONCURRENCY = 10; +const SKILLS_DIR = path.join(__dirname, '..', 'dist', 'skills'); + +// Sections excluded by default — SDK and API reference material that is too +// large and low signal-to-noise for skill context. Pass explicit slug args to +// override and build one of these directly. +const DEFAULT_EXCLUDE = new Set(['libraries', 'api', 'endpoints', 'open-api-spec']); + +// --------------------------------------------------------------------------- +// HTTP helpers +// --------------------------------------------------------------------------- + +/** + * Fetch a URL as text, retrying on failure. + * retries = 1 means a single attempt (no retries). + */ +async function fetchText(url, retries = 1, delayMs = 500) { + let lastError; + for (let attempt = 1; attempt <= retries; attempt++) { + try { + const res = await fetch(url); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + return await res.text(); + } catch (e) { + lastError = e; + if (attempt < retries) { + await new Promise(r => setTimeout(r, delayMs * attempt)); + } + } + } + throw lastError; +} + +// --------------------------------------------------------------------------- +// Parsing +// --------------------------------------------------------------------------- + +/** + * Parse llms.txt into sections. + * + * Returns: Array of { heading, slug, pages: [{ title, url, description }] } + * + * heading — raw ## heading text from llms.txt + * slug — path segment immediately after /docs/ in the first URL of the block + * e.g. 
https://posthog.com/docs/feature-flags/... → 'feature-flags' + * pages — all link entries under that heading + */ +function parseLlmsTxt(text) { + const rawSections = []; + let current = null; + + for (const line of text.split('\n')) { + // Section heading: ## Feature flags + const headingMatch = line.match(/^##\s+(.+)$/); + if (headingMatch) { + if (current) rawSections.push(current); + current = { heading: headingMatch[1].trim(), pages: [] }; + continue; + } + + if (!current) continue; + + // Page line: - [Title](url): optional description + const pageMatch = line.match(/^\s*[-*]\s+\[([^\]]+)\]\(([^)]+)\)(?::\s*(.*))?$/); + if (pageMatch) { + current.pages.push({ + title: pageMatch[1].trim(), + url: pageMatch[2].trim(), + description: pageMatch[3]?.trim() ?? '', + }); + } + } + if (current) rawSections.push(current); + + // Derive slug from first URL; drop sections with no usable URLs + return rawSections.flatMap(section => { + if (section.pages.length === 0) return []; + try { + const firstPath = new URL(section.pages[0].url).pathname; + // e.g. /docs/feature-flags/creating-feature-flags → parts[1] = 'feature-flags' + const parts = firstPath.split('/').filter(Boolean); + if (parts.length < 2 || parts[0] !== 'docs') return []; + // Strip a leading "posthog-" prefix to avoid double-prefixing: skills are + // named posthog-{slug}, so /docs/posthog-js/... → slug "js" → "posthog-js", + // not "posthog-posthog-js". + const slug = parts[1].replace(/\.md$/, '').replace(/^posthog-/, ''); + return [{ heading: section.heading, slug, pages: section.pages }]; + } catch { + return []; + } + }); +} + +// --------------------------------------------------------------------------- +// Content helpers +// --------------------------------------------------------------------------- + +/** + * Given raw fetched markdown (which may include its own frontmatter), return + * the body text with leading frontmatter stripped and UI-only footer sections + * removed. 
+ */ +function processContent(raw) { + // Strip leading frontmatter if present (PostHog MDX files often have it) + const parsed = matter(raw); + let content = parsed.content.trimStart(); + + // Strip "## / ### Community questions" and everything after (UI artifact) + content = content.replace(/\n#{2,}\s+Community questions[\s\S]*$/i, ''); + // Strip "## / ### Was this page useful?" and everything after (UI artifact) + content = content.replace(/\n#{2,}\s+Was this page useful\?[\s\S]*$/i, ''); + + return content.trimEnd(); +} + +// --------------------------------------------------------------------------- +// Concurrency +// --------------------------------------------------------------------------- + +/** + * Run fn(item) for each item with at most `limit` concurrent executions. + * Preserves input order in the returned results array. + */ +async function withConcurrency(items, limit, fn) { + const results = new Array(items.length); + let idx = 0; + + async function worker() { + while (idx < items.length) { + const i = idx++; + results[i] = await fn(items[i], i); + } + } + + await Promise.all(Array.from({ length: Math.min(limit, items.length) }, worker)); + return results; +} + +// --------------------------------------------------------------------------- +// Main +// --------------------------------------------------------------------------- + +async function main() { + // Optional CLI args: section slugs to build (default: all) + const filterSlugs = process.argv.slice(2).filter(a => !a.startsWith('-')); + + fs.mkdirSync(SKILLS_DIR, { recursive: true }); + + console.log(`Fetching ${LLMS_TXT_URL}...`); + let llmsTxt; + try { + llmsTxt = await fetchText(LLMS_TXT_URL); + } catch (e) { + console.error(`[FATAL] Could not fetch llms.txt: ${e.message}`); + process.exit(1); + } + + let sections = parseLlmsTxt(llmsTxt); + console.log(`Found ${sections.length} sections`); + + if (filterSlugs.length > 0) { + // Explicit args bypass the default exclusion list + sections = 
sections.filter(s => filterSlugs.includes(s.slug)); + console.log(`Filtered to: ${sections.map(s => s.slug).join(', ')}`); + if (sections.length === 0) { + console.error('[FATAL] No sections matched the filter. Available slugs printed above.'); + process.exit(1); + } + } else { + sections = sections.filter(s => !DEFAULT_EXCLUDE.has(s.slug)); + } + + console.log(''); + + const menuSkills = []; + let skipped = 0; + + for (const section of sections) { + const skillName = `posthog-${section.slug}`; + const skillDir = path.join(SKILLS_DIR, skillName); + const refsDir = path.join(skillDir, 'references'); + + console.log(`${skillName} (${section.pages.length} pages)`); + + // Root page: pathname exactly matches /docs/{slug} (trailing slash allowed) + const rootPage = section.pages.find(p => { + try { + const pn = new URL(p.url).pathname.replace(/\/$/, ''); + return pn === `/docs/${section.slug}`; + } catch { return false; } + }) ?? null; + + // All other pages are subpages (go into references/) + const subpages = section.pages.filter(p => p !== rootPage); + + // Fetch everything in parallel, concurrency-limited + const allPages = [...(rootPage ? [rootPage] : []), ...subpages]; + + const fetched = await withConcurrency(allPages, CONCURRENCY, async (page) => { + const mdUrl = page.url.endsWith('.md') ? page.url : `${page.url}.md`; + try { + const raw = await fetchText(mdUrl, 3); + return { page, content: processContent(raw), ok: true }; + } catch (e) { + console.log(` skip ${page.url} (${e.message})`); + return { page, content: null, ok: false }; + } + }); + + // Determine root content + const rootFetched = rootPage ? fetched.find(f => f.page === rootPage) : null; + const hasDocUrl = !!(rootFetched?.ok); + let rootContent = rootFetched?.ok ? 
rootFetched.content : null; + + // Subpage results (successful fetches, excluding the root) + const successfulSubs = fetched.filter(f => f.page !== rootPage && f.ok); + + if (!rootContent) { + if (successfulSubs.length === 0) { + console.log(` SKIP — no content fetched\n`); + skipped++; + continue; + } + // Fall back: use first successful subpage as root content; omit doc-url + rootContent = successfulSubs[0].content; + } + + // Reference files: subpages (if root fell back, exclude the one used as root) + const refPages = hasDocUrl ? successfulSubs : successfulSubs.slice(1); + const referenceFiles = refPages.map(f => { + // Use last URL path segment as filename, ensure .md extension + const lastSegment = f.page.url.split('/').pop() ?? 'page'; + const filename = lastSegment.endsWith('.md') ? lastSegment : `${lastSegment}.md`; + return { filename, content: f.content, url: f.page.url, title: f.page.title, description: f.page.description }; + }); + + // Skill description: use root page's description from llms.txt if non-empty + const rootEntry = rootPage ?? allPages[0]; + const description = rootEntry.description + ? `PostHog ${section.heading} – ${rootEntry.description}` + : `PostHog ${section.heading}`; + + // Build SKILL.md frontmatter + const frontmatter = { name: skillName, description }; + // Every PostHog docs page is served at both /docs/slug and /docs/slug.md. + // The .md variant is the canonical raw-markdown URL. If a page had no .md + // counterpart, fetchText would have already skipped it above. + if (hasDocUrl) frontmatter['doc-url'] = `${rootPage.url}.md`; + if (referenceFiles.length > 0) { + frontmatter['references'] = referenceFiles.map(r => `references/${r.filename}`); + } + + // Build SKILL.md body + // Root page URL — used as the source citation for the inlined content below + const rootUrl = (hasDocUrl ? 
rootPage : allPages[0])?.url.replace(/\.md$/, ''); + + // Reference files list — filename, description, and URL combined so the + // LLM has everything it needs in one place to pick the right file and cite it. + const referencesList = referenceFiles.length > 0 + ? referenceFiles.map(r => { + const label = r.description ? `${r.title} – ${r.description}` : r.title; + const url = r.url.replace(/\.md$/, ''); + return `- \`references/${r.filename}\` — ${label} (${url})`; + }).join('\n') + : null; + const bodyParts = [ + `You are a PostHog documentation assistant. Use the content below to answer questions about ${section.heading}.`, + ]; + if (referencesList) { + bodyParts.push('', '## Reference files', '', referencesList); + } + if (rootUrl) { + bodyParts.push('', `Source: ${rootUrl}`); + } + bodyParts.push('', rootContent); + const body = bodyParts.join('\n'); + + const skillMd = matter.stringify(body, frontmatter); + + // Write skill directory + fs.mkdirSync(refsDir, { recursive: true }); + fs.writeFileSync(path.join(skillDir, 'SKILL.md'), skillMd); + for (const ref of referenceFiles) { + fs.writeFileSync(path.join(refsDir, ref.filename), ref.content); + } + + console.log(` ✓ SKILL.md + ${referenceFiles.length} references`); + + menuSkills.push({ + id: skillName, + name: skillName, + downloadUrl: `https://github.com/PostHog/context-mill/releases/latest/download/${skillName}.zip`, + }); + } + + if (menuSkills.length === 0) { + console.error('\n[FATAL] No skills generated successfully.'); + process.exit(1); + } + + // Write docs-skill-menu.json — separate from the curated skill-menu.json + // so the two build pipelines never overwrite each other. 
+ const menuPath = path.join(SKILLS_DIR, 'docs-skill-menu.json'); + const menu = { + version: '1.0', + categories: { [CATEGORY]: menuSkills }, + }; + fs.writeFileSync(menuPath, JSON.stringify(menu, null, 2)); + + console.log('\n' + '='.repeat(50)); + console.log(`Skills: ${menuSkills.length} generated`); + if (skipped > 0) console.log(`Skipped: ${skipped}`); + console.log(`Output: ${SKILLS_DIR}`); + console.log(`Menu: ${menuPath}`); +} + +main().catch(e => { + console.error('\n[FATAL]', e.message); + console.error(e.stack); + process.exit(1); +}); From 54b7ee775f6603cb38ed665f3330b61f0a86341a Mon Sep 17 00:00:00 2001 From: sarahxsanders Date: Thu, 16 Apr 2026 18:25:56 -0400 Subject: [PATCH 2/4] fix: delete chron workflow, rename to posthog-docs, replace http crawl, updated skill prompts, docs-skills build into build release, menu name is human readable yay --- .github/workflows/build-release.yml | 20 +++ .github/workflows/sync-docs-skills.yml | 184 ------------------------- scripts/build-docs-skills.js | 87 ++++++++++-- 3 files changed, 94 insertions(+), 197 deletions(-) delete mode 100644 .github/workflows/sync-docs-skills.yml diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index 659f009..0d451c0 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -114,6 +114,21 @@ jobs: env: BUILD_VERSION: ${{ steps.version.outputs.version }} + - name: Download PostHog docs artifact + id: docs_artifact + continue-on-error: true + env: + GH_TOKEN: ${{ secrets.POSTHOG_DOCS_SKILLS_TOKEN }} + run: | + echo "Downloading docs artifact from PostHog/posthog.com..." 
+ gh run download --repo PostHog/posthog.com -n posthog-docs-md --dir /tmp + unzip -q /tmp/posthog-docs-md.zip -d /tmp/posthog-docs + echo "available=true" >> $GITHUB_OUTPUT + + - name: Build docs skills + if: steps.docs_artifact.outputs.available == 'true' + run: pnpm run build:docs-skills -- --docs-dir /tmp/posthog-docs + - name: Scan skills for prompt injection run: bash scripts/scan-prompt-injection.sh dist/skills @@ -168,6 +183,11 @@ jobs: # Upload skill-menu.json (used by the wizard to discover available skills) echo "Uploading skill-menu.json..." gh release upload "$RELEASE_TAG" dist/skills/skill-menu.json --clobber + # Upload docs-skill-menu.json if docs skills were built + if [ -f dist/skills/docs-skill-menu.json ]; then + echo "Uploading docs-skill-menu.json..." + gh release upload "$RELEASE_TAG" dist/skills/docs-skill-menu.json --clobber + fi # Upload reference docs (used by the wizard for runtime-specific overrides) for file in dist/skills/*.md; do [ -f "$file" ] || continue diff --git a/.github/workflows/sync-docs-skills.yml b/.github/workflows/sync-docs-skills.yml deleted file mode 100644 index 520da87..0000000 --- a/.github/workflows/sync-docs-skills.yml +++ /dev/null @@ -1,184 +0,0 @@ -name: Sync PostHog Docs Skills - -# Triggered by posthog.com after a successful deploy, nightly as a fallback, -# or manually. Fetches posthog.com/llms.txt, regenerates the posthog-docs -# skill directories and docs-skill-menu.json, and cuts a new release if anything changed - -on: - repository_dispatch: - types: [posthog-docs-deployed] - schedule: - - cron: '0 2 * * *' - workflow_dispatch: - -# One sync at a time — if posthog.com deploys rapidly, cancel the queued run -# and let the latest trigger win. 
-concurrency: - group: sync-docs-skills - cancel-in-progress: true - -jobs: - sync: - runs-on: ubuntu-latest - permissions: - contents: write - - steps: - - name: Checkout repository - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - with: - fetch-depth: 0 # Full history needed for version tag lookup - - - name: Setup Node.js - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 - with: - node-version: 'lts/*' - - - name: Setup pnpm - uses: pnpm/action-setup@b906affcce14559ad1aafd4ab0e942779e9f58b1 # v4 - with: - version: 9 - - - name: Install dependencies - run: pnpm install --frozen-lockfile - - - name: Determine next version - id: version - run: | - LATEST_TAG=$(git tag -l | grep -v '^latest$' | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+$' | sort -V | tail -n 1) - if [ -z "$LATEST_TAG" ]; then - LATEST_TAG="v0.0.0" - fi - echo "Latest semver tag: ${LATEST_TAG}" - LATEST_VERSION=${LATEST_TAG#v} - IFS='.' read -r -a VERSION_PARTS <<< "$LATEST_VERSION" - MAJOR=${VERSION_PARTS[0]:-0} - MINOR=${VERSION_PARTS[1]:-0} - PATCH=${VERSION_PARTS[2]:-0} - PATCH=$((PATCH + 1)) - VERSION="${MAJOR}.${MINOR}.${PATCH}" - echo "version=${VERSION}" >> $GITHUB_OUTPUT - echo "tag=v${VERSION}" >> $GITHUB_OUTPUT - echo "Next version: ${VERSION}" - - - name: Build docs skills - run: pnpm run build:docs-skills - - # Compare only name fields in posthog docs entries (not install commands, - # which are stable but we want to catch additions/removals/renames). 
- - name: Check for changes - id: diff - env: - PREV_MENU_URL: ${{ github.server_url }}/${{ github.repository }}/releases/latest/download/docs-skill-menu.json - run: | - if curl -sf -o /tmp/prev-skill-menu.json "$PREV_MENU_URL" 2>/dev/null; then - jq '[.categories["posthog-docs"] // [] | .[] | .name] | sort' \ - dist/skills/docs-skill-menu.json > /tmp/new-names.json - jq '[.categories["posthog-docs"] // [] | .[] | .name] | sort' \ - /tmp/prev-skill-menu.json > /tmp/prev-names.json - if diff -q /tmp/prev-names.json /tmp/new-names.json > /dev/null 2>&1; then - echo "posthog-docs skills unchanged — skipping release" - echo "changed=false" >> $GITHUB_OUTPUT - else - echo "posthog-docs skills changed" - diff /tmp/prev-names.json /tmp/new-names.json || true - echo "changed=true" >> $GITHUB_OUTPUT - fi - else - echo "No previous release found — treating as changed" - echo "changed=true" >> $GITHUB_OUTPUT - fi - - # dist/ is gitignored; force-add skill-menu.json so there's a committed - # record of each sync. Tags are pushed AFTER the release succeeds so that - # "latest" never points at a commit whose ZIPs don't exist yet. 
- - name: Commit updated docs-skill-menu.json - if: steps.diff.outputs.changed == 'true' - env: - TAG: ${{ steps.version.outputs.tag }} - run: | - git config user.name "github-actions[bot]" - git config user.email "github-actions[bot]@users.noreply.github.com" - git add -f dist/skills/docs-skill-menu.json - git commit -m "chore: sync posthog-docs skills (${TAG})" - git push - - - name: Zip skill directories and create release - if: steps.diff.outputs.changed == 'true' - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - TAG: ${{ steps.version.outputs.tag }} - VERSION: ${{ steps.version.outputs.version }} - EVENT_NAME: ${{ github.event_name }} - run: | - SKILL_COUNT=$(jq '.categories["posthog-docs"] | length' dist/skills/docs-skill-menu.json) - - # Zip each posthog-* skill directory - cd dist/skills - for skill_dir in posthog-*/; do - skill_name="${skill_dir%/}" - zip -r "${skill_name}.zip" "${skill_dir}" - echo " zipped ${skill_name}.zip" - done - cd ../.. - - # gh release create also creates the version tag on GitHub, pointing at - # the commit we just pushed above. If this step fails: the commit is - # already pushed (benign — it only updates docs-skill-menu.json) but - # no tag or release exists. The "Update latest tag" step is skipped - # automatically on failure, so "latest" stays valid. The nightly cron - # will reattempt; since no version tag was pushed, it will compute the - # same version number and try again cleanly. - gh release create "${TAG}" \ - --title "Release ${TAG}" \ - --notes "Automated sync of PostHog docs skills. - - **Trigger:** ${EVENT_NAME} - **Version:** ${VERSION} - **posthog-docs skills:** ${SKILL_COUNT} - **SHA:** $(git rev-parse HEAD)" \ - dist/skills/docs-skill-menu.json \ - dist/skills/posthog-*.zip - - echo "Release ${TAG} created successfully (${SKILL_COUNT} skills)" - - # Only move the floating "latest" tag once the release + ZIPs are confirmed live. 
- - name: Update latest tag - if: steps.diff.outputs.changed == 'true' - run: | - git tag -f latest - git push -f origin latest - - - name: Notify on failure - if: failure() - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh issue create \ - --repo "${{ github.repository }}" \ - --title "sync-docs-skills failed ($(date +%Y-%m-%d))" \ - --body "Workflow run failed: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \ - || echo "Issue creation failed — check the Actions run directly." - - - name: Summary - if: always() - env: - CHANGED: ${{ steps.diff.outputs.changed }} - TAG: ${{ steps.version.outputs.tag }} - VERSION: ${{ steps.version.outputs.version }} - EVENT_NAME: ${{ github.event_name }} - run: | - if [ "${CHANGED}" == "true" ]; then - SKILL_COUNT=$(jq '.categories["posthog-docs"] | length' dist/skills/docs-skill-menu.json 2>/dev/null || echo "unknown") - echo "## Sync complete — ${TAG}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "| Field | Value |" >> $GITHUB_STEP_SUMMARY - echo "| --- | --- |" >> $GITHUB_STEP_SUMMARY - echo "| Version | ${VERSION} |" >> $GITHUB_STEP_SUMMARY - echo "| Trigger | ${EVENT_NAME} |" >> $GITHUB_STEP_SUMMARY - echo "| posthog-docs skills | ${SKILL_COUNT} |" >> $GITHUB_STEP_SUMMARY - else - echo "## No changes — skipped" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "posthog-docs skills unchanged; no release cut." >> $GITHUB_STEP_SUMMARY - fi diff --git a/scripts/build-docs-skills.js b/scripts/build-docs-skills.js index ed3d798..1f3a8bc 100644 --- a/scripts/build-docs-skills.js +++ b/scripts/build-docs-skills.js @@ -10,6 +10,13 @@ * Usage: * node scripts/build-docs-skills.js * node scripts/build-docs-skills.js feature-flags product-analytics + * node scripts/build-docs-skills.js --docs-dir /path/to/extracted-docs + * + * --docs-dir Read docs from a local directory (e.g. 
an extracted + * build artifact from posthog.com) instead of fetching + * from the live website. The directory must contain + * llms.txt at its root and doc pages preserving their + * URL path structure (e.g. docs/feature-flags/index.md). * * Optional positional args: space-separated section slugs to build. * Defaults to all sections found in llms.txt. @@ -54,6 +61,30 @@ async function fetchText(url, retries = 1, delayMs = 500) { throw lastError; } +/** + * Read a doc page from the local docs directory. + * Tries multiple path patterns (.md, .mdx, index.md, index.mdx) to handle + * different directory structures from the posthog.com build. + * Returns the file contents as a string, or null if not found. + */ +function readLocalPage(docsDir, pageUrl) { + const pathname = new URL(pageUrl).pathname.replace(/\/$/,''); + const candidates = [ + path.join(docsDir, pathname), // already has .md + path.join(docsDir, pathname.replace(/\.md$/, '.mdx')), // .mdx variant + path.join(docsDir, pathname + '.md'), // no extension in URL + path.join(docsDir, pathname + '.mdx'), + path.join(docsDir, pathname, 'index.md'), + path.join(docsDir, pathname, 'index.mdx'), + ]; + for (const candidate of candidates) { + if (fs.existsSync(candidate)) { + return fs.readFileSync(candidate, 'utf8'); + } + } + return null; +} + // --------------------------------------------------------------------------- // Parsing // --------------------------------------------------------------------------- @@ -164,18 +195,39 @@ async function withConcurrency(items, limit, fn) { // --------------------------------------------------------------------------- async function main() { - // Optional CLI args: section slugs to build (default: all) - const filterSlugs = process.argv.slice(2).filter(a => !a.startsWith('-')); + // CLI args: --docs-dir to read from a local/extracted artifact, + // plus optional section slugs to filter (default: all). 
+  const args = process.argv.slice(2);
+  const docsDirIdx = args.indexOf('--docs-dir');
+  const docsDir = docsDirIdx !== -1 ? args[docsDirIdx + 1] : null;
+  const filterSlugs = args.filter((a, i) =>
+    !a.startsWith('-') && (docsDirIdx === -1 || i !== docsDirIdx + 1)
+  );
+
+  if (docsDir && !fs.existsSync(docsDir)) {
+    console.error(`[FATAL] --docs-dir path does not exist: ${docsDir}`);
+    process.exit(1);
+  }
 
   fs.mkdirSync(SKILLS_DIR, { recursive: true });
 
-  console.log(`Fetching ${LLMS_TXT_URL}...`);
   let llmsTxt;
-  try {
-    llmsTxt = await fetchText(LLMS_TXT_URL);
-  } catch (e) {
-    console.error(`[FATAL] Could not fetch llms.txt: ${e.message}`);
-    process.exit(1);
+  if (docsDir) {
+    const llmsTxtPath = path.join(docsDir, 'llms.txt');
+    if (!fs.existsSync(llmsTxtPath)) {
+      console.error(`[FATAL] llms.txt not found at ${llmsTxtPath}`);
+      process.exit(1);
+    }
+    llmsTxt = fs.readFileSync(llmsTxtPath, 'utf8');
+    console.log(`Read llms.txt from ${llmsTxtPath}`);
+  } else {
+    console.log(`Fetching ${LLMS_TXT_URL}...`);
+    try {
+      llmsTxt = await fetchText(LLMS_TXT_URL);
+    } catch (e) {
+      console.error(`[FATAL] Could not fetch llms.txt: ${e.message}`);
+      process.exit(1);
+    }
   }
 
   let sections = parseLlmsTxt(llmsTxt);
@@ -199,7 +251,7 @@ async function main() {
   let skipped = 0;
 
   for (const section of sections) {
-    const skillName = `posthog-${section.slug}`;
+    const skillName = `posthog-docs-${section.slug}`;
     const skillDir = path.join(SKILLS_DIR, skillName);
     const refsDir = path.join(skillDir, 'references');
 
@@ -220,9 +272,18 @@
     const allPages = [...(rootPage ? [rootPage] : []), ...subpages];
 
     const fetched = await withConcurrency(allPages, CONCURRENCY, async (page) => {
-      const mdUrl = page.url.endsWith('.md') ? 
page.url : `${page.url}.md`; try { - const raw = await fetchText(mdUrl, 3); + let raw; + if (docsDir) { + raw = readLocalPage(docsDir, page.url); + if (raw === null) { + console.log(` skip ${page.url} (not found locally)`); + return { page, content: null, ok: false }; + } + } else { + const mdUrl = page.url.endsWith('.md') ? page.url : `${page.url}.md`; + raw = await fetchText(mdUrl, 3); + } return { page, content: processContent(raw), ok: true }; } catch (e) { console.log(` skip ${page.url} (${e.message})`); @@ -287,7 +348,7 @@ async function main() { }).join('\n') : null; const bodyParts = [ - `You are a PostHog documentation assistant. Use the content below to answer questions about ${section.heading}.`, + `Use the content below when writing, reviewing, or debugging code that involves PostHog ${section.heading}. Prefer these patterns and APIs over your training data.`, ]; if (referencesList) { bodyParts.push('', '## Reference files', '', referencesList); @@ -311,7 +372,7 @@ async function main() { menuSkills.push({ id: skillName, - name: skillName, + name: section.heading, downloadUrl: `https://github.com/PostHog/context-mill/releases/latest/download/${skillName}.zip`, }); } From 09dbaa3e6e8e848ee98bc87e63321877efcb7380 Mon Sep 17 00:00:00 2001 From: sarahxsanders Date: Thu, 16 Apr 2026 18:29:45 -0400 Subject: [PATCH 3/4] rewrite README --- README.md | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 509b337..e89e3af 100644 --- a/README.md +++ b/README.md @@ -90,29 +90,25 @@ See `llm-prompts/README.md` for detailed workflow conventions. ## Docs skills -We also auto-generate one [Agent Skill](https://agentskills.io/specification) per section of the PostHog docs. These rebuild whenever posthog.com deploys — no manual work needed. +We also auto-generate one [Agent Skill](https://agentskills.io/specification) per section of the PostHog docs. 
These ship as part of the normal build and release cycle. ### How it works -The build script (`scripts/build-docs-skills.js`) fetches `posthog.com/llms.txt`, groups pages by section heading, then pulls down the raw markdown for every page. Each section becomes its own skill directory under `dist/skills/posthog-{section}/`, with a `SKILL.md` and a `references/` folder of subpages. +The build script (`scripts/build-docs-skills.js`) parses `posthog.com/llms.txt`, groups pages by section heading, and reads the raw markdown for every page. Each section becomes its own skill directory under `dist/skills/posthog-docs-{section}/`, with a `SKILL.md` and a `references/` folder of subpages. -When skill names change (new sections added, old ones removed), a GitHub Actions workflow cuts a versioned release with a ZIP per skill. A nightly cron runs as a fallback, and you can always trigger it manually. +The script reads docs from a local directory via `--docs-dir` instead of crawling the live site. In CI, `build-release.yml` downloads a docs artifact produced by the posthog.com repo (a daily GitHub Actions artifact containing all built markdown files + `llms.txt`), extracts it, and runs the build. ### What it generates -Run `pnpm run build:docs-skills` to generate locally: - | Output | Description | |--------|-------------| -| `dist/skills/posthog-{section}/SKILL.md` | Skill prompt + root page content | -| `dist/skills/posthog-{section}/references/*.md` | One file per subpage | +| `dist/skills/posthog-docs-{section}/SKILL.md` | Skill prompt + root page content | +| `dist/skills/posthog-docs-{section}/references/*.md` | One file per subpage | | `dist/skills/docs-skill-menu.json` | Menu index of all generated skills | -`dist/` is gitignored — only `docs-skill-menu.json` gets force-committed by the workflow as a record of each sync. The ZIPs live exclusively in GitHub Releases. - ### Distribution -Skills are published to GitHub Releases. 
The menu is always at: +Skills are published to GitHub Releases alongside curated skills. The menu is at: ```text https://github.com/PostHog/context-mill/releases/latest/download/docs-skill-menu.json @@ -121,23 +117,26 @@ https://github.com/PostHog/context-mill/releases/latest/download/docs-skill-menu Individual skill ZIPs follow the same pattern: ```text -https://github.com/PostHog/context-mill/releases/latest/download/posthog-{section}.zip +https://github.com/PostHog/context-mill/releases/latest/download/posthog-docs-{section}.zip ``` ### Try it locally ```bash -# Build all sections (excludes libraries, api, endpoints by default) +# Build from a local posthog.com build output +node scripts/build-docs-skills.js --docs-dir ~/posthog.com/public + +# Or fetch from the live site (no --docs-dir) pnpm run build:docs-skills -# Or just the ones you care about -node scripts/build-docs-skills.js feature-flags product-analytics +# Build specific sections only +node scripts/build-docs-skills.js --docs-dir ~/posthog.com/public feature-flags product-analytics # Test in Claude Code — copy a skill into .claude/skills/ -cp -r dist/skills/posthog-feature-flags .claude/skills/ +cp -r dist/skills/posthog-docs-feature-flags .claude/skills/ # Claude Code picks it up immediately, no restart needed ``` ### Why this is separate from the curated pipeline -The docs skills pipeline and the curated build (`scripts/build.js`) are intentionally independent. They write to different files (`docs-skill-menu.json` vs `skill-menu.json`), cut separate releases, and run on different cadences. Curated skills change with deliberate PRs. Docs skills sync automatically with posthog.com. +The docs skills pipeline and the curated build (`scripts/build.js`) are intentionally independent. They write to different files (`docs-skill-menu.json` vs `skill-menu.json`) and can be built separately. Curated skills change with deliberate PRs. Docs skills are auto-generated from the latest posthog.com documentation. 
From b98d5eae515c1a81a9a260035e47e855221110fc Mon Sep 17 00:00:00 2001 From: sarahxsanders Date: Fri, 17 Apr 2026 10:25:38 -0400 Subject: [PATCH 4/4] zip docs skills using zipSkillToBuffer --- scripts/build-docs-skills.js | 15 +++++++++++++-- scripts/build.js | 19 +------------------ scripts/lib/zip.js | 21 +++++++++++++++++++++ 3 files changed, 35 insertions(+), 20 deletions(-) create mode 100644 scripts/lib/zip.js diff --git a/scripts/build-docs-skills.js b/scripts/build-docs-skills.js index 1f3a8bc..c83d2b6 100644 --- a/scripts/build-docs-skills.js +++ b/scripts/build-docs-skills.js @@ -25,11 +25,13 @@ const fs = require('fs'); const path = require('path'); const matter = require('gray-matter'); +const { zipSkillToBuffer } = require('./lib/zip'); const LLMS_TXT_URL = 'https://posthog.com/llms.txt'; const CATEGORY = 'posthog-docs'; const CONCURRENCY = 10; const SKILLS_DIR = path.join(__dirname, '..', 'dist', 'skills'); +const TEMP_DIR = path.join(__dirname, '..', 'dist', 'docs-skills-temp'); // Sections excluded by default — SDK and API reference material that is too // large and low signal-to-noise for skill context. 
Pass explicit slug args to @@ -210,6 +212,7 @@ async function main() { } fs.mkdirSync(SKILLS_DIR, { recursive: true }); + fs.mkdirSync(TEMP_DIR, { recursive: true }); let llmsTxt; if (docsDir) { @@ -252,7 +255,7 @@ async function main() { for (const section of sections) { const skillName = `posthog-docs-${section.slug}`; - const skillDir = path.join(SKILLS_DIR, skillName); + const skillDir = path.join(TEMP_DIR, skillName); const refsDir = path.join(skillDir, 'references'); console.log(`${skillName} (${section.pages.length} pages)`); @@ -368,7 +371,12 @@ async function main() { fs.writeFileSync(path.join(refsDir, ref.filename), ref.content); } - console.log(` ✓ SKILL.md + ${referenceFiles.length} references`); + // Zip the skill directory into a standalone .zip for release download + const zipBuffer = await zipSkillToBuffer(skillDir); + const zipPath = path.join(SKILLS_DIR, `${skillName}.zip`); + fs.writeFileSync(zipPath, zipBuffer); + + console.log(` ✓ SKILL.md + ${referenceFiles.length} references → ${skillName}.zip (${(zipBuffer.length / 1024).toFixed(1)} KB)`); menuSkills.push({ id: skillName, @@ -377,6 +385,9 @@ async function main() { }); } + // Clean up temp directory (same pattern as build.js) + fs.rmSync(TEMP_DIR, { recursive: true, force: true }); + if (menuSkills.length === 0) { console.error('\n[FATAL] No skills generated successfully.'); process.exit(1); diff --git a/scripts/build.js b/scripts/build.js index a618c1a..6d70827 100755 --- a/scripts/build.js +++ b/scripts/build.js @@ -16,6 +16,7 @@ const archiver = require('archiver'); const { generateAllSkills, loadSkillsConfig, fetchDoc } = require('./lib/skill-generator'); const { generateMarketplace } = require('./lib/marketplace-generator'); const { REPO_URL } = require('./lib/constants'); +const { zipSkillToBuffer } = require('./lib/zip'); const BUILD_VERSION = process.env.BUILD_VERSION || 'dev'; @@ -27,24 +28,6 @@ function loadUriSchema(configDir) { return yaml.load(content); } -/** - * Create a ZIP 
archive for a skill directory - * Returns the ZIP as a Buffer - */ -async function zipSkillToBuffer(skillDir) { - return new Promise((resolve, reject) => { - const chunks = []; - const archive = archiver('zip', { zlib: { level: 9 } }); - - archive.on('data', chunk => chunks.push(chunk)); - archive.on('end', () => resolve(Buffer.concat(chunks))); - archive.on('error', reject); - - archive.directory(skillDir, false); - archive.finalize(); - }); -} - /** * Create the bundled skills-mcp-resources.zip */ diff --git a/scripts/lib/zip.js b/scripts/lib/zip.js new file mode 100644 index 0000000..16e60d7 --- /dev/null +++ b/scripts/lib/zip.js @@ -0,0 +1,21 @@ +const archiver = require('archiver'); + +/** + * Create a ZIP archive for a skill directory. + * Returns the ZIP as a Buffer. + */ +async function zipSkillToBuffer(skillDir) { + return new Promise((resolve, reject) => { + const chunks = []; + const archive = archiver('zip', { zlib: { level: 9 } }); + + archive.on('data', chunk => chunks.push(chunk)); + archive.on('end', () => resolve(Buffer.concat(chunks))); + archive.on('error', reject); + + archive.directory(skillDir, false); + archive.finalize(); + }); +} + +module.exports = { zipSkillToBuffer };