From 75d7c06d581d20c0704f3806783f39f6a6a11d4b Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 17 Mar 2026 14:49:33 +0000 Subject: [PATCH] feat: add LLM-powered wiki documentation generation via Claude API Adds a new CI workflow and Python script that uses the Claude API to generate comprehensive wiki pages (Architecture, Getting Started, Examples, MEX Acceleration, Performance) from the full repo source. Triggered on push to main when source/examples/docs change, or manually. Requires ANTHROPIC_API_KEY secret to be configured in the repository. https://claude.ai/code/session_0122rv5TcbfDRgWhezmTemfi --- .github/workflows/generate-llm-docs.yml | 56 ++++ scripts/generate_llm_docs.py | 364 ++++++++++++++++++++++++ 2 files changed, 420 insertions(+) create mode 100644 .github/workflows/generate-llm-docs.yml create mode 100644 scripts/generate_llm_docs.py diff --git a/.github/workflows/generate-llm-docs.yml b/.github/workflows/generate-llm-docs.yml new file mode 100644 index 00000000..58c3a84e --- /dev/null +++ b/.github/workflows/generate-llm-docs.yml @@ -0,0 +1,56 @@ +name: Generate Wiki Docs (LLM) + +on: + push: + branches: [main] + paths: + - 'libs/**/*.m' + - 'examples/**/*.m' + - 'docs/**/*.md' + - 'README.md' + - 'scripts/generate_llm_docs.py' + workflow_dispatch: + +permissions: + contents: write + +jobs: + generate: + name: Generate Wiki Documentation + runs-on: ubuntu-latest + steps: + - name: Checkout main repo + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install dependencies + run: pip install anthropic + + - name: Clone wiki repo + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + git clone "https://x-access-token:${GITHUB_TOKEN}@github.com/HanSur94/FastPlot.wiki.git" wiki + + - name: Generate LLM docs + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + run: python3 scripts/generate_llm_docs.py + + - name: Push updated wiki + run: | + cd wiki + git 
config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add -A + if git diff --cached --quiet; then + echo "No documentation changes" + else + git commit -m "docs: auto-update wiki pages via LLM from source code" + git push + echo "Wiki LLM docs updated" + fi diff --git a/scripts/generate_llm_docs.py b/scripts/generate_llm_docs.py new file mode 100644 index 00000000..7c9f226f --- /dev/null +++ b/scripts/generate_llm_docs.py @@ -0,0 +1,364 @@ +#!/usr/bin/env python3 +"""Generate comprehensive wiki documentation using Claude API. + +Reads source files, examples, and existing docs, then uses the Claude API +to produce rich wiki pages covering architecture, getting started, examples, +MEX acceleration, and performance. + +Usage: + ANTHROPIC_API_KEY=sk-... python3 scripts/generate_llm_docs.py + +Environment variables: + ANTHROPIC_API_KEY — required, Anthropic API key + WIKI_DIR — optional, override wiki output directory (default: wiki/) + MODEL — optional, Claude model to use (default: claude-sonnet-4-20250514) +""" + +import os +import re +import sys +import json +import time +from pathlib import Path +from typing import Optional + +try: + import anthropic +except ImportError: + print("ERROR: anthropic package not installed. 
def read_file(path: Path, max_lines: int = 500) -> str:
    """Return the text of *path*, keeping at most *max_lines* lines.

    The file is decoded as UTF-8 with undecodable bytes replaced. When the
    file has more than ``max_lines`` lines, only the first ``max_lines`` are
    kept and a marker line stating the original line count is appended.

    Returns an empty string when the file cannot be opened or read
    (``OSError``); this is deliberate best-effort behaviour so that one
    unreadable file does not abort the whole collection run.
    """
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as handle:
            lines = handle.readlines()
    except OSError:
        return ""

    # Capture the full length before slicing so the marker reports the
    # original size of the file, not the truncated one.
    total = len(lines)
    if total > max_lines:
        kept = lines[:max_lines]
        kept.append(f"\n... (truncated, {total} lines total)\n")
        return "".join(kept)
    return "".join(lines)


def collect_source_files(lib_dir: Path) -> dict[str, str]:
    """Map every ``*.m`` file directly inside *lib_dir* to its text.

    Keys are paths relative to ``PROJECT_ROOT``; values come from
    ``read_file`` with its default line limit. The glob is not recursive, so
    files in subdirectories (for example ``private/``) are not collected.
    Returns an empty dict when *lib_dir* is not a directory.
    """
    if not lib_dir.is_dir():
        return {}
    return {
        str(mfile.relative_to(PROJECT_ROOT)): read_file(mfile)
        for mfile in sorted(lib_dir.glob("*.m"))
    }


def collect_examples() -> dict[str, str]:
    """Map every ``*.m`` file directly inside ``EXAMPLES_DIR`` to its text.

    Only the first 100 lines of each example are kept. Keys are paths
    relative to ``PROJECT_ROOT``. Returns an empty dict when ``EXAMPLES_DIR``
    is not a directory.
    """
    if not EXAMPLES_DIR.is_dir():
        return {}
    return {
        str(mfile.relative_to(PROJECT_ROOT)): read_file(mfile, max_lines=100)
        for mfile in sorted(EXAMPLES_DIR.glob("*.m"))
    }


def collect_all_sources() -> dict[str, str]:
    """Collect library sources, README.md, setup.m and the Markdown docs.

    Returns a dict keyed by path relative to ``PROJECT_ROOT``. Library
    directories, README.md and setup.m are skipped when they do not exist.
    Markdown files are searched recursively under ``DOCS_DIR`` and limited to
    300 lines each.
    """
    sources: dict[str, str] = {}
    for lib_name in ("FastPlot", "Dashboard", "SensorThreshold", "EventDetection", "WebBridge"):
        sources.update(collect_source_files(LIBS_DIR / lib_name))

    # Top-level project files, included only when present.
    for name in ("README.md", "setup.m"):
        candidate = PROJECT_ROOT / name
        if candidate.exists():
            sources[name] = read_file(candidate)

    # Design docs
    for md in sorted(DOCS_DIR.rglob("*.md")):
        sources[str(md.relative_to(PROJECT_ROOT))] = read_file(md, max_lines=300)

    return sources


def build_source_context(sources: dict[str, str], max_chars: int = 180000) -> str:
    """Concatenate *sources* into one prompt string within a character budget.

    Entries are emitted in sorted path order, each formatted as
    ``=== <path> ===`` followed by the content. An entry that would push the
    running total past ``max_chars`` is skipped, but later entries that still
    fit are kept, so a single oversized file no longer drops everything
    sorted after it. When anything was skipped, a trailing marker line
    reports how many files were omitted; the marker itself is not counted
    against the budget.

    Args:
        sources: Mapping of path label to file content.
        max_chars: Maximum combined length of the emitted entries.

    Returns:
        The assembled context, or an empty string when *sources* is empty.
    """
    parts = []
    used = 0
    omitted = 0
    for path, content in sorted(sources.items()):
        entry = f"=== {path} ===\n{content}\n"
        if used + len(entry) > max_chars:
            omitted += 1
            continue
        parts.append(entry)
        used += len(entry)
    if omitted:
        parts.append(f"\n... (budget reached, {omitted} files omitted)\n")
    return "".join(parts)
**Multiple lines & linked zoom** — addLine multiple times, GroupID linking +5. **Themes** — switching between dark, light, industrial, scientific, ocean +6. **Datetime axes** — using datenum or datetime on the x-axis +7. **Live mode** — file polling with auto-refresh +8. **Dashboard basics** — DashboardBuilder quick example +9. **Sensor system** — creating Sensors with StateChannels and ThresholdRules +10. **Next steps** — links to other wiki pages (Architecture, API Reference, Examples) + +Use realistic code examples derived from the actual example files. Each section +should have a complete, runnable code snippet. Keep explanations concise but helpful +for someone new to the library.""", + }, + { + "filename": "Examples.md", + "title": "Examples", + "prompt": """\ +Write an **Examples** wiki page that serves as a categorized guide to all example +scripts in the examples/ directory. + +Organize examples into categories: +1. **Basic Plotting** — simple plots, ECG, 100M points +2. **Thresholds & Bands** — alarm bands, dynamic thresholds +3. **Multiple Plots** — multi-line, linked axes, mixed tiles +4. **Dashboards** — dashboard engine, 9-tile, all widgets, live dashboard +5. **Docking** — tabbed dock, many tabs, disk-backed dock +6. **Sensors** — sensor detail, sensor dashboard, multi-sensor linked +7. **Event Detection** — event viewer, live pipeline +8. **Datetime** — datetime axis formatting +9. **Live Mode** — file polling auto-refresh +10. **Disk Storage** — SQLite-backed 100M+ datasets +11. **Advanced** — NaN gaps, LTTB vs MinMax comparison, navigator overlay + +For each example, provide: +- Script name (as a link if possible) +- One-line description of what it demonstrates +- Key code snippet or feature highlight + +Base descriptions on the actual source code of the examples.""", + }, + { + "filename": "MEX-Acceleration.md", + "title": "MEX Acceleration", + "prompt": """\ +Write a **MEX Acceleration** wiki page for FastPlot. + +Cover: +1. 
**Overview** — what MEX is and why FastPlot uses it for performance +2. **SIMD kernels** — AVX2 (x86) and NEON (ARM) implementations +3. **Compilation** — how setup.m / build_mex.m compiles the MEX files +4. **Fallback mechanism** — how FastPlot auto-detects and falls back to pure MATLAB +5. **Performance comparison** — MEX vs pure MATLAB benchmarks +6. **Supported operations** — which downsampling operations are MEX-accelerated +7. **Troubleshooting** — common compilation issues and how to resolve them +8. **Adding custom MEX** — guide for contributors wanting to add new MEX kernels + +Use actual code from build_mex.m and the C source files where relevant. +Include practical tips for users who may not have a C compiler.""", + }, + { + "filename": "Performance.md", + "title": "Performance", + "prompt": """\ +Write a **Performance** wiki page for FastPlot. + +Cover: +1. **Key metrics** — render time, FPS, point reduction, memory usage +2. **Benchmark methodology** — how benchmarks are run (reference benchmarks/ directory) +3. **Scaling behavior** — how FastPlot performs from 1K to 100M+ data points +4. **Downsampling efficiency** — MinMax vs LTTB speed and quality trade-offs +5. **Memory optimization** — GPU memory comparison with standard plot() +6. **Disk-backed performance** — SQLite DataStore query times for large datasets +7. **MEX vs pure MATLAB** — speedup factors for each operation +8. **Tips for maximum performance** — best practices for users +9. **Comparison with alternatives** — why constant-time rendering regardless of dataset size + +Reference actual benchmark numbers from README and any benchmark scripts. 
def _response_text(response) -> str:
    """Join the text blocks of a Messages API response into one string."""
    text = "".join(
        block.text
        for block in response.content
        if getattr(block, "type", None) == "text"
    )
    if not text:
        raise RuntimeError("Claude returned no text content")
    if getattr(response, "stop_reason", None) == "max_tokens":
        # The page was cut off at MAX_TOKENS; surface it instead of silently
        # publishing a truncated wiki page.
        print(f" WARNING: response hit the {MAX_TOKENS}-token limit and may be truncated",
              file=sys.stderr)
    return text


def call_claude(client: anthropic.Anthropic, system_prompt: str, user_prompt: str,
                max_attempts: int = 3) -> str:
    """Call the Claude API and return the response text.

    Sends one user message with the given system prompt using the
    module-level ``MODEL`` and ``MAX_TOKENS``. Rate limits and transient API
    errors are retried with exponential backoff (2s, 4s, ...).

    Args:
        client: Anthropic SDK client.
        system_prompt: System prompt for the request.
        user_prompt: Content of the single user message.
        max_attempts: Total number of attempts before giving up.

    Returns:
        The concatenated text of the response's text blocks.

    Raises:
        anthropic.APIError: Immediately for a non-retryable status (4xx other
            than 408/409/429), or when the final attempt fails with an API
            error.
        RuntimeError: When every attempt was rate limited, or the response
            contains no text.
    """
    last_error: Optional[Exception] = None
    for attempt in range(1, max_attempts + 1):
        try:
            response = client.messages.create(
                model=MODEL,
                max_tokens=MAX_TOKENS,
                system=system_prompt,
                messages=[{"role": "user", "content": user_prompt}],
            )
        except anthropic.RateLimitError as err:
            # Must be caught before APIError, which is its base class.
            last_error = err
            message = " Rate limited, waiting {wait}s..."
        except anthropic.APIError as err:
            # Connection errors carry no status code and are treated as
            # transient; client errors such as 400/401/404 will not succeed
            # on retry, so fail fast instead of sleeping.
            status = getattr(err, "status_code", None)
            retryable = status is None or status in (408, 409) or status >= 500
            if not retryable or attempt == max_attempts:
                raise
            last_error = err
            message = f" API error: {err}, retrying in {{wait}}s..."
        else:
            return _response_text(response)

        # Back off only when another attempt will actually follow.
        if attempt < max_attempts:
            wait = 2 ** attempt
            print(message.format(wait=wait))
            time.sleep(wait)
    raise RuntimeError(f"Failed after {max_attempts} retries") from last_error


def generate_page(client: anthropic.Anthropic, page: dict, source_context: str,
                  example_context: str) -> str:
    """Generate the Markdown body of a single wiki page using Claude.

    Args:
        client: Anthropic SDK client.
        page: Page definition; only its ``"prompt"`` entry is read here.
        source_context: Concatenated project sources appended to the prompt.
        example_context: Concatenated example scripts appended to the prompt.

    Returns:
        The text returned by ``call_claude``.
    """
    system_prompt = """\
You are a technical documentation writer for FastPlot, an ultra-fast time series \
plotting library for MATLAB and GNU Octave. You write clear, accurate, well-structured \
GitHub Wiki pages in Markdown.

Rules:
- Use accurate information from the provided source code — do not invent APIs or features
- Use GitHub-flavored Markdown with proper headings, code blocks, and tables
- MATLAB code blocks should use ```matlab fencing
- Keep explanations concise but thorough
- Do not include the page title as an H1 — it will be added automatically
- Do not include any auto-generated notice — it will be added automatically
- Reference actual class names, method signatures, and property names from the source
- Link to other wiki pages where relevant using [[Page Name]] syntax"""

    user_prompt = f"""{page['prompt']}

## Source Code Reference

{source_context}

## Example Scripts Reference

{example_context}"""

    return call_claude(client, system_prompt, user_prompt)


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main():
    """Generate every page in ``PAGES`` and write it into ``WIKI_DIR``.

    Exits with status 1 when ``ANTHROPIC_API_KEY`` is not set. Each page is
    written as ``AUTO_NOTICE``, an H1 title and the generated body. Any error
    from page generation propagates and stops the run.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        print("ERROR: ANTHROPIC_API_KEY environment variable is required", file=sys.stderr)
        sys.exit(1)

    print("FastPlot LLM Documentation Generator")
    print(f"Project root: {PROJECT_ROOT}")
    print(f"Wiki dir: {WIKI_DIR}")
    print(f"Model: {MODEL}")
    print()

    # Collect source files
    print("Collecting source files...")
    sources = collect_all_sources()
    print(f" Found {len(sources)} source files")

    print("Collecting examples...")
    examples = collect_examples()
    print(f" Found {len(examples)} example files")

    source_context = build_source_context(sources)
    example_context = build_source_context(examples, max_chars=60000)
    print(f" Source context: {len(source_context):,} chars")
    print(f" Example context: {len(example_context):,} chars")
    print()

    # Initialize client
    client = anthropic.Anthropic(api_key=api_key)

    # Ensure wiki directory exists
    WIKI_DIR.mkdir(parents=True, exist_ok=True)

    # Generate each page
    generated = []
    for page in PAGES:
        filename = page["filename"]
        print(f"Generating {filename}...")

        content = generate_page(client, page, source_context, example_context)

        # Write output
        outpath = WIKI_DIR / filename
        full_content = AUTO_NOTICE + f"# {page['title']}\n\n" + content + "\n"
        with open(outpath, "w", encoding="utf-8") as f:
            f.write(full_content)

        # WIKI_DIR may be overridden to a relative path or a location outside
        # the project; relative_to() raises ValueError then, so fall back to
        # the path as given rather than crashing after the file is written.
        try:
            shown = outpath.relative_to(PROJECT_ROOT)
        except ValueError:
            shown = outpath
        print(f" -> Wrote {shown} ({len(content):,} chars)")
        generated.append(filename)

    print()
    print(f"Done. Generated {len(generated)} wiki pages:")
    for name in generated:
        print(f" - {name}")


if __name__ == "__main__":
    main()