diff --git a/.github/workflows/generate-llm-docs.yml b/.github/workflows/generate-llm-docs.yml
new file mode 100644
index 00000000..58c3a84e
--- /dev/null
+++ b/.github/workflows/generate-llm-docs.yml
@@ -0,0 +1,56 @@
+name: Generate Wiki Docs (LLM)
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'libs/**/*.m'
+      - 'examples/**/*.m'
+      - 'docs/**/*.md'
+      - 'README.md'
+      - 'scripts/generate_llm_docs.py'
+  workflow_dispatch:
+
+permissions:
+  contents: write
+
+jobs:
+  generate:
+    name: Generate Wiki Documentation
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout main repo
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install dependencies
+        run: pip install anthropic
+
+      - name: Clone wiki repo
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          git clone "https://x-access-token:${GITHUB_TOKEN}@github.com/HanSur94/FastPlot.wiki.git" wiki
+
+      - name: Generate LLM docs
+        env:
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+        run: python3 scripts/generate_llm_docs.py
+
+      - name: Push updated wiki
+        run: |
+          cd wiki
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add -A
+          if git diff --cached --quiet; then
+            echo "No documentation changes"
+          else
+            git commit -m "docs: auto-update wiki pages via LLM from source code"
+            git push
+            echo "Wiki LLM docs updated"
+          fi
diff --git a/scripts/generate_llm_docs.py b/scripts/generate_llm_docs.py
new file mode 100644
index 00000000..7c9f226f
--- /dev/null
+++ b/scripts/generate_llm_docs.py
@@ -0,0 +1,409 @@
+#!/usr/bin/env python3
+"""Generate comprehensive wiki documentation using Claude API.
+
+Reads source files, examples, and existing docs, then uses the Claude API
+to produce rich wiki pages covering architecture, getting started, examples,
+MEX acceleration, and performance.
+
+Usage:
+    ANTHROPIC_API_KEY=sk-... python3 scripts/generate_llm_docs.py
+
+Environment variables:
+    ANTHROPIC_API_KEY — required, Anthropic API key
+    WIKI_DIR — optional, override wiki output directory (default: wiki/)
+    MODEL — optional, Claude model to use (default: claude-sonnet-4-20250514)
+"""
+
+import os
+import re
+import sys
+import json
+import time
+from pathlib import Path
+from typing import Optional
+
+try:
+    import anthropic
+except ImportError:
+    print("ERROR: anthropic package not installed. Run: pip install anthropic", file=sys.stderr)
+    sys.exit(1)
+
+# ---------------------------------------------------------------------------
+# Configuration
+# ---------------------------------------------------------------------------
+
+SCRIPT_DIR = Path(__file__).resolve().parent
+PROJECT_ROOT = SCRIPT_DIR.parent
+LIBS_DIR = PROJECT_ROOT / "libs"
+EXAMPLES_DIR = PROJECT_ROOT / "examples"
+DOCS_DIR = PROJECT_ROOT / "docs"
+WIKI_DIR = Path(os.environ.get("WIKI_DIR", PROJECT_ROOT / "wiki"))
+
+MODEL = os.environ.get("MODEL", "claude-sonnet-4-20250514")
+MAX_TOKENS = 8192
+
+# HTML comment: invisible on the rendered wiki page, visible to anyone editing it.
+AUTO_NOTICE = (
+    "<!-- AUTO-GENERATED by scripts/generate_llm_docs.py; manual edits will be overwritten. -->\n\n"
+)
+
+# ---------------------------------------------------------------------------
+# File collection helpers
+# ---------------------------------------------------------------------------
+
+def read_file(path: Path, max_lines: int = 500) -> str:
+    """Read a text file, keeping at most ``max_lines`` lines.
+
+    Undecodable bytes are replaced rather than raising. When the file is
+    truncated, a marker line stating the original line count is appended.
+    Returns an empty string if the file cannot be opened or read.
+    """
+    try:
+        with open(path, "r", encoding="utf-8", errors="replace") as f:
+            lines = f.readlines()
+    except OSError:
+        return ""
+    if len(lines) > max_lines:
+        lines = lines[:max_lines] + [f"\n... (truncated, {len(lines)} lines total)\n"]
+    return "".join(lines)
+
+
+def collect_source_files(lib_dir: Path) -> dict[str, str]:
+    """Collect the top-level .m files of a library directory.
+
+    The glob is non-recursive, so files in subdirectories such as private/
+    are not included. Keys are paths relative to PROJECT_ROOT; a missing
+    directory yields an empty dict.
+    """
+    if not lib_dir.is_dir():
+        return {}
+    return {
+        str(mfile.relative_to(PROJECT_ROOT)): read_file(mfile)
+        for mfile in sorted(lib_dir.glob("*.m"))
+    }
+
+
+def collect_examples() -> dict[str, str]:
+    """Collect example scripts, keeping the first 100 lines of each for context."""
+    if not EXAMPLES_DIR.is_dir():
+        return {}
+    return {
+        str(mfile.relative_to(PROJECT_ROOT)): read_file(mfile, max_lines=100)
+        for mfile in sorted(EXAMPLES_DIR.glob("*.m"))
+    }
+
+
+def collect_all_sources() -> dict[str, str]:
+    """Collect library sources, README.md, setup.m and design docs, keyed by relative path."""
+    sources = {}
+    for lib_name in ["FastPlot", "Dashboard", "SensorThreshold", "EventDetection", "WebBridge"]:
+        sources.update(collect_source_files(LIBS_DIR / lib_name))
+
+    # Include README
+    readme = PROJECT_ROOT / "README.md"
+    if readme.exists():
+        sources["README.md"] = read_file(readme)
+
+    # Include setup.m
+    setup = PROJECT_ROOT / "setup.m"
+    if setup.exists():
+        sources["setup.m"] = read_file(setup)
+
+    # Include design docs
+    for md in sorted(DOCS_DIR.rglob("*.md")):
+        rel = md.relative_to(PROJECT_ROOT)
+        sources[str(rel)] = read_file(md, max_lines=300)
+
+    return sources
+
+
+def build_source_context(sources: dict[str, str], max_chars: int = 180000) -> str:
+    """Concatenate sources into one prompt string within a character budget.
+
+    Files are added in sorted path order, each under an ``=== path ===``
+    header. The first file that would exceed ``max_chars`` ends the loop and
+    a marker reporting how many files were left out is appended instead.
+    """
+    parts = []
+    total = 0
+    for path, content in sorted(sources.items()):
+        entry = f"=== {path} ===\n{content}\n"
+        if total + len(entry) > max_chars:
+            parts.append(f"\n... (budget reached, {len(sources) - len(parts)} files omitted)\n")
+            break
+        parts.append(entry)
+        total += len(entry)
+    return "".join(parts)
+
+
+# ---------------------------------------------------------------------------
+# Wiki page definitions
+# ---------------------------------------------------------------------------
+
+PAGES = [
+    {
+        "filename": "Architecture.md",
+        "title": "Architecture",
+        "prompt": """\
+Write a comprehensive **Architecture** wiki page for the FastPlot project.
+
+Cover these topics with clear explanations and diagrams (using markdown/ASCII):
+1. **High-level overview** — what FastPlot is and the main components
+2. **Render pipeline** — how data flows from addLine() through downsampling to screen
+3. **Downsampling engine** — MinMax vs LTTB, pyramid caching, when each is used
+4. **MEX acceleration** — how optional C/SIMD kernels integrate with pure MATLAB fallback
+5. **Sensor & threshold system** — Sensor, StateChannel, ThresholdRule, violation computation
+6. **Event detection pipeline** — EventDetector, IncrementalEventDetector, LiveEventPipeline
+7. **Dashboard engine** — DashboardEngine, widgets, layout, serialization
+8. **Data storage** — FastPlotDataStore (SQLite), disk-backed queries
+9. **Linked axes** — GroupID-based zoom/pan synchronization
+10. **WebBridge** — TCP protocol for web visualization
+
+Use code snippets from the actual source to illustrate key patterns. Write for
+developers who want to understand the internals or contribute.""",
+    },
+    {
+        "filename": "Getting-Started.md",
+        "title": "Getting Started",
+        "prompt": """\
+Write a **Getting Started** tutorial wiki page for FastPlot.
+
+Structure it as a progressive tutorial:
+1. **Installation** — git clone, setup.m, requirements (MATLAB R2020b+ / Octave 7+)
+2. **Your first plot** — basic example with addLine, render
+3. **Adding thresholds** — upper/lower thresholds with violation markers
+4. **Multiple lines & linked zoom** — addLine multiple times, GroupID linking
+5. **Themes** — switching between dark, light, industrial, scientific, ocean
+6. **Datetime axes** — using datenum or datetime on the x-axis
+7. **Live mode** — file polling with auto-refresh
+8. **Dashboard basics** — DashboardBuilder quick example
+9. **Sensor system** — creating Sensors with StateChannels and ThresholdRules
+10. **Next steps** — links to other wiki pages (Architecture, API Reference, Examples)
+
+Use realistic code examples derived from the actual example files. Each section
+should have a complete, runnable code snippet. Keep explanations concise but helpful
+for someone new to the library.""",
+    },
+    {
+        "filename": "Examples.md",
+        "title": "Examples",
+        "prompt": """\
+Write an **Examples** wiki page that serves as a categorized guide to all example
+scripts in the examples/ directory.
+
+Organize examples into categories:
+1. **Basic Plotting** — simple plots, ECG, 100M points
+2. **Thresholds & Bands** — alarm bands, dynamic thresholds
+3. **Multiple Plots** — multi-line, linked axes, mixed tiles
+4. **Dashboards** — dashboard engine, 9-tile, all widgets, live dashboard
+5. **Docking** — tabbed dock, many tabs, disk-backed dock
+6. **Sensors** — sensor detail, sensor dashboard, multi-sensor linked
+7. **Event Detection** — event viewer, live pipeline
+8. **Datetime** — datetime axis formatting
+9. **Live Mode** — file polling auto-refresh
+10. **Disk Storage** — SQLite-backed 100M+ datasets
+11. **Advanced** — NaN gaps, LTTB vs MinMax comparison, navigator overlay
+
+For each example, provide:
+- Script name (as a link if possible)
+- One-line description of what it demonstrates
+- Key code snippet or feature highlight
+
+Base descriptions on the actual source code of the examples.""",
+    },
+    {
+        "filename": "MEX-Acceleration.md",
+        "title": "MEX Acceleration",
+        "prompt": """\
+Write a **MEX Acceleration** wiki page for FastPlot.
+
+Cover:
+1. **Overview** — what MEX is and why FastPlot uses it for performance
+2. **SIMD kernels** — AVX2 (x86) and NEON (ARM) implementations
+3. **Compilation** — how setup.m / build_mex.m compiles the MEX files
+4. **Fallback mechanism** — how FastPlot auto-detects and falls back to pure MATLAB
+5. **Performance comparison** — MEX vs pure MATLAB benchmarks
+6. **Supported operations** — which downsampling operations are MEX-accelerated
+7. **Troubleshooting** — common compilation issues and how to resolve them
+8. **Adding custom MEX** — guide for contributors wanting to add new MEX kernels
+
+Use actual code from build_mex.m and the C source files where relevant.
+Include practical tips for users who may not have a C compiler.""",
+    },
+    {
+        "filename": "Performance.md",
+        "title": "Performance",
+        "prompt": """\
+Write a **Performance** wiki page for FastPlot.
+
+Cover:
+1. **Key metrics** — render time, FPS, point reduction, memory usage
+2. **Benchmark methodology** — how benchmarks are run (reference benchmarks/ directory)
+3. **Scaling behavior** — how FastPlot performs from 1K to 100M+ data points
+4. **Downsampling efficiency** — MinMax vs LTTB speed and quality trade-offs
+5. **Memory optimization** — GPU memory comparison with standard plot()
+6. **Disk-backed performance** — SQLite DataStore query times for large datasets
+7. **MEX vs pure MATLAB** — speedup factors for each operation
+8. **Tips for maximum performance** — best practices for users
+9. **Comparison with alternatives** — why constant-time rendering regardless of dataset size
+
+Reference actual benchmark numbers from README and any benchmark scripts.
+Present data in tables where appropriate.""",
+    },
+]
+
+
+# ---------------------------------------------------------------------------
+# Claude API interaction
+# ---------------------------------------------------------------------------
+
+def call_claude(client: anthropic.Anthropic, system_prompt: str, user_prompt: str) -> str:
+    """Call the Claude API and return the response text.
+
+    Makes up to three attempts, sleeping 2s then 4s between them. The text
+    blocks of the reply are concatenated, and a warning goes to stderr when
+    the reply was cut off at MAX_TOKENS.
+
+    Raises:
+        anthropic.APIError: the final attempt failed with a non-rate-limit error.
+        RuntimeError: the final attempt was rate limited, or the reply has no text.
+    """
+    max_attempts = 3
+    for attempt in range(1, max_attempts + 1):
+        is_last = attempt == max_attempts
+        wait = 2 ** attempt
+        try:
+            response = client.messages.create(
+                model=MODEL,
+                max_tokens=MAX_TOKENS,
+                system=system_prompt,
+                messages=[{"role": "user", "content": user_prompt}],
+            )
+        except anthropic.RateLimitError as e:
+            # Must precede the APIError handler: RateLimitError is a subclass of it.
+            if is_last:
+                raise RuntimeError("Failed after 3 retries") from e
+            print(f" Rate limited, waiting {wait}s...")
+            time.sleep(wait)
+            continue
+        except anthropic.APIError as e:
+            if is_last:
+                raise
+            print(f" API error: {e}, retrying in {wait}s...")
+            time.sleep(wait)
+            continue
+
+        if response.stop_reason == "max_tokens":
+            print(f" WARNING: response truncated at {MAX_TOKENS} tokens", file=sys.stderr)
+        # A reply is a list of content blocks; keep only the textual ones.
+        text = "".join(block.text for block in response.content if block.type == "text")
+        if not text:
+            raise RuntimeError("Claude returned no text content")
+        return text
+    raise RuntimeError("Failed after 3 retries")  # not reached; every path above returns or raises
+
+
+def generate_page(client: anthropic.Anthropic, page: dict, source_context: str,
+                  example_context: str) -> str:
+    """Generate the Markdown body of one wiki page.
+
+    ``page`` is an entry of PAGES; its "prompt" is sent along with the source
+    and example context. The model is told to omit the title and notice.
+    """
+    system_prompt = """\
+You are a technical documentation writer for FastPlot, an ultra-fast time series \
+plotting library for MATLAB and GNU Octave. You write clear, accurate, well-structured \
+GitHub Wiki pages in Markdown.
+
+Rules:
+- Use accurate information from the provided source code — do not invent APIs or features
+- Use GitHub-flavored Markdown with proper headings, code blocks, and tables
+- MATLAB code blocks should use ```matlab fencing
+- Keep explanations concise but thorough
+- Do not include the page title as an H1 — it will be added automatically
+- Do not include any auto-generated notice — it will be added automatically
+- Reference actual class names, method signatures, and property names from the source
+- Link to other wiki pages where relevant using [[Page Name]] syntax"""
+
+    user_prompt = f"""{page['prompt']}
+
+## Source Code Reference
+
+{source_context}
+
+## Example Scripts Reference
+
+{example_context}"""
+
+    return call_claude(client, system_prompt, user_prompt)
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+def main():
+    """Collect sources, generate every page in PAGES and write them to WIKI_DIR."""
+    api_key = os.environ.get("ANTHROPIC_API_KEY")
+    if not api_key:
+        print("ERROR: ANTHROPIC_API_KEY environment variable is required", file=sys.stderr)
+        sys.exit(1)
+
+    print("FastPlot LLM Documentation Generator")
+    print(f"Project root: {PROJECT_ROOT}")
+    print(f"Wiki dir: {WIKI_DIR}")
+    print(f"Model: {MODEL}")
+    print()
+
+    # Collect source files
+    print("Collecting source files...")
+    sources = collect_all_sources()
+    print(f" Found {len(sources)} source files")
+
+    print("Collecting examples...")
+    examples = collect_examples()
+    print(f" Found {len(examples)} example files")
+
+    source_context = build_source_context(sources)
+    example_context = build_source_context(examples, max_chars=60000)
+    print(f" Source context: {len(source_context):,} chars")
+    print(f" Example context: {len(example_context):,} chars")
+    print()
+
+    # Initialize client
+    client = anthropic.Anthropic(api_key=api_key)
+
+    # Ensure wiki directory exists
+    WIKI_DIR.mkdir(parents=True, exist_ok=True)
+
+    # Generate each page
+    generated = []
+    for page in PAGES:
+        filename = page["filename"]
+        print(f"Generating {filename}...")
+
+        content = generate_page(client, page, source_context, example_context)
+
+        # Write output
+        outpath = WIKI_DIR / filename
+        full_content = AUTO_NOTICE + f"# {page['title']}\n\n" + content + "\n"
+        with open(outpath, "w", encoding="utf-8") as f:
+            f.write(full_content)
+
+        # WIKI_DIR may be overridden to a relative path or one outside the repo,
+        # in which case relative_to() raises; fall back to the path as given.
+        try:
+            shown = outpath.relative_to(PROJECT_ROOT)
+        except ValueError:
+            shown = outpath
+        print(f" -> Wrote {shown} ({len(content):,} chars)")
+        generated.append(filename)
+
+    print()
+    print(f"Done. Generated {len(generated)} wiki pages:")
+    for name in generated:
+        print(f" - {name}")
+
+
+if __name__ == "__main__":
+    main()