Implement core engine entry point and refactor Python inference (#43)

Eamon2009 · web-flow · commit 720ffc124641 · 2026-05-17T18:47:58.000+05:30
#42 #41 #40
diff --git a/.github/workflows/github-package.yml b/.github/workflows/github-package.yml
@@ -0,0 +1,44 @@
+# Workflow that publishes this repository as an npm package on GitHub Packages.
+name: Publish GitHub Package
+
+# Triggers: manual runs (workflow_dispatch) and pushes of tags matching "v*".
+on:
+  workflow_dispatch:
+  push:
+    tags:
+      - "v*"
+
+# GITHUB_TOKEN scopes: read repository contents, write to the package registry.
+permissions:
+  contents: read
+  packages: write
+
+jobs:
+  publish-github-package:
+    name: Publish to GitHub Packages
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      # Points npm at the GitHub Packages registry for the @eamon2009 scope and
+      # enables the npm cache keyed on the frontend lockfile.
+      - name: Set up Node.js for GitHub Packages
+        uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+          registry-url: "https://npm.pkg.github.com"
+          scope: "@eamon2009"
+          cache: "npm"
+          cache-dependency-path: frontend/package-lock.json
+
+      # Installs dependencies and builds inside frontend/ (via --prefix).
+      - name: Build frontend assets
+        run: |
+          npm --prefix frontend ci
+          npm --prefix frontend run build
+
+      # Sets the scoped package name and the publish registry in package.json.
+      # NOTE(review): unlike the build step, these commands (and `npm publish`
+      # below) carry no --prefix, so they act on the package.json of the
+      # working directory -- confirm that is the package meant to be published.
+      - name: Prepare GitHub Packages metadata
+        run: |
+          npm pkg set name="@eamon2009/quadtrix"
+          npm pkg set publishConfig.registry="https://npm.pkg.github.com"
+
+      # NODE_AUTH_TOKEN supplies the workflow's GITHUB_TOKEN as the npm credential.
+      - name: Publish package
+        run: npm publish
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/benchmark/benchmark_config.json b/benchmark/benchmark_config.json
@@ -0,0 +1,59 @@
+{
+  "schema_version": 1,
+  "purpose": "Reference benchmark dimensions for the real C++ and Python Quadtrix benchmark suites.",
+  "common": {
+    "runs": 10,
+    "warmup": 3,
+    "quick_runs": 2,
+    "quick_warmup": 1,
+    "generate_tokens": 32,
+    "quick_generate_tokens": 4,
+    "train_steps": 5,
+    "quick_train_steps": 1,
+    "max_data_chars": 1000000,
+    "quick_max_data_chars": 50000
+  },
+  "suites": {
+    "data": [
+      "tokenizer_or_char_encode",
+      "batch_sample_to_device"
+    ],
+    "primitive": [
+      "matmul_3d",
+      "attention_scores_or_softmax3d",
+      "layer_norm"
+    ],
+    "forward": [
+      "batch1_seq8",
+      "batch1_full_context",
+      "configured_batch_full_context"
+    ],
+    "training": [
+      "adamw_step_forward_backward_update"
+    ],
+    "generation": [
+      "empty_prompt",
+      "short_prompt",
+      "long_prompt"
+    ]
+  },
+  "metrics": [
+    "avg_ms",
+    "median_ms",
+    "min_ms",
+    "max_ms",
+    "p90_ms",
+    "p95_ms",
+    "std_ms",
+    "tokens_per_sec",
+    "loss",
+    "parameter_mb_fp32",
+    "memory_mb"
+  ],
+  "outputs": {
+    "cpp_json": "benchmark/results/cpp_benchmark.json",
+    "cpp_csv": "benchmark/results/cpp_benchmark.csv",
+    "python_json": "benchmark/results/python_benchmark.json",
+    "python_csv": "benchmark/results/python_benchmark.csv"
+  }
+}
diff --git a/benchmark/benchmark_training.py b/benchmark/benchmark_training.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python3
+"""Compatibility entry point for the real Python benchmark suite."""
+
+# All work is delegated to python_benchmark.main; this file only forwards to it.
+from python_benchmark import main
+
+
+# When run as a script, exit with whatever main() returns.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/benchmark/compare.py b/benchmark/compare.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+"""Compare Quadtrix C++ and Python benchmark JSON files."""
+
+from __future__ import annotations
+
+import argparse
+import json
+from pathlib import Path
+from typing import Any
+
+
+# Directory named "results" located next to this script; used as the base for
+# the default --cpp and --python result file paths.
+DEFAULT_RESULTS = Path(__file__).resolve().parent / "results"
+
+
+def load(path: Path) -> dict[str, Any]:
+    with path.open("r", encoding="utf-8") as f:
+        return json.load(f)
+
+
+def index_rows(result: dict[str, Any]) -> dict[tuple[str, str, int, int], dict[str, Any]]:
+    indexed = {}
+    for row in result.get("results", []):
+        key = (
+            row.get("suite", ""),
+            row.get("name", ""),
+            int(row.get("batch_size") or 0),
+            int(row.get("sequence_length") or 0),
+        )
+        indexed[key] = row
+    return indexed
+
+
+def pct(new: float, old: float) -> float:
+    if old == 0:
+        return 0.0
+    return (new - old) / old * 100.0
+
+
+def compare_backends(cpp_path: Path, python_path: Path) -> int:
+    missing = [str(path) for path in (cpp_path, python_path) if not path.exists()]
+    if missing:
+        print("Missing benchmark result file(s):")
+        for path in missing:
+            print(f"  {path}")
+        print("Run benchmark/run_all.py first, or pass explicit --cpp/--python paths.")
+        return 1
+
+    cpp = load(cpp_path)
+    py = load(python_path)
+    cpp_rows = index_rows(cpp)
+    py_rows = index_rows(py)
+
+    common = sorted(set(cpp_rows) & set(py_rows))
+    if not common:
+        print("No matching benchmark rows found.")
+        return 1
+
+    print("Quadtrix C++ vs Python Benchmark Comparison")
+    print(f"C++:    {cpp_path}")
+    print(f"Python: {python_path}")
+    print()
+    print(f"{'suite':<12} {'name':<24} {'shape':<10} {'cpp ms':>10} {'py ms':>10} {'cpp tok/s':>12} {'py tok/s':>12} {'latency':>10}")
+    print("-" * 110)
+
+    for key in common:
+        suite, name, batch, seq = key
+        c = cpp_rows[key]
+        p = py_rows[key]
+        cpp_ms = float(c.get("avg_ms") or 0.0)
+        py_ms = float(p.get("avg_ms") or 0.0)
+        cpp_tps = float(c.get("tokens_per_sec") or 0.0)
+        py_tps = float(p.get("tokens_per_sec") or 0.0)
+        shape = f"{batch}x{seq}" if batch or seq else "-"
+        delta = pct(cpp_ms, py_ms)
+        print(
+            f"{suite:<12} {name:<24} {shape:<10} "
+            f"{cpp_ms:10.3f} {py_ms:10.3f} {cpp_tps:12.1f} {py_tps:12.1f} {delta:+9.1f}%"
+        )
+    return 0
+
+
+def compare_baseline(current_path: Path, baseline_path: Path, threshold_pct: float) -> int:
+    missing = [str(path) for path in (current_path, baseline_path) if not path.exists()]
+    if missing:
+        print("Missing benchmark result file(s):")
+        for path in missing:
+            print(f"  {path}")
+        return 1
+
+    current = load(current_path)
+    baseline = load(baseline_path)
+    current_rows = index_rows(current)
+    baseline_rows = index_rows(baseline)
+    common = sorted(set(current_rows) & set(baseline_rows))
+
+    print("Quadtrix Benchmark Baseline Comparison")
+    print(f"Current:  {current_path}")
+    print(f"Baseline: {baseline_path}")
+    print()
+
+    regressions = []
+    for key in common:
+        c = current_rows[key]
+        b = baseline_rows[key]
+        delta = pct(float(c.get("avg_ms") or 0.0), float(b.get("avg_ms") or 0.0))
+        if delta > threshold_pct:
+            regressions.append((key, delta, b, c))
+
+    if not regressions:
+        print(f"No latency regressions over {threshold_pct:.1f}%.")
+        return 0
+
+    print(f"Latency regressions over {threshold_pct:.1f}%:")
+    for key, delta, b, c in regressions:
+        suite, name, batch, seq = key
+        print(
+            f"  {suite}/{name} {batch}x{seq}: "
+            f"{float(b.get('avg_ms') or 0.0):.3f} ms -> {float(c.get('avg_ms') or 0.0):.3f} ms ({delta:+.1f}%)"
+        )
+    return 2
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Compare Quadtrix benchmark results.")
+    parser.add_argument("--cpp", type=Path, default=DEFAULT_RESULTS / "cpp_benchmark.json")
+    parser.add_argument("--python", type=Path, default=DEFAULT_RESULTS / "python_benchmark.json")
+    parser.add_argument("--current", type=Path, default=None)
+    parser.add_argument("--baseline", type=Path, default=None)
+    parser.add_argument("--threshold-pct", type=float, default=10.0)
+    return parser.parse_args()
+
+
+def main() -> int:
+    args = parse_args()
+    if args.current and args.baseline:
+        return compare_baseline(args.current, args.baseline, args.threshold_pct)
+    return compare_backends(args.cpp, args.python)
+
+
+# When run as a script, exit with the status code returned by main().
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/benchmark/cpp_benchmark.cpp b/benchmark/cpp_benchmark.cpp
diff --git a/benchmark/python_benchmark.py b/benchmark/python_benchmark.py