@@ -143,19 +143,25 @@ cat output.txt | squeez "Fix the CSRF validation bug"
143143
144144# Or with a file
145145squeez "Fix the CSRF bug" --input-file output.txt
146+
147+ # Explicit extract subcommand also works
148+ squeez extract "Fix the CSRF bug" --input-file output.txt
146149```
147150
148151### Python API
149152
150153``` python
151154from squeez.inference.extractor import ToolOutputExtractor
152155
153- # Connects to vLLM server (default: localhost:8000)
156+ # Load model from config/env
154157extractor = ToolOutputExtractor()
155158
156159# Or load model locally
157160extractor = ToolOutputExtractor(model_path="./output/squeez_qwen")
158161
162+ # Or connect to a server explicitly
163+ extractor = ToolOutputExtractor(base_url="http://localhost:8000/v1", model_name="squeez")
164+
159165filtered = extractor.extract(
160166 task="Fix the CSRF validation bug in middleware",
161167 tool_output=raw_output,
@@ -171,17 +177,27 @@ Backend is resolved in order: CLI args > env vars > config file (`squeez.yaml` o
171177
172178``` yaml
173179# squeez.yaml
174- model_path: "./output/squeez_qwen"  # local transformers
175- # base_url: "https://api.groq.com/openai/v1"  # or remote API
180+ backend: "transformers"  # optional preference
181+ local_model_path: "./output/squeez_qwen"
182+ # server_url: "https://api.groq.com/openai/v1"
183+ # server_model: "squeez"
176184```
177185
178186``` bash
179187# Or via environment variables
180- export SQUEEZ_MODEL_PATH=./output/squeez_qwen
181- export SQUEEZ_BASE_URL=https://api.groq.com/openai/v1
188+ export SQUEEZ_LOCAL_MODEL=./output/squeez_qwen
189+ export SQUEEZ_SERVER_URL=https://api.groq.com/openai/v1
190+ export SQUEEZ_SERVER_MODEL=squeez
182191export SQUEEZ_API_KEY=gsk_...
183192```
184193
194+ Clear flag names are available on the CLI, with the old names kept as aliases:
195+
196+ ``` bash
197+ squeez "Fix the bug" --local-model ./output/squeez_qwen
198+ squeez "Fix the bug" --server-url http://localhost:8000/v1 --server-model squeez
199+ ```
200+
185201### Use with Claude Code
186202
187203Add this to your project's `CLAUDE.md` (or `~/.claude/CLAUDE.md` for global):
@@ -216,7 +232,7 @@ This pulls the [SWE-bench tool output dataset](https://huggingface.co/datasets/K
216232### 2. Train with LoRA
217233
218234``` bash
219- python -m squeez.training.train \
235+ squeez train \
220236 --train-file data/train.jsonl \
221237 --eval-file data/eval.jsonl
222238```
@@ -226,8 +242,8 @@ Default: Qwen 3.5 2B with LoRA (r=16, alpha=32). See `configs/default.yaml` for
226242### 3. Evaluate
227243
228244``` bash
229- python -m squeez.training.evaluate \
230- --model-path output/squeez_qwen \
245+ squeez eval \
246+ --extractor-model output/squeez_qwen \
231247 --eval-file data/eval.jsonl
232248```
233249
@@ -275,11 +291,11 @@ Built from 2,294 [SWE-bench](https://huggingface.co/datasets/princeton-nlp/SWE-b
275291To regenerate the dataset from scratch:
276292
277293``` bash
278- python -m squeez.data.pipeline --phase all \
294+ squeez pipeline --phase 1 2 3 4 5 6 7 8 \
279295 --output-dir data \
280296 --github-token $GITHUB_TOKEN \
281- --openai-api-key $GROQ_API_KEY \
282- --distillation-base-url https://api.groq.com/openai/v1
297+ --teacher-api-key $GROQ_API_KEY \
298+ --teacher-base-url https://api.groq.com/openai/v1
283299```
284300
285301## Citation
0 commit comments