Skip to content

Commit cbed69e

Browse files
committed
feat: close remaining integration follow-up issues
1 parent 633f347 commit cbed69e

17 files changed

Lines changed: 451 additions & 109 deletions

Makefile

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# make dev - Build for current platform (fast)
44
# make all - Build for all platforms
55
# make test - Run tests
6+
# make check - Run repository quality gates (Python + Go)
67
# make clean - Remove built binaries
78

89
GO := go
@@ -14,7 +15,7 @@ CMDS := session-init complexity-check trajectory-save
1415
# Platforms to build for
1516
PLATFORMS := darwin/arm64 darwin/amd64 linux/amd64 linux/arm64 windows/amd64
1617

17-
.PHONY: all dev clean test lint help rcc-contract-baseline rcc-contract-gate rcc-contract-test
18+
.PHONY: all dev clean test lint check check-python check-go benchmark benchmark-bounded help rcc-contract-baseline rcc-contract-gate rcc-contract-test
1819

1920
# Default: build for current platform
2021
dev:
@@ -53,6 +54,22 @@ lint:
5354
@command -v golangci-lint >/dev/null 2>&1 || { echo "Install: go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest"; exit 1; }
5455
golangci-lint run ./...
5556

57+
# Canonical repository quality gates
58+
check: check-python check-go
59+
60+
check-python:
61+
UV_CACHE_DIR=.uv-cache uv run --extra dev pytest -q
62+
63+
check-go:
64+
$(GO) test ./...
65+
66+
# Benchmark suites (excluded from default pytest addopts)
67+
benchmark:
68+
UV_CACHE_DIR=.uv-cache uv run --extra dev pytest -q tests/benchmarks --benchmark-only
69+
70+
benchmark-bounded:
71+
UV_CACHE_DIR=.uv-cache RLM_BENCHMARK_BOUNDED=1 RLM_BENCHMARK_ROUNDS=1 RLM_BENCHMARK_ITERATIONS=1 RLM_BENCHMARK_WARMUP_ROUNDS=0 uv run --extra dev pytest -q tests/benchmarks --benchmark-only
72+
5673
# Clean build artifacts
5774
clean:
5875
rm -rf bin/
@@ -68,6 +85,11 @@ help:
6885
@echo " all - Build for all platforms"
6986
@echo " test - Run tests"
7087
@echo " lint - Run linter"
88+
@echo " check - Run Python and Go quality gates"
89+
@echo " check-python - Run Python test suite"
90+
@echo " check-go - Run Go test suite"
91+
@echo " benchmark - Run full benchmark suites"
92+
@echo " benchmark-bounded - Run bounded benchmark mode for restricted environments"
7193
@echo " rcc-contract-baseline - Generate A1-A5 compatibility artifact set"
7294
@echo " rcc-contract-gate - Run A1-A5 compatibility gate (strict)"
7395
@echo " rcc-contract-test - Run A1-A5 probe test suite"

docs/process/full-system-empirical-validation.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ Scope: entire `rlm-claude-code` system surface, including loop (`rlm-core`) cons
1010
|---|---|---|
1111
| `UV_CACHE_DIR=.uv-cache uv run --extra dev pytest -q` | Full Python correctness and integration suite | `3268 passed, 3 deselected, 8 warnings` |
1212
| `go test ./...` | Go-side classifier/hook correctness | all packages passing |
13+
| `make check` | Canonical repo quality gate (Python + Go) | pass (`3281 passed, 3 deselected`) |
1314
| `make rcc-contract-gate` | Loop consumer contract gate (A1-A5) | pass, `all_passed=True`, `claim_scope=claim-ready-for-pinned-vendor-sha-only` |
1415
| `UV_CACHE_DIR=.uv-cache uv run dp enforce pre-commit --policy dp-policy.json --json` | Policy pre-commit gate | `ok=true` |
1516
| `UV_CACHE_DIR=.uv-cache uv run dp review --json` | Review gate | `ok=true` |
@@ -53,21 +54,20 @@ Empirical OODA coverage is exercised by:
5354

5455
Snapshot script results:
5556
- Total spec IDs in `docs/spec`: `422`
56-
- IDs with direct test trace references: `320`
57-
- IDs without direct test trace references: `102`
57+
- IDs with direct test trace references: `361`
58+
- IDs without direct test trace references: `61`
59+
- Non-deferred IDs without direct test trace references: `23`
5860

5961
Largest uncovered buckets:
6062
- `SPEC-15`: 38 IDs (documented deferred lean-REPL scope)
61-
- `SPEC-14`: 17 IDs
62-
- `SPEC-17`: 12 IDs (mostly covered by Go tests rather than Python trace tags)
63-
- `SPEC-13`: 10 IDs
63+
- non-deferred residual buckets: `SPEC-00/01/02/03/04/08/09/12`
6464

6565
Interpretation:
6666
- Core system behavior is empirically green end-to-end.
67-
- Remaining traceability work is primarily explicit trace-tag coverage/documentation alignment rather than failing behavior.
67+
- Prioritized non-deferred traceability gaps in SPEC-13/14/16/17 are now explicitly tagged and covered.
68+
- Remaining non-deferred traceability work is in earlier baseline specs (00/01/02/03/04/08/09/12).
6869

6970
## Remaining Follow-Up Work (Tracked Separately)
7071

71-
1. Add a repo-level `make check` target or align AGENTS defaults to existing gate commands.
72-
2. Expand explicit test trace tags for non-deferred uncovered IDs in SPEC-13/14/16/17.
73-
3. Add a time-bounded benchmark execution mode for CI/sandbox reliability (`tests/benchmarks` can be long-running in restricted environments).
72+
1. Expand explicit trace coverage for residual non-deferred IDs in SPEC-00/01/02/03/04/08/09/12 (`23` IDs remaining).
73+
2. Keep `make benchmark-bounded` as the default benchmark path in constrained environments and capture longitudinal perf baselines.

internal/classify/classify_test.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ import (
44
"testing"
55
)
66

7+
// @trace SPEC-17.11
8+
// @trace SPEC-17.12
9+
10+
// @trace SPEC-17.06
711
func TestIsFastPath(t *testing.T) {
812
tests := []struct {
913
prompt string
@@ -29,13 +33,17 @@ func TestIsFastPath(t *testing.T) {
2933
}
3034
}
3135

36+
// @trace SPEC-17.02
37+
// @trace SPEC-17.03
3238
func TestExtractSignals_MultipleFiles(t *testing.T) {
3339
s := ExtractSignals("Update main.go and utils.py to fix the bug")
3440
if !s.ReferencesMultipleFiles {
3541
t.Error("expected ReferencesMultipleFiles=true for prompt with two file extensions")
3642
}
3743
}
3844

45+
// @trace SPEC-17.02
46+
// @trace SPEC-17.03
3947
func TestExtractSignals_ModulePair(t *testing.T) {
4048
s := ExtractSignals("coordinate auth and api modules")
4149
if !s.ReferencesMultipleFiles {
@@ -46,76 +54,97 @@ func TestExtractSignals_ModulePair(t *testing.T) {
4654
}
4755
}
4856

57+
// @trace SPEC-17.02
58+
// @trace SPEC-17.03
4959
func TestExtractSignals_CrossContext(t *testing.T) {
5060
s := ExtractSignals("why does this fail when we pass nil?")
5161
if !s.RequiresCrossContextReasoning {
5262
t.Error("expected RequiresCrossContextReasoning=true")
5363
}
5464
}
5565

66+
// @trace SPEC-17.02
67+
// @trace SPEC-17.03
5668
func TestExtractSignals_Debugging(t *testing.T) {
5769
s := ExtractSignals("debug the error in the handler")
5870
if !s.DebuggingTask {
5971
t.Error("expected DebuggingTask=true")
6072
}
6173
}
6274

75+
// @trace SPEC-17.02
76+
// @trace SPEC-17.03
6377
func TestExtractSignals_ExhaustiveSearch(t *testing.T) {
6478
s := ExtractSignals("find all instances of the deprecated function")
6579
if !s.RequiresExhaustiveSearch {
6680
t.Error("expected RequiresExhaustiveSearch=true")
6781
}
6882
}
6983

84+
// @trace SPEC-17.02
85+
// @trace SPEC-17.03
7086
func TestExtractSignals_Security(t *testing.T) {
7187
s := ExtractSignals("check for SQL injection vulnerabilities")
7288
if !s.SecurityReviewTask {
7389
t.Error("expected SecurityReviewTask=true")
7490
}
7591
}
7692

93+
// @trace SPEC-17.02
94+
// @trace SPEC-17.03
7795
func TestExtractSignals_Architecture(t *testing.T) {
7896
s := ExtractSignals("how does the authentication system work?")
7997
if !s.ArchitectureAnalysis {
8098
t.Error("expected ArchitectureAnalysis=true")
8199
}
82100
}
83101

102+
// @trace SPEC-17.02
103+
// @trace SPEC-17.03
84104
func TestExtractSignals_Refactor(t *testing.T) {
85105
s := ExtractSignals("Refactor the auth system across all handlers to use JWT")
86106
if !s.ArchitectureAnalysis {
87107
t.Error("expected ArchitectureAnalysis=true for refactor")
88108
}
89109
}
90110

111+
// @trace SPEC-17.02
112+
// @trace SPEC-17.04
91113
func TestExtractSignals_Thorough(t *testing.T) {
92114
s := ExtractSignals("make sure all tests pass before committing")
93115
if !s.UserWantsThorough {
94116
t.Error("expected UserWantsThorough=true")
95117
}
96118
}
97119

120+
// @trace SPEC-17.02
121+
// @trace SPEC-17.05
98122
func TestExtractSignals_Fast(t *testing.T) {
99123
s := ExtractSignals("just show me the file")
100124
if !s.UserWantsFast {
101125
t.Error("expected UserWantsFast=true")
102126
}
103127
}
104128

129+
// @trace SPEC-17.02
130+
// @trace SPEC-17.04
105131
func TestExtractSignals_Continuation(t *testing.T) {
106132
s := ExtractSignals("also fix the tests while you're at it")
107133
if !s.TaskIsContinuation {
108134
t.Error("expected TaskIsContinuation=true")
109135
}
110136
}
111137

138+
// @trace SPEC-17.01
112139
func TestShouldActivate_Simple(t *testing.T) {
113140
activate, reason, mode := ShouldActivate("yes", "", "")
114141
if activate {
115142
t.Errorf("expected no activation for 'yes', got reason=%s mode=%s", reason, mode)
116143
}
117144
}
118145

146+
// @trace SPEC-17.01
147+
// @trace SPEC-17.05
119148
func TestShouldActivate_FastIntent(t *testing.T) {
120149
activate, reason, _ := ShouldActivate("just show me the file contents", "", "")
121150
if activate {
@@ -126,6 +155,7 @@ func TestShouldActivate_FastIntent(t *testing.T) {
126155
}
127156
}
128157

158+
// @trace SPEC-17.01
129159
func TestShouldActivate_Debugging(t *testing.T) {
130160
activate, reason, _ := ShouldActivate("Debug why the API returns 500 errors on POST", "", "")
131161
if !activate {
@@ -136,6 +166,7 @@ func TestShouldActivate_Debugging(t *testing.T) {
136166
}
137167
}
138168

169+
// @trace SPEC-17.01
139170
func TestShouldActivate_Security(t *testing.T) {
140171
activate, reason, _ := ShouldActivate("Find all places where we access the database without auth", "", "")
141172
if !activate {
@@ -147,6 +178,7 @@ func TestShouldActivate_Security(t *testing.T) {
147178
}
148179
}
149180

181+
// @trace SPEC-17.01
150182
func TestShouldActivate_Architecture(t *testing.T) {
151183
activate, reason, _ := ShouldActivate("Refactor the auth system across all handlers to use JWT", "", "")
152184
if !activate {
@@ -158,6 +190,7 @@ func TestShouldActivate_Architecture(t *testing.T) {
158190
}
159191
}
160192

193+
// @trace SPEC-17.01
161194
func TestShouldActivate_MultiModule(t *testing.T) {
162195
activate, reason, _ := ShouldActivate("Update main.go and utils.py with the new config", "", "")
163196
if !activate {
@@ -168,6 +201,7 @@ func TestShouldActivate_MultiModule(t *testing.T) {
168201
}
169202
}
170203

204+
// @trace SPEC-17.07
171205
func TestSuggestMode_DPPhases(t *testing.T) {
172206
tests := []struct {
173207
phase string
@@ -188,13 +222,15 @@ func TestSuggestMode_DPPhases(t *testing.T) {
188222
}
189223
}
190224

225+
// @trace SPEC-17.08
191226
func TestSuggestMode_NotActivated(t *testing.T) {
192227
got := SuggestMode(false, "spec")
193228
if got != "micro" {
194229
t.Errorf("SuggestMode(false, spec) = %q, want micro", got)
195230
}
196231
}
197232

233+
// @trace SPEC-17.04
198234
func TestScore_Accumulative(t *testing.T) {
199235
s := Signals{
200236
ReferencesMultipleFiles: true,
@@ -209,6 +245,7 @@ func TestScore_Accumulative(t *testing.T) {
209245
}
210246
}
211247

248+
// @trace SPEC-17.04
212249
func TestScore_BelowThreshold(t *testing.T) {
213250
s := Signals{
214251
TaskIsContinuation: true, // +1, below threshold of 2

src/auto_activation.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,12 @@ def _check_escalation_triggers(
356356
"""
357357
prompt_lower = prompt.lower()
358358

359+
# SPEC-14.23: Honor strict token budgets by keeping execution in micro mode.
360+
# We use a conservative threshold: if session budget is below the balanced
361+
# entry point, avoid escalation and keep costs bounded.
362+
if self.preferences.budget_tokens < self.thresholds.escalate_to_balanced_tokens:
363+
return ExecutionMode.MICRO, "micro_mode:budget_guard"
364+
359365
# SPEC-14.21: Immediate escalation to THOROUGH
360366
if any(kw in prompt_lower for kw in ["architecture", "design decision", "thorough"]):
361367
return ExecutionMode.THOROUGH, "escalate_thorough:architecture_or_user_request"

src/rich_output.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from __future__ import annotations
1515

1616
import os
17+
import time
1718
from dataclasses import dataclass
1819
from enum import Enum
1920
from typing import Any, Literal
@@ -291,6 +292,8 @@ def __init__(self, config: OutputConfig | None = None):
291292
self.depth_tracker = DepthTracker(max_depth=self.config.max_depth_display)
292293
self._tree: Tree | None = None
293294
self._tree_nodes: dict[int, Any] = {} # depth -> current tree node
295+
self._spinner_index = 0
296+
self._last_progress_ms: float | None = None
294297

295298
def _should_emit(self, level: str) -> bool:
296299
"""Check if event should be emitted based on verbosity."""
@@ -502,6 +505,37 @@ def emit_budget(
502505
)
503506
self.console.print(gauge)
504507

508+
def emit_progress(self, message: str, depth: int = 0, force: bool = False) -> None:
509+
"""
510+
Emit spinner-based progress for long-running operations.
511+
512+
SPEC-13.23: Show progress via spinner.
513+
SPEC-13.25: Throttle updates to reduce output noise.
514+
"""
515+
if not self._should_emit("normal"):
516+
return
517+
518+
max_hz = max(1, self.config.progress_throttle_hz)
519+
interval_ms = 1000.0 / max_hz
520+
now_ms = time.perf_counter() * 1000.0
521+
522+
if (
523+
not force
524+
and self._last_progress_ms is not None
525+
and now_ms - self._last_progress_ms < interval_ms
526+
):
527+
return
528+
529+
spinner_chars = Symbol.SPINNER.value
530+
spinner = spinner_chars[self._spinner_index % len(spinner_chars)]
531+
self._spinner_index += 1
532+
self._last_progress_ms = now_ms
533+
534+
text = self._format_prefix(depth)
535+
text.append(f"{spinner} ", style=Color.DIM.value)
536+
text.append(message, style=Color.DIM.value)
537+
self.console.print(text)
538+
505539
def emit_complete(
506540
self,
507541
tokens_used: int,

0 commit comments

Comments
 (0)