Skip to content

Commit 3af8c2c

Browse files
feat: add validation framework with semantic intent classifier
- Add `/validate` skill with `SKILL.md` and `batch_validate.py` that calls the Altimate backend API directly via SSE for single-trace, date-range, and session modes
- Add `validate` CLI command (`altimate-code validate install/status`) that installs the skill to `~/.altimate-code/skills/validate/`
- Add conversation-logger that auto-logs user/assistant turns to the backend on session idle; wire `initConversationLogger()` into `InstanceBootstrap`
- Add `ValidationIntent.classify()` in `src/altimate/intent/validation-classifier.ts` using Haiku via the existing provider stack to detect validation queries semantically — handles natural language like "audit my session" or "check this trace" without keyword/rule-based matching
- Intercept the `session.prompt` route to auto-redirect validation queries to the `validate` skill, keeping all other flows unchanged; fails open on any error
- Increase Bash tool timeout from 900000ms (15 min) to 3600000ms (60 min)
- Add a failed-claims markdown table to the validation summary output with: claim ID, trace ID, claim text, claimed value, source tool ID, actual text, actual data, error %, and root cause
- Fix `build.ts` to embed skill assets from the correct path; auto-install the validate skill after `bun run build`; retry on empty SSE stream
- Skill output is written directly to markdown files; the user is notified with the file path
1 parent 9a02f27 commit 3af8c2c

File tree

17 files changed

+1023
-4
lines changed

17 files changed

+1023
-4
lines changed

.gitignore

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,37 @@ target
2323
.scripts
2424
.direnv/
2525

26+
# Python
27+
__pycache__/
28+
*.pyc
29+
*.pyo
30+
*.egg-info/
31+
32+
# SQLite databases (feedback store creates these at runtime)
33+
*.db
34+
35+
# Runtime logs
36+
*.log
37+
logs/
38+
39+
# Large intermediate files at repo root (generated during benchmark runs)
40+
/queries.json
41+
/queries_1k.json
42+
/results/
43+
44+
# Local runtime config
45+
.altimate-code/
46+
47+
# Commit message scratch files
48+
.github/meta/
49+
50+
# Experiment / simulation artifacts
51+
/data/
52+
/experiments/
53+
/models/
54+
/simulation/
55+
2656
# Local dev files
2757
opencode-dev
28-
logs/
2958
*.bun-build
3059
tsconfig.tsbuildinfo

bun.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/altimate-code/src/provider/models-snapshot.ts

Lines changed: 2 additions & 0 deletions
Large diffs are not rendered by default.

packages/opencode/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"scripts": {
99
"typecheck": "tsgo --noEmit",
1010
"test": "bun test --timeout 30000",
11-
"build": "bun run script/build.ts",
11+
"build": "bun run script/build.ts && bun run --conditions=browser ./src/index.ts validate install",
1212
"dev": "bun run --conditions=browser ./src/index.ts",
1313
"db": "bun drizzle-kit"
1414
},

packages/opencode/script/build.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,11 @@ const migrations = await Promise.all(
7171
)
7272
console.log(`Loaded ${migrations.length} migrations`)
7373

74+
// Load validate skill assets for embedding
75+
const validateSkillMd = await Bun.file(path.join(dir, "src/skill/validate/SKILL.md")).text()
76+
const validateBatchPy = await Bun.file(path.join(dir, "src/skill/validate/batch_validate.py")).text()
77+
console.log("Loaded validate skill assets")
78+
7479
const singleFlag = process.argv.includes("--single")
7580
const baselineFlag = process.argv.includes("--baseline")
7681
const skipInstall = process.argv.includes("--skip-install")
@@ -218,8 +223,13 @@ for (const item of targets) {
218223
OPENCODE_CHANNEL: `'${Script.channel}'`,
219224
ALTIMATE_ENGINE_VERSION: `'${engineVersion}'`,
220225
OPENCODE_LIBC: item.os === "linux" ? `'${item.abi ?? "glibc"}'` : "undefined",
226+
ALTIMATE_CLI_LIBC: item.os === "linux" ? `'${item.abi ?? "glibc"}'` : "undefined",
221227
OPENCODE_MIGRATIONS: JSON.stringify(migrations),
228+
ALTIMATE_CLI_MIGRATIONS: JSON.stringify(migrations),
222229
OPENCODE_CHANGELOG: JSON.stringify(changelog),
230+
ALTIMATE_CLI_CHANGELOG: JSON.stringify(changelog),
231+
ALTIMATE_VALIDATE_SKILL_MD: JSON.stringify(validateSkillMd),
232+
ALTIMATE_VALIDATE_BATCH_PY: JSON.stringify(validateBatchPy),
223233
OTUI_TREE_SITTER_WORKER_PATH: bunfsRoot + workerRelativePath,
224234
},
225235
})
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import { generateText } from "ai"
2+
import { Provider } from "@/provider/provider"
3+
import { Log } from "@/util/log"
4+
5+
/**
 * Semantic intent classifier that decides whether a user message is a
 * "validation" request (check / audit / evaluate an AI trace, session or
 * conversation response).
 *
 * Every failure mode — no classifier model available, provider error, timeout,
 * unparseable model output — resolves to `false`, so callers fall through to
 * their normal flow.
 */
export namespace ValidationIntent {
  const log = Log.create({ service: "validation-intent" })

  const SYSTEM_PROMPT = `You are a binary intent classifier.
A validation query is any request to check, audit, evaluate, or quality-assess an AI trace, session, or conversation response.
Respond ONLY with valid JSON: {"is_validation": true} or {"is_validation": false}. No explanation.`

  // Upper bound on the classification request. On expiry the request is
  // aborted and classify() fails open like any other error.
  const CLASSIFY_TIMEOUT_MS = 5000

  // Anthropic Haiku queries, tried in order until one resolves to a usable model.
  const CANDIDATE_QUERIES = [
    ["anthropic", ["haiku"]],
    ["anthropic", ["claude-3-haiku"]],
    ["anthropic", ["claude-3-5-haiku"]],
  ] as const

  /**
   * Resolve the first candidate query that yields a language model.
   *
   * @returns the language model, or `null` when no candidate resolves.
   */
  async function getClassifierLanguageModel() {
    for (const [providerID, query] of CANDIDATE_QUERIES) {
      try {
        // Spread into a fresh array because the candidate tuples are readonly.
        const closest = await Provider.closest(providerID, [...query])
        if (!closest) continue
        const model = await Provider.getModel(closest.providerID, closest.modelID)
        return await Provider.getLanguage(model)
      } catch {
        // Deliberate best-effort: a failing candidate just means "try the next one".
      }
    }
    return null
  }

  /**
   * Extract the classifier verdict from raw model output.
   *
   * Tolerates text around the JSON object (markdown fences, a leading
   * sentence) by parsing only the span from the first `{` to the last `}`.
   *
   * @returns `true` / `false` for a parseable object, `undefined` when no JSON
   *          object could be parsed from the text.
   */
  function parseVerdict(text: string): boolean | undefined {
    const start = text.indexOf("{")
    const end = text.lastIndexOf("}")
    if (start === -1 || end <= start) return undefined

    let parsed: unknown
    try {
      parsed = JSON.parse(text.slice(start, end + 1))
    } catch {
      return undefined
    }
    if (typeof parsed !== "object" || parsed === null) return undefined
    // Only a literal boolean `true` counts; truthy strings or numbers do not.
    return (parsed as { is_validation?: unknown }).is_validation === true
  }

  /**
   * Classify a user message as a validation query.
   *
   * @param message raw user message; blank input short-circuits to `false`.
   * @returns `true` only when the model explicitly answers
   *          `{"is_validation": true}`; `false` otherwise, including on any error.
   */
  export async function classify(message: string): Promise<boolean> {
    if (!message.trim()) return false

    try {
      const language = await getClassifierLanguageModel()
      if (!language) return false

      const { text } = await generateText({
        model: language,
        system: SYSTEM_PROMPT,
        prompt: message,
        maxOutputTokens: 20,
        abortSignal: AbortSignal.timeout(CLASSIFY_TIMEOUT_MS),
      })

      const verdict = parseVerdict(text)
      if (verdict === undefined) {
        log.warn("intent classifier returned unparseable output, proceeding normally", { text })
        return false
      }
      return verdict
    } catch (e) {
      log.warn("intent classification failed, proceeding normally", { error: String(e) })
      return false // fail open — let main flow handle it
    }
  }
}

packages/opencode/src/cli/cmd/tui/app.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ import type { EventSource } from "./context/sdk"
106106
export function tui(input: {
107107
url: string
108108
args: Args
109+
config: TuiConfig.Info
109110
directory?: string
110111
fetch?: typeof fetch
111112
headers?: RequestInit["headers"]

packages/opencode/src/cli/cmd/tui/attach.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ import { cmd } from "../cmd"
22
import { UI } from "@/cli/ui"
33
import { tui } from "./app"
44
import { win32DisableProcessedInput, win32InstallCtrlCGuard } from "./win32"
5+
import { TuiConfig } from "@/config/tui"
6+
import { Instance } from "@/project/instance"
7+
import { existsSync } from "fs"
58

69
export const AttachCommand = cmd({
710
command: "attach <url>",
@@ -63,8 +66,13 @@ export const AttachCommand = cmd({
6366
const auth = `Basic ${Buffer.from(`altimate:${password}`).toString("base64")}`
6467
return { Authorization: auth }
6568
})()
69+
const config = await Instance.provide({
70+
directory: directory && existsSync(directory) ? directory : process.cwd(),
71+
fn: () => TuiConfig.get(),
72+
})
6673
await tui({
6774
url: args.url,
75+
config,
6876
args: {
6977
continue: args.continue,
7078
sessionID: args.session,

packages/opencode/src/cli/cmd/tui/thread.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import { Filesystem } from "@/util/filesystem"
1212
import type { Event } from "@opencode-ai/sdk/v2"
1313
import type { EventSource } from "./context/sdk"
1414
import { win32DisableProcessedInput, win32InstallCtrlCGuard } from "./win32"
15+
import { TuiConfig } from "@/config/tui"
16+
import { Instance } from "@/project/instance"
1517

1618
declare global {
1719
const OPENCODE_WORKER_PATH: string
@@ -135,6 +137,10 @@ export const TuiThreadCommand = cmd({
135137
if (!args.prompt) return piped
136138
return piped ? piped + "\n" + args.prompt : args.prompt
137139
})
140+
const config = await Instance.provide({
141+
directory: cwd,
142+
fn: () => TuiConfig.get(),
143+
})
138144

139145
// Check if server should be started (port or hostname explicitly set in CLI or config)
140146
const networkOpts = await resolveNetworkOptions(args)
@@ -165,6 +171,8 @@ export const TuiThreadCommand = cmd({
165171

166172
const tuiPromise = tui({
167173
url,
174+
config,
175+
directory: cwd,
168176
fetch: customFetch,
169177
events,
170178
tuiConfig,
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import type { Argv } from "yargs"
2+
import { cmd } from "./cmd"
3+
import * as prompts from "@clack/prompts"
4+
import fs from "fs/promises"
5+
import path from "path"
6+
import os from "os"
7+
8+
// Injected at build time by build.ts (same pattern as ALTIMATE_CLI_MIGRATIONS).
9+
// In development these fall back to reading from disk via getAssets().
10+
declare const ALTIMATE_VALIDATE_SKILL_MD: string
11+
declare const ALTIMATE_VALIDATE_BATCH_PY: string
12+
13+
/** Text contents of the two files that make up the /validate skill. */
interface ValidateAssets {
  skillMd: string
  batchPy: string
}

/**
 * Resolve the /validate skill assets.
 *
 * Uses the build-time constants when both are defined; otherwise reads the
 * files from the `skill/validate` directory relative to this source file
 * (development fallback).
 *
 * @returns the contents of `SKILL.md` and `batch_validate.py`.
 */
async function getAssets(): Promise<ValidateAssets> {
  // `typeof` guards keep this safe when the constants were never injected.
  const embedded =
    typeof ALTIMATE_VALIDATE_SKILL_MD !== "undefined" && typeof ALTIMATE_VALIDATE_BATCH_PY !== "undefined"
  if (embedded) {
    return { skillMd: ALTIMATE_VALIDATE_SKILL_MD, batchPy: ALTIMATE_VALIDATE_BATCH_PY }
  }

  const sourceDir = path.join(import.meta.dir, "../../skill/validate")
  const readAsset = (fileName: string) => fs.readFile(path.join(sourceDir, fileName), "utf-8")
  const [skillMd, batchPy] = await Promise.all([readAsset("SKILL.md"), readAsset("batch_validate.py")])
  return { skillMd, batchPy }
}
36+
37+
38+
39+
/**
 * `validate install` — writes the /validate skill files into
 * `~/.altimate-code/skills/validate`, creating the directory if needed.
 */
const InstallSubcommand = cmd({
  command: "install",
  describe: "install the /validate skill into ~/.altimate-code",
  handler: async () => {
    prompts.intro("Altimate Validate — Installer")

    const assets = await getAssets()

    const progress = prompts.spinner()
    progress.start("Installing /validate skill...")

    const installDir = path.join(os.homedir(), ".altimate-code", "skills", "validate")
    await fs.mkdir(installDir, { recursive: true })

    // Written one after another, SKILL.md first.
    const files: ReadonlyArray<readonly [string, string]> = [
      ["SKILL.md", assets.skillMd],
      ["batch_validate.py", assets.batchPy],
    ]
    for (const [fileName, contents] of files) {
      await fs.writeFile(path.join(installDir, fileName), contents)
    }

    progress.stop(`Installed /validate skill → ${installDir}`)
    prompts.outro("Altimate validation skill installed successfully!")
  },
})
58+
59+
/**
 * `validate status` — reports whether both skill files are present under
 * `~/.altimate-code/skills/validate`.
 */
const StatusSubcommand = cmd({
  command: "status",
  describe: "check whether the /validate skill is installed",
  handler: async () => {
    const installDir = path.join(os.homedir(), ".altimate-code", "skills", "validate")

    prompts.intro("Altimate Validate — Installation Status")

    // fs.access rejects when the path is not accessible; map that to a boolean.
    const isAccessible = async (fileName: string): Promise<boolean> => {
      try {
        await fs.access(path.join(installDir, fileName))
        return true
      } catch {
        return false
      }
    }

    const hasSkillMd = await isAccessible("SKILL.md")
    const hasBatchPy = await isAccessible("batch_validate.py")
    // The skill counts as installed only when both files are present.
    const installed = hasSkillMd && hasBatchPy

    const marker = installed ? "✓" : "✗"
    const suffix = installed ? "" : " (not found)"
    prompts.log.info(`${marker} /validate skill${suffix}: ${installDir}`)

    prompts.outro("Done")
  },
})
77+
78+
/**
 * Parent `validate` command. It has no behavior of its own: it registers the
 * `install` and `status` subcommands and requires that one be given.
 */
export const ValidateCommand = cmd({
  command: "validate",
  describe: "manage the Altimate validation framework (/validate skill)",
  builder(yargs: Argv) {
    const withInstall = yargs.command(InstallSubcommand)
    const withStatus = withInstall.command(StatusSubcommand)
    return withStatus.demandCommand()
  },
  handler() {},
})

0 commit comments

Comments
 (0)