|
| 1 | +// Sleuth Code RAG — query module. |
| 2 | +// Exports askSelf(query, teamId) used by chat-module for the `ask-self` command. |
| 3 | +// Tenancy gate is layer 2 (module-level) per PROJECT/2-WORKING/P1-CODE-RAG.md. |
| 4 | + |
| 5 | +const path = require('node:path'); |
| 6 | +const fs = require('node:fs'); |
| 7 | +const { formatContext } = require('./helpers.js'); |
| 8 | + |
| 9 | +const MODULE_DIR = __dirname; |
| 10 | +const REPO_ROOT = path.join(MODULE_DIR, '..', '..'); |
| 11 | +const DB_PATH = path.join(REPO_ROOT, 'data', 'rag', 'sleuth-rag.sqlite'); |
| 12 | +const PROMPTS_PATH = path.join(MODULE_DIR, 'prompts.json'); |
| 13 | + |
| 14 | +const EMBED_MODEL = 'gemini-embedding-001'; |
| 15 | +const EMBED_DIM = 768; |
| 16 | +const SYNTHESIS_MODEL = 'gemini-pro-latest'; // rolling alias — always newest Gemini Pro |
| 17 | +const TOP_K = 20; // neighbours requested per query (knnSearch default) — retrieve generously, trust Gemini to sort |
| 18 | +const PRIORITY_BOOST = 0.02; // subtracted from KNN distance per priority point — small nudge, doesn't override clear semantic wins |
| 19 | +const MAX_CONTEXT_CHARS = 80000; // character budget handed to formatContext; ~20k tokens — spike showed 18k works well |
| 20 | + |
/**
 * Error raised when an ask-self request fails the workspace tenancy gate.
 *
 * Behaves like a plain Error with `name` set to 'TenancyError' so callers can
 * branch on `err instanceof TenancyError` or `err.name`.
 */
class TenancyError extends Error {
  /**
   * @param {string} message - Human-readable reason for the rejection.
   * @param {{cause?: unknown}} [options] - Standard Error options. Passing
   *   `cause` lets a caller wrap an underlying failure without losing it.
   */
  constructor(message, options) {
    // Forward options so `new TenancyError(msg, { cause })` works like Error.
    super(message, options);
    this.name = 'TenancyError';
  }
}
| 27 | + |
| 28 | +// Lazy-loaded singletons so a missing index file, prompts file, or native module at boot doesn't kill the process. |
| 29 | +// Loading fails on the first askSelf() call instead, which chat-module catches silently. |
| 30 | +let _db = null; |
| 31 | +let _prompts = null; |
| 32 | + |
| 33 | +function getDb() { |
| 34 | + if (_db) return _db; |
| 35 | + if (!fs.existsSync(DB_PATH)) { |
| 36 | + throw new Error(`RAG index missing at ${DB_PATH}. Run: npm run rag:ingest`); |
| 37 | + } |
| 38 | + // Lazy-require native modules so a broken install doesn't poison Sleuth startup |
| 39 | + // for workspaces that never touch ask-self. |
| 40 | + const Database = require('better-sqlite3'); |
| 41 | + const sqliteVec = require('sqlite-vec'); |
| 42 | + _db = new Database(DB_PATH, { readonly: true }); |
| 43 | + sqliteVec.load(_db); |
| 44 | + return _db; |
| 45 | +} |
| 46 | + |
/**
 * Return the parsed contents of the prompts file, reading it from disk only
 * when no parsed copy is cached in `_prompts` yet.
 *
 * @returns {*} Whatever JSON value PROMPTS_PATH contains.
 * @throws {Error} If the file cannot be read or is not valid JSON.
 */
function getPrompts() {
  if (!_prompts) {
    const rawJson = fs.readFileSync(PROMPTS_PATH, 'utf8');
    _prompts = JSON.parse(rawJson);
  }
  return _prompts;
}
| 52 | + |
/**
 * Module-level tenancy gate: allow the call only when `teamId` equals the
 * workspace configured in the NEOCHROME_TEAM_ID environment variable.
 *
 * Checks run in a fixed order: configuration first, then the argument, then
 * the comparison. The first failing check decides the error message.
 *
 * @param {string} teamId - Team ID of the workspace making the request.
 * @returns {void}
 * @throws {TenancyError} If the env var is unset/empty, `teamId` is not a
 *   non-empty string, or the two values differ.
 */
function assertTenancy(teamId) {
  const isFilledString = (value) => typeof value === 'string' && value.length > 0;
  const configuredTeam = process.env.NEOCHROME_TEAM_ID;

  let rejection = null;
  if (!isFilledString(configuredTeam)) {
    rejection = 'NEOCHROME_TEAM_ID not configured';
  } else if (!isFilledString(teamId)) {
    rejection = 'teamId argument required';
  } else if (teamId !== configuredTeam) {
    rejection = 'teamId does not match allowlist';
  }

  if (rejection !== null) {
    throw new TenancyError(rejection);
  }
}
| 65 | + |
/**
 * Embed a query string with the Gemini embedding model.
 *
 * The API key is sent in the `x-goog-api-key` header rather than the URL query
 * string, so it cannot leak through request URLs in logs or error traces.
 *
 * @param {string} query - Text to embed; sent with task type RETRIEVAL_QUERY.
 * @returns {Promise<Uint8Array>} Byte view over a Float32Array holding
 *   EMBED_DIM values.
 * @throws {Error} If GOOGLE_API_KEY is unset, the HTTP call is not ok, or the
 *   response does not contain exactly EMBED_DIM values.
 */
async function embedQuery(query) {
  const apiKey = process.env.GOOGLE_API_KEY;
  if (!apiKey) throw new Error('GOOGLE_API_KEY not set');
  const endpoint = `https://generativelanguage.googleapis.com/v1beta/models/${EMBED_MODEL}:embedContent`;
  const res = await fetch(endpoint, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': apiKey,
    },
    body: JSON.stringify({
      model: `models/${EMBED_MODEL}`,
      content: { parts: [{ text: query }] },
      taskType: 'RETRIEVAL_QUERY',
      outputDimensionality: EMBED_DIM,
    }),
  });
  // Only the first 300 characters of the error body are kept in the message.
  if (!res.ok) throw new Error(`Gemini embed ${res.status}: ${(await res.text()).slice(0, 300)}`);
  const data = await res.json();
  const values = data?.embedding?.values;
  if (!Array.isArray(values) || values.length !== EMBED_DIM) {
    throw new Error(`Gemini embed: unexpected shape, got ${values?.length} dims`);
  }
  // Expose the float32 values as raw bytes for binding as a SQL parameter.
  return new Uint8Array(new Float32Array(values).buffer);
}
| 88 | + |
/**
 * Find the nearest chunks to a query embedding and re-rank them by priority.
 *
 * Two queries are issued: a vector match against `chunks_vec`, then a metadata
 * lookup in `chunks` for the returned rowids. Each surviving hit gets
 * `score = distance - priority * PRIORITY_BOOST` (priority defaults to 1 when
 * null/undefined); lower scores rank first.
 *
 * @param {object} db - Handle exposing `prepare(sql).all(...params)`.
 * @param {*} queryVec - Query embedding, bound as the MATCH parameter.
 * @param {number} [k=TOP_K] - Maximum number of neighbours to request.
 * @returns {Array<object>} Metadata rows extended with `distance` and `score`,
 *   sorted by ascending score. Empty when the vector search finds nothing.
 */
function knnSearch(db, queryVec, k = TOP_K) {
  const nearestSql = 'SELECT rowid, distance FROM chunks_vec WHERE embedding MATCH ? ORDER BY distance LIMIT ?';
  const neighbours = db.prepare(nearestSql).all(queryVec, k);
  if (neighbours.length === 0) return [];

  const rowIds = neighbours.map(({ rowid }) => Number(rowid));
  const marks = rowIds.map(() => '?').join(',');
  const metaSql = `SELECT id, source, path, pr_number, version, priority, content FROM chunks WHERE id IN (${marks})`;
  const metaById = new Map();
  for (const meta of db.prepare(metaSql).all(...rowIds)) {
    metaById.set(Number(meta.id), meta);
  }

  // A vector hit without a metadata row (e.g. partial/corrupt index) is left
  // out of the results; the rowids are collected so the drift is reported in
  // one warning instead of surfacing as silent gaps.
  const orphaned = [];
  const scored = neighbours.flatMap((hit) => {
    const meta = metaById.get(Number(hit.rowid));
    if (!meta) {
      orphaned.push(hit.rowid);
      return [];
    }
    // Lower score is better, so the priority boost is subtracted.
    const boost = (meta.priority ?? 1) * PRIORITY_BOOST;
    return [{ ...meta, distance: hit.distance, score: hit.distance - boost }];
  });

  if (orphaned.length > 0) {
    console.warn(`[rag] knnSearch: dropped ${orphaned.length} hit(s) with missing metadata rows (rowids: ${orphaned.join(', ')}). Rebuild the index with: npm run rag:ingest`);
  }
  return scored.sort((left, right) => left.score - right.score);
}
| 120 | + |
/**
 * Ask the Gemini synthesis model to answer a question from retrieved context.
 *
 * The API key is validated up front and sent in the `x-goog-api-key` header,
 * so it is never part of the request URL. The answer is the concatenation of
 * every text part of the first candidate, skipping parts flagged as thoughts,
 * so a reply split across several parts is returned in full.
 *
 * @param {string} query - The user's question.
 * @param {string} context - Retrieved corpus text placed ahead of the question.
 * @param {string} systemPrompt - System instruction for the model.
 * @returns {Promise<string>} The model's answer text.
 * @throws {Error} If GOOGLE_API_KEY is unset, the HTTP call is not ok, or the
 *   response contains no answer text.
 */
async function synthesize(query, context, systemPrompt) {
  const apiKey = process.env.GOOGLE_API_KEY;
  if (!apiKey) throw new Error('GOOGLE_API_KEY not set');
  const endpoint = `https://generativelanguage.googleapis.com/v1beta/models/${SYNTHESIS_MODEL}:generateContent`;
  const userMessage = `CONTEXT (retrieved from Sleuth's own corpus):\n\n${context}\n\n---\n\nQUESTION: ${query}`;
  const body = {
    system_instruction: { parts: [{ text: systemPrompt }] },
    contents: [{ role: 'user', parts: [{ text: userMessage }] }],
    generationConfig: { temperature: 0.3, maxOutputTokens: 1500 },
  };
  const res = await fetch(endpoint, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-goog-api-key': apiKey,
    },
    body: JSON.stringify(body),
  });
  // Only the first 300 characters of the error body are kept in the message.
  if (!res.ok) throw new Error(`Gemini synthesis ${res.status}: ${(await res.text()).slice(0, 300)}`);
  const data = await res.json();
  const parts = data?.candidates?.[0]?.content?.parts;
  const text = Array.isArray(parts)
    ? parts
      .filter((part) => typeof part?.text === 'string' && part.thought !== true)
      .map((part) => part.text)
      .join('')
    : '';
  if (!text) throw new Error('Gemini synthesis: empty response');
  return text;
}
| 141 | + |
/**
 * Answer a question about Sleuth from the local RAG index.
 *
 * Flow: tenancy gate, query validation, embed the query, nearest-neighbour
 * search, build the context, synthesize an answer, then append a footer naming
 * the distinct sources among the first eight hits.
 *
 * @param {string} query - The question from the user.
 * @param {string} teamId - The Slack team ID of the workspace the question came from.
 * @returns {Promise<string>} Answer text with a "Sources consulted" footer, or
 *   a fixed fallback message when the search returns no hits.
 * @throws {TenancyError} If teamId does not match NEOCHROME_TEAM_ID.
 * @throws {Error} If query is not a non-empty string.
 */
async function askSelf(query, teamId) {
  assertTenancy(teamId);

  const hasQuestion = typeof query === 'string' && query.trim().length > 0;
  if (!hasQuestion) {
    throw new Error('query must be a non-empty string');
  }

  const prompts = getPrompts();
  const db = getDb();
  const queryVec = await embedQuery(query);
  const matches = knnSearch(db, queryVec, TOP_K);
  if (matches.length === 0) {
    return "I couldn't find anything in my index for that question. Try `npm run rag:ingest` or rephrase.";
  }

  const context = formatContext(matches, MAX_CONTEXT_CHARS);
  const answer = await synthesize(query, context, prompts.orchestrator_system);

  // PR hits are cited by number, everything else by path; the Set removes
  // repeats while keeping first-seen order.
  const describeSource = (match) => (match.source === 'pr' ? `PR #${match.pr_number}` : match.path);
  const labels = new Set();
  for (const match of matches.slice(0, 8)) {
    labels.add(describeSource(match));
  }
  const footer = Array.from(labels).join(', ');
  return `${answer}\n\n_Sources consulted: ${footer}_`;
}
| 170 | + |
| 171 | +module.exports = { askSelf, TenancyError }; |
0 commit comments