-
-
Notifications
You must be signed in to change notification settings - Fork 214
Expand file tree
/
Copy pathSearchService.mjs
More file actions
399 lines (349 loc) · 19.1 KB
/
Copy pathSearchService.mjs
File metadata and controls
399 lines (349 loc) · 19.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
import aiConfig from '../../mcp/server/knowledge-base/config.mjs';
import Base from '../../../src/core/Base.mjs';
import {buildChatModel} from '../../provider/buildChatModel.mjs';
import {PROVIDER_TIMEOUT_CODE} from '../../provider/createTimeoutError.mjs';
import ChromaManager from './ChromaManager.mjs';
import fs from 'fs-extra';
import logger from '../../mcp/server/knowledge-base/logger.mjs';
import path from 'path';
import QueryService from './QueryService.mjs';
import {checkAskRateLimit} from './helpers/askRateLimit.mjs';
import {getMissingAskSynthesisLeaves} from './helpers/askSynthesisGuard.mjs';
/**
 * @summary Orchestrates Retrieval-Augmented Generation (RAG) by combining semantic search with LLM synthesis.
 *
 * This service acts as the bridge between the user's natural language question and the project's knowledge base.
 * Instead of simply returning a list of files, it:
 * 1. **Retrieves**: Uses `QueryService` to find the most relevant files based on semantic similarity and intelligent scoring (boosting guides, architectural docs).
 * 2. **Reads**: Fetches the full content of these files from the local filesystem to ensure the LLM has complete context (avoiding truncated metadata).
 * 3. **Synthesizes**: Sends the query and the file contents to the configured synthesis model (Gemini / OpenAI-compatible / Ollama) to generate a precise, grounded answer.
 *
 * This "Read-Eval-Generate" loop allows agents to ask complex questions like "How do I implement a Store?" and get a
 * code-complete answer without manually searching and reading multiple files.
 *
 * Whenever retrieval works but synthesis cannot run (no model, stale config, rate cap, provider failure),
 * `ask()` returns the ranked references inside a degraded envelope (`degraded: true`) instead of throwing.
 *
 * @class Neo.ai.services.knowledge-base.SearchService
 * @extends Neo.core.Base
 * @singleton
 * @see Neo.ai.services.knowledge-base.QueryService
 */
class SearchService extends Base {
    static config = {
        /**
         * @member {String} className='Neo.ai.services.knowledge-base.SearchService'
         * @protected
         */
        className: 'Neo.ai.services.knowledge-base.SearchService',
        /**
         * @member {Boolean} singleton=true
         * @protected
         */
        singleton: true
    }

    /**
     * The synthesis chat model returned by `buildChatModel`, or `null` while synthesis is unavailable.
     * @member {Object|null} model=null
     * @protected
     */
    model = null

    /**
     * Why the synthesis model is unavailable, when it is — `{code, reason}` set at construct for
     * the stale-overlay case (missing `askSynthesis` leaves). `ask()`'s null-model branch threads
     * it into the degraded-references envelope so the caller sees the actionable remediation
     * instead of the generic missing-key message. `null` when the model built normally OR for the
     * legacy gemini-without-key case (which keeps its established `no_provider` shape).
     * @member {Object|null} modelUnavailable=null
     * @protected
     */
    modelUnavailable = null

    /**
     * Rolling epoch-ms timestamps of recent ask-synthesis calls, consumed by the per-minute runaway
     * breaker in {@link ask}. Replaced on each call by the `kept` list returned from `checkAskRateLimit`.
     * @member {Number[]} askCallTimestamps=[]
     * @protected
     */
    askCallTimestamps = []

    /**
     * Builds the synthesis model via the configured provider (`gemini` / `openAiCompatible` / `ollama`)
     * through the shared `buildChatModel` selector, so local deployments synthesize without a remote
     * Gemini key. When required `askSynthesis` leaves are missing, no model is built: the reason is
     * stored in `modelUnavailable` and `ask()` later returns the degraded-reference response.
     * @param {Object} config
     */
    construct(config) {
        super.construct(config);

        // Stale-overlay guard: the gitignored config.mjs is a MATERIALIZED template copy, so a
        // clone that pulled an evolved template without `--migrate-config` has no `askSynthesis`
        // block — naked reads would be an uncaught `undefined.provider` TypeError that breaks
        // the whole KB server boot. Retrieval (query/search) needs no chat model, so the server
        // must still boot: remember the reason, leave the model null, and let `ask()` return its
        // degraded-references envelope carrying the remediation. No fabricated defaults — the
        // config template owns defaults.
        const missing = getMissingAskSynthesisLeaves(aiConfig.askSynthesis, ['provider', 'model', 'timeoutMs', 'timeoutMsRemote', 'maxCallsPerMinute']);

        if (missing.length > 0) {
            this.modelUnavailable = {
                code  : 'stale_config',
                reason: `askSynthesis config leaves missing: ${missing.join(', ')} — sync the askSynthesis block from config.template.mjs into the local config.mjs (node ai/scripts/setup/initServerConfigs.mjs --migrate-config) and restart knowledge-base.`
            };
            logger.error(`[SearchService] ${this.modelUnavailable.reason} Retrieval stays available; ask() degrades to references-only until migrated.`);
            return;
        }

        // Build the synthesis model from the dedicated `askSynthesis` block (NOT the global
        // `modelProvider`), so the interactive ask path can use a fast remote model while bulk chat
        // stays local. `apiKey` is read here at the use site — never inlined. For a local provider,
        // `baseUrl` overrides the host (own-endpoint setups); a falsy `baseUrl` keeps the provider's
        // configured host, and `model` selects the per-task model name.
        const ask          = aiConfig.askSynthesis;
        const hostOverride = ask.baseUrl ? {host: ask.baseUrl} : {};

        this.model = buildChatModel({
            modelProvider         : ask.provider,
            openAiCompatibleConfig: {...aiConfig.openAiCompatible, ...hostOverride, model: ask.model},
            ollamaConfig          : {...aiConfig.ollama, ...hostOverride, model: ask.model},
            geminiApiKey          : ask.apiKey,
            geminiModelName       : ask.model
        });
    }

    /**
     * Ensures the service dependencies are ready: runs the parent `initAsync()` and then waits
     * for `ChromaManager.ready()`.
     * @returns {Promise<void>}
     */
    async initAsync() {
        await super.initAsync();
        await ChromaManager.ready();
    }

    /**
     * Returns embedded chunk content when a ranked result carries full source metadata.
     * Whitespace-only content counts as absent.
     * @param {Object} [metadata] Result metadata from QueryService.
     * @returns {String} The embedded content (untrimmed) or an empty string.
     */
    getEmbeddedReferenceContent(metadata = {}) {
        const {content} = metadata;

        return typeof content === 'string' && content.trim() ? content : '';
    }

    /**
     * Determines whether a ranked result belongs to a tenant/repo that must not be hydrated
     * from the local neoRootDir filesystem.
     *
     * A `repoSlug` that differs from `aiConfig.defaultRepoSlug` is always non-local. Otherwise the
     * result is non-local only when it carries a `tenantId` that differs from `aiConfig.defaultTenantId`;
     * metadata without a `tenantId` is treated as local.
     * @param {Object} [metadata] Result metadata from QueryService.
     * @returns {Boolean} True when local filesystem hydration is unsafe for this reference.
     */
    isNonLocalTenantReference(metadata = {}) {
        const {repoSlug, tenantId} = metadata;

        if (repoSlug && repoSlug !== aiConfig.defaultRepoSlug) {
            return true;
        }

        return Boolean(tenantId) && tenantId !== aiConfig.defaultTenantId;
    }

    /**
     * Resolves the best available source content for RAG synthesis.
     *
     * Local Neo references keep using neoRootDir filesystem hydration so agents see the
     * current checkout. Tenant-ingested references use Chroma metadata content instead,
     * preventing same-relative-path collisions from reading files out of the host repo.
     *
     * Resolution order for local references: file content, then embedded `metadata.content`,
     * then the standard placeholder (with a warning).
     *
     * @param {Object} ref Query reference (`source`, optional `metadata`).
     * @returns {Promise<String>} Hydrated content or the standard placeholder.
     */
    async hydrateReferenceContent(ref) {
        const placeholder     = 'No Content (File missing or empty)';
        const metadata        = ref.metadata || {};
        const embeddedContent = this.getEmbeddedReferenceContent(metadata);

        if (this.isNonLocalTenantReference(metadata)) {
            if (embeddedContent) {
                return embeddedContent;
            }

            logger.warn(`[SearchService] Missing metadata.content for non-local tenant ref.source="${ref.source}" (tenantId="${metadata.tenantId}", repoSlug="${metadata.repoSlug || ''}") — refusing neoRootDir fallback.`);
            return placeholder;
        }

        // Absolute sources are read as-is (legacy chunks); everything else is resolved against neoRootDir.
        const absoluteSource = ref.source && path.isAbsolute(ref.source)
            ? ref.source
            : path.resolve(aiConfig.neoRootDir, ref.source || '');

        let content = '';

        if (absoluteSource && await fs.pathExists(absoluteSource)) {
            try {
                content = await fs.readFile(absoluteSource, 'utf8');
            } catch (err) {
                // Best effort: a read failure falls through to the embedded content / placeholder.
                logger.warn(`[SearchService] Failed to read file ${absoluteSource}:`, err.message);
            }
        }

        if (content) {
            return content;
        }

        if (embeddedContent) {
            return embeddedContent;
        }

        logger.warn(`[SearchService] Empty context for ref.source="${ref.source}" (resolved to "${absoluteSource}") — chunk content will not reach the synthesis LLM.`);
        return placeholder;
    }

    /**
     * @summary Creates the degraded response used when retrieval succeeds but synthesis is unavailable.
     *
     * Returned as a SUCCESS content payload (NO top-level `error` key) so the MCP boundary delivers
     * the references + reason to the caller. `BaseServer.formatToolResult` routes any `'error' in result`
     * object to an error envelope (`Tool Error: … Message: …`) that discards `answer`/`references`/`reason`
     * — so an `error` key here would defeat the whole point of degrading gracefully. Callers detect
     * degradation via `degraded: true`; `degradedCode` disambiguates the cause for diagnostics.
     * @param {Object} params
     * @param {Object[]} params.references Ranked references returned by QueryService.
     * @param {Error|String} params.error The synthesis failure to expose in bounded form.
     * @param {String} [params.code] Explicit degraded cause code; when omitted, derived as
     * `synthesis_timeout` when the error carries `PROVIDER_TIMEOUT_CODE` or the sanitized reason
     * matches `/timed out/i` (regex fallback), else `synthesis_failed`.
     * @returns {{answer: String, references: Object[], degraded: Boolean, degradedCode: String, reason: String}}
     * @private
     */
    #createDegradedSynthesisResponse({references, error, code}) {
        const reason       = this.#sanitizeSynthesisError(error);
        const isTimeout    = error?.code === PROVIDER_TIMEOUT_CODE || /timed out/i.test(reason);
        const degradedCode = code || (isTimeout ? 'synthesis_timeout' : 'synthesis_failed');

        return {
            answer: `Knowledge-base retrieval succeeded, but answer synthesis is currently unavailable (${reason}). Use the references directly while the synthesis provider recovers.`,
            references,
            degraded: true,
            degradedCode,
            reason
        };
    }

    /**
     * @summary Bounds synthesis-provider errors before returning them through MCP callers.
     *
     * Redaction happens BEFORE truncation so a key straddling the 500-char cut cannot survive in part.
     * Two redactions are applied: the literal `askSynthesis.apiKey` currently configured (covers every
     * provider, whatever the key looks like) and the Gemini-style `AIza…` key pattern. A missing,
     * empty or non-string message never throws: it falls back to a generic reason or is coerced via `String()`.
     * @param {Error|String} error The raw provider error.
     * @returns {String} A credential-redacted reason string of at most 500 characters.
     * @private
     */
    #sanitizeSynthesisError(error) {
        const redacted = '[redacted-api-key]';
        const message  = typeof error === 'string' ? error : error?.message;

        // Providers may reject with non-Error values; coerce so `.replace` below cannot throw
        // inside ask()'s catch handler and turn a degradable failure into a rejection.
        let reason = String(message || 'Synthesis provider unavailable');

        // `askSynthesis` may be absent (stale config). The length floor keeps a placeholder-sized
        // value from redacting ordinary characters out of the message.
        const configuredKey = aiConfig.askSynthesis?.apiKey;

        if (typeof configuredKey === 'string' && configuredKey.length >= 8) {
            reason = reason.split(configuredKey).join(redacted);
        }

        return reason
            .replace(/AIza[0-9A-Za-z_-]{20,}/g, redacted)
            .slice(0, 500);
    }

    /**
     * Assembles the synthesis prompt. The template text is runtime behavior — keep it verbatim.
     * @param {String} query The user's natural language question.
     * @param {String} contextDocs The joined `--- DOCUMENT n ---` context sections.
     * @returns {String}
     * @private
     */
    #buildSynthesisPrompt(query, contextDocs) {
        return `
You are an expert Neo.mjs architect.
**CRITICAL INSTRUCTION:** The framework is named "Neo.mjs". Never refer to it as "Neo.js".
Answer the following question using **ONLY** the provided context documents.
If the answer cannot be found in the documents, state that you don't have enough information.
Question: ${query}
Context:
${contextDocs}
Instructions:
1. Synthesize a clear, concise answer.
2. Cite specific classes or files from the context where appropriate.
3. Do not make up code or facts not present in the text.
4. Adhere to the terminology: "Neo.mjs", "App Worker", "VDom Worker", "config system".
`;
    }

    /**
     * Performs a semantic search via QueryService and synthesizes an answer using the LLM.
     *
     * Outcomes:
     * - no results: `{answer, references: []}` with a hint when the collection itself is empty;
     * - synthesis unavailable / rate limited / failed: the degraded envelope from
     *   `#createDegradedSynthesisResponse` (references still included);
     * - success: `{answer, references}`.
     *
     * @param {Object} params
     * @param {String} params.query The natural language query.
     * @param {String} [params.type='all'] Optional content type filter (e.g., 'guide', 'src').
     * @param {Number} [params.limit=5] Number of source files to include in the context.
     * @returns {Promise<Object>} The synthesized answer and references.
     */
    async ask({query, type = 'all', limit = 5}) {
        logger.info(`[SearchService] Processing RAG query: "${query}" (Type: ${type})`);

        // 1. Retrieve most relevant files using QueryService's scoring logic
        const queryResult = await QueryService.queryDocuments({query, type, limit, includeMetadata: true});

        if (queryResult.message || !queryResult.results || queryResult.results.length === 0) {
            // An EMPTY collection is the common cold-start cause since the KB artifact download
            // left the npm `prepare` chain (it is opt-in now) — name the one-liner so the absence
            // is discoverable instead of reading like a bad query. A failed count lookup maps to
            // `null` and therefore to the generic message.
            const count = await ChromaManager.getKnowledgeBaseCollection()
                .then(collection => collection.count())
                .catch(() => null);

            return {
                answer: count === 0
                    ? "The knowledge base collection is empty. Populate it with the release artifact via 'npm run ai:download-kb' (or build locally with 'npm run ai:sync-kb')."
                    : "No relevant documents found in the knowledge base.",
                references: []
            };
        }

        const references = queryResult.results.map(r => {
            // hydrateReferenceContent() tolerates a missing source, so the display name must too.
            const source = typeof r.source === 'string' ? r.source : '';

            return {
                name  : source.split('/').pop(),
                source: r.source,
                score : Number(r.score)
            };
        });

        if (!this.model) {
            // Thread the construct-time stale-config reason when present; the legacy
            // gemini-without-key case keeps its established `no_provider` shape.
            const {reason, code} = this.modelUnavailable || {
                reason: 'GEMINI_API_KEY is required for RAG features.',
                code  : 'no_provider'
            };

            return this.#createDegradedSynthesisResponse({references, error: reason, code});
        }

        // A non-null model implies construct() validated the `askSynthesis` leaves read below.
        const askConfig = aiConfig.askSynthesis;

        // 2. Cost-safety runaway breaker: gate the synthesis call on a rolling per-minute cap.
        // Interactive use sits far below the cap; a scripted runaway (the incident class) trips it and we
        // return the degraded references instead of issuing the (costly) remote call. The gate runs BEFORE
        // context hydration so a rejected call also skips the per-reference file reads and prompt assembly.
        // State lives on the singleton; the rate check is a pure helper (`checkAskRateLimit`).
        const nowMs           = Date.now();
        const {limited, kept} = checkAskRateLimit(this.askCallTimestamps, nowMs, askConfig.maxCallsPerMinute);

        this.askCallTimestamps = kept;

        if (limited) {
            logger.warn(`[SearchService] ask synthesis rate cap (${askConfig.maxCallsPerMinute}/min) hit; returning degraded references without calling the provider.`);

            return this.#createDegradedSynthesisResponse({
                references,
                error: `ask synthesis rate limit (${askConfig.maxCallsPerMinute}/min) exceeded`,
                code : 'rate_limited'
            });
        }

        this.askCallTimestamps.push(nowMs);

        // 3. Read source contents for context.
        //
        // All source loaders store `metadata.source` as a path relative to `neoRootDir`
        // so the Chroma collection shipped with each neo release remains portable across
        // recipients' filesystems; hydration resolves against the consumer's own `neoRootDir`
        // at read time, with a `path.isAbsolute` short-circuit that keeps legacy absolute-path
        // chunks working for consumers that have not re-synced yet.
        //
        // Tenant content uses metadata-embedded hydration. Non-local tenants may use the same
        // relative `source` strings as Neo itself, so those references hydrate from
        // metadata.content and never fall through to the host checkout.
        const contextSections = await Promise.all(queryResult.results.map(async (r, index) => {
            const ref     = {...references[index], metadata: r.metadata || {}};
            const content = await this.hydrateReferenceContent(ref);

            return `--- DOCUMENT ${index + 1} (${ref.name} from ${ref.source}) ---\n${content}`;
        }));

        const prompt = this.#buildSynthesisPrompt(query, contextSections.join('\n\n'));

        // 4. Generate Answer
        try {
            // Provider-class timeout selection: `gemini` is the only always-remote class and gets
            // `timeoutMsRemote`; `ollama`/`openAiCompatible` get the local-class ceiling `timeoutMs` —
            // `openAiCompatible` may point at a slow local model, so false-long (a hung self-hosted
            // endpoint waits longer) is the safe direction over false-short (a working local model
            // gets cut off).
            const result = await this.model.generateContent(prompt, {
                timeoutMs     : askConfig.provider === 'gemini' ? askConfig.timeoutMsRemote : askConfig.timeoutMs,
                operationLabel: 'ask_knowledge_base synthesis',
                priority      : 'interactive'
            });

            return {
                answer: result.response.text(),
                references
            };
        } catch (error) {
            const degraded = this.#createDegradedSynthesisResponse({references, error});

            logger.warn(`[SearchService] Synthesis failed after retrieval; returning degraded references: ${degraded.reason}`);
            return degraded;
        }
    }
}
// Default export: whatever `Neo.setupClass()` returns for this class. `Neo` is not imported in this
// file, so it is expected to be available as a global at module evaluation time.
// NOTE(review): given `singleton: true` in the class config, the export is presumably the shared
// instance rather than the class itself — confirm against Neo.setupClass.
export default Neo.setupClass(SearchService);