agent-knowledge/src/two-agent-research-loop.ts at main · tangle-network/agent-knowledge · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
import type { KnowledgeReadinessReport } from '@tangle-network/agent-eval'
import {
  type BuildEvalKnowledgeBundleOptions,
  buildEvalKnowledgeBundle,
  type EvalKnowledgeBundleBuildResult,
  type KnowledgeReadinessSpec,
} from './eval-readiness'
import { createKnowledgeEvent } from './events'
import { buildKnowledgeIndex } from './indexer'
import { applyKnowledgeWriteBlocks } from './proposals'
import { readinessFor } from './readiness-helpers'
import { searchKnowledge } from './search'
import { type AddSourceOptions, type AddSourceTextInput, addSourceText } from './sources'
import { initKnowledgeBase } from './store'
import type { KnowledgeEvent, KnowledgeIndex, KnowledgeSearchResult, SourceRecord } from './types'

/**
 * A knowledge gap the loop surfaces from `scoreKnowledgeReadiness`. The worker
 * targets these; the driver folds the unfilled remainder into the worker's next
 * prompt and runs its own gap-fill pass over them.
 *
 * Gaps are derived from the readiness report's `blockingMissingRequirements`
 * and `nonBlockingGaps` lists, with the blocking entries listed first.
 */
export interface KnowledgeGap {
  /** Readiness-spec id this gap belongs to (copied from the reported requirement). */
  id: string
  /** Human-readable description of what's missing. */
  description: string
  /**
   * The search query the readiness check ran for this requirement. Taken from
   * the matching requirement's `metadata.query` when that is a string;
   * otherwise it falls back to `description`.
   */
  query: string
  /** True when the gap blocks readiness (vs. a soft, non-blocking gap). */
  blocking: boolean
}

/**
 * A new source the worker (or driver) discovered and wants to add to the KB.
 * Alias of the `addSourceText` input type; the loop uses its `uri` as the
 * dedup key for worker proposals.
 */
export type ResearchSourceProposal = AddSourceTextInput

/**
 * What a research agent contributes in one round. Both the worker and (when
 * `driverResearches` is on) the driver produce this shape — the worker ADDS
 * primary findings, the driver gap-FILLS the ones the worker missed.
 *
 * `proposalText` is the safe write-protocol text (`---FILE: knowledge/...---`
 * blocks). The loop only applies it AFTER the driver has verified the round's
 * sources, so a rejected source never reaches the curated pages.
 *
 * Pages (`proposalText` / `buildPages`) are applied only when at least one of
 * the contribution's sources was registered in the same round; a contribution
 * with no registered sources writes no pages.
 */
export interface ResearchContribution {
  /**
   * Immutable sources to register (the raw evidence). Worker sources pass
   * through dedup + `verifySource` first; driver sources are registered as-is.
   */
  sources?: ResearchSourceProposal[]
  /** Safe write-protocol text producing curated `knowledge/*.md` pages. */
  proposalText?: string
  /**
   * Build the page write-protocol text FROM the sources the driver accepted —
   * the curated, citing pages the readiness gate searches. Receives the
   * registered `SourceRecord`s (with their assigned ids, so a page's frontmatter
   * `sources:` can cite them). Returns `---FILE: knowledge/...---` block text or
   * `undefined`. Runs after verification, so a page never cites a rejected
   * source. Concatenated after any static `proposalText`.
   */
  buildPages?: (acceptedSources: SourceRecord[]) => string | undefined
  /**
   * Free-form research transcript — products can persist this. The loop copies
   * it onto the round record's `notes` (`worker` or `driver`).
   */
  notes?: string
  /** Caller-defined extras. The loop in this file does not read this field. */
  metadata?: Record<string, unknown>
}

/** Context handed to the worker each round. */
export interface WorkerResearchContext {
  /** Knowledge-base root, passed through from the loop options. */
  root: string
  /** Research goal, passed through from the loop options. */
  goal: string
  /** 1-based round number. */
  round: number
  /** Knowledge index as it stood when the round started. */
  index: KnowledgeIndex
  /** Gaps the readiness gate currently reports — what the worker should close. */
  gaps: KnowledgeGap[]
  /**
   * Steer text the driver folded in from the previous round's remaining gaps.
   * Undefined in the first round and whenever folding produced nothing.
   */
  steer?: string
  /**
   * Readiness bundle for the current index. When the loop has no readiness
   * result of its own it supplies a synthesised bundle built from an empty
   * index and no specs, so this is always defined.
   */
  readiness: EvalKnowledgeBundleBuildResult
  /** The loop's abort signal, forwarded unchanged. */
  signal?: AbortSignal
}

/** Context handed to the driver's verifier for one candidate source. */
export interface SourceVerificationContext {
  /** Knowledge-base root, passed through from the loop options. */
  root: string
  /** Research goal, passed through from the loop options. */
  goal: string
  /** 1-based round number. */
  round: number
  /**
   * Knowledge index as of the start of the round; it is not rebuilt between
   * individual verifications.
   */
  index: KnowledgeIndex
  /** Gaps reported at the start of the round (the same list the worker saw). */
  gaps: KnowledgeGap[]
  /**
   * Sources already accepted earlier THIS round (in-round dedup). This is the
   * loop's own working array, which it keeps appending to — treat as read-only.
   */
  acceptedThisRound: ResearchSourceProposal[]
  /** The loop's abort signal, forwarded unchanged. */
  signal?: AbortSignal
}

/** A single rejected source plus the reason the driver gave. */
export interface RejectedSource {
  /** The proposal exactly as the worker submitted it. */
  source: ResearchSourceProposal
  /**
   * Why it was dropped: the verifier's `reason`, or the loop's own
   * `duplicate: ...` message when the dedup check rejected it before
   * `verifySource` ran.
   */
  reason: string
}

/** Context handed to the driver's gap-fill pass (only when `driverResearches`). */
export interface DriverResearchContext {
  /** Knowledge-base root, passed through from the loop options. */
  root: string
  /** Research goal, passed through from the loop options. */
  goal: string
  /** 1-based round number. */
  round: number
  /** Knowledge index rebuilt AFTER the worker's accepted contribution was applied. */
  index: KnowledgeIndex
  /** Gaps STILL open after the worker's accepted contribution applied. */
  remainingGaps: KnowledgeGap[]
  /**
   * Readiness bundle recomputed against the rebuilt index (or a synthesised
   * empty bundle when the loop has no readiness result).
   */
  readiness: EvalKnowledgeBundleBuildResult
  /** The loop's abort signal, forwarded unchanged. */
  signal?: AbortSignal
}

/**
 * The differentiated driver role.
 *
 * - `verifySource` — the gate the worker's additions pass before they commit.
 *   Return `{ accept: true }` to keep a source or `{ accept: false, reason }`
 *   to reject it (not real / not relevant / duplicate). The loop dedups exact
 *   duplicates (same `uri` already in the KB or accepted this round) BEFORE
 *   calling this, so the verifier only sees genuinely-new candidates. "Already
 *   in the KB" is judged against each indexed source's `metadata.originalUri`.
 * - `research` — the driver's OWN gap-fill pass over the gaps the worker left
 *   open. Only invoked when `driverResearches` is true. Its sources are
 *   registered directly; they do not pass through `verifySource`.
 * - `foldGaps` — turn the remaining gaps into a steer string for the worker's
 *   next prompt. Defaults to a compact bulleted list when omitted. Not called
 *   when no gaps remain or once the loop is ready.
 */
export interface ResearchDriver {
  /** Verdict on one worker-proposed source; may be synchronous or async. */
  verifySource(
    source: ResearchSourceProposal,
    ctx: SourceVerificationContext,
  ): Promise<SourceVerdict> | SourceVerdict
  /** Optional gap-fill research pass; may be synchronous or async. */
  research?(ctx: DriverResearchContext): Promise<ResearchContribution> | ResearchContribution
  /** Optional custom formatter for the next round's steer text. */
  foldGaps?(gaps: KnowledgeGap[]): string
}

/**
 * Outcome of `ResearchDriver.verifySource`: accept the source, or reject it
 * with a `reason` that the loop records on the round's rejected-source list.
 */
export type SourceVerdict = { accept: true } | { accept: false; reason: string }

/**
 * The worker: primary research targeting the round's gaps. Invoked once per
 * round and awaited, so it may return its contribution directly or as a
 * promise.
 */
export type ResearchWorker = (
  ctx: WorkerResearchContext,
) => Promise<ResearchContribution> | ResearchContribution

/** Configuration for `runTwoAgentResearchLoop`. */
export interface TwoAgentResearchLoopOptions {
  /** Knowledge-base root; initialised, indexed and written to by the loop. */
  root: string
  /**
   * Research goal. Forwarded to the worker and driver, recorded in each round's
   * event metadata, and used as the task-id fallback for the synthesised
   * readiness bundle.
   */
  goal: string
  /** Primary researcher, called once per round. */
  worker: ResearchWorker
  /** Verifier / gap-filler / steer formatter. */
  driver: ResearchDriver
  /**
   * When false (default), the driver ONLY verifies + gates — a pure coordinator
   * that contributes no research of its own (the "doesn't participate in the
   * work" mode). When true, the driver also runs its `research` gap-fill pass
   * each round over the gaps the worker left open. Has no effect unless
   * `driver.research` is also defined.
   */
  driverResearches?: boolean
  /**
   * Upper bound on rounds. Defaults to 3; values below 1 are raised to 1. The
   * loop stops earlier as soon as the readiness gate reports ready.
   */
  maxRounds?: number
  /** Actor recorded on each round's `research.iteration` event. */
  actor?: string
  /** Readiness specs define the gate; an empty list means the loop never gates. */
  readinessSpecs?: KnowledgeReadinessSpec[]
  /**
   * Task id for the readiness bundle. The synthesised empty bundle falls back
   * to `goal` when this is omitted (presumably `readinessFor` does likewise —
   * confirm in `./readiness-helpers`).
   */
  readinessTaskId?: string
  /** Extra bundle-build options; `taskId`, `index` and `specs` are set by the loop. */
  readiness?: Omit<BuildEvalKnowledgeBundleOptions, 'taskId' | 'index' | 'specs'>
  /** Options forwarded to `addSourceText` for every source the loop registers. */
  sourceOptions?: Pick<AddSourceOptions, 'adapters' | 'now'>
  /**
   * Abort signal. Checked at the start of each round (the loop throws if it is
   * already aborted) and forwarded to the worker and driver.
   */
  signal?: AbortSignal
  /** Awaited after each round's record has been appended to the result steps. */
  onRound?: (round: TwoAgentResearchRound) => Promise<void> | void
}

/** Record of a single completed round, as pushed to `steps` and passed to `onRound`. */
export interface TwoAgentResearchRound {
  /** 1-based round number. */
  round: number
  /** Gaps reported at the START of the round (what the worker targeted). */
  gaps: KnowledgeGap[]
  /** Worker sources accepted by the driver and written to the KB. */
  acceptedWorkerSources: SourceRecord[]
  /** Worker sources the driver rejected (with reasons) — never written. */
  rejectedWorkerSources: RejectedSource[]
  /** Sources the driver itself added in its gap-fill pass. */
  driverSources: SourceRecord[]
  /** Curated pages written this round (worker proposal + driver proposal). */
  writtenPages: string[]
  /** Readiness recomputed after this round's contributions were applied. */
  readiness?: EvalKnowledgeBundleBuildResult
  /** True once the readiness gate reports no blocking gaps. */
  ready: boolean
  /**
   * `research.iteration` event for this round; its metadata carries the goal,
   * round number, ready flag and the source/page/remaining-gap counts.
   */
  event: KnowledgeEvent
  /** The `notes` returned by the worker and (when it researched) the driver. */
  notes: { worker?: string; driver?: string }
}

/** Final outcome of `runTwoAgentResearchLoop`. */
export interface TwoAgentResearchLoopResult {
  /** Knowledge-base root the loop operated on. */
  root: string
  /** Research goal the loop was given. */
  goal: string
  /** Number of rounds actually executed (equals `steps.length`). */
  rounds: number
  /** Readiness gate state when the loop stopped. */
  ready: boolean
  /** Most recently built knowledge index. */
  index: KnowledgeIndex
  /** Most recently computed readiness result, if any. */
  readiness?: EvalKnowledgeBundleBuildResult
  /** Per-round records, in execution order. */
  steps: TwoAgentResearchRound[]
}

/**
 * Two-agent (driver + worker) sibling of `runKnowledgeResearchLoop`.
 *
 * Both agents research to grow ONE knowledge base. The roles are differentiated:
 *
 * - **WORKER** = primary research. Each round it reads the open gaps, discovers
 *   new sources, and proposes additions (`sources` + `proposalText`). It ADDS.
 * - **DRIVER** = the verifier / gap-filler / gate. It (1) VERIFIES the worker's
 *   sources before they commit — dedup against the KB, then `verifySource`
 *   rejects ones that aren't real/relevant; (2) GAP-FILLS the gaps the worker
 *   missed with its own research pass (when `driverResearches`); (3) folds the
 *   remaining gaps into the worker's next prompt; and (4) GATES on
 *   `scoreKnowledgeReadiness` — the loop stops as soon as there are no blocking
 *   gaps.
 *
 * Set `driverResearches: false` (default) for the pure-coordinator mode: the
 * driver only verifies + gates and contributes no research itself.
 *
 * Composes existing atoms — `initKnowledgeBase`, `addSourceText`,
 * `applyKnowledgeWriteBlocks`, `buildEvalKnowledgeBundle` (the readiness gate),
 * and `searchKnowledge` — and reinvents none of them.
 *
 * @param options Loop configuration. `maxRounds` defaults to 3 and is raised to
 *   at least 1; a `NaN` value is treated as if it had been omitted.
 * @returns The final index, readiness result and gate state, plus one record
 *   per executed round. No rounds run when the gate is already satisfied.
 * @throws Error with message `Two-agent research loop aborted` when
 *   `options.signal` is already aborted at the start of a round. Errors thrown
 *   by the worker, the driver, `onRound`, or the underlying knowledge-base
 *   calls are not caught here and propagate to the caller.
 */
export async function runTwoAgentResearchLoop(
  options: TwoAgentResearchLoopOptions,
): Promise<TwoAgentResearchLoopResult> {
  // `Math.max(1, NaN)` is NaN and `round <= NaN` is never true, so a NaN
  // `maxRounds` (e.g. from a failed numeric parse) would skip every round
  // silently. Treat NaN like an omitted value and fall back to the default.
  const requestedRounds = options.maxRounds ?? 3
  const maxRounds = Math.max(1, Number.isNaN(requestedRounds) ? 3 : requestedRounds)
  await initKnowledgeBase(options.root)
  const steps: TwoAgentResearchRound[] = []
  let index = await buildKnowledgeIndex(options.root)
  let readiness = readinessFor(options, index)
  let ready = isReady(readiness?.report)
  let steer: string | undefined

  for (let round = 1; round <= maxRounds && !ready; round++) {
    // Abort is only observed between rounds; a round in flight runs to completion.
    if (options.signal?.aborted) throw new Error('Two-agent research loop aborted')

    const gaps = gapsFromReadiness(readiness)

    // 1. WORKER: primary research over the open gaps.
    const workerContribution = await options.worker({
      root: options.root,
      goal: options.goal,
      round,
      index,
      gaps,
      steer,
      readiness: requireReadiness(readiness, options),
      signal: options.signal,
    })

    // 2. DRIVER VERIFIES the worker's sources before they commit.
    const accepted: ResearchSourceProposal[] = []
    const rejectedWorkerSources: RejectedSource[] = []
    // Dedup against the ORIGINAL input uri. `addSourceText` rewrites `record.uri`
    // to a slugified raw path and stashes the caller's uri under
    // `metadata.originalUri`, so that — not the stored uri — is the round-to-round
    // identity a verifier dedups against.
    const existingUris = new Set(
      index.sources.flatMap((source) =>
        typeof source.metadata?.originalUri === 'string' ? [source.metadata.originalUri] : [],
      ),
    )
    for (const source of workerContribution.sources ?? []) {
      if (isDuplicate(source, existingUris, accepted)) {
        rejectedWorkerSources.push({ source, reason: 'duplicate: already in the knowledge base' })
        continue
      }
      const verdict = await options.driver.verifySource(source, {
        root: options.root,
        goal: options.goal,
        round,
        index,
        gaps,
        acceptedThisRound: accepted,
        signal: options.signal,
      })
      if (verdict.accept) accepted.push(source)
      else rejectedWorkerSources.push({ source, reason: verdict.reason })
    }

    // Register the accepted worker sources, then apply the worker's curated
    // pages — but only when at least one source survived verification, so a
    // page never cites a rejected source.
    const acceptedWorkerSources = await registerSources(options, accepted)
    const writtenPages: string[] = []
    writtenPages.push(
      ...(await applyPages(options.root, workerContribution, acceptedWorkerSources)),
    )

    // Re-index so the driver's gap-fill pass sees the worker's contribution.
    index = await buildKnowledgeIndex(options.root)
    readiness = readinessFor(options, index)

    // 3. DRIVER GAP-FILLS the gaps the worker left open (opt-in).
    let driverSources: SourceRecord[] = []
    let driverNotes: string | undefined
    if (options.driverResearches && options.driver.research) {
      const remainingGaps = gapsFromReadiness(readiness)
      const driverContribution = await options.driver.research({
        root: options.root,
        goal: options.goal,
        round,
        index,
        remainingGaps,
        readiness: requireReadiness(readiness, options),
        signal: options.signal,
      })
      driverNotes = driverContribution.notes
      // Driver sources are registered directly — they do not go through
      // the dedup / `verifySource` gate that worker sources do.
      driverSources = await registerSources(options, driverContribution.sources ?? [])
      writtenPages.push(...(await applyPages(options.root, driverContribution, driverSources)))
      index = await buildKnowledgeIndex(options.root)
      readiness = readinessFor(options, index)
    }

    // 4. DRIVER GATES on readiness and folds the remainder into the next prompt.
    ready = isReady(readiness?.report)
    const remainingGaps = gapsFromReadiness(readiness)
    steer = ready ? undefined : foldGaps(options.driver, remainingGaps)

    const step: TwoAgentResearchRound = {
      round,
      gaps,
      acceptedWorkerSources,
      rejectedWorkerSources,
      driverSources,
      writtenPages,
      readiness,
      ready,
      event: createKnowledgeEvent({
        type: 'research.iteration',
        actor: options.actor,
        target: options.root,
        metadata: {
          goal: options.goal,
          round,
          ready,
          acceptedWorkerSourceCount: acceptedWorkerSources.length,
          rejectedWorkerSourceCount: rejectedWorkerSources.length,
          driverSourceCount: driverSources.length,
          writtenPageCount: writtenPages.length,
          remainingGapCount: remainingGaps.length,
        },
      }),
      notes: { worker: workerContribution.notes, driver: driverNotes },
    }
    // Record the round before notifying, so `steps` is complete even if
    // `onRound` throws.
    steps.push(step)
    await options.onRound?.(step)
  }

  return {
    root: options.root,
    goal: options.goal,
    rounds: steps.length,
    ready,
    index,
    readiness,
    steps,
  }
}

/**
 * Readiness gate predicate.
 *
 * Without a report there is nothing to gate on, so the answer is `false` and
 * the caller keeps going until it runs out of rounds. With a report, the gate
 * passes exactly when no blocking requirement is still missing.
 */
function isReady(report: KnowledgeReadinessReport | undefined): boolean {
  return report ? report.blockingMissingRequirements.length === 0 : false
}

/**
 * Flatten a readiness result into the gap list handed to the agents.
 *
 * Returns an empty list when there is no readiness result. Otherwise every
 * blocking missing requirement comes first (flagged `blocking: true`),
 * followed by every non-blocking gap (flagged `blocking: false`), each
 * converted with `gapFor`.
 */
function gapsFromReadiness(readiness: EvalKnowledgeBundleBuildResult | undefined): KnowledgeGap[] {
  const gaps: KnowledgeGap[] = []
  if (!readiness) return gaps

  const { blockingMissingRequirements, nonBlockingGaps } = readiness.report
  for (const requirement of blockingMissingRequirements) {
    gaps.push(gapFor(requirement, readiness, true))
  }
  for (const requirement of nonBlockingGaps) {
    gaps.push(gapFor(requirement, readiness, false))
  }
  return gaps
}

/**
 * Convert one reported requirement into a `KnowledgeGap`.
 *
 * The gap's `query` comes from the `metadata.query` of the bundle requirement
 * with the same id when that value is a string; in every other case (no such
 * requirement, no metadata, non-string query) the requirement's description
 * doubles as the query.
 */
function gapFor(
  requirement: { id: string; description: string; metadata?: Record<string, unknown> },
  readiness: EvalKnowledgeBundleBuildResult,
  blocking: boolean,
): KnowledgeGap {
  const { id, description } = requirement
  const specQuery = readiness.requirements.find((entry) => entry.id === id)?.metadata?.query
  return {
    id,
    description,
    query: typeof specQuery === 'string' ? specQuery : description,
    blocking,
  }
}

/**
 * Produce the steer text for the worker's next prompt.
 *
 * No gaps means no steer (`undefined`), and the driver's formatter is not
 * consulted. Otherwise the driver's own `foldGaps` wins when it exists; the
 * fallback is a header line followed by one bullet per gap, tagged
 * `blocking` or `soft` and suffixed with the gap id.
 */
function foldGaps(driver: ResearchDriver, gaps: KnowledgeGap[]): string | undefined {
  if (gaps.length === 0) return undefined
  if (driver.foldGaps) return driver.foldGaps(gaps)

  const lines = ['The knowledge base is still missing the following. Prioritise these next round:']
  for (const gap of gaps) {
    const severity = gap.blocking ? 'blocking' : 'soft'
    lines.push(`- (${severity}) ${gap.description} [${gap.id}]`)
  }
  return lines.join('\n')
}

/**
 * Exact-`uri` duplicate check for a proposed source.
 *
 * A proposal is a duplicate when its `uri` is in `existingUris` (sources
 * already known to the knowledge base) or equals the `uri` of a proposal
 * already in `accepted`.
 */
function isDuplicate(
  source: ResearchSourceProposal,
  existingUris: Set<string>,
  accepted: ResearchSourceProposal[],
): boolean {
  if (existingUris.has(source.uri)) return true
  for (const candidate of accepted) {
    if (candidate.uri === source.uri) return true
  }
  return false
}

/**
 * Register each proposal with `addSourceText`, one at a time and in input
 * order, and return the resulting records in that same order. Each call is
 * awaited before the next starts; the loop's `sourceOptions` are passed
 * along on every call.
 */
async function registerSources(
  options: TwoAgentResearchLoopOptions,
  sources: ResearchSourceProposal[],
): Promise<SourceRecord[]> {
  const registered: SourceRecord[] = []
  for (const proposal of sources) {
    const record = await addSourceText(options.root, proposal, options.sourceOptions)
    registered.push(record)
  }
  return registered
}

/**
 * Write a contribution's curated pages through the safe write protocol.
 *
 * Nothing is written — and `buildPages` is not even called — when
 * `acceptedSources` is empty, so pages only land alongside at least one
 * registered source. Otherwise the static `proposalText` and the
 * `buildPages(acceptedSources)` output (in that order, skipping whichever is
 * empty or missing) are joined with a newline and handed to
 * `applyKnowledgeWriteBlocks`. Resolves to that call's `written` list, or to
 * an empty list when there was no text to apply.
 */
async function applyPages(
  root: string,
  contribution: ResearchContribution,
  acceptedSources: SourceRecord[],
): Promise<string[]> {
  if (acceptedSources.length === 0) return []

  const staticText = contribution.proposalText
  const builtText = contribution.buildPages?.(acceptedSources)
  const parts = [staticText, builtText].filter((part): part is string => Boolean(part))
  if (parts.length === 0) return []

  const { written } = await applyKnowledgeWriteBlocks(root, parts.join('\n'))
  return written
}

/**
 * Guarantee a readiness bundle for the worker/driver contexts, whose
 * `readiness` field is typed as required.
 *
 * An existing result is returned untouched. When there is none, an empty
 * stand-in is built instead: the caller's extra `readiness` options, a task
 * id of `readinessTaskId` (falling back to `goal`), an empty index for the
 * loop's root, and no specs — so callers never have to branch on `undefined`.
 */
function requireReadiness(
  readiness: EvalKnowledgeBundleBuildResult | undefined,
  options: TwoAgentResearchLoopOptions,
): EvalKnowledgeBundleBuildResult {
  if (!readiness) {
    return buildEvalKnowledgeBundle({
      ...options.readiness,
      taskId: options.readinessTaskId ?? options.goal,
      index: emptyIndex(options.root),
      specs: [],
    })
  }
  return readiness
}

/**
 * Build a blank knowledge index for `root`: no sources, no pages, an empty
 * graph, and a `generatedAt` pinned to the Unix epoch. Every call returns
 * fresh arrays.
 */
function emptyIndex(root: string): KnowledgeIndex {
  const epoch = new Date(0).toISOString()
  const blank: KnowledgeIndex = {
    root,
    generatedAt: epoch,
    sources: [],
    pages: [],
    graph: { nodes: [], edges: [] },
  }
  return blank
}

/**
 * Helper for verifiers: a cheap relevance heuristic the driver can compose
 * into `verifySource` (real verifiers can do more).
 *
 * For each gap, the gap's query is lower-cased and split on whitespace. If the
 * candidate's lower-cased `title` + `text` contains at least one of those
 * tokens as a substring, the top result of `searchKnowledge(index, gap.query, 1)`
 * is appended to the output. Gaps are processed in order, and results are not
 * de-duplicated across gaps.
 *
 * @returns The collected search results; empty when the source mentions no
 *   token of any gap query.
 */
export function sourceMatchesGaps(
  source: ResearchSourceProposal,
  index: KnowledgeIndex,
  gaps: KnowledgeGap[],
): KnowledgeSearchResult[] {
  const haystack = `${source.title ?? ''}\n${source.text}`.toLowerCase()
  const hits: KnowledgeSearchResult[] = []
  for (const gap of gaps) {
    const tokens = gap.query.toLowerCase().split(/\s+/).filter(Boolean)
    const mentioned = tokens.some((token) => haystack.includes(token))
    if (!mentioned) continue
    hits.push(...searchKnowledge(index, gap.query, 1))
  }
  return hits
}