Skip to content

Commit 304729f

Browse files
committed
Harden passive bridge extraction
- Recover malformed-but-repairable passive extractor JSON and retry once with stricter JSON-only instructions
- Tighten passive extractor section guidance and report schema validation failures with non-sensitive field-specific reasons
- Make recent fuzzy duplicate suppression section-aware so distinct values and practices themes can both bridge
- Document the customer-friendly Cortex Memory explainer link in the README
- Add focused unit coverage for JSON recovery, schema diagnostics, values-style candidates, duplicate suppression, and bridge send guards
1 parent 0bac279 commit 304729f

7 files changed

Lines changed: 399 additions & 20 deletions

File tree

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@
88

99
[OpenClaw](https://github.com/openclaw/openclaw) plugin for [Cortex](https://github.com/ubundi/cortex) long-term memory. Gives your agent persistent memory that survives across sessions — who you are, what your project does, decisions you made weeks ago, and how things changed over time.
1010

11+
## Customer-Friendly Explainer
12+
13+
A one-page explainer for companies using the plugin is published at [https://marble-birch-znnf.here.now/](https://marble-birch-znnf.here.now/). It explains what Cortex Memory does for AI agents, how it benefits users and teams, and what controls administrators have.
14+
15+
The source HTML artifact is also stored privately in here.now Drive at `My Drive/openclaw-cortex/cortex-memory-plugin-explainer.html`.
16+
1117
![Features](assets/readme_assets/Feature%20Cards.png)
1218

1319
- **Auto-Recall** — optional cold-start memory injection before the agent reads workspace notes

src/features/bridge/handler.ts

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ interface PassiveRecentCandidate {
7777
contentKey: string;
7878
evidenceKey: string;
7979
evidenceHash: string;
80+
suggestedSection: PassiveBridgeRequest["candidates"][number]["suggested_section"];
8081
sentAt: number;
8182
}
8283

@@ -178,33 +179,35 @@ function passiveTracePrivateFragments(request: PassiveBridgeRequest): string[] {
178179

179180
function isDuplicateRecentPassiveCandidate(
180181
sessionState: BridgeSessionState,
181-
candidate: Pick<PassiveBridgeRequest["candidates"][number], "content" | "evidence_quote">,
182+
candidate: Pick<PassiveBridgeRequest["candidates"][number], "content" | "evidence_quote" | "suggested_section">,
182183
recentCandidates: PassiveRecentCandidate[] = trimPassiveRecentCandidates(sessionState),
183184
): boolean {
184185
const contentKey = passiveCanonicalKey(candidate.content);
185186
const evidenceKey = passiveCanonicalKey(candidate.evidence_quote);
186187
const evidenceHash = passiveEvidenceHash(candidate.evidence_quote);
187-
return recentCandidates.some((recent) => (
188-
recent.evidenceHash === evidenceHash
189-
|| passiveSimilarity(contentKey, recent.contentKey) >= PASSIVE_CONTENT_DUPLICATE_SIMILARITY
190-
|| passiveSimilarity(evidenceKey, recent.evidenceKey) >= PASSIVE_EVIDENCE_DUPLICATE_SIMILARITY
191-
));
188+
return recentCandidates.some((recent) => {
189+
if (recent.evidenceHash === evidenceHash) return true;
190+
if (recent.suggestedSection !== candidate.suggested_section) return false;
191+
return passiveSimilarity(contentKey, recent.contentKey) >= PASSIVE_CONTENT_DUPLICATE_SIMILARITY
192+
|| passiveSimilarity(evidenceKey, recent.evidenceKey) >= PASSIVE_EVIDENCE_DUPLICATE_SIMILARITY;
193+
});
192194
}
193195

194196
function passiveRecentCandidateFor(
195-
candidate: Pick<PassiveBridgeRequest["candidates"][number], "content" | "evidence_quote">,
197+
candidate: Pick<PassiveBridgeRequest["candidates"][number], "content" | "evidence_quote" | "suggested_section">,
196198
): PassiveRecentCandidate {
197199
return {
198200
contentKey: passiveCanonicalKey(candidate.content),
199201
evidenceKey: passiveCanonicalKey(candidate.evidence_quote),
200202
evidenceHash: passiveEvidenceHash(candidate.evidence_quote),
203+
suggestedSection: candidate.suggested_section,
201204
sentAt: Date.now(),
202205
};
203206
}
204207

205208
function rememberRecentPassiveCandidate(
206209
sessionState: BridgeSessionState,
207-
candidate: Pick<PassiveBridgeRequest["candidates"][number], "content" | "evidence_quote">,
210+
candidate: Pick<PassiveBridgeRequest["candidates"][number], "content" | "evidence_quote" | "suggested_section">,
208211
): void {
209212
const recent = trimPassiveRecentCandidates(sessionState);
210213
recent.push(passiveRecentCandidateFor(candidate));

src/features/bridge/openclaw-extractor.ts

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ type DirectPassiveModelCall = (params: {
7575
signal?: AbortSignal;
7676
}) => Promise<string>;
7777

78+
const DIRECT_EXTRACTOR_JSON_RETRY_LIMIT = 1;
79+
7880
type JsonCompatible =
7981
| null
8082
| boolean
@@ -369,6 +371,53 @@ function buildDirectExtractorUserPrompt(input: PassiveExtractorInput): string {
369371
].join("\n");
370372
}
371373

374+
/**
 * Builds the extractor input used for a JSON retry attempt.
 *
 * Returns a shallow copy of `input` whose `prompt` is the original prompt
 * followed by a blank line and strict JSON-only instructions. The original
 * `input` object is not mutated.
 *
 * @param input - The extractor input that produced an unparseable response.
 * @returns A new input with the retry instructions appended to `prompt`.
 */
function buildStrictJsonRetryInput(input: PassiveExtractorInput): PassiveExtractorInput {
  // The leading empty entry yields a blank line between the original prompt
  // and the retry instructions once everything is joined with "\n".
  const strictJsonInstructions = [
    "",
    "Your previous response was not parseable JSON.",
    "Retry now with exactly one strict JSON object matching the requested schema.",
    "Do not include markdown, comments, explanations, trailing commas, or extra top-level keys.",
    "If there are no valid candidates, return exactly {\"candidates\":[]}.",
  ];
  return {
    ...input,
    prompt: [input.prompt, ...strictJsonInstructions].join("\n"),
  };
}
387+
388+
/**
 * Calls the direct passive model and parses its response, retrying when the
 * response cannot be parsed as JSON.
 *
 * `timeoutMs` is treated as one budget shared by all attempts: each call gets
 * whatever is left of it (never less than 1 ms). The first attempt uses
 * `params.input` as-is; later attempts use `buildStrictJsonRetryInput`.
 * At most `DIRECT_EXTRACTOR_JSON_RETRY_LIMIT` retries are made.
 *
 * @returns The parsed extractor output, or `{ candidates: [] }` when the model
 *   returns only whitespace.
 * @throws SyntaxError when no attempt produced parseable JSON. Any error that
 *   is not a `SyntaxError` (from the model call or the parser) propagates
 *   unchanged.
 */
async function runDirectPassiveModelExtractorWithJsonRecovery(params: {
  input: PassiveExtractorInput;
  config?: OpenClawConfigLike;
  modelRef: string;
  timeoutMs: number;
  directModelCall: DirectPassiveModelCall;
  logger: Logger;
}): Promise<PassiveExtractorOutput> {
  const startedAt = Date.now();
  const budgetLeftMs = (): number => params.timeoutMs - (Date.now() - startedAt);
  let lastSyntaxError: SyntaxError | undefined;

  for (let attempt = 0; attempt <= DIRECT_EXTRACTOR_JSON_RETRY_LIMIT; attempt++) {
    const input = attempt === 0 ? params.input : buildStrictJsonRetryInput(params.input);
    const text = await params.directModelCall({
      input,
      config: params.config,
      modelRef: params.modelRef,
      timeoutMs: Math.max(1, budgetLeftMs()),
    });
    if (!text.trim()) return { candidates: [] };

    try {
      return parsePassiveExtractorJson(text);
    } catch (err) {
      // Only JSON parse failures are retryable; everything else is rethrown.
      if (!(err instanceof SyntaxError)) throw err;
      lastSyntaxError = err;
      if (attempt >= DIRECT_EXTRACTOR_JSON_RETRY_LIMIT) break;
      // With no budget left, a retry would run with the 1 ms floor and most
      // likely surface a timeout instead of the parse failure that actually
      // happened, so stop here and report the SyntaxError.
      if (budgetLeftMs() <= 0) break;
      params.logger.debug?.(`Cortex bridge: passive extractor_json_retry attempt=${attempt + 1} reason=json_parse_failed`);
    }
  }

  params.logger.debug?.("Cortex bridge: passive extractor_json_unrecoverable reason=json_parse_failed");
  throw lastSyntaxError ?? new SyntaxError("passive extractor JSON parse failed");
}
420+
372421
function normalizeSecretInput(value: unknown): string | undefined {
373422
if (typeof value !== "string") return undefined;
374423
const trimmed = value.trim();
@@ -811,13 +860,13 @@ export function createOpenClawPassiveModelExtractor(
811860
}
812861

813862
logger.info?.(`Cortex bridge: passive_extractor_model_call_started runner=direct_model timeoutMs=${timeoutMs} maxOutputTokens=${input.maxOutputTokens} model=${modelRef}`);
814-
const text = await (options.directModelCall ?? createPiAiDirectModelCall())({
863+
return runDirectPassiveModelExtractorWithJsonRecovery({
815864
input,
816865
config: directExtractorConfig,
817866
modelRef,
818867
timeoutMs,
868+
directModelCall: options.directModelCall ?? createPiAiDirectModelCall(),
869+
logger,
819870
});
820-
if (!text.trim()) return { candidates: [] };
821-
return parsePassiveExtractorJson(text);
822871
};
823872
}

src/features/bridge/passive.ts

Lines changed: 78 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -227,9 +227,12 @@ export function buildPassiveExtractorPrompt(): string {
227227
"Candidate wording should be concise, durable, operational, and not include unsupported details.",
228228
"Evidence quotes must be exact user-authored substrings from the provided messages.",
229229
"Keep evidence_quote to the shortest exact substring that proves the candidate; do not quote whole long messages.",
230+
"suggested_section must be exactly one of: coreValues, beliefs, principles, ideas, dreams, practices, shadows, legacy.",
231+
"Use coreValues for personal values, principles for operating principles, practices for concrete recurring work habits, and beliefs for durable beliefs.",
230232
"If uncertain, return no candidates.",
231233
"Return JSON only with this shape:",
232-
"{\"candidates\":[{\"content\":\"string\",\"suggested_section\":\"practices\",\"evidence_quote\":\"exact user-authored quote\",\"supporting_evidence_quotes\":[\"optional exact user-authored quotes\"],\"confidence\":0.0,\"risk_tier\":\"low\",\"reason\":\"brief internal review note\"}]}",
234+
"{\"candidates\":[{\"content\":\"string\",\"suggested_section\":\"coreValues|beliefs|principles|ideas|dreams|practices|shadows|legacy\",\"evidence_quote\":\"exact user-authored quote\",\"supporting_evidence_quotes\":[\"optional exact user-authored quotes\"],\"confidence\":0.0,\"risk_tier\":\"low\",\"reason\":\"brief internal review note\"}]}",
235+
"Output must be a single strict JSON object: no markdown fences, no prose, no comments, no trailing commas, and no extra top-level keys.",
233236
].join("\n");
234237
}
235238

@@ -262,8 +265,19 @@ export function buildPassiveExtractorInput(rawMessages: unknown[]): PassiveExtra
262265
}
263266

264267
export function parsePassiveExtractorJson(raw: string): PassiveExtractorOutput {
265-
const trimmed = raw.trim().replace(/^```(?:json)?\s*([\s\S]*?)\s*```$/i, "$1").trim();
266-
const parsed = JSON.parse(trimmed) as unknown;
268+
const trimmed = stripPassiveExtractorJsonWrapper(raw);
269+
let parsed: unknown;
270+
try {
271+
parsed = JSON.parse(trimmed) as unknown;
272+
} catch (err) {
273+
const repaired = repairPassiveExtractorJsonText(trimmed);
274+
if (!repaired || repaired === trimmed) throw err;
275+
try {
276+
parsed = JSON.parse(repaired) as unknown;
277+
} catch {
278+
throw err;
279+
}
280+
}
267281
if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
268282
throw new SyntaxError("passive extractor JSON root must be an object");
269283
}
@@ -278,6 +292,54 @@ export function parsePassiveExtractorJson(raw: string): PassiveExtractorOutput {
278292
return { candidates };
279293
}
280294

295+
/**
 * Trims `raw` and, when the whole trimmed text is a single Markdown code fence
 * (optionally tagged `json`, case-insensitive), returns the trimmed fence
 * contents. Text that is not entirely one fence is returned trimmed only.
 */
function stripPassiveExtractorJsonWrapper(raw: string): string {
  const trimmed = raw.trim();
  // Anchored at both ends so a fence embedded in surrounding prose is left alone.
  const unfenced = trimmed.replace(/^```(?:json)?\s*([\s\S]*?)\s*```$/i, "$1");
  return unfenced.trim();
}
298+
299+
/**
 * Pulls the most likely JSON object out of text that may have prose around it.
 *
 * Starting at the first `{`, the text is scanned for the `}` that balances it,
 * ignoring braces inside double-quoted strings (with backslash escapes). This
 * keeps trailing prose that happens to contain `}` out of the result. When no
 * balanced close is found, the slice from the first `{` to the last `}` is
 * returned instead.
 *
 * @param raw - Model output that may contain a JSON object.
 * @returns The extracted object text, or `undefined` when `raw` has no `{`
 *   followed by a `}`.
 */
function extractLikelyJsonObject(raw: string): string | undefined {
  const start = raw.indexOf("{");
  if (start < 0) return undefined;

  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let index = start; index < raw.length; index++) {
    const char = raw.charAt(index);
    if (inString) {
      if (escaped) escaped = false;
      else if (char === "\\") escaped = true;
      else if (char === "\"") inString = false;
      continue;
    }
    if (char === "\"") {
      inString = true;
    } else if (char === "{") {
      depth++;
    } else if (char === "}") {
      depth--;
      if (depth === 0) return raw.slice(start, index + 1);
    }
  }

  // No balanced close (e.g. an unterminated string): fall back to the widest
  // brace-delimited slice so later repair steps still get a chance.
  const end = raw.lastIndexOf("}");
  if (end <= start) return undefined;
  return raw.slice(start, end + 1);
}
305+
306+
/**
 * Removes commas that are directly followed (ignoring whitespace) by `}` or
 * `]`, i.e. trailing commas that strict JSON does not allow.
 *
 * Commas inside double-quoted strings are left alone; backslash escapes are
 * tracked so an escaped quote does not end the string. All other characters,
 * including the whitespace after a removed comma, are copied through as-is.
 *
 * @param raw - JSON-like text.
 * @returns `raw` with trailing commas dropped.
 */
function removeTrailingJsonCommas(raw: string): string {
  // Built once per call rather than once per whitespace character scanned.
  const whitespace = /\s/;
  const kept: string[] = [];
  let inString = false;
  let escaped = false;

  for (let index = 0; index < raw.length; index++) {
    const char = raw.charAt(index);

    if (inString) {
      kept.push(char);
      if (escaped) escaped = false;
      else if (char === "\\") escaped = true;
      else if (char === "\"") inString = false;
      continue;
    }

    if (char === "\"") {
      inString = true;
      kept.push(char);
      continue;
    }

    if (char === ",") {
      // Look past whitespace to see whether this comma closes a container.
      let lookahead = index + 1;
      while (lookahead < raw.length && whitespace.test(raw.charAt(lookahead))) lookahead++;
      const next = raw.charAt(lookahead);
      if (next === "}" || next === "]") continue;
    }

    kept.push(char);
  }

  return kept.join("");
}
336+
337+
/**
 * Produces a repaired version of extractor output for a second parse attempt.
 *
 * Narrows the text with `extractLikelyJsonObject` (falling back to the full
 * text when nothing is extracted), then strips trailing commas with
 * `removeTrailingJsonCommas` and trims the result.
 *
 * @param raw - Extractor output that failed to parse.
 * @returns The repaired text, or `undefined` when the result is empty.
 */
function repairPassiveExtractorJsonText(raw: string): string | undefined {
  const likelyObject = extractLikelyJsonObject(raw) ?? raw;
  const withoutTrailingCommas = removeTrailingJsonCommas(likelyObject).trim();
  return withoutTrailingCommas === "" ? undefined : withoutTrailingCommas;
}
342+
281343
function exactUserEvidenceSource(
282344
quote: string,
283345
messages: PassiveExtractorMessage[],
@@ -313,6 +375,14 @@ function reject(reason: string): { reason: string } {
313375
return { reason };
314376
}
315377

378+
/**
 * Builds a rejection whose reason is `schema_invalid:` followed by `detail`.
 *
 * @param detail - Short identifier for the part of the schema that failed.
 */
function schemaReject(detail: string): { reason: string } {
  const reason = `schema_invalid:${detail}`;
  return reject(reason);
}
381+
382+
/**
 * Builds a schema rejection for an unexpected candidate key.
 *
 * The key is reduced to a safe label before it is put in the reason: every
 * character outside `A-Z a-z 0-9 _ -` becomes `_`, the result is cut to 40
 * characters, and an empty result is reported as `unknown`.
 *
 * @param key - The unexpected property name found on a candidate.
 */
function schemaKeyReject(key: string): { reason: string } {
  const safeKey = key.replace(/[^A-Za-z0-9_-]/g, "_").slice(0, 40) || "unknown";
  return schemaReject(`unexpected_key:${safeKey}`);
}
385+
316386
const PASSIVE_PRUNE_STOPWORDS = new Set([
317387
"a",
318388
"an",
@@ -462,20 +532,21 @@ export function validatePassiveExtractorCandidates(
462532
"risk_tier",
463533
"reason",
464534
]);
465-
if (Object.keys(typed).some((key) => !allowedKeys.has(key))) {
466-
rejected.push(reject("schema_invalid"));
535+
const unexpectedKey = Object.keys(typed).find((key) => !allowedKeys.has(key));
536+
// Compare against undefined rather than relying on truthiness: `find` can
// return the empty string for a candidate such as {"": 1}, and "" is falsy,
// so a truthiness check would let that unexpected key through.
if (unexpectedKey !== undefined) {
  rejected.push(schemaKeyReject(unexpectedKey));
  continue;
}
469540
if (typeof typed.supporting_evidence_quotes !== "undefined" && !Array.isArray(typed.supporting_evidence_quotes)) {
470-
rejected.push(reject("schema_invalid"));
541+
rejected.push(schemaReject("supporting_evidence_quotes"));
471542
continue;
472543
}
473544
if (!content || countWords(content) < 4) {
474545
rejected.push(reject(BLOCKED_CANDIDATE_RE.test(content ?? "") ? "sensitive_content" : "candidate_text_invalid"));
475546
continue;
476547
}
477548
if (!suggestedSection) {
478-
rejected.push(reject("schema_invalid"));
549+
rejected.push(schemaReject("suggested_section"));
479550
continue;
480551
}
481552
if (confidence === undefined || confidence < 0.75) {

0 commit comments

Comments
 (0)