@@ -165,6 +165,15 @@ async function llmCall(messages, opts = {}) {
165165 }
166166
167167 const model = opts . model || ( opts . vlm ? VLM_MODEL : LLM_MODEL ) || undefined ;
168+ // For JSON-expected tests, disable thinking (Qwen3 /no_think directive)
169+ // This prevents the model from wasting tokens on reasoning before outputting JSON
170+ if ( opts . expectJSON ) {
171+ const lastUserIdx = messages . findLastIndex ( m => m . role === 'user' ) ;
172+ if ( lastUserIdx >= 0 ) {
173+ messages = [ ...messages ] ;
174+ messages [ lastUserIdx ] = { ...messages [ lastUserIdx ] , content : messages [ lastUserIdx ] . content + ' /no_think' } ;
175+ }
176+ }
168177
169178 // Build request params
170179 const params = {
@@ -173,6 +182,7 @@ async function llmCall(messages, opts = {}) {
173182 ...( model && { model } ) ,
174183 ...( opts . temperature !== undefined && { temperature : opts . temperature } ) ,
175184 ...( opts . maxTokens && { max_completion_tokens : opts . maxTokens } ) ,
185+ ...( opts . expectJSON && { response_format : { type : 'json_object' } } ) ,
176186 ...( opts . tools && { tools : opts . tools } ) ,
177187 } ;
178188
@@ -181,6 +191,34 @@ async function llmCall(messages, opts = {}) {
181191 const idleMs = opts . timeout || IDLE_TIMEOUT_MS ;
182192 let idleTimer = setTimeout ( ( ) => controller . abort ( ) , idleMs ) ;
183193 const resetIdle = ( ) => { clearTimeout ( idleTimer ) ; idleTimer = setTimeout ( ( ) => controller . abort ( ) , idleMs ) ; } ;
194+ // Log prompt being sent
195+ log ( `\n 📤 Prompt (${ messages . length } messages, params: ${ JSON . stringify ( { maxTokens : opts . maxTokens , expectJSON : ! ! opts . expectJSON , response_format : params . response_format } ) } ):` ) ;
196+ for ( const m of messages ) {
197+ if ( typeof m . content === 'string' ) {
198+ log ( ` [${ m . role } ] ${ m . content } ` ) ;
199+ } else if ( Array . isArray ( m . content ) ) {
200+ // Multi-part content (VLM with images)
201+ for ( const part of m . content ) {
202+ if ( part . type === 'text' ) {
203+ log ( ` [${ m . role } ] ${ part . text } ` ) ;
204+ } else if ( part . type === 'image_url' ) {
205+ const url = part . image_url ?. url || '' ;
206+ const b64Match = url . match ( /^data:([^;]+);base64,(.+)/ ) ;
207+ if ( b64Match ) {
208+ const mimeType = b64Match [ 1 ] ;
209+ const b64Data = b64Match [ 2 ] ;
210+ const sizeKB = Math . round ( b64Data . length * 3 / 4 / 1024 ) ;
211+ log ( ` [${ m . role } ] 🖼️ [Image: ${ mimeType } , ~${ sizeKB } KB]` ) ;
212+ log ( `[IMG:${ url } ]` ) ;
213+ } else {
214+ log ( ` [${ m . role } ] 🖼️ [Image URL: ${ url . slice ( 0 , 80 ) } …]` ) ;
215+ }
216+ }
217+ }
218+ } else {
219+ log ( ` [${ m . role } ] ${ JSON . stringify ( m . content ) . slice ( 0 , 200 ) } ` ) ;
220+ }
221+ }
184222
185223 try {
186224 const stream = await client . chat . completions . create ( params , {
@@ -193,6 +231,7 @@ async function llmCall(messages, opts = {}) {
193231 let model = '' ;
194232 let usage = { } ;
195233 let tokenCount = 0 ;
234+ let tokenBuffer = '' ;
196235
197236 for await ( const chunk of stream ) {
198237 resetIdle ( ) ;
@@ -204,8 +243,43 @@ async function llmCall(messages, opts = {}) {
204243 if ( delta ?. reasoning_content ) reasoningContent += delta . reasoning_content ;
205244 if ( delta ?. content || delta ?. reasoning_content ) {
206245 tokenCount ++ ;
246+ // Buffer and log tokens — tag with field source
247+ const isContent = ! ! delta ?. content ;
248+ const tok = delta ?. content || delta ?. reasoning_content || '' ;
249+ // Tag first token of each field type
250+ if ( tokenCount === 1 ) tokenBuffer += isContent ? '[C] ' : '[R] ' ;
251+ tokenBuffer += tok ;
252+ if ( tokenCount % 20 === 0 ) {
253+ log ( tokenBuffer ) ;
254+ tokenBuffer = '' ;
255+ }
207256 if ( tokenCount % 100 === 0 ) {
208- log ( ` … ${ tokenCount } tokens received` ) ;
257+ log ( ` … ${ tokenCount } tokens (content: ${ content . length } c, reasoning: ${ reasoningContent . length } c)` ) ;
258+ }
259+
260+ // Smart early abort for JSON-expected tests:
261+ // If the model is producing reasoning_content (thinking) for a JSON test,
262+ // abort after 100 reasoning tokens — it should output JSON directly.
263+ if ( opts . expectJSON && ! isContent && tokenCount > 100 ) {
264+ log ( ` ⚠ Aborting: ${ tokenCount } reasoning tokens for JSON test — model is thinking instead of outputting JSON` ) ;
265+ controller . abort ( ) ;
266+ break ;
267+ }
268+ // If content is arriving, check it starts with JSON
269+ if ( opts . expectJSON && isContent && content . length >= 50 ) {
270+ const stripped = content . replace ( /<think>[\s\S]*?<\/think>\s*/gi, '' ) . trimStart ( ) ;
271+ if ( stripped . length >= 50 && ! /^\s*[{\[]/ . test ( stripped ) ) {
272+ log ( ` ⚠ Aborting: expected JSON but got: "${ stripped . slice ( 0 , 80 ) } …"` ) ;
273+ controller . abort ( ) ;
274+ break ;
275+ }
276+ }
277+ // Hard cap: abort if token count far exceeds maxTokens (server may
278+ // not count thinking tokens toward the limit)
279+ if ( opts . maxTokens && tokenCount > opts . maxTokens * 3 ) {
280+ log ( ` ⚠ Aborting: ${ tokenCount } tokens exceeds ${ opts . maxTokens } ×3 safety limit` ) ;
281+ controller . abort ( ) ;
282+ break ;
209283 }
210284 }
211285
@@ -224,6 +298,9 @@ async function llmCall(messages, opts = {}) {
224298 if ( chunk . usage ) usage = chunk . usage ;
225299 }
226300
301+ // Flush remaining token buffer
302+ if ( tokenBuffer ) log ( tokenBuffer ) ;
303+
227304 // If the model only produced reasoning_content (thinking) with no content,
228305 // use the reasoning output as the response content for evaluation purposes.
229306 if ( ! content && reasoningContent ) {
@@ -337,12 +414,11 @@ ${userMessage}
3374144. Keep system messages (they contain tool results)
338415
339416## Response Format
340- Return ONLY this JSON ( no other text) :
341- {"keep": [0, 5, 8 ], "summary": "brief 1-line summary of dropped exchanges "}
417+ Respond with ONLY a valid JSON object, no other text:
418+ {"keep": [<actual index numbers from the list above> ], "summary": "< brief 1-line summary of what was dropped> "}
342419
343- - "keep": array of message indices to KEEP (from the index list above)
344- - "summary": what the dropped messages were about (so context is not lost entirely)
345- - If nothing should be dropped, set keep to ALL indices and summary to ""` ;
420+ Example: if keeping messages at indices 0, 18, 22 → {"keep": [0, 18, 22], "summary": "Removed 4 duplicate 'what happened today' questions"}
421+ If nothing should be dropped, keep ALL indices and set summary to "".` ;
346422}
347423
348424suite ( '📋 Context Preprocessing' , async ( ) => {
@@ -356,7 +432,7 @@ suite('📋 Context Preprocessing', async () => {
356432 { idx : 18 , ts : '12:56 PM' , text : 'What has happened today' } ,
357433 { idx : 22 , ts : '1:08 PM' , text : 'What has happened today' } ,
358434 ] ;
359- const r = await llmCall ( [ { role : 'user' , content : buildPreprocessPrompt ( idx , 'What has happened today?' ) } ] ) ;
435+ const r = await llmCall ( [ { role : 'user' , content : buildPreprocessPrompt ( idx , 'What has happened today?' ) } ] , { maxTokens : 300 , expectJSON : true } ) ;
360436 const p = parseJSON ( r . content ) ;
361437 assert ( Array . isArray ( p . keep ) , 'keep must be array' ) ;
362438 assert ( p . keep . length <= 3 , `Expected ≤3, got ${ p . keep . length } ` ) ;
@@ -373,7 +449,7 @@ suite('📋 Context Preprocessing', async () => {
373449 { idx : 18 , ts : '12:00 PM' , text : 'What is the system status?' } ,
374450 { idx : 22 , ts : '1:00 PM' , text : 'What has happened today' } ,
375451 ] ;
376- const r = await llmCall ( [ { role : 'user' , content : buildPreprocessPrompt ( idx , 'Any alerts triggered?' ) } ] ) ;
452+ const r = await llmCall ( [ { role : 'user' , content : buildPreprocessPrompt ( idx , 'Any alerts triggered?' ) } ] , { maxTokens : 300 , expectJSON : true } ) ;
377453 const p = parseJSON ( r . content ) ;
378454 assert ( Array . isArray ( p . keep ) , 'keep must be array' ) ;
379455 assert ( p . keep . includes ( 3 ) || p . keep . includes ( 10 ) || p . keep . includes ( 18 ) , 'Should keep unique topics' ) ;
@@ -387,7 +463,7 @@ suite('📋 Context Preprocessing', async () => {
387463 { idx : 6 , ts : '10:00 AM' , text : 'What is the system status?' } ,
388464 { idx : 10 , ts : '11:00 AM' , text : 'Analyze the clip from 9:40 AM' } ,
389465 ] ;
390- const r = await llmCall ( [ { role : 'user' , content : buildPreprocessPrompt ( idx , 'Any new motion events?' ) } ] ) ;
466+ const r = await llmCall ( [ { role : 'user' , content : buildPreprocessPrompt ( idx , 'Any new motion events?' ) } ] , { maxTokens : 300 , expectJSON : true } ) ;
391467 const p = parseJSON ( r . content ) ;
392468 assert ( Array . isArray ( p . keep ) && p . keep . length === 4 , `Expected 4, got ${ p . keep ?. length } ` ) ;
393469 return `kept all 4 ✓` ;
@@ -398,7 +474,7 @@ suite('📋 Context Preprocessing', async () => {
398474 { idx : 0 , ts : '9:00 AM' , text : 'Hello' } ,
399475 { idx : 2 , ts : '9:05 AM' , text : 'Show cameras' } ,
400476 ] ;
401- const r = await llmCall ( [ { role : 'user' , content : buildPreprocessPrompt ( idx , 'Thanks' ) } ] ) ;
477+ const r = await llmCall ( [ { role : 'user' , content : buildPreprocessPrompt ( idx , 'Thanks' ) } ] , { maxTokens : 300 , expectJSON : true } ) ;
402478 const p = parseJSON ( r . content ) ;
403479 assert ( Array . isArray ( p . keep ) , 'keep must be array' ) ;
404480 return `kept ${ p . keep . length } /2` ;
@@ -427,7 +503,7 @@ suite('📋 Context Preprocessing', async () => {
427503 { idx : 36 , ts : '12:30 PM' , text : 'What happened today?' } ,
428504 { idx : 38 , ts : '12:45 PM' , text : 'Were there any packages delivered?' } ,
429505 ] ;
430- const r = await llmCall ( [ { role : 'user' , content : buildPreprocessPrompt ( idx , 'What happened today?' ) } ] ) ;
506+ const r = await llmCall ( [ { role : 'user' , content : buildPreprocessPrompt ( idx , 'What happened today?' ) } ] , { maxTokens : 300 , expectJSON : true } ) ;
431507 const p = parseJSON ( r . content ) ;
432508 assert ( Array . isArray ( p . keep ) , 'keep must be array' ) ;
433509 // 10 duplicates of "What happened today?" → should keep ≤12 of 20
@@ -444,7 +520,7 @@ suite('📋 Context Preprocessing', async () => {
444520 { idx : 3 , ts : '9:05 AM' , text : '[System] Alert triggered: person at front door' } ,
445521 { idx : 4 , ts : '9:10 AM' , text : 'What happened today?' } ,
446522 ] ;
447- const r = await llmCall ( [ { role : 'user' , content : buildPreprocessPrompt ( idx , 'Show me alerts' ) } ] ) ;
523+ const r = await llmCall ( [ { role : 'user' , content : buildPreprocessPrompt ( idx , 'Show me alerts' ) } ] , { maxTokens : 300 , expectJSON : true } ) ;
448524 const p = parseJSON ( r . content ) ;
449525 assert ( Array . isArray ( p . keep ) , 'keep must be array' ) ;
450526 // System messages (idx 1, 3) must be kept
@@ -538,7 +614,7 @@ suite('🧠 Knowledge Distillation', async () => {
538614 const r = await llmCall ( [
539615 { role : 'system' , content : DISTILL_PROMPT } ,
540616 { role : 'user' , content : `## Topic: Camera Setup\n## Existing KIs: (none)\n## Conversation\nUser: I have three cameras. Front door is a Blink Mini, living room is Blink Indoor, side parking is Blink Outdoor.\nAegis: Got it! Want to set up alerts?\nUser: Yes, person detection on front door after 10pm. My name is Sam.\nAegis: Alert set. Nice to meet you, Sam!` } ,
541- ] ) ;
617+ ] , { maxTokens : 500 , expectJSON : true } ) ;
542618 const p = parseJSON ( r . content ) ;
543619 assert ( p && typeof p === 'object' , 'Must return object' ) ;
544620 const facts = ( p . items || [ ] ) . reduce ( ( n , i ) => n + ( i . facts ?. length || 0 ) , 0 ) + ( p . new_items || [ ] ) . reduce ( ( n , i ) => n + ( i . facts ?. length || 0 ) , 0 ) ;
@@ -550,7 +626,7 @@ suite('🧠 Knowledge Distillation', async () => {
550626 const r = await llmCall ( [
551627 { role : 'system' , content : DISTILL_PROMPT } ,
552628 { role : 'user' , content : `## Topic: Greeting\n## Existing KIs: (none)\n## Conversation\nUser: Hi\nAegis: Hello! How can I help?\nUser: Thanks, bye\nAegis: Goodbye!` } ,
553- ] ) ;
629+ ] , { maxTokens : 500 , expectJSON : true } ) ;
554630 const p = parseJSON ( r . content ) ;
555631 const facts = ( p . items || [ ] ) . reduce ( ( n , i ) => n + ( i . facts ?. length || 0 ) , 0 ) + ( p . new_items || [ ] ) . reduce ( ( n , i ) => n + ( i . facts ?. length || 0 ) , 0 ) ;
556632 assert ( facts === 0 , `Expected 0 facts, got ${ facts } ` ) ;
@@ -561,7 +637,7 @@ suite('🧠 Knowledge Distillation', async () => {
561637 const r = await llmCall ( [
562638 { role : 'system' , content : DISTILL_PROMPT } ,
563639 { role : 'user' , content : `## Topic: Alert Configuration\n## Existing KIs: alert_preferences\n## Conversation\nUser: No notifications from side parking 8am-5pm. Too many false alarms from passing cars.\nAegis: Quiet hours set for side parking 8 AM-5 PM.\nUser: Front door alerts go to Telegram. Discord for everything else.\nAegis: Done — front door to Telegram, rest to Discord.` } ,
564- ] ) ;
640+ ] , { maxTokens : 500 , expectJSON : true } ) ;
565641 const p = parseJSON ( r . content ) ;
566642 const facts = ( p . items || [ ] ) . reduce ( ( n , i ) => n + ( i . facts ?. length || 0 ) , 0 ) + ( p . new_items || [ ] ) . reduce ( ( n , i ) => n + ( i . facts ?. length || 0 ) , 0 ) ;
567643 assert ( facts >= 2 , `Expected ≥2 facts, got ${ facts } ` ) ;
@@ -572,7 +648,7 @@ suite('🧠 Knowledge Distillation', async () => {
572648 const r = await llmCall ( [
573649 { role : 'system' , content : DISTILL_PROMPT } ,
574650 { role : 'user' , content : `## Topic: Camera Update\n## Existing KIs: home_profile (facts: ["3 cameras: Blink Mini front, Blink Indoor living, Blink Outdoor side", "Owner: Sam"])\n## Conversation\nUser: I just installed a fourth camera in the backyard. It's a Reolink Argus 3 Pro.\nAegis: Nice upgrade! I've noted your new backyard Reolink camera. That brings your total to 4 cameras.\nUser: Also, I got a dog named Max, golden retriever.\nAegis: Welcome, Max! I'll note that for the pet detections.` } ,
575- ] ) ;
651+ ] , { maxTokens : 500 , expectJSON : true } ) ;
576652 const p = parseJSON ( r . content ) ;
577653 const allFacts = [ ...( p . items || [ ] ) . flatMap ( i => i . facts || [ ] ) , ...( p . new_items || [ ] ) . flatMap ( i => i . facts || [ ] ) ] ;
578654 assert ( allFacts . length >= 2 , `Expected ≥2 facts, got ${ allFacts . length } ` ) ;
@@ -587,7 +663,7 @@ suite('🧠 Knowledge Distillation', async () => {
587663 const r = await llmCall ( [
588664 { role : 'system' , content : DISTILL_PROMPT } ,
589665 { role : 'user' , content : `## Topic: Camera Change\n## Existing KIs: home_profile (facts: ["3 cameras: Blink Mini front, Blink Indoor living, Blink Outdoor side"])\n## Conversation\nUser: I replaced the living room camera. The Blink Indoor died. I put a Ring Indoor there now.\nAegis: Got it — living room camera is now a Ring Indoor. Updated.\nUser: Actually I also moved the side parking camera to the garage instead.\nAegis: Camera moved from side parking to garage, noted.` } ,
590- ] ) ;
666+ ] , { maxTokens : 500 , expectJSON : true } ) ;
591667 const p = parseJSON ( r . content ) ;
592668 const allFacts = [ ...( p . items || [ ] ) . flatMap ( i => i . facts || [ ] ) , ...( p . new_items || [ ] ) . flatMap ( i => i . facts || [ ] ) ] ;
593669 assert ( allFacts . length >= 1 , `Expected ≥1 fact, got ${ allFacts . length } ` ) ;
@@ -634,7 +710,7 @@ suite('🔔 Event Deduplication', async () => {
634710 const r = await llmCall ( [
635711 { role : 'system' , content : 'You are a security event classifier. Respond only with valid JSON.' } ,
636712 { role : 'user' , content : buildDedupPrompt ( s . current , s . recent , s . age_sec ) } ,
637- ] , { maxTokens : 150 , temperature : 0.1 } ) ;
713+ ] , { maxTokens : 150 , temperature : 0.1 , expectJSON : true } ) ;
638714 const p = parseJSON ( r . content ) ;
639715 if ( s . expected_duplicate !== undefined ) {
640716 assert ( p . duplicate === s . expected_duplicate , `Expected duplicate=${ s . expected_duplicate } , got ${ p . duplicate } ` ) ;
@@ -847,7 +923,7 @@ suite('🛡️ Security Classification', async () => {
847923 const r = await llmCall ( [
848924 { role : 'system' , content : SECURITY_CLASSIFY_PROMPT } ,
849925 { role : 'user' , content : `Event description: ${ s . description } ` } ,
850- ] , { maxTokens : 200 , temperature : 0.1 } ) ;
926+ ] , { maxTokens : 200 , temperature : 0.1 , expectJSON : true } ) ;
851927 const p = parseJSON ( r . content ) ;
852928 assert ( expectedClassifications . includes ( p . classification ) ,
853929 `Expected "${ expectedLabel } ", got "${ p . classification } "` ) ;
0 commit comments