@@ -128,8 +128,10 @@ async function llmCall(messages, opts = {}) {
128128 results . tokenTotals . total += usage . total_tokens || 0 ;
129129
130130 // Capture model name from first response
131- if ( ! results . model . name && data . model ) {
132- results . model . name = data . model ;
131+ if ( opts . vlm ) {
132+ if ( ! results . model . vlm && data . model ) results . model . vlm = data . model ;
133+ } else {
134+ if ( ! results . model . name && data . model ) results . model . name = data . model ;
133135 }
134136
135137 return { content, toolCalls, usage, model : data . model } ;
@@ -471,7 +473,9 @@ suite('🔧 Tool Use', async () => {
471473 const scenarios = JSON . parse ( fs . readFileSync ( path . join ( FIXTURES_DIR , 'tool-use-scenarios.json' ) , 'utf8' ) ) ;
472474
473475 for ( const s of scenarios . tool_use_scenarios ) {
474- await test ( `${ s . name } → ${ s . expected_tool } ` , async ( ) => {
476+ const expectedTools = Array . isArray ( s . expected_tool ) ? s . expected_tool : [ s . expected_tool ] ;
477+ const expectedLabel = expectedTools . join ( '|' ) ;
478+ await test ( `${ s . name } → ${ expectedLabel } ` , async ( ) => {
475479 const messages = [
476480 { role : 'system' , content : 'You are Aegis, a home security AI assistant. Use the available tools to answer user questions. Always call the most appropriate tool — never decline to use a tool.' } ,
477481 ...( s . history || [ ] ) ,
@@ -482,15 +486,15 @@ suite('🔧 Tool Use', async () => {
482486 // Check if model returned tool calls
483487 if ( r . toolCalls && r . toolCalls . length > 0 ) {
484488 const toolName = r . toolCalls [ 0 ] . function . name ;
485- assert ( toolName === s . expected_tool , `Expected ${ s . expected_tool } , got ${ toolName } ` ) ;
489+ assert ( expectedTools . includes ( toolName ) , `Expected ${ expectedLabel } , got ${ toolName } ` ) ;
486490 return `tool_call: ${ toolName } (${ r . toolCalls [ 0 ] . function . arguments ?. slice ( 0 , 40 ) || '...' } )` ;
487491 }
488492
489493 // Some models return tool calls in the content (without native tool calling)
490494 const content = stripThink ( r . content ) . toLowerCase ( ) ;
491- assert ( content . includes ( s . expected_tool ) || content . includes ( s . expected_tool . replace ( '_' , ' ' ) ) ,
492- `Expected mention of ${ s . expected_tool } in response` ) ;
493- return `content mentions ${ s . expected_tool } ` ;
495+ const mentioned = expectedTools . some ( t => content . includes ( t ) || content . includes ( t . replace ( '_' , ' ' ) ) ) ;
496+ assert ( mentioned , `Expected mention of ${ expectedLabel } in response` ) ;
497+ return `content mentions ${ expectedLabel } ` ;
494498 } ) ;
495499 }
496500} ) ;
@@ -599,14 +603,16 @@ Respond with ONLY valid JSON:
599603suite ( '🛡️ Security Classification' , async ( ) => {
600604 const scenarios = JSON . parse ( fs . readFileSync ( path . join ( FIXTURES_DIR , 'tool-use-scenarios.json' ) , 'utf8' ) ) ;
601605 for ( const s of scenarios . security_scenarios ) {
602- await test ( `${ s . name } → ${ s . expected_classification } ` , async ( ) => {
606+ const expectedClassifications = Array . isArray ( s . expected_classification ) ? s . expected_classification : [ s . expected_classification ] ;
607+ const expectedLabel = expectedClassifications . join ( '|' ) ;
608+ await test ( `${ s . name } → ${ expectedLabel } ` , async ( ) => {
603609 const r = await llmCall ( [
604610 { role : 'system' , content : SECURITY_CLASSIFY_PROMPT } ,
605611 { role : 'user' , content : `Event description: ${ s . description } ` } ,
606612 ] , { maxTokens : 200 , temperature : 0.1 } ) ;
607613 const p = parseJSON ( r . content ) ;
608- assert ( p . classification === s . expected_classification ,
609- `Expected "${ s . expected_classification } ", got "${ p . classification } "` ) ;
614+ assert ( expectedClassifications . includes ( p . classification ) ,
615+ `Expected "${ expectedLabel } ", got "${ p . classification } "` ) ;
610616 assert ( Array . isArray ( p . tags ) , 'tags must be array' ) ;
611617 return `${ p . classification } [${ p . tags . slice ( 0 , 3 ) . join ( ', ' ) } ]` ;
612618 } ) ;
@@ -830,7 +836,7 @@ async function main() {
830836 log ( `\n${ '═' . repeat ( 66 ) } ` ) ;
831837 log ( ` RESULTS: ${ passed } /${ total } passed, ${ failed } failed, ${ skipped } skipped (${ ( timeMs / 1000 ) . toFixed ( 1 ) } s)` ) ;
832838 log ( ` TOKENS: ${ results . tokenTotals . prompt } prompt + ${ results . tokenTotals . completion } completion = ${ results . tokenTotals . total } total (${ tokPerSec } tok/s)` ) ;
833- log ( ` MODEL: ${ results . model . name } ` ) ;
839+ log ( ` MODEL: ${ results . model . name } ${ results . model . vlm ? ' | VLM: ' + results . model . vlm : '' } ` ) ;
834840 log ( `${ '═' . repeat ( 66 ) } ` ) ;
835841
836842 if ( failed > 0 ) {
0 commit comments