@@ -110,6 +110,7 @@ function printUsage() {
110110 " --plugin=dist|package Load plugin from local dist URI or package name (default: dist)" ,
111111 " --max-cases=N Hard cap number of cases per scenario" ,
112112 " --report-json=PATH Write JSON report to PATH (relative to repo root)" ,
113+ " --strict-capabilities Fail unsupported account/model capabilities instead of skipping them" ,
113114 " --no-restore Keep generated local config files after run" ,
114115 " -h, --help Show help" ,
115116 ] . join ( "\n" ) ,
@@ -180,18 +181,67 @@ export function resolveMatrixTimeoutMs(smoke = false) {
180181 return parsedTimeout ;
181182}
182183
183- function hasCompletedSuccessfully ( output , token ) {
/**
 * Collect the JSON objects embedded in newline-delimited command output.
 *
 * The text is split on LF or CRLF and each line is trimmed. Only lines that
 * begin with "{" are passed to JSON.parse; lines that fail to parse are
 * dropped without raising.
 *
 * @param {string} output - Raw text to scan.
 * @returns {Array<object>} Parsed values, in the order their lines appear.
 */
function parseNdjsonEvents(output) {
  return output.split(/\r?\n/).reduce((parsed, rawLine) => {
    const candidate = rawLine.trim();
    if (candidate.startsWith("{")) {
      try {
        parsed.push(JSON.parse(candidate));
      } catch {
        // Wrapper noise or a truncated line: deliberately skipped.
      }
    }
    return parsed;
  }, []);
}
199+
/**
 * Find the highest index whose element satisfies a predicate.
 *
 * Elements are visited from the end of the array towards the start, and the
 * search stops at the first match.
 *
 * @param {Array} items - Array to search.
 * @param {(item: *, index: number) => *} predicate - Called with the element
 *   and its index; a truthy result selects that index.
 * @returns {number} The matching index, or -1 when nothing matches.
 */
function findLastIndex(items, predicate) {
  let cursor = items.length;
  while (cursor > 0) {
    cursor -= 1;
    if (predicate(items[cursor], cursor)) {
      return cursor;
    }
  }
  return -1;
}
208+
/**
 * Report whether a failure event appears after the last completion event.
 *
 * Completion events have type "turn.completed" or "response.completed".
 * Failure events have type "error", "turn.failed", "response.failed",
 * "response.error" or "response.incomplete". When no completion event is
 * present, a failure event anywhere in the list counts.
 *
 * @param {Array<object>} events - Parsed events in emission order.
 * @returns {boolean} True when a failure event follows the final completion.
 */
function hasTerminalFailure(events) {
  const completionTypes = new Set(["turn.completed", "response.completed"]);
  const failureTypes = new Set([
    "error",
    "turn.failed",
    "response.failed",
    "response.error",
    "response.incomplete",
  ]);

  const isCompletion = (event) => completionTypes.has(event?.type);
  const lastCompletedIndex = findLastIndex(events, isCompletion);

  // Only failures positioned after the final completion are terminal.
  const isLaterFailure = (event, position) =>
    position > lastCompletedIndex && failureTypes.has(event?.type);

  return findLastIndex(events, isLaterFailure) !== -1;
}
190228
191- function getSmokeSkipReason ( exitCode , output ) {
192- if ( exitCode === 124 ) {
193- return "timed-out" ;
/**
 * Decide whether command output represents a successfully completed run.
 *
 * When the output contains at least one parseable JSON event line, the
 * result depends on the events alone: it is true only if there is no
 * terminal failure and some event has type "turn.completed" or
 * "response.completed". When no events can be parsed, the result falls back
 * to whether the raw output contains the token.
 *
 * @param {string} output - Combined output of the executed case.
 * @param {string} token - Marker string expected in the output.
 * @returns {boolean} True when the run is considered complete.
 */
function hasCompletedSuccessfully(output, token) {
  const events = parseNdjsonEvents(output);

  // No structured events at all: the token is the only signal available.
  if (events.length === 0) {
    return output.includes(token);
  }

  const reachedCompletion = (event) => {
    const kind = event?.type;
    return kind === "turn.completed" || kind === "response.completed";
  };

  return !hasTerminalFailure(events) && events.some(reachedCompletion);
}
243+
244+ function getCapabilitySkipReason ( exitCode , output , smoke ) {
195245 if ( / n o t s u p p o r t e d w h e n u s i n g c o d e x w i t h a c h a t g p t a c c o u n t / i. test ( output ) ) {
196246 return "unsupported-model" ;
197247 }
@@ -201,14 +251,26 @@ function getSmokeSkipReason(exitCode, output) {
201251 ) {
202252 return "unsupported-reasoning" ;
203253 }
254+ if ( smoke && exitCode === 124 ) {
255+ return "timed-out" ;
256+ }
204257 return null ;
205258}
206259
207- function finalizeModelCaseResult ( caseInfo , exitCode , output , token , smoke ) {
260+ function finalizeModelCaseResult (
261+ caseInfo ,
262+ exitCode ,
263+ output ,
264+ token ,
265+ { smoke, strictCapabilities } = { } ,
266+ ) {
208267 const hasToken = output . includes ( token ) ;
209268 const completed = hasCompletedSuccessfully ( output , token ) ;
210269 const ok = exitCode === 0 && completed ;
211- const skipReason = ! ok && smoke ? getSmokeSkipReason ( exitCode , output ) : null ;
270+ const skipReason =
271+ ! ok && strictCapabilities !== true
272+ ? getCapabilitySkipReason ( exitCode , output , smoke === true )
273+ : null ;
212274
213275 return {
214276 ...caseInfo ,
@@ -228,8 +290,12 @@ export function __finalizeModelCaseResultForTests(
228290 output ,
229291 token ,
230292 smoke = false ,
293+ strictCapabilities = false ,
231294) {
232- return finalizeModelCaseResult ( caseInfo , exitCode , output , token , smoke ) ;
295+ return finalizeModelCaseResult ( caseInfo , exitCode , output , token , {
296+ smoke,
297+ strictCapabilities,
298+ } ) ;
233299}
234300
235301function stopCodexServersInternal ( ) {
@@ -361,7 +427,10 @@ function executeModelCase(caseInfo, index) {
361427 124 ,
362428 `Timed out after ${ timeoutMs } ms` ,
363429 token ,
364- caseInfo . smoke === true ,
430+ {
431+ smoke : caseInfo . smoke === true ,
432+ strictCapabilities : caseInfo . strictCapabilities === true ,
433+ } ,
365434 ) ;
366435 }
367436
@@ -373,7 +442,10 @@ function executeModelCase(caseInfo, index) {
373442 exitCode ,
374443 combinedOutput ,
375444 token ,
376- caseInfo . smoke === true ,
445+ {
446+ smoke : caseInfo . smoke === true ,
447+ strictCapabilities : caseInfo . strictCapabilities === true ,
448+ } ,
377449 ) ;
378450}
379451
@@ -451,6 +523,7 @@ async function runScenario(scenario, options) {
451523 ( caseInfo ) => ( {
452524 ...caseInfo ,
453525 smoke : options . smoke ,
526+ strictCapabilities : options . strictCapabilities ,
454527 } ) ,
455528 ) ;
456529 console . log ( `\n=== ${ scenario . toUpperCase ( ) } (${ cases . length } cases) ===` ) ;
@@ -492,6 +565,7 @@ async function main() {
492565 const scenarioValue =
493566 parseArgValue ( args , "--scenario" ) ?? ( smoke ? "modern" : "all" ) ;
494567 const pluginMode = parseArgValue ( args , "--plugin" ) ?? "dist" ;
568+ const strictCapabilities = args . includes ( "--strict-capabilities" ) ;
495569 const noRestore = args . includes ( "--no-restore" ) ;
496570 const maxCasesRaw = parseArgValue ( args , "--max-cases" ) ;
497571 const maxCases = maxCasesRaw ? Number . parseInt ( maxCasesRaw , 10 ) : 0 ;
@@ -539,6 +613,7 @@ async function main() {
539613 smoke,
540614 maxCases,
541615 pluginRef,
616+ strictCapabilities,
542617 } ) ;
543618 allResults . push (
544619 ...scenarioResults . map ( ( item ) => ( { ...item , scenario } ) ) ,
0 commit comments