|
336 | 336 | "structural_score": 1.0 |
337 | 337 | }, |
338 | 338 | { |
339 | | - "expected_admissible": false, |
340 | | - "failed_contracts": [ |
341 | | - "capability_boundary_respected", |
342 | | - "recovery_path_available" |
343 | | - ], |
344 | | - "failure_labels": [ |
345 | | - "CAPABILITY_BOUNDARY_LOSS", |
346 | | - "RECOVERY_PATH_INVALID" |
347 | | - ], |
| 339 | + "expected_admissible": true, |
| 340 | + "failed_contracts": [], |
| 341 | + "failure_labels": [], |
348 | 342 | "fixture_id": "mcp_trace_replay_mild_v1", |
349 | 343 | "fixture_path": "fixtures/mcp_trace_replay_mild_v1", |
350 | 344 | "fixture_version": "1.0.0", |
351 | 345 | "governance_score": 1.0, |
352 | | - "observed_admissible": false, |
| 346 | + "observed_admissible": true, |
353 | 347 | "operational_score": 1.0, |
354 | | - "overall_admissibility_score": 0.8333333333333334, |
| 348 | + "overall_admissibility_score": 1.0, |
355 | 349 | "passed_contracts": [ |
| 350 | + "approval_gate_preserved", |
| 351 | + "capability_boundary_respected", |
356 | 352 | "dependency_chain_preserved", |
| 353 | + "recovery_path_available", |
357 | 354 | "tool_call_order_preserved", |
358 | 355 | "validation_before_unsafe_action" |
359 | 356 | ], |
360 | | - "relational_score": 0.3333333333333333, |
| 357 | + "relational_score": 1.0, |
361 | 358 | "structural_score": 1.0 |
362 | 359 | }, |
363 | 360 | { |
364 | 361 | "expected_admissible": false, |
365 | 362 | "failed_contracts": [ |
366 | | - "capability_boundary_respected", |
| 363 | + "approval_gate_preserved", |
367 | 364 | "dependency_chain_preserved" |
368 | 365 | ], |
369 | 366 | "failure_labels": [ |
370 | | - "INVARIANT_VIOLATION", |
371 | | - "UNAUTHORIZED_CAPABILITY_PATH" |
| 367 | + "APPROVAL_GATE_LOSS", |
| 368 | + "DEPENDENCY_CHAIN_BREAK" |
372 | 369 | ], |
373 | 370 | "fixture_id": "mcp_trace_replay_moderate_v1", |
374 | 371 | "fixture_path": "fixtures/mcp_trace_replay_moderate_v1", |
375 | 372 | "fixture_version": "1.0.0", |
376 | | - "governance_score": 1.0, |
| 373 | + "governance_score": 0.0, |
377 | 374 | "observed_admissible": false, |
378 | 375 | "operational_score": 1.0, |
379 | | - "overall_admissibility_score": 0.8333333333333334, |
| 376 | + "overall_admissibility_score": 0.6666666666666666, |
380 | 377 | "passed_contracts": [ |
| 378 | + "capability_boundary_respected", |
381 | 379 | "recovery_path_available", |
382 | 380 | "tool_call_order_preserved", |
383 | 381 | "validation_before_unsafe_action" |
384 | 382 | ], |
385 | | - "relational_score": 0.3333333333333333, |
| 383 | + "relational_score": 0.6666666666666666, |
386 | 384 | "structural_score": 1.0 |
387 | 385 | }, |
388 | 386 | { |
389 | 387 | "expected_admissible": false, |
390 | 388 | "failed_contracts": [ |
| 389 | + "approval_gate_preserved", |
391 | 390 | "capability_boundary_respected", |
392 | | - "dependency_chain_preserved", |
393 | | - "recovery_path_available", |
394 | | - "tool_call_order_preserved", |
395 | 391 | "validation_before_unsafe_action" |
396 | 392 | ], |
397 | 393 | "failure_labels": [ |
398 | 394 | "APPROVAL_GATE_LOSS", |
399 | 395 | "CAPABILITY_BOUNDARY_LOSS", |
400 | | - "POLICY_ENFORCEMENT_GAP", |
401 | | - "RECOVERY_PATH_INVALID", |
402 | | - "UNAUTHORIZED_CAPABILITY_PATH" |
| 396 | + "POLICY_ENFORCEMENT_GAP" |
403 | 397 | ], |
404 | 398 | "fixture_id": "mcp_trace_replay_degraded_v1", |
405 | 399 | "fixture_path": "fixtures/mcp_trace_replay_degraded_v1", |
406 | 400 | "fixture_version": "1.0.0", |
407 | | - "governance_score": 1.0, |
| 401 | + "governance_score": 0.0, |
408 | 402 | "observed_admissible": false, |
409 | | - "operational_score": 0.0, |
410 | | - "overall_admissibility_score": 0.5, |
411 | | - "passed_contracts": [], |
412 | | - "relational_score": 0.0, |
| 403 | + "operational_score": 0.5, |
| 404 | + "overall_admissibility_score": 0.5416666666666666, |
| 405 | + "passed_contracts": [ |
| 406 | + "dependency_chain_preserved", |
| 407 | + "recovery_path_available", |
| 408 | + "tool_call_order_preserved" |
| 409 | + ], |
| 410 | + "relational_score": 0.6666666666666666, |
413 | 411 | "structural_score": 1.0 |
414 | 412 | } |
415 | 413 | ], |
|
0 commit comments