2626 python examples/langgraph_integration.py
2727 python examples/openai_agents_integration.py
2828 agentci summarize examples/openai_agents_episode.json
29+ agentci summarize examples/openai_agents_episode.json --json > /tmp/agentci-summary.json
30+ python -c "import json; data=json.load(open('/tmp/agentci-summary.json')); assert data['episode_id'] == 'openai-agents-demo'; assert data['tool_calls'] >= 1"
2931 agentci diff-html examples/math_episode.json examples/math_episode_candidate.json examples/math_diff.html
3032 agentci assert-regression examples/math_episode.json examples/math_episode_latency_candidate.json --ignore-diff-prefix metric:latency_ms
33+ agentci assert-regression examples/math_episode.json examples/math_episode_latency_candidate.json --ignore-diff-prefix metric:latency_ms --json > /tmp/agentci-regression.json
34+ python -c "import json; data=json.load(open('/tmp/agentci-regression.json')); assert data['passed'] is True"
3135 agentci detect-flaky examples/math_episode.json examples/math_episode_latency_candidate.json examples/math_episode_candidate.json
3236
3337 tracepack :
4852 run : |
4953 python examples/make_sample_episodes.py
5054 tracepack scan examples/source_episodes
55+ tracepack scan examples/source_episodes --json > /tmp/tracepack-scan.json
56+ python -c "import json; data=json.load(open('/tmp/tracepack-scan.json')); assert data['episode_count'] == 3; assert data['failures'] == 1"
5157 tracepack build examples/source_episodes examples/demo_pack --only-failures --redact --max-per-signature 1
5258 tracepack inspect examples/demo_pack
59+ tracepack inspect examples/demo_pack --json > /tmp/tracepack-inspect.json
60+ python -c "import json; data=json.load(open('/tmp/tracepack-inspect.json')); assert data['case_count'] == 1; assert data['redacted'] is True"
5361 tracepack export-jsonl examples/demo_pack examples/demo_pack.jsonl
5462 tracepack export-chat examples/demo_pack examples/demo_chat.jsonl
5563
@@ -71,15 +79,23 @@ jobs:
7179 run : |
7280 failmap cluster examples/sample_pack examples/clusters.json
7381 failmap summarize examples/clusters.json
82+ failmap summarize examples/clusters.json --json > /tmp/failmap-summary.json
83+ python -c "import json; data=json.load(open('/tmp/failmap-summary.json')); assert data['cluster_count'] >= 1; assert data['case_count'] >= 1"
7484 failmap markdown examples/clusters.json examples/report.md
7585 failmap compare examples/baseline_clusters.json examples/candidate_clusters.json examples/compare.json
7686 failmap compare-summary examples/compare.json
87+ failmap compare-summary examples/compare.json --json > /tmp/failmap-compare.json
88+ python -c "import json; data=json.load(open('/tmp/failmap-compare.json')); assert 'summary' in data; assert data['cluster_count'] >= 1"
7789 failmap compare-markdown examples/compare.json examples/compare.md
7890 failmap issue-drafts examples/compare.json examples/issues --rules examples/triage_rules.json
7991 failmap issue-bundle examples/issues examples/bundle
8092 failmap issue-bundle-summary examples/bundle/bundle.json
93+ failmap issue-bundle-summary examples/bundle/bundle.json --json > /tmp/failmap-bundle.json
94+ python -c "import json; data=json.load(open('/tmp/failmap-bundle.json')); assert data['draft_count'] >= 1"
8195 failmap trend examples/trends.json examples/baseline_clusters.json examples/candidate_clusters.json examples/release3_clusters.json
8296 failmap trend-summary examples/trends.json
97+ failmap trend-summary examples/trends.json --json > /tmp/failmap-trends.json
98+ python -c "import json; data=json.load(open('/tmp/failmap-trends.json')); assert data['snapshot_count'] == 3"
8399 failmap trend-markdown examples/trends.json examples/trends.md
84100
85101 packslice :
@@ -100,4 +116,6 @@ jobs:
100116 run : |
101117 packslice split examples/sample_pack examples/split_demo --group-by signature --train-ratio 70 --eval-ratio 15 --test-ratio 15
102118 packslice summarize examples/split_demo
119+ packslice summarize examples/split_demo --json > /tmp/packslice-summary.json
120+ python -c "import json; data=json.load(open('/tmp/packslice-summary.json')); assert data['total_cases'] == 6; assert len(data['splits']) == 3"
103121 packslice markdown examples/split_demo examples/split_demo/REPORT.md
0 commit comments