Commit a6351e5
feat: unified dual-score benchmark (benchmarks/csb/) with 275 tasks in 9 merged suites
Merge 20 suites into 9 thematic suites under benchmarks/csb/.
Agent always produces both direct file edits AND answer.json.
Verifier writes reward_direct.txt + reward_artifact.txt independently.
Original csb_sdlc_* and csb_org_* directories untouched.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 1cbeee5 commit a6351e5
File tree
5,153 files changed
+601011
-39
lines changed
- agents
- benchmarks/csb
- crossrepo
- ccx-config-trace-010
- environment
- tests
- ccx-crossorg-062
- environment
- tests
- ccx-crossorg-121
- environment
- tests
- ccx-crossorg-132
- environment
- tests
- ccx-crossorg-217
- environment
- tests
- ccx-crossorg-218
- environment
- tests
- ccx-crossorg-219
- environment
- tests
- ccx-crossorg-220
- environment
- tests
- ccx-crossorg-221
- environment
- tests
- ccx-crossorg-222
- environment
- tests
- ccx-crossorg-280
- environment
- tests
- ccx-crossorg-288
- environment
- tests
- ccx-crossorg-295
- environment
- tests
- ccx-dep-trace-001
- environment
- tests
- ccx-dep-trace-102
- environment
- tests
- ccx-dep-trace-106
- environment
- tests
- ccx-dep-trace-116
- environment
- tests
- ccx-dep-trace-123
- environment
- tests
- ccx-dep-trace-133
- environment
- tests
- ccx-dep-trace-171
- environment
- tests
- ccx-dep-trace-172
- environment
- tests
- ccx-dep-trace-173
- environment
- tests
- ccx-dep-trace-253
- environment
- tests
- ccx-dep-trace-254
- environment
- tests
- ccx-dep-trace-258
- environment
- tests
- ccx-dep-trace-260
- environment
- tests
- ccx-dep-trace-264
- environment
- tests
- ccx-dep-trace-265
- environment
- tests
- ccx-dep-trace-266
- environment
- tests
- ccx-dep-trace-267
- environment
- tests
- ccx-dep-trace-268
- environment
- tests
- ccx-dep-trace-271
- environment
- tests
- ccx-dep-trace-273
- environment
- tests
- ccx-dep-trace-293
- environment
- tests
- ccx-platform-091
- environment
- tests
- ccx-platform-094
- environment
- tests
- ccx-platform-100
- environment
- tests
- ccx-platform-104
- environment
- tests
- ccx-platform-238
- environment
- tests
- ccx-platform-239
- environment
- tests
- ccx-platform-240
- environment
- tests
- ccx-platform-241
- environment
- tests
- ccx-platform-242
- environment
- tests
- ccx-platform-243
- environment
- tests
- ccx-platform-248
- environment
- tests
- ccx-platform-249
- environment
- tests
- ccx-platform-291
- environment
- tests
- debug
- ansible-galaxy-tar-regression-prove-001
- environment
- tests
- ccx-incident-032
- environment
- tests
- ccx-incident-034
- environment
- tests
- ccx-incident-037
- environment
- tests
- ccx-incident-108
- environment
- tests
- ccx-incident-110
- environment
- tests
- ccx-incident-113
- environment
- tests
- ccx-incident-125
- environment
- tests
- ccx-incident-131
- environment
- tests
- ccx-incident-139
- environment
- tests
- ccx-incident-144
- environment
- tests
- ccx-incident-145
- environment
- tests
- ccx-incident-148
- environment
- tests
- ccx-incident-149
- environment
- tests
- flipt-auth-cookie-regression-prove-001
- environment
- tests
- fixtures
- empty_input
- perfect_input
- linux-acpi-backlight-fault-001
- environment
- tests
- linux-hda-intel-suspend-fault-001
- environment
- tests
- linux-iwlwifi-subdevice-fault-001
- environment
- tests
- linux-nfs-inode-revalidate-fault-001
- environment
- tests
- qutebrowser-adblock-cache-regression-prove-001
- environment
- tests
- qutebrowser-darkmode-threshold-regression-prove-001
- environment
- tests
- qutebrowser-hsv-color-regression-prove-001
- environment
- tests
- qutebrowser-url-regression-prove-001
- environment
- tests
- teleport-ssh-regression-prove-001
- environment
- tests
- tidb-query-plan-regression-debug-001
- environment
- tests
- vuls-oval-regression-prove-001
- environment
- tests
- document
- docgen-inline-002
- environment
- tests
- envoy-arch-doc-gen-001
- environment
- tests
- envoy-migration-doc-gen-001
- environment
- tests
- godot-gdscript-api-docgen-001
- environment
- tests
- grpc-channel-api-docgen-001
- environment
- tests
- k8s-apiserver-doc-gen-001
- environment
- tests
- fixtures
- empty_input
- perfect_input
- k8s-applyconfig-doc-gen-001
- environment
- tests
- k8s-clientgo-doc-gen-001
- environment
- tests
- k8s-fairqueuing-doc-gen-001
- environment
- tests
- k8s-kubelet-cm-doc-gen-001
- environment
- tests
- kafka-api-doc-gen-001
- environment
- tests
- feature
- bustub-hyperloglog-impl-001
- environment
- tests
- camel-fix-protocol-feat-001
- environment
- tests
- fixtures
- empty_input
- perfect_input
- ccx-agentic-081
- environment
- tests
- ccx-agentic-082
- environment
- tests
- ccx-agentic-083
- environment
- tests
- ccx-agentic-122
- environment
- tests
- ccx-agentic-127
- environment
- tests
- ccx-agentic-223
- environment
- tests
- ccx-agentic-224
- environment
- tests
- ccx-agentic-225
- environment
- tests
- ccx-agentic-229
- environment
- tests
- ccx-agentic-232
- environment
- tests
- ccx-agentic-290
- environment
- tests
- cilium-policy-audit-logger-feat-001
- environment
- tests
- cilium-policy-quota-feat-001
- environment
- tests
- curl-http3-priority-feat-001
- environment
- tests
- django-rate-limit-middleware-feat-001
- environment
- tests
- envoy-custom-header-filter-feat-001
- environment
- tests
- envoy-grpc-server-impl-001
- environment
- tests
- flink-pricing-window-feat-001
- environment
- tests
- k8s-noschedule-taint-feat-001
- environment
- solution
- tests
- k8s-runtime-object-impl-001
- environment
- tests
- numpy-rolling-median-feat-001
- environment
- tests
- pandas-merge-asof-indicator-feat-001
- environment
- tests
- postgres-copy-csv-header-feat-001
- environment
- tests
- prometheus-silence-bulk-api-feat-001
- environment
- tests
- pytorch-gradient-noise-feat-001
- environment
- tests
- servo-css-container-query-feat-001
- environment
- tests
- servo-scrollend-event-feat-001
- environment
- solution
- tests
- strata-cds-tranche-feat-001
- environment
- tests
- tensorrt-mxfp4-quant-feat-001
- environment
- solution
- tests
- terraform-compact-diff-fmt-feat-001
- environment
- tests
- vscode-custom-fold-region-feat-001
- environment
- tests
- vscode-stale-diagnostics-feat-001
- environment
- solution
- tests
- fix
- ansible-abc-imports-fix-001
- environment
- solution
- tests
- django-modelchoice-fk-fix-001
- environment
- tests
- django-select-for-update-fix-001
- environment
- tests
- element-web-roomheaderbuttons-can-crash-fix-001
- environment
- tests
- element-web-unread-indicators-diverge-fix-001
- environment
- tests
- envoy-dfp-host-leak-fix-001
- environment
- tests
- envoy-udp-proxy-cds-fix-001
- environment
- tests
- flink-window-late-data-fix-001
- environment
- tests
- k8s-dra-scheduler-event-fix-001
- environment
- tests
- kafka-producer-bufpool-fix-001
- environment
- tests
- nodebb-notif-dropdown-fix-001
- environment
- solution
- tests
- nodebb-plugin-validate-fix-001
- environment
- solution
- tests
- pytorch-cudnn-version-fix-001
- environment
- tests
- pytorch-dynamo-keyerror-fix-001
- environment
- tests
- fixtures
- empty_input
- perfect_input
- pytorch-release-210-fix-001
- environment
- tests
- pytorch-relu-gelu-fusion-fix-001
- environment
- tests
- pytorch-tracer-graph-cleanup-fix-001
- environment
- tests
- teleport-users-can-delete-fix-001
- environment
- tests
- terraform-plan-null-unknown-fix-001
- environment
- tests
- refactor
- beam-pipeline-builder-refac-001
- environment
- tests
- ccx-migration-026
- environment
- tests
- ccx-migration-107
- environment
- tests
- ccx-migration-114
- environment
- tests
- ccx-migration-117
- environment
- tests
- ccx-migration-195
- environment
- tests
- ccx-migration-196
- environment
- tests
- ccx-migration-197
- environment
- tests
- ccx-migration-198
- environment
- tests
- ccx-migration-199
- environment
- tests
- ccx-migration-200
- environment
- tests
- ccx-migration-201
- environment
- tests
- ccx-migration-202
- environment
- tests
- ccx-migration-203
- environment
- tests
- ccx-migration-204
- environment
- tests
- ccx-migration-205
- environment
- tests
- ccx-migration-206
- environment
- tests
- ccx-migration-207
- environment
- tests
- ccx-migration-274
- environment
- tests
- ccx-migration-275
- environment
- tests
- ccx-migration-276
- environment
Some content is hidden
Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
5,153 files changed
+601011
-39
lines changed

| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
77 | 77 | | |
78 | 78 | | |
79 | 79 | | |
80 | | - | |
| 80 | + | |
| 81 | + | |
| 82 | + | |
| 83 | + | |
| 84 | + | |
| 85 | + | |
| 86 | + | |
| 87 | + | |
| 88 | + | |
| 89 | + | |
| 90 | + | |
| 91 | + | |
| 92 | + | |
| 93 | + | |
| 94 | + | |
| 95 | + | |
| 96 | + | |
| 97 | + | |
81 | 98 | | |
82 | 99 | | |
83 | 100 | | |
| |||
106 | 123 | | |
107 | 124 | | |
108 | 125 | | |
109 | | - | |
| 126 | + | |
110 | 127 | | |
111 | 128 | | |
112 | 129 | | |
| |||
643 | 660 | | |
644 | 661 | | |
645 | 662 | | |
646 | | - | |
647 | | - | |
648 | | - | |
649 | | - | |
650 | | - | |
651 | | - | |
652 | | - | |
653 | | - | |
654 | | - | |
655 | | - | |
656 | | - | |
657 | | - | |
658 | | - | |
659 | | - | |
660 | | - | |
661 | | - | |
662 | | - | |
663 | | - | |
664 | | - | |
665 | | - | |
666 | | - | |
667 | | - | |
668 | | - | |
669 | | - | |
670 | | - | |
671 | | - | |
672 | | - | |
673 | | - | |
674 | | - | |
| 663 | + | |
| 664 | + | |
| 665 | + | |
| 666 | + | |
| 667 | + | |
| 668 | + | |
| 669 | + | |
| 670 | + | |
| 671 | + | |
| 672 | + | |
| 673 | + | |
| 674 | + | |
| 675 | + | |
| 676 | + | |
| 677 | + | |
| 678 | + | |
| 679 | + | |
| 680 | + | |
| 681 | + | |
| 682 | + | |
| 683 | + | |
| 684 | + | |
| 685 | + | |
| 686 | + | |
675 | 687 | | |
676 | 688 | | |
677 | 689 | | |
| |||
830 | 842 | | |
831 | 843 | | |
832 | 844 | | |
833 | | - | |
834 | | - | |
835 | | - | |
836 | | - | |
837 | | - | |
838 | | - | |
| 845 | + | |
839 | 846 | | |
840 | 847 | | |
841 | 848 | | |
| |||
Lines changed: 38 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
| 1 | + | |
| 2 | + | |
| 3 | + | |
| 4 | + | |
| 5 | + | |
| 6 | + | |
| 7 | + | |
| 8 | + | |
| 9 | + | |
| 10 | + | |
| 11 | + | |
| 12 | + | |
| 13 | + | |
| 14 | + | |
| 15 | + | |
| 16 | + | |
| 17 | + | |
| 18 | + | |
| 19 | + | |
| 20 | + | |
| 21 | + | |
| 22 | + | |
| 23 | + | |
| 24 | + | |
| 25 | + | |
| 26 | + | |
| 27 | + | |
| 28 | + | |
| 29 | + | |
| 30 | + | |
| 31 | + | |
| 32 | + | |
| 33 | + | |
| 34 | + | |
| 35 | + | |
| 36 | + | |
| 37 | + | |
| 38 | + | |
Lines changed: 44 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
| 1 | + | |
| 2 | + | |
| 3 | + | |
| 4 | + | |
| 5 | + | |
| 6 | + | |
| 7 | + | |
| 8 | + | |
| 9 | + | |
| 10 | + | |
| 11 | + | |
| 12 | + | |
| 13 | + | |
| 14 | + | |
| 15 | + | |
| 16 | + | |
| 17 | + | |
| 18 | + | |
| 19 | + | |
| 20 | + | |
| 21 | + | |
| 22 | + | |
| 23 | + | |
| 24 | + | |
| 25 | + | |
| 26 | + | |
| 27 | + | |
| 28 | + | |
| 29 | + | |
| 30 | + | |
| 31 | + | |
| 32 | + | |
| 33 | + | |
| 34 | + | |
| 35 | + | |
| 36 | + | |
| 37 | + | |
| 38 | + | |
| 39 | + | |
| 40 | + | |
| 41 | + | |
| 42 | + | |
| 43 | + | |
| 44 | + | |
Lines changed: 35 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
| 1 | + | |
| 2 | + | |
| 3 | + | |
| 4 | + | |
| 5 | + | |
| 6 | + | |
| 7 | + | |
| 8 | + | |
| 9 | + | |
| 10 | + | |
| 11 | + | |
| 12 | + | |
| 13 | + | |
| 14 | + | |
| 15 | + | |
| 16 | + | |
| 17 | + | |
| 18 | + | |
| 19 | + | |
| 20 | + | |
| 21 | + | |
| 22 | + | |
| 23 | + | |
| 24 | + | |
| 25 | + | |
| 26 | + | |
| 27 | + | |
| 28 | + | |
| 29 | + | |
| 30 | + | |
| 31 | + | |
| 32 | + | |
| 33 | + | |
| 34 | + | |
| 35 | + | |
Lines changed: 36 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
| 1 | + | |
| 2 | + | |
| 3 | + | |
| 4 | + | |
| 5 | + | |
| 6 | + | |
| 7 | + | |
| 8 | + | |
| 9 | + | |
| 10 | + | |
| 11 | + | |
| 12 | + | |
| 13 | + | |
| 14 | + | |
| 15 | + | |
| 16 | + | |
| 17 | + | |
| 18 | + | |
| 19 | + | |
| 20 | + | |
| 21 | + | |
| 22 | + | |
| 23 | + | |
| 24 | + | |
| 25 | + | |
| 26 | + | |
| 27 | + | |
| 28 | + | |
| 29 | + | |
| 30 | + | |
| 31 | + | |
| 32 | + | |
| 33 | + | |
| 34 | + | |
| 35 | + | |
| 36 | + | |
Lines changed: 46 additions & 0 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
| 1 | + | |
| 2 | + | |
| 3 | + | |
| 4 | + | |
| 5 | + | |
| 6 | + | |
| 7 | + | |
| 8 | + | |
| 9 | + | |
| 10 | + | |
| 11 | + | |
| 12 | + | |
| 13 | + | |
| 14 | + | |
| 15 | + | |
| 16 | + | |
| 17 | + | |
| 18 | + | |
| 19 | + | |
| 20 | + | |
| 21 | + | |
| 22 | + | |
| 23 | + | |
| 24 | + | |
| 25 | + | |
| 26 | + | |
| 27 | + | |
| 28 | + | |
| 29 | + | |
| 30 | + | |
| 31 | + | |
| 32 | + | |
| 33 | + | |
| 34 | + | |
| 35 | + | |
| 36 | + | |
| 37 | + | |
| 38 | + | |
| 39 | + | |
| 40 | + | |
| 41 | + | |
| 42 | + | |
| 43 | + | |
| 44 | + | |
| 45 | + | |
| 46 | + | |
0 commit comments