Skip to content

Commit b2b168b

Browse files
sjarmak and claude committed
Hydrate oracle data for 12 scaling-gap Org tasks and fix migration-294 Dockerfile
- Ran hydrate_task_specs.py to populate oracle fields in task_spec.json for all 12 scaling-gap tasks (285-296). Verifier was scoring 0.0 because oracle was empty.
- Removed conflicting `nodejs npm` from migration-294 Dockerfiles (dpkg conflict with nodesource Node.js 22 installed during agent setup).
- All 12 tasks now have valid paired runs promoted to official.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent fa64168 commit b2b168b

File tree

16 files changed

+3385
-168
lines changed

16 files changed

+3385
-168
lines changed

benchmarks/csb_org_compliance/ccx-compliance-286/tests/task_spec.json

Lines changed: 220 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,34 +6,240 @@
66
"mcp_suite": "csb_org_compliance",
77
"prd": {
88
"user_story": "As a developer, I want to: Audit the privilege and RBAC enforcement in pingcap/tidb. Find all Go source files under pkg/privilege/ and pkg/planner/ that implement privilege checking for SQL statements. Identify: the PrivilegeManager interface, the MySQLPrivilege cache, the RequestVerification method and its callers, the role graph resolution logic, and any files that map SQL statement types to required privileges. For each file report the path and its role in the privilege check flow.",
9-
"constraints": ["Provide specific file paths and repository names in your answer.", "Write your findings to /workspace/answer.json."],
9+
"constraints": [
10+
"Provide specific file paths and repository names in your answer.",
11+
"Write your findings to /workspace/answer.json."
12+
],
1013
"success_definition": "Agent successfully identifies relevant files and symbols across all repos in the tidb-database fixture.",
1114
"seed_prompt": "Audit the privilege and RBAC enforcement in pingcap/tidb. Find all Go source files under pkg/privilege/ and pkg/planner/ that implement privilege checking for SQL statements. Identify: the PrivilegeManager interface, the MySQLPrivilege cache, the RequestVerification method and its callers, the role graph resolution logic, and any files that map SQL statement types to required privileges. For each file report the path and its role in the privilege check flow."
1215
},
1316
"artifacts": {
1417
"repo_set_id": "tidb-database",
1518
"oracle": {
16-
"required_files": [],
17-
"required_symbols": [],
19+
"required_files": [
20+
{
21+
"repo": "sg-evals/tidb--v8.5.0",
22+
"path": "pkg/privilege/privilege.go"
23+
},
24+
{
25+
"repo": "sg-evals/tidb--v8.5.0",
26+
"path": "pkg/privilege/privileges/privileges.go"
27+
},
28+
{
29+
"repo": "sg-evals/tidb--v8.5.0",
30+
"path": "pkg/privilege/privileges/cache.go"
31+
},
32+
{
33+
"repo": "sg-evals/tidb--v8.5.0",
34+
"path": "pkg/privilege/privileges/errors.go"
35+
},
36+
{
37+
"repo": "sg-evals/tidb--v8.5.0",
38+
"path": "pkg/privilege/conn/conn.go"
39+
},
40+
{
41+
"repo": "sg-evals/tidb--v8.5.0",
42+
"path": "pkg/planner/core/optimizer.go"
43+
},
44+
{
45+
"repo": "sg-evals/tidb--v8.5.0",
46+
"path": "pkg/planner/optimize.go"
47+
},
48+
{
49+
"repo": "sg-evals/tidb--v8.5.0",
50+
"path": "pkg/planner/core/planbuilder.go"
51+
},
52+
{
53+
"repo": "sg-evals/tidb--v8.5.0",
54+
"path": "pkg/planner/core/logical_plan_builder.go"
55+
},
56+
{
57+
"repo": "sg-evals/tidb--v8.5.0",
58+
"path": "pkg/planner/core/preprocess.go"
59+
},
60+
{
61+
"repo": "sg-evals/tidb--v8.5.0",
62+
"path": "pkg/planner/core/point_get_plan.go"
63+
},
64+
{
65+
"repo": "sg-evals/tidb--v8.5.0",
66+
"path": "pkg/planner/core/expression_codec_fn.go"
67+
},
68+
{
69+
"repo": "sg-evals/tidb--v8.5.0",
70+
"path": "pkg/planner/core/plan_cache.go"
71+
},
72+
{
73+
"repo": "sg-evals/tidb--v8.5.0",
74+
"path": "pkg/planner/core/plan_cache_utils.go"
75+
},
76+
{
77+
"repo": "sg-evals/tidb--v8.5.0",
78+
"path": "pkg/planner/core/expression_rewriter.go"
79+
}
80+
],
81+
"required_symbols": [
82+
{
83+
"repo": "sg-evals/tidb--v8.5.0",
84+
"path": "pkg/privilege/privilege.go",
85+
"symbol": "Manager"
86+
},
87+
{
88+
"repo": "sg-evals/tidb--v8.5.0",
89+
"path": "pkg/privilege/privileges/cache.go",
90+
"symbol": "MySQLPrivilege"
91+
},
92+
{
93+
"repo": "sg-evals/tidb--v8.5.0",
94+
"path": "pkg/privilege/privileges/cache.go",
95+
"symbol": "Handle"
96+
},
97+
{
98+
"repo": "sg-evals/tidb--v8.5.0",
99+
"path": "pkg/privilege/privileges/cache.go",
100+
"symbol": "immutable"
101+
},
102+
{
103+
"repo": "sg-evals/tidb--v8.5.0",
104+
"path": "pkg/privilege/privileges/cache.go",
105+
"symbol": "roleGraphEdgesTable"
106+
},
107+
{
108+
"repo": "sg-evals/tidb--v8.5.0",
109+
"path": "pkg/privilege/privileges/privileges.go",
110+
"symbol": "UserPrivileges"
111+
},
112+
{
113+
"repo": "sg-evals/tidb--v8.5.0",
114+
"path": "pkg/planner/core/planbuilder.go",
115+
"symbol": "visitInfo"
116+
},
117+
{
118+
"repo": "sg-evals/tidb--v8.5.0",
119+
"path": "pkg/planner/core/optimizer.go",
120+
"symbol": "CheckPrivilege"
121+
},
122+
{
123+
"repo": "sg-evals/tidb--v8.5.0",
124+
"path": "pkg/planner/core/optimizer.go",
125+
"symbol": "VisitInfo4PrivCheck"
126+
},
127+
{
128+
"repo": "sg-evals/tidb--v8.5.0",
129+
"path": "pkg/planner/core/optimizer.go",
130+
"symbol": "CheckTableLock"
131+
}
132+
],
18133
"required_references": [],
19-
"dependency_chains": []
134+
"dependency_chains": [
135+
{
136+
"steps": [
137+
{
138+
"repo": "sg-evals/tidb--v8.5.0",
139+
"path": "pkg/planner/optimize.go",
140+
"symbol": "optimize"
141+
},
142+
{
143+
"repo": "sg-evals/tidb--v8.5.0",
144+
"path": "pkg/planner/core/optimizer.go",
145+
"symbol": "CheckPrivilege"
146+
},
147+
{
148+
"repo": "sg-evals/tidb--v8.5.0",
149+
"path": "pkg/privilege/privilege.go",
150+
"symbol": "Manager.RequestVerification"
151+
},
152+
{
153+
"repo": "sg-evals/tidb--v8.5.0",
154+
"path": "pkg/privilege/privileges/privileges.go",
155+
"symbol": "UserPrivileges.RequestVerification"
156+
},
157+
{
158+
"repo": "sg-evals/tidb--v8.5.0",
159+
"path": "pkg/privilege/privileges/cache.go",
160+
"symbol": "MySQLPrivilege.RequestVerification"
161+
},
162+
{
163+
"repo": "sg-evals/tidb--v8.5.0",
164+
"path": "pkg/privilege/privileges/cache.go",
165+
"symbol": "MySQLPrivilege.FindAllUserEffectiveRoles"
166+
},
167+
{
168+
"repo": "sg-evals/tidb--v8.5.0",
169+
"path": "pkg/privilege/privileges/cache.go",
170+
"symbol": "MySQLPrivilege.FindAllRole"
171+
},
172+
{
173+
"repo": "sg-evals/tidb--v8.5.0",
174+
"path": "pkg/planner/core/planbuilder.go",
175+
"symbol": "PlanBuilder.GetVisitInfo"
176+
},
177+
{
178+
"repo": "sg-evals/tidb--v8.5.0",
179+
"path": "pkg/planner/core/logical_plan_builder.go",
180+
"symbol": "appendVisitInfo"
181+
},
182+
{
183+
"repo": "sg-evals/tidb--v8.5.0",
184+
"path": "pkg/planner/core/logical_plan_builder.go",
185+
"symbol": "appendDynamicVisitInfo"
186+
},
187+
{
188+
"repo": "sg-evals/tidb--v8.5.0",
189+
"path": "pkg/planner/core/optimizer.go",
190+
"symbol": "VisitInfo4PrivCheck"
191+
}
192+
]
193+
}
194+
]
20195
}
21196
},
22197
"evaluation": {
23-
"modes": ["deterministic"],
198+
"modes": [
199+
"deterministic"
200+
],
24201
"checks": [
25-
{
26-
"type": "file_set_match",
27-
"params": {
28-
"search_pattern": "",
29-
"file_filter": ""
30-
}
31-
}
32-
],
202+
{
203+
"type": "file_set_match",
204+
"params": {
205+
"search_pattern": "",
206+
"file_filter": ""
207+
}
208+
},
209+
{
210+
"type": "symbol_resolution",
211+
"params": {}
212+
},
213+
{
214+
"type": "keyword_presence",
215+
"params": {
216+
"required_keywords": [
217+
"Manager",
218+
"MySQLPrivilege",
219+
"Handle",
220+
"immutable",
221+
"roleGraphEdgesTable",
222+
"UserPrivileges",
223+
"visitInfo",
224+
"CheckPrivilege",
225+
"VisitInfo4PrivCheck",
226+
"CheckTableLock"
227+
]
228+
}
229+
},
230+
{
231+
"type": "dependency_chain",
232+
"params": {}
233+
}
234+
],
33235
"eval_script": "/tests/eval.sh",
34236
"pass_exit_code": 0
35237
},
36238
"logging": {
37-
"required_metrics": ["oracle_coverage", "time_to_first_oracle_hit_ms", "unique_repos_touched"]
239+
"required_metrics": [
240+
"oracle_coverage",
241+
"time_to_first_oracle_hit_ms",
242+
"unique_repos_touched"
243+
]
38244
}
39245
}

0 commit comments

Comments
 (0)