@@ -1056,6 +1056,67 @@ def check_ob_negated_solutions(tasks: list[Path]) -> CriterionResult:
10561056 )
10571057
10581058
# Shell constructs whose value differs between runs.  ``$RANDOM`` is matched in
# both the bare and the braced ``${RANDOM...}`` form; the trailing ``\b`` keeps
# unrelated variables such as ``$RANDOM_SEED`` from matching.
_OG_NONDETERMINISTIC_CMDS = re.compile(
    r'\$RANDOM\b|\$\{RANDOM\b[^}\n]*\}|\buuidgen\b|\bshuf\b'
)

# ``date`` command substitution (``$(date ...)`` or backticks) used inside a
# test bracket or after ``==`` / ``!=``.  A test bracket must be preceded by a
# command separator and followed by whitespace, so the common logging idiom
# ``echo "[$(date)] message"`` is not reported.
_OG_DATE_IN_COMPARISON = re.compile(
    r'(?:(?:^|[\s;&|(!])\[{1,2}\s.*|[!=]=.*)(?:\$\(|`)\s*date\b',
    re.MULTILINE,
)

# ``mktemp`` output appearing on the same line as a comparison/assertion tool
# (as opposed to being used only to create scratch files).
_OG_MKTEMP_IN_ASSERT = re.compile(
    r'(?:diff|cmp|==|!=|grep|assert).*\$\(mktemp'
    r'|mktemp.*(?:diff|cmp|==|!=|grep|assert)'
)

# Any call through a ``random`` namespace except ``random.seed`` itself (this
# also matches ``np.random.<fn>(``, as the original pattern did).
_OG_PY_RANDOM_USE = re.compile(r'\brandom\.(?!seed\()\w+\(')

# Explicit seeding: the call must carry an argument, because ``random.seed()``
# and ``random.Random()`` without one seed from OS entropy / the clock.
_OG_PY_RANDOM_SEEDED = re.compile(
    r'\brandom\.(?:seed|Random|RandomState|default_rng)\(\s*[^\s)]'
)

# Only uuid1/uuid4 produce run-dependent values; ``uuid.UUID(...)``, ``uuid3``
# and ``uuid5`` are deterministic and must not be reported.
_OG_PY_UUID_GENERATION = re.compile(r'\buuid\.uuid[14]\(')


def _og_strip_comment_lines(text: str) -> str:
    """Return *text* without lines that consist solely of a ``#`` comment."""
    return "\n".join(
        line for line in text.splitlines()
        if not line.lstrip().startswith("#")
    )


def _og_shell_issues(script: str) -> list[str]:
    """Return the determinism findings for the text of a shell verifier."""
    found = []
    # dict.fromkeys de-duplicates while keeping first-occurrence order.
    commands = list(dict.fromkeys(
        match.group(0) for match in _OG_NONDETERMINISTIC_CMDS.finditer(script)
    ))
    if commands:
        found.append(f"non-deterministic command: {', '.join(commands)}")
    if _OG_DATE_IN_COMPARISON.search(script):
        found.append("date output used in comparison/assertion")
    if _OG_MKTEMP_IN_ASSERT.search(script):
        found.append("mktemp path used in assertion/comparison")
    return found


def _og_python_issues(source: str) -> list[str]:
    """Return the determinism findings for the text of a Python verifier."""
    found = []
    if (_OG_PY_RANDOM_USE.search(source)
            and not _OG_PY_RANDOM_SEEDED.search(source)):
        found.append("unseeded random module usage")
    if _OG_PY_UUID_GENERATION.search(source):
        found.append("uuid generation in verifier")
    return found


def check_og_determinism(tasks: list[Path]) -> CriterionResult:
    """O.g: Verifiers produce deterministic results (no unseeded randomness).

    Each task's primary verifier (as returned by ``_get_primary_verifier``) is
    scanned with regex heuristics chosen by file suffix: ``.sh`` verifiers are
    checked for ``$RANDOM``/``uuidgen``/``shuf``, ``date`` substitutions in
    comparisons and ``mktemp`` paths in assertions; ``.py`` verifiers are
    checked for unseeded ``random`` usage and ``uuid1``/``uuid4`` calls.
    Full-line ``#`` comments are ignored. Tasks without a verifier, or whose
    verifier has another suffix, are skipped.

    Args:
        tasks: Task directories to inspect.

    Returns:
        A ``CriterionResult`` for criterion ``"O.g"``: ``Status.PASS`` when no
        pattern matched, otherwise ``Status.WARN`` with one evidence line per
        offending task (first 10 in ``evidence``, first 20 in ``details``).
    """
    scanners = {".sh": _og_shell_issues, ".py": _og_python_issues}
    issues = []
    scanned = 0  # verifiers actually inspected, reported in the PASS evidence

    for task_dir in tasks:
        verifier = _get_primary_verifier(task_dir)
        if not verifier:
            continue
        scan = scanners.get(verifier.suffix)
        if scan is None:
            continue

        scanned += 1
        # Explicit encoding keeps results independent of the host locale;
        # errors="replace" means undecodable bytes never abort the audit.
        content = _og_strip_comment_lines(
            verifier.read_text(encoding="utf-8", errors="replace")
        )
        task_issues = scan(content)
        if task_issues:
            issues.append(f"{task_dir.name}: {'; '.join(task_issues)}")

    if not issues:
        return CriterionResult(
            criterion_id="O.g", status=Status.PASS,
            evidence=f"No non-deterministic patterns found across {scanned} verifiers",
        )
    return CriterionResult(
        criterion_id="O.g", status=Status.WARN,
        evidence="\n".join(issues[:10]),
        remediation="Remove non-deterministic commands from verifier scoring logic, or seed random generators",
        details={"issue_count": len(issues), "issues": issues[:20]},
    )
1118+
1119+
10591120# ---------------------------------------------------------------------------
10601121# Main auditor
10611122# ---------------------------------------------------------------------------
@@ -1071,6 +1132,7 @@ def check_ob_negated_solutions(tasks: list[Path]) -> CriterionResult:
10711132 "O.a" : check_oa_equivalent_solutions ,
10721133 "O.b" : check_ob_negated_solutions ,
10731134 "O.c" : check_oc_empty_solution_rejected ,
1135+ "O.g" : check_og_determinism ,
10741136 "O.d" : check_od_error_handling ,
10751137 "O.e" : check_oe_multiple_assertions ,
10761138 "O.h" : check_oh_reward_format ,
@@ -1099,7 +1161,7 @@ def check_ob_negated_solutions(tasks: list[Path]) -> CriterionResult:
10991161}
11001162
11011163# Semi-automated / manual checks (skip with note)
1102- SKIP_CHECKS = {"T.2" , "T.9" , "O.f" , "O.g" , " R.6" }
1164+ SKIP_CHECKS = {"T.2" , "T.9" , "O.f" , "R.6" }
11031165
11041166
11051167def audit_suite (suite : str , dimension : Optional [Dimension ] = None ) -> AuditReport :
0 commit comments