From ab721d5c06cc3a49c385dde29fdd304334b65bf8 Mon Sep 17 00:00:00 2001 From: Pedro Naves Date: Sat, 28 Feb 2026 12:45:31 -0300 Subject: [PATCH] fix: normalize test names with strip() before pass/fail comparison --- swe_bench_pro_eval.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/swe_bench_pro_eval.py b/swe_bench_pro_eval.py index cac63a62..302fa132 100644 --- a/swe_bench_pro_eval.py +++ b/swe_bench_pro_eval.py @@ -552,9 +552,9 @@ def main(): eval_results[instance_id] = False else: raw_sample = raw_sample_df.loc[instance_id] - passed_tests = {x["name"] for x in output["tests"] if x["status"] == "PASSED"} - f2p = set(eval(raw_sample["fail_to_pass"])) - p2p = set(eval(raw_sample["pass_to_pass"])) + passed_tests = {x["name"].strip() for x in output["tests"] if x["status"] == "PASSED"} + f2p = {t.strip() for t in eval(raw_sample["fail_to_pass"])} + p2p = {t.strip() for t in eval(raw_sample["pass_to_pass"])} result = (f2p | p2p) <= passed_tests eval_results[instance_id] = result