Skip to content

Commit a0c4f9e

Browse files
authored
fix: groundtruth error (#63)
* fix: missing type class and typevar in benchmark
* fix: order of tasks in tfb
* fix: allow load_gen_results to load error
* remove error_cls unused imports
1 parent b50c21d commit a0c4f9e

5 files changed

Lines changed: 10 additions & 10 deletions

File tree

benchmark/task_19.hs.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ Ad-hoc
77

88
# signature
99
```haskell
10-
quot :: Integral => a -> a -> a
10+
quot :: Integral a => a -> a -> a
1111
```
1212

1313
# code

benchmark/task_59.hs.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ Parametric
77

88
# signature
99
```haskell
10-
foldl :: (b -> a -> b) -> b -> t a -> b
10+
foldl :: Foldable t => (b -> a -> b) -> b -> t a -> b
1111
```
1212

1313
# code

scripts/error_cls.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,17 @@
1-
from os.path import abspath, dirname, basename, join as pjoin
1+
from os.path import abspath, basename, join as pjoin
22
import os
33

44
import orjson
55
from pydantic import BaseModel
66
from openai import OpenAI
77
import fire
8+
from tqdm import tqdm
9+
810
from tfbench import (
9-
analysis_multi_runs,
1011
load_tfb_from_hf,
1112
load_gen_results_jsonl,
12-
evaluate,
1313
LMAnswer,
1414
)
15-
from tqdm import tqdm
16-
1715
from tfbench.evaluation import get_incorrect
1816
from tfbench.common import get_prompt as get_task_prompt, BenchmarkTask
1917

scripts/preprocess_benchmark.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,9 @@ def main(input_raw_benchmark_path: str = "benchmark", output_path: str = "tfb.js
1414

1515
# read in all files ending with .md in the input_raw_benchmark_path
1616
tasks: list[BenchmarkTask] = []
17-
for file in os.listdir(input_raw_benchmark_path):
17+
files = os.listdir(input_raw_benchmark_path)
18+
files_w_order = sorted(files)
19+
for file in files_w_order:
1820
if not file.endswith(".hs.md"):
1921
continue
2022
with open(os.path.join(input_raw_benchmark_path, file), "r") as f:

src/tfbench/load.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,5 +35,5 @@ def load_tfb_from_hf(split: str = "base") -> list[BenchmarkTask]:
3535

3636
def load_gen_results_jsonl(result_file: str) -> list[LMAnswer | None]:
3737
"""load generation results from a jsonl file"""
38-
objs: list[dict[str, str | None]] = orjsonl.load(result_file) # type: ignore
39-
return [from_dict(LMAnswer, obj) for obj in objs]
38+
objs: list[dict[str, str]] = orjsonl.load(result_file) # type: ignore
39+
return [from_dict(LMAnswer, obj) if "answer" in obj else None for obj in objs]

0 commit comments

Comments
 (0)