Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benchmark/task_19.hs.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Ad-hoc

# signature
```haskell
quot :: Integral => a -> a -> a
quot :: Integral a => a -> a -> a
```

# code
Expand Down
2 changes: 1 addition & 1 deletion benchmark/task_59.hs.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Parametric

# signature
```haskell
foldl :: (b -> a -> b) -> b -> t a -> b
foldl :: Foldable t => (b -> a -> b) -> b -> t a -> b
```

# code
Expand Down
8 changes: 3 additions & 5 deletions scripts/error_cls.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
from os.path import abspath, dirname, basename, join as pjoin
from os.path import abspath, basename, join as pjoin
import os

import orjson
from pydantic import BaseModel
from openai import OpenAI
import fire
from tqdm import tqdm

from tfbench import (
analysis_multi_runs,
load_tfb_from_hf,
load_gen_results_jsonl,
evaluate,
LMAnswer,
)
from tqdm import tqdm

from tfbench.evaluation import get_incorrect
from tfbench.common import get_prompt as get_task_prompt, BenchmarkTask

Expand Down
4 changes: 3 additions & 1 deletion scripts/preprocess_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ def main(input_raw_benchmark_path: str = "benchmark", output_path: str = "tfb.js

# read in all files ending with .hs.md in the input_raw_benchmark_path
tasks: list[BenchmarkTask] = []
for file in os.listdir(input_raw_benchmark_path):
files = os.listdir(input_raw_benchmark_path)
files_w_order = sorted(files)
for file in files_w_order:
if not file.endswith(".hs.md"):
continue
with open(os.path.join(input_raw_benchmark_path, file), "r") as f:
Expand Down
4 changes: 2 additions & 2 deletions src/tfbench/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ def load_tfb_from_hf(split: str = "base") -> list[BenchmarkTask]:

def load_gen_results_jsonl(result_file: str) -> list[LMAnswer | None]:
"""load generation results from a jsonl file"""
objs: list[dict[str, str | None]] = orjsonl.load(result_file) # type: ignore
return [from_dict(LMAnswer, obj) for obj in objs]
objs: list[dict[str, str]] = orjsonl.load(result_file) # type: ignore
return [from_dict(LMAnswer, obj) if "answer" in obj else None for obj in objs]