|
5 | 5 | import subprocess |
6 | 6 | import shutil |
7 | 7 | from pathlib import Path |
| 8 | +from typing import Literal |
8 | 9 |
|
9 | 10 |
|
10 | 11 | parser = argparse.ArgumentParser() |
11 | | -parser.add_argument('--wasmfuzz', default="wasmfuzz") |
12 | | -parser.add_argument('--harness-dir', default="./out") |
13 | | -parser.add_argument('--corpus-dir', default="./corpus") |
14 | | -parser.add_argument('--crashes-dir', default="./crashes") |
15 | | -parser.add_argument('--tags', default="./tags.csv") |
16 | | -parser.add_argument('--only-target', default=None) |
| 12 | +parser.add_argument("--wasmfuzz", default="wasmfuzz") |
| 13 | +parser.add_argument("--harness-dir", default="./out") |
| 14 | +parser.add_argument("--corpus-dir", default="./corpus") |
| 15 | +parser.add_argument("--crashes-dir", default="./crashes") |
| 16 | +parser.add_argument("--ooms-dir", default="./ooms") |
| 17 | +parser.add_argument("--timeouts-dir", default="./timeouts") |
| 18 | +parser.add_argument("--tags", default="./tags.csv") |
| 19 | +parser.add_argument("--only-target", default=None) |
| 20 | +parser.add_argument("--mode", default="crash") |
17 | 21 |
|
18 | 22 | args = parser.parse_args() |
19 | 23 | harness_dir = Path(args.harness_dir) |
20 | 24 | corpus_dir = Path(args.corpus_dir) |
21 | 25 | crashes_dir = Path(args.crashes_dir) |
| 26 | +ooms_dir = Path(args.ooms_dir) |
| 27 | +timeouts_dir = Path(args.timeouts_dir) |
22 | 28 | tags_path = Path(args.tags) |
23 | | -assert all(x.exists() for x in [harness_dir, crashes_dir, tags_path]) |
| 29 | +mode = args.mode |
| 30 | +# Literal["crash"] | Literal["oom"] | Literal["timeout"] |
| 31 | +assert mode in ["crash", "oom", "timeout"] |
| 32 | +mode_path = {"crash": crashes_dir, "oom": ooms_dir, "timeout": timeouts_dir}[mode] |
| 33 | +mode_path.mkdir(exist_ok=True) |
| 34 | +csv_key = {"crash": "crashed", "timeout": "timeout", "oom": "oom"}[mode] |
| 35 | + |
| 36 | +assert all(x.exists() for x in [harness_dir, mode_path, tags_path]) |
24 | 37 | if not corpus_dir.exists(): |
25 | 38 | print("[!] corpus directory not found, can't cross-check") |
26 | 39 |
|
27 | | -def check_reproduces(harness_path, crash_path): |
| 40 | + |
| 41 | +def check_reproduces(harness_path, crash_path, mode="crash"): |
28 | 42 | print(f"[*] reproducing {crash_path} ...") |
29 | | - res = subprocess.check_output([ |
30 | | - args.wasmfuzz, |
31 | | - "run-input", |
32 | | - harness_path, |
33 | | - crash_path |
34 | | - ]) |
35 | | - print("[>]", res.decode('utf8', errors='ignore').splitlines()[-1].strip()) |
36 | | - if b"execution trapped with" in res and b"which indicates that the target crashed" in res: |
37 | | - return True |
| 43 | + res = subprocess.check_output( |
| 44 | + [args.wasmfuzz, "run-input", harness_path, crash_path] |
| 45 | + ) |
| 46 | + print("[>]", res.decode("utf8", errors="ignore").splitlines()[-1].strip()) |
| 47 | + if mode == "crash": |
| 48 | + return ( |
| 49 | + b"execution trapped with" in res |
| 50 | + and b"which indicates that the target crashed" in res |
| 51 | + ) |
| 52 | + if mode == "oom": |
| 53 | + raise RuntimeError("TODO") |
| 54 | + if mode == "timeout": |
| 55 | + return b"execution stopped with OutOfFuel" in res |
| 56 | + |
38 | 57 |
|
39 | 58 | harness_paths = sorted(list(harness_dir.glob("*.wasm"))) |
40 | 59 |
|
41 | 60 | verified = set() |
42 | 61 |
|
| 62 | + |
43 | 63 | def check_harness(harness_path): |
44 | 64 | harness = harness_path.name |
45 | | - crash_path = crashes_dir / f"{harness_path.stem}.bin" |
| 65 | + crash_path = mode_path / f"{harness_path.stem}.bin" |
46 | 66 | if crash_path.exists(): |
47 | | - if check_reproduces(harness_path, crash_path): |
| 67 | + if check_reproduces(harness_path, crash_path, mode=mode): |
48 | 68 | verified.add(harness) |
49 | 69 | return |
50 | 70 |
|
51 | | - if not corpus_dir.exists(): return |
| 71 | + if not corpus_dir.exists(): |
| 72 | + return |
52 | 73 | for corpus_snapshot in corpus_dir.glob(f"{harness_path.stem}/snapshot-*.csv"): |
53 | 74 | with open(corpus_snapshot) as f: |
54 | 75 | for elem in csv.DictReader(f): |
55 | | - if elem["crashed"] and int(elem["crashed"]): |
56 | | - input_path = corpus_snapshot.parent / corpus_snapshot.stem / elem["input"] |
57 | | - assert check_reproduces(harness_path, input_path) |
| 76 | + if elem[csv_key] and int(elem[csv_key]): |
| 77 | + input_path = ( |
| 78 | + corpus_snapshot.parent / corpus_snapshot.stem / elem["input"] |
| 79 | + ) |
| 80 | + assert check_reproduces(harness_path, input_path, mode=mode) |
58 | 81 | verified.add(harness) |
59 | 82 | shutil.copy(input_path, crash_path) |
60 | 83 | return |
61 | 84 |
|
| 85 | + |
62 | 86 | for harness_path in harness_paths: |
63 | 87 | if args.only_target is not None and args.only_target not in harness_path.name: |
64 | 88 | continue |
65 | 89 | check_harness(harness_path) |
66 | 90 |
|
67 | 91 | mismatches = set() |
68 | | -with open(tags_path) as f: |
69 | | - for row in csv.DictReader(f): |
70 | | - harness = Path(row["harness"]).name |
71 | | - if args.only_target is not None and args.only_target not in harness: |
72 | | - continue |
73 | | - tagged_crash = row["crashing"] and bool(int(row["crashing"])) |
74 | | - if tagged_crash != (harness in verified): |
75 | | - mismatches.add(harness) |
| 92 | +if mode == "crash": |
| 93 | + with open(tags_path) as f: |
| 94 | + for row in csv.DictReader(f): |
| 95 | + harness = Path(row["harness"]).name |
| 96 | + if args.only_target is not None and args.only_target not in harness: |
| 97 | + continue |
| 98 | + tagged_crash = row["crashing"] and bool(int(row["crashing"])) |
| 99 | + if tagged_crash != (harness in verified): |
| 100 | + mismatches.add(harness) |
76 | 101 |
|
77 | | -for crash_path in crashes_dir.glob("*.bin"): |
78 | | - if args.only_target is not None and args.only_target not in crash_path.stem: |
| 102 | +for input_path in mode_path.glob("*.bin"): |
| 103 | + if args.only_target is not None and args.only_target not in input_path.stem: |
79 | 104 | continue |
80 | | - harness = crash_path.stem |
| 105 | + harness = input_path.stem |
81 | 106 | harness_path = harness_dir / f"{harness}.wasm" |
82 | 107 | if not harness_path.exists(): |
83 | 108 | print(f"[!] {harness}: Missing harness!") |
84 | 109 |
|
85 | 110 | print() |
86 | | -print("-"*80) |
| 111 | +print("-" * 80) |
87 | 112 | print(f"{len(mismatches)} mismatches found" + ".:"[bool(mismatches)]) |
88 | 113 | with open(tags_path) as f: |
89 | 114 | for row in csv.DictReader(f): |
90 | 115 | harness = Path(row["harness"]) |
91 | 116 | if args.only_target is not None and args.only_target not in harness.name: |
92 | 117 | continue |
93 | | - crash_path = crashes_dir / f"{harness.stem}.bin" |
| 118 | + input_path = mode_path / f"{harness.stem}.bin" |
94 | 119 | tagged_crash = row["crashing"] and bool(int(row["crashing"])) |
95 | 120 | if tagged_crash and harness.name not in verified: |
96 | | - if crash_path.exists(): |
| 121 | + if input_path.exists(): |
97 | 122 | print(f"[!] {harness.name}: Input for tagged crash didn't reproduce!") |
98 | 123 | else: |
99 | 124 | print(f"[!] {harness.name}: Missing reproducer for tagged harness!") |
100 | | - if crash_path.exists() and not tagged_crash: |
| 125 | + if input_path.exists() and not tagged_crash: |
101 | 126 | if harness.name in verified: |
102 | 127 | print(f"[!] {harness.name}: Crash reproduced but not tagged!") |
103 | 128 | else: |
104 | | - print(f"[!] {crash_path} exists but doesn't reproduce!") |
| 129 | + print(f"[!] {input_path} exists but doesn't reproduce!") |
0 commit comments