Skip to content

Commit 49c66a6

Browse files
authored
Merge pull request #135 from devdanzin/oom-seq-randomize
oom-seq: --oom-seq-randomize (per-sequence length + window variety)
2 parents edd4684 + 63f0d29 commit 49c66a6

5 files changed

Lines changed: 119 additions & 21 deletions

File tree

CLAUDE.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ PYTHONPATH=$PWD python fuzzers/fusil-python-threaded --unsafe [options]
6969
# --jit-mode synthesize|variational|legacy|all (see JIT section)
7070
# --oom-fuzz OOM (allocation-failure) injection mode (see OOM section)
7171
# --oom-seq stateful call SEQUENCES (Phase 4): several calls per scan under
72-
# one failure window (found OOM-0036); --oom-seq-len/--oom-window
72+
# one failure window (found OOM-0036); --oom-seq-len/--oom-window;
73+
# --oom-seq-randomize varies len/window per sequence (those = maxes)
7374
# --oom-dedup-catalog F in-loop crash dedupe/labeling vs known_sites.tsv; add
7475
# --oom-dedup-prune to drop dups, --oom-dedup-resolve-segv
7576
# to resolve segvs via gdb so they dedupe too
@@ -164,7 +165,11 @@ str-subclass constructor bug).
164165
**bounded failure window**: `set_nomemory(start, start+k)` fails `k` allocations then
165166
*resumes* — so an allocation failure in one call can corrupt state a *later* call trips over
166167
(the cross-call "stale state" class the single-call sweep can't reach). `--oom-seq-len`
167-
(steps, default 3) / `--oom-window` (`k`, default 1; `0` = legacy fail-forever). Opt-in;
168+
(steps, default 3) / `--oom-window` (`k`, default 1; `0` = legacy fail-forever). Add
169+
`--oom-seq-randomize` to randomize each emitted sequence's length (in `[1, --oom-seq-len]`)
170+
and window (in `[1, --oom-window]`) independently, so one instance covers a range of
171+
sequence shapes (the configured values become upper bounds; per-sequence window is passed
172+
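to `oom_run(..., window=k)`; `--oom-window 0`, the legacy fail-forever mode, is left as the static default and never randomized). Opt-in;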
168173
default output unchanged without it. `--oom-seq` found **OOM-0036** — a `list.append()` double-free
169174
under `MemoryError` in the `_CALL_LIST_APPEND` bytecode, filed as python/cpython#151818.
170175
Design + the windowed-`set_nomemory` semantics: **`doc/oom-sequences.md`**.

doc/oom-sequences.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,14 @@ cheaper and more targeted, and it's the right first lever — fold it into Idea
128128
injection primitive (`--oom-window`, default 1), optionally randomizing `k` per sequence
129129
for extra spread. Frame it as coverage, not noise.
130130

131+
> **Implemented (`--oom-seq-randomize`).** This per-sequence randomization now exists: with
132+
> the flag set, each emitted sequence draws its window uniformly from `[1, --oom-window]` and
133+
> its length from `[1, --oom-seq-len]` independently (the configured values become upper
134+
> bounds; the window is passed per call as `oom_run(..., window=k)`, and `--oom-window 0` — legacy fail-forever — is left as the static default and never randomized). One instance thus covers
135+
> a range of sequence shapes instead of a single static config — strictly more general (a
136+
> fixed config is just the `min == max` case). Default off; generated output is unchanged
137+
> without it.
138+
131139
**Cost to be honest about:** auto-resume means the crash can occur *far* from the
132140
injection point (causal distance grows with `k` and sequence length), which weakens the
133141
"which allocation caused it" link and makes minimization harder. `k = 1` keeps it tight;

fusil/python/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,15 @@ def createFuzzerOptions(self, parser: OptionParserWithSections) -> None:
401401
type="int",
402402
default=1,
403403
)
404+
oom_options.add_option(
405+
"--oom-seq-randomize",
406+
help="Randomize each emitted sequence's length (in [1, --oom-seq-len]) and "
407+
"failure window (in [1, --oom-window]) independently, so one instance covers a "
408+
"range of sequence shapes instead of a single static config. The --oom-seq-len / "
409+
"--oom-window values become the upper bounds (default: off)",
410+
action="store_true",
411+
default=False,
412+
)
404413
oom_options.add_option(
405414
"--oom-verbose",
406415
help="In OOM mode, also print the sweep start index before each "

fusil/python/write_python_code.py

Lines changed: 49 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -567,27 +567,29 @@ def oom_call(label, func, *args, **kwargs):
567567
f"""
568568
_OOM_WINDOW = {self.options.oom_window}
569569
570-
def oom_run(label, thunk):
570+
def oom_run(label, thunk, window=_OOM_WINDOW):
571571
# Stateful OOM sequence (Phase 4): sweep a bounded failure window
572572
# across a multi-step thunk so a failure in one step can corrupt
573-
# state a later step trips over. set_nomemory(start, start+_OOM_WINDOW)
574-
# fails _OOM_WINDOW allocations then resumes succeeding, so steps after
575-
# the burst run on the damaged state (_OOM_WINDOW == 0 -> fail forever,
576-
# the legacy single-call semantics). The thunk guards each step
577-
# internally so the tail still runs after an earlier step raises; a real
578-
# crash (segfault/abort) terminates the process and is scored.
573+
# state a later step trips over. set_nomemory(start, start+window)
574+
# fails `window` allocations then resumes succeeding, so steps after
575+
# the burst run on the damaged state (window == 0 -> fail forever,
576+
# the legacy single-call semantics). `window` defaults to _OOM_WINDOW
577+
# but is passed per-sequence when --oom-seq-randomize is set. The thunk
578+
# guards each step internally so the tail still runs after an earlier
579+
# step raises; a real crash (segfault/abort) terminates the process and
580+
# is scored.
579581
if not _OOM_AVAILABLE:
580582
try:
581583
thunk()
582584
except BaseException:
583585
pass
584586
return
585-
print("[OOM-SEQ] " + label, file=stderr)
587+
print("[OOM-SEQ] " + label + " window=" + str(window), file=stderr)
586588
for _start in range(_OOM_MAX_START):
587589
if _OOM_VERBOSE:
588-
print("[OOM-SEQ] start=" + str(_start) + " window=" + str(_OOM_WINDOW), file=stderr)
589-
if _OOM_WINDOW > 0:
590-
_set_nomemory(_start, _start + _OOM_WINDOW)
590+
print("[OOM-SEQ] start=" + str(_start) + " window=" + str(window), file=stderr)
591+
if window > 0:
592+
_set_nomemory(_start, _start + window)
591593
else:
592594
_set_nomemory(_start, 0)
593595
try:
@@ -1091,7 +1093,27 @@ def _generate_oom_function_call(
10911093
self.write(0, ")")
10921094
self.emptyLine()
10931095

1094-
def _write_oom_sequence(self, fn_name: str, seq_label: str, steps) -> None:
1096+
def _oom_seq_randomize(self) -> bool:
1097+
return bool(getattr(self.options, "oom_seq_randomize", False))
1098+
1099+
def _oom_pick_seq_len(self) -> int:
1100+
"""Step count for ONE sequence. With --oom-seq-randomize, uniform in [1, oom_seq_len]
1101+
(the configured value is the upper bound); otherwise the fixed oom_seq_len."""
1102+
n = max(1, self.options.oom_seq_len)
1103+
if self._oom_seq_randomize() and n > 1:
1104+
return randint(1, n)
1105+
return n
1106+
1107+
def _oom_pick_window(self):
1108+
"""Failure window for ONE sequence, or None to emit the harness default (_OOM_WINDOW).
1109+
With --oom-seq-randomize, uniform in [1, oom_window] (upper bound); otherwise None so
1110+
the generated oom_run() call is unchanged. window 0 (legacy fail-forever) is left as
1111+
the static default and never randomized into."""
1112+
if self._oom_seq_randomize() and self.options.oom_window > 1:
1113+
return randint(1, self.options.oom_window)
1114+
return None
1115+
1116+
def _write_oom_sequence(self, fn_name: str, seq_label: str, steps, window=None) -> None:
10951117
"""Emit a guarded multi-step thunk plus an oom_run() call (Phase 4 sequence).
10961118
10971119
steps: list of (sublabel, target_expr, num_args), where target_expr is a string
@@ -1100,6 +1122,9 @@ def _write_oom_sequence(self, fn_name: str, seq_label: str, steps) -> None:
11001122
the shared state (a live instance, or module/interpreter globals such as a pending
11011123
exception) is what a later step may trip over. Result values are not threaded
11021124
between steps yet (Phase 4b); the steps interact only through shared state.
1125+
1126+
window: per-sequence failure window (--oom-seq-randomize); None emits the default
1127+
oom_run(label, thunk) call (uses the module-level _OOM_WINDOW).
11031128
"""
11041129
self.write(0, f"def {fn_name}():")
11051130
saved = self.addLevel(1)
@@ -1121,7 +1146,10 @@ def _write_oom_sequence(self, fn_name: str, seq_label: str, steps) -> None:
11211146
if not wrote:
11221147
self.write(0, "pass")
11231148
self.restoreLevel(saved)
1124-
self.write(0, f'oom_run("{seq_label}", {fn_name})')
1149+
if window is None:
1150+
self.write(0, f'oom_run("{seq_label}", {fn_name})')
1151+
else:
1152+
self.write(0, f'oom_run("{seq_label}", {fn_name}, window={window})')
11251153
self.emptyLine()
11261154

11271155
def _generate_oom_function_sequence(self, prefix: str) -> None:
@@ -1134,7 +1162,7 @@ def _generate_oom_function_sequence(self, prefix: str) -> None:
11341162
"""
11351163
steps = []
11361164
names = []
1137-
for j in range(max(1, self.options.oom_seq_len)):
1165+
for j in range(self._oom_pick_seq_len()):
11381166
func_name = choice(self.module_functions)
11391167
try:
11401168
func_obj = getattr(self.module, func_name)
@@ -1154,7 +1182,9 @@ def _generate_oom_function_sequence(self, prefix: str) -> None:
11541182
return
11551183
seq_label = f"{prefix}:{self.module_name}[" + ">".join(names) + "]"
11561184
self.write(0, f"# OOM sequence: {' > '.join(names)}")
1157-
self._write_oom_sequence(f"_oom_seq_{prefix}", seq_label, steps)
1185+
self._write_oom_sequence(
1186+
f"_oom_seq_{prefix}", seq_label, steps, window=self._oom_pick_window()
1187+
)
11581188

11591189
def _generate_oom_class_fuzzing(self, prefix: str, class_name: str, class_obj: type) -> None:
11601190
"""Emits an OOM sweep over a class constructor and, on a live instance, its methods.
@@ -1203,7 +1233,7 @@ def _generate_oom_class_fuzzing(self, prefix: str, class_name: str, class_obj: t
12031233
# method B trips over (e.g. OOM-0035: write... then getvalue()).
12041234
steps = []
12051235
mnames = []
1206-
for j in range(max(1, self.options.oom_seq_len)):
1236+
for j in range(self._oom_pick_seq_len()):
12071237
m_name = choice(method_names)
12081238
m_obj = methods[m_name]
12091239
min_arg, max_arg = get_arg_number(m_obj, m_name, 0)
@@ -1218,7 +1248,9 @@ def _generate_oom_class_fuzzing(self, prefix: str, class_name: str, class_obj: t
12181248
mnames.append(m_name)
12191249
seq_label = f"{prefix}:{self.module_name}.{class_name}[" + ">".join(mnames) + "]"
12201250
self.write(0, f"# OOM sequence on {class_name}: {' > '.join(mnames)}")
1221-
self._write_oom_sequence(f"_oom_seq_{prefix}", seq_label, steps)
1251+
self._write_oom_sequence(
1252+
f"_oom_seq_{prefix}", seq_label, steps, window=self._oom_pick_window()
1253+
)
12221254
else:
12231255
for j in range(self.options.oom_methods):
12241256
m_name = choice(method_names)

tests/python/test_oom_fuzz.py

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
import json
1515
import math
1616
import os
17+
import random
18+
import re
1719
import sys
1820
import tempfile
1921
import unittest
@@ -55,6 +57,7 @@ def _make_options(oom_fuzz, oom_verbose=False):
5557
o.oom_seq = False
5658
o.oom_seq_len = 3
5759
o.oom_window = 1
60+
o.oom_seq_randomize = False
5861
# JIT options (WriteJITCode is constructed unconditionally); OOM mode never
5962
# dispatches to it, so legacy defaults are fine.
6063
o.jit_fuzz = False
@@ -81,6 +84,7 @@ def _generate(
8184
oom_seq=False,
8285
oom_seq_len=3,
8386
oom_window=1,
87+
oom_seq_randomize=False,
8488
):
8589
"""Generate a fuzzing script against ``module`` and return its source."""
8690
parent = MagicMock()
@@ -91,6 +95,7 @@ def _generate(
9195
options.oom_seq = oom_seq
9296
options.oom_seq_len = oom_seq_len
9397
options.oom_window = oom_window
98+
options.oom_seq_randomize = oom_seq_randomize
9499
parent.options = options
95100
parent.filenames = ["/bin/sh"]
96101
fd, path = tempfile.mkstemp(suffix="_oom_test.py")
@@ -218,9 +223,9 @@ def test_seq_emits_windowed_oom_run_harness(self):
218223
src = _generate(oom_fuzz=True, oom_seq=True, oom_seq_len=3, oom_window=2)
219224
ast.parse(src)
220225
# The oom_run harness + the bounded-window primitive (start .. start+k).
221-
self.assertIn("def oom_run(label, thunk):", src)
226+
self.assertIn("def oom_run(label, thunk, window=_OOM_WINDOW):", src)
222227
self.assertIn("_OOM_WINDOW = 2", src)
223-
self.assertIn("_set_nomemory(_start, _start + _OOM_WINDOW)", src)
228+
self.assertIn("_set_nomemory(_start, _start + window)", src)
224229
self.assertIn("_set_nomemory(_start, 0)", src) # window==0 fallback branch
225230
self.assertIn('print("[OOM-SEQ] " + label', src)
226231
# Function sequences: a guarded multi-step thunk fed to oom_run.
@@ -260,6 +265,45 @@ def test_seq_method_chain_reuses_one_instance(self):
260265
self.assertRegex(src, r"getattr\(oom_inst_oc1_\w+, ")
261266
self.assertNotIn('oom_call("oc1m', src) # single-call method sweep replaced
262267

268+
def test_seq_no_randomize_omits_per_call_window(self):
269+
# Default (randomize off): oom_run() calls take no per-sequence window override,
270+
# so the harness default (_OOM_WINDOW) applies -- output is unchanged.
271+
src = _generate(oom_fuzz=True, oom_seq=True, oom_seq_len=3, oom_window=2)
272+
calls = re.findall(r"oom_run\([^\n]*?, _oom_seq_f\d+\)", src)
273+
self.assertTrue(calls, "expected default 2-arg oom_run() calls")
274+
self.assertEqual(re.findall(r"oom_run\([^\n]*?, window=\d+\)", src), [])
275+
276+
def test_seq_randomize_emits_per_sequence_window_within_bounds(self):
277+
random.seed(20240623)
278+
src = _generate(
279+
oom_fuzz=True, oom_seq=True, oom_seq_len=6, oom_window=8, oom_seq_randomize=True
280+
)
281+
ast.parse(src)
282+
windows = [int(w) for w in re.findall(r"oom_run\([^\n]*?, window=(\d+)\)", src)]
283+
self.assertTrue(windows, "randomize on should emit per-sequence window= kwargs")
284+
self.assertTrue(all(1 <= w <= 8 for w in windows), windows)
285+
286+
def test_seq_randomize_varies_length_within_bounds(self):
287+
# Across the per-session sequences, step counts stay in [1, oom_seq_len] and (with a
288+
# wide bound + seed) are not all identical -> real per-sequence variety.
289+
random.seed(42)
290+
src = _generate(
291+
oom_fuzz=True,
292+
oom_seq=True,
293+
oom_verbose=True, # emits a "step sN:" marker per step so we can count
294+
oom_seq_len=6,
295+
oom_window=4,
296+
oom_seq_randomize=True,
297+
)
298+
ast.parse(src)
299+
lengths = [
300+
len(re.findall(r"step s\d+:", body))
301+
for body in re.split(r"def _oom_seq_f\d+\(\):", src)[1:]
302+
]
303+
self.assertTrue(lengths, "expected function sequences")
304+
self.assertTrue(all(1 <= n <= 6 for n in lengths), lengths)
305+
self.assertGreater(len(set(lengths)), 1, f"lengths did not vary: {lengths}")
306+
263307
def test_non_seq_oom_mode_has_no_seq_artifacts(self):
264308
src = _generate(oom_fuzz=True, oom_seq=False)
265309
for marker in ("oom_run", "_OOM_WINDOW", "[OOM-SEQ]", "_oom_seq_"):

0 commit comments

Comments
 (0)