Skip to content

Commit 0b09794

Browse files
committed
fix: address all open review comments on fuzz harness
- Remove 'simple' from ARRAY_STYLES (invalid for query params per OAS 3.0) - Handle enum/const generically in sample_value() before type branches - Clamp/swap bounds in integer sampling to prevent ValueError - Encode query arrays using style-appropriate delimiter - Resolve $ref pointers before generating cases - Wrap main fuzz loop in try/finally to always write summary.json - Validate FUZZ_BUDGET as numeric in nightly workflow - Fix gh issue list jq to use '// empty' for null handling - Add 'text' language tag to README fenced code block
1 parent 7d9af9d commit 0b09794

4 files changed

Lines changed: 132 additions & 78 deletions

File tree

.github/workflows/fuzz-nightly.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,13 @@ jobs:
7070
continue-on-error: true
7171
run: |
7272
export PATH=$OPENRESTY_PREFIX/nginx/sbin:$OPENRESTY_PREFIX/bin:$PATH
73-
make fuzz FUZZ_BUDGET=$FUZZ_BUDGET
73+
case "$FUZZ_BUDGET" in
74+
''|*[!0-9]*)
75+
echo "FUZZ_BUDGET must be an integer number of seconds" >&2
76+
exit 2
77+
;;
78+
esac
79+
make fuzz "FUZZ_BUDGET=$FUZZ_BUDGET"
7480
7581
- name: Upload findings
7682
if: steps.fuzz.outcome == 'failure'
@@ -112,7 +118,7 @@ jobs:
112118
113119
# De-dup: reuse any open issue with the fuzz-nightly label.
114120
existing=$(gh issue list --label fuzz-nightly --state open \
115-
--json number --jq '.[0].number' || echo "")
121+
--json number --jq '.[0].number // empty' || echo "")
116122
if [ -n "$existing" ]; then
117123
gh issue comment "$existing" --body "$BODY"
118124
gh issue edit "$existing" --add-assignee jarvis9443 || true

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
fuzz/__pycache__/

fuzz/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ unfixed validator, `utf8_len(table)` Bug 3 against the unfixed jsonschema).
1515

1616
## Architecture
1717

18-
```
18+
```text
1919
mutate_fuzz.py (Python orchestrator)
2020
├─ pick a seed spec from fuzz/seeds/
2121
├─ apply N random mutations (mutators below)

fuzz/mutate_fuzz.py

Lines changed: 122 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
(`/users/{id}` -> `/users/{id}.json`)
1616
2. Add `nullable: true` (and sometimes `null` to enum/const)
1717
3. Add length keywords (`maxLength`, `minLength`) to non-string types
18-
4. Switch query param `style` between simple/form/pipeDelimited/
18+
4. Switch query param `style` between form/pipeDelimited/
1919
spaceDelimited and toggle `explode`
2020
5. Add `required` references to non-existent properties
2121
6. Swap scalar types (`integer` <-> `string` <-> `number` <-> `boolean`)
@@ -26,6 +26,7 @@
2626
from __future__ import annotations
2727

2828
import argparse
29+
import copy
2930
import json
3031
import os
3132
import random
@@ -43,7 +44,7 @@
4344

4445
LITERAL_EXTS = [".json", ".xml", ".txt", ".v2"]
4546
SCALAR_TYPES = ["string", "integer", "number", "boolean"]
46-
ARRAY_STYLES = ["simple", "form", "pipeDelimited", "spaceDelimited"]
47+
ARRAY_STYLES = ["form", "pipeDelimited", "spaceDelimited"]
4748

4849

4950
def _walk(node: Any, fn, path=()):
@@ -183,6 +184,34 @@ def mutate(spec: dict, n: int, rng: random.Random) -> list[str]:
183184
return applied
184185

185186

187+
# $ref resolution ---------------------------------------------------------------
188+
189+
def resolve_refs(spec: dict) -> dict:
    """Return a copy of *spec* with local ``$ref`` pointers expanded.

    Only same-document references (those starting with ``#/``) are
    followed.  Each one is replaced by a deep copy of the object it
    points to, which is then resolved in turn.  Keys that sit next to
    ``$ref`` in the referencing object are discarded, because the whole
    object is replaced by its target.

    A reference is left untouched (the ``{"$ref": ...}`` object stays in
    the output) when:

    * it is not a local ``#/`` pointer,
    * the pointer does not lead to a dict, or
    * expanding it would re-enter a reference that is already being
      expanded (a recursive schema), which would otherwise recurse
      without bound.

    *spec* itself is not mutated; containers in the result are rebuilt.
    """

    def _lookup(ref: str):
        """Follow a ``#/a/b/0`` pointer from the root; None if it misses."""
        target = spec
        for token in ref[2:].split("/"):
            # JSON Pointer escapes: "~1" is "/", "~0" is "~" (in that order).
            token = token.replace("~1", "/").replace("~0", "~")
            if isinstance(target, dict):
                target = target.get(token)
            elif isinstance(target, list) and token.isdecimal():
                index = int(token)
                if index >= len(target):
                    return None
                target = target[index]
            else:
                return None
        return target if isinstance(target, dict) else None

    def _resolve(node, active: frozenset):
        # `active` holds the refs currently being expanded on this branch.
        if isinstance(node, list):
            return [_resolve(item, active) for item in node]
        if not isinstance(node, dict):
            return node
        ref = node.get("$ref")
        if not isinstance(ref, str):
            return {key: _resolve(value, active) for key, value in node.items()}
        if not ref.startswith("#/") or ref in active:
            # External reference, or a cycle: keep the pointer as-is.
            return node
        target = _lookup(ref)
        if target is None:
            return node
        # Copy first so the expansion never aliases the referenced object.
        return _resolve(copy.deepcopy(target), active | {ref})

    return _resolve(spec, frozenset())
213+
214+
186215
# Case generation -------------------------------------------------------------
187216

188217
def _sample_string(rng: random.Random, schema: dict) -> str:
@@ -214,6 +243,12 @@ def sample_value(schema: dict, rng: random.Random, depth: int = 0):
214243
return None
215244
if schema.get("nullable") and rng.random() < 0.2:
216245
return None
246+
# Handle const/enum generically before type-specific branches
247+
if "const" in schema:
248+
return schema["const"]
249+
enum = schema.get("enum")
250+
if isinstance(enum, list) and enum:
251+
return rng.choice(enum)
217252
t = schema.get("type")
218253
if isinstance(t, list):
219254
t = rng.choice([x for x in t if x != "null"] or t)
@@ -222,7 +257,13 @@ def sample_value(schema: dict, rng: random.Random, depth: int = 0):
222257
if t == "integer":
223258
lo = schema.get("minimum", 0)
224259
hi = schema.get("maximum", lo + 100)
225-
return rng.randint(int(lo), int(hi))
260+
try:
261+
lo, hi = int(lo), int(hi)
262+
except (TypeError, ValueError):
263+
lo, hi = 0, 100
264+
if lo > hi:
265+
lo, hi = hi, lo
266+
return rng.randint(lo, hi)
226267
if t == "number":
227268
return float(rng.randint(0, 100))
228269
if t == "boolean":
@@ -289,13 +330,17 @@ def gen_cases(spec: dict, rng: random.Random, max_per_op: int = 2) -> list[dict]
289330
if val is None:
290331
val = "x"
291332
if p.get("in") == "query":
292-
# For style=form&explode=true with array, send list
293333
if isinstance(val, list) and p.get("explode", True):
294334
req["query"][p["name"]] = val
335+
elif isinstance(val, list):
336+
style = p.get("style", "form")
337+
delim = {
338+
"pipeDelimited": "|",
339+
"spaceDelimited": " ",
340+
}.get(style, ",")
341+
req["query"][p["name"]] = delim.join(map(str, val))
295342
else:
296-
req["query"][p["name"]] = (
297-
",".join(map(str, val)) if isinstance(val, list) else val
298-
)
343+
req["query"][p["name"]] = val
299344
elif p.get("in") == "header":
300345
req["headers"][p["name"]] = str(val)
301346

@@ -420,74 +465,76 @@ def main():
420465
cases_run = 0
421466
t0 = time.time()
422467

423-
with crashes_path.open("w") as crashf:
424-
while time.time() - t0 < args.budget:
425-
seed_path = rng.choice(seeds)
426-
spec = json.loads(seed_path.read_text())
427-
spec.pop("servers", None)
428-
applied = mutate(spec, args.mutations, rng)
429-
cases = gen_cases(spec, rng, max_per_op=2)
430-
if not cases:
431-
continue
432-
results = run_validator(spec, cases, args.deps, args.lib, args.include)
433-
cases_run += len(cases)
434-
rounds += 1
435-
for r in results:
436-
phase = r.get("phase")
437-
if phase in ("crash", "subprocess_error", "timeout"):
438-
rec = {"kind": "crash", "seed": seed_path.name, "applied": applied,
439-
"result": r}
440-
crashes.append(rec)
441-
crashf.write(json.dumps(rec) + "\n")
442-
crashf.flush()
443-
print(f"CRASH on {seed_path.name} after {applied}: "
444-
f"{str(r.get('err') or r.get('stderr'))[:200]}",
445-
file=sys.stderr)
446-
elif phase == "ok" and r.get("label") == "positive" and not r.get("accepted"):
447-
err = (r.get("err") or "").lower()
448-
noisy = any(s in err for s in (
449-
"matches none of the required",
450-
"match only one schema",
451-
"discriminator",
452-
"failed to match pattern",
453-
"string too short, expected at least",
454-
"string too long",
455-
"expected at most",
456-
"expected at least",
457-
"additionalproperties",
458-
"minimum",
459-
"maximum",
460-
"uniqueitems",
461-
"format",
462-
"got userdata",
463-
"expected integer, got",
464-
"expected number, got",
465-
"expected boolean, got",
466-
"is required",
467-
"failed to validate item",
468-
))
469-
if noisy:
470-
continue
471-
rec = {"kind": "false_negative", "seed": seed_path.name,
472-
"applied": applied, "result": r}
473-
false_negatives.append(rec)
474-
crashf.write(json.dumps(rec) + "\n")
475-
crashf.flush()
476-
print(f"FALSE_NEGATIVE on {seed_path.name} after {applied}: "
477-
f"op={r.get('op')} err={(r.get('err') or '')[:200]}",
478-
file=sys.stderr)
479-
480-
summary = {
481-
"rounds": rounds,
482-
"cases_run": cases_run,
483-
"elapsed_s": round(time.time() - t0, 2),
484-
"crash_count": len(crashes),
485-
"false_negative_count": len(false_negatives),
486-
"total_findings": len(crashes) + len(false_negatives),
487-
"crashes_path": str(crashes_path),
488-
}
489-
summary_path.write_text(json.dumps(summary, indent=2))
490-
print(json.dumps(summary, indent=2))
468+
try:
469+
with crashes_path.open("w") as crashf:
470+
while time.time() - t0 < args.budget:
471+
seed_path = rng.choice(seeds)
472+
spec = json.loads(seed_path.read_text())
473+
spec.pop("servers", None)
474+
applied = mutate(spec, args.mutations, rng)
475+
resolved = resolve_refs(spec)
476+
cases = gen_cases(resolved, rng, max_per_op=2)
477+
if not cases:
478+
continue
479+
results = run_validator(spec, cases, args.deps, args.lib, args.include)
480+
cases_run += len(cases)
481+
rounds += 1
482+
for r in results:
483+
phase = r.get("phase")
484+
if phase in ("crash", "subprocess_error", "timeout"):
485+
rec = {"kind": "crash", "seed": seed_path.name, "applied": applied,
486+
"result": r}
487+
crashes.append(rec)
488+
crashf.write(json.dumps(rec) + "\n")
489+
crashf.flush()
490+
print(f"CRASH on {seed_path.name} after {applied}: "
491+
f"{str(r.get('err') or r.get('stderr'))[:200]}",
492+
file=sys.stderr)
493+
elif phase == "ok" and r.get("label") == "positive" and not r.get("accepted"):
494+
err = (r.get("err") or "").lower()
495+
noisy = any(s in err for s in (
496+
"matches none of the required",
497+
"match only one schema",
498+
"discriminator",
499+
"failed to match pattern",
500+
"string too short, expected at least",
501+
"string too long",
502+
"expected at most",
503+
"expected at least",
504+
"additionalproperties",
505+
"minimum",
506+
"maximum",
507+
"uniqueitems",
508+
"format",
509+
"got userdata",
510+
"expected integer, got",
511+
"expected number, got",
512+
"expected boolean, got",
513+
"is required",
514+
"failed to validate item",
515+
))
516+
if noisy:
517+
continue
518+
rec = {"kind": "false_negative", "seed": seed_path.name,
519+
"applied": applied, "result": r}
520+
false_negatives.append(rec)
521+
crashf.write(json.dumps(rec) + "\n")
522+
crashf.flush()
523+
print(f"FALSE_NEGATIVE on {seed_path.name} after {applied}: "
524+
f"op={r.get('op')} err={(r.get('err') or '')[:200]}",
525+
file=sys.stderr)
526+
finally:
527+
summary = {
528+
"rounds": rounds,
529+
"cases_run": cases_run,
530+
"elapsed_s": round(time.time() - t0, 2),
531+
"crash_count": len(crashes),
532+
"false_negative_count": len(false_negatives),
533+
"total_findings": len(crashes) + len(false_negatives),
534+
"crashes_path": str(crashes_path),
535+
}
536+
summary_path.write_text(json.dumps(summary, indent=2))
537+
print(json.dumps(summary, indent=2))
491538
sys.exit(1 if (crashes or false_negatives) else 0)
492539

493540

0 commit comments

Comments
 (0)