1515 (`/users/{id}` -> `/users/{id}.json`)
1616 2. Add `nullable: true` (and sometimes `null` to enum/const)
1717 3. Add length keywords (`maxLength`, `minLength`) to non-string types
18- 4. Switch query param `style` between simple/ form/pipeDelimited/
18+ 4. Switch query param `style` between form/pipeDelimited/
1919 spaceDelimited and toggle `explode`
2020 5. Add `required` references to non-existent properties
2121 6. Swap scalar types (`integer` <-> `string` <-> `number` <-> `boolean`)
2626from __future__ import annotations
2727
2828import argparse
29+ import copy
2930import json
3031import os
3132import random
4344
4445LITERAL_EXTS = [".json" , ".xml" , ".txt" , ".v2" ]
4546SCALAR_TYPES = ["string" , "integer" , "number" , "boolean" ]
46- ARRAY_STYLES = ["simple" , " form" , "pipeDelimited" , "spaceDelimited" ]
47+ ARRAY_STYLES = ["form" , "pipeDelimited" , "spaceDelimited" ]
4748
4849
4950def _walk (node : Any , fn , path = ()):
@@ -183,6 +184,34 @@ def mutate(spec: dict, n: int, rng: random.Random) -> list[str]:
183184 return applied
184185
185186
187+ # $ref resolution ---------------------------------------------------------------
188+
def resolve_refs(spec: dict) -> dict:
    """Return a copy of *spec* with local ``$ref`` pointers inlined.

    Only document-local references (``"#/..."``) are followed. Each one is
    replaced by a deep copy of the object it points at, and that copy is
    resolved recursively. The input ``spec`` is not mutated; a new structure
    is built and returned.

    A ``$ref`` node is returned unchanged when:
      * the reference is not local (does not start with ``"#/"``),
      * the pointer cannot be followed or does not land on a dict,
      * expanding it would re-enter a reference that is already being
        expanded (a cyclic / self-referential schema).

    Keys that sit next to ``$ref`` in the same object are dropped when the
    reference is resolved.

    Args:
        spec: Parsed specification document; also used as the root that
            local references are resolved against.

    Returns:
        A new structure with resolvable, non-cyclic local refs expanded.
    """
    def _lookup(root, ref):
        # Walk the JSON Pointer that follows the leading "#/".
        target = root
        for part in ref[2:].split("/"):
            # "~1" must be decoded before "~0" so "~01" becomes "~1", not "/".
            part = part.replace("~1", "/").replace("~0", "~")
            if isinstance(target, dict):
                target = target.get(part)
            elif (isinstance(target, list) and part.isascii()
                    and part.isdigit() and int(part) < len(target)):
                # Pointer segments may index into arrays.
                target = target[int(part)]
            else:
                return None
        return target

    def _resolve(node, root, active):
        if isinstance(node, dict):
            ref = node.get("$ref")
            if isinstance(ref, str):
                # Non-local refs are left alone; a ref already on the current
                # expansion path is a cycle and would otherwise recurse forever.
                if not ref.startswith("#/") or ref in active:
                    return node
                target = _lookup(root, ref)
                if not isinstance(target, dict):
                    return node
                # Deep copy so the expanded subtree shares nothing with root.
                return _resolve(copy.deepcopy(target), root, active | {ref})
            return {k: _resolve(v, root, active) for k, v in node.items()}
        if isinstance(node, list):
            return [_resolve(item, root, active) for item in node]
        return node

    return _resolve(spec, spec, frozenset())
213+
214+
186215# Case generation -------------------------------------------------------------
187216
188217def _sample_string (rng : random .Random , schema : dict ) -> str :
@@ -214,6 +243,12 @@ def sample_value(schema: dict, rng: random.Random, depth: int = 0):
214243 return None
215244 if schema .get ("nullable" ) and rng .random () < 0.2 :
216245 return None
246+ # Handle const/enum generically before type-specific branches
247+ if "const" in schema :
248+ return schema ["const" ]
249+ enum = schema .get ("enum" )
250+ if isinstance (enum , list ) and enum :
251+ return rng .choice (enum )
217252 t = schema .get ("type" )
218253 if isinstance (t , list ):
219254 t = rng .choice ([x for x in t if x != "null" ] or t )
@@ -222,7 +257,13 @@ def sample_value(schema: dict, rng: random.Random, depth: int = 0):
222257 if t == "integer" :
223258 lo = schema .get ("minimum" , 0 )
224259 hi = schema .get ("maximum" , lo + 100 )
225- return rng .randint (int (lo ), int (hi ))
260+ try :
261+ lo , hi = int (lo ), int (hi )
262+ except (TypeError , ValueError ):
263+ lo , hi = 0 , 100
264+ if lo > hi :
265+ lo , hi = hi , lo
266+ return rng .randint (lo , hi )
226267 if t == "number" :
227268 return float (rng .randint (0 , 100 ))
228269 if t == "boolean" :
@@ -289,13 +330,17 @@ def gen_cases(spec: dict, rng: random.Random, max_per_op: int = 2) -> list[dict]
289330 if val is None :
290331 val = "x"
291332 if p .get ("in" ) == "query" :
292- # For style=form&explode=true with array, send list
293333 if isinstance (val , list ) and p .get ("explode" , True ):
294334 req ["query" ][p ["name" ]] = val
335+ elif isinstance (val , list ):
336+ style = p .get ("style" , "form" )
337+ delim = {
338+ "pipeDelimited" : "|" ,
339+ "spaceDelimited" : " " ,
340+ }.get (style , "," )
341+ req ["query" ][p ["name" ]] = delim .join (map (str , val ))
295342 else :
296- req ["query" ][p ["name" ]] = (
297- "," .join (map (str , val )) if isinstance (val , list ) else val
298- )
343+ req ["query" ][p ["name" ]] = val
299344 elif p .get ("in" ) == "header" :
300345 req ["headers" ][p ["name" ]] = str (val )
301346
@@ -420,74 +465,76 @@ def main():
420465 cases_run = 0
421466 t0 = time .time ()
422467
423- with crashes_path .open ("w" ) as crashf :
424- while time .time () - t0 < args .budget :
425- seed_path = rng .choice (seeds )
426- spec = json .loads (seed_path .read_text ())
427- spec .pop ("servers" , None )
428- applied = mutate (spec , args .mutations , rng )
429- cases = gen_cases (spec , rng , max_per_op = 2 )
430- if not cases :
431- continue
432- results = run_validator (spec , cases , args .deps , args .lib , args .include )
433- cases_run += len (cases )
434- rounds += 1
435- for r in results :
436- phase = r .get ("phase" )
437- if phase in ("crash" , "subprocess_error" , "timeout" ):
438- rec = {"kind" : "crash" , "seed" : seed_path .name , "applied" : applied ,
439- "result" : r }
440- crashes .append (rec )
441- crashf .write (json .dumps (rec ) + "\n " )
442- crashf .flush ()
443- print (f"CRASH on { seed_path .name } after { applied } : "
444- f"{ str (r .get ('err' ) or r .get ('stderr' ))[:200 ]} " ,
445- file = sys .stderr )
446- elif phase == "ok" and r .get ("label" ) == "positive" and not r .get ("accepted" ):
447- err = (r .get ("err" ) or "" ).lower ()
448- noisy = any (s in err for s in (
449- "matches none of the required" ,
450- "match only one schema" ,
451- "discriminator" ,
452- "failed to match pattern" ,
453- "string too short, expected at least" ,
454- "string too long" ,
455- "expected at most" ,
456- "expected at least" ,
457- "additionalproperties" ,
458- "minimum" ,
459- "maximum" ,
460- "uniqueitems" ,
461- "format" ,
462- "got userdata" ,
463- "expected integer, got" ,
464- "expected number, got" ,
465- "expected boolean, got" ,
466- "is required" ,
467- "failed to validate item" ,
468- ))
469- if noisy :
470- continue
471- rec = {"kind" : "false_negative" , "seed" : seed_path .name ,
472- "applied" : applied , "result" : r }
473- false_negatives .append (rec )
474- crashf .write (json .dumps (rec ) + "\n " )
475- crashf .flush ()
476- print (f"FALSE_NEGATIVE on { seed_path .name } after { applied } : "
477- f"op={ r .get ('op' )} err={ (r .get ('err' ) or '' )[:200 ]} " ,
478- file = sys .stderr )
479-
480- summary = {
481- "rounds" : rounds ,
482- "cases_run" : cases_run ,
483- "elapsed_s" : round (time .time () - t0 , 2 ),
484- "crash_count" : len (crashes ),
485- "false_negative_count" : len (false_negatives ),
486- "total_findings" : len (crashes ) + len (false_negatives ),
487- "crashes_path" : str (crashes_path ),
488- }
489- summary_path .write_text (json .dumps (summary , indent = 2 ))
490- print (json .dumps (summary , indent = 2 ))
468+ try :
469+ with crashes_path .open ("w" ) as crashf :
470+ while time .time () - t0 < args .budget :
471+ seed_path = rng .choice (seeds )
472+ spec = json .loads (seed_path .read_text ())
473+ spec .pop ("servers" , None )
474+ applied = mutate (spec , args .mutations , rng )
475+ resolved = resolve_refs (spec )
476+ cases = gen_cases (resolved , rng , max_per_op = 2 )
477+ if not cases :
478+ continue
479+ results = run_validator (spec , cases , args .deps , args .lib , args .include )
480+ cases_run += len (cases )
481+ rounds += 1
482+ for r in results :
483+ phase = r .get ("phase" )
484+ if phase in ("crash" , "subprocess_error" , "timeout" ):
485+ rec = {"kind" : "crash" , "seed" : seed_path .name , "applied" : applied ,
486+ "result" : r }
487+ crashes .append (rec )
488+ crashf .write (json .dumps (rec ) + "\n " )
489+ crashf .flush ()
490+ print (f"CRASH on { seed_path .name } after { applied } : "
491+ f"{ str (r .get ('err' ) or r .get ('stderr' ))[:200 ]} " ,
492+ file = sys .stderr )
493+ elif phase == "ok" and r .get ("label" ) == "positive" and not r .get ("accepted" ):
494+ err = (r .get ("err" ) or "" ).lower ()
495+ noisy = any (s in err for s in (
496+ "matches none of the required" ,
497+ "match only one schema" ,
498+ "discriminator" ,
499+ "failed to match pattern" ,
500+ "string too short, expected at least" ,
501+ "string too long" ,
502+ "expected at most" ,
503+ "expected at least" ,
504+ "additionalproperties" ,
505+ "minimum" ,
506+ "maximum" ,
507+ "uniqueitems" ,
508+ "format" ,
509+ "got userdata" ,
510+ "expected integer, got" ,
511+ "expected number, got" ,
512+ "expected boolean, got" ,
513+ "is required" ,
514+ "failed to validate item" ,
515+ ))
516+ if noisy :
517+ continue
518+ rec = {"kind" : "false_negative" , "seed" : seed_path .name ,
519+ "applied" : applied , "result" : r }
520+ false_negatives .append (rec )
521+ crashf .write (json .dumps (rec ) + "\n " )
522+ crashf .flush ()
523+ print (f"FALSE_NEGATIVE on { seed_path .name } after { applied } : "
524+ f"op={ r .get ('op' )} err={ (r .get ('err' ) or '' )[:200 ]} " ,
525+ file = sys .stderr )
526+ finally :
527+ summary = {
528+ "rounds" : rounds ,
529+ "cases_run" : cases_run ,
530+ "elapsed_s" : round (time .time () - t0 , 2 ),
531+ "crash_count" : len (crashes ),
532+ "false_negative_count" : len (false_negatives ),
533+ "total_findings" : len (crashes ) + len (false_negatives ),
534+ "crashes_path" : str (crashes_path ),
535+ }
536+ summary_path .write_text (json .dumps (summary , indent = 2 ))
537+ print (json .dumps (summary , indent = 2 ))
491538 sys .exit (1 if (crashes or false_negatives ) else 0 )
492539
493540
0 commit comments