trinity/minicorpus.c at master · kernelslacker/trinity · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
 * Coverage-guided argument retention (mini-corpus).
 *
 * Stores syscall argument snapshots that discovered new KCOV edges.
 * During future arg generation for the same syscall, a stored
 * snapshot may be replayed with per-argument mutations to explore
 * nearby input space.
 *
 * Syscalls with sanitise callbacks or with arg types that carry
 * heap pointers (ARG_IOVEC, ARG_IOVEC_IN, ARG_PATHNAME, ARG_SOCKADDR,
 * ARG_MMAP) are excluded — those pointers become stale after
 * deferred-free eviction, causing UAF on replay.  ARG_PID slots are
 * excluded as well; see corpus_args_replayable().
 */

#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <sys/stat.h>
#include <sys/utsname.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

#include "child.h"
#include "fd.h"
#include "kcov.h"
#include "minicorpus.h"
#include "persist-util.h"
#include "random.h"
#include "rnd.h"
#include "sanitise.h"
#include "shm.h"
#include "strategy.h"
#include "syscall.h"
#include "tables.h"
#include "trinity.h"
#include "utils.h"
#include "pids.h"

/* Shared mini-corpus state.  NULL until minicorpus_init() allocates it,
 * and it stays NULL for the whole run when kcov_shm is NULL at init time.
 * Entry points such as minicorpus_save_with_reason() and
 * minicorpus_wp_sweep() check for NULL before touching it. */
struct minicorpus_shared *minicorpus_shm = NULL;

/*
 * Process-wide runtime kill switch for the mutator chain.  Set at init
 * from $TRINITY_DISABLE_MUTATORS=1 and inherited COW by every child.
 * When true, minicorpus_mutate_args() skips splice, xprop, and the
 * weighted-stack mutate steps and feeds the corpus entry through
 * verbatim (fd-safety scrub still runs).  Replay rates and corpus
 * promotion behaviour are otherwise unchanged, so an A/B between
 * enabled and disabled isolates the mutator chain's contribution to
 * iter rate and edge growth from the rest of the replay path.
 */
static bool mutators_disabled;

/*
 * Cross-syscall value propagation (xprop) source whitelist.
 *
 * Within-syscall splice shuffles values between arg slots of one
 * snapshot.  xprop extends the same idea across syscalls: with low
 * probability an arg of the target syscall is overridden with a value
 * pulled from a *different* syscall's corpus pool.  Most arg slots see
 * no benefit from foreign values -- the kernel cheaply rejects
 * type-incoherent garbage with -EINVAL and we burn iterations -- so the
 * initial whitelist is narrow: fd-consuming slots of the target draw
 * from fd-returning syscalls' pools.  That pairing has the highest
 * a-priori chance of producing a value that lands in a region of input
 * space the kernel will follow rather than reject outright.
 *
 * Built once from minicorpus_init() by walking the syscall table
 * (which select_syscall_tables() has already populated by the time
 * init_shm runs) and recording the nr of every syscall with
 * rettype == RET_FD whose argtype set is corpus-replayable.  Inherited
 * COW by every child fork.
 */
/* NOTE(review): presumably the 1-in-N odds of attempting an xprop
 * override; the use site is outside this view -- confirm. */
#define XPROP_RATIO 64
/* Capacity of the fd-source whitelist.  xprop_consider_nr() stops
 * recording once this many sources have been collected. */
#define XPROP_FD_SRC_MAX 64

/* Syscall numbers admitted as xprop sources; only the first
 * xprop_n_fd_src slots are valid. */
static unsigned int xprop_fd_src_nrs[XPROP_FD_SRC_MAX];
/* Number of valid entries in xprop_fd_src_nrs[].  Zero means the
 * whitelist is empty and minicorpus_pick_from_other_syscall() bails
 * out immediately. */
static unsigned int xprop_n_fd_src;

/* Defined further down; called once from minicorpus_init(). */
static void xprop_build_whitelist(void);

void minicorpus_init(void)
{
	if (kcov_shm == NULL)
		return;

	/*
	 * Wild-write risk: a child syscall buffer pointer aliasing into
	 * the corpus could corrupt a saved snapshot's args[] (the next
	 * replay feeds garbage to the kernel — at worst ENOSYS / EINVAL,
	 * not a parent crash) or stick a ring->lock byte (one syscall's
	 * saves/replays stall).  The mut_attrib counters can be skewed
	 * but the weight floor (MUT_WEIGHT_FLOOR=50) keeps the scheduler
	 * operational.  No parent crash surface.
	 *
	 * Route through alloc_shared_pool so the default --guard-shared
	 * scope (pools) wraps this long-lived ~1.8 MB region in
	 * PROT_NONE guard pages.  A stray writer that over- or under-
	 * runs the region then faults at the write PC instead of
	 * silently corrupting saved snapshots and propagating the
	 * scribble into the next replay window.  Sibling kcov_shm has
	 * been pool-routed since the guard-armour landed; this lifts
	 * the same coverage to the corpus pool, which prior triages
	 * identified as a comparable wild-writer target.
	 */
	minicorpus_shm = alloc_shared_pool(sizeof(struct minicorpus_shared));
	memset(minicorpus_shm, 0, sizeof(struct minicorpus_shared));

	/* Stamp the writer-pinning canary in every ring.  Only writer of
	 * wp_canary, ever -- the per-syscall sweep (Stage 1) and the HW
	 * watchpoint (Stage 2) detect any subsequent write as the wild
	 * writer.  Stamp unconditionally so the field is initialised even
	 * when neither flag is in use (a future operator switching the
	 * flag on mid-stack would otherwise see a one-time false positive
	 * against the zero memset). */
	{
		unsigned int i;
		for (i = 0; i < MAX_NR_SYSCALL; i++)
			minicorpus_shm->rings[i].wp_canary = WP_CANARY_MAGIC;
	}

	output(0, "KCOV: mini-corpus allocated (%lu KB, %d entries/syscall)\n",
		(unsigned long) sizeof(struct minicorpus_shared) / 1024,
		CORPUS_RING_SIZE);

	xprop_build_whitelist();
	output(0, "KCOV: mini-corpus xprop whitelist: %u fd-returning sources\n",
		xprop_n_fd_src);

	/* Mutator kill switch.  Honour only "1" -- any other value (empty,
	 * "0", arbitrary string) leaves mutators enabled, matching the
	 * least-surprise convention for boolean env gates elsewhere in
	 * trinity.  Logged unconditionally so the chosen mode is visible
	 * in the startup banner alongside the corpus init lines. */
	{
		const char *v = getenv("TRINITY_DISABLE_MUTATORS");

		mutators_disabled = (v != NULL && v[0] == '1' && v[1] == '\0');
		output(0, "KCOV: mini-corpus mutators=%s\n",
		       mutators_disabled ? "DISABLED" : "ENABLED");
	}
}

static void ring_lock(struct corpus_ring *ring)
{
	if (minicorpus_shm != NULL)
		__atomic_fetch_add(&minicorpus_shm->held_count, 1,
				__ATOMIC_RELAXED);
	lock(&ring->lock);
}

static void ring_unlock(struct corpus_ring *ring)
{
	unlock(&ring->lock);
	if (minicorpus_shm != NULL)
		__atomic_sub_fetch(&minicorpus_shm->held_count, 1,
				__ATOMIC_RELAXED);
}

/*
 * Whether a saved syscall's args can be replayed safely.  Rejects the
 * argtype set whose values are runtime-relative — replay either feeds
 * the kernel garbage or, for ARG_PID, an active pid that gets signalled.
 *
 *  - ARG_IOVEC / ARG_IOVEC_IN / ARG_PATHNAME / ARG_SOCKADDR / ARG_MMAP:
 *    heap pointers handed out by generic_sanitise().  After deferred-free
 *    eviction they go stale and replay feeds freed memory to the kernel.
 *
 *  - ARG_PID: a pid valid in the saving run is meaningless in the
 *    replaying run.  Worse, dense trinity pid allocation plus kernel
 *    pid recycling means a stale pid frequently HITS a current trinity
 *    child or the parent — replay of kill / tkill / tgkill /
 *    pidfd_send_signal / rt_sigqueueinfo entries cascade-SIGKILLs the
 *    fleet.
 *
 * Three save/replay/load call sites must agree on this list (the xprop
 * whitelist builder, xprop_consider_nr(), also filters through it):
 *
 *   minicorpus_save()       — refuse to capture in the first place.
 *   minicorpus_replay()     — refuse to play back from the in-memory
 *                             ring (catches cross-config corpus swap
 *                             where the ring contains entries built
 *                             for a different syscall set).
 *   minicorpus_load_file()  — refuse to admit from on-disk warm-start.
 *                             Covers stale corpora that predate the
 *                             ARG_PID guard, cross-config swap of a
 *                             saved file, and any future syscall whose
 *                             argtype changes to ARG_PID without
 *                             invalidating cached corpora.
 */
static bool corpus_args_replayable(const struct syscallentry *entry)
{
	unsigned int i;

	for (i = 0; i < entry->num_args && i < 6; i++) {
		switch (entry->argtype[i]) {
		case ARG_IOVEC:
		case ARG_IOVEC_IN:
		case ARG_PATHNAME:
		case ARG_SOCKADDR:
		case ARG_MMAP:
		case ARG_PID:
			return false;
		default:
			break;
		}
	}
	return true;
}

/*
 * Append syscall @nr to the xprop fd-source whitelist if it qualifies:
 * the table has an entry for it, the entry returns an fd (RET_FD), and
 * its argument types are corpus-replayable.  Does nothing once the
 * whitelist is full.
 */
static void xprop_consider_nr(unsigned int nr)
{
	struct syscallentry *cand;

	/* Whitelist already at capacity: nothing more can be recorded. */
	if (xprop_n_fd_src >= XPROP_FD_SRC_MAX)
		return;

	cand = get_syscall_entry(nr, false);
	if (cand != NULL &&
	    cand->rettype == RET_FD &&
	    corpus_args_replayable(cand)) {
		xprop_fd_src_nrs[xprop_n_fd_src] = nr;
		xprop_n_fd_src++;
	}
}

/*
 * Offer every syscall number below MAX_NR_SYSCALL to
 * xprop_consider_nr(), which decides whether it joins the whitelist.
 */
static void xprop_build_whitelist(void)
{
	unsigned int candidate = 0;

	while (candidate < MAX_NR_SYSCALL) {
		xprop_consider_nr(candidate);
		candidate++;
	}
}

/*
 * Pull a value from a different syscall's seen-arg pool for use as arg
 * @arg_atype of the target syscall @nr.  Returns true and writes the
 * picked value to *val on a hit; false leaves *val untouched.  Only
 * fd-typed target slots are eligible -- the whitelist source pool is
 * the fd-returning-syscall set built at init.  Self-pairs are filtered
 * (within-syscall shuffling is the splice op's job).
 */
static bool minicorpus_pick_from_other_syscall(unsigned int nr,
					       enum argtype arg_atype,
					       unsigned long *val)
{
	struct corpus_ring *ring;
	unsigned int src_nr, slot, src_arg, num_args;
	unsigned int count, head;
	unsigned long picked;

	/* xprop attempt denominator.  Bumped once per
	 * entry regardless of outcome so the type-hit rate
	 * xprop_hits / xprop_attempts is directly readable, and
	 * the reject-cause breakdown below sums (with hits) to
	 * xprop_attempts minus the xprop_n_fd_src==0 early-out
	 * (the whitelist-uninitialised case, which is not a
	 * realised attempt). */
	if (xprop_n_fd_src == 0)
		return false;
	__atomic_fetch_add(&minicorpus_shm->xprop_attempts, 1UL,
			   __ATOMIC_RELAXED);
	if (!is_fdarg(arg_atype)) {
		__atomic_fetch_add(&minicorpus_shm->xprop_reject_target_not_fdarg,
				   1UL, __ATOMIC_RELAXED);
		return false;
	}

	/* One uniform draw from the whitelist; a self-pair is rejected
	 * outright rather than redrawn. */
	src_nr = xprop_fd_src_nrs[rnd_modulo_u32(xprop_n_fd_src)];
	if (src_nr == nr) {
		__atomic_fetch_add(&minicorpus_shm->xprop_reject_src_self,
				   1UL, __ATOMIC_RELAXED);
		return false;
	}

	ring = &minicorpus_shm->rings[src_nr];

	/*
	 * Lockless reader.  Single slot, single arg, no joint snapshot
	 * across multiple entries -- drop ring->lock and synchronise on
	 * the writer's release-stores of count/head (see
	 * minicorpus_save_with_reason).  Writers still serialise on
	 * ring->lock; this reader just observes the published view.
	 *
	 * Ordering: the writer publishes count BEFORE head (the
	 * deliberate inversion vs chain_corpus_save documented at the
	 * publish site).  An acquire-load of count is the synchronisation
	 * edge -- it pairs with the writer's release-store of count and
	 * therefore makes the entry stores that preceded that store
	 * visible to us.  The head store happens *after* the count store
	 * in writer program order, so a count-acquire does not also
	 * synchronise the head bump; loading head relaxed can return a
	 * stale value.  That is normally fine: a stale head still
	 * points one past a slot that was validly published in some
	 * earlier save, so we land on a legitimate xprop source -- "most
	 * recent" is a heuristic here, not a correctness invariant.
	 *
	 * The one exception is a ring's very first save: a reader can
	 * observe count == 1 while head still reads 0, which maps to
	 * slot CORPUS_RING_SIZE - 1.  That slot has never been written
	 * and is still zero from the memset in minicorpus_init(), so the
	 * num_args == 0 check below skips the pick (and accounts it
	 * under replay_torn_rejects even though nothing was torn).
	 *
	 * Race tolerance: a concurrent minicorpus_save can overwrite the
	 * slot we are mid-read on.  num_args is validated in [1, 6] post-
	 * snapshot; a torn struct assignment that produces an out-of-
	 * range value just skips the pick, no retry -- same tolerance
	 * the fuzzer applies to the other 75%+ of mutated inputs.
	 * args[] is a fixed-size 6-element array, so reading
	 * args[src_arg] with src_arg < num_args <= 6 is memory-safe even
	 * if the underlying ulong was itself torn; the caller just gets
	 * a slightly-stale value, which is fuzz fodder either way.
	 */
	count = __atomic_load_n(&ring->count, __ATOMIC_ACQUIRE);
	if (count == 0) {
		__atomic_fetch_add(&minicorpus_shm->xprop_reject_src_empty,
				   1UL, __ATOMIC_RELAXED);
		return false;
	}
	head = __atomic_load_n(&ring->head, __ATOMIC_RELAXED);

	/* Newest entry: head points one past the last write.  Adding
	 * CORPUS_RING_SIZE before the subtract keeps the unsigned modulo
	 * well-defined when head is 0 on a wrapped ring. */
	slot = (head + CORPUS_RING_SIZE - 1) % CORPUS_RING_SIZE;

	num_args = ring->entries[slot].num_args;
	if (num_args == 0 || num_args > 6) {
		__atomic_fetch_add(&minicorpus_shm->replay_torn_rejects,
				   1UL, __ATOMIC_RELAXED);
		return false;
	}
	/* Any arg slot of the source entry is a candidate, not just
	 * fd-typed ones. */
	src_arg = rnd_modulo_u32(num_args);
	picked = ring->entries[slot].args[src_arg];

	*val = picked;
	__atomic_fetch_add(&minicorpus_shm->xprop_hits, 1UL,
			   __ATOMIC_RELAXED);
	return true;
}

/*
 * Scan every per-syscall ring for signs of a wild write.
 *
 * A ring is flagged when its wp_canary no longer equals
 * WP_CANARY_MAGIC, or when its count exceeds CORPUS_RING_SIZE.  The
 * scan stops at the first offender.
 *
 * @bad_addr: optional out-param; receives the address of the offending
 *            word (the canary or the count field).
 * @bad_val:  optional out-param; receives the value observed there.
 *
 * Returns true when an offender was found, false when all rings look
 * sane or the corpus was never allocated.
 */
bool minicorpus_wp_sweep(unsigned long *bad_addr, uint64_t *bad_val)
{
	unsigned int idx;

	if (minicorpus_shm == NULL)
		return false;

	for (idx = 0; idx < MAX_NR_SYSCALL; idx++) {
		struct corpus_ring *r = &minicorpus_shm->rings[idx];
		const uint64_t canary = r->wp_canary;
		unsigned int fill;

		if (unlikely(canary != WP_CANARY_MAGIC)) {
			if (bad_addr != NULL)
				*bad_addr = (unsigned long) &r->wp_canary;
			if (bad_val != NULL)
				*bad_val = canary;
			return true;
		}

		/* count must never exceed the ring capacity; a scribble
		 * that lands on the count word pushes it past that bound
		 * and is reported the same way as a canary hit. */
		fill = r->count;
		if (unlikely(fill > CORPUS_RING_SIZE)) {
			if (bad_addr != NULL)
				*bad_addr = (unsigned long) &r->count;
			if (bad_val != NULL)
				*bad_val = (uint64_t) fill;
			return true;
		}
	}

	return false;
}

void minicorpus_save(struct syscallrecord *rec)
{
	/* Legacy entry point: callers that haven't been updated to thread
	 * an enum corpus_save_reason through still want PC-source
	 * accounting, matching the pre-CMP-save-gate behaviour. */
	minicorpus_save_with_reason(rec, CORPUS_SAVE_REASON_PC);
}

/*
 * Capture the arguments of @rec into its syscall's corpus ring.
 *
 * @rec:    completed syscall record; a1..a6, nr and do32bit are read.
 * @reason: which signal triggered the save; indexes the per-reason
 *          save/evict counters and sets errno_sourced for
 *          CORPUS_SAVE_REASON_ERRNO.
 *
 * Silently does nothing when the corpus is not allocated, when nr or
 * reason is out of range, when the syscall table has no entry, or when
 * corpus_args_replayable() rejects the syscall's argument types.
 * Otherwise the entry is built on the stack, written into the slot at
 * head under ring->lock (overwriting the oldest entry once the ring is
 * full), and the accounting counters are bumped after the lock is
 * dropped.
 */
void minicorpus_save_with_reason(struct syscallrecord *rec,
				 enum corpus_save_reason reason)
{
	struct corpus_ring *ring;
	struct corpus_entry tmp;
	struct syscallentry *entry;
	unsigned int nr = rec->nr;
	unsigned int i;
	unsigned int cur_count;

	if (minicorpus_shm == NULL || nr >= MAX_NR_SYSCALL)
		return;

	/* An out-of-range reason would index off the end of
	 * saves_by_reason[].  Drop the save rather than corrupt unrelated
	 * shm state -- the caller is buggy if this fires, so don't
	 * silently re-bucket it as PC either. */
	if ((unsigned int)reason >= CORPUS_SAVE_NR_REASONS)
		return;

	entry = get_syscall_entry(nr, rec->do32bit);
	if (entry == NULL)
		return;

	if (!corpus_args_replayable(entry))
		return;

	/* Build the entry on the stack unlocked.  None of this work touches
	 * shared state, so holding ring->lock across the arg copy and the
	 * argtype walk would serialise every other saver / replayer on this
	 * syscall's ring for no contention reason.  Zero the whole local
	 * struct so any future corpus_entry field is implicitly initialised
	 * rather than silently publishing uninitialised stack bytes. */
	memset(&tmp, 0, sizeof(tmp));
	tmp.args[0] = rec->a1;
	tmp.args[1] = rec->a2;
	tmp.args[2] = rec->a3;
	tmp.args[3] = rec->a4;
	tmp.args[4] = rec->a5;
	tmp.args[5] = rec->a6;
	tmp.num_args = entry->num_args;

	/* RedQueen-source provenance tag: read the current child's in_reexec
	 * recursion guard inside the save site rather than threading a new
	 * parameter through the random-syscall.c caller.  A NULL child (the
	 * parent post-mortem path is the only realistic caller; the normal
	 * dispatch_step save path always runs inside a child) leaves the
	 * default-zero rq_sourced from the memset above, which is the
	 * correct PC-source attribution for that case. */
	{
		struct childdata *cc = this_child();

		if (cc != NULL && cc->in_reexec)
			tmp.rq_sourced = true;
	}

	/* Errno-gradient provenance tag: the reason argument is the
	 * authoritative source.  Propagates through minicorpus_replay()
	 * into childdata::replay_errno_sourced so frontier_record_new_edge()
	 * can credit a downstream PC-edge win back to the errno-source
	 * save.  Decoupled from rq_sourced above: a single entry can't be
	 * both rq_sourced and errno_sourced (RedQueen captures happen on
	 * the in_reexec path with the PC/CMP reasons; errno saves happen
	 * from handle_syscall_ret with CORPUS_SAVE_REASON_ERRNO). */
	if (reason == CORPUS_SAVE_REASON_ERRNO)
		tmp.errno_sourced = true;

	/* Saved fd numbers are stale on replay — zero them out so mutate_arg
	 * gets a fresh fd rather than trying to reuse a closed one.  Same
	 * treatment for ARG_ADDRESS / ARG_NON_NULL_ADDRESS: raw user pointers
	 * from the saving run's address space are garbage in the replaying
	 * run, but the runtime can re-derive a valid writable page if the
	 * slot is zero. */
	for (i = 0; i < entry->num_args && i < 6; i++) {
		if (is_fdarg(entry->argtype[i]) ||
		    entry->argtype[i] == ARG_ADDRESS ||
		    entry->argtype[i] == ARG_NON_NULL_ADDRESS)
			tmp.args[i] = 0;
	}

	ring = &minicorpus_shm->rings[nr];

	ring_lock(ring);
	/* head only ever increments; the modulo maps it onto a slot. */
	ring->entries[ring->head % CORPUS_RING_SIZE] = tmp;
	/* Publish count BEFORE head, with release semantics.  The
	 * planned lockless burst-path reader snapshots count first,
	 * gates on count >= K_RECENT, then computes a slot offset from
	 * a snapshotted head.  If head were observed past count, the
	 * reader would compute against a stale base.  This diverges
	 * from chain_corpus_save()'s head-first ordering by design.
	 * Writers still serialise via ring->lock; the release-stores
	 * exist solely to give the future acquire-load reader a well-
	 * defined view paired with the entry store above.
	 *
	 * minicorpus_pick_from_other_syscall() is an existing lockless
	 * reader that already depends on this: it acquire-loads count
	 * and then relaxed-loads head.
	 *
	 * cur_count keeps the pre-save fill level; it is reused after
	 * the unlock to decide whether this save evicted an entry. */
	cur_count = ring->count;
	if (cur_count < CORPUS_RING_SIZE)
		__atomic_store_n(&ring->count, cur_count + 1,
				 __ATOMIC_RELEASE);
	__atomic_store_n(&ring->head, ring->head + 1, __ATOMIC_RELEASE);
	ring_unlock(ring);

	__atomic_fetch_add(&minicorpus_shm->mutations, 1UL, __ATOMIC_RELAXED);
	__atomic_fetch_add(&minicorpus_shm->saves_by_reason[reason], 1UL,
			   __ATOMIC_RELAXED);
	/* Ring-overwrite count per incoming reason.  At
	 * a full ring, the save above displaced the oldest existing
	 * entry; bump indexed by the incoming reason so the ratio
	 * evicts_by_reason[r] / saves_by_reason[r] is the realised
	 * "fraction of reason-r saves that evicted" rate the
	 * stratified mini-corpus replay policy hangs on. */
	if (cur_count >= CORPUS_RING_SIZE)
		__atomic_fetch_add(&minicorpus_shm->evicts_by_reason[reason],
				   1UL, __ATOMIC_RELAXED);

	/* Per-syscall RedQueen-source save counter.  Bumped only when the
	 * provenance tag captured above is set, so the per-syscall total is
	 * directly comparable with the rq_sourced_pcedge_wins_per_syscall[]
	 * counter that frontier_record_new_edge() bumps for later PC-edge
	 * wins from this same provenance.  RELAXED: cumulative diagnostic,
	 * consumed only at periodic dump time. */
	if (tmp.rq_sourced)
		__atomic_fetch_add(
			&shm->stats.rq_sourced_saves_per_syscall[nr],
			1UL, __ATOMIC_RELAXED);

	/* Per-syscall errno-source save counter.  Mirror of the rq_sourced
	 * bump above, paired with errno_sourced_pcedge_wins_per_syscall[]
	 * that frontier_record_new_edge() bumps for later PC-edge wins
	 * traced back to an errno-source save. */
	if (tmp.errno_sourced)
		__atomic_fetch_add(
			&shm->stats.errno_sourced_saves_per_syscall[nr],
			1UL, __ATOMIC_RELAXED);
}

/*
 * Per-process attribution stash for the weighted mutator scheduler.
 *
 * mutate_arg() bumps mut_attrib[op] every time it picks case `op`.  After
 * the syscall completes, the post-coverage path drains the stash via
 * minicorpus_mut_attrib_commit(), folding it into shm-wide trials/wins.
 *
 * Process-local — children fork before any mutate_arg call, so each child
 * has its own copy.  No locking needed: a child runs single-threaded.
 */
static unsigned int mut_attrib[MUT_NUM_OPS];

/*
 * Parallel structured-firing stash.  Bumped from inside mutate_arg
 * whenever the structure-aware branch ran (ARG_LIST / ARG_OP /
 * ARG_RANGE with usable arg_param metadata).  Drained by
 * minicorpus_mut_attrib_commit into shm->mut_structured_trials /
 * mut_structured_wins so per-op structured productivity can be
 * compared against the existing aggregate mut_trials / mut_wins.
 * Same per-process / fork-then-single-threaded guarantee as
 * mut_attrib above.
 */
static unsigned int mut_structured_attrib[MUT_NUM_OPS];

/*
 * Process-local replay and splice attribution flags.
 *
 * Set by minicorpus_replay() when the respective event occurs; consumed
 * and cleared by minicorpus_mut_attrib_commit() to attribute wins without
 * needing a second pass over the call path.  Per-process — same
 * fork/single-threaded guarantee as mut_attrib[].
 */
static bool this_replay_ran;
static bool this_replay_spliced;
static bool this_replay_xprop;

/*
 * Process-local per-syscall-replay source pointer.
 *
 * minicorpus_replay() sets these to the (nr, slot) of the corpus entry it
 * picked, so commit() can read and bump the entry's novel_replay_hits
 * baseline counter and gate mutator-win credit on that baseline.
 * Chain-replay (replay_syscall_step) does NOT have a source corpus entry
 * and leaves the flag false; commit() then skips per-op trials/wins
 * updates for chain-replay events so the bandit signal in mut_trials[]/
 * mut_wins[] reflects only the per-syscall-replay path where a baseline
 * can be established.
 *
 * Race tolerance: between minicorpus_replay's slot pick and commit() the
 * ring may rotate and evict the entry, so source_slot can point at a
 * different entry by the time we read its novel_replay_hits.  Crediting
 * a sibling entry's baseline is benign noise -- same shape as the
 * existing replay torn-read tolerance.  Cleared unconditionally in
 * commit() so a fall-through path can't leak source-tracked state into
 * a subsequent chain-replay commit.
 */
static bool this_replay_source_tracked;
static unsigned int this_replay_source_nr;
static unsigned int this_replay_source_slot;
/* Source-entry age (distance-from-head, in slots) at
 * replay-pick time.  Stashed at minicorpus_replay() pick and consumed
 * by minicorpus_mut_attrib_commit() to bin replay_wins_by_age.  Same
 * per-process / fork-then-single-threaded guarantee as the other
 * this_replay_* stashes above; unsigned so an untracked-source
 * commit just sees 0 without touching the histogram (gated on
 * this_replay_source_tracked). */
static unsigned int this_replay_source_age;

/*
 * Process-local CMP-source attribution flag.
 *
 * Set by minicorpus_mut_attrib_set_cmp_source() when the post-syscall
 * coverage signal that's about to drive commit() is CMP-bloom novelty
 * rather than PC-edge novelty.  Consumed and cleared by
 * minicorpus_mut_attrib_commit() -- if (found_new && this_attrib_cmp_source)
 * we bump the dedicated mut_attrib_cmp_wins scalar so stats can
 * separate the two sources without changing mut_wins[]/mut_trials[]
 * (which the weighted scheduler reads).  Same fork/single-threaded
 * guarantee as this_replay_ran above.
 */
static bool this_attrib_cmp_source;

/*
 * Process-local C.2b post-fill struct-field attribution stash.  Set by
 * minicorpus_struct_field_attrib() when struct_field_mutate_one applies
 * a per-tag primitive; consumed and cleared by
 * minicorpus_mut_attrib_commit().  At most one tag per call by
 * construction -- the gated entry point mutates exactly one field per
 * invocation -- so a simple (set, tag) pair captures everything the
 * commit needs.  Same per-process / fork-then-single-threaded guarantee
 * as the rest of the attribution stash.
 */
static enum field_tag this_struct_field_tag;
static bool this_struct_field_set;

/*
 * Floor on the per-case weight in the weighted scheduler.
 *
 * Weights are scaled to [0, 1000] (see weighted_pick_case() comment).
 * A floor of 50 keeps even a thoroughly-failed case at ~5% of a winning
 * case's weight, so it still gets picked occasionally.  Without a floor,
 * a case that produced zero wins after many trials would asymptote to
 * weight 0 and never be retried — and kernel state changes underneath
 * us, so a previously-dead case can become productive later.
 */
#define MUT_WEIGHT_FLOOR 50

/*
 * Pick a mutator case 0..MUT_NUM_OPS-1 weighted by historical productivity.
 *
 * Each case's weight is the Beta(1,1)-prior posterior mean of its success
 * rate, scaled to [0, 1000]:
 *
 *     w[op] = max(MUT_WEIGHT_FLOOR, (wins[op] + 1) * 1000 / (trials[op] + 2))
 *
 * Why this formula:
 *
 *  - The Beta(1,1) prior (uniform) gives every case w=500 on cold start
 *    when trials=wins=0, so we degrade gracefully to uniform random pick
 *    until evidence accumulates.  No special-casing for the empty-stats
 *    state, no warm-up phase to misconfigure.
 *
 *  - Add-one (Laplace) smoothing in the numerator and add-two in the
 *    denominator keep the formula well-defined at trials=0 and prevent a
 *    single early success from pinning a case to weight 1000.  It's the
 *    closed-form posterior mean of a Beta-binomial, not an ad-hoc fudge.
 *
 *  - We use the posterior MEAN rather than full Thompson sampling
 *    (Beta-distribution sampling).  Thompson would also work and be
 *    technically more exploration-aware, but it requires a Gamma
 *    sampler in libc that doesn't exist; the floor + uniform-prior
 *    combination here gives most of the same exploration benefit with
 *    a few lines of integer arithmetic.
 *
 *  - The floor is on the absolute weight, not on relative pick probability.
 *    With MUT_NUM_OPS cases and one heavily winning, the floored cases share
 *    the remaining mass — never starved, never dominant.
 *
 * Called once per primitive mutation (not once per syscall): a 4-deep
 * stack consults the scheduler four times.  All loads are __atomic
 * RELAXED — slightly stale fleet-wide counts are fine, the scheduler
 * is statistical not exact.
 */
static unsigned int weighted_pick_case(enum argtype atype)
{
	/* Top of the weight scale: a posterior mean of 1.0. */
	const unsigned long weight_scale = 1000UL;
	unsigned int weights[MUT_NUM_OPS];
	unsigned int total = 0;
	unsigned int r, accum, i;

	for (i = 0; i < MUT_NUM_OPS; i++) {
		unsigned long t = __atomic_load_n(&minicorpus_shm->mut_trials[i],
						  __ATOMIC_RELAXED);
		unsigned long s = __atomic_load_n(&minicorpus_shm->mut_wins[i],
						  __ATOMIC_RELAXED);
		unsigned long denom = t + 2UL;
		unsigned long w;

		/* The two counters are loaded independently and live in
		 * shared memory, so wins can transiently (or, after a
		 * scribble, permanently) exceed trials, and trials can be
		 * arbitrarily large.  Keep the arithmetic well-defined and
		 * the result inside [MUT_WEIGHT_FLOOR, weight_scale]:
		 *  - a wrapped denominator (trials within 2 of ULONG_MAX)
		 *    would divide by 0 or 1; treat it as "many trials" and
		 *    fall to the floor;
		 *  - a ratio above 1.0 is capped, so the narrowing store
		 *    below cannot truncate and total cannot wrap. */
		if (denom < 2UL)
			w = MUT_WEIGHT_FLOOR;
		else
			w = ((s + 1) * weight_scale) / denom;

		if (w < MUT_WEIGHT_FLOOR)
			w = MUT_WEIGHT_FLOOR;
		else if (w > weight_scale)
			w = weight_scale;

		weights[i] = (unsigned int)w;
		total += weights[i];
	}

	/* Case 8 (fd-swap) only does anything useful on fd-typed slots —
	 * pulling a random pool fd into a non-fd arg would just look like
	 * a small-integer noise mutation.  Zero its weight for non-fd args
	 * so the scheduler doesn't waste pick budget on it (and so its
	 * trials/wins ratio stays a meaningful signal of fd-swap value). */
	if (!is_fdarg(atype)) {
		total -= weights[8];
		weights[8] = 0;
	}

	/* Roulette-wheel selection: draw r in [0, total) and return the
	 * first case whose cumulative weight exceeds it. */
	r = rnd_modulo_u32(total);
	accum = 0;
	for (i = 0; i < MUT_NUM_OPS; i++) {
		accum += weights[i];
		if (r < accum)
			return i;
	}
	/* Not reached while r < total; kept as a defensive default. */
	return MUT_NUM_OPS - 1;
}

/*
 * SHADOW eligibility predicate for the Phase C.3 structure-aware arm
 * picker.  True iff the slot's argtype + arg_param payload would have
 * let try_structured_mutation() fire a type-aware variant for at least
 * one op -- i.e. the same gates that branch already enforces inline.
 * Kept here rather than reaching into try_structured_mutation() so the
 * shadow path can reject degenerate metadata (empty arglist, inverted
 * range) at the same coarse granularity the unstructured fallback
 * currently bypasses it at.
 */
static bool slot_is_structured(enum argtype atype,
		const struct arg_param *params)
{
	/* No metadata at all: nothing structure-aware can fire. */
	if (params == NULL)
		return false;

	/* List-backed types need a non-empty, non-NULL value table. */
	if (atype == ARG_LIST || atype == ARG_OP) {
		if (params->list.num == 0)
			return false;
		return params->list.values != NULL;
	}

	/* A range is usable unless its bounds are inverted. */
	if (atype == ARG_RANGE)
		return params->range.hi >= params->range.low;

	return false;
}

/*
 * Shadow variant of weighted_pick_case() that adds the existing
 * mut_structured_trials / mut_structured_wins per-op stats as a second
 * Beta arm alongside the live mut_trials / mut_wins arm and draws from
 * the doubled 2 * MUT_NUM_OPS pool.  The op index returned is the
 * arm's op (arm mod MUT_NUM_OPS); the caller treats arms 0..N-1 and
 * N..2N-1 as the same op for divergence accounting, because the live
 * picker only ever returns an op index.  Caller MUST have already
 * confirmed slot_is_structured() -- otherwise the structured half is
 * meaningless and would just double-count the unstructured arm.
 *
 * Uses a fresh rnd_modulo_u32() draw rather than re-using the live
 * picker's r: the doubled-pool total differs from the live total, so
 * the live r does not map onto the same arm interval.  Burns one
 * additional RNG step per shadow sample, which is negligible against
 * the per-call cost.
 *
 * The same fd-only zeroing applied to op 8 in the live picker is
 * applied to both arm copies of op 8 here, so a non-fd structured slot
 * cannot accidentally make the fd-swap op weight non-zero just because
 * the structured arm exists.
 */
static unsigned int weighted_pick_case_shadow_structured(enum argtype atype)
{
	const unsigned int nr_arms = 2 * MUT_NUM_OPS;
	unsigned int weights[2 * MUT_NUM_OPS];
	/* Upper half of the pool: one structured arm per op. */
	unsigned int *structured_weights = &weights[MUT_NUM_OPS];
	unsigned int sum = 0;
	unsigned int running = 0;
	unsigned int op, arm, draw;

	/* Lower half: weights derived from mut_trials / mut_wins. */
	for (op = 0; op < MUT_NUM_OPS; op++) {
		unsigned long trials, wins, weight;

		trials = __atomic_load_n(&minicorpus_shm->mut_trials[op],
					 __ATOMIC_RELAXED);
		wins = __atomic_load_n(&minicorpus_shm->mut_wins[op],
				       __ATOMIC_RELAXED);

		/* (wins + 1) / (trials + 2) scaled by 1000, then floored. */
		weight = ((wins + 1) * 1000UL) / (trials + 2UL);
		if (weight < MUT_WEIGHT_FLOOR)
			weight = MUT_WEIGHT_FLOOR;
		weights[op] = (unsigned int)weight;
	}

	/* Upper half: same formula over the structured counter pair. */
	for (op = 0; op < MUT_NUM_OPS; op++) {
		unsigned long trials, wins, weight;

		trials = __atomic_load_n(
			&minicorpus_shm->mut_structured_trials[op],
			__ATOMIC_RELAXED);
		wins = __atomic_load_n(
			&minicorpus_shm->mut_structured_wins[op],
			__ATOMIC_RELAXED);

		weight = ((wins + 1) * 1000UL) / (trials + 2UL);
		if (weight < MUT_WEIGHT_FLOOR)
			weight = MUT_WEIGHT_FLOOR;
		structured_weights[op] = (unsigned int)weight;
	}

	/* Non-fd slots: drop op 8 from both halves of the pool. */
	if (!is_fdarg(atype)) {
		weights[8] = 0;
		structured_weights[8] = 0;
	}

	for (arm = 0; arm < nr_arms; arm++)
		sum += weights[arm];

	/* Walk the cumulative weights until the draw lands inside an arm. */
	draw = rnd_modulo_u32(sum);
	for (arm = 0; arm < nr_arms; arm++) {
		running += weights[arm];
		if (draw < running)
			return arm % MUT_NUM_OPS;	/* fold arm back to its op */
	}

	/* Draw landed in no arm: fall back to the last op index. */
	return MUT_NUM_OPS - 1;
}

/*
 * Raise the this_attrib_cmp_source stash flag.
 *
 * minicorpus_mut_attrib_commit() is the consumer: when the flag is set
 * it bumps mut_attrib_cmp_wins if found_new is true, and it clears the
 * flag whether or not a win was credited (it is also cleared on the
 * commit's no-shm early return).  The flag therefore has to be raised
 * again before each commit that should be counted.
 */
void minicorpus_mut_attrib_set_cmp_source(void)
{
	this_attrib_cmp_source = true;
}

/*
 * Stash a struct-field tag: stores `tag` in this_struct_field_tag and
 * raises this_struct_field_set to mark the stash as populated.
 *
 * The stash holds a single (set, tag) pair, so a second call before the
 * stash is consumed overwrites the earlier tag rather than accumulating.
 * minicorpus_mut_attrib_commit() clears this_struct_field_set on its
 * no-shm early return.
 * NOTE(review): the commit path's own comment describes per-tag
 * trial/win accounting driven by this stash; that code is not visible
 * from here -- confirm against the rest of the commit function.
 */
void minicorpus_struct_field_attrib(enum field_tag tag)
{
	this_struct_field_tag = tag;
	this_struct_field_set = true;
}

void minicorpus_mut_attrib_commit(bool found_new)
{
	unsigned int i;

	/* Clear the per-child replay-provenance flag unconditionally,
	 * regardless of whether the call had a tracked corpus source.  The
	 * flag is set inside minicorpus_replay() right after the snapshot
	 * picks an entry tagged rq_sourced, and consumed by
	 * frontier_record_new_edge() during the call's kcov pass which has
	 * already completed by the time we get here.  Clearing here keeps
	 * the next iteration's frontier_record_new_edge from mis-crediting
	 * its PC win to a stale source -- whether the next call is a
	 * non-replay (fresh args) or a replay of a non-rq-sourced entry. */
	{
		struct childdata *cc = this_child();

		if (cc != NULL) {
			cc->replay_rq_sourced = false;
			cc->replay_errno_sourced = false;
		}
	}

	if (minicorpus_shm == NULL) {
		/* Still clear the per-process tag so a future shm-armed
		 * commit() doesn't see stale state from before init. */
		this_attrib_cmp_source = false;
		this_replay_source_tracked = false;
		this_struct_field_set = false;
		for (i = 0; i < MUT_NUM_OPS; i++)
			mut_structured_attrib[i] = 0;
		return;
	}

	/* Per-op mutator accounting is gated on having a tracked source
	 * corpus entry (i.e., the call came from minicorpus_replay, not
	 * chain-replay).  Chain-replay shares the same mutator engine but
	 * has no per-entry baseline to subtract intrinsic novelty against,
	 * so feeding its events into mut_trials[]/mut_wins[] would re-
	 * introduce the corpus-marginal-novelty signal that the per-entry
	 * baseline exists to filter out.  Clear the stash unconditionally
	 * so the next call starts clean regardless of whether we credited.
	 *
	 * Per-op granularity: bump trials/wins by ONE per call per op that
	 * participated (mut_attrib[op] > 0), not by the raw pick count.
	 * The old per-pick crediting inflated every call's win signal by
	 * its stack depth, masking real op-quality differences under the
	 * common per-call novelty rate (the uniform ~0.07% pathology).
	 *
	 * Per-entry baseline gate: even on a tracked-source call,
	 * mut_wins[] is only bumped if the source entry has produced novel
	 * coverage in a previous replay (novel_replay_hits > 0).  The first
	 * productive replay of an entry establishes the baseline -- those
	 * edges are the entry's intrinsic value, not the mutator's -- and
	 * is counted as a trial but not a win.  Subsequent productive
	 * replays cross the baseline and are credited to the mutator.
	 */
	if (this_replay_source_tracked) {
		struct corpus_entry *src_entry = NULL;
		bool baseline_established = false;

		if (this_replay_source_nr < MAX_NR_SYSCALL &&
		    this_replay_source_slot < CORPUS_RING_SIZE) {
			src_entry = &minicorpus_shm->rings[this_replay_source_nr]
				    .entries[this_replay_source_slot];
			baseline_established =
				__atomic_load_n(&src_entry->novel_replay_hits,
						__ATOMIC_RELAXED) > 0;
		}

		for (i = 0; i < MUT_NUM_OPS; i++) {
			if (mut_attrib[i] != 0) {
				__atomic_fetch_add(&minicorpus_shm->mut_trials[i],
						   1UL, __ATOMIC_RELAXED);
				if (found_new && baseline_established)
					__atomic_fetch_add(&minicorpus_shm->mut_wins[i],
							   1UL, __ATOMIC_RELAXED);
				mut_attrib[i] = 0;
			}

			/* Structured-firing accounting lives on a parallel
			 * stash because a single call may pick op `i` more
			 * than once with only some of those picks landing on
			 * a structured-typed slot.  Bumped per-call (not
			 * per-pick) and gated by the same baseline rule as
			 * the unstructured pair so the two ratios stay
			 * apples-to-apples. */
			if (mut_structured_attrib[i] != 0) {
				__atomic_fetch_add(&minicorpus_shm->mut_structured_trials[i],
						   1UL, __ATOMIC_RELAXED);
				if (found_new && baseline_established)
					__atomic_fetch_add(&minicorpus_shm->mut_structured_wins[i],
							   1UL, __ATOMIC_RELAXED);
				mut_structured_attrib[i] = 0;
			}
		}

		/* Advance the source entry's baseline if this replay was
		 * productive.  Bump unconditionally on found_new -- baseline
		 * tracking is independent of whether wins were credited this
		 * call (the first productive replay bumps to 1 without
		 * crediting, unlocking subsequent calls).  Tolerates a slot
		 * eviction race: a sibling entry's baseline gets advanced
		 * instead, which is the same benign mis-attribution shape as
		 * the gate read above. */
		if (found_new && src_entry != NULL)
			__atomic_fetch_add(&src_entry->novel_replay_hits,
					   1U, __ATOMIC_RELAXED);

		/* Replay-wins-by-entry-age.  Same
		 * found_new gate the baseline advance above uses --
		 * "productive replay of a tracked source" — but
		 * unconditional on baseline_established because the
		 * histogram measures *coverage discovery* per age
		 * bucket regardless of whether the discovery is the
		 * entry's intrinsic novelty or a mutator credit.
		 * Bucket index = floor(log2(age)) + 1 with age==0
		 * landing in bucket 0; saturates at the last bucket
		 * so any age the ring can hold lands in a defined
		 * slot. */
		if (found_new) {
			unsigned int age = this_replay_source_age;
			unsigned int bucket;

			if (age == 0)
				bucket = 0;
			else {
				unsigned int lz = (unsigned int)__builtin_clz(age);
				unsigned int hi_bit = 31u - lz;

				bucket = hi_bit + 1u;
				if (bucket >= ARRAY_SIZE(minicorpus_shm->replay_wins_by_age))
					bucket = ARRAY_SIZE(minicorpus_shm->replay_wins_by_age) - 1u;
			}
			__atomic_fetch_add(
				&minicorpus_shm->replay_wins_by_age[bucket],
				1UL, __ATOMIC_RELAXED);
		}

		this_replay_source_tracked = false;
	} else {
		/* Untracked source (chain-replay or other non-minicorpus
		 * caller).  Clear both stashes without recording per-op
		 * events -- the bandit signal (and the structured-firing
		 * companion) stays exclusively per-syscall-replay. */
		for (i = 0; i < MUT_NUM_OPS; i++) {
			mut_attrib[i] = 0;
			mut_structured_attrib[i] = 0;
		}
	}

	if (this_replay_ran) {
		if (found_new)
			__atomic_fetch_add(&minicorpus_shm->replay_wins,
					   1UL, __ATOMIC_RELAXED);
		this_replay_ran = false;
	}

	if (this_replay_spliced) {
		if (found_new)
			__atomic_fetch_add(&minicorpus_shm->splice_wins,
					   1UL, __ATOMIC_RELAXED);
		this_replay_spliced = false;
	}

	if (this_replay_xprop) {
		if (found_new)
			__atomic_fetch_add(&minicorpus_shm->xprop_wins,
					   1UL, __ATOMIC_RELAXED);
		this_replay_xprop = false;
	}

	/* CMP-source wins counter.  Bumped at most once per commit so its
	 * units match "calls credited as CMP-source wins" not "per-arg
	 * mutator picks" -- the latter is already covered by mut_wins[]
	 * which the bandit-weighting math consumes unchanged.  Cleared
	 * unconditionally so a stale flag from a found_new=false call
	 * doesn't leak into the next call's attribution. */
	if (this_attrib_cmp_source) {
		if (found_new)
			__atomic_fetch_add(
				&minicorpus_shm->mut_attrib_cmp_wins,
				1UL, __ATOMIC_RELAXED);
		this_attrib_cmp_source = false;
	}

	/*
	 * Per-tag attribution for the C.2b post-fill struct-field mutator.
	 * Exactly one tag per call by construction (struct_field_mutate_one
	 * picks at most one field per invocation), so the stash is a simple
	 * (set, tag) pair.  Trials bump unconditionally on a set stash --
	 * the stash being set IS the "we did a mutation" signal -- and wins
	 * bump only on found_new.  Independent of the per-syscall-replay
	 * baseline gate used by the MUT_NUM_OPS counters above: post-fill
	 * mutation runs on fresh-fill calls, never on replay, so there is
	 * no per-entry baseline to subtract.  Out-of-range tag bytes are