@@ -14,7 +14,8 @@ use std::sync::{Arc, Barrier, Mutex};
1414use std:: time:: Duration ;
1515
1616use fbuild_build:: compile_many:: {
17- compile_many_with, CompileManyRequest , SketchBuildInputs , SketchBuilder , SketchResult , Stage ,
17+ compile_many_with, stage2_jobs_per_worker, CompileManyRequest , SketchBuildInputs ,
18+ SketchBuilder , SketchResult , Stage ,
1819} ;
1920use fbuild_core:: BuildProfile ;
2021
@@ -200,12 +201,16 @@ fn stage1_runs_exactly_once_and_stage2_handles_the_rest() {
200201 "sketch stage called once per remaining sketch"
201202 ) ;
202203
203- // Stage-1 honors framework_jobs; stage-2 always passes jobs=1.
204+ // Stage-1 honors framework_jobs; stage-2 derives per-worker jobs
205+ // from the host's core budget split across `sketch_jobs` workers
206+ // (#335). With `sketch_jobs=4` here, every stage-2 worker must see
207+ // the same `stage2_jobs_per_worker(4)` value the dispatcher passed.
204208 assert_eq ! ( stage1[ 0 ] . 3 , 1 , "framework_jobs=1 forwarded to stage 1" ) ;
209+ let expected_stage2_jobs = stage2_jobs_per_worker ( 4 ) ;
205210 for ( _, _, _, jobs) in & stage2 {
206211 assert_eq ! (
207- * jobs, 1 ,
208- "stage 2 workers always invoke orchestrator with jobs=1 "
212+ * jobs, expected_stage2_jobs ,
213+ "stage- 2 workers must receive jobs=stage2_jobs_per_worker(sketch_jobs) "
209214 ) ;
210215 }
211216
@@ -343,3 +348,75 @@ fn single_sketch_runs_only_stage1() {
343348 assert_eq ! ( result. stage2_count, 0 ) ;
344349 assert_eq ! ( result. results[ 0 ] . stage, Stage :: Stage1Framework ) ;
345350}
351+
352+ /// AC: stage-2 workers must keep total in-flight compile slots at roughly
353+ /// the host's core count — never <1, never silently >cores. The previous
354+ /// `jobs=1` hardcoding (FastLED/fbuild#335) capped each worker to a single
355+ /// compile thread even when the orchestrator was re-building the framework
356+ /// inside the worker, producing a ~2x slowdown vs. a single `fbuild build`
357+ /// on a 16-core host. This locks the new core-split behavior so a future
358+ /// regression to `jobs=1` (or to an oversubscribing default like
359+ /// `cores` per worker) gets caught at unit-test speed instead of in CI.
360+ #[ test]
361+ fn stage2_jobs_per_worker_splits_cores_across_workers ( ) {
362+ let cores = std:: thread:: available_parallelism ( )
363+ . map ( |n| n. get ( ) )
364+ . unwrap_or ( 1 )
365+ . max ( 1 ) ;
366+
367+ // Always >= 1, never zero — the hot path multiplies this in, so a
368+ // bug that produced 0 would silently turn every stage-2 build into a
369+ // no-op.
370+ for sketch_jobs in [ 1usize , 2 , 3 , 4 , 8 , 16 , 32 ] {
371+ let per = stage2_jobs_per_worker ( sketch_jobs) ;
372+ assert ! (
373+ per >= 1 ,
374+ "stage2_jobs_per_worker({sketch_jobs}) returned {per}, expected >= 1"
375+ ) ;
376+ }
377+
378+ // sketch_jobs=1 should grant the worker the whole core budget so
379+ // serial-batch invocations don't regress vs `fbuild build` (which
380+ // uses the same effective parallelism). On a single-core box this
381+ // still resolves to 1 — `available_parallelism()` is the floor.
382+ let lone_worker = stage2_jobs_per_worker ( 1 ) ;
383+ assert_eq ! (
384+ lone_worker,
385+ cores. max( 1 ) ,
386+ "with one stage-2 worker the worker should own the full core budget"
387+ ) ;
388+
389+ // Sum across all stage-2 workers must stay within the host's cores
390+ // — the whole point of splitting is to avoid oversubscription. We
391+ // tolerate a small undershoot when cores doesn't divide sketch_jobs
392+ // evenly (each worker still gets a floor of 1).
393+ for sketch_jobs in [ 2usize , 4 , 8 ] {
394+ let per = stage2_jobs_per_worker ( sketch_jobs) ;
395+ let total = per * sketch_jobs;
396+ // Floor of 1 per worker means total can exceed cores when
397+ // sketch_jobs > cores; bound only when sketch_jobs <= cores.
398+ if sketch_jobs <= cores {
399+ assert ! (
400+ total <= cores,
401+ "{sketch_jobs} workers × {per} jobs = {total} exceeds {cores} cores"
402+ ) ;
403+ }
404+ }
405+
406+ // sketch_jobs > cores ⇒ each worker still gets a floor of 1 — better
407+ // for the worker to spawn no extra parallelism than to stall at 0.
408+ let many = stage2_jobs_per_worker ( cores * 8 ) ;
409+ assert_eq ! (
410+ many, 1 ,
411+ "oversubscribed sketch_jobs should clamp per-worker jobs to the floor of 1"
412+ ) ;
413+
414+ // sketch_jobs=0 must not divide-by-zero — the public API takes
415+ // Option<usize> but the helper takes usize, and callers in the wild
416+ // can hand us a literal 0 (rounding, off-by-one). Treat 0 as 1.
417+ assert_eq ! (
418+ stage2_jobs_per_worker( 0 ) ,
419+ cores. max( 1 ) ,
420+ "stage2_jobs_per_worker(0) must not panic and should treat 0 as 1"
421+ ) ;
422+ }
0 commit comments