@@ -49,6 +49,37 @@ def _available_cpus_for_current_run() -> int:
4949 return max (os .cpu_count () or 1 , 1 )
5050
5151
def _infer_local_process_count(
    *,
    requested_num_gpus: int,
    available_gpus: int,
) -> int:
    """
    Return the number of trainer processes expected to share this node.

    The result is used as a divisor for the node's CPU budget when
    auto-sizing dataloader workers.

    Rules:

    * ``requested_num_gpus == -1`` is a sentinel meaning "use every
      available GPU" and is replaced by ``available_gpus``.
    * With ``SLURM_NTASKS`` equal to 1 (or unset / not an integer) and more
      than one GPU, Lightning is expected to spawn one local process per
      GPU, so the GPU count is returned.
    * In every other case (CPU-only, a single GPU, or an externally
      launched job with ``SLURM_NTASKS`` other than 1) each task keeps its
      own worker budget and the result is 1.

    Args:
        requested_num_gpus: GPU count from the config section; ``-1`` means
            "all available".
        available_gpus: Number of GPUs detected on this node.

    Returns:
        The estimated local process count, always at least 1.
    """
    # Read the Slurm task count; a missing or malformed value counts as 1.
    raw_ntasks = os.environ.get("SLURM_NTASKS", "1")
    try:
        ntasks = int(raw_ntasks)
    except ValueError:
        ntasks = 1

    # Resolve the "-1 means all GPUs" sentinel.
    gpu_count = available_gpus if requested_num_gpus == -1 else requested_num_gpus

    # CPU-only or single-GPU run: nothing fans out locally.
    if gpu_count <= 1:
        return 1

    # Externally launched distributed job: each task is its own process.
    if ntasks != 1:
        return 1

    # Local spawn fan-out: one process per GPU.
    return int(gpu_count)
81+
82+
5283def resolve_runtime_resource_sentinels (
5384 config : DictConfig ,
5485 print_results : bool = True ,
@@ -83,11 +114,16 @@ def resolve_runtime_resource_sentinels(
83114 )
84115
85116 if getattr (section , "num_workers" , None ) == - 1 :
86- section .num_workers = available_cpus
117+ process_count = _infer_local_process_count (
118+ requested_num_gpus = getattr (section , "num_gpus" , 0 ),
119+ available_gpus = available_gpus ,
120+ )
121+ section .num_workers = max (1 , available_cpus // process_count )
87122 if print_results :
88123 print (
89124 f"🔧 Auto-detected system.{ section_name } .num_workers: "
90- f"-1 → { section .num_workers } "
125+ f"-1 → { section .num_workers } "
126+ f"(available_cpus={ available_cpus } , local_processes={ process_count } )"
91127 )
92128
93129 if getattr (section , "num_gpus" , 0 ) < - 1 :
0 commit comments