@@ -147,6 +147,23 @@ def prepare_trainer_input_single(
147147 Path (stat_file_path_single ).mkdir ()
148148 stat_file_path_single = DPPath (stat_file_path_single , "a" )
149149
150+ rank_seed = [rank , seed % (2 ** 32 )] if seed is not None else None
151+
152+ def _make_dp_loader_set (
153+ systems : str | list [str ],
154+ dataset_params : dict [str , Any ],
155+ ) -> DpLoaderSet :
156+ """Expand ``systems`` with ``process_systems`` (using the optional "rglob_patterns" entry of ``dataset_params``) and wrap the result in a ``DpLoaderSet``; the type map, seed and modifier come from the enclosing scope."""
157+ patterns = dataset_params .get ("rglob_patterns" , None )  # optional key: None when absent
158+ systems = process_systems (systems , patterns = patterns )  # rebinds the local name only
159+ return DpLoaderSet (
160+ systems ,
161+ dataset_params ["batch_size" ],  # required key: a missing entry raises KeyError
162+ model_params_single ["type_map" ],  # not a parameter: resolved from the enclosing scope
163+ seed = rank_seed ,  # [rank, seed % 2**32], or None when no seed was given
164+ modifier = modifier ,  # not a parameter: resolved from the enclosing scope
165+ )
166+
150167 # LMDB path: single string → LmdbDataset
151168 if is_lmdb (training_systems ):
152169 auto_prob = training_dataset_params .get ("auto_prob" , None )
@@ -163,46 +180,21 @@ def prepare_trainer_input_single(
163180 validation_dataset_params ["batch_size" ],
164181 )
165182 elif validation_systems is not None :
166- val_patterns = validation_dataset_params .get ("rglob_patterns" , None )
167- validation_systems = process_systems (validation_systems , val_patterns )
168- rank_seed = [rank , seed % (2 ** 32 )] if seed is not None else None
169- validation_data_single = DpLoaderSet (
170- validation_systems ,
171- validation_dataset_params ["batch_size" ],
172- model_params_single ["type_map" ],
173- seed = rank_seed ,
174- modifier = modifier ,
183+ validation_data_single = _make_dp_loader_set (
184+ validation_systems , validation_dataset_params
175185 )
176186 else :
177187 validation_data_single = None
178188 else :
179189 # Standard npy path
180- trn_patterns = training_dataset_params .get ("rglob_patterns" , None )
181- training_systems = process_systems (training_systems , patterns = trn_patterns )
182- if validation_systems is not None :
183- val_patterns = validation_dataset_params .get ("rglob_patterns" , None )
184- validation_systems = process_systems (validation_systems , val_patterns )
185-
186- # avoid the same batch sequence among devices
187- rank_seed = [rank , seed % (2 ** 32 )] if seed is not None else None
190+ train_data_single = _make_dp_loader_set (
191+ training_systems , training_dataset_params
192+ )
188193 validation_data_single = (
189- DpLoaderSet (
190- validation_systems ,
191- validation_dataset_params ["batch_size" ],
192- model_params_single ["type_map" ],
193- seed = rank_seed ,
194- modifier = modifier ,
195- )
194+ _make_dp_loader_set (validation_systems , validation_dataset_params )
196195 if validation_systems
197196 else None
198197 )
199- train_data_single = DpLoaderSet (
200- training_systems ,
201- training_dataset_params ["batch_size" ],
202- model_params_single ["type_map" ],
203- seed = rank_seed ,
204- modifier = modifier ,
205- )
206198 return (
207199 train_data_single ,
208200 validation_data_single ,
0 commit comments