Skip to content

Commit c23ea84

Browse files
cailmdaley and claude committed
refactor(run): replace -e/--exclusive flag with NUMBER_LIST (#746)
The exclusive input-ID flag and the NUMBER_LIST config option converged in FileHandler._format_process_list and did the same thing for a single ID; NUMBER_LIST is now the one mechanism.

Pipeline:
- remove -e/--exclusive from args.py and its plumbing through run.py, FileHandler, and JobHandler (where it was stored but never used)
- NUMBER_LIST entries are now validated against the input file numbers found on disk, preserving -e's early failure on a wrong ID: the run aborts at start-up instead of when a module first opens files
- unit tests for the validation (subset passes, typo raises, no-list scan path unchanged)

Canfar chain (script-level -e options are unchanged; one ID per headless job remains the interface):
- job_sp_canfar.bash, job_sp_canfar_v2.0.bash, and init_run_exclusive_canfar.sh write NUMBER_LIST into a per-job config copy (set_config_number_list: insert-or-replace under [FILE], ID in numbering-scheme form: leading dash, dots->dashes) instead of passing -e to shapepipe_run.

Side benefit: the processed ID is recorded in the config copied to the run's log directory.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
1 parent 6b30fd4 commit c23ea84

9 files changed

Lines changed: 173 additions & 55 deletions

File tree

docs/source/configuration.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,10 @@ The following options can be added to the `[FILE]` section of the config file
8484
(*e.g.* `.`, `-`, `:`, *etc.*). *optional*ly a regular expression can also be
8585
passed if it is preceded by `RE:` (*e.g.* `RE:-\d{9}`).
8686
- `NUMBER_LIST` : (`str` or `list`, *optional*) A list of number strings
87-
matching the numbering scheme or a file name.
87+
matching the numbering scheme or a file name. Restricts the run to these
88+
numbers; every entry must match an input file found on disk, otherwise the
89+
run fails at start-up. This is also how a single image is processed per
90+
job (formerly the `-e`/`--exclusive` command-line flag).
8891
- `CORRECT_FILE_PATTERN` : (`bool`, *optional*) Option to allow substring file
8992
patterns. Default value is `True`.
9093

scripts/sh/init_run_exclusive_canfar.sh

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,25 @@ function message() {
144144
fi
145145
}
146146

147+
# Write an updated copy of a shapepipe config with NUMBER_LIST set to the
148+
# given image ID, expressed in the numbering scheme (leading dash, dots ->
149+
# dashes). Replaces the retired shapepipe_run -e/--exclusive flag (#746).
150+
function set_config_number_list() {
151+
local config_orig=$1
152+
local config_upd=$2
153+
local _id=$3
154+
155+
local number="-$(echo $_id | tr '.' '-')"
156+
local config_tmp="${config_upd}.tmp"
157+
158+
if grep -q "^NUMBER_LIST" "$config_orig"; then
159+
perl -pe 's/^NUMBER_LIST\s*=.*/NUMBER_LIST = '$number'/' "$config_orig" > "$config_tmp"
160+
else
161+
perl -pe 's/^\[FILE\][ \t]*$/[FILE]\nNUMBER_LIST = '$number'/' "$config_orig" > "$config_tmp"
162+
fi
163+
mv "$config_tmp" "$config_upd"
164+
}
165+
147166

148167
# Init message
149168
message "test=$test_only" $debug_out -1
@@ -267,7 +286,8 @@ if [ "$fix" == "1" ]; then
267286
message "Unzip weight ($dry_run)" $debug_out -1
268287
command "cd tile_runs/$ID" $dry_run
269288
export SP_RUN=`pwd`
270-
command "shapepipe_run -c cfis/config_tile_Uz.ini -e $ID" $dry_run
289+
command "set_config_number_list cfis/config_tile_Uz.ini config_tile_Uz_upd.ini $ID" $dry_run
290+
command "shapepipe_run -c config_tile_Uz_upd.ini" $dry_run
271291

272292
cd $dir
273293
else
@@ -384,7 +404,8 @@ if [ $do_job != 0 ] && [ "$sp_local" == "1" ]; then
384404
fi
385405
command "update_runs_log_file.py" $dry_run
386406
export SP_RUN=`pwd`
387-
command "shapepipe_run -c cfis/config_exp_Sp.ini -e $exp_ID" $dry_run
407+
command "set_config_number_list cfis/config_exp_Sp.ini config_exp_Sp_upd.ini $exp_ID" $dry_run
408+
command "shapepipe_run -c config_exp_Sp_upd.ini" $dry_run
388409

389410
# Only keep CCD of this ID
390411
command "mkdir -p output/run_sp_exp_Sp_shdu/split_exp_runner/output" $dry_run

scripts/sh/job_sp_canfar.bash

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -261,18 +261,19 @@ function command_sp() {
261261
function command_cfg_shapepipe() {
262262
local config_name=$1
263263
local str=$2
264-
local _n_smp=$3
264+
local _n_smp=$3
265265
local _exclusive=$4
266266

267-
if [ "$exclusive" != "" ]; then
268-
exclusive_flag="-e $_exclusive"
269-
else
270-
exclusive_flag=""
267+
config_upd=$(set_config_n_smp $config_name $_n_smp)
268+
269+
# Run a single image ID via NUMBER_LIST in an updated config copy;
270+
# replaces the retired shapepipe_run -e/--exclusive flag (#746)
271+
if [ "$_exclusive" != "" ]; then
272+
set_config_number_list "$config_upd" "$SP_CONFIG_MOD/$config_name" "$_exclusive"
273+
config_upd="$SP_CONFIG_MOD/$config_name"
271274
fi
272275

273-
config_upd=$(set_config_n_smp $config_name $_n_smp)
274-
#local cmd="/arc/home/kilbinger/.conda/envs/shapepipe/bin/shapepipe_run -c $config_upd $exclusive_flag"
275-
local cmd="shapepipe_run -c $config_upd $exclusive_flag"
276+
local cmd="shapepipe_run -c $config_upd"
276277
command_sp "$cmd" "$str"
277278
}
278279

@@ -337,6 +338,25 @@ function update_config() {
337338
| perl -ane 's/'$key'\s+=.+/'$key' = '$val_upd'/; print' > $config_upd
338339
}
339340

341+
# Write an updated copy of a shapepipe config with NUMBER_LIST set to the
342+
# given image ID, expressed in the numbering scheme (leading dash, dots ->
343+
# dashes). Replaces the retired shapepipe_run -e/--exclusive flag (#746).
344+
function set_config_number_list() {
345+
local config_orig=$1
346+
local config_upd=$2
347+
local _id=$3
348+
349+
local number="-$(echo $_id | tr '.' '-')"
350+
local config_tmp="${config_upd}.tmp"
351+
352+
if grep -q "^NUMBER_LIST" "$config_orig"; then
353+
perl -pe 's/^NUMBER_LIST\s*=.*/NUMBER_LIST = '$number'/' "$config_orig" > "$config_tmp"
354+
else
355+
perl -pe 's/^\[FILE\][ \t]*$/[FILE]\nNUMBER_LIST = '$number'/' "$config_orig" > "$config_tmp"
356+
fi
357+
mv "$config_tmp" "$config_upd"
358+
}
359+
340360
### Start ###
341361

342362
echo "Start processing"

scripts/sh/job_sp_canfar_v2.0.bash

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,9 @@ export SP_RUN=`pwd`
165165
# Config file path
166166
export SP_CONFIG=$SP_RUN/cfis
167167

168+
# Path for updated (per-job) config file copies
169+
export SP_CONFIG_MOD=$SP_RUN/cfis_mod
170+
168171
# Root directory for per-exposure work directories.
169172
# Set SP_EXP in the environment to override; otherwise falls back to the
170173
# conventional layout (SP_RUN = .../v2.0/tiles/IDra/ID, three levels up + exp).
@@ -243,27 +246,48 @@ function command () {
243246
fi
244247
}
245248

249+
# Write an updated copy of a shapepipe config with NUMBER_LIST set to the
250+
# given image ID, expressed in the numbering scheme (leading dash, dots ->
251+
# dashes). Replaces the retired shapepipe_run -e/--exclusive flag (#746).
252+
function set_config_number_list() {
253+
local config_orig=$1
254+
local config_upd=$2
255+
local _id=$3
256+
257+
local number="-$(echo $_id | tr '.' '-')"
258+
local config_tmp="${config_upd}.tmp"
259+
260+
if grep -q "^NUMBER_LIST" "$config_orig"; then
261+
perl -pe 's/^NUMBER_LIST\s*=.*/NUMBER_LIST = '$number'/' "$config_orig" > "$config_tmp"
262+
else
263+
perl -pe 's/^\[FILE\][ \t]*$/[FILE]\nNUMBER_LIST = '$number'/' "$config_orig" > "$config_tmp"
264+
fi
265+
mv "$config_tmp" "$config_upd"
266+
}
267+
246268
# Set up config file and call shapepipe_run.
247-
# Batch size is passed via --batch_size flag; no config editing needed.
269+
# Batch size is passed via --batch_size flag.
248270
function command_cfg_shapepipe() {
249271
local config_name=$1
250272
local str=$2
251273
local _n_smp=$3
252274
local _exclusive=$4
253275

254-
if [ "$exclusive" != "" ]; then
255-
exclusive_flag="-e $_exclusive"
256-
else
257-
exclusive_flag=""
276+
local config="$SP_CONFIG/$config_name"
277+
278+
# Run a single image ID via NUMBER_LIST in an updated config copy;
279+
# replaces the retired shapepipe_run -e/--exclusive flag (#746)
280+
if [ "$_exclusive" != "" ]; then
281+
set_config_number_list "$config" "$SP_CONFIG_MOD/$config_name" "$_exclusive"
282+
config="$SP_CONFIG_MOD/$config_name"
258283
fi
259284

260285
local batch_flag=""
261286
if [[ $_n_smp != -1 ]]; then
262287
batch_flag="--batch_size $_n_smp"
263288
fi
264289

265-
local config="$SP_CONFIG/$config_name"
266-
local cmd="shapepipe_run.py -c $config $exclusive_flag $batch_flag"
290+
local cmd="shapepipe_run.py -c $config $batch_flag"
267291
command "$cmd" "$str"
268292
}
269293

@@ -275,6 +299,7 @@ echo "Start processing"
275299
mkdir -p $SP_RUN
276300
cd $SP_RUN
277301
mkdir -p $OUTPUT
302+
mkdir -p $SP_CONFIG_MOD
278303

279304
# Processing
280305

src/shapepipe/pipeline/args.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -135,12 +135,6 @@ def create_arg_parser():
135135
help="configuration file name",
136136
)
137137

138-
optional.add_argument(
139-
"-e",
140-
"--exclusive",
141-
help="exclusive input file number string",
142-
)
143-
144138
optional.add_argument(
145139
"-b",
146140
"--batch_size",

src/shapepipe/pipeline/file_handler.py

Lines changed: 15 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,12 @@ class FileHandler(object):
3232
List of modules to be run
3333
config : CustomParser
3434
Configuration parser instance
35-
exclusive : str, optional
36-
Run this file number string exclusively if given, the default is None
3735
verbose : bool, optional
3836
Verbose setting, default is True
3937
4038
"""
4139

42-
def __init__(self, run_name, modules, config, exclusive=None, verbose=True):
40+
def __init__(self, run_name, modules, config, verbose=True):
4341

4442
self._run_name = run_name
4543

@@ -48,7 +46,6 @@ def __init__(self, run_name, modules, config, exclusive=None, verbose=True):
4846
raise ValueError("Invalid module list, check for a trailing comma")
4947

5048
self._config = config
51-
self._exclusive = exclusive
5249
self._verbose = verbose
5350

5451
self.module_runners = get_module_runners(self._module_list)
@@ -1089,7 +1086,20 @@ def _format_process_list(
10891086
if isinstance(self._number_list, type(None)):
10901087
number_list = np.load(memory_map, mmap_mode="r")
10911088
else:
1089+
# NUMBER_LIST comes from the config on faith; intersect it
1090+
# with the numbers actually found on disk so that a wrong ID
1091+
# fails here, at start-up, rather than when a module first
1092+
# tries to open the (non-existent) files (#746).
10921093
number_list = self._number_list
1094+
scanned = set(np.load(memory_map, mmap_mode="r"))
1095+
missing = [num for num in number_list if num not in scanned]
1096+
if missing:
1097+
raise ValueError(
1098+
f"No input file found matching NUMBER_LIST "
1099+
f"entr{'ies' if len(missing) > 1 else 'y'} "
1100+
f"{missing}; {len(scanned)} input file number(s) "
1101+
f"found on disk."
1102+
)
10931103

10941104
if len(number_list) == 0:
10951105
msg = "Empty number list"
@@ -1107,20 +1117,6 @@ def _format_process_list(
11071117
+ f'numbering scheme "{num_scheme}".'
11081118
)
11091119

1110-
# If "exclusive" options is set: discard all non-matching IDs
1111-
if self._exclusive is not None:
1112-
id_to_test = f"-{self._exclusive.replace('.', '-')}"
1113-
if number == id_to_test:
1114-
if self._verbose:
1115-
print(
1116-
f"-- Using exclusive number {self._exclusive} ({id_to_test})"
1117-
)
1118-
else:
1119-
if self._verbose:
1120-
# print(f"Skipping {number}, not equal to {self._exclusive} ({id_to_test})")
1121-
pass
1122-
continue
1123-
11241120
if run_method == "serial":
11251121
process_items = []
11261122
else:
@@ -1134,11 +1130,7 @@ def _format_process_list(
11341130
process_list.append(process_items)
11351131

11361132
if len(process_list) == 0:
1137-
msg = "Empty process list"
1138-
if self._exclusive is not None:
1139-
if len(number_list) > 0:
1140-
msg = f"{msg}. No input file found matching exclusive ID"
1141-
raise ValueError(msg)
1133+
raise ValueError("Empty process list")
11421134

11431135
return process_list
11441136

src/shapepipe/pipeline/job_handler.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,6 @@ class JobHandler(object):
4242
Joblib backend, the default is None (which corresponds to 'loky')
4343
timeout : int, optional
4444
Timeout limit for a given job in seconds, the default is None
45-
exclusive : str, optional
46-
Run this file number string exclusively if given, the default is None
4745
verbose : bool, optional
4846
Verbose setting, default is True
4947
@@ -60,7 +58,6 @@ def __init__(
6058
batch_size=None,
6159
backend=None,
6260
timeout=None,
63-
exclusive=None,
6461
verbose=True,
6562
):
6663

@@ -75,7 +72,6 @@ def __init__(
7572
self._module = module
7673
self._module_runner = self.filehd.module_runners[self._module]
7774
self.error_count = 0
78-
self.exclusive = exclusive
7975
self._verbose = verbose
8076

8177
# Add the job parameters to the log

src/shapepipe/run.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,11 @@ def set_up(self):
6868
self._set_run_name()
6969
self.modules = self.config.getlist("EXECUTION", "MODULE")
7070
self.mode = self.config.get("EXECUTION", "MODE").lower()
71-
self.exclusive = self._args.exclusive
7271
self.verbose = self.config.getboolean("DEFAULT", "VERBOSE")
7372
self.filehd = FileHandler(
7473
self._run_name,
7574
self.modules,
7675
self.config,
77-
exclusive=self._args.exclusive,
7876
verbose=self.verbose,
7977
)
8078
self.error_count = 0
@@ -355,7 +353,6 @@ def run_smp(pipe):
355353
config=pipe.config,
356354
log=pipe.log,
357355
job_type=pipe.run_method[module],
358-
exclusive=pipe.exclusive,
359356
verbose=pipe.verbose,
360357
batch_size=pipe._args.batch_size,
361358
)
@@ -415,7 +412,6 @@ def run_mpi(pipe, comm):
415412
log=pipe.log,
416413
job_type=pipe.run_method[module],
417414
parallel_mode="mpi",
418-
exclusive=pipe.exclusive,
419415
verbose=verbose,
420416
batch_size=pipe._args.batch_size,
421417
)

0 commit comments

Comments
 (0)