From c23ea8451af9af2f2c5cfc0d73d283365d3c2766 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Thu, 11 Jun 2026 22:52:11 +0200 Subject: [PATCH 1/2] refactor(run): replace -e/--exclusive flag with NUMBER_LIST (#746) The exclusive input-ID flag and the NUMBER_LIST config option converged in FileHandler._format_process_list and did the same thing for a single ID; NUMBER_LIST is now the one mechanism. Pipeline: - remove -e/--exclusive from args.py and its plumbing through run.py, FileHandler, and JobHandler (where it was stored but never used) - NUMBER_LIST entries are now validated against the input file numbers found on disk, preserving -e's early failure on a wrong ID: the run aborts at start-up instead of when a module first opens files - unit tests for the validation (subset passes, typo raises, no-list scan path unchanged) Canfar chain (script-level -e options are unchanged; one ID per headless job remains the interface): - job_sp_canfar.bash, job_sp_canfar_v2.0.bash, and init_run_exclusive_canfar.sh write NUMBER_LIST into a per-job config copy (set_config_number_list: insert-or-replace under [FILE], ID in numbering-scheme form: leading dash, dots->dashes) instead of passing -e to shapepipe_run. Side benefit: the processed ID is recorded in the config copied to the run's log directory. Co-Authored-By: Claude Fable 5 --- docs/source/configuration.md | 5 +- scripts/sh/init_run_exclusive_canfar.sh | 25 ++++++++- scripts/sh/job_sp_canfar.bash | 36 +++++++++--- scripts/sh/job_sp_canfar_v2.0.bash | 39 ++++++++++--- src/shapepipe/pipeline/args.py | 6 -- src/shapepipe/pipeline/file_handler.py | 38 +++++-------- src/shapepipe/pipeline/job_handler.py | 4 -- src/shapepipe/run.py | 4 -- src/shapepipe/tests/test_file_handler.py | 71 ++++++++++++++++++++++++ 9 files changed, 173 insertions(+), 55 deletions(-) create mode 100644 src/shapepipe/tests/test_file_handler.py diff --git a/docs/source/configuration.md b/docs/source/configuration.md index 3336123c1..42c3aeaa0 100644 --- a/docs/source/configuration.md +++ b/docs/source/configuration.md @@ -84,7 +84,10 @@ The following options can be added to the `[FILE]` section of the config file (*e.g.* `.`, `-`, `:`, *etc.*). *optional*ly a regular expression can also be passed if it is preceded by `RE:` (*e.g.* `RE:-\d{9}`). - `NUMBER_LIST` : (`str` or `list`, *optional*) A list of number strings - matching the numbering scheme or a file name. + matching the numbering scheme or a file name. Restricts the run to these + numbers; every entry must match an input file found on disk, otherwise the + run fails at start-up. This is also how a single image is processed per + job (formerly the `-e`/`--exclusive` command-line flag). - `CORRECT_FILE_PATTERN` : (`bool`, *optional*) Option to allow substring file patterns. Default value is `True`. diff --git a/scripts/sh/init_run_exclusive_canfar.sh b/scripts/sh/init_run_exclusive_canfar.sh index cb7c9af69..c1de00847 100755 --- a/scripts/sh/init_run_exclusive_canfar.sh +++ b/scripts/sh/init_run_exclusive_canfar.sh @@ -144,6 +144,25 @@ function message() { fi } +# Write an updated copy of a shapepipe config with NUMBER_LIST set to the +# given image ID, expressed in the numbering scheme (leading dash, dots -> +# dashes). Replaces the retired shapepipe_run -e/--exclusive flag (#746). +function set_config_number_list() { + local config_orig=$1 + local config_upd=$2 + local _id=$3 + + local number="-$(echo $_id | tr '.' '-')" + local config_tmp="${config_upd}.tmp" + + if grep -q "^NUMBER_LIST" "$config_orig"; then + perl -pe 's/^NUMBER_LIST\s*=.*/NUMBER_LIST = '$number'/' "$config_orig" > "$config_tmp" + else + perl -pe 's/^\[FILE\][ \t]*$/[FILE]\nNUMBER_LIST = '$number'/' "$config_orig" > "$config_tmp" + fi + mv "$config_tmp" "$config_upd" +} + # Init message message "test=$test_only" $debug_out -1 @@ -267,7 +286,8 @@ if [ "$fix" == "1" ]; then message "Unzip weight ($dry_run)" $debug_out -1 command "cd tile_runs/$ID" $dry_run export SP_RUN=`pwd` - command "shapepipe_run -c cfis/config_tile_Uz.ini -e $ID" $dry_run + command "set_config_number_list cfis/config_tile_Uz.ini config_tile_Uz_upd.ini $ID" $dry_run + command "shapepipe_run -c config_tile_Uz_upd.ini" $dry_run cd $dir else @@ -384,7 +404,8 @@ if [ $do_job != 0 ] && [ "$sp_local" == "1" ]; then fi command "update_runs_log_file.py" $dry_run export SP_RUN=`pwd` - command "shapepipe_run -c cfis/config_exp_Sp.ini -e $exp_ID" $dry_run + command "set_config_number_list cfis/config_exp_Sp.ini config_exp_Sp_upd.ini $exp_ID" $dry_run + command "shapepipe_run -c config_exp_Sp_upd.ini" $dry_run # Only keep CCD of this ID command "mkdir -p output/run_sp_exp_Sp_shdu/split_exp_runner/output" $dry_run diff --git a/scripts/sh/job_sp_canfar.bash b/scripts/sh/job_sp_canfar.bash index 54036c932..3fdaba0ad 100755 --- a/scripts/sh/job_sp_canfar.bash +++ b/scripts/sh/job_sp_canfar.bash @@ -261,18 +261,19 @@ function command_sp() { function command_cfg_shapepipe() { local config_name=$1 local str=$2 - local _n_smp=$3 + local _n_smp=$3 local _exclusive=$4 - if [ "$exclusive" != "" ]; then - exclusive_flag="-e $_exclusive" - else - exclusive_flag="" + config_upd=$(set_config_n_smp $config_name $_n_smp) + + # Run a single image ID via NUMBER_LIST in an updated config copy; + # replaces the retired shapepipe_run -e/--exclusive flag (#746) + if [ "$_exclusive" != "" ]; then + set_config_number_list "$config_upd" "$SP_CONFIG_MOD/$config_name" "$_exclusive" + config_upd="$SP_CONFIG_MOD/$config_name" fi - config_upd=$(set_config_n_smp $config_name $_n_smp) - #local cmd="/arc/home/kilbinger/.conda/envs/shapepipe/bin/shapepipe_run -c $config_upd $exclusive_flag" - local cmd="shapepipe_run -c $config_upd $exclusive_flag" + local cmd="shapepipe_run -c $config_upd" command_sp "$cmd" "$str" } @@ -337,6 +338,25 @@ function update_config() { | perl -ane 's/'$key'\s+=.+/'$key' = '$val_upd'/; print' > $config_upd } +# Write an updated copy of a shapepipe config with NUMBER_LIST set to the +# given image ID, expressed in the numbering scheme (leading dash, dots -> +# dashes). Replaces the retired shapepipe_run -e/--exclusive flag (#746). +function set_config_number_list() { + local config_orig=$1 + local config_upd=$2 + local _id=$3 + + local number="-$(echo $_id | tr '.' '-')" + local config_tmp="${config_upd}.tmp" + + if grep -q "^NUMBER_LIST" "$config_orig"; then + perl -pe 's/^NUMBER_LIST\s*=.*/NUMBER_LIST = '$number'/' "$config_orig" > "$config_tmp" + else + perl -pe 's/^\[FILE\][ \t]*$/[FILE]\nNUMBER_LIST = '$number'/' "$config_orig" > "$config_tmp" + fi + mv "$config_tmp" "$config_upd" +} + ### Start ### echo "Start processing" diff --git a/scripts/sh/job_sp_canfar_v2.0.bash b/scripts/sh/job_sp_canfar_v2.0.bash index c1fa6cd29..fe2b8f9a5 100755 --- a/scripts/sh/job_sp_canfar_v2.0.bash +++ b/scripts/sh/job_sp_canfar_v2.0.bash @@ -165,6 +165,9 @@ export SP_RUN=`pwd` # Config file path export SP_CONFIG=$SP_RUN/cfis +# Path for updated (per-job) config file copies +export SP_CONFIG_MOD=$SP_RUN/cfis_mod + # Root directory for per-exposure work directories. # Set SP_EXP in the environment to override; otherwise falls back to the # conventional layout (SP_RUN = .../v2.0/tiles/IDra/ID, three levels up + exp). @@ -243,18 +246,40 @@ function command () { fi } +# Write an updated copy of a shapepipe config with NUMBER_LIST set to the +# given image ID, expressed in the numbering scheme (leading dash, dots -> +# dashes). Replaces the retired shapepipe_run -e/--exclusive flag (#746). +function set_config_number_list() { + local config_orig=$1 + local config_upd=$2 + local _id=$3 + + local number="-$(echo $_id | tr '.' '-')" + local config_tmp="${config_upd}.tmp" + + if grep -q "^NUMBER_LIST" "$config_orig"; then + perl -pe 's/^NUMBER_LIST\s*=.*/NUMBER_LIST = '$number'/' "$config_orig" > "$config_tmp" + else + perl -pe 's/^\[FILE\][ \t]*$/[FILE]\nNUMBER_LIST = '$number'/' "$config_orig" > "$config_tmp" + fi + mv "$config_tmp" "$config_upd" +} + # Set up config file and call shapepipe_run. -# Batch size is passed via --batch_size flag; no config editing needed. +# Batch size is passed via --batch_size flag. function command_cfg_shapepipe() { local config_name=$1 local str=$2 local _n_smp=$3 local _exclusive=$4 - if [ "$exclusive" != "" ]; then - exclusive_flag="-e $_exclusive" - else - exclusive_flag="" + local config="$SP_CONFIG/$config_name" + + # Run a single image ID via NUMBER_LIST in an updated config copy; + # replaces the retired shapepipe_run -e/--exclusive flag (#746) + if [ "$_exclusive" != "" ]; then + set_config_number_list "$config" "$SP_CONFIG_MOD/$config_name" "$_exclusive" + config="$SP_CONFIG_MOD/$config_name" fi local batch_flag="" @@ -262,8 +287,7 @@ function command_cfg_shapepipe() { batch_flag="--batch_size $_n_smp" fi - local config="$SP_CONFIG/$config_name" - local cmd="shapepipe_run.py -c $config $exclusive_flag $batch_flag" + local cmd="shapepipe_run.py -c $config $batch_flag" command "$cmd" "$str" } @@ -275,6 +299,7 @@ echo "Start processing" mkdir -p $SP_RUN cd $SP_RUN mkdir -p $OUTPUT +mkdir -p $SP_CONFIG_MOD # Processing diff --git a/src/shapepipe/pipeline/args.py b/src/shapepipe/pipeline/args.py index d402132f2..3770f11c3 100644 --- a/src/shapepipe/pipeline/args.py +++ b/src/shapepipe/pipeline/args.py @@ -135,12 +135,6 @@ def create_arg_parser(): help="configuration file name", ) - optional.add_argument( - "-e", - "--exclusive", - help="exclusive input file number string", - ) - optional.add_argument( "-b", "--batch_size", diff --git a/src/shapepipe/pipeline/file_handler.py b/src/shapepipe/pipeline/file_handler.py index 91f200d1d..b471449b5 100644 --- a/src/shapepipe/pipeline/file_handler.py +++ b/src/shapepipe/pipeline/file_handler.py @@ -32,14 +32,12 @@ class FileHandler(object): List of modules to be run config : CustomParser Configuaration parser instance - exclusive : str, optional - Run this file number string exclusively if given, the default is None verbose : bool, optional Verbose setting, default is True """ - def __init__(self, run_name, modules, config, exclusive=None, verbose=True): + def __init__(self, run_name, modules, config, verbose=True): self._run_name = run_name @@ -48,7 +46,6 @@ def __init__(self, run_name, modules, config, exclusive=None, verbose=True): raise ValueError("Invalid module list, check for a trailing comma") self._config = config - self._exclusive = exclusive self._verbose = verbose self.module_runners = get_module_runners(self._module_list) @@ -1089,7 +1086,20 @@ def _format_process_list( if isinstance(self._number_list, type(None)): number_list = np.load(memory_map, mmap_mode="r") else: + # NUMBER_LIST comes from the config on faith; intersect it + # with the numbers actually found on disk so that a wrong ID + # fails here, at start-up, rather than when a module first + # tries to open the (non-existent) files (#746). number_list = self._number_list + scanned = set(np.load(memory_map, mmap_mode="r")) + missing = [num for num in number_list if num not in scanned] + if missing: + raise ValueError( + f"No input file found matching NUMBER_LIST " + f"entr{'ies' if len(missing) > 1 else 'y'} " + f"{missing}; {len(scanned)} input file number(s) " + f"found on disk." + ) if len(number_list) == 0: msg = "Empty number list" @@ -1107,20 +1117,6 @@ def _format_process_list( + f'numbering scheme "{num_scheme}".' ) - # If "exclusive" options is set: discard all non-matching IDs - if self._exclusive is not None: - id_to_test = f"-{self._exclusive.replace('.', '-')}" - if number == id_to_test: - if self._verbose: - print( - f"-- Using exclusive number {self._exclusive} ({id_to_test})" - ) - else: - if self._verbose: - # print(f"Skipping {number}, not equal to {self._exclusive} ({id_to_test})") - pass - continue - if run_method == "serial": process_items = [] else: @@ -1134,11 +1130,7 @@ def _format_process_list( process_list.append(process_items) if len(process_list) == 0: - msg = "Empty process list" - if self._exclusive is not None: - if len(number_list) > 0: - msg = f"{msg}. No input file found matching exclusive ID" - raise ValueError(msg) + raise ValueError("Empty process list") return process_list diff --git a/src/shapepipe/pipeline/job_handler.py b/src/shapepipe/pipeline/job_handler.py index 9a6e01f1f..15807fb3b 100644 --- a/src/shapepipe/pipeline/job_handler.py +++ b/src/shapepipe/pipeline/job_handler.py @@ -42,8 +42,6 @@ class JobHandler(object): Joblib backend, the default is None (which corresponds to 'loky') timeout : int, optional Timeout limit for a given job in seconds, the default is None - exclusive : str, optional - Run this file number string exclusively if given, the default is None verbose : bool, optional Verbose setting, default is True @@ -60,7 +58,6 @@ def __init__( batch_size=None, backend=None, timeout=None, - exclusive=None, verbose=True, ): @@ -75,7 +72,6 @@ def __init__( self._module = module self._module_runner = self.filehd.module_runners[self._module] self.error_count = 0 - self.exclusive = exclusive self._verbose = verbose # Add the job parameters to the log diff --git a/src/shapepipe/run.py b/src/shapepipe/run.py index 4138d2617..962953ede 100644 --- a/src/shapepipe/run.py +++ b/src/shapepipe/run.py @@ -68,13 +68,11 @@ def set_up(self): self._set_run_name() self.modules = self.config.getlist("EXECUTION", "MODULE") self.mode = self.config.get("EXECUTION", "MODE").lower() - self.exclusive = self._args.exclusive self.verbose = self.config.getboolean("DEFAULT", "VERBOSE") self.filehd = FileHandler( self._run_name, self.modules, self.config, - exclusive=self._args.exclusive, verbose=self.verbose, ) self.error_count = 0 @@ -355,7 +353,6 @@ def run_smp(pipe): config=pipe.config, log=pipe.log, job_type=pipe.run_method[module], - exclusive=pipe.exclusive, verbose=pipe.verbose, batch_size=pipe._args.batch_size, ) @@ -415,7 +412,6 @@ def run_mpi(pipe, comm): log=pipe.log, job_type=pipe.run_method[module], parallel_mode="mpi", - exclusive=pipe.exclusive, verbose=verbose, batch_size=pipe._args.batch_size, ) diff --git a/src/shapepipe/tests/test_file_handler.py b/src/shapepipe/tests/test_file_handler.py new file mode 100644 index 000000000..12dcd32d7 --- /dev/null +++ b/src/shapepipe/tests/test_file_handler.py @@ -0,0 +1,71 @@ +"""UNIT TESTS FOR FILE_HANDLER. + +This module contains unit tests for the shapepipe.pipeline.file_handler +module, in particular the early validation of NUMBER_LIST entries +against the input file numbers actually found on disk (#746): a typo in +NUMBER_LIST must fail at start-up, not when a module first tries to +open the non-existent files. + +:Author: Claude (on behalf of Cail Daley) + +""" + +import numpy as np +import pytest + +from shapepipe.pipeline.file_handler import FileHandler + +RE_PATTERN = r"-\d{3}-\d{3}" +NUM_SCHEME = f"RE:{RE_PATTERN}" + + +def _format_process_list(tmp_path, number_list, scanned): + """Run FileHandler._format_process_list on a bare instance.""" + handler = FileHandler.__new__(FileHandler) + handler._number_list = number_list + handler._verbose = False + + memory_map = str(tmp_path / "match.npy") + np.save(memory_map, np.array(scanned)) + + return handler._format_process_list( + [("image", ".fits", str(tmp_path))], + memory_map, + RE_PATTERN, + NUM_SCHEME, + "parallel", + ) + + +def test_number_list_subset_of_scanned_passes(tmp_path): + """NUMBER_LIST entries found on disk yield exactly those processes.""" + process_list = _format_process_list( + tmp_path, + number_list=["-277-282"], + scanned=["-277-282", "-300-301"], + ) + assert process_list == [ + ["-277-282", f"{tmp_path}/image-277-282.fits"], + ] + + +def test_number_list_typo_fails_early(tmp_path): + """A NUMBER_LIST entry absent from disk must raise at start-up.""" + with pytest.raises(ValueError, match="NUMBER_LIST"): + _format_process_list( + tmp_path, + number_list=["-999-999"], + scanned=["-277-282"], + ) + + +def test_no_number_list_uses_scanned(tmp_path): + """Without NUMBER_LIST the scanned numbers drive the process list.""" + process_list = _format_process_list( + tmp_path, + number_list=None, + scanned=["-277-282"], + ) + assert process_list == [ + ["-277-282", f"{tmp_path}/image-277-282.fits"], + ] From e9473a0b06e89896a1c718537e352c4bcd9971b5 Mon Sep 17 00:00:00 2001 From: Cail Daley Date: Thu, 11 Jun 2026 23:09:35 +0200 Subject: [PATCH 2/2] fix(scripts): fail fast if NUMBER_LIST cannot be set; dead -e guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review hardening: set_config_number_list now verifies the key landed in the config copy and aborts otherwise — a config with no NUMBER_LIST line and no bare [FILE] header would previously install an unmodified copy and silently process every image. Also fix init_run_exclusive_canfar.sh's missing-ID guard, which tested an unset variable and never fired. Co-Authored-By: Claude Fable 5 --- scripts/sh/init_run_exclusive_canfar.sh | 6 +++++- scripts/sh/job_sp_canfar.bash | 4 ++++ scripts/sh/job_sp_canfar_v2.0.bash | 4 ++++ src/shapepipe/pipeline/file_handler.py | 8 ++++---- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/scripts/sh/init_run_exclusive_canfar.sh b/scripts/sh/init_run_exclusive_canfar.sh index c1de00847..27efadc11 100755 --- a/scripts/sh/init_run_exclusive_canfar.sh +++ b/scripts/sh/init_run_exclusive_canfar.sh @@ -160,6 +160,10 @@ function set_config_number_list() { else perl -pe 's/^\[FILE\][ \t]*$/[FILE]\nNUMBER_LIST = '$number'/' "$config_orig" > "$config_tmp" fi + if ! grep -q "^NUMBER_LIST = $number$" "$config_tmp"; then + echo "set_config_number_list: failed to set NUMBER_LIST in $config_orig" >&2 + exit 1 + fi mv "$config_tmp" "$config_upd" } @@ -184,7 +188,7 @@ if [ "$job" == "-1" ]; then message "No job indicated, use option -j" $debug_out 2 fi -if [ "$exclusive" == "-1" ]; then +if [ "$ID" == "-1" ]; then message "No image ID indicated, use option -e" $debug_out 3 fi diff --git a/scripts/sh/job_sp_canfar.bash b/scripts/sh/job_sp_canfar.bash index 3fdaba0ad..c99435823 100755 --- a/scripts/sh/job_sp_canfar.bash +++ b/scripts/sh/job_sp_canfar.bash @@ -354,6 +354,10 @@ function set_config_number_list() { else perl -pe 's/^\[FILE\][ \t]*$/[FILE]\nNUMBER_LIST = '$number'/' "$config_orig" > "$config_tmp" fi + if ! grep -q "^NUMBER_LIST = $number$" "$config_tmp"; then + echo "set_config_number_list: failed to set NUMBER_LIST in $config_orig" >&2 + exit 1 + fi mv "$config_tmp" "$config_upd" } diff --git a/scripts/sh/job_sp_canfar_v2.0.bash b/scripts/sh/job_sp_canfar_v2.0.bash index fe2b8f9a5..1c1ce89ba 100755 --- a/scripts/sh/job_sp_canfar_v2.0.bash +++ b/scripts/sh/job_sp_canfar_v2.0.bash @@ -262,6 +262,10 @@ function set_config_number_list() { else perl -pe 's/^\[FILE\][ \t]*$/[FILE]\nNUMBER_LIST = '$number'/' "$config_orig" > "$config_tmp" fi + if ! grep -q "^NUMBER_LIST = $number$" "$config_tmp"; then + echo "set_config_number_list: failed to set NUMBER_LIST in $config_orig" >&2 + exit 1 + fi mv "$config_tmp" "$config_upd" } diff --git a/src/shapepipe/pipeline/file_handler.py b/src/shapepipe/pipeline/file_handler.py index b471449b5..9352248de 100644 --- a/src/shapepipe/pipeline/file_handler.py +++ b/src/shapepipe/pipeline/file_handler.py @@ -1086,10 +1086,10 @@ def _format_process_list( if isinstance(self._number_list, type(None)): number_list = np.load(memory_map, mmap_mode="r") else: - # NUMBER_LIST comes from the config on faith; intersect it - # with the numbers actually found on disk so that a wrong ID - # fails here, at start-up, rather than when a module first - # tries to open the (non-existent) files (#746). + # NUMBER_LIST comes from the config on faith; check every + # entry against the numbers actually found on disk so that a + # wrong ID fails here, at start-up, rather than when a module + # first tries to open the (non-existent) files (#746). number_list = self._number_list scanned = set(np.load(memory_map, mmap_mode="r")) missing = [num for num in number_list if num not in scanned]