#!/bin/bash
# ============================================================================
# Configuration Loader for fMRI Preprocessing Pipeline
# ============================================================================
#
# This script loads configuration from config.yaml and exports environment
# variables that can be used throughout the preprocessing pipeline.
#
# USAGE:
#   source ./load_config.sh
#
# NOTE: This script must be sourced, not executed, to properly export
# environment variables to the calling shell.
# ============================================================================

# Resolve the directory where this script resides.
# CDPATH is cleared for this cd only: with CDPATH set in the sourcing shell, a
# relative directory could be looked up through CDPATH (wrong directory) and cd
# would print the resolved path into the command substitution. "--" keeps a
# path that starts with a dash from being read as an option.
SCRIPT_DIR="$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
_YAML_CONFIG_FILE="${SCRIPT_DIR}/config.yaml"

# Stop early when the config file is missing.
# `return` works when the file is sourced; if it was executed instead, `return`
# fails (its error is discarded) and `exit` ends the process.
if [ ! -f "${_YAML_CONFIG_FILE}" ]; then
    echo "ERROR: Configuration file not found: ${_YAML_CONFIG_FILE}"
    echo "Please copy config.template.yaml to config.yaml and configure it for your study."
    return 1 2>/dev/null || exit 1
fi

# Parse YAML and export environment variables using Python.
#
# The heredoc delimiter is quoted, so bash expands nothing inside the Python
# source; the config path reaches Python through the _YAML_CONFIG_FILE
# environment variable. Python prints one `export NAME='value'` line per
# setting on stdout (captured below) and plain diagnostics on stderr (shown to
# the user directly, never evaluated).
# Store output in variable first (bash 3.2 compatible)
_config_exports=$(_YAML_CONFIG_FILE="${_YAML_CONFIG_FILE}" python3 - <<'EOF'
import os
import re
import sys

try:
    import yaml
except ImportError:
    print("ERROR: the Python yaml module (PyYAML) is not installed", file=sys.stderr)
    sys.exit(1)


def to_text(value):
    """Render a YAML scalar as text; a null value becomes the empty string."""
    return "" if value is None else str(value)


def expand_var(value, env_vars):
    """Expand ${VAR} and $VAR references inside a string value.

    Names are looked up in env_vars first, then in the process environment.
    Unknown names are left in place unchanged. Non-string values are returned
    as they are.
    """
    if not isinstance(value, str):
        return value

    def replace_var(match):
        var_name = match.group(1) if match.group(1) else match.group(2)
        # First check our accumulated env_vars, then fall back to os.environ
        return env_vars.get(var_name, os.environ.get(var_name, match.group(0)))

    # Handle both ${VAR} and $VAR formats
    return re.sub(r'\$\{([^}]+)\}|\$([A-Za-z_][A-Za-z0-9_]*)', replace_var, value)


def is_valid_env_var_name(name):
    """Return True when name is a safe shell identifier.

    Only uppercase letters, digits and underscores are accepted, and the first
    character must be a letter or an underscore.
    """
    return re.match(r'^[A-Z_][A-Z0-9_]*$', name) is not None


def flatten_dict(d, parent_key='', sep='_', env_vars=None):
    """Flatten a nested mapping into {UPPER_CASE_NAME: string value}.

    Nested keys are joined with sep. Lists become space-separated strings,
    booleans become true or false, null scalars become the empty string, and
    every other scalar is converted with str(). env_vars is shared across the
    recursion so a value can reference any setting flattened before it.
    Exits with status 1 when a generated name is not a valid identifier.
    """
    if env_vars is None:
        env_vars = {}

    items = []
    for k, v in d.items():
        # YAML keys may be ints, bools or null; str() keeps .upper() from raising
        key_text = str(k)
        new_key = f"{parent_key}{sep}{key_text}".upper() if parent_key else key_text.upper()

        # Validate that the key is a safe shell identifier
        if not is_valid_env_var_name(new_key):
            print(f"ERROR: Invalid environment variable name: {new_key}", file=sys.stderr)
            sys.exit(1)

        if isinstance(v, dict):
            # Recursively flatten nested dictionaries
            items.extend(flatten_dict(v, new_key, sep=sep, env_vars=env_vars).items())
        elif isinstance(v, list):
            # Handle lists (like run_numbers)
            # Store as space-separated string for bash arrays
            expanded_items = [expand_var(str(item), env_vars) for item in v]
            env_vars[new_key] = ' '.join(expanded_items)
            items.append((new_key, env_vars[new_key]))
        elif isinstance(v, bool):
            # Convert Python boolean to lowercase bash boolean
            expanded_value = 'true' if v else 'false'
            env_vars[new_key] = expanded_value
            items.append((new_key, expanded_value))
        else:
            # Expand any variables in the value; null is exported as empty text
            expanded_value = expand_var(to_text(v), env_vars)
            env_vars[new_key] = expanded_value
            items.append((new_key, expanded_value))

    return dict(items)


try:
    # Read and parse YAML config - always use the _YAML_CONFIG_FILE env var set by the shell
    config_path = os.environ.get('_YAML_CONFIG_FILE', 'config.yaml')
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)

    # An empty file parses to None and a top-level list has no keys to flatten
    if not isinstance(config, dict):
        print("ERROR: config.yaml must contain a YAML mapping at the top level", file=sys.stderr)
        sys.exit(1)

    # Flatten the configuration
    env_vars = flatten_dict(config)

    # Add special aliases for commonly used variables to maintain compatibility
    # Each entry reads: alias name -> flattened config name it copies from
    aliases = {
        'BASE_DIR': 'DIRECTORIES_BASE_DIR',
        'SCRIPTS_DIR': 'DIRECTORIES_SCRIPTS_DIR',
        'RAW_DIR': 'DIRECTORIES_RAW_DIR',
        'TRIM_DIR': 'DIRECTORIES_TRIM_DIR',
        'WORKFLOW_LOG_DIR': 'DIRECTORIES_WORKFLOW_LOG_DIR',
        'TEMPLATEFLOW_HOST_HOME': 'DIRECTORIES_TEMPLATEFLOW_HOST_HOME',
        'FMRIPREP_HOST_CACHE': 'DIRECTORIES_FMRIPREP_HOST_CACHE',
        'FREESURFER_LICENSE': 'DIRECTORIES_FREESURFER_LICENSE',
        'USER_EMAIL': 'USER_EMAIL',
        'USER': 'USER_USERNAME',
        'FW_GROUP_ID': 'USER_FW_GROUP_ID',
        'FW_PROJECT_ID': 'USER_FW_PROJECT_ID',
        'FW_CLI_API_KEY_FILE': 'SCAN_FW_CLI_API_KEY_FILE',
        'FW_URL': 'SCAN_FW_URL',
        'CONFIG_FILE': 'SCAN_CONFIG_FILE',
        'EXPERIMENT_TYPE': 'SCAN_EXPERIMENT_TYPE',
        'task_id': 'SCAN_TASK_ID',
        'new_task_id': 'SCAN_NEW_TASK_ID',
        'n_dummy': 'SCAN_N_DUMMY',
        'run_numbers': 'SCAN_RUN_NUMBERS',
        'EXPECTED_FMAP_VOLS': 'VALIDATION_EXPECTED_FMAP_VOLS',
        'EXPECTED_BOLD_VOLS': 'VALIDATION_EXPECTED_BOLD_VOLS',
        'EXPECTED_BOLD_VOLS_AFTER_TRIMMING': 'VALIDATION_EXPECTED_BOLD_VOLS_AFTER_TRIMMING',
        'DIR_PERMISSIONS': 'PERMISSIONS_DIR_PERMISSIONS',
        'FILE_PERMISSIONS': 'PERMISSIONS_FILE_PERMISSIONS',
        'SLURM_EMAIL': 'SLURM_EMAIL',
        'SLURM_TIME': 'SLURM_TIME',
        'DCMNIIX_SLURM_TIME': 'SLURM_DCMNIIX_TIME',
        'SLURM_MEM': 'SLURM_MEM',
        'SLURM_CPUS': 'SLURM_CPUS',
        'SLURM_ARRAY_THROTTLE': 'SLURM_ARRAY_THROTTLE',
        'SLURM_LOG_DIR': 'SLURM_LOG_DIR',
        'SLURM_PARTITION': 'SLURM_PARTITION',
        'FMRIPREP_VERSION': 'PIPELINE_FMRIPREP_VERSION',
        'DERIVS_DIR': 'PIPELINE_DERIVS_DIR',
        'SINGULARITY_IMAGE_DIR': 'PIPELINE_SINGULARITY_IMAGE_DIR',
        'SINGULARITY_IMAGE': 'PIPELINE_SINGULARITY_IMAGE',
        'HEUDICONV_IMAGE': 'PIPELINE_HEUDICONV_IMAGE',
        'FMRIPREP_SLURM_JOB_NAME': 'FMRIPREP_SLURM_JOB_NAME',
        'FMRIPREP_SLURM_ARRAY_SIZE': 'FMRIPREP_SLURM_ARRAY_SIZE',
        'FMRIPREP_SLURM_TIME': 'FMRIPREP_SLURM_TIME',
        'FMRIPREP_SLURM_CPUS_PER_TASK': 'FMRIPREP_SLURM_CPUS_PER_TASK',
        'FMRIPREP_SLURM_MEM_PER_CPU': 'FMRIPREP_SLURM_MEM_PER_CPU',
        'FMRIPREP_OMP_THREADS': 'FMRIPREP_OMP_THREADS',
        'FMRIPREP_NTHREADS': 'FMRIPREP_NTHREADS',
        'FMRIPREP_MEM_MB': 'FMRIPREP_MEM_MB',
        'FMRIPREP_FD_SPIKE_THRESHOLD': 'FMRIPREP_FD_SPIKE_THRESHOLD',
        'FMRIPREP_DVARS_SPIKE_THRESHOLD': 'FMRIPREP_DVARS_SPIKE_THRESHOLD',
        'FMRIPREP_OUTPUT_SPACES': 'FMRIPREP_OUTPUT_SPACES',
        'DEBUG': 'MISC_DEBUG',
    }

    # Expand variables in aliased values
    expanded_env_vars = {}
    for alias, source in aliases.items():
        if source in env_vars:
            value = env_vars[source]
            # Perform additional expansion using both base env vars and already-set aliases
            combined_env_vars = {**env_vars, **expanded_env_vars}
            expanded_value = expand_var(value, combined_env_vars)
            expanded_env_vars[alias] = expanded_value

    # Merge expanded aliases back
    env_vars.update(expanded_env_vars)

    # Output export statements for bash to evaluate
    for key, value in env_vars.items():
        # Escape single quotes in values
        escaped_value = value.replace("'", "'\\''")
        print(f"export {key}='{escaped_value}'")

except FileNotFoundError:
    print("ERROR: config.yaml not found", file=sys.stderr)
    sys.exit(1)
except yaml.YAMLError as e:
    print(f"ERROR: Failed to parse config.yaml: {e}", file=sys.stderr)
    sys.exit(1)
except Exception as e:
    print(f"ERROR: {e}", file=sys.stderr)
    sys.exit(1)
EOF
)

# $? here is the exit status of the python3 command substitution above.
if [ $? -ne 0 ]; then
    echo "ERROR: Failed to load configuration from ${_YAML_CONFIG_FILE}"
    return 1 2>/dev/null || exit 1
fi

# Evaluate the export statements.
# The evaluated text is generated above: names are validated identifiers or
# fixed alias names, and every value is wrapped in single quotes with embedded
# single quotes escaped.
eval "$_config_exports"

# Turn the fmap_mapping section of the YAML config into a bash lookup function.
#
# Python prints the source of a function named fmap_mapping that echoes the
# configured value for the key given as $1, or an empty line when the key is
# unknown. A function with a case statement is generated instead of an
# associative array so the result stays bash 3.2 compatible. Diagnostics go to
# stderr as plain text and are never evaluated.
# Store output in variable first (bash 3.2 compatible)
_fmap_exports=$(_YAML_CONFIG_FILE="${_YAML_CONFIG_FILE}" python3 - <<'EOF'
import yaml
import os
import sys

def bash_single_quote(s: str) -> str:
    """
    Return a shell-safe single-quoted literal for string s.

    The text is wrapped in single quotes. Each single quote inside s is
    replaced by a five character sequence: close the single-quoted run, emit
    one single quote wrapped in double quotes, then reopen the single-quoted
    run.

    NOTE(review): the sequence is described in words rather than shown,
    presumably to keep unbalanced quote characters out of the enclosing bash
    command substitution - confirm before adding a literal example.
    """
    return "'" + s.replace("'", "'\"'\"'") + "'"

try:
    config_path = os.environ.get('_YAML_CONFIG_FILE', 'config.yaml')
    with open(config_path, 'r') as f:
        # An empty file parses to None; treat it as an empty mapping
        config = yaml.safe_load(f) or {}

    fmap_cfg = config.get('fmap_mapping')
    if isinstance(fmap_cfg, dict) and fmap_cfg:
        # Generate a bash 3.2 compatible function instead of associative array
        print("fmap_mapping() {")
        print("    case \"$1\" in")
        for key, value in fmap_cfg.items():
            # Null keys and values are rendered as empty strings
            key_str = "" if key is None else str(key)
            val_str = "" if value is None else str(value)
            print(f"        {bash_single_quote(key_str)}) echo {bash_single_quote(val_str)} ;;")
        print("        *) echo \"\" ;;")
        print("    esac")
        print("}")
    else:
        # Define empty function if no fmap_mapping
        print("fmap_mapping() { echo \"\"; }")
except Exception as e:
    print(f"ERROR loading fmap_mapping: {e}", file=sys.stderr)
    sys.exit(1)
EOF
)

# $? here is the exit status of the python3 command substitution above.
if [ $? -ne 0 ]; then
    echo "ERROR: Failed to load fmap_mapping from ${_YAML_CONFIG_FILE}"
    return 1 2>/dev/null || exit 1
fi

# Evaluate the generated fmap_mapping function definition
eval "$_fmap_exports"

# Convert run_numbers from space-separated string to bash array.
# IFS is pinned to a single space for this read only: the file is sourced, so
# the caller's IFS (for example a strict-mode newline/tab setting) would
# otherwise decide how the list is split and could leave it as one element.
# NOTE(review): bash does not pass array variables to child processes through
# the environment - confirm run_numbers is only consumed in the sourcing shell.
if [ -n "${run_numbers}" ]; then
    IFS=' ' read -ra run_numbers <<< "${run_numbers}"
    export run_numbers
fi

#######################################
# Choose which subjects file to use and derive the SLURM array range from it
# (matching original settings.sh behavior).
#
# When stdin is a terminal and SKIP_SUBJECTS_PROMPT is not "true", the user is
# asked whether to use all-subjects.txt or a step-specific NN-subjects.txt;
# a step-specific file that does not exist falls back to all-subjects.txt.
# File names are resolved relative to the current working directory.
#
# Globals:
#   SKIP_SUBJECTS_PROMPT    (read)     set to "true" to suppress the prompt
#   num_subjects            (written)  count of non-comment, non-blank lines
#   array_range             (written)  "0-(N-1)", or "0" as a fallback
#   SELECTED_SUBJECTS_FILE  (exported) name of the chosen file
#   SLURM_ARRAY_SIZE        (exported) same value as array_range
# Arguments: none
# Outputs:   prompt text and INFO/WARNING lines on stdout
#######################################
select_subjects_file() {
    local step_num=""
    local subjects_file="all-subjects.txt"
    local custom_file=""
    # Keep the prompt answer out of the sourcing shell's namespace
    local choice=""

    # only prompt if being sourced in an interactive shell and SKIP_SUBJECTS_PROMPT is not set
    if [[ -t 0 && "${SKIP_SUBJECTS_PROMPT}" != "true" ]]; then
        echo "Select subjects file to use:"
        echo "1) Use all-subjects.txt (default)"
        echo "2) Use step-specific subjects file (e.g., 04-subjects.txt)"
        # -r: take the typed answer literally (no backslash processing)
        read -r -p "Enter choice [1/2]: " choice

        if [[ "$choice" == "2" ]]; then
            read -r -p "Enter step number (e.g., 04): " step_num
            custom_file="${step_num}-subjects.txt"

            if [[ -f "$custom_file" ]]; then
                subjects_file="$custom_file"
                echo "Using $subjects_file"
            else
                echo "Warning: $custom_file not found. Falling back to all-subjects.txt"
            fi
        fi
    fi

    # calculate number of subjects based on selected file
    # Skip comment lines (starting with #) and blank lines
    if [[ -f "$subjects_file" ]]; then
        num_subjects=$(grep -v '^[[:space:]]*#' "$subjects_file" | grep -v '^[[:space:]]*$' | wc -l)
        # Some wc implementations pad the count with spaces; normalise it
        num_subjects=$((num_subjects))
        echo "($(date)) [INFO] Found ${num_subjects} total subjects in $subjects_file"
        if [[ "$num_subjects" -gt 0 ]]; then
            array_range="0-$((num_subjects-1))"
        else
            # "0--1" is not a usable array range; use the same single-task
            # fallback as the missing-file case below
            echo "($(date)) [WARNING] $subjects_file lists no subjects, defaulting to single subject"
            array_range="0"
        fi
    else
        echo "($(date)) [WARNING] $subjects_file not found, defaulting to single subject"
        num_subjects=1
        array_range="0"
    fi

    export SELECTED_SUBJECTS_FILE="$subjects_file"
    export SLURM_ARRAY_SIZE="${array_range}"
}

# Run the function to set up the variables.
# It exports SELECTED_SUBJECTS_FILE and SLURM_ARRAY_SIZE, and prompts for a
# choice only when stdin is a terminal and SKIP_SUBJECTS_PROMPT is not "true".
select_subjects_file

# Final status line, reached only when every earlier step succeeded
echo "($(date)) [INFO] Configuration loaded successfully from ${_YAML_CONFIG_FILE}"