Skip to content

Commit 87e4c27

Browse files
committed
di cli - add raw data path unpacking
1 parent d87b844 commit 87e4c27

4 files changed

Lines changed: 42 additions & 13 deletions

File tree

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,7 @@ share/python-wheels/
4141
# Raw data files
4242
*.raw
4343
*.d
44-
data/raw_data/example_data/
44+
data/raw_data/example_data/
45+
46+
# Processed data
47+
output

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,9 @@ cascade-run:
9191

9292
srun -A mscms -t 240 -N 1 -n time enviroMS run-di -r 2 --mpi /dtemp/mscms/enviroms/data/configuration/enviroms.toml
9393

94+
cli-run-di :
95+
enviroMS run_di configuration/di_enviroms.toml --jobs 1 --replicas 1 --tasks 1
96+
9497
wdl-run-di :
9598

9699
miniwdl run wdl/di_fticr_ms.wdl -i wdl/di_fticr_wdl_input.json --verbose --no-cache --copy-input-files

configuration/di_enviroms.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
raw_file_start_scan = 1
22
raw_file_final_scan = 7
3-
file_paths = [ "data/raw_data/20190709_WK_CADY_Auto_S16_H1_Post_O5_1_01_36.tsv" ]
3+
file_paths = ["data/raw_data/NEG_ESI_SRFA_Auto.d", "data/raw_data/SRFA_40ppm_NegativeESI_21T.raw"]
44
output_directory = "output"
55
output_group_name = "..."
66
output_type = "csv"
77
polarity = -1
88
is_centroid = true
9-
corems_toml_path = "configuration/corems.toml"
9+
corems_toml_path = "configuration/di_corems.toml"
1010
calibrate = true
11-
calibration_ref_file_path = "data/raw_data/SRFA.ref"
11+
batch_calibrate = true
12+
calibration_ref_file_path = "./data/reference/SRFA.ref"
1213
plot_mz_error = true
1314
plot_ms_assigned_unassigned = true
1415
plot_c_dbe = true

enviroMS/diWorkflow.py

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from corems.molecular_id.search.priorityAssignment import OxygenPriorityAssignment
2222
from corems.transient.input.brukerSolarix import ReadBrukerSolarix
2323
from corems.encapsulation.output import parameter_to_dict
24+
import matplotlib as mpl
2425
from matplotlib import pyplot as plt
2526
from matplotlib import gridspec as gridspec
2627
from tqdm import tqdm
@@ -195,6 +196,9 @@ def read_workflow_parameter(di_workflow_parameters_toml_file):
195196

196197
def create_plots(mass_spectrum, workflow_params, dirloc):
197198
print("create plots")
199+
# Prevent overflow error when plotting
200+
mpl.rcParams['agg.path.chunksize'] = 10000
201+
198202
ms_by_classes = HeteroatomsClassification(
199203
mass_spectrum, choose_molecular_formula=False
200204
)
@@ -517,8 +521,7 @@ def find_calibration_for_batch(workflow_params):
517521

518522
def run_wdl_direct_infusion_workflow(*args, **kwargs):
519523
print("run wdl direct infusion workflow")
520-
cores = kwargs.get("jobs")
521-
del kwargs["jobs"]
524+
522525
kwargs["polarity"] = -1 if kwargs.get("polarity") == "negative" else 1
523526

524527
workflow_params = DiWorkflowParameters(**kwargs)
@@ -553,26 +556,45 @@ def run_direct_infusion_workflow(workflow_params_file, jobs, replicas):
553556
click.echo("Loading Searching Settings from %s" % workflow_params_file)
554557
workflow_params = read_workflow_parameter(workflow_params_file)
555558

556-
# Set up paths
559+
# File paths need to be a list of strings. If you gave it one string...
560+
if isinstance(workflow_params.file_paths, str):
561+
# If it has a wildcard, get a list of files in the directory
562+
if "*" in workflow_params.file_paths:
563+
p = Path(workflow_params.file_paths)
564+
workflow_params.file_paths = list(Path(p.parent).glob(p.name))
565+
workflow_params.file_paths = list(map(str, workflow_params.file_paths))
566+
# If no wildcard (single filepath), cast to list to match types later
567+
else:
568+
workflow_params.file_paths = list(workflow_params.file_paths)
569+
570+
# Set up output paths
557571
dirloc = Path(workflow_params.output_directory)
558572
dirloc.mkdir(exist_ok=True)
559573

574+
if workflow_params.batch_calibrate:
575+
# Before processing the samples, set calibration based on SRFA
576+
error_boundaries, workflow_params.file_paths = find_calibration_for_batch(workflow_params)
577+
else:
578+
# Not used if not batch_calibrate, placeholder for run_assignment input
579+
error_boundaries = ()
580+
560581
worker_args = replicas * [
561-
(file_path, workflow_params.to_toml())
582+
(file_path, workflow_params.to_toml(), error_boundaries)
562583
for file_path in workflow_params.file_paths
563584
]
564585

565-
cores = jobs
566-
pool = Pool(cores)
586+
# cores = jobs
587+
# pool = Pool(cores)
567588

568589
for worker_arg in worker_args:
590+
print(worker_arg[0])
569591
workflow_worker(worker_arg)
570-
# for i, results in enumerate(pool.imap_unordered(workflow_worker, worker_args), 1):
571592

593+
# for i, results in enumerate(pool.imap_unordered(workflow_worker, worker_args), 1):
572594
# pass
573595

574-
pool.close()
575-
pool.join()
596+
# pool.close()
597+
# pool.join()
576598

577599

578600
def run_di_mpi(workflow_params_file, tasks, replicas):

0 commit comments

Comments
 (0)