Skip to content

Commit 7043184

Browse files
committed
iterate calibration limits
1 parent df47add commit 7043184

1 file changed

Lines changed: 61 additions & 31 deletions

File tree

enviroMS/diWorkflow.py

Lines changed: 61 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from corems.transient.input.brukerSolarix import ReadBrukerSolarix
2323
from corems.encapsulation.output import parameter_to_dict
2424
import matplotlib as mpl
25-
#mpl.use("TkAgg")
25+
mpl.use("Agg")
2626
from matplotlib import pyplot as plt
2727
from matplotlib import gridspec as gridspec
2828
from tqdm import tqdm
@@ -486,11 +486,17 @@ def find_calibration_for_batch(workflow_params):
486486
# Get file paths that have SRFA in the name
487487
srfa_path = list(filter(lambda x: "SRFA" in x, workflow_params.file_paths))
488488

489+
print(srfa_path, type(srfa_path))
490+
489491
if len(srfa_path) > 1:
490492
print("Multiple SRFA files, using the first one. Include only one SRFA file per batch if you don't want this to happen.")
491493

494+
print(srfa_path, type(srfa_path))
495+
492496
srfa_path = srfa_path[0]
493497

498+
print(srfa_path, type(srfa_path))
499+
494500
# Remove the SRFA file you used from the file list so it's not in the output
495501
#workflow_params.file_paths.remove(srfa_path)
496502

@@ -500,41 +506,65 @@ def find_calibration_for_batch(workflow_params):
500506
# Now that we have a mass spectrum, get settings from toml
501507
mass_spectrum.set_parameter_from_toml(workflow_params.corems_toml_path)
502508

503-
# Get calibration error bounds based on standard (usually SRFA)
504-
calib_error_boundaries = HighResRecalibration(mass_spectrum, plot=True, docker = False).determine_error_boundaries()[2]
509+
# Force it to one job. daemon child can not have child process
510+
mass_spectrum.molecular_search_settings.db_jobs = 1
505511

506-
# Overwrite the min and max error tolerances with the values from calibration
507-
mass_spectrum.settings.max_calib_ppm_error = max(calib_error_boundaries)
508-
mass_spectrum.settings.min_calib_ppm_error = min(calib_error_boundaries)
512+
# Initial error boundaries
513+
# err_bound_diff = 15 # default from HighResRecalibration definition
514+
# calib_error_boundaries = (mass_spectrum.settings.min_calib_ppm_error, mass_spectrum.settings.max_calib_ppm_error)
515+
# search_error_boundaries = (mass_spectrum.molecular_search_settings.min_ppm_error, mass_spectrum.molecular_search_settings.max_ppm_error)
509516

510-
# Use new settings to calibrate
511-
ref_file_location = Path(workflow_params.calibration_ref_file_path)
512-
MzDomainCalibration(mass_spectrum, ref_file_location).run()
517+
for ppm_range in (15, 11, 9, 7, 5):
513518

514-
# Force it to one job. daemon child can not have child process
515-
mass_spectrum.molecular_search_settings.db_jobs = 1
519+
print("ppm range: ", ppm_range)
516520

517-
# Identify molecular formulae
518-
SearchMolecularFormulas(mass_spectrum, first_hit=False).run_worker_mass_spectrum()
521+
# Get calibration error bounds based on standard (usually SRFA)
522+
calib_error_boundaries = HighResRecalibration(
523+
mass_spectrum, plot=True,
524+
docker = False, ppmRangeprior = ppm_range
525+
).determine_error_boundaries()[2]
519526

520-
# Extract the error distribution from the Heteroatoms class that makes error plots
521-
mass_spectrum_by_classes = HeteroatomsClassification(
522-
mass_spectrum, choose_molecular_formula=False
523-
)
524-
# mzplot = mass_spectrum_by_classes.plot_mz_error()
525-
# mzplot.set_title("SRFA ppm error vs. m/z")
526-
# mzplot.get_legend().remove()
527-
# scatter_plot = mzplot.collections[0]
528-
# scatter_plot.set_sizes([1])
529-
# plt.show()
530-
mz_error = mass_spectrum_by_classes.mz_error_all()
531-
532-
# Get 5th and 95th percentile of error to use as search error max/min
533-
# i.e. retain the middle 90% (vertically) of the points on the error plot
534-
q = quantiles(mz_error, n = 20)
535-
search_error_boundaries = (q[0], q[18])
536-
537-
return (calib_error_boundaries, search_error_boundaries), workflow_params.file_paths, srfa_path
527+
print("Boundaries from auto recalib: ", calib_error_boundaries)
528+
529+
# Overwrite the min and max error tolerances with the values from calibration
530+
mass_spectrum.settings.max_calib_ppm_error = max(calib_error_boundaries)
531+
mass_spectrum.settings.min_calib_ppm_error = min(calib_error_boundaries)
532+
533+
# Use new settings to calibrate
534+
ref_file_location = Path(workflow_params.calibration_ref_file_path)
535+
MzDomainCalibration(mass_spectrum, ref_file_location).run()
536+
537+
# Identify molecular formulae
538+
SearchMolecularFormulas(mass_spectrum, first_hit=False).run_worker_mass_spectrum()
539+
540+
# Extract the error distribution from the Heteroatoms class that makes error plots
541+
mass_spectrum_by_classes = HeteroatomsClassification(
542+
mass_spectrum, choose_molecular_formula=False
543+
)
544+
# mzplot = mass_spectrum_by_classes.plot_mz_error()
545+
# mzplot.set_title("SRFA ppm error vs. m/z")
546+
# mzplot.get_legend().remove()
547+
# scatter_plot = mzplot.collections[0]
548+
# scatter_plot.set_sizes([1])
549+
# plt.show()
550+
551+
mz_error = mass_spectrum_by_classes.mz_error_all()
552+
553+
# Get 5th and 95th percentile of error to use as search error max/min
554+
# i.e. retain the middle 90% (vertically) of the points on the error plot
555+
q = quantiles(mz_error, n = 20)
556+
search_error_boundaries = (q[0], q[18])
557+
558+
err_bound_diff = search_error_boundaries[1] - search_error_boundaries[0]
559+
print("Search error boundaries: ", search_error_boundaries)
560+
print("Search error diff: ", err_bound_diff)
561+
562+
if err_bound_diff < 5:
563+
break
564+
565+
print(srfa_path, type(srfa_path))
566+
567+
return (calib_error_boundaries, search_error_boundaries), workflow_params.file_paths, Path(srfa_path)
538568

539569

540570
def run_wdl_direct_infusion_workflow(*args, **kwargs):

0 commit comments

Comments
 (0)