2222from corems .transient .input .brukerSolarix import ReadBrukerSolarix
2323from corems .encapsulation .output import parameter_to_dict
2424import matplotlib as mpl
25- # mpl.use("TkAgg ")
25+ mpl .use ("Agg " )
2626from matplotlib import pyplot as plt
2727from matplotlib import gridspec as gridspec
2828from tqdm import tqdm
@@ -486,11 +486,17 @@ def find_calibration_for_batch(workflow_params):
486486 # Get file paths that have SRFA in the name
487487 srfa_path = list (filter (lambda x : "SRFA" in x , workflow_params .file_paths ))
488488
489+ print (srfa_path , type (srfa_path ))
490+
489491 if len (srfa_path ) > 1 :
490492 print ("Multiple SRFA files, using the first one. Include only one SRFA file per batch if you don't want this to happen." )
491493
494+ print (srfa_path , type (srfa_path ))
495+
492496 srfa_path = srfa_path [0 ]
493497
498+ print (srfa_path , type (srfa_path ))
499+
494500 # Remove the SRFA file you used from the file list so it's not in the output
495501 #workflow_params.file_paths.remove(srfa_path)
496502
@@ -500,41 +506,65 @@ def find_calibration_for_batch(workflow_params):
500506 # Now that we have a mass spectrum, get settings from toml
501507 mass_spectrum .set_parameter_from_toml (workflow_params .corems_toml_path )
502508
503- # Get calibration error bounds based on standard (usually SRFA)
504- calib_error_boundaries = HighResRecalibration ( mass_spectrum , plot = True , docker = False ). determine_error_boundaries ()[ 2 ]
509+ # Force it to one job. daemon child can not have child process
510+ mass_spectrum . molecular_search_settings . db_jobs = 1
505511
506- # Overwrite the min and max error tolerances with the values from calibration
507- mass_spectrum .settings .max_calib_ppm_error = max (calib_error_boundaries )
508- mass_spectrum .settings .min_calib_ppm_error = min (calib_error_boundaries )
512+ # Initial error boundaries
513+ # err_bound_diff = 15 # default from HighResRecalibration definition
514+ # calib_error_boundaries = (mass_spectrum.settings.min_calib_ppm_error, mass_spectrum.settings.max_calib_ppm_error)
515+ # search_error_boundaries = (mass_spectrum.molecular_search_settings.min_ppm_error, mass_spectrum.molecular_search_settings.max_ppm_error)
509516
510- # Use new settings to calibrate
511- ref_file_location = Path (workflow_params .calibration_ref_file_path )
512- MzDomainCalibration (mass_spectrum , ref_file_location ).run ()
517+ for ppm_range in (15 , 11 , 9 , 7 , 5 ):
513518
514- # Force it to one job. daemon child can not have child process
515- mass_spectrum .molecular_search_settings .db_jobs = 1
519+ print ("ppm range: " , ppm_range )
516520
517- # Identify molecular formulae
518- SearchMolecularFormulas (mass_spectrum , first_hit = False ).run_worker_mass_spectrum ()
521+ # Get calibration error bounds based on standard (usually SRFA)
522+ calib_error_boundaries = HighResRecalibration (
523+ mass_spectrum , plot = True ,
524+ docker = False , ppmRangeprior = ppm_range
525+ ).determine_error_boundaries ()[2 ]
519526
520- # Extract the error distribution from the Heteroatoms class that makes error plots
521- mass_spectrum_by_classes = HeteroatomsClassification (
522- mass_spectrum , choose_molecular_formula = False
523- )
524- # mzplot = mass_spectrum_by_classes.plot_mz_error()
525- # mzplot.set_title("SRFA ppm error vs. m/z")
526- # mzplot.get_legend().remove()
527- # scatter_plot = mzplot.collections[0]
528- # scatter_plot.set_sizes([1])
529- # plt.show()
530- mz_error = mass_spectrum_by_classes .mz_error_all ()
531-
532- # Get 5th and 95th percentile of error to use as search error max/min
533- # i.e. retain the middle 90% (vertically) of the points on the error plot
534- q = quantiles (mz_error , n = 20 )
535- search_error_boundaries = (q [0 ], q [18 ])
536-
537- return (calib_error_boundaries , search_error_boundaries ), workflow_params .file_paths , srfa_path
527+ print ("Boundaries from auto recalib: " , calib_error_boundaries )
528+
529+ # Overwrite the min and max error tolerances with the values from calibration
530+ mass_spectrum .settings .max_calib_ppm_error = max (calib_error_boundaries )
531+ mass_spectrum .settings .min_calib_ppm_error = min (calib_error_boundaries )
532+
533+ # Use new settings to calibrate
534+ ref_file_location = Path (workflow_params .calibration_ref_file_path )
535+ MzDomainCalibration (mass_spectrum , ref_file_location ).run ()
536+
537+ # Identify molecular formulae
538+ SearchMolecularFormulas (mass_spectrum , first_hit = False ).run_worker_mass_spectrum ()
539+
540+ # Extract the error distribution from the Heteroatoms class that makes error plots
541+ mass_spectrum_by_classes = HeteroatomsClassification (
542+ mass_spectrum , choose_molecular_formula = False
543+ )
544+ # mzplot = mass_spectrum_by_classes.plot_mz_error()
545+ # mzplot.set_title("SRFA ppm error vs. m/z")
546+ # mzplot.get_legend().remove()
547+ # scatter_plot = mzplot.collections[0]
548+ # scatter_plot.set_sizes([1])
549+ # plt.show()
550+
551+ mz_error = mass_spectrum_by_classes .mz_error_all ()
552+
553+ # Get 5th and 95th percentile of error to use as search error max/min
554+ # i.e. retain the middle 90% (vertically) of the points on the error plot
555+ q = quantiles (mz_error , n = 20 )
556+ search_error_boundaries = (q [0 ], q [18 ])
557+
558+ err_bound_diff = search_error_boundaries [1 ] - search_error_boundaries [0 ]
559+ print ("Search error boundaries: " , search_error_boundaries )
560+ print ("Search error diff: " , err_bound_diff )
561+
562+ if err_bound_diff < 5 :
563+ break
564+
565+ print (srfa_path , type (srfa_path ))
566+
567+ return (calib_error_boundaries , search_error_boundaries ), workflow_params .file_paths , Path (srfa_path )
538568
539569
540570def run_wdl_direct_infusion_workflow (* args , ** kwargs ):
0 commit comments