@@ -514,56 +514,55 @@ def find_calibration_for_batch(workflow_params):
514514 # calib_error_boundaries = (mass_spectrum.settings.min_calib_ppm_error, mass_spectrum.settings.max_calib_ppm_error)
515515 # search_error_boundaries = (mass_spectrum.molecular_search_settings.min_ppm_error, mass_spectrum.molecular_search_settings.max_ppm_error)
516516
517- for ppm_range in (15 , 11 , 9 , 7 , 5 ):
517+ # Get calibration error bounds based on standard (usually SRFA)
518+ calib_error_boundaries = HighResRecalibration (
519+ mass_spectrum , plot = True ,
520+ docker = False , #ppmRangeprior = ppm_range
521+ ).determine_error_boundaries ()[2 ]
518522
519- print ("ppm range : " , ppm_range )
523+ print ("Boundaries from auto recalib : " , calib_error_boundaries )
520524
521- # Get calibration error bounds based on standard (usually SRFA)
522- calib_error_boundaries = HighResRecalibration (
523- mass_spectrum , plot = True ,
524- docker = False , ppmRangeprior = ppm_range
525- ).determine_error_boundaries ()[2 ]
525+ # Overwrite the min and max error tolerances with the values from calibration
526+ mass_spectrum .settings .max_calib_ppm_error = max (calib_error_boundaries )
527+ mass_spectrum .settings .min_calib_ppm_error = min (calib_error_boundaries )
526528
527- print ("Boundaries from auto recalib: " , calib_error_boundaries )
529+ # Use new settings to calibrate
530+ ref_file_location = Path (workflow_params .calibration_ref_file_path )
531+ MzDomainCalibration (mass_spectrum , ref_file_location ).run ()
528532
529- # Overwrite the min and max error tolerances with the values from calibration
530- mass_spectrum .settings .max_calib_ppm_error = max (calib_error_boundaries )
531- mass_spectrum .settings .min_calib_ppm_error = min (calib_error_boundaries )
533+ # Identify molecular formulae
534+ SearchMolecularFormulas (mass_spectrum , first_hit = False ).run_worker_mass_spectrum ()
532535
533- # Use new settings to calibrate
534- ref_file_location = Path (workflow_params .calibration_ref_file_path )
535- MzDomainCalibration (mass_spectrum , ref_file_location ).run ()
536+ # Extract the error distribution from the Heteroatoms class that makes error plots
537+ mass_spectrum_by_classes = HeteroatomsClassification (
538+ mass_spectrum , choose_molecular_formula = False
539+ )
540+ mzplot = mass_spectrum_by_classes .plot_mz_error ()
541+ mzplot .set_title ("SRFA ppm error vs. m/z" )
542+ mzplot .get_legend ().remove ()
543+ scatter_plot = mzplot .collections [0 ]
544+ scatter_plot .set_sizes ([1 ])
545+ plt .savefig (Path (workflow_params .output_directory ) / "mz.png" )
536546
537- # Identify molecular formulae
538- SearchMolecularFormulas (mass_spectrum , first_hit = False ).run_worker_mass_spectrum ()
547+ mz_error = mass_spectrum_by_classes .mz_error_all ()
539548
540- # Extract the error distribution from the Heteroatoms class that makes error plots
541- mass_spectrum_by_classes = HeteroatomsClassification (
542- mass_spectrum , choose_molecular_formula = False
543- )
544- # mzplot = mass_spectrum_by_classes.plot_mz_error()
545- # mzplot.set_title("SRFA ppm error vs. m/z")
546- # mzplot.get_legend().remove()
547- # scatter_plot = mzplot.collections[0]
548- # scatter_plot.set_sizes([1])
549- # plt.show()
549+ # Get percentiles of error to use as search error max/min
550+ # i.e. retain the middle x% (vertically) of the points on the error plot
551+ for ppm_range in ((0 , 18 ), (1 , 17 ), (2 , 16 ), (3 , 15 ), (4 , 14 ), (5 , 13 ), (6 , 12 ), (7 , 11 )):
550552
551- mz_error = mass_spectrum_by_classes .mz_error_all ()
552-
553- # Get 5th and 95th percentile of error to use as search error max/min
554- # i.e. retain the middle 90% (vertically) of the points on the error plot
555553 q = quantiles (mz_error , n = 20 )
556- search_error_boundaries = (q [0 ], q [18 ])
557-
554+ search_error_boundaries = (q [ppm_range [0 ]], q [ppm_range [1 ]]) # start at 0 to 18 for 5% and 95%
558555 err_bound_diff = search_error_boundaries [1 ] - search_error_boundaries [0 ]
556+ points_left = [i for i in mz_error if i > search_error_boundaries [0 ] and i < search_error_boundaries [1 ]]
557+
559558 print ("Search error boundaries: " , search_error_boundaries )
560559 print ("Search error diff: " , err_bound_diff )
560+ print ("Number of points left: " , len (points_left ))
561561
562+ # Exit loop once we get to reasonable error boundaries
562563 if err_bound_diff < 5 :
563564 break
564565
565- print (srfa_path , type (srfa_path ))
566-
567566 return (calib_error_boundaries , search_error_boundaries ), workflow_params .file_paths , Path (srfa_path )
568567
569568
0 commit comments