Skip to content

Commit b81bdce

Browse files
committed
fixed iterative search boundaries
1 parent 7043184 commit b81bdce

1 file changed

Lines changed: 33 additions & 34 deletions

File tree

enviroMS/diWorkflow.py

Lines changed: 33 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -514,56 +514,55 @@ def find_calibration_for_batch(workflow_params):
514514
# calib_error_boundaries = (mass_spectrum.settings.min_calib_ppm_error, mass_spectrum.settings.max_calib_ppm_error)
515515
# search_error_boundaries = (mass_spectrum.molecular_search_settings.min_ppm_error, mass_spectrum.molecular_search_settings.max_ppm_error)
516516

517-
for ppm_range in (15, 11, 9, 7, 5):
517+
# Get calibration error bounds based on standard (usually SRFA)
518+
calib_error_boundaries = HighResRecalibration(
519+
mass_spectrum, plot=True,
520+
docker = False, #ppmRangeprior = ppm_range
521+
).determine_error_boundaries()[2]
518522

519-
print("ppm range: ", ppm_range)
523+
print("Boundaries from auto recalib: ", calib_error_boundaries)
520524

521-
# Get calibration error bounds based on standard (usually SRFA)
522-
calib_error_boundaries = HighResRecalibration(
523-
mass_spectrum, plot=True,
524-
docker = False, ppmRangeprior = ppm_range
525-
).determine_error_boundaries()[2]
525+
# Overwrite the min and max error tolerances with the values from calibration
526+
mass_spectrum.settings.max_calib_ppm_error = max(calib_error_boundaries)
527+
mass_spectrum.settings.min_calib_ppm_error = min(calib_error_boundaries)
526528

527-
print("Boundaries from auto recalib: ", calib_error_boundaries)
529+
# Use new settings to calibrate
530+
ref_file_location = Path(workflow_params.calibration_ref_file_path)
531+
MzDomainCalibration(mass_spectrum, ref_file_location).run()
528532

529-
# Overwrite the min and max error tolerances with the values from calibration
530-
mass_spectrum.settings.max_calib_ppm_error = max(calib_error_boundaries)
531-
mass_spectrum.settings.min_calib_ppm_error = min(calib_error_boundaries)
533+
# Identify molecular formulae
534+
SearchMolecularFormulas(mass_spectrum, first_hit=False).run_worker_mass_spectrum()
532535

533-
# Use new settings to calibrate
534-
ref_file_location = Path(workflow_params.calibration_ref_file_path)
535-
MzDomainCalibration(mass_spectrum, ref_file_location).run()
536+
# Extract the error distribution from the Heteroatoms class that makes error plots
537+
mass_spectrum_by_classes = HeteroatomsClassification(
538+
mass_spectrum, choose_molecular_formula=False
539+
)
540+
mzplot = mass_spectrum_by_classes.plot_mz_error()
541+
mzplot.set_title("SRFA ppm error vs. m/z")
542+
mzplot.get_legend().remove()
543+
scatter_plot = mzplot.collections[0]
544+
scatter_plot.set_sizes([1])
545+
plt.savefig(Path(workflow_params.output_directory) / "mz.png")
536546

537-
# Identify molecular formulae
538-
SearchMolecularFormulas(mass_spectrum, first_hit=False).run_worker_mass_spectrum()
547+
mz_error = mass_spectrum_by_classes.mz_error_all()
539548

540-
# Extract the error distribution from the Heteroatoms class that makes error plots
541-
mass_spectrum_by_classes = HeteroatomsClassification(
542-
mass_spectrum, choose_molecular_formula=False
543-
)
544-
# mzplot = mass_spectrum_by_classes.plot_mz_error()
545-
# mzplot.set_title("SRFA ppm error vs. m/z")
546-
# mzplot.get_legend().remove()
547-
# scatter_plot = mzplot.collections[0]
548-
# scatter_plot.set_sizes([1])
549-
# plt.show()
549+
# Get percentiles of error to use as search error max/min
550+
# i.e. retain the middle x% (vertically) of the points on the error plot
551+
for ppm_range in ((0, 18), (1, 17), (2, 16), (3, 15), (4, 14), (5, 13), (6, 12), (7, 11)):
550552

551-
mz_error = mass_spectrum_by_classes.mz_error_all()
552-
553-
# Get 5th and 95th percentile of error to use as search error max/min
554-
# i.e. retain the middle 90% (vertically) of the points on the error plot
555553
q = quantiles(mz_error, n = 20)
556-
search_error_boundaries = (q[0], q[18])
557-
554+
search_error_boundaries = (q[ppm_range[0]], q[ppm_range[1]]) # start at 0 to 18 for 5% and 95%
558555
err_bound_diff = search_error_boundaries[1] - search_error_boundaries[0]
556+
points_left = [i for i in mz_error if i > search_error_boundaries[0] and i < search_error_boundaries[1]]
557+
559558
print("Search error boundaries: ", search_error_boundaries)
560559
print("Search error diff: ", err_bound_diff)
560+
print("Number of points left: ", len(points_left))
561561

562+
# Exit loop once we get to reasonable error boundaries
562563
if err_bound_diff < 5:
563564
break
564565

565-
print(srfa_path, type(srfa_path))
566-
567566
return (calib_error_boundaries, search_error_boundaries), workflow_params.file_paths, Path(srfa_path)
568567

569568

0 commit comments

Comments
 (0)