11from dataclasses import dataclass
2+ from collections import deque
23from typing import Iterable , List , Tuple
34
45import numpy as np
@@ -71,6 +72,21 @@ def deisotope(self, peaks: Iterable[Tuple[float, float]]) -> List[IsotopeCluster
7172 return clusters
7273
7374 def _guess_charge (self , mzs : np .ndarray , mz : float , idx : int ) -> int :
75+ # Prefer charge assignments that match the 13C spacing.
76+ best_charge = None
77+ best_error = None
78+ for charge in range (1 , self .max_charge + 1 ):
79+ target = mz + C13_MZ_DIFF / charge
80+ match_idx = self ._find_peak (mzs , target , idx + 1 )
81+ if match_idx is None :
82+ continue
83+ error = self ._ppm_error (mzs [match_idx ], target )
84+ if best_error is None or error < best_error :
85+ best_error = error
86+ best_charge = charge
87+ if best_charge is not None :
88+ return best_charge
89+
7490 best_charge = 1
7591 best_match = None
7692 for charge in range (1 , self .max_charge + 1 ):
@@ -86,25 +102,33 @@ def _guess_charge(self, mzs: np.ndarray, mz: float, idx: int) -> int:
86102 def _grow_cluster (
87103 self , mzs : np .ndarray , intensities : np .ndarray , start_idx : int , charge : int
88104 ) -> List [int ]:
89- cluster_indices = [start_idx ]
90- isotope_idx = 1
91- prev_intensity = intensities [start_idx ]
92- while True :
93- match = self ._find_isotope_peak (
94- mzs , cluster_indices [- 1 ] + 1 , mzs [start_idx ], charge , isotope_idx
95- )
96- if match is None :
97- break
98- match_idx , match_diff , _ = match
99- max_increase = self .max_relative_intensity_increase
100- if match_diff >= self .heavy_isotope_threshold :
101- max_increase = self .max_relative_intensity_increase_heavy
102- if intensities [match_idx ] > prev_intensity * max_increase :
103- break
104- cluster_indices .append (match_idx )
105- prev_intensity = intensities [match_idx ]
106- isotope_idx += 1
107- return cluster_indices
105+ # Build a connected component of peaks linked by any single-isotope mass
106+ # difference. This avoids splitting fine-structure isotope patterns into
107+ # multiple clusters.
108+ cluster = {start_idx }
109+ queue = deque ([start_idx ])
110+
111+ while queue :
112+ current_idx = queue .popleft ()
113+ current_mz = mzs [current_idx ]
114+ current_intensity = intensities [current_idx ]
115+
116+ for diff in self .isotope_mass_diffs :
117+ target = current_mz + diff / charge
118+ match_idx = self ._find_peak (mzs , target , current_idx + 1 )
119+ if match_idx is None or match_idx in cluster :
120+ continue
121+
122+ max_increase = self .max_relative_intensity_increase
123+ if diff >= self .heavy_isotope_threshold :
124+ max_increase = self .max_relative_intensity_increase_heavy
125+ if intensities [match_idx ] > current_intensity * max_increase :
126+ continue
127+
128+ cluster .add (match_idx )
129+ queue .append (match_idx )
130+
131+ return sorted (cluster )
108132
109133 def _find_isotope_peak (
110134 self , mzs : np .ndarray , start_idx : int , base_mz : float , charge : int , isotope_idx : int
@@ -132,21 +156,29 @@ def _find_peak(self, mzs: np.ndarray, target: float, start_idx: int) -> int | No
132156 else :
133157 right = mid - 1
134158 candidates = []
135- for idx in (left - 1 , left , left + 1 ):
136- if 0 <= idx < len (mzs ):
159+ for idx in (left - 2 , left - 1 , left , left + 1 , left + 2 ):
160+ if start_idx <= idx < len (mzs ):
137161 candidates .append (idx )
162+
163+ best_idx = None
164+ best_error = None
138165 for idx in candidates :
139- if self ._ppm_error (mzs [idx ], target ) <= self .ppm_tolerance :
140- return idx
141- return None
166+ error = self ._ppm_error (mzs [idx ], target )
167+ if error > self .ppm_tolerance :
168+ continue
169+ if best_error is None or error < best_error :
170+ best_error = error
171+ best_idx = idx
172+
173+ return best_idx
142174
143175 @staticmethod
144176 def _ppm_error (mz : float , target : float ) -> float :
145177 return abs (mz - target ) / target * 1e6
146178
147179 @staticmethod
148180 def _default_isotope_mass_diffs (
149- min_abundance : float = 0.0005 , max_shift : float = 4.0
181+ min_abundance : float = 0.0001 , max_shift : float = 4.0
150182 ) -> Tuple [float , ...]:
151183 diffs = {round (C13_MZ_DIFF , 6 )}
152184 for isotopes in NATURAL_ISOTOPES .values ():
0 commit comments