Skip to content

Commit 15be0cb

Browse files
committed
Fix deisotoping fine-structure matching
1 parent 36ddc49 commit 15be0cb

1 file changed

Lines changed: 57 additions & 25 deletions

File tree

vimms/Deisotoping.py

Lines changed: 57 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from dataclasses import dataclass
2+
from collections import deque
23
from typing import Iterable, List, Tuple
34

45
import numpy as np
@@ -71,6 +72,21 @@ def deisotope(self, peaks: Iterable[Tuple[float, float]]) -> List[IsotopeCluster
7172
return clusters
7273

7374
def _guess_charge(self, mzs: np.ndarray, mz: float, idx: int) -> int:
75+
# Prefer charge assignments that match the 13C spacing.
76+
best_charge = None
77+
best_error = None
78+
for charge in range(1, self.max_charge + 1):
79+
target = mz + C13_MZ_DIFF / charge
80+
match_idx = self._find_peak(mzs, target, idx + 1)
81+
if match_idx is None:
82+
continue
83+
error = self._ppm_error(mzs[match_idx], target)
84+
if best_error is None or error < best_error:
85+
best_error = error
86+
best_charge = charge
87+
if best_charge is not None:
88+
return best_charge
89+
7490
best_charge = 1
7591
best_match = None
7692
for charge in range(1, self.max_charge + 1):
@@ -86,25 +102,33 @@ def _guess_charge(self, mzs: np.ndarray, mz: float, idx: int) -> int:
86102
def _grow_cluster(
87103
self, mzs: np.ndarray, intensities: np.ndarray, start_idx: int, charge: int
88104
) -> List[int]:
89-
cluster_indices = [start_idx]
90-
isotope_idx = 1
91-
prev_intensity = intensities[start_idx]
92-
while True:
93-
match = self._find_isotope_peak(
94-
mzs, cluster_indices[-1] + 1, mzs[start_idx], charge, isotope_idx
95-
)
96-
if match is None:
97-
break
98-
match_idx, match_diff, _ = match
99-
max_increase = self.max_relative_intensity_increase
100-
if match_diff >= self.heavy_isotope_threshold:
101-
max_increase = self.max_relative_intensity_increase_heavy
102-
if intensities[match_idx] > prev_intensity * max_increase:
103-
break
104-
cluster_indices.append(match_idx)
105-
prev_intensity = intensities[match_idx]
106-
isotope_idx += 1
107-
return cluster_indices
105+
# Build a connected component of peaks linked by any single-isotope mass
106+
# difference. This avoids splitting fine-structure isotope patterns into
107+
# multiple clusters.
108+
cluster = {start_idx}
109+
queue = deque([start_idx])
110+
111+
while queue:
112+
current_idx = queue.popleft()
113+
current_mz = mzs[current_idx]
114+
current_intensity = intensities[current_idx]
115+
116+
for diff in self.isotope_mass_diffs:
117+
target = current_mz + diff / charge
118+
match_idx = self._find_peak(mzs, target, current_idx + 1)
119+
if match_idx is None or match_idx in cluster:
120+
continue
121+
122+
max_increase = self.max_relative_intensity_increase
123+
if diff >= self.heavy_isotope_threshold:
124+
max_increase = self.max_relative_intensity_increase_heavy
125+
if intensities[match_idx] > current_intensity * max_increase:
126+
continue
127+
128+
cluster.add(match_idx)
129+
queue.append(match_idx)
130+
131+
return sorted(cluster)
108132

109133
def _find_isotope_peak(
110134
self, mzs: np.ndarray, start_idx: int, base_mz: float, charge: int, isotope_idx: int
@@ -132,21 +156,29 @@ def _find_peak(self, mzs: np.ndarray, target: float, start_idx: int) -> int | No
132156
else:
133157
right = mid - 1
134158
candidates = []
135-
for idx in (left - 1, left, left + 1):
136-
if 0 <= idx < len(mzs):
159+
for idx in (left - 2, left - 1, left, left + 1, left + 2):
160+
if start_idx <= idx < len(mzs):
137161
candidates.append(idx)
162+
163+
best_idx = None
164+
best_error = None
138165
for idx in candidates:
139-
if self._ppm_error(mzs[idx], target) <= self.ppm_tolerance:
140-
return idx
141-
return None
166+
error = self._ppm_error(mzs[idx], target)
167+
if error > self.ppm_tolerance:
168+
continue
169+
if best_error is None or error < best_error:
170+
best_error = error
171+
best_idx = idx
172+
173+
return best_idx
142174

143175
@staticmethod
144176
def _ppm_error(mz: float, target: float) -> float:
145177
return abs(mz - target) / target * 1e6
146178

147179
@staticmethod
148180
def _default_isotope_mass_diffs(
149-
min_abundance: float = 0.0005, max_shift: float = 4.0
181+
min_abundance: float = 0.0001, max_shift: float = 4.0
150182
) -> Tuple[float, ...]:
151183
diffs = {round(C13_MZ_DIFF, 6)}
152184
for isotopes in NATURAL_ISOTOPES.values():

0 commit comments

Comments
 (0)