-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathquantify_mixture_file10.py
More file actions
221 lines (180 loc) · 7.75 KB
/
quantify_mixture_file10.py
File metadata and controls
221 lines (180 loc) · 7.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#!/usr/bin/env python3
"""
Quantify metabolites in Model Mixture File 10 (pure shift)
"""
import sys
sys.path.insert(0, '.')
import os
import numpy as np
import matplotlib.pyplot as plt
from lmfit.models import LorentzianModel, ConstantModel
from quantify_all_metabolites_v3_ref20 import (
read_and_process, find_tsp_peak, integrate_peak,
detect_peaks_in_region, fit_region
)
def quantify_mixture_file10():
folder_path = "raw_data/Model_Mixtures-M1-M6_JCAMP-DX"
filename = "10.dx"
print("="*60)
print(f"Quantifying Model Mixture: {filename}")
print("="*60)
# Load spectrum
ppm, spec = read_and_process(os.path.join(folder_path, filename))
tsp = find_tsp_peak(ppm, spec)
ppm_corr = ppm - tsp
# TSP reference (using internal TSP)
tsp_area = integrate_peak(ppm_corr, spec, (-0.2, 0.2))
print(f"\nTSP correction: +{-tsp:.4f} ppm")
print(f"TSP integrated area: {tsp_area:.2e}")
# Normalize spectrum by TSP for fitting
spec_norm = spec / tsp_area
# Define metabolite regions based on known chemical shifts
# Using our validated peak positions from reference samples
metabolite_regions = {
'Alanine': {
'regions': [(1.40, 1.55), (3.70, 3.85)],
'peaks': [[1.48], [3.79]],
'protons': [3, 1],
},
'Lactate': {
'regions': [(1.25, 1.40), (4.05, 4.15)],
'peaks': [[1.33], [4.12]],
'protons': [3, 1],
},
'Glucose': {
'regions': [(5.15, 5.35)], # Anomeric H-1
'peaks': [[5.24]],
'protons': [1],
},
'Valine': {
'regions': [(0.90, 1.15), (2.25, 2.35)],
'peaks': [[0.99, 1.04], [2.28]],
'protons': [6, 1],
},
'Leucine': {
'regions': [(0.90, 1.05), (1.55, 1.75)],
'peaks': [[0.97, 0.98], [1.70]],
'protons': [6, 2],
},
'Glutamate': {
'regions': [(2.05, 2.50)],
'peaks': [[2.14, 2.42]],
'protons': [4],
},
'Asparagine': {
'regions': [(2.65, 3.00), (3.85, 4.10)],
'peaks': [[2.75, 2.85], [3.98]],
'protons': [2, 1],
},
}
# Load reference data (File 20 from each reference folder)
ref_base = "raw_data/Reference_Raw_Date_JCAMP-DX"
results = {}
for met_name, met_info in metabolite_regions.items():
print(f"\n{'='*60}")
print(f"Analyzing: {met_name}")
print(f"{'='*60}")
# Get reference
ref_folder = os.path.join(ref_base, f"{met_name}-Reference")
if not os.path.exists(ref_folder):
print(f" Reference folder not found: {ref_folder}")
continue
ref_file = os.path.join(ref_folder, "20.dx")
if not os.path.exists(ref_file):
print(f" Reference file not found: {ref_file}")
continue
try:
# Load reference
ppm_ref, spec_ref = read_and_process(ref_file)
tsp_ref = find_tsp_peak(ppm_ref, spec_ref)
ppm_ref_corr = ppm_ref - tsp_ref
tsp_area_ref = integrate_peak(ppm_ref_corr, spec_ref, (-0.2, 0.2))
spec_ref_norm = spec_ref / tsp_area_ref
# Fit each region
region_results = []
for i, (region, peaks, protons) in enumerate(zip(
met_info['regions'], met_info['peaks'], met_info['protons'])):
print(f"\n Region {i+1}: {region} ppm, {protons} protons")
# Reference fit
mask_ref = (ppm_ref_corr >= region[0]) & (ppm_ref_corr <= region[1])
x_ref = ppm_ref_corr[mask_ref]
y_ref = spec_ref_norm[mask_ref]
n_peaks = len(peaks)
result_ref, _ = fit_region(x_ref, y_ref, peaks, n_peaks)
# Get reference amplitude
if n_peaks == 1:
ref_amp = result_ref.params['amplitude'].value
else:
ref_amp = sum([result_ref.params[f'p{j}_amplitude'].value
for j in range(1, n_peaks+1)])
ref_sigma = (result_ref.params['sigma'].value if n_peaks == 1
else result_ref.params['p1_sigma'].value)
print(f" Ref amplitude: {ref_amp:.4f}")
# Sample fit
mask_samp = (ppm_corr >= region[0]) & (ppm_corr <= region[1])
x_samp = ppm_corr[mask_samp]
y_samp = spec_norm[mask_samp]
if len(x_samp) == 0 or ref_amp < 1e-10:
continue
# Detect peaks
detected = detect_peaks_in_region(
ppm_corr, spec_norm, region, peaks, n_peaks
)
# Fit sample
result_samp, _ = fit_region(x_samp, y_samp, detected, n_peaks, ref_sigma)
# Get sample amplitude
if n_peaks == 1:
samp_amp = result_samp.params['amplitude'].value
else:
samp_amp = sum([result_samp.params[f'p{j}_amplitude'].value
for j in range(1, n_peaks+1)])
print(f" Sample amplitude: {samp_amp:.4f}")
# Calculate scale and concentration estimate
scale = samp_amp / ref_amp
# Get reference concentration from File 20
# For now, use a placeholder - you would get this from the actual reference data
# Looking at typical reference concentrations
ref_conc_dict = {
'Alanine': 20.32, # File 20
'Lactate': 10.16,
'Glucose': 51.57,
'Valine': 2.60,
'Leucine': 1.37,
'Glutamate': 5.48,
'Asparagine': 7.50,
}
ref_conc = ref_conc_dict.get(met_name, 10.0)
calc_conc = ref_conc * scale
print(f" Scale: {scale:.4f}")
print(f" Calculated conc: {calc_conc:.2f} mM")
region_results.append({
'region': i+1,
'calc_conc': calc_conc,
'protons': protons,
'scale': scale,
})
# Calculate weighted average across regions
if region_results:
weighted_sum = sum(r['calc_conc'] * r['protons'] for r in region_results)
total_protons = sum(r['protons'] for r in region_results)
avg_conc = weighted_sum / total_protons
print(f"\n {'='*40}")
print(f" Weighted average concentration: {avg_conc:.2f} mM")
print(f" {'='*40}")
results[met_name] = {
'concentration': avg_conc,
'regions': region_results,
}
except Exception as e:
print(f" Error: {e}")
import traceback
traceback.print_exc()
# Summary
print(f"\n{'='*60}")
print("SUMMARY - Quantified Metabolites in File 10")
print(f"{'='*60}")
for met_name, result in sorted(results.items()):
print(f"{met_name:15s}: {result['concentration']:6.2f} mM")
return results
if __name__ == "__main__":
results = quantify_mixture_file10()