-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathplot_alanine_magnitude_all.py
More file actions
213 lines (171 loc) · 6.8 KB
/
plot_alanine_magnitude_all.py
File metadata and controls
213 lines (171 loc) · 6.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
"""
Plot magnitude spectra (sqrt(Re² + Im²)) for all alanine files
This gives all positive signals regardless of phase
"""
import nmrglue as ng
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
def read_jcamp_full_spectrum(filepath):
"""Read JCAMP-DX file and return full complex spectrum with ppm scale"""
dic, data = ng.jcampdx.read(filepath)
# Get parameters
npts = len(data)
sw = float(dic['$SWH'][0]) # Sweep width in Hz
sf = float(dic['$SFO1'][0]) # Spectrometer frequency in MHz
offset_hz = float(dic['$OFFSET'][0]) # Offset in Hz
# Calculate ppm scale
# OFFSET is the frequency at the rightmost point (highest ppm for 1H)
offset_ppm = offset_hz / sf
# Create ppm axis (high to low)
hz_per_pt = sw / npts
ppm_per_pt = hz_per_pt / sf
# ppm axis from high to low
ppm = offset_ppm + np.arange(npts) * ppm_per_pt
return ppm, data, dic
def calculate_magnitude_spectrum(data):
"""Calculate magnitude spectrum from complex data"""
# data is complex: real + j*imag
# Magnitude = sqrt(real² + imag²)
return np.abs(data)
# Find all alanine files
raw_data_dir = Path('raw_data/Reference_Raw_Date_JCAMP-DX/Alanine-Reference')
alanine_files = sorted(raw_data_dir.glob('*.dx'))
print(f"Found {len(alanine_files)} alanine files")
for f in alanine_files:
print(f" - {f.name}")
# Create figure with subplots - use even-numbered files only for main quantification
# The odd files (11, 21, etc.) might be duplicates or different acquisitions
main_files = [f for f in alanine_files if int(f.stem) % 10 == 0] # 10, 20, 30, 40, 50, 60, 70
print(f"\nUsing {len(main_files)} main files: {[f.stem for f in main_files]}")
n_files = len(main_files)
n_cols = 3
n_rows = (n_files + n_cols - 1) // n_cols
fig, axes = plt.subplots(n_rows, n_cols, figsize=(18, 4*n_rows))
if n_files == 1:
axes = np.array([axes])
else:
axes = axes.flatten()
colors = plt.cm.tab10(np.linspace(0, 1, n_files))
# Store all data for summary comparison
all_magnitude_data = []
all_ppm = None
file_labels = []
for idx, filepath in enumerate(main_files):
ax = axes[idx]
try:
# Read file
ppm, data, dic = read_jcamp_full_spectrum(filepath)
# Calculate magnitude spectrum
magnitude = calculate_magnitude_spectrum(data)
# Store data
all_magnitude_data.append(magnitude)
if all_ppm is None:
all_ppm = ppm
file_num = filepath.stem
label = f"File {file_num}"
file_labels.append(label)
# Plot
ax.plot(ppm, magnitude, color=colors[idx], linewidth=0.8)
ax.set_xlim(-1, 5) # Focus on main region
ax.set_xlabel('Chemical Shift (ppm)', fontsize=9)
ax.set_ylabel('Magnitude', fontsize=9)
ax.set_title(f'{label} - Magnitude Spectrum', fontsize=10)
ax.grid(True, alpha=0.3)
# Add TSP annotation
tsp_region = (ppm > -0.1) & (ppm < 0.1)
if np.any(tsp_region):
tsp_max = np.max(magnitude[tsp_region])
ax.annotate(f'TSP: {tsp_max:.2e}', xy=(0.02, 0.95), xycoords='axes fraction',
fontsize=8, ha='left', va='top')
print(f"✓ {label}: max magnitude = {np.max(magnitude):.3e}")
except Exception as e:
print(f"✗ Error with {filepath}: {e}")
ax.text(0.5, 0.5, f'Error:\n{e}', ha='center', va='center', transform=ax.transAxes)
ax.set_title(f'{filepath.name}', fontsize=10)
# Hide unused subplots
for idx in range(n_files, len(axes)):
axes[idx].set_visible(False)
plt.tight_layout()
plt.savefig('alanine_magnitude_all_files.png', dpi=150, bbox_inches='tight')
print(f"\nSaved: alanine_magnitude_all_files.png")
plt.close()
# Now create a comparison plot of all files overlaid
fig, axes = plt.subplots(2, 1, figsize=(14, 10))
# Full spectrum comparison
ax1 = axes[0]
for idx, (magnitude, label) in enumerate(zip(all_magnitude_data, file_labels)):
# Normalize by first file for comparison
if idx == 0:
ref_max = np.max(magnitude)
normalized = magnitude / ref_max
ax1.plot(all_ppm, normalized, color=colors[idx], linewidth=0.8, label=label.split()[-1])
ax1.set_xlim(-1, 5)
ax1.set_xlabel('Chemical Shift (ppm)', fontsize=11)
ax1.set_ylabel('Normalized Magnitude', fontsize=11)
ax1.set_title('All Alanine Files - Magnitude Spectra (Normalized to File 10)', fontsize=12)
ax1.legend(loc='upper right', fontsize=8, ncol=3, title='File #')
ax1.grid(True, alpha=0.3)
# TSP region zoom
ax2 = axes[1]
for idx, (magnitude, label) in enumerate(zip(all_magnitude_data, file_labels)):
normalized = magnitude / ref_max
ax2.plot(all_ppm, normalized, color=colors[idx], linewidth=1.5, label=label.split()[-1])
ax2.set_xlim(-0.2, 0.2)
ax2.set_xlabel('Chemical Shift (ppm)', fontsize=11)
ax2.set_ylabel('Normalized Magnitude', fontsize=11)
ax2.set_title('TSP Region - Magnitude Spectra (Normalized)', fontsize=12)
ax2.legend(loc='upper right', fontsize=8, ncol=3, title='File #')
ax2.grid(True, alpha=0.3)
ax2.axvline(x=0, color='k', linestyle='--', alpha=0.5, label='0 ppm')
plt.tight_layout()
plt.savefig('alanine_magnitude_comparison.png', dpi=150, bbox_inches='tight')
print(f"Saved: alanine_magnitude_comparison.png")
plt.close()
# Print TSP intensity comparison table
print("\n" + "="*60)
print("TSP Intensity Comparison (Magnitude Mode)")
print("="*60)
print(f"{'File':<10} {'TSP Max':<15} {'Ratio vs File 10':<15}")
print("-"*60)
tsp_values = []
for magnitude, label in zip(all_magnitude_data, file_labels):
file_num = label.split()[-1]
tsp_region = (all_ppm > -0.1) & (all_ppm < 0.1)
tsp_max = np.max(magnitude[tsp_region])
tsp_values.append((file_num, tsp_max))
# Find File 10 reference
ref_tsp = None
for file_num, tsp_max in tsp_values:
if file_num == '10':
ref_tsp = tsp_max
break
if ref_tsp is None:
ref_tsp = tsp_values[0][1]
for file_num, tsp_max in tsp_values:
ratio = tsp_max / ref_tsp
print(f"File {file_num:<5} {tsp_max:<15.3e} {ratio:<15.2f}")
print("="*60)
# Calculate Scale(TSP) for each file
print("\n" + "="*60)
print("Calculated Scale(TSP) = TSP_sample / TSP_reference")
print("="*60)
print(f"{'File':<10} {'Scale(TSP)':<15} {'Expected (Table S5)':<20}")
print("-"*60)
# File 10 is reference (Scale = 1.0)
expected_scales = {
'10': 1.00,
'20': 0.98,
'30': 0.98,
'40': 0.98,
'50': 0.97,
'60': 0.98,
'70': 0.96,
}
for file_num, tsp_max in tsp_values:
scale_tsp = tsp_max / ref_tsp
expected = expected_scales.get(file_num, 'N/A')
print(f"File {file_num:<5} {scale_tsp:<15.3f} {expected:<20}")
print("="*60)
print(f"\nReference TSP magnitude (File 10): {ref_tsp:.3e}")
print(f"Range of Scale(TSP): {min([v[1] for v in tsp_values])/ref_tsp:.2f} to {max([v[1] for v in tsp_values])/ref_tsp:.2f}")