-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathplot_tsp_corrected_summary.py
More file actions
182 lines (133 loc) · 5.99 KB
/
plot_tsp_corrected_summary.py
File metadata and controls
182 lines (133 loc) · 5.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#!/usr/bin/env python3
"""
Create a summary comparison plot showing TSP-corrected spectra for all metabolites.
"""
import nmrglue as ng
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from scipy.ndimage import gaussian_filter1d
from pathlib import Path
import re
def get_even_numbered_files(data_dir):
"""Get all even-numbered .dx files from the specified directory."""
dx_files = sorted(data_dir.glob("*.dx"),
key=lambda x: int(re.search(r'(\d+)', x.name).group()))
even_files = [f for f in dx_files
if int(re.search(r'(\d+)', f.name).group()) % 2 == 0]
return even_files
def read_nmr_data(file_path):
"""Read NMR data from JCAMP-DX file."""
dic, data_list = ng.jcampdx.read(str(file_path))
data_real = data_list[0]
data_imag = data_list[1] if len(data_list) > 1 else np.zeros_like(data_real)
data_magnitude = np.sqrt(data_real**2 + data_imag**2)
sfo1 = float(dic['$SFO1'][0])
o1_hz = float(dic['$O1'][0])
sw_hz = float(dic['$SWH'][0])
si = data_real.size
o1_ppm = o1_hz / sfo1
sw_ppm = sw_hz / sfo1
ppm = np.linspace(o1_ppm + sw_ppm/2, o1_ppm - sw_ppm/2, si)
return ppm, data_magnitude, sfo1
def find_tsp_peak(ppm, data, tsp_region=(-0.5, 0.5)):
"""Find the TSP peak position."""
mask = (ppm >= tsp_region[0]) & (ppm <= tsp_region[1])
if not np.any(mask):
return None
ppm_region = ppm[mask]
data_region = data[mask]
peak_idx = np.argmax(data_region)
peak_ppm = ppm_region[peak_idx]
peak_intensity = data_region[peak_idx]
noise = np.std(data_region[data_region < np.percentile(data_region, 50)])
sn_ratio = peak_intensity / noise if noise > 0 else 0
if sn_ratio < 10:
return None
return peak_ppm
def process_and_correct(file_path, sigma=2):
"""Process file and apply TSP correction."""
ppm, magnitude, sfo1 = read_nmr_data(file_path)
smoothed = gaussian_filter1d(magnitude, sigma=sigma)
tsp_ppm = find_tsp_peak(ppm, smoothed)
if tsp_ppm is None:
tsp_ppm = -0.0784
correction = -tsp_ppm
ppm_corrected = ppm + correction
return ppm_corrected, smoothed, correction
def create_summary_plot(base_dir, output_file='tsp_corrected_summary.png'):
"""Create a summary plot with all metabolites."""
folders = sorted([d for d in base_dir.iterdir() if d.is_dir()])
# Create a 4x4 grid (for up to 16 metabolites, we have 14)
n_rows = 4
n_cols = 4
fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 16))
axes = axes.flatten()
region = (0.9, 3.5)
for idx, folder in enumerate(folders):
ax = axes[idx]
folder_name = folder.name.replace('-Reference', '')
try:
even_files = get_even_numbered_files(folder)
# Use distinct colors
colors = plt.cm.tab10(np.linspace(0, 1, len(even_files)))
corrections = []
for i, file_path in enumerate(even_files):
ppm, intensity, corr = process_and_correct(file_path, sigma=2)
corrections.append(corr)
mask = (ppm >= region[0]) & (ppm <= region[1])
ax.plot(ppm[mask], intensity[mask], color=colors[i],
linewidth=1.2, alpha=0.7)
# Configure plot
ax.set_xlim(region[1], region[0]) # Reversed
ax.set_title(f'{folder_name}\n(corr: +{np.mean(corrections):.4f} ppm)',
fontsize=11, fontweight='bold')
# Major and minor ticks
ax.xaxis.set_major_locator(MultipleLocator(1.0)) # Every 1 ppm
ax.xaxis.set_minor_locator(MultipleLocator(0.05)) # Every 0.05 ppm
ax.tick_params(which='major', length=4, direction='out', labelsize=8)
ax.tick_params(which='minor', length=2, direction='out')
# Grid
ax.grid(True, which='major', alpha=0.2, linestyle='-')
ax.grid(True, which='minor', alpha=0.1, linestyle=':')
# Remove y-axis labels for cleaner look (except leftmost column)
if idx % n_cols != 0:
ax.set_yticklabels([])
# Remove x-axis labels for top rows (except bottom row)
if idx < n_cols * (n_rows - 1):
ax.set_xticklabels([])
except Exception as e:
ax.text(0.5, 0.5, f'Error:\n{str(e)[:30]}',
transform=ax.transAxes, ha='center', va='center',
fontsize=9, color='red')
ax.set_title(folder_name, fontsize=11, fontweight='bold')
# Hide unused subplots
for idx in range(len(folders), n_rows * n_cols):
axes[idx].axis('off')
# Overall title
fig.suptitle('TSP-Corrected ¹H NMR Spectra - All Metabolites (0.9-3.5 ppm)\n'
'Minor ticks: 0.05 ppm | Gaussian smoothing (σ=2)',
fontsize=16, fontweight='bold', y=1.00)
# Common labels
fig.text(0.5, -0.01, 'Chemical Shift (ppm)', ha='center',
fontsize=14, fontweight='bold')
fig.text(-0.01, 0.5, 'Intensity (a.u.)', va='center', rotation='vertical',
fontsize=14, fontweight='bold')
plt.tight_layout()
plt.savefig(output_file, dpi=200, bbox_inches='tight')
print(f"✅ Summary plot saved to: {output_file}")
plt.close()
def main():
BASE_DIR = Path("/home/tjiang/elise/pure_shift_nmr/new/raw_data/"
"Reference_Raw_Date_JCAMP-DX")
OUTPUT_FILE = "/home/tjiang/elise/pure_shift_nmr/new/tsp_corrected_spectra/" \
"all_metabolites_summary.png"
print("=" * 70)
print("Creating TSP-corrected summary plot for all metabolites")
print("=" * 70)
create_summary_plot(BASE_DIR, OUTPUT_FILE)
print("\nDone!")
if __name__ == "__main__":
main()