Skip to content

Commit f6909f7

Browse files
authored
121 the modified sinc smoother implementation (#213)
* Initial modified sinc smoother implemented * Base FIRfilter class added * Savitzky_golay_filter updated with FIRfilter * modified sinc smoother implemented using the FIR class * ran linting and formatting * Added authorship to files * Minor fixes to implementation of modified sinc smoother * fixed and changed the comments to add more explanation to the approach * Linting performed * Small corrections made before creating PR * added test for kappa corrections * Fixed type return from "Self" to "_BaseFIRFilter" * Added changes suggested during PR review
1 parent 82826e4 commit f6909f7

5 files changed

Lines changed: 477 additions & 77 deletions

File tree

chemotools/smooth/__init__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
from ._mean_filter import MeanFilter
22
from ._median_filter import MedianFilter
3+
from ._modified_sinc_smoother import ModifiedSincFilter
34
from ._savitzky_golay_filter import SavitzkyGolayFilter
45
from ._whittaker_smooth import WhittakerSmooth
56

6-
__all__ = ["MeanFilter", "MedianFilter", "SavitzkyGolayFilter", "WhittakerSmooth"]
7+
__all__ = [
8+
"MeanFilter",
9+
"MedianFilter",
10+
"ModifiedSincFilter",
11+
"SavitzkyGolayFilter",
12+
"WhittakerSmooth",
13+
]

chemotools/smooth/_base.py

Lines changed: 107 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# _base.py
2-
# Authors: Niklas Zell <nik.zoe@web.de>, Pau Cabaneros
2+
# Authors: Niklas Zell <nik.zoe@web.de>, Nusret Emirhan Salli <nusret.emirhan.salli@gmail.com>, Pau Cabaneros
33
# License: MIT
44

55
from __future__ import annotations
@@ -94,3 +94,109 @@ def _solve_whittaker(
9494
logger.debug("Primary solver failed (%s); fallback to sparse LU.", e)
9595
DtD = compute_DtD_sparse(len(x))
9696
return whittaker_smooth_sparse(x, w, self.lam, DtD)
97+
98+
99+
class _BaseFIRFilter(TransformerMixin, OneToOneFeatureMixin, BaseEstimator, ABC):
100+
"""
101+
Base class for linear-phase FIR smoothers.
102+
103+
Subclasses must implement `_compute_kernel(self) -> np.ndarray`
104+
returning a 1D symmetric kernel of odd length whose sum is 1.0.
105+
106+
Parameters
107+
----------
108+
window_size : int, odd >= 3
109+
Number of taps in the FIR kernel.
110+
mode : {"mirror","constant","nearest","wrap","interp"}, default="interp"
111+
Boundary handling. "interp" = linear extrapolation (recommended for MS). # Schmid et al.
112+
axis : int, default=1
113+
Axis along which to smooth for 2D inputs (rows × features). Use 1 to
114+
smooth along feature axis for each row.
115+
"""
116+
117+
def __init__(
118+
self,
119+
window_size: int = 21,
120+
mode: Literal["mirror", "constant", "nearest", "wrap", "interp"] = "interp",
121+
axis: int = 1,
122+
) -> None:
123+
self.window_size = window_size
124+
self.mode = mode
125+
self.axis = axis
126+
127+
# sklearn API
128+
def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "_BaseFIRFilter":
129+
X = validate_data(
130+
self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
131+
)
132+
133+
if self.window_size < 3 or self.window_size % 2 == 0:
134+
raise ValueError("window_size must be an odd integer >= 3.")
135+
self.kernel_ = self._compute_kernel().astype(np.float64, copy=False)
136+
if self.kernel_.ndim != 1 or self.kernel_.size != self.window_size:
137+
raise ValueError("kernel must be 1D with length equal to window_size.")
138+
if not np.allclose(self.kernel_.sum(), 1.0, atol=1e-12):
139+
raise ValueError("kernel must be DC-preserving (sum == 1).")
140+
self._half_ = (self.window_size - 1) // 2
141+
return self
142+
143+
def transform(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndarray:
144+
check_is_fitted(self, "kernel_")
145+
X_ = validate_data(
146+
self,
147+
X,
148+
y="no_validation",
149+
ensure_2d=True,
150+
copy=True,
151+
reset=False,
152+
dtype=np.float64,
153+
)
154+
155+
# move smoothing axis to last, convolve row-wise, then move back
156+
ax = self.axis if self.axis >= 0 else X_.ndim + self.axis
157+
X_sw = np.moveaxis(X_, ax, -1)
158+
lead = int(np.prod(X_sw.shape[:-1])) or 1
159+
L = X_sw.shape[-1]
160+
Z = X_sw.reshape(lead, L)
161+
for i in range(lead):
162+
Z[i] = self._apply_filter_1d(Z[i])
163+
out = X_sw.reshape(*X_sw.shape)
164+
return np.moveaxis(out, -1, ax)
165+
166+
@abstractmethod
167+
def _compute_kernel(self) -> np.ndarray:
168+
"""
169+
Subclasses must implement this method to compute the convolution kernel.
170+
"""
171+
raise NotImplementedError
172+
173+
# --- shared convolution/padding ---
174+
def _apply_filter_1d(self, x: np.ndarray) -> np.ndarray:
175+
m = self._half_
176+
xp = self._pad_1d(x, m)
177+
return np.convolve(xp, self.kernel_, mode="valid") # same length as x
178+
179+
def _pad_1d(self, x: np.ndarray, m: int) -> np.ndarray:
180+
if m == 0:
181+
return x.copy()
182+
mode = self.mode
183+
if mode == "interp":
184+
# Linear extrapolation using boundary slopes (paper’s recommendation).
185+
if x.size < 2:
186+
left = np.repeat(x[0], m)
187+
right = np.repeat(x[-1], m)
188+
else:
189+
ls = x[1] - x[0]
190+
rs = x[-1] - x[-2]
191+
left = x[0] - ls * np.arange(m, 0, -1, dtype=np.float64)
192+
right = x[-1] + rs * np.arange(1, m + 1, dtype=np.float64)
193+
return np.concatenate([left, x, right], axis=0)
194+
if mode == "nearest":
195+
return np.pad(x, (m, m), mode="edge")
196+
if mode == "mirror":
197+
return np.pad(x, (m, m), mode="reflect") # mirror without repeating edge
198+
if mode == "wrap":
199+
return np.pad(x, (m, m), mode="wrap")
200+
if mode == "constant":
201+
return np.pad(x, (m, m), mode="constant", constant_values=(x[0], x[-1]))
202+
raise ValueError(f"Unknown mode='{mode}'")
Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
"""
2+
The :mod:chemotools.smooth._modified_sinc_smoother module implements the Modified Sinc Filter (SGF) transformation.
3+
"""
4+
5+
# Authors: Nusret Emirhan Salli <nusret.emirhan.salli@gmail.com>
6+
# License: MIT
7+
8+
from __future__ import annotations
9+
from typing import Literal
10+
import numpy as np
11+
12+
from ._base import _BaseFIRFilter
13+
14+
15+
class ModifiedSincFilter(_BaseFIRFilter):
16+
"""
17+
Modified Sinc smoothing (MS) for denoising while preserving peak positions based on the paper "Why and How Savitzky–Golay Filters Should Be Replaced."
18+
19+
The Modified Sinc smoother is a linear-phase FIR filter: the signal is
20+
convolved with a fixed, symmetric kernel. The kernel is built from:
21+
1) a sinc core with argument ((n + 4) / 2) * pi * x (Eq. 3, p. 187),
22+
2) a special Gaussian-like window w(x) whose value and slope vanish at
23+
the window ends (Eq. 4, p. 187), and
24+
3) small optional correction terms that flatten the passband so low
25+
frequencies are almost unattenuated (Eqs. 7–8 and Table 1, pp. 187–188).
26+
27+
Parameters
28+
----------
29+
window_size : int, default=21
30+
Odd number of taps (2*m + 1). Larger values give stronger smoothing.
31+
32+
n : int, default=6
33+
Even integer >= 2. Controls how many inner zeros the sinc has within
34+
the window. The paper discusses n = 2, 4, 6, 8, 10 (Fig. 2).
35+
36+
alpha : float, default=4.0
37+
Positive window width parameter. Larger alpha reduces side lobes more
38+
aggressively (steeper roll-off).
39+
40+
use_corrections : bool, default=True
41+
If True and n in {6, 8, 10} and the window is large enough, add the
42+
small passband-flattening terms from Eqs. 7–8 (coefficients from Table 1).
43+
44+
mode : {"mirror", "constant", "nearest", "wrap", "interp"}, default="interp"
45+
Boundary strategy, passed to the base FIR class. "interp" performs
46+
linear extrapolation (recommended in the paper).
47+
48+
axis : int, default=1
49+
Axis along which to smooth for 2D inputs (rows x features). Use 1 to
50+
smooth within each row.
51+
52+
Methods
53+
-------
54+
fit(X, y=None)
55+
Inherited from the base class. Validates input and builds the kernel.
56+
57+
transform(X, y=None)
58+
Inherited from the base class. Pads, convolves, and returns the smoothed data.
59+
60+
References
61+
----------
62+
[1] Schmid, M.; Rath, D.; Diebold, U. "Why and How Savitzky–Golay Filters Should Be Replaced."
63+
ACS measurement science Au 2022, 2 (2), 185-196.
64+
65+
Examples
66+
--------
67+
>>> from chemotools.smooth import ModifiedSincFilter
68+
>>> import numpy as np
69+
>>> X = np.array([[0.0, 1.0, 2.0, 1.0, 0.0]], dtype=float)
70+
>>> ms = ModifiedSincFilter(window_size= 9, n=6, alpha=4.0, mode="interp")
71+
>>> X_smooth = ms.fit_transform(X)
72+
"""
73+
74+
def __init__(
75+
self,
76+
window_size: int = 21,
77+
n: int = 6,
78+
alpha: float = 4.0,
79+
use_corrections: bool = True,
80+
mode: Literal["mirror", "constant", "nearest", "wrap", "interp"] = "interp",
81+
axis: int = 1,
82+
) -> None:
83+
super().__init__(window_size=window_size, mode=mode, axis=axis)
84+
self.n = n
85+
self.alpha = alpha
86+
self.use_corrections = use_corrections
87+
88+
def _compute_kernel(self) -> np.ndarray:
89+
"""
90+
Build the Modified Sinc kernel h[i] for i = -m ... m.
91+
92+
Implementation map to the paper:
93+
1) Map index to x = i / (m + 1) (Eq. 5).
94+
2) Base sinc: np.sinc(0.5 * (n + 4) * x) (Eq. 3).
95+
3) Solve a 3x3 system for the window coefficients so that
96+
w(0)=1, w(1)=0, w'(1)=0; then we form w(x) (Eq. 4).
97+
4) h = sinc * w.
98+
5) Optional corrections from based on Eqs. 7–8 (Table 1).
99+
6) Symmetrize and normalize so sum(h)=1 (Eq. 6).
100+
"""
101+
102+
# ---- parameter checks ----
103+
if self.n % 2 != 0 or self.n < 2:
104+
raise ValueError("n must be an even integer >= 2.")
105+
if self.alpha <= 0:
106+
raise ValueError("alpha must be positive.")
107+
108+
# ---- 1) Eq. 5: index -> normalized coordinate in [-1, 1] ----
109+
m = (self.window_size - 1) // 2
110+
i = np.arange(-m, m + 1, dtype=np.float64)
111+
x = i / (m + 1) if m >= 0 else np.array([0.0])
112+
113+
# ---- 2) Eq. 3: modified sinc core (note that numpy's sinc uses sin(pi*u)/(pi*u)) ----
114+
core = np.sinc(0.5 * (self.n + 4) * x)
115+
116+
# ---- 3) Eq. 4: window with w(0)=1, w(1)=0, w'(1)=0 ----
117+
# Precompute the exponentials appearing in those constraints.
118+
E1 = np.exp(
119+
-self.alpha * 1.0
120+
) # exp(-alpha * 1^2) at x = 1 (central Gaussian)
121+
Ep = np.exp(-self.alpha * 1.0) # exp(-alpha * (1-2)^2) = exp(-alpha) for (x-2)
122+
Em = np.exp(
123+
-self.alpha * 9.0
124+
) # exp(-alpha * (1+2)^2) = exp(-9*alpha) for (x+2)
125+
e4 = np.exp(-self.alpha * 4.0) # exp(-alpha * (±2)^2) at x = 0
126+
127+
# Linear system rows correspond to: w(0)=1, w(1)=0, w'(1)=0
128+
M = np.array(
129+
[
130+
[1.0, 2.0 * e4, 1.0],
131+
[E1, (Ep + Em), 1.0],
132+
[-2 * self.alpha * E1, 2 * self.alpha * (Ep - 3 * Em), 0.0],
133+
],
134+
dtype=np.float64,
135+
)
136+
# solve the system using linear algebra.
137+
rhs = np.array([1.0, 0.0, 0.0], dtype=np.float64)
138+
139+
Acoef, Bcoef, Ccoef = np.linalg.solve(M, rhs)
140+
141+
# Window values calculated
142+
window = (
143+
Acoef * np.exp(-self.alpha * x**2)
144+
+ Bcoef
145+
* (
146+
np.exp(-self.alpha * (x - 2.0) ** 2)
147+
+ np.exp(-self.alpha * (x + 2.0) ** 2)
148+
)
149+
+ Ccoef
150+
)
151+
152+
# ---- 4) base kernel = windowed sinc ----
153+
h = core * window
154+
155+
# ---- 5) Eqs. 7–8 + Table 1: optional passband-flattening corrections ----
156+
if (
157+
self.use_corrections
158+
and self._has_kappa_table(self.n)
159+
and (m >= self.n // 2 + 2)
160+
):
161+
# nu = 1 for n/2 odd (n=6,10); nu = 2 for n=8 (Eq. 7)
162+
nu = 1 if ((self.n // 2) % 2 == 1) else 2
163+
kappas = self._kappa_coeffs(
164+
self.n, m
165+
) # kappa = a + b / (c - m)^3 (Eq. 8; Table 1)
166+
corr = 0.0
167+
# add the correction term according to Eq. 7
168+
for j, kappa in enumerate(kappas):
169+
corr += kappa * window * x * np.sin((2 * j + nu) * np.pi * x)
170+
h = h + corr
171+
172+
# ---- 6) Eq. 6: Enforce symmetry and Direct Current = 1 ----
173+
h = 0.5 * (h + h[::-1]) # make it symmetric
174+
s = h.sum()
175+
if not np.isfinite(s) or abs(s) < 1e-15:
176+
raise FloatingPointError(
177+
"Kernel normalization failed; try different parameters."
178+
)
179+
h = h / s # Normalize so sum = 1
180+
return h
181+
182+
# ====== Table 1 (p. 188): kappa fit coefficients for Eq. 8 ======
183+
@staticmethod
184+
def _has_kappa_table(n: int) -> bool:
185+
# The paper provides corrections for n = 6, 8, 10
186+
return n in (6, 8, 10)
187+
188+
@staticmethod
189+
def _kappa_coeffs(n: int, m: int) -> np.ndarray:
190+
"""
191+
Return [kappa_0] for n=6; [kappa_0, kappa_1] for n=8 or 10.
192+
Formula: kappa_j^(n)(m) = a_j^(n) + b_j^(n) / (c_j^(n) - m)^3
193+
(Eq. 8, p. 188; coefficients from Table 1).
194+
"""
195+
if n == 6:
196+
ABC = [(0.00172, 0.02437, 1.64375)]
197+
elif n == 8:
198+
ABC = [(0.00440, 0.08821, 2.35938), (0.00615, 0.02472, 3.63594)]
199+
elif n == 10:
200+
ABC = [(0.00118, 0.04219, 2.74688), (0.00367, 0.12780, 2.77031)]
201+
else:
202+
return np.zeros(0, dtype=np.float64)
203+
return np.asarray([a + b / ((c - m) ** 3) for a, b, c in ABC], dtype=np.float64)

0 commit comments

Comments
 (0)