Skip to content

Commit 3aba45e

Browse files
authored
Merge pull request #4 from seyong92/implement_median_filter
Implement median filter
2 parents a6c0674 + b759af5 commit 3aba45e

4 files changed

Lines changed: 84 additions & 21 deletions

File tree

.github/workflows/python-package.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
strategy:
1717
fail-fast: false
1818
matrix:
19-
python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"]
19+
python-version: ["3.8", "3.9", "3.10"]
2020

2121
steps:
2222
- uses: actions/checkout@v3

pyproject.toml

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,15 @@ license = "GPL-3.0"
88
readme = "README.md"
99

1010
[tool.poetry.dependencies]
11-
python = "^3.6"
12-
numpy = "^1.16.0"
13-
scipy = "^1.0.0"
14-
soundfile = "^0.10.0"
15-
librosa = "^0.8"
11+
python = ">=3.8,<3.11"
12+
numpy = "^1.20"
13+
scipy = "^1.8"
14+
soundfile = "^0.10"
1615

1716
[tool.poetry.dev-dependencies]
18-
pytest = "^5.2"
19-
flake8 = "^3.8.3"
20-
numpydoc = "^1.0.0"
17+
pytest = "^7.0"
18+
flake8 = "^4.0.1"
19+
numpydoc = "^1.0"
2120

2221
[tool.poetry.scripts]
2322
tsmod = 'pytsmod.console:run'

pytsmod/hptsm.py

Lines changed: 75 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
from librosa.effects import hpss
1+
from scipy.ndimage import median_filter
22
import numpy as np
33
from .pvtsm import phase_vocoder
44
from .olatsm import ola
5-
from .utils import _validate_audio
5+
from .utils import _validate_audio, stft, istft
66

77

8-
def hptsm(x, s, hp_kernel_size=31, hp_power=2.0, hp_mask=False, hp_margin=1.0,
8+
def hptsm(x, s, hp_len_harm=10, hp_len_perc=10, hp_mask_mode='binary', hp_win_type='hann',
9+
hp_win_size=1024, hp_hop_size=256, hp_zero_pad=0, hp_fft_shift=False,
910
pv_win_type='hann', pv_win_size=2048, pv_syn_hop_size=512,
1011
pv_zero_pad=0, pv_restore_energy=False, pv_fft_shift=False,
1112
pv_phase_lock=True, ola_win_type='hann',
@@ -35,15 +36,10 @@ def hptsm(x, s, hp_kernel_size=31, hp_power=2.0, hp_mask=False, hp_margin=1.0,
3536
the modified output audio sequence.
3637
"""
3738
x = _validate_audio(x)
38-
x_harm = np.zeros(x.shape)
39-
x_perc = np.zeros(x.shape)
4039

41-
for c, x_chan in enumerate(x):
42-
x_harm_chan, x_perc_chan = hpss(x_chan, kernel_size=hp_kernel_size,
43-
power=hp_power, mask=hp_mask,
44-
margin=hp_margin)
45-
x_harm[c, :] = x_harm_chan
46-
x_perc[c, :] = x_perc_chan
40+
x_harm, x_perc = _hpss(x, len_harm=hp_len_harm, len_perc=hp_len_perc, mask_mode=hp_mask_mode,
41+
win_type=hp_win_type, win_size=hp_win_size, hop_size=hp_hop_size,
42+
zero_pad=hp_zero_pad, fft_shift=hp_fft_shift)
4743

4844
y_harm = phase_vocoder(x_harm, s, win_type=pv_win_type,
4945
win_size=pv_win_size, syn_hop_size=pv_syn_hop_size,
@@ -55,3 +51,71 @@ def hptsm(x, s, hp_kernel_size=31, hp_power=2.0, hp_mask=False, hp_margin=1.0,
5551
syn_hop_size=ola_syn_hop_size)
5652

5753
return y_harm + y_perc
54+
55+
56+
def _hpss(x, len_harm=10, len_perc=10, mask_mode='binary', win_type='hann',
          win_size=1024, hop_size=256, zero_pad=0, fft_shift=False):
    """Separate the input audio sequence into a harmonic and a percussive source.
    The algorithm is from the following paper.

    Derry Fitzgerald, "Harmonic/percussive separation using median filtering."
    Proc. of the Int. Conf. on Digital Audio Effects (DAFx). Vol. 13. 2010.

    Parameters
    ----------

    x : numpy.ndarray [shape=(channel, num_samples) or (num_samples)]
        the input audio sequence to separate. A 1-D input is treated as a
        single channel.
    len_harm : int
        length of the median filter kernel for the harmonic source
        (applied along the second axis of the magnitude spectrogram).
    len_perc : int
        length of the median filter kernel for the percussive source
        (applied along the first axis of the magnitude spectrogram).
    mask_mode : str
        mask mode for the separation. binary and relative are available.
    win_type : str
        type of the window function for the STFT. hann and sin are available.
    win_size : int > 0 [scalar]
        size of the window function for the STFT and the ISTFT.
    hop_size : int > 0 [scalar]
        hop size of the analysis/synthesis window for the STFT and the ISTFT.
    zero_pad : int >= 0 [scalar]
        the size of the zero pad in the window function.
    fft_shift : bool
        apply circular shift to STFT and ISTFT.

    Returns
    -------

    x_harm : numpy.ndarray [shape=(channel, num_samples) or (num_samples)]
        the separated harmonic audio sequence.
    x_perc : numpy.ndarray [shape=(channel, num_samples) or (num_samples)]
        the separated percussive audio sequence.

    Raises
    ------

    ValueError
        if mask_mode is neither 'binary' nor 'relative'.
    """
    # Reject an invalid mode before doing any STFT / filtering work.
    if mask_mode not in ('binary', 'relative'):
        raise ValueError("Please use the valid mask mode. (binary, relative)")

    # Promote a 1-D signal to (1, num_samples) so the per-channel loop and
    # the x.shape[1] length lookup work for the documented 1-D input as well.
    x = np.atleast_2d(x)
    num_samples = x.shape[1]

    x_harm = np.zeros(x.shape)
    x_perc = np.zeros(x.shape)
    eps = np.finfo(float).eps

    for c, x_chan in enumerate(x):
        spec = stft(x_chan, ana_hop=hop_size, win_type=win_type,
                    win_size=win_size, zero_pad=zero_pad, fft_shift=fft_shift)
        mag_spec = np.abs(spec)

        # NOTE(review): the [1, len_harm] kernel smooths along axis 1 and the
        # [len_perc, 1] kernel along axis 0; this presumes stft returns a
        # (frequency, time) array -- confirm against utils.stft.
        mag_spec_harm = median_filter(mag_spec, size=[1, len_harm],
                                      mode='reflect')
        mag_spec_perc = median_filter(mag_spec, size=[len_perc, 1],
                                      mode='reflect')

        if mask_mode == 'binary':
            mask_harm = mag_spec_harm > mag_spec_perc
            mask_perc = mag_spec_harm <= mag_spec_perc
        else:
            # 'relative': soft masks; eps keeps the division finite where
            # both filtered magnitudes are zero.
            denom = mag_spec_harm + mag_spec_perc + eps
            mask_harm = mag_spec_harm / denom
            mask_perc = mag_spec_perc / denom

        # The masks are applied to the complex spectrogram returned by stft.
        spec_harm = mask_harm * spec
        spec_perc = mask_perc * spec

        x_harm[c, :] = istft(spec_harm, syn_hop=hop_size, win_type=win_type,
                             win_size=win_size, zero_pad=zero_pad,
                             original_length=num_samples, fft_shift=fft_shift)
        x_perc[c, :] = istft(spec_perc, syn_hop=hop_size, win_type=win_type,
                             win_size=win_size, zero_pad=zero_pad,
                             original_length=num_samples, fft_shift=fft_shift)

    # squeeze() drops the channel axis again for single-channel input.
    return x_harm.squeeze(), x_perc.squeeze()

pytsmod/utils/stft.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def stft(x, ana_hop=2048, win_type='hann', win_size=4096, zero_pad=0, sr=44100,
2222
zero_pad : int > 0 [scalar]
2323
the size of the zero pad in the window function.
2424
sr : int > 0 [scalar]
25-
the sample rate of the audio sequence.
25+
the sample rate of the audio sequence. Only used for time_frequency_out.
2626
fft_shift : bool
2727
apply circular shift to STFT.
2828
time_frequency_out : bool

0 commit comments

Comments
 (0)