2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -7,7 +7,7 @@ on:
 
   pull_request:
     branches:
-      - '**'
+      - "**"
 
 jobs:
   test:
4 changes: 2 additions & 2 deletions README.md
@@ -4,7 +4,7 @@
 It provides various speech signal processing modules as PyTorch layers,
 allowing users to integrate classic signal processing algorithms directly into neural network architectures and optimize them through backpropagation.
 
-[![Manual](https://img.shields.io/badge/docs-stable-blue.svg)](https://sp-nitech.github.io/diffsptk/3.4.0/)
+[![Manual](https://img.shields.io/badge/docs-stable-blue.svg)](https://sp-nitech.github.io/diffsptk/stable/)
 [![Downloads](https://static.pepy.tech/badge/diffsptk)](https://pepy.tech/project/diffsptk)
 [![ClickPy](https://img.shields.io/badge/downloads-clickpy-yellow.svg)](https://clickpy.clickhouse.com/dashboard/diffsptk)
 [![Python Version](https://img.shields.io/pypi/pyversions/diffsptk.svg)](https://pypi.python.org/pypi/diffsptk)
@@ -22,7 +22,7 @@ allowing users to integrate classic signal processing algorithms directly into n
 
 ## Documentation
 
-- [**Reference Manual**](https://sp-nitech.github.io/diffsptk/3.4.0/) - Detailed API documentation and module specifications.
+- [**Reference Manual**](https://sp-nitech.github.io/diffsptk/stable/) - Detailed API documentation and module specifications.
 - [**Interactive Tutorial**](https://colab.research.google.com/drive/1xAoUKqXadvJXJ7RzN0OceB6y7q5i7Sn6?usp=drive_link) (Google Colab) - Hands-on examples to get started with `diffsptk` in your browser.
 - [**Conference Paper**](https://www.isca-archive.org/ssw_2023/yoshimura23_ssw.html) - Technical background and implementation details available on the ISCA Archive.
 
20 changes: 18 additions & 2 deletions diffsptk/functional.py
@@ -3248,7 +3248,12 @@ def zcross(
 
 
 def zerodf(
-    x: Tensor, b: Tensor, frame_period: int = 80, ignore_gain: bool = False
+    x: Tensor,
+    b: Tensor,
+    frame_period: int = 80,
+    ignore_gain: bool = False,
+    zeroth_index: int = 0,
+    mode: str = "direct",
 ) -> Tensor:
     """Apply an all-zero digital filter.
 
@@ -3266,12 +3271,23 @@
     ignore_gain : bool
         If True, perform filtering without the gain.
 
+    zeroth_index : int >= 0
+        The index of the zeroth coefficient in the filter coefficients.
+
+    mode : ['direct', 'efficient']
+        The implementation mode for time-varying convolution.
+
     Returns
     -------
     out : Tensor [shape=(..., T)]
         The output signal.
 
     """
     return nn.AllZeroDigitalFilter._func(
-        x, b, frame_period=frame_period, ignore_gain=ignore_gain
+        x,
+        b,
+        frame_period=frame_period,
+        ignore_gain=ignore_gain,
+        zeroth_index=zeroth_index,
+        mode=mode,
     )
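
Below is a usage sketch of the extended `zerodf` signature (not part of the diff). The keyword names come from the diff above; the concrete sizes, the random data, and the expected output shape are illustrative assumptions based on the docstring conventions.

```python
# Illustrative only: sizes and data are invented; defaults are shown explicitly.
import torch

from diffsptk import functional

x = torch.randn(8000)      # input signal, shape (T,)
b = torch.randn(100, 11)   # 100 frames of order-10 filter coefficients

# zeroth_index=0 and mode="direct" are the defaults, which should match the
# previous behavior; mode="efficient" selects the alternative time-varying
# convolution path introduced in this PR.
y = functional.zerodf(
    x, b, frame_period=80, ignore_gain=False, zeroth_index=0, mode="direct"
)
print(y.shape)  # expected: torch.Size([8000])
```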
94 changes: 51 additions & 43 deletions diffsptk/modules/mglsadf.py
@@ -35,6 +35,7 @@
 from .mgc2sp import MelGeneralizedCepstrumToSpectrum
 from .root_pol import PolynomialToRoots
 from .stft import ShortTimeFourierTransform
+from .zerodf import AllZeroDigitalFilter
 
 
 def is_array_like(x: Any) -> bool:
@@ -277,18 +278,18 @@ def __init__(
         if alpha == 0 and gamma == 0:
             cep_order = filter_order
 
-        # Prepare padding module.
         if self.phase == "minimum":
-            padding = (cep_order, 0)
+            cep_orders = (cep_order, 0)
         elif self.phase == "maximum":
-            padding = (0, cep_order)
+            cep_orders = (0, cep_order)
         elif self.phase == "zero":
-            padding = (cep_order, cep_order)
+            cep_orders = (cep_order, cep_order)
         elif self.phase == "mixed":
-            padding = cep_order if is_array_like(cep_order) else (cep_order, cep_order)
+            cep_orders = (
+                cep_order if is_array_like(cep_order) else (cep_order, cep_order)
+            )
         else:
             raise ValueError(f"phase {phase} is not supported.")
-        self.pad = nn.ConstantPad1d(padding, 0)
 
         # Prepare frequency transformation module.
         if self.phase == "mixed":
@@ -297,7 +298,7 @@ def __init__(
             self.mgc2c.append(
                 MelGeneralizedCepstrumToMelGeneralizedCepstrum(
                     filter_order[i],
-                    padding[i],
+                    cep_orders[i],
                     in_alpha=alpha,
                     in_gamma=gamma,
                     n_fft=n_fft,
@@ -318,6 +319,16 @@
 
         self.linear_intpl = LinearInterpolation(frame_period)
 
+        self.zerodf = AllZeroDigitalFilter(
+            sum(cep_orders),
+            frame_period,
+            ignore_gain=False,
+            zeroth_index=cep_orders[1],
+            mode="efficient",
+            device=device,
+            dtype=dtype,
+        )
+
         cp = mp.taylor(mp.exp, 0, taylor_order)
         cp = np.array([float(x) for x in cp])
         weights = cp[1:] / cp[:-1]
@@ -341,29 +352,25 @@ def forward(
             c_min = self.mgc2c[0](mc_min)
             c_max = self.mgc2c[1](mc_max)
             c0 = c_min[..., :1] + c_max[..., :1]
-            c1_min = c_min[..., 1:].flip(-1)
+            c1_min = c_min[..., 1:]
             c0_dummy = torch.zeros_like(c0)
-            c1_max = c_max[..., 1:]
-            c = torch.cat([c1_min, c0_dummy, c1_max], dim=-1)
+            c1_max = c_max[..., 1:].flip(-1)
+            c = torch.cat([c1_max, c0_dummy, c1_min], dim=-1)
         else:
             c = self.mgc2c(mc)
             c0, c = remove_gain(c, value=0, return_gain=True)
             if self.phase == "minimum":
-                c = c.flip(-1)
-            elif self.phase == "maximum":
                 pass
+            elif self.phase == "maximum":
+                c = c.flip(-1)
             elif self.phase == "zero":
                 c = mirror(c, half=True)
             else:
                 raise RuntimeError
 
-        c = self.linear_intpl(c)
-
         y = x * self.a[0]
         for i in range(1, len(self.a)):
-            x = self.pad(x)
-            x = x.unfold(-1, c.size(-1), 1)
-            x = (x * c).sum(-1) * self.weights[i]
+            x = self.zerodf(x, c) * self.weights[i]
             y += x * self.a[i]
 
         if not self.ignore_gain:
@@ -389,28 +396,26 @@
     ) -> None:
         super().__init__()
 
+        self.frame_period = frame_period
         self.ignore_gain = ignore_gain
         self.phase = phase
         self.n_fft = n_fft
 
-        # Prepare padding module.
-        taps = ir_length - 1
         if self.phase == "minimum":
-            padding = (taps, 0)
+            ir_orders = (ir_length - 1, 0)
         elif self.phase == "maximum":
-            padding = (0, taps)
+            ir_orders = (0, ir_length - 1)
         elif self.phase == "zero":
-            padding = (taps, taps)
+            ir_orders = (ir_length - 1, ir_length - 1)
         elif self.phase == "mixed":
-            padding = (
+            ir_orders = (
                 (ir_length[0] - 1, ir_length[1] - 1)
                 if is_array_like(ir_length)
-                else (taps, taps)
+                else (ir_length - 1, ir_length - 1)
             )
         else:
             raise ValueError(f"phase {phase} is not supported.")
-        self.pad = nn.ConstantPad1d(padding, 0)
-        self.padding = padding
+        self.ir_orders = ir_orders
 
         if self.phase in ("minimum", "maximum"):
             self.mgc2ir = MelGeneralizedCepstrumToMelGeneralizedCepstrum(
@@ -444,7 +449,7 @@ def __init__(
             self.mgc2c.append(
                 MelGeneralizedCepstrumToMelGeneralizedCepstrum(
                     filter_order[i],
-                    padding[i],
+                    ir_orders[i],
                     in_alpha=alpha,
                     in_gamma=gamma,
                     n_fft=n_fft,
@@ -458,7 +463,15 @@
         else:
             raise ValueError(f"phase {phase} is not supported.")
 
-        self.linear_intpl = LinearInterpolation(frame_period)
+        self.zerodf = AllZeroDigitalFilter(
+            sum(ir_orders),
+            frame_period,
+            ignore_gain=False,
+            zeroth_index=ir_orders[1],
+            mode="efficient",
+            device=device,
+            dtype=dtype,
+        )
 
     def forward(
         self,
@@ -467,9 +480,13 @@
     ) -> torch.Tensor:
         if self.phase == "minimum":
             h = self.mgc2ir(mc)
-            h = h.flip(-1)
+            if self.ignore_gain:
+                h = h / h[..., :1]
         elif self.phase == "maximum":
             h = self.mgc2ir(mc)
+            if self.ignore_gain:
+                h = h / h[..., :1]
+            h = h.flip(-1)
         elif self.phase == "zero":
             c = self.mgc2c(mc)
             c[..., 1:] *= 0.5
@@ -485,25 +502,16 @@
                 c0 = torch.zeros_like(c_min[..., :1])
             else:
                 c0 = c_min[..., :1] + c_max[..., :1]
-            c = torch.cat([c_min[..., 1:].flip(-1), c0, c_max[..., 1:]], dim=-1)
+            c = torch.cat([c_max[..., 1:].flip(-1), c0, c_min[..., 1:]], dim=-1)
             c = F.pad(c, (0, self.n_fft - c.size(-1)))
-            c = torch.roll(c, -self.padding[0], dims=-1)
+            shift = self.ir_orders[1]
+            c = torch.roll(c, -shift, dims=-1)
             h = self.c2ir(c)
-            h = torch.roll(h, self.padding[0], dims=-1)[..., : sum(self.padding) + 1]
+            h = torch.roll(h, shift, dims=-1)[..., : sum(self.ir_orders) + 1]
         else:
             raise RuntimeError
 
-        h = self.linear_intpl(h)
-
-        if self.ignore_gain:
-            if self.phase == "minimum":
-                h = h / h[..., -1:]
-            elif self.phase == "maximum":
-                h = h / h[..., :1]
-
-        x = self.pad(x)
-        x = x.unfold(-1, h.size(-1), 1)
-        y = (x * h).sum(-1)
+        y = self.zerodf(x, h)
         return y
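
The recurring change in this file swaps the manual pad, unfold, and weighted-sum convolution for the new `AllZeroDigitalFilter`, with `zeroth_index` marking where the zeroth tap sits inside each coefficient row: anti-causal taps before it, causal taps after it, matching the `torch.cat([c_max[..., 1:].flip(-1), c0, c_min[..., 1:]])` layout above. A minimal sketch of that usage follows; only the import path and keyword names come from the diff, while all sizes, the coefficient layout details, and the expected output shape are invented assumptions.

```python
# Illustrative sketch of the AllZeroDigitalFilter pattern used by mglsadf.py.
import torch

from diffsptk.modules.zerodf import AllZeroDigitalFilter

T, P = 800, 80            # samples and frame period (invented)
M_anti, M_causal = 4, 6   # anti-causal / causal tap counts (invented)
x = torch.randn(T)

# One coefficient row per frame: [flipped anti-causal taps | h0 | causal taps].
h = torch.randn(T // P, M_anti + 1 + M_causal)

zerodf = AllZeroDigitalFilter(
    M_anti + M_causal,    # total filter order, as in sum(ir_orders)
    P,                    # frame period
    ignore_gain=False,
    zeroth_index=M_anti,  # index of h0 within each row
    mode="efficient",
)
y = zerodf(x, h)
print(y.shape)  # expected: torch.Size([800])
```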