Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ jobs:
torch: 2.3.1
torchaudio: 2.3.1
- python: "3.13"
torch: 2.9.0
torchaudio: 2.9.0
torch: 2.9.1
torchaudio: 2.9.1

steps:
- name: Clone
uses: actions/checkout@v5
uses: actions/checkout@v6

- name: Python
uses: actions/setup-python@v6
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
[![ClickPy](https://img.shields.io/badge/downloads-clickpy-yellow.svg)](https://clickpy.clickhouse.com/dashboard/diffsptk)
[![Advisor](https://snyk.io/advisor/python/diffsptk/badge.svg)](https://snyk.io/advisor/python/diffsptk)
[![Python Version](https://img.shields.io/pypi/pyversions/diffsptk.svg)](https://pypi.python.org/pypi/diffsptk)
[![PyTorch Version](https://img.shields.io/badge/pytorch-2.3.1%20%7C%202.9.0-orange.svg)](https://pypi.python.org/pypi/diffsptk)
[![PyTorch Version](https://img.shields.io/badge/pytorch-2.3.1%20%7C%202.9.1-orange.svg)](https://pypi.python.org/pypi/diffsptk)
[![PyPI Version](https://img.shields.io/pypi/v/diffsptk.svg)](https://pypi.python.org/pypi/diffsptk)
[![Codecov](https://codecov.io/gh/sp-nitech/diffsptk/branch/master/graph/badge.svg)](https://app.codecov.io/gh/sp-nitech/diffsptk)
[![License](https://img.shields.io/github/license/sp-nitech/diffsptk.svg)](https://github.com/sp-nitech/diffsptk/blob/master/LICENSE)
Expand Down
21 changes: 13 additions & 8 deletions diffsptk/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,7 @@ def excite(
voiced_region: str = "pulse",
unvoiced_region: str = "gauss",
polarity: str = "auto",
init_phase: str = "zeros",
init_phase: str | float = "zeros",
) -> Tensor:
"""Generate a simple excitation signal.

Expand All @@ -638,14 +638,14 @@ def excite(
'triangle', 'square']
The type of voiced region.

unvoiced_region : ['zeros', 'gauss', 'uniform']
unvoiced_region : ['zeros', 'gauss', 'm-sequence', 'uniform']
The type of unvoiced region.

polarity : ['auto', 'unipolar', 'bipolar']
The polarity.

init_phase : ['zeros', 'random']
The initial phase.
init_phase : ['zeros', 'random'] or float
The initial phase in radians.

Returns
-------
Expand Down Expand Up @@ -1692,7 +1692,8 @@ def levdur(r: Tensor, eps: float | None = None) -> Tensor:
The autocorrelation.

eps : float >= 0 or None
A small value to improve numerical stability.
A small value to improve numerical stability. If None, automatically set
based on the input data type.

Returns
-------
Expand Down Expand Up @@ -1735,7 +1736,8 @@ def lpc(x: Tensor, lpc_order: int, eps: float | None = None) -> Tensor:
The order of the LPC coefficients, :math:`M`.

eps : float >= 0 or None
A small value to improve numerical stability.
A small value to improve numerical stability. If None, automatically set
based on the input data type.

Returns
-------
Expand Down Expand Up @@ -2742,21 +2744,24 @@ def quantize(
)


def rlevdur(a: Tensor) -> Tensor:
def rlevdur(a: Tensor, n_fft: int = 1024) -> Tensor:
"""Solve a Yule-Walker linear system given the LPC coefficients.

Parameters
----------
a : Tensor [shape=(..., M+1)]
The gain and the LPC coefficients.

n_fft : int >> M
The number of FFT bins. Accurate conversion requires a large value.

Returns
-------
out : Tensor [shape=(..., M+1)]
The autocorrelation.

"""
return nn.ReverseLevinsonDurbin._func(a)
return nn.ReverseLevinsonDurbin._func(a, n_fft=n_fft)


def rmse(x: Tensor, y: Tensor, reduction: str = "mean") -> Tensor:
Expand Down
23 changes: 13 additions & 10 deletions diffsptk/modules/excite.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ class ExcitationGeneration(BaseFunctionalModule):
polarity : ['auto', 'unipolar', 'bipolar']
The polarity.

init_phase : ['zeros', 'random']
The initial phase.
init_phase : ['zeros', 'random'] or float
The initial phase in radians.

"""

Expand All @@ -57,7 +57,7 @@ def __init__(
voiced_region: str = "pulse",
unvoiced_region: str = "gauss",
polarity: str = "auto",
init_phase: str = "zeros",
init_phase: str | float = "zeros",
) -> None:
super().__init__()

Expand Down Expand Up @@ -108,7 +108,7 @@ def _precompute(
voiced_region: str,
unvoiced_region: str,
polarity: str,
init_phase: str,
init_phase: str | float,
) -> Precomputed:
ExcitationGeneration._check(frame_period)
return (frame_period, voiced_region, unvoiced_region, polarity, init_phase)
Expand All @@ -121,7 +121,7 @@ def _forward(
voiced_region: str,
unvoiced_region: str,
polarity: str,
init_phase: str,
init_phase: str | float,
) -> torch.Tensor:
# Make a mask that represents the voiced region.
base_mask = torch.clip(p, min=0, max=1)
Expand All @@ -148,12 +148,15 @@ def _forward(
s = torch.cumsum(q.double(), dim=-1)
bias, _ = torch.cummax(s * ~mask, dim=-1)
phase = (s - bias).to(p.dtype)
if init_phase == "zeros":
pass
elif init_phase == "random":
phase += torch.rand_like(p[..., :1])
if isinstance(init_phase, str):
if init_phase == "zeros":
pass
elif init_phase == "random":
phase += torch.rand_like(p[..., :1])
else:
raise ValueError(f"init_phase {init_phase} is not supported.")
else:
raise ValueError(f"init_phase {init_phase} is not supported.")
phase += init_phase / TAU

# Generate excitation signal using phase.
if polarity == "auto":
Expand Down
2 changes: 2 additions & 0 deletions diffsptk/modules/gmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,7 @@ def forward(
nu = px / y.view(-1, 1)
nm = nu * self.mu
a = pxx - y.view(-1, 1) * (2 * nm - mm)
a = torch.nan_to_num(a, nan=0.0, posinf=0.0, neginf=0.0)
b = xi.view(-1, 1) * self.ubm_sigma.diagonal(dim1=-2, dim2=-1)
c = xi.view(-1, 1) * (self.ubm_mu - self.mu) ** 2
sigma = (a + b + c) * z.view(-1, 1)
Expand All @@ -368,6 +369,7 @@ def forward(
nm = outer(nu, self.mu)
mn = nm.transpose(-2, -1)
a = pxx - y.view(-1, 1, 1) * (nm + mn - mm)
a = torch.nan_to_num(a, nan=0.0, posinf=0.0, neginf=0.0)
b = xi.view(-1, 1, 1) * self.ubm_sigma
c = xi.view(-1, 1, 1) * outer(self.ubm_mu - self.mu)
sigma = (a + b + c) * z.view(-1, 1, 1)
Expand Down
5 changes: 3 additions & 2 deletions diffsptk/modules/levdur.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ class LevinsonDurbin(BaseFunctionalModule):
lpc_order : int >= 0
The order of the LPC coefficients, :math:`M`.

eps : float >= 0
A small value to improve numerical stability.
eps : float >= 0 or None
A small value to improve numerical stability. If None, automatically set
based on the data type.

device : torch.device or None
The device of this module.
Expand Down
3 changes: 2 additions & 1 deletion diffsptk/modules/lpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ class LinearPredictiveCodingAnalysis(BaseFunctionalModule):
The order of the LPC coefficients, :math:`M`.

eps : float >= 0 or None
A small value to improve numerical stability.
A small value to improve numerical stability. If None, automatically set
based on the data type.

device : torch.device or None
The device of this module.
Expand Down
2 changes: 1 addition & 1 deletion diffsptk/modules/mgc2mgc.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def _check(
if 1 < abs(out_gamma):
raise ValueError("out_gamma must be in [-1, 1].")
if n_fft <= max(in_order, out_order) + 1:
raise ValueError("n_fft must be much larger then order of cepstrum.")
raise ValueError("n_fft must be much larger than order of cepstrum.")
if 0 == in_gamma and in_mul:
raise ValueError("Invalid combination of in_gamma and in_mul.")

Expand Down
45 changes: 20 additions & 25 deletions diffsptk/modules/rlevdur.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
# limitations under the License. #
# ------------------------------------------------------------------------ #

import numpy as np
import torch
import torch.nn.functional as F

from ..typing import Precomputed
from ..utils.private import check_size, filter_values, remove_gain
from ..utils.private import check_size, filter_values, remove_gain, to
from .base import BaseFunctionalModule


Expand All @@ -31,6 +31,9 @@ class ReverseLevinsonDurbin(BaseFunctionalModule):
lpc_order : int >= 0
The order of the LPC coefficients, :math:`M`.

n_fft : int >> M
The number of FFT bins. Accurate conversion requires a large value.

device : torch.device or None
The device of this module.

Expand All @@ -42,6 +45,7 @@ class ReverseLevinsonDurbin(BaseFunctionalModule):
def __init__(
self,
lpc_order: int,
n_fft: int = 1024,
device: torch.device | None = None,
dtype: torch.dtype | None = None,
) -> None:
Expand All @@ -50,7 +54,7 @@ def __init__(
self.in_dim = lpc_order + 1

_, _, tensors = self._precompute(**filter_values(locals()))
self.register_buffer("eye", tensors[0])
self.register_buffer("phase_factors", tensors[0])

def forward(self, a: torch.Tensor) -> torch.Tensor:
"""Solve a Yule-Walker linear system given the LPC coefficients.
Expand Down Expand Up @@ -95,39 +99,30 @@ def _takes_input_size() -> bool:
return True

@staticmethod
def _check(lpc_order: int) -> None:
def _check(lpc_order: int, n_fft: int) -> None:
if lpc_order < 0:
raise ValueError("lpc_order must be non-negative.")
if n_fft <= lpc_order + 1:
raise ValueError("n_fft must be much larger than lpc_order.")

@staticmethod
def _precompute(
lpc_order: int,
n_fft: int,
device: torch.device | None,
dtype: torch.dtype | None,
) -> Precomputed:
ReverseLevinsonDurbin._check(lpc_order)
eye = torch.eye(lpc_order + 1, device=device, dtype=dtype)
return None, None, (eye,)
ReverseLevinsonDurbin._check(lpc_order, n_fft)
n_freq = n_fft // 2 + 1
omega = torch.linspace(0, np.pi, n_freq, device=device, dtype=torch.double)
m = torch.arange(lpc_order + 1, device=device, dtype=torch.double)
phase_factors = torch.exp(-1j * omega * m.unsqueeze(-1))
return None, None, (to(phase_factors, dtype=dtype),)

@staticmethod
def _forward(a: torch.Tensor, eye: torch.Tensor) -> torch.Tensor:
def _forward(a: torch.Tensor, phase_factors: torch.Tensor) -> torch.Tensor:
M = a.size(-1) - 1
K, a = remove_gain(a, return_gain=True)

U = [a.flip(-1)]
E = [K**2]
for m in range(M):
u0 = U[-1][..., :1]
u1 = U[-1][..., 1 : M - m]
t = 1 / (1 - u0**2)
u = (u1 - u0 * u1.flip(-1)) * t
u = F.pad(u, (0, m + 2))
e = E[-1] * t
U.append(u)
E.append(e)
U = torch.stack(U[::-1], dim=-1)
E = torch.stack(E[::-1], dim=-1)

V = torch.linalg.solve_triangular(U, eye, upper=True, unitriangular=True)
r = torch.matmul(V[..., :1].transpose(-2, -1) * E, V).squeeze(-2)
A = torch.sum(a.unsqueeze(-1) * phase_factors, dim=-2)
r = torch.fft.irfft((K / A.abs()) ** 2)[..., : M + 1]
return r
2 changes: 1 addition & 1 deletion diffsptk/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "3.4.0"
__version__ = "3.4.1.dev0"
2 changes: 1 addition & 1 deletion tests/test_excite.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def compute_error(infile):
["pulse", "sinusoidal", "sawtooth", "inverted-sawtooth", "triangle", "square"],
)
@pytest.mark.parametrize("polarity", ["unipolar", "bipolar"])
@pytest.mark.parametrize("init_phase", ["zeros", "random"])
@pytest.mark.parametrize("init_phase", ["zeros", "random", np.round(np.pi / 2, 2)])
def test_waveform(voiced_region, polarity, init_phase, P=80, verbose=False):
excite = diffsptk.ExcitationGeneration(
P,
Expand Down
2 changes: 1 addition & 1 deletion tools/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# ------------------------------------------------------------------------ #

TAPLO_VERSION := 0.10.0
YAMLFMT_VERSION := 0.17.2
YAMLFMT_VERSION := 0.20.0

all: SPTK taplo yamlfmt

Expand Down