diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 99c11f2..f9ae31e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,12 +21,12 @@ jobs: torch: 2.3.1 torchaudio: 2.3.1 - python: "3.13" - torch: 2.9.0 - torchaudio: 2.9.0 + torch: 2.9.1 + torchaudio: 2.9.1 steps: - name: Clone - uses: actions/checkout@v5 + uses: actions/checkout@v6 - name: Python uses: actions/setup-python@v6 diff --git a/README.md b/README.md index c3f9e6e..b38d750 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![ClickPy](https://img.shields.io/badge/downloads-clickpy-yellow.svg)](https://clickpy.clickhouse.com/dashboard/diffsptk) [![Advisor](https://snyk.io/advisor/python/diffsptk/badge.svg)](https://snyk.io/advisor/python/diffsptk) [![Python Version](https://img.shields.io/pypi/pyversions/diffsptk.svg)](https://pypi.python.org/pypi/diffsptk) -[![PyTorch Version](https://img.shields.io/badge/pytorch-2.3.1%20%7C%202.9.0-orange.svg)](https://pypi.python.org/pypi/diffsptk) +[![PyTorch Version](https://img.shields.io/badge/pytorch-2.3.1%20%7C%202.9.1-orange.svg)](https://pypi.python.org/pypi/diffsptk) [![PyPI Version](https://img.shields.io/pypi/v/diffsptk.svg)](https://pypi.python.org/pypi/diffsptk) [![Codecov](https://codecov.io/gh/sp-nitech/diffsptk/branch/master/graph/badge.svg)](https://app.codecov.io/gh/sp-nitech/diffsptk) [![License](https://img.shields.io/github/license/sp-nitech/diffsptk.svg)](https://github.com/sp-nitech/diffsptk/blob/master/LICENSE) diff --git a/diffsptk/functional.py b/diffsptk/functional.py index 837f310..65e45e0 100644 --- a/diffsptk/functional.py +++ b/diffsptk/functional.py @@ -622,7 +622,7 @@ def excite( voiced_region: str = "pulse", unvoiced_region: str = "gauss", polarity: str = "auto", - init_phase: str = "zeros", + init_phase: str | float = "zeros", ) -> Tensor: """Generate a simple excitation signal. @@ -638,14 +638,14 @@ def excite( 'triangle', 'square'] The type of voiced region. - unvoiced_region : ['zeros', 'gauss', 'uniform'] + unvoiced_region : ['zeros', 'gauss', 'm-sequence', 'uniform'] The type of unvoiced region. polarity : ['auto', 'unipolar', 'bipolar'] The polarity. - init_phase : ['zeros', 'random'] - The initial phase. + init_phase : ['zeros', 'random'] or float + The initial phase in radians. Returns ------- @@ -1692,7 +1692,8 @@ def levdur(r: Tensor, eps: float | None = None) -> Tensor: The autocorrelation. eps : float >= 0 or None - A small value to improve numerical stability. + A small value to improve numerical stability. If None, automatically set + based on the input data type. Returns ------- @@ -1735,7 +1736,8 @@ def lpc(x: Tensor, lpc_order: int, eps: float | None = None) -> Tensor: The order of the LPC coefficients, :math:`M`. eps : float >= 0 or None - A small value to improve numerical stability. + A small value to improve numerical stability. If None, automatically set + based on the input data type. Returns ------- @@ -2742,7 +2744,7 @@ def quantize( ) -def rlevdur(a: Tensor) -> Tensor: +def rlevdur(a: Tensor, n_fft: int = 1024) -> Tensor: """Solve a Yule-Walker linear system given the LPC coefficients. Parameters @@ -2750,13 +2752,16 @@ def rlevdur(a: Tensor) -> Tensor: a : Tensor [shape=(..., M+1)] The gain and the LPC coefficients. + n_fft : int >> M + The number of FFT bins. Accurate conversion requires a large value. + Returns ------- out : Tensor [shape=(..., M+1)] The autocorrelation. """ - return nn.ReverseLevinsonDurbin._func(a) + return nn.ReverseLevinsonDurbin._func(a, n_fft=n_fft) def rmse(x: Tensor, y: Tensor, reduction: str = "mean") -> Tensor: diff --git a/diffsptk/modules/excite.py b/diffsptk/modules/excite.py index e2df43e..bd29711 100644 --- a/diffsptk/modules/excite.py +++ b/diffsptk/modules/excite.py @@ -45,8 +45,8 @@ class ExcitationGeneration(BaseFunctionalModule): polarity : ['auto', 'unipolar', 'bipolar'] The polarity. - init_phase : ['zeros', 'random'] - The initial phase. + init_phase : ['zeros', 'random'] or float + The initial phase in radians. """ @@ -57,7 +57,7 @@ def __init__( voiced_region: str = "pulse", unvoiced_region: str = "gauss", polarity: str = "auto", - init_phase: str = "zeros", + init_phase: str | float = "zeros", ) -> None: super().__init__() @@ -108,7 +108,7 @@ def _precompute( voiced_region: str, unvoiced_region: str, polarity: str, - init_phase: str, + init_phase: str | float, ) -> Precomputed: ExcitationGeneration._check(frame_period) return (frame_period, voiced_region, unvoiced_region, polarity, init_phase) @@ -121,7 +121,7 @@ def _forward( voiced_region: str, unvoiced_region: str, polarity: str, - init_phase: str, + init_phase: str | float, ) -> torch.Tensor: # Make mask represents voiced region. base_mask = torch.clip(p, min=0, max=1) @@ -148,12 +148,15 @@ def _forward( s = torch.cumsum(q.double(), dim=-1) bias, _ = torch.cummax(s * ~mask, dim=-1) phase = (s - bias).to(p.dtype) - if init_phase == "zeros": - pass - elif init_phase == "random": - phase += torch.rand_like(p[..., :1]) + if isinstance(init_phase, str): + if init_phase == "zeros": + pass + elif init_phase == "random": + phase += torch.rand_like(p[..., :1]) + else: + raise ValueError(f"init_phase {init_phase} is not supported.") else: - raise ValueError(f"init_phase {init_phase} is not supported.") + phase += init_phase / TAU # Generate excitation signal using phase. if polarity == "auto": diff --git a/diffsptk/modules/gmm.py b/diffsptk/modules/gmm.py index 0f6c8f7..282792e 100644 --- a/diffsptk/modules/gmm.py +++ b/diffsptk/modules/gmm.py @@ -346,6 +346,7 @@ def forward( nu = px / y.view(-1, 1) nm = nu * self.mu a = pxx - y.view(-1, 1) * (2 * nm - mm) + a = torch.nan_to_num(a, nan=0.0, posinf=0.0, neginf=0.0) b = xi.view(-1, 1) * self.ubm_sigma.diagonal(dim1=-2, dim2=-1) c = xi.view(-1, 1) * (self.ubm_mu - self.mu) ** 2 sigma = (a + b + c) * z.view(-1, 1) @@ -368,6 +369,7 @@ def forward( nm = outer(nu, self.mu) mn = nm.transpose(-2, -1) a = pxx - y.view(-1, 1, 1) * (nm + mn - mm) + a = torch.nan_to_num(a, nan=0.0, posinf=0.0, neginf=0.0) b = xi.view(-1, 1, 1) * self.ubm_sigma c = xi.view(-1, 1, 1) * outer(self.ubm_mu - self.mu) sigma = (a + b + c) * z.view(-1, 1, 1) diff --git a/diffsptk/modules/levdur.py b/diffsptk/modules/levdur.py index dfae782..65d18af 100644 --- a/diffsptk/modules/levdur.py +++ b/diffsptk/modules/levdur.py @@ -30,8 +30,9 @@ class LevinsonDurbin(BaseFunctionalModule): lpc_order : int >= 0 The order of the LPC coefficients, :math:`M`. - eps : float >= 0 - A small value to improve numerical stability. + eps : float >= 0 or None + A small value to improve numerical stability. If None, automatically set + based on the data type. device : torch.device or None The device of this module. diff --git a/diffsptk/modules/lpc.py b/diffsptk/modules/lpc.py index db1617c..32f37e3 100644 --- a/diffsptk/modules/lpc.py +++ b/diffsptk/modules/lpc.py @@ -39,7 +39,8 @@ class LinearPredictiveCodingAnalysis(BaseFunctionalModule): The order of the LPC coefficients, :math:`M`. eps : float >= 0 or None - A small value to improve numerical stability. + A small value to improve numerical stability. If None, automatically set + based on the data type. device : torch.device or None The device of this module. diff --git a/diffsptk/modules/mgc2mgc.py b/diffsptk/modules/mgc2mgc.py index 0c66946..40ad424 100644 --- a/diffsptk/modules/mgc2mgc.py +++ b/diffsptk/modules/mgc2mgc.py @@ -169,7 +169,7 @@ def _check( if 1 < abs(out_gamma): raise ValueError("out_gamma must be in [-1, 1].") if n_fft <= max(in_order, out_order) + 1: - raise ValueError("n_fft must be much larger then order of cepstrum.") + raise ValueError("n_fft must be much larger than order of cepstrum.") if 0 == in_gamma and in_mul: raise ValueError("Invalid combination of in_gamma and in_mul.") diff --git a/diffsptk/modules/rlevdur.py b/diffsptk/modules/rlevdur.py index 07160cf..f04ec4b 100644 --- a/diffsptk/modules/rlevdur.py +++ b/diffsptk/modules/rlevdur.py @@ -14,11 +14,11 @@ # limitations under the License. # # ------------------------------------------------------------------------ # +import numpy as np import torch -import torch.nn.functional as F from ..typing import Precomputed -from ..utils.private import check_size, filter_values, remove_gain +from ..utils.private import check_size, filter_values, remove_gain, to from .base import BaseFunctionalModule @@ -31,6 +31,9 @@ class ReverseLevinsonDurbin(BaseFunctionalModule): lpc_order : int >= 0 The order of the LPC coefficients, :math:`M`. + n_fft : int >> M + The number of FFT bins. Accurate conversion requires a large value. + device : torch.device or None The device of this module. @@ -42,6 +45,7 @@ class ReverseLevinsonDurbin(BaseFunctionalModule): def __init__( self, lpc_order: int, + n_fft: int = 1024, device: torch.device | None = None, dtype: torch.dtype | None = None, ) -> None: @@ -50,7 +54,7 @@ def __init__( self.in_dim = lpc_order + 1 _, _, tensors = self._precompute(**filter_values(locals())) - self.register_buffer("eye", tensors[0]) + self.register_buffer("phase_factors", tensors[0]) def forward(self, a: torch.Tensor) -> torch.Tensor: """Solve a Yule-Walker linear system given the LPC coefficients. @@ -95,39 +99,30 @@ def _takes_input_size() -> bool: return True @staticmethod - def _check(lpc_order: int) -> None: + def _check(lpc_order: int, n_fft: int) -> None: if lpc_order < 0: raise ValueError("lpc_order must be non-negative.") + if n_fft <= lpc_order + 1: + raise ValueError("n_fft must be much larger than lpc_order.") @staticmethod def _precompute( lpc_order: int, + n_fft: int, device: torch.device | None, dtype: torch.dtype | None, ) -> Precomputed: - ReverseLevinsonDurbin._check(lpc_order) - eye = torch.eye(lpc_order + 1, device=device, dtype=dtype) - return None, None, (eye,) + ReverseLevinsonDurbin._check(lpc_order, n_fft) + n_freq = n_fft // 2 + 1 + omega = torch.linspace(0, np.pi, n_freq, device=device, dtype=torch.double) + m = torch.arange(lpc_order + 1, device=device, dtype=torch.double) + phase_factors = torch.exp(-1j * omega * m.unsqueeze(-1)) + return None, None, (to(phase_factors, dtype=dtype),) @staticmethod - def _forward(a: torch.Tensor, eye: torch.Tensor) -> torch.Tensor: + def _forward(a: torch.Tensor, phase_factors: torch.Tensor) -> torch.Tensor: M = a.size(-1) - 1 K, a = remove_gain(a, return_gain=True) - - U = [a.flip(-1)] - E = [K**2] - for m in range(M): - u0 = U[-1][..., :1] - u1 = U[-1][..., 1 : M - m] - t = 1 / (1 - u0**2) - u = (u1 - u0 * u1.flip(-1)) * t - u = F.pad(u, (0, m + 2)) - e = E[-1] * t - U.append(u) - E.append(e) - U = torch.stack(U[::-1], dim=-1) - E = torch.stack(E[::-1], dim=-1) - - V = torch.linalg.solve_triangular(U, eye, upper=True, unitriangular=True) - r = torch.matmul(V[..., :1].transpose(-2, -1) * E, V).squeeze(-2) + A = torch.sum(a.unsqueeze(-1) * phase_factors, dim=-2) + r = torch.fft.irfft((K / A.abs()) ** 2)[..., : M + 1] return r diff --git a/diffsptk/version.py b/diffsptk/version.py index 903a158..6cd661e 100644 --- a/diffsptk/version.py +++ b/diffsptk/version.py @@ -1 +1 @@ -__version__ = "3.4.0" +__version__ = "3.4.1.dev0" diff --git a/tests/test_excite.py b/tests/test_excite.py index 126b0f7..383c3ef 100644 --- a/tests/test_excite.py +++ b/tests/test_excite.py @@ -95,7 +95,7 @@ def compute_error(infile): ["pulse", "sinusoidal", "sawtooth", "inverted-sawtooth", "triangle", "square"], ) @pytest.mark.parametrize("polarity", ["unipolar", "bipolar"]) -@pytest.mark.parametrize("init_phase", ["zeros", "random"]) +@pytest.mark.parametrize("init_phase", ["zeros", "random", np.round(np.pi / 2, 2)]) def test_waveform(voiced_region, polarity, init_phase, P=80, verbose=False): excite = diffsptk.ExcitationGeneration( P, diff --git a/tools/Makefile b/tools/Makefile index 4525f08..fdf1a8e 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -15,7 +15,7 @@ # ------------------------------------------------------------------------ # TAPLO_VERSION := 0.10.0 -YAMLFMT_VERSION := 0.17.2 +YAMLFMT_VERSION := 0.20.0 all: SPTK taplo yamlfmt