Merge pull request #154 from sp-nitech/minor_fix [skip ci]

takenori-y · web-flow · commit 3bd430b051f8 · 2025-11-24T02:31:54.000+09:00
Minor fix
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -21,12 +21,12 @@ jobs:
             torch: 2.3.1
             torchaudio: 2.3.1
           - python: "3.13"
-            torch: 2.9.0
-            torchaudio: 2.9.0
+            torch: 2.9.1
+            torchaudio: 2.9.1
 
     steps:
       - name: Clone
-        uses: actions/checkout@v5
+        uses: actions/checkout@v6
 
       - name: Python
         uses: actions/setup-python@v6
diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@
 [![ClickPy](https://img.shields.io/badge/downloads-clickpy-yellow.svg)](https://clickpy.clickhouse.com/dashboard/diffsptk)
 [![Advisor](https://snyk.io/advisor/python/diffsptk/badge.svg)](https://snyk.io/advisor/python/diffsptk)
 [![Python Version](https://img.shields.io/pypi/pyversions/diffsptk.svg)](https://pypi.python.org/pypi/diffsptk)
-[![PyTorch Version](https://img.shields.io/badge/pytorch-2.3.1%20%7C%202.9.0-orange.svg)](https://pypi.python.org/pypi/diffsptk)
+[![PyTorch Version](https://img.shields.io/badge/pytorch-2.3.1%20%7C%202.9.1-orange.svg)](https://pypi.python.org/pypi/diffsptk)
 [![PyPI Version](https://img.shields.io/pypi/v/diffsptk.svg)](https://pypi.python.org/pypi/diffsptk)
 [![Codecov](https://codecov.io/gh/sp-nitech/diffsptk/branch/master/graph/badge.svg)](https://app.codecov.io/gh/sp-nitech/diffsptk)
 [![License](https://img.shields.io/github/license/sp-nitech/diffsptk.svg)](https://github.com/sp-nitech/diffsptk/blob/master/LICENSE)
diff --git a/diffsptk/functional.py b/diffsptk/functional.py
@@ -622,7 +622,7 @@ def excite(
     voiced_region: str = "pulse",
     unvoiced_region: str = "gauss",
     polarity: str = "auto",
-    init_phase: str = "zeros",
+    init_phase: str | float = "zeros",
 ) -> Tensor:
     """Generate a simple excitation signal.
 
@@ -638,14 +638,14 @@ def excite(
                      'triangle', 'square']
         The type of voiced region.
 
-    unvoiced_region : ['zeros', 'gauss', 'uniform']
+    unvoiced_region : ['zeros', 'gauss', 'm-sequence', 'uniform']
         The type of unvoiced region.
 
     polarity : ['auto', 'unipolar', 'bipolar']
         The polarity.
 
-    init_phase : ['zeros', 'random']
-        The initial phase.
+    init_phase : ['zeros', 'random'] or float
+        The initial phase in radians.
 
     Returns
     -------
@@ -1692,7 +1692,8 @@ def levdur(r: Tensor, eps: float | None = None) -> Tensor:
         The autocorrelation.
 
     eps : float >= 0 or None
-        A small value to improve numerical stability.
+        A small value to improve numerical stability. If None, it is set
+        automatically based on the input data type.
 
     Returns
     -------
@@ -1735,7 +1736,8 @@ def lpc(x: Tensor, lpc_order: int, eps: float | None = None) -> Tensor:
         The order of the LPC coefficients, :math:`M`.
 
     eps : float >= 0 or None
-        A small value to improve numerical stability.
+        A small value to improve numerical stability. If None, it is set
+        automatically based on the input data type.
 
     Returns
     -------
@@ -2742,21 +2744,24 @@ def quantize(
     )
 
 
-def rlevdur(a: Tensor) -> Tensor:
+def rlevdur(a: Tensor, n_fft: int = 1024) -> Tensor:
     """Solve a Yule-Walker linear system given the LPC coefficients.
 
     Parameters
     ----------
     a : Tensor [shape=(..., M+1)]
         The gain and the LPC coefficients.
 
+    n_fft : int >> M
+        The number of FFT bins. Accurate conversion requires a large value.
+
     Returns
     -------
     out : Tensor [shape=(..., M+1)]
         The autocorrelation.
 
     """
-    return nn.ReverseLevinsonDurbin._func(a)
+    return nn.ReverseLevinsonDurbin._func(a, n_fft=n_fft)
 
 
 def rmse(x: Tensor, y: Tensor, reduction: str = "mean") -> Tensor:
diff --git a/diffsptk/modules/excite.py b/diffsptk/modules/excite.py
@@ -45,8 +45,8 @@ class ExcitationGeneration(BaseFunctionalModule):
     polarity : ['auto', 'unipolar', 'bipolar']
         The polarity.
 
-    init_phase : ['zeros', 'random']
-        The initial phase.
+    init_phase : ['zeros', 'random'] or float
+        The initial phase in radians.
 
     """
 
@@ -57,7 +57,7 @@ def __init__(
         voiced_region: str = "pulse",
         unvoiced_region: str = "gauss",
         polarity: str = "auto",
-        init_phase: str = "zeros",
+        init_phase: str | float = "zeros",
     ) -> None:
         super().__init__()
 
@@ -108,7 +108,7 @@ def _precompute(
         voiced_region: str,
         unvoiced_region: str,
         polarity: str,
-        init_phase: str,
+        init_phase: str | float,
     ) -> Precomputed:
         ExcitationGeneration._check(frame_period)
         return (frame_period, voiced_region, unvoiced_region, polarity, init_phase)
@@ -121,7 +121,7 @@ def _forward(
         voiced_region: str,
         unvoiced_region: str,
         polarity: str,
-        init_phase: str,
+        init_phase: str | float,
     ) -> torch.Tensor:
         # Make a mask that represents the voiced region.
         base_mask = torch.clip(p, min=0, max=1)
@@ -148,12 +148,15 @@ def _forward(
         s = torch.cumsum(q.double(), dim=-1)
         bias, _ = torch.cummax(s * ~mask, dim=-1)
         phase = (s - bias).to(p.dtype)
-        if init_phase == "zeros":
-            pass
-        elif init_phase == "random":
-            phase += torch.rand_like(p[..., :1])
+        if isinstance(init_phase, str):
+            if init_phase == "zeros":
+                pass
+            elif init_phase == "random":
+                phase += torch.rand_like(p[..., :1])
+            else:
+                raise ValueError(f"init_phase {init_phase} is not supported.")
         else:
-            raise ValueError(f"init_phase {init_phase} is not supported.")
+            phase += init_phase / TAU
 
         # Generate excitation signal using phase.
         if polarity == "auto":
diff --git a/diffsptk/modules/gmm.py b/diffsptk/modules/gmm.py
@@ -346,6 +346,7 @@ def forward(
                     nu = px / y.view(-1, 1)
                     nm = nu * self.mu
                     a = pxx - y.view(-1, 1) * (2 * nm - mm)
+                    a = torch.nan_to_num(a, nan=0.0, posinf=0.0, neginf=0.0)
                     b = xi.view(-1, 1) * self.ubm_sigma.diagonal(dim1=-2, dim2=-1)
                     c = xi.view(-1, 1) * (self.ubm_mu - self.mu) ** 2
                     sigma = (a + b + c) * z.view(-1, 1)
@@ -368,6 +369,7 @@ def forward(
                     nm = outer(nu, self.mu)
                     mn = nm.transpose(-2, -1)
                     a = pxx - y.view(-1, 1, 1) * (nm + mn - mm)
+                    a = torch.nan_to_num(a, nan=0.0, posinf=0.0, neginf=0.0)
                     b = xi.view(-1, 1, 1) * self.ubm_sigma
                     c = xi.view(-1, 1, 1) * outer(self.ubm_mu - self.mu)
                     sigma = (a + b + c) * z.view(-1, 1, 1)
diff --git a/diffsptk/modules/levdur.py b/diffsptk/modules/levdur.py
@@ -30,8 +30,9 @@ class LevinsonDurbin(BaseFunctionalModule):
     lpc_order : int >= 0
         The order of the LPC coefficients, :math:`M`.
 
-    eps : float >= 0
-        A small value to improve numerical stability.
+    eps : float >= 0 or None
+        A small value to improve numerical stability. If None, it is set
+        automatically based on the data type.
 
     device : torch.device or None
         The device of this module.
diff --git a/diffsptk/modules/lpc.py b/diffsptk/modules/lpc.py
@@ -39,7 +39,8 @@ class LinearPredictiveCodingAnalysis(BaseFunctionalModule):
         The order of the LPC coefficients, :math:`M`.
 
     eps : float >= 0 or None
-        A small value to improve numerical stability.
+        A small value to improve numerical stability. If None, it is set
+        automatically based on the data type.
 
     device : torch.device or None
         The device of this module.
diff --git a/diffsptk/modules/mgc2mgc.py b/diffsptk/modules/mgc2mgc.py
@@ -169,7 +169,7 @@ def _check(
         if 1 < abs(out_gamma):
             raise ValueError("out_gamma must be in [-1, 1].")
         if n_fft <= max(in_order, out_order) + 1:
-            raise ValueError("n_fft must be much larger then order of cepstrum.")
+            raise ValueError("n_fft must be much larger than order of cepstrum.")
         if 0 == in_gamma and in_mul:
             raise ValueError("Invalid combination of in_gamma and in_mul.")
 
diff --git a/diffsptk/modules/rlevdur.py b/diffsptk/modules/rlevdur.py
@@ -14,11 +14,11 @@
 # limitations under the License.                                           #
 # ------------------------------------------------------------------------ #
 
+import numpy as np
 import torch
-import torch.nn.functional as F
 
 from ..typing import Precomputed
-from ..utils.private import check_size, filter_values, remove_gain
+from ..utils.private import check_size, filter_values, remove_gain, to
 from .base import BaseFunctionalModule
 
 
@@ -31,6 +31,9 @@ class ReverseLevinsonDurbin(BaseFunctionalModule):
     lpc_order : int >= 0
         The order of the LPC coefficients, :math:`M`.
 
+    n_fft : int >> M
+        The number of FFT bins. Accurate conversion requires a large value.
+
     device : torch.device or None
         The device of this module.
 
@@ -42,6 +45,7 @@ class ReverseLevinsonDurbin(BaseFunctionalModule):
     def __init__(
         self,
         lpc_order: int,
+        n_fft: int = 1024,
         device: torch.device | None = None,
         dtype: torch.dtype | None = None,
     ) -> None:
@@ -50,7 +54,7 @@ def __init__(
         self.in_dim = lpc_order + 1
 
         _, _, tensors = self._precompute(**filter_values(locals()))
-        self.register_buffer("eye", tensors[0])
+        self.register_buffer("phase_factors", tensors[0])
 
     def forward(self, a: torch.Tensor) -> torch.Tensor:
         """Solve a Yule-Walker linear system given the LPC coefficients.
@@ -95,39 +99,30 @@ def _takes_input_size() -> bool:
         return True
 
     @staticmethod
-    def _check(lpc_order: int) -> None:
+    def _check(lpc_order: int, n_fft: int) -> None:
         if lpc_order < 0:
             raise ValueError("lpc_order must be non-negative.")
+        if n_fft <= lpc_order + 1:
+            raise ValueError("n_fft must be much larger than lpc_order.")
 
     @staticmethod
     def _precompute(
         lpc_order: int,
+        n_fft: int,
         device: torch.device | None,
         dtype: torch.dtype | None,
     ) -> Precomputed:
-        ReverseLevinsonDurbin._check(lpc_order)
-        eye = torch.eye(lpc_order + 1, device=device, dtype=dtype)
-        return None, None, (eye,)
+        ReverseLevinsonDurbin._check(lpc_order, n_fft)
+        n_freq = n_fft // 2 + 1
+        omega = torch.linspace(0, np.pi, n_freq, device=device, dtype=torch.double)
+        m = torch.arange(lpc_order + 1, device=device, dtype=torch.double)
+        phase_factors = torch.exp(-1j * omega * m.unsqueeze(-1))
+        return None, None, (to(phase_factors, dtype=dtype),)
 
     @staticmethod
-    def _forward(a: torch.Tensor, eye: torch.Tensor) -> torch.Tensor:
+    def _forward(a: torch.Tensor, phase_factors: torch.Tensor) -> torch.Tensor:
         M = a.size(-1) - 1
         K, a = remove_gain(a, return_gain=True)
-
-        U = [a.flip(-1)]
-        E = [K**2]
-        for m in range(M):
-            u0 = U[-1][..., :1]
-            u1 = U[-1][..., 1 : M - m]
-            t = 1 / (1 - u0**2)
-            u = (u1 - u0 * u1.flip(-1)) * t
-            u = F.pad(u, (0, m + 2))
-            e = E[-1] * t
-            U.append(u)
-            E.append(e)
-        U = torch.stack(U[::-1], dim=-1)
-        E = torch.stack(E[::-1], dim=-1)
-
-        V = torch.linalg.solve_triangular(U, eye, upper=True, unitriangular=True)
-        r = torch.matmul(V[..., :1].transpose(-2, -1) * E, V).squeeze(-2)
+        A = torch.sum(a.unsqueeze(-1) * phase_factors, dim=-2)
+        r = torch.fft.irfft((K / A.abs()) ** 2)[..., : M + 1]
         return r
diff --git a/diffsptk/version.py b/diffsptk/version.py
@@ -1 +1 @@
-__version__ = "3.4.0"
+__version__ = "3.4.1.dev0"
diff --git a/tests/test_excite.py b/tests/test_excite.py
@@ -95,7 +95,7 @@ def compute_error(infile):
     ["pulse", "sinusoidal", "sawtooth", "inverted-sawtooth", "triangle", "square"],
 )
 @pytest.mark.parametrize("polarity", ["unipolar", "bipolar"])
-@pytest.mark.parametrize("init_phase", ["zeros", "random"])
+@pytest.mark.parametrize("init_phase", ["zeros", "random", np.round(np.pi / 2, 2)])
 def test_waveform(voiced_region, polarity, init_phase, P=80, verbose=False):
     excite = diffsptk.ExcitationGeneration(
         P,
diff --git a/tools/Makefile b/tools/Makefile
@@ -15,7 +15,7 @@
 # ------------------------------------------------------------------------ #
 
 TAPLO_VERSION   := 0.10.0
-YAMLFMT_VERSION := 0.17.2
+YAMLFMT_VERSION := 0.20.0
 
 all: SPTK taplo yamlfmt
 

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "3.4.0"`
	`1`	`+__version__ = "3.4.1.dev0"`