Politrees
diff --git a/download_files.py b/download_files.py
Lines changed: 10 additions & 7 deletions
diff --git a/rvc/lib/algorithm/attentions.py b/rvc/lib/algorithm/attentions.py
Lines changed: 4 additions & 4 deletions
diff --git a/rvc/lib/algorithm/commons.py b/rvc/lib/algorithm/commons.py
Lines changed: 17 additions & 19 deletions
diff --git a/rvc/lib/algorithm/discriminators.py b/rvc/lib/algorithm/discriminators.py
Lines changed: 10 additions & 10 deletions
diff --git a/rvc/lib/algorithm/encoders.py b/rvc/lib/algorithm/encoders.py
Lines changed: 10 additions & 11 deletions
@@ -25,13 +25,16 @@ def dl_model(link, model_name, dir_name):
     # Получаем общий размер файла
     total_size = int(r.headers.get("content-length", 0))
     # Используем tqdm для отображения прогресса
-    with open(file_path, "wb") as f, tqdm(
-        desc=f"Установка {model_name}",
-        total=total_size,
-        unit="iB",
-        unit_scale=True,
-        unit_divisor=1024,
-    ) as bar:
+    with (
+        open(file_path, "wb") as f,
+        tqdm(
+            desc=f"Установка {model_name}",
+            total=total_size,
+            unit="iB",
+            unit_scale=True,
+            unit_divisor=1024,
+        ) as bar,
+    ):
         for chunk in r.iter_content(chunk_size=8192):
             f.write(chunk)
             bar.update(len(chunk))
 
@@ -6,8 +6,7 @@
 
 
 class MultiHeadAttention(torch.nn.Module):
-    """
-    Multi-head attention module with optional relative positional encoding and proximal bias.
+    """Multi-head attention module with optional relative positional encoding and proximal bias.
 
     Args:
         channels (int): Number of input channels.
@@ -19,6 +18,7 @@ class MultiHeadAttention(torch.nn.Module):
         block_length (int, optional): Block length for local attention. Defaults to None.
         proximal_bias (bool, optional): Whether to use proximal bias in self-attention. Defaults to False.
         proximal_init (bool, optional): Whether to initialize the key projection weights the same as query projection weights. Defaults to False.
+
     """
 
     def __init__(
@@ -164,8 +164,7 @@ def _attention_bias_proximal(self, length):
 
 
 class FFN(torch.nn.Module):
-    """
-    Feed-forward network module.
+    """Feed-forward network module.
 
     Args:
         in_channels (int): Number of input channels.
@@ -175,6 +174,7 @@ class FFN(torch.nn.Module):
         p_dropout (float, optional): Dropout probability. Defaults to 0.0.
         activation (str, optional): Activation function to use. Defaults to None.
         causal (bool, optional): Whether to use causal padding in the convolution layers. Defaults to False.
+
     """
 
     def __init__(
 
@@ -1,54 +1,52 @@
-from typing import Optional
-
 import torch
 
 
 def init_weights(m, mean=0.0, std=0.01):
-    """
-    Initialize the weights of a module.
+    """Initialize the weights of a module.
 
     Args:
         m: The module to initialize.
         mean: The mean of the normal distribution.
         std: The standard deviation of the normal distribution.
+
     """
     classname = m.__class__.__name__
     if classname.find("Conv") != -1:
         m.weight.data.normal_(mean, std)
 
 
 def get_padding(kernel_size, dilation=1):
-    """
-    Calculate the padding needed for a convolution.
+    """Calculate the padding needed for a convolution.
 
     Args:
         kernel_size: The size of the kernel.
         dilation: The dilation of the convolution.
+
     """
     return int((kernel_size * dilation - dilation) / 2)
 
 
 def convert_pad_shape(pad_shape):
-    """
-    Convert the pad shape to a list of integers.
+    """Convert the pad shape to a list of integers.
 
     Args:
         pad_shape: The pad shape..
+
     """
     l = pad_shape[::-1]
     pad_shape = [item for sublist in l for item in sublist]
     return pad_shape
 
 
 def slice_segments(x: torch.Tensor, ids_str: torch.Tensor, segment_size: int = 4, dim: int = 2):
-    """
-    Slice segments from a tensor, handling tensors with different numbers of dimensions.
+    """Slice segments from a tensor, handling tensors with different numbers of dimensions.
 
     Args:
         x (torch.Tensor): The tensor to slice.
         ids_str (torch.Tensor): The starting indices of the segments.
         segment_size (int, optional): The size of each segment. Defaults to 4.
         dim (int, optional): The dimension to slice across (2D or 3D tensors). Defaults to 2.
+
     """
     if dim == 2:
         ret = torch.zeros_like(x[:, :segment_size])
@@ -67,13 +65,13 @@ def slice_segments(x: torch.Tensor, ids_str: torch.Tensor, segment_size: int = 4
 
 
 def rand_slice_segments(x, x_lengths=None, segment_size=4):
-    """
-    Randomly slice segments from a tensor.
+    """Randomly slice segments from a tensor.
 
     Args:
         x: The tensor to slice.
         x_lengths: The lengths of the sequences.
         segment_size: The size of each segment.
+
     """
     b, _, t = x.size()
     if x_lengths is None:
@@ -86,13 +84,13 @@ def rand_slice_segments(x, x_lengths=None, segment_size=4):
 
 @torch.jit.script
 def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
-    """
-    Fused add tanh sigmoid multiply operation.
+    """Fused add tanh sigmoid multiply operation.
 
     Args:
         input_a: The first input tensor.
         input_b: The second input tensor.
         n_channels: The number of channels.
+
     """
     n_channels_int = n_channels[0]
     in_act = input_a + input_b
@@ -102,13 +100,13 @@ def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
     return acts
 
 
-def sequence_mask(length: torch.Tensor, max_length: Optional[int] = None):
-    """
-    Generate a sequence mask.
+def sequence_mask(length: torch.Tensor, max_length: int | None = None):
+    """Generate a sequence mask.
 
     Args:
         length: The lengths of the sequences.
         max_length: The maximum length of the sequences.
+
     """
     if max_length is None:
         max_length = length.max()
@@ -117,12 +115,12 @@ def sequence_mask(length: torch.Tensor, max_length: Optional[int] = None):
 
 
 def grad_norm(parameters, norm_type: float = 2.0):
-    """
-    Calculates norm of parameter gradients
+    """Calculates norm of parameter gradients
 
     Args:
         parameters: The list of parameters to clip.
         norm_type: The type of norm to use for clipping.
+
     """
     if isinstance(parameters, torch.Tensor):
         parameters = [parameters]
 
@@ -7,8 +7,7 @@
 
 
 class MultiPeriodDiscriminator(torch.nn.Module):
-    """
-    Multi-period discriminator.
+    """Multi-period discriminator.
 
     This class implements a multi-period discriminator, which is used to
     discriminate between real and fake audio signals. The discriminator
@@ -20,7 +19,9 @@ class MultiPeriodDiscriminator(torch.nn.Module):
     def __init__(self, checkpointing: bool = False):
         super().__init__()
         self.checkpointing = checkpointing
-        self.discriminators = torch.nn.ModuleList([DiscriminatorS()] + [DiscriminatorP(period) for period in [2, 3, 5, 7, 11, 17, 23, 37]])  # periods
+        self.discriminators = torch.nn.ModuleList(
+            [DiscriminatorS()] + [DiscriminatorP(period) for period in [2, 3, 5, 7, 11, 17, 23, 37]]
+        )  # periods
 
     def forward(self, y, y_hat):
         y_d_rs, y_d_gs, fmap_rs, fmap_gs = [], [], [], []
@@ -40,8 +41,7 @@ def forward(self, y, y_hat):
 
 
 class DiscriminatorS(torch.nn.Module):
-    """
-    Discriminator for the short-term component.
+    """Discriminator for the short-term component.
 
     This class implements a discriminator for the short-term component
     of the audio signal. The discriminator is composed of a series of
@@ -58,7 +58,7 @@ def __init__(self):
                 weight_norm(torch.nn.Conv1d(256, 1024, 41, 4, groups=64, padding=20)),
                 weight_norm(torch.nn.Conv1d(1024, 1024, 41, 4, groups=256, padding=20)),
                 weight_norm(torch.nn.Conv1d(1024, 1024, 5, 1, padding=2)),
-            ]
+            ],
         )
         self.conv_post = weight_norm(torch.nn.Conv1d(1024, 1, 3, 1, padding=1))
         self.lrelu = torch.nn.LeakyReLU(LRELU_SLOPE)
@@ -75,8 +75,7 @@ def forward(self, x):
 
 
 class DiscriminatorP(torch.nn.Module):
-    """
-    Discriminator for the long-term component.
+    """Discriminator for the long-term component.
 
     This class implements a discriminator for the long-term component
     of the audio signal. The discriminator is composed of a series of
@@ -86,6 +85,7 @@ class DiscriminatorP(torch.nn.Module):
     Args:
         period (int): Period of the discriminator.
         kernel_size (int): Kernel size of the convolutional layers. Defaults to 5.
+
     """
 
     def __init__(self, period: int, kernel_size: int = 5):
@@ -100,14 +100,14 @@ def __init__(self, period: int, kernel_size: int = 5):
                         (kernel_size, 1),
                         (stride, 1),
                         padding=(get_padding(kernel_size, 1), 0),
-                    )
+                    ),
                 )
                 for input_channel, output_channel, stride in zip(
                     [1, 32, 128, 512, 1024],  # input_channels
                     [32, 128, 512, 1024, 1024],  # output_channels
                     [3, 3, 3, 3, 1],  # strides
                 )
-            ]
+            ],
         )
 
         self.conv_post = weight_norm(torch.nn.Conv2d(1024, 1, (3, 1), 1, padding=(1, 0)))
 
@@ -1,5 +1,4 @@
 import math
-from typing import Optional
 
 import torch
 
@@ -10,8 +9,7 @@
 
 
 class Encoder(torch.nn.Module):
-    """
-    Encoder module for the Transformer model.
+    """Encoder module for the Transformer model.
 
     Args:
         hidden_channels (int): Number of hidden channels in the encoder.
@@ -21,6 +19,7 @@ class Encoder(torch.nn.Module):
         kernel_size (int, optional): Kernel size of the convolution layers in the feed-forward network. Defaults to 1.
         p_dropout (float, optional): Dropout probability. Defaults to 0.0.
         window_size (int, optional): Window size for relative positional encoding. Defaults to 10.
+
     """
 
     def __init__(
@@ -49,7 +48,7 @@ def __init__(
                     window_size=window_size,
                 )
                 for _ in range(n_layers)
-            ]
+            ],
         )
         self.norm_layers_1 = torch.nn.ModuleList([LayerNorm(hidden_channels) for _ in range(n_layers)])
         self.ffn_layers = torch.nn.ModuleList(
@@ -62,7 +61,7 @@ def __init__(
                     p_dropout=p_dropout,
                 )
                 for _ in range(n_layers)
-            ]
+            ],
         )
         self.norm_layers_2 = torch.nn.ModuleList([LayerNorm(hidden_channels) for _ in range(n_layers)])
 
@@ -83,8 +82,7 @@ def forward(self, x, x_mask):
 
 
 class TextEncoder(torch.nn.Module):
-    """
-    Text Encoder with configurable embedding dimension.
+    """Text Encoder with configurable embedding dimension.
 
     Args:
         out_channels (int): Output channels of the encoder.
@@ -95,6 +93,7 @@ class TextEncoder(torch.nn.Module):
         kernel_size (int): Kernel size of the convolutional layers.
         p_dropout (float): Dropout probability.
         embedding_dim (int): Embedding dimension for phone embeddings (v1 = 256, v2 = 768).
+
     """
 
     def __init__(
@@ -118,7 +117,7 @@ def __init__(
         self.encoder = Encoder(hidden_channels, filter_channels, n_heads, n_layers, kernel_size, p_dropout)
         self.proj = torch.nn.Conv1d(hidden_channels, out_channels * 2, 1)
 
-    def forward(self, phone: torch.Tensor, pitch: Optional[torch.Tensor], lengths: torch.Tensor):
+    def forward(self, phone: torch.Tensor, pitch: torch.Tensor | None, lengths: torch.Tensor):
         x = self.emb_phone(phone)
         if pitch is not None and self.emb_pitch:
             x += self.emb_pitch(pitch)
@@ -136,8 +135,7 @@ def forward(self, phone: torch.Tensor, pitch: Optional[torch.Tensor], lengths: t
 
 
 class PosteriorEncoder(torch.nn.Module):
-    """
-    Posterior Encoder for inferring latent representation.
+    """Posterior Encoder for inferring latent representation.
 
     Args:
         in_channels (int): Number of channels in the input.
@@ -147,6 +145,7 @@ class PosteriorEncoder(torch.nn.Module):
         dilation_rate (int): Dilation rate of the convolutional layers.
         n_layers (int): Number of layers in the encoder.
         gin_channels (int, optional): Number of channels for the global conditioning input. Defaults to 0.
+
     """
 
     def __init__(
@@ -171,7 +170,7 @@ def __init__(
         )
         self.proj = torch.nn.Conv1d(hidden_channels, out_channels * 2, 1)
 
-    def forward(self, x: torch.Tensor, x_lengths: torch.Tensor, g: Optional[torch.Tensor] = None):
+    def forward(self, x: torch.Tensor, x_lengths: torch.Tensor, g: torch.Tensor | None = None):
         x_mask = sequence_mask(x_lengths, x.size(2)).unsqueeze(1).to(x.dtype)
 
         x = self.pre(x) * x_mask