Skip to content

Commit 44c52da

Browse files
authored
resolve dependencies (#1426)
1 parent c426be3 commit 44c52da

2 files changed

Lines changed: 20 additions & 7 deletions

File tree

diffsynth/core/data/operators.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
import torch, torchvision, imageio, os
33
import imageio.v3 as iio
44
from PIL import Image
5-
import torchaudio
6-
from diffsynth.utils.data.audio import read_audio
75

86

97
class DataProcessingPipeline:
@@ -249,23 +247,27 @@ def __call__(self, data):
249247
class LoadAudio(DataProcessingOperator):
250248
def __init__(self, sr=16000):
251249
self.sr = sr
252-
def __call__(self, data: str):
253250
import librosa
254-
input_audio, sample_rate = librosa.load(data, sr=self.sr)
251+
self.audio_loader = librosa.load
252+
253+
def __call__(self, data: str):
254+
input_audio, sample_rate = self.audio_loader(data, sr=self.sr)
255255
return input_audio
256256

257257

258258
class LoadAudioWithTorchaudio(DataProcessingOperator, FrameSamplerByRateMixin):
259259

260260
def __init__(self, num_frames=121, time_division_factor=8, time_division_remainder=1, frame_rate=24, fix_frame_rate=True):
261261
FrameSamplerByRateMixin.__init__(self, num_frames, time_division_factor, time_division_remainder, frame_rate, fix_frame_rate)
262+
import torchaudio
263+
self.audio_loader = torchaudio.load
262264

263265
def __call__(self, data: str):
264266
try:
265267
reader = self.get_reader(data)
266268
num_frames = self.get_num_frames(reader)
267269
duration = num_frames / self.frame_rate
268-
waveform, sample_rate = torchaudio.load(data)
270+
waveform, sample_rate = self.audio_loader(data)
269271
target_samples = int(duration * sample_rate)
270272
current_samples = waveform.shape[-1]
271273
if current_samples > target_samples:
@@ -285,10 +287,12 @@ def __init__(self, target_sample_rate=None, target_duration=None):
285287
self.target_sample_rate = target_sample_rate
286288
self.target_duration = target_duration
287289
self.resample = True if target_sample_rate is not None else False
290+
from diffsynth.utils.data.audio import read_audio
291+
self.audio_loader = read_audio
288292

289293
def __call__(self, data: str):
290294
try:
291-
waveform, sample_rate = read_audio(data, resample=self.resample, resample_rate=self.target_sample_rate)
295+
waveform, sample_rate = self.audio_loader(data, resample=self.resample, resample_rate=self.target_sample_rate)
292296
if self.target_duration is not None:
293297
target_samples = int(self.target_duration * sample_rate)
294298
current_samples = waveform.shape[-1]

pyproject.toml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,17 @@ npu = [
4848
"torchvision==0.22.1+cpu"
4949
]
5050
audio = [
51+
"av",
5152
"torchaudio",
52-
"torchcodec"
53+
"torchcodec",
54+
"librosa"
55+
]
56+
all = [
57+
"av",
58+
"torchaudio",
59+
"torchcodec",
60+
"librosa",
61+
"streamlit"
5362
]
5463

5564
[tool.setuptools]

0 commit comments

Comments
 (0)