Skip to content

Commit 01a8ba5

Browse files
committed
Add MPS (Apple Silicon) support and update dependencies
This commit adds support for Apple's MPS backend throughout the codebase, allowing usage on Apple Silicon devices. Device selection logic now detects MPS, and memory management calls are updated accordingly. Requirements are relaxed and updated for broader compatibility, including newer versions of faiss-cpu and removal of strict version pins for several packages. Also removes PID file handling in infer.py and improves error handling for config file loading. Training precision now defaults to float32 for better MPS compatibility.
1 parent c7da750 commit 01a8ba5

9 files changed

Lines changed: 97 additions & 63 deletions

File tree

requirements.txt

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,44 +2,41 @@
22
pip>=23.3; sys_platform == 'darwin'
33
wheel; sys_platform == 'darwin'
44
PyYAML; sys_platform == 'darwin'
5-
numpy==1.26.4
6-
requests>=2.31.0,<2.32.0
5+
numpy<2
6+
requests
77
tqdm
88
wget
99

1010
# Audio processing
11-
ffmpeg-python>=0.2.0
12-
faiss-cpu==1.7.3
13-
librosa==0.11.0
14-
scipy==1.11.1
15-
soundfile==0.12.1
11+
ffmpeg-python
12+
faiss-cpu==1.7.4
13+
librosa
14+
scipy
15+
soundfile
1616
noisereduce
1717
pedalboard
1818
stftpitchshift
1919
soxr
2020

2121
# Machine learning and deep learning
22-
omegaconf>=2.0.6; sys_platform == 'darwin'
23-
numba; sys_platform == 'linux'
24-
numba==0.61.0; sys_platform == 'darwin' or sys_platform == 'win32'
25-
torch==2.7.1; sys_platform == 'darwin'
26-
torch==2.7.1+cu128; sys_platform == 'linux' or sys_platform == 'win32'
27-
torchaudio==2.7.1; sys_platform == 'darwin'
28-
torchaudio==2.7.1+cu128; sys_platform == 'linux' or sys_platform == 'win32'
29-
torchvision==0.22.1; sys_platform == 'darwin'
30-
torchvision==0.22.1+cu128; sys_platform == 'linux' or sys_platform == 'win32'
31-
torchcrepe==0.0.23
22+
omegaconf; sys_platform == 'darwin'
23+
numba
24+
torch==2.4.0; sys_platform == 'darwin'
25+
torchaudio==2.4.0; sys_platform == 'darwin'
26+
torchvision==0.19.0; sys_platform == 'darwin'
27+
torchcrepe
3228
torchfcpe
3329
einops
34-
transformers==4.44.2
30+
transformers
31+
beautifulsoup4
3532

3633
# Visualization and UI
37-
matplotlib==3.7.2
34+
matplotlib
3835
tensorboard
3936
tensorboardX
4037

4138
# Miscellaneous utilities
42-
certifi>=2023.07.22; sys_platform == 'darwin'
43-
antlr4-python3-runtime==4.8; sys_platform == 'darwin'
44-
edge-tts==7.2.0
39+
certifi; sys_platform == 'darwin'
40+
antlr4-python3-runtime
41+
edge-tts
4542
webrtcvad

rvc/.DS_Store

8 KB
Binary file not shown.

rvc/configs/config.py

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,17 @@ def get_instance(*args, **kwargs):
2323
@singleton
2424
class Config:
2525
def __init__(self):
26-
self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
26+
if torch.cuda.is_available():
27+
self.device = "cuda:0"
28+
elif torch.backends.mps.is_available():
29+
self.device = "mps"
30+
else:
31+
self.device = "cpu"
32+
2733
self.gpu_name = (
2834
torch.cuda.get_device_name(int(self.device.split(":")[-1]))
2935
if self.device.startswith("cuda")
30-
else None
36+
else "Apple M-Series" if self.device == "mps" else None
3137
)
3238
self.json_config = self.load_config_json()
3339
self.gpu_mem = None
@@ -44,6 +50,8 @@ def load_config_json(self):
4450
def device_config(self):
4551
if self.device.startswith("cuda"):
4652
self.set_cuda_config()
53+
elif self.device == "mps":
54+
self.gpu_mem = 16  # Default assumes decent unified memory; could try to detect actual size
4755
else:
4856
self.device = "cpu"
4957

@@ -63,36 +71,44 @@ def set_cuda_config(self):
6371
)
6472

6573

74+
6675
def max_vram_gpu(gpu):
6776
if torch.cuda.is_available():
6877
gpu_properties = torch.cuda.get_device_properties(gpu)
6978
total_memory_gb = round(gpu_properties.total_memory / 1024 / 1024 / 1024)
7079
return total_memory_gb
80+
elif torch.backends.mps.is_available():
81+
return 16 # Default placeholder
7182
else:
7283
return "8"
7384

7485

7586
def get_gpu_info():
76-
ngpu = torch.cuda.device_count()
77-
gpu_infos = []
78-
if torch.cuda.is_available() or ngpu != 0:
87+
if torch.cuda.is_available():
88+
ngpu = torch.cuda.device_count()
89+
gpu_infos = []
7990
for i in range(ngpu):
8091
gpu_name = torch.cuda.get_device_name(i)
8192
mem = int(
8293
torch.cuda.get_device_properties(i).total_memory / 1024 / 1024 / 1024
8394
+ 0.4
8495
)
8596
gpu_infos.append(f"{i}: {gpu_name} ({mem} GB)")
86-
if len(gpu_infos) > 0:
87-
gpu_info = "\n".join(gpu_infos)
97+
return "\n".join(gpu_infos)
98+
elif torch.backends.mps.is_available():
99+
return "0: Apple M-Series (Unified Memory)"
88100
else:
89-
gpu_info = "Unfortunately, there is no compatible GPU available to support your training."
90-
return gpu_info
101+
return "Unfortunately, there is no compatible GPU available to support your training."
91102

92103

93104
def get_number_of_gpus():
94105
if torch.cuda.is_available():
95106
num_gpus = torch.cuda.device_count()
96107
return "-".join(map(str, range(num_gpus)))
108+
elif torch.backends.mps.is_available():
109+
return "0"
97110
else:
98111
return "-"
112+
113+
114+

rvc/infer/infer.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -363,12 +363,7 @@ def convert_audio_batch(
363363
sid (int, optional): Speaker ID. Default is 0.
364364
**kwargs: Additional keyword arguments.
365365
"""
366-
pid = os.getpid()
367366
try:
368-
with open(
369-
os.path.join(now_dir, "assets", "infer_pid.txt"), "w"
370-
) as pid_file:
371-
pid_file.write(str(pid))
372367
start_time = time.time()
373368
print(f"Converting audio batch '{audio_input_paths}'...")
374369
audio_files = [
@@ -410,8 +405,6 @@ def convert_audio_batch(
410405
except Exception as error:
411406
print(f"An error occurred during audio batch conversion: {error}")
412407
print(traceback.format_exc())
413-
finally:
414-
os.remove(os.path.join(now_dir, "assets", "infer_pid.txt"))
415408

416409
def get_vc(self, weight_root, sid):
417410
"""
@@ -425,6 +418,8 @@ def get_vc(self, weight_root, sid):
425418
self.cleanup_model()
426419
if torch.cuda.is_available():
427420
torch.cuda.empty_cache()
421+
elif torch.backends.mps.is_available():
422+
torch.mps.empty_cache()
428423

429424
if not self.loaded_model or self.loaded_model != weight_root:
430425
self.load_model(weight_root)
@@ -445,10 +440,14 @@ def cleanup_model(self):
445440
self.hubert_model = self.net_g = self.n_spk = self.vc = self.tgt_sr = None
446441
if torch.cuda.is_available():
447442
torch.cuda.empty_cache()
443+
elif torch.backends.mps.is_available():
444+
torch.mps.empty_cache()
448445

449446
del self.net_g, self.cpt
450447
if torch.cuda.is_available():
451448
torch.cuda.empty_cache()
449+
elif torch.backends.mps.is_available():
450+
torch.mps.empty_cache()
452451
self.cpt = None
453452

454453
def load_model(self, weight_root):

rvc/lib/predictors/FCPE.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -726,7 +726,12 @@ def gaussian_blurred_cent(self, cents):
726726
class FCPEInfer:
727727
def __init__(self, model_path, device=None, dtype=torch.float32):
728728
if device is None:
729-
device = "cuda" if torch.cuda.is_available() else "cpu"
729+
if torch.cuda.is_available():
730+
device = "cuda"
731+
elif torch.backends.mps.is_available():
732+
device = "mps"
733+
else:
734+
device = "cpu"
730735
self.device = device
731736
ckpt = torch.load(
732737
model_path, map_location=torch.device(self.device), weights_only=True
@@ -769,7 +774,12 @@ def __init__(self, args, device=None, dtype=torch.float32):
769774
self.sample_rate = args.mel.sampling_rate
770775
self.hop_size = args.mel.hop_size
771776
if device is None:
772-
device = "cuda" if torch.cuda.is_available() else "cpu"
777+
if torch.cuda.is_available():
778+
device = "cuda"
779+
elif torch.backends.mps.is_available():
780+
device = "mps"
781+
else:
782+
device = "cpu"
773783
self.device = device
774784
self.dtype = dtype
775785
self.stft = STFT(
@@ -849,7 +859,15 @@ def __init__(
849859
self.hop_length = hop_length
850860
self.f0_min = f0_min
851861
self.f0_max = f0_max
852-
self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
862+
if device is None:
863+
if torch.cuda.is_available():
864+
self.device = "cuda"
865+
elif torch.backends.mps.is_available():
866+
self.device = "mps"
867+
else:
868+
self.device = "cpu"
869+
else:
870+
self.device = device
853871
self.threshold = threshold
854872
self.sample_rate = sample_rate
855873
self.dtype = dtype

rvc/lib/predictors/RMVPE.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -506,7 +506,10 @@ def infer_from_audio(self, audio, thred=0.03):
506506
mel = self.mel_extractor(audio, center=True)
507507
del audio
508508
with torch.no_grad():
509-
torch.cuda.empty_cache()
509+
if torch.cuda.is_available():
510+
torch.cuda.empty_cache()
511+
elif torch.backends.mps.is_available():
512+
torch.mps.empty_cache()
510513
hidden = self.mel2hidden(mel)
511514
hidden = hidden.squeeze(0).cpu().numpy()
512515
f0 = self.decode(hidden, thred=thred)

rvc/train/extract/extract.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,14 @@ def run_embedding_extraction(
212212
]
213213
files.append(file_info)
214214

215-
devices = ["cpu"] if gpus == "-" else [f"cuda:{idx}" for idx in gpus.split("-")]
215+
if gpus == "-":
216+
devices = ["cpu"]
217+
elif torch.cuda.is_available():
218+
devices = [f"cuda:{idx}" for idx in gpus.split("-")]
219+
elif torch.backends.mps.is_available():
220+
devices = ["mps"]
221+
else:
222+
devices = ["cpu"]
216223

217224
run_pitch_extraction(files, devices, f0_method, num_processes)
218225

rvc/train/process/extract_model.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,12 @@ def extract_model(
5050
else:
5151
dataset_length = None
5252

53-
with open(os.path.join(now_dir, "assets", "config.json"), "r") as f:
54-
data = json.load(f)
55-
model_author = data.get("model_author", None)
53+
try:
54+
with open(os.path.join(now_dir, "assets", "config.json"), "r") as f:
55+
data = json.load(f)
56+
model_author = data.get("model_author", None)
57+
except (FileNotFoundError, json.JSONDecodeError):
58+
model_author = None
5659

5760
opt = OrderedDict(
5861
weight={

rvc/train/train.py

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -76,20 +76,11 @@
7676

7777
current_dir = os.getcwd()
7878

79+
7980
try:
80-
with open(os.path.join(current_dir, "assets", "config.json"), "r") as f:
81-
config = json.load(f)
82-
precision = config["precision"]
83-
if (
84-
precision == "bf16"
85-
and torch.cuda.is_available()
86-
and torch.cuda.is_bf16_supported()
87-
):
88-
train_dtype = torch.bfloat16
89-
elif precision == "fp16" and torch.cuda.is_available():
90-
train_dtype = torch.float16
91-
else:
92-
train_dtype = torch.float32
81+
# Removed assets/config.json reading logic as assets dir is gone.
82+
# Defaulting to float32, which is safe for MPS.
83+
train_dtype = torch.float32
9384
except (FileNotFoundError, json.JSONDecodeError, KeyError):
9485
train_dtype = torch.float32
9586

@@ -693,7 +684,7 @@ def train_and_evaluate(
693684
) = info
694685

695686
with torch.amp.autocast(
696-
device_type="cuda", enabled=use_amp, dtype=train_dtype
687+
device_type="cuda" if device.type == "cuda" else "cpu", enabled=use_amp, dtype=train_dtype
697688
):
698689
# Forward pass
699690
model_output = net_g(
@@ -712,7 +703,7 @@ def train_and_evaluate(
712703
)
713704
for _ in range(d_step_per_g_step): # default x1
714705
with torch.amp.autocast(
715-
device_type="cuda", enabled=use_amp, dtype=train_dtype
706+
device_type="cuda" if device.type == "cuda" else "cpu", enabled=use_amp, dtype=train_dtype
716707
):
717708
y_d_hat_r, y_d_hat_g, _, _ = net_d(wave, y_hat.detach())
718709
loss_disc, _, _ = discriminator_loss(y_d_hat_r, y_d_hat_g)
@@ -729,7 +720,7 @@ def train_and_evaluate(
729720
optim_d.step()
730721

731722
with torch.amp.autocast(
732-
device_type="cuda", enabled=use_amp, dtype=train_dtype
723+
device_type="cuda" if device.type == "cuda" else "cpu", enabled=use_amp, dtype=train_dtype
733724
):
734725
# Generator backward and update
735726
_, y_d_hat_g, fmap_r, fmap_g = net_d(wave, y_hat)
@@ -886,7 +877,7 @@ def train_and_evaluate(
886877

887878
if epoch % save_every_epoch == 0:
888879
with torch.amp.autocast(
889-
device_type="cuda", enabled=use_amp, dtype=train_dtype
880+
device_type="cuda" if device.type == "cuda" else "cpu", enabled=use_amp, dtype=train_dtype
890881
):
891882
with torch.no_grad():
892883
if hasattr(net_g, "module"):

0 commit comments

Comments
 (0)