Skip to content

Commit 874f7ee

Browse files
committed
Add RMVPE pitch extraction and NSF source to RVC pipeline
Integrated RMVPE pitch extraction model and neural source-filter (NSF) source module into the RVC inference pipeline. Updated Generator to use the NSF source and harmonics, and AudioProcessor to save audio as Int16 PCM for compatibility. Improved feature and F0 upsampling logic, added optional RMVPE loading, and clarified simulator limitations in documentation.
1 parent ada9def commit 874f7ee

12 files changed

Lines changed: 1727 additions & 140 deletions

File tree

Demos/iOS/RVCNative/RVCNative/ContentView.swift

Lines changed: 68 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ struct ContentView: View {
99
@State private var statusMessage: String = "Ready"
1010
@State private var isProcessing: Bool = false
1111
@State private var isImporting: Bool = false
12+
@State private var logs: [String] = []
13+
14+
@State private var isModelLoaded: Bool = false
1215

1316
@StateObject private var inferenceEngine = RVCInference()
1417
@StateObject private var audioRecorder = AudioRecorder()
@@ -133,11 +136,11 @@ struct ContentView: View {
133136
.font(.headline)
134137
.frame(maxWidth: .infinity)
135138
.padding()
136-
.background(isProcessing ? Color.gray : Color.green)
139+
.background(isProcessing || !isModelLoaded ? Color.gray : Color.green)
137140
.foregroundColor(.white)
138141
.cornerRadius(12)
139142
}
140-
.disabled(isProcessing || inputURL == nil)
143+
.disabled(isProcessing || inputURL == nil || !isModelLoaded)
141144

142145
// Play Converted
143146
if let url = outputURL, !isProcessing {
@@ -182,7 +185,7 @@ struct ContentView: View {
182185
.padding()
183186
.onChange(of: inferenceEngine.status) { oldValue, newValue in
184187
statusMessage = newValue
185-
if newValue == "Done!" || newValue.starts(with: "Error") {
188+
if newValue == "Done!" || newValue == "Models Loaded" || newValue.starts(with: "Error") {
186189
isProcessing = false
187190
} else if newValue != "Idle" {
188191
isProcessing = true
@@ -218,41 +221,97 @@ struct ContentView: View {
218221
statusMessage = "Error: \(error.localizedDescription)"
219222
}
220223
}
224+
.onAppear {
225+
inferenceEngine.onLog = { msg in
226+
self.log(msg)
227+
}
228+
loadModel(name: "Coder")
229+
}
230+
}
231+
232+
/// Records a diagnostic message in two places: the console and the view's `logs` array.
///
/// - Parameter message: Text to record. The console copy is prefixed with "DEBUG: ";
///   the copy stored in `logs` is not.
func log(_ message: String) {
    // Console output happens immediately, before the deferred append below.
    let consoleLine = "DEBUG: \(message)"
    print(consoleLine)

    // The append to `logs` is deferred onto the main queue.
    DispatchQueue.main.async {
        self.logs += [message]
    }
}
222238

223239
/// Locates the bundled weight files for `name` and starts loading them into `inferenceEngine`.
///
/// The display name is lowercased and spaces become underscores to form the resource
/// name (e.g. "Slim Shady" -> "slim_shady"). The voice model and `hubert_base` are
/// required; if either is missing, `statusMessage` is set and the function returns
/// without starting a load. RMVPE is optional and passed through as `nil` when absent.
///
/// - Parameter name: Display name of the model to load.
func loadModel(name: String) {
    // Every resource is searched for in the bundle root first, then in "Assets".
    func bundledURL(_ resource: String, _ ext: String) -> URL? {
        RVCInference.bundle.url(forResource: resource, withExtension: ext)
            ?? RVCInference.bundle.url(forResource: resource, withExtension: ext, subdirectory: "Assets")
    }

    log("loadModel called for \(name)")
    selectedModel = name
    isModelLoaded = false // Stays false until the load task below succeeds.

    // Map the display name to a resource name.
    let filename = name.lowercased().replacingOccurrences(of: " ", with: "_")

    guard let weightsURL = bundledURL(filename, "safetensors") else {
        log("Failed to find model file: \(filename).safetensors")
        statusMessage = "Model \(name) not found in bundle"
        return
    }
    log("Found model at \(weightsURL.path)")

    guard let hubertURL = bundledURL("hubert_base", "safetensors") else {
        log("Failed to find hubert_base.safetensors")
        statusMessage = "Hubert model not found!"
        return
    }
    log("Found hubert at \(hubertURL.path)")

    // Optional RMVPE: candidates are tried in this order and the first hit wins.
    let pitchModelURL = bundledURL("rmvpe", "safetensors")
        ?? bundledURL("rmvpe", "npz")
        ?? bundledURL("rmvpe_mlx", "npz")

    if let found = pitchModelURL {
        log("Found rmvpe at \(found.path)")
    } else {
        log("RMVPE not found (optional)")
    }

    Task {
        do {
            log("Starting loadWeights task...")
            try await inferenceEngine.loadWeights(hubertURL: hubertURL, modelURL: weightsURL, rmvpeURL: pitchModelURL)
            log("loadWeights success")
            statusMessage = "Loaded \(name)"
            isModelLoaded = true
        } catch {
            log("loadWeights failed: \(error)")
            statusMessage = "Failed to load \(name): \(error.localizedDescription)"
            isModelLoaded = false
        }
    }
}
246296

247297
/// Hands the current input file to `inferenceEngine.infer`, targeting a fixed temp file.
///
/// Returns immediately when there is no `inputURL`. If `selectedModel` is still the
/// "Select Model" placeholder, a prompt is written to `statusMessage` and nothing runs.
/// Otherwise `isProcessing` is set, `outputURL` is pointed at `output.wav` in the
/// temporary directory, and the call awaits the engine.
func startInference() async {
    guard let sourceURL = inputURL else { return }

    guard selectedModel != "Select Model" else {
        statusMessage = "Please select a model first."
        log("Attempted inference without selecting model")
        return
    }

    log("Starting inference processing...")
    isProcessing = true

    // The output always goes to the same temp-directory file name.
    let destinationURL = FileManager.default.temporaryDirectory.appendingPathComponent("output.wav")
    outputURL = destinationURL

    await inferenceEngine.infer(audioURL: sourceURL, outputURL: destinationURL)
    log("Inference complete.")
}
257316
}
258317

Demos/iOS/RVCNative/RVCNativePackage/Package.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ let package = Package(
3131
.product(name: "MLX", package: "mlx-swift"),
3232
.product(name: "MLXNN", package: "mlx-swift"),
3333
.product(name: "MLXRandom", package: "mlx-swift"),
34+
.product(name: "MLXFFT", package: "mlx-swift"),
3435
],
3536
resources: [
3637
.copy("Assets")

Demos/iOS/RVCNative/RVCNativePackage/Sources/RVCNativeFeature/RVC/AudioProcessor.swift

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,18 +43,40 @@ final class AudioProcessor: @unchecked Sendable {
4343
// Save MLXArray to WAV
4444
/// Writes `array` to `url` as a mono, 16-bit, little-endian linear-PCM audio file.
///
/// Samples are staged in a Float32 buffer, limited to -1...1, and converted to Int16
/// by `AVAudioFile` when written. Any existing file at `url` is removed first.
///
/// - Parameters:
///   - array: Samples to write; read via `asArray(Float.self)`.
///     NOTE(review): presumably a 1-D float array, confirm against callers.
///   - url: Destination file URL.
///   - sampleRate: Sample rate in Hz for both the buffer and the file (default 40000).
/// - Throws: An `NSError` in the "AudioProcessor" domain when the sample count does not
///   fit in `AVAudioFrameCount` or the staging buffer cannot be created; otherwise any
///   error raised by `AVAudioFile` while creating or writing the file.
func saveAudio(array: MLXArray, url: URL, sampleRate: Double = 40000) throws {
    let samples = array.asArray(Float.self)

    // AVAudioFrameCount is 32-bit; fail cleanly instead of trapping on overflow.
    guard let frameCount = AVAudioFrameCount(exactly: samples.count) else {
        throw NSError(
            domain: "AudioProcessor",
            code: 1,
            userInfo: [NSLocalizedDescriptionKey: "Too many samples to write: \(samples.count)"]
        )
    }

    // 1. File format: Int16 linear PCM, for broad player compatibility.
    let fileSettings: [String: Any] = [
        AVFormatIDKey: kAudioFormatLinearPCM,
        AVSampleRateKey: sampleRate,
        AVNumberOfChannelsKey: 1,
        AVLinearPCMBitDepthKey: 16,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMIsBigEndianKey: false,
        AVLinearPCMIsNonInterleaved: false
    ]

    // 2. Staging buffer: Float32, matching the processing format passed to AVAudioFile below.
    //    Each step is failable, so report failure rather than force-unwrapping or
    //    silently writing an unfilled buffer.
    guard
        let bufferFormat = AVAudioFormat(commonFormat: .pcmFormatFloat32, sampleRate: sampleRate, channels: 1, interleaved: false),
        let buffer = AVAudioPCMBuffer(pcmFormat: bufferFormat, frameCapacity: frameCount),
        let channelData = buffer.floatChannelData?[0]
    else {
        throw NSError(
            domain: "AudioProcessor",
            code: 2,
            userInfo: [NSLocalizedDescriptionKey: "Could not create a Float32 buffer for \(samples.count) samples at \(sampleRate) Hz"]
        )
    }
    buffer.frameLength = frameCount

    // 3. Fill the buffer, limiting to -1...1 so the Int16 conversion cannot overflow.
    //    NaN compares false against everything and would pass through min/max
    //    unchanged, so it is written as silence instead.
    for (index, sample) in samples.enumerated() {
        channelData[index] = sample.isNaN ? 0 : min(max(sample, -1.0), 1.0)
    }

    // 4. Write. Removal is best-effort: the file may simply not exist yet.
    try? FileManager.default.removeItem(at: url)

    // The file is stored as Int16, but we hand AVAudioFile Float32 data to convert.
    let file = try AVAudioFile(forWriting: url, settings: fileSettings, commonFormat: .pcmFormatFloat32, interleaved: false)
    try file.write(from: buffer)
}
6082

Demos/iOS/RVCNative/RVCNativePackage/Sources/RVCNativeFeature/RVC/HubertModel.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ public class HubertModel: Module {
309309
var x = feature_extractor(inputValues)
310310
x = feature_projection(x)
311311
x = encoder(x)
312-
x = final_proj(x)
312+
// x = final_proj(x) // Skip projection for RVC feature extraction
313313
return x
314314
}
315315
}

0 commit comments

Comments
 (0)