|
9 | 9 | #from .model_functions import separate_sources |
10 | 10 |
|
11 | 11 | import soundfile as sf |
12 | | - |
| 12 | +import numpy as np |
13 | 13 | from magnolia.dnnseparate.L41model import L41Model |
14 | 14 | from magnolia.dnnseparate.deep_clustering_model import DeepClusteringModel |
15 | 15 | #from .deep_clustering_models import DeepClusteringModel |
16 | 16 | #from .clustering_utils import clustering_separate |
17 | | -from magnolia.utils.clustering_utils import clustering_separate |
| 17 | +from magnolia.utils.clustering_utils import clustering_separate,preprocess_signal |
18 | 18 | #from .l41_models import L41Model |
19 | | - |
| 19 | +from magnolia.factorization.nmf import easy_nmf_separate |
| 20 | +from magnolia.features.preprocessing import undo_preemphasis,make_stft_features |
| 21 | +from magnolia.features.spectral_features import istft |
| 22 | +from magnolia.utils.postprocessing import reconstruct |
20 | 23 | ''' |
21 | 24 | Input: noisy signal path |
22 | 25 | Output: list of separated speakers (in numpy form) |
23 | 26 | ''' |
24 | 27 |
|
25 | 28 | '''def tflow_separate(input_path): |
26 | | - |
27 | | - rate = 10000 |
| 29 | + |
| 30 | + rate = 10000 |
28 | 31 |
|
29 | | - wav_list = [] |
| 32 | + wav_list = [] |
30 | 33 |
|
31 | | - model = Conv1DModel([None,None,251,1],[None,None,251,2],20,600,50) |
32 | | - model.load(app.root_path + '/static/models/better_cnn-mask-model.ckpt') |
| 34 | + model = Conv1DModel([None,None,251,1],[None,None,251,2],20,600,50) |
| 35 | + model.load(app.root_path + '/static/models/better_cnn-mask-model.ckpt') |
33 | 36 |
|
34 | | - outputs = separate_sources(input_path,model) |
35 | | - for row in outputs: |
36 | | - wav_list.append(row) |
| 37 | + outputs = separate_sources(input_path,model) |
| 38 | + for row in outputs: |
| 39 | + wav_list.append(row) |
37 | 40 |
|
38 | | - return wav_list |
| 41 | + return wav_list |
39 | 42 | ''' |
40 | 43 | ''' |
41 | 44 | Input: noisy signal path |
42 | 45 | Output: list of separated speakers (in numpy form) |
43 | 46 | ''' |
def deep_cluster_separate(input_path):
    """Separate a noisy recording with the deep clustering model.

    Input: noisy signal path
    Output: list of separated speakers (in numpy form)
    """
    # Read the mixture and its sample rate from disk.
    signal, rate = sf.read(input_path)

    # Build the model and restore its weights from the bundled checkpoint.
    dc_model = DeepClusteringModel()
    dc_model.load(app.root_path + '/static/models/deep_clustering.ckpt')

    # NOTE(review): the trailing 2 is presumably the number of speakers to
    # separate -- confirm against clustering_separate.
    separated = clustering_separate(signal, rate, dc_model, 2)

    # Copy every separated row into a plain list for the caller.
    return list(separated)
60 | 62 |
|
61 | 63 |
|
62 | 64 | ''' |
63 | 65 | Input: noisy signal path |
64 | 66 | Output: list of separated speakers (in numpy form) |
65 | 67 | ''' |
def l41_separate(input_path):
    """Separate a noisy recording with the Lab41 model.

    Input: noisy signal path
    Output: list of separated speakers (in numpy form)
    """
    speakers = []

    # Read the mixture and its sample rate from disk.
    signal, rate = sf.read(input_path)

    # The checkpoint is loaded into a tanh, non-normalized L41 model.
    l41 = L41Model(nonlinearity='tanh', normalize=False)
    l41.load(app.root_path + '/static/models/lab41_nonorm-final.ckpt')

    # NOTE(review): the trailing 2 is presumably the number of speakers to
    # separate -- confirm against clustering_separate.
    speakers.extend(clustering_separate(signal, rate, l41, 2))

    return speakers
| 81 | + |
def nmf_sep(input_path):
    """Separate a noisy recording using NMF.

    Input: noisy signal path
    Output: list with two entries, one per separated speaker, each being the
    value returned by ``reconstruct`` for that speaker.

    Diagnostic shapes are emitted through ``logging`` at DEBUG level instead
    of being printed to stdout on every call.
    """
    # Function-scope import keeps this block self-contained; it does not rely
    # on a module-level logger being defined elsewhere in the file.
    import logging
    log = logging.getLogger(__name__)

    # Only the first two separated components are reconstructed.
    num_speakers = 2

    input_signal, sample_rate = sf.read(input_path)

    # Preprocess the signal into an input feature
    spectrogram = make_stft_features(input_signal, sample_rate)
    log.debug("Shape of spect %s", spectrogram.shape)

    easy_sep = easy_nmf_separate(spectrogram)

    # reconstruct() is called with square=True below, so the squared values
    # are what gets handed over.
    # NOTE(review): presumably reconstruct() takes the square root again --
    # confirm against magnolia.utils.postprocessing.
    separated_speakers = np.square(easy_sep)

    # The phase of the mixture is reused for every separated speaker.
    phases = np.unwrap(np.angle(spectrogram))
    log.debug("Shape of sep speakers %s", separated_speakers.shape)

    # Invert the STFT to recover the output waveforms.
    # NOTE(review): fs is fixed at 10000 rather than taken from sample_rate;
    # presumably make_stft_features resamples to that rate -- confirm.
    wav_list = []
    for i in range(num_speakers):
        recon = reconstruct(
            separated_speakers[i],
            phases,
            fs=10000,
            window_size=None,
            step_size=0.0256,
            square=True,
            preemphasis=0,
        )
        log.debug("Recon shape %s", recon.shape)
        wav_list.append(recon)

    return wav_list
73 | 124 |
|
74 | | - outputs = clustering_separate(input_signal,sample_rate,model,2) |
75 | | - for row in outputs: |
76 | | - wav_list.append(row) |
77 | 125 |
|
78 | | - return wav_list |
|
0 commit comments