|
39 | 39 | extern bool ffmpeg_decode_audio(const std::string & ifname, std::vector<uint8_t> & wav_data); |
40 | 40 | #endif |
41 | 41 |
|
| 42 | +// extract f32 PCM frames from an initialized decoder, downmix to mono and keep the stereo split |
| 43 | +static bool read_audio_from_decoder(ma_decoder & decoder, std::vector<float> & pcmf32, std::vector<std::vector<float>> & pcmf32s, bool stereo) { |
| 44 | + ma_result result; |
| 45 | + ma_uint64 frame_count; |
| 46 | + ma_uint64 frames_read; |
| 47 | + |
| 48 | + if ((result = ma_decoder_get_length_in_pcm_frames(&decoder, &frame_count)) != MA_SUCCESS) { |
| 49 | + fprintf(stderr, "error: failed to retrieve the length of the audio data (%s)\n", ma_result_description(result)); |
| 50 | + return false; |
| 51 | + } |
| 52 | + |
| 53 | + pcmf32.resize(stereo ? frame_count*2 : frame_count); |
| 54 | + |
| 55 | + if ((result = ma_decoder_read_pcm_frames(&decoder, pcmf32.data(), frame_count, &frames_read)) != MA_SUCCESS) { |
| 56 | + fprintf(stderr, "error: failed to read the frames of the audio data (%s)\n", ma_result_description(result)); |
| 57 | + return false; |
| 58 | + } |
| 59 | + |
| 60 | + if (stereo) { |
| 61 | + std::vector<float> stereo_data = pcmf32; |
| 62 | + pcmf32.resize(frame_count); |
| 63 | + for (uint64_t i = 0; i < frame_count; i++) { |
| 64 | + pcmf32[i] = (stereo_data[2*i] + stereo_data[2*i + 1]); |
| 65 | + } |
| 66 | + pcmf32s.resize(2); |
| 67 | + pcmf32s[0].resize(frame_count); |
| 68 | + pcmf32s[1].resize(frame_count); |
| 69 | + for (uint64_t i = 0; i < frame_count; i++) { |
| 70 | + pcmf32s[0][i] = stereo_data[2*i]; |
| 71 | + pcmf32s[1][i] = stereo_data[2*i + 1]; |
| 72 | + } |
| 73 | + } |
| 74 | + |
| 75 | + return true; |
| 76 | +} |
| 77 | + |
42 | 78 | bool read_audio_data(const std::string & fname, std::vector<float>& pcmf32, std::vector<std::vector<float>>& pcmf32s, bool stereo) { |
43 | 79 | std::vector<uint8_t> audio_data; // used for pipe input from stdin or ffmpeg decoding output |
44 | 80 |
|
@@ -109,41 +145,22 @@ bool read_audio_data(const std::string & fname, std::vector<float>& pcmf32, std: |
109 | 145 | #endif |
110 | 146 | } |
111 | 147 |
|
112 | | - ma_uint64 frame_count; |
113 | | - ma_uint64 frames_read; |
114 | | - |
115 | | - if ((result = ma_decoder_get_length_in_pcm_frames(&decoder, &frame_count)) != MA_SUCCESS) { |
116 | | - fprintf(stderr, "error: failed to retrieve the length of the audio data (%s)\n", ma_result_description(result)); |
117 | | - |
118 | | - return false; |
119 | | - } |
120 | | - |
121 | | - pcmf32.resize(stereo ? frame_count*2 : frame_count); |
122 | | - |
123 | | - if ((result = ma_decoder_read_pcm_frames(&decoder, pcmf32.data(), frame_count, &frames_read)) != MA_SUCCESS) { |
124 | | - fprintf(stderr, "error: failed to read the frames of the audio data (%s)\n", ma_result_description(result)); |
125 | | - |
126 | | - return false; |
127 | | - } |
128 | | - |
129 | | - if (stereo) { |
130 | | - std::vector<float> stereo_data = pcmf32; |
131 | | - pcmf32.resize(frame_count); |
| 148 | + return read_audio_from_decoder(decoder.decoder, pcmf32, pcmf32s, stereo); |
| 149 | +} |
132 | 150 |
|
133 | | - for (uint64_t i = 0; i < frame_count; i++) { |
134 | | - pcmf32[i] = (stereo_data[2*i] + stereo_data[2*i + 1]); |
135 | | - } |
| 151 | +// decode audio bytes already held in memory |
| 152 | +bool read_audio_data(const char * buffer, size_t buffer_size, std::vector<float> & pcmf32, std::vector<std::vector<float>> & pcmf32s, bool stereo) { |
| 153 | + ma_decoder_config decoder_config = ma_decoder_config_init(ma_format_f32, stereo ? 2 : 1, WHISPER_SAMPLE_RATE); |
| 154 | + ma_decoder decoder; |
136 | 155 |
|
137 | | - pcmf32s.resize(2); |
138 | | - pcmf32s[0].resize(frame_count); |
139 | | - pcmf32s[1].resize(frame_count); |
140 | | - for (uint64_t i = 0; i < frame_count; i++) { |
141 | | - pcmf32s[0][i] = stereo_data[2*i]; |
142 | | - pcmf32s[1][i] = stereo_data[2*i + 1]; |
143 | | - } |
| 156 | + if (ma_decoder_init_memory(buffer, buffer_size, &decoder_config, &decoder) != MA_SUCCESS) { |
| 157 | + fprintf(stderr, "error: failed to decode audio data from memory buffer\n"); |
| 158 | + return false; |
144 | 159 | } |
145 | 160 |
|
146 | | - return true; |
| 161 | + bool ok = read_audio_from_decoder(decoder, pcmf32, pcmf32s, stereo); |
| 162 | + ma_decoder_uninit(&decoder); |
| 163 | + return ok; |
147 | 164 | } |
148 | 165 |
|
149 | 166 | // 500 -> 00:05.000 |
|
0 commit comments