1- // AudioManager_improved .cpp – full rewrite with tighter A/V synchronisation
1+ // AudioManager .cpp – full rewrite with tighter A/V synchronisation
22// -----------------------------------------------------------------------------
33// * Keeps the original public API so that existing call‑sites still compile.
44// * Uses an event‑driven render loop instead of busy‑wait polling where possible.
1616#include < algorithm>
1717#include < cmath>
1818#include < array>
19+ #include < mmreg.h>
20+ // WAVE_FORMAT_EXTENSIBLE sub-format GUIDs for volume scaling.
21+ // Defined inline to avoid pulling in <ks.h>/<ksmedia.h> which may conflict.
22+ static const GUID kSubtypePCM =
23+ {0x00000001 , 0x0000 , 0x0010 , {0x80 , 0x00 , 0x00 , 0xAA , 0x00 , 0x38 , 0x9B , 0x71 }};
24+ static const GUID kSubtypeIEEEFloat =
25+ {0x00000003 , 0x0000 , 0x0010 , {0x80 , 0x00 , 0x00 , 0xAA , 0x00 , 0x38 , 0x9B , 0x71 }};
1926
2027using namespace VideoPlayerUtils ;
2128
@@ -34,7 +41,7 @@ HRESULT InitWASAPI(VideoPlayerInstance* inst, const WAVEFORMATEX* srcFmt)
3441{
3542 if (!inst) return E_INVALIDARG;
3643
37- // Re‑use previously initialised client if still valid
44+ // Reuse previously initialized client if still valid
3845 if (inst->pAudioClient && inst->pRenderClient ) {
3946 inst->bAudioInitialized = TRUE ;
4047 return S_OK;
@@ -48,56 +55,69 @@ HRESULT InitWASAPI(VideoPlayerInstance* inst, const WAVEFORMATEX* srcFmt)
4855 if (!enumerator) return E_FAIL;
4956
5057 hr = enumerator->GetDefaultAudioEndpoint (eRender, eConsole, &inst->pDevice );
51- if (FAILED (hr)) return hr ;
58+ if (FAILED (hr)) goto fail ;
5259
5360 // 2. Activate IAudioClient + IAudioEndpointVolume
5461 hr = inst->pDevice ->Activate (__uuidof (IAudioClient), CLSCTX_ALL, nullptr ,
5562 reinterpret_cast <void **>(&inst->pAudioClient ));
56- if (FAILED (hr)) return hr ;
63+ if (FAILED (hr)) goto fail ;
5764
5865 hr = inst->pDevice ->Activate (__uuidof (IAudioEndpointVolume), CLSCTX_ALL, nullptr ,
5966 reinterpret_cast <void **>(&inst->pAudioEndpointVolume ));
60- if (FAILED (hr)) return hr ;
67+ if (FAILED (hr)) goto fail ;
6168
6269 // 3. Determine the format that will be rendered
6370 if (!srcFmt) {
6471 hr = inst->pAudioClient ->GetMixFormat (&deviceMixFmt);
65- if (FAILED (hr)) return hr ;
66- srcFmt = deviceMixFmt; // use mix format as fall‑back
72+ if (FAILED (hr)) goto fail ;
73+ srcFmt = deviceMixFmt;
6774 }
68- inst->pSourceAudioFormat = reinterpret_cast <WAVEFORMATEX*>(CoTaskMemAlloc (srcFmt->cbSize + sizeof (WAVEFORMATEX)));
75+ inst->pSourceAudioFormat = reinterpret_cast <WAVEFORMATEX*>(
76+ CoTaskMemAlloc (srcFmt->cbSize + sizeof (WAVEFORMATEX)));
77+ if (!inst->pSourceAudioFormat ) { hr = E_OUTOFMEMORY; goto fail; }
6978 memcpy (inst->pSourceAudioFormat , srcFmt, srcFmt->cbSize + sizeof (WAVEFORMATEX));
7079
71- // 4. Create (or re‑use ) the render‑ ready event
80+ // 4. Create (or reuse ) the render- ready event
7281 if (!inst->hAudioSamplesReadyEvent ) {
7382 inst->hAudioSamplesReadyEvent = CreateEvent (nullptr , FALSE , FALSE , nullptr );
7483 if (!inst->hAudioSamplesReadyEvent ) {
7584 hr = HRESULT_FROM_WIN32 (GetLastError ());
76- goto cleanup ;
85+ goto fail ;
7786 }
7887 }
7988
80- // 5. Initialise the audio client in shared, event‑ callback mode
89+ // 5. Initialize the audio client in shared, event- callback mode
8190 hr = inst->pAudioClient ->Initialize (AUDCLNT_SHAREMODE_SHARED,
8291 AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
83- kTargetBufferDuration100ns , // buffer dur
84- 0 , // periodicity → let system decide
92+ kTargetBufferDuration100ns ,
93+ 0 ,
8594 srcFmt,
8695 nullptr );
87- if (FAILED (hr)) goto cleanup ;
96+ if (FAILED (hr)) goto fail ;
8897
8998 hr = inst->pAudioClient ->SetEventHandle (inst->hAudioSamplesReadyEvent );
90- if (FAILED (hr)) goto cleanup ;
99+ if (FAILED (hr)) goto fail ;
91100
92- // 6. Grab the render‑ client service interface
101+ // 6. Grab the render- client service interface
93102 hr = inst->pAudioClient ->GetService (__uuidof (IAudioRenderClient),
94103 reinterpret_cast <void **>(&inst->pRenderClient ));
95- if (FAILED (hr)) goto cleanup ;
104+ if (FAILED (hr)) goto fail ;
96105
97106 inst->bAudioInitialized = TRUE ;
107+ if (deviceMixFmt) CoTaskMemFree (deviceMixFmt);
108+ return S_OK;
98109
99- cleanup:
110+ fail:
111+ // Release any partially-created COM objects so that CloseMedia does not
112+ // call methods (e.g. pAudioClient->Stop()) on an uninitialized client.
113+ if (inst->pRenderClient ) { inst->pRenderClient ->Release (); inst->pRenderClient = nullptr ; }
114+ if (inst->pAudioClient ) { inst->pAudioClient ->Release (); inst->pAudioClient = nullptr ; }
115+ if (inst->pAudioEndpointVolume ) { inst->pAudioEndpointVolume ->Release (); inst->pAudioEndpointVolume = nullptr ; }
116+ if (inst->pDevice ) { inst->pDevice ->Release (); inst->pDevice = nullptr ; }
117+ if (inst->pSourceAudioFormat ) { CoTaskMemFree (inst->pSourceAudioFormat ); inst->pSourceAudioFormat = nullptr ; }
118+ if (inst->hAudioSamplesReadyEvent ) { CloseHandle (inst->hAudioSamplesReadyEvent ); inst->hAudioSamplesReadyEvent = nullptr ; }
100119 if (deviceMixFmt) CoTaskMemFree (deviceMixFmt);
120+ inst->bAudioInitialized = FALSE ;
101121 return hr;
102122}
103123
@@ -165,7 +185,7 @@ DWORD WINAPI AudioThreadProc(LPVOID lpParam)
165185 LONGLONG elapsedMs = currentTimeMs - inst->llPlaybackStartTime - inst->llTotalPauseTime ;
166186
167187 // Apply playback speed to elapsed time
168- double adjustedElapsedMs = elapsedMs * inst->playbackSpeed ;
188+ double adjustedElapsedMs = elapsedMs * inst->playbackSpeed . load (std::memory_order_relaxed) ;
169189
170190 // Convert sample timestamp from 100ns units to milliseconds
171191 double sampleTimeMs = ts100n / 10000.0 ;
@@ -217,18 +237,44 @@ DWORD WINAPI AudioThreadProc(LPVOID lpParam)
217237 const BYTE* chunkStart = srcData + (offsetFrames * blockAlign);
218238 memcpy (dstData, chunkStart, framesWanted * blockAlign);
219239
220- // Apply per‑instance volume in‑place (16‑bit PCM or IEEE‑float)
221- if (inst->instanceVolume < 0 .999f ) {
222- if (inst->pSourceAudioFormat ->wFormatTag == WAVE_FORMAT_PCM &&
223- inst->pSourceAudioFormat ->wBitsPerSample == 16 ) {
240+ // Apply per-instance volume in-place.
241+ // Supports PCM 16-bit, PCM 24-bit, IEEE float 32-bit, and
242+ // WAVE_FORMAT_EXTENSIBLE wrappers around those sub-formats.
243+ const float vol = inst->instanceVolume .load (std::memory_order_relaxed);
244+ if (vol < 0 .999f ) {
245+ WORD formatTag = inst->pSourceAudioFormat ->wFormatTag ;
246+ WORD bitsPerSample = inst->pSourceAudioFormat ->wBitsPerSample ;
247+
248+ // Unwrap WAVE_FORMAT_EXTENSIBLE to the actual sub-format
249+ if (formatTag == WAVE_FORMAT_EXTENSIBLE && inst->pSourceAudioFormat ->cbSize >= 22 ) {
250+ auto * ext = reinterpret_cast <WAVEFORMATEXTENSIBLE*>(inst->pSourceAudioFormat );
251+ if (ext->SubFormat == kSubtypePCM )
252+ formatTag = WAVE_FORMAT_PCM;
253+ else if (ext->SubFormat == kSubtypeIEEEFloat )
254+ formatTag = WAVE_FORMAT_IEEE_FLOAT;
255+ }
256+
257+ if (formatTag == WAVE_FORMAT_PCM && bitsPerSample == 16 ) {
224258 auto * s = reinterpret_cast <int16_t *>(dstData);
225259 size_t n = (framesWanted * blockAlign) / sizeof (int16_t );
226- for (size_t i = 0 ; i < n; ++i) s[i] = static_cast <int16_t >(s[i] * inst->instanceVolume );
227- } else if (inst->pSourceAudioFormat ->wFormatTag == WAVE_FORMAT_IEEE_FLOAT &&
228- inst->pSourceAudioFormat ->wBitsPerSample == 32 ) {
260+ for (size_t i = 0 ; i < n; ++i)
261+ s[i] = static_cast <int16_t >(s[i] * vol);
262+ } else if (formatTag == WAVE_FORMAT_PCM && bitsPerSample == 24 ) {
263+ // 24-bit PCM: 3 bytes per sample, little-endian
264+ size_t totalBytes = framesWanted * blockAlign;
265+ for (size_t i = 0 ; i + 2 < totalBytes; i += 3 ) {
266+ int32_t sample = static_cast <int8_t >(dstData[i + 2 ]);
267+ sample = (sample << 8 ) | dstData[i + 1 ];
268+ sample = (sample << 8 ) | dstData[i];
269+ sample = static_cast <int32_t >(sample * vol);
270+ dstData[i] = static_cast <BYTE>(sample & 0xFF );
271+ dstData[i + 1 ] = static_cast <BYTE>((sample >> 8 ) & 0xFF );
272+ dstData[i + 2 ] = static_cast <BYTE>((sample >> 16 ) & 0xFF );
273+ }
274+ } else if (formatTag == WAVE_FORMAT_IEEE_FLOAT && bitsPerSample == 32 ) {
229275 auto * s = reinterpret_cast <float *>(dstData);
230276 size_t n = (framesWanted * blockAlign) / sizeof (float );
231- for (size_t i = 0 ; i < n; ++i) s[i] *= inst-> instanceVolume ;
277+ for (size_t i = 0 ; i < n; ++i) s[i] *= vol ;
232278 }
233279 }
234280
@@ -296,14 +342,14 @@ void StopAudioThread(VideoPlayerInstance* inst)
296342HRESULT SetVolume (VideoPlayerInstance* inst, float vol)
297343{
298344 if (!inst) return E_INVALIDARG;
299- inst->instanceVolume = std::clamp (vol, 0 .0f , 1 .0f );
345+ inst->instanceVolume . store ( std::clamp (vol, 0 .0f , 1 .0f ), std::memory_order_relaxed );
300346 return S_OK;
301347}
302348
303349HRESULT GetVolume (const VideoPlayerInstance* inst, float * out)
304350{
305351 if (!inst || !out) return E_INVALIDARG;
306- *out = inst->instanceVolume ;
352+ *out = inst->instanceVolume . load (std::memory_order_relaxed) ;
307353 return S_OK;
308354}
309355
0 commit comments