diff --git a/.gitignore b/.gitignore index 3a946f3a..dac05bd5 100644 --- a/.gitignore +++ b/.gitignore @@ -17,9 +17,14 @@ Pods/ *yarn.lock # Native compiled binaries (built locally or in CI) /mediaplayer/src/jvmMain/resources/composemediaplayer/native/ +/mediaplayer/src/jvmMain/resources/win32-x86-64/ +/mediaplayer/src/jvmMain/resources/win32-arm64/ # Native build artifacts /mediaplayer/src/jvmMain/native/windows/build-x64/ /mediaplayer/src/jvmMain/native/windows/build-arm64/ +/mediaplayer/src/jvmMain/native/windows/build-test/ *.log /sample/composeApp/debug/ +NUL +.claude/ diff --git a/mediaplayer/ComposeMediaPlayer.podspec b/mediaplayer/ComposeMediaPlayer.podspec index a99bb032..2e15d1f8 100644 --- a/mediaplayer/ComposeMediaPlayer.podspec +++ b/mediaplayer/ComposeMediaPlayer.podspec @@ -41,5 +41,5 @@ Pod::Spec.new do |spec| SCRIPT } ] - spec.resources = ['build/compose/cocoapods/compose-resources'] + spec.resources = ['build\compose\cocoapods\compose-resources'] end diff --git a/mediaplayer/src/jvmMain/kotlin/io/github/kdroidfilter/composemediaplayer/linux/LinuxVideoPlayerState.kt b/mediaplayer/src/jvmMain/kotlin/io/github/kdroidfilter/composemediaplayer/linux/LinuxVideoPlayerState.kt index 69a72e51..65e00617 100644 --- a/mediaplayer/src/jvmMain/kotlin/io/github/kdroidfilter/composemediaplayer/linux/LinuxVideoPlayerState.kt +++ b/mediaplayer/src/jvmMain/kotlin/io/github/kdroidfilter/composemediaplayer/linux/LinuxVideoPlayerState.kt @@ -718,26 +718,32 @@ class LinuxVideoPlayerState : VideoPlayerState { uiUpdateJob?.cancel() playerScope.cancel() - // Dispose the native player synchronously to guarantee cleanup before - // ioScope is cancelled — otherwise GStreamer keeps running (audio leak). 
+ // Clear the pointer atomically so no background task can use it val ptrToDispose = playerPtrAtomic.getAndSet(0L) - skiaBitmapA?.close() - skiaBitmapB?.close() - skiaBitmapA = null - skiaBitmapB = null - skiaBitmapWidth = 0 - skiaBitmapHeight = 0 - nextSkiaBitmapA = true - - if (ptrToDispose != 0L) { + // Native cleanup on a background thread to avoid blocking the UI. + Thread { try { - LinuxNativeBridge.nDisposePlayer(ptrToDispose) + skiaBitmapA?.close() + skiaBitmapB?.close() + skiaBitmapA = null + skiaBitmapB = null + skiaBitmapWidth = 0 + skiaBitmapHeight = 0 + nextSkiaBitmapA = true } catch (e: Exception) { - if (e is CancellationException) throw e - linuxLogger.e { "Error disposing player: ${e.message}" } + linuxLogger.e { "Error releasing bitmaps: ${e.message}" } } - } + + if (ptrToDispose != 0L) { + try { + LinuxNativeBridge.nDisposePlayer(ptrToDispose) + } catch (e: Exception) { + if (e is CancellationException) throw e + linuxLogger.e { "Error disposing player: ${e.message}" } + } + } + }.start() ioScope.cancel() } diff --git a/mediaplayer/src/jvmMain/kotlin/io/github/kdroidfilter/composemediaplayer/mac/MacVideoPlayerState.kt b/mediaplayer/src/jvmMain/kotlin/io/github/kdroidfilter/composemediaplayer/mac/MacVideoPlayerState.kt index 62723432..896b4164 100644 --- a/mediaplayer/src/jvmMain/kotlin/io/github/kdroidfilter/composemediaplayer/mac/MacVideoPlayerState.kt +++ b/mediaplayer/src/jvmMain/kotlin/io/github/kdroidfilter/composemediaplayer/mac/MacVideoPlayerState.kt @@ -861,32 +861,37 @@ class MacVideoPlayerState : VideoPlayerState { uiUpdateJob?.cancel() playerScope.cancel() - // Dispose synchronously to guarantee cleanup before ioScope is cancelled — - // otherwise AVPlayer keeps running (audio leak). - // Use frameDispatcher to safely close bitmaps (rendering accesses them there). 
- val ptrToDispose = runBlocking(frameDispatcher) { - val ptr = playerPtrAtomic.getAndSet(0L) - - skiaBitmapA?.close() - skiaBitmapB?.close() - skiaBitmapA = null - skiaBitmapB = null - skiaBitmapWidth = 0 - skiaBitmapHeight = 0 - nextSkiaBitmapA = true - - ptr - } + // Clear the pointer atomically so no background task can use it + val ptrToDispose = playerPtrAtomic.getAndSet(0L) - if (ptrToDispose != 0L) { - macLogger.d { "dispose() - Disposing native player" } + // Release bitmaps directly on this thread, not on the frame dispatcher (where rendering accesses them), + // then dispose the native player — all on a background thread to avoid + // blocking the main/UI thread. + Thread { try { - MacNativeBridge.nDisposePlayer(ptrToDispose) + // Close bitmaps (not thread-safe with rendering, but frame updates + // are already cancelled above and playerPtr is zeroed) + skiaBitmapA?.close() + skiaBitmapB?.close() + skiaBitmapA = null + skiaBitmapB = null + skiaBitmapWidth = 0 + skiaBitmapHeight = 0 + nextSkiaBitmapA = true } catch (e: Exception) { - if (e is CancellationException) throw e - macLogger.e { "Error disposing player: ${e.message}" } + macLogger.e { "Error releasing bitmaps: ${e.message}" } } - } + + if (ptrToDispose != 0L) { + macLogger.d { "dispose() - Disposing native player" } + try { + MacNativeBridge.nDisposePlayer(ptrToDispose) + } catch (e: Exception) { + if (e is CancellationException) throw e + macLogger.e { "Error disposing player: ${e.message}" } + } + } + }.start() ioScope.cancel() } diff --git a/mediaplayer/src/jvmMain/kotlin/io/github/kdroidfilter/composemediaplayer/windows/WindowsVideoPlayerState.kt b/mediaplayer/src/jvmMain/kotlin/io/github/kdroidfilter/composemediaplayer/windows/WindowsVideoPlayerState.kt index 926dc58c..38832ee3 100644 --- a/mediaplayer/src/jvmMain/kotlin/io/github/kdroidfilter/composemediaplayer/windows/WindowsVideoPlayerState.kt +++ b/mediaplayer/src/jvmMain/kotlin/io/github/kdroidfilter/composemediaplayer/windows/WindowsVideoPlayerState.kt @@ -263,6
+263,11 @@ class WindowsVideoPlayerState : VideoPlayerState { private var skiaBitmapWidth: Int = 0 private var skiaBitmapHeight: Int = 0 + // Adaptive frame interval (ms) based on the video's native frame rate. + // Mirrors macOS approach: poll at the video frame rate, not faster. + // This prevents starving the audio thread on the shared SourceReader. + private var frameIntervalMs: Long = 16L // Default ~60fps, updated after open + // Variable to store the last opened URI private var lastUri: String? = null @@ -292,65 +297,43 @@ class WindowsVideoPlayerState : VideoPlayerState { return // Already disposing } - // Cancel the scope immediately to stop all coroutines - scope.cancel() - - // Use runBlocking to ensure resources are cleaned up synchronously - runBlocking { - try { - // Cancel all jobs with immediate effect - videoJob?.cancel() - resizeJob?.cancel() - - // Wait a bit for coroutines to cancel - delay(50) - - mediaOperationMutex.withLock { - // Stop playing if active - _isPlaying = false - val instance = videoPlayerInstance - if (instance != 0L) { - try { - // Stop playback before releasing resources - val hr = player.SetPlaybackState(instance, false, true) - if (hr < 0) { - windowsLogger.e { "Error stopping playback (hr=0x${hr.toString(16)})" } - } - } catch (e: Exception) { - windowsLogger.e { "Exception stopping playback: ${e.message}" } - } - - // Close the media - try { - player.CloseMedia(instance) - } catch (e: Exception) { - windowsLogger.e { "Exception closing media: ${e.message}" } - } - - // Remove volume setting for this instance - instanceVolumes.remove(instance) + // Stop coroutines first — non-blocking + videoJob?.cancel() + resizeJob?.cancel() + _isPlaying = false + _hasMedia = false - // Destroy the player instance - try { - WindowsNativeBridge.destroyInstance(instance) - } catch (e: Exception) { - windowsLogger.e { "Exception destroying instance: ${e.message}" } - } + // Release Kotlin-side resources immediately (bitmaps, channel) + 
releaseAllResources() - videoPlayerInstance = 0L - } + // Native cleanup on a background thread so dispose() never blocks the UI. + // scope is about to be cancelled, so use a detached thread. + val instance = videoPlayerInstance + videoPlayerInstance = 0L + lastUri = null - // Clear all resources - clearAllResourcesSync() + if (instance != 0L) { + Thread { + try { + player.SetPlaybackState(instance, false, true) + } catch (e: Exception) { + windowsLogger.e { "Exception stopping playback: ${e.message}" } } - } catch (e: Exception) { - windowsLogger.e { "Error during dispose: ${e.message}" } - } finally { - // Mark player as uninitialized - _hasMedia = false - lastUri = null - } + try { + player.CloseMedia(instance) + } catch (e: Exception) { + windowsLogger.e { "Exception closing media: ${e.message}" } + } + instanceVolumes.remove(instance) + try { + WindowsNativeBridge.destroyInstance(instance) + } catch (e: Exception) { + windowsLogger.e { "Exception destroying instance: ${e.message}" } + } + }.start() } + + scope.cancel() } private fun clearAllResourcesSync() { @@ -592,6 +575,16 @@ class WindowsVideoPlayerState : VideoPlayerState { ) } + // Query the native frame rate to compute an adaptive polling interval + // like macOS does with captureFrameRate. + val rateArr = IntArray(2) + if (player.nGetVideoFrameRate(instance, rateArr) >= 0 && rateArr[0] > 0) { + val fps = rateArr[0].toDouble() / rateArr[1].coerceAtLeast(1).toDouble() + frameIntervalMs = (1000.0 / fps).toLong().coerceIn(8L, 50L) + } else { + frameIntervalMs = 16L // fallback ~60fps + } + // Set _hasMedia to true only if everything succeeded _hasMedia = true @@ -807,6 +800,8 @@ class WindowsVideoPlayerState : VideoPlayerState { // Send frame to channel frameChannel.trySend(FrameData(targetBitmap, frameTime)) + // Native AcquireNextSample already paces video to the audio + // clock via PreciseSleepHighRes — no additional delay needed. 
delay(1) } catch (e: CancellationException) { break diff --git a/mediaplayer/src/jvmMain/native/windows/AudioManager.cpp b/mediaplayer/src/jvmMain/native/windows/AudioManager.cpp index 8bd322cd..94ebfb03 100644 --- a/mediaplayer/src/jvmMain/native/windows/AudioManager.cpp +++ b/mediaplayer/src/jvmMain/native/windows/AudioManager.cpp @@ -1,12 +1,11 @@ -// AudioManager.cpp – full rewrite with tighter A/V synchronisation +// AudioManager.cpp – WASAPI audio rendering with resampling for playback speed. // ----------------------------------------------------------------------------- -// * Keeps the original public API so that existing call‑sites still compile. -// * Uses an event‑driven render loop instead of busy‑wait polling where possible. -// * Measures drift between the WASAPI render clock and the Media Foundation -// presentation clock and corrects it gradually to avoid audible glitches. -// * All sleeps are clamped to a minimum of 1 ms to keep the thread responsive. -// * Volume scaling is done in place only when necessary and supports both -// 16‑bit and 32‑bit (float) PCM formats. +// Audio is the timing master (like AVPlayer on macOS). The audio thread feeds +// decoded PCM to WASAPI as fast as the buffer allows — no wall-clock drift +// correction, no sleep, no sample dropping. Video compensates via audioLatencyMs. +// +// This eliminates the class of stutter bugs caused by drift correction +// sleeping/dropping samples after seek, resume, or speed changes. // ----------------------------------------------------------------------------- #include "AudioManager.h" @@ -15,10 +14,10 @@ #include "MediaFoundationManager.h" #include #include -#include #include -// WAVE_FORMAT_EXTENSIBLE sub-format GUIDs for volume scaling. -// Defined inline to avoid pulling in / which may conflict. 
+#include + +// WAVE_FORMAT_EXTENSIBLE sub-format GUIDs static const GUID kSubtypePCM = {0x00000001, 0x0000, 0x0010, {0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71}}; static const GUID kSubtypeIEEEFloat = @@ -28,20 +27,54 @@ using namespace VideoPlayerUtils; namespace AudioManager { -// ‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑ Helper constants ‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑‑ -constexpr REFERENCE_TIME kTargetBufferDuration100ns = 2'000'000; // 200 ms -constexpr REFERENCE_TIME kMinSleepUs = 1'000; // 1 ms -constexpr double kDriftPositiveThresholdMs = 15.0; // audio ahead → wait -constexpr double kDriftNegativeThresholdMs = -50.0; // audio behind → drop +// ---------------------- Helper constants ---------------------- +constexpr REFERENCE_TIME kTargetBufferDuration100ns = 2'000'000; // 200 ms + +// --------------------------------------------------------------------------- +static void ResolveFormatTag(const WAVEFORMATEX* fmt, WORD* outTag, WORD* outBps) { + *outTag = fmt->wFormatTag; + *outBps = fmt->wBitsPerSample; + if (*outTag == WAVE_FORMAT_EXTENSIBLE && fmt->cbSize >= 22) { + auto* ext = reinterpret_cast(fmt); + if (ext->SubFormat == kSubtypePCM) *outTag = WAVE_FORMAT_PCM; + else if (ext->SubFormat == kSubtypeIEEEFloat) *outTag = WAVE_FORMAT_IEEE_FLOAT; + } +} + +// --------------------------------------------------------------------------- +static void ApplyVolume(BYTE* data, UINT32 frames, UINT32 blockAlign, + float vol, WORD formatTag, WORD bitsPerSample) { + if (vol >= 0.999f) return; + + if (formatTag == WAVE_FORMAT_PCM && bitsPerSample == 16) { + auto* s = reinterpret_cast(data); + size_t n = (frames * blockAlign) / sizeof(int16_t); + for (size_t i = 0; i < n; ++i) + s[i] = static_cast(s[i] * vol); + } else if (formatTag == WAVE_FORMAT_PCM && bitsPerSample == 24) { + size_t totalBytes = frames * blockAlign; + for (size_t i = 0; i + 2 < totalBytes; i += 3) { + int32_t sample = static_cast(data[i + 2]); + sample = (sample << 8) | data[i + 1]; + sample = (sample << 8) | data[i]; 
+ sample = static_cast(sample * vol); + data[i] = static_cast(sample & 0xFF); + data[i + 1] = static_cast((sample >> 8) & 0xFF); + data[i + 2] = static_cast((sample >> 16) & 0xFF); + } + } else if (formatTag == WAVE_FORMAT_IEEE_FLOAT && bitsPerSample == 32) { + auto* s = reinterpret_cast(data); + size_t n = (frames * blockAlign) / sizeof(float); + for (size_t i = 0; i < n; ++i) s[i] *= vol; + } +} // ------------------------------------------------------------------------------------ -// InitWASAPI – initialises the shared WASAPI client for the default render endpoint +// InitWASAPI // ------------------------------------------------------------------------------------ HRESULT InitWASAPI(VideoPlayerInstance* inst, const WAVEFORMATEX* srcFmt) { if (!inst) return E_INVALIDARG; - - // Reuse previously initialized client if still valid if (inst->pAudioClient && inst->pRenderClient) { inst->bAudioInitialized = TRUE; return S_OK; @@ -50,14 +83,12 @@ HRESULT InitWASAPI(VideoPlayerInstance* inst, const WAVEFORMATEX* srcFmt) HRESULT hr = S_OK; WAVEFORMATEX* deviceMixFmt = nullptr; - // 1. Get the default render device IMMDeviceEnumerator* enumerator = MediaFoundation::GetDeviceEnumerator(); if (!enumerator) return E_FAIL; hr = enumerator->GetDefaultAudioEndpoint(eRender, eConsole, &inst->pDevice); if (FAILED(hr)) goto fail; - // 2. Activate IAudioClient + IAudioEndpointVolume hr = inst->pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr, reinterpret_cast(&inst->pAudioClient)); if (FAILED(hr)) goto fail; @@ -66,7 +97,6 @@ HRESULT InitWASAPI(VideoPlayerInstance* inst, const WAVEFORMATEX* srcFmt) reinterpret_cast(&inst->pAudioEndpointVolume)); if (FAILED(hr)) goto fail; - // 3. 
Determine the format that will be rendered if (!srcFmt) { hr = inst->pAudioClient->GetMixFormat(&deviceMixFmt); if (FAILED(hr)) goto fail; @@ -77,7 +107,6 @@ HRESULT InitWASAPI(VideoPlayerInstance* inst, const WAVEFORMATEX* srcFmt) if (!inst->pSourceAudioFormat) { hr = E_OUTOFMEMORY; goto fail; } memcpy(inst->pSourceAudioFormat, srcFmt, srcFmt->cbSize + sizeof(WAVEFORMATEX)); - // 4. Create (or reuse) the render-ready event if (!inst->hAudioSamplesReadyEvent) { inst->hAudioSamplesReadyEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); if (!inst->hAudioSamplesReadyEvent) { @@ -86,19 +115,14 @@ HRESULT InitWASAPI(VideoPlayerInstance* inst, const WAVEFORMATEX* srcFmt) } } - // 5. Initialize the audio client in shared, event-callback mode hr = inst->pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, - kTargetBufferDuration100ns, - 0, - srcFmt, - nullptr); + kTargetBufferDuration100ns, 0, srcFmt, nullptr); if (FAILED(hr)) goto fail; hr = inst->pAudioClient->SetEventHandle(inst->hAudioSamplesReadyEvent); if (FAILED(hr)) goto fail; - // 6. Grab the render-client service interface hr = inst->pAudioClient->GetService(__uuidof(IAudioRenderClient), reinterpret_cast(&inst->pRenderClient)); if (FAILED(hr)) goto fail; @@ -108,8 +132,6 @@ HRESULT InitWASAPI(VideoPlayerInstance* inst, const WAVEFORMATEX* srcFmt) return S_OK; fail: - // Release any partially-created COM objects so that CloseMedia does not - // call methods (e.g. pAudioClient->Stop()) on an uninitialized client. 
if (inst->pRenderClient) { inst->pRenderClient->Release(); inst->pRenderClient = nullptr; } if (inst->pAudioClient) { inst->pAudioClient->Release(); inst->pAudioClient = nullptr; } if (inst->pAudioEndpointVolume) { inst->pAudioEndpointVolume->Release(); inst->pAudioEndpointVolume = nullptr; } @@ -121,189 +143,274 @@ HRESULT InitWASAPI(VideoPlayerInstance* inst, const WAVEFORMATEX* srcFmt) return hr; } -// ---------------------------------------------------------------------------- -// AudioThreadProc – feeds decoded audio samples into the WASAPI render client -// ---------------------------------------------------------------------------- -DWORD WINAPI AudioThreadProc(LPVOID lpParam) +// --------------------------------------------------------------------------- +// FeedOneSample — reads one decoded audio sample from MF and feeds it to the WASAPI render buffer. +// Used by both AudioThreadProc (main loop) and PreFillAudioBuffer (seek). +// Returns the number of output frames written (0 if the buffer is full or no sample was available), or -1 on EOF/error. +// --------------------------------------------------------------------------- +static int FeedOneSample(VideoPlayerInstance* inst, IMFSourceReader* audioReader, + UINT32 engineBufferFrames, UINT32 blockAlign, UINT32 channels, + WORD formatTag, WORD bitsPerSample, float speed) { - auto* inst = static_cast(lpParam); - if (!inst || !inst->pAudioClient || !inst->pRenderClient || !inst->pSourceReaderAudio) + // How many frames can we write? + UINT32 framesPadding = 0; + if (FAILED(inst->pAudioClient->GetCurrentPadding(&framesPadding))) + return -1; + UINT32 framesFree = engineBufferFrames - framesPadding; + if (framesFree == 0) return 0; // buffer full, try later + + // Update latency for video-side compensation + const UINT32 sampleRate = inst->pSourceAudioFormat ?
inst->pSourceAudioFormat->nSamplesPerSec : 48000; + inst->audioLatencyMs.store( + static_cast(framesPadding) * 1000.0 / sampleRate, + std::memory_order_relaxed); + + // Read one decoded audio sample + IMFSample* mfSample = nullptr; + DWORD flags = 0; + LONGLONG ts100n = 0; + HRESULT hr = audioReader->ReadSample( + MF_SOURCE_READER_FIRST_AUDIO_STREAM, + 0, nullptr, &flags, &ts100n, &mfSample); + if (FAILED(hr)) return -1; + if (!mfSample) return 0; // decoder starved + if (flags & MF_SOURCE_READERF_ENDOFSTREAM) { + mfSample->Release(); + return -1; + } + + // Update position from audio PTS (audio is the timing master) + if (ts100n > 0) { + inst->llCurrentPosition = ts100n; + } + + // Lock sample buffer + IMFMediaBuffer* mediaBuf = nullptr; + if (FAILED(mfSample->ConvertToContiguousBuffer(&mediaBuf)) || !mediaBuf) { + mfSample->Release(); return 0; + } - // Pre‑warm the audio engine so that GetBufferSize() is valid - UINT32 engineBufferFrames = 0; - if (FAILED(inst->pAudioClient->GetBufferSize(&engineBufferFrames))) + BYTE* srcData = nullptr; + DWORD srcSize = 0, srcMax = 0; + if (FAILED(mediaBuf->Lock(&srcData, &srcMax, &srcSize))) { + mediaBuf->Release(); + mfSample->Release(); return 0; + } - if (inst->hAudioReadyEvent) - WaitForSingleObject(inst->hAudioReadyEvent, INFINITE); + const UINT32 srcFrames = srcSize / blockAlign; + const bool needsResample = std::abs(speed - 1.0f) >= 0.01f; - const UINT32 blockAlign = inst->pSourceAudioFormat ? 
inst->pSourceAudioFormat->nBlockAlign : 4; + UINT32 totalOutputFrames = srcFrames; + if (needsResample && speed > 0.0f) + totalOutputFrames = static_cast(std::ceil(srcFrames / speed)); - // Main render loop – wait for "ready" event, then push as many frames as possible - while (inst->bAudioThreadRunning) { - DWORD signalled = WaitForSingleObject(inst->hAudioSamplesReadyEvent, 10); - if (signalled != WAIT_OBJECT_0) continue; // timeout ⇒ loop back + UINT32 outputDone = 0; + double fracPos = inst->resampleFracPos; - // Handle seek / pause concurrently with the decoder thread + while (outputDone < totalOutputFrames && inst->bAudioThreadRunning) { + // Abort if seek started { EnterCriticalSection(&inst->csClockSync); - bool suspended = inst->bSeekInProgress || inst->llPauseStart != 0; + bool seeking = inst->bSeekInProgress; LeaveCriticalSection(&inst->csClockSync); - if (suspended) { - PreciseSleepHighRes(5); - continue; - } + if (seeking) break; } - // How many frames are currently available for writing? 
- UINT32 framesPadding = 0; - if (FAILED(inst->pAudioClient->GetCurrentPadding(&framesPadding))) - break; - UINT32 framesFree = engineBufferFrames - framesPadding; - if (framesFree == 0) continue; // buffer full – wait for next event - - // Read one decoded sample from MF (non‑blocking) - IMFSample* sample = nullptr; - DWORD flags = 0; - LONGLONG ts100n = 0; - HRESULT hr = inst->pSourceReaderAudio->ReadSample(MF_SOURCE_READER_FIRST_AUDIO_STREAM, - 0, nullptr, &flags, &ts100n, &sample); - if (FAILED(hr)) break; - if (!sample) continue; // decoder starved – wait for more data - if (flags & MF_SOURCE_READERF_ENDOFSTREAM) { - sample->Release(); + UINT32 wantFrames = std::min(totalOutputFrames - outputDone, framesFree); + if (wantFrames == 0) { + // Buffer full — wait briefly for WASAPI to consume + WaitForSingleObject(inst->hAudioSamplesReadyEvent, 5); + if (FAILED(inst->pAudioClient->GetCurrentPadding(&framesPadding))) break; + framesFree = engineBufferFrames - framesPadding; + continue; + } + + EnterCriticalSection(&inst->csAudioFeed); + + BYTE* dstData = nullptr; + HRESULT hrBuf = inst->pRenderClient->GetBuffer(wantFrames, &dstData); + if (FAILED(hrBuf) || !dstData) { + LeaveCriticalSection(&inst->csAudioFeed); break; } - // Measure drift between sample PTS and wall clock (real elapsed time) - // This ensures audio and video are synchronized to the same time reference - double driftMs = 0.0; - if (inst->bUseClockSync && inst->llPlaybackStartTime != 0 && ts100n > 0) { - // Calculate elapsed time since playback started (in milliseconds) - LONGLONG currentTimeMs = GetCurrentTimeMs(); - LONGLONG elapsedMs = currentTimeMs - inst->llPlaybackStartTime - inst->llTotalPauseTime; + if (needsResample) { + double localFrac = fracPos + outputDone * static_cast(speed); + UINT32 actualWritten = 0; + for (UINT32 i = 0; i < wantFrames; ++i) { + if (localFrac >= srcFrames) { + memset(dstData + i * blockAlign, 0, (wantFrames - i) * blockAlign); + actualWritten = wantFrames; + break; + 
} + UINT32 idx0 = static_cast(localFrac); + UINT32 idx1 = std::min(idx0 + 1, srcFrames - 1); + float frac = static_cast(localFrac - idx0); + + if (formatTag == WAVE_FORMAT_IEEE_FLOAT && bitsPerSample == 32) { + const float* s = reinterpret_cast(srcData); + float* d = reinterpret_cast(dstData + i * blockAlign); + for (UINT32 ch = 0; ch < channels; ++ch) + d[ch] = s[idx0 * channels + ch] * (1.0f - frac) + + s[idx1 * channels + ch] * frac; + } else if (formatTag == WAVE_FORMAT_PCM && bitsPerSample == 16) { + const int16_t* s = reinterpret_cast(srcData); + int16_t* d = reinterpret_cast(dstData + i * blockAlign); + for (UINT32 ch = 0; ch < channels; ++ch) + d[ch] = static_cast( + s[idx0 * channels + ch] * (1.0f - frac) + + s[idx1 * channels + ch] * frac); + } else { + memcpy(dstData + i * blockAlign, srcData + idx0 * blockAlign, blockAlign); + } + localFrac += speed; + ++actualWritten; + } + wantFrames = actualWritten; + } else { + memcpy(dstData, srcData + outputDone * blockAlign, wantFrames * blockAlign); + } - // Apply playback speed to elapsed time - double adjustedElapsedMs = elapsedMs * inst->playbackSpeed.load(std::memory_order_relaxed); + const float vol = inst->instanceVolume.load(std::memory_order_relaxed); + ApplyVolume(dstData, wantFrames, blockAlign, vol, formatTag, bitsPerSample); - // Convert sample timestamp from 100ns units to milliseconds - double sampleTimeMs = ts100n / 10000.0; + inst->pRenderClient->ReleaseBuffer(wantFrames, 0); + LeaveCriticalSection(&inst->csAudioFeed); - // Calculate drift: positive means audio is ahead, negative means audio is late - driftMs = sampleTimeMs - adjustedElapsedMs; - } + outputDone += wantFrames; - if (driftMs > kDriftPositiveThresholdMs) { - // Audio ahead → delay feed to renderer - PreciseSleepHighRes(std::min(driftMs, 100.0)); - } else if (driftMs < kDriftNegativeThresholdMs) { - // Audio too late → drop sample completely (skip) - sample->Release(); - continue; - } + if 
(FAILED(inst->pAudioClient->GetCurrentPadding(&framesPadding))) break; + framesFree = engineBufferFrames - framesPadding; + } - // Copy contiguous audio buffer into render buffer – may span multiple GetBuffer() calls - IMFMediaBuffer* mediaBuf = nullptr; - if (FAILED(sample->ConvertToContiguousBuffer(&mediaBuf)) || !mediaBuf) { - sample->Release(); - continue; - } + // Save fractional position for next sample + if (needsResample) { + double endPos = fracPos + outputDone * static_cast(speed); + inst->resampleFracPos = endPos - srcFrames; + if (inst->resampleFracPos < 0.0) inst->resampleFracPos = 0.0; + } else { + inst->resampleFracPos = 0.0; + } - BYTE* srcData = nullptr; - DWORD srcSize = 0, srcMax = 0; - if (FAILED(mediaBuf->Lock(&srcData, &srcMax, &srcSize))) { - mediaBuf->Release(); - sample->Release(); - continue; - } + mediaBuf->Unlock(); + mediaBuf->Release(); + mfSample->Release(); + return static_cast(outputDone); +} - UINT32 totalFrames = srcSize / blockAlign; - UINT32 offsetFrames = 0; +// --------------------------------------------------------------------------- +// PreFillAudioBuffer — fills WASAPI buffer BEFORE Start() so there's no +// gap at the beginning of playback / after seek. +// --------------------------------------------------------------------------- +HRESULT PreFillAudioBuffer(VideoPlayerInstance* inst) +{ + if (!inst || !inst->pAudioClient || !inst->pRenderClient) + return E_INVALIDARG; - while (offsetFrames < totalFrames) { - UINT32 framesWanted = std::min(totalFrames - offsetFrames, framesFree); - if (framesWanted == 0) { - // Renderer is full → wait for next event - WaitForSingleObject(inst->hAudioSamplesReadyEvent, 5); - if (FAILED(inst->pAudioClient->GetCurrentPadding(&framesPadding))) break; - framesFree = engineBufferFrames - framesPadding; - continue; - } + IMFSourceReader* audioReader = inst->pSourceReaderAudio + ? 
inst->pSourceReaderAudio + : inst->pSourceReader; + if (!audioReader) return E_FAIL; - BYTE* dstData = nullptr; - if (FAILED(inst->pRenderClient->GetBuffer(framesWanted, &dstData)) || !dstData) break; - - const BYTE* chunkStart = srcData + (offsetFrames * blockAlign); - memcpy(dstData, chunkStart, framesWanted * blockAlign); - - // Apply per-instance volume in-place. - // Supports PCM 16-bit, PCM 24-bit, IEEE float 32-bit, and - // WAVE_FORMAT_EXTENSIBLE wrappers around those sub-formats. - const float vol = inst->instanceVolume.load(std::memory_order_relaxed); - if (vol < 0.999f) { - WORD formatTag = inst->pSourceAudioFormat->wFormatTag; - WORD bitsPerSample = inst->pSourceAudioFormat->wBitsPerSample; - - // Unwrap WAVE_FORMAT_EXTENSIBLE to the actual sub-format - if (formatTag == WAVE_FORMAT_EXTENSIBLE && inst->pSourceAudioFormat->cbSize >= 22) { - auto* ext = reinterpret_cast(inst->pSourceAudioFormat); - if (ext->SubFormat == kSubtypePCM) - formatTag = WAVE_FORMAT_PCM; - else if (ext->SubFormat == kSubtypeIEEEFloat) - formatTag = WAVE_FORMAT_IEEE_FLOAT; - } + UINT32 engineBufferFrames = 0; + if (FAILED(inst->pAudioClient->GetBufferSize(&engineBufferFrames))) + return E_FAIL; - if (formatTag == WAVE_FORMAT_PCM && bitsPerSample == 16) { - auto* s = reinterpret_cast(dstData); - size_t n = (framesWanted * blockAlign) / sizeof(int16_t); - for (size_t i = 0; i < n; ++i) - s[i] = static_cast(s[i] * vol); - } else if (formatTag == WAVE_FORMAT_PCM && bitsPerSample == 24) { - // 24-bit PCM: 3 bytes per sample, little-endian - size_t totalBytes = framesWanted * blockAlign; - for (size_t i = 0; i + 2 < totalBytes; i += 3) { - int32_t sample = static_cast(dstData[i + 2]); - sample = (sample << 8) | dstData[i + 1]; - sample = (sample << 8) | dstData[i]; - sample = static_cast(sample * vol); - dstData[i] = static_cast(sample & 0xFF); - dstData[i + 1] = static_cast((sample >> 8) & 0xFF); - dstData[i + 2] = static_cast((sample >> 16) & 0xFF); - } - } else if (formatTag == 
WAVE_FORMAT_IEEE_FLOAT && bitsPerSample == 32) { - auto* s = reinterpret_cast(dstData); - size_t n = (framesWanted * blockAlign) / sizeof(float); - for (size_t i = 0; i < n; ++i) s[i] *= vol; - } - } + const UINT32 blockAlign = inst->pSourceAudioFormat ? inst->pSourceAudioFormat->nBlockAlign : 4; + const UINT32 channels = inst->pSourceAudioFormat ? inst->pSourceAudioFormat->nChannels : 2; + + WORD formatTag = WAVE_FORMAT_PCM, bitsPerSample = 16; + if (inst->pSourceAudioFormat) + ResolveFormatTag(inst->pSourceAudioFormat, &formatTag, &bitsPerSample); + + float speed = inst->playbackSpeed.load(std::memory_order_relaxed); + inst->resampleFracPos = 0.0; + + // Fill until the buffer is at least half full + UINT32 targetFrames = engineBufferFrames / 2; + UINT32 totalFed = 0; + for (int attempts = 0; attempts < 20 && totalFed < targetFrames; ++attempts) { + int fed = FeedOneSample(inst, audioReader, engineBufferFrames, + blockAlign, channels, formatTag, bitsPerSample, speed); + if (fed < 0) break; // EOF or error + if (fed == 0) continue; + totalFed += fed; + } - inst->pRenderClient->ReleaseBuffer(framesWanted, 0); - offsetFrames += framesWanted; + return S_OK; +} - // Recompute free frames for potential second iteration in this loop - if (FAILED(inst->pAudioClient->GetCurrentPadding(&framesPadding))) break; - framesFree = engineBufferFrames - framesPadding; +// --------------------------------------------------------------------------- +// AudioThreadProc — simple feed loop, no drift correction. +// Audio is the timing master: it feeds WASAPI as fast as the buffer allows. +// WASAPI's hardware clock determines the actual playback rate. +// Video compensates via audioLatencyMs. 
+// --------------------------------------------------------------------------- +DWORD WINAPI AudioThreadProc(LPVOID lpParam) +{ + auto* inst = static_cast(lpParam); + if (!inst || !inst->pAudioClient || !inst->pRenderClient) + return 0; + + IMFSourceReader* audioReader = inst->pSourceReaderAudio + ? inst->pSourceReaderAudio + : inst->pSourceReader; + if (!audioReader) return 0; + + UINT32 engineBufferFrames = 0; + if (FAILED(inst->pAudioClient->GetBufferSize(&engineBufferFrames))) + return 0; + + if (inst->hAudioReadyEvent) + WaitForSingleObject(inst->hAudioReadyEvent, INFINITE); + + const UINT32 blockAlign = inst->pSourceAudioFormat ? inst->pSourceAudioFormat->nBlockAlign : 4; + const UINT32 channels = inst->pSourceAudioFormat ? inst->pSourceAudioFormat->nChannels : 2; + + WORD formatTag = WAVE_FORMAT_PCM, bitsPerSample = 16; + if (inst->pSourceAudioFormat) + ResolveFormatTag(inst->pSourceAudioFormat, &formatTag, &bitsPerSample); + + inst->resampleFracPos = 0.0; + + while (inst->bAudioThreadRunning) { + // Wait for WASAPI to signal buffer space (or 10ms timeout) + WaitForSingleObject(inst->hAudioSamplesReadyEvent, 10); + + // Pause / seek: spin until resumed + { + EnterCriticalSection(&inst->csClockSync); + bool suspended = inst->bSeekInProgress || inst->llPauseStart != 0; + LeaveCriticalSection(&inst->csClockSync); + if (suspended) { + PreciseSleepHighRes(5); + continue; + } } - mediaBuf->Unlock(); - mediaBuf->Release(); - sample->Release(); + float speed = inst->playbackSpeed.load(std::memory_order_relaxed); + int result = FeedOneSample(inst, audioReader, engineBufferFrames, + blockAlign, channels, formatTag, bitsPerSample, speed); + if (result < 0) break; // EOF or fatal error } + EnterCriticalSection(&inst->csAudioFeed); inst->pAudioClient->Stop(); + LeaveCriticalSection(&inst->csAudioFeed); + inst->audioLatencyMs.store(0.0, std::memory_order_relaxed); return 0; } // ------------------------------------------------------------- -// Thread management helpers 
+// Thread management // ------------------------------------------------------------- HRESULT StartAudioThread(VideoPlayerInstance* inst) { if (!inst || !inst->bHasAudio || !inst->bAudioInitialized) return E_INVALIDARG; - // Terminate any previous thread first if (inst->hAudioThread) { WaitForSingleObject(inst->hAudioThread, 5000); CloseHandle(inst->hAudioThread); @@ -326,18 +433,25 @@ void StopAudioThread(VideoPlayerInstance* inst) if (!inst) return; inst->bAudioThreadRunning = FALSE; + if (inst->hAudioReadyEvent) SetEvent(inst->hAudioReadyEvent); + if (inst->hAudioSamplesReadyEvent) SetEvent(inst->hAudioSamplesReadyEvent); + if (inst->hAudioThread) { - if (WaitForSingleObject(inst->hAudioThread, 1000) == WAIT_TIMEOUT) - TerminateThread(inst->hAudioThread, 0); + WaitForSingleObject(inst->hAudioThread, 5000); CloseHandle(inst->hAudioThread); inst->hAudioThread = nullptr; } - if (inst->pAudioClient) inst->pAudioClient->Stop(); + if (inst->pAudioClient) { + EnterCriticalSection(&inst->csAudioFeed); + inst->pAudioClient->Stop(); + LeaveCriticalSection(&inst->csAudioFeed); + } + inst->audioLatencyMs.store(0.0, std::memory_order_relaxed); } // ----------------------------------------- -// Per‑instance volume helpers (0.0 – 1.0) +// Volume helpers // ----------------------------------------- HRESULT SetVolume(VideoPlayerInstance* inst, float vol) { diff --git a/mediaplayer/src/jvmMain/native/windows/AudioManager.h b/mediaplayer/src/jvmMain/native/windows/AudioManager.h index d9f25662..d01900e7 100644 --- a/mediaplayer/src/jvmMain/native/windows/AudioManager.h +++ b/mediaplayer/src/jvmMain/native/windows/AudioManager.h @@ -36,6 +36,11 @@ DWORD WINAPI AudioThreadProc(LPVOID lpParam); * @param pInstance Pointer to the video player instance. * @return S_OK on success, or an error code. */ +/** + * @brief Pre-fills the WASAPI buffer before Start() to avoid gaps after seek. 
+ */ +HRESULT PreFillAudioBuffer(VideoPlayerInstance* pInstance); + HRESULT StartAudioThread(VideoPlayerInstance* pInstance); /** diff --git a/mediaplayer/src/jvmMain/native/windows/NativeVideoPlayer.cpp b/mediaplayer/src/jvmMain/native/windows/NativeVideoPlayer.cpp index 42cb6899..abd7550a 100644 --- a/mediaplayer/src/jvmMain/native/windows/NativeVideoPlayer.cpp +++ b/mediaplayer/src/jvmMain/native/windows/NativeVideoPlayer.cpp @@ -280,32 +280,24 @@ static HRESULT AcquireNextSample(VideoPlayerInstance* pInstance, IMFSample** ppS pInstance->pCachedSample = nullptr; } - // On the first decoded frame after play/seek, recalibrate the wall clock - // so that any decode or network latency doesn't cause mass frame skipping. - // This is critical for HTTP sources where ReadSample may block for seconds. - if (!pInstance->bHasInitialFrame) { - if (pInstance->bUseClockSync && pInstance->llPlaybackStartTime != 0) { - double frameTimeMs = llTimestamp / 10000.0; - double adjustedMs = frameTimeMs / static_cast(pInstance->playbackSpeed.load()); - pInstance->llPlaybackStartTime = GetCurrentTimeMs() - static_cast(adjustedMs); - pInstance->llTotalPauseTime = 0; - } - pInstance->bHasInitialFrame = TRUE; - } + pInstance->bHasInitialFrame = TRUE; - pInstance->llCurrentPosition = llTimestamp; + // Only update position from video if there's no audio track. + if (!pInstance->bHasAudio) { + pInstance->llCurrentPosition = llTimestamp; + } } - // ----- Frame timing synchronization (wall-clock based) ----- - if (!isPaused && pInstance->bUseClockSync && - pInstance->llPlaybackStartTime != 0 && llTimestamp > 0) { + // ----- Frame timing synchronization ----- + // Audio-master model (like AVPlayer on macOS): video syncs to the audio + // position, not to a wall clock. This guarantees lip-sync because both + // streams share the same time reference. + // + // For video-only files (no audio), fall back to wall-clock sync. 
+ if (!isPaused && llTimestamp > 0) { - LONGLONG currentTimeMs = GetCurrentTimeMs(); - LONGLONG elapsedMs = currentTimeMs - pInstance->llPlaybackStartTime - pInstance->llTotalPauseTime; - double adjustedElapsedMs = elapsedMs * pInstance->playbackSpeed.load(); double frameTimeMs = llTimestamp / 10000.0; - // Determine frame interval, guarding against division by zero (issue #3) UINT frameRateNum = kDefaultFrameRateNum, frameRateDenom = kDefaultFrameRateDenom; GetVideoFrameRate(pInstance, &frameRateNum, &frameRateDenom); if (frameRateNum == 0) { @@ -314,7 +306,22 @@ static HRESULT AcquireNextSample(VideoPlayerInstance* pInstance, IMFSample** ppS } double frameIntervalMs = 1000.0 * frameRateDenom / frameRateNum; - double diffMs = frameTimeMs - adjustedElapsedMs; + double referenceMs; + if (pInstance->bHasAudio) { + // Audio-master: use the audio position heard by the user right now. + // llCurrentPosition = PTS of the last sample fed to WASAPI. + // audioLatencyMs = how much of the WASAPI buffer hasn't played yet. 
+ double audioFedMs = pInstance->llCurrentPosition / 10000.0; + double latencyMs = pInstance->audioLatencyMs.load(std::memory_order_relaxed); + referenceMs = audioFedMs - latencyMs; + } else { + // No audio: wall-clock fallback + LONGLONG currentTimeMs = GetCurrentTimeMs(); + LONGLONG elapsedMs = currentTimeMs - pInstance->llPlaybackStartTime - pInstance->llTotalPauseTime; + referenceMs = elapsedMs * pInstance->playbackSpeed.load(); + } + + double diffMs = frameTimeMs - referenceMs; if (diffMs < -frameIntervalMs * kFrameSkipThreshold) { // Frame is very late — skip it @@ -359,10 +366,12 @@ NATIVEVIDEOPLAYER_API HRESULT CreateVideoPlayerInstance(VideoPlayerInstance** pp return E_OUTOFMEMORY; InitializeCriticalSection(&pInstance->csClockSync); + InitializeCriticalSection(&pInstance->csAudioFeed); pInstance->bUseClockSync = TRUE; pInstance->hAudioReadyEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); if (!pInstance->hAudioReadyEvent) { + DeleteCriticalSection(&pInstance->csAudioFeed); DeleteCriticalSection(&pInstance->csClockSync); delete pInstance; return HRESULT_FROM_WIN32(GetLastError()); @@ -382,6 +391,7 @@ NATIVEVIDEOPLAYER_API void DestroyVideoPlayerInstance(VideoPlayerInstance* pInst pInstance->pCachedSample = nullptr; } + DeleteCriticalSection(&pInstance->csAudioFeed); DeleteCriticalSection(&pInstance->csClockSync); delete pInstance; DecrementInstanceCount(); @@ -524,37 +534,28 @@ NATIVEVIDEOPLAYER_API HRESULT OpenMedia(VideoPlayerInstance* pInstance, const wc } } - // Create a separate audio source reader for the audio thread + // Create a dedicated audio SourceReader so the audio thread is never + // blocked by video decoding (ReadSample is serialized within a single + // reader). Both readers share the same container timestamps. 
IMFAttributes* pAudioAttrs = nullptr; - if (isNetwork) { - MFCreateAttributes(&pAudioAttrs, 1); - if (pAudioAttrs) pAudioAttrs->SetUINT32(MF_LOW_LATENCY, TRUE); - } - hr = MFCreateSourceReaderFromURL(url, pAudioAttrs, &pInstance->pSourceReaderAudio); - SafeRelease(pAudioAttrs); + hr = MFCreateAttributes(&pAudioAttrs, 2); if (SUCCEEDED(hr)) { - hr = pInstance->pSourceReaderAudio->SetStreamSelection(MF_SOURCE_READER_ALL_STREAMS, FALSE); - if (SUCCEEDED(hr)) - hr = pInstance->pSourceReaderAudio->SetStreamSelection(MF_SOURCE_READER_FIRST_AUDIO_STREAM, TRUE); - - if (SUCCEEDED(hr)) { - // Use the same format that succeeded for the main reader - UINT32 usedCh = pInstance->pSourceAudioFormat ? pInstance->pSourceAudioFormat->nChannels : 2; - UINT32 usedSr = pInstance->pSourceAudioFormat ? pInstance->pSourceAudioFormat->nSamplesPerSec : 48000; - - IMFMediaType* pWantedAudioType = nullptr; - hr = MFCreateMediaType(&pWantedAudioType); - if (SUCCEEDED(hr)) { - ConfigureAudioType(pWantedAudioType, usedCh, usedSr); - hr = pInstance->pSourceReaderAudio->SetCurrentMediaType(MF_SOURCE_READER_FIRST_AUDIO_STREAM, nullptr, pWantedAudioType); - SafeRelease(pWantedAudioType); - } - } - - if (FAILED(hr)) { - PrintHR("Failed to configure audio source reader", hr); - SafeRelease(pInstance->pSourceReaderAudio); - pInstance->pSourceReaderAudio = nullptr; + if (isNetwork) pAudioAttrs->SetUINT32(MF_LOW_LATENCY, TRUE); + hr = MFCreateSourceReaderFromURL(url, pAudioAttrs, &pInstance->pSourceReaderAudio); + SafeRelease(pAudioAttrs); + } + if (SUCCEEDED(hr) && pInstance->pSourceReaderAudio) { + pInstance->pSourceReaderAudio->SetStreamSelection(MF_SOURCE_READER_ALL_STREAMS, FALSE); + pInstance->pSourceReaderAudio->SetStreamSelection(MF_SOURCE_READER_FIRST_AUDIO_STREAM, TRUE); + + UINT32 usedCh = pInstance->pSourceAudioFormat ? pInstance->pSourceAudioFormat->nChannels : 2; + UINT32 usedSr = pInstance->pSourceAudioFormat ? 
pInstance->pSourceAudioFormat->nSamplesPerSec : 48000; + IMFMediaType* pWantedAudioType = nullptr; + if (SUCCEEDED(MFCreateMediaType(&pWantedAudioType))) { + ConfigureAudioType(pWantedAudioType, usedCh, usedSr); + pInstance->pSourceReaderAudio->SetCurrentMediaType( + MF_SOURCE_READER_FIRST_AUDIO_STREAM, nullptr, pWantedAudioType); + SafeRelease(pWantedAudioType); } } else { PrintHR("Failed to create audio source reader", hr); @@ -625,6 +626,11 @@ NATIVEVIDEOPLAYER_API HRESULT OpenMedia(VideoPlayerInstance* pInstance, const wc pInstance->llTotalPauseTime = 0; pInstance->llPauseStart = 0; + // Pre-fill WASAPI buffer before starting audio thread + if (pInstance->bHasAudio && pInstance->bAudioInitialized) { + PreFillAudioBuffer(pInstance); + } + if (pInstance->bHasAudio && pInstance->bAudioInitialized && pInstance->pSourceReaderAudio) { hr = StartAudioThread(pInstance); if (FAILED(hr)) { @@ -875,6 +881,15 @@ NATIVEVIDEOPLAYER_API HRESULT ReadVideoFrameInto( } } + // Force alpha byte to 0xFF — same fix as ReadVideoFrame. + // MFVideoFormat_RGB32 (X8R8G8B8) leaves the high byte undefined. + { + const DWORD pixelCount = (dstRowBytes * height) / 4; + DWORD* px = reinterpret_cast(pDst); + for (DWORD i = 0; i < pixelCount; ++i) + px[i] |= 0xFF000000; + } + pBuffer->Release(); pSample->Release(); return S_OK; @@ -956,8 +971,11 @@ NATIVEVIDEOPLAYER_API HRESULT SeekMedia(VideoPlayerInstance* pInstance, LONGLONG bool wasPlaying = false; if (pInstance->bHasAudio && pInstance->pAudioClient) { wasPlaying = (pInstance->llPauseStart == 0); + // Stop WASAPI under csAudioFeed to ensure the audio thread is not + // in the middle of GetBuffer/ReleaseBuffer. 
+ EnterCriticalSection(&pInstance->csAudioFeed); pInstance->pAudioClient->Stop(); - Sleep(kSeekAudioSettleMs); + LeaveCriticalSection(&pInstance->csAudioFeed); } // Stop the presentation clock @@ -965,7 +983,7 @@ NATIVEVIDEOPLAYER_API HRESULT SeekMedia(VideoPlayerInstance* pInstance, LONGLONG pInstance->pPresentationClock->Stop(); } - // Seek the main source reader + // Seek video reader HRESULT hr = pInstance->pSourceReader->SetCurrentPosition(GUID_NULL, var); if (FAILED(hr)) { EnterCriticalSection(&pInstance->csClockSync); @@ -975,40 +993,32 @@ NATIVEVIDEOPLAYER_API HRESULT SeekMedia(VideoPlayerInstance* pInstance, LONGLONG return hr; } - // Also seek the audio source reader if available + // Seek audio reader independently — never blocks on video decoding if (pInstance->pSourceReaderAudio) { PROPVARIANT varAudio; PropVariantInit(&varAudio); varAudio.vt = VT_I8; varAudio.hVal.QuadPart = llPositionIn100Ns; - - HRESULT hrAudio = pInstance->pSourceReaderAudio->SetCurrentPosition(GUID_NULL, varAudio); - if (FAILED(hrAudio)) { - PrintHR("Failed to seek audio source reader", hrAudio); - } + pInstance->pSourceReaderAudio->SetCurrentPosition(GUID_NULL, varAudio); PropVariantClear(&varAudio); } - // Reset audio client if needed + // Reset WASAPI buffer under csAudioFeed if (pInstance->bHasAudio && pInstance->pRenderClient && pInstance->pAudioClient) { - UINT32 bufferFrameCount = 0; - if (SUCCEEDED(pInstance->pAudioClient->GetBufferSize(&bufferFrameCount))) { - pInstance->pAudioClient->Reset(); - } + EnterCriticalSection(&pInstance->csAudioFeed); + pInstance->pAudioClient->Reset(); + LeaveCriticalSection(&pInstance->csAudioFeed); } PropVariantClear(&var); - // Update position and state - EnterCriticalSection(&pInstance->csClockSync); - pInstance->llCurrentPosition = llPositionIn100Ns; - pInstance->bSeekInProgress = FALSE; - LeaveCriticalSection(&pInstance->csClockSync); - pInstance->bEOF = FALSE; + pInstance->resampleFracPos = 0.0; + pInstance->audioLatencyMs.store(0.0, 
std::memory_order_relaxed); - // Reset timing for A/V sync after seek: - // Adjust llPlaybackStartTime so that elapsed time matches the seek position. + // Reset timing for A/V sync after seek. + EnterCriticalSection(&pInstance->csClockSync); + pInstance->llCurrentPosition = llPositionIn100Ns; if (pInstance->bUseClockSync) { double seekPositionMs = llPositionIn100Ns / 10000.0; double adjustedSeekMs = seekPositionMs / static_cast(pInstance->playbackSpeed.load()); @@ -1021,8 +1031,10 @@ NATIVEVIDEOPLAYER_API HRESULT SeekMedia(VideoPlayerInstance* pInstance, LONGLONG pInstance->llPauseStart = 0; } } + pInstance->bSeekInProgress = FALSE; + LeaveCriticalSection(&pInstance->csClockSync); - // Restart the presentation clock at the new position + // Restart the presentation clock if (pInstance->bUseClockSync && pInstance->pPresentationClock) { hr = pInstance->pPresentationClock->Start(llPositionIn100Ns); if (FAILED(hr)) { @@ -1030,15 +1042,25 @@ NATIVEVIDEOPLAYER_API HRESULT SeekMedia(VideoPlayerInstance* pInstance, LONGLONG } } - // Restart audio if it was playing + // Pre-fill the WASAPI buffer BEFORE Start() so audio plays immediately + // with no gap. This is the key to stutter-free seek: the buffer has + // ~100ms of audio ready before the hardware starts consuming. 
+ if (pInstance->bHasAudio && pInstance->bAudioInitialized) { + PreFillAudioBuffer(pInstance); + } + + // Now start the audio client — buffer already has data, no gap if (pInstance->bHasAudio && pInstance->pAudioClient && wasPlaying) { - Sleep(kSeekAudioSettleMs); + EnterCriticalSection(&pInstance->csAudioFeed); pInstance->pAudioClient->Start(); + LeaveCriticalSection(&pInstance->csAudioFeed); } - // Signal audio thread to continue + // Signal audio thread to resume its feed loop if (pInstance->hAudioReadyEvent) SetEvent(pInstance->hAudioReadyEvent); + if (pInstance->hAudioSamplesReadyEvent) + SetEvent(pInstance->hAudioSamplesReadyEvent); return S_OK; } @@ -1126,9 +1148,11 @@ NATIVEVIDEOPLAYER_API HRESULT SetPlaybackState(VideoPlayerInstance* pInstance, B pInstance->bHasInitialFrame = FALSE; - // Start audio client if available + // Start audio client if available (under csAudioFeed for thread safety) if (pInstance->pAudioClient && pInstance->bAudioInitialized) { + EnterCriticalSection(&pInstance->csAudioFeed); hr = pInstance->pAudioClient->Start(); + LeaveCriticalSection(&pInstance->csAudioFeed); if (FAILED(hr)) { PrintHR("Failed to start audio client", hr); } @@ -1153,9 +1177,10 @@ NATIVEVIDEOPLAYER_API HRESULT SetPlaybackState(VideoPlayerInstance* pInstance, B } } - if (pInstance->hAudioReadyEvent) { + if (pInstance->hAudioReadyEvent) SetEvent(pInstance->hAudioReadyEvent); - } + if (pInstance->hAudioSamplesReadyEvent) + SetEvent(pInstance->hAudioSamplesReadyEvent); } else { // Pause playback if (pInstance->llPauseStart == 0) { @@ -1165,7 +1190,9 @@ NATIVEVIDEOPLAYER_API HRESULT SetPlaybackState(VideoPlayerInstance* pInstance, B pInstance->bHasInitialFrame = FALSE; if (pInstance->pAudioClient && pInstance->bAudioInitialized) { + EnterCriticalSection(&pInstance->csAudioFeed); pInstance->pAudioClient->Stop(); + LeaveCriticalSection(&pInstance->csAudioFeed); } if (pInstance->bUseClockSync && pInstance->pPresentationClock) { @@ -1245,6 +1272,8 @@ 
NATIVEVIDEOPLAYER_API void CloseMedia(VideoPlayerInstance* pInstance) { pInstance->llCurrentPosition = 0; pInstance->bSeekInProgress = FALSE; pInstance->playbackSpeed = 1.0f; + pInstance->resampleFracPos = 0.0; + pInstance->audioLatencyMs.store(0.0, std::memory_order_relaxed); pInstance->bIsNetworkSource = FALSE; pInstance->bIsLiveStream = FALSE; @@ -1272,7 +1301,24 @@ NATIVEVIDEOPLAYER_API HRESULT SetPlaybackSpeed(VideoPlayerInstance* pInstance, f return pInstance->pHLSPlayer->SetPlaybackSpeed(speed); speed = std::max(0.5f, std::min(speed, 2.0f)); + + // Recalibrate the wall-clock reference so that the position accumulated + // at the old speed is preserved when switching to the new speed. + // Without this, `elapsed * newSpeed` would produce a wrong position. + if (pInstance->bUseClockSync && pInstance->llPlaybackStartTime != 0) { + float oldSpeed = pInstance->playbackSpeed.load(); + EnterCriticalSection(&pInstance->csClockSync); + LONGLONG now = GetCurrentTimeMs(); + LONGLONG elapsedMs = now - pInstance->llPlaybackStartTime - pInstance->llTotalPauseTime; + double currentPositionMs = elapsedMs * static_cast(oldSpeed); + // Solve: (now - newStart - pause) * newSpeed = currentPositionMs + pInstance->llPlaybackStartTime = now - pInstance->llTotalPauseTime + - static_cast(currentPositionMs / speed); + LeaveCriticalSection(&pInstance->csClockSync); + } + pInstance->playbackSpeed = speed; + pInstance->resampleFracPos = 0.0; if (pInstance->bUseClockSync && pInstance->pPresentationClock) { IMFRateControl* pRateControl = nullptr; diff --git a/mediaplayer/src/jvmMain/native/windows/Utils.cpp b/mediaplayer/src/jvmMain/native/windows/Utils.cpp index 78724730..91731f73 100644 --- a/mediaplayer/src/jvmMain/native/windows/Utils.cpp +++ b/mediaplayer/src/jvmMain/native/windows/Utils.cpp @@ -8,8 +8,9 @@ void PreciseSleepHighRes(double ms) { if (ms <= 0.1) return; - // Use a single static timer for all sleep operations - static HANDLE hTimer = CreateWaitableTimer(nullptr, TRUE, 
nullptr); + // Each thread gets its own waitable timer to avoid race conditions + // when multiple threads (audio + video) call this concurrently. + thread_local HANDLE hTimer = CreateWaitableTimer(nullptr, TRUE, nullptr); if (!hTimer) { std::this_thread::sleep_for(std::chrono::duration(ms)); return; diff --git a/mediaplayer/src/jvmMain/native/windows/VideoPlayerInstance.h b/mediaplayer/src/jvmMain/native/windows/VideoPlayerInstance.h index 50f115f9..b7617f75 100644 --- a/mediaplayer/src/jvmMain/native/windows/VideoPlayerInstance.h +++ b/mediaplayer/src/jvmMain/native/windows/VideoPlayerInstance.h @@ -17,7 +17,7 @@ class HLSPlayer; */ struct VideoPlayerInstance { // Video related members - IMFSourceReader* pSourceReader = nullptr; + IMFSourceReader* pSourceReader = nullptr; // Single reader for both audio & video IMFMediaBuffer* pLockedBuffer = nullptr; BYTE* pLockedBytes = nullptr; DWORD lockedMaxSize = 0; @@ -27,13 +27,13 @@ struct VideoPlayerInstance { UINT32 nativeWidth = 0; // Original video resolution (before scaling) UINT32 nativeHeight = 0; BOOL bEOF = FALSE; - + // Frame caching for paused state IMFSample* pCachedSample = nullptr; // Cached sample for paused state BOOL bHasInitialFrame = FALSE; // Whether we've read an initial frame when paused // Audio related members - IMFSourceReader* pSourceReaderAudio = nullptr; + IMFSourceReader* pSourceReaderAudio = nullptr; // Separate reader for audio (no serialization with video) BOOL bHasAudio = FALSE; BOOL bAudioInitialized = FALSE; IAudioClient* pAudioClient = nullptr; @@ -46,6 +46,12 @@ struct VideoPlayerInstance { HANDLE hAudioReadyEvent = nullptr; IAudioEndpointVolume* pAudioEndpointVolume = nullptr; + // WASAPI latency: updated by audio thread, read by video thread for A/V sync + std::atomic audioLatencyMs{0.0}; + + // Protects WASAPI GetBuffer/ReleaseBuffer vs Stop/Reset/Start during seeks + CRITICAL_SECTION csAudioFeed{}; + // Media Foundation clock for synchronization IMFPresentationClock* 
pPresentationClock = nullptr; IMFMediaSource* pMediaSource = nullptr; @@ -63,6 +69,10 @@ struct VideoPlayerInstance { std::atomic instanceVolume{1.0f}; // Volume specific to this instance (1.0 = 100%) std::atomic playbackSpeed{1.0f}; // Playback speed (1.0 = 100%) + // Audio resampling fractional position for playback speed (audio thread only) + double resampleFracPos = 0.0; + + // Network / HLS streaming BOOL bIsNetworkSource = FALSE; // TRUE when URL is http:// or https:// BOOL bIsLiveStream = FALSE; // TRUE when duration is unknown (live HLS) diff --git a/mediaplayer/src/jvmMain/native/windows/build.bat b/mediaplayer/src/jvmMain/native/windows/build.bat index 39d9514f..264a980c 100644 --- a/mediaplayer/src/jvmMain/native/windows/build.bat +++ b/mediaplayer/src/jvmMain/native/windows/build.bat @@ -3,6 +3,17 @@ setlocal echo === Starting compilation for x64 and ARM64 === +rem Clean previous build directories to ensure a fresh build +if exist build-x64 rmdir /s /q build-x64 +if exist build-arm64 rmdir /s /q build-arm64 + +rem Clear local DLL cache so the JVM loader picks up the new build +set "NATIVE_CACHE=%LOCALAPPDATA%\composemediaplayer\native" +if exist "%NATIVE_CACHE%" ( + echo Clearing native DLL cache: %NATIVE_CACHE% + rmdir /s /q "%NATIVE_CACHE%" +) + echo. echo === x64 Configuration === cmake -B build-x64 -A x64 . diff --git a/sample/composeApp/build.gradle.kts b/sample/composeApp/build.gradle.kts index 98a572c2..6743f480 100644 --- a/sample/composeApp/build.gradle.kts +++ b/sample/composeApp/build.gradle.kts @@ -115,7 +115,7 @@ compose.desktop { packageName = "sample" packageVersion = "1.0.0" linux { - modules("jdk.security.auth") + modules("jdk.security.auth", "jdk.accessibility") } macOS { jvmArgs(