Skip to content

Commit ab18327

Browse files
committed
fix(helper): widen NVDEC pool to 32 + retry truncated video via software
NVDEC reference-pool starvation on the NVENC backend surfaces a second failure mode beyond the cold-start one that the seg-0 software-decode path already handles: at certain content offsets inside HEVC sources (Tubi catalog at the 16-24 s mark in observed sessions), the decoder runs out of reference frames mid-segment and the encoder drops most video frames. The TS file passes the sync-byte gate and the PES-presence gate because PAT/PMT and audio land normally, but ffprobe reports the video stream as 0.7-1.0 s of duration against a 4 s window. The player either freezes when the video stream ends or desyncs audio. Two changes: 1. Bump -extra_hw_frames from 16 to 32 on the NVENC backend. 16 was a first guess and works for shallow reference chains, but Tubi content at certain time offsets needs more. 32 covers the deepest chains observed in the wild without meaningfully growing GPU memory (each surface is ~1 MB at 480p; the total pool is bounded). 2. Add a post-encode video-duration check in HelperLeaseWorker.RunAsync. After ValidateMpegTs passes, on the hardware path only, run ProbeVideoDurationAsync against the output and compare the result to the lease's expected duration. If the video stream came back at less than 80 % of the expected duration, retry the encode via BuildSegmentCommandSoftwareFallback (the same path seg 0 uses) before uploading. This catches the truncated output locally so a known-bad segment never reaches the server.
1 parent fe2c711 commit ab18327

2 files changed

Lines changed: 139 additions & 2 deletions

File tree

src/WKVRCProxy/Helper/HelperLeaseWorker.cs

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,68 @@ public static async Task<HelperLeaseRunResult> RunAsync(
183183
return Result(false, "local_validation_failed", validationError);
184184
}
185185

186+
// Post-encode video-duration check. NVDEC reference-pool starvation
187+
// on HEVC sources produces a TS that's structurally valid -- sync
188+
// bytes plus PES packets plus full-length audio -- but where the
189+
// encoder dropped most of the video frames mid-segment, leaving
190+
// ~0.7-1.0 s of video out of the 4 s window. Players that try to
191+
// decode it freeze when the video stream ends mid-segment. The
192+
// local validator can't catch this without probing duration, and
193+
// the server has no way to retry on the same helper. If we're on
194+
// the hardware path AND the video came back short, fall back to
195+
// software decode (same path seg 0 uses) before uploading.
196+
if (!softwareDecodeFirst && leaseFrame.Duration > 0)
197+
{
198+
double? vidDuration = await ProbeVideoDurationAsync(ffmpegLocation.Path, outputPath, deadlineCts.Token).ConfigureAwait(false);
199+
if (vidDuration.HasValue && vidDuration.Value < leaseFrame.Duration * 0.8)
200+
{
201+
WarnLease(leaseFrame, "truncated_video", "video_dur="
202+
+ vidDuration.Value.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture)
203+
+ " expected=" + leaseFrame.Duration.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture)
204+
+ " bytes=" + info.Length + " retry=software");
205+
TryDeleteOutput(outputPath);
206+
207+
TranscodeFfmpegCommand swCommand = TranscodeWorkerProcess.BuildSegmentCommandSoftwareFallback(
208+
ffmpegLocation.Path,
209+
lease,
210+
encoder,
211+
outputPath,
212+
leaseFrame.TargetWidth,
213+
leaseFrame.TargetHeight,
214+
leaseFrame.TargetBitrateKbps,
215+
hasAudio: leaseFrame.HasAudio,
216+
quality: quality);
217+
218+
LogLease(leaseFrame, "ffmpeg sw-retry start", "reason=truncated_video deadline_ms=" + deadlineMs);
219+
ffmpegResult = await RunFfmpegAsync(swCommand, deadlineCts.Token, ct).ConfigureAwait(false);
220+
if (ffmpegResult.TimedOut)
221+
{
222+
WarnLease(leaseFrame, "deadline", "sw-retry ffmpeg exceeded "
223+
+ deadlineMs + "ms stderr=" + Snip(ffmpegResult.Stderr, 180));
224+
return Result(false, "deadline", "helper sw-retry exceeded the server deadline");
225+
}
226+
if (ffmpegResult.ExitCode != 0)
227+
{
228+
WarnLease(leaseFrame, "ffmpeg sw-retry failed", "exit=" + ffmpegResult.ExitCode
229+
+ " stderr=" + Snip(ffmpegResult.Stderr, 180));
230+
return Result(false, "ffmpeg_failed", Snip(ffmpegResult.Stderr, 240));
231+
}
232+
info = new FileInfo(outputPath);
233+
if (!info.Exists || info.Length <= 0)
234+
{
235+
WarnLease(leaseFrame, "empty sw-retry output", "ffmpeg sw-retry completed without a segment");
236+
return Result(false, "empty_output", "ffmpeg sw-retry completed without a segment");
237+
}
238+
validationError = ValidateMpegTs(outputPath, info.Length);
239+
if (validationError != null)
240+
{
241+
WarnLease(leaseFrame, "sw-retry local_validation_failed", validationError + " bytes=" + info.Length);
242+
return Result(false, "local_validation_failed", validationError);
243+
}
244+
LogLease(leaseFrame, "ffmpeg sw-retry ok", "bytes=" + info.Length);
245+
}
246+
}
247+
186248
string uploadUrlHost = ExtractUrlHost(leaseFrame.UploadUrl);
187249
LogLease(leaseFrame, "upload start", "upload_url_host=" + uploadUrlHost
188250
+ " bytes=" + info.Length);
@@ -279,6 +341,77 @@ private static HelperRuntimeSignals DefaultSignals()
279341

280342
// Outcome of a single ffmpeg child-process run, as returned by RunFfmpegAsync.
//   TimedOut - callers treat true as "the run exceeded the server deadline"
//              and report a "deadline" failure.
//   ExitCode - callers treat any non-zero value as an ffmpeg failure.
//   Stderr   - captured stderr text; callers snip it into warning logs and
//              failure results.
private sealed record FfmpegProcessResult(bool TimedOut, int ExitCode, string Stderr);
281343

344+
// Probe the first video stream's duration (in seconds) via ffprobe.
//
// Parameters:
//   ffmpegPath - path of the ffmpeg binary; ffprobe is looked up next to it
//                through TryResolveFfprobePath.
//   outputPath - media file to inspect.
//   ct         - cancels the probe; a cancelled probe yields null, it does
//                not throw.
//
// Returns the duration when ffprobe exits with code 0 and reports a finite,
// positive number; otherwise null. Best-effort: every failure (ffprobe
// missing, process start failure, non-zero exit, no video stream,
// unparseable output, cancellation) short-circuits to null so the caller can
// decide what to do (the normal case is "skip the truncated-video retry and
// proceed to upload").
private static async Task<double?> ProbeVideoDurationAsync(
    string ffmpegPath,
    string outputPath,
    CancellationToken ct)
{
    string? ffprobePath = TryResolveFfprobePath(ffmpegPath);
    if (string.IsNullOrEmpty(ffprobePath)) return null;

    var psi = new ProcessStartInfo
    {
        FileName = ffprobePath,
        UseShellExecute = false,
        RedirectStandardOutput = true,
        RedirectStandardError = true,
        CreateNoWindow = true,
    };
    psi.ArgumentList.Add("-v");
    psi.ArgumentList.Add("error");
    psi.ArgumentList.Add("-show_entries");
    psi.ArgumentList.Add("stream=duration");
    psi.ArgumentList.Add("-select_streams");
    psi.ArgumentList.Add("v:0");
    psi.ArgumentList.Add("-of");
    psi.ArgumentList.Add("csv=p=0");
    psi.ArgumentList.Add(outputPath);

    using var p = new Process { StartInfo = psi };
    bool started = false;
    try
    {
        p.Start();
        started = true;

        // Drain both pipes concurrently so ffprobe can never block on a
        // full stderr buffer, and keep both tasks so neither is left
        // unobserved.
        Task<string> stdoutTask = p.StandardOutput.ReadToEndAsync(ct);
        Task<string> stderrTask = p.StandardError.ReadToEndAsync(ct);
        await p.WaitForExitAsync(ct).ConfigureAwait(false);
        await Task.WhenAll(stdoutTask, stderrTask).ConfigureAwait(false);
        if (p.ExitCode != 0) return null;

        // ffprobe can emit more than one line for MPEG-TS input (the same
        // stream may be listed under its program as well as at top level),
        // and the csv writer may append a trailing separator. Parsing the
        // whole stdout as one number would then fail and silently disable
        // the truncation check, so take the first line that yields a
        // usable value instead.
        string stdout = await stdoutTask.ConfigureAwait(false);
        foreach (string rawLine in stdout.Split('\n'))
        {
            string field = rawLine.Trim();
            int comma = field.IndexOf(',');
            if (comma >= 0) field = field.Substring(0, comma).Trim();
            if (field.Length == 0) continue;

            if (double.TryParse(field, System.Globalization.NumberStyles.Float,
                    System.Globalization.CultureInfo.InvariantCulture, out double secs)
                && double.IsFinite(secs)
                && secs > 0)
            {
                return secs;
            }
        }
        return null;
    }
    catch
    {
        // Cancellation or any other failure: do not leave an orphaned
        // ffprobe behind. Disposing a Process does not terminate it.
        if (started)
        {
            try
            {
                if (!p.HasExited) p.Kill(entireProcessTree: true);
            }
            catch
            {
                // Process already exited or cannot be killed; nothing more to do.
            }
        }
        return null;
    }
}
399+
400+
// Locate the ffprobe binary that sits in the same directory as the given
// ffmpeg binary ("ffprobe.exe" on Windows, "ffprobe" elsewhere). Returns the
// full candidate path when that file exists; returns null when ffmpegPath has
// no directory component, the file is absent, or any path/file-system call
// throws.
private static string? TryResolveFfprobePath(string ffmpegPath)
{
    try
    {
        string? folder = Path.GetDirectoryName(ffmpegPath);
        if (!string.IsNullOrEmpty(folder))
        {
            string binaryName = OperatingSystem.IsWindows() ? "ffprobe.exe" : "ffprobe";
            string candidate = Path.Combine(folder, binaryName);
            if (File.Exists(candidate))
            {
                return candidate;
            }
        }
    }
    catch
    {
        // Best-effort lookup: fall through to the shared null result.
    }

    return null;
}
414+
282415
private static async Task<FfmpegProcessResult> RunFfmpegAsync(
283416
TranscodeFfmpegCommand command,
284417
CancellationToken deadlineToken,

src/WKVRCProxy/Helper/TranscodeWorkerProcess.cs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -249,8 +249,12 @@ private static IReadOnlyList<string> HardwareDecodeOptionsFor(HardwareEncoderCap
249249
// (Tubi's catalog among them) use longer reference chains than the cuvid
250250
// default 25 covers and surface as "Could not find ref with POC N" decoder
251251
// warnings followed by a black/short segment. Bumping the pool stops the
252-
// decoder from dropping reference frames it still needs.
253-
HardwareEncoderBackend.Nvenc => new[] { "-hwaccel", "cuda", "-hwaccel_output_format", "cuda", "-extra_hw_frames", "16" },
252+
// decoder from dropping reference frames it still needs. 16 was the first
253+
// bump; observed Tubi content at certain time offsets still overruns it
254+
// and the encoder writes only ~0.7-1 s of video out of the 4 s window
255+
// before NVDEC drops the chain. 32 covers the deepest reference chains
256+
// seen in the wild without meaningfully growing GPU memory.
257+
HardwareEncoderBackend.Nvenc => new[] { "-hwaccel", "cuda", "-hwaccel_output_format", "cuda", "-extra_hw_frames", "32" },
254258
HardwareEncoderBackend.Qsv => new[] { "-hwaccel", "qsv", "-hwaccel_output_format", "qsv" },
255259
HardwareEncoderBackend.Amf or HardwareEncoderBackend.MediaFoundation =>
256260
new[] { "-hwaccel", "d3d11va", "-hwaccel_output_format", "d3d11" },

0 commit comments

Comments
 (0)