Skip to content

Commit 64a4f72

Browse files
fix(subtitles): publish dynamic producer shift, expose sourceTime for cue lookup
Previous fix (26a5158) shifted subtitle cues down by HLSVideoEngine.firstKeyframeSeconds, on the assumption that AVPlayer's HLS clock sits at `source_pts - firstKeyframeSeconds` for the whole session. That holds for initial-start sessions but breaks on producer restarts: matroska seek imprecision routinely lands past the planned keyframe and the producer applies a per-session videoShiftPts = actualFirstDts - desiredFirstTfdt to compensate. On a Cars restart at seg-1240 the shift came out to 3920 ms, so every cue rendered ~3.92 s late even with the prior fix in place.

The shift is a per-producer property (changes on every restart), so the engine now plumbs it dynamically:

- HLSSegmentProducer fires `onVideoShiftKnown` once when the video gate opens, with the shift in source video TB units.
- HLSVideoEngine converts it to seconds via the captured `sourceVideoTbSeconds`, stores it as `playlistShiftSeconds`, and forwards it via `onPlaylistShiftChanged` to AetherEngine.
- AetherEngine publishes `playlistShiftSeconds` and a derived `sourceTime` (= currentTime + shift). Subtitle cues stay in their raw source-PTS form; the translation happens at lookup time.
- The embedded-subtitle side demuxer now seeks against `sourceTime` instead of `currentTime`, so the seek lands at the actual source playhead rather than `playlistShiftSeconds` before it.

Reverts the cue-shift logic from 26a5158: cues land in source PTS again, host renders against `engine.sourceTime`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 0842e07 commit 64a4f72

3 files changed

Lines changed: 121 additions & 60 deletions

File tree

Sources/AetherEngine/AetherEngine.swift

Lines changed: 67 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -207,19 +207,29 @@ public final class AetherEngine: ObservableObject {
207207
/// background suspension.
208208
private var loadedURL: URL?
209209

210-
/// Seconds to subtract from every embedded or sidecar subtitle
211-
/// cue's startTime / endTime so the cue's time range is in the
212-
/// active player's clock frame rather than source-PTS-seconds.
210+
/// Seconds to ADD to AVPlayer's HLS clock to recover the source
211+
/// PTS of the currently displayed frame on the native path.
213212
///
214-
/// On the native HLS path AVPlayer's `currentTime` is
215-
/// `source_pts - HLSVideoEngine.firstKeyframeSeconds` (the producer
216-
/// subtracts the first keyframe's PTS so seg-0's tfdt lands at the
217-
/// playlist's cumulative-EXTINF origin of 0). Subtitle decoders
218-
/// produce cues in source-PTS-seconds, so on the native path they
219-
/// land late by `firstKeyframeSeconds` (~500 ms for Cars on Apple
220-
/// TV BD remuxes) unless we shift them here. SW path matches source
221-
/// PTS directly, so the offset stays 0 there.
222-
private var playlistOriginOffsetSeconds: Double = 0
213+
/// The producer subtracts `videoShiftPts` from every packet's
214+
/// pts/dts so seg-0's fragment tfdt aligns with the playlist's
215+
/// cumulative-EXTINF origin. AVPlayer's clock therefore sits at
216+
/// `source_pts - playlistShiftSeconds`. Subtitles come from an
217+
/// independent side-demuxer (or pre-decoded sidecar) and land in
218+
/// raw source PTS, so the cue lookup and the side-demuxer's seek
219+
/// have to add this shift back to map between the two clocks.
220+
///
221+
/// Updated by `HLSVideoEngine.onPlaylistShiftChanged` on every
222+
/// producer init / restart (matroska seek imprecision means the
223+
/// shift can differ session-to-session for the same source).
224+
/// 0 on the SW path; the SW renderer's clock already tracks
225+
/// source PTS directly.
226+
@Published public private(set) var playlistShiftSeconds: Double = 0
227+
228+
/// Source PTS of the currently displayed frame, derived on every
229+
/// `currentTime` or `playlistShiftSeconds` update. Hosts that
230+
/// schedule against the source timeline (subtitle overlay, side-
231+
/// demuxer seek) should read this instead of `currentTime`.
232+
@Published public private(set) var sourceTime: Double = 0
223233

224234
/// The `LoadOptions` the host passed for the current session.
225235
/// Replayed on every internal reopen of the source URL
@@ -570,9 +580,15 @@ public final class AetherEngine: ObservableObject {
570580
session.onFirstHDR10PlusDetected = { [weak self] in
571581
Task { @MainActor in self?.handleHDR10PlusDetected() }
572582
}
583+
session.onPlaylistShiftChanged = { [weak self] seconds in
584+
Task { @MainActor in
585+
guard let self = self else { return }
586+
self.playlistShiftSeconds = seconds
587+
self.sourceTime = self.currentTime + seconds
588+
}
589+
}
573590
let playbackURL = try session.start()
574591
self.nativeVideoSession = session
575-
self.playlistOriginOffsetSeconds = session.firstKeyframeSeconds
576592

577593
let host = NativeAVPlayerHost()
578594
host.playerLayer.videoGravity = _videoGravity
@@ -591,7 +607,11 @@ public final class AetherEngine: ObservableObject {
591607

592608
nativeCancellables.removeAll()
593609
host.$currentTime
594-
.sink { [weak self] value in self?.currentTime = value }
610+
.sink { [weak self] value in
611+
guard let self = self else { return }
612+
self.currentTime = value
613+
self.sourceTime = value + self.playlistShiftSeconds
614+
}
595615
.store(in: &nativeCancellables)
596616
host.$duration
597617
.sink { [weak self] value in
@@ -640,13 +660,17 @@ public final class AetherEngine: ObservableObject {
640660
Task { @MainActor in self?.handleHDR10PlusDetected() }
641661
}
642662
self.softwareHost = host
643-
// SW path's currentTime already tracks source PTS directly,
644-
// so subtitle cues need no shift to land in the player clock.
645-
self.playlistOriginOffsetSeconds = 0
663+
// SW path's currentTime tracks source PTS directly, so the
664+
// AVPlayer-clock shift is 0 and sourceTime mirrors currentTime.
665+
self.playlistShiftSeconds = 0
646666

647667
softwareCancellables.removeAll()
648668
host.$currentTime
649-
.sink { [weak self] value in self?.currentTime = value }
669+
.sink { [weak self] value in
670+
guard let self = self else { return }
671+
self.currentTime = value
672+
self.sourceTime = value
673+
}
650674
.store(in: &softwareCancellables)
651675
host.$duration
652676
.sink { [weak self] value in
@@ -739,7 +763,8 @@ public final class AetherEngine: ObservableObject {
739763
let streamIdx = activeEmbeddedSubtitleStreamIndex
740764
embeddedSubtitleTask?.cancel()
741765
subtitleCues = []
742-
startEmbeddedSubtitleTask(url: url, streamIndex: streamIdx, startAt: target)
766+
// Side-demuxer seeks in source PTS, not AVPlayer clock.
767+
startEmbeddedSubtitleTask(url: url, streamIndex: streamIdx, startAt: target + playlistShiftSeconds)
743768
}
744769

745770
// AVPlayer surfaces post-seek readiness via its own KVO; the
@@ -974,7 +999,16 @@ public final class AetherEngine: ObservableObject {
974999
isLoadingSubtitles = true
9751000
activeEmbeddedSubtitleStreamIndex = Int32(index)
9761001

977-
startEmbeddedSubtitleTask(url: url, streamIndex: Int32(index), startAt: currentTime)
1002+
// Side-demuxer seeks in source PTS, not AVPlayer clock. On the
1003+
// native HLS path AVPlayer's currentTime sits at
1004+
// `source_pts - playlistShiftSeconds`, so we add the shift back
1005+
// before handing it to the side demuxer. Without this, the
1006+
// demuxer seek lands `playlistShiftSeconds` before the actual
1007+
// source playhead and the first emitted cue (typically a long-
1008+
// tail past cue) is followed by a gap that reads as "subs are
1009+
// 3-5 s late" — repro on Cars at a restart-driven shift of
1010+
// ~3.92 s.
1011+
startEmbeddedSubtitleTask(url: url, streamIndex: Int32(index), startAt: sourceTime)
9781012
}
9791013

9801014
/// Spin up the side-demuxer Task that streams cues into the
@@ -1148,27 +1182,24 @@ public final class AetherEngine: ObservableObject {
11481182
)
11491183
}
11501184

1151-
// Shift source-PTS-seconds into the active player's clock frame.
1152-
// 0 on the SW path; firstKeyframeSeconds on the native path so
1153-
// cues match AVPlayer.currentTime instead of trailing it by the
1154-
// first-keyframe offset (~500 ms on most MKV BD remuxes).
1155-
let originOffset = playlistOriginOffsetSeconds
1185+
// Cues stay in source PTS; the AVPlayer-clock translation is
1186+
// applied at the lookup boundary (host renders against
1187+
// `engine.sourceTime`, side-demuxer seeks against the same).
11561188

11571189
// PGS clear-event trim: each PGS event implicitly terminates
11581190
// whatever was on screen. Truncate any image cue whose
11591191
// interval straddles the new event's start so it disappears
11601192
// at the right moment instead of staying up for the
11611193
// UINT32_MAX (~50-day) default the decoder hands us.
11621194
if let trimAt = event.pgsTrimAt {
1163-
let shiftedTrim = trimAt - originOffset
11641195
for i in 0..<subtitleCues.count {
11651196
guard case .image = subtitleCues[i].body else { continue }
11661197
let cue = subtitleCues[i]
1167-
if cue.startTime < shiftedTrim && cue.endTime > shiftedTrim {
1198+
if cue.startTime < trimAt && cue.endTime > trimAt {
11681199
subtitleCues[i] = SubtitleCue(
11691200
id: cue.id,
11701201
startTime: cue.startTime,
1171-
endTime: shiftedTrim,
1202+
endTime: trimAt,
11721203
body: cue.body
11731204
)
11741205
}
@@ -1180,22 +1211,16 @@ public final class AetherEngine: ObservableObject {
11801211
// in sorted position so the overlay's lookup (binary search
11811212
// then walk for overlapping cues) stays correct.
11821213
for cue in event.cues {
1183-
let shifted = SubtitleCue(
1184-
id: cue.id,
1185-
startTime: cue.startTime - originOffset,
1186-
endTime: cue.endTime - originOffset,
1187-
body: cue.body
1188-
)
11891214
var lo = 0, hi = subtitleCues.count
11901215
while lo < hi {
11911216
let mid = (lo + hi) / 2
1192-
if subtitleCues[mid].startTime < shifted.startTime {
1217+
if subtitleCues[mid].startTime < cue.startTime {
11931218
lo = mid + 1
11941219
} else {
11951220
hi = mid
11961221
}
11971222
}
1198-
subtitleCues.insert(shifted, at: lo)
1223+
subtitleCues.insert(cue, at: lo)
11991224
}
12001225
}
12011226

@@ -1236,23 +1261,10 @@ public final class AetherEngine: ObservableObject {
12361261
await MainActor.run {
12371262
guard let self = self else { return }
12381263
guard self.isSubtitleActive else { return }
1239-
// Shift sidecar cues into the active player's clock frame.
1240-
// Sidecar SRT/ASS/VTT timestamps are source-PTS-seconds
1241-
// (Jellyfin's subtitle extraction preserves source PTS);
1242-
// on the native path AVPlayer.currentTime sits at
1243-
// source - firstKeyframeSeconds, so cues without the
1244-
// shift trail playback by that amount.
1245-
let offset = self.playlistOriginOffsetSeconds
1246-
self.subtitleCues = offset == 0
1247-
? cues
1248-
: cues.map { cue in
1249-
SubtitleCue(
1250-
id: cue.id,
1251-
startTime: cue.startTime - offset,
1252-
endTime: cue.endTime - offset,
1253-
body: cue.body
1254-
)
1255-
}
1264+
// Sidecar cues stay in source PTS; host renders
1265+
// against `engine.sourceTime`, which already adds the
1266+
// active producer's playlist shift to AVPlayer's clock.
1267+
self.subtitleCues = cues
12561268
self.isLoadingSubtitles = false
12571269
}
12581270
}
@@ -1302,7 +1314,8 @@ public final class AetherEngine: ObservableObject {
13021314

13031315
displayCriteria.reset()
13041316
playbackBackend = .none
1305-
playlistOriginOffsetSeconds = 0
1317+
playlistShiftSeconds = 0
1318+
sourceTime = 0
13061319

13071320
cancelSidecarTask()
13081321
embeddedSubtitleTask?.cancel()

Sources/AetherEngine/Video/HLSSegmentProducer.swift

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,16 @@ final class HLSSegmentProducer: @unchecked Sendable {
262262
/// forwards to AetherEngine for the `videoFormat` upgrade.
263263
var onFirstHDR10PlusDetected: (@Sendable () -> Void)?
264264

265+
/// Fires once per producer when its video gate opens, with the producer's
266+
/// videoShiftPts in source video time base units. Lets the engine
267+
/// translate AVPlayer's playlist clock back to source PTS for the
268+
/// independent side-demuxer subtitle reader (subtitle cues land in
269+
/// raw source PTS but AVPlayer.currentTime sits at
270+
/// `source_pts - videoShiftPts`). Re-fires on every producer
271+
/// restart since matroska seek imprecision can produce a different
272+
/// shift for the same source.
273+
var onVideoShiftKnown: (@Sendable (Int64) -> Void)?
274+
265275
/// Latched once the signature has been seen in this producer's
266276
/// packet stream so the scan goes silent for the remainder of the
267277
/// session. The byte scan is cheap (~µs per packet) but there's no
@@ -639,6 +649,7 @@ final class HLSSegmentProducer: @unchecked Sendable {
639649
+ "shift=\(videoShiftPts)",
640650
category: .session
641651
)
652+
onVideoShiftKnown?(videoShiftPts)
642653
} else {
643654
// Drop pre-keyframe leading B-frames (HEVC RASL).
644655
// An open-GOP source can emit B-frames whose

Sources/AetherEngine/Video/HLSVideoEngine.swift

Lines changed: 43 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -217,13 +217,32 @@ public final class HLSVideoEngine: @unchecked Sendable {
217217
private var firstKeyframePts: Int64 = 0
218218

219219
/// `firstKeyframePts` converted to seconds using the source video
220-
/// time base. Exposed so the AetherEngine subtitle path can shift
221-
/// embedded + sidecar cue timestamps into AVPlayer's clock frame,
222-
/// which is `source_pts - firstKeyframeSeconds`. Without this
223-
/// shift, sources whose first keyframe sits past PTS=0 (most MKV
224-
/// remuxes, e.g. Cars at ~500 ms) render subtitles a constant
225-
/// `firstKeyframeSeconds` late.
220+
/// time base. Retained for diagnostics; the actual AVPlayer-clock
221+
/// to source-PTS translation lives in `playlistShiftSeconds` below,
222+
/// which the producer updates dynamically on each gate open (the
223+
/// shift can differ from `firstKeyframeSeconds` on restart sessions
224+
/// when matroska seek imprecision lands past the planned target).
226225
public private(set) var firstKeyframeSeconds: Double = 0
226+
227+
/// `videoShiftPts` of the currently active producer, converted to
228+
/// seconds via the source video time base. Updated by the producer's
229+
/// `onVideoShiftKnown` callback on every gate open. AVPlayer's HLS
230+
/// clock sits at `source_pts - playlistShiftSeconds`; the subtitle
231+
/// path and side-demuxer seek read this to translate back to
232+
/// source time.
233+
public private(set) var playlistShiftSeconds: Double = 0
234+
235+
/// Source video time base, latched in `start()` so the
236+
/// `onVideoShiftKnown` callback can convert producer PTS shift to
237+
/// seconds without having to thread the TB through the callback
238+
/// signature on every fire.
239+
private var sourceVideoTbSeconds: Double = 1.0 / 1000.0
240+
241+
/// Fires when the active producer's `playlistShiftSeconds` changes
242+
/// (initial gate open or restart). AetherEngine wires this to keep
243+
/// its own published shift in step so the subtitle overlay's cue
244+
/// lookup uses the right source-time conversion.
245+
var onPlaylistShiftChanged: (@Sendable (Double) -> Void)?
227246
/// Session-long FLAC bridge for codecs that aren't legal in fMP4.
228247
/// Owned by the engine (not the producer) so that producer
229248
/// restarts on scrub don't lose the bridge's encoder state. The
@@ -318,6 +337,9 @@ public final class HLSVideoEngine: @unchecked Sendable {
318337
}
319338

320339
let videoTimeBase = videoStream.pointee.time_base
340+
if videoTimeBase.num > 0, videoTimeBase.den > 0 {
341+
sourceVideoTbSeconds = Double(videoTimeBase.num) / Double(videoTimeBase.den)
342+
}
321343
let durationSeconds = dem.duration
322344
guard durationSeconds > 0 else {
323345
throw HLSVideoEngineError.zeroDuration
@@ -938,9 +960,24 @@ public final class HLSVideoEngine: @unchecked Sendable {
938960
prod.onFirstHDR10PlusDetected = { [weak self] in
939961
self?.notifyHDR10PlusOnce()
940962
}
963+
prod.onVideoShiftKnown = { [weak self] shiftPts in
964+
self?.handleVideoShiftKnown(shiftPts)
965+
}
941966
return prod
942967
}
943968

969+
/// Converts the producer's `videoShiftPts` (in source video TB)
970+
/// to seconds, stores it on this engine, and notifies AetherEngine that the
971+
/// AVPlayer-clock-to-source-PTS translation may have changed.
972+
/// Fires on initial start (shift ≈ firstKeyframeSeconds) and on
973+
/// every restart (shift can be larger when matroska seek
974+
/// imprecision lands past the planned target).
975+
private func handleVideoShiftKnown(_ shiftPts: Int64) {
976+
let seconds = shiftPts == Int64.min ? 0 : Double(shiftPts) * sourceVideoTbSeconds
977+
playlistShiftSeconds = seconds
978+
onPlaylistShiftChanged?(seconds)
979+
}
980+
944981
/// Debounced relay. Producers each have their own once-per-instance
945982
/// scan latch; this guards against re-firing after a scrub restart
946983
/// (which builds a fresh producer that re-scans from packet zero).

0 commit comments

Comments
 (0)