@@ -181,6 +181,12 @@ async function concatStreamsToMP4BoxFile(
181181 let lastVSamp : any = null ;
182182 let lastASamp : any = null ;
183183 for ( const stream of streams ) {
184+ // reset first sample timestamps for each stream to enable normalization
185+ let firstVDTS : number | null = null ;
186+ let firstVCTS : number | null = null ;
187+ let firstADTS : number | null = null ;
188+ let firstACTS : number | null = null ;
189+
184190 await new Promise < void > ( async ( resolve ) => {
185191 autoReadStream ( stream . pipeThrough ( new SampleTransform ( ) ) , {
186192 onDone : resolve ,
@@ -203,10 +209,32 @@ async function concatStreamsToMP4BoxFile(
203209 const offsetCTS = type === 'video' ? vCTS : aCTS ;
204210
205211 samples . forEach ( ( s ) => {
212+ let normalizedDTS : number ;
213+ let normalizedCTS : number ;
214+
215+ if ( type === 'video' ) {
216+ // capture first sample timestamps for normalization
217+ if ( firstVDTS === null ) {
218+ firstVDTS = s . dts ;
219+ firstVCTS = s . cts ;
220+ }
221+ // normalize to start from 0, then add offset
222+ normalizedDTS = s . dts - firstVDTS ;
223+ normalizedCTS = s . cts - ( firstVCTS ?? 0 ) ;
224+ } else {
225+ // same for audio
226+ if ( firstADTS === null ) {
227+ firstADTS = s . dts ;
228+ firstACTS = s . cts ;
229+ }
230+ normalizedDTS = s . dts - firstADTS ;
231+ normalizedCTS = s . cts - ( firstACTS ?? 0 ) ;
232+ }
233+
206234 outfile . addSample ( trackId , s . data , {
207235 duration : s . duration ,
208- dts : s . dts + offsetDTS ,
209- cts : s . cts + offsetCTS ,
236+ dts : normalizedDTS + offsetDTS ,
237+ cts : normalizedCTS + offsetCTS ,
210238 is_sync : s . is_sync ,
211239 } ) ;
212240 } ) ;
@@ -222,13 +250,19 @@ async function concatStreamsToMP4BoxFile(
222250 } ,
223251 } ) ;
224252 } ) ;
225- if ( lastVSamp != null ) {
226- vDTS += lastVSamp . dts ;
227- vCTS += lastVSamp . cts ;
253+ // calculate offsets based on normalized timestamps
254+ if ( lastVSamp != null && firstVDTS !== null && firstVCTS !== null ) {
255+ // duration of this normalized stream
256+ const normalizedVDTS = lastVSamp . dts - firstVDTS + lastVSamp . duration ;
257+ const normalizedVCTS = lastVSamp . cts - firstVCTS + lastVSamp . duration ;
258+ vDTS += normalizedVDTS ;
259+ vCTS += normalizedVCTS ;
228260 }
229- if ( lastASamp != null ) {
230- aDTS += lastASamp . dts ;
231- aCTS += lastASamp . cts ;
261+ // coerce audio timing to match video timing by converting video timescale to audio timescale
262+ if ( lastASamp != null && lastVSamp != null ) {
263+ const videoToAudioRatio = lastASamp . timescale / lastVSamp . timescale ;
264+ aDTS = Math . round ( vDTS * videoToAudioRatio ) ;
265+ aCTS = Math . round ( vCTS * videoToAudioRatio ) ;
232266 }
233267 }
234268}
0 commit comments