Skip to content

Commit e3642e6

Browse files
committed
fix: normalize mp4a wave esds in init segment for MSE compatibility
1 parent 5304005 commit e3642e6

2 files changed

Lines changed: 128 additions & 39 deletions

File tree

src/isofile.ts

Lines changed: 85 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ import { mehdBox } from '#/boxes/mehd';
3030
import { metaBox } from '#/boxes/meta';
3131
import { mfhdBox } from '#/boxes/mfhd';
3232
import { mvhdBox } from '#/boxes/mvhd';
33-
import { stppSampleEntry } from '#/boxes/sampleentries';
33+
import { mp4aSampleEntry, stppSampleEntry } from '#/boxes/sampleentries';
3434
import {
3535
AudioSampleEntry,
3636
HintSampleEntry,
@@ -515,7 +515,7 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
515515
this.updateSampleLists();
516516

517517
/* If the application needs to be informed that the 'moov' has been found,
518-
we create the information object and callback the application */
518+
we create the information object and callback the application */
519519
if (this.onReady && !this.readySent) {
520520
this.readySent = true;
521521
this.onReady(this.getInfo());
@@ -537,7 +537,7 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
537537
} else {
538538
if (this.nextParsePosition) {
539539
/* moov has not been parsed but the first buffer was received,
540-
the next fetch should probably be the next box start */
540+
the next fetch should probably be the next box start */
541541
nextFileStart = this.nextParsePosition;
542542
} else {
543543
/* No valid buffer has been parsed yet, we cannot know what to parse next */
@@ -754,8 +754,8 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
754754
if (!this.sampleProcessingStarted) return;
755755

756756
/* For each track marked for fragmentation,
757-
check if the next sample is there (i.e. if the sample information is known (i.e. moof has arrived) and if it has been downloaded)
758-
and create a fragment with it */
757+
check if the next sample is there (i.e. if the sample information is known (i.e. moof has arrived) and if it has been downloaded)
758+
and create a fragment with it */
759759
if (this.isFragmentationInitialized && this.onSegment !== undefined) {
760760
const consumedTracks = new Set<number>();
761761
while (
@@ -859,8 +859,8 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
859859
}
860860

861861
/* A fragment is created by a collection of samples, but the segment is the accumulation in the
862-
buffer of these fragments. It is flushed only as requested by the application (nb_samples)
863-
to avoid too many callbacks */
862+
buffer of these fragments. It is flushed only as requested by the application (nb_samples)
863+
to avoid too many callbacks */
864864
if (isSegmentBoundary || isSizeBoundary || isFlush) {
865865
if (isSegmentOverdue) {
866866
Log.warn(
@@ -914,7 +914,7 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
914914

915915
if (this.onSamples !== undefined) {
916916
/* For each track marked for data export,
917-
check if the next sample is there (i.e. has been downloaded) and send it */
917+
check if the next sample is there (i.e. has been downloaded) and send it */
918918
for (let i = 0; i < this.extractedTracks.length; i++) {
919919
const extractTrak = this.extractedTracks[i];
920920
const trak = extractTrak.trak;
@@ -1038,7 +1038,7 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
10381038
}
10391039

10401040
/* Finds the byte offset for a given time on a given track
1041-
also returns the time of the previous rap */
1041+
also returns the time of the previous rap */
10421042
seekTrack(time: number, useRap: boolean, trak: trakBox) {
10431043
let rap_seek_sample_num = 0;
10441044
let seek_sample_num = 0;
@@ -1140,7 +1140,7 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
11401140
seek_info = { offset: this.nextParsePosition, time: 0 };
11411141
} else {
11421142
/* check if the seek position is already in some buffer and
1143-
in that case return the end of that buffer (or of the last contiguous buffer) */
1143+
in that case return the end of that buffer (or of the last contiguous buffer) */
11441144
/* TODO: Should wait until append operations are done */
11451145
seek_info.offset = this.stream.getEndFilePositionAfter(seek_info.offset);
11461146
}
@@ -1234,23 +1234,31 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
12341234
const stream = new DataStream();
12351235
ftyp.write(stream);
12361236

1237-
/* we can now create the new mvex box */
1238-
const mvex = moov.addBox(new mvexBox());
1239-
if (total_duration) {
1240-
const mehd = mvex.addBox(new mehdBox());
1241-
mehd.fragment_duration = total_duration;
1242-
}
1237+
const restoreCallbacks = ISOFile.normalizeAudioSampleEntriesForFragmentedInit(moov.traks);
1238+
1239+
try {
1240+
/* we can now create the new mvex box */
1241+
const mvex = moov.addBox(new mvexBox());
1242+
if (total_duration) {
1243+
const mehd = mvex.addBox(new mehdBox());
1244+
mehd.fragment_duration = total_duration;
1245+
}
12431246

1244-
// Add trex boxes for each track
1245-
for (let i = 0; i < moov.traks.length; i++) {
1246-
const trex = mvex.addBox(new trexBox());
1247-
trex.track_id = moov.traks[i].tkhd.track_id;
1248-
trex.default_sample_description_index = 1;
1249-
trex.default_sample_duration = moov.traks[i].samples[0]?.duration ?? 0;
1250-
trex.default_sample_size = 0;
1251-
trex.default_sample_flags = 1 << 16;
1247+
// Add trex boxes for each track
1248+
for (let i = 0; i < moov.traks.length; i++) {
1249+
const trex = mvex.addBox(new trexBox());
1250+
trex.track_id = moov.traks[i].tkhd.track_id;
1251+
trex.default_sample_description_index = 1;
1252+
trex.default_sample_duration = moov.traks[i].samples[0]?.duration ?? 0;
1253+
trex.default_sample_size = 0;
1254+
trex.default_sample_flags = 1 << 16;
1255+
}
1256+
moov.write(stream);
1257+
} finally {
1258+
for (let i = restoreCallbacks.length - 1; i >= 0; i--) {
1259+
restoreCallbacks[i]();
1260+
}
12521261
}
1253-
moov.write(stream);
12541262

12551263
return stream.buffer;
12561264
}
@@ -1271,6 +1279,43 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
12711279
return stream;
12721280
}
12731281

1282+
/*
 * Temporarily rewrites every `mp4a` sample entry of the given tracks so that its
 * `esds` box is a direct child of the entry rather than being nested inside a
 * `wave` box. The entries are mutated in place: `esds` is set, `wave` is cleared
 * and `boxes` is replaced by a copy without any `wave`/`esds` child followed by
 * the resolved `esds`.
 *
 * Returns one restore callback per modified entry. Each callback puts the
 * entry's `esds`, `wave` and `boxes` fields back to the values they had before
 * this call, so the caller is expected to run them once the init segment has
 * been written (the visible caller does so from a `finally`, in reverse order).
 *
 * Entries that are not `mp4a`, or that carry no `esds` at all (neither direct
 * nor inside `wave`), are left untouched and get no callback.
 */
/** @bundle isofile-write.js */
private static normalizeAudioSampleEntriesForFragmentedInit(
  traks: Array<trakBox>,
): Array<() => void> {
  const restoreCallbacks: Array<() => void> = [];

  for (const trak of traks) {
    const sampleEntries = trak.mdia.minf.stbl.stsd?.entries ?? [];

    // The loop variable is deliberately NOT called `mp4aSampleEntry`: that name
    // is the imported class used by the `instanceof` guard below, and shadowing
    // it inside the loop makes the body easy to misread.
    for (const entry of sampleEntries) {
      if (!(entry instanceof mp4aSampleEntry)) {
        continue;
      }

      // Prefer an esds that is already a direct child; otherwise take the nested one.
      const esds = entry.esds ?? entry.wave?.esds;
      if (!esds) {
        continue;
      }

      // Capture the current field values so the source entry can be restored verbatim.
      const previousEsds = entry.esds;
      const previousWave = entry.wave;
      const previousBoxes = entry.boxes;

      restoreCallbacks.push(() => {
        entry.esds = previousEsds;
        entry.wave = previousWave;
        entry.boxes = previousBoxes;
      });

      // Build a fresh child list (the original array is kept intact for the restore).
      const boxesWithoutWave = Array.isArray(entry.boxes)
        ? entry.boxes.filter(box => box?.type !== 'wave' && box?.type !== 'esds')
        : [];
      entry.esds = esds;
      entry.boxes = [...boxesWithoutWave, esds];
      entry.wave = undefined;
    }
  }

  return restoreCallbacks;
}
1318+
12741319
/** @bundle isofile-write.js */
12751320
initializeSegmentation() {
12761321
if (!this.onSegment) {
@@ -1295,6 +1340,7 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
12951340
);
12961341
continue;
12971342
}
1343+
12981344
moov.addBox(trak);
12991345
}
13001346

@@ -2409,7 +2455,7 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
24092455
/* we had enough bytes to get its type and size and it's an 'mdat' */
24102456

24112457
/* special handling for mdat boxes, since we don't actually need to parse it linearly
2412-
we create the box */
2458+
we create the box */
24132459
const box = new mdatBox(ret.size);
24142460
this.parsingMdat = box;
24152461
this.boxes.push(box);
@@ -2435,12 +2481,12 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
24352481
/* determine the next position in the file to start parsing from */
24362482
if (!this.moovStartFound) {
24372483
/* moov not found yet,
2438-
the file probably has 'mdat' at the beginning, and 'moov' at the end,
2439-
indicate that the downloader should not try to download those bytes now */
2484+
the file probably has 'mdat' at the beginning, and 'moov' at the end,
2485+
indicate that the downloader should not try to download those bytes now */
24402486
this.nextParsePosition = box.start + box.size;
24412487
} else {
24422488
/* we have the start of the moov box,
2443-
the next bytes should try to complete the current 'mdat' */
2489+
the next bytes should try to complete the current 'mdat' */
24442490
this.nextParsePosition = this.stream.findEndContiguousBuf();
24452491
}
24462492
/* not much we can do, wait for more buffers to arrive */
@@ -2456,27 +2502,27 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
24562502
}
24572503
}
24582504
/* either it's not an mdat box (and we need to parse it, we cannot skip it)
2459-
(TODO: we could skip 'free' boxes ...)
2460-
or we did not have enough data to parse the type and size of the box,
2461-
we try to concatenate the current buffer with the next buffer to restart parsing */
2505+
(TODO: we could skip 'free' boxes ...)
2506+
or we did not have enough data to parse the type and size of the box,
2507+
we try to concatenate the current buffer with the next buffer to restart parsing */
24622508
const merged = this.stream.mergeNextBuffer ? this.stream.mergeNextBuffer() : false;
24632509
if (merged) {
24642510
/* The next buffer was contiguous, the merging succeeded,
2465-
we can now continue parsing,
2466-
the next best position to parse is at the end of this new buffer */
2511+
we can now continue parsing,
2512+
the next best position to parse is at the end of this new buffer */
24672513
this.nextParsePosition = this.stream.getEndPosition();
24682514
return true;
24692515
} else {
24702516
/* we cannot concatenate existing buffers because they are not contiguous or because there is no additional buffer */
24712517
/* The next best position to parse is still at the end of this old buffer */
24722518
if (!ret.type) {
24732519
/* There were not enough bytes in the buffer to parse the box type and length,
2474-
the next fetch should retrieve those missing bytes, i.e. the next bytes after this buffer */
2520+
the next fetch should retrieve those missing bytes, i.e. the next bytes after this buffer */
24752521
this.nextParsePosition = this.stream.getEndPosition();
24762522
} else {
24772523
/* we had enough bytes to parse size and type of the incomplete box
2478-
if we haven't found yet the moov box, skip this one and try the next one
2479-
if we have found the moov box, let's continue linear parsing */
2524+
if we haven't found yet the moov box, skip this one and try the next one
2525+
if we have found the moov box, let's continue linear parsing */
24802526
if (this.moovStartFound) {
24812527
this.nextParsePosition = this.stream.getEndPosition();
24822528
} else {
@@ -2567,8 +2613,8 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
25672613
return true;
25682614
} else {
25692615
/* we don't have the end of this mdat yet,
2570-
indicate that the next byte to fetch is the end of the buffers we have so far,
2571-
return and wait for more buffer to come */
2616+
indicate that the next byte to fetch is the end of the buffers we have so far,
2617+
return and wait for more buffer to come */
25722618
this.nextParsePosition = this.stream.findEndContiguousBuf();
25732619
return false;
25742620
}

tests/segmentation.test.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import fs from 'fs';
22
import { createFile, MP4BoxBuffer, MultiBufferStream } from '../entries/all';
3+
import { waveBox } from '../src/boxes/qt/wave';
4+
import { mp4aSampleEntry } from '../src/boxes/sampleentries/sampleentry';
35
import { getFilePath, loadAndGetInfo } from './common';
46

57
// Saves the segments to a file
@@ -206,4 +208,45 @@ describe('File Segmentation', () => {
206208
expect(newMP4.getBoxes('moof', false).length).toBe(10);
207209
expect(out.getAbsoluteEndPosition()).toBe(203_175);
208210
});
211+
212+
// Regression test for the init-segment normalization of `mp4a` entries:
// an entry whose esds is nested in a `wave` box must be written with a direct
// `esds` child, and the in-memory source entry must be left exactly as it was.
it('writes a direct esds in init segment and restores source sample entry state', async () => {
  const { testFile } = getFilePath('isobmff', '01_simple.mp4');
  const { mp4 } = await loadAndGetInfo(testFile, true, true);

  const audioTrack = mp4.moov?.traks.find(trak => trak.mdia?.hdlr?.handler === 'soun');
  expect(audioTrack).toBeDefined();
  if (!audioTrack) {
    // Narrows `audioTrack` for the type checker; the expectation above already failed.
    throw new Error('Missing audio track');
  }
  const audioTrackId = audioTrack.tkhd.track_id;
  const sampleEntry = audioTrack.mdia.minf.stbl.stsd.entries[0] as mp4aSampleEntry;
  expect(sampleEntry).toBeInstanceOf(mp4aSampleEntry);
  const directEsds = sampleEntry.esds;
  expect(directEsds).toBeDefined();

  // Re-shape the source entry so that its esds lives inside a `wave` box only.
  const nestedWave = new waveBox();
  nestedWave.esds = directEsds;
  const boxesWithoutEsds = (sampleEntry.boxes ?? []).filter(
    box => box.type !== 'esds' && box.type !== 'wave',
  );
  sampleEntry.wave = nestedWave;
  sampleEntry.esds = undefined;
  sampleEntry.boxes = [...boxesWithoutEsds, nestedWave];

  mp4.setSegmentOptions(audioTrackId, undefined, { nbSamples: 50 });
  const initSegment = mp4.initializeSegmentation();

  // Parse the produced init segment back to inspect what was actually written.
  const initMp4 = createFile(true);
  initMp4.appendBuffer(MP4BoxBuffer.fromArrayBuffer(initSegment.buffer, 0));
  initMp4.flush();

  const initAudioTrack = initMp4.getTrackById(audioTrackId);
  const initSampleEntry = initAudioTrack.mdia.minf.stbl.stsd.entries[0] as mp4aSampleEntry;

  // Written entry: direct esds, no wave.
  expect(initSampleEntry.boxes?.some(box => box.type === 'esds')).toBe(true);
  expect(initSampleEntry.boxes?.some(box => box.type === 'wave')).toBe(false);

  // Source entry: child list is back to the wave-only layout ...
  expect(sampleEntry.boxes?.some(box => box.type === 'wave')).toBe(true);
  expect(sampleEntry.boxes?.some(box => box.type === 'esds')).toBe(false);
  // ... and the `wave` / `esds` fields themselves were restored too, not just `boxes`.
  expect(sampleEntry.wave).toBe(nestedWave);
  expect(sampleEntry.esds).toBeUndefined();
});
209252
});

0 commit comments

Comments
 (0)