Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/bright-waves-listen.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"mp4box": patch
---

fix: include nested QuickTime wave esds when deriving mp4a codec strings
10 changes: 10 additions & 0 deletions .changeset/quiet-segments-smile.md
Comment thread
DenizUgur marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
"mp4box": patch
---

fix: normalize QuickTime wave esds in fragmented init segments for MSE compatibility

By default, segmentation writes MSE-compatible `mp4a.esds` sample entries when source
QuickTime files store AAC decoder config under `mp4a.wave.esds`. This behavior can be
disabled with `setSegmentOptions(..., { normalizeAudioSampleEntriesForMSE: false })`
to preserve nested QuickTime `wave.esds` sample entries in initialization segments.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ Indicates that the track with the given `track_id` should be segmented, with the

- **nbSamples**: Number, representing the number of samples (frames) per segment, i.e. the interval between two consecutive onSegment callbacks. If not enough data has been received to form a segment, the received samples are kept. If not provided, the default is 1000.
- **rapAlignement**: boolean, indicating whether segments should start with a RAP (random access point). If not provided, the default is true.
- **normalizeAudioSampleEntriesForMSE**: boolean, indicating if QuickTime-style audio sample entries should be normalized in initialization segments for Media Source Extensions compatibility. If not provided, the default is true. Set to false to preserve nested QuickTime `wave.esds` sample entries.

```javascript
mp4boxfile.setSegmentOptions(1, sb, { nbSamples: 1000 });
Expand Down
1 change: 1 addition & 0 deletions entries/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ export interface FragmentedTrack<TUser> {
nb_samples_per_fragment: number;
size_per_segment: number;
rapAlignement: boolean;
normalizeAudioSampleEntriesForMSE?: boolean;
state: {
lastFragmentSampleNumber: number;
lastSegmentSampleNumber: number;
Expand Down
2 changes: 2 additions & 0 deletions src/boxes/qt/wave.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { ContainerBox } from '#/containerBox';
import { esdsBox } from '#/boxes/esds';

/**
 * QuickTime `wave` container box (registered here under the name
 * `siDecompressionParamBox`).
 *
 * QuickTime files can store the AAC decoder configuration in an `esds` box
 * nested inside this container (`mp4a.wave.esds`) rather than directly in the
 * `mp4a` sample entry.
 */
export class waveBox extends ContainerBox {
static override readonly fourcc = 'wave' as const;
box_name = 'siDecompressionParamBox' as const;
/**
 * The nested `esds` child box.
 * NOTE(review): presumably left unset when the parsed `wave` box has no
 * `esds` child, so readers should access it defensively — confirm against
 * the ContainerBox parsing logic.
 */
esds: esdsBox;
}
9 changes: 6 additions & 3 deletions src/boxes/sampleentries/sampleentry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { av1CBox } from '#/boxes/av1C';
import { avcCBox } from '#/boxes/avcC';
import { sinfBox } from '#/boxes/defaults';
import { esdsBox } from '#/boxes/esds';
import { waveBox } from '#/boxes/qt/wave';
import { hvcCBox } from '#/boxes/hvcC';
import { lvcCBox } from '#/boxes/lvcC';
import { vpcCBox } from '#/boxes/vpcC';
Expand Down Expand Up @@ -384,12 +385,14 @@ export class mp4aSampleEntry extends AudioSampleEntry {

esds: esdsBox;
esdss: Array<esdsBox>;
wave: waveBox;

getCodec() {
const baseCodec = super.getCodec();
if (this.esds && this.esds.esd) {
const oti = this.esds.esd.getOTI();
const dsi = this.esds.esd.getAudioConfig();
const esds = this.esds ?? this.wave?.esds;
if (esds && esds.esd) {
const oti = esds.esd.getOTI();
const dsi = esds.esd.getAudioConfig();
return baseCodec + '.' + decimalToHex(oti) + (dsi ? '.' + dsi : '');
} else {
return baseCodec;
Expand Down
122 changes: 102 additions & 20 deletions src/isofile.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import { mehdBox } from '#/boxes/mehd';
import { metaBox } from '#/boxes/meta';
import { mfhdBox } from '#/boxes/mfhd';
import { mvhdBox } from '#/boxes/mvhd';
import { stppSampleEntry } from '#/boxes/sampleentries';
import { mp4aSampleEntry, stppSampleEntry } from '#/boxes/sampleentries';
import {
AudioSampleEntry,
HintSampleEntry,
Expand Down Expand Up @@ -245,10 +245,15 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
nbSamplesPerFragment: number;
sizePerSegment: number;
rapAlignement: boolean;
normalizeAudioSampleEntriesForMSE: boolean;
}>,
) {
// Destructure and provide defaults for optional properties
const { sizePerSegment = Number.MAX_SAFE_INTEGER, rapAlignement = true } = opts;
const {
sizePerSegment = Number.MAX_SAFE_INTEGER,
rapAlignement = true,
normalizeAudioSampleEntriesForMSE = true,
} = opts;

// Set defaults for sample counts
let nbSamples = opts.nbSamples ?? opts.nbSamplesPerFragment ?? 1000;
Expand Down Expand Up @@ -292,6 +297,7 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
nb_samples_per_fragment: nbSamplesPerFragment,
size_per_segment: sizePerSegment,
rapAlignement,
normalizeAudioSampleEntriesForMSE,
state: {
lastFragmentSampleNumber: 0,
lastSegmentSampleNumber: 0,
Expand Down Expand Up @@ -1261,29 +1267,45 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
* Modify the file and create the initialization segment
* @bundle isofile-write.js
*/
static writeInitializationSegment(ftyp: ftypBox, moov: moovBox, total_duration: number) {
static writeInitializationSegment(
ftyp: ftypBox,
moov: moovBox,
total_duration: number,
normalizeAudioSampleEntryTrackIds?: Set<number>,
) {
Log.debug('ISOFile', 'Generating initialization segment');

const stream = new DataStream();
ftyp.write(stream);

/* we can now create the new mvex box */
const mvex = moov.addBox(new mvexBox());
if (total_duration) {
const mehd = mvex.addBox(new mehdBox());
mehd.fragment_duration = total_duration;
}
const restoreCallbacks = ISOFile.normalizeAudioSampleEntriesForMSEFragmentedInit(
moov.traks,
normalizeAudioSampleEntryTrackIds,
);

try {
/* we can now create the new mvex box */
const mvex = moov.addBox(new mvexBox());
if (total_duration) {
const mehd = mvex.addBox(new mehdBox());
mehd.fragment_duration = total_duration;
}

// Add trex boxes for each track
for (let i = 0; i < moov.traks.length; i++) {
const trex = mvex.addBox(new trexBox());
trex.track_id = moov.traks[i].tkhd.track_id;
trex.default_sample_description_index = 1;
trex.default_sample_duration = moov.traks[i].samples[0]?.duration ?? 0;
trex.default_sample_size = 0;
trex.default_sample_flags = 1 << 16;
// Add trex boxes for each track
for (let i = 0; i < moov.traks.length; i++) {
const trex = mvex.addBox(new trexBox());
trex.track_id = moov.traks[i].tkhd.track_id;
trex.default_sample_description_index = 1;
trex.default_sample_duration = moov.traks[i].samples[0]?.duration ?? 0;
trex.default_sample_size = 0;
trex.default_sample_flags = 1 << 16;
}
moov.write(stream);
} finally {
for (let i = restoreCallbacks.length - 1; i >= 0; i--) {
restoreCallbacks[i]();
}
}
moov.write(stream);

return stream.buffer;
}
Expand All @@ -1304,6 +1326,51 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
return stream;
}

/**
 * Temporarily rewrites QuickTime-style `mp4a` sample entries so that the
 * `esds` box nested under `wave` becomes a direct child of the sample entry.
 *
 * Only tracks whose id is contained in `normalizeAudioSampleEntryTrackIds` are
 * touched, and only `mp4a` entries that have a `wave.esds` but no direct
 * `esds`. For those entries the `wave` and any `esds` children are removed
 * from `boxes`, the nested `esds` is appended, and `wave` is cleared.
 *
 * The mutation is done in place on the caller's boxes, so the function returns
 * one callback per rewritten entry that puts the previous `esds`, `wave` and
 * `boxes` values back. If anything throws while walking the tracks, the
 * entries rewritten so far are rolled back before the error is rethrown, so a
 * failure never leaves the box tree half-normalized.
 *
 * @param traks Tracks whose sample descriptions should be inspected.
 * @param normalizeAudioSampleEntryTrackIds Ids of the tracks to normalize;
 *   when omitted or empty, nothing is modified.
 * @returns Restore callbacks, in the order the entries were rewritten.
 * @bundle isofile-write.js
 */
private static normalizeAudioSampleEntriesForMSEFragmentedInit(
  traks: Array<trakBox>,
  normalizeAudioSampleEntryTrackIds?: Set<number>,
) {
  const restoreCallbacks: Array<() => void> = [];

  // No track opted in: leave every sample entry untouched.
  if (!normalizeAudioSampleEntryTrackIds || normalizeAudioSampleEntryTrackIds.size === 0) {
    return restoreCallbacks;
  }

  try {
    for (const trak of traks) {
      if (!normalizeAudioSampleEntryTrackIds.has(trak.tkhd.track_id)) {
        continue;
      }

      for (const sampleEntry of trak.mdia.minf.stbl.stsd?.entries ?? []) {
        if (!(sampleEntry instanceof mp4aSampleEntry)) {
          continue;
        }

        const esds = sampleEntry.wave?.esds;

        // Already has a direct esds, or there is no nested one to promote.
        if (sampleEntry.esds || !esds) {
          continue;
        }

        const previousEsds = sampleEntry.esds;
        const previousWave = sampleEntry.wave;
        const previousBoxes = sampleEntry.boxes;

        // Register the undo step before mutating so it always has a counterpart.
        restoreCallbacks.push(() => {
          sampleEntry.esds = previousEsds;
          sampleEntry.wave = previousWave;
          sampleEntry.boxes = previousBoxes;
        });

        // Build a new child list instead of editing the existing array, so
        // restoring `previousBoxes` brings back the original children.
        const boxesWithoutWave = Array.isArray(sampleEntry.boxes)
          ? sampleEntry.boxes.filter(box => box?.type !== 'wave' && box?.type !== 'esds')
          : [];
        sampleEntry.esds = esds;
        sampleEntry.boxes = [...boxesWithoutWave, esds];
        sampleEntry.wave = undefined;
      }
    }
  } catch (error) {
    // The caller never receives the callbacks on failure, so undo here
    // (most recent first) before propagating the error.
    for (let i = restoreCallbacks.length - 1; i >= 0; i--) {
      restoreCallbacks[i]();
    }
    throw error;
  }

  return restoreCallbacks;
}

/** @bundle isofile-write.js */
initializeSegmentation(mode?: 'combined'): SegmentationInitialization<TSegmentUser>;
initializeSegmentation(
Expand Down Expand Up @@ -1339,6 +1406,11 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
}

const fragmentDuration = this.moov?.mvex?.mehd.fragment_duration;
const normalizeAudioSampleEntryTrackIds = new Set(
this.fragmentedTracks
.filter(track => track.normalizeAudioSampleEntriesForMSE !== false)
.map(track => track.id),
);

if (mode === 'per-track') {
return tracksToInitialize.map(({ id, user, trak }) => {
Expand All @@ -1349,7 +1421,12 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {
return {
id,
user,
buffer: ISOFile.writeInitializationSegment(this.ftyp, moov, fragmentDuration),
buffer: ISOFile.writeInitializationSegment(
this.ftyp,
moov,
fragmentDuration,
normalizeAudioSampleEntryTrackIds,
),
};
});
}
Expand All @@ -1363,7 +1440,12 @@ export class ISOFile<TSegmentUser = unknown, TSampleUser = unknown> {

return {
tracks: tracksToInitialize.map(({ id, user }) => ({ id, user })),
buffer: ISOFile.writeInitializationSegment(this.ftyp, moov, fragmentDuration),
buffer: ISOFile.writeInitializationSegment(
this.ftyp,
moov,
fragmentDuration,
normalizeAudioSampleEntryTrackIds,
),
};
}

Expand Down
26 changes: 26 additions & 0 deletions tests/codec.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import { mp4aSampleEntry } from '../src/boxes/sampleentries/sampleentry';
import type { esdsBox } from '../src/boxes/esds';
import type { waveBox } from '../src/boxes/qt/wave';

describe('Codec strings', () => {
  /**
   * Builds a minimal esds stand-in exposing only the two accessors that
   * mp4aSampleEntry.getCodec() reads. The object type indication is fixed to
   * 0x40; the audio config is configurable so tests can tell two boxes apart.
   */
  const makeEsds = (audioConfig = 2) =>
    ({
      esd: {
        getOTI: () => 0x40,
        getAudioConfig: () => audioConfig,
      },
    }) as esdsBox;

  it('should include AAC object type from a direct esds', () => {
    const entry = new mp4aSampleEntry();
    entry.esds = makeEsds();

    expect(entry.getCodec()).toBe('mp4a.40.2');
  });

  it('should include AAC object type from nested QuickTime wave esds', () => {
    const entry = new mp4aSampleEntry();
    entry.wave = { esds: makeEsds() } as waveBox;

    expect(entry.getCodec()).toBe('mp4a.40.2');
  });

  it('should prefer a direct esds over nested QuickTime wave esds', () => {
    const entry = new mp4aSampleEntry();
    entry.esds = makeEsds(5);
    entry.wave = { esds: makeEsds(2) } as waveBox;

    // The direct esds wins, so its audio config (5) ends up in the codec string.
    expect(entry.getCodec()).toBe('mp4a.40.5');
  });

  it('should keep bare mp4a when no esds is present', () => {
    const entry = new mp4aSampleEntry();

    expect(entry.getCodec()).toBe('mp4a');
  });
});
Loading