Skip to content

Commit fed936b

Browse files
Merge pull request #3198 from Tolriq:mp3_gapless
PiperOrigin-RevId: 933663429
2 parents 6d63e6c + e284a79 commit fed936b

55 files changed

Lines changed: 1337 additions & 978 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

RELEASENOTES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@
120120
`FLAG_READ_MFRA_FOR_SEEK_MAP` to the `FragmentedMp4Extractor`, which is
121121
now done by default in `DefaultExtractorsFactory`
122122
([#3088](https://github.com/androidx/media/issues/3088)).
123+
* MP3: Use gapless-aware durations from Xing/Info headers
124+
([#3183](https://github.com/androidx/media/issues/3183)).
123125
* Ignore `av1C` data with unsupported version.
124126
* MP4: Add support for big-endian floating point PCM in `fpcm` boxes.
125127
* Matroska: Parse chapter info to `Chapter` entries in a track's

libraries/common/src/main/java/androidx/media3/common/util/Util.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2233,7 +2233,21 @@ public static byte[] getBytesFromHexString(String hexString) {
22332233
*/
22342234
@UnstableApi
22352235
public static String toHexString(byte[] bytes) {
2236-
return BaseEncoding.base16().lowerCase().encode(bytes);
2236+
return toHexString(bytes, 0, bytes.length);
2237+
}
2238+
2239+
/**
2240+
* Returns a string containing a lower-case hex representation of the bytes provided.
2241+
*
2242+
* @param bytes The byte data to convert to hex.
2243+
* @param offset The offset into {@code bytes} to read from.
2244+
* @param length The number of bytes to read from {@code bytes}, starting at {@code offset}.
2245+
* @return A String containing the hex representation of {@code bytes} (considering {@code offset}
2246+
* and {@code length}).
2247+
*/
2248+
@UnstableApi
2249+
public static String toHexString(byte[] bytes, int offset, int length) {
2250+
return BaseEncoding.base16().lowerCase().encode(bytes, offset, length);
22372251
}
22382252

22392253
@UnstableApi

libraries/common/src/test/java/androidx/media3/common/util/UtilTest.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,6 +1066,13 @@ public void toHexString_returnsHexString() {
10661066
assertThat(Util.toHexString(bytes)).isEqualTo("12fc06");
10671067
}
10681068

1069+
@Test
1070+
public void toHexString_withOffsetAndLength_returnsHexString() {
1071+
byte[] bytes = createByteArray(0x12, 0xFC, 0x06, 0x2B);
1072+
1073+
assertThat(Util.toHexString(bytes, /* offset= */ 1, /* length= */ 2)).isEqualTo("fc06");
1074+
}
1075+
10691076
@Test
10701077
public void getCodecsOfType_withNull_returnsNull() {
10711078
assertThat(getCodecsOfType(null, C.TRACK_TYPE_VIDEO)).isNull();

libraries/extractor/src/main/java/androidx/media3/extractor/ConstantBitrateSeekMap.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,32 @@ public long getTimeUsAtPosition(long position) {
166166
return getTimeUsAtPosition(position, firstFrameBytePosition, bitrate);
167167
}
168168

169+
/** Returns the byte position of the first frame in the stream (as passed to the constructor). */
170+
protected final long getFirstFramePosition() {
171+
return firstFrameBytePosition;
172+
}
173+
174+
/**
175+
* Returns the size of each frame in the stream in bytes, or {@code 1} if {@link C#LENGTH_UNSET}
176+
* was passed to the constructor.
177+
*/
178+
protected final int getFrameSize() {
179+
return frameSize;
180+
}
181+
182+
/** Returns the bitrate of the stream (as passed to the constructor). */
183+
protected final int getBitrate() {
184+
return bitrate;
185+
}
186+
187+
/**
188+
* Returns whether seeking is permitted if the stream length is unknown (as passed to the
189+
* constructor).
190+
*/
191+
protected final boolean shouldAllowSeeksIfLengthUnknown() {
192+
return allowSeeksIfLengthUnknown;
193+
}
194+
169195
// Internal methods
170196

171197
/**

libraries/extractor/src/main/java/androidx/media3/extractor/mp3/ConstantBitrateSeeker.java

Lines changed: 45 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,24 @@
1818
import androidx.media3.common.C;
1919
import androidx.media3.extractor.ConstantBitrateSeekMap;
2020
import androidx.media3.extractor.MpegAudioUtil;
21+
import androidx.media3.extractor.SeekMap.SeekPoints;
22+
import androidx.media3.extractor.SeekPoint;
2123

2224
/**
2325
* MP3 seeker that doesn't rely on metadata and seeks assuming the source has a constant bitrate.
2426
*/
2527
/* package */ final class ConstantBitrateSeeker extends ConstantBitrateSeekMap implements Seeker {
2628

27-
private final long firstFramePosition;
28-
private final int bitrate;
29-
private final int frameSize;
30-
private final boolean allowSeeksIfLengthUnknown;
29+
private final long durationUs;
3130
private final long dataEndPosition;
3231

3332
/**
3433
* Constructs an instance.
3534
*
35+
* <p>The duration exposed from {@link #getDurationUs()} is computed from {@code inputLength} and
36+
* the bitrate of {@code mpegAudioHeader}, or is {@link C#TIME_UNSET} if {@code inputLength} is
37+
* unknown.
38+
*
3639
* @param inputLength The length of the stream in bytes, or {@link C#LENGTH_UNSET} if unknown.
3740
* @param firstFramePosition The position of the first frame in the stream.
3841
* @param mpegAudioHeader The MPEG audio header associated with the first frame.
@@ -53,23 +56,30 @@ public ConstantBitrateSeeker(
5356
mpegAudioHeader.bitrate,
5457
mpegAudioHeader.frameSize,
5558
allowSeeksIfLengthUnknown,
56-
/* isEstimated= */ true);
59+
/* durationUs= */ C.TIME_UNSET);
5760
}
5861

59-
/** See {@link ConstantBitrateSeekMap#ConstantBitrateSeekMap(long, long, int, int, boolean)}. */
62+
/**
63+
* See {@link ConstantBitrateSeekMap#ConstantBitrateSeekMap(long, long, int, int, boolean)}. Uses
64+
* {@code durationUs} as the duration exposed from {@link #getDurationUs()}, or computes the
65+
* duration from {@code inputLength} and {@code bitrate} if {@code durationUs} is {@link
66+
* C#TIME_UNSET}.
67+
*/
6068
public ConstantBitrateSeeker(
6169
long inputLength,
6270
long firstFramePosition,
6371
int bitrate,
6472
int frameSize,
65-
boolean allowSeeksIfLengthUnknown) {
73+
boolean allowSeeksIfLengthUnknown,
74+
long durationUs) {
6675
this(
6776
inputLength,
6877
firstFramePosition,
6978
bitrate,
7079
frameSize,
7180
allowSeeksIfLengthUnknown,
72-
/* isEstimated= */ true);
81+
/* isEstimated= */ true,
82+
durationUs);
7383
}
7484

7585
private ConstantBitrateSeeker(
@@ -78,18 +88,16 @@ private ConstantBitrateSeeker(
7888
int bitrate,
7989
int frameSize,
8090
boolean allowSeeksIfLengthUnknown,
81-
boolean isEstimated) {
91+
boolean isEstimated,
92+
long durationUs) {
8293
super(
8394
inputLength,
8495
firstFramePosition,
8596
bitrate,
8697
frameSize,
8798
allowSeeksIfLengthUnknown,
8899
isEstimated);
89-
this.firstFramePosition = firstFramePosition;
90-
this.bitrate = bitrate;
91-
this.frameSize = frameSize;
92-
this.allowSeeksIfLengthUnknown = allowSeeksIfLengthUnknown;
100+
this.durationUs = durationUs;
93101
dataEndPosition = inputLength != C.LENGTH_UNSET ? inputLength : C.INDEX_UNSET;
94102
}
95103

@@ -98,28 +106,45 @@ public long getTimeUs(long position) {
98106
return getTimeUsAtPosition(position);
99107
}
100108

109+
@Override
110+
public SeekPoints getSeekPoints(long timeUs) {
111+
if (durationUs != C.TIME_UNSET && timeUs >= durationUs && dataEndPosition != C.INDEX_UNSET) {
112+
long finalFramePosition = Math.max(getFirstFramePosition(), dataEndPosition - getFrameSize());
113+
long frameDurationUs = getTimeUsAtPosition(getFirstFramePosition() + getFrameSize());
114+
return new SeekPoints(
115+
new SeekPoint(Math.max(0, durationUs - frameDurationUs), finalFramePosition));
116+
}
117+
return super.getSeekPoints(timeUs);
118+
}
119+
101120
@Override
102121
public long getDataStartPosition() {
103-
return firstFramePosition;
122+
return getFirstFramePosition();
104123
}
105124

106125
@Override
107126
public long getDataEndPosition() {
108127
return dataEndPosition;
109128
}
110129

130+
@Override
131+
public long getDurationUs() {
132+
return durationUs != C.TIME_UNSET ? durationUs : super.getDurationUs();
133+
}
134+
111135
@Override
112136
public int getAverageBitrate() {
113-
return bitrate;
137+
return getBitrate();
114138
}
115139

116140
public ConstantBitrateSeeker copyWithNewDataEndPosition(long dataEndPosition) {
117141
return new ConstantBitrateSeeker(
118142
/* inputLength= */ dataEndPosition,
119-
firstFramePosition,
120-
bitrate,
121-
frameSize,
122-
allowSeeksIfLengthUnknown,
123-
/* isEstimated= */ false);
143+
getFirstFramePosition(),
144+
getAverageBitrate(),
145+
getFrameSize(),
146+
shouldAllowSeeksIfLengthUnknown(),
147+
/* isEstimated= */ false,
148+
durationUs);
124149
}
125150
}

libraries/extractor/src/main/java/androidx/media3/extractor/mp3/Mp3Extractor.java

Lines changed: 43 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
*/
1616
package androidx.media3.extractor.mp3;
1717

18+
import static androidx.media3.extractor.mp3.Mp3Util.computeAverageBitrate;
1819
import static com.google.common.base.Preconditions.checkNotNull;
1920
import static java.lang.annotation.ElementType.TYPE_USE;
2021
import static java.lang.annotation.RetentionPolicy.SOURCE;
@@ -265,7 +266,10 @@ public int read(ExtractorInput input, PositionHolder seekPosition) throws IOExce
265266
if (readResult == RESULT_END_OF_INPUT && seeker instanceof IndexSeeker) {
266267
// Duration is exact when index seeker is used.
267268
long finalSampleIndex = samplesRead - 1;
268-
long durationUs = finalSampleIndex >= 0 ? computeTimeUs(finalSampleIndex) : C.TIME_UNSET;
269+
long durationUs =
270+
finalSampleIndex >= 0
271+
? computeFinalIndexSeekerDurationUs(finalSampleIndex)
272+
: C.TIME_UNSET;
269273
if (seeker.getDurationUs() != durationUs) {
270274
((IndexSeeker) seeker).setDurationUs(durationUs);
271275
extractorOutput.seekMap(seeker);
@@ -391,6 +395,21 @@ private long computeTimeUs(long samplesRead) {
391395
return basisTimeUs + samplesRead * C.MICROS_PER_SECOND / synchronizedHeader.sampleRate;
392396
}
393397

398+
/**
399+
* Returns the final duration to expose for an {@link IndexSeeker}.
400+
*
401+
* <p>Index seeking finalizes duration from the encoded samples read at EOF. When gapless metadata
402+
* is present, this trims the encoder delay and padding so EOF finalization does not replace an
403+
* initially gapless Xing/Info duration with the longer encoded duration.
404+
*/
405+
private long computeFinalIndexSeekerDurationUs(long finalSampleIndex) {
406+
long finalGaplessSampleIndex =
407+
gaplessInfoHolder.hasGaplessInfo()
408+
? finalSampleIndex - gaplessInfoHolder.encoderDelay - gaplessInfoHolder.encoderPadding
409+
: finalSampleIndex;
410+
return finalGaplessSampleIndex >= 0 ? computeTimeUs(finalGaplessSampleIndex) : C.TIME_UNSET;
411+
}
412+
394413
private boolean synchronize(ExtractorInput input, boolean sniffing) throws IOException {
395414
int validFrameCount = 0;
396415
int candidateSynchronizedHeaderData = 0;
@@ -522,37 +541,37 @@ private Seeker computeSeeker(ExtractorInput input) throws IOException {
522541
return resultSeeker;
523542
}
524543

544+
long durationUs = resultSeeker.getDurationUs();
525545
long inputLength =
526546
resultSeeker.getDataEndPosition() != C.INDEX_UNSET
527547
? resultSeeker.getDataEndPosition()
528548
: input.getLength();
529-
if (resultSeeker.getDurationUs() == C.TIME_UNSET || inputLength == C.LENGTH_UNSET) {
549+
if (durationUs == C.TIME_UNSET || inputLength == C.LENGTH_UNSET) {
530550
// resultSeeker doesn't provide enough info to do 'enhanced' CBR seeking, so we just do
531551
// normal CBR seeking without any additional info from the file.
532552
return getConstantBitrateSeeker(input);
533553
}
534-
// resultSeeker provides a duration and we know the input length, and CBR seeking has been
535-
// requested, so we can do 'enhanced' CBR seeking using this info.
536554
long dataStart =
537555
resultSeeker.getDataStartPosition() != C.INDEX_UNSET
538556
? resultSeeker.getDataStartPosition()
539557
: 0;
540-
long audioLength = inputLength - dataStart;
541-
int bitrate =
542-
Ints.saturatedCast(
543-
Util.scaleLargeValue(
544-
audioLength,
545-
Byte.SIZE * C.MICROS_PER_SECOND,
546-
resultSeeker.getDurationUs(),
547-
RoundingMode.HALF_UP));
548-
// inputLength will never be LENGTH_UNSET because of the if-condition above, so we can
558+
int averageBitrate = computeAverageBitrate(inputLength - dataStart, durationUs);
559+
if (averageBitrate == C.RATE_UNSET_INT) {
560+
// Bitrate couldn't be determined (dataStart >= inputLength?) so we just do normal CBR seeking
561+
return getConstantBitrateSeeker(input);
562+
}
563+
564+
// resultSeeker provides a duration and we know the input length, and CBR seeking has been
565+
// requested, so we can do 'enhanced' CBR seeking using this info. inputLength will never be
566+
// LENGTH_UNSET because of the if-condition above, so we can
549567
// pass (vacuously) false here for allowSeeksIfLengthUnknown.
550568
return new ConstantBitrateSeeker(
551569
inputLength,
552570
dataStart,
553-
bitrate,
571+
averageBitrate,
554572
/* frameSize= */ C.LENGTH_UNSET,
555-
/* allowSeeksIfLengthUnknown= */ false);
573+
/* allowSeeksIfLengthUnknown= */ false,
574+
durationUs);
556575
}
557576

558577
private boolean shouldFallbackToConstantBitrateSeeking(Seeker seeker) {
@@ -663,15 +682,13 @@ private Seeker getConstantBitrateSeeker(
663682

664683
// Derive the bitrate and frame size by averaging over the length of playable audio, to allow
665684
// for 'mostly' CBR streams that might have a small number of frames with a different bitrate.
666-
// We can assume infoFrame.frameCount is set, because otherwise computeDurationUs() would
667-
// have returned C.TIME_UNSET above. See also https://github.com/androidx/media/issues/1376.
668-
int averageBitrate =
669-
Ints.checkedCast(
670-
Util.scaleLargeValue(
671-
audioLength,
672-
C.BITS_PER_BYTE * C.MICROS_PER_SECOND,
673-
durationUs,
674-
RoundingMode.HALF_UP));
685+
// See also https://github.com/androidx/media/issues/1376.
686+
int averageBitrate = computeAverageBitrate(audioLength, durationUs);
687+
if (averageBitrate == C.RATE_UNSET_INT) {
688+
// Invalid Info sizes or durations should fall back to the next frame header bitrate rather
689+
// than constructing a ConstantBitrateSeeker with an unset bitrate.
690+
return null;
691+
}
675692
int frameSize =
676693
Ints.checkedCast(LongMath.divide(audioLength, infoFrame.frameCount, RoundingMode.HALF_UP));
677694
// Set the seeker frame size to the average frame size (even though some constant bitrate
@@ -682,7 +699,8 @@ private Seeker getConstantBitrateSeeker(
682699
/* firstFramePosition= */ infoFramePosition + infoFrame.header.frameSize,
683700
averageBitrate,
684701
frameSize,
685-
/* allowSeeksIfLengthUnknown= */ false);
702+
/* allowSeeksIfLengthUnknown= */ false,
703+
durationUs);
686704
}
687705

688706
/**

libraries/extractor/src/main/java/androidx/media3/extractor/mp3/XingFrame.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -141,18 +141,24 @@ public static XingFrame parse(MpegAudioUtil.Header mpegAudioHeader, ParsableByte
141141

142142
/**
143143
* Compute the stream duration, in microseconds, represented by this frame. Returns {@link
144-
* C#LENGTH_UNSET} if the frame doesn't contain enough information to compute a duration.
144+
* C#TIME_UNSET} if the frame doesn't contain enough information to compute a duration. Encoder
145+
* delay and padding are subtracted if present.
145146
*/
146-
// TODO: b/319235116 - Handle encoder delay and padding when calculating duration.
147147
public long computeDurationUs() {
148148
if (frameCount == C.LENGTH_UNSET || frameCount == 0) {
149149
// If the frame count is missing/invalid, the header can't be used to determine the duration.
150150
return C.TIME_UNSET;
151151
}
152+
long sampleCount = frameCount * header.samplesPerFrame;
153+
if (encoderDelay != C.LENGTH_UNSET && encoderPadding != C.LENGTH_UNSET) {
154+
sampleCount -= encoderDelay + encoderPadding;
155+
}
156+
if (sampleCount <= 0) {
157+
return C.TIME_UNSET;
158+
}
152159
// Audio requires both a start and end PCM sample, so subtract one from the sample count before
153160
// calculating the duration.
154-
return Util.sampleCountToDurationUs(
155-
(frameCount * header.samplesPerFrame) - 1, header.sampleRate);
161+
return Util.sampleCountToDurationUs(sampleCount - 1, header.sampleRate);
156162
}
157163

158164
/** Provide the metadata derived from this Xing frame, such as ReplayGain data. */

0 commit comments

Comments
 (0)