Skip to content

Commit 00afb79

Browse files
committed
fix OOM crash on long audio files (#93)
The end-of-loop backfill in WaveformAnalyzer.extract padded the input sample buffer with `(target - leftSamples.count) * samplesPerPixel * 2` bytes of zeros when the read produced fewer samples than the target — trivially gigabytes for multi-hour files when the reader ended early (e.g. after backgrounding). This crashed on memory exhaustion. Pad the output array with silence-equivalent dB values instead, so the allocation is O(targetSampleCount) regardless of audio duration. Skip the pad entirely when the reader didn't reach .completed — the result is discarded in that case anyway. Wrap the per-chunk read body in an autoreleasepool so CMSampleBuffer references drain promptly during long reads instead of pinning gigabytes until the loop exits. Guard the unconditional startReading() call so a non-.unknown reader fails gracefully rather than via an uncatchable ObjC exception. Add a test target with a reproducer that exercises the buggy condition by inflating samplesPerPixel against a short fixture, plus a smoke test and a defensive crash-resilience test.
1 parent 031de3a commit 00afb79

4 files changed

Lines changed: 249 additions & 43 deletions

File tree

Package.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,9 @@ let package = Package(
2828
name: "DSWaveformImageViews",
2929
dependencies: ["DSWaveformImage"]
3030
),
31+
.testTarget(
32+
name: "DSWaveformImageTests",
33+
dependencies: ["DSWaveformImage"]
34+
),
3135
]
3236
)

Sources/DSWaveformImage/WaveformAnalyzer.swift

Lines changed: 58 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ public struct WaveformAnalyzer: Sendable {
9696

9797
// MARK: - Private
9898

99-
fileprivate extension WaveformAnalyzer {
99+
internal extension WaveformAnalyzer {
100100
func waveformSamples(
101101
track audioAssetTrack: AVAssetTrack,
102102
reader assetReader: AVAssetReader,
@@ -147,58 +147,73 @@ fileprivate extension WaveformAnalyzer {
147147
let samplesPerPixel = max(1, totalSamples / targetSampleCount)
148148
let samplesPerFFT = 4096 // ~100ms at 44.1kHz, rounded to closest pow(2) for FFT
149149

150+
// `startReading()` throws an uncatchable ObjC exception if the reader isn't in `.unknown`
151+
// (e.g. already cancelled or failed). Normal callers always pass a fresh reader, but bail
152+
// gracefully if that contract is violated so we surface as `readerError` rather than crash.
153+
guard assetReader.status == .unknown else {
154+
return WaveformAnalysis(amplitudes: [], fft: outputFFT)
155+
}
150156
assetReader.startReading()
151157
while assetReader.status == .reading {
152-
let trackOutput = assetReader.outputs.first!
153-
154-
guard let nextSampleBuffer = trackOutput.copyNextSampleBuffer(),
155-
let blockBuffer = CMSampleBufferGetDataBuffer(nextSampleBuffer) else {
156-
break
157-
}
158+
// CMSampleBuffer is a Core Foundation type that lives in the autorelease pool.
159+
// Without an explicit drain per iteration, long files iterate thousands of times and
160+
// can keep gigabytes of buffer memory pinned until the loop exits.
161+
let continueReading = autoreleasepool { () -> Bool in
162+
let trackOutput = assetReader.outputs.first!
163+
164+
guard let nextSampleBuffer = trackOutput.copyNextSampleBuffer(),
165+
let blockBuffer = CMSampleBufferGetDataBuffer(nextSampleBuffer) else {
166+
return false
167+
}
158168

159-
var readBufferLength = 0
160-
var readBufferPointer: UnsafeMutablePointer<Int8>? = nil
161-
CMBlockBufferGetDataPointer(blockBuffer, atOffset: 0, lengthAtOffsetOut: &readBufferLength, totalLengthOut: nil, dataPointerOut: &readBufferPointer)
162-
sampleBuffer.append(UnsafeBufferPointer(start: readBufferPointer, count: readBufferLength))
163-
if fftBands != nil {
164-
// don't append data to this buffer unless we're going to use it.
165-
sampleBufferFFT.append(UnsafeBufferPointer(start: readBufferPointer, count: readBufferLength))
166-
}
167-
CMSampleBufferInvalidate(nextSampleBuffer)
169+
var readBufferLength = 0
170+
var readBufferPointer: UnsafeMutablePointer<Int8>? = nil
171+
CMBlockBufferGetDataPointer(blockBuffer, atOffset: 0, lengthAtOffsetOut: &readBufferLength, totalLengthOut: nil, dataPointerOut: &readBufferPointer)
172+
sampleBuffer.append(UnsafeBufferPointer(start: readBufferPointer, count: readBufferLength))
173+
if fftBands != nil {
174+
// don't append data to this buffer unless we're going to use it.
175+
sampleBufferFFT.append(UnsafeBufferPointer(start: readBufferPointer, count: readBufferLength))
176+
}
177+
CMSampleBufferInvalidate(nextSampleBuffer)
168178

169-
let result = process(sampleBuffer, from: assetReader, downsampleTo: samplesPerPixel, channelSelection: channelSelection)
170-
leftSamples += result.left
171-
rightSamples += result.right
179+
let result = process(sampleBuffer, from: assetReader, downsampleTo: samplesPerPixel, channelSelection: channelSelection)
180+
leftSamples += result.left
181+
rightSamples += result.right
172182

173-
if result.bytesConsumed > 0 {
174-
sampleBuffer.removeFirst(result.bytesConsumed)
183+
if result.bytesConsumed > 0 {
184+
sampleBuffer.removeFirst(result.bytesConsumed)
175185

176-
// this takes care of a memory leak where Memory continues to increase even though it should clear after calling .removeFirst(…) above.
177-
sampleBuffer = Data(sampleBuffer)
178-
}
186+
// this takes care of a memory leak where Memory continues to increase even though it should clear after calling .removeFirst(…) above.
187+
sampleBuffer = Data(sampleBuffer)
188+
}
179189

180-
if let fftBands = fftBands, sampleBufferFFT.count / MemoryLayout<Int16>.size >= samplesPerFFT {
181-
let processedFFTs = process(sampleBufferFFT, samplesPerFFT: samplesPerFFT, fftBands: fftBands)
182-
sampleBufferFFT.removeFirst(processedFFTs.count * samplesPerFFT * MemoryLayout<Int16>.size)
183-
outputFFT? += processedFFTs
190+
if let fftBands = fftBands, sampleBufferFFT.count / MemoryLayout<Int16>.size >= samplesPerFFT {
191+
let processedFFTs = process(sampleBufferFFT, samplesPerFFT: samplesPerFFT, fftBands: fftBands)
192+
sampleBufferFFT.removeFirst(processedFFTs.count * samplesPerFFT * MemoryLayout<Int16>.size)
193+
outputFFT? += processedFFTs
194+
}
195+
return true
184196
}
197+
if !continueReading { break }
185198
}
186199

187-
// if we don't have enough pixels yet,
188-
// process leftover samples with padding (to reach multiple of samplesPerPixel for vDSP_desamp)
189-
if leftSamples.count < targetSampleCount {
190-
// each output sample for a single rendered "channel" consumes `samplesPerPixel * inputUnitsPerOutputSample`
191-
// Int16s from the interleaved buffer.
192-
let channelCount = channelInfo(from: assetReader)?.channelCount ?? 1
193-
let inputUnitsPerOutputSample = (channelSelection == .merged) ? 1 : channelCount
194-
let missingSampleCount = (targetSampleCount - leftSamples.count) * samplesPerPixel * inputUnitsPerOutputSample
195-
let backfillPaddingSampleCount = max(0, missingSampleCount - (sampleBuffer.count / MemoryLayout<Int16>.size))
196-
let backfillPaddingByteCount = backfillPaddingSampleCount * MemoryLayout<Int16>.size
197-
let backfillPaddingSamples = [UInt8](repeating: 0, count: backfillPaddingByteCount)
198-
sampleBuffer.append(backfillPaddingSamples, count: backfillPaddingByteCount)
199-
let result = process(sampleBuffer, from: assetReader, downsampleTo: samplesPerPixel, channelSelection: channelSelection)
200-
leftSamples += result.left
201-
rightSamples += result.right
200+
// Pad the *output* with silence-equivalent dB values when the read produced fewer samples
201+
// than the target — e.g. a short tail or a reader that ended early (failed/cancelled after
202+
// backgrounding). These become 1.0 (silence) after `normalize`. Allocation is
203+
// O(targetSampleCount), independent of audio duration — the previous implementation padded
204+
// the *input* buffer with up to `target × samplesPerPixel × 2` bytes of zeros, which
205+
// crashed on multi-hour files (issue #93). We only pad on a clean read; a non-`.completed`
206+
// status means `waveformSamples` will throw and the result is discarded anyway, so skip the
207+
// wasted work.
208+
if assetReader.status == .completed {
209+
if leftSamples.count < targetSampleCount {
210+
let missing = targetSampleCount - leftSamples.count
211+
leftSamples.append(contentsOf: repeatElement(noiseFloorDecibelCutoff, count: missing))
212+
}
213+
if isStereo, rightSamples.count < targetSampleCount {
214+
let missing = targetSampleCount - rightSamples.count
215+
rightSamples.append(contentsOf: repeatElement(noiseFloorDecibelCutoff, count: missing))
216+
}
202217
}
203218

204219
let amplitudes: [Float]
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import AVFoundation
2+
import Darwin
3+
import Foundation
4+
5+
/// Writes a silent mono 16-bit PCM WAV file to a temporary location and returns the URL.
///
/// Bytes-on-disk = `durationSeconds * sampleRate * 2`. A 10-minute file is ~53 MB.
/// The file is not deleted by this function; callers remove it when they are done.
///
/// - Parameters:
///   - durationSeconds: Length of the audio to write. A fractional frame count is truncated.
///   - sampleRate: Sample rate of the written file in Hz. Defaults to 44.1 kHz.
/// - Returns: URL of the newly written file inside the temporary directory.
/// - Throws: An `NSError` in the `TestSupport` domain when the requested frame count cannot be
///   represented (negative, non-finite or too large) or a PCM buffer cannot be allocated, and
///   any error raised by `AVAudioFile` while creating or writing the file.
func makeSilentAudioFile(durationSeconds: Double, sampleRate: Double = 44_100) throws -> URL {
    // Converting a negative, NaN, infinite or oversized Double to `AVAudioFrameCount` traps,
    // so validate first and report bad arguments as a regular, catchable error.
    let requestedFrames = (durationSeconds * sampleRate).rounded(.towardZero)
    guard requestedFrames.isFinite,
          requestedFrames >= 0,
          requestedFrames <= Double(AVAudioFrameCount.max) else {
        throw NSError(domain: "TestSupport", code: 2, userInfo: [NSLocalizedDescriptionKey: "invalid duration or sample rate"])
    }
    let totalFrames = AVAudioFrameCount(requestedFrames)

    let url = FileManager.default.temporaryDirectory
        .appendingPathComponent("dswaveform-test-\(UUID().uuidString).wav")

    let settings: [String: Any] = [
        AVFormatIDKey: kAudioFormatLinearPCM,
        AVSampleRateKey: sampleRate,
        AVNumberOfChannelsKey: 1,
        AVLinearPCMBitDepthKey: 16,
        AVLinearPCMIsBigEndianKey: false,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMIsNonInterleaved: false,
    ]

    do {
        let file = try AVAudioFile(forWriting: url, settings: settings)

        // Write in chunks so we don't allocate the whole audio file as a single PCM buffer.
        let format = file.processingFormat
        let chunkFrames: AVAudioFrameCount = 44_100 // 1 s of audio at 44.1 kHz
        var written: AVAudioFrameCount = 0
        while written < totalFrames {
            let frames = min(chunkFrames, totalFrames - written)
            guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frames) else {
                throw NSError(domain: "TestSupport", code: 1, userInfo: [NSLocalizedDescriptionKey: "buffer alloc failed"])
            }
            buffer.frameLength = frames
            // The buffer is zero-initialized — that's our silence.
            try file.write(from: buffer)
            written += frames
        }
    } catch {
        // Don't leave a partially written fixture behind when creation or writing fails.
        try? FileManager.default.removeItem(at: url)
        throw error
    }
    return url
}
39+
40+
/// Reports the current process's physical memory footprint in bytes.
///
/// The value is the `phys_footprint` field obtained from `task_info` with the `TASK_VM_INFO`
/// flavor.
///
/// - Returns: The footprint in bytes, or 0 when the `task_info` call does not succeed.
func currentPhysFootprint() -> Int64 {
    var vmInfo = task_vm_info_data_t()
    // `task_info` is handed the buffer size as a number of `integer_t` words, not bytes.
    var wordCount = mach_msg_type_number_t(MemoryLayout<task_vm_info_data_t>.size / MemoryLayout<integer_t>.size)
    let status = withUnsafeMutablePointer(to: &vmInfo) { infoPointer in
        infoPointer.withMemoryRebound(to: integer_t.self, capacity: Int(wordCount)) { words in
            task_info(mach_task_self_, task_flavor_t(TASK_VM_INFO), words, &wordCount)
        }
    }
    guard status == KERN_SUCCESS else {
        return 0
    }
    return Int64(vmInfo.phys_footprint)
}
51+
52+
/// Executes `operation` while a background task samples the process's physical memory footprint.
///
/// The baseline footprint is read once, immediately before `operation` starts. Every sample is
/// recorded as its difference from that baseline, and the largest difference seen is reported.
/// An error thrown by `operation` is captured and handed back in the tuple instead of being
/// rethrown.
///
/// - Parameters:
///   - pollIntervalNanos: Pause between two consecutive samples, in nanoseconds.
///   - operation: The work to observe.
/// - Returns: The operation's value (`nil` if it threw), the error it threw (`nil` on success),
///   and the peak footprint delta over the baseline in bytes.
func measurePeakMemoryDelta<T>(
    pollIntervalNanos: UInt64 = 5_000_000,
    during operation: () async throws -> T
) async throws -> (result: T?, error: Error?, peakDeltaBytes: Int64) {
    let startingFootprint = currentPhysFootprint()
    let tracker = PeakBox()
    let sampler = Task {
        while !Task.isCancelled {
            let growth = currentPhysFootprint() - startingFootprint
            await tracker.update(growth)
            // A failed sleep is ignored on purpose; the loop condition decides when to stop.
            try? await Task.sleep(nanoseconds: pollIntervalNanos)
        }
    }

    var value: T?
    var failure: Error?
    do {
        value = try await operation()
    } catch {
        failure = error
    }

    // Stop sampling and wait for the sampler to finish before reading the peak.
    sampler.cancel()
    await sampler.value
    // Take one last sample to catch a spike that landed between the final poll and the cancel.
    await tracker.update(currentPhysFootprint() - startingFootprint)
    let highestGrowth = await tracker.peak
    return (value, failure, highestGrowth)
}
85+
86+
/// Actor that remembers the largest value it has been handed, starting from zero.
private actor PeakBox {
    /// Largest value passed to `update(_:)` so far; stays at its initial 0 until exceeded.
    private(set) var peak: Int64 = 0

    /// Raises `peak` to `value` when `value` is larger; otherwise leaves `peak` untouched.
    func update(_ value: Int64) {
        if value > peak {
            peak = value
        }
    }
}
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import AVFoundation
2+
import XCTest
3+
@testable import DSWaveformImage
4+
5+
/// Regression and smoke tests for `WaveformAnalyzer`, centred on the out-of-memory crash from
/// issue #93 and on the guard around `startReading()`.
final class WaveformAnalyzerTests: XCTestCase {
    /// Defensive: `extract` previously called `startReading()` unconditionally, which throws an
    /// uncatchable ObjC exception when the reader isn't in `.unknown`. Normal callers always pass
    /// a fresh reader, but the guard prevents a future caller from triggering a hard crash.
    func testExtractDoesNotCrashOnCancelledReader() async throws {
        let url = try makeSilentAudioFile(durationSeconds: 1)
        defer { try? FileManager.default.removeItem(at: url) }

        let reader = try await makePCMReader(forAudioAt: url)
        // Cancelling before the read starts moves the reader out of `.unknown`.
        reader.cancelReading()

        let analysis = WaveformAnalyzer().extract(44_100, downsampledTo: 100, from: reader, channelSelection: .merged, fftBands: nil)
        XCTAssertEqual(analysis.amplitudes, [], "extract on a non-fresh reader should bail with empty amplitudes")
    }

    /// Sanity check: a short file analyzes cleanly via the public surface.
    func testShortFileAnalyzesCleanly() async throws {
        let url = try makeSilentAudioFile(durationSeconds: 2)
        defer { try? FileManager.default.removeItem(at: url) }

        let samples = try await WaveformAnalyzer().samples(fromAudioAt: url, count: 100)
        XCTAssertEqual(samples.count, 100)
    }

    /// Reproduces issue #93: when the read loop produces far fewer output samples than the target
    /// (e.g. the reader fails or is cancelled mid-way after backgrounding), the analyzer's
    /// end-of-loop backfill path padded the *input* buffer with
    /// `(targetSampleCount - leftSamples.count) * samplesPerPixel * 2` bytes of zeros — easily
    /// gigabytes for long files when `samplesPerPixel` is large.
    ///
    /// We simulate the condition without needing a long file or a cancelled reader: pass a
    /// deliberately inflated `totalSamples` to `extract` while reading from a short file. That
    /// makes `samplesPerPixel` huge (`totalSamples / targetSampleCount`), and the actual read
    /// only produces a handful of output samples — exactly the bug's preconditions.
    ///
    /// Post-fix expectation: backfill pads the *output* with silence-equivalent floats and
    /// doesn't allocate anywhere near the input-byte-equivalent footprint.
    func testBackfillDoesNotAllocateInputBufferWhenReadProducesFewSamples() async throws {
        let url = try makeSilentAudioFile(durationSeconds: 2) // 88_200 real samples
        defer { try? FileManager.default.removeItem(at: url) }

        let reader = try await makePCMReader(forAudioAt: url)

        let targetSampleCount = 400
        // Lie about totalSamples to inflate samplesPerPixel. With samplesPerPixel = 25_000 and
        // only 88_200 real samples to consume, the read produces ~3 leftSamples — well under the
        // 400 target. The buggy backfill then allocates ~(397 × 25_000 × 2) = 19.85 MB of zeros
        // and a 39.7 MB Float copy. The fixed backfill allocates target-count Floats (~1.6 KB).
        let fakeTotalSamples = 10_000_000
        let buggyAllocBytes: Int64 = 19_850_000 + 39_700_000

        let (analysis, error, peakDelta) = try await measurePeakMemoryDelta {
            WaveformAnalyzer().extract(
                fakeTotalSamples,
                downsampledTo: targetSampleCount,
                from: reader,
                channelSelection: .merged,
                fftBands: nil
            )
        }

        // Report a captured failure explicitly instead of letting it show up only as a nil result.
        XCTAssertNil(error, "extract should not fail while being measured")
        let amplitudes = try XCTUnwrap(analysis).amplitudes
        XCTAssertEqual(amplitudes.count, targetSampleCount, "backfill should bring count to target")

        // Threshold at 25 % of the buggy footprint — well above any noise but well below the bug.
        let threshold = buggyAllocBytes / 4
        XCTAssertLessThan(
            peakDelta, threshold,
            "Peak alloc delta \(peakDelta / 1_000_000) MB exceeds threshold \(threshold / 1_000_000) MB — backfill is still padding the input buffer"
        )
    }

    // MARK: - Helpers

    /// Builds a fresh, not-yet-started `AVAssetReader` for the first audio track of the file at
    /// `url`, with a track output configured for interleaved 16-bit little-endian integer PCM.
    ///
    /// - Parameter url: Location of the audio file to read.
    /// - Returns: A reader with the configured track output already added.
    /// - Throws: When the tracks cannot be loaded, the asset has no audio track, or the reader
    ///   cannot be created.
    private func makePCMReader(forAudioAt url: URL) async throws -> AVAssetReader {
        let asset = AVURLAsset(url: url)
        let tracks = try await asset.loadTracks(withMediaType: .audio)
        let track = try XCTUnwrap(tracks.first)
        let reader = try AVAssetReader(asset: asset)
        let trackOutput = AVAssetReaderTrackOutput(track: track, outputSettings: [
            AVFormatIDKey: kAudioFormatLinearPCM,
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsBigEndianKey: false,
            AVLinearPCMIsFloatKey: false,
            AVLinearPCMIsNonInterleaved: false,
        ])
        reader.add(trackOutput)
        return reader
    }
}

0 commit comments

Comments
 (0)