Skip to content

Commit 3b9bf3c

Browse files
Add audio rendering support
Add a complete audio pipeline parallel to the existing video pipeline:
- Decode audio from source media files using FFmpeg (libavcodec + libswresample)
- Walk OTIO timeline audio tracks: clips, gaps, transitions (crossfade), time warps, track mixing
- Extract audio from video clips that contain audio streams
- Mux audio alongside video in output containers (MP4, MOV)
- Write audio to separate files via -afile option

New CLI options: -acodec, -arate, -achannels, -afile, -no_audio

New files:
- AudioBuffer.h: interleaved float32 audio buffer type
- FFmpegAudioRead.h/cpp: audio decoder using FFmpeg + swresample
- AudioGraph.h/cpp: OTIO timeline walker for audio (mirrors ImageGraph)
- FFmpegAudioWrite.h/cpp: standalone audio file writer

Modified files:
- FFmpeg.h/cpp: AudioCodec enum (pcm_s16le, pcm_s24le, pcm_f32le, flac, aac)
- FFmpegWrite.h/cpp: audio+video muxing in combined writer
- App.h/cpp: CLI options and audio render loop integration
- TimelineAlgo.h/cpp: getAudioClips() helper
- CMakeLists.txt: new sources + libswresample linking
- BuildFFmpeg.cmake: AAC codec in minimal build

Signed-off-by: Daniel Weinmann <daniel@seasoned.cc>
1 parent 8c9d9ff commit 3b9bf3c

File tree

17 files changed

+1915
-14
lines changed

17 files changed

+1915
-14
lines changed

bin/toucan-render/App.cpp

Lines changed: 94 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include "App.h"
55

6+
#include <toucanRender/FFmpegAudioWrite.h>
67
#include <toucanRender/FFmpegWrite.h>
78
#include <toucanRender/Read.h>
89
#include <toucanRender/Util.h>
@@ -17,6 +18,7 @@ extern "C"
1718

1819
} // extern "C"
1920

21+
#include <cmath>
2022
#include <stdio.h>
2123

2224
namespace toucan
@@ -42,7 +44,7 @@ namespace toucan
4244
{ "444p16", OIIO::ImageSpec(0, 0, 3, OIIO::TypeDesc::BASETYPE::UINT16) }
4345
};
4446
}
45-
47+
4648
void App::_init(
4749
const std::shared_ptr<ftk::Context>& context,
4850
std::vector<std::string>& argv)
@@ -98,6 +100,29 @@ namespace toucan
98100
std::vector<std::string>{ "-v" },
99101
"Print verbose output.");
100102

103+
_cmdLine.audioCodec = ftk::CmdLineValueOption<std::string>::create(
104+
std::vector<std::string>{ "-acodec" },
105+
"Set the audio codec.",
106+
"",
107+
"pcm_s16le",
108+
ftk::join(ffmpeg::getAudioCodecStrings(), ", "));
109+
_cmdLine.audioSampleRate = ftk::CmdLineValueOption<int>::create(
110+
std::vector<std::string>{ "-arate" },
111+
"Set the audio sample rate.",
112+
"",
113+
48000);
114+
_cmdLine.audioChannelCount = ftk::CmdLineValueOption<int>::create(
115+
std::vector<std::string>{ "-achannels" },
116+
"Set the audio channel count.",
117+
"",
118+
2);
119+
_cmdLine.audioFile = ftk::CmdLineValueOption<std::string>::create(
120+
std::vector<std::string>{ "-afile" },
121+
"Write audio to a separate file.");
122+
_cmdLine.noAudio = ftk::CmdLineFlagOption::create(
123+
std::vector<std::string>{ "-no_audio" },
124+
"Disable audio output.");
125+
101126
IApp::_init(
102127
context,
103128
argv,
@@ -112,7 +137,12 @@ namespace toucan
112137
_cmdLine.printSize,
113138
_cmdLine.raw,
114139
_cmdLine.y4m,
115-
_cmdLine.verbose
140+
_cmdLine.verbose,
141+
_cmdLine.audioCodec,
142+
_cmdLine.audioSampleRate,
143+
_cmdLine.audioChannelCount,
144+
_cmdLine.audioFile,
145+
_cmdLine.noAudio
116146
});
117147

118148
if (_cmdLine.output->hasValue() && _cmdLine.output->getValue() == "-")
@@ -123,7 +153,7 @@ namespace toucan
123153

124154
App::App()
125155
{}
126-
156+
127157
App::~App()
128158
{
129159
if (_swsContext)
@@ -148,7 +178,7 @@ namespace toucan
148178
out->_init(context, argv);
149179
return out;
150180
}
151-
181+
152182
void App::run()
153183
{
154184
const std::filesystem::path parentPath = std::filesystem::path(getExeName()).parent_path();
@@ -165,7 +195,7 @@ namespace toucan
165195
const OTIO_NS::TimeRange& timeRange = _timelineWrapper->getTimeRange();
166196
const OTIO_NS::RationalTime timeInc(1.0, timeRange.duration().rate());
167197
const int frames = timeRange.duration().value();
168-
198+
169199
// Create the image graph.
170200
_graph = std::make_shared<ImageGraph>(
171201
_context,
@@ -195,9 +225,35 @@ namespace toucan
195225
return;
196226
}
197227

228+
// Audio settings.
229+
const int audioSampleRate = _cmdLine.audioSampleRate->hasValue() ?
230+
_cmdLine.audioSampleRate->getValue() : 48000;
231+
const int audioChannelCount = _cmdLine.audioChannelCount->hasValue() ?
232+
_cmdLine.audioChannelCount->getValue() : 2;
233+
234+
// Create the audio graph.
235+
if (!_cmdLine.noAudio->found())
236+
{
237+
_audioGraph = std::make_shared<AudioGraph>(
238+
_context,
239+
inputPath.parent_path(),
240+
_timelineWrapper,
241+
audioSampleRate,
242+
audioChannelCount);
243+
}
244+
198245
// Create the image host.
199246
_host = std::make_shared<ImageEffectHost>(_context, getOpenFXPluginPaths(getExeName()));
200247

248+
// Audio codec.
249+
ffmpeg::AudioCodec audioCodec = ffmpeg::AudioCodec::PCM_S16LE;
250+
if (_cmdLine.audioCodec->hasValue())
251+
{
252+
ffmpeg::fromString(_cmdLine.audioCodec->getValue(), audioCodec);
253+
}
254+
255+
const bool includeAudio = _audioGraph && _audioGraph->hasAudio();
256+
201257
// Open the movie file.
202258
std::shared_ptr<ffmpeg::Write> ffWrite;
203259
if (hasExtension(outputPath.extension().string(), MovieReadNode::getExtensions()))
@@ -211,9 +267,26 @@ namespace toucan
211267
outputPath,
212268
OIIO::ImageSpec(imageSize.x, imageSize.y, 3),
213269
timeRange,
214-
videoCodec);
270+
videoCodec,
271+
includeAudio ? audioSampleRate : 0,
272+
includeAudio ? audioChannelCount : 0,
273+
audioCodec);
274+
}
275+
276+
// Open the separate audio file.
277+
std::shared_ptr<ffmpeg::AudioWrite> audioFileWrite;
278+
if (_cmdLine.audioFile->hasValue() && includeAudio)
279+
{
280+
audioFileWrite = std::make_shared<ffmpeg::AudioWrite>(
281+
std::filesystem::path(_cmdLine.audioFile->getValue()),
282+
audioSampleRate,
283+
audioChannelCount,
284+
audioCodec);
215285
}
216286

287+
const int samplesPerFrame = static_cast<int>(
288+
std::round(static_cast<double>(audioSampleRate) / timeRange.duration().rate()));
289+
217290
// Render the timeline frames.
218291
if (_cmdLine.y4m->hasValue())
219292
{
@@ -261,6 +334,21 @@ namespace toucan
261334
_writeY4mFrame(buf);
262335
}
263336
}
337+
338+
// Render and write audio for this frame.
339+
if (includeAudio)
340+
{
341+
const AudioBuffer audioBuf = _audioGraph->exec(time, samplesPerFrame);
342+
343+
if (ffWrite)
344+
{
345+
ffWrite->writeAudio(audioBuf);
346+
}
347+
if (audioFileWrite)
348+
{
349+
audioFileWrite->writeAudio(audioBuf);
350+
}
351+
}
264352
}
265353
}
266354

@@ -484,4 +572,3 @@ namespace toucan
484572
}
485573
}
486574
}
487-

bin/toucan-render/App.h

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#pragma once
55

6+
#include <toucanRender/AudioGraph.h>
67
#include <toucanRender/ImageEffectHost.h>
78
#include <toucanRender/ImageGraph.h>
89
#include <toucanRender/TimelineWrapper.h>
@@ -31,18 +32,18 @@ namespace toucan
3132

3233
public:
3334
~App();
34-
35+
3536
static std::shared_ptr<App> create(
3637
const std::shared_ptr<ftk::Context>&,
3738
std::vector<std::string>&);
3839

3940
void run() override;
40-
41+
4142
private:
4243
void _writeRawFrame(const OIIO::ImageBuf&);
4344
void _writeY4mHeader();
4445
void _writeY4mFrame(const OIIO::ImageBuf&);
45-
46+
4647
struct CmdLine
4748
{
4849
std::shared_ptr<ftk::CmdLineValueArg<std::string> > input;
@@ -57,11 +58,18 @@ namespace toucan
5758
std::shared_ptr<ftk::CmdLineValueOption<std::string> > raw;
5859
std::shared_ptr<ftk::CmdLineValueOption<std::string> > y4m;
5960
std::shared_ptr<ftk::CmdLineFlagOption> verbose;
61+
62+
std::shared_ptr<ftk::CmdLineValueOption<std::string> > audioCodec;
63+
std::shared_ptr<ftk::CmdLineValueOption<int> > audioSampleRate;
64+
std::shared_ptr<ftk::CmdLineValueOption<int> > audioChannelCount;
65+
std::shared_ptr<ftk::CmdLineValueOption<std::string> > audioFile;
66+
std::shared_ptr<ftk::CmdLineFlagOption> noAudio;
6067
};
6168
CmdLine _cmdLine;
6269

6370
std::shared_ptr<TimelineWrapper> _timelineWrapper;
6471
std::shared_ptr<ImageGraph> _graph;
72+
std::shared_ptr<AudioGraph> _audioGraph;
6573
std::shared_ptr<ImageEffectHost> _host;
6674

6775
AVFrame* _avFrame = nullptr;
@@ -71,4 +79,3 @@ namespace toucan
7179
SwsContext* _swsContext = nullptr;
7280
};
7381
}
74-

cmake/SuperBuild/BuildFFmpeg.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ set(FFmpeg_CONFIGURE_ARGS
100100
if(toucan_FFmpeg_MINIMAL)
101101
list(APPEND FFmpeg_CONFIGURE_ARGS
102102
--disable-decoders
103+
--enable-decoder=aac
103104
--enable-decoder=apv
104105
--enable-decoder=av1
105106
--enable-decoder=flac
@@ -151,6 +152,7 @@ if(toucan_FFmpeg_MINIMAL)
151152
--enable-decoder=vp9
152153
--enable-decoder=yuv4
153154
--disable-encoders
155+
--enable-encoder=aac
154156
--enable-encoder=flac
155157
--enable-encoder=mjpeg
156158
--enable-encoder=mpeg2video
@@ -265,6 +267,7 @@ if(toucan_FFmpeg_MINIMAL)
265267
--enable-muxer=wav
266268
--enable-muxer=yuv4mpegpipe
267269
--disable-parsers
270+
--enable-parser=aac
268271
--enable-parser=apv
269272
--enable-parser=av1
270273
--enable-parser=flac

lib/toucanRender/AudioBuffer.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// Copyright Contributors to the toucan project.
3+
4+
#pragma once
5+
6+
#include <cstddef>
7+
#include <vector>
8+
9+
namespace toucan
10+
{
11+
// Plain aggregate holding a block of audio samples as 32-bit floats, together
// with the format metadata needed to interpret them. The commit description
// refers to this as an "interleaved float32 audio buffer type"; the struct
// itself does not enforce any particular layout or size relationship.
struct AudioBuffer
12+
{
13+
// Raw sample storage.
// NOTE(review): presumably channel-interleaved, with
// data.size() == sampleCount * channelCount -- confirm against producers.
std::vector<float> data;
14+
// Sample rate of the stored audio (presumably in Hz -- confirm). The
// default of 0 makes isValid() return false.
int sampleRate = 0;
15+
// Number of audio channels. Not consulted by isValid().
int channelCount = 0;
16+
// Number of samples held.
// NOTE(review): presumably counted per channel rather than across all
// channels -- confirm. Not consulted by isValid().
int sampleCount = 0;
17+
18+
// Returns true when the buffer contains at least one float and the sample
// rate is positive. channelCount and sampleCount are not checked.
bool isValid() const { return !data.empty() && sampleRate > 0; }
19+
// Returns the size of the stored sample data in bytes
// (number of floats in data times sizeof(float)).
size_t byteCount() const { return data.size() * sizeof(float); }
20+
};
21+
}

0 commit comments

Comments
 (0)