Skip to content

Commit 3ea2a65

Browse files
committed
wip: audio
1 parent 429f253 commit 3ea2a65

2 files changed

Lines changed: 156 additions & 20 deletions

File tree

manim/scene/scene_file_writer.py

Lines changed: 107 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,50 @@ def add_sound(
400400
new_segment = new_segment.apply_gain(gain)
401401
self.add_audio_segment(new_segment, time, **kwargs)
402402

403+
def _build_audio_segment_for_partial_movie(
    self, start_time: float, end_time: float
) -> AudioSegment:
    """Return the part of ``self.audio_segment`` that covers one partial movie.

    Both bounds are multiplied by 1000 and rounded to whole milliseconds
    before ``self.audio_segment`` is sliced.

    Parameters
    ----------
    start_time
        Start of the requested window (presumably in seconds, given the
        conversion to milliseconds).
    end_time
        End of the requested window, in the same unit as ``start_time``.

    Returns
    -------
    AudioSegment
        The sliced audio, padded with trailing silence when the slice is
        shorter than the requested window. An empty segment is returned
        when the rounded window has no positive duration.
    """
    start_ms = int(round(start_time * 1000))
    end_ms = int(round(end_time * 1000))
    duration_ms = max(end_ms - start_ms, 0)
    if duration_ms == 0:
        # ``AudioSegment.silent()`` defaults to a 1000 ms segment, so the
        # duration must be given explicitly to get an empty result that
        # callers can detect with ``len(segment) == 0``.
        return AudioSegment.silent(duration=0)

    segment = self.audio_segment[start_ms:end_ms]
    missing_ms = duration_ms - len(segment)
    if missing_ms > 0:
        # The slice came back shorter than the window: pad the tail with
        # silence at the slice's own frame rate.
        segment += AudioSegment.silent(
            duration=missing_ms,
            frame_rate=segment.frame_rate,
        )

    return segment
420+
421+
def _write_audio_for_partial_movie(self) -> None:
    """Encode the audio of the current partial movie and mux it.

    The audio between ``self.partial_movie_start_time`` and
    ``self.renderer.time`` is taken from
    ``_build_audio_segment_for_partial_movie``, converted to a planar
    signed 16-bit :class:`av.AudioFrame`, encoded with
    ``self.audio_stream`` and muxed into ``self.video_container``.

    Nothing is written when there is no audio stream or when the segment
    is empty. Flushing the encoder is left to the caller.
    """
    if self.audio_stream is None:
        return

    segment = self._build_audio_segment_for_partial_movie(
        self.partial_movie_start_time,
        self.renderer.time,
    )
    if len(segment) == 0:
        return

    # The raw bytes are reinterpreted as int16 below, which is only valid
    # for 2-byte samples; convert any other sample width first.
    if segment.sample_width != 2:
        segment = segment.set_sample_width(2)
    # Only mono and stereo layouts are produced below. pydub can downmix
    # multi-channel audio to mono (but not to stereo), so do that rather
    # than misreading interleaved channels as one mono stream.
    if segment.channels > 2:
        segment = segment.set_channels(1)

    samples = np.frombuffer(segment.raw_data, dtype=np.int16)
    if segment.channels == 2:
        # De-interleave L/R into one row per channel (planar layout).
        samples = samples.reshape((-1, 2)).T
        layout = "stereo"
    else:
        samples = samples.reshape((1, -1))
        layout = "mono"

    # The transpose above yields a non-contiguous view; make it contiguous
    # before handing it to PyAV.
    samples = np.ascontiguousarray(samples)
    frame = av.AudioFrame.from_ndarray(samples, format="s16p", layout=layout)
    frame.sample_rate = segment.frame_rate

    for packet in self.audio_stream.encode(frame):
        self.video_container.mux(packet)
446+
403447
# Writers
404448
def begin_animation(
405449
self, allow_write: bool = False, file_path: StrPath | None = None
@@ -552,10 +596,12 @@ def open_partial_movie_stream(self, file_path: StrPath | None = None) -> None:
552596
partial_movie_file_codec = "libx264"
553597
partial_movie_file_pix_fmt = "yuv420p"
554598
av_options = {
555-
"an": "1", # ffmpeg: -an, no audio
556599
"crf": "23", # ffmpeg: -crf, constant rate factor (improved bitrate)
557600
}
558601

602+
if not self.includes_sound:
603+
av_options["an"] = "1"
604+
559605
if config.movie_file_extension == ".webm":
560606
partial_movie_file_codec = "libvpx-vp9"
561607
av_options["-auto-alt-ref"] = "1"
@@ -579,6 +625,14 @@ def open_partial_movie_stream(self, file_path: StrPath | None = None) -> None:
579625
self.video_container: OutputContainer = video_container
580626
self.video_stream: Stream = stream
581627

628+
self.partial_movie_start_time = self.renderer.time
629+
self.audio_stream: Stream | None = None
630+
if not is_gif_format():
631+
audio_codec = (
632+
"libvorbis" if config.movie_file_extension == ".webm" else "aac"
633+
)
634+
self.audio_stream = self.video_container.add_stream(audio_codec)
635+
582636
self.queue: Queue[tuple[int, PixelArray | None]] = Queue()
583637
self.writer_thread = Thread(target=self.listen_and_write, args=())
584638
self.writer_thread.start()
@@ -593,9 +647,16 @@ def close_partial_movie_stream(self) -> None:
593647
self.queue.put((-1, None))
594648
self.writer_thread.join()
595649

650+
if self.audio_stream is not None:
651+
self._write_audio_for_partial_movie()
652+
596653
for packet in self.video_stream.encode():
597654
self.video_container.mux(packet)
598655

656+
if self.audio_stream is not None:
657+
for packet in self.audio_stream.encode():
658+
self.video_container.mux(packet)
659+
599660
self.video_container.close()
600661

601662
logger.info(
@@ -622,7 +683,9 @@ def is_already_cached(self, hash_invocation: str) -> bool:
622683
self.partial_movie_directory
623684
/ f"{hash_invocation}{config['movie_file_extension']}"
624685
)
625-
return path.exists()
686+
return (
687+
path.exists()
688+
) # TODO: the hash will not change if the audio changes; is that a problem?
626689

627690
def combine_files(
628691
self,
@@ -652,7 +715,12 @@ def combine_files(
652715
partial_movies_input = av.open(
653716
str(file_list), options=av_options, format="concat"
654717
)
655-
partial_movies_stream = partial_movies_input.streams.video[0]
718+
partial_movies_video_stream = partial_movies_input.streams.video[0]
719+
partial_movies_audio_stream = (
720+
partial_movies_input.streams.audio[0]
721+
if includes_sound and not create_gif and partial_movies_input.streams.audio
722+
else None
723+
)
656724
output_container = av.open(str(output_file), mode="w")
657725
output_container.metadata["comment"] = (
658726
f"Rendered with Manim Community v{__version__}"
@@ -663,17 +731,17 @@ def combine_files(
663731
and the following code
664732
https://github.com/imageio/imageio/blob/65d79140018bb7c64c0692ea72cb4093e8d632a0/imageio/plugins/pyav.py#L927-L996.
665733
"""
666-
output_stream = output_container.add_stream(
734+
output_video_stream = output_container.add_stream(
667735
codec_name="gif",
668736
)
669-
output_stream.pix_fmt = "rgb8"
737+
output_video_stream.pix_fmt = "rgb8"
670738
if config.transparent:
671-
output_stream.pix_fmt = "pal8"
672-
output_stream.width = config.pixel_width
673-
output_stream.height = config.pixel_height
674-
output_stream.rate = to_av_frame_rate(config.frame_rate)
739+
output_video_stream.pix_fmt = "pal8"
740+
output_video_stream.width = config.pixel_width
741+
output_video_stream.height = config.pixel_height
742+
output_video_stream.rate = to_av_frame_rate(config.frame_rate)
675743
graph = av.filter.Graph()
676-
input_buffer = graph.add_buffer(template=partial_movies_stream)
744+
input_buffer = graph.add_buffer(template=partial_movies_video_stream)
677745
split = graph.add("split")
678746
palettegen = graph.add("palettegen", "stats_mode=diff")
679747
paletteuse = graph.add(
@@ -698,33 +766,51 @@ def combine_files(
698766
while True:
699767
try:
700768
frame = graph.pull()
701-
if output_stream.codec_context.time_base is not None:
702-
frame.time_base = output_stream.codec_context.time_base
769+
if output_video_stream.codec_context.time_base is not None:
770+
frame.time_base = output_video_stream.codec_context.time_base
703771
frame.pts = frames_written
704772
frames_written += 1
705-
output_container.mux(output_stream.encode(frame))
773+
output_container.mux(output_video_stream.encode(frame))
706774
except av.error.EOFError:
707775
break
708776

709-
for packet in output_stream.encode():
777+
for packet in output_video_stream.encode():
710778
output_container.mux(packet)
711779

712780
else:
713-
output_stream = output_container.add_stream_from_template(
714-
template=partial_movies_stream,
781+
output_video_stream = output_container.add_stream_from_template(
782+
template=partial_movies_video_stream,
715783
)
784+
output_audio_stream: Stream | None = None
785+
if includes_sound and partial_movies_audio_stream is not None:
786+
output_audio_stream = output_container.add_stream_from_template(
787+
template=partial_movies_audio_stream,
788+
)
716789
if config.transparent and config.movie_file_extension == ".webm":
717-
output_stream.pix_fmt = "yuva420p"
718-
for packet in partial_movies_input.demux(partial_movies_stream):
790+
output_video_stream.pix_fmt = "yuva420p"
791+
if partial_movies_audio_stream is None:
792+
packets = partial_movies_input.demux(partial_movies_video_stream)
793+
else:
794+
packets = partial_movies_input.demux(
795+
partial_movies_video_stream,
796+
partial_movies_audio_stream,
797+
)
798+
for packet in packets:
719799
# We need to skip the "flushing" packets that `demux` generates.
720800
if packet.dts is None:
721801
continue
722802

803+
packet_type = packet.stream.type
723804
packet.dts = None # This seems to be needed, as dts from consecutive
724805
# files may not be monotonically increasing, so we let libav compute it.
725806

726807
# We need to assign the packet to the new stream.
727-
packet.stream = output_stream
808+
if packet_type == "video":
809+
packet.stream = output_video_stream
810+
elif packet_type == "audio" and output_audio_stream is not None:
811+
packet.stream = output_audio_stream
812+
else:
813+
continue
728814
output_container.mux(packet)
729815

730816
partial_movies_input.close()
@@ -755,7 +841,7 @@ def combine_to_movie(self) -> None:
755841
partial_movie_files,
756842
movie_file_path,
757843
is_gif_format(),
758-
self.includes_sound,
844+
includes_sound=False, # We will handle sound separately, as merging multiple audio tracks can cause issues (when the audio tracks are not cut properly between the different partial movie files)
759845
)
760846

761847
# handle sound
@@ -849,6 +935,7 @@ def combine_to_section_videos(self) -> None:
849935
self.combine_files(
850936
section.get_clean_partial_movie_files(),
851937
self.sections_output_dir / section.video,
938+
includes_sound=self.includes_sound, # TODO: maybe we should handle sound separately for sections as well, as merging multiple audio tracks can cause issues (when the audio tracks are not cut properly between the different partial movie files)
852939
)
853940
sections_index.append(section.get_dict(self.sections_output_dir))
854941
with (self.sections_output_dir / f"{self.output_name}.json").open("w") as file:

tests/test_scene_rendering/test_file_writer.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,55 @@ def construct(self):
158158
assert "click.mp3 to .wav" in manim_caplog.text
159159

160160

161+
@pytest.mark.slow
def test_partial_movie_files_include_audio(config, tmp_path):
    """Render a scene with a sound and check the partial movies carry audio.

    Partial movie files are inspected in order. Each inspected file must
    expose an audio stream, and the scan stops at the first file that
    contains a non-zero audio sample. At least one such file must exist.
    """
    output_filename = "partial_audio"

    class AudioPartialScene(Scene):
        def construct(self):
            repo_root = Path(__file__).parents[2]
            click_path = repo_root.joinpath(
                "docs", "source", "_static", "click.wav"
            )
            self.add_sound(click_path)
            self.play(Create(Circle()))
            self.wait(0.5)

    render_settings = {
        "media_dir": tmp_path,
        "quality": "low_quality",
        "format": "mp4",
        "output_file": output_filename,
    }
    with tempconfig(render_settings):
        scene = AudioPartialScene()
        scene.render()

    written_paths = scene.renderer.file_writer.partial_movie_files
    partial_files = [Path(entry) for entry in written_paths if entry is not None]
    assert partial_files

    has_nonzero_audio = False
    for partial_file in partial_files:
        with av.open(partial_file) as container:
            assert container.streams.audio, "Partial movie missing audio stream"
            # Stop decoding at the first frame holding a non-zero sample.
            if any(
                np.any(frame.to_ndarray())
                for frame in container.decode(audio=0)
            ):
                has_nonzero_audio = True
        if has_nonzero_audio:
            break

    assert has_nonzero_audio, "All partial audio samples are zero"
208+
209+
161210
@pytest.mark.slow
162211
def test_unicode_partial_movie(config, tmpdir, simple_scenes_path):
163212
# Characters that failed for a user on Windows

0 commit comments

Comments
 (0)