@@ -400,6 +400,50 @@ def add_sound(
400400 new_segment = new_segment .apply_gain (gain )
401401 self .add_audio_segment (new_segment , time , ** kwargs )
402402
403+ def _build_audio_segment_for_partial_movie (
404+ self , start_time : float , end_time : float
405+ ) -> AudioSegment :
406+ start_ms = int (round (start_time * 1000 ))
407+ end_ms = int (round (end_time * 1000 ))
408+ duration_ms = max (end_ms - start_ms , 0 )
409+ if duration_ms == 0 :
410+ return AudioSegment .silent ()
411+
412+ segment = self .audio_segment [start_ms :end_ms ]
413+ if len (segment ) < duration_ms :
414+ segment += AudioSegment .silent (
415+ duration = duration_ms - len (segment ),
416+ frame_rate = segment .frame_rate ,
417+ )
418+
419+ return segment
420+
421+ def _write_audio_for_partial_movie (self ) -> None :
422+ if self .audio_stream is None :
423+ return
424+
425+ segment = self ._build_audio_segment_for_partial_movie (
426+ self .partial_movie_start_time ,
427+ self .renderer .time ,
428+ )
429+ if len (segment ) == 0 :
430+ return
431+
432+ samples = np .frombuffer (segment .raw_data , dtype = np .int16 )
433+ if segment .channels == 2 :
434+ samples = samples .reshape ((- 1 , 2 )).T
435+ layout = "stereo"
436+ else :
437+ samples = samples .reshape ((1 , - 1 ))
438+ layout = "mono"
439+
440+ samples = np .ascontiguousarray (samples )
441+ frame = av .AudioFrame .from_ndarray (samples , format = "s16p" , layout = layout )
442+ frame .sample_rate = segment .frame_rate
443+
444+ for packet in self .audio_stream .encode (frame ):
445+ self .video_container .mux (packet )
446+
403447 # Writers
404448 def begin_animation (
405449 self , allow_write : bool = False , file_path : StrPath | None = None
@@ -552,10 +596,12 @@ def open_partial_movie_stream(self, file_path: StrPath | None = None) -> None:
552596 partial_movie_file_codec = "libx264"
553597 partial_movie_file_pix_fmt = "yuv420p"
554598 av_options = {
555- "an" : "1" , # ffmpeg: -an, no audio
556599 "crf" : "23" , # ffmpeg: -crf, constant rate factor (improved bitrate)
557600 }
558601
602+ if not self .includes_sound :
603+ av_options ["an" ] = "1"
604+
559605 if config .movie_file_extension == ".webm" :
560606 partial_movie_file_codec = "libvpx-vp9"
561607 av_options ["-auto-alt-ref" ] = "1"
@@ -579,6 +625,14 @@ def open_partial_movie_stream(self, file_path: StrPath | None = None) -> None:
579625 self .video_container : OutputContainer = video_container
580626 self .video_stream : Stream = stream
581627
628+ self .partial_movie_start_time = self .renderer .time
629+ self .audio_stream : Stream | None = None
630+ if not is_gif_format ():
631+ audio_codec = (
632+ "libvorbis" if config .movie_file_extension == ".webm" else "aac"
633+ )
634+ self .audio_stream = self .video_container .add_stream (audio_codec )
635+
582636 self .queue : Queue [tuple [int , PixelArray | None ]] = Queue ()
583637 self .writer_thread = Thread (target = self .listen_and_write , args = ())
584638 self .writer_thread .start ()
@@ -593,9 +647,16 @@ def close_partial_movie_stream(self) -> None:
593647 self .queue .put ((- 1 , None ))
594648 self .writer_thread .join ()
595649
650+ if self .audio_stream is not None :
651+ self ._write_audio_for_partial_movie ()
652+
596653 for packet in self .video_stream .encode ():
597654 self .video_container .mux (packet )
598655
656+ if self .audio_stream is not None :
657+ for packet in self .audio_stream .encode ():
658+ self .video_container .mux (packet )
659+
599660 self .video_container .close ()
600661
601662 logger .info (
@@ -622,7 +683,9 @@ def is_already_cached(self, hash_invocation: str) -> bool:
622683 self .partial_movie_directory
623684 / f"{ hash_invocation } { config ['movie_file_extension' ]} "
624685 )
625- return path .exists ()
686+ return (
687+ path .exists ()
688+ ) # TODO: hash will not changed if the audio changes, is it a problem?
626689
627690 def combine_files (
628691 self ,
@@ -652,7 +715,12 @@ def combine_files(
652715 partial_movies_input = av .open (
653716 str (file_list ), options = av_options , format = "concat"
654717 )
655- partial_movies_stream = partial_movies_input .streams .video [0 ]
718+ partial_movies_video_stream = partial_movies_input .streams .video [0 ]
719+ partial_movies_audio_stream = (
720+ partial_movies_input .streams .audio [0 ]
721+ if includes_sound and not create_gif and partial_movies_input .streams .audio
722+ else None
723+ )
656724 output_container = av .open (str (output_file ), mode = "w" )
657725 output_container .metadata ["comment" ] = (
658726 f"Rendered with Manim Community v{ __version__ } "
@@ -663,17 +731,17 @@ def combine_files(
663731 and the following code
664732 https://github.com/imageio/imageio/blob/65d79140018bb7c64c0692ea72cb4093e8d632a0/imageio/plugins/pyav.py#L927-L996.
665733 """
666- output_stream = output_container .add_stream (
734+ output_video_stream = output_container .add_stream (
667735 codec_name = "gif" ,
668736 )
669- output_stream .pix_fmt = "rgb8"
737+ output_video_stream .pix_fmt = "rgb8"
670738 if config .transparent :
671- output_stream .pix_fmt = "pal8"
672- output_stream .width = config .pixel_width
673- output_stream .height = config .pixel_height
674- output_stream .rate = to_av_frame_rate (config .frame_rate )
739+ output_video_stream .pix_fmt = "pal8"
740+ output_video_stream .width = config .pixel_width
741+ output_video_stream .height = config .pixel_height
742+ output_video_stream .rate = to_av_frame_rate (config .frame_rate )
675743 graph = av .filter .Graph ()
676- input_buffer = graph .add_buffer (template = partial_movies_stream )
744+ input_buffer = graph .add_buffer (template = partial_movies_video_stream )
677745 split = graph .add ("split" )
678746 palettegen = graph .add ("palettegen" , "stats_mode=diff" )
679747 paletteuse = graph .add (
@@ -698,33 +766,51 @@ def combine_files(
698766 while True :
699767 try :
700768 frame = graph .pull ()
701- if output_stream .codec_context .time_base is not None :
702- frame .time_base = output_stream .codec_context .time_base
769+ if output_video_stream .codec_context .time_base is not None :
770+ frame .time_base = output_video_stream .codec_context .time_base
703771 frame .pts = frames_written
704772 frames_written += 1
705- output_container .mux (output_stream .encode (frame ))
773+ output_container .mux (output_video_stream .encode (frame ))
706774 except av .error .EOFError :
707775 break
708776
709- for packet in output_stream .encode ():
777+ for packet in output_video_stream .encode ():
710778 output_container .mux (packet )
711779
712780 else :
713- output_stream = output_container .add_stream_from_template (
714- template = partial_movies_stream ,
781+ output_video_stream = output_container .add_stream_from_template (
782+ template = partial_movies_video_stream ,
715783 )
784+ output_audio_stream : Stream | None = None
785+ if includes_sound and partial_movies_audio_stream is not None :
786+ output_audio_stream = output_container .add_stream_from_template (
787+ template = partial_movies_audio_stream ,
788+ )
716789 if config .transparent and config .movie_file_extension == ".webm" :
717- output_stream .pix_fmt = "yuva420p"
718- for packet in partial_movies_input .demux (partial_movies_stream ):
790+ output_video_stream .pix_fmt = "yuva420p"
791+ if partial_movies_audio_stream is None :
792+ packets = partial_movies_input .demux (partial_movies_video_stream )
793+ else :
794+ packets = partial_movies_input .demux (
795+ partial_movies_video_stream ,
796+ partial_movies_audio_stream ,
797+ )
798+ for packet in packets :
719799 # We need to skip the "flushing" packets that `demux` generates.
720800 if packet .dts is None :
721801 continue
722802
803+ packet_type = packet .stream .type
723804 packet .dts = None # This seems to be needed, as dts from consecutive
724805 # files may not be monotonically increasing, so we let libav compute it.
725806
726807 # We need to assign the packet to the new stream.
727- packet .stream = output_stream
808+ if packet_type == "video" :
809+ packet .stream = output_video_stream
810+ elif packet_type == "audio" and output_audio_stream is not None :
811+ packet .stream = output_audio_stream
812+ else :
813+ continue
728814 output_container .mux (packet )
729815
730816 partial_movies_input .close ()
@@ -755,7 +841,7 @@ def combine_to_movie(self) -> None:
755841 partial_movie_files ,
756842 movie_file_path ,
757843 is_gif_format (),
758- self . includes_sound ,
844+ includes_sound = False , # We will handle sound separately, as merging multiple audio tracks can cause issues (when the audio tracks is not cut properly between the different partial movie files)
759845 )
760846
761847 # handle sound
@@ -849,6 +935,7 @@ def combine_to_section_videos(self) -> None:
849935 self .combine_files (
850936 section .get_clean_partial_movie_files (),
851937 self .sections_output_dir / section .video ,
938+ includes_sound = self .includes_sound , # TODO: maybe we should handle sound separately for sections as well, as merging multiple audio tracks can cause issues (when the audio tracks is not cut properly between the different partial movie files
852939 )
853940 sections_index .append (section .get_dict (self .sections_output_dir ))
854941 with (self .sections_output_dir / f"{ self .output_name } .json" ).open ("w" ) as file :
0 commit comments