4545
4646from .misc import (doc_loaders_cache , html_to_text , hasher ,
4747 file_hasher , get_splitter , check_docs_tkn_length ,
48- average_word_length , wpm , global_temp_dir )
48+ average_word_length , wpm , loaders_temp_dir_file )
4949from .typechecker import optional_typecheck
5050from .logger import whi , yel , red , log
5151from .flags import is_verbose , is_linux
@@ -189,7 +189,11 @@ def load_one_doc(
189189 The loader is cached"""
190190 text_splitter = get_splitter (task )
191191
192- assert global_temp_dir [0 ] is temp_dir , f"Error handling temp dir: temp_dir is { temp_dir } but global_temp_dir is { global_temp_dir } "
192+ expected_global_dir = loaders_temp_dir_file .read_text ().strip ()
193+ assert expected_global_dir , f"Empty loaders_temp_dir_file at { loaders_temp_dir_file } "
194+ expected_global_dir = Path (expected_global_dir )
195+ assert expected_global_dir .exists (), f"File loaders_temp_dir_file not found in { loaders_temp_dir_file } pointing at '{ expected_global_dir } '"
196+ assert expected_global_dir == temp_dir , f"Error handling temp dir: temp_dir is { temp_dir } but loaders_temp_dir is { expected_global_dir } "
193197
194198 if filetype == "youtube" :
195199 docs = load_youtube_video (** kwargs )
@@ -395,7 +399,7 @@ def load_youtube_video(
395399 )
396400 else :
397401 whi (f"Downloading audio from url: '{ path } '" )
398- file_name = global_temp_dir [ 0 ] / f"youtube_audio_{ uuid .uuid4 ()} " # without extension!
402+ file_name = load_temp_dir / f"youtube_audio_{ uuid .uuid4 ()} " # without extension!
399403 ydl_opts = {
400404 'format' : 'bestaudio/best' ,
401405 'postprocessors' : [{
@@ -410,7 +414,7 @@ def load_youtube_video(
410414 with youtube_dl .YoutubeDL (ydl_opts ) as ydl :
411415 ydl .download ([path ])
412416 candidate = []
413- for f in global_temp_dir [ 0 ] .iterdir ():
417+ for f in load_temp_dir .iterdir ():
414418 if file_name .name in f .name :
415419 candidate .append (f )
416420 assert len (candidate ), f"Audio file of { path } failed to download?"
@@ -530,7 +534,7 @@ def load_anki(
530534 original_db = akp .find_db (user = anki_profile )
531535 name = f"{ anki_profile } " .replace (" " , "_" )
532536 random_val = str (uuid .uuid4 ()).split ("-" )[- 1 ]
533- new_db_path = global_temp_dir [ 0 ] / f"anki_collection_{ name .replace ('/' , '_' )} _{ random_val } "
537+ new_db_path = load_temp_dir / f"anki_collection_{ name .replace ('/' , '_' )} _{ random_val } "
534538 assert not Path (new_db_path ).exists (
535539 ), f"{ new_db_path } already existing!"
536540 shutil .copy (original_db , new_db_path )
@@ -922,8 +926,8 @@ def load_local_audio(
922926 )
923927 red (f"Removed silence from { path .name } : { dur :.1f} -> { new_dur :.1f} in { elapsed :.1f} s" )
924928
925- unsilenced_path_wav = global_temp_dir [ 0 ] / f"unsilenced_audio_{ uuid .uuid4 ()} .wav"
926- unsilenced_path_ogg = global_temp_dir [ 0 ] / f"unsilenced_audio_{ uuid .uuid4 ()} .ogg"
929+ unsilenced_path_wav = load_temp_dir / f"unsilenced_audio_{ uuid .uuid4 ()} .wav"
930+ unsilenced_path_ogg = load_temp_dir / f"unsilenced_audio_{ uuid .uuid4 ()} .ogg"
927931 assert not unsilenced_path_wav .exists ()
928932 assert not unsilenced_path_ogg .exists ()
929933 torchaudio .save (
@@ -1007,7 +1011,7 @@ def load_local_video(
10071011 ) -> List [Document ]:
10081012 assert Path (path ).exists (), f"file not found: '{ path } '"
10091013
1010- audio_path = global_temp_dir [ 0 ] / f"audio_from_video_{ uuid .uuid4 ()} .mp3"
1014+ audio_path = load_temp_dir / f"audio_from_video_{ uuid .uuid4 ()} .mp3"
10111015 assert not audio_path .exists ()
10121016
10131017 # extract audio from video
0 commit comments