4343
4444from unstructured .cleaners .core import clean_extra_whitespace
4545
46- from .misc import (loaddoc_cache , html_to_text , hasher ,
46+ from .misc import (doc_loaders_cache , html_to_text , hasher ,
4747 file_hasher , get_splitter , check_docs_tkn_length ,
4848 average_word_length , wpm )
4949from .typechecker import optional_typecheck
@@ -468,7 +468,7 @@ def load_youtube_video(
468468 return docs
469469
470470@optional_typecheck
471- @loaddoc_cache .cache
471+ @doc_loaders_cache .cache
472472def load_online_pdf (debug : bool , task : str , path : str , ** kwargs ) -> List [Document ]:
473473 whi (f"Loading online pdf: '{ path } '" )
474474
@@ -733,7 +733,7 @@ def load_anki(
733733 return docs
734734
735735@optional_typecheck
736- @loaddoc_cache .cache
736+ @doc_loaders_cache .cache
737737def load_string () -> List [Document ]:
738738 whi ("Loading string" )
739739 content = prompt (
@@ -750,7 +750,7 @@ def load_string() -> List[Document]:
750750 return docs
751751
752752@optional_typecheck
753- @loaddoc_cache .cache (ignore = ["path" ])
753+ @doc_loaders_cache .cache (ignore = ["path" ])
754754def load_txt (path : str , file_hash : str ) -> List [Document ]:
755755 whi (f"Loading txt: '{ path } '" )
756756 assert Path (path ).exists (), f"file not found: '{ path } '"
@@ -760,7 +760,7 @@ def load_txt(path: str, file_hash: str) -> List[Document]:
760760 return docs
761761
762762@optional_typecheck
763- @loaddoc_cache .cache (ignore = ["path" ])
763+ @doc_loaders_cache .cache (ignore = ["path" ])
764764def load_local_html (
765765 path : str ,
766766 file_hash : str ,
@@ -810,7 +810,7 @@ def load_local_html(
810810 ]
811811 return docs
812812
813- @loaddoc_cache .cache
813+ @doc_loaders_cache .cache
814814def eval_load_functions (
815815 load_functions : str ,
816816 ) -> List [Callable ]:
@@ -827,7 +827,7 @@ def eval_load_functions(
827827
828828
829829@optional_typecheck
830- @loaddoc_cache .cache (ignore = ["path" ])
830+ @doc_loaders_cache .cache (ignore = ["path" ])
831831def load_logseq_markdown (debug : bool , path : str , file_hash : str ) -> List [Document ]:
832832 whi (f"Loading logseq markdown file: '{ path } '" )
833833 assert Path (path ).exists (), f"file not found: '{ path } '"
@@ -877,7 +877,7 @@ def load_logseq_markdown(debug: bool, path: str, file_hash: str) -> List[Documen
877877 return docs
878878
879879@optional_typecheck
880- @loaddoc_cache .cache (ignore = ["path" ])
880+ @doc_loaders_cache .cache (ignore = ["path" ])
881881def load_local_audio (
882882 path : str ,
883883 file_hash : str ,
@@ -987,7 +987,7 @@ def load_local_audio(
987987 return docs
988988
989989@optional_typecheck
990- @loaddoc_cache .cache (ignore = ["path" ])
990+ @doc_loaders_cache .cache (ignore = ["path" ])
991991def load_local_video (
992992 path : str ,
993993 file_hash : str ,
@@ -1046,7 +1046,7 @@ def load_local_video(
10461046
10471047
10481048@optional_typecheck
1049- @loaddoc_cache .cache (ignore = ["audio_path" ])
1049+ @doc_loaders_cache .cache (ignore = ["audio_path" ])
10501050def transcribe_audio_deepgram (
10511051 audio_path : str ,
10521052 audio_hash : str ,
@@ -1115,7 +1115,7 @@ def transcribe_audio_deepgram(
11151115 return d
11161116
11171117@optional_typecheck
1118- @loaddoc_cache .cache (ignore = ["audio_path" ])
1118+ @doc_loaders_cache .cache (ignore = ["audio_path" ])
11191119def transcribe_audio_whisper (
11201120 audio_path : str ,
11211121 audio_hash : str ,
@@ -1143,7 +1143,7 @@ def transcribe_audio_whisper(
11431143 return transcript
11441144
11451145@optional_typecheck
1146- @loaddoc_cache .cache (ignore = ["path" ])
1146+ @doc_loaders_cache .cache (ignore = ["path" ])
11471147def load_epub (
11481148 path : str ,
11491149 file_hash : str ,
@@ -1161,7 +1161,7 @@ def load_epub(
11611161 return docs
11621162
11631163@optional_typecheck
1164- @loaddoc_cache .cache (ignore = ["path" ])
1164+ @doc_loaders_cache .cache (ignore = ["path" ])
11651165def load_powerpoint (
11661166 path : str ,
11671167 file_hash : str ,
@@ -1178,7 +1178,7 @@ def load_powerpoint(
11781178 ]
11791179 return docs
11801180@optional_typecheck
1181- @loaddoc_cache .cache (ignore = ["path" ])
1181+ @doc_loaders_cache .cache (ignore = ["path" ])
11821182def load_word_document (
11831183 path : str ,
11841184 file_hash : str ,
@@ -1198,7 +1198,7 @@ def load_word_document(
11981198 return docs
11991199
12001200@optional_typecheck
1201- @loaddoc_cache .cache
1201+ @doc_loaders_cache .cache
12021202def load_url (path : str , title = None ) -> List [Document ]:
12031203 whi (f"Loading url: '{ path } '" )
12041204
@@ -1348,7 +1348,7 @@ def load_url(path: str, title=None) -> List[Document]:
13481348
13491349
13501350@optional_typecheck
1351- @loaddoc_cache .cache
1351+ @doc_loaders_cache .cache
13521352def load_youtube_playlist (playlist_url : str ) -> Any :
13531353 with youtube_dl .YoutubeDL ({"quiet" : False }) as ydl :
13541354 try :
@@ -1362,7 +1362,7 @@ def load_youtube_playlist(playlist_url: str) -> Any:
13621362
13631363
13641364@optional_typecheck
1365- @loaddoc_cache .cache (ignore = ["loader" ])
1365+ @doc_loaders_cache .cache (ignore = ["loader" ])
13661366def cached_yt_loader (
13671367 loader : Any ,
13681368 path : str ,
@@ -1380,7 +1380,7 @@ def cached_yt_loader(
13801380
13811381
13821382@optional_typecheck
1383- @loaddoc_cache .cache (ignore = ["path" ])
1383+ @doc_loaders_cache .cache (ignore = ["path" ])
13841384def _pdf_loader (loader_name : str , path : str , file_hash : str ) -> str :
13851385 loader = pdf_loaders [loader_name ](path )
13861386 content = loader .load ()
0 commit comments