@@ -382,7 +382,6 @@ def load_one_doc(
382382 """choose the appropriate loader for a file, then load it,
383383 split into documents, add some metadata then return.
384384 The loader is cached"""
385- debug = is_debug
386385 text_splitter = get_splitter (task , modelname = llm_name )
387386 assert kwargs , "Received an empty dict of arguments to load. Maybe --path is empty?"
388387
@@ -404,7 +403,6 @@ def load_one_doc(
404403
405404 elif filetype == "pdf" :
406405 docs = load_pdf (
407- debug = debug ,
408406 text_splitter = text_splitter ,
409407 file_hash = file_hash ,
410408 doccheck_min_lang_prob = doccheck_min_lang_prob ,
@@ -416,7 +414,6 @@ def load_one_doc(
416414 elif filetype == "online_pdf" :
417415 docs = load_online_pdf (
418416 text_splitter = text_splitter ,
419- debug = debug ,
420417 file_hash = file_hash ,
421418 doccheck_min_lang_prob = doccheck_min_lang_prob ,
422419 doccheck_min_token = doccheck_min_token ,
@@ -456,7 +453,6 @@ def load_one_doc(
456453
457454 elif filetype == "logseq_markdown" :
458455 docs = load_logseq_markdown (
459- debug = debug ,
460456 file_hash = file_hash ,
461457 text_splitter = text_splitter ,
462458 ** kwargs ,
@@ -797,7 +793,6 @@ def load_youtube_video(
797793def load_online_pdf (
798794 path : str ,
799795 text_splitter : TextSplitter ,
800- debug : bool ,
801796 file_hash : str ,
802797 pdf_parsers : Union [str , List [str ]] = 'pymupdf' , # used only if online loading fails
803798 doccheck_min_lang_prob : float = min_lang_prob ,
@@ -835,7 +830,6 @@ def load_online_pdf(
835830 docs = load_pdf (
836831 path = temp_file .name ,
837832 text_splitter = text_splitter ,
838- debug = debug ,
839833 file_hash = file_hasher ({"path" : temp_file .name }),
840834 pdf_parsers = pdf_parsers ,
841835 doccheck_min_lang_prob = doccheck_min_lang_prob ,
@@ -1542,7 +1536,6 @@ def eval_load_functions(
15421536@optional_strip_unexp_args
15431537@doc_loaders_cache .cache (ignore = ["path" ])
15441538def load_logseq_markdown (
1545- debug : bool ,
15461539 path : str ,
15471540 file_hash : str ,
15481541 text_splitter : TextSplitter ,
@@ -2264,7 +2257,6 @@ def _pdf_loader(loader_name: str, path: str, file_hash: str) -> List[Document]:
22642257def load_pdf (
22652258 path : str ,
22662259 text_splitter : TextSplitter ,
2267- debug : bool ,
22682260 file_hash : str ,
22692261 pdf_parsers : Union [str , List [str ]] = 'pymupdf' ,
22702262 doccheck_min_lang_prob : float = min_lang_prob ,
@@ -2304,7 +2296,7 @@ def load_pdf(
23042296 for loader_name in pdf_parsers :
23052297 pbar .desc = f"Parsing PDF { name } with { loader_name } "
23062298 try :
2307- if debug :
2299+ if is_debug :
23082300 red (f"Trying to parse { path } using { loader_name } " )
23092301
23102302 if pdf_loader_max_timeout > 0 :
@@ -2390,7 +2382,7 @@ def load_pdf(
23902382
23912383 max_prob = max ([v for v in probs .values ()])
23922384
2393- if debug :
2385+ if is_debug :
23942386 yel (f"Language probability after parsing { path } : { probs } " )
23952387
23962388 return loaded_docs [[name for name in probs if probs [name ] == max_prob ][0 ]]
0 commit comments