@@ -45,7 +45,7 @@ def extract_pdf_info(
4545 debug_pages : int | None = raw_dp if isinstance (raw_dp , int ) and raw_dp > 0 else None
4646
4747 # 1) Extract full text (headers/footers filtered)
48- full_text : str = extract_pdf_text (pdf_path , debug = debug , debug_pages = debug_pages )
48+ full_text : str = extract_pdf_text (p , debug = debug , debug_pages = debug_pages )
4949
5050 # Precompute normalized text once (used by decision + metrics)
5151 text_norm : str = normalize_text (full_text )
@@ -56,7 +56,7 @@ def extract_pdf_info(
5656 # 2) Build a light "masthead" blob from first couple of pages
5757 masthead_lines : list [str ] = []
5858 try :
59- for i , layout in enumerate (extract_pages (str (pdf_path ))):
59+ for i , layout in enumerate (extract_pages (str (p ))):
6060 if not isinstance (layout , LTPage ):
6161 continue
6262 if i >= 2 :
@@ -92,8 +92,8 @@ def extract_pdf_info(
9292 # 5) Compose record
9393 record : dict [str , Any ] = {
9494 "filename" : p .name ,
95- "path" : str (pdf_path ),
96- "pages" : count_pages (pdf_path ),
95+ "path" : str (p ),
96+ "pages" : count_pages (p ),
9797 ** header ,
9898 "articles" : articles_ordered ,
9999 }
0 commit comments