1515from colrev .constants import Fields
1616from colrev .constants import RecordState
1717from colrev .review_manager import ReviewManager
18-
18+ import colrev . exceptions as colrev_exceptions
1919
2020def _extract_references_from_records (
2121 review_manager : ReviewManager ,
@@ -28,27 +28,40 @@ def _extract_references_from_records(
2828 if Fields .FILE not in record :
2929 continue
3030
31- tei = colrev .env .tei_parser .TEIParser (
32- pdf_path = Path (record [Fields .FILE ]),
33- tei_path = Path (
34- record [Fields .FILE ]
35- .replace (".pdf" , ".tei.xml" )
36- .replace ("/pdfs" , "/.tei" )
37- ),
38- )
39- refs = tei .get_references (add_intext_citation_count = True )
40- for ref in refs :
41- ref [Fields .ID ] = record ["ID" ] + "_" + ref [Fields .ID ]
42-
43- refs_list .extend (refs )
44- col = Colors .GREEN
45- if len (refs ) == 0 :
46- col = Colors .RED
47- elif len (refs ) < 10 :
48- col = Colors .ORANGE
49- review_manager .logger .info (
50- f" extracted { col } { str (len (refs )).rjust (4 )} { Colors .END } references from { record [Fields .FILE ]} "
51- )
31+ try :
32+ tei = colrev .env .tei_parser .TEIParser (
33+ pdf_path = Path (record [Fields .FILE ]),
34+ tei_path = Path (
35+ record [Fields .FILE ]
36+ .replace (".pdf" , ".tei.xml" )
37+ .replace ("/pdfs" , "/.tei" )
38+ ),
39+ )
40+ refs = tei .get_references (add_intext_citation_count = True )
41+ for ref in refs :
42+ ref [Fields .ID ] = record ["ID" ] + "_" + ref [Fields .ID ]
43+
44+ refs_list .extend (refs )
45+ col = Colors .GREEN
46+ if len (refs ) == 0 :
47+ col = Colors .RED
48+ elif len (refs ) < 10 :
49+ col = Colors .ORANGE
50+ review_manager .logger .info (
51+ f" extracted { col } { str (len (refs )).rjust (4 )} { Colors .END } references from { record [Fields .FILE ]} "
52+ )
53+ except FileNotFoundError :
54+ review_manager .logger .warning (
55+ f"Could not find TEI file for { record [Fields .FILE ]} : "
56+ "Please check the TEI file or the PDF."
57+ )
58+ continue
59+ except colrev_exceptions .TEIException :
60+ review_manager .logger .warning (
61+ f"Could not extract references from { record [Fields .FILE ]} : "
62+ "Please check the TEI file or the PDF."
63+ )
64+ continue
5265
5366 # Create a DataFrame from the flattened list of references
5467 df_all_references = pd .DataFrame (refs_list )
0 commit comments