66import re
77import typing
88from pathlib import Path
9- from xml .etree .ElementTree import ElementTree as StdElementTree
10- from xml .etree .ElementTree import ParseError
11- from xml .etree .ElementTree import register_namespace
12- from xml .etree .ElementTree import tostring
9+ from xml .etree .ElementTree import (
10+ register_namespace ,
11+ ) # nosec B405 - namespace registration only
1312
1413import requests
1514from defusedxml import ElementTree as DefusedET
15+ from defusedxml .common import DefusedXmlException
1616
1717import colrev .env .grobid_service
1818import colrev .exceptions as colrev_exceptions
@@ -147,8 +147,8 @@ def _create_tei(self) -> None:
147147 xml_fstring = file .read ()
148148 self .root = DefusedET .fromstring (xml_fstring )
149149
150- tree = StdElementTree (self .root )
151- tree .write (str (self .tei_path ) , encoding = "utf-8" )
150+ with open (self .tei_path , "wb" ) as file :
151+ file .write (DefusedET . tostring (self .root , encoding = "utf-8" ) )
152152 except requests .exceptions .ConnectionError as exc : # pragma: no cover
153153 print (exc )
154154 print (str (self .pdf_path ))
@@ -158,8 +158,8 @@ def get_tei_str(self) -> str:
158158 """Get the TEI string."""
159159 try :
160160 register_namespace ("tei" , "http://www.tei-c.org/ns/1.0" )
161- return tostring (self .root , encoding = "unicode" )
162- except ParseError as exc : # pragma: no cover
161+ return DefusedET . tostring (self .root , encoding = "unicode" )
162+ except ( DefusedET . ParseError , DefusedXmlException ) as exc : # pragma: no cover
163163 raise colrev_exceptions .TEIException from exc
164164
165165 def get_grobid_version (self ) -> str :
@@ -731,8 +731,8 @@ def mark_references(self, *, records: dict): # type: ignore
731731 # if settings file available: dedupe_io match against records
732732
733733 if self .tei_path :
734- tree = StdElementTree (self .root )
735- tree .write (str (self .tei_path ))
734+ with open (self .tei_path , "wb" ) as file :
735+ file .write (DefusedET . tostring (self .root , encoding = "utf-8" ))
736736
737737 return self .root
738738
0 commit comments