Skip to content

Commit 5c354a9

Browse files
committed
refactor: remove python 2 back compat in unpack module
1 parent 0cb1e18 commit 5c354a9

1 file changed

Lines changed: 15 additions & 37 deletions

File tree

tika/unpack.py

Lines changed: 15 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -23,11 +23,7 @@
2323

2424
from .tika import ServerEndpoint, callServer, parse1
2525

26-
# Python 3 introduced .readable() to tarfile extracted files objects - this
27-
# is required to wrap a TextIOWrapper around the object. However, wrapping
28-
# with TextIOWrapper is only required for csv.reader() in Python 3, so the
29-
# tarfile returned object can be used as is in earlier versions.
30-
_text_wrapper = TextIOWrapper if version_info.major >= 3 else lambda x: x
26+
_text_wrapper = TextIOWrapper
3127

3228

3329
def from_file(filename, serverEndpoint=ServerEndpoint, requestOptions={}):
@@ -81,32 +77,18 @@ def _parse(tarOutput):
8177

8278
metadataMember = tarFile.getmember("__METADATA__")
8379
if not metadataMember.issym() and metadataMember.isfile():
84-
if version_info.major >= 3:
85-
with closing(_text_wrapper(tarFile.extractfile(metadataMember), encoding=tarFile.encoding)) as metadataFile:
86-
metadataReader = csv.reader(_truncate_nulls(metadataFile))
87-
for metadataLine in metadataReader:
88-
# each metadata line comes as a key-value pair, with list values
89-
# returned as extra values in the line - convert single values
90-
# to non-list values to be consistent with parser metadata
91-
assert len(metadataLine) >= 2
92-
93-
if len(metadataLine) > 2:
94-
metadata[metadataLine[0]] = metadataLine[1:]
95-
else:
96-
metadata[metadataLine[0]] = metadataLine[1]
97-
else:
98-
with closing(_text_wrapper(tarFile.extractfile(metadataMember))) as metadataFile:
99-
metadataReader = csv.reader(_truncate_nulls(metadataFile))
100-
for metadataLine in metadataReader:
101-
# each metadata line comes as a key-value pair, with list values
102-
# returned as extra values in the line - convert single values
103-
# to non-list values to be consistent with parser metadata
104-
assert len(metadataLine) >= 2
105-
106-
if len(metadataLine) > 2:
107-
metadata[metadataLine[0]] = metadataLine[1:]
108-
else:
109-
metadata[metadataLine[0]] = metadataLine[1]
80+
with closing(_text_wrapper(tarFile.extractfile(metadataMember), encoding=tarFile.encoding)) as metadataFile:
81+
metadataReader = csv.reader(_truncate_nulls(metadataFile))
82+
for metadataLine in metadataReader:
83+
# each metadata line comes as a key-value pair, with list values
84+
# returned as extra values in the line - convert single values
85+
# to non-list values to be consistent with parser metadata
86+
assert len(metadataLine) >= 2
87+
88+
if len(metadataLine) > 2:
89+
metadata[metadataLine[0]] = metadataLine[1:]
90+
else:
91+
metadata[metadataLine[0]] = metadataLine[1]
11092

11193

11294
# get the content
@@ -116,12 +98,8 @@ def _parse(tarOutput):
11698

11799
contentMember = tarFile.getmember("__TEXT__")
118100
if not contentMember.issym() and contentMember.isfile():
119-
if version_info.major >= 3:
120-
with closing(_text_wrapper(tarFile.extractfile(contentMember), encoding='utf8')) as content_file:
121-
content = content_file.read()
122-
else:
123-
with closing(tarFile.extractfile(contentMember)) as content_file:
124-
content = content_file.read().decode('utf8')
101+
with closing(_text_wrapper(tarFile.extractfile(contentMember), encoding='utf8')) as content_file:
102+
content = content_file.read()
125103

126104
# get the remaining files as attachments
127105
attachments = {}

0 commit comments

Comments
 (0)