Skip to content

Commit b7d72e8

Browse files
Merge pull request #467 from afuetterer/version-info
refactor: remove python 2 back compat in unpack module
2 parents 98fb67f + 5c354a9 commit b7d72e8

1 file changed

Lines changed: 15 additions & 37 deletions

File tree

tika/unpack.py

Lines changed: 15 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -22,11 +22,7 @@
22 22

23 23
from .tika import ServerEndpoint, callServer, parse1
24 24

25-
# Python 3 introduced .readable() to tarfile extracted files objects - this
26-
# is required to wrap a TextIOWrapper around the object. However, wrapping
27-
# with TextIOWrapper is only required for csv.reader() in Python 3, so the
28-
# tarfile returned object can be used as is in earlier versions.
29-
_text_wrapper = TextIOWrapper if version_info.major >= 3 else lambda x: x
25+
_text_wrapper = TextIOWrapper
30 26

31 27

32 28
def from_file(filename, serverEndpoint=ServerEndpoint, requestOptions={}):
@@ -80,32 +76,18 @@ def _parse(tarOutput):
80 76

81 77
metadataMember = tarFile.getmember("__METADATA__")
82 78
if not metadataMember.issym() and metadataMember.isfile():
83-
if version_info.major >= 3:
84-
with closing(_text_wrapper(tarFile.extractfile(metadataMember), encoding=tarFile.encoding)) as metadataFile:
85-
metadataReader = csv.reader(_truncate_nulls(metadataFile))
86-
for metadataLine in metadataReader:
87-
# each metadata line comes as a key-value pair, with list values
88-
# returned as extra values in the line - convert single values
89-
# to non-list values to be consistent with parser metadata
90-
assert len(metadataLine) >= 2
91-
92-
if len(metadataLine) > 2:
93-
metadata[metadataLine[0]] = metadataLine[1:]
94-
else:
95-
metadata[metadataLine[0]] = metadataLine[1]
96-
else:
97-
with closing(_text_wrapper(tarFile.extractfile(metadataMember))) as metadataFile:
98-
metadataReader = csv.reader(_truncate_nulls(metadataFile))
99-
for metadataLine in metadataReader:
100-
# each metadata line comes as a key-value pair, with list values
101-
# returned as extra values in the line - convert single values
102-
# to non-list values to be consistent with parser metadata
103-
assert len(metadataLine) >= 2
104-
105-
if len(metadataLine) > 2:
106-
metadata[metadataLine[0]] = metadataLine[1:]
107-
else:
108-
metadata[metadataLine[0]] = metadataLine[1]
79+
with closing(_text_wrapper(tarFile.extractfile(metadataMember), encoding=tarFile.encoding)) as metadataFile:
80+
metadataReader = csv.reader(_truncate_nulls(metadataFile))
81+
for metadataLine in metadataReader:
82+
# each metadata line comes as a key-value pair, with list values
83+
# returned as extra values in the line - convert single values
84+
# to non-list values to be consistent with parser metadata
85+
assert len(metadataLine) >= 2
86+
87+
if len(metadataLine) > 2:
88+
metadata[metadataLine[0]] = metadataLine[1:]
89+
else:
90+
metadata[metadataLine[0]] = metadataLine[1]
109 91

110 92

111 93
# get the content
@@ -115,12 +97,8 @@ def _parse(tarOutput):
115 97

116 98
contentMember = tarFile.getmember("__TEXT__")
117 99
if not contentMember.issym() and contentMember.isfile():
118-
if version_info.major >= 3:
119-
with closing(_text_wrapper(tarFile.extractfile(contentMember), encoding='utf8')) as content_file:
120-
content = content_file.read()
121-
else:
122-
with closing(tarFile.extractfile(contentMember)) as content_file:
123-
content = content_file.read().decode('utf8')
100+
with closing(_text_wrapper(tarFile.extractfile(contentMember), encoding='utf8')) as content_file:
101+
content = content_file.read()
124 102

125 103
# get the remaining files as attachments
126 104
attachments = {}

0 commit comments

Comments (0)