Skip to content

Commit c106709

Browse files
committed
test: convert unittest to pytest functions
1 parent 5c575ca commit c106709

1 file changed

Lines changed: 53 additions & 50 deletions

File tree

tests/test_from_file_service.py

Lines changed: 53 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -14,56 +14,59 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616
#
17-
# python -m unittest tika.tests.test_from_file_service
1817

19-
import unittest
2018
from unittest import mock
2119

22-
import tika.parser
23-
24-
25-
class CreateTest(unittest.TestCase):
26-
'test different services in from_file parsing: Content, Metadata or both in recursive mode'
27-
28-
def test_default_service(self):
29-
'parse file using default service'
30-
result = tika.parser.from_file(
31-
'https://boe.es/boe/dias/2019/12/02/pdfs/BOE-A-2019-17288.pdf')
32-
self.assertEqual(result['metadata']['Content-Type'],'application/pdf')
33-
self.assertIn('AUTORIDADES Y PERSONAL',result['content'])
34-
@mock.patch('tika.parser._parse')
35-
@mock.patch('tika.parser.parse1')
36-
def test_remote_endpoint(self, tika_call_mock, _):
37-
result = tika.parser.from_file(
38-
'filename', 'http://tika:9998/tika')
39-
40-
tika_call_mock.assert_called_with(
41-
'all', 'filename', 'http://tika:9998/tika', headers=None, config_path=None,
42-
requestOptions={})
43-
def test_default_service_explicit(self):
44-
'parse file using default service explicitly'
45-
result = tika.parser.from_file(
46-
'https://boe.es/boe/dias/2019/12/02/pdfs/BOE-A-2019-17288.pdf', service='all')
47-
self.assertEqual(result['metadata']['Content-Type'],'application/pdf')
48-
self.assertIn('AUTORIDADES Y PERSONAL',result['content'])
49-
def test_text_service(self):
50-
'parse file using the content only service'
51-
result = tika.parser.from_file(
52-
'https://boe.es/boe/dias/2019/12/02/pdfs/BOE-A-2019-17288.pdf', service='text')
53-
self.assertIsNone(result['metadata'])
54-
self.assertIn('AUTORIDADES Y PERSONAL',result['content'])
55-
def test_meta_service(self):
56-
'parse file using the content only service'
57-
result = tika.parser.from_file(
58-
'https://boe.es/boe/dias/2019/12/02/pdfs/BOE-A-2019-17288.pdf', service='meta')
59-
self.assertIsNone(result['content'])
60-
self.assertEqual(result['metadata']['Content-Type'],'application/pdf')
61-
def test_invalid_service(self):
62-
'parse file using an invalid service should perform the default parsing'
63-
result = tika.parser.from_file(
64-
'https://boe.es/boe/dias/2019/12/02/pdfs/BOE-A-2019-17288.pdf', service='bad')
65-
self.assertEqual(result['metadata']['Content-Type'],'application/pdf')
66-
self.assertIn('AUTORIDADES Y PERSONAL',result['content'])
67-
68-
if __name__ == '__main__':
69-
unittest.main()
20+
from tika import parser
21+
22+
TEST_PDF_URL = "https://boe.es/boe/dias/2019/12/02/pdfs/BOE-A-2019-17288.pdf"
23+
24+
25+
def test_default_service():
26+
"parse file using default service"
27+
result = parser.from_file(TEST_PDF_URL)
28+
assert result["metadata"]["Content-Type"] == "application/pdf"
29+
assert "AUTORIDADES Y PERSONAL" in result["content"]
30+
31+
32+
@mock.patch("tika.parser._parse")
33+
@mock.patch("tika.parser.parse1")
34+
def test_remote_endpoint(tika_call_mock, _):
35+
result = parser.from_file("filename", "http://tika:9998/tika")
36+
37+
tika_call_mock.assert_called_with(
38+
"all",
39+
"filename",
40+
"http://tika:9998/tika",
41+
headers=None,
42+
config_path=None,
43+
requestOptions={},
44+
)
45+
46+
47+
def test_default_service_explicit():
48+
"parse file using default service explicitly"
49+
result = parser.from_file(TEST_PDF_URL, service="all")
50+
assert result["metadata"]["Content-Type"] == "application/pdf"
51+
assert "AUTORIDADES Y PERSONAL" in result["content"]
52+
53+
54+
def test_text_service():
55+
"parse file using the content only service"
56+
result = parser.from_file(TEST_PDF_URL, service="text")
57+
assert result["metadata"] is None
58+
assert "AUTORIDADES Y PERSONAL" in result["content"]
59+
60+
61+
def test_meta_service():
62+
"parse file using the metadata only service"
63+
result = parser.from_file(TEST_PDF_URL, service="meta")
64+
assert result["content"] is None
65+
assert result["metadata"]["Content-Type"] == "application/pdf"
66+
67+
68+
def test_invalid_service():
69+
"parse file using an invalid service should perform the default parsing"
70+
result = parser.from_file(TEST_PDF_URL, service="bad")
71+
assert result["metadata"]["Content-Type"] == "application/pdf"
72+
assert "AUTORIDADES Y PERSONAL" in result["content"]

0 commit comments

Comments
 (0)