Skip to content

Commit 5492853

Browse files
committed
test: restructure tests
1 parent 0cb1e18 commit 5492853

File tree

6 files changed: +65 additions, -69 deletions

tests/conftest.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from pathlib import Path
2+
3+
import pytest
4+
5+
6+
@pytest.fixture
7+
def test_file_path():
8+
return Path(__file__).parent / "files" / "rwservlet.pdf"
9+

tests/test_detector.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
1-
from pathlib import Path
2-
31
from tika import detector
42

5-
TEST_FILE_PATH = Path(__file__).parent / "files" / "rwservlet.pdf"
6-
73

8-
def test_local_binary():
9-
with open(TEST_FILE_PATH, "rb") as file_obj:
4+
def test_local_binary(test_file_path):
5+
with open(test_file_path, "rb") as file_obj:
106
assert detector.from_file(file_obj) == "application/pdf"
117

128

13-
def test_local_path():
14-
assert detector.from_file(str(TEST_FILE_PATH)) == "application/pdf"
9+
def test_local_path(test_file_path):
10+
assert detector.from_file(str(test_file_path)) == "application/pdf"
1511

1612

1713
def test_local_buffer():

tests/test_language.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
1-
from pathlib import Path
2-
31
from tika import language
42

5-
TEST_FILE_PATH = Path(__file__).parent / "files" / "rwservlet.pdf"
6-
73

8-
def test_local_binary():
9-
with open(TEST_FILE_PATH, "rb") as file_obj:
4+
def test_local_binary(test_file_path):
5+
with open(test_file_path, "rb") as file_obj:
106
assert language.from_file(file_obj) == "en"
117

128

13-
def test_local_path():
14-
assert language.from_file(str(TEST_FILE_PATH)) == "en"
9+
def test_local_path(test_file_path):
10+
assert language.from_file(str(test_file_path)) == "en"
1511

1612

1713
def test_local_buffer():

tests/test_parser.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from http import HTTPStatus
2+
3+
from tika import parser
4+
5+
6+
def test_remote_pdf():
7+
"""parse remote PDF"""
8+
assert parser.from_file(
9+
"https://upload.wikimedia.org/wikipedia/commons/4/42/Article_feedback_flow_B_-_Thank_editors.pdf")
10+
11+
12+
def test_remote_html():
13+
"""parse remote HTML"""
14+
assert parser.from_file("http://nossl.sh")
15+
16+
17+
def test_remote_mp3():
18+
"""parse remote mp3"""
19+
assert parser.from_file(
20+
"https://archive.org/download/Ainst-Spaceshipdemo.mp3/Ainst-Spaceshipdemo.mp3")
21+
22+
23+
def test_remote_jpg():
24+
"""parse remote jpg"""
25+
assert parser.from_file(
26+
"https://upload.wikimedia.org/wikipedia/commons/b/b7/X_logo.jpg")
27+
28+
29+
def test_local_binary(test_file_path):
30+
"""parse file binary"""
31+
with open(test_file_path, "rb") as file_obj:
32+
assert parser.from_file(file_obj)
33+
34+
35+
def test_local_buffer():
36+
response = parser.from_buffer("Good evening, Dave")
37+
assert response["status"] == HTTPStatus.OK
38+
39+
40+
def test_local_path(test_file_path):
41+
"""parse file path"""
42+
assert parser.from_file(str(test_file_path))
43+

tests/test_pdf.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
1-
from pathlib import Path
2-
31
from tika import pdf
42

5-
TEST_FILE_PATH = Path(__file__).parent / "files" / "rwservlet.pdf"
6-
73

8-
def test_local_path():
9-
text_pages = pdf.text_from_pdf_pages(str(TEST_FILE_PATH))
4+
def test_local_path(test_file_path):
5+
text_pages = pdf.text_from_pdf_pages(str(test_file_path))
106
assert isinstance(text_pages, list)

tests/test_tika.py

Lines changed: 3 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -14,56 +14,12 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616

17-
from http import HTTPStatus
18-
from pathlib import Path
19-
20-
import tika.parser
2117
import tika.tika
22-
23-
TEST_FILE_PATH = Path(__file__).parent / "files" / "rwservlet.pdf"
24-
25-
26-
def test_remote_pdf():
27-
"""parse remote PDF"""
28-
assert tika.parser.from_file(
29-
"https://upload.wikimedia.org/wikipedia/commons/4/42/Article_feedback_flow_B_-_Thank_editors.pdf")
30-
31-
32-
def test_remote_html():
33-
"""parse remote HTML"""
34-
assert tika.parser.from_file("http://nossl.sh")
35-
36-
37-
def test_remote_mp3():
38-
"""parse remote mp3"""
39-
assert tika.parser.from_file(
40-
"https://archive.org/download/Ainst-Spaceshipdemo.mp3/Ainst-Spaceshipdemo.mp3")
41-
42-
43-
def test_remote_jpg():
44-
"""parse remote jpg"""
45-
assert tika.parser.from_file(
46-
"https://upload.wikimedia.org/wikipedia/commons/b/b7/X_logo.jpg")
47-
48-
49-
def test_local_binary():
50-
"""parse file binary"""
51-
with open(TEST_FILE_PATH, "rb") as file_obj:
52-
assert tika.parser.from_file(file_obj)
53-
54-
55-
def test_local_buffer():
56-
response = tika.parser.from_buffer("Good evening, Dave")
57-
assert response["status"] == HTTPStatus.OK
58-
59-
60-
def test_local_path():
61-
"""parse file path"""
62-
assert tika.parser.from_file(str(TEST_FILE_PATH))
18+
from tika import parser
6319

6420

65-
def test_kill_server():
21+
def test_kill_server(test_file_path):
6622
"""parse some file then kills server"""
67-
with open(TEST_FILE_PATH, "rb") as file_obj:
23+
with open(test_file_path, "rb") as file_obj:
6824
tika.parser.from_file(file_obj)
6925
assert tika.tika.killServer() is None

0 commit comments

Comments (0)