-
Notifications
You must be signed in to change notification settings - Fork 2.8k
Expand file tree
/
Copy pathtest_issue_163.py
More file actions
135 lines (114 loc) · 5.76 KB
/
test_issue_163.py
File metadata and controls
135 lines (114 loc) · 5.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import pytest
import sys
import os
from unittest.mock import patch, MagicMock
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from pageindex.page_index import (
check_if_toc_extraction_is_complete,
check_if_toc_transformation_is_complete,
toc_detector_single_page,
detect_page_index,
extract_toc_content,
toc_transformer,
)
class TestRobustKeyAccess:
    """Check the yes/no helpers against empty, malformed and valid LLM replies.

    Every test patches ``pageindex.page_index.llm_completion`` with a canned
    return value, so no real model is contacted.
    """

    @patch("pageindex.page_index.llm_completion", return_value="")
    def test_toc_detector_empty_response(self, mock_llm):
        """Expect ``"no"`` from the detector when the LLM reply is empty."""
        assert toc_detector_single_page("some content", model="test") == "no"

    @patch(
        "pageindex.page_index.llm_completion",
        return_value='{"toc_detected": "yes"}',
    )
    def test_toc_detector_valid_response(self, mock_llm):
        """Expect the ``toc_detected`` value to be returned for a valid reply."""
        assert toc_detector_single_page("some content", model="test") == "yes"

    @patch("pageindex.page_index.llm_completion", return_value="not json at all")
    def test_toc_detector_malformed_response(self, mock_llm):
        """Expect ``"no"`` from the detector when the reply is not JSON."""
        assert toc_detector_single_page("some content", model="test") == "no"

    @patch("pageindex.page_index.llm_completion", return_value="")
    def test_extraction_complete_empty_response(self, mock_llm):
        """Expect ``"no"`` from the extraction check when the reply is empty."""
        verdict = check_if_toc_extraction_is_complete("doc", "toc", model="test")
        assert verdict == "no"

    @patch(
        "pageindex.page_index.llm_completion",
        return_value='{"completed": "yes"}',
    )
    def test_extraction_complete_valid_response(self, mock_llm):
        """Expect the ``completed`` value to be returned by the extraction check."""
        verdict = check_if_toc_extraction_is_complete("doc", "toc", model="test")
        assert verdict == "yes"

    @patch("pageindex.page_index.llm_completion", return_value="")
    def test_transformation_complete_empty_response(self, mock_llm):
        """Expect ``"no"`` from the transformation check when the reply is empty."""
        verdict = check_if_toc_transformation_is_complete(
            "raw", "cleaned", model="test"
        )
        assert verdict == "no"

    @patch(
        "pageindex.page_index.llm_completion",
        return_value='{"thinking": "looks fine", "completed": "yes"}',
    )
    def test_transformation_complete_valid_response(self, mock_llm):
        """Expect ``"yes"`` when the reply carries ``completed`` next to other keys."""
        verdict = check_if_toc_transformation_is_complete(
            "raw", "cleaned", model="test"
        )
        assert verdict == "yes"

    @patch("pageindex.page_index.llm_completion", return_value="")
    def test_detect_page_index_empty_response(self, mock_llm):
        """Expect ``"no"`` from ``detect_page_index`` when the reply is empty."""
        assert detect_page_index("toc text", model="test") == "no"
class TestExtractTocContentRetryLoop:
    """Drive ``extract_toc_content`` with scripted LLM replies.

    Both ``llm_completion`` and ``check_if_toc_transformation_is_complete``
    are patched in ``pageindex.page_index``; the mocked LLM returns
    ``(text, finish_reason)`` tuples.
    """

    @patch("pageindex.page_index.check_if_toc_transformation_is_complete")
    @patch("pageindex.page_index.llm_completion")
    def test_completes_on_first_try(self, mock_llm, mock_check):
        """Expect a single LLM call when the first reply is finished and complete."""
        mock_check.return_value = "yes"
        mock_llm.return_value = ("full toc content", "finished")

        extracted = extract_toc_content("raw content", model="test")

        assert extracted == "full toc content"
        assert mock_llm.call_count == 1

    @patch("pageindex.page_index.check_if_toc_transformation_is_complete")
    @patch("pageindex.page_index.llm_completion")
    def test_continues_on_incomplete(self, mock_llm, mock_check):
        """Expect a second call whose text is appended to the first reply."""
        first_reply = ("partial toc", "max_output_reached")
        second_reply = (" continued toc", "finished")
        mock_llm.side_effect = [first_reply, second_reply]
        mock_check.side_effect = ["no", "yes"]

        extracted = extract_toc_content("raw content", model="test")

        assert extracted == "partial toc continued toc"
        assert mock_llm.call_count == 2

    @patch("pageindex.page_index.check_if_toc_transformation_is_complete")
    @patch("pageindex.page_index.llm_completion")
    def test_max_retries_raises_exception(self, mock_llm, mock_check):
        """Expect an exception, after six LLM calls, when output never completes."""
        mock_check.return_value = "no"
        mock_llm.return_value = ("chunk", "max_output_reached")

        with pytest.raises(
            Exception, match="Failed to complete table of contents extraction"
        ):
            extract_toc_content("raw content", model="test")

        # Checked after the ``with`` block so the assertion is actually reached.
        assert mock_llm.call_count == 6

    @patch("pageindex.page_index.check_if_toc_transformation_is_complete")
    @patch("pageindex.page_index.llm_completion")
    def test_chat_history_grows_incrementally(self, mock_llm, mock_check):
        """Expect ``chat_history`` to hold 2, then 4, entries on the follow-up calls."""
        # One row per scripted call: (reply, expected len(chat_history) or None
        # when the call's history is not inspected).
        script = (
            (("initial", "max_output_reached"), None),
            ((" part2", "max_output_reached"), 2),
            ((" part3", "finished"), 4),
        )
        calls_seen = 0

        def scripted_llm(*args, **kwargs):
            nonlocal calls_seen
            calls_seen += 1
            if calls_seen > len(script):
                # Any call beyond the script ends the conversation quietly.
                return ("", "finished")
            reply, expected_history_len = script[calls_seen - 1]
            if expected_history_len is not None:
                # Inspect the history at call time, before the caller can
                # append to the same list again.
                history = kwargs.get("chat_history", [])
                assert len(history) == expected_history_len
            return reply

        mock_llm.side_effect = scripted_llm
        mock_check.side_effect = ["no", "no", "yes"]

        extracted = extract_toc_content("raw content", model="test")

        assert extracted == "initial part2 part3"
class TestTocTransformerRetryLoop:
    """Check what ``toc_transformer`` returns for finished LLM replies.

    ``llm_completion`` and ``check_if_toc_transformation_is_complete`` are
    patched in ``pageindex.page_index``; the completeness check always
    answers ``"yes"`` here.
    """

    @patch("pageindex.page_index.check_if_toc_transformation_is_complete")
    @patch("pageindex.page_index.llm_completion")
    def test_completes_on_first_try(self, mock_llm, mock_check):
        """Expect the single ``table_of_contents`` entry to be returned."""
        toc_json = (
            '{"table_of_contents": [{"structure": "1", "title": "Intro", "page": 1}]}'
        )
        mock_check.return_value = "yes"
        mock_llm.return_value = (toc_json, "finished")

        entries = toc_transformer("raw toc", model="test")

        assert len(entries) == 1
        assert entries[0]["title"] == "Intro"

    @patch("pageindex.page_index.check_if_toc_transformation_is_complete")
    @patch("pageindex.page_index.llm_completion")
    def test_handles_missing_table_of_contents_key(self, mock_llm, mock_check):
        """Expect an empty list when the reply lacks ``table_of_contents``."""
        unrelated_json = '{"other_key": "value"}'
        mock_check.return_value = "yes"
        mock_llm.return_value = (unrelated_json, "finished")

        assert toc_transformer("raw toc", model="test") == []