forked from NVIDIA/TensorRT-LLM
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_llm_api.py
More file actions
200 lines (162 loc) · 7 KB
/
test_llm_api.py
File metadata and controls
200 lines (162 loc) · 7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
from types import MethodType
from typing import Optional
import pytest
from api_stability_core import (ApiStabilityTestHarness, ClassSnapshot,
MethodSnapshot)
from tensorrt_llm import LLM
from tensorrt_llm.bindings import executor as tllme
from tensorrt_llm.executor.result import IterationResult
from tensorrt_llm.llmapi import (CalibConfig, CompletionOutput,
GuidedDecodingParams, QuantConfig,
RequestOutput)
from tensorrt_llm.sampling_params import (BatchedLogitsProcessor,
LogitsProcessor, SamplingParams)
class TestSamplingParams(ApiStabilityTestHarness):
    """API-stability checks for ``SamplingParams``.

    On top of whatever the harness base class provides, two tests pin the
    non-dunder attribute names exposed by the executor bindings
    ``tllme.SamplingConfig`` and ``tllme.OutputConfig``.
    """

    TEST_CLASS = SamplingParams
    REFERENCE_FILE = "sampling_params.yaml"

    def test_get_sampling_config(self):
        """Fail when ``tllme.SamplingConfig`` gains or loses an attribute."""
        owner = self.TEST_CLASS.__name__
        error_msg = (
            "Found fields in `tllme.SamplingConfig` different than expected; "
            f"if `tllme.SamplingConfig` is changed, please update {owner} accordingly."
        )
        expected_names = {
            "beam_width", "beam_width_array",
            "top_k", "top_p", "top_p_min", "top_p_reset_ids", "top_p_decay",
            "seed", "temperature", "min_tokens",
            "beam_search_diversity_rate",
            "repetition_penalty", "presence_penalty", "frequency_penalty",
            "prompt_ignore_length", "length_penalty", "early_stopping",
            "no_repeat_ngram_size", "num_return_sequences", "min_p",
        }
        # Everything reported by dir() except double-underscore names.
        actual_names = {
            name
            for name in dir(tllme.SamplingConfig)
            if not name.startswith("__")
        }
        assert actual_names == expected_names, error_msg

    def test_get_output_config(self):
        """Fail when ``tllme.OutputConfig`` gains or loses an attribute."""
        owner = self.TEST_CLASS.__name__
        error_msg = (
            "Found fields in `tllme.OutputConfig` different than expected; "
            f"if `tllme.OutputConfig` is changed, please update {owner} accordingly."
        )
        expected_names = {
            "return_log_probs",
            "return_context_logits",
            "return_generation_logits",
            "exclude_input_from_output",
            "return_encoder_output",
            "return_perf_metrics",
            "additional_model_outputs",
        }
        # Everything reported by dir() except double-underscore names.
        actual_names = {
            name
            for name in dir(tllme.OutputConfig)
            if not name.startswith("__")
        }
        assert actual_names == expected_names, error_msg
class TestGuidedDecodingParams(ApiStabilityTestHarness):
    """Configure the API-stability harness for ``GuidedDecodingParams``."""

    # Class under test and the reference file name handed to the harness.
    TEST_CLASS = GuidedDecodingParams
    REFERENCE_FILE = "guided_decoding_params.yaml"
class TestLogitsProcessor(ApiStabilityTestHarness):
    """API-stability checks for ``LogitsProcessor``, limited to ``__call__``.

    Both snapshot builders below return a ``ClassSnapshot`` with a single
    ``__call__`` method entry and no properties.
    """

    TEST_CLASS = LogitsProcessor
    REFERENCE_FILE = "logits_processor.yaml"

    def create_snapshot_from_inspect(self):
        """Return a snapshot of ``__call__`` built via ``from_inspect``."""
        # Bind ``__call__`` to a placeholder object; TEST_CLASS itself is not
        # instantiated here.
        bound_call = MethodType(self.TEST_CLASS.__call__, object())
        call_snapshot = MethodSnapshot.from_inspect(bound_call)
        return ClassSnapshot(
            methods={"__call__": call_snapshot},
            properties={},
        )

    def create_snapshot_from_docstring(self):
        """Return a snapshot of ``__call__`` built via ``from_docstring``."""
        # Same placeholder binding as the inspect-based builder.
        bound_call = MethodType(self.TEST_CLASS.__call__, object())
        call_snapshot = MethodSnapshot.from_docstring(bound_call)
        return ClassSnapshot(
            methods={"__call__": call_snapshot},
            properties={},
        )
class TestBatchedLogitsProcessor(ApiStabilityTestHarness):
    """API-stability checks for ``BatchedLogitsProcessor``'s ``__call__``.

    Both snapshot builders below return a ``ClassSnapshot`` with a single
    ``__call__`` method entry and no properties.
    """

    TEST_CLASS = BatchedLogitsProcessor
    REFERENCE_FILE = "batched_logits_processor.yaml"

    def create_snapshot_from_inspect(self):
        """Return a snapshot of ``__call__`` built via ``from_inspect``."""
        # Bind ``__call__`` to a placeholder object; TEST_CLASS itself is not
        # instantiated here.
        bound_call = MethodType(self.TEST_CLASS.__call__, object())
        call_snapshot = MethodSnapshot.from_inspect(bound_call)
        return ClassSnapshot(
            methods={"__call__": call_snapshot},
            properties={},
        )

    def create_snapshot_from_docstring(self):
        """Return a snapshot of ``__call__`` built via ``from_docstring``."""
        # Same placeholder binding as the inspect-based builder.
        bound_call = MethodType(self.TEST_CLASS.__call__, object())
        call_snapshot = MethodSnapshot.from_docstring(bound_call)
        return ClassSnapshot(
            methods={"__call__": call_snapshot},
            properties={},
        )
class TestLLM(ApiStabilityTestHarness):
    """API-stability checks for ``LLM`` plus self-tests of the harness.

    The extra tests patch ``LLM`` (through the ``mocker`` fixture) or the
    loaded reference and then call ``self.test_signature()`` and
    ``self.test_docstring()`` to check which of them raise
    ``AssertionError``.
    """

    TEST_CLASS = LLM
    REFERENCE_FILE = "llm.yaml"

    def test_modified_init(self, mocker):
        """Replacing ``__init__`` must make the stability check fail."""
        mocker.patch.object(self.TEST_CLASS,
                            "__init__",
                            new=lambda self, x: None)
        with pytest.raises(AssertionError):
            self.test_signature()
            # NOTE(review): this line only runs if the call above does not
            # raise; confirm whether a separate docstring check is intended.
            self.test_docstring()

    def test_new_method(self, mocker):
        """Adding a public method must fail both checks."""
        mocker.patch.object(self.TEST_CLASS,
                            "new_method",
                            new=lambda self, x: None,
                            create=True)
        with pytest.raises(AssertionError):
            self.test_signature()
        with pytest.raises(AssertionError):
            self.test_docstring()

    def test_modified_method_with_same_signature(self, mocker):
        """Swapping in an equivalent ``get_stats_async`` must pass both checks."""

        def new_get_stats_async(self,
                                timeout: Optional[float] = 2
                                ) -> IterationResult:
            pass

        # Carry over the original docstring so only the implementation differs.
        new_get_stats_async.__doc__ = self.TEST_CLASS.get_stats_async.__doc__
        mocker.patch.object(self.TEST_CLASS,
                            "get_stats_async",
                            new=new_get_stats_async)
        self.test_signature()
        self.test_docstring()

    def test_modified_method_with_modified_signature(self, mocker):
        """Changing the ``timeout`` annotation must fail both checks."""

        # Differs from the passing variant above in the ``timeout``
        # annotation, and no docstring is attached.
        def new_get_stats_async(self,
                                timeout: Optional[int] = 2) -> IterationResult:
            pass

        mocker.patch.object(self.TEST_CLASS,
                            "get_stats_async",
                            new=new_get_stats_async)
        with pytest.raises(AssertionError):
            self.test_signature()
        with pytest.raises(AssertionError):
            self.test_docstring()

    def test_modified_docstring(self, mocker):
        """Blanking the class docstring must fail only the docstring check."""
        mocker.patch.object(self.TEST_CLASS, "__doc__", new="")
        self.test_signature()
        with pytest.raises(AssertionError):
            self.test_docstring()

    def test_fine_grained_error(self):
        """Check that a mismatch is reported per parameter in the error cause.

        The reference annotation of ``max_batch_size`` is temporarily set to
        ``"float"``. The previous value is captured first and written back in
        a ``finally`` clause, so the reference object is left as it was found
        even when an assertion in this test fails.
        """
        param = self.reference.methods["__init__"].parameters[
            "max_batch_size"]
        original_annotation = param.annotation
        # change the dtype of max_batch_size to float to trigger a fine-grained error
        param.annotation = "float"
        try:
            with pytest.raises(AssertionError) as e:
                self.test_signature()
            assert "LLM.max_batch_size annotation: typing.Optional[int] != <class 'float'>" in str(
                e.value.__cause__)
        finally:
            # restore the original dtype (the captured value, not a
            # hard-coded guess)
            param.annotation = original_annotation
class TestCompletionOutput(ApiStabilityTestHarness):
    """Configure the API-stability harness for ``CompletionOutput``."""

    # Class under test and the reference file name handed to the harness.
    TEST_CLASS = CompletionOutput
    REFERENCE_FILE = "completion_output.yaml"
class TestRequestOutput(ApiStabilityTestHarness):
    """Configure the API-stability harness for ``RequestOutput``."""

    # Class under test and the reference file name handed to the harness.
    TEST_CLASS = RequestOutput
    REFERENCE_FILE = "request_output.yaml"
class TestQuantConfig(ApiStabilityTestHarness):
    """Configure the API-stability harness for ``QuantConfig``."""

    # Class under test and the reference file name handed to the harness.
    TEST_CLASS = QuantConfig
    REFERENCE_FILE = "quant_config.yaml"
class TestCalibConfig(ApiStabilityTestHarness):
    """Configure the API-stability harness for ``CalibConfig``."""

    # Class under test and the reference file name handed to the harness.
    TEST_CLASS = CalibConfig
    REFERENCE_FILE = "calib_config.yaml"