Skip to content

Commit d35fb64

Browse files
feat(api): add max_tokens and temperature to eval judge parameters
1 parent f34ac96 commit d35fb64

3 files changed

Lines changed: 36 additions & 2 deletions

File tree

.stats.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 75
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-3056d7e8e77acd47415c236c6de4ddfd47809c810a8941374b680d8d7cbe653f.yml
3-
openapi_spec_hash: 4450b266f8537f2677a9a2ba0d9fe88c
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-e218fafc0c9b31bd98647d1e2de6decc55f8a7f9719b3b565f94939c2ebcf0df.yml
3+
openapi_spec_hash: 026cc585ef61f52d4d6c4b60b969e323
44
config_hash: 6c214c91fad5ead4849be777fd9e8108

src/together/types/eval_create_params.py

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,13 +51,23 @@ class ParametersEvaluationClassifyParametersJudge(TypedDict, total=False):
5151
external_base_url: str
5252
"""Base URL for external judge models. Must be OpenAI-compatible base URL."""
5353

54+
max_tokens: int
55+
"""Maximum number of tokens the judge model can generate.
56+
57+
Defaults to 32768. Increase for reasoning models (e.g. Gemini, o-series) that
58+
consume output token budget for chain-of-thought.
59+
"""
60+
5461
num_workers: int
5562
"""Number of concurrent workers for inference requests.
5663
5764
Overrides the default concurrency for this model. Useful for tuning throughput
5865
when using proxy endpoints (e.g. OpenRouter) or rate-limited external APIs.
5966
"""
6067

68+
temperature: float
69+
"""Sampling temperature for the judge model. Defaults to 0.05."""
70+
6171

6272
class ParametersEvaluationClassifyParametersModelToEvaluateEvaluationModelRequest(TypedDict, total=False):
6373
input_template: Required[str]
@@ -129,13 +139,23 @@ class ParametersEvaluationScoreParametersJudge(TypedDict, total=False):
129139
external_base_url: str
130140
"""Base URL for external judge models. Must be OpenAI-compatible base URL."""
131141

142+
max_tokens: int
143+
"""Maximum number of tokens the judge model can generate.
144+
145+
Defaults to 32768. Increase for reasoning models (e.g. Gemini, o-series) that
146+
consume output token budget for chain-of-thought.
147+
"""
148+
132149
num_workers: int
133150
"""Number of concurrent workers for inference requests.
134151
135152
Overrides the default concurrency for this model. Useful for tuning throughput
136153
when using proxy endpoints (e.g. OpenRouter) or rate-limited external APIs.
137154
"""
138155

156+
temperature: float
157+
"""Sampling temperature for the judge model. Defaults to 0.05."""
158+
139159

140160
class ParametersEvaluationScoreParametersModelToEvaluateEvaluationModelRequest(TypedDict, total=False):
141161
input_template: Required[str]
@@ -210,13 +230,23 @@ class ParametersEvaluationCompareParametersJudge(TypedDict, total=False):
210230
external_base_url: str
211231
"""Base URL for external judge models. Must be OpenAI-compatible base URL."""
212232

233+
max_tokens: int
234+
"""Maximum number of tokens the judge model can generate.
235+
236+
Defaults to 32768. Increase for reasoning models (e.g. Gemini, o-series) that
237+
consume output token budget for chain-of-thought.
238+
"""
239+
213240
num_workers: int
214241
"""Number of concurrent workers for inference requests.
215242
216243
Overrides the default concurrency for this model. Useful for tuning throughput
217244
when using proxy endpoints (e.g. OpenRouter) or rate-limited external APIs.
218245
"""
219246

247+
temperature: float
248+
"""Sampling temperature for the judge model. Defaults to 0.05."""
249+
220250

221251
class ParametersEvaluationCompareParametersModelAEvaluationModelRequest(TypedDict, total=False):
222252
input_template: Required[str]

tests/api_resources/test_evals.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,9 @@ def test_method_create_with_all_params(self, client: Together) -> None:
5252
"system_template": "Imagine you are a helpful assistant",
5353
"external_api_token": "external_api_token",
5454
"external_base_url": "external_base_url",
55+
"max_tokens": 8192,
5556
"num_workers": 5,
57+
"temperature": 0,
5658
},
5759
"labels": ["yes", "no"],
5860
"pass_labels": ["yes"],
@@ -253,7 +255,9 @@ async def test_method_create_with_all_params(self, async_client: AsyncTogether)
253255
"system_template": "Imagine you are a helpful assistant",
254256
"external_api_token": "external_api_token",
255257
"external_base_url": "external_base_url",
258+
"max_tokens": 8192,
256259
"num_workers": 5,
260+
"temperature": 0,
257261
},
258262
"labels": ["yes", "no"],
259263
"pass_labels": ["yes"],

0 commit comments

Comments (0)