Skip to content

Commit d35fb64

Browse files
feat(api): add max_tokens and temperature to eval judge parameters
1 parent f34ac96 commit d35fb64

3 files changed

Lines changed: 36 additions & 2 deletions

File tree

.stats.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 75
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-3056d7e8e77acd47415c236c6de4ddfd47809c810a8941374b680d8d7cbe653f.yml
3-
openapi_spec_hash: 4450b266f8537f2677a9a2ba0d9fe88c
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-e218fafc0c9b31bd98647d1e2de6decc55f8a7f9719b3b565f94939c2ebcf0df.yml
3+
openapi_spec_hash: 026cc585ef61f52d4d6c4b60b969e323
44
config_hash: 6c214c91fad5ead4849be777fd9e8108

src/together/types/eval_create_params.py

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,13 +51,23 @@ class ParametersEvaluationClassifyParametersJudge(TypedDict, total=False):
5151
external_base_url: str
5252
"""Base URL for external judge models. Must be OpenAI-compatible base URL."""
5353

54+
max_tokens: int
55+
"""Maximum number of tokens the judge model can generate.
56+
57+
Defaults to 32768. Increase for reasoning models (e.g. Gemini, o-series) that
58+
consume output token budget for chain-of-thought.
59+
"""
60+
5461
num_workers: int
5562
"""Number of concurrent workers for inference requests.
5663
5764
Overrides the default concurrency for this model. Useful for tuning throughput
5865
when using proxy endpoints (e.g. OpenRouter) or rate-limited external APIs.
5966
"""
6067

68+
temperature: float
69+
"""Sampling temperature for the judge model. Defaults to 0.05."""
70+
6171

6272
class ParametersEvaluationClassifyParametersModelToEvaluateEvaluationModelRequest(TypedDict, total=False):
6373
input_template: Required[str]
@@ -129,13 +139,23 @@ class ParametersEvaluationScoreParametersJudge(TypedDict, total=False):
129139
external_base_url: str
130140
"""Base URL for external judge models. Must be OpenAI-compatible base URL."""
131141

142+
max_tokens: int
143+
"""Maximum number of tokens the judge model can generate.
144+
145+
Defaults to 32768. Increase for reasoning models (e.g. Gemini, o-series) that
146+
consume output token budget for chain-of-thought.
147+
"""
148+
132149
num_workers: int
133150
"""Number of concurrent workers for inference requests.
134151
135152
Overrides the default concurrency for this model. Useful for tuning throughput
136153
when using proxy endpoints (e.g. OpenRouter) or rate-limited external APIs.
137154
"""
138155

156+
temperature: float
157+
"""Sampling temperature for the judge model. Defaults to 0.05."""
158+
139159

140160
class ParametersEvaluationScoreParametersModelToEvaluateEvaluationModelRequest(TypedDict, total=False):
141161
input_template: Required[str]
@@ -210,13 +230,23 @@ class ParametersEvaluationCompareParametersJudge(TypedDict, total=False):
210230
external_base_url: str
211231
"""Base URL for external judge models. Must be OpenAI-compatible base URL."""
212232

233+
max_tokens: int
234+
"""Maximum number of tokens the judge model can generate.
235+
236+
Defaults to 32768. Increase for reasoning models (e.g. Gemini, o-series) that
237+
consume output token budget for chain-of-thought.
238+
"""
239+
213240
num_workers: int
214241
"""Number of concurrent workers for inference requests.
215242
216243
Overrides the default concurrency for this model. Useful for tuning throughput
217244
when using proxy endpoints (e.g. OpenRouter) or rate-limited external APIs.
218245
"""
219246

247+
temperature: float
248+
"""Sampling temperature for the judge model. Defaults to 0.05."""
249+
220250

221251
class ParametersEvaluationCompareParametersModelAEvaluationModelRequest(TypedDict, total=False):
222252
input_template: Required[str]

tests/api_resources/test_evals.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,9 @@ def test_method_create_with_all_params(self, client: Together) -> None:
5252
"system_template": "Imagine you are a helpful assistant",
5353
"external_api_token": "external_api_token",
5454
"external_base_url": "external_base_url",
55+
"max_tokens": 8192,
5556
"num_workers": 5,
57+
"temperature": 0,
5658
},
5759
"labels": ["yes", "no"],
5860
"pass_labels": ["yes"],
@@ -253,7 +255,9 @@ async def test_method_create_with_all_params(self, async_client: AsyncTogether)
253255
"system_template": "Imagine you are a helpful assistant",
254256
"external_api_token": "external_api_token",
255257
"external_base_url": "external_base_url",
258+
"max_tokens": 8192,
256259
"num_workers": 5,
260+
"temperature": 0,
257261
},
258262
"labels": ["yes", "no"],
259263
"pass_labels": ["yes"],

0 commit comments

Comments (0)