From c7566702fb57fcce808bb23ba1bc9b0737b9c352 Mon Sep 17 00:00:00 2001 From: Blaine Kasten Date: Thu, 7 May 2026 12:53:05 -0500 Subject: [PATCH 01/18] chore: Switch to an async version of DownloadManager (#353) * chore: Switch to an async version of DownloadManager * Update src/together/lib/resources/files.py Co-authored-by: Zain Hasan * Update src/together/lib/resources/files.py Co-authored-by: Zain Hasan * Update src/together/lib/resources/files.py Co-authored-by: Zain Hasan * Update src/together/lib/resources/files.py Co-authored-by: Zain Hasan --------- Co-authored-by: Zain Hasan --- src/together/lib/__init__.py | 2 + .../lib/cli/api/fine_tuning/download.py | 14 +- src/together/lib/resources/__init__.py | 2 + src/together/lib/resources/files.py | 173 +++++++++++++++++- tests/cli/test_fine_tuning.py | 4 +- 5 files changed, 180 insertions(+), 15 deletions(-) diff --git a/src/together/lib/__init__.py b/src/together/lib/__init__.py index d0208cecb..05c6a07ed 100644 --- a/src/together/lib/__init__.py +++ b/src/together/lib/__init__.py @@ -10,10 +10,12 @@ UploadManager, DownloadManager, AsyncUploadManager, + AsyncDownloadManager, ) __all__ = [ "DownloadManager", + "AsyncDownloadManager", "AsyncUploadManager", "UploadManager", "FinetuneTrainingLimits", diff --git a/src/together/lib/cli/api/fine_tuning/download.py b/src/together/lib/cli/api/fine_tuning/download.py index 54ff40271..632aec3b6 100644 --- a/src/together/lib/cli/api/fine_tuning/download.py +++ b/src/together/lib/cli/api/fine_tuning/download.py @@ -7,8 +7,8 @@ from cyclopts import Parameter -from together import APIError, Together, APIStatusError -from together.lib import DownloadManager +from together import APIError, APIStatusError +from together.lib import AsyncDownloadManager from together._utils._json import openapi_dumps from together.lib.cli.utils.config import CLIConfigParameter from together.lib.cli.utils._console import console @@ -87,15 +87,7 @@ async def download( os.environ.setdefault("TOGETHER_DISABLE_TQDM", "true") try: - # TODO: This is a temporary hack, - # We need to make the DownloadManager async so we can use the async client. - sync_client = Together( - api_key=config.client.api_key, - base_url=config.client.base_url, - timeout=config.client.timeout, - max_retries=config.client.max_retries, - ) - file_path, file_size = DownloadManager(sync_client).download( + file_path, file_size = await AsyncDownloadManager(config.client).download( url=url, output=output, remote_name=ft_job.x_model_output_name, diff --git a/src/together/lib/resources/__init__.py b/src/together/lib/resources/__init__.py index 52e9ef43d..be20bf721 100644 --- a/src/together/lib/resources/__init__.py +++ b/src/together/lib/resources/__init__.py @@ -2,10 +2,12 @@ UploadManager, DownloadManager, AsyncUploadManager, + AsyncDownloadManager, ) __all__ = [ "DownloadManager", + "AsyncDownloadManager", "UploadManager", "AsyncUploadManager", ] diff --git a/src/together/lib/resources/files.py b/src/together/lib/resources/files.py index 29fcc3f9d..f3e799185 100644 --- a/src/together/lib/resources/files.py +++ b/src/together/lib/resources/files.py @@ -197,7 +197,7 @@ def download( raise APIStatusError( "Error downloading file", response=e.response, - body=e.response, + body=e.body, ) from e if not fetch_metadata: @@ -267,7 +267,7 @@ def download( raise APIStatusError( "Error downloading file", response=e.response, - body=e.response, + body=e.body, ) from e # Close the response @@ -287,6 +287,175 @@ def download( return str(file_path.resolve()), file_size +class AsyncDownloadManager(AsyncAPIResource): + async def get_file_metadata( + self, + url: str, + output: Path | None = None, + remote_name: str | None = None, + fetch_metadata: bool = False, + ) -> Tuple[Path, int]: + """ + gets remote file head and parses out file name and file size + """ + + if not fetch_metadata: + if isinstance(output, Path): + file_path = output + else: + assert isinstance(remote_name, str) + file_path = Path(remote_name) + + return file_path, 0 + + try: + response = await self._client.get( + path=url, + options=RequestOptions( + headers={"Range": "bytes=0-1"}, + ), + cast_to=httpx.Response, + stream=False, + ) + except APIStatusError as e: + raise APIStatusError( + "Error fetching file metadata", + response=e.response, + body=e.body, + ) from e + + headers = response.headers + + assert isinstance(headers, httpx.Headers) + + file_path = _prepare_output( + headers=headers, + output=output, + remote_name=remote_name, + ) + + file_size = _get_file_size(headers) + + return file_path, file_size + + async def download( + self, + url: str, + output: Path | None = None, + remote_name: str | None = None, + fetch_metadata: bool = False, + ) -> Tuple[str, int]: + # pre-fetch remote file name and file size + file_path, file_size = await self.get_file_metadata(url, output, remote_name, fetch_metadata) + + temp_file_manager = partial(tempfile.NamedTemporaryFile, mode="wb", dir=file_path.parent, delete=False) + + # Prevent parallel downloads of the same file with a lock. + lock_path = Path(file_path.as_posix() + ".lock") + + with FileLock(lock_path.as_posix()): + with temp_file_manager() as temp_file: + try: + response = await self._client.get( + path=url, + cast_to=httpx.Response, + stream=True, + ) + except APIStatusError as e: + lock_path.unlink(missing_ok=True) + raise APIStatusError( + "Error downloading file", + response=e.response, + body=e.body, + ) from e + + if not fetch_metadata: + file_size = int(response.headers.get("content-length", 0)) + + assert file_size != 0, "Unable to retrieve remote file." + + # Download with retry logic + bytes_downloaded = 0 + retry_count = 0 + retry_delay = DOWNLOAD_INITIAL_RETRY_DELAY + + DISABLE_TQDM = os.environ.get("TOGETHER_DISABLE_TQDM", "false").lower() == "true" + + with tqdm( + total=file_size, + unit="B", + unit_scale=True, + desc=f"Downloading file {file_path.name}", + disable=bool(DISABLE_TQDM), + ) as pbar: + while bytes_downloaded < file_size: + try: + # If this is a retry, close the previous response and create a new one with Range header + if bytes_downloaded > 0: + await response.aclose() + + log.info(f"Resuming download from byte {bytes_downloaded}") + response = await self._client.get( + path=url, + cast_to=httpx.Response, + stream=True, + options=RequestOptions( + headers={"Range": f"bytes={bytes_downloaded}-"}, + ), + ) + + # Download chunks + async for chunk in response.aiter_bytes(DOWNLOAD_BLOCK_SIZE): + temp_file.write(chunk) # type: ignore + bytes_downloaded += len(chunk) + pbar.update(len(chunk)) + + # Successfully completed download + break + + except (httpx.RequestError, httpx.StreamError, APIConnectionError) as e: + if retry_count >= MAX_DOWNLOAD_RETRIES: + log.error(f"Download failed after {retry_count} retries") + raise DownloadError( + f"Download failed after {retry_count} retries. Last error: {str(e)}" + ) from e + + retry_count += 1 + log.warning( + f"Download interrupted at {bytes_downloaded}/{file_size} bytes. " + f"Retry {retry_count}/{MAX_DOWNLOAD_RETRIES} in {retry_delay}s..." + ) + await self._sleep(retry_delay) + + # Exponential backoff with max delay cap + retry_delay = min(retry_delay * 2, DOWNLOAD_MAX_RETRY_DELAY) + + except APIStatusError as e: + # For API errors, don't retry + log.error(f"API error during download: {e}") + raise APIStatusError( + "Error downloading file", + response=e.response, + body=e.body, + ) from e + + # Close the response + await response.aclose() + + # Raise exception if remote file size does not match downloaded file size + if os.stat(temp_file.name).st_size != file_size: + raise DownloadError( + f"Downloaded file size `{bytes_downloaded}` bytes does not match remote file size `{file_size}` bytes." + ) + + # Moves temp file to output file path + chmod_and_replace(Path(temp_file.name), file_path) + + lock_path.unlink(missing_ok=True) + + return str(file_path.resolve()), file_size + + class UploadManager(SyncAPIResource): def get_upload_url( self, diff --git a/tests/cli/test_fine_tuning.py b/tests/cli/test_fine_tuning.py index f6ee41b49..08d1b3d1f 100644 --- a/tests/cli/test_fine_tuning.py +++ b/tests/cli/test_fine_tuning.py @@ -210,12 +210,12 @@ class _DM: def __init__(self, _client: object) -> None: pass - def download(self, **kwargs: object) -> tuple[str, int]: + async def download(self, **kwargs: object) -> tuple[str, int]: assert "ft_id=ft-abcd-12" in str(kwargs.get("url", "")) assert "checkpoint=model_output_path" in str(kwargs.get("url", "")) return str(out_file), 1 - with patch.object(_ft_download_mod, "DownloadManager", _DM): + with patch.object(_ft_download_mod, "AsyncDownloadManager", _DM): # Full fine-tunes require explicit --checkpoint-type default (CLI default is merged for LoRA). result = cli_runner.invoke( [ From 9c1211a8143dc435ef351dd57b9553d39ae06b8e Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 22:04:35 +0000 Subject: [PATCH 02/18] docs(api): add .ogg, .opus, .aac to supported formats in audio transcriptions/translations --- .stats.yml | 4 ++-- src/together/resources/audio/transcriptions.py | 4 ++-- src/together/resources/audio/translations.py | 4 ++-- src/together/types/audio/transcription_create_params.py | 2 +- src/together/types/audio/translation_create_params.py | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.stats.yml b/.stats.yml index 2d4178724..1bdbe8287 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 75 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-e1200616b1a93d40e478800d2c9e06ddeb10b508f2a9aa65810ee31878ba4f23.yml -openapi_spec_hash: 23245993d115722da1b697f10799f4f1 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-13728ec693ca29a36a3cfbe05df62ca69a8e404eab7f6db16759fc5304717eac.yml +openapi_spec_hash: 5334a20f914487feace9c88045b89354 config_hash: 6c214c91fad5ead4849be777fd9e8108 diff --git a/src/together/resources/audio/transcriptions.py b/src/together/resources/audio/transcriptions.py index 374161e3c..0d82acfd7 100644 --- a/src/together/resources/audio/transcriptions.py +++ b/src/together/resources/audio/transcriptions.py @@ -70,7 +70,7 @@ def create( Args: file: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, - .webm, .flac. + .webm, .flac, .ogg, .opus, .aac. diarize: Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription. In the response, in the words array, you @@ -193,7 +193,7 @@ async def create( Args: file: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, - .webm, .flac. + .webm, .flac, .ogg, .opus, .aac. diarize: Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription. In the response, in the words array, you diff --git a/src/together/resources/audio/translations.py b/src/together/resources/audio/translations.py index 8f6d69bf1..77815c745 100644 --- a/src/together/resources/audio/translations.py +++ b/src/together/resources/audio/translations.py @@ -67,7 +67,7 @@ def create( Args: file: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, - .webm, .flac. + .webm, .flac, .ogg, .opus, .aac. language: Target output language. Optional ISO 639-1 language code. If omitted, language is set to English. @@ -167,7 +167,7 @@ async def create( Args: file: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, - .webm, .flac. + .webm, .flac, .ogg, .opus, .aac. language: Target output language. Optional ISO 639-1 language code. If omitted, language is set to English. diff --git a/src/together/types/audio/transcription_create_params.py b/src/together/types/audio/transcription_create_params.py index b28fab6fc..a9ac359a3 100644 --- a/src/together/types/audio/transcription_create_params.py +++ b/src/together/types/audio/transcription_create_params.py @@ -14,7 +14,7 @@ class TranscriptionCreateParams(TypedDict, total=False): file: Required[Union[FileTypes, str]] """Audio file upload or public HTTP/HTTPS URL. - Supported formats .wav, .mp3, .m4a, .webm, .flac. + Supported formats .wav, .mp3, .m4a, .webm, .flac, .ogg, .opus, .aac. """ diarize: bool diff --git a/src/together/types/audio/translation_create_params.py b/src/together/types/audio/translation_create_params.py index 5c944f5a3..4fca6e73b 100644 --- a/src/together/types/audio/translation_create_params.py +++ b/src/together/types/audio/translation_create_params.py @@ -14,7 +14,7 @@ class TranslationCreateParams(TypedDict, total=False): file: Required[Union[FileTypes, str]] """Audio file upload or public HTTP/HTTPS URL. - Supported formats .wav, .mp3, .m4a, .webm, .flac. + Supported formats .wav, .mp3, .m4a, .webm, .flac, .ogg, .opus, .aac. """ language: str From 9889ead58a66864a37d01ef0bbe92b4bc8786ff5 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 22:15:53 +0000 Subject: [PATCH 03/18] docs(api): clarify prompt parameter support in audio transcriptions/translations --- .stats.yml | 4 ++-- src/together/resources/audio/transcriptions.py | 10 ++++++++-- src/together/resources/audio/translations.py | 10 ++++++++-- .../types/audio/transcription_create_params.py | 7 ++++++- src/together/types/audio/translation_create_params.py | 7 ++++++- 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/.stats.yml b/.stats.yml index 1bdbe8287..65fbf7abe 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 75 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-13728ec693ca29a36a3cfbe05df62ca69a8e404eab7f6db16759fc5304717eac.yml -openapi_spec_hash: 5334a20f914487feace9c88045b89354 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-c120da21adf22dd3902475f6ba7410dd57d9d2995a232e676d59af4806d57d09.yml +openapi_spec_hash: eff04023d0ad22f11bbf107e44fe699e config_hash: 6c214c91fad5ead4849be777fd9e8108 diff --git a/src/together/resources/audio/transcriptions.py b/src/together/resources/audio/transcriptions.py index 0d82acfd7..a2f1980ab 100644 --- a/src/together/resources/audio/transcriptions.py +++ b/src/together/resources/audio/transcriptions.py @@ -94,7 +94,10 @@ def create( model: Model to use for transcription - prompt: Optional text to bias decoding. + prompt: Optional text to bias decoding. Supported only on Whisper-family models (e.g. + `openai/whisper-large-v3`). Other STT models (e.g. + `nvidia/parakeet-tdt-0.6b-v3`) accept the field for API compatibility but ignore + it. response_format: The format of the response @@ -217,7 +220,10 @@ async def create( model: Model to use for transcription - prompt: Optional text to bias decoding. + prompt: Optional text to bias decoding. Supported only on Whisper-family models (e.g. + `openai/whisper-large-v3`). Other STT models (e.g. + `nvidia/parakeet-tdt-0.6b-v3`) accept the field for API compatibility but ignore + it. response_format: The format of the response diff --git a/src/together/resources/audio/translations.py b/src/together/resources/audio/translations.py index 77815c745..326b5de47 100644 --- a/src/together/resources/audio/translations.py +++ b/src/together/resources/audio/translations.py @@ -74,7 +74,10 @@ def create( model: Model to use for translation - prompt: Optional text to bias decoding. + prompt: Optional text to bias decoding. Supported only on Whisper-family models (e.g. + `openai/whisper-large-v3`). Other STT models (e.g. + `nvidia/parakeet-tdt-0.6b-v3`) accept the field for API compatibility but ignore + it. response_format: The format of the response @@ -174,7 +177,10 @@ async def create( model: Model to use for translation - prompt: Optional text to bias decoding. + prompt: Optional text to bias decoding. Supported only on Whisper-family models (e.g. + `openai/whisper-large-v3`). Other STT models (e.g. + `nvidia/parakeet-tdt-0.6b-v3`) accept the field for API compatibility but ignore + it. response_format: The format of the response diff --git a/src/together/types/audio/transcription_create_params.py b/src/together/types/audio/transcription_create_params.py index a9ac359a3..825224001 100644 --- a/src/together/types/audio/transcription_create_params.py +++ b/src/together/types/audio/transcription_create_params.py @@ -55,7 +55,12 @@ class TranscriptionCreateParams(TypedDict, total=False): """Model to use for transcription""" prompt: str - """Optional text to bias decoding.""" + """Optional text to bias decoding. + + Supported only on Whisper-family models (e.g. `openai/whisper-large-v3`). Other + STT models (e.g. `nvidia/parakeet-tdt-0.6b-v3`) accept the field for API + compatibility but ignore it. + """ response_format: Literal["json", "verbose_json"] """The format of the response""" diff --git a/src/together/types/audio/translation_create_params.py b/src/together/types/audio/translation_create_params.py index 4fca6e73b..21896fdf8 100644 --- a/src/together/types/audio/translation_create_params.py +++ b/src/together/types/audio/translation_create_params.py @@ -27,7 +27,12 @@ class TranslationCreateParams(TypedDict, total=False): """Model to use for translation""" prompt: str - """Optional text to bias decoding.""" + """Optional text to bias decoding. + + Supported only on Whisper-family models (e.g. `openai/whisper-large-v3`). Other + STT models (e.g. `nvidia/parakeet-tdt-0.6b-v3`) accept the field for API + compatibility but ignore it. + """ response_format: Literal["json", "verbose_json"] """The format of the response""" From f34ac960a980dbb5750208ff27eae4abc283783a Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 22:59:11 +0000 Subject: [PATCH 04/18] fix(api): remove task field from audio transcription/translation responses --- .stats.yml | 4 ++-- src/together/types/audio/transcription_create_response.py | 5 +---- src/together/types/audio/translation_create_response.py | 5 +---- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/.stats.yml b/.stats.yml index 65fbf7abe..b947e04a5 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 75 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-c120da21adf22dd3902475f6ba7410dd57d9d2995a232e676d59af4806d57d09.yml -openapi_spec_hash: eff04023d0ad22f11bbf107e44fe699e +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-3056d7e8e77acd47415c236c6de4ddfd47809c810a8941374b680d8d7cbe653f.yml +openapi_spec_hash: 4450b266f8537f2677a9a2ba0d9fe88c config_hash: 6c214c91fad5ead4849be777fd9e8108 diff --git a/src/together/types/audio/transcription_create_response.py b/src/together/types/audio/transcription_create_response.py index 521d0d231..d079e49e7 100644 --- a/src/together/types/audio/transcription_create_response.py +++ b/src/together/types/audio/transcription_create_response.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Union, Optional -from typing_extensions import Literal, TypeAlias +from typing_extensions import TypeAlias from ..._models import BaseModel @@ -93,9 +93,6 @@ class AudioTranscriptionVerboseJsonResponse(BaseModel): segments: List[AudioTranscriptionVerboseJsonResponseSegment] """Array of transcription segments""" - task: Literal["transcribe", "translate"] - """The task performed""" - text: str """The transcribed text""" diff --git a/src/together/types/audio/translation_create_response.py b/src/together/types/audio/translation_create_response.py index cb02a8935..7e7506742 100644 --- a/src/together/types/audio/translation_create_response.py +++ b/src/together/types/audio/translation_create_response.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Union, Optional -from typing_extensions import Literal, TypeAlias +from typing_extensions import TypeAlias from ..._models import BaseModel @@ -57,9 +57,6 @@ class AudioTranslationVerboseJsonResponse(BaseModel): segments: List[AudioTranslationVerboseJsonResponseSegment] """Array of translation segments""" - task: Literal["transcribe", "translate"] - """The task performed""" - text: str """The translated text""" From d35fb643b2cd5eff5ccb2b8b2c0eb4fbc8d30734 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 8 May 2026 02:29:34 +0000 Subject: [PATCH 05/18] feat(api): add max_tokens and temperature to eval judge parameters --- .stats.yml | 4 ++-- src/together/types/eval_create_params.py | 30 ++++++++++++++++++++++++ tests/api_resources/test_evals.py | 4 ++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/.stats.yml b/.stats.yml index b947e04a5..ed55c5ab5 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 75 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-3056d7e8e77acd47415c236c6de4ddfd47809c810a8941374b680d8d7cbe653f.yml -openapi_spec_hash: 4450b266f8537f2677a9a2ba0d9fe88c +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-e218fafc0c9b31bd98647d1e2de6decc55f8a7f9719b3b565f94939c2ebcf0df.yml +openapi_spec_hash: 026cc585ef61f52d4d6c4b60b969e323 config_hash: 6c214c91fad5ead4849be777fd9e8108 diff --git a/src/together/types/eval_create_params.py b/src/together/types/eval_create_params.py index 523441868..081698c6c 100644 --- a/src/together/types/eval_create_params.py +++ b/src/together/types/eval_create_params.py @@ -51,6 +51,13 @@ class ParametersEvaluationClassifyParametersJudge(TypedDict, total=False): external_base_url: str """Base URL for external judge models. Must be OpenAI-compatible base URL.""" + max_tokens: int + """Maximum number of tokens the judge model can generate. + + Defaults to 32768. Increase for reasoning models (e.g. Gemini, o-series) that + consume output token budget for chain-of-thought. + """ + num_workers: int """Number of concurrent workers for inference requests. @@ -58,6 +65,9 @@ class ParametersEvaluationClassifyParametersJudge(TypedDict, total=False): when using proxy endpoints (e.g. OpenRouter) or rate-limited external APIs. """ + temperature: float + """Sampling temperature for the judge model. Defaults to 0.05.""" + class ParametersEvaluationClassifyParametersModelToEvaluateEvaluationModelRequest(TypedDict, total=False): input_template: Required[str] @@ -129,6 +139,13 @@ class ParametersEvaluationScoreParametersJudge(TypedDict, total=False): external_base_url: str """Base URL for external judge models. Must be OpenAI-compatible base URL.""" + max_tokens: int + """Maximum number of tokens the judge model can generate. + + Defaults to 32768. Increase for reasoning models (e.g. Gemini, o-series) that + consume output token budget for chain-of-thought. + """ + num_workers: int """Number of concurrent workers for inference requests. @@ -136,6 +153,9 @@ class ParametersEvaluationScoreParametersJudge(TypedDict, total=False): when using proxy endpoints (e.g. OpenRouter) or rate-limited external APIs. """ + temperature: float + """Sampling temperature for the judge model. Defaults to 0.05.""" + class ParametersEvaluationScoreParametersModelToEvaluateEvaluationModelRequest(TypedDict, total=False): input_template: Required[str] @@ -210,6 +230,13 @@ class ParametersEvaluationCompareParametersJudge(TypedDict, total=False): external_base_url: str """Base URL for external judge models. Must be OpenAI-compatible base URL.""" + max_tokens: int + """Maximum number of tokens the judge model can generate. + + Defaults to 32768. Increase for reasoning models (e.g. Gemini, o-series) that + consume output token budget for chain-of-thought. + """ + num_workers: int """Number of concurrent workers for inference requests. @@ -217,6 +244,9 @@ class ParametersEvaluationCompareParametersJudge(TypedDict, total=False): when using proxy endpoints (e.g. OpenRouter) or rate-limited external APIs. """ + temperature: float + """Sampling temperature for the judge model. Defaults to 0.05.""" + class ParametersEvaluationCompareParametersModelAEvaluationModelRequest(TypedDict, total=False): input_template: Required[str] diff --git a/tests/api_resources/test_evals.py b/tests/api_resources/test_evals.py index e18490a11..66a73df47 100644 --- a/tests/api_resources/test_evals.py +++ b/tests/api_resources/test_evals.py @@ -52,7 +52,9 @@ def test_method_create_with_all_params(self, client: Together) -> None: "system_template": "Imagine you are a helpful assistant", "external_api_token": "external_api_token", "external_base_url": "external_base_url", + "max_tokens": 8192, "num_workers": 5, + "temperature": 0, }, "labels": ["yes", "no"], "pass_labels": ["yes"], @@ -253,7 +255,9 @@ async def test_method_create_with_all_params(self, async_client: AsyncTogether) "system_template": "Imagine you are a helpful assistant", "external_api_token": "external_api_token", "external_base_url": "external_base_url", + "max_tokens": 8192, "num_workers": 5, + "temperature": 0, }, "labels": ["yes", "no"], "pass_labels": ["yes"], From d62050fb50b4858ed32fa27d2024c7505d842946 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 8 May 2026 16:27:02 +0000 Subject: [PATCH 06/18] fix(client): add missing f-string prefix in file type error message --- src/together/_files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/together/_files.py b/src/together/_files.py index 2f7ac6da6..f7507c15f 100644 --- a/src/together/_files.py +++ b/src/together/_files.py @@ -99,7 +99,7 @@ async def async_to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles elif is_sequence_t(files): files = [(key, await _async_transform_file(file)) for key, file in files] else: - raise TypeError("Unexpected file type input {type(files)}, expected mapping or sequence") + raise TypeError(f"Unexpected file type input {type(files)}, expected mapping or sequence") return files From 0794576cd274a8a55240b3a1b780df8b23261a7c Mon Sep 17 00:00:00 2001 From: Blaine Kasten Date: Fri, 8 May 2026 11:30:20 -0500 Subject: [PATCH 07/18] chore: Add help examples to evals commands (#356) * chore: Add help examples to evals commands * Update src/together/lib/cli/api/evals/create.py Co-authored-by: Zain Hasan * Update src/together/lib/cli/api/evals/create.py Co-authored-by: Zain Hasan * Update src/together/lib/cli/api/evals/create.py Co-authored-by: Zain Hasan --------- Co-authored-by: Zain Hasan --- src/together/lib/cli/__init__.py | 14 +-- src/together/lib/cli/api/evals/create.py | 43 ++++++--- src/together/lib/cli/api/evals/list.py | 89 +++++++++++++++---- src/together/lib/cli/api/evals/retrieve.py | 9 +- src/together/lib/cli/api/evals/status.py | 10 +-- src/together/lib/cli/components/model_dump.py | 79 ++++++++++++++++ src/together/lib/cli/utils/_console.py | 2 +- src/together/lib/cli/utils/_help_examples.py | 66 ++++++++++++++ 8 files changed, 263 insertions(+), 49 deletions(-) create mode 100644 src/together/lib/cli/components/model_dump.py diff --git a/src/together/lib/cli/__init__.py b/src/together/lib/cli/__init__.py index fb7006754..541706f36 100644 --- a/src/together/lib/cli/__init__.py +++ b/src/together/lib/cli/__init__.py @@ -27,8 +27,10 @@ from together.lib.cli.utils._api_error import try_handle_server_error_message from together.lib.cli.utils._completion import install_completion from together.lib.cli.utils._help_examples import ( + EVALS_HELP_EXAMPLES, ENDPOINTS_HELP_EXAMPLES, TOP_LEVEL_HELP_EXAMPLES, + EVALS_CREATE_HELP_EXAMPLES, ENDPOINTS_CREATE_HELP_EXAMPLES, ENDPOINTS_UPDATE_HELP_EXAMPLES, ENDPOINTS_HARDWARE_HELP_EXAMPLES, @@ -358,11 +360,13 @@ async def run_command() -> None: ) ## Evals API commands -evals_app = app.command(App(name="evals", help="Run and manage model evaluations")) -evals_app.command((f"{_CLI}.evals.create:create"), alias="-c", help="Create a new eval job") -evals_app.command((f"{_CLI}.evals.list:list"), alias="ls", help="List eval jobs") -evals_app.command((f"{_CLI}.evals.retrieve:retrieve"), help="Get eval job details") -evals_app.command((f"{_CLI}.evals.status:status"), help="Get an eval job's status") +evals_app = app.command(App(name="evals", help="Run and manage model evaluations", help_epilogue=EVALS_HELP_EXAMPLES)) +evals_app.command( + (f"{_CLI}.evals.create:create"), alias="-c", help="Create a new eval job", help_epilogue=EVALS_CREATE_HELP_EXAMPLES +) +evals_app.command((f"{_CLI}.evals.list:list"), alias="ls", help="List eval jobs", help_epilogue="") +evals_app.command((f"{_CLI}.evals.retrieve:retrieve"), help="Get eval job details", help_epilogue="") +evals_app.command((f"{_CLI}.evals.status:status"), help="Get an eval job's status", help_epilogue="") ## Telemetry API commands telemetry_app = app.command(App(name="telemetry", help="Configure CLI telemetry")) diff --git a/src/together/lib/cli/api/evals/create.py b/src/together/lib/cli/api/evals/create.py index 33961ac5a..400397744 100644 --- a/src/together/lib/cli/api/evals/create.py +++ b/src/together/lib/cli/api/evals/create.py @@ -1,7 +1,7 @@ from __future__ import annotations -import json from typing import Any, Dict, Union, Literal, Optional, Annotated, cast +from pathlib import Path from cyclopts import Parameter @@ -35,10 +35,10 @@ async def create( input_data_file_path: Annotated[str, Parameter(help="The path to the input data file")], judge_external_api_token: Annotated[ Optional[str], Parameter(help="API token for access to the external judge model") - ], + ] = None, judge_external_base_url: Annotated[ Optional[str], Parameter(help="Base URL for access to the external judge model") - ], + ] = None, model_field: Annotated[ Optional[str], Parameter( @@ -130,6 +130,14 @@ async def create( labels_list = labels.split(",") if labels else None pass_labels_list = pass_labels.split(",") if pass_labels else None + # If the user passes a path to a file, try to upload it to the files API first + # Uploads are idempotent so we can depend on this API always giving us a file ID + if _check_path_exists(input_data_file_path): + file_upload = await config.client.files.upload(Path(input_data_file_path), purpose="eval", check=False) + training_file = file_upload.id + else: + training_file = input_data_file_path + model_to_evaluate_final: Union[Dict[str, Any], None, str] = None config_params_provided = any( [ @@ -154,7 +162,7 @@ async def create( model_to_evaluate_final = { "model": model_to_evaluate, "model_source": model_to_evaluate_source, - "max_tokens": model_to_evaluate_max_tokens, + "max_tokens": model_to_evaluate_max_tokens if model_to_evaluate_max_tokens is not None else 16000, "temperature": model_to_evaluate_temperature, "system_template": model_to_evaluate_system_template, "input_template": model_to_evaluate_input_template, @@ -185,7 +193,7 @@ async def create( model_a_final = { "model": model_a, "model_source": model_a_source, - "max_tokens": model_a_max_tokens, + "max_tokens": model_a_max_tokens if model_a_max_tokens is not None else 16000, "temperature": model_a_temperature, "system_template": model_a_system_template, "input_template": model_a_input_template, @@ -216,7 +224,7 @@ async def create( model_b_final = { "model": model_b, "model_source": model_b_source, - "max_tokens": model_b_max_tokens, + "max_tokens": model_b_max_tokens if model_b_max_tokens is not None else 16000, "temperature": model_b_temperature, "system_template": model_b_system_template, "input_template": model_b_input_template, @@ -239,7 +247,7 @@ async def create( response = await config.client.evals.create( type=type_val, parameters=ParametersEvaluationClassifyParameters( - input_data_file_path=input_data_file_path, + input_data_file_path=training_file, judge=judge_config, labels=labels_list or [], pass_labels=pass_labels_list or [], @@ -252,7 +260,7 @@ async def create( response = await config.client.evals.create( type="score", parameters=ParametersEvaluationScoreParameters( - input_data_file_path=input_data_file_path, + input_data_file_path=training_file, judge=judge_config, max_score=max_score, min_score=min_score, @@ -264,7 +272,7 @@ async def create( response = await config.client.evals.create( type=type_val, parameters=ParametersEvaluationCompareParameters( - input_data_file_path=input_data_file_path, + input_data_file_path=training_file, judge=judge_config, model_a=cast(ParametersEvaluationCompareParametersModelAEvaluationModelRequest, model_a_final), model_b=cast(ParametersEvaluationCompareParametersModelBEvaluationModelRequest, model_b_final), @@ -274,7 +282,13 @@ async def create( if config.json: console.print_json(openapi_dumps(response).decode("utf-8")) else: - console.print(json.dumps(response.model_dump(exclude_none=True), indent=4)) + url = f"https://api.together.ai/evaluations/result/{response.workflow_id}" + console.print(f"[green]√ Evaluation job created[/green] [dim]([link={url}]{response.workflow_id}[/link])[/dim]") + console.print(f" Evaluations may take some time to complete.\n") + console.print(f" To retrieve the status:") + console.print(f" [dim]-[/dim] [primary]tg evals status {response.workflow_id}[/primary]") + console.print(f" To get the results:") + console.print(f" [dim]-[/dim] [primary]tg evals {response.workflow_id}[/primary]") def _build_judge( @@ -312,3 +326,12 @@ def _build_judge( if judge_external_base_url: judge_config["external_base_url"] = judge_external_base_url return judge_config + + +def _check_path_exists(path_string: str) -> bool: + if path_string == "": + return False + p = Path(path_string) + if p.is_dir(): + raise ValueError(f"Path {path_string} is a directory, not a file. Please provide a file path.") + return p.exists() and p.is_file() diff --git a/src/together/lib/cli/api/evals/list.py b/src/together/lib/cli/api/evals/list.py index 5846a7498..bae80bda5 100644 --- a/src/together/lib/cli/api/evals/list.py +++ b/src/together/lib/cli/api/evals/list.py @@ -6,8 +6,14 @@ from together import omit from together.types import EvaluationJob +from together.lib.utils import log_debug from together._utils._json import openapi_dumps from together.lib.cli.utils.config import CLIConfigParameter +from together.types.evaluation_job import ( + ResultsEvaluationScoreResults, + ResultsEvaluationCompareResults, + ResultsEvaluationClassifyResults, +) from together.lib.cli.utils._console import console from together.lib.cli.components.list import ListTable from together.lib.cli.components.loader import show_loading_status @@ -45,25 +51,16 @@ async def list( return table = ListTable("Evals", empty_message="No evals found") - table.add_primary_column("Workflow ID", ratio=2) - table.add_column("Type") - table.add_column("Status") - table.add_column("Model") - table.add_column("Model A") - table.add_column("Model B") + table.add_primary_column("Workflow ID", ratio=1) + table.add_column("Type", ratio=1) + table.add_column("Result", ratio=4) for job in data: - model = _get_model_to_evaluate_name(job) - model_a = deep_get(job.parameters, ["model_a", "model"], "") - model_b = deep_get(job.parameters, ["model_b", "model"], "") - status_color = status_colors[job.status] if job.status in status_colors else "white" + result = _get_result(job) table.add_row( f"[link=https://api.together.ai/evaluations/result/{job.workflow_id}]{job.workflow_id}[/link]", job.type, - f"[{status_color}]{job.status}[/{status_color}]", - model, - model_a, - model_b, + result, ) console.print(table) if next_cursor: @@ -74,6 +71,60 @@ async def list( T = TypeVar("T") +def _get_result(job: EvaluationJob) -> str: + try: + if job.status != "completed": + status_color = status_colors[job.status] if job.status in status_colors else "white" + return f"status: [{status_color}]{job.status}[/{status_color}]" + + if job.type == "score": + score_job = cast(ResultsEvaluationScoreResults, job.results) + return "\n".join( + [ + f"mean score: [primary]{getattr(score_job.aggregated_scores, 'mean_score', 'N/A')}[/primary]", + f"pass percentage: [primary]{getattr(score_job.aggregated_scores, 'pass_percentage', 'N/A')}[/primary]", + f"std score: [primary]{getattr(score_job.aggregated_scores, 'std_score', 'N/A')}[/primary]", + ] + ) + + if job.type == "compare": + compare_job = cast(ResultsEvaluationCompareResults, job.results) + if ( + compare_job.a_wins is not None + and compare_job.b_wins is not None + and compare_job.a_wins > compare_job.b_wins + ): + return f"Winning Model: [primary]{_get_model_name(job, 'model_a')}[/primary] (model A)" + elif ( + compare_job.b_wins is not None + and compare_job.a_wins is not None + and compare_job.b_wins > compare_job.a_wins + ): + return f"Winning Model: [primary]{_get_model_name(job, 'model_b')}[/primary] (model B)" + else: + return "[primary]Tie[/primary]" + + if job.type == "classify": + classify_job = cast(ResultsEvaluationClassifyResults, job.results) + if classify_job.label_counts is None: + return "No label counts" + + labels = cast( + dict[str, int], classify_job.label_counts + ) # TODO: API has a bug in the shape of the response, so we need to cast it to the correct type + return "\n".join( + [ + f"label: [primary]{label}[/primary] (count: [primary]{count}[/primary])" + for label, count in labels.items() + ] + ) + + return "" + except Exception as e: + log_debug("Error parsing results for evals list", error=e) + return "Internal error" + + def deep_get(dictionary: dict[str, Any] | None, keys: List[str], default: T) -> T: cur = cast(Any, dictionary) for key in keys: @@ -84,15 +135,15 @@ def deep_get(dictionary: dict[str, Any] | None, keys: List[str], default: T) -> return cast(T, cur) -def _get_model_to_evaluate_name(job: EvaluationJob) -> str: +def _get_model_name(job: EvaluationJob, field: str) -> str: """ Get the name of the model to evaluate. Sometimes the parameters.model_to_evaluate is a dict, other times it's a string. """ - model_to_evaluate: str | dict[str, Any] = deep_get(job.parameters, ["model_to_evaluate"], "") + model: str | dict[str, Any] = deep_get(job.parameters, [field], "") - if isinstance(model_to_evaluate, dict): - return deep_get(model_to_evaluate, ["model"], "") + if isinstance(model, dict): + return deep_get(model, ["model"], "") - return model_to_evaluate + return model diff --git a/src/together/lib/cli/api/evals/retrieve.py b/src/together/lib/cli/api/evals/retrieve.py index b0c9601cb..e754c697d 100644 --- a/src/together/lib/cli/api/evals/retrieve.py +++ b/src/together/lib/cli/api/evals/retrieve.py @@ -3,12 +3,12 @@ from typing import Annotated from cyclopts import Parameter -from rich.markup import escape as escape_rich_markup from together._utils._json import openapi_dumps from together.lib.cli.utils.config import CLIConfigParameter from together.lib.cli.utils._console import console from together.lib.cli.components.loader import show_loading_status +from together.lib.cli.components.model_dump import print_model_dump async def retrieve( @@ -22,9 +22,4 @@ async def retrieve( console.print_json(openapi_dumps(response).decode("utf-8")) return - wid = response.workflow_id or evaluation_id - console.print( - f"[dim]Eval[/dim] [bold]{escape_rich_markup(str(wid))}[/bold] — " - f"[dim]status[/dim] [bold]{escape_rich_markup(str(response.status))}[/bold] — " - f"[dim]type[/dim] [bold]{escape_rich_markup(str(response.type))}[/bold]" - ) + print_model_dump(response) diff --git a/src/together/lib/cli/api/evals/status.py b/src/together/lib/cli/api/evals/status.py index b97300436..4975613b4 100644 --- a/src/together/lib/cli/api/evals/status.py +++ b/src/together/lib/cli/api/evals/status.py @@ -18,11 +18,7 @@ async def status( """Get the status and results of a specific evaluation job.""" response = await show_loading_status("Retrieving eval status...", config.client.evals.status(evaluation_id)) if config.json: - console.print_json(openapi_dumps(response).decode("utf-8")) - else: - console.print(f"Status: [bold]{response.status}[/bold]") + console.print_json(openapi_dumps({"status": response.status}).decode("utf-8")) + return - if response.results: - # TODO: Add a pretty print for the results - console.print("\nResults") - console.print_json(openapi_dumps(response.results).decode("utf-8")) + console.print(f"Status: {response.status}") diff --git a/src/together/lib/cli/components/model_dump.py b/src/together/lib/cli/components/model_dump.py new file mode 100644 index 000000000..6bf2005ba --- /dev/null +++ b/src/together/lib/cli/components/model_dump.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +from typing import Any, cast +from datetime import datetime + +from rich.table import Table + +from together import BaseModel +from together.lib.utils.tools import format_datetime +from together.lib.cli.utils._console import console + + +def print_model_dump(model: BaseModel) -> None: + console.print(_pretty_print_results(dump_sorted_model(model), expand=True)) + + +def _pretty_print_results(results: Any, expand: bool = False) -> Table: + table = Table(show_header=False, box=None, padding=(0, 1, 0, 0), expand=expand) + table.add_column("Key", style="dim") + table.add_column("Value", justify="left") + if isinstance(results, dict): + for key, value in cast(dict[str, Any], results).items(): + if isinstance(value, dict) or isinstance(value, list): + table.add_row(_humanize_key(key), _pretty_print_results(value)) + else: + table.add_row(_humanize_key(key), _colorize_value(value)) + elif isinstance(results, list): + for item in cast(list[Any], results): + table.add_row("-", _pretty_print_results(item)) + elif isinstance(results, BaseModel): + table.add_row("", _pretty_print_results(results.model_dump())) + else: + table.add_row("", _colorize_value(results)) + return table + + +def _humanize_key(key: str) -> str: + return f"{key.replace('_', ' ').title()}:" + + +def _colorize_value(value: Any) -> str: + if value is None: + return "[dim italic]n/a[/dim italic]" + if isinstance(value, bool): + return "[bold green]True[/bold green]" if value else "[bold red]False[/bold red]" + if isinstance(value, float): + return f"[bold blue]{value:g}[/bold blue]" + if isinstance(value, int): + return f"[bold blue]{value:d}[/bold blue]" + if isinstance(value, datetime): + return f"[bold blue]{format_datetime(value)}[/bold blue]" + + value = str(value) + value = value.replace("\n", "\\n") + value = value.replace("\t", "\\t") + + return f"[bold blue]{value}[/bold blue]" + + +def dump_sorted_model(model: BaseModel) -> dict[str, Any]: + """Returns a model dump where the properties are sorted by their type: + - ID fields first + - Primitives next + - Dicts/objects next + - Lists last + """ + + def _sort_items(key: str, value: Any) -> int: + # Returns a sort key: 0 for ID fields, 1 for primitives, 2 for dicts/objects, 3 for lists + if key.endswith("_id"): + return 0 + elif isinstance(value, dict) or isinstance(value, BaseModel): + return 2 + elif isinstance(value, list): + return 3 + else: + return 1 + + return dict(sorted(model.model_dump().items(), key=lambda kv: _sort_items(kv[0], kv[1]))) diff --git a/src/together/lib/cli/utils/_console.py b/src/together/lib/cli/utils/_console.py index a3b1ac21b..ad896db94 100644 --- a/src/together/lib/cli/utils/_console.py +++ b/src/together/lib/cli/utils/_console.py @@ -30,4 +30,4 @@ } ) -console = Console(theme=custom_theme) +console = Console(theme=custom_theme, highlight=False) diff --git a/src/together/lib/cli/utils/_help_examples.py b/src/together/lib/cli/utils/_help_examples.py index d7a30353d..56a80c755 100644 --- a/src/together/lib/cli/utils/_help_examples.py +++ b/src/together/lib/cli/utils/_help_examples.py @@ -65,3 +65,69 @@ [dim]-[/dim] Change the auto-stop timeout for an endpoint: [primary]tg endpoints update ENDPOINT_ID --inactive-timeout 30[/primary] """ + +## Evals API commands + +EVALS_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Look at the examples for creating an evaluation job: + [primary]tg evals create --help[/primary] + +[dim]-[/dim] List all evaluation jobs: + [primary]tg evals ls[/primary] + +[dim]-[/dim] Check the status of an evaluation job: + [primary]tg evals status [/primary] + +[dim]-[/dim] Get details of an evaluation job: + [primary]tg evals [/primary] +""" + +EVALS_CREATE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Run a classification evaluation: + [primary]tg evals create \\ + --type classify \\ + --judge-model deepseek-ai/DeepSeek-V3.1 \\ + --judge-model-source serverless \\ + --judge-system-template "You are a helpful assistant" \\ + --input-data-file-path ./data.jsonl \\ + --model-to-evaluate deepseek-ai/DeepSeek-V3.1 \\ + --model-to-evaluate-source serverless \\ + --model-to-evaluate-system-template "Respond to the following comment. You can be informal but maintain a respectful tone." \\ + --model-to-evaluate-input-template "Here's a comment I saw online. How would you respond to it?\\n\\n{{question}}" \\ + --labels 'Toxic,Non-toxic' \\ + --pass-labels 'Non-toxic'[/primary] + +[dim]-[/dim] Run a score evaluation: + [primary]tg evals create \\ + --type score \\ + --judge-model deepseek-ai/DeepSeek-V3.1 \\ + --judge-model-source serverless \\ + --judge-system-template "Rate the given response on a scale from 1 to 10, where 1 is generic and 10 is unique." \\ + --input-data-file-path ./data.jsonl \\ + --model-to-evaluate deepseek-ai/DeepSeek-V3.1 \\ + --model-to-evaluate-source serverless \\ + --model-to-evaluate-system-template "You are a helpful assistant." \\ + --model-to-evaluate-input-template $'Please respond:\\n\\n{{prompt}}' \\ + --model-to-evaluate-max-tokens 512 \\ + --model-to-evaluate-temperature 0.7 \\ + --min-score 1 \\ + --max-score 10 \\ + --pass-threshold 7 + [/primary] + +[dim]-[/dim] Run a compare evaluation: + [primary]tg evals create \\ + --type compare \\ + --judge-model deepseek-ai/DeepSeek-V3.1 \\ + --judge-model-source serverless \\ + --judge-system-template "You are an expert judge. Given the user task and two model responses, say which is better and why." \\ + --input-data-file-path ./examples/eval_compare_sample.jsonl \\ + --model-a deepseek-ai/DeepSeek-V3.1 \\ + --model-a-source serverless \\ + --model-a-system-template "You are a helpful assistant." \\ + --model-a-input-template $'Answer the following:\\n\\n{{prompt}}' \\ + --model-b deepseek-ai/DeepSeek-V3.1 \\ + --model-b-source serverless \\ + --model-b-system-template "You are a concise assistant." \\ + --model-b-input-template $'Answer the following:\\n\\n{{prompt}}'[/primary] +""" From ad3cdb2d0d4f5511fa06105c4a7169cc72cf9306 Mon Sep 17 00:00:00 2001 From: Blaine Kasten Date: Fri, 8 May 2026 11:53:35 -0500 Subject: [PATCH 08/18] chore: Add example usage to fine-tuning CLI help pages (#357) * chore: Add example usage to fine-tuning CLI help pages * Update src/together/lib/cli/utils/_help_examples.py Co-authored-by: Zain Hasan --------- Co-authored-by: Zain Hasan --- src/together/lib/cli/__init__.py | 35 +++- .../lib/cli/api/fine_tuning/create.py | 60 +++--- .../lib/cli/api/fine_tuning/download.py | 8 +- .../lib/cli/api/fine_tuning/retrieve.py | 175 ++---------------- src/together/lib/cli/components/model_dump.py | 137 +++++++------- src/together/lib/cli/utils/_help_examples.py | 40 ++++ 6 files changed, 193 insertions(+), 262 deletions(-) diff --git a/src/together/lib/cli/__init__.py b/src/together/lib/cli/__init__.py index 541706f36..c9f231fa5 100644 --- a/src/together/lib/cli/__init__.py +++ b/src/together/lib/cli/__init__.py @@ -30,10 +30,13 @@ EVALS_HELP_EXAMPLES, ENDPOINTS_HELP_EXAMPLES, TOP_LEVEL_HELP_EXAMPLES, + FINE_TUNING_HELP_EXAMPLES, EVALS_CREATE_HELP_EXAMPLES, ENDPOINTS_CREATE_HELP_EXAMPLES, ENDPOINTS_UPDATE_HELP_EXAMPLES, ENDPOINTS_HARDWARE_HELP_EXAMPLES, + FINE_TUNING_CREATE_HELP_EXAMPLES, + FINE_TUNING_DOWNLOAD_HELP_EXAMPLES, ) from together.lib.cli.utils._help_formatter import help_formatter from together.lib.cli.utils._preparse_tokens import preparse_tokens @@ -311,21 +314,39 @@ async def run_command() -> None: files_app.command(f"{_CLI}.files.check:check", help="Check a local file for issues") # Fine-tuning API commands -fine_tuning_app = app.command(App(name="fine-tuning", alias="ft", help="Create and manage fine-tuning jobs")) -fine_tuning_app.command((f"{_CLI}.fine_tuning.create:create"), alias="-c", help="Start a new fine-tuning job") -fine_tuning_app.command((f"{_CLI}.fine_tuning.list:list"), alias="ls", help="List fine-tuning jobs") -fine_tuning_app.command((f"{_CLI}.fine_tuning.retrieve:retrieve"), help="Get fine-tuning job details") -fine_tuning_app.command((f"{_CLI}.fine_tuning.cancel:cancel"), help="Cancel a fine-tuning job") -fine_tuning_app.command((f"{_CLI}.fine_tuning.list_events:list_events"), help="List events for a fine-tuning job") +fine_tuning_app = app.command( + App( + name="fine-tuning", + alias="ft", + help="Create and manage fine-tuning jobs", + help_epilogue=FINE_TUNING_HELP_EXAMPLES, + ) +) +fine_tuning_app.command( + (f"{_CLI}.fine_tuning.create:create"), + alias="-c", + help="Start a new fine-tuning job", + help_epilogue=FINE_TUNING_CREATE_HELP_EXAMPLES, +) +fine_tuning_app.command((f"{_CLI}.fine_tuning.list:list"), alias="ls", help="List fine-tuning jobs", help_epilogue="") +fine_tuning_app.command((f"{_CLI}.fine_tuning.retrieve:retrieve"), help="Get fine-tuning job details", help_epilogue="") +fine_tuning_app.command((f"{_CLI}.fine_tuning.cancel:cancel"), help="Cancel a fine-tuning job", help_epilogue="") +fine_tuning_app.command( + (f"{_CLI}.fine_tuning.list_events:list_events"), help="List events for a fine-tuning job", help_epilogue="" +) fine_tuning_app.command( (f"{_CLI}.fine_tuning.list_checkpoints:list_checkpoints"), help="List checkpoints for a fine-tuning job", + help_epilogue="", ) fine_tuning_app.command( (f"{_CLI}.fine_tuning.download:download"), help="Download a fine-tuned model's weights", + help_epilogue=FINE_TUNING_DOWNLOAD_HELP_EXAMPLES, +) +fine_tuning_app.command( + (f"{_CLI}.fine_tuning.delete:delete"), alias="-d", help="Delete a fine-tuning job", help_epilogue="" ) -fine_tuning_app.command((f"{_CLI}.fine_tuning.delete:delete"), alias="-d", help="Delete a fine-tuning job") ## Models API commands models_app = app.command(App(name="models", help="List and upload models")) diff --git a/src/together/lib/cli/api/fine_tuning/create.py b/src/together/lib/cli/api/fine_tuning/create.py index 7a57889cd..2551996d1 100644 --- a/src/together/lib/cli/api/fine_tuning/create.py +++ b/src/together/lib/cli/api/fine_tuning/create.py @@ -5,6 +5,7 @@ from cyclopts import Group, Parameter, validators +from together import BaseModel from together.types import fine_tuning_estimate_price_params as pe_params from together.lib.utils import log_warn from together.lib.cli.api._utils import ( @@ -15,15 +16,15 @@ from together.lib.cli.utils._console import console from together.lib.cli.components.loader import show_loading_status from together.lib.resources.fine_tuning import async_get_model_limits +from together.lib.cli.components.model_dump import print_model_dump def get_confirmation_message(price: str, warning: str) -> str: return ( - """You are about to create a fine-tuning job. The estimated price of this job is {price} + """You are about to create a fine-tuning job. The estimated price of this job is {price}. The actual cost of your job will be determined by the model size, the number of tokens in the training file, the number of tokens in the validation file, the number of epochs, and the number of evaluations. Visit https://www.together.ai/pricing to learn more about pricing. -{warning} -Do you want to proceed? [Y/n]""" +{warning}""" ).format(price=price, warning=warning) @@ -34,11 +35,13 @@ def get_confirmation_message(price: str, warning: str) -> str: ) -def _check_path_exists(path_string: str) -> bool: - if path_string == "": +def _check_path_exists(path_string: Optional[str]) -> bool: + if path_string == "" or path_string is None: return False p = Path(path_string) - return p.exists() and (p.is_file() or p.is_dir()) + if p.is_dir(): + raise ValueError(f"Path {path_string} is a directory, not a file. Please provide a file path.") + return p.exists() and p.is_file() model_group = Group( @@ -58,19 +61,19 @@ async def create( training_file: Annotated[ str, Parameter( - name=["--training-file", "-t"], + alias="-t", help="Training file ID from Files API or local path to a file to upload", ), ], validation_file: Annotated[ - str, + Optional[str], Parameter( - name=["--validation-file", "-v"], + alias="-v", help="Validation file ID from Files API or local path to a file to upload", ), - ] = "", + ] = None, model: Annotated[ - Optional[str], Parameter(group=model_group, help="Name of the base model to run fine-tune job on") + Optional[str], Parameter(group=model_group, alias="-M", help="Name of the base model to run fine-tune job on") ] = None, from_checkpoint: Annotated[ Optional[str], @@ -79,18 +82,16 @@ async def create( help="Checkpoint to continue training from a previous fine-tuning job, formatted as `JOB_ID/OUTPUT_MODEL_NAME:STEP`; STEP is optional and defaults to the final checkpoint", ), ] = None, - n_epochs: Annotated[int, Parameter(name=["--n-epochs", "-ne"], help="Number of epochs to train for")] = 1, + n_epochs: Annotated[int, Parameter(alias="--ne", help="Number of epochs to train for")] = 1, packing: Annotated[bool, Parameter(show_default=True, help="Whether to use packing for training")] = True, n_evals: Annotated[int, Parameter(help="Number of evaluation loops to run")] = 0, max_seq_length: Annotated[int | None, Parameter(help="Maximum sequence length to use for training")] = None, - n_checkpoints: Annotated[int, Parameter(name=["--n-checkpoints", "-c"], help="Number of checkpoints to save")] = 1, + n_checkpoints: Annotated[int, Parameter(alias="-c", help="Number of checkpoints to save")] = 1, batch_size: Annotated[ int | Literal["max"], - Parameter(converter=int_or_max_converter, name=["--batch-size", "-b"], help="Train batch size"), + Parameter(converter=int_or_max_converter, alias="-b", help="Train batch size"), ] = "max", - learning_rate: Annotated[ - float, Parameter(name=["--learning-rate", "-lr"], help="Learning rate") - ] = DEFAULT_LEARNING_RATE, + learning_rate: Annotated[float, Parameter(alias="--lr", help="Learning rate")] = DEFAULT_LEARNING_RATE, lr_scheduler_type: Annotated[ Literal["linear", "cosine"], Parameter(help="Learning rate scheduler type") ] = "cosine", @@ -112,7 +113,10 @@ async def create( ] = "all-linear", training_method: Annotated[ Literal["sft", "dpo"], - Parameter(help="Training method to use: sft (supervised fine-tuning) or dpo (Direct Preference Optimization)"), + Parameter( + alias=("-m"), + help="Training method to use: sft (supervised fine-tuning) or dpo (Direct Preference Optimization)", + ), ] = "sft", dpo_beta: Annotated[Optional[float], Parameter(help="DPO beta parameter")] = None, dpo_normalize_logratios_by_length: Annotated[ @@ -131,7 +135,7 @@ async def create( Parameter(help="Random seed for reproducible training, e.g. 42; uses the server default if unset"), ] = None, confirm: Annotated[ - bool, Parameter(name=["--confirm", "-y"], help="Whether to skip the launch confirmation message") + bool, Parameter(alias=("-y"), negative=(), help="Whether to skip the launch confirmation message") ] = False, train_on_inputs: Annotated[ Optional[BoolOrAuto], @@ -318,13 +322,23 @@ async def create( ) price_str = f"${finetune_price_estimation_result.estimated_total_price:.2f}" warning = _WARNING_MESSAGE_INSUFFICIENT_FUNDS if not finetune_price_estimation_result.allowed_to_proceed else "" - confirmation_message = get_confirmation_message(price=price_str, warning=warning) if not confirm: - resp = input(confirmation_message).strip().lower() + confirmation_message = get_confirmation_message(price=price_str, warning=warning) + console.print(confirmation_message) + resp = input("Do you want to proceed? [Y/n]").strip().lower() if resp and resp != "y" and resp != "yes": return + + console.print(f"Submitting a fine-tuning job with the following parameters:") + print_model_dump(BaseModel(**training_args), show_nulls=False, expand=False, padding=(0, 2)) + response = await show_loading_status( - "Creating fine-tuning job...", config.client.fine_tuning.create(**training_args, verbose=True) + "Creating fine-tuning job...", config.client.fine_tuning.create(**training_args) + ) + url = f"https://api.together.ai/fine-tuning/{response.id}" + console.print( + f"\n[green]√ Fine-tuning job has been submitted.[/green] [dim]([link={url}]{response.id}[/link])[/dim]" ) - console.print(f"\n\nSuccess! Your fine-tuning job {response.id} has been submitted.") + console.print(f"\n You can track the job's progress with the following command:") + console.print(f" [dim]-[/dim] [primary]tg fine-tuning {response.id}[/primary]") diff --git a/src/together/lib/cli/api/fine_tuning/download.py b/src/together/lib/cli/api/fine_tuning/download.py index 632aec3b6..f4d9ae1ec 100644 --- a/src/together/lib/cli/api/fine_tuning/download.py +++ b/src/together/lib/cli/api/fine_tuning/download.py @@ -26,7 +26,7 @@ Parameter(name=["--checkpoint-step", "-s"], help="Fine-tuning checkpoint to download; defaults to latest if unset"), ] CheckpointTypeParam = Annotated[ - Literal["merged", "adapter", "default"], + Optional[Literal["merged", "adapter", "default"]], Parameter( name=["--checkpoint-type", "-c"], help="Checkpoint type ('merged' and 'adapter' apply to LoRA jobs only)", @@ -38,7 +38,7 @@ async def download( fine_tune_id: str, output_dir: OutputDirParam = None, checkpoint_step: CheckpointStepParam = None, - checkpoint_type: CheckpointTypeParam = "merged", + checkpoint_type: CheckpointTypeParam = None, *, config: CLIConfigParameter, ) -> None: @@ -56,9 +56,9 @@ async def download( ft_job = await show_loading_status( "Retrieving fine-tuning job...", config.client.fine_tuning.retrieve(fine_tune_id) ) - loosely_typed_checkpoint_type: str = checkpoint_type + loosely_typed_checkpoint_type: str = checkpoint_type if checkpoint_type is not None else "" if isinstance(ft_job.training_type, TrainingTypeFullTrainingType): - if checkpoint_type != "default": + if checkpoint_type is not None and checkpoint_type != "default": raise ValueError("Only DEFAULT checkpoint type is allowed for FullTrainingType") loosely_typed_checkpoint_type = "model_output_path" elif isinstance(ft_job.training_type, TrainingTypeLoRaTrainingType): diff --git a/src/together/lib/cli/api/fine_tuning/retrieve.py b/src/together/lib/cli/api/fine_tuning/retrieve.py index 70b37eeba..64bc68378 100644 --- a/src/together/lib/cli/api/fine_tuning/retrieve.py +++ b/src/together/lib/cli/api/fine_tuning/retrieve.py @@ -1,175 +1,18 @@ from __future__ import annotations -from typing import Any, cast from datetime import datetime -from rich.markup import escape as escape_rich_markup - -from together.lib.utils import convert_bytes, finetune_price_to_dollars from together._utils._json import openapi_dumps -from together.lib.utils.tools import format_datetime from together.lib.cli.api._utils import generate_progress_bar from together.lib.cli.utils.config import CLIConfigParameter from together.lib.types.fine_tuning import COMPLETED_STATUSES from together.lib.cli.utils._console import console -from together.types.finetune_response import FinetuneResponse from together.lib.cli.components.loader import show_loading_status -from together.lib.cli.api.fine_tuning.list import status_colors +from together.lib.cli.components.model_dump import print_model_dump _NEST_INDENT = 4 -def _plain(v: Any) -> str | None: - """Plain escaped text for a scalar, or None if missing.""" - if v is None: - return None - if isinstance(v, bool): - return "yes" if v else "no" - if isinstance(v, float): - s = f"{v:g}" if not v.is_integer() else str(int(v)) - return escape_rich_markup(s) - if isinstance(v, int): - return escape_rich_markup(f"{v:,}") - return escape_rich_markup(str(v)) - - -def _plain_dt(v: datetime | None) -> str | None: - if v is None: - return None - try: - return escape_rich_markup(format_datetime(v)) - except Exception: - return escape_rich_markup(str(v)) - - -def _plain_price(nano: int | None) -> str | None: - if nano is None: - return None - dollars = finetune_price_to_dollars(float(nano)) - return escape_rich_markup(f"${dollars:,.2f}") - - -def _plain_bytes(n: int | None) -> str | None: - if n is None: - return None - s = convert_bytes(float(n)) - return escape_rich_markup(s or str(n)) - - -def _print_kv(label: str, text: str | None) -> None: - lab = escape_rich_markup(label) - if text is not None: - console.print(f"[dim]{lab}:[/dim] [white]{text}[/white]") - - -def _as_jsonlike(obj: Any) -> Any: - if obj is None: - return None - if hasattr(obj, "model_dump"): - return obj.model_dump(mode="json") - return obj - - -def _walk_jsonlike(data: Any, indent: int) -> None: - """Print JSON-like dict/list trees as indented key/value lines (same style as top-level).""" - pad = " " * indent - if data is None: - console.print(f"{pad}[dim]—[/dim]") - return - if isinstance(data, dict): - d = cast(dict[str, Any], data) - if not d: - console.print(f"{pad}[dim](empty)[/dim]") - return - for key in sorted(d, key=str): - v = d[key] - kdisp = escape_rich_markup(str(key)) - if isinstance(v, dict): - console.print(f"{pad}[dim]{kdisp}:[/dim]") - _walk_jsonlike(v, indent + _NEST_INDENT) - elif isinstance(v, list): - console.print(f"{pad}[dim]{kdisp}:[/dim]") - _walk_jsonlike(v, indent + _NEST_INDENT) - else: - sv = _plain(v) - if sv is not None: - console.print(f"{pad}[dim]{kdisp}:[/dim] [white]{sv}[/white]") - return - if isinstance(data, list): - lst = cast(list[Any], data) - if not lst: - console.print(f"{pad}[dim]—[/dim]") - return - for i, item in enumerate(lst): - if isinstance(item, dict): - console.print(f"{pad}[dim][{i}][/dim]") - _walk_jsonlike(item, indent + _NEST_INDENT) - elif isinstance(item, list): - console.print(f"{pad}[dim][{i}][/dim]") - _walk_jsonlike(item, indent + _NEST_INDENT) - else: - sv = _plain(item) - if sv is None: - console.print(f"{pad}[dim][{i}]:[/dim] [dim]—[/dim]") - else: - console.print(f"{pad}[dim][{i}]:[/dim] [white]{sv}[/white]") - return - sv = _plain(data) - console.print(f"{pad}[white]{sv}[/white]" if sv else f"{pad}[dim]—[/dim]") - - -def _print_nested_section(title: str, obj: Any, indent: int = _NEST_INDENT) -> None: - console.print(f"[dim]{escape_rich_markup(title)}:[/dim]") - if obj is None: - console.print(" " * indent + "[dim]—[/dim]") - return - _walk_jsonlike(_as_jsonlike(obj), indent) - - -def _print_job_details(r: FinetuneResponse, fine_tune_id: str) -> None: - sc = status_colors.get(r.status, "white") - _print_kv("Job ID", _plain(r.id)) - console.print(f"[dim]{escape_rich_markup('Status')}:[/dim] [bold {sc}]{escape_rich_markup(r.status)}[/bold {sc}]") - _print_kv("Model Name", _plain(r.x_model_output_name)) - _print_kv("Total price", _plain_price(r.total_price)) - _print_kv("Created", _plain_dt(r.created_at)) - _print_kv("Started", _plain_dt(r.started_at)) - _print_kv("Updated", _plain_dt(r.updated_at)) - - console.print(f"\n[dim]Training Data:[/dim]") - _print_kv(" Base model", _plain(r.model)) - _print_kv(" Training file", _plain(r.training_file)) - _print_kv(" Validation file", _plain(r.validation_file)) - _print_kv(" Training lines", _plain(r.trainingfile_numlines)) - _print_kv(" Training file size", _plain_bytes(r.trainingfile_size)) - _print_kv(" From checkpoint", _plain(r.from_checkpoint)) - _print_kv(" From HF model", _plain(r.from_hf_model)) - _print_kv(" HF model revision", _plain(r.hf_model_revision)) - _print_kv(" Batch size", _plain(r.batch_size)) - _print_kv(" Learning rate", _plain(r.learning_rate)) - _print_kv(" Warmup ratio", _plain(r.warmup_ratio)) - _print_kv(" Weight decay", _plain(r.weight_decay)) - _print_kv(" Max grad norm", _plain(r.max_grad_norm)) - _print_kv(" Train on inputs", _plain(r.train_on_inputs)) - _print_kv(" Epochs (configured)", _plain(r.n_epochs)) - _print_kv(" Epochs completed", _plain(r.epochs_completed)) - _print_kv(" Checkpoints to save", _plain(r.n_checkpoints)) - _print_kv(" Eval loops", _plain(r.n_evals)) - _print_kv(" Eval steps", _plain(r.eval_steps)) - _print_kv(" Token count", _plain(r.token_count)) - _print_kv(" Parameter count", _plain(r.param_count)) - _print_kv(" Queue depth", _plain(r.queue_depth)) - _print_nested_section(" LR scheduler", r.lr_scheduler) - _print_nested_section(" Training type", r.training_type) - _print_nested_section(" Training method", r.training_method) - _print_nested_section(" Multimodal params", r.multimodal_params) - - if r.events: - console.print("\n[dim]FT Events:[/dim]") - console.print(f" [dim]Total events:[/dim] {len(r.events)}") - console.print(f" [dim]To see event log data run[/dim] tg fine-tuning list-events {fine_tune_id}") - - async def retrieve( fine_tune_id: str, *, @@ -184,11 +27,15 @@ async def retrieve( console.print_json(openapi_dumps(response).decode("utf-8")) return - if response.status in COMPLETED_STATUSES: - _print_job_details(response, fine_tune_id) - return + event_count = len(response.events) if response.events else 0 + response.events = None - progress_text = generate_progress_bar(response, datetime.now().astimezone(), use_rich=True) + if response.status not in COMPLETED_STATUSES: + progress_text = generate_progress_bar(response, datetime.now().astimezone(), use_rich=True) + console.print(progress_text) - console.print(f"[bold primary]Fine-tuning job[/bold primary] [dim]{escape_rich_markup(response.id)}[/dim]") - console.print(progress_text) + print_model_dump(response, show_nulls=False) + if event_count > 0: + console.print("\n[dim]FT Events:[/dim]") + console.print(f" [dim]Total events:[/dim] {event_count}") + console.print(f" [dim]To see event log data run[/dim] tg fine-tuning list-events {fine_tune_id}") diff --git a/src/together/lib/cli/components/model_dump.py b/src/together/lib/cli/components/model_dump.py index 6bf2005ba..fe2e2d196 100644 --- a/src/together/lib/cli/components/model_dump.py +++ b/src/together/lib/cli/components/model_dump.py @@ -4,76 +4,85 @@ from datetime import datetime from rich.table import Table +from rich.padding import PaddingDimensions from together import BaseModel from together.lib.utils.tools import format_datetime from together.lib.cli.utils._console import console -def print_model_dump(model: BaseModel) -> None: - console.print(_pretty_print_results(dump_sorted_model(model), expand=True)) - - -def _pretty_print_results(results: Any, expand: bool = False) -> Table: - table = Table(show_header=False, box=None, padding=(0, 1, 0, 0), expand=expand) - table.add_column("Key", style="dim") - table.add_column("Value", justify="left") - if isinstance(results, dict): - for key, value in cast(dict[str, Any], results).items(): - if isinstance(value, dict) or isinstance(value, list): - table.add_row(_humanize_key(key), _pretty_print_results(value)) +def print_model_dump( + model: BaseModel, show_nulls: bool = True, expand: bool = True, padding: PaddingDimensions = (0, 1, 0, 0) +) -> None: + """Print an entire model with __decent__ formatting.""" + + def _pretty_print_results( + results: Any, show_nulls: bool = True, expand: bool = False, padding: PaddingDimensions = (0, 1, 0, 0) + ) -> Table: + table = Table(show_header=False, box=None, padding=padding, expand=expand) + table.add_column("Key", style="dim") + table.add_column("Value", justify="left") + if isinstance(results, dict): + for key, value in cast(dict[str, Any], results).items(): + if not show_nulls and (value is None or value == ""): + continue + if isinstance(value, dict) or isinstance(value, list): + table.add_row(_humanize_key(key), _pretty_print_results(value)) + else: + table.add_row(_humanize_key(key), _colorize_value(value)) + elif isinstance(results, list): + for item in cast(list[Any], results): + if not show_nulls and item is None: + continue + table.add_row("-", _pretty_print_results(item)) + elif isinstance(results, BaseModel): + table.add_row("", _pretty_print_results(results.model_dump(), show_nulls=show_nulls)) + else: + table.add_row("", _colorize_value(results)) + return table + + def _humanize_key(key: str) -> str: + return f"{key.replace('_', ' ').title()}:" + + def _colorize_value(value: Any) -> str: + if value is None: + return "[dim italic]n/a[/dim italic]" + if isinstance(value, bool): + return f"[bold blue]{value}[/bold blue]" + if isinstance(value, float): + return f"[bold blue]{value:g}[/bold blue]" + if isinstance(value, int): + return f"[bold blue]{value:d}[/bold blue]" + if isinstance(value, datetime): + return f"[bold blue]{format_datetime(value)}[/bold blue]" + + value = str(value) + value = value.replace("\n", "\\n") + value = value.replace("\t", "\\t") + + return f"[bold blue]{value}[/bold blue]" + + def _dump_sorted_model(model: BaseModel) -> dict[str, Any]: + """Returns a model dump where the properties are sorted by their type: + - ID fields first + - Primitives next + - Dicts/objects next + - Lists last + """ + + def _sort_items(key: str, value: Any) -> int: + # Returns a sort key: 0 for ID fields, 1 for primitives, 2 for dicts/objects, 3 for lists + if key.endswith("_id"): + return 0 + elif isinstance(value, dict) or isinstance(value, BaseModel): + return 2 + elif isinstance(value, list): + return 3 else: - table.add_row(_humanize_key(key), _colorize_value(value)) - elif isinstance(results, list): - for item in cast(list[Any], results): - table.add_row("-", _pretty_print_results(item)) - elif isinstance(results, BaseModel): - table.add_row("", _pretty_print_results(results.model_dump())) - else: - table.add_row("", _colorize_value(results)) - return table - - -def _humanize_key(key: str) -> str: - return f"{key.replace('_', ' ').title()}:" - + return 1 -def _colorize_value(value: Any) -> str: - if value is None: - return "[dim italic]n/a[/dim italic]" - if isinstance(value, bool): - return "[bold green]True[/bold green]" if value else "[bold red]False[/bold red]" - if isinstance(value, float): - return f"[bold blue]{value:g}[/bold blue]" - if isinstance(value, int): - return f"[bold blue]{value:d}[/bold blue]" - if isinstance(value, datetime): - return f"[bold blue]{format_datetime(value)}[/bold blue]" - - value = str(value) - value = value.replace("\n", "\\n") - value = value.replace("\t", "\\t") - - return f"[bold blue]{value}[/bold blue]" - - -def dump_sorted_model(model: BaseModel) -> dict[str, Any]: - """Returns a model dump where the properties are sorted by their type: - - ID fields first - - Primitives next - - Dicts/objects next - - Lists last - """ - - def _sort_items(key: str, value: Any) -> int: - # Returns a sort key: 0 for ID fields, 1 for primitives, 2 for dicts/objects, 3 for lists - if key.endswith("_id"): - return 0 - elif isinstance(value, dict) or isinstance(value, BaseModel): - return 2 - elif isinstance(value, list): - return 3 - else: - return 1 + return dict(sorted(model.model_dump().items(), key=lambda kv: _sort_items(kv[0], kv[1]))) - return dict(sorted(model.model_dump().items(), key=lambda kv: _sort_items(kv[0], kv[1]))) + console.print( + _pretty_print_results(_dump_sorted_model(model), show_nulls=show_nulls, expand=expand, padding=padding) + ) diff --git a/src/together/lib/cli/utils/_help_examples.py b/src/together/lib/cli/utils/_help_examples.py index 56a80c755..602b481e0 100644 --- a/src/together/lib/cli/utils/_help_examples.py +++ b/src/together/lib/cli/utils/_help_examples.py @@ -11,6 +11,46 @@ [primary]tg models upload --model-name my-org/my-model --model-source s3-or-hugging-face[/primary] """ +## Fine-tuning API commands +FINE_TUNING_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Create a fine-tuning job: + [primary]tg ft create --model Qwen/Qwen2-1.5B --training-file ./my-dataset.jsonl[/primary] + +[dim]-[/dim] Retrieve a fine-tuning job details: + [primary]tg ft [/primary] + +[dim]-[/dim] Download a fine-tuned model's weights: + [primary]tg ft download --output-dir ./my-model[/primary] + +[dim]-[/dim] List checkpoints for a fine-tuning job: + [primary]tg ft list-checkpoints [/primary] + +[dim]-[/dim] Cancel a fine-tuning job: + [primary]tg ft cancel [/primary] +""" + +FINE_TUNING_CREATE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Start a supervised fine-tuning job: + [primary]tg ft create -M Qwen/Qwen2-1.5B -t ./my-dataset.jsonl[/primary] + +[dim]-[/dim] Start a preference fine-tuning job: + [primary]tg ft create -m dpo -M Qwen/Qwen2-1.5B -t ./dpo_train_file.jsonl[/primary] + +[dim]-[/dim] Start a fine-tuning job from a checkpoint: + [primary]tg ft create --from-checkpoint JOB_ID/OUTPUT_MODEL_NAME:STEP --training-file ./updated-dataset.jsonl[/primary] + +[dim]-[/dim] Specify the number of checkpoints to save: + [primary]tg ft create --n-checkpoints 3 -M Qwen/Qwen2-1.5B --training-file ./my-dataset.jsonl[/primary] +""" + +FINE_TUNING_DOWNLOAD_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Download a fine-tuned model's weights: + [primary]tg ft download --output-dir ./my-model[/primary] + +[dim]-[/dim] Download a fine-tuned model's weights from a specific checkpoint: + [primary]tg ft download --checkpoint-step 1 --output-dir ./my-model[/primary] +""" + ## Endpoints API commands ENDPOINTS_HELP_EXAMPLES = """[dim]Examples:[/dim] From 13162032b1ae100ecc7c1a08f454b30474f0a6b4 Mon Sep 17 00:00:00 2001 From: Blaine Kasten Date: Fri, 8 May 2026 13:58:11 -0500 Subject: [PATCH 09/18] chore: Add example usage for model commands (#358) --- src/together/lib/cli/__init__.py | 39 +++++++++----------- src/together/lib/cli/utils/_help_examples.py | 22 +++++++++++ 2 files changed, 40 insertions(+), 21 deletions(-) diff --git a/src/together/lib/cli/__init__.py b/src/together/lib/cli/__init__.py index c9f231fa5..4b6b63a4f 100644 --- a/src/together/lib/cli/__init__.py +++ b/src/together/lib/cli/__init__.py @@ -28,10 +28,12 @@ from together.lib.cli.utils._completion import install_completion from together.lib.cli.utils._help_examples import ( EVALS_HELP_EXAMPLES, + MODELS_HELP_EXAMPLES, ENDPOINTS_HELP_EXAMPLES, TOP_LEVEL_HELP_EXAMPLES, FINE_TUNING_HELP_EXAMPLES, EVALS_CREATE_HELP_EXAMPLES, + MODELS_UPLOAD_HELP_EXAMPLES, ENDPOINTS_CREATE_HELP_EXAMPLES, ENDPOINTS_UPDATE_HELP_EXAMPLES, ENDPOINTS_HARDWARE_HELP_EXAMPLES, @@ -63,6 +65,7 @@ def _propagate_global_param_group(target_app: App) -> None: target_app[flag].group = "Global Options" target_app[flag].show = True target_app[flag].help = help_text + target_app.help_epilogue = target_app.help_epilogue or "" except KeyError: pass for sub in target_app.subapps: @@ -328,30 +331,25 @@ async def run_command() -> None: help="Start a new fine-tuning job", help_epilogue=FINE_TUNING_CREATE_HELP_EXAMPLES, ) -fine_tuning_app.command((f"{_CLI}.fine_tuning.list:list"), alias="ls", help="List fine-tuning jobs", help_epilogue="") -fine_tuning_app.command((f"{_CLI}.fine_tuning.retrieve:retrieve"), help="Get fine-tuning job details", help_epilogue="") -fine_tuning_app.command((f"{_CLI}.fine_tuning.cancel:cancel"), help="Cancel a fine-tuning job", help_epilogue="") -fine_tuning_app.command( - (f"{_CLI}.fine_tuning.list_events:list_events"), help="List events for a fine-tuning job", help_epilogue="" -) +fine_tuning_app.command((f"{_CLI}.fine_tuning.list:list"), alias="ls", help="List fine-tuning jobs") +fine_tuning_app.command((f"{_CLI}.fine_tuning.retrieve:retrieve"), help="Get fine-tuning job details") +fine_tuning_app.command((f"{_CLI}.fine_tuning.cancel:cancel"), help="Cancel a fine-tuning job") +fine_tuning_app.command((f"{_CLI}.fine_tuning.list_events:list_events"), help="List events for a fine-tuning job") fine_tuning_app.command( (f"{_CLI}.fine_tuning.list_checkpoints:list_checkpoints"), help="List checkpoints for a fine-tuning job", - help_epilogue="", ) fine_tuning_app.command( (f"{_CLI}.fine_tuning.download:download"), help="Download a fine-tuned model's weights", help_epilogue=FINE_TUNING_DOWNLOAD_HELP_EXAMPLES, ) -fine_tuning_app.command( - (f"{_CLI}.fine_tuning.delete:delete"), alias="-d", help="Delete a fine-tuning job", help_epilogue="" -) +fine_tuning_app.command((f"{_CLI}.fine_tuning.delete:delete"), alias="-d", help="Delete a fine-tuning job") ## Models API commands -models_app = app.command(App(name="models", help="List and upload models")) +models_app = app.command(App(name="models", help="List and upload models", help_epilogue=MODELS_HELP_EXAMPLES)) models_app.command((f"{_CLI}.models.list:list"), alias="ls", help="List available models") -models_app.command((f"{_CLI}.models.upload:upload"), help="Upload a model") +models_app.command((f"{_CLI}.models.upload:upload"), help="Upload a model", help_epilogue=MODELS_UPLOAD_HELP_EXAMPLES) ## Endpoints API commands endpoints_app = app.command(App(name="endpoints", help="Deploy and manage dedicated endpoints")) @@ -366,18 +364,17 @@ async def run_command() -> None: help="Create a new endpoint", help_epilogue=ENDPOINTS_CREATE_HELP_EXAMPLES, ) -endpoints_app.command((f"{_CLI}.endpoints.retrieve:retrieve"), help="Get endpoint details", help_epilogue="") -endpoints_app.command((f"{_CLI}.endpoints.stop:stop"), help="Stop an endpoint", help_epilogue="") -endpoints_app.command((f"{_CLI}.endpoints.start:start"), help="Start an endpoint", help_epilogue="") -endpoints_app.command((f"{_CLI}.endpoints.delete:delete"), alias="-d", help="Delete an endpoint", help_epilogue="") -endpoints_app.command((f"{_CLI}.endpoints.list:list"), alias="ls", help="List your endpoints", help_epilogue="") +endpoints_app.command((f"{_CLI}.endpoints.retrieve:retrieve"), help="Get endpoint details") +endpoints_app.command((f"{_CLI}.endpoints.stop:stop"), help="Stop an endpoint") +endpoints_app.command((f"{_CLI}.endpoints.start:start"), help="Start an endpoint") +endpoints_app.command((f"{_CLI}.endpoints.delete:delete"), alias="-d", help="Delete an endpoint") +endpoints_app.command((f"{_CLI}.endpoints.list:list"), alias="ls", help="List your endpoints") endpoints_app.command( (f"{_CLI}.endpoints.update:update"), help="Update an endpoint", help_epilogue=ENDPOINTS_UPDATE_HELP_EXAMPLES ) endpoints_app.command( (f"{_CLI}.endpoints.availability_zones:availability_zones"), help="List availability zones for deploying models", - help_epilogue="", ) ## Evals API commands @@ -385,9 +382,9 @@ async def run_command() -> None: evals_app.command( (f"{_CLI}.evals.create:create"), alias="-c", help="Create a new eval job", help_epilogue=EVALS_CREATE_HELP_EXAMPLES ) -evals_app.command((f"{_CLI}.evals.list:list"), alias="ls", help="List eval jobs", help_epilogue="") -evals_app.command((f"{_CLI}.evals.retrieve:retrieve"), help="Get eval job details", help_epilogue="") -evals_app.command((f"{_CLI}.evals.status:status"), help="Get an eval job's status", help_epilogue="") +evals_app.command((f"{_CLI}.evals.list:list"), alias="ls", help="List eval jobs") +evals_app.command((f"{_CLI}.evals.retrieve:retrieve"), help="Get eval job details") +evals_app.command((f"{_CLI}.evals.status:status"), help="Get an eval job's status") ## Telemetry API commands telemetry_app = app.command(App(name="telemetry", help="Configure CLI telemetry")) diff --git a/src/together/lib/cli/utils/_help_examples.py b/src/together/lib/cli/utils/_help_examples.py index 602b481e0..35336c231 100644 --- a/src/together/lib/cli/utils/_help_examples.py +++ b/src/together/lib/cli/utils/_help_examples.py @@ -11,6 +11,28 @@ [primary]tg models upload --model-name my-org/my-model --model-source s3-or-hugging-face[/primary] """ +## Models API commands +MODELS_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] List all models: + [primary]tg models list[/primary] + +[dim]-[/dim] Upload a model: + [primary]tg models upload --model-name my-model --model-source s3-or-hugging-face[/primary] +""" + +MODELS_UPLOAD_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Upload a model from S3: + [primary]tg models upload \\ + --model-name my-model \\ + --model-source $(aws s3 presign s3://my-bucket/my-model)[/primary] + +[dim]-[/dim] Upload private model from Hugging Face: + [primary]tg models upload \\ + --model-name my-model \\ + --model-source my-org/model-name \\ + --hf-token $HUGGING_FACE_TOKEN[/primary] +""" + ## Fine-tuning API commands FINE_TUNING_HELP_EXAMPLES = """[dim]Examples:[/dim] [dim]-[/dim] Create a fine-tuning job: From 8d2a18b2647585f4e83bd42c679e699c841cd545 Mon Sep 17 00:00:00 2001 From: Blaine Kasten Date: Fri, 8 May 2026 15:07:03 -0500 Subject: [PATCH 10/18] chore: Add example usage for file commands (#359) --- src/together/lib/cli/__init__.py | 19 +++++++++-- src/together/lib/cli/api/files/upload.py | 4 +-- src/together/lib/cli/utils/_help_examples.py | 35 ++++++++++++++++++++ tests/cli/test_files.py | 14 -------- 4 files changed, 53 insertions(+), 19 deletions(-) diff --git a/src/together/lib/cli/__init__.py b/src/together/lib/cli/__init__.py index 4b6b63a4f..6c31ea7d2 100644 --- a/src/together/lib/cli/__init__.py +++ b/src/together/lib/cli/__init__.py @@ -28,17 +28,20 @@ from together.lib.cli.utils._completion import install_completion from together.lib.cli.utils._help_examples import ( EVALS_HELP_EXAMPLES, + FILES_HELP_EXAMPLES, MODELS_HELP_EXAMPLES, ENDPOINTS_HELP_EXAMPLES, TOP_LEVEL_HELP_EXAMPLES, FINE_TUNING_HELP_EXAMPLES, EVALS_CREATE_HELP_EXAMPLES, + FILES_UPLOAD_HELP_EXAMPLES, MODELS_UPLOAD_HELP_EXAMPLES, ENDPOINTS_CREATE_HELP_EXAMPLES, ENDPOINTS_UPDATE_HELP_EXAMPLES, ENDPOINTS_HARDWARE_HELP_EXAMPLES, FINE_TUNING_CREATE_HELP_EXAMPLES, FINE_TUNING_DOWNLOAD_HELP_EXAMPLES, + FILES_RETRIEVE_CONTENT_HELP_EXAMPLES, ) from together.lib.cli.utils._help_formatter import help_formatter from together.lib.cli.utils._preparse_tokens import preparse_tokens @@ -308,11 +311,21 @@ async def run_command() -> None: _CLI = "together.lib.cli.api" ## Files API commands -files_app = app.command(App(name="files", help="Upload and manage files")) -files_app.command(f"{_CLI}.files.upload:upload", help="Upload a file for fine-tuning, evals, or inference") +files_app = app.command(App(name="files", help="Upload and manage files", help_epilogue=FILES_HELP_EXAMPLES)) +files_app.command( + f"{_CLI}.files.upload:upload", + help="Upload a file for fine-tuning, evals, or inference", + help_epilogue=FILES_UPLOAD_HELP_EXAMPLES, +) files_app.command(f"{_CLI}.files.list:list", alias="ls", help="List your files") files_app.command(f"{_CLI}.files.retrieve:retrieve", help="Get file details") -files_app.command(f"{_CLI}.files.retrieve_content:retrieve_content", help="Download file contents") +files_app.command(f"{_CLI}.files.retrieve_content:retrieve_content", help="Download file contents", show=False) +files_app.command( + f"{_CLI}.files.retrieve_content:retrieve_content", + name="download", + help="Download file contents", + help_epilogue=FILES_RETRIEVE_CONTENT_HELP_EXAMPLES, +) files_app.command(f"{_CLI}.files.delete:delete", alias="-d", help="Delete a file") files_app.command(f"{_CLI}.files.check:check", help="Check a local file for issues") diff --git a/src/together/lib/cli/api/files/upload.py b/src/together/lib/cli/api/files/upload.py index 85615054d..c55222e4d 100644 --- a/src/together/lib/cli/api/files/upload.py +++ b/src/together/lib/cli/api/files/upload.py @@ -19,7 +19,7 @@ async def upload( file: Annotated[Path, Parameter(required=True, help="The file to upload")], purpose: Annotated[Optional[FilePurpose], Parameter(help="The purpose of the file")] = "fine-tune", - check: Annotated[Optional[bool], Parameter(help="Whether to check the file")] = True, + no_check: Annotated[Optional[bool], Parameter(negative=(), help="Skip checking the file for issues")] = False, *, config: CLIConfigParameter, ) -> None: @@ -28,7 +28,7 @@ async def upload( os.environ.setdefault("TOGETHER_DISABLE_TQDM", "true") # Manually handle check here so we can exit and provide the user good error messages - if check: + if not no_check: report = check_file(file) if report["is_check_passed"] is False: if config.json: diff --git a/src/together/lib/cli/utils/_help_examples.py b/src/together/lib/cli/utils/_help_examples.py index 35336c231..99b20e9e6 100644 --- a/src/together/lib/cli/utils/_help_examples.py +++ b/src/together/lib/cli/utils/_help_examples.py @@ -11,6 +11,41 @@ [primary]tg models upload --model-name my-org/my-model --model-source s3-or-hugging-face[/primary] """ +## Files API commands + +FILES_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Upload a file for fine-tuning: + [primary]tg files upload ./my-dataset.jsonl --purpose fine-tune[/primary] + +[dim]-[/dim] Check a local file for issues: + [primary]tg files check ./my-dataset.jsonl[/primary] + +[dim]-[/dim] Remove a file from Together: + [primary]tg files delete [/primary] + +[dim]-[/dim] Download a file: + [primary]tg files download --output ./datasets[/primary] +""" + +FILES_UPLOAD_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Upload a file for fine-tuning: + [primary]tg files upload ./my-dataset.jsonl --purpose fine-tune[/primary] + +[dim]-[/dim] Upload a file for evals: + [primary]tg files upload ./my-dataset.jsonl --purpose evals[/primary] + +[dim]-[/dim] Skip file checks: + [primary]tg files upload ./my-dataset.jsonl --no-check[/primary] +""" + +FILES_RETRIEVE_CONTENT_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Download a file: + [primary]tg files download --output ./datasets[/primary] + +[dim]-[/dim] Print file contents to stdout: + [primary]tg files download --stdout[/primary] +""" + ## Models API commands MODELS_HELP_EXAMPLES = """[dim]Examples:[/dim] [dim]-[/dim] List all models: diff --git a/tests/cli/test_files.py b/tests/cli/test_files.py index dd823d2bd..60171e63d 100644 --- a/tests/cli/test_files.py +++ b/tests/cli/test_files.py @@ -197,17 +197,3 @@ def test_upload_does_not_check_if_disabled(self, tmp_path: Path, cli_runner: Cli call_kw = upload_mock.call_args.kwargs assert call_kw["check"] is False assert "uploaded-id" in result.output - - def test_upload_does_check_if_enabled(self, tmp_path: Path, cli_runner: CliRunner) -> None: - f = tmp_path / "data.jsonl" - f.write_text("{}\n") - uploaded = _file_response() - with patch.object(_files_upload_cli, "check_file") as check_mock, patch( - "together.resources.files.AsyncFilesResource.upload", new_callable=AsyncMock - ) as upload_mock: - upload_mock.return_value = uploaded - check_mock.return_value = {"is_check_passed": True, "message": "Checks passed"} - result = cli_runner.invoke(["files", "upload", str(f), "--check"]) - assert result.exit_code == 0 - check_mock.assert_called_once() - upload_mock.assert_called_once() From a357ed65e81e3b1a94c3e5e63c7d2d8840f3f421 Mon Sep 17 00:00:00 2001 From: Blaine Kasten Date: Fri, 8 May 2026 15:17:26 -0500 Subject: [PATCH 11/18] chore: Add example usage for clusters commands (#360) --- src/together/lib/cli/__init__.py | 49 +++++++-- .../cli/api/beta/clusters/storage/update.py | 28 +++++ src/together/lib/cli/utils/_help_examples.py | 100 ++++++++++++++++++ tests/cli/test_json_mode_pipeable_to_jq.py | 1 + 4 files changed, 172 insertions(+), 6 deletions(-) create mode 100644 src/together/lib/cli/api/beta/clusters/storage/update.py diff --git a/src/together/lib/cli/__init__.py b/src/together/lib/cli/__init__.py index 6c31ea7d2..3819231c2 100644 --- a/src/together/lib/cli/__init__.py +++ b/src/together/lib/cli/__init__.py @@ -35,13 +35,20 @@ FINE_TUNING_HELP_EXAMPLES, EVALS_CREATE_HELP_EXAMPLES, FILES_UPLOAD_HELP_EXAMPLES, + BETA_CLUSTERS_HELP_EXAMPLES, MODELS_UPLOAD_HELP_EXAMPLES, ENDPOINTS_CREATE_HELP_EXAMPLES, ENDPOINTS_UPDATE_HELP_EXAMPLES, ENDPOINTS_HARDWARE_HELP_EXAMPLES, FINE_TUNING_CREATE_HELP_EXAMPLES, + BETA_CLUSTERS_CREATE_HELP_EXAMPLES, + BETA_CLUSTERS_UPDATE_HELP_EXAMPLES, FINE_TUNING_DOWNLOAD_HELP_EXAMPLES, + BETA_CLUSTERS_STORAGE_HELP_EXAMPLES, FILES_RETRIEVE_CONTENT_HELP_EXAMPLES, + BETA_CLUSTERS_STORAGE_CREATE_HELP_EXAMPLES, + BETA_CLUSTERS_STORAGE_UPDATE_HELP_EXAMPLES, + BETA_CLUSTERS_GET_CREDENTIALS_HELP_EXAMPLES, ) from together.lib.cli.utils._help_formatter import help_formatter from together.lib.cli.utils._preparse_tokens import preparse_tokens @@ -412,20 +419,50 @@ async def run_command() -> None: beta_app = app.command(beta_root_app) ### Clusters API commands -clusters_app = beta_app.command(App(name="clusters", help="Create and manage GPU clusters")) +clusters_app = beta_app.command( + App(name="clusters", help="Create and manage GPU clusters", help_epilogue=BETA_CLUSTERS_HELP_EXAMPLES) +) clusters_app.command((f"{_CLI}.beta.clusters.list:list"), alias="ls", help="List your clusters") -clusters_app.command((f"{_CLI}.beta.clusters.create:create"), alias="-c", help="Create a new cluster") +clusters_app.command( + (f"{_CLI}.beta.clusters.create:create"), + alias="-c", + help="Create a new cluster", + help_epilogue=BETA_CLUSTERS_CREATE_HELP_EXAMPLES, +) clusters_app.command((f"{_CLI}.beta.clusters.retrieve:retrieve"), help="Get cluster details") -clusters_app.command((f"{_CLI}.beta.clusters.update:update"), help="Update a cluster") +clusters_app.command( + (f"{_CLI}.beta.clusters.update:update"), + help="Update a cluster", + help_epilogue=BETA_CLUSTERS_UPDATE_HELP_EXAMPLES, +) clusters_app.command((f"{_CLI}.beta.clusters.delete:delete"), alias="-d", help="Delete a cluster") clusters_app.command((f"{_CLI}.beta.clusters.list_regions:list_regions"), help="List regions for deploying clusters") -clusters_app.command((f"{_CLI}.beta.clusters.get_credentials:get_credentials"), help="Get credentials for a cluster") +clusters_app.command( + (f"{_CLI}.beta.clusters.get_credentials:get_credentials"), + help="Get credentials for a cluster", + help_epilogue=BETA_CLUSTERS_GET_CREDENTIALS_HELP_EXAMPLES, +) ### Clusters > Storage API commands -storage_app = clusters_app.command(App(name="storage", help="Manage cluster storage volumes", group="Subcommands")) +storage_app = clusters_app.command( + App( + name="storage", + help="Manage cluster storage volumes", + group="Subcommands", + help_epilogue=BETA_CLUSTERS_STORAGE_HELP_EXAMPLES, + ) +) storage_app.command((f"{_CLI}.beta.clusters.storage.list:list"), alias="ls", help="List storage volumes for a cluster") storage_app.command( - (f"{_CLI}.beta.clusters.storage.create:create"), alias="-c", help="Create a new storage volume for a cluster" + (f"{_CLI}.beta.clusters.storage.create:create"), + alias="-c", + help="Create a new storage volume for a cluster", + help_epilogue=BETA_CLUSTERS_STORAGE_CREATE_HELP_EXAMPLES, +) +storage_app.command( + (f"{_CLI}.beta.clusters.storage.update:update"), + help="Resize a storage volume", + help_epilogue=BETA_CLUSTERS_STORAGE_UPDATE_HELP_EXAMPLES, ) storage_app.command( (f"{_CLI}.beta.clusters.storage.retrieve:retrieve"), diff --git a/src/together/lib/cli/api/beta/clusters/storage/update.py b/src/together/lib/cli/api/beta/clusters/storage/update.py new file mode 100644 index 000000000..59e00da1e --- /dev/null +++ b/src/together/lib/cli/api/beta/clusters/storage/update.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from typing import Annotated + +from cyclopts import Parameter + +from together._utils._json import openapi_dumps +from together.lib.cli.utils.config import CLIConfigParameter +from together.lib.cli.utils._console import console + + +async def update( + volume_id: str, + size_tib: Annotated[int, Parameter(help="New size of the storage volume in TiB")], + *, + config: CLIConfigParameter, +) -> None: + """Update a storage volume (resize).""" + response = await config.client.beta.clusters.storage.update( + volume_id=volume_id, + size_tib=size_tib, + ) + + if config.json: + console.print_json(openapi_dumps(response).decode("utf-8")) + else: + console.print("[blue]Storage volume updated successfully[/blue]") + console.print(f"[primary]Volume ID:[/primary] {response.volume_id}") diff --git a/src/together/lib/cli/utils/_help_examples.py b/src/together/lib/cli/utils/_help_examples.py index 99b20e9e6..466b5fe77 100644 --- a/src/together/lib/cli/utils/_help_examples.py +++ b/src/together/lib/cli/utils/_help_examples.py @@ -228,3 +228,103 @@ --model-b-system-template "You are a concise assistant." \\ --model-b-input-template $'Answer the following:\\n\\n{{prompt}}'[/primary] """ + +## Beta clusters API commands + +BETA_CLUSTERS_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] List clusters and regions: + [primary]tg beta clusters list[/primary] + [primary]tg beta clusters list-regions[/primary] + +[dim]-[/dim] Write kubeconfig for a cluster (default ~/.kube/config): + [primary]tg beta clusters get-credentials [/primary] + +[dim]-[/dim] Print kubeconfig to stdout: + [primary]tg beta clusters get-credentials --file -[/primary] + +[dim]-[/dim] Non-interactive cluster create (see [primary]tg beta clusters create --help[/primary] for flags): + [primary]tg beta clusters create --non-interactive \\ + --name my-cluster --cluster-type KUBERNETES --gpu-type H100_SXM \\ + --region us-central-8 --num-gpus 8 --billing-type ON_DEMAND \\ + --nvidia-driver-version 565 --cuda-version 12.6 --volume [/primary] + +[dim]-[/dim] Update or delete a cluster: + [primary]tg beta clusters update --num-gpus 16 --cluster-type KUBERNETES[/primary] + [primary]tg beta clusters delete [/primary] +""" + +BETA_CLUSTERS_CREATE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Create interactively (prompts for region, GPUs, drivers, etc.): + [primary]tg beta clusters create[/primary] + +[dim]-[/dim] Create without prompts (supply every required field): + [primary]tg beta clusters create --non-interactive \\ + --name my-cluster \\ + --cluster-type KUBERNETES \\ + --gpu-type H100_SXM \\ + --region us-central-8 \\ + --num-gpus 8 \\ + --billing-type ON_DEMAND \\ + --nvidia-driver-version 565 \\ + --cuda-version 12.6 \\ + --volume [/primary] +""" + +BETA_CLUSTERS_GET_CREDENTIALS_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Merge cluster kubeconfig into the default file ([primary]~/.kube/config[/primary]): + [primary]tg beta clusters get-credentials [/primary] + +[dim]-[/dim] Write to a specific path: + [primary]tg beta clusters get-credentials --file ./my-kubeconfig[/primary] + +[dim]-[/dim] Print kubeconfig to stdout (no file write): + [primary]tg beta clusters get-credentials --file -[/primary] + +[dim]-[/dim] Use a custom context name in the merged kubeconfig: + [primary]tg beta clusters get-credentials --context-name my-prod-k8s[/primary] + +[dim]-[/dim] On name conflicts with an existing kubeconfig, replace the entry: + [primary]tg beta clusters get-credentials --overwrite-existing[/primary] + +[dim]-[/dim] Set this cluster as the default kube context after merge: + [primary]tg beta clusters get-credentials --set-default-context[/primary] +""" + +BETA_CLUSTERS_UPDATE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Change GPU count: + [primary]tg beta clusters update --num-gpus 16[/primary] + +[dim]-[/dim] Change cluster type: + [primary]tg beta clusters update --cluster-type KUBERNETES[/primary] + +[dim]-[/dim] Update both: + [primary]tg beta clusters update --num-gpus 16 --cluster-type KUBERNETES[/primary] +""" + +BETA_CLUSTERS_STORAGE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] List storage volumes: + [primary]tg beta clusters storage list[/primary] + +[dim]-[/dim] Create or resize a volume (see subcommand help for options): + [primary]tg beta clusters storage create --region us-east-1 --size-tib 1 --volume-name my-data[/primary] + [primary]tg beta clusters storage update --size-tib 4[/primary] + +[dim]-[/dim] Use a volume when creating a cluster: + [primary]tg beta clusters create --non-interactive ... --volume [/primary] +""" + +BETA_CLUSTERS_STORAGE_CREATE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Create a 1 TiB volume in a region ([primary]tg beta clusters list-regions[/primary] lists regions): + [primary]tg beta clusters storage create \\ + --region us-east-1 \\ + --size-tib 1 \\ + --volume-name my-training-data[/primary] + +[dim]-[/dim] Attach the volume when creating a cluster: + [primary]tg beta clusters create --non-interactive ... --volume [/primary] +""" + +BETA_CLUSTERS_STORAGE_UPDATE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Grow a volume to 4 TiB: + [primary]tg beta clusters storage update --size-tib 4[/primary] +""" diff --git a/tests/cli/test_json_mode_pipeable_to_jq.py b/tests/cli/test_json_mode_pipeable_to_jq.py index 91c007a84..1b2ac2f8c 100644 --- a/tests/cli/test_json_mode_pipeable_to_jq.py +++ b/tests/cli/test_json_mode_pipeable_to_jq.py @@ -153,6 +153,7 @@ def test_beta_clusters_json_mode(self) -> None: def test_beta_clusters_storage_json_mode(self) -> None: beta_clusters_storage = JSONValidator(("beta", "clusters", "storage")) beta_clusters_storage.run_and_assert("create --region us-east-1 --size-tib 1 --volume-name test-volume") + beta_clusters_storage.run_and_assert("update storage-123 --size-tib 4") beta_clusters_storage.run_and_assert("delete storage-123") beta_clusters_storage.run_and_assert("list") beta_clusters_storage.run_and_assert("retrieve storage-123") From d7ea64b1bd8848589ec5bc2f0f9718401aa3bade Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 16:28:48 +0000 Subject: [PATCH 12/18] codegen metadata --- .stats.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.stats.yml b/.stats.yml index ed55c5ab5..68590a4ca 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 75 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-e218fafc0c9b31bd98647d1e2de6decc55f8a7f9719b3b565f94939c2ebcf0df.yml -openapi_spec_hash: 026cc585ef61f52d4d6c4b60b969e323 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-29a7f71ffca4e4ea9bcc9e9f8836f9ac879e2d8ccc16f298c335cce4e1c641fb.yml +openapi_spec_hash: 08cbd22fc2bc9006a55be3a5cf6e497c config_hash: 6c214c91fad5ead4849be777fd9e8108 From e6638d220491b83708e28db1758620f9a30217d8 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 16:55:41 +0000 Subject: [PATCH 13/18] codegen metadata --- .stats.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.stats.yml b/.stats.yml index 68590a4ca..551da14b7 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 75 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-29a7f71ffca4e4ea9bcc9e9f8836f9ac879e2d8ccc16f298c335cce4e1c641fb.yml -openapi_spec_hash: 08cbd22fc2bc9006a55be3a5cf6e497c +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-fffea977331d642c3cde67d736d5e6fd461b2f9e5ecf9e282028a834b1a31a6e.yml +openapi_spec_hash: c541c2196a5702cb5b66eddf261359c5 config_hash: 6c214c91fad5ead4849be777fd9e8108 From 852ef60dc108bef4dc7e80ea528ca7823d7030d9 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 17:41:57 +0000 Subject: [PATCH 14/18] feat(internal/types): support eagerly validating pydantic iterators --- src/together/_models.py | 80 +++++++++++++++++++++++++++++++++++++++++ tests/test_models.py | 60 +++++++++++++++++++++++++++++-- 2 files changed, 137 insertions(+), 3 deletions(-) diff --git a/src/together/_models.py b/src/together/_models.py index 29070e055..8c5ab2602 100644 --- a/src/together/_models.py +++ b/src/together/_models.py @@ -25,7 +25,9 @@ ClassVar, Protocol, Required, + Annotated, ParamSpec, + TypeAlias, TypedDict, TypeGuard, final, @@ -79,7 +81,15 @@ from ._constants import RAW_RESPONSE_HEADER if TYPE_CHECKING: + from pydantic import GetCoreSchemaHandler, ValidatorFunctionWrapHandler + from pydantic_core import CoreSchema, core_schema from pydantic_core.core_schema import ModelField, ModelSchema, LiteralSchema, ModelFieldsSchema +else: + try: + from pydantic_core import CoreSchema, core_schema + except ImportError: + CoreSchema = None + core_schema = None __all__ = ["BaseModel", "GenericModel"] @@ -396,6 +406,76 @@ def model_dump_json( ) +class _EagerIterable(list[_T], Generic[_T]): + """ + Accepts any Iterable[T] input (including generators), consumes it + eagerly, and validates all items upfront. + + Validation preserves the original container type where possible + (e.g. a set[T] stays a set[T]). Serialization (model_dump / JSON) + always emits a list — round-tripping through model_dump() will not + restore the original container type. + """ + + @classmethod + def __get_pydantic_core_schema__( + cls, + source_type: Any, + handler: GetCoreSchemaHandler, + ) -> CoreSchema: + (item_type,) = get_args(source_type) or (Any,) + item_schema: CoreSchema = handler.generate_schema(item_type) + list_of_items_schema: CoreSchema = core_schema.list_schema(item_schema) + + return core_schema.no_info_wrap_validator_function( + cls._validate, + list_of_items_schema, + serialization=core_schema.plain_serializer_function_ser_schema( + cls._serialize, + info_arg=False, + ), + ) + + @staticmethod + def _validate(v: Iterable[_T], handler: "ValidatorFunctionWrapHandler") -> Any: + original_type: type[Any] = type(v) + + # Normalize to list so list_schema can validate each item + if isinstance(v, list): + items: list[_T] = v + else: + try: + items = list(v) + except TypeError as e: + raise TypeError("Value is not iterable") from e + + # Validate items against the inner schema + validated: list[_T] = handler(items) + + # Reconstruct original container type + if original_type is list: + return validated + # str(list) produces the list's repr, not a string built from items, + # so skip reconstruction for str and its subclasses. + if issubclass(original_type, str): + return validated + try: + return original_type(validated) + except (TypeError, ValueError): + # If the type cannot be reconstructed, just return the validated list + return validated + + @staticmethod + def _serialize(v: Iterable[_T]) -> list[_T]: + """Always serialize as a list so Pydantic's JSON encoder is happy.""" + if isinstance(v, list): + return v + return list(v) + + +EagerIterable: TypeAlias = Annotated[Iterable[_T], _EagerIterable] + + def _construct_field(value: object, field: FieldInfo, key: str) -> object: if value is None: return field_get_default(field) diff --git a/tests/test_models.py b/tests/test_models.py index c2830354e..1b9cee0c8 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,7 +1,8 @@ import json -from typing import TYPE_CHECKING, Any, Dict, List, Union, Optional, cast +from typing import TYPE_CHECKING, Any, Dict, List, Union, Iterable, Optional, cast from datetime import datetime, timezone -from typing_extensions import Literal, Annotated, TypeAliasType +from collections import deque +from typing_extensions import Literal, Annotated, TypedDict, TypeAliasType import pytest import pydantic @@ -9,7 +10,7 @@ from together._utils import PropertyInfo from together._compat import PYDANTIC_V1, parse_obj, model_dump, model_json -from together._models import DISCRIMINATOR_CACHE, BaseModel, construct_type +from together._models import DISCRIMINATOR_CACHE, BaseModel, EagerIterable, construct_type class BasicModel(BaseModel): @@ -961,3 +962,56 @@ def __getattr__(self, attr: str) -> Item: ... assert model.a.prop == 1 assert isinstance(model.a, Item) assert model.other == "foo" + + +# NOTE: Workaround for Pydantic Iterable behavior. +# Iterable fields are replaced with a ValidatorIterator and may be consumed +# during serialization, which can cause subsequent dumps to return empty data. +# See: https://github.com/pydantic/pydantic/issues/9541 +@pytest.mark.parametrize( + "data, expected_validated", + [ + ([1, 2, 3], [1, 2, 3]), + ((1, 2, 3), (1, 2, 3)), + (set([1, 2, 3]), set([1, 2, 3])), + (iter([1, 2, 3]), [1, 2, 3]), + ([], []), + ((x for x in [1, 2, 3]), [1, 2, 3]), + (map(lambda x: x, [1, 2, 3]), [1, 2, 3]), + (frozenset([1, 2, 3]), frozenset([1, 2, 3])), + (deque([1, 2, 3]), deque([1, 2, 3])), + ], + ids=["list", "tuple", "set", "iterator", "empty", "generator", "map", "frozenset", "deque"], +) +@pytest.mark.skipif(PYDANTIC_V1, reason="this is only supported in pydantic v2") +def test_iterable_construction(data: Iterable[int], expected_validated: Iterable[int]) -> None: + class TypeWithIterable(TypedDict): + items: EagerIterable[int] + + class Model(BaseModel): + data: TypeWithIterable + + m = Model.model_validate({"data": {"items": data}}) + assert m.data["items"] == expected_validated + + # Verify repeated dumps don't lose data (the original bug) + assert m.model_dump()["data"]["items"] == list(expected_validated) + assert m.model_dump()["data"]["items"] == list(expected_validated) + + +@pytest.mark.skipif(PYDANTIC_V1, reason="this is only supported in pydantic v2") +def test_iterable_construction_str_falls_back_to_list() -> None: + # str is iterable (over chars), but str(list_of_chars) produces the list's repr + # rather than reconstructing a string from items. We special-case str to fall + # back to list instead of attempting reconstruction. + class TypeWithIterable(TypedDict): + items: EagerIterable[str] + + class Model(BaseModel): + data: TypeWithIterable + + m = Model.model_validate({"data": {"items": "hello"}}) + + # falls back to list of chars rather than calling str(["h", "e", "l", "l", "o"]) + assert m.data["items"] == ["h", "e", "l", "l", "o"] + assert m.model_dump()["data"]["items"] == ["h", "e", "l", "l", "o"] From 65c175682f4f8432dfe4d880d6dc0a21acc46655 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 17:57:03 +0000 Subject: [PATCH 15/18] docs(api): reword docstrings to present tense across resources --- .stats.yml | 4 +- .../resources/beta/clusters/clusters.py | 4 +- src/together/resources/beta/jig/jig.py | 24 ++-- src/together/resources/beta/jig/secrets.py | 12 +- src/together/resources/beta/jig/volumes.py | 8 +- src/together/resources/chat/completions.py | 114 +++++++++--------- .../code_interpreter/code_interpreter.py | 38 +++--- src/together/resources/completions.py | 66 +++++----- src/together/resources/endpoints.py | 30 +++-- src/together/resources/fine_tuning.py | 14 +-- .../types/beta/cluster_create_params.py | 3 +- src/together/types/beta/deployment.py | 5 +- src/together/types/beta/jig/secret.py | 4 +- .../types/beta/jig/secret_create_params.py | 2 +- .../types/beta/jig/secret_update_params.py | 2 +- src/together/types/beta/jig/volume.py | 8 +- .../types/beta/jig/volume_create_params.py | 4 +- .../types/beta/jig/volume_update_params.py | 4 +- src/together/types/beta/jig_deploy_params.py | 13 +- src/together/types/beta/jig_update_params.py | 9 +- .../types/chat/completion_create_params.py | 18 +-- .../types/code_interpreter_execute_params.py | 11 +- .../types/completion_create_params.py | 10 +- src/together/types/endpoint_create_params.py | 8 +- src/together/types/endpoint_update_params.py | 7 +- .../types/fine_tuning_cancel_response.py | 9 +- .../fine_tuning_estimate_price_params.py | 13 +- .../fine_tuning_estimate_price_response.py | 4 +- .../types/fine_tuning_list_response.py | 9 +- src/together/types/finetune_response.py | 4 +- 30 files changed, 213 insertions(+), 248 deletions(-) diff --git a/.stats.yml b/.stats.yml index 551da14b7..29646d90c 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 75 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-fffea977331d642c3cde67d736d5e6fd461b2f9e5ecf9e282028a834b1a31a6e.yml -openapi_spec_hash: c541c2196a5702cb5b66eddf261359c5 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-b584185aba41e3d597bf715d9b704f1c7d2663ae7d8f3f3c35e63d603738ee9c.yml +openapi_spec_hash: 0b26ddf285392dd9f629c1161db62376 config_hash: 6c214c91fad5ead4849be777fd9e8108 diff --git a/src/together/resources/beta/clusters/clusters.py b/src/together/resources/beta/clusters/clusters.py index 153257210..5014087ac 100644 --- a/src/together/resources/beta/clusters/clusters.py +++ b/src/together/resources/beta/clusters/clusters.py @@ -144,7 +144,7 @@ def create( reservation_start_time: Reservation start time of the cluster. This field is required for SCHEDULED billing to specify the reservation start time for the cluster. If not provided, - the cluster will be provisioned immediately. + the cluster provisions immediately. shared_volume: Inline configuration to create a shared volume with the cluster creation. @@ -467,7 +467,7 @@ async def create( reservation_start_time: Reservation start time of the cluster. This field is required for SCHEDULED billing to specify the reservation start time for the cluster. If not provided, - the cluster will be provisioned immediately. + the cluster provisions immediately. shared_volume: Inline configuration to create a shared volume with the cluster creation. diff --git a/src/together/resources/beta/jig/jig.py b/src/together/resources/beta/jig/jig.py index dd318e71c..64ba1862f 100644 --- a/src/together/resources/beta/jig/jig.py +++ b/src/together/resources/beta/jig/jig.py @@ -166,7 +166,7 @@ def update( description: Description is an optional human-readable description of your deployment environment_variables: EnvironmentVariables is a list of environment variables to set in the container. - This will replace all existing environment variables + Replaces all existing environment variables. gpu_count: GPUCount is the number of GPUs to allocate per container instance @@ -196,8 +196,8 @@ def update( termination_grace_period_seconds: TerminationGracePeriodSeconds is the time in seconds to wait for graceful shutdown before forcefully terminating the replica - volumes: Volumes is a list of volume mounts to attach to the container. This will replace - all existing volumes + volumes: Volumes is a list of volume mounts to attach to the container. Replaces all + existing volumes. extra_headers: Send extra headers @@ -320,10 +320,10 @@ def deploy( if not specified health_check_path: HealthCheckPath is the HTTP path for health checks (e.g., "/health"). If set, - the platform will check this endpoint to determine container health + the platform checks this endpoint to determine container health. - max_replicas: MaxReplicas is the maximum number of container instances that can be scaled up - to. If not set, will be set to MinReplicas + max_replicas: MaxReplicas is the maximum number of container instances. Defaults to + MinReplicas if not set. memory: Memory is the amount of RAM to allocate per container instance in GiB (e.g., 0.5 = 512MiB) @@ -576,7 +576,7 @@ async def update( description: Description is an optional human-readable description of your deployment environment_variables: EnvironmentVariables is a list of environment variables to set in the container. - This will replace all existing environment variables + Replaces all existing environment variables. gpu_count: GPUCount is the number of GPUs to allocate per container instance @@ -606,8 +606,8 @@ async def update( termination_grace_period_seconds: TerminationGracePeriodSeconds is the time in seconds to wait for graceful shutdown before forcefully terminating the replica - volumes: Volumes is a list of volume mounts to attach to the container. This will replace - all existing volumes + volumes: Volumes is a list of volume mounts to attach to the container. Replaces all + existing volumes. extra_headers: Send extra headers @@ -730,10 +730,10 @@ async def deploy( if not specified health_check_path: HealthCheckPath is the HTTP path for health checks (e.g., "/health"). If set, - the platform will check this endpoint to determine container health + the platform checks this endpoint to determine container health. - max_replicas: MaxReplicas is the maximum number of container instances that can be scaled up - to. If not set, will be set to MinReplicas + max_replicas: MaxReplicas is the maximum number of container instances. Defaults to + MinReplicas if not set. memory: Memory is the amount of RAM to allocate per container instance in GiB (e.g., 0.5 = 512MiB) diff --git a/src/together/resources/beta/jig/secrets.py b/src/together/resources/beta/jig/secrets.py index 64c80bcae..f282c65ed 100644 --- a/src/together/resources/beta/jig/secrets.py +++ b/src/together/resources/beta/jig/secrets.py @@ -65,7 +65,7 @@ def create( characters) value: Value is the sensitive data to store securely (e.g., API keys, passwords, - tokens). This value will be encrypted at rest + tokens). Encrypted at rest. description: Description is an optional human-readable description of the secret's purpose (max 500 characters) @@ -164,8 +164,8 @@ def update( project_id: ProjectID is ignored - the project is automatically determined from your authentication - value: Value is the new sensitive data to store securely. Updating this will replace - the existing secret value + value: Value is the new sensitive data to store securely. Updating this replaces the + existing secret value. extra_headers: Send extra headers @@ -292,7 +292,7 @@ async def create( characters) value: Value is the sensitive data to store securely (e.g., API keys, passwords, - tokens). This value will be encrypted at rest + tokens). Encrypted at rest. description: Description is an optional human-readable description of the secret's purpose (max 500 characters) @@ -391,8 +391,8 @@ async def update( project_id: ProjectID is ignored - the project is automatically determined from your authentication - value: Value is the new sensitive data to store securely. Updating this will replace - the existing secret value + value: Value is the new sensitive data to store securely. Updating this replaces the + existing secret value. extra_headers: Send extra headers diff --git a/src/together/resources/beta/jig/volumes.py b/src/together/resources/beta/jig/volumes.py index 817058984..3b94490b7 100644 --- a/src/together/resources/beta/jig/volumes.py +++ b/src/together/resources/beta/jig/volumes.py @@ -61,7 +61,7 @@ def create( Create a new volume to preload files in deployments Args: - content: Content specifies the new content that will be preloaded to this volume + content: Content specifies the new content to preload to this volume. name: Name is the unique identifier for the volume within the project @@ -146,7 +146,7 @@ def update( Args: id: Volume ID or name. - content: Content specifies the new content that will be preloaded to this volume + content: Content specifies the new content to preload to this volume. name: Name is the new unique identifier for the volume within the project @@ -270,7 +270,7 @@ async def create( Create a new volume to preload files in deployments Args: - content: Content specifies the new content that will be preloaded to this volume + content: Content specifies the new content to preload to this volume. name: Name is the unique identifier for the volume within the project @@ -355,7 +355,7 @@ async def update( Args: id: Volume ID or name. - content: Content specifies the new content that will be preloaded to this volume + content: Content specifies the new content to preload to this volume. name: Name is the new unique identifier for the volume within the project diff --git a/src/together/resources/chat/completions.py b/src/together/resources/chat/completions.py index cde8485cf..cfcbf6d50 100644 --- a/src/together/resources/chat/completions.py +++ b/src/together/resources/chat/completions.py @@ -99,13 +99,13 @@ def create( chat_template_kwargs: Additional configuration to pass to model engine. - context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context - length of the model. When set to 'error', API will return 400 with appropriate - error message. When set to 'truncate', override the max_tokens with maximum - context length of the model. + context_length_exceeded_behavior: Defines the behavior of the API when max_tokens exceed the maximum context + length of the model. When set to 'error', the API returns 400 with an + appropriate error message. When set to 'truncate', overrides max_tokens with the + maximum context length of the model. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -113,7 +113,7 @@ def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -151,9 +151,8 @@ def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. stream: If true, stream tokens as Server-Sent Events as the model generates them instead of waiting for the full model response. The stream terminates with @@ -249,13 +248,13 @@ def create( chat_template_kwargs: Additional configuration to pass to model engine. - context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context - length of the model. When set to 'error', API will return 400 with appropriate - error message. When set to 'truncate', override the max_tokens with maximum - context length of the model. + context_length_exceeded_behavior: Defines the behavior of the API when max_tokens exceed the maximum context + length of the model. When set to 'error', the API returns 400 with an + appropriate error message. When set to 'truncate', overrides max_tokens with the + maximum context length of the model. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -263,7 +262,7 @@ def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -301,9 +300,8 @@ def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. temperature: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate @@ -395,13 +393,13 @@ def create( chat_template_kwargs: Additional configuration to pass to model engine. - context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context - length of the model. When set to 'error', API will return 400 with appropriate - error message. When set to 'truncate', override the max_tokens with maximum - context length of the model. + context_length_exceeded_behavior: Defines the behavior of the API when max_tokens exceed the maximum context + length of the model. When set to 'error', the API returns 400 with an + appropriate error message. When set to 'truncate', overrides max_tokens with the + maximum context length of the model. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -409,7 +407,7 @@ def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -447,9 +445,8 @@ def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. temperature: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate @@ -640,13 +637,13 @@ async def create( chat_template_kwargs: Additional configuration to pass to model engine. - context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context - length of the model. When set to 'error', API will return 400 with appropriate - error message. When set to 'truncate', override the max_tokens with maximum - context length of the model. + context_length_exceeded_behavior: Defines the behavior of the API when max_tokens exceed the maximum context + length of the model. When set to 'error', the API returns 400 with an + appropriate error message. When set to 'truncate', overrides max_tokens with the + maximum context length of the model. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -654,7 +651,7 @@ async def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -692,9 +689,8 @@ async def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. stream: If true, stream tokens as Server-Sent Events as the model generates them instead of waiting for the full model response. The stream terminates with @@ -790,13 +786,13 @@ async def create( chat_template_kwargs: Additional configuration to pass to model engine. - context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context - length of the model. When set to 'error', API will return 400 with appropriate - error message. When set to 'truncate', override the max_tokens with maximum - context length of the model. + context_length_exceeded_behavior: Defines the behavior of the API when max_tokens exceed the maximum context + length of the model. When set to 'error', the API returns 400 with an + appropriate error message. When set to 'truncate', overrides max_tokens with the + maximum context length of the model. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -804,7 +800,7 @@ async def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -842,9 +838,8 @@ async def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. temperature: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate @@ -936,13 +931,13 @@ async def create( chat_template_kwargs: Additional configuration to pass to model engine. - context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context - length of the model. When set to 'error', API will return 400 with appropriate - error message. When set to 'truncate', override the max_tokens with maximum - context length of the model. + context_length_exceeded_behavior: Defines the behavior of the API when max_tokens exceed the maximum context + length of the model. When set to 'error', the API returns 400 with an + appropriate error message. When set to 'truncate', overrides max_tokens with the + maximum context length of the model. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -950,7 +945,7 @@ async def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -988,9 +983,8 @@ async def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. temperature: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate diff --git a/src/together/resources/code_interpreter/code_interpreter.py b/src/together/resources/code_interpreter/code_interpreter.py index aa231c707..ea2344b96 100644 --- a/src/together/resources/code_interpreter/code_interpreter.py +++ b/src/together/resources/code_interpreter/code_interpreter.py @@ -73,22 +73,21 @@ def execute( """Executes the given code snippet and returns the output. Without a session_id, a - new session will be created to run the code. If you do pass in a valid - session_id, the code will be run in that session. This is useful for running - multiple code snippets in the same environment, because dependencies and similar - things are persisted between calls to the same session. + new session is created to run the code. If you pass a valid session_id, the code + runs in that session. This is useful for running multiple code snippets in the + same environment, because dependencies and similar things are persisted between + calls to the same session. Args: code: Code snippet to execute. - language: Programming language for the code to execute. Currently only supports Python, - but more will be added. + language: Programming language for the code to execute. Currently only supports Python. - files: Files to upload to the session. If present, files will be uploaded before - executing the given code. + files: Files to upload to the session. If present, files are uploaded before executing + the given code. - session_id: Identifier of the current session. Used to make follow-up calls. Requests will - return an error if the session does not belong to the caller or has expired. + session_id: Identifier of the current session. Used to make follow-up calls. Returns an + error if the session does not belong to the caller or has expired. extra_headers: Send extra headers @@ -160,22 +159,21 @@ async def execute( """Executes the given code snippet and returns the output. Without a session_id, a - new session will be created to run the code. If you do pass in a valid - session_id, the code will be run in that session. This is useful for running - multiple code snippets in the same environment, because dependencies and similar - things are persisted between calls to the same session. + new session is created to run the code. If you pass a valid session_id, the code + runs in that session. This is useful for running multiple code snippets in the + same environment, because dependencies and similar things are persisted between + calls to the same session. Args: code: Code snippet to execute. - language: Programming language for the code to execute. Currently only supports Python, - but more will be added. + language: Programming language for the code to execute. Currently only supports Python. - files: Files to upload to the session. If present, files will be uploaded before - executing the given code. + files: Files to upload to the session. If present, files are uploaded before executing + the given code. - session_id: Identifier of the current session. Used to make follow-up calls. Requests will - return an error if the session does not belong to the caller or has expired. + session_id: Identifier of the current session. Used to make follow-up calls. Returns an + error if the session does not belong to the caller or has expired. extra_headers: Send extra headers diff --git a/src/together/resources/completions.py b/src/together/resources/completions.py index 7fb330b52..139f659bf 100644 --- a/src/together/resources/completions.py +++ b/src/together/resources/completions.py @@ -94,8 +94,8 @@ def create( prompt: A string providing context for the model to complete. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -103,7 +103,7 @@ def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -124,9 +124,8 @@ def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. stream: If true, stream tokens as Server-Sent Events as the model generates them instead of waiting for the full model response. The stream terminates with @@ -211,8 +210,8 @@ def create( of waiting for the full model response. The stream terminates with `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -220,7 +219,7 @@ def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -241,9 +240,8 @@ def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. temperature: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate @@ -324,8 +322,8 @@ def create( of waiting for the full model response. The stream terminates with `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -333,7 +331,7 @@ def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -354,9 +352,8 @@ def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. temperature: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate @@ -526,8 +523,8 @@ async def create( prompt: A string providing context for the model to complete. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -535,7 +532,7 @@ async def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -556,9 +553,8 @@ async def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. stream: If true, stream tokens as Server-Sent Events as the model generates them instead of waiting for the full model response. The stream terminates with @@ -643,8 +639,8 @@ async def create( of waiting for the full model response. The stream terminates with `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -652,7 +648,7 @@ async def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -673,9 +669,8 @@ async def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. temperature: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate @@ -756,8 +751,8 @@ async def create( of waiting for the full model response. The stream terminates with `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -765,7 +760,7 @@ async def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -786,9 +781,8 @@ async def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. temperature: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate diff --git a/src/together/resources/endpoints.py b/src/together/resources/endpoints.py index afa3cfce3..9726a9461 100644 --- a/src/together/resources/endpoints.py +++ b/src/together/resources/endpoints.py @@ -74,9 +74,9 @@ def create( ) -> DedicatedEndpoint: """Creates a new dedicated endpoint for serving models. - The endpoint will - automatically start after creation. You can deploy any supported model on - hardware configurations that meet the model's requirements. + The endpoint starts + automatically after creation. You can deploy any supported model on hardware + configurations that meet the model's requirements. Args: autoscaling: Configuration for automatic scaling of the endpoint @@ -93,9 +93,8 @@ def create( display_name: A human-readable name for the endpoint - inactive_timeout: The number of minutes of inactivity after which the endpoint will be - automatically stopped. Set to null, omit or set to 0 to disable automatic - timeout. + inactive_timeout: The number of minutes of inactivity after which the endpoint stops + automatically. Set to null, omit, or set to 0 to disable automatic timeout. state: The desired state of the endpoint @@ -192,8 +191,8 @@ def update( display_name: A human-readable name for the endpoint - inactive_timeout: The number of minutes of inactivity after which the endpoint will be - automatically stopped. Set to 0 to disable automatic timeout. + inactive_timeout: The number of minutes of inactivity after which the endpoint stops + automatically. Set to 0 to disable automatic timeout. state: The desired state of the endpoint @@ -416,9 +415,9 @@ async def create( ) -> DedicatedEndpoint: """Creates a new dedicated endpoint for serving models. - The endpoint will - automatically start after creation. You can deploy any supported model on - hardware configurations that meet the model's requirements. + The endpoint starts + automatically after creation. You can deploy any supported model on hardware + configurations that meet the model's requirements. Args: autoscaling: Configuration for automatic scaling of the endpoint @@ -435,9 +434,8 @@ async def create( display_name: A human-readable name for the endpoint - inactive_timeout: The number of minutes of inactivity after which the endpoint will be - automatically stopped. Set to null, omit or set to 0 to disable automatic - timeout. + inactive_timeout: The number of minutes of inactivity after which the endpoint stops + automatically. Set to null, omit, or set to 0 to disable automatic timeout. state: The desired state of the endpoint @@ -534,8 +532,8 @@ async def update( display_name: A human-readable name for the endpoint - inactive_timeout: The number of minutes of inactivity after which the endpoint will be - automatically stopped. Set to 0 to disable automatic timeout. + inactive_timeout: The number of minutes of inactivity after which the endpoint stops + automatically. Set to 0 to disable automatic timeout. state: The desired state of the endpoint diff --git a/src/together/resources/fine_tuning.py b/src/together/resources/fine_tuning.py index 8217c2e78..9e5008b1d 100644 --- a/src/together/resources/fine_tuning.py +++ b/src/together/resources/fine_tuning.py @@ -506,8 +506,8 @@ def estimate_price( from_checkpoint: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or - `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the - final checkpoint will be used. + `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, uses the + final checkpoint. model: Name of the base model to run fine-tune job on @@ -519,8 +519,7 @@ def estimate_price( training_method: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization. - training_type: The training type to use. If not provided, the job will default to LoRA training - type. + training_type: The training type to use. Defaults to LoRA if not provided. validation_file: File-ID of a validation file uploaded to the Together API @@ -1106,8 +1105,8 @@ async def estimate_price( from_checkpoint: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or - `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the - final checkpoint will be used. + `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, uses the + final checkpoint. model: Name of the base model to run fine-tune job on @@ -1119,8 +1118,7 @@ async def estimate_price( training_method: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization. - training_type: The training type to use. If not provided, the job will default to LoRA training - type. + training_type: The training type to use. Defaults to LoRA if not provided. validation_file: File-ID of a validation file uploaded to the Together API diff --git a/src/together/types/beta/cluster_create_params.py b/src/together/types/beta/cluster_create_params.py index 5bdcb3438..e961052b7 100644 --- a/src/together/types/beta/cluster_create_params.py +++ b/src/together/types/beta/cluster_create_params.py @@ -97,8 +97,7 @@ class ClusterCreateParams(TypedDict, total=False): """Reservation start time of the cluster. This field is required for SCHEDULED billing to specify the reservation start - time for the cluster. If not provided, the cluster will be provisioned - immediately. + time for the cluster. If not provided, the cluster provisions immediately. """ shared_volume: SharedVolume diff --git a/src/together/types/beta/deployment.py b/src/together/types/beta/deployment.py index 378067c11..f7e36e0af 100644 --- a/src/together/types/beta/deployment.py +++ b/src/together/types/beta/deployment.py @@ -140,10 +140,7 @@ class ReplicaEvents(BaseModel): class Volume(BaseModel): mount_path: str - """ - MountPath is the path in the container where the volume will be mounted (e.g., - "/data") - """ + """MountPath is the path in the container where the volume mounts (e.g., "/data").""" name: str """Name is the name of the volume to mount. diff --git a/src/together/types/beta/jig/secret.py b/src/together/types/beta/jig/secret.py index 3700ebd86..d2927287e 100644 --- a/src/together/types/beta/jig/secret.py +++ b/src/together/types/beta/jig/secret.py @@ -16,13 +16,13 @@ class Secret(BaseModel): """CreatedAt is the ISO8601 timestamp when this secret was created""" created_by: Optional[str] = None - """CreatedBy is the identifier of the user who created this secret""" + """CreatedBy is the identifier of who created this secret.""" description: Optional[str] = None """Description is a human-readable description of the secret's purpose""" last_updated_by: Optional[str] = None - """LastUpdatedBy is the identifier of the user who last updated this secret""" + """LastUpdatedBy is the identifier of who last updated this secret.""" name: Optional[str] = None """Name is the name/key of the secret""" diff --git a/src/together/types/beta/jig/secret_create_params.py b/src/together/types/beta/jig/secret_create_params.py index 9c26e0ef4..793f9d745 100644 --- a/src/together/types/beta/jig/secret_create_params.py +++ b/src/together/types/beta/jig/secret_create_params.py @@ -18,7 +18,7 @@ class SecretCreateParams(TypedDict, total=False): value: Required[str] """ Value is the sensitive data to store securely (e.g., API keys, passwords, - tokens). This value will be encrypted at rest + tokens). Encrypted at rest. """ description: str diff --git a/src/together/types/beta/jig/secret_update_params.py b/src/together/types/beta/jig/secret_update_params.py index 18f7813fa..884d1cefc 100644 --- a/src/together/types/beta/jig/secret_update_params.py +++ b/src/together/types/beta/jig/secret_update_params.py @@ -30,5 +30,5 @@ class SecretUpdateParams(TypedDict, total=False): value: str """Value is the new sensitive data to store securely. - Updating this will replace the existing secret value + Updating this replaces the existing secret value. """ diff --git a/src/together/types/beta/jig/volume.py b/src/together/types/beta/jig/volume.py index b997ab41b..b0e563873 100644 --- a/src/together/types/beta/jig/volume.py +++ b/src/together/types/beta/jig/volume.py @@ -22,8 +22,8 @@ class ContentFile(BaseModel): class Content(BaseModel): files: Optional[List[ContentFile]] = None """ - Files is the list of files that will be preloaded into the volume, if the volume - content type is "files" + Files is the list of files to preload into the volume, if the volume content + type is "files". """ source_prefix: Optional[str] = None @@ -40,7 +40,7 @@ class Content(BaseModel): class VersionHistoryContent(BaseModel): - """Content specifies the new content that will be preloaded to this volume""" + """Content specifies the new content to preload to this volume.""" source_prefix: Optional[str] = None """ @@ -57,7 +57,7 @@ class VersionHistoryContent(BaseModel): class VersionHistory(BaseModel): content: Optional[VersionHistoryContent] = None - """Content specifies the new content that will be preloaded to this volume""" + """Content specifies the new content to preload to this volume.""" mounted_by: Optional[List[str]] = None diff --git a/src/together/types/beta/jig/volume_create_params.py b/src/together/types/beta/jig/volume_create_params.py index 954a3d9de..a7ced1565 100644 --- a/src/together/types/beta/jig/volume_create_params.py +++ b/src/together/types/beta/jig/volume_create_params.py @@ -9,7 +9,7 @@ class VolumeCreateParams(TypedDict, total=False): content: Required[Content] - """Content specifies the new content that will be preloaded to this volume""" + """Content specifies the new content to preload to this volume.""" name: Required[str] """Name is the unique identifier for the volume within the project""" @@ -19,7 +19,7 @@ class VolumeCreateParams(TypedDict, total=False): class Content(TypedDict, total=False): - """Content specifies the new content that will be preloaded to this volume""" + """Content specifies the new content to preload to this volume.""" source_prefix: str """ diff --git a/src/together/types/beta/jig/volume_update_params.py b/src/together/types/beta/jig/volume_update_params.py index 3b1ce6c25..3586492b1 100644 --- a/src/together/types/beta/jig/volume_update_params.py +++ b/src/together/types/beta/jig/volume_update_params.py @@ -9,7 +9,7 @@ class VolumeUpdateParams(TypedDict, total=False): content: Content - """Content specifies the new content that will be preloaded to this volume""" + """Content specifies the new content to preload to this volume.""" name: str """Name is the new unique identifier for the volume within the project""" @@ -19,7 +19,7 @@ class VolumeUpdateParams(TypedDict, total=False): class Content(TypedDict, total=False): - """Content specifies the new content that will be preloaded to this volume""" + """Content specifies the new content to preload to this volume.""" source_prefix: str """ diff --git a/src/together/types/beta/jig_deploy_params.py b/src/together/types/beta/jig_deploy_params.py index b6c5797f2..c0bdc9c54 100644 --- a/src/together/types/beta/jig_deploy_params.py +++ b/src/together/types/beta/jig_deploy_params.py @@ -75,13 +75,13 @@ class JigDeployParams(TypedDict, total=False): health_check_path: str """HealthCheckPath is the HTTP path for health checks (e.g., "/health"). - If set, the platform will check this endpoint to determine container health + If set, the platform checks this endpoint to determine container health. """ max_replicas: int - """ - MaxReplicas is the maximum number of container instances that can be scaled up - to. If not set, will be set to MinReplicas + """MaxReplicas is the maximum number of container instances. + + Defaults to MinReplicas if not set. """ memory: float @@ -199,10 +199,7 @@ class EnvironmentVariable(TypedDict, total=False): class Volume(TypedDict, total=False): mount_path: Required[str] - """ - MountPath is the path in the container where the volume will be mounted (e.g., - "/data") - """ + """MountPath is the path in the container where the volume mounts (e.g., "/data").""" name: Required[str] """Name is the name of the volume to mount. diff --git a/src/together/types/beta/jig_update_params.py b/src/together/types/beta/jig_update_params.py index e73aad281..c91e46d92 100644 --- a/src/together/types/beta/jig_update_params.py +++ b/src/together/types/beta/jig_update_params.py @@ -46,7 +46,7 @@ class JigUpdateParams(TypedDict, total=False): environment_variables: Iterable[EnvironmentVariable] """EnvironmentVariables is a list of environment variables to set in the container. - This will replace all existing environment variables + Replaces all existing environment variables. """ gpu_count: int @@ -104,7 +104,7 @@ class JigUpdateParams(TypedDict, total=False): volumes: Iterable[Volume] """Volumes is a list of volume mounts to attach to the container. - This will replace all existing volumes + Replaces all existing volumes. """ @@ -186,10 +186,7 @@ class EnvironmentVariable(TypedDict, total=False): class Volume(TypedDict, total=False): mount_path: Required[str] - """ - MountPath is the path in the container where the volume will be mounted (e.g., - "/data") - """ + """MountPath is the path in the container where the volume mounts (e.g., "/data").""" name: Required[str] """Name is the name of the volume to mount. diff --git a/src/together/types/chat/completion_create_params.py b/src/together/types/chat/completion_create_params.py index 215b3970b..f6951b635 100644 --- a/src/together/types/chat/completion_create_params.py +++ b/src/together/types/chat/completion_create_params.py @@ -57,14 +57,14 @@ class CompletionCreateParamsBase(TypedDict, total=False): context_length_exceeded_behavior: Literal["truncate", "error"] """ - Defined the behavior of the API when max_tokens exceed the maximum context - length of the model. When set to 'error', API will return 400 with appropriate - error message. When set to 'truncate', override the max_tokens with maximum - context length of the model. + Defines the behavior of the API when max_tokens exceed the maximum context + length of the model. When set to 'error', the API returns 400 with an + appropriate error message. When set to 'truncate', overrides max_tokens with the + maximum context length of the model. """ echo: bool - """If true, the response will contain the prompt. + """If true, the response contains the prompt. Can be used with `logprobs` to return prompt logprobs. """ @@ -83,7 +83,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): logprobs: int """ An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. """ @@ -144,10 +144,10 @@ class CompletionCreateParamsBase(TypedDict, total=False): """Seed value for reproducibility.""" stop: SequenceNotStr[str] - """A list of string sequences that will truncate (stop) inference text output. + """A list of string sequences that truncate (stop) inference text output. - For example, "" will stop generation as soon as the model generates the - given token. + For example, "" stops generation as soon as the model generates the given + token. """ temperature: float diff --git a/src/together/types/code_interpreter_execute_params.py b/src/together/types/code_interpreter_execute_params.py index 91cf6c02a..4078c47cd 100644 --- a/src/together/types/code_interpreter_execute_params.py +++ b/src/together/types/code_interpreter_execute_params.py @@ -13,22 +13,19 @@ class CodeInterpreterExecuteParams(TypedDict, total=False): """Code snippet to execute.""" language: Required[Literal["python"]] - """Programming language for the code to execute. - - Currently only supports Python, but more will be added. - """ + """Programming language for the code to execute. Currently only supports Python.""" files: Iterable[File] """Files to upload to the session. - If present, files will be uploaded before executing the given code. + If present, files are uploaded before executing the given code. """ session_id: str """Identifier of the current session. - Used to make follow-up calls. Requests will return an error if the session does - not belong to the caller or has expired. + Used to make follow-up calls. Returns an error if the session does not belong to + the caller or has expired. """ diff --git a/src/together/types/completion_create_params.py b/src/together/types/completion_create_params.py index a5fb0f6cf..ff960141e 100644 --- a/src/together/types/completion_create_params.py +++ b/src/together/types/completion_create_params.py @@ -31,7 +31,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): """A string providing context for the model to complete.""" echo: bool - """If true, the response will contain the prompt. + """If true, the response contains the prompt. Can be used with `logprobs` to return prompt logprobs. """ @@ -48,7 +48,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): logprobs: int """ An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. """ @@ -84,10 +84,10 @@ class CompletionCreateParamsBase(TypedDict, total=False): """Seed value for reproducibility.""" stop: SequenceNotStr[str] - """A list of string sequences that will truncate (stop) inference text output. + """A list of string sequences that truncate (stop) inference text output. - For example, "" will stop generation as soon as the model generates the - given token. + For example, "" stops generation as soon as the model generates the given + token. """ temperature: float diff --git a/src/together/types/endpoint_create_params.py b/src/together/types/endpoint_create_params.py index 3674e0e9b..d28f713be 100644 --- a/src/together/types/endpoint_create_params.py +++ b/src/together/types/endpoint_create_params.py @@ -33,10 +33,10 @@ class EndpointCreateParams(TypedDict, total=False): """A human-readable name for the endpoint""" inactive_timeout: Optional[int] - """ - The number of minutes of inactivity after which the endpoint will be - automatically stopped. Set to null, omit or set to 0 to disable automatic - timeout. + """The number of minutes of inactivity after which the endpoint stops + automatically. + + Set to null, omit, or set to 0 to disable automatic timeout. """ state: Literal["STARTED", "STOPPED"] diff --git a/src/together/types/endpoint_update_params.py b/src/together/types/endpoint_update_params.py index 6f992b3fa..92e241640 100644 --- a/src/together/types/endpoint_update_params.py +++ b/src/together/types/endpoint_update_params.py @@ -18,9 +18,10 @@ class EndpointUpdateParams(TypedDict, total=False): """A human-readable name for the endpoint""" inactive_timeout: Optional[int] - """ - The number of minutes of inactivity after which the endpoint will be - automatically stopped. Set to 0 to disable automatic timeout. + """The number of minutes of inactivity after which the endpoint stops + automatically. + + Set to 0 to disable automatic timeout. """ state: Literal["STARTED", "STOPPED"] diff --git a/src/together/types/fine_tuning_cancel_response.py b/src/together/types/fine_tuning_cancel_response.py index 1839ca7ee..11a649aec 100644 --- a/src/together/types/fine_tuning_cancel_response.py +++ b/src/together/types/fine_tuning_cancel_response.py @@ -66,8 +66,8 @@ class TrainingMethodTrainingMethodSft(BaseModel): train_on_inputs: Union[bool, Literal["auto"]] """ - Whether to mask the user messages in conversational data or prompts in - instruction data. + Whether to mask user messages in conversational data or prompts in instruction + data. """ @@ -160,8 +160,7 @@ class FineTuningCancelResponse(BaseModel): max_seq_length: Optional[int] = None """Maximum sequence length to use for training. - If not specified, the maximum allowed for the model and training method will be - used. + If not specified, uses the maximum allowed for the model and training method. """ model: Optional[str] = None @@ -216,7 +215,7 @@ class FineTuningCancelResponse(BaseModel): """Type of training used (full or LoRA)""" user_id: Optional[str] = None - """Identifier for the user who created the job""" + """Identifier for who created the job.""" validation_file: Optional[str] = None """File-ID of the validation file""" diff --git a/src/together/types/fine_tuning_estimate_price_params.py b/src/together/types/fine_tuning_estimate_price_params.py index a4a5e4aba..30f1dff7e 100644 --- a/src/together/types/fine_tuning_estimate_price_params.py +++ b/src/together/types/fine_tuning_estimate_price_params.py @@ -24,8 +24,8 @@ class FineTuningEstimatePriceParams(TypedDict, total=False): """The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or - `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the - final checkpoint will be used. + `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, uses the + final checkpoint. """ model: str @@ -47,10 +47,7 @@ class FineTuningEstimatePriceParams(TypedDict, total=False): """ training_type: Optional[TrainingType] - """The training type to use. - - If not provided, the job will default to LoRA training type. - """ + """The training type to use. Defaults to LoRA if not provided.""" validation_file: str """File-ID of a validation file uploaded to the Together API""" @@ -61,8 +58,8 @@ class TrainingMethodTrainingMethodSft(TypedDict, total=False): train_on_inputs: Required[Union[bool, Literal["auto"]]] """ - Whether to mask the user messages in conversational data or prompts in - instruction data. + Whether to mask user messages in conversational data or prompts in instruction + data. """ diff --git a/src/together/types/fine_tuning_estimate_price_response.py b/src/together/types/fine_tuning_estimate_price_response.py index acd757fd8..1bcb76a66 100644 --- a/src/together/types/fine_tuning_estimate_price_response.py +++ b/src/together/types/fine_tuning_estimate_price_response.py @@ -9,7 +9,7 @@ class FineTuningEstimatePriceResponse(BaseModel): allowed_to_proceed: Optional[bool] = None - """Whether the user is allowed to proceed with the fine-tuning job""" + """Whether you are allowed to proceed with the fine-tuning job.""" estimated_eval_token_count: Optional[float] = None """The estimated number of tokens for evaluation""" @@ -21,4 +21,4 @@ class FineTuningEstimatePriceResponse(BaseModel): """The estimated number of tokens to be trained""" user_limit: Optional[float] = None - """The user's credit limit in dollars""" + """Your credit limit in dollars.""" diff --git a/src/together/types/fine_tuning_list_response.py b/src/together/types/fine_tuning_list_response.py index a5d6830bc..a606e334d 100644 --- a/src/together/types/fine_tuning_list_response.py +++ b/src/together/types/fine_tuning_list_response.py @@ -67,8 +67,8 @@ class DataTrainingMethodTrainingMethodSft(BaseModel): train_on_inputs: Union[bool, Literal["auto"]] """ - Whether to mask the user messages in conversational data or prompts in - instruction data. + Whether to mask user messages in conversational data or prompts in instruction + data. """ @@ -161,8 +161,7 @@ class Data(BaseModel): max_seq_length: Optional[int] = None """Maximum sequence length to use for training. - If not specified, the maximum allowed for the model and training method will be - used. + If not specified, uses the maximum allowed for the model and training method. """ model: Optional[str] = None @@ -217,7 +216,7 @@ class Data(BaseModel): """Type of training used (full or LoRA)""" user_id: Optional[str] = None - """Identifier for the user who created the job""" + """Identifier for who created the job.""" validation_file: Optional[str] = None """File-ID of the validation file""" diff --git a/src/together/types/finetune_response.py b/src/together/types/finetune_response.py index d50d9a747..14633e6f7 100644 --- a/src/together/types/finetune_response.py +++ b/src/together/types/finetune_response.py @@ -73,8 +73,8 @@ class TrainingMethodTrainingMethodSft(BaseModel): train_on_inputs: Union[bool, Literal["auto"]] """ - Whether to mask the user messages in conversational data or prompts in - instruction data. + Whether to mask user messages in conversational data or prompts in instruction + data. """ From cc61be030cc95d6a3d8e262ec47f7ceacfd2eb75 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 21:15:06 +0000 Subject: [PATCH 16/18] fix(types): constrain endpoint parameter to literals in batches --- .stats.yml | 4 ++-- src/together/resources/batches.py | 22 ++++++++++++++++++---- src/together/types/batch_create_params.py | 14 +++++++++++--- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/.stats.yml b/.stats.yml index 29646d90c..df0d96a66 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 75 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-b584185aba41e3d597bf715d9b704f1c7d2663ae7d8f3f3c35e63d603738ee9c.yml -openapi_spec_hash: 0b26ddf285392dd9f629c1161db62376 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-72e377e4d7f3fe8bb9f0dab1a70d7d9cf0f44914ff4d9b8ab238bc7f48008621.yml +openapi_spec_hash: 798c6d992a5cb83901b5879502c22f9d config_hash: 6c214c91fad5ead4849be777fd9e8108 diff --git a/src/together/resources/batches.py b/src/together/resources/batches.py index 58c61accc..a89c3d61a 100644 --- a/src/together/resources/batches.py +++ b/src/together/resources/batches.py @@ -2,6 +2,8 @@ from __future__ import annotations +from typing_extensions import Literal + import httpx from ..types import batch_create_params @@ -46,7 +48,7 @@ def with_streaming_response(self) -> BatchesResourceWithStreamingResponse: def create( self, *, - endpoint: str, + endpoint: Literal["/v1/chat/completions", "/v1/audio/transcriptions", "/v1/audio/translations"], input_file_id: str, completion_window: str | Omit = omit, model_id: str | Omit = omit, @@ -62,7 +64,13 @@ def create( Create a new batch job with the given input file and endpoint Args: - endpoint: The endpoint to use for batch processing + endpoint: The endpoint to use for batch processing. Each line of the uploaded input file + is dispatched against this endpoint. + + - `/v1/chat/completions` — chat completion batches + - `/v1/audio/transcriptions` — audio transcription batches (e.g. + `openai/whisper-large-v3`) + - `/v1/audio/translations` — audio translation batches input_file_id: ID of the uploaded input file containing batch requests @@ -211,7 +219,7 @@ def with_streaming_response(self) -> AsyncBatchesResourceWithStreamingResponse: async def create( self, *, - endpoint: str, + endpoint: Literal["/v1/chat/completions", "/v1/audio/transcriptions", "/v1/audio/translations"], input_file_id: str, completion_window: str | Omit = omit, model_id: str | Omit = omit, @@ -227,7 +235,13 @@ async def create( Create a new batch job with the given input file and endpoint Args: - endpoint: The endpoint to use for batch processing + endpoint: The endpoint to use for batch processing. Each line of the uploaded input file + is dispatched against this endpoint. + + - `/v1/chat/completions` — chat completion batches + - `/v1/audio/transcriptions` — audio transcription batches (e.g. + `openai/whisper-large-v3`) + - `/v1/audio/translations` — audio translation batches input_file_id: ID of the uploaded input file containing batch requests diff --git a/src/together/types/batch_create_params.py b/src/together/types/batch_create_params.py index 8b696489e..a8601afe9 100644 --- a/src/together/types/batch_create_params.py +++ b/src/together/types/batch_create_params.py @@ -2,14 +2,22 @@ from __future__ import annotations -from typing_extensions import Required, TypedDict +from typing_extensions import Literal, Required, TypedDict __all__ = ["BatchCreateParams"] class BatchCreateParams(TypedDict, total=False): - endpoint: Required[str] - """The endpoint to use for batch processing""" + endpoint: Required[Literal["/v1/chat/completions", "/v1/audio/transcriptions", "/v1/audio/translations"]] + """The endpoint to use for batch processing. + + Each line of the uploaded input file is dispatched against this endpoint. + + - `/v1/chat/completions` — chat completion batches + - `/v1/audio/transcriptions` — audio transcription batches (e.g. + `openai/whisper-large-v3`) + - `/v1/audio/translations` — audio translation batches + """ input_file_id: Required[str] """ID of the uploaded input file containing batch requests""" From 643286f390291ebb3e5fed85c4fb1dc5dd9cd981 Mon Sep 17 00:00:00 2001 From: Blaine Kasten Date: Mon, 11 May 2026 16:42:39 -0700 Subject: [PATCH 17/18] chore: Add usage examples for jig commands (#361) --- src/together/lib/cli/__init__.py | 96 +++++++++++-- src/together/lib/cli/utils/_help_examples.py | 137 +++++++++++++++++++ 2 files changed, 220 insertions(+), 13 deletions(-) diff --git a/src/together/lib/cli/__init__.py b/src/together/lib/cli/__init__.py index 3819231c2..e4dc2931e 100644 --- a/src/together/lib/cli/__init__.py +++ b/src/together/lib/cli/__init__.py @@ -27,20 +27,35 @@ from together.lib.cli.utils._api_error import try_handle_server_error_message from together.lib.cli.utils._completion import install_completion from together.lib.cli.utils._help_examples import ( + JIG_HELP_EXAMPLES, EVALS_HELP_EXAMPLES, FILES_HELP_EXAMPLES, MODELS_HELP_EXAMPLES, + JIG_LOGS_HELP_EXAMPLES, + JIG_PUSH_HELP_EXAMPLES, ENDPOINTS_HELP_EXAMPLES, + JIG_BUILD_HELP_EXAMPLES, TOP_LEVEL_HELP_EXAMPLES, + JIG_DEPLOY_HELP_EXAMPLES, + JIG_SUBMIT_HELP_EXAMPLES, FINE_TUNING_HELP_EXAMPLES, + JIG_DESTROY_HELP_EXAMPLES, + JIG_SECRETS_HELP_EXAMPLES, + JIG_VOLUMES_HELP_EXAMPLES, EVALS_CREATE_HELP_EXAMPLES, FILES_UPLOAD_HELP_EXAMPLES, BETA_CLUSTERS_HELP_EXAMPLES, MODELS_UPLOAD_HELP_EXAMPLES, + JIG_JOB_STATUS_HELP_EXAMPLES, + JIG_SECRETS_SET_HELP_EXAMPLES, ENDPOINTS_CREATE_HELP_EXAMPLES, ENDPOINTS_UPDATE_HELP_EXAMPLES, + JIG_SECRETS_UNSET_HELP_EXAMPLES, ENDPOINTS_HARDWARE_HELP_EXAMPLES, FINE_TUNING_CREATE_HELP_EXAMPLES, + JIG_SECRETS_DELETE_HELP_EXAMPLES, + JIG_VOLUMES_CREATE_HELP_EXAMPLES, + JIG_VOLUMES_UPDATE_HELP_EXAMPLES, BETA_CLUSTERS_CREATE_HELP_EXAMPLES, BETA_CLUSTERS_UPDATE_HELP_EXAMPLES, FINE_TUNING_DOWNLOAD_HELP_EXAMPLES, @@ -471,48 +486,103 @@ async def run_command() -> None: storage_app.command((f"{_CLI}.beta.clusters.storage.delete:delete"), help="Delete a storage volume", alias="-d") ### Jig commands -jig_app = beta_app.command(App(name="jig", help="Build, deploy, and manage custom containers")) +jig_app = beta_app.command( + App(name="jig", help="Build, deploy, and manage custom containers", help_epilogue=JIG_HELP_EXAMPLES) +) jig_app.command((f"{_CLI}.beta.jig.jig:init"), help="Initialize configuration for a Jig deployment") jig_app.command( (f"{_CLI}.beta.jig.jig:dockerfile_cli"), name="dockerfile", help="Generate Dockerfile from jig configuration" ) -jig_app.command((f"{_CLI}.beta.jig.jig:build_cli"), name="build", help="Build container image") -jig_app.command((f"{_CLI}.beta.jig.jig:push_cli"), name="push", help="Push image to registry") -jig_app.command((f"{_CLI}.beta.jig.jig:deploy_cli"), name="deploy", help="Deploy model to Together") +jig_app.command( + (f"{_CLI}.beta.jig.jig:build_cli"), + name="build", + help="Build container image", + help_epilogue=JIG_BUILD_HELP_EXAMPLES, +) +jig_app.command( + (f"{_CLI}.beta.jig.jig:push_cli"), name="push", help="Push image to registry", help_epilogue=JIG_PUSH_HELP_EXAMPLES +) +jig_app.command( + (f"{_CLI}.beta.jig.jig:deploy_cli"), + name="deploy", + help="Deploy model to Together", + help_epilogue=JIG_DEPLOY_HELP_EXAMPLES, +) jig_app.command((f"{_CLI}.beta.jig.jig:status_cli"), name="status", help="Get deployment status") jig_app.command((f"{_CLI}.beta.jig.jig:endpoint_cli"), name="endpoint", help="Get deployment endpoint URL") -jig_app.command((f"{_CLI}.beta.jig.jig:logs_cli"), name="logs", help="Get deployment logs") -jig_app.command((f"{_CLI}.beta.jig.jig:destroy_cli"), name="destroy", help="Destroy deployment") -jig_app.command((f"{_CLI}.beta.jig.jig:submit_cli"), name="submit", help="Submit a job to the deployment") -jig_app.command((f"{_CLI}.beta.jig.jig:job_status_cli"), name="job-status", help="Get status of a specific job") +jig_app.command( + (f"{_CLI}.beta.jig.jig:logs_cli"), name="logs", help="Get deployment logs", help_epilogue=JIG_LOGS_HELP_EXAMPLES +) +jig_app.command( + (f"{_CLI}.beta.jig.jig:destroy_cli"), + name="destroy", + help="Destroy deployment", + help_epilogue=JIG_DESTROY_HELP_EXAMPLES, +) +jig_app.command( + (f"{_CLI}.beta.jig.jig:submit_cli"), + name="submit", + help="Submit a job to the deployment", + help_epilogue=JIG_SUBMIT_HELP_EXAMPLES, +) +jig_app.command( + (f"{_CLI}.beta.jig.jig:job_status_cli"), + name="job-status", + help="Get status of a specific job", + help_epilogue=JIG_JOB_STATUS_HELP_EXAMPLES, +) jig_app.command( (f"{_CLI}.beta.jig.jig:queue_status_cli"), name="queue-status", help="Get queue metrics for the deployment" ) jig_app.command((f"{_CLI}.beta.jig.jig:list_deployments_cli"), name="list", alias="ls", help="List all deployments") -secrets_app = jig_app.command(App(name="secrets", help="Manage deployment secrets", group="Subcommands")) -secrets_app.command((f"{_CLI}.beta.jig.jig:secrets_set_cli"), name="set", help="Set a secret (create or update)") -secrets_app.command((f"{_CLI}.beta.jig.jig:secrets_unset_cli"), name="unset", help="Remove a secret from local state") +secrets_app = jig_app.command( + App(name="secrets", help="Manage deployment secrets", group="Subcommands", help_epilogue=JIG_SECRETS_HELP_EXAMPLES) +) +secrets_app.command( + (f"{_CLI}.beta.jig.jig:secrets_set_cli"), + name="set", + help="Set a secret (create or update)", + help_epilogue=JIG_SECRETS_SET_HELP_EXAMPLES, +) +secrets_app.command( + (f"{_CLI}.beta.jig.jig:secrets_unset_cli"), + name="unset", + help="Remove a secret from local state", + help_epilogue=JIG_SECRETS_UNSET_HELP_EXAMPLES, +) secrets_app.command( (f"{_CLI}.beta.jig.jig:secrets_delete_cli"), name="delete", help="Delete a secret and unset it locally", alias="-d", + help_epilogue=JIG_SECRETS_DELETE_HELP_EXAMPLES, ) secrets_app.command( (f"{_CLI}.beta.jig.jig:secrets_list_cli"), name="list", alias="ls", help="List all secrets with sync status" ) ### Jig > volumes -storage_app = jig_app.command(App(name="volumes", help="Manage volumes for Jig deployments", group="Subcommands")) +storage_app = jig_app.command( + App( + name="volumes", + help="Manage volumes for Jig deployments", + group="Subcommands", + help_epilogue=JIG_VOLUMES_HELP_EXAMPLES, + ) +) storage_app.command( (f"{_CLI}.beta.jig.jig:jig_volumes_create_cli"), name="create", alias="-c", help="Create a new volume for a Jig deployment", + help_epilogue=JIG_VOLUMES_CREATE_HELP_EXAMPLES, ) storage_app.command( - (f"{_CLI}.beta.jig.jig:jig_volumes_update_cli"), name="update", help="Update a volume and re-upload files" + (f"{_CLI}.beta.jig.jig:jig_volumes_update_cli"), + name="update", + help="Update a volume and re-upload files", + help_epilogue=JIG_VOLUMES_UPDATE_HELP_EXAMPLES, ) storage_app.command((f"{_CLI}.beta.jig.jig:jig_volumes_delete_cli"), name="delete", help="Delete a volume", alias="-d") storage_app.command( diff --git a/src/together/lib/cli/utils/_help_examples.py b/src/together/lib/cli/utils/_help_examples.py index 466b5fe77..224eee8a4 100644 --- a/src/together/lib/cli/utils/_help_examples.py +++ b/src/together/lib/cli/utils/_help_examples.py @@ -328,3 +328,140 @@ [dim]-[/dim] Grow a volume to 4 TiB: [primary]tg beta clusters storage update --size-tib 4[/primary] """ + +## Beta > Jig commands + +JIG_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Bootstrap config and deploy from the current directory: + [primary]tg beta jig init[/primary] + [primary]tg beta jig deploy[/primary] + +[dim]-[/dim] Inspect a deployment and stream logs: + [primary]tg beta jig status[/primary] + [primary]tg beta jig logs --follow[/primary] + +[dim]-[/dim] List deployments or tear one down: + [primary]tg beta jig list[/primary] + [primary]tg beta jig destroy[/primary] +""" + +JIG_SECRETS_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Add or rotate a secret for this deployment: + [primary]tg beta jig secrets set HF_TOKEN "$HF_TOKEN"[/primary] + +[dim]-[/dim] List secrets and sync status: + [primary]tg beta jig secrets list[/primary] + +[dim]-[/dim] Remove a secret remotely and locally: + [primary]tg beta jig secrets delete OLD_KEY[/primary] +""" + +JIG_VOLUMES_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Create a volume and upload a directory: + [primary]tg beta jig volumes create --name model-weights --source ./weights[/primary] + +[dim]-[/dim] List volumes for the deployment: + [primary]tg beta jig volumes list[/primary] + +[dim]-[/dim] Refresh volume contents from disk: + [primary]tg beta jig volumes update --name model-weights --source ./weights[/primary] +""" + +JIG_BUILD_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Build with default tag ([primary]latest[/primary]): + [primary]tg beta jig build[/primary] + +[dim]-[/dim] Build a tagged image with warmup (torch compile cache): + [primary]tg beta jig build --tag v1 --warmup[/primary] + +[dim]-[/dim] Pass extra Docker build arguments: + [primary]tg beta jig build --docker-args '--no-cache'[/primary] +""" + +JIG_PUSH_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Push the default ([primary]latest[/primary]) image: + [primary]tg beta jig push[/primary] + +[dim]-[/dim] Push a specific tag: + [primary]tg beta jig push --tag v1[/primary] +""" + +JIG_DEPLOY_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Build, push, and deploy from config in the current directory: + [primary]tg beta jig deploy[/primary] + +[dim]-[/dim] Deploy using an image that is already in the registry (skip build/push): + [primary]tg beta jig deploy --image my-registry.example.com/my-org/my-model:abc123[/primary] + +[dim]-[/dim] Only build and push; do not update the deployment: + [primary]tg beta jig deploy --build-only[/primary] + +[dim]-[/dim] Start deploy and return immediately without waiting: + [primary]tg beta jig deploy --detach[/primary] +""" + +JIG_DESTROY_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Tear down the deployment for this project ([primary]jig.toml[/primary] / [primary]pyproject.toml[/primary]): + [primary]tg beta jig destroy[/primary] +""" + +JIG_LOGS_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Print recent logs once: + [primary]tg beta jig logs[/primary] + +[dim]-[/dim] Stream logs ([primary]Ctrl+C[/primary] to stop): + [primary]tg beta jig logs --follow[/primary] +""" + +JIG_SUBMIT_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Submit a simple prompt job: + [primary]tg beta jig submit --prompt "Hello, world!"[/primary] + +[dim]-[/dim] Submit with a JSON payload (advanced request body): + [primary]tg beta jig submit --payload '{"prompt":"Explain transformers","max_tokens":256}'[/primary] + +[dim]-[/dim] Submit and poll until the job finishes: + [primary]tg beta jig submit --prompt "Summarize this README." --watch[/primary] +""" + +JIG_JOB_STATUS_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Look up a job by request ID (from submit output): + [primary]tg beta jig job-status --request-id [/primary] + +[dim]-[/dim] Machine-readable status: + [primary]tg beta jig job-status --request-id --json[/primary] +""" + +JIG_SECRETS_SET_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Create or update a secret from the shell: + [primary]tg beta jig secrets set HF_TOKEN "$HF_TOKEN"[/primary] + +[dim]-[/dim] Set a secret with a description (shown in listings): + [primary]tg beta jig secrets set API_KEY "$API_KEY" --description "Third-party API credentials"[/primary] +""" + +JIG_SECRETS_UNSET_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Drop a secret from local state only (does not delete remotely): + [primary]tg beta jig secrets unset OLD_KEY[/primary] +""" + +JIG_SECRETS_DELETE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Delete the secret on the server and remove it locally: + [primary]tg beta jig secrets delete REVOKED_KEY[/primary] +""" + +JIG_VOLUMES_CREATE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Create a volume and upload files from a directory: + [primary]tg beta jig volumes create --name model-weights --source ./weights[/primary] + +[dim]-[/dim] Same using positional arguments: + [primary]tg beta jig volumes create model-weights ./weights[/primary] +""" + +JIG_VOLUMES_UPDATE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Upload a new directory tree as the next volume version: + [primary]tg beta jig volumes update --name model-weights --source ./weights[/primary] + +[dim]-[/dim] Positional form: + [primary]tg beta jig volumes update model-weights ./weights[/primary] +""" From e374d1dba05224716378c59ab9fde7af94e8da1d Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 23:42:58 +0000 Subject: [PATCH 18/18] release: 2.13.0 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 35 +++++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- src/together/_version.py | 2 +- 4 files changed, 38 insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 0746cbe20..e6eadb43e 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "2.12.0" + ".": "2.13.0" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index c868b867b..c0f93ca0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,40 @@ # Changelog +## 2.13.0 (2026-05-11) + +Full Changelog: [v2.12.0...v2.13.0](https://github.com/togethercomputer/together-py/compare/v2.12.0...v2.13.0) + +### Features + +* **api:** add max_tokens and temperature to eval judge parameters ([d35fb64](https://github.com/togethercomputer/together-py/commit/d35fb643b2cd5eff5ccb2b8b2c0eb4fbc8d30734)) +* **internal/types:** support eagerly validating pydantic iterators ([852ef60](https://github.com/togethercomputer/together-py/commit/852ef60dc108bef4dc7e80ea528ca7823d7030d9)) + + +### Bug Fixes + +* **api:** remove task field from audio transcription/translation responses ([f34ac96](https://github.com/togethercomputer/together-py/commit/f34ac960a980dbb5750208ff27eae4abc283783a)) +* **client:** add missing f-string prefix in file type error message ([d62050f](https://github.com/togethercomputer/together-py/commit/d62050fb50b4858ed32fa27d2024c7505d842946)) +* **types:** constrain endpoint parameter to literals in batches ([cc61be0](https://github.com/togethercomputer/together-py/commit/cc61be030cc95d6a3d8e262ec47f7ceacfd2eb75)) + + +### Chores + +* Add example usage for clusters commands ([#360](https://github.com/togethercomputer/together-py/issues/360)) ([a357ed6](https://github.com/togethercomputer/together-py/commit/a357ed65e81e3b1a94c3e5e63c7d2d8840f3f421)) +* Add example usage for file commands ([#359](https://github.com/togethercomputer/together-py/issues/359)) ([8d2a18b](https://github.com/togethercomputer/together-py/commit/8d2a18b2647585f4e83bd42c679e699c841cd545)) +* Add example usage for model commands ([#358](https://github.com/togethercomputer/together-py/issues/358)) ([1316203](https://github.com/togethercomputer/together-py/commit/13162032b1ae100ecc7c1a08f454b30474f0a6b4)) +* Add example usage to fine-tuning CLI help pages ([#357](https://github.com/togethercomputer/together-py/issues/357)) ([ad3cdb2](https://github.com/togethercomputer/together-py/commit/ad3cdb2d0d4f5511fa06105c4a7169cc72cf9306)) +* Add examples to the CLI help output for endpoint commands ([#354](https://github.com/togethercomputer/together-py/issues/354)) ([f51b5e8](https://github.com/togethercomputer/together-py/commit/f51b5e8ec65be8cee2eef9270b1af55cbdc2cd91)) +* Add help examples to evals commands ([#356](https://github.com/togethercomputer/together-py/issues/356)) ([0794576](https://github.com/togethercomputer/together-py/commit/0794576cd274a8a55240b3a1b780df8b23261a7c)) +* Add usage examples for jig commands ([#361](https://github.com/togethercomputer/together-py/issues/361)) ([643286f](https://github.com/togethercomputer/together-py/commit/643286f390291ebb3e5fed85c4fb1dc5dd9cd981)) +* Switch to an async version of DownloadManager ([#353](https://github.com/togethercomputer/together-py/issues/353)) ([c756670](https://github.com/togethercomputer/together-py/commit/c7566702fb57fcce808bb23ba1bc9b0737b9c352)) + + +### Documentation + +* **api:** add .ogg, .opus, .aac to supported formats in audio transcriptions/translations ([9c1211a](https://github.com/togethercomputer/together-py/commit/9c1211a8143dc435ef351dd57b9553d39ae06b8e)) +* **api:** clarify prompt parameter support in audio transcriptions/translations ([9889ead](https://github.com/togethercomputer/together-py/commit/9889ead58a66864a37d01ef0bbe92b4bc8786ff5)) +* **api:** reword docstrings to present tense across resources ([65c1756](https://github.com/togethercomputer/together-py/commit/65c175682f4f8432dfe4d880d6dc0a21acc46655)) + ## 2.12.0 (2026-05-01) Full Changelog: [v2.11.0...v2.12.0](https://github.com/togethercomputer/together-py/compare/v2.11.0...v2.12.0) diff --git a/pyproject.toml b/pyproject.toml index 745a7860a..2179ec005 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "together" -version = "2.12.0" +version = "2.13.0" description = "The official Python library for the together API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/together/_version.py b/src/together/_version.py index 360660282..e2ed90593 100644 --- a/src/together/_version.py +++ b/src/together/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "together" -__version__ = "2.12.0" # x-release-please-version +__version__ = "2.13.0" # x-release-please-version