diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 0746cbe2..e6eadb43 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "2.12.0" + ".": "2.13.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index 2d417872..df0d96a6 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 75 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-e1200616b1a93d40e478800d2c9e06ddeb10b508f2a9aa65810ee31878ba4f23.yml -openapi_spec_hash: 23245993d115722da1b697f10799f4f1 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai/togetherai-72e377e4d7f3fe8bb9f0dab1a70d7d9cf0f44914ff4d9b8ab238bc7f48008621.yml +openapi_spec_hash: 798c6d992a5cb83901b5879502c22f9d config_hash: 6c214c91fad5ead4849be777fd9e8108 diff --git a/CHANGELOG.md b/CHANGELOG.md index c868b867..c0f93ca0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,40 @@ # Changelog +## 2.13.0 (2026-05-11) + +Full Changelog: [v2.12.0...v2.13.0](https://github.com/togethercomputer/together-py/compare/v2.12.0...v2.13.0) + +### Features + +* **api:** add max_tokens and temperature to eval judge parameters ([d35fb64](https://github.com/togethercomputer/together-py/commit/d35fb643b2cd5eff5ccb2b8b2c0eb4fbc8d30734)) +* **internal/types:** support eagerly validating pydantic iterators ([852ef60](https://github.com/togethercomputer/together-py/commit/852ef60dc108bef4dc7e80ea528ca7823d7030d9)) + + +### Bug Fixes + +* **api:** remove task field from audio transcription/translation responses ([f34ac96](https://github.com/togethercomputer/together-py/commit/f34ac960a980dbb5750208ff27eae4abc283783a)) +* **client:** add missing f-string prefix in file type error message ([d62050f](https://github.com/togethercomputer/together-py/commit/d62050fb50b4858ed32fa27d2024c7505d842946)) +* **types:** constrain endpoint parameter to literals in batches ([cc61be0](https://github.com/togethercomputer/together-py/commit/cc61be030cc95d6a3d8e262ec47f7ceacfd2eb75)) + + +### Chores + +* Add example usage for clusters commands ([#360](https://github.com/togethercomputer/together-py/issues/360)) ([a357ed6](https://github.com/togethercomputer/together-py/commit/a357ed65e81e3b1a94c3e5e63c7d2d8840f3f421)) +* Add example usage for file commands ([#359](https://github.com/togethercomputer/together-py/issues/359)) ([8d2a18b](https://github.com/togethercomputer/together-py/commit/8d2a18b2647585f4e83bd42c679e699c841cd545)) +* Add example usage for model commands ([#358](https://github.com/togethercomputer/together-py/issues/358)) ([1316203](https://github.com/togethercomputer/together-py/commit/13162032b1ae100ecc7c1a08f454b30474f0a6b4)) +* Add example usage to fine-tuning CLI help pages ([#357](https://github.com/togethercomputer/together-py/issues/357)) ([ad3cdb2](https://github.com/togethercomputer/together-py/commit/ad3cdb2d0d4f5511fa06105c4a7169cc72cf9306)) +* Add examples to the CLI help output for endpoint commands ([#354](https://github.com/togethercomputer/together-py/issues/354)) ([f51b5e8](https://github.com/togethercomputer/together-py/commit/f51b5e8ec65be8cee2eef9270b1af55cbdc2cd91)) +* Add help examples to evals commands ([#356](https://github.com/togethercomputer/together-py/issues/356)) ([0794576](https://github.com/togethercomputer/together-py/commit/0794576cd274a8a55240b3a1b780df8b23261a7c)) +* Add usage examples for jig commands 
([#361](https://github.com/togethercomputer/together-py/issues/361)) ([643286f](https://github.com/togethercomputer/together-py/commit/643286f390291ebb3e5fed85c4fb1dc5dd9cd981)) +* Switch to an async version of DownloadManager ([#353](https://github.com/togethercomputer/together-py/issues/353)) ([c756670](https://github.com/togethercomputer/together-py/commit/c7566702fb57fcce808bb23ba1bc9b0737b9c352)) + + +### Documentation + +* **api:** add .ogg, .opus, .aac to supported formats in audio transcriptions/translations ([9c1211a](https://github.com/togethercomputer/together-py/commit/9c1211a8143dc435ef351dd57b9553d39ae06b8e)) +* **api:** clarify prompt parameter support in audio transcriptions/translations ([9889ead](https://github.com/togethercomputer/together-py/commit/9889ead58a66864a37d01ef0bbe92b4bc8786ff5)) +* **api:** reword docstrings to present tense across resources ([65c1756](https://github.com/togethercomputer/together-py/commit/65c175682f4f8432dfe4d880d6dc0a21acc46655)) + ## 2.12.0 (2026-05-01) Full Changelog: [v2.11.0...v2.12.0](https://github.com/togethercomputer/together-py/compare/v2.11.0...v2.12.0) diff --git a/pyproject.toml b/pyproject.toml index 745a7860..2179ec00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "together" -version = "2.12.0" +version = "2.13.0" description = "The official Python library for the together API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/together/_files.py b/src/together/_files.py index 2f7ac6da..f7507c15 100644 --- a/src/together/_files.py +++ b/src/together/_files.py @@ -99,7 +99,7 @@ async def async_to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles elif is_sequence_t(files): files = [(key, await _async_transform_file(file)) for key, file in files] else: - raise TypeError("Unexpected file type input {type(files)}, expected mapping or sequence") + raise TypeError(f"Unexpected file type input {type(files)}, expected mapping or sequence") return files diff --git a/src/together/_models.py b/src/together/_models.py index 29070e05..8c5ab260 100644 --- a/src/together/_models.py +++ b/src/together/_models.py @@ -25,7 +25,9 @@ ClassVar, Protocol, Required, + Annotated, ParamSpec, + TypeAlias, TypedDict, TypeGuard, final, @@ -79,7 +81,15 @@ from ._constants import RAW_RESPONSE_HEADER if TYPE_CHECKING: + from pydantic import GetCoreSchemaHandler, ValidatorFunctionWrapHandler + from pydantic_core import CoreSchema, core_schema from pydantic_core.core_schema import ModelField, ModelSchema, LiteralSchema, ModelFieldsSchema +else: + try: + from pydantic_core import CoreSchema, core_schema + except ImportError: + CoreSchema = None + core_schema = None __all__ = ["BaseModel", "GenericModel"] @@ -396,6 +406,76 @@ def model_dump_json( ) +class _EagerIterable(list[_T], Generic[_T]): + """ + Accepts any Iterable[T] input (including generators), consumes it + eagerly, and validates all items upfront. + + Validation preserves the original container type where possible + (e.g. a set[T] stays a set[T]). Serialization (model_dump / JSON) + always emits a list — round-tripping through model_dump() will not + restore the original container type. 
+ """ + + @classmethod + def __get_pydantic_core_schema__( + cls, + source_type: Any, + handler: GetCoreSchemaHandler, + ) -> CoreSchema: + (item_type,) = get_args(source_type) or (Any,) + item_schema: CoreSchema = handler.generate_schema(item_type) + list_of_items_schema: CoreSchema = core_schema.list_schema(item_schema) + + return core_schema.no_info_wrap_validator_function( + cls._validate, + list_of_items_schema, + serialization=core_schema.plain_serializer_function_ser_schema( + cls._serialize, + info_arg=False, + ), + ) + + @staticmethod + def _validate(v: Iterable[_T], handler: "ValidatorFunctionWrapHandler") -> Any: + original_type: type[Any] = type(v) + + # Normalize to list so list_schema can validate each item + if isinstance(v, list): + items: list[_T] = v + else: + try: + items = list(v) + except TypeError as e: + raise TypeError("Value is not iterable") from e + + # Validate items against the inner schema + validated: list[_T] = handler(items) + + # Reconstruct original container type + if original_type is list: + return validated + # str(list) produces the list's repr, not a string built from items, + # so skip reconstruction for str and its subclasses. + if issubclass(original_type, str): + return validated + try: + return original_type(validated) + except (TypeError, ValueError): + # If the type cannot be reconstructed, just return the validated list + return validated + + @staticmethod + def _serialize(v: Iterable[_T]) -> list[_T]: + """Always serialize as a list so Pydantic's JSON encoder is happy.""" + if isinstance(v, list): + return v + return list(v) + + +EagerIterable: TypeAlias = Annotated[Iterable[_T], _EagerIterable] + + def _construct_field(value: object, field: FieldInfo, key: str) -> object: if value is None: return field_get_default(field) diff --git a/src/together/_version.py b/src/together/_version.py index 36066028..e2ed9059 100644 --- a/src/together/_version.py +++ b/src/together/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
__title__ = "together" -__version__ = "2.12.0" # x-release-please-version +__version__ = "2.13.0" # x-release-please-version diff --git a/src/together/lib/__init__.py b/src/together/lib/__init__.py index d0208cec..05c6a07e 100644 --- a/src/together/lib/__init__.py +++ b/src/together/lib/__init__.py @@ -10,10 +10,12 @@ UploadManager, DownloadManager, AsyncUploadManager, + AsyncDownloadManager, ) __all__ = [ "DownloadManager", + "AsyncDownloadManager", "AsyncUploadManager", "UploadManager", "FinetuneTrainingLimits", diff --git a/src/together/lib/cli/__init__.py b/src/together/lib/cli/__init__.py index fb700675..e4dc2931 100644 --- a/src/together/lib/cli/__init__.py +++ b/src/together/lib/cli/__init__.py @@ -27,11 +27,43 @@ from together.lib.cli.utils._api_error import try_handle_server_error_message from together.lib.cli.utils._completion import install_completion from together.lib.cli.utils._help_examples import ( + JIG_HELP_EXAMPLES, + EVALS_HELP_EXAMPLES, + FILES_HELP_EXAMPLES, + MODELS_HELP_EXAMPLES, + JIG_LOGS_HELP_EXAMPLES, + JIG_PUSH_HELP_EXAMPLES, ENDPOINTS_HELP_EXAMPLES, + JIG_BUILD_HELP_EXAMPLES, TOP_LEVEL_HELP_EXAMPLES, + JIG_DEPLOY_HELP_EXAMPLES, + JIG_SUBMIT_HELP_EXAMPLES, + FINE_TUNING_HELP_EXAMPLES, + JIG_DESTROY_HELP_EXAMPLES, + JIG_SECRETS_HELP_EXAMPLES, + JIG_VOLUMES_HELP_EXAMPLES, + EVALS_CREATE_HELP_EXAMPLES, + FILES_UPLOAD_HELP_EXAMPLES, + BETA_CLUSTERS_HELP_EXAMPLES, + MODELS_UPLOAD_HELP_EXAMPLES, + JIG_JOB_STATUS_HELP_EXAMPLES, + JIG_SECRETS_SET_HELP_EXAMPLES, ENDPOINTS_CREATE_HELP_EXAMPLES, ENDPOINTS_UPDATE_HELP_EXAMPLES, + JIG_SECRETS_UNSET_HELP_EXAMPLES, ENDPOINTS_HARDWARE_HELP_EXAMPLES, + FINE_TUNING_CREATE_HELP_EXAMPLES, + JIG_SECRETS_DELETE_HELP_EXAMPLES, + JIG_VOLUMES_CREATE_HELP_EXAMPLES, + JIG_VOLUMES_UPDATE_HELP_EXAMPLES, + BETA_CLUSTERS_CREATE_HELP_EXAMPLES, + BETA_CLUSTERS_UPDATE_HELP_EXAMPLES, + FINE_TUNING_DOWNLOAD_HELP_EXAMPLES, + BETA_CLUSTERS_STORAGE_HELP_EXAMPLES, + FILES_RETRIEVE_CONTENT_HELP_EXAMPLES, + BETA_CLUSTERS_STORAGE_CREATE_HELP_EXAMPLES, + BETA_CLUSTERS_STORAGE_UPDATE_HELP_EXAMPLES, + BETA_CLUSTERS_GET_CREDENTIALS_HELP_EXAMPLES, ) from together.lib.cli.utils._help_formatter import help_formatter from together.lib.cli.utils._preparse_tokens import preparse_tokens @@ -58,6 +90,7 @@ def _propagate_global_param_group(target_app: App) -> None: target_app[flag].group = "Global Options" target_app[flag].show = True target_app[flag].help = help_text + target_app.help_epilogue = target_app.help_epilogue or "" except KeyError: pass for sub in target_app.subapps: @@ -300,17 +333,39 @@ async def run_command() -> None: _CLI = "together.lib.cli.api" ## Files API commands -files_app = app.command(App(name="files", help="Upload and manage files")) -files_app.command(f"{_CLI}.files.upload:upload", help="Upload a file for fine-tuning, evals, or inference") +files_app = app.command(App(name="files", help="Upload and manage files", help_epilogue=FILES_HELP_EXAMPLES)) +files_app.command( + f"{_CLI}.files.upload:upload", + help="Upload a file for fine-tuning, evals, or inference", + help_epilogue=FILES_UPLOAD_HELP_EXAMPLES, +) files_app.command(f"{_CLI}.files.list:list", alias="ls", help="List your files") files_app.command(f"{_CLI}.files.retrieve:retrieve", help="Get file details") -files_app.command(f"{_CLI}.files.retrieve_content:retrieve_content", help="Download file contents") +files_app.command(f"{_CLI}.files.retrieve_content:retrieve_content", help="Download file contents", show=False) +files_app.command( + 
f"{_CLI}.files.retrieve_content:retrieve_content", + name="download", + help="Download file contents", + help_epilogue=FILES_RETRIEVE_CONTENT_HELP_EXAMPLES, +) files_app.command(f"{_CLI}.files.delete:delete", alias="-d", help="Delete a file") files_app.command(f"{_CLI}.files.check:check", help="Check a local file for issues") # Fine-tuning API commands -fine_tuning_app = app.command(App(name="fine-tuning", alias="ft", help="Create and manage fine-tuning jobs")) -fine_tuning_app.command((f"{_CLI}.fine_tuning.create:create"), alias="-c", help="Start a new fine-tuning job") +fine_tuning_app = app.command( + App( + name="fine-tuning", + alias="ft", + help="Create and manage fine-tuning jobs", + help_epilogue=FINE_TUNING_HELP_EXAMPLES, + ) +) +fine_tuning_app.command( + (f"{_CLI}.fine_tuning.create:create"), + alias="-c", + help="Start a new fine-tuning job", + help_epilogue=FINE_TUNING_CREATE_HELP_EXAMPLES, +) fine_tuning_app.command((f"{_CLI}.fine_tuning.list:list"), alias="ls", help="List fine-tuning jobs") fine_tuning_app.command((f"{_CLI}.fine_tuning.retrieve:retrieve"), help="Get fine-tuning job details") fine_tuning_app.command((f"{_CLI}.fine_tuning.cancel:cancel"), help="Cancel a fine-tuning job") @@ -322,13 +377,14 @@ async def run_command() -> None: fine_tuning_app.command( (f"{_CLI}.fine_tuning.download:download"), help="Download a fine-tuned model's weights", + help_epilogue=FINE_TUNING_DOWNLOAD_HELP_EXAMPLES, ) fine_tuning_app.command((f"{_CLI}.fine_tuning.delete:delete"), alias="-d", help="Delete a fine-tuning job") ## Models API commands -models_app = app.command(App(name="models", help="List and upload models")) +models_app = app.command(App(name="models", help="List and upload models", help_epilogue=MODELS_HELP_EXAMPLES)) models_app.command((f"{_CLI}.models.list:list"), alias="ls", help="List available models") -models_app.command((f"{_CLI}.models.upload:upload"), help="Upload a model") +models_app.command((f"{_CLI}.models.upload:upload"), help="Upload a model", help_epilogue=MODELS_UPLOAD_HELP_EXAMPLES) ## Endpoints API commands endpoints_app = app.command(App(name="endpoints", help="Deploy and manage dedicated endpoints")) @@ -343,23 +399,24 @@ async def run_command() -> None: help="Create a new endpoint", help_epilogue=ENDPOINTS_CREATE_HELP_EXAMPLES, ) -endpoints_app.command((f"{_CLI}.endpoints.retrieve:retrieve"), help="Get endpoint details", help_epilogue="") -endpoints_app.command((f"{_CLI}.endpoints.stop:stop"), help="Stop an endpoint", help_epilogue="") -endpoints_app.command((f"{_CLI}.endpoints.start:start"), help="Start an endpoint", help_epilogue="") -endpoints_app.command((f"{_CLI}.endpoints.delete:delete"), alias="-d", help="Delete an endpoint", help_epilogue="") -endpoints_app.command((f"{_CLI}.endpoints.list:list"), alias="ls", help="List your endpoints", help_epilogue="") +endpoints_app.command((f"{_CLI}.endpoints.retrieve:retrieve"), help="Get endpoint details") +endpoints_app.command((f"{_CLI}.endpoints.stop:stop"), help="Stop an endpoint") +endpoints_app.command((f"{_CLI}.endpoints.start:start"), help="Start an endpoint") +endpoints_app.command((f"{_CLI}.endpoints.delete:delete"), alias="-d", help="Delete an endpoint") +endpoints_app.command((f"{_CLI}.endpoints.list:list"), alias="ls", help="List your endpoints") endpoints_app.command( (f"{_CLI}.endpoints.update:update"), help="Update an endpoint", help_epilogue=ENDPOINTS_UPDATE_HELP_EXAMPLES ) endpoints_app.command( (f"{_CLI}.endpoints.availability_zones:availability_zones"), help="List availability zones 
for deploying models", - help_epilogue="", ) ## Evals API commands -evals_app = app.command(App(name="evals", help="Run and manage model evaluations")) -evals_app.command((f"{_CLI}.evals.create:create"), alias="-c", help="Create a new eval job") +evals_app = app.command(App(name="evals", help="Run and manage model evaluations", help_epilogue=EVALS_HELP_EXAMPLES)) +evals_app.command( + (f"{_CLI}.evals.create:create"), alias="-c", help="Create a new eval job", help_epilogue=EVALS_CREATE_HELP_EXAMPLES +) evals_app.command((f"{_CLI}.evals.list:list"), alias="ls", help="List eval jobs") evals_app.command((f"{_CLI}.evals.retrieve:retrieve"), help="Get eval job details") evals_app.command((f"{_CLI}.evals.status:status"), help="Get an eval job's status") @@ -377,20 +434,50 @@ async def run_command() -> None: beta_app = app.command(beta_root_app) ### Clusters API commands -clusters_app = beta_app.command(App(name="clusters", help="Create and manage GPU clusters")) +clusters_app = beta_app.command( + App(name="clusters", help="Create and manage GPU clusters", help_epilogue=BETA_CLUSTERS_HELP_EXAMPLES) +) clusters_app.command((f"{_CLI}.beta.clusters.list:list"), alias="ls", help="List your clusters") -clusters_app.command((f"{_CLI}.beta.clusters.create:create"), alias="-c", help="Create a new cluster") +clusters_app.command( + (f"{_CLI}.beta.clusters.create:create"), + alias="-c", + help="Create a new cluster", + help_epilogue=BETA_CLUSTERS_CREATE_HELP_EXAMPLES, +) clusters_app.command((f"{_CLI}.beta.clusters.retrieve:retrieve"), help="Get cluster details") -clusters_app.command((f"{_CLI}.beta.clusters.update:update"), help="Update a cluster") +clusters_app.command( + (f"{_CLI}.beta.clusters.update:update"), + help="Update a cluster", + help_epilogue=BETA_CLUSTERS_UPDATE_HELP_EXAMPLES, +) clusters_app.command((f"{_CLI}.beta.clusters.delete:delete"), alias="-d", help="Delete a cluster") clusters_app.command((f"{_CLI}.beta.clusters.list_regions:list_regions"), help="List regions for deploying clusters") -clusters_app.command((f"{_CLI}.beta.clusters.get_credentials:get_credentials"), help="Get credentials for a cluster") +clusters_app.command( + (f"{_CLI}.beta.clusters.get_credentials:get_credentials"), + help="Get credentials for a cluster", + help_epilogue=BETA_CLUSTERS_GET_CREDENTIALS_HELP_EXAMPLES, +) ### Clusters > Storage API commands -storage_app = clusters_app.command(App(name="storage", help="Manage cluster storage volumes", group="Subcommands")) +storage_app = clusters_app.command( + App( + name="storage", + help="Manage cluster storage volumes", + group="Subcommands", + help_epilogue=BETA_CLUSTERS_STORAGE_HELP_EXAMPLES, + ) +) storage_app.command((f"{_CLI}.beta.clusters.storage.list:list"), alias="ls", help="List storage volumes for a cluster") storage_app.command( - (f"{_CLI}.beta.clusters.storage.create:create"), alias="-c", help="Create a new storage volume for a cluster" + (f"{_CLI}.beta.clusters.storage.create:create"), + alias="-c", + help="Create a new storage volume for a cluster", + help_epilogue=BETA_CLUSTERS_STORAGE_CREATE_HELP_EXAMPLES, +) +storage_app.command( + (f"{_CLI}.beta.clusters.storage.update:update"), + help="Resize a storage volume", + help_epilogue=BETA_CLUSTERS_STORAGE_UPDATE_HELP_EXAMPLES, ) storage_app.command( (f"{_CLI}.beta.clusters.storage.retrieve:retrieve"), @@ -399,48 +486,103 @@ async def run_command() -> None: storage_app.command((f"{_CLI}.beta.clusters.storage.delete:delete"), help="Delete a storage volume", alias="-d") ### Jig commands -jig_app = 
beta_app.command(App(name="jig", help="Build, deploy, and manage custom containers")) +jig_app = beta_app.command( + App(name="jig", help="Build, deploy, and manage custom containers", help_epilogue=JIG_HELP_EXAMPLES) +) jig_app.command((f"{_CLI}.beta.jig.jig:init"), help="Initialize configuration for a Jig deployment") jig_app.command( (f"{_CLI}.beta.jig.jig:dockerfile_cli"), name="dockerfile", help="Generate Dockerfile from jig configuration" ) -jig_app.command((f"{_CLI}.beta.jig.jig:build_cli"), name="build", help="Build container image") -jig_app.command((f"{_CLI}.beta.jig.jig:push_cli"), name="push", help="Push image to registry") -jig_app.command((f"{_CLI}.beta.jig.jig:deploy_cli"), name="deploy", help="Deploy model to Together") +jig_app.command( + (f"{_CLI}.beta.jig.jig:build_cli"), + name="build", + help="Build container image", + help_epilogue=JIG_BUILD_HELP_EXAMPLES, +) +jig_app.command( + (f"{_CLI}.beta.jig.jig:push_cli"), name="push", help="Push image to registry", help_epilogue=JIG_PUSH_HELP_EXAMPLES +) +jig_app.command( + (f"{_CLI}.beta.jig.jig:deploy_cli"), + name="deploy", + help="Deploy model to Together", + help_epilogue=JIG_DEPLOY_HELP_EXAMPLES, +) jig_app.command((f"{_CLI}.beta.jig.jig:status_cli"), name="status", help="Get deployment status") jig_app.command((f"{_CLI}.beta.jig.jig:endpoint_cli"), name="endpoint", help="Get deployment endpoint URL") -jig_app.command((f"{_CLI}.beta.jig.jig:logs_cli"), name="logs", help="Get deployment logs") -jig_app.command((f"{_CLI}.beta.jig.jig:destroy_cli"), name="destroy", help="Destroy deployment") -jig_app.command((f"{_CLI}.beta.jig.jig:submit_cli"), name="submit", help="Submit a job to the deployment") -jig_app.command((f"{_CLI}.beta.jig.jig:job_status_cli"), name="job-status", help="Get status of a specific job") +jig_app.command( + (f"{_CLI}.beta.jig.jig:logs_cli"), name="logs", help="Get deployment logs", help_epilogue=JIG_LOGS_HELP_EXAMPLES +) +jig_app.command( + (f"{_CLI}.beta.jig.jig:destroy_cli"), + name="destroy", + help="Destroy deployment", + help_epilogue=JIG_DESTROY_HELP_EXAMPLES, +) +jig_app.command( + (f"{_CLI}.beta.jig.jig:submit_cli"), + name="submit", + help="Submit a job to the deployment", + help_epilogue=JIG_SUBMIT_HELP_EXAMPLES, +) +jig_app.command( + (f"{_CLI}.beta.jig.jig:job_status_cli"), + name="job-status", + help="Get status of a specific job", + help_epilogue=JIG_JOB_STATUS_HELP_EXAMPLES, +) jig_app.command( (f"{_CLI}.beta.jig.jig:queue_status_cli"), name="queue-status", help="Get queue metrics for the deployment" ) jig_app.command((f"{_CLI}.beta.jig.jig:list_deployments_cli"), name="list", alias="ls", help="List all deployments") -secrets_app = jig_app.command(App(name="secrets", help="Manage deployment secrets", group="Subcommands")) -secrets_app.command((f"{_CLI}.beta.jig.jig:secrets_set_cli"), name="set", help="Set a secret (create or update)") -secrets_app.command((f"{_CLI}.beta.jig.jig:secrets_unset_cli"), name="unset", help="Remove a secret from local state") +secrets_app = jig_app.command( + App(name="secrets", help="Manage deployment secrets", group="Subcommands", help_epilogue=JIG_SECRETS_HELP_EXAMPLES) +) +secrets_app.command( + (f"{_CLI}.beta.jig.jig:secrets_set_cli"), + name="set", + help="Set a secret (create or update)", + help_epilogue=JIG_SECRETS_SET_HELP_EXAMPLES, +) +secrets_app.command( + (f"{_CLI}.beta.jig.jig:secrets_unset_cli"), + name="unset", + help="Remove a secret from local state", + help_epilogue=JIG_SECRETS_UNSET_HELP_EXAMPLES, +) secrets_app.command( 
(f"{_CLI}.beta.jig.jig:secrets_delete_cli"), name="delete", help="Delete a secret and unset it locally", alias="-d", + help_epilogue=JIG_SECRETS_DELETE_HELP_EXAMPLES, ) secrets_app.command( (f"{_CLI}.beta.jig.jig:secrets_list_cli"), name="list", alias="ls", help="List all secrets with sync status" ) ### Jig > volumes -storage_app = jig_app.command(App(name="volumes", help="Manage volumes for Jig deployments", group="Subcommands")) +storage_app = jig_app.command( + App( + name="volumes", + help="Manage volumes for Jig deployments", + group="Subcommands", + help_epilogue=JIG_VOLUMES_HELP_EXAMPLES, + ) +) storage_app.command( (f"{_CLI}.beta.jig.jig:jig_volumes_create_cli"), name="create", alias="-c", help="Create a new volume for a Jig deployment", + help_epilogue=JIG_VOLUMES_CREATE_HELP_EXAMPLES, ) storage_app.command( - (f"{_CLI}.beta.jig.jig:jig_volumes_update_cli"), name="update", help="Update a volume and re-upload files" + (f"{_CLI}.beta.jig.jig:jig_volumes_update_cli"), + name="update", + help="Update a volume and re-upload files", + help_epilogue=JIG_VOLUMES_UPDATE_HELP_EXAMPLES, ) storage_app.command((f"{_CLI}.beta.jig.jig:jig_volumes_delete_cli"), name="delete", help="Delete a volume", alias="-d") storage_app.command( diff --git a/src/together/lib/cli/api/beta/clusters/storage/update.py b/src/together/lib/cli/api/beta/clusters/storage/update.py new file mode 100644 index 00000000..59e00da1 --- /dev/null +++ b/src/together/lib/cli/api/beta/clusters/storage/update.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from typing import Annotated + +from cyclopts import Parameter + +from together._utils._json import openapi_dumps +from together.lib.cli.utils.config import CLIConfigParameter +from together.lib.cli.utils._console import console + + +async def update( + volume_id: str, + size_tib: Annotated[int, Parameter(help="New size of the storage volume in TiB")], + *, + config: CLIConfigParameter, +) -> None: + """Update a storage volume (resize).""" + response = await config.client.beta.clusters.storage.update( + volume_id=volume_id, + size_tib=size_tib, + ) + + if config.json: + console.print_json(openapi_dumps(response).decode("utf-8")) + else: + console.print("[blue]Storage volume updated successfully[/blue]") + console.print(f"[primary]Volume ID:[/primary] {response.volume_id}") diff --git a/src/together/lib/cli/api/evals/create.py b/src/together/lib/cli/api/evals/create.py index 33961ac5..40039774 100644 --- a/src/together/lib/cli/api/evals/create.py +++ b/src/together/lib/cli/api/evals/create.py @@ -1,7 +1,7 @@ from __future__ import annotations -import json from typing import Any, Dict, Union, Literal, Optional, Annotated, cast +from pathlib import Path from cyclopts import Parameter @@ -35,10 +35,10 @@ async def create( input_data_file_path: Annotated[str, Parameter(help="The path to the input data file")], judge_external_api_token: Annotated[ Optional[str], Parameter(help="API token for access to the external judge model") - ], + ] = None, judge_external_base_url: Annotated[ Optional[str], Parameter(help="Base URL for access to the external judge model") - ], + ] = None, model_field: Annotated[ Optional[str], Parameter( @@ -130,6 +130,14 @@ async def create( labels_list = labels.split(",") if labels else None pass_labels_list = pass_labels.split(",") if pass_labels else None + # If the user passes a path to a file, try to upload it to the files API first + # Uploads are idempotent so we can depend on this API always giving us a file ID + if 
_check_path_exists(input_data_file_path): + file_upload = await config.client.files.upload(Path(input_data_file_path), purpose="eval", check=False) + training_file = file_upload.id + else: + training_file = input_data_file_path + model_to_evaluate_final: Union[Dict[str, Any], None, str] = None config_params_provided = any( [ @@ -154,7 +162,7 @@ async def create( model_to_evaluate_final = { "model": model_to_evaluate, "model_source": model_to_evaluate_source, - "max_tokens": model_to_evaluate_max_tokens, + "max_tokens": model_to_evaluate_max_tokens if model_to_evaluate_max_tokens is not None else 16000, "temperature": model_to_evaluate_temperature, "system_template": model_to_evaluate_system_template, "input_template": model_to_evaluate_input_template, @@ -185,7 +193,7 @@ async def create( model_a_final = { "model": model_a, "model_source": model_a_source, - "max_tokens": model_a_max_tokens, + "max_tokens": model_a_max_tokens if model_a_max_tokens is not None else 16000, "temperature": model_a_temperature, "system_template": model_a_system_template, "input_template": model_a_input_template, @@ -216,7 +224,7 @@ async def create( model_b_final = { "model": model_b, "model_source": model_b_source, - "max_tokens": model_b_max_tokens, + "max_tokens": model_b_max_tokens if model_b_max_tokens is not None else 16000, "temperature": model_b_temperature, "system_template": model_b_system_template, "input_template": model_b_input_template, @@ -239,7 +247,7 @@ async def create( response = await config.client.evals.create( type=type_val, parameters=ParametersEvaluationClassifyParameters( - input_data_file_path=input_data_file_path, + input_data_file_path=training_file, judge=judge_config, labels=labels_list or [], pass_labels=pass_labels_list or [], @@ -252,7 +260,7 @@ async def create( response = await config.client.evals.create( type="score", parameters=ParametersEvaluationScoreParameters( - input_data_file_path=input_data_file_path, + input_data_file_path=training_file, judge=judge_config, max_score=max_score, min_score=min_score, @@ -264,7 +272,7 @@ async def create( response = await config.client.evals.create( type=type_val, parameters=ParametersEvaluationCompareParameters( - input_data_file_path=input_data_file_path, + input_data_file_path=training_file, judge=judge_config, model_a=cast(ParametersEvaluationCompareParametersModelAEvaluationModelRequest, model_a_final), model_b=cast(ParametersEvaluationCompareParametersModelBEvaluationModelRequest, model_b_final), @@ -274,7 +282,13 @@ async def create( if config.json: console.print_json(openapi_dumps(response).decode("utf-8")) else: - console.print(json.dumps(response.model_dump(exclude_none=True), indent=4)) + url = f"https://api.together.ai/evaluations/result/{response.workflow_id}" + console.print(f"[green]√ Evaluation job created[/green] [dim]([link={url}]{response.workflow_id}[/link])[/dim]") + console.print(f" Evaluations may take some time to complete.\n") + console.print(f" To retrieve the status:") + console.print(f" [dim]-[/dim] [primary]tg evals status {response.workflow_id}[/primary]") + console.print(f" To get the results:") + console.print(f" [dim]-[/dim] [primary]tg evals {response.workflow_id}[/primary]") def _build_judge( @@ -312,3 +326,12 @@ def _build_judge( if judge_external_base_url: judge_config["external_base_url"] = judge_external_base_url return judge_config + + +def _check_path_exists(path_string: str) -> bool: + if path_string == "": + return False + p = Path(path_string) + if p.is_dir(): + raise ValueError(f"Path 
{path_string} is a directory, not a file. Please provide a file path.") + return p.exists() and p.is_file() diff --git a/src/together/lib/cli/api/evals/list.py b/src/together/lib/cli/api/evals/list.py index 5846a749..bae80bda 100644 --- a/src/together/lib/cli/api/evals/list.py +++ b/src/together/lib/cli/api/evals/list.py @@ -6,8 +6,14 @@ from together import omit from together.types import EvaluationJob +from together.lib.utils import log_debug from together._utils._json import openapi_dumps from together.lib.cli.utils.config import CLIConfigParameter +from together.types.evaluation_job import ( + ResultsEvaluationScoreResults, + ResultsEvaluationCompareResults, + ResultsEvaluationClassifyResults, +) from together.lib.cli.utils._console import console from together.lib.cli.components.list import ListTable from together.lib.cli.components.loader import show_loading_status @@ -45,25 +51,16 @@ async def list( return table = ListTable("Evals", empty_message="No evals found") - table.add_primary_column("Workflow ID", ratio=2) - table.add_column("Type") - table.add_column("Status") - table.add_column("Model") - table.add_column("Model A") - table.add_column("Model B") + table.add_primary_column("Workflow ID", ratio=1) + table.add_column("Type", ratio=1) + table.add_column("Result", ratio=4) for job in data: - model = _get_model_to_evaluate_name(job) - model_a = deep_get(job.parameters, ["model_a", "model"], "") - model_b = deep_get(job.parameters, ["model_b", "model"], "") - status_color = status_colors[job.status] if job.status in status_colors else "white" + result = _get_result(job) table.add_row( f"[link=https://api.together.ai/evaluations/result/{job.workflow_id}]{job.workflow_id}[/link]", job.type, - f"[{status_color}]{job.status}[/{status_color}]", - model, - model_a, - model_b, + result, ) console.print(table) if next_cursor: @@ -74,6 +71,60 @@ async def list( T = TypeVar("T") +def _get_result(job: EvaluationJob) -> str: + try: + if job.status != "completed": + status_color = status_colors[job.status] if job.status in status_colors else "white" + return f"status: [{status_color}]{job.status}[/{status_color}]" + + if job.type == "score": + score_job = cast(ResultsEvaluationScoreResults, job.results) + return "\n".join( + [ + f"mean score: [primary]{getattr(score_job.aggregated_scores, 'mean_score', 'N/A')}[/primary]", + f"pass percentage: [primary]{getattr(score_job.aggregated_scores, 'pass_percentage', 'N/A')}[/primary]", + f"std score: [primary]{getattr(score_job.aggregated_scores, 'std_score', 'N/A')}[/primary]", + ] + ) + + if job.type == "compare": + compare_job = cast(ResultsEvaluationCompareResults, job.results) + if ( + compare_job.a_wins is not None + and compare_job.b_wins is not None + and compare_job.a_wins > compare_job.b_wins + ): + return f"Winning Model: [primary]{_get_model_name(job, 'model_a')}[/primary] (model A)" + elif ( + compare_job.b_wins is not None + and compare_job.a_wins is not None + and compare_job.b_wins > compare_job.a_wins + ): + return f"Winning Model: [primary]{_get_model_name(job, 'model_b')}[/primary] (model B)" + else: + return "[primary]Tie[/primary]" + + if job.type == "classify": + classify_job = cast(ResultsEvaluationClassifyResults, job.results) + if classify_job.label_counts is None: + return "No label counts" + + labels = cast( + dict[str, int], classify_job.label_counts + ) # TODO: API has a bug in the shape of the response, so we need to cast it to the correct type + return "\n".join( + [ + f"label: [primary]{label}[/primary] (count: 
[primary]{count}[/primary])" + for label, count in labels.items() + ] + ) + + return "" + except Exception as e: + log_debug("Error parsing results for evals list", error=e) + return "Internal error" + + def deep_get(dictionary: dict[str, Any] | None, keys: List[str], default: T) -> T: cur = cast(Any, dictionary) for key in keys: @@ -84,15 +135,15 @@ def deep_get(dictionary: dict[str, Any] | None, keys: List[str], default: T) -> return cast(T, cur) -def _get_model_to_evaluate_name(job: EvaluationJob) -> str: +def _get_model_name(job: EvaluationJob, field: str) -> str: """ Get the name of the model to evaluate. Sometimes the parameters.model_to_evaluate is a dict, other times it's a string. """ - model_to_evaluate: str | dict[str, Any] = deep_get(job.parameters, ["model_to_evaluate"], "") + model: str | dict[str, Any] = deep_get(job.parameters, [field], "") - if isinstance(model_to_evaluate, dict): - return deep_get(model_to_evaluate, ["model"], "") + if isinstance(model, dict): + return deep_get(model, ["model"], "") - return model_to_evaluate + return model diff --git a/src/together/lib/cli/api/evals/retrieve.py b/src/together/lib/cli/api/evals/retrieve.py index b0c9601c..e754c697 100644 --- a/src/together/lib/cli/api/evals/retrieve.py +++ b/src/together/lib/cli/api/evals/retrieve.py @@ -3,12 +3,12 @@ from typing import Annotated from cyclopts import Parameter -from rich.markup import escape as escape_rich_markup from together._utils._json import openapi_dumps from together.lib.cli.utils.config import CLIConfigParameter from together.lib.cli.utils._console import console from together.lib.cli.components.loader import show_loading_status +from together.lib.cli.components.model_dump import print_model_dump async def retrieve( @@ -22,9 +22,4 @@ async def retrieve( console.print_json(openapi_dumps(response).decode("utf-8")) return - wid = response.workflow_id or evaluation_id - console.print( - f"[dim]Eval[/dim] [bold]{escape_rich_markup(str(wid))}[/bold] — " - f"[dim]status[/dim] [bold]{escape_rich_markup(str(response.status))}[/bold] — " - f"[dim]type[/dim] [bold]{escape_rich_markup(str(response.type))}[/bold]" - ) + print_model_dump(response) diff --git a/src/together/lib/cli/api/evals/status.py b/src/together/lib/cli/api/evals/status.py index b9730043..4975613b 100644 --- a/src/together/lib/cli/api/evals/status.py +++ b/src/together/lib/cli/api/evals/status.py @@ -18,11 +18,7 @@ async def status( """Get the status and results of a specific evaluation job.""" response = await show_loading_status("Retrieving eval status...", config.client.evals.status(evaluation_id)) if config.json: - console.print_json(openapi_dumps(response).decode("utf-8")) - else: - console.print(f"Status: [bold]{response.status}[/bold]") + console.print_json(openapi_dumps({"status": response.status}).decode("utf-8")) + return - if response.results: - # TODO: Add a pretty print for the results - console.print("\nResults") - console.print_json(openapi_dumps(response.results).decode("utf-8")) + console.print(f"Status: {response.status}") diff --git a/src/together/lib/cli/api/files/upload.py b/src/together/lib/cli/api/files/upload.py index 85615054..c55222e4 100644 --- a/src/together/lib/cli/api/files/upload.py +++ b/src/together/lib/cli/api/files/upload.py @@ -19,7 +19,7 @@ async def upload( file: Annotated[Path, Parameter(required=True, help="The file to upload")], purpose: Annotated[Optional[FilePurpose], Parameter(help="The purpose of the file")] = "fine-tune", - check: Annotated[Optional[bool], Parameter(help="Whether 
to check the file")] = True, + no_check: Annotated[Optional[bool], Parameter(negative=(), help="Skip checking the file for issues")] = False, *, config: CLIConfigParameter, ) -> None: @@ -28,7 +28,7 @@ async def upload( os.environ.setdefault("TOGETHER_DISABLE_TQDM", "true") # Manually handle check here so we can exit and provide the user good error messages - if check: + if not no_check: report = check_file(file) if report["is_check_passed"] is False: if config.json: diff --git a/src/together/lib/cli/api/fine_tuning/create.py b/src/together/lib/cli/api/fine_tuning/create.py index 3ed9034e..53eae7a5 100644 --- a/src/together/lib/cli/api/fine_tuning/create.py +++ b/src/together/lib/cli/api/fine_tuning/create.py @@ -5,6 +5,7 @@ from cyclopts import Group, Parameter, validators +from together import BaseModel from together.types import fine_tuning_estimate_price_params as pe_params from together.lib.utils import log_warn from together.lib.cli.api._utils import ( @@ -15,15 +16,15 @@ from together.lib.cli.utils._console import console from together.lib.cli.components.loader import show_loading_status from together.lib.resources.fine_tuning import async_get_model_limits +from together.lib.cli.components.model_dump import print_model_dump def get_confirmation_message(price: str, warning: str) -> str: return ( - """You are about to create a fine-tuning job. The estimated price of this job is {price} + """You are about to create a fine-tuning job. The estimated price of this job is {price}. The actual cost of your job will be determined by the model size, the number of tokens in the training file, the number of tokens in the validation file, the number of epochs, and the number of evaluations. Visit https://www.together.ai/pricing to learn more about pricing. -{warning} -Do you want to proceed? [Y/n]""" +{warning}""" ).format(price=price, warning=warning) @@ -34,11 +35,13 @@ def get_confirmation_message(price: str, warning: str) -> str: ) -def _check_path_exists(path_string: str) -> bool: - if path_string == "": +def _check_path_exists(path_string: Optional[str]) -> bool: + if path_string == "" or path_string is None: return False p = Path(path_string) - return p.exists() and (p.is_file() or p.is_dir()) + if p.is_dir(): + raise ValueError(f"Path {path_string} is a directory, not a file. 
Please provide a file path.") + return p.exists() and p.is_file() model_group = Group( @@ -58,19 +61,19 @@ async def create( training_file: Annotated[ str, Parameter( - name=["--training-file", "-t"], + alias="-t", help="Training file ID from Files API or local path to a file to upload", ), ], validation_file: Annotated[ - str, + Optional[str], Parameter( - name=["--validation-file", "-v"], + alias="-v", help="Validation file ID from Files API or local path to a file to upload", ), - ] = "", + ] = None, model: Annotated[ - Optional[str], Parameter(group=model_group, help="Name of the base model to run fine-tune job on") + Optional[str], Parameter(group=model_group, alias="-M", help="Name of the base model to run fine-tune job on") ] = None, from_checkpoint: Annotated[ Optional[str], @@ -79,22 +82,20 @@ async def create( help="Checkpoint to continue training from a previous fine-tuning job, formatted as `JOB_ID/OUTPUT_MODEL_NAME:STEP`; STEP is optional and defaults to the final checkpoint", ), ] = None, - n_epochs: Annotated[int, Parameter(name=["--n-epochs", "-ne"], help="Number of epochs to train for")] = 1, + n_epochs: Annotated[int, Parameter(alias="--ne", help="Number of epochs to train for")] = 1, packing: Annotated[bool, Parameter(show_default=True, help="Whether to use packing for training")] = True, n_evals: Annotated[int, Parameter(help="Number of evaluation loops to run")] = 0, max_seq_length: Annotated[int | None, Parameter(help="Maximum sequence length to use for training")] = None, - n_checkpoints: Annotated[int, Parameter(name=["--n-checkpoints", "-c"], help="Number of checkpoints to save")] = 1, + n_checkpoints: Annotated[int, Parameter(alias="-c", help="Number of checkpoints to save")] = 1, batch_size: Annotated[ int | Literal["max"], - Parameter(converter=int_or_max_converter, name=["--batch-size", "-b"], help="Train batch size"), + Parameter(converter=int_or_max_converter, alias="-b", help="Train batch size"), ] = "max", gradient_accumulation_steps: Annotated[ Optional[int], Parameter(help="Number of gradient accumulation steps (increases effective batch size without more memory)"), ] = None, - learning_rate: Annotated[ - float, Parameter(name=["--learning-rate", "-lr"], help="Learning rate") - ] = DEFAULT_LEARNING_RATE, + learning_rate: Annotated[float, Parameter(alias="--lr", help="Learning rate")] = DEFAULT_LEARNING_RATE, lr_scheduler_type: Annotated[ Literal["linear", "cosine"], Parameter(help="Learning rate scheduler type") ] = "cosine", @@ -116,7 +117,10 @@ async def create( ] = "all-linear", training_method: Annotated[ Literal["sft", "dpo"], - Parameter(help="Training method to use: sft (supervised fine-tuning) or dpo (Direct Preference Optimization)"), + Parameter( + alias=("-m"), + help="Training method to use: sft (supervised fine-tuning) or dpo (Direct Preference Optimization)", + ), ] = "sft", dpo_beta: Annotated[Optional[float], Parameter(help="DPO beta parameter")] = None, dpo_normalize_logratios_by_length: Annotated[ @@ -135,7 +139,7 @@ async def create( Parameter(help="Random seed for reproducible training, e.g. 
42; uses the server default if unset"), ] = None, confirm: Annotated[ - bool, Parameter(name=["--confirm", "-y"], help="Whether to skip the launch confirmation message") + bool, Parameter(alias=("-y"), negative=(), help="Whether to skip the launch confirmation message") ] = False, train_on_inputs: Annotated[ Optional[BoolOrAuto], @@ -323,13 +327,23 @@ async def create( ) price_str = f"${finetune_price_estimation_result.estimated_total_price:.2f}" warning = _WARNING_MESSAGE_INSUFFICIENT_FUNDS if not finetune_price_estimation_result.allowed_to_proceed else "" - confirmation_message = get_confirmation_message(price=price_str, warning=warning) if not confirm: - resp = input(confirmation_message).strip().lower() + confirmation_message = get_confirmation_message(price=price_str, warning=warning) + console.print(confirmation_message) + resp = input("Do you want to proceed? [Y/n]").strip().lower() if resp and resp != "y" and resp != "yes": return + + console.print(f"Submitting a fine-tuning job with the following parameters:") + print_model_dump(BaseModel(**training_args), show_nulls=False, expand=False, padding=(0, 2)) + response = await show_loading_status( - "Creating fine-tuning job...", config.client.fine_tuning.create(**training_args, verbose=True) + "Creating fine-tuning job...", config.client.fine_tuning.create(**training_args) + ) + url = f"https://api.together.ai/fine-tuning/{response.id}" + console.print( + f"\n[green]√ Fine-tuning job has been submitted.[/green] [dim]([link={url}]{response.id}[/link])[/dim]" ) - console.print(f"\n\nSuccess! Your fine-tuning job {response.id} has been submitted.") + console.print(f"\n You can track the job's progress with the following command:") + console.print(f" [dim]-[/dim] [primary]tg fine-tuning {response.id}[/primary]") diff --git a/src/together/lib/cli/api/fine_tuning/download.py b/src/together/lib/cli/api/fine_tuning/download.py index 54ff4027..f4d9ae1e 100644 --- a/src/together/lib/cli/api/fine_tuning/download.py +++ b/src/together/lib/cli/api/fine_tuning/download.py @@ -7,8 +7,8 @@ from cyclopts import Parameter -from together import APIError, Together, APIStatusError -from together.lib import DownloadManager +from together import APIError, APIStatusError +from together.lib import AsyncDownloadManager from together._utils._json import openapi_dumps from together.lib.cli.utils.config import CLIConfigParameter from together.lib.cli.utils._console import console @@ -26,7 +26,7 @@ Parameter(name=["--checkpoint-step", "-s"], help="Fine-tuning checkpoint to download; defaults to latest if unset"), ] CheckpointTypeParam = Annotated[ - Literal["merged", "adapter", "default"], + Optional[Literal["merged", "adapter", "default"]], Parameter( name=["--checkpoint-type", "-c"], help="Checkpoint type ('merged' and 'adapter' apply to LoRA jobs only)", @@ -38,7 +38,7 @@ async def download( fine_tune_id: str, output_dir: OutputDirParam = None, checkpoint_step: CheckpointStepParam = None, - checkpoint_type: CheckpointTypeParam = "merged", + checkpoint_type: CheckpointTypeParam = None, *, config: CLIConfigParameter, ) -> None: @@ -56,9 +56,9 @@ async def download( ft_job = await show_loading_status( "Retrieving fine-tuning job...", config.client.fine_tuning.retrieve(fine_tune_id) ) - loosely_typed_checkpoint_type: str = checkpoint_type + loosely_typed_checkpoint_type: str = checkpoint_type if checkpoint_type is not None else "" if isinstance(ft_job.training_type, TrainingTypeFullTrainingType): - if checkpoint_type != "default": + if checkpoint_type is not None 
and checkpoint_type != "default": raise ValueError("Only DEFAULT checkpoint type is allowed for FullTrainingType") loosely_typed_checkpoint_type = "model_output_path" elif isinstance(ft_job.training_type, TrainingTypeLoRaTrainingType): @@ -87,15 +87,7 @@ async def download( os.environ.setdefault("TOGETHER_DISABLE_TQDM", "true") try: - # TODO: This is a temporary hack, - # We need to make the DownloadManager async so we can use the async client. - sync_client = Together( - api_key=config.client.api_key, - base_url=config.client.base_url, - timeout=config.client.timeout, - max_retries=config.client.max_retries, - ) - file_path, file_size = DownloadManager(sync_client).download( + file_path, file_size = await AsyncDownloadManager(config.client).download( url=url, output=output, remote_name=ft_job.x_model_output_name, diff --git a/src/together/lib/cli/api/fine_tuning/retrieve.py b/src/together/lib/cli/api/fine_tuning/retrieve.py index 70b37eeb..64bc6837 100644 --- a/src/together/lib/cli/api/fine_tuning/retrieve.py +++ b/src/together/lib/cli/api/fine_tuning/retrieve.py @@ -1,175 +1,18 @@ from __future__ import annotations -from typing import Any, cast from datetime import datetime -from rich.markup import escape as escape_rich_markup - -from together.lib.utils import convert_bytes, finetune_price_to_dollars from together._utils._json import openapi_dumps -from together.lib.utils.tools import format_datetime from together.lib.cli.api._utils import generate_progress_bar from together.lib.cli.utils.config import CLIConfigParameter from together.lib.types.fine_tuning import COMPLETED_STATUSES from together.lib.cli.utils._console import console -from together.types.finetune_response import FinetuneResponse from together.lib.cli.components.loader import show_loading_status -from together.lib.cli.api.fine_tuning.list import status_colors +from together.lib.cli.components.model_dump import print_model_dump _NEST_INDENT = 4 -def _plain(v: Any) -> str | None: - """Plain escaped text for a scalar, or None if missing.""" - if v is None: - return None - if isinstance(v, bool): - return "yes" if v else "no" - if isinstance(v, float): - s = f"{v:g}" if not v.is_integer() else str(int(v)) - return escape_rich_markup(s) - if isinstance(v, int): - return escape_rich_markup(f"{v:,}") - return escape_rich_markup(str(v)) - - -def _plain_dt(v: datetime | None) -> str | None: - if v is None: - return None - try: - return escape_rich_markup(format_datetime(v)) - except Exception: - return escape_rich_markup(str(v)) - - -def _plain_price(nano: int | None) -> str | None: - if nano is None: - return None - dollars = finetune_price_to_dollars(float(nano)) - return escape_rich_markup(f"${dollars:,.2f}") - - -def _plain_bytes(n: int | None) -> str | None: - if n is None: - return None - s = convert_bytes(float(n)) - return escape_rich_markup(s or str(n)) - - -def _print_kv(label: str, text: str | None) -> None: - lab = escape_rich_markup(label) - if text is not None: - console.print(f"[dim]{lab}:[/dim] [white]{text}[/white]") - - -def _as_jsonlike(obj: Any) -> Any: - if obj is None: - return None - if hasattr(obj, "model_dump"): - return obj.model_dump(mode="json") - return obj - - -def _walk_jsonlike(data: Any, indent: int) -> None: - """Print JSON-like dict/list trees as indented key/value lines (same style as top-level).""" - pad = " " * indent - if data is None: - console.print(f"{pad}[dim]—[/dim]") - return - if isinstance(data, dict): - d = cast(dict[str, Any], data) - if not d: - 
console.print(f"{pad}[dim](empty)[/dim]") - return - for key in sorted(d, key=str): - v = d[key] - kdisp = escape_rich_markup(str(key)) - if isinstance(v, dict): - console.print(f"{pad}[dim]{kdisp}:[/dim]") - _walk_jsonlike(v, indent + _NEST_INDENT) - elif isinstance(v, list): - console.print(f"{pad}[dim]{kdisp}:[/dim]") - _walk_jsonlike(v, indent + _NEST_INDENT) - else: - sv = _plain(v) - if sv is not None: - console.print(f"{pad}[dim]{kdisp}:[/dim] [white]{sv}[/white]") - return - if isinstance(data, list): - lst = cast(list[Any], data) - if not lst: - console.print(f"{pad}[dim]—[/dim]") - return - for i, item in enumerate(lst): - if isinstance(item, dict): - console.print(f"{pad}[dim][{i}][/dim]") - _walk_jsonlike(item, indent + _NEST_INDENT) - elif isinstance(item, list): - console.print(f"{pad}[dim][{i}][/dim]") - _walk_jsonlike(item, indent + _NEST_INDENT) - else: - sv = _plain(item) - if sv is None: - console.print(f"{pad}[dim][{i}]:[/dim] [dim]—[/dim]") - else: - console.print(f"{pad}[dim][{i}]:[/dim] [white]{sv}[/white]") - return - sv = _plain(data) - console.print(f"{pad}[white]{sv}[/white]" if sv else f"{pad}[dim]—[/dim]") - - -def _print_nested_section(title: str, obj: Any, indent: int = _NEST_INDENT) -> None: - console.print(f"[dim]{escape_rich_markup(title)}:[/dim]") - if obj is None: - console.print(" " * indent + "[dim]—[/dim]") - return - _walk_jsonlike(_as_jsonlike(obj), indent) - - -def _print_job_details(r: FinetuneResponse, fine_tune_id: str) -> None: - sc = status_colors.get(r.status, "white") - _print_kv("Job ID", _plain(r.id)) - console.print(f"[dim]{escape_rich_markup('Status')}:[/dim] [bold {sc}]{escape_rich_markup(r.status)}[/bold {sc}]") - _print_kv("Model Name", _plain(r.x_model_output_name)) - _print_kv("Total price", _plain_price(r.total_price)) - _print_kv("Created", _plain_dt(r.created_at)) - _print_kv("Started", _plain_dt(r.started_at)) - _print_kv("Updated", _plain_dt(r.updated_at)) - - console.print(f"\n[dim]Training Data:[/dim]") - _print_kv(" Base model", _plain(r.model)) - _print_kv(" Training file", _plain(r.training_file)) - _print_kv(" Validation file", _plain(r.validation_file)) - _print_kv(" Training lines", _plain(r.trainingfile_numlines)) - _print_kv(" Training file size", _plain_bytes(r.trainingfile_size)) - _print_kv(" From checkpoint", _plain(r.from_checkpoint)) - _print_kv(" From HF model", _plain(r.from_hf_model)) - _print_kv(" HF model revision", _plain(r.hf_model_revision)) - _print_kv(" Batch size", _plain(r.batch_size)) - _print_kv(" Learning rate", _plain(r.learning_rate)) - _print_kv(" Warmup ratio", _plain(r.warmup_ratio)) - _print_kv(" Weight decay", _plain(r.weight_decay)) - _print_kv(" Max grad norm", _plain(r.max_grad_norm)) - _print_kv(" Train on inputs", _plain(r.train_on_inputs)) - _print_kv(" Epochs (configured)", _plain(r.n_epochs)) - _print_kv(" Epochs completed", _plain(r.epochs_completed)) - _print_kv(" Checkpoints to save", _plain(r.n_checkpoints)) - _print_kv(" Eval loops", _plain(r.n_evals)) - _print_kv(" Eval steps", _plain(r.eval_steps)) - _print_kv(" Token count", _plain(r.token_count)) - _print_kv(" Parameter count", _plain(r.param_count)) - _print_kv(" Queue depth", _plain(r.queue_depth)) - _print_nested_section(" LR scheduler", r.lr_scheduler) - _print_nested_section(" Training type", r.training_type) - _print_nested_section(" Training method", r.training_method) - _print_nested_section(" Multimodal params", r.multimodal_params) - - if r.events: - console.print("\n[dim]FT Events:[/dim]") - console.print(f" 
[dim]Total events:[/dim] {len(r.events)}") - console.print(f" [dim]To see event log data run[/dim] tg fine-tuning list-events {fine_tune_id}") - - async def retrieve( fine_tune_id: str, *, @@ -184,11 +27,15 @@ async def retrieve( console.print_json(openapi_dumps(response).decode("utf-8")) return - if response.status in COMPLETED_STATUSES: - _print_job_details(response, fine_tune_id) - return + event_count = len(response.events) if response.events else 0 + response.events = None - progress_text = generate_progress_bar(response, datetime.now().astimezone(), use_rich=True) + if response.status not in COMPLETED_STATUSES: + progress_text = generate_progress_bar(response, datetime.now().astimezone(), use_rich=True) + console.print(progress_text) - console.print(f"[bold primary]Fine-tuning job[/bold primary] [dim]{escape_rich_markup(response.id)}[/dim]") - console.print(progress_text) + print_model_dump(response, show_nulls=False) + if event_count > 0: + console.print("\n[dim]FT Events:[/dim]") + console.print(f" [dim]Total events:[/dim] {event_count}") + console.print(f" [dim]To see event log data run[/dim] tg fine-tuning list-events {fine_tune_id}") diff --git a/src/together/lib/cli/components/model_dump.py b/src/together/lib/cli/components/model_dump.py new file mode 100644 index 00000000..fe2e2d19 --- /dev/null +++ b/src/together/lib/cli/components/model_dump.py @@ -0,0 +1,88 @@ +from __future__ import annotations + +from typing import Any, cast +from datetime import datetime + +from rich.table import Table +from rich.padding import PaddingDimensions + +from together import BaseModel +from together.lib.utils.tools import format_datetime +from together.lib.cli.utils._console import console + + +def print_model_dump( + model: BaseModel, show_nulls: bool = True, expand: bool = True, padding: PaddingDimensions = (0, 1, 0, 0) +) -> None: + """Print an entire model with __decent__ formatting.""" + + def _pretty_print_results( + results: Any, show_nulls: bool = True, expand: bool = False, padding: PaddingDimensions = (0, 1, 0, 0) + ) -> Table: + table = Table(show_header=False, box=None, padding=padding, expand=expand) + table.add_column("Key", style="dim") + table.add_column("Value", justify="left") + if isinstance(results, dict): + for key, value in cast(dict[str, Any], results).items(): + if not show_nulls and (value is None or value == ""): + continue + if isinstance(value, dict) or isinstance(value, list): + table.add_row(_humanize_key(key), _pretty_print_results(value)) + else: + table.add_row(_humanize_key(key), _colorize_value(value)) + elif isinstance(results, list): + for item in cast(list[Any], results): + if not show_nulls and item is None: + continue + table.add_row("-", _pretty_print_results(item)) + elif isinstance(results, BaseModel): + table.add_row("", _pretty_print_results(results.model_dump(), show_nulls=show_nulls)) + else: + table.add_row("", _colorize_value(results)) + return table + + def _humanize_key(key: str) -> str: + return f"{key.replace('_', ' ').title()}:" + + def _colorize_value(value: Any) -> str: + if value is None: + return "[dim italic]n/a[/dim italic]" + if isinstance(value, bool): + return f"[bold blue]{value}[/bold blue]" + if isinstance(value, float): + return f"[bold blue]{value:g}[/bold blue]" + if isinstance(value, int): + return f"[bold blue]{value:d}[/bold blue]" + if isinstance(value, datetime): + return f"[bold blue]{format_datetime(value)}[/bold blue]" + + value = str(value) + value = value.replace("\n", "\\n") + value = value.replace("\t", "\\t") + + 
return f"[bold blue]{value}[/bold blue]" + + def _dump_sorted_model(model: BaseModel) -> dict[str, Any]: + """Returns a model dump where the properties are sorted by their type: + - ID fields first + - Primitives next + - Dicts/objects next + - Lists last + """ + + def _sort_items(key: str, value: Any) -> int: + # Returns a sort key: 0 for ID fields, 1 for primitives, 2 for dicts/objects, 3 for lists + if key.endswith("_id"): + return 0 + elif isinstance(value, dict) or isinstance(value, BaseModel): + return 2 + elif isinstance(value, list): + return 3 + else: + return 1 + + return dict(sorted(model.model_dump().items(), key=lambda kv: _sort_items(kv[0], kv[1]))) + + console.print( + _pretty_print_results(_dump_sorted_model(model), show_nulls=show_nulls, expand=expand, padding=padding) + ) diff --git a/src/together/lib/cli/utils/_console.py b/src/together/lib/cli/utils/_console.py index a3b1ac21..ad896db9 100644 --- a/src/together/lib/cli/utils/_console.py +++ b/src/together/lib/cli/utils/_console.py @@ -30,4 +30,4 @@ } ) -console = Console(theme=custom_theme) +console = Console(theme=custom_theme, highlight=False) diff --git a/src/together/lib/cli/utils/_help_examples.py b/src/together/lib/cli/utils/_help_examples.py index d7a30353..224eee8a 100644 --- a/src/together/lib/cli/utils/_help_examples.py +++ b/src/together/lib/cli/utils/_help_examples.py @@ -11,6 +11,103 @@ [primary]tg models upload --model-name my-org/my-model --model-source s3-or-hugging-face[/primary] """ +## Files API commands + +FILES_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Upload a file for fine-tuning: + [primary]tg files upload ./my-dataset.jsonl --purpose fine-tune[/primary] + +[dim]-[/dim] Check a local file for issues: + [primary]tg files check ./my-dataset.jsonl[/primary] + +[dim]-[/dim] Remove a file from Together: + [primary]tg files delete [/primary] + +[dim]-[/dim] Download a file: + [primary]tg files download --output ./datasets[/primary] +""" + +FILES_UPLOAD_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Upload a file for fine-tuning: + [primary]tg files upload ./my-dataset.jsonl --purpose fine-tune[/primary] + +[dim]-[/dim] Upload a file for evals: + [primary]tg files upload ./my-dataset.jsonl --purpose evals[/primary] + +[dim]-[/dim] Skip file checks: + [primary]tg files upload ./my-dataset.jsonl --no-check[/primary] +""" + +FILES_RETRIEVE_CONTENT_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Download a file: + [primary]tg files download --output ./datasets[/primary] + +[dim]-[/dim] Print file contents to stdout: + [primary]tg files download --stdout[/primary] +""" + +## Models API commands +MODELS_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] List all models: + [primary]tg models list[/primary] + +[dim]-[/dim] Upload a model: + [primary]tg models upload --model-name my-model --model-source s3-or-hugging-face[/primary] +""" + +MODELS_UPLOAD_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Upload a model from S3: + [primary]tg models upload \\ + --model-name my-model \\ + --model-source $(aws s3 presign s3://my-bucket/my-model)[/primary] + +[dim]-[/dim] Upload private model from Hugging Face: + [primary]tg models upload \\ + --model-name my-model \\ + --model-source my-org/model-name \\ + --hf-token $HUGGING_FACE_TOKEN[/primary] +""" + +## Fine-tuning API commands +FINE_TUNING_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Create a fine-tuning job: + [primary]tg ft create --model Qwen/Qwen2-1.5B --training-file ./my-dataset.jsonl[/primary] + +[dim]-[/dim] 
Retrieve a fine-tuning job details: + [primary]tg ft [/primary] + +[dim]-[/dim] Download a fine-tuned model's weights: + [primary]tg ft download --output-dir ./my-model[/primary] + +[dim]-[/dim] List checkpoints for a fine-tuning job: + [primary]tg ft list-checkpoints [/primary] + +[dim]-[/dim] Cancel a fine-tuning job: + [primary]tg ft cancel [/primary] +""" + +FINE_TUNING_CREATE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Start a supervised fine-tuning job: + [primary]tg ft create -M Qwen/Qwen2-1.5B -t ./my-dataset.jsonl[/primary] + +[dim]-[/dim] Start a preference fine-tuning job: + [primary]tg ft create -m dpo -M Qwen/Qwen2-1.5B -t ./dpo_train_file.jsonl[/primary] + +[dim]-[/dim] Start a fine-tuning job from a checkpoint: + [primary]tg ft create --from-checkpoint JOB_ID/OUTPUT_MODEL_NAME:STEP --training-file ./updated-dataset.jsonl[/primary] + +[dim]-[/dim] Specify the number of checkpoints to save: + [primary]tg ft create --n-checkpoints 3 -M Qwen/Qwen2-1.5B --training-file ./my-dataset.jsonl[/primary] +""" + +FINE_TUNING_DOWNLOAD_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Download a fine-tuned model's weights: + [primary]tg ft download --output-dir ./my-model[/primary] + +[dim]-[/dim] Download a fine-tuned model's weights from a specific checkpoint: + [primary]tg ft download --checkpoint-step 1 --output-dir ./my-model[/primary] +""" + ## Endpoints API commands ENDPOINTS_HELP_EXAMPLES = """[dim]Examples:[/dim] @@ -65,3 +162,306 @@ [dim]-[/dim] Change the auto-stop timeout for an endpoint: [primary]tg endpoints update ENDPOINT_ID --inactive-timeout 30[/primary] """ + +## Evals API commands + +EVALS_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Look at the examples for creating an evaluation job: + [primary]tg evals create --help[/primary] + +[dim]-[/dim] List all evaluation jobs: + [primary]tg evals ls[/primary] + +[dim]-[/dim] Check the status of an evaluation job: + [primary]tg evals status [/primary] + +[dim]-[/dim] Get details of an evaluation job: + [primary]tg evals [/primary] +""" + +EVALS_CREATE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Run a classification evaluation: + [primary]tg evals create \\ + --type classify \\ + --judge-model deepseek-ai/DeepSeek-V3.1 \\ + --judge-model-source serverless \\ + --judge-system-template "You are a helpful assistant" \\ + --input-data-file-path ./data.jsonl \\ + --model-to-evaluate deepseek-ai/DeepSeek-V3.1 \\ + --model-to-evaluate-source serverless \\ + --model-to-evaluate-system-template "Respond to the following comment. You can be informal but maintain a respectful tone." \\ + --model-to-evaluate-input-template "Here's a comment I saw online. How would you respond to it?\\n\\n{{question}}" \\ + --labels 'Toxic,Non-toxic' \\ + --pass-labels 'Non-toxic'[/primary] + +[dim]-[/dim] Run a score evaluation: + [primary]tg evals create \\ + --type score \\ + --judge-model deepseek-ai/DeepSeek-V3.1 \\ + --judge-model-source serverless \\ + --judge-system-template "Rate the given response on a scale from 1 to 10, where 1 is generic and 10 is unique." \\ + --input-data-file-path ./data.jsonl \\ + --model-to-evaluate deepseek-ai/DeepSeek-V3.1 \\ + --model-to-evaluate-source serverless \\ + --model-to-evaluate-system-template "You are a helpful assistant." 
\\ + --model-to-evaluate-input-template $'Please respond:\\n\\n{{prompt}}' \\ + --model-to-evaluate-max-tokens 512 \\ + --model-to-evaluate-temperature 0.7 \\ + --min-score 1 \\ + --max-score 10 \\ + --pass-threshold 7 + [/primary] + +[dim]-[/dim] Run a compare evaluation: + [primary]tg evals create \\ + --type compare \\ + --judge-model deepseek-ai/DeepSeek-V3.1 \\ + --judge-model-source serverless \\ + --judge-system-template "You are an expert judge. Given the user task and two model responses, say which is better and why." \\ + --input-data-file-path ./examples/eval_compare_sample.jsonl \\ + --model-a deepseek-ai/DeepSeek-V3.1 \\ + --model-a-source serverless \\ + --model-a-system-template "You are a helpful assistant." \\ + --model-a-input-template $'Answer the following:\\n\\n{{prompt}}' \\ + --model-b deepseek-ai/DeepSeek-V3.1 \\ + --model-b-source serverless \\ + --model-b-system-template "You are a concise assistant." \\ + --model-b-input-template $'Answer the following:\\n\\n{{prompt}}'[/primary] +""" + +## Beta clusters API commands + +BETA_CLUSTERS_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] List clusters and regions: + [primary]tg beta clusters list[/primary] + [primary]tg beta clusters list-regions[/primary] + +[dim]-[/dim] Write kubeconfig for a cluster (default ~/.kube/config): + [primary]tg beta clusters get-credentials [/primary] + +[dim]-[/dim] Print kubeconfig to stdout: + [primary]tg beta clusters get-credentials --file -[/primary] + +[dim]-[/dim] Non-interactive cluster create (see [primary]tg beta clusters create --help[/primary] for flags): + [primary]tg beta clusters create --non-interactive \\ + --name my-cluster --cluster-type KUBERNETES --gpu-type H100_SXM \\ + --region us-central-8 --num-gpus 8 --billing-type ON_DEMAND \\ + --nvidia-driver-version 565 --cuda-version 12.6 --volume [/primary] + +[dim]-[/dim] Update or delete a cluster: + [primary]tg beta clusters update --num-gpus 16 --cluster-type KUBERNETES[/primary] + [primary]tg beta clusters delete [/primary] +""" + +BETA_CLUSTERS_CREATE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Create interactively (prompts for region, GPUs, drivers, etc.): + [primary]tg beta clusters create[/primary] + +[dim]-[/dim] Create without prompts (supply every required field): + [primary]tg beta clusters create --non-interactive \\ + --name my-cluster \\ + --cluster-type KUBERNETES \\ + --gpu-type H100_SXM \\ + --region us-central-8 \\ + --num-gpus 8 \\ + --billing-type ON_DEMAND \\ + --nvidia-driver-version 565 \\ + --cuda-version 12.6 \\ + --volume [/primary] +""" + +BETA_CLUSTERS_GET_CREDENTIALS_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Merge cluster kubeconfig into the default file ([primary]~/.kube/config[/primary]): + [primary]tg beta clusters get-credentials [/primary] + +[dim]-[/dim] Write to a specific path: + [primary]tg beta clusters get-credentials --file ./my-kubeconfig[/primary] + +[dim]-[/dim] Print kubeconfig to stdout (no file write): + [primary]tg beta clusters get-credentials --file -[/primary] + +[dim]-[/dim] Use a custom context name in the merged kubeconfig: + [primary]tg beta clusters get-credentials --context-name my-prod-k8s[/primary] + +[dim]-[/dim] On name conflicts with an existing kubeconfig, replace the entry: + [primary]tg beta clusters get-credentials --overwrite-existing[/primary] + +[dim]-[/dim] Set this cluster as the default kube context after merge: + [primary]tg beta clusters get-credentials --set-default-context[/primary] +""" + 
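These `*_HELP_EXAMPLES` constants are plain Rich markup rendered through the CLI's shared console. Below is a minimal sketch of how one renders, with a stand-in theme (the real `custom_theme` lives in `_console.py` and its style definitions are not shown in this diff); `highlight=False` mirrors the `_console.py` change above and keeps Rich from re-colorizing numbers and paths inside the hand-formatted examples:

```python
from rich.console import Console
from rich.theme import Theme

# Stand-in theme: the real styles come from _console.py's custom_theme,
# which this diff does not show, so "bold cyan" is illustrative only.
theme = Theme({"primary": "bold cyan"})

# highlight=False matches the change above: without it, Rich would
# auto-highlight numbers, paths, and quoted strings in the examples.
console = Console(theme=theme, highlight=False)

console.print(
    """[dim]Examples:[/dim]
[dim]-[/dim] List all evaluation jobs:
  [primary]tg evals ls[/primary]"""
)
```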
+BETA_CLUSTERS_UPDATE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Change GPU count: + [primary]tg beta clusters update --num-gpus 16[/primary] + +[dim]-[/dim] Change cluster type: + [primary]tg beta clusters update --cluster-type KUBERNETES[/primary] + +[dim]-[/dim] Update both: + [primary]tg beta clusters update --num-gpus 16 --cluster-type KUBERNETES[/primary] +""" + +BETA_CLUSTERS_STORAGE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] List storage volumes: + [primary]tg beta clusters storage list[/primary] + +[dim]-[/dim] Create or resize a volume (see subcommand help for options): + [primary]tg beta clusters storage create --region us-east-1 --size-tib 1 --volume-name my-data[/primary] + [primary]tg beta clusters storage update --size-tib 4[/primary] + +[dim]-[/dim] Use a volume when creating a cluster: + [primary]tg beta clusters create --non-interactive ... --volume [/primary] +""" + +BETA_CLUSTERS_STORAGE_CREATE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Create a 1 TiB volume in a region ([primary]tg beta clusters list-regions[/primary] lists regions): + [primary]tg beta clusters storage create \\ + --region us-east-1 \\ + --size-tib 1 \\ + --volume-name my-training-data[/primary] + +[dim]-[/dim] Attach the volume when creating a cluster: + [primary]tg beta clusters create --non-interactive ... --volume [/primary] +""" + +BETA_CLUSTERS_STORAGE_UPDATE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Grow a volume to 4 TiB: + [primary]tg beta clusters storage update --size-tib 4[/primary] +""" + +## Beta > Jig commands + +JIG_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Bootstrap config and deploy from the current directory: + [primary]tg beta jig init[/primary] + [primary]tg beta jig deploy[/primary] + +[dim]-[/dim] Inspect a deployment and stream logs: + [primary]tg beta jig status[/primary] + [primary]tg beta jig logs --follow[/primary] + +[dim]-[/dim] List deployments or tear one down: + [primary]tg beta jig list[/primary] + [primary]tg beta jig destroy[/primary] +""" + +JIG_SECRETS_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Add or rotate a secret for this deployment: + [primary]tg beta jig secrets set HF_TOKEN "$HF_TOKEN"[/primary] + +[dim]-[/dim] List secrets and sync status: + [primary]tg beta jig secrets list[/primary] + +[dim]-[/dim] Remove a secret remotely and locally: + [primary]tg beta jig secrets delete OLD_KEY[/primary] +""" + +JIG_VOLUMES_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Create a volume and upload a directory: + [primary]tg beta jig volumes create --name model-weights --source ./weights[/primary] + +[dim]-[/dim] List volumes for the deployment: + [primary]tg beta jig volumes list[/primary] + +[dim]-[/dim] Refresh volume contents from disk: + [primary]tg beta jig volumes update --name model-weights --source ./weights[/primary] +""" + +JIG_BUILD_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Build with default tag ([primary]latest[/primary]): + [primary]tg beta jig build[/primary] + +[dim]-[/dim] Build a tagged image with warmup (torch compile cache): + [primary]tg beta jig build --tag v1 --warmup[/primary] + +[dim]-[/dim] Pass extra Docker build arguments: + [primary]tg beta jig build --docker-args '--no-cache'[/primary] +""" + +JIG_PUSH_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Push the default ([primary]latest[/primary]) image: + [primary]tg beta jig push[/primary] + +[dim]-[/dim] Push a specific tag: + [primary]tg beta jig push --tag v1[/primary] +""" + 
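How these constants are attached to commands is not shown in this diff, so the wiring below is an assumption for illustration: a click-style command could surface one as its help epilog. Vanilla click would print the `[primary]`/`[dim]` tags literally, so the real CLI presumably renders help text through the themed console sketched earlier:

```python
import click

from together.lib.cli.utils._help_examples import JIG_PUSH_HELP_EXAMPLES


# Hypothetical wiring: expose the examples under `tg beta jig push --help`.
# A Rich-aware help formatter is assumed to render the markup tags.
@click.command(epilog=JIG_PUSH_HELP_EXAMPLES)
@click.option("--tag", default="latest", help="Image tag to push.")
def push(tag: str) -> None:
    """Push a built image to the deployment registry."""
    click.echo(f"pushing tag {tag}")


if __name__ == "__main__":
    push()
```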
+JIG_DEPLOY_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Build, push, and deploy from config in the current directory: + [primary]tg beta jig deploy[/primary] + +[dim]-[/dim] Deploy using an image that is already in the registry (skip build/push): + [primary]tg beta jig deploy --image my-registry.example.com/my-org/my-model:abc123[/primary] + +[dim]-[/dim] Only build and push; do not update the deployment: + [primary]tg beta jig deploy --build-only[/primary] + +[dim]-[/dim] Start deploy and return immediately without waiting: + [primary]tg beta jig deploy --detach[/primary] +""" + +JIG_DESTROY_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Tear down the deployment for this project ([primary]jig.toml[/primary] / [primary]pyproject.toml[/primary]): + [primary]tg beta jig destroy[/primary] +""" + +JIG_LOGS_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Print recent logs once: + [primary]tg beta jig logs[/primary] + +[dim]-[/dim] Stream logs ([primary]Ctrl+C[/primary] to stop): + [primary]tg beta jig logs --follow[/primary] +""" + +JIG_SUBMIT_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Submit a simple prompt job: + [primary]tg beta jig submit --prompt "Hello, world!"[/primary] + +[dim]-[/dim] Submit with a JSON payload (advanced request body): + [primary]tg beta jig submit --payload '{"prompt":"Explain transformers","max_tokens":256}'[/primary] + +[dim]-[/dim] Submit and poll until the job finishes: + [primary]tg beta jig submit --prompt "Summarize this README." --watch[/primary] +""" + +JIG_JOB_STATUS_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Look up a job by request ID (from submit output): + [primary]tg beta jig job-status --request-id [/primary] + +[dim]-[/dim] Machine-readable status: + [primary]tg beta jig job-status --request-id --json[/primary] +""" + +JIG_SECRETS_SET_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Create or update a secret from the shell: + [primary]tg beta jig secrets set HF_TOKEN "$HF_TOKEN"[/primary] + +[dim]-[/dim] Set a secret with a description (shown in listings): + [primary]tg beta jig secrets set API_KEY "$API_KEY" --description "Third-party API credentials"[/primary] +""" + +JIG_SECRETS_UNSET_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Drop a secret from local state only (does not delete remotely): + [primary]tg beta jig secrets unset OLD_KEY[/primary] +""" + +JIG_SECRETS_DELETE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Delete the secret on the server and remove it locally: + [primary]tg beta jig secrets delete REVOKED_KEY[/primary] +""" + +JIG_VOLUMES_CREATE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Create a volume and upload files from a directory: + [primary]tg beta jig volumes create --name model-weights --source ./weights[/primary] + +[dim]-[/dim] Same using positional arguments: + [primary]tg beta jig volumes create model-weights ./weights[/primary] +""" + +JIG_VOLUMES_UPDATE_HELP_EXAMPLES = """[dim]Examples:[/dim] +[dim]-[/dim] Upload a new directory tree as the next volume version: + [primary]tg beta jig volumes update --name model-weights --source ./weights[/primary] + +[dim]-[/dim] Positional form: + [primary]tg beta jig volumes update model-weights ./weights[/primary] +""" diff --git a/src/together/lib/resources/__init__.py b/src/together/lib/resources/__init__.py index 52e9ef43..be20bf72 100644 --- a/src/together/lib/resources/__init__.py +++ b/src/together/lib/resources/__init__.py @@ -2,10 +2,12 @@ UploadManager, DownloadManager, AsyncUploadManager, + 
AsyncDownloadManager, ) __all__ = [ "DownloadManager", + "AsyncDownloadManager", "UploadManager", "AsyncUploadManager", ] diff --git a/src/together/lib/resources/files.py b/src/together/lib/resources/files.py index 29fcc3f9..f3e79918 100644 --- a/src/together/lib/resources/files.py +++ b/src/together/lib/resources/files.py @@ -197,7 +197,7 @@ def download( raise APIStatusError( "Error downloading file", response=e.response, - body=e.response, + body=e.body, ) from e if not fetch_metadata: @@ -267,7 +267,7 @@ def download( raise APIStatusError( "Error downloading file", response=e.response, - body=e.response, + body=e.body, ) from e # Close the response @@ -287,6 +287,175 @@ def download( return str(file_path.resolve()), file_size +class AsyncDownloadManager(AsyncAPIResource): + async def get_file_metadata( + self, + url: str, + output: Path | None = None, + remote_name: str | None = None, + fetch_metadata: bool = False, + ) -> Tuple[Path, int]: + """ + gets remote file head and parses out file name and file size + """ + + if not fetch_metadata: + if isinstance(output, Path): + file_path = output + else: + assert isinstance(remote_name, str) + file_path = Path(remote_name) + + return file_path, 0 + + try: + response = await self._client.get( + path=url, + options=RequestOptions( + headers={"Range": "bytes=0-1"}, + ), + cast_to=httpx.Response, + stream=False, + ) + except APIStatusError as e: + raise APIStatusError( + "Error fetching file metadata", + response=e.response, + body=e.body, + ) from e + + headers = response.headers + + assert isinstance(headers, httpx.Headers) + + file_path = _prepare_output( + headers=headers, + output=output, + remote_name=remote_name, + ) + + file_size = _get_file_size(headers) + + return file_path, file_size + + async def download( + self, + url: str, + output: Path | None = None, + remote_name: str | None = None, + fetch_metadata: bool = False, + ) -> Tuple[str, int]: + # pre-fetch remote file name and file size + file_path, file_size = await self.get_file_metadata(url, output, remote_name, fetch_metadata) + + temp_file_manager = partial(tempfile.NamedTemporaryFile, mode="wb", dir=file_path.parent, delete=False) + + # Prevent parallel downloads of the same file with a lock. + lock_path = Path(file_path.as_posix() + ".lock") + + with FileLock(lock_path.as_posix()): + with temp_file_manager() as temp_file: + try: + response = await self._client.get( + path=url, + cast_to=httpx.Response, + stream=True, + ) + except APIStatusError as e: + lock_path.unlink(missing_ok=True) + raise APIStatusError( + "Error downloading file", + response=e.response, + body=e.body, + ) from e + + if not fetch_metadata: + file_size = int(response.headers.get("content-length", 0)) + + assert file_size != 0, "Unable to retrieve remote file." 
+ + # Download with retry logic + bytes_downloaded = 0 + retry_count = 0 + retry_delay = DOWNLOAD_INITIAL_RETRY_DELAY + + DISABLE_TQDM = os.environ.get("TOGETHER_DISABLE_TQDM", "false").lower() == "true" + + with tqdm( + total=file_size, + unit="B", + unit_scale=True, + desc=f"Downloading file {file_path.name}", + disable=bool(DISABLE_TQDM), + ) as pbar: + while bytes_downloaded < file_size: + try: + # If this is a retry, close the previous response and create a new one with Range header + if bytes_downloaded > 0: + await response.aclose() + + log.info(f"Resuming download from byte {bytes_downloaded}") + response = await self._client.get( + path=url, + cast_to=httpx.Response, + stream=True, + options=RequestOptions( + headers={"Range": f"bytes={bytes_downloaded}-"}, + ), + ) + + # Download chunks + async for chunk in response.aiter_bytes(DOWNLOAD_BLOCK_SIZE): + temp_file.write(chunk) # type: ignore + bytes_downloaded += len(chunk) + pbar.update(len(chunk)) + + # Successfully completed download + break + + except (httpx.RequestError, httpx.StreamError, APIConnectionError) as e: + if retry_count >= MAX_DOWNLOAD_RETRIES: + log.error(f"Download failed after {retry_count} retries") + raise DownloadError( + f"Download failed after {retry_count} retries. Last error: {str(e)}" + ) from e + + retry_count += 1 + log.warning( + f"Download interrupted at {bytes_downloaded}/{file_size} bytes. " + f"Retry {retry_count}/{MAX_DOWNLOAD_RETRIES} in {retry_delay}s..." + ) + await self._sleep(retry_delay) + + # Exponential backoff with max delay cap + retry_delay = min(retry_delay * 2, DOWNLOAD_MAX_RETRY_DELAY) + + except APIStatusError as e: + # For API errors, don't retry + log.error(f"API error during download: {e}") + raise APIStatusError( + "Error downloading file", + response=e.response, + body=e.body, + ) from e + + # Close the response + await response.aclose() + + # Raise exception if remote file size does not match downloaded file size + if os.stat(temp_file.name).st_size != file_size: + raise DownloadError( + f"Downloaded file size `{bytes_downloaded}` bytes does not match remote file size `{file_size}` bytes." + ) + + # Moves temp file to output file path + chmod_and_replace(Path(temp_file.name), file_path) + + lock_path.unlink(missing_ok=True) + + return str(file_path.resolve()), file_size + + class UploadManager(SyncAPIResource): def get_upload_url( self, diff --git a/src/together/resources/audio/transcriptions.py b/src/together/resources/audio/transcriptions.py index 374161e3..a2f1980a 100644 --- a/src/together/resources/audio/transcriptions.py +++ b/src/together/resources/audio/transcriptions.py @@ -70,7 +70,7 @@ def create( Args: file: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, - .webm, .flac. + .webm, .flac, .ogg, .opus, .aac. diarize: Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription. In the response, in the words array, you @@ -94,7 +94,10 @@ def create( model: Model to use for transcription - prompt: Optional text to bias decoding. + prompt: Optional text to bias decoding. Supported only on Whisper-family models (e.g. + `openai/whisper-large-v3`). Other STT models (e.g. + `nvidia/parakeet-tdt-0.6b-v3`) accept the field for API compatibility but ignore + it. response_format: The format of the response @@ -193,7 +196,7 @@ async def create( Args: file: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, - .webm, .flac. + .webm, .flac, .ogg, .opus, .aac. 
diarize: Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription. In the response, in the words array, you @@ -217,7 +220,10 @@ async def create( model: Model to use for transcription - prompt: Optional text to bias decoding. + prompt: Optional text to bias decoding. Supported only on Whisper-family models (e.g. + `openai/whisper-large-v3`). Other STT models (e.g. + `nvidia/parakeet-tdt-0.6b-v3`) accept the field for API compatibility but ignore + it. response_format: The format of the response diff --git a/src/together/resources/audio/translations.py b/src/together/resources/audio/translations.py index 8f6d69bf..326b5de4 100644 --- a/src/together/resources/audio/translations.py +++ b/src/together/resources/audio/translations.py @@ -67,14 +67,17 @@ def create( Args: file: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, - .webm, .flac. + .webm, .flac, .ogg, .opus, .aac. language: Target output language. Optional ISO 639-1 language code. If omitted, language is set to English. model: Model to use for translation - prompt: Optional text to bias decoding. + prompt: Optional text to bias decoding. Supported only on Whisper-family models (e.g. + `openai/whisper-large-v3`). Other STT models (e.g. + `nvidia/parakeet-tdt-0.6b-v3`) accept the field for API compatibility but ignore + it. response_format: The format of the response @@ -167,14 +170,17 @@ async def create( Args: file: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, - .webm, .flac. + .webm, .flac, .ogg, .opus, .aac. language: Target output language. Optional ISO 639-1 language code. If omitted, language is set to English. model: Model to use for translation - prompt: Optional text to bias decoding. + prompt: Optional text to bias decoding. Supported only on Whisper-family models (e.g. + `openai/whisper-large-v3`). Other STT models (e.g. + `nvidia/parakeet-tdt-0.6b-v3`) accept the field for API compatibility but ignore + it. response_format: The format of the response diff --git a/src/together/resources/batches.py b/src/together/resources/batches.py index 58c61acc..a89c3d61 100644 --- a/src/together/resources/batches.py +++ b/src/together/resources/batches.py @@ -2,6 +2,8 @@ from __future__ import annotations +from typing_extensions import Literal + import httpx from ..types import batch_create_params @@ -46,7 +48,7 @@ def with_streaming_response(self) -> BatchesResourceWithStreamingResponse: def create( self, *, - endpoint: str, + endpoint: Literal["/v1/chat/completions", "/v1/audio/transcriptions", "/v1/audio/translations"], input_file_id: str, completion_window: str | Omit = omit, model_id: str | Omit = omit, @@ -62,7 +64,13 @@ def create( Create a new batch job with the given input file and endpoint Args: - endpoint: The endpoint to use for batch processing + endpoint: The endpoint to use for batch processing. Each line of the uploaded input file + is dispatched against this endpoint. + + - `/v1/chat/completions` — chat completion batches + - `/v1/audio/transcriptions` — audio transcription batches (e.g. 
+ `openai/whisper-large-v3`) + - `/v1/audio/translations` — audio translation batches input_file_id: ID of the uploaded input file containing batch requests @@ -211,7 +219,7 @@ def with_streaming_response(self) -> AsyncBatchesResourceWithStreamingResponse: async def create( self, *, - endpoint: str, + endpoint: Literal["/v1/chat/completions", "/v1/audio/transcriptions", "/v1/audio/translations"], input_file_id: str, completion_window: str | Omit = omit, model_id: str | Omit = omit, @@ -227,7 +235,13 @@ async def create( Create a new batch job with the given input file and endpoint Args: - endpoint: The endpoint to use for batch processing + endpoint: The endpoint to use for batch processing. Each line of the uploaded input file + is dispatched against this endpoint. + + - `/v1/chat/completions` — chat completion batches + - `/v1/audio/transcriptions` — audio transcription batches (e.g. + `openai/whisper-large-v3`) + - `/v1/audio/translations` — audio translation batches input_file_id: ID of the uploaded input file containing batch requests diff --git a/src/together/resources/beta/clusters/clusters.py b/src/together/resources/beta/clusters/clusters.py index 15325721..5014087a 100644 --- a/src/together/resources/beta/clusters/clusters.py +++ b/src/together/resources/beta/clusters/clusters.py @@ -144,7 +144,7 @@ def create( reservation_start_time: Reservation start time of the cluster. This field is required for SCHEDULED billing to specify the reservation start time for the cluster. If not provided, - the cluster will be provisioned immediately. + the cluster provisions immediately. shared_volume: Inline configuration to create a shared volume with the cluster creation. @@ -467,7 +467,7 @@ async def create( reservation_start_time: Reservation start time of the cluster. This field is required for SCHEDULED billing to specify the reservation start time for the cluster. If not provided, - the cluster will be provisioned immediately. + the cluster provisions immediately. shared_volume: Inline configuration to create a shared volume with the cluster creation. diff --git a/src/together/resources/beta/jig/jig.py b/src/together/resources/beta/jig/jig.py index dd318e71..64ba1862 100644 --- a/src/together/resources/beta/jig/jig.py +++ b/src/together/resources/beta/jig/jig.py @@ -166,7 +166,7 @@ def update( description: Description is an optional human-readable description of your deployment environment_variables: EnvironmentVariables is a list of environment variables to set in the container. - This will replace all existing environment variables + Replaces all existing environment variables. gpu_count: GPUCount is the number of GPUs to allocate per container instance @@ -196,8 +196,8 @@ def update( termination_grace_period_seconds: TerminationGracePeriodSeconds is the time in seconds to wait for graceful shutdown before forcefully terminating the replica - volumes: Volumes is a list of volume mounts to attach to the container. This will replace - all existing volumes + volumes: Volumes is a list of volume mounts to attach to the container. Replaces all + existing volumes. extra_headers: Send extra headers @@ -320,10 +320,10 @@ def deploy( if not specified health_check_path: HealthCheckPath is the HTTP path for health checks (e.g., "/health"). If set, - the platform will check this endpoint to determine container health + the platform checks this endpoint to determine container health. - max_replicas: MaxReplicas is the maximum number of container instances that can be scaled up - to. 
If not set, will be set to MinReplicas + max_replicas: MaxReplicas is the maximum number of container instances. Defaults to + MinReplicas if not set. memory: Memory is the amount of RAM to allocate per container instance in GiB (e.g., 0.5 = 512MiB) @@ -576,7 +576,7 @@ async def update( description: Description is an optional human-readable description of your deployment environment_variables: EnvironmentVariables is a list of environment variables to set in the container. - This will replace all existing environment variables + Replaces all existing environment variables. gpu_count: GPUCount is the number of GPUs to allocate per container instance @@ -606,8 +606,8 @@ async def update( termination_grace_period_seconds: TerminationGracePeriodSeconds is the time in seconds to wait for graceful shutdown before forcefully terminating the replica - volumes: Volumes is a list of volume mounts to attach to the container. This will replace - all existing volumes + volumes: Volumes is a list of volume mounts to attach to the container. Replaces all + existing volumes. extra_headers: Send extra headers @@ -730,10 +730,10 @@ async def deploy( if not specified health_check_path: HealthCheckPath is the HTTP path for health checks (e.g., "/health"). If set, - the platform will check this endpoint to determine container health + the platform checks this endpoint to determine container health. - max_replicas: MaxReplicas is the maximum number of container instances that can be scaled up - to. If not set, will be set to MinReplicas + max_replicas: MaxReplicas is the maximum number of container instances. Defaults to + MinReplicas if not set. memory: Memory is the amount of RAM to allocate per container instance in GiB (e.g., 0.5 = 512MiB) diff --git a/src/together/resources/beta/jig/secrets.py b/src/together/resources/beta/jig/secrets.py index 64c80bca..f282c65e 100644 --- a/src/together/resources/beta/jig/secrets.py +++ b/src/together/resources/beta/jig/secrets.py @@ -65,7 +65,7 @@ def create( characters) value: Value is the sensitive data to store securely (e.g., API keys, passwords, - tokens). This value will be encrypted at rest + tokens). Encrypted at rest. description: Description is an optional human-readable description of the secret's purpose (max 500 characters) @@ -164,8 +164,8 @@ def update( project_id: ProjectID is ignored - the project is automatically determined from your authentication - value: Value is the new sensitive data to store securely. Updating this will replace - the existing secret value + value: Value is the new sensitive data to store securely. Updating this replaces the + existing secret value. extra_headers: Send extra headers @@ -292,7 +292,7 @@ async def create( characters) value: Value is the sensitive data to store securely (e.g., API keys, passwords, - tokens). This value will be encrypted at rest + tokens). Encrypted at rest. description: Description is an optional human-readable description of the secret's purpose (max 500 characters) @@ -391,8 +391,8 @@ async def update( project_id: ProjectID is ignored - the project is automatically determined from your authentication - value: Value is the new sensitive data to store securely. Updating this will replace - the existing secret value + value: Value is the new sensitive data to store securely. Updating this replaces the + existing secret value. 
extra_headers: Send extra headers diff --git a/src/together/resources/beta/jig/volumes.py b/src/together/resources/beta/jig/volumes.py index 81705898..3b94490b 100644 --- a/src/together/resources/beta/jig/volumes.py +++ b/src/together/resources/beta/jig/volumes.py @@ -61,7 +61,7 @@ def create( Create a new volume to preload files in deployments Args: - content: Content specifies the new content that will be preloaded to this volume + content: Content specifies the new content to preload to this volume. name: Name is the unique identifier for the volume within the project @@ -146,7 +146,7 @@ def update( Args: id: Volume ID or name. - content: Content specifies the new content that will be preloaded to this volume + content: Content specifies the new content to preload to this volume. name: Name is the new unique identifier for the volume within the project @@ -270,7 +270,7 @@ async def create( Create a new volume to preload files in deployments Args: - content: Content specifies the new content that will be preloaded to this volume + content: Content specifies the new content to preload to this volume. name: Name is the unique identifier for the volume within the project @@ -355,7 +355,7 @@ async def update( Args: id: Volume ID or name. - content: Content specifies the new content that will be preloaded to this volume + content: Content specifies the new content to preload to this volume. name: Name is the new unique identifier for the volume within the project diff --git a/src/together/resources/chat/completions.py b/src/together/resources/chat/completions.py index cde8485c..cfcbf6d5 100644 --- a/src/together/resources/chat/completions.py +++ b/src/together/resources/chat/completions.py @@ -99,13 +99,13 @@ def create( chat_template_kwargs: Additional configuration to pass to model engine. - context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context - length of the model. When set to 'error', API will return 400 with appropriate - error message. When set to 'truncate', override the max_tokens with maximum - context length of the model. + context_length_exceeded_behavior: Defines the behavior of the API when max_tokens exceed the maximum context + length of the model. When set to 'error', the API returns 400 with an + appropriate error message. When set to 'truncate', overrides max_tokens with the + maximum context length of the model. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -113,7 +113,7 @@ def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -151,9 +151,8 @@ def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. 
+ stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. stream: If true, stream tokens as Server-Sent Events as the model generates them instead of waiting for the full model response. The stream terminates with @@ -249,13 +248,13 @@ def create( chat_template_kwargs: Additional configuration to pass to model engine. - context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context - length of the model. When set to 'error', API will return 400 with appropriate - error message. When set to 'truncate', override the max_tokens with maximum - context length of the model. + context_length_exceeded_behavior: Defines the behavior of the API when max_tokens exceed the maximum context + length of the model. When set to 'error', the API returns 400 with an + appropriate error message. When set to 'truncate', overrides max_tokens with the + maximum context length of the model. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -263,7 +262,7 @@ def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -301,9 +300,8 @@ def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. temperature: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate @@ -395,13 +393,13 @@ def create( chat_template_kwargs: Additional configuration to pass to model engine. - context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context - length of the model. When set to 'error', API will return 400 with appropriate - error message. When set to 'truncate', override the max_tokens with maximum - context length of the model. + context_length_exceeded_behavior: Defines the behavior of the API when max_tokens exceed the maximum context + length of the model. When set to 'error', the API returns 400 with an + appropriate error message. When set to 'truncate', overrides max_tokens with the + maximum context length of the model. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. 
@@ -409,7 +407,7 @@ def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -447,9 +445,8 @@ def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. temperature: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate @@ -640,13 +637,13 @@ async def create( chat_template_kwargs: Additional configuration to pass to model engine. - context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context - length of the model. When set to 'error', API will return 400 with appropriate - error message. When set to 'truncate', override the max_tokens with maximum - context length of the model. + context_length_exceeded_behavior: Defines the behavior of the API when max_tokens exceed the maximum context + length of the model. When set to 'error', the API returns 400 with an + appropriate error message. When set to 'truncate', overrides max_tokens with the + maximum context length of the model. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -654,7 +651,7 @@ async def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -692,9 +689,8 @@ async def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. stream: If true, stream tokens as Server-Sent Events as the model generates them instead of waiting for the full model response. The stream terminates with @@ -790,13 +786,13 @@ async def create( chat_template_kwargs: Additional configuration to pass to model engine. - context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context - length of the model. When set to 'error', API will return 400 with appropriate - error message. 
When set to 'truncate', override the max_tokens with maximum - context length of the model. + context_length_exceeded_behavior: Defines the behavior of the API when max_tokens exceed the maximum context + length of the model. When set to 'error', the API returns 400 with an + appropriate error message. When set to 'truncate', overrides max_tokens with the + maximum context length of the model. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -804,7 +800,7 @@ async def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -842,9 +838,8 @@ async def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. temperature: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate @@ -936,13 +931,13 @@ async def create( chat_template_kwargs: Additional configuration to pass to model engine. - context_length_exceeded_behavior: Defined the behavior of the API when max_tokens exceed the maximum context - length of the model. When set to 'error', API will return 400 with appropriate - error message. When set to 'truncate', override the max_tokens with maximum - context length of the model. + context_length_exceeded_behavior: Defines the behavior of the API when max_tokens exceed the maximum context + length of the model. When set to 'error', the API returns 400 with an + appropriate error message. When set to 'truncate', overrides max_tokens with the + maximum context length of the model. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -950,7 +945,7 @@ async def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -988,9 +983,8 @@ async def create( seed: Seed value for reproducibility. 
- stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. temperature: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate diff --git a/src/together/resources/code_interpreter/code_interpreter.py b/src/together/resources/code_interpreter/code_interpreter.py index aa231c70..ea2344b9 100644 --- a/src/together/resources/code_interpreter/code_interpreter.py +++ b/src/together/resources/code_interpreter/code_interpreter.py @@ -73,22 +73,21 @@ def execute( """Executes the given code snippet and returns the output. Without a session_id, a - new session will be created to run the code. If you do pass in a valid - session_id, the code will be run in that session. This is useful for running - multiple code snippets in the same environment, because dependencies and similar - things are persisted between calls to the same session. + new session is created to run the code. If you pass a valid session_id, the code + runs in that session. This is useful for running multiple code snippets in the + same environment, because dependencies and similar things are persisted between + calls to the same session. Args: code: Code snippet to execute. - language: Programming language for the code to execute. Currently only supports Python, - but more will be added. + language: Programming language for the code to execute. Currently only supports Python. - files: Files to upload to the session. If present, files will be uploaded before - executing the given code. + files: Files to upload to the session. If present, files are uploaded before executing + the given code. - session_id: Identifier of the current session. Used to make follow-up calls. Requests will - return an error if the session does not belong to the caller or has expired. + session_id: Identifier of the current session. Used to make follow-up calls. Returns an + error if the session does not belong to the caller or has expired. extra_headers: Send extra headers @@ -160,22 +159,21 @@ async def execute( """Executes the given code snippet and returns the output. Without a session_id, a - new session will be created to run the code. If you do pass in a valid - session_id, the code will be run in that session. This is useful for running - multiple code snippets in the same environment, because dependencies and similar - things are persisted between calls to the same session. + new session is created to run the code. If you pass a valid session_id, the code + runs in that session. This is useful for running multiple code snippets in the + same environment, because dependencies and similar things are persisted between + calls to the same session. Args: code: Code snippet to execute. - language: Programming language for the code to execute. Currently only supports Python, - but more will be added. + language: Programming language for the code to execute. Currently only supports Python. - files: Files to upload to the session. If present, files will be uploaded before - executing the given code. + files: Files to upload to the session. If present, files are uploaded before executing + the given code. - session_id: Identifier of the current session. Used to make follow-up calls. 
Requests will - return an error if the session does not belong to the caller or has expired. + session_id: Identifier of the current session. Used to make follow-up calls. Returns an + error if the session does not belong to the caller or has expired. extra_headers: Send extra headers diff --git a/src/together/resources/completions.py b/src/together/resources/completions.py index 7fb330b5..139f659b 100644 --- a/src/together/resources/completions.py +++ b/src/together/resources/completions.py @@ -94,8 +94,8 @@ def create( prompt: A string providing context for the model to complete. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -103,7 +103,7 @@ def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -124,9 +124,8 @@ def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. stream: If true, stream tokens as Server-Sent Events as the model generates them instead of waiting for the full model response. The stream terminates with @@ -211,8 +210,8 @@ def create( of waiting for the full model response. The stream terminates with `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -220,7 +219,7 @@ def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -241,9 +240,8 @@ def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. temperature: A decimal number from 0-1 that determines the degree of randomness in the response. 
A temperature less than 1 favors more correctness and is appropriate @@ -324,8 +322,8 @@ def create( of waiting for the full model response. The stream terminates with `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -333,7 +331,7 @@ def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -354,9 +352,8 @@ def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. temperature: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate @@ -526,8 +523,8 @@ async def create( prompt: A string providing context for the model to complete. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -535,7 +532,7 @@ async def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -556,9 +553,8 @@ async def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. stream: If true, stream tokens as Server-Sent Events as the model generates them instead of waiting for the full model response. The stream terminates with @@ -643,8 +639,8 @@ async def create( of waiting for the full model response. The stream terminates with `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. 
Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -652,7 +648,7 @@ async def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -673,9 +669,8 @@ async def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. temperature: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate @@ -756,8 +751,8 @@ async def create( of waiting for the full model response. The stream terminates with `data: [DONE]`. If false, return a single JSON object containing the results. - echo: If true, the response will contain the prompt. Can be used with `logprobs` to - return prompt logprobs. + echo: If true, the response contains the prompt. Can be used with `logprobs` to return + prompt logprobs. frequency_penalty: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. @@ -765,7 +760,7 @@ async def create( logit_bias: Adjusts the likelihood of specific tokens appearing in the generated output. logprobs: An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. max_tokens: The maximum number of tokens to generate. @@ -786,9 +781,8 @@ async def create( seed: Seed value for reproducibility. - stop: A list of string sequences that will truncate (stop) inference text output. For - example, "" will stop generation as soon as the model generates the given - token. + stop: A list of string sequences that truncate (stop) inference text output. For + example, "" stops generation as soon as the model generates the given token. temperature: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate diff --git a/src/together/resources/endpoints.py b/src/together/resources/endpoints.py index afa3cfce..9726a946 100644 --- a/src/together/resources/endpoints.py +++ b/src/together/resources/endpoints.py @@ -74,9 +74,9 @@ def create( ) -> DedicatedEndpoint: """Creates a new dedicated endpoint for serving models. - The endpoint will - automatically start after creation. You can deploy any supported model on - hardware configurations that meet the model's requirements. + The endpoint starts + automatically after creation. You can deploy any supported model on hardware + configurations that meet the model's requirements. 
Args: autoscaling: Configuration for automatic scaling of the endpoint @@ -93,9 +93,8 @@ def create( display_name: A human-readable name for the endpoint - inactive_timeout: The number of minutes of inactivity after which the endpoint will be - automatically stopped. Set to null, omit or set to 0 to disable automatic - timeout. + inactive_timeout: The number of minutes of inactivity after which the endpoint stops + automatically. Set to null, omit, or set to 0 to disable automatic timeout. state: The desired state of the endpoint @@ -192,8 +191,8 @@ def update( display_name: A human-readable name for the endpoint - inactive_timeout: The number of minutes of inactivity after which the endpoint will be - automatically stopped. Set to 0 to disable automatic timeout. + inactive_timeout: The number of minutes of inactivity after which the endpoint stops + automatically. Set to 0 to disable automatic timeout. state: The desired state of the endpoint @@ -416,9 +415,9 @@ async def create( ) -> DedicatedEndpoint: """Creates a new dedicated endpoint for serving models. - The endpoint will - automatically start after creation. You can deploy any supported model on - hardware configurations that meet the model's requirements. + The endpoint starts + automatically after creation. You can deploy any supported model on hardware + configurations that meet the model's requirements. Args: autoscaling: Configuration for automatic scaling of the endpoint @@ -435,9 +434,8 @@ async def create( display_name: A human-readable name for the endpoint - inactive_timeout: The number of minutes of inactivity after which the endpoint will be - automatically stopped. Set to null, omit or set to 0 to disable automatic - timeout. + inactive_timeout: The number of minutes of inactivity after which the endpoint stops + automatically. Set to null, omit, or set to 0 to disable automatic timeout. state: The desired state of the endpoint @@ -534,8 +532,8 @@ async def update( display_name: A human-readable name for the endpoint - inactive_timeout: The number of minutes of inactivity after which the endpoint will be - automatically stopped. Set to 0 to disable automatic timeout. + inactive_timeout: The number of minutes of inactivity after which the endpoint stops + automatically. Set to 0 to disable automatic timeout. state: The desired state of the endpoint diff --git a/src/together/resources/fine_tuning.py b/src/together/resources/fine_tuning.py index ea734c55..d67c18eb 100644 --- a/src/together/resources/fine_tuning.py +++ b/src/together/resources/fine_tuning.py @@ -508,8 +508,8 @@ def estimate_price( from_checkpoint: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or - `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the - final checkpoint will be used. + `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, uses the + final checkpoint. model: Name of the base model to run fine-tune job on @@ -521,8 +521,7 @@ def estimate_price( training_method: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization. - training_type: The training type to use. If not provided, the job will default to LoRA training - type. + training_type: The training type to use. Defaults to LoRA if not provided. 
validation_file: File-ID of a validation file uploaded to the Together API @@ -1110,8 +1109,8 @@ async def estimate_price( from_checkpoint: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or - `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the - final checkpoint will be used. + `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, uses the + final checkpoint. model: Name of the base model to run fine-tune job on @@ -1123,8 +1122,7 @@ async def estimate_price( training_method: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization. - training_type: The training type to use. If not provided, the job will default to LoRA training - type. + training_type: The training type to use. Defaults to LoRA if not provided. validation_file: File-ID of a validation file uploaded to the Together API diff --git a/src/together/types/audio/transcription_create_params.py b/src/together/types/audio/transcription_create_params.py index b28fab6f..82522400 100644 --- a/src/together/types/audio/transcription_create_params.py +++ b/src/together/types/audio/transcription_create_params.py @@ -14,7 +14,7 @@ class TranscriptionCreateParams(TypedDict, total=False): file: Required[Union[FileTypes, str]] """Audio file upload or public HTTP/HTTPS URL. - Supported formats .wav, .mp3, .m4a, .webm, .flac. + Supported formats .wav, .mp3, .m4a, .webm, .flac, .ogg, .opus, .aac. """ diarize: bool @@ -55,7 +55,12 @@ class TranscriptionCreateParams(TypedDict, total=False): """Model to use for transcription""" prompt: str - """Optional text to bias decoding.""" + """Optional text to bias decoding. + + Supported only on Whisper-family models (e.g. `openai/whisper-large-v3`). Other + STT models (e.g. `nvidia/parakeet-tdt-0.6b-v3`) accept the field for API + compatibility but ignore it. + """ response_format: Literal["json", "verbose_json"] """The format of the response""" diff --git a/src/together/types/audio/transcription_create_response.py b/src/together/types/audio/transcription_create_response.py index 521d0d23..d079e49e 100644 --- a/src/together/types/audio/transcription_create_response.py +++ b/src/together/types/audio/transcription_create_response.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Union, Optional -from typing_extensions import Literal, TypeAlias +from typing_extensions import TypeAlias from ..._models import BaseModel @@ -93,9 +93,6 @@ class AudioTranscriptionVerboseJsonResponse(BaseModel): segments: List[AudioTranscriptionVerboseJsonResponseSegment] """Array of transcription segments""" - task: Literal["transcribe", "translate"] - """The task performed""" - text: str """The transcribed text""" diff --git a/src/together/types/audio/translation_create_params.py b/src/together/types/audio/translation_create_params.py index 5c944f5a..21896fdf 100644 --- a/src/together/types/audio/translation_create_params.py +++ b/src/together/types/audio/translation_create_params.py @@ -14,7 +14,7 @@ class TranslationCreateParams(TypedDict, total=False): file: Required[Union[FileTypes, str]] """Audio file upload or public HTTP/HTTPS URL. - Supported formats .wav, .mp3, .m4a, .webm, .flac. + Supported formats .wav, .mp3, .m4a, .webm, .flac, .ogg, .opus, .aac. 
""" language: str @@ -27,7 +27,12 @@ class TranslationCreateParams(TypedDict, total=False): """Model to use for translation""" prompt: str - """Optional text to bias decoding.""" + """Optional text to bias decoding. + + Supported only on Whisper-family models (e.g. `openai/whisper-large-v3`). Other + STT models (e.g. `nvidia/parakeet-tdt-0.6b-v3`) accept the field for API + compatibility but ignore it. + """ response_format: Literal["json", "verbose_json"] """The format of the response""" diff --git a/src/together/types/audio/translation_create_response.py b/src/together/types/audio/translation_create_response.py index cb02a893..7e750674 100644 --- a/src/together/types/audio/translation_create_response.py +++ b/src/together/types/audio/translation_create_response.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Union, Optional -from typing_extensions import Literal, TypeAlias +from typing_extensions import TypeAlias from ..._models import BaseModel @@ -57,9 +57,6 @@ class AudioTranslationVerboseJsonResponse(BaseModel): segments: List[AudioTranslationVerboseJsonResponseSegment] """Array of translation segments""" - task: Literal["transcribe", "translate"] - """The task performed""" - text: str """The translated text""" diff --git a/src/together/types/batch_create_params.py b/src/together/types/batch_create_params.py index 8b696489..a8601afe 100644 --- a/src/together/types/batch_create_params.py +++ b/src/together/types/batch_create_params.py @@ -2,14 +2,22 @@ from __future__ import annotations -from typing_extensions import Required, TypedDict +from typing_extensions import Literal, Required, TypedDict __all__ = ["BatchCreateParams"] class BatchCreateParams(TypedDict, total=False): - endpoint: Required[str] - """The endpoint to use for batch processing""" + endpoint: Required[Literal["/v1/chat/completions", "/v1/audio/transcriptions", "/v1/audio/translations"]] + """The endpoint to use for batch processing. + + Each line of the uploaded input file is dispatched against this endpoint. + + - `/v1/chat/completions` — chat completion batches + - `/v1/audio/transcriptions` — audio transcription batches (e.g. + `openai/whisper-large-v3`) + - `/v1/audio/translations` — audio translation batches + """ input_file_id: Required[str] """ID of the uploaded input file containing batch requests""" diff --git a/src/together/types/beta/cluster_create_params.py b/src/together/types/beta/cluster_create_params.py index 5bdcb343..e961052b 100644 --- a/src/together/types/beta/cluster_create_params.py +++ b/src/together/types/beta/cluster_create_params.py @@ -97,8 +97,7 @@ class ClusterCreateParams(TypedDict, total=False): """Reservation start time of the cluster. This field is required for SCHEDULED billing to specify the reservation start - time for the cluster. If not provided, the cluster will be provisioned - immediately. + time for the cluster. If not provided, the cluster provisions immediately. 
""" shared_volume: SharedVolume diff --git a/src/together/types/beta/deployment.py b/src/together/types/beta/deployment.py index 378067c1..f7e36e0a 100644 --- a/src/together/types/beta/deployment.py +++ b/src/together/types/beta/deployment.py @@ -140,10 +140,7 @@ class ReplicaEvents(BaseModel): class Volume(BaseModel): mount_path: str - """ - MountPath is the path in the container where the volume will be mounted (e.g., - "/data") - """ + """MountPath is the path in the container where the volume mounts (e.g., "/data").""" name: str """Name is the name of the volume to mount. diff --git a/src/together/types/beta/jig/secret.py b/src/together/types/beta/jig/secret.py index 3700ebd8..d2927287 100644 --- a/src/together/types/beta/jig/secret.py +++ b/src/together/types/beta/jig/secret.py @@ -16,13 +16,13 @@ class Secret(BaseModel): """CreatedAt is the ISO8601 timestamp when this secret was created""" created_by: Optional[str] = None - """CreatedBy is the identifier of the user who created this secret""" + """CreatedBy is the identifier of who created this secret.""" description: Optional[str] = None """Description is a human-readable description of the secret's purpose""" last_updated_by: Optional[str] = None - """LastUpdatedBy is the identifier of the user who last updated this secret""" + """LastUpdatedBy is the identifier of who last updated this secret.""" name: Optional[str] = None """Name is the name/key of the secret""" diff --git a/src/together/types/beta/jig/secret_create_params.py b/src/together/types/beta/jig/secret_create_params.py index 9c26e0ef..793f9d74 100644 --- a/src/together/types/beta/jig/secret_create_params.py +++ b/src/together/types/beta/jig/secret_create_params.py @@ -18,7 +18,7 @@ class SecretCreateParams(TypedDict, total=False): value: Required[str] """ Value is the sensitive data to store securely (e.g., API keys, passwords, - tokens). This value will be encrypted at rest + tokens). Encrypted at rest. """ description: str diff --git a/src/together/types/beta/jig/secret_update_params.py b/src/together/types/beta/jig/secret_update_params.py index 18f7813f..884d1cef 100644 --- a/src/together/types/beta/jig/secret_update_params.py +++ b/src/together/types/beta/jig/secret_update_params.py @@ -30,5 +30,5 @@ class SecretUpdateParams(TypedDict, total=False): value: str """Value is the new sensitive data to store securely. - Updating this will replace the existing secret value + Updating this replaces the existing secret value. """ diff --git a/src/together/types/beta/jig/volume.py b/src/together/types/beta/jig/volume.py index b997ab41..b0e56387 100644 --- a/src/together/types/beta/jig/volume.py +++ b/src/together/types/beta/jig/volume.py @@ -22,8 +22,8 @@ class ContentFile(BaseModel): class Content(BaseModel): files: Optional[List[ContentFile]] = None """ - Files is the list of files that will be preloaded into the volume, if the volume - content type is "files" + Files is the list of files to preload into the volume, if the volume content + type is "files". 
""" source_prefix: Optional[str] = None @@ -40,7 +40,7 @@ class Content(BaseModel): class VersionHistoryContent(BaseModel): - """Content specifies the new content that will be preloaded to this volume""" + """Content specifies the new content to preload to this volume.""" source_prefix: Optional[str] = None """ @@ -57,7 +57,7 @@ class VersionHistoryContent(BaseModel): class VersionHistory(BaseModel): content: Optional[VersionHistoryContent] = None - """Content specifies the new content that will be preloaded to this volume""" + """Content specifies the new content to preload to this volume.""" mounted_by: Optional[List[str]] = None diff --git a/src/together/types/beta/jig/volume_create_params.py b/src/together/types/beta/jig/volume_create_params.py index 954a3d9d..a7ced156 100644 --- a/src/together/types/beta/jig/volume_create_params.py +++ b/src/together/types/beta/jig/volume_create_params.py @@ -9,7 +9,7 @@ class VolumeCreateParams(TypedDict, total=False): content: Required[Content] - """Content specifies the new content that will be preloaded to this volume""" + """Content specifies the new content to preload to this volume.""" name: Required[str] """Name is the unique identifier for the volume within the project""" @@ -19,7 +19,7 @@ class VolumeCreateParams(TypedDict, total=False): class Content(TypedDict, total=False): - """Content specifies the new content that will be preloaded to this volume""" + """Content specifies the new content to preload to this volume.""" source_prefix: str """ diff --git a/src/together/types/beta/jig/volume_update_params.py b/src/together/types/beta/jig/volume_update_params.py index 3b1ce6c2..3586492b 100644 --- a/src/together/types/beta/jig/volume_update_params.py +++ b/src/together/types/beta/jig/volume_update_params.py @@ -9,7 +9,7 @@ class VolumeUpdateParams(TypedDict, total=False): content: Content - """Content specifies the new content that will be preloaded to this volume""" + """Content specifies the new content to preload to this volume.""" name: str """Name is the new unique identifier for the volume within the project""" @@ -19,7 +19,7 @@ class VolumeUpdateParams(TypedDict, total=False): class Content(TypedDict, total=False): - """Content specifies the new content that will be preloaded to this volume""" + """Content specifies the new content to preload to this volume.""" source_prefix: str """ diff --git a/src/together/types/beta/jig_deploy_params.py b/src/together/types/beta/jig_deploy_params.py index b6c5797f..c0bdc9c5 100644 --- a/src/together/types/beta/jig_deploy_params.py +++ b/src/together/types/beta/jig_deploy_params.py @@ -75,13 +75,13 @@ class JigDeployParams(TypedDict, total=False): health_check_path: str """HealthCheckPath is the HTTP path for health checks (e.g., "/health"). - If set, the platform will check this endpoint to determine container health + If set, the platform checks this endpoint to determine container health. """ max_replicas: int - """ - MaxReplicas is the maximum number of container instances that can be scaled up - to. If not set, will be set to MinReplicas + """MaxReplicas is the maximum number of container instances. + + Defaults to MinReplicas if not set. 
""" memory: float @@ -199,10 +199,7 @@ class EnvironmentVariable(TypedDict, total=False): class Volume(TypedDict, total=False): mount_path: Required[str] - """ - MountPath is the path in the container where the volume will be mounted (e.g., - "/data") - """ + """MountPath is the path in the container where the volume mounts (e.g., "/data").""" name: Required[str] """Name is the name of the volume to mount. diff --git a/src/together/types/beta/jig_update_params.py b/src/together/types/beta/jig_update_params.py index e73aad28..c91e46d9 100644 --- a/src/together/types/beta/jig_update_params.py +++ b/src/together/types/beta/jig_update_params.py @@ -46,7 +46,7 @@ class JigUpdateParams(TypedDict, total=False): environment_variables: Iterable[EnvironmentVariable] """EnvironmentVariables is a list of environment variables to set in the container. - This will replace all existing environment variables + Replaces all existing environment variables. """ gpu_count: int @@ -104,7 +104,7 @@ class JigUpdateParams(TypedDict, total=False): volumes: Iterable[Volume] """Volumes is a list of volume mounts to attach to the container. - This will replace all existing volumes + Replaces all existing volumes. """ @@ -186,10 +186,7 @@ class EnvironmentVariable(TypedDict, total=False): class Volume(TypedDict, total=False): mount_path: Required[str] - """ - MountPath is the path in the container where the volume will be mounted (e.g., - "/data") - """ + """MountPath is the path in the container where the volume mounts (e.g., "/data").""" name: Required[str] """Name is the name of the volume to mount. diff --git a/src/together/types/chat/completion_create_params.py b/src/together/types/chat/completion_create_params.py index 215b3970..f6951b63 100644 --- a/src/together/types/chat/completion_create_params.py +++ b/src/together/types/chat/completion_create_params.py @@ -57,14 +57,14 @@ class CompletionCreateParamsBase(TypedDict, total=False): context_length_exceeded_behavior: Literal["truncate", "error"] """ - Defined the behavior of the API when max_tokens exceed the maximum context - length of the model. When set to 'error', API will return 400 with appropriate - error message. When set to 'truncate', override the max_tokens with maximum - context length of the model. + Defines the behavior of the API when max_tokens exceed the maximum context + length of the model. When set to 'error', the API returns 400 with an + appropriate error message. When set to 'truncate', overrides max_tokens with the + maximum context length of the model. """ echo: bool - """If true, the response will contain the prompt. + """If true, the response contains the prompt. Can be used with `logprobs` to return prompt logprobs. """ @@ -83,7 +83,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): logprobs: int """ An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. """ @@ -144,10 +144,10 @@ class CompletionCreateParamsBase(TypedDict, total=False): """Seed value for reproducibility.""" stop: SequenceNotStr[str] - """A list of string sequences that will truncate (stop) inference text output. + """A list of string sequences that truncate (stop) inference text output. - For example, "" will stop generation as soon as the model generates the - given token. 
+ For example, "" stops generation as soon as the model generates the given + token. """ temperature: float diff --git a/src/together/types/code_interpreter_execute_params.py b/src/together/types/code_interpreter_execute_params.py index 91cf6c02..4078c47c 100644 --- a/src/together/types/code_interpreter_execute_params.py +++ b/src/together/types/code_interpreter_execute_params.py @@ -13,22 +13,19 @@ class CodeInterpreterExecuteParams(TypedDict, total=False): """Code snippet to execute.""" language: Required[Literal["python"]] - """Programming language for the code to execute. - - Currently only supports Python, but more will be added. - """ + """Programming language for the code to execute. Currently only supports Python.""" files: Iterable[File] """Files to upload to the session. - If present, files will be uploaded before executing the given code. + If present, files are uploaded before executing the given code. """ session_id: str """Identifier of the current session. - Used to make follow-up calls. Requests will return an error if the session does - not belong to the caller or has expired. + Used to make follow-up calls. Returns an error if the session does not belong to + the caller or has expired. """ diff --git a/src/together/types/completion_create_params.py b/src/together/types/completion_create_params.py index a5fb0f6c..ff960141 100644 --- a/src/together/types/completion_create_params.py +++ b/src/together/types/completion_create_params.py @@ -31,7 +31,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): """A string providing context for the model to complete.""" echo: bool - """If true, the response will contain the prompt. + """If true, the response contains the prompt. Can be used with `logprobs` to return prompt logprobs. """ @@ -48,7 +48,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): logprobs: int """ An integer between 0 and 20 of the top k tokens to return log probabilities for - at each generation step, instead of just the sampled token. Log probabilities + at each generation step, instead of only the sampled token. Log probabilities help assess model confidence in token predictions. """ @@ -84,10 +84,10 @@ class CompletionCreateParamsBase(TypedDict, total=False): """Seed value for reproducibility.""" stop: SequenceNotStr[str] - """A list of string sequences that will truncate (stop) inference text output. + """A list of string sequences that truncate (stop) inference text output. - For example, "" will stop generation as soon as the model generates the - given token. + For example, "" stops generation as soon as the model generates the given + token. """ temperature: float diff --git a/src/together/types/endpoint_create_params.py b/src/together/types/endpoint_create_params.py index 3674e0e9..d28f713b 100644 --- a/src/together/types/endpoint_create_params.py +++ b/src/together/types/endpoint_create_params.py @@ -33,10 +33,10 @@ class EndpointCreateParams(TypedDict, total=False): """A human-readable name for the endpoint""" inactive_timeout: Optional[int] - """ - The number of minutes of inactivity after which the endpoint will be - automatically stopped. Set to null, omit or set to 0 to disable automatic - timeout. + """The number of minutes of inactivity after which the endpoint stops + automatically. + + Set to null, omit, or set to 0 to disable automatic timeout. 
""" state: Literal["STARTED", "STOPPED"] diff --git a/src/together/types/endpoint_update_params.py b/src/together/types/endpoint_update_params.py index 6f992b3f..92e24164 100644 --- a/src/together/types/endpoint_update_params.py +++ b/src/together/types/endpoint_update_params.py @@ -18,9 +18,10 @@ class EndpointUpdateParams(TypedDict, total=False): """A human-readable name for the endpoint""" inactive_timeout: Optional[int] - """ - The number of minutes of inactivity after which the endpoint will be - automatically stopped. Set to 0 to disable automatic timeout. + """The number of minutes of inactivity after which the endpoint stops + automatically. + + Set to 0 to disable automatic timeout. """ state: Literal["STARTED", "STOPPED"] diff --git a/src/together/types/eval_create_params.py b/src/together/types/eval_create_params.py index 52344186..081698c6 100644 --- a/src/together/types/eval_create_params.py +++ b/src/together/types/eval_create_params.py @@ -51,6 +51,13 @@ class ParametersEvaluationClassifyParametersJudge(TypedDict, total=False): external_base_url: str """Base URL for external judge models. Must be OpenAI-compatible base URL.""" + max_tokens: int + """Maximum number of tokens the judge model can generate. + + Defaults to 32768. Increase for reasoning models (e.g. Gemini, o-series) that + consume output token budget for chain-of-thought. + """ + num_workers: int """Number of concurrent workers for inference requests. @@ -58,6 +65,9 @@ class ParametersEvaluationClassifyParametersJudge(TypedDict, total=False): when using proxy endpoints (e.g. OpenRouter) or rate-limited external APIs. """ + temperature: float + """Sampling temperature for the judge model. Defaults to 0.05.""" + class ParametersEvaluationClassifyParametersModelToEvaluateEvaluationModelRequest(TypedDict, total=False): input_template: Required[str] @@ -129,6 +139,13 @@ class ParametersEvaluationScoreParametersJudge(TypedDict, total=False): external_base_url: str """Base URL for external judge models. Must be OpenAI-compatible base URL.""" + max_tokens: int + """Maximum number of tokens the judge model can generate. + + Defaults to 32768. Increase for reasoning models (e.g. Gemini, o-series) that + consume output token budget for chain-of-thought. + """ + num_workers: int """Number of concurrent workers for inference requests. @@ -136,6 +153,9 @@ class ParametersEvaluationScoreParametersJudge(TypedDict, total=False): when using proxy endpoints (e.g. OpenRouter) or rate-limited external APIs. """ + temperature: float + """Sampling temperature for the judge model. Defaults to 0.05.""" + class ParametersEvaluationScoreParametersModelToEvaluateEvaluationModelRequest(TypedDict, total=False): input_template: Required[str] @@ -210,6 +230,13 @@ class ParametersEvaluationCompareParametersJudge(TypedDict, total=False): external_base_url: str """Base URL for external judge models. Must be OpenAI-compatible base URL.""" + max_tokens: int + """Maximum number of tokens the judge model can generate. + + Defaults to 32768. Increase for reasoning models (e.g. Gemini, o-series) that + consume output token budget for chain-of-thought. + """ + num_workers: int """Number of concurrent workers for inference requests. @@ -217,6 +244,9 @@ class ParametersEvaluationCompareParametersJudge(TypedDict, total=False): when using proxy endpoints (e.g. OpenRouter) or rate-limited external APIs. """ + temperature: float + """Sampling temperature for the judge model. 
Defaults to 0.05.""" + class ParametersEvaluationCompareParametersModelAEvaluationModelRequest(TypedDict, total=False): input_template: Required[str] diff --git a/src/together/types/fine_tuning_cancel_response.py b/src/together/types/fine_tuning_cancel_response.py index 1839ca7e..11a649ae 100644 --- a/src/together/types/fine_tuning_cancel_response.py +++ b/src/together/types/fine_tuning_cancel_response.py @@ -66,8 +66,8 @@ class TrainingMethodTrainingMethodSft(BaseModel): train_on_inputs: Union[bool, Literal["auto"]] """ - Whether to mask the user messages in conversational data or prompts in - instruction data. + Whether to mask user messages in conversational data or prompts in instruction + data. """ @@ -160,8 +160,7 @@ class FineTuningCancelResponse(BaseModel): max_seq_length: Optional[int] = None """Maximum sequence length to use for training. - If not specified, the maximum allowed for the model and training method will be - used. + If not specified, uses the maximum allowed for the model and training method. """ model: Optional[str] = None @@ -216,7 +215,7 @@ class FineTuningCancelResponse(BaseModel): """Type of training used (full or LoRA)""" user_id: Optional[str] = None - """Identifier for the user who created the job""" + """Identifier for who created the job.""" validation_file: Optional[str] = None """File-ID of the validation file""" diff --git a/src/together/types/fine_tuning_estimate_price_params.py b/src/together/types/fine_tuning_estimate_price_params.py index a4a5e4ab..30f1dff7 100644 --- a/src/together/types/fine_tuning_estimate_price_params.py +++ b/src/together/types/fine_tuning_estimate_price_params.py @@ -24,8 +24,8 @@ class FineTuningEstimatePriceParams(TypedDict, total=False): """The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or - `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the - final checkpoint will be used. + `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, uses the + final checkpoint. """ model: str @@ -47,10 +47,7 @@ class FineTuningEstimatePriceParams(TypedDict, total=False): """ training_type: Optional[TrainingType] - """The training type to use. - - If not provided, the job will default to LoRA training type. - """ + """The training type to use. Defaults to LoRA if not provided.""" validation_file: str """File-ID of a validation file uploaded to the Together API""" @@ -61,8 +58,8 @@ class TrainingMethodTrainingMethodSft(TypedDict, total=False): train_on_inputs: Required[Union[bool, Literal["auto"]]] """ - Whether to mask the user messages in conversational data or prompts in - instruction data. + Whether to mask user messages in conversational data or prompts in instruction + data. 
""" diff --git a/src/together/types/fine_tuning_estimate_price_response.py b/src/together/types/fine_tuning_estimate_price_response.py index acd757fd..1bcb76a6 100644 --- a/src/together/types/fine_tuning_estimate_price_response.py +++ b/src/together/types/fine_tuning_estimate_price_response.py @@ -9,7 +9,7 @@ class FineTuningEstimatePriceResponse(BaseModel): allowed_to_proceed: Optional[bool] = None - """Whether the user is allowed to proceed with the fine-tuning job""" + """Whether you are allowed to proceed with the fine-tuning job.""" estimated_eval_token_count: Optional[float] = None """The estimated number of tokens for evaluation""" @@ -21,4 +21,4 @@ class FineTuningEstimatePriceResponse(BaseModel): """The estimated number of tokens to be trained""" user_limit: Optional[float] = None - """The user's credit limit in dollars""" + """Your credit limit in dollars.""" diff --git a/src/together/types/fine_tuning_list_response.py b/src/together/types/fine_tuning_list_response.py index a5d6830b..a606e334 100644 --- a/src/together/types/fine_tuning_list_response.py +++ b/src/together/types/fine_tuning_list_response.py @@ -67,8 +67,8 @@ class DataTrainingMethodTrainingMethodSft(BaseModel): train_on_inputs: Union[bool, Literal["auto"]] """ - Whether to mask the user messages in conversational data or prompts in - instruction data. + Whether to mask user messages in conversational data or prompts in instruction + data. """ @@ -161,8 +161,7 @@ class Data(BaseModel): max_seq_length: Optional[int] = None """Maximum sequence length to use for training. - If not specified, the maximum allowed for the model and training method will be - used. + If not specified, uses the maximum allowed for the model and training method. """ model: Optional[str] = None @@ -217,7 +216,7 @@ class Data(BaseModel): """Type of training used (full or LoRA)""" user_id: Optional[str] = None - """Identifier for the user who created the job""" + """Identifier for who created the job.""" validation_file: Optional[str] = None """File-ID of the validation file""" diff --git a/src/together/types/finetune_response.py b/src/together/types/finetune_response.py index d50d9a74..14633e6f 100644 --- a/src/together/types/finetune_response.py +++ b/src/together/types/finetune_response.py @@ -73,8 +73,8 @@ class TrainingMethodTrainingMethodSft(BaseModel): train_on_inputs: Union[bool, Literal["auto"]] """ - Whether to mask the user messages in conversational data or prompts in - instruction data. + Whether to mask user messages in conversational data or prompts in instruction + data. 
""" diff --git a/tests/api_resources/test_evals.py b/tests/api_resources/test_evals.py index e18490a1..66a73df4 100644 --- a/tests/api_resources/test_evals.py +++ b/tests/api_resources/test_evals.py @@ -52,7 +52,9 @@ def test_method_create_with_all_params(self, client: Together) -> None: "system_template": "Imagine you are a helpful assistant", "external_api_token": "external_api_token", "external_base_url": "external_base_url", + "max_tokens": 8192, "num_workers": 5, + "temperature": 0, }, "labels": ["yes", "no"], "pass_labels": ["yes"], @@ -253,7 +255,9 @@ async def test_method_create_with_all_params(self, async_client: AsyncTogether) "system_template": "Imagine you are a helpful assistant", "external_api_token": "external_api_token", "external_base_url": "external_base_url", + "max_tokens": 8192, "num_workers": 5, + "temperature": 0, }, "labels": ["yes", "no"], "pass_labels": ["yes"], diff --git a/tests/cli/test_files.py b/tests/cli/test_files.py index dd823d2b..60171e63 100644 --- a/tests/cli/test_files.py +++ b/tests/cli/test_files.py @@ -197,17 +197,3 @@ def test_upload_does_not_check_if_disabled(self, tmp_path: Path, cli_runner: Cli call_kw = upload_mock.call_args.kwargs assert call_kw["check"] is False assert "uploaded-id" in result.output - - def test_upload_does_check_if_enabled(self, tmp_path: Path, cli_runner: CliRunner) -> None: - f = tmp_path / "data.jsonl" - f.write_text("{}\n") - uploaded = _file_response() - with patch.object(_files_upload_cli, "check_file") as check_mock, patch( - "together.resources.files.AsyncFilesResource.upload", new_callable=AsyncMock - ) as upload_mock: - upload_mock.return_value = uploaded - check_mock.return_value = {"is_check_passed": True, "message": "Checks passed"} - result = cli_runner.invoke(["files", "upload", str(f), "--check"]) - assert result.exit_code == 0 - check_mock.assert_called_once() - upload_mock.assert_called_once() diff --git a/tests/cli/test_fine_tuning.py b/tests/cli/test_fine_tuning.py index f6ee41b4..08d1b3d1 100644 --- a/tests/cli/test_fine_tuning.py +++ b/tests/cli/test_fine_tuning.py @@ -210,12 +210,12 @@ class _DM: def __init__(self, _client: object) -> None: pass - def download(self, **kwargs: object) -> tuple[str, int]: + async def download(self, **kwargs: object) -> tuple[str, int]: assert "ft_id=ft-abcd-12" in str(kwargs.get("url", "")) assert "checkpoint=model_output_path" in str(kwargs.get("url", "")) return str(out_file), 1 - with patch.object(_ft_download_mod, "DownloadManager", _DM): + with patch.object(_ft_download_mod, "AsyncDownloadManager", _DM): # Full fine-tunes require explicit --checkpoint-type default (CLI default is merged for LoRA). 
result = cli_runner.invoke( [ diff --git a/tests/cli/test_json_mode_pipeable_to_jq.py b/tests/cli/test_json_mode_pipeable_to_jq.py index 91c007a8..1b2ac2f8 100644 --- a/tests/cli/test_json_mode_pipeable_to_jq.py +++ b/tests/cli/test_json_mode_pipeable_to_jq.py @@ -153,6 +153,7 @@ def test_beta_clusters_json_mode(self) -> None: def test_beta_clusters_storage_json_mode(self) -> None: beta_clusters_storage = JSONValidator(("beta", "clusters", "storage")) beta_clusters_storage.run_and_assert("create --region us-east-1 --size-tib 1 --volume-name test-volume") + beta_clusters_storage.run_and_assert("update storage-123 --size-tib 4") beta_clusters_storage.run_and_assert("delete storage-123") beta_clusters_storage.run_and_assert("list") beta_clusters_storage.run_and_assert("retrieve storage-123") diff --git a/tests/test_models.py b/tests/test_models.py index c2830354..1b9cee0c 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,7 +1,8 @@ import json -from typing import TYPE_CHECKING, Any, Dict, List, Union, Optional, cast +from typing import TYPE_CHECKING, Any, Dict, List, Union, Iterable, Optional, cast from datetime import datetime, timezone -from typing_extensions import Literal, Annotated, TypeAliasType +from collections import deque +from typing_extensions import Literal, Annotated, TypedDict, TypeAliasType import pytest import pydantic @@ -9,7 +10,7 @@ from together._utils import PropertyInfo from together._compat import PYDANTIC_V1, parse_obj, model_dump, model_json -from together._models import DISCRIMINATOR_CACHE, BaseModel, construct_type +from together._models import DISCRIMINATOR_CACHE, BaseModel, EagerIterable, construct_type class BasicModel(BaseModel): @@ -961,3 +962,56 @@ def __getattr__(self, attr: str) -> Item: ... assert model.a.prop == 1 assert isinstance(model.a, Item) assert model.other == "foo" + + +# NOTE: Workaround for Pydantic Iterable behavior. +# Iterable fields are replaced with a ValidatorIterator and may be consumed +# during serialization, which can cause subsequent dumps to return empty data. +# See: https://github.com/pydantic/pydantic/issues/9541 +@pytest.mark.parametrize( + "data, expected_validated", + [ + ([1, 2, 3], [1, 2, 3]), + ((1, 2, 3), (1, 2, 3)), + (set([1, 2, 3]), set([1, 2, 3])), + (iter([1, 2, 3]), [1, 2, 3]), + ([], []), + ((x for x in [1, 2, 3]), [1, 2, 3]), + (map(lambda x: x, [1, 2, 3]), [1, 2, 3]), + (frozenset([1, 2, 3]), frozenset([1, 2, 3])), + (deque([1, 2, 3]), deque([1, 2, 3])), + ], + ids=["list", "tuple", "set", "iterator", "empty", "generator", "map", "frozenset", "deque"], +) +@pytest.mark.skipif(PYDANTIC_V1, reason="this is only supported in pydantic v2") +def test_iterable_construction(data: Iterable[int], expected_validated: Iterable[int]) -> None: + class TypeWithIterable(TypedDict): + items: EagerIterable[int] + + class Model(BaseModel): + data: TypeWithIterable + + m = Model.model_validate({"data": {"items": data}}) + assert m.data["items"] == expected_validated + + # Verify repeated dumps don't lose data (the original bug) + assert m.model_dump()["data"]["items"] == list(expected_validated) + assert m.model_dump()["data"]["items"] == list(expected_validated) + + +@pytest.mark.skipif(PYDANTIC_V1, reason="this is only supported in pydantic v2") +def test_iterable_construction_str_falls_back_to_list() -> None: + # str is iterable (over chars), but str(list_of_chars) produces the list's repr + # rather than reconstructing a string from items. 
We special-case str to fall + # back to list instead of attempting reconstruction. + class TypeWithIterable(TypedDict): + items: EagerIterable[str] + + class Model(BaseModel): + data: TypeWithIterable + + m = Model.model_validate({"data": {"items": "hello"}}) + + # falls back to list of chars rather than calling str(["h", "e", "l", "l", "o"]) + assert m.data["items"] == ["h", "e", "l", "l", "o"] + assert m.model_dump()["data"]["items"] == ["h", "e", "l", "l", "o"]
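
Taken together, the completion-parameter docstrings reworded in this diff (`echo`, `logprobs`, `stop`) describe one call surface. A minimal sketch of a call that exercises all three; the API key setup and the serverless model name are assumptions, not part of this diff:

from together import Together

client = Together()  # reads TOGETHER_API_KEY from the environment

completion = client.completions.create(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",  # assumed model name
    prompt="The capital of France is",
    max_tokens=16,
    stop=["</s>"],  # truncates output as soon as the model emits this token
    echo=True,      # returned text includes the prompt, pairing with logprobs below
    logprobs=1,     # top-k log probabilities at each generation step
)
print(completion.choices[0].text)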
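
The `inactive_timeout` wording fixed in `endpoints.py` and the endpoint param types boils down to: the endpoint stops after N idle minutes, and 0 (or null/omitted on create) disables the timer. A sketch, assuming a hypothetical endpoint ID passed as the first argument to `update`:

from together import Together

client = Together()

# Stop automatically after 30 idle minutes.
client.endpoints.update("endpoint-1234abcd", inactive_timeout=30)

# Disable the idle timeout entirely.
client.endpoints.update("endpoint-1234abcd", inactive_timeout=0)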
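
The clarified `prompt` docstring in the audio params draws a model-dependent line: it biases decoding on Whisper-family models and is silently ignored elsewhere. A sketch using one of the newly documented container formats (`.ogg`); the file URL is hypothetical:

from together import Together

client = Together()

transcript = client.audio.transcriptions.create(
    file="https://example.com/standup.ogg",    # public URL; .ogg is now a documented format
    model="openai/whisper-large-v3",           # Whisper-family, so prompt is honored
    prompt="Stainless, OpenAPI, Together AI",  # domain terms to bias spelling
    response_format="json",
)
print(transcript.text)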
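
Constraining `endpoint` to a `Literal` in `batch_create_params.py` moves typo detection from the server to the type checker. A sketch with a hypothetical input file ID:

from together import Together

client = Together()

batch = client.batches.create(
    endpoint="/v1/audio/transcriptions",  # one of the three documented literals
    input_file_id="file-abc123",          # hypothetical ID of an uploaded JSONL file
)
# A stray value such as "/v1/audio/transcription" now fails mypy/pyright
# instead of surfacing only as a runtime API error.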
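
The new judge parameters slot into the `judge` block of an eval payload. The fragment below mirrors the shape used in `tests/api_resources/test_evals.py`; the enclosing `evals.create(...)` call and the remaining classify fields are abridged:

judge = {
    "system_template": "Imagine you are a helpful assistant",
    "max_tokens": 8192,  # server default is 32768; raise it for reasoning judges
                         # that spend output budget on chain-of-thought
    "temperature": 0,    # server default is 0.05; 0 makes judging near-deterministic
    "num_workers": 5,    # lower this for rate-limited external judge APIs
}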
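
The `from_checkpoint` format documented for `estimate_price` (`{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}`, optionally suffixed with `:{$STEP}`) reads like this in practice. The IDs are hypothetical, and `training_file` is assumed to be accepted alongside the parameters shown in the hunks above:

from together import Together

client = Together()

estimate = client.fine_tuning.estimate_price(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",  # assumed base model
    training_file="file-train-123",     # hypothetical uploaded training file
    from_checkpoint="ft-abcd-1234:10",  # ":10" pins step 10; omit it for the final checkpoint
)
if estimate.allowed_to_proceed:         # both fields appear in the response type above
    print("credit limit ($):", estimate.user_limit)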
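
For background on the behavior the eager-iterable tests pin down (pydantic/pydantic#9541): with a plain `Iterable` field, pydantic v2 stores a lazy validator-iterator, so the first serialization can consume it and later dumps come back empty. A standalone reproduction sketch using plain pydantic, independent of the SDK types:

from typing import Iterable

import pydantic  # pydantic v2 semantics


class Lazy(pydantic.BaseModel):
    items: Iterable[int]


m = Lazy.model_validate({"items": (x for x in [1, 2, 3])})
print(m.model_dump())  # {'items': [1, 2, 3]} (dumping consumes the generator)
print(m.model_dump())  # {'items': []} on affected versions: nothing left to read

Validating eagerly into a concrete list, as the new tests assert, makes repeated dumps stable.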