From 6468c942a13db66006a01df602a228e64580cca1 Mon Sep 17 00:00:00 2001 From: SSharma-10 Date: Wed, 22 Apr 2026 20:47:16 +0530 Subject: [PATCH] Add v1/messages and v1/embeddings --- specification/DigitalOcean-public.v2.yaml | 22 +++++ .../curl/inference_create_embeddings.yml | 7 ++ .../curl/inference_create_messages.yml | 7 ++ .../inference/inference_create_embeddings.yml | 44 +++++++++ .../inference/inference_create_messages.yml | 62 ++++++++++++ .../inference/models/embedding_data_item.yml | 29 ++++++ .../inference/models/embeddings_request.yml | 35 +++++++ .../inference/models/embeddings_response.yml | 25 +++++ .../inference/models/embeddings_usage.yml | 14 +++ .../models/messages_api_message_param.yml | 24 +++++ .../models/messages_create_error_response.yml | 24 +++++ .../models/messages_create_request.yml | 95 +++++++++++++++++++ .../models/messages_create_response.yml | 53 +++++++++++ .../models/messages_image_block_param.yml | 13 +++ .../models/messages_image_source_param.yml | 19 ++++ .../messages_request_content_block_param.yml | 9 ++ .../messages_request_text_block_param.yml | 16 ++++ .../messages_response_content_block.yml | 4 + .../models/messages_response_text_block.yml | 15 +++ .../messages_response_tool_use_block.yml | 23 +++++ .../models/messages_stream_event.yml | 47 +++++++++ .../models/messages_thinking_config_param.yml | 12 +++ .../models/messages_tool_choice_param.yml | 22 +++++ .../models/messages_tool_definition_param.yml | 18 ++++ .../messages_tool_result_block_param.yml | 24 +++++ .../models/messages_tool_use_block_param.yml | 23 +++++ .../inference/models/messages_usage.yml | 30 ++++++ 27 files changed, 716 insertions(+) create mode 100644 specification/resources/inference/examples/curl/inference_create_embeddings.yml create mode 100644 specification/resources/inference/examples/curl/inference_create_messages.yml create mode 100644 specification/resources/inference/inference_create_embeddings.yml create mode 100644 
specification/resources/inference/inference_create_messages.yml create mode 100644 specification/resources/inference/models/embedding_data_item.yml create mode 100644 specification/resources/inference/models/embeddings_request.yml create mode 100644 specification/resources/inference/models/embeddings_response.yml create mode 100644 specification/resources/inference/models/embeddings_usage.yml create mode 100644 specification/resources/inference/models/messages_api_message_param.yml create mode 100644 specification/resources/inference/models/messages_create_error_response.yml create mode 100644 specification/resources/inference/models/messages_create_request.yml create mode 100644 specification/resources/inference/models/messages_create_response.yml create mode 100644 specification/resources/inference/models/messages_image_block_param.yml create mode 100644 specification/resources/inference/models/messages_image_source_param.yml create mode 100644 specification/resources/inference/models/messages_request_content_block_param.yml create mode 100644 specification/resources/inference/models/messages_request_text_block_param.yml create mode 100644 specification/resources/inference/models/messages_response_content_block.yml create mode 100644 specification/resources/inference/models/messages_response_text_block.yml create mode 100644 specification/resources/inference/models/messages_response_tool_use_block.yml create mode 100644 specification/resources/inference/models/messages_stream_event.yml create mode 100644 specification/resources/inference/models/messages_thinking_config_param.yml create mode 100644 specification/resources/inference/models/messages_tool_choice_param.yml create mode 100644 specification/resources/inference/models/messages_tool_definition_param.yml create mode 100644 specification/resources/inference/models/messages_tool_result_block_param.yml create mode 100644 specification/resources/inference/models/messages_tool_use_block_param.yml create mode 
100644 specification/resources/inference/models/messages_usage.yml diff --git a/specification/DigitalOcean-public.v2.yaml b/specification/DigitalOcean-public.v2.yaml index 918661ca..2e8f1149 100644 --- a/specification/DigitalOcean-public.v2.yaml +++ b/specification/DigitalOcean-public.v2.yaml @@ -281,6 +281,12 @@ tags: description: |- Droplet autoscale pools manage automatic horizontal scaling for your applications based on resource usage (CPU, memory, or both) or a static configuration. + - name: Embeddings + description: |- + Text embedding vectors via `POST /v1/embeddings` on the + [Serverless Inference](https://docs.digitalocean.com/reference/api/api-reference/#tag/Serverless-Inference) base URL + `https://inference.do-ai.run` (bearer model access key). + - name: Firewalls description: |- [DigitalOcean Cloud Firewalls](https://docs.digitalocean.com/products/networking/firewalls/) @@ -2841,6 +2847,14 @@ paths: post: $ref: 'resources/inference/inference_create_chat_completion.yml' + /v1/messages: + post: + $ref: 'resources/inference/inference_create_messages.yml' + + /v1/embeddings: + post: + $ref: 'resources/inference/inference_create_embeddings.yml' + /api/v1/chat/completions: post: $ref: 'resources/inference/agent_inference_create_chat_completion.yml' @@ -2958,6 +2972,14 @@ components: curl -X POST -H "Authorization: Bearer $MODEL_ACCESS_KEY" "https://inference.do-ai.run/v1/chat/completions" ``` + ``` + curl -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $MODEL_ACCESS_KEY" -d '{"model":"claude-opus-4-6","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}' "https://inference.do-ai.run/v1/messages" + ``` + + ``` + curl -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $MODEL_ACCESS_KEY" -d '{"model":"qwen3-embedding-0.6b","input":["hello world","goodbye world"],"encoding_format":"float","user":"user-1234"}' "https://inference.do-ai.run/v1/embeddings" + ``` + **Agent Inference:** ``` diff --git 
a/specification/resources/inference/examples/curl/inference_create_embeddings.yml b/specification/resources/inference/examples/curl/inference_create_embeddings.yml new file mode 100644 index 00000000..57dc5cf3 --- /dev/null +++ b/specification/resources/inference/examples/curl/inference_create_embeddings.yml @@ -0,0 +1,7 @@ +lang: cURL +source: |- + curl -X POST \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $MODEL_ACCESS_KEY" \ + -d '{"model":"qwen3-embedding-0.6b","input":["hello world","goodbye world"],"encoding_format":"float","user":"user-1234"}' \ + "https://inference.do-ai.run/v1/embeddings" diff --git a/specification/resources/inference/examples/curl/inference_create_messages.yml b/specification/resources/inference/examples/curl/inference_create_messages.yml new file mode 100644 index 00000000..635c2905 --- /dev/null +++ b/specification/resources/inference/examples/curl/inference_create_messages.yml @@ -0,0 +1,7 @@ +lang: cURL +source: |- + curl -X POST \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $MODEL_ACCESS_KEY" \ + -d '{"model": "claude-opus-4-6", "max_tokens": 1024, "messages": [{"role": "user", "content": "What is the capital of Portugal?"}]}' \ + "https://inference.do-ai.run/v1/messages" diff --git a/specification/resources/inference/inference_create_embeddings.yml b/specification/resources/inference/inference_create_embeddings.yml new file mode 100644 index 00000000..4cd39860 --- /dev/null +++ b/specification/resources/inference/inference_create_embeddings.yml @@ -0,0 +1,44 @@ +operationId: inference_create_embedding +summary: Create embedding +description: > + Create vector embeddings for one or more text inputs. OpenAI-compatible request and + response. Unknown fields in the request body are rejected. There is no streaming + response for this endpoint. 
+tags: + - Serverless Inference + - Embeddings +servers: + - url: "https://inference.do-ai.run" + description: production +requestBody: + required: true + content: + application/json: + schema: + $ref: "models/embeddings_request.yml" +responses: + "200": + description: Embeddings and usage for the given `input`, in order. + headers: + ratelimit-limit: + $ref: '../../shared/headers.yml#/ratelimit-limit' + ratelimit-remaining: + $ref: '../../shared/headers.yml#/ratelimit-remaining' + ratelimit-reset: + $ref: '../../shared/headers.yml#/ratelimit-reset' + content: + application/json: + schema: + $ref: "models/embeddings_response.yml" + "401": + $ref: '../../shared/responses/unauthorized.yml' + "429": + $ref: '../../shared/responses/too_many_requests.yml' + "500": + $ref: '../../shared/responses/server_error.yml' + default: + $ref: '../../shared/responses/unexpected_error.yml' +x-codeSamples: + - $ref: 'examples/curl/inference_create_embeddings.yml' +security: + - inference_bearer_auth: [] diff --git a/specification/resources/inference/inference_create_messages.yml b/specification/resources/inference/inference_create_messages.yml new file mode 100644 index 00000000..cd866f7e --- /dev/null +++ b/specification/resources/inference/inference_create_messages.yml @@ -0,0 +1,62 @@ +operationId: inference_create_messages +summary: Create the next assistant message +description: > + Send a structured list of input messages with text and/or image content, and the model + will generate the next message in the conversation. +tags: + - Serverless Inference +servers: + - url: "https://inference.do-ai.run" + description: production +x-inference-base-url: "https://inference.do-ai.run" +requestBody: + required: true + content: + application/json: + schema: + $ref: "models/messages_create_request.yml" +responses: + "200": + description: > + Successful generation. 
When `stream` is true, the body is `text/event-stream` with + server-sent event (SSE) payloads; otherwise `application/json` with + `CreateMessageResponse`. + headers: + ratelimit-limit: + $ref: '../../shared/headers.yml#/ratelimit-limit' + ratelimit-remaining: + $ref: '../../shared/headers.yml#/ratelimit-remaining' + ratelimit-reset: + $ref: '../../shared/headers.yml#/ratelimit-reset' + content: + application/json: + schema: + $ref: "models/messages_create_response.yml" + text/event-stream: + schema: + $ref: "models/messages_stream_event.yml" + "400": + description: Invalid request body, validation error, or policy rejection. + headers: + ratelimit-limit: + $ref: '../../shared/headers.yml#/ratelimit-limit' + ratelimit-remaining: + $ref: '../../shared/headers.yml#/ratelimit-remaining' + ratelimit-reset: + $ref: '../../shared/headers.yml#/ratelimit-reset' + content: + application/json: + schema: + $ref: "models/messages_create_error_response.yml" + "401": + $ref: '../../shared/responses/unauthorized.yml' + "429": + $ref: '../../shared/responses/too_many_requests.yml' + "500": + $ref: '../../shared/responses/server_error.yml' + default: + $ref: '../../shared/responses/unexpected_error.yml' +x-codeSamples: + - $ref: 'examples/curl/inference_create_messages.yml' +security: + - inference_bearer_auth: [] diff --git a/specification/resources/inference/models/embedding_data_item.yml b/specification/resources/inference/models/embedding_data_item.yml new file mode 100644 index 00000000..d1fb840e --- /dev/null +++ b/specification/resources/inference/models/embedding_data_item.yml @@ -0,0 +1,29 @@ +type: object +description: One row in the embeddings `data` array, aligned with a single `input` item. +required: + - index + - object + - embedding +properties: + index: + type: integer + description: Zero-based index of the corresponding `input` item (0 when `input` is a string). + example: 0 + object: + type: string + description: The object type, which is always `embedding`. 
+ enum: + - embedding + example: embedding + embedding: + description: The embedding vector, or a base64-encoded string when the request set encoding_format to base64. + example: [0.0123, -0.0456, 0.0001] + oneOf: + - type: array + description: Float vector when encoding_format is float or omitted. + items: + type: number + example: [0.0123, -0.0456, 0.0001] + - type: string + description: Base64 payload when encoding_format is base64. + example: AGZ...encoded... diff --git a/specification/resources/inference/models/embeddings_request.yml b/specification/resources/inference/models/embeddings_request.yml new file mode 100644 index 00000000..99f442f3 --- /dev/null +++ b/specification/resources/inference/models/embeddings_request.yml @@ -0,0 +1,35 @@ +type: object +description: Request body for `POST /v1/embeddings` (OpenAI-compatible). Extra fields are rejected. +required: + - model + - input +additionalProperties: false +properties: + model: + type: string + description: Model id to use for embeddings. Must match a model your account can access. + example: qwen3-embedding-0.6b + input: + description: A single string or 1–2048 strings; each string produces one row in `data`, in order. + example: hello world + oneOf: + - type: string + example: hello world + - type: array + minItems: 1 + maxItems: 2048 + items: + type: string + example: ["hello world", "goodbye world"] + user: + type: string + description: Optional end-user identifier to help with abuse monitoring. + example: user-1234 + encoding_format: + type: string + description: How embedding values are returned in each `data[].embedding` field. 
+ enum: + - float + - base64 + default: float + example: float diff --git a/specification/resources/inference/models/embeddings_response.yml b/specification/resources/inference/models/embeddings_response.yml new file mode 100644 index 00000000..75b978aa --- /dev/null +++ b/specification/resources/inference/models/embeddings_response.yml @@ -0,0 +1,25 @@ +type: object +description: OpenAI-style embeddings response. +required: + - object + - model + - data + - usage +properties: + object: + type: string + description: The object type, which is always the string `list`. + enum: + - list + example: list + model: + type: string + description: The embedding model that produced the vectors. + example: qwen3-embedding-0.6b + data: + type: array + description: One entry for each `input` string, in the same order. + items: + $ref: embedding_data_item.yml + usage: + $ref: embeddings_usage.yml diff --git a/specification/resources/inference/models/embeddings_usage.yml b/specification/resources/inference/models/embeddings_usage.yml new file mode 100644 index 00000000..2d07e33d --- /dev/null +++ b/specification/resources/inference/models/embeddings_usage.yml @@ -0,0 +1,14 @@ +type: object +description: Token usage for the embeddings request. +required: + - prompt_tokens + - total_tokens +properties: + prompt_tokens: + type: integer + description: Number of input tokens used for the embedding. + example: 6 + total_tokens: + type: integer + description: Total billable tokens for the request. + example: 6 diff --git a/specification/resources/inference/models/messages_api_message_param.yml b/specification/resources/inference/models/messages_api_message_param.yml new file mode 100644 index 00000000..1a94e879 --- /dev/null +++ b/specification/resources/inference/models/messages_api_message_param.yml @@ -0,0 +1,24 @@ +type: object +description: > + One turn in the conversation. Roles are `user` or `assistant` (no `system` role; use the + top-level `system` field). 
Content may be a string (equivalent to a single text block) or + an array of content blocks. +required: + - role + - content +properties: + role: + type: string + description: Speaker role for this message. + enum: + - user + - assistant + example: user + content: + description: Message body as plain text or structured blocks. + example: What is the capital of Portugal? + oneOf: + - type: string + - type: array + items: + $ref: messages_request_content_block_param.yml diff --git a/specification/resources/inference/models/messages_create_error_response.yml b/specification/resources/inference/models/messages_create_error_response.yml new file mode 100644 index 00000000..c9fab6da --- /dev/null +++ b/specification/resources/inference/models/messages_create_error_response.yml @@ -0,0 +1,24 @@ +type: object +description: Error envelope for some failures from this endpoint. +required: + - type + - error +properties: + type: + type: string + enum: + - error + error: + type: object + required: + - type + - message + properties: + type: + type: string + description: Machine-readable error code. + example: invalid_request_error + message: + type: string + description: Human-readable error message. + example: max_tokens must be positive diff --git a/specification/resources/inference/models/messages_create_request.yml b/specification/resources/inference/models/messages_create_request.yml new file mode 100644 index 00000000..3a66ba9d --- /dev/null +++ b/specification/resources/inference/models/messages_create_request.yml @@ -0,0 +1,95 @@ +type: object +description: > + Request body for `POST /v1/messages`. Required fields are `model`, `messages`, and + `max_tokens`. +required: + - model + - max_tokens + - messages +properties: + model: + type: string + description: Model ID (for example `claude-opus-4-6` or a serverless model id). + example: claude-opus-4-6 + max_tokens: + type: integer + minimum: 1 + description: Maximum tokens to generate before stopping. 
+ messages: + type: array + description: > + Conversation turns. Each item has `role` `user` or `assistant` and `content` as a + string or an array of content blocks. + minItems: 1 + items: + $ref: messages_api_message_param.yml + system: + description: System prompt as plain text or as an array of text blocks. + oneOf: + - type: string + - type: array + items: + $ref: messages_request_text_block_param.yml + stop_sequences: + type: array + description: Custom strings that stop generation when produced. + items: + type: string + stream: + type: boolean + default: false + description: When true, the response is streamed using server-sent events (SSE). + temperature: + type: number + minimum: 0 + maximum: 1 + nullable: true + description: Sampling temperature between 0.0 and 1.0. + top_p: + type: number + nullable: true + description: Nucleus sampling; use either `temperature` or `top_p`, not both. + top_k: + type: integer + minimum: 0 + nullable: true + description: Top-K sampling cutoff. + tools: + type: array + description: Tool definitions the model may invoke. + items: + $ref: messages_tool_definition_param.yml + tool_choice: + $ref: messages_tool_choice_param.yml + metadata: + type: object + description: Optional request metadata. + properties: + user_id: + type: string + description: > + Opaque identifier for the end user (for example a UUID or hash). Do not include PII. + example: 550e8400-e29b-41d4-a716-446655440000 + reasoning_effort: + type: string + nullable: true + description: > + DigitalOcean extension for reasoning-capable models. Ignored by executors that do not + support it. + enum: + - none + - minimal + - low + - medium + - high + - xhigh + speed: + type: string + nullable: true + description: > + DigitalOcean extension for preferred inference speed. Ignored when not supported. 
+ enum: + - standard + - fast + thinking: + $ref: messages_thinking_config_param.yml diff --git a/specification/resources/inference/models/messages_create_response.yml b/specification/resources/inference/models/messages_create_response.yml new file mode 100644 index 00000000..74c4f6e6 --- /dev/null +++ b/specification/resources/inference/models/messages_create_response.yml @@ -0,0 +1,53 @@ +type: object +description: > + Non-streaming assistant message from `POST /v1/messages`. `type` is always `message` and + `role` is always `assistant`. +required: + - id + - type + - role + - content + - model + - stop_reason + - usage +properties: + id: + type: string + description: Unique identifier for this message object. + example: msg_01AbCdEfGhIjKlMnOpQrStUv + type: + type: string + description: Object type discriminator. + enum: + - message + example: message + role: + type: string + description: Always `assistant` for this response. + enum: + - assistant + example: assistant + content: + type: array + description: Assistant output blocks (`text` and/or `tool_use`). + items: + $ref: messages_response_content_block.yml + model: + type: string + description: Model that produced the message. + example: claude-opus-4-6 + stop_reason: + type: string + nullable: true + description: Why generation stopped. + enum: + - end_turn + - max_tokens + - stop_sequence + - tool_use + stop_sequence: + type: string + nullable: true + description: When `stop_reason` is `stop_sequence`, the sequence that matched. + usage: + $ref: messages_usage.yml diff --git a/specification/resources/inference/models/messages_image_block_param.yml b/specification/resources/inference/models/messages_image_block_param.yml new file mode 100644 index 00000000..d6d7077e --- /dev/null +++ b/specification/resources/inference/models/messages_image_block_param.yml @@ -0,0 +1,13 @@ +type: object +description: Image content block in a request message. 
+required: + - type + - source +properties: + type: + type: string + enum: + - image + example: image + source: + $ref: messages_image_source_param.yml diff --git a/specification/resources/inference/models/messages_image_source_param.yml b/specification/resources/inference/models/messages_image_source_param.yml new file mode 100644 index 00000000..5c3e9ca4 --- /dev/null +++ b/specification/resources/inference/models/messages_image_source_param.yml @@ -0,0 +1,19 @@ +type: object +description: Image payload (for example base64-encoded bytes). +required: + - type + - media_type + - data +properties: + type: + type: string + description: Source kind (for example `base64`). + example: base64 + media_type: + type: string + description: MIME type of the image (for example `image/jpeg`). + example: image/jpeg + data: + type: string + description: Encoded image data. + example: /9j/4AAQSkZJRg== diff --git a/specification/resources/inference/models/messages_request_content_block_param.yml b/specification/resources/inference/models/messages_request_content_block_param.yml new file mode 100644 index 00000000..bc4aba45 --- /dev/null +++ b/specification/resources/inference/models/messages_request_content_block_param.yml @@ -0,0 +1,9 @@ +description: > + Structured message content. Block `type` is one of `text`, `image`, `tool_use`, or + `tool_result`. Some tool definitions may be rejected by server policy even when valid + here. 
+oneOf: + - $ref: messages_request_text_block_param.yml + - $ref: messages_image_block_param.yml + - $ref: messages_tool_use_block_param.yml + - $ref: messages_tool_result_block_param.yml diff --git a/specification/resources/inference/models/messages_request_text_block_param.yml b/specification/resources/inference/models/messages_request_text_block_param.yml new file mode 100644 index 00000000..85390a00 --- /dev/null +++ b/specification/resources/inference/models/messages_request_text_block_param.yml @@ -0,0 +1,16 @@ +type: object +description: A text content block in a request message. +required: + - type + - text +properties: + type: + type: string + enum: + - text + description: Block type identifier. + example: text + text: + type: string + description: Plain text for this block. + example: Hello, Claude diff --git a/specification/resources/inference/models/messages_response_content_block.yml b/specification/resources/inference/models/messages_response_content_block.yml new file mode 100644 index 00000000..7ae75daa --- /dev/null +++ b/specification/resources/inference/models/messages_response_content_block.yml @@ -0,0 +1,4 @@ +description: Assistant output block (text and tool calls). +oneOf: + - $ref: messages_response_text_block.yml + - $ref: messages_response_tool_use_block.yml diff --git a/specification/resources/inference/models/messages_response_text_block.yml b/specification/resources/inference/models/messages_response_text_block.yml new file mode 100644 index 00000000..d4b165b0 --- /dev/null +++ b/specification/resources/inference/models/messages_response_text_block.yml @@ -0,0 +1,15 @@ +type: object +description: A text block in an assistant message in the response. +required: + - type + - text +properties: + type: + type: string + enum: + - text + example: text + text: + type: string + description: Generated text for this block. + example: Hi! How can I help? 
diff --git a/specification/resources/inference/models/messages_response_tool_use_block.yml b/specification/resources/inference/models/messages_response_tool_use_block.yml new file mode 100644 index 00000000..85bf5040 --- /dev/null +++ b/specification/resources/inference/models/messages_response_tool_use_block.yml @@ -0,0 +1,23 @@ +type: object +description: Tool call emitted by the assistant in the response. +required: + - type + - id + - name + - input +properties: + type: + type: string + enum: + - tool_use + example: tool_use + id: + type: string + example: toolu_01ABCdefGhIjKlMnOpQrStUv + name: + type: string + example: get_weather + input: + type: object + additionalProperties: true + description: Arguments for the tool invocation. diff --git a/specification/resources/inference/models/messages_stream_event.yml b/specification/resources/inference/models/messages_stream_event.yml new file mode 100644 index 00000000..d296a957 --- /dev/null +++ b/specification/resources/inference/models/messages_stream_event.yml @@ -0,0 +1,47 @@ +type: object +description: > + One server-sent event (SSE) JSON payload when `stream` is true. Each event line contains + a JSON object after the `data:` prefix. +required: + - type +properties: + type: + type: string + description: SSE event type. + enum: + - message_start + - content_block_start + - content_block_delta + - content_block_stop + - message_delta + - message_stop + - ping + message: + type: object + description: Present on `message_start`; initial message metadata. + additionalProperties: true + index: + type: integer + description: Index of the content block this event refers to. + content_block: + type: object + description: Present on `content_block_start`. + additionalProperties: true + delta: + type: object + description: Present on `content_block_delta` and `message_delta`. + additionalProperties: true + usage: + type: object + description: Streamed usage (for example on `message_delta`). 
+ properties: + output_tokens: + type: integer + example: 64 + speed: + type: string + nullable: true + enum: + - standard + - fast + example: fast diff --git a/specification/resources/inference/models/messages_thinking_config_param.yml b/specification/resources/inference/models/messages_thinking_config_param.yml new file mode 100644 index 00000000..f2739fd9 --- /dev/null +++ b/specification/resources/inference/models/messages_thinking_config_param.yml @@ -0,0 +1,12 @@ +type: object +nullable: true +description: > + Extended thinking configuration. Executors that do not support thinking may ignore this + field. +required: + - type +properties: + type: + type: string + description: Thinking mode discriminator (for example enabled or disabled). + example: enabled diff --git a/specification/resources/inference/models/messages_tool_choice_param.yml b/specification/resources/inference/models/messages_tool_choice_param.yml new file mode 100644 index 00000000..921f9ceb --- /dev/null +++ b/specification/resources/inference/models/messages_tool_choice_param.yml @@ -0,0 +1,22 @@ +description: > + Controls how the model uses tools: automatic selection, require any tool, force a specific + tool, or a string form accepted by the service. +oneOf: + - type: string + example: auto + - type: object + required: + - type + properties: + type: + type: string + enum: + - auto + - any + - tool + description: Tool selection mode. + example: tool + name: + type: string + description: When `type` is `tool`, the tool name to use. + example: get_weather diff --git a/specification/resources/inference/models/messages_tool_definition_param.yml b/specification/resources/inference/models/messages_tool_definition_param.yml new file mode 100644 index 00000000..6f566a25 --- /dev/null +++ b/specification/resources/inference/models/messages_tool_definition_param.yml @@ -0,0 +1,18 @@ +type: object +description: Tool definition the model may call (`name`, JSON Schema for `input`). 
+required: + - name + - input_schema +properties: + name: + type: string + description: Tool name referenced in `tool_use` blocks. + example: get_weather + description: + type: string + description: Human-readable description of what the tool does. + example: Get the current weather for a location. + input_schema: + type: object + description: JSON Schema (draft 2020-12 style) describing the tool input object. + additionalProperties: true diff --git a/specification/resources/inference/models/messages_tool_result_block_param.yml b/specification/resources/inference/models/messages_tool_result_block_param.yml new file mode 100644 index 00000000..adb4fc47 --- /dev/null +++ b/specification/resources/inference/models/messages_tool_result_block_param.yml @@ -0,0 +1,24 @@ +type: object +description: Result for a prior `tool_use` block, returned to the model. +required: + - type + - tool_use_id + - content +properties: + type: + type: string + enum: + - tool_result + example: tool_result + tool_use_id: + type: string + example: toolu_01ABCdefGhIjKlMnOpQrStUv + content: + description: Tool output as plain text or structured objects. + example: 72°F and sunny + oneOf: + - type: string + - type: array + items: + type: object + additionalProperties: true diff --git a/specification/resources/inference/models/messages_tool_use_block_param.yml b/specification/resources/inference/models/messages_tool_use_block_param.yml new file mode 100644 index 00000000..e47630c2 --- /dev/null +++ b/specification/resources/inference/models/messages_tool_use_block_param.yml @@ -0,0 +1,23 @@ +type: object +description: Tool invocation block in a request message (from the model or a prior turn). 
+required: + - type + - id + - name + - input +properties: + type: + type: string + enum: + - tool_use + example: tool_use + id: + type: string + example: toolu_01ABCdefGhIjKlMnOpQrStUv + name: + type: string + example: get_weather + input: + type: object + description: JSON object matching the tool's `input_schema`. + additionalProperties: true diff --git a/specification/resources/inference/models/messages_usage.yml b/specification/resources/inference/models/messages_usage.yml new file mode 100644 index 00000000..a6f31885 --- /dev/null +++ b/specification/resources/inference/models/messages_usage.yml @@ -0,0 +1,30 @@ +type: object +description: Token usage for a non-streaming `POST /v1/messages` response. +required: + - input_tokens + - output_tokens +properties: + input_tokens: + type: integer + description: Number of input tokens billed for this request. + example: 42 + output_tokens: + type: integer + description: Number of output tokens generated. + example: 128 + cache_creation_input_tokens: + type: integer + description: Input tokens used to create a prompt cache entry, if applicable. + example: 0 + cache_read_input_tokens: + type: integer + description: Input tokens read from a prompt cache, if applicable. + example: 0 + speed: + type: string + nullable: true + description: Inference speed tier reflected in billing or routing. + enum: + - standard + - fast + example: standard