Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions specification/DigitalOcean-public.v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,12 @@ tags:
description: |-
Droplet autoscale pools manage automatic horizontal scaling for your applications based on resource usage (CPU, memory, or both) or a static configuration.

- name: Embeddings
description: |-
Create text embedding vectors via `POST /v1/embeddings` against the
[Serverless Inference](https://docs.digitalocean.com/reference/api/api-reference/#tag/Serverless-Inference) base URL
`https://inference.do-ai.run`, authenticating with a bearer model access key.

- name: Firewalls
description: |-
[DigitalOcean Cloud Firewalls](https://docs.digitalocean.com/products/networking/firewalls/)
Expand Down Expand Up @@ -2841,6 +2847,14 @@ paths:
post:
$ref: 'resources/inference/inference_create_chat_completion.yml'

/v1/messages:
post:
$ref: 'resources/inference/inference_create_messages.yml'

/v1/embeddings:
post:
$ref: 'resources/inference/inference_create_embeddings.yml'

/api/v1/chat/completions:
post:
$ref: 'resources/inference/agent_inference_create_chat_completion.yml'
Expand Down Expand Up @@ -2958,6 +2972,14 @@ components:
curl -X POST -H "Authorization: Bearer $MODEL_ACCESS_KEY" "https://inference.do-ai.run/v1/chat/completions"
```

```
curl -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $MODEL_ACCESS_KEY" -d '{"model":"claude-opus-4-6","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}' "https://inference.do-ai.run/v1/messages"
```

```
curl -X POST -H "Content-Type: application/json" -H "Authorization: Bearer $MODEL_ACCESS_KEY" -d '{"model":"qwen3-embedding-0.6b","input":["hello world","goodbye world"],"encoding_format":"float","user":"user-1234"}' "https://inference.do-ai.run/v1/embeddings"
```

**Agent Inference:**

```
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# x-codeSamples entry: cURL example for `POST /v1/embeddings` on the
# Serverless Inference base URL (authenticates with a model access key).
lang: cURL
source: |-
  curl -X POST \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -d '{"model":"qwen3-embedding-0.6b","input":["hello world","goodbye world"],"encoding_format":"float","user":"user-1234"}' \
    "https://inference.do-ai.run/v1/embeddings"
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# x-codeSamples entry: cURL example for `POST /v1/messages` on the
# Serverless Inference base URL (authenticates with a model access key).
lang: cURL
source: |-
  curl -X POST \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $MODEL_ACCESS_KEY" \
    -d '{"model": "claude-opus-4-6", "max_tokens": 1024, "messages": [{"role": "user", "content": "What is the capital of Portugal?"}]}' \
    "https://inference.do-ai.run/v1/messages"
44 changes: 44 additions & 0 deletions specification/resources/inference/inference_create_embeddings.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# OpenAPI operation: POST /v1/embeddings — OpenAI-compatible embeddings on the
# Serverless Inference base URL.
operationId: inference_create_embedding
summary: Create embedding
description: >
  Create vector embeddings for one or more text inputs. OpenAI-compatible request and
  response. Unknown fields in the request body are rejected. There is no streaming
  response for this endpoint.
tags:
  - Serverless Inference
  - Embeddings
# Served from the inference host, not the regular api.digitalocean.com server.
servers:
  - url: "https://inference.do-ai.run"
    description: production
requestBody:
  required: true
  content:
    application/json:
      schema:
        $ref: "models/embeddings_request.yml"
responses:
  "200":
    # The request schema only defines `input` (string or array of strings) and
    # rejects unknown fields, so a hypothetical `inputs` field does not exist —
    # the description must not mention it.
    description: Embeddings and usage for each `input` item, in order.
    headers:
      # Standard rate-limit headers shared across the public API spec.
      ratelimit-limit:
        $ref: '../../shared/headers.yml#/ratelimit-limit'
      ratelimit-remaining:
        $ref: '../../shared/headers.yml#/ratelimit-remaining'
      ratelimit-reset:
        $ref: '../../shared/headers.yml#/ratelimit-reset'
    content:
      application/json:
        schema:
          $ref: "models/embeddings_response.yml"
  "401":
    $ref: '../../shared/responses/unauthorized.yml'
  "429":
    $ref: '../../shared/responses/too_many_requests.yml'
  "500":
    $ref: '../../shared/responses/server_error.yml'
  default:
    $ref: '../../shared/responses/unexpected_error.yml'
x-codeSamples:
  - $ref: 'examples/curl/inference_create_embeddings.yml'
# Bearer auth; the code samples use a model access key as the token.
security:
  - inference_bearer_auth: []
62 changes: 62 additions & 0 deletions specification/resources/inference/inference_create_messages.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# OpenAPI operation: POST /v1/messages — structured conversation endpoint on
# the Serverless Inference base URL.
operationId: inference_create_messages
summary: Create the next assistant message
description: >
  Send a structured list of input messages with text and/or image content, and the model
  will generate the next message in the conversation.
tags:
  - Serverless Inference
# Served from the inference host, not the regular api.digitalocean.com server.
servers:
  - url: "https://inference.do-ai.run"
    description: production
# NOTE(review): vendor extension; presumably consumed by docs tooling to
# surface the base URL — confirm against the docs pipeline.
x-inference-base-url: "https://inference.do-ai.run"
requestBody:
  required: true
  content:
    application/json:
      schema:
        $ref: "models/messages_create_request.yml"
responses:
  "200":
    description: >
      Successful generation. When `stream` is true, the body is `text/event-stream` with
      server-sent event (SSE) payloads; otherwise `application/json` with
      `CreateMessageResponse`.
    headers:
      # Standard rate-limit headers shared across the public API spec.
      ratelimit-limit:
        $ref: '../../shared/headers.yml#/ratelimit-limit'
      ratelimit-remaining:
        $ref: '../../shared/headers.yml#/ratelimit-remaining'
      ratelimit-reset:
        $ref: '../../shared/headers.yml#/ratelimit-reset'
    content:
      application/json:
        schema:
          $ref: "models/messages_create_response.yml"
      # Streaming variant returned when the request sets `stream` to true.
      text/event-stream:
        schema:
          $ref: "models/messages_stream_event.yml"
  # 400 carries its own error envelope (unlike the shared 401/429/500 refs).
  "400":
    description: Invalid request body, validation error, or policy rejection.
    headers:
      ratelimit-limit:
        $ref: '../../shared/headers.yml#/ratelimit-limit'
      ratelimit-remaining:
        $ref: '../../shared/headers.yml#/ratelimit-remaining'
      ratelimit-reset:
        $ref: '../../shared/headers.yml#/ratelimit-reset'
    content:
      application/json:
        schema:
          $ref: "models/messages_create_error_response.yml"
  "401":
    $ref: '../../shared/responses/unauthorized.yml'
  "429":
    $ref: '../../shared/responses/too_many_requests.yml'
  "500":
    $ref: '../../shared/responses/server_error.yml'
  default:
    $ref: '../../shared/responses/unexpected_error.yml'
x-codeSamples:
  - $ref: 'examples/curl/inference_create_messages.yml'
# Bearer auth; the code samples use a model access key as the token.
security:
  - inference_bearer_auth: []
29 changes: 29 additions & 0 deletions specification/resources/inference/models/embedding_data_item.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Schema for one element of the embeddings response `data` array.
type: object
description: One row in the embeddings `data` array, aligned with a single `input` item.
required:
  - index
  - object
  - embedding
properties:
  index:
    type: integer
    description: Zero-based index of the corresponding `input` item (0 when `input` is a string).
    example: 0
  object:
    type: string
    description: The object type, which is always `embedding`.
    enum:
      - embedding
    example: embedding
  embedding:
    # Representation is selected by the request's `encoding_format` field
    # (see embeddings_request.yml); schema is a oneOf over the two forms.
    description: The embedding vector, or a base64-encoded string when the request set `encoding_format` to `base64`.
    example: [0.0123, -0.0456, 0.0001]
    oneOf:
      - type: array
        description: Float vector when `encoding_format` is `float` or omitted.
        items:
          type: number
        example: [0.0123, -0.0456, 0.0001]
      - type: string
        description: Base64 payload when `encoding_format` is `base64`.
        # Quoted so the `...` placeholder stays an unambiguous plain string.
        example: "AGZ...encoded..."
35 changes: 35 additions & 0 deletions specification/resources/inference/models/embeddings_request.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Request-body schema for POST /v1/embeddings (OpenAI-compatible).
type: object
description: Request body for `POST /v1/embeddings` (OpenAI-compatible). Extra fields are rejected.
required:
  - model
  - input
# Unknown request fields are rejected rather than ignored.
additionalProperties: false
properties:
  model:
    type: string
    description: Model id to use for embeddings. Must match a model your account can access.
    example: qwen3-embedding-0.6b
  input:
    # Either a bare string (one row in `data`) or an array of 1–2048 strings.
    description: A single string or 1–2048 strings; each string produces one row in `data`, in order.
    example: hello world
    oneOf:
      - type: string
        example: hello world
      - type: array
        minItems: 1
        maxItems: 2048
        items:
          type: string
        example: ["hello world", "goodbye world"]
  user:
    type: string
    description: Optional end-user identifier to help with abuse monitoring.
    example: user-1234
  encoding_format:
    type: string
    description: How embedding values are returned in each `data[].embedding` field.
    enum:
      - float
      - base64
    default: float
    example: float
25 changes: 25 additions & 0 deletions specification/resources/inference/models/embeddings_response.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Response schema for POST /v1/embeddings (OpenAI-compatible `list` object).
type: object
description: OpenAI-style embeddings response.
required:
  - object
  - model
  - data
  - usage
properties:
  object:
    type: string
    description: The object type, which is always the string `list`.
    enum:
      - list
    example: list
  model:
    type: string
    description: The embedding model that produced the vectors.
    example: qwen3-embedding-0.6b
  data:
    type: array
    description: One entry for each `input` string, in the same order.
    items:
      # Refs quoted for consistency with the other $ref values in this PR.
      $ref: "embedding_data_item.yml"
  usage:
    # Token accounting for the request (prompt_tokens / total_tokens).
    $ref: "embeddings_usage.yml"
14 changes: 14 additions & 0 deletions specification/resources/inference/models/embeddings_usage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Token accounting for an embeddings request; mirrors OpenAI's `usage` object.
type: object
description: Token usage for the embeddings request.
required:
  - prompt_tokens
  - total_tokens
properties:
  prompt_tokens:
    type: integer
    description: Number of input tokens used for the embedding.
    example: 6
  total_tokens:
    type: integer
    # Equals prompt_tokens in the examples; embeddings produce no completion
    # tokens — TODO confirm the two fields are always equal for this endpoint.
    description: Total billable tokens for the request.
    example: 6
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Schema for a single conversation turn in the /v1/messages request body.
type: object
description: >
  One turn in the conversation. Roles are `user` or `assistant` (no `system` role; use the
  top-level `system` field). Content may be a string (equivalent to a single text block) or
  an array of content blocks.
required:
  - role
  - content
properties:
  role:
    type: string
    description: Speaker role for this message.
    enum:
      - user
      - assistant
    example: user
  content:
    # Shorthand string, or a list of structured content-block objects.
    description: Message body as plain text or structured blocks.
    example: What is the capital of Portugal?
    oneOf:
      - type: string
      - type: array
        items:
          $ref: messages_request_content_block_param.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Error envelope returned by POST /v1/messages for request failures
# (e.g. the 400 "invalid request body, validation error, or policy
# rejection" response defined in inference_create_messages.yml).
type: object
description: >
  Error envelope for request failures from this endpoint, such as invalid
  request bodies, validation errors, or policy rejections.
required:
  - type
  - error
properties:
  type:
    type: string
    description: The object type, which is always `error`.
    enum:
      - error
    example: error
  error:
    type: object
    description: Details of the failure.
    required:
      - type
      - message
    properties:
      type:
        type: string
        description: Machine-readable error code.
        example: invalid_request_error
      message:
        type: string
        description: Human-readable error message.
        example: max_tokens must be positive
Loading
Loading