Skip to content

Commit 2151063

Browse files
feat(api): realtime 2
1 parent a5c4184 commit 2151063

7 files changed

Lines changed: 114 additions & 50 deletions

File tree

.stats.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 232
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai/openai-08cb8ed18dfe4a9fa518e278576d3cfe5710cb5c22789cf80826c900569bcf56.yml
3-
openapi_spec_hash: 20f820c94f54741b75d719f6a7371c12
4-
config_hash: f291a449469edfe61a28424e548899b2
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai/openai-371f497afe4d6070f6e252e5febbe8f453c7058a8dff0c26a01b4d88442a4ac2.yml
3+
openapi_spec_hash: d39f46e8fda45f77096448105efd175a
4+
config_hash: b64135fff1fe9cf4069b9ecf59ae8b07

src/resources/realtime/api.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ Types:
5656
- <code><a href="./src/resources/realtime/realtime.ts">RealtimeMcpToolCall</a></code>
5757
- <code><a href="./src/resources/realtime/realtime.ts">RealtimeMcpToolExecutionError</a></code>
5858
- <code><a href="./src/resources/realtime/realtime.ts">RealtimeMcphttpError</a></code>
59+
- <code><a href="./src/resources/realtime/realtime.ts">RealtimeReasoning</a></code>
60+
- <code><a href="./src/resources/realtime/realtime.ts">RealtimeReasoningEffort</a></code>
5961
- <code><a href="./src/resources/realtime/realtime.ts">RealtimeResponse</a></code>
6062
- <code><a href="./src/resources/realtime/realtime.ts">RealtimeResponseCreateAudioOutput</a></code>
6163
- <code><a href="./src/resources/realtime/realtime.ts">RealtimeResponseCreateMcpTool</a></code>
@@ -124,7 +126,6 @@ Types:
124126

125127
Types:
126128

127-
- <code><a href="./src/resources/realtime/client-secrets.ts">RealtimeSessionClientSecret</a></code>
128129
- <code><a href="./src/resources/realtime/client-secrets.ts">RealtimeSessionCreateResponse</a></code>
129130
- <code><a href="./src/resources/realtime/client-secrets.ts">RealtimeTranscriptionSessionCreateResponse</a></code>
130131
- <code><a href="./src/resources/realtime/client-secrets.ts">RealtimeTranscriptionSessionTurnDetection</a></code>

src/resources/realtime/calls.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ export interface CallAcceptParams {
134134
| (string & {})
135135
| 'gpt-realtime'
136136
| 'gpt-realtime-1.5'
137+
| 'gpt-realtime-2'
137138
| 'gpt-realtime-2025-08-28'
138139
| 'gpt-4o-realtime-preview'
139140
| 'gpt-4o-realtime-preview-2024-10-01'
@@ -157,12 +158,23 @@ export interface CallAcceptParams {
157158
*/
158159
output_modalities?: Array<'text' | 'audio'>;
159160

161+
/**
162+
* Whether the model may call multiple tools in parallel. Only supported by
163+
* reasoning Realtime models such as `gpt-realtime-2`.
164+
*/
165+
parallel_tool_calls?: boolean;
166+
160167
/**
161168
* Reference to a prompt template and its variables.
162169
* [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
163170
*/
164171
prompt?: ResponsesAPI.ResponsePrompt | null;
165172

173+
/**
174+
* Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.
175+
*/
176+
reasoning?: RealtimeAPI.RealtimeReasoning;
177+
166178
/**
167179
* How the model chooses tools. Provide one of the string modes or force a specific
168180
* function/MCP tool.

src/resources/realtime/client-secrets.ts

Lines changed: 25 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -41,32 +41,18 @@ export class ClientSecrets extends APIResource {
4141
}
4242

4343
/**
44-
* Ephemeral key returned by the API.
44+
* A Realtime session configuration object.
4545
*/
46-
export interface RealtimeSessionClientSecret {
47-
/**
48-
* Timestamp for when the token expires. Currently, all tokens expire after one
49-
* minute.
50-
*/
51-
expires_at: number;
52-
46+
export interface RealtimeSessionCreateResponse {
5347
/**
54-
* Ephemeral key usable in client environments to authenticate connections to the
55-
* Realtime API. Use this in client-side environments rather than a standard API
56-
* token, which should only be used server-side.
48+
* Unique identifier for the session that looks like `sess_1234567890abcdef`.
5749
*/
58-
value: string;
59-
}
50+
id: string;
6051

61-
/**
62-
* A new Realtime session configuration, with an ephemeral key. Default TTL for
63-
* keys is one minute.
64-
*/
65-
export interface RealtimeSessionCreateResponse {
6652
/**
67-
* Ephemeral key returned by the API.
53+
* The object type. Always `realtime.session`.
6854
*/
69-
client_secret: RealtimeSessionClientSecret;
55+
object: 'realtime.session';
7056

7157
/**
7258
* The type of session to create. Always `realtime` for the Realtime API.
@@ -78,6 +64,11 @@ export interface RealtimeSessionCreateResponse {
7864
*/
7965
audio?: RealtimeSessionCreateResponse.Audio;
8066

67+
/**
68+
* Expiration timestamp for the session, in seconds since epoch.
69+
*/
70+
expires_at?: number;
71+
8172
/**
8273
* Additional fields to include in server outputs.
8374
*
@@ -115,6 +106,7 @@ export interface RealtimeSessionCreateResponse {
115106
| (string & {})
116107
| 'gpt-realtime'
117108
| 'gpt-realtime-1.5'
109+
| 'gpt-realtime-2'
118110
| 'gpt-realtime-2025-08-28'
119111
| 'gpt-4o-realtime-preview'
120112
| 'gpt-4o-realtime-preview-2024-10-01'
@@ -144,6 +136,11 @@ export interface RealtimeSessionCreateResponse {
144136
*/
145137
prompt?: ResponsesAPI.ResponsePrompt | null;
146138

139+
/**
140+
* Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.
141+
*/
142+
reasoning?: RealtimeAPI.RealtimeReasoning;
143+
147144
/**
148145
* How the model chooses tools. Provide one of the string modes or force a specific
149146
* function/MCP tool.
@@ -215,16 +212,6 @@ export namespace RealtimeSessionCreateResponse {
215212
*/
216213
noise_reduction?: Input.NoiseReduction;
217214

218-
/**
219-
* Configuration for input audio transcription, defaults to off and can be set to
220-
* `null` to turn off once on. Input audio transcription is not native to the
221-
* model, since the model consumes audio directly. Transcription runs
222-
* asynchronously through
223-
* [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
224-
* and should be treated as guidance of input audio content rather than precisely
225-
* what the model heard. The client can optionally set the language and prompt for
226-
* transcription, these offer additional guidance to the transcription service.
227-
*/
228215
transcription?: RealtimeAPI.AudioTranscription;
229216

230217
/**
@@ -241,6 +228,9 @@ export namespace RealtimeSessionCreateResponse {
241228
* trails off with "uhhm", the model will score a low probability of turn end and
242229
* wait longer for the user to continue speaking. This can be useful for more
243230
* natural conversations, but may have a higher latency.
231+
*
232+
* For `gpt-realtime-whisper` transcription sessions, turn detection must be set to
233+
* `null`; VAD is not supported.
244234
*/
245235
turn_detection?: Input.ServerVad | Input.SemanticVad | null;
246236
}
@@ -640,17 +630,15 @@ export namespace RealtimeTranscriptionSessionCreateResponse {
640630
*/
641631
noise_reduction?: Input.NoiseReduction;
642632

643-
/**
644-
* Configuration of the transcription model.
645-
*/
646633
transcription?: RealtimeAPI.AudioTranscription;
647634

648635
/**
649636
* Configuration for turn detection. Can be set to `null` to turn off. Server VAD
650637
* means that the model will detect the start and end of speech based on audio
651-
* volume and respond at the end of user speech.
638+
* volume and respond at the end of user speech. For `gpt-realtime-whisper`, this
639+
* must be `null`; VAD is not supported.
652640
*/
653-
turn_detection?: ClientSecretsAPI.RealtimeTranscriptionSessionTurnDetection;
641+
turn_detection?: ClientSecretsAPI.RealtimeTranscriptionSessionTurnDetection | null;
654642
}
655643

656644
export namespace Input {
@@ -672,7 +660,8 @@ export namespace RealtimeTranscriptionSessionCreateResponse {
672660
/**
673661
* Configuration for turn detection. Can be set to `null` to turn off. Server VAD
674662
* means that the model will detect the start and end of speech based on audio
675-
* volume and respond at the end of user speech.
663+
* volume and respond at the end of user speech. For `gpt-realtime-whisper`, this
664+
* must be `null`; VAD is not supported.
676665
*/
677666
export interface RealtimeTranscriptionSessionTurnDetection {
678667
/**
@@ -763,7 +752,6 @@ export namespace ClientSecretCreateParams {
763752

764753
export declare namespace ClientSecrets {
765754
export {
766-
type RealtimeSessionClientSecret as RealtimeSessionClientSecret,
767755
type RealtimeSessionCreateResponse as RealtimeSessionCreateResponse,
768756
type RealtimeTranscriptionSessionCreateResponse as RealtimeTranscriptionSessionCreateResponse,
769757
type RealtimeTranscriptionSessionTurnDetection as RealtimeTranscriptionSessionTurnDetection,

src/resources/realtime/index.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
export { Calls, type CallAcceptParams, type CallReferParams, type CallRejectParams } from './calls';
44
export {
55
ClientSecrets,
6-
type RealtimeSessionClientSecret,
76
type RealtimeSessionCreateResponse,
87
type RealtimeTranscriptionSessionCreateResponse,
98
type RealtimeTranscriptionSessionTurnDetection,

0 commit comments

Comments
 (0)