Skip to content

Commit 462a1b6

Browse files
authored
fix GPT-5 and newer OpenAI models that require MaxCompletionTokens (#2264)
1 parent 39d794a commit 462a1b6

3 files changed

Lines changed: 64 additions & 36 deletions

File tree

docs/docs/ai-presets.mdx

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,22 @@ To use Claude models, create a preset like this:
7070
}
7171
```
7272

73+
### OpenAI
74+
75+
To use OpenAI's models:
76+
77+
```json
78+
{
79+
"ai@openai-gpt41": {
80+
"display:name": "GPT-4.1",
81+
"display:order": 2,
82+
"ai:*": true,
83+
"ai:model": "gpt-4.1",
84+
"ai:apitoken": "<your OpenAI API key>"
85+
}
86+
}
87+
```
88+
7389
### Local LLMs (Ollama)
7490

7591
To connect to a local Ollama instance:
@@ -78,7 +94,7 @@ To connect to a local Ollama instance:
7894
{
7995
"ai@ollama-llama": {
8096
"display:name": "Ollama - Llama2",
81-
"display:order": 2,
97+
"display:order": 3,
8298
"ai:*": true,
8399
"ai:baseurl": "http://localhost:11434/v1",
84100
"ai:name": "llama2",
@@ -98,7 +114,7 @@ To connect to Azure AI services:
98114
{
99115
"ai@azure-gpt4": {
100116
"display:name": "Azure GPT-4",
101-
"display:order": 3,
117+
"display:order": 4,
102118
"ai:*": true,
103119
"ai:apitype": "azure",
104120
"ai:baseurl": "<your Azure AI base URL>",
@@ -118,7 +134,7 @@ To use Perplexity's models:
118134
{
119135
"ai@perplexity-sonar": {
120136
"display:name": "Perplexity Sonar",
121-
"display:order": 4,
137+
"display:order": 5,
122138
"ai:*": true,
123139
"ai:apitype": "perplexity",
124140
"ai:model": "llama-3.1-sonar-small-128k-online",
@@ -135,7 +151,7 @@ To use Google's Gemini models from [Google AI Studio](https://aistudio.google.co
135151
{
136152
"ai@gemini-2.0": {
137153
"display:name": "Gemini 2.0",
138-
"display:order": 5,
154+
"display:order": 6,
139155
"ai:*": true,
140156
"ai:apitype": "google",
141157
"ai:model": "gemini-2.0-flash-exp",
@@ -158,9 +174,16 @@ You can define multiple presets in your `ai.json` file:
158174
"ai:model": "claude-3-5-sonnet-latest",
159175
"ai:apitoken": "<your anthropic API key>"
160176
},
177+
"ai@openai-gpt41": {
178+
"display:name": "GPT-4.1",
179+
"display:order": 2,
180+
"ai:*": true,
181+
"ai:model": "gpt-4.1",
182+
"ai:apitoken": "<your OpenAI API key>"
183+
},
161184
"ai@ollama-llama": {
162185
"display:name": "Ollama - Llama2",
163-
"display:order": 2,
186+
"display:order": 3,
164187
"ai:*": true,
165188
"ai:baseurl": "http://localhost:11434/v1",
166189
"ai:name": "llama2",
@@ -169,7 +192,7 @@ You can define multiple presets in your `ai.json` file:
169192
},
170193
"ai@perplexity-sonar": {
171194
"display:name": "Perplexity Sonar",
172-
"display:order": 3,
195+
"display:order": 4,
173196
"ai:*": true,
174197
"ai:apitype": "perplexity",
175198
"ai:model": "llama-3.1-sonar-small-128k-online",
@@ -187,3 +210,23 @@ Remember to set your default preset in `settings.json`:
187210
"ai:preset": "ai@claude-sonnet"
188211
}
189212
```
213+
214+
## Using a Proxy
215+
216+
If you need to route AI requests through an HTTP proxy, you can add the `ai:proxyurl` setting to any preset:
217+
218+
```json
219+
{
220+
"ai@claude-with-proxy": {
221+
"display:name": "Claude 3 Sonnet (via Proxy)",
222+
"display:order": 1,
223+
"ai:*": true,
224+
"ai:apitype": "anthropic",
225+
"ai:model": "claude-3-5-sonnet-latest",
226+
"ai:apitoken": "<your anthropic API key>",
227+
"ai:proxyurl": "http://proxy.example.com:8080"
228+
}
229+
}
230+
```
231+
232+
The proxy URL should be in the format `http://host:port` or `https://host:port`. This setting works with all AI providers except Wave Cloud AI (the default).

docs/docs/config.mdx

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,9 @@ wsh editconfig
4444
| ai:model | string | model name to pass to API |
4545
| ai:apiversion | string | for Azure AI only (when apitype is "azure", this will default to "2023-05-15") |
4646
| ai:orgid | string | |
47-
| ai:maxtokens | int | max tokens to pass to API |
48-
| ai:timeoutms | int | timeout (in milliseconds) for AI calls |
47+
| ai:maxtokens | int | max tokens to pass to API |
48+
| ai:timeoutms | int | timeout (in milliseconds) for AI calls |
49+
| ai:proxyurl | string | HTTP proxy URL for AI API requests (does not apply to Wave Cloud AI) |
4950
| conn:askbeforewshinstall | bool | set to false to disable popup asking if you want to install wsh extensions on new machines |
5051
| term:fontsize | float | the fontsize for the terminal block |
5152
| term:fontfamily | string | font family to use for terminal block |

pkg/waveai/openaibackend.go

Lines changed: 12 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@ func defaultAzureMapperFn(model string) string {
2929
return regexp.MustCompile(`[.:]`).ReplaceAllString(model, "")
3030
}
3131

32+
func isReasoningModel(model string) bool {
33+
m := strings.ToLower(model)
34+
return strings.HasPrefix(m, "o1") ||
35+
strings.HasPrefix(m, "o3") ||
36+
strings.HasPrefix(m, "o4") ||
37+
strings.HasPrefix(m, "gpt-5")
38+
}
39+
3240
func setApiType(opts *wshrpc.WaveAIOptsType, clientConfig *openaiapi.ClientConfig) error {
3341
ourApiType := strings.ToLower(opts.APIType)
3442
if ourApiType == "" || ourApiType == APIType_OpenAI || ourApiType == strings.ToLower(string(openaiapi.APITypeOpenAI)) {
@@ -123,38 +131,14 @@ func (OpenAIBackend) StreamCompletion(ctx context.Context, request wshrpc.WaveAI
123131
Messages: convertPrompt(request.Prompt),
124132
}
125133

126-
// Handle o1 models differently - use non-streaming API
127-
if strings.HasPrefix(request.Opts.Model, "o1-") {
134+
// Set MaxCompletionTokens for reasoning models, MaxTokens for others
135+
if isReasoningModel(request.Opts.Model) {
128136
req.MaxCompletionTokens = request.Opts.MaxTokens
129-
req.Stream = false
130-
131-
// Make non-streaming API call
132-
resp, err := client.CreateChatCompletion(ctx, req)
133-
if err != nil {
134-
rtn <- makeAIError(fmt.Errorf("error calling openai API: %v", err))
135-
return
136-
}
137-
138-
// Send header packet
139-
headerPk := MakeWaveAIPacket()
140-
headerPk.Model = resp.Model
141-
headerPk.Created = resp.Created
142-
rtn <- wshrpc.RespOrErrorUnion[wshrpc.WaveAIPacketType]{Response: *headerPk}
143-
144-
// Send content packet(s)
145-
for i, choice := range resp.Choices {
146-
pk := MakeWaveAIPacket()
147-
pk.Index = i
148-
pk.Text = choice.Message.Content
149-
pk.FinishReason = string(choice.FinishReason)
150-
rtn <- wshrpc.RespOrErrorUnion[wshrpc.WaveAIPacketType]{Response: *pk}
151-
}
152-
return
137+
} else {
138+
req.MaxTokens = request.Opts.MaxTokens
153139
}
154140

155-
// Original streaming implementation for non-o1 models
156141
req.Stream = true
157-
req.MaxTokens = request.Opts.MaxTokens
158142
if request.Opts.MaxChoices > 1 {
159143
req.N = request.Opts.MaxChoices
160144
}

0 commit comments

Comments (0)