Skip to content

Commit 462a1b6

Browse files
authored
fix GPT-5 and newer OpenAI models that require MaxCompletionTokens (#2264)
1 parent 39d794a commit 462a1b6

3 files changed

Lines changed: 64 additions & 36 deletions

File tree

docs/docs/ai-presets.mdx

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,22 @@ To use Claude models, create a preset like this:
7070
}
7171
```
7272

73+
### OpenAI
74+
75+
To use OpenAI's models:
76+
77+
```json
78+
{
79+
"ai@openai-gpt41": {
80+
"display:name": "GPT-4.1",
81+
"display:order": 2,
82+
"ai:*": true,
83+
"ai:model": "gpt-4.1",
84+
"ai:apitoken": "<your OpenAI API key>"
85+
}
86+
}
87+
```
88+
7389
### Local LLMs (Ollama)
7490

7591
To connect to a local Ollama instance:
@@ -78,7 +94,7 @@ To connect to a local Ollama instance:
7894
{
7995
"ai@ollama-llama": {
8096
"display:name": "Ollama - Llama2",
81-
"display:order": 2,
97+
"display:order": 3,
8298
"ai:*": true,
8399
"ai:baseurl": "http://localhost:11434/v1",
84100
"ai:name": "llama2",
@@ -98,7 +114,7 @@ To connect to Azure AI services:
98114
{
99115
"ai@azure-gpt4": {
100116
"display:name": "Azure GPT-4",
101-
"display:order": 3,
117+
"display:order": 4,
102118
"ai:*": true,
103119
"ai:apitype": "azure",
104120
"ai:baseurl": "<your Azure AI base URL>",
@@ -118,7 +134,7 @@ To use Perplexity's models:
118134
{
119135
"ai@perplexity-sonar": {
120136
"display:name": "Perplexity Sonar",
121-
"display:order": 4,
137+
"display:order": 5,
122138
"ai:*": true,
123139
"ai:apitype": "perplexity",
124140
"ai:model": "llama-3.1-sonar-small-128k-online",
@@ -135,7 +151,7 @@ To use Google's Gemini models from [Google AI Studio](https://aistudio.google.co
135151
{
136152
"ai@gemini-2.0": {
137153
"display:name": "Gemini 2.0",
138-
"display:order": 5,
154+
"display:order": 6,
139155
"ai:*": true,
140156
"ai:apitype": "google",
141157
"ai:model": "gemini-2.0-flash-exp",
@@ -158,9 +174,16 @@ You can define multiple presets in your `ai.json` file:
158174
"ai:model": "claude-3-5-sonnet-latest",
159175
"ai:apitoken": "<your anthropic API key>"
160176
},
177+
"ai@openai-gpt41": {
178+
"display:name": "GPT-4.1",
179+
"display:order": 2,
180+
"ai:*": true,
181+
"ai:model": "gpt-4.1",
182+
"ai:apitoken": "<your OpenAI API key>"
183+
},
161184
"ai@ollama-llama": {
162185
"display:name": "Ollama - Llama2",
163-
"display:order": 2,
186+
"display:order": 3,
164187
"ai:*": true,
165188
"ai:baseurl": "http://localhost:11434/v1",
166189
"ai:name": "llama2",
@@ -169,7 +192,7 @@ You can define multiple presets in your `ai.json` file:
169192
},
170193
"ai@perplexity-sonar": {
171194
"display:name": "Perplexity Sonar",
172-
"display:order": 3,
195+
"display:order": 4,
173196
"ai:*": true,
174197
"ai:apitype": "perplexity",
175198
"ai:model": "llama-3.1-sonar-small-128k-online",
@@ -187,3 +210,23 @@ Remember to set your default preset in `settings.json`:
187210
"ai:preset": "ai@claude-sonnet"
188211
}
189212
```
213+
214+
## Using a Proxy
215+
216+
If you need to route AI requests through an HTTP proxy, you can add the `ai:proxyurl` setting to any preset:
217+
218+
```json
219+
{
220+
"ai@claude-with-proxy": {
221+
"display:name": "Claude 3 Sonnet (via Proxy)",
222+
"display:order": 1,
223+
"ai:*": true,
224+
"ai:apitype": "anthropic",
225+
"ai:model": "claude-3-5-sonnet-latest",
226+
"ai:apitoken": "<your anthropic API key>",
227+
"ai:proxyurl": "http://proxy.example.com:8080"
228+
}
229+
}
230+
```
231+
232+
The proxy URL should be in the format `http://host:port` or `https://host:port`. This setting works with all AI providers except Wave Cloud AI (the default).

docs/docs/config.mdx

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,9 @@ wsh editconfig
4444
| ai:model | string | model name to pass to API |
4545
| ai:apiversion | string | for Azure AI only (when apitype is "azure", this will default to "2023-05-15") |
4646
| ai:orgid | string | |
47-
| ai:maxtokens | int | max tokens to pass to API |
48-
| ai:timeoutms | int | timeout (in milliseconds) for AI calls |
47+
| ai:maxtokens | int | max tokens to pass to API |
48+
| ai:timeoutms | int | timeout (in milliseconds) for AI calls |
49+
| ai:proxyurl | string | HTTP proxy URL for AI API requests (does not apply to Wave Cloud AI) |
4950
| conn:askbeforewshinstall | bool | set to false to disable popup asking if you want to install wsh extensions on new machines |
5051
| term:fontsize | float | the fontsize for the terminal block |
5152
| term:fontfamily | string | font family to use for terminal block |

pkg/waveai/openaibackend.go

Lines changed: 12 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@ func defaultAzureMapperFn(model string) string {
2929
return regexp.MustCompile(`[.:]`).ReplaceAllString(model, "")
3030
}
3131

32+
func isReasoningModel(model string) bool {
33+
m := strings.ToLower(model)
34+
return strings.HasPrefix(m, "o1") ||
35+
strings.HasPrefix(m, "o3") ||
36+
strings.HasPrefix(m, "o4") ||
37+
strings.HasPrefix(m, "gpt-5")
38+
}
39+
3240
func setApiType(opts *wshrpc.WaveAIOptsType, clientConfig *openaiapi.ClientConfig) error {
3341
ourApiType := strings.ToLower(opts.APIType)
3442
if ourApiType == "" || ourApiType == APIType_OpenAI || ourApiType == strings.ToLower(string(openaiapi.APITypeOpenAI)) {
@@ -123,38 +131,14 @@ func (OpenAIBackend) StreamCompletion(ctx context.Context, request wshrpc.WaveAI
123131
Messages: convertPrompt(request.Prompt),
124132
}
125133

126-
// Handle o1 models differently - use non-streaming API
127-
if strings.HasPrefix(request.Opts.Model, "o1-") {
134+
// Set MaxCompletionTokens for reasoning models, MaxTokens for others
135+
if isReasoningModel(request.Opts.Model) {
128136
req.MaxCompletionTokens = request.Opts.MaxTokens
129-
req.Stream = false
130-
131-
// Make non-streaming API call
132-
resp, err := client.CreateChatCompletion(ctx, req)
133-
if err != nil {
134-
rtn <- makeAIError(fmt.Errorf("error calling openai API: %v", err))
135-
return
136-
}
137-
138-
// Send header packet
139-
headerPk := MakeWaveAIPacket()
140-
headerPk.Model = resp.Model
141-
headerPk.Created = resp.Created
142-
rtn <- wshrpc.RespOrErrorUnion[wshrpc.WaveAIPacketType]{Response: *headerPk}
143-
144-
// Send content packet(s)
145-
for i, choice := range resp.Choices {
146-
pk := MakeWaveAIPacket()
147-
pk.Index = i
148-
pk.Text = choice.Message.Content
149-
pk.FinishReason = string(choice.FinishReason)
150-
rtn <- wshrpc.RespOrErrorUnion[wshrpc.WaveAIPacketType]{Response: *pk}
151-
}
152-
return
137+
} else {
138+
req.MaxTokens = request.Opts.MaxTokens
153139
}
154140

155-
// Original streaming implementation for non-o1 models
156141
req.Stream = true
157-
req.MaxTokens = request.Opts.MaxTokens
158142
if request.Opts.MaxChoices > 1 {
159143
req.N = request.Opts.MaxChoices
160144
}

0 commit comments

Comments (0)