@@ -316,13 +316,17 @@ func readSSE(t *testing.T, resp *http.Response) (content string, chunks int, got
316316}
317317
318318// chatCompletion sends a non-streaming chat request and returns the response.
319+ // A small max_tokens cap is applied to prevent runaway generation from
320+ // causing test timeouts.
319321func chatCompletion (t * testing.T , model , prompt string ) desktop.OpenAIChatResponse {
320322 t .Helper ()
323+ maxTokens := 64
321324 status , body := doJSON (t , http .MethodPost , serverURL + "/engines/v1/chat/completions" ,
322325 desktop.OpenAIChatRequest {
323- Model : model ,
324- Messages : []desktop.OpenAIChatMessage {{Role : "user" , Content : prompt }},
325- Stream : false ,
326+ Model : model ,
327+ Messages : []desktop.OpenAIChatMessage {{Role : "user" , Content : prompt }},
328+ Stream : false ,
329+ MaxTokens : & maxTokens ,
326330 })
327331 if status != http .StatusOK {
328332 t .Fatalf ("chat completion failed: status=%d body=%s" , status , body )
@@ -335,12 +339,16 @@ func chatCompletion(t *testing.T, model, prompt string) desktop.OpenAIChatRespon
335339}
336340
337341// streamingChatCompletion sends a streaming chat request and validates the SSE response.
342+ // A small max_tokens cap is applied to prevent runaway generation from
343+ // causing test timeouts.
338344func streamingChatCompletion (t * testing.T , model , prompt string ) string {
339345 t .Helper ()
346+ maxTokens := 64
340347 data , err := json .Marshal (desktop.OpenAIChatRequest {
341- Model : model ,
342- Messages : []desktop.OpenAIChatMessage {{Role : "user" , Content : prompt }},
343- Stream : true ,
348+ Model : model ,
349+ Messages : []desktop.OpenAIChatMessage {{Role : "user" , Content : prompt }},
350+ Stream : true ,
351+ MaxTokens : & maxTokens ,
344352 })
345353 if err != nil {
346354 t .Fatalf ("marshal: %v" , err )
0 commit comments