Skip to content

Commit 4a4e6bc

Browse files
committed
Merge branch 'main' into use-llamacpp-images
2 parents 18cc59d + fd76359 commit 4a4e6bc

3 files changed

Lines changed: 20 additions & 10 deletions

File tree

cmd/cli/desktop/api.go

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -47,10 +47,11 @@ type ImageURL struct {
4747
}
4848

4949
type OpenAIChatRequest struct {
50-
Model string `json:"model"`
51-
Messages []OpenAIChatMessage `json:"messages"`
52-
Stream bool `json:"stream"`
53-
Tools []Tool `json:"tools,omitempty"`
50+
Model string `json:"model"`
51+
Messages []OpenAIChatMessage `json:"messages"`
52+
Stream bool `json:"stream"`
53+
Tools []Tool `json:"tools,omitempty"`
54+
MaxTokens *int `json:"max_tokens,omitempty"`
5455
}
5556

5657
type OpenAIChatResponse struct {

e2e/e2e_test.go

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -316,13 +316,17 @@ func readSSE(t *testing.T, resp *http.Response) (content string, chunks int, got
316316
}
317317

318318
// chatCompletion sends a non-streaming chat request and returns the response.
319+
// A small max_tokens cap is applied to prevent runaway generation from
320+
// causing test timeouts.
319321
func chatCompletion(t *testing.T, model, prompt string) desktop.OpenAIChatResponse {
320322
t.Helper()
323+
maxTokens := 64
321324
status, body := doJSON(t, http.MethodPost, serverURL+"/engines/v1/chat/completions",
322325
desktop.OpenAIChatRequest{
323-
Model: model,
324-
Messages: []desktop.OpenAIChatMessage{{Role: "user", Content: prompt}},
325-
Stream: false,
326+
Model: model,
327+
Messages: []desktop.OpenAIChatMessage{{Role: "user", Content: prompt}},
328+
Stream: false,
329+
MaxTokens: &maxTokens,
326330
})
327331
if status != http.StatusOK {
328332
t.Fatalf("chat completion failed: status=%d body=%s", status, body)
@@ -335,12 +339,16 @@ func chatCompletion(t *testing.T, model, prompt string) desktop.OpenAIChatRespon
335339
}
336340

337341
// streamingChatCompletion sends a streaming chat request and validates the SSE response.
342+
// A small max_tokens cap is applied to prevent runaway generation from
343+
// causing test timeouts.
338344
func streamingChatCompletion(t *testing.T, model, prompt string) string {
339345
t.Helper()
346+
maxTokens := 64
340347
data, err := json.Marshal(desktop.OpenAIChatRequest{
341-
Model: model,
342-
Messages: []desktop.OpenAIChatMessage{{Role: "user", Content: prompt}},
343-
Stream: true,
348+
Model: model,
349+
Messages: []desktop.OpenAIChatMessage{{Role: "user", Content: prompt}},
350+
Stream: true,
351+
MaxTokens: &maxTokens,
344352
})
345353
if err != nil {
346354
t.Fatalf("marshal: %v", err)

pkg/distribution/builder/from_directory.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -233,6 +233,7 @@ func FromDirectory(dirPath string, opts ...DirectoryOption) (*Builder, error) {
233233
config.GGUF = extracted.GGUF
234234
config.Safetensors = extracted.Safetensors
235235
config.Diffusers = extracted.Diffusers
236+
config.ContextSize = extracted.ContextSize
236237
}
237238
}
238239
}

0 commit comments

Comments
 (0)