Skip to content

Commit 4c223f4

Browse files
authored
Merge pull request #772 from docker/fix-issue
include verbose output alongside errors
2 parents c6c66e2 + e7a3022 commit 4c223f4

File tree

2 files changed

+66
-14
lines changed

2 files changed

+66
-14
lines changed

pkg/inference/backends/llamacpp/errors.go

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
package llamacpp
22

3-
import "regexp"
3+
import (
4+
"fmt"
5+
"regexp"
6+
"strings"
7+
)
8+
9+
// maxVerboseOutputLength is the maximum number of bytes of verbose output
// included in user-facing errors. This prevents overwhelming users with
// excessive logs while keeping relevant context.
const maxVerboseOutputLength = 4096

// sanitizeVerboseOutput prepares llama.cpp output for inclusion in a
// user-facing error message: it trims surrounding whitespace and, when the
// result exceeds maxVerboseOutputLength bytes, truncates it at a UTF-8 rune
// boundary and appends a "...[truncated]" marker.
//
// NOTE(review): this function does NOT redact file paths or other sensitive
// data (an earlier comment claimed otherwise); callers must not rely on it
// for anything beyond trimming and truncation.
func sanitizeVerboseOutput(output string) string {
	trimmed := strings.TrimSpace(output)

	// Short output passes through untouched (aside from the trim above).
	if len(trimmed) <= maxVerboseOutputLength {
		return trimmed
	}

	// Truncate to the length cap, backing off so we never split a
	// multi-byte UTF-8 sequence in half (which would emit invalid UTF-8
	// into a user-facing error). A continuation byte is 0b10xxxxxx.
	cut := maxVerboseOutputLength
	for cut > 0 && trimmed[cut]&0xC0 == 0x80 {
		cut--
	}
	return trimmed[:cut] + "\n...[truncated]"
}
43+
2244
// ExtractLlamaCppError attempts to extract a meaningful error message from llama.cpp output.
23-
// It looks for common error patterns and returns a cleaner, more user-friendly message.
45+
// It looks for common error patterns and returns a cleaner, more user-friendly message
46+
// alongside the original verbose output for easier debugging.
47+
// The verbose output is sanitized to prevent leaking sensitive paths and truncated
48+
// if it exceeds a reasonable length.
2449
// If no recognizable pattern is found, it returns the full output.
2550
func ExtractLlamaCppError(output string) string {
2651
for _, entry := range llamaCppErrorPatterns {
2752
if entry.pattern.MatchString(output) {
28-
return entry.message
53+
return fmt.Sprintf("%s\n\nVerbose output:\n%s", entry.message, sanitizeVerboseOutput(output))
2954
}
3055
}
3156
return output

pkg/inference/backends/llamacpp/errors_test.go

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,64 @@
11
package llamacpp
22

33
import (
4+
"strings"
45
"testing"
56
)
67

78
func TestExtractLlamaCppError(t *testing.T) {
89
tests := []struct {
9-
name string
10-
input string
11-
expected string
10+
name string
11+
input string
12+
expected string
13+
expectedPrefix string
14+
expectTruncated bool
1215
}{
1316
{
14-
name: "Metal buffer allocation failure",
15-
input: "ggml_metal_buffer_init: error: failed to allocate buffer, size = 2048.00 MiB",
16-
expected: "not enough GPU memory to load the model (Metal)",
17+
name: "Metal buffer allocation failure",
18+
input: "ggml_metal_buffer_init: error: failed to allocate buffer, size = 2048.00 MiB",
19+
expected: "not enough GPU memory to load the model (Metal)\n\nVerbose output:\n" +
20+
"ggml_metal_buffer_init: error: failed to allocate buffer, size = 2048.00 MiB",
1721
},
1822
{
19-
name: "cudaMalloc OOM",
20-
input: "ggml_backend_cuda_buffer_type_alloc_buffer: allocating 12.50 MiB on device 1: cudaMalloc failed: out of memory",
21-
expected: "not enough GPU memory to load the model (CUDA)",
23+
name: "cudaMalloc OOM",
24+
input: "ggml_backend_cuda_buffer_type_alloc_buffer: allocating 12.50 MiB on device 1: cudaMalloc failed: out of memory",
25+
expected: "not enough GPU memory to load the model (CUDA)\n\nVerbose output:\n" +
26+
"ggml_backend_cuda_buffer_type_alloc_buffer: allocating 12.50 MiB on device 1: cudaMalloc failed: out of memory",
2227
},
2328
{
2429
name: "loading error",
2530
input: `common_init_from_params: failed to load model '/models/model.gguf'
2631
main: exiting due to model loading error`,
27-
expected: "failed to load model",
32+
expected: "failed to load model\n\nVerbose output:\n" +
33+
"common_init_from_params: failed to load model '/models/model.gguf'\n" +
34+
"main: exiting due to model loading error",
35+
},
36+
{
37+
name: "input with leading/trailing whitespace",
38+
input: "\n\n ggml_metal_buffer_init: error: failed to allocate buffer, size = 2048.00 MiB \n\n",
39+
expected: "not enough GPU memory to load the model (Metal)\n\nVerbose output:\n" +
40+
"ggml_metal_buffer_init: error: failed to allocate buffer, size = 2048.00 MiB",
41+
},
42+
{
43+
name: "truncation of large output",
44+
input: "ggml_metal_buffer_init: error: failed to allocate buffer, size = 2048.00 MiB\n" + strings.Repeat("verbose log line\n", 500),
45+
expectedPrefix: "not enough GPU memory to load the model (Metal)\n\nVerbose output:\n" +
46+
"ggml_metal_buffer_init: error: failed to allocate buffer, size = 2048.00 MiB\n",
47+
expectTruncated: true,
2848
},
2949
}
3050

3151
for _, tt := range tests {
3252
t.Run(tt.name, func(t *testing.T) {
3353
result := ExtractLlamaCppError(tt.input)
34-
if result != tt.expected {
54+
if tt.expectTruncated {
55+
if !strings.HasPrefix(result, tt.expectedPrefix) {
56+
t.Errorf("ExtractLlamaCppError() = %q, want prefix %q", result, tt.expectedPrefix)
57+
}
58+
if !strings.HasSuffix(result, "...[truncated]") {
59+
t.Errorf("ExtractLlamaCppError() = %q, want suffix ...[truncated]", result)
60+
}
61+
} else if result != tt.expected {
3562
t.Errorf("ExtractLlamaCppError() = %q, want %q", result, tt.expected)
3663
}
3764
})

0 commit comments

Comments
 (0)