Skip to content

Commit 9039311

Browse files
authored
Merge pull request #729 from doringeman/llamacpp-error-transformer
feat: add ErrorTransformer for llama.cpp
2 parents 59280ed + 2445139 commit 9039311

3 files changed

Lines changed: 80 additions & 8 deletions

File tree

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package llamacpp
2+
3+
import "regexp"
4+
5+
// llamaCppErrorPatterns maps known llama.cpp stderr signatures to friendlier
// messages. Entries are checked in declaration order and the first regexp that
// matches wins, so more specific patterns must be listed before generic ones.
var llamaCppErrorPatterns = []struct {
	pattern *regexp.Regexp
	message string
}{
	// Metal backend: GPU buffer allocation failed (Apple hardware out of memory).
	// https://github.com/ggml-org/llama.cpp/blob/ecd99d6a9acbc436bad085783bcd5d0b9ae9e9e9/ggml/src/ggml-metal/ggml-metal-device.m#L1498
	{regexp.MustCompile(`failed to allocate buffer, size = .*MiB`), "not enough GPU memory to load the model (Metal)"},
	// CUDA backend: device allocation failed with out-of-memory.
	// https://github.com/ggml-org/llama.cpp/blob/ecd99d6a9acbc436bad085783bcd5d0b9ae9e9e9/ggml/src/ggml-cuda/ggml-cuda.cu#L710
	{regexp.MustCompile(`cudaMalloc failed: out of memory`), "not enough GPU memory to load the model (CUDA)"},
	// Catch-all emitted by the llama.cpp server when model loading fails.
	// https://github.com/ggml-org/llama.cpp/blob/ecd99d6a9acbc436bad085783bcd5d0b9ae9e9e9/tools/server/server.cpp#L254
	{regexp.MustCompile(`exiting due to model loading error`), "failed to load model"},
}

// ExtractLlamaCppError attempts to extract a meaningful error message from llama.cpp output.
// It looks for common error patterns and returns a cleaner, more user-friendly message.
// If no recognizable pattern is found, it returns the full output.
func ExtractLlamaCppError(output string) string {
	for i := range llamaCppErrorPatterns {
		if p := &llamaCppErrorPatterns[i]; p.pattern.MatchString(output) {
			return p.message
		}
	}
	// Nothing matched: surface the raw output unchanged so no detail is lost.
	return output
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
package llamacpp
2+
3+
import (
4+
"testing"
5+
)
6+
7+
func TestExtractLlamaCppError(t *testing.T) {
8+
tests := []struct {
9+
name string
10+
input string
11+
expected string
12+
}{
13+
{
14+
name: "Metal buffer allocation failure",
15+
input: "ggml_metal_buffer_init: error: failed to allocate buffer, size = 2048.00 MiB",
16+
expected: "not enough GPU memory to load the model (Metal)",
17+
},
18+
{
19+
name: "cudaMalloc OOM",
20+
input: "ggml_backend_cuda_buffer_type_alloc_buffer: allocating 12.50 MiB on device 1: cudaMalloc failed: out of memory",
21+
expected: "not enough GPU memory to load the model (CUDA)",
22+
},
23+
{
24+
name: "loading error",
25+
input: `common_init_from_params: failed to load model '/models/model.gguf'
26+
main: exiting due to model loading error`,
27+
expected: "failed to load model",
28+
},
29+
}
30+
31+
for _, tt := range tests {
32+
t.Run(tt.name, func(t *testing.T) {
33+
result := ExtractLlamaCppError(tt.input)
34+
if result != tt.expected {
35+
t.Errorf("ExtractLlamaCppError() = %q, want %q", result, tt.expected)
36+
}
37+
})
38+
}
39+
}

pkg/inference/backends/llamacpp/llamacpp.go

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -171,14 +171,15 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, _ string, mode
171171
}
172172

173173
return backends.RunBackend(ctx, backends.RunnerConfig{
174-
BackendName: "llama.cpp",
175-
Socket: socket,
176-
BinaryPath: filepath.Join(binPath, "com.docker.llama-server"),
177-
SandboxPath: binPath,
178-
SandboxConfig: sandbox.ConfigurationLlamaCpp,
179-
Args: args,
180-
Logger: l.log,
181-
ServerLogWriter: logging.NewWriter(l.serverLog),
174+
BackendName: "llama.cpp",
175+
Socket: socket,
176+
BinaryPath: filepath.Join(binPath, "com.docker.llama-server"),
177+
SandboxPath: binPath,
178+
SandboxConfig: sandbox.ConfigurationLlamaCpp,
179+
Args: args,
180+
Logger: l.log,
181+
ServerLogWriter: logging.NewWriter(l.serverLog),
182+
ErrorTransformer: ExtractLlamaCppError,
182183
})
183184
}
184185

0 commit comments

Comments
 (0)