Skip to content

Commit 61bf34e

Browse files
localai-bot and mudler authored
fix(traces): cap captured body size to keep admin Traces UI responsive (#9946)
The trace middleware buffered the full request and response bodies for every JSON exchange. With a chatty agent-pool RAG workload, /embeddings responses (large vector arrays) accumulated to tens of MB in the in-memory buffer; the admin Traces page would then download and parse 40+ MB on every load and on every 5s auto-refresh, locking the UI in a loading state. Add LOCALAI_TRACING_MAX_BODY_BYTES (default 64 KiB) that caps each captured body. The full payload still flows through to the real client; only the trace copy is bounded. Exchanges record body_truncated and original body_bytes so the dashboard can show that truncation happened. The cap is configurable via env, CLI, and runtime_settings.json. Also unblock recovery: the Traces page now keeps the Clear button enabled while loading, since "buffer too large to render" is exactly when the user needs to clear it. Assisted-by: Claude:claude-opus-4-7 Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
1 parent 0b2ae3c commit 61bf34e

7 files changed

Lines changed: 212 additions & 19 deletions

File tree

core/application/startup.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,13 @@ func loadRuntimeSettingsFromFile(options *config.ApplicationConfig) {
552552
options.TracingMaxItems = *settings.TracingMaxItems
553553
}
554554
}
555+
if settings.TracingMaxBodyBytes != nil {
556+
// Allow the on-disk setting to override the CLI/env default. The
557+
// startup default is non-zero (see NewApplicationConfig), so a plain
558+
// `== 0` guard like the others would never trigger; we instead respect
559+
// any value the file specifies. 0 in the file means "uncapped".
560+
options.TracingMaxBodyBytes = *settings.TracingMaxBodyBytes
561+
}
555562

556563
// Branding / whitelabeling. There are no env vars for these — the file is
557564
// the only source — so apply unconditionally. Without this block a server

core/cli/run.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ type RunCMD struct {
100100
LoadToMemory []string `env:"LOCALAI_LOAD_TO_MEMORY,LOAD_TO_MEMORY" help:"A list of models to load into memory at startup" group:"models"`
101101
EnableTracing bool `env:"LOCALAI_ENABLE_TRACING,ENABLE_TRACING" help:"Enable API tracing" group:"api"`
102102
TracingMaxItems int `env:"LOCALAI_TRACING_MAX_ITEMS" default:"1024" help:"Maximum number of traces to keep" group:"api"`
103+
TracingMaxBodyBytes int `env:"LOCALAI_TRACING_MAX_BODY_BYTES" default:"65536" help:"Maximum bytes captured per request/response body in the trace buffer (0 = uncapped). Caps memory growth from chatty endpoints like /embeddings." group:"api"`
103104
AgentJobRetentionDays int `env:"LOCALAI_AGENT_JOB_RETENTION_DAYS,AGENT_JOB_RETENTION_DAYS" default:"30" help:"Number of days to keep agent job history (default: 30)" group:"api"`
104105
OpenResponsesStoreTTL string `env:"LOCALAI_OPEN_RESPONSES_STORE_TTL,OPEN_RESPONSES_STORE_TTL" default:"0" help:"TTL for Open Responses store (e.g., 1h, 30m, 0 = no expiration)" group:"api"`
105106

@@ -273,6 +274,7 @@ func (r *RunCMD) Run(ctx *cliContext.Context) error {
273274
opts = append(opts, config.EnableTracing)
274275
}
275276
opts = append(opts, config.WithTracingMaxItems(r.TracingMaxItems))
277+
opts = append(opts, config.WithTracingMaxBodyBytes(r.TracingMaxBodyBytes))
276278

277279
token := ""
278280
if r.Peer2Peer || r.Peer2PeerToken != "" {

core/config/application_config.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ type ApplicationConfig struct {
2121
Debug bool
2222
EnableTracing bool
2323
TracingMaxItems int
24+
TracingMaxBodyBytes int // Per-body cap for captured request/response bodies; 0 disables the cap
2425
EnableBackendLogging bool
2526
GeneratedContentDir string
2627

@@ -187,6 +188,7 @@ func NewApplicationConfig(o ...AppOption) *ApplicationConfig {
187188
LRUEvictionRetryInterval: 1 * time.Second, // Default: 1 second
188189
WatchDogInterval: 500 * time.Millisecond, // Default: 500ms
189190
TracingMaxItems: 1024,
191+
TracingMaxBodyBytes: 64 * 1024, // 64 KiB - caps each request/response body in the trace buffer
190192
AgentPool: AgentPoolConfig{
191193
Enabled: true,
192194
Timeout: "5m",
@@ -578,6 +580,12 @@ func WithTracingMaxItems(items int) AppOption {
578580
}
579581
}
580582

583+
func WithTracingMaxBodyBytes(bytes int) AppOption {
584+
return func(o *ApplicationConfig) {
585+
o.TracingMaxBodyBytes = bytes
586+
}
587+
}
588+
581589
func WithGeneratedContentDir(generatedContentDir string) AppOption {
582590
return func(o *ApplicationConfig) {
583591
o.GeneratedContentDir = generatedContentDir
@@ -920,6 +928,7 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings {
920928
f16 := o.F16
921929
debug := o.Debug
922930
tracingMaxItems := o.TracingMaxItems
931+
tracingMaxBodyBytes := o.TracingMaxBodyBytes
923932
enableTracing := o.EnableTracing
924933
enableBackendLogging := o.EnableBackendLogging
925934
cors := o.CORS
@@ -1008,6 +1017,7 @@ func (o *ApplicationConfig) ToRuntimeSettings() RuntimeSettings {
10081017
F16: &f16,
10091018
Debug: &debug,
10101019
TracingMaxItems: &tracingMaxItems,
1020+
TracingMaxBodyBytes: &tracingMaxBodyBytes,
10111021
EnableTracing: &enableTracing,
10121022
EnableBackendLogging: &enableBackendLogging,
10131023
CORS: &cors,
@@ -1146,6 +1156,9 @@ func (o *ApplicationConfig) ApplyRuntimeSettings(settings *RuntimeSettings) (req
11461156
if settings.TracingMaxItems != nil {
11471157
o.TracingMaxItems = *settings.TracingMaxItems
11481158
}
1159+
if settings.TracingMaxBodyBytes != nil {
1160+
o.TracingMaxBodyBytes = *settings.TracingMaxBodyBytes
1161+
}
11491162
if settings.EnableBackendLogging != nil {
11501163
o.EnableBackendLogging = *settings.EnableBackendLogging
11511164
}

core/config/runtime_settings.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ type RuntimeSettings struct {
3838
Debug *bool `json:"debug,omitempty"`
3939
EnableTracing *bool `json:"enable_tracing,omitempty"`
4040
TracingMaxItems *int `json:"tracing_max_items,omitempty"`
41+
TracingMaxBodyBytes *int `json:"tracing_max_body_bytes,omitempty"` // Per-body cap in bytes; 0 disables the cap
4142
EnableBackendLogging *bool `json:"enable_backend_logging,omitempty"`
4243

4344
// Security/CORS settings

core/http/middleware/trace.go

Lines changed: 64 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,20 @@ import (
1717
)
1818

1919
// APIExchangeRequest is the request half of a recorded API exchange, in the
// shape it is serialized to JSON.
type APIExchangeRequest struct {
	// Method is the HTTP method of the request.
	Method string `json:"method"`

	// Path is the request path.
	Path string `json:"path"`

	// Headers points at the headers recorded for the request.
	Headers *http.Header `json:"headers"`

	// Body points at the recorded copy of the request body.
	Body *[]byte `json:"body"`

	// BodyTruncated reports that Body holds only a prefix of the payload.
	// It is omitted from the JSON output when false.
	BodyTruncated bool `json:"body_truncated,omitempty"`

	// BodyBytes is the original size before truncation. It is omitted from
	// the JSON output when zero.
	BodyBytes int `json:"body_bytes,omitempty"`
}
2527

2628
// APIExchangeResponse is the response half of a recorded API exchange, in the
// shape it is serialized to JSON.
type APIExchangeResponse struct {
	// Status is the HTTP status code of the response.
	Status int `json:"status"`

	// Headers points at the headers recorded for the response.
	Headers *http.Header `json:"headers"`

	// Body points at the recorded copy of the response body.
	Body *[]byte `json:"body"`

	// BodyTruncated reports that Body holds only a prefix of the payload.
	// It is omitted from the JSON output when false.
	BodyTruncated bool `json:"body_truncated,omitempty"`

	// BodyBytes is the original size before truncation. It is omitted from
	// the JSON output when zero.
	BodyBytes int `json:"body_bytes,omitempty"`
}
3135

3236
type APIExchange struct {
@@ -66,11 +70,29 @@ var doInitializeTracing = sync.OnceFunc(func() {
6670

6771
// bodyWriter wraps an http.ResponseWriter and mirrors what is written through
// it into body, up to maxBytes, while forwarding every byte to the wrapped
// writer.
type bodyWriter struct {
	http.ResponseWriter

	// body receives the captured copy of the response payload.
	body *bytes.Buffer

	// maxBytes caps how many bytes body may hold; 0 (or less) = unlimited capture.
	maxBytes int

	// truncated is set once at least one byte was withheld from body.
	truncated bool

	// totalBytes counts the bytes the upstream handler wrote, even past the cap.
	totalBytes int
}

// Write records b in the capture buffer, subject to the maxBytes cap, and then
// forwards the complete, unmodified b to the wrapped ResponseWriter. The
// returned count and error are those of the wrapped writer, so callers never
// observe a short write caused by the capture cap.
//
// truncated is set only when bytes were actually left out of the capture
// buffer; a zero-length write never marks the capture as truncated, even when
// the buffer is already exactly at the cap.
func (w *bodyWriter) Write(b []byte) (int, error) {
	w.totalBytes += len(b)

	captured := b
	if w.maxBytes > 0 {
		// Room left in the capture buffer; never negative so it can be used
		// directly as a slice bound.
		remain := w.maxBytes - w.body.Len()
		if remain < 0 {
			remain = 0
		}
		if len(b) > remain {
			captured = b[:remain]
			w.truncated = true
		}
	}
	if len(captured) > 0 {
		w.body.Write(captured)
	}

	// The real client always receives the full payload.
	return w.ResponseWriter.Write(b)
}
7698

@@ -80,6 +102,20 @@ func (w *bodyWriter) Flush() {
80102
}
81103
}
82104

105+
// truncateForTrace copies body into a freshly allocated slice, keeping at most
// maxBytes bytes when maxBytes is positive. The boolean result is true only
// when bytes were dropped to honour the cap. A maxBytes of zero or less turns
// the cap off, in which case the whole body is copied. The returned slice
// never shares memory with body.
func truncateForTrace(body []byte, maxBytes int) ([]byte, bool) {
	keep := len(body)
	truncated := maxBytes > 0 && keep > maxBytes
	if truncated {
		keep = maxBytes
	}

	snapshot := make([]byte, keep)
	copy(snapshot, body)
	return snapshot, truncated
}
118+
83119
func initializeTracing(maxItems int) {
84120
tracingMaxItems = maxItems
85121
doInitializeTracing()
@@ -134,11 +170,18 @@ func TraceMiddleware(app *application.Application) echo.MiddlewareFunc {
134170

135171
startTime := time.Now()
136172

173+
// Cap captured payload size. Without this, /embeddings and
174+
// streaming /chat/completions blow the in-memory buffer into the
175+
// tens of MB, which then locks the admin Traces UI fetching the
176+
// JSON dump faster than the 5s auto-refresh.
177+
maxBodyBytes := app.ApplicationConfig().TracingMaxBodyBytes
178+
137179
// Wrap response writer to capture body
138180
resBody := new(bytes.Buffer)
139181
mw := &bodyWriter{
140182
ResponseWriter: c.Response().Writer,
141183
body: resBody,
184+
maxBytes: maxBodyBytes,
142185
}
143186
c.Response().Writer = mw
144187

@@ -159,24 +202,27 @@ func TraceMiddleware(app *application.Application) echo.MiddlewareFunc {
159202
// via any heap-dump-style introspection, and tokens shouldn't
160203
// outlive the request that carried them.
161204
requestHeaders := redactSensitiveHeaders(c.Request().Header)
162-
requestBody := make([]byte, len(body))
163-
copy(requestBody, body)
205+
requestBody, requestTruncated := truncateForTrace(body, maxBodyBytes)
164206
responseHeaders := redactSensitiveHeaders(c.Response().Header())
165207
responseBody := make([]byte, resBody.Len())
166208
copy(responseBody, resBody.Bytes())
167209
exchange := APIExchange{
168210
Timestamp: startTime,
169211
Duration: time.Since(startTime),
170212
Request: APIExchangeRequest{
171-
Method: c.Request().Method,
172-
Path: c.Path(),
173-
Headers: &requestHeaders,
174-
Body: &requestBody,
213+
Method: c.Request().Method,
214+
Path: c.Path(),
215+
Headers: &requestHeaders,
216+
Body: &requestBody,
217+
BodyTruncated: requestTruncated,
218+
BodyBytes: len(body),
175219
},
176220
Response: APIExchangeResponse{
177-
Status: status,
178-
Headers: &responseHeaders,
179-
Body: &responseBody,
221+
Status: status,
222+
Headers: &responseHeaders,
223+
Body: &responseBody,
224+
BodyTruncated: mw.truncated,
225+
BodyBytes: mw.totalBytes,
180226
},
181227
}
182228
if handlerErr != nil {
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
package middleware
2+
3+
import (
4+
"bytes"
5+
"net/http/httptest"
6+
"strings"
7+
8+
. "github.com/onsi/ginkgo/v2"
9+
. "github.com/onsi/gomega"
10+
)
11+
12+
// The trace middleware copies request and response bodies into an in-memory
13+
// buffer that backs the admin /api/traces endpoint. With no upper bound a
14+
// chatty workload (embeddings, large completions) trivially produces a
15+
// multi-MB response that locks the Traces UI in a loading state — fetching
16+
// and parsing the payload outruns the 5-second auto-refresh. These specs
17+
// pin the capping contract so future refactors keep both the cap and the
18+
// passthrough to the real client intact.
19+
20+
var _ = Describe("bodyWriter capping", func() {
21+
It("captures the full body when maxBytes is 0 (unlimited)", func() {
22+
downstream := httptest.NewRecorder()
23+
buf := &bytes.Buffer{}
24+
bw := &bodyWriter{ResponseWriter: downstream, body: buf, maxBytes: 0}
25+
26+
payload := []byte(strings.Repeat("x", 4096))
27+
n, err := bw.Write(payload)
28+
29+
Expect(err).ToNot(HaveOccurred())
30+
Expect(n).To(Equal(len(payload)))
31+
Expect(buf.Len()).To(Equal(len(payload)))
32+
Expect(downstream.Body.Len()).To(Equal(len(payload)))
33+
Expect(bw.truncated).To(BeFalse())
34+
})
35+
36+
It("stops appending to the trace buffer once maxBytes is reached but still forwards to the client", func() {
37+
downstream := httptest.NewRecorder()
38+
buf := &bytes.Buffer{}
39+
bw := &bodyWriter{ResponseWriter: downstream, body: buf, maxBytes: 100}
40+
41+
payload := []byte(strings.Repeat("a", 250))
42+
n, err := bw.Write(payload)
43+
44+
Expect(err).ToNot(HaveOccurred())
45+
Expect(n).To(Equal(len(payload)), "Write must return the full byte count so callers see no short write")
46+
Expect(buf.Len()).To(Equal(100), "trace buffer should hold exactly maxBytes")
47+
Expect(downstream.Body.Len()).To(Equal(len(payload)), "client must still receive every byte")
48+
Expect(bw.truncated).To(BeTrue())
49+
})
50+
51+
It("handles a write that straddles the cap by keeping only the leading slice", func() {
52+
downstream := httptest.NewRecorder()
53+
buf := &bytes.Buffer{}
54+
bw := &bodyWriter{ResponseWriter: downstream, body: buf, maxBytes: 10}
55+
56+
_, err := bw.Write([]byte("12345"))
57+
Expect(err).ToNot(HaveOccurred())
58+
Expect(bw.truncated).To(BeFalse())
59+
60+
_, err = bw.Write([]byte("67890ABCDE"))
61+
Expect(err).ToNot(HaveOccurred())
62+
63+
Expect(buf.String()).To(Equal("1234567890"))
64+
Expect(downstream.Body.String()).To(Equal("1234567890ABCDE"))
65+
Expect(bw.truncated).To(BeTrue())
66+
})
67+
68+
It("ignores further writes after the cap was already hit", func() {
69+
downstream := httptest.NewRecorder()
70+
buf := &bytes.Buffer{}
71+
bw := &bodyWriter{ResponseWriter: downstream, body: buf, maxBytes: 4}
72+
73+
_, _ = bw.Write([]byte("AAAA"))
74+
_, _ = bw.Write([]byte("BBBB"))
75+
_, _ = bw.Write([]byte("CCCC"))
76+
77+
Expect(buf.String()).To(Equal("AAAA"))
78+
Expect(downstream.Body.String()).To(Equal("AAAABBBBCCCC"))
79+
Expect(bw.truncated).To(BeTrue())
80+
})
81+
})
82+
83+
var _ = Describe("truncateForTrace", func() {
84+
It("returns the input unchanged when below the cap", func() {
85+
in := []byte("hello")
86+
out, truncated := truncateForTrace(in, 1024)
87+
Expect(truncated).To(BeFalse())
88+
Expect(out).To(Equal(in))
89+
})
90+
91+
It("truncates when the input exceeds the cap and signals truncation", func() {
92+
in := []byte(strings.Repeat("z", 200))
93+
out, truncated := truncateForTrace(in, 64)
94+
Expect(truncated).To(BeTrue())
95+
Expect(out).To(HaveLen(64))
96+
Expect(string(out)).To(Equal(strings.Repeat("z", 64)))
97+
})
98+
99+
It("treats maxBytes <= 0 as unlimited (back-compat with current default)", func() {
100+
in := []byte(strings.Repeat("q", 10_000))
101+
out, truncated := truncateForTrace(in, 0)
102+
Expect(truncated).To(BeFalse())
103+
Expect(out).To(HaveLen(len(in)))
104+
})
105+
106+
It("does not retain the caller's backing array (defensive copy)", func() {
107+
in := []byte("abcdefghij")
108+
out, truncated := truncateForTrace(in, 4)
109+
Expect(truncated).To(BeTrue())
110+
Expect(string(out)).To(Equal("abcd"))
111+
112+
// Mutating the source must not corrupt the trace copy.
113+
in[0] = 'Z'
114+
Expect(string(out)).To(Equal("abcd"))
115+
})
116+
})

core/http/react-ui/src/pages/Traces.jsx

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,15 @@ export default function Traces() {
406406
<button className="btn btn-secondary btn-sm" onClick={fetchTraces}><i className="fas fa-rotate" /> Refresh</button>
407407
<button className="btn btn-secondary btn-sm" onClick={handleExport} disabled={traces.length === 0}><i className="fas fa-download" /> Export</button>
408408
<div style={{ flex: 1 }} />
409-
<button className="btn btn-danger btn-sm" onClick={handleClear} disabled={traces.length === 0}><i className="fas fa-trash" /> Clear</button>
409+
<button
410+
className="btn btn-danger btn-sm"
411+
onClick={handleClear}
412+
/* Stay enabled while loading: a massive in-memory trace buffer is
413+
precisely the case where the user can't see the table yet and
414+
needs Clear to recover. Clearing an already-empty server-side
415+
buffer is a harmless no-op. */
416+
disabled={!loading && traces.length === 0}
417+
><i className="fas fa-trash" /> Clear</button>
410418
</div>
411419

412420
{settings && (() => {

0 commit comments

Comments
 (0)