Skip to content

Commit f754e9f

Browse files
committed
tracing: use raw TCP instead of net/http to avoid crypto/tls bloat
1 parent 3a884b1 commit f754e9f

File tree

5 files changed

+73
-42
lines changed

5 files changed

+73
-42
lines changed

integration/tracing_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ func traceVMBoot(ctx context.Context, t *testing.T, vmm vm.Manager) {
9898
if err != nil {
9999
t.Logf("warning: failed to subscribe to VM trace stream: %v", err)
100100
} else {
101-
go tracing.ForwardTraces(ctx, trc, tracing.OTLPEndpoint(), hostBootTime)
101+
go tracing.ForwardTraces(ctx, trc, tracing.ParseOTLPEndpoint(), hostBootTime)
102102
}
103103

104104
// Span: TTRPC.System.Info

internal/shim/task/service.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -298,13 +298,13 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *
298298
// Start relaying VM traces to the host OTel collector.
299299
// Use WithoutCancel so the relay outlives the Create RPC context and
300300
// can forward spans from subsequent RPCs (Start, Exec, etc.).
301-
if endpoint := tracing.OTLPEndpoint(); endpoint != "" {
301+
if ep := tracing.ParseOTLPEndpoint(); ep != nil {
302302
relayCtx := context.WithoutCancel(ctx)
303303
trc, err := tracesAPI.NewTTRPCTracesClient(vmc).Stream(relayCtx, empty)
304304
if err != nil {
305305
log.G(ctx).WithError(err).Warn("failed to subscribe to VM trace stream")
306306
} else {
307-
go tracing.ForwardTraces(relayCtx, trc, endpoint, hostBootTime)
307+
go tracing.ForwardTraces(relayCtx, trc, ep, hostBootTime)
308308
}
309309
}
310310

internal/tracing/flush.go

Lines changed: 65 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
package tracing
22

33
import (
4-
"bytes"
54
"context"
65
"encoding/json"
76
"fmt"
8-
"net/http"
7+
"net"
98
"net/url"
109
"os"
1110
"strconv"
@@ -17,9 +16,9 @@ import (
1716

1817
// Flusher collects finished spans and periodically exports them via OTLP HTTP.
1918
type Flusher struct {
20-
endpoint string
19+
addr string // host:port
20+
path string // e.g. "/v1/traces"
2121
serviceName string
22-
client *http.Client
2322

2423
mu sync.Mutex
2524
pending []*Span
@@ -28,11 +27,11 @@ type Flusher struct {
2827

2928
// NewFlusher creates a flusher that exports to the given OTLP endpoint.
3029
// Call Shutdown to flush remaining spans and stop the background goroutine.
31-
func NewFlusher(ctx context.Context, endpoint, serviceName string, interval time.Duration) *Flusher {
30+
func NewFlusher(ctx context.Context, addr, path, serviceName string, interval time.Duration) *Flusher {
3231
f := &Flusher{
33-
endpoint: endpoint,
32+
addr: addr,
33+
path: path,
3434
serviceName: serviceName,
35-
client: &http.Client{},
3635
done: make(chan struct{}),
3736
}
3837
go f.loop(ctx, interval)
@@ -98,7 +97,7 @@ func (f *Flusher) flush(ctx context.Context) error {
9897
}},
9998
}
10099

101-
return postOTLP(ctx, f.client, f.endpoint, req)
100+
return postOTLP(f.addr, f.path, req)
102101
}
103102

104103
func spanToOTLPJSON(s *Span) otlpSpan {
@@ -114,54 +113,89 @@ func spanToOTLPJSON(s *Span) otlpSpan {
114113
}
115114
}
116115

117-
func postOTLP(ctx context.Context, client *http.Client, endpoint string, req otlpExportRequest) error {
118-
data, err := json.Marshal(req)
116+
// postOTLP sends an OTLP JSON request via a raw HTTP/1.1 POST over TCP.
117+
// This avoids importing net/http (and its transitive crypto/tls stack).
118+
func postOTLP(addr, path string, req otlpExportRequest) error {
119+
body, err := json.Marshal(req)
119120
if err != nil {
120121
return fmt.Errorf("marshal OTLP request: %w", err)
121122
}
122123

123-
httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(data))
124+
conn, err := net.DialTimeout("tcp", addr, 5*time.Second)
124125
if err != nil {
125-
return fmt.Errorf("create HTTP request: %w", err)
126+
return fmt.Errorf("connect to %s: %w", addr, err)
126127
}
127-
httpReq.Header.Set("Content-Type", "application/json")
128-
129-
resp, err := client.Do(httpReq)
130-
if err != nil {
131-
return fmt.Errorf("send OTLP request: %w", err)
128+
defer conn.Close()
129+
conn.SetDeadline(time.Now().Add(10 * time.Second))
130+
131+
// Write a minimal HTTP/1.1 request.
132+
var buf []byte
133+
buf = append(buf, "POST "...)
134+
buf = append(buf, path...)
135+
buf = append(buf, " HTTP/1.1\r\nHost: "...)
136+
buf = append(buf, addr...)
137+
buf = append(buf, "\r\nContent-Type: application/json\r\nContent-Length: "...)
138+
buf = strconv.AppendInt(buf, int64(len(body)), 10)
139+
buf = append(buf, "\r\nConnection: close\r\n\r\n"...)
140+
buf = append(buf, body...)
141+
142+
if _, err := conn.Write(buf); err != nil {
143+
return fmt.Errorf("write OTLP request: %w", err)
132144
}
133-
resp.Body.Close()
134145

135-
if resp.StatusCode >= 400 {
136-
return fmt.Errorf("OTLP export failed: %s", resp.Status)
146+
// Read just the status line to check for errors.
147+
var resp [128]byte
148+
n, _ := conn.Read(resp[:])
149+
if n < 12 {
150+
return fmt.Errorf("OTLP response too short")
151+
}
152+
// "HTTP/1.1 200" — status code starts at byte 9
153+
if resp[9] >= '4' {
154+
return fmt.Errorf("OTLP export failed: %s", string(resp[:n]))
137155
}
138156
return nil
139157
}
140158

141-
// OTLPEndpoint returns the OTLP traces endpoint URL derived from
142-
// OTEL_EXPORTER_OTLP_ENDPOINT, or "" if the env var is unset.
143-
func OTLPEndpoint() string {
159+
// OTLPEndpoint holds the parsed host:port and path for an OTLP endpoint.
160+
type OTLPEndpoint struct {
161+
addr string // host:port
162+
path string // e.g. "/v1/traces"
163+
}
164+
165+
// ParseOTLPEndpoint parses OTEL_EXPORTER_OTLP_ENDPOINT into addr and path.
166+
// Returns nil if the env var is unset.
167+
func ParseOTLPEndpoint() *OTLPEndpoint {
144168
endpoint := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
145169
if endpoint == "" {
146-
return ""
170+
return nil
171+
}
172+
u, err := url.Parse(endpoint)
173+
if err != nil || u.Host == "" {
174+
// Treat as host:port directly.
175+
return &OTLPEndpoint{addr: endpoint, path: "/v1/traces"}
147176
}
148-
if u, err := url.Parse(endpoint); err != nil || u.Scheme == "" {
149-
endpoint = "http://" + endpoint
177+
addr := u.Host
178+
if u.Port() == "" {
179+
if u.Scheme == "https" {
180+
addr += ":443"
181+
} else {
182+
addr += ":80"
183+
}
150184
}
151-
return endpoint + "/v1/traces"
185+
return &OTLPEndpoint{addr: addr, path: "/v1/traces"}
152186
}
153187

154188
// Init sets up the global span sink with an OTLP HTTP flusher.
155189
// Returns a shutdown function. If OTEL_EXPORTER_OTLP_ENDPOINT is unset,
156190
// tracing is disabled and the returned shutdown is a no-op.
157191
func Init(ctx context.Context, serviceName string) func(context.Context) error {
158-
endpoint := OTLPEndpoint()
159-
if endpoint == "" {
192+
ep := ParseOTLPEndpoint()
193+
if ep == nil {
160194
return func(context.Context) error { return nil }
161195
}
162-
f := NewFlusher(ctx, endpoint, serviceName, 100*time.Millisecond)
196+
f := NewFlusher(ctx, ep.addr, ep.path, serviceName, 100*time.Millisecond)
163197
SetSink(f)
164-
log.G(ctx).WithField("endpoint", endpoint).Debug("tracing enabled")
198+
log.G(ctx).WithField("endpoint", ep.addr).Debug("tracing enabled")
165199
return func(ctx context.Context) error {
166200
SetSink(nil)
167201
return f.Shutdown(ctx)

internal/tracing/relay.go

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package tracing
1919
import (
2020
"context"
2121
"encoding/hex"
22-
"net/http"
2322
"strconv"
2423
"time"
2524

@@ -31,9 +30,7 @@ import (
3130
// ForwardTraces reads spans from the VM trace stream and exports them
3231
// to the OTLP endpoint as JSON. hostBootTime is the host wall-clock time
3332
// captured when ttrpc became responsive, used to correct VM-vs-host clock skew.
34-
func ForwardTraces(ctx context.Context, stream tracespb.TTRPCTraces_StreamClient, endpoint string, hostBootTime time.Time) {
35-
client := &http.Client{}
36-
33+
func ForwardTraces(ctx context.Context, stream tracespb.TTRPCTraces_StreamClient, ep *OTLPEndpoint, hostBootTime time.Time) {
3734
// The VM's RTC has only second-level resolution, so its wall clock
3835
// can be up to ~1s behind the host. We compute the offset from the
3936
// first interceptor span (which is created at the moment the first
@@ -57,13 +54,13 @@ func ForwardTraces(ctx context.Context, stream tracespb.TTRPCTraces_StreamClient
5754
log.G(ctx).WithField("offset", clockOffset).Debug("VM clock offset computed")
5855
}
5956

60-
if err := exportVMSpan(ctx, client, endpoint, span, clockOffset); err != nil {
57+
if err := exportVMSpan(ep, span, clockOffset); err != nil {
6158
log.G(ctx).WithError(err).Warn("trace relay export")
6259
}
6360
}
6461
}
6562

66-
func exportVMSpan(ctx context.Context, client *http.Client, endpoint string, s *tracespb.Span, clockOffset time.Duration) error {
63+
func exportVMSpan(ep *OTLPEndpoint, s *tracespb.Span, clockOffset time.Duration) error {
6764
startNano := time.Unix(0, s.StartTimeUnixNano).Add(clockOffset).UnixNano()
6865
endNano := time.Unix(0, s.EndTimeUnixNano).Add(clockOffset).UnixNano()
6966

@@ -102,5 +99,5 @@ func exportVMSpan(ctx context.Context, client *http.Client, endpoint string, s *
10299
}},
103100
}
104101

105-
return postOTLP(ctx, client, endpoint, req)
102+
return postOTLP(ep.addr, ep.path, req)
106103
}

plugins/shim/tracing/plugin.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ func init() {
2323
ctx := ic.Context
2424

2525
shutdownTracing := tracing.Init(ctx, "nerdbox")
26-
if tracing.OTLPEndpoint() == "" {
26+
if tracing.ParseOTLPEndpoint() == nil {
2727
log.G(ctx).Debug("OTEL_EXPORTER_OTLP_ENDPOINT not set, shim tracing disabled")
2828
}
2929

0 commit comments

Comments
 (0)