parca-dev
diff --git a/flags/flags.go b/flags/flags.go
Lines changed: 2 additions & 0 deletions
diff --git a/main.go b/main.go
Lines changed: 9 additions & 0 deletions
diff --git a/reporter/log_streamer.go b/reporter/log_streamer.go
Lines changed: 42 additions & 197 deletions
@@ -162,6 +162,8 @@ type Flags struct {
 	EnableOOMProfAllocs bool `default:"false" help:"Enable OOMProf alloc counts."`
 
 	MergeGpuProfiles bool `default:"false" help:"Report GPU kernel timing and GPU PC sampling under a single gpu_time/nanoseconds sample_type, differentiated by a gpu_view label (pc_sample|kernel_time). When false (the default), they are reported as separate sample_types (gpu_kernel_time/nanoseconds and gpu_pcsample/count) with no per-sample labels."`
+
+	OTLPLogging bool `default:"false" help:"Forward parca-agent's own logrus output to the remote-store as OTLP log records (in addition to local stderr). Requires a remote-store; ignored in offline mode."`
 }
 
 type ExitCode int
 
@@ -434,6 +434,15 @@ func mainWithExitCode() flags.ExitCode {
 	}
 	parcaReporter.Start(mainCtx)
 
+	if f.OTLPLogging {
+		if grpcConn == nil {
+			log.Warn("--otlp-logging is set but no remote-store is configured; agent logs will only go to stderr")
+		} else {
+			log.AddHook(reporter.NewOTLPLogrusHook(parcaReporter.Logger("parca-agent.agent")))
+			log.Info("forwarding parca-agent logs to remote-store via OTLP")
+		}
+	}
+
 	includeEnvVars := libpf.Set[string]{}
 	if len(f.IncludeEnvVar) > 0 {
 		for _, env := range f.IncludeEnvVar {
 
@@ -16,218 +16,63 @@ package reporter
 import (
 	"context"
 	"fmt"
-	"sync/atomic"
 	"time"
 
-	log "github.com/sirupsen/logrus"
-	"go.opentelemetry.io/collector/pdata/pcommon"
-	"go.opentelemetry.io/collector/pdata/plog"
-	"go.opentelemetry.io/collector/pdata/plog/plogotlp"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc"
+	sdklog "go.opentelemetry.io/otel/sdk/log"
+	"go.opentelemetry.io/otel/sdk/resource"
 	"google.golang.org/grpc"
 )
 
-// LogEvent is the in-process representation of a single OTLP log record
-// produced by any source that uses the ParcaReporter log-event API. The
-// streamer batches a slice of these and ships them as one OTLP/gRPC
-// ExportLogsServiceRequest.
-type LogEvent struct {
-	TimestampNs         int64  // wall-clock ns (unix epoch) of the event itself
-	ObservedTimestampNs int64  // wall-clock ns at the moment the producer enqueued the event
-	Body                string // LogRecord.Body (set as a string body)
-	Attributes          map[string]LogAttr
-}
-
-// LogAttr is a tagged union covering the OTLP attribute value types we use.
-// Producers populate one of Str / Int and leave the other zero. The streamer
-// picks the right setter based on which is set.
-type LogAttr struct {
-	Str   string
-	Int   int64
-	IsInt bool
-}
-
-const (
-	logStreamerBatchSize    = 512
-	logStreamerBatchAge     = 250 * time.Millisecond
-	logStreamerQueueSize    = 4096
-	logStreamerErrorBackoff = 5 * time.Second
-	logStreamerScopeName    = "parca-agent"
-)
-
-// logStreamerOptions is the resource-attribute payload attached to every batch.
-type logStreamerOptions struct {
+// logProviderOptions is the resource-attribute payload attached to every batch
+// shipped by the OTLP log exporter.
+type logProviderOptions struct {
 	ServiceName    string // service.name = "parca-agent"
 	ServiceVersion string // service.version = build VCS revision
 	HostName       string // host.name = agent --node
 }
 
-// logStreamer batches LogEvents and ships them as OTLP/gRPC
-// ExportLogsServiceRequest messages via plogotlp.GRPCClient. Owned by
-// arrowReporter; constructed once per New() and run in the Start() goroutine
-// when grpcConn is non-nil.
-type logStreamer struct {
-	conn   *grpc.ClientConn
-	client plogotlp.GRPCClient
-	opts   logStreamerOptions
-
-	in chan LogEvent
-
-	// Counters surfaced via prometheus from arrowReporter; the streamer itself
-	// only owns the atomics. arrowReporter wires them into a registry.
-	batchesSent atomic.Uint64
-	eventsSent  atomic.Uint64
-	exportErrs  atomic.Uint64
-	queueDrops  atomic.Uint64
-	rejected    atomic.Uint64
-}
-
-func newLogStreamer(conn *grpc.ClientConn, opts logStreamerOptions) *logStreamer {
-	return &logStreamer{
-		conn:   conn,
-		client: plogotlp.NewGRPCClient(conn),
-		opts:   opts,
-		in:     make(chan LogEvent, logStreamerQueueSize),
-	}
-}
-
-// enqueue tries to publish a single event. Returns false if the queue is full;
-// the caller (ReportLogEvents) increments queueDrops and moves on.
-func (s *logStreamer) enqueue(ev LogEvent) bool {
-	select {
-	case s.in <- ev:
-		return true
-	default:
-		s.queueDrops.Add(1)
-		return false
-	}
-}
-
-// run batches LogEvents and ships them as OTLP ExportLogsServiceRequest
-// messages. Each batch is one unary RPC; transient backend errors trigger a
-// brief sleep to avoid hot-looping on persistent failures. Returns when ctx is
-// cancelled.
-func (s *logStreamer) run(ctx context.Context) {
-	batch := make([]LogEvent, 0, logStreamerBatchSize)
-	flushTimer := time.NewTimer(logStreamerBatchAge)
-	defer flushTimer.Stop()
-	stopLogFlushTimer(flushTimer)
-
-	flush := func() {
-		if len(batch) == 0 {
-			return
-		}
-		if err := s.export(ctx, batch); err != nil {
-			if ctx.Err() != nil {
-				return
-			}
-			s.exportErrs.Add(1)
-			log.Warnf("log streamer: export errored (dropping %d events, backing off %s): %v",
-				len(batch), logStreamerErrorBackoff, err)
-			// Backoff to avoid spinning on a persistently-broken endpoint.
-			// Events accumulating during the sleep are queued in s.in and may
-			// also be dropped by enqueue's non-blocking send (queueDrops).
-			select {
-			case <-ctx.Done():
-			case <-time.After(logStreamerErrorBackoff):
-			}
-		} else {
-			s.batchesSent.Add(1)
-			s.eventsSent.Add(uint64(len(batch)))
-		}
-		batch = batch[:0]
-	}
-
-	for {
-		select {
-		case <-ctx.Done():
-			flush()
-			return
-
-		case ev, ok := <-s.in:
-			if !ok {
-				flush()
-				return
-			}
-			if len(batch) == 0 {
-				resetLogFlushTimer(flushTimer, logStreamerBatchAge)
-			}
-			batch = append(batch, ev)
-			if len(batch) >= logStreamerBatchSize {
-				flush()
-				stopLogFlushTimer(flushTimer)
-			}
-
-		case <-flushTimer.C:
-			flush()
-		}
-	}
-}
+// Batching policy. The OTel SDK defaults are tuned for general telemetry
+// workloads (1 s export interval); for probe-fire-style events we want a
+// tighter age cap so that a single slow callback shows up in the UI within a
+// few hundred milliseconds of the dtor firing.
+const (
+	logExportMaxBatchSize = 512
+	logExportInterval     = 250 * time.Millisecond
+	logMaxQueueSize       = 4096
+)
 
-// export ships one batch as a single OTLP/gRPC ExportLogsServiceRequest. The
-// returned error means the RPC itself failed; a successful RPC with
-// PartialSuccess.RejectedLogRecords > 0 is logged but not returned (the rest of
-// the batch was accepted).
-func (s *logStreamer) export(ctx context.Context, batch []LogEvent) error {
-	req := plogotlp.NewExportRequestFromLogs(s.buildLogs(batch))
-	resp, err := s.client.Export(ctx, req)
+// newLogProvider constructs an OTel logs LoggerProvider that ships records as
+// OTLP/gRPC ExportLogsServiceRequest messages over the supplied connection.
+// The connection is shared with the profile-data path: the caller owns it and
+// this function only borrows it. The SDK's BatchProcessor runs its own
+// goroutines for batching and retry and is torn down by the provider's Shutdown.
+func newLogProvider(ctx context.Context, conn *grpc.ClientConn, opts logProviderOptions) (*sdklog.LoggerProvider, error) {
+	exp, err := otlploggrpc.New(ctx, otlploggrpc.WithGRPCConn(conn))
 	if err != nil {
-		return fmt.Errorf("plogotlp export: %w", err)
-	}
-	if ps := resp.PartialSuccess(); ps.RejectedLogRecords() > 0 {
-		s.rejected.Add(uint64(ps.RejectedLogRecords()))
-		log.Warnf("log streamer: server rejected %d/%d records: %s",
-			ps.RejectedLogRecords(), len(batch), ps.ErrorMessage())
+		return nil, fmt.Errorf("create otlploggrpc exporter: %w", err)
 	}
-	return nil
-}
 
-func (s *logStreamer) buildLogs(batch []LogEvent) plog.Logs {
-	logs := plog.NewLogs()
-	rl := logs.ResourceLogs().AppendEmpty()
-	resAttr := rl.Resource().Attributes()
-	resAttr.PutStr("service.name", s.opts.ServiceName)
-	if s.opts.ServiceVersion != "" {
-		resAttr.PutStr("service.version", s.opts.ServiceVersion)
+	attrs := []attribute.KeyValue{
+		attribute.String("service.name", opts.ServiceName),
 	}
-	if s.opts.HostName != "" {
-		resAttr.PutStr("host.name", s.opts.HostName)
+	if opts.ServiceVersion != "" {
+		attrs = append(attrs, attribute.String("service.version", opts.ServiceVersion))
 	}
-
-	sl := rl.ScopeLogs().AppendEmpty()
-	sl.Scope().SetName(logStreamerScopeName)
-
-	records := sl.LogRecords()
-	records.EnsureCapacity(len(batch))
-	for _, ev := range batch {
-		lr := records.AppendEmpty()
-		lr.SetTimestamp(pcommon.Timestamp(ev.TimestampNs))
-		lr.SetObservedTimestamp(pcommon.Timestamp(ev.ObservedTimestampNs))
-		lr.Body().SetStr(ev.Body)
-		a := lr.Attributes()
-		for k, v := range ev.Attributes {
-			if v.IsInt {
-				a.PutInt(k, v.Int)
-			} else {
-				a.PutStr(k, v.Str)
-			}
-		}
+	if opts.HostName != "" {
+		attrs = append(attrs, attribute.String("host.name", opts.HostName))
 	}
-
-	return logs
-}
-
-// stopLogFlushTimer drains the timer channel after Stop so the next Reset
-// starts cleanly.
-func stopLogFlushTimer(t *time.Timer) {
-	if !t.Stop() {
-		select {
-		case <-t.C:
-		default:
-		}
-	}
-}
-
-func resetLogFlushTimer(t *time.Timer, d time.Duration) {
-	stopLogFlushTimer(t)
-	t.Reset(d)
+	res := resource.NewSchemaless(attrs...)
+
+	bp := sdklog.NewBatchProcessor(exp,
+		sdklog.WithMaxQueueSize(logMaxQueueSize),
+		sdklog.WithExportMaxBatchSize(logExportMaxBatchSize),
+		sdklog.WithExportInterval(logExportInterval),
+	)
+
+	return sdklog.NewLoggerProvider(
+		sdklog.WithResource(res),
+		sdklog.WithProcessor(bp),
+	), nil
 }
Original file line number	Diff line number	Diff line change
`@@ -162,6 +162,8 @@ type Flags struct {`
`162`	`162`	EnableOOMProfAllocs bool `default:"false" help:"Enable OOMProf alloc counts."`
`163`	`163`
`164`	`164`	MergeGpuProfiles bool `default:"false" help:"Report GPU kernel timing and GPU PC sampling under a single gpu_time/nanoseconds sample_type, differentiated by a gpu_view label (pc_sample\|kernel_time). When false (the default), they are reported as separate sample_types (gpu_kernel_time/nanoseconds and gpu_pcsample/count) with no per-sample labels."`
	`165`	`+`
	`166`	+ OTLPLogging bool `default:"false" help:"Forward parca-agent's own logrus output to the remote-store as OTLP log records (in addition to local stderr). Requires a remote-store; ignored in offline mode."`
`165`	`167`	`}`
`166`	`168`
`167`	`169`	`type ExitCode int`