Skip to content

Commit 5a40696

Browse files
loispostula and olblak authored
feat(telemetry): add OpenTelemetry tracing for pipeline execution (#8055)
* feat(telemetry): add OpenTelemetry tracing for pipeline execution
* refactor: add context.Context to Resource interface
* refactor: propagate context.Context into HTTP calls and SCM clients
* fix(telemetry): harden OTel tracing against credential leaks and fix span correctness

Sanitize error strings recorded on spans to strip URL-embedded credentials. Replace autoexport with explicit OTLP/stdout exporters to reduce binary bloat. Store trace.Tracer on structs instead of calling the global registry per-invocation. Remove OTel types from domain APIs (CrawlerResult, Pipeline).

- Add telemetry.SanitizeError and telemetry.RecordSpanError helpers
- Replace autoexport with otlptracegrpc/otlptracehttp/stdouttrace
- Add tracer field to Engine, Pipeline, AutoDiscovery structs
- Replace CrawlerSpanCtx (trace.SpanContext) with CrawlerKind (string)
- Fix missing span.SetStatus on target FAILURE path
- Fix lone RecordError without SetStatus in pipeline loop (use AddEvent)
- Move span start before mutex in runFlowCallbackWithCtx for accurate timing
- Rename unused ctx to _ in 98 resource plugin files
- Restore invariant comments on unreachable DAG error paths

Signed-off-by: Loïs Postula <lois@postu.la>

---------

Signed-off-by: Loïs Postula <lois@postu.la>
Co-authored-by: Olivier Vernin <olivier@vernin.me>
1 parent 2a69a93 commit 5a40696

File tree

282 files changed

+1993
-880
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

282 files changed

+1993
-880
lines changed

.github/ISSUE_TEMPLATE/1-bug-report.yml

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,48 @@ body:
5858
```
5959
validations:
6060
required: false
61+
- type: textarea
62+
attributes:
63+
label: OpenTelemetry Trace
64+
description: |
65+
Attach a trace file to help diagnose timing and execution issues.
66+
The trace can be loaded in [Grafana Tempo](https://grafana.com/docs/tempo/) or any OTLP-compatible backend.
67+
68+
1. Save this as `otel-collector-config.yaml`:
69+
```yaml
70+
receivers:
71+
otlp:
72+
protocols:
73+
grpc:
74+
endpoint: 0.0.0.0:4317
75+
exporters:
76+
file:
77+
path: /traces/output.json
78+
service:
79+
pipelines:
80+
traces:
81+
receivers: [otlp]
82+
exporters: [file]
83+
```
84+
85+
2. Start the [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/):
86+
```bash
87+
mkdir -p traces
88+
docker run --rm -p 4317:4317 \
89+
-v $(pwd)/otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml \
90+
-v $(pwd)/traces:/traces \
91+
otel/opentelemetry-collector-contrib:latest
92+
```
93+
94+
3. Run updatecli with tracing enabled:
95+
```bash
96+
OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 updatecli diff --config <your-pipeline.yaml>
97+
```
98+
99+
4. Attach the resulting `traces/output.json` file below.
100+
placeholder: Attach traces/output.json file
101+
validations:
102+
required: false
61103
- type: textarea
62104
attributes:
63105
label: Anything else?

.github/workflows/go.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,4 @@ jobs:
105105
IS_TTY: true # https://github.com/ovh/venom#use-venom-in-ci
106106
# Access only to ec2 AMI api in read-only
107107
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
108+
OTEL_TRACES_EXPORTER: console

cmd/root.go

Lines changed: 59 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,23 @@
11
package cmd
22

33
import (
4+
"context"
45
"os"
56
"strings"
7+
"time"
68

79
"github.com/sirupsen/logrus"
10+
"go.opentelemetry.io/otel/attribute"
11+
"go.opentelemetry.io/otel/trace"
812
"golang.org/x/exp/slices"
913

1014
"github.com/updatecli/updatecli/pkg/core/cmdoptions"
1115
"github.com/updatecli/updatecli/pkg/core/log"
1216
"github.com/updatecli/updatecli/pkg/core/registry"
17+
"github.com/updatecli/updatecli/pkg/core/telemetry"
1318
"github.com/updatecli/updatecli/pkg/core/tmp"
1419
"github.com/updatecli/updatecli/pkg/core/udash"
20+
"github.com/updatecli/updatecli/pkg/core/version"
1521
"github.com/updatecli/updatecli/pkg/plugins/utils/ci"
1622

1723
"github.com/updatecli/updatecli/pkg/core/engine"
@@ -104,35 +110,32 @@ func init() {
104110
}
105111

106112
func run(command string) error {
113+
ctx := context.Background()
114+
shutdown := telemetry.Init(ctx, "updatecli", version.Version)
115+
defer func() {
116+
shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
117+
defer cancel()
118+
if err := shutdown(shutdownCtx); err != nil {
119+
logrus.Warnf("telemetry shutdown: %v", err)
120+
}
121+
}()
122+
123+
tracer := telemetry.Tracer("updatecli")
124+
e.SetTracer(tracer)
125+
ctx, span := tracer.Start(ctx, "updatecli",
126+
trace.WithAttributes(
127+
attribute.String("updatecli.command", command),
128+
attribute.String("updatecli.version", version.Version),
129+
),
130+
)
131+
defer span.End()
107132

108133
for _, id := range pipelineIds {
109134
e.Options.PipelineIDs = append(e.Options.PipelineIDs, strings.Split(id, ",")...)
110135
}
111136

112-
for _, label := range labels {
113-
labelsArray := strings.Split(label, ",")
114-
115-
initLabels := func() {
116-
if e.Options.Labels == nil {
117-
e.Options.Labels = make(map[string]string)
118-
}
119-
}
120-
121-
for i := range labelsArray {
122-
labelKeyValue := strings.SplitN(labelsArray[i], ":", 2)
123-
if labelKeyValue[0] == "" {
124-
logrus.Warnf("Ignoring label with empty key: %q", labelsArray[i])
125-
continue
126-
}
127-
switch len(labelKeyValue) {
128-
case 2:
129-
initLabels()
130-
e.Options.Labels[labelKeyValue[0]] = labelKeyValue[1]
131-
case 1:
132-
initLabels()
133-
e.Options.Labels[labelKeyValue[0]] = ""
134-
}
135-
}
137+
if parsed := parseLabels(labels); parsed != nil {
138+
e.Options.Labels = parsed
136139
}
137140

138141
switch command {
@@ -147,13 +150,13 @@ func run(command string) error {
147150
}()
148151
}
149152

150-
err := e.Prepare()
153+
err := e.Prepare(ctx)
151154
if err != nil {
152155
logrus.Errorf("%s %s", result.FAILURE, err)
153156
return err
154157
}
155158

156-
err = e.Run()
159+
err = e.Run(ctx)
157160
if err != nil {
158161
logrus.Errorf("%s %s", result.FAILURE, err)
159162
return err
@@ -168,13 +171,13 @@ func run(command string) error {
168171
}()
169172
}
170173

171-
err := e.Prepare()
174+
err := e.Prepare(ctx)
172175
if err != nil {
173176
logrus.Errorf("%s %s", result.FAILURE, err)
174177
return err
175178
}
176179

177-
err = e.Run()
180+
err = e.Run(ctx)
178181
if err != nil {
179182
logrus.Errorf("%s %s", result.FAILURE, err)
180183
return err
@@ -189,7 +192,7 @@ func run(command string) error {
189192
}()
190193
}
191194

192-
err := e.Prepare()
195+
err := e.Prepare(ctx)
193196
if err != nil {
194197
logrus.Errorf("%s %s", result.FAILURE, err)
195198
}
@@ -242,7 +245,7 @@ func run(command string) error {
242245
}
243246

244247
if !showDisablePrepare {
245-
err := e.Prepare()
248+
err := e.Prepare(ctx)
246249
if err != nil {
247250
logrus.Errorf("%s %s", result.FAILURE, err)
248251
return err
@@ -290,6 +293,32 @@ func run(command string) error {
290293
return nil
291294
}
292295

296+
// parseLabels converts a slice of "key:value" or "key" strings into a label map.
297+
// Returns nil when the input produces no valid labels.
298+
func parseLabels(labels []string) map[string]string {
299+
var result map[string]string
300+
301+
for _, label := range labels {
302+
for _, entry := range strings.Split(label, ",") {
303+
kv := strings.SplitN(entry, ":", 2)
304+
if kv[0] == "" {
305+
logrus.Warnf("Ignoring label with empty key: %q", entry)
306+
continue
307+
}
308+
if result == nil {
309+
result = make(map[string]string)
310+
}
311+
if len(kv) == 2 {
312+
result[kv[0]] = kv[1]
313+
} else {
314+
result[kv[0]] = ""
315+
}
316+
}
317+
}
318+
319+
return result
320+
}
321+
293322
func getPolicyFilesFromRegistry() error {
294323

295324
if slices.Equal(policyReferences, []string{""}) || slices.Equal(policyReferences, []string{}) {

go.mod

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,13 @@ require (
7070
github.com/vmware-labs/yaml-jsonpath v0.3.2
7171
github.com/yuin/goldmark v1.7.16
7272
github.com/zclconf/go-cty v1.18.0
73+
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0
74+
go.opentelemetry.io/otel v1.42.0
75+
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.42.0
76+
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.42.0
77+
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.42.0
78+
go.opentelemetry.io/otel/sdk v1.42.0
79+
go.opentelemetry.io/otel/trace v1.42.0
7380
go.yaml.in/yaml/v3 v3.0.4
7481
go.yaml.in/yaml/v4 v4.0.0-rc.4
7582
golang.org/x/exp v0.0.0-20250911091902-df9299821621
@@ -128,6 +135,7 @@ require (
128135
github.com/blang/semver/v4 v4.0.0 // indirect
129136
github.com/buger/jsonparser v1.1.2 // indirect
130137
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
138+
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
131139
github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 // indirect
132140
github.com/cockroachdb/apd/v3 v3.2.1 // indirect
133141
github.com/containerd/errdefs v1.0.0 // indirect
@@ -159,6 +167,7 @@ require (
159167
github.com/google/gnostic-models v0.7.0 // indirect
160168
github.com/google/go-querystring v1.2.0 // indirect
161169
github.com/google/s2a-go v0.1.9 // indirect
170+
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect
162171
github.com/hashicorp/terraform-config-inspect v0.0.0-20230614215431-f32df32a01cd // indirect
163172
github.com/hashicorp/terraform-svchost v0.1.1 // indirect
164173
github.com/huaweicloud/huaweicloud-sdk-go-v3 v0.1.187 // indirect
@@ -196,19 +205,18 @@ require (
196205
go.mongodb.org/mongo-driver v1.13.1 // indirect
197206
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
198207
go.opentelemetry.io/contrib/detectors/gcp v1.39.0 // indirect
208+
go.opentelemetry.io/contrib/exporters/autoexport v0.67.0 // indirect
199209
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.63.0 // indirect
200-
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect
201-
go.opentelemetry.io/otel v1.40.0 // indirect
202-
go.opentelemetry.io/otel/metric v1.40.0 // indirect
203-
go.opentelemetry.io/otel/sdk v1.40.0 // indirect
204-
go.opentelemetry.io/otel/sdk/metric v1.40.0 // indirect
205-
go.opentelemetry.io/otel/trace v1.40.0 // indirect
206-
go.opentelemetry.io/proto/otlp v1.7.1 // indirect
210+
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.42.0 // indirect
211+
go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.42.0 // indirect
212+
go.opentelemetry.io/otel/metric v1.42.0 // indirect
213+
go.opentelemetry.io/otel/sdk/metric v1.42.0 // indirect
214+
go.opentelemetry.io/proto/otlp v1.9.0 // indirect
207215
go.yaml.in/yaml/v2 v2.4.3 // indirect
208216
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect
209217
google.golang.org/genproto v0.0.0-20260128011058-8636f8732409 // indirect
210-
google.golang.org/genproto/googleapis/api v0.0.0-20260203192932-546029d2fa20 // indirect
211-
google.golang.org/genproto/googleapis/rpc v0.0.0-20260203192932-546029d2fa20 // indirect
218+
google.golang.org/genproto/googleapis/api v0.0.0-20260226221140-a57be14db171 // indirect
219+
google.golang.org/genproto/googleapis/rpc v0.0.0-20260226221140-a57be14db171 // indirect
212220
gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
213221
sigs.k8s.io/controller-runtime v0.23.1 // indirect
214222
sigs.k8s.io/randfill v1.0.0 // indirect

0 commit comments

Comments
 (0)