Skip to content

Commit 3337cc1

Browse files
khrm and claude
authored and committed
test: add e2e test for OpenCensus to OpenTelemetry metrics migration
Adds TestOTelMetrics, a consolidated e2e test for the OC→OTel metrics migration in Pipelines-as-Code (PR #2567).

The test scrapes two pods:

Controller (app.kubernetes.io/name=controller):
- Asserts http_client_* metrics from knative k8s client OTel instrumentation
- Asserts go_* runtime metrics
- Checks PAC application metrics (pipelines_as_code_*) are absent/present
- Asserts old OC metric names are absent

Watcher (app.kubernetes.io/name=watcher):
- Asserts kn_workqueue_* metrics (watcher uses knative reconciler)
- Asserts go_* runtime metrics

Verified locally with PAC controller and watcher deployed to kind via ko.

Relates to #2567

Co-Authored-By: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
1 parent e1a2f48 commit 3337cc1

1 file changed

Lines changed: 267 additions & 0 deletions

File tree

test/metrics_otel_test.go

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
//go:build e2e
2+
// +build e2e
3+
4+
// Copyright 2026 The Tekton Authors
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
18+
package test
19+
20+
import (
21+
"context"
22+
"fmt"
23+
"strings"
24+
"testing"
25+
"time"
26+
27+
dto "github.com/prometheus/client_model/go"
28+
"github.com/prometheus/common/expfmt"
29+
"github.com/prometheus/common/model"
30+
corev1 "k8s.io/api/core/v1"
31+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
32+
"k8s.io/client-go/kubernetes"
33+
"k8s.io/client-go/tools/clientcmd"
34+
)
35+
36+
// Fixed coordinates of the Pipelines-as-Code (PAC) installation that the
// metrics helpers in this file scrape.
const (
37+
// pacNamespace is the namespace in which PAC pods are listed and proxied.
pacNamespace = "pipelines-as-code"
38+
// pacMetricsPort is the pod port appended to the pod name ("<pod>:<port>")
// when requesting /metrics through the API server proxy.
pacMetricsPort = "9090"
39+
// pacControllerSelector is the label selector used to find the PAC controller pod.
pacControllerSelector = "app.kubernetes.io/name=controller,app.kubernetes.io/part-of=pipelines-as-code"
40+
// pacWatcherSelector is the label selector used to find the PAC watcher pod.
pacWatcherSelector = "app.kubernetes.io/name=watcher,app.kubernetes.io/part-of=pipelines-as-code"
41+
)
42+
43+
// pacKubeClient builds a kubernetes client from the default kubeconfig.
44+
func pacKubeClient(t *testing.T) kubernetes.Interface {
45+
t.Helper()
46+
rules := clientcmd.NewDefaultClientConfigLoadingRules()
47+
cfg, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(rules, &clientcmd.ConfigOverrides{}).ClientConfig()
48+
if err != nil {
49+
t.Fatalf("Failed to build kubeconfig: %v", err)
50+
}
51+
return kubernetes.NewForConfigOrDie(cfg)
52+
}
53+
54+
// scrapePACPodMetrics scrapes /metrics from the first Running/Ready pod
55+
// matching labelSelector via the Kubernetes API proxy. Returns an error
56+
// so callers can retry on transient failures without aborting the test.
57+
func scrapePACPodMetrics(ctx context.Context, kubeClient kubernetes.Interface, labelSelector string) (map[string]*dto.MetricFamily, error) {
58+
pods, err := kubeClient.CoreV1().Pods(pacNamespace).List(ctx, metav1.ListOptions{
59+
LabelSelector: labelSelector,
60+
})
61+
if err != nil {
62+
return nil, err
63+
}
64+
65+
var podName string
66+
for _, pod := range pods.Items {
67+
if pod.Status.Phase != corev1.PodRunning {
68+
continue
69+
}
70+
podReady := len(pod.Status.ContainerStatuses) > 0
71+
for _, cs := range pod.Status.ContainerStatuses {
72+
if !cs.Ready {
73+
podReady = false
74+
break
75+
}
76+
}
77+
if podReady {
78+
podName = pod.Name
79+
break
80+
}
81+
}
82+
if podName == "" {
83+
return nil, fmt.Errorf("no Running/Ready PAC pod found for selector %q in namespace %s", labelSelector, pacNamespace)
84+
}
85+
86+
result := kubeClient.CoreV1().RESTClient().Get().
87+
Resource("pods").
88+
Name(podName + ":" + pacMetricsPort).
89+
Namespace(pacNamespace).
90+
SubResource("proxy").
91+
Suffix("metrics").
92+
Do(ctx)
93+
94+
body, err := result.Raw()
95+
if err != nil {
96+
return nil, err
97+
}
98+
99+
parser := expfmt.NewTextParser(model.LegacyValidation)
100+
families, err := parser.TextToMetricFamilies(strings.NewReader(string(body)))
101+
if err != nil {
102+
return nil, err
103+
}
104+
return families, nil
105+
}
106+
107+
// waitForControllerMetrics polls the PAC controller pod until the named
108+
// metric appears. Transient errors are logged and retried until timeout.
109+
func waitForControllerMetrics(ctx context.Context, t *testing.T, kubeClient kubernetes.Interface, metricName string, timeout time.Duration) map[string]*dto.MetricFamily {
110+
t.Helper()
111+
return waitForPACPodMetric(ctx, t, kubeClient, pacControllerSelector, metricName, timeout)
112+
}
113+
114+
// waitForWatcherMetrics polls the PAC watcher pod until the named metric
115+
// appears. Transient errors are logged and retried until timeout.
116+
func waitForWatcherMetrics(ctx context.Context, t *testing.T, kubeClient kubernetes.Interface, metricName string, timeout time.Duration) map[string]*dto.MetricFamily {
117+
t.Helper()
118+
return waitForPACPodMetric(ctx, t, kubeClient, pacWatcherSelector, metricName, timeout)
119+
}
120+
121+
// waitForPACPodMetric is the shared polling implementation used by
122+
// waitForControllerMetrics and waitForWatcherMetrics.
123+
func waitForPACPodMetric(ctx context.Context, t *testing.T, kubeClient kubernetes.Interface, labelSelector, metricName string, timeout time.Duration) map[string]*dto.MetricFamily {
124+
t.Helper()
125+
ctx, cancel := context.WithTimeout(ctx, timeout)
126+
defer cancel()
127+
for {
128+
families, err := scrapePACPodMetrics(ctx, kubeClient, labelSelector)
129+
if err == nil {
130+
if _, ok := families[metricName]; ok {
131+
return families
132+
}
133+
} else {
134+
t.Logf("Retrying metrics scrape (%s): %v", labelSelector, err)
135+
}
136+
select {
137+
case <-ctx.Done():
138+
t.Fatalf("Timed out waiting for metric %q (selector=%s, waited %v): %v", metricName, labelSelector, timeout, ctx.Err())
139+
return nil
140+
case <-time.After(5 * time.Second):
141+
}
142+
}
143+
}
144+
145+
// TestOthersOTelMetricsController verifies that the PAC controller pod exposes the
146+
// expected OTel metric families after the OC→OTel migration (PR #2567):
147+
// - http_client_* and kn_k8s_client_* (knative k8s client OTel instrumentation)
148+
// - go_* runtime metrics
149+
// - PAC application metrics logged (appear only after first PipelineRun)
150+
// - Old OpenCensus metric names absent
151+
func TestOthersOTelMetricsController(t *testing.T) {
152+
ctx := context.Background()
153+
kubeClient := pacKubeClient(t)
154+
155+
t.Log("Waiting for PAC controller metrics (http_client_request_duration_seconds)")
156+
families := waitForControllerMetrics(ctx, t, kubeClient, "http_client_request_duration_seconds", 2*time.Minute)
157+
t.Logf("Scraped %d metric families from PAC controller", len(families))
158+
159+
tests := []struct {
160+
name string
161+
prefix string
162+
errMsg string
163+
}{
164+
{
165+
name: "http_client_prefix",
166+
prefix: "http_client_",
167+
errMsg: "Expected at least one http_client_* metric from knative k8s client instrumentation, found none",
168+
},
169+
{
170+
name: "kn_k8s_client_prefix",
171+
prefix: "kn_k8s_client_",
172+
errMsg: "Expected at least one kn_k8s_client_* metric from knative k8s client instrumentation, found none",
173+
},
174+
{
175+
name: "go_runtime_prefix",
176+
prefix: "go_",
177+
errMsg: "Expected standard go_* runtime metrics, found none",
178+
},
179+
}
180+
for _, tt := range tests {
181+
t.Run(tt.name, func(t *testing.T) {
182+
for name := range families {
183+
if strings.HasPrefix(name, tt.prefix) {
184+
return
185+
}
186+
}
187+
t.Error(tt.errMsg)
188+
})
189+
}
190+
191+
// Old OC metric names must be absent.
192+
// TODO: Remove in a future release once no OC-based release is supported.
193+
for name := range families {
194+
for _, prefix := range []string{"pipelines_as_code/", "tekton_pipelines_as_code_"} {
195+
if strings.HasPrefix(name, prefix) {
196+
t.Errorf("Old OC metric %q still present; expected removal after OTel migration", name)
197+
}
198+
}
199+
}
200+
201+
// PAC application metrics — counters only increment after the first PipelineRun
202+
// is processed; log presence without failing on a fresh install.
203+
appMetrics := []string{
204+
"pipelines_as_code_pipelinerun_count_total",
205+
"pipelines_as_code_pipelinerun_duration_seconds_sum_total",
206+
"pipelines_as_code_running_pipelineruns_count",
207+
"pipelines_as_code_git_provider_api_request_count_total",
208+
}
209+
for _, m := range appMetrics {
210+
if _, ok := families[m]; ok {
211+
t.Logf("%s found", m)
212+
} else {
213+
t.Logf("%s not yet present (no PipelineRuns processed yet)", m)
214+
}
215+
}
216+
}
217+
218+
// TestOthersOTelMetricsWatcher verifies that the PAC watcher pod exposes the
219+
// expected OTel metric families after the OC→OTel migration (PR #2567):
220+
// - kn_workqueue_* (knative reconciler workqueue)
221+
// - http_client_* and kn_k8s_client_* (knative k8s client OTel instrumentation)
222+
// - go_* runtime metrics
223+
func TestOthersOTelMetricsWatcher(t *testing.T) {
224+
ctx := context.Background()
225+
kubeClient := pacKubeClient(t)
226+
227+
t.Log("Waiting for PAC watcher metrics (go_goroutines)")
228+
families := waitForWatcherMetrics(ctx, t, kubeClient, "go_goroutines", 2*time.Minute)
229+
t.Logf("Scraped %d metric families from PAC watcher", len(families))
230+
231+
tests := []struct {
232+
name string
233+
prefix string
234+
errMsg string
235+
}{
236+
{
237+
name: "kn_workqueue_prefix",
238+
prefix: "kn_workqueue_",
239+
errMsg: "Expected at least one kn_workqueue_* metric on the PAC watcher, found none",
240+
},
241+
{
242+
name: "http_client_prefix",
243+
prefix: "http_client_",
244+
errMsg: "Expected at least one http_client_* metric on the PAC watcher, found none",
245+
},
246+
{
247+
name: "kn_k8s_client_prefix",
248+
prefix: "kn_k8s_client_",
249+
errMsg: "Expected at least one kn_k8s_client_* metric on the PAC watcher, found none",
250+
},
251+
{
252+
name: "go_runtime_prefix",
253+
prefix: "go_",
254+
errMsg: "Expected standard go_* runtime metrics on PAC watcher, found none",
255+
},
256+
}
257+
for _, tt := range tests {
258+
t.Run(tt.name, func(t *testing.T) {
259+
for name := range families {
260+
if strings.HasPrefix(name, tt.prefix) {
261+
return
262+
}
263+
}
264+
t.Error(tt.errMsg)
265+
})
266+
}
267+
}

0 commit comments

Comments
 (0)