Skip to content

Commit 452f707

Browse files
authored
refactor: reset metrics during startup (#1246)
count resources during operator start-up and not once is the ToolchainStatus controller --------- Signed-off-by: Xavier Coulon <xcoulon@redhat.com>
1 parent ec20ace commit 452f707

4 files changed

Lines changed: 10 additions & 74 deletions

File tree

cmd/main.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,13 @@ func main() { // nolint:gocyclo
424424
os.Exit(1)
425425
}
426426

427+
setupLog.Info("Updating the metrics")
428+
if err := metrics.Synchronize(ctx, mgr.GetClient(), namespace); err != nil {
429+
setupLog.Error(err, "unable to update the metrics")
430+
os.Exit(1)
431+
}
432+
setupLog.Info("Updated the metrics")
433+
427434
// create or update Toolchain status during the operator deployment
428435
setupLog.Info("Creating/updating the ToolchainStatus resource")
429436
if err := toolchainstatus.CreateOrUpdateResources(ctx, mgr.GetClient(), namespace, toolchainconfig.ToolchainStatusName); err != nil {

controllers/toolchainstatus/toolchainstatus_controller.go

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ const (
6767
hostRoutesTag statusComponentTag = "hostRoutes"
6868
hostOperatorTag statusComponentTag = "hostOperator"
6969
memberConnectionsTag statusComponentTag = "members"
70-
metricsTag statusComponentTag = "metrics"
7170
durationAfterUnready time.Duration = 10 * time.Minute
7271
)
7372

@@ -174,7 +173,6 @@ func (r *Reconciler) aggregateAndUpdateStatus(ctx context.Context, toolchainStat
174173
memberConnectionsTag: r.membersHandleStatus,
175174
registrationServiceTag: r.registrationServiceHandleStatus,
176175
hostRoutesTag: r.hostRoutesHandleStatus,
177-
metricsTag: r.updateMetrics,
178176
}
179177

180178
// track components that are not ready
@@ -259,22 +257,6 @@ func (r *Reconciler) restoredCheck(ctx context.Context, toolchainStatus *toolcha
259257
return nil
260258
}
261259

262-
// synchronizeWithCounter synchronizes the ToolchainStatus with the cached counter
263-
func (r *Reconciler) updateMetrics(ctx context.Context, _ *toolchainv1alpha1.ToolchainStatus) bool {
264-
logger := log.FromContext(ctx)
265-
logger.Info("updating the metrics")
266-
namespace, err := commonconfig.GetWatchNamespace()
267-
if err != nil {
268-
logger.Error(err, "unable to get watch namespace")
269-
return false
270-
}
271-
if err := metrics.Synchronize(ctx, r.Client, namespace); err != nil {
272-
logger.Error(err, "unable to update the metrics")
273-
return false
274-
}
275-
return true
276-
}
277-
278260
// hostOperatorHandleStatus retrieves the Deployment for the host operator and adds its status to ToolchainStatus. It returns false
279261
// if the deployment is not determined to be ready
280262
func (r *Reconciler) hostOperatorHandleStatus(ctx context.Context, toolchainStatus *toolchainv1alpha1.ToolchainStatus) bool {

pkg/metrics/metrics.go

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,6 @@ func Reset() {
134134
allHistograms = nil
135135
initMetrics()
136136
cachedSpaceCounts.Clear()
137-
initialized = false // allow the counters to be initialized again
138137
logger.Info("metrics reset")
139138
}
140139

@@ -257,8 +256,6 @@ func GetSpaceCountPerClusterSnapshot() map[string]int {
257256
return spaceCounts
258257
}
259258

260-
var initialized = false
261-
262259
// Synchronize synchronizes the content of the ToolchainStatus with the cached counter
263260
//
264261
// If the counter hasn't been initialized yet, then it adds the actual cached count
@@ -270,12 +267,6 @@ var initialized = false
270267
// If the cached counter is initialized and ToolchainStatus contains already some numbers
271268
// then it updates the ToolchainStatus numbers with the one taken from the cached counter
272269
func Synchronize(ctx context.Context, cl runtimeclient.Client, namespace string) error {
273-
// skip if cached counters are already initialized
274-
if initialized {
275-
logger.Info("skipping counters initialization because they are already initialized")
276-
return nil
277-
}
278-
279270
logger.Info("initializing counters from resources")
280271
usersignups := &toolchainv1alpha1.UserSignupList{}
281272
if err := cl.List(ctx, usersignups, runtimeclient.InNamespace(namespace)); err != nil {
@@ -312,7 +303,6 @@ func Synchronize(ctx context.Context, cl runtimeclient.Client, namespace string)
312303
for _, space := range spaces.Items {
313304
IncrementSpaceCount(space.Spec.TargetCluster)
314305
}
315-
initialized = true
316306
logger.Info("metrics initialized from UserSignups, MasterUserRecords and Spaces", "userSignups", len(usersignups.Items), "murs", len(murs.Items), "spaces", len(spaces.Items))
317307
return nil
318308
}

pkg/metrics/metrics_test.go

Lines changed: 3 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ import (
1616
"github.com/codeready-toolchain/toolchain-common/pkg/test/space"
1717
"github.com/codeready-toolchain/toolchain-common/pkg/test/usersignup"
1818

19-
"github.com/stretchr/testify/assert"
2019
"github.com/stretchr/testify/require"
2120
runtimeclient "sigs.k8s.io/controller-runtime/pkg/client"
2221
logf "sigs.k8s.io/controller-runtime/pkg/log"
@@ -146,50 +145,20 @@ func TestInitializeCountersFromExistingResources(t *testing.T) {
146145
})
147146
}
148147

149-
func TestShouldNotInitializeAgain(t *testing.T) {
150-
// given
151-
metrics.Reset()
152-
defer metrics.Reset()
153-
154-
initObjs := []runtimeclient.Object{}
155-
for index := range 10 {
156-
initObjs = append(initObjs, usersignup.NewUserSignup(usersignup.WithName(fmt.Sprintf("user-%d", index)), usersignup.WithAnnotation(toolchainv1alpha1.UserSignupActivationCounterAnnotationKey, strconv.Itoa(index+1))))
157-
initObjs = append(initObjs, masteruserrecord.NewMasterUserRecord(t, fmt.Sprintf("user-%d", index), masteruserrecord.TargetCluster("member-1")))
158-
initObjs = append(initObjs, space.NewSpace(test.HostOperatorNs, fmt.Sprintf("user-%d", index), space.WithSpecTargetCluster("member-1")))
159-
}
160-
fakeClient := test.NewFakeClient(t, initObjs...)
161-
err := metrics.Synchronize(context.TODO(), fakeClient, test.HostOperatorNs)
162-
require.NoError(t, err)
163-
164-
// when
165-
err = fakeClient.Create(context.TODO(), masteruserrecord.NewMasterUserRecord(t, "ignored", masteruserrecord.TargetCluster("member-1")))
166-
require.NoError(t, err)
167-
err = fakeClient.Create(context.TODO(), space.NewSpace(test.HostOperatorNs, "ignored", space.WithSpecTargetCluster("member-1")))
168-
require.NoError(t, err)
169-
err = metrics.Synchronize(context.TODO(), fakeClient, test.HostOperatorNs)
170-
171-
// then
172-
require.NoError(t, err)
173-
metricstest.AssertThatCountersAndMetrics(t).
174-
HaveMasterUserRecordsPerDomain(map[string]int{
175-
string(metrics.Internal): 10, // same value
176-
}).
177-
HaveSpacesForCluster("member-1", 10)
178-
}
179-
180148
func TestMultipleExecutionsInParallel(t *testing.T) {
181149
// given
182150
initObjs := []runtimeclient.Object{}
183151
for index := range 10 {
184-
initObjs = append(initObjs, masteruserrecord.NewMasterUserRecord(t, fmt.Sprintf("user-%d", index), masteruserrecord.TargetCluster("member-1")))
152+
initObjs = append(initObjs, usersignup.NewUserSignup(usersignup.WithName(fmt.Sprintf("user-%d", index)), usersignup.WithEmail(fmt.Sprintf("user-%d@redhat.com", index))))
153+
initObjs = append(initObjs, masteruserrecord.NewMasterUserRecord(t, fmt.Sprintf("user-%d", index), masteruserrecord.Email(fmt.Sprintf("user-%d@redhat.com", index)), masteruserrecord.TargetCluster("member-1")))
185154
initObjs = append(initObjs, space.NewSpace(test.HostOperatorNs, fmt.Sprintf("user-%d", index), space.WithSpecTargetCluster("member-1")))
186155
}
187156
fakeClient := test.NewFakeClient(t, initObjs...)
188157
metricstest.ResetCounters(t, fakeClient)
189158
latch := new(sync.WaitGroup)
190159
latch.Add(1)
191160
waitForFinished := new(sync.WaitGroup)
192-
161+
// run 1002 iterations to increment and decrement counters in parallel
193162
for i := range 1002 {
194163
waitForFinished.Add(4) // 4 routines to increment counters
195164
if i < 1000 {
@@ -241,23 +210,11 @@ func TestMultipleExecutionsInParallel(t *testing.T) {
241210
}(i)
242211
}
243212

244-
for range 102 {
245-
waitForFinished.Add(1)
246-
go func() {
247-
defer waitForFinished.Done()
248-
latch.Wait()
249-
err := metrics.Synchronize(context.TODO(), fakeClient, test.HostOperatorNs)
250-
assert.NoError(t, err) // require must only be used in the goroutine running the test function (testifylint)
251-
}()
252-
}
253-
254213
// when
255214
latch.Done()
256215
waitForFinished.Wait()
257-
err := metrics.Synchronize(context.TODO(), fakeClient, test.HostOperatorNs)
258216

259217
// then
260-
require.NoError(t, err)
261218
metricstest.AssertThatCountersAndMetrics(t).
262219
HaveMasterUserRecordsPerDomain(map[string]int{
263220
string(metrics.Internal): 12, // all MURs have `@redhat.com` email address

0 commit comments

Comments
 (0)