From ff03c82d7da62c004130cbfd43f93d57ad173717 Mon Sep 17 00:00:00 2001 From: Chris Werner Rau Date: Tue, 31 Mar 2026 15:15:06 +0200 Subject: [PATCH 1/6] fix: etcd fails due to unaccounted space usage (e.g. WAL) This way the real volume usage is looked at and used for resizing. --- .golangci.yaml | 2 + go.mod | 54 +-- go.sum | 106 +++--- pkg/hostedcontrolplane/controller.go | 7 +- pkg/hostedcontrolplane/controller_test.go | 1 + .../lifecycle_phases_test.go | 27 +- pkg/operator/operator.go | 2 + .../etcd_cluster/etcd_client/etcd_client.go | 56 +-- pkg/reconcilers/etcd_cluster/reconciler.go | 114 +++++- .../etcd_cluster/reconciler_test.go | 133 +++++-- .../etcd_cluster/volume_stats/volume_stats.go | 147 ++++++++ .../volume_stats/volume_stats_test.go | 346 ++++++++++++++++++ test/etcd_stubs.go | 24 +- 13 files changed, 851 insertions(+), 168 deletions(-) create mode 100644 pkg/reconcilers/etcd_cluster/volume_stats/volume_stats.go create mode 100644 pkg/reconcilers/etcd_cluster/volume_stats/volume_stats_test.go diff --git a/.golangci.yaml b/.golangci.yaml index 70b03c0..92b3ee1 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -144,6 +144,8 @@ linters: alias: kubeletv1beta1 - pkg: k8s.io/kubelet/config/v1beta1 alias: kubelettypes + - pkg: k8s.io/kubelet/pkg/apis/stats/v1alpha1 + alias: kubeletstatsv1alpha1 - pkg: k8s.io/kubernetes/cmd/kube-controller-manager/names alias: kubenames - pkg: k8s.io/client-go/tools/clientcmd/api/v1 diff --git a/go.mod b/go.mod index 0dac825..6799bbc 100644 --- a/go.mod +++ b/go.mod @@ -11,40 +11,40 @@ require ( github.com/aws/smithy-go/tracing/smithyoteltracing v1.0.13 github.com/blang/semver/v4 v4.0.0 github.com/caarlos0/env/v6 v6.10.1 - github.com/cert-manager/cert-manager v1.19.3 + github.com/cert-manager/cert-manager v1.20.1 github.com/cilium/cilium v1.19.3 - github.com/coredns/corefile-migration v1.0.30 + github.com/coredns/corefile-migration v1.0.31 github.com/go-logr/logr v1.4.3 github.com/onsi/gomega v1.39.1 github.com/robfig/cron/v3 v3.0.1 - github.com/samber/lo v1.52.0 - go.etcd.io/etcd/api/v3 v3.6.8 - go.etcd.io/etcd/client/v3 v3.6.8 - go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0 - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 + github.com/samber/lo v1.53.0 + go.etcd.io/etcd/api/v3 v3.6.9 + go.etcd.io/etcd/client/v3 v3.6.9 + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.67.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 go.opentelemetry.io/otel v1.43.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.40.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.40.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.42.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.42.0 go.opentelemetry.io/otel/sdk v1.43.0 go.opentelemetry.io/otel/trace v1.43.0 go.uber.org/zap v1.27.1 go4.org v0.0.0-20260112195520-a5071408f32f - golang.org/x/text v0.33.0 - google.golang.org/grpc v1.80.0 + golang.org/x/text v0.35.0 + google.golang.org/grpc v1.79.3 k8s.io/api v0.35.3 k8s.io/apiextensions-apiserver v0.35.3 k8s.io/apimachinery v0.35.3 k8s.io/apiserver v0.35.3 k8s.io/client-go v0.35.3 - k8s.io/cluster-bootstrap v0.35.0 + k8s.io/cluster-bootstrap v0.35.3 k8s.io/component-base v0.35.3 - k8s.io/kube-proxy v0.35.0 - k8s.io/kubelet v0.35.0 - k8s.io/kubernetes v1.35.1 + k8s.io/kube-proxy v0.35.3 + k8s.io/kubelet v0.35.3 + k8s.io/kubernetes v1.35.3 k8s.io/utils v0.0.0-20260319190234-28399d86e0b5 - sigs.k8s.io/cluster-api v1.12.2 + sigs.k8s.io/cluster-api v1.12.4 sigs.k8s.io/controller-runtime v0.23.3 - sigs.k8s.io/gateway-api v1.4.1 + sigs.k8s.io/gateway-api v1.5.1 ) require ( @@ -122,7 +122,7 @@ require ( github.com/google/uuid v1.6.0 // indirect github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/json-iterator/go v1.1.12 // indirect @@ -160,7 +160,7 @@ require ( github.com/vishvananda/netns v0.0.5 // indirect github.com/x448/float16 v0.8.4 // indirect github.com/xlab/treeprint v1.2.0 // indirect - go.etcd.io/etcd/client/pkg/v3 v3.6.8 // indirect + go.etcd.io/etcd/client/pkg/v3 v3.6.9 // indirect go.mongodb.org/mongo-driver v1.17.9 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/otel/metric v1.43.0 // indirect @@ -170,18 +170,18 @@ require ( go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect go4.org/netipx v0.0.0-20231129151722-fdeea329fbba // indirect - golang.org/x/crypto v0.47.0 // indirect + golang.org/x/crypto v0.49.0 // indirect golang.org/x/exp v0.0.0-20260112195511-716be5621a96 // indirect - golang.org/x/net v0.49.0 // indirect + golang.org/x/net v0.52.0 // indirect golang.org/x/oauth2 v0.35.0 // indirect - golang.org/x/sync v0.19.0 // indirect + golang.org/x/sync v0.20.0 // indirect golang.org/x/sys v0.42.0 // indirect - golang.org/x/term v0.39.0 // indirect + golang.org/x/term v0.41.0 // indirect golang.org/x/time v0.14.0 // indirect - golang.org/x/tools v0.41.0 // indirect + golang.org/x/tools v0.42.0 // indirect gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20260203192932-546029d2fa20 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20260203192932-546029d2fa20 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260319201613-d00831a3d3e7 // indirect google.golang.org/protobuf v1.36.11 // indirect gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/go-jose/go-jose.v2 v2.6.3 // indirect @@ -193,7 +193,7 @@ require ( k8s.io/controller-manager v0.35.0 // indirect k8s.io/cri-api v0.35.0 // indirect k8s.io/cri-client v0.35.0 // indirect - k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/klog/v2 v2.140.0 // indirect k8s.io/kms v0.35.3 // indirect k8s.io/kube-openapi v0.0.0-20260127142750-a19766b6e2d4 // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.34.0 // indirect diff --git a/go.sum b/go.sum index 355e985..7b60a6d 100644 --- a/go.sum +++ b/go.sum @@ -14,10 +14,8 @@ github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI= github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU= -github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww= github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= -github.com/Masterminds/sprig v2.22.0+incompatible h1:z4yfnGrZ7netVz+0EDJ0Wi+5VZCSYp4Z0m2dk6cEM60= github.com/Masterminds/sprig/v3 v3.3.0 h1:mQh0Yrg1XPo6vjYXgtf5OtijNAKJRNcTdOOGZe3tPhs= github.com/Masterminds/sprig/v3 v3.3.0/go.mod h1:Zy1iXRYNqNLUolqCpL4uhk6SHUMAOSCzdgBfDb35Lz0= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= @@ -76,8 +74,8 @@ github.com/caarlos0/env/v6 v6.10.1 h1:t1mPSxNpei6M5yAeu1qtRdPAK29Nbcf/n3G7x+b3/I github.com/caarlos0/env/v6 v6.10.1/go.mod h1:hvp/ryKXKipEkcuYjs9mI4bBCg+UI0Yhgm5Zu0ddvwc= github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= -github.com/cert-manager/cert-manager v1.19.3 h1:3d0Nk/HO3BOmAdBJNaBh+6YgaO3Ciey3xCpOjiX5Obs= -github.com/cert-manager/cert-manager v1.19.3/go.mod h1:e9NzLtOKxTw7y99qLyWGmPo6mrC1Nh0EKKcMkRfK+GE= +github.com/cert-manager/cert-manager v1.20.1 h1:99ExHJu5TPp1V92AvvE4oY6BkOSyJiWLxxMkbqbdGaY= +github.com/cert-manager/cert-manager v1.20.1/go.mod h1:ut67FnggYJJqAdDWLhSPnj10P06QwbNU88RYNh9MvMc= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cilium/cilium v1.19.3 h1:foJrHPk45HwshOd8Qf/kptf9JxPfNySkIDKsetZa9+Y= @@ -97,8 +95,8 @@ github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5/go.mod h1:KdCmV+x/Buvy github.com/coredns/caddy v1.1.1/go.mod h1:A6ntJQlAWuQfFlsd9hvigKbo2WS0VUs2l1e2F+BawD4= github.com/coredns/caddy v1.1.4 h1:+Lls5xASB0QsA2jpCroCOwpPlb5GjIGlxdjXxdX0XVo= github.com/coredns/caddy v1.1.4/go.mod h1:A6ntJQlAWuQfFlsd9hvigKbo2WS0VUs2l1e2F+BawD4= -github.com/coredns/corefile-migration v1.0.30 h1:ljZNPGgna+4yKv81gfkvkgLEWdtz0NjBR1glaiPI140= -github.com/coredns/corefile-migration v1.0.30/go.mod h1:56DPqONc3njpVPsdilEnfijCwNGC3/kTJLl7i7SPavY= +github.com/coredns/corefile-migration v1.0.31 h1:f7WGhY8M2Jn8P2dVO0p7wSQ1QKsMARl6WEyUjCb/V38= +github.com/coredns/corefile-migration v1.0.31/go.mod h1:56DPqONc3njpVPsdilEnfijCwNGC3/kTJLl7i7SPavY= github.com/coreos/go-semver v0.3.1 h1:yi21YpKnrx1gt5R+la8n5WgS0kCrsPp33dmEyHReZr4= github.com/coreos/go-semver v0.3.1/go.mod h1:irMmmIw/7yzSRPWryHsK7EYSg09caPQL03VsM8rvUec= github.com/coreos/go-systemd/v22 v22.7.0 h1:LAEzFkke61DFROc7zNLX/WA2i5J8gYqe0rSj9KI28KA= @@ -231,8 +229,8 @@ github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.3 h1:B+8ClL/kCQkRiU82d9xajR github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.3/go.mod h1:NbCUVmiS4foBGBHOYlCT25+YmGpJ32dZPi75pGEUpj4= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7 h1:X+2YciYSxvMQK0UZ7sg45ZVabVZBeBuvMkmuI2V3Fak= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.7/go.mod h1:lW34nIZuQ8UDPdkon5fmfp2l3+ZkQ2me/+oecHYLOII= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI= @@ -317,8 +315,8 @@ github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/sagikazarmark/locafero v0.12.0 h1:/NQhBAkUb4+fH1jivKHWusDYFjMOOKU88eegjfxfHb4= github.com/sagikazarmark/locafero v0.12.0/go.mod h1:sZh36u/YSZ918v0Io+U9ogLYQJ9tLLBmM4eneO6WwsI= -github.com/samber/lo v1.52.0 h1:Rvi+3BFHES3A8meP33VPAxiBZX/Aws5RxrschYGjomw= -github.com/samber/lo v1.52.0/go.mod h1:4+MXEGsJzbKGaUEQFKBq2xtfuznW9oz/WrgyzMzRoM0= +github.com/samber/lo v1.53.0 h1:t975lj2py4kJPQ6haz1QMgtId2gtmfktACxIXArw3HM= +github.com/samber/lo v1.53.0/go.mod h1:4+MXEGsJzbKGaUEQFKBq2xtfuznW9oz/WrgyzMzRoM0= github.com/sasha-s/go-deadlock v0.3.6 h1:TR7sfOnZ7x00tWPfD397Peodt57KzMDo+9Ae9rMiUmw= github.com/sasha-s/go-deadlock v0.3.6/go.mod h1:CUqNyyvMxTyjFqDT7MRg9mb4Dv/btmGTqSR+rky/UXo= github.com/sergi/go-diff v1.4.0 h1:n/SP9D5ad1fORl+llWyN+D6qoUETXNZARKjyY2/KVCw= @@ -370,12 +368,12 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.etcd.io/bbolt v1.4.3 h1:dEadXpI6G79deX5prL3QRNP6JB8UxVkqo4UPnHaNXJo= go.etcd.io/bbolt v1.4.3/go.mod h1:tKQlpPaYCVFctUIgFKFnAlvbmB3tpy1vkTnDWohtc0E= -go.etcd.io/etcd/api/v3 v3.6.8 h1:gqb1VN92TAI6G2FiBvWcqKtHiIjr4SU2GdXxTwyexbM= -go.etcd.io/etcd/api/v3 v3.6.8/go.mod h1:qyQj1HZPUV3B5cbAL8scG62+fyz5dSxxu0w8pn28N6Q= -go.etcd.io/etcd/client/pkg/v3 v3.6.8 h1:Qs/5C0LNFiqXxYf2GU8MVjYUEXJ6sZaYOz0zEqQgy50= -go.etcd.io/etcd/client/pkg/v3 v3.6.8/go.mod h1:GsiTRUZE2318PggZkAo6sWb6l8JLVrnckTNfbG8PWtw= -go.etcd.io/etcd/client/v3 v3.6.8 h1:B3G76t1UykqAOrbio7s/EPatixQDkQBevN8/mwiplrY= -go.etcd.io/etcd/client/v3 v3.6.8/go.mod h1:MVG4BpSIuumPi+ELF7wYtySETmoTWBHVcDoHdVupwt8= +go.etcd.io/etcd/api/v3 v3.6.9 h1:UA7iKfEW1AzgihcBSGXci2kDGQiokSq41F9HMCI/RTI= +go.etcd.io/etcd/api/v3 v3.6.9/go.mod h1:csEk/qTfxKL36NqJdU15Tgtl65A8dyEY2BYo7PRsIwk= +go.etcd.io/etcd/client/pkg/v3 v3.6.9 h1:T8nuk8Lz64C+Hzb0coBFLMSlVSQZBpAtFk46swdM1DA= +go.etcd.io/etcd/client/pkg/v3 v3.6.9/go.mod h1:WEy3PpwbbEBVRdh1NVJYsuUe/8eyI21PNJRazeD8z/Y= +go.etcd.io/etcd/client/v3 v3.6.9 h1:3X555hQXmhRr27O37wls53g68CpUiPOiHXrZfz2Al+o= +go.etcd.io/etcd/client/v3 v3.6.9/go.mod h1:KO7H1HLYh1qaljuVZJQwBFk1lRce6pJzt+C81GEnrlM= go.etcd.io/etcd/pkg/v3 v3.6.5 h1:byxWB4AqIKI4SBmquZUG1WGtvMfMaorXFoCcFbVeoxM= go.etcd.io/etcd/pkg/v3 v3.6.5/go.mod h1:uqrXrzmMIJDEy5j00bCqhVLzR5jEJIwDp5wTlLwPGOU= go.etcd.io/etcd/server/v3 v3.6.5 h1:4RbUb1Bd4y1WkBHmuF+cZII83JNQMuNXzyjwigQ06y0= @@ -386,16 +384,16 @@ go.mongodb.org/mongo-driver v1.17.9 h1:IexDdCuuNJ3BHrELgBlyaH9p60JXAvdzWR128q+U5 go.mongodb.org/mongo-driver v1.17.9/go.mod h1:LlOhpH5NUEfhxcAwG0UEkMqwYcc4JU18gtCdGudk/tQ= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0 h1:XmiuHzgJt067+a6kwyAzkhXooYVv3/TOw9cM2VfJgUM= -go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0/go.mod h1:KDgtbWKTQs4bM+VPUr6WlL9m/WXcmkCcBlIzqxPGzmI= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 h1:7iP2uCb7sGddAr30RRS6xjKy7AZ2JtTOPA3oolgVSw8= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0/go.mod h1:c7hN3ddxs/z6q9xwvfLPk+UHlWRQyaeR1LdgfL/66l0= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.67.0 h1:yI1/OhfEPy7J9eoa6Sj051C7n5dvpj0QX8g4sRchg04= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.67.0/go.mod h1:NoUCKYWK+3ecatC4HjkRktREheMeEtrXoQxrqYFeHSc= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 h1:OyrsyzuttWTSur2qN/Lm0m2a8yqyIjUVBZcxFPuXq2o= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0/go.mod h1:C2NGBr+kAB4bk3xtMXfZ94gqFDtg/GkI7e9zqGh5Beg= go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.40.0 h1:QKdN8ly8zEMrByybbQgv8cWBcdAarwmIPZ6FThrWXJs= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.40.0/go.mod h1:bTdK1nhqF76qiPoCCdyFIV+N/sRHYXYCTQc+3VCi3MI= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.40.0 h1:DvJDOPmSWQHWywQS6lKL+pb8s3gBLOZUtw4N+mavW1I= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.40.0/go.mod h1:EtekO9DEJb4/jRyN4v4Qjc2yA7AtfCBuz2FynRUWTXs= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.42.0 h1:THuZiwpQZuHPul65w4WcwEnkX2QIuMT+UFoOrygtoJw= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.42.0/go.mod h1:J2pvYM5NGHofZ2/Ru6zw/TNWnEQp5crgyDeSrYpXkAw= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.42.0 h1:zWWrB1U6nqhS/k6zYB74CjRpuiitRtLLi68VcgmOEto= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.42.0/go.mod h1:2qXPNBX1OVRC0IwOnfo1ljoid+RD0QK3443EaqVlsOU= go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg= @@ -425,61 +423,61 @@ go4.org/netipx v0.0.0-20231129151722-fdeea329fbba/go.mod h1:PLyyIXexvUFg3Owu6p/W golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.47.0 h1:V6e3FRj+n4dbpw86FJ8Fv7XVOql7TEwpHapKoMJ/GO8= -golang.org/x/crypto v0.47.0/go.mod h1:ff3Y9VzzKbwSSEzWqJsJVBnWmRwRSHt/6Op5n9bQc4A= +golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= +golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= golang.org/x/exp v0.0.0-20260112195511-716be5621a96 h1:Z/6YuSHTLOHfNFdb8zVZomZr7cqNgTJvA8+Qz75D8gU= golang.org/x/exp v0.0.0-20260112195511-716be5621a96/go.mod h1:nzimsREAkjBCIEFtHiYkrJyT+2uy9YZJB7H1k68CXZU= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.32.0 h1:9F4d3PHLljb6x//jOyokMv3eX+YDeepZSEo3mFJy93c= -golang.org/x/mod v0.32.0/go.mod h1:SgipZ/3h2Ci89DlEtEXWUk/HteuRin+HHhN+WbNhguU= +golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8= +golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.49.0 h1:eeHFmOGUTtaaPSGNmjBKpbng9MulQsJURQUAfUwY++o= -golang.org/x/net v0.49.0/go.mod h1:/ysNB2EvaqvesRkuLAyjI1ycPZlQHM3q01F02UY/MV8= +golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= +golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ= golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= -golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= -golang.org/x/term v0.39.0 h1:RclSuaJf32jOqZz74CkPA9qFuVTX7vhLlpfj/IGWlqY= -golang.org/x/term v0.39.0/go.mod h1:yxzUCTP/U+FzoxfdKmLaA0RV1WgE0VY7hXBwKtY/4ww= +golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU= +golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.33.0 h1:B3njUFyqtHDUI5jMn1YIr5B0IE2U0qck04r6d4KPAxE= -golang.org/x/text v0.33.0/go.mod h1:LuMebE6+rBincTi9+xWTY8TztLzKHc/9C1uBCG27+q8= +golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= +golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.41.0 h1:a9b8iMweWG+S0OBnlU36rzLp20z1Rp10w+IY2czHTQc= -golang.org/x/tools v0.41.0/go.mod h1:XSY6eDqxVNiYgezAVqqCeihT4j1U2CCsqvH3WhQpnlg= +golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k= +golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gomodules.xyz/jsonpatch/v2 v2.5.0 h1:JELs8RLM12qJGXU4u/TO3V25KW8GreMKl9pdkk14RM0= gomodules.xyz/jsonpatch/v2 v2.5.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= -gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= -gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= -google.golang.org/genproto/googleapis/api v0.0.0-20260203192932-546029d2fa20 h1:7ei4lp52gK1uSejlA8AZl5AJjeLUOHBQscRQZUgAcu0= -google.golang.org/genproto/googleapis/api v0.0.0-20260203192932-546029d2fa20/go.mod h1:ZdbssH/1SOVnjnDlXzxDHK2MCidiqXtbYccJNzNYPEE= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260203192932-546029d2fa20 h1:Jr5R2J6F6qWyzINc+4AM8t5pfUz6beZpHp678GNrMbE= -google.golang.org/genproto/googleapis/rpc v0.0.0-20260203192932-546029d2fa20/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ= -google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM= -google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= +google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 h1:JLQynH/LBHfCTSbDWl+py8C+Rg/k1OVH3xfcaiANuF0= +google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:kSJwQxqmFXeo79zOmbrALdflXQeAYcUbgS7PbpMknCY= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260319201613-d00831a3d3e7 h1:ndE4FoJqsIceKP2oYSnUZqhTdYufCYYkqwtFzfrhI7w= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260319201613-d00831a3d3e7/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= +google.golang.org/grpc v1.79.3 h1:sybAEdRIEtvcD68Gx7dmnwjZKlyfuc61Dyo9pGXXkKE= +google.golang.org/grpc v1.79.3/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -522,8 +520,8 @@ k8s.io/cri-api v0.35.0 h1:fxLSKyJHqbyCSUsg1rW4DRpmjSEM/elZ1GXzYTSLoDQ= k8s.io/cri-api v0.35.0/go.mod h1:Cnt29u/tYl1Se1cBRL30uSZ/oJ5TaIp4sZm1xDLvcMc= k8s.io/cri-client v0.35.0 h1:U1K4bteO93yioUS38804ybN+kWaon9zrzVtB37I3fCs= k8s.io/cri-client v0.35.0/go.mod h1:XG5GkuuSpxvungsJVzW58NyWBoGSQhMMJmE5c66m9N8= -k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= -k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= +k8s.io/klog/v2 v2.140.0 h1:Tf+J3AH7xnUzZyVVXhTgGhEKnFqye14aadWv7bzXdzc= +k8s.io/klog/v2 v2.140.0/go.mod h1:o+/RWfJ6PwpnFn7OyAG3QnO47BFsymfEfrz6XyYSSp0= k8s.io/kms v0.35.3 h1:jaxr/7dNqcztGldnfCEZg8DegEOnHV6cfoBC2ACMWEg= k8s.io/kms v0.35.3/go.mod h1:VT+4ekZAdrZDMgShK37vvlyHUVhwI9t/9tvh0AyCWmQ= k8s.io/kube-openapi v0.0.0-20260127142750-a19766b6e2d4 h1:HhDfevmPS+OalTjQRKbTHppRIz01AWi8s45TMXStgYY= @@ -532,18 +530,18 @@ k8s.io/kube-proxy v0.35.0 h1:erv2wYmGZ6nyu/FtmaIb+ORD3q2rfZ4Fhn7VXs/8cPQ= k8s.io/kube-proxy v0.35.0/go.mod h1:bd9lpN3uLLOOWc/CFZbkPEi9DTkzQQymbE8FqSU4bWk= k8s.io/kubelet v0.35.0 h1:8cgJHCBCKLYuuQ7/Pxb/qWbJfX1LXIw7790ce9xHq7c= k8s.io/kubelet v0.35.0/go.mod h1:ciRzAXn7C4z5iB7FhG1L2CGPPXLTVCABDlbXt/Zz8YA= -k8s.io/kubernetes v1.35.1 h1:qmjXSCDPnOuXPuJb5pv+eLzpXhhlD09Jid1pG/OvFU8= -k8s.io/kubernetes v1.35.1/go.mod h1:AaPpCpiS8oAqRbEwpY5r3RitLpwpVp5lVXKFkJril58= +k8s.io/kubernetes v1.35.3 h1:J3dk2wybKFHwoH4eydDUGHJo4HAD+9CZbSlvk/YQuao= +k8s.io/kubernetes v1.35.3/go.mod h1:AaPpCpiS8oAqRbEwpY5r3RitLpwpVp5lVXKFkJril58= k8s.io/utils v0.0.0-20260319190234-28399d86e0b5 h1:kBawHLSnx/mYHmRnNUf9d4CpjREbeZuxoSGOX/J+aYM= k8s.io/utils v0.0.0-20260319190234-28399d86e0b5/go.mod h1:xDxuJ0whA3d0I4mf/C4ppKHxXynQ+fxnkmQH0vTHnuk= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.34.0 h1:hSfpvjjTQXQY2Fol2CS0QHMNs/WI1MOSGzCm1KhM5ec= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.34.0/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= -sigs.k8s.io/cluster-api v1.12.2 h1:+b+M2IygfvFZJq7bsaloNakimMEVNf81zkGR1IiuxXs= -sigs.k8s.io/cluster-api v1.12.2/go.mod h1:2XuF/dmN3c/1VITb6DB44N5+Ecvsvd5KOWqrY9Q53nU= +sigs.k8s.io/cluster-api v1.12.4 h1:usvoZ+Nblfu//l31hm1B1WUdigb6OnGAJqwt8qWq/iA= +sigs.k8s.io/cluster-api v1.12.4/go.mod h1:ePDeVCVaW6SGxRgDeLt5+KK4TigEnF0LPV6ztEzRzlI= sigs.k8s.io/controller-runtime v0.23.3 h1:VjB/vhoPoA9l1kEKZHBMnQF33tdCLQKJtydy4iqwZ80= sigs.k8s.io/controller-runtime v0.23.3/go.mod h1:B6COOxKptp+YaUT5q4l6LqUJTRpizbgf9KSRNdQGns0= -sigs.k8s.io/gateway-api v1.4.1 h1:NPxFutNkKNa8UfLd2CMlEuhIPMQgDQ6DXNKG9sHbJU8= -sigs.k8s.io/gateway-api v1.4.1/go.mod h1:AR5RSqciWP98OPckEjOjh2XJhAe2Na4LHyXD2FUY7Qk= +sigs.k8s.io/gateway-api v1.5.1 h1:RqVRIlkhLhUO8wOHKTLnTJA6o/1un4po4/6M1nRzdd0= +sigs.k8s.io/gateway-api v1.5.1/go.mod h1:GvCETiaMAlLym5CovLxGjS0NysqFk3+Yuq3/rh6QL2o= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/kustomize/api v0.21.0 h1:I7nry5p8iDJbuRdYS7ez8MUvw7XVNPcIP5GkzzuXIIQ= diff --git a/pkg/hostedcontrolplane/controller.go b/pkg/hostedcontrolplane/controller.go index 1415fdf..d588898 100644 --- a/pkg/hostedcontrolplane/controller.go +++ b/pkg/hostedcontrolplane/controller.go @@ -26,6 +26,7 @@ import ( "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/etcd_client" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/s3_client" + "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/volume_stats" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/infrastructure_cluster" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/kubeconfig" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/tlsroutes" @@ -80,6 +81,7 @@ func NewHostedControlPlaneReconciler( ) (*alias.WorkloadClusterClient, ciliumclient.Interface, error), etcdClientFactory etcd_client.EtcdClientFactory, s3ClientFactory s3_client.S3ClientFactory, + volumeStatsProvider volume_stats.EtcdVolumeStatsProvider, recorder events.EventRecorder, controllerNamespace string, reconcileFilter string, @@ -91,6 +93,7 @@ func NewHostedControlPlaneReconciler( gatewayClient: gatewayClient, etcdClientFactory: etcdClientFactory, s3ClientFactory: s3ClientFactory, + volumeStatsProvider: volumeStatsProvider, ciliumClientFactory: ciliumClientFactory, workloadClusterClientFactory: workloadClusterClientFactory, recorder: recorder, @@ -125,6 +128,7 @@ type hostedControlPlaneReconciler struct { gatewayClient gwclient.Interface etcdClientFactory etcd_client.EtcdClientFactory s3ClientFactory s3_client.S3ClientFactory + volumeStatsProvider volume_stats.EtcdVolumeStatsProvider ciliumClientFactory func(ctx context.Context) (ciliumclient.Interface, error) workloadClusterClientFactory func( ctx context.Context, @@ -349,7 +353,7 @@ func (r *hostedControlPlaneReconciler) resolveOwnerRefsToHostedControlPlanes( //+kubebuilder:rbac:groups=controlplane.cluster.x-k8s.io,resources=hostedcontrolplanes/finalizers,verbs=update //+kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=list;watch //+kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters,verbs=get -//+kubebuilder:rbac:groups=events.k8s.io,resources=events,verbs=create +//+kubebuilder:rbac:groups=events.k8s.io,resources=events,verbs=create;patch func (r *hostedControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { return tracing.WithSpan(ctx, r.tracer, "Reconcile", @@ -600,6 +604,7 @@ func (r *hostedControlPlaneReconciler) reconcileNormal( r.etcdServerStorageIncrement, r.etcdClientFactory, r.s3ClientFactory, + r.volumeStatsProvider, recorder.FromContext(ctx), r.etcdComponentLabel, r.apiServerComponentLabel, diff --git a/pkg/hostedcontrolplane/controller_test.go b/pkg/hostedcontrolplane/controller_test.go index ac2bc56..70dfe45 100644 --- a/pkg/hostedcontrolplane/controller_test.go +++ b/pkg/hostedcontrolplane/controller_test.go @@ -71,6 +71,7 @@ func createTestReconcilerWithFilter(client client.Client, reconcileFilter string workloadClusterClientStubFactory, etcdClientStubFactory, s3ClientStubFactory, + test.NewEtcdVolumeStatsProviderStub(), &recorder.InfiniteDiscardingFakeRecorder{}, "test-namespace", reconcileFilter, diff --git a/pkg/hostedcontrolplane/lifecycle_phases_test.go b/pkg/hostedcontrolplane/lifecycle_phases_test.go index 8e9e8e6..1993661 100644 --- a/pkg/hostedcontrolplane/lifecycle_phases_test.go +++ b/pkg/hostedcontrolplane/lifecycle_phases_test.go @@ -24,7 +24,6 @@ import ( "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/etcd_client" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/s3_client" . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" - clientv3 "go.etcd.io/etcd/client/v3" corev1 "k8s.io/api/core/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -246,6 +245,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { ) (s3_client.S3Client, error) { return NewS3ClientStub(), nil } + volumeStatsStub := NewEtcdVolumeStatsProviderStub() workloadClusterClientFactory := func( _ context.Context, _ *alias.ManagementClusterClient, @@ -265,6 +265,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { workloadClusterClientFactory, etcdClientFactory, s3ClientFactory, + volumeStatsStub, &recorder.InfiniteDiscardingFakeRecorder{}, "default", "", @@ -688,17 +689,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { simulateExternalSystems: func(ctx context.Context, g *WithT) { etcdClient.AlarmError = nil etcdClient.StatusError = nil - etcdClient.StatusResponses = map[string]*clientv3.StatusResponse{ - "etcd-0": { - DbSize: 1024, - }, - "etcd-1": { - DbSize: 1024, - }, - "etcd-2": { - DbSize: 2048, - }, - } + volumeStatsStub.MaxUsage = 2048 }, verifyConditionsAfter: map[bool][]types2.GomegaMatcher{ true: { @@ -1207,17 +1198,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { { name: "Let Etcd grow", simulateExternalSystems: func(ctx context.Context, g *WithT) { - etcdClient.StatusResponses = map[string]*clientv3.StatusResponse{ - "etcd-0": { - DbSize: ptr.To(resource.MustParse("1.5Gi")).Value(), - }, - "etcd-1": { - DbSize: ptr.To(resource.MustParse("1Gi")).Value(), - }, - "etcd-2": { - DbSize: ptr.To(resource.MustParse("500Mi")).Value(), - }, - } + volumeStatsStub.MaxUsage = ptr.To(resource.MustParse("1.5Gi")).Value() }, verifyResources: func(ctx context.Context, g *WithT) { g.Expect(hcp.Status.ETCDVolumeUsage).To(EqualResource(resource.MustParse("1.5Gi"))) diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 4f26aa9..d91910a 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -21,6 +21,7 @@ import ( "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/alias" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/etcd_client" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/s3_client" + "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/volume_stats" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/workload" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/util/logging" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" @@ -240,6 +241,7 @@ func setupControllers( cluster, ) }, + volume_stats.NewEtcdVolumeStatsProvider(&managementClusterClient), mgr.GetEventRecorder(hostedControlPlaneControllerName), controllerNamespace, reconcileFilter, diff --git a/pkg/reconcilers/etcd_cluster/etcd_client/etcd_client.go b/pkg/reconcilers/etcd_cluster/etcd_client/etcd_client.go index 654b13f..ec760ad 100644 --- a/pkg/reconcilers/etcd_cluster/etcd_client/etcd_client.go +++ b/pkg/reconcilers/etcd_cluster/etcd_client/etcd_client.go @@ -11,6 +11,7 @@ import ( "time" slices "github.com/samber/lo" + "github.com/samber/lo/parallel" "github.com/teutonet/cluster-api-provider-hosted-control-plane/api/v1alpha1" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/names" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/alias" @@ -38,7 +39,7 @@ const ( ) type EtcdClient interface { - GetStatuses(ctx context.Context) (map[string]*clientv3.StatusResponse, error) + GetStatuses(ctx context.Context, readyPodNames []string) (map[string]*clientv3.StatusResponse, error) OpenSnapshotStream(ctx context.Context) (*clientv3.SnapshotResponse, func() error, error) ListAlarms(ctx context.Context) (*clientv3.AlarmResponse, error) DisarmAlarm(ctx context.Context, alarm *clientv3.AlarmMember) error @@ -100,7 +101,10 @@ func NewEtcdClient( ) } -func (e *etcdClient) GetStatuses(ctx context.Context) (map[string]*clientv3.StatusResponse, error) { +func (e *etcdClient) GetStatuses( + ctx context.Context, + readyPodNames []string, +) (map[string]*clientv3.StatusResponse, error) { return tracing.WithSpan(ctx, tracer, "EtcdClient.GetStatuses", func(ctx context.Context, span trace.Span) (map[string]*clientv3.StatusResponse, error) { ctx, cancel := context.WithTimeout(ctx, etcdDefaultCallTimeout) @@ -108,6 +112,7 @@ func (e *etcdClient) GetStatuses(ctx context.Context) (map[string]*clientv3.Stat return callETCDFuncOnAllMembers( ctx, e, + slices.PickByKeys(e.endpoints, readyPodNames), clientv3.Client.Status, ) }, @@ -209,6 +214,7 @@ func (e *etcdClient) Defragment(ctx context.Context) error { _, err := callETCDFuncOnAllMembers( ctx, e, + e.endpoints, clientv3.Client.Defragment, ) return err @@ -250,31 +256,37 @@ func (e *etcdClient) DisarmAlarm(ctx context.Context, alarm *clientv3.AlarmMembe func callETCDFuncOnAllMembers[R any]( ctx context.Context, etcd *etcdClient, + endpoints map[string]string, etcdFunc func(client clientv3.Client, ctx context.Context, endpoint string) (*R, error), ) (map[string]*R, error) { return tracing.WithSpan(ctx, tracer, "CallETCDFuncOnAllMembers", func(ctx context.Context, span trace.Span) (map[string]*R, error) { - endpointMap := slices.MapValues(etcd.endpoints, func(endpoint string, _ string) string { - return fmt.Sprintf("https://%s", net.JoinHostPort(endpoint, strconv.Itoa(int(etcd.serverPort)))) - }) - results := make(map[string]*R, len(endpointMap)) - var errs error - for _, endpoint := range endpointMap { - etcdClient, err := createEtcdClient(etcd, endpoint) - if err != nil { - errs = errors.Join(errs, err) - continue - } - result, err := callETCDFuncOnMember(ctx, etcdClient, endpoint, etcdFunc) - closeEtcdClient(etcdClient, &err, endpoint) - if err != nil { - errs = errors.Join(errs, err) - continue - } - results[endpoint] = result - } + results := parallel.Map(slices.Values(endpoints), + func(endpoint string, _ int) slices.Tuple3[string, *R, error] { + endpointKey := fmt.Sprintf( + "https://%s", + net.JoinHostPort(endpoint, strconv.Itoa(int(etcd.serverPort))), + ) + etcdClient, err := createEtcdClient(etcd, endpoint) + if err != nil { + return slices.T3[string, *R, error](endpointKey, nil, err) + } + result, err := callETCDFuncOnMember( + ctx, etcdClient, endpointKey, etcdFunc, + ) + closeEtcdClient(etcdClient, &err, endpointKey) + if err != nil { + return slices.T3[string, *R, error](endpointKey, nil, err) + } + return slices.T3(endpointKey, result, err) + }, + ) - return results, errs + return slices.SliceToMap(results, func(result slices.Tuple3[string, *R, error]) (string, *R) { + return result.A, result.B + }), errors.Join(slices.Map(results, func(result slices.Tuple3[string, *R, error], _ int) error { + return result.C + })...) }, ) } diff --git a/pkg/reconcilers/etcd_cluster/reconciler.go b/pkg/reconcilers/etcd_cluster/reconciler.go index ac844fb..7241c80 100644 --- a/pkg/reconcilers/etcd_cluster/reconciler.go +++ b/pkg/reconcilers/etcd_cluster/reconciler.go @@ -10,6 +10,7 @@ import ( "strings" "time" + "github.com/blang/semver/v4" ciliumclient "github.com/cilium/cilium/pkg/k8s/client/clientset/versioned" slices "github.com/samber/lo" "github.com/teutonet/cluster-api-provider-hosted-control-plane/api/v1alpha1" @@ -20,6 +21,7 @@ import ( "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/alias" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/etcd_client" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/s3_client" + "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/volume_stats" errorsUtil "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/util/errors" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/util/networkpolicy" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/util/tracing" @@ -66,6 +68,7 @@ func NewEtcdClusterReconciler( etcdServerStorageIncrement resource.Quantity, etcdClientFactory etcd_client.EtcdClientFactory, s3ClientFactory s3_client.S3ClientFactory, + volumeStatsProvider volume_stats.EtcdVolumeStatsProvider, recorder recorder.Recorder, componentLabel string, apiServerComponentLabel string, @@ -86,6 +89,7 @@ func NewEtcdClusterReconciler( etcdServerStorageIncrement: etcdServerStorageIncrement, etcdClientFactory: etcdClientFactory, s3ClientFactory: s3ClientFactory, + volumeStatsProvider: volumeStatsProvider, recorder: recorder, componentLabel: componentLabel, apiServerComponentLabel: apiServerComponentLabel, @@ -102,6 +106,7 @@ type etcdClusterReconciler struct { etcdServerStorageIncrement resource.Quantity etcdClientFactory etcd_client.EtcdClientFactory s3ClientFactory s3_client.S3ClientFactory + volumeStatsProvider volume_stats.EtcdVolumeStatsProvider recorder recorder.Recorder componentLabel string apiServerComponentLabel string @@ -178,7 +183,7 @@ func (er *etcdClusterReconciler) ReconcileEtcdCluster( } if ready, err := er.reconcileStatefulSet( - ctx, etcdClient, + ctx, hostedControlPlane, cluster, serverPort, peerPort, metricsPort, ); err != nil { return "", fmt.Errorf("failed to reconcile etcd StatefulSet: %w", err) @@ -186,7 +191,16 @@ func (er *etcdClusterReconciler) ReconcileEtcdCluster( return "etcd StatefulSet is not ready", nil } - if err := er.reconcileETCDMaintenance(ctx, etcdClient, hostedControlPlane); err != nil { + etcdPods, err := er.listEtcdPods(ctx, hostedControlPlane, cluster) + if err != nil { + return "", err + } + + if err := er.reconcileETCDMaintenance(ctx, etcdClient, hostedControlPlane, etcdPods); err != nil { + return "", err + } + + if err := er.etcdIsHealthy(ctx, etcdClient, hostedControlPlane); err != nil { return "", err } @@ -201,6 +215,27 @@ func (er *etcdClusterReconciler) ReconcileEtcdCluster( ) } +//+kubebuilder:rbac:groups=core,resources=pods,verbs=list + +func (er *etcdClusterReconciler) listEtcdPods( + ctx context.Context, + hostedControlPlane *v1alpha1.HostedControlPlane, + cluster *capiv2.Cluster, +) ([]corev1.Pod, error) { + podList, err := er.ManagementClusterClient.CoreV1().Pods(hostedControlPlane.Namespace).List(ctx, metav1.ListOptions{ + LabelSelector: strings.Join(slices.MapToSlice( + names.GetControlPlaneSelector(cluster, er.componentLabel).MatchLabels, + func(key, value string) string { + return fmt.Sprintf("%s=%s", key, value) + }, + ), ","), + }) + if err != nil { + return nil, fmt.Errorf("failed to list etcd pods: %w", err) + } + return podList.Items, nil +} + //+kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=patch;list func (er *etcdClusterReconciler) reconcilePVCSizes( @@ -358,13 +393,23 @@ func (er *etcdClusterReconciler) reconcileETCDMaintenance( ctx context.Context, etcdClient etcd_client.EtcdClient, hostedControlPlane *v1alpha1.HostedControlPlane, + pods []corev1.Pod, ) error { - statuses, err := etcdClient.GetStatuses(ctx) + readyPodNames := slices.Map( + slices.Filter(pods, func(pod corev1.Pod, _ int) bool { + return pod.Spec.NodeName != "" + }), + func(pod corev1.Pod, _ int) string { + return pod.Name + }, + ) + + statuses, err := etcdClient.GetStatuses(ctx, readyPodNames) if err != nil { return fmt.Errorf("failed to get etcd statuses: %w", err) } - if err := er.reconcileETCDSpaceUsage(ctx, statuses, hostedControlPlane); err != nil { + if err := er.reconcileETCDSpaceUsage(ctx, hostedControlPlane, pods); err != nil { return fmt.Errorf("failed to reconcile etcd space usage: %w", err) } @@ -377,21 +422,23 @@ func (er *etcdClusterReconciler) reconcileETCDMaintenance( func (er *etcdClusterReconciler) reconcileETCDSpaceUsage( ctx context.Context, - statuses map[string]*clientv3.StatusResponse, hostedControlPlane *v1alpha1.HostedControlPlane, + pods []corev1.Pod, ) error { return tracing.WithSpan1(ctx, er.Tracer, "ReconcileETCDSpaceUsage", - func(ctx context.Context, span trace.Span) (err error) { - dbSize := slices.Max(slices.Map(slices.Values(statuses), - func(status *clientv3.StatusResponse, _ int) int64 { - return status.DbSize - }, - )) + func(ctx context.Context, span trace.Span) error { + scheduledPods := slices.Filter(pods, func(pod corev1.Pod, _ int) bool { + return pod.Spec.NodeName != "" + }) - dbSizeQuantity := resource.NewQuantity(dbSize, resource.BinarySI) - hostedControlPlane.Status.ETCDVolumeUsage = *dbSizeQuantity - span.SetAttributes( - attribute.String("etcd.volume.usage", hostedControlPlane.Status.ETCDVolumeUsage.String()), + fsUsage := er.getFilesystemUsage(ctx, span, scheduledPods) + + hostedControlPlane.Status.ETCDVolumeUsage = *resource.NewQuantity( + slices.Max([]int64{ // only grow, never shrink + fsUsage, + hostedControlPlane.Status.ETCDVolumeUsage.Value(), + }), + resource.BinarySI, ) return nil @@ -437,6 +484,29 @@ func (er *etcdClusterReconciler) reconcileETCDDefragmentation( ) } +func (er *etcdClusterReconciler) getFilesystemUsage( + ctx context.Context, + span trace.Span, + scheduledPods []corev1.Pod, +) int64 { + maxFsUsage, err := er.volumeStatsProvider.GetMaxEtcdVolumeUsage(ctx, scheduledPods) + if err != nil { + span.RecordError(err) + er.recorder.Warnf(nil, "EtcdFilesystemUsageUnavailable", "EtcdSpaceUsageCheck", + "Failed to get etcd filesystem usage: %s", err) + return 0 + } + + span.SetAttributes( + attribute.String( + "etcd.volume.filesystem_usage", + resource.NewQuantity(maxFsUsage, resource.BinarySI).String(), + ), + ) + + return maxFsUsage +} + //+kubebuilder:rbac:groups="",resources=services,verbs=create;patch func (er *etcdClusterReconciler) reconcileService( @@ -490,7 +560,6 @@ func (er *etcdClusterReconciler) reconcileService( func (er *etcdClusterReconciler) reconcileStatefulSet( ctx context.Context, - etcdClient etcd_client.EtcdClient, hostedControlPlane *v1alpha1.HostedControlPlane, cluster *capiv2.Cluster, serverPort *corev1ac.ContainerPortApplyConfiguration, @@ -571,7 +640,7 @@ func (er *etcdClusterReconciler) reconcileStatefulSet( return false, nil } - return true, er.etcdIsHealthy(ctx, etcdClient, hostedControlPlane) + return true, nil }, ) } @@ -704,6 +773,7 @@ func (er *etcdClusterReconciler) createEtcdContainer( WithArgs(er.buildEtcdArgs( ctx, hostedControlPlane, cluster, + semver.MustParse(version.Version), etcdDataVolumeMount, etcdCertificatesVolumeMount, serverPort, peerPort, metricsPort, )...). @@ -739,6 +809,7 @@ func (er *etcdClusterReconciler) buildEtcdArgs( ctx context.Context, hostedControlPlane *v1alpha1.HostedControlPlane, cluster *capiv2.Cluster, + etcdVersion semver.Version, etcdDataVolumeMount *corev1ac.VolumeMountApplyConfiguration, etcdCertificatesVolumeMount *corev1ac.VolumeMountApplyConfiguration, serverPort *corev1ac.ContainerPortApplyConfiguration, @@ -767,7 +838,6 @@ func (er *etcdClusterReconciler) buildEtcdArgs( "listen-metrics-urls": fmt.Sprintf("http://0.0.0.0:%d", *metricsPort.ContainerPort), "auto-compaction-mode": "periodic", "auto-compaction-retention": "72h", - "snapshot-count": "10000", "client-cert-auth": "true", "trusted-ca-file": path.Join(certificatesDir, konstants.CACertName), "cert-file": path.Join(certificatesDir, "server.crt"), @@ -776,7 +846,13 @@ func (er *etcdClusterReconciler) buildEtcdArgs( "peer-trusted-ca-file": path.Join(certificatesDir, konstants.CACertName), "peer-cert-file": path.Join(certificatesDir, "peer.crt"), "peer-key-file": path.Join(certificatesDir, "peer.key"), - "quota-backend-bytes": strconv.Itoa(int(storageQuota)), + "quota-backend-bytes": strconv.FormatInt(storageQuota, 10), + } + + if int64(etcdVersion.Minor) >= version.V3_7.Minor { //nolint:gosec // semver minor will never overflow int64 + // this is deprecated and will be removed in 3.7 + // TODO: remove this when we roll 3.7 + args["snapshot-count"] = "10000" } return operatorutil.ArgsToSlice(ctx, hostedControlPlane.Spec.ETCD.Args, args) diff --git a/pkg/reconcilers/etcd_cluster/reconciler_test.go b/pkg/reconcilers/etcd_cluster/reconciler_test.go index 9a0da3f..41470d2 100644 --- a/pkg/reconcilers/etcd_cluster/reconciler_test.go +++ b/pkg/reconcilers/etcd_cluster/reconciler_test.go @@ -271,41 +271,132 @@ func TestEtcdClusterReconciler_StateTransitions_AutoGrowDecisionLogic(t *testing func TestEtcdClusterReconciler_reconcileETCDSpaceUsage(t *testing.T) { ctx := context.Background() - t.Run("should update volume usage from etcd status", func(t *testing.T) { + t.Run("should update volume usage from filesystem stats", func(t *testing.T) { g := NewWithT(t) - highestMemberDBSize := int64(5368709120) - statuses := map[string]*clientv3.StatusResponse{ - "etcd-0": { - Header: &etcdserverpb.ResponseHeader{ClusterId: 1}, - Version: "3.5.0", - DbSize: highestMemberDBSize, + fsUsage := int64(5368709120) // 5 GiB + + hcp := &v1alpha1.HostedControlPlane{ + Status: v1alpha1.HostedControlPlaneStatus{ + ETCDVolumeSize: resource.MustParse("20Gi"), + ETCDVolumeUsage: resource.MustParse("0"), }, - "etcd-1": { - Header: &etcdserverpb.ResponseHeader{ClusterId: 1}, - Version: "3.5.0", - DbSize: highestMemberDBSize / 2, + } + + volumeStub := NewEtcdVolumeStatsProviderStub() + volumeStub.MaxUsage = fsUsage + + reconciler := &etcdClusterReconciler{ + recorder: &recorder.InfiniteDiscardingFakeRecorder{}, + volumeStatsProvider: volumeStub, + } + + err := reconciler.reconcileETCDSpaceUsage(ctx, hcp, nil) + + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(hcp.Status.ETCDVolumeUsage).To(EqualResource(*resource.NewQuantity(fsUsage, resource.BinarySI))) + }) + + t.Run("should use filesystem usage when it exceeds previous", func(t *testing.T) { + g := NewWithT(t) + fsUsage := int64(5 * 1024 * 1024 * 1024) + + volumeStub := NewEtcdVolumeStatsProviderStub() + volumeStub.MaxUsage = fsUsage + + hcp := &v1alpha1.HostedControlPlane{ + Status: v1alpha1.HostedControlPlaneStatus{ + ETCDVolumeSize: resource.MustParse("20Gi"), + ETCDVolumeUsage: resource.MustParse("2Gi"), }, } + reconciler := &etcdClusterReconciler{ + recorder: &recorder.InfiniteDiscardingFakeRecorder{}, + volumeStatsProvider: volumeStub, + } + + err := reconciler.reconcileETCDSpaceUsage(ctx, hcp, nil) + + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(hcp.Status.ETCDVolumeUsage).To(EqualResource(*resource.NewQuantity(fsUsage, resource.BinarySI))) + }) + + t.Run("should not shrink volume usage", func(t *testing.T) { + g := NewWithT(t) + fsUsage := int64(1 * 1024 * 1024 * 1024) // 1 GiB + previous := int64(5 * 1024 * 1024 * 1024) // 5 GiB + + volumeStub := NewEtcdVolumeStatsProviderStub() + volumeStub.MaxUsage = fsUsage + hcp := &v1alpha1.HostedControlPlane{ Status: v1alpha1.HostedControlPlaneStatus{ ETCDVolumeSize: resource.MustParse("20Gi"), - ETCDVolumeUsage: resource.MustParse("0"), + ETCDVolumeUsage: resource.MustParse("5Gi"), }, } reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, - etcdServerStorageBuffer: resource.MustParse("2Gi"), - etcdServerStorageIncrement: resource.MustParse("10Gi"), - etcdClientFactory: nil, + recorder: &recorder.InfiniteDiscardingFakeRecorder{}, + volumeStatsProvider: volumeStub, + } + + err := reconciler.reconcileETCDSpaceUsage(ctx, hcp, nil) + + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(hcp.Status.ETCDVolumeUsage).To(EqualResource(*resource.NewQuantity(previous, resource.BinarySI))) + }) + + t.Run("should log warning and continue when volume stats fails", func(t *testing.T) { + g := NewWithT(t) + + volumeStub := NewEtcdVolumeStatsProviderStub() + volumeStub.Error = errors.New("connection refused") + + hcp := &v1alpha1.HostedControlPlane{ + Status: v1alpha1.HostedControlPlaneStatus{ + ETCDVolumeSize: resource.MustParse("20Gi"), + ETCDVolumeUsage: resource.MustParse("15Gi"), + }, + } + + returningFakeRecorder, rec := recorder.NewInfiniteReturningFakeRecorder() + + reconciler := &etcdClusterReconciler{ + recorder: rec, + volumeStatsProvider: volumeStub, + } + + err := reconciler.reconcileETCDSpaceUsage(ctx, hcp, nil) + + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(returningFakeRecorder.Events).To(ContainElement(ContainSubstring("connection refused"))) + }) + + t.Run("should not shrink when stats fail", func(t *testing.T) { + g := NewWithT(t) + + volumeStub := NewEtcdVolumeStatsProviderStub() + volumeStub.Error = errors.New("connection refused") + + hcp := &v1alpha1.HostedControlPlane{ + Status: v1alpha1.HostedControlPlaneStatus{ + ETCDVolumeSize: resource.MustParse("20Gi"), + ETCDVolumeUsage: resource.MustParse("15Gi"), + }, + } + + reconciler := &etcdClusterReconciler{ + recorder: &recorder.InfiniteDiscardingFakeRecorder{}, + volumeStatsProvider: volumeStub, } - err := reconciler.reconcileETCDSpaceUsage(ctx, statuses, hcp) + err := reconciler.reconcileETCDSpaceUsage(ctx, hcp, nil) g.Expect(err).NotTo(HaveOccurred()) - expectedQuantity := resource.NewQuantity(highestMemberDBSize, resource.BinarySI) - g.Expect(hcp.Status.ETCDVolumeUsage).To(EqualResource(*expectedQuantity)) + // Should retain previous value since new measurement failed (returns 0) + g.Expect(hcp.Status.ETCDVolumeUsage). + To(EqualResource(*resource.NewQuantity(int64(15*1024*1024*1024), resource.BinarySI))) }) } @@ -820,7 +911,7 @@ type stallingEtcdClient struct{} var _ etcd_client.EtcdClient = new(stallingEtcdClient) -func (s *stallingEtcdClient) GetStatuses(_ context.Context) (map[string]*clientv3.StatusResponse, error) { +func (s *stallingEtcdClient) GetStatuses(_ context.Context, _ []string) (map[string]*clientv3.StatusResponse, error) { return make(map[string]*clientv3.StatusResponse), nil } @@ -852,7 +943,7 @@ func TestEtcdClusterReconciler_reconcileETCDMaintenance_GetStatusesError(t *test reconciler := &etcdClusterReconciler{recorder: &recorder.InfiniteDiscardingFakeRecorder{}} - err := reconciler.reconcileETCDMaintenance(ctx, stub, hcp) + err := reconciler.reconcileETCDMaintenance(ctx, stub, hcp, nil) g.Expect(err).To(MatchError(ContainSubstring("failed to get etcd statuses"))) g.Expect(err).To(MatchError(ContainSubstring("connection refused"))) diff --git a/pkg/reconcilers/etcd_cluster/volume_stats/volume_stats.go b/pkg/reconcilers/etcd_cluster/volume_stats/volume_stats.go new file mode 100644 index 0000000..36760b7 --- /dev/null +++ b/pkg/reconcilers/etcd_cluster/volume_stats/volume_stats.go @@ -0,0 +1,147 @@ +package volume_stats + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "math" + "strings" + + slices "github.com/samber/lo" + "github.com/samber/lo/parallel" + "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/alias" + "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/util/tracing" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" + corev1 "k8s.io/api/core/v1" + kubeletstatsv1alpha1 "k8s.io/kubelet/pkg/apis/stats/v1alpha1" +) + +var tracer = tracing.GetTracer("VolumeStats") + +var errNoStatsForNode = errors.New("no stats available for node") + +type EtcdVolumeStatsProvider interface { + GetMaxEtcdVolumeUsage( + ctx context.Context, + pods []corev1.Pod, + ) (int64, error) +} + +type kubeletEtcdVolumeStatsProvider struct { + client *alias.ManagementClusterClient +} + +var _ EtcdVolumeStatsProvider = &kubeletEtcdVolumeStatsProvider{} + +func NewEtcdVolumeStatsProvider(client *alias.ManagementClusterClient) EtcdVolumeStatsProvider { + return &kubeletEtcdVolumeStatsProvider{client: client} +} + +func (p *kubeletEtcdVolumeStatsProvider) GetMaxEtcdVolumeUsage( + ctx context.Context, + pods []corev1.Pod, +) (int64, error) { + return tracing.WithSpan(ctx, tracer, "GetMaxEtcdVolumeUsage", + func(ctx context.Context, span trace.Span) (int64, error) { + nodeStats := parallel.Map( + slices.Uniq(slices.Map(pods, func(pod corev1.Pod, _ int) string { return pod.Spec.NodeName })), + func(node string, _ int) slices.Tuple2[string, slices.Tuple2[kubeletstatsv1alpha1.Summary, error]] { + summary, err := p.getNodeStats(ctx, node) + return slices.T2(node, slices.T2(summary, err)) + }, + ) + + statsByNode := slices.SliceToMap( + nodeStats, + func( + t slices.Tuple2[string, slices.Tuple2[kubeletstatsv1alpha1.Summary, error]], + ) (string, slices.Tuple2[kubeletstatsv1alpha1.Summary, error]) { + return t.A, t.B + }, + ) + + results := slices.Map(pods, func(pod corev1.Pod, _ int) slices.Tuple2[int64, error] { + nodeResult, ok := statsByNode[pod.Spec.NodeName] + if !ok { + return slices.T2(int64(0), fmt.Errorf("%w for node %s", errNoStatsForNode, pod.Spec.NodeName)) + } + if nodeResult.B != nil { + return slices.T2(int64(0), nodeResult.B) + } + usage := p.extractPodVolumeUsage(nodeResult.A, pod.Namespace, pod.Name) + return slices.T2[int64, error](usage, nil) + }) + + sizes, errs := slices.Unzip2(results) + maxUsage := slices.Max(sizes) + + span.SetAttributes(attribute.Int64("etcd.volume.filesystem_usage_bytes", maxUsage)) + return maxUsage, errors.Join(errs...) + }, + ) +} + +//+kubebuilder:rbac:groups="",resources=nodes/proxy,verbs=get + +func (p *kubeletEtcdVolumeStatsProvider) getNodeStats( + ctx context.Context, + nodeName string, +) (kubeletstatsv1alpha1.Summary, error) { + return tracing.WithSpan(ctx, tracer, "getNodeStats", + func(ctx context.Context, span trace.Span) (kubeletstatsv1alpha1.Summary, error) { + span.SetAttributes(attribute.String("node", nodeName)) + + data, err := p.client.CoreV1().RESTClient(). + Get(). + AbsPath("/api/v1/nodes", nodeName, "proxy", "stats", "summary"). + DoRaw(ctx) + if err != nil { + return kubeletstatsv1alpha1.Summary{}, fmt.Errorf( + "failed to query kubelet stats summary for node %s: %w", + nodeName, + err, + ) + } + + var summary kubeletstatsv1alpha1.Summary + if err := json.Unmarshal(data, &summary); err != nil { + return kubeletstatsv1alpha1.Summary{}, fmt.Errorf("failed to unmarshal stats summary: %w", err) + } + + return summary, nil + }, + ) +} + +func (p *kubeletEtcdVolumeStatsProvider) extractPodVolumeUsage( + summary kubeletstatsv1alpha1.Summary, + namespace string, + podName string, +) int64 { + for i := range summary.Pods { + podStats := &summary.Pods[i] + if podStats.PodRef.Namespace != namespace || podStats.PodRef.Name != podName { + continue + } + + for j := range podStats.VolumeStats { + volStats := &podStats.VolumeStats[j] + if !isEtcdDataVolume(volStats) || volStats.UsedBytes == nil { + continue + } + //nolint:gosec // capped at MaxInt64 + return int64(min(*volStats.UsedBytes, uint64(math.MaxInt64))) + } + } + + return 0 +} + +func isEtcdDataVolume(volStats *kubeletstatsv1alpha1.VolumeStats) bool { + if volStats.PVCRef != nil { + return strings.HasPrefix(volStats.PVCRef.Name, "etcd-data-") + } + return volStats.Name == "etcd-data" +} diff --git a/pkg/reconcilers/etcd_cluster/volume_stats/volume_stats_test.go b/pkg/reconcilers/etcd_cluster/volume_stats/volume_stats_test.go new file mode 100644 index 0000000..f58c696 --- /dev/null +++ b/pkg/reconcilers/etcd_cluster/volume_stats/volume_stats_test.go @@ -0,0 +1,346 @@ +package volume_stats + +import ( + "math" + "testing" + + . "github.com/onsi/gomega" + kubeletstatsv1alpha1 "k8s.io/kubelet/pkg/apis/stats/v1alpha1" +) + +func TestIsEtcdDataVolume(t *testing.T) { + tests := []struct { + name string + vol *kubeletstatsv1alpha1.VolumeStats + expected bool + }{ + { + name: "matches PVC with etcd-data prefix", + vol: &kubeletstatsv1alpha1.VolumeStats{ + Name: "etcd-data", + PVCRef: &kubeletstatsv1alpha1.PVCReference{ + Name: "etcd-data-etcd-0", + Namespace: "default", + }, + }, + expected: true, + }, + { + name: "matches volume name etcd-data without PVC ref", + vol: &kubeletstatsv1alpha1.VolumeStats{ + Name: "etcd-data", + }, + expected: true, + }, + { + name: "does not match unrelated PVC", + vol: &kubeletstatsv1alpha1.VolumeStats{ + Name: "some-other-volume", + PVCRef: &kubeletstatsv1alpha1.PVCReference{ + Name: "some-other-pvc", + Namespace: "default", + }, + }, + expected: false, + }, + { + name: "does not match unrelated volume name without PVC ref", + vol: &kubeletstatsv1alpha1.VolumeStats{ + Name: "etcd-certificates", + }, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + g.Expect(isEtcdDataVolume(tt.vol)).To(Equal(tt.expected)) + }) + } +} + +func TestExtractPodVolumeUsage(t *testing.T) { + p := &kubeletEtcdVolumeStatsProvider{} + + t.Run("returns usage for matching pod with PVC volume", func(t *testing.T) { + g := NewWithT(t) + usage := int64(1073741824) // 1 GiB + usedBytes := uint64(usage) + summary := kubeletstatsv1alpha1.Summary{ + Pods: []kubeletstatsv1alpha1.PodStats{ + { + PodRef: kubeletstatsv1alpha1.PodReference{ + Name: "etcd-0", + Namespace: "test-ns", + }, + VolumeStats: []kubeletstatsv1alpha1.VolumeStats{ + { + Name: "etcd-data", + PVCRef: &kubeletstatsv1alpha1.PVCReference{ + Name: "etcd-data-etcd-0", + Namespace: "test-ns", + }, + FsStats: kubeletstatsv1alpha1.FsStats{UsedBytes: &usedBytes}, + }, + }, + }, + }, + } + + result := p.extractPodVolumeUsage(summary, "test-ns", "etcd-0") + g.Expect(result).To(Equal(usage)) + }) + + t.Run("returns usage for matching pod with static volume", func(t *testing.T) { + g := NewWithT(t) + usage := int64(536870912) // 512 MiB + usedBytes := uint64(usage) + summary := kubeletstatsv1alpha1.Summary{ + Pods: []kubeletstatsv1alpha1.PodStats{ + { + PodRef: kubeletstatsv1alpha1.PodReference{ + Name: "etcd-1", + Namespace: "test-ns", + }, + VolumeStats: []kubeletstatsv1alpha1.VolumeStats{ + { + Name: "etcd-data", + FsStats: kubeletstatsv1alpha1.FsStats{UsedBytes: &usedBytes}, + }, + }, + }, + }, + } + + result := p.extractPodVolumeUsage(summary, "test-ns", "etcd-1") + g.Expect(result).To(Equal(usage)) + }) + + t.Run("returns 0 for missing pod", func(t *testing.T) { + g := NewWithT(t) + summary := kubeletstatsv1alpha1.Summary{ + Pods: []kubeletstatsv1alpha1.PodStats{ + { + PodRef: kubeletstatsv1alpha1.PodReference{ + Name: "etcd-0", + Namespace: "test-ns", + }, + }, + }, + } + + result := p.extractPodVolumeUsage(summary, "test-ns", "etcd-99") + g.Expect(result).To(Equal(int64(0))) + }) + + t.Run("returns 0 when volume has nil UsedBytes", func(t *testing.T) { + g := NewWithT(t) + summary := kubeletstatsv1alpha1.Summary{ + Pods: []kubeletstatsv1alpha1.PodStats{ + { + PodRef: kubeletstatsv1alpha1.PodReference{ + Name: "etcd-0", + Namespace: "test-ns", + }, + VolumeStats: []kubeletstatsv1alpha1.VolumeStats{ + { + Name: "etcd-data", + PVCRef: &kubeletstatsv1alpha1.PVCReference{ + Name: "etcd-data-etcd-0", + Namespace: "test-ns", + }, + FsStats: kubeletstatsv1alpha1.FsStats{UsedBytes: nil}, + }, + }, + }, + }, + } + + result := p.extractPodVolumeUsage(summary, "test-ns", "etcd-0") + g.Expect(result).To(Equal(int64(0))) + }) + + t.Run("ignores non-etcd volumes", func(t *testing.T) { + g := NewWithT(t) + usedBytes := uint64(1073741824) + summary := kubeletstatsv1alpha1.Summary{ + Pods: []kubeletstatsv1alpha1.PodStats{ + { + PodRef: kubeletstatsv1alpha1.PodReference{ + Name: "etcd-0", + Namespace: "test-ns", + }, + VolumeStats: []kubeletstatsv1alpha1.VolumeStats{ + { + Name: "etcd-certificates", + PVCRef: &kubeletstatsv1alpha1.PVCReference{ + Name: "etcd-certificates-etcd-0", + Namespace: "test-ns", + }, + FsStats: kubeletstatsv1alpha1.FsStats{UsedBytes: &usedBytes}, + }, + }, + }, + }, + } + + result := p.extractPodVolumeUsage(summary, "test-ns", "etcd-0") + g.Expect(result).To(Equal(int64(0))) + }) + + t.Run("caps UsedBytes to MaxInt64 when value exceeds", func(t *testing.T) { + g := NewWithT(t) + usedBytes := uint64(math.MaxInt64) + 1 + summary := kubeletstatsv1alpha1.Summary{ + Pods: []kubeletstatsv1alpha1.PodStats{ + { + PodRef: kubeletstatsv1alpha1.PodReference{ + Name: "etcd-0", + Namespace: "test-ns", + }, + VolumeStats: []kubeletstatsv1alpha1.VolumeStats{ + { + Name: "etcd-data", + PVCRef: &kubeletstatsv1alpha1.PVCReference{ + Name: "etcd-data-etcd-0", + Namespace: "test-ns", + }, + FsStats: kubeletstatsv1alpha1.FsStats{UsedBytes: &usedBytes}, + }, + }, + }, + }, + } + + result := p.extractPodVolumeUsage(summary, "test-ns", "etcd-0") + g.Expect(result).To(Equal(int64(math.MaxInt64))) + }) + + t.Run("skips non-matching namespace", func(t *testing.T) { + g := NewWithT(t) + usedBytes := uint64(1073741824) + summary := kubeletstatsv1alpha1.Summary{ + Pods: []kubeletstatsv1alpha1.PodStats{ + { + PodRef: kubeletstatsv1alpha1.PodReference{ + Name: "etcd-0", + Namespace: "other-ns", + }, + VolumeStats: []kubeletstatsv1alpha1.VolumeStats{ + { + Name: "etcd-data", + PVCRef: &kubeletstatsv1alpha1.PVCReference{ + Name: "etcd-data-etcd-0", + Namespace: "other-ns", + }, + FsStats: kubeletstatsv1alpha1.FsStats{UsedBytes: &usedBytes}, + }, + }, + }, + }, + } + + result := p.extractPodVolumeUsage(summary, "test-ns", "etcd-0") + g.Expect(result).To(Equal(int64(0))) + }) + + t.Run("first matching volume wins when multiple etcd-data volumes exist", func(t *testing.T) { + g := NewWithT(t) + usedBytes1 := uint64(1073741824) // 1 GiB + usedBytes2 := uint64(2147483648) // 2 GiB + summary := kubeletstatsv1alpha1.Summary{ + Pods: []kubeletstatsv1alpha1.PodStats{ + { + PodRef: kubeletstatsv1alpha1.PodReference{ + Name: "etcd-0", + Namespace: "test-ns", + }, + VolumeStats: []kubeletstatsv1alpha1.VolumeStats{ + { + Name: "etcd-data", + PVCRef: &kubeletstatsv1alpha1.PVCReference{ + Name: "etcd-data-etcd-0", + Namespace: "test-ns", + }, + FsStats: kubeletstatsv1alpha1.FsStats{UsedBytes: &usedBytes1}, + }, + { + Name: "etcd-data", + PVCRef: &kubeletstatsv1alpha1.PVCReference{ + Name: "etcd-data-etcd-0-2", + Namespace: "test-ns", + }, + FsStats: kubeletstatsv1alpha1.FsStats{UsedBytes: &usedBytes2}, + }, + }, + }, + }, + } + + result := p.extractPodVolumeUsage(summary, "test-ns", "etcd-0") + g.Expect(result).To(Equal(int64(1073741824))) // first match wins + }) + + t.Run("empty summary returns 0", func(t *testing.T) { + g := NewWithT(t) + summary := kubeletstatsv1alpha1.Summary{} + result := p.extractPodVolumeUsage(summary, "test-ns", "etcd-0") + g.Expect(result).To(Equal(int64(0))) + }) +} + +func TestGetMaxEtcdVolumeUsage(t *testing.T) { + t.Run("deduplicates node stats for pods on same node", func(t *testing.T) { + g := NewWithT(t) + usedBytes1 := uint64(1073741824) // 1 GiB + usedBytes2 := uint64(2147483648) // 2 GiB + summary := kubeletstatsv1alpha1.Summary{ + Node: kubeletstatsv1alpha1.NodeStats{}, + Pods: []kubeletstatsv1alpha1.PodStats{ + { + PodRef: kubeletstatsv1alpha1.PodReference{ + Name: "etcd-0", + Namespace: "test-ns", + }, + VolumeStats: []kubeletstatsv1alpha1.VolumeStats{ + { + Name: "etcd-data", + PVCRef: &kubeletstatsv1alpha1.PVCReference{ + Name: "etcd-data-etcd-0", + Namespace: "test-ns", + }, + FsStats: kubeletstatsv1alpha1.FsStats{UsedBytes: &usedBytes1}, + }, + }, + }, + { + PodRef: kubeletstatsv1alpha1.PodReference{ + Name: "etcd-1", + Namespace: "test-ns", + }, + VolumeStats: []kubeletstatsv1alpha1.VolumeStats{ + { + Name: "etcd-data", + PVCRef: &kubeletstatsv1alpha1.PVCReference{ + Name: "etcd-data-etcd-1", + Namespace: "test-ns", + }, + FsStats: kubeletstatsv1alpha1.FsStats{UsedBytes: &usedBytes2}, + }, + }, + }, + }, + } + + // This test verifies the extraction logic - the actual provider tests + // with real node API calls would be integration tests + p := &kubeletEtcdVolumeStatsProvider{} + + usage0 := p.extractPodVolumeUsage(summary, "test-ns", "etcd-0") + usage1 := p.extractPodVolumeUsage(summary, "test-ns", "etcd-1") + + g.Expect(usage0).To(Equal(int64(1073741824))) + g.Expect(usage1).To(Equal(int64(2147483648))) + }) +} diff --git a/test/etcd_stubs.go b/test/etcd_stubs.go index 19adff0..ee3b760 100644 --- a/test/etcd_stubs.go +++ b/test/etcd_stubs.go @@ -10,8 +10,10 @@ import ( slices "github.com/samber/lo" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/etcd_client" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/s3_client" + "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/volume_stats" "go.etcd.io/etcd/api/v3/etcdserverpb" clientv3 "go.etcd.io/etcd/client/v3" + corev1 "k8s.io/api/core/v1" ) var _ etcd_client.EtcdClient = &EtcdClientStub{} @@ -36,7 +38,7 @@ func NewEtcdClientStub() *EtcdClientStub { } } -func (s *EtcdClientStub) GetStatuses(_ context.Context) (map[string]*clientv3.StatusResponse, error) { +func (s *EtcdClientStub) GetStatuses(_ context.Context, _ []string) (map[string]*clientv3.StatusResponse, error) { if s.StatusError != nil { return nil, s.StatusError } @@ -83,6 +85,26 @@ func (s *EtcdClientStub) DisarmAlarm(_ context.Context, alarm *clientv3.AlarmMem return nil } +type EtcdVolumeStatsProviderStub struct { + MaxUsage int64 + Error error +} + +var _ volume_stats.EtcdVolumeStatsProvider = &EtcdVolumeStatsProviderStub{} + +func NewEtcdVolumeStatsProviderStub() *EtcdVolumeStatsProviderStub { + return &EtcdVolumeStatsProviderStub{} +} + +func (s *EtcdVolumeStatsProviderStub) GetMaxEtcdVolumeUsage( + _ context.Context, _ []corev1.Pod, +) (int64, error) { + if s.Error != nil { + return 0, s.Error + } + return s.MaxUsage, nil +} + type S3ClientStub struct { UploadError error LastUploadedBody []byte From 1a212877266a18e061152d0073f421b7bd05b23f Mon Sep 17 00:00:00 2001 From: Chris Werner Rau Date: Tue, 31 Mar 2026 15:29:34 +0200 Subject: [PATCH 2/6] fix: inverted condition --- pkg/reconcilers/etcd_cluster/reconciler.go | 3 +- .../etcd_cluster/reconciler_test.go | 59 +++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/pkg/reconcilers/etcd_cluster/reconciler.go b/pkg/reconcilers/etcd_cluster/reconciler.go index 7241c80..8c4523b 100644 --- a/pkg/reconcilers/etcd_cluster/reconciler.go +++ b/pkg/reconcilers/etcd_cluster/reconciler.go @@ -45,6 +45,7 @@ var ( etcdVolumeResizeEvent = "EtcdVolumeAutoResize" etcdVolumeSizeReCalculatedEvent = "EtcdVolumeSizeRecalculated" errETCDBackupStalled = errors.New("etcd backup timed out: no progress in time window") + etcdClientVersion3_7 = semver.MustParse(version.V3_7.String()) ) const ( @@ -849,7 +850,7 @@ func (er *etcdClusterReconciler) buildEtcdArgs( "quota-backend-bytes": strconv.FormatInt(storageQuota, 10), } - if int64(etcdVersion.Minor) >= version.V3_7.Minor { //nolint:gosec // semver minor will never overflow int64 + if etcdVersion.LT(etcdClientVersion3_7) { // this is deprecated and will be removed in 3.7 // TODO: remove this when we roll 3.7 args["snapshot-count"] = "10000" diff --git a/pkg/reconcilers/etcd_cluster/reconciler_test.go b/pkg/reconcilers/etcd_cluster/reconciler_test.go index 41470d2..9e2e7e6 100644 --- a/pkg/reconcilers/etcd_cluster/reconciler_test.go +++ b/pkg/reconcilers/etcd_cluster/reconciler_test.go @@ -14,6 +14,7 @@ import ( "k8s.io/utils/ptr" capiv2 "sigs.k8s.io/cluster-api/api/core/v1beta2" + semver "github.com/blang/semver/v4" . "github.com/onsi/gomega" "github.com/teutonet/cluster-api-provider-hosted-control-plane/api/v1alpha1" . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" @@ -21,6 +22,7 @@ import ( clientv3 "go.etcd.io/etcd/client/v3" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + corev1ac "k8s.io/client-go/applyconfigurations/core/v1" ) func TestEtcdClusterReconciler_getETCDVolumeSize(t *testing.T) { @@ -1111,3 +1113,60 @@ func TestEtcdClusterReconciler_reconcileETCDDefragmentation(t *testing.T) { )) }) } + +func TestBuildEtcdArgs_SnapshotCount(t *testing.T) { + tests := []struct { + name string + etcdVersion semver.Version + expectSnapshotCount bool + }{ + { + name: "version < 3.7 sets snapshot-count", + etcdVersion: semver.MustParse("3.6.0"), + expectSnapshotCount: true, + }, + { + name: "version >= 3.7 omits snapshot-count", + etcdVersion: semver.MustParse("3.7.0"), + expectSnapshotCount: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + reconciler := &etcdClusterReconciler{} + hcp := &v1alpha1.HostedControlPlane{ + ObjectMeta: metav1.ObjectMeta{Namespace: "test-ns"}, + Status: v1alpha1.HostedControlPlaneStatus{ETCDVolumeSize: resource.MustParse("10Gi")}, + } + cluster := &capiv2.Cluster{ + ObjectMeta: metav1.ObjectMeta{Name: "test-cluster", Namespace: "test-ns"}, + } + serverPort := corev1ac.ContainerPort().WithContainerPort(2379) + peerPort := corev1ac.ContainerPort().WithContainerPort(2380) + metricsPort := corev1ac.ContainerPort().WithContainerPort(2381) + dataMount := corev1ac.VolumeMount().WithMountPath("/var/lib/etcd") + certMount := corev1ac.VolumeMount().WithMountPath("/etc/etcd") + + args := reconciler.buildEtcdArgs( + context.Background(), + hcp, + cluster, + tt.etcdVersion, + dataMount, + certMount, + serverPort, + peerPort, + metricsPort, + ) + + if tt.expectSnapshotCount { + g.Expect(args).To(ContainElement("--snapshot-count=10000")) + } else { + g.Expect(args).NotTo(ContainElement(ContainSubstring("snapshot-count"))) + } + }) + } +} From e586dc052bff7e47607f5c0fc5e6692b9acdb215 Mon Sep 17 00:00:00 2001 From: Chris Werner Rau Date: Tue, 5 May 2026 16:01:32 +0200 Subject: [PATCH 3/6] chore: adjust etcd space calculation and NOSPACE alarm dismissal chore: replace span/log/recorder with emit --- .gitignore | 2 + .golangci.yaml | 16 + .../hostedcontrolplane_webhook_test.go | 9 +- pkg/hostedcontrolplane/controller.go | 21 +- pkg/hostedcontrolplane/controller_test.go | 38 +-- .../lifecycle_phases_test.go | 94 +++--- pkg/operator/operator.go | 3 +- pkg/operator/operator_test.go | 5 +- pkg/operator/util/args.go | 30 +- pkg/operator/util/args_test.go | 13 +- pkg/operator/util/checksum_test.go | 15 +- pkg/operator/util/emit/emit.go | 110 +++++++ pkg/operator/util/emit/emit_test.go | 219 +++++++++++++ pkg/operator/util/images_test.go | 17 +- pkg/operator/util/names/labels_test.go | 13 +- pkg/operator/util/names/names_test.go | 23 +- pkg/operator/util/pod_test.go | 8 +- pkg/operator/util/resources_test.go | 6 +- pkg/operator/util/version_test.go | 5 +- .../apiserverresources_test.go | 5 +- pkg/reconcilers/certificates/reconciler.go | 11 +- .../certificates/reconciler_test.go | 7 +- pkg/reconcilers/etcd_cluster/reconciler.go | 203 ++++++------ .../etcd_cluster/reconciler_test.go | 292 +++++++++--------- .../etcd_cluster/volume_stats/volume_stats.go | 8 +- .../volume_stats/volume_stats_test.go | 55 ---- pkg/reconcilers/kubeconfig/reconciler_test.go | 9 +- pkg/reconcilers/reconciler.go | 16 +- pkg/reconcilers/tlsroutes/reconciler_test.go | 9 +- pkg/util/errors/errors_test.go | 3 +- test/context.go | 77 +++++ test/etcd_stubs.go | 5 +- 32 files changed, 870 insertions(+), 477 deletions(-) create mode 100644 pkg/operator/util/emit/emit.go create mode 100644 pkg/operator/util/emit/emit_test.go create mode 100644 test/context.go diff --git a/.gitignore b/.gitignore index f79f070..33ac300 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,5 @@ testbin/* *.swp *.swo *~ + +.claude diff --git a/.golangci.yaml b/.golangci.yaml index 92b3ee1..8ee215b 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -264,6 +264,15 @@ linters: # msg: Use slog instead - pattern: ^(record|events)\.NewFakeRecorder(For)?$ msg: Use pkg/operator/util/recorder/fake_recorder.go instead. + - pattern: ^trace\.Span\.AddEvent$ + msg: use emit.Warnf/Infof with SinkSpanEvent instead + - pattern: ^slog\.Logger\.(Warn|Info)Context$ + msg: use emit.Warnf/emit.Infof instead + - pattern: ^recorder\.Recorder\.(Warnf|Normalf)$ + msg: use emit.Warnf/emit.Infof instead + - pattern: ^(gomega\.)?NewWithT$ + pkg: ^github\.com/onsi/gomega$ + msg: use G() from the test package instead revive: rules: - name: exported @@ -338,6 +347,13 @@ linters: - gocritic text: "deferInLoop: Possible resource leak, 'defer' is called in the 'for' loop" path: *testFiles + - linters: + - forbidigo + path: pkg/operator/util/emit/emit\.go + # volume_stats_test cannot import the test package: test imports volume_stats (cycle) + - linters: + - forbidigo + path: pkg/reconcilers/etcd_cluster/volume_stats/volume_stats_test\.go run: timeout: 10m diff --git a/api/v1alpha1/webhook/hostedcontrolplane_webhook_test.go b/api/v1alpha1/webhook/hostedcontrolplane_webhook_test.go index c647d59..ee4501a 100644 --- a/api/v1alpha1/webhook/hostedcontrolplane_webhook_test.go +++ b/api/v1alpha1/webhook/hostedcontrolplane_webhook_test.go @@ -6,6 +6,7 @@ import ( . "github.com/onsi/gomega" "github.com/teutonet/cluster-api-provider-hosted-control-plane/api/v1alpha1" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/importcycle" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" @@ -186,7 +187,7 @@ func TestHostedControlPlaneWebhook_ValidateCreate(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) _, err := webhook.ValidateCreate(t.Context(), tt.hcp) if tt.expectErr { @@ -318,7 +319,7 @@ func TestHostedControlPlaneWebhook_ValidateUpdate(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) _, err := webhook.ValidateUpdate(t.Context(), tt.oldHCP, tt.newHCP) if tt.expectErr { @@ -332,7 +333,7 @@ func TestHostedControlPlaneWebhook_ValidateUpdate(t *testing.T) { func TestHostedControlPlaneWebhook_ValidateDelete(t *testing.T) { webhook := &hostedControlPlaneWebhook{} - g := NewWithT(t) + g, _, _ := G(t) hcp := &v1alpha1.HostedControlPlane{ ObjectMeta: metav1.ObjectMeta{ @@ -386,7 +387,7 @@ func TestHostedControlPlaneWebhook_ParseVersion(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) hcp := &v1alpha1.HostedControlPlane{ Spec: v1alpha1.HostedControlPlaneSpec{ Version: tt.version, diff --git a/pkg/hostedcontrolplane/controller.go b/pkg/hostedcontrolplane/controller.go index d588898..94c4371 100644 --- a/pkg/hostedcontrolplane/controller.go +++ b/pkg/hostedcontrolplane/controller.go @@ -19,6 +19,7 @@ import ( "github.com/teutonet/cluster-api-provider-hosted-control-plane/api" "github.com/teutonet/cluster-api-provider-hosted-control-plane/api/v1alpha1" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/importcycle" + "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/emit" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/recorder" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/alias" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/apiserverresources" @@ -386,7 +387,10 @@ func (r *hostedControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, fmt.Errorf("failed to retrieve owner Cluster: %w", err) } if cluster == nil { - span.AddEvent("Cluster Controller has not yet set OwnerRef") + emit.Info(ctx, emit.SinkSpanEvent, hostedControlPlane, + "ClusterOwnerRefMissing", "OwnerRefCheck", + "Cluster Controller has not yet set OwnerRef", + ) return ctrl.Result{RequeueAfter: 5 * time.Second}, nil } @@ -405,12 +409,13 @@ func (r *hostedControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.R clusterMatch = cluster.Name == r.reconcileFilter } if !hcpMatch && !clusterMatch { - logr.FromContextAsSlogLogger(ctx). - InfoContext(ctx, "skipping reconciliation due to reconcile filter", - "filter", r.reconcileFilter, - "hcp", hostedControlPlane.Namespace+"/"+hostedControlPlane.Name, - "cluster", cluster.Namespace+"/"+cluster.Name, - ) + emit.Info(ctx, emit.SinkLogger, hostedControlPlane, + "ReconcileFilterMismatch", "SkipReconcile", + "skipping reconciliation due to reconcile filter", + "filter", r.reconcileFilter, + "hcp", hostedControlPlane.Namespace+"/"+hostedControlPlane.Name, + "cluster", cluster.Namespace+"/"+cluster.Name, + ) return reconcile.Result{}, nil } } @@ -588,7 +593,6 @@ func (r *hostedControlPlaneReconciler) reconcileNormal( r.caCertificatesDuration, r.certificatesDuration, r.konnectivityServerAudience, - recorder.FromContext(ctx), ) kubeconfigReconciler := kubeconfig.NewKubeconfigReconciler( r.managementClusterClient, @@ -605,7 +609,6 @@ func (r *hostedControlPlaneReconciler) reconcileNormal( r.etcdClientFactory, r.s3ClientFactory, r.volumeStatsProvider, - recorder.FromContext(ctx), r.etcdComponentLabel, r.apiServerComponentLabel, r.controllerNamespace, diff --git a/pkg/hostedcontrolplane/controller_test.go b/pkg/hostedcontrolplane/controller_test.go index 70dfe45..1cb0beb 100644 --- a/pkg/hostedcontrolplane/controller_test.go +++ b/pkg/hostedcontrolplane/controller_test.go @@ -13,7 +13,7 @@ import ( "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/alias" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/etcd_client" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/s3_client" - "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -34,7 +34,7 @@ var ( _ *v1alpha1.HostedControlPlane, _ *capiv2.Cluster, ) (s3_client.S3Client, error) { - return test.NewS3ClientStub(), nil + return NewS3ClientStub(), nil } etcdClientStubFactory = func( _ context.Context, @@ -43,7 +43,7 @@ var ( _ *capiv2.Cluster, _ int32, ) (etcd_client.EtcdClient, error) { - return test.NewEtcdClientStub(), nil + return NewEtcdClientStub(), nil } workloadClusterClientStubFactory = func( _ context.Context, @@ -71,7 +71,7 @@ func createTestReconcilerWithFilter(client client.Client, reconcileFilter string workloadClusterClientStubFactory, etcdClientStubFactory, s3ClientStubFactory, - test.NewEtcdVolumeStatsProviderStub(), + NewEtcdVolumeStatsProviderStub(), &recorder.InfiniteDiscardingFakeRecorder{}, "test-namespace", reconcileFilter, @@ -220,7 +220,7 @@ func TestHostedControlPlaneReconciler_ReconcileWorkflow(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := log.IntoContext(t.Context(), log.Log) - g := NewWithT(t) + g, _, _ := G(t) scheme := runtime.NewScheme() g.Expect(capiv2.AddToScheme(scheme)).To(Succeed()) g.Expect(v1alpha1.AddToScheme(scheme)).To(Succeed()) @@ -270,13 +270,13 @@ func TestHostedControlPlaneReconciler_ReconcileWorkflow(t *testing.T) { func TestHostedControlPlaneReconciler_FinalizerManagement(t *testing.T) { scheme := runtime.NewScheme() - g := NewWithT(t) + g, _, _ := G(t) g.Expect(v1alpha1.AddToScheme(scheme)).To(Succeed()) g.Expect(capiv2.AddToScheme(scheme)).To(Succeed()) t.Run("finalizer behavior during reconcile lifecycle", func(t *testing.T) { ctx := log.IntoContext(t.Context(), log.Log) - g := NewWithT(t) + g, _, _ := G(t) cluster := createTestCluster("test-cluster", "default") hostedControlPlane := withReplicas( withOwnerReference(createTestHostedControlPlane("test-hcp", "default"), cluster), @@ -319,7 +319,7 @@ func TestHostedControlPlaneReconciler_FinalizerManagement(t *testing.T) { t.Run("finalizer should be removed during deletion", func(t *testing.T) { ctx := log.IntoContext(t.Context(), log.Log) - g := NewWithT(t) + g, _, _ := G(t) cluster := createTestClusterWithPausedCondition("test-cluster", "default", false) hostedControlPlane := withDeletion( withOwnerReference(createTestHostedControlPlane("test-hcp", "default"), cluster), @@ -366,13 +366,13 @@ func TestHostedControlPlaneReconciler_FinalizerManagement(t *testing.T) { func TestHostedControlPlaneReconciler_OwnerReferenceValidation(t *testing.T) { scheme := runtime.NewScheme() - g := NewWithT(t) + g, _, _ := G(t) g.Expect(v1alpha1.AddToScheme(scheme)).To(Succeed()) g.Expect(capiv2.AddToScheme(scheme)).To(Succeed()) t.Run("should requeue when owner cluster is not found", func(t *testing.T) { ctx := log.IntoContext(t.Context(), log.Log) - g := NewWithT(t) + g, _, _ := G(t) hostedControlPlane := createTestHostedControlPlane("test-hcp", "default") fakeClient := fakeClient.NewClientBuilder(). @@ -399,7 +399,7 @@ func TestHostedControlPlaneReconciler_OwnerReferenceValidation(t *testing.T) { t.Run("should proceed when valid owner cluster is found", func(t *testing.T) { ctx := log.IntoContext(t.Context(), log.Log) - g := NewWithT(t) + g, _, _ := G(t) cluster := createTestCluster("test-cluster", "default") hostedControlPlane := withReplicas( withOwnerReference(createTestHostedControlPlane("test-hcp", "default"), cluster), @@ -436,13 +436,13 @@ func TestHostedControlPlaneReconciler_OwnerReferenceValidation(t *testing.T) { func TestHostedControlPlaneReconciler_StatusConditions(t *testing.T) { scheme := runtime.NewScheme() - g := NewWithT(t) + g, _, _ := G(t) g.Expect(v1alpha1.AddToScheme(scheme)).To(Succeed()) g.Expect(capiv2.AddToScheme(scheme)).To(Succeed()) t.Run("should set paused condition when cluster is paused", func(t *testing.T) { ctx := log.IntoContext(t.Context(), log.Log) - g := NewWithT(t) + g, _, _ := G(t) cluster := withPaused(createTestCluster("test-cluster", "default"), true) hostedControlPlane := withOwnerReference(createTestHostedControlPlane("test-hcp", "default"), cluster) @@ -488,7 +488,7 @@ func TestHostedControlPlaneReconciler_StatusConditions(t *testing.T) { func TestHostedControlPlaneReconciler_ObservedGeneration(t *testing.T) { scheme := runtime.NewScheme() - g := NewWithT(t) + g, _, _ := G(t) g.Expect(v1alpha1.AddToScheme(scheme)).To(Succeed()) g.Expect(capiv2.AddToScheme(scheme)).To(Succeed()) @@ -540,7 +540,7 @@ func TestHostedControlPlaneReconciler_ObservedGeneration(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := log.IntoContext(t.Context(), log.Log) - g := NewWithT(t) + g, _, _ := G(t) objs := []client.Object{tt.hostedControlPlane} if tt.cluster != nil { @@ -590,7 +590,7 @@ func TestHostedControlPlaneReconciler_ObservedGeneration(t *testing.T) { func TestHostedControlPlaneReconciler_NonExistentResource(t *testing.T) { scheme := runtime.NewScheme() - g := NewWithT(t) + g, _, _ := G(t) g.Expect(v1alpha1.AddToScheme(scheme)).To(Succeed()) fakeClient := fakeClient.NewClientBuilder(). @@ -613,7 +613,7 @@ func TestHostedControlPlaneReconciler_NonExistentResource(t *testing.T) { func TestHostedControlPlaneReconciler_ReconcileFilter(t *testing.T) { scheme := runtime.NewScheme() - g := NewWithT(t) + g, _, _ := G(t) g.Expect(v1alpha1.AddToScheme(scheme)).To(Succeed()) g.Expect(capiv2.AddToScheme(scheme)).To(Succeed()) @@ -644,7 +644,7 @@ func TestHostedControlPlaneReconciler_ReconcileFilter(t *testing.T) { hasFinalizer := func(t *testing.T, fc client.Client) bool { t.Helper() updated := &v1alpha1.HostedControlPlane{} - g := NewWithT(t) + g, _, _ := G(t) g.Expect(fc.Get(t.Context(), req.NamespacedName, updated)).To(Succeed()) return len(updated.Finalizers) > 0 } @@ -693,7 +693,7 @@ func TestHostedControlPlaneReconciler_ReconcileFilter(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) fc := buildClient() reconciler := createTestReconcilerWithFilter(fc, tt.filter) diff --git a/pkg/hostedcontrolplane/lifecycle_phases_test.go b/pkg/hostedcontrolplane/lifecycle_phases_test.go index 1993661..b1ab799 100644 --- a/pkg/hostedcontrolplane/lifecycle_phases_test.go +++ b/pkg/hostedcontrolplane/lifecycle_phases_test.go @@ -72,11 +72,11 @@ type testPhase struct { patchCluster func() // Function to simulate external systems BEFORE reconciliation // (e.g., cert-manager creating secrets, marking resources as ready) - simulateExternalSystems func(ctx context.Context, g *WithT) + simulateExternalSystems func(ctx context.Context, g Gomega) verifyConditionsBefore map[bool][]types2.GomegaMatcher verifyConditionsAfter map[bool][]types2.GomegaMatcher // Custom resource verifications AFTER reconciliation and simulation - verifyResources func(ctx context.Context, g *WithT) + verifyResources func(ctx context.Context, g Gomega) expectError string expectNoGenerationBump bool } @@ -109,7 +109,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { Level: slog.LevelError, // Set to LevelDebug for verbose output })) ctx := log.IntoContext(t.Context(), logger) - g := NewWithT(t) + g, _, _ := G(t) scheme, err := NewScheme() g.Expect(err).To(Succeed()) @@ -290,19 +290,19 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { }, }, expectNoGenerationBump: true, - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { g.Expect(hcp.Status.ObservedGeneration).To(BeNumerically("==", 0)) }, }, { name: "Verify ExternalManagedControlPlane Status", - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { g.Expect(hcp.Status.ExternalManagedControlPlane).To(PointTo(BeTrue())) }, }, { name: "Add Finalizer", - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { g.Expect(hcp.Finalizers).To(ContainElement("hcp.controlplane.cluster.x-k8s.io")) }, }, @@ -384,7 +384,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { ), }, }, - simulateExternalSystems: func(ctx context.Context, g *WithT) { + simulateExternalSystems: func(ctx context.Context, g Gomega) { makeCertificateReady(certManagerclient, managementClusterClient, hcp, cluster, "etcd-ca")(ctx, g) makeCertificateReady(certManagerclient, managementClusterClient, hcp, cluster, "front-proxy-ca")(ctx, g) }, @@ -413,7 +413,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { ), }, }, - simulateExternalSystems: func(ctx context.Context, g *WithT) { + simulateExternalSystems: func(ctx context.Context, g Gomega) { makeIssuerReady(certManagerclient, cluster, "etcd-ca")(ctx, g) makeIssuerReady(certManagerclient, cluster, "front-proxy-ca")(ctx, g) }, @@ -438,7 +438,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { ), }, }, - simulateExternalSystems: func(ctx context.Context, g *WithT) { + simulateExternalSystems: func(ctx context.Context, g Gomega) { serviceInterface := managementClusterClient.CoreV1().Services(hcp.Namespace) svc, err := serviceInterface.Get(ctx, fmt.Sprintf("s-%s", cluster.Name), metav1.GetOptions{}) g.Expect(err).To(Succeed()) @@ -466,7 +466,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { ), }, }, - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { g.Expect(hcp.Status.LegacyIP).To(Equal("1.1.1.1")) }, }, @@ -480,7 +480,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { ), }, }, - simulateExternalSystems: func(ctx context.Context, g *WithT) { + simulateExternalSystems: func(ctx context.Context, g Gomega) { endpoint, found, err := unstructured.NestedMap(infraCluster.Object, "spec", "controlPlaneEndpoint") g.Expect(err).To(Succeed()) g.Expect(found).To(BeTrue()) @@ -577,7 +577,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { ), }, }, - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { secretInterface := managementClusterClient.CoreV1().Secrets(hcp.Namespace) for _, name := range []string{ "admin", @@ -608,7 +608,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { ), }, }, - simulateExternalSystems: func(ctx context.Context, g *WithT) { + simulateExternalSystems: func(ctx context.Context, g Gomega) { serviceInterface := managementClusterClient.CoreV1().Services(hcp.Namespace) service, err := serviceInterface.Get( ctx, @@ -646,7 +646,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { ), }, }, - simulateExternalSystems: func(ctx context.Context, g *WithT) { + simulateExternalSystems: func(ctx context.Context, g Gomega) { statefulSetInterface := managementClusterClient.AppsV1().StatefulSets(hcp.Namespace) statefulSet, err := statefulSetInterface.Get( ctx, @@ -669,7 +669,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { )).Error().To(Succeed()) }, expectError: "statefulset offline", - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { g.Expect(hcp.Status.ETCDVolumeSize.Cmp(resource.MustParse("1Gi"))).To(Equal(0)) g.Expect(managementClusterClient.NetworkingV1().NetworkPolicies(hcp.Namespace).Get( ctx, fmt.Sprintf("%s-etcd", cluster.Name), metav1.GetOptions{}, @@ -686,7 +686,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { ), }, }, - simulateExternalSystems: func(ctx context.Context, g *WithT) { + simulateExternalSystems: func(ctx context.Context, g Gomega) { etcdClient.AlarmError = nil etcdClient.StatusError = nil volumeStatsStub.MaxUsage = 2048 @@ -698,13 +698,13 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { ), }, }, - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { g.Expect(hcp.Status.ETCDVolumeUsage).To(EqualResource(resource.MustParse("2Ki"))) }, }, { name: "Verify konnectivity Config", - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { g.Expect(managementClusterClient.CoreV1().ConfigMaps(hcp.Namespace).Get( ctx, fmt.Sprintf("%s-konnectivity", cluster.Name), @@ -714,7 +714,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { }, { name: "Verify no audit Config", - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { _, err := managementClusterClient.CoreV1().Secrets(hcp.Namespace).Get( ctx, fmt.Sprintf("%s-audit", cluster.Name), @@ -837,7 +837,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { }, { name: "Verify Metadata ConfigMaps exist", - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { coreV1Interface := workloadClusterClient.CoreV1() g.Expect(coreV1Interface.ConfigMaps(metav1.NamespacePublic).Get(ctx, api.ConfigMapClusterInfo, metav1.GetOptions{}), @@ -852,7 +852,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { }, { name: "Verify CoreDNS Deployment is scaled to 1", - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { deploymentInterface := workloadClusterClient.AppsV1().Deployments(metav1.NamespaceSystem) deployment, err := deploymentInterface.Get(ctx, "coredns", metav1.GetOptions{}) g.Expect(err).To(Succeed()) @@ -888,7 +888,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { }, { name: "Verify Konnectivity Deployment is scaled to 1", - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { deploymentInterface := workloadClusterClient.AppsV1().Deployments(metav1.NamespaceSystem) deployment, err := deploymentInterface.Get(ctx, "konnectivity-agent", metav1.GetOptions{}) g.Expect(err).To(Succeed()) @@ -927,7 +927,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { }, { name: "Add Node to Cluster", - simulateExternalSystems: func(ctx context.Context, g *WithT) { + simulateExternalSystems: func(ctx context.Context, g Gomega) { g.Expect(workloadClusterClient.CoreV1().Nodes().Create(ctx, &corev1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: "node-1", @@ -949,7 +949,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { }, { name: "Verify CoreDNS and Konnectivity Agent Deployments are still scaled to 1", - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { deploymentInterface := workloadClusterClient.AppsV1().Deployments(metav1.NamespaceSystem) for _, name := range []string{"coredns", "konnectivity-agent"} { deployment, err := deploymentInterface.Get(ctx, name, metav1.GetOptions{}) @@ -960,7 +960,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { }, { name: "Make Kube Proxy Daemonset Ready", - simulateExternalSystems: func(ctx context.Context, g *WithT) { + simulateExternalSystems: func(ctx context.Context, g Gomega) { daemonSetInterface := workloadClusterClient.AppsV1().DaemonSets(metav1.NamespaceSystem) daemonSet, err := daemonSetInterface.Get(ctx, "kube-proxy", metav1.GetOptions{}) g.Expect(err).To(Succeed()) @@ -989,7 +989,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { }, { name: "Add 3 Nodes to Cluster", - simulateExternalSystems: func(ctx context.Context, g *WithT) { + simulateExternalSystems: func(ctx context.Context, g Gomega) { slices.RepeatBy(3, func(i int) bool { g.Expect(workloadClusterClient.CoreV1().Nodes().Create(ctx, &corev1.Node{ ObjectMeta: metav1.ObjectMeta{ @@ -1014,7 +1014,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { ), }, }, - simulateExternalSystems: func(ctx context.Context, g *WithT) { + simulateExternalSystems: func(ctx context.Context, g Gomega) { daemonSetInterface := workloadClusterClient.AppsV1().DaemonSets(metav1.NamespaceSystem) daemonSet, err := daemonSetInterface.Get(ctx, "kube-proxy", metav1.GetOptions{}) g.Expect(err).To(Succeed()) @@ -1043,7 +1043,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { }, { name: "Verify CoreDNS Deployment is scaled to 2", - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { deploymentInterface := workloadClusterClient.AppsV1().Deployments(metav1.NamespaceSystem) corednsDeployment, err := deploymentInterface.Get(ctx, "coredns", metav1.GetOptions{}) g.Expect(err).To(Succeed()) @@ -1080,7 +1080,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { }, { name: "Verify Konnectivity Agent Deployment is scaled to 2", - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { deploymentInterface := workloadClusterClient.AppsV1().Deployments(metav1.NamespaceSystem) konnectivityDeployment, err := deploymentInterface.Get(ctx, "konnectivity-agent", metav1.GetOptions{}) g.Expect(err).To(Succeed()) @@ -1120,7 +1120,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { patchHCP: func() { hcp.Spec.Replicas = ptr.To(int32(3)) }, - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { deploymentInterface := managementClusterClient.AppsV1().Deployments(hcp.Namespace) apiServerDeployment, err := deploymentInterface.Get( ctx, @@ -1159,7 +1159,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { patchHCP: func() { hcp.Spec.ETCD.VolumeSize = ptr.To(resource.MustParse("2Gi")) }, - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { statefulSet, err := managementClusterClient.AppsV1().StatefulSets(hcp.Namespace).Get( ctx, fmt.Sprintf("%s-etcd", cluster.Name), @@ -1197,16 +1197,16 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { }, { name: "Let Etcd grow", - simulateExternalSystems: func(ctx context.Context, g *WithT) { + simulateExternalSystems: func(ctx context.Context, g Gomega) { volumeStatsStub.MaxUsage = ptr.To(resource.MustParse("1.5Gi")).Value() }, - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { g.Expect(hcp.Status.ETCDVolumeUsage).To(EqualResource(resource.MustParse("1.5Gi"))) }, }, { name: "Verify Etcd has been resized", - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { g.Expect(hcp.Status.ETCDVolumeSize).To(EqualResource(resource.MustParse("2Gi"))) }, }, @@ -1223,7 +1223,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { }, } }, - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { deploymentInterface := managementClusterClient.AppsV1().Deployments(hcp.Namespace) deployment, err := deploymentInterface.Get( ctx, @@ -1254,7 +1254,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { }, } }, - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { deploymentInterface := managementClusterClient.AppsV1().Deployments(hcp.Namespace) deployment, err := deploymentInterface.Get( ctx, @@ -1289,7 +1289,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { }, } }, - verifyResources: func(ctx context.Context, g *WithT) { + verifyResources: func(ctx context.Context, g Gomega) { deploymentInterface := managementClusterClient.AppsV1().Deployments(hcp.Namespace) deployment, err := deploymentInterface.Get( ctx, @@ -1402,7 +1402,7 @@ func TestHostedControlPlane_FullLifecycle(t *testing.T) { g.Expect(foundConditions).To(BeNumerically(">", 0), "Should have at least some conditions set") } -func simulateK8sAPI(ctx context.Context, kubernetesClient kubernetes.Interface, g *WithT) { +func simulateK8sAPI(ctx context.Context, kubernetesClient kubernetes.Interface, g Gomega) { namespaces := []string{metav1.NamespaceSystem} nodes, err := kubernetesClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) g.Expect(err).To(Succeed()) @@ -1468,7 +1468,7 @@ func simulateK8sAPI(ctx context.Context, kubernetesClient kubernetes.Interface, } } -func verifyConditions(after map[bool][]types2.GomegaMatcher, hcp *v1alpha1.HostedControlPlane, g *WithT) { +func verifyConditions(after map[bool][]types2.GomegaMatcher, hcp *v1alpha1.HostedControlPlane, g Gomega) { for status, matchers := range after { conditionStatus := slices.Ternary(status, metav1.ConditionTrue, metav1.ConditionFalse) partitionedConditions := slices.GroupBy(hcp.Status.Conditions, @@ -1496,8 +1496,8 @@ func makeTLSRouteReady( gatewayInterface *gwfake.Clientset, hcp *v1alpha1.HostedControlPlane, name string, -) func(ctx context.Context, g *WithT) { - return func(ctx context.Context, g *WithT) { +) func(ctx context.Context, g Gomega) { + return func(ctx context.Context, g Gomega) { tlsRouteInterface := gatewayInterface.GatewayV1alpha2().TLSRoutes(hcp.Namespace) tlsRoute, err := tlsRouteInterface.Get(ctx, name, metav1.GetOptions{}) g.Expect(err).To(Succeed()) @@ -1530,8 +1530,8 @@ func makeDeploymentReady( kubernetesInterface kubernetes.Interface, namespace string, name string, -) func(ctx context.Context, g *WithT) { - return func(ctx context.Context, g *WithT) { +) func(ctx context.Context, g Gomega) { + return func(ctx context.Context, g Gomega) { deploymentInterface := kubernetesInterface.AppsV1().Deployments(namespace) deployment, err := deploymentInterface.Get(ctx, name, metav1.GetOptions{}) g.Expect(err).To(Succeed()) @@ -1557,8 +1557,8 @@ func makeCertificateReady( hcp *v1alpha1.HostedControlPlane, cluster *capiv2.Cluster, name string, -) func(ctx context.Context, g *WithT) { - return func(ctx context.Context, g *WithT) { +) func(ctx context.Context, g Gomega) { + return func(ctx context.Context, g Gomega) { certificatesInterface := certManagerClient.CertmanagerV1().Certificates(cluster.Namespace) secretInterface := managementClusterClient.CoreV1().Secrets(hcp.Namespace) certName := fmt.Sprintf("%s-%s", cluster.Name, name) @@ -1608,8 +1608,8 @@ func makeIssuerReady( certManagerClient *certmanagerfake.Clientset, cluster *capiv2.Cluster, name string, -) func(ctx context.Context, g *WithT) { - return func(ctx context.Context, g *WithT) { +) func(ctx context.Context, g Gomega) { + return func(ctx context.Context, g Gomega) { issuersInterface := certManagerClient.CertmanagerV1().Issuers(cluster.Namespace) issuerName := fmt.Sprintf("%s-%s", cluster.Name, name) issuer, err := issuersInterface.Get(ctx, issuerName, metav1.GetOptions{}) diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index d91910a..452fe93 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -18,6 +18,7 @@ import ( webhookv1alpha1 "github.com/teutonet/cluster-api-provider-hosted-control-plane/api/v1alpha1/webhook" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/hostedcontrolplane" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/etc" + "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/emit" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/alias" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/etcd_client" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/s3_client" @@ -113,7 +114,7 @@ func Start(ctx context.Context, version string, operatorConfig etc.Config) (retE return err } - logr.FromContextAsSlogLogger(ctx).InfoContext(ctx, "Starting operator", "version", version) + emit.Info(ctx, emit.SinkLogger, nil, "OperatorStarting", "OperatorStart", "Starting operator", "version", version) if err := mgr.Start(ctx); err != nil { return fmt.Errorf("failed to start manager: %w", err) } diff --git a/pkg/operator/operator_test.go b/pkg/operator/operator_test.go index 27df1a5..7af5354 100644 --- a/pkg/operator/operator_test.go +++ b/pkg/operator/operator_test.go @@ -4,11 +4,12 @@ import ( "testing" . "github.com/onsi/gomega" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" ) func Test_fieldOwnerIsTheSame(t *testing.T) { t.Run("field owner is the same", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) g.Expect(hostedControlPlaneControllerName).To( Equal("hcp-controller"), "field owner has changed, this needs a migration, better undo it: got %s, want %s", @@ -38,7 +39,7 @@ func Test_newResource(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) _, err := newResource(tt.args.serviceName, tt.args.version) if tt.wantErr { g.Expect(err).To(HaveOccurred()) diff --git a/pkg/operator/util/args.go b/pkg/operator/util/args.go index 298b243..dad6972 100644 --- a/pkg/operator/util/args.go +++ b/pkg/operator/util/args.go @@ -5,11 +5,8 @@ import ( "fmt" "sort" - "github.com/go-logr/logr" slices "github.com/samber/lo" - "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/recorder" - "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/trace" + "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/emit" ) type ArgOption struct { @@ -67,33 +64,14 @@ func ArgsToSlice( if overriddenKeys := slices.Filter(slices.Keys(userArgs), func(key string, _ int) bool { return slices.HasKey(controllerArgs, key) }); len(overriddenKeys) > 0 { - logger := logr.FromContextAsSlogLogger(ctx) - span := trace.SpanFromContext(ctx) - eventRecorder := recorder.FromContext(ctx) - for _, key := range slices.Filter(overriddenKeys, func(key string, _ int) bool { return userArgs[key] != controllerArgs[key] }) { - logger.WarnContext( - ctx, - "User argument overridden by controller", - "arg", key, - "userValue", userArgs[key], - "controllerValue", controllerArgs[key], - ) - - span.AddEvent(argumentOverriddenEvent, trace.WithAttributes( - attribute.String("arg", key), - attribute.String("userValue", userArgs[key]), - attribute.String("controllerValue", controllerArgs[key]), - )) - - eventRecorder.Warnf( - nil, + emit.Warn(ctx, emit.SinkAll, nil, "ControllerArgumentTakesPrecedence", argumentOverriddenEvent, - "User argument overridden by controller: %s (userValue=%s, controllerValue=%s)", - key, userArgs[key], controllerArgs[key], + "User argument overridden by controller", + "arg", key, "userValue", userArgs[key], "controllerValue", controllerArgs[key], ) } } diff --git a/pkg/operator/util/args_test.go b/pkg/operator/util/args_test.go index a79b752..b6189af 100644 --- a/pkg/operator/util/args_test.go +++ b/pkg/operator/util/args_test.go @@ -5,6 +5,7 @@ import ( . "github.com/onsi/gomega" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/recorder" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" "sigs.k8s.io/controller-runtime/pkg/log" ) @@ -83,7 +84,7 @@ func TestArgsToSlice(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) result := argsToSlice(tt.userArgs, tt.controllerArgs, nil) g.Expect(result).To(Equal(tt.expected)) @@ -96,7 +97,7 @@ func TestArgsToSlice(t *testing.T) { } func TestArgsToSlice_Ordering(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) input1 := map[string]string{ "z": "26", "a": "1", "m": "13", } @@ -257,7 +258,7 @@ func TestArgsToSliceWithObservability(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := log.IntoContext(t.Context(), log.Log.WithName("test")) - g := NewWithT(t) + g, _, _ := G(t) returningFakeRecorder, eventRecorder := recorder.NewInfiniteReturningFakeRecorder() @@ -283,7 +284,7 @@ func TestArgsToSliceWithObservability(t *testing.T) { } func TestArgsToSliceWithObservabilityNilInputs(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) ctx := log.IntoContext(t.Context(), log.Log.WithName("test")) ctx = recorder.IntoContext(ctx, &recorder.InfiniteDiscardingFakeRecorder{}) @@ -300,7 +301,7 @@ func TestArgsToSliceWithObservabilityNilInputs(t *testing.T) { } func TestOverriddenArgStruct(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) arg := OverriddenArg{ Key: "test-key", UserValue: "user-value", @@ -314,7 +315,7 @@ func TestOverriddenArgStruct(t *testing.T) { } func TestArgsToSliceWithObservabilityBackwardCompatibility(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) ctx := log.IntoContext(t.Context(), log.Log.WithName("test")) ctx = recorder.IntoContext(ctx, &recorder.InfiniteDiscardingFakeRecorder{}) diff --git a/pkg/operator/util/checksum_test.go b/pkg/operator/util/checksum_test.go index 362d776..42f959d 100644 --- a/pkg/operator/util/checksum_test.go +++ b/pkg/operator/util/checksum_test.go @@ -4,6 +4,7 @@ import ( "testing" . "github.com/onsi/gomega" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/fake" @@ -149,7 +150,7 @@ func TestCalculateConfigMapChecksum(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) fakeClient := fake.NewClientset() tt.setupConfigMaps(fakeClient, tt.namespace, g) @@ -187,7 +188,7 @@ func TestCalculateConfigMapChecksum(t *testing.T) { } func TestCalculateConfigMapChecksum_Consistency(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) fakeClient := fake.NewClientset() g.Expect(fakeClient.CoreV1().ConfigMaps(namespace).Create( @@ -226,7 +227,7 @@ func TestCalculateConfigMapChecksum_Consistency(t *testing.T) { } func TestCalculateConfigMapChecksum_Ordering(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) fakeClient := fake.NewClientset() configMaps := []*corev1.ConfigMap{ @@ -279,7 +280,7 @@ func TestCalculateConfigMapChecksum_Ordering(t *testing.T) { } func TestCalculateConfigMapChecksum_DataChanges(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) fakeClient := fake.NewClientset() g.Expect(fakeClient.CoreV1().ConfigMaps(namespace).Create( @@ -372,7 +373,7 @@ func TestCalculateChecksum(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) result := calculateChecksum(tt.dataMaps...) if tt.expected == "" { @@ -398,7 +399,7 @@ func TestCalculateChecksum(t *testing.T) { } func TestCalculateChecksum_Ordering(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) map1 := map[string]any{"z": "26", "a": "1", "m": "13"} map2 := map[string]any{"a": "1", "m": "13", "z": "26"} @@ -414,7 +415,7 @@ func TestCalculateChecksum_Ordering(t *testing.T) { } func TestCalculateChecksum_Deterministic(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) dataMap := map[string]any{ "key1": "value1", "key2": "value2", diff --git a/pkg/operator/util/emit/emit.go b/pkg/operator/util/emit/emit.go new file mode 100644 index 0000000..302e4f5 --- /dev/null +++ b/pkg/operator/util/emit/emit.go @@ -0,0 +1,110 @@ +package emit + +import ( + "context" + "fmt" + "strings" + + "github.com/go-logr/logr" + "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/recorder" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" +) + +type Sink uint + +const ( + SinkRecorder Sink = 1 << iota + SinkLogger + SinkSpanEvent + SinkAll = SinkRecorder | SinkLogger | SinkSpanEvent +) + +func Warn(ctx context.Context, sinks Sink, related runtime.Object, reason, action, msg string, fields ...any) { + if sinks&SinkRecorder != 0 { + writeRecorder(ctx, true, related, reason, action, msg, fields...) + } + if sinks&(SinkLogger|SinkSpanEvent) != 0 { + writeLogAndSpan(ctx, sinks, true, related, reason, action, msg, fields...) + } +} + +func Info(ctx context.Context, sinks Sink, related runtime.Object, reason, action, msg string, fields ...any) { + if sinks&SinkRecorder != 0 { + writeRecorder(ctx, false, related, reason, action, msg, fields...) + } + if sinks&(SinkLogger|SinkSpanEvent) != 0 { + writeLogAndSpan(ctx, sinks, false, related, reason, action, msg, fields...) + } +} + +func writeRecorder(ctx context.Context, warn bool, related runtime.Object, reason, action, msg string, fields ...any) { + rec := recorder.FromContext(ctx) + note := noteWithFields(msg, fields) + if warn { + rec.Warnf(related, reason, action, "%s", note) + } else { + rec.Normalf(related, reason, action, "%s", note) + } +} + +func noteWithFields(msg string, fields []any) string { + if len(fields) == 0 { + return msg + } + var b strings.Builder + b.WriteString(msg) + for i := 0; i+1 < len(fields); i += 2 { + fmt.Fprintf(&b, " %v=%v", fields[i], fields[i+1]) + } + return b.String() +} + +func writeLogAndSpan( + ctx context.Context, + sinks Sink, + warn bool, + related runtime.Object, + reason, action, msg string, + extraFields ...any, +) { + relLogFields, relSpanAttrs := relatedFields(related) + logFields := append(append([]any{"reason", reason, "action", action}, relLogFields...), extraFields...) + spanAttrs := append( + []attribute.KeyValue{attribute.String("reason", reason), attribute.String("message", msg)}, + relSpanAttrs..., + ) + + if sinks&SinkLogger != 0 { + logger := logr.FromContextAsSlogLogger(ctx) + if warn { + logger.WarnContext(ctx, msg, logFields...) + } else { + logger.InfoContext(ctx, msg, logFields...) + } + } + if sinks&SinkSpanEvent != 0 { + trace.SpanFromContext(ctx).AddEvent(action, trace.WithAttributes(spanAttrs...)) + } +} + +func relatedFields(related runtime.Object) (logFields []any, spanAttrs []attribute.KeyValue) { + if related == nil { + return nil, nil + } + obj, ok := related.(metav1.Object) + if !ok { + return nil, nil + } + if name := obj.GetName(); name != "" { + logFields = append(logFields, "related.name", name) + spanAttrs = append(spanAttrs, attribute.String("related.name", name)) + } + if ns := obj.GetNamespace(); ns != "" { + logFields = append(logFields, "related.namespace", ns) + spanAttrs = append(spanAttrs, attribute.String("related.namespace", ns)) + } + return logFields, spanAttrs +} diff --git a/pkg/operator/util/emit/emit_test.go b/pkg/operator/util/emit/emit_test.go new file mode 100644 index 0000000..f0c273f --- /dev/null +++ b/pkg/operator/util/emit/emit_test.go @@ -0,0 +1,219 @@ +package emit + +import ( + "context" + "encoding/json" + "log/slog" + "strings" + "testing" + + "github.com/go-logr/logr" + . "github.com/onsi/gomega" + "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/recorder" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/sdk/trace/tracetest" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type testEnv struct { + logBuf *strings.Builder + events *recorder.InfiniteReturningFakeRecorder + spans *tracetest.SpanRecorder +} + +func newTestEnv(t *testing.T) (context.Context, testEnv) { + t.Helper() + + var buf strings.Builder + ctx := logr.NewContext(t.Context(), logr.FromSlogHandler( + slog.NewTextHandler(&buf, &slog.HandlerOptions{Level: slog.LevelDebug}), + )) + + fakeRec, rec := recorder.NewInfiniteReturningFakeRecorder() + ctx = recorder.IntoContext(ctx, rec) + + sr := tracetest.NewSpanRecorder() + tp := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(sr)) + ctx, span := tp.Tracer("test").Start(ctx, "test") + t.Cleanup(func() { span.End() }) + + return ctx, testEnv{logBuf: &buf, events: fakeRec, spans: sr} +} + +func (e testEnv) spanEvents() []sdktrace.Event { + return e.spans.Started()[0].Events() +} + +func spanAttrMap(event sdktrace.Event) map[string]string { + m := make(map[string]string, len(event.Attributes)) + for _, a := range event.Attributes { + m[string(a.Key)] = a.Value.AsString() + } + return m +} + +func TestWarn(t *testing.T) { + testEmit(t, true) +} + +func TestInfo(t *testing.T) { + testEmit(t, false) +} + +func testEmit(t *testing.T, warn bool) { + t.Helper() + + recorderPrefix := "Normal" + logLevel := "level=INFO" + if warn { + recorderPrefix = "Warning" + logLevel = "level=WARN" + } + + call := func(ctx context.Context, s Sink) { + if warn { + Warn(ctx, s, nil, "MyReason", "MyAction", "something bad") + } else { + Info(ctx, s, nil, "MyReason", "MyAction", "something bad") + } + } + + t.Run("recorder gets event", func(t *testing.T) { + g, _, _ := G(t) + ctx, env := newTestEnv(t) + call(ctx, SinkRecorder) + g.Expect(env.events.Events). + To(ContainElement(ContainSubstring(recorderPrefix + " MyReason MyAction something bad"))) + }) + + t.Run("logger gets correct level with fields", func(t *testing.T) { + g, _, _ := G(t) + ctx, env := newTestEnv(t) + call(ctx, SinkLogger) + out := env.logBuf.String() + g.Expect(out).To(ContainSubstring(logLevel)) + g.Expect(out).To(ContainSubstring("something bad")) + g.Expect(out).To(ContainSubstring("reason=MyReason")) + g.Expect(out).To(ContainSubstring("action=MyAction")) + }) + + t.Run("span event has action name and attributes", func(t *testing.T) { + g, _, _ := G(t) + ctx, env := newTestEnv(t) + call(ctx, SinkSpanEvent) + events := env.spanEvents() + g.Expect(events).To(HaveLen(1)) + g.Expect(events[0].Name).To(Equal("MyAction")) + attrs := spanAttrMap(events[0]) + g.Expect(attrs).To(HaveKeyWithValue("reason", "MyReason")) + g.Expect(attrs).To(HaveKeyWithValue("message", "something bad")) + }) +} + +func TestFields(t *testing.T) { + t.Run("logger includes extra fields as structured JSON key-value pairs", func(t *testing.T) { + g, _, _ := G(t) + + var buf strings.Builder + ctx := logr.NewContext(context.Background(), logr.FromSlogHandler( + slog.NewJSONHandler(&buf, &slog.HandlerOptions{Level: slog.LevelDebug}), + )) + + Info(ctx, SinkLogger, nil, "R", "A", "msg", "key1", "val1", "key2", 42) + + var entry map[string]any + g.Expect(json.Unmarshal([]byte(strings.TrimSpace(buf.String())), &entry)).To(Succeed()) + g.Expect(entry).To(HaveKeyWithValue("key1", "val1")) + g.Expect(entry).To(HaveKeyWithValue("key2", float64(42))) + g.Expect(entry).To(HaveKeyWithValue("msg", "msg")) + g.Expect(entry).To(HaveKeyWithValue("reason", "R")) + g.Expect(entry).To(HaveKeyWithValue("action", "A")) + }) + + t.Run("recorder note includes extra fields as key=value pairs", func(t *testing.T) { + g, _, _ := G(t) + ctx, env := newTestEnv(t) + Info(ctx, SinkRecorder, nil, "R", "A", "msg", "key1", "val1", "key2", 42) + g.Expect(env.events.Events).To(ContainElement(ContainSubstring("msg key1=val1 key2=42"))) + }) +} + +func TestSinkIsolation(t *testing.T) { + tests := []struct { + name string + sinks Sink + expectRecorder bool + expectLogger bool + expectSpan bool + }{ + {"recorder only", SinkRecorder, true, false, false}, + {"logger only", SinkLogger, false, true, false}, + {"span only", SinkSpanEvent, false, false, true}, + {"recorder+logger", SinkRecorder | SinkLogger, true, true, false}, + {"all", SinkAll, true, true, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g, _, _ := G(t) + ctx, env := newTestEnv(t) + Warn(ctx, tt.sinks, nil, "R", "A", "msg") + + if tt.expectRecorder { + g.Expect(env.events.Events).NotTo(BeEmpty()) + } else { + g.Expect(env.events.Events).To(BeEmpty()) + } + if tt.expectLogger { + g.Expect(env.logBuf.String()).NotTo(BeEmpty()) + } else { + g.Expect(env.logBuf.String()).To(BeEmpty()) + } + if tt.expectSpan { + g.Expect(env.spanEvents()).NotTo(BeEmpty()) + } else { + g.Expect(env.spanEvents()).To(BeEmpty()) + } + }) + } +} + +func TestRelatedObject(t *testing.T) { + related := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "mypod", Namespace: "mynamespace"}} + + t.Run("logger includes name and namespace", func(t *testing.T) { + g, _, _ := G(t) + ctx, env := newTestEnv(t) + Warn(ctx, SinkLogger, related, "R", "A", "msg") + out := env.logBuf.String() + g.Expect(out).To(ContainSubstring("related.name=mypod")) + g.Expect(out).To(ContainSubstring("related.namespace=mynamespace")) + }) + + t.Run("span includes name and namespace", func(t *testing.T) { + g, _, _ := G(t) + ctx, env := newTestEnv(t) + Warn(ctx, SinkSpanEvent, related, "R", "A", "msg") + attrs := spanAttrMap(env.spanEvents()[0]) + g.Expect(attrs).To(HaveKeyWithValue("related.name", "mypod")) + g.Expect(attrs).To(HaveKeyWithValue("related.namespace", "mynamespace")) + }) + + t.Run("nil related omits related fields from logger", func(t *testing.T) { + g, _, _ := G(t) + ctx, env := newTestEnv(t) + Warn(ctx, SinkLogger, nil, "R", "A", "msg") + g.Expect(env.logBuf.String()).NotTo(ContainSubstring("related.")) + }) + + t.Run("nil related omits related attributes from span", func(t *testing.T) { + g, _, _ := G(t) + ctx, env := newTestEnv(t) + Warn(ctx, SinkSpanEvent, nil, "R", "A", "msg") + attrs := spanAttrMap(env.spanEvents()[0]) + g.Expect(attrs).NotTo(HaveKey("related.name")) + g.Expect(attrs).NotTo(HaveKey("related.namespace")) + }) +} diff --git a/pkg/operator/util/images_test.go b/pkg/operator/util/images_test.go index b20085b..3cf2585 100644 --- a/pkg/operator/util/images_test.go +++ b/pkg/operator/util/images_test.go @@ -5,6 +5,7 @@ import ( . "github.com/onsi/gomega" "github.com/teutonet/cluster-api-provider-hosted-control-plane/api/v1alpha1" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" "k8s.io/utils/ptr" ) @@ -48,7 +49,7 @@ func TestBuildImageString(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) g.Expect(buildImageString(tt.registry, tt.repository, tt.tag)).To(Equal(tt.expected)) }) } @@ -147,7 +148,7 @@ func TestResolveImageFromSpec(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) g.Expect(resolveImageFromSpec(tt.imageSpec, tt.defaultRegistry, tt.defaultRepository, tt.defaultTag)). To(Equal(tt.expected)) }) @@ -202,7 +203,7 @@ func TestResolveKubernetesComponentImage(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) g.Expect(ResolveKubernetesComponentImage(tt.imageSpec, tt.component, tt.version)).To(Equal(tt.expected)) }) } @@ -242,7 +243,7 @@ func TestResolveETCDImage(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) g.Expect(ResolveETCDImage(tt.imageSpec, tt.version)).To(Equal(tt.expected)) }) } @@ -292,7 +293,7 @@ func TestResolveKonnectivityImage(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) g.Expect(ResolveKonnectivityImage(tt.imageSpec, tt.component, tt.minorVersion)).To(Equal(tt.expected)) }) } @@ -331,7 +332,7 @@ func TestResolveKubeProxyImage(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) g.Expect(ResolveKubeProxyImage(tt.imageSpec, tt.version)).To(Equal(tt.expected)) }) } @@ -376,7 +377,7 @@ func TestResolveCoreDNSImage(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) g.Expect(ResolveCoreDNSImage(tt.imageSpec)).To(Equal(tt.expected)) }) } @@ -421,7 +422,7 @@ func TestResolveAuditWebhookImage(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) g.Expect(ResolveNginxImage(tt.imageSpec)).To(Equal(tt.expected)) }) } diff --git a/pkg/operator/util/names/labels_test.go b/pkg/operator/util/names/labels_test.go index b656e7c..f2c6991 100644 --- a/pkg/operator/util/names/labels_test.go +++ b/pkg/operator/util/names/labels_test.go @@ -4,6 +4,7 @@ import ( "testing" . "github.com/onsi/gomega" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" capiv2 "sigs.k8s.io/cluster-api/api/core/v1beta2" ) @@ -80,7 +81,7 @@ func TestGetControlPlaneLabels(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) cluster := &capiv2.Cluster{} cluster.Name = tt.clusterName result := GetControlPlaneLabels(cluster, tt.component) @@ -144,7 +145,7 @@ func TestGetControlPlaneSelector(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) cluster := &capiv2.Cluster{} cluster.Name = tt.clusterName result := GetControlPlaneSelector(cluster, tt.component) @@ -162,7 +163,7 @@ func TestGetControlPlaneSelector(t *testing.T) { } func TestGetKubeconfigLabel(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) result := GetKubeconfigLabel() g.Expect(result).To(HaveLen(1)) @@ -200,7 +201,7 @@ func TestGetKubeconfigUserLabel(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) result := GetKubeconfigUserLabel(tt.username) g.Expect(result).To(Equal(tt.expected)) }) @@ -241,7 +242,7 @@ func TestGetKubeconfigLabels(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) result := GetKubeconfigLabels(tt.username) g.Expect(result).To(Equal(tt.expected)) }) @@ -262,7 +263,7 @@ func TestGetControlPlaneLabelsSelectorConsistency(t *testing.T) { for _, tc := range testCases { t.Run("consistency_"+tc.clusterName+"_"+tc.component, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) cluster := &capiv2.Cluster{} cluster.Name = tc.clusterName diff --git a/pkg/operator/util/names/names_test.go b/pkg/operator/util/names/names_test.go index 4eb6bbf..230862d 100644 --- a/pkg/operator/util/names/names_test.go +++ b/pkg/operator/util/names/names_test.go @@ -4,6 +4,7 @@ import ( "testing" . "github.com/onsi/gomega" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" capiv2 "sigs.k8s.io/cluster-api/api/core/v1beta2" ) @@ -37,7 +38,7 @@ func TestGetRootIssuerName(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) cluster := &capiv2.Cluster{} cluster.Name = tt.clusterName result := GetRootIssuerName(cluster) @@ -66,7 +67,7 @@ func TestGetCAIssuerName(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) cluster := &capiv2.Cluster{} cluster.Name = tt.clusterName result := GetCAIssuerName(cluster) @@ -95,7 +96,7 @@ func TestGetServiceName(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) cluster := &capiv2.Cluster{} cluster.Name = tt.clusterName result := GetServiceName(cluster) @@ -133,7 +134,7 @@ func TestGetInternalServiceHost(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) cluster := &capiv2.Cluster{} cluster.Name = tt.clusterName cluster.Namespace = tt.clusterNS @@ -166,7 +167,7 @@ func TestGetEtcdClientServiceDNSName(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) cluster := &capiv2.Cluster{} cluster.Name = tt.clusterName cluster.Namespace = tt.clusterNS @@ -207,7 +208,7 @@ func TestGetEtcdDNSNames(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) cluster := &capiv2.Cluster{} cluster.Name = tt.clusterName cluster.Namespace = tt.clusterNS @@ -255,7 +256,7 @@ func TestGetKubeconfigSecretName(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) cluster := &capiv2.Cluster{} cluster.Name = tt.clusterName result := GetKubeconfigSecretName(cluster, tt.kubeconfigName) @@ -293,7 +294,7 @@ func TestGetKonnectivityServerHost(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) cluster := &capiv2.Cluster{} cluster.Name = tt.clusterName cluster.Spec.ControlPlaneEndpoint.Host = tt.controlPlaneEndpointHost @@ -323,7 +324,7 @@ func TestGetTLSRouteName(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) cluster := &capiv2.Cluster{} cluster.Name = tt.clusterName result := GetTLSRouteName(cluster) @@ -352,7 +353,7 @@ func TestGetKonnectivityTLSRouteName(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) cluster := &capiv2.Cluster{} cluster.Name = tt.clusterName result := GetKonnectivityTLSRouteName(cluster) @@ -393,7 +394,7 @@ func TestCertificateNames(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) result := tt.function(cluster) g.Expect(result).To(Equal(tt.expected)) }) diff --git a/pkg/operator/util/pod_test.go b/pkg/operator/util/pod_test.go index 0ad8078..cf554cb 100644 --- a/pkg/operator/util/pod_test.go +++ b/pkg/operator/util/pod_test.go @@ -4,6 +4,7 @@ import ( "testing" . "github.com/onsi/gomega" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" corev1ac "k8s.io/client-go/applyconfigurations/core/v1" ) @@ -12,7 +13,8 @@ func TestValidateMounts(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - validateMountTest(NewWithT(t), tt) + g, _, _ := G(t) + validateMountTest(g, tt) }) } } @@ -231,7 +233,7 @@ func validateExpectedError(g Gomega, err error, expectedMsg string) { } func TestValidateMountsErrorMessage(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) // Test specific error message format podSpec := corev1ac.PodSpec(). WithContainers( @@ -253,7 +255,7 @@ func TestValidateMountsErrorMessage(t *testing.T) { } func TestValidateMountsNilPodSpec(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) // Test edge case with nil pod spec - this will panic, which is acceptable // since it indicates a programming error defer func() { diff --git a/pkg/operator/util/resources_test.go b/pkg/operator/util/resources_test.go index 8ace7f8..93cdc98 100644 --- a/pkg/operator/util/resources_test.go +++ b/pkg/operator/util/resources_test.go @@ -5,6 +5,7 @@ import ( . "github.com/onsi/gomega" . "github.com/onsi/gomega/gstruct" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" corev1ac "k8s.io/client-go/applyconfigurations/core/v1" @@ -16,7 +17,8 @@ func TestResourceRequirementsToResourcesApplyConfiguration(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { result := ResourceRequirementsToResourcesApplyConfiguration(tt.input) - tt.validate(NewWithT(t), result) + g, _, _ := G(t) + tt.validate(g, result) }) } } @@ -230,7 +232,7 @@ func getAdvancedResourceTestCases() []resourceTestCase { } func TestResourceRequirementsToResourcesApplyConfigurationReturnType(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) input := corev1.ResourceRequirements{ Requests: corev1.ResourceList{ corev1.ResourceCPU: resource.MustParse("100m"), diff --git a/pkg/operator/util/version_test.go b/pkg/operator/util/version_test.go index a47a49f..a0eba3f 100644 --- a/pkg/operator/util/version_test.go +++ b/pkg/operator/util/version_test.go @@ -5,6 +5,7 @@ import ( . "github.com/onsi/gomega" "github.com/teutonet/cluster-api-provider-hosted-control-plane/api/v1alpha1" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" ) func TestGetMinorVersion(t *testing.T) { @@ -120,7 +121,7 @@ func TestGetMinorVersion(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) hcp := &v1alpha1.HostedControlPlane{ Spec: v1alpha1.HostedControlPlaneSpec{ Version: tt.version, @@ -141,7 +142,7 @@ func TestGetMinorVersion(t *testing.T) { } func TestGetMinorVersionNilHostedControlPlane(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) // Test edge case with nil input - this will panic, which is acceptable behavior // since it indicates a programming error defer func() { diff --git a/pkg/reconcilers/apiserverresources/apiserverresources_test.go b/pkg/reconcilers/apiserverresources/apiserverresources_test.go index c948968..be3b9a8 100644 --- a/pkg/reconcilers/apiserverresources/apiserverresources_test.go +++ b/pkg/reconcilers/apiserverresources/apiserverresources_test.go @@ -6,12 +6,13 @@ import ( . "github.com/onsi/gomega" slices "github.com/samber/lo" "github.com/teutonet/cluster-api-provider-hosted-control-plane/api/v1alpha1" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" corev1 "k8s.io/api/core/v1" corev1ac "k8s.io/client-go/applyconfigurations/core/v1" ) func TestApiServerResourcesReconciler_extractAdditionalVolumesAndMounts(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) reconciler := &apiServerResourcesReconciler{} configMapMountName := "custom-config" @@ -150,7 +151,7 @@ func TestApiServerResourcesReconciler_ResourceLifecycle_MountConfiguration(t *te for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) volumes, mounts := reconciler.extractAdditionalVolumesAndMounts(tt.mounts) g.Expect(volumes).To(HaveLen(tt.expectedVolumeCount)) diff --git a/pkg/reconcilers/certificates/reconciler.go b/pkg/reconcilers/certificates/reconciler.go index a5fece5..d9a1615 100644 --- a/pkg/reconcilers/certificates/reconciler.go +++ b/pkg/reconcilers/certificates/reconciler.go @@ -16,8 +16,8 @@ import ( slices "github.com/samber/lo" "github.com/teutonet/cluster-api-provider-hosted-control-plane/api/v1alpha1" operatorutil "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util" + "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/emit" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/names" - "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/recorder" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/util/tracing" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" @@ -47,7 +47,6 @@ func NewCertificateReconciler( caCertificateDuration time.Duration, certificateDuration time.Duration, konnectivityServerAudience string, - recorder recorder.Recorder, ) CertificateReconciler { return &certificateReconciler{ certManagerClient: certManagerClient, @@ -56,7 +55,6 @@ func NewCertificateReconciler( certificateDuration: certificateDuration, certificateRenewBefore: int32(50), konnectivityServerAudience: konnectivityServerAudience, - recorder: recorder, tracer: tracing.GetTracer("certificates"), } } @@ -68,7 +66,6 @@ type certificateReconciler struct { certificateDuration time.Duration certificateRenewBefore int32 konnectivityServerAudience string - recorder recorder.Recorder tracer string } @@ -584,12 +581,12 @@ func (cr *certificateReconciler) cleanupOrphanedCertificates( ); err != nil && !apierrors.IsNotFound(err) { return fmt.Errorf("failed to delete orphaned certificate %s: %w", cert.Name, err) } - cr.recorder.Normalf( + emit.Info(ctx, emit.SinkRecorder, &cert, "CertificateDeleted", "CertificateDeleted", - "Deleted orphaned certificate %s", - cert.Name, + "Deleted orphaned certificate", + "name", cert.Name, ) return nil }, diff --git a/pkg/reconcilers/certificates/reconciler_test.go b/pkg/reconcilers/certificates/reconciler_test.go index 0795e77..11144be 100644 --- a/pkg/reconcilers/certificates/reconciler_test.go +++ b/pkg/reconcilers/certificates/reconciler_test.go @@ -6,6 +6,7 @@ import ( certmanagerv1 "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" certmanagermetav1 "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" . "github.com/onsi/gomega" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" ) func TestCertificateReconciler_isIssuerReady(t *testing.T) { @@ -71,7 +72,7 @@ func TestCertificateReconciler_isIssuerReady(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) result := reconciler.isIssuerReady(tt.issuer) g.Expect(result).To(Equal(tt.expected)) @@ -133,7 +134,7 @@ func TestCertificateReconciler_isCertificateReady(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) result := reconciler.isCertificateReady(tt.certificate) g.Expect(result).To(Equal(tt.expected)) @@ -142,7 +143,7 @@ func TestCertificateReconciler_isCertificateReady(t *testing.T) { } func TestCertificateReconciler_ErrorHandling_EdgeCases(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) reconciler := &certificateReconciler{} emptyIssuer := &certmanagerv1.Issuer{ diff --git a/pkg/reconcilers/etcd_cluster/reconciler.go b/pkg/reconcilers/etcd_cluster/reconciler.go index 8c4523b..682c741 100644 --- a/pkg/reconcilers/etcd_cluster/reconciler.go +++ b/pkg/reconcilers/etcd_cluster/reconciler.go @@ -15,8 +15,8 @@ import ( slices "github.com/samber/lo" "github.com/teutonet/cluster-api-provider-hosted-control-plane/api/v1alpha1" operatorutil "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util" + "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/emit" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/names" - "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/recorder" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/alias" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/etcd_client" @@ -38,6 +38,7 @@ import ( appsv1ac "k8s.io/client-go/applyconfigurations/apps/v1" corev1ac "k8s.io/client-go/applyconfigurations/core/v1" konstants "k8s.io/kubernetes/cmd/kubeadm/app/constants" + "k8s.io/utils/ptr" capiv2 "sigs.k8s.io/cluster-api/api/core/v1beta2" ) @@ -45,7 +46,7 @@ var ( etcdVolumeResizeEvent = "EtcdVolumeAutoResize" etcdVolumeSizeReCalculatedEvent = "EtcdVolumeSizeRecalculated" errETCDBackupStalled = errors.New("etcd backup timed out: no progress in time window") - etcdClientVersion3_7 = semver.MustParse(version.V3_7.String()) + etcdClientVersion37 = semver.MustParse(version.V3_7.String()) ) const ( @@ -70,7 +71,6 @@ func NewEtcdClusterReconciler( etcdClientFactory etcd_client.EtcdClientFactory, s3ClientFactory s3_client.S3ClientFactory, volumeStatsProvider volume_stats.EtcdVolumeStatsProvider, - recorder recorder.Recorder, componentLabel string, apiServerComponentLabel string, controllerNamespace string, @@ -91,7 +91,6 @@ func NewEtcdClusterReconciler( etcdClientFactory: etcdClientFactory, s3ClientFactory: s3ClientFactory, volumeStatsProvider: volumeStatsProvider, - recorder: recorder, componentLabel: componentLabel, apiServerComponentLabel: apiServerComponentLabel, controllerComponent: systemControllerComponent, @@ -108,7 +107,6 @@ type etcdClusterReconciler struct { etcdClientFactory etcd_client.EtcdClientFactory s3ClientFactory s3_client.S3ClientFactory volumeStatsProvider volume_stats.EtcdVolumeStatsProvider - recorder recorder.Recorder componentLabel string apiServerComponentLabel string controllerComponent string @@ -166,7 +164,16 @@ func (er *etcdClusterReconciler) ReconcileEtcdCluster( return "etcd client Service not ready", nil } - hostedControlPlane.Status.ETCDVolumeSize = er.getETCDVolumeSize(hostedControlPlane) + etcdPods, err := er.listEtcdPods(ctx, hostedControlPlane, cluster) + if err != nil { + return "", err + } + + if err := er.reconcileETCDSpaceUsage(ctx, hostedControlPlane, etcdPods); err != nil { + return "", fmt.Errorf("failed to reconcile etcd space usage: %w", err) + } + + hostedControlPlane.Status.ETCDVolumeSize = er.getETCDVolumeSize(ctx, hostedControlPlane) if err := er.reconcilePVCSizes(ctx, hostedControlPlane, cluster); err != nil { return "", fmt.Errorf("failed to reconcile size of etcd PVCs: %w", err) @@ -192,10 +199,6 @@ func (er *etcdClusterReconciler) ReconcileEtcdCluster( return "etcd StatefulSet is not ready", nil } - etcdPods, err := er.listEtcdPods(ctx, hostedControlPlane, cluster) - if err != nil { - return "", err - } if err := er.reconcileETCDMaintenance(ctx, etcdClient, hostedControlPlane, etcdPods); err != nil { return "", err @@ -280,14 +283,14 @@ func (er *etcdClusterReconciler) reconcilePVCSizes( err, ) } - er.recorder.Normalf( + emit.Info(ctx, emit.SinkRecorder, &pvc, "RequestedSizeChanged", etcdVolumeResizeEvent, - "Resized etcd volume %s/%s from %s to %s", - pvc.Namespace, pvc.Name, - pvc.Spec.Resources.Requests.Storage().String(), - hostedControlPlane.Status.ETCDVolumeSize.String(), + "Resized etcd volume", + "namespace", pvc.Namespace, "name", pvc.Name, + "from", pvc.Spec.Resources.Requests.Storage().String(), + "to", hostedControlPlane.Status.ETCDVolumeSize.String(), ) } } @@ -297,20 +300,23 @@ func (er *etcdClusterReconciler) reconcilePVCSizes( ) } -func (er *etcdClusterReconciler) getETCDVolumeSize(hostedControlPlane *v1alpha1.HostedControlPlane) resource.Quantity { +func (er *etcdClusterReconciler) getETCDVolumeSize( + ctx context.Context, + hostedControlPlane *v1alpha1.HostedControlPlane, +) resource.Quantity { if hostedControlPlane.Spec.ETCD.AutoGrowEnabled() { value := hostedControlPlane.Status.ETCDVolumeSize.DeepCopy() value.Sub(hostedControlPlane.Status.ETCDVolumeUsage) if value.Cmp(er.etcdServerStorageBuffer) == -1 { newValue := hostedControlPlane.Status.ETCDVolumeSize.DeepCopy() newValue.Add(er.etcdServerStorageIncrement) - er.recorder.Normalf( - nil, + emit.Info(ctx, emit.SinkRecorder, + hostedControlPlane, "EtcdSpaceUsageCrossedThreshold", etcdVolumeSizeReCalculatedEvent, - "Calculated new etcd volume size: from %s to %s", - hostedControlPlane.Status.ETCDVolumeSize.String(), - newValue.String(), + "Calculated new etcd volume size", + "from", hostedControlPlane.Status.ETCDVolumeSize.String(), + "to", newValue.String(), ) return newValue } @@ -377,12 +383,12 @@ func (er *etcdClusterReconciler) reconcileETCDBackup( hostedControlPlane.Status.ETCDNextBackupTime = metav1.NewTime( resolvedSchedule.Next(hostedControlPlane.Status.ETCDLastBackupTime.Time), ) - er.recorder.Normalf( - nil, + emit.Info(ctx, emit.SinkRecorder, + hostedControlPlane, "EtcdBackupFinished", "EtcdBackupFinished", - "Finished etcd backup. Next backup scheduled at %s", - hostedControlPlane.Status.ETCDNextBackupTime.String(), + "Finished etcd backup", + "nextBackupTime", hostedControlPlane.Status.ETCDNextBackupTime.String(), ) } return nil @@ -432,16 +438,29 @@ func (er *etcdClusterReconciler) reconcileETCDSpaceUsage( return pod.Spec.NodeName != "" }) - fsUsage := er.getFilesystemUsage(ctx, span, scheduledPods) + fsUsage, err := er.volumeStatsProvider.GetMaxEtcdVolumeUsage(ctx, scheduledPods) + if err != nil { + if fsUsage == 0 { + return fmt.Errorf("failed to get etcd volume usage for all pods: %w", err) + } + // partial failure: don't drop below previous known value + fsUsage = max(fsUsage, hostedControlPlane.Status.ETCDVolumeUsage.Value()) + emit.Warn(ctx, + emit.SinkAll, + hostedControlPlane, "EtcdFilesystemUsageOnlyPartiallyAvailable", "EtcdSpaceUsageCheck", + "Failed to get etcd filesystem usage for some pods", "err", err, + ) + } - hostedControlPlane.Status.ETCDVolumeUsage = *resource.NewQuantity( - slices.Max([]int64{ // only grow, never shrink - fsUsage, - hostedControlPlane.Status.ETCDVolumeUsage.Value(), - }), - resource.BinarySI, + span.SetAttributes( + attribute.String( + "etcd.volume.filesystem_usage", + resource.NewQuantity(fsUsage, resource.BinarySI).String(), + ), ) + hostedControlPlane.Status.ETCDVolumeUsage = *resource.NewQuantity(fsUsage, resource.BinarySI) + return nil }, ) @@ -474,9 +493,10 @@ func (er *etcdClusterReconciler) reconcileETCDDefragmentation( return fmt.Errorf("failed to defragment etcd: %w", defragmentationErr) } hostedControlPlane.Status.ETCDLastDefragTime = metav1.NewTime(time.Now()) - er.recorder.Normalf(nil, "FragmentationThresholdExceeded", "EtcdDefragmentation", - "Defragmented etcd members due to fragmentation above %.0f%%", - etcdDefragmentationFragmentationThreshold*100, + emit.Info(ctx, emit.SinkRecorder, + hostedControlPlane, "FragmentationThresholdExceeded", "EtcdDefragmentation", + "Defragmented etcd members due to fragmentation", + "threshold", fmt.Sprintf("%.0f%%", etcdDefragmentationFragmentationThreshold*100), ) return nil } @@ -485,28 +505,6 @@ func (er *etcdClusterReconciler) reconcileETCDDefragmentation( ) } -func (er *etcdClusterReconciler) getFilesystemUsage( - ctx context.Context, - span trace.Span, - scheduledPods []corev1.Pod, -) int64 { - maxFsUsage, err := er.volumeStatsProvider.GetMaxEtcdVolumeUsage(ctx, scheduledPods) - if err != nil { - span.RecordError(err) - er.recorder.Warnf(nil, "EtcdFilesystemUsageUnavailable", "EtcdSpaceUsageCheck", - "Failed to get etcd filesystem usage: %s", err) - return 0 - } - - span.SetAttributes( - attribute.String( - "etcd.volume.filesystem_usage", - resource.NewQuantity(maxFsUsage, resource.BinarySI).String(), - ), - ) - - return maxFsUsage -} //+kubebuilder:rbac:groups="",resources=services,verbs=create;patch @@ -655,39 +653,46 @@ func (er *etcdClusterReconciler) etcdIsHealthy( if err != nil { return fmt.Errorf("failed to list etcd alarms: %w", err) } - if len(alarmResponse.Alarms) > 0 { - var ignoredAlarms []*etcdserverpb.AlarmMember - if hostedControlPlane.Spec.ETCD.AutoGrowEnabled() { - // Disarm NOSPACE, as we automatically upscale the storage and the alarm is not relevant anymore. - ignoredAlarms = slices.Filter(alarmResponse.Alarms, func(alarm *etcdserverpb.AlarmMember, _ int) bool { - return alarm.Alarm == etcdserverpb.AlarmType_NOSPACE - }) - for _, outdatedAlarm := range ignoredAlarms { - if err := etcdClient.DisarmAlarm(ctx, (*clientv3.AlarmMember)(outdatedAlarm)); err != nil { - return fmt.Errorf( - "failed to disarm etcd alarm %s for member %d: %w", - outdatedAlarm.Alarm.String(), outdatedAlarm.MemberID, err, - ) - } - er.recorder.Normalf( - nil, - "AutoGrowEnabled", - "EtcdAlarmDisarm", - "Disarmed etcd alarm %s for member %d", - outdatedAlarm.Alarm.String(), - outdatedAlarm.MemberID, + if len(alarmResponse.Alarms) == 0 { + return nil + } + + nospaceAlarms := slices.Filter(alarmResponse.Alarms, func(alarm *etcdserverpb.AlarmMember, _ int) bool { + return alarm.Alarm == etcdserverpb.AlarmType_NOSPACE + }) + headroom := hostedControlPlane.Status.ETCDVolumeSize.DeepCopy() + headroom.Sub(hostedControlPlane.Status.ETCDVolumeUsage) + var ignoredAlarms []*etcdserverpb.AlarmMember + if headroom.Cmp(er.etcdServerStorageBuffer) >= 0 { + for _, outdatedAlarm := range nospaceAlarms { + if err := etcdClient.DisarmAlarm(ctx, (*clientv3.AlarmMember)(outdatedAlarm)); err != nil { + return fmt.Errorf( + "failed to disarm etcd alarm %s for member %d: %w", + outdatedAlarm.Alarm.String(), outdatedAlarm.MemberID, err, ) } + emit.Info(ctx, emit.SinkRecorder, + hostedControlPlane, + "EtcdAlarmDisarm", + "EtcdAlarmDisarm", + "Disarmed etcd alarm", + "alarm", outdatedAlarm.Alarm.String(), + "memberID", outdatedAlarm.MemberID, + ) } - activeAlarms, _ := slices.Difference(alarmResponse.Alarms, ignoredAlarms) - if len(activeAlarms) > 0 { - return fmt.Errorf("etcd cluster has active alarms: %w", errors.Join(slices.Map(activeAlarms, - func(alarm *etcdserverpb.AlarmMember, _ int) error { - //nolint:err113 // we don't get a real error from the API, therefore we create one here - return fmt.Errorf("etcd member %d has alarm: %w", alarm.MemberID, errors.New(alarm.Alarm.String())) - }, - )...)) - } + ignoredAlarms = nospaceAlarms + } else if hostedControlPlane.Spec.ETCD.AutoGrowEnabled() { + ignoredAlarms = nospaceAlarms + } + + activeAlarms, _ := slices.Difference(alarmResponse.Alarms, ignoredAlarms) + if len(activeAlarms) > 0 { + return fmt.Errorf("etcd cluster has active alarms: %w", errors.Join(slices.Map(activeAlarms, + func(alarm *etcdserverpb.AlarmMember, _ int) error { + //nolint:err113 // we don't get a real error from the API, therefore we create one here + return fmt.Errorf("etcd member %d has alarm: %w", alarm.MemberID, errors.New(alarm.Alarm.String())) + }, + )...)) } return nil } @@ -753,6 +758,24 @@ func (er *etcdClusterReconciler) createEtcdCertificatesVolume( ) } +func isEtcdVersionBefore37(ctx context.Context, imageSpec *v1alpha1.ImageSpec) bool { + tag := ptr.Deref(ptr.Deref(imageSpec, v1alpha1.ImageSpec{}).Tag, "") + if tag == "" { + return semver.MustParse(version.Version).LT(etcdClientVersion37) + } + + parsed, err := semver.Parse(tag) + if err != nil { + emit.Warn(ctx, + emit.SinkRecorder|emit.SinkLogger, + nil, "EtcdImageTagNotSemver", "EtcdVersionCheck", + "etcd image tag is not valid semver; assuming version < 3.7", "tag", tag, + ) + return true + } + return parsed.LT(etcdClientVersion37) +} + func (er *etcdClusterReconciler) createEtcdContainer( ctx context.Context, hostedControlPlane *v1alpha1.HostedControlPlane, @@ -774,7 +797,7 @@ func (er *etcdClusterReconciler) createEtcdContainer( WithArgs(er.buildEtcdArgs( ctx, hostedControlPlane, cluster, - semver.MustParse(version.Version), + isEtcdVersionBefore37(ctx, hostedControlPlane.Spec.ETCD.Image), etcdDataVolumeMount, etcdCertificatesVolumeMount, serverPort, peerPort, metricsPort, )...). @@ -810,7 +833,7 @@ func (er *etcdClusterReconciler) buildEtcdArgs( ctx context.Context, hostedControlPlane *v1alpha1.HostedControlPlane, cluster *capiv2.Cluster, - etcdVersion semver.Version, + etcdVersionBefore37 bool, etcdDataVolumeMount *corev1ac.VolumeMountApplyConfiguration, etcdCertificatesVolumeMount *corev1ac.VolumeMountApplyConfiguration, serverPort *corev1ac.ContainerPortApplyConfiguration, @@ -850,7 +873,7 @@ func (er *etcdClusterReconciler) buildEtcdArgs( "quota-backend-bytes": strconv.FormatInt(storageQuota, 10), } - if etcdVersion.LT(etcdClientVersion3_7) { + if etcdVersionBefore37 { // this is deprecated and will be removed in 3.7 // TODO: remove this when we roll 3.7 args["snapshot-count"] = "10000" diff --git a/pkg/reconcilers/etcd_cluster/reconciler_test.go b/pkg/reconcilers/etcd_cluster/reconciler_test.go index 9e2e7e6..6165f10 100644 --- a/pkg/reconcilers/etcd_cluster/reconciler_test.go +++ b/pkg/reconcilers/etcd_cluster/reconciler_test.go @@ -7,22 +7,20 @@ import ( "testing" "time" - "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/recorder" + "github.com/teutonet/cluster-api-provider-hosted-control-plane/api/v1alpha1" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/alias" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/etcd_client" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/s3_client" - "k8s.io/utils/ptr" - capiv2 "sigs.k8s.io/cluster-api/api/core/v1beta2" - - semver "github.com/blang/semver/v4" - . "github.com/onsi/gomega" - "github.com/teutonet/cluster-api-provider-hosted-control-plane/api/v1alpha1" . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" "go.etcd.io/etcd/api/v3/etcdserverpb" clientv3 "go.etcd.io/etcd/client/v3" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" corev1ac "k8s.io/client-go/applyconfigurations/core/v1" + "k8s.io/utils/ptr" + capiv2 "sigs.k8s.io/cluster-api/api/core/v1beta2" + + . "github.com/onsi/gomega" ) func TestEtcdClusterReconciler_getETCDVolumeSize(t *testing.T) { @@ -45,12 +43,12 @@ func TestEtcdClusterReconciler_getETCDVolumeSize(t *testing.T) { }, Status: v1alpha1.HostedControlPlaneStatus{ ETCDVolumeSize: resource.MustParse("20Gi"), - ETCDVolumeUsage: resource.MustParse("15Gi"), // 5Gi free + ETCDVolumeUsage: resource.MustParse("15Gi"), }, }, - etcdServerStorageBuffer: resource.MustParse("2Gi"), // Buffer requirement + etcdServerStorageBuffer: resource.MustParse("2Gi"), etcdServerStorageIncrement: resource.MustParse("10Gi"), - expectedSize: resource.MustParse("20Gi"), // No growth needed + expectedSize: resource.MustParse("20Gi"), }, { name: "autogrow enabled needs more space", @@ -64,12 +62,12 @@ func TestEtcdClusterReconciler_getETCDVolumeSize(t *testing.T) { }, Status: v1alpha1.HostedControlPlaneStatus{ ETCDVolumeSize: resource.MustParse("20Gi"), - ETCDVolumeUsage: resource.MustParse("19Gi"), // 1Gi free, less than buffer + ETCDVolumeUsage: resource.MustParse("19Gi"), }, }, - etcdServerStorageBuffer: resource.MustParse("2Gi"), // Buffer requirement + etcdServerStorageBuffer: resource.MustParse("2Gi"), etcdServerStorageIncrement: resource.MustParse("10Gi"), - expectedSize: resource.MustParse("30Gi"), // Should grow + expectedSize: resource.MustParse("30Gi"), }, { name: "autogrow disabled uses specified size", @@ -83,27 +81,25 @@ func TestEtcdClusterReconciler_getETCDVolumeSize(t *testing.T) { }, }, Status: v1alpha1.HostedControlPlaneStatus{ - ETCDVolumeSize: resource.MustParse("20Gi"), // Current size + ETCDVolumeSize: resource.MustParse("20Gi"), ETCDVolumeUsage: resource.MustParse("19Gi"), }, }, etcdServerStorageBuffer: resource.MustParse("2Gi"), etcdServerStorageIncrement: resource.MustParse("10Gi"), - expectedSize: resource.MustParse("25Gi"), // Uses spec size + expectedSize: resource.MustParse("25Gi"), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) - returningFakeRecorder, rec := recorder.NewInfiniteReturningFakeRecorder(tt.hostedControlPlane) + g, ctx, returningFakeRecorder := G(t, tt.hostedControlPlane) reconciler := &etcdClusterReconciler{ - recorder: rec, etcdServerStorageBuffer: tt.etcdServerStorageBuffer, etcdServerStorageIncrement: tt.etcdServerStorageIncrement, } - result := reconciler.getETCDVolumeSize(tt.hostedControlPlane) + result := reconciler.getETCDVolumeSize(ctx, tt.hostedControlPlane) g.Expect(result.Cmp(tt.expectedSize)).To(Equal(0)) @@ -137,10 +133,10 @@ func TestEtcdClusterReconciler_ErrorHandling_InvalidVolumeData(t *testing.T) { }, Status: v1alpha1.HostedControlPlaneStatus{ ETCDVolumeSize: resource.MustParse("20Gi"), - ETCDVolumeUsage: resource.MustParse("-5Gi"), // Invalid negative value + ETCDVolumeUsage: resource.MustParse("-5Gi"), }, }, - expectedVolumeSize: resource.MustParse("20Gi"), // Should not grow with invalid data + expectedVolumeSize: resource.MustParse("20Gi"), description: "Should handle negative volume usage gracefully", }, { @@ -154,18 +150,18 @@ func TestEtcdClusterReconciler_ErrorHandling_InvalidVolumeData(t *testing.T) { }, }, Status: v1alpha1.HostedControlPlaneStatus{ - ETCDVolumeSize: resource.Quantity{}, // Zero value + ETCDVolumeSize: resource.Quantity{}, ETCDVolumeUsage: resource.MustParse("5Gi"), }, }, - expectedVolumeSize: resource.MustParse("10Gi"), // Should default to minimum increment + expectedVolumeSize: resource.MustParse("10Gi"), description: "Should handle zero current volume size", }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, ctx, _ := G(t) defer func() { if r := recover(); r != nil && !tt.expectPanicRecovery { g.Expect(r).To(BeNil()) @@ -173,11 +169,10 @@ func TestEtcdClusterReconciler_ErrorHandling_InvalidVolumeData(t *testing.T) { }() reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, etcdServerStorageBuffer: resource.MustParse("2Gi"), etcdServerStorageIncrement: resource.MustParse("10Gi"), } - result := reconciler.getETCDVolumeSize(tt.hostedControlPlane) + result := reconciler.getETCDVolumeSize(ctx, tt.hostedControlPlane) g.Expect(result.Cmp(tt.expectedVolumeSize)). To(Equal(0)) @@ -197,7 +192,7 @@ func TestEtcdClusterReconciler_StateTransitions_AutoGrowDecisionLogic(t *testing { name: "just below threshold - should trigger growth", currentSize: "20Gi", - currentUsage: "17.1Gi", // 20 - 17.1 = 2.9Gi free, less than 3Gi buffer + currentUsage: "17.1Gi", expectedSize: "30Gi", expectedGrowth: true, description: "Should grow when free space is less than buffer requirement", @@ -205,7 +200,7 @@ func TestEtcdClusterReconciler_StateTransitions_AutoGrowDecisionLogic(t *testing { name: "at threshold - should not grow", currentSize: "20Gi", - currentUsage: "17Gi", // 20-17 = 3Gi free, equal to buffer + currentUsage: "17Gi", expectedSize: "20Gi", expectedGrowth: false, description: "Should not grow when free space equals buffer requirement", @@ -213,7 +208,7 @@ func TestEtcdClusterReconciler_StateTransitions_AutoGrowDecisionLogic(t *testing { name: "well below threshold - should trigger growth", currentSize: "20Gi", - currentUsage: "19Gi", // 20-19 = 1Gi free, well below buffer + currentUsage: "19Gi", expectedSize: "30Gi", expectedGrowth: true, description: "Should grow when free space is well below buffer", @@ -221,8 +216,8 @@ func TestEtcdClusterReconciler_StateTransitions_AutoGrowDecisionLogic(t *testing { name: "massive usage spike - should grow by increment only", currentSize: "20Gi", - currentUsage: "19.5Gi", // Nearly full - expectedSize: "30Gi", // Should grow by a single increment, not double + currentUsage: "19.5Gi", + expectedSize: "30Gi", expectedGrowth: true, description: "Should grow by single increment regardless of usage spike size", }, @@ -230,10 +225,8 @@ func TestEtcdClusterReconciler_StateTransitions_AutoGrowDecisionLogic(t *testing for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) - returningFakeRecorder, fakeRecorder := recorder.NewInfiniteReturningFakeRecorder() + g, ctx, returningFakeRecorder := G(t) reconciler := &etcdClusterReconciler{ - recorder: fakeRecorder, etcdServerStorageBuffer: resource.MustParse("3Gi"), etcdServerStorageIncrement: resource.MustParse("10Gi"), } @@ -252,7 +245,7 @@ func TestEtcdClusterReconciler_StateTransitions_AutoGrowDecisionLogic(t *testing }, } - result := reconciler.getETCDVolumeSize(hcp) + result := reconciler.getETCDVolumeSize(ctx, hcp) expectedQuantity := resource.MustParse(tt.expectedSize) g.Expect(result.Cmp(expectedQuantity)).To(Equal(0)) @@ -271,10 +264,8 @@ func TestEtcdClusterReconciler_StateTransitions_AutoGrowDecisionLogic(t *testing } func TestEtcdClusterReconciler_reconcileETCDSpaceUsage(t *testing.T) { - ctx := context.Background() - t.Run("should update volume usage from filesystem stats", func(t *testing.T) { - g := NewWithT(t) + g, ctx, _ := G(t) fsUsage := int64(5368709120) // 5 GiB hcp := &v1alpha1.HostedControlPlane{ @@ -288,7 +279,6 @@ func TestEtcdClusterReconciler_reconcileETCDSpaceUsage(t *testing.T) { volumeStub.MaxUsage = fsUsage reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, volumeStatsProvider: volumeStub, } @@ -299,7 +289,7 @@ func TestEtcdClusterReconciler_reconcileETCDSpaceUsage(t *testing.T) { }) t.Run("should use filesystem usage when it exceeds previous", func(t *testing.T) { - g := NewWithT(t) + g, ctx, _ := G(t) fsUsage := int64(5 * 1024 * 1024 * 1024) volumeStub := NewEtcdVolumeStatsProviderStub() @@ -313,7 +303,6 @@ func TestEtcdClusterReconciler_reconcileETCDSpaceUsage(t *testing.T) { } reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, volumeStatsProvider: volumeStub, } @@ -323,10 +312,9 @@ func TestEtcdClusterReconciler_reconcileETCDSpaceUsage(t *testing.T) { g.Expect(hcp.Status.ETCDVolumeUsage).To(EqualResource(*resource.NewQuantity(fsUsage, resource.BinarySI))) }) - t.Run("should not shrink volume usage", func(t *testing.T) { - g := NewWithT(t) - fsUsage := int64(1 * 1024 * 1024 * 1024) // 1 GiB - previous := int64(5 * 1024 * 1024 * 1024) // 5 GiB + t.Run("should update to actual usage when all stats succeed", func(t *testing.T) { + g, ctx, _ := G(t) + fsUsage := int64(1 * 1024 * 1024 * 1024) // 1 GiB volumeStub := NewEtcdVolumeStatsProviderStub() volumeStub.MaxUsage = fsUsage @@ -339,20 +327,18 @@ func TestEtcdClusterReconciler_reconcileETCDSpaceUsage(t *testing.T) { } reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, volumeStatsProvider: volumeStub, } err := reconciler.reconcileETCDSpaceUsage(ctx, hcp, nil) g.Expect(err).NotTo(HaveOccurred()) - g.Expect(hcp.Status.ETCDVolumeUsage).To(EqualResource(*resource.NewQuantity(previous, resource.BinarySI))) + g.Expect(hcp.Status.ETCDVolumeUsage).To(EqualResource(*resource.NewQuantity(fsUsage, resource.BinarySI))) }) t.Run("should log warning and continue when volume stats fails", func(t *testing.T) { - g := NewWithT(t) - volumeStub := NewEtcdVolumeStatsProviderStub() + volumeStub.MaxUsage = 10 * (1 << 30) volumeStub.Error = errors.New("connection refused") hcp := &v1alpha1.HostedControlPlane{ @@ -362,10 +348,9 @@ func TestEtcdClusterReconciler_reconcileETCDSpaceUsage(t *testing.T) { }, } - returningFakeRecorder, rec := recorder.NewInfiniteReturningFakeRecorder() + g, ctx, returningFakeRecorder := G(t) reconciler := &etcdClusterReconciler{ - recorder: rec, volumeStatsProvider: volumeStub, } @@ -376,9 +361,10 @@ func TestEtcdClusterReconciler_reconcileETCDSpaceUsage(t *testing.T) { }) t.Run("should not shrink when stats fail", func(t *testing.T) { - g := NewWithT(t) + g, ctx, _ := G(t) volumeStub := NewEtcdVolumeStatsProviderStub() + volumeStub.MaxUsage = 10 * (1 << 30) volumeStub.Error = errors.New("connection refused") hcp := &v1alpha1.HostedControlPlane{ @@ -389,31 +375,26 @@ func TestEtcdClusterReconciler_reconcileETCDSpaceUsage(t *testing.T) { } reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, volumeStatsProvider: volumeStub, } err := reconciler.reconcileETCDSpaceUsage(ctx, hcp, nil) g.Expect(err).NotTo(HaveOccurred()) - // Should retain previous value since new measurement failed (returns 0) g.Expect(hcp.Status.ETCDVolumeUsage). To(EqualResource(*resource.NewQuantity(int64(15*1024*1024*1024), resource.BinarySI))) }) } func TestEtcdClusterReconciler_etcdIsHealthy(t *testing.T) { - ctx := context.Background() - t.Run("should handle etcd alarm errors", func(t *testing.T) { - g := NewWithT(t) + g, ctx, _ := G(t) stub := NewEtcdClientStub() stub.AlarmError = errors.New("failed to list alarms") hcp := &v1alpha1.HostedControlPlane{} reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, etcdClientFactory: nil, } @@ -422,74 +403,98 @@ func TestEtcdClusterReconciler_etcdIsHealthy(t *testing.T) { g.Expect(err).To(MatchError(ContainSubstring("failed to list alarms"))) }) - t.Run("should disarm NOSPACE alarms when autogrow is enabled", func(t *testing.T) { - g := NewWithT(t) - stub := NewEtcdClientStub() - stub.ActiveAlarms = []*etcdserverpb.AlarmMember{ - { - MemberID: 12345, - Alarm: etcdserverpb.AlarmType_NOSPACE, - }, - } + for _, tc := range []struct { + name string + volumeSize resource.Quantity + volumeUsage resource.Quantity + expectDisarmed bool + }{ + { + name: "should disarm NOSPACE when headroom is sufficient", + volumeSize: resource.MustParse("10Gi"), + volumeUsage: resource.MustParse("5Gi"), + expectDisarmed: true, + }, + { + name: "should not disarm NOSPACE when headroom is insufficient", + volumeSize: resource.MustParse("10Gi"), + volumeUsage: resource.MustParse("9500Mi"), + expectDisarmed: false, + }, + } { + t.Run(tc.name, func(t *testing.T) { + g, ctx, _ := G(t) + stub := NewEtcdClientStub() + stub.ActiveAlarms = []*etcdserverpb.AlarmMember{ + {MemberID: 12345, Alarm: etcdserverpb.AlarmType_NOSPACE}, + } - hcp := &v1alpha1.HostedControlPlane{ - Spec: v1alpha1.HostedControlPlaneSpec{ - HostedControlPlaneInlineSpec: v1alpha1.HostedControlPlaneInlineSpec{ - ETCD: v1alpha1.ETCDComponent{ - AutoGrow: ptr.To(true), + hcp := &v1alpha1.HostedControlPlane{ + Spec: v1alpha1.HostedControlPlaneSpec{ + HostedControlPlaneInlineSpec: v1alpha1.HostedControlPlaneInlineSpec{ + ETCD: v1alpha1.ETCDComponent{AutoGrow: ptr.To(true)}, }, }, - }, - } - - reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, - etcdClientFactory: nil, - } + Status: v1alpha1.HostedControlPlaneStatus{ + ETCDVolumeSize: tc.volumeSize, + ETCDVolumeUsage: tc.volumeUsage, + }, + } - err := reconciler.etcdIsHealthy(ctx, stub, hcp) + reconciler := &etcdClusterReconciler{ + etcdServerStorageBuffer: resource.MustParse("1Gi"), + } - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(stub.ActiveAlarms).ToNot(ContainElement( - HaveField("Alarm", etcdserverpb.AlarmType_NOSPACE), - )) - }) + g.Expect(reconciler.etcdIsHealthy(ctx, stub, hcp)).NotTo(HaveOccurred()) + if tc.expectDisarmed { + g.Expect(stub.ActiveAlarms).ToNot(ContainElement(HaveField("Alarm", etcdserverpb.AlarmType_NOSPACE))) + } else { + g.Expect(stub.ActiveAlarms).To(ContainElement(HaveField("Alarm", etcdserverpb.AlarmType_NOSPACE))) + } + }) + } - t.Run("should return error for active NOSPACE alarms when autogrow is disabled", func(t *testing.T) { - g := NewWithT(t) - stub := NewEtcdClientStub() - stub.ActiveAlarms = []*etcdserverpb.AlarmMember{ - { - MemberID: 12345, - Alarm: etcdserverpb.AlarmType_NOSPACE, - }, - } + t.Run( + "should return error for active NOSPACE alarms when autogrow is disabled and headroom is insufficient", + func(t *testing.T) { + g, ctx, _ := G(t) + stub := NewEtcdClientStub() + stub.ActiveAlarms = []*etcdserverpb.AlarmMember{ + { + MemberID: 12345, + Alarm: etcdserverpb.AlarmType_NOSPACE, + }, + } - hcp := &v1alpha1.HostedControlPlane{ - Spec: v1alpha1.HostedControlPlaneSpec{ - HostedControlPlaneInlineSpec: v1alpha1.HostedControlPlaneInlineSpec{ - ETCD: v1alpha1.ETCDComponent{ - AutoGrow: ptr.To(false), + hcp := &v1alpha1.HostedControlPlane{ + Spec: v1alpha1.HostedControlPlaneSpec{ + HostedControlPlaneInlineSpec: v1alpha1.HostedControlPlaneInlineSpec{ + ETCD: v1alpha1.ETCDComponent{ + AutoGrow: ptr.To(false), + }, }, }, - }, - } + Status: v1alpha1.HostedControlPlaneStatus{ + ETCDVolumeSize: resource.MustParse("10Gi"), + ETCDVolumeUsage: resource.MustParse("9500Mi"), + }, + } - reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, - etcdClientFactory: nil, - } + reconciler := &etcdClusterReconciler{ + etcdServerStorageBuffer: resource.MustParse("1Gi"), + } - err := reconciler.etcdIsHealthy(ctx, stub, hcp) + err := reconciler.etcdIsHealthy(ctx, stub, hcp) - g.Expect(err).To(MatchError(ContainSubstring(etcdserverpb.AlarmType_NOSPACE.String()))) - g.Expect(stub.ActiveAlarms).To(ContainElement( - HaveField("Alarm", etcdserverpb.AlarmType_NOSPACE), - )) - }) + g.Expect(err).To(MatchError(ContainSubstring(etcdserverpb.AlarmType_NOSPACE.String()))) + g.Expect(stub.ActiveAlarms).To(ContainElement( + HaveField("Alarm", etcdserverpb.AlarmType_NOSPACE), + )) + }, + ) t.Run("should return error for active non-NOSPACE alarms", func(t *testing.T) { - g := NewWithT(t) + g, ctx, _ := G(t) stub := NewEtcdClientStub() stub.ActiveAlarms = []*etcdserverpb.AlarmMember{ { @@ -509,7 +514,6 @@ func TestEtcdClusterReconciler_etcdIsHealthy(t *testing.T) { } reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, etcdClientFactory: nil, } @@ -519,13 +523,12 @@ func TestEtcdClusterReconciler_etcdIsHealthy(t *testing.T) { }) t.Run("should pass when no alarms present", func(t *testing.T) { - g := NewWithT(t) + g, ctx, _ := G(t) stub := NewEtcdClientStub() hcp := &v1alpha1.HostedControlPlane{} reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, etcdClientFactory: nil, } @@ -536,12 +539,10 @@ func TestEtcdClusterReconciler_etcdIsHealthy(t *testing.T) { } func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { - ctx := context.Background() - yesterday := metav1.Time{Time: time.Now().Add(-25 * time.Hour)} cronAt2AM := "0 2 * * *" + t.Run("should create snapshot and upload to S3 when scheduled", func(t *testing.T) { - g := NewWithT(t) etcdClientStub := NewEtcdClientStub() s3ClientStub := NewS3ClientStub() @@ -556,13 +557,13 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { }, }, Status: v1alpha1.HostedControlPlaneStatus{ - ETCDLastBackupTime: yesterday, // Last backup was 25 hours ago + ETCDLastBackupTime: yesterday, }, } - returningFakeRecorder, fakeRecorder := recorder.NewInfiniteReturningFakeRecorder(hcp) + g, ctx, returningFakeRecorder := G(t, hcp) + reconciler := &etcdClusterReconciler{ - recorder: fakeRecorder, etcdClientFactory: nil, s3ClientFactory: func( context.Context, *alias.ManagementClusterClient, @@ -588,7 +589,7 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { }) t.Run("should not create backup when not scheduled", func(t *testing.T) { - g := NewWithT(t) + g, ctx, _ := G(t) etcdClientStub := NewEtcdClientStub() s3ClientStub := NewS3ClientStub() @@ -603,12 +604,11 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { }, }, Status: v1alpha1.HostedControlPlaneStatus{ - ETCDLastBackupTime: metav1.Time{Time: time.Now().Add(-1 * time.Hour)}, // Recent backup + ETCDLastBackupTime: metav1.Time{Time: time.Now().Add(-1 * time.Hour)}, }, } reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, etcdClientFactory: nil, s3ClientFactory: func( context.Context, *alias.ManagementClusterClient, @@ -625,7 +625,7 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { }) t.Run("should handle etcd snapshot creation failure", func(t *testing.T) { - g := NewWithT(t) + g, ctx, _ := G(t) etcdClientStub := NewEtcdClientStub() etcdClientStub.SnapshotError = errors.New("failed to create snapshot") s3ClientStub := NewS3ClientStub() @@ -641,12 +641,11 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { }, }, Status: v1alpha1.HostedControlPlaneStatus{ - ETCDLastBackupTime: yesterday, // Last backup was 25 hours ago + ETCDLastBackupTime: yesterday, }, } reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, etcdClientFactory: nil, s3ClientFactory: func( context.Context, *alias.ManagementClusterClient, @@ -663,7 +662,7 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { }) t.Run("should handle S3 upload failure", func(t *testing.T) { - g := NewWithT(t) + g, ctx, _ := G(t) etcdClientStub := NewEtcdClientStub() s3ClientStub := NewS3ClientStub() s3ClientStub.UploadError = errors.New("failed to upload to S3") @@ -679,12 +678,11 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { }, }, Status: v1alpha1.HostedControlPlaneStatus{ - ETCDLastBackupTime: yesterday, // Last backup was 25 hours ago + ETCDLastBackupTime: yesterday, }, } reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, etcdClientFactory: nil, s3ClientFactory: func( context.Context, *alias.ManagementClusterClient, @@ -701,7 +699,7 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { }) t.Run("should handle invalid cron schedule", func(t *testing.T) { - g := NewWithT(t) + g, ctx, _ := G(t) etcdClientStub := NewEtcdClientStub() s3ClientStub := NewS3ClientStub() @@ -710,7 +708,7 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { HostedControlPlaneInlineSpec: v1alpha1.HostedControlPlaneInlineSpec{ ETCD: v1alpha1.ETCDComponent{ Backup: &v1alpha1.ETCDBackup{ - Schedule: "invalid cron", // Invalid schedule + Schedule: "invalid cron", }, }, }, @@ -721,7 +719,6 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { } reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, etcdClientFactory: nil, s3ClientFactory: func( context.Context, *alias.ManagementClusterClient, @@ -738,7 +735,7 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { }) t.Run("should create first backup when ETCDLastBackupTime is zero", func(t *testing.T) { - g := NewWithT(t) + g, ctx, _ := G(t) etcdClientStub := NewEtcdClientStub() s3ClientStub := NewS3ClientStub() @@ -759,12 +756,11 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { }, }, Status: v1alpha1.HostedControlPlaneStatus{ - ETCDLastBackupTime: metav1.Time{}, // Zero time - first backup + ETCDLastBackupTime: metav1.Time{}, }, } reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, etcdClientFactory: nil, s3ClientFactory: func( context.Context, *alias.ManagementClusterClient, @@ -1116,25 +1112,25 @@ func TestEtcdClusterReconciler_reconcileETCDDefragmentation(t *testing.T) { func TestBuildEtcdArgs_SnapshotCount(t *testing.T) { tests := []struct { - name string - etcdVersion semver.Version - expectSnapshotCount bool + name string + etcdVersionBefore3_7 bool + expectSnapshotCount bool }{ { - name: "version < 3.7 sets snapshot-count", - etcdVersion: semver.MustParse("3.6.0"), - expectSnapshotCount: true, + name: "version < 3.7 sets snapshot-count", + etcdVersionBefore3_7: true, + expectSnapshotCount: true, }, { - name: "version >= 3.7 omits snapshot-count", - etcdVersion: semver.MustParse("3.7.0"), - expectSnapshotCount: false, + name: "version >= 3.7 omits snapshot-count", + etcdVersionBefore3_7: false, + expectSnapshotCount: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, ctx, _ := G(t) reconciler := &etcdClusterReconciler{} hcp := &v1alpha1.HostedControlPlane{ @@ -1151,10 +1147,10 @@ func TestBuildEtcdArgs_SnapshotCount(t *testing.T) { certMount := corev1ac.VolumeMount().WithMountPath("/etc/etcd") args := reconciler.buildEtcdArgs( - context.Background(), + ctx, hcp, cluster, - tt.etcdVersion, + tt.etcdVersionBefore3_7, dataMount, certMount, serverPort, diff --git a/pkg/reconcilers/etcd_cluster/volume_stats/volume_stats.go b/pkg/reconcilers/etcd_cluster/volume_stats/volume_stats.go index 36760b7..3e18f11 100644 --- a/pkg/reconcilers/etcd_cluster/volume_stats/volume_stats.go +++ b/pkg/reconcilers/etcd_cluster/volume_stats/volume_stats.go @@ -45,6 +45,10 @@ func (p *kubeletEtcdVolumeStatsProvider) GetMaxEtcdVolumeUsage( ) (int64, error) { return tracing.WithSpan(ctx, tracer, "GetMaxEtcdVolumeUsage", func(ctx context.Context, span trace.Span) (int64, error) { + if len(pods) == 0 { + return 0, nil + } + nodeStats := parallel.Map( slices.Uniq(slices.Map(pods, func(pod corev1.Pod, _ int) string { return pod.Spec.NodeName })), func(node string, _ int) slices.Tuple2[string, slices.Tuple2[kubeletstatsv1alpha1.Summary, error]] { @@ -65,7 +69,9 @@ func (p *kubeletEtcdVolumeStatsProvider) GetMaxEtcdVolumeUsage( results := slices.Map(pods, func(pod corev1.Pod, _ int) slices.Tuple2[int64, error] { nodeResult, ok := statsByNode[pod.Spec.NodeName] if !ok { - return slices.T2(int64(0), fmt.Errorf("%w for node %s", errNoStatsForNode, pod.Spec.NodeName)) + return slices.T2(int64(0), fmt.Errorf( + "missing stats for node %s: %w", pod.Spec.NodeName, errNoStatsForNode, + )) } if nodeResult.B != nil { return slices.T2(int64(0), nodeResult.B) diff --git a/pkg/reconcilers/etcd_cluster/volume_stats/volume_stats_test.go b/pkg/reconcilers/etcd_cluster/volume_stats/volume_stats_test.go index f58c696..4e6ea49 100644 --- a/pkg/reconcilers/etcd_cluster/volume_stats/volume_stats_test.go +++ b/pkg/reconcilers/etcd_cluster/volume_stats/volume_stats_test.go @@ -289,58 +289,3 @@ func TestExtractPodVolumeUsage(t *testing.T) { g.Expect(result).To(Equal(int64(0))) }) } - -func TestGetMaxEtcdVolumeUsage(t *testing.T) { - t.Run("deduplicates node stats for pods on same node", func(t *testing.T) { - g := NewWithT(t) - usedBytes1 := uint64(1073741824) // 1 GiB - usedBytes2 := uint64(2147483648) // 2 GiB - summary := kubeletstatsv1alpha1.Summary{ - Node: kubeletstatsv1alpha1.NodeStats{}, - Pods: []kubeletstatsv1alpha1.PodStats{ - { - PodRef: kubeletstatsv1alpha1.PodReference{ - Name: "etcd-0", - Namespace: "test-ns", - }, - VolumeStats: []kubeletstatsv1alpha1.VolumeStats{ - { - Name: "etcd-data", - PVCRef: &kubeletstatsv1alpha1.PVCReference{ - Name: "etcd-data-etcd-0", - Namespace: "test-ns", - }, - FsStats: kubeletstatsv1alpha1.FsStats{UsedBytes: &usedBytes1}, - }, - }, - }, - { - PodRef: kubeletstatsv1alpha1.PodReference{ - Name: "etcd-1", - Namespace: "test-ns", - }, - VolumeStats: []kubeletstatsv1alpha1.VolumeStats{ - { - Name: "etcd-data", - PVCRef: &kubeletstatsv1alpha1.PVCReference{ - Name: "etcd-data-etcd-1", - Namespace: "test-ns", - }, - FsStats: kubeletstatsv1alpha1.FsStats{UsedBytes: &usedBytes2}, - }, - }, - }, - }, - } - - // This test verifies the extraction logic - the actual provider tests - // with real node API calls would be integration tests - p := &kubeletEtcdVolumeStatsProvider{} - - usage0 := p.extractPodVolumeUsage(summary, "test-ns", "etcd-0") - usage1 := p.extractPodVolumeUsage(summary, "test-ns", "etcd-1") - - g.Expect(usage0).To(Equal(int64(1073741824))) - g.Expect(usage1).To(Equal(int64(2147483648))) - }) -} diff --git a/pkg/reconcilers/kubeconfig/reconciler_test.go b/pkg/reconcilers/kubeconfig/reconciler_test.go index fba5349..b200c3c 100644 --- a/pkg/reconcilers/kubeconfig/reconciler_test.go +++ b/pkg/reconcilers/kubeconfig/reconciler_test.go @@ -8,6 +8,7 @@ import ( slices "github.com/samber/lo" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/alias" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -91,7 +92,7 @@ func TestKubeconfigReconciler_ReconcileWorkflow(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := t.Context() - g := NewWithT(t) + g, _, _ := G(t) kubeClient := fake.NewClientset(slices.Map(tt.existingSecrets, func(s *corev1.Secret, _ int) runtime.Object { return s @@ -193,7 +194,7 @@ func TestKubeconfigReconciler_KubeconfigConnectivity(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { ctx := t.Context() - g := NewWithT(t) + g, _, _ := G(t) secrets := []*corev1.Secret{ createCertificateSecret("test-cluster-ca", "default", true), } @@ -258,7 +259,7 @@ func TestKubeconfigReconciler_KubeconfigConnectivity(t *testing.T) { func TestKubeconfigReconciler_CertificateRotation(t *testing.T) { ctx := t.Context() - g := NewWithT(t) + g, _, _ := G(t) cluster := &capiv2.Cluster{ ObjectMeta: metav1.ObjectMeta{ Name: "test-cluster", @@ -323,7 +324,7 @@ func TestKubeconfigReconciler_CertificateRotation(t *testing.T) { func TestKubeconfigReconciler_MultiUserScenarios(t *testing.T) { ctx := t.Context() - g := NewWithT(t) + g, _, _ := G(t) cluster := &capiv2.Cluster{ ObjectMeta: metav1.ObjectMeta{ Name: "test-cluster", diff --git a/pkg/reconcilers/reconciler.go b/pkg/reconcilers/reconciler.go index 32bb4ff..bacd97a 100644 --- a/pkg/reconcilers/reconciler.go +++ b/pkg/reconcilers/reconciler.go @@ -17,7 +17,7 @@ import ( "github.com/cilium/cilium/pkg/policy/api" slices "github.com/samber/lo" operatorutil "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util" - "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/recorder" + "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/emit" errorsUtil "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/util/errors" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/util/networkpolicy" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/util/tracing" @@ -189,11 +189,14 @@ func reconcileWorkload[RA any, RSA any, R any]( "failed to delete existing %s %s: %w", kind, name, err, ) } - recorder.FromContext(ctx).Warnf( + emit.Warn( + ctx, + emit.SinkRecorder, getObject(appliedResource), "ImmutableSpecField", fmt.Sprintf("Deleted%s", kind), - "Deleted existing %s %s due to immutable spec fields", kind, name, + "Deleted existing resource due to immutable spec fields", + "kind", kind, "name", name, ) // don't retry immediately, the funcs might not be idempotent // (and go can't figure out the generics anyways...) @@ -907,11 +910,14 @@ func reconcileSecret( "failed to delete existing secret %s: %w", name, err, ) } - recorder.FromContext(ctx).Normalf( + emit.Info( + ctx, + emit.SinkRecorder, appliedSecret, "ImmutableTypeField", "DeletedSecret", - "Deleted existing secret %s/%s due to immutable type field", namespace, name, + "Deleted existing secret due to immutable type field", + "namespace", namespace, "name", name, ) return reconcileSecret( ctx, diff --git a/pkg/reconcilers/tlsroutes/reconciler_test.go b/pkg/reconcilers/tlsroutes/reconciler_test.go index 2ecb518..ece5ca3 100644 --- a/pkg/reconcilers/tlsroutes/reconciler_test.go +++ b/pkg/reconcilers/tlsroutes/reconciler_test.go @@ -7,6 +7,7 @@ import ( . "github.com/onsi/gomega/gcustom" . "github.com/onsi/gomega/gstruct" "github.com/teutonet/cluster-api-provider-hosted-control-plane/api/v1alpha1" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" v1 "k8s.io/client-go/applyconfigurations/meta/v1" @@ -140,7 +141,7 @@ func TestTLSRoutesReconciler_TrafficRouting(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) gatewayClient := fake.NewClientset(tt.existingTLSRoutes...) reconciler := &tlsRoutesReconciler{ @@ -262,7 +263,7 @@ func TestTLSRoutesReconciler_CertificateIntegration(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) gatewayClient := fake.NewClientset() reconciler := &tlsRoutesReconciler{ @@ -373,7 +374,7 @@ func TestTLSRoutesReconciler_GatewayFailover(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) gatewayClient := fake.NewClientset() reconciler := &tlsRoutesReconciler{ @@ -403,7 +404,7 @@ func TestTLSRoutesReconciler_GatewayFailover(t *testing.T) { } func TestTLSRoutesReconciler_MultipleEndpoints(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) hostedControlPlane := &v1alpha1.HostedControlPlane{ ObjectMeta: metav1.ObjectMeta{ Name: "test-hcp", diff --git a/pkg/util/errors/errors_test.go b/pkg/util/errors/errors_test.go index 90e0e99..f61c211 100644 --- a/pkg/util/errors/errors_test.go +++ b/pkg/util/errors/errors_test.go @@ -5,6 +5,7 @@ import ( "testing" . "github.com/onsi/gomega" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" ) func TestErrorfIfErr(t *testing.T) { @@ -60,7 +61,7 @@ func TestErrorfIfErr(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) err := IfErrErrorf(tt.args.format, tt.args.args...) if tt.expectedErr == nil { diff --git a/test/context.go b/test/context.go new file mode 100644 index 0000000..05caff3 --- /dev/null +++ b/test/context.go @@ -0,0 +1,77 @@ +package test + +import ( + "context" + "log/slog" + "strings" + "testing" + + "github.com/go-logr/logr" + "github.com/onsi/gomega" + "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/recorder" + "go.opentelemetry.io/otel" + "k8s.io/apimachinery/pkg/runtime" +) + +// G combines NewWithT and NewTestContext into one call. +// It replaces the two-line pattern: g := NewWithT(t); ctx, rec := NewTestContext(t, ...). +func G( + t *testing.T, + related ...runtime.Object, +) (gomega.Gomega, context.Context, *recorder.InfiniteReturningFakeRecorder) { + t.Helper() + ctx, rec := NewTestContext(t, related...) + return gomega.NewWithT(t), ctx, rec //nolint:forbidigo // implements G(), the wrapper the rule enforces +} + +// NewTestContext returns a context wired with: +// - a slog logger writing to t.Log (visible with -v or on failure) +// - an event recorder (optionally bound to related objects for event assertions) +// - an OTEL span named after the test (ended on t.Cleanup) +// +// Usage: ctx, rec := NewTestContext(t, hcp). +func NewTestContext( + t *testing.T, + related ...runtime.Object, +) (context.Context, *recorder.InfiniteReturningFakeRecorder) { + t.Helper() + + fakeRec, rec := recorder.NewInfiniteReturningFakeRecorder(related...) + + slogLogger := slog.New(slog.NewTextHandler(&testLogWriter{t}, &slog.HandlerOptions{Level: slog.LevelDebug})) + ctx := recorder.IntoContext( + logr.NewContext(context.Background(), logr.FromSlogHandler(slogLogger.Handler())), + rec, + ) + + ctx, span := otel.Tracer("test").Start(ctx, t.Name()) + t.Cleanup(func() { span.End() }) + + return ctx, fakeRec +} + +// Run wraps t.Run so that the subtest body always receives a pre-wired context +// and recorder — the same setup as NewTestContext — without the caller having +// to remember to do it manually. +// +// Usage: Run(t, "name", func(t *testing.T, ctx context.Context, rec *recorder.InfiniteReturningFakeRecorder) { ... }). +func Run( + t *testing.T, + name string, + fn func(*testing.T, context.Context, *recorder.InfiniteReturningFakeRecorder), + related ...runtime.Object, +) bool { + t.Helper() + return t.Run(name, func(t *testing.T) { + t.Helper() + ctx, rec := NewTestContext(t, related...) + fn(t, ctx, rec) + }) +} + +type testLogWriter struct{ t *testing.T } + +func (w *testLogWriter) Write(p []byte) (int, error) { + w.t.Log(strings.TrimSuffix(string(p), "\n")) + return len(p), nil +} diff --git a/test/etcd_stubs.go b/test/etcd_stubs.go index ee3b760..187412d 100644 --- a/test/etcd_stubs.go +++ b/test/etcd_stubs.go @@ -99,10 +99,7 @@ func NewEtcdVolumeStatsProviderStub() *EtcdVolumeStatsProviderStub { func (s *EtcdVolumeStatsProviderStub) GetMaxEtcdVolumeUsage( _ context.Context, _ []corev1.Pod, ) (int64, error) { - if s.Error != nil { - return 0, s.Error - } - return s.MaxUsage, nil + return s.MaxUsage, s.Error } type S3ClientStub struct { From c3daf3ae5f60ca7066dcc3718a315f5009f405fb Mon Sep 17 00:00:00 2001 From: Chris Werner Rau Date: Tue, 19 May 2026 10:34:17 +0200 Subject: [PATCH 4/6] fix: import cycle --- pkg/hostedcontrolplane/controller_test.go | 1 + .../lifecycle_phases_test.go | 1 + .../etcd_cluster/backup_schedule_test.go | 17 ++++--- pkg/reconcilers/etcd_cluster/reconciler.go | 11 +---- .../etcd_cluster/reconciler_test.go | 49 ++++++++++--------- test/{ => etcdtest}/etcd_stubs.go | 2 +- 6 files changed, 39 insertions(+), 42 deletions(-) rename test/{ => etcdtest}/etcd_stubs.go (99%) diff --git a/pkg/hostedcontrolplane/controller_test.go b/pkg/hostedcontrolplane/controller_test.go index 1cb0beb..35f7865 100644 --- a/pkg/hostedcontrolplane/controller_test.go +++ b/pkg/hostedcontrolplane/controller_test.go @@ -14,6 +14,7 @@ import ( "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/etcd_client" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/s3_client" . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test/etcdtest" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" diff --git a/pkg/hostedcontrolplane/lifecycle_phases_test.go b/pkg/hostedcontrolplane/lifecycle_phases_test.go index b1ab799..8829285 100644 --- a/pkg/hostedcontrolplane/lifecycle_phases_test.go +++ b/pkg/hostedcontrolplane/lifecycle_phases_test.go @@ -24,6 +24,7 @@ import ( "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/etcd_client" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/s3_client" . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test/etcdtest" corev1 "k8s.io/api/core/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" diff --git a/pkg/reconcilers/etcd_cluster/backup_schedule_test.go b/pkg/reconcilers/etcd_cluster/backup_schedule_test.go index 29a82ae..e84d8e9 100644 --- a/pkg/reconcilers/etcd_cluster/backup_schedule_test.go +++ b/pkg/reconcilers/etcd_cluster/backup_schedule_test.go @@ -6,11 +6,12 @@ import ( "time" . "github.com/onsi/gomega" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" ) func TestDailyScheduleFor(t *testing.T) { t.Run("result is within the 20:00–03:59 window", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) identities := [][2]string{ {"ns-a", "cluster-1"}, {"ns-b", "cluster-2"}, @@ -34,19 +35,19 @@ func TestDailyScheduleFor(t *testing.T) { }) t.Run("is deterministic for the same cluster", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) g.Expect(dailyScheduleFor("default", "my-cluster")).To(Equal(dailyScheduleFor("default", "my-cluster"))) }) t.Run("produces different schedules for different clusters", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) g.Expect(dailyScheduleFor("ns", "cluster-a")).NotTo(Equal(dailyScheduleFor("ns", "cluster-b"))) }) } func TestResolveBackupSchedule(t *testing.T) { t.Run("@daily returns a usable schedule within the midnight window", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) schedule, err := resolveBackupSchedule("@daily", "default", "my-cluster") g.Expect(err).NotTo(HaveOccurred()) @@ -61,7 +62,7 @@ func TestResolveBackupSchedule(t *testing.T) { }) t.Run("@daily is deterministic for the same cluster", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) base := time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC) s1, _ := resolveBackupSchedule("@daily", "default", "my-cluster") s2, _ := resolveBackupSchedule("@daily", "default", "my-cluster") @@ -69,7 +70,7 @@ func TestResolveBackupSchedule(t *testing.T) { }) t.Run("@daily produces different next-run times for different clusters", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) base := time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC) a, _ := resolveBackupSchedule("@daily", "ns", "cluster-a") b, _ := resolveBackupSchedule("@daily", "ns", "cluster-b") @@ -77,7 +78,7 @@ func TestResolveBackupSchedule(t *testing.T) { }) t.Run("standard cron parses successfully", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) schedule, err := resolveBackupSchedule("0 2 * * *", "ns", "cluster") g.Expect(err).NotTo(HaveOccurred()) base := time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC) @@ -85,7 +86,7 @@ func TestResolveBackupSchedule(t *testing.T) { }) t.Run("invalid schedule returns an error", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) _, err := resolveBackupSchedule("invalid cron", "ns", "cluster") g.Expect(err).To(HaveOccurred()) }) diff --git a/pkg/reconcilers/etcd_cluster/reconciler.go b/pkg/reconcilers/etcd_cluster/reconciler.go index 682c741..51a96b1 100644 --- a/pkg/reconcilers/etcd_cluster/reconciler.go +++ b/pkg/reconcilers/etcd_cluster/reconciler.go @@ -46,7 +46,7 @@ var ( etcdVolumeResizeEvent = "EtcdVolumeAutoResize" etcdVolumeSizeReCalculatedEvent = "EtcdVolumeSizeRecalculated" errETCDBackupStalled = errors.New("etcd backup timed out: no progress in time window") - etcdClientVersion37 = semver.MustParse(version.V3_7.String()) + etcdClientVersion37 = semver.MustParse(version.V3_7.String()) ) const ( @@ -199,7 +199,6 @@ func (er *etcdClusterReconciler) ReconcileEtcdCluster( return "etcd StatefulSet is not ready", nil } - if err := er.reconcileETCDMaintenance(ctx, etcdClient, hostedControlPlane, etcdPods); err != nil { return "", err } @@ -360,12 +359,7 @@ func (er *etcdClusterReconciler) reconcileETCDBackup( cancel(errETCDBackupStalled) }) defer watchdog.Stop() - er.recorder.Normalf( - nil, - "CronScheduleTriggered", - "EtcdBackup", - "Starting etcd backup", - ) + emit.Info(ctx, emit.SinkRecorder, nil, "CronScheduleTriggered", "EtcdBackup", "Starting etcd backup") snapshotResponse, closeClientFunc, err := etcdClient.OpenSnapshotStream(ctx) defer func() { err = errors.Join(err, closeClientFunc()) }() if err != nil { @@ -505,7 +499,6 @@ func (er *etcdClusterReconciler) reconcileETCDDefragmentation( ) } - //+kubebuilder:rbac:groups="",resources=services,verbs=create;patch func (er *etcdClusterReconciler) reconcileService( diff --git a/pkg/reconcilers/etcd_cluster/reconciler_test.go b/pkg/reconcilers/etcd_cluster/reconciler_test.go index 6165f10..ee5e6eb 100644 --- a/pkg/reconcilers/etcd_cluster/reconciler_test.go +++ b/pkg/reconcilers/etcd_cluster/reconciler_test.go @@ -8,10 +8,12 @@ import ( "time" "github.com/teutonet/cluster-api-provider-hosted-control-plane/api/v1alpha1" + "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/operator/util/recorder" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/alias" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/etcd_client" "github.com/teutonet/cluster-api-provider-hosted-control-plane/pkg/reconcilers/etcd_cluster/s3_client" . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test" + . "github.com/teutonet/cluster-api-provider-hosted-control-plane/test/etcdtest" "go.etcd.io/etcd/api/v3/etcdserverpb" clientv3 "go.etcd.io/etcd/client/v3" "k8s.io/apimachinery/pkg/api/resource" @@ -779,7 +781,7 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { }) t.Run("should create backup with @daily schedule spread by cluster identity", func(t *testing.T) { - g := NewWithT(t) + g, ctx, _ := G(t) etcdClientStub := NewEtcdClientStub() s3ClientStub := NewS3ClientStub() @@ -803,7 +805,6 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { } reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, etcdClientFactory: nil, s3ClientFactory: func( context.Context, *alias.ManagementClusterClient, @@ -822,7 +823,7 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { }) t.Run("should fail when snapshot open stalls", func(t *testing.T) { - g := NewWithT(t) + g, ctx, _ := G(t) hcp := &v1alpha1.HostedControlPlane{ Spec: v1alpha1.HostedControlPlaneSpec{ @@ -840,7 +841,6 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { } reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, watchdogInterval: 10 * time.Millisecond, s3ClientFactory: func( context.Context, *alias.ManagementClusterClient, @@ -858,7 +858,7 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { }) t.Run("should fail when upload stalls", func(t *testing.T) { - g := NewWithT(t) + g, ctx, _ := G(t) etcdClientStub := NewEtcdClientStub() hcp := &v1alpha1.HostedControlPlane{ @@ -877,7 +877,6 @@ func TestEtcdClusterReconciler_reconcileETCDBackup(t *testing.T) { } reconciler := &etcdClusterReconciler{ - recorder: &recorder.InfiniteDiscardingFakeRecorder{}, watchdogInterval: 10 * time.Millisecond, s3ClientFactory: func( context.Context, *alias.ManagementClusterClient, @@ -934,12 +933,12 @@ func TestEtcdClusterReconciler_reconcileETCDMaintenance_GetStatusesError(t *test ctx := context.Background() t.Run("should return wrapped error when GetStatuses fails", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) stub := NewEtcdClientStub() stub.StatusError = errors.New("connection refused") hcp := &v1alpha1.HostedControlPlane{} - reconciler := &etcdClusterReconciler{recorder: &recorder.InfiniteDiscardingFakeRecorder{}} + reconciler := &etcdClusterReconciler{} err := reconciler.reconcileETCDMaintenance(ctx, stub, hcp, nil) @@ -953,7 +952,7 @@ func TestEtcdClusterReconciler_reconcileETCDDefragmentation(t *testing.T) { emptyHCP := &v1alpha1.HostedControlPlane{} t.Run("should defrag when fragmentation exceeds threshold", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) stub := NewEtcdClientStub() statuses := map[string]*clientv3.StatusResponse{ "etcd-0": {DbSize: 1000, DbSizeInUse: 700}, // 30% fragmented — above 20% threshold @@ -961,7 +960,8 @@ func TestEtcdClusterReconciler_reconcileETCDDefragmentation(t *testing.T) { hcp := &v1alpha1.HostedControlPlane{} returningFakeRecorder, fakeRecorder := recorder.NewInfiniteReturningFakeRecorder() - reconciler := &etcdClusterReconciler{recorder: fakeRecorder} + reconciler := &etcdClusterReconciler{} + ctx := recorder.IntoContext(ctx, fakeRecorder) err := reconciler.reconcileETCDDefragmentation(ctx, stub, statuses, hcp) @@ -975,14 +975,14 @@ func TestEtcdClusterReconciler_reconcileETCDDefragmentation(t *testing.T) { }) t.Run("should not defrag when all members are below threshold", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) stub := NewEtcdClientStub() statuses := map[string]*clientv3.StatusResponse{ "etcd-0": {DbSize: 1000, DbSizeInUse: 900}, // 10% fragmented — below 20% threshold "etcd-1": {DbSize: 1000, DbSizeInUse: 850}, // 15% fragmented — below 20% threshold } - reconciler := &etcdClusterReconciler{recorder: &recorder.InfiniteDiscardingFakeRecorder{}} + reconciler := &etcdClusterReconciler{} err := reconciler.reconcileETCDDefragmentation(ctx, stub, statuses, emptyHCP) @@ -991,13 +991,13 @@ func TestEtcdClusterReconciler_reconcileETCDDefragmentation(t *testing.T) { }) t.Run("should skip members with DbSize zero", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) stub := NewEtcdClientStub() statuses := map[string]*clientv3.StatusResponse{ "etcd-0": {DbSize: 0, DbSizeInUse: 0}, // uninitialised } - reconciler := &etcdClusterReconciler{recorder: &recorder.InfiniteDiscardingFakeRecorder{}} + reconciler := &etcdClusterReconciler{} err := reconciler.reconcileETCDDefragmentation(ctx, stub, statuses, emptyHCP) @@ -1006,7 +1006,7 @@ func TestEtcdClusterReconciler_reconcileETCDDefragmentation(t *testing.T) { }) t.Run("should skip defrag within cooldown period", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) stub := NewEtcdClientStub() statuses := map[string]*clientv3.StatusResponse{ "etcd-0": {DbSize: 1000, DbSizeInUse: 700}, // 30% fragmented — above threshold @@ -1017,7 +1017,7 @@ func TestEtcdClusterReconciler_reconcileETCDDefragmentation(t *testing.T) { }, } - reconciler := &etcdClusterReconciler{recorder: &recorder.InfiniteDiscardingFakeRecorder{}} + reconciler := &etcdClusterReconciler{} err := reconciler.reconcileETCDDefragmentation(ctx, stub, statuses, hcp) @@ -1026,7 +1026,7 @@ func TestEtcdClusterReconciler_reconcileETCDDefragmentation(t *testing.T) { }) t.Run("should defrag after cooldown period expires", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) stub := NewEtcdClientStub() statuses := map[string]*clientv3.StatusResponse{ "etcd-0": {DbSize: 1000, DbSizeInUse: 700}, // 30% fragmented — above threshold @@ -1037,7 +1037,7 @@ func TestEtcdClusterReconciler_reconcileETCDDefragmentation(t *testing.T) { }, } - reconciler := &etcdClusterReconciler{recorder: &recorder.InfiniteDiscardingFakeRecorder{}} + reconciler := &etcdClusterReconciler{} err := reconciler.reconcileETCDDefragmentation(ctx, stub, statuses, hcp) @@ -1046,7 +1046,7 @@ func TestEtcdClusterReconciler_reconcileETCDDefragmentation(t *testing.T) { }) t.Run("should propagate Defragment error and not update timestamp on failure", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) stub := NewEtcdClientStub() statuses := map[string]*clientv3.StatusResponse{ "etcd-0": {DbSize: 1000, DbSizeInUse: 700}, // 30% fragmented @@ -1054,7 +1054,7 @@ func TestEtcdClusterReconciler_reconcileETCDDefragmentation(t *testing.T) { stub.DefragError = errors.New("defrag failed") hcp := &v1alpha1.HostedControlPlane{} - reconciler := &etcdClusterReconciler{recorder: &recorder.InfiniteDiscardingFakeRecorder{}} + reconciler := &etcdClusterReconciler{} err := reconciler.reconcileETCDDefragmentation(ctx, stub, statuses, hcp) @@ -1064,7 +1064,7 @@ func TestEtcdClusterReconciler_reconcileETCDDefragmentation(t *testing.T) { }) t.Run("should skip retry within cooldown after a failed defrag", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) stub := NewEtcdClientStub() statuses := map[string]*clientv3.StatusResponse{ "etcd-0": {DbSize: 1000, DbSizeInUse: 700}, // 30% fragmented — above threshold @@ -1076,7 +1076,7 @@ func TestEtcdClusterReconciler_reconcileETCDDefragmentation(t *testing.T) { }, } - reconciler := &etcdClusterReconciler{recorder: &recorder.InfiniteDiscardingFakeRecorder{}} + reconciler := &etcdClusterReconciler{} err := reconciler.reconcileETCDDefragmentation(ctx, stub, statuses, hcp) @@ -1085,7 +1085,7 @@ func TestEtcdClusterReconciler_reconcileETCDDefragmentation(t *testing.T) { }) t.Run("should not use ETCDLastDefragTime for cooldown", func(t *testing.T) { - g := NewWithT(t) + g, _, _ := G(t) stub := NewEtcdClientStub() statuses := map[string]*clientv3.StatusResponse{ "etcd-0": {DbSize: 1000, DbSizeInUse: 700}, // 30% fragmented — above threshold @@ -1098,7 +1098,8 @@ func TestEtcdClusterReconciler_reconcileETCDDefragmentation(t *testing.T) { } returningFakeRecorder, fakeRecorder := recorder.NewInfiniteReturningFakeRecorder() - reconciler := &etcdClusterReconciler{recorder: fakeRecorder} + reconciler := &etcdClusterReconciler{} + ctx := recorder.IntoContext(ctx, fakeRecorder) err := reconciler.reconcileETCDDefragmentation(ctx, stub, statuses, hcp) diff --git a/test/etcd_stubs.go b/test/etcdtest/etcd_stubs.go similarity index 99% rename from test/etcd_stubs.go rename to test/etcdtest/etcd_stubs.go index 187412d..656e00d 100644 --- a/test/etcd_stubs.go +++ b/test/etcdtest/etcd_stubs.go @@ -1,4 +1,4 @@ -package test +package etcdtest import ( "bytes" From dcaab6eccad5e1ac12fae960be895ed95f575ddf Mon Sep 17 00:00:00 2001 From: Chris Werner Rau Date: Tue, 19 May 2026 10:41:50 +0200 Subject: [PATCH 5/6] chore: remove duplicate reconcileETCDSpaceUsage call --- pkg/reconcilers/etcd_cluster/reconciler.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pkg/reconcilers/etcd_cluster/reconciler.go b/pkg/reconcilers/etcd_cluster/reconciler.go index 51a96b1..2af46bc 100644 --- a/pkg/reconcilers/etcd_cluster/reconciler.go +++ b/pkg/reconcilers/etcd_cluster/reconciler.go @@ -410,10 +410,6 @@ func (er *etcdClusterReconciler) reconcileETCDMaintenance( return fmt.Errorf("failed to get etcd statuses: %w", err) } - if err := er.reconcileETCDSpaceUsage(ctx, hostedControlPlane, pods); err != nil { - return fmt.Errorf("failed to reconcile etcd space usage: %w", err) - } - if err := er.reconcileETCDDefragmentation(ctx, etcdClient, statuses, hostedControlPlane); err != nil { return fmt.Errorf("failed to reconcile etcd defragmentation: %w", err) } From 64f2eb14941a1c644b2388fabb2dcfc550593a84 Mon Sep 17 00:00:00 2001 From: Chris Werner Rau Date: Tue, 19 May 2026 11:57:49 +0200 Subject: [PATCH 6/6] fix: use ParseTolerant --- pkg/reconcilers/etcd_cluster/reconciler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/reconcilers/etcd_cluster/reconciler.go b/pkg/reconcilers/etcd_cluster/reconciler.go index 2af46bc..4245511 100644 --- a/pkg/reconcilers/etcd_cluster/reconciler.go +++ b/pkg/reconcilers/etcd_cluster/reconciler.go @@ -753,7 +753,7 @@ func isEtcdVersionBefore37(ctx context.Context, imageSpec *v1alpha1.ImageSpec) b return semver.MustParse(version.Version).LT(etcdClientVersion37) } - parsed, err := semver.Parse(tag) + parsed, err := semver.ParseTolerant(tag) if err != nil { emit.Warn(ctx, emit.SinkRecorder|emit.SinkLogger,