Skip to content

Commit 4551f66

Browse files
wenyingd and cursoragent committed
feat(service): reconcile DNS records for LoadBalancer Services
- Reconcile DNS records based on the hostname annotation on LoadBalancer Services using VPCNetworkConfiguration allowed DNS zones
- Report DNSRecordReady condition for DNS zone validation errors and generic DNS build errors

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 3d1282e commit 4551f66

11 files changed

Lines changed: 1395 additions & 191 deletions

File tree

cmd/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ func startServiceController(mgr manager.Manager, nsxClient *nsx.Client) {
247247
subnetport.NewSubnetPortReconciler(mgr, subnetPortService, subnetService, vpcService, ipAddressAllocationService),
248248
pod.NewPodReconciler(mgr, subnetPortService, subnetService, vpcService, nodeService),
249249
networkpolicycontroller.NewNetworkPolicyReconciler(mgr, commonService, vpcService),
250-
service.NewServiceLbReconciler(mgr, commonService),
250+
service.NewServiceLbReconciler(mgr, commonService, dnsRecordService),
251251
subnetbindingcontroller.NewReconciler(mgr, subnetService, subnetBindingService),
252252
subnetipreservationcontroller.NewReconciler(mgr, subnetIPReservationService, subnetService),
253253
)

pkg/controllers/service/service_lb_controller.go

Lines changed: 122 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,25 +5,32 @@ package service
55

66
import (
77
"context"
8+
"fmt"
89
"time"
910

1011
v1 "k8s.io/api/core/v1"
1112
apierrors "k8s.io/apimachinery/pkg/api/errors"
1213
apimachineryruntime "k8s.io/apimachinery/pkg/runtime"
14+
"k8s.io/apimachinery/pkg/types"
15+
"k8s.io/apimachinery/pkg/util/sets"
1316
"k8s.io/apimachinery/pkg/util/version"
1417
clientset "k8s.io/client-go/kubernetes"
1518
"k8s.io/client-go/rest"
1619
"k8s.io/client-go/tools/record"
1720
ctrl "sigs.k8s.io/controller-runtime"
21+
"sigs.k8s.io/controller-runtime/pkg/builder"
1822
"sigs.k8s.io/controller-runtime/pkg/client"
1923
"sigs.k8s.io/controller-runtime/pkg/controller"
24+
"sigs.k8s.io/controller-runtime/pkg/handler"
2025
"sigs.k8s.io/controller-runtime/pkg/webhook"
2126

27+
"github.com/vmware-tanzu/nsx-operator/pkg/apis/vpc/v1alpha1"
2228
"github.com/vmware-tanzu/nsx-operator/pkg/controllers/common"
2329
"github.com/vmware-tanzu/nsx-operator/pkg/logger"
2430
"github.com/vmware-tanzu/nsx-operator/pkg/metrics"
2531
_ "github.com/vmware-tanzu/nsx-operator/pkg/nsx/ratelimiter"
2632
servicecommon "github.com/vmware-tanzu/nsx-operator/pkg/nsx/services/common"
33+
"github.com/vmware-tanzu/nsx-operator/pkg/nsx/services/dns"
2734
)
2835

2936
var (
@@ -38,6 +45,7 @@ type ServiceLbReconciler struct {
3845
Client client.Client
3946
Scheme *apimachineryruntime.Scheme
4047
Service *servicecommon.Service
48+
DNS dns.DNSRecordProvider
4149
Recorder record.EventRecorder
4250
}
4351

@@ -63,25 +71,54 @@ func (r *ServiceLbReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
6371
if err := r.Client.Get(ctx, req.NamespacedName, service); err != nil {
6472
if apierrors.IsNotFound(err) {
6573
log.Info("Not found LB service", "req", req.NamespacedName)
66-
return ResultNormal, client.IgnoreNotFound(err)
74+
if _, delErr := r.DNS.DeleteRecordByOwnerNN(ctx, dns.ResourceKindService, req.Namespace, req.Name); delErr != nil {
75+
log.Error(delErr, "Failed to delete DNS records for Service", "Namespace", req.Namespace, "Name", req.Name)
76+
return common.ResultRequeueAfter10sec, delErr
77+
}
78+
return ResultNormal, nil
6779
}
6880
log.Error(err, "Failed to fetch LB service", "req", req.NamespacedName)
6981
return common.ResultRequeueAfter10sec, err
7082
}
7183

72-
if service.Spec.Type == v1.ServiceTypeLoadBalancer {
73-
log.Info("Reconciling LB service", "LBService", req.NamespacedName)
74-
log.Debug("Reconciling LB Service", "name", service.Name, "version", service.ResourceVersion, "status", service.Status)
75-
metrics.CounterInc(r.Service.NSXConfig, metrics.ControllerSyncTotal, MetricResType)
84+
if service.Spec.Type != v1.ServiceTypeLoadBalancer || !service.ObjectMeta.DeletionTimestamp.IsZero() {
85+
// Try to delete DNS records for Service when it is not a LoadBalancer or is marked for deletion
86+
if _, err := r.DNS.DeleteRecordByOwnerNN(ctx, dns.ResourceKindService, service.Namespace, service.Name); err != nil {
87+
log.Error(err, "Failed to delete DNS records for Service", "Namespace", service.Namespace, "Name", service.Name)
88+
return common.ResultRequeueAfter10sec, err
89+
}
90+
if uerr := r.removeServiceDNSReadyCondition(ctx, req.NamespacedName); uerr != nil {
91+
log.Error(uerr, "Failed to clear Service DNS Ready condition", "Service", req.NamespacedName.String())
92+
return common.ResultRequeueAfter10sec, uerr
93+
}
94+
return ResultNormal, nil
95+
}
7696

77-
if service.ObjectMeta.DeletionTimestamp.IsZero() {
78-
metrics.CounterInc(r.Service.NSXConfig, metrics.ControllerUpdateTotal, MetricResType)
79-
err := updateSuccess(r, ctx, service)
80-
if err != nil {
81-
log.Error(err, "Failed to update LB service", "Name", service.Name, "Namespace", service.Namespace)
82-
return common.ResultRequeueAfter10sec, err
83-
}
97+
if isServiceDNSSkipped(service.GetAnnotations()) {
98+
log.Info("Skipping DNS reconcile for LB service due to annotation", "Service", req.NamespacedName)
99+
if _, err := r.DNS.DeleteRecordByOwnerNN(ctx, dns.ResourceKindService, service.Namespace, service.Name); err != nil {
100+
log.Error(err, "Failed to delete DNS records for skipped Service", "Service", req.NamespacedName)
101+
return common.ResultRequeueAfter10sec, err
84102
}
103+
if uerr := r.removeServiceDNSReadyCondition(ctx, req.NamespacedName); uerr != nil {
104+
log.Error(uerr, "Failed to clear Service DNS Ready condition for skipped Service", "Service", req.NamespacedName.String())
105+
return common.ResultRequeueAfter10sec, uerr
106+
}
107+
return ResultNormal, nil
108+
}
109+
110+
log.Info("Reconciling LB service", "LBService", req.NamespacedName)
111+
log.Debug("Reconciling LB Service", "name", service.Name, "version", service.ResourceVersion, "status", service.Status)
112+
metrics.CounterInc(r.Service.NSXConfig, metrics.ControllerSyncTotal, MetricResType)
113+
114+
if err := r.reconcileLoadBalancerServiceDNS(ctx, service); err != nil {
115+
log.Error(err, "Failed to reconcile DNS for LoadBalancer Service", "Name", service.Name, "Namespace", service.Namespace)
116+
return common.ResultRequeueAfter10sec, err
117+
}
118+
119+
if err := updateSuccess(r, ctx, service); err != nil {
120+
log.Error(err, "Failed to update LB service", "Name", service.Name, "Namespace", service.Namespace)
121+
return common.ResultRequeueAfter10sec, err
85122
}
86123

87124
return ResultNormal, nil
@@ -119,13 +156,18 @@ func (r *ServiceLbReconciler) setServiceLbStatus(ctx context.Context, lbService
119156
}
120157

121158
// setupWithManager registers the reconciler with mgr as a controller for core v1 Services.
// In the added (+) lines the builder additionally watches v1alpha1.NetworkInfo objects: events that pass
// predicateNetworkInfoAllowedDNSDomainsChanged() are turned into reconcile requests by
// r.enqueueLBServiceRequestsFromNetworkInfo. MaxConcurrentReconciles is taken from common.NumReconcile().
// It returns whatever error Complete reports.
func (r *ServiceLbReconciler) setupWithManager(mgr ctrl.Manager) error {
122-
return ctrl.NewControllerManagedBy(mgr).
159+
b := ctrl.NewControllerManagedBy(mgr).
123160
For(&v1.Service{}).
161+
Watches(
162+
&v1alpha1.NetworkInfo{},
163+
handler.EnqueueRequestsFromMapFunc(r.enqueueLBServiceRequestsFromNetworkInfo),
164+
builder.WithPredicates(predicateNetworkInfoAllowedDNSDomainsChanged()),
165+
).
124166
WithOptions(
125167
controller.Options{
126168
MaxConcurrentReconciles: common.NumReconcile(),
127-
}).
128-
Complete(r)
169+
})
170+
return b.Complete(r)
129171
}
130172

131173
// Start setup manager
@@ -172,18 +214,82 @@ func (r *ServiceLbReconciler) StartController(mgr ctrl.Manager, _ webhook.Server
172214
log.Error(err, "Failed to create controller", "controller", "ServiceLb")
173215
return err
174216
}
217+
go common.GenericGarbageCollector(make(chan bool), servicecommon.GCInterval, r.CollectGarbage)
175218
return nil
176219
}
177220

221+
// isServiceDNSSkipped reports whether the Service has opted out of DNS management via the skip annotation.
// Only the presence of the servicecommon.AnnotationsDNSSkip key is checked; the annotation's value is ignored.
// A nil annotations map yields false.
222+
func isServiceDNSSkipped(annotations map[string]string) bool {
223+
// Presence check only: any value, including the empty string, counts as opted out.
_, ok := annotations[servicecommon.AnnotationsDNSSkip]
224+
return ok
225+
}
226+
227+
// listLoadBalancerServicesWithDNSAnnotation returns Service NNs that should retain DNS rows (LB, not terminating, hostname annotation).
// It lists Services through c without any list options (presumably cluster-wide; confirm against the
// client's cache configuration) and keeps a Service only when all of the following hold:
//   - Spec.Type is LoadBalancer and DeletionTimestamp is zero,
//   - the DNS skip annotation is absent (isServiceDNSSkipped), and
//   - parseDNSHostnamesFromServiceAnnotation yields at least one entry for its annotations.
// A List error is returned unchanged together with a nil set.
228+
func listLoadBalancerServicesWithDNSAnnotation(ctx context.Context, c client.Client) (sets.Set[types.NamespacedName], error) {
229+
svcList := &v1.ServiceList{}
230+
if err := c.List(ctx, svcList); err != nil {
231+
return nil, err
232+
}
233+
nnSet := sets.New[types.NamespacedName]()
234+
for i := range svcList.Items {
235+
// Take the element's address rather than copying the Service struct on every iteration.
svc := &svcList.Items[i]
236+
// Not a LoadBalancer, or already marked for deletion: not eligible.
if svc.Spec.Type != v1.ServiceTypeLoadBalancer || !svc.ObjectMeta.DeletionTimestamp.IsZero() {
237+
continue
238+
}
239+
// Explicit opt-out via the skip annotation.
if isServiceDNSSkipped(svc.GetAnnotations()) {
240+
continue
241+
}
242+
// No hostnames parsed from the annotations: nothing to keep for this Service.
if len(parseDNSHostnamesFromServiceAnnotation(svc.GetAnnotations())) == 0 {
243+
continue
244+
}
245+
nnSet.Insert(types.NamespacedName{Namespace: svc.Namespace, Name: svc.Name})
246+
}
247+
return nnSet, nil
248+
}
249+
178250
// CollectGarbage deletes DNS records whose Service owner is no longer eligible for DNS management.
// It compares the owners that r.DNS reports under dns.ResourceKindService with the set returned by
// listLoadBalancerServicesWithDNSAnnotation. For every owner missing from that set it deletes the owner's
// records and then clears the Service's DNS Ready condition. Per-owner failures are logged and collected so
// the remaining owners are still processed; a single error summarising them is returned at the end.
// It is a no-op when r.DNS is nil, and it returns early with the list error when Services cannot be listed.
func (r *ServiceLbReconciler) CollectGarbage(ctx context.Context) error {
251+
// DNS is optional: NewServiceLbReconciler leaves it nil when no DNS record service is passed in.
// NOTE(review): the visible Reconcile hunk calls r.DNS.DeleteRecordByOwnerNN without an equivalent
// nil check — confirm DNS is always set whenever the controller actually reconciles.
if r.DNS == nil {
252+
return nil
253+
}
254+
apiSet, err := listLoadBalancerServicesWithDNSAnnotation(ctx, r.Client)
255+
if err != nil {
256+
log.Error(err, "Service LB GC: failed to list Services")
257+
return err
258+
}
259+
ownersByKind := r.DNS.ListRecordOwnerResource()
260+
cachedServices := ownersByKind[dns.ResourceKindService]
261+
var errs []error
262+
for nn := range cachedServices {
263+
// Owner is still an eligible Service: keep its records.
if apiSet.Has(nn) {
264+
continue
265+
}
266+
if _, err := r.DNS.DeleteRecordByOwnerNN(ctx, dns.ResourceKindService, nn.Namespace, nn.Name); err != nil {
267+
log.Error(err, "Service LB GC: failed to delete DNS records for Service owner missing from API or no longer eligible",
268+
"Namespace", nn.Namespace, "Name", nn.Name)
269+
// Skip the condition update for this owner since the delete failed; continue with the others.
errs = append(errs, err)
270+
continue
271+
}
272+
// NOTE(review): nn may name a Service that no longer exists in the API — confirm that
// removeServiceDNSReadyCondition tolerates NotFound, otherwise this path reports a spurious error.
if err := r.removeServiceDNSReadyCondition(ctx, nn); err != nil {
273+
log.Error(err, "Service LB GC: failed to clear Service DNS Ready condition", "Namespace", nn.Namespace, "Name", nn.Name)
274+
errs = append(errs, err)
275+
}
276+
}
277+
if len(errs) > 0 {
278+
// %v flattens the collected errors into text, so callers cannot unwrap the individual causes.
return fmt.Errorf("service LB garbage collection encountered %d error(s): %v", len(errs), errs)
279+
}
179280
return nil
180281
}
181282

182-
func NewServiceLbReconciler(mgr ctrl.Manager, commonService servicecommon.Service) *ServiceLbReconciler {
283+
func NewServiceLbReconciler(mgr ctrl.Manager, commonService servicecommon.Service, dnsRecordService *dns.DNSRecordService) *ServiceLbReconciler {
183284
if isServiceLbStatusIpModeSupported(mgr.GetConfig()) {
285+
var dnsProv dns.DNSRecordProvider
286+
if dnsRecordService != nil {
287+
dnsProv = dnsRecordService
288+
}
184289
serviceLbReconciler := &ServiceLbReconciler{
185290
Client: mgr.GetClient(),
186291
Scheme: mgr.GetScheme(),
292+
DNS: dnsProv,
187293
Recorder: mgr.GetEventRecorderFor("serviceLb-controller"), //nolint:staticcheck // record.EventRecorder; StatusUpdater not on events.EventRecorder yet
188294
}
189295
serviceLbReconciler.Service = &commonService

0 commit comments

Comments
 (0)