Skip to content

Commit e338118

Browse files
Wait for libvirt connection in hypervisor controller
1 parent 21b8e95 commit e338118

2 files changed

Lines changed: 180 additions & 5 deletions

File tree

internal/controller/hypervisor_controller.go

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@ type HypervisorReconciler struct {
5858

5959
// Channel that can be used to trigger reconcile events.
6060
reconcileCh chan event.GenericEvent
61+
62+
// An interval that determines how long to wait between connection attempts
63+
// to libvirt. This is used in the Start method when trying to connect to
64+
// libvirt, and can be set to a lower value for testing purposes.
65+
libvirtConnectInterval time.Duration
6166
}
6267

6368
const (
@@ -195,6 +200,9 @@ func (r *HypervisorReconciler) Reconcile(ctx context.Context, req ctrl.Request)
195200
Message: fmt.Sprintf("unable to connect to libvirtd: %v", err),
196201
Reason: "ConnectFailed",
197202
})
203+
// TODO: When libvirt is down, we should also set the overall Ready
204+
// condition to false, because without libvirt connection, we won't be
205+
// able to detect capacity and other scheduling-relevant details.
198206
} else {
199207
// We're connected.
200208
meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{
@@ -326,10 +334,44 @@ func (r *HypervisorReconciler) Start(ctx context.Context) error {
326334
log := logger.FromContext(ctx, "controller", "hypervisor")
327335
log.Info("starting libvirt event subscription")
328336

329-
// Ensure we're connected to libvirt.
330-
if err := r.Libvirt.Connect(); err != nil {
337+
// Get the hypervisor we will reconcile.
338+
var hypervisor kvmv1.Hypervisor
339+
key := client.ObjectKey{Name: sys.Hostname} // Cluster-scoped
340+
if err := r.Get(ctx, key, &hypervisor); err != nil {
341+
return fmt.Errorf("unable to get hypervisor: %w", err)
342+
}
343+
344+
// Block until we're connected to libvirt.
345+
for {
346+
// Exit if the context is done, e.g. when the manager is shutting down.
347+
if ctx.Err() != nil {
348+
return fmt.Errorf("context done while trying to connect to libvirt: %w", ctx.Err())
349+
}
350+
err := r.Libvirt.Connect()
351+
if err == nil {
352+
log.Info("connected to libvirt")
353+
break // Connected successfully
354+
}
331355
log.Error(err, "unable to connect to libvirt")
332-
return err
356+
// Set the hypervisor's LibVirtType condition to false with the
357+
// error message, so that it's visible in the status.
358+
meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{
359+
Type: LibVirtType, // TODO: This should be a kvmv1 condition.
360+
Status: metav1.ConditionFalse,
361+
Message: fmt.Sprintf("unable to connect to libvirt: %v", err),
362+
Reason: "ConnectFailed",
363+
})
364+
patch := client.MergeFromWithOptions(hypervisor.DeepCopy(), client.MergeFromWithOptimisticLock{})
365+
if err := r.Status().Patch(ctx, &hypervisor, patch); err != nil {
366+
log.Error(err, "unable to update hypervisor status after failed libvirt connection")
367+
}
368+
log.Info("updated hypervisor status after failed libvirt connection")
369+
timeToSleep := r.libvirtConnectInterval
370+
if timeToSleep == 0 {
371+
timeToSleep = 5 * time.Second // default value
372+
}
373+
log.Info("retrying libvirt connection after sleeping", "duration", timeToSleep)
374+
time.Sleep(timeToSleep)
333375
}
334376

335377
// Run a ticker which reconciles the hypervisor resource every minute.

internal/controller/hypervisor_controller_test.go

Lines changed: 135 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,26 @@ var _ = Describe("Hypervisor Controller", func() {
4545
Context("When testing Start method", func() {
4646
It("should successfully start and subscribe to libvirt events", func() {
4747
ctx := context.Background()
48+
49+
// Create a hypervisor resource for this test
50+
hypervisorName := "start-success-test-hypervisor"
51+
originalHostname := sys.Hostname
52+
sys.Hostname = hypervisorName
53+
defer func() {
54+
sys.Hostname = originalHostname
55+
}()
56+
57+
hypervisor := &kvmv1.Hypervisor{
58+
ObjectMeta: metav1.ObjectMeta{
59+
Name: hypervisorName,
60+
},
61+
}
62+
Expect(k8sClient.Create(ctx, hypervisor)).To(Succeed())
63+
defer func() {
64+
err := k8sClient.Delete(ctx, hypervisor)
65+
Expect(err).NotTo(HaveOccurred())
66+
}()
67+
4868
eventCallbackCalled := false
4969

5070
controllerReconciler := &HypervisorReconciler{
@@ -68,7 +88,27 @@ var _ = Describe("Hypervisor Controller", func() {
6888
})
6989

7090
It("should fail when libvirt connection fails", func() {
71-
ctx := context.Background()
91+
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
92+
defer cancel()
93+
94+
// Create a hypervisor resource for this test
95+
hypervisorName := "start-fail-test-hypervisor"
96+
originalHostname := sys.Hostname
97+
sys.Hostname = hypervisorName
98+
defer func() {
99+
sys.Hostname = originalHostname
100+
}()
101+
102+
hypervisor := &kvmv1.Hypervisor{
103+
ObjectMeta: metav1.ObjectMeta{
104+
Name: hypervisorName,
105+
},
106+
}
107+
Expect(k8sClient.Create(context.Background(), hypervisor)).To(Succeed())
108+
defer func() {
109+
err := k8sClient.Delete(context.Background(), hypervisor)
110+
Expect(err).NotTo(HaveOccurred())
111+
}()
72112

73113
controllerReconciler := &HypervisorReconciler{
74114
Client: k8sClient,
@@ -78,12 +118,105 @@ var _ = Describe("Hypervisor Controller", func() {
78118
return errors.New("connection failed")
79119
},
80120
},
121+
reconcileCh: make(chan event.GenericEvent, 1),
122+
libvirtConnectInterval: 10 * time.Millisecond,
123+
}
124+
125+
// Start runs in a goroutine so we can cancel the context
126+
done := make(chan error, 1)
127+
go func() {
128+
done <- controllerReconciler.Start(ctx)
129+
}()
130+
131+
// Wait for either completion or context cancellation
132+
select {
133+
case <-ctx.Done():
134+
// The context timed out, which is expected since the Start method
135+
// retries indefinitely until connected. Note that this branch does not
136+
// assert anything about Start; it only tolerates the timeout.
137+
case err := <-done:
138+
// If Start returns, it should be due to context cancellation
139+
Expect(err).To(HaveOccurred())
140+
Expect(err.Error()).To(ContainSubstring("context done while trying to connect to libvirt"))
141+
}
142+
})
143+
144+
// Start is expected to return an error when the Hypervisor object named after
// the local hostname cannot be fetched, even though the libvirt mock itself
// would connect without problems.
It("should fail when hypervisor resource does not exist", func() {
	// Point the hostname at an object this spec never creates, and put the
	// previous value back once the spec is done.
	savedHostname := sys.Hostname
	sys.Hostname = "non-existent-hypervisor"
	defer func() { sys.Hostname = savedHostname }()

	reconciler := &HypervisorReconciler{
		Client: k8sClient,
		Scheme: k8sClient.Scheme(),
		Libvirt: &libvirt.InterfaceMock{
			// The connection always succeeds, so any error observed below
			// does not originate from the libvirt mock.
			ConnectFunc: func() error { return nil },
		},
		reconcileCh: make(chan event.GenericEvent, 1),
	}

	startErr := reconciler.Start(context.Background())
	Expect(startErr).To(HaveOccurred())
	Expect(startErr.Error()).To(ContainSubstring("unable to get hypervisor"))
})
169+
170+
It("should retry libvirt connection and succeed after initial failures", func() {
171+
ctx := context.Background()
172+
173+
// Create a hypervisor resource for this test
174+
hypervisorName := "start-retry-test-hypervisor"
175+
originalHostname := sys.Hostname
176+
sys.Hostname = hypervisorName
177+
defer func() {
178+
sys.Hostname = originalHostname
179+
}()
180+
181+
hypervisor := &kvmv1.Hypervisor{
182+
ObjectMeta: metav1.ObjectMeta{
183+
Name: hypervisorName,
184+
},
185+
}
186+
Expect(k8sClient.Create(ctx, hypervisor)).To(Succeed())
187+
defer func() {
188+
err := k8sClient.Delete(ctx, hypervisor)
189+
Expect(err).NotTo(HaveOccurred())
190+
}()
191+
192+
// Track connection attempts
193+
connectAttempts := 0
194+
eventCallbackCalled := false
195+
196+
controllerReconciler := &HypervisorReconciler{
197+
Client: k8sClient,
198+
Scheme: k8sClient.Scheme(),
199+
Libvirt: &libvirt.InterfaceMock{
200+
ConnectFunc: func() error {
201+
connectAttempts++
202+
// Fail first 2 attempts, succeed on 3rd
203+
if connectAttempts < 3 {
204+
return errors.New("connection failed")
205+
}
206+
return nil
207+
},
208+
WatchDomainChangesFunc: func(eventId golibvirt.DomainEventID, handlerId string, handler func(context.Context, any)) {
209+
eventCallbackCalled = true
210+
},
211+
},
212+
reconcileCh: make(chan event.GenericEvent, 1),
213+
libvirtConnectInterval: 10 * time.Millisecond, // Use short interval for fast test
214+
}
215+
216+
err := controllerReconciler.Start(ctx)
217+
Expect(err).NotTo(HaveOccurred())
218+
Expect(connectAttempts).To(Equal(3))
219+
Expect(eventCallbackCalled).To(BeTrue())
87220
})
88221
})
89222

0 commit comments

Comments
 (0)