Skip to content

Commit 1f92f3a

Browse files
committed
changes from review
1 parent 0e3e761 commit 1f92f3a

File tree

3 files changed

+69
-23
lines changed

3 files changed

+69
-23
lines changed

cmd/thv-operator/controllers/virtualmcpserver_controller.go

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1668,12 +1668,12 @@ func (r *VirtualMCPServerReconciler) discoverBackends(
16681668

16691669
for i := range discoveredBackends {
16701670
backend := &discoveredBackends[i]
1671-
if healthStat, found := healthStatus[backend.Name]; found {
1671+
if healthInfo, found := healthStatus[backend.Name]; found {
16721672
// Map vmcp health status to CRD backend status
16731673
// vmcp statuses: healthy, unhealthy, degraded, unknown
16741674
// CRD statuses: ready, unavailable, degraded, unknown
16751675
var newStatus string
1676-
switch healthStat {
1676+
switch healthInfo.Status {
16771677
case "healthy":
16781678
newStatus = mcpv1alpha1.BackendStatusReady
16791679
case "unhealthy":
@@ -1687,15 +1687,20 @@ func (r *VirtualMCPServerReconciler) discoverBackends(
16871687
continue
16881688
}
16891689

1690-
// Only log if status changed
1690+
// Update status if changed
16911691
if newStatus != backend.Status {
16921692
ctxLogger.V(1).Info("Backend health check updated status",
16931693
"name", backend.Name,
16941694
"old_status", backend.Status,
16951695
"new_status", newStatus,
1696-
"health_status", healthStat)
1696+
"health_status", healthInfo.Status)
16971697
backend.Status = newStatus
16981698
}
1699+
1700+
// Update LastHealthCheck with actual health check timestamp from vmcp
1701+
if !healthInfo.LastCheckTime.IsZero() {
1702+
backend.LastHealthCheck = metav1.NewTime(healthInfo.LastCheckTime)
1703+
}
16991704
}
17001705
}
17011706
} else {
@@ -2145,6 +2150,12 @@ func (*VirtualMCPServerReconciler) vmcpReferencesCompositeToolDefinition(
21452150
return false
21462151
}
21472152

2153+
// BackendHealthInfo contains health information for a single backend
2154+
type BackendHealthInfo struct {
2155+
Status string
2156+
LastCheckTime time.Time
2157+
}
2158+
21482159
// BackendHealthStatusResponse represents the health status response from the vmcp health API
21492160
type BackendHealthStatusResponse struct {
21502161
Backends []struct {
@@ -2157,12 +2168,12 @@ type BackendHealthStatusResponse struct {
21572168
} `json:"backends"`
21582169
}
21592170

2160-
// queryVMCPHealthStatus queries the vmcp health endpoint and returns backend health status.
2171+
// queryVMCPHealthStatus queries the vmcp health endpoint and returns backend health information.
21612172
// Returns nil if health monitoring is not enabled or if there's an error.
21622173
func (*VirtualMCPServerReconciler) queryVMCPHealthStatus(
21632174
ctx context.Context,
21642175
vmcpURL string,
2165-
) map[string]string {
2176+
) map[string]*BackendHealthInfo {
21662177
ctxLogger := log.FromContext(ctx)
21672178

21682179
// Construct health endpoint URL
@@ -2208,10 +2219,13 @@ func (*VirtualMCPServerReconciler) queryVMCPHealthStatus(
22082219
return nil
22092220
}
22102221

2211-
// Convert to map of backendID -> status
2212-
healthStatus := make(map[string]string)
2222+
// Convert to map of backendID -> health info
2223+
healthStatus := make(map[string]*BackendHealthInfo)
22132224
for _, backend := range healthResp.Backends {
2214-
healthStatus[backend.BackendID] = backend.Status
2225+
healthStatus[backend.BackendID] = &BackendHealthInfo{
2226+
Status: backend.Status,
2227+
LastCheckTime: backend.LastCheckTime,
2228+
}
22152229
}
22162230

22172231
ctxLogger.V(1).Info("Retrieved health status from vmcp server",

test/e2e/thv-operator/virtualmcp/helpers.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,44 @@ func WaitForVirtualMCPServerReady(
7272
}, timeout, pollingInterval).Should(gomega.Succeed())
7373
}
7474

75+
// WaitForVirtualMCPServerDeployed waits for a VirtualMCPServer deployment to be running
76+
// without requiring the Ready condition to be True. This is useful for health monitoring tests
77+
// where some backends may intentionally be unhealthy.
78+
func WaitForVirtualMCPServerDeployed(
79+
ctx context.Context,
80+
c client.Client,
81+
name, namespace string,
82+
timeout time.Duration,
83+
pollingInterval time.Duration,
84+
) {
85+
vmcpServer := &mcpv1alpha1.VirtualMCPServer{}
86+
87+
gomega.Eventually(func() error {
88+
if err := c.Get(ctx, types.NamespacedName{
89+
Name: name,
90+
Namespace: namespace,
91+
}, vmcpServer); err != nil {
92+
return err
93+
}
94+
95+
// Check that the VirtualMCPServer has a URL (indicating it's been deployed)
96+
if vmcpServer.Status.URL == "" {
97+
return fmt.Errorf("VirtualMCPServer URL not set yet")
98+
}
99+
100+
// Check that pods are running (but not necessarily all backends healthy)
101+
labels := map[string]string{
102+
"app.kubernetes.io/name": "vmcp",
103+
"app.kubernetes.io/instance": name,
104+
}
105+
if err := checkPodsReady(ctx, c, namespace, labels); err != nil {
106+
return fmt.Errorf("VirtualMCPServer pods not ready: %w", err)
107+
}
108+
109+
return nil
110+
}, timeout, pollingInterval).Should(gomega.Succeed())
111+
}
112+
75113
// checkPodsReady checks if all pods matching the given labels are ready
76114
func checkPodsReady(ctx context.Context, c client.Client, namespace string, labels map[string]string) error {
77115
podList := &corev1.PodList{}

test/e2e/thv-operator/virtualmcp/virtualmcp_health_monitoring_test.go

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,10 @@ var _ = Describe("VirtualMCPServer Health Monitoring", Ordered, func() {
121121
}
122122
Expect(k8sClient.Create(ctx, vmcpServer)).To(Succeed())
123123

124-
By("Waiting for VirtualMCPServer to be ready")
125-
WaitForVirtualMCPServerReady(ctx, k8sClient, vmcpServerName, testNamespace, timeout, pollingInterval)
124+
By("Waiting for VirtualMCPServer to be deployed")
125+
// Use WaitForVirtualMCPServerDeployed instead of WaitForVirtualMCPServerReady
126+
// because we intentionally have an unhealthy backend, so Ready will be False
127+
WaitForVirtualMCPServerDeployed(ctx, k8sClient, vmcpServerName, testNamespace, timeout, pollingInterval)
126128
})
127129

128130
AfterAll(func() {
@@ -179,9 +181,7 @@ var _ = Describe("VirtualMCPServer Health Monitoring", Ordered, func() {
179181
})
180182

181183
It("should report healthy status for working backends", func() {
182-
// Wait for health checks to run (at least 2 intervals to stabilize)
183-
time.Sleep(15 * time.Second)
184-
184+
// Eventually will poll until health checks stabilize
185185
vmcpServer := &mcpv1alpha1.VirtualMCPServer{}
186186
Eventually(func() error {
187187
if err := k8sClient.Get(ctx, types.NamespacedName{
@@ -215,9 +215,8 @@ var _ = Describe("VirtualMCPServer Health Monitoring", Ordered, func() {
215215
})
216216

217217
It("should report unhealthy status for failing backend", func() {
218-
// Wait for unhealthy threshold to be exceeded (at least unhealthyThreshold * interval)
218+
// Eventually will poll until the unhealthy threshold is exceeded
219219
By(fmt.Sprintf("Waiting for unhealthy backend to fail %d consecutive health checks", unhealthyThreshold))
220-
time.Sleep(time.Duration(unhealthyThreshold+1) * 5 * time.Second)
221220

222221
vmcpServer := &mcpv1alpha1.VirtualMCPServer{}
223222
Eventually(func() error {
@@ -263,10 +262,7 @@ var _ = Describe("VirtualMCPServer Health Monitoring", Ordered, func() {
263262
}
264263
}
265264

266-
// Wait for at least 2 health check intervals
267-
time.Sleep(12 * time.Second)
268-
269-
// Verify timestamps have been updated
265+
// Eventually will poll until timestamps are updated (after at least 2 health check intervals)
270266
Eventually(func() error {
271267
if err := k8sClient.Get(ctx, types.NamespacedName{
272268
Name: vmcpServerName,
@@ -320,9 +316,7 @@ var _ = Describe("VirtualMCPServer Health Monitoring", Ordered, func() {
320316
}, timeout, pollingInterval).Should(Succeed())
321317

322318
By("Waiting for health monitoring to detect recovery")
323-
// Give health monitor time to detect the recovery (a few health check intervals)
324-
time.Sleep(20 * time.Second)
325-
319+
// Eventually will poll until health monitoring detects the recovery
326320
vmcpServer := &mcpv1alpha1.VirtualMCPServer{}
327321
Eventually(func() error {
328322
if err := k8sClient.Get(ctx, types.NamespacedName{

0 commit comments

Comments
 (0)