Skip to content

Commit 3d3b4e2

Browse files
committed
changes from review
1 parent 6a2a382 commit 3d3b4e2

File tree

3 files changed

+69
-23
lines changed

3 files changed

+69
-23
lines changed

cmd/thv-operator/controllers/virtualmcpserver_controller.go

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1672,12 +1672,12 @@ func (r *VirtualMCPServerReconciler) discoverBackends(
16721672

16731673
for i := range discoveredBackends {
16741674
backend := &discoveredBackends[i]
1675-
if healthStat, found := healthStatus[backend.Name]; found {
1675+
if healthInfo, found := healthStatus[backend.Name]; found {
16761676
// Map vmcp health status to CRD backend status
16771677
// vmcp statuses: healthy, unhealthy, degraded, unknown
16781678
// CRD statuses: ready, unavailable, degraded, unknown
16791679
var newStatus string
1680-
switch healthStat {
1680+
switch healthInfo.Status {
16811681
case "healthy":
16821682
newStatus = mcpv1alpha1.BackendStatusReady
16831683
case "unhealthy":
@@ -1691,15 +1691,20 @@ func (r *VirtualMCPServerReconciler) discoverBackends(
16911691
continue
16921692
}
16931693

1694-
// Only log if status changed
1694+
// Update status if changed
16951695
if newStatus != backend.Status {
16961696
ctxLogger.V(1).Info("Backend health check updated status",
16971697
"name", backend.Name,
16981698
"old_status", backend.Status,
16991699
"new_status", newStatus,
1700-
"health_status", healthStat)
1700+
"health_status", healthInfo.Status)
17011701
backend.Status = newStatus
17021702
}
1703+
1704+
// Update LastHealthCheck with actual health check timestamp from vmcp
1705+
if !healthInfo.LastCheckTime.IsZero() {
1706+
backend.LastHealthCheck = metav1.NewTime(healthInfo.LastCheckTime)
1707+
}
17031708
}
17041709
}
17051710
} else {
@@ -2149,6 +2154,12 @@ func (*VirtualMCPServerReconciler) vmcpReferencesCompositeToolDefinition(
21492154
return false
21502155
}
21512156

2157+
// BackendHealthInfo contains health information for a single backend
2158+
type BackendHealthInfo struct {
2159+
Status string
2160+
LastCheckTime time.Time
2161+
}
2162+
21522163
// BackendHealthStatusResponse represents the health status response from the vmcp health API
21532164
type BackendHealthStatusResponse struct {
21542165
Backends []struct {
@@ -2161,12 +2172,12 @@ type BackendHealthStatusResponse struct {
21612172
} `json:"backends"`
21622173
}
21632174

2164-
// queryVMCPHealthStatus queries the vmcp health endpoint and returns backend health status.
2175+
// queryVMCPHealthStatus queries the vmcp health endpoint and returns backend health information.
21652176
// Returns nil if health monitoring is not enabled or if there's an error.
21662177
func (*VirtualMCPServerReconciler) queryVMCPHealthStatus(
21672178
ctx context.Context,
21682179
vmcpURL string,
2169-
) map[string]string {
2180+
) map[string]*BackendHealthInfo {
21702181
ctxLogger := log.FromContext(ctx)
21712182

21722183
// Construct health endpoint URL
@@ -2212,10 +2223,13 @@ func (*VirtualMCPServerReconciler) queryVMCPHealthStatus(
22122223
return nil
22132224
}
22142225

2215-
// Convert to map of backendID -> status
2216-
healthStatus := make(map[string]string)
2226+
// Convert to map of backendID -> health info
2227+
healthStatus := make(map[string]*BackendHealthInfo)
22172228
for _, backend := range healthResp.Backends {
2218-
healthStatus[backend.BackendID] = backend.Status
2229+
healthStatus[backend.BackendID] = &BackendHealthInfo{
2230+
Status: backend.Status,
2231+
LastCheckTime: backend.LastCheckTime,
2232+
}
22192233
}
22202234

22212235
ctxLogger.V(1).Info("Retrieved health status from vmcp server",

test/e2e/thv-operator/virtualmcp/helpers.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,44 @@ func WaitForVirtualMCPServerReady(
7272
}, timeout, pollingInterval).Should(gomega.Succeed())
7373
}
7474

75+
// WaitForVirtualMCPServerDeployed waits for a VirtualMCPServer deployment to be running
76+
// without requiring the Ready condition to be True. This is useful for health monitoring tests
77+
// where some backends may intentionally be unhealthy.
78+
func WaitForVirtualMCPServerDeployed(
79+
ctx context.Context,
80+
c client.Client,
81+
name, namespace string,
82+
timeout time.Duration,
83+
pollingInterval time.Duration,
84+
) {
85+
vmcpServer := &mcpv1alpha1.VirtualMCPServer{}
86+
87+
gomega.Eventually(func() error {
88+
if err := c.Get(ctx, types.NamespacedName{
89+
Name: name,
90+
Namespace: namespace,
91+
}, vmcpServer); err != nil {
92+
return err
93+
}
94+
95+
// Check that the VirtualMCPServer has a URL (indicating it's been deployed)
96+
if vmcpServer.Status.URL == "" {
97+
return fmt.Errorf("VirtualMCPServer URL not set yet")
98+
}
99+
100+
// Check that pods are running (but not necessarily all backends healthy)
101+
labels := map[string]string{
102+
"app.kubernetes.io/name": "vmcp",
103+
"app.kubernetes.io/instance": name,
104+
}
105+
if err := checkPodsReady(ctx, c, namespace, labels); err != nil {
106+
return fmt.Errorf("VirtualMCPServer pods not ready: %w", err)
107+
}
108+
109+
return nil
110+
}, timeout, pollingInterval).Should(gomega.Succeed())
111+
}
112+
75113
// checkPodsReady checks if all pods matching the given labels are ready
76114
func checkPodsReady(ctx context.Context, c client.Client, namespace string, labels map[string]string) error {
77115
podList := &corev1.PodList{}

test/e2e/thv-operator/virtualmcp/virtualmcp_health_monitoring_test.go

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,10 @@ var _ = Describe("VirtualMCPServer Health Monitoring", Ordered, func() {
121121
}
122122
Expect(k8sClient.Create(ctx, vmcpServer)).To(Succeed())
123123

124-
By("Waiting for VirtualMCPServer to be ready")
125-
WaitForVirtualMCPServerReady(ctx, k8sClient, vmcpServerName, testNamespace, timeout, pollingInterval)
124+
By("Waiting for VirtualMCPServer to be deployed")
125+
// Use WaitForVirtualMCPServerDeployed instead of WaitForVirtualMCPServerReady
126+
// because we intentionally have an unhealthy backend, so Ready will be False
127+
WaitForVirtualMCPServerDeployed(ctx, k8sClient, vmcpServerName, testNamespace, timeout, pollingInterval)
126128
})
127129

128130
AfterAll(func() {
@@ -179,9 +181,7 @@ var _ = Describe("VirtualMCPServer Health Monitoring", Ordered, func() {
179181
})
180182

181183
It("should report healthy status for working backends", func() {
182-
// Wait for health checks to run (at least 2 intervals to stabilize)
183-
time.Sleep(15 * time.Second)
184-
184+
// Eventually will poll until health checks stabilize
185185
vmcpServer := &mcpv1alpha1.VirtualMCPServer{}
186186
Eventually(func() error {
187187
if err := k8sClient.Get(ctx, types.NamespacedName{
@@ -215,9 +215,8 @@ var _ = Describe("VirtualMCPServer Health Monitoring", Ordered, func() {
215215
})
216216

217217
It("should report unhealthy status for failing backend", func() {
218-
// Wait for unhealthy threshold to be exceeded (at least unhealthyThreshold * interval)
218+
// Eventually will poll until the unhealthy threshold is exceeded
219219
By(fmt.Sprintf("Waiting for unhealthy backend to fail %d consecutive health checks", unhealthyThreshold))
220-
time.Sleep(time.Duration(unhealthyThreshold+1) * 5 * time.Second)
221220

222221
vmcpServer := &mcpv1alpha1.VirtualMCPServer{}
223222
Eventually(func() error {
@@ -263,10 +262,7 @@ var _ = Describe("VirtualMCPServer Health Monitoring", Ordered, func() {
263262
}
264263
}
265264

266-
// Wait for at least 2 health check intervals
267-
time.Sleep(12 * time.Second)
268-
269-
// Verify timestamps have been updated
265+
// Eventually will poll until timestamps are updated (after at least 2 health check intervals)
270266
Eventually(func() error {
271267
if err := k8sClient.Get(ctx, types.NamespacedName{
272268
Name: vmcpServerName,
@@ -320,9 +316,7 @@ var _ = Describe("VirtualMCPServer Health Monitoring", Ordered, func() {
320316
}, timeout, pollingInterval).Should(Succeed())
321317

322318
By("Waiting for health monitoring to detect recovery")
323-
// Give health monitor time to detect the recovery (a few health check intervals)
324-
time.Sleep(20 * time.Second)
325-
319+
// Eventually will poll until health monitoring detects the recovery
326320
vmcpServer := &mcpv1alpha1.VirtualMCPServer{}
327321
Eventually(func() error {
328322
if err := k8sClient.Get(ctx, types.NamespacedName{

0 commit comments

Comments
 (0)