@@ -86,6 +86,7 @@ type VirtualMCPServerReconciler struct {
8686 PlatformDetector * ctrlutil.SharedPlatformDetector
8787
8888 // healthStatusCache caches vmcp health endpoint responses to reduce HTTP overhead
89+ // Initialized in SetupWithManager before reconciliation starts (controller-runtime contract)
8990 healthStatusCache map [string ]* healthStatusCacheEntry
9091 healthStatusCacheMutex sync.RWMutex
9192}
@@ -205,14 +206,9 @@ func (r *VirtualMCPServerReconciler) Reconcile(ctx context.Context, req ctrl.Req
205206 return ctrl.Result {}, err
206207 }
207208
208- // Apply discovered backends to latestVMCP so updateVirtualMCPServerStatus can use them
209- // for phase determination. The statusManager has the updated backends from discoverBackends,
210- // but they haven't been applied to the CR yet.
211- if discoveredBackends != nil {
212- latestVMCP .Status .DiscoveredBackends = discoveredBackends
213- }
214-
215209 // Update status based on pod health using the latest Generation
210+ // Note: updateVirtualMCPServerStatus uses statusManager.GetDiscoveredBackends()
211+ // for phase determination, so discovered backends don't need to be applied here
216212 if err := r .updateVirtualMCPServerStatus (ctx , latestVMCP , statusManager ); err != nil {
217213 ctxLogger .Error (err , "Failed to update VirtualMCPServer status" )
218214 return ctrl.Result {}, err
@@ -231,16 +227,13 @@ func (r *VirtualMCPServerReconciler) Reconcile(ctx context.Context, req ctrl.Req
231227 if vmcp .Spec .Operational != nil && vmcp .Spec .Operational .FailureHandling != nil &&
232228 vmcp .Spec .Operational .FailureHandling .HealthCheckInterval != "" {
233229 // Parse health check interval to determine requeue time
234- // Note: We parse the duration string on each reconciliation rather than caching
235- // because time.ParseDuration is extremely fast (<1μs) and reconciliation frequency
236- // is already throttled (typically every 10s). Caching would add unnecessary complexity.
237230 interval , err := time .ParseDuration (vmcp .Spec .Operational .FailureHandling .HealthCheckInterval )
238231 if err != nil {
239232 // Invalid duration format - log warning and fall through to event-driven reconciliation
240233 // This should be caught by webhook validation, but we handle it gracefully here
241- ctxLogger .Error (err , "Invalid HealthCheckInterval format, health monitoring disabled " ,
234+ ctxLogger .Error (err , "Invalid HealthCheckInterval format, falling back to event-driven reconciliation " ,
242235 "health_check_interval" , vmcp .Spec .Operational .FailureHandling .HealthCheckInterval )
243- // Continue with event-driven reconciliation instead of periodic
236+ // Continue with event-driven reconciliation instead of periodic polling
244237 } else {
245238 // Requeue at a multiple of the health check interval to ensure we catch updates
246239 // without reconciling too frequently
@@ -1715,7 +1708,7 @@ func (r *VirtualMCPServerReconciler) discoverBackends(
17151708 // Query vmcp health status and update backend statuses if health monitoring is enabled
17161709 // This provides real MCP health check results instead of just Pod/Phase status
17171710 //
1718- // Performance: Health status responses are cached with a short TTL (10s) to reduce HTTP
1711+ // Performance: Health status responses are cached with healthStatusCacheTTL to reduce HTTP
17191712 // overhead from frequent reconciliations while maintaining relatively fresh health data.
17201713 // The vmcp health endpoint itself returns cached results from periodic health checks.
17211714 if vmcp .Status .URL != "" {
@@ -1764,7 +1757,14 @@ func (r *VirtualMCPServerReconciler) discoverBackends(
17641757 }
17651758 }
17661759
1760+ // Update LastHealthCheck with the actual health check timestamp from vmcp
1761+ // Do this BEFORE the shouldPreserveUnavailable check so the timestamp is always fresh
1762+ if ! healthInfo .LastCheckTime .IsZero () {
1763+ discoveredBackends [i ].LastHealthCheck = metav1 .NewTime (healthInfo .LastCheckTime )
1764+ }
1765+
17671766 if shouldPreserveUnavailable {
1767+ // Skip status update but keep timestamp fresh (already updated above)
17681768 continue
17691769 }
17701770
@@ -1777,11 +1777,6 @@ func (r *VirtualMCPServerReconciler) discoverBackends(
17771777 "health_status" , healthInfo .Status )
17781778 discoveredBackends [i ].Status = newStatus
17791779 }
1780-
1781- // Update LastHealthCheck with actual health check timestamp from vmcp
1782- if ! healthInfo .LastCheckTime .IsZero () {
1783- discoveredBackends [i ].LastHealthCheck = metav1 .NewTime (healthInfo .LastCheckTime )
1784- }
17851780 }
17861781 }
17871782 } else {
@@ -1861,6 +1856,9 @@ func (phaseChangePredicate) Update(e event.UpdateEvent) bool {
18611856 }
18621857 }
18631858
1859+ // Return false for any other type. This should never happen in practice because
1860+ // this predicate is only registered for MCPServer and MCPRemoteProxy watches
1861+ // in SetupWithManager(). The controller-runtime framework guarantees type safety.
18641862 return false
18651863}
18661864
@@ -2379,8 +2377,8 @@ func (r *VirtualMCPServerReconciler) queryVMCPHealthStatus(
23792377
23802378 // Create HTTP client with derived timeout
23812379 // Note: Uses default transport which validates TLS certificates.
2382- // If the vmcp server uses self-signed certificates, ensure proper cert configuration
2383- // (e.g., via cert-manager) or use HTTP for internal cluster communication .
2380+ // For self-signed certificates, use proper certificate management (e.g., cert-manager)
2381+ // to establish trust. Disabling TLS validation or using HTTP is not recommended .
23842382 httpClient := & http.Client {
23852383 Timeout : timeout ,
23862384 }
0 commit comments