|
7 | 7 | "encoding/json" |
8 | 8 | "fmt" |
9 | 9 | "maps" |
| 10 | + "net/http" |
10 | 11 | "reflect" |
11 | 12 | "strings" |
12 | 13 | "time" |
@@ -1656,6 +1657,53 @@ func (r *VirtualMCPServerReconciler) discoverBackends( |
1656 | 1657 | "authConfigRef", authConfigRef) |
1657 | 1658 | } |
1658 | 1659 |
|
| 1660 | + // Query vmcp health status and update backend statuses if health monitoring is enabled |
| 1661 | + // This provides real MCP health check results instead of just Pod/Phase status |
| 1662 | + if vmcp.Status.URL != "" { |
| 1663 | + healthStatus := r.queryVMCPHealthStatus(ctx, vmcp.Status.URL) |
| 1664 | + if healthStatus != nil { |
| 1665 | + ctxLogger.V(1).Info("Updating backend status from vmcp health checks", |
| 1666 | + "vmcp_url", vmcp.Status.URL, |
| 1667 | + "backend_count", len(healthStatus)) |
| 1668 | + |
| 1669 | + for i := range discoveredBackends { |
| 1670 | + backend := &discoveredBackends[i] |
| 1671 | + if healthStat, found := healthStatus[backend.Name]; found { |
| 1672 | + // Map vmcp health status to CRD backend status |
| 1673 | + // vmcp statuses: healthy, unhealthy, degraded, unknown |
| 1674 | + // CRD statuses: ready, unavailable, degraded, unknown |
| 1675 | + var newStatus string |
| 1676 | + switch healthStat { |
| 1677 | + case "healthy": |
| 1678 | + newStatus = mcpv1alpha1.BackendStatusReady |
| 1679 | + case "unhealthy": |
| 1680 | + newStatus = mcpv1alpha1.BackendStatusUnavailable |
| 1681 | + case "degraded": |
| 1682 | + newStatus = mcpv1alpha1.BackendStatusDegraded |
| 1683 | + case "unknown": |
| 1684 | + newStatus = mcpv1alpha1.BackendStatusUnknown |
| 1685 | + default: |
| 1686 | + // Keep existing status if health status is unexpected |
| 1687 | + continue |
| 1688 | + } |
| 1689 | + |
| 1690 | + // Only log if status changed |
| 1691 | + if newStatus != backend.Status { |
| 1692 | + ctxLogger.V(1).Info("Backend health check updated status", |
| 1693 | + "name", backend.Name, |
| 1694 | + "old_status", backend.Status, |
| 1695 | + "new_status", newStatus, |
| 1696 | + "health_status", healthStat) |
| 1697 | + backend.Status = newStatus |
| 1698 | + } |
| 1699 | + } |
| 1700 | + } |
| 1701 | + } else { |
| 1702 | + ctxLogger.V(1).Info("Health monitoring not enabled or failed to query vmcp health endpoint", |
| 1703 | + "vmcp_url", vmcp.Status.URL) |
| 1704 | + } |
| 1705 | + } |
| 1706 | + |
1659 | 1707 | return discoveredBackends, nil |
1660 | 1708 | } |
1661 | 1709 |
|
@@ -2096,3 +2144,79 @@ func (*VirtualMCPServerReconciler) vmcpReferencesCompositeToolDefinition( |
2096 | 2144 |
|
2097 | 2145 | return false |
2098 | 2146 | } |
| 2147 | + |
// BackendHealthStatusResponse is the JSON document decoded from the vmcp
// server's backend health endpoint. It carries one entry per backend that the
// vmcp server reported on.
type BackendHealthStatusResponse struct {
	// Backends holds the per-backend health records in the order the server
	// returned them.
	Backends []struct {
		// BackendID identifies the backend the record belongs to.
		BackendID string `json:"backendId"`
		// Status is the health state string reported by vmcp
		// (e.g. "healthy", "unhealthy", "degraded", "unknown").
		Status string `json:"status"`
		// ConsecutiveFailures is the failure counter reported by vmcp.
		ConsecutiveFailures int `json:"consecutiveFailures"`
		// LastCheckTime is the timestamp of the most recent health check.
		LastCheckTime time.Time `json:"lastCheckTime"`
		// LastError is the most recent error message; it may be absent.
		LastError string `json:"lastError,omitempty"`
		// LastTransitionTime is the timestamp of the last status change.
		LastTransitionTime time.Time `json:"lastTransitionTime"`
	} `json:"backends"`
}
| 2159 | + |
| 2160 | +// queryVMCPHealthStatus queries the vmcp health endpoint and returns backend health status. |
| 2161 | +// Returns nil if health monitoring is not enabled or if there's an error. |
| 2162 | +func (*VirtualMCPServerReconciler) queryVMCPHealthStatus( |
| 2163 | + ctx context.Context, |
| 2164 | + vmcpURL string, |
| 2165 | +) map[string]string { |
| 2166 | + ctxLogger := log.FromContext(ctx) |
| 2167 | + |
| 2168 | + // Construct health endpoint URL |
| 2169 | + healthURL := fmt.Sprintf("%s/api/backends/health", vmcpURL) |
| 2170 | + |
| 2171 | + // Create HTTP client with timeout |
| 2172 | + httpClient := &http.Client{ |
| 2173 | + Timeout: 5 * time.Second, |
| 2174 | + } |
| 2175 | + |
| 2176 | + // Create and execute request |
| 2177 | + req, err := http.NewRequestWithContext(ctx, http.MethodGet, healthURL, nil) |
| 2178 | + if err != nil { |
| 2179 | + ctxLogger.V(1).Error(err, "Failed to create health check request", "url", healthURL) |
| 2180 | + return nil |
| 2181 | + } |
| 2182 | + |
| 2183 | + resp, err := httpClient.Do(req) |
| 2184 | + if err != nil { |
| 2185 | + ctxLogger.V(1).Error(err, "Failed to query vmcp health endpoint", "url", healthURL) |
| 2186 | + return nil |
| 2187 | + } |
| 2188 | + defer resp.Body.Close() |
| 2189 | + |
| 2190 | + // Check status code |
| 2191 | + if resp.StatusCode == http.StatusServiceUnavailable { |
| 2192 | + // Health monitoring is not enabled on the vmcp server |
| 2193 | + ctxLogger.V(1).Info("Health monitoring not enabled on vmcp server", "url", healthURL) |
| 2194 | + return nil |
| 2195 | + } |
| 2196 | + |
| 2197 | + if resp.StatusCode != http.StatusOK { |
| 2198 | + ctxLogger.V(1).Info("Unexpected status code from vmcp health endpoint", |
| 2199 | + "url", healthURL, |
| 2200 | + "status_code", resp.StatusCode) |
| 2201 | + return nil |
| 2202 | + } |
| 2203 | + |
| 2204 | + // Parse response |
| 2205 | + var healthResp BackendHealthStatusResponse |
| 2206 | + if err := json.NewDecoder(resp.Body).Decode(&healthResp); err != nil { |
| 2207 | + ctxLogger.V(1).Error(err, "Failed to decode health response", "url", healthURL) |
| 2208 | + return nil |
| 2209 | + } |
| 2210 | + |
| 2211 | + // Convert to map of backendID -> status |
| 2212 | + healthStatus := make(map[string]string) |
| 2213 | + for _, backend := range healthResp.Backends { |
| 2214 | + healthStatus[backend.BackendID] = backend.Status |
| 2215 | + } |
| 2216 | + |
| 2217 | + ctxLogger.V(1).Info("Retrieved health status from vmcp server", |
| 2218 | + "url", healthURL, |
| 2219 | + "backend_count", len(healthStatus)) |
| 2220 | + |
| 2221 | + return healthStatus |
| 2222 | +} |
0 commit comments