Skip to content

Commit 29e9b57

Browse files
authored
Merge pull request #6914 from devtron-labs/fix/cluster-cache-list
fix: enhance cluster overview response with raw cluster capacity details and caching support
2 parents 20f5fc8 + 9a8e857 commit 29e9b57

4 files changed

Lines changed: 102 additions & 49 deletions

File tree

api/k8s/capacity/k8sCapacityRestHandler.go

Lines changed: 78 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,13 @@ import (
2121
"errors"
2222
"fmt"
2323
"github.com/devtron-labs/common-lib/utils"
24-
bean2 "github.com/devtron-labs/devtron/pkg/cluster/bean"
24+
clusterBean "github.com/devtron-labs/devtron/pkg/cluster/bean"
2525
"github.com/devtron-labs/devtron/pkg/cluster/environment"
2626
"github.com/devtron-labs/devtron/pkg/cluster/rbac"
2727
"github.com/devtron-labs/devtron/pkg/cluster/read"
2828
bean3 "github.com/devtron-labs/devtron/pkg/k8s/bean"
29+
overviewBean "github.com/devtron-labs/devtron/pkg/overview/bean"
30+
overviewCache "github.com/devtron-labs/devtron/pkg/overview/cache"
2931
"gopkg.in/go-playground/validator.v9"
3032
"net/http"
3133
"strconv"
@@ -53,15 +55,16 @@ type K8sCapacityRestHandler interface {
5355
EditNodeTaints(w http.ResponseWriter, r *http.Request)
5456
}
5557
type K8sCapacityRestHandlerImpl struct {
56-
logger *zap.SugaredLogger
57-
k8sCapacityService capacity.K8sCapacityService
58-
userService user.UserService
59-
enforcer casbin.Enforcer
60-
clusterService cluster.ClusterService
61-
environmentService environment.EnvironmentService
62-
clusterRbacService rbac.ClusterRbacService
63-
clusterReadService read.ClusterReadService
64-
validator *validator.Validate
58+
logger *zap.SugaredLogger
59+
k8sCapacityService capacity.K8sCapacityService
60+
userService user.UserService
61+
enforcer casbin.Enforcer
62+
clusterService cluster.ClusterService
63+
environmentService environment.EnvironmentService
64+
clusterRbacService rbac.ClusterRbacService
65+
clusterReadService read.ClusterReadService
66+
validator *validator.Validate
67+
clusterCacheService overviewCache.ClusterCacheService
6568
}
6669

6770
func NewK8sCapacityRestHandlerImpl(logger *zap.SugaredLogger,
@@ -70,17 +73,21 @@ func NewK8sCapacityRestHandlerImpl(logger *zap.SugaredLogger,
7073
clusterService cluster.ClusterService,
7174
environmentService environment.EnvironmentService,
7275
clusterRbacService rbac.ClusterRbacService,
73-
clusterReadService read.ClusterReadService, validator *validator.Validate) *K8sCapacityRestHandlerImpl {
76+
clusterReadService read.ClusterReadService,
77+
validator *validator.Validate,
78+
clusterCacheService overviewCache.ClusterCacheService,
79+
) *K8sCapacityRestHandlerImpl {
7480
return &K8sCapacityRestHandlerImpl{
75-
logger: logger,
76-
k8sCapacityService: k8sCapacityService,
77-
userService: userService,
78-
enforcer: enforcer,
79-
clusterService: clusterService,
80-
environmentService: environmentService,
81-
clusterRbacService: clusterRbacService,
82-
clusterReadService: clusterReadService,
83-
validator: validator,
81+
logger: logger,
82+
k8sCapacityService: k8sCapacityService,
83+
userService: userService,
84+
enforcer: enforcer,
85+
clusterService: clusterService,
86+
environmentService: environmentService,
87+
clusterRbacService: clusterRbacService,
88+
clusterReadService: clusterReadService,
89+
validator: validator,
90+
clusterCacheService: clusterCacheService,
8491
}
8592
}
8693

@@ -98,7 +105,7 @@ func (handler *K8sCapacityRestHandlerImpl) GetClusterListRaw(w http.ResponseWrit
98105
return
99106
}
100107
// RBAC enforcer applying
101-
var authenticatedClusters []*bean2.ClusterBean
108+
var authenticatedClusters []*clusterBean.ClusterBean
102109
var clusterDetailList []*bean.ClusterCapacityDetail
103110
for _, cluster := range clusters {
104111
authenticated, err := handler.clusterRbacService.CheckAuthorization(cluster.ClusterName, cluster.Id, token, userId, true)
@@ -140,7 +147,7 @@ func (handler *K8sCapacityRestHandlerImpl) GetClusterListWithDetail(w http.Respo
140147
return
141148
}
142149
// RBAC enforcer applying
143-
var authenticatedClusters []*bean2.ClusterBean
150+
var authenticatedClusters []*clusterBean.ClusterBean
144151
for _, cluster := range clusters {
145152
authenticated, err := handler.clusterRbacService.CheckAuthorization(cluster.ClusterName, cluster.Id, token, userId, true)
146153
if err != nil {
@@ -156,11 +163,21 @@ func (handler *K8sCapacityRestHandlerImpl) GetClusterListWithDetail(w http.Respo
156163
common.WriteJsonResp(w, errors.New("unauthorized"), nil, http.StatusForbidden)
157164
return
158165
}
159-
clusterDetailList, err := handler.k8sCapacityService.GetClusterCapacityDetailList(r.Context(), authenticatedClusters)
160-
if err != nil {
161-
handler.logger.Errorw("error in getting cluster capacity detail list", "err", err)
162-
common.WriteJsonResp(w, err, nil, http.StatusInternalServerError)
163-
return
166+
// Try to get data from cache if available
167+
var clusterDetailList []*bean.ClusterCapacityDetail
168+
cachedOverview, cacheFound := handler.clusterCacheService.GetClusterOverview()
169+
if cacheFound {
170+
handler.logger.Infow("serving cluster capacity details from cache", "totalClusters", cachedOverview.TotalClusters)
171+
// Convert ClusterOverviewResponse to RawClusterCapacityDetails list and filter by RBAC
172+
clusterDetailList = handler.filterAuthorizedClusterDetails(cachedOverview, authenticatedClusters)
173+
} else {
174+
handler.logger.Infow("cache not available, fetching cluster capacity details from k8s API")
175+
clusterDetailList, err = handler.k8sCapacityService.GetClusterCapacityDetailList(r.Context(), authenticatedClusters)
176+
if err != nil {
177+
handler.logger.Errorw("error in getting cluster capacity detail list", "err", err)
178+
common.WriteJsonResp(w, err, nil, http.StatusInternalServerError)
179+
return
180+
}
164181
}
165182
common.WriteJsonResp(w, nil, clusterDetailList, http.StatusOK)
166183
}
@@ -473,3 +490,37 @@ func (handler *K8sCapacityRestHandlerImpl) EditNodeTaints(w http.ResponseWriter,
473490
}
474491
common.WriteJsonResp(w, nil, resp, http.StatusOK)
475492
}
493+
494+
// filterAuthorizedClusterDetails converts ClusterOverviewResponse to RawClusterCapacityDetails list
495+
// and filters based on authenticated clusters. It includes:
496+
// 1. Clusters from cache (healthy clusters with capacity data)
497+
// 2. Virtual clusters (from database, not in cache)
498+
// 3. Clusters with connection errors (from database, not in cache)
499+
func (handler *K8sCapacityRestHandlerImpl) filterAuthorizedClusterDetails(
500+
cachedOverview *overviewBean.ClusterOverviewResponse,
501+
authenticatedClusters []*clusterBean.ClusterBean,
502+
) []*bean.ClusterCapacityDetail {
503+
// Create maps for quick lookup
504+
authenticatedClusterIds := make(map[int]bool)
505+
for _, authenticatedCluster := range authenticatedClusters {
506+
authenticatedClusterIds[authenticatedCluster.Id] = true
507+
}
508+
509+
// Authenticated cluster details
510+
clusterDetailList := make([]*bean.ClusterCapacityDetail, 0, len(authenticatedClusters))
511+
512+
// Add clusters from cache
513+
for _, capacityDetail := range cachedOverview.RawClusterCapacityDetails {
514+
// Only include authenticated clusters
515+
if !authenticatedClusterIds[capacityDetail.Id] {
516+
continue
517+
}
518+
}
519+
520+
handler.logger.Debugw("converted and filtered cluster details from cache",
521+
"totalCached", len(cachedOverview.RawClusterCapacityDetails),
522+
"authenticated", len(authenticatedClusters),
523+
"converted", len(clusterDetailList))
524+
525+
return clusterDetailList
526+
}

pkg/overview/ClusterOverviewService.go

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -197,13 +197,14 @@ func (impl *ClusterOverviewServiceImpl) fetchClusterDataParallel(ctx context.Con
197197
})
198198
}
199199

200+
// Create combined cluster bean list (all clusters)
201+
allClusterPointers := make([]*clusterBean.ClusterBean, len(clusters))
202+
for i := range clusters {
203+
allClusterPointers[i] = &clusters[i]
204+
}
200205
// If all clusters have connection errors, return response with error clusters only
201206
if len(validClusters) == 0 {
202207
impl.logger.Warn("All clusters have connection errors, returning response with error clusters only")
203-
allClusterPointers := make([]*clusterBean.ClusterBean, len(clusters))
204-
for i := range clusters {
205-
allClusterPointers[i] = &clusters[i]
206-
}
207208
return impl.aggregateClusterCapacityDetails(ctx, errorClusterDetails, allClusterPointers), nil
208209
}
209210

@@ -262,12 +263,6 @@ func (impl *ClusterOverviewServiceImpl) fetchClusterDataParallel(ctx context.Con
262263
allClusterDetails = append(allClusterDetails, results...)
263264
allClusterDetails = append(allClusterDetails, errorClusterDetails...)
264265

265-
// Create combined cluster bean list (all clusters)
266-
allClusterPointers := make([]*clusterBean.ClusterBean, len(clusters))
267-
for i := range clusters {
268-
allClusterPointers[i] = &clusters[i]
269-
}
270-
271266
// Log summary
272267
successCount := len(results)
273268
failedCount := len(validClusters) - successCount
@@ -398,7 +393,7 @@ func (impl *ClusterOverviewServiceImpl) buildClusterOverviewResponse(ctx context
398393
}
399394
impl.processNodeDetails(cluster, response)
400395
impl.aggregateNodeErrorCounts(cluster, response)
401-
396+
impl.addRawClusterCapacityDetails(cluster, response)
402397
}
403398

404399
impl.finalizeResponse(response, totalCpuCapacityCores, totalMemoryCapacityGi, providerCounts, versionCounts, autoscalerNodeDetailsMap)
@@ -485,6 +480,11 @@ func (impl *ClusterOverviewServiceImpl) addClusterCapacityDistribution(cluster *
485480
))
486481
}
487482

483+
// addClusterCapacityDistribution adds cluster capacity distribution entry to response
484+
func (impl *ClusterOverviewServiceImpl) addRawClusterCapacityDetails(capacity *capacityBean.ClusterCapacityDetail, response *bean.ClusterOverviewResponse) {
485+
response.RawClusterCapacityDetails = append(response.RawClusterCapacityDetails, capacity)
486+
}
487+
488488
// processNodeDistributionAndAutoscaler adds cluster node count to distribution and aggregates autoscaler counts across all clusters
489489
func (impl *ClusterOverviewServiceImpl) processNodeDistributionAndAutoscaler(ctx context.Context, cluster *capacityBean.ClusterCapacityDetail, clusterBean *clusterBean.ClusterBean, response *bean.ClusterOverviewResponse, autoscalerCounts map[string]int, autoscalerNodeDetailsMap map[string][]bean.AutoscalerNodeDetail) {
490490
// Add cluster node count to distribution

pkg/overview/bean/OverviewBean.go

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
package bean
66

77
import (
8+
capacityBean "github.com/devtron-labs/devtron/pkg/k8s/capacity/bean"
89
"time"
910

1011
"github.com/devtron-labs/common-lib/utils"
@@ -254,15 +255,16 @@ type ClusterOverviewRequest struct {
254255

255256
// ClusterOverviewResponse represents the comprehensive cluster management overview
256257
type ClusterOverviewResponse struct {
257-
TotalClusters int `json:"totalClusters"`
258-
TotalCpuCapacity *ResourceCapacity `json:"totalCpuCapacity"`
259-
TotalMemoryCapacity *ResourceCapacity `json:"totalMemoryCapacity"`
260-
ClusterStatusBreakdown *ClusterStatusBreakdown `json:"clusterStatusBreakdown"`
261-
NodeSchedulingBreakdown *NodeSchedulingBreakdown `json:"nodeSchedulingBreakdown"`
262-
NodeErrorBreakdown *NodeErrorBreakdown `json:"nodeErrorBreakdown"`
263-
ClusterDistribution *ClusterDistribution `json:"clusterDistribution"`
264-
ClusterCapacityDistribution []ClusterCapacityDistribution `json:"clusterCapacityDistribution"`
265-
NodeDistribution *NodeDistribution `json:"nodeDistribution"`
258+
TotalClusters int `json:"totalClusters"`
259+
TotalCpuCapacity *ResourceCapacity `json:"totalCpuCapacity"`
260+
TotalMemoryCapacity *ResourceCapacity `json:"totalMemoryCapacity"`
261+
ClusterStatusBreakdown *ClusterStatusBreakdown `json:"clusterStatusBreakdown"`
262+
NodeSchedulingBreakdown *NodeSchedulingBreakdown `json:"nodeSchedulingBreakdown"`
263+
NodeErrorBreakdown *NodeErrorBreakdown `json:"nodeErrorBreakdown"`
264+
ClusterDistribution *ClusterDistribution `json:"clusterDistribution"`
265+
ClusterCapacityDistribution []ClusterCapacityDistribution `json:"clusterCapacityDistribution"`
266+
RawClusterCapacityDetails []*capacityBean.ClusterCapacityDetail `json:"rawClusterCapacityDetails"`
267+
NodeDistribution *NodeDistribution `json:"nodeDistribution"`
266268
}
267269

268270
// ResourceCapacity represents capacity with value and unit

wire_gen.go

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)