Skip to content

Commit b0add68

Browse files
Update the chace semantics. (#39)
The current logic is wrong. If the controller is restarted, deployments that happened before it started will never be deleted. This PR changes the logic to only check for existing entries to prevent duplicate calls, not require an entry to actually trigger a delete operation.
1 parent 3793af8 commit b0add68

File tree

3 files changed

+35
-21
lines changed

3 files changed

+35
-21
lines changed

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,16 @@ module github.com/github/deployment-tracker
33
go 1.25.4
44

55
require (
6+
github.com/bradleyfalzon/ghinstallation/v2 v2.17.0
67
github.com/prometheus/client_golang v1.23.2
8+
golang.org/x/time v0.14.0
79
k8s.io/api v0.35.0
810
k8s.io/apimachinery v0.35.0
911
k8s.io/client-go v0.35.0
1012
)
1113

1214
require (
1315
github.com/beorn7/perks v1.0.1 // indirect
14-
github.com/bradleyfalzon/ghinstallation/v2 v2.17.0 // indirect
1516
github.com/cespare/xxhash/v2 v2.3.0 // indirect
1617
github.com/davecgh/go-spew v1.1.1 // indirect
1718
github.com/emicklei/go-restful/v3 v3.12.2 // indirect
@@ -45,7 +46,6 @@ require (
4546
golang.org/x/sys v0.38.0 // indirect
4647
golang.org/x/term v0.37.0 // indirect
4748
golang.org/x/text v0.31.0 // indirect
48-
golang.org/x/time v0.14.0 // indirect
4949
google.golang.org/protobuf v1.36.8 // indirect
5050
gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
5151
gopkg.in/inf.v0 v0.9.1 // indirect

go.sum

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,6 @@ golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU=
118118
golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254=
119119
golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
120120
golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
121-
golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY=
122-
golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
123121
golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
124122
golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
125123
golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=

internal/controller/controller.go

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,14 @@ import (
77
"log/slog"
88
"slices"
99
"strings"
10-
"sync"
1110
"time"
1211

1312
"github.com/github/deployment-tracker/pkg/deploymentrecord"
1413
"github.com/github/deployment-tracker/pkg/image"
1514
"github.com/github/deployment-tracker/pkg/metrics"
1615
"k8s.io/apimachinery/pkg/runtime/schema"
1716
"k8s.io/apimachinery/pkg/types"
17+
amcache "k8s.io/apimachinery/pkg/util/cache"
1818
"k8s.io/client-go/metadata"
1919

2020
corev1 "k8s.io/api/core/v1"
@@ -37,6 +37,12 @@ const (
3737
RuntimeRiskAnnotationKey = "github.com/runtime-risks"
3838
)
3939

40+
type ttlCache interface {
41+
Get(k any) (any, bool)
42+
Set(k any, v any, ttl time.Duration)
43+
Delete(k any)
44+
}
45+
4046
// PodEvent represents a pod event to be processed.
4147
type PodEvent struct {
4248
Key string
@@ -60,7 +66,7 @@ type Controller struct {
6066
// best effort cache to avoid redundant posts
6167
// post requests are idempotent, so if this cache fails due to
6268
// restarts or other events, nothing will break.
63-
observedDeployments sync.Map
69+
observedDeployments ttlCache
6470
}
6571

6672
// New creates a new deployment tracker controller.
@@ -96,12 +102,13 @@ func New(clientset kubernetes.Interface, metadataClient metadata.Interface, name
96102
}
97103

98104
cntrl := &Controller{
99-
clientset: clientset,
100-
metadataClient: metadataClient,
101-
podInformer: podInformer,
102-
workqueue: queue,
103-
apiClient: apiClient,
104-
cfg: cfg,
105+
clientset: clientset,
106+
metadataClient: metadataClient,
107+
podInformer: podInformer,
108+
workqueue: queue,
109+
apiClient: apiClient,
110+
cfg: cfg,
111+
observedDeployments: amcache.NewExpiring(),
105112
}
106113

107114
// Add event handlers to the informer
@@ -395,6 +402,8 @@ func (c *Controller) deploymentExists(ctx context.Context, namespace, name strin
395402

396403
// recordContainer records a single container's deployment info.
397404
func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, container corev1.Container, status, eventType string, runtimeRisks []deploymentrecord.RuntimeRisk) error {
405+
var cacheKey string
406+
398407
dn := getARDeploymentName(pod, container, c.cfg.Template)
399408
digest := getContainerDigest(pod, container.Name)
400409

@@ -409,22 +418,21 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta
409418
return nil
410419
}
411420

412-
cacheKey := getCacheKey(dn, digest)
413-
414421
// Check if we've already recorded this deployment
415422
switch status {
416423
case deploymentrecord.StatusDeployed:
417-
if _, exists := c.observedDeployments.Load(cacheKey); exists {
424+
cacheKey = getCacheKey(EventCreated, dn, digest)
425+
if _, exists := c.observedDeployments.Get(cacheKey); exists {
418426
slog.Debug("Deployment already observed, skipping post",
419427
"deployment_name", dn,
420428
"digest", digest,
421429
)
422430
return nil
423431
}
424432
case deploymentrecord.StatusDecommissioned:
425-
// For delete, check if we've seen it - if not, no need to decommission
426-
if _, exists := c.observedDeployments.Load(cacheKey); !exists {
427-
slog.Debug("Deployment not in cache, skipping decommission",
433+
cacheKey = getCacheKey(EventDeleted, dn, digest)
434+
if _, exists := c.observedDeployments.Get(cacheKey); exists {
435+
slog.Debug("Deployment already deleted, skipping post",
428436
"deployment_name", dn,
429437
"digest", digest,
430438
)
@@ -488,8 +496,16 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta
488496
// Update cache after successful post
489497
switch status {
490498
case deploymentrecord.StatusDeployed:
491-
c.observedDeployments.Store(cacheKey, true)
499+
cacheKey = getCacheKey(EventCreated, dn, digest)
500+
c.observedDeployments.Set(cacheKey, true, 2*time.Minute)
501+
// If there was a previous delete event, remove that
502+
cacheKey = getCacheKey(EventDeleted, dn, digest)
503+
c.observedDeployments.Delete(cacheKey)
492504
case deploymentrecord.StatusDecommissioned:
505+
cacheKey = getCacheKey(EventDeleted, dn, digest)
506+
c.observedDeployments.Set(cacheKey, true, 2*time.Minute)
507+
// If there was a previous created event, remove that
508+
cacheKey = getCacheKey(EventCreated, dn, digest)
493509
c.observedDeployments.Delete(cacheKey)
494510
default:
495511
return fmt.Errorf("invalid status: %s", status)
@@ -586,8 +602,8 @@ func (c *Controller) getOwnerMetadata(ctx context.Context, namespace string, own
586602
return obj, nil
587603
}
588604

589-
func getCacheKey(dn, digest string) string {
590-
return dn + "||" + digest
605+
func getCacheKey(ev, dn, digest string) string {
606+
return ev + "||" + dn + "||" + digest
591607
}
592608

593609
// createInformerFactory creates a shared informer factory with the given resync period.

0 commit comments

Comments
 (0)