Skip to content

Commit f83161c

Browse files
committed
add metadata client, switch to using PartialObjectMetadata
1 parent e1e14b2 commit f83161c

6 files changed

Lines changed: 152 additions & 69 deletions

File tree

README.md

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -87,11 +87,7 @@ The `DN_TEMPLATE` supports the following placeholders:
8787

8888
You can track runtime risks through annotations. Add the annotation `github.com/runtime-risks`, with a comma-separated list of supported runtime risk values. Annotations are aggregated from the pod and its owner reference objects.
8989

90-
Currently supported runtime risks:
91-
- `critical-resource`
92-
- `lateral-movement`
93-
- `internet-exposed`
94-
- `sensitive-data`
90+
Currently supported runtime risks can be found in the [Create Deployment Record API docs](https://docs.github.com/en/rest/orgs/artifact-metadata?apiVersion=2022-11-28#create-an-artifact-deployment-record). Invalid runtime risk values will be ignored.
9591

9692

9793
## Kubernetes Deployment
@@ -101,7 +97,7 @@ which includes:
10197

10298
- **Namespace**: `deployment-tracker`
10399
- **ServiceAccount**: Identity for the controller pod
104-
- **ClusterRole**: Minimal permissions (`get`, `list`, `watch` on pods)
100+
- **ClusterRole**: Minimal permissions (`get`, `list`, `watch` on pods; `get` on other supported objects)
105101
- **ClusterRoleBinding**: Binds the ServiceAccount to the ClusterRole
106102
- **Deployment**: Runs the controller with security hardening
107103

cmd/deployment-tracker/main.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"time"
1414

1515
"github.com/github/deployment-tracker/internal/controller"
16+
"k8s.io/client-go/metadata"
1617

1718
"github.com/prometheus/client_golang/prometheus/promhttp"
1819
"k8s.io/client-go/kubernetes"
@@ -112,6 +113,14 @@ func main() {
112113
os.Exit(1)
113114
}
114115

116+
// Create metadata client
117+
metadataClient, err := metadata.NewForConfig(k8sCfg)
118+
if err != nil {
119+
slog.Error("Error creating Kubernetes metadata client",
120+
"error", err)
121+
os.Exit(1)
122+
}
123+
115124
// Start the metrics server
116125
var promSrv = &http.Server{
117126
Addr: ":" + metricsPort,
@@ -151,7 +160,7 @@ func main() {
151160
cancel()
152161
}()
153162

154-
cntrl, err := controller.New(clientset, namespace, excludeNamespaces, &cntrlCfg)
163+
cntrl, err := controller.New(clientset, metadataClient, namespace, excludeNamespaces, &cntrlCfg)
155164
if err != nil {
156165
slog.Error("Failed to create controller",
157166
"error", err)

deploy/manifest.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ rules:
1717
- apiGroups: [""]
1818
resources: ["pods"]
1919
verbs: ["get", "list", "watch"]
20-
- apiGroups: [ "apps" ]
21-
resources: [ "deployments" ]
22-
verbs: [ "get" ]
23-
- apiGroups: [ "apps" ]
24-
resources: [ "replicasets" ]
25-
verbs: [ "get" ]
20+
- apiGroups: ["apps"]
21+
resources: ["deployments"]
22+
verbs: ["get"]
23+
- apiGroups: ["apps"]
24+
resources: ["replicasets"]
25+
verbs: ["get"]
2626
---
2727
apiVersion: rbac.authorization.k8s.io/v1
2828
kind: ClusterRoleBinding

internal/controller/controller.go

Lines changed: 73 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ import (
1212
"github.com/github/deployment-tracker/pkg/deploymentrecord"
1313
"github.com/github/deployment-tracker/pkg/image"
1414
"github.com/github/deployment-tracker/pkg/metrics"
15+
"k8s.io/apimachinery/pkg/runtime/schema"
1516
"k8s.io/apimachinery/pkg/types"
17+
"k8s.io/client-go/metadata"
1618

1719
corev1 "k8s.io/api/core/v1"
1820
k8serrors "k8s.io/apimachinery/pkg/api/errors"
@@ -48,19 +50,20 @@ type AggregatePodMetadata struct {
4850

4951
// Controller is the Kubernetes controller for tracking deployments.
5052
type Controller struct {
51-
clientset kubernetes.Interface
52-
podInformer cache.SharedIndexInformer
53-
workqueue workqueue.TypedRateLimitingInterface[PodEvent]
54-
apiClient *deploymentrecord.Client
55-
cfg *Config
53+
clientset kubernetes.Interface
54+
metadataClient metadata.Interface
55+
podInformer cache.SharedIndexInformer
56+
workqueue workqueue.TypedRateLimitingInterface[PodEvent]
57+
apiClient *deploymentrecord.Client
58+
cfg *Config
5659
// best effort cache to avoid redundant posts
5760
// post requests are idempotent, so if this cache fails due to
5861
// restarts or other events, nothing will break.
5962
observedDeployments sync.Map
6063
}
6164

6265
// New creates a new deployment tracker controller.
63-
func New(clientset kubernetes.Interface, namespace string, excludeNamespaces string, cfg *Config) (*Controller, error) {
66+
func New(clientset kubernetes.Interface, metadataClient metadata.Interface, namespace string, excludeNamespaces string, cfg *Config) (*Controller, error) {
6467
// Create informer factory
6568
factory := createInformerFactory(clientset, namespace, excludeNamespaces)
6669

@@ -92,11 +95,12 @@ func New(clientset kubernetes.Interface, namespace string, excludeNamespaces str
9295
}
9396

9497
cntrl := &Controller{
95-
clientset: clientset,
96-
podInformer: podInformer,
97-
workqueue: queue,
98-
apiClient: apiClient,
99-
cfg: cfg,
98+
clientset: clientset,
99+
metadataClient: metadataClient,
100+
podInformer: podInformer,
101+
workqueue: queue,
102+
apiClient: apiClient,
103+
cfg: cfg,
100104
}
101105

102106
// Add event handlers to the informer
@@ -342,16 +346,25 @@ func (c *Controller) processEvent(ctx context.Context, event PodEvent) error {
342346

343347
var lastErr error
344348

349+
// Gather aggregate metadata for adds/updates
350+
var runtimeRisks []deploymentrecord.RuntimeRisk
351+
if status != deploymentrecord.StatusDecommissioned {
352+
aggMetadata := c.aggregateMetadata(ctx, podToPartialMetadata(pod))
353+
for risk := range aggMetadata.RuntimeRisks {
354+
runtimeRisks = append(runtimeRisks, risk)
355+
}
356+
}
357+
345358
// Record info for each container in the pod
346359
for _, container := range pod.Spec.Containers {
347-
if err := c.recordContainer(ctx, pod, container, status, event.EventType); err != nil {
360+
if err := c.recordContainer(ctx, pod, container, status, event.EventType, runtimeRisks); err != nil {
348361
lastErr = err
349362
}
350363
}
351364

352365
// Also record init containers
353366
for _, container := range pod.Spec.InitContainers {
354-
if err := c.recordContainer(ctx, pod, container, status, event.EventType); err != nil {
367+
if err := c.recordContainer(ctx, pod, container, status, event.EventType, runtimeRisks); err != nil {
355368
lastErr = err
356369
}
357370
}
@@ -379,7 +392,7 @@ func (c *Controller) deploymentExists(ctx context.Context, namespace, name strin
379392
}
380393

381394
// recordContainer records a single container's deployment info.
382-
func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, container corev1.Container, status, eventType string) error {
395+
func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, container corev1.Container, status, eventType string, runtimeRisks []deploymentrecord.RuntimeRisk) error {
383396
dn := getARDeploymentName(pod, container, c.cfg.Template)
384397
digest := getContainerDigest(pod, container.Name)
385398

@@ -422,15 +435,6 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta
422435
// Extract image name and tag
423436
imageName, version := image.ExtractName(container.Image)
424437

425-
// Gather aggregate metadata for adds/updates
426-
var runtimeRisks []deploymentrecord.RuntimeRisk
427-
if status != deploymentrecord.StatusDecommissioned {
428-
metadata := c.aggregateMetadata(ctx, pod)
429-
for risk := range metadata.RuntimeRisks {
430-
runtimeRisks = append(runtimeRisks, risk)
431-
}
432-
}
433-
434438
// Create deployment record
435439
record := deploymentrecord.NewDeploymentRecord(
436440
imageName,
@@ -492,12 +496,12 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta
492496
return nil
493497
}
494498

495-
// aggregateRuntimeRisks aggregates metadata for a pod and its owners.
496-
func (c *Controller) aggregateMetadata(ctx context.Context, obj metav1.Object) AggregatePodMetadata {
497-
metadata := AggregatePodMetadata{
499+
// aggregateMetadata returns aggregated metadata for a pod and its owners.
500+
func (c *Controller) aggregateMetadata(ctx context.Context, obj *metav1.PartialObjectMetadata) AggregatePodMetadata {
501+
aggMetadata := AggregatePodMetadata{
498502
RuntimeRisks: make(map[deploymentrecord.RuntimeRisk]bool),
499503
}
500-
queue := []metav1.Object{obj}
504+
queue := []*metav1.PartialObjectMetadata{obj}
501505
visited := make(map[types.UID]bool)
502506

503507
for len(queue) > 0 {
@@ -513,20 +517,20 @@ func (c *Controller) aggregateMetadata(ctx context.Context, obj metav1.Object) A
513517
}
514518
visited[current.GetUID()] = true
515519

516-
getMetadataFromObject(current, metadata)
520+
extractMetadataFromObject(current, &aggMetadata)
517521
c.addOwnersToQueue(ctx, current, &queue)
518522
}
519523

520-
return metadata
524+
return aggMetadata
521525
}
522526

523-
// collectRuntimeRisksFromOwners takes a current object and looks up its owners, adding them to the queue for processing
527+
// addOwnersToQueue looks up the owners of the current object and adds them to the queue for processing
524528
// to collect their metadata.
525-
func (c *Controller) addOwnersToQueue(ctx context.Context, current metav1.Object, queue *[]metav1.Object) {
529+
func (c *Controller) addOwnersToQueue(ctx context.Context, current *metav1.PartialObjectMetadata, queue *[]*metav1.PartialObjectMetadata) {
526530
ownerRefs := current.GetOwnerReferences()
527531

528532
for _, owner := range ownerRefs {
529-
ownerObj, err := c.getOwnerObject(ctx, current.GetNamespace(), owner)
533+
ownerObj, err := c.getOwnerMetadata(ctx, current.GetNamespace(), owner)
530534
if err != nil {
531535
slog.Warn("Failed to get owner object for metadata collection",
532536
"namespace", current.GetNamespace(),
@@ -545,29 +549,39 @@ func (c *Controller) addOwnersToQueue(ctx context.Context, current metav1.Object
545549
}
546550
}
547551

548-
// getOwnerObject retrieves the owner object based on its kind, namespace, and name.
549-
func (c *Controller) getOwnerObject(ctx context.Context, namespace string, owner metav1.OwnerReference) (metav1.Object, error) {
552+
// getOwnerMetadata retrieves partial object metadata for an owner ref.
553+
func (c *Controller) getOwnerMetadata(ctx context.Context, namespace string, owner metav1.OwnerReference) (*metav1.PartialObjectMetadata, error) {
554+
gvr := schema.GroupVersionResource{
555+
Group: "apps",
556+
Version: "v1",
557+
}
558+
550559
switch owner.Kind {
551560
case "ReplicaSet":
552-
rs, err := c.clientset.AppsV1().ReplicaSets(namespace).Get(ctx, owner.Name, metav1.GetOptions{})
553-
if err != nil {
554-
return nil, err
555-
}
556-
return rs, nil
561+
gvr.Resource = "replicasets"
557562
case "Deployment":
558-
deployment, err := c.clientset.AppsV1().Deployments(namespace).Get(ctx, owner.Name, metav1.GetOptions{})
559-
if err != nil {
560-
return nil, err
561-
}
562-
return deployment, nil
563+
gvr.Resource = "deployments"
563564
default:
564-
// Unsupported kinds
565565
slog.Debug("Unsupported owner kind for runtime risk collection",
566566
"kind", owner.Kind,
567567
"name", owner.Name,
568568
)
569569
return nil, nil
570570
}
571+
572+
obj, err := c.metadataClient.Resource(gvr).Namespace(namespace).Get(ctx, owner.Name, metav1.GetOptions{})
573+
if err != nil {
574+
if k8serrors.IsNotFound(err) {
575+
slog.Debug("Owner object not found for metadata collection",
576+
"namespace", namespace,
577+
"owner_kind", owner.Kind,
578+
"owner_name", owner.Name,
579+
)
580+
return nil, nil
581+
}
582+
return nil, err
583+
}
584+
return obj, nil
571585
}
572586

573587
func getCacheKey(dn, digest string) string {
@@ -678,15 +692,25 @@ func getDeploymentName(pod *corev1.Pod) string {
678692
return ""
679693
}
680694

681-
// getMetadataFromObject extracts metadata from an object.
682-
func getMetadataFromObject(obj metav1.Object, metadata AggregatePodMetadata) {
695+
// extractMetadataFromObject parses the runtime-risk annotation on obj and records valid risks in aggMetadata.
696+
func extractMetadataFromObject(obj *metav1.PartialObjectMetadata, aggMetadata *AggregatePodMetadata) {
683697
annotations := obj.GetAnnotations()
684698
if risks, exists := annotations[RuntimeRiskAnnotationKey]; exists {
685699
for _, risk := range strings.Split(risks, ",") {
686-
r := deploymentrecord.ValidateRuntimeRisk(strings.TrimSpace(risk))
700+
r := deploymentrecord.ValidateRuntimeRisk(risk)
687701
if r != "" {
688-
metadata.RuntimeRisks[r] = true
702+
aggMetadata.RuntimeRisks[r] = true
689703
}
690704
}
691705
}
692706
}
707+
708+
func podToPartialMetadata(pod *corev1.Pod) *metav1.PartialObjectMetadata {
709+
return &metav1.PartialObjectMetadata{
710+
TypeMeta: metav1.TypeMeta{
711+
APIVersion: "v1",
712+
Kind: "Pod",
713+
},
714+
ObjectMeta: pod.ObjectMeta,
715+
}
716+
}

pkg/deploymentrecord/record.go

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,35 @@
11
package deploymentrecord
22

3+
import (
4+
"log/slog"
5+
"strings"
6+
)
7+
38
// Status constants for deployment records.
49
const (
510
StatusDeployed = "deployed"
611
StatusDecommissioned = "decommissioned"
712
)
813

9-
// Runtime risks for deployment records.
14+
// RuntimeRisk for deployment records.
1015
type RuntimeRisk string
1116

17+
// Valid runtime risks.
1218
const (
1319
CriticalResource RuntimeRisk = "critical-resource"
1420
InternetExposed RuntimeRisk = "internet-exposed"
1521
LateralMovement RuntimeRisk = "lateral-movement"
1622
SensitiveData RuntimeRisk = "sensitive-data"
1723
)
1824

25+
// Map of valid runtime risks.
26+
var validRuntimeRisks = map[RuntimeRisk]bool{
27+
CriticalResource: true,
28+
InternetExposed: true,
29+
LateralMovement: true,
30+
SensitiveData: true,
31+
}
32+
1933
// DeploymentRecord represents a deployment event record.
2034
type DeploymentRecord struct {
2135
Name string `json:"name"`
@@ -53,14 +67,13 @@ func NewDeploymentRecord(name, digest, version, logicalEnv, physicalEnv,
5367
}
5468
}
5569

56-
// ValidateRuntimeRisk confirms is string is a valid runtime risk,
70+
// ValidateRuntimeRisk reports whether the string is a valid runtime risk,
5771
// then returns the canonical runtime risk constant if valid, empty string otherwise.
5872
func ValidateRuntimeRisk(risk string) RuntimeRisk {
59-
r := RuntimeRisk(risk)
60-
switch r {
61-
case CriticalResource, InternetExposed, LateralMovement, SensitiveData:
62-
return r
63-
default:
73+
r := RuntimeRisk(strings.ToLower(strings.TrimSpace(risk)))
74+
if !validRuntimeRisks[r] {
75+
slog.Debug("Invalid runtime risk", "risk", risk)
6476
return ""
6577
}
78+
return r
6679
}

0 commit comments

Comments
 (0)