Skip to content

Commit 9c256af

Browse files
committed
add AggregatePodMetadata to track metadata of pod and its owners, add runtime risk tracking
1 parent 0c1dea6 commit 9c256af

5 files changed

Lines changed: 160 additions & 13 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
*~
22
/deployment-tracker
3+
.idea/

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ deployment records to GitHub's artifact metadata API.
2323
- **Real-time tracking**: Sends deployment records when pods are
2424
created or deleted
2525
- **Graceful shutdown**: Properly drains work queue before terminating
26+
- **Runtime risks**: Track runtime risks through annotations
2627

2728
## How It Works
2829

@@ -82,6 +83,17 @@ The `DN_TEMPLATE` supports the following placeholders:
8283
- `{{deploymentName}}` - Name of the owning Deployment
8384
- `{{containerName}}` - Container name
8485

86+
## Runtime Risks
87+
88+
You can track runtime risks through annotations. Add the annotation `github.com/runtime-risks`, with a comma-separated list of supported runtime risk values. Annotations are aggregated from the pod and its owner reference objects.
89+
90+
Currently supported runtime risks:
91+
- `critical-resource`
92+
- `lateral-movement`
93+
- `internet-exposed`
94+
- `sensitive-data`
95+
96+
8597
## Kubernetes Deployment
8698

8799
A complete deployment manifest is provided in `deploy/manifest.yaml`

deploy/manifest.yaml

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,12 @@ rules:
1717
- apiGroups: [""]
1818
resources: ["pods"]
1919
verbs: ["get", "list", "watch"]
20-
- apiGroups: ["apps"]
21-
resources: ["deployments"]
22-
verbs: ["get"]
20+
- apiGroups: [ "apps" ]
21+
resources: [ "deployments" ]
22+
verbs: [ "get" ]
23+
- apiGroups: [ "apps" ]
24+
resources: [ "replicasets" ]
25+
verbs: [ "get" ]
2326
---
2427
apiVersion: rbac.authorization.k8s.io/v1
2528
kind: ClusterRoleBinding
@@ -66,7 +69,7 @@ spec:
6669
- name: PHYSICAL_ENVIRONMENT
6770
value: "iad-moda1"
6871
- name: CLUSTER
69-
value: "kommendorkapten"
72+
value: "test-cluster"
7073
- name: BASE_URL
7174
value: "http://artifact-registry.artifact-registry.svc.cluster.local:9090"
7275
resources:

internal/controller/controller.go

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ const (
2929
EventCreated = "CREATED"
3030
// EventDeleted indicates that a pod has been deleted.
3131
EventDeleted = "DELETED"
32+
// RuntimeRiskAnnotationKey represents the annotation key for runtime risks.
33+
RuntimeRiskAnnotationKey = "github.com/runtime-risks"
3234
)
3335

3436
// PodEvent represents a pod event to be processed.
@@ -38,6 +40,11 @@ type PodEvent struct {
3840
DeletedPod *corev1.Pod // Only populated for delete events
3941
}
4042

43+
// AggregatePodMetadata represents combined metadata for a pod and its ownership hierarchy.
44+
type AggregatePodMetadata struct {
45+
RuntimeRisks map[deploymentrecord.RuntimeRisk]bool
46+
}
47+
4148
// Controller is the Kubernetes controller for tracking deployments.
4249
type Controller struct {
4350
clientset kubernetes.Interface
@@ -414,6 +421,15 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta
414421
// Extract image name and tag
415422
imageName, version := image.ExtractName(container.Image)
416423

424+
// Gather aggregate metadata
425+
metadata := c.aggregateMetadata(ctx, pod)
426+
var runtimeRisks []deploymentrecord.RuntimeRisk
427+
if status != deploymentrecord.StatusDecommissioned {
428+
for risk := range metadata.RuntimeRisks {
429+
runtimeRisks = append(runtimeRisks, risk)
430+
}
431+
}
432+
417433
// Create deployment record
418434
record := deploymentrecord.NewDeploymentRecord(
419435
imageName,
@@ -424,6 +440,7 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta
424440
c.cfg.Cluster,
425441
status,
426442
dn,
443+
runtimeRisks,
427444
)
428445

429446
if err := c.apiClient.PostOne(ctx, record); err != nil {
@@ -457,6 +474,7 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta
457474
"name", record.Name,
458475
"deployment_name", record.DeploymentName,
459476
"status", record.Status,
477+
"runtime_risks", record.RuntimeRisks,
460478
"digest", record.Digest,
461479
)
462480

@@ -473,6 +491,82 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta
473491
return nil
474492
}
475493

494+
// aggregateRuntimeRisks aggregates metadata for a pod and its owners.
495+
func (c *Controller) aggregateMetadata(ctx context.Context, obj metav1.Object) AggregatePodMetadata {
496+
metadata := AggregatePodMetadata{
497+
RuntimeRisks: make(map[deploymentrecord.RuntimeRisk]bool),
498+
}
499+
visited := make(map[string]bool)
500+
501+
getMetadataFromObject(obj, metadata)
502+
c.getMetadataFromOwners(ctx, obj, metadata, visited)
503+
504+
return metadata
505+
}
506+
507+
// collectRuntimeRisksFromOwners recursively collects metadata from owner references
508+
// in the ownership chain
509+
// Visited map prevents infinite recursion on circular ownership references.
510+
func (c *Controller) getMetadataFromOwners(ctx context.Context, obj metav1.Object, metadata AggregatePodMetadata, visited map[string]bool) {
511+
ownerRefs := obj.GetOwnerReferences()
512+
513+
for _, owner := range ownerRefs {
514+
ownerKey := fmt.Sprintf("%s/%s/%s", obj.GetNamespace(), owner.Kind, owner.Name)
515+
if visited[ownerKey] {
516+
slog.Debug("Cycle detected in ownership chain, skipping",
517+
"namespace", obj.GetNamespace(),
518+
"owner_kind", owner.Kind,
519+
"owner_name", owner.Name,
520+
)
521+
continue
522+
}
523+
visited[ownerKey] = true
524+
525+
ownerObj, err := c.getOwnerObject(ctx, obj.GetNamespace(), owner)
526+
if err != nil {
527+
slog.Warn("Failed to get owner object for metadata collection",
528+
"namespace", obj.GetNamespace(),
529+
"owner_kind", owner.Kind,
530+
"owner_name", owner.Name,
531+
"error", err,
532+
)
533+
continue
534+
}
535+
536+
if ownerObj == nil {
537+
continue
538+
}
539+
540+
getMetadataFromObject(ownerObj, metadata)
541+
c.getMetadataFromOwners(ctx, ownerObj, metadata, visited)
542+
}
543+
}
544+
545+
// getOwnerObject retrieves the owner object based on its kind, namespace, and name.
546+
func (c *Controller) getOwnerObject(ctx context.Context, namespace string, owner metav1.OwnerReference) (metav1.Object, error) {
547+
switch owner.Kind {
548+
case "ReplicaSet":
549+
rs, err := c.clientset.AppsV1().ReplicaSets(namespace).Get(ctx, owner.Name, metav1.GetOptions{})
550+
if err != nil {
551+
return nil, err
552+
}
553+
return rs, nil
554+
case "Deployment":
555+
deployment, err := c.clientset.AppsV1().Deployments(namespace).Get(ctx, owner.Name, metav1.GetOptions{})
556+
if err != nil {
557+
return nil, err
558+
}
559+
return deployment, nil
560+
default:
561+
// Unsupported kinds
562+
slog.Debug("Unsupported owner kind for runtime risk collection",
563+
"kind", owner.Kind,
564+
"name", owner.Name,
565+
)
566+
return nil, nil
567+
}
568+
}
569+
476570
func getCacheKey(dn, digest string) string {
477571
return dn + "||" + digest
478572
}
@@ -580,3 +674,16 @@ func getDeploymentName(pod *corev1.Pod) string {
580674
}
581675
return ""
582676
}
677+
678+
// getMetadataFromObject extracts metadata from an object.
679+
func getMetadataFromObject(obj metav1.Object, metadata AggregatePodMetadata) {
680+
annotations := obj.GetAnnotations()
681+
if risks, exists := annotations[RuntimeRiskAnnotationKey]; exists {
682+
for _, risk := range strings.Split(risks, ",") {
683+
r := deploymentrecord.ValidateRuntimeRisk(strings.TrimSpace(risk))
684+
if r != "" {
685+
metadata.RuntimeRisks[r] = true
686+
}
687+
}
688+
}
689+
}

pkg/deploymentrecord/record.go

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,35 @@ const (
66
StatusDecommissioned = "decommissioned"
77
)
88

9+
// Runtime risks for deployment records.
10+
type RuntimeRisk string
11+
12+
const (
13+
CriticalResource RuntimeRisk = "critical-resource"
14+
InternetExposed RuntimeRisk = "internet-exposed"
15+
LateralMovement RuntimeRisk = "lateral-movement"
16+
SensitiveData RuntimeRisk = "sensitive-data"
17+
)
18+
919
// DeploymentRecord represents a deployment event record.
1020
type DeploymentRecord struct {
11-
Name string `json:"name"`
12-
Digest string `json:"digest"`
13-
Version string `json:"version,omitempty"`
14-
LogicalEnvironment string `json:"logical_environment"`
15-
PhysicalEnvironment string `json:"physical_environment"`
16-
Cluster string `json:"cluster"`
17-
Status string `json:"status"`
18-
DeploymentName string `json:"deployment_name"`
21+
Name string `json:"name"`
22+
Digest string `json:"digest"`
23+
Version string `json:"version,omitempty"`
24+
LogicalEnvironment string `json:"logical_environment"`
25+
PhysicalEnvironment string `json:"physical_environment"`
26+
Cluster string `json:"cluster"`
27+
Status string `json:"status"`
28+
DeploymentName string `json:"deployment_name"`
29+
RuntimeRisks []RuntimeRisk `json:"runtime_risks,omitempty"`
1930
}
2031

2132
// NewDeploymentRecord creates a new DeploymentRecord with the given status.
2233
// Status must be either StatusDeployed or StatusDecommissioned.
2334
//
2435
//nolint:revive
2536
func NewDeploymentRecord(name, digest, version, logicalEnv, physicalEnv,
26-
cluster, status, deploymentName string) *DeploymentRecord {
37+
cluster, status, deploymentName string, runtimeRisks []RuntimeRisk) *DeploymentRecord {
2738
// Validate status
2839
if status != StatusDeployed && status != StatusDecommissioned {
2940
status = StatusDeployed // default to deployed if invalid
@@ -38,5 +49,18 @@ func NewDeploymentRecord(name, digest, version, logicalEnv, physicalEnv,
3849
Cluster: cluster,
3950
Status: status,
4051
DeploymentName: deploymentName,
52+
RuntimeRisks: runtimeRisks,
53+
}
54+
}
55+
56+
// ValidateRuntimeRisk confirms is string is a valid runtime risk,
57+
// then returns the canonical runtime risk constant if valid, empty string otherwise.
58+
func ValidateRuntimeRisk(risk string) RuntimeRisk {
59+
r := RuntimeRisk(risk)
60+
switch r {
61+
case CriticalResource, InternetExposed, LateralMovement, SensitiveData:
62+
return r
63+
default:
64+
return ""
4165
}
4266
}

0 commit comments

Comments
 (0)