Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 32 additions & 23 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ type options struct {
datadogGenericResourceEnabled bool
datadogCSIDriverEnabled bool
untaintControllerEnabled bool
untaintControllerWaitForCSIDriver bool

// Secret Backend options
secretBackendCommand string
Expand Down Expand Up @@ -188,6 +189,8 @@ func (opts *options) Parse() {
flag.BoolVar(&opts.datadogGenericResourceEnabled, "datadogGenericResourceEnabled", false, "Enable the DatadogGenericResource controller")
flag.BoolVar(&opts.datadogCSIDriverEnabled, "datadogCSIDriverEnabled", false, "Enable the DatadogCSIDriver controller")
flag.BoolVar(&opts.untaintControllerEnabled, "untaintControllerEnabled", false, "Enable the Untaint controller")
flag.BoolVar(&opts.untaintControllerWaitForCSIDriver, "untaintControllerWaitForCSIDriver", false,
"When true (requires --untaintControllerEnabled), the Untaint controller removes the startup taint only after both the node Agent and Datadog CSI node-server pods are Ready. Requires Pod watch coverage of CSI namespaces (DD_CSIDRIVER_WATCH_NAMESPACE).")

// DatadogAgentInternal
flag.BoolVar(&opts.createControllerRevisions, "createControllerRevisions", false, "Enable creation of ControllerRevision snapshots on each DDA spec change")
Expand Down Expand Up @@ -235,6 +238,10 @@ func run(opts *options) error {
}
version.PrintVersionLogs(setupLog)

if opts.untaintControllerWaitForCSIDriver && !opts.untaintControllerEnabled {
return setupErrorf(setupLog, fmt.Errorf("invalid flags"), "--untaintControllerWaitForCSIDriver requires --untaintControllerEnabled=true")
}

// submits the maximum go routine setting as a metric
metrics.MaxGoroutines.Set(float64(opts.maximumGoroutines))

Expand Down Expand Up @@ -287,15 +294,16 @@ func run(opts *options) error {
RenewDeadline: &renewDeadline,
RetryPeriod: &retryPeriod,
Cache: config.CacheOptions(setupLog, config.WatchOptions{
DatadogAgentEnabled: opts.datadogAgentEnabled,
DatadogMonitorEnabled: opts.datadogMonitorEnabled,
DatadogSLOEnabled: opts.datadogSLOEnabled,
DatadogAgentProfileEnabled: opts.datadogAgentProfileEnabled,
IntrospectionEnabled: opts.introspectionEnabled,
DatadogDashboardEnabled: opts.datadogDashboardEnabled,
DatadogGenericResourceEnabled: opts.datadogGenericResourceEnabled,
DatadogCSIDriverEnabled: opts.datadogCSIDriverEnabled,
UntaintControllerEnabled: opts.untaintControllerEnabled,
DatadogAgentEnabled: opts.datadogAgentEnabled,
DatadogMonitorEnabled: opts.datadogMonitorEnabled,
DatadogSLOEnabled: opts.datadogSLOEnabled,
DatadogAgentProfileEnabled: opts.datadogAgentProfileEnabled,
IntrospectionEnabled: opts.introspectionEnabled,
DatadogDashboardEnabled: opts.datadogDashboardEnabled,
DatadogGenericResourceEnabled: opts.datadogGenericResourceEnabled,
DatadogCSIDriverEnabled: opts.datadogCSIDriverEnabled,
UntaintControllerEnabled: opts.untaintControllerEnabled,
UntaintControllerWaitForCSIDriver: opts.untaintControllerWaitForCSIDriver,
}),
// UsePriorityQueue makes all controllers use the priority queue, which
// directly registers workqueue metrics into controller-runtime's metrics
Expand Down Expand Up @@ -366,20 +374,21 @@ func run(opts *options) error {
CanaryAutoPauseMaxSlowStartDuration: opts.edsCanaryAutoPauseMaxSlowStartDuration,
MaxPodSchedulerFailure: opts.edsMaxPodSchedulerFailure,
},
SupportCilium: opts.supportCilium,
CredsManager: credsManager,
DatadogAgentEnabled: opts.datadogAgentEnabled,
CreateControllerRevisions: opts.createControllerRevisions && opts.datadogAgentEnabled,
DatadogMonitorEnabled: opts.datadogMonitorEnabled,
DatadogSLOEnabled: opts.datadogSLOEnabled,
OperatorMetricsEnabled: opts.operatorMetricsEnabled,
V2APIEnabled: true,
IntrospectionEnabled: opts.introspectionEnabled,
DatadogAgentProfileEnabled: opts.datadogAgentProfileEnabled,
DatadogDashboardEnabled: opts.datadogDashboardEnabled,
DatadogGenericResourceEnabled: opts.datadogGenericResourceEnabled,
DatadogCSIDriverEnabled: opts.datadogCSIDriverEnabled,
UntaintControllerEnabled: opts.untaintControllerEnabled,
SupportCilium: opts.supportCilium,
CredsManager: credsManager,
DatadogAgentEnabled: opts.datadogAgentEnabled,
CreateControllerRevisions: opts.createControllerRevisions && opts.datadogAgentEnabled,
DatadogMonitorEnabled: opts.datadogMonitorEnabled,
DatadogSLOEnabled: opts.datadogSLOEnabled,
OperatorMetricsEnabled: opts.operatorMetricsEnabled,
V2APIEnabled: true,
IntrospectionEnabled: opts.introspectionEnabled,
DatadogAgentProfileEnabled: opts.datadogAgentProfileEnabled,
DatadogDashboardEnabled: opts.datadogDashboardEnabled,
DatadogGenericResourceEnabled: opts.datadogGenericResourceEnabled,
DatadogCSIDriverEnabled: opts.datadogCSIDriverEnabled,
UntaintControllerEnabled: opts.untaintControllerEnabled,
UntaintControllerWaitForCSIDriver: opts.untaintControllerWaitForCSIDriver,
}

versionInfo, platformInfo, err := getVersionAndPlatformInfo(rest.CopyConfig(mgr.GetConfig()))
Expand Down
88 changes: 65 additions & 23 deletions docs/untaint_controller.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,55 @@ This feature was introduced in Datadog Operator v1.28 and is currently in previe
## Overview

The Untaint controller watches Kubernetes Nodes carrying the taint
`agent.datadoghq.com/not-ready=presence:NoSchedule` and removes it once the
Datadog Agent pod on that node is `Ready`. It is intended to run alongside a
separate mechanism (cluster-autoscaler hook, CCM, admission webhook, etc.)
that adds the taint to new nodes. The use case is keeping workloads off a
node until the Datadog Agent is Ready, and recovering gracefully if the Agent never
becomes Ready.

Agent pods are matched by the label `agent.datadoghq.com/component=agent` in
the operator's watched namespaces (`WATCH_NAMESPACE` /
`agent.datadoghq.com/not-ready=presence:NoSchedule` and removes it when
readiness criteria are met (see below), or after a configurable timeout. It is
intended to run alongside a separate mechanism (cluster-autoscaler hook, CCM,
admission webhook, etc.) that adds the taint to new nodes.

**With `--untaintControllerEnabled=true` only** (and without `--untaintControllerWaitForCSIDriver`):
the controller removes the taint once the **node Agent** pod
(`agent.datadoghq.com/component=agent`) on that node is `Ready`. Agent pods are
listed in the operator's agent watch namespaces (`WATCH_NAMESPACE` /
`DD_AGENT_WATCH_NAMESPACE`).

If the Agent pod never reaches Ready on a tainted node, a configurable timeout
**With `--untaintControllerEnabled=true` and `--untaintControllerWaitForCSIDriver=true`:**
the controller waits until **both** the node Agent and **CSI
node-server** pod (`app=datadog-csi-driver-node-server`) on the node are
`Ready` before removing the taint. The taint stays until both are

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
`Ready` before removing the taint. The taint stays until both are
`Ready` before removing the taint. The taint stays until both criteria are

satisfied or a timeout fires. The operator's Pod informer then watches the

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
satisfied or a timeout fires. The operator's Pod informer then watches the
met or a timeout fires. The operator's Pod informer then watches the

**union** of `DD_AGENT_WATCH_NAMESPACE` and `DD_CSIDRIVER_WATCH_NAMESPACE` (all
pods in those namespaces—keep namespaces tight). Ensure CSI namespaces are

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
pods in those namespaces—keep namespaces tight). Ensure CSI namespaces are
pods in those namespaces. Keep these namespaces tightly scoped to limit the pod informer's watch scope.) CSI namespaces must be

covered so the controller can list CSI pod status.

**`--datadogCSIDriverEnabled`** only controls whether the **DatadogCSIDriver**
controller runs; it does **not** by itself turn on dual-readiness untaint.
Enable `--untaintControllerWaitForCSIDriver` only when you actually deploy CSI
node-server pods on tainted nodes (for example via a `DatadogCSIDriver` CR with

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
node-server pods on tainted nodes (for example via a `DatadogCSIDriver` CR with
node-server pods on tainted nodes (for example, with a `DatadogCSIDriver` CR with

the operator's CSI controller enabled, or another install path that produces
the same pod labels).

If a required pod never reaches Ready on a tainted node, a configurable timeout
policy ensures the node is never permanently unschedulable. Two clocks cover
the two failure modes:

- **Readiness timeout** — the Agent pod is on the node but not Ready. Clock:
`pod.Status.StartTime`. Pod recreation restarts the window; container
restarts inside the same pod do not.
- **Scheduling timeout** — no Agent pod is on the node. Clock:
`node.metadata.creationTimestamp`. The expected path when a DaemonSet never
schedules a pod onto the node (taint not tolerated, missing labels, etc.).
the main failure modes:

- **Readiness timeout** — at least one Agent pod is on the node but the Agent
is not Ready yet, **or** (with `--untaintControllerWaitForCSIDriver`) at least
one Agent and one CSI node-server pod are on the node, each has
`pod.Status.StartTime` set, and at least one of them is not Ready. Clock: the
**later** of the latest `StartTime` among Agent pods on the node and the latest
`StartTime` among CSI node-server pods on the node (so recreating a pod of
either workload resets the window; container restarts inside the same pod do
not). Agent-only mode (no wait-for-CSI) still uses only Agent `StartTime` for
this clock.
- **Scheduling timeout** — no Agent pod is on the node, **or** (with wait-for-CSI)
no CSI node-server pod on the node yet. Clock: `node.metadata.creationTimestamp`.
Covers DaemonSets that never schedule onto the node (taint not tolerated,
missing labels, CSI still pulling, etc.).
- **(Wait-for-CSI only)** If **both** an Agent pod and a CSI node-server pod are on
the node but **either** still lacks `StartTime`, the controller **requeues**
after the readiness-timeout duration. This is a coarse poll, the same approach
agent-only mode uses when `StartTime` is not populated yet. The controller does
**not** use the scheduling clock here, so a long-lived node does not immediately
hit the scheduling timeout while waiting for `StartTime` to appear.

A pod-recreation crash-loop faster than the readiness window can hold a node
tainted indefinitely; run with `policy=keep` and alert on
Expand All @@ -47,14 +75,26 @@ manager:
```yaml
args:
- --untaintControllerEnabled=true
# Optional: require CSI node-server Ready before untainting (see Overview).
- --untaintControllerWaitForCSIDriver=true
```

When this flag is enabled, the operator also injects a toleration for
| `--untaintControllerEnabled` | `--untaintControllerWaitForCSIDriver` | Behavior |
| ----------------------------- | ------------------------------------- | -------- |
| `false` | any | Untaint controller off; no Agent startup toleration for this feature. |
| `true` | `false` | Agent-only readiness; Agent DaemonSet startup toleration injected. |
| `true` | `true` | Wait for Agent **and** CSI node-server Ready; widened Pod cache (agent + `DD_CSIDRIVER_WATCH_NAMESPACE` namespaces); startup toleration on Agent and, when the DatadogCSIDriver controller is enabled, on the CSI node DaemonSet. |

`--untaintControllerWaitForCSIDriver` requires `--untaintControllerEnabled=true` (the operator exits on invalid combinations).

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
`--untaintControllerWaitForCSIDriver` requires `--untaintControllerEnabled=true` (the operator exits on invalid combinations).
**Note**: `--untaintControllerWaitForCSIDriver` requires `--untaintControllerEnabled=true`. The operator exits at startup if this combination is invalid.


When `--untaintControllerEnabled` is enabled, the operator injects a toleration for

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't `UntaintControllerWaitForCSIDriver` also be enabled here for the tolerations to be injected? (Edit: never mind, I misread.)

`agent.datadoghq.com/not-ready=presence:NoSchedule` into the node Agent
DaemonSet (or ExtendedDaemonSet) pod template, unless an equivalent toleration
is already present. This avoids a deadlock where the node stays tainted because
the Agent pod cannot schedule without the toleration, especially when admission
webhook auto-injection is not in use.
is already present. When **`--untaintControllerWaitForCSIDriver`** is also true **and**
the DatadogCSIDriver controller is running (`--datadogCSIDriverEnabled=true`), the same
toleration is injected into the **Datadog CSI node-server** DaemonSet pod
template so the CSI workload can schedule on tainted nodes before the taint is
removed.

## Configuration

Expand All @@ -81,6 +121,8 @@ Metrics, under the `untaint` Prometheus subsystem:

Kubernetes Events (gated by `DD_UNTAINT_CONTROLLER_EVENTS_ENABLED=true`):

- `TaintRemoved` (Normal) — taint removed because the Agent pod became Ready.
- `TaintRemoved` (Normal) — taint removed after the Agent became Ready, or (when
`--untaintControllerWaitForCSIDriver` is enabled) after both the Agent and
CSI node-server pods became Ready.
- `UntaintTimeout` — a timeout fired. Normal under `remove`, Warning under `keep`. Message carries the reason, elapsed time, and policy.

9 changes: 7 additions & 2 deletions internal/controller/datadogcsidriver/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,14 @@
package datadogcsidriver

const (
// AppLabelKey is the Kubernetes label key on CSI node-server pods.
AppLabelKey = "app"
// NodeServerDaemonSetAppValue is the label value identifying CSI node-server pods
// (and the default DaemonSet name).
NodeServerDaemonSetAppValue = "datadog-csi-driver-node-server"

// csiDsName is the default name of the CSI driver DaemonSet
csiDsName = "datadog-csi-driver-node-server"
csiDsName = NodeServerDaemonSetAppValue
// csiDriverName is the default name of the CSIDriver Kubernetes object
csiDriverName = "k8s.csi.datadoghq.com"
// defaultCSIDriverImageName is the default CSI driver container image name
Expand Down Expand Up @@ -51,7 +57,6 @@ const (
csiDriverPort = int32(5000)

// Pod labels
appLabelKey = "app"
admissionControllerEnabledLabel = "admission.datadoghq.com/enabled"

// finalizerName is the finalizer for CSIDriver object cleanup
Expand Down
22 changes: 14 additions & 8 deletions internal/controller/datadogcsidriver/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (

"github.com/DataDog/datadog-operator/api/datadoghq/v1alpha1"
"github.com/DataDog/datadog-operator/api/datadoghq/v2alpha1"
componentagent "github.com/DataDog/datadog-operator/internal/controller/datadogagent/component/agent"
)

const (
Expand All @@ -35,17 +36,19 @@ const (

// Reconciler reconciles a DatadogCSIDriver object
type Reconciler struct {
client client.Client
scheme *runtime.Scheme
recorder record.EventRecorder
client client.Client
scheme *runtime.Scheme
recorder record.EventRecorder
untaintInjectCSIStartupToleration bool
}

// NewReconciler creates a new DatadogCSIDriver reconciler
func NewReconciler(client client.Client, scheme *runtime.Scheme, recorder record.EventRecorder) *Reconciler {
func NewReconciler(client client.Client, scheme *runtime.Scheme, recorder record.EventRecorder, untaintInjectCSIStartupToleration bool) *Reconciler {
return &Reconciler{
client: client,
scheme: scheme,
recorder: recorder,
client: client,
scheme: scheme,
recorder: recorder,
untaintInjectCSIStartupToleration: untaintInjectCSIStartupToleration,
}
}

Expand Down Expand Up @@ -198,13 +201,16 @@ func (r *Reconciler) reconcileCSIDriver(ctx context.Context, instance *v1alpha1.
}

func (r *Reconciler) reconcileDaemonSet(ctx context.Context, instance *v1alpha1.DatadogCSIDriver) error {
logger := ctrl.LoggerFrom(ctx)
desired := buildDaemonSet(instance)
if r.untaintInjectCSIStartupToleration {
componentagent.EnsureAgentNotReadyStartupToleration(logger, &desired.Spec.Template.Spec)
}

if err := controllerutil.SetControllerReference(instance, desired, r.scheme); err != nil {
return fmt.Errorf("setting owner reference: %w", err)
}

logger := ctrl.LoggerFrom(ctx)
nsName := types.NamespacedName{Name: desired.Name, Namespace: desired.Namespace}
current := &appsv1.DaemonSet{}
err := r.client.Get(ctx, nsName, current)
Expand Down
Loading
Loading