Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cmd/nfd-worker/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ func parseArgs(flags *flag.FlagSet, osArgs ...string) *worker.Args {
args.Overrides.LabelSources = overrides.LabelSources
case "no-owner-refs":
args.Overrides.NoOwnerRefs = overrides.NoOwnerRefs
case "owner-ref-pod":
args.Overrides.OwnerRefPod = overrides.OwnerRefPod
}
})

Expand Down Expand Up @@ -125,6 +127,8 @@ func initFlags(flagset *flag.FlagSet) (*worker.Args, *worker.ConfigOverrideArgs)
"Do not publish discovered features, disable connection to nfd-master and don't create NodeFeature object.")
overrides.NoOwnerRefs = flagset.Bool("no-owner-refs", false,
"Do not set owner references for NodeFeature object.")
overrides.OwnerRefPod = flagset.Bool("owner-ref-pod", false,
"Set the owner reference for NodeFeature object to Pod.")
flagset.Var(overrides.FeatureSources, "feature-sources",
"Comma separated list of feature sources. Special value 'all' enables all sources. "+
"Prefix the source name with '-' to disable it.")
Expand Down
9 changes: 9 additions & 0 deletions docs/deployment/uninstallation.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,12 @@ kubectl -n node-feature-discovery wait job.batch/nfd-master --for=condition=comp

> **NOTE:** You must run prune before removing the RBAC rules (serviceaccount,
> clusterrole and clusterrolebinding).

## Removing stale NodeFeature objects

In case NFD-Worker is configured to set the owner reference to Node or Pod,
manually delete stale NodeFeature objects.

```bash
kubectl delete -n node-feature-discovery NodeFeature $(kubectl get -o json -n node-feature-discovery NodeFeature | jq ".items[].metadata.name")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Small fix: jq ".items[].metadata.name" emits quoted strings, which kubectl delete will reject. Two options — add -r to jq, or simpler: kubectl delete -n node-feature-discovery nodefeatures.nfd.k8s-sigs.io --all.

```
17 changes: 17 additions & 0 deletions docs/reference/worker-commandline-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,23 @@ Example:
nfd-worker -no-owner-refs
```

### -owner-ref-pod

The `-owner-ref-pod` flag sets the owner references
of the NodeFeature object to Pod.

> **NOTE:** This flag takes precedence over the
> [`core.OwnerRefNode`](worker-configuration-reference.md#coreownerrefpod)
> configuration file option.

Default: *false*

Example:

```bash
nfd-worker -owner-ref-pod
```

### -oneshot

The `-oneshot` flag causes nfd-worker to exit after one pass of feature
Expand Down
18 changes: 18 additions & 0 deletions docs/reference/worker-configuration-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,24 @@ core:
noOwnerRefs: true
```

### core.ownerRefPod

Setting `core.ownerRefPod` to `true` sets the owner reference
of the NodeFeature object to Pod for nfd-worker.

> **NOTE:** Overridden by the
> [`-owner-ref-pod`](worker-commandline-reference.md#-owner-ref-pod)
> command line flag (if specified).

Default: `false`

Example:

```yaml
core:
ownerRefPod: true
```

### core.klog

The following options specify the logger configuration.
Expand Down
70 changes: 48 additions & 22 deletions pkg/nfd-worker/nfd-worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ type coreConfig struct {
LabelWhiteList utils.RegexpVal
NoPublish bool
NoOwnerRefs bool
OwnerRefPod bool
FeatureSources []string
Sources *[]string
LabelSources []string
Expand All @@ -101,6 +102,7 @@ type Args struct {
Options string
Port int
NoOwnerRefs bool
OwnerRefPod bool

Overrides ConfigOverrideArgs
}
Expand All @@ -109,6 +111,7 @@ type Args struct {
type ConfigOverrideArgs struct {
NoPublish *bool
NoOwnerRefs *bool
OwnerRefPod *bool
FeatureSources *utils.StringSliceVal
LabelSources *utils.StringSliceVal
}
Expand Down Expand Up @@ -253,33 +256,53 @@ func (w *nfdWorker) setOwnerReference() error {
ownerReference := []metav1.OwnerReference{}

if !w.config.Core.NoOwnerRefs {
// Get pod owner reference
podName := os.Getenv("POD_NAME")
// Add pod owner reference if it exists
if podName != "" {
if selfPod, err := w.k8sClient.CoreV1().Pods(w.kubernetesNamespace).Get(context.TODO(), podName, metav1.GetOptions{}); err != nil {
klog.ErrorS(err, "failed to get self pod, cannot inherit ownerReference for NodeFeature")
return err
} else {
for _, owner := range selfPod.OwnerReferences {
owner.BlockOwnerDeletion = ptr.To(false)
ownerReference = append(ownerReference, owner)
if w.config.Core.OwnerRefPod {
// Get pod owner reference
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Main concern with the current shape: wrapping the pre-existing Pod path in if OwnerRefPod and making Node the else flips the default owner from Pod to Node. Existing deployments upgrading would see their NodeFeature objects stop being owned by the worker Pod/DaemonSet.

Could we keep Pod as the default and make Node the opt-in? And, larger question: would you be open to a single enum flag -owner-ref=none|pod|node (deprecating -no-owner-refs) so we don't have two booleans encoding three states?

podName := os.Getenv("POD_NAME")
// Add pod owner reference if it exists
if podName != "" {
if selfPod, err := w.k8sClient.CoreV1().Pods(w.kubernetesNamespace).Get(context.TODO(), podName, metav1.GetOptions{}); err != nil {
klog.ErrorS(err, "failed to get self pod, cannot inherit ownerReference for NodeFeature")
return err
} else {
for _, owner := range selfPod.OwnerReferences {
owner.BlockOwnerDeletion = ptr.To(false)
ownerReference = append(ownerReference, owner)
}
}
}

podUID := os.Getenv("POD_UID")
if podUID != "" {
ownerReference = append(ownerReference, metav1.OwnerReference{
APIVersion: "v1",
Kind: "Pod",
Name: podName,
UID: types.UID(podUID),
})
podUID := os.Getenv("POD_UID")
if podUID != "" {
ownerReference = append(ownerReference, metav1.OwnerReference{
APIVersion: "v1",
Kind: "Pod",
Name: podName,
UID: types.UID(podUID),
})
} else {
klog.InfoS("Cannot append POD ownerReference to NodeFeature, POD_UID not specified")
}
} else {
klog.InfoS("Cannot append POD ownerReference to NodeFeature, POD_UID not specified")
klog.InfoS("Cannot set NodeFeature owner references, POD_NAME not specified")
}
} else {
klog.InfoS("Cannot set NodeFeature owner references, POD_NAME not specified")
// Get node owner reference
nodeName := os.Getenv("NODE_NAME")
if nodeName != "" {
if selfNode, err := w.k8sClient.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}); err != nil {
klog.ErrorS(err, "failed to get self node, cannot inherit ownerReference for NodeFeature")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Small thing while we're here: could we thread a context.Context into this function instead of context.TODO()? Pre-existing on the Pod branch, nice to fix on the way through.

return err
} else {
ownerReference = append(ownerReference, metav1.OwnerReference{
APIVersion: "v1",
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the Node case, worth setting Controller and BlockOwnerDeletion explicitly — there's no other controller claiming NodeFeature, and being explicit matches the Pod branch and K8s conventions.

Kind: "Node",
Name: nodeName,
UID: selfNode.UID,
})
}
} else {
klog.InfoS("Cannot set NodeFeature owner reference to Node, NODE_NAME not specified")
}
}
}

Expand Down Expand Up @@ -505,6 +528,9 @@ func (w *nfdWorker) configure(filepath string, overrides string) error {
if w.args.Overrides.NoOwnerRefs != nil {
c.Core.NoOwnerRefs = *w.args.Overrides.NoOwnerRefs
}
if w.args.Overrides.OwnerRefPod != nil {
c.Core.OwnerRefPod = *w.args.Overrides.OwnerRefPod
}
if w.args.Overrides.FeatureSources != nil {
c.Core.FeatureSources = *w.args.Overrides.FeatureSources
}
Expand Down
5 changes: 4 additions & 1 deletion pkg/nfd-worker/nfd-worker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,12 @@ func TestRun(t *testing.T) {
Convey("When publishing features from fake source", func() {
os.Setenv("NODE_NAME", "fake-node")
os.Setenv("KUBERNETES_NAMESPACE", "fake-ns")
b := true
args := &worker.Args{
Oneshot: true,
Oneshot: true,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Setting NoOwnerRefs: true here skips the new owner-ref logic entirely — the new code isn't exercised by any test. Could we add a table-driven test over the {none, pod, node} configurations, pre-creating a fake Node with a known UID and asserting the resulting OwnerReferences? That would also cover the Node-not-found error path.

NoOwnerRefs: true,
Overrides: worker.ConfigOverrideArgs{
NoOwnerRefs: &b,
FeatureSources: &utils.StringSliceVal{"fake"},
LabelSources: &utils.StringSliceVal{"fake"},
},
Expand Down
59 changes: 59 additions & 0 deletions test/e2e/utils/rbac.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ func ConfigureRBAC(ctx context.Context, cs clientset.Interface, ns string) error
return err
}

_, err = createClusterRoleWorker(ctx, cs)
if err != nil {
return err
}

_, err = createClusterRoleGC(ctx, cs)
if err != nil {
return err
Expand All @@ -83,6 +88,11 @@ func ConfigureRBAC(ctx context.Context, cs clientset.Interface, ns string) error
return err
}

_, err = createClusterRoleBindingWorker(ctx, cs, ns)
if err != nil {
return err
}

_, err = createClusterRoleBindingGC(ctx, cs, ns)
if err != nil {
return err
Expand All @@ -106,6 +116,10 @@ func DeconfigureRBAC(ctx context.Context, cs clientset.Interface, ns string) err
if err != nil {
return err
}
err = cs.RbacV1().ClusterRoleBindings().Delete(ctx, "nfd-worker-e2e", metav1.DeleteOptions{})
if err != nil {
return err
}
err = cs.RbacV1().ClusterRoleBindings().Delete(ctx, "nfd-gc-e2e", metav1.DeleteOptions{})
if err != nil {
return err
Expand All @@ -118,6 +132,10 @@ func DeconfigureRBAC(ctx context.Context, cs clientset.Interface, ns string) err
if err != nil {
return err
}
err = cs.RbacV1().ClusterRoles().Delete(ctx, "nfd-worker-e2e", metav1.DeleteOptions{})
if err != nil {
return err
}
err = cs.RbacV1().ClusterRoles().Delete(ctx, "nfd-gc-e2e", metav1.DeleteOptions{})
if err != nil {
return err
Expand Down Expand Up @@ -239,6 +257,24 @@ func createRoleWorker(ctx context.Context, cs clientset.Interface, ns string) (*
return cs.RbacV1().Roles(ns).Update(ctx, cr, metav1.UpdateOptions{})
}

// Configure cluster role required by NFD Worker
func createClusterRoleWorker(ctx context.Context, cs clientset.Interface) (*rbacv1.ClusterRole, error) {
cr := &rbacv1.ClusterRole{
ObjectMeta: metav1.ObjectMeta{
Name: "nfd-worker-e2e",
},
Rules: []rbacv1.PolicyRule{
{
APIGroups: []string{""},
Resources: []string{"nodes"},
Verbs: []string{"get", "list"},
},
},
}

return cs.RbacV1().ClusterRoles().Update(ctx, cr, metav1.UpdateOptions{})
}

// Configure cluster role required by NFD GC
func createClusterRoleGC(ctx context.Context, cs clientset.Interface) (*rbacv1.ClusterRole, error) {
cr := &rbacv1.ClusterRole{
Expand Down Expand Up @@ -356,6 +392,29 @@ func createRoleBindingWorker(ctx context.Context, cs clientset.Interface, ns str
return cs.RbacV1().RoleBindings(ns).Update(ctx, crb, metav1.UpdateOptions{})
}

// Configure cluster role binding required by NFD Worker
func createClusterRoleBindingWorker(ctx context.Context, cs clientset.Interface, ns string) (*rbacv1.ClusterRoleBinding, error) {
crb := &rbacv1.ClusterRoleBinding{
ObjectMeta: metav1.ObjectMeta{
Name: "nfd-worker-e2e",
},
Subjects: []rbacv1.Subject{
{
Kind: rbacv1.ServiceAccountKind,
Name: "nfd-worker-e2e",
Namespace: ns,
},
},
RoleRef: rbacv1.RoleRef{
APIGroup: rbacv1.GroupName,
Kind: "ClusterRole",
Name: "nfd-worker-e2e",
},
}

return cs.RbacV1().ClusterRoleBindings().Update(ctx, crb, metav1.UpdateOptions{})
}

// Configure cluster role binding required by NFD GC
func createClusterRoleBindingGC(ctx context.Context, cs clientset.Interface, ns string) (*rbacv1.ClusterRoleBinding, error) {
crb := &rbacv1.ClusterRoleBinding{
Expand Down