Skip to content

Commit d293e9e

Browse files
authored
[node-agent] Delete systemd unit files and drop-ins only if the unit has been created by node-agent (gardener#11015)
* Fix: Reduce file permissions for last applied OSC * Ingest containerd drop-in into OSC instead of writing it directly to the disk This prevents side effects when the containerd.service unit is changed by extensions too. * Delete systemd unit files and drop-ins only if the unit has been created by node-agent * Address PR review feedback * Address PR review feedback v2
1 parent 52f6e42 commit d293e9e

File tree

4 files changed

+144
-57
lines changed

4 files changed

+144
-57
lines changed

pkg/nodeagent/controller/operatingsystemconfig/changes.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,18 @@ func init() {
3131
decoder = serializer.NewCodecFactory(scheme).UniversalDeserializer()
3232
}
3333

34-
func extractOSCFromSecret(secret *corev1.Secret) (*extensionsv1alpha1.OperatingSystemConfig, []byte, string, error) {
34+
func extractOSCFromSecret(secret *corev1.Secret) (*extensionsv1alpha1.OperatingSystemConfig, string, error) {
3535
oscRaw, ok := secret.Data[nodeagentv1alpha1.DataKeyOperatingSystemConfig]
3636
if !ok {
37-
return nil, nil, "", fmt.Errorf("no %s key found in OSC secret", nodeagentv1alpha1.DataKeyOperatingSystemConfig)
37+
return nil, "", fmt.Errorf("no %s key found in OSC secret", nodeagentv1alpha1.DataKeyOperatingSystemConfig)
3838
}
3939

4040
osc := &extensionsv1alpha1.OperatingSystemConfig{}
4141
if err := runtime.DecodeInto(decoder, oscRaw, osc); err != nil {
42-
return nil, nil, "", fmt.Errorf("unable to decode OSC from secret data key %s: %w", nodeagentv1alpha1.DataKeyOperatingSystemConfig, err)
42+
return nil, "", fmt.Errorf("unable to decode OSC from secret data key %s: %w", nodeagentv1alpha1.DataKeyOperatingSystemConfig, err)
4343
}
4444

45-
return osc, oscRaw, secret.Annotations[nodeagentv1alpha1.AnnotationKeyChecksumDownloadedOperatingSystemConfig], nil
45+
return osc, secret.Annotations[nodeagentv1alpha1.AnnotationKeyChecksumDownloadedOperatingSystemConfig], nil
4646
}
4747

4848
type operatingSystemConfigChanges struct {

pkg/nodeagent/controller/operatingsystemconfig/containerd.go

Lines changed: 33 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"github.com/spf13/afero"
2626
"k8s.io/utils/ptr"
2727

28+
v1beta1constants "github.com/gardener/gardener/pkg/apis/core/v1beta1/constants"
2829
extensionsv1alpha1 "github.com/gardener/gardener/pkg/apis/extensions/v1alpha1"
2930
extensionsv1alpha1helper "github.com/gardener/gardener/pkg/apis/extensions/v1alpha1/helper"
3031
"github.com/gardener/gardener/pkg/utils/flow"
@@ -33,8 +34,8 @@ import (
3334
)
3435

3536
// ReconcileContainerdConfig sets required values of the given containerd configuration.
36-
func (r *Reconciler) ReconcileContainerdConfig(ctx context.Context, log logr.Logger, criConfig *extensionsv1alpha1.CRIConfig) error {
37-
if !extensionsv1alpha1helper.HasContainerdConfiguration(criConfig) {
37+
func (r *Reconciler) ReconcileContainerdConfig(ctx context.Context, log logr.Logger, osc *extensionsv1alpha1.OperatingSystemConfig) error {
38+
if !extensionsv1alpha1helper.HasContainerdConfiguration(osc.Spec.CRIConfig) {
3839
return nil
3940
}
4041

@@ -46,14 +47,13 @@ func (r *Reconciler) ReconcileContainerdConfig(ctx context.Context, log logr.Log
4647
return fmt.Errorf("failed to ensure containerd default config: %w", err)
4748
}
4849

49-
if err := r.ensureContainerdEnvironment(); err != nil {
50-
return fmt.Errorf("failed to ensure containerd environment: %w", err)
51-
}
52-
53-
if err := r.ensureContainerdConfiguration(log, criConfig); err != nil {
50+
if err := r.ensureContainerdConfiguration(log, osc.Spec.CRIConfig); err != nil {
5451
return fmt.Errorf("failed to ensure containerd config: %w", err)
5552
}
5653

54+
// Add the containerd drop-in to the OSC to prevent side effects when containerd.service is changed by extensions too.
55+
addContainerdEnvironmentDropIn(osc)
56+
5757
return nil
5858
}
5959

@@ -81,11 +81,36 @@ func (r *Reconciler) ReconcileContainerdRegistries(ctx context.Context, log logr
8181
}
8282
}
8383

84+
// addContainerdEnvironmentDropIn ingests a drop-in to set the environment for the 'containerd' service.
85+
func addContainerdEnvironmentDropIn(osc *extensionsv1alpha1.OperatingSystemConfig) {
86+
if osc.Spec.CRIConfig == nil {
87+
return
88+
}
89+
90+
unitDropIn := extensionsv1alpha1.DropIn{
91+
Name: "30-env_config.conf",
92+
Content: `[Service]
93+
Environment="PATH=` + extensionsv1alpha1.ContainerDRuntimeContainersBinFolder + `:` + os.Getenv("PATH") + `"
94+
`,
95+
}
96+
97+
for i, unit := range osc.Spec.Units {
98+
if unit.Name == v1beta1constants.OperatingSystemConfigUnitNameContainerDService {
99+
osc.Spec.Units[i].DropIns = append(osc.Spec.Units[i].DropIns, unitDropIn)
100+
return
101+
}
102+
}
103+
104+
osc.Spec.Units = append(osc.Spec.Units, extensionsv1alpha1.Unit{
105+
Name: v1beta1constants.OperatingSystemConfigUnitNameContainerDService,
106+
DropIns: []extensionsv1alpha1.DropIn{unitDropIn},
107+
})
108+
}
109+
84110
const (
85111
baseDir = "/etc/containerd"
86112
certsDir = baseDir + "/certs.d"
87113
configDir = baseDir + "/conf.d"
88-
dropinDir = "/etc/systemd/system/containerd.service.d"
89114
)
90115

91116
func (r *Reconciler) ensureContainerdConfigDirectories() error {
@@ -94,7 +119,6 @@ func (r *Reconciler) ensureContainerdConfigDirectories() error {
94119
baseDir,
95120
configDir,
96121
certsDir,
97-
dropinDir,
98122
} {
99123
if err := r.FS.MkdirAll(dir, defaultDirPermissions); err != nil {
100124
return fmt.Errorf("failure for directory %q: %w", dir, err)
@@ -130,32 +154,6 @@ func (r *Reconciler) ensureContainerdDefaultConfig(ctx context.Context) error {
130154
return r.FS.WriteFile(configFile, output, 0644)
131155
}
132156

133-
// ensureContainerdEnvironment sets the environment for the 'containerd' service.
134-
func (r *Reconciler) ensureContainerdEnvironment() error {
135-
var (
136-
unitDropin = `[Service]
137-
Environment="PATH=` + extensionsv1alpha1.ContainerDRuntimeContainersBinFolder + `:` + os.Getenv("PATH") + `"
138-
`
139-
)
140-
141-
containerdEnvFilePath := path.Join(dropinDir, "30-env_config.conf")
142-
exists, err := r.FS.Exists(containerdEnvFilePath)
143-
if err != nil {
144-
return err
145-
}
146-
147-
if exists {
148-
return nil
149-
}
150-
151-
err = r.FS.WriteFile(containerdEnvFilePath, []byte(unitDropin), 0644)
152-
if err != nil {
153-
return fmt.Errorf("unable to write unit dropin: %w", err)
154-
}
155-
156-
return nil
157-
}
158-
159157
// ensureContainerdConfiguration sets the configuration for containerd.
160158
func (r *Reconciler) ensureContainerdConfiguration(log logr.Logger, criConfig *extensionsv1alpha1.CRIConfig) error {
161159
config, err := r.FS.ReadFile(configFile)

pkg/nodeagent/controller/operatingsystemconfig/reconciler.go

Lines changed: 62 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ import (
2020
corev1 "k8s.io/api/core/v1"
2121
apierrors "k8s.io/apimachinery/pkg/api/errors"
2222
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
23+
"k8s.io/apimachinery/pkg/runtime"
24+
"k8s.io/apimachinery/pkg/runtime/schema"
25+
"k8s.io/apimachinery/pkg/runtime/serializer"
26+
jsonserializer "k8s.io/apimachinery/pkg/runtime/serializer/json"
2327
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
2428
"k8s.io/client-go/tools/record"
2529
"k8s.io/utils/ptr"
@@ -43,6 +47,16 @@ import (
4347

4448
const lastAppliedOperatingSystemConfigFilePath = nodeagentv1alpha1.BaseDir + "/last-applied-osc.yaml"
4549

50+
var codec runtime.Codec
51+
52+
func init() {
53+
scheme := runtime.NewScheme()
54+
utilruntime.Must(extensionsv1alpha1.AddToScheme(scheme))
55+
ser := jsonserializer.NewSerializerWithOptions(jsonserializer.DefaultMetaFactory, scheme, scheme, jsonserializer.SerializerOptions{Yaml: true, Pretty: false, Strict: false})
56+
versions := schema.GroupVersions([]schema.GroupVersion{nodeagentv1alpha1.SchemeGroupVersion, extensionsv1alpha1.SchemeGroupVersion})
57+
codec = serializer.NewCodecFactory(scheme).CodecForVersions(ser, ser, versions, versions)
58+
}
59+
4660
// Reconciler decodes the OperatingSystemConfig resources from secrets and applies the systemd units and files to the
4761
// node.
4862
type Reconciler struct {
@@ -85,11 +99,16 @@ func (r *Reconciler) Reconcile(ctx context.Context, request reconcile.Request) (
8599
return reconcile.Result{}, nil
86100
}
87101

88-
osc, oscRaw, oscChecksum, err := extractOSCFromSecret(secret)
102+
osc, oscChecksum, err := extractOSCFromSecret(secret)
89103
if err != nil {
90104
return reconcile.Result{}, fmt.Errorf("failed extracting OSC from secret: %w", err)
91105
}
92106

107+
log.Info("Applying containerd configuration")
108+
if err := r.ReconcileContainerdConfig(ctx, log, osc); err != nil {
109+
return reconcile.Result{}, fmt.Errorf("failed reconciling containerd configuration: %w", err)
110+
}
111+
93112
oscChanges, err := computeOperatingSystemConfigChanges(r.FS, osc)
94113
if err != nil {
95114
return reconcile.Result{}, fmt.Errorf("failed calculating the OSC changes: %w", err)
@@ -100,11 +119,6 @@ func (r *Reconciler) Reconcile(ctx context.Context, request reconcile.Request) (
100119
return reconcile.Result{}, nil
101120
}
102121

103-
log.Info("Applying containerd configuration")
104-
if err := r.ReconcileContainerdConfig(ctx, log, osc.Spec.CRIConfig); err != nil {
105-
return reconcile.Result{}, fmt.Errorf("failed reconciling containerd configuration: %w", err)
106-
}
107-
108122
log.Info("Applying new or changed inline files")
109123
if err := r.applyChangedInlineFiles(log, oscChanges.files.changed); err != nil {
110124
return reconcile.Result{}, fmt.Errorf("failed applying changed inline files: %w", err)
@@ -170,7 +184,12 @@ func (r *Reconciler) Reconcile(ctx context.Context, request reconcile.Request) (
170184
)
171185

172186
log.Info("Persisting current operating system config as 'last-applied' file to the disk", "path", lastAppliedOperatingSystemConfigFilePath)
173-
if err := r.FS.WriteFile(lastAppliedOperatingSystemConfigFilePath, oscRaw, 0644); err != nil {
187+
oscRaw, err := runtime.Encode(codec, osc)
188+
if err != nil {
189+
return reconcile.Result{}, fmt.Errorf("unable to encode OSC: %w", err)
190+
}
191+
192+
if err := r.FS.WriteFile(lastAppliedOperatingSystemConfigFilePath, oscRaw, 0600); err != nil {
174193
return reconcile.Result{}, fmt.Errorf("unable to write current OSC to file path %q: %w", lastAppliedOperatingSystemConfigFilePath, err)
175194
}
176195

@@ -387,14 +406,21 @@ func (r *Reconciler) applyChangedUnits(ctx context.Context, log logr.Logger, uni
387406

388407
func (r *Reconciler) removeDeletedUnits(ctx context.Context, log logr.Logger, node client.Object, units []extensionsv1alpha1.Unit) error {
389408
for _, unit := range units {
409+
// The unit has been created by gardener-node-agent if it has content.
410+
// Otherwise, it might be a default OS unit which was enabled/disabled or where drop-ins were added.
411+
unitCreatedByNodeAgent := unit.Content != nil
412+
390413
unitFilePath := path.Join(etcSystemdSystem, unit.Name)
391414

392415
unitFileExists, err := r.FS.Exists(unitFilePath)
393416
if err != nil {
394417
return fmt.Errorf("unable to check whether unit file %q exists: %w", unitFilePath, err)
395418
}
396419

397-
if unitFileExists {
420+
// Only stop and remove the unit file if it was created by gardener-node-agent. Otherwise, this could affect
421+
// default OS units where we add and remove drop-ins only. If operators want to stop and disable units,
422+
// they can do it by adding a unit to OSC which applies the `stop` command.
423+
if unitFileExists && unitCreatedByNodeAgent {
398424
if err := r.DBus.Disable(ctx, unit.Name); err != nil {
399425
return fmt.Errorf("unable to disable deleted unit %q: %w", unit.Name, err)
400426
}
@@ -405,11 +431,37 @@ func (r *Reconciler) removeDeletedUnits(ctx context.Context, log logr.Logger, no
405431

406432
if err := r.FS.Remove(unitFilePath); err != nil && !errors.Is(err, afero.ErrFileNotFound) {
407433
return fmt.Errorf("unable to delete systemd unit file of deleted unit %q: %w", unit.Name, err)
434+
} else {
435+
log.Info("Unit was not created by gardener-node-agent, skipping deletion of unit file", "unitName", unit.Name)
436+
}
437+
}
438+
439+
dropInFolder := unitFilePath + ".d"
440+
441+
if exists, err := r.FS.Exists(dropInFolder); err != nil {
442+
return fmt.Errorf("unable to check whether drop-in folder %q exists: %w", dropInFolder, err)
443+
} else if exists {
444+
for _, dropIn := range unit.DropIns {
445+
dropInFilePath := path.Join(dropInFolder, dropIn.Name)
446+
if err := r.FS.Remove(dropInFilePath); err != nil && !errors.Is(err, afero.ErrFileNotFound) {
447+
return fmt.Errorf("unable to delete drop-in file %q of deleted unit %q: %w", dropInFilePath, unit.Name, err)
448+
}
449+
}
450+
451+
if empty, err := r.FS.IsEmpty(dropInFolder); err != nil {
452+
return fmt.Errorf("unable to check whether drop-in folder %q is empty: %w", dropInFolder, err)
453+
} else if empty {
454+
if err := r.FS.RemoveAll(dropInFolder); err != nil && !errors.Is(err, afero.ErrFileNotFound) {
455+
return fmt.Errorf("unable to delete systemd drop-in folder of deleted unit %q: %w", unit.Name, err)
456+
}
408457
}
409458
}
410459

411-
if err := r.FS.RemoveAll(unitFilePath + ".d"); err != nil && !errors.Is(err, afero.ErrFileNotFound) {
412-
return fmt.Errorf("unable to delete systemd drop-in folder of deleted unit %q: %w", unit.Name, err)
460+
// If the unit was not created by gardener-node-agent, but it exists on the node and was removed from OSC. Restart it to apply changes.
461+
if unitFileExists && !unitCreatedByNodeAgent {
462+
if err := r.DBus.Restart(ctx, r.Recorder, node, unit.Name); err != nil {
463+
return fmt.Errorf("unable to restart unit %q removed from OSC but not created by gardener-node-agent: %w", unit.Name, err)
464+
}
413465
}
414466

415467
log.Info("Successfully removed no longer needed unit", "unitName", unit.Name)

0 commit comments

Comments
 (0)