Skip to content

Commit 86e0a74

Browse files
authored
sd-agent: add discovery.use_sd_agent config options (#45727)
### What does this PR do? This PR adds a "killswitch" for sd-agent to always fallback to system-probe no matter what. Setting `discovery.use_sd_agent` to false, or not setting it all will make it fallback. This ensures there is no accidental usage of sd-agent until we want to. ### Motivation Control sd-agent & prevent sd-agent accidental usage in the future. ### Describe how you validated your changes Unit tests. Ran with both Bazel & Cargo. ### Additional Notes Co-authored-by: guillaume.pagnoux <guillaume.pagnoux@datadoghq.com>
1 parent be89362 commit 86e0a74

7 files changed

Lines changed: 331 additions & 29 deletions

File tree

pkg/config/setup/system_probe.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ func InitSystemProbeConfig(cfg pkgconfigmodel.Setup) {
347347

348348
// Discovery config
349349
cfg.BindEnv("discovery.enabled") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv'
350+
cfg.BindEnvAndSetDefault("discovery.use_sd_agent", false)
350351
cfg.BindEnvAndSetDefault("discovery.cpu_usage_update_delay", "60s")
351352
cfg.BindEnvAndSetDefault("discovery.ignored_command_names", []string{"chronyd", "cilium-agent", "containerd", "dhclient", "dockerd", "kubelet", "livenessprobe", "local-volume-pr", "sshd", "systemd"})
352353
cfg.BindEnvAndSetDefault("discovery.service_collection_interval", "60s")

pkg/config/setup/system_probe_test.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,32 @@ func TestSystemProbeDefaultConfig(t *testing.T) {
5555
})
5656
}
5757
}
58+
59+
func TestDiscoveryUseSdAgent(t *testing.T) {
60+
t.Run("disabled by default", func(t *testing.T) {
61+
cfg := newEmptyMockConf(t)
62+
InitSystemProbeConfig(cfg)
63+
assert.False(t, cfg.GetBool("discovery.use_sd_agent"))
64+
})
65+
66+
t.Run("enabled from env var", func(t *testing.T) {
67+
t.Setenv("DD_DISCOVERY_USE_SD_AGENT", "true")
68+
cfg := newEmptyMockConf(t)
69+
InitSystemProbeConfig(cfg)
70+
assert.True(t, cfg.GetBool("discovery.use_sd_agent"))
71+
})
72+
73+
t.Run("disabled from env var", func(t *testing.T) {
74+
t.Setenv("DD_DISCOVERY_USE_SD_AGENT", "false")
75+
cfg := newEmptyMockConf(t)
76+
InitSystemProbeConfig(cfg)
77+
assert.False(t, cfg.GetBool("discovery.use_sd_agent"))
78+
})
79+
80+
t.Run("enabled from config", func(t *testing.T) {
81+
cfg := newEmptyMockConf(t)
82+
InitSystemProbeConfig(cfg)
83+
cfg.SetWithoutSource("discovery.use_sd_agent", true)
84+
assert.True(t, cfg.GetBool("discovery.use_sd_agent"))
85+
})
86+
}

pkg/discovery/module/impl_linux_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,7 @@ func TestRustBinary(t *testing.T) {
443443
}
444444

445445
env := os.Environ()
446+
env = append(env, "DD_DISCOVERY_USE_SD_AGENT=true")
446447
env = append(env, "DD_DISCOVERY_ENABLED=false")
447448
// Fake system-probe binary with empty configuration file
448449
cmd := exec.Command(binaryPath, "--", truePath, "-c", "/dev/null")

pkg/discovery/module/rust/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ rust_test(
201201
"MOCK_SYSTEM_PROBE": "$(rootpath :mock_system_probe)",
202202
},
203203
deps = [
204+
"@sdagent_crates//:nix",
204205
"@sdagent_crates//:tempfile",
205206
],
206207
)

pkg/discovery/module/rust/src/config.rs

Lines changed: 132 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -242,25 +242,24 @@ pub fn get_log_level(config: &Result<Option<Yaml>>) -> log::Level {
242242

243243
/// Determines whether to run sd-agent, fallback to system-probe, or exit cleanly
244244
pub fn determine_action(config: &Result<Option<Yaml>>) -> FallbackDecision {
245-
if has_non_discovery_env_vars() {
245+
let Some(yaml_doc) = config.as_ref().ok() else {
246+
warn!("Failed to load YAML config. Falling back to system-probe.");
246247
return FallbackDecision::FallbackToSystemProbe;
247-
}
248-
249-
// Use the pre-loaded config
250-
let yaml_doc = match config {
251-
Ok(Some(doc)) => Some(doc.clone()),
252-
Ok(None) => None,
253-
Err(_) => {
254-
warn!("Failed to load YAML config. Falling back to system-probe.");
255-
return FallbackDecision::FallbackToSystemProbe;
256-
}
257248
};
258-
259-
if has_non_discovery_yaml_keys(&yaml_doc) {
249+
let use_sd_agent = is_config_enabled(
250+
"DD_DISCOVERY_USE_SD_AGENT",
251+
"discovery.use_sd_agent",
252+
yaml_doc,
253+
);
254+
255+
if use_sd_agent.is_none_or(|enabled| !enabled)
256+
|| has_non_discovery_env_vars()
257+
|| has_non_discovery_yaml_keys(yaml_doc)
258+
{
260259
return FallbackDecision::FallbackToSystemProbe;
261260
}
262261

263-
if let Some(enabled) = is_config_enabled("DD_DISCOVERY_ENABLED", "discovery.enabled", &yaml_doc)
262+
if let Some(enabled) = is_config_enabled("DD_DISCOVERY_ENABLED", "discovery.enabled", yaml_doc)
264263
&& !enabled
265264
{
266265
return FallbackDecision::ExitCleanly;
@@ -295,10 +294,16 @@ mod tests {
295294

296295
#[test]
297296
fn test_discovery_only_no_fallback() {
298-
temp_env::with_var("DD_DISCOVERY_ENABLED", Some("true"), || {
299-
let decision = determine_action_no_config();
300-
assert_eq!(decision, FallbackDecision::RunSdAgent);
301-
});
297+
temp_env::with_vars(
298+
[
299+
("DD_DISCOVERY_ENABLED", Some("true")),
300+
("DD_DISCOVERY_USE_SD_AGENT", Some("true")),
301+
],
302+
|| {
303+
let decision = determine_action_no_config();
304+
assert_eq!(decision, FallbackDecision::RunSdAgent);
305+
},
306+
);
302307
}
303308

304309
#[test]
@@ -383,7 +388,7 @@ discovery:
383388

384389
#[test]
385390
fn test_no_modules_enabled() {
386-
temp_env::with_vars(Vec::<(String, Option<String>)>::new(), || {
391+
temp_env::with_vars([("DD_DISCOVERY_USE_SD_AGENT", Some("true"))], || {
387392
// Use an empty config file to avoid picking up system config at /etc/datadog-agent/system-probe.yaml
388393
let decision = determine_action_no_config();
389394
assert_eq!(decision, FallbackDecision::RunSdAgent);
@@ -566,7 +571,7 @@ system_probe_config:
566571

567572
#[test]
568573
fn test_determine_action_discovery_only_yaml() {
569-
temp_env::with_vars(Vec::<(String, Option<String>)>::new(), || {
574+
temp_env::with_vars([("DD_DISCOVERY_USE_SD_AGENT", Some("true"))], || {
570575
let yaml = r#"
571576
discovery:
572577
enabled: true
@@ -580,15 +585,21 @@ discovery:
580585

581586
#[test]
582587
fn test_determine_action_discovery_only_env() {
583-
temp_env::with_var("DD_DISCOVERY_ENABLED", Some("true"), || {
584-
let decision = determine_action_no_config();
585-
assert_eq!(decision, FallbackDecision::RunSdAgent);
586-
});
588+
temp_env::with_vars(
589+
[
590+
("DD_DISCOVERY_ENABLED", Some("true")),
591+
("DD_DISCOVERY_USE_SD_AGENT", Some("true")),
592+
],
593+
|| {
594+
let decision = determine_action_no_config();
595+
assert_eq!(decision, FallbackDecision::RunSdAgent);
596+
},
597+
);
587598
}
588599

589600
#[test]
590601
fn test_determine_action_no_config_runs_sd_agent() {
591-
temp_env::with_vars(Vec::<(String, Option<String>)>::new(), || {
602+
temp_env::with_vars([("DD_DISCOVERY_USE_SD_AGENT", Some("true"))], || {
592603
let decision = determine_action_no_config();
593604
assert!(decision == FallbackDecision::RunSdAgent);
594605
});
@@ -646,10 +657,16 @@ discovery:
646657

647658
#[test]
648659
fn test_determine_action_exit_cleanly_disabled_env() {
649-
temp_env::with_var("DD_DISCOVERY_ENABLED", Some("false"), || {
650-
let decision = determine_action_no_config();
651-
assert_eq!(decision, FallbackDecision::ExitCleanly);
652-
});
660+
temp_env::with_vars(
661+
[
662+
("DD_DISCOVERY_ENABLED", Some("false")),
663+
("DD_DISCOVERY_USE_SD_AGENT", Some("true")),
664+
],
665+
|| {
666+
let decision = determine_action_no_config();
667+
assert_eq!(decision, FallbackDecision::ExitCleanly);
668+
},
669+
);
653670
}
654671

655672
#[test]
@@ -947,4 +964,90 @@ log_level: 12345
947964
assert_eq!(level, log::Level::Error, "Should map 'off' to Error level");
948965
});
949966
}
967+
968+
// Killswitch tests
969+
#[test]
970+
fn test_killswitch_disabled_forces_fallback() {
971+
temp_env::with_vars(
972+
[
973+
("DD_DISCOVERY_USE_SD_AGENT", Some("false")),
974+
("DD_DISCOVERY_ENABLED", Some("true")),
975+
],
976+
|| {
977+
let decision = determine_action(&Ok(None));
978+
assert_eq!(
979+
decision,
980+
FallbackDecision::FallbackToSystemProbe,
981+
"Should fallback when killswitch is false via env var"
982+
);
983+
},
984+
);
985+
}
986+
987+
#[test]
988+
fn test_killswitch_not_set_defaults_to_fallback() {
989+
temp_env::with_var("DD_DISCOVERY_ENABLED", Some("true"), || {
990+
let decision = determine_action(&Ok(None));
991+
assert_eq!(
992+
decision,
993+
FallbackDecision::FallbackToSystemProbe,
994+
"Should fallback when killswitch is not set (safe default)"
995+
);
996+
});
997+
}
998+
999+
#[test]
1000+
fn test_killswitch_enabled_allows_sd_agent() {
1001+
temp_env::with_vars(
1002+
[
1003+
("DD_DISCOVERY_USE_SD_AGENT", Some("true")),
1004+
("DD_DISCOVERY_ENABLED", Some("true")),
1005+
],
1006+
|| {
1007+
let decision = determine_action(&Ok(None));
1008+
assert_eq!(
1009+
decision,
1010+
FallbackDecision::RunSdAgent,
1011+
"Should run sd-agent when killswitch is true via env var"
1012+
);
1013+
},
1014+
);
1015+
}
1016+
1017+
#[test]
1018+
fn test_killswitch_enabled_respects_other_fallback_logic() {
1019+
temp_env::with_vars(
1020+
[
1021+
("DD_DISCOVERY_USE_SD_AGENT", Some("true")),
1022+
("DD_DISCOVERY_ENABLED", Some("true")),
1023+
("DD_NETWORK_CONFIG_ENABLED", Some("true")), // Non-discovery module
1024+
],
1025+
|| {
1026+
let decision = determine_action(&Ok(None));
1027+
assert_eq!(
1028+
decision,
1029+
FallbackDecision::FallbackToSystemProbe,
1030+
"Should still fallback for non-discovery modules even when killswitch is true"
1031+
);
1032+
},
1033+
);
1034+
}
1035+
1036+
#[test]
1037+
fn test_killswitch_enabled_with_discovery_disabled_exits_cleanly() {
1038+
temp_env::with_vars(
1039+
[
1040+
("DD_DISCOVERY_USE_SD_AGENT", Some("true")),
1041+
("DD_DISCOVERY_ENABLED", Some("false")),
1042+
],
1043+
|| {
1044+
let decision = determine_action(&Ok(None));
1045+
assert_eq!(
1046+
decision,
1047+
FallbackDecision::ExitCleanly,
1048+
"Should exit cleanly when discovery is disabled, even if killswitch is true"
1049+
);
1050+
},
1051+
);
1052+
}
9501053
}

0 commit comments

Comments
 (0)