Skip to content

Commit fce747f

Browse files
fix: deliver execution-watchdog limit via config.json
1 parent 64e93c4 commit fce747f

6 files changed

Lines changed: 223 additions & 33 deletions

File tree

cmd/stepsecurity-dev-machine-guard/main.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ func main() {
232232
log.Error("Enterprise configuration not found. Run '%s configure' or download the script from your StepSecurity dashboard.", os.Args[0])
233233
os.Exit(1)
234234
}
235-
armExecutionWatchdog(telemetry.ExecutionDeadlineFromEnv(), log)
235+
armExecutionWatchdog(telemetry.ExecutionDeadline(config.MaxExecutionDuration), log)
236236
if err := telemetry.Run(exec, log, cfg); err != nil {
237237
log.Error("%v", err)
238238
os.Exit(1)
@@ -265,9 +265,19 @@ func main() {
265265
log.Error("Scheduled installation is not supported on %s", runtime.GOOS)
266266
os.Exit(1)
267267
}
268+
269+
// Persist the loader-exported max-execution duration into config.json so
270+
// scheduler-fired runs (launchd/systemd/schtasks) — which invoke the
271+
// binary directly and never inherit the loader's exported env var — arm
272+
// the watchdog with the same value. Best-effort: a write failure just
273+
// means scheduled runs fall back to the binary's built-in default.
274+
if err := config.PersistMaxExecutionDuration(os.Getenv(telemetry.EnvMaxExecutionDuration)); err != nil {
275+
log.Warn("failed to persist max execution duration to config (%v) — scheduled runs will use the built-in default", err)
276+
}
277+
268278
log.Progress("Sending initial telemetry...")
269279
fmt.Println()
270-
armExecutionWatchdog(telemetry.ExecutionDeadlineFromEnv(), log)
280+
armExecutionWatchdog(telemetry.ExecutionDeadline(config.MaxExecutionDuration), log)
271281
telemetryErr := telemetry.Run(exec, log, cfg)
272282

273283
// On Linux, systemd.Install enabled the timer but did not start it.
@@ -371,7 +381,7 @@ func main() {
371381
}
372382
case config.IsEnterpriseMode():
373383
log.Debug("dispatch: enterprise telemetry (auto-detected)")
374-
armExecutionWatchdog(telemetry.ExecutionDeadlineFromEnv(), log)
384+
armExecutionWatchdog(telemetry.ExecutionDeadline(config.MaxExecutionDuration), log)
375385
if err := telemetry.Run(exec, log, cfg); err != nil {
376386
log.Error("%v", err)
377387
os.Exit(1)

internal/config/config.go

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -27,22 +27,33 @@ var (
2727
InstallDir string // "" means default (~/.stepsecurity); non-empty makes the agent put all its files (logs, hook errors, future state) under this directory. Bootstrap config.json itself stays at the legacy location. Per-run opt-out is the CLI flag --install-dir=. Resolution: --install-dir flag > STEPSECURITY_HOME env > this field > default — see internal/paths.
2828
)
2929

30+
// MaxExecutionDuration is the whole-process execution-watchdog limit
31+
// (STEPSEC_MAX_EXECUTION_DURATION). Persisted into config.json at install time
32+
// so scheduler-fired runs (launchd/systemd/schtasks) — which invoke the binary
33+
// directly and never inherit the loader-exported env var — resolve the same
34+
// value the loader configured. "" means fall back to the binary's built-in
35+
// default (4h). Declared in its own var block (not the placeholder group
36+
// above) because it carries no build-time {{...}} placeholder. See
37+
// telemetry.ExecutionDeadline.
38+
var MaxExecutionDuration string
39+
3040
// ConfigFile is the JSON structure persisted to ~/.stepsecurity/config.json.
3141
type ConfigFile struct {
32-
CustomerID string `json:"customer_id,omitempty"`
33-
APIEndpoint string `json:"api_endpoint,omitempty"`
34-
APIKey string `json:"api_key,omitempty"`
35-
ScanFrequencyHours string `json:"scan_frequency_hours,omitempty"`
36-
SearchDirs []string `json:"search_dirs,omitempty"`
37-
EnableNPMScan *bool `json:"enable_npm_scan,omitempty"`
38-
EnableBrewScan *bool `json:"enable_brew_scan,omitempty"`
39-
EnablePythonScan *bool `json:"enable_python_scan,omitempty"`
40-
IncludeTCCProtected *bool `json:"include_tcc_protected,omitempty"`
41-
ColorMode string `json:"color_mode,omitempty"`
42-
OutputFormat string `json:"output_format,omitempty"`
43-
HTMLOutputFile string `json:"html_output_file,omitempty"`
44-
LogLevel string `json:"log_level,omitempty"`
45-
InstallDir string `json:"install_dir,omitempty"`
42+
CustomerID string `json:"customer_id,omitempty"`
43+
APIEndpoint string `json:"api_endpoint,omitempty"`
44+
APIKey string `json:"api_key,omitempty"`
45+
ScanFrequencyHours string `json:"scan_frequency_hours,omitempty"`
46+
SearchDirs []string `json:"search_dirs,omitempty"`
47+
EnableNPMScan *bool `json:"enable_npm_scan,omitempty"`
48+
EnableBrewScan *bool `json:"enable_brew_scan,omitempty"`
49+
EnablePythonScan *bool `json:"enable_python_scan,omitempty"`
50+
IncludeTCCProtected *bool `json:"include_tcc_protected,omitempty"`
51+
ColorMode string `json:"color_mode,omitempty"`
52+
OutputFormat string `json:"output_format,omitempty"`
53+
HTMLOutputFile string `json:"html_output_file,omitempty"`
54+
LogLevel string `json:"log_level,omitempty"`
55+
InstallDir string `json:"install_dir,omitempty"`
56+
MaxExecutionDuration string `json:"max_execution_duration,omitempty"`
4657
}
4758

4859
// userConfigDir returns ~/.stepsecurity — the per-user config location.
@@ -172,6 +183,9 @@ func Load() {
172183
if cfg.InstallDir != "" && InstallDir == "" {
173184
InstallDir = cfg.InstallDir
174185
}
186+
if cfg.MaxExecutionDuration != "" && MaxExecutionDuration == "" {
187+
MaxExecutionDuration = cfg.MaxExecutionDuration
188+
}
175189
}
176190

177191
// IsEnterpriseMode returns true if valid enterprise credentials are configured.
@@ -631,3 +645,24 @@ func RunConfigureNonInteractive(opts NonInteractiveOptions) error {
631645
fmt.Printf("Configuration saved to %s\n", WriteConfigFilePath())
632646
return nil
633647
}
648+
649+
// PersistMaxExecutionDuration records the STEPSEC_MAX_EXECUTION_DURATION value
650+
// the loader exported into config.json at install time. Scheduler-fired runs
651+
// (launchd/systemd/schtasks) invoke the binary directly and never inherit the
652+
// loader's exported env var, so without this they fall back to the built-in 4h
653+
// default regardless of the loader's MAX_EXECUTION_DURATION_HOURS. Persisting
654+
// it lets telemetry.ExecutionDeadline pick it up on every scheduled run.
655+
// Read-modify-write so the loader-written customer_id/api_key/etc. survive.
656+
// No-op when value is empty (a direct binary install with no loader-configured
657+
// value keeps the built-in default).
658+
func PersistMaxExecutionDuration(value string) error {
659+
if value == "" {
660+
return nil
661+
}
662+
existing := loadExisting()
663+
if existing.MaxExecutionDuration == value {
664+
return nil
665+
}
666+
existing.MaxExecutionDuration = value
667+
return save(existing)
668+
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package config
2+
3+
import "testing"
4+
5+
// PersistMaxExecutionDuration is a read-modify-write: it records the loader's
6+
// max-execution value into config.json without disturbing the customer_id /
7+
// api_key / etc. the loader already wrote.
8+
func TestPersistMaxExecutionDuration_RoundTrip(t *testing.T) {
9+
withHome(t)
10+
11+
// Seed config.json the way the loader's write_config would (no max-exec
12+
// field), so we prove Persist preserves the existing fields.
13+
seed := &ConfigFile{CustomerID: "acme", APIKey: "k", APIEndpoint: "https://api"}
14+
if err := save(seed); err != nil {
15+
t.Fatalf("seed save: %v", err)
16+
}
17+
18+
if err := PersistMaxExecutionDuration("2h"); err != nil {
19+
t.Fatalf("PersistMaxExecutionDuration: %v", err)
20+
}
21+
22+
got := loadExisting()
23+
if got.MaxExecutionDuration != "2h" {
24+
t.Errorf("MaxExecutionDuration = %q, want %q", got.MaxExecutionDuration, "2h")
25+
}
26+
if got.CustomerID != "acme" || got.APIKey != "k" || got.APIEndpoint != "https://api" {
27+
t.Errorf("read-modify-write clobbered existing fields: %+v", got)
28+
}
29+
}
30+
31+
// An empty value is a no-op (a direct binary install with no loader-exported
32+
// value must not write an empty field that would later parse to the default).
33+
func TestPersistMaxExecutionDuration_EmptyIsNoOp(t *testing.T) {
34+
withHome(t)
35+
if err := save(&ConfigFile{CustomerID: "acme"}); err != nil {
36+
t.Fatalf("seed save: %v", err)
37+
}
38+
39+
if err := PersistMaxExecutionDuration(""); err != nil {
40+
t.Fatalf("empty should be a no-op, got: %v", err)
41+
}
42+
43+
if got := loadExisting(); got.MaxExecutionDuration != "" {
44+
t.Errorf("empty value should not be persisted, got %q", got.MaxExecutionDuration)
45+
}
46+
}
47+
48+
// Load() must surface a persisted max-execution value into the package var the
49+
// resolver reads on scheduler-fired runs.
50+
func TestLoad_PopulatesMaxExecutionDuration(t *testing.T) {
51+
withHome(t)
52+
seed := &ConfigFile{
53+
CustomerID: "acme",
54+
APIKey: "k",
55+
APIEndpoint: "https://api",
56+
MaxExecutionDuration: "90m",
57+
}
58+
if err := save(seed); err != nil {
59+
t.Fatalf("seed save: %v", err)
60+
}
61+
62+
// Load only fills package vars still at their zero value; reset so this
63+
// test is independent of execution order within the package.
64+
MaxExecutionDuration = ""
65+
t.Cleanup(func() { MaxExecutionDuration = "" })
66+
67+
Load()
68+
69+
if MaxExecutionDuration != "90m" {
70+
t.Errorf("Load did not populate MaxExecutionDuration: got %q, want %q", MaxExecutionDuration, "90m")
71+
}
72+
}

internal/telemetry/execution_deadline.go

Lines changed: 48 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,60 @@ import (
1616
// bounding pathological cases to a single daily launchd/schtasks tick.
1717
const defaultExecutionDeadline = 4 * time.Hour
1818

19-
// ExecutionDeadlineFromEnv resolves STEPSEC_MAX_EXECUTION_DURATION using
20-
// the same contract as scanDeadlineFromEnv (scan_deadline.go):
21-
// - unset / empty: returns defaultExecutionDeadline (4h)
22-
// - "0" / "off": returns 0 (watchdog disabled; main.go skips arming)
23-
// - any Go time.ParseDuration string ("2h", "30m", "45m30s"): that
24-
// duration when positive
25-
// - anything else: returns defaultExecutionDeadline (silent fallback —
26-
// telemetry runs from unattended launchd/schtasks/systemd contexts
27-
// where a typo in the env var should not be fatal to the scan)
19+
// EnvMaxExecutionDuration is an optional environment-variable override for the
20+
// execution watchdog, honored ahead of config.json for ad-hoc/manual runs
21+
// (e.g. `STEPSEC_MAX_EXECUTION_DURATION=3s ./binary send-telemetry`). The
22+
// loader no longer exports it: the configured value is delivered through
23+
// config.json (config.MaxExecutionDuration), which the binary reads on every
24+
// invocation — including scheduler-fired runs (launchd/systemd/schtasks) that
25+
// bypass the loader.
26+
const EnvMaxExecutionDuration = "STEPSEC_MAX_EXECUTION_DURATION"
27+
28+
// ExecutionDeadlineFromEnv resolves the execution deadline from the environment
29+
// only. Equivalent to ExecutionDeadline(""); kept for callers (and tests) that
30+
// have no config fallback to supply.
2831
func ExecutionDeadlineFromEnv() time.Duration {
29-
v := os.Getenv("STEPSEC_MAX_EXECUTION_DURATION")
32+
return ExecutionDeadline("")
33+
}
34+
35+
// ExecutionDeadline resolves the whole-process execution deadline with
36+
// env > config > default precedence. The env var (EnvMaxExecutionDuration) is
37+
// an optional ad-hoc override; configValue is the value the loader/installer
38+
// persists into config.json (config.MaxExecutionDuration), the primary channel
39+
// — it covers every invocation, including scheduler-fired runs
40+
// (launchd/systemd/schtasks) that invoke the binary directly. Each source uses
41+
// the same contract as scanDeadlineFromEnv (scan_deadline.go):
42+
// - "0" / "off": 0 (watchdog disabled; main.go skips arming)
43+
// - any positive Go time.ParseDuration string ("2h", "30m", "45m30s"): that
44+
// duration
45+
// - empty or unparseable: fall through to the next source, then to
46+
// defaultExecutionDeadline (a typo in an unattended launchd/schtasks/systemd
47+
// context must not be fatal to the scan)
48+
func ExecutionDeadline(configValue string) time.Duration {
49+
if d, ok := parseExecutionDeadline(os.Getenv(EnvMaxExecutionDuration)); ok {
50+
return d
51+
}
52+
if d, ok := parseExecutionDeadline(configValue); ok {
53+
return d
54+
}
55+
return defaultExecutionDeadline
56+
}
57+
58+
// parseExecutionDeadline applies the shared parse contract to a single raw
59+
// value. ok is false when the value is absent or unparseable — signalling the
60+
// caller to fall through to the next source; true (with a possibly-zero
61+
// duration) when the value explicitly resolved, including the "0"/"off" disable
62+
// case.
63+
func parseExecutionDeadline(v string) (time.Duration, bool) {
3064
if v == "" {
31-
return defaultExecutionDeadline
65+
return 0, false
3266
}
3367
if v == "0" || v == "off" {
34-
return 0
68+
return 0, true
3569
}
3670
d, err := time.ParseDuration(v)
3771
if err != nil || d <= 0 {
38-
return defaultExecutionDeadline
72+
return 0, false
3973
}
40-
return d
74+
return d, true
4175
}

internal/telemetry/execution_deadline_test.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,39 @@ func TestExecutionDeadlineFromEnv(t *testing.T) {
3838
})
3939
}
4040
}
41+
42+
// ExecutionDeadline adds a config-file fallback for scheduler-fired runs that
43+
// never see the loader-exported env var: env > config > default.
44+
func TestExecutionDeadline_EnvThenConfigThenDefault(t *testing.T) {
45+
cases := []struct {
46+
name string
47+
env string
48+
setEnv bool
49+
configVal string
50+
want time.Duration
51+
}{
52+
// Env present and valid always wins over config.
53+
{"env wins over config", "2h", true, "8h", 2 * time.Hour},
54+
{"env off disables despite config", "off", true, "8h", 0},
55+
// Env absent/unparseable falls through to the config value.
56+
{"config used when env unset", "", false, "8h", 8 * time.Hour},
57+
{"config used when env empty", "", true, "8h", 8 * time.Hour},
58+
{"config off disables when env unset", "", false, "off", 0},
59+
{"env junk falls through to config", "junk", true, "30m", 30 * time.Minute},
60+
// Neither source usable → built-in default.
61+
{"config junk falls back to default", "", false, "junk", 4 * time.Hour},
62+
{"both empty falls back to default", "", false, "", 4 * time.Hour},
63+
}
64+
for _, tc := range cases {
65+
t.Run(tc.name, func(t *testing.T) {
66+
if tc.setEnv {
67+
t.Setenv("STEPSEC_MAX_EXECUTION_DURATION", tc.env)
68+
}
69+
got := ExecutionDeadline(tc.configVal)
70+
if got != tc.want {
71+
t.Errorf("ExecutionDeadline(%q) env=%q set=%v = %v, want %v",
72+
tc.configVal, tc.env, tc.setEnv, got, tc.want)
73+
}
74+
})
75+
}
76+
}

internal/telemetry/run_status.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,11 @@ const (
3333
// runStatusHeartbeatInterval is how often the telemetry run posts a
3434
// status_info snapshot while a scan is in flight. Phase-boundary posts
3535
// fire on top of this so a fast run still surfaces phase completions
36-
// without waiting for the next tick.
37-
runStatusHeartbeatInterval = 5 * time.Minute
36+
// without waiting for the next tick. 2 minutes (matching the log-tail
37+
// emitter's throttle, log_tail_emitter.go) gives tighter visibility into
38+
// a stuck device — the last heartbeat is at most ~2 min stale — without
39+
// meaningfully adding to backend write volume for healthy short scans.
40+
runStatusHeartbeatInterval = 2 * time.Minute
3841
)
3942

4043
// runStatusBody is the JSON shape posted to /telemetry/run-status. Fields

0 commit comments

Comments
 (0)