Skip to content

Commit 2ee3faa

Browse files
authored
Merge pull request #123 from shubham-stepsecurity/sm/fix
fix: harden macOS scans against IDE pop-ups and stuck processes
2 parents 4adfd33 + eb61ae3 commit 2ee3faa

25 files changed

Lines changed: 1575 additions & 113 deletions

.gitignore

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,15 @@
2020
!docs/**/*.html
2121
!images/**/*.html
2222

23-
# Go build artifacts — never commit compiled binaries
24-
**/stepsecurity-dev-machine-guard
23+
# Go build artifacts — never commit compiled binaries. Anchored to repo
24+
# root + the cmd/ subdirs so the pattern doesn't accidentally match the
25+
# source directory cmd/stepsecurity-dev-machine-guard/ and hide every
26+
# file inside it (the previous unanchored "**/stepsecurity-dev-machine-guard"
27+
# matched the directory itself, with the gitignore side-effect of
28+
# excluding all its contents from new tracking).
29+
/stepsecurity-dev-machine-guard
30+
/cmd/stepsecurity-dev-machine-guard/stepsecurity-dev-machine-guard
31+
/cmd/stepsecurity-dev-machine-guard-task/stepsecurity-dev-machine-guard-task
2532
*.exe
2633
dist/
2734
stepsecurity-dev-machine-guard-linux

.tool-versions

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
golang 1.24.13

cmd/stepsecurity-dev-machine-guard/main.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ func main() {
232232
log.Error("Enterprise configuration not found. Run '%s configure' or download the script from your StepSecurity dashboard.", os.Args[0])
233233
os.Exit(1)
234234
}
235+
armExecutionWatchdog(telemetry.ExecutionDeadline(config.MaxExecutionDuration), log)
235236
if err := telemetry.Run(exec, log, cfg); err != nil {
236237
log.Error("%v", err)
237238
os.Exit(1)
@@ -264,8 +265,19 @@ func main() {
264265
log.Error("Scheduled installation is not supported on %s", runtime.GOOS)
265266
os.Exit(1)
266267
}
268+
269+
// Persist the loader-exported max-execution duration into config.json so
270+
// scheduler-fired runs (launchd/systemd/schtasks) — which invoke the
271+
// binary directly and never inherit the loader's exported env var — arm
272+
// the watchdog with the same value. Best-effort: a write failure just
273+
// means scheduled runs fall back to the binary's built-in default.
274+
if err := config.PersistMaxExecutionDuration(os.Getenv(telemetry.EnvMaxExecutionDuration)); err != nil {
275+
log.Warn("failed to persist max execution duration to config (%v) — scheduled runs will use the built-in default", err)
276+
}
277+
267278
log.Progress("Sending initial telemetry...")
268279
fmt.Println()
280+
armExecutionWatchdog(telemetry.ExecutionDeadline(config.MaxExecutionDuration), log)
269281
telemetryErr := telemetry.Run(exec, log, cfg)
270282

271283
// On Linux, systemd.Install enabled the timer but did not start it.
@@ -369,6 +381,7 @@ func main() {
369381
}
370382
case config.IsEnterpriseMode():
371383
log.Debug("dispatch: enterprise telemetry (auto-detected)")
384+
armExecutionWatchdog(telemetry.ExecutionDeadline(config.MaxExecutionDuration), log)
372385
if err := telemetry.Run(exec, log, cfg); err != nil {
373386
log.Error("%v", err)
374387
os.Exit(1)
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
//go:build !windows
2+
3+
package main
4+
5+
import (
6+
"os"
7+
"syscall"
8+
"time"
9+
10+
"github.com/step-security/dev-machine-guard/internal/progress"
11+
)
12+
13+
// armExecutionWatchdog spawns a single-shot goroutine that, after d
14+
// elapses, signals the process to exit. Graceful first (SIGTERM so the
15+
// signal handler at telemetry.go:259 posts the failure row, releases the
16+
// lock, and flushes the log tail), then a 5s hard-out via os.Exit(2) if
17+
// SIGTERM didn't take.
18+
//
19+
// The watchdog defends against goroutine wedges that the scan-deadline
20+
// context cancel can't reach: a hung HTTP upload bound to a stuck TCP
21+
// socket; an unreaped grandchild that slipped past the PR-122
22+
// process-group fix; a future leak we haven't anticipated. The scan
23+
// deadline (STEPSEC_MAX_SCAN_DURATION, 60min default) bounds the scan
24+
// body; this bounds the whole process.
25+
//
26+
// Goroutine lives in main rather than telemetry.Run so it survives any
27+
// panic/recover and any code path that doesn't return. No cancellation
28+
// path: the watchdog must fire even when the rest of the process is
29+
// wedged.
30+
//
31+
// d <= 0 disables the watchdog (caller passes the result of
32+
// telemetry.ExecutionDeadlineFromEnv which already returns 0 for the
33+
// "off"/"0" env settings).
34+
func armExecutionWatchdog(d time.Duration, log *progress.Logger) {
35+
if d <= 0 {
36+
return
37+
}
38+
go func() {
39+
time.Sleep(d)
40+
log.Warn("execution-watchdog: max duration %s exceeded — initiating shutdown", d)
41+
_ = syscall.Kill(os.Getpid(), syscall.SIGTERM)
42+
time.Sleep(5 * time.Second)
43+
log.Error("execution-watchdog: SIGTERM did not exit in 5s — hard kill")
44+
os.Exit(2)
45+
}()
46+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
//go:build windows
2+
3+
package main
4+
5+
import (
6+
"os"
7+
"time"
8+
9+
"github.com/step-security/dev-machine-guard/internal/progress"
10+
)
11+
12+
// armExecutionWatchdog (Windows): same intent as the Unix version but
13+
// skips the SIGTERM step. Windows console-signal mechanics (CTRL_C_EVENT
14+
// via os.Process.Signal(os.Interrupt)) only reach console-attached
15+
// processes, and the scheduled-task path under
16+
// stepsecurity-dev-machine-guard-task.exe runs without a console. Calling
17+
// os.Exit(2) directly skips the deferred lock release; the next launch's
18+
// loader-side stale-process killer (scripts/windows-device-agent/
19+
// stepsecurity-loader.ps1 Stop-StaleDmgProcesses) plus lock.Acquire's
20+
// own dead-PID handling clean up any stale state.
21+
func armExecutionWatchdog(d time.Duration, log *progress.Logger) {
22+
if d <= 0 {
23+
return
24+
}
25+
go func() {
26+
time.Sleep(d)
27+
log.Warn("execution-watchdog: max duration %s exceeded — exiting", d)
28+
os.Exit(2)
29+
}()
30+
}

internal/config/config.go

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -27,22 +27,33 @@ var (
2727
InstallDir string // "" means default (~/.stepsecurity); non-empty makes the agent put all its files (logs, hook errors, future state) under this directory. Bootstrap config.json itself stays at the legacy location. Per-run opt-out is the CLI flag --install-dir=. Resolution: --install-dir flag > STEPSECURITY_HOME env > this field > default — see internal/paths.
2828
)
2929

30+
// MaxExecutionDuration is the whole-process execution-watchdog limit
31+
// (STEPSEC_MAX_EXECUTION_DURATION). Persisted into config.json at install time
32+
// so scheduler-fired runs (launchd/systemd/schtasks) — which invoke the binary
33+
// directly and never inherit the loader-exported env var — resolve the same
34+
// value the loader configured. "" means fall back to the binary's built-in
35+
// default (4h). Declared in its own var block (not the placeholder group
36+
// above) because it carries no build-time {{...}} placeholder. See
37+
// telemetry.ExecutionDeadline.
38+
var MaxExecutionDuration string
39+
3040
// ConfigFile is the JSON structure persisted to ~/.stepsecurity/config.json.
3141
type ConfigFile struct {
32-
CustomerID string `json:"customer_id,omitempty"`
33-
APIEndpoint string `json:"api_endpoint,omitempty"`
34-
APIKey string `json:"api_key,omitempty"`
35-
ScanFrequencyHours string `json:"scan_frequency_hours,omitempty"`
36-
SearchDirs []string `json:"search_dirs,omitempty"`
37-
EnableNPMScan *bool `json:"enable_npm_scan,omitempty"`
38-
EnableBrewScan *bool `json:"enable_brew_scan,omitempty"`
39-
EnablePythonScan *bool `json:"enable_python_scan,omitempty"`
40-
IncludeTCCProtected *bool `json:"include_tcc_protected,omitempty"`
41-
ColorMode string `json:"color_mode,omitempty"`
42-
OutputFormat string `json:"output_format,omitempty"`
43-
HTMLOutputFile string `json:"html_output_file,omitempty"`
44-
LogLevel string `json:"log_level,omitempty"`
45-
InstallDir string `json:"install_dir,omitempty"`
42+
CustomerID string `json:"customer_id,omitempty"`
43+
APIEndpoint string `json:"api_endpoint,omitempty"`
44+
APIKey string `json:"api_key,omitempty"`
45+
ScanFrequencyHours string `json:"scan_frequency_hours,omitempty"`
46+
SearchDirs []string `json:"search_dirs,omitempty"`
47+
EnableNPMScan *bool `json:"enable_npm_scan,omitempty"`
48+
EnableBrewScan *bool `json:"enable_brew_scan,omitempty"`
49+
EnablePythonScan *bool `json:"enable_python_scan,omitempty"`
50+
IncludeTCCProtected *bool `json:"include_tcc_protected,omitempty"`
51+
ColorMode string `json:"color_mode,omitempty"`
52+
OutputFormat string `json:"output_format,omitempty"`
53+
HTMLOutputFile string `json:"html_output_file,omitempty"`
54+
LogLevel string `json:"log_level,omitempty"`
55+
InstallDir string `json:"install_dir,omitempty"`
56+
MaxExecutionDuration string `json:"max_execution_duration,omitempty"`
4657
}
4758

4859
// userConfigDir returns ~/.stepsecurity — the per-user config location.
@@ -172,6 +183,9 @@ func Load() {
172183
if cfg.InstallDir != "" && InstallDir == "" {
173184
InstallDir = cfg.InstallDir
174185
}
186+
if cfg.MaxExecutionDuration != "" && MaxExecutionDuration == "" {
187+
MaxExecutionDuration = cfg.MaxExecutionDuration
188+
}
175189
}
176190

177191
// IsEnterpriseMode returns true if valid enterprise credentials are configured.
@@ -642,3 +656,24 @@ func RunConfigureNonInteractive(opts NonInteractiveOptions) error {
642656
fmt.Printf("Configuration saved to %s\n", WriteConfigFilePath())
643657
return nil
644658
}
659+
660+
// PersistMaxExecutionDuration records the STEPSEC_MAX_EXECUTION_DURATION value
661+
// the loader exported into config.json at install time. Scheduler-fired runs
662+
// (launchd/systemd/schtasks) invoke the binary directly and never inherit the
663+
// loader's exported env var, so without this they fall back to the built-in 4h
664+
// default regardless of the loader's MAX_EXECUTION_DURATION_HOURS. Persisting
665+
// it lets telemetry.ExecutionDeadline pick it up on every scheduled run.
666+
// Read-modify-write so the loader-written customer_id/api_key/etc. survive.
667+
// No-op when value is empty (a direct binary install with no loader-configured
668+
// value keeps the built-in default).
669+
func PersistMaxExecutionDuration(value string) error {
670+
if value == "" {
671+
return nil
672+
}
673+
existing := loadExisting()
674+
if existing.MaxExecutionDuration == value {
675+
return nil
676+
}
677+
existing.MaxExecutionDuration = value
678+
return save(existing)
679+
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package config
2+
3+
import "testing"
4+
5+
// PersistMaxExecutionDuration is a read-modify-write: it records the loader's
6+
// max-execution value into config.json without disturbing the customer_id /
7+
// api_key / etc. the loader already wrote.
8+
func TestPersistMaxExecutionDuration_RoundTrip(t *testing.T) {
9+
withHome(t)
10+
11+
// Seed config.json the way the loader's write_config would (no max-exec
12+
// field), so we prove Persist preserves the existing fields.
13+
seed := &ConfigFile{CustomerID: "acme", APIKey: "k", APIEndpoint: "https://api"}
14+
if err := save(seed); err != nil {
15+
t.Fatalf("seed save: %v", err)
16+
}
17+
18+
if err := PersistMaxExecutionDuration("2h"); err != nil {
19+
t.Fatalf("PersistMaxExecutionDuration: %v", err)
20+
}
21+
22+
got := loadExisting()
23+
if got.MaxExecutionDuration != "2h" {
24+
t.Errorf("MaxExecutionDuration = %q, want %q", got.MaxExecutionDuration, "2h")
25+
}
26+
if got.CustomerID != "acme" || got.APIKey != "k" || got.APIEndpoint != "https://api" {
27+
t.Errorf("read-modify-write clobbered existing fields: %+v", got)
28+
}
29+
}
30+
31+
// An empty value is a no-op (a direct binary install with no loader-exported
32+
// value must not write an empty field that would later parse to the default).
33+
func TestPersistMaxExecutionDuration_EmptyIsNoOp(t *testing.T) {
34+
withHome(t)
35+
if err := save(&ConfigFile{CustomerID: "acme"}); err != nil {
36+
t.Fatalf("seed save: %v", err)
37+
}
38+
39+
if err := PersistMaxExecutionDuration(""); err != nil {
40+
t.Fatalf("empty should be a no-op, got: %v", err)
41+
}
42+
43+
if got := loadExisting(); got.MaxExecutionDuration != "" {
44+
t.Errorf("empty value should not be persisted, got %q", got.MaxExecutionDuration)
45+
}
46+
}
47+
48+
// Load() must surface a persisted max-execution value into the package var the
49+
// resolver reads on scheduler-fired runs.
50+
func TestLoad_PopulatesMaxExecutionDuration(t *testing.T) {
51+
withHome(t)
52+
seed := &ConfigFile{
53+
CustomerID: "acme",
54+
APIKey: "k",
55+
APIEndpoint: "https://api",
56+
MaxExecutionDuration: "90m",
57+
}
58+
if err := save(seed); err != nil {
59+
t.Fatalf("seed save: %v", err)
60+
}
61+
62+
// Load only fills package vars still at their zero value; reset so this
63+
// test is independent of execution order within the package.
64+
MaxExecutionDuration = ""
65+
t.Cleanup(func() { MaxExecutionDuration = "" })
66+
67+
Load()
68+
69+
if MaxExecutionDuration != "90m" {
70+
t.Errorf("Load did not populate MaxExecutionDuration: got %q, want %q", MaxExecutionDuration, "90m")
71+
}
72+
}

0 commit comments

Comments
 (0)