Skip to content

Commit df024ee

Browse files
feat(remediation): free-core remediation governance + execute/rollback + admin user-mgmt (#601)
Lands the full remediation stack (formerly #599 + #600 + #601) in one squash merge.

- Governance (was #599): request/approve/reject workflow + projected-lift estimate; remediation_requests + remediation_transactions (migration 0037).
- Admin user management (was #600): admin password reset + account disable/enable; users.disabled_at (migration 0038), login rejects disabled accounts, sessions revoked on disable.
- Execution engine (was #601): per-rule Fix + rollback over Kensa v0.5.1 (pkg/kensa.Default Remediate/Rollback), queued RemediationWorker, host-detail Fix button.

Single-rule manual execute/rollback is FREE CORE; bulk/auto remediation is the licensed track (license.EnforceFeature(remediation_execution)).

Validated against main's 100% Specter gate (structural + outcome). Two stale-test gaps were fixed that #600/#601 never gate-tested, because they targeted intermediate stack branches and go-ci only runs on PRs to main: system-rbac/AC-07 (remediation:execute is ungated) and api-users/AC-16-18 (per-AC subtest tokens).
1 parent dd921b2 commit df024ee

64 files changed

Lines changed: 8359 additions & 535 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

api/openapi.yaml

Lines changed: 519 additions & 2 deletions
Large diffs are not rendered by default.

audit/events.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -952,6 +952,31 @@ events:
952952
- code: admin.user.deleted
953953
severity: warning
954954

955+
- code: admin.user.password_reset
956+
severity: warning
957+
description: An administrator reset another user's (or their own) password.
958+
detail_schema:
959+
type: object
960+
properties:
961+
target_user_id: {type: string}
962+
self: {type: boolean}
963+
964+
- code: admin.user.disabled
965+
severity: warning
966+
description: An administrator disabled a user account (cannot authenticate).
967+
detail_schema:
968+
type: object
969+
properties:
970+
target_user_id: {type: string}
971+
972+
- code: admin.user.enabled
973+
severity: warning
974+
description: An administrator re-enabled a previously disabled user account.
975+
detail_schema:
976+
type: object
977+
properties:
978+
target_user_id: {type: string}
979+
955980
- code: admin.role.changed
956981
severity: warning
957982

auth/permissions.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -313,15 +313,13 @@ permissions:
313313

314314
- id: remediation:execute
315315
category: remediation
316-
description: Execute an approved remediation against hosts
316+
description: Execute an approved single-rule remediation against a host (free core)
317317
dangerous: true
318-
license_gated: remediation_execution
319318

320319
- id: remediation:rollback
321320
category: remediation
322-
description: Roll back a previously executed remediation
321+
description: Roll back a previously executed remediation (free core)
323322
dangerous: true
324-
license_gated: remediation_execution
325323

326324
# =========================================================================
327325
# integration - plugins and webhooks
@@ -511,6 +509,8 @@ roles:
511509
- policy:read
512510
- remediation:read
513511
- remediation:request
512+
- remediation:execute
513+
- remediation:rollback
514514
- integration:read
515515
- audit:read
516516
- system:read

cmd/openwatch/main.go

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"log/slog"
1818
"os"
1919
"os/signal"
20+
"path/filepath"
2021
"runtime"
2122
"syscall"
2223
"time"
@@ -50,6 +51,7 @@ import (
5051
openlog "github.com/Hanalyx/openwatch/internal/log"
5152
"github.com/Hanalyx/openwatch/internal/notification"
5253
"github.com/Hanalyx/openwatch/internal/posture"
54+
"github.com/Hanalyx/openwatch/internal/remediation"
5355
"github.com/Hanalyx/openwatch/internal/report"
5456
"github.com/Hanalyx/openwatch/internal/scanresult"
5557
compsched "github.com/Hanalyx/openwatch/internal/scheduler"
@@ -573,10 +575,54 @@ func cmdServe(cfg *config.Config, _ []string, stdout, stderr *os.File) int {
573575
exceptionSvc := exception.NewService(pool, audit.Emit)
574576
exceptionSvc.Run(ctx, 0)
575577

578+
// Remediation governance: request/approve/reject + projected lift (free
579+
// core), AND the queued single-rule execute/rollback (Tier A free core).
580+
// Spec api-remediation.
581+
remediationSvc := remediation.NewService(pool, audit.Emit)
582+
remTxWriter := transactionlog.NewWriter(pool, audit.Emit)
583+
584+
// Remediation execution executor: shares the scan executor's per-host
585+
// inFlight guard by chaining WithRemediateFunc onto it (so a host is never
586+
// scanned + remediated at the same instant). The apply-enabled Kensa needs
587+
// a durable SQLite store for rollback pre-state — derive a path from the
588+
// kensa store env (dev default under the working dir).
589+
remExecutor := scanExecutor
590+
if remFn, rbFn, remErr := kensa.NewProductionRemediateFunc(bootCtx, kensa.RemediateFuncDeps{
591+
Pool: pool,
592+
Credentials: credSvc,
593+
RulesDir: scanRulesDir,
594+
HostKeyMode: owssh.ModeTOFU,
595+
KnownHosts: knownhosts.NewStore(pool),
596+
Variables: func(ctx context.Context) (map[string]string, error) {
597+
vars, err := cfgStore.LoadScanVars(ctx)
598+
return vars, err
599+
},
600+
Profiles: connStore,
601+
Policy: func(ctx context.Context) (bool, error) {
602+
cfg, err := cfgStore.LoadSecurity(ctx)
603+
return cfg.AllowCredentialSudoPassword, err
604+
},
605+
StorePath: kensaStorePath(bootCtx),
606+
}); remErr != nil {
607+
slog.WarnContext(bootCtx, "kensa remediation wiring unavailable — remediation execute/rollback will fail until the kensa-rules package is installed (or OPENWATCH_KENSA_RULES_DIR set)",
608+
slog.String("error", remErr.Error()))
609+
} else {
610+
remExecutor = remExecutor.WithRemediateFunc(remFn, rbFn)
611+
}
612+
remediationWorker := worker.NewRemediationWorker(worker.RemediationConfig{
613+
Pool: pool,
614+
Executor: remExecutor,
615+
Service: remediationSvc,
616+
Writer: remTxWriter,
617+
QueueKey: scanQueueKey,
618+
Bus: bus,
619+
Emit: audit.Emit,
620+
})
621+
576622
scanWorker := worker.NewScanWorker(worker.Config{
577623
Pool: pool,
578624
Executor: scanExecutor,
579-
Writer: transactionlog.NewWriter(pool, audit.Emit),
625+
Writer: remTxWriter,
580626
ScanResults: scanresult.NewWriter(pool),
581627
QueueKey: scanQueueKey,
582628
Emit: audit.Emit,
@@ -592,10 +638,12 @@ func cmdServe(cfg *config.Config, _ []string, stdout, stderr *os.File) int {
592638
WithAlerts(alerts.NewService(pool, audit.Emit)).
593639
WithScanQueue(scanQueueKey).
594640
WithScanWorker(scanWorker).
641+
WithRemediationWorker(remediationWorker).
595642
WithRuleCatalog(ruleCatalog).
596643
WithRuleLibrary(ruleLibrary).
597644
WithVariableCatalog(varCatalog).
598645
WithExceptions(exceptionSvc).
646+
WithRemediation(remediationSvc).
599647
WithGroups(group.NewService(pool)).
600648
WithReports(report.NewService(pool)).
601649
WithScanResults(scanresult.NewReader(pool)).
@@ -653,6 +701,26 @@ func (a collectorSSHAdapter) Dial(ctx context.Context, host string, port int, cr
653701
return sess, nil
654702
}
655703

704+
// kensaStorePath returns the SQLite file Kensa uses to persist remediation
// rollback pre-state. Lookup order:
//
//	OPENWATCH_KENSA_STORE_PATH   explicit override (production: a durable path
//	                             under the data dir, e.g.
//	                             /var/lib/openwatch/kensa/remediation.db)
//	.kensa/remediation.db        dev default, relative to the working
//	                             directory (a warning is logged)
//
// Rollback depends on the pre-state log surviving restarts, so production
// installs are expected to point the env var at persistent storage.
func kensaStorePath(ctx context.Context) string {
	override := os.Getenv("OPENWATCH_KENSA_STORE_PATH")
	if override == "" {
		// No override configured: fall back to the working-dir default and
		// make the non-durable choice visible in the logs.
		fallback := filepath.Join(".kensa", "remediation.db")
		slog.WarnContext(ctx, "OPENWATCH_KENSA_STORE_PATH unset — using working-dir default for kensa rollback pre-state; production must set a durable path",
			slog.String("store_path", fallback))
		return fallback
	}
	return override
}
723+
656724
// parseLogLevel maps the config string to a slog.Level. Unknown values
657725
// default to info (Validate would have caught them earlier).
658726
func parseLogLevel(s string) slog.Level {

cmd/openwatch/worker.go

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"github.com/Hanalyx/openwatch/internal/knownhosts"
3131
"github.com/Hanalyx/openwatch/internal/license"
3232
openlog "github.com/Hanalyx/openwatch/internal/log"
33+
"github.com/Hanalyx/openwatch/internal/remediation"
3334
"github.com/Hanalyx/openwatch/internal/scanresult"
3435
"github.com/Hanalyx/openwatch/internal/scheduler"
3536
"github.com/Hanalyx/openwatch/internal/secretkey"
@@ -200,6 +201,45 @@ func cmdWorker(cfg *config.Config, args []string, stdout, stderr *os.File) int {
200201
writer := transactionlog.NewWriter(pool, audit.Emit)
201202
scanResultsWriter := scanresult.NewWriter(pool)
202203

204+
// Remediation execution wiring (Tier A free core): chain the apply-enabled
205+
// Remediate/Rollback seams onto the same executor so a host's scan +
206+
// remediate share one per-host inFlight guard. The apply-enabled Kensa
207+
// needs a durable SQLite store for rollback pre-state.
208+
remExecutor := executor
209+
remFn, rbFn, remErr := kensa.NewProductionRemediateFunc(bootCtx, kensa.RemediateFuncDeps{
210+
Pool: pool,
211+
Credentials: credSvc,
212+
RulesDir: rulesDir,
213+
HostKeyMode: owssh.ModeTOFU,
214+
KnownHosts: knownhosts.NewStore(pool),
215+
Variables: func(ctx context.Context) (map[string]string, error) {
216+
vars, err := varStore.LoadScanVars(ctx)
217+
return vars, err
218+
},
219+
Profiles: connprofile.NewStore(pool),
220+
Policy: func(ctx context.Context) (bool, error) {
221+
cfg, err := varStore.LoadSecurity(ctx)
222+
return cfg.AllowCredentialSudoPassword, err
223+
},
224+
StorePath: kensaStorePath(bootCtx),
225+
})
226+
if remErr != nil {
227+
slog.WarnContext(bootCtx, "kensa remediation wiring unavailable — remediation jobs claimed by this worker will fail",
228+
slog.String("error", remErr.Error()))
229+
} else {
230+
remExecutor = remExecutor.WithRemediateFunc(remFn, rbFn)
231+
}
232+
remediationWorker := worker.NewRemediationWorker(worker.RemediationConfig{
233+
Pool: pool,
234+
Executor: remExecutor,
235+
Service: remediation.NewService(pool, audit.Emit),
236+
Writer: writer,
237+
QueueKey: queueKey,
238+
Emit: audit.Emit,
239+
// Bus nil: the dedicated worker has no SSE subscribers (cross-process
240+
// delivery is a known non-goal, same as scan.completed).
241+
})
242+
203243
// Post-scan schedule updates run here too: the dedicated worker
204244
// classifies each completed scan into a compliance state so
205245
// host_compliance_schedule stays fresh whichever process executed
@@ -217,14 +257,15 @@ func cmdWorker(cfg *config.Config, args []string, stdout, stderr *os.File) int {
217257
!scanCfg.Enabled || scanCfg.MaintenanceGlobal)
218258

219259
scanWorker := worker.NewScanWorker(worker.Config{
220-
Pool: pool,
221-
Executor: executor,
222-
Writer: writer,
223-
ScanResults: scanResultsWriter,
224-
QueueKey: queueKey,
225-
PollInterval: *pollInterval,
226-
Emit: audit.Emit,
227-
Sched: sched,
260+
Pool: pool,
261+
Executor: executor,
262+
Writer: writer,
263+
ScanResults: scanResultsWriter,
264+
QueueKey: queueKey,
265+
PollInterval: *pollInterval,
266+
Emit: audit.Emit,
267+
Sched: sched,
268+
RemediationProcessor: remediationWorker,
228269
})
229270

230271
ctx, stop := signal.NotifyContext(bootCtx, syscall.SIGINT, syscall.SIGTERM)

0 commit comments

Comments
 (0)