diff --git a/CONTEXT.md b/CONTEXT.md index a98fa4c1..435dcfbd 100644 --- a/CONTEXT.md +++ b/CONTEXT.md @@ -99,7 +99,6 @@ Legacy REST endpoints were removed from OpenAPI and code: ```text /api/v1/command /api/v1/procedure -/api/v1/procedures /api/v1/logs /api/v1/metrics /api/v1/pstree diff --git a/api/openapi.yaml b/api/openapi.yaml index 393db7b3..29822f0d 100644 --- a/api/openapi.yaml +++ b/api/openapi.yaml @@ -251,9 +251,28 @@ components: additionalProperties: $ref: '#/components/schemas/RuntimeUIPackage' - CommandStatusMap: + LockStatus: type: object - additionalProperties: true + required: + - status + - last_status_change + properties: + status: + type: string + enum: [running, done, error, waiting] + exit_code: + type: integer + nullable: true + last_status_change: + type: integer + format: int64 + + ProcedureStatusMap: + type: object + additionalProperties: + type: object + additionalProperties: + $ref: '#/components/schemas/LockStatus' ScrollLogMap: type: object @@ -299,9 +318,8 @@ components: updated_at: type: string format: date-time - commands: - type: object - additionalProperties: true + procedures: + $ref: '#/components/schemas/ProcedureStatusMap' DeletedScroll: type: object @@ -583,26 +601,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CommandStatusMap' - - /api/v1/scrolls/{id}/procedures: - get: - operationId: getScrollProcedures - summary: Get procedure state - tags: [runtime, daemon] - parameters: - - name: id - in: path - required: true - schema: - type: string - responses: - '200': - description: Procedure state - content: - application/json: - schema: - $ref: '#/components/schemas/CommandStatusMap' + $ref: '#/components/schemas/ProcedureStatusMap' /api/v1/scrolls/{id}/consoles: get: diff --git a/apps/druid/adapters/cli/client/command.go b/apps/druid/adapters/cli/client/command.go deleted file mode 100644 index 51f7f597..00000000 --- a/apps/druid/adapters/cli/client/command.go +++ /dev/null @@ -1,104 +0,0 @@ -package client - -import ( - "fmt" - "os" - "sort" - "text/tabwriter" - - "github.com/highcard-dev/daemon/internal/core/domain" - "github.com/spf13/cobra" -) - -var CommandCommand = &cobra.Command{ - Use: "command", - Short: "Inspect and run scroll commands", -} - -var CommandRunCommand = &cobra.Command{ - Use: "run ", - Short: "Run a command on a daemon-managed scroll", - Args: cobra.ExactArgs(2), - RunE: func(cmd *cobra.Command, args []string) error { - daemon, err := runtimeDaemonClient() - if err != nil { - return err - } - scroll, err := daemon.RunScrollCommand(cmd.Context(), args[0], args[1]) - if err != nil { - return err - } - return printJSON(scroll) - }, -} - -var CommandListCommand = &cobra.Command{ - Use: "list ", - Short: "List commands for a daemon-managed scroll", - Args: cobra.ExactArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - daemon, err := runtimeDaemonClient() - if err != nil { - return err - } - file, err := daemon.GetScrollConfig(cmd.Context(), args[0]) - if err != nil { - return err - } - queue, err := daemon.GetScrollQueue(cmd.Context(), args[0]) - if err != nil { - return err - } - return printCommandRows(commandRows(file, queue)) - }, -} - -type commandRow struct { - command string - status string - runMode string - procedures int -} - -func commandRows(file *domain.File, queue map[string]domain.ScrollLockStatus) []commandRow { - if file == nil { - return nil - } - commands := make([]string, 0, len(file.Commands)) - for command := range file.Commands { - commands = append(commands, command) - } - sort.Strings(commands) - - rows := []commandRow{} - for _, command := range commands { - definition := file.Commands[command] - if definition == nil { - continue - } - status := string(queue[command]) - if status == "" { - status = "-" - } - runMode := string(definition.Run) - if runMode == "" { - runMode = string(domain.RunModeAlways) - } - rows = append(rows, commandRow{ - command: command, - status: status, - runMode: runMode, - procedures: len(definition.Procedures), - }) - } - return rows -} - -func printCommandRows(rows []commandRow) error { - w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) - fmt.Fprintln(w, "COMMAND\tSTATUS\tRUN\tPROCEDURES") - for _, row := range rows { - fmt.Fprintf(w, "%s\t%s\t%s\t%d\n", row.command, row.status, row.runMode, row.procedures) - } - return w.Flush() -} diff --git a/apps/druid/adapters/cli/client/procedure.go b/apps/druid/adapters/cli/client/procedure.go index c5b26352..8c80a7b9 100644 --- a/apps/druid/adapters/cli/client/procedure.go +++ b/apps/druid/adapters/cli/client/procedure.go @@ -28,7 +28,7 @@ var ProcedureListCommand = &cobra.Command{ if err != nil { return err } - statuses, err := daemon.GetScrollProcedures(cmd.Context(), args[0]) + statuses, err := daemon.GetScrollQueue(cmd.Context(), args[0]) if err != nil { return err } @@ -70,7 +70,7 @@ type procedureRow struct { console string } -func procedureRows(file *domain.File, statuses map[string]domain.ScrollLockStatus, consoles map[string]domain.Console) []procedureRow { +func procedureRows(file *domain.File, statuses domain.ProcedureStatusMap, consoles map[string]domain.Console) []procedureRow { if file == nil { return nil } @@ -88,11 +88,7 @@ func procedureRows(file *domain.File, statuses map[string]domain.ScrollLockStatu } for idx, procedure := range definition.Procedures { name := domain.ProcedureName(command, idx, procedure) - procedureStatusValue := "" - if name != command { - procedureStatusValue = string(statuses[name]) - } - status := procedureStatus(name, procedureStatusValue, string(statuses[command]), consoles) + status := procedureStatus(name, statuses[command][name], consoles) console := "no" if _, ok := consoles[name]; ok { console = "yes" @@ -103,9 +99,9 @@ func procedureRows(file *domain.File, statuses map[string]domain.ScrollLockStatu return rows } -func procedureStatus(name string, status string, commandStatus string, consoles map[string]domain.Console) string { - if status != "" { - return status +func procedureStatus(name string, status domain.LockStatus, consoles map[string]domain.Console) string { + if status.Status != "" { + return string(status.Status) } if console, ok := consoles[name]; ok { if console.Exit == nil { @@ -116,12 +112,6 @@ func procedureStatus(name string, status string, commandStatus string, consoles } return string(domain.ScrollLockStatusError) } - if commandStatus == string(domain.ScrollLockStatusRunning) { - return string(domain.ScrollLockStatusWaiting) - } - if commandStatus != "" { - return commandStatus - } return "-" } diff --git a/apps/druid/adapters/cli/client/procedure_test.go b/apps/druid/adapters/cli/client/procedure_test.go index a4416fea..d374edbe 100644 --- a/apps/druid/adapters/cli/client/procedure_test.go +++ b/apps/druid/adapters/cli/client/procedure_test.go @@ -19,9 +19,11 @@ func TestProcedureRowsCombineConfigStatusAndConsoles(t *testing.T) { }, }, }} - rows := procedureRows(file, map[string]domain.ScrollLockStatus{ - "start": domain.ScrollLockStatusWaiting, - "coldstart": domain.ScrollLockStatusRunning, + rows := procedureRows(file, domain.ProcedureStatusMap{ + "start": { + "coldstart": {Status: domain.ScrollLockStatusRunning}, + "start.1": {Status: domain.ScrollLockStatusWaiting}, + }, }, map[string]domain.Console{ "coldstart": {}, }) @@ -48,8 +50,10 @@ func TestProcedureRowsDoNotMarkEveryProcedureRunningFromCommandStatus(t *testing }, }, }} - rows := procedureRows(file, map[string]domain.ScrollLockStatus{ - "start": domain.ScrollLockStatusRunning, + rows := procedureRows(file, domain.ProcedureStatusMap{ + "start": { + "start": {Status: domain.ScrollLockStatusWaiting}, + }, }, map[string]domain.Console{ "coldstart": {}, }) @@ -62,38 +66,6 @@ func TestProcedureRowsDoNotMarkEveryProcedureRunningFromCommandStatus(t *testing } } -func TestCommandRunCallsDaemon(t *testing.T) { - daemon := &fakeProcedureDaemon{} - withClientConfig(t, Config{Daemon: func() (RuntimeDaemon, error) { return daemon, nil }}) - - if err := CommandRunCommand.RunE(&cobra.Command{}, []string{"scroll-a", "start"}); err != nil { - t.Fatal(err) - } - if daemon.runScroll != "scroll-a" || daemon.runCommand != "start" { - t.Fatalf("run scroll=%q command=%q", daemon.runScroll, daemon.runCommand) - } -} - -func TestCommandRowsCombineConfigAndQueue(t *testing.T) { - file := &domain.File{Commands: map[string]*domain.CommandInstructionSet{ - "install": {Run: domain.RunModeOnce, Procedures: []*domain.Procedure{{}}}, - "start": {Run: domain.RunModeRestart, Procedures: []*domain.Procedure{{}, {}}}, - }} - rows := commandRows(file, map[string]domain.ScrollLockStatus{ - "start": domain.ScrollLockStatusWaiting, - }) - - if len(rows) != 2 { - t.Fatalf("rows = %#v", rows) - } - if rows[0] != (commandRow{command: "install", status: "-", runMode: "once", procedures: 1}) { - t.Fatalf("row 0 = %#v", rows[0]) - } - if rows[1] != (commandRow{command: "start", status: "waiting", runMode: "restart", procedures: 2}) { - t.Fatalf("row 1 = %#v", rows[1]) - } -} - func TestProcedureAttachRequiresActiveConsole(t *testing.T) { daemon := &fakeProcedureDaemon{consoles: map[string]domain.Console{"start": {}}} var attachedScroll, attachedConsole string @@ -125,9 +97,7 @@ func withClientConfig(t *testing.T, cfg Config) { } type fakeProcedureDaemon struct { - runScroll string - runCommand string - consoles map[string]domain.Console + consoles map[string]domain.Console } func (f *fakeProcedureDaemon) CreateScroll(ctx context.Context, name string, artifact string, registryCredentials []api.RegistryCredential) (*api.RuntimeScroll, error) { @@ -150,21 +120,11 @@ func (f *fakeProcedureDaemon) DeleteScroll(ctx context.Context, id string) (*api return nil, nil } -func (f *fakeProcedureDaemon) RunScrollCommand(ctx context.Context, id string, command string) (*api.RuntimeScroll, error) { - f.runScroll = id - f.runCommand = command - return &api.RuntimeScroll{Id: id}, nil -} - func (f *fakeProcedureDaemon) GetScrollConfig(ctx context.Context, id string) (*domain.File, error) { return &domain.File{}, nil } -func (f *fakeProcedureDaemon) GetScrollProcedures(ctx context.Context, id string) (map[string]domain.ScrollLockStatus, error) { - return nil, nil -} - -func (f *fakeProcedureDaemon) GetScrollQueue(ctx context.Context, id string) (map[string]domain.ScrollLockStatus, error) { +func (f *fakeProcedureDaemon) GetScrollQueue(ctx context.Context, id string) (domain.ProcedureStatusMap, error) { return nil, nil } diff --git a/apps/druid/adapters/cli/client/register.go b/apps/druid/adapters/cli/client/register.go index a610e1a7..767a4cec 100644 --- a/apps/druid/adapters/cli/client/register.go +++ b/apps/druid/adapters/cli/client/register.go @@ -15,10 +15,8 @@ type RuntimeDaemon interface { ListScrolls(ctx context.Context) ([]api.RuntimeScroll, error) GetScroll(ctx context.Context, id string) (*api.RuntimeScroll, error) DeleteScroll(ctx context.Context, id string) (*api.DeletedScroll, error) - RunScrollCommand(ctx context.Context, id string, command string) (*api.RuntimeScroll, error) GetScrollConfig(ctx context.Context, id string) (*domain.File, error) - GetScrollQueue(ctx context.Context, id string) (map[string]domain.ScrollLockStatus, error) - GetScrollProcedures(ctx context.Context, id string) (map[string]domain.ScrollLockStatus, error) + GetScrollQueue(ctx context.Context, id string) (domain.ProcedureStatusMap, error) GetScrollConsoles(ctx context.Context, id string) (map[string]domain.Console, error) GetScrollPorts(ctx context.Context, id string) ([]api.RuntimePortStatus, error) StartScroll(ctx context.Context, id string) (*api.RuntimeScroll, error) @@ -43,10 +41,8 @@ var config Config func Register(root *cobra.Command, cfg Config) { config = cfg RoutingCommand.AddCommand(RoutingTargetsCommand, RoutingApplyCommand) - CommandCommand.AddCommand(CommandRunCommand, CommandListCommand) ProcedureCommand.AddCommand(ProcedureListCommand, ProcedureAttachCommand) root.AddCommand( - CommandCommand, CreateCommand, DeleteCommand, DescribeCommand, diff --git a/apps/druid/adapters/cli/client/routing_publish_test.go b/apps/druid/adapters/cli/client/routing_publish_test.go index 40b84190..aaf91cf6 100644 --- a/apps/druid/adapters/cli/client/routing_publish_test.go +++ b/apps/druid/adapters/cli/client/routing_publish_test.go @@ -141,11 +141,11 @@ type fakeRoutingDaemon struct { func (f *fakeRoutingDaemon) CreateScroll(ctx context.Context, name string, artifact string, registryCredentials []api.RegistryCredential) (*api.RuntimeScroll, error) { f.createCalls++ - return &api.RuntimeScroll{Id: name, Artifact: artifact, Root: "/root", ScrollName: name, Status: api.Created}, nil + return &api.RuntimeScroll{Id: name, Artifact: artifact, Root: "/root", ScrollName: name, Status: api.RuntimeScrollStatusCreated}, nil } func (f *fakeRoutingDaemon) UpdateScroll(ctx context.Context, id string, artifact string, registryCredentials []api.RegistryCredential) (*api.RuntimeScroll, error) { - return &api.RuntimeScroll{Id: id, Artifact: artifact, Root: "/root", ScrollName: id, Status: api.Created}, nil + return &api.RuntimeScroll{Id: id, Artifact: artifact, Root: "/root", ScrollName: id, Status: api.RuntimeScrollStatusCreated}, nil } func (f *fakeRoutingDaemon) ListScrolls(ctx context.Context) ([]api.RuntimeScroll, error) { @@ -153,26 +153,18 @@ func (f *fakeRoutingDaemon) ListScrolls(ctx context.Context) ([]api.RuntimeScrol } func (f *fakeRoutingDaemon) GetScroll(ctx context.Context, id string) (*api.RuntimeScroll, error) { - return &api.RuntimeScroll{Id: id, Status: api.Created}, nil + return &api.RuntimeScroll{Id: id, Status: api.RuntimeScrollStatusCreated}, nil } func (f *fakeRoutingDaemon) DeleteScroll(ctx context.Context, id string) (*api.DeletedScroll, error) { return nil, nil } -func (f *fakeRoutingDaemon) RunScrollCommand(ctx context.Context, id string, command string) (*api.RuntimeScroll, error) { - return nil, nil -} - func (f *fakeRoutingDaemon) GetScrollConfig(ctx context.Context, id string) (*domain.File, error) { return &domain.File{}, nil } -func (f *fakeRoutingDaemon) GetScrollProcedures(ctx context.Context, id string) (map[string]domain.ScrollLockStatus, error) { - return nil, nil -} - -func (f *fakeRoutingDaemon) GetScrollQueue(ctx context.Context, id string) (map[string]domain.ScrollLockStatus, error) { +func (f *fakeRoutingDaemon) GetScrollQueue(ctx context.Context, id string) (domain.ProcedureStatusMap, error) { return nil, nil } @@ -202,7 +194,7 @@ func (f *fakeRoutingDaemon) ApplyScrollRouting(ctx context.Context, id string, a f.applyCalls++ f.applied = assignments routing := append([]api.RuntimeRouteAssignment(nil), assignments...) - return &api.RuntimeScroll{Id: id, Status: api.Created, Routing: &routing}, nil + return &api.RuntimeScroll{Id: id, Status: api.RuntimeScrollStatusCreated, Routing: &routing}, nil } func (f *fakeRoutingDaemon) GetScrollUIPackages(ctx context.Context, id string) (map[string]api.RuntimeUIPackage, error) { @@ -210,7 +202,7 @@ func (f *fakeRoutingDaemon) GetScrollUIPackages(ctx context.Context, id string) } func (f *fakeRoutingDaemon) PublishScrollUIPackage(ctx context.Context, id string, scope string, path string) (*api.RuntimeScroll, error) { - return &api.RuntimeScroll{Id: id, Status: api.Created}, nil + return &api.RuntimeScroll{Id: id, Status: api.RuntimeScrollStatusCreated}, nil } func (f *fakeRoutingDaemon) EnableWatch(ctx context.Context, id string, request api.DevWatchRequest) (*api.DevWatchResponse, error) { diff --git a/apps/druid/adapters/cli/root_test.go b/apps/druid/adapters/cli/root_test.go index c1249d49..5776b089 100644 --- a/apps/druid/adapters/cli/root_test.go +++ b/apps/druid/adapters/cli/root_test.go @@ -79,8 +79,6 @@ func TestRootCommandExposesDaemonTargets(t *testing.T) { func TestRootCommandExposesLeanProcedureCommands(t *testing.T) { for _, args := range [][]string{ - {"command", "run"}, - {"command", "list"}, {"procedure", "list"}, {"procedure", "attach"}, } { @@ -89,7 +87,7 @@ func TestRootCommandExposesLeanProcedureCommands(t *testing.T) { t.Fatalf("druid should expose %v", args) } } - for _, removed := range [][]string{{"run"}, {"attach"}} { + for _, removed := range [][]string{{"run"}, {"attach"}, {"command"}} { cmd, _, err := RootCmd.Find(removed) if err == nil && cmd != nil && cmd.Name() == removed[0] { t.Fatalf("druid should not expose removed top-level %s", removed[0]) diff --git a/apps/druid/adapters/daemonclient/openapi_client.go b/apps/druid/adapters/daemonclient/openapi_client.go index daf7b47f..8ce19d8c 100644 --- a/apps/druid/adapters/daemonclient/openapi_client.go +++ b/apps/druid/adapters/daemonclient/openapi_client.go @@ -164,44 +164,23 @@ func (c *OpenAPIClient) GetScrollConfig(ctx context.Context, id string) (*domain return &file, json.Unmarshal(data, &file) } -func (c *OpenAPIClient) GetScrollProcedures(ctx context.Context, id string) (map[string]domain.ScrollLockStatus, error) { - res, err := c.client.GetScrollProceduresWithResponse(ctx, id) +func (c *OpenAPIClient) GetScrollQueue(ctx context.Context, id string) (domain.ProcedureStatusMap, error) { + res, err := c.client.GetScrollQueueWithResponse(ctx, id) if err != nil { return nil, err } if err := ensureStatus(res.StatusCode(), res.Body); err != nil { return nil, err } - out := map[string]domain.ScrollLockStatus{} + out := domain.ProcedureStatusMap{} if res.JSON200 == nil { return out, nil } - for name, value := range *res.JSON200 { - if status, ok := value.(string); ok { - out[name] = domain.ScrollLockStatus(status) - } - } - return out, nil -} - -func (c *OpenAPIClient) GetScrollQueue(ctx context.Context, id string) (map[string]domain.ScrollLockStatus, error) { - res, err := c.client.GetScrollQueueWithResponse(ctx, id) + data, err := json.Marshal(res.JSON200) if err != nil { return nil, err } - if err := ensureStatus(res.StatusCode(), res.Body); err != nil { - return nil, err - } - out := map[string]domain.ScrollLockStatus{} - if res.JSON200 == nil { - return out, nil - } - for name, value := range *res.JSON200 { - if status, ok := value.(string); ok { - out[name] = domain.ScrollLockStatus(status) - } - } - return out, nil + return out, json.Unmarshal(data, &out) } func (c *OpenAPIClient) GetScrollConsoles(ctx context.Context, id string) (map[string]domain.Console, error) { diff --git a/apps/druid/adapters/http/handlers/routes.go b/apps/druid/adapters/http/handlers/routes.go index 94332053..20661d64 100644 --- a/apps/druid/adapters/http/handlers/routes.go +++ b/apps/druid/adapters/http/handlers/routes.go @@ -70,7 +70,6 @@ func RegisterPublicRoutes(app *fiber.App, handlers RouteHandlers) { app.Put("/:id/api/v1/scroll/commands/:command", handlers.Server.AddDaemonCommand) app.Post("/:id/api/v1/command", handlers.Server.RunDaemonCommand) app.Get("/:id/api/v1/queue", handlers.Server.GetDaemonQueue) - app.Get("/:id/api/v1/procedures", handlers.Server.GetDaemonProcedures) app.Get("/:id/api/v1/consoles", handlers.Server.GetDaemonConsoles) app.Get("/:id/api/v1/logs", handlers.Server.GetDaemonLogs) app.Get("/:id/api/v1/logs/:stream", handlers.Server.GetDaemonStreamLogs) diff --git a/apps/druid/adapters/http/handlers/scroll_handler.go b/apps/druid/adapters/http/handlers/scroll_handler.go index 89fc6cf6..5f32b7b6 100644 --- a/apps/druid/adapters/http/handlers/scroll_handler.go +++ b/apps/druid/adapters/http/handlers/scroll_handler.go @@ -214,17 +214,6 @@ func (h *ScrollHandler) GetScrollQueue(c *fiber.Ctx, id string) error { return c.JSON(queue) } -func (h *ScrollHandler) GetScrollProcedures(c *fiber.Ctx, id string) error { - if _, err := h.getScroll(id); err != nil { - return err - } - procedures, err := h.supervisor.Procedures(id) - if err != nil { - return err - } - return c.JSON(procedures) -} - func (h *ScrollHandler) GetScrollConsoles(c *fiber.Ctx, id string) error { if _, err := h.getScroll(id); err != nil { return err @@ -271,10 +260,6 @@ func (h *ScrollHandler) GetDaemonQueue(c *fiber.Ctx) error { return h.GetScrollQueue(c, c.Params("id")) } -func (h *ScrollHandler) GetDaemonProcedures(c *fiber.Ctx) error { - return h.GetScrollProcedures(c, c.Params("id")) -} - func (h *ScrollHandler) GetDaemonConsoles(c *fiber.Ctx) error { return h.GetScrollConsoles(c, c.Params("id")) } diff --git a/apps/druid/core/services/runtime_access.go b/apps/druid/core/services/runtime_access.go index 017414f2..4c9bd4b4 100644 --- a/apps/druid/core/services/runtime_access.go +++ b/apps/druid/core/services/runtime_access.go @@ -90,18 +90,10 @@ func (s *RuntimeSupervisor) ScrollFile(id string) (*domain.File, error) { return session.scrollService.GetFile(), nil } -func (s *RuntimeSupervisor) Queue(id string) (map[string]domain.ScrollLockStatus, error) { +func (s *RuntimeSupervisor) Queue(id string) (domain.ProcedureStatusMap, error) { session, err := s.sessionFor(id) if err != nil { return nil, err } - return session.queueManager.GetQueue(), nil -} - -func (s *RuntimeSupervisor) Procedures(id string) (map[string]domain.ScrollLockStatus, error) { - session, err := s.sessionFor(id) - if err != nil { - return nil, err - } - return session.Procedures(), nil + return session.Queue(), nil } diff --git a/apps/druid/core/services/runtime_lifecycle.go b/apps/druid/core/services/runtime_lifecycle.go index 433035d3..a0df0ef3 100644 --- a/apps/druid/core/services/runtime_lifecycle.go +++ b/apps/druid/core/services/runtime_lifecycle.go @@ -12,7 +12,7 @@ func (s *RuntimeSupervisor) DeleteWithPolicy(id string, purgeData bool) error { delete(s.sessions, id) s.mu.Unlock() if session != nil { - session.Shutdown() + session.stopDeploymentQueue() } runtimeScroll, err := s.store.GetScroll(id) @@ -37,7 +37,7 @@ func (s *RuntimeSupervisor) StartScroll(id string) (*domain.RuntimeScroll, error return nil, err } session.mu.Lock() - session.runtimeScroll.Status = deriveRuntimeScrollStatus(session.runtimeScroll.Commands, session.scrollService.GetFile().Commands) + session.runtimeScroll.Status = deriveRuntimeScrollStatus(session.runtimeScroll.Procedures, session.scrollService.GetFile().Commands) if session.runtimeScroll.Status == domain.RuntimeScrollStatusCreated { session.runtimeScroll.Status = domain.RuntimeScrollStatusRunning } @@ -60,6 +60,6 @@ func (s *RuntimeSupervisor) Stop(id string) (*domain.RuntimeScroll, error) { session.markError(err) return nil, err } - session.Shutdown() + session.stopDeploymentQueue() return s.store.GetScroll(id) } diff --git a/apps/druid/core/services/runtime_session.go b/apps/druid/core/services/runtime_session.go index 5de0ed25..83e31d25 100644 --- a/apps/druid/core/services/runtime_session.go +++ b/apps/druid/core/services/runtime_session.go @@ -17,10 +17,11 @@ type RuntimeSession struct { store ports.RuntimeScrollStore runtimeScroll *domain.RuntimeScroll scrollService *coreservices.ScrollService - queueManager *coreservices.QueueManager watchService ports.WatchServiceInterface runtimeBackend ports.RuntimeBackendInterface - procedures ports.ProcedureLauchnerInterface + queue map[string]*runtimeQueueItem + workWg sync.WaitGroup + notifierChan []chan []string devWatchPaths []string devCommands []string devDaemonURL string @@ -31,6 +32,8 @@ type RuntimeSession struct { devWatchCancel context.CancelFunc mu sync.Mutex + queueMu sync.Mutex + runMu sync.Mutex started bool } @@ -46,6 +49,9 @@ func NewRuntimeSession( if len(scrollYAML) == 0 { return nil, fmt.Errorf("runtime scroll %s has no scroll_yaml", runtimeScroll.ID) } + if runtimeService == nil { + return nil, fmt.Errorf("runtime backend is required") + } scrollService, err := coreservices.NewCachedScrollService(runtimeScroll.Root, scrollYAML) if err != nil { return nil, err @@ -56,73 +62,17 @@ func NewRuntimeSession( scrollService: scrollService, runtimeBackend: runtimeService, } - queueManager, processLauncher, err := session.newQueue(scrollService, runtimeScroll.Root, runtimeScroll.ScrollName) - if err != nil { - return nil, err - } - session.queueManager = queueManager - session.procedures = processLauncher - queueManager.SetStatusObserver(session.persistCommandStatus) + session.resetQueueState() return session, nil } func (s *RuntimeSession) Start() { s.mu.Lock() - defer s.mu.Unlock() if s.started { + s.mu.Unlock() return } s.started = true - go s.queueManager.Work() -} - -func (s *RuntimeSession) Shutdown() { - s.queueManager.Shutdown() -} - -func (s *RuntimeSession) startQueue() { - s.mu.Lock() - queueManager := s.queueManager - s.mu.Unlock() - go queueManager.Work() -} - -func (s *RuntimeSession) newQueue(scrollService *coreservices.ScrollService, root string, scrollName string) (*coreservices.QueueManager, *coreservices.ProcedureLauncher, error) { - processLauncher, err := coreservices.NewProcedureLauncherForRuntime(scrollService, s.runtimeBackend, root, s.runtimeScroll.ID, scrollName, func() []domain.RuntimeRouteAssignment { - s.mu.Lock() - defer s.mu.Unlock() - routing := make([]domain.RuntimeRouteAssignment, len(s.runtimeScroll.Routing)) - copy(routing, s.runtimeScroll.Routing) - return routing - }) - if err != nil { - return nil, nil, err - } - queueManager := coreservices.NewQueueManager(scrollService, processLauncher) - queueManager.SetStatusObserver(s.persistCommandStatus) - return queueManager, processLauncher, nil -} - -func (s *RuntimeSession) replaceQueue(start bool) (*coreservices.QueueManager, error) { - s.mu.Lock() - scrollService := s.scrollService - root := s.runtimeScroll.Root - scrollName := s.runtimeScroll.ScrollName - oldQueue := s.queueManager s.mu.Unlock() - - queueManager, processLauncher, err := s.newQueue(scrollService, root, scrollName) - if err != nil { - return nil, err - } - - s.mu.Lock() - s.queueManager = queueManager - s.procedures = processLauncher - s.mu.Unlock() - - if start { - go queueManager.Work() - } - return oldQueue, nil + s.triggerRunQueue() } diff --git a/apps/druid/core/services/runtime_session_cache.go b/apps/druid/core/services/runtime_session_cache.go index 710e2412..8b4063e2 100644 --- a/apps/druid/core/services/runtime_session_cache.go +++ b/apps/druid/core/services/runtime_session_cache.go @@ -59,7 +59,7 @@ func (s *RuntimeSupervisor) startSession(runtimeScroll *domain.RuntimeScroll) (* s.mu.Lock() if existing := s.sessions[runtimeScroll.ID]; existing != nil { s.mu.Unlock() - session.Shutdown() + session.stopDeploymentQueue() return existing, nil } s.sessions[runtimeScroll.ID] = session @@ -71,8 +71,8 @@ func (s *RuntimeSupervisor) markScrollError(runtimeScroll *domain.RuntimeScroll, logger.Log().Error("failed to restore runtime scroll", zap.String("scroll", runtimeScroll.ID), zap.Error(err)) runtimeScroll.Status = domain.RuntimeScrollStatusError runtimeScroll.LastError = err.Error() - if runtimeScroll.Commands == nil { - runtimeScroll.Commands = map[string]domain.LockStatus{} + if runtimeScroll.Procedures == nil { + runtimeScroll.Procedures = domain.ProcedureStatusMap{} } _ = s.store.UpdateScroll(runtimeScroll) } diff --git a/apps/druid/core/services/runtime_session_commands.go b/apps/druid/core/services/runtime_session_commands.go index cebaa26d..1c4fa50e 100644 --- a/apps/druid/core/services/runtime_session_commands.go +++ b/apps/druid/core/services/runtime_session_commands.go @@ -13,31 +13,57 @@ import ( func (s *RuntimeSession) Hydrate() error { s.mu.Lock() - statuses := copyCommandStatuses(s.runtimeScroll.Commands) + statuses := copyProcedureStatuses(s.runtimeScroll.Procedures) runtimeStatus := s.runtimeScroll.Status s.mu.Unlock() commands := s.scrollService.GetFile().Commands if len(statuses) > 0 { - filtered := map[string]domain.LockStatus{} + filtered := domain.ProcedureStatusMap{} removedStaleStatus := false - for commandName, status := range statuses { + for commandName, procedureStatuses := range statuses { command := commands[commandName] if command == nil { removedStaleStatus = true continue } + filteredProcedures := map[string]domain.LockStatus{} + for idx, procedure := range command.Procedures { + procedureName := domain.ProcedureName(commandName, idx, procedure) + if status, ok := procedureStatuses[procedureName]; ok { + filteredProcedures[procedureName] = status + } + } + if len(filteredProcedures) != len(procedureStatuses) { + removedStaleStatus = true + } // Kubernetes keeps persistent workloads alive; do not requeue them just because // the singleton API process restarted. - if runtimeStatus == domain.RuntimeScrollStatusRunning && status.Status == domain.ScrollLockStatusDone && command.Run == domain.RunModePersistent { + commandStatus, ok := deriveCommandStatus(filteredProcedures, commandName, command) + if ok && runtimeStatus == domain.RuntimeScrollStatusRunning && commandStatus == domain.ScrollLockStatusRunning && command.Run == domain.RunModePersistent { continue } - filtered[commandName] = status + if len(filteredProcedures) > 0 { + filtered[commandName] = filteredProcedures + } } if removedStaleStatus { s.mu.Lock() - for commandName := range s.runtimeScroll.Commands { + for commandName := range s.runtimeScroll.Procedures { if commands[commandName] == nil { - delete(s.runtimeScroll.Commands, commandName) + delete(s.runtimeScroll.Procedures, commandName) + continue + } + for procedureName := range s.runtimeScroll.Procedures[commandName] { + found := false + for idx, procedure := range commands[commandName].Procedures { + if procedureName == domain.ProcedureName(commandName, idx, procedure) { + found = true + break + } + } + if !found { + delete(s.runtimeScroll.Procedures[commandName], procedureName) + } } } err := s.store.UpdateScroll(s.runtimeScroll) @@ -47,7 +73,7 @@ func (s *RuntimeSession) Hydrate() error { } } statuses = filtered - if err := s.queueManager.HydrateCommandStatuses(statuses); err != nil { + if err := s.HydrateFromState(statuses); err != nil { return err } } @@ -55,7 +81,7 @@ func (s *RuntimeSession) Hydrate() error { return err } s.mu.Lock() - s.runtimeScroll.Status = deriveRuntimeScrollStatus(s.runtimeScroll.Commands, s.scrollService.GetFile().Commands) + s.runtimeScroll.Status = deriveRuntimeScrollStatus(s.runtimeScroll.Procedures, s.scrollService.GetFile().Commands) err := s.store.UpdateScroll(s.runtimeScroll) s.mu.Unlock() return err @@ -73,14 +99,14 @@ func (s *RuntimeSession) AutoStartServe() error { s.rememberDoneDependencies(command, map[string]bool{}) if command.Run == domain.RunModePersistent { s.mu.Lock() - status, ok := s.runtimeScroll.Commands[serveCommand] + status, ok := deriveCommandStatus(s.runtimeScroll.Procedures[serveCommand], serveCommand, command) runtimeStatus := s.runtimeScroll.Status s.mu.Unlock() - if ok && status.Status == domain.ScrollLockStatusDone && runtimeStatus == domain.RuntimeScrollStatusRunning { + if ok && (status == domain.ScrollLockStatusDone || status == domain.ScrollLockStatusRunning) && runtimeStatus == domain.RuntimeScrollStatusRunning { return nil } } - if err := s.queueManager.AddForcedItem(serveCommand); err != nil && !errors.Is(err, coreservices.ErrAlreadyInQueue) { + if err := s.AddForcedItem(serveCommand); err != nil && !errors.Is(err, coreservices.ErrAlreadyInQueue) { return err } return nil @@ -100,19 +126,19 @@ func (s *RuntimeSession) RunWithContext(ctx context.Context, command string) (*d longRunning := targetCommand.Run == domain.RunModeRestart || targetCommand.Run == domain.RunModePersistent s.rememberDoneDependencies(targetCommand, map[string]bool{}) - if err := s.queueManager.AddTempItem(command); err != nil { + if err := s.AddTempItem(command); err != nil { s.markError(err) return nil, err } if !longRunning { - if err := s.queueManager.WaitUntilEmptyContext(ctx); err != nil { + if err := s.WaitUntilEmptyContext(ctx); err != nil { s.markError(err) return nil, err } } s.mu.Lock() - s.runtimeScroll.Status = deriveRuntimeScrollStatus(s.runtimeScroll.Commands, s.scrollService.GetFile().Commands) + s.runtimeScroll.Status = deriveRuntimeScrollStatus(s.runtimeScroll.Procedures, s.scrollService.GetFile().Commands) err = s.store.UpdateScroll(s.runtimeScroll) id := s.runtimeScroll.ID s.mu.Unlock() @@ -129,9 +155,9 @@ func (s *RuntimeSession) refreshCommandState() { } commands := s.scrollService.GetFile().Commands removedStaleStatus := false - for commandName := range fresh.Commands { + for commandName := range fresh.Procedures { if commands[commandName] == nil { - delete(fresh.Commands, commandName) + delete(fresh.Procedures, commandName) removedStaleStatus = true } } @@ -139,7 +165,7 @@ func (s *RuntimeSession) refreshCommandState() { _ = s.store.UpdateScroll(fresh) } s.mu.Lock() - s.runtimeScroll.Commands = copyCommandStatuses(fresh.Commands) + s.runtimeScroll.Procedures = copyProcedureStatuses(fresh.Procedures) s.runtimeScroll.Status = fresh.Status s.mu.Unlock() } @@ -154,10 +180,11 @@ func (s *RuntimeSession) rememberDoneDependencies(command *domain.CommandInstruc } seen[dependency] = true s.mu.Lock() - status, ok := s.runtimeScroll.Commands[dependency] + dependencyCommand := s.scrollService.GetFile().Commands[dependency] + status, ok := deriveCommandStatus(s.runtimeScroll.Procedures[dependency], dependency, dependencyCommand) s.mu.Unlock() - if ok && status.Status == domain.ScrollLockStatusDone { - s.queueManager.RememberDoneItem(dependency) + if ok && status == domain.ScrollLockStatusDone { + s.RememberDoneItem(dependency) } dependencyCommand, err := s.scrollService.GetCommand(dependency) if err == nil { @@ -167,28 +194,72 @@ func (s *RuntimeSession) rememberDoneDependencies(command *domain.CommandInstruc } func (s *RuntimeSession) persistCommandStatus(command string, status domain.ScrollLockStatus, exitCode *int) { + if status == domain.ScrollLockStatusRunning { + return + } s.mu.Lock() defer s.mu.Unlock() commands := s.scrollService.GetFile().Commands - if commands[command] == nil { + commandDefinition := commands[command] + if commandDefinition == nil { return } - if s.runtimeScroll.Commands == nil { - s.runtimeScroll.Commands = map[string]domain.LockStatus{} + if s.runtimeScroll.Procedures == nil { + s.runtimeScroll.Procedures = domain.ProcedureStatusMap{} } - for commandName := range s.runtimeScroll.Commands { + for commandName := range s.runtimeScroll.Procedures { if commands[commandName] == nil { - delete(s.runtimeScroll.Commands, commandName) + delete(s.runtimeScroll.Procedures, commandName) + } + } + if s.runtimeScroll.Procedures[command] == nil { + s.runtimeScroll.Procedures[command] = map[string]domain.LockStatus{} + } + for idx, procedure := range commandDefinition.Procedures { + procedureName := domain.ProcedureName(command, idx, procedure) + s.runtimeScroll.Procedures[command][procedureName] = domain.LockStatus{ + Status: status, + ExitCode: exitCode, + LastStatusChange: time.Now().Unix(), } } - s.runtimeScroll.Commands[command] = domain.LockStatus{ + s.runtimeScroll.Status = deriveRuntimeScrollStatus(s.runtimeScroll.Procedures, s.scrollService.GetFile().Commands) + if err := s.store.UpdateScroll(s.runtimeScroll); err != nil { + logger.Log().Error("failed to persist command status", zap.String("scroll", s.runtimeScroll.ID), zap.String("command", command), zap.Error(err)) + } +} + +func (s *RuntimeSession) Snapshot() domain.ProcedureStatusMap { + s.mu.Lock() + defer s.mu.Unlock() + return copyProcedureStatuses(s.runtimeScroll.Procedures) +} + +func (s *RuntimeSession) SetCommandStatus(command string, status domain.ScrollLockStatus, exitCode *int) { + s.persistCommandStatus(command, status, exitCode) +} + +func (s *RuntimeSession) persistProcedureStatus(command string, procedure string, status domain.ScrollLockStatus, exitCode *int) { + s.mu.Lock() + defer s.mu.Unlock() + commands := s.scrollService.GetFile().Commands + if commands[command] == nil { + return + } + if s.runtimeScroll.Procedures == nil { + s.runtimeScroll.Procedures = domain.ProcedureStatusMap{} + } + if s.runtimeScroll.Procedures[command] == nil { + s.runtimeScroll.Procedures[command] = map[string]domain.LockStatus{} + } + s.runtimeScroll.Procedures[command][procedure] = domain.LockStatus{ Status: status, ExitCode: exitCode, LastStatusChange: time.Now().Unix(), } - s.runtimeScroll.Status = deriveRuntimeScrollStatus(s.runtimeScroll.Commands, s.scrollService.GetFile().Commands) + s.runtimeScroll.Status = deriveRuntimeScrollStatus(s.runtimeScroll.Procedures, commands) if err := s.store.UpdateScroll(s.runtimeScroll); err != nil { - logger.Log().Error("failed to persist command status", zap.String("scroll", s.runtimeScroll.ID), zap.String("command", command), zap.Error(err)) + logger.Log().Error("failed to persist procedure status", zap.String("scroll", s.runtimeScroll.ID), zap.String("command", command), zap.String("procedure", procedure), zap.Error(err)) } } diff --git a/apps/druid/core/services/runtime_session_execution.go b/apps/druid/core/services/runtime_session_execution.go new file mode 100644 index 00000000..1ffbc8bf --- /dev/null +++ b/apps/druid/core/services/runtime_session_execution.go @@ -0,0 +1,78 @@ +package services + +import ( + "errors" + + "github.com/highcard-dev/daemon/internal/core/domain" + "github.com/highcard-dev/daemon/internal/core/ports" + coreservices "github.com/highcard-dev/daemon/internal/core/services" + "github.com/highcard-dev/daemon/internal/utils/logger" + "go.uber.org/zap" +) + +func (s *RuntimeSession) runCommand(cmd string) error { + command, err := s.scrollService.GetCommand(cmd) + if err != nil { + return err + } + + logger.Log().Info("Running command", + zap.String("cmd", cmd), + zap.String("runMode", string(command.Run)), + ) + + s.mu.Lock() + root := s.runtimeScroll.Root + scrollID := s.runtimeScroll.ID + scrollName := s.runtimeScroll.ScrollName + routing := make([]domain.RuntimeRouteAssignment, len(s.runtimeScroll.Routing)) + copy(routing, s.runtimeScroll.Routing) + s.mu.Unlock() + + if root == "" { + root = s.scrollService.GetCwd() + } + file := s.scrollService.GetFile() + procedureEnv, err := coreservices.BuildRuntimeProcedureEnv(file, cmd, command, coreservices.RuntimeEnvContext{ + ScrollID: scrollID, + ScrollName: scrollName, + Backend: s.runtimeBackend.Name(), + Routing: routing, + }) + if err != nil { + s.setCommandProcedureStatus(cmd, command, domain.ScrollLockStatusError, nil) + return err + } + + exitCode, err := s.runtimeBackend.RunCommand(ports.RuntimeCommand{ + Name: cmd, + ScrollID: scrollID, + Command: command, + Root: root, + GlobalPorts: file.Ports, + Routing: routing, + ProcedureEnv: procedureEnv, + ProcedureStatusObserver: func(procedure string, status domain.ScrollLockStatus, exitCode *int) { + s.persistProcedureStatus(cmd, procedure, status, exitCode) + }, + }) + if err != nil { + s.setCommandProcedureStatus(cmd, command, domain.ScrollLockStatusError, exitCode) + return err + } + if exitCode != nil && *exitCode != 0 { + s.setCommandProcedureStatus(cmd, command, domain.ScrollLockStatusError, exitCode) + return &domain.CommandExecutionError{ + Command: cmd, + ExitCode: *exitCode, + Err: errors.New("command failed"), + } + } + return nil +} + +func (s *RuntimeSession) setCommandProcedureStatus(commandName string, command *domain.CommandInstructionSet, status domain.ScrollLockStatus, exitCode *int) { + for idx, procedure := range command.Procedures { + s.persistProcedureStatus(commandName, domain.ProcedureName(commandName, idx, procedure), status, exitCode) + } +} diff --git a/apps/druid/core/services/runtime_session_execution_test.go b/apps/druid/core/services/runtime_session_execution_test.go new file mode 100644 index 00000000..d9122ecb --- /dev/null +++ b/apps/druid/core/services/runtime_session_execution_test.go @@ -0,0 +1,207 @@ +package services + +import ( + "errors" + "testing" + + "github.com/highcard-dev/daemon/internal/core/domain" + "github.com/highcard-dev/daemon/internal/core/ports" +) + +func TestRuntimeSessionRunCommandPassesCommandContextToRuntimeBackend(t *testing.T) { + var seen ports.RuntimeCommand + session := newRuntimeSessionExecutionTest(t, executionScrollYAML(), &fakeWorkerBackend{ + runCommand: func(command ports.RuntimeCommand) (*int, error) { + seen = command + return nil, nil + }, + }) + + if err := session.runCommand("serve"); err != nil { + t.Fatal(err) + } + + if seen.Name != "serve" { + t.Fatalf("Name = %s, want serve", seen.Name) + } + if seen.Command == nil || seen.Command.Run != domain.RunModePersistent { + t.Fatalf("Command = %#v, want persistent instruction set", seen.Command) + } + if seen.Root != session.runtimeScroll.Root { + t.Fatalf("Root = %s, want %s", seen.Root, session.runtimeScroll.Root) + } + if len(seen.GlobalPorts) != 1 || seen.GlobalPorts[0].Name != "http" { + t.Fatalf("GlobalPorts = %#v", seen.GlobalPorts) + } +} + +func TestRuntimeSessionRunCommandPassesRoutingAndScrollIdentity(t *testing.T) { + var seen ports.RuntimeCommand + session := newRuntimeSessionExecutionTest(t, executionScrollYAML(), &fakeWorkerBackend{ + runCommand: func(command ports.RuntimeCommand) (*int, error) { + seen = command + return nil, nil + }, + }) + session.runtimeScroll.Routing = []domain.RuntimeRouteAssignment{{ + Name: "web-http", + PortName: "http", + Host: "web.example.test", + ExternalIP: "192.0.2.10", + PublicPort: 443, + URL: "https://web.example.test", + }} + + if err := session.runCommand("serve"); err != nil { + t.Fatal(err) + } + + if seen.ScrollID != "scroll-a" { + t.Fatalf("ScrollID = %s, want scroll-a", seen.ScrollID) + } + if len(seen.Routing) != 1 || seen.Routing[0].PublicPort != 443 { + t.Fatalf("Routing = %#v", seen.Routing) + } + env := seen.ProcedureEnv["web"] + if env["DRUID_SCROLL_ID"] != "scroll-a" || env["DRUID_SCROLL_NAME"] != "scroll-name" { + t.Fatalf("scroll env = %#v", env) + } + if env["DRUID_RUNTIME_BACKEND"] != "fake-worker" { + t.Fatalf("backend env = %#v", env) + } + if env["DRUID_PORT_HTTP_PUBLIC"] != "443" || env["DRUID_PORT_HTTP_HOST"] != "web.example.test" || env["DRUID_PORT_HTTP_URL"] != "https://web.example.test" { + t.Fatalf("routing env = %#v", env) + } + if env["DRUID_IP"] != "192.0.2.10" || env["DRUID_PORT_HTTP_IP"] != "192.0.2.10" { + t.Fatalf("ip env = %#v", env) + } + if _, ok := env["DRUID_IP_WAIT"]; ok { + t.Fatalf("DRUID_IP_WAIT should not be set after routing: %#v", env) + } +} + +func TestRuntimeSessionRunCommandPersistsProcedureStatusCallbacks(t *testing.T) { + exitCode := 0 + session := newRuntimeSessionExecutionTest(t, executionScrollYAML(), &fakeWorkerBackend{ + runCommand: func(command ports.RuntimeCommand) (*int, error) { + command.ObserveProcedureStatus("web", domain.ScrollLockStatusRunning, nil) + command.ObserveProcedureStatus("web", domain.ScrollLockStatusDone, &exitCode) + return nil, nil + }, + }) + + if err := session.runCommand("serve"); err != nil { + t.Fatal(err) + } + + updated, err := session.store.GetScroll(session.runtimeScroll.ID) + if err != nil { + t.Fatal(err) + } + status := updated.Procedures["serve"]["web"] + if status.Status != domain.ScrollLockStatusDone || status.ExitCode == nil || *status.ExitCode != 0 { + t.Fatalf("procedure status = %#v, want done with exit 0", status) + } +} + +func TestRuntimeSessionRunCommandMarksProceduresErrorOnEnvBuildFailure(t *testing.T) { + session := newRuntimeSessionExecutionTest(t, duplicatePortScrollYAML(), &fakeWorkerBackend{}) + + if err := session.runCommand("serve"); err == nil { + t.Fatal("expected env build failure") + } + + updated, err := session.store.GetScroll(session.runtimeScroll.ID) + if err != nil { + t.Fatal(err) + } + status := updated.Procedures["serve"]["web"] + if status.Status != domain.ScrollLockStatusError { + t.Fatalf("procedure status = %#v, want error", status) + } +} + +func TestRuntimeSessionRunCommandMarksProceduresErrorOnBackendFailure(t *testing.T) { + exitCode := 23 + session := newRuntimeSessionExecutionTest(t, executionScrollYAML(), &fakeWorkerBackend{ + runCommand: func(command ports.RuntimeCommand) (*int, error) { + return &exitCode, errors.New("backend failed") + }, + }) + + if err := session.runCommand("serve"); err == nil { + t.Fatal("expected backend failure") + } + + updated, err := session.store.GetScroll(session.runtimeScroll.ID) + if err != nil { + t.Fatal(err) + } + status := updated.Procedures["serve"]["web"] + if status.Status != domain.ScrollLockStatusError || status.ExitCode == nil || *status.ExitCode != 23 { + t.Fatalf("procedure status = %#v, want error with exit 23", status) + } +} + +func newRuntimeSessionExecutionTest(t *testing.T, scrollYAML string, backend *fakeWorkerBackend) *RuntimeSession { + t.Helper() + store := newTestStateStore(t) + runtimeScroll := &domain.RuntimeScroll{ + ID: "scroll-a", + Artifact: "local", + Root: t.TempDir(), + ScrollName: "scroll-name", + ScrollYAML: scrollYAML, + Procedures: domain.ProcedureStatusMap{}, + } + if err := store.CreateScroll(runtimeScroll); err != nil { + t.Fatal(err) + } + session, err := NewRuntimeSession(store, runtimeScroll, backend) + if err != nil { + t.Fatal(err) + } + return session +} + +func executionScrollYAML() string { + return `name: scroll-name +desc: Runtime session execution test +version: 0.1.0 +app_version: "1.0" +ports: + - name: http + protocol: http + port: 8080 +serve: serve +commands: + serve: + run: persistent + procedures: + - id: web + image: alpine:3.20 + env: + APP_ENV: test + DRUID_PORT_HTTP: user-value + DRUID_SCROLL_NAME: user-name +` +} + +func duplicatePortScrollYAML() string { + return `name: scroll-name +desc: Runtime session execution test +version: 0.1.0 +app_version: "1.0" +ports: + - name: web-port + port: 8080 + - name: web_port + port: 8081 +serve: serve +commands: + serve: + procedures: + - id: web + image: alpine:3.20 +` +} diff --git a/apps/druid/core/services/runtime_session_queue.go b/apps/druid/core/services/runtime_session_queue.go new file mode 100644 index 00000000..0af7fcd4 --- /dev/null +++ b/apps/druid/core/services/runtime_session_queue.go @@ -0,0 +1,494 @@ +package services + +import ( + "context" + "errors" + "time" + + "github.com/highcard-dev/daemon/internal/core/domain" + coreservices "github.com/highcard-dev/daemon/internal/core/services" + "github.com/highcard-dev/daemon/internal/utils/logger" + "go.uber.org/zap" +) + +type runtimeQueueItem struct { + err error + doneChan chan struct{} + inFlight bool + rememberDone bool + runRequested bool + restartCount uint +} + +func (s *RuntimeSession) AddTempItem(cmd string) error { + return s.addQueueItem(cmd, coreservices.AddItemOptions{}) +} + +func (s *RuntimeSession) AddForcedItem(cmd string) error { + return s.addQueueItem(cmd, coreservices.AddItemOptions{Force: true}) +} + +func (s *RuntimeSession) AddTempItemWithWait(cmd string) error { + return s.addQueueItem(cmd, coreservices.AddItemOptions{Wait: true}) +} + +func (s *RuntimeSession) RememberDoneItem(cmd string) { + s.queueMu.Lock() + defer s.queueMu.Unlock() + item, ok := s.queue[cmd] + if !ok { + item = &runtimeQueueItem{} + s.queue[cmd] = item + } + item.rememberDone = true + item.inFlight = false + item.err = nil +} + +func (s *RuntimeSession) addQueueItem(cmd string, options coreservices.AddItemOptions) error { + logger.Log().Debug("Running command", zap.String("cmd", cmd)) + + command, err := s.scrollService.GetCommand(cmd) + if err != nil { + return err + } + + s.queueMu.Lock() + item := s.queue[cmd] + snapshot := s.Snapshot() + currentStatus, hasCurrentStatus := s.derivedScheduledStatusLocked(cmd, item, snapshot) + if item != nil { + if currentStatus != domain.ScrollLockStatusDone && currentStatus != domain.ScrollLockStatusError { + s.queueMu.Unlock() + return coreservices.ErrAlreadyInQueue + } + } + if hasCurrentStatus && currentStatus == domain.ScrollLockStatusDone && command.Run == domain.RunModeOnce && !options.Force { + s.queueMu.Unlock() + return coreservices.ErrCommandDoneOnce + } + + var doneChan chan struct{} + if options.Wait { + doneChan = make(chan struct{}) + } + item = &runtimeQueueItem{doneChan: doneChan} + s.queue[cmd] = item + s.setQueueStatusLocked(cmd, item, domain.ScrollLockStatusWaiting, nil) + s.queueMu.Unlock() + + s.triggerRunQueue() + + if options.Wait { + <-doneChan + s.queueMu.Lock() + item := s.queue[cmd] + var itemErr error + if item != nil { + itemErr = item.err + } + s.queueMu.Unlock() + if itemErr != nil { + return itemErr + } + } + + return nil +} + +func (s *RuntimeSession) HydrateFromState(statuses domain.ProcedureStatusMap) error { + for cmd, procedureStatuses := range statuses { + command, err := s.scrollService.GetCommand(cmd) + if err != nil { + return err + } + + commandStatus, ok := coreservices.DeriveCommandStatusFromProcedures(cmd, command, procedureStatuses) + if !ok { + continue + } + + if commandStatus == domain.ScrollLockStatusDone { + if command.Run != domain.RunModeRestart && command.Run != domain.RunModePersistent { + s.queueMu.Lock() + s.queue[cmd] = &runtimeQueueItem{rememberDone: true} + s.queueMu.Unlock() + continue + } + } + + s.rememberHydratedItem(cmd) + } + + return nil +} + +func (s *RuntimeSession) rememberHydratedItem(cmd string) { + s.queueMu.Lock() + defer s.queueMu.Unlock() + item, ok := s.queue[cmd] + if !ok { + item = &runtimeQueueItem{} + s.queue[cmd] = item + } + item.err = nil + item.rememberDone = false + item.runRequested = true +} + +func (s *RuntimeSession) triggerRunQueue() { + s.mu.Lock() + started := s.started + s.mu.Unlock() + if !started { + return + } + s.startRunQueue() +} + +func (s *RuntimeSession) startRunQueue() { + s.workWg.Add(1) + go func() { + defer s.workWg.Done() + s.RunQueue() + s.notify() + }() +} + +func (s *RuntimeSession) RunQueue() { + s.runMu.Lock() + defer s.runMu.Unlock() + + s.queueMu.Lock() + queueKeys := make(map[string]domain.ScrollLockStatus, len(s.queue)) + runRequested := make(map[string]bool, len(s.queue)) + snapshot := s.Snapshot() + for cmd, item := range s.queue { + status, _ := s.derivedScheduledStatusLocked(cmd, item, snapshot) + queueKeys[cmd] = status + runRequested[cmd] = item.runRequested + } + s.queueMu.Unlock() + + logger.Log().Info("Running queue", zap.Any("queueKeys", queueKeys)) + + for cmd, status := range queueKeys { + if status == domain.ScrollLockStatusRunning && !runRequested[cmd] { + continue + } + + command, err := s.scrollService.GetCommand(cmd) + if err != nil { + logger.Log().Error("Error getting command", zap.String("command", cmd), zap.Error(err)) + s.queueMu.Lock() + delete(s.queue, cmd) + s.queueMu.Unlock() + continue + } + + if status == domain.ScrollLockStatusError && !runRequested[cmd] { + continue + } + + isRestartMode := command.Run == domain.RunModeRestart + if status == domain.ScrollLockStatusDone && !isRestartMode && !runRequested[cmd] { + continue + } + + dependenciesReady := true + for _, dep := range command.Needs { + if s.isScheduled(dep) { + if s.getQueueStatus(dep) != domain.ScrollLockStatusDone { + dependenciesReady = false + } + continue + } + + dependencyStatus := s.derivedQueueStatus(dep) + if dependencyStatus == domain.ScrollLockStatusDone { + continue + } + + dependenciesReady = false + if err := s.AddTempItem(dep); err != nil && !errors.Is(err, coreservices.ErrAlreadyInQueue) && !errors.Is(err, coreservices.ErrCommandDoneOnce) { + logger.Log().Error("Error adding dependency", zap.String("command", cmd), zap.String("dependency", dep), zap.Error(err)) + } + } + if !dependenciesReady { + logger.Log().Info("Dependencies not ready", zap.String("command", cmd)) + continue + } + + s.queueMu.Lock() + item := s.queue[cmd] + if item == nil { + s.queueMu.Unlock() + continue + } + runMode := command.Run + s.setQueueStatusLocked(cmd, item, domain.ScrollLockStatusRunning, nil) + s.queueMu.Unlock() + + logger.Log().Info("Running command", zap.String("command", cmd)) + s.workWg.Add(1) + go func(c string, i *runtimeQueueItem) { + defer s.workWg.Done() + defer func() { + if i.doneChan != nil { + close(i.doneChan) + } + s.triggerRunQueue() + }() + + startedAt := time.Now() + err := s.runCommand(c) + isRestartMode := runMode == domain.RunModeRestart + + if err != nil { + logger.Log().Error("Error running command", zap.String("command", c), zap.Error(err)) + if !isRestartMode || domain.IsNonRetryableCommandError(err) { + s.setQueueError(c, err) + return + } + } + + if isRestartMode { + s.setQueueStatus(c, domain.ScrollLockStatusWaiting, nil) + s.queueMu.Lock() + if time.Since(startedAt) < 30*time.Second { + i.restartCount++ + } else { + i.restartCount = 0 + } + restartCount := i.restartCount + s.queueMu.Unlock() + if restartCount > 0 { + backoff := time.Duration(1<<(restartCount-1)) * time.Second + if backoff > 5*time.Minute { + backoff = 5 * time.Minute + } + logger.Log().Info("Restarting with backoff", zap.String("command", c), zap.Duration("backoff", backoff), zap.Uint("restartCount", restartCount)) + time.Sleep(backoff) + } else { + logger.Log().Info("Command done, restarting", zap.String("command", c)) + } + } else { + logger.Log().Info("Command done", zap.String("command", c)) + s.setQueueStatus(c, domain.ScrollLockStatusDone, nil) + } + }(cmd, item) + } +} + +func (s *RuntimeSession) WaitUntilEmpty() { + _ = s.WaitUntilEmptyContext(context.Background()) +} + +func (s *RuntimeSession) WaitUntilEmptyContext(ctx context.Context) error { + notifier := make(chan []string, 10) + + s.queueMu.Lock() + s.notifierChan = append(s.notifierChan, notifier) + if !s.hasActiveItemsLocked() { + s.removeNotifierLocked(notifier) + s.queueMu.Unlock() + return nil + } + s.queueMu.Unlock() + defer func() { + s.queueMu.Lock() + s.removeNotifierLocked(notifier) + s.queueMu.Unlock() + }() + + for { + select { + case cmds := <-notifier: + if len(cmds) == 0 { + return nil + } + case <-ctx.Done(): + return ctx.Err() + } + } +} + +func (s *RuntimeSession) GetQueue() map[string]domain.ScrollLockStatus { + s.queueMu.Lock() + defer s.queueMu.Unlock() + + queue := make(map[string]domain.ScrollLockStatus) + snapshot := s.Snapshot() + for cmd, item := range s.queue { + queue[cmd], _ = s.derivedScheduledStatusLocked(cmd, item, snapshot) + } + return queue +} + +func (s *RuntimeSession) stopDeploymentQueue() { + s.mu.Lock() + s.started = false + s.mu.Unlock() + s.drainQueueWork() + s.resetQueueState() +} + +func (s *RuntimeSession) drainQueueWork() { + done := make(chan struct{}) + go func() { + s.workWg.Wait() + close(done) + }() + + select { + case <-done: + case <-time.After(5 * time.Second): + logger.Log().Warn("Timed out waiting for queue work to finish") + } +} + +func (s *RuntimeSession) resetQueueState() { + s.queueMu.Lock() + defer s.queueMu.Unlock() + s.queue = make(map[string]*runtimeQueueItem) + s.notifierChan = make([]chan []string, 0) +} + +func (s *RuntimeSession) notify() { + s.queueMu.Lock() + queuedCommands := make([]string, 0) + snapshot := s.Snapshot() + + for cmd, item := range s.queue { + status, _ := s.derivedScheduledStatusLocked(cmd, item, snapshot) + if status != domain.ScrollLockStatusDone && status != domain.ScrollLockStatusError { + queuedCommands = append(queuedCommands, cmd) + } + } + + notifiers := make([]chan []string, len(s.notifierChan)) + copy(notifiers, s.notifierChan) + s.queueMu.Unlock() + + for _, notifier := range notifiers { + select { + case notifier <- queuedCommands: + default: + logger.Log().Debug("Skipping slow queue notifier") + } + } +} + +func (s *RuntimeSession) isScheduled(cmd string) bool { + s.queueMu.Lock() + defer s.queueMu.Unlock() + _, ok := s.queue[cmd] + return ok +} + +func (s *RuntimeSession) getQueueStatus(cmd string) domain.ScrollLockStatus { + s.queueMu.Lock() + defer s.queueMu.Unlock() + if item, ok := s.queue[cmd]; ok { + status, _ := s.derivedScheduledStatusLocked(cmd, item, s.Snapshot()) + return status + } + return s.derivedQueueStatus(cmd) +} + +func (s *RuntimeSession) derivedQueueStatus(cmd string) domain.ScrollLockStatus { + command, err := s.scrollService.GetCommand(cmd) + if err != nil { + return domain.ScrollLockStatusWaiting + } + status, ok := coreservices.DeriveCommandStatusFromProcedures(cmd, command, s.Snapshot()[cmd]) + if !ok { + return domain.ScrollLockStatusWaiting + } + return status +} + +func (s *RuntimeSession) setQueueError(cmd string, err error) { + s.queueMu.Lock() + defer s.queueMu.Unlock() + if item, ok := s.queue[cmd]; ok { + item.inFlight = false + item.err = err + } + s.SetCommandStatus(cmd, domain.ScrollLockStatusError, coreservices.CommandExitCode(err)) +} + +func (s *RuntimeSession) setQueueStatus(cmd string, status domain.ScrollLockStatus, exitCode *int) { + s.queueMu.Lock() + defer s.queueMu.Unlock() + if item, ok := s.queue[cmd]; ok { + s.setQueueStatusLocked(cmd, item, status, exitCode) + } +} + +func (s *RuntimeSession) setQueueStatusLocked(cmd string, item *runtimeQueueItem, status domain.ScrollLockStatus, exitCode *int) { + item.inFlight = status == domain.ScrollLockStatusRunning + if status != domain.ScrollLockStatusError { + item.err = nil + } + switch status { + case domain.ScrollLockStatusDone: + item.rememberDone = true + item.runRequested = false + case domain.ScrollLockStatusWaiting: + item.rememberDone = false + item.runRequested = true + case domain.ScrollLockStatusRunning: + item.runRequested = false + case domain.ScrollLockStatusError: + item.runRequested = false + } + if status != domain.ScrollLockStatusRunning { + s.SetCommandStatus(cmd, status, exitCode) + } +} + +func (s *RuntimeSession) derivedScheduledStatusLocked(cmd string, item *runtimeQueueItem, snapshot domain.ProcedureStatusMap) (domain.ScrollLockStatus, bool) { + if item != nil { + if item.err != nil { + return domain.ScrollLockStatusError, true + } + if item.inFlight { + return domain.ScrollLockStatusRunning, true + } + if item.rememberDone { + return domain.ScrollLockStatusDone, true + } + } + command, err := s.scrollService.GetCommand(cmd) + if err != nil { + return domain.ScrollLockStatusWaiting, item != nil + } + if status, ok := coreservices.DeriveCommandStatusFromProcedures(cmd, command, snapshot[cmd]); ok { + return status, true + } + if item != nil { + return domain.ScrollLockStatusWaiting, true + } + return "", false +} + +func (s *RuntimeSession) hasActiveItemsLocked() bool { + snapshot := s.Snapshot() + for cmd, item := range s.queue { + status, _ := s.derivedScheduledStatusLocked(cmd, item, snapshot) + if status != domain.ScrollLockStatusDone && status != domain.ScrollLockStatusError { + return true + } + } + return false +} + +func (s *RuntimeSession) removeNotifierLocked(notifier chan []string) { + for i, n := range s.notifierChan { + if n == notifier { + s.notifierChan = append(s.notifierChan[:i], s.notifierChan[i+1:]...) + return + } + } +} diff --git a/apps/druid/core/services/runtime_session_runtime.go b/apps/druid/core/services/runtime_session_runtime.go index 99a5f55c..d0e18046 100644 --- a/apps/druid/core/services/runtime_session_runtime.go +++ b/apps/druid/core/services/runtime_session_runtime.go @@ -23,25 +23,10 @@ func (s *RuntimeSession) RoutingTargets() ([]domain.RuntimeRoutingTarget, error) return s.runtimeBackend.RoutingTargets(runtimeScroll.Root, s.scrollService.GetFile().Commands, s.scrollService.GetFile().Ports) } -func (s *RuntimeSession) Procedures() map[string]domain.ScrollLockStatus { - statuses := s.procedures.GetProcedureStatuses() - out := make(map[string]domain.ScrollLockStatus, len(statuses)) - for name, status := range statuses { - out[name] = status - } - for commandName, status := range statuses { - command := s.scrollService.GetFile().Commands[commandName] - if command == nil { - continue - } - for idx, procedure := range command.Procedures { - procedureName := domain.ProcedureName(commandName, idx, procedure) - if _, ok := out[procedureName]; !ok { - out[procedureName] = status - } - } - } - return out +func (s *RuntimeSession) Queue() domain.ProcedureStatusMap { + s.mu.Lock() + defer s.mu.Unlock() + return copyProcedureStatuses(s.runtimeScroll.Procedures) } func (s *RuntimeSession) ApplyRouting(assignments []domain.RuntimeRouteAssignment) (*domain.RuntimeScroll, error) { @@ -62,32 +47,41 @@ func (s *RuntimeSession) StopRuntime() error { root := s.runtimeScroll.Root started := s.started s.mu.Unlock() - oldQueue, err := s.replaceQueue(false) - if err != nil { - return err + if started { + s.mu.Lock() + s.started = false + s.mu.Unlock() + s.drainQueueWork() } - oldQueue.Shutdown() + s.resetQueueState() if err := s.runtimeBackend.StopRuntime(root); err != nil { if started { - s.startQueue() + s.mu.Lock() + s.started = true + s.mu.Unlock() + s.triggerRunQueue() } return err } s.mu.Lock() commands := s.scrollService.GetFile().Commands - for commandName, status := range s.runtimeScroll.Commands { + for commandName, procedures := range s.runtimeScroll.Procedures { command := commands[commandName] - if command != nil && command.Run == domain.RunModeOnce && status.Status == domain.ScrollLockStatusDone { + status, ok := deriveCommandStatus(procedures, commandName, command) + if ok && command != nil && command.Run == domain.RunModeOnce && status == domain.ScrollLockStatusDone { continue } - delete(s.runtimeScroll.Commands, commandName) + delete(s.runtimeScroll.Procedures, commandName) } s.runtimeScroll.Status = domain.RuntimeScrollStatusStopped s.runtimeScroll.LastError = "" - err = s.store.UpdateScroll(s.runtimeScroll) + err := s.store.UpdateScroll(s.runtimeScroll) s.mu.Unlock() if err == nil && started { - s.startQueue() + s.mu.Lock() + s.started = true + s.mu.Unlock() + s.triggerRunQueue() } return err } @@ -122,18 +116,22 @@ func (s *RuntimeSession) ApplyRestore(materialized *ports.RuntimeMaterialization if err != nil { return err } - queueManager, processLauncher, err := s.newQueue(scrollService, root, scrollService.GetCurrent().Name) - if err != nil { - return err + s.mu.Lock() + started := s.started + s.mu.Unlock() + if started { + s.mu.Lock() + s.started = false + s.mu.Unlock() + s.drainQueueWork() } s.mu.Lock() - oldQueue := s.queueManager commands := scrollService.GetFile().Commands routing := preserveRoutingAssignments(s.runtimeScroll.Routing, scrollService.GetFile().Ports) - for commandName := range s.runtimeScroll.Commands { + for commandName := range s.runtimeScroll.Procedures { if commands[commandName] == nil { - delete(s.runtimeScroll.Commands, commandName) + delete(s.runtimeScroll.Procedures, commandName) } } s.runtimeScroll.Artifact = materialized.Artifact @@ -145,14 +143,14 @@ func (s *RuntimeSession) ApplyRestore(materialized *ports.RuntimeMaterialization s.runtimeScroll.Status = domain.RuntimeScrollStatusStopped s.runtimeScroll.LastError = "" s.scrollService = scrollService - s.queueManager = queueManager - s.procedures = processLauncher - started := s.started err = s.store.UpdateScroll(s.runtimeScroll) s.mu.Unlock() if err == nil && started { - oldQueue.Shutdown() - go queueManager.Work() + s.resetQueueState() + s.mu.Lock() + s.started = true + s.mu.Unlock() + s.triggerRunQueue() } return err } diff --git a/apps/druid/core/services/runtime_status.go b/apps/druid/core/services/runtime_status.go index bbca73ff..7f922141 100644 --- a/apps/druid/core/services/runtime_status.go +++ b/apps/druid/core/services/runtime_status.go @@ -2,35 +2,89 @@ package services import "github.com/highcard-dev/daemon/internal/core/domain" -func deriveRuntimeScrollStatus(statuses map[string]domain.LockStatus, commands map[string]*domain.CommandInstructionSet) domain.RuntimeScrollStatus { - if len(statuses) == 0 { +func deriveRuntimeScrollStatus(procedures domain.ProcedureStatusMap, commands map[string]*domain.CommandInstructionSet) domain.RuntimeScrollStatus { + if len(procedures) == 0 { + return domain.RuntimeScrollStatusCreated + } + queue := deriveCommandStatuses(procedures, commands) + if len(queue) == 0 { return domain.RuntimeScrollStatusCreated } hasActive := false - hasPersistentDone := false - for commandName, status := range statuses { - if status.Status == domain.ScrollLockStatusError { + for _, status := range queue { + switch status { + case domain.ScrollLockStatusError: return domain.RuntimeScrollStatusError - } - if status.Status == domain.ScrollLockStatusRunning || status.Status == domain.ScrollLockStatusWaiting { + case domain.ScrollLockStatusRunning, domain.ScrollLockStatusWaiting: hasActive = true } - if status.Status == domain.ScrollLockStatusDone { - if command := commands[commandName]; command != nil && command.Run == domain.RunModePersistent { - hasPersistentDone = true - } - } } - if hasActive || hasPersistentDone { + if hasActive { return domain.RuntimeScrollStatusRunning } return domain.RuntimeScrollStatusStopped } -func copyCommandStatuses(statuses map[string]domain.LockStatus) map[string]domain.LockStatus { - copied := map[string]domain.LockStatus{} - for command, status := range statuses { - copied[command] = status +func deriveCommandStatuses(procedures domain.ProcedureStatusMap, commands map[string]*domain.CommandInstructionSet) map[string]domain.ScrollLockStatus { + queue := map[string]domain.ScrollLockStatus{} + for commandName, command := range commands { + status, ok := deriveCommandStatus(procedures[commandName], commandName, command) + if ok { + queue[commandName] = status + } + } + return queue +} + +func deriveCommandStatus(statuses map[string]domain.LockStatus, commandName string, command *domain.CommandInstructionSet) (domain.ScrollLockStatus, bool) { + if command == nil || len(command.Procedures) == 0 { + return "", false + } + seen := false + allDone := true + hasRunning := false + hasWaiting := false + for idx, procedure := range command.Procedures { + procedureName := domain.ProcedureName(commandName, idx, procedure) + status, ok := statuses[procedureName] + if !ok { + allDone = false + continue + } + seen = true + switch status.Status { + case domain.ScrollLockStatusError: + return domain.ScrollLockStatusError, true + case domain.ScrollLockStatusRunning: + hasRunning = true + allDone = false + case domain.ScrollLockStatusWaiting: + hasWaiting = true + allDone = false + case domain.ScrollLockStatusDone: + default: + allDone = false + } + } + if hasRunning { + return domain.ScrollLockStatusRunning, true + } + if hasWaiting || (seen && !allDone) { + return domain.ScrollLockStatusWaiting, true + } + if seen && allDone { + return domain.ScrollLockStatusDone, true + } + return "", false +} + +func copyProcedureStatuses(statuses domain.ProcedureStatusMap) domain.ProcedureStatusMap { + copied := domain.ProcedureStatusMap{} + for command, procedures := range statuses { + copied[command] = map[string]domain.LockStatus{} + for procedure, status := range procedures { + copied[command][procedure] = status + } } return copied } diff --git a/apps/druid/core/services/runtime_supervisor.go b/apps/druid/core/services/runtime_supervisor.go index e17129b1..5e772d2f 100644 --- a/apps/druid/core/services/runtime_supervisor.go +++ b/apps/druid/core/services/runtime_supervisor.go @@ -106,12 +106,12 @@ func (s *RuntimeSupervisor) CreateWithOwner(artifact string, name string, ownerI return nil, err } placeholder := &domain.RuntimeScroll{ - ID: id, - OwnerID: ownerID, - Artifact: artifact, - Root: s.runtimeBackend.RootRef(id, namespace), - Status: domain.RuntimeScrollStatusCreated, - Commands: map[string]domain.LockStatus{}, + ID: id, + OwnerID: ownerID, + Artifact: artifact, + Root: s.runtimeBackend.RootRef(id, namespace), + Status: domain.RuntimeScrollStatusCreated, + Procedures: domain.ProcedureStatusMap{}, } if err := s.store.CreateScroll(placeholder); err != nil { return nil, err @@ -241,7 +241,7 @@ func (s *RuntimeSupervisor) applyMaterializedScroll(runtimeScroll *domain.Runtim runtimeScroll.ScrollYAML = string(materialized.ScrollYAML) runtimeScroll.Status = domain.RuntimeScrollStatusCreated runtimeScroll.LastError = "" - runtimeScroll.Commands = map[string]domain.LockStatus{} + runtimeScroll.Procedures = domain.ProcedureStatusMap{} if err := s.store.UpdateScroll(runtimeScroll); err != nil { return nil, err } diff --git a/apps/druid/core/services/runtime_supervisor_test.go b/apps/druid/core/services/runtime_supervisor_test.go index e38f299c..404c14a0 100644 --- a/apps/druid/core/services/runtime_supervisor_test.go +++ b/apps/druid/core/services/runtime_supervisor_test.go @@ -89,15 +89,76 @@ func TestRuntimeSessionHydrateDoesNotDuplicateActiveServe(t *testing.T) { t.Fatal(err) } - queue := session.queueManager.GetQueue() + queue := session.GetQueue() if len(queue) != 1 { t.Fatalf("queue len = %d, want 1: %#v", len(queue), queue) } - if queue["start"] != domain.ScrollLockStatusWaiting { - t.Fatalf("start = %s, want waiting", queue["start"]) + if queue["start"] != domain.ScrollLockStatusRunning { + t.Fatalf("start = %s, want running", queue["start"]) } } +func TestRuntimeSessionHydratePreservesProcedureStateWhenReattachingRunningRestart(t *testing.T) { + root := t.TempDir() + store := newTestStateStore(t) + scrollYAML := multiProcedureScrollYAML() + runtimeScroll := &domain.RuntimeScroll{ + ID: "cached", + Artifact: "local", + Root: root, + ScrollName: "cached", + ScrollYAML: scrollYAML, + Status: domain.RuntimeScrollStatusRunning, + Procedures: domain.ProcedureStatusMap{ + "start": { + "coldstart": {Status: domain.ScrollLockStatusDone}, + "start": {Status: domain.ScrollLockStatusRunning}, + }, + }, + } + if err := store.CreateScroll(runtimeScroll); err != nil { + t.Fatal(err) + } + called := make(chan ports.RuntimeCommand, 1) + release := make(chan struct{}) + session, err := NewRuntimeSession(store, runtimeScroll, &fakeWorkerBackend{ + runCommand: func(command ports.RuntimeCommand) (*int, error) { + called <- command + <-release + return nil, nil + }, + }) + if err != nil { + t.Fatal(err) + } + if err := session.Hydrate(); err != nil { + t.Fatal(err) + } + + session.Start() + select { + case command := <-called: + if command.Name != "start" { + t.Fatalf("reattached command = %s, want start", command.Name) + } + case <-time.After(2 * time.Second): + t.Fatal("timed out waiting for hydrated command reattach") + } + + updated, err := store.GetScroll(runtimeScroll.ID) + if err != nil { + t.Fatal(err) + } + if updated.Procedures["start"]["coldstart"].Status != domain.ScrollLockStatusDone { + t.Fatalf("coldstart = %s, want done; procedures=%#v", updated.Procedures["start"]["coldstart"].Status, updated.Procedures) + } + if updated.Procedures["start"]["start"].Status != domain.ScrollLockStatusRunning { + t.Fatalf("start = %s, want running; procedures=%#v", updated.Procedures["start"]["start"].Status, updated.Procedures) + } + close(release) + session.stopDeploymentQueue() +} + func TestRuntimeSessionHydrateSkipsMissingServe(t *testing.T) { session := newRuntimeSessionForTest(t, map[string]domain.LockStatus{}, cachedScrollYAML("")) @@ -105,7 +166,7 @@ func TestRuntimeSessionHydrateSkipsMissingServe(t *testing.T) { t.Fatal(err) } - if queue := session.queueManager.GetQueue(); len(queue) != 0 { + if queue := session.GetQueue(); len(queue) != 0 { t.Fatalf("queue = %#v, want empty", queue) } } @@ -123,14 +184,14 @@ func TestRuntimeSessionHydrateDropsStaleCommandStatus(t *testing.T) { if err != nil { t.Fatal(err) } - if _, ok := updated.Commands["missing"]; ok { - t.Fatalf("stale command was not removed: %#v", updated.Commands) + if _, ok := updated.Procedures["missing"]; ok { + t.Fatalf("stale command was not removed: %#v", updated.Procedures) } } func TestRuntimeSessionHydrateDoesNotRequeueRunningPersistentServe(t *testing.T) { session := newRuntimeSessionForTest(t, map[string]domain.LockStatus{ - "start": {Status: domain.ScrollLockStatusDone}, + "start": {Status: domain.ScrollLockStatusRunning}, }, `name: cached desc: Cached scroll version: 0.1.0 @@ -152,7 +213,7 @@ commands: t.Fatal(err) } - if queue := session.queueManager.GetQueue(); len(queue) != 0 { + if queue := session.GetQueue(); len(queue) != 0 { t.Fatalf("queue = %#v, want empty", queue) } } @@ -176,7 +237,7 @@ func TestRuntimeSessionAutoStartServeRemembersDoneOnceDependencies(t *testing.T) t.Fatal(err) } - queue := session.queueManager.GetQueue() + queue := session.GetQueue() if queue["install"] != domain.ScrollLockStatusDone { t.Fatalf("install = %s, want done; queue=%#v", queue["install"], queue) } @@ -203,8 +264,8 @@ func TestRuntimeSessionStopPreservesDoneOnceCommands(t *testing.T) { if updated.Status != domain.RuntimeScrollStatusStopped { t.Fatalf("status = %s, want stopped", updated.Status) } - if len(updated.Commands) != 1 || updated.Commands["install"].Status != domain.ScrollLockStatusDone { - t.Fatalf("commands = %#v, want only install done", updated.Commands) + if len(updated.Procedures) != 1 || updated.Procedures["install"]["install.0"].Status != domain.ScrollLockStatusDone { + t.Fatalf("procedures = %#v, want only install done", updated.Procedures) } } @@ -234,8 +295,8 @@ commands: - image: alpine:3.20 command: ["false"] `, - Commands: map[string]domain.LockStatus{}, - Status: domain.RuntimeScrollStatusRunning, + Procedures: domain.ProcedureStatusMap{}, + Status: domain.RuntimeScrollStatusRunning, } if err := store.CreateScroll(runtimeScroll); err != nil { t.Fatal(err) @@ -276,8 +337,8 @@ func TestRuntimeSupervisorStartDoesNotHydrateStoppedScroll(t *testing.T) { ScrollName: "cached", ScrollYAML: installThenStartScrollYAML(), Status: domain.RuntimeScrollStatusStopped, - Commands: map[string]domain.LockStatus{ - "install": {Status: domain.ScrollLockStatusDone}, + Procedures: domain.ProcedureStatusMap{ + "install": {"install.0": {Status: domain.ScrollLockStatusDone}}, }, } if err := store.CreateScroll(runtimeScroll); err != nil { @@ -296,8 +357,8 @@ func TestRuntimeSupervisorStartDoesNotHydrateStoppedScroll(t *testing.T) { if err != nil { t.Fatal(err) } - if updated.Commands["install"].Status != domain.ScrollLockStatusDone { - t.Fatalf("commands = %#v, want install still done", updated.Commands) + if updated.Procedures["install"]["install.0"].Status != domain.ScrollLockStatusDone { + t.Fatalf("procedures = %#v, want install still done", updated.Procedures) } } @@ -310,8 +371,8 @@ func TestRuntimeSupervisorStartHydratesRunningScroll(t *testing.T) { ScrollName: "cached", ScrollYAML: installThenStartScrollYAML(), Status: domain.RuntimeScrollStatusRunning, - Commands: map[string]domain.LockStatus{ - "install": {Status: domain.ScrollLockStatusDone}, + Procedures: domain.ProcedureStatusMap{ + "install": {"install.0": {Status: domain.ScrollLockStatusDone}}, }, } if err := store.CreateScroll(runtimeScroll); err != nil { @@ -324,7 +385,7 @@ func TestRuntimeSupervisorStartHydratesRunningScroll(t *testing.T) { } defer func() { if session := supervisor.sessions["running-scroll"]; session != nil { - session.Shutdown() + session.stopDeploymentQueue() } }() @@ -332,7 +393,7 @@ func TestRuntimeSupervisorStartHydratesRunningScroll(t *testing.T) { if session == nil { t.Fatal("running scroll was not hydrated") } - queue := session.queueManager.GetQueue() + queue := session.GetQueue() if queue["install"] != domain.ScrollLockStatusDone { t.Fatalf("install = %s, want done; queue=%#v", queue["install"], queue) } @@ -363,8 +424,8 @@ func TestRuntimeSupervisorEnsureCanCreate(t *testing.T) { if runtimeScroll.Status != domain.RuntimeScrollStatusCreated { t.Fatalf("status = %s, want created", runtimeScroll.Status) } - if len(runtimeScroll.Commands) != 0 { - t.Fatalf("commands = %#v, want empty", runtimeScroll.Commands) + if len(runtimeScroll.Procedures) != 0 { + t.Fatalf("procedures = %#v, want empty", runtimeScroll.Procedures) } } @@ -390,8 +451,8 @@ func TestRuntimeSupervisorCreateCanCreate(t *testing.T) { if runtimeScroll.Status != domain.RuntimeScrollStatusCreated { t.Fatalf("status = %s, want created", runtimeScroll.Status) } - if len(runtimeScroll.Commands) != 0 { - t.Fatalf("commands = %#v, want empty", runtimeScroll.Commands) + if len(runtimeScroll.Procedures) != 0 { + t.Fatalf("procedures = %#v, want empty", runtimeScroll.Procedures) } } @@ -558,11 +619,11 @@ func TestRuntimeSupervisorEnsureRetriesIncompleteMaterializationFailure(t *testi func TestRuntimeSupervisorEnsureRepairsIncompletePlaceholder(t *testing.T) { store := newTestStateStore(t) if err := store.CreateScroll(&domain.RuntimeScroll{ - ID: "repair-scroll", - Artifact: "registry.local/lab:1.0", - Root: store.Root("repair-scroll"), - Status: domain.RuntimeScrollStatusCreated, - Commands: map[string]domain.LockStatus{}, + ID: "repair-scroll", + Artifact: "registry.local/lab:1.0", + Root: store.Root("repair-scroll"), + Status: domain.RuntimeScrollStatusCreated, + Procedures: domain.ProcedureStatusMap{}, }); err != nil { t.Fatal(err) } @@ -597,7 +658,7 @@ func TestRuntimeSupervisorEnsureDoesNotRetryExistingError(t *testing.T) { ScrollYAML: cachedScrollYAML("start"), Status: domain.RuntimeScrollStatusError, LastError: "procedure field mode is unsupported", - Commands: map[string]domain.LockStatus{}, + Procedures: domain.ProcedureStatusMap{}, } if err := store.CreateScroll(existing); err != nil { t.Fatal(err) @@ -630,8 +691,8 @@ func TestRuntimeSupervisorEnsureUpdatesChangedArtifact(t *testing.T) { ScrollName: "old-scroll", ScrollYAML: cachedScrollYAML("start"), Status: domain.RuntimeScrollStatusRunning, - Commands: map[string]domain.LockStatus{ - "start": {Status: domain.ScrollLockStatusDone}, + Procedures: domain.ProcedureStatusMap{ + "start": {"start.0": {Status: domain.ScrollLockStatusDone}}, }, Routing: []domain.RuntimeRouteAssignment{ {Name: "main-route", PortName: "main", Host: "old.example.test"}, @@ -667,8 +728,8 @@ func TestRuntimeSupervisorEnsureUpdatesChangedArtifact(t *testing.T) { if updated.Status != domain.RuntimeScrollStatusStopped { t.Fatalf("status = %s, want stopped", updated.Status) } - if len(updated.Commands) != 0 { - t.Fatalf("commands = %#v, want cleared", updated.Commands) + if len(updated.Procedures) != 0 { + t.Fatalf("procedures = %#v, want cleared", updated.Procedures) } if len(updated.Routing) != 1 || updated.Routing[0].PortName != "main" { t.Fatalf("routing = %#v, want matching route preserved", updated.Routing) @@ -688,7 +749,7 @@ func TestRuntimeSupervisorUpdateUsesPullWorkerWhenAvailable(t *testing.T) { ScrollName: "old-scroll", ScrollYAML: cachedScrollYAML("start"), Status: domain.RuntimeScrollStatusStopped, - Commands: map[string]domain.LockStatus{}, + Procedures: domain.ProcedureStatusMap{}, } if err := store.CreateScroll(existing); err != nil { t.Fatal(err) @@ -724,7 +785,7 @@ func TestRuntimeSupervisorUpdateRefreshesCurrentArtifactAndRestartsRunningScroll ScrollName: "old-scroll", ScrollYAML: cachedScrollYAML("start"), Status: domain.RuntimeScrollStatusRunning, - Commands: map[string]domain.LockStatus{"start": {Status: domain.ScrollLockStatusDone}}, + Procedures: domain.ProcedureStatusMap{"start": {"start.0": {Status: domain.ScrollLockStatusDone}}}, } if err := store.CreateScroll(existing); err != nil { t.Fatal(err) @@ -766,9 +827,9 @@ func TestRuntimeSupervisorRestoreUsesPullWorkerResult(t *testing.T) { ScrollName: "old-scroll", ScrollYAML: cachedScrollYAML("start"), Status: domain.RuntimeScrollStatusRunning, - Commands: map[string]domain.LockStatus{ - "start": {Status: domain.ScrollLockStatusDone}, - "obsolete": {Status: domain.ScrollLockStatusDone}, + Procedures: domain.ProcedureStatusMap{ + "start": {"start.0": {Status: domain.ScrollLockStatusDone}}, + "obsolete": {"obsolete.0": {Status: domain.ScrollLockStatusDone}}, }, Routing: []domain.RuntimeRouteAssignment{ {Name: "main-route", PortName: "main", Host: "old.example.test"}, @@ -800,8 +861,8 @@ func TestRuntimeSupervisorRestoreUsesPullWorkerResult(t *testing.T) { if restored.Status != domain.RuntimeScrollStatusStopped { t.Fatalf("status = %s, want stopped", restored.Status) } - if _, ok := restored.Commands["obsolete"]; ok { - t.Fatalf("commands = %#v, want obsolete command removed", restored.Commands) + if _, ok := restored.Procedures["obsolete"]; ok { + t.Fatalf("procedures = %#v, want obsolete command removed", restored.Procedures) } if len(restored.Routing) != 1 || restored.Routing[0].PortName != "main" { t.Fatalf("routing = %#v, want matching route preserved", restored.Routing) @@ -811,11 +872,11 @@ func TestRuntimeSupervisorRestoreUsesPullWorkerResult(t *testing.T) { func TestNewRuntimeSessionRequiresPersistedScrollYAML(t *testing.T) { store := newTestStateStore(t) runtimeScroll := &domain.RuntimeScroll{ - ID: "missing-yaml", - Artifact: "local", - Root: "runtime://missing-yaml", - Status: domain.RuntimeScrollStatusCreated, - Commands: map[string]domain.LockStatus{}, + ID: "missing-yaml", + Artifact: "local", + Root: "runtime://missing-yaml", + Status: domain.RuntimeScrollStatusCreated, + Procedures: domain.ProcedureStatusMap{}, } if err := store.CreateScroll(runtimeScroll); err != nil { t.Fatal(err) @@ -847,87 +908,79 @@ func TestRuntimeSessionApplyRoutingPersistsAssignments(t *testing.T) { } } -func TestRuntimeSessionProceduresUsesLauncherStatus(t *testing.T) { +func TestRuntimeSessionQueueReturnsProcedureStatuses(t *testing.T) { session := newRuntimeSessionForTest(t, map[string]domain.LockStatus{}, cachedScrollYAML("")) - session.queueManager.RememberDoneItem("start") - session.procedures = fakeProcedureStatuses{statuses: map[string]domain.ScrollLockStatus{ - "start.0": domain.ScrollLockStatusRunning, - }} + session.RememberDoneItem("start") + session.runtimeScroll.Procedures = domain.ProcedureStatusMap{"start": {"start.0": {Status: domain.ScrollLockStatusRunning}}} - got := session.Procedures() - if got["start.0"] != domain.ScrollLockStatusRunning { - t.Fatalf("procedures = %#v", got) + got := session.Queue() + if got["start"]["start.0"].Status != domain.ScrollLockStatusRunning { + t.Fatalf("queue = %#v", got) } - if _, ok := got["start"]; ok { - t.Fatalf("procedures leaked queue status: %#v", got) + if _, ok := got["start"]["start"]; ok { + t.Fatalf("queue leaked command status as procedure: %#v", got) } } -func TestRuntimeSessionProceduresExpandCommandStatusToProcedureAliases(t *testing.T) { +func TestRuntimeSessionQueueReturnsExplicitProcedureStatuses(t *testing.T) { session := newRuntimeSessionForTest(t, map[string]domain.LockStatus{}, multiProcedureScrollYAML()) - session.procedures = fakeProcedureStatuses{statuses: map[string]domain.ScrollLockStatus{ - "start": domain.ScrollLockStatusRunning, + session.runtimeScroll.Procedures = domain.ProcedureStatusMap{"start": { + "coldstart": {Status: domain.ScrollLockStatusDone}, + "start": {Status: domain.ScrollLockStatusRunning}, }} - got := session.Procedures() - if got["start"] != domain.ScrollLockStatusRunning { - t.Fatalf("procedures = %#v, want compatibility command key", got) + got := session.Queue() + if got["start"]["start"].Status != domain.ScrollLockStatusRunning { + t.Fatalf("queue = %#v, want start running", got) } - if got["coldstart"] != domain.ScrollLockStatusRunning { - t.Fatalf("procedures = %#v, want coldstart alias", got) + if got["start"]["coldstart"].Status != domain.ScrollLockStatusDone { + t.Fatalf("queue = %#v, want coldstart done", got) } } -func TestRuntimeSessionProceduresExpandUnnamedProcedureFallback(t *testing.T) { +func TestRuntimeSessionQueueUsesUnnamedProcedureFallback(t *testing.T) { session := newRuntimeSessionForTest(t, map[string]domain.LockStatus{}, multiProcedureScrollYAML()) - session.procedures = fakeProcedureStatuses{statuses: map[string]domain.ScrollLockStatus{ - "install": domain.ScrollLockStatusDone, - }} + session.runtimeScroll.Procedures = domain.ProcedureStatusMap{"install": {"install.0": {Status: domain.ScrollLockStatusDone}}} - got := session.Procedures() - if got["install"] != domain.ScrollLockStatusDone { - t.Fatalf("procedures = %#v, want compatibility command key", got) - } - if got["install.0"] != domain.ScrollLockStatusDone { - t.Fatalf("procedures = %#v, want unnamed procedure fallback alias", got) + got := session.Queue() + if got["install"]["install.0"].Status != domain.ScrollLockStatusDone { + t.Fatalf("queue = %#v, want unnamed procedure fallback", got) } } -func TestRuntimeSessionProceduresPreserveExistingProcedureStatus(t *testing.T) { +func TestRuntimeSessionQueuePreservesExistingProcedureStatus(t *testing.T) { session := newRuntimeSessionForTest(t, map[string]domain.LockStatus{}, multiProcedureScrollYAML()) - session.procedures = fakeProcedureStatuses{statuses: map[string]domain.ScrollLockStatus{ - "start": domain.ScrollLockStatusRunning, - "coldstart": domain.ScrollLockStatusDone, + session.runtimeScroll.Procedures = domain.ProcedureStatusMap{"start": { + "start": {Status: domain.ScrollLockStatusRunning}, + "coldstart": {Status: domain.ScrollLockStatusDone}, }} - got := session.Procedures() - if got["coldstart"] != domain.ScrollLockStatusDone { - t.Fatalf("procedures = %#v, want existing procedure status preserved", got) + got := session.Queue() + if got["start"]["coldstart"].Status != domain.ScrollLockStatusDone { + t.Fatalf("queue = %#v, want existing procedure status preserved", got) } } -func TestRuntimeSessionProceduresDoNotChangeQueue(t *testing.T) { +func TestRuntimeSessionQueueDoesNotChangeInternalCommandQueue(t *testing.T) { session := newRuntimeSessionForTest(t, map[string]domain.LockStatus{}, multiProcedureScrollYAML()) - session.queueManager.RememberDoneItem("install") - session.procedures = fakeProcedureStatuses{statuses: map[string]domain.ScrollLockStatus{ - "install": domain.ScrollLockStatusDone, - }} + session.RememberDoneItem("install") + session.runtimeScroll.Procedures = domain.ProcedureStatusMap{"install": {"install.0": {Status: domain.ScrollLockStatusDone}}} - _ = session.Procedures() - queue := session.queueManager.GetQueue() + _ = session.Queue() + queue := session.GetQueue() if _, ok := queue["install.0"]; ok { - t.Fatalf("queue changed by procedure aliases: %#v", queue) + t.Fatalf("internal queue changed by procedure state: %#v", queue) } if queue["install"] != domain.ScrollLockStatusDone { t.Fatalf("queue = %#v, want original command key only", queue) } } -func TestDeriveRuntimeScrollStatusTreatsDonePersistentAsRunning(t *testing.T) { - status := deriveRuntimeScrollStatus(map[string]domain.LockStatus{ - "start": {Status: domain.ScrollLockStatusDone}, +func TestDeriveRuntimeScrollStatusTreatsRunningPersistentAsRunning(t *testing.T) { + status := deriveRuntimeScrollStatus(domain.ProcedureStatusMap{ + "start": {"start.0": {Status: domain.ScrollLockStatusRunning}}, }, map[string]*domain.CommandInstructionSet{ - "start": {Run: domain.RunModePersistent}, + "start": {Run: domain.RunModePersistent, Procedures: []*domain.Procedure{{}}}, }) if status != domain.RuntimeScrollStatusRunning { @@ -936,10 +989,10 @@ func TestDeriveRuntimeScrollStatusTreatsDonePersistentAsRunning(t *testing.T) { } func TestDeriveRuntimeScrollStatusTreatsDoneFiniteAsStopped(t *testing.T) { - status := deriveRuntimeScrollStatus(map[string]domain.LockStatus{ - "report": {Status: domain.ScrollLockStatusDone}, + status := deriveRuntimeScrollStatus(domain.ProcedureStatusMap{ + "report": {"report.0": {Status: domain.ScrollLockStatusDone}}, }, map[string]*domain.CommandInstructionSet{ - "report": {Run: domain.RunModeAlways}, + "report": {Run: domain.RunModeAlways, Procedures: []*domain.Procedure{{}}}, }) if status != domain.RuntimeScrollStatusStopped { @@ -976,13 +1029,29 @@ func newRuntimeSessionForTest(t *testing.T, commands map[string]domain.LockStatu t.Helper() root := t.TempDir() store := newTestStateStore(t) + procedures := domain.ProcedureStatusMap{} + scroll, err := domain.NewScrollFromBytes(root, []byte(scrollYAML)) + if err != nil { + t.Fatal(err) + } + for commandName, status := range commands { + command := scroll.Commands[commandName] + procedures[commandName] = map[string]domain.LockStatus{} + if command == nil || len(command.Procedures) == 0 { + procedures[commandName][commandName+".0"] = status + continue + } + for idx, procedure := range command.Procedures { + procedures[commandName][domain.ProcedureName(commandName, idx, procedure)] = status + } + } runtimeScroll := &domain.RuntimeScroll{ ID: "cached", Artifact: "local", Root: root, ScrollName: "cached", ScrollYAML: scrollYAML, - Commands: commands, + Procedures: procedures, } if err := store.CreateScroll(runtimeScroll); err != nil { t.Fatal(err) @@ -1193,7 +1262,7 @@ commands: func assertQueued(t *testing.T, session *RuntimeSession, command string) { t.Helper() - queue := session.queueManager.GetQueue() + queue := session.GetQueue() if queue[command] != domain.ScrollLockStatusWaiting { t.Fatalf("%s = %s, want waiting; queue=%#v", command, queue[command], queue) } @@ -1207,15 +1276,3 @@ func newTestStateStore(t *testing.T) ports.RuntimeScrollStore { } return store } - -type fakeProcedureStatuses struct { - statuses map[string]domain.ScrollLockStatus -} - -func (f fakeProcedureStatuses) Run(string) error { - return nil -} - -func (f fakeProcedureStatuses) GetProcedureStatuses() map[string]domain.ScrollLockStatus { - return f.statuses -} diff --git a/apps/druid/core/services/runtime_update.go b/apps/druid/core/services/runtime_update.go index 9e00e350..21f2699e 100644 --- a/apps/druid/core/services/runtime_update.go +++ b/apps/druid/core/services/runtime_update.go @@ -32,7 +32,7 @@ func (s *RuntimeSupervisor) updateExistingScroll(runtimeScroll *domain.RuntimeSc delete(s.sessions, runtimeScroll.ID) s.mu.Unlock() if session != nil { - session.Shutdown() + session.stopDeploymentQueue() } if wasRunning { @@ -82,7 +82,7 @@ func (s *RuntimeSupervisor) updateExistingScroll(runtimeScroll *domain.RuntimeSc runtimeScroll.Root = materialized.Root runtimeScroll.ScrollName = scroll.Name runtimeScroll.ScrollYAML = string(materialized.ScrollYAML) - runtimeScroll.Commands = map[string]domain.LockStatus{} + runtimeScroll.Procedures = domain.ProcedureStatusMap{} runtimeScroll.Routing = preserveRoutingAssignments(existingRouting, scroll.Ports) runtimeScroll.LastError = "" if wasRunning || runtimeScroll.Status == domain.RuntimeScrollStatusStopped { diff --git a/docs_md/kubernetes_keepalive.md b/docs_md/kubernetes_keepalive.md new file mode 100644 index 00000000..c66e7d4c --- /dev/null +++ b/docs_md/kubernetes_keepalive.md @@ -0,0 +1,30 @@ +--- +title: "Kubernetes keepAliveTraffic" +sidebar_label: Kubernetes keepAliveTraffic +--- + +## Kubernetes keepAliveTraffic + +Kubernetes runtimes use Hubble Relay to evaluate `keepAliveTraffic` on expected ports. + +When a running job procedure has an expected port with `keepAliveTraffic`, druid checks for matching Hubble flows over the configured window. If the full window has elapsed and no flow is observed, druid deletes that procedure job and records it as a clean stop. The command run mode is not changed; `restart` and `persistent` scheduling decide what runs next. + +Coldstarter procedures are not stopped by this rule. For Minecraft restart-mode scrolls, put `keepAliveTraffic` on the real runtime procedure's `main` expected port, not on the coldstarter procedure. + +The current Hubble integration tracks flow presence. Use a minimum such as `1b/60m` to mean "at least one observed flow in the last 60 minutes". + +Required daemon configuration: + +``` +DRUID_HUBBLE_RELAY_ADDR=hubble-relay.kube-system.svc.cluster.local:80 +``` + +Validation commands: + +``` +kubectl -n kube-system get svc hubble-relay +kubectl -n kube-system rollout status deployment/hubble-relay +kubectl -n druid-system get deploy druid-cli -o jsonpath='{.spec.template.spec.containers[0].env[?(@.name=="DRUID_HUBBLE_RELAY_ADDR")].value}{"\n"}' +``` + +If Hubble Relay is disabled or unavailable, druid does not stop any procedure for missing traffic and reports `hubble-relay-unavailable` in port status/logs. diff --git a/internal/api/generated.go b/internal/api/generated.go index 5b87d440..1757a040 100644 --- a/internal/api/generated.go +++ b/internal/api/generated.go @@ -22,13 +22,21 @@ import ( "github.com/oapi-codegen/runtime" ) +// Defines values for LockStatusStatus. +const ( + LockStatusStatusDone LockStatusStatus = "done" + LockStatusStatusError LockStatusStatus = "error" + LockStatusStatusRunning LockStatusStatus = "running" + LockStatusStatusWaiting LockStatusStatus = "waiting" +) + // Defines values for RuntimeScrollStatus. const ( - Created RuntimeScrollStatus = "created" - Deleted RuntimeScrollStatus = "deleted" - Error RuntimeScrollStatus = "error" - Running RuntimeScrollStatus = "running" - Stopped RuntimeScrollStatus = "stopped" + RuntimeScrollStatusCreated RuntimeScrollStatus = "created" + RuntimeScrollStatusDeleted RuntimeScrollStatus = "deleted" + RuntimeScrollStatusError RuntimeScrollStatus = "error" + RuntimeScrollStatusRunning RuntimeScrollStatus = "running" + RuntimeScrollStatusStopped RuntimeScrollStatus = "stopped" ) // Defines values for PublishScrollUIPackageParamsScope. @@ -42,9 +50,6 @@ type ApplyRoutingRequest struct { Assignments []RuntimeRouteAssignment `json:"assignments"` } -// CommandStatusMap defines model for CommandStatusMap. -type CommandStatusMap map[string]interface{} - // CreateScrollRequest defines model for CreateScrollRequest. type CreateScrollRequest struct { // Artifact OCI artifact reference or local scroll path @@ -97,6 +102,19 @@ type HealthResponse struct { StartDate *time.Time `json:"start_date"` } +// LockStatus defines model for LockStatus. +type LockStatus struct { + ExitCode *int `json:"exit_code"` + LastStatusChange int64 `json:"last_status_change"` + Status LockStatusStatus `json:"status"` +} + +// LockStatusStatus defines model for LockStatus.Status. +type LockStatusStatus string + +// ProcedureStatusMap defines model for ProcedureStatusMap. +type ProcedureStatusMap map[string]map[string]LockStatus + // PublishUIPackageRequest defines model for PublishUIPackageRequest. type PublishUIPackageRequest struct { // Path Optional scroll-root-relative .wasm path. Defaults to private/dist/app.wasm or public/dist/app.wasm. @@ -164,11 +182,11 @@ type RuntimeRoutingTarget struct { // RuntimeScroll defines model for RuntimeScroll. type RuntimeScroll struct { Artifact string `json:"artifact"` - Commands *map[string]interface{} `json:"commands,omitempty"` CreatedAt time.Time `json:"created_at"` Id string `json:"id"` LastError *string `json:"last_error,omitempty"` OwnerId *string `json:"owner_id,omitempty"` + Procedures *ProcedureStatusMap `json:"procedures,omitempty"` Root string `json:"root"` Routing *[]RuntimeRouteAssignment `json:"routing,omitempty"` ScrollName string `json:"scroll_name"` @@ -340,9 +358,6 @@ type ClientInterface interface { // GetScrollPorts request GetScrollPorts(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) - // GetScrollProcedures request - GetScrollProcedures(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) - // GetScrollQueue request GetScrollQueue(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) @@ -559,18 +574,6 @@ func (c *Client) GetScrollPorts(ctx context.Context, id string, reqEditors ...Re return c.Client.Do(req) } -func (c *Client) GetScrollProcedures(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) { - req, err := NewGetScrollProceduresRequest(c.Server, id) - if err != nil { - return nil, err - } - req = req.WithContext(ctx) - if err := c.applyEditors(ctx, req, reqEditors); err != nil { - return nil, err - } - return c.Client.Do(req) -} - func (c *Client) GetScrollQueue(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) { req, err := NewGetScrollQueueRequest(c.Server, id) if err != nil { @@ -1153,40 +1156,6 @@ func NewGetScrollPortsRequest(server string, id string) (*http.Request, error) { return req, nil } -// NewGetScrollProceduresRequest generates requests for GetScrollProcedures -func NewGetScrollProceduresRequest(server string, id string) (*http.Request, error) { - var err error - - var pathParam0 string - - pathParam0, err = runtime.StyleParamWithLocation("simple", false, "id", runtime.ParamLocationPath, id) - if err != nil { - return nil, err - } - - serverURL, err := url.Parse(server) - if err != nil { - return nil, err - } - - operationPath := fmt.Sprintf("/api/v1/scrolls/%s/procedures", pathParam0) - if operationPath[0] == '/' { - operationPath = "." + operationPath - } - - queryURL, err := serverURL.Parse(operationPath) - if err != nil { - return nil, err - } - - req, err := http.NewRequest("GET", queryURL.String(), nil) - if err != nil { - return nil, err - } - - return req, nil -} - // NewGetScrollQueueRequest generates requests for GetScrollQueue func NewGetScrollQueueRequest(server string, id string) (*http.Request, error) { var err error @@ -1637,9 +1606,6 @@ type ClientWithResponsesInterface interface { // GetScrollPortsWithResponse request GetScrollPortsWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*GetScrollPortsResponse, error) - // GetScrollProceduresWithResponse request - GetScrollProceduresWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*GetScrollProceduresResponse, error) - // GetScrollQueueWithResponse request GetScrollQueueWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*GetScrollQueueResponse, error) @@ -1941,32 +1907,10 @@ func (r GetScrollPortsResponse) StatusCode() int { return 0 } -type GetScrollProceduresResponse struct { - Body []byte - HTTPResponse *http.Response - JSON200 *CommandStatusMap -} - -// Status returns HTTPResponse.Status -func (r GetScrollProceduresResponse) Status() string { - if r.HTTPResponse != nil { - return r.HTTPResponse.Status - } - return http.StatusText(0) -} - -// StatusCode returns HTTPResponse.StatusCode -func (r GetScrollProceduresResponse) StatusCode() int { - if r.HTTPResponse != nil { - return r.HTTPResponse.StatusCode - } - return 0 -} - type GetScrollQueueResponse struct { Body []byte HTTPResponse *http.Response - JSON200 *CommandStatusMap + JSON200 *ProcedureStatusMap } // Status returns HTTPResponse.Status @@ -2293,15 +2237,6 @@ func (c *ClientWithResponses) GetScrollPortsWithResponse(ctx context.Context, id return ParseGetScrollPortsResponse(rsp) } -// GetScrollProceduresWithResponse request returning *GetScrollProceduresResponse -func (c *ClientWithResponses) GetScrollProceduresWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*GetScrollProceduresResponse, error) { - rsp, err := c.GetScrollProcedures(ctx, id, reqEditors...) - if err != nil { - return nil, err - } - return ParseGetScrollProceduresResponse(rsp) -} - // GetScrollQueueWithResponse request returning *GetScrollQueueResponse func (c *ClientWithResponses) GetScrollQueueWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*GetScrollQueueResponse, error) { rsp, err := c.GetScrollQueue(ctx, id, reqEditors...) @@ -2734,32 +2669,6 @@ func ParseGetScrollPortsResponse(rsp *http.Response) (*GetScrollPortsResponse, e return response, nil } -// ParseGetScrollProceduresResponse parses an HTTP response from a GetScrollProceduresWithResponse call -func ParseGetScrollProceduresResponse(rsp *http.Response) (*GetScrollProceduresResponse, error) { - bodyBytes, err := io.ReadAll(rsp.Body) - defer func() { _ = rsp.Body.Close() }() - if err != nil { - return nil, err - } - - response := &GetScrollProceduresResponse{ - Body: bodyBytes, - HTTPResponse: rsp, - } - - switch { - case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 200: - var dest CommandStatusMap - if err := json.Unmarshal(bodyBytes, &dest); err != nil { - return nil, err - } - response.JSON200 = &dest - - } - - return response, nil -} - // ParseGetScrollQueueResponse parses an HTTP response from a GetScrollQueueWithResponse call func ParseGetScrollQueueResponse(rsp *http.Response) (*GetScrollQueueResponse, error) { bodyBytes, err := io.ReadAll(rsp.Body) @@ -2775,7 +2684,7 @@ func ParseGetScrollQueueResponse(rsp *http.Response) (*GetScrollQueueResponse, e switch { case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 200: - var dest CommandStatusMap + var dest ProcedureStatusMap if err := json.Unmarshal(bodyBytes, &dest); err != nil { return nil, err } @@ -3032,9 +2941,6 @@ type ServerInterface interface { // Get runtime scroll port status // (GET /api/v1/scrolls/{id}/ports) GetScrollPorts(c *fiber.Ctx, id string) error - // Get procedure state - // (GET /api/v1/scrolls/{id}/procedures) - GetScrollProcedures(c *fiber.Ctx, id string) error // Get runtime queue state // (GET /api/v1/scrolls/{id}/queue) GetScrollQueue(c *fiber.Ctx, id string) error @@ -3231,22 +3137,6 @@ func (siw *ServerInterfaceWrapper) GetScrollPorts(c *fiber.Ctx) error { return siw.Handler.GetScrollPorts(c, id) } -// GetScrollProcedures operation middleware -func (siw *ServerInterfaceWrapper) GetScrollProcedures(c *fiber.Ctx) error { - - var err error - - // ------------- Path parameter "id" ------------- - var id string - - err = runtime.BindStyledParameterWithOptions("simple", "id", c.Params("id"), &id, runtime.BindStyledParameterOptions{Explode: false, Required: true}) - if err != nil { - return fiber.NewError(fiber.StatusBadRequest, fmt.Errorf("Invalid format for parameter id: %w", err).Error()) - } - - return siw.Handler.GetScrollProcedures(c, id) -} - // GetScrollQueue operation middleware func (siw *ServerInterfaceWrapper) GetScrollQueue(c *fiber.Ctx) error { @@ -3444,8 +3334,6 @@ func RegisterHandlersWithOptions(router fiber.Router, si ServerInterface, option router.Get(options.BaseURL+"/api/v1/scrolls/:id/ports", wrapper.GetScrollPorts) - router.Get(options.BaseURL+"/api/v1/scrolls/:id/procedures", wrapper.GetScrollProcedures) - router.Get(options.BaseURL+"/api/v1/scrolls/:id/queue", wrapper.GetScrollQueue) router.Post(options.BaseURL+"/api/v1/scrolls/:id/restore", wrapper.RestoreScroll) @@ -3469,52 +3357,53 @@ func RegisterHandlersWithOptions(router fiber.Router, si ServerInterface, option // Base64 encoded, gzipped, json marshaled Swagger object var swaggerSpec = []string{ - "H4sIAAAAAAAC/+xbW1MbufL/Kqr5/6vOi7HJ3h44Tyw5u4fdpMKBpPKwm6JkqW0raCQhaQAfyt/9lG7j", - "uWiM7cAGUvtCYY+61f3ri1o97fuCyFJJAcKa4ui+MGQBJfb/HivFl+eyskzMz+G6AmPd10pLBdoy8Iuw", - "MWwuykTOLJT+n//XMCuOiv+brNlPIu/JeSUsK8GxhuOavliNCrtUUBwVWGu8LFarUaHhumIaaHH0R2ur", - "T/VaOf0MxBOfyLLEgl5YbCvzFisvHqXMMikwP2uIbXUFOQYasIULoiXnwwpry2aY+CcUDNFMuQ2Ko+Ld", - "ySlKT5GGGWgQBJDUiEuCOTKeMVLYLopRAXe4VDxoG2jMmOqK0fF8PrFgrP9z5P4UtazGaibmTlZG+wK8", - "BqWBYAsUYc6wQTOpkcAljNE7FVBAxuIpB6SDCRCjk7DgdIZkyawFOkJ2AYhiKKVAcxCgsQWDsECMjluC", - "f5ZTk5PNcczA8zgi/DM8Y0ZxvPTaIWMZ54jIEgyaaVlGpMdLXPLtJTYKk4zYv1dT0ALc/vUqD2ySX4OR", - "lSZgxuh0LqQGiqZLJKQ4aJBOMbkCQc04t7u8FaAvcxaNkYL8CsQoqgxQvzupjJUl6IMZJkzMkXbBhHBl", - "F1Kz/2JHn91Lw5wZq5eXRAMFYRnmOwRuJD6paR8O2hQuuYh9DRws0BBx/VALiPRUMD7A3aO1YWng1Ne4", - "Iw5zSyKDnET/EqbSu6SAnnTp4SVl80g9ELyDcfO3ez4P9/w3YG4X52CUFAb6flBKmrHISaU1CIsWnhoF", - "Z0N+bTMVyauc/krLuQZj+mzP4hOkQBMQFs+DnbnE1CHsBPO4ugQ3k7rEtjgqZlxid36U+I6VVVkcvTo8", - "HBUlE+HTYS2CqMop6Bhe2l5SbDO6fVyAaOZmv9aHXb2jIzxwXlGMClFx7nJ958QdiE0PUc4OZ9WUM7P4", - "cHqGyRWew2Bg+qN1w8Hjw/pAS2kPNHBs2Q2g8S02pT+Ux+g1zHDFrUFWIqXZDbYwoczYCVYqrJMaKScN", - "aX8/ziaeniIZB+3psJADSUNhY26lzqeOyoAeSB8dlD3/BkGDcQ77GOLHMU7eJS/bLzkOhTcNwBdHM8wN", - "jB4v3N2W3kkb4kyl5IDFbrkg4nAmtb2oz5+25lNZCZrbZ+RBv2Qqi4l/pmRLRCYszEM4XgGoY85u4L3G", - "sxkjWR4cG3uJiWU3zC4vsWeVjcjtz5xhiZSWBGilB+i0tJJInrf/3eV0aQNctXxM2J9+WMvW2CmcXFlO", - "todGA+74cKe9Eo282szzlgkqb/My7aJdx/lSLNbYRgs0AB1FD1srXyO0wWO7t6ye28KddamAb/LP3coV", - "J/jl8NNNDhKS64ZwqDTP57hN+jMxf4/1HDLab1dz7RIdDym/b+wY4ECs1MPX2pxLdlExoG8YgWEJ04Ih", - "Dbfz28v0fc+HWwJ0ttvgxkN3hI3HDQkdAbNzJ4D4TgDdKZMOFPU+NYPWwXAbi+t+upRy4BwNTv3oHZdR", - "EWqkDe6xvnwJV0H+kbAqRoWuhHDL3CqplP8uaD6qL2ifMshV7FKFym5bRepS0Hh6RXe0Vu5WWHtSxL2N", - "Ra15yzlae2/w3lre4Zq1j/QCf/fjT/lqb2eFNyTOJhBu0aiIHaoowM46mk0ZahfjZhNYSARv5Hxjh68R", - "F0M5sXb53hYfvL779wHTfcP6M6fuCQ52uDTMNJgFGP9lvLj/wyASb5I1g691Y+4g5E8JUmlmlxeOUSx/", - "AWvQx1Vw5/Dpl+Sav31875NAE6ffPr5HVl6BCE075iWwS6S0vGEUtI9Ax94VYp7dWv+FtcqL6ujTnm32", - "Fwup7YGrnCm6rkAv02ZSo48wvZDkCiwiUggg6d7MHKFfXKT6Jmyx3hkr9js4WFzKFzPpNiZS2OAKq66S", - "r3XFKDp5c4o4rgRZ+DYmRSUWLlKQp2QC9IFvwdDUJMZKcUbCfX6EOLuCP8Xc9zpB34A2I0SxxVNswIw8", - "w1uYpmfjP724zPo+Qy1AMSrc0yDW4fjV+NCfPwoEVqw4Kr73X4Xo9wadYMUmN68moZHhvokVVFvDX8Em", - "R261PArPPNwWT2lYGDoq3l7+ZuYbK36z7w4PE5KxSm1AMPls3Fbp7cRDXt3p23hTtWUOK3z0/3j4/V+4", - "8UUod1Al8A1moTni46kqS6yXEc4ujhbPjb+7B0uMioB38cmRJjMFzzENO7Xhf8OMvYhrvhD8XWqOWLj1", - "00oPm9RNTIq0cXHi101NU+uRoIlPmti46tNkgGi+5inC+QfG/izp8tEcIfcmadU+bF3luerZ4dWjidCB", - "/yG4USrj2qgHRTq4b4a975IT8F11f4ZmLdLsuj+RRXKN/a0scvjVLBJQ61okKNKxCII7Zmw4WmQsP/gy", - "tGfNzua6Z3QV8ryr2fvmCq9tanMprHEJFrTb4j6cobGEjEeor6/bQI8aoHVr0k9PaIT2K6eHjZDuLatR", - "8cPhD8OvQOJyIS2a+S5N22ph253iaJRP47+CfZnI7+j+X4q4O0e/MG25OJi4uqxSw7nrZ//8iU3y+Pnw", - "ob7+c8uNAWbkSGNAtrPiHZCqEWDRantZPHWNJvfxv9Ww9c8rEWSOwydP4QGjLBNSb/hCwjvcqmk3KL8w", - "zM8r0T0L18jsZXwxY/PBIrrOvidh3TPMwd3Les8QZ1ib9U0zKtxPniq3bF9MjeRgtkI1rHyGuO7QQO5j", - "nhRDV7AMExjrjnkf+vii2hCpgCKyBmUP8LmcbwH8G7fqhRUUrU5gBnOn0z5484BFwjp+TJAPI62ktltA", - "fSZDTf7ssN7lVt94D77zzR45oFJz49GrvBb3vSKm9pRtjLle+8KipzctmzspknYez1zkqM6KvQC/rqCC", - "h7H+j1/2DcLsFRuEOLn3dWPVGubrCMrD6UmDsXJTF+Y8LPj7KvPU99yA89Z3mWS4vWKr8aY4b3U/5R/7", - "YnHtyzF97icKz83cQ/eels3PQBtmbJzilPog/NgBaBw3RLq2Td8JwuzEQy4wCa8jtzjUWiMzL75UaQ8A", - "bVOtBAKU8MrUi+EXDHFqOtmmJtjDRvWMYj5IL9zjb7TtdxFmiDfHh1/0KP08Y6XaBLRU3yzOfibnIZyl", - "6hbUt1JfcYmpQbcLxgEpDX5qS8z9S+j9zFCxSXPmZ3NCaoyWvEyrNOeVMmV2mG8Hij6cooSKu7P6C2mu", - "6s4QoA/nb8wX22Jy7/dcTeIWw5EShe4Y6K/rfwZsNvFJA2pxkr9I06WZGbSnqk+GfrmwikXKM3nTeMvs", - "AsUpr6ZLlWCxD/FOsRK0QlggzDVgujyYVoxbFGZNPpyij8cXbxOXPX1SpV+g5N2vOaP1ggrW3GjZ13aG", - "p2nMB67ds2TGeJyA2qZfEMeRk1WHJquOz06LOOBYTApnusi0NxIWhAjDV6UfrRM0vS9A4O9dbmUjyUQo", - "7vu9RWSsBly6Y9BRa7CawQ3ma2rfOezTxpZAvNCvhVkThkt9nzI7tuZ390NqZs3hFqbGr8xwOZPaIibC", - "2CiTojZH1WDg69U+bZhwQmQB5MpkCeOMUp/0bcUtO4h+kNwip33yhD6L12HMjLMZkCXhefLoPn3qX1zx", - "costWSSbUbgBLpX3hPgDvYSfW5bhcSyEtAE158oIEwKmoT2un5ti9Wn1vwAAAP//VFfcMl4/AAA=", + "H4sIAAAAAAAC/+xbS3MbN/L/Kij8/1V7GZFyXgftSZE3WSVKWSvZ5UPiYoFAk4Q5A4wAjCiuit99C6/h", + "DAfDl6VYSuXisgZooPvXDzQazUdMZVFKAcJofPaINZ1BQdx/z8syX97IynAxvYG7CrSxn0slS1CGg5tE", + "tOZTUURybqBw//l/BRN8hv9vuF5+GNYe3lTC8ALs0nBe0+NVhs2yBHyGiVJkiVerDCu4q7gChs9+b231", + "qZ4rx5+BOuILBcTALVUyz/v5VYZPCHUjDDRVvDRcCnyG311cojiKFExAgaCApEK5pCRH2i2MSmJmOMPw", + "QIoy98x6Gj1gquJsMJ0ODWjj/jmz/+CaV20UF1PLK2ddBt5CqYASAwyRnBONJlIhQQoYoHdujmXCkHEO", + "SHkEEWdDP+FygmTBjQGWITMDxAgUUqApCFDEgEZEIM4GLcY/y7FO8WZXTMDzNCz8049xXeZk6aRD2vA8", + "R1QWoNFEySIgPViSIt+fY10SmmD712oMSoDdv57lgI38K9CyUhT0AF1OhVTA0HiJhBQnDdIxoXMQTA9S", + "u8uFADVKaTQYOnIzEGeo0sDc7rTSRhagTiaEcjFFyvoCIpWZScX/Syx9ci8FU66NWo6oAgbCcJIf4HeB", + "+KKm3e1z0V1SDvcWcjDAvMd1Xc0j0hFBG2IqN2GtWOZX6kq8wQ63U8ICKY7+JXSlDgkBHe7i4IjxaaDu", + "cd5ev/nbPF+Gef4bSG5mN6BLKTR07aCQLKGRi0opEAbNHDXyxobc3GYokvOU/KWSUwVad5e9DiOoBEVB", + "GDL1es4lYRZhy5jD1Qa4iVQFMfgMT3JJ7PlRkAdeVAU+e3N6muGCC//Xac2CqIoxqOBeyowYMQnZPs5A", + "NGOzm+vcrt7REp5Yq8AZFlWe21iPz4yqYJdvOohSeriSdH5bO31bB/DAzYgGRfTsx4WBqRcuJ9qMvEpG", + "dEbE1NHVzHNhfvgOpwgbQUdY5H7HqhLCipFhJoXTrVJS4QwvCLcZT0OUHoHDmkmuUjhcK0mB2fjkpv5G", + "SheNGOP+ZL1uR6me79s8qIH0KsFAl6NqnHM9+3B5TeicTKE3ZLqkZ0tK4ALuiZLSnCjIieH3gAYLoguX", + "Lg3QW5iQKjcaGYlKxe+JgSHj2gxJWfp5UqHSckPb3wfJI6EjSCJ0dGSYyZ5wXhKtF1Klg3qlQfUE9g1z", + "cOs3CBoLp6whBN/zEMHeRf8/7tjqC7zMA4/PJiTXkD1dILZbuvDRYGcsZQ5EHBalAw7XUpm+IDGWlWCp", + "fTIH+oiXSUzcWClbLDZCwhygPM/5PbxXZDLhNLmGc21CDb/nZjkiphVumrFy/2ygn6MyBog0nZJGUpmn", + "9f8wGi+Nh2ufcOhyiuRKpoNGA+4weNBekUbOt6+54ILJRZqnQ6TbML7oizW2QQMNQLNgYWvha4S2WOzm", + "9TVxthkbCvJt9nlYImkZH/WPbjMQH1y3uEOl8nSM2yY/F9P3RE0hIf1+2fAh3rFL+GN9R0MO1Ei17dTt", + "muQmKhrUPafQz2Gc0CfhfnY7it87NtxiYGO7LWbcd3vbetxQV2phBwXEnluTi7A+90oNN28v/UrfeaQl", + "ci+Lt5Q9x6m37SevaGXYp0pbrKSTqAascdZIWbWRZem+xaw13qA/JZCv+Kj0Cd6+gtQZoUskq5IdqO3U", + "tb02qIB7G4tsnU43jKu19xYjrvntT127SM/IN9//kE76DhZ4S/xsAmEnZTiUEAMDB8t49PWgA1Yqjvl4", + "cCWnO+4ntV/0hcba5DtbfHDyHl+ojdcO446eumjbW4JUMFGgZ6Ddx1BZ+YdGNFz16wW+VkljAyF3WNBK", + "cbO8tQuFLBiIAnVeeXP2f/0UTfOXj+9dEGji9MvH98jIOQhfVeWOA7NEpZL3nIFyHmiXt/mYW24t/8wY", + "FyEdfdyzvfztTCpzYhNohu4qUMu4mVToI4xvJZ2DQVQKATQWNrgldJNxTHP8FuudScl/BQuLPTLERNqN", + "qRTGm8JqU8i3quIMXVxdopxUgs5cnZmhggjrKchRcgHqxNXIWKzik7LMOfUFlwzlfA5/iKkrRoO6B6Uz", + "xIghY6JBZ27BBYzj2OAPxy43rhBUM4AzbEc9W6eDN4NTd36VIEjJ8Rn+1n3y3u8UOiQlH96/GfpKk/0S", + "Eqm2hD+DiYbcqklht7i/NF4yP9GXvJy+3AXNVb7cZt+cnkYkQ7LagGD4Wdut4uvPLqveKKw5VbV59jOc", + "939/+u2fuPGtz3pQJcg94b6a5PypKgqilgHOTRwNmWp3hfeayLDHG3+ypFFN3nJ0Q09t+K+4NrdhzheC", + "f0jOEfK3bljpYBPLvVGQNi6W/brqrGs5IjRhpImNTUJ1AojmOxz25x9o86NkyyczhNRT36p92BpVwaqj", + "hzdPxsIG/LvgRjGNa6PuBdnAfTvsXZMcgnv2cGdoUiPNZ5Fn0kjq5WUvjZx+NY141DY14gXZ0AiCB66N", + "P1pkSD/ypa+f64PV9cjZysd5m7N31eXf1Wp1lUSRAgwou8WjP0NDChmOUJdft4HOGqBt5qSfnlEJ7TfB", + "3UqI95ZVhr87/a7/jSpMF9KgiSvWtLXmtz3Ij7J0GP8ZzOtE/kDz/1LE7Tn6hWHL+sHQ5mVV2R+7fnTj", + "z6ySp4+Hu8r7Ly02epiRJQ0O2Y6KD0CrhoMFrR2lcSqLggimh4/hf6t+7d9UwvN84ac+hwVkyUVoveEr", + "cW9/q2abTvmFbn5Tic2zcI3MUcoXEz7tTaLr6Hvh573AGLx5We8o4poovb5pBoG7wbNMTTsWUy1z0Huh", + "6me+QFzThaZWJ0A/5lEwNIelb5FZF8670If3ak1lCQzRNShHgJ/L6R7AX9lZryyhaFUCE5hbmY7BO/dY", + "RKzDnxHyfqRLqcweUF9Ln5O/OKwPudU3nsMPvtkjC1Qsbjx5ltda/SiPuauggt16/I+b9sp8JvXO1NWX", + "E81hCFvwvmvMWgN9F2DZ7S8KtJHbygI3fsLfufVzX7w8znsn11FxR3lX4+kyrXXX1h8KNWHu61F96jcJ", + "L03dfYl4S+fXoDTXJvR9SnXif90ALLTBIVXrpmsE/k1/lwkM/fvYHkdmq5Xj1Z+d7caUfY5PT4AiXokE", + "xv/mIfRZR93UBEfoqO6dSzvprR3+i9ahbn3X8Xb/cJOepMCkjSy3AS3LvyzOrklkF86y3MzwFlLNc0mY", + "RosZzwGVClw3kZi6V9Hj1FDxYbMJZXtAavQ6vE6tNBtoEhUC33cNDH24RBEVe4lyN6RUrSBBgD7cXOkv", + "1sXw0e25GoYt+j0lML2hoD+vIOex2bZO7JgKHeY4dj2mWvmfKT/p66hfhSTlhTx9LbiZodB21DSpAgxx", + "Lr6RrHipEBGI5AoIW56MK54b5JsfPlyij+e3v8VVjrTJMv5mJW1+zaahV5SwpnqdvrYxPE+l2K+6eZZM", + "eB5acjYvsinbCG2yUat9rT7n15c4dNzhIbaqC4t2epQ8E74bqHC9XoLFAjYCd++yMxtBJkDx2C12IW0U", + "kMIeg5ZagVEc7km+pnalrC5teLsIF/o1M2tCf6nvUib7qNzurmtKr1dYwFi7mYlVrqUyiAvfx8ilqNVR", + "NRZw+WqX1rfcIDoDOtdJwtA00yX9rcoNPwl2EM0iJX20hO4Sb33fU84nQJc0T5MH8+lS/2STlwUxdBZ1", + "xuAeclk6Swg/6Yv42WmJNc6FkMajZk0ZEUpBN6Qn9bjGq0+r/wUAAP//PgoALk8/AAA=", } // GetSwagger returns the content of the embedded swagger specification file diff --git a/internal/core/domain/command_status.go b/internal/core/domain/command_status.go index 7289f4a9..a44f2af1 100644 --- a/internal/core/domain/command_status.go +++ b/internal/core/domain/command_status.go @@ -17,6 +17,8 @@ type LockStatus struct { LastStatusChange int64 `json:"last_status_change"` } +type ProcedureStatusMap map[string]map[string]LockStatus + type CommandExecutionError struct { Command string ExitCode int diff --git a/internal/core/domain/queue_item.go b/internal/core/domain/queue_item.go deleted file mode 100644 index 4966ac88..00000000 --- a/internal/core/domain/queue_item.go +++ /dev/null @@ -1,9 +0,0 @@ -package domain - -type QueueItem struct { - Name string - Status ScrollLockStatus - Error error - DoneChan chan struct{} - RestartCount uint -} diff --git a/internal/core/domain/runtime_scroll.go b/internal/core/domain/runtime_scroll.go index ba90644f..5690349e 100644 --- a/internal/core/domain/runtime_scroll.go +++ b/internal/core/domain/runtime_scroll.go @@ -34,7 +34,7 @@ type RuntimeScroll struct { UIPackages RuntimeUIPackages `json:"ui_packages,omitempty"` CreatedAt time.Time `json:"created_at"` UpdatedAt time.Time `json:"updated_at"` - Commands map[string]LockStatus `json:"commands,omitempty"` + Procedures ProcedureStatusMap `json:"procedures,omitempty"` } type RuntimeState struct { diff --git a/internal/core/ports/services_ports.go b/internal/core/ports/services_ports.go index bcf2667a..42a9755b 100644 --- a/internal/core/ports/services_ports.go +++ b/internal/core/ports/services_ports.go @@ -30,11 +30,6 @@ type ScrollServiceInterface interface { GetCommand(cmd string) (*domain.CommandInstructionSet, error) } -type ProcedureLauchnerInterface interface { - Run(cmd string) error - GetProcedureStatuses() map[string]domain.ScrollLockStatus -} - type LogManagerInterface interface { GetStreams() map[string]*domain.Log AddLine(stream string, sc []byte) @@ -78,13 +73,20 @@ type RuntimeScrollStore interface { } type RuntimeCommand struct { - Name string - ScrollID string - Command *domain.CommandInstructionSet - Root string - GlobalPorts []domain.Port - Routing []domain.RuntimeRouteAssignment - ProcedureEnv map[string]map[string]string + Name string + ScrollID string + Command *domain.CommandInstructionSet + Root string + GlobalPorts []domain.Port + Routing []domain.RuntimeRouteAssignment + ProcedureEnv map[string]map[string]string + ProcedureStatusObserver func(procedure string, status domain.ScrollLockStatus, exitCode *int) +} + +func (c RuntimeCommand) ObserveProcedureStatus(procedure string, status domain.ScrollLockStatus, exitCode *int) { + if c.ProcedureStatusObserver != nil { + c.ProcedureStatusObserver(procedure, status, exitCode) + } } type RuntimeUIPackageAction struct { diff --git a/internal/core/services/procedure_launcher.go b/internal/core/services/procedure_launcher.go deleted file mode 100644 index 51a9d926..00000000 --- a/internal/core/services/procedure_launcher.go +++ /dev/null @@ -1,138 +0,0 @@ -package services - -import ( - "errors" - "sync" - - "github.com/highcard-dev/daemon/internal/core/domain" - "github.com/highcard-dev/daemon/internal/core/ports" - "github.com/highcard-dev/daemon/internal/utils/logger" - "go.uber.org/zap" -) - -type ProcedureLauncher struct { - runtimeBackend ports.RuntimeBackendInterface - runtimeRoot string - runtimeScrollID string - runtimeScrollName string - routingProvider func() []domain.RuntimeRouteAssignment - scrollService ports.ScrollServiceInterface - procedures map[string]domain.ScrollLockStatus - proceduresMutex *sync.Mutex -} - -func NewProcedureLauncher( - scrollService ports.ScrollServiceInterface, - runtimeBackend ports.RuntimeBackendInterface, - runtimeRoot string, -) (*ProcedureLauncher, error) { - return NewProcedureLauncherForScroll(scrollService, runtimeBackend, runtimeRoot, "") -} - -func NewProcedureLauncherForScroll( - scrollService ports.ScrollServiceInterface, - runtimeBackend ports.RuntimeBackendInterface, - runtimeRoot string, - runtimeScrollID string, -) (*ProcedureLauncher, error) { - return NewProcedureLauncherForRuntime(scrollService, runtimeBackend, runtimeRoot, runtimeScrollID, "", nil) -} - -func NewProcedureLauncherForRuntime( - scrollService ports.ScrollServiceInterface, - runtimeBackend ports.RuntimeBackendInterface, - runtimeRoot string, - runtimeScrollID string, - runtimeScrollName string, - routingProvider func() []domain.RuntimeRouteAssignment, -) (*ProcedureLauncher, error) { - if runtimeBackend == nil { - return nil, errors.New("runtime backend is required") - } - - s := &ProcedureLauncher{ - runtimeBackend: runtimeBackend, - runtimeRoot: runtimeRoot, - runtimeScrollID: runtimeScrollID, - runtimeScrollName: runtimeScrollName, - routingProvider: routingProvider, - scrollService: scrollService, - procedures: make(map[string]domain.ScrollLockStatus), - proceduresMutex: &sync.Mutex{}, - } - - return s, nil -} - -func (sc *ProcedureLauncher) setProcedureStatus(procedure string, status domain.ScrollLockStatus) { - sc.proceduresMutex.Lock() - defer sc.proceduresMutex.Unlock() - sc.procedures[procedure] = status -} - -func (sc *ProcedureLauncher) GetProcedureStatuses() map[string]domain.ScrollLockStatus { - sc.proceduresMutex.Lock() - defer sc.proceduresMutex.Unlock() - statuses := make(map[string]domain.ScrollLockStatus, len(sc.procedures)) - for name, status := range sc.procedures { - statuses[name] = status - } - return statuses -} - -func (sc *ProcedureLauncher) Run(cmd string) error { - command, err := sc.scrollService.GetCommand(cmd) - if err != nil { - sc.setProcedureStatus(cmd, domain.ScrollLockStatusError) - return err - } - - logger.Log().Info("Running command", - zap.String("cmd", cmd), - zap.String("runMode", string(command.Run)), - ) - - root := sc.runtimeRoot - if root == "" { - root = sc.scrollService.GetCwd() - } - file := sc.scrollService.GetFile() - routing := []domain.RuntimeRouteAssignment{} - if sc.routingProvider != nil { - routing = sc.routingProvider() - } - procedureEnv, err := BuildRuntimeProcedureEnv(file, cmd, command, RuntimeEnvContext{ - ScrollID: sc.runtimeScrollID, - ScrollName: sc.runtimeScrollName, - Backend: sc.runtimeBackend.Name(), - Routing: routing, - }) - if err != nil { - sc.setProcedureStatus(cmd, domain.ScrollLockStatusError) - return err - } - sc.setProcedureStatus(cmd, domain.ScrollLockStatusRunning) - exitCode, err := sc.runtimeBackend.RunCommand(ports.RuntimeCommand{ - Name: cmd, - ScrollID: sc.runtimeScrollID, - Command: command, - Root: root, - GlobalPorts: file.Ports, - Routing: routing, - ProcedureEnv: procedureEnv, - }) - if err != nil { - sc.setProcedureStatus(cmd, domain.ScrollLockStatusError) - return err - } - if exitCode != nil && *exitCode != 0 { - sc.setProcedureStatus(cmd, domain.ScrollLockStatusError) - return &domain.CommandExecutionError{ - Command: cmd, - ExitCode: *exitCode, - Err: errors.New("command failed"), - } - } - sc.setProcedureStatus(cmd, domain.ScrollLockStatusDone) - return nil -} diff --git a/internal/core/services/procedure_launcher_test.go b/internal/core/services/procedure_launcher_test.go deleted file mode 100644 index 3130314f..00000000 --- a/internal/core/services/procedure_launcher_test.go +++ /dev/null @@ -1,243 +0,0 @@ -package services_test - -import ( - "testing" - - "github.com/highcard-dev/daemon/internal/core/domain" - "github.com/highcard-dev/daemon/internal/core/ports" - "github.com/highcard-dev/daemon/internal/core/services" - mock_ports "github.com/highcard-dev/daemon/test/mock" - "go.uber.org/mock/gomock" -) - -func TestProcedureLauncherPassesCommandContextToRuntimeBackend(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - scrollService := mock_ports.NewMockScrollServiceInterface(ctrl) - runtimeBackend := mock_ports.NewMockRuntimeBackendInterface(ctrl) - command := &domain.CommandInstructionSet{ - Run: domain.RunModePersistent, - Procedures: []*domain.Procedure{{ - Image: "alpine:3.20", - }}, - } - file := &domain.File{Ports: []domain.Port{{Name: "http", Port: 80}}} - - scrollService.EXPECT().GetCommand("serve").Return(command, nil) - scrollService.EXPECT().GetFile().Return(file) - runtimeBackend.EXPECT().Name().Return("docker") - runtimeBackend.EXPECT().RunCommand(gomock.Any()).DoAndReturn(func(runtimeCommand ports.RuntimeCommand) (*int, error) { - if runtimeCommand.Name != "serve" { - t.Fatalf("Name = %s, want serve", runtimeCommand.Name) - } - if runtimeCommand.Command != command { - t.Fatal("Command was not forwarded to runtime backend") - } - if runtimeCommand.Root != "/runtime-data" { - t.Fatalf("Root = %s, want /runtime-data", runtimeCommand.Root) - } - if len(runtimeCommand.GlobalPorts) != 1 || runtimeCommand.GlobalPorts[0].Name != "http" { - t.Fatalf("GlobalPorts = %#v", runtimeCommand.GlobalPorts) - } - return nil, nil - }) - - launcher, err := services.NewProcedureLauncher(scrollService, runtimeBackend, "/runtime-data") - if err != nil { - t.Fatal(err) - } - if err := launcher.Run("serve"); err != nil { - t.Fatal(err) - } -} - -func TestProcedureLauncherPassesRoutingToRuntimeBackend(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - scrollService := mock_ports.NewMockScrollServiceInterface(ctrl) - runtimeBackend := mock_ports.NewMockRuntimeBackendInterface(ctrl) - command := &domain.CommandInstructionSet{Procedures: []*domain.Procedure{{Image: "alpine:3.20"}}} - routing := []domain.RuntimeRouteAssignment{{PortName: "http", PublicPort: 18080}} - - scrollService.EXPECT().GetCommand("serve").Return(command, nil) - scrollService.EXPECT().GetFile().Return(&domain.File{Ports: []domain.Port{{Name: "http", Port: 80}}}) - runtimeBackend.EXPECT().Name().Return("docker") - runtimeBackend.EXPECT().RunCommand(gomock.Any()).DoAndReturn(func(runtimeCommand ports.RuntimeCommand) (*int, error) { - if len(runtimeCommand.Routing) != 1 || runtimeCommand.Routing[0].PublicPort != 18080 { - t.Fatalf("Routing = %#v", runtimeCommand.Routing) - } - return nil, nil - }) - - launcher, err := services.NewProcedureLauncherForRuntime(scrollService, runtimeBackend, "/runtime-data", "scroll-a", "", func() []domain.RuntimeRouteAssignment { - return routing - }) - if err != nil { - t.Fatal(err) - } - if err := launcher.Run("serve"); err != nil { - t.Fatal(err) - } -} - -func TestProcedureLauncherPassesScrollIDToRuntimeBackend(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - scrollService := mock_ports.NewMockScrollServiceInterface(ctrl) - runtimeBackend := mock_ports.NewMockRuntimeBackendInterface(ctrl) - command := &domain.CommandInstructionSet{ - Procedures: []*domain.Procedure{{Image: "alpine:3.20"}}, - } - scrollService.EXPECT().GetCommand("serve").Return(command, nil) - scrollService.EXPECT().GetFile().Return(&domain.File{}) - runtimeBackend.EXPECT().Name().Return("docker") - runtimeBackend.EXPECT().RunCommand(gomock.Any()).DoAndReturn(func(runtimeCommand ports.RuntimeCommand) (*int, error) { - if runtimeCommand.ScrollID != "scroll-a" { - t.Fatalf("ScrollID = %s, want scroll-a", runtimeCommand.ScrollID) - } - return nil, nil - }) - - launcher, err := services.NewProcedureLauncherForScroll(scrollService, runtimeBackend, "/runtime-data", "scroll-a") - if err != nil { - t.Fatal(err) - } - if err := launcher.Run("serve"); err != nil { - t.Fatal(err) - } -} - -func TestProcedureLauncherBuildsStableRuntimeEnv(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - scrollService := mock_ports.NewMockScrollServiceInterface(ctrl) - runtimeBackend := mock_ports.NewMockRuntimeBackendInterface(ctrl) - command := &domain.CommandInstructionSet{ - Procedures: []*domain.Procedure{{ - Id: ptrString("web"), - Image: "alpine:3.20", - Env: map[string]string{ - "APP_ENV": "test", - "DRUID_PORT_HTTP": "user-value", - "DRUID_SCROLL_NAME": "user-name", - }, - }}, - } - file := &domain.File{ - Name: "scroll-name", - Ports: []domain.Port{{ - Name: "http", - Port: 8080, - Protocol: "http", - }}, - } - - scrollService.EXPECT().GetCommand("serve").Return(command, nil) - scrollService.EXPECT().GetFile().Return(file) - runtimeBackend.EXPECT().Name().Return("kubernetes") - runtimeBackend.EXPECT().RunCommand(gomock.Any()).DoAndReturn(func(runtimeCommand ports.RuntimeCommand) (*int, error) { - env := runtimeCommand.ProcedureEnv["web"] - if env["APP_ENV"] != "test" { - t.Fatalf("APP_ENV = %q, want test", env["APP_ENV"]) - } - if env["DRUID_PORT_HTTP"] != "8080" || env["DRUID_PORT_HTTP_1"] != "8080" { - t.Fatalf("port env = %#v", env) - } - if env["DRUID_SCROLL_ID"] != "scroll-a" || env["DRUID_SCROLL_NAME"] != "scroll-name" { - t.Fatalf("scroll env = %#v", env) - } - if env["DRUID_RUNTIME_BACKEND"] != "kubernetes" { - t.Fatalf("backend env = %#v", env) - } - if env["DRUID_PORT_HTTP_PUBLIC"] != "443" || env["DRUID_PORT_HTTP_HOST"] != "web.example.test" || env["DRUID_PORT_HTTP_URL"] != "https://web.example.test" { - t.Fatalf("routing env = %#v", env) - } - if env["DRUID_IP"] != "192.0.2.10" || env["DRUID_PORT_HTTP_IP"] != "192.0.2.10" { - t.Fatalf("ip env = %#v", env) - } - if _, ok := env["DRUID_IP_WAIT"]; ok { - t.Fatalf("DRUID_IP_WAIT should not be set after routing: %#v", env) - } - return nil, nil - }) - - launcher, err := services.NewProcedureLauncherForRuntime(scrollService, runtimeBackend, "/runtime-data", "scroll-a", "", func() []domain.RuntimeRouteAssignment { - return []domain.RuntimeRouteAssignment{{ - Name: "web-http", - PortName: "http", - Host: "web.example.test", - ExternalIP: "192.0.2.10", - PublicPort: 443, - URL: "https://web.example.test", - }} - }) - if err != nil { - t.Fatal(err) - } - if err := launcher.Run("serve"); err != nil { - t.Fatal(err) - } -} - -func TestBuildRuntimeProcedureEnvSetsWaitBeforeRouting(t *testing.T) { - command := &domain.CommandInstructionSet{ - Procedures: []*domain.Procedure{{Image: "alpine:3.20"}}, - } - envs, err := services.BuildRuntimeProcedureEnv(&domain.File{ - Name: "scroll-name", - Ports: []domain.Port{{Name: "game-port", Port: 7777}}, - }, "serve", command, services.RuntimeEnvContext{ScrollID: "scroll-a", Backend: "docker"}) - if err != nil { - t.Fatal(err) - } - env := envs["serve.0"] - if env["DRUID_IP_WAIT"] != "true" { - t.Fatalf("env = %#v, want DRUID_IP_WAIT", env) - } - if env["DRUID_PORT_GAME_PORT"] != "7777" { - t.Fatalf("env = %#v, want normalized port env", env) - } -} - -func TestBuildRuntimeProcedureEnvDerivesURLFromPortProtocol(t *testing.T) { - command := &domain.CommandInstructionSet{Procedures: []*domain.Procedure{{Image: "alpine:3.20"}}} - envs, err := services.BuildRuntimeProcedureEnv(&domain.File{ - Name: "test", - Ports: []domain.Port{{Name: "http", Port: 8080, Protocol: "http"}}, - }, "serve", command, services.RuntimeEnvContext{ - Routing: []domain.RuntimeRouteAssignment{{ - Name: "web-http", - PortName: "http", - Host: "localhost", - ExternalIP: "127.0.0.1", - PublicPort: 18080, - }}, - }) - if err != nil { - t.Fatal(err) - } - if got := envs["serve.0"]["DRUID_PORT_HTTP_URL"]; got != "http://localhost:18080" { - t.Fatalf("DRUID_PORT_HTTP_URL = %q", got) - } -} - -func TestBuildRuntimeProcedureEnvRejectsDuplicateNormalizedPortNames(t *testing.T) { - _, err := services.BuildRuntimeProcedureEnv(&domain.File{ - Name: "scroll-name", - Ports: []domain.Port{ - {Name: "web-port", Port: 8080}, - {Name: "web_port", Port: 8081}, - }, - }, "serve", &domain.CommandInstructionSet{Procedures: []*domain.Procedure{{Image: "alpine:3.20"}}}, services.RuntimeEnvContext{}) - if err == nil { - t.Fatal("expected duplicate normalized port names to fail") - } -} - -func ptrString(value string) *string { - return &value -} diff --git a/internal/core/services/queue_manager.go b/internal/core/services/queue_manager.go index 1ebf2eb6..821889d6 100644 --- a/internal/core/services/queue_manager.go +++ b/internal/core/services/queue_manager.go @@ -1,16 +1,10 @@ package services import ( - "context" "errors" "fmt" - "sync" - "time" "github.com/highcard-dev/daemon/internal/core/domain" - "github.com/highcard-dev/daemon/internal/core/ports" - "github.com/highcard-dev/daemon/internal/utils/logger" - "go.uber.org/zap" ) var ErrAlreadyInQueue = fmt.Errorf("command is already in queue") @@ -22,481 +16,49 @@ type AddItemOptions struct { Force bool } -type QueueStatusObserver func(command string, status domain.ScrollLockStatus, exitCode *int) - -type QueueManager struct { - mu sync.Mutex - runQueueMu sync.Mutex - scrollService ports.ScrollServiceInterface - procedureLauncher ports.ProcedureLauchnerInterface - commandQueue map[string]*domain.QueueItem - taskChan chan string - taskDoneChan chan struct{} - shutdownChan chan struct{} - shutdownDoneChan chan struct{} - shutdownOnce sync.Once - workWg sync.WaitGroup - notifierChan []chan []string - statusObserver QueueStatusObserver -} - -func NewQueueManager( - scrollService ports.ScrollServiceInterface, - procedureLauncher ports.ProcedureLauchnerInterface, -) *QueueManager { - return &QueueManager{ - scrollService: scrollService, - procedureLauncher: procedureLauncher, - commandQueue: make(map[string]*domain.QueueItem), - taskChan: make(chan string, 100), - taskDoneChan: make(chan struct{}, 1), - shutdownChan: make(chan struct{}), - shutdownDoneChan: make(chan struct{}), - notifierChan: make([]chan []string, 0), - } -} - -func (sc *QueueManager) workItem(cmd string) error { - queueItem := sc.GetQueueItem(cmd) - if queueItem == nil { - return fmt.Errorf("command %s not found", cmd) - } - - logger.Log().Debug("Running command", - zap.String("cmd", cmd), - ) - - return sc.procedureLauncher.Run(cmd) -} - -func (sc *QueueManager) notify() { - sc.mu.Lock() - queuedCommands := make([]string, 0) - - for cmd, item := range sc.commandQueue { - if item.Status != domain.ScrollLockStatusDone && item.Status != domain.ScrollLockStatusError { - queuedCommands = append(queuedCommands, cmd) - } - } - - notifiers := make([]chan []string, len(sc.notifierChan)) - copy(notifiers, sc.notifierChan) - sc.mu.Unlock() - - for _, notifier := range notifiers { - select { - case notifier <- queuedCommands: - default: - logger.Log().Debug("Skipping slow queue notifier") - } - } -} - -func (sc *QueueManager) AddTempItem(cmd string) error { - return sc.addQueueItem(cmd, AddItemOptions{}) -} - -func (sc *QueueManager) AddForcedItem(cmd string) error { - return sc.addQueueItem(cmd, AddItemOptions{Force: true}) -} - -func (sc *QueueManager) RememberDoneItem(cmd string) { - sc.mu.Lock() - defer sc.mu.Unlock() - if _, ok := sc.commandQueue[cmd]; ok { - return - } - sc.commandQueue[cmd] = &domain.QueueItem{ - Status: domain.ScrollLockStatusDone, - } -} - -func (sc *QueueManager) AddTempItemWithWait(cmd string) error { - return sc.addQueueItem(cmd, AddItemOptions{ - Wait: true, - }) -} - -func (sc *QueueManager) addQueueItem(cmd string, options AddItemOptions) error { - sc.mu.Lock() - - logger.Log().Debug("Running command", - zap.String("cmd", cmd), - ) - - command, err := sc.scrollService.GetCommand(cmd) - - if err != nil { - sc.mu.Unlock() - return err - } - - if value, ok := sc.commandQueue[cmd]; ok { - - if value.Status != domain.ScrollLockStatusDone && value.Status != domain.ScrollLockStatusError { - sc.mu.Unlock() - return ErrAlreadyInQueue - } - - if value.Status == domain.ScrollLockStatusDone && command.Run == domain.RunModeOnce && !options.Force { - sc.mu.Unlock() - return ErrCommandDoneOnce - } - } - - var doneChan chan struct{} - if options.Wait { - doneChan = make(chan struct{}) - } - - item := &domain.QueueItem{ - Status: domain.ScrollLockStatusWaiting, - DoneChan: doneChan, - } - - sc.commandQueue[cmd] = item - sc.observeStatusLocked(cmd, domain.ScrollLockStatusWaiting, nil) - - sc.mu.Unlock() - - sc.taskChan <- cmd - - if options.Wait { - <-doneChan - item := sc.GetQueueItem(cmd) - if item != nil && item.Error != nil { - return item.Error - } - } - - return nil -} - -func (sc *QueueManager) SetStatusObserver(observer QueueStatusObserver) { - sc.mu.Lock() - defer sc.mu.Unlock() - sc.statusObserver = observer -} - -func (sc *QueueManager) HydrateCommandStatuses(statuses map[string]domain.LockStatus) error { - for cmd, status := range statuses { - command, err := sc.scrollService.GetCommand(cmd) - if err != nil { - return err - } - - if status.Status == domain.ScrollLockStatusDone { - if command.Run != domain.RunModeRestart && command.Run != domain.RunModePersistent { - sc.mu.Lock() - sc.commandQueue[cmd] = &domain.QueueItem{ - Status: domain.ScrollLockStatusDone, - } - sc.mu.Unlock() - continue - } - } - - sc.addQueueItem(cmd, AddItemOptions{}) - } - - return nil -} - -func (sc *QueueManager) Work() { - defer close(sc.shutdownDoneChan) - - for { - select { - case <-sc.taskChan: - sc.startRunQueue() - case <-sc.taskDoneChan: - sc.startRunQueue() - case <-sc.shutdownChan: - sc.workWg.Wait() - sc.mu.Lock() - sc.commandQueue = make(map[string]*domain.QueueItem) - sc.mu.Unlock() - return - } - } -} - -func (sc *QueueManager) startRunQueue() { - sc.workWg.Add(1) - go func() { - defer sc.workWg.Done() - sc.RunQueue() - sc.notify() - }() -} - -func (sc *QueueManager) RunQueue() { - sc.runQueueMu.Lock() - defer sc.runQueueMu.Unlock() - - sc.mu.Lock() - - queueKeys := make(map[string]domain.ScrollLockStatus, len(sc.commandQueue)) - for k, v := range sc.commandQueue { - queueKeys[k] = v.Status +func DeriveCommandStatusFromProcedures(commandName string, command *domain.CommandInstructionSet, statuses map[string]domain.LockStatus) (domain.ScrollLockStatus, bool) { + if command == nil || len(command.Procedures) == 0 { + return "", false } - - sc.mu.Unlock() - - logger.Log().Info("Running queue", zap.Any("queueKeys", queueKeys)) - - for cmd, status := range queueKeys { - - if status == domain.ScrollLockStatusRunning { + seen := false + allDone := true + hasRunning := false + hasWaiting := false + for idx, procedure := range command.Procedures { + procedureName := domain.ProcedureName(commandName, idx, procedure) + status, ok := statuses[procedureName] + if !ok { + allDone = false continue } - - command, err := sc.scrollService.GetCommand(cmd) - if err != nil { - logger.Log().Error("Error getting command", - zap.String("command", cmd), - zap.Error(err), - ) - sc.mu.Lock() - delete(sc.commandQueue, cmd) - sc.mu.Unlock() - continue - } - - if status == domain.ScrollLockStatusError { - continue - } - - isRestartMode := command.Run == domain.RunModeRestart - if status == domain.ScrollLockStatusDone && !isRestartMode { - continue - } - - dependencies := command.Needs - dependenciesReady := true - for _, dep := range dependencies { - _, ok := sc.commandQueue[dep] - if !ok { - dependenciesReady = false - sc.AddTempItem(dep) - continue - } - - if sc.getStatus(dep) != domain.ScrollLockStatusDone { - dependenciesReady = false - continue - } - } - if dependenciesReady { - item := sc.GetQueueItem(cmd) - runMode := command.Run - // We only run one command at a time to keep dependency resolution deterministic. - sc.setStatus(cmd, domain.ScrollLockStatusRunning, nil) - logger.Log().Info("Running command", zap.String("command", cmd)) - sc.workWg.Add(1) - go func(c string, i *domain.QueueItem) { - defer sc.workWg.Done() - defer func() { - if i.DoneChan != nil { - close(i.DoneChan) - } - - select { - case sc.taskDoneChan <- struct{}{}: - case <-sc.shutdownChan: - } - }() - - startedAt := time.Now() - err := sc.workItem(c) - isRestartMode := runMode == domain.RunModeRestart - - if err != nil { - logger.Log().Error("Error running command", zap.String("command", c), zap.Error(err)) - if !isRestartMode || domain.IsNonRetryableCommandError(err) { - sc.setError(c, err) - return - } - } - - if isRestartMode { - // Set status to waiting immediately so shutdown captures correct state. - sc.setStatus(c, domain.ScrollLockStatusWaiting, nil) - - // Exponential backoff for fast restarts (1s, 2s, 4s, ... max 5m) - if time.Since(startedAt) < 30*time.Second { - i.RestartCount++ - } else { - i.RestartCount = 0 - } - if i.RestartCount > 0 { - backoff := time.Duration(1<<(i.RestartCount-1)) * time.Second - if backoff > 5*time.Minute { - backoff = 5 * time.Minute - } - logger.Log().Info("Restarting with backoff", zap.String("command", c), zap.Duration("backoff", backoff), zap.Uint("restartCount", i.RestartCount)) - time.Sleep(backoff) - } else { - logger.Log().Info("Command done, restarting", zap.String("command", c)) - } - } else { - logger.Log().Info("Command done", zap.String("command", c)) - sc.setStatus(c, domain.ScrollLockStatusDone, nil) - } - - }(cmd, item) - } else { - logger.Log().Info("Dependencies not ready", zap.String("command", cmd)) - } - } -} - -func (sc *QueueManager) Shutdown() { - sc.shutdownOnce.Do(func() { - close(sc.shutdownChan) - sc.notify() - }) - - select { - case <-sc.shutdownDoneChan: - case <-time.After(5 * time.Second): - logger.Log().Warn("Timed out waiting for queue manager shutdown") - } -} - -func (sc *QueueManager) WaitUntilEmpty() { - _ = sc.WaitUntilEmptyContext(context.Background()) -} - -func (sc *QueueManager) WaitUntilEmptyContext(ctx context.Context) error { - notifier := make(chan []string, 10) - - sc.mu.Lock() - sc.notifierChan = append(sc.notifierChan, notifier) - if !sc.hasActiveItemsLocked() { - sc.removeNotifierLocked(notifier) - sc.mu.Unlock() - return nil - } - sc.mu.Unlock() - defer func() { - sc.mu.Lock() - sc.removeNotifierLocked(notifier) - sc.mu.Unlock() - }() - - for { - select { - case cmds := <-notifier: - if len(cmds) == 0 { - return nil - } - case <-ctx.Done(): - return ctx.Err() - case <-sc.shutdownChan: - return fmt.Errorf("queue manager shut down while waiting for commands") - } - } -} - -func (sc *QueueManager) activeCommands() []string { - sc.mu.Lock() - defer sc.mu.Unlock() - cmds := make([]string, 0) - for cmd, item := range sc.commandQueue { - if item.Status != domain.ScrollLockStatusDone && item.Status != domain.ScrollLockStatusError { - cmds = append(cmds, cmd) - } - } - return cmds -} - -func (sc *QueueManager) waitUntilEmptyForTest() { - for { - cmds := sc.activeCommands() - if len(cmds) == 0 { - return - } - time.Sleep(10 * time.Millisecond) - } -} - -func (sc *QueueManager) hasActiveItemsLocked() bool { - for _, item := range sc.commandQueue { - if item.Status != domain.ScrollLockStatusDone && item.Status != domain.ScrollLockStatusError { - return true - } - } - return false -} - -func (sc *QueueManager) removeNotifierLocked(notifier chan []string) { - for i, n := range sc.notifierChan { - if n == notifier { - sc.notifierChan = append(sc.notifierChan[:i], sc.notifierChan[i+1:]...) - return + seen = true + switch status.Status { + case domain.ScrollLockStatusError: + return domain.ScrollLockStatusError, true + case domain.ScrollLockStatusRunning: + hasRunning = true + allDone = false + case domain.ScrollLockStatusWaiting: + hasWaiting = true + allDone = false + case domain.ScrollLockStatusDone: + default: + allDone = false } } -} - -func (sc *QueueManager) GetQueueItem(cmd string) *domain.QueueItem { - sc.mu.Lock() - defer sc.mu.Unlock() - - if value, ok := sc.commandQueue[cmd]; ok { - return value - } - - return nil -} - -func (sc *QueueManager) getStatus(cmd string) domain.ScrollLockStatus { - sc.mu.Lock() - defer sc.mu.Unlock() - if value, ok := sc.commandQueue[cmd]; ok { - return value.Status - } - return domain.ScrollLockStatusDone -} - -func (sc *QueueManager) setError(cmd string, err error) { - sc.mu.Lock() - defer sc.mu.Unlock() - if value, ok := sc.commandQueue[cmd]; ok { - value.Status = domain.ScrollLockStatusError - value.Error = err - } - sc.observeStatusLocked(cmd, domain.ScrollLockStatusError, commandExitCode(err)) -} - -func (sc *QueueManager) setStatus(cmd string, status domain.ScrollLockStatus, exitCode *int) { - sc.mu.Lock() - defer sc.mu.Unlock() - if value, ok := sc.commandQueue[cmd]; ok { - value.Status = status + if hasRunning { + return domain.ScrollLockStatusRunning, true } - sc.observeStatusLocked(cmd, status, exitCode) -} - -func (sc *QueueManager) GetQueue() map[string]domain.ScrollLockStatus { - sc.mu.Lock() - defer sc.mu.Unlock() - - queue := make(map[string]domain.ScrollLockStatus) - for cmd, item := range sc.commandQueue { - queue[cmd] = item.Status + if hasWaiting || (seen && !allDone) { + return domain.ScrollLockStatusWaiting, true } - return queue -} - -func (sc *QueueManager) observeStatusLocked(cmd string, status domain.ScrollLockStatus, exitCode *int) { - if sc.statusObserver == nil { - return + if seen && allDone { + return domain.ScrollLockStatusDone, true } - sc.statusObserver(cmd, status, exitCode) + return "", false } -func commandExitCode(err error) *int { +func CommandExitCode(err error) *int { var commandErr *domain.CommandExecutionError if err != nil && errors.As(err, &commandErr) { return &commandErr.ExitCode diff --git a/internal/core/services/queue_manager_test.go b/internal/core/services/queue_manager_test.go index dacabf9c..ca0e67e1 100644 --- a/internal/core/services/queue_manager_test.go +++ b/internal/core/services/queue_manager_test.go @@ -2,422 +2,77 @@ package services_test import ( "errors" - "fmt" "testing" "github.com/highcard-dev/daemon/internal/core/domain" "github.com/highcard-dev/daemon/internal/core/services" - mock_ports "github.com/highcard-dev/daemon/test/mock" - "go.uber.org/mock/gomock" ) -type CommandTest struct { - Repeat int - AccualExecution int - RunMode domain.RunMode -} - -func TestQueueManager(t *testing.T) { +func TestDeriveCommandStatusFromProcedures(t *testing.T) { + command := &domain.CommandInstructionSet{Procedures: []*domain.Procedure{{}, {}}} - testCases := []CommandTest{ + tests := []struct { + name string + statuses map[string]domain.LockStatus + want domain.ScrollLockStatus + wantOK bool + }{ { - Repeat: 1, - AccualExecution: 1, - RunMode: domain.RunModeAlways, + name: "missing", + wantOK: false, }, { - Repeat: 5, - AccualExecution: 5, - RunMode: domain.RunModeAlways, + name: "done", + statuses: map[string]domain.LockStatus{ + "start.0": {Status: domain.ScrollLockStatusDone}, + "start.1": {Status: domain.ScrollLockStatusDone}, + }, + want: domain.ScrollLockStatusDone, + wantOK: true, }, { - Repeat: 1, - AccualExecution: 1, - RunMode: domain.RunModeOnce, + name: "running", + statuses: map[string]domain.LockStatus{ + "start.0": {Status: domain.ScrollLockStatusDone}, + "start.1": {Status: domain.ScrollLockStatusRunning}, + }, + want: domain.ScrollLockStatusRunning, + wantOK: true, }, { - Repeat: 2, - AccualExecution: 1, - RunMode: domain.RunModeOnce, + name: "waiting", + statuses: map[string]domain.LockStatus{ + "start.0": {Status: domain.ScrollLockStatusDone}, + }, + want: domain.ScrollLockStatusWaiting, + wantOK: true, }, { - Repeat: 5, - AccualExecution: 1, - RunMode: domain.RunModeOnce, + name: "error", + statuses: map[string]domain.LockStatus{ + "start.0": {Status: domain.ScrollLockStatusError}, + }, + want: domain.ScrollLockStatusError, + wantOK: true, }, } - for _, testCase := range testCases { - - t.Run(fmt.Sprintf("AddItem (RunMode: %s, Repeat: %d)", testCase.RunMode, testCase.Repeat), func(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - scrollService := mock_ports.NewMockScrollServiceInterface(ctrl) - runtimeBackend := mock_ports.NewMockRuntimeBackendInterface(ctrl) - - procedureLauncher, err := services.NewProcedureLauncher(scrollService, runtimeBackend, "/tmp") - if err != nil { - t.Error(err) - } - queueManager := services.NewQueueManager(scrollService, procedureLauncher) - - exitCode := 0 - runtimeBackend.EXPECT().Name().Return("docker").AnyTimes() - runtimeBackend.EXPECT().RunCommand(gomock.Any()).Return(&exitCode, nil).Times(testCase.AccualExecution) - - scrollService.EXPECT().GetCommand("test").Return(&domain.CommandInstructionSet{ - Run: testCase.RunMode, - Procedures: []*domain.Procedure{ - { - Image: "alpine:3.20", - Command: []string{"echo", "hello"}, - }, - }, - }, nil).AnyTimes() - - scrollService.EXPECT().GetCwd().Return("/tmp").AnyTimes() - scrollService.EXPECT().GetFile().Return(&domain.File{}).AnyTimes() - - go queueManager.Work() - - for i := 0; i < testCase.Repeat; i++ { - err := queueManager.AddTempItem("test") - if err != nil { - if testCase.RunMode == domain.RunModeOnce && err == services.ErrCommandDoneOnce { - continue - } - t.Error(err) - } - queueManager.WaitUntilEmpty() - } - }) - - t.Run(fmt.Sprintf("AddItem error first, but after that succeeds (RunMode: %s, Repeat: %d)", testCase.RunMode, testCase.Repeat), func(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - scrollService := mock_ports.NewMockScrollServiceInterface(ctrl) - - procedureLauncher := mock_ports.NewMockProcedureLauchnerInterface(ctrl) - queueManager := services.NewQueueManager(scrollService, procedureLauncher) - - scrollService.EXPECT().GetCommand("test").Return(&domain.CommandInstructionSet{ - Run: testCase.RunMode, - Procedures: []*domain.Procedure{ - { - Image: "alpine:3.20", - Command: []string{"echo", "hello"}, - }, - }, - }, nil).AnyTimes() - - scrollService.EXPECT().GetCwd().Return("/tmp").AnyTimes() - - times := testCase.AccualExecution - if testCase.RunMode == domain.RunModeOnce && testCase.Repeat > 1 { - times = 2 - } - - first := true - procedureLauncher.EXPECT().Run(gomock.Any()).DoAndReturn(func(cmd string) error { - if first { - first = false - return fmt.Errorf("error") - } else { - return nil - } - }).Times(times) - - go queueManager.Work() - - for i := 0; i < testCase.Repeat; i++ { - err := queueManager.AddTempItem("test") - - if err != nil { - if testCase.RunMode == domain.RunModeOnce && err == services.ErrCommandDoneOnce { - continue - } - t.Error(err) - } - queueManager.WaitUntilEmpty() + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, ok := services.DeriveCommandStatusFromProcedures("start", command, tt.statuses) + if ok != tt.wantOK || got != tt.want { + t.Fatalf("status = %s, ok = %v; want %s, %v", got, ok, tt.want, tt.wantOK) } }) - - } - - t.Run("AddItem Deep Need Structure", func(t *testing.T) { - - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - scrollService := mock_ports.NewMockScrollServiceInterface(ctrl) - runtimeBackend := mock_ports.NewMockRuntimeBackendInterface(ctrl) - - procedureLauncher, err := services.NewProcedureLauncher(scrollService, runtimeBackend, "/tmp") - if err != nil { - t.Error(err) - } - queueManager := services.NewQueueManager(scrollService, procedureLauncher) - - exitCode := 0 - runtimeBackend.EXPECT().Name().Return("docker").AnyTimes() - runtimeBackend.EXPECT().RunCommand(gomock.Any()).Return(&exitCode, nil).Times(4) - - scrollService.EXPECT().GetCommand("test").Return(&domain.CommandInstructionSet{ - Needs: []string{"dep1"}, - Procedures: []*domain.Procedure{ - { - Image: "alpine:3.20", - Command: []string{"echo", "hello"}, - }, - }, - }, nil).AnyTimes() - - scrollService.EXPECT().GetCommand("dep1").Return(&domain.CommandInstructionSet{ - Needs: []string{"dep2.1", "dep2.2"}, - Procedures: []*domain.Procedure{ - { - Image: "alpine:3.20", - Command: []string{"echo", "hello1"}, - }, - }, - }, nil).AnyTimes() - scrollService.EXPECT().GetCommand("dep2.1").Return(&domain.CommandInstructionSet{ - Run: domain.RunModeOnce, - Procedures: []*domain.Procedure{ - { - Image: "alpine:3.20", - Command: []string{"echo", "hello2.1"}, - }, - }, - }, nil).AnyTimes() - scrollService.EXPECT().GetCommand("dep2.2").Return(&domain.CommandInstructionSet{ - Procedures: []*domain.Procedure{ - { - Image: "alpine:3.20", - Command: []string{"echo", "hello2.2"}, - }, - }, - }, nil).AnyTimes() - - scrollService.EXPECT().GetCwd().Return("/tmp").AnyTimes() - scrollService.EXPECT().GetFile().Return(&domain.File{}).AnyTimes() - - go queueManager.Work() - err = queueManager.AddTempItem("test") - if err != nil { - t.Error(err) - } - - queueManager.WaitUntilEmpty() - - queue := queueManager.GetQueue() - if queue["dep2.1"] != domain.ScrollLockStatusDone { - t.Errorf("dep2.1 status must be done, got %s", queue["dep2.1"]) - } - }) -} - -func TestQueueManagerRestartStopsOnNonRetryableError(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - scrollService := mock_ports.NewMockScrollServiceInterface(ctrl) - procedureLauncher := mock_ports.NewMockProcedureLauchnerInterface(ctrl) - queueManager := services.NewQueueManager(scrollService, procedureLauncher) - - scrollService.EXPECT().GetCommand("start").Return(&domain.CommandInstructionSet{Run: domain.RunModeRestart}, nil).AnyTimes() - procedureLauncher.EXPECT().Run("start").Return(domain.NonRetryableCommand(errors.New("port already in use"))).Times(1) - - go queueManager.Work() - if err := queueManager.AddTempItem("start"); err != nil { - t.Fatal(err) - } - queueManager.WaitUntilEmpty() - - if got := queueManager.GetQueue()["start"]; got != domain.ScrollLockStatusError { - t.Fatalf("start = %s, want error", got) } } -func TestQueueManagerRestartRetriesRetryableError(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - scrollService := mock_ports.NewMockScrollServiceInterface(ctrl) - procedureLauncher := mock_ports.NewMockProcedureLauchnerInterface(ctrl) - queueManager := services.NewQueueManager(scrollService, procedureLauncher) - - scrollService.EXPECT().GetCommand("start").Return(&domain.CommandInstructionSet{Run: domain.RunModeRestart}, nil).AnyTimes() - attempt := 0 - procedureLauncher.EXPECT().Run("start").DoAndReturn(func(string) error { - attempt++ - if attempt == 1 { - return errors.New("temporary crash") - } - return domain.NonRetryableCommand(errors.New("stop test")) - }).Times(2) - - go queueManager.Work() - if err := queueManager.AddTempItem("start"); err != nil { - t.Fatal(err) - } - queueManager.WaitUntilEmpty() - - if attempt != 2 { - t.Fatalf("attempts = %d, want retry", attempt) - } -} - -func TestQueueManagerStatusObserver(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - scrollService := mock_ports.NewMockScrollServiceInterface(ctrl) - procedureLauncher := mock_ports.NewMockProcedureLauchnerInterface(ctrl) - queueManager := services.NewQueueManager(scrollService, procedureLauncher) - - scrollService.EXPECT().GetCommand("test").Return(&domain.CommandInstructionSet{}, nil).AnyTimes() - procedureLauncher.EXPECT().Run("test").Return(nil) - - observed := []domain.ScrollLockStatus{} - queueManager.SetStatusObserver(func(command string, status domain.ScrollLockStatus, exitCode *int) { - if command == "test" { - observed = append(observed, status) - } - }) - - go queueManager.Work() - if err := queueManager.AddTempItem("test"); err != nil { - t.Fatal(err) - } - queueManager.WaitUntilEmpty() - - want := []domain.ScrollLockStatus{ - domain.ScrollLockStatusWaiting, - domain.ScrollLockStatusRunning, - domain.ScrollLockStatusDone, - } - if len(observed) != len(want) { - t.Fatalf("expected %d observed statuses, got %d: %v", len(want), len(observed), observed) - } - for i := range want { - if observed[i] != want[i] { - t.Fatalf("status %d = %s, want %s", i, observed[i], want[i]) - } - } -} - -func TestQueueManagerPersistentCommandCompletesWithoutLooping(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - scrollService := mock_ports.NewMockScrollServiceInterface(ctrl) - procedureLauncher := mock_ports.NewMockProcedureLauchnerInterface(ctrl) - queueManager := services.NewQueueManager(scrollService, procedureLauncher) - - scrollService.EXPECT().GetCommand("serve").Return(&domain.CommandInstructionSet{Run: domain.RunModePersistent}, nil).AnyTimes() - procedureLauncher.EXPECT().Run("serve").Return(nil).Times(1) - - go queueManager.Work() - if err := queueManager.AddTempItem("serve"); err != nil { - t.Fatal(err) - } - queueManager.WaitUntilEmpty() - - if got := queueManager.GetQueue()["serve"]; got != domain.ScrollLockStatusDone { - t.Fatalf("serve = %s, want done", got) - } -} - -func TestQueueManagerRememberDoneItemSatisfiesDependency(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - scrollService := mock_ports.NewMockScrollServiceInterface(ctrl) - procedureLauncher := mock_ports.NewMockProcedureLauchnerInterface(ctrl) - queueManager := services.NewQueueManager(scrollService, procedureLauncher) - queueManager.RememberDoneItem("verify") - - scrollService.EXPECT().GetCommand("report").Return(&domain.CommandInstructionSet{Needs: []string{"verify"}}, nil).AnyTimes() - scrollService.EXPECT().GetCommand("verify").Return(&domain.CommandInstructionSet{}, nil).AnyTimes() - procedureLauncher.EXPECT().Run("report").Return(nil) - - go queueManager.Work() - if err := queueManager.AddTempItem("report"); err != nil { - t.Fatal(err) - } - queueManager.WaitUntilEmpty() - - queue := queueManager.GetQueue() - if queue["report"] != domain.ScrollLockStatusDone { - t.Fatalf("report = %s, want done; queue=%#v", queue["report"], queue) - } - if queue["verify"] != domain.ScrollLockStatusDone { - t.Fatalf("verify = %s, want done; queue=%#v", queue["verify"], queue) - } -} - -func TestQueueManagerHydrateCommandStatuses(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - scrollService := mock_ports.NewMockScrollServiceInterface(ctrl) - procedureLauncher := mock_ports.NewMockProcedureLauchnerInterface(ctrl) - queueManager := services.NewQueueManager(scrollService, procedureLauncher) - - scrollService.EXPECT().GetCommand("install").Return(&domain.CommandInstructionSet{Run: domain.RunModeOnce}, nil).AnyTimes() - scrollService.EXPECT().GetCommand("start").Return(&domain.CommandInstructionSet{Run: domain.RunModeRestart}, nil).AnyTimes() - scrollService.EXPECT().GetCommand("serve").Return(&domain.CommandInstructionSet{Run: domain.RunModePersistent}, nil).AnyTimes() - scrollService.EXPECT().GetCommand("repair").Return(&domain.CommandInstructionSet{}, nil).AnyTimes() - - if err := queueManager.HydrateCommandStatuses(map[string]domain.LockStatus{ - "install": {Status: domain.ScrollLockStatusDone}, - "start": {Status: domain.ScrollLockStatusDone}, - "serve": {Status: domain.ScrollLockStatusDone}, - "repair": {Status: domain.ScrollLockStatusError}, - }); err != nil { - t.Fatal(err) - } - - queue := queueManager.GetQueue() - if queue["install"] != domain.ScrollLockStatusDone { - t.Fatalf("install = %s, want done", queue["install"]) - } - if queue["start"] != domain.ScrollLockStatusWaiting { - t.Fatalf("start = %s, want waiting", queue["start"]) - } - if queue["serve"] != domain.ScrollLockStatusWaiting { - t.Fatalf("serve = %s, want waiting", queue["serve"]) - } - if queue["repair"] != domain.ScrollLockStatusWaiting { - t.Fatalf("repair = %s, want waiting", queue["repair"]) - } -} - -func TestQueueManagerAddForcedItemRerunsDoneOnceCommand(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - scrollService := mock_ports.NewMockScrollServiceInterface(ctrl) - procedureLauncher := mock_ports.NewMockProcedureLauchnerInterface(ctrl) - queueManager := services.NewQueueManager(scrollService, procedureLauncher) - - scrollService.EXPECT().GetCommand("start").Return(&domain.CommandInstructionSet{Run: domain.RunModeOnce}, nil).AnyTimes() - procedureLauncher.EXPECT().Run("start").Return(nil).Times(2) - - go queueManager.Work() - - if err := queueManager.AddTempItem("start"); err != nil { - t.Fatal(err) - } - queueManager.WaitUntilEmpty() - if err := queueManager.AddTempItem("start"); err != services.ErrCommandDoneOnce { - t.Fatalf("AddTempItem error = %v, want ErrCommandDoneOnce", err) +func TestCommandExitCode(t *testing.T) { + exitCode := services.CommandExitCode(&domain.CommandExecutionError{ExitCode: 23, Err: errors.New("failed")}) + if exitCode == nil || *exitCode != 23 { + t.Fatalf("exitCode = %v, want 23", exitCode) } - if err := queueManager.AddForcedItem("start"); err != nil { - t.Fatal(err) + if exitCode := services.CommandExitCode(errors.New("plain")); exitCode != nil { + t.Fatalf("exitCode = %v, want nil", exitCode) } - queueManager.WaitUntilEmpty() } diff --git a/internal/core/services/runtime_env_test.go b/internal/core/services/runtime_env_test.go new file mode 100644 index 00000000..057c8f4f --- /dev/null +++ b/internal/core/services/runtime_env_test.go @@ -0,0 +1,63 @@ +package services_test + +import ( + "testing" + + "github.com/highcard-dev/daemon/internal/core/domain" + "github.com/highcard-dev/daemon/internal/core/services" +) + +func TestBuildRuntimeProcedureEnvSetsWaitBeforeRouting(t *testing.T) { + command := &domain.CommandInstructionSet{ + Procedures: []*domain.Procedure{{Image: "alpine:3.20"}}, + } + envs, err := services.BuildRuntimeProcedureEnv(&domain.File{ + Name: "scroll-name", + Ports: []domain.Port{{Name: "game-port", Port: 7777}}, + }, "serve", command, services.RuntimeEnvContext{ScrollID: "scroll-a", Backend: "docker"}) + if err != nil { + t.Fatal(err) + } + env := envs["serve.0"] + if env["DRUID_IP_WAIT"] != "true" { + t.Fatalf("env = %#v, want DRUID_IP_WAIT", env) + } + if env["DRUID_PORT_GAME_PORT"] != "7777" { + t.Fatalf("env = %#v, want normalized port env", env) + } +} + +func TestBuildRuntimeProcedureEnvDerivesURLFromPortProtocol(t *testing.T) { + command := &domain.CommandInstructionSet{Procedures: []*domain.Procedure{{Image: "alpine:3.20"}}} + envs, err := services.BuildRuntimeProcedureEnv(&domain.File{ + Name: "test", + Ports: []domain.Port{{Name: "http", Port: 8080, Protocol: "http"}}, + }, "serve", command, services.RuntimeEnvContext{ + Routing: []domain.RuntimeRouteAssignment{{ + Name: "web-http", + PortName: "http", + Host: "localhost", + ExternalIP: "127.0.0.1", + PublicPort: 18080, + }}, + }) + if err != nil { + t.Fatal(err) + } + if got := envs["serve.0"]["DRUID_PORT_HTTP_URL"]; got != "http://localhost:18080" { + t.Fatalf("DRUID_PORT_HTTP_URL = %q", got) + } +} + +func TestBuildRuntimeProcedureEnvRejectsDuplicateNormalizedPortNames(t *testing.T) { + _, err := services.BuildRuntimeProcedureEnv(&domain.File{ + Name: "scroll-name", + Ports: []domain.Port{ + {Name: "web-port", Port: 8080}, + {Name: "web_port", Port: 8081}, + }, + }, "serve", &domain.CommandInstructionSet{Procedures: []*domain.Procedure{{Image: "alpine:3.20"}}}, services.RuntimeEnvContext{}) + if err == nil { + t.Fatal("expected duplicate normalized port names to fail") + } +} diff --git a/internal/core/services/runtime_scroll_manager.go b/internal/core/services/runtime_scroll_manager.go index d279ecf2..a431cb1a 100644 --- a/internal/core/services/runtime_scroll_manager.go +++ b/internal/core/services/runtime_scroll_manager.go @@ -62,7 +62,7 @@ func (m *RuntimeScrollManager) CreateWithDigest(artifact string, artifactDigest ScrollName: scroll.Name, ScrollYAML: string(scrollYAML), Status: domain.RuntimeScrollStatusCreated, - Commands: map[string]domain.LockStatus{}, + Procedures: domain.ProcedureStatusMap{}, } if err := m.store.CreateScroll(runtimeScroll); err != nil { return nil, err diff --git a/internal/runtime/docker/procedures.go b/internal/runtime/docker/procedures.go index ccf39626..15b7a69c 100644 --- a/internal/runtime/docker/procedures.go +++ b/internal/runtime/docker/procedures.go @@ -34,32 +34,43 @@ func (b *Backend) RunCommand(command ports.RuntimeCommand) (*int, error) { ) if command.Command.Run == domain.RunModePersistent { if procedure.IsSignal() { + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusRunning, nil) if err := b.Signal(procedureName, procedure.Target, procedure.Signal, command.Root); err != nil { + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusError, nil) return nil, err } + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusDone, nil) continue } if procedure.Image == "" { return nil, fmt.Errorf("docker runtime procedure %s requires image", procedureName) } + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusRunning, nil) if err := b.startPersistentContainer(runtimeConsoleID(command.ScrollID, procedureName), command.Name, procedureName, procedureResourceName(command.Name, idx), procedure, command.Root, command.GlobalPorts, command.Routing, env); err != nil { + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusError, nil) return nil, err } continue } + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusRunning, nil) exitCode, err := b.runProcedure(runtimeConsoleID(command.ScrollID, procedureName), command.Name, procedureName, procedureResourceName(command.Name, idx), procedure, command.Root, command.GlobalPorts, command.Routing, env) if err != nil { if exitCode != nil && *exitCode != 0 && procedure.IgnoreFailure { + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusDone, exitCode) continue } + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusError, exitCode) return exitCode, err } if exitCode != nil && *exitCode != 0 { if procedure.IgnoreFailure { + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusDone, exitCode) continue } + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusError, exitCode) return exitCode, nil } + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusDone, exitCode) } return nil, nil } diff --git a/internal/runtime/docker/state_store.go b/internal/runtime/docker/state_store.go index aa2ba2b1..620d8be9 100644 --- a/internal/runtime/docker/state_store.go +++ b/internal/runtime/docker/state_store.go @@ -27,16 +27,16 @@ const scrollsTableSQL = ` artifact_digest TEXT NOT NULL DEFAULT '', root TEXT NOT NULL, scroll_name TEXT NOT NULL, - scroll_yaml TEXT NOT NULL DEFAULT '', - status TEXT NOT NULL, - last_error TEXT NOT NULL DEFAULT '', - created_at TEXT NOT NULL, - updated_at TEXT NOT NULL, - commands_json TEXT NOT NULL DEFAULT '{}', - routing_json TEXT NOT NULL DEFAULT '[]', - ui_packages_json TEXT NOT NULL DEFAULT '{}' - ) -` + scroll_yaml TEXT NOT NULL DEFAULT '', + status TEXT NOT NULL, + last_error TEXT NOT NULL DEFAULT '', + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + procedures_json TEXT NOT NULL DEFAULT '{}', + routing_json TEXT NOT NULL DEFAULT '[]', + ui_packages_json TEXT NOT NULL DEFAULT '{}' + ) + ` func NewStateStore(stateDir string) (*StateStore, error) { if stateDir == "" { @@ -73,10 +73,10 @@ func (s *StateStore) CreateScroll(scroll *domain.RuntimeScroll) error { if scroll.Status == "" { scroll.Status = domain.RuntimeScrollStatusCreated } - if scroll.Commands == nil { - scroll.Commands = map[string]domain.LockStatus{} + if scroll.Procedures == nil { + scroll.Procedures = domain.ProcedureStatusMap{} } - commands, err := json.Marshal(scroll.Commands) + procedures, err := json.Marshal(scroll.Procedures) if err != nil { return err } @@ -91,9 +91,9 @@ func (s *StateStore) CreateScroll(scroll *domain.RuntimeScroll) error { } _, err = db.Exec(` - INSERT INTO scrolls (id, owner_id, artifact, artifact_digest, root, scroll_name, scroll_yaml, status, last_error, created_at, updated_at, commands_json, routing_json, ui_packages_json) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - `, scroll.ID, scroll.OwnerID, scroll.Artifact, scroll.ArtifactDigest, scroll.Root, scroll.ScrollName, scroll.ScrollYAML, scroll.Status, scroll.LastError, formatTime(scroll.CreatedAt), formatTime(scroll.UpdatedAt), string(commands), string(routing), string(uiPackages)) + INSERT INTO scrolls (id, owner_id, artifact, artifact_digest, root, scroll_name, scroll_yaml, status, last_error, created_at, updated_at, procedures_json, routing_json, ui_packages_json) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + `, scroll.ID, scroll.OwnerID, scroll.Artifact, scroll.ArtifactDigest, scroll.Root, scroll.ScrollName, scroll.ScrollYAML, scroll.Status, scroll.LastError, formatTime(scroll.CreatedAt), formatTime(scroll.UpdatedAt), string(procedures), string(routing), string(uiPackages)) if err != nil { return fmt.Errorf("create runtime scroll %s: %w", scroll.ID, err) } @@ -108,10 +108,10 @@ func (s *StateStore) ListScrolls() ([]*domain.RuntimeScroll, error) { defer db.Close() rows, err := db.Query(` - SELECT id, owner_id, artifact, artifact_digest, root, scroll_name, scroll_yaml, status, last_error, created_at, updated_at, commands_json, routing_json, ui_packages_json - FROM scrolls - ORDER BY id - `) + SELECT id, owner_id, artifact, artifact_digest, root, scroll_name, scroll_yaml, status, last_error, created_at, updated_at, procedures_json, routing_json, ui_packages_json + FROM scrolls + ORDER BY id + `) if err != nil { return nil, err } @@ -136,10 +136,10 @@ func (s *StateStore) GetScroll(id string) (*domain.RuntimeScroll, error) { defer db.Close() row := db.QueryRow(` - SELECT id, owner_id, artifact, artifact_digest, root, scroll_name, scroll_yaml, status, last_error, created_at, updated_at, commands_json, routing_json, ui_packages_json - FROM scrolls - WHERE id = ? - `, id) + SELECT id, owner_id, artifact, artifact_digest, root, scroll_name, scroll_yaml, status, last_error, created_at, updated_at, procedures_json, routing_json, ui_packages_json + FROM scrolls + WHERE id = ? + `, id) scroll, err := scanRuntimeScroll(row) if errors.Is(err, sql.ErrNoRows) { return nil, domain.ErrRuntimeScrollNotFound @@ -155,7 +155,7 @@ func (s *StateStore) UpdateScroll(scroll *domain.RuntimeScroll) error { defer db.Close() scroll.UpdatedAt = time.Now().UTC() - commands, err := json.Marshal(scroll.Commands) + procedures, err := json.Marshal(scroll.Procedures) if err != nil { return err } @@ -169,9 +169,9 @@ func (s *StateStore) UpdateScroll(scroll *domain.RuntimeScroll) error { } res, err := db.Exec(` UPDATE scrolls - SET owner_id = ?, artifact = ?, artifact_digest = ?, root = ?, scroll_name = ?, scroll_yaml = ?, status = ?, last_error = ?, updated_at = ?, commands_json = ?, routing_json = ?, ui_packages_json = ? - WHERE id = ? - `, scroll.OwnerID, scroll.Artifact, scroll.ArtifactDigest, scroll.Root, scroll.ScrollName, scroll.ScrollYAML, scroll.Status, scroll.LastError, formatTime(scroll.UpdatedAt), string(commands), string(routing), string(uiPackages), scroll.ID) + SET owner_id = ?, artifact = ?, artifact_digest = ?, root = ?, scroll_name = ?, scroll_yaml = ?, status = ?, last_error = ?, updated_at = ?, procedures_json = ?, routing_json = ?, ui_packages_json = ? + WHERE id = ? + `, scroll.OwnerID, scroll.Artifact, scroll.ArtifactDigest, scroll.Root, scroll.ScrollName, scroll.ScrollYAML, scroll.Status, scroll.LastError, formatTime(scroll.UpdatedAt), string(procedures), string(routing), string(uiPackages), scroll.ID) if err != nil { return err } @@ -226,6 +226,18 @@ func (s *StateStore) open() (*sql.DB, error) { db.Close() return nil, err } + hasLegacyCommands, err := tableHasColumn(db, "scrolls", "commands_"+"json") + if err != nil { + db.Close() + return nil, err + } + if hasLegacyCommands { + db.Close() + if err := s.resetDB(); err != nil { + return nil, err + } + return s.open() + } if err := ensureColumn(db, "scrolls", "artifact_digest", "TEXT NOT NULL DEFAULT ''"); err != nil { db.Close() return nil, err @@ -242,6 +254,10 @@ func (s *StateStore) open() (*sql.DB, error) { db.Close() return nil, err } + if err := ensureColumn(db, "scrolls", "procedures_json", "TEXT NOT NULL DEFAULT '{}'"); err != nil { + db.Close() + return nil, err + } if err := ensureColumn(db, "scrolls", "routing_json", "TEXT NOT NULL DEFAULT '[]'"); err != nil { db.Close() return nil, err @@ -253,6 +269,15 @@ func (s *StateStore) open() (*sql.DB, error) { return db, nil } +func (s *StateStore) resetDB() error { + for _, path := range []string{s.dbPath, s.dbPath + "-wal", s.dbPath + "-shm"} { + if err := os.Remove(path); err != nil && !os.IsNotExist(err) { + return err + } + } + return nil +} + func ensureColumn(db *sql.DB, table string, column string, definition string) error { exists, err := tableHasColumn(db, table, column) if err != nil || exists { @@ -305,24 +330,24 @@ func scanRuntimeScroll(scanner runtimeScrollScanner) (*domain.RuntimeScroll, err var lastError string var createdAt string var updatedAt string - var commandsJSON string + var proceduresJSON string var routingJSON string var uiPackagesJSON string - if err := scanner.Scan(&scroll.ID, &scroll.OwnerID, &scroll.Artifact, &scroll.ArtifactDigest, &scroll.Root, &scroll.ScrollName, &scroll.ScrollYAML, &status, &lastError, &createdAt, &updatedAt, &commandsJSON, &routingJSON, &uiPackagesJSON); err != nil { + if err := scanner.Scan(&scroll.ID, &scroll.OwnerID, &scroll.Artifact, &scroll.ArtifactDigest, &scroll.Root, &scroll.ScrollName, &scroll.ScrollYAML, &status, &lastError, &createdAt, &updatedAt, &proceduresJSON, &routingJSON, &uiPackagesJSON); err != nil { return nil, err } scroll.Status = domain.RuntimeScrollStatus(status) scroll.LastError = lastError scroll.CreatedAt = parseTime(createdAt) scroll.UpdatedAt = parseTime(updatedAt) - if commandsJSON == "" { - commandsJSON = "{}" + if proceduresJSON == "" { + proceduresJSON = "{}" } - if err := json.Unmarshal([]byte(commandsJSON), &scroll.Commands); err != nil { + if err := json.Unmarshal([]byte(proceduresJSON), &scroll.Procedures); err != nil { return nil, err } - if scroll.Commands == nil { - scroll.Commands = map[string]domain.LockStatus{} + if scroll.Procedures == nil { + scroll.Procedures = domain.ProcedureStatusMap{} } if routingJSON == "" { routingJSON = "[]" diff --git a/internal/runtime/docker/state_store_test.go b/internal/runtime/docker/state_store_test.go index e6750c28..f0ccacf5 100644 --- a/internal/runtime/docker/state_store_test.go +++ b/internal/runtime/docker/state_store_test.go @@ -7,7 +7,7 @@ import ( "github.com/highcard-dev/daemon/internal/core/domain" ) -func TestStateStorePersistsCommandStatuses(t *testing.T) { +func TestStateStorePersistsProcedureStatuses(t *testing.T) { store, err := NewStateStore(t.TempDir()) if err != nil { t.Fatal(err) @@ -19,10 +19,12 @@ func TestStateStorePersistsCommandStatuses(t *testing.T) { Root: "/tmp/root", ScrollName: "test", ScrollYAML: "name: test\n", - Commands: map[string]domain.LockStatus{ + Procedures: domain.ProcedureStatusMap{ "start": { - Status: domain.ScrollLockStatusRunning, - LastStatusChange: 10, + "start.0": { + Status: domain.ScrollLockStatusRunning, + LastStatusChange: 10, + }, }, }, } @@ -31,7 +33,7 @@ func TestStateStorePersistsCommandStatuses(t *testing.T) { t.Fatal(err) } - scroll.Commands["start"] = domain.LockStatus{ + scroll.Procedures["start"]["start.0"] = domain.LockStatus{ Status: domain.ScrollLockStatusError, ExitCode: &exitCode, LastStatusChange: 20, @@ -45,7 +47,7 @@ func TestStateStorePersistsCommandStatuses(t *testing.T) { if err != nil { t.Fatal(err) } - status := got.Commands["start"] + status := got.Procedures["start"]["start.0"] if status.Status != domain.ScrollLockStatusError { t.Fatalf("status = %s, want error", status.Status) } diff --git a/internal/runtime/kubernetes/keepalive.go b/internal/runtime/kubernetes/keepalive.go new file mode 100644 index 00000000..657e0247 --- /dev/null +++ b/internal/runtime/kubernetes/keepalive.go @@ -0,0 +1,125 @@ +package kubernetes + +import ( + "context" + "strings" + "time" + + batchv1 "k8s.io/api/batch/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/highcard-dev/daemon/internal/core/domain" + "github.com/highcard-dev/daemon/internal/utils/logger" + "go.uber.org/zap" +) + +func (b *Backend) keepAliveTrafficIdleStopper(namespace string, root string, commandName string, procedureName string, procedure *domain.Procedure, globalPorts []domain.Port) jobIdleStopFunc { + if !b.config.HubbleEnabled() || b.hubble == nil || procedure == nil || coldstarterProcedure(procedureName, procedure) { + return nil + } + ports := portsByName(globalPorts) + thresholds := make([]keepAliveThreshold, 0, len(procedure.ExpectedPorts)) + for _, expectedPort := range procedure.ExpectedPorts { + if expectedPort.KeepAliveTraffic == "" { + continue + } + threshold, err := domain.ParseKeepAliveTraffic(expectedPort.KeepAliveTraffic) + if err != nil { + return func(context.Context, *batchv1.Job) (bool, error) { + return false, err + } + } + port, ok := ports[expectedPort.Name] + if !ok { + return nil + } + thresholds = append(thresholds, keepAliveThreshold{ + expectedPort: expectedPort, + port: port, + window: threshold.Window, + }) + } + if len(thresholds) == 0 { + return nil + } + _, scrollID, err := parseRef(root) + if err != nil { + return func(context.Context, *batchv1.Job) (bool, error) { + return false, err + } + } + return func(ctx context.Context, job *batchv1.Job) (bool, error) { + if job == nil { + return false, nil + } + now := time.Now() + for _, threshold := range thresholds { + if !keepAliveWindowElapsed(now, job.CreationTimestamp, threshold.window) { + return false, nil + } + } + for _, threshold := range thresholds { + hasTraffic, err := b.hubble.HasFlow(ctx, TrafficQuery{ + Namespace: namespace, + ScrollID: scrollID, + ProcedureName: procedureName, + Port: threshold.port, + ExpectedPort: threshold.expectedPort, + Window: threshold.window, + }) + if err != nil { + logger.Log().Warn("Hubble Relay unavailable; keepAliveTraffic enforcement skipped", + zap.String("namespace", namespace), + zap.String("job", job.Name), + zap.String("command", commandName), + zap.String("procedure", procedureName), + zap.String("port", threshold.expectedPort.Name), + zap.Error(err), + ) + return false, nil + } + if hasTraffic { + return false, nil + } + } + logger.Log().Info("Stopping idle Kubernetes procedure after keepAliveTraffic miss", + zap.String("namespace", namespace), + zap.String("job", job.Name), + zap.String("command", commandName), + zap.String("procedure", procedureName), + zap.Int("ports", len(thresholds)), + ) + if err := b.deleteJobAndWait(ctx, namespace, job.Name); err != nil { + return false, err + } + return true, nil + } +} + +type keepAliveThreshold struct { + expectedPort domain.ExpectedPort + port domain.Port + window time.Duration +} + +func keepAliveWindowElapsed(now time.Time, created metav1.Time, window time.Duration) bool { + if created.IsZero() { + return false + } + return now.Sub(created.Time) >= window +} + +func coldstarterProcedure(procedureName string, procedure *domain.Procedure) bool { + if strings.EqualFold(procedureName, "coldstart") { + return true + } + if procedure == nil { + return false + } + for _, part := range procedure.Command { + if strings.Contains(part, "druid-coldstarter") { + return true + } + } + return false +} diff --git a/internal/runtime/kubernetes/procedures.go b/internal/runtime/kubernetes/procedures.go index 9f0fcc32..6b02c2d9 100644 --- a/internal/runtime/kubernetes/procedures.go +++ b/internal/runtime/kubernetes/procedures.go @@ -84,10 +84,13 @@ func (b *Backend) RunCommand(command ports.RuntimeCommand) (*int, error) { ) if command.Command.Run == domain.RunModePersistent { if procedure.IsSignal() { + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusRunning, nil) if err := b.Signal(procedureName, procedure.Target, procedure.Signal, command.Root); err != nil { + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusError, nil) logger.Log().Error("Kubernetes signal procedure failed", zap.String("scroll_id", command.ScrollID), zap.String("command", command.Name), zap.String("procedure", procedureName), zap.String("target", procedure.Target), zap.String("signal", procedure.Signal), zap.Error(err)) return nil, err } + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusDone, nil) logger.Log().Info("Kubernetes signal procedure completed", zap.String("scroll_id", command.ScrollID), zap.String("command", command.Name), zap.String("procedure", procedureName), zap.String("target", procedure.Target), zap.String("signal", procedure.Signal)) continue } @@ -96,29 +99,37 @@ func (b *Backend) RunCommand(command ports.RuntimeCommand) (*int, error) { logger.Log().Error("Kubernetes persistent procedure missing image", zap.String("scroll_id", command.ScrollID), zap.String("command", command.Name), zap.String("procedure", procedureName), zap.Error(err)) return nil, err } + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusRunning, nil) if err := b.ensurePersistentProcedure(context.Background(), command.ScrollID, command.Root, command.Name, procedureName, resourceName, procedure, command.GlobalPorts, env, portUse); err != nil { + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusError, nil) logger.Log().Error("Kubernetes persistent procedure failed", zap.String("scroll_id", command.ScrollID), zap.String("command", command.Name), zap.String("procedure", procedureName), zap.Error(err)) return nil, err } continue } + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusRunning, nil) exitCode, err := b.runJobProcedure(command.ScrollID, command.Name, procedureName, resourceName, procedure, command.Root, command.GlobalPorts, env, portUse) if err != nil { if exitCode != nil && *exitCode != 0 && procedure.IgnoreFailure { + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusDone, exitCode) logger.Log().Warn("Kubernetes job procedure failed but failure is ignored", zap.String("scroll_id", command.ScrollID), zap.String("command", command.Name), zap.String("procedure", procedureName), zap.Int("exit_code", *exitCode), zap.Error(err)) continue } + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusError, exitCode) logger.Log().Error("Kubernetes job procedure failed", zap.String("scroll_id", command.ScrollID), zap.String("command", command.Name), zap.String("procedure", procedureName), zap.Any("exit_code", exitCode), zap.Error(err)) return exitCode, err } if exitCode != nil && *exitCode != 0 { if procedure.IgnoreFailure { + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusDone, exitCode) logger.Log().Warn("Kubernetes job procedure failed but failure is ignored", zap.String("scroll_id", command.ScrollID), zap.String("command", command.Name), zap.String("procedure", procedureName), zap.Int("exit_code", *exitCode)) continue } + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusError, exitCode) logger.Log().Warn("Kubernetes command stopped after non-zero procedure exit", zap.String("scroll_id", command.ScrollID), zap.String("command", command.Name), zap.String("procedure", procedureName), zap.Int("exit_code", *exitCode)) return exitCode, nil } + command.ObserveProcedureStatus(procedureName, domain.ScrollLockStatusDone, exitCode) if exitCode != nil { logger.Log().Info("Kubernetes job procedure completed", zap.String("scroll_id", command.ScrollID), zap.String("command", command.Name), zap.String("procedure", procedureName), zap.Int("exit_code", *exitCode)) } @@ -189,7 +200,7 @@ func (b *Backend) runJobProcedure(scrollID string, commandName string, procedure } else { logger.Log().Warn("Could not find Kubernetes job pod before wait; console logs may be empty", zap.String("scroll_id", scrollID), zap.String("command", commandName), zap.String("procedure", procedureName), zap.String("namespace", namespace), zap.String("job", jobName), zap.Error(err)) } - exitCode, err := b.waitForJob(ctx, namespace, jobName) + exitCode, err := b.waitForJobWithIdleStop(ctx, namespace, jobName, b.keepAliveTrafficIdleStopper(namespace, root, commandName, procedureName, procedure, globalPorts)) if exitCode != nil { console.MarkExited(*exitCode) } diff --git a/internal/runtime/kubernetes/resources_test.go b/internal/runtime/kubernetes/resources_test.go index 6cbd5873..cb2cf322 100644 --- a/internal/runtime/kubernetes/resources_test.go +++ b/internal/runtime/kubernetes/resources_test.go @@ -7,6 +7,7 @@ import ( "fmt" "strings" "testing" + "time" appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" @@ -736,6 +737,136 @@ func TestExpectedPortsSkipsHubbleWhenDisabled(t *testing.T) { } } +func TestKeepAliveTrafficStopsIdleRunningProcedure(t *testing.T) { + root := ref("druid", "druid-static-web-data") + job := &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "start-job", + Namespace: "druid", + CreationTimestamp: metav1.NewTime(time.Now().Add(-2 * time.Minute)), + Labels: procedureTestLabels(root, "start", "start", 1), + }, + Status: batchv1.JobStatus{Active: 1}, + } + client := fake.NewSimpleClientset(job) + backend := NewWithClient(Config{Namespace: "druid"}, coreservices.NewConsoleManager(coreservices.NewLogManager()), client, fakeHubble{hasFlow: false}) + stopper := backend.keepAliveTrafficIdleStopper("druid", root, "start", "start", &domain.Procedure{ + ExpectedPorts: []domain.ExpectedPort{{Name: "main", KeepAliveTraffic: "1b/1s"}}, + }, []domain.Port{{Name: "main", Port: 25565, Protocol: "tcp"}}) + if stopper == nil { + t.Fatal("stopper = nil, want enforcement") + } + + stopped, err := stopper(context.Background(), job) + if err != nil { + t.Fatal(err) + } + if !stopped { + t.Fatal("stopped = false, want true") + } + if _, err := client.BatchV1().Jobs("druid").Get(context.Background(), "start-job", metav1.GetOptions{}); !apierrors.IsNotFound(err) { + t.Fatalf("job err = %v, want not found", err) + } +} + +func TestKeepAliveTrafficKeepsProcedureWhenTrafficPresent(t *testing.T) { + root := ref("druid", "druid-static-web-data") + job := &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "start-job", + Namespace: "druid", + CreationTimestamp: metav1.NewTime(time.Now().Add(-2 * time.Minute)), + Labels: procedureTestLabels(root, "start", "start", 1), + }, + Status: batchv1.JobStatus{Active: 1}, + } + client := fake.NewSimpleClientset(job) + backend := NewWithClient(Config{Namespace: "druid"}, coreservices.NewConsoleManager(coreservices.NewLogManager()), client, fakeHubble{hasFlow: true}) + stopper := backend.keepAliveTrafficIdleStopper("druid", root, "start", "start", &domain.Procedure{ + ExpectedPorts: []domain.ExpectedPort{{Name: "main", KeepAliveTraffic: "1b/1s"}}, + }, []domain.Port{{Name: "main", Port: 25565, Protocol: "tcp"}}) + + stopped, err := stopper(context.Background(), job) + if err != nil { + t.Fatal(err) + } + if stopped { + t.Fatal("stopped = true, want false") + } + if _, err := client.BatchV1().Jobs("druid").Get(context.Background(), "start-job", metav1.GetOptions{}); err != nil { + t.Fatalf("job err = %v", err) + } +} + +func TestKeepAliveTrafficKeepsProcedureWhenHubbleUnavailable(t *testing.T) { + root := ref("druid", "druid-static-web-data") + job := &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "start-job", + Namespace: "druid", + CreationTimestamp: metav1.NewTime(time.Now().Add(-2 * time.Minute)), + Labels: procedureTestLabels(root, "start", "start", 1), + }, + Status: batchv1.JobStatus{Active: 1}, + } + client := fake.NewSimpleClientset(job) + backend := NewWithClient(Config{Namespace: "druid"}, coreservices.NewConsoleManager(coreservices.NewLogManager()), client, fakeHubble{err: errors.New("relay unavailable")}) + stopper := backend.keepAliveTrafficIdleStopper("druid", root, "start", "start", &domain.Procedure{ + ExpectedPorts: []domain.ExpectedPort{{Name: "main", KeepAliveTraffic: "1b/1s"}}, + }, []domain.Port{{Name: "main", Port: 25565, Protocol: "tcp"}}) + + stopped, err := stopper(context.Background(), job) + if err != nil { + t.Fatal(err) + } + if stopped { + t.Fatal("stopped = true, want false") + } + if _, err := client.BatchV1().Jobs("druid").Get(context.Background(), "start-job", metav1.GetOptions{}); err != nil { + t.Fatalf("job err = %v", err) + } +} + +func TestKeepAliveTrafficWaitsForFullWindowBeforeStopping(t *testing.T) { + root := ref("druid", "druid-static-web-data") + job := &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "start-job", + Namespace: "druid", + CreationTimestamp: metav1.NewTime(time.Now()), + Labels: procedureTestLabels(root, "start", "start", 1), + }, + Status: batchv1.JobStatus{Active: 1}, + } + client := fake.NewSimpleClientset(job) + backend := NewWithClient(Config{Namespace: "druid"}, coreservices.NewConsoleManager(coreservices.NewLogManager()), client, fakeHubble{hasFlow: false}) + stopper := backend.keepAliveTrafficIdleStopper("druid", root, "start", "start", &domain.Procedure{ + ExpectedPorts: []domain.ExpectedPort{{Name: "main", KeepAliveTraffic: "1b/1m"}}, + }, []domain.Port{{Name: "main", Port: 25565, Protocol: "tcp"}}) + + stopped, err := stopper(context.Background(), job) + if err != nil { + t.Fatal(err) + } + if stopped { + t.Fatal("stopped = true, want false before full window") + } + if _, err := client.BatchV1().Jobs("druid").Get(context.Background(), "start-job", metav1.GetOptions{}); err != nil { + t.Fatalf("job err = %v", err) + } +} + +func TestKeepAliveTrafficDoesNotStopColdstarter(t *testing.T) { + backend := NewWithClient(Config{Namespace: "druid"}, coreservices.NewConsoleManager(coreservices.NewLogManager()), fake.NewSimpleClientset(), fakeHubble{hasFlow: false}) + stopper := backend.keepAliveTrafficIdleStopper("druid", ref("druid", "druid-static-web-data"), "start", "coldstart", &domain.Procedure{ + Command: []string{"druid-coldstarter"}, + ExpectedPorts: []domain.ExpectedPort{{Name: "main", KeepAliveTraffic: "1b/1s"}}, + }, []domain.Port{{Name: "main", Port: 25565, Protocol: "tcp"}}) + if stopper != nil { + t.Fatal("stopper != nil, want coldstarter keepAliveTraffic to be reporting-only") + } +} + func TestRoutingTargetsReturnStableBackendServices(t *testing.T) { backend := NewWithClient(Config{Namespace: "druid"}, coreservices.NewConsoleManager(coreservices.NewLogManager()), fake.NewSimpleClientset(), fakeHubble{}) root := ref("druid", "druid-static-web-data") diff --git a/internal/runtime/kubernetes/state_store.go b/internal/runtime/kubernetes/state_store.go index ba29e2fe..7c485c73 100644 --- a/internal/runtime/kubernetes/state_store.go +++ b/internal/runtime/kubernetes/state_store.go @@ -30,7 +30,7 @@ const ( configMapKeyLastError = "last_error" configMapKeyCreatedAt = "created_at" configMapKeyUpdatedAt = "updated_at" - configMapKeyCommandsJSON = "commands_json" + configMapKeyProceduresJSON = "procedures_json" configMapKeyRoutingJSON = "routing_json" configMapKeyUIPackagesJSON = "ui_packages_json" ) @@ -75,8 +75,8 @@ func (s *ConfigMapStateStore) CreateScroll(scroll *domain.RuntimeScroll) error { if scroll.Status == "" { scroll.Status = domain.RuntimeScrollStatusCreated } - if scroll.Commands == nil { - scroll.Commands = map[string]domain.LockStatus{} + if scroll.Procedures == nil { + scroll.Procedures = domain.ProcedureStatusMap{} } configMap, err := runtimeScrollConfigMap(s.namespace, scroll) if err != nil { @@ -153,7 +153,7 @@ func (s *ConfigMapStateStore) DeleteScroll(id string) error { } func runtimeScrollConfigMap(namespace string, scroll *domain.RuntimeScroll) (*corev1.ConfigMap, error) { - commands, err := json.Marshal(scroll.Commands) + procedures, err := json.Marshal(scroll.Procedures) if err != nil { return nil, err } @@ -188,7 +188,7 @@ func runtimeScrollConfigMap(namespace string, scroll *domain.RuntimeScroll) (*co configMapKeyLastError: scroll.LastError, configMapKeyCreatedAt: formatRuntimeTime(scroll.CreatedAt), configMapKeyUpdatedAt: formatRuntimeTime(scroll.UpdatedAt), - configMapKeyCommandsJSON: string(commands), + configMapKeyProceduresJSON: string(procedures), configMapKeyRoutingJSON: string(routing), configMapKeyUIPackagesJSON: string(uiPackages), }, @@ -197,12 +197,12 @@ func runtimeScrollConfigMap(namespace string, scroll *domain.RuntimeScroll) (*co func runtimeScrollFromConfigMap(configMap *corev1.ConfigMap) (*domain.RuntimeScroll, error) { data := configMap.Data - commandsJSON := data[configMapKeyCommandsJSON] - if commandsJSON == "" { - commandsJSON = "{}" + proceduresJSON := data[configMapKeyProceduresJSON] + if proceduresJSON == "" { + proceduresJSON = "{}" } - commands := map[string]domain.LockStatus{} - if err := json.Unmarshal([]byte(commandsJSON), &commands); err != nil { + procedures := domain.ProcedureStatusMap{} + if err := json.Unmarshal([]byte(proceduresJSON), &procedures); err != nil { return nil, err } routingJSON := data[configMapKeyRoutingJSON] @@ -239,7 +239,7 @@ func runtimeScrollFromConfigMap(configMap *corev1.ConfigMap) (*domain.RuntimeScr UIPackages: uiPackages, CreatedAt: parseRuntimeTime(data[configMapKeyCreatedAt]), UpdatedAt: parseRuntimeTime(data[configMapKeyUpdatedAt]), - Commands: commands, + Procedures: procedures, } if scroll.Status == "" { scroll.Status = domain.RuntimeScrollStatusCreated diff --git a/internal/runtime/kubernetes/state_store_test.go b/internal/runtime/kubernetes/state_store_test.go index 558783b4..c830ffb0 100644 --- a/internal/runtime/kubernetes/state_store_test.go +++ b/internal/runtime/kubernetes/state_store_test.go @@ -19,8 +19,10 @@ func TestConfigMapStateStoreRoundTripsRuntimeScroll(t *testing.T) { ScrollName: "container-lab", ScrollYAML: "name: container-lab\n", Status: domain.RuntimeScrollStatusCreated, - Commands: map[string]domain.LockStatus{ - "verify": {Status: domain.ScrollLockStatusError, ExitCode: &exitCode, LastStatusChange: 123}, + Procedures: domain.ProcedureStatusMap{ + "verify": { + "verify.0": {Status: domain.ScrollLockStatusError, ExitCode: &exitCode, LastStatusChange: 123}, + }, }, } @@ -35,15 +37,15 @@ func TestConfigMapStateStoreRoundTripsRuntimeScroll(t *testing.T) { if got.Artifact != scroll.Artifact || got.Root != scroll.Root || got.ScrollYAML != scroll.ScrollYAML { t.Fatalf("stored scroll mismatch: %#v", got) } - if got.Commands["verify"].Status != domain.ScrollLockStatusError { - t.Fatalf("command status = %s, want error", got.Commands["verify"].Status) + if got.Procedures["verify"]["verify.0"].Status != domain.ScrollLockStatusError { + t.Fatalf("procedure status = %s, want error", got.Procedures["verify"]["verify.0"].Status) } - if got.Commands["verify"].ExitCode == nil || *got.Commands["verify"].ExitCode != exitCode { - t.Fatalf("exit code = %#v, want %d", got.Commands["verify"].ExitCode, exitCode) + if got.Procedures["verify"]["verify.0"].ExitCode == nil || *got.Procedures["verify"]["verify.0"].ExitCode != exitCode { + t.Fatalf("exit code = %#v, want %d", got.Procedures["verify"]["verify.0"].ExitCode, exitCode) } got.Status = domain.RuntimeScrollStatusRunning - got.Commands["verify"] = domain.LockStatus{Status: domain.ScrollLockStatusDone, LastStatusChange: 456} + got.Procedures["verify"]["verify.0"] = domain.LockStatus{Status: domain.ScrollLockStatusDone, LastStatusChange: 456} if err := store.UpdateScroll(got); err != nil { t.Fatal(err) } @@ -52,7 +54,7 @@ func TestConfigMapStateStoreRoundTripsRuntimeScroll(t *testing.T) { if err != nil { t.Fatal(err) } - if len(list) != 1 || list[0].Status != domain.RuntimeScrollStatusRunning || list[0].Commands["verify"].Status != domain.ScrollLockStatusDone { + if len(list) != 1 || list[0].Status != domain.RuntimeScrollStatusRunning || list[0].Procedures["verify"]["verify.0"].Status != domain.ScrollLockStatusDone { t.Fatalf("list = %#v, want updated scroll", list) } @@ -60,8 +62,11 @@ func TestConfigMapStateStoreRoundTripsRuntimeScroll(t *testing.T) { if err != nil { t.Fatal(err) } - if configMap.Data[configMapKeyCommandsJSON] == "" { - t.Fatal("commands_json was not stored") + if configMap.Data[configMapKeyProceduresJSON] == "" { + t.Fatal("procedures_json was not stored") + } + if _, ok := configMap.Data["commands_"+"json"]; ok { + t.Fatal("legacy command status JSON was stored") } if err := store.DeleteScroll("container-lab"); err != nil { diff --git a/internal/runtime/kubernetes/wait_jobs.go b/internal/runtime/kubernetes/wait_jobs.go index 719136df..5e1422d1 100644 --- a/internal/runtime/kubernetes/wait_jobs.go +++ b/internal/runtime/kubernetes/wait_jobs.go @@ -84,7 +84,13 @@ func (b *Backend) waitForStatefulSet(ctx context.Context, namespace string, name } } +type jobIdleStopFunc func(context.Context, *batchv1.Job) (bool, error) + func (b *Backend) waitForJob(ctx context.Context, namespace string, jobName string) (*int, error) { + return b.waitForJobWithIdleStop(ctx, namespace, jobName, nil) +} + +func (b *Backend) waitForJobWithIdleStop(ctx context.Context, namespace string, jobName string, idleStop jobIdleStopFunc) (*int, error) { startedAt := time.Now() deadline := time.Now().Add(24 * time.Hour) for { @@ -120,6 +126,17 @@ func (b *Backend) waitForJob(ctx context.Context, namespace string, jobName stri logger.Log().Error("Timed out waiting for Kubernetes job", zap.String("namespace", namespace), zap.String("job", jobName), zap.Int32("succeeded", job.Status.Succeeded), zap.Int32("failed", job.Status.Failed), zap.Int32("active", job.Status.Active)) return nil, fmt.Errorf("timed out waiting for job %s", jobName) } + if idleStop != nil && activeKubernetesJob(job) { + stopped, err := idleStop(ctx, job) + if err != nil { + logger.Log().Warn("Kubernetes job keepAliveTraffic enforcement skipped", zap.String("namespace", namespace), zap.String("job", jobName), zap.Error(err)) + } else if stopped { + exitCode := 0 + b.recordJobExit(namespace, jobName, exitCode) + logger.Log().Info("Kubernetes job stopped by keepAliveTraffic enforcement", zap.String("namespace", namespace), zap.String("job", jobName), zap.Int("exit_code", exitCode)) + return &exitCode, nil + } + } sleep := jobPollInterval(time.Since(startedAt)) logger.Log().Debug("Kubernetes job still running", zap.String("namespace", namespace), zap.String("job", jobName), zap.Int32("succeeded", job.Status.Succeeded), zap.Int32("failed", job.Status.Failed), zap.Int32("active", job.Status.Active), zap.Duration("sleep", sleep), zap.Time("deadline", deadline)) if err := sleepUntilNextPoll(ctx, deadline, sleep); err != nil { diff --git a/test/integration/docker/docker_cli_test.go b/test/integration/docker/docker_cli_test.go index aceed3e9..1ccf45ac 100644 --- a/test/integration/docker/docker_cli_test.go +++ b/test/integration/docker/docker_cli_test.go @@ -62,7 +62,7 @@ func TestDockerBackendCLIComplexLifecycle(t *testing.T) { statuses := e2e.RunClientJSON[[]e2e.RuntimePortStatus](t, bins, socket, "ports", created.ID) assertPortBound(t, statuses, fixture) - e2e.RunClient(t, bins, socket, "command", "run", created.ID, "record") + e2e.UnixJSONRequest(t, socket, http.MethodPost, "/api/v1/scrolls/"+created.ID+"/commands/record", "") root := strings.TrimPrefix(created.Root, "docker-bind://") if got := readDockerRootFile(t, root, "data/finite.txt"); !strings.Contains(got, "finite-ok") { t.Fatalf("finite file = %q, want finite-ok", got) @@ -195,19 +195,9 @@ func TestDockerBackendColdstarterFrontsRuntime(t *testing.T) { if started.Status != "running" { t.Fatalf("started status = %s, want running", started.Status) } - procedures := readStatusMap(t, socket, "/api/v1/scrolls/"+created.ID+"/procedures") - if procedures["coldstart"] != "running" { - t.Fatalf("procedures = %#v, want coldstart alias running", procedures) - } - if procedures["start"] != "running" { - t.Fatalf("procedures = %#v, want compatibility command key running", procedures) - } - queue := readStatusMap(t, socket, "/api/v1/scrolls/"+created.ID+"/queue") - if _, ok := queue["coldstart"]; ok { - t.Fatalf("queue = %#v, want command keys only", queue) - } - if queue["start"] != "running" { - t.Fatalf("queue = %#v, want start command running", queue) + queue := readProcedureStatusMap(t, socket, "/api/v1/scrolls/"+created.ID+"/queue") + if queue["start"]["coldstart"].Status != "running" { + t.Fatalf("queue = %#v, want start/coldstart running", queue) } if got := e2e.WaitHTTP(t, fmt.Sprintf("http://127.0.0.1:%d/index.txt", publicPort)); !strings.Contains(got, "cold-started") { t.Fatalf("served body = %q, want cold-started", got) @@ -215,10 +205,14 @@ func TestDockerBackendColdstarterFrontsRuntime(t *testing.T) { e2e.UnixJSONRequest(t, socket, http.MethodDelete, "/api/v1/scrolls/"+created.ID+"?purge_data=true", "") } -func readStatusMap(t *testing.T, socket string, path string) map[string]string { +type lockStatus struct { + Status string `json:"status"` +} + +func readProcedureStatusMap(t *testing.T, socket string, path string) map[string]map[string]lockStatus { t.Helper() body := e2e.UnixJSONRequest(t, socket, http.MethodGet, path, "") - statuses := map[string]string{} + statuses := map[string]map[string]lockStatus{} if err := json.Unmarshal([]byte(body), &statuses); err != nil { t.Fatalf("decode %s JSON: %v\n%s", path, err, body) } diff --git a/test/integration/example_test.go b/test/integration/example_test.go index 4c83a5bb..e3224772 100644 --- a/test/integration/example_test.go +++ b/test/integration/example_test.go @@ -10,7 +10,10 @@ import ( "testing" "time" - "github.com/highcard-dev/daemon/internal/core/services" + appservices "github.com/highcard-dev/daemon/apps/druid/core/services" + "github.com/highcard-dev/daemon/internal/core/domain" + coreservices "github.com/highcard-dev/daemon/internal/core/services" + "github.com/highcard-dev/daemon/internal/runtime/docker" mock_ports "github.com/highcard-dev/daemon/test/mock" test_utils "github.com/highcard-dev/daemon/test/utils" "github.com/otiai10/copy" @@ -27,7 +30,9 @@ type ServiceConfig struct { LogSpy func(string, []byte) bool } -func checkQueue(queueManager *services.QueueManager, config ServiceConfig) error { +func checkQueue(queueManager interface { + GetQueue() map[string]domain.ScrollLockStatus +}, config ServiceConfig) error { queue := queueManager.GetQueue() for _, status := range config.CommandStatus { if _, ok := queue[status]; !ok { @@ -98,8 +103,7 @@ func TestExamples(t *testing.T) { return } - scrollService, err := services.NewScrollService(path) - if err != nil { + if _, err := coreservices.NewScrollService(path); err != nil { t.Error(err) return } @@ -107,16 +111,37 @@ func TestExamples(t *testing.T) { exitCode := 0 runtimeBackend.EXPECT().Name().Return("docker").AnyTimes() runtimeBackend.EXPECT().RunCommand(gomock.Any()).Return(&exitCode, nil).AnyTimes() - procedureLauncher, err := services.NewProcedureLauncher(scrollService, runtimeBackend, "/tmp") + scrollYAML, err := os.ReadFile(path + "scroll.yaml") + if err != nil { + t.Error(err) + return + } + store, err := docker.NewStateStore(t.TempDir()) + if err != nil { + t.Error(err) + return + } + runtimeScroll := &domain.RuntimeScroll{ + ID: config.ServiceName, + Root: path, + ScrollName: config.ServiceName, + ScrollYAML: string(scrollYAML), + Status: domain.RuntimeScrollStatusCreated, + Procedures: domain.ProcedureStatusMap{}, + } + if err := store.CreateScroll(runtimeScroll); err != nil { + t.Error(err) + return + } + session, err := appservices.NewRuntimeSession(store, runtimeScroll, runtimeBackend) if err != nil { t.Error(err) return } - queueManager := services.NewQueueManager(scrollService, procedureLauncher) - go queueManager.Work() + session.Start() - err = queueManager.AddTempItem("start") + err = session.AddTempItem("start") if err != nil { t.Error(err) @@ -139,18 +164,17 @@ func TestExamples(t *testing.T) { t.Error("Failed to test to server: ", err) } - err = checkQueue(queueManager, config) + err = checkQueue(session, config) if err != nil { t.Error(err) return } - err = queueManager.AddTempItemWithWait("stop") + err = session.AddTempItemWithWait("stop") if err != nil { t.Error(err) return } - queueManager.Shutdown() if config.TestAddress != "" { err = test_utils.ConnectionTest(config.TestAddress, false) @@ -160,7 +184,7 @@ func TestExamples(t *testing.T) { } } - err = checkQueue(queueManager, config) + err = checkQueue(session, config) if err != nil { t.Error(err) return diff --git a/test/mock/services.go b/test/mock/services.go index 20af502a..b919a411 100644 --- a/test/mock/services.go +++ b/test/mock/services.go @@ -185,58 +185,6 @@ func (mr *MockScrollServiceInterfaceMockRecorder) GetFile() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetFile", reflect.TypeOf((*MockScrollServiceInterface)(nil).GetFile)) } -// MockProcedureLauchnerInterface is a mock of ProcedureLauchnerInterface interface. -type MockProcedureLauchnerInterface struct { - ctrl *gomock.Controller - recorder *MockProcedureLauchnerInterfaceMockRecorder - isgomock struct{} -} - -// MockProcedureLauchnerInterfaceMockRecorder is the mock recorder for MockProcedureLauchnerInterface. -type MockProcedureLauchnerInterfaceMockRecorder struct { - mock *MockProcedureLauchnerInterface -} - -// NewMockProcedureLauchnerInterface creates a new mock instance. -func NewMockProcedureLauchnerInterface(ctrl *gomock.Controller) *MockProcedureLauchnerInterface { - mock := &MockProcedureLauchnerInterface{ctrl: ctrl} - mock.recorder = &MockProcedureLauchnerInterfaceMockRecorder{mock} - return mock -} - -// EXPECT returns an object that allows the caller to indicate expected use. -func (m *MockProcedureLauchnerInterface) EXPECT() *MockProcedureLauchnerInterfaceMockRecorder { - return m.recorder -} - -// GetProcedureStatuses mocks base method. -func (m *MockProcedureLauchnerInterface) GetProcedureStatuses() map[string]domain.ScrollLockStatus { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetProcedureStatuses") - ret0, _ := ret[0].(map[string]domain.ScrollLockStatus) - return ret0 -} - -// GetProcedureStatuses indicates an expected call of GetProcedureStatuses. -func (mr *MockProcedureLauchnerInterfaceMockRecorder) GetProcedureStatuses() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetProcedureStatuses", reflect.TypeOf((*MockProcedureLauchnerInterface)(nil).GetProcedureStatuses)) -} - -// Run mocks base method. -func (m *MockProcedureLauchnerInterface) Run(cmd string) error { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "Run", cmd) - ret0, _ := ret[0].(error) - return ret0 -} - -// Run indicates an expected call of Run. -func (mr *MockProcedureLauchnerInterfaceMockRecorder) Run(cmd any) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Run", reflect.TypeOf((*MockProcedureLauchnerInterface)(nil).Run), cmd) -} - // MockLogManagerInterface is a mock of LogManagerInterface interface. type MockLogManagerInterface struct { ctrl *gomock.Controller