InstaNode-dev
diff --git a/‎internal/config/config.go‎
Lines changed: 19 additions & 0 deletions b/‎internal/config/config.go‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎internal/db/migrations/068_deploy_scale_to_zero.sql‎
Lines changed: 68 additions & 0 deletions b/‎internal/db/migrations/068_deploy_scale_to_zero.sql‎
Lines changed: 68 additions & 0 deletions
diff --git a/‎internal/handlers/deploy.go‎
Lines changed: 5 additions & 0 deletions b/‎internal/handlers/deploy.go‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎internal/handlers/deploy_buildfailed_autopsy_test.go‎
Lines changed: 3 additions & 0 deletions b/‎internal/handlers/deploy_buildfailed_autopsy_test.go‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎internal/handlers/deploy_stack_internal_coverage_test.go‎
Lines changed: 3 additions & 0 deletions b/‎internal/handlers/deploy_stack_internal_coverage_test.go‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎internal/handlers/deploy_teardown_reconciler_test.go‎
Lines changed: 3 additions & 0 deletions b/‎internal/handlers/deploy_teardown_reconciler_test.go‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎internal/handlers/deploy_wake.go‎
Lines changed: 116 additions & 0 deletions b/‎internal/handlers/deploy_wake.go‎
Lines changed: 116 additions & 0 deletions
diff --git a/‎internal/handlers/deploy_wake_test.go‎
Lines changed: 83 additions & 0 deletions b/‎internal/handlers/deploy_wake_test.go‎
Lines changed: 83 additions & 0 deletions
diff --git a/‎internal/handlers/openapi.go‎
Lines changed: 15 additions & 0 deletions b/‎internal/handlers/openapi.go‎
Lines changed: 15 additions & 0 deletions
@@ -197,6 +197,15 @@ type Config struct {
 	// Off → /deploy/new rejects source=git with 501; tarball/image unaffected.
 	DeploySourceGitEnabled bool
 
+	// DeployScaleToZeroEnabled gates scale-to-zero (idle descheduling, Task #54).
+	// Default FALSE. When on, the worker idle-scaler patches idle Deployments to
+	// replicas=0 and the api wake path (POST /deploy/:id/wake) brings them back.
+	// Off → the wake endpoint returns 501 and nothing in the api scales an app;
+	// the worker idle-scaler is independently gated by its own
+	// DEPLOY_SCALE_TO_ZERO_ENABLED env so the two services share the flag name.
+	// Enabling it is an operator action (see infra runbook) after a canary.
+	DeployScaleToZeroEnabled bool
+
 	// GitHub App (P4) — install-once push-to-deploy + short-lived installation
 	// tokens for private-repo clones. Distinct from the GitHub OAuth *login* app
 	// above (GitHubClientID/Secret). GitHubAppEnabled gates the whole feature:
@@ -501,6 +510,16 @@ func Load() *Config {
 		cfg.DeploySourceGitEnabled = false
 	}
 
+	// DEPLOY_SCALE_TO_ZERO_ENABLED: default FALSE (off until operator canary).
+	// Shared flag name with the worker idle-scaler; the api half gates the wake
+	// endpoint + any api-initiated scale, the worker half gates the idle sweep.
+	switch strings.ToLower(strings.TrimSpace(os.Getenv("DEPLOY_SCALE_TO_ZERO_ENABLED"))) {
+	case "true", "1", "yes":
+		cfg.DeployScaleToZeroEnabled = true
+	default:
+		cfg.DeployScaleToZeroEnabled = false
+	}
+
 	// GITHUB_APP_ENABLED: default FALSE (off until the operator registers the
 	// App and provisions GITHUB_APP_* secrets — see infra/GITHUB-APP-RUNBOOK.md).
 	switch strings.ToLower(strings.TrimSpace(os.Getenv("GITHUB_APP_ENABLED"))) {
 
@@ -0,0 +1,68 @@
+-- 068_deploy_scale_to_zero.sql — scale-to-zero (idle descheduling) state columns.
+--
+-- WHY: a deployed-but-idle app costs a full pod's worth of compute even when it
+-- serves zero requests. Scale-to-zero (Task #54) lets the worker patch an idle
+-- Deployment to replicas=0 (~$0 compute) and wake it back to replicas=1 on
+-- demand. This migration adds the per-deployment state the idle-scaler and the
+-- wake path read/write. The whole feature is gated behind the
+-- DEPLOY_SCALE_TO_ZERO_ENABLED worker env flag (default OFF), so these columns
+-- are inert — populated at create-time but acted upon only when an operator
+-- enables the flag.
+--
+-- Columns:
+--   last_activity_at  TIMESTAMPTZ — floor "last known activity" marker. Set to
+--                                   now() at create-time, bumped on every wake
+--                                   and on redeploy. The idle-scaler deschedules
+--                                   a Deployment only when
+--                                   now() - last_activity_at > idle_threshold.
+--
+--                                   v1 NOTE: the api is NOT in the request path
+--                                   (apps are served by k8s Ingress straight to
+--                                   the per-app Service), and no nginx-ingress
+--                                   request-total scrape is wired yet, so the
+--                                   honest "activity" signal v1 captures is
+--                                   deploy / redeploy / explicit-wake events —
+--                                   NOT per-HTTP-request traffic. A follow-up
+--                                   (documented in the worker job header) will
+--                                   wire an ingress request-counter to bump this
+--                                   column on real traffic for true
+--                                   traffic-based idle detection.
+--
+--   scaled_to_zero    BOOLEAN     — true while the app is currently descheduled
+--                                   (replicas=0). The wake path reads this to
+--                                   decide whether a scale-up is needed; the
+--                                   dashboard/agent reads it to show "sleeping".
+--                                   The idle-scaler sets it true on scale-down,
+--                                   the wake path sets it false on scale-up.
+--
+--   always_on         BOOLEAN     — per-app opt-out. A pinned app (an operator
+--                                   or Pro+ user who wants zero cold-starts) is
+--                                   never descheduled by the idle-scaler. Default
+--                                   false → eligible for scale-to-zero.
+--
+-- Idempotent + forward-only. Existing rows get last_activity_at backfilled from
+-- updated_at (their most recent known activity) so the idle-scaler does not
+-- immediately deschedule every pre-existing deploy the first time the flag is
+-- turned on; scaled_to_zero / always_on default to false.
+
+ALTER TABLE deployments
+    ADD COLUMN IF NOT EXISTS last_activity_at TIMESTAMPTZ,
+    ADD COLUMN IF NOT EXISTS scaled_to_zero   BOOLEAN NOT NULL DEFAULT false,
+    ADD COLUMN IF NOT EXISTS always_on        BOOLEAN NOT NULL DEFAULT false;
+
+-- Backfill: seed last_activity_at from updated_at for every pre-existing row so
+-- the very first idle-scaler tick after the flag is enabled treats existing
+-- deploys as "recently active" rather than immediately idle. New rows set
+-- last_activity_at = now() at INSERT time (see CreateDeployment).
+UPDATE deployments
+SET    last_activity_at = COALESCE(updated_at, created_at, now())
+WHERE  last_activity_at IS NULL;
+
+-- Partial index: the idle-scaler scans for healthy, eligible, not-yet-zeroed
+-- deployments ordered by activity. Excluding always_on + already-zeroed +
+-- terminal rows keeps the index narrow and the scan cheap.
+CREATE INDEX IF NOT EXISTS idx_deployments_idle_candidates
+    ON deployments (last_activity_at)
+    WHERE status = 'healthy'
+      AND scaled_to_zero = false
+      AND always_on = false;
@@ -572,6 +572,11 @@ func deploymentToMapWithDB(d *models.Deployment, db *sql.DB) fiber.Map {
 		// image_ref is echoed (caller-supplied, no secret); registry_creds is
 		// NEVER returned — only registry_creds_set lifecycle metadata.
 		"source": deploymentSourceOrDefault(d.Source),
+		// Scale-to-zero state (migration 068). scaled_to_zero=true → the app is
+		// asleep (replicas=0); the dashboard/agent surfaces "sleeping — wake"
+		// and POSTs /deploy/:id/wake. always_on=true → pinned (never descheduled).
+		"scaled_to_zero": d.ScaledToZero,
+		"always_on":      d.AlwaysOn,
 	}
 	if d.Source == "image" {
 		m["image_ref"] = d.ImageRef
 
@@ -56,6 +56,9 @@ func (m *mockProvider) Redeploy(_ context.Context, _ string, _ []byte, _ map[str
 func (m *mockProvider) UpdateAccessControl(_ context.Context, _ string, _ bool, _ []string) error {
 	panic("mockProvider.UpdateAccessControl: not expected in this test")
 }
+func (m *mockProvider) Scale(_ context.Context, _ string, _ int32) error {
+	panic("mockProvider.Scale: not expected in this test")
+}
 
 // mockBuildLogFetcher wraps mockProvider and adds FetchBuildLogs so the handler
 // code can type-assert to compute.BuildLogFetcher.
 
@@ -64,6 +64,9 @@ func (covPanicProvider) Redeploy(context.Context, string, []byte, map[string]str
 func (covPanicProvider) UpdateAccessControl(context.Context, string, bool, []string) error {
 	panic("covPanicProvider.UpdateAccessControl: not expected")
 }
+func (covPanicProvider) Scale(context.Context, string, int32) error {
+	panic("covPanicProvider.Scale: not expected")
+}
 
 // covFailProvider's Deploy/Redeploy return a configurable error. It does NOT
 // implement BuildLogFetcher, so fetchBuildLogsForAutopsy returns nil
 
@@ -75,6 +75,9 @@ func (f *fakeTeardownProvider) Redeploy(context.Context, string, []byte, map[str
 func (f *fakeTeardownProvider) UpdateAccessControl(context.Context, string, bool, []string) error {
 	return nil
 }
+func (f *fakeTeardownProvider) Scale(context.Context, string, int32) error {
+	return nil
+}
 
 func reconcilerRequireDB(t *testing.T) {
 	t.Helper()
 
@@ -0,0 +1,116 @@
+package handlers
+
+// deploy_wake.go — explicit wake path for scale-to-zero (Task #54).
+//
+// WHY AN EXPLICIT WAKE (v1 design decision)
+//
+// instanode.dev serves a deployed app via a k8s Ingress on
+// *.deployment.instanode.dev that routes straight to the per-app Service in
+// the instant-deploy-<appID> namespace. The api process is NOT in the request
+// path. Transparent wake-on-request (a request to a sleeping app
+// auto-scales it and holds the connection until ready) therefore requires an
+// ACTIVATOR proxy in front of every app — KEDA http-add-on or a Knative-style
+// activator. That is a significant new dependency and is explicitly out of
+// scope for the scale-to-zero v1.
+//
+// v1 ships scale-DOWN (worker idle-scaler) + this fast EXPLICIT wake:
+//
+//   POST /deploy/:id/wake → scales the app back to replicas=1 and returns once
+//   the scale patch is accepted by k8s. The pod still needs its normal startup
+//   time before it serves traffic, so a request that races the wake gets the
+//   app's own cold-start latency (a brief 502/503 from the ingress until the
+//   pod is Ready), exactly as a fresh rollout would. Callers/dashboard/agents
+//   surface "sleeping — wake" and retry the app URL after waking.
+//
+// COLD-START CONTRACT (documented v1 limitation)
+//
+//   - While scaled_to_zero, the app URL returns the ingress's upstream-down
+//     response (502/503) because there is no pod. This is the documented v1
+//     trade-off of explicit wake vs a transparent activator.
+//   - POST /deploy/:id/wake is idempotent: waking an already-awake app still
+//     issues the replicas=1 scale and refreshes last_activity_at (so it won't
+//     be re-descheduled immediately).
+//   - The endpoint is gated by DEPLOY_SCALE_TO_ZERO_ENABLED. With the flag OFF
+//     it returns 501 and performs NO scaling and NO DB writes (flag-off inert).
+
+import (
+	"errors"
+	"log/slog"
+
+	"github.com/gofiber/fiber/v2"
+
+	"instant.dev/internal/middleware"
+	"instant.dev/internal/models"
+)
+
+// Wake handles POST /deploy/:id/wake. It scales a (possibly scaled-to-zero)
+// deployment back to replicas=1 and clears the scaled_to_zero flag, returning
+// the refreshed deployment. See the file header for the cold-start contract.
+//
+// Responses:
+//   - 501 scale_to_zero_disabled when DeployScaleToZeroEnabled is off. This is
+//     checked first, so no team lookup, scale call, or DB write happens.
+//   - whatever requireTeam returns when the caller's team cannot be resolved.
+//   - 404 not_found when the deployment is missing or owned by another team.
+//   - 503 fetch_failed / wake_failed when the lookup, the scale call, or the
+//     DB write fails; the caller is expected to retry.
+//   - 200 with the deployment map once the scale call and DB write succeeded,
+//     even if the final re-read fails (that failure is only logged).
+func (h *DeployHandler) Wake(c *fiber.Ctx) error {
+	if !h.cfg.DeployScaleToZeroEnabled {
+		// Flag OFF → fully inert: no scale call, no DB write.
+		return respondError(c, fiber.StatusNotImplemented, "scale_to_zero_disabled",
+			"Scale-to-zero is not enabled on this platform")
+	}
+
+	team, err := h.requireTeam(c)
+	if err != nil {
+		return err
+	}
+
+	appID := c.Params("id")
+	reqID := middleware.GetRequestID(c)
+
+	d, err := models.GetDeploymentByAppID(c.Context(), h.db, appID)
+	if err != nil {
+		var notFound *models.ErrDeploymentNotFound
+		if errors.As(err, &notFound) {
+			return respondError(c, fiber.StatusNotFound, "not_found", "Deployment not found")
+		}
+		return respondError(c, fiber.StatusServiceUnavailable, "fetch_failed", "Failed to fetch deployment")
+	}
+
+	if d.TeamID != team.ID {
+		// 404 not 403: never confirm the existence of another team's deployment.
+		return respondError(c, fiber.StatusNotFound, "not_found", "Deployment not found")
+	}
+
+	// Scale the k8s Deployment back to 1 replica. A NotFound Deployment is a
+	// no-op inside compute.Scale (the row may have been torn down), so this only
+	// errors on a real k8s transport failure — surface it so the caller retries.
+	// Rows without a provider id have nothing to scale and skip straight to the
+	// DB half.
+	if d.ProviderID != "" {
+		if scaleErr := h.compute.Scale(c.Context(), appID, 1); scaleErr != nil {
+			slog.Warn("deploy.wake.scale_failed",
+				"app_id", appID, "provider_id", d.ProviderID, "error", scaleErr,
+				"request_id", reqID)
+			return respondError(c, fiber.StatusServiceUnavailable, "wake_failed",
+				"Failed to wake deployment; please retry")
+		}
+	}
+
+	// DB half: clear scaled_to_zero + bump last_activity_at so the idle-scaler
+	// doesn't immediately re-deschedule the just-woken app.
+	if _, dbErr := models.WakeDeployment(c.Context(), h.db, d.ID); dbErr != nil {
+		slog.Error("deploy.wake.db_failed",
+			"app_id", appID, "error", dbErr,
+			"request_id", reqID)
+		return respondError(c, fiber.StatusServiceUnavailable, "wake_failed",
+			"Failed to record wake; please retry")
+	}
+
+	// Re-read so the response reflects the cleared flag + new activity stamp.
+	fresh, err := models.GetDeploymentByID(c.Context(), h.db, d.ID)
+	if err != nil {
+		// The scale + DB write already succeeded; a re-read failure shouldn't
+		// fail the wake. Log it (previously it was dropped silently) and fall
+		// back to the pre-read row with the flag we just cleared. Any other
+		// column WakeDeployment touched stays at its pre-wake value in this
+		// fallback response.
+		slog.Warn("deploy.wake.reread_failed",
+			"app_id", appID, "error", err,
+			"request_id", reqID)
+		d.ScaledToZero = false
+		fresh = d
+	}
+
+	slog.Info("deploy.woke",
+		"app_id", appID, "team_id", team.ID,
+		"request_id", reqID)
+
+	return c.JSON(fiber.Map{
+		"ok":         true,
+		"message":    "Deployment woken — the app will be reachable once its pod is Ready (cold start).",
+		"deployment": deploymentToMapWithDB(fresh, h.db),
+	})
+}
@@ -0,0 +1,83 @@
+package handlers
+
+// deploy_wake_test.go — scale-to-zero wake endpoint coverage (Task #54).
+//
+// The flag-off path is the load-bearing safety property (rule: default OFF,
+// inert when off). It must short-circuit with 501 BEFORE any auth lookup, scale
+// call, or DB write — so this test constructs the handler with the flag off and
+// asserts a 501 with no compute interaction. A panicking compute provider proves
+// the handler never reaches the scale layer when the flag is off.
+
+import (
+	"context"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+
+	"github.com/gofiber/fiber/v2"
+
+	"instant.dev/internal/config"
+	"instant.dev/internal/providers/compute"
+)
+
+// wakePanicProvider satisfies compute.Provider; Scale panics so a flag-off wake
+// that incorrectly reaches the compute layer fails loudly.
+type wakePanicProvider struct{}
+
+func (wakePanicProvider) Deploy(context.Context, compute.DeployOptions) (*compute.AppDeployment, error) {
+	panic("Deploy: not expected")
+}
+func (wakePanicProvider) Status(context.Context, string) (*compute.AppDeployment, error) {
+	panic("Status: not expected")
+}
+func (wakePanicProvider) Logs(context.Context, string, bool) (io.ReadCloser, error) {
+	panic("Logs: not expected")
+}
+func (wakePanicProvider) Teardown(context.Context, string) error { panic("Teardown: not expected") }
+func (wakePanicProvider) Redeploy(context.Context, string, []byte, map[string]string) (*compute.AppDeployment, error) {
+	panic("Redeploy: not expected")
+}
+func (wakePanicProvider) UpdateAccessControl(context.Context, string, bool, []string) error {
+	panic("UpdateAccessControl: not expected")
+}
+func (wakePanicProvider) Scale(context.Context, string, int32) error {
+	panic("Scale: not expected when scale-to-zero flag is OFF")
+}
+
+// TestWake_FlagOff_Returns501Inert proves the wake endpoint is fully inert when
+// DEPLOY_SCALE_TO_ZERO_ENABLED is off: 501 response, and the (panicking)
+// compute provider is never touched. The handler is built without a db, so the
+// test would also fail if the flag-off path went on to query the database.
+func TestWake_FlagOff_Returns501Inert(t *testing.T) {
+	h := &DeployHandler{
+		cfg:     &config.Config{DeployScaleToZeroEnabled: false},
+		compute: wakePanicProvider{},
+	}
+	// Mirror the production fiber ErrorHandler so respondError's
+	// ErrResponseWritten sentinel isn't turned into a 500 by the default handler.
+	app := fiber.New(fiber.Config{
+		ErrorHandler: func(_ *fiber.Ctx, err error) error {
+			if err == ErrResponseWritten {
+				return nil
+			}
+			return err
+		},
+	})
+	app.Post("/deploy/:id/wake", h.Wake)
+
+	req := httptest.NewRequest(http.MethodPost, "/deploy/app-123/wake", nil)
+	resp, err := app.Test(req, 1000)
+	if err != nil {
+		t.Fatalf("app.Test: %v", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusNotImplemented {
+		t.Fatalf("flag-off wake status = %d, want 501", resp.StatusCode)
+	}
+	// Fail on a read error explicitly; otherwise a truncated body would surface
+	// as a misleading "missing error code" failure below.
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		t.Fatalf("read flag-off body: %v", err)
+	}
+	if !strings.Contains(string(body), "scale_to_zero_disabled") {
+		t.Errorf("flag-off body = %q; want scale_to_zero_disabled error code", string(body))
+	}
+}
@@ -586,6 +586,21 @@ const openAPISpec = `{
         }
       }
     },
+    "/deploy/{id}/wake": {
+      "post": {
+        "summary": "Wake a scaled-to-zero (sleeping) deployment",
+        "description": "Scale-to-zero (Task #54). Scales an idle, descheduled app back to one replica and clears its sleeping state. The app becomes reachable once its pod is Ready (a one-time cold start — a request that races the wake gets the ingress's upstream-down response until the pod is up). Idempotent: waking an already-awake app just refreshes its last-activity marker so the idle-scaler won't immediately re-deschedule it. Returns 501 when scale-to-zero is not enabled on the platform (the default). Cross-tenant requests return 404.",
+        "security": [{ "bearerAuth": [] }],
+        "parameters": [{ "name": "id", "in": "path", "required": true, "schema": { "type": "string" }, "description": "Deployment id (UUID or short app_id slug)." }],
+        "responses": {
+          "200": { "description": "Deployment woken", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/DeployResponse" } } } },
+          "401": { "description": "Unauthorized" },
+          "404": { "description": "Not found (or owned by another team)" },
+          "501": { "description": "scale_to_zero_disabled — scale-to-zero is not enabled on this platform (default)." },
+          "503": { "description": "wake_failed — transient failure scaling the app; retry." }
+        }
+      }
+    },
     "/api/v1/deployments/{id}/make-permanent": {
       "post": {
         "summary": "Opt a deployment out of the auto-24h TTL",
Original file line number	Diff line number	Diff line change
`@@ -56,6 +56,9 @@ func (m *mockProvider) Redeploy(_ context.Context, _ string, _ []byte, _ map[str`
`56`	`56`	`func (m *mockProvider) UpdateAccessControl(_ context.Context, _ string, _ bool, _ []string) error {`
`57`	`57`	`panic("mockProvider.UpdateAccessControl: not expected in this test")`
`58`	`58`	`}`
	`59`	`+func (m *mockProvider) Scale(_ context.Context, _ string, _ int32) error {`
	`60`	`+ panic("mockProvider.Scale: not expected in this test")`
	`61`	`+}`
`59`	`62`
`60`	`63`	`// mockBuildLogFetcher wraps mockProvider and adds FetchBuildLogs so the handler`
`61`	`64`	`// code can type-assert to compute.BuildLogFetcher.`
Original file line number	Diff line number	Diff line change
`@@ -64,6 +64,9 @@ func (covPanicProvider) Redeploy(context.Context, string, []byte, map[string]str`
`64`	`64`	`func (covPanicProvider) UpdateAccessControl(context.Context, string, bool, []string) error {`
`65`	`65`	`panic("covPanicProvider.UpdateAccessControl: not expected")`
`66`	`66`	`}`
	`67`	`+func (covPanicProvider) Scale(context.Context, string, int32) error {`
	`68`	`+ panic("covPanicProvider.Scale: not expected")`
	`69`	`+}`
`67`	`70`
`68`	`71`	`// covFailProvider's Deploy/Redeploy return a configurable error. It does NOT`
`69`	`72`	`// implement BuildLogFetcher, so fetchBuildLogsForAutopsy returns nil`
Original file line number	Diff line number	Diff line change
`@@ -75,6 +75,9 @@ func (f *fakeTeardownProvider) Redeploy(context.Context, string, []byte, map[str`
`75`	`75`	`func (f *fakeTeardownProvider) UpdateAccessControl(context.Context, string, bool, []string) error {`
`76`	`76`	`return nil`
`77`	`77`	`}`
	`78`	`+func (f *fakeTeardownProvider) Scale(context.Context, string, int32) error {`
	`79`	`+ return nil`
	`80`	`+}`
`78`	`81`
`79`	`82`	`func reconcilerRequireDB(t *testing.T) {`
`80`	`83`	`t.Helper()`