|
| 1 | +package handlers |
| 2 | + |
| 3 | +// deploy_wake.go — explicit wake path for scale-to-zero (Task #54). |
| 4 | +// |
| 5 | +// WHY AN EXPLICIT WAKE (v1 design decision) |
| 6 | +// |
| 7 | +// instanode.dev serves a deployed app via a k8s Ingress on |
| 8 | +// *.deployment.instanode.dev that routes straight to the per-app Service in |
| 9 | +// the instant-deploy-<appID> namespace. The api process is NOT in the request |
| 10 | +// path. Transparent wake-on-request (a request to a sleeping app |
| 11 | +// auto-scales it and holds the connection until ready) therefore requires an |
| 12 | +// ACTIVATOR proxy in front of every app — KEDA http-add-on or a Knative-style |
| 13 | +// activator. That is a significant new dependency and is explicitly out of |
| 14 | +// scope for the scale-to-zero v1. |
| 15 | +// |
| 16 | +// v1 ships scale-DOWN (worker idle-scaler) + this fast EXPLICIT wake: |
| 17 | +// |
| 18 | +// POST /deploy/:id/wake → scales the app back to replicas=1 and returns once |
| 19 | +// the scale patch is accepted by k8s. The pod still needs its normal startup |
| 20 | +// time before it serves traffic, so a request that races the wake gets the |
| 21 | +// app's own cold-start latency (a brief 502/503 from the ingress until the |
| 22 | +// pod is Ready), exactly as a fresh rollout would. Callers/dashboard/agents |
| 23 | +// surface "sleeping — wake" and retry the app URL after waking. |
| 24 | +// |
| 25 | +// COLD-START CONTRACT (documented v1 limitation) |
| 26 | +// |
| 27 | +// - While scaled_to_zero, the app URL returns the ingress's upstream-down |
| 28 | +// response (502/503) because there is no pod. This is the documented v1 |
| 29 | +// trade-off of explicit wake vs a transparent activator. |
| 30 | +// - POST /deploy/:id/wake is idempotent: waking an already-awake app just |
| 31 | +// refreshes last_activity_at (so it won't be re-descheduled immediately). |
| 32 | +// - The endpoint is gated by DEPLOY_SCALE_TO_ZERO_ENABLED. With the flag OFF |
| 33 | +// it returns 501 and performs NO scaling and NO DB writes (flag-off inert). |
| 34 | + |
| 35 | +import ( |
| 36 | + "errors" |
| 37 | + "log/slog" |
| 38 | + |
| 39 | + "github.com/gofiber/fiber/v2" |
| 40 | + |
| 41 | + "instant.dev/internal/middleware" |
| 42 | + "instant.dev/internal/models" |
| 43 | +) |
| 44 | + |
| 45 | +// Wake handles POST /deploy/:id/wake. It scales a (possibly scaled-to-zero) |
| 46 | +// deployment back to replicas=1 and clears the scaled_to_zero flag, returning |
| 47 | +// the refreshed deployment. See the file header for the cold-start contract. |
| 48 | +func (h *DeployHandler) Wake(c *fiber.Ctx) error { |
| 49 | + if !h.cfg.DeployScaleToZeroEnabled { |
| 50 | + // Flag OFF → fully inert: no scale call, no DB write. |
| 51 | + return respondError(c, fiber.StatusNotImplemented, "scale_to_zero_disabled", |
| 52 | + "Scale-to-zero is not enabled on this platform") |
| 53 | + } |
| 54 | + |
| 55 | + team, err := h.requireTeam(c) |
| 56 | + if err != nil { |
| 57 | + return err |
| 58 | + } |
| 59 | + |
| 60 | + appID := c.Params("id") |
| 61 | + d, err := models.GetDeploymentByAppID(c.Context(), h.db, appID) |
| 62 | + if err != nil { |
| 63 | + var notFound *models.ErrDeploymentNotFound |
| 64 | + if errors.As(err, ¬Found) { |
| 65 | + return respondError(c, fiber.StatusNotFound, "not_found", "Deployment not found") |
| 66 | + } |
| 67 | + return respondError(c, fiber.StatusServiceUnavailable, "fetch_failed", "Failed to fetch deployment") |
| 68 | + } |
| 69 | + |
| 70 | + if d.TeamID != team.ID { |
| 71 | + // 404 not 403: never confirm the existence of another team's deployment. |
| 72 | + return respondError(c, fiber.StatusNotFound, "not_found", "Deployment not found") |
| 73 | + } |
| 74 | + |
| 75 | + // Scale the k8s Deployment back to 1 replica. A NotFound Deployment is a |
| 76 | + // no-op inside compute.Scale (the row may have been torn down), so this only |
| 77 | + // errors on a real k8s transport failure — surface it so the caller retries. |
| 78 | + if d.ProviderID != "" { |
| 79 | + if scaleErr := h.compute.Scale(c.Context(), appID, 1); scaleErr != nil { |
| 80 | + slog.Warn("deploy.wake.scale_failed", |
| 81 | + "app_id", appID, "provider_id", d.ProviderID, "error", scaleErr, |
| 82 | + "request_id", middleware.GetRequestID(c)) |
| 83 | + return respondError(c, fiber.StatusServiceUnavailable, "wake_failed", |
| 84 | + "Failed to wake deployment; please retry") |
| 85 | + } |
| 86 | + } |
| 87 | + |
| 88 | + // DB half: clear scaled_to_zero + bump last_activity_at so the idle-scaler |
| 89 | + // doesn't immediately re-deschedule the just-woken app. |
| 90 | + if _, dbErr := models.WakeDeployment(c.Context(), h.db, d.ID); dbErr != nil { |
| 91 | + slog.Error("deploy.wake.db_failed", |
| 92 | + "app_id", appID, "error", dbErr, |
| 93 | + "request_id", middleware.GetRequestID(c)) |
| 94 | + return respondError(c, fiber.StatusServiceUnavailable, "wake_failed", |
| 95 | + "Failed to record wake; please retry") |
| 96 | + } |
| 97 | + |
| 98 | + // Re-read so the response reflects the cleared flag + new activity stamp. |
| 99 | + fresh, err := models.GetDeploymentByID(c.Context(), h.db, d.ID) |
| 100 | + if err != nil { |
| 101 | + // The scale + DB write already succeeded; a re-read failure shouldn't |
| 102 | + // fail the wake. Fall back to the pre-read row with the fields we just set. |
| 103 | + d.ScaledToZero = false |
| 104 | + fresh = d |
| 105 | + } |
| 106 | + |
| 107 | + slog.Info("deploy.woke", |
| 108 | + "app_id", appID, "team_id", team.ID, |
| 109 | + "request_id", middleware.GetRequestID(c)) |
| 110 | + |
| 111 | + return c.JSON(fiber.Map{ |
| 112 | + "ok": true, |
| 113 | + "message": "Deployment woken — the app will be reachable once its pod is Ready (cold start).", |
| 114 | + "deployment": deploymentToMapWithDB(fresh, h.db), |
| 115 | + }) |
| 116 | +} |
0 commit comments