Skip to content

Commit e549271

Browse files
authored
Add gateway replica statuses and pipeline (#3990)
- Introduce gateway replica statuses.
- Provision and terminate gateway replicas independently from each other, in a separate pipeline.

In this version, the pipelines have the following responsibilities.

Gateway pipeline:
- `SUBMITTED` — create replica DB records, transition to `PROVISIONING`; in a future version — create the load balancer (e.g., AWS ALB).
- `PROVISIONING` — once all replicas reach `RUNNING`, transition to `RUNNING`; if any replica enters `TERMINATING` or `TERMINATED`, transition to `FAILED`.
- `RUNNING`, `FAILED` — delete the gateway if deletion is requested and all replicas are `TERMINATED`.

Gateway replica pipeline:
- `SUBMITTED` — call backend to create the cloud instance, transition to `PROVISIONING` on success, or `TERMINATED` on failure.
- `PROVISIONING` — SSH-connect to the instance and configure the gateway, transition to `RUNNING` on success, or `TERMINATING` on failure.
- `RUNNING` — nothing to do.
- `TERMINATING` — call backend to destroy the cloud instance, transition to `TERMINATED`.
- `TERMINATED` — nothing to do.
- Also, in `SUBMITTED`, `PROVISIONING`, or `RUNNING` — transition to `TERMINATING` or `TERMINATED` if the gateway is `FAILED` or gateway deletion is requested.
1 parent 28ea5f8 commit e549271

18 files changed

Lines changed: 2263 additions & 572 deletions

File tree

mkdocs/docs/concepts/gateways.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ The example-gateway doesn't exist. Create it? [y/n]: y
4343
Provisioning...
4444
---> 100%
4545
46-
BACKEND REGION NAME HOSTNAME DOMAIN DEFAULT STATUS
47-
aws eu-west-1 example-gateway example.com ✓ submitted
46+
NAME BACKEND HOSTNAME DOMAIN DEFAULT STATUS
47+
example-gateway aws (eu-west-1) 34.244.128.46 example.com ✓ running
4848
```
4949

5050
</div>
@@ -211,8 +211,8 @@ To balance requests between gateway replicas, add DNS records for each replica o
211211
$ dstack gateway list
212212
NAME BACKEND HOSTNAME DOMAIN DEFAULT STATUS
213213
example-gateway example.com ✓ running
214-
replica=0 aws (eu-west-1) 34.244.128.46
215-
replica=1 aws (eu-west-1) 18.201.201.174
214+
replica=0 aws (eu-west-1) 34.244.128.46 running
215+
replica=1 aws (eu-west-1) 18.201.201.174 running
216216
```
217217

218218
</div>

src/dstack/_internal/cli/utils/gateway.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,10 @@ def get_gateways_table(
118118
gateway.replicas[0].backend, gateway.replicas[0].region
119119
)
120120
gateway_row["HOSTNAME"] = gateway_row.get("HOSTNAME", gateway.replicas[0].hostname)
121+
gateway_row["STATUS"] = gateway.replicas[0].status or gateway.status
122+
gateway_row["ERROR"] = ". ".join(
123+
m for m in [gateway.status_message, gateway.replicas[0].status_message] if m
124+
)
121125
add_row_from_dict(table, gateway_row)
122126

123127
if len(gateway.replicas) > 1:
@@ -126,7 +130,9 @@ def get_gateways_table(
126130
"NAME": f" replica={replica.replica_num}",
127131
"BACKEND": format_backend(replica.backend, replica.region),
128132
"HOSTNAME": replica.hostname,
133+
"STATUS": replica.status,
129134
"CREATED": format_date(replica.created_at),
135+
"ERROR": replica.status_message,
130136
}
131137
add_row_from_dict(table, replica_row, style="secondary")
132138

src/dstack/_internal/core/models/gateways.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,14 @@ class GatewayStatus(str, Enum):
2121
FAILED = "failed"
2222

2323

24+
class GatewayReplicaStatus(str, Enum):
25+
SUBMITTED = "submitted"
26+
PROVISIONING = "provisioning"
27+
RUNNING = "running"
28+
TERMINATING = "terminating"
29+
TERMINATED = "terminated"
30+
31+
2432
class LetsEncryptGatewayCertificate(CoreModel):
2533
type: Annotated[
2634
Literal["lets-encrypt"], Field(description="Automatic certificates by Let's Encrypt")
@@ -119,11 +127,14 @@ class GatewaySpec(CoreModel):
119127

120128

121129
class GatewayReplica(CoreModel):
122-
hostname: str
130+
hostname: Optional[str] = None
123131
replica_num: int
124-
backend: BackendType
125-
region: str
132+
backend: Optional[BackendType] = None
133+
region: Optional[str] = None
126134
created_at: datetime.datetime
135+
status: Optional[GatewayReplicaStatus] = None
136+
"""`status` is only optional on the client side for compatibility with 0.20.25 and 0.20.26 servers"""
137+
status_message: Optional[str] = None
127138

128139

129140
class Gateway(CoreModel):

src/dstack/_internal/server/background/pipeline_tasks/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
from dstack._internal.server.background.pipeline_tasks.base import Pipeline
44
from dstack._internal.server.background.pipeline_tasks.compute_groups import ComputeGroupPipeline
55
from dstack._internal.server.background.pipeline_tasks.fleets import FleetPipeline
6+
from dstack._internal.server.background.pipeline_tasks.gateway_replicas import (
7+
GatewayReplicaPipeline,
8+
)
69
from dstack._internal.server.background.pipeline_tasks.gateways import GatewayPipeline
710
from dstack._internal.server.background.pipeline_tasks.instances import InstancePipeline
811
from dstack._internal.server.background.pipeline_tasks.jobs_running import JobRunningPipeline
@@ -33,6 +36,7 @@ def __init__(self) -> None:
3336
ComputeGroupPipeline(pipeline_hinter=self._hinter),
3437
FleetPipeline(pipeline_hinter=self._hinter),
3538
GatewayPipeline(pipeline_hinter=self._hinter),
39+
GatewayReplicaPipeline(pipeline_hinter=self._hinter),
3640
JobSubmittedPipeline(pipeline_hinter=self._hinter),
3741
JobRunningPipeline(pipeline_hinter=self._hinter),
3842
JobTerminatingPipeline(pipeline_hinter=self._hinter),

0 commit comments

Comments
 (0)