Skip to content

Commit da5c0cc

Browse files
authored
Rolling deployments for port (#2893)
Allow changing the `port` service configuration property using rolling deployments. Implemented by moving `port` to the job spec.
1 parent c7143e4 commit da5c0cc

File tree

12 files changed

+54
-10
lines changed

12 files changed

+54
-10
lines changed

docs/docs/concepts/services.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -725,7 +725,7 @@ The rolling deployment stops when all replicas are updated or when a new deploym
725725
??? info "Supported properties"
726726
<!-- NOTE: should be in sync with constants in server/services/runs.py -->
727727

728-
Rolling deployment supports changes to the following properties: `resources`, `volumes`, `docker`, `files`, `image`, `user`, `privileged`, `entrypoint`, `working_dir`, `python`, `nvcc`, `single_branch`, `env`, `shell`, `commands`, as well as changes to [repo](repos.md) or [file](#files) contents.
728+
Rolling deployment supports changes to the following properties: `port`, `resources`, `volumes`, `docker`, `files`, `image`, `user`, `privileged`, `entrypoint`, `working_dir`, `python`, `nvcc`, `single_branch`, `env`, `shell`, `commands`, as well as changes to [repo](repos.md) or [file](#files) contents.
729729

730730
Changes to `replicas` and `scaling` can be applied without redeploying replicas.
731731

src/dstack/_internal/core/compatibility/runs.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,8 @@ def get_job_spec_excludes(job_specs: list[JobSpec]) -> IncludeExcludeDictType:
148148
spec_excludes["repo_data"] = True
149149
if all(not s.file_archives for s in job_specs):
150150
spec_excludes["file_archives"] = True
151+
if all(s.service_port is None for s in job_specs):
152+
spec_excludes["service_port"] = True
151153

152154
return spec_excludes
153155

src/dstack/_internal/core/models/configurations.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,8 +398,9 @@ class TaskConfiguration(
398398

399399
class ServiceConfigurationParams(CoreModel):
400400
port: Annotated[
401+
# NOTE: it's a PortMapping for historical reasons. Only `port.container_port` is used.
401402
Union[ValidPort, constr(regex=r"^[0-9]+:[0-9]+$"), PortMapping],
402-
Field(description="The port, that application listens on or the mapping"),
403+
Field(description="The port the application listens on"),
403404
]
404405
gateway: Annotated[
405406
Optional[Union[bool, str]],

src/dstack/_internal/core/models/runs.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
DEFAULT_REPO_DIR,
1212
AnyRunConfiguration,
1313
RunConfiguration,
14+
ServiceConfiguration,
1415
)
1516
from dstack._internal.core.models.files import FileArchiveMapping
1617
from dstack._internal.core.models.instances import (
@@ -253,6 +254,8 @@ class JobSpec(CoreModel):
253254
# TODO: drop this comment when supporting jobs submitted before 0.19.17 is no longer relevant.
254255
repo_code_hash: Optional[str] = None
255256
file_archives: list[FileArchiveMapping] = []
257+
# None for non-services and pre-0.19.19 services. See `get_service_port`
258+
service_port: Optional[int] = None
256259

257260

258261
class JobProvisioningData(CoreModel):
@@ -550,3 +553,11 @@ def get_policy_map(spot_policy: Optional[SpotPolicy], default: SpotPolicy) -> Op
550553
SpotPolicy.ONDEMAND: False,
551554
}
552555
return policy_map[spot_policy]
556+
557+
558+
def get_service_port(job_spec: JobSpec, configuration: ServiceConfiguration) -> int:
559+
# Compatibility with pre-0.19.19 job specs that do not have the `service_port` property.
560+
# TODO: drop when pre-0.19.19 jobs are no longer relevant.
561+
if job_spec.service_port is None:
562+
return configuration.port.container_port
563+
return job_spec.service_port

src/dstack/_internal/core/services/ssh/attach.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def __init__(
6464
run_name: str,
6565
dockerized: bool,
6666
ssh_proxy: Optional[SSHConnectionParams] = None,
67+
service_port: Optional[int] = None,
6768
local_backend: bool = False,
6869
bind_address: Optional[str] = None,
6970
):
@@ -90,6 +91,7 @@ def __init__(
9091
},
9192
)
9293
self.ssh_proxy = ssh_proxy
94+
self.service_port = service_port
9395

9496
hosts: dict[str, dict[str, Union[str, int, FilePath]]] = {}
9597
self.hosts = hosts

src/dstack/_internal/server/services/gateways/client.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77

88
from dstack._internal.core.consts import DSTACK_RUNNER_SSH_PORT
99
from dstack._internal.core.errors import GatewayError
10-
from dstack._internal.core.models.configurations import RateLimit
10+
from dstack._internal.core.models.configurations import RateLimit, ServiceConfiguration
1111
from dstack._internal.core.models.instances import SSHConnectionParams
12-
from dstack._internal.core.models.runs import JobSubmission, Run
12+
from dstack._internal.core.models.runs import JobSpec, JobSubmission, Run, get_service_port
1313
from dstack._internal.proxy.gateway.schemas.stats import ServiceStats
1414
from dstack._internal.server import settings
1515

@@ -80,13 +80,15 @@ async def unregister_service(self, project: str, run_name: str):
8080
async def register_replica(
8181
self,
8282
run: Run,
83+
job_spec: JobSpec,
8384
job_submission: JobSubmission,
8485
ssh_head_proxy: Optional[SSHConnectionParams],
8586
ssh_head_proxy_private_key: Optional[str],
8687
):
88+
assert isinstance(run.run_spec.configuration, ServiceConfiguration)
8789
payload = {
8890
"job_id": job_submission.id.hex,
89-
"app_port": run.run_spec.configuration.port.container_port,
91+
"app_port": get_service_port(job_spec, run.run_spec.configuration),
9092
"ssh_head_proxy": ssh_head_proxy.dict() if ssh_head_proxy is not None else None,
9193
"ssh_head_proxy_private_key": ssh_head_proxy_private_key,
9294
}

src/dstack/_internal/server/services/jobs/configurators/base.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
PortMapping,
1616
PythonVersion,
1717
RunConfigurationType,
18+
ServiceConfiguration,
1819
)
1920
from dstack._internal.core.models.profiles import (
2021
DEFAULT_STOP_DURATION,
@@ -153,6 +154,7 @@ async def _get_job_spec(
153154
repo_data=self.run_spec.repo_data,
154155
repo_code_hash=self.run_spec.repo_code_hash,
155156
file_archives=self.run_spec.file_archives,
157+
service_port=self._service_port(),
156158
)
157159
return job_spec
158160

@@ -306,6 +308,11 @@ def _ssh_key(self, jobs_per_replica: int) -> Optional[JobSSHKey]:
306308
)
307309
return self._job_ssh_key
308310

311+
def _service_port(self) -> Optional[int]:
312+
if isinstance(self.run_spec.configuration, ServiceConfiguration):
313+
return self.run_spec.configuration.port.container_port
314+
return None
315+
309316

310317
def interpolate_job_volumes(
311318
run_volumes: List[Union[MountPoint, str]],

src/dstack/_internal/server/services/proxy/repo.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@
1212
from dstack._internal.core.models.instances import RemoteConnectionInfo, SSHConnectionParams
1313
from dstack._internal.core.models.runs import (
1414
JobProvisioningData,
15+
JobSpec,
1516
JobStatus,
1617
RunSpec,
1718
RunStatus,
1819
ServiceSpec,
20+
get_service_port,
1921
)
2022
from dstack._internal.core.models.services import AnyModel
2123
from dstack._internal.proxy.lib.models import (
@@ -97,9 +99,10 @@ async def get_service(self, project_name: str, run_name: str) -> Optional[Servic
9799
if rci.ssh_proxy is not None:
98100
ssh_head_proxy = rci.ssh_proxy
99101
ssh_head_proxy_private_key = get_or_error(rci.ssh_proxy_keys)[0].private
102+
job_spec: JobSpec = JobSpec.__response__.parse_raw(job.job_spec_data)
100103
replica = Replica(
101104
id=job.id.hex,
102-
app_port=run_spec.configuration.port.container_port,
105+
app_port=get_service_port(job_spec, run_spec.configuration),
103106
ssh_destination=ssh_destination,
104107
ssh_port=ssh_port,
105108
ssh_proxy=ssh_proxy,

src/dstack/_internal/server/services/runs.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -996,6 +996,7 @@ def _validate_run_spec_and_set_defaults(run_spec: RunSpec):
996996
"scaling",
997997
# rolling deployment
998998
# NOTE: keep this list in sync with the "Rolling deployment" section in services.md
999+
"port",
9991000
"resources",
10001001
"volumes",
10011002
"docker",

src/dstack/_internal/server/services/services/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from dstack._internal.core.models.configurations import SERVICE_HTTPS_DEFAULT, ServiceConfiguration
2323
from dstack._internal.core.models.gateways import GatewayConfiguration, GatewayStatus
2424
from dstack._internal.core.models.instances import SSHConnectionParams
25-
from dstack._internal.core.models.runs import Run, RunSpec, ServiceModelSpec, ServiceSpec
25+
from dstack._internal.core.models.runs import JobSpec, Run, RunSpec, ServiceModelSpec, ServiceSpec
2626
from dstack._internal.server import settings
2727
from dstack._internal.server.models import GatewayModel, JobModel, ProjectModel, RunModel
2828
from dstack._internal.server.services.gateways import (
@@ -179,6 +179,7 @@ async def register_replica(
179179
async with conn.client() as client:
180180
await client.register_replica(
181181
run=run,
182+
job_spec=JobSpec.__response__.parse_raw(job_model.job_spec_data),
182183
job_submission=job_submission,
183184
ssh_head_proxy=ssh_head_proxy,
184185
ssh_head_proxy_private_key=ssh_head_proxy_private_key,

0 commit comments

Comments
 (0)