Skip to content

Commit 423eba6

Browse files
committed
Default Docker registry and credentials
Add the following server environment variables for configuring the default Docker registry and the default Docker registry credentials on the `dstack` server level: - `DSTACK_SERVER_DEFAULT_DOCKER_REGISTRY` – A default Docker registry to use for job images that do not specify an explicit registry. E.g., if set to `registry.example`, then `image: ubuntu` becomes equivalent to `image: registry.example/ubuntu`. **Note**: This setting should only be used for configuring registries that act as a pull-through cache for Docker Hub. The default `dstack` images are also pulled from the configured registry. - `DSTACK_SERVER_DEFAULT_DOCKER_REGISTRY_USERNAME`{ #DSTACK_SERVER_DEFAULT_DOCKER_REGISTRY_USERNAME } – Username for authenticating with the default Docker registry. See `DSTACK_SERVER_DEFAULT_DOCKER_REGISTRY_PASSWORD`. - `DSTACK_SERVER_DEFAULT_DOCKER_REGISTRY_PASSWORD`{ #DSTACK_SERVER_DEFAULT_DOCKER_REGISTRY_PASSWORD } – Password for authenticating with the default Docker registry. Applied only when the image has no explicit registry and the run configuration does not specify `registry_auth`. **Note**: The value may be visible to anyone who can SSH into instances managed by `dstack`, which usually includes all users of that `dstack` server.
1 parent 869754b commit 423eba6

File tree

16 files changed

+594
-45
lines changed

16 files changed

+594
-45
lines changed

docs/docs/reference/environment-variables.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,9 @@ For more details on the options below, refer to the [server deployment](../guide
139139
- `DSTACK_SERVER_INSTANCE_HEALTH_TTL_SECONDS`{ #DSTACK_SERVER_INSTANCE_HEALTH_TTL_SECONDS } – Maximum age of instance health checks.
140140
- `DSTACK_SERVER_INSTANCE_HEALTH_MIN_COLLECT_INTERVAL_SECONDS`{ #DSTACK_SERVER_INSTANCE_HEALTH_MIN_COLLECT_INTERVAL_SECONDS } – Minimum time interval between consecutive health checks of the same instance.
141141
- `DSTACK_SERVER_EVENTS_TTL_SECONDS`{ #DSTACK_SERVER_EVENTS_TTL_SECONDS } – Maximum age of event records. Set to `0` to disable event storage. Defaults to 30 days.
142+
- `DSTACK_SERVER_DEFAULT_DOCKER_REGISTRY`{ #DSTACK_SERVER_DEFAULT_DOCKER_REGISTRY } – A default Docker registry to use for job images that do not specify an explicit registry. E.g., if set to `registry.example`, then `image: ubuntu` becomes equivalent to `image: registry.example/ubuntu`. **Note**: This setting should only be used for configuring registries that act as a pull-through cache for Docker Hub. The default `dstack` images are also pulled from the configured registry.
143+
- `DSTACK_SERVER_DEFAULT_DOCKER_REGISTRY_USERNAME`{ #DSTACK_SERVER_DEFAULT_DOCKER_REGISTRY_USERNAME } – Username for authenticating with the default Docker registry. See `DSTACK_SERVER_DEFAULT_DOCKER_REGISTRY_PASSWORD`.
144+
- `DSTACK_SERVER_DEFAULT_DOCKER_REGISTRY_PASSWORD`{ #DSTACK_SERVER_DEFAULT_DOCKER_REGISTRY_PASSWORD } – Password for authenticating with the default Docker registry. Applied only when the image has no explicit registry and the run configuration does not specify `registry_auth`. **Note**: The value may be visible to anyone who can SSH into instances managed by `dstack`, which usually includes all users of that `dstack` server.
142145

143146
??? info "Internal environment variables"
144147
The following environment variables are intended for development purposes:

src/dstack/_internal/server/background/pipeline_tasks/jobs_running.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@
6767
from dstack._internal.server.services.backends.provisioning import (
6868
get_instance_specific_gpu_devices,
6969
get_instance_specific_mounts,
70-
resolve_provisioning_image_name,
70+
resolve_provisioning_image,
7171
)
7272
from dstack._internal.server.services.gateways import get_or_add_gateway_connection
7373
from dstack._internal.server.services.instances import (
@@ -1126,14 +1126,16 @@ def _process_provisioning_with_shim(
11261126
ssh_user: Optional[str],
11271127
ssh_key: Optional[str],
11281128
) -> bool:
1129-
job_spec = JobSpec.__response__.parse_raw(job_model.job_spec_data)
1129+
job_spec = get_job_spec(job_model)
11301130
shim_client = client.ShimClient(port=ports[DSTACK_SHIM_HTTP_PORT])
11311131

11321132
resp = shim_client.healthcheck()
11331133
if resp is None:
11341134
logger.debug("%s: shim is not available yet", fmt(job_model))
11351135
return False
11361136

1137+
image_name, registry_auth = resolve_provisioning_image(job_spec.image_name, registry_auth, jpd)
1138+
11371139
registry_username = ""
11381140
registry_password = ""
11391141
if registry_auth is not None:
@@ -1167,7 +1169,6 @@ def _process_provisioning_with_shim(
11671169
cpu = None
11681170
memory = None
11691171
network_mode = NetworkMode.HOST
1170-
image_name = resolve_provisioning_image_name(job_spec, jpd)
11711172
if shim_client.is_api_v2_supported():
11721173
shim_client.submit_task(
11731174
task_id=job_model.id,

src/dstack/_internal/server/background/pipeline_tasks/jobs_submitted.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import asyncio
2+
import copy
23
import uuid
34
from contextlib import AsyncExitStack
45
from dataclasses import dataclass
@@ -85,6 +86,7 @@
8586
)
8687
from dstack._internal.server.services import events
8788
from dstack._internal.server.services.backends import get_project_backend_by_type_or_error
89+
from dstack._internal.server.services.docker import apply_server_docker_defaults
8890
from dstack._internal.server.services.fleets import (
8991
check_can_create_new_cloud_instance_in_fleet,
9092
generate_fleet_name,
@@ -1863,6 +1865,11 @@ async def _provision_new_capacity(
18631865
volumes: Optional[list[list[Volume]]] = None,
18641866
fleet_model: Optional[FleetModel] = None,
18651867
) -> Union[_FailedNewCapacityProvisioning, _ProvisionNewCapacityResult]:
1868+
jobs = copy.deepcopy(jobs)
1869+
for job in jobs:
1870+
job.job_spec.image_name, job.job_spec.registry_auth = apply_server_docker_defaults(
1871+
job.job_spec.image_name, job.job_spec.registry_auth
1872+
)
18661873
job = jobs[0]
18671874
if volumes is None:
18681875
volumes = []

src/dstack/_internal/server/background/scheduled_tasks/running_jobs.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
from dstack._internal.server.services.backends.provisioning import (
5858
get_instance_specific_gpu_devices,
5959
get_instance_specific_mounts,
60-
resolve_provisioning_image_name,
60+
resolve_provisioning_image,
6161
)
6262
from dstack._internal.server.services.instances import (
6363
get_instance_remote_connection_info,
@@ -750,6 +750,8 @@ def _process_provisioning_with_shim(
750750
logger.debug("%s: shim is not available yet", fmt(job_model))
751751
return False # shim is not available yet
752752

753+
image_name, registry_auth = resolve_provisioning_image(job_spec.image_name, registry_auth, jpd)
754+
753755
registry_username = ""
754756
registry_password = ""
755757
if registry_auth is not None:
@@ -790,7 +792,6 @@ def _process_provisioning_with_shim(
790792
cpu = None
791793
memory = None
792794
network_mode = NetworkMode.HOST
793-
image_name = resolve_provisioning_image_name(job_spec, jpd)
794795
if shim_client.is_api_v2_supported():
795796
shim_client.submit_task(
796797
task_id=job_model.id,

src/dstack/_internal/server/background/scheduled_tasks/submitted_jobs.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import asyncio
2+
import copy
23
import itertools
34
import uuid
45
from contextlib import AsyncExitStack
@@ -81,6 +82,7 @@
8182
)
8283
from dstack._internal.server.services import events
8384
from dstack._internal.server.services.backends import get_project_backend_by_type_or_error
85+
from dstack._internal.server.services.docker import apply_server_docker_defaults
8486
from dstack._internal.server.services.fleets import (
8587
check_can_create_new_cloud_instance_in_fleet,
8688
generate_fleet_name,
@@ -1169,6 +1171,11 @@ async def _provision_new_capacity(
11691171
and run only the master job in case there are no offers supporting cluster groups.
11701172
Other jobs should be provisioned one-by-one later.
11711173
"""
1174+
jobs = copy.deepcopy(jobs)
1175+
for job in jobs:
1176+
job.job_spec.image_name, job.job_spec.registry_auth = apply_server_docker_defaults(
1177+
job.job_spec.image_name, job.job_spec.registry_auth
1178+
)
11721179
job = jobs[0]
11731180
if volumes is None:
11741181
volumes = []

src/dstack/_internal/server/services/backends/provisioning.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
import re
2+
from typing import Optional
23

34
from dstack._internal import settings
45
from dstack._internal.core.models.backends.base import BackendType
5-
from dstack._internal.core.models.runs import JobProvisioningData, JobSpec
6+
from dstack._internal.core.models.common import RegistryAuth
7+
from dstack._internal.core.models.runs import JobProvisioningData
68
from dstack._internal.core.models.volumes import InstanceMountPoint
79
from dstack._internal.server.schemas.runner import GPUDevice
10+
from dstack._internal.server.services.docker import apply_server_docker_defaults, parse_image_name
811

912
_AWS_EFA_ENABLED_INSTANCE_TYPE_PATTERNS = [
1013
# TODO: p6-b200 isn't supported yet in gpuhunt
@@ -87,17 +90,18 @@ def get_instance_specific_gpu_devices(
8790
return gpu_devices
8891

8992

90-
def resolve_provisioning_image_name(
91-
job_spec: JobSpec,
93+
def resolve_provisioning_image(
94+
image_name: str,
95+
registry_auth: Optional[RegistryAuth],
9296
job_provisioning_data: JobProvisioningData,
93-
) -> str:
94-
image_name = job_spec.image_name
97+
) -> tuple[str, Optional[RegistryAuth]]:
98+
image_name, registry_auth = apply_server_docker_defaults(image_name, registry_auth)
9599
if job_provisioning_data.backend == BackendType.AWS:
96-
return _patch_base_image_for_aws_efa(
100+
image_name = _patch_base_image_for_aws_efa(
97101
image_name,
98102
job_provisioning_data.instance_type.name,
99103
)
100-
return image_name
104+
return image_name, registry_auth
101105

102106

103107
def _patch_base_image_for_aws_efa(
@@ -111,7 +115,7 @@ def _patch_base_image_for_aws_efa(
111115
if not is_efa_enabled:
112116
return image_name
113117

114-
if not image_name.startswith(f"{settings.DSTACK_BASE_IMAGE}:"):
118+
if parse_image_name(image_name).repo != settings.DSTACK_BASE_IMAGE:
115119
return image_name
116120

117121
if image_name.endswith(f"-base-ubuntu{settings.DSTACK_BASE_IMAGE_UBUNTU_VERSION}"):

src/dstack/_internal/server/services/docker.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
FrozenCoreModel,
1515
RegistryAuth,
1616
)
17+
from dstack._internal.server import settings as server_settings
1718
from dstack._internal.server.utils.common import join_byte_stream_checked
1819

1920
DEFAULT_PLATFORM = "linux/amd64"
@@ -151,6 +152,26 @@ def is_host(s: str) -> bool:
151152
return s == "localhost" or ":" in s or "." in s
152153

153154

155+
def apply_server_docker_defaults(
156+
image_name: str,
157+
registry_auth: Optional[RegistryAuth],
158+
) -> tuple[str, Optional[RegistryAuth]]:
159+
if parse_image_name(image_name).registry is not None:
160+
return image_name, registry_auth
161+
if server_settings.SERVER_DEFAULT_DOCKER_REGISTRY is not None:
162+
image_name = f"{server_settings.SERVER_DEFAULT_DOCKER_REGISTRY}/{image_name}"
163+
if (
164+
registry_auth is None
165+
and server_settings.SERVER_DEFAULT_DOCKER_REGISTRY_USERNAME is not None
166+
and server_settings.SERVER_DEFAULT_DOCKER_REGISTRY_PASSWORD is not None
167+
):
168+
registry_auth = RegistryAuth(
169+
username=server_settings.SERVER_DEFAULT_DOCKER_REGISTRY_USERNAME,
170+
password=server_settings.SERVER_DEFAULT_DOCKER_REGISTRY_PASSWORD,
171+
)
172+
return image_name, registry_auth
173+
174+
154175
DOCKER_TARGET_PATH_PATTERN = re.compile(r"^(/[^/\0]*)+/?$")
155176

156177

src/dstack/_internal/server/services/jobs/configurators/base.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,11 @@
4848
from dstack._internal.core.models.volumes import MountPoint, VolumeMountPoint
4949
from dstack._internal.core.services.profiles import get_retry
5050
from dstack._internal.core.services.ssh.ports import filter_reserved_ports
51-
from dstack._internal.server.services.docker import ImageConfig, get_image_config
51+
from dstack._internal.server.services.docker import (
52+
ImageConfig,
53+
apply_server_docker_defaults,
54+
get_image_config,
55+
)
5256
from dstack._internal.utils import crypto
5357
from dstack._internal.utils.common import run_async
5458
from dstack._internal.utils.interpolator import InterpolatorError, VariablesInterpolator
@@ -77,7 +81,7 @@ def get_default_python_verison() -> str:
7781
def get_default_image(nvcc: bool = False) -> str:
7882
"""
7983
Note: May be overridden by dstack (e.g., EFA-enabled version for AWS EFA-capable instances).
80-
See `dstack._internal.server.services.backends.provisioning.resolve_provisioning_image_name`
84+
See `dstack._internal.server.services.backends.provisioning.resolve_provisioning_image`
8185
for details.
8286
8387
Args:
@@ -140,9 +144,10 @@ async def _get_image_config(self) -> ImageConfig:
140144
)
141145
except InterpolatorError as e:
142146
raise ServerClientError(e.args[0])
147+
image_name, registry_auth = apply_server_docker_defaults(self._image_name(), registry_auth)
143148
image_config = await run_async(
144149
_get_image_config,
145-
self._image_name(),
150+
image_name,
146151
registry_auth,
147152
)
148153
self._image_config = image_config

src/dstack/_internal/server/settings.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,14 @@
137137
os.getenv("DSTACK_DEFAULT_SERVICE_CLIENT_MAX_BODY_SIZE", 64 * 1024 * 1024)
138138
)
139139

140+
SERVER_DEFAULT_DOCKER_REGISTRY = os.getenv("DSTACK_SERVER_DEFAULT_DOCKER_REGISTRY") or None
141+
SERVER_DEFAULT_DOCKER_REGISTRY_USERNAME = (
142+
os.getenv("DSTACK_SERVER_DEFAULT_DOCKER_REGISTRY_USERNAME") or None
143+
)
144+
SERVER_DEFAULT_DOCKER_REGISTRY_PASSWORD = (
145+
os.getenv("DSTACK_SERVER_DEFAULT_DOCKER_REGISTRY_PASSWORD") or None
146+
)
147+
140148
USER_PROJECT_DEFAULT_QUOTA = int(os.getenv("DSTACK_USER_PROJECT_DEFAULT_QUOTA", 10))
141149
FORBID_SERVICES_WITHOUT_GATEWAY = os.getenv("DSTACK_FORBID_SERVICES_WITHOUT_GATEWAY") is not None
142150

src/tests/_internal/server/background/pipeline_tasks/test_running_jobs.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
DevEnvironmentConfiguration,
2121
ProbeConfig,
2222
ServiceConfiguration,
23+
TaskConfiguration,
2324
)
2425
from dstack._internal.core.models.gateways import GatewayStatus
2526
from dstack._internal.core.models.instances import InstanceStatus
@@ -2023,3 +2024,53 @@ async def invalidate_lock(*args, **kwargs):
20232024
.all()
20242025
)
20252026
assert probes == []
2027+
2028+
async def test_provisioning_shim_uses_server_default_registry(
2029+
self,
2030+
monkeypatch: pytest.MonkeyPatch,
2031+
test_db,
2032+
session: AsyncSession,
2033+
worker: JobRunningWorker,
2034+
ssh_tunnel_mock: Mock,
2035+
shim_client_mock: Mock,
2036+
):
2037+
monkeypatch.setattr(server_settings, "SERVER_DEFAULT_DOCKER_REGISTRY", "registry.example")
2038+
monkeypatch.setattr(
2039+
server_settings, "SERVER_DEFAULT_DOCKER_REGISTRY_USERNAME", "server-user"
2040+
)
2041+
monkeypatch.setattr(
2042+
server_settings, "SERVER_DEFAULT_DOCKER_REGISTRY_PASSWORD", "server-pass"
2043+
)
2044+
project = await create_project(session=session)
2045+
user = await create_user(session=session)
2046+
repo = await create_repo(session=session, project_id=project.id)
2047+
run_spec = get_run_spec(
2048+
repo_id=repo.name,
2049+
configuration=TaskConfiguration(image="ubuntu"),
2050+
)
2051+
run = await create_run(
2052+
session=session,
2053+
project=project,
2054+
repo=repo,
2055+
user=user,
2056+
run_spec=run_spec,
2057+
)
2058+
instance = await create_instance(
2059+
session=session, project=project, status=InstanceStatus.BUSY
2060+
)
2061+
job = await create_job(
2062+
session=session,
2063+
run=run,
2064+
status=JobStatus.PROVISIONING,
2065+
job_provisioning_data=get_job_provisioning_data(dockerized=True),
2066+
instance=instance,
2067+
instance_assigned=True,
2068+
)
2069+
2070+
await _process_job(session, worker, job)
2071+
2072+
shim_client_mock.submit_task.assert_called_once()
2073+
call_kwargs = shim_client_mock.submit_task.call_args[1]
2074+
assert call_kwargs["image_name"] == "registry.example/ubuntu"
2075+
assert call_kwargs["registry_username"] == "server-user"
2076+
assert call_kwargs["registry_password"] == "server-pass"

0 commit comments

Comments
 (0)