Skip to content

Commit daa3d03

Browse files
authored
Kubernetes: configure /dev/shm if requested (#3135)
Part-of: #3126
1 parent f90259b commit daa3d03

File tree

1 file changed

+34
-5
lines changed
  • src/dstack/_internal/core/backends/kubernetes

1 file changed

+34
-5
lines changed

src/dstack/_internal/core/backends/kubernetes/compute.py

Lines changed: 34 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@
4747
Resources,
4848
SSHConnectionParams,
4949
)
50-
from dstack._internal.core.models.resources import CPUSpec
50+
from dstack._internal.core.models.resources import CPUSpec, Memory
5151
from dstack._internal.core.models.runs import Job, JobProvisioningData, Requirements, Run
5252
from dstack._internal.core.models.volumes import Volume
5353
from dstack._internal.utils.common import parse_memory
@@ -171,11 +171,15 @@ def run_job(
171171
"jump_pod_port": jump_pod_port,
172172
},
173173
).start()
174-
resources_spec = job.job_spec.requirements.resources
175-
assert isinstance(resources_spec.cpu, CPUSpec)
174+
176175
resources_requests: dict[str, str] = {}
177176
resources_limits: dict[str, str] = {}
178177
node_affinity: Optional[client.V1NodeAffinity] = None
178+
volumes_: list[client.V1Volume] = []
179+
volume_mounts: list[client.V1VolumeMount] = []
180+
181+
resources_spec = job.job_spec.requirements.resources
182+
assert isinstance(resources_spec.cpu, CPUSpec)
179183
if (cpu_min := resources_spec.cpu.count.min) is not None:
180184
resources_requests["cpu"] = str(cpu_min)
181185
if (gpu_spec := resources_spec.gpu) is not None:
@@ -231,13 +235,32 @@ def run_job(
231235
),
232236
],
233237
)
238+
234239
if (memory_min := resources_spec.memory.min) is not None:
235-
resources_requests["memory"] = f"{float(memory_min)}Gi"
240+
resources_requests["memory"] = _render_memory(memory_min)
236241
if (
237242
resources_spec.disk is not None
238243
and (disk_min := resources_spec.disk.size.min) is not None
239244
):
240-
resources_requests["ephemeral-storage"] = f"{float(disk_min)}Gi"
245+
resources_requests["ephemeral-storage"] = _render_memory(disk_min)
246+
if (shm_size := resources_spec.shm_size) is not None:
247+
shm_volume_name = "dev-shm"
248+
volumes_.append(
249+
client.V1Volume(
250+
name=shm_volume_name,
251+
empty_dir=client.V1EmptyDirVolumeSource(
252+
medium="Memory",
253+
size_limit=_render_memory(shm_size),
254+
),
255+
)
256+
)
257+
volume_mounts.append(
258+
client.V1VolumeMount(
259+
name=shm_volume_name,
260+
mount_path="/dev/shm",
261+
)
262+
)
263+
241264
pod = client.V1Pod(
242265
metadata=client.V1ObjectMeta(
243266
name=instance_name,
@@ -264,9 +287,11 @@ def run_job(
264287
requests=resources_requests,
265288
limits=resources_limits,
266289
),
290+
volume_mounts=volume_mounts,
267291
)
268292
],
269293
affinity=node_affinity,
294+
volumes=volumes_,
270295
),
271296
)
272297
call_api_method(
@@ -452,6 +477,10 @@ def _parse_memory(memory: str) -> int:
452477
return int(parse_memory(memory, as_untis="M"))
453478

454479

480+
def _render_memory(memory: Memory) -> str:
481+
return f"{float(memory)}Gi"
482+
483+
455484
def _get_gpus_from_node_labels(labels: dict[str, str]) -> tuple[list[Gpu], Optional[str]]:
456485
# We rely on https://github.com/NVIDIA/k8s-device-plugin/tree/main/docs/gpu-feature-discovery
457486
# to detect gpus. Note that "nvidia.com/gpu.product" is not a short gpu name like "T4" or

0 commit comments

Comments
 (0)