4747 Resources ,
4848 SSHConnectionParams ,
4949)
50- from dstack ._internal .core .models .resources import CPUSpec
50+ from dstack ._internal .core .models .resources import CPUSpec , Memory
5151from dstack ._internal .core .models .runs import Job , JobProvisioningData , Requirements , Run
5252from dstack ._internal .core .models .volumes import Volume
5353from dstack ._internal .utils .common import parse_memory
@@ -171,11 +171,15 @@ def run_job(
171171 "jump_pod_port" : jump_pod_port ,
172172 },
173173 ).start ()
174- resources_spec = job .job_spec .requirements .resources
175- assert isinstance (resources_spec .cpu , CPUSpec )
174+
176175 resources_requests : dict [str , str ] = {}
177176 resources_limits : dict [str , str ] = {}
178177 node_affinity : Optional [client .V1NodeAffinity ] = None
178+ volumes_ : list [client .V1Volume ] = []
179+ volume_mounts : list [client .V1VolumeMount ] = []
180+
181+ resources_spec = job .job_spec .requirements .resources
182+ assert isinstance (resources_spec .cpu , CPUSpec )
179183 if (cpu_min := resources_spec .cpu .count .min ) is not None :
180184 resources_requests ["cpu" ] = str (cpu_min )
181185 if (gpu_spec := resources_spec .gpu ) is not None :
@@ -231,13 +235,32 @@ def run_job(
231235 ),
232236 ],
233237 )
238+
234239 if (memory_min := resources_spec .memory .min ) is not None :
235- resources_requests ["memory" ] = f" { float (memory_min )} Gi"
240+ resources_requests ["memory" ] = _render_memory (memory_min )
236241 if (
237242 resources_spec .disk is not None
238243 and (disk_min := resources_spec .disk .size .min ) is not None
239244 ):
240- resources_requests ["ephemeral-storage" ] = f"{ float (disk_min )} Gi"
245+ resources_requests ["ephemeral-storage" ] = _render_memory (disk_min )
246+ if (shm_size := resources_spec .shm_size ) is not None :
247+ shm_volume_name = "dev-shm"
248+ volumes_ .append (
249+ client .V1Volume (
250+ name = shm_volume_name ,
251+ empty_dir = client .V1EmptyDirVolumeSource (
252+ medium = "Memory" ,
253+ size_limit = _render_memory (shm_size ),
254+ ),
255+ )
256+ )
257+ volume_mounts .append (
258+ client .V1VolumeMount (
259+ name = shm_volume_name ,
260+ mount_path = "/dev/shm" ,
261+ )
262+ )
263+
241264 pod = client .V1Pod (
242265 metadata = client .V1ObjectMeta (
243266 name = instance_name ,
@@ -264,9 +287,11 @@ def run_job(
264287 requests = resources_requests ,
265288 limits = resources_limits ,
266289 ),
290+ volume_mounts = volume_mounts ,
267291 )
268292 ],
269293 affinity = node_affinity ,
294+ volumes = volumes_ ,
270295 ),
271296 )
272297 call_api_method (
@@ -452,6 +477,10 @@ def _parse_memory(memory: str) -> int:
452477 return int (parse_memory (memory , as_untis = "M" ))
453478
454479
def _render_memory(memory: Memory) -> str:
    """Render a memory size as a Kubernetes resource quantity string.

    Callers above use this for memory/ephemeral-storage requests and the
    ``/dev/shm`` emptyDir ``size_limit``. ``float(memory)`` yields the
    numeric size (presumably GiB-denominated — matches the previous inline
    ``f"{float(...)}Gi"`` usage) and ``Gi`` marks it as a binary-SI
    Kubernetes quantity, e.g. ``"16.0Gi"``.
    """
    size_gib = float(memory)
    return f"{size_gib}Gi"
483+
455484def _get_gpus_from_node_labels (labels : dict [str , str ]) -> tuple [list [Gpu ], Optional [str ]]:
456485 # We rely on https://github.com/NVIDIA/k8s-device-plugin/tree/main/docs/gpu-feature-discovery
457486 # to detect gpus. Note that "nvidia.com/gpu.product" is not a short gpu name like "T4" or
0 commit comments