Skip to content
This repository was archived by the owner on Jan 27, 2026. It is now read-only.

Commit b92f7f7

Browse files
authored
imp(worker) container restart on failure (#510)
* improve worker container restart
1 parent e642879 commit b92f7f7

2 files changed

Lines changed: 11 additions & 1 deletion

File tree

crates/worker/src/docker/docker_manager.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ impl DockerManager {
224224
volumes: Option<Vec<(String, String, bool, bool)>>,
225225
shm_size: Option<u64>,
226226
entrypoint: Option<Vec<String>>,
227+
restart_policy_max_retries: Option<i64>,
227228
) -> Result<String, DockerError> {
228229
info!("Starting to pull image: {}", image);
229230

@@ -408,12 +409,20 @@ impl DockerManager {
408409
}]),
409410
binds: volume_binds,
410411
shm_size: shm_size.map(|s| s as i64),
412+
restart_policy: Some(bollard::models::RestartPolicy {
413+
name: Some(bollard::models::RestartPolicyNameEnum::ON_FAILURE),
414+
maximum_retry_count: restart_policy_max_retries,
415+
}),
411416
..Default::default()
412417
})
413418
} else {
414419
Some(HostConfig {
415420
extra_hosts: Some(vec!["host.docker.internal:host-gateway".into()]),
416421
binds: volume_binds,
422+
restart_policy: Some(bollard::models::RestartPolicy {
423+
name: Some(bollard::models::RestartPolicyNameEnum::ON_FAILURE),
424+
maximum_retry_count: restart_policy_max_retries,
425+
}),
417426
..Default::default()
418427
})
419428
};
@@ -459,6 +468,7 @@ impl DockerManager {
459468
Ok(container.id)
460469
}
461470

471+
/// Remove container, volumes, and directories
462472
pub async fn remove_container(&self, container_id: &str) -> Result<(), DockerError> {
463473
let container = (self.get_container_details(container_id).await).ok();
464474

crates/worker/src/docker/service.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ impl DockerService {
236236
67108864 // Default to 64MB in bytes
237237
}
238238
};
239-
match manager_clone.start_container(&payload.image, &container_task_id, Some(env_vars), Some(cmd), gpu, Some(volumes), Some(shm_size), payload.entrypoint).await {
239+
match manager_clone.start_container(&payload.image, &container_task_id, Some(env_vars), Some(cmd), gpu, Some(volumes), Some(shm_size), payload.entrypoint, None).await {
240240
Ok(container_id) => {
241241
Console::info("DockerService", &format!("Container started with id: {}", container_id));
242242
},

0 commit comments

Comments
 (0)