4545 InstanceOfferWithAvailability ,
4646 InstanceRuntime ,
4747 InstanceStatus ,
48+ InstanceTerminationReason ,
4849 RemoteConnectionInfo ,
4950 SSHKey ,
5051)
@@ -240,7 +241,7 @@ def _check_and_mark_terminating_if_idle_duration_expired(instance: InstanceModel
240241 delta = datetime .timedelta (seconds = idle_seconds )
241242 if idle_duration > delta :
242243 instance .status = InstanceStatus .TERMINATING
243- instance .termination_reason = "Idle timeout"
244+ instance .termination_reason = InstanceTerminationReason . IDLE_TIMEOUT . value
244245 logger .info (
245246 "Instance %s idle duration expired: idle time %ss. Terminating" ,
246247 instance .name ,
@@ -262,7 +263,7 @@ async def _add_remote(instance: InstanceModel) -> None:
262263 retry_duration_deadline = instance .created_at + timedelta (seconds = PROVISIONING_TIMEOUT_SECONDS )
263264 if retry_duration_deadline < get_current_datetime ():
264265 instance .status = InstanceStatus .TERMINATED
265- instance .termination_reason = "Provisioning timeout expired"
266+ instance .termination_reason = InstanceTerminationReason . PROOVISIONING_TIMEOUT . value
266267 logger .warning (
267268 "Failed to start instance %s in %d seconds. Terminating..." ,
268269 instance .name ,
@@ -285,7 +286,8 @@ async def _add_remote(instance: InstanceModel) -> None:
285286 ssh_proxy_pkeys = None
286287 except (ValueError , PasswordRequiredException ):
287288 instance .status = InstanceStatus .TERMINATED
288- instance .termination_reason = "Unsupported private SSH key type"
289+ instance .termination_reason = InstanceTerminationReason .ERROR .value
290+ instance .termination_reason_message = "Unsupported private SSH key type"
289291 logger .warning (
290292 "Failed to add instance %s: unsupported private SSH key type" ,
291293 instance .name ,
@@ -343,7 +345,10 @@ async def _add_remote(instance: InstanceModel) -> None:
343345 )
344346 if instance_network is not None and internal_ip is None :
345347 instance .status = InstanceStatus .TERMINATED
346- instance .termination_reason = "Failed to locate internal IP address on the given network"
348+ instance .termination_reason = InstanceTerminationReason .ERROR .value
349+ instance .termination_reason_message = (
350+ "Failed to locate internal IP address on the given network"
351+ )
347352 logger .warning (
348353 "Failed to add instance %s: failed to locate internal IP address on the given network" ,
349354 instance .name ,
@@ -356,7 +361,8 @@ async def _add_remote(instance: InstanceModel) -> None:
356361 if internal_ip is not None :
357362 if not is_ip_among_addresses (ip_address = internal_ip , addresses = host_network_addresses ):
358363 instance .status = InstanceStatus .TERMINATED
359- instance .termination_reason = (
364+ instance .termination_reason = InstanceTerminationReason .ERROR .value
365+ instance .termination_reason_message = (
360366 "Specified internal IP not found among instance interfaces"
361367 )
362368 logger .warning (
@@ -378,7 +384,8 @@ async def _add_remote(instance: InstanceModel) -> None:
378384 instance .total_blocks = blocks
379385 else :
380386 instance .status = InstanceStatus .TERMINATED
381- instance .termination_reason = "Cannot split into blocks"
387+ instance .termination_reason = InstanceTerminationReason .ERROR .value
388+ instance .termination_reason_message = "Cannot split into blocks"
382389 logger .warning (
383390 "Failed to add instance %s: cannot split into blocks" ,
384391 instance .name ,
@@ -497,7 +504,8 @@ async def _create_instance(session: AsyncSession, instance: InstanceModel) -> No
497504 requirements = get_instance_requirements (instance )
498505 except ValidationError as e :
499506 instance .status = InstanceStatus .TERMINATED
500- instance .termination_reason = (
507+ instance .termination_reason = InstanceTerminationReason .ERROR .value
508+ instance .termination_reason_message = (
501509 f"Error to parse profile, requirements or instance_configuration: { e } "
502510 )
503511 logger .warning (
@@ -645,7 +653,7 @@ async def _create_instance(session: AsyncSession, instance: InstanceModel) -> No
645653 )
646654 return
647655
648- _mark_terminated (instance , "All offers failed" if offers else "No offers found" )
656+ _mark_terminated (instance , InstanceTerminationReason . NO_OFFERS . value )
649657 if (
650658 instance .fleet
651659 and _is_fleet_master_instance (instance )
@@ -656,7 +664,7 @@ async def _create_instance(session: AsyncSession, instance: InstanceModel) -> No
656664 for sibling_instance in instance .fleet .instances :
657665 if sibling_instance .id == instance .id :
658666 continue
659- _mark_terminated (sibling_instance , "Master instance failed to start" )
667+ _mark_terminated (sibling_instance , InstanceTerminationReason . MASTER_FAILED . value )
660668
661669
662670def _mark_terminated (instance : InstanceModel , termination_reason : str ) -> None :
@@ -681,7 +689,7 @@ async def _check_instance(session: AsyncSession, instance: InstanceModel) -> Non
681689 ):
682690 # A busy instance could have no active jobs due to this bug: https://github.com/dstackai/dstack/issues/2068
683691 instance .status = InstanceStatus .TERMINATING
684- instance .termination_reason = "Instance job finished"
692+ instance .termination_reason = InstanceTerminationReason . JOB_FINISHED . value
685693 logger .info (
686694 "Detected busy instance %s with finished job. Marked as TERMINATING" ,
687695 instance .name ,
@@ -810,7 +818,7 @@ async def _check_instance(session: AsyncSession, instance: InstanceModel) -> Non
810818 deadline = instance .termination_deadline
811819 if get_current_datetime () > deadline :
812820 instance .status = InstanceStatus .TERMINATING
813- instance .termination_reason = "Termination deadline"
821+ instance .termination_reason = InstanceTerminationReason . TERMINATION_TIMEOUT . value
814822 logger .warning (
815823 "Instance %s shim waiting timeout. Marked as TERMINATING" ,
816824 instance .name ,
@@ -839,7 +847,7 @@ async def _wait_for_instance_provisioning_data(
839847 "Instance %s failed because instance has not become running in time" , instance .name
840848 )
841849 instance .status = InstanceStatus .TERMINATING
842- instance .termination_reason = "Instance has not become running in time"
850+ instance .termination_reason = InstanceTerminationReason . STARTING_TIMEOUT . value
843851 return
844852
845853 backend = await backends_services .get_project_backend_by_type (
@@ -852,7 +860,8 @@ async def _wait_for_instance_provisioning_data(
852860 instance .name ,
853861 )
854862 instance .status = InstanceStatus .TERMINATING
855- instance .termination_reason = "Backend not available"
863+ instance .termination_reason = InstanceTerminationReason .ERROR .value
864+ instance .termination_reason_message = "Backend not available"
856865 return
857866 try :
858867 await run_async (
@@ -869,7 +878,8 @@ async def _wait_for_instance_provisioning_data(
869878 repr (e ),
870879 )
871880 instance .status = InstanceStatus .TERMINATING
872- instance .termination_reason = "Error while waiting for instance to become running"
881+ instance .termination_reason = InstanceTerminationReason .ERROR .value
882+ instance .termination_reason_message = "Error while waiting for instance to become running"
873883 except Exception :
874884 logger .exception (
875885 "Got exception when updating instance %s provisioning data" , instance .name
0 commit comments