diff --git a/src/codeflare_sdk/ray/cluster/cluster.py b/src/codeflare_sdk/ray/cluster/cluster.py index 438d352f..4152d70d 100644 --- a/src/codeflare_sdk/ray/cluster/cluster.py +++ b/src/codeflare_sdk/ray/cluster/cluster.py @@ -351,21 +351,21 @@ def status( # check the ray cluster status cluster = _ray_cluster_status(self.config.name, self.config.namespace) if cluster: - if cluster.status == RayClusterStatus.SUSPENDED: - ready = False - status = CodeFlareClusterStatus.SUSPENDED - if cluster.status == RayClusterStatus.UNKNOWN: - ready = False - status = CodeFlareClusterStatus.STARTING if cluster.status == RayClusterStatus.READY: ready = True status = CodeFlareClusterStatus.READY + elif cluster.status == RayClusterStatus.SUSPENDED: + ready = False + status = CodeFlareClusterStatus.SUSPENDED elif cluster.status in [ RayClusterStatus.UNHEALTHY, RayClusterStatus.FAILED, ]: ready = False status = CodeFlareClusterStatus.FAILED + elif cluster.status == RayClusterStatus.UNKNOWN: + ready = False + status = CodeFlareClusterStatus.STARTING if print_to_console: # overriding the number of gpus with requested diff --git a/src/codeflare_sdk/ray/cluster/test_status.py b/src/codeflare_sdk/ray/cluster/test_status.py index f12bd916..ce35f4d0 100644 --- a/src/codeflare_sdk/ray/cluster/test_status.py +++ b/src/codeflare_sdk/ray/cluster/test_status.py @@ -91,6 +91,11 @@ def test_cluster_status(mocker): assert status == CodeFlareClusterStatus.READY assert ready is True + fake_ray.status = RayClusterStatus.SUSPENDED + status, ready = cf.status() + assert status == CodeFlareClusterStatus.SUSPENDED + assert ready is False + def rc_status_fields(group, version, namespace, plural, *args): assert group == "ray.io"