Skip to content

Commit 112357e

Browse files
feat(RHOAIEG-57444): Add autoscaling gates for Kueue
1 parent fa7c9f9 commit 112357e

2 files changed

Lines changed: 80 additions & 0 deletions

File tree

src/codeflare_sdk/ray/cluster/build_ray_cluster.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,17 @@ def build_ray_cluster(cluster: "codeflare_sdk.ray.cluster.Cluster"):
118118
# Determine autoscaling vs fixed-size worker replica settings
119119
autoscaling_enabled = cluster.config.enable_autoscaling
120120
if autoscaling_enabled:
121+
from codeflare_sdk.common.kueue.kueue import get_default_kueue_name
122+
123+
lq_name = cluster.config.local_queue or get_default_kueue_name(
124+
cluster.config.namespace
125+
)
126+
if lq_name is not None:
127+
raise ValueError(
128+
"Autoscaling is not supported when Kueue is enabled. "
129+
"Please remove the autoscaler configuration from your "
130+
"ClusterConfiguration."
131+
)
121132
worker_replicas = cluster.config.min_workers
122133
worker_min_replicas = cluster.config.min_workers
123134
worker_max_replicas = cluster.config.max_workers

src/codeflare_sdk/ray/cluster/test_config.py

Lines changed: 69 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -296,6 +296,10 @@ def test_autoscaling_disabled_ignores_workers():
296296
def test_autoscaling_spec_generation(mocker):
297297
mocker.patch("kubernetes.client.ApisApi.get_api_versions")
298298
mocker.patch("kubernetes.client.CustomObjectsApi.list_namespaced_custom_object")
299+
mocker.patch(
300+
"codeflare_sdk.common.kueue.kueue.get_default_kueue_name",
301+
return_value=None,
302+
)
299303

300304
cluster = Cluster(
301305
ClusterConfiguration(
@@ -315,6 +319,71 @@ def test_autoscaling_spec_generation(mocker):
315319
assert worker_group["maxReplicas"] == 10
316320

317321

322+
def test_autoscaling_blocked_when_local_queue_set(mocker):
    """Expect a ValueError when autoscaling is enabled with an explicit local_queue."""
    # Replace the Kubernetes client calls with mocks.
    for patch_target in (
        "kubernetes.client.ApisApi.get_api_versions",
        "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
    ):
        mocker.patch(patch_target)

    config_kwargs = {
        "name": "autoscale-kueue-explicit",
        "namespace": "ns",
        "enable_autoscaling": True,
        "min_workers": 1,
        "max_workers": 8,
        "local_queue": "my-queue",
    }
    expected_message = "Autoscaling is not supported when Kueue is enabled"

    # Both the configuration and the cluster are built inside the context
    # manager, so a matching ValueError from either satisfies the check.
    with pytest.raises(ValueError, match=expected_message):
        Cluster(ClusterConfiguration(**config_kwargs))
340+
341+
342+
def test_autoscaling_blocked_when_default_queue_exists(mocker):
    """Expect a ValueError when autoscaling is enabled and a default queue is reported."""
    # Replace the Kubernetes client calls with mocks.
    for patch_target in (
        "kubernetes.client.ApisApi.get_api_versions",
        "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
    ):
        mocker.patch(patch_target)

    # No local_queue is passed below; the patched lookup supplies a queue name.
    mocker.patch(
        "codeflare_sdk.common.kueue.kueue.get_default_kueue_name",
        return_value="default-queue",
    )

    config_kwargs = {
        "name": "autoscale-kueue-default",
        "namespace": "ns",
        "enable_autoscaling": True,
        "min_workers": 1,
        "max_workers": 8,
    }
    expected_message = "Autoscaling is not supported when Kueue is enabled"

    # Both the configuration and the cluster are built inside the context
    # manager, so a matching ValueError from either satisfies the check.
    with pytest.raises(ValueError, match=expected_message):
        Cluster(ClusterConfiguration(**config_kwargs))
363+
364+
365+
def test_autoscaling_allowed_when_no_queue(mocker):
    """Check the spec enables in-tree autoscaling when no queue is configured or found."""
    # Replace the Kubernetes client calls with mocks.
    for patch_target in (
        "kubernetes.client.ApisApi.get_api_versions",
        "kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
    ):
        mocker.patch(patch_target)

    # The patched default-queue lookup returns None, and no local_queue is
    # passed in the configuration below.
    mocker.patch(
        "codeflare_sdk.common.kueue.kueue.get_default_kueue_name",
        return_value=None,
    )

    config = ClusterConfiguration(
        name="autoscale-no-kueue",
        namespace="ns",
        enable_autoscaling=True,
        min_workers=1,
        max_workers=8,
    )
    cluster = Cluster(config)

    assert cluster.resource_yaml["spec"]["enableInTreeAutoscaling"] is True
385+
386+
318387
def test_autoscaling_disabled_spec_unchanged(mocker):
319388
mocker.patch("kubernetes.client.ApisApi.get_api_versions")
320389
mocker.patch("kubernetes.client.CustomObjectsApi.list_namespaced_custom_object")

0 commit comments

Comments
 (0)