From 150ffdd204554befc58b297bbcf0b161a12e3bb7 Mon Sep 17 00:00:00 2001 From: DavidAdaRH Date: Tue, 10 Feb 2026 14:54:42 +0000 Subject: [PATCH] RHOAIENG-27175: add RayCluster name validation rh-pre-commit.version: 2.3.2 rh-pre-commit.check-secrets: ENABLED --- src/codeflare_sdk/ray/cluster/config.py | 16 ++++++++++++++++ src/codeflare_sdk/ray/cluster/test_config.py | 9 +++++++++ .../ray/rayjobs/test/test_rayjob.py | 15 ++++----------- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/codeflare_sdk/ray/cluster/config.py b/src/codeflare_sdk/ray/cluster/config.py index c9dc07724..f3e6fa4f4 100644 --- a/src/codeflare_sdk/ray/cluster/config.py +++ b/src/codeflare_sdk/ray/cluster/config.py @@ -19,6 +19,7 @@ """ import pathlib +import re import warnings from dataclasses import dataclass, field, fields from typing import Dict, List, Optional, Union, get_args, get_origin @@ -38,6 +39,20 @@ "huawei.com/Ascend310": "NPU", } +# Kubernetes metadata.name must be a lowercase RFC 1123 subdomain +_RFC1123_SUBDOMAIN = re.compile( + r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$" +) + + +def _validate_cluster_name(name: str): + """Raise ValueError if name is not a valid Kubernetes metadata.name (RFC 1123 subdomain).""" + if not name or not _RFC1123_SUBDOMAIN.match(name): + raise ValueError( + "Cluster name must be a valid RFC 1123 subdomain " + "(lowercase, numbers, hyphens/dots; start and end with letter or number)." + ) + @dataclass class ClusterConfiguration: @@ -171,6 +186,7 @@ def __post_init__(self): self._validate_extended_resource_requests( self.worker_extended_resource_requests ) + _validate_cluster_name(self.name) def _combine_extended_resource_mapping(self): if overwritten := set(self.extended_resource_mapping.keys()).intersection( diff --git a/src/codeflare_sdk/ray/cluster/test_config.py b/src/codeflare_sdk/ray/cluster/test_config.py index ccdbc23d2..9742464f0 100644 --- a/src/codeflare_sdk/ray/cluster/test_config.py +++ b/src/codeflare_sdk/ray/cluster/test_config.py @@ -201,6 +201,15 @@ def test_ray_usage_stats_enabled(mocker): assert env_vars["RAY_USAGE_STATS_ENABLED"] == "1" +def test_cluster_name_validation(): + with pytest.raises(ValueError): + ClusterConfiguration(name="TestCluster", namespace="ns") + with pytest.raises(ValueError): + ClusterConfiguration(name="testcluster-", namespace="ns") + with pytest.raises(ValueError): + ClusterConfiguration(name="-testcluster", namespace="ns") + + # Make sure to always keep this function last def test_cleanup(): os.remove(f"{cluster_dir}test-all-params.yaml") diff --git a/src/codeflare_sdk/ray/rayjobs/test/test_rayjob.py b/src/codeflare_sdk/ray/rayjobs/test/test_rayjob.py index 5c13584a7..bdbb12925 100644 --- a/src/codeflare_sdk/ray/rayjobs/test/test_rayjob.py +++ b/src/codeflare_sdk/ray/rayjobs/test/test_rayjob.py @@ -18,7 +18,6 @@ from ray.runtime_env import RuntimeEnv from codeflare_sdk.ray.rayjobs.rayjob import RayJob -from codeflare_sdk.ray.cluster.config import ClusterConfiguration from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig from kubernetes.client import V1Volume, V1VolumeMount, V1Toleration @@ -80,7 +79,7 @@ def test_rayjob_init_validation_both_provided(auto_mock_setup): """ Test that providing both cluster_name and cluster_config raises error. """ - cluster_config = ClusterConfiguration(name="test-cluster", namespace="test") + cluster_config = ManagedClusterConfig() with pytest.raises( ValueError, @@ -145,9 +144,7 @@ def test_rayjob_init_with_cluster_config(auto_mock_setup): """ Test RayJob initialization with cluster configuration for auto-creation. """ - cluster_config = ClusterConfiguration( - name="auto-cluster", namespace="test-namespace", num_workers=2 - ) + cluster_config = ManagedClusterConfig(num_workers=2) rayjob = RayJob( job_name="test-job", @@ -166,9 +163,7 @@ def test_rayjob_cluster_name_generation(auto_mock_setup): """ Test that cluster names are generated when config has empty name. """ - cluster_config = ClusterConfiguration( - name="", # Empty name should trigger generation - namespace="test-namespace", + cluster_config = ManagedClusterConfig( num_workers=1, ) @@ -186,9 +181,7 @@ def test_rayjob_cluster_config_namespace_none(auto_mock_setup): """ Test that cluster config namespace is set when None. """ - cluster_config = ClusterConfiguration( - name="test-cluster", - namespace=None, # This should be set to job namespace + cluster_config = ManagedClusterConfig( num_workers=1, )