Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions src/codeflare_sdk/ray/cluster/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"""

import pathlib
import re
import warnings
from dataclasses import dataclass, field, fields
from typing import Dict, List, Optional, Union, get_args, get_origin
Expand All @@ -38,6 +39,20 @@
"huawei.com/Ascend310": "NPU",
}

# Kubernetes metadata.name must be a lowercase RFC 1123 subdomain
_RFC1123_SUBDOMAIN = re.compile(
r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$"
)


def _validate_cluster_name(name: str):
"""Raise ValueError if name is not a valid Kubernetes metadata.name (RFC 1123 subdomain)."""
if not name or not _RFC1123_SUBDOMAIN.match(name):
raise ValueError(
"Cluster name must be a valid RFC 1123 subdomain "
"(lowercase, numbers, hyphens/dots; start and end with letter or number)."
)


@dataclass
class ClusterConfiguration:
Expand Down Expand Up @@ -171,6 +186,7 @@ def __post_init__(self):
self._validate_extended_resource_requests(
self.worker_extended_resource_requests
)
_validate_cluster_name(self.name)

def _combine_extended_resource_mapping(self):
if overwritten := set(self.extended_resource_mapping.keys()).intersection(
Expand Down
9 changes: 9 additions & 0 deletions src/codeflare_sdk/ray/cluster/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,15 @@ def test_ray_usage_stats_enabled(mocker):
assert env_vars["RAY_USAGE_STATS_ENABLED"] == "1"


def test_cluster_name_validation():
with pytest.raises(ValueError):
ClusterConfiguration(name="TestCluster", namespace="ns")
with pytest.raises(ValueError):
ClusterConfiguration(name="testcluster-", namespace="ns")
with pytest.raises(ValueError):
ClusterConfiguration(name="-testcluster", namespace="ns")


# Make sure to always keep this function last
def test_cleanup():
os.remove(f"{cluster_dir}test-all-params.yaml")
15 changes: 4 additions & 11 deletions src/codeflare_sdk/ray/rayjobs/test/test_rayjob.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from ray.runtime_env import RuntimeEnv

from codeflare_sdk.ray.rayjobs.rayjob import RayJob
from codeflare_sdk.ray.cluster.config import ClusterConfiguration
from codeflare_sdk.ray.rayjobs.config import ManagedClusterConfig
from kubernetes.client import V1Volume, V1VolumeMount, V1Toleration

Expand Down Expand Up @@ -80,7 +79,7 @@ def test_rayjob_init_validation_both_provided(auto_mock_setup):
"""
Test that providing both cluster_name and cluster_config raises error.
"""
cluster_config = ClusterConfiguration(name="test-cluster", namespace="test")
cluster_config = ManagedClusterConfig()

with pytest.raises(
ValueError,
Expand Down Expand Up @@ -145,9 +144,7 @@ def test_rayjob_init_with_cluster_config(auto_mock_setup):
"""
Test RayJob initialization with cluster configuration for auto-creation.
"""
cluster_config = ClusterConfiguration(
name="auto-cluster", namespace="test-namespace", num_workers=2
)
cluster_config = ManagedClusterConfig(num_workers=2)

rayjob = RayJob(
job_name="test-job",
Expand All @@ -166,9 +163,7 @@ def test_rayjob_cluster_name_generation(auto_mock_setup):
"""
Test that cluster names are generated when config has empty name.
"""
cluster_config = ClusterConfiguration(
name="", # Empty name should trigger generation
namespace="test-namespace",
cluster_config = ManagedClusterConfig(
num_workers=1,
)

Expand All @@ -186,9 +181,7 @@ def test_rayjob_cluster_config_namespace_none(auto_mock_setup):
"""
Test that cluster config namespace is set when None.
"""
cluster_config = ClusterConfiguration(
name="test-cluster",
namespace=None, # This should be set to job namespace
cluster_config = ManagedClusterConfig(
num_workers=1,
)

Expand Down
Loading