Skip to content

Commit 5aaa2a9

Browse files
check for mtls config and prompt user helpfully
1 parent 7b5e8c2 commit 5aaa2a9

2 files changed

Lines changed: 60 additions & 4 deletions

File tree

src/codeflare_sdk/ray/cluster/cluster.py

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,15 @@ def _client_verify_tls(self):
110110

111111
@property
112112
def job_client(self):
113+
"""
114+
Get the Ray Job Submission Client for this cluster.
115+
116+
Note: If connecting to a cluster with mTLS enabled, ensure you have called
117+
cluster.wait_ready() first to automatically generate TLS certificates.
118+
"""
119+
# Check for certificates before creating client
120+
self._check_tls_certs_exist()
121+
113122
k8client = get_api_client()
114123
if self._job_submission_client:
115124
return self._job_submission_client
@@ -382,7 +391,6 @@ def wait_ready(self, timeout: Optional[int] = None, dashboard_check: bool = True
382391
time += 5
383392
print("Requested cluster is up and running!")
384393

385-
386394
# Automatically generate TLS certificates (required for mTLS)
387395
try:
388396
from codeflare_sdk.common.utils import generate_cert
@@ -439,10 +447,54 @@ def details(self, print_to_console: bool = True) -> RayCluster:
439447
pretty_print.print_clusters([cluster])
440448
return cluster
441449

450+
def _check_tls_certs_exist(self):
    """
    Print a troubleshooting banner when this cluster's TLS certificate is missing.

    The certificate is looked up at
    ``<tls base dir>/<config.name>-<config.namespace>/tls.crt``. When that file
    is present the method does nothing. When it is absent, a banner is written
    to stdout that shows the expected location and explains how to generate
    the certificates.

    This is a best-effort diagnostic: it never raises because of a missing
    certificate and it returns nothing. It is called by the connection
    helpers (cluster_uri, local_client_url, job_client) to help users debug
    mTLS connection issues.
    """
    # Imported lazily, mirroring how wait_ready() imports generate_cert.
    from codeflare_sdk.common.utils.generate_cert import _get_tls_base_dir

    cert_dir = _get_tls_base_dir() / f"{self.config.name}-{self.config.namespace}"

    # tls.crt cannot exist unless its parent directory does, so one check
    # covers both the "no directory" and the "no certificate" cases.
    if (cert_dir / "tls.crt").exists():
        return

    name = self.config.name
    namespace = self.config.namespace
    rule = "=" * 70

    banner = [
        "\n" + rule,
        "⚠️ WARNING: TLS Certificates Not Found!",
        rule,
        f"Expected location: {cert_dir}",
        "",
        "TLS certificates are required for mTLS connections to Ray clusters.",
        "Without certificates, your connection will likely fail with a timeout",
        "or TLS handshake error.",
        "",
        "To fix this issue:",
        " 1. Call cluster.wait_ready() after cluster.apply()",
        " → This automatically generates certificates when cluster is ready",
        "",
        " 2. Or manually generate certificates:",
        " from codeflare_sdk.common.utils import generate_cert",
        f" generate_cert.generate_tls_cert('{name}', '{namespace}')",
        f" generate_cert.export_env('{name}', '{namespace}')",
        rule + "\n",
    ]
    # Emit the whole banner in a single write so other output cannot be
    # interleaved between its lines.
    print("\n".join(banner))
489+
442490
def cluster_uri(self) -> str:
    """
    Build the in-cluster Ray client URI for this cluster's head service.

    The TLS certificate check runs first, so a missing certificate is
    reported before the caller tries to connect.

    Note: If connecting to a cluster with mTLS enabled, ensure you have called
    cluster.wait_ready() first to automatically generate TLS certificates.

    Returns:
        str:
            URI of the form ``ray://<name>-head-svc.<namespace>.svc:10001``.
    """
    self._check_tls_certs_exist()
    head_service = f"{self.config.name}-head-svc"
    return f"ray://{head_service}.{self.config.namespace}.svc:10001"
447499

448500
def refresh_certificates(self):
@@ -586,7 +638,13 @@ def local_client_url(self):
586638
Returns:
587639
str:
588640
The Ray client URL based on the ingress domain.
641+
642+
Note: If connecting to a cluster with mTLS enabled, ensure you have called
643+
cluster.wait_ready() first to automatically generate TLS certificates.
589644
"""
645+
# Check if TLS certificates exist and provide helpful warning if not
646+
self._check_tls_certs_exist()
647+
590648
ingress_domain = _get_ingress_domain(self)
591649
return f"ray://{ingress_domain}"
592650

src/codeflare_sdk/ray/cluster/test_cluster.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -282,12 +282,10 @@ def test_cluster_uris(mocker):
282282
"kubernetes.client.CustomObjectsApi.list_namespaced_custom_object",
283283
return_value=get_local_queue("kueue.x-k8s.io", "v1beta1", "ns", "localqueues"),
284284
)
285-
<<<<<<< HEAD
286-
=======
285+
287286
# Mock the TLS cert check to avoid warnings in test output
288287
mocker.patch("codeflare_sdk.ray.cluster.cluster.Cluster._check_tls_certs_exist")
289288

290-
>>>>>>> 94b6f8b35 (fix)
291289
cluster = create_cluster(mocker)
292290
mocker.patch(
293291
"kubernetes.client.NetworkingV1Api.list_namespaced_ingress",

0 commit comments

Comments
 (0)