Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
name: ml-training-cluster
namespace: ml-team
labels:
team: ml
env: prod
spec:
rayVersion: 2.43.0
headGroupSpec:
serviceType: ClusterIP
rayStartParams:
dashboard-host: 0.0.0.0
num-cpus: '4'
template:
spec:
containers:
- image: custom/ray:gpu
name: ray-head
ports: []
env:
- name: NCCL_DEBUG
value: INFO
lifecycle:
preStop:
exec:
command:
- /bin/sh
- -c
- ray stop
resources:
requests:
cpu: '4'
memory: 16Gi
limits:
cpu: '4'
memory: 16Gi
volumeMounts:
- name: data
mountPath: /data
volumes:
- name: data
persistentVolumeClaim:
claimName: data-pvc
workerGroupSpecs:
- groupName: gpu-workers
maxReplicas: 8
minReplicas: 2
rayStartParams: {}
replicas: 4
template:
spec:
containers:
- image: custom/ray:gpu
name: ray-worker
env:
- name: NCCL_DEBUG
value: INFO
lifecycle:
preStop:
exec:
command:
- /bin/sh
- -c
- ray stop
resources:
requests:
cpu: '8'
memory: 32Gi
nvidia.com/gpu: 2
limits:
nvidia.com/gpu: 2
volumeMounts:
- name: data
mountPath: /data
volumes:
- name: data
persistentVolumeClaim:
claimName: data-pvc
61 changes: 61 additions & 0 deletions test/core/execution/artifacts/expected_kuberay_cluster_basic.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
name: test-cluster
namespace: default
labels: {}
spec:
rayVersion: 2.43.0
headGroupSpec:
serviceType: ClusterIP
rayStartParams:
dashboard-host: 0.0.0.0
template:
spec:
containers:
- image: rayproject/ray:2.43.0
name: ray-head
ports: []
env: []
lifecycle:
preStop:
exec:
command:
- /bin/sh
- -c
- ray stop
resources:
requests:
cpu: '1'
memory: 2Gi
limits:
cpu: '1'
memory: 2Gi
volumeMounts: []
volumes: []
workerGroupSpecs:
- groupName: workers
maxReplicas: 2
minReplicas: 2
rayStartParams: {}
replicas: 2
template:
spec:
containers:
- image: rayproject/ray:2.43.0
name: ray-worker
env: []
lifecycle:
preStop:
exec:
command:
- /bin/sh
- -c
- ray stop
resources:
requests:
cpu: '2'
memory: 4Gi
limits: {}
volumeMounts: []
volumes: []
10 changes: 10 additions & 0 deletions test/core/execution/artifacts/expected_kuberay_job_basic.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
apiVersion: ray.io/v1
kind: RayJob
metadata:
name: test-job
namespace: default
spec:
entrypoint: python train.py
shutdownAfterJobFinishes: true
rayClusterSpec: {}
runtimeEnvYAML: null
Empty file added test/run/ray/__init__.py
Empty file.
Loading
Loading