Skip to content

Commit 1ad0586

Browse files
committed
Upgrade skypilot to v0.10.0, introduce network_tier
Signed-off-by: Roee Landesman <roeeland@cisco.com>
1 parent 2ca3b41 commit 1ad0586

3 files changed

Lines changed: 3248 additions & 2724 deletions

File tree

nemo_run/core/execution/skypilot.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,8 +35,8 @@
3535
try:
3636
import sky
3737
import sky.task as skyt
38-
from sky.utils import status_lib
3938
from sky import backends
39+
from sky.utils import status_lib
4040

4141
_SKYPILOT_AVAILABLE = True
4242
except ImportError:
@@ -62,7 +62,8 @@ class SkypilotExecutor(Executor):
6262
gpus="A10G",
6363
gpus_per_node=devices,
6464
container_image="nvcr.io/nvidia/nemo:dev",
65-
cloud="kubernetes",
65+
infra="k8s/my-context",
66+
network_tier="best",
6667
cluster_name="nemo_tester",
6768
file_mounts={
6869
"nemo_run.whl": "nemo_run.whl"
@@ -105,6 +106,8 @@ class SkypilotExecutor(Executor):
105106
idle_minutes_to_autostop: Optional[int] = None
106107
torchrun_nproc_per_node: Optional[int] = None
107108
cluster_config_overrides: Optional[dict[str, Any]] = None
109+
infra: Optional[str] = None
110+
network_tier: Optional[str] = None
108111
packager: Packager = field(default_factory=lambda: GitArchivePackager()) # type: ignore # noqa: F821
109112

110113
def __post_init__(self):
@@ -114,6 +117,13 @@ def __post_init__(self):
114117
assert isinstance(self.packager, GitArchivePackager), (
115118
"Only GitArchivePackager is currently supported for SkypilotExecutor."
116119
)
120+
if self.infra is not None:
121+
assert self.cloud is None, "Cannot specify both `infra` and `cloud` parameters."
122+
assert self.region is None, "Cannot specify both `infra` and `region` parameters."
123+
assert self.zone is None, "Cannot specify both `infra` and `zone` parameters."
124+
logger.info(
125+
"`cloud` is deprecated and will be removed in a future version. Use `infra` instead."
126+
)
117127

118128
@classmethod
119129
def parse_app(cls: Type["SkypilotExecutor"], app_id: str) -> tuple[str, str, int]:
@@ -173,6 +183,8 @@ def parse_attr(attr: str):
173183
"memory",
174184
"instance_type",
175185
"use_spot",
186+
"infra",
187+
"network_tier",
176188
"image_id",
177189
"disk_size",
178190
"disk_tier",

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -51,10 +51,10 @@ lepton = "nemo_run.run.torchx_backend.schedulers.lepton:create_scheduler"
5151

5252
[project.optional-dependencies]
5353
skypilot = [
54-
"skypilot[kubernetes]>=0.9.2",
54+
"skypilot[kubernetes]>=0.10.0",
5555
]
5656
skypilot-all = [
57-
"skypilot[all]>=0.9.2",
57+
"skypilot[all]>=0.10.0",
5858
]
5959
ray = [
6060
"kubernetes"

0 commit comments

Comments
 (0)