Skip to content

Commit 9fd9a2b

Browse files
committed
Make Lepton dependency optional
1 parent b24cb43 commit 9fd9a2b

13 files changed

Lines changed: 292 additions & 60 deletions

File tree

docs/index.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@ To install Skypilot with optional features, use one of the following commands:
3434

3535
You can also install Skypilot manually by following the instructions at https://skypilot.readthedocs.io/en/latest/getting-started/installation.html
3636

37-
If using DGX Cloud Lepton, use the following command to install the Lepton CLI:
37+
If using DGX Cloud Lepton, install NeMo Run with the Lepton extra:
3838

3939
```bash
40-
pip install leptonai
40+
pip install "nemo_run[lepton]"
4141
```
4242

4343
To authenticate with the DGX Cloud Lepton cluster, open the **Settings > Tokens** page in the DGX Cloud Lepton UI, copy the ``lep login`` command shown there, and run it in your terminal.

nemo_run/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
from nemo_run.core.execution.docker import DockerExecutor
2727
from nemo_run.core.execution.kubeflow import KubeflowExecutor
2828
from nemo_run.core.execution.launcher import FaultTolerance, SlurmRay, SlurmTemplate, Torchrun
29-
from nemo_run.core.execution.lepton import LeptonExecutor
3029
from nemo_run.core.execution.local import LocalExecutor
3130
from nemo_run.core.execution.skypilot import SkypilotExecutor
3231
from nemo_run.core.execution.slurm import SlurmExecutor
@@ -40,6 +39,15 @@
4039
from nemo_run.run.experiment import Experiment
4140
from nemo_run.run.plugin import ExperimentPlugin as Plugin
4241

42+
43+
def __getattr__(name: str):
44+
if name == "LeptonExecutor":
45+
from nemo_run.core.execution.lepton import LeptonExecutor
46+
47+
return LeptonExecutor
48+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
49+
50+
4351
__all__ = [
4452
"autoconvert",
4553
"cli",

nemo_run/core/execution/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,20 @@
1414
# limitations under the License.
1515

1616
from nemo_run.core.execution.dgxcloud import DGXCloudExecutor
17-
from nemo_run.core.execution.lepton import LeptonExecutor
1817
from nemo_run.core.execution.local import LocalExecutor
1918
from nemo_run.core.execution.kubeflow import KubeflowExecutor
2019
from nemo_run.core.execution.skypilot import SkypilotExecutor
2120
from nemo_run.core.execution.slurm import SlurmExecutor
2221

22+
23+
def __getattr__(name: str):
24+
if name == "LeptonExecutor":
25+
from nemo_run.core.execution.lepton import LeptonExecutor
26+
27+
return LeptonExecutor
28+
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
29+
30+
2331
__all__ = [
2432
"LocalExecutor",
2533
"SlurmExecutor",

nemo_run/core/execution/lepton.py

Lines changed: 67 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16+
from __future__ import annotations
17+
1618
import base64
1719
import logging
1820
import os
@@ -22,36 +24,80 @@
2224
import time
2325
from dataclasses import dataclass, field
2426
from datetime import datetime
27+
from enum import Enum
2528
from pathlib import Path
2629
from typing import Any, List, Optional, Set, Type
2730

2831
from invoke.context import Context
29-
from leptonai.api.v2.client import APIClient
30-
from leptonai.api.v1.types.affinity import LeptonResourceAffinity
31-
from leptonai.api.v1.types.common import Metadata, LeptonVisibility
32-
from leptonai.api.v1.types.dedicated_node_group import DedicatedNodeGroup
33-
from leptonai.api.v1.types.deployment import (
34-
EnvVar,
35-
EnvValue,
36-
LeptonContainer,
37-
Mount,
38-
)
39-
from leptonai.api.v1.types.job import (
40-
LeptonJob,
41-
LeptonJobState,
42-
LeptonJobUserSpec,
43-
ReservationConfig,
44-
)
45-
from leptonai.api.v1.types.replica import Replica
4632

4733
from nemo_run.config import get_nemorun_home
4834
from nemo_run.core.execution.base import Executor, ExecutorMacros
4935
from nemo_run.core.packaging.base import Packager
5036
from nemo_run.core.packaging.git import GitArchivePackager
5137

38+
_LEPTON_IMPORT_ERROR: ImportError | None = None
39+
_LEPTON_AVAILABLE = False
40+
41+
try:
42+
from leptonai.api.v1.types.affinity import LeptonResourceAffinity
43+
from leptonai.api.v1.types.common import LeptonVisibility, Metadata
44+
from leptonai.api.v1.types.dedicated_node_group import DedicatedNodeGroup
45+
from leptonai.api.v1.types.deployment import (
46+
EnvVar,
47+
EnvValue,
48+
LeptonContainer,
49+
Mount,
50+
)
51+
from leptonai.api.v1.types.job import (
52+
LeptonJob,
53+
LeptonJobState,
54+
LeptonJobUserSpec,
55+
ReservationConfig,
56+
)
57+
from leptonai.api.v1.types.replica import Replica
58+
from leptonai.api.v2.client import APIClient
59+
60+
_LEPTON_AVAILABLE = True
61+
except ImportError as e:
62+
_LEPTON_IMPORT_ERROR = e
63+
64+
class LeptonJobState(Enum):
65+
Starting = "Starting"
66+
Running = "Running"
67+
Failed = "Failed"
68+
Completed = "Completed"
69+
Deleting = "Deleting"
70+
Restarting = "Restarting"
71+
Archived = "Archived"
72+
Stopped = "Stopped"
73+
Stopping = "Stopping"
74+
Unknown = "Unknown"
75+
76+
APIClient = None
77+
DedicatedNodeGroup = None
78+
EnvValue = None
79+
EnvVar = None
80+
LeptonContainer = None
81+
LeptonJob = None
82+
LeptonJobUserSpec = None
83+
LeptonResourceAffinity = None
84+
LeptonVisibility = None
85+
Metadata = None
86+
Mount = None
87+
Replica = None
88+
ReservationConfig = None
89+
5290
logger = logging.getLogger(__name__)
5391

5492

93+
def _require_leptonai() -> None:
94+
if not _LEPTON_AVAILABLE:
95+
raise ImportError(
96+
"leptonai package is required for LeptonExecutor. "
97+
'Install it with: pip install "nemo_run[lepton]"'
98+
) from _LEPTON_IMPORT_ERROR
99+
100+
55101
@dataclass(kw_only=True)
56102
class LeptonExecutor(Executor):
57103
"""
@@ -84,6 +130,9 @@ class LeptonExecutor(Executor):
84130
head_resource_shape: Optional[str] = "" # Only used for LeptonRayCluster
85131
ray_version: Optional[str] = None # Only used for LeptonRayCluster
86132

133+
def __post_init__(self) -> None:
134+
_require_leptonai()
135+
87136
def stop_job(self, job_id: str):
88137
"""
89138
Send a stop signal to the requested job
@@ -376,6 +425,7 @@ def cancel(self, job_id: str):
376425

377426
@classmethod
378427
def logs(cls: Type["LeptonExecutor"], app_id: str, fallback_path: Optional[str]):
428+
_require_leptonai()
379429
client = APIClient()
380430

381431
# Get the first replica from the job which contains the job logs

nemo_run/run/ray/cluster.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,21 @@
1717
from typing import Optional, Type
1818

1919
from nemo_run.core.execution.base import Executor
20-
from nemo_run.core.execution.lepton import LeptonExecutor
2120
from nemo_run.core.execution.slurm import SlurmExecutor
2221
from nemo_run.core.frontend.console.api import configure_logging
23-
from nemo_run.run.ray.lepton import LeptonRayCluster
2422
from nemo_run.run.ray.slurm import SlurmRayCluster
2523

24+
# Import guard for Lepton dependencies
25+
try:
26+
from nemo_run.core.execution.lepton import LeptonExecutor
27+
from nemo_run.run.ray.lepton import LeptonRayCluster
28+
29+
_LEPTON_RAY_AVAILABLE = True
30+
except ImportError:
31+
LeptonExecutor = None
32+
LeptonRayCluster = None
33+
_LEPTON_RAY_AVAILABLE = False
34+
2635
# Import guard for Kubernetes dependencies
2736
try:
2837
from nemo_run.core.execution.kuberay import KubeRayExecutor
@@ -45,9 +54,11 @@ def __post_init__(self):
4554
configure_logging(level=self.log_level)
4655
backend_map: dict[Type[Executor], Type] = {
4756
SlurmExecutor: SlurmRayCluster,
48-
LeptonExecutor: LeptonRayCluster,
4957
}
5058

59+
if _LEPTON_RAY_AVAILABLE and LeptonExecutor is not None and LeptonRayCluster is not None:
60+
backend_map[LeptonExecutor] = LeptonRayCluster
61+
5162
if _KUBERAY_AVAILABLE and KubeRayExecutor is not None and KubeRayCluster is not None:
5263
backend_map[KubeRayExecutor] = KubeRayCluster
5364

nemo_run/run/ray/job.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,21 @@
1717
from typing import Any, Optional, Type
1818

1919
from nemo_run.core.execution.base import Executor
20-
from nemo_run.core.execution.lepton import LeptonExecutor
2120
from nemo_run.core.execution.slurm import SlurmExecutor
2221
from nemo_run.core.frontend.console.api import configure_logging
23-
from nemo_run.run.ray.lepton import LeptonRayJob
2422
from nemo_run.run.ray.slurm import SlurmRayJob
2523

24+
# Import guard for Lepton dependencies
25+
try:
26+
from nemo_run.core.execution.lepton import LeptonExecutor
27+
from nemo_run.run.ray.lepton import LeptonRayJob
28+
29+
_LEPTON_RAY_AVAILABLE = True
30+
except ImportError:
31+
LeptonExecutor = None
32+
LeptonRayJob = None
33+
_LEPTON_RAY_AVAILABLE = False
34+
2635
# Import guard for Kubernetes dependencies
2736
try:
2837
from nemo_run.core.execution.kuberay import KubeRayExecutor
@@ -49,10 +58,12 @@ class RayJob:
4958
def __post_init__(self) -> None: # noqa: D401 – simple implementation
5059
configure_logging(level=self.log_level)
5160
backend_map: dict[Type[Executor], Type[Any]] = {
52-
LeptonExecutor: LeptonRayJob,
5361
SlurmExecutor: SlurmRayJob,
5462
}
5563

64+
if _LEPTON_RAY_AVAILABLE and LeptonExecutor is not None and LeptonRayJob is not None:
65+
backend_map[LeptonExecutor] = LeptonRayJob
66+
5667
if _KUBERAY_AVAILABLE and KubeRayExecutor is not None and KubeRayJob is not None:
5768
backend_map[KubeRayExecutor] = KubeRayJob
5869

@@ -62,7 +73,7 @@ def __post_init__(self) -> None: # noqa: D401 – simple implementation
6273
backend_cls = backend_map[self.executor.__class__]
6374
self.backend = backend_cls(name=self.name, executor=self.executor)
6475

65-
if isinstance(self.executor, LeptonExecutor):
76+
if LeptonExecutor is not None and isinstance(self.executor, LeptonExecutor):
6677
self.backend.cluster_name = self.cluster_name
6778
self.backend.cluster_ready_timeout = self.cluster_ready_timeout
6879

nemo_run/run/ray/lepton.py

Lines changed: 56 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,33 +13,57 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16+
from __future__ import annotations
17+
1618
import asyncio
1719
import json
1820
import logging
1921
import sys
2022
import time
21-
import urllib3
2223
import warnings
2324
from dataclasses import dataclass
24-
from ray.job_submission import JobSubmissionClient
25-
from rich.pretty import pretty_repr
2625
from typing import Any, Optional, TypeAlias
2726

28-
from leptonai.api.v1.types.affinity import LeptonResourceAffinity
29-
from leptonai.api.v1.types.dedicated_node_group import DedicatedNodeGroup
30-
from leptonai.api.v1.types.deployment import EnvVar, EnvValue
31-
32-
from nemo_run.core.execution.lepton import LeptonExecutor
27+
import urllib3
28+
from rich.pretty import pretty_repr
3329

34-
from leptonai.api.v2.client import APIClient
35-
from leptonai.api.v1.types.raycluster import (
36-
LeptonRayCluster as LeptonRayClusterSpec,
37-
LeptonRayClusterUserSpec,
38-
Metadata,
39-
RayHeadGroupSpec,
40-
RayWorkerGroupSpec,
41-
)
42-
from leptonai.cli.raycluster import DEFAULT_RAY_IMAGE
30+
from nemo_run.core.execution.lepton import LeptonExecutor, _require_leptonai
31+
32+
_RAY_IMPORT_ERROR: ImportError | None = None
33+
_RAY_AVAILABLE = False
34+
try:
35+
from ray.job_submission import JobSubmissionClient
36+
37+
_RAY_AVAILABLE = True
38+
except ImportError as e:
39+
_RAY_IMPORT_ERROR = e
40+
JobSubmissionClient = None
41+
42+
try:
43+
from leptonai.api.v1.types.affinity import LeptonResourceAffinity
44+
from leptonai.api.v1.types.dedicated_node_group import DedicatedNodeGroup
45+
from leptonai.api.v1.types.deployment import EnvVar, EnvValue
46+
from leptonai.api.v1.types.raycluster import (
47+
LeptonRayCluster as LeptonRayClusterSpec,
48+
LeptonRayClusterUserSpec,
49+
Metadata,
50+
RayHeadGroupSpec,
51+
RayWorkerGroupSpec,
52+
)
53+
from leptonai.api.v2.client import APIClient
54+
from leptonai.cli.raycluster import DEFAULT_RAY_IMAGE
55+
except ImportError:
56+
APIClient = None
57+
DEFAULT_RAY_IMAGE = None
58+
DedicatedNodeGroup = None
59+
EnvValue = None
60+
EnvVar = None
61+
LeptonRayClusterSpec = None
62+
LeptonRayClusterUserSpec = None
63+
LeptonResourceAffinity = None
64+
Metadata = None
65+
RayHeadGroupSpec = None
66+
RayWorkerGroupSpec = None
4367

4468
noquote: TypeAlias = str
4569

@@ -49,6 +73,19 @@
4973
RAY_NOT_READY_STATE = "Not Ready"
5074

5175

76+
def _require_ray() -> None:
77+
if not _RAY_AVAILABLE:
78+
raise ImportError(
79+
"ray is required for Lepton Ray helpers. "
80+
'Install it with: pip install "nemo_run[lepton]"'
81+
) from _RAY_IMPORT_ERROR
82+
83+
84+
def _require_lepton_ray() -> None:
85+
_require_leptonai()
86+
_require_ray()
87+
88+
5289
@dataclass(kw_only=True)
5390
class LeptonRayCluster:
5491
EXECUTOR_CLS = LeptonExecutor
@@ -57,6 +94,7 @@ class LeptonRayCluster:
5794
executor: LeptonExecutor
5895

5996
def __post_init__(self):
97+
_require_lepton_ray()
6098
self.cluster_map: dict[str, str] = {}
6199

62100
def _node_group_id(self, client: APIClient) -> DedicatedNodeGroup:
@@ -374,6 +412,7 @@ class LeptonRayJob:
374412
# Internals
375413
# ---------------------------------------------------------------------
376414
def __post_init__(self):
415+
_require_lepton_ray()
377416
self.submission_id = None
378417

379418
def _get_last_submission_id(self) -> Optional[int]:

0 commit comments

Comments
 (0)