Skip to content

Commit 8f99017

Browse files
committed
Make Lepton dependency optional
Signed-off-by: Hemil Desai <hemild@nvidia.com>
1 parent b24cb43 commit 8f99017

9 files changed

Lines changed: 220 additions & 51 deletions

File tree

docs/index.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@ To install Skypilot with optional features, use one of the following commands:
3434

3535
You can also manually install Skypilot from https://skypilot.readthedocs.io/en/latest/getting-started/installation.html
3636

37-
If using DGX Cloud Lepton, use the following command to install the Lepton CLI:
37+
If using DGX Cloud Lepton, install NeMo Run with the Lepton extra:
3838

3939
```bash
40-
pip install leptonai
40+
pip install "nemo_run[lepton]"
4141
```
4242

4343
To authenticate with the DGX Cloud Lepton cluster, navigate to the **Settings > Tokens** page in the DGX Cloud Lepton UI, copy the ``lep login`` command shown on the page, and run it in your terminal.

nemo_run/core/execution/lepton.py

Lines changed: 54 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16+
from __future__ import annotations
17+
1618
import base64
1719
import logging
1820
import os
@@ -22,36 +24,67 @@
2224
import time
2325
from dataclasses import dataclass, field
2426
from datetime import datetime
27+
from enum import Enum
2528
from pathlib import Path
2629
from typing import Any, List, Optional, Set, Type
2730

2831
from invoke.context import Context
29-
from leptonai.api.v2.client import APIClient
30-
from leptonai.api.v1.types.affinity import LeptonResourceAffinity
31-
from leptonai.api.v1.types.common import Metadata, LeptonVisibility
32-
from leptonai.api.v1.types.dedicated_node_group import DedicatedNodeGroup
33-
from leptonai.api.v1.types.deployment import (
34-
EnvVar,
35-
EnvValue,
36-
LeptonContainer,
37-
Mount,
38-
)
39-
from leptonai.api.v1.types.job import (
40-
LeptonJob,
41-
LeptonJobState,
42-
LeptonJobUserSpec,
43-
ReservationConfig,
44-
)
45-
from leptonai.api.v1.types.replica import Replica
4632

4733
from nemo_run.config import get_nemorun_home
4834
from nemo_run.core.execution.base import Executor, ExecutorMacros
4935
from nemo_run.core.packaging.base import Packager
5036
from nemo_run.core.packaging.git import GitArchivePackager
5137

38+
_LEPTON_IMPORT_ERROR: ImportError | None = None
39+
_LEPTON_AVAILABLE = False
40+
41+
try:
42+
from leptonai.api.v1.types.affinity import LeptonResourceAffinity
43+
from leptonai.api.v1.types.common import LeptonVisibility, Metadata
44+
from leptonai.api.v1.types.dedicated_node_group import DedicatedNodeGroup
45+
from leptonai.api.v1.types.deployment import (
46+
EnvVar,
47+
EnvValue,
48+
LeptonContainer,
49+
Mount,
50+
)
51+
from leptonai.api.v1.types.job import (
52+
LeptonJob,
53+
LeptonJobState,
54+
LeptonJobUserSpec,
55+
ReservationConfig,
56+
)
57+
from leptonai.api.v1.types.replica import Replica
58+
from leptonai.api.v2.client import APIClient
59+
60+
_LEPTON_AVAILABLE = True
61+
except ImportError as e:
62+
_LEPTON_IMPORT_ERROR = e
63+
64+
class LeptonJobState(Enum):
65+
Starting = "Starting"
66+
Running = "Running"
67+
Failed = "Failed"
68+
Completed = "Completed"
69+
Deleting = "Deleting"
70+
Restarting = "Restarting"
71+
Archived = "Archived"
72+
Stopped = "Stopped"
73+
Stopping = "Stopping"
74+
Unknown = "Unknown"
75+
76+
5277
logger = logging.getLogger(__name__)
5378

5479

80+
def _require_leptonai() -> None:
    """Fail with an actionable error when the optional ``leptonai`` package is missing.

    Returns silently when the module-level import guard recorded a
    successful import of the ``leptonai`` types.

    Raises:
        ImportError: If ``leptonai`` could not be imported. The error is
            chained from the exception captured by the import guard.
    """
    if _LEPTON_AVAILABLE:
        return

    install_hint = (
        "leptonai package is required for LeptonExecutor. "
        'Install it with: pip install "nemo_run[lepton]"'
    )
    raise ImportError(install_hint) from _LEPTON_IMPORT_ERROR
86+
87+
5588
@dataclass(kw_only=True)
5689
class LeptonExecutor(Executor):
5790
"""
@@ -84,6 +117,9 @@ class LeptonExecutor(Executor):
84117
head_resource_shape: Optional[str] = "" # Only used for LeptonRayCluster
85118
ray_version: Optional[str] = None # Only used for LeptonRayCluster
86119

120+
def __post_init__(self) -> None:
121+
_require_leptonai()
122+
87123
def stop_job(self, job_id: str):
88124
"""
89125
Send a stop signal to the requested job
@@ -376,6 +412,7 @@ def cancel(self, job_id: str):
376412

377413
@classmethod
378414
def logs(cls: Type["LeptonExecutor"], app_id: str, fallback_path: Optional[str]):
415+
_require_leptonai()
379416
client = APIClient()
380417

381418
# Get the first replica from the job which contains the job logs

nemo_run/run/ray/lepton.py

Lines changed: 44 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,33 +13,45 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16+
from __future__ import annotations
17+
1618
import asyncio
1719
import json
1820
import logging
1921
import sys
2022
import time
21-
import urllib3
2223
import warnings
2324
from dataclasses import dataclass
24-
from ray.job_submission import JobSubmissionClient
25-
from rich.pretty import pretty_repr
2625
from typing import Any, Optional, TypeAlias
2726

28-
from leptonai.api.v1.types.affinity import LeptonResourceAffinity
29-
from leptonai.api.v1.types.dedicated_node_group import DedicatedNodeGroup
30-
from leptonai.api.v1.types.deployment import EnvVar, EnvValue
31-
32-
from nemo_run.core.execution.lepton import LeptonExecutor
27+
import urllib3
28+
from rich.pretty import pretty_repr
3329

34-
from leptonai.api.v2.client import APIClient
35-
from leptonai.api.v1.types.raycluster import (
36-
LeptonRayCluster as LeptonRayClusterSpec,
37-
LeptonRayClusterUserSpec,
38-
Metadata,
39-
RayHeadGroupSpec,
40-
RayWorkerGroupSpec,
41-
)
42-
from leptonai.cli.raycluster import DEFAULT_RAY_IMAGE
30+
from nemo_run.core.execution.lepton import LeptonExecutor, _LEPTON_AVAILABLE, _require_leptonai
31+
32+
_RAY_IMPORT_ERROR: ImportError | None = None
33+
_RAY_AVAILABLE = False
34+
try:
35+
from ray.job_submission import JobSubmissionClient
36+
37+
_RAY_AVAILABLE = True
38+
except ImportError as e:
39+
_RAY_IMPORT_ERROR = e
40+
JobSubmissionClient = None
41+
42+
if _LEPTON_AVAILABLE:
43+
from leptonai.api.v1.types.affinity import LeptonResourceAffinity
44+
from leptonai.api.v1.types.dedicated_node_group import DedicatedNodeGroup
45+
from leptonai.api.v1.types.deployment import EnvVar, EnvValue
46+
from leptonai.api.v1.types.raycluster import (
47+
LeptonRayCluster as LeptonRayClusterSpec,
48+
LeptonRayClusterUserSpec,
49+
Metadata,
50+
RayHeadGroupSpec,
51+
RayWorkerGroupSpec,
52+
)
53+
from leptonai.api.v2.client import APIClient
54+
from leptonai.cli.raycluster import DEFAULT_RAY_IMAGE
4355

4456
noquote: TypeAlias = str
4557

@@ -49,6 +61,19 @@
4961
RAY_NOT_READY_STATE = "Not Ready"
5062

5163

64+
def _require_ray() -> None:
    """Fail with an actionable error when ``ray`` could not be imported.

    Returns silently when the guarded ``ray.job_submission`` import at module
    load succeeded.

    Raises:
        ImportError: If the guarded import failed. The error is chained from
            the exception captured at import time.
    """
    if _RAY_AVAILABLE:
        return

    install_hint = (
        "ray is required for Lepton Ray helpers. "
        'Install it with: pip install "nemo_run[lepton]"'
    )
    raise ImportError(install_hint) from _RAY_IMPORT_ERROR
70+
71+
72+
def _require_lepton_ray() -> None:
    """Ensure both optional dependencies of the Lepton Ray helpers are importable.

    ``leptonai`` is checked first, then ``ray``; the first failing check
    raises and the second is not reached.

    Raises:
        ImportError: Propagated from whichever check fails first.
    """
    for ensure_available in (_require_leptonai, _require_ray):
        ensure_available()
75+
76+
5277
@dataclass(kw_only=True)
5378
class LeptonRayCluster:
5479
EXECUTOR_CLS = LeptonExecutor
@@ -57,6 +82,7 @@ class LeptonRayCluster:
5782
executor: LeptonExecutor
5883

5984
def __post_init__(self):
85+
_require_lepton_ray()
6086
self.cluster_map: dict[str, str] = {}
6187

6288
def _node_group_id(self, client: APIClient) -> DedicatedNodeGroup:
@@ -374,6 +400,7 @@ class LeptonRayJob:
374400
# Internals
375401
# ---------------------------------------------------------------------
376402
def __post_init__(self):
403+
_require_lepton_ray()
377404
self.submission_id = None
378405

379406
def _get_last_submission_id(self) -> Optional[int]:

nemo_run/run/torchx_backend/schedulers/lepton.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,12 @@
2424

2525
import fiddle as fdl
2626
import fiddle._src.experimental.dataclasses as fdl_dc
27-
from leptonai.api.v1.types.job import LeptonJobState
2827
from torchx.schedulers.api import AppDryRunInfo, DescribeAppResponse, ListAppResponse, Scheduler
2928
from torchx.specs import AppDef, AppState, ReplicaStatus, Role, RoleStatus, runopts
3029

3130
from nemo_run.config import get_nemorun_home
3231
from nemo_run.core.execution.base import Executor
33-
from nemo_run.core.execution.lepton import LeptonExecutor
32+
from nemo_run.core.execution.lepton import LeptonExecutor, LeptonJobState, _require_leptonai
3433
from nemo_run.core.serialization.zlib_json import ZlibJSONSerializer
3534
from nemo_run.run.torchx_backend.schedulers.api import SchedulerMixin
3635

@@ -70,6 +69,7 @@ class LeptonRequest:
7069
class LeptonScheduler(SchedulerMixin, Scheduler[dict[str, str]]): # type: ignore
7170
def __init__(self, session_name: str) -> None:
7271
super().__init__("lepton", session_name)
72+
_require_leptonai()
7373

7474
def _run_opts(self) -> runopts:
7575
opts = runopts()
@@ -86,9 +86,8 @@ def _submit_dryrun( # type: ignore
8686
app: AppDef,
8787
cfg: Executor,
8888
) -> AppDryRunInfo[LeptonRequest]:
89-
assert isinstance(cfg, LeptonExecutor), (
90-
f"{cfg.__class__} not supported for Lepton scheduler."
91-
)
89+
if not isinstance(cfg, LeptonExecutor):
90+
raise AssertionError(f"{cfg.__class__} not supported for Lepton scheduler.")
9291
executor = cfg
9392

9493
assert len(app.roles) == 1, "Only single-role apps are supported."

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ dependencies = [
2929
"jinja2>=3.1.4",
3030
"networkx >= 3.3",
3131
"omegaconf>=2.3.0",
32-
"leptonai>=0.26.6",
3332
"toml",
3433
]
3534
readme = "README.md"
@@ -50,6 +49,9 @@ skypilot_jobs = "nemo_run.run.torchx_backend.schedulers.skypilot_jobs:create_sch
5049
kubeflow = "nemo_run.run.torchx_backend.schedulers.kubeflow:create_scheduler"
5150

5251
[project.optional-dependencies]
52+
lepton = [
53+
"leptonai>=0.26.6",
54+
]
5355
skypilot = [
5456
"skypilot[kubernetes]>=0.10.0",
5557
]

test/core/execution/test_lepton.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,21 @@
1919
from unittest.mock import MagicMock, mock_open, patch
2020

2121
import pytest
22-
from leptonai.api.v1.types.common import LeptonVisibility, Metadata
23-
from leptonai.api.v1.types.deployment import (
22+
23+
pytest.importorskip("leptonai")
24+
25+
from leptonai.api.v1.types.common import LeptonVisibility, Metadata # noqa: E402
26+
from leptonai.api.v1.types.deployment import ( # noqa: E402
2427
LeptonContainer,
2528
LeptonResourceAffinity,
2629
Mount,
2730
EnvVar,
2831
EnvValue,
2932
)
30-
from leptonai.api.v1.types.job import LeptonJob, LeptonJobUserSpec
33+
from leptonai.api.v1.types.job import LeptonJob, LeptonJobUserSpec # noqa: E402
3134

32-
from nemo_run.core.execution.lepton import LeptonExecutor, LeptonJobState
33-
from nemo_run.core.packaging.git import GitArchivePackager
35+
from nemo_run.core.execution.lepton import LeptonExecutor, LeptonJobState # noqa: E402
36+
from nemo_run.core.packaging.git import GitArchivePackager # noqa: E402
3437

3538

3639
class MockLeptonJob:
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
import subprocess
17+
import sys
18+
import textwrap
19+
from pathlib import Path
20+
21+
22+
REPO_ROOT = Path(__file__).parents[3]
23+
24+
25+
def _run_with_blocked_leptonai(code: str) -> subprocess.CompletedProcess[str]:
    """Run ``code`` in a child interpreter where importing ``leptonai`` fails.

    The child script starts by inserting a meta-path finder at the front of
    ``sys.meta_path``; it raises ``ModuleNotFoundError`` for ``leptonai`` and
    any ``leptonai.*`` submodule. The dedented ``code`` runs after that.

    Args:
        code: Python source to execute; common leading indentation is removed
            with ``textwrap.dedent``.

    Returns:
        The completed process, with stdout and stderr captured as text. A
        non-zero exit status is reported via ``returncode``, not raised.
    """
    # Not dedented below, so this prelude must stay at column 0.
    import_blocker = """
import importlib.abc
import sys


class BlockLeptonai(importlib.abc.MetaPathFinder):
    def find_spec(self, fullname, path=None, target=None):
        if fullname == "leptonai" or fullname.startswith("leptonai."):
            raise ModuleNotFoundError("No module named 'leptonai'")
        return None


sys.meta_path.insert(0, BlockLeptonai())
"""
    program = "\n".join((import_blocker, textwrap.dedent(code)))
    # Run from the repository root (REPO_ROOT) as the child's working directory.
    return subprocess.run(
        [sys.executable, "-c", program],
        check=False,
        capture_output=True,
        text=True,
        cwd=REPO_ROOT,
    )
48+
49+
50+
def test_nemo_run_import_without_leptonai() -> None:
    """``nemo_run`` imports without ``leptonai``; building a ``LeptonExecutor`` raises."""
    # Runs in a child interpreter; a failing assert there gives a non-zero exit code.
    snippet = """
        import sys

        import nemo_run as run
        from nemo_run import LeptonExecutor as PublicLeptonExecutor
        from nemo_run.core.execution import LeptonExecutor as ExecutionLeptonExecutor

        assert run.LocalExecutor.__name__ == "LocalExecutor"
        assert run.LeptonExecutor.__name__ == "LeptonExecutor"
        assert PublicLeptonExecutor is run.LeptonExecutor
        assert ExecutionLeptonExecutor is run.LeptonExecutor
        assert "leptonai" not in sys.modules

        try:
            run.LeptonExecutor(container_image="image", nemo_run_dir="/nemo")
        except ImportError as e:
            assert "nemo_run[lepton]" in str(e)
        else:
            raise AssertionError("LeptonExecutor should require the lepton extra")
        """
    completed = _run_with_blocked_leptonai(snippet)

    # Show the child's stderr when it exits non-zero.
    assert completed.returncode == 0, completed.stderr
75+
76+
77+
def test_scheduler_and_ray_modules_import_without_leptonai() -> None:
    """Scheduler mapping and Ray helper modules import while ``leptonai`` is blocked."""
    # Runs in a child interpreter; a failing assert there gives a non-zero exit code.
    snippet = """
        import sys

        from nemo_run.core.execution.lepton import LeptonExecutor
        from nemo_run.run.torchx_backend.schedulers.api import REVERSE_EXECUTOR_MAPPING
        import nemo_run.run.ray.cluster
        import nemo_run.run.ray.job
        import nemo_run.run.ray.lepton

        assert REVERSE_EXECUTOR_MAPPING["lepton"] is LeptonExecutor
        assert "leptonai" not in sys.modules
        """
    completed = _run_with_blocked_leptonai(snippet)

    # Show the child's stderr when it exits non-zero.
    assert completed.returncode == 0, completed.stderr

0 commit comments

Comments
 (0)