Skip to content

Commit 0695eb9

Browse files
committed
[Fix] Scenarios using gpu
1 parent 9cf12ca commit 0695eb9

3 files changed

Lines changed: 12 additions & 11 deletions

File tree

app/deployer.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -668,12 +668,6 @@ def run_frontend(self):
668668
"/var/run/docker.sock not found, please check if Docker is running and Docker Compose is installed."
669669
)
670670

671-
try:
672-
subprocess.check_call(["nvidia-smi"])
673-
self.gpu_available = True
674-
except Exception:
675-
logging.info("No GPU available for the frontend, nodes will be deploy in CPU mode")
676-
677671
network_name = f"{os.environ['USER']}_nebula-net-base"
678672

679673
# Create the Docker network
@@ -684,7 +678,6 @@ def run_frontend(self):
684678
environment = {
685679
"NEBULA_CONTROLLER_NAME": os.environ["USER"],
686680
"NEBULA_PRODUCTION": self.production,
687-
"NEBULA_GPU_AVAILABLE": self.gpu_available,
688681
"NEBULA_ADVANCED_ANALYTICS": self.advanced_analytics,
689682
"NEBULA_FRONTEND_LOG": "/nebula/app/logs/frontend.log",
690683
"NEBULA_LOGS_DIR": "/nebula/app/logs/",
@@ -756,6 +749,12 @@ def run_controller(self):
756749
)
757750

758751
network_name = f"{os.environ['USER']}_nebula-net-base"
752+
753+
try:
754+
subprocess.check_call(["nvidia-smi"])
755+
self.gpu_available = True
756+
except Exception:
757+
logging.info("No GPU available for the frontend, nodes will be deploy in CPU mode")
759758

760759
# Create the Docker network
761760
base = DockerUtils.create_docker_network(network_name)
@@ -790,6 +789,11 @@ def run_controller(self):
790789
],
791790
extra_hosts={"host.docker.internal": "host-gateway"},
792791
port_bindings={self.controller_port: self.controller_port},
792+
device_requests=[{
793+
"Driver": "nvidia",
794+
"Count": -1,
795+
"Capabilities": [["gpu"]],
796+
}] if self.gpu_available else None,
793797
)
794798

795799
networking_config = client.api.create_networking_config({

nebula/controller/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM ubuntu:22.04
1+
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
22

33
ENV DEBIAN_FRONTEND=noninteractive
44

nebula/frontend/app.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ class Settings:
3131
resources_threshold (float): Threshold for resource usage alerts (default: 0.0).
3232
port (int): Port for the Nebula frontend service (default: 6060).
3333
production (bool): Whether the application is running in production mode.
34-
gpu_available (bool): Whether GPU resources are available.
3534
advanced_analytics (bool): Whether advanced analytics features are enabled.
3635
host_platform (str): Underlying host operating platform (e.g., 'unix').
3736
log_dir (str): Directory path where application logs are stored.
@@ -51,7 +50,6 @@ class Settings:
5150
resources_threshold: float = 0.0
5251
port: int = os.environ.get("NEBULA_FRONTEND_PORT", 6060)
5352
production: bool = os.environ.get("NEBULA_PRODUCTION", "False") == "True"
54-
gpu_available: bool = os.environ.get("NEBULA_GPU_AVAILABLE", "False") == "True"
5553
advanced_analytics: bool = os.environ.get("NEBULA_ADVANCED_ANALYTICS", "False") == "True"
5654
host_platform: str = os.environ.get("NEBULA_HOST_PLATFORM", "unix")
5755
log_dir: str = os.environ.get("NEBULA_LOGS_DIR")
@@ -2091,7 +2089,6 @@ async def nebula_dashboard_deployment(request: Request, session: dict = Depends(
20912089
"request": request,
20922090
"scenario_running": scenario_running,
20932091
"user_logged_in": session.get("user"),
2094-
"gpu_available": settings.gpu_available,
20952092
},
20962093
)
20972094

0 commit comments

Comments
 (0)