Skip to content

Commit ed678c3

Browse files
committed
feat(bot): add VRAM and junction temps from gputemps
1 parent 3f00fae commit ed678c3

1 file changed

Lines changed: 86 additions & 2 deletions

File tree

bot/nvidia.py

Lines changed: 86 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22
NVIDIA GPU utilities for getting GPU information.
33
"""
44
import asyncio
5+
import json
6+
import os
57
import re
8+
import subprocess
69
from dataclasses import dataclass, field
710
from typing import List, Optional
811

@@ -19,6 +22,8 @@ class GPUInfo:
1922
power_cap: int # Watts
2023
memory_used: int # MiB
2124
memory_total: int # MiB
25+
vram_temp: int = 0 # Celsius, VRAM temperature from gputemps
26+
junction_temp: int = 0 # Celsius, junction temperature from gputemps
2227

2328
@property
2429
def memory_percent(self) -> int:
@@ -170,6 +175,66 @@ def parse_nvidia_smi(raw_output: bytes) -> NvidiaInfo:
170175
)
171176

172177

178+
async def run_gputemps(
    path: str = "~/projects/cpp/gpu-mem-temp/gputemps",
    use_sudo: bool = True,
    timeout: float = 10,
) -> dict:
    """
    Execute gputemps once and return its parsed JSON output.

    Args:
        path: Path to gputemps executable (a leading "~" is expanded)
        use_sudo: Prepend sudo -n (requires passwordless sudo)
        timeout: Seconds to wait for the process to finish

    Returns:
        Parsed JSON dict

    Raises:
        FileNotFoundError: If the executable to launch is not found
        subprocess.SubprocessError: On non-zero exit code, or on timeout
            (raised as the subprocess.TimeoutExpired subclass)
        json.JSONDecodeError: If the output is not valid JSON
    """
    # expanduser() returns the path unchanged unless it starts with "~".
    expanded_path = os.path.expanduser(path)
    cmd = ["sudo", "-n", expanded_path] if use_sudo else [expanded_path]

    # Only the spawn itself can fail with FileNotFoundError, so keep the
    # try body limited to it.
    try:
        process = await asyncio.create_subprocess_exec(
            *cmd, "--once", "--json",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except FileNotFoundError as err:
        raise FileNotFoundError("gputemps not found") from err

    try:
        stdout, _stderr = await asyncio.wait_for(
            process.communicate(), timeout=timeout,
        )
    except asyncio.TimeoutError as err:
        # Do not leave the child running after giving up on it.
        try:
            process.kill()
        except OSError:
            # Already exited, or signalling is not permitted (this may
            # happen for a child started through sudo); do not block on it.
            pass
        else:
            await process.wait()
        # TimeoutExpired is a SubprocessError, so callers that handle the
        # documented error type also handle timeouts.
        raise subprocess.TimeoutExpired(cmd, timeout) from err

    if process.returncode != 0:
        raise subprocess.SubprocessError(
            f"gputemps returned non-zero exit code: {process.returncode}",
        )

    return json.loads(stdout)
214+
215+
216+
def merge_vram_temps(info: NvidiaInfo, gputemps_data: Optional[dict]) -> None:
    """
    Merge VRAM and junction temperatures from gputemps into NvidiaInfo.

    Entries of gputemps_data["gpus"] are matched to info.gpus by comparing
    each entry's "index" with gpu.id. GPUs without a matching entry are
    left untouched. Malformed entries (not a dict, or without "index")
    are skipped, because this data is an optional extra and must not
    break the caller.

    Args:
        info: Parsed NvidiaInfo structure (modified in-place)
        gputemps_data: Parsed JSON from gputemps, or None
    """
    if not gputemps_data or not isinstance(gputemps_data, dict):
        return

    # "gpus" may be absent or JSON null; treat both as "no readings".
    entries = gputemps_data.get("gpus") or []
    gpu_temps = {
        entry["index"]: entry
        for entry in entries
        if isinstance(entry, dict) and "index" in entry
    }

    for gpu in info.gpus:
        temps = gpu_temps.get(gpu.id)
        if temps:
            # "or 0" maps a missing key and a JSON null to the same
            # "no reading" value the fields default to.
            gpu.vram_temp = temps.get("vram") or 0
            gpu.junction_temp = temps.get("junction") or 0
236+
237+
173238
def format_for_vk(info: NvidiaInfo) -> str:
174239
"""
175240
Format NvidiaInfo as concise text suitable for VK message.
@@ -196,6 +261,13 @@ def format_for_vk(info: NvidiaInfo) -> str:
196261
f"{gpu.power_usage}W/{gpu.power_cap}W "
197262
f"{gpu.memory_used}/{gpu.memory_total}MiB ({gpu.memory_percent}%)"
198263
)
264+
if gpu.junction_temp or gpu.vram_temp:
265+
temps = []
266+
if gpu.junction_temp:
267+
temps.append(f"Junction: {gpu.junction_temp}C")
268+
if gpu.vram_temp:
269+
temps.append(f"VRAM: {gpu.vram_temp}C")
270+
lines.append(f" {' '.join(temps)}")
199271

200272
return "\n".join(lines)
201273

@@ -243,21 +315,33 @@ async def get_gpu_simple_message(timeout: int = 30) -> tuple[Optional[str], Opti
243315
return None, f"Error: {str(e)[:2000]}"
244316

245317

246-
async def get_gpu_info_vk_message(timeout: int = 30) -> tuple[Optional[str], Optional[str]]:
318+
async def get_gpu_info_vk_message(
319+
timeout: int = 30,
320+
gputemps_path: str = "~/projects/cpp/gpu-mem-temp/gputemps",
321+
) -> tuple[Optional[str], Optional[str]]:
247322
"""
248323
Get GPU information and format as VK message.
249324
250-
Convenience function that runs nvidia-smi, parses, and formats output.
325+
Runs nvidia-smi and gputemps, merges VRAM temperatures, and formats output.
251326
252327
Args:
253328
timeout: Timeout in seconds
329+
gputemps_path: Path to gputemps executable
254330
255331
Returns:
256332
Tuple of (message_text, error_text) - one will be None
257333
"""
258334
try:
259335
raw_output = await run_nvidia_smi(timeout=timeout)
260336
info = parse_nvidia_smi(raw_output)
337+
338+
gputemps_data = None
339+
try:
340+
gputemps_data = await run_gputemps(path=gputemps_path)
341+
except (FileNotFoundError, subprocess.SubprocessError, json.JSONDecodeError):
342+
pass
343+
344+
merge_vram_temps(info, gputemps_data)
261345
message = format_for_vk(info)
262346
return message, None
263347
except FileNotFoundError:

0 commit comments

Comments
 (0)