22NVIDIA GPU utilities for getting GPU information.
33"""
44import asyncio
5+ import json
6+ import os
57import re
8+ import subprocess
69from dataclasses import dataclass , field
710from typing import List , Optional
811
@@ -19,6 +22,8 @@ class GPUInfo:
1922 power_cap : int # Watts
2023 memory_used : int # MiB
2124 memory_total : int # MiB
25+ vram_temp : int = 0 # Celsius, VRAM temperature from gputemps
26+ junction_temp : int = 0 # Celsius, junction temperature from gputemps
2227
2328 @property
2429 def memory_percent (self ) -> int :
@@ -170,6 +175,66 @@ def parse_nvidia_smi(raw_output: bytes) -> NvidiaInfo:
170175 )
171176
172177
async def run_gputemps(
    path: str = "~/projects/cpp/gpu-mem-temp/gputemps",
    use_sudo: bool = True,
) -> dict:
    """
    Execute gputemps once and return its parsed JSON output.

    Args:
        path: Path to gputemps executable (a leading ~ is expanded)
        use_sudo: Prepend sudo -n (requires passwordless sudo)

    Returns:
        Parsed JSON dict

    Raises:
        FileNotFoundError: If the program to launch (sudo or gputemps) is not found
        subprocess.SubprocessError: On non-zero exit code, or on timeout
            (reported as subprocess.TimeoutExpired, a SubprocessError subclass)
        json.JSONDecodeError: If the output is not valid JSON
    """
    timeout = 10  # seconds allowed for one gputemps run

    # expanduser() leaves paths that do not start with "~" untouched.
    expanded_path = os.path.expanduser(path)
    cmd = ["sudo", "-n", expanded_path] if use_sudo else [expanded_path]
    cmd += ["--once", "--json"]

    try:
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
    except FileNotFoundError as exc:
        raise FileNotFoundError("gputemps not found") from exc

    try:
        stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout)
    except asyncio.TimeoutError:
        # wait_for() only cancels communicate(); the child itself keeps
        # running, so stop and reap it explicitly.
        try:
            process.kill()
        except (ProcessLookupError, PermissionError):
            # Already exited, or (under sudo) owned by root and not
            # signalable by us -- waiting for it could block forever.
            pass
        else:
            await process.wait()
        raise subprocess.TimeoutExpired(cmd, timeout) from None

    if process.returncode != 0:
        message = f"gputemps returned non-zero exit code: {process.returncode}"
        detail = stderr.decode(errors="replace").strip()
        if detail:
            # Keep the reason (e.g. sudo asking for a password) but bound its size.
            message += f" ({detail[:500]})"
        raise subprocess.SubprocessError(message)

    return json.loads(stdout)
214+
215+
def merge_vram_temps(info: "NvidiaInfo", gputemps_data: Optional[dict]) -> None:
    """
    Merge VRAM and junction temperatures from gputemps into NvidiaInfo.

    Entries of gputemps_data["gpus"] are matched to info.gpus by comparing
    the entry's "index" with gpu.id. Malformed entries (not a dict, or no
    "index") are skipped so that one bad record cannot fail the whole merge.

    Args:
        info: Parsed NvidiaInfo structure (modified in-place)
        gputemps_data: Parsed JSON from gputemps, or None

    Returns:
        None; matching GPUs get vram_temp / junction_temp assigned
        (0 when the value is missing or null).
    """
    if not gputemps_data:
        return

    # Index the gputemps entries by GPU index for direct lookup.
    # NOTE(review): assumes "index" and gpu.id use the same type -- confirm.
    gpu_temps = {}
    for entry in gputemps_data.get("gpus") or []:
        if not isinstance(entry, dict):
            continue
        idx = entry.get("index")
        if idx is None:
            continue
        gpu_temps[idx] = entry

    for gpu in info.gpus:
        temps = gpu_temps.get(gpu.id)
        if not temps:
            continue
        # "or 0" also maps an explicit JSON null to the int default.
        gpu.vram_temp = temps.get("vram") or 0
        gpu.junction_temp = temps.get("junction") or 0
237+
173238def format_for_vk (info : NvidiaInfo ) -> str :
174239 """
175240 Format NvidiaInfo as concise text suitable for VK message.
@@ -196,6 +261,13 @@ def format_for_vk(info: NvidiaInfo) -> str:
196261 f"{ gpu .power_usage } W/{ gpu .power_cap } W "
197262 f"{ gpu .memory_used } /{ gpu .memory_total } MiB ({ gpu .memory_percent } %)"
198263 )
264+ if gpu .junction_temp or gpu .vram_temp :
265+ temps = []
266+ if gpu .junction_temp :
267+ temps .append (f"Junction: { gpu .junction_temp } C" )
268+ if gpu .vram_temp :
269+ temps .append (f"VRAM: { gpu .vram_temp } C" )
270+ lines .append (f" { ' ' .join (temps )} " )
199271
200272 return "\n " .join (lines )
201273
@@ -243,21 +315,33 @@ async def get_gpu_simple_message(timeout: int = 30) -> tuple[Optional[str], Opti
243315 return None , f"Error: { str (e )[:2000 ]} "
244316
245317
246- async def get_gpu_info_vk_message (timeout : int = 30 ) -> tuple [Optional [str ], Optional [str ]]:
318+ async def get_gpu_info_vk_message (
319+ timeout : int = 30 ,
320+ gputemps_path : str = "~/projects/cpp/gpu-mem-temp/gputemps" ,
321+ ) -> tuple [Optional [str ], Optional [str ]]:
247322 """
248323 Get GPU information and format as VK message.
249324
250- Convenience function that runs nvidia-smi, parses , and formats output.
325+ Runs nvidia-smi and gputemps, merges VRAM temperatures , and formats output.
251326
252327 Args:
253328 timeout: Timeout in seconds
329+ gputemps_path: Path to gputemps executable
254330
255331 Returns:
256332 Tuple of (message_text, error_text) - one will be None
257333 """
258334 try :
259335 raw_output = await run_nvidia_smi (timeout = timeout )
260336 info = parse_nvidia_smi (raw_output )
337+
338+ gputemps_data = None
339+ try :
340+ gputemps_data = await run_gputemps (path = gputemps_path )
341+ except (FileNotFoundError , subprocess .SubprocessError , json .JSONDecodeError ):
342+ pass
343+
344+ merge_vram_temps (info , gputemps_data )
261345 message = format_for_vk (info )
262346 return message , None
263347 except FileNotFoundError :
0 commit comments