@@ -548,30 +548,30 @@ struct MLXServer: AsyncParsableCommand {
548548 let snapshot = await stats. snapshot ( )
549549 let uptime = snapshot. uptimeSeconds
550550 var lines : [ String ] = [ ]
551- lines. append ( " # HELP mlx_server_requests_total Total requests processed " )
552- lines. append ( " # TYPE mlx_server_requests_total counter " )
553- lines. append ( " mlx_server_requests_total \( snapshot. requestsTotal) " )
554- lines. append ( " # HELP mlx_server_requests_active Currently active requests " )
555- lines. append ( " # TYPE mlx_server_requests_active gauge " )
556- lines. append ( " mlx_server_requests_active \( snapshot. requestsActive) " )
557- lines. append ( " # HELP mlx_server_tokens_generated_total Total tokens generated " )
558- lines. append ( " # TYPE mlx_server_tokens_generated_total counter " )
559- lines. append ( " mlx_server_tokens_generated_total \( snapshot. tokensGenerated) " )
560- lines. append ( " # HELP mlx_server_tokens_per_second Average token generation rate " )
561- lines. append ( " # TYPE mlx_server_tokens_per_second gauge " )
562- lines. append ( " mlx_server_tokens_per_second \( String ( format: " %.2f " , snapshot. avgTokensPerSec) ) " )
563- lines. append ( " # HELP mlx_server_memory_active_bytes Active GPU memory usage " )
564- lines. append ( " # TYPE mlx_server_memory_active_bytes gauge " )
565- lines. append ( " mlx_server_memory_active_bytes \( activeMemBytes) " )
566- lines. append ( " # HELP mlx_server_memory_peak_bytes Peak GPU memory usage " )
567- lines. append ( " # TYPE mlx_server_memory_peak_bytes gauge " )
568- lines. append ( " mlx_server_memory_peak_bytes \( peakMemBytes) " )
569- lines. append ( " # HELP mlx_server_memory_cache_bytes Cached GPU memory " )
570- lines. append ( " # TYPE mlx_server_memory_cache_bytes gauge " )
571- lines. append ( " mlx_server_memory_cache_bytes \( cacheMemBytes) " )
572- lines. append ( " # HELP mlx_server_uptime_seconds Server uptime " )
573- lines. append ( " # TYPE mlx_server_uptime_seconds gauge " )
574- lines. append ( " mlx_server_uptime_seconds \( String ( format: " %.0f " , uptime) ) " )
551+ lines. append ( " # HELP swiftlm_requests_total Total requests processed " )
552+ lines. append ( " # TYPE swiftlm_requests_total counter " )
553+ lines. append ( " swiftlm_requests_total \( snapshot. requestsTotal) " )
554+ lines. append ( " # HELP swiftlm_requests_active Currently active requests " )
555+ lines. append ( " # TYPE swiftlm_requests_active gauge " )
556+ lines. append ( " swiftlm_requests_active \( snapshot. requestsActive) " )
557+ lines. append ( " # HELP swiftlm_tokens_generated_total Total tokens generated " )
558+ lines. append ( " # TYPE swiftlm_tokens_generated_total counter " )
559+ lines. append ( " swiftlm_tokens_generated_total \( snapshot. tokensGenerated) " )
560+ lines. append ( " # HELP swiftlm_tokens_per_second Average token generation rate " )
561+ lines. append ( " # TYPE swiftlm_tokens_per_second gauge " )
562+ lines. append ( " swiftlm_tokens_per_second \( String ( format: " %.2f " , snapshot. avgTokensPerSec) ) " )
563+ lines. append ( " # HELP swiftlm_memory_active_bytes Active GPU memory usage " )
564+ lines. append ( " # TYPE swiftlm_memory_active_bytes gauge " )
565+ lines. append ( " swiftlm_memory_active_bytes \( activeMemBytes) " )
566+ lines. append ( " # HELP swiftlm_memory_peak_bytes Peak GPU memory usage " )
567+ lines. append ( " # TYPE swiftlm_memory_peak_bytes gauge " )
568+ lines. append ( " swiftlm_memory_peak_bytes \( peakMemBytes) " )
569+ lines. append ( " # HELP swiftlm_memory_cache_bytes Cached GPU memory " )
570+ lines. append ( " # TYPE swiftlm_memory_cache_bytes gauge " )
571+ lines. append ( " swiftlm_memory_cache_bytes \( cacheMemBytes) " )
572+ lines. append ( " # HELP swiftlm_uptime_seconds Server uptime " )
573+ lines. append ( " # TYPE swiftlm_uptime_seconds gauge " )
574+ lines. append ( " swiftlm_uptime_seconds \( String ( format: " %.0f " , uptime) ) " )
575575 lines. append ( " " )
576576 let metrics = lines. joined ( separator: " \n " )
577577 return Response (
0 commit comments