@@ -7,6 +7,12 @@ import (
77 "runtime"
88 "sync"
99 "time"
10+
11+ // gopsutil provides cross-platform system and process utilities.
12+ // It supports linux/amd64 and darwin/amd64 (the target platforms for this codebase),
13+ // as well as Windows, FreeBSD, OpenBSD, and Solaris.
14+ "github.com/shirou/gopsutil/cpu"
15+ "github.com/shirou/gopsutil/mem"
1016)
1117
1218// PerformanceMetricsRecorder provides a simple interface for recording
@@ -107,27 +113,40 @@ func (pm *PerformanceMetrics) registerAllMetrics() {
107113 MetricSigningSuccessTotal ,
108114 MetricSigningFailedTotal ,
109115 MetricSigningTimeoutsTotal ,
116+ MetricRedemptionExecutionsTotal ,
117+ MetricRedemptionExecutionsSuccessTotal ,
118+ MetricRedemptionExecutionsFailedTotal ,
119+ MetricRedemptionProofSubmissionsTotal ,
120+ MetricRedemptionProofSubmissionsSuccessTotal ,
121+ MetricRedemptionProofSubmissionsFailedTotal ,
110122 MetricWalletActionsTotal ,
111123 MetricWalletActionSuccessTotal ,
112124 MetricWalletActionFailedTotal ,
113125 MetricWalletHeartbeatFailuresTotal ,
114126 MetricCoordinationWindowsDetectedTotal ,
115127 MetricCoordinationProceduresExecutedTotal ,
116128 MetricCoordinationFailedTotal ,
129+ MetricCoordinationLeaderTimeoutTotal ,
117130 MetricPeerConnectionsTotal ,
118131 MetricPeerDisconnectionsTotal ,
119132 MetricMessageBroadcastTotal ,
120133 MetricMessageReceivedTotal ,
121134 MetricPingTestsTotal ,
122135 MetricPingTestSuccessTotal ,
123136 MetricPingTestFailedTotal ,
137+ MetricNetworkJoinRequestsTotal ,
138+ MetricNetworkJoinRequestsSuccessTotal ,
139+ MetricNetworkJoinRequestsFailedTotal ,
140+ MetricFirewallRejectionsTotal ,
124141 MetricWalletDispatcherRejectedTotal ,
125142 }
126143
127144 // First, initialize all counters in the map
145+ pm .countersMutex .Lock ()
128146 for _ , name := range counters {
129147 pm .counters [name ] = & counter {value : 0 }
130148 }
149+ pm .countersMutex .Unlock ()
131150
132151 // Then, register observers (this prevents concurrent map read/write)
133152 for _ , name := range counters {
@@ -159,7 +178,9 @@ func (pm *PerformanceMetrics) registerAllMetrics() {
159178 WalletActionMetricName (actionType , "failed_total" ),
160179 }
161180 for _ , name := range actionCounters {
181+ pm .countersMutex .Lock ()
162182 pm .counters [name ] = & counter {value : 0 }
183+ pm .countersMutex .Unlock ()
163184 metricName := name // Capture for closure
164185 pm .registry .ObserveApplicationSource (
165186 "performance" ,
@@ -181,9 +202,11 @@ func (pm *PerformanceMetrics) registerAllMetrics() {
181202
182203 // Register duration metric for this action type
183204 durationName := WalletActionMetricName (actionType , "duration_seconds" )
205+ pm .histogramsMutex .Lock ()
184206 pm .histograms [durationName ] = & histogram {
185207 buckets : make (map [float64 ]float64 ),
186208 }
209+ pm .histogramsMutex .Unlock ()
187210 durationMetricName := durationName // Capture for closure
188211 pm .registry .ObserveApplicationSource (
189212 "performance" ,
@@ -212,17 +235,22 @@ func (pm *PerformanceMetrics) registerAllMetrics() {
212235 durationMetrics := []string {
213236 MetricDKGDurationSeconds ,
214237 MetricSigningDurationSeconds ,
238+ MetricRedemptionActionDurationSeconds ,
215239 MetricWalletActionDurationSeconds ,
216240 MetricCoordinationDurationSeconds ,
241+ MetricCoordinationWindowDurationSeconds ,
217242 MetricPingTestDurationSeconds ,
243+ MetricNetworkHandshakeDurationSeconds ,
218244 }
219245
220246 // First, initialize all histograms in the map
247+ pm .histogramsMutex .Lock ()
221248 for _ , name := range durationMetrics {
222249 pm .histograms [name ] = & histogram {
223250 buckets : make (map [float64 ]float64 ),
224251 }
225252 }
253+ pm .histogramsMutex .Unlock ()
226254
227255 // Then, register observers (this prevents concurrent map read/write)
228256 for _ , name := range durationMetrics {
@@ -270,12 +298,17 @@ func (pm *PerformanceMetrics) registerAllMetrics() {
270298 MetricCPUUtilization ,
271299 MetricMemoryUsageMB ,
272300 MetricGoroutineCount ,
301+ MetricCPULoadPercent ,
302+ MetricRAMUtilizationPercent ,
303+ MetricSwapUtilizationPercent ,
273304 }
274305
275306 // First, initialize all gauges in the map
307+ pm .gaugesMutex .Lock ()
276308 for _ , name := range gauges {
277309 pm .gauges [name ] = & gauge {value : 0 }
278310 }
311+ pm .gaugesMutex .Unlock ()
279312
280313 // Then, register observers (this prevents concurrent map read/write)
281314 for _ , name := range gauges {
@@ -394,7 +427,7 @@ func (pm *PerformanceMetrics) SetGauge(name string, value float64) {
394427// observeSystemMetrics periodically collects and updates system metrics
395428// including CPU utilization, memory usage, and goroutine count.
396429func (pm * PerformanceMetrics ) observeSystemMetrics (ctx context.Context ) {
397- ticker := time .NewTicker (10 * time .Second ) // Update every 10 seconds
430+ ticker := time .NewTicker (60 * time .Second ) // Update every 10 seconds
398431 defer ticker .Stop ()
399432
400433 var lastMemStats runtime.MemStats
@@ -428,6 +461,9 @@ func (pm *PerformanceMetrics) observeSystemMetrics(ctx context.Context) {
428461 lastMemStats = memStats
429462 lastUpdateTime = now
430463 }
464+
465+ // Update OS-level machine stats
466+ pm .updateMachineStats ()
431467 case <- ctx .Done ():
432468 return
433469 }
@@ -483,6 +519,37 @@ func (pm *PerformanceMetrics) calculateCPUUtilizationHeuristic(
483519 return cpuUtilization
484520}
485521
522+ // updateMachineStats collects and updates OS-level machine statistics
523+ // including CPU load, RAM utilization, and swapfile utilization.
524+ func (pm * PerformanceMetrics ) updateMachineStats () {
525+ // Get CPU load percentage (1-second average)
526+ // NOTE: cpu.Percent blocks for the specified duration (1 second) to sample
527+ // CPU usage over that interval. This blocking behavior is intentional and
528+ // necessary to obtain an accurate CPU utilization measurement. The function
529+ // will not return until the 1-second sampling period completes.
530+ cpuPercent , err := cpu .Percent (time .Second , false )
531+ if err == nil && len (cpuPercent ) > 0 {
532+ pm .SetGauge (MetricCPULoadPercent , cpuPercent [0 ])
533+ }
534+
535+ // Get memory statistics
536+ memInfo , err := mem .VirtualMemory ()
537+ if err == nil {
538+ // RAM utilization percentage
539+ pm .SetGauge (MetricRAMUtilizationPercent , memInfo .UsedPercent )
540+
541+ // Swap utilization percentage
542+ swapInfo , err := mem .SwapMemory ()
543+ if err == nil && swapInfo .Total > 0 {
544+ swapUtilizationPercent := (float64 (swapInfo .Used ) / float64 (swapInfo .Total )) * 100.0
545+ pm .SetGauge (MetricSwapUtilizationPercent , swapUtilizationPercent )
546+ } else {
547+ // If swap is not available or has no total, set to 0
548+ pm .SetGauge (MetricSwapUtilizationPercent , 0 )
549+ }
550+ }
551+ }
552+
486553// NoOpPerformanceMetrics is a no-op implementation of PerformanceMetricsRecorder
487554// that can be used when metrics are disabled.
488555type NoOpPerformanceMetrics struct {}
@@ -550,6 +617,17 @@ const (
550617 MetricSigningAttemptsPerOperation = "signing_attempts_per_operation"
551618 MetricSigningTimeoutsTotal = "signing_timeouts_total"
552619
620+ // Redemption Metrics
621+ MetricRedemptionExecutionsTotal = "redemption_executions_total"
622+ MetricRedemptionExecutionsSuccessTotal = "redemption_executions_success_total"
623+ MetricRedemptionExecutionsFailedTotal = "redemption_executions_failed_total"
624+ MetricRedemptionActionDurationSeconds = "redemption_action_duration_seconds"
625+
626+ // Redemption Proof Submission Metrics (SPV maintainer)
627+ MetricRedemptionProofSubmissionsTotal = "redemption_proof_submissions_total"
628+ MetricRedemptionProofSubmissionsSuccessTotal = "redemption_proof_submissions_success_total"
629+ MetricRedemptionProofSubmissionsFailedTotal = "redemption_proof_submissions_failed_total"
630+
553631 // Wallet Action Metrics (aggregate)
554632 MetricWalletActionsTotal = "wallet_actions_total"
555633 MetricWalletActionSuccessTotal = "wallet_action_success_total"
@@ -565,9 +643,18 @@ const (
565643 // Coordination Metrics
566644 MetricCoordinationWindowsDetectedTotal = "coordination_windows_detected_total"
567645 MetricCoordinationProceduresExecutedTotal = "coordination_procedures_executed_total"
568- MetricCoordinationFailedTotal = "coordination_failed_total"
646+ MetricCoordinationFailedTotal = "coordination_failed_total" // Only when node is leader
647+ MetricCoordinationLeaderTimeoutTotal = "coordination_leader_timeout_total" // When follower observes leader timeout
569648 MetricCoordinationDurationSeconds = "coordination_duration_seconds"
570649
650+ // Coordination Window Metrics (per-window tracking)
651+ MetricCoordinationWindowDurationSeconds = "coordination_window_duration_seconds"
652+ MetricCoordinationWindowWalletsCoordinated = "coordination_window_wallets_coordinated"
653+ MetricCoordinationWindowWalletsSuccessful = "coordination_window_wallets_successful"
654+ MetricCoordinationWindowWalletsFailed = "coordination_window_wallets_failed"
655+ MetricCoordinationWindowTotalFaults = "coordination_window_total_faults"
656+ MetricCoordinationWindowCoordinationBlock = "coordination_window_coordination_block"
657+
571658 // Network Metrics
572659 MetricIncomingMessageQueueSize = "incoming_message_queue_size"
573660 MetricMessageHandlerQueueSize = "message_handler_queue_size"
@@ -580,14 +667,24 @@ const (
580667 MetricPingTestFailedTotal = "ping_test_failed_total"
581668 MetricPingTestDurationSeconds = "ping_test_duration_seconds"
582669
670+ // Network Join Request Metrics (inbound connection attempts from peers)
671+ MetricNetworkJoinRequestsTotal = "network_join_requests_total" // Total inbound join attempts
672+ MetricNetworkJoinRequestsSuccessTotal = "network_join_requests_success_total" // Successful joins
673+ MetricNetworkJoinRequestsFailedTotal = "network_join_requests_failed_total" // Failed joins (handshake failure)
674+ MetricNetworkHandshakeDurationSeconds = "network_handshake_duration_seconds" // Handshake duration
675+ MetricFirewallRejectionsTotal = "firewall_rejections_total" // Firewall rejections
676+
583677 // Wallet Dispatcher Metrics
584678 MetricWalletDispatcherActiveActions = "wallet_dispatcher_active_actions"
585679 MetricWalletDispatcherRejectedTotal = "wallet_dispatcher_rejected_total"
586680
587681 // System Metrics
588- MetricCPUUtilization = "cpu_utilization_percent"
589- MetricMemoryUsageMB = "memory_usage_mb"
590- MetricGoroutineCount = "goroutine_count"
682+ MetricCPUUtilization = "cpu_utilization_percent"
683+ MetricMemoryUsageMB = "memory_usage_mb"
684+ MetricGoroutineCount = "goroutine_count"
685+ MetricCPULoadPercent = "cpu_load_percent"
686+ MetricRAMUtilizationPercent = "ram_utilization_percent"
687+ MetricSwapUtilizationPercent = "swap_utilization_percent"
591688)
592689
593690// WalletActionMetricName generates a metric name for a specific wallet action type.
0 commit comments