6262import re
6363import subprocess
6464import threading
65+ import time
6566
6667import sams .base
6768
@@ -126,6 +127,10 @@ def stopped(self):
126127class Sampler (sams .base .Sampler ):
127128 def __init__ (self , id , outQueue , config ):
128129 super (Sampler , self ).__init__ (id , outQueue , config )
130+ self ._start_time = time .time ()
131+ self ._last_sample_time = dict ()
132+ self ._average_values = dict ()
133+ self ._last_averaged_values = dict ()
129134 self .processes = {}
130135 self .sampler_interval = self .config .get ([self .id , "sampler_interval" ], 60 )
131136 self .gpu_index_environment = self .config .get (
@@ -147,6 +152,14 @@ def __init__(self, id, outQueue, config):
147152 "utilization.memory" ,
148153 ],
149154 )
155+ self .metrics_to_average = self .config .get (
156+ [self .id , "metrics_to_average" ],
157+ [
158+ "power.draw" ,
159+ "utilization.gpu" ,
160+ "utilization.memory" ,
161+ ],
162+ )
150163
151164 self .smi = None
152165 if self .gpu_index_environment in os .environ :
@@ -172,11 +185,48 @@ def sample(self):
172185 logger .debug (data )
173186 index = data ["index" ]
174187 del data ["index" ]
188+ self .compute_sample_averages (data , index )
175189 entry = {index : data }
176190 most_recent_sample .append (self ._storage_wrapping (entry ))
177191 self .store (entry )
178192 self ._most_recent_sample = most_recent_sample
179193
194+ def compute_sample_averages (self , data , index ):
195+ """ Computes averages of selected measurements by
196+ means of trapezoidal quadrature, approximating
197+ that the time this function is called is the actual
198+ time of sampling. This is not completely correct but simplifies
199+ the implementation.
200+ """
201+ sample_time = time .time ()
202+ if index not in self ._last_sample_time :
203+ # Keep it simple by approximating sampling time
204+ self ._last_sample_time [index ] = self ._start_time
205+ self ._average_values [index ] = dict ()
206+ self ._last_averaged_values [index ] = dict ()
207+ for key in data :
208+ if key .replace ('_' , '.' ) in self .metrics_to_average :
209+ # Initialize trapezoidal integral at 0.
210+ self ._average_values [index ][key ] = 0.
211+ self ._last_averaged_values [index ][key ] = 0.
212+ elapsed_time = sample_time - self ._last_sample_time [index ]
213+ total_elapsed_time = sample_time - self ._start_time
214+ average_values = self ._average_values [index ]
215+ last_averaged_values = self ._last_averaged_values [index ]
216+ self ._last_sample_time [index ] = sample_time
217+ for key , item in data .items ():
218+ if key .replace ('_' , '.' ) in self .metrics_to_average :
219+ # Trapezoidal quadrature
220+ weighted_item = (
221+ 0.5 * (float (item ) + float (last_averaged_values [key ])) * elapsed_time )
222+ last_averaged_values [key ] = item
223+ previous_integral = average_values [key ] * (total_elapsed_time - elapsed_time )
224+ new_integral = previous_integral + weighted_item
225+ average_values [key ] = new_integral / total_elapsed_time
226+
227+ for key , item in average_values .items ():
228+ data [key + '_average' ] = item
229+
180230 def final_data (self ):
181231 if self .smi :
182232 self .smi .stop ()
0 commit comments