@@ -118,11 +118,11 @@ def _apply_mutate(qty: float, mutate: str) -> float:
118118 elif mutate_upper == "FLOOR" :
119119 return math .floor (qty )
120120 elif mutate_upper == "NUMBOOL" :
121- # If qty near 0, set it at 0. Else, set it to 1.
122- return 0.0 if abs ( qty ) < 1e-9 else 1.0
121+ # If qty equals 0, leave it at 0. Else, set it to 1.
122+ return 0.0 if qty == 0 else 1.0
123123 elif mutate_upper == "NOTNUMBOOL" :
124- # If qty near 0, set it to 1. Else, set it to 0.
125- return 1.0 if abs ( qty ) < 1e-9 else 0.0
124+ # If qty equals 0, set it to 1. Else, set it to 0.
125+ return 1.0 if qty == 0 else 0.0
126126 else : # NONE or any unrecognized value
127127 return qty
128128
@@ -175,9 +175,8 @@ def _parse_numeric(value: Any, default: float = 0) -> float:
175175
176176def aggregate_rates_by_type (
177177 pairs : list [tuple [str , str ]],
178- ) -> tuple [dict , float , dict ]:
179- rate_sums : defaultdict [str , float ] = defaultdict (float )
180- qty_sums : defaultdict [str , float ] = defaultdict (float )
178+ ) -> tuple [dict , float ]:
179+ sums : defaultdict [str , float ] = defaultdict (float )
181180 for _ , log_str in pairs :
182181 try :
183182 entry = json .loads (log_str )
@@ -197,34 +196,20 @@ def aggregate_rates_by_type(
197196 except (TypeError , ValueError ):
198197 continue
199198
200- # Track raw qty sum (before any transformation)
201- qty_sums [mtype ] += qty
202-
203- # Apply mutate transformation for rating calculation
199+ # Apply mutate transformation
204200 qty_mutated = _apply_mutate (qty , mutate )
205201
206202 # Apply factor and offset
207203 qty_rate = qty_mutated * factor + offset
208204
209205 # Calculate rate
210- rate_sums [mtype ] += qty_rate * price
211-
212- by_types = {
213- k : {"Rate" : round (v , 4 )} for k , v in sorted (rate_sums .items ())
214- }
215- qty_by_types = {
216- k : {"qty_sum" : round (v , 4 )} for k , v in sorted (qty_sums .items ())
217- }
218- total = sum (rate_sums .values ())
219- return by_types , total , qty_by_types
206+ sums [mtype ] += qty_rate * price
207+ by_types = {k : {"Rate" : round (v , 4 )} for k , v in sorted (sums .items ())}
208+ total = sum (sums .values ())
209+ return by_types , total
220210
221211
222212def build_summary (pairs : list [tuple [str , str ]]) -> dict [str , Any ]:
223- # Early exit if no pairs
224- if not pairs :
225- print ("Error: No log entries to summarize" , file = sys .stderr )
226- sys .exit (1 )
227-
228213 log_count = len (pairs )
229214 per_ts = Counter (ts for ts , _ in pairs )
230215 n_ts = len (per_ts )
@@ -233,52 +218,36 @@ def build_summary(pairs: list[tuple[str, str]]) -> dict[str, Any]:
233218 if counts and len (set (counts )) > 1 :
234219 mps = "ERROR"
235220
236- # Parse first and last entries (guaranteed to exist after early exit check)
237- first = json .loads (pairs [0 ][1 ])
238- last = json .loads (pairs [- 1 ][1 ])
239-
240- time_block = {
241- "begin_step" : {
242- "nanosec" : int (pairs [0 ][0 ]),
243- "begin" : first .get ("start" ),
244- "end" : first .get ("end" ),
245- },
246- "end_step" : {
247- "nanosec" : int (pairs [- 1 ][0 ]),
248- "begin" : last .get ("start" ),
249- "end" : last .get ("end" ),
250- },
251- }
252-
253- # Get aggregated data by type
254- by_types , total_r , qty_by_types = aggregate_rates_by_type (pairs )
255-
256- # Get overall time range for by_type entries
257- begin_time = first .get ("start" )
258- end_time = last .get ("end" )
259-
260- # Build flat list of entries
261- rate_list = []
262- for type_name in sorted (by_types .keys ()):
263- entry = {
264- "Begin" : begin_time ,
265- "End" : end_time ,
266- "Qty" : qty_by_types .get (type_name , {}).get ("qty_sum" , 0.0 ),
267- "Rate" : by_types [type_name ]["Rate" ],
268- "Type" : type_name ,
221+ if pairs :
222+ first = json .loads (pairs [0 ][1 ])
223+ last = json .loads (pairs [- 1 ][1 ])
224+ time_block = {
225+ "begin_step" : {
226+ "nanosec" : int (pairs [0 ][0 ]),
227+ "begin" : first .get ("start" ),
228+ "end" : first .get ("end" ),
229+ },
230+ "end_step" : {
231+ "nanosec" : int (pairs [- 1 ][0 ]),
232+ "begin" : last .get ("start" ),
233+ "end" : last .get ("end" ),
234+ },
269235 }
270- rate_list .append (entry )
236+ else :
237+ empty = {"nanosec" : None , "begin" : None , "end" : None }
238+ time_block = {"begin_step" : empty .copy (), "end_step" : empty .copy ()}
271239
240+ by_types , total_r = aggregate_rates_by_type (pairs )
272241 return {
273242 "time" : time_block ,
274- "data_summary " : {
243+ "data_log " : {
275244 "total_timesteps" : n_ts ,
276245 "metrics_per_step" : mps ,
277246 "log_count" : log_count ,
278- "total_rating" : round (total_r , 4 ),
279247 },
280- "by_type" : {
281- "rate" : rate_list ,
248+ "rate" : {
249+ "by_types" : by_types ,
250+ "total" : {"Rating" : round (total_r , 4 )},
282251 },
283252 }
284253
@@ -295,36 +264,10 @@ def write_yaml(path: Path, doc: dict[str, Any]) -> None:
295264 )
296265
297266
298- def _str_to_bool (value : str ) -> bool :
299- """
300- Convert string to boolean.
301-
302- Args:
303- value: String representation of boolean.
304-
305- Returns:
306- Boolean value.
307-
308- Raises:
309- argparse.ArgumentTypeError: If value cannot be converted.
310- """
311- if isinstance (value , bool ):
312- return value
313- if value .lower () in ('yes' , 'true' , 't' , 'y' , '1' ):
314- return True
315- elif value .lower () in ('no' , 'false' , 'f' , 'n' , '0' ):
316- return False
317- else :
318- raise argparse .ArgumentTypeError (
319- f"Boolean value expected. Got: { value } "
320- )
321-
322-
323267def main () -> None :
324268 parser = argparse .ArgumentParser (
325269 description = (
326- "Summarize Loki JSON log entries to YAML "
327- "(time, data_summary, by_type)."
270+ "Summarize Loki JSON log entries to YAML (time, data_log, rate)."
328271 ),
329272 )
330273 parser .add_argument (
@@ -339,21 +282,11 @@ def main() -> None:
339282 )
340283 parser .add_argument (
341284 "--debug" ,
342- type = _str_to_bool ,
343- default = False ,
344- metavar = "BOOL" ,
345- help = (
346- "Enable debug mode: write <stem>_diff.txt with one "
347- "[ts,log] JSON per line (true/false)."
348- ),
349- )
350- parser .add_argument (
351- "--debug_dir" ,
352285 type = Path ,
353286 default = None ,
354287 metavar = "DIR" ,
355288 help = (
356- "Directory for debug output. Required when --debug is enabled ."
289+ "If set, write <stem>_diff.txt with one [ts,log] JSON per line ."
357290 ),
358291 )
359292 args = parser .parse_args ()
@@ -366,24 +299,24 @@ def main() -> None:
366299 out_path = args .output or (args .json .parent / f"{ stem } _total.yml" )
367300 pairs = extract_and_sort (args .json )
368301
369- if args .debug :
370- # Require debug directory when debug mode is enabled
371- if not args .debug_dir :
372- print (
373- "Error: --debug_dir is required when --debug is enabled" ,
374- file = sys .stderr
375- )
376- sys .exit (1 )
377- debug_dir = args .debug_dir
378- debug_dir .mkdir (parents = True , exist_ok = True )
379- dbg_file = debug_dir / f"{ args .json .stem } _diff.txt"
302+ dbg = str (args .debug ).strip () if args .debug is not None else ""
303+ if dbg and dbg != "." :
304+ args .debug .mkdir (parents = True , exist_ok = True )
305+ dbg_file = args .debug / f"{ args .json .stem } _diff.txt"
380306 with dbg_file .open ("w" , encoding = "utf-8" ) as f :
381307 for ts , log_str in pairs :
382308 print (json .dumps ([ts , log_str ], ensure_ascii = False ), file = f )
383309
384310 doc = build_summary (pairs )
385311 write_yaml (out_path , doc )
386312
313+ if doc ["data_log" ]["metrics_per_step" ] == "ERROR" :
314+ per_ts = Counter (ts for ts , _ in pairs )
315+ exp = next (iter (per_ts .values ()), 0 )
316+ for ts in sorted (per_ts , key = int ):
317+ if per_ts [ts ] != exp :
318+ print (ts , per_ts [ts ], file = sys .stdout )
319+
387320
388321if __name__ == "__main__" :
389322 main ()
0 commit comments