@@ -56,8 +56,8 @@ def __init__(self, dist_config: DistributedConfig, args: DictConfig):
5656 "train/grad_norm" : torchmetrics .MeanMetric (),
5757 "train/learning_rate" : torchmetrics .MeanMetric (),
5858 "train/step_time" : torchmetrics .MeanMetric (),
59- "train/tokens_per_second " : torchmetrics .MeanMetric (),
60- "train/unpadded_tokens_per_second " : torchmetrics .MeanMetric (),
59+ "train/tokens_per_second_per_gpu " : torchmetrics .MeanMetric (),
60+ "train/unpadded_tokens_per_second_per_gpu " : torchmetrics .MeanMetric (),
6161 "train/total_unpadded_tokens_per_batch" : torchmetrics .SumMetric (),
6262 "train/perplexity" : torchmetrics .text .Perplexity (ignore_index = - 100 ),
6363 "train/gpu_memory_allocated_max_gb" : torchmetrics .MaxMetric (),
@@ -102,8 +102,8 @@ def log_step(
102102 self .metrics ["train/learning_rate" ].update (lr )
103103 self .metrics ["train/grad_norm" ].update (grad_norm )
104104 self .metrics ["train/step_time" ].update (step_time )
105- self .metrics ["train/tokens_per_second " ].update (num_tokens / step_time )
106- self .metrics ["train/unpadded_tokens_per_second " ].update (num_unpadded_tokens / step_time )
105+ self .metrics ["train/tokens_per_second_per_gpu " ].update (num_tokens / step_time )
106+ self .metrics ["train/unpadded_tokens_per_second_per_gpu " ].update (num_unpadded_tokens / step_time )
107107 self .metrics ["train/total_unpadded_tokens_per_batch" ].update (num_unpadded_tokens / self .logging_frequency )
108108
109109 # Handle sequence packing for torchmetrics calculation.
0 commit comments