Skip to content

Commit c86162b

Browse files
authored
minor esm2 readme and wandb updates (#1348)
Signed-off-by: Peter St. John <pstjohn@nvidia.com>
1 parent 9990901 commit c86162b

2 files changed

Lines changed: 6 additions & 5 deletions

File tree

bionemo-recipes/models/esm2/README.md

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -156,5 +156,6 @@ huggingface-cli upload nvidia/${MODEL_NAME} $PWD/checkpoint_export/${MODEL_NAME}
156156
Or, upload all models at once with:
157157

158158
```bash
159-
for dir in *; do huggingface-cli upload nvidia/$(basename "$dir") "$dir/"; done
159+
cd checkpoint_export
160+
for dir in */; do hf upload --repo-type model nvidia/$(basename "$dir") "$dir/"; done
160161
```

bionemo-recipes/recipes/esm2_native_te/perf_logger.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -56,8 +56,8 @@ def __init__(self, dist_config: DistributedConfig, args: DictConfig):
5656
"train/grad_norm": torchmetrics.MeanMetric(),
5757
"train/learning_rate": torchmetrics.MeanMetric(),
5858
"train/step_time": torchmetrics.MeanMetric(),
59-
"train/tokens_per_second": torchmetrics.MeanMetric(),
60-
"train/unpadded_tokens_per_second": torchmetrics.MeanMetric(),
59+
"train/tokens_per_second_per_gpu": torchmetrics.MeanMetric(),
60+
"train/unpadded_tokens_per_second_per_gpu": torchmetrics.MeanMetric(),
6161
"train/total_unpadded_tokens_per_batch": torchmetrics.SumMetric(),
6262
"train/perplexity": torchmetrics.text.Perplexity(ignore_index=-100),
6363
"train/gpu_memory_allocated_max_gb": torchmetrics.MaxMetric(),
@@ -102,8 +102,8 @@ def log_step(
102102
self.metrics["train/learning_rate"].update(lr)
103103
self.metrics["train/grad_norm"].update(grad_norm)
104104
self.metrics["train/step_time"].update(step_time)
105-
self.metrics["train/tokens_per_second"].update(num_tokens / step_time)
106-
self.metrics["train/unpadded_tokens_per_second"].update(num_unpadded_tokens / step_time)
105+
self.metrics["train/tokens_per_second_per_gpu"].update(num_tokens / step_time)
106+
self.metrics["train/unpadded_tokens_per_second_per_gpu"].update(num_unpadded_tokens / step_time)
107107
self.metrics["train/total_unpadded_tokens_per_batch"].update(num_unpadded_tokens / self.logging_frequency)
108108

109109
# Handle sequence packing for torchmetrics calculation.

0 commit comments

Comments
 (0)