test(esm2): update perf_logger tests for split _attn_work_*_accum buffers

gagank1 · gagank1 · commit 44172ae3d46f · 2026-04-24T12:20:13.000-07:00
The single self._attn_work_accum was split into _attn_work_unpadded_accum
and _attn_work_padded_accum to support the unpadded/padded MFU distinction,
but two tests in esm2_native_te still referenced the old single name,
failing in CI with AttributeError. Update the assertions to check both
buffers. With no attention_mask and no cu_seq_lens on the test batch,
both the unpadded and padded paths fall through to the shape-synthesis
branch, so the two accumulators hold the same value; each test now
asserts that both hold the expected amount.

No changes needed in llama3 / opengenome2_llama / codonfm: their test
files don't exercise _attn_work_accum lifecycle directly.

Signed-off-by: Gagan Kaushik <gkaushik@nvidia.com>
diff --git a/bionemo-recipes/recipes/esm2_native_te/tests/test_perf_logger.py b/bionemo-recipes/recipes/esm2_native_te/tests/test_perf_logger.py
@@ -213,7 +213,7 @@ def test_num_tokens_accumulates_across_grad_acc(self, mock_wandb, mock_tqdm):
         assert perf_logger.running_loss.item() == pytest.approx(4.0)
 
     def test_attn_work_accumulates_across_grad_acc(self, mock_wandb, mock_tqdm):
-        """_attn_work_accum sums Σ(Lᵢ²) over all micro-batches when log_mfu=True."""
+        """Both _attn_work_*_accum buffers sum Σ(Lᵢ²) over all micro-batches when log_mfu=True."""
         dist_config = DistributedConfig()
         args = _make_args(logging_frequency=1, log_mfu=True, max_seq_length=128)
         perf_logger = PerfLogger(dist_config, args, model_config_dict=_esm_cfg())
@@ -230,8 +230,11 @@ def test_attn_work_accumulates_across_grad_acc(self, mock_wandb, mock_tqdm):
             outputs.logits = torch.randn(2, 64, ESM2_VOCAB, device=device)
             perf_logger.log_micro_step(step=1, batch=batch, outputs=outputs)
 
-        # Accumulator should hold 3 * 2 * 64² = 24576
-        assert perf_logger._attn_work_accum.item() == 3 * 2 * 64 * 64
+        # With no attention_mask and no cu_seq_lens, both unpadded and padded paths fall
+        # through to the shape-synthesis branch, so both accumulators hold 3 * 2 * 64² = 24576.
+        expected = 3 * 2 * 64 * 64
+        assert perf_logger._attn_work_unpadded_accum.item() == expected
+        assert perf_logger._attn_work_padded_accum.item() == expected
 
     def test_reset_on_log_boundary(self, mock_wandb, mock_tqdm):
         """Calling log_step on a logging-boundary step drains all accumulators."""
@@ -248,5 +251,6 @@ def test_reset_on_log_boundary(self, mock_wandb, mock_tqdm):
         assert perf_logger.grad_acc_step_count == 0
         assert perf_logger.num_tokens == 0
         assert perf_logger.num_unpadded_tokens.item() == 0
-        assert perf_logger._attn_work_accum.item() == 0
+        assert perf_logger._attn_work_unpadded_accum.item() == 0
+        assert perf_logger._attn_work_padded_accum.item() == 0
         assert perf_logger.running_loss.item() == pytest.approx(0.0)