br: adjust memory thresholds [skip ci]

broland-hat · broland-hat · commit 9537a47af433 · 2025-08-20T18:54:37.000Z
Signed-off-by: Brian Roland <broland@nvidia.com>
diff --git a/sub-packages/bionemo-evo2/tests/bionemo/evo2/conftest.py b/sub-packages/bionemo-evo2/tests/bionemo/evo2/conftest.py
@@ -38,8 +38,6 @@ def pytest_sessionstart(session):
 def pytest_sessionfinish(session, exitstatus):
     """Called at the end of the test session."""
     if torch.cuda.is_available():
-        peak_memory = torch.cuda.max_memory_allocated()
-        final_memory = torch.cuda.memory_allocated()
         print(
             f"""
             sub-packages/bionemo-evo2/tests/bionemoe/evo2: Test session complete
diff --git a/sub-packages/bionemo-evo2/tests/bionemo/evo2/test_evo2.py b/sub-packages/bionemo-evo2/tests/bionemo/evo2/test_evo2.py
@@ -48,6 +48,11 @@
 logger.setLevel(logging.DEBUG)  # Capture all levels in the logger itself
 
 
+MEM_REQUIREMENT_1B_GB = 18  # add 0.6 GB to max mem reserved, and round up
+MEM_REQUIREMENT_7B_GB = 48
+
+
+
 def load_weights_sharded_inplace_nemo2_to_mcore(
     model: MegatronModelType,
     distributed_checkpoint_dir: str | Path,
@@ -365,7 +370,7 @@ def check_matchrate(*, ckpt_name, matchrate, assert_matchrate=True):
 def test_forward(sequences: list[str], ckpt_name: str, expected_matchpercents: list[float]):
     assert len(sequences) > 0
     gb_available = torch.cuda.mem_get_info()[0] / 1024**3
-    if (gb_available < 20 and "1b" in ckpt_name) or (gb_available < 40 and "7b" in ckpt_name):
+    if (gb_available < MEM_REQUIREMENT_1B_GB and "1b" in ckpt_name) or (gb_available < MEM_REQUIREMENT_7B_GB and "7b" in ckpt_name):
         pytest.skip(
             f"Inference API requires more than 38GB of memory for 1b models, or 50GB for 7b models. {gb_available=}"
         )
@@ -429,7 +434,7 @@ def test_forward_manual(sequences: list[str], ckpt_name: str, expected_matchperc
     is_fp8_supported, compute_capability, device_info = check_fp8_support(torch.cuda.current_device())
     skip = "evo2/1b-8k:" in ckpt_name and not is_fp8_supported
     gb_available = torch.cuda.mem_get_info()[0] / 1024**3
-    if (gb_available < 20 and flash_decode) or (gb_available < 40 and flash_decode and "7b" in ckpt_name):
+    if (gb_available < MEM_REQUIREMENT_1B_GB and flash_decode) or (gb_available < MEM_REQUIREMENT_7B_GB and flash_decode and "7b" in ckpt_name):
         pytest.skip(
             f"Inference API requires more than 38GB of memory for 1b models, or 50GB for 7b models. {gb_available=}"
         )
@@ -544,7 +549,7 @@ def test_batch_generate(
     assert len(sequences) > 0
     is_fp8_supported, compute_capability, device_info = check_fp8_support(torch.cuda.current_device())
     gb_available = torch.cuda.mem_get_info()[0] / 1024**3
-    if (gb_available < 20 and "1b" in ckpt_name) or (gb_available < 40 and "7b" in ckpt_name):
+    if (gb_available < MEM_REQUIREMENT_1B_GB and "1b" in ckpt_name) or (gb_available < MEM_REQUIREMENT_7B_GB and "7b" in ckpt_name):
         pytest.skip(
             f"Inference API requires more than 38GB of memory for 1b models, or 50GB for 7b models. {gb_available=}"
         )
@@ -615,7 +620,7 @@ def test_batch_generate_coding_sequences(
 ):
     assert len(coding_sequences) > 0
     gb_available = torch.cuda.mem_get_info()[0] / 1024**3
-    if (gb_available < 20 and "1b" in ckpt_name) or (gb_available < 40 and "7b" in ckpt_name):
+    if (gb_available < MEM_REQUIREMENT_1B_GB and "1b" in ckpt_name) or (gb_available < MEM_REQUIREMENT_7B_GB and "7b" in ckpt_name):
         pytest.skip(
             f"Inference API requires more than 38GB of memory for 1b models, or 50GB for 7b models. {gb_available=}"
         )
@@ -724,7 +729,7 @@ def test_generate_speed(
 ):
     is_fp8_supported, compute_capability, device_info = check_fp8_support(torch.cuda.current_device())
     gb_available = torch.cuda.mem_get_info()[0] / 1024**3
-    if (gb_available < 20 and "1b" in ckpt_name) or (gb_available < 40 and "7b" in ckpt_name):
+    if (gb_available < MEM_REQUIREMENT_1B_GB and "1b" in ckpt_name) or (gb_available < MEM_REQUIREMENT_7B_GB and "7b" in ckpt_name):
         pytest.skip(
             f"Inference API requires more than 38GB of memory for 1b models, or 50GB for 7b models. {gb_available=}"
         )
diff --git a/sub-packages/bionemo-testing/src/bionemo/testing/torch.py b/sub-packages/bionemo-testing/src/bionemo/testing/torch.py
@@ -72,9 +72,10 @@ def get_device_and_memory_allocated() -> str:
         current device index: {current_device_index}
         current device uuid: {props.uuid}
         current device name: {props.name}
-        memory available: {torch.cuda.mem_get_info()[0] / 1024**3:.3f} GB
-        memory allocated: {torch.cuda.memory_allocated() / 1024**3:.3f} GB
-        max memory allocated: {torch.cuda.max_memory_allocated() / 1024**3:.3f} GB
+        memory, total on device: {torch.cuda.mem_get_info()[1] / 1024**3:.3f} GB
+        memory, available on device: {torch.cuda.mem_get_info()[0] / 1024**3:.3f} GB        
+        memory allocated for tensors etc: {torch.cuda.memory_allocated() / 1024**3:.3f} GB
+        max memory reserved for tensors etc: {torch.cuda.max_memory_allocated() / 1024**3:.3f} GB
         """
     )
     return message