Skip to content

Commit 9537a47

Browse files
committed
br: adjust memory thresholds [skip ci]
Signed-off-by: Brian Roland <broland@nvidia.com>
1 parent 7330d87 commit 9537a47

3 files changed

Lines changed: 14 additions & 10 deletions

File tree

sub-packages/bionemo-evo2/tests/bionemo/evo2/conftest.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,6 @@ def pytest_sessionstart(session):
3838
def pytest_sessionfinish(session, exitstatus):
3939
"""Called at the end of the test session."""
4040
if torch.cuda.is_available():
41-
peak_memory = torch.cuda.max_memory_allocated()
42-
final_memory = torch.cuda.memory_allocated()
4341
print(
4442
f"""
4543
sub-packages/bionemo-evo2/tests/bionemoe/evo2: Test session complete

sub-packages/bionemo-evo2/tests/bionemo/evo2/test_evo2.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@
4848
logger.setLevel(logging.DEBUG) # Capture all levels in the logger itself
4949

5050

51+
MEM_REQUIREMENT_1B_GB = 18 # derived as: max memory reserved + 0.6 GB, rounded up
52+
MEM_REQUIREMENT_7B_GB = 48
53+
54+
55+
5156
def load_weights_sharded_inplace_nemo2_to_mcore(
5257
model: MegatronModelType,
5358
distributed_checkpoint_dir: str | Path,
@@ -365,7 +370,7 @@ def check_matchrate(*, ckpt_name, matchrate, assert_matchrate=True):
365370
def test_forward(sequences: list[str], ckpt_name: str, expected_matchpercents: list[float]):
366371
assert len(sequences) > 0
367372
gb_available = torch.cuda.mem_get_info()[0] / 1024**3
368-
if (gb_available < 20 and "1b" in ckpt_name) or (gb_available < 40 and "7b" in ckpt_name):
373+
if (gb_available < MEM_REQUIREMENT_1B_GB and "1b" in ckpt_name) or (gb_available < MEM_REQUIREMENT_7B_GB and "7b" in ckpt_name):
369374
pytest.skip(
370375
f"Inference API requires more than 38GB of memory for 1b models, or 50GB for 7b models. {gb_available=}"
371376
)
@@ -429,7 +434,7 @@ def test_forward_manual(sequences: list[str], ckpt_name: str, expected_matchperc
429434
is_fp8_supported, compute_capability, device_info = check_fp8_support(torch.cuda.current_device())
430435
skip = "evo2/1b-8k:" in ckpt_name and not is_fp8_supported
431436
gb_available = torch.cuda.mem_get_info()[0] / 1024**3
432-
if (gb_available < 20 and flash_decode) or (gb_available < 40 and flash_decode and "7b" in ckpt_name):
437+
if (gb_available < MEM_REQUIREMENT_1B_GB and flash_decode) or (gb_available < MEM_REQUIREMENT_7B_GB and flash_decode and "7b" in ckpt_name):
433438
pytest.skip(
434439
f"Inference API requires more than 38GB of memory for 1b models, or 50GB for 7b models. {gb_available=}"
435440
)
@@ -544,7 +549,7 @@ def test_batch_generate(
544549
assert len(sequences) > 0
545550
is_fp8_supported, compute_capability, device_info = check_fp8_support(torch.cuda.current_device())
546551
gb_available = torch.cuda.mem_get_info()[0] / 1024**3
547-
if (gb_available < 20 and "1b" in ckpt_name) or (gb_available < 40 and "7b" in ckpt_name):
552+
if (gb_available < MEM_REQUIREMENT_1B_GB and "1b" in ckpt_name) or (gb_available < MEM_REQUIREMENT_7B_GB and "7b" in ckpt_name):
548553
pytest.skip(
549554
f"Inference API requires more than 38GB of memory for 1b models, or 50GB for 7b models. {gb_available=}"
550555
)
@@ -615,7 +620,7 @@ def test_batch_generate_coding_sequences(
615620
):
616621
assert len(coding_sequences) > 0
617622
gb_available = torch.cuda.mem_get_info()[0] / 1024**3
618-
if (gb_available < 20 and "1b" in ckpt_name) or (gb_available < 40 and "7b" in ckpt_name):
623+
if (gb_available < MEM_REQUIREMENT_1B_GB and "1b" in ckpt_name) or (gb_available < MEM_REQUIREMENT_7B_GB and "7b" in ckpt_name):
619624
pytest.skip(
620625
f"Inference API requires more than 38GB of memory for 1b models, or 50GB for 7b models. {gb_available=}"
621626
)
@@ -724,7 +729,7 @@ def test_generate_speed(
724729
):
725730
is_fp8_supported, compute_capability, device_info = check_fp8_support(torch.cuda.current_device())
726731
gb_available = torch.cuda.mem_get_info()[0] / 1024**3
727-
if (gb_available < 20 and "1b" in ckpt_name) or (gb_available < 40 and "7b" in ckpt_name):
732+
if (gb_available < MEM_REQUIREMENT_1B_GB and "1b" in ckpt_name) or (gb_available < MEM_REQUIREMENT_7B_GB and "7b" in ckpt_name):
728733
pytest.skip(
729734
f"Inference API requires more than 38GB of memory for 1b models, or 50GB for 7b models. {gb_available=}"
730735
)

sub-packages/bionemo-testing/src/bionemo/testing/torch.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,10 @@ def get_device_and_memory_allocated() -> str:
7272
current device index: {current_device_index}
7373
current device uuid: {props.uuid}
7474
current device name: {props.name}
75-
memory available: {torch.cuda.mem_get_info()[0] / 1024**3:.3f} GB
76-
memory allocated: {torch.cuda.memory_allocated() / 1024**3:.3f} GB
77-
max memory allocated: {torch.cuda.max_memory_allocated() / 1024**3:.3f} GB
75+
memory, total on device: {torch.cuda.mem_get_info()[1] / 1024**3:.3f} GB
76+
memory, available on device: {torch.cuda.mem_get_info()[0] / 1024**3:.3f} GB
77+
memory allocated for tensors etc: {torch.cuda.memory_allocated() / 1024**3:.3f} GB
78+
max memory reserved for tensors etc: {torch.cuda.max_memory_allocated() / 1024**3:.3f} GB
7879
"""
7980
)
8081
return message

0 commit comments

Comments
 (0)