Skip to content

Commit fc97c2a

Browse files
committed
Remove redundant test
Signed-off-by: John St John <jstjohn@nvidia.com>
1 parent 2f32856 commit fc97c2a

1 file changed

Lines changed: 0 additions & 94 deletions

File tree

sub-packages/bionemo-evo2/tests/bionemo/evo2/run/test_train.py

Lines changed: 0 additions & 94 deletions
Original file line number | Diff line number | Diff line change
@@ -17,13 +17,11 @@
1717
# limitations under the License.
1818
import argparse
1919
import io
20-
import os
2120
import shlex
2221
from contextlib import redirect_stderr, redirect_stdout
2322
from typing import Tuple
2423

2524
import pytest
26-
from lightning.fabric.plugins.environments.lightning import find_free_network_port
2725
from nemo import lightning as nl
2826
from transformer_engine.pytorch.fp8 import check_fp8_support
2927

@@ -152,98 +150,6 @@ def test_train_evo2_stops(tmp_path):
152150
assert "train_step_timing in s" in trainer.logged_metrics
153151

154152

155-
@pytest.mark.slow
156-
@pytest.mark.parametrize("model_size", ["7b_nv", "7b_arc_longcontext"])
157-
def test_train_single_gpu(tmp_path, model_size: str):
158-
"""
159-
This test runs the single-GPU evo2 training command with sample data in a temporary directory.
160-
"""
161-
num_steps = 7
162-
open_port = find_free_network_port()
163-
# a local copy of the environment
164-
env = dict(**os.environ)
165-
env["MASTER_PORT"] = str(open_port)
166-
# Part 1: Make sure training runs for only --early-stop-on-step steps
167-
additional_args1 = [
168-
"--result-dir",
169-
str(tmp_path),
170-
"--model-size",
171-
model_size,
172-
"--num-layers",
173-
str(4),
174-
"--hybrid-override-pattern",
175-
"SDH*",
176-
"--no-activation-checkpointing",
177-
"--use-precision-aware-optimizer",
178-
"--add-bias-output",
179-
"--bf16-main-grads",
180-
"--val-check-interval",
181-
str(5),
182-
"--max-steps",
183-
str(num_steps),
184-
"--early-stop-on-step",
185-
str(num_steps - 2),
186-
"--warmup-steps",
187-
str(1),
188-
"--seq-length",
189-
str(128),
190-
"--wandb-offline",
191-
"--wandb-anonymous",
192-
"--mock-data",
193-
]
194-
args1 = parse_args(args=additional_args1)
195-
stdout_buf, stderr_buf = io.StringIO(), io.StringIO()
196-
with redirect_stdout(stdout_buf), redirect_stderr(stderr_buf), distributed_model_parallel_state():
197-
train(args=args1)
198-
train_stdout = stdout_buf.getvalue()
199-
train_lines = train_stdout.split("\n")
200-
iteration_lines = [line for line in train_lines if "Training epoch" in line]
201-
assert len(iteration_lines) == 5
202-
iteration_line_1 = iteration_lines[0]
203-
# No strong opinion on how the total should be computed in the case of early stopping. We allow either for now
204-
# unless there is an issue, such as with the LR scheduler...
205-
# TODO: Add a test somewhere that verifies the early stopping callback has no impact on the LR scheduler
206-
assert "iteration 0/4" in iteration_line_1 or "iteration 0/6" in iteration_line_1
207-
iteration_line_final = iteration_lines[-1]
208-
assert "iteration 4/4" in iteration_line_final or "iteration 4/6" in iteration_line_final
209-
210-
# Part 2: Make sure training picks up where it left off
211-
additional_args2 = [
212-
"--result-dir",
213-
str(tmp_path),
214-
"--model-size",
215-
model_size,
216-
"--num-layers",
217-
str(4),
218-
"--hybrid-override-pattern",
219-
"SDH*",
220-
"--no-activation-checkpointing",
221-
"--use-precision-aware-optimizer",
222-
"--add-bias-output",
223-
"--max-steps",
224-
str(num_steps),
225-
"--warmup-steps",
226-
str(1),
227-
"--seq-length",
228-
str(128),
229-
"--wandb-offline",
230-
"--wandb-anonymous",
231-
"--mock-data",
232-
]
233-
args2 = parse_args(args=additional_args2)
234-
stdout_buf, stderr_buf = io.StringIO(), io.StringIO()
235-
with redirect_stdout(stdout_buf), redirect_stderr(stderr_buf), distributed_model_parallel_state():
236-
train(args=args2)
237-
train_stdout = stdout_buf.getvalue()
238-
train_lines = train_stdout.split("\n")
239-
iteration_lines = [line for line in train_lines if "Training epoch" in line]
240-
assert len(iteration_lines) == 2
241-
iteration_line_1 = iteration_lines[0]
242-
assert "iteration 5/6" in iteration_line_1
243-
iteration_line_2 = iteration_lines[1]
244-
assert "iteration 6/6" in iteration_line_2
245-
246-
247153
@pytest.mark.parametrize(
248154
"additional_args",
249155
[

0 commit comments

Comments
 (0)