Skip to content

Commit 9ea5211

Browse files
author
maxtext authors
committed
Merge pull request #1677 from AI-Hypercomputer:disable_goodput_logging
PiperOrigin-RevId: 754102205
2 parents b5cf469 + 72a60dd commit 9ea5211

5 files changed

Lines changed: 24 additions & 0 deletions

File tree

MaxText/tests/integration_tests/gradient_accumulation_test.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ def test_grad_accumulate_same_loss(self):
4949
r"dataset_path=gs://maxtext-dataset",
5050
"gradient_clipping_threshold=0", # Ensures we are testing raw scales of gradients (clipping off)
5151
"enable_checkpointing=False",
52+
"enable_goodput_recording=False",
5253
"base_emb_dim=256",
5354
"base_num_decoder_layers=4",
5455
rf"tokenizer_path={os.path.join(os.path.dirname(PKG_DIR), 'assets', 'tokenizer.llama2')}",

MaxText/tests/integration_tests/standalone_dl_ckpt_test.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ def test_standalone_dataloader(self):
4949
"dataset_path=gs://maxtext-dataset",
5050
"steps=100",
5151
"enable_checkpointing=false",
52+
"enable_goodput_recording=False",
5253
rf"tokenizer_path={os.path.join(os.path.dirname(PKG_DIR), 'assets', 'tokenizer.llama2')}",
5354
)
5455
) # need to pass relative path to tokenizer
@@ -74,6 +75,7 @@ def test_standalone_checkpointer(self):
7475
"enable_checkpointing=True",
7576
"checkpoint_period=50",
7677
"async_checkpointing=False",
78+
"enable_goodput_recording=False",
7779
)
7880
)
7981
# restore at 50 and checkpoint at 100
@@ -93,6 +95,7 @@ def test_standalone_checkpointer(self):
9395
"enable_checkpointing=True",
9496
"checkpoint_period=50",
9597
"async_checkpointing=False",
98+
"enable_goodput_recording=False",
9699
)
97100
)
98101

MaxText/tests/integration_tests/train_tests.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@ class TrainTests(unittest.TestCase):
4545
r"dataset_path=gs://maxtext-dataset",
4646
"steps=2",
4747
"enable_checkpointing=False",
48+
"enable_goodput_recording=False",
4849
"dataset_type=synthetic",
4950
rf"tokenizer_path={os.path.join(os.path.dirname(PKG_DIR), 'assets', 'tokenizer.llama2')}",
5051
],
@@ -56,6 +57,7 @@ class TrainTests(unittest.TestCase):
5657
r"dataset_path=gs://maxtext-dataset",
5758
"steps=2",
5859
"enable_checkpointing=False",
60+
"enable_goodput_recording=False",
5961
"per_device_batch_size=0.25",
6062
"ici_tensor_parallelism=4",
6163
rf"tokenizer_path={os.path.join(os.path.dirname(PKG_DIR), 'assets', 'tokenizer.llama2')}",
@@ -68,6 +70,7 @@ class TrainTests(unittest.TestCase):
6870
r"dataset_path=gs://maxtext-dataset",
6971
"steps=2",
7072
"ici_tensor_transpose_parallelism=4",
73+
"enable_goodput_recording=False",
7174
rf"tokenizer_path={os.path.join(os.path.dirname(PKG_DIR), 'assets', 'tokenizer.llama2')}",
7275
],
7376
"int8": [ # tests base config with int8
@@ -79,6 +82,7 @@ class TrainTests(unittest.TestCase):
7982
"quantization=int8",
8083
"steps=2",
8184
"enable_checkpointing=False",
85+
"enable_goodput_recording=False",
8286
rf"tokenizer_path={os.path.join(os.path.dirname(PKG_DIR), 'assets', 'tokenizer.llama2')}",
8387
],
8488
"fp8": [ # tests base config with fp8
@@ -90,6 +94,7 @@ class TrainTests(unittest.TestCase):
9094
"quantization=fp8",
9195
"steps=2",
9296
"enable_checkpointing=False",
97+
"enable_goodput_recording=False",
9398
rf"tokenizer_path={os.path.join(os.path.dirname(PKG_DIR), 'assets', 'tokenizer.llama2')}",
9499
],
95100
"nanoo_fp8": [ # tests base config with nanoo_fp8
@@ -101,6 +106,7 @@ class TrainTests(unittest.TestCase):
101106
"quantization=nanoo_fp8",
102107
"steps=2",
103108
"enable_checkpointing=False",
109+
"enable_goodput_recording=False",
104110
rf"tokenizer_path={os.path.join(os.path.dirname(PKG_DIR), 'assets', 'tokenizer.llama2')}",
105111
],
106112
"dropout": [ # tests base config with dropout
@@ -111,6 +117,7 @@ class TrainTests(unittest.TestCase):
111117
r"dataset_path=gs://maxtext-dataset",
112118
"steps=2",
113119
"enable_checkpointing=False",
120+
"enable_goodput_recording=False",
114121
"max_target_length=128",
115122
"per_device_batch_size=1",
116123
"dropout_rate=0.02",
@@ -123,6 +130,7 @@ class TrainTests(unittest.TestCase):
123130
"run_name=runner_test",
124131
"steps=2",
125132
"enable_checkpointing=False",
133+
"enable_goodput_recording=False",
126134
"dataset_type=hf",
127135
"hf_path=parquet",
128136
r"hf_train_files=gs://maxtext-dataset/hf/c4/c4-train-00000-of-01637.parquet",
@@ -235,6 +243,7 @@ def test_gpu_context_parallelism(self):
235243
r"dataset_path=gs://maxtext-dataset",
236244
"steps=10",
237245
"enable_checkpointing=False",
246+
"enable_goodput_recording=False",
238247
"attention=cudnn_flash_te",
239248
"ici_fsdp_parallelism=2",
240249
"ici_context_parallelism=2",

MaxText/tests/pipeline_parallelism_test.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -157,6 +157,7 @@ def test_circular_minimum_microbatches_same_output_and_grad(self):
157157
config = pyconfig.initialize(
158158
[sys.argv[0], os.path.join(PKG_DIR, "configs", "base.yml")],
159159
enable_checkpointing=False,
160+
enable_goodput_recording=False,
160161
run_name="circular_minimum_microbatches",
161162
max_target_length=128,
162163
base_emb_dim=28,
@@ -173,6 +174,7 @@ def test_circular_extra_microbatches_same_output_and_grad(self):
173174
config = pyconfig.initialize(
174175
[sys.argv[0], os.path.join(PKG_DIR, "configs", "base.yml")],
175176
enable_checkpointing=False,
177+
enable_goodput_recording=False,
176178
run_name="circular_extra_microbatches",
177179
max_target_length=128,
178180
base_emb_dim=28,
@@ -189,6 +191,7 @@ def test_circular_ag_once(self):
189191
config = pyconfig.initialize(
190192
[sys.argv[0], os.path.join(PKG_DIR, "configs", "base.yml")],
191193
enable_checkpointing=False,
194+
enable_goodput_recording=False,
192195
run_name="circular_ag_once",
193196
max_target_length=128,
194197
base_emb_dim=28,
@@ -239,6 +242,7 @@ def test_full_train_circular(self):
239242
"dataset_type=synthetic",
240243
"steps=3",
241244
"enable_checkpointing=False",
245+
"enable_goodput_recording=False",
242246
"ici_pipeline_parallelism=4",
243247
"num_layers_per_pipeline_stage=2",
244248
"num_pipeline_microbatches=8",
@@ -253,6 +257,7 @@ def test_delay_activation_forwarding_same_output_and_grad(self):
253257
config = pyconfig.initialize(
254258
[sys.argv[0], os.path.join(PKG_DIR, "configs", "base.yml")],
255259
enable_checkpointing=False,
260+
enable_goodput_recording=False,
256261
run_name="activation_forwarding",
257262
max_target_length=128,
258263
base_emb_dim=28,
@@ -287,6 +292,7 @@ def test_full_train_non_circular(self):
287292
"dataset_type=synthetic",
288293
"steps=3",
289294
"enable_checkpointing=False",
295+
"enable_goodput_recording=False",
290296
"ici_pipeline_parallelism=4",
291297
"num_layers_per_pipeline_stage=8",
292298
"num_pipeline_microbatches=8",
@@ -318,6 +324,7 @@ def test_subset_layers(self):
318324
"dataset_type=synthetic",
319325
"steps=3",
320326
"enable_checkpointing=False",
327+
"enable_goodput_recording=False",
321328
"ici_pipeline_parallelism=4",
322329
"num_layers_per_pipeline_stage=1",
323330
"num_pipeline_repeats=2",
@@ -351,6 +358,7 @@ def test_full_train_fp8(self):
351358
"dataset_type=synthetic",
352359
"steps=3",
353360
"enable_checkpointing=False",
361+
"enable_goodput_recording=False",
354362
"ici_pipeline_parallelism=4",
355363
rf"tokenizer_path={os.path.join(os.path.dirname(PKG_DIR), 'assets', 'tokenizer.llama2')}",
356364
"quantization=fp8",
@@ -382,6 +390,7 @@ def test_full_train_nanoo_fp8(self):
382390
"dataset_type=synthetic",
383391
"steps=3",
384392
"enable_checkpointing=False",
393+
"enable_goodput_recording=False",
385394
"ici_pipeline_parallelism=4",
386395
rf"tokenizer_path={os.path.join(os.path.dirname(PKG_DIR), 'assets', 'tokenizer.llama2')}",
387396
"quantization=nanoo_fp8",

MaxText/tests/simple_decoder_layer_test.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@ def test_simple_decoder_layer(self):
3232
r"dataset_path=gs://maxtext-dataset",
3333
"decoder_block=simple",
3434
"enable_checkpointing=False",
35+
"enable_goodput_recording=False",
3536
rf"tokenizer_path={os.path.join(os.path.dirname(PKG_DIR), 'assets', 'tokenizer.llama2')}",
3637
"steps=3",
3738
]
@@ -48,6 +49,7 @@ def test_mlp_decoder_layer(self):
4849
r"dataset_path=gs://maxtext-dataset",
4950
"decoder_block=simple_mlp",
5051
"enable_checkpointing=False",
52+
"enable_goodput_recording=False",
5153
rf"tokenizer_path={os.path.join(os.path.dirname(PKG_DIR), 'assets', 'tokenizer.llama2')}",
5254
"steps=3",
5355
]

0 commit comments

Comments
 (0)