Skip to content

Commit b2153a3

Browse files
Merge pull request #4081 from AI-Hypercomputer:fix-22b-sh-oom
PiperOrigin-RevId: 927515131
2 parents 9c9e095 + 0ad8d3f commit b2153a3

1 file changed

Lines changed: 1 addition & 1 deletion

File tree

  • src/maxtext/configs/tpu/v4

src/maxtext/configs/tpu/v4/22b.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,6 @@ fi
56 56   # Train
57 57   export LIBTPU_INIT_ARGS="--xla_enable_async_all_gather=true TPU_MEGACORE=MEGACORE_DENSE"
58 58   python3 -m maxtext.trainers.pre_train.$EXECUTABLE "${MAXTEXT_CONFIGS_DIR:-${MAXTEXT_REPO_ROOT:-$PWD}/src/maxtext/configs}"//base.yml\
59    - ici_fsdp_parallelism=64 steps=10 per_device_batch_size=13 profiler=xplane remat_policy=full\
   59 + ici_fsdp_parallelism=64 steps=10 per_device_batch_size=13 profiler=xplane remat_policy=full attention=flash num_vocab_tiling=8\
60 60   base_emb_dim=6144 base_num_kv_heads=24 base_num_query_heads=24 base_mlp_dim=24576 base_num_decoder_layers=48\
61 61   base_output_directory=$OUTPUT_PATH dataset_path=$DATASET_PATH

0 commit comments

Comments
 (0)