Skip to content

Commit 823497e

Browse files
committed
consolidate the step 5 (checkpoint conversion back to HF)
1 parent f82653a commit 823497e

5 files changed

Lines changed: 31 additions & 30 deletions

File tree

tests/end_to_end/tpu/gemma3/4b/test_gemma3.sh

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
# 1. Run inference on the pre-converted checkpoint.
77
# 2. Run pre-training starting from the pre-converted checkpoint.
88
# 3. Run inference on the checkpoint produced by the pre-training run.
9-
# 4. Convert the checkpoint produced by the pre-training run back to HuggingFace format.
109

1110
# Usage:
1211
# export HF_TOKEN=<your Hugging Face access token>
@@ -85,10 +84,4 @@ else
8584
scan_layers=false prompt='I love to' attention=\'dot_product\'
8685
fi
8786

88-
# Step 5: Convert the checkpoint from MaxText format to Hugging Face format
89-
python3 -m maxtext.checkpoint_conversion.to_huggingface \
90-
model_name=${MODEL_NAME} tokenizer_type="huggingface" \
91-
load_parameters_path=${BASE_OUTPUT_DIRECTORY}/train/${run_id}/checkpoints/4/items \
92-
base_output_directory=${BASE_OUTPUT_DIRECTORY}/to_huggingface/unscanned/${run_id} \
93-
use_multimodal=${USE_MULTIMODAL} \
94-
scan_layers=false
87+

tests/end_to_end/tpu/gemma3/4b/test_gemma3_multimodal_sft.sh

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
# 1. Run inference on the pre-converted checkpoint.
77
# 2. Run SFT of Gemma3-4B on ChartQA dataset with the converted checkpoint.
88
# 3. Run inference on the checkpoint produced by the SFT run.
9-
# 4. Convert the checkpoint produced by the SFT run back to HuggingFace format.
109

1110
# Usage:
1211
# export HF_TOKEN=<your Hugging Face access token>
@@ -79,9 +78,3 @@ python3 -m maxtext.inference.decode \
7978
image_path=\'tests/assets/test_image.jpg\' \
8079
attention=\'dot_product\'
8180

82-
# Step 5: Convert the SFT checkpoint back to HuggingFace format
83-
python3 -m maxtext.checkpoint_conversion.to_huggingface \
84-
model_name=${MODEL_NAME} \
85-
load_parameters_path=${BASE_OUTPUT_DIRECTORY}/multimodal/sft/${run_id}/checkpoints/4/items \
86-
base_output_directory=${BASE_OUTPUT_DIRECTORY}/to_huggingface/unscanned/${run_id} \
87-
use_multimodal=true scan_layers=false

tests/end_to_end/tpu/gemma3/4b/test_gemma3_rl.sh

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
# 1. Run inference on the pre-converted checkpoint.
77
# 2. Run RL starting from the pre-converted checkpoint.
88
# 3. Run inference on the checkpoint produced by the RL run.
9-
# 4. Convert the checkpoint produced by the RL run back to HuggingFace format.
109

1110
# Usage:
1211
# export HF_TOKEN=<your Hugging Face access token>
@@ -58,10 +57,4 @@ python3 -m maxtext.inference.vllm_decode \
5857
hbm_utilization_vllm=0.5 \
5958
prompt='Suggest some famous landmarks in London.' \
6059
use_chat_template=True scan_layers=true
61-
62-
# Step 5: Convert the checkpoint from MaxText format to Hugging Face format
63-
python3 -m maxtext.checkpoint_conversion.to_huggingface \
64-
model_name=${MODEL_NAME} \
65-
load_parameters_path=${BASE_OUTPUT_DIRECTORY}/rl/${run_id}/checkpoints/actor/5/model_params \
66-
base_output_directory=${BASE_OUTPUT_DIRECTORY}/to_huggingface/unscanned/${run_id} \
67-
use_multimodal=false scan_layers=true
60+

tests/end_to_end/tpu/gemma3/4b/test_gemma3_sft.sh

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
# 1. Run inference on the pre-converted checkpoint.
77
# 2. Run SFT starting from the pre-converted checkpoint.
88
# 3. Run inference on the checkpoint produced by the SFT run.
9-
# 4. Convert the checkpoint produced by the SFT run back to HuggingFace format.
109

1110
# Usage:
1211
# export HF_TOKEN=<your Hugging Face access token>
@@ -55,9 +54,4 @@ python3 -m maxtext.inference.vllm_decode \
5554
prompt="Suggest some famous landmarks in London." \
5655
use_chat_template=True scan_layers=true
5756

58-
# Step 5: Convert the checkpoint from MaxText format to Hugging Face format
59-
python3 -m maxtext.checkpoint_conversion.to_huggingface \
60-
model_name=${MODEL_NAME} \
61-
load_parameters_path=${BASE_OUTPUT_DIRECTORY}/sft/${run_id}/checkpoints/5/model_params \
62-
base_output_directory=${BASE_OUTPUT_DIRECTORY}/to_huggingface/unscanned/${run_id} \
63-
use_multimodal=false scan_layers=true
57+
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
#!/bin/bash

# Converts a MaxText Gemma3-4B checkpoint to a Hugging Face model checkpoint.
#
# Usage:
#   export HF_TOKEN=<your Hugging Face access token>
#   export RUN_ID=$(date +%Y-%m-%d-%H-%M-%S)
#   bash test_gemma3_to_hf.sh $RUN_ID $CHECKPOINT_PATH
#
# Arguments:
#   $1 - run id; becomes the last component of the output directory.
#   $2 - MaxText checkpoint to convert (passed as load_parameters_path).
#
# Optional environment:
#   HF_TOKEN            - forwarded as hf_access_token (empty when unset).
#   SCAN_LAYERS         - forwarded as scan_layers (default: false).
#   USE_MULTIMODAL      - forwarded as use_multimodal (default: false).
#   MAXTEXT_CONFIGS_DIR - directory holding base.yml; falls back to
#                         ${MAXTEXT_REPO_ROOT:-$PWD}/src/maxtext/configs.

set -ex

# Fail early with a usage hint instead of building a malformed command line
# when either positional argument is missing or empty.
run_id=${1:?usage: bash test_gemma3_to_hf.sh RUN_ID CHECKPOINT_PATH}
CKPT_PATH=${2:?usage: bash test_gemma3_to_hf.sh RUN_ID CHECKPOINT_PATH}

MODEL_NAME='gemma3-4b'
export MODEL_VARIATION='4b'
BASE_OUTPUT_DIRECTORY="gs://runner-maxtext-logs/${MODEL_NAME}"

# Defaults match the previously hard-coded values; export a different value
# to convert a scanned and/or multimodal checkpoint.
USE_MULTIMODAL=${USE_MULTIMODAL:-false}
SCAN_LAYERS=${SCAN_LAYERS:-false}

CONFIG_FILE="${MAXTEXT_CONFIGS_DIR:-${MAXTEXT_REPO_ROOT:-$PWD}/src/maxtext/configs}/base.yml"

# Every argument except the access token, so xtrace still records the
# effective configuration in the log.
convert_args=(
  "${CONFIG_FILE}"
  "model_name=${MODEL_NAME}"
  "load_parameters_path=${CKPT_PATH}"
  "base_output_directory=${BASE_OUTPUT_DIRECTORY}/to_huggingface/unscanned/${run_id}"
  "use_multimodal=${USE_MULTIMODAL}"
  "scan_layers=${SCAN_LAYERS}"
)

# The access token is a secret: turn xtrace off before it is expanded onto a
# command line so it is not echoed into the logs. `set -e` stays active, so a
# failed conversion still aborts the script with a non-zero status.
set +x
python3 -m maxtext.checkpoint_conversion.to_huggingface \
  "${convert_args[@]}" \
  "hf_access_token=${HF_TOKEN:-}"

0 commit comments

Comments
 (0)