Skip to content

Commit a73b79b

Browse files
SurbhiJainUSC authored and Google-ML-Automation committed
Update checkpoint conversion documentation and fix src/maxtext references in docs
PiperOrigin-RevId: 888829289
1 parent 4e68f26 commit a73b79b

85 files changed

Lines changed: 2174 additions & 2168 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

.coveragerc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ omit =
1010
[paths]
1111
source =
1212
src/MaxText
13-
src/MaxText
13+
src/maxtext
1414
*/site-packages/MaxText
1515
*/site-packages/maxtext
1616

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ repos:
4242
# args:
4343
# - '--jobs=auto'
4444
# - '--keep-going'
45-
# - 'src/MaxText/'
45+
# - 'src/maxtext/'
4646

4747
- repo: https://github.com/google/pyink
4848
rev: 24.10.1

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ See our guide on running MaxText in decoupled mode, without any GCP dependencies
5757
* \[October 10, 2025\] Post-Training (SFT, RL) via [Tunix](https://github.com/google/tunix) is now available.
5858
* \[September 26, 2025\] Vocabulary tiling ([PR](https://github.com/AI-Hypercomputer/maxtext/pull/2242)) is now supported in MaxText! Adjust config `num_vocab_tiling` to unlock more efficient memory usage.
5959
* \[September 24, 2025\] The GPT-OSS family of models (20B, 120B) is now supported.
60-
* \[September 15, 2025\] MaxText is now available as a [PyPI package](https://pypi.org/project/maxtext). Users can now [install maxtext through pip](https://maxtext.readthedocs.io/en/latest/guides/install_maxtext.html).
60+
* \[September 15, 2025\] MaxText is now available as a [PyPI package](https://pypi.org/project/maxtext). Users can now [install maxtext through pip](https://maxtext.readthedocs.io/en/latest/install_maxtext.html).
6161
* \[September 5, 2025\] MaxText has moved to an `src` layout as part of [RESTRUCTURE.md](https://github.com/AI-Hypercomputer/maxtext/blob/aca5b24931ebcbadb55a82e56ebffe8024874028/RESTRUCTURE.md). For existing environments, please run `pip install -e .` from MaxText root.
6262
* \[August 13, 2025\] The Qwen3 2507 MoE family of models is now supported: MoEs: 235B Thinking & 480B Coder as well as existing dense models: 0.6B, 4B, 8B, 14B, and 32B.
6363
* \[July 27, 2025\] Updated TFLOPS/s calculation ([PR](https://github.com/AI-Hypercomputer/maxtext/pull/1988)) to account for causal attention, dividing the attention flops in half. Accounted for sliding window and chunked attention reduced attention flops in [PR](https://github.com/AI-Hypercomputer/maxtext/pull/2009) and [PR](https://github.com/AI-Hypercomputer/maxtext/pull/2030). Changes impact large sequence configs, as explained in this [doc](https://maxtext.readthedocs.io/en/latest/reference/performance_metrics.html)

benchmarks/convergence/c4_exp.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323

2424
from benchmarks.benchmark_utils import MaxTextModel, _add_to_model_dictionary
2525
from benchmarks.convergence.convergence_utils import DatasetHParams, ConvHParams, _setup_model_convergence_
26-
2726
from benchmarks.maxtext_v5p_model_configs import deepseek_v3_ep_256_v5p_512
2827

2928
c4_pretrain_model_dict = {}

benchmarks/disruption_management/monitor.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import time
3030

3131
from benchmarks.disruption_management.disruption_utils import wait_for_pod_to_start
32-
3332
from benchmarks.disruption_management.disruption_handler import DisruptionConfig
3433
from benchmarks.disruption_management.disruption_handler import TriggerType
3534

benchmarks/llama2_v6e-256_benchmarks.py

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,12 @@
1717
on a specific v6e-256 hardware setup using the XPK runner.
1818
"""
1919

20-
import maxtext_trillium_model_configs as model_configs
20+
import os
2121

22-
from maxtext_xpk_runner import BenchmarkRunner
23-
from maxtext_xpk_runner import HWConfig
24-
from maxtext_xpk_runner import SWconfig
25-
from maxtext_xpk_runner import xpk_benchmark_runner
26-
from maxtext_xpk_runner import XpkConfig
22+
from benchmarks import maxtext_trillium_model_configs as model_configs
23+
from benchmarks.maxtext_xpk_runner import WorkloadConfig
24+
from benchmarks.maxtext_xpk_runner import xpk_benchmark_runner
25+
from benchmarks.maxtext_xpk_runner import XpkClusterConfig
2726

2827

2928
DATE = "20241009"
@@ -35,34 +34,37 @@
3534
DEVICE_TYPE = "v6e-256"
3635
NUM_SLICES = 1
3736
BASE_OUTPUT_DIR = "gs://maxtext-experiments-tpem/"
38-
39-
v6e_env_configs = SWconfig(base_docker_image=BASE_DOCKER_IMAGE, libtpu_version=DATE)
40-
v6e_256_configs = HWConfig(num_slices=NUM_SLICES, device_type=DEVICE_TYPE)
41-
42-
llama2_70b_4096 = BenchmarkRunner(
43-
model_name=model_configs.llama2_70b_4096,
44-
software_config=v6e_env_configs,
45-
hardware_config=v6e_256_configs,
46-
)
47-
48-
llama2_7b_4096 = BenchmarkRunner(
49-
model_name=model_configs.llama2_7b_4096,
50-
software_config=v6e_env_configs,
51-
hardware_config=v6e_256_configs,
52-
)
37+
XPK_PATH = os.path.join("~", "xpk")
38+
BENCHMARK_STEPS = 20
5339

5440

5541
def main() -> None:
56-
cluster_config = XpkConfig(
42+
cluster_config = XpkClusterConfig(
5743
cluster_name=CLUSTER_NAME,
5844
project=PROJECT,
5945
zone=ZONE,
60-
num_slices=NUM_SLICES,
6146
device_type=DEVICE_TYPE,
62-
base_output_directory=BASE_OUTPUT_DIR,
6347
)
6448

65-
xpk_benchmark_runner(cluster_config, [llama2_7b_4096, llama2_70b_4096])
49+
workload_configs = []
50+
for model in [model_configs.llama2_7b_4096, model_configs.llama2_70b_4096]:
51+
workload_configs.append(
52+
WorkloadConfig(
53+
model=model,
54+
num_slices=NUM_SLICES,
55+
device_type=DEVICE_TYPE,
56+
base_output_directory=BASE_OUTPUT_DIR,
57+
base_docker_image=BASE_DOCKER_IMAGE,
58+
libtpu_type=None,
59+
libtpu_nightly_version=DATE,
60+
pathways_config=None,
61+
xpk_path=XPK_PATH,
62+
num_steps=BENCHMARK_STEPS,
63+
priority="medium",
64+
)
65+
)
66+
67+
xpk_benchmark_runner(cluster_config, workload_configs)
6668

6769

6870
if __name__ == "__main__":

benchmarks/maxtext_xpk_runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@
3535
import omegaconf
3636

3737
import benchmarks.maxtext_trillium_model_configs as model_configs
38+
import benchmarks.xla_flags_library as xla_flags
3839
from benchmarks.globals import MAXTEXT_PKG_DIR
3940
from benchmarks.command_utils import run_command_with_updates
40-
import benchmarks.xla_flags_library as xla_flags
4141
from benchmarks.disruption_management.disruption_handler import DisruptionConfig
4242
from benchmarks.disruption_management.disruption_manager import DisruptionManager
4343
from benchmarks.xpk_configs import XpkClusterConfig

benchmarks/recipes/mcjax_long_running_recipe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
import benchmarks.maxtext_trillium_model_configs as model_configs
2828
import benchmarks.maxtext_xpk_runner as mxr
2929
from benchmarks.xpk_configs import XpkClusterConfig
30-
from . import user_configs
30+
from benchmarks.recipes import user_configs
3131

3232
# Cluster Params
3333
CLUSTER = "v6e-256-cluster"

benchmarks/recipes/pw_elastic_training_recipe.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@
2525

2626
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
2727
sys.path.append(parent_dir)
28-
from . import args_helper as helper
29-
from . import user_configs
3028

3129
from benchmarks.disruption_management.disruption_handler import DisruptionMethod
32-
from .runner_utils import generate_and_run_workloads
30+
from benchmarks.recipes import args_helper as helper
31+
from benchmarks.recipes import user_configs
32+
from benchmarks.recipes.runner_utils import generate_and_run_workloads
3333

3434
user_configs.USER_CONFIG.max_restarts = 10
3535
COMPARE_WITH_MCJAX = True

benchmarks/recipes/pw_headless_mode.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
"""
2323

2424
import benchmarks.recipes.args_helper as helper
25-
from .. import maxtext_xpk_runner as mxr
26-
from ..recipes.user_configs import USER_CONFIG
25+
from benchmarks import maxtext_xpk_runner as mxr
26+
from benchmarks.recipes.user_configs import USER_CONFIG
2727

2828

2929
def main() -> int:

0 commit comments

Comments (0)