From d780fa5f1f0bbeac10f453991426ad7b27ae6359 Mon Sep 17 00:00:00 2001 From: Alex Shraer Date: Sat, 6 Jun 2026 19:53:40 +0000 Subject: [PATCH] docs: add links to Checkpoint Conversion Guide in getting started tutorials --- docs/conf.py | 2 ++ docs/run_maxtext/run_maxtext_localhost.md | 2 +- docs/tutorials/first_run.md | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 6609bb8784..5f005f88a3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -183,6 +183,8 @@ # Ignore GitHub PRs and blobs that trigger rate limiting r"https://github\.com/AI-Hypercomputer/maxtext/pull/.*", r"https://github\.com/AI-Hypercomputer/maxtext/blob/.*", + # Ignore Toronto CS links, which frequently time out in the CI environment + r"https://www\.cs\.toronto\.edu/.*", ] diff --git a/docs/run_maxtext/run_maxtext_localhost.md b/docs/run_maxtext/run_maxtext_localhost.md index 49b0e55746..2a648a77a3 100644 --- a/docs/run_maxtext/run_maxtext_localhost.md +++ b/docs/run_maxtext/run_maxtext_localhost.md @@ -63,7 +63,7 @@ python3 -m maxtext.inference.decode \ per_device_batch_size=1 ``` -**Note:** Because the model hasn't been properly trained, the output text will be random. To generate meaningful output, you need to load a trained checkpoint using the `load_parameters_path` argument. +**Note:** Because the model hasn't been properly trained, the output text will be random. To generate meaningful output, you need to load a trained checkpoint using the `load_parameters_path` argument. For instructions on how to convert pre-trained Hugging Face model checkpoints (like Llama or Gemma) to MaxText's Orbax format, please refer to the [Checkpoint Conversion Guide](../guides/checkpointing_solutions/convert_checkpoint.md). 
### Running models using provided configs diff --git a/docs/tutorials/first_run.md b/docs/tutorials/first_run.md index ab98ece2f1..c27e137932 100644 --- a/docs/tutorials/first_run.md +++ b/docs/tutorials/first_run.md @@ -58,7 +58,7 @@ python3 -m maxtext.inference.decode \ per_device_batch_size=1 ``` -This command uses a model with randomly initialized weights, so the outputs are also random. To get high quality output you need pass in a checkpoint, typically via the `load_parameters_path` argument. +This command uses a model with randomly initialized weights, so the outputs are also random. To get high-quality output you need to pass in a checkpoint, typically via the `load_parameters_path` argument. For instructions on how to convert pre-trained Hugging Face model checkpoints (like Llama or Gemma) to MaxText's Orbax format, please refer to the [Checkpoint Conversion Guide](../guides/checkpointing_solutions/convert_checkpoint.md). ### Run MaxText via notebook @@ -66,7 +66,7 @@ In the same TPU VM where you just installed all the dependencies of MaxText, You #### Decoding in MaxText via notebook -You can use [demo_decoding.ipynb](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/maxtext/examples/demo_decoding.ipynb) to try out decoding on MaxText's `Llama3.1-8b` model implementation. In this notebook, we give `"I love to"` as the prompt, and the greedily sampled first output token is `" cook"`. Please remember to provide the path to your `Llama3.1-8b` checkpoint for the `load_parameters_path` argument in the config inside the notebook. You can use [to_maxtext.py](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/maxtext/checkpoint_conversion/to_maxtext.py) to create a MaxText/Orbax checkpoint from a Huggingface checkpoint. +You can use [demo_decoding.ipynb](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/maxtext/examples/demo_decoding.ipynb) to try out decoding on MaxText's `Llama3.1-8b` model implementation. 
In this notebook, we give `"I love to"` as the prompt, and the greedily sampled first output token is `" cook"`. Please remember to provide the path to your `Llama3.1-8b` checkpoint for the `load_parameters_path` argument in the config inside the notebook. You can use the [to_maxtext.py](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/maxtext/checkpoint_conversion/to_maxtext.py) script or follow the [Checkpoint Conversion Guide](../guides/checkpointing_solutions/convert_checkpoint.md) to create a MaxText/Orbax checkpoint from a Hugging Face checkpoint. ### Run MaxText on NVIDIA GPUs