From d780fa5f1f0bbeac10f453991426ad7b27ae6359 Mon Sep 17 00:00:00 2001
From: Alex Shraer <shralex@gmail.com>
Date: Sat, 6 Jun 2026 19:53:40 +0000
Subject: [PATCH] docs: add links to Checkpoint Conversion Guide in getting
 started tutorials

---
 docs/conf.py                              | 2 ++
 docs/run_maxtext/run_maxtext_localhost.md | 2 +-
 docs/tutorials/first_run.md               | 4 ++--
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 6609bb8784..5f005f88a3 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -183,6 +183,8 @@
     # Ignore GitHub PRs and blobs that trigger rate limiting
     r"https://github\.com/AI-Hypercomputer/maxtext/pull/.*",
     r"https://github\.com/AI-Hypercomputer/maxtext/blob/.*",
+    # Ignore Toronto CS links, which frequently time out in the CI environment
+    r"https://www\.cs\.toronto\.edu/.*",
 ]
 
 
diff --git a/docs/run_maxtext/run_maxtext_localhost.md b/docs/run_maxtext/run_maxtext_localhost.md
index 49b0e55746..2a648a77a3 100644
--- a/docs/run_maxtext/run_maxtext_localhost.md
+++ b/docs/run_maxtext/run_maxtext_localhost.md
@@ -63,7 +63,7 @@ python3 -m maxtext.inference.decode \
   per_device_batch_size=1
 ```
 
-**Note:** Because the model hasn't been properly trained, the output text will be random. To generate meaningful output, you need to load a trained checkpoint using the `load_parameters_path` argument.
+**Note:** Because the model hasn't been properly trained, the output text will be random. To generate meaningful output, you need to load a trained checkpoint using the `load_parameters_path` argument. For instructions on how to convert pre-trained Hugging Face model checkpoints (like Llama or Gemma) to MaxText's Orbax format, please refer to the [Checkpoint Conversion Guide](../guides/checkpointing_solutions/convert_checkpoint.md).
 
 ### Running models using provided configs
 
diff --git a/docs/tutorials/first_run.md b/docs/tutorials/first_run.md
index ab98ece2f1..c27e137932 100644
--- a/docs/tutorials/first_run.md
+++ b/docs/tutorials/first_run.md
@@ -58,7 +58,7 @@ python3 -m maxtext.inference.decode \
   per_device_batch_size=1
 ```
 
-This command uses a model with randomly initialized weights, so the outputs are also random. To get high quality output you need pass in a checkpoint, typically via the `load_parameters_path` argument.
+This command uses a model with randomly initialized weights, so the outputs are also random. To get high-quality output you need to pass in a checkpoint, typically via the `load_parameters_path` argument. For instructions on how to convert pre-trained Hugging Face model checkpoints (like Llama or Gemma) to MaxText's Orbax format, please refer to the [Checkpoint Conversion Guide](../guides/checkpointing_solutions/convert_checkpoint.md).
 
 ### Run MaxText via notebook
 
@@ -66,7 +66,7 @@ In the same TPU VM where you just installed all the dependencies of MaxText, You
 
 #### Decoding in MaxText via notebook
 
-You can use [demo_decoding.ipynb](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/maxtext/examples/demo_decoding.ipynb) to try out decoding on MaxText's `Llama3.1-8b` model implementation. In this notebook, we give `"I love to"` as the prompt, and the greedily sampled first output token is `" cook"`. Please remember to provide the path to your `Llama3.1-8b` checkpoint for the `load_parameters_path` argument in the config inside the notebook. You can use [to_maxtext.py](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/maxtext/checkpoint_conversion/to_maxtext.py) to create a MaxText/Orbax checkpoint from a Huggingface checkpoint.
+You can use [demo_decoding.ipynb](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/maxtext/examples/demo_decoding.ipynb) to try out decoding on MaxText's `Llama3.1-8b` model implementation. In this notebook, we give `"I love to"` as the prompt, and the greedily sampled first output token is `" cook"`. Please remember to provide the path to your `Llama3.1-8b` checkpoint for the `load_parameters_path` argument in the config inside the notebook. You can use the [to_maxtext.py](https://github.com/AI-Hypercomputer/maxtext/blob/main/src/maxtext/checkpoint_conversion/to_maxtext.py) script or follow the [Checkpoint Conversion Guide](../guides/checkpointing_solutions/convert_checkpoint.md) to create a MaxText/Orbax checkpoint from a Hugging Face checkpoint.
 
 ### Run MaxText on NVIDIA GPUs