ml-explore · davidkoski · Apr 15, 2026 · Apr 11, 2026 · Apr 11, 2026 · Apr 11, 2026
diff --git a/Libraries/MLXHuggingFace/Documentation.docc/Documentation.md b/Libraries/MLXHuggingFace/Documentation.docc/Documentation.md
@@ -0,0 +1,8 @@
+# ``MLXHuggingFace``
+
+Macros to assist in adapting HuggingFace Hub and Tokenizer libraries.
+
+## Overview
+
+See [MLXLMCommon](MLXLMCommon) for information about how to use these macros.
+
diff --git a/Libraries/MLXHuggingFace/Macros.swift b/Libraries/MLXHuggingFace/Macros.swift
@@ -1,41 +1,133 @@
 import Foundation
 import MLXLMCommon
 
+/// Wrap a `HubClient` as a `Downloader`.
+///
+/// ```swift
+/// import MLXHuggingFace
+/// import HuggingFace
+///
+/// let model = try await loadModelContainer(
+///     from: #hubDownloader(HubClient()),
+///     using: #huggingFaceTokenizerLoader(),
+///     configuration: modelConfiguration
+/// )
+/// ```
 @freestanding(expression)
 public macro hubDownloader(_ hub: Any) -> MLXLMCommon.Downloader =
     #externalMacro(module: "MLXHuggingFaceMacros", type: "DownloaderMacro")
 
+/// Provide a default `HubClient` as a `Downloader`.
+///
+/// ```swift
+/// import MLXHuggingFace
+/// import HuggingFace
+///
+/// let model = try await loadModelContainer(
+///     from: #hubDownloader(),
+///     using: #huggingFaceTokenizerLoader(),
+///     configuration: modelConfiguration
+/// )
+/// ```
 @freestanding(expression)
 public macro hubDownloader() -> MLXLMCommon.Downloader =
     #externalMacro(module: "MLXHuggingFaceMacros", type: "DownloaderMacro")
 
+/// Wrap a `Tokenizers.Tokenizer` as an `MLXLMCommon.Tokenizer`.
+///
+/// This is used internally by ``huggingFaceTokenizerLoader()`` -- typically not used directly.
+///
+/// ```swift
+/// import MLXHuggingFace
+/// import Tokenizers
+///
+/// let t: Tokenizers.Tokenizer
+///
+/// let tokenizer = #adaptHuggingFaceTokenizer(t)
+/// ```
 @freestanding(expression)
 public macro adaptHuggingFaceTokenizer(_ upstream: Any) -> MLXLMCommon.Tokenizer =
     #externalMacro(module: "MLXHuggingFaceMacros", type: "TokenizerAdaptorMacro")
 
+/// Provide a `TokenizerLoader` from `Tokenizers.AutoTokenizer`.
+///
+/// ```swift
+/// import MLXHuggingFace
+/// import HuggingFace
+/// import Tokenizers
+///
+/// let model = try await loadModelContainer(
+///     from: #hubDownloader(),
+///     using: #huggingFaceTokenizerLoader(),
+///     configuration: modelConfiguration
+/// )
+/// ```
 @freestanding(expression)
 public macro huggingFaceTokenizerLoader() -> MLXLMCommon.TokenizerLoader =
     #externalMacro(module: "MLXHuggingFaceMacros", type: "TokenizerLoaderMacro")
 
+/// Load a `ModelContainer` using default hub client and tokenizer loader.
+///
+/// ```swift
+/// import MLXHuggingFace
+/// import HuggingFace
+/// import Tokenizers
+///
+/// let model = try await #huggingFaceLoadModelContainer(
+///     configuration: modelConfiguration
+/// )
+/// ```
 @freestanding(expression)
 public macro huggingFaceLoadModelContainer(
     configuration: ModelConfiguration
 ) -> ModelContainer =
     #externalMacro(module: "MLXHuggingFaceMacros", type: "LoadContainerMacro")
 
+/// Load a `ModelContainer` using default hub client and tokenizer loader with progress.
+///
+/// ```swift
+/// import MLXHuggingFace
+/// import HuggingFace
+/// import Tokenizers
+///
+/// let model = try await #huggingFaceLoadModelContainer(
+///     configuration: modelConfiguration
+/// ) { progress in ... }
+/// ```
 @freestanding(expression)
 public macro huggingFaceLoadModelContainer(
     configuration: ModelConfiguration,
     progressHandler: @Sendable @escaping (Progress) -> Void
 ) -> ModelContainer =
     #externalMacro(module: "MLXHuggingFaceMacros", type: "LoadContainerMacro")
 
+/// Load a `ModelContext` using default hub client and tokenizer loader.
+///
+/// ```swift
+/// import MLXHuggingFace
+/// import HuggingFace
+/// import Tokenizers
+///
+/// let modelContext = try await #huggingFaceLoadModel(
+///     configuration: modelConfiguration
+/// )
+/// ```
 @freestanding(expression)
 public macro huggingFaceLoadModel(
     configuration: ModelConfiguration
 ) -> ModelContext =
     #externalMacro(module: "MLXHuggingFaceMacros", type: "LoadContextMacro")
 
+/// Load a `ModelContext` using default hub client and tokenizer loader with progress.
+///
+/// ```swift
+/// import MLXHuggingFace
+/// import HuggingFace
+/// import Tokenizers
+///
+/// let modelContext = try await #huggingFaceLoadModel(
+///     configuration: modelConfiguration
+/// ) { progress in ... }
+/// ```
 @freestanding(expression)
 public macro huggingFaceLoadModel(
     configuration: ModelConfiguration,

diff --git a/Libraries/MLXLLM/Documentation.docc/Documentation.md b/Libraries/MLXLLM/Documentation.docc/Documentation.md
@@ -16,15 +16,23 @@ See <doc:evaluation>.
 Using LLMs and VLMs is as easy as this:
 
 ```swift
+import MLXLMCommon
+
+let downloader: any Downloader = ...
+let tokenizerLoader: any TokenizerLoader = ...
+
 let model = try await loadModel(
-    using: TokenizersLoader(),
+    from: downloader,
+    using: tokenizerLoader,
     id: "mlx-community/Qwen3-4B-4bit"
 )
 let session = ChatSession(model)
-print(try await session.respond(to: "What are two things to see in San Francisco?")
-print(try await session.respond(to: "How about a great place to eat?")
+print(try await session.respond(to: "What are two things to see in San Francisco?"))
+print(try await session.respond(to: "How about a great place to eat?"))
 ```
 
+See [MLXLMCommon](MLXLMCommon) for information about `Downloader` and `TokenizerLoader`.
+
 More advanced APIs are available for those that need them, see <doc:using-model>.
 
 ## Topics

diff --git a/Libraries/MLXLMCommon/Documentation.docc/Documentation.md b/Libraries/MLXLMCommon/Documentation.docc/Documentation.md
@@ -4,6 +4,7 @@ Common language model code.
 
 ## Articles
 
+- <doc:upgrade>
 - <doc:wired-memory>
 
 ## Other MLX Libraries Packages

diff --git a/Libraries/MLXLMCommon/Documentation.docc/developing.md b/Libraries/MLXLMCommon/Documentation.docc/developing.md
@@ -0,0 +1,66 @@
+# Developing mlx-swift-lm
+
+Techniques for developing _in_ mlx-swift-lm.
+
+## Work on Infrastructure
+
+The simplest case for working in mlx-swift-lm is working on 
+infrastructure, e.g. ``KVCache`` or ``ToolCallParser``.
+You can simply fork mlx-swift-lm and modify the files.  There are unit
+tests that let you exercise the functionality and you can add
+more for your specific additions.
+
+The unit tests run without downloading model weights.  There
+are some tests that exercise the models by using random
+weights and mock tokenizers, see EvalTests.  This is mostly
+useful for testing the generation loop itself rather than
+any particular model.
+
+## Work on Models
+
+If you are working on porting or modifying a model you have a few options:
+
+- use `IntegrationTesting/IntegrationTesting.xcodeproj`
+- use `llm-tool` from mlx-swift-examples
+- use your own application
+
+### IntegrationTesting
+
+`IntegrationTesting.xcodeproj` integrates with the HuggingFace
+downloader and tokenizer packages directly and uses [MLXHuggingFace](MLXHuggingFace)
+macros to adapt their APIs, see <doc:using> for more information.
+This uses code from `IntegrationTestHelpers`
+to download weights and run real models.  You can easily change which models
+it uses or add your own custom tests.
+
+Note: these tests are _not_ run in the CI environment, but are a great way
+to test the models in your own development environment.
+
+### mlx-swift-examples / custom application
+
+You can also test your model by integrating it with a tool or application.
+This document describes using [`llm-tool`](https://github.com/ml-explore/mlx-swift-examples/blob/main/Tools/llm-tool/README.md) from `mlx-swift-examples`
+but the same technique will work with any custom code.
+
+`llm-tool` is a command line tool where you can specify the prompt and the model
+as arguments when you run it:
+
+```
+--model mlx-community/Mistral-7B-Instruct-v0.3-4bit
+--prompt "tell me a story"
+```
+
+You will want to have mlx-swift-examples (or your own project) open in
+Xcode with a local checkout of mlx-swift-lm (your fork).  mlx-swift-examples
+will reference a tagged version of mlx-swift-lm and you need to
+switch that to reference your local version.  There are two basic
+methods for doing that (variations on a theme):
+
+- drag the `mlx-swift-lm` _directory_ onto the top item (the mlx-swift-examples project) in the Xcode navigator and choose _reference files in place_
+- follow the steps in the Xcode documentation: [Editing a package dependency as a local package](https://developer.apple.com/documentation/xcode/editing-a-package-dependency-as-a-local-package)
+
+In both cases you will get an override of the mlx-swift-lm dependency for this
+project.  In addition to using your local copy, you can also _edit_ mlx-swift-lm
+at the same time that you use mlx-swift-examples.
+
+For more details on how to configure projects in general, see <doc:using>.
diff --git a/Libraries/MLXLMCommon/Documentation.docc/porting.md b/Libraries/MLXLMCommon/Documentation.docc/porting.md
@@ -9,6 +9,11 @@ There are a number of ways to implement new models in MLX in Swift:
 
 This document talks primarily about the latter.
 
+## Developing mlx-swift-lm
+
+Please see <doc:developing> for more information about how to
+do development _in_ mlx-swift-lm.
+
 ## Porting Models from MLX in Python
 
 Let's consider a concrete example, [gemma.py](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/gemma.py). For reference, here is the current port [Gemma.swift](https://github.com/ml-explore/mlx-swift-lm/blob/main/Libraries/MLXLLM/Models/Gemma.swift).