diff --git a/Libraries/MLXHuggingFace/Documentation.docc/Documentation.md b/Libraries/MLXHuggingFace/Documentation.docc/Documentation.md new file mode 100644 index 000000000..ad41a6da7 --- /dev/null +++ b/Libraries/MLXHuggingFace/Documentation.docc/Documentation.md @@ -0,0 +1,8 @@ +# ``MLXHuggingFace`` + +Macros to assist in adapting HuggingFace Hub and Tokenizer libraries. + +## Overview + +See [MLXLMCommon](MLXLMCommon) for information about how to use these macros. + diff --git a/Libraries/MLXHuggingFace/Macros.swift b/Libraries/MLXHuggingFace/Macros.swift index 7160ae1ee..54092ce50 100644 --- a/Libraries/MLXHuggingFace/Macros.swift +++ b/Libraries/MLXHuggingFace/Macros.swift @@ -1,28 +1,98 @@ import Foundation import MLXLMCommon +/// Wrap a `HubClient` as a `Downloader`. +/// +/// ```swift +/// import MLXHuggingFace +/// import HuggingFace +/// +/// let model = try await loadModelContainer( +/// from: #hubDownloader(HubClient()), +/// using: #huggingFaceTokenizerLoader(), +/// configuration: modelConfiguration +/// ) +/// ``` @freestanding(expression) public macro hubDownloader(_ hub: Any) -> MLXLMCommon.Downloader = #externalMacro(module: "MLXHuggingFaceMacros", type: "DownloaderMacro") +/// Provide a default `HubClient` as a `Downloader`. +/// +/// ```swift +/// import MLXHuggingFace +/// import HuggingFace +/// +/// let model = try await loadModelContainer( +/// from: #hubDownloader(), +/// using: #huggingFaceTokenizerLoader(), +/// configuration: modelConfiguration +/// ) +/// ``` @freestanding(expression) public macro hubDownloader() -> MLXLMCommon.Downloader = #externalMacro(module: "MLXHuggingFaceMacros", type: "DownloaderMacro") +/// Wrap a `Tokenizers.Tokenizer` in `Tokenizer`. +/// +/// This is used internally by ``huggingFaceTokenizerLoader()`` -- typically not used directly. 
+/// +/// ```swift +/// import MLXHuggingFace +/// import Tokenizers +/// +/// let t: Tokenizers.Tokenizer +/// +/// let tokenizer = #adaptHuggingFaceTokenizer(t) +/// ``` @freestanding(expression) public macro adaptHuggingFaceTokenizer(_ upstream: Any) -> MLXLMCommon.Tokenizer = #externalMacro(module: "MLXHuggingFaceMacros", type: "TokenizerAdaptorMacro") +/// Provide a `TokenizerLoader` from `Tokenizers.AutoTokenizer`. +/// +/// ```swift +/// import MLXHuggingFace +/// import HuggingFace +/// +/// let model = try await loadModelContainer( +/// from: #hubDownloader(), +/// using: #huggingFaceTokenizerLoader(), +/// configuration: modelConfiguration +/// ) +/// ``` @freestanding(expression) public macro huggingFaceTokenizerLoader() -> MLXLMCommon.TokenizerLoader = #externalMacro(module: "MLXHuggingFaceMacros", type: "TokenizerLoaderMacro") +/// Load a `ModelContainer` using default hub client and tokenizer loader. +/// +/// ```swift +/// import MLXHuggingFace +/// import HuggingFace +/// import Tokenizers +/// +/// let model = try await #huggingFaceLoadModelContainer( +/// configuration: modelConfiguration +/// ) +/// ``` @freestanding(expression) public macro huggingFaceLoadModelContainer( configuration: ModelConfiguration ) -> ModelContainer = #externalMacro(module: "MLXHuggingFaceMacros", type: "LoadContainerMacro") +/// Load a `ModelContainer` using default hub client and tokenizer loader with progress. +/// +/// ```swift +/// import MLXHuggingFace +/// import HuggingFace +/// import Tokenizers +/// +/// let model = try await #huggingFaceLoadModelContainer( +/// configuration: modelConfiguration +/// ) { progress in ... 
} +/// ``` @freestanding(expression) public macro huggingFaceLoadModelContainer( configuration: ModelConfiguration, @@ -30,12 +100,34 @@ public macro huggingFaceLoadModelContainer( ) -> ModelContainer = #externalMacro(module: "MLXHuggingFaceMacros", type: "LoadContainerMacro") +/// Load a `ModelContext` using default hub client and tokenizer loader. +/// +/// ```swift +/// import MLXHuggingFace +/// import HuggingFace +/// import Tokenizers +/// +/// let modelContext = try await #huggingFaceLoadModel( +/// configuration: modelConfiguration +/// ) +/// ``` @freestanding(expression) public macro huggingFaceLoadModel( configuration: ModelConfiguration ) -> ModelContext = #externalMacro(module: "MLXHuggingFaceMacros", type: "LoadContextMacro") +/// Load a `ModelContext` using default hub client and tokenizer loader with progress. +/// +/// ```swift +/// import MLXHuggingFace +/// import HuggingFace +/// import Tokenizers +/// +/// let modelContext = try await #huggingFaceLoadModel( +/// configuration: modelConfiguration +/// ) { progress in ... } +/// ``` @freestanding(expression) public macro huggingFaceLoadModel( configuration: ModelConfiguration, diff --git a/Libraries/MLXLLM/Documentation.docc/Documentation.md b/Libraries/MLXLLM/Documentation.docc/Documentation.md index b0a9c7a82..b0f96376c 100644 --- a/Libraries/MLXLLM/Documentation.docc/Documentation.md +++ b/Libraries/MLXLLM/Documentation.docc/Documentation.md @@ -16,15 +16,23 @@ See . Using LLMs and VLMs is as easy as this: ```swift +import MLXLMCommon + +let downloader: any Downloader = ... +let tokenizerLoader: any TokenizerLoader = ... 
+ let model = try await loadModel( - using: TokenizersLoader(), + from: downloader, + using: tokenizerLoader, id: "mlx-community/Qwen3-4B-4bit" ) let session = ChatSession(model) -print(try await session.respond(to: "What are two things to see in San Francisco?") -print(try await session.respond(to: "How about a great place to eat?") +print(try await session.respond(to: "What are two things to see in San Francisco?")) +print(try await session.respond(to: "How about a great place to eat?")) ``` +See [MLXLMCommon](MLXLMCommon) for information about `Downloader` and `TokenizerLoader`. + More advanced APIs are available for those that need them, see . ## Topics diff --git a/Libraries/MLXLMCommon/Documentation.docc/Documentation.md b/Libraries/MLXLMCommon/Documentation.docc/Documentation.md index b7c3f278c..3fa793d07 100644 --- a/Libraries/MLXLMCommon/Documentation.docc/Documentation.md +++ b/Libraries/MLXLMCommon/Documentation.docc/Documentation.md @@ -4,6 +4,7 @@ Common language model code. ## Articles +- - ## Other MLX Libraries Packages diff --git a/Libraries/MLXLMCommon/Documentation.docc/developing.md b/Libraries/MLXLMCommon/Documentation.docc/developing.md new file mode 100644 index 000000000..b9987334a --- /dev/null +++ b/Libraries/MLXLMCommon/Documentation.docc/developing.md @@ -0,0 +1,66 @@ +# Developing mlx-swift-lm + +Techniques for developing _in_ mlx-swift-lm. + +## Work on Infrastructure + +The simplest case for working in mlx-swift-lm is working on +infrastructure, e.g. ``KVCache`` or ``ToolCallParser``. +You can simply fork mlx-swift-lm and modify the files. There are unit +tests that let you exercise the functionality and you can add +more for your specific additions. + +The unit tests run without downloading model weights. There +are some tests that exercise the models by using random +weights and mock tokenizers, see EvalTests. This is mostly +useful for testing the generation loop itself rather than +any particular model. 
+ +## Work on Models + +If you are working on porting or modifying a model you have a few options: + +- use `IntegrationTesting/IntegrationTesting.xcodeproj` +- use `llm-tool` from mlx-swift-examples +- use your own application + +### IntegrationTesting + +`IntegrationTesting.xcodeproj` integrates with the HuggingFace +downloader and tokenizer packages directly and uses [MLXHuggingFace](MLXHuggingFace) +macros to adapt their APIs, see for more information. +This uses code from `IntegrationTestHelpers` +to download weights and run real models. You can easily change which models +it uses or add your own custom tests. + +Note: these tests are _not_ run in the CI environment, but are a great way +to test the models in your own development environment. + +### mlx-swift-examples / custom application + +You can also test your model by integrating it with a tool or application. +This document describes using [`llm-tool`](https://github.com/ml-explore/mlx-swift-examples/blob/main/Tools/llm-tool/README.md) from `mlx-swift-examples` +but the same technique will work with any custom code. + +`llm-tool` is a command line tool where you can specify the prompt and the model +as arguments when you run it: + +``` +--model mlx-community/Mistral-7B-Instruct-v0.3-4bit +--prompt "tell me a story" +``` + +You will want to have mlx-swift-examples (or your own project) open in +Xcode with a local checkout of mlx-swift-lm (your fork). mlx-swift-examples +will reference a tagged version of mlx-swift-lm and you need to +switch that to reference your local version. There are two basic +methods for doing that (variations on a theme): + +- drag the `mlx-swift-lm` _directory_ onto the top item (the mlx-swift-examples project) in the Xcode navigator and choose _reference files in place_ +- follow the [Xcode documentation](https://developer.apple.com/documentation/xcode/editing-a-package-dependency-as-a-local-package) on editing a package dependency as a local package + +In both cases you will get an override of the mlx-swift-lm dependency for this +project. 
In addition to using your local copy, you can also _edit_ mlx-swift-lm +at the same time that you use mlx-swift-examples. + +For more details on how to configure projects in general, see . diff --git a/Libraries/MLXLMCommon/Documentation.docc/porting.md b/Libraries/MLXLMCommon/Documentation.docc/porting.md index 30d412788..3969ec308 100644 --- a/Libraries/MLXLMCommon/Documentation.docc/porting.md +++ b/Libraries/MLXLMCommon/Documentation.docc/porting.md @@ -9,6 +9,11 @@ There are a number of ways to implement new models in MLX in Swift: This document talks primarily about the latter. +## Developing mlx-swift-lm + +Please see for more information about how to +do development _in_ mlx-swift-lm. + ## Porting Models from MLX in Python Let's consider a concrete example, [gemma.py](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/gemma.py). For reference, here is the current port [Gemma.swift](https://github.com/ml-explore/mlx-swift-lm/blob/main/Libraries/MLXLLM/Models/Gemma.swift). diff --git a/Libraries/MLXLMCommon/Documentation.docc/upgrade.md b/Libraries/MLXLMCommon/Documentation.docc/upgrade.md new file mode 100644 index 000000000..95344adc5 --- /dev/null +++ b/Libraries/MLXLMCommon/Documentation.docc/upgrade.md @@ -0,0 +1,319 @@ +# Upgrade From 2.x Release + +Notes on upgrading from mlx-swift-lm 2.x releases. + +## Introduction + +mlx-swift-lm 3.x has breaking API changes from 2.x: + +- Download and Tokenizers are protocols and require concrete implementations +- MLXEmbedders now uses the same download/load infrastructure as MLXLMCommon + +See for more information. 
+ +This was done for several reasons: + +- break the hard dependency on the HuggingFace Hub and Tokenizer implementations + - this allows other implementations with other design constraints, such as performance optimizations + - see +- provide a mechanism to separate the download of weights and the load of weights + +## Selecting a Downloader and Tokenizer + +See for details on selecting a Downloader and a Tokenizer and +how to hook these up. + +### Using MLXHuggingFace Macros + +If using the , if you had code like this: + +```swift +import MLXLLM +import MLXLMCommon + +let modelConfiguration = LLMRegistry.gemma3_1B_qat_4bit +let model = try await loadModelContainer(configuration: modelConfiguration) + +... +``` + +you would convert that like this: + +```swift +import MLXLLM +import MLXLMCommon +import MLXHuggingFace + +import HuggingFace +import Tokenizers + +let modelConfiguration = LLMRegistry.gemma3_1B_qat_4bit +let model = try await #huggingFaceLoadModelContainer( + configuration: modelConfiguration +) + +... +``` + +If you want a little more control over the downloader or the tokenizer loader, that +expands to this: + +```swift +import MLXLLM +import MLXLMCommon +import MLXHuggingFace + +import HuggingFace +import Tokenizers + +let modelConfiguration = LLMRegistry.gemma3_1B_qat_4bit +let model = try await loadModelContainer( + from: #hubDownloader(), + using: #huggingFaceTokenizerLoader(), + configuration: modelConfiguration +) + +... +``` + +### Using Integration Packages + +If you are using an , such as [https://github.com/DePasqualeOrg/swift-tokenizers-mlx](https://github.com/DePasqualeOrg/swift-tokenizers-mlx), you would do something similar: + +```swift +import MLXLLM +import MLXLMCommon + +let modelConfiguration = LLMRegistry.gemma3_1B_qat_4bit +let model = try await loadModelContainer(configuration: modelConfiguration) + +... 
+``` + +becomes: + +```swift +import MLXLLM +import MLXLMCommon + +import MLXLMHFAPI +import MLXLMTokenizers + +let modelConfiguration = LLMRegistry.gemma3_1B_qat_4bit +let model = try await loadModelContainer( + from: HubClient(), + configuration: modelConfiguration +) + +... +``` + +## MLXEmbedders + +MLXEmbedders requires the same . Additionally, +there are some changes to type names and methods -- these now use the same structure +and mechanism as MLXLMCommon / MLXLLM. + +Previously the download and load of the model was done like this: + +```swift +import MLXEmbedders + +let defaultModelConfiguration = ModelConfiguration.nomic_text_v1_5 +let container = try await MLXEmbedders.loadModelContainer( + hub: HubApi(), + configuration: defaultModelConfiguration +) + +// use it ... +``` + +now, using the (see + for the pattern using other tokenizer +packages): + +```swift +import MLXEmbedders +import MLXLMCommon +import MLXHuggingFace + +import HuggingFace +import Tokenizers + +// ModelConfiguration -> EmbedderRegistry +let defaultModelConfiguration = EmbedderRegistry.nomic_text_v1_5 + +let hub = #hubDownloader() +let loader = #huggingFaceTokenizerLoader() + +// MLXEmbedders.loadModelContainer (free function) -> EmbedderModelFactory.shared.loadContainer +let container = try await EmbedderModelFactory.shared.loadContainer( + from: hub, + using: loader, + configuration: defaultModelConfiguration +) + +// use it ... +``` + +These types are removed or replaced: + +- `ModelConfiguration` -> use MLXLMCommon +- `ModelConfiguration.nomic_text_v1_5` -> `EmbedderRegistry.nomic_text_v1_5` +- `BaseConfiguration` -> use MLXLMCommon +- `ModelType` - removed +- `ModelContainer` -> EmbedderModelContainer and EmbedderModelContext (matches LLM/VLM concepts) +- `load()` free functions -> EmbedderModelFactory + +## Release Notes + +Detailed release notes. 
+ +### New dependencies + +Add your preferred tokenizer and downloader adapters: + +```swift +// Before (2.x) – single dependency +.package(url: "https://github.com/ml-explore/mlx-swift-lm/", from: "2.30.0"), + +// After (3.x) – core + adapters +.package(url: "https://github.com/ml-explore/mlx-swift-lm/", from: "3.0.0"), +.package(url: "https://github.com/DePasqualeOrg/swift-tokenizers-mlx/", from: "0.1.0"), +.package(url: "https://github.com/DePasqualeOrg/swift-hf-api-mlx/", from: "0.1.0"), +``` + +And add their products to your target: + +```swift +.product(name: "MLXLMTokenizers", package: "swift-tokenizers-mlx"), +.product(name: "MLXLMHFAPI", package: "swift-hf-api-mlx"), + +// If you use MLXEmbedders: +.product(name: "MLXEmbeddersTokenizers", package: "swift-tokenizers-mlx"), +.product(name: "MLXEmbeddersHFAPI", package: "swift-hf-api-mlx"), +``` + +### New imports + +```swift +// Before (2.x) +import MLXLLM + +// After (3.x) +import MLXLLM +import MLXLMHFAPI // Downloader adapter +import MLXLMTokenizers // Tokenizer adapter +``` + +If you use MLXEmbedders: + +```swift +import MLXEmbedders +import MLXEmbeddersHFAPI // Downloader adapter +import MLXEmbeddersTokenizers // Tokenizer adapter +``` + +### Loading API changes + +The core APIs now include a `from:` parameter of type `URL` or `any Downloader` as well as a `using:` parameter for the tokenizer loader. Tokenizer integration packages may supply convenience methods with a default tokenizer loader, allowing you to omit the `using:` parameter. + +The most visible call-site changes are: + +- `hub:` → `from:`: Models are now loaded from a directory `URL` or `Downloader`. +- `HubApi` → `HubClient`: A new implementation of the Hugging Face Hub client is used. 
+ +Example when downloading from Hugging Face: + +```swift +// Before (2.x) – hub defaulted to HubApi() +let container = try await loadModelContainer( + id: "mlx-community/Qwen3-4B-4bit" +) + +// After (3.x) – Using Swift Hugging Face + Swift Tokenizers +let container = try await loadModelContainer( + from: HubClient.default, + id: "mlx-community/Qwen3-4B-4bit" +) +``` + +At the lower-level core API, you can still pass any `Downloader` and any `TokenizerLoader` explicitly. + +Loading from a local directory: + +```swift +// Before (2.x) +let container = try await loadModelContainer(directory: modelDirectory) + +// After (3.x) +let container = try await loadModelContainer(from: modelDirectory) +``` + +Loading with a model factory: + +```swift +let container = try await LLMModelFactory.shared.loadContainer( + from: HubClient.default, + configuration: modelConfiguration +) +``` + +Loading an embedder: + +```swift +import MLXEmbedders +import MLXEmbeddersHFAPI +import MLXEmbeddersTokenizers + +let container = try await loadModelContainer( + from: HubClient.default, + configuration: .configuration(id: "sentence-transformers/all-MiniLM-L6-v2") +) +``` + +### Renamed methods + +`decode(tokens:)` is renamed to `decode(tokenIds:)` to align with the `transformers` library in Python: + +```swift +// Before (2.x) +let text = tokenizer.decode(tokens: ids) + +// After (3.0) +let text = tokenizer.decode(tokenIds: ids) +``` + +## Breaking Changes + +### Loading API + +The `hub` parameter (previously `HubApi`) has been replaced with `from` (any `Downloader` or `URL` for a local directory). Functions that previously defaulted to `defaultHubApi` no longer have a default – callers must either pass a `Downloader` explicitly or use the convenience methods in `MLXLMHuggingFace` / `MLXEmbeddersHuggingFace`, which default to `HubClient.default`. 
+ +For most users who were using the default Hub client, adding `import MLXLMHuggingFace` or `import MLXEmbeddersHuggingFace` and using the convenience overloads is sufficient. + +Users who were passing a custom `HubApi` instance should create a `HubClient` instead and pass it as the `from` parameter. `HubClient` conforms to `Downloader` via `MLXLMHuggingFace`. + +### `ModelConfiguration` + +- `tokenizerId` and `overrideTokenizer` have been replaced by `tokenizerSource: TokenizerSource?`, which supports `.id(String)` for remote sources and `.directory(URL)` for local paths. +- `preparePrompt` has been removed. This shouldn't be used anyway, since support for chat templates is available. +- `modelDirectory(hub:)` has been removed. For local directories, pass the `URL` directly to the loading functions. For remote models, the `Downloader` protocol handles resolution. + +### Tokenizer loading + +`loadTokenizer(configuration:hub:)` has been removed. Tokenizer loading now uses `AutoTokenizer.from(directory:)` from Swift Tokenizers directly. + +`replacementTokenizers` (the `TokenizerReplacementRegistry`) has been removed. Use `AutoTokenizer.register(_:for:)` from Swift Tokenizers instead. + +### `defaultHubApi` + +The `defaultHubApi` global has been removed. Hugging Face Hub access is now provided by `HubClient.default` from the `HuggingFace` module. 
+ +### Low-level APIs + +- `downloadModel(hub:configuration:progressHandler:)` → `Downloader.download(id:revision:matching:useLatest:progressHandler:)` +- `loadTokenizerConfig(configuration:hub:)` → `AutoTokenizer.from(directory:)` +- `ModelFactory._load(hub:configuration:progressHandler:)` → `_load(configuration: ResolvedModelConfiguration)` +- `ModelFactory._loadContainer`: removed (base `loadContainer` now builds the container from `_load`) + diff --git a/Libraries/MLXLMCommon/Documentation.docc/using.md b/Libraries/MLXLMCommon/Documentation.docc/using.md new file mode 100644 index 000000000..7abc788f1 --- /dev/null +++ b/Libraries/MLXLMCommon/Documentation.docc/using.md @@ -0,0 +1,258 @@ +# Using mlx-swift-lm + +How to use mlx-swift-lm in your own tools and applications + +## Overview + +Using mlx-swift-lm to add LLM, VLM or text embedding capabilities to your own +software is straightforward: + +- add a dependency on `mlx-swift-lm` +- add a dependency on a _Downloader_ and _Tokenizer_ +- adapt the API of the Downloader and Tokenizer to conform to the protocols + +Then make use of the model: + +```swift +import MLXLMCommon + +let downloader: any Downloader = ... +let tokenizerLoader: any TokenizerLoader = ... + +let model = try await loadModel( + from: downloader, + using: tokenizerLoader, + id: "mlx-community/Qwen3-4B-4bit" +) +let session = ChatSession(model) +print(try await session.respond(to: "What are two things to see in San Francisco?")) +print(try await session.respond(to: "How about a great place to eat?")) +``` + +## Downloaders and Tokenizers + +There are 3 general ways to select and use concrete Downloader and Tokenizer implementations: + +- implementing protocols +- using an integration package +- using [MLXHuggingFace](MLXHuggingFace) macros + +If you are coming from mlx-swift-lm 2.x the macros will be the +simplest way, but consider as there are alternate +implementations that may provide features and capabilities that you want. 
+ +### Implementing Protocols + +The other two methods use exactly this technique to wrap concrete +implementations in the mlx-swift-lm protocol. You can do this yourself +if you have custom code or simply wish to see how it works. + +mlx-swift-lm requires implementation of at least the two tokenizer protocols: + +- ``Downloader`` -- required if you need to download weights. Not needed if you have local weights. +- ``Tokenizer`` -- adapt the concrete tokenizers to the mlx-swift-lm protocol. +- ``TokenizerLoader`` -- factory for ``Tokenizer`` implementations. + +You can look at implementations for examples +of how to write these -- there are only a few properties and methods +and they typically have trivial mappings to the concrete implementation. + +This example shows adapting `HuggingFace.HubClient` to the `Downloader` protocol: + +```swift +import HuggingFace +import MLXLMCommon + +struct HubDownloader: MLXLMCommon.Downloader { + private let upstream: HubClient + + init(_ upstream: HubClient) { + self.upstream = upstream + } + + init() { + self.upstream = HubClient() + } + + public func download( + id: String, + revision: String?, + matching patterns: [String], + useLatest: Bool, + progressHandler: @Sendable @escaping (Progress) -> Void + ) async throws -> URL { + guard let repoID = HuggingFace.Repo.ID(rawValue: id) else { + throw HuggingFaceDownloaderError.invalidRepositoryID(id) + } + let revision = revision ?? "main" + + return try await upstream.downloadSnapshot( + of: repoID, + revision: revision, + matching: patterns, + progressHandler: { @MainActor progress in + progressHandler(progress) + } + ) + } +} + +// now you can use it +let downloader = HubDownloader() +let tokenizerLoader: any TokenizerLoader = ... + +let model = try await loadModel( + from: downloader, + using: tokenizerLoader, + id: "mlx-community/Qwen3-4B-4bit" +) +``` + +### Integration Packages + +Integration packages provide an adapter that encapsulates a concrete +implementation. 
Adding a dependency on the adapter will transitively +add a dependency on the implementation. + +So which adapter do you choose? + +- `huggingface/swift-transformers` + - this is the package that mlx-swift-lm originally integrated with +- `DePasqualeOrg/swift-tokenizers` + - Swift Tokenizers is a streamlined and optimized fork of Swift Transformers that focuses solely on tokenizer functionality, with an optional Rust backend for even better performance. + +You need a downloader package if you want to download weights at runtime -- this isn't +required if you have some other way to get weights into a local directory. + +| Downloader package (implementation) | Adapter | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| [DePasqualeOrg/swift-hf-api](https://github.com/DePasqualeOrg/swift-hf-api) | [DePasqualeOrg/swift-hf-api-mlx](https://github.com/DePasqualeOrg/swift-hf-api-mlx) | +| [huggingface/swift-huggingface](https://github.com/huggingface/swift-huggingface) | [DePasqualeOrg/swift-huggingface-mlx](https://github.com/DePasqualeOrg/swift-huggingface-mlx) | + +The tokenizer package translates Strings into tokens for model consumption and back: + +| Tokenizer package (implementation) | Adapter | +| ------------------------------------------------------------ | ------------------------------------------------------------ | +| [DePasqualeOrg/swift-tokenizers](https://github.com/DePasqualeOrg/swift-tokenizers) | [DePasqualeOrg/swift-tokenizers-mlx](https://github.com/DePasqualeOrg/swift-tokenizers-mlx) | +| [huggingface/swift-transformers](https://github.com/huggingface/swift-transformers) | [DePasqualeOrg/swift-transformers-mlx](https://github.com/DePasqualeOrg/swift-transformers-mlx) | + +See for information about how to hook it up. 
+ +### MLXHuggingFace Macros + +To provide parity with mlx-swift-lm 2.x there is a built in integration with +the HuggingFace downloader and tokenizer implementations using macros. + +Add these dependencies to your project (see ): + +- [https://github.com/huggingface/swift-huggingface](https://github.com/huggingface/swift-huggingface) +- [https://github.com/huggingface/swift-transformers](https://github.com/huggingface/swift-transformers) + +and add `HuggingFace`, `Tokenizers`, `MLXLLM`, `MLXLMCommon` and `MLXHuggingFace` as libraries that your project links. + +You can use the integration like this: + +```swift +import MLXLLM +import MLXLMCommon +import MLXHuggingFace + +import HuggingFace +import Tokenizers + +let modelConfiguration = LLMRegistry.gemma3_1B_qat_4bit + +let model = try await #huggingFaceLoadModelContainer( + configuration: modelConfiguration +) + +let session = ChatSession(model) +print(try await session.respond(to: "What are two things to see in San Francisco?")) +print(try await session.respond(to: "How about a great place to eat?")) +``` + +or if you prefer more explicit downloader and tokenizer loading for more +control: + +```swift +import HuggingFace +import Tokenizers + +import MLXLLM +import MLXLMCommon +import MLXHuggingFace + +let modelConfiguration = LLMRegistry.gemma3_1B_qat_4bit + +let model = try await LLMModelFactory.shared.loadContainer( + from: #hubDownloader(), + using: #huggingFaceTokenizerLoader(), + configuration: modelConfiguration +) + +let session = ChatSession(model) +print(try await session.respond(to: "What are two things to see in San Francisco?")) +print(try await session.respond(to: "How about a great place to eat?")) +``` + +`#hubDownloader()` provides an integration just like what is shown in and `#huggingFaceTokenizerLoader()` +provides something similar to load the tokenizers. + +See for more information on upgrading from a 2.x release. 
+ +## Xcode projects + +You can read the [Xcode documentation](https://developer.apple.com/documentation/xcode/adding-package-dependencies-to-your-app). + +Click on your project (the top item in the Xcode navigator) and select the **Project** (top item). Then select **Package Dependencies** and click `+` to add a new dependency. + +For all integration methods you will need to add: + +- [https://github.com/ml-explore/mlx-swift-lm](https://github.com/ml-explore/mlx-swift-lm) + +Beyond that, choose one of the 3 integration methods and add either the adapter packages OR the implementation packages if using macros/local implementation. See . + +## Package.swift / SwiftPM + +In your Package.swift add a reference to mlx-swift-lm, choosing either the `main` branch or something that tracks versions: + +```swift +.package(url: "https://github.com/ml-explore/mlx-swift-lm", .upToNextMajor(from: "3.31.3")), +``` + +Beyond that, choose one of the 3 integration methods and add either the adapter packages OR the implementation packages if using macros/local implementation. See . 
+ +You can use the like this: + +```swift +.package(url: "https://github.com/huggingface/swift-huggingface", from: "0.9.0"), +.package(url: "https://github.com/huggingface/swift-transformers", from: "1.3.0"), +``` + +```swift +.target( + name: "YourTargetName", + dependencies: [ + .product(name: "MLXLLM", package: "mlx-swift-lm"), + .product(name: "MLXHuggingFace", package: "mlx-swift-lm"), + .product(name: "HuggingFace", package: "swift-huggingface"), + .product(name: "Tokenizers", package: "swift-transformers"), + ]), +``` + +or one of the integration packages: + +```swift +.package(url: "https://github.com/DePasqualeOrg/swift-tokenizers-mlx", from: "0.1.0"), +.package(url: "https://github.com/DePasqualeOrg/swift-hf-api-mlx", from: "0.1.0"), +``` + +```swift +.target( + name: "YourTargetName", + dependencies: [ + .product(name: "MLXLLM", package: "mlx-swift-lm"), + .product(name: "MLXLMTokenizers", package: "swift-tokenizers-mlx"), + .product(name: "MLXLMHuggingFace", package: "swift-hf-api-mlx"), + ]), +``` diff --git a/README.md b/README.md index b45683fdc..8d3111d0e 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ # MLX Swift LM +MLX Swift LM is a Swift package to build tools and applications with large language models (LLMs) and vision language models (VLMs) in [MLX Swift](https://github.com/ml-explore/mlx-swift). + > [!IMPORTANT] > The `main` branch is a _new_ major version number: 3.x. In order > to decouple from tokenizer and downloader packages some breaking -> changes were introduced. See [Breaking Changes](#breaking-changes) for more information. - -MLX Swift LM is a Swift package to build tools and applications with large language models (LLMs) and vision language models (VLMs) in [MLX Swift](https://github.com/ml-explore/mlx-swift). +> changes were introduced. See [upgrading documentation](https://swiftpackageindex.com/ml-explore/mlx-swift-lm/main/documentation/mlxlmcommon/upgrade) for detailed instructions on upgrading. 
Some key features include: @@ -15,26 +15,36 @@ Some key features include: For some example applications and tools that use MLX Swift LM, check out [MLX Swift Examples](https://github.com/ml-explore/mlx-swift-examples). +## Documentation + +Developers can use these examples in their own programs -- just import the swift package! + +- [Porting and implementing models](https://swiftpackageindex.com/ml-explore/mlx-swift-lm/main/documentation/mlxlmcommon/porting) +- [Techniques for developing in mlx-swift-lm](https://swiftpackageindex.com/ml-explore/mlx-swift-lm/main/documentation/mlxlmcommon/developing) +- [MLXLMCommon](https://swiftpackageindex.com/ml-explore/mlx-swift-lm/main/documentation/mlxlmcommon): Common API for LLM and VLM +- [MLXLLM](https://swiftpackageindex.com/ml-explore/mlx-swift-lm/main/documentation/mlxllm): Large language model example implementations +- [MLXVLM](https://swiftpackageindex.com/ml-explore/mlx-swift-lm/main/documentation/mlxvlm): Vision language model example implementations +- [MLXEmbedders](https://swiftpackageindex.com/ml-explore/mlx-swift-lm/main/documentation/mlxembedders): Popular encoders and embedding models example implementations + ## Usage -This package integrates with a variety of tokenizer and downloader packages through protocol conformance. Users can pick from three ways to integrate with these packages, which offer different tradeoffs between freedom and convenience: +This package integrates with a variety of tokenizer and downloader packages through protocol conformance. Users can pick from three ways to integrate with these packages, which offer different tradeoffs between freedom and convenience. 
-- Maximum freedom - - Copy the protocol conformance code (~100 lines) from the [integration packages](#Tokenizer-and-Downloader-Integrations) -- Freedom and convenience - - Use the [integration packages](#Tokenizer-and-Downloader-Integrations) for your preferred tokenizer and downloader packages -- Convenience - - Use the macros for integration with Swift Transformers and Swift Hugging Face +See documentation on [how to integrate mlx-swift-lm and downloaders/tokenizers](https://swiftpackageindex.com/ml-explore/mlx-swift-lm/main/documentation/mlxlmcommon/using). ### Installation Add the core package to your `Package.swift`: ```swift -.package(url: "https://github.com/ml-explore/mlx-swift-lm", branch: "main"), +.package(url: "https://github.com/ml-explore/mlx-swift-lm", .upToNextMajor(from: "3.31.3")), ``` -Then add your preferred tokenizer and downloader integrations: +Then choose one of the methods below to select download and tokenizer implementations. + +### Method 1: Integration Packages + +Then add your preferred tokenizer and downloader integrations, see [how to integrate mlx-swift-lm and downloaders/tokenizers](https://swiftpackageindex.com/ml-explore/mlx-swift-lm/main/documentation/mlxlmcommon/using#Integration-Packages): ```swift .package(url: "https://github.com/DePasqualeOrg/swift-tokenizers-mlx", from: "0.1.0"), @@ -53,258 +63,67 @@ And add the libraries to your target: ]), ``` -### Tokenizer and Downloader Integrations - -Tokenization and model downloading are handled by separate packages. Adapters make it easy to use your preferred tokenizer and downloader packages. For instructions on how to use them, see the readmes in the respective packages. 
- -| Tokenizer package | Adapter | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| [DePasqualeOrg/swift-tokenizers](https://github.com/DePasqualeOrg/swift-tokenizers) | [DePasqualeOrg/swift-tokenizers-mlx](https://github.com/DePasqualeOrg/swift-tokenizers-mlx) | -| [huggingface/swift-transformers](https://github.com/huggingface/swift-transformers) | [DePasqualeOrg/swift-transformers-mlx](https://github.com/DePasqualeOrg/swift-transformers-mlx) | +### Method 2: Macros -| Downloader package | Adapter | -| ------------------------------------------------------------ | ------------------------------------------------------------ | -| [huggingface/swift-huggingface](https://github.com/huggingface/swift-huggingface) | [DePasqualeOrg/swift-huggingface-mlx](https://github.com/DePasqualeOrg/swift-huggingface-mlx) | -| [DePasqualeOrg/swift-hf-api](https://github.com/DePasqualeOrg/swift-hf-api) | [DePasqualeOrg/swift-hf-api-mlx](https://github.com/DePasqualeOrg/swift-hf-api-mlx) | +This preserves parity with mlx-swift-lm 2.x. Simply reference the huggingface packages and use the `MLXHuggingFace` macros to adapt the APIs. [Read more here](https://swiftpackageindex.com/ml-explore/mlx-swift-lm/main/documentation/mlxlmcommon/using#MLXHuggingFace-Macros). - -> **Note:** The adapters are offered for convenience and are not required. You can also use tokenizer and downloader packages directly by setting up the required protocol conformance for MLX Swift LM, just like the code in the integration packages. Alternatively, you can use the macros provided by this package to integrate with Swift Transformers and Swift Hugging Face. 
- -### Quick Start - -You can get started with a wide variety of open-weights LLMs and VLMs using this simplified API (for more details, see [MLXLMCommon](Libraries/MLXLMCommon)): +Add these to your dependencies: ```swift -import MLXLLM -import MLXLMHuggingFace -import MLXLMTokenizers - -let model = try await loadModel( - from: HubClient.default, - using: TokenizersLoader(), - id: "mlx-community/Qwen3-4B-4bit" -) -let session = ChatSession(model) -print(try await session.respond(to: "What are two things to see in San Francisco?")) -print(try await session.respond(to: "How about a great place to eat?")) +.package(url: "https://github.com/huggingface/swift-huggingface", .upToNextMajor(from: "0.9.0")), +.package(url: "https://github.com/huggingface/swift-transformers", .upToNextMajor(from: "1.3.0")), ``` -Loading from a local directory: +And add the libraries to your target: ```swift -import MLXLLM -import MLXLMTokenizers - -let modelDirectory = URL(filePath: "/path/to/model") -let container = try await loadModelContainer( - from: modelDirectory, - using: TokenizersLoader() -) +.target( + name: "YourTargetName", + dependencies: [ + .product(name: "MLXLMCommon", package: "mlx-swift-lm"), + .product(name: "MLXLLM", package: "mlx-swift-lm"), + .product(name: "MLXHuggingFace", package: "mlx-swift-lm"), + .product(name: "HuggingFace", package: "swift-huggingface"), + .product(name: "Tokenizers", package: "swift-transformers"), + ]), ``` -Use a custom Hugging Face client: - -```swift -import MLXLLM -import MLXLMHuggingFace -import MLXLMTokenizers +## Quick Start -let hub = HubClient(token: "hf_...") -let container = try await loadModelContainer( - from: hub, - using: TokenizersLoader(), - id: "mlx-community/Qwen3-4B-4bit" -) -``` +You can get started with a wide variety of open-weights LLMs and VLMs using this simplified API (for more details, see [MLXLMCommon](Libraries/MLXLMCommon)). -Use a custom downloader: +If using the [integration 
macros](https://swiftpackageindex.com/ml-explore/mlx-swift-lm/main/documentation/mlxlmcommon/using#MLXHuggingFace-Macros), you can get started with code like this: ```swift import MLXLLM import MLXLMCommon -import MLXLMTokenizers - -struct S3Downloader: Downloader { - func download( - id: String, - revision: String?, - matching patterns: [String], - useLatest: Bool, - progressHandler: @Sendable @escaping (Progress) -> Void - ) async throws -> URL { - // Download files and return a local directory URL. - return URL(filePath: "/tmp/model") - } -} - -let container = try await loadModelContainer( - from: S3Downloader(), - using: TokenizersLoader(), - id: "my-bucket/my-model" -) -``` - -Or use the underlying API to control every aspect of the evaluation. - -## Migrating to Version 3 - -Version 3 of MLX Swift LM decouples the tokenizer and downloader implementations. See the [integrations](#Tokenizer-and-Downloader-Integrations) section for details. - -### New dependencies - -Add your preferred tokenizer and downloader adapters: - -```swift -// Before (2.x) – single dependency -.package(url: "https://github.com/ml-explore/mlx-swift-lm/", from: "2.30.0"), - -// After (3.x) – core + adapters -.package(url: "https://github.com/ml-explore/mlx-swift-lm/", from: "3.0.0"), -.package(url: "https://github.com/DePasqualeOrg/swift-tokenizers-mlx/", from: "0.1.0"), -.package(url: "https://github.com/DePasqualeOrg/swift-hf-api-mlx/", from: "0.1.0"), -``` - -And add their products to your target: - -```swift -.product(name: "MLXLMTokenizers", package: "swift-tokenizers-mlx"), -.product(name: "MLXLMHFAPI", package: "swift-hf-api-mlx"), - -// If you use MLXEmbedders: -.product(name: "MLXEmbeddersTokenizers", package: "swift-tokenizers-mlx"), -.product(name: "MLXEmbeddersHFAPI", package: "swift-hf-api-mlx"), -``` - -### New imports - -```swift -// Before (2.x) -import MLXLLM - -// After (3.x) -import MLXLLM -import MLXLMHFAPI // Downloader adapter -import MLXLMTokenizers // Tokenizer 
adapter -``` - -If you use MLXEmbedders: +import MLXHuggingFace -```swift -import MLXEmbedders -import MLXEmbeddersHFAPI // Downloader adapter -import MLXEmbeddersTokenizers // Tokenizer adapter -``` - -### Loading API changes - -The core APIs now include a `from:` parameter of type `URL` or `any Downloader` as well as a `using:` parameter for the tokenizer loader. Tokenizer integration packages may supply convenience methods with a default tokenizer loader, allowing you to omit the `using:` parameter. - -The most visible call-site changes are: +import HuggingFace +import Tokenizers -- `hub:` → `from:`: Models are now loaded from a directory `URL` or `Downloader`. -- `HubApi` → `HubClient`: A new implementation of the Hugging Face Hub client is used. +let modelConfiguration = LLMRegistry.gemma3_1B_qat_4bit -Example when downloading from Hugging Face: - -```swift -// Before (2.x) – hub defaulted to HubApi() -let container = try await loadModelContainer( - id: "mlx-community/Qwen3-4B-4bit" -) - -// After (3.x) – Using Swift Hugging Face + Swift Tokenizers -let container = try await loadModelContainer( - from: HubClient.default, - id: "mlx-community/Qwen3-4B-4bit" +let model = try await #huggingFaceLoadModelContainer( + configuration: modelConfiguration ) -``` - -At the lower-level core API, you can still pass any `Downloader` and any `TokenizerLoader` explicitly. 
-Loading from a local directory: - -```swift -// Before (2.x) -let container = try await loadModelContainer(directory: modelDirectory) - -// After (3.x) -let container = try await loadModelContainer(from: modelDirectory) +let session = ChatSession(model) +print(try await session.respond(to: "What are two things to see in San Francisco?")) +print(try await session.respond(to: "How about a great place to eat?")) ``` -Loading with a model factory: - -```swift -let container = try await LLMModelFactory.shared.loadContainer( - from: HubClient.default, - configuration: modelConfiguration -) -``` +Using the [adapter packages](https://swiftpackageindex.com/ml-explore/mlx-swift-lm/main/documentation/mlxlmcommon/using#Integration-Packages) you would have similar code -- replace the imports and the load line. -Loading an embedder: +For example, loading from a local directory using the [DePasqualeOrg/swift-tokenizers-mlx](https://github.com/DePasqualeOrg/swift-tokenizers-mlx): ```swift -import MLXEmbedders -import MLXEmbeddersHFAPI -import MLXEmbeddersTokenizers +import MLXLLM +import MLXLMTokenizers +let modelDirectory = URL(filePath: "/path/to/model") let container = try await loadModelContainer( - from: HubClient.default, - configuration: .configuration(id: "sentence-transformers/all-MiniLM-L6-v2") + from: modelDirectory, + using: TokenizersLoader() ) ``` - -### Renamed methods - -`decode(tokens:)` is renamed to `decode(tokenIds:)` to align with the `transformers` library in Python: - -```swift -// Before (2.x) -let text = tokenizer.decode(tokens: ids) - -// After (3.0) -let text = tokenizer.decode(tokenIds: ids) -``` - -## Documentation - -Developers can use these examples in their own programs -- just import the swift package! 
- -- [Porting and implementing models](https://swiftpackageindex.com/ml-explore/mlx-swift-lm/main/documentation/mlxlmcommon/porting) -- [MLXLLMCommon](https://swiftpackageindex.com/ml-explore/mlx-swift-lm/main/documentation/mlxlmcommon): Common API for LLM and VLM -- [MLXLLM](https://swiftpackageindex.com/ml-explore/mlx-swift-lm/main/documentation/mlxllm): Large language model example implementations -- [MLXVLM](https://swiftpackageindex.com/ml-explore/mlx-swift-lm/main/documentation/mlxvlm): Vision language model example implementations -- [MLXEmbedders](https://swiftpackageindex.com/ml-explore/mlx-swift-lm/main/documentation/mlxembedders): Popular encoders and embedding models example implementations - -## Breaking Changes - -### Loading API - -The `hub` parameter (previously `HubApi`) has been replaced with `from` (any `Downloader` or `URL` for a local directory). Functions that previously defaulted to `defaultHubApi` no longer have a default – callers must either pass a `Downloader` explicitly or use the convenience methods in `MLXLMHuggingFace` / `MLXEmbeddersHuggingFace`, which default to `HubClient.default`. - -For most users who were using the default Hub client, adding `import MLXLMHuggingFace` or `import MLXEmbeddersHuggingFace` and using the convenience overloads is sufficient. - -Users who were passing a custom `HubApi` instance should create a `HubClient` instead and pass it as the `from` parameter. `HubClient` conforms to `Downloader` via `MLXLMHuggingFace`. - -### `ModelConfiguration` - -- `tokenizerId` and `overrideTokenizer` have been replaced by `tokenizerSource: TokenizerSource?`, which supports `.id(String)` for remote sources and `.directory(URL)` for local paths. -- `preparePrompt` has been removed. This shouldn't be used anyway, since support for chat templates is available. -- `modelDirectory(hub:)` has been removed. For local directories, pass the `URL` directly to the loading functions. 
For remote models, the `Downloader` protocol handles resolution. - -### Tokenizer loading - -`loadTokenizer(configuration:hub:)` has been removed. Tokenizer loading now uses `AutoTokenizer.from(directory:)` from Swift Tokenizers directly. - -`replacementTokenizers` (the `TokenizerReplacementRegistry`) has been removed. Use `AutoTokenizer.register(_:for:)` from Swift Tokenizers instead. - -### `defaultHubApi` - -The `defaultHubApi` global has been removed. Hugging Face Hub access is now provided by `HubClient.default` from the `HuggingFace` module. - -### Low-level APIs - -- `downloadModel(hub:configuration:progressHandler:)` → `Downloader.download(id:revision:matching:useLatest:progressHandler:)` -- `loadTokenizerConfig(configuration:hub:)` → `AutoTokenizer.from(directory:)` -- `ModelFactory._load(hub:configuration:progressHandler:)` → `_load(configuration: ResolvedModelConfiguration)` -- `ModelFactory._loadContainer`: removed (base `loadContainer` now builds the container from `_load`) -