Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions .github/workflows/pull_request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ jobs:
xcrun --show-sdk-build-version
swift --version
rm -rf ~/Library/Developer/Xcode/DerivedData/*
xcodebuild build-for-testing -scheme mlx-libraries-Package -destination 'platform=macOS'
xcodebuild build-for-testing -scheme mlx-libraries-Package -destination 'platform=macOS' -skipMacroValidation

- name: Build tools (Xcode, macOS)
shell: sh
Expand All @@ -107,6 +107,7 @@ jobs:
xcrun --show-sdk-build-version
swift --version
find . -name Package.resolved -exec rm {} \;
xcodebuild -scheme llm-tool
xcodebuild -scheme image-tool
xcodebuild -scheme mnist-tool
xcodebuild -scheme llm-tool -skipMacroValidation
xcodebuild -scheme embedder-tool -skipMacroValidation
xcodebuild -scheme image-tool -skipMacroValidation
xcodebuild -scheme mnist-tool -skipMacroValidation
9 changes: 8 additions & 1 deletion Applications/LLMBasic/ChatModel.swift
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
// Copyright © 2025 Apple Inc.

import HuggingFace
import MLXHuggingFace
import MLXLLM
import MLXLMCommon
import SwiftUI
import Tokenizers

/// which model to load
private let modelConfiguration = LLMRegistry.gemma3_1B_qat_4bit
Expand Down Expand Up @@ -40,7 +43,11 @@ private let generateParameters = GenerateParameters(temperature: 0.5)
case .idle:
let task = Task {
// download and report progress
try await loadModelContainer(configuration: modelConfiguration) { value in
try await LLMModelFactory.shared.loadContainer(
from: #hubDownloader(),
using: #huggingFaceTokenizerLoader(),
configuration: modelConfiguration
) { value in
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Along with linking the HuggingFace libraries, this is the pattern for adopting the new API.

Task { @MainActor in
self.progress = value.fractionCompleted
}
Expand Down
24 changes: 12 additions & 12 deletions Applications/LLMEval/ViewModels/LLMEvaluator.swift
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
// Copyright © 2025 Apple Inc.

import Hub
import HuggingFace
import MLX
import MLXHuggingFace
import MLXLLM
import MLXLMCommon
import Metal
import SwiftUI
import Tokenizers

@Observable
@MainActor
Expand Down Expand Up @@ -101,14 +104,11 @@ class LLMEvaluator {

Memory.cacheLimit = 20 * 1024 * 1024

let hub = HubApi(
downloadBase: FileManager.default.urls(for: .cachesDirectory, in: .userDomainMask).first
)

do {
let modelDirectory = try await downloadModel(
hub: hub,
configuration: modelConfiguration
let downloader = #hubDownloader()

let resolved = try await resolve(
configuration: modelConfiguration, from: downloader, useLatest: false
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a little more complex because it separates the download from the load: `resolve()` will download the model and produce resolved URLs that can then be loaded.

) { [weak self] progress in
Task { @MainActor in
self?.updateDownloadProgress(progress)
Expand All @@ -117,8 +117,9 @@ class LLMEvaluator {

// Verify the download succeeded by checking for model files
let fileManager = FileManager.default
let directoryExists = fileManager.fileExists(atPath: modelDirectory.path)
let contents = (try? fileManager.contentsOfDirectory(atPath: modelDirectory.path)) ?? []
let directoryExists = fileManager.fileExists(atPath: resolved.modelDirectory.path)
let contents =
(try? fileManager.contentsOfDirectory(atPath: resolved.modelDirectory.path)) ?? []
let hasSafetensors = contents.contains { $0.hasSuffix(".safetensors") }

if !directoryExists || !hasSafetensors {
Expand All @@ -137,9 +138,8 @@ class LLMEvaluator {
totalSize = nil

let modelContainer = try await LLMModelFactory.shared.loadContainer(
hub: hub,
configuration: modelConfiguration
) { _ in }
from: resolved.modelDirectory,
using: #huggingFaceTokenizerLoader())

let numParams = await modelContainer.perform { $0.model.numParameters() }

Expand Down
39 changes: 26 additions & 13 deletions Applications/LoRATrainingExample/ContentView.swift
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright © 2024 Apple Inc.

import HuggingFace
import MLX
import MLXHuggingFace
import MLXLLM
import MLXLMCommon
import MLXNN
Expand Down Expand Up @@ -141,7 +143,12 @@ class LoRAEvaluator {
progress = .init(title: "Loading \(name)", current: 0, limit: 1)
}

let downloader = #hubDownloader()
let loader = #huggingFaceTokenizerLoader()
Comment on lines +146 to +147
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't work yet — there is a local `Progress` type that conflicts with the `Progress` type used by the macro. See ml-explore/mlx-swift-lm#189.


let modelContainer = try await LLMModelFactory.shared.loadContainer(
from: downloader,
using: loader,
configuration: modelConfiguration
) {
progress in
Expand Down Expand Up @@ -186,7 +193,7 @@ class LoRAEvaluator {
let modelContainer = try await loadModel()

// apply LoRA adapters and train
let modelAdapter = try await modelContainer.perform { context in
let _ = try await modelContainer.perform { context in
try LoRAContainer.from(
model: context.model,
configuration: LoRAConfiguration(numLayers: loraLayers)
Expand Down Expand Up @@ -263,22 +270,28 @@ class LoRAEvaluator {
let modelContainer = try await loadModel()

// evaluate
let result = try await modelContainer.perform { context in
let input = try await context.processor.prepare(input: .init(prompt: prompt))
return try MLXLMCommon.generate(
input: input, parameters: generateParameters, context: context
) { tokens in
if tokens.count % evaluateShowEvery == 0 {
let fullOutput = context.tokenizer.decode(tokens: tokens)
Task { @MainActor in
self.output = fullOutput
}
let input = try await modelContainer.processor.prepare(input: .init(prompt: prompt))

var count = 0
var output = ""
for try await item in try await modelContainer.generate(
input: input, parameters: generateParameters
) {
switch item {
case .chunk(let string):
count += 1
output += string

if count % evaluateShowEvery == 0 {
self.output = output
}
return tokens.count >= maxTokens ? .stop : .more

default:
break
}
}

self.output = result.output
self.output = output
self.progress = nil
}
}
10 changes: 9 additions & 1 deletion Applications/MLXChatExample/Services/MLXService.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@
//

import Foundation
import HuggingFace
import MLX
import MLXHuggingFace
import MLXLLM
import MLXLMCommon
import MLXVLM
import Tokenizers

/// A service class that manages machine learning models for text and vision-language tasks.
/// This class handles model loading, caching, and text generation using various LLM and VLM models.
Expand Down Expand Up @@ -63,9 +66,14 @@ class MLXService {
VLMModelFactory.shared
}

let downloader = #hubDownloader()
let loader = #huggingFaceTokenizerLoader()

// Load model and track download progress
let container = try await factory.loadContainer(
hub: .default, configuration: model.configuration
from: downloader,
using: loader,
configuration: model.configuration
) { progress in
Task { @MainActor in
self.modelDownloadProgress = progress
Expand Down
80 changes: 76 additions & 4 deletions Package.resolved

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ let package = Package(
targets: ["StableDiffusion"]),
],
dependencies: [
.package(url: "https://github.com/ml-explore/mlx-swift", .upToNextMinor(from: "0.30.3")),
.package(url: "https://github.com/ml-explore/mlx-swift", .upToNextMinor(from: "0.31.3")),

// Note: used by StableDiffusion library to download weights
.package(
url: "https://github.com/huggingface/swift-transformers",
.upToNextMinor(from: "1.1.0")
.upToNextMajor(from: "1.3.0")
),
.package(url: "https://github.com/1024jp/GzipSwift", "6.0.1" ... "6.0.1"), // Only needed by MLXMNIST
],
Expand Down
9 changes: 5 additions & 4 deletions Tools/embedder-tool/EmbedderRuntime+Embedding.swift
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import Foundation
import MLX
import MLXEmbedders
import Tokenizers
import MLXLMCommon

public struct RuntimeEmbeddingResult {
public let embeddings: [(index: Int, vector: [Float])]
Expand All @@ -26,9 +26,10 @@ extension EmbedderRuntime {
embeddings: [], skippedIndices: [], fallbackDescription: nil)
}

return try await container.perform { model, tokenizer, pooler in
return try await container.perform { context in
var skippedIndices: [Int] = []

let tokenizer = context.tokenizer
let encoded = texts.enumerated().compactMap { index, text -> (Int, [Int])? in
let tokens = tokenizer.encode(text: text, addSpecialTokens: true)
guard !tokens.isEmpty else {
Expand Down Expand Up @@ -58,14 +59,14 @@ extension EmbedderRuntime {
let mask = (padded .!= padToken)
let tokenTypes = MLXArray.zeros(like: padded)

let outputs = model(
let outputs = context.model(
padded,
positionIds: nil,
tokenTypeIds: tokenTypes,
attentionMask: mask
)

let poolingModule = resolvedPooler(for: pooler)
let poolingModule = resolvedPooler(for: context.pooling)
let pooled = poolingModule(
outputs,
mask: mask,
Expand Down
Loading
Loading