@@ -49,6 +49,9 @@ struct TransformersCLI: AsyncParsableCommand {
@Option(help: "Repetition penalty to discourage repeating tokens (typical: 1.0-2.0, 1.0 = no penalty)")
var repetitionPenalty: Float?

@Option(help: "Path to a local folder containing tokenizer_config.json and tokenizer.json")
var tokenizerFolder: String?
Review comment (Member):
Suggested change:
-var tokenizerFolder: String?
+var tokenizerPath: String?

(nit: this is perhaps more idiomatic in Swift APIs)


func generate(
model: LanguageModel,
config: GenerationConfig,
@@ -104,7 +107,17 @@ struct TransformersCLI: AsyncParsableCommand {
let url = URL(filePath: modelPath)
let compiledURL = try compile(at: url)
print("Loading model \(compiledURL)")
-let model = try LanguageModel.loadCompiled(url: compiledURL, computeUnits: computeUnits.asMLComputeUnits)
let model: LanguageModel
if let tokenizerFolder {
let tokenizerURL = URL(filePath: tokenizerFolder, directoryHint: .isDirectory)
model = try LanguageModel.loadCompiled(
url: compiledURL,
tokenizerFolder: tokenizerURL,
computeUnits: computeUnits.asMLComputeUnits
)
} else {
model = try LanguageModel.loadCompiled(url: compiledURL, computeUnits: computeUnits.asMLComputeUnits)
}

var config = model.defaultGenerationConfig
config.doSample = doSample
37 changes: 35 additions & 2 deletions README.md
@@ -88,6 +88,41 @@ example converting and running Mistral 7B using CoreML [here](https://github.com

The [modernization of Core ML](https://github.com/huggingface/swift-transformers/pull/257) and corresponding examples were primarily contributed by @joshnewnham, @1duo, @alejandro-isaza, @aseemw. Thank you 🙏

### Offline CoreML tokenizers

When you bundle a compiled CoreML model and tokenizer files with your app, you can skip any network requests by injecting
the tokenizer (or a local configuration) when constructing `LanguageModel`:

```swift
let compiledURL: URL = ... // path to .mlmodelc
let tokenizerFolder: URL = ... // folder containing tokenizer_config.json and tokenizer.json
Review comment (Member):
Suggested change:
-let tokenizerFolder: URL = ... // folder containing tokenizer_config.json and tokenizer.json
+let tokenizerURL: URL = ... // folder containing tokenizer_config.json and tokenizer.json


let model = try LanguageModel.loadCompiled(
url: compiledURL,
tokenizerFolder: tokenizerFolder
)

// Or construct the tokenizer yourself (inside an async context)
let tokenizer = try await AutoTokenizer.from(modelFolder: tokenizerFolder)
let modelWithTokenizer = try LanguageModel.loadCompiled(
url: compiledURL,
tokenizer: tokenizer
)
```

Make sure the tokenizer assets come from the same Hugging Face repo as the original checkpoint. For the
Review comment (Member):
Suggested change:
-Make sure the tokenizer assets come from the same Hugging Face repo as the original checkpoint. For the
+Make sure the tokenizer assets come from the same Hugging Face repo as the original checkpoint or are compatible with the model you use. For the

Mistral example in `Examples/Mistral7B/`, you can fetch the tokenizer like this:

```bash
huggingface-cli download \
mistralai/Mistral-7B-Instruct-v0.3 \
tokenizer.json tokenizer_config.json \
--local-dir Examples/Mistral7B/local-tokenizer
```

If the repo is gated, authenticate with `huggingface-cli login` first. Both initializers rely solely on the
local tokenizer assets you provide and never reach out to the Hugging Face Hub.
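
For example, an app that bundles the compiled model and tokenizer files in its resources could resolve everything locally like this (an illustrative sketch; the resource names are hypothetical):

```swift
// Locate the bundled assets (resource names here are examples only)
guard let tokenizerFolder = Bundle.main.url(forResource: "local-tokenizer", withExtension: nil),
      let compiledURL = Bundle.main.url(forResource: "Model", withExtension: "mlmodelc")
else {
    fatalError("Bundled model or tokenizer assets not found")
}

// No network access: the tokenizer is read straight from the app bundle
let model = try LanguageModel.loadCompiled(url: compiledURL, tokenizerFolder: tokenizerFolder)
```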

## Usage via SwiftPM

To use `swift-transformers` with SwiftPM, you can add this to your `Package.swift`:
@@ -139,5 +174,3 @@ To format your code, run `swift format -i --recursive .`.
## License

[Apache 2](LICENSE).


96 changes: 85 additions & 11 deletions Sources/Models/LanguageModel.swift
@@ -33,12 +33,30 @@ public class LanguageModel {

/// Creates a new language model instance from a CoreML model.
///
-/// - Parameter model: The CoreML model to wrap
/// - Parameters:
/// - model: The CoreML model to wrap
/// - configuration: Optional Hub configuration already resolved on disk
/// - tokenizer: Optional preconstructed tokenizer to reuse
/// - Important: Triggers a fatal error if the model doesn't have the expected input shape information
-public required init(model: MLModel) {
public required init(
model: MLModel,
configuration: LanguageModelConfigurationFromHub? = nil,
tokenizer: Tokenizer? = nil
) {
self.model = model
_tokenizer = tokenizer
(minContextLength, maxContextLength) = Self.contextRange(from: model)
-configuration = LanguageModelConfigurationFromHub(modelName: modelName)
if let configuration {
self.configuration = configuration
} else if tokenizer == nil {
self.configuration = LanguageModelConfigurationFromHub(modelName: modelName)
} else {
self.configuration = nil
}
Review comment (Member) on lines +49 to +55:
I find it a bit confusing that if configuration is provided, then tokenizer will be silently ignored. These look like two different ways to inject a tokenizer. Could we maybe use multiple initializers instead?

Another option is to just remove the configuration argument for now and discuss in a new PR. Is the main reason to add it to provide a custom HubApi? That's useful, of course, but perhaps we could just provide that instead of the full configuration.
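
A rough sketch of the multiple-initializer idea (hypothetical, shown only to illustrate the suggestion; not part of this diff):

```swift
// Each injection path gets its own entry point, so the two can never conflict.
public convenience init(model: MLModel, tokenizer: Tokenizer) {
    self.init(model: model, configuration: nil, tokenizer: tokenizer)
}

public convenience init(model: MLModel, configuration: LanguageModelConfigurationFromHub) {
    self.init(model: model, configuration: configuration, tokenizer: nil)
}
```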

}

public convenience required init(model: MLModel) {
self.init(model: model, configuration: nil, tokenizer: nil)
}

public func resetState() async {}
@@ -142,17 +160,60 @@ public extension LanguageModel {
/// - Parameters:
/// - url: The URL of the compiled CoreML model file (.mlmodelc)
/// - computeUnits: The compute units to use for model inference
/// - configuration: Optional Hub configuration describing tokenizer/model metadata
/// - tokenizer: Optional tokenizer instance to reuse instead of loading from disk
/// - Returns: A configured `LanguageModel` instance
/// - Throws: An error if the model cannot be loaded from the specified URL
-static func loadCompiled(url: URL, computeUnits: MLComputeUnits = .cpuAndGPU) throws -> LanguageModel {
static func loadCompiled(
url: URL,
computeUnits: MLComputeUnits = .cpuAndGPU,
configuration: LanguageModelConfigurationFromHub? = nil,
tokenizer: Tokenizer? = nil
) throws -> LanguageModel {
let config = MLModelConfiguration()
config.computeUnits = computeUnits
let model = try MLModel(contentsOf: url, configuration: config)
return switch kvCacheAvailability(for: model) {
-case .statefulKVCache: LanguageModelWithStatefulKVCache(model: model)
-default: LanguageModel(model: model)
case .statefulKVCache:
LanguageModelWithStatefulKVCache(
model: model,
configuration: configuration,
tokenizer: tokenizer
)
default:
LanguageModel(
model: model,
configuration: configuration,
tokenizer: tokenizer
)
}
}

static func loadCompiled(
url: URL,
tokenizerFolder: URL,
computeUnits: MLComputeUnits = .cpuAndGPU
Review comment (Member) on lines +194 to +195:
Suggested change:
-tokenizerFolder: URL,
-computeUnits: MLComputeUnits = .cpuAndGPU
+computeUnits: MLComputeUnits = .cpuAndGPU,
+tokenizer tokenizerFolder: URL,

This makes it look like an overloaded version of the previous method (keeping the same argument order and overloading the type of tokenizer, while still using tokenizerFolder internally). This is common in Swift APIs (although perhaps the tokenizer name could be somewhat misleading).
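
With that shape, the argument type selects the overload at the call site (a sketch; `compiledURL`, `tokenizerFolderURL`, and `myTokenizer` are placeholder names):

```swift
// Same method name; overload resolution picks the URL vs. Tokenizer variant
let fromFolder = try LanguageModel.loadCompiled(url: compiledURL, tokenizer: tokenizerFolderURL)
let fromInstance = try LanguageModel.loadCompiled(url: compiledURL, tokenizer: myTokenizer)
```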

) throws -> LanguageModel {
let configuration = LanguageModelConfigurationFromHub(modelFolder: tokenizerFolder)
return try loadCompiled(
url: url,
computeUnits: computeUnits,
configuration: configuration
)
}

static func loadCompiled(
url: URL,
tokenizer: Tokenizer,
computeUnits: MLComputeUnits = .cpuAndGPU
Review comment (Member) on lines +207 to +208:
Suggested change:
-tokenizer: Tokenizer,
-computeUnits: MLComputeUnits = .cpuAndGPU
+computeUnits: MLComputeUnits = .cpuAndGPU,
+tokenizer: Tokenizer,

) throws -> LanguageModel {
try loadCompiled(
url: url,
computeUnits: computeUnits,
configuration: nil,
tokenizer: tokenizer
)
}
}

@available(macOS 15.0, iOS 18.0, *)
@@ -304,7 +365,8 @@ public extension LanguageModel {
/// - Throws: An error if the configuration cannot be loaded
var modelConfig: Config? {
get async throws {
-try await configuration!.modelConfig
guard let configuration else { return nil }
return try await configuration.modelConfig
}
}

@@ -314,7 +376,8 @@ public extension LanguageModel {
/// - Throws: An error if the configuration cannot be loaded
var tokenizerConfig: Config? {
get async throws {
-try await configuration!.tokenizerConfig
guard let configuration else { return nil }
return try await configuration.tokenizerConfig
}
}

@@ -324,7 +387,10 @@ public extension LanguageModel {
/// - Throws: An error if the tokenizer data cannot be loaded
var tokenizerData: Config {
get async throws {
-try await configuration!.tokenizerData
guard let configuration else {
throw TokenizerError.missingConfig
}
return try await configuration.tokenizerData
}
}

@@ -434,8 +500,12 @@ public class LanguageModelWithStatefulKVCache: LanguageModel {

var state: MLState?

-public required init(model: MLModel) {
-super.init(model: model)
public required init(
model: MLModel,
configuration: LanguageModelConfigurationFromHub? = nil,
tokenizer: Tokenizer? = nil
) {
super.init(model: model, configuration: configuration, tokenizer: tokenizer)
// To support pre-filling and extend, the input must support
// flexible shapes.
guard maxContextLength - minContextLength > 1 else {
@@ -506,11 +576,15 @@ public class LanguageModelWithStatefulKVCache: LanguageModel {
public enum TokenizerError: LocalizedError {
/// The tokenizer configuration file could not be found.
case tokenizerConfigNotFound
/// The language model configuration required to load tokenizer data is missing.
case missingConfig

public var errorDescription: String? {
switch self {
case .tokenizerConfigNotFound:
String(localized: "Tokenizer configuration could not be found. The model may be missing required tokenizer files.", comment: "Error when tokenizer configuration is missing")
case .missingConfig:
String(localized: "Language model configuration was not set, so tokenizer assets could not be loaded.", comment: "Error when configuration needed for tokenizer data is missing")
}
}
}