45d43f2645
- LlmEngineBridge.h/.mm: Objective-C++ wrapper around LiteRT-LM C++ API - SleepyAgent-Bridging-Header.h: Swift bridging header - Updated LlmEngine.swift to use the bridge - Added LITERT_INTEGRATION.md with detailed research findings Based on analysis of Google's litert-samples repository: - Google uses C++ bridge pattern for iOS (confirmed in image_segmentation example) - MediaPipe has working Swift API but is deprecated - LiteRT-LM Swift APIs are 'coming soon' The bridge pattern matches how Google AI Edge Gallery iOS app is likely implemented
284 lines
9.2 KiB
Swift
import Foundation
|
|
import UIKit
|
|
|
|
// MARK: - Errors

/// Failures surfaced by the LLM engine and its conversations.
public enum LlmEngineError: LocalizedError {
    case modelNotFound(path: String)
    case modelNotLoaded
    case conversationClosed
    case generationFailed(underlying: Error)
    case invalidMultimodalInput
    case engineInitializationFailed(underlying: Error)
    case notImplemented

    /// Human-readable description used by `LocalizedError`.
    public var errorDescription: String? {
        let message: String
        switch self {
        case .modelNotFound(let missingPath):
            message = "Model file not found at: \(missingPath)"
        case .modelNotLoaded:
            message = "No model is currently loaded"
        case .conversationClosed:
            message = "Conversation has been closed"
        case .generationFailed(let cause):
            message = "Generation failed: \(cause.localizedDescription)"
        case .invalidMultimodalInput:
            message = "Invalid multimodal input provided"
        case .engineInitializationFailed(let cause):
            message = "Failed to initialize engine: \(cause.localizedDescription)"
        case .notImplemented:
            message = "This feature requires LiteRT-LM C++ integration"
        }
        return message
    }
}
|
|
|
|
// MARK: - Conversation

/// Mutable state for one chat session: a liveness flag plus a role-tagged
/// message transcript.
///
/// Instances cross actor/thread boundaries (the engine actor appends to
/// `messageHistory` while callers may invoke `close()` from anywhere), so the
/// mutable state is guarded by a lock to actually honor the
/// `@unchecked Sendable` promise instead of merely asserting it.
public final class Conversation: @unchecked Sendable {
    private let lock = NSLock()
    private var _isAlive = true
    private var _messageHistory: [(role: String, content: String)] = []

    /// Whether the conversation can still be used for generation.
    public var isAlive: Bool {
        get { lock.lock(); defer { lock.unlock() }; return _isAlive }
        set { lock.lock(); defer { lock.unlock() }; _isAlive = newValue }
    }

    /// Role-tagged transcript; roles used by the engine are
    /// "system", "user", and "assistant".
    internal var messageHistory: [(role: String, content: String)] {
        get { lock.lock(); defer { lock.unlock() }; return _messageHistory }
        set { lock.lock(); defer { lock.unlock() }; _messageHistory = newValue }
    }

    internal init() {}

    /// Marks the conversation closed; subsequent generation calls that check
    /// `isAlive` will throw `LlmEngineError.conversationClosed`.
    public func close() {
        isAlive = false
    }

    deinit {
        close()
    }
}
|
|
|
|
// MARK: - LlmEngine Protocol

/// Actor-isolated interface for an on-device LLM engine: model lifecycle,
/// conversation creation, and one-shot / streaming text generation.
public protocol LlmEngine: Actor {
    /// Whether a model is currently loaded and ready for generation.
    var isLoaded: Bool { get }

    /// Loads the model file at `path`, making the engine ready for generation.
    func loadModel(path: String) async throws

    /// Creates a conversation seeded with `systemPrompt`.
    /// Requires a loaded model.
    func createConversation(systemPrompt: String) throws -> Conversation

    /// Generates a complete response to `prompt` within `conversation`.
    /// `audioData` and `images` are optional multimodal attachments
    /// (implementations may reject them as unsupported).
    func generate(
        conversation: Conversation,
        prompt: String,
        audioData: Data?,
        images: [UIImage]?
    ) async throws -> String

    /// Like `generate`, but yields response text incrementally as chunks.
    func generateStream(
        conversation: Conversation,
        prompt: String,
        audioData: Data?,
        images: [UIImage]?
    ) -> AsyncThrowingStream<String, Error>

    /// Releases the loaded model and resets engine state.
    func unload()
}
|
|
|
|
// MARK: - LiteRT-LM Engine Implementation

/// LiteRT-LM based LLM Engine using Objective-C++ bridge
///
/// Architecture:
/// - Swift LlmEngine (this file) -> Obj-C++ LlmEngineBridge -> C++ LiteRT-LM
///
/// This approach is necessary because:
/// 1. LiteRT-LM Swift APIs are "coming soon" (as of 2025)
/// 2. Google's own apps use C++ bridge pattern (verified in litert-samples)
///
public actor LiteRtLlmEngine: LlmEngine {
    public static let shared = LiteRtLlmEngine()

    /// True from a successful `loadModel(path:)` until `unload()`.
    public private(set) var isLoaded: Bool = false
    private var currentModelPath: String?
    private var currentConversation: Conversation?
    private var systemPrompt: String = ""

    // Objective-C++ bridge instance; non-nil exactly while `isLoaded` is true.
    private var bridge: LlmEngineBridge?

    private let maxTokens = 16384

    private init() {}

    // MARK: - Model Loading

    /// Loads the model at `path`, replacing any previously loaded model.
    ///
    /// - Throws: `LlmEngineError.modelNotFound` if no file exists at `path`;
    ///   `LlmEngineError.engineInitializationFailed` if the bridge cannot be
    ///   created.
    public func loadModel(path: String) async throws {
        unload()

        guard FileManager.default.fileExists(atPath: path) else {
            throw LlmEngineError.modelNotFound(path: path)
        }

        // Initialize the Objective-C++ bridge
        var error: NSError?
        let newBridge = LlmEngineBridge(
            modelPath: path,
            accelerator: .cpu, // Can use .metal for GPU acceleration
            error: &error
        )

        if let error = error {
            throw LlmEngineError.engineInitializationFailed(underlying: error)
        }

        guard let bridge = newBridge else {
            throw LlmEngineError.engineInitializationFailed(
                underlying: NSError(domain: "LlmEngine", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to create bridge"])
            )
        }

        self.bridge = bridge
        self.isLoaded = true
        self.currentModelPath = path

        print("[LiteRtLlmEngine] Model loaded: \(path)")
    }

    // MARK: - Conversation Management

    /// Starts a fresh conversation seeded with `systemPrompt`, resetting the
    /// bridge's shared history. Any previous conversation is closed.
    ///
    /// - Throws: `LlmEngineError.modelNotLoaded` if no model is loaded.
    public func createConversation(systemPrompt: String) throws -> Conversation {
        guard isLoaded else {
            throw LlmEngineError.modelNotLoaded
        }

        // Close the previous conversation first: the bridge keeps a single
        // shared history, so a still-alive old conversation would silently
        // interleave its messages with the new one's.
        currentConversation?.close()

        self.systemPrompt = systemPrompt

        let conversation = Conversation()
        conversation.messageHistory.append(("system", systemPrompt))

        // Clear any existing history in the bridge
        bridge?.clearHistory()

        // Add system prompt to bridge
        bridge?.add(toHistory: systemPrompt, role: "system")

        self.currentConversation = conversation
        return conversation
    }

    // MARK: - Generation

    /// Generates a complete response to `prompt` within `conversation`.
    ///
    /// - Throws: `LlmEngineError.conversationClosed`, `.modelNotLoaded`,
    ///   `.notImplemented` (multimodal input), `.generationFailed`, or
    ///   `CancellationError` if the caller's task was cancelled.
    public func generate(
        conversation: Conversation,
        prompt: String,
        audioData: Data? = nil,
        images: [UIImage]? = nil
    ) async throws -> String {
        guard conversation.isAlive else {
            throw LlmEngineError.conversationClosed
        }

        guard isLoaded, let bridge = bridge else {
            throw LlmEngineError.modelNotLoaded
        }

        // TODO: Handle multimodal inputs (images, audio)
        // For now, focus on text-only generation
        if audioData != nil || !(images?.isEmpty ?? true) {
            // Multimodal not yet implemented in bridge
            throw LlmEngineError.notImplemented
        }

        // Honor cooperative cancellation before starting a potentially
        // expensive synchronous generation and before mutating history.
        try Task.checkCancellation()

        // Add user message to history
        // NOTE(review): the prompt is both appended to the bridge history and
        // passed to generateResponse — confirm the bridge does not duplicate it.
        conversation.messageHistory.append(("user", prompt))
        bridge.add(toHistory: prompt, role: "user")

        // Generate response
        var error: NSError?
        let response = bridge.generateResponse(prompt, error: &error)

        if let error = error {
            throw LlmEngineError.generationFailed(underlying: error)
        }

        guard let text = response else {
            throw LlmEngineError.generationFailed(
                underlying: NSError(domain: "LlmEngine", code: -1, userInfo: [NSLocalizedDescriptionKey: "Empty response"])
            )
        }

        // Add assistant response to history
        conversation.messageHistory.append(("assistant", text))
        bridge.add(toHistory: text, role: "assistant")

        return text
    }

    /// Streams a response to `prompt` chunk-by-chunk.
    ///
    /// Cancelling or abandoning the returned stream cancels the producing
    /// task, and the native bridge stream is always closed — including on
    /// error paths.
    public func generateStream(
        conversation: Conversation,
        prompt: String,
        audioData: Data? = nil,
        images: [UIImage]? = nil
    ) -> AsyncThrowingStream<String, Error> {
        AsyncThrowingStream { continuation in
            let task = Task {
                do {
                    guard conversation.isAlive else {
                        throw LlmEngineError.conversationClosed
                    }

                    guard self.isLoaded, let bridge = self.bridge else {
                        throw LlmEngineError.modelNotLoaded
                    }

                    // Handle multimodal (not implemented)
                    if audioData != nil || !(images?.isEmpty ?? true) {
                        throw LlmEngineError.notImplemented
                    }

                    // Add user message to history
                    conversation.messageHistory.append(("user", prompt))
                    bridge.add(toHistory: prompt, role: "user")

                    // Get streaming response from bridge
                    var error: NSError?
                    guard let stream = bridge.generateResponseStream(prompt, error: &error) else {
                        if let error = error {
                            throw LlmEngineError.generationFailed(underlying: error)
                        } else {
                            throw LlmEngineError.generationFailed(
                                underlying: NSError(domain: "LlmEngine", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to create stream"])
                            )
                        }
                    }
                    // Release the native stream on every exit path, including
                    // cancellation and thrown errors (previously leaked on error).
                    defer { stream.close() }

                    // Read chunks from stream
                    var fullResponse = ""
                    while stream.hasMore {
                        // Stop promptly if the consumer cancelled the stream.
                        try Task.checkCancellation()
                        if let chunk = stream.nextChunk() {
                            continuation.yield(chunk)
                            fullResponse.append(chunk)
                        }
                        // Small delay to prevent blocking
                        try await Task.sleep(nanoseconds: 1_000_000) // 1ms
                    }

                    // Add complete response to history
                    conversation.messageHistory.append(("assistant", fullResponse))
                    bridge.add(toHistory: fullResponse, role: "assistant")

                    continuation.finish()

                } catch {
                    continuation.finish(throwing: error)
                }
            }
            // Propagate consumer-side termination (cancellation / deallocation)
            // to the producing task so generation doesn't run on unobserved.
            continuation.onTermination = { _ in task.cancel() }
        }
    }

    // MARK: - Utility

    /// Releases the bridge and resets all engine state. Safe to call repeatedly.
    public func unload() {
        // Invalidate any outstanding conversation so callers get
        // `conversationClosed` instead of generating against a dead bridge.
        currentConversation?.close()
        bridge?.close()
        bridge = nil
        isLoaded = false
        currentModelPath = nil
        currentConversation = nil
        print("[LiteRtLlmEngine] Unloaded")
    }

    /// Estimates the token count of `text`, delegating to the bridge when a
    /// model is loaded and falling back to a rough chars/4 heuristic otherwise.
    public func estimateTokens(text: String) -> Int {
        return bridge?.estimateTokens(text) ?? (text.count / 4)
    }
}
|