import Foundation
import UIKit

// MARK: - Errors

public enum LlmEngineError: LocalizedError {
    case modelNotFound(path: String)
    case modelNotLoaded
    case conversationClosed
    case generationFailed(underlying: Error)
    case invalidMultimodalInput
    case engineInitializationFailed(underlying: Error)
    case notImplemented

    public var errorDescription: String? {
        switch self {
        case .modelNotFound(let path):
            return "Model file not found at: \(path)"
        case .modelNotLoaded:
            return "No model is currently loaded"
        case .conversationClosed:
            return "Conversation has been closed"
        case .generationFailed(let error):
            return "Generation failed: \(error.localizedDescription)"
        case .invalidMultimodalInput:
            return "Invalid multimodal input provided"
        case .engineInitializationFailed(let error):
            return "Failed to initialize engine: \(error.localizedDescription)"
        case .notImplemented:
            return "This feature requires LiteRT-LM C++ integration"
        }
    }
}

// MARK: - Conversation

public final class Conversation: @unchecked Sendable {
    public var isAlive: Bool = true
    internal var messageHistory: [(role: String, content: String)] = []

    internal init() {}

    public func close() {
        isAlive = false
    }

    deinit {
        close()
    }
}

// MARK: - LlmEngine Protocol

public protocol LlmEngine: Actor {
    var isLoaded: Bool { get }

    func loadModel(path: String) async throws

    func createConversation(systemPrompt: String) throws -> Conversation

    func generate(
        conversation: Conversation,
        prompt: String,
        audioData: Data?,
        images: [UIImage]?
    ) async throws -> String

    func generateStream(
        conversation: Conversation,
        prompt: String,
        audioData: Data?,
        images: [UIImage]?
    ) -> AsyncThrowingStream<String, Error>

    func unload()
}

// MARK: - LiteRT-LM Engine Implementation

/// LiteRT-LM based LLM Engine using an Objective-C++ bridge.
///
/// Architecture:
/// - Swift LlmEngine (this file) -> Obj-C++ LlmEngineBridge -> C++ LiteRT-LM
///
/// This approach is necessary because:
/// 1. LiteRT-LM Swift APIs are "coming soon" (as of 2025)
/// 2. Google's own apps use the C++ bridge pattern (verified in litert-samples)
public actor LiteRtLlmEngine: LlmEngine {
    public static let shared = LiteRtLlmEngine()

    public private(set) var isLoaded: Bool = false

    private var currentModelPath: String?
    private var currentConversation: Conversation?
    private var systemPrompt: String = ""

    // Objective-C++ bridge instance
    private var bridge: LlmEngineBridge?

    private let maxTokens = 16384

    private init() {}

    // MARK: - Model Loading

    public func loadModel(path: String) async throws {
        unload()

        guard FileManager.default.fileExists(atPath: path) else {
            throw LlmEngineError.modelNotFound(path: path)
        }

        // Initialize the Objective-C++ bridge
        var error: NSError?
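        // The call below assumes the bridge exposes an initializer that reports
        // failure through a trailing NSError out-parameter rather than `throws`.
        // A hypothetical sketch of the Obj-C declaration (the real one lives in
        // the bridge header, e.g. LlmEngineBridge.h):
        //
        //   - (nullable instancetype)initWithModelPath:(NSString *)modelPath
        //                                  accelerator:(LlmAccelerator)accelerator
        //                                        error:(NSError **)error;
        //
        // Note: by default Swift imports a trailing NSError** as `throws`; the
        // inout-error pattern used here assumes the header opts out of that
        // convention (e.g. via a refined Swift name).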
        let newBridge = LlmEngineBridge(
            modelPath: path,
            accelerator: .cpu, // Can use .metal for GPU acceleration
            error: &error
        )

        if let error = error {
            throw LlmEngineError.engineInitializationFailed(underlying: error)
        }

        guard let bridge = newBridge else {
            throw LlmEngineError.engineInitializationFailed(
                underlying: NSError(
                    domain: "LlmEngine",
                    code: -1,
                    userInfo: [NSLocalizedDescriptionKey: "Failed to create bridge"]
                )
            )
        }

        self.bridge = bridge
        self.isLoaded = true
        self.currentModelPath = path

        print("[LiteRtLlmEngine] Model loaded: \(path)")
    }

    // MARK: - Conversation Management

    public func createConversation(systemPrompt: String) throws -> Conversation {
        guard isLoaded else {
            throw LlmEngineError.modelNotLoaded
        }

        self.systemPrompt = systemPrompt

        let conversation = Conversation()
        conversation.messageHistory.append(("system", systemPrompt))

        // Clear any existing history in the bridge
        bridge?.clearHistory()

        // Add system prompt to bridge
        bridge?.add(toHistory: systemPrompt, role: "system")

        self.currentConversation = conversation
        return conversation
    }

    // MARK: - Generation

    public func generate(
        conversation: Conversation,
        prompt: String,
        audioData: Data? = nil,
        images: [UIImage]? = nil
    ) async throws -> String {
        guard conversation.isAlive else {
            throw LlmEngineError.conversationClosed
        }
        guard isLoaded, let bridge = bridge else {
            throw LlmEngineError.modelNotLoaded
        }

        // TODO: Handle multimodal inputs (images, audio)
        // For now, focus on text-only generation
        if audioData != nil || !(images?.isEmpty ?? true) {
            // Multimodal not yet implemented in bridge
            throw LlmEngineError.notImplemented
        }

        // Add user message to history
        conversation.messageHistory.append(("user", prompt))
        bridge.add(toHistory: prompt, role: "user")

        // Generate response
        var error: NSError?
        let response = bridge.generateResponse(prompt, error: &error)

        if let error = error {
            throw LlmEngineError.generationFailed(underlying: error)
        }

        guard let text = response else {
            throw LlmEngineError.generationFailed(
                underlying: NSError(
                    domain: "LlmEngine",
                    code: -1,
                    userInfo: [NSLocalizedDescriptionKey: "Empty response"]
                )
            )
        }

        // Add assistant response to history
        conversation.messageHistory.append(("assistant", text))
        bridge.add(toHistory: text, role: "assistant")

        return text
    }

    public func generateStream(
        conversation: Conversation,
        prompt: String,
        audioData: Data? = nil,
        images: [UIImage]? = nil
    ) -> AsyncThrowingStream<String, Error> {
        AsyncThrowingStream { continuation in
            Task {
                do {
                    guard conversation.isAlive else {
                        throw LlmEngineError.conversationClosed
                    }
                    guard self.isLoaded, let bridge = self.bridge else {
                        throw LlmEngineError.modelNotLoaded
                    }

                    // Handle multimodal (not implemented)
                    if audioData != nil || !(images?.isEmpty ?? true) {
                        throw LlmEngineError.notImplemented
                    }

                    // Add user message to history
                    conversation.messageHistory.append(("user", prompt))
                    bridge.add(toHistory: prompt, role: "user")

                    // Get streaming response from bridge
                    var error: NSError?
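                    // The pull-based loop below assumes the bridge returns a
                    // chunk-iterator object roughly like this hypothetical
                    // Obj-C interface (names are assumptions, not confirmed
                    // against the actual bridge header):
                    //
                    //   @interface LlmResponseStream : NSObject
                    //   @property(nonatomic, readonly) BOOL hasMore;
                    //   - (nullable NSString *)nextChunk; // nil if no chunk ready yet
                    //   - (void)close;
                    //   @end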
                    guard let stream = bridge.generateResponseStream(prompt, error: &error) else {
                        if let error = error {
                            throw LlmEngineError.generationFailed(underlying: error)
                        } else {
                            throw LlmEngineError.generationFailed(
                                underlying: NSError(
                                    domain: "LlmEngine",
                                    code: -1,
                                    userInfo: [NSLocalizedDescriptionKey: "Failed to create stream"]
                                )
                            )
                        }
                    }

                    // Read chunks from stream
                    var fullResponse = ""
                    while stream.hasMore {
                        if let chunk = stream.nextChunk() {
                            continuation.yield(chunk)
                            fullResponse.append(chunk)
                        }
                        // Small delay to prevent blocking
                        try await Task.sleep(nanoseconds: 1_000_000) // 1ms
                    }

                    // Close stream
                    stream.close()

                    // Add complete response to history
                    conversation.messageHistory.append(("assistant", fullResponse))
                    bridge.add(toHistory: fullResponse, role: "assistant")

                    continuation.finish()
                } catch {
                    continuation.finish(throwing: error)
                }
            }
        }
    }

    // MARK: - Utility

    public func unload() {
        bridge?.close()
        bridge = nil
        isLoaded = false
        currentModelPath = nil
        currentConversation = nil
        print("[LiteRtLlmEngine] Unloaded")
    }

    public func estimateTokens(text: String) -> Int {
        return bridge?.estimateTokens(text) ?? (text.count / 4)
    }
}
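// MARK: - Usage Example

/// A minimal usage sketch, not part of the engine itself. `modelPath` is a
/// placeholder; a real app would resolve it from the bundle or a download
/// directory. The prompts below are illustrative only.
func exampleConversation(modelPath: String) async throws {
    let engine = LiteRtLlmEngine.shared
    try await engine.loadModel(path: modelPath)

    let conversation = try await engine.createConversation(
        systemPrompt: "You are a helpful assistant."
    )

    // One-shot generation
    let reply = try await engine.generate(
        conversation: conversation,
        prompt: "Hello!"
    )
    print(reply)

    // Streaming generation: chunks arrive as the bridge produces them
    for try await chunk in await engine.generateStream(
        conversation: conversation,
        prompt: "Tell me a short story."
    ) {
        print(chunk, terminator: "")
    }

    conversation.close()
    await engine.unload()
}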