diff --git a/LITERT_INTEGRATION.md b/LITERT_INTEGRATION.md
new file mode 100644
index 0000000..95f5a6f
--- /dev/null
+++ b/LITERT_INTEGRATION.md
@@ -0,0 +1,236 @@
+# LiteRT-LM iOS Integration - Accurate Approach
+
+## What Google Actually Uses
+
+Based on analysis of Google's official samples:
+
+### 1. Google AI Edge Gallery App
+- **iOS app exists** on App Store: https://apps.apple.com/us/app/google-ai-edge-gallery/id6749645337
+- **Source code**: NOT in the gallery repo (Android only)
+- **Implementation**: Most likely LiteRT-LM via a C++ bridge; no iOS source is published (see GitHub issue #420 asking for it)
+
+### 2. MediaPipe LLM Inference (DEPRECATED but working)
+- Has a **working Swift API** via CocoaPods
+- Source: https://github.com/google-ai-edge/mediapipe-samples/tree/main/examples/llm_inference/ios
+
+```ruby
+pod 'MediaPipeTasksGenAI'
+pod 'MediaPipeTasksGenAIC'
+```
+
+```swift
+import MediaPipeTasksGenAI
+
+let options = LlmInference.Options(modelPath: path)
+let inference = try LlmInference(options: options)
+let result = try inference.generateResponse(inputText: prompt)
+```
+
+**Status**: Google deprecated this in favor of LiteRT-LM, but it is currently the only working Swift API.
+
+### 3. LiteRT Compiled Model API (Vision Models)
+- Uses the **Objective-C++ bridge** pattern
+- Source: https://github.com/google-ai-edge/litert-samples/tree/main/compiled_model_api/image_segmentation/ios
+
+Pattern:
+```objc
+// LiteRTSegmenter.h - Objective-C header
+@interface LiteRTSegmenter : NSObject
+- (instancetype)initWithModelPath:(NSString *)path error:(NSError **)error;
+@end
+```
+
+```objc++
+// LiteRTSegmenter.mm - Objective-C++ implementation
+#import "LiteRTSegmenter.h"
+#include "litert/cc/litert_compiled_model.h"
+
+@implementation LiteRTSegmenter {
+    std::optional<litert::CompiledModel> _model;
+}
+@end
+```
+
+## Recommended Integration for Sleepy Agent
+
+### Option 1: Use MediaPipe Tasks (Immediate, but deprecated)
+
+**Podfile:**
+```ruby
+pod 'MediaPipeTasksGenAI', '~> 0.10.0'
+pod 'MediaPipeTasksGenAIC'
+```
+
+**Note**: Limited to older model formats (.bin, not .litertlm); likely no Gemma 3n support.
+
+### Option 2: LiteRT-LM C++ Bridge (Recommended)
+
+Based on Google's actual implementation pattern. The `litert::lm` API names in the sketches below are illustrative; check them against the current LiteRT-LM headers.
+
+**Files to create:**
+
+1. **LlmEngineBridge.h** (Objective-C header)
+```objc
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+@interface LlmEngineBridge : NSObject
+- (nullable instancetype)initWithModelPath:(NSString *)path
+                                     error:(NSError **)error;
+- (nullable NSString *)generateResponse:(NSString *)prompt
+                                  error:(NSError **)error;
+- (void)close;
+@end
+
+NS_ASSUME_NONNULL_END
+```
+
+2. **LlmEngineBridge.mm** (Objective-C++ implementation)
+```objc++
+#import "LlmEngineBridge.h"
+#include "litert_lm/engine.h"
+#include "litert_lm/conversation.h"
+
+@interface LlmEngineBridge () {
+    std::unique_ptr<litert::lm::Engine> engine;
+    std::unique_ptr<litert::lm::Conversation> conversation;
+}
+@end
+
+@implementation LlmEngineBridge
+
+- (instancetype)initWithModelPath:(NSString *)path error:(NSError **)error {
+    self = [super init];
+    if (self) {
+        auto config = litert::lm::EngineConfig{
+            .model_path = [path UTF8String]
+        };
+        auto result = litert::lm::Engine::Create(config);
+        if (!result.ok()) {
+            if (error) {
+                *error = [NSError errorWithDomain:@"LlmEngineBridge"
+                                             code:1
+                                         userInfo:nil];
+            }
+            return nil;
+        }
+        engine = std::move(*result);
+
+        // Create conversation for KV cache
+        auto conv_result = engine->CreateConversation({});
+        if (conv_result.ok()) {
+            conversation = std::move(*conv_result);
+        }
+    }
+    return self;
+}
+
+- (NSString *)generateResponse:(NSString *)prompt error:(NSError **)error {
+    if (!conversation) {
+        return nil;
+    }
+
+    auto contents = litert::lm::Contents::FromText([prompt UTF8String]);
+    auto response = conversation->SendMessage(contents);
+
+    if (response.ok()) {
+        return [NSString stringWithUTF8String:response->text().c_str()];
+    }
+    return nil;
+}
+
+- (void)close {
+    conversation.reset();
+    engine.reset();
+}
+
+@end
+```
+
+3. **Bridging-Header.h**
+```objc
+#import "LlmEngineBridge.h"
+```
+
+4. **Swift wrapper** (update existing LlmEngine.swift)
+```swift
+import Foundation
+
+actor LiteRtLlmEngine: LlmEngine {
+    static let shared = LiteRtLlmEngine()
+
+    private var bridge: LlmEngineBridge?
+
+    func loadModel(path: String) async throws {
+        // initWithModelPath:error: imports into Swift as a throwing initializer
+        bridge = try LlmEngineBridge(modelPath: path)
+    }
+
+    func generate(prompt: String) async throws -> String {
+        guard let bridge = bridge else {
+            throw LlmEngineError.modelNotLoaded
+        }
+        // generateResponse:error: likewise imports as a throwing method
+        return try bridge.generateResponse(prompt)
+    }
+}
+```
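+
+Because `initWithModelPath:error:` and `generateResponse:error:` follow the Cocoa error convention (nullable result plus a trailing `NSError **` parameter), Swift imports them as throwing members and drops the `error:` argument entirely. A minimal sketch of driving the bridge above from Swift, assuming `modelPath` points at a downloaded model file:
+
+```swift
+do {
+    // The trailing error: parameters vanish; the members become `throws`.
+    let bridge = try LlmEngineBridge(modelPath: modelPath)
+    let reply = try bridge.generateResponse("Hello")
+    print(reply)
+} catch {
+    print("LiteRT-LM bridge error: \(error)")
+}
+```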
+
+### Build Configuration
+
+**Podfile:**
+```ruby
+# Standard LiteRT runtime (basic inference only; LiteRT-LM itself ships separately)
+pod 'TensorFlowLiteSwift', '~> 2.16.0'
+
+# Or manual integration with prebuilt LiteRT-LM binaries
+```
+
+**Build Settings** (an illustrative xcconfig follows this list):
+- Use the .mm extension so the bridge compiles as Objective-C++ (or set "Compile Sources As" to "Objective-C++")
+- Add header search paths for the LiteRT-LM includes
+- Link the C++ standard library (libc++)
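+
+A minimal sketch of the same settings as an `.xcconfig`. The keys are standard Xcode build settings; the header search path is a placeholder for wherever the LiteRT-LM headers land in your checkout:
+
+```
+// SleepyAgent.xcconfig (illustrative)
+CLANG_CXX_LANGUAGE_STANDARD = c++17
+CLANG_CXX_LIBRARY = libc++
+SWIFT_OBJC_BRIDGING_HEADER = SleepyAgent/Inference/Bridge/SleepyAgent-Bridging-Header.h
+HEADER_SEARCH_PATHS = $(inherited) $(SRCROOT)/third_party/litert_lm/include
+OTHER_LDFLAGS = $(inherited) -lc++
+```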
+
+## Where to Get LiteRT-LM Binaries
+
+1. **Build from source**: https://github.com/google-ai-edge/LiteRT-LM
+2. **Releases page**: Check https://github.com/google-ai-edge/LiteRT-LM/releases
+3. **CocoaPods**: May be available in the future
+
+## Current Status Summary
+
+| Approach        | Availability | Gemma 3n | Swift API | Recommendation   |
+|-----------------|--------------|----------|-----------|------------------|
+| MediaPipe Tasks | ✅ Now       | ❌ No    | ✅ Yes    | Short-term only  |
+| LiteRT-LM C++   | ✅ Now       | ✅ Yes   | ❌ No     | **Recommended**  |
+| LiteRT-LM Swift | ⏳ Coming    | ✅ Yes   | ✅ Yes    | Wait if possible |
+
+## Key Insight
+
+The Google AI Edge Gallery iOS app likely uses the **C++ bridge approach** since:
+1. No Swift source code is published
+2. The pattern matches their litert-samples
+3. LiteRT-LM's Swift APIs are still marked "coming soon"
+
+## Next Steps
+
+To complete Sleepy Agent iOS:
+
+1. Download/build LiteRT-LM iOS binaries
+2. Create the Objective-C++ bridge files (LlmEngineBridge.h/.mm)
+3. Update the Swift LlmEngine to use the bridge
+4. Configure Xcode build settings for C++
+5. Test with the Gemma 3n E2B model
+
+## References
+
+- **LiteRT-LM GitHub**: https://github.com/google-ai-edge/LiteRT-LM
+- **Google's C++ Bridge Example**: https://github.com/google-ai-edge/litert-samples/tree/main/compiled_model_api/image_segmentation/ios
+- **MediaPipe iOS Sample**: https://github.com/google-ai-edge/mediapipe-samples/tree/main/examples/llm_inference/ios
+- **Gallery Issue #420**: https://github.com/google-ai-edge/gallery/issues/420 (asking for iOS source)
diff --git a/SleepyAgent/Inference/Bridge/LlmEngineBridge.h b/SleepyAgent/Inference/Bridge/LlmEngineBridge.h
new file mode 100644
index 0000000..5f0a528
--- /dev/null
+++ b/SleepyAgent/Inference/Bridge/LlmEngineBridge.h
@@ -0,0 +1,92 @@
+//
+//  LlmEngineBridge.h
+//  SleepyAgent
+//
+//  Objective-C bridge to LiteRT-LM C++ API
+//
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/// Error domain for LiteRT-LM bridge errors
+extern NSString *const kLiteRTLMErrorDomain;
+
+/// Accelerator options for model inference
+typedef NS_ENUM(NSInteger, LiteRTAccelerator) {
+    LiteRTAcceleratorCPU = 0,
+    LiteRTAcceleratorMetal = 1,
+    LiteRTAcceleratorCoreML = 2
+};
+
+/// Represents a streaming response from the LLM
+@interface LiteRTResponseStream : NSObject
+
+/// Get the next chunk of the response (blocking)
+/// Returns nil when the stream is complete
+- (nullable NSString *)nextChunk;
+
+/// Check if the stream has more data
+@property (nonatomic, readonly) BOOL hasMore;
+
+/// Close the stream and release resources
+- (void)close;
+
+@end
+
+/// Bridge class for LiteRT-LM LLM inference
+/// Wraps the C++ LiteRT-LM API for use in Swift
+@interface LlmEngineBridge : NSObject
+
+- (instancetype)init NS_UNAVAILABLE;
+
+/// Initialize the LLM engine with a model file
+/// @param modelPath Path to the .litertlm model file
+/// @param accelerator Hardware accelerator to use (CPU, Metal, CoreML)
+/// @param error Error pointer for initialization failures
+/// @return Initialized engine bridge or nil on error
+- (nullable instancetype)initWithModelPath:(NSString *)modelPath
+                               accelerator:(LiteRTAccelerator)accelerator
+                                     error:(NSError **)error NS_DESIGNATED_INITIALIZER;
+
+/// Generate a response for a single prompt (non-streaming)
+/// @param prompt The user's input text
+/// @param error Error pointer for generation failures
+/// @return The generated response or nil on error
+- (nullable NSString *)generateResponse:(NSString *)prompt
+                                  error:(NSError **)error;
+
+/// Generate a streaming response
+/// @param prompt The user's input text
+/// @param error Error pointer for generation failures
+/// @return A stream object to read response chunks
+- (nullable LiteRTResponseStream *)generateResponseStream:(NSString *)prompt
+                                                     error:(NSError **)error;
+
+/// Add a message to the conversation history
+/// This maintains context for multi-turn conversations (KV cache)
+/// @param message The message text
+/// @param role The role ("system", "user", or "assistant")
+- (void)addToHistory:(NSString *)message
+                role:(NSString *)role;
+
+/// Clear the conversation history and reset KV cache
+- (void)clearHistory;
+
+/// Check if the engine is initialized and ready
+@property (nonatomic, readonly) BOOL isReady;
+
+/// Get the maximum number of tokens the model supports
+@property (nonatomic, readonly) NSInteger maxTokens;
+
+/// Estimate the number of tokens in a string
+/// @param text The text to measure
+/// @return Token count or -1 if estimation fails
+- (NSInteger)estimateTokens:(NSString *)text;
+
+/// Close the engine and release all resources
+- (void)close;
+
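+/// Example (Swift): the trailing NSError** parameters import into Swift as
+/// `throws`, so given an initialized bridge a streamed generation looks like
+/// the following sketch:
+///
+///     let stream = try bridge.generateResponseStream(prompt)
+///     while stream.hasMore {
+///         if let chunk = stream.nextChunk() {
+///             print(chunk, terminator: "")
+///         }
+///     }
+///     stream.close()
+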
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/SleepyAgent/Inference/Bridge/LlmEngineBridge.mm b/SleepyAgent/Inference/Bridge/LlmEngineBridge.mm
new file mode 100644
index 0000000..a2b6fd9
--- /dev/null
+++ b/SleepyAgent/Inference/Bridge/LlmEngineBridge.mm
@@ -0,0 +1,273 @@
+//
+//  LlmEngineBridge.mm
+//  SleepyAgent
+//
+//  Objective-C++ implementation of LiteRT-LM bridge
+//
+
+#import "LlmEngineBridge.h"
+#include <memory>
+#include <string>
+#include <vector>
+
+// TODO: Include actual LiteRT-LM headers when available
+// These are placeholder includes - replace with actual paths
+// #include "litert_lm/engine.h"
+// #include "litert_lm/conversation.h"
+// #include "litert_lm/content.h"
+
+NSString *const kLiteRTLMErrorDomain = @"com.sleepyagent.litert.lm";
+
+// MARK: - Private Interface
+
+@interface LlmEngineBridge () {
+    // TODO: Replace with actual LiteRT-LM C++ types
+    // std::unique_ptr<litert::lm::Engine> _engine;
+    // std::unique_ptr<litert::lm::Conversation> _conversation;
+
+    // Stub: Just track state for now
+    BOOL _isInitialized;
+    NSString *_modelPath;
+    LiteRTAccelerator _accelerator;
+    NSMutableArray<NSDictionary<NSString *, NSString *> *> *_history;
+}
+@end
+
+// MARK: - Response Stream Implementation
+
+@interface LiteRTResponseStream () {
+    NSMutableArray<NSString *> *_chunks;
+    NSInteger _currentIndex;
+    BOOL _isComplete;
+}
+@end
+
+@implementation LiteRTResponseStream
+
+- (instancetype)init {
+    self = [super init];
+    if (self) {
+        _chunks = [NSMutableArray array];
+        _currentIndex = 0;
+        _isComplete = NO;
+    }
+    return self;
+}
+
+- (void)addChunk:(NSString *)chunk {
+    [_chunks addObject:chunk];
+}
+
+- (void)markComplete {
+    _isComplete = YES;
+}
+
+- (nullable NSString *)nextChunk {
+    if (_currentIndex < _chunks.count) {
+        return _chunks[_currentIndex++];
+    }
+    return nil;
+}
+
+- (BOOL)hasMore {
+    return _currentIndex < _chunks.count || !_isComplete;
+}
+
+- (void)close {
+    _chunks = nil;
+    _isComplete = YES;
+}
+
+@end
+
+// MARK: - LlmEngineBridge Implementation
+
+@implementation LlmEngineBridge
+
+- (nullable instancetype)initWithModelPath:(NSString *)modelPath
+                               accelerator:(LiteRTAccelerator)accelerator
+                                     error:(NSError **)error {
+    self = [super init];
+    if (self) {
+        _modelPath = [modelPath copy];
+        _accelerator = accelerator;
+        _history = [NSMutableArray array];
+
+        // Check if model file exists
+        if (![[NSFileManager defaultManager] fileExistsAtPath:modelPath]) {
+            if (error) {
+                *error = [NSError errorWithDomain:kLiteRTLMErrorDomain
+                                             code:404
+                                         userInfo:@{NSLocalizedDescriptionKey:
+                                             [NSString stringWithFormat:@"Model file not found: %@", modelPath]}];
+            }
+            return nil;
+        }
+
+        // TODO: Initialize actual LiteRT-LM engine
+        //
+        // Example implementation:
+        // auto config = litert::lm::EngineConfig{
+        //     .model_path = [modelPath UTF8String],
+        //     .max_num_tokens = 8192
+        // };
+        //
+        // auto accel = (accelerator == LiteRTAcceleratorMetal)
+        //     ? litert::HwAccelerators::kGpu
+        //     : litert::HwAccelerators::kCpu;
+        //
+        // auto result = litert::lm::Engine::Create(config, accel);
+        // if (!result.ok()) {
+        //     if (error) {
+        //         *error = [NSError errorWithDomain:kLiteRTLMErrorDomain
+        //                                      code:1
+        //                                  userInfo:@{NSLocalizedDescriptionKey:
+        //                                      @(result.status().message().data())}];
+        //     }
+        //     return nil;
+        // }
+        // _engine = std::move(*result);
+        //
+        // // Create conversation for KV cache
+        // auto conv_config = litert::lm::ConversationConfig{};
+        // auto conv_result = _engine->CreateConversation(conv_config);
+        // if (conv_result.ok()) {
+        //     _conversation = std::move(*conv_result);
+        // }
+
+        // Stub: Simulate successful initialization
+        _isInitialized = YES;
+    }
+    return self;
+}
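+
+// NOTE: When the real engine is wired up, a small helper keeps the
+// absl::Status -> NSError conversion in one place. A sketch follows; the
+// litert::lm types above and absl::Status here are assumptions until the
+// real headers are vendored:
+//
+// static NSError *LiteRTLMErrorFromStatus(const absl::Status &status,
+//                                         NSInteger code) {
+//     NSString *message = [NSString stringWithUTF8String:
+//         std::string(status.message()).c_str()];
+//     return [NSError errorWithDomain:kLiteRTLMErrorDomain
+//                                code:code
+//                            userInfo:@{NSLocalizedDescriptionKey: message}];
+// }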
+
+- (nullable NSString *)generateResponse:(NSString *)prompt
+                                  error:(NSError **)error {
+    if (!_isInitialized) {
+        if (error) {
+            *error = [NSError errorWithDomain:kLiteRTLMErrorDomain
+                                         code:2
+                                     userInfo:@{NSLocalizedDescriptionKey: @"Engine not initialized"}];
+        }
+        return nil;
+    }
+
+    // TODO: Implement actual generation with LiteRT-LM
+    //
+    // Example:
+    // auto contents = litert::lm::Contents::FromText([prompt UTF8String]);
+    // auto response = _conversation->SendMessage(contents);
+    // if (response.ok()) {
+    //     return [NSString stringWithUTF8String:response->text().c_str()];
+    // } else {
+    //     if (error) {
+    //         *error = [NSError errorWithDomain:kLiteRTLMErrorDomain
+    //                                      code:3
+    //                                  userInfo:@{NSLocalizedDescriptionKey:
+    //                                      @(response.status().message().data())}];
+    //     }
+    //     return nil;
+    // }
+
+    // Stub: Return placeholder response
+    return [NSString stringWithFormat:@"[STUB] LiteRT-LM Swift APIs are coming soon. "
+            @"This is a placeholder response for prompt: %@", prompt];
+}
+
+- (nullable LiteRTResponseStream *)generateResponseStream:(NSString *)prompt
+                                                     error:(NSError **)error {
+    if (!_isInitialized) {
+        if (error) {
+            *error = [NSError errorWithDomain:kLiteRTLMErrorDomain
+                                         code:2
+                                     userInfo:@{NSLocalizedDescriptionKey: @"Engine not initialized"}];
+        }
+        return nil;
+    }
+
+    LiteRTResponseStream *stream = [[LiteRTResponseStream alloc] init];
+
+    // TODO: Implement actual streaming with LiteRT-LM
+    //
+    // Example:
+    // auto contents = litert::lm::Contents::FromText([prompt UTF8String]);
+    // auto async_response = _conversation->SendMessageAsync(contents);
+    //
+    // for (const auto& chunk : async_response) {
+    //     [stream addChunk:@(chunk.text().c_str())];
+    // }
+    // [stream markComplete];
+
+    // Stub: Simulate streaming with placeholder
+    NSArray<NSString *> *words = @[@"LiteRT-LM", @"on", @"iOS", @"requires", @"C++",
+                                   @"integration.", @"Swift", @"APIs", @"are", @"'coming",
+                                   @"soon'", @"per", @"Google.", @"See", @"LITERT_INTEGRATION.md",
+                                   @"for", @"details."];
+
+    for (NSString *word in words) {
+        [stream addChunk:[word stringByAppendingString:@" "]];
+    }
+    [stream markComplete];
+
+    return stream;
+}
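+
+// NOTE: The stub stream above is fully populated before it is returned, so
+// -nextChunk never actually blocks. A real LiteRT-LM integration would append
+// chunks from the engine's callback thread, which needs producer/consumer
+// synchronization (e.g. a serial dispatch queue or a lock) around _chunks
+// and _isComplete in LiteRTResponseStream.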
+
+- (void)addToHistory:(NSString *)message
+                role:(NSString *)role {
+    [_history addObject:@{@"role": role, @"message": message}];
+
+    // TODO: Add to LiteRT-LM conversation history
+    // if (_conversation) {
+    //     auto role_str = [role isEqualToString:@"user"]
+    //         ? litert::lm::Role::kUser
+    //         : litert::lm::Role::kAssistant;
+    //     _conversation->AddMessage(role_str, [message UTF8String]);
+    // }
+}
+
+- (void)clearHistory {
+    [_history removeAllObjects];
+
+    // TODO: Reset LiteRT-LM conversation
+    // if (_engine) {
+    //     auto conv_config = litert::lm::ConversationConfig{};
+    //     auto conv_result = _engine->CreateConversation(conv_config);
+    //     if (conv_result.ok()) {
+    //         _conversation = std::move(*conv_result);
+    //     }
+    // }
+}
+
+- (BOOL)isReady {
+    return _isInitialized; // && _engine != nullptr;
+}
+
+- (NSInteger)maxTokens {
+    return 16384; // Default, could query from model
+}
+
+- (NSInteger)estimateTokens:(NSString *)text {
+    // TODO: Use the actual tokenizer once the engine is wired up:
+    // if (_engine) {
+    //     return _engine->EstimateTokens([text UTF8String]);
+    // }
+
+    // Rough estimation: ~4 characters per token
+    return (NSInteger)(text.length / 4);
+}
+
+- (void)close {
+    // TODO: Release C++ resources
+    // _conversation.reset();
+    // _engine.reset();
+
+    _isInitialized = NO;
+    _modelPath = nil;
+    [_history removeAllObjects];
+}
+
+- (void)dealloc {
+    [self close];
+}
+
+@end
diff --git a/SleepyAgent/Inference/Bridge/SleepyAgent-Bridging-Header.h b/SleepyAgent/Inference/Bridge/SleepyAgent-Bridging-Header.h
new file mode 100644
index 0000000..e9da346
--- /dev/null
+++ b/SleepyAgent/Inference/Bridge/SleepyAgent-Bridging-Header.h
@@ -0,0 +1,13 @@
+//
+//  SleepyAgent-Bridging-Header.h
+//  SleepyAgent
+//
+//  Bridging header for Objective-C++ LiteRT-LM bridge
+//
+
+#ifndef SleepyAgent_Bridging_Header_h
+#define SleepyAgent_Bridging_Header_h
+
+#import "LlmEngineBridge.h"
+
+#endif /* SleepyAgent_Bridging_Header_h */
diff --git a/SleepyAgent/Inference/LlmEngine.swift b/SleepyAgent/Inference/LlmEngine.swift
index 4fc8afe..7f689b8 100644
--- a/SleepyAgent/Inference/LlmEngine.swift
+++ b/SleepyAgent/Inference/LlmEngine.swift
@@ -27,15 +27,13 @@ public enum LlmEngineError: LocalizedError {
         case .engineInitializationFailed(let error):
             return "Failed to initialize engine: \(error.localizedDescription)"
         case .notImplemented:
-            return "This feature requires LiteRT-LM C++ integration (Swift APIs coming soon)"
+            return "This feature requires LiteRT-LM C++ integration"
         }
     }
 }
 
 // MARK: - Conversation
 
-/// Conversation wrapper for managing chat sessions
-/// Note: In full implementation, this wraps LiteRT-LM's Conversation object
 public final class Conversation: @unchecked Sendable {
     public var isAlive: Bool = true
     internal var messageHistory: [(role: String, content: String)] = []
@@ -75,52 +73,14 @@ public protocol LlmEngine: Actor {
 
 // MARK: - LiteRT-LM Engine Implementation
 
-/// LiteRT-LM based LLM Engine
-///
-/// # Important Implementation Note:
-///
-/// LiteRT-LM Swift APIs are "coming soon" per Google (as of 2025).
-/// Current iOS support requires using the C++ API directly with Objective-C++ bridging.
-///
-/// ## Integration Options:
-///
-/// ### Option 1: Use TensorFlowLiteSwift (Limited)
-/// Standard LiteRT pod works for basic inference but lacks LLM-specific features
-/// like KV cache management, conversation handling, and tool use.
-///
-/// ```ruby
-/// # Podfile
-/// pod 'TensorFlowLiteSwift', '~> 2.16.0'
-/// ```
-///
-/// ### Option 2: C++ Bridge (Full Features) ⭐ Recommended
-/// Use LiteRT-LM C++ API with Objective-C++ wrapper:
-///
-/// 1. Add C++ source files (.mm)
-/// 2. Include LiteRT-LM headers
-/// 3. Bridge to Swift via Objective-C
-///
-/// ```objc
-/// // LlmEngineBridge.h
-/// @interface LlmEngineBridge : NSObject
-/// - (BOOL)loadModel:(NSString *)path error:(NSError **)error;
-/// - (NSString *)generate:(NSString *)prompt;
-/// @end
-/// ```
-///
-/// ### Option 3: Wait for Swift APIs
-/// Google has announced Swift APIs are coming. Monitor:
-/// https://ai.google.dev/edge/litert-lm
-///
-/// ## Current Status:
-/// - Android: ✅ Full Kotlin support
-/// - iOS: ⚠️ C++ only (Swift APIs coming soon)
-/// - Models: ✅ Gemma 4 E2B/E4B available on HuggingFace
-///
-/// ## References:
-/// - LiteRT-LM GitHub: https://github.com/google-ai-edge/LiteRT-LM
-/// - iOS C++ Guide: https://ai.google.dev/edge/litert-lm/cpp
-/// - Models: https://huggingface.co/litert-community
+/// LiteRT-LM based LLM Engine using an Objective-C++ bridge
+///
+/// Architecture:
+/// - Swift LlmEngine (this file) -> Obj-C++ LlmEngineBridge -> C++ LiteRT-LM
+///
+/// This approach is necessary because:
+/// 1. LiteRT-LM Swift APIs are "coming soon" (as of 2025)
+/// 2. Google's own apps appear to use the same C++ bridge pattern
+///    (it is the pattern used in litert-samples)
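+///
+/// Example usage (a sketch, from an async context, with `modelPath` pointing
+/// at a downloaded .litertlm file):
+///
+/// ```swift
+/// let engine = LiteRtLlmEngine.shared
+/// try await engine.loadModel(path: modelPath)
+/// let conversation = try await engine.createConversation(systemPrompt: "You are a helpful assistant.")
+/// let reply = try await engine.generate(conversation: conversation,
+///                                       prompt: "Hello!",
+///                                       audioData: nil,
+///                                       images: nil)
+/// ```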
+///
 public actor LiteRtLlmEngine: LlmEngine {
     public static let shared = LiteRtLlmEngine()
 
@@ -128,12 +88,13 @@ public actor LiteRtLlmEngine: LlmEngine {
     public private(set) var isLoaded: Bool = false
     private var currentModelPath: String?
     private var currentConversation: Conversation?
+    private var systemPrompt: String = ""
+
+    // Objective-C++ bridge instance
+    private var bridge: LlmEngineBridge?
 
     private let maxTokens = 16384
 
-    // TODO: Add actual LiteRT-LM C++ engine reference here
-    // private var cppEngine: UnsafeMutableRawPointer?
-
     private init() {}
 
     // MARK: - Model Loading
 
@@ -145,44 +106,50 @@ public actor LiteRtLlmEngine: LlmEngine {
             throw LlmEngineError.modelNotFound(path: path)
         }
 
-        // TODO: Implement actual LiteRT-LM loading
-        //
-        // Example C++ integration (in .mm file):
-        //
-        // #include "litert_lm/engine.h"
-        //
-        // auto config = litert::lm::EngineConfig{
-        //     .model_path = path.UTF8String,
-        //     .max_num_tokens = maxTokens
-        // };
-        //
-        // auto engine = litert::lm::Engine::Create(config);
-        // if (!engine.ok()) {
-        //     throw LlmEngineError.engineInitializationFailed(...)
-        // }
-        // cppEngine = engine->release();
+        // Initialize the Objective-C++ bridge.
+        // initWithModelPath:accelerator:error: imports into Swift as a
+        // throwing initializer, so failures surface as thrown errors.
+        let newBridge: LlmEngineBridge
+        do {
+            newBridge = try LlmEngineBridge(
+                modelPath: path,
+                accelerator: .cpu // Use .metal for GPU acceleration
+            )
+        } catch {
+            throw LlmEngineError.engineInitializationFailed(underlying: error)
+        }
 
-        // Stub: Simulate loading
-        try await Task.sleep(nanoseconds: 500_000_000)
-
+        self.bridge = newBridge
         self.isLoaded = true
         self.currentModelPath = path
+
+        print("[LiteRtLlmEngine] Model loaded: \(path)")
     }
 
-    // MARK: - Conversation
+    // MARK: - Conversation Management
 
     public func createConversation(systemPrompt: String) throws -> Conversation {
         guard isLoaded else {
             throw LlmEngineError.modelNotLoaded
         }
 
+        self.systemPrompt = systemPrompt
+
         let conversation = Conversation()
         conversation.messageHistory.append(("system", systemPrompt))
+
+        // Clear any existing history in the bridge
+        bridge?.clearHistory()
+
+        // Add system prompt to bridge
+        bridge?.add(toHistory: systemPrompt, role: "system")
+
         self.currentConversation = conversation
-
-        // TODO: Create actual LiteRT-LM conversation
-        // auto conv = cppEngine->CreateConversation(config);
-
         return conversation
     }
 
@@ -198,19 +165,40 @@ public actor LiteRtLlmEngine: LlmEngine {
             throw LlmEngineError.conversationClosed
         }
 
-        guard isLoaded else {
+        guard isLoaded, let bridge = bridge else {
             throw LlmEngineError.modelNotLoaded
         }
 
-        // TODO: Implement actual generation
-        //
-        // C++ example:
-        // auto contents = litert::lm::Contents::FromText(prompt.UTF8String);
-        // auto response = conv->SendMessage(contents);
-        // return [NSString stringWithUTF8String:response.text().c_str()];
+        // TODO: Handle multimodal inputs (images, audio)
+        // For now, focus on text-only generation
+        if audioData != nil || !(images?.isEmpty ?? true) {
+            // Multimodal not yet implemented in bridge
+            throw LlmEngineError.notImplemented
+        }
 
-        // Stub response
-        return "[STUB] LiteRT-LM Swift APIs are coming soon. Use C++ bridge for full functionality."
+        // Add user message to history
+        conversation.messageHistory.append(("user", prompt))
+        bridge.add(toHistory: prompt, role: "user")
+
+        // Generate the response. generateResponse:error: imports into Swift
+        // as a throwing method.
+        let text: String
+        do {
+            text = try bridge.generateResponse(prompt)
+        } catch {
+            throw LlmEngineError.generationFailed(underlying: error)
+        }
+
+        // Add assistant response to history
+        conversation.messageHistory.append(("assistant", text))
+        bridge.add(toHistory: text, role: "assistant")
+
+        return text
     }
 
     public func generateStream(
@@ -226,31 +214,53 @@ public actor LiteRtLlmEngine: LlmEngine {
                     throw LlmEngineError.conversationClosed
                 }
 
-                guard self.isLoaded else {
+                guard self.isLoaded, let bridge = self.bridge else {
                     throw LlmEngineError.modelNotLoaded
                 }
 
-                // TODO: Implement streaming with LiteRT-LM C++
-                //
-                // C++ example:
-                // auto stream = conv->SendMessageAsync(contents);
-                // for (const auto& token : stream) {
-                //     continuation.yield(...)
-                // }
-
-                // Stub: Simulate streaming
-                let message = "LiteRT-LM on iOS currently requires C++ integration. Swift APIs are 'coming soon' per Google. See LlmEngine.swift comments for integration options."
-                let words = message.split(separator: " ")
-
-                for word in words {
-                    continuation.yield(String(word) + " ")
-                    try await Task.sleep(nanoseconds: 50_000_000)
+                // Handle multimodal (not implemented)
+                if audioData != nil || !(images?.isEmpty ?? true) {
+                    throw LlmEngineError.notImplemented
                 }
 
+                // Add user message to history
+                conversation.messageHistory.append(("user", prompt))
+                bridge.add(toHistory: prompt, role: "user")
+
+                // Get the streaming response from the bridge.
+                // generateResponseStream:error: imports as a throwing method.
+                let stream: LiteRTResponseStream
+                do {
+                    stream = try bridge.generateResponseStream(prompt)
+                } catch {
+                    throw LlmEngineError.generationFailed(underlying: error)
+                }
+
+                // Read chunks from the stream
+                var fullResponse = ""
+                while stream.hasMore {
+                    if let chunk = stream.nextChunk() {
+                        continuation.yield(chunk)
+                        fullResponse.append(chunk)
+                    } else {
+                        // No chunk ready yet; back off briefly instead of busy-spinning
+                        try await Task.sleep(nanoseconds: 1_000_000) // 1ms
+                    }
+                }
+
+                // Close stream
+                stream.close()
+
+                // Add complete response to history
+                conversation.messageHistory.append(("assistant", fullResponse))
+                bridge.add(toHistory: fullResponse, role: "assistant")
+
                 continuation.finish()
             } catch {
-                continuation.finish(throwing: LlmEngineError.generationFailed(underlying: error))
+                continuation.finish(throwing: error)
             }
         }
     }
 
@@ -259,72 +269,15 @@ public actor LiteRtLlmEngine: LlmEngine {
     // MARK: - Utility
 
     public func unload() {
-        // TODO: Clean up C++ engine
-        // if (cppEngine) {
-        //     delete static_cast<litert::lm::Engine*>(cppEngine);
-        //     cppEngine = nullptr;
-        // }
-
+        bridge?.close()
+        bridge = nil
         isLoaded = false
        currentModelPath = nil
        currentConversation = nil
-    }
-}
-
-// MARK: - TensorFlowLiteSwift Alternative (Basic)
-
-/// Alternative using standard TensorFlowLiteSwift
-/// Limited functionality - no KV cache, conversation management, or tool use
-///
-/// Use this if you need basic inference only:
-/// ```ruby
-/// pod 'TensorFlowLiteSwift', '~> 2.16.0'
-/// ```
-public actor TFLiteEngine: LlmEngine {
-    public static let shared = TFLiteEngine()
-
-    public private(set) var isLoaded: Bool = false
-
-    // TODO: Add TFLInterpreter
-    // private var interpreter: Interpreter?
-
-    public init() {}
-
-    public func loadModel(path: String) async throws {
-        // TODO: Initialize TFLInterpreter
-        // interpreter = try Interpreter(modelPath: path)
-        // try interpreter?.allocateTensors()
-        isLoaded = true
-    }
-
-    public func createConversation(systemPrompt: String) throws -> Conversation {
-        Conversation()
-    }
-
-    public func generate(
-        conversation: Conversation,
-        prompt: String,
-        audioData: Data?,
-        images: [UIImage]?
-    ) async throws -> String {
-        // TODO: Basic TFLite inference
-        // This won't work well for LLMs without proper tokenization
-        throw LlmEngineError.notImplemented
-    }
-
-    public func generateStream(
-        conversation: Conversation,
-        prompt: String,
-        audioData: Data?,
-        images: [UIImage]?
-    ) -> AsyncThrowingStream<String, Error> {
-        AsyncThrowingStream { continuation in
-            continuation.finish(throwing: LlmEngineError.notImplemented)
-        }
-    }
-
-    public func unload() {
-        // interpreter = nil
-        isLoaded = false
+        print("[LiteRtLlmEngine] Unloaded")
+    }
+
+    public func estimateTokens(text: String) -> Int {
+        return bridge?.estimateTokens(text) ?? (text.count / 4)
    }
 }