Update LiteRT-LM implementation with accurate iOS status
- Document that Swift APIs are 'coming soon' per Google - Add C++ bridge integration guide - Create stub implementation that explains current limitations - Add LITERT_IOS_STATUS.md with detailed integration options - Fix MainViewModel to match MainView property expectations
This commit is contained in:
+141
@@ -0,0 +1,141 @@
|
|||||||
|
# Xcode
|
||||||
|
#
|
||||||
|
# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore
|
||||||
|
|
||||||
|
## User settings
|
||||||
|
xcuserdata/
|
||||||
|
|
||||||
|
## compatibility with Xcode 8 and earlier (ignoring not required starting Xcode 9)
|
||||||
|
*.xcscmblueprint
|
||||||
|
*.xccheckout
|
||||||
|
|
||||||
|
## compatibility with Xcode 3 and earlier (ignoring not required starting Xcode 4)
|
||||||
|
build/
|
||||||
|
DerivedData/
|
||||||
|
*.moved-aside
|
||||||
|
*.pbxuser
|
||||||
|
!default.pbxuser
|
||||||
|
*.mode1v3
|
||||||
|
!default.mode1v3
|
||||||
|
*.mode2v3
|
||||||
|
!default.mode2v3
|
||||||
|
*.perspectivev3
|
||||||
|
!default.perspectivev3
|
||||||
|
|
||||||
|
## Obj-C/Swift specific
|
||||||
|
*.hmap
|
||||||
|
|
||||||
|
## App packaging
|
||||||
|
*.ipa
|
||||||
|
*.dSYM.zip
|
||||||
|
*.dSYM
|
||||||
|
|
||||||
|
## Playgrounds
|
||||||
|
timeline.xctimeline
|
||||||
|
playground.xcworkspace
|
||||||
|
|
||||||
|
# Swift Package Manager
|
||||||
|
#
|
||||||
|
# Add this line if you want to avoid checking in source code from Swift Package Manager dependencies.
|
||||||
|
# Packages/
|
||||||
|
# Package.pins
|
||||||
|
# Package.resolved
|
||||||
|
# *.xcodeproj
|
||||||
|
#
|
||||||
|
# Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata
|
||||||
|
# hence it is not needed unless you have added a package configuration file to your project
|
||||||
|
# .swiftpm
|
||||||
|
|
||||||
|
.build/
|
||||||
|
|
||||||
|
# CocoaPods
|
||||||
|
#
|
||||||
|
# We recommend against adding the Pods directory to your .gitignore. However
|
||||||
|
# you should judge for yourself, the pros and cons are mentioned at:
|
||||||
|
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
|
||||||
|
#
|
||||||
|
Pods/
|
||||||
|
#
|
||||||
|
# Add this line if you want to avoid checking in source code from the Xcode workspace
|
||||||
|
*.xcworkspace
|
||||||
|
|
||||||
|
# Carthage
|
||||||
|
#
|
||||||
|
# Add this line if you want to avoid checking in source code from Carthage dependencies.
|
||||||
|
# Carthage/Checkouts
|
||||||
|
|
||||||
|
Carthage/Build/
|
||||||
|
|
||||||
|
# Accio dependency management
|
||||||
|
Dependencies/
|
||||||
|
.accio/
|
||||||
|
|
||||||
|
# fastlane
|
||||||
|
#
|
||||||
|
# It is recommended to not store the screenshots in the git repo.
|
||||||
|
# Instead, use fastlane to re-generate the screenshots whenever they are needed.
|
||||||
|
# For more information about the recommended setup visit:
|
||||||
|
# https://docs.fastlane.tools/best-practices/source-control/#source-control
|
||||||
|
|
||||||
|
fastlane/report.xml
|
||||||
|
fastlane/Preview.html
|
||||||
|
fastlane/screenshots/**/*.png
|
||||||
|
fastlane/test_output
|
||||||
|
|
||||||
|
# Code Injection
|
||||||
|
#
|
||||||
|
# After new code Injection tools there's a generated folder /iOSInjectionProject
|
||||||
|
# https://github.com/johnno1962/injectionforxcode
|
||||||
|
|
||||||
|
iOSInjectionProject/
|
||||||
|
|
||||||
|
# macOS
|
||||||
|
.DS_Store
|
||||||
|
.AppleDouble
|
||||||
|
.LSOverride
|
||||||
|
|
||||||
|
# Thumbnails
|
||||||
|
._*
|
||||||
|
|
||||||
|
# Files that might appear in the root of a volume
|
||||||
|
.DocumentRevisions-V100
|
||||||
|
.fseventsd
|
||||||
|
.Spotlight-V100
|
||||||
|
.TemporaryItems
|
||||||
|
.Trashes
|
||||||
|
.VolumeIcon.icns
|
||||||
|
.com.apple.timemachine.donotpresent
|
||||||
|
|
||||||
|
# Directories potentially created on remote AFP share
|
||||||
|
.AppleDB
|
||||||
|
.AppleDesktop
|
||||||
|
Network Trash Folder
|
||||||
|
Temporary Items
|
||||||
|
.apdisk
|
||||||
|
|
||||||
|
# Project specific
|
||||||
|
build/
|
||||||
|
*.ipa
|
||||||
|
*.xcarchive
|
||||||
|
DerivedData/
|
||||||
|
|
||||||
|
# Model files (large binaries)
|
||||||
|
*.litertlm
|
||||||
|
*.bin
|
||||||
|
*.gguf
|
||||||
|
|
||||||
|
# Downloaded files
|
||||||
|
*.tmp
|
||||||
|
*.part
|
||||||
|
*.download
|
||||||
|
|
||||||
|
# IDE
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
*~
|
||||||
|
|
||||||
|
# Environment
|
||||||
|
.env
|
||||||
|
.env.local
|
||||||
@@ -0,0 +1,172 @@
|
|||||||
|
# LiteRT-LM on iOS - Current Status
|
||||||
|
|
||||||
|
## ⚠️ Critical Information
|
||||||
|
|
||||||
|
**LiteRT-LM Swift APIs are "coming soon"** as of 2025 (per [Google AI Edge](https://ai.google.dev/edge/litert-lm)).
|
||||||
|
|
||||||
|
## Current iOS Support
|
||||||
|
|
||||||
|
| Feature | Android | iOS Current | iOS Future |
|
||||||
|
|---------|---------|-------------|------------|
|
||||||
|
| Kotlin/Swift API | ✅ Full | ❌ Not yet | ⏳ Coming soon |
|
||||||
|
| C++ API | ✅ Available | ✅ Available | ✅ Available |
|
||||||
|
| Gemma 4 Models | ✅ Yes | ✅ Yes (.litertlm) | ✅ Yes |
|
||||||
|
| KV Cache | ✅ Managed | ⚠️ Manual (C++) | ✅ Managed |
|
||||||
|
| Conversation API | ✅ Yes | ⚠️ Manual (C++) | ✅ Yes |
|
||||||
|
| Tool Use | ✅ Yes | ⚠️ Manual (C++) | ✅ Yes |
|
||||||
|
| Metal GPU | N/A | ✅ Yes | ✅ Yes |
|
||||||
|
| CoreML NPU | N/A | ✅ Yes | ✅ Yes |
|
||||||
|
|
||||||
|
## Integration Options
|
||||||
|
|
||||||
|
### Option 1: C++ Bridge (Recommended for Production)
|
||||||
|
|
||||||
|
Use the LiteRT-LM C++ API with Objective-C++ bridging.
|
||||||
|
|
||||||
|
**Files needed:**
|
||||||
|
- `LlmEngineBridge.h` - Objective-C header
|
||||||
|
- `LlmEngineBridge.mm` - Objective-C++ implementation
|
||||||
|
- `LlmEngine.swift` - Swift wrapper
|
||||||
|
|
||||||
|
**Example:**
|
||||||
|
```objc
|
||||||
|
// LlmEngineBridge.h
|
||||||
|
@interface LlmEngineBridge : NSObject
|
||||||
|
- (BOOL)loadModel:(NSString *)path error:(NSError **)error;
|
||||||
|
- (NSString *)generate:(NSString *)prompt;
|
||||||
|
@end
|
||||||
|
```
|
||||||
|
|
||||||
|
```objc
|
||||||
|
// LlmEngineBridge.mm
|
||||||
|
#import "LlmEngineBridge.h"
|
||||||
|
#include "litert_lm/engine.h"
|
||||||
|
|
||||||
|
@implementation LlmEngineBridge {
|
||||||
|
std::unique_ptr<litert::lm::Engine> engine;
|
||||||
|
}
|
||||||
|
|
||||||
|
- (BOOL)loadModel:(NSString *)path error:(NSError **)error {
|
||||||
|
auto config = litert::lm::EngineConfig{
|
||||||
|
.model_path = [path UTF8String]
|
||||||
|
};
|
||||||
|
auto result = litert::lm::Engine::Create(config);
|
||||||
|
if (!result.ok()) {
|
||||||
|
// Set error
|
||||||
|
return NO;
|
||||||
|
}
|
||||||
|
engine = std::move(*result);
|
||||||
|
return YES;
|
||||||
|
}
|
||||||
|
|
||||||
|
@end
|
||||||
|
```
|
||||||
|
|
||||||
|
**Pros:**
|
||||||
|
- Full LiteRT-LM features (KV cache, tool use, multimodal)
|
||||||
|
- Best performance (Metal/CoreML delegates)
|
||||||
|
- Production-ready
|
||||||
|
|
||||||
|
**Cons:**
|
||||||
|
- Requires Objective-C++ knowledge
|
||||||
|
- More complex build setup
|
||||||
|
- Bridge code maintenance
|
||||||
|
|
||||||
|
### Option 2: TensorFlowLiteSwift (Limited)
|
||||||
|
|
||||||
|
Use the standard TensorFlow Lite Swift pod.
|
||||||
|
|
||||||
|
```ruby
|
||||||
|
pod 'TensorFlowLiteSwift', '~> 2.16.0'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Pros:**
|
||||||
|
- Pure Swift
|
||||||
|
- Easy integration
|
||||||
|
- Stable API
|
||||||
|
|
||||||
|
**Cons:**
|
||||||
|
- ❌ No KV cache management
|
||||||
|
- ❌ No conversation handling
|
||||||
|
- ❌ No tool use support
|
||||||
|
- ❌ No streaming generation
|
||||||
|
- Manual tokenization required
|
||||||
|
|
||||||
|
**Verdict:** Not suitable for LLM chat apps.
|
||||||
|
|
||||||
|
### Option 3: Wait for Swift APIs
|
||||||
|
|
||||||
|
Monitor these pages for the official Swift API release:
|
||||||
|
- https://ai.google.dev/edge/litert-lm
|
||||||
|
- https://github.com/google-ai-edge/LiteRT-LM
|
||||||
|
|
||||||
|
**Timeline:** Unknown (marked as "coming soon" since 2024)
|
||||||
|
|
||||||
|
## What Works Now
|
||||||
|
|
||||||
|
The current implementation uses **stub/fallback mode**:
|
||||||
|
- ✅ UI fully functional
|
||||||
|
- ✅ Audio recording/playback
|
||||||
|
- ✅ TTS
|
||||||
|
- ✅ Web search
|
||||||
|
- ✅ Model download
|
||||||
|
- ✅ Conversation management
|
||||||
|
- ❌ LLM inference (stubbed)
|
||||||
|
|
||||||
|
## To Enable Full LLM Support
|
||||||
|
|
||||||
|
### Step 1: Add C++ Bridge
|
||||||
|
|
||||||
|
1. Create the Objective-C++ bridge files (header and implementation):
|
||||||
|
```bash
|
||||||
|
# In your project
|
||||||
|
touch SleepyAgent/Inference/Bridge/LlmEngineBridge.h
|
||||||
|
touch SleepyAgent/Inference/Bridge/LlmEngineBridge.mm
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Download LiteRT-LM iOS binaries:
|
||||||
|
```bash
|
||||||
|
# From GitHub releases or build from source
|
||||||
|
# https://github.com/google-ai-edge/LiteRT-LM/releases
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Link libraries:
|
||||||
|
- `liblitert_lm.a` (static library)
|
||||||
|
- `libtensorflow-lite.a`
|
||||||
|
- Metal framework
|
||||||
|
- CoreML framework
|
||||||
|
|
||||||
|
### Step 2: Update Build Settings
|
||||||
|
|
||||||
|
In Xcode:
|
||||||
|
1. Set `Compile Sources As` to `Objective-C++` for .mm files
|
||||||
|
2. Add header search paths for LiteRT-LM
|
||||||
|
3. Link required frameworks
|
||||||
|
|
||||||
|
### Step 3: Implement Bridge Methods
|
||||||
|
|
||||||
|
See the TODO comments in `LlmEngine.swift` for the specific methods to implement.
|
||||||
|
|
||||||
|
## Testing Without LLM
|
||||||
|
|
||||||
|
The app works in "demo mode" with stub responses. To test:
|
||||||
|
1. Build and run
|
||||||
|
2. Type any message
|
||||||
|
3. See stub response about LiteRT-LM integration
|
||||||
|
|
||||||
|
## References
|
||||||
|
|
||||||
|
- **LiteRT-LM GitHub:** https://github.com/google-ai-edge/LiteRT-LM
|
||||||
|
- **iOS C++ Guide:** https://ai.google.dev/edge/litert-lm/cpp
|
||||||
|
- **CocoaPods:** https://cocoapods.org/pods/TensorFlowLiteSwift
|
||||||
|
- **Models:** https://huggingface.co/litert-community
|
||||||
|
- **Sample App:** https://github.com/google-ai-edge/gallery (AI Edge Gallery)
|
||||||
|
|
||||||
|
## Recommendation
|
||||||
|
|
||||||
|
For a developer build/demo:
|
||||||
|
1. Use current stub implementation to test UI/features
|
||||||
|
2. Add C++ bridge when ready for production LLM support
|
||||||
|
3. Monitor for official Swift API release
|
||||||
|
|
||||||
|
The architecture is ready; it just needs the inference backend integration.
|
||||||
@@ -1,5 +1,4 @@
|
|||||||
import Foundation
|
import Foundation
|
||||||
import LiteRT
|
|
||||||
import UIKit
|
import UIKit
|
||||||
|
|
||||||
// MARK: - Errors
|
// MARK: - Errors
|
||||||
@@ -11,6 +10,7 @@ public enum LlmEngineError: LocalizedError {
|
|||||||
case generationFailed(underlying: Error)
|
case generationFailed(underlying: Error)
|
||||||
case invalidMultimodalInput
|
case invalidMultimodalInput
|
||||||
case engineInitializationFailed(underlying: Error)
|
case engineInitializationFailed(underlying: Error)
|
||||||
|
case notImplemented
|
||||||
|
|
||||||
public var errorDescription: String? {
|
public var errorDescription: String? {
|
||||||
switch self {
|
switch self {
|
||||||
@@ -26,34 +26,24 @@ public enum LlmEngineError: LocalizedError {
|
|||||||
return "Invalid multimodal input provided"
|
return "Invalid multimodal input provided"
|
||||||
case .engineInitializationFailed(let error):
|
case .engineInitializationFailed(let error):
|
||||||
return "Failed to initialize engine: \(error.localizedDescription)"
|
return "Failed to initialize engine: \(error.localizedDescription)"
|
||||||
|
case .notImplemented:
|
||||||
|
return "This feature requires LiteRT-LM C++ integration (Swift APIs coming soon)"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// MARK: - LiteRT Conversation Wrapper
|
// MARK: - Conversation
|
||||||
|
|
||||||
/// Wrapper for LiteRT Conversation to manage lifecycle
|
/// Conversation wrapper for managing chat sessions
|
||||||
|
/// Note: In full implementation, this wraps LiteRT-LM's Conversation object
|
||||||
public final class Conversation: @unchecked Sendable {
|
public final class Conversation: @unchecked Sendable {
|
||||||
internal let liteRtConversation: LRTConversation
|
public var isAlive: Bool = true
|
||||||
private let lock = NSLock()
|
internal var messageHistory: [(role: String, content: String)] = []
|
||||||
private var isClosed = false
|
|
||||||
|
|
||||||
public var isAlive: Bool {
|
internal init() {}
|
||||||
lock.lock()
|
|
||||||
defer { lock.unlock() }
|
|
||||||
return !isClosed
|
|
||||||
}
|
|
||||||
|
|
||||||
internal init(liteRtConversation: LRTConversation) {
|
|
||||||
self.liteRtConversation = liteRtConversation
|
|
||||||
}
|
|
||||||
|
|
||||||
public func close() {
|
public func close() {
|
||||||
lock.lock()
|
isAlive = false
|
||||||
defer { lock.unlock() }
|
|
||||||
guard !isClosed else { return }
|
|
||||||
isClosed = true
|
|
||||||
liteRtConversation.close()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
deinit {
|
deinit {
|
||||||
@@ -63,52 +53,86 @@ public final class Conversation: @unchecked Sendable {
|
|||||||
|
|
||||||
// MARK: - LlmEngine Protocol
|
// MARK: - LlmEngine Protocol
|
||||||
|
|
||||||
/// LLM Engine interface for text generation with optional multimodal inputs
|
|
||||||
public protocol LlmEngine: Actor {
|
public protocol LlmEngine: Actor {
|
||||||
/// Load a model from the given path
|
var isLoaded: Bool { get }
|
||||||
|
|
||||||
func loadModel(path: String) async throws
|
func loadModel(path: String) async throws
|
||||||
|
|
||||||
/// Creates a new conversation with the given system prompt
|
|
||||||
/// This should be called once per chat session to enable KV cache reuse
|
|
||||||
func createConversation(systemPrompt: String) throws -> Conversation
|
func createConversation(systemPrompt: String) throws -> Conversation
|
||||||
|
|
||||||
/// Generate a response within an existing conversation
|
|
||||||
/// This reuses the KV cache from previous turns
|
|
||||||
func generate(
|
func generate(
|
||||||
conversation: Conversation,
|
conversation: Conversation,
|
||||||
prompt: String,
|
prompt: String,
|
||||||
audioData: Data?,
|
audioData: Data?,
|
||||||
images: [UIImage]?
|
images: [UIImage]?
|
||||||
) async throws -> String
|
) async throws -> String
|
||||||
|
|
||||||
/// Generate a streaming response within an existing conversation
|
|
||||||
func generateStream(
|
func generateStream(
|
||||||
conversation: Conversation,
|
conversation: Conversation,
|
||||||
prompt: String,
|
prompt: String,
|
||||||
audioData: Data?,
|
audioData: Data?,
|
||||||
images: [UIImage]?
|
images: [UIImage]?
|
||||||
) -> AsyncThrowingStream<String, Error>
|
) -> AsyncThrowingStream<String, Error>
|
||||||
|
|
||||||
/// Check if a model is currently loaded
|
|
||||||
func isLoaded() -> Bool
|
|
||||||
|
|
||||||
/// Unload the current model and free resources
|
|
||||||
func unload()
|
func unload()
|
||||||
}
|
}
|
||||||
|
|
||||||
// MARK: - LiteRT-LM Engine Implementation
|
// MARK: - LiteRT-LM Engine Implementation
|
||||||
|
|
||||||
/// LiteRT-LM based LLM Engine implementation for Gemma models
|
/// LiteRT-LM based LLM Engine
|
||||||
/// Uses .litert model format - download from HuggingFace LiteRT Community
|
///
|
||||||
@globalActor
|
/// # Important Implementation Note:
|
||||||
|
///
|
||||||
|
/// LiteRT-LM Swift APIs are "coming soon" per Google (as of 2025).
|
||||||
|
/// Current iOS support requires using the C++ API directly with Objective-C++ bridging.
|
||||||
|
///
|
||||||
|
/// ## Integration Options:
|
||||||
|
///
|
||||||
|
/// ### Option 1: Use TensorFlowLiteSwift (Limited)
|
||||||
|
/// The standard TensorFlowLiteSwift pod works for basic inference but lacks LLM-specific features
|
||||||
|
/// like KV cache management, conversation handling, and tool use.
|
||||||
|
///
|
||||||
|
/// ```ruby
|
||||||
|
/// # Podfile
|
||||||
|
/// pod 'TensorFlowLiteSwift', '~> 2.16.0'
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// ### Option 2: C++ Bridge (Full Features) ⭐ Recommended
|
||||||
|
/// Use LiteRT-LM C++ API with Objective-C++ wrapper:
|
||||||
|
///
|
||||||
|
/// 1. Add C++ source files (.mm)
|
||||||
|
/// 2. Include LiteRT-LM headers
|
||||||
|
/// 3. Bridge to Swift via Objective-C
|
||||||
|
///
|
||||||
|
/// ```objc
|
||||||
|
/// // LlmEngineBridge.h
|
||||||
|
/// @interface LlmEngineBridge : NSObject
|
||||||
|
/// - (BOOL)loadModel:(NSString *)path error:(NSError **)error;
|
||||||
|
/// - (NSString *)generate:(NSString *)prompt;
|
||||||
|
/// @end
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// ### Option 3: Wait for Swift APIs
|
||||||
|
/// Google has announced Swift APIs are coming. Monitor:
|
||||||
|
/// https://ai.google.dev/edge/litert-lm
|
||||||
|
///
|
||||||
|
/// ## Current Status:
|
||||||
|
/// - Android: ✅ Full Kotlin support
|
||||||
|
/// - iOS: ⚠️ C++ only (Swift APIs coming soon)
|
||||||
|
/// - Models: ✅ Gemma 4 E2B/E4B available on HuggingFace
|
||||||
|
///
|
||||||
|
/// ## References:
|
||||||
|
/// - LiteRT-LM GitHub: https://github.com/google-ai-edge/LiteRT-LM
|
||||||
|
/// - iOS C++ Guide: https://ai.google.dev/edge/litert-lm/cpp
|
||||||
|
/// - Models: https://huggingface.co/litert-community
|
||||||
|
///
|
||||||
public actor LiteRtLlmEngine: LlmEngine {
|
public actor LiteRtLlmEngine: LlmEngine {
|
||||||
public static let shared = LiteRtLlmEngine()
|
public static let shared = LiteRtLlmEngine()
|
||||||
|
|
||||||
private var engine: LRTEngine?
|
public private(set) var isLoaded: Bool = false
|
||||||
private var currentModelPath: String?
|
private var currentModelPath: String?
|
||||||
|
private var currentConversation: Conversation?
|
||||||
|
|
||||||
private let maxTokens = 16384
|
private let maxTokens = 16384
|
||||||
private let cacheDirName = "litertlm_cache"
|
|
||||||
|
// TODO: Add actual LiteRT-LM C++ engine reference here
|
||||||
|
// private var cppEngine: UnsafeMutableRawPointer?
|
||||||
|
|
||||||
private init() {}
|
private init() {}
|
||||||
|
|
||||||
@@ -117,48 +141,49 @@ public actor LiteRtLlmEngine: LlmEngine {
|
|||||||
public func loadModel(path: String) async throws {
|
public func loadModel(path: String) async throws {
|
||||||
unload()
|
unload()
|
||||||
|
|
||||||
let modelFile = URL(fileURLWithPath: path)
|
|
||||||
guard FileManager.default.fileExists(atPath: path) else {
|
guard FileManager.default.fileExists(atPath: path) else {
|
||||||
throw LlmEngineError.modelNotFound(path: path)
|
throw LlmEngineError.modelNotFound(path: path)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure cache directory exists
|
// TODO: Implement actual LiteRT-LM loading
|
||||||
let cacheDir = FileManager.default.temporaryDirectory.appendingPathComponent(cacheDirName)
|
//
|
||||||
try? FileManager.default.createDirectory(at: cacheDir, withIntermediateDirectories: true)
|
// Example C++ integration (in .mm file):
|
||||||
|
//
|
||||||
|
// #include "litert_lm/engine.h"
|
||||||
|
//
|
||||||
|
// auto config = litert::lm::EngineConfig{
|
||||||
|
// .model_path = path.UTF8String,
|
||||||
|
// .max_num_tokens = maxTokens
|
||||||
|
// };
|
||||||
|
//
|
||||||
|
// auto engine = litert::lm::Engine::Create(config);
|
||||||
|
// if (!engine.ok()) {
|
||||||
|
// throw LlmEngineError.engineInitializationFailed(...)
|
||||||
|
// }
|
||||||
|
// cppEngine = engine->release();
|
||||||
|
|
||||||
do {
|
// Stub: Simulate loading
|
||||||
let engineConfig = LRTEngineConfig(
|
try await Task.sleep(nanoseconds: 500_000_000)
|
||||||
modelPath: path,
|
|
||||||
backend: .cpu,
|
self.isLoaded = true
|
||||||
visionBackend: .cpu,
|
self.currentModelPath = path
|
||||||
audioBackend: .cpu,
|
|
||||||
maxNumTokens: maxTokens,
|
|
||||||
cacheDir: cacheDir.path
|
|
||||||
)
|
|
||||||
|
|
||||||
let newEngine = LRTEngine(config: engineConfig)
|
|
||||||
try newEngine.initialize()
|
|
||||||
|
|
||||||
self.engine = newEngine
|
|
||||||
self.currentModelPath = path
|
|
||||||
|
|
||||||
} catch {
|
|
||||||
throw LlmEngineError.engineInitializationFailed(underlying: error)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// MARK: - Conversation Management
|
// MARK: - Conversation
|
||||||
|
|
||||||
public func createConversation(systemPrompt: String) throws -> Conversation {
|
public func createConversation(systemPrompt: String) throws -> Conversation {
|
||||||
guard let engine = engine else {
|
guard isLoaded else {
|
||||||
throw LlmEngineError.modelNotLoaded
|
throw LlmEngineError.modelNotLoaded
|
||||||
}
|
}
|
||||||
|
|
||||||
let systemContent = LRTContent.text(systemPrompt)
|
let conversation = Conversation()
|
||||||
let conversationConfig = LRTConversationConfig(systemInstruction: systemContent)
|
conversation.messageHistory.append(("system", systemPrompt))
|
||||||
|
self.currentConversation = conversation
|
||||||
|
|
||||||
let liteRtConversation = engine.createConversation(config: conversationConfig)
|
// TODO: Create actual LiteRT-LM conversation
|
||||||
return Conversation(liteRtConversation: liteRtConversation)
|
// auto conv = cppEngine->CreateConversation(config);
|
||||||
|
|
||||||
|
return conversation
|
||||||
}
|
}
|
||||||
|
|
||||||
// MARK: - Generation
|
// MARK: - Generation
|
||||||
@@ -173,9 +198,19 @@ public actor LiteRtLlmEngine: LlmEngine {
|
|||||||
throw LlmEngineError.conversationClosed
|
throw LlmEngineError.conversationClosed
|
||||||
}
|
}
|
||||||
|
|
||||||
let contents = try buildContents(prompt: prompt, audioData: audioData, images: images)
|
guard isLoaded else {
|
||||||
let response = try conversation.liteRtConversation.sendMessage(contents)
|
throw LlmEngineError.modelNotLoaded
|
||||||
return response.stringValue ?? ""
|
}
|
||||||
|
|
||||||
|
// TODO: Implement actual generation
|
||||||
|
//
|
||||||
|
// C++ example:
|
||||||
|
// auto contents = litert::lm::Contents::FromText(prompt.UTF8String);
|
||||||
|
// auto response = conv->SendMessage(contents);
|
||||||
|
// return [NSString stringWithUTF8String:response.text().c_str()];
|
||||||
|
|
||||||
|
// Stub response
|
||||||
|
return "[STUB] LiteRT-LM Swift APIs are coming soon. Use C++ bridge for full functionality."
|
||||||
}
|
}
|
||||||
|
|
||||||
public func generateStream(
|
public func generateStream(
|
||||||
@@ -191,24 +226,25 @@ public actor LiteRtLlmEngine: LlmEngine {
|
|||||||
throw LlmEngineError.conversationClosed
|
throw LlmEngineError.conversationClosed
|
||||||
}
|
}
|
||||||
|
|
||||||
// For multimodal inputs, use Contents API (non-streaming for now)
|
guard self.isLoaded else {
|
||||||
if audioData != nil || !(images?.isEmpty ?? true) {
|
throw LlmEngineError.modelNotLoaded
|
||||||
let contents = try buildContents(prompt: prompt, audioData: audioData, images: images)
|
|
||||||
let response = try conversation.liteRtConversation.sendMessage(contents)
|
|
||||||
if let text = response.stringValue {
|
|
||||||
continuation.yield(text)
|
|
||||||
}
|
|
||||||
continuation.finish()
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Text-only streaming - reuses KV cache
|
// TODO: Implement streaming with LiteRT-LM C++
|
||||||
let stream = conversation.liteRtConversation.sendMessageAsync(prompt)
|
//
|
||||||
|
// C++ example:
|
||||||
|
// auto stream = conv->SendMessageAsync(contents);
|
||||||
|
// for (const auto& token : stream) {
|
||||||
|
// continuation.yield(...)
|
||||||
|
// }
|
||||||
|
|
||||||
for try await message in stream {
|
// Stub: Simulate streaming
|
||||||
if let text = message.stringValue {
|
let message = "LiteRT-LM on iOS currently requires C++ integration. Swift APIs are 'coming soon' per Google. See LlmEngine.swift comments for integration options."
|
||||||
continuation.yield(text)
|
let words = message.split(separator: " ")
|
||||||
}
|
|
||||||
|
for word in words {
|
||||||
|
continuation.yield(String(word) + " ")
|
||||||
|
try await Task.sleep(nanoseconds: 50_000_000)
|
||||||
}
|
}
|
||||||
|
|
||||||
continuation.finish()
|
continuation.finish()
|
||||||
@@ -220,116 +256,75 @@ public actor LiteRtLlmEngine: LlmEngine {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// MARK: - Utility Methods
|
// MARK: - Utility
|
||||||
|
|
||||||
public func isLoaded() -> Bool {
|
|
||||||
engine != nil
|
|
||||||
}
|
|
||||||
|
|
||||||
public func unload() {
|
public func unload() {
|
||||||
engine?.close()
|
// TODO: Clean up C++ engine
|
||||||
engine = nil
|
// if (cppEngine) {
|
||||||
|
// delete static_cast<litert::lm::Engine*>(cppEngine);
|
||||||
|
// cppEngine = nullptr;
|
||||||
|
// }
|
||||||
|
|
||||||
|
isLoaded = false
|
||||||
currentModelPath = nil
|
currentModelPath = nil
|
||||||
|
currentConversation = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// MARK: - TensorFlowLiteSwift Alternative (Basic)
|
||||||
|
|
||||||
|
/// Alternative using standard TensorFlowLiteSwift
|
||||||
|
/// Limited functionality - no KV cache, conversation management, or tool use
|
||||||
|
///
|
||||||
|
/// Use this if you need basic inference only:
|
||||||
|
/// ```ruby
|
||||||
|
/// pod 'TensorFlowLiteSwift', '~> 2.16.0'
|
||||||
|
/// ```
|
||||||
|
public actor TFLiteEngine: LlmEngine {
|
||||||
|
public static let shared = TFLiteEngine()
|
||||||
|
|
||||||
|
public private(set) var isLoaded: Bool = false
|
||||||
|
|
||||||
|
// TODO: Add TFLInterpreter
|
||||||
|
// private var interpreter: Interpreter?
|
||||||
|
|
||||||
|
public init() {}
|
||||||
|
|
||||||
|
public func loadModel(path: String) async throws {
|
||||||
|
// TODO: Initialize TFLInterpreter
|
||||||
|
// interpreter = try Interpreter(modelPath: path)
|
||||||
|
// try interpreter?.allocateTensors()
|
||||||
|
isLoaded = true
|
||||||
}
|
}
|
||||||
|
|
||||||
// MARK: - Private Helpers
|
public func createConversation(systemPrompt: String) throws -> Conversation {
|
||||||
|
Conversation()
|
||||||
|
}
|
||||||
|
|
||||||
private func buildContents(
|
public func generate(
|
||||||
|
conversation: Conversation,
|
||||||
prompt: String,
|
prompt: String,
|
||||||
audioData: Data?,
|
audioData: Data?,
|
||||||
images: [UIImage]?
|
images: [UIImage]?
|
||||||
) throws -> LRTContents {
|
) async throws -> String {
|
||||||
var contents: [LRTContent] = []
|
// TODO: Basic TFLite inference
|
||||||
|
// This won't work well for LLMs without proper tokenization
|
||||||
// Add images first if provided (max 1 for efficiency)
|
throw LlmEngineError.notImplemented
|
||||||
if let images = images {
|
|
||||||
for image in images.prefix(1) {
|
|
||||||
if let resizedImage = resizeImage(image, maxSize: CGSize(width: 512, height: 512)),
|
|
||||||
let jpegData = resizedImage.jpegData(compressionQuality: 0.85) {
|
|
||||||
contents.append(LRTContent.imageData(jpegData))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add audio if provided
|
|
||||||
if let audioData = audioData, audioData.count >= 6400 {
|
|
||||||
// Assume audio is already in WAV format or convert if needed
|
|
||||||
let wavData = isWavData(audioData) ? audioData : try convertPcmToWav(audioData)
|
|
||||||
contents.append(LRTContent.audioData(wavData))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add text prompt
|
|
||||||
contents.append(LRTContent.text(prompt))
|
|
||||||
|
|
||||||
return LRTContents(contents: contents)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private func resizeImage(_ image: UIImage, maxSize: CGSize) -> UIImage? {
|
public func generateStream(
|
||||||
let size = image.size
|
conversation: Conversation,
|
||||||
|
prompt: String,
|
||||||
guard size.width > maxSize.width || size.height > maxSize.height else {
|
audioData: Data?,
|
||||||
return image
|
images: [UIImage]?
|
||||||
|
) -> AsyncThrowingStream<String, Error> {
|
||||||
|
AsyncThrowingStream { continuation in
|
||||||
|
continuation.finish(throwing: LlmEngineError.notImplemented)
|
||||||
}
|
}
|
||||||
|
|
||||||
let widthRatio = maxSize.width / size.width
|
|
||||||
let heightRatio = maxSize.height / size.height
|
|
||||||
let ratio = min(widthRatio, heightRatio)
|
|
||||||
|
|
||||||
let newSize = CGSize(width: size.width * ratio, height: size.height * ratio)
|
|
||||||
|
|
||||||
UIGraphicsBeginImageContextWithOptions(newSize, false, 1.0)
|
|
||||||
defer { UIGraphicsEndImageContext() }
|
|
||||||
|
|
||||||
image.draw(in: CGRect(origin: .zero, size: newSize))
|
|
||||||
return UIGraphicsGetImageFromCurrentImageContext()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private func isWavData(_ data: Data) -> Bool {
|
public func unload() {
|
||||||
// Check for WAV header: "RIFF" magic number
|
// interpreter = nil
|
||||||
guard data.count >= 12 else { return false }
|
isLoaded = false
|
||||||
let header = data.prefix(4)
|
|
||||||
return header.elementsEqual([0x52, 0x49, 0x46, 0x46]) // "RIFF"
|
|
||||||
}
|
|
||||||
|
|
||||||
private func convertPcmToWav(_ pcmData: Data, sampleRate: Int32 = 16000, channels: UInt16 = 1) throws -> Data {
|
|
||||||
var wavData = Data()
|
|
||||||
|
|
||||||
// RIFF header
|
|
||||||
wavData.append("RIFF".data(using: .ascii)!)
|
|
||||||
|
|
||||||
// File size (will be filled later)
|
|
||||||
let fileSize = UInt32(pcmData.count + 36)
|
|
||||||
wavData.append(withUnsafeBytes(of: fileSize.littleEndian) { Data($0) })
|
|
||||||
|
|
||||||
// WAVE header
|
|
||||||
wavData.append("WAVE".data(using: .ascii)!)
|
|
||||||
|
|
||||||
// fmt chunk
|
|
||||||
wavData.append("fmt ".data(using: .ascii)!)
|
|
||||||
let fmtChunkSize: UInt32 = 16
|
|
||||||
wavData.append(withUnsafeBytes(of: fmtChunkSize.littleEndian) { Data($0) })
|
|
||||||
|
|
||||||
let audioFormat: UInt16 = 1 // PCM
|
|
||||||
wavData.append(withUnsafeBytes(of: audioFormat.littleEndian) { Data($0) })
|
|
||||||
|
|
||||||
wavData.append(withUnsafeBytes(of: channels.littleEndian) { Data($0) })
|
|
||||||
wavData.append(withUnsafeBytes(of: sampleRate.littleEndian) { Data($0) })
|
|
||||||
|
|
||||||
let byteRate = UInt32(sampleRate) * UInt32(channels) * 2 // 16-bit
|
|
||||||
wavData.append(withUnsafeBytes(of: byteRate.littleEndian) { Data($0) })
|
|
||||||
|
|
||||||
let blockAlign = channels * 2
|
|
||||||
wavData.append(withUnsafeBytes(of: blockAlign.littleEndian) { Data($0) })
|
|
||||||
|
|
||||||
let bitsPerSample: UInt16 = 16
|
|
||||||
wavData.append(withUnsafeBytes(of: bitsPerSample.littleEndian) { Data($0) })
|
|
||||||
|
|
||||||
// data chunk
|
|
||||||
wavData.append("data".data(using: .ascii)!)
|
|
||||||
let dataChunkSize = UInt32(pcmData.count)
|
|
||||||
wavData.append(withUnsafeBytes(of: dataChunkSize.littleEndian) { Data($0) })
|
|
||||||
wavData.append(pcmData)
|
|
||||||
|
|
||||||
return wavData
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,6 +16,12 @@ class MainViewModel: ObservableObject {
|
|||||||
@Published var conversations: [ConversationInfo] = []
|
@Published var conversations: [ConversationInfo] = []
|
||||||
@Published var currentConversationId: UUID = UUID()
|
@Published var currentConversationId: UUID = UUID()
|
||||||
|
|
||||||
|
// Computed properties for view state
|
||||||
|
var isLoading: Bool { isGenerating && currentResponse.isEmpty }
|
||||||
|
var isProcessing: Bool { isGenerating }
|
||||||
|
var isExecutingTool: Bool { false } // TODO: Implement tool execution state
|
||||||
|
var streamingText: String { currentResponse }
|
||||||
|
|
||||||
private let agent: Agent
|
private let agent: Agent
|
||||||
private let audioRecorder: AudioRecorder
|
private let audioRecorder: AudioRecorder
|
||||||
private let ttsService: TtsService
|
private let ttsService: TtsService
|
||||||
@@ -41,6 +47,11 @@ class MainViewModel: ObservableObject {
|
|||||||
await processTextMessage(text)
|
await processTextMessage(text)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func sendMessage(text: String) async {
|
||||||
|
guard !text.isEmpty else { return }
|
||||||
|
await processTextMessage(text)
|
||||||
|
}
|
||||||
|
|
||||||
func sendImage(_ image: UIImage, text: String = "") async {
|
func sendImage(_ image: UIImage, text: String = "") async {
|
||||||
selectedImage = image
|
selectedImage = image
|
||||||
let displayText = text.isEmpty ? "[Image]" : text
|
let displayText = text.isEmpty ? "[Image]" : text
|
||||||
@@ -156,6 +167,11 @@ class MainViewModel: ObservableObject {
|
|||||||
errorMessage = message
|
errorMessage = message
|
||||||
showError = true
|
showError = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func dismissError() {
|
||||||
|
showError = false
|
||||||
|
errorMessage = nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ConversationInfo: Identifiable {
|
struct ConversationInfo: Identifiable {
|
||||||
|
|||||||
Reference in New Issue
Block a user