Update LiteRT-LM implementation with accurate iOS status
- Document that Swift APIs are 'coming soon' per Google
- Add C++ bridge integration guide
- Create stub implementation that explains current limitations
- Add LITERT_IOS_STATUS.md with detailed integration options
- Fix MainViewModel to match MainView property expectations

.gitignore
@@ -0,0 +1,141 @@
# Xcode
#
# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore

## User settings
xcuserdata/

## compatibility with Xcode 8 and earlier (ignoring not required starting Xcode 9)
*.xcscmblueprint
*.xccheckout

## compatibility with Xcode 3 and earlier (ignoring not required starting Xcode 4)
build/
DerivedData/
*.moved-aside
*.pbxuser
!default.pbxuser
*.mode1v3
!default.mode1v3
*.mode2v3
!default.mode2v3
*.perspectivev3
!default.perspectivev3

## Obj-C/Swift specific
*.hmap

## App packaging
*.ipa
*.dSYM.zip
*.dSYM

## Playgrounds
timeline.xctimeline
playground.xcworkspace

# Swift Package Manager
#
# Add this line if you want to avoid checking in source code from Swift Package Manager dependencies.
# Packages/
# Package.pins
# Package.resolved
# *.xcodeproj
#
# Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata
# hence it is not needed unless you have added a package configuration file to your project
# .swiftpm

.build/

# CocoaPods
#
# We recommend against adding the Pods directory to your .gitignore. However
# you should judge for yourself, the pros and cons are mentioned at:
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
#
Pods/
#
# Add this line if you want to avoid checking in source code from the Xcode workspace
*.xcworkspace

# Carthage
#
# Add this line if you want to avoid checking in source code from Carthage dependencies.
# Carthage/Checkouts

Carthage/Build/

# Accio dependency management
Dependencies/
.accio/

# fastlane
#
# It is recommended to not store the screenshots in the git repo.
# Instead, use fastlane to re-generate the screenshots whenever they are needed.
# For more information about the recommended setup visit:
# https://docs.fastlane.tools/best-practices/source-control/#source-control

fastlane/report.xml
fastlane/Preview.html
fastlane/screenshots/**/*.png
fastlane/test_output

# Code Injection
#
# After new code Injection tools there's a generated folder /iOSInjectionProject
# https://github.com/johnno1962/injectionforxcode

iOSInjectionProject/

# macOS
.DS_Store
.AppleDouble
.LSOverride

# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

# Project specific
build/
*.ipa
*.xcarchive
DerivedData/

# Model files (large binaries)
*.litertlm
*.bin
*.gguf

# Downloaded files
*.tmp
*.part
*.download

# IDE
.idea/
.vscode/
*.swp
*.swo
*~

# Environment
.env
.env.local

LITERT_IOS_STATUS.md
@@ -0,0 +1,172 @@

# LiteRT-LM on iOS - Current Status

## ⚠️ Critical Information

**LiteRT-LM Swift APIs are "coming soon"** as of 2025 (per [Google AI Edge](https://ai.google.dev/edge/litert-lm)).

## Current iOS Support

| Feature | Android | iOS Current | iOS Future |
|---------|---------|-------------|------------|
| Kotlin/Swift API | ✅ Full | ❌ Not yet | ⏳ Coming soon |
| C++ API | ✅ Available | ✅ Available | ✅ Available |
| Gemma 3n Models | ✅ Yes | ✅ Yes (.litertlm) | ✅ Yes |
| KV Cache | ✅ Managed | ⚠️ Manual (C++) | ✅ Managed |
| Conversation API | ✅ Yes | ⚠️ Manual (C++) | ✅ Yes |
| Tool Use | ✅ Yes | ⚠️ Manual (C++) | ✅ Yes |
| Metal GPU | N/A | ✅ Yes | ✅ Yes |
| CoreML NPU | N/A | ✅ Yes | ✅ Yes |

## Integration Options

### Option 1: C++ Bridge (Recommended for Production)

Use the LiteRT-LM C++ API through an Objective-C++ bridge.

**Files needed:**
- `LlmEngineBridge.h` - Objective-C header
- `LlmEngineBridge.mm` - Objective-C++ implementation
- `LlmEngine.swift` - Swift wrapper

**Example:**
```objc
// LlmEngineBridge.h
@interface LlmEngineBridge : NSObject
- (BOOL)loadModel:(NSString *)path error:(NSError **)error;
- (NSString *)generate:(NSString *)prompt;
@end
```

```objc
// LlmEngineBridge.mm
#import "LlmEngineBridge.h"
#include "litert_lm/engine.h"

@implementation LlmEngineBridge {
    std::unique_ptr<litert::lm::Engine> engine;
}

- (BOOL)loadModel:(NSString *)path error:(NSError **)error {
    auto config = litert::lm::EngineConfig{
        .model_path = [path UTF8String]
    };
    auto result = litert::lm::Engine::Create(config);
    if (!result.ok()) {
        // Surface the failure to the caller as an NSError
        if (error) {
            *error = [NSError errorWithDomain:@"LlmEngineBridge"
                                         code:-1
                                     userInfo:nil];
        }
        return NO;
    }
    engine = std::move(*result);
    return YES;
}

@end
```
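
The `LlmEngine.swift` side can then stay in pure Swift. A minimal sketch of the wrapper, assuming the bridge above is exposed through the project's bridging header (names are illustrative):

```swift
// LlmEngine.swift
import Foundation

/// Thin Swift wrapper around the Objective-C++ bridge.
final class BridgedLlmEngine {
    private let bridge = LlmEngineBridge()

    func loadModel(at path: String) throws {
        // The BOOL + NSError** Objective-C signature imports into Swift
        // as a throwing method, so no manual error plumbing is needed.
        try bridge.loadModel(path)
    }

    func generate(_ prompt: String) -> String {
        bridge.generate(prompt)
    }
}
```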

**Pros:**
- Full LiteRT-LM features (KV cache, tool use, multimodal)
- Best performance (Metal/CoreML delegates)
- Production-ready

**Cons:**
- Requires Objective-C++ knowledge
- More complex build setup
- Bridge code maintenance

### Option 2: TensorFlowLiteSwift (Limited)

Use the standard TensorFlow Lite Swift pod.

```ruby
pod 'TensorFlowLiteSwift', '~> 2.16.0'
```

**Pros:**
- Pure Swift
- Easy integration
- Stable API

**Cons:**
- ❌ No KV cache management
- ❌ No conversation handling
- ❌ No tool use support
- ❌ No streaming generation
- ❌ Manual tokenization required

**Verdict:** Not suitable for LLM chat apps.
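
To make the limitation concrete, raw TensorFlowLiteSwift inference looks roughly like this; everything an LLM runtime normally provides (tokenizer, KV cache, decoding loop) is left to the caller. A sketch only; the model path and `myTokenizer` helper are placeholders:

```swift
import TensorFlowLite

// One raw forward pass: no tokenizer, no KV cache, no decoding loop.
let interpreter = try Interpreter(modelPath: "/path/to/model.tflite")
try interpreter.allocateTensors()

// Tokenizing the prompt is entirely up to you (hypothetical helper).
let tokenIds: [Int32] = myTokenizer.encode("Hello")
let inputData = tokenIds.withUnsafeBufferPointer { Data(buffer: $0) }
try interpreter.copy(inputData, toInputAt: 0)

try interpreter.invoke()

// The output is raw logits; sampling and detokenization are also on you.
let logits = try interpreter.output(at: 0)
```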

### Option 3: Wait for Swift APIs

Monitor for the official Swift API release:
- https://ai.google.dev/edge/litert-lm
- https://github.com/google-ai-edge/LiteRT-LM

**Timeline:** Unknown (marked as "coming soon" since 2024)

## What Works Now

The current implementation runs in **stub/fallback mode**:
- ✅ UI fully functional
- ✅ Audio recording/playback
- ✅ TTS
- ✅ Web search
- ✅ Model download
- ✅ Conversation management
- ❌ LLM inference (stubbed)

## To Enable Full LLM Support

### Step 1: Add C++ Bridge

1. Create the bridge files:
```bash
# In your project
touch SleepyAgent/Inference/Bridge/LlmEngineBridge.h
touch SleepyAgent/Inference/Bridge/LlmEngineBridge.mm
```

2. Download the LiteRT-LM iOS binaries:
```bash
# From GitHub releases or build from source
# https://github.com/google-ai-edge/LiteRT-LM/releases
```

3. Link the required libraries:
- `liblitert_lm.a` (static library)
- `libtensorflow-lite.a`
- Metal framework
- CoreML framework
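
With the bridge files in place, expose them to Swift via the project's Objective-C bridging header (the file name below is illustrative and assumes Xcode's default naming):

```objc
// SleepyAgent-Bridging-Header.h
#import "LlmEngineBridge.h"
```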

### Step 2: Update Build Settings

In Xcode:
1. Set `Compile Sources As` to `Objective-C++` for .mm files
2. Add header search paths for LiteRT-LM
3. Link required frameworks
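
These settings can also be captured in an xcconfig file. A sketch; every path and the bridging-header location are placeholders for this project's actual layout:

```
// LiteRTLM.xcconfig
HEADER_SEARCH_PATHS = $(inherited) $(SRCROOT)/ThirdParty/litert_lm/include
LIBRARY_SEARCH_PATHS = $(inherited) $(SRCROOT)/ThirdParty/litert_lm/lib
OTHER_LDFLAGS = $(inherited) -llitert_lm -ltensorflow-lite -framework Metal -framework CoreML
CLANG_CXX_LANGUAGE_STANDARD = c++17
SWIFT_OBJC_BRIDGING_HEADER = SleepyAgent/SleepyAgent-Bridging-Header.h
```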

### Step 3: Implement Bridge Methods

See the TODO comments in `LlmEngine.swift` for the specific methods to implement.

## Testing Without LLM

The app works in "demo mode" with stub responses. To test:
1. Build and run
2. Type any message
3. You should see a stub response describing the LiteRT-LM integration status
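
The stub path can also be exercised from a unit test. A minimal sketch (the module name and the `[STUB]` prefix check are assumptions based on the current stub implementation):

```swift
import XCTest
@testable import SleepyAgent  // assumed module name

final class StubLlmEngineTests: XCTestCase {
    func testStubGenerateReturnsPlaceholder() async throws {
        // Any existing file satisfies the stub's file-existence check.
        let path = FileManager.default.temporaryDirectory
            .appendingPathComponent("dummy.litertlm").path
        _ = FileManager.default.createFile(atPath: path, contents: Data("stub".utf8))

        let engine = LiteRtLlmEngine.shared
        try await engine.loadModel(path: path)

        let conversation = try await engine.createConversation(systemPrompt: "You are helpful.")
        let reply = try await engine.generate(
            conversation: conversation, prompt: "Hello", audioData: nil, images: nil)
        XCTAssertTrue(reply.contains("[STUB]"))
    }
}
```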

## References

- **LiteRT-LM GitHub:** https://github.com/google-ai-edge/LiteRT-LM
- **iOS C++ Guide:** https://ai.google.dev/edge/litert-lm/cpp
- **CocoaPods:** https://cocoapods.org/pods/TensorFlowLiteSwift
- **Models:** https://huggingface.co/litert-community
- **Sample App:** https://github.com/google-ai-edge/gallery (AI Edge Gallery)

## Recommendation

For a developer build/demo:
1. Use the current stub implementation to test UI and features
2. Add the C++ bridge when you need production LLM support
3. Monitor for the official Swift API release

The architecture is in place; only the inference backend integration remains.

LlmEngine.swift
@@ -1,5 +1,4 @@
 import Foundation
-import LiteRT
 import UIKit
 
 // MARK: - Errors
@@ -11,6 +10,7 @@ public enum LlmEngineError: LocalizedError {
     case generationFailed(underlying: Error)
     case invalidMultimodalInput
     case engineInitializationFailed(underlying: Error)
+    case notImplemented
 
     public var errorDescription: String? {
         switch self {
@@ -26,34 +26,24 @@ public enum LlmEngineError: LocalizedError {
             return "Invalid multimodal input provided"
         case .engineInitializationFailed(let error):
             return "Failed to initialize engine: \(error.localizedDescription)"
+        case .notImplemented:
+            return "This feature requires LiteRT-LM C++ integration (Swift APIs coming soon)"
         }
     }
 }
 
-// MARK: - LiteRT Conversation Wrapper
+// MARK: - Conversation
 
-/// Wrapper for LiteRT Conversation to manage lifecycle
+/// Conversation wrapper for managing chat sessions
+/// Note: In full implementation, this wraps LiteRT-LM's Conversation object
 public final class Conversation: @unchecked Sendable {
-    internal let liteRtConversation: LRTConversation
-    private let lock = NSLock()
-    private var isClosed = false
+    public var isAlive: Bool = true
+    internal var messageHistory: [(role: String, content: String)] = []
 
-    public var isAlive: Bool {
-        lock.lock()
-        defer { lock.unlock() }
-        return !isClosed
-    }
-
-    internal init(liteRtConversation: LRTConversation) {
-        self.liteRtConversation = liteRtConversation
-    }
+    internal init() {}
 
     public func close() {
-        lock.lock()
-        defer { lock.unlock() }
-        guard !isClosed else { return }
-        isClosed = true
-        liteRtConversation.close()
+        isAlive = false
     }
 
     deinit {
@@ -63,52 +53,86 @@ public final class Conversation: @unchecked Sendable {
 
 // MARK: - LlmEngine Protocol
 
 /// LLM Engine interface for text generation with optional multimodal inputs
 public protocol LlmEngine: Actor {
     /// Load a model from the given path
+    var isLoaded: Bool { get }
+
     func loadModel(path: String) async throws
 
     /// Creates a new conversation with the given system prompt
     /// This should be called once per chat session to enable KV cache reuse
     func createConversation(systemPrompt: String) throws -> Conversation
 
     /// Generate a response within an existing conversation
     /// This reuses the KV cache from previous turns
     func generate(
         conversation: Conversation,
         prompt: String,
         audioData: Data?,
         images: [UIImage]?
     ) async throws -> String
 
     /// Generate a streaming response within an existing conversation
     func generateStream(
         conversation: Conversation,
         prompt: String,
         audioData: Data?,
         images: [UIImage]?
     ) -> AsyncThrowingStream<String, Error>
 
-    /// Check if a model is currently loaded
-    func isLoaded() -> Bool
-
     /// Unload the current model and free resources
     func unload()
 }
 
 // MARK: - LiteRT-LM Engine Implementation
 
-/// LiteRT-LM based LLM Engine implementation for Gemma models
-/// Uses .litert model format - download from HuggingFace LiteRT Community
-@globalActor
+/// LiteRT-LM based LLM Engine
+///
+/// # Important Implementation Note:
+///
+/// LiteRT-LM Swift APIs are "coming soon" per Google (as of 2025).
+/// Current iOS support requires using the C++ API directly with Objective-C++ bridging.
+///
+/// ## Integration Options:
+///
+/// ### Option 1: Use TensorFlowLiteSwift (Limited)
+/// Standard LiteRT pod works for basic inference but lacks LLM-specific features
+/// like KV cache management, conversation handling, and tool use.
+///
+/// ```ruby
+/// # Podfile
+/// pod 'TensorFlowLiteSwift', '~> 2.16.0'
+/// ```
+///
+/// ### Option 2: C++ Bridge (Full Features) ⭐ Recommended
+/// Use LiteRT-LM C++ API with Objective-C++ wrapper:
+///
+/// 1. Add C++ source files (.mm)
+/// 2. Include LiteRT-LM headers
+/// 3. Bridge to Swift via Objective-C
+///
+/// ```objc
+/// // LlmEngineBridge.h
+/// @interface LlmEngineBridge : NSObject
+/// - (BOOL)loadModel:(NSString *)path error:(NSError **)error;
+/// - (NSString *)generate:(NSString *)prompt;
+/// @end
+/// ```
+///
+/// ### Option 3: Wait for Swift APIs
+/// Google has announced Swift APIs are coming. Monitor:
+/// https://ai.google.dev/edge/litert-lm
+///
+/// ## Current Status:
+/// - Android: ✅ Full Kotlin support
+/// - iOS: ⚠️ C++ only (Swift APIs coming soon)
+/// - Models: ✅ Gemma 3n E2B/E4B available on HuggingFace
+///
+/// ## References:
+/// - LiteRT-LM GitHub: https://github.com/google-ai-edge/LiteRT-LM
+/// - iOS C++ Guide: https://ai.google.dev/edge/litert-lm/cpp
+/// - Models: https://huggingface.co/litert-community
+///
 public actor LiteRtLlmEngine: LlmEngine {
     public static let shared = LiteRtLlmEngine()
 
-    private var engine: LRTEngine?
+    public private(set) var isLoaded: Bool = false
     private var currentModelPath: String?
+    private var currentConversation: Conversation?
 
     private let maxTokens = 16384
     private let cacheDirName = "litertlm_cache"
 
+    // TODO: Add actual LiteRT-LM C++ engine reference here
+    // private var cppEngine: UnsafeMutableRawPointer?
+
     private init() {}
@@ -117,48 +141,49 @@ public actor LiteRtLlmEngine: LlmEngine {
     public func loadModel(path: String) async throws {
         unload()
 
         let modelFile = URL(fileURLWithPath: path)
         guard FileManager.default.fileExists(atPath: path) else {
             throw LlmEngineError.modelNotFound(path: path)
         }
 
         // Ensure cache directory exists
         let cacheDir = FileManager.default.temporaryDirectory.appendingPathComponent(cacheDirName)
         try? FileManager.default.createDirectory(at: cacheDir, withIntermediateDirectories: true)
+        // TODO: Implement actual LiteRT-LM loading
+        //
+        // Example C++ integration (in .mm file):
+        //
+        // #include "litert_lm/engine.h"
+        //
+        // auto config = litert::lm::EngineConfig{
+        //     .model_path = path.UTF8String,
+        //     .max_num_tokens = maxTokens
+        // };
+        //
+        // auto engine = litert::lm::Engine::Create(config);
+        // if (!engine.ok()) {
+        //     throw LlmEngineError.engineInitializationFailed(...)
+        // }
+        // cppEngine = engine->release();
 
         do {
-            let engineConfig = LRTEngineConfig(
-                modelPath: path,
-                backend: .cpu,
-                visionBackend: .cpu,
-                audioBackend: .cpu,
-                maxNumTokens: maxTokens,
-                cacheDir: cacheDir.path
-            )
+            // Stub: Simulate loading
+            try await Task.sleep(nanoseconds: 500_000_000)
 
-            let newEngine = LRTEngine(config: engineConfig)
-            try newEngine.initialize()
-
-            self.engine = newEngine
-            self.currentModelPath = path
-
         } catch {
             throw LlmEngineError.engineInitializationFailed(underlying: error)
         }
+        self.isLoaded = true
+        self.currentModelPath = path
     }
 
-    // MARK: - Conversation Management
+    // MARK: - Conversation
 
     public func createConversation(systemPrompt: String) throws -> Conversation {
-        guard let engine = engine else {
+        guard isLoaded else {
             throw LlmEngineError.modelNotLoaded
         }
 
-        let systemContent = LRTContent.text(systemPrompt)
-        let conversationConfig = LRTConversationConfig(systemInstruction: systemContent)
+        let conversation = Conversation()
+        conversation.messageHistory.append(("system", systemPrompt))
+        self.currentConversation = conversation
 
-        let liteRtConversation = engine.createConversation(config: conversationConfig)
-        return Conversation(liteRtConversation: liteRtConversation)
+        // TODO: Create actual LiteRT-LM conversation
+        // auto conv = cppEngine->CreateConversation(config);
+
+        return conversation
     }
 
     // MARK: - Generation
@@ -173,9 +198,19 @@ public actor LiteRtLlmEngine: LlmEngine {
             throw LlmEngineError.conversationClosed
         }
 
-        let contents = try buildContents(prompt: prompt, audioData: audioData, images: images)
-        let response = try conversation.liteRtConversation.sendMessage(contents)
-        return response.stringValue ?? ""
+        guard isLoaded else {
+            throw LlmEngineError.modelNotLoaded
+        }
+
+        // TODO: Implement actual generation
+        //
+        // C++ example:
+        // auto contents = litert::lm::Contents::FromText(prompt.UTF8String);
+        // auto response = conv->SendMessage(contents);
+        // return [NSString stringWithUTF8String:response.text().c_str()];
+
+        // Stub response
+        return "[STUB] LiteRT-LM Swift APIs are coming soon. Use C++ bridge for full functionality."
     }
 
     public func generateStream(
@@ -191,24 +226,25 @@ public actor LiteRtLlmEngine: LlmEngine {
                     throw LlmEngineError.conversationClosed
                 }
 
-                // For multimodal inputs, use Contents API (non-streaming for now)
-                if audioData != nil || !(images?.isEmpty ?? true) {
-                    let contents = try buildContents(prompt: prompt, audioData: audioData, images: images)
-                    let response = try conversation.liteRtConversation.sendMessage(contents)
-                    if let text = response.stringValue {
-                        continuation.yield(text)
-                    }
-                    continuation.finish()
-                    return
+                guard self.isLoaded else {
+                    throw LlmEngineError.modelNotLoaded
                 }
 
-                // Text-only streaming - reuses KV cache
-                let stream = conversation.liteRtConversation.sendMessageAsync(prompt)
+                // TODO: Implement streaming with LiteRT-LM C++
+                //
+                // C++ example:
+                // auto stream = conv->SendMessageAsync(contents);
+                // for (const auto& token : stream) {
+                //     continuation.yield(...)
+                // }
 
-                for try await message in stream {
-                    if let text = message.stringValue {
-                        continuation.yield(text)
-                    }
+                // Stub: Simulate streaming
+                let message = "LiteRT-LM on iOS currently requires C++ integration. Swift APIs are 'coming soon' per Google. See LlmEngine.swift comments for integration options."
+                let words = message.split(separator: " ")
+
+                for word in words {
+                    continuation.yield(String(word) + " ")
+                    try await Task.sleep(nanoseconds: 50_000_000)
                 }
 
                 continuation.finish()
@@ -220,116 +256,75 @@ public actor LiteRtLlmEngine: LlmEngine {
         }
     }
 
-    // MARK: - Utility Methods
-
-    public func isLoaded() -> Bool {
-        engine != nil
-    }
+    // MARK: - Utility
 
     public func unload() {
-        engine?.close()
-        engine = nil
+        // TODO: Clean up C++ engine
+        // if (cppEngine) {
+        //     delete static_cast<litert::lm::Engine*>(cppEngine);
+        //     cppEngine = nullptr;
+        // }
+
+        isLoaded = false
         currentModelPath = nil
+        currentConversation = nil
     }
+}
 
+// MARK: - TensorFlowLiteSwift Alternative (Basic)
+
+/// Alternative using standard TensorFlowLiteSwift
+/// Limited functionality - no KV cache, conversation management, or tool use
+///
+/// Use this if you need basic inference only:
+/// ```ruby
+/// pod 'TensorFlowLiteSwift', '~> 2.16.0'
+/// ```
+public actor TFLiteEngine: LlmEngine {
+    public static let shared = TFLiteEngine()
+
+    public private(set) var isLoaded: Bool = false
+
+    // TODO: Add TFLInterpreter
+    // private var interpreter: Interpreter?
+
+    public init() {}
+
+    public func loadModel(path: String) async throws {
+        // TODO: Initialize TFLInterpreter
+        // interpreter = try Interpreter(modelPath: path)
+        // try interpreter?.allocateTensors()
+        isLoaded = true
+    }
 
-    // MARK: - Private Helpers
+    public func createConversation(systemPrompt: String) throws -> Conversation {
+        Conversation()
+    }
 
-    private func buildContents(
+    public func generate(
+        conversation: Conversation,
         prompt: String,
         audioData: Data?,
         images: [UIImage]?
-    ) throws -> LRTContents {
-        var contents: [LRTContent] = []
-
-        // Add images first if provided (max 1 for efficiency)
-        if let images = images {
-            for image in images.prefix(1) {
-                if let resizedImage = resizeImage(image, maxSize: CGSize(width: 512, height: 512)),
-                   let jpegData = resizedImage.jpegData(compressionQuality: 0.85) {
-                    contents.append(LRTContent.imageData(jpegData))
-                }
-            }
-        }
-
-        // Add audio if provided
-        if let audioData = audioData, audioData.count >= 6400 {
-            // Assume audio is already in WAV format or convert if needed
-            let wavData = isWavData(audioData) ? audioData : try convertPcmToWav(audioData)
-            contents.append(LRTContent.audioData(wavData))
-        }
-
-        // Add text prompt
-        contents.append(LRTContent.text(prompt))
-
-        return LRTContents(contents: contents)
+    ) async throws -> String {
+        // TODO: Basic TFLite inference
+        // This won't work well for LLMs without proper tokenization
+        throw LlmEngineError.notImplemented
    }
 
-    private func resizeImage(_ image: UIImage, maxSize: CGSize) -> UIImage? {
-        let size = image.size
-
-        guard size.width > maxSize.width || size.height > maxSize.height else {
-            return image
+    public func generateStream(
+        conversation: Conversation,
+        prompt: String,
+        audioData: Data?,
+        images: [UIImage]?
+    ) -> AsyncThrowingStream<String, Error> {
+        AsyncThrowingStream { continuation in
+            continuation.finish(throwing: LlmEngineError.notImplemented)
        }
-
-        let widthRatio = maxSize.width / size.width
-        let heightRatio = maxSize.height / size.height
-        let ratio = min(widthRatio, heightRatio)
-
-        let newSize = CGSize(width: size.width * ratio, height: size.height * ratio)
-
-        UIGraphicsBeginImageContextWithOptions(newSize, false, 1.0)
-        defer { UIGraphicsEndImageContext() }
-
-        image.draw(in: CGRect(origin: .zero, size: newSize))
-        return UIGraphicsGetImageFromCurrentImageContext()
-    }
-
-    private func isWavData(_ data: Data) -> Bool {
-        // Check for WAV header: "RIFF" magic number
-        guard data.count >= 12 else { return false }
-        let header = data.prefix(4)
-        return header.elementsEqual([0x52, 0x49, 0x46, 0x46]) // "RIFF"
-    }
-
-    private func convertPcmToWav(_ pcmData: Data, sampleRate: Int32 = 16000, channels: UInt16 = 1) throws -> Data {
-        var wavData = Data()
-
-        // RIFF header
-        wavData.append("RIFF".data(using: .ascii)!)
-
-        // File size (will be filled later)
-        let fileSize = UInt32(pcmData.count + 36)
-        wavData.append(withUnsafeBytes(of: fileSize.littleEndian) { Data($0) })
-
-        // WAVE header
-        wavData.append("WAVE".data(using: .ascii)!)
-
-        // fmt chunk
-        wavData.append("fmt ".data(using: .ascii)!)
-        let fmtChunkSize: UInt32 = 16
-        wavData.append(withUnsafeBytes(of: fmtChunkSize.littleEndian) { Data($0) })
-
-        let audioFormat: UInt16 = 1 // PCM
-        wavData.append(withUnsafeBytes(of: audioFormat.littleEndian) { Data($0) })
-
-        wavData.append(withUnsafeBytes(of: channels.littleEndian) { Data($0) })
-        wavData.append(withUnsafeBytes(of: sampleRate.littleEndian) { Data($0) })
-
-        let byteRate = UInt32(sampleRate) * UInt32(channels) * 2 // 16-bit
-        wavData.append(withUnsafeBytes(of: byteRate.littleEndian) { Data($0) })
-
-        let blockAlign = channels * 2
-        wavData.append(withUnsafeBytes(of: blockAlign.littleEndian) { Data($0) })
-
-        let bitsPerSample: UInt16 = 16
-        wavData.append(withUnsafeBytes(of: bitsPerSample.littleEndian) { Data($0) })
-
-        // data chunk
-        wavData.append("data".data(using: .ascii)!)
-        let dataChunkSize = UInt32(pcmData.count)
-        wavData.append(withUnsafeBytes(of: dataChunkSize.littleEndian) { Data($0) })
-        wavData.append(pcmData)
-
-        return wavData
+    public func unload() {
+        // interpreter = nil
+        isLoaded = false
     }
 }

MainViewModel.swift
@@ -16,6 +16,12 @@ class MainViewModel: ObservableObject {
     @Published var conversations: [ConversationInfo] = []
     @Published var currentConversationId: UUID = UUID()
 
+    // Computed properties for view state
+    var isLoading: Bool { isGenerating && currentResponse.isEmpty }
+    var isProcessing: Bool { isGenerating }
+    var isExecutingTool: Bool { false } // TODO: Implement tool execution state
+    var streamingText: String { currentResponse }
+
     private let agent: Agent
     private let audioRecorder: AudioRecorder
     private let ttsService: TtsService
@@ -41,6 +47,11 @@ class MainViewModel: ObservableObject {
         await processTextMessage(text)
     }
 
+    func sendMessage(text: String) async {
+        guard !text.isEmpty else { return }
+        await processTextMessage(text)
+    }
+
     func sendImage(_ image: UIImage, text: String = "") async {
         selectedImage = image
         let displayText = text.isEmpty ? "[Image]" : text
@@ -156,6 +167,11 @@ class MainViewModel: ObservableObject {
         errorMessage = message
         showError = true
     }
 
+    func dismissError() {
+        showError = false
+        errorMessage = nil
+    }
+
 }
 
 struct ConversationInfo: Identifiable {