45d43f2645
- LlmEngineBridge.h/.mm: Objective-C++ wrapper around LiteRT-LM C++ API - SleepyAgent-Bridging-Header.h: Swift bridging header - Updated LlmEngine.swift to use the bridge - Added LITERT_INTEGRATION.md with detailed research findings Based on analysis of Google's litert-samples repository: - Google uses C++ bridge pattern for iOS (confirmed in image_segmentation example) - MediaPipe has working Swift API but is deprecated - LiteRT-LM Swift APIs are 'coming soon' The bridge pattern matches how Google AI Edge Gallery iOS app is likely implemented
93 lines · 2.9 KiB · Objective-C
//
//  LlmEngineBridge.h
//  SleepyAgent
//
//  Objective-C bridge to the LiteRT-LM C++ API.
//
|
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
/// Error domain for NSError values produced by this bridge.
/// The constant's definition lives in LlmEngineBridge.mm.
/// NOTE(review): the `k` prefix is conventionally reserved for file-static
/// constants; a public extern would normally be named LiteRTLMErrorDomain.
/// Renaming would break existing callers, so it is left as-is.
extern NSString *const kLiteRTLMErrorDomain;
/// Hardware accelerator options for model inference.
/// Raw values are explicit so they stay stable across the Swift bridge;
/// NOTE(review): the mapping of each case to an actual backend is decided
/// by the C++ side in LlmEngineBridge.mm — confirm there.
typedef NS_ENUM(NSInteger, LiteRTAccelerator) {
    /// Run inference on the CPU.
    LiteRTAcceleratorCPU = 0,
    /// Run inference via the Metal GPU backend.
    LiteRTAcceleratorMetal = 1,
    /// Run inference via Core ML.
    LiteRTAcceleratorCoreML = 2
};
/// A pull-based stream of text chunks from an in-flight LLM generation.
/// Instances are obtained from -[LlmEngineBridge generateResponseStream:error:].
@interface LiteRTResponseStream : NSObject

/// Returns the next chunk of the response, blocking the calling thread
/// until a chunk is available.
/// Because this call blocks, invoke it from a background queue rather
/// than the main thread.
/// @return The next chunk of generated text, or nil once the stream is
///         complete.
- (nullable NSString *)nextChunk;

/// YES while the stream still has chunks to deliver.
@property (nonatomic, readonly) BOOL hasMore;

/// Closes the stream and releases its underlying resources.
- (void)close;

@end
/// Bridge class for LiteRT-LM LLM inference.
/// Wraps the C++ LiteRT-LM API for use in Swift (via the bridging header).
/// Create instances with -initWithModelPath:accelerator:error:.
@interface LlmEngineBridge : NSObject

/// Use -initWithModelPath:accelerator:error: instead.
- (instancetype)init NS_UNAVAILABLE;

/// Also unavailable: without this, [LlmEngineBridge new] would still compile
/// and bypass the NS_UNAVAILABLE marker on -init, failing only at runtime.
+ (instancetype)new NS_UNAVAILABLE;

/// Initializes the LLM engine with a model file.
/// @param modelPath Path to the .litertlm model file on disk.
/// @param accelerator Hardware accelerator to use (CPU, Metal, or Core ML).
/// @param error On failure, set to an NSError in kLiteRTLMErrorDomain.
/// @return An initialized engine bridge, or nil on error.
/// @note Per Cocoa convention, check the return value for failure rather
///       than the error pointer — *error is only meaningful on a nil return.
- (nullable instancetype)initWithModelPath:(NSString *)modelPath
                               accelerator:(LiteRTAccelerator)accelerator
                                     error:(NSError **)error NS_DESIGNATED_INITIALIZER;

/// Generates a complete response for a single prompt (non-streaming).
/// @param prompt The user's input text.
/// @param error On failure, set to an NSError describing the problem.
/// @return The generated response, or nil on error.
- (nullable NSString *)generateResponse:(NSString *)prompt
                                  error:(NSError **)error;

/// Starts a streaming generation for the given prompt.
/// @param prompt The user's input text.
/// @param error On failure, set to an NSError describing the problem.
/// @return A stream object from which response chunks can be read, or nil
///         on error.
- (nullable LiteRTResponseStream *)generateResponseStream:(NSString *)prompt
                                                    error:(NSError **)error;

/// Appends a message to the conversation history.
/// This maintains context for multi-turn conversations (KV cache).
/// @param message The message text.
/// @param role The speaker role: "user" or "assistant".
- (void)addToHistory:(NSString *)message
                role:(NSString *)role;

/// Clears the conversation history and resets the KV cache.
- (void)clearHistory;

/// YES when the engine is initialized and ready to generate.
@property (nonatomic, readonly) BOOL isReady;

/// The maximum number of tokens the loaded model supports.
@property (nonatomic, readonly) NSInteger maxTokens;

/// Estimates the number of tokens in a string.
/// @param text The text to measure.
/// @return The estimated token count, or -1 if estimation fails.
- (NSInteger)estimateTokens:(NSString *)text;

/// Shuts down the engine and releases all of its resources.
- (void)close;

@end
NS_ASSUME_NONNULL_END