Files
sleepy_agent_ios/SleepyAgent/Inference/Bridge/LlmEngineBridge.h
sleepy 45d43f2645 Add Objective-C++ bridge for LiteRT-LM integration
- LlmEngineBridge.h/.mm: Objective-C++ wrapper around LiteRT-LM C++ API
- SleepyAgent-Bridging-Header.h: Swift bridging header
- Updated LlmEngine.swift to use the bridge
- Added LITERT_INTEGRATION.md with detailed research findings

Based on analysis of Google's litert-samples repository:
- Google uses a C++ bridge pattern for iOS (confirmed in the image_segmentation example)
- MediaPipe has a working Swift API but is deprecated
- LiteRT-LM Swift APIs are 'coming soon'

The bridge pattern matches how the Google AI Edge Gallery iOS app is likely implemented
2026-04-06 14:54:06 +02:00
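
The commit notes that LlmEngine.swift was updated to call this bridge, which is imported into Swift through SleepyAgent-Bridging-Header.h. Below is a minimal sketch of what such a Swift call site could look like; the LlmEngineSketch wrapper, the error code, and the choice of the Metal accelerator are illustrative assumptions, not the actual LlmEngine.swift. The bridge's Objective-C names surface in Swift via the standard name translation (for example, initWithModelPath:accelerator:error: becomes a throwing initializer).

import Foundation

// Illustrative Swift caller for the bridge declared in LlmEngineBridge.h below.
// Everything except LlmEngineBridge, LiteRTAccelerator and kLiteRTLMErrorDomain is hypothetical.
final class LlmEngineSketch {
    private let bridge: LlmEngineBridge

    init(modelPath: String) throws {
        // initWithModelPath:accelerator:error: imports as a throwing initializer;
        // LiteRTAcceleratorMetal imports as .metal after enum-prefix stripping.
        bridge = try LlmEngineBridge(modelPath: modelPath, accelerator: .metal)
    }

    func reply(to prompt: String) throws -> String {
        guard bridge.isReady else {
            throw NSError(domain: kLiteRTLMErrorDomain, code: -1, userInfo: nil)
        }
        // generateResponse:error: (nullable return + NSError**) imports as a throwing method.
        return try bridge.generateResponse(prompt)
    }
}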

93 lines
2.9 KiB
Objective-C

//
//  LlmEngineBridge.h
//  SleepyAgent
//
//  Objective-C bridge to LiteRT-LM C++ API
//

#import <Foundation/Foundation.h>

NS_ASSUME_NONNULL_BEGIN

/// Error domain for LiteRT-LM bridge errors
extern NSString *const kLiteRTLMErrorDomain;

/// Accelerator options for model inference
typedef NS_ENUM(NSInteger, LiteRTAccelerator) {
    LiteRTAcceleratorCPU = 0,
    LiteRTAcceleratorMetal = 1,
    LiteRTAcceleratorCoreML = 2
};

/// Represents a streaming response from the LLM
@interface LiteRTResponseStream : NSObject

/// Get the next chunk of the response (blocking)
/// Returns nil when stream is complete
- (nullable NSString *)nextChunk;

/// Check if the stream has more data
@property (nonatomic, readonly) BOOL hasMore;

/// Close the stream and release resources
- (void)close;

@end

/// Bridge class for LiteRT-LM LLM inference
/// Wraps the C++ LiteRT-LM API for use in Swift
@interface LlmEngineBridge : NSObject

- (instancetype)init NS_UNAVAILABLE;

/// Initialize the LLM engine with a model file
/// @param modelPath Path to the .litertlm model file
/// @param accelerator Hardware accelerator to use (CPU, Metal, CoreML)
/// @param error Error pointer for initialization failures
/// @return Initialized engine bridge or nil on error
- (nullable instancetype)initWithModelPath:(NSString *)modelPath
                               accelerator:(LiteRTAccelerator)accelerator
                                     error:(NSError **)error NS_DESIGNATED_INITIALIZER;

/// Generate a response for a single prompt (non-streaming)
/// @param prompt The user's input text
/// @param error Error pointer for generation failures
/// @return The generated response or nil on error
- (nullable NSString *)generateResponse:(NSString *)prompt
                                  error:(NSError **)error;

/// Generate a streaming response
/// @param prompt The user's input text
/// @param error Error pointer for generation failures
/// @return A stream object to read response chunks
- (nullable LiteRTResponseStream *)generateResponseStream:(NSString *)prompt
                                                    error:(NSError **)error;

/// Add a message to the conversation history
/// This maintains context for multi-turn conversations (KV cache)
/// @param message The message text
/// @param role The role ("user" or "assistant")
- (void)addToHistory:(NSString *)message
                role:(NSString *)role;

/// Clear the conversation history and reset KV cache
- (void)clearHistory;

/// Check if the engine is initialized and ready
@property (nonatomic, readonly) BOOL isReady;

/// Get the maximum number of tokens the model supports
@property (nonatomic, readonly) NSInteger maxTokens;

/// Estimate the number of tokens in a string
/// @param text The text to measure
/// @return Token count or -1 if estimation fails
- (NSInteger)estimateTokens:(NSString *)text;

/// Close the engine and release all resources
- (void)close;

@end

NS_ASSUME_NONNULL_END
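
For streaming and multi-turn use, the LiteRTResponseStream and history methods declared above could be driven from Swift roughly as follows. This is a sketch under the same name-translation assumptions (addToHistory:role: should surface as add(toHistory:role:)); whether the caller or the bridge itself records each turn is an assumption, not something the header specifies.

// Illustrative streaming loop against the bridge API declared above.
func streamReply(from bridge: LlmEngineBridge, prompt: String) throws -> String {
    // Assumption: the caller records turns; the bridge may instead do this internally.
    bridge.add(toHistory: prompt, role: "user")

    let stream = try bridge.generateResponseStream(prompt)
    defer { stream.close() }   // always release stream resources

    var full = ""
    // nextChunk blocks and returns nil once the stream is complete.
    while let chunk = stream.nextChunk() {
        full += chunk
        print(chunk, terminator: "")
    }

    bridge.add(toHistory: full, role: "assistant")
    return full
}

estimateTokens: and maxTokens presumably let the caller keep the running history inside the model's context window, and clearHistory resets the KV cache when a fresh conversation is needed.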