//
//  LlmEngineBridge.h
//  SleepyAgent
//
//  Objective-C bridge to LiteRT-LM C++ API
//

// FIX: the original `#import` had lost its argument (angle brackets stripped);
// Foundation is required for NSObject/NSString/NSInteger and the nullability macros.
#import <Foundation/Foundation.h>

NS_ASSUME_NONNULL_BEGIN

/// Error domain for NSError objects produced by the LiteRT-LM bridge.
extern NSString *const kLiteRTLMErrorDomain;

/// Hardware accelerator options for model inference.
typedef NS_ENUM(NSInteger, LiteRTAccelerator) {
    LiteRTAcceleratorCPU = 0,
    LiteRTAcceleratorMetal = 1,
    LiteRTAcceleratorCoreML = 2
};

/// Represents a streaming response from the LLM.
///
/// Obtain an instance from -[LlmEngineBridge generateResponseStream:error:]
/// and pull chunks until -nextChunk returns nil.
@interface LiteRTResponseStream : NSObject

/// Get the next chunk of the response (blocking call).
/// @return The next text chunk, or nil when the stream is complete.
- (nullable NSString *)nextChunk;

/// Whether the stream has more data to read.
@property (nonatomic, readonly) BOOL hasMore;

/// Close the stream and release its underlying resources.
/// Safe to call more than once.
- (void)close;

@end

/// Bridge class for LiteRT-LM LLM inference.
/// Wraps the C++ LiteRT-LM API for use from Swift.
@interface LlmEngineBridge : NSObject

/// Use -initWithModelPath:accelerator:error: instead.
- (instancetype)init NS_UNAVAILABLE;

/// Initialize the LLM engine with a model file.
/// @param modelPath Path to the .litertlm model file.
/// @param accelerator Hardware accelerator to use (CPU, Metal, CoreML).
/// @param error Error pointer populated on initialization failure
///              (domain kLiteRTLMErrorDomain).
/// @return An initialized engine bridge, or nil on error.
- (nullable instancetype)initWithModelPath:(NSString *)modelPath
                               accelerator:(LiteRTAccelerator)accelerator
                                     error:(NSError **)error NS_DESIGNATED_INITIALIZER;

/// Generate a complete response for a single prompt (non-streaming).
/// @param prompt The user's input text.
/// @param error Error pointer populated on generation failure.
/// @return The generated response, or nil on error.
- (nullable NSString *)generateResponse:(NSString *)prompt
                                  error:(NSError **)error;

/// Generate a streaming response.
/// @param prompt The user's input text.
/// @param error Error pointer populated on generation failure.
/// @return A stream object to read response chunks, or nil on error.
- (nullable LiteRTResponseStream *)generateResponseStream:(NSString *)prompt
                                                    error:(NSError **)error;

/// Add a message to the conversation history.
/// Maintains context for multi-turn conversations (KV cache).
/// @param message The message text.
/// @param role The role ("user" or "assistant").
- (void)addToHistory:(NSString *)message role:(NSString *)role;

/// Clear the conversation history and reset the KV cache.
- (void)clearHistory;

/// Whether the engine is initialized and ready for inference.
@property (nonatomic, readonly) BOOL isReady;

/// The maximum number of tokens the loaded model supports.
@property (nonatomic, readonly) NSInteger maxTokens;

/// Estimate the number of tokens in a string.
/// @param text The text to measure.
/// @return The estimated token count, or -1 if estimation fails.
- (NSInteger)estimateTokens:(NSString *)text;

/// Close the engine and release all resources.
/// The receiver must not be used after calling this.
- (void)close;

@end

NS_ASSUME_NONNULL_END