diff --git a/LITERT_INTEGRATION.md b/LITERT_INTEGRATION.md
new file mode 100644
index 0000000..95f5a6f
--- /dev/null
+++ b/LITERT_INTEGRATION.md
@@ -0,0 +1,236 @@
+# LiteRT-LM iOS Integration - Accurate Approach
+
+## What Google Actually Uses
+
+Based on analysis of Google's official samples:
+
+### 1. Google AI Edge Gallery App
+- **iOS app exists** on App Store: https://apps.apple.com/us/app/google-ai-edge-gallery/id6749645337
+- **Source code**: NOT in the gallery repo (Android only)
+- **Implementation**: Most likely LiteRT-LM via a C++ bridge; no iOS source is published (see GitHub issue #420 asking for it)
+
+### 2. MediaPipe LLM Inference (DEPRECATED but working)
+- Has a **working Swift API** via CocoaPods
+- Source: https://github.com/google-ai-edge/mediapipe-samples/tree/main/examples/llm_inference/ios
+
+```ruby
+pod 'MediaPipeTasksGenAI'
+pod 'MediaPipeTasksGenAIC'
+```
+
+```swift
+import MediaPipeTasksGenAI
+
+let options = LlmInference.Options(modelPath: path)
+let inference = try LlmInference(options: options)
+let result = try inference.generateResponse(inputText: prompt)
+```
+
+**Status**: Google deprecated this in favor of LiteRT-LM, but it is currently the only working Swift API.
+
+### 3. LiteRT Compiled Model API (Vision Models)
+- Uses the **Objective-C++ bridge** pattern
+- Source: https://github.com/google-ai-edge/litert-samples/tree/main/compiled_model_api/image_segmentation/ios
+
+Pattern:
+```objc
+// LiteRTSegmenter.h - Objective-C header
+@interface LiteRTSegmenter : NSObject
+- (instancetype)initWithModelPath:(NSString *)path error:(NSError **)error;
+@end
+```
+
+```objc++
+// LiteRTSegmenter.mm - Objective-C++ implementation
+#import "LiteRTSegmenter.h"
+#include "litert/cc/litert_compiled_model.h"
+
+@implementation LiteRTSegmenter {
+    std::optional<litert::CompiledModel> _model;
+}
+@end
+```
+
+## Recommended Integration for Sleepy Agent
+
+### Option 1: Use MediaPipe Tasks (Immediate, but deprecated)
+
+**Podfile:**
+```ruby
+pod 'MediaPipeTasksGenAI', '~> 0.10.0'
+pod 'MediaPipeTasksGenAIC'
+```
+
+**Note**: Limited to older model formats (.bin, not .litertlm); likely no Gemma 3n support.
+
+### Option 2: LiteRT-LM C++ Bridge (Recommended)
+
+Based on Google's actual implementation pattern. The `litert::lm` API names in the sketches below are illustrative; check them against the current LiteRT-LM headers.
+
+**Files to create:**
+
+1. **LlmEngineBridge.h** (Objective-C header)
+```objc
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+@interface LlmEngineBridge : NSObject
+- (nullable instancetype)initWithModelPath:(NSString *)path
+                                     error:(NSError **)error;
+- (nullable NSString *)generateResponse:(NSString *)prompt
+                                  error:(NSError **)error;
+- (void)close;
+@end
+
+NS_ASSUME_NONNULL_END
+```
+
+2. **LlmEngineBridge.mm** (Objective-C++ implementation)
+```objc++
+#import "LlmEngineBridge.h"
+#include "litert_lm/engine.h"
+#include "litert_lm/conversation.h"
+
+@interface LlmEngineBridge () {
+    std::unique_ptr<litert::lm::Engine> engine;
+    std::unique_ptr<litert::lm::Conversation> conversation;
+}
+@end
+
+@implementation LlmEngineBridge
+
+- (instancetype)initWithModelPath:(NSString *)path error:(NSError **)error {
+    self = [super init];
+    if (self) {
+        auto config = litert::lm::EngineConfig{
+            .model_path = [path UTF8String]
+        };
+        auto result = litert::lm::Engine::Create(config);
+        if (!result.ok()) {
+            if (error) {
+                *error = [NSError errorWithDomain:@"LlmEngineBridge"
+                                             code:1
+                                         userInfo:nil];
+            }
+            return nil;
+        }
+        engine = std::move(*result);
+
+        // Create conversation for KV cache
+        auto conv_result = engine->CreateConversation({});
+        if (conv_result.ok()) {
+            conversation = std::move(*conv_result);
+        }
+    }
+    return self;
+}
+
+- (NSString *)generateResponse:(NSString *)prompt error:(NSError **)error {
+    if (!conversation) {
+        return nil;
+    }
+
+    auto contents = litert::lm::Contents::FromText([prompt UTF8String]);
+    auto response = conversation->SendMessage(contents);
+
+    if (response.ok()) {
+        return [NSString stringWithUTF8String:response->text().c_str()];
+    }
+    return nil;
+}
+
+- (void)close {
+    conversation.reset();
+    engine.reset();
+}
+
+@end
+```
+
+3. **Bridging-Header.h**
+```objc
+#import "LlmEngineBridge.h"
+```
+
+4. **Swift wrapper** (update existing LlmEngine.swift)
+```swift
+import Foundation
+
+actor LiteRtLlmEngine: LlmEngine {
+    static let shared = LiteRtLlmEngine()
+
+    private var bridge: LlmEngineBridge?
+
+    func loadModel(path: String) async throws {
+        // initWithModelPath:error: imports into Swift as a throwing initializer
+        bridge = try LlmEngineBridge(modelPath: path)
+    }
+
+    func generate(prompt: String) async throws -> String {
+        guard let bridge = bridge else {
+            throw LlmEngineError.modelNotLoaded
+        }
+        // generateResponse:error: likewise imports as a throwing method
+        return try bridge.generateResponse(prompt)
+    }
+}
+```
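+
+Because `initWithModelPath:error:` and `generateResponse:error:` follow the Cocoa error convention (nullable result plus a trailing `NSError **` parameter), Swift imports them as throwing members and drops the `error:` argument entirely. A minimal sketch of driving the bridge above from Swift, assuming `modelPath` points at a downloaded model file:
+
+```swift
+do {
+    // The trailing error: parameters vanish; the members become `throws`.
+    let bridge = try LlmEngineBridge(modelPath: modelPath)
+    let reply = try bridge.generateResponse("Hello")
+    print(reply)
+} catch {
+    print("LiteRT-LM bridge error: \(error)")
+}
+```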
+
+### Build Configuration
+
+**Podfile:**
+```ruby
+# Standard LiteRT runtime (basic inference only; LiteRT-LM itself ships separately)
+pod 'TensorFlowLiteSwift', '~> 2.16.0'
+
+# Or manual integration with prebuilt LiteRT-LM binaries
+```
+
+**Build Settings** (an illustrative xcconfig follows this list):
+- Use the .mm extension so the bridge compiles as Objective-C++ (or set "Compile Sources As" to "Objective-C++")
+- Add header search paths for the LiteRT-LM includes
+- Link the C++ standard library (libc++)
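+
+A minimal sketch of the same settings as an `.xcconfig`. The keys are standard Xcode build settings; the header search path is a placeholder for wherever the LiteRT-LM headers land in your checkout:
+
+```
+// SleepyAgent.xcconfig (illustrative)
+CLANG_CXX_LANGUAGE_STANDARD = c++17
+CLANG_CXX_LIBRARY = libc++
+SWIFT_OBJC_BRIDGING_HEADER = SleepyAgent/Inference/Bridge/SleepyAgent-Bridging-Header.h
+HEADER_SEARCH_PATHS = $(inherited) $(SRCROOT)/third_party/litert_lm/include
+OTHER_LDFLAGS = $(inherited) -lc++
+```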
+
+## Where to Get LiteRT-LM Binaries
+
+1. **Build from source**: https://github.com/google-ai-edge/LiteRT-LM
+2. **Releases page**: Check https://github.com/google-ai-edge/LiteRT-LM/releases
+3. **CocoaPods**: May be available in the future
+
+## Current Status Summary
+
+| Approach        | Availability | Gemma 3n | Swift API | Recommendation   |
+|-----------------|--------------|----------|-----------|------------------|
+| MediaPipe Tasks | ✅ Now       | ❌ No    | ✅ Yes    | Short-term only  |
+| LiteRT-LM C++   | ✅ Now       | ✅ Yes   | ❌ No     | **Recommended**  |
+| LiteRT-LM Swift | ⏳ Coming    | ✅ Yes   | ✅ Yes    | Wait if possible |
+
+## Key Insight
+
+The Google AI Edge Gallery iOS app likely uses the **C++ bridge approach** since:
+1. No Swift source code is published
+2. The pattern matches their litert-samples
+3. LiteRT-LM's Swift APIs are still marked "coming soon"
+
+## Next Steps
+
+To complete Sleepy Agent iOS:
+
+1. Download/build LiteRT-LM iOS binaries
+2. Create the Objective-C++ bridge files (LlmEngineBridge.h/.mm)
+3. Update the Swift LlmEngine to use the bridge
+4. Configure Xcode build settings for C++
+5. Test with the Gemma 3n E2B model
+
+## References
+
+- **LiteRT-LM GitHub**: https://github.com/google-ai-edge/LiteRT-LM
+- **Google's C++ Bridge Example**: https://github.com/google-ai-edge/litert-samples/tree/main/compiled_model_api/image_segmentation/ios
+- **MediaPipe iOS Sample**: https://github.com/google-ai-edge/mediapipe-samples/tree/main/examples/llm_inference/ios
+- **Gallery Issue #420**: https://github.com/google-ai-edge/gallery/issues/420 (asking for iOS source)
diff --git a/SleepyAgent/Inference/Bridge/LlmEngineBridge.h b/SleepyAgent/Inference/Bridge/LlmEngineBridge.h
new file mode 100644
index 0000000..5f0a528
--- /dev/null
+++ b/SleepyAgent/Inference/Bridge/LlmEngineBridge.h
@@ -0,0 +1,92 @@
+//
+//  LlmEngineBridge.h
+//  SleepyAgent
+//
+//  Objective-C bridge to LiteRT-LM C++ API
+//
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+/// Error domain for LiteRT-LM bridge errors
+extern NSString *const kLiteRTLMErrorDomain;
+
+/// Accelerator options for model inference
+typedef NS_ENUM(NSInteger, LiteRTAccelerator) {
+    LiteRTAcceleratorCPU = 0,
+    LiteRTAcceleratorMetal = 1,
+    LiteRTAcceleratorCoreML = 2
+};
+
+/// Represents a streaming response from the LLM
+@interface LiteRTResponseStream : NSObject
+
+/// Get the next chunk of the response (blocking)
+/// Returns nil when the stream is complete
+- (nullable NSString *)nextChunk;
+
+/// Check if the stream has more data
+@property (nonatomic, readonly) BOOL hasMore;
+
+/// Close the stream and release resources
+- (void)close;
+
+@end
+
+/// Bridge class for LiteRT-LM LLM inference
+/// Wraps the C++ LiteRT-LM API for use in Swift
+@interface LlmEngineBridge : NSObject
+
+- (instancetype)init NS_UNAVAILABLE;
+
+/// Initialize the LLM engine with a model file
+/// @param modelPath Path to the .litertlm model file
+/// @param accelerator Hardware accelerator to use (CPU, Metal, CoreML)
+/// @param error Error pointer for initialization failures
+/// @return Initialized engine bridge or nil on error
+- (nullable instancetype)initWithModelPath:(NSString *)modelPath
+                               accelerator:(LiteRTAccelerator)accelerator
+                                     error:(NSError **)error NS_DESIGNATED_INITIALIZER;
+
+/// Generate a response for a single prompt (non-streaming)
+/// @param prompt The user's input text
+/// @param error Error pointer for generation failures
+/// @return The generated response or nil on error
+- (nullable NSString *)generateResponse:(NSString *)prompt
+                                  error:(NSError **)error;
+
+/// Generate a streaming response
+/// @param prompt The user's input text
+/// @param error Error pointer for generation failures
+/// @return A stream object to read response chunks
+- (nullable LiteRTResponseStream *)generateResponseStream:(NSString *)prompt
+                                                     error:(NSError **)error;
+
+/// Add a message to the conversation history
+/// This maintains context for multi-turn conversations (KV cache)
+/// @param message The message text
+/// @param role The role ("system", "user", or "assistant")
+- (void)addToHistory:(NSString *)message
+                role:(NSString *)role;
+
+/// Clear the conversation history and reset KV cache
+- (void)clearHistory;
+
+/// Check if the engine is initialized and ready
+@property (nonatomic, readonly) BOOL isReady;
+
+/// Get the maximum number of tokens the model supports
+@property (nonatomic, readonly) NSInteger maxTokens;
+
+/// Estimate the number of tokens in a string
+/// @param text The text to measure
+/// @return Token count or -1 if estimation fails
+- (NSInteger)estimateTokens:(NSString *)text;
+
+/// Close the engine and release all resources
+- (void)close;
+
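+/// Example (Swift): the trailing NSError** parameters import into Swift as
+/// `throws`, so given an initialized bridge a streamed generation looks like
+/// the following sketch:
+///
+///     let stream = try bridge.generateResponseStream(prompt)
+///     while stream.hasMore {
+///         if let chunk = stream.nextChunk() {
+///             print(chunk, terminator: "")
+///         }
+///     }
+///     stream.close()
+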
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/SleepyAgent/Inference/Bridge/LlmEngineBridge.mm b/SleepyAgent/Inference/Bridge/LlmEngineBridge.mm
new file mode 100644
index 0000000..a2b6fd9
--- /dev/null
+++ b/SleepyAgent/Inference/Bridge/LlmEngineBridge.mm
@@ -0,0 +1,273 @@
+//
+//  LlmEngineBridge.mm
+//  SleepyAgent
+//
+//  Objective-C++ implementation of LiteRT-LM bridge
+//
+
+#import "LlmEngineBridge.h"
+#include <memory>
+#include <string>
+#include <vector>
+
+// TODO: Include actual LiteRT-LM headers when available
+// These are placeholder includes - replace with actual paths
+// #include "litert_lm/engine.h"
+// #include "litert_lm/conversation.h"
+// #include "litert_lm/content.h"
+
+NSString *const kLiteRTLMErrorDomain = @"com.sleepyagent.litert.lm";
+
+// MARK: - Private Interface
+
+@interface LlmEngineBridge () {
+    // TODO: Replace with actual LiteRT-LM C++ types
+    // std::unique_ptr<litert::lm::Engine> _engine;
+    // std::unique_ptr<litert::lm::Conversation> _conversation;
+
+    // Stub: Just track state for now
+    BOOL _isInitialized;
+    NSString *_modelPath;
+    LiteRTAccelerator _accelerator;
+    NSMutableArray<NSDictionary<NSString *, NSString *> *> *_history;
+}
+@end
+
+// MARK: - Response Stream Implementation
+
+@interface LiteRTResponseStream () {
+    NSMutableArray<NSString *> *_chunks;
+    NSInteger _currentIndex;
+    BOOL _isComplete;
+}
+@end
+
+@implementation LiteRTResponseStream
+
+- (instancetype)init {
+    self = [super init];
+    if (self) {
+        _chunks = [NSMutableArray array];
+        _currentIndex = 0;
+        _isComplete = NO;
+    }
+    return self;
+}
+
+- (void)addChunk:(NSString *)chunk {
+    [_chunks addObject:chunk];
+}
+
+- (void)markComplete {
+    _isComplete = YES;
+}
+
+- (nullable NSString *)nextChunk {
+    if (_currentIndex < _chunks.count) {
+        return _chunks[_currentIndex++];
+    }
+    return nil;
+}
+
+- (BOOL)hasMore {
+    return _currentIndex < _chunks.count || !_isComplete;
+}
+
+- (void)close {
+    _chunks = nil;
+    _isComplete = YES;
+}
+
+@end
+
+// MARK: - LlmEngineBridge Implementation
+
+@implementation LlmEngineBridge
+
+- (nullable instancetype)initWithModelPath:(NSString *)modelPath
+                               accelerator:(LiteRTAccelerator)accelerator
+                                     error:(NSError **)error {
+    self = [super init];
+    if (self) {
+        _modelPath = [modelPath copy];
+        _accelerator = accelerator;
+        _history = [NSMutableArray array];
+
+        // Check if model file exists
+        if (![[NSFileManager defaultManager] fileExistsAtPath:modelPath]) {
+            if (error) {
+                *error = [NSError errorWithDomain:kLiteRTLMErrorDomain
+                                             code:404
+                                         userInfo:@{NSLocalizedDescriptionKey:
+                                             [NSString stringWithFormat:@"Model file not found: %@", modelPath]}];
+            }
+            return nil;
+        }
+
+        // TODO: Initialize actual LiteRT-LM engine
+        //
+        // Example implementation:
+        // auto config = litert::lm::EngineConfig{
+        //     .model_path = [modelPath UTF8String],
+        //     .max_num_tokens = 8192
+        // };
+        //
+        // auto accel = (accelerator == LiteRTAcceleratorMetal)
+        //     ? litert::HwAccelerators::kGpu
+        //     : litert::HwAccelerators::kCpu;
+        //
+        // auto result = litert::lm::Engine::Create(config, accel);
+        // if (!result.ok()) {
+        //     if (error) {
+        //         *error = [NSError errorWithDomain:kLiteRTLMErrorDomain
+        //                                      code:1
+        //                                  userInfo:@{NSLocalizedDescriptionKey:
+        //                                      @(result.status().message().data())}];
+        //     }
+        //     return nil;
+        // }
+        // _engine = std::move(*result);
+        //
+        // // Create conversation for KV cache
+        // auto conv_config = litert::lm::ConversationConfig{};
+        // auto conv_result = _engine->CreateConversation(conv_config);
+        // if (conv_result.ok()) {
+        //     _conversation = std::move(*conv_result);
+        // }
+
+        // Stub: Simulate successful initialization
+        _isInitialized = YES;
+    }
+    return self;
+}
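+
+// NOTE: When the real engine is wired up, a small helper keeps the
+// absl::Status -> NSError conversion in one place. A sketch follows; the
+// litert::lm types above and absl::Status here are assumptions until the
+// real headers are vendored:
+//
+// static NSError *LiteRTLMErrorFromStatus(const absl::Status &status,
+//                                         NSInteger code) {
+//     NSString *message = [NSString stringWithUTF8String:
+//         std::string(status.message()).c_str()];
+//     return [NSError errorWithDomain:kLiteRTLMErrorDomain
+//                                code:code
+//                            userInfo:@{NSLocalizedDescriptionKey: message}];
+// }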
+
+- (nullable NSString *)generateResponse:(NSString *)prompt
+                                  error:(NSError **)error {
+    if (!_isInitialized) {
+        if (error) {
+            *error = [NSError errorWithDomain:kLiteRTLMErrorDomain
+                                         code:2
+                                     userInfo:@{NSLocalizedDescriptionKey: @"Engine not initialized"}];
+        }
+        return nil;
+    }
+
+    // TODO: Implement actual generation with LiteRT-LM
+    //
+    // Example:
+    // auto contents = litert::lm::Contents::FromText([prompt UTF8String]);
+    // auto response = _conversation->SendMessage(contents);
+    // if (response.ok()) {
+    //     return [NSString stringWithUTF8String:response->text().c_str()];
+    // } else {
+    //     if (error) {
+    //         *error = [NSError errorWithDomain:kLiteRTLMErrorDomain
+    //                                      code:3
+    //                                  userInfo:@{NSLocalizedDescriptionKey:
+    //                                      @(response.status().message().data())}];
+    //     }
+    //     return nil;
+    // }
+
+    // Stub: Return placeholder response
+    return [NSString stringWithFormat:@"[STUB] LiteRT-LM Swift APIs are coming soon. "
+            @"This is a placeholder response for prompt: %@", prompt];
+}
+
+- (nullable LiteRTResponseStream *)generateResponseStream:(NSString *)prompt
+                                                     error:(NSError **)error {
+    if (!_isInitialized) {
+        if (error) {
+            *error = [NSError errorWithDomain:kLiteRTLMErrorDomain
+                                         code:2
+                                     userInfo:@{NSLocalizedDescriptionKey: @"Engine not initialized"}];
+        }
+        return nil;
+    }
+
+    LiteRTResponseStream *stream = [[LiteRTResponseStream alloc] init];
+
+    // TODO: Implement actual streaming with LiteRT-LM
+    //
+    // Example:
+    // auto contents = litert::lm::Contents::FromText([prompt UTF8String]);
+    // auto async_response = _conversation->SendMessageAsync(contents);
+    //
+    // for (const auto& chunk : async_response) {
+    //     [stream addChunk:@(chunk.text().c_str())];
+    // }
+    // [stream markComplete];
+
+    // Stub: Simulate streaming with placeholder
+    NSArray<NSString *> *words = @[@"LiteRT-LM", @"on", @"iOS", @"requires", @"C++",
+                                   @"integration.", @"Swift", @"APIs", @"are", @"'coming",
+                                   @"soon'", @"per", @"Google.", @"See", @"LITERT_INTEGRATION.md",
+                                   @"for", @"details."];
+
+    for (NSString *word in words) {
+        [stream addChunk:[word stringByAppendingString:@" "]];
+    }
+    [stream markComplete];
+
+    return stream;
+}
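+
+// NOTE: The stub stream above is fully populated before it is returned, so
+// -nextChunk never actually blocks. A real LiteRT-LM integration would append
+// chunks from the engine's callback thread, which needs producer/consumer
+// synchronization (e.g. a serial dispatch queue or a lock) around _chunks
+// and _isComplete in LiteRTResponseStream.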
+
+- (void)addToHistory:(NSString *)message
+                role:(NSString *)role {
+    [_history addObject:@{@"role": role, @"message": message}];
+
+    // TODO: Add to LiteRT-LM conversation history
+    // if (_conversation) {
+    //     auto role_str = [role isEqualToString:@"user"]
+    //         ? litert::lm::Role::kUser
+    //         : litert::lm::Role::kAssistant;
+    //     _conversation->AddMessage(role_str, [message UTF8String]);
+    // }
+}
+
+- (void)clearHistory {
+    [_history removeAllObjects];
+
+    // TODO: Reset LiteRT-LM conversation
+    // if (_engine) {
+    //     auto conv_config = litert::lm::ConversationConfig{};
+    //     auto conv_result = _engine->CreateConversation(conv_config);
+    //     if (conv_result.ok()) {
+    //         _conversation = std::move(*conv_result);
+    //     }
+    // }
+}
+
+- (BOOL)isReady {
+    return _isInitialized; // && _engine != nullptr;
+}
+
+- (NSInteger)maxTokens {
+    return 16384; // Default, could query from model
+}
+
+- (NSInteger)estimateTokens:(NSString *)text {
+    // TODO: Use the actual tokenizer once the engine is wired up:
+    // if (_engine) {
+    //     return _engine->EstimateTokens([text UTF8String]);
+    // }
+
+    // Rough estimation: ~4 characters per token
+    return (NSInteger)(text.length / 4);
+}
+
+- (void)close {
+    // TODO: Release C++ resources
+    // _conversation.reset();
+    // _engine.reset();
+
+    _isInitialized = NO;
+    _modelPath = nil;
+    [_history removeAllObjects];
+}
+
+- (void)dealloc {
+    [self close];
+}
+
+@end
diff --git a/SleepyAgent/Inference/Bridge/SleepyAgent-Bridging-Header.h b/SleepyAgent/Inference/Bridge/SleepyAgent-Bridging-Header.h
new file mode 100644
index 0000000..e9da346
--- /dev/null
+++ b/SleepyAgent/Inference/Bridge/SleepyAgent-Bridging-Header.h
@@ -0,0 +1,13 @@
+//
+//  SleepyAgent-Bridging-Header.h
+//  SleepyAgent
+//
+//  Bridging header for Objective-C++ LiteRT-LM bridge
+//
+
+#ifndef SleepyAgent_Bridging_Header_h
+#define SleepyAgent_Bridging_Header_h
+
+#import "LlmEngineBridge.h"
+
+#endif /* SleepyAgent_Bridging_Header_h */
diff --git a/SleepyAgent/Inference/LlmEngine.swift b/SleepyAgent/Inference/LlmEngine.swift
index 4fc8afe..7f689b8 100644
--- a/SleepyAgent/Inference/LlmEngine.swift
+++ b/SleepyAgent/Inference/LlmEngine.swift
@@ -27,15 +27,13 @@ public enum LlmEngineError: LocalizedError {
         case .engineInitializationFailed(let error):
             return "Failed to initialize engine: \(error.localizedDescription)"
         case .notImplemented:
-            return "This feature requires LiteRT-LM C++ integration (Swift APIs coming soon)"
+            return "This feature requires LiteRT-LM C++ integration"
         }
     }
 }
 
 // MARK: - Conversation
 
-/// Conversation wrapper for managing chat sessions
-/// Note: In full implementation, this wraps LiteRT-LM's Conversation object
 public final class Conversation: @unchecked Sendable {
     public var isAlive: Bool = true
     internal var messageHistory: [(role: String, content: String)] = []
@@ -75,52 +73,14 @@ public protocol LlmEngine: Actor {
 
 // MARK: - LiteRT-LM Engine Implementation
 
-/// LiteRT-LM based LLM Engine
-///
-/// # Important Implementation Note:
-///
-/// LiteRT-LM Swift APIs are "coming soon" per Google (as of 2025).
-/// Current iOS support requires using the C++ API directly with Objective-C++ bridging.
-///
-/// ## Integration Options:
-///
-/// ### Option 1: Use TensorFlowLiteSwift (Limited)
-/// Standard LiteRT pod works for basic inference but lacks LLM-specific features
-/// like KV cache management, conversation handling, and tool use.
-///
-/// ```ruby
-/// # Podfile
-/// pod 'TensorFlowLiteSwift', '~> 2.16.0'
-/// ```
-///
-/// ### Option 2: C++ Bridge (Full Features) ⭐ Recommended
-/// Use LiteRT-LM C++ API with Objective-C++ wrapper:
-///
-/// 1. Add C++ source files (.mm)
-/// 2. Include LiteRT-LM headers
-/// 3. Bridge to Swift via Objective-C
-///
-/// ```objc
-/// // LlmEngineBridge.h
-/// @interface LlmEngineBridge : NSObject
-/// - (BOOL)loadModel:(NSString *)path error:(NSError **)error;
-/// - (NSString *)generate:(NSString *)prompt;
-/// @end
-/// ```
-///
-/// ### Option 3: Wait for Swift APIs
-/// Google has announced Swift APIs are coming. Monitor:
-/// https://ai.google.dev/edge/litert-lm
-///
-/// ## Current Status:
-/// - Android: ✅ Full Kotlin support
-/// - iOS: ⚠️ C++ only (Swift APIs coming soon)
-/// - Models: ✅ Gemma 4 E2B/E4B available on HuggingFace
-///
-/// ## References:
-/// - LiteRT-LM GitHub: https://github.com/google-ai-edge/LiteRT-LM
-/// - iOS C++ Guide: https://ai.google.dev/edge/litert-lm/cpp
-/// - Models: https://huggingface.co/litert-community
+/// LiteRT-LM based LLM Engine using an Objective-C++ bridge
+///
+/// Architecture:
+/// - Swift LlmEngine (this file) -> Obj-C++ LlmEngineBridge -> C++ LiteRT-LM
+///
+/// This approach is necessary because:
+/// 1. LiteRT-LM Swift APIs are "coming soon" (as of 2025)
+/// 2. Google's own apps appear to use the same C++ bridge pattern
+///    (it is the pattern used in litert-samples)
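+///
+/// Example usage (a sketch, from an async context, with `modelPath` pointing
+/// at a downloaded .litertlm file):
+///
+/// ```swift
+/// let engine = LiteRtLlmEngine.shared
+/// try await engine.loadModel(path: modelPath)
+/// let conversation = try await engine.createConversation(systemPrompt: "You are a helpful assistant.")
+/// let reply = try await engine.generate(conversation: conversation,
+///                                       prompt: "Hello!",
+///                                       audioData: nil,
+///                                       images: nil)
+/// ```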
+///
 public actor LiteRtLlmEngine: LlmEngine {
     public static let shared = LiteRtLlmEngine()
 
@@ -128,12 +88,13 @@ public actor LiteRtLlmEngine: LlmEngine {
     public private(set) var isLoaded: Bool = false
     private var currentModelPath: String?
     private var currentConversation: Conversation?
+    private var systemPrompt: String = ""
+
+    // Objective-C++ bridge instance
+    private var bridge: LlmEngineBridge?
 
     private let maxTokens = 16384
 
-    // TODO: Add actual LiteRT-LM C++ engine reference here
-    // private var cppEngine: UnsafeMutableRawPointer?
-
     private init() {}
 
     // MARK: - Model Loading
 
@@ -145,44 +106,50 @@ public actor LiteRtLlmEngine: LlmEngine {
             throw LlmEngineError.modelNotFound(path: path)
         }
 
-        // TODO: Implement actual LiteRT-LM loading
-        //
-        // Example C++ integration (in .mm file):
-        //
-        // #include "litert_lm/engine.h"
-        //
-        // auto config = litert::lm::EngineConfig{
-        //     .model_path = path.UTF8String,
-        //     .max_num_tokens = maxTokens
-        // };
-        //
-        // auto engine = litert::lm::Engine::Create(config);
-        // if (!engine.ok()) {
-        //     throw LlmEngineError.engineInitializationFailed(...)
-        // }
-        // cppEngine = engine->release();
+        // Initialize the Objective-C++ bridge.
+        // initWithModelPath:accelerator:error: imports into Swift as a
+        // throwing initializer, so failures surface as thrown errors.
+        let newBridge: LlmEngineBridge
+        do {
+            newBridge = try LlmEngineBridge(
+                modelPath: path,
+                accelerator: .cpu // Use .metal for GPU acceleration
+            )
+        } catch {
+            throw LlmEngineError.engineInitializationFailed(underlying: error)
+        }
 
-        // Stub: Simulate loading
-        try await Task.sleep(nanoseconds: 500_000_000)
-
+        self.bridge = newBridge
         self.isLoaded = true
         self.currentModelPath = path
+
+        print("[LiteRtLlmEngine] Model loaded: \(path)")
     }
 
-    // MARK: - Conversation
+    // MARK: - Conversation Management
 
     public func createConversation(systemPrompt: String) throws -> Conversation {
         guard isLoaded else {
             throw LlmEngineError.modelNotLoaded
         }
 
+        self.systemPrompt = systemPrompt
+
         let conversation = Conversation()
         conversation.messageHistory.append(("system", systemPrompt))
+
+        // Clear any existing history in the bridge
+        bridge?.clearHistory()
+
+        // Add system prompt to bridge
+        bridge?.add(toHistory: systemPrompt, role: "system")
+
         self.currentConversation = conversation
-
-        // TODO: Create actual LiteRT-LM conversation
-        // auto conv = cppEngine->CreateConversation(config);
-
         return conversation
     }
 
@@ -198,19 +165,40 @@ public actor LiteRtLlmEngine: LlmEngine {
             throw LlmEngineError.conversationClosed
         }
 
-        guard isLoaded else {
+        guard isLoaded, let bridge = bridge else {
             throw LlmEngineError.modelNotLoaded
         }
 
-        // TODO: Implement actual generation
-        //
-        // C++ example:
-        // auto contents = litert::lm::Contents::FromText(prompt.UTF8String);
-        // auto response = conv->SendMessage(contents);
-        // return [NSString stringWithUTF8String:response.text().c_str()];
+        // TODO: Handle multimodal inputs (images, audio)
+        // For now, focus on text-only generation
+        if audioData != nil || !(images?.isEmpty ?? true) {
+            // Multimodal not yet implemented in bridge
+            throw LlmEngineError.notImplemented
+        }
 
-        // Stub response
-        return "[STUB] LiteRT-LM Swift APIs are coming soon. Use C++ bridge for full functionality."
+        // Add user message to history
+        conversation.messageHistory.append(("user", prompt))
+        bridge.add(toHistory: prompt, role: "user")
+
+        // Generate the response. generateResponse:error: imports into Swift
+        // as a throwing method.
+        let text: String
+        do {
+            text = try bridge.generateResponse(prompt)
+        } catch {
+            throw LlmEngineError.generationFailed(underlying: error)
+        }
+
+        // Add assistant response to history
+        conversation.messageHistory.append(("assistant", text))
+        bridge.add(toHistory: text, role: "assistant")
+
+        return text
     }
 
     public func generateStream(
@@ -226,31 +214,53 @@ public actor LiteRtLlmEngine: LlmEngine {
                     throw LlmEngineError.conversationClosed
                 }
 
-                guard self.isLoaded else {
+                guard self.isLoaded, let bridge = self.bridge else {
                     throw LlmEngineError.modelNotLoaded
                 }
 
-                // TODO: Implement streaming with LiteRT-LM C++
-                //
-                // C++ example:
-                // auto stream = conv->SendMessageAsync(contents);
-                // for (const auto& token : stream) {
-                //     continuation.yield(...)
-                // }
-
-                // Stub: Simulate streaming
-                let message = "LiteRT-LM on iOS currently requires C++ integration. Swift APIs are 'coming soon' per Google. See LlmEngine.swift comments for integration options."
-                let words = message.split(separator: " ")
-
-                for word in words {
-                    continuation.yield(String(word) + " ")
-                    try await Task.sleep(nanoseconds: 50_000_000)
+                // Handle multimodal (not implemented)
+                if audioData != nil || !(images?.isEmpty ?? true) {
+                    throw LlmEngineError.notImplemented
                 }
 
+                // Add user message to history
+                conversation.messageHistory.append(("user", prompt))
+                bridge.add(toHistory: prompt, role: "user")
+
+                // Get the streaming response from the bridge.
+                // generateResponseStream:error: imports as a throwing method.
+                let stream: LiteRTResponseStream
+                do {
+                    stream = try bridge.generateResponseStream(prompt)
+                } catch {
+                    throw LlmEngineError.generationFailed(underlying: error)
+                }
+
+                // Read chunks from the stream
+                var fullResponse = ""
+                while stream.hasMore {
+                    if let chunk = stream.nextChunk() {
+                        continuation.yield(chunk)
+                        fullResponse.append(chunk)
+                    } else {
+                        // No chunk ready yet; back off briefly instead of busy-spinning
+                        try await Task.sleep(nanoseconds: 1_000_000) // 1ms
+                    }
+                }
+
+                // Close stream
+                stream.close()
+
+                // Add complete response to history
+                conversation.messageHistory.append(("assistant", fullResponse))
+                bridge.add(toHistory: fullResponse, role: "assistant")
+
                 continuation.finish()
             } catch {
-                continuation.finish(throwing: LlmEngineError.generationFailed(underlying: error))
+                continuation.finish(throwing: error)
             }
         }
     }
 
@@ -259,72 +269,15 @@ public actor LiteRtLlmEngine: LlmEngine {
     // MARK: - Utility
 
     public func unload() {
-        // TODO: Clean up C++ engine
-        // if (cppEngine) {
-        //     delete static_cast<litert::lm::Engine*>(cppEngine);
-        //     cppEngine = nullptr;
-        // }
-
+        bridge?.close()
+        bridge = nil
         isLoaded = false
        currentModelPath = nil
        currentConversation = nil
-    }
-}
-
-// MARK: - TensorFlowLiteSwift Alternative (Basic)
-
-/// Alternative using standard TensorFlowLiteSwift
-/// Limited functionality - no KV cache, conversation management, or tool use
-///
-/// Use this if you need basic inference only:
-/// ```ruby
-/// pod 'TensorFlowLiteSwift', '~> 2.16.0'
-/// ```
-public actor TFLiteEngine: LlmEngine {
-    public static let shared = TFLiteEngine()
-
-    public private(set) var isLoaded: Bool = false
-
-    // TODO: Add TFLInterpreter
-    // private var interpreter: Interpreter?
-
-    public init() {}
-
-    public func loadModel(path: String) async throws {
-        // TODO: Initialize TFLInterpreter
-        // interpreter = try Interpreter(modelPath: path)
-        // try interpreter?.allocateTensors()
-        isLoaded = true
-    }
-
-    public func createConversation(systemPrompt: String) throws -> Conversation {
-        Conversation()
-    }
-
-    public func generate(
-        conversation: Conversation,
-        prompt: String,
-        audioData: Data?,
-        images: [UIImage]?
-    ) async throws -> String {
-        // TODO: Basic TFLite inference
-        // This won't work well for LLMs without proper tokenization
-        throw LlmEngineError.notImplemented
-    }
-
-    public func generateStream(
-        conversation: Conversation,
-        prompt: String,
-        audioData: Data?,
-        images: [UIImage]?
-    ) -> AsyncThrowingStream<String, Error> {
-        AsyncThrowingStream { continuation in
-            continuation.finish(throwing: LlmEngineError.notImplemented)
-        }
-    }
-
-    public func unload() {
-        // interpreter = nil
-        isLoaded = false
+        print("[LiteRtLlmEngine] Unloaded")
+    }
+
+    public func estimateTokens(text: String) -> Int {
+        return bridge?.estimateTokens(text) ?? (text.count / 4)
    }
 }