Add server agent modes: full_remote and delegation

New Features:
- RemoteAgent: Full server-as-agent mode (bypasses local model)
- DelegationAgent: Local model decides when to ask server for help
- Server mode selector in Settings: Local Only / Full Remote / Smart
- Updated MainViewModel to support all three modes for text, audio, and images
- SettingsUiState and SettingsViewModel updated with serverAgentMode

Modes:
- local_only: Use only local Gemma 4 model (default)
- full_remote: All queries go to delegate server (OpenAI-compatible API)
- delegation: Local model classifies queries, delegates complex ones to server
2026-04-06 18:52:17 +02:00
parent 47df14c952
commit 8ab2e661ee
8 changed files with 5211 additions and 84 deletions
@@ -95,7 +95,8 @@ class AppModule(private val context: Context) {
agent = agent,
llmEngine = llmEngine,
userSettings = userSettings,
-webSearchTool = webSearchTool
webSearchTool = webSearchTool,
httpClient = ktorClient
)
}
@@ -0,0 +1,346 @@
package com.sleepy.agent.inference
import android.util.Log
import io.ktor.client.HttpClient
import io.ktor.client.call.body
import io.ktor.client.plugins.timeout
import io.ktor.client.request.post
import io.ktor.client.request.setBody
import io.ktor.http.ContentType
import io.ktor.http.contentType
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.channelFlow
import kotlinx.coroutines.flow.collect
import kotlinx.coroutines.flow.flow
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.addJsonObject
import kotlinx.serialization.json.buildJsonObject
import kotlinx.serialization.json.put
import kotlinx.serialization.json.putJsonArray
/**
* Delegation Agent - Local model decides when to ask the big model for help.
*
* Flow:
* 1. User input → Local model (small, fast)
* 2. Local model classifies if it needs help:
* - Simple question (factual, from training data) → Answer directly
* - Complex question (reasoning, current events) → Delegate to big model
* - Uncertain → Ask big model for help
* 3. If delegation needed:
* - Local model formulates a clear request
* - Request sent to big model (server)
* - Big model responds
* - Local model incorporates that into final answer
* 4. Final answer to user
*/
class DelegationAgent(
private val localEngine: LlmEngine,
private val httpClient: HttpClient,
private val delegateServerUrl: String
) {
companion object {
private const val TAG = "DelegationAgent"
private val json = Json { ignoreUnknownKeys = true }
// Classification thresholds
private const val CONFIDENCE_THRESHOLD = 0.7f
}
private var localConversation: Conversation? = null
private val delegationPrompt = """
You are a helpful assistant. Before answering, assess your confidence:
Can you answer this confidently based on your training? Reply with EXACTLY one of:
[DIRECT] - You know this well and can answer directly
[DELEGATE: question for big model] - You need help, provide a clear question for a smarter model
[CLARIFY] - You need more information from the user
Your assessment:
""".trimIndent()
private val synthesisPrompt = """
You received help from a more knowledgeable model. Synthesize this into a helpful,
natural response for the user. Don't mention that you asked for help - just provide
the answer conversationally.
User's original question: {user_question}
Helpful information received: {delegated_response}
Your response:
""".trimIndent()
/**
* Process user input with delegation to big model when needed.
*/
suspend fun processWithDelegation(
userInput: String,
conversationHistory: List<Pair<String, String>> = emptyList(), // (role, content) pairs
onStatusUpdate: ((String) -> Unit)? = null
): Flow<DelegationEvent> = channelFlow {
try {
// Step 1: Local model assesses confidence
onStatusUpdate?.invoke("Thinking...")
send(DelegationEvent.Status("Analyzing question..."))
val classification = classifyQuestion(userInput, conversationHistory)
when {
classification.startsWith("[DIRECT]") -> {
// Step 2a: Answer directly with local model
Log.d(TAG, "Answering directly")
onStatusUpdate?.invoke("Answering...")
send(DelegationEvent.Status("Answering directly..."))
answerDirectly(userInput, conversationHistory).collect { event ->
when (event) {
is AgentEvent.Token -> send(DelegationEvent.Token(event.text))
is AgentEvent.Complete -> send(DelegationEvent.Complete(event.response))
is AgentEvent.Error -> send(DelegationEvent.Error(event.message))
else -> {}
}
}
}
classification.startsWith("[DELEGATE:") -> {
// Step 2b: Delegate to big model
val extractedQuestion = extractDelegateQuestion(classification)
Log.d(TAG, "Delegating to big model: $extractedQuestion")
onStatusUpdate?.invoke("Consulting expert model...")
send(DelegationEvent.Status("Consulting expert model..."))
val delegatedResponse = queryBigModel(extractedQuestion, conversationHistory)
// Step 3: Synthesize with local model
onStatusUpdate?.invoke("Synthesizing answer...")
send(DelegationEvent.Status("Synthesizing answer..."))
synthesizeResponse(userInput, delegatedResponse, conversationHistory).collect { event ->
when (event) {
is AgentEvent.Token -> send(DelegationEvent.Token(event.text))
is AgentEvent.Complete -> send(DelegationEvent.Complete(event.response))
is AgentEvent.Error -> send(DelegationEvent.Error(event.message))
else -> {}
}
}
}
classification.startsWith("[CLARIFY]") -> {
// Step 2c: Ask user for clarification
Log.d(TAG, "Asking for clarification")
val clarificationRequest = classification.removePrefix("[CLARIFY]").trim()
.ifEmpty { "I need more information to help you. Could you provide more details about what you're looking for?" }
send(DelegationEvent.Token(clarificationRequest))
send(DelegationEvent.Complete(clarificationRequest))
}
else -> {
// Fallback: try direct answer
Log.w(TAG, "Unknown classification: $classification, falling back to direct")
answerDirectly(userInput, conversationHistory).collect { event ->
when (event) {
is AgentEvent.Token -> send(DelegationEvent.Token(event.text))
is AgentEvent.Complete -> send(DelegationEvent.Complete(event.response))
is AgentEvent.Error -> send(DelegationEvent.Error(event.message))
else -> {}
}
}
}
}
} catch (e: Exception) {
Log.e(TAG, "Error in delegation flow", e)
send(DelegationEvent.Error("Error: ${e.message}"))
}
}
/**
* Quick classification without full generation.
*/
private suspend fun classifyQuestion(
userInput: String,
history: List<Pair<String, String>>
): String {
// Ensure conversation exists
if (localConversation?.isAlive != true) {
localConversation = localEngine.createConversation(delegationPrompt)
}
val prompt = buildString {
history.takeLast(3).forEach { (role, content) ->
appendLine("$role: $content")
}
appendLine("User: $userInput")
appendLine()
append("Assessment: ")
}
return try {
localEngine.generate(
conversation = localConversation!!,
prompt = prompt,
audioData = null,
images = null
).trim()
} catch (e: Exception) {
Log.e(TAG, "Classification failed", e)
"[DIRECT]" // Fallback to direct answer
}
}
/**
* Answer directly using local model.
*/
private fun answerDirectly(
userInput: String,
history: List<Pair<String, String>>
): Flow<AgentEvent> = flow {
if (localConversation?.isAlive != true) {
localConversation = localEngine.createConversation()
}
val prompt = buildString {
history.takeLast(5).forEach { (role, content) ->
appendLine("$role: $content")
}
appendLine("User: $userInput")
appendLine()
append("Assistant: ")
}
val response = localEngine.generate(
conversation = localConversation!!,
prompt = prompt,
audioData = null,
images = null
)
emit(AgentEvent.Token(response))
emit(AgentEvent.Complete(response))
}
/**
* Query the big model on the server.
*/
private suspend fun queryBigModel(
question: String,
history: List<Pair<String, String>>
): String {
return try {
val requestBody = buildJsonObject {
putJsonArray("messages") {
history.forEach { (role, content) ->
addJsonObject {
put("role", if (role == "User") "user" else "assistant")
put("content", content)
}
}
addJsonObject {
put("role", "user")
put("content", question)
}
}
put("stream", false)
put("temperature", 0.7)
}
val response: String = httpClient.post("$delegateServerUrl/v1/chat/completions") {
contentType(ContentType.Application.Json)
setBody(requestBody)
timeout {
requestTimeoutMillis = 120_000
connectTimeoutMillis = 30_000
}
}.body()
parseServerResponse(response)
} catch (e: Exception) {
Log.e(TAG, "Failed to query big model", e)
"I apologize, but I couldn't reach the expert model at this time. Let me try to help with what I know: [local model will attempt answer]"
}
}
/**
* Synthesize the delegated response into a natural answer.
*/
private fun synthesizeResponse(
userQuestion: String,
delegatedResponse: String,
history: List<Pair<String, String>>
): Flow<AgentEvent> = flow {
if (localConversation?.isAlive != true) {
localConversation = localEngine.createConversation()
}
val prompt = synthesisPrompt
.replace("{user_question}", userQuestion)
.replace("{delegated_response}", delegatedResponse)
val response = localEngine.generate(
conversation = localConversation!!,
prompt = prompt,
audioData = null,
images = null
)
emit(AgentEvent.Token(response))
emit(AgentEvent.Complete(response))
}
private fun extractDelegateQuestion(classification: String): String {
// Extract question from [DELEGATE: question here]
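// e.g. "[DELEGATE: What is the latest Ktor release?]" -> "What is the latest Ktor release?" (illustrative)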
val start = classification.indexOf("[DELEGATE:")
if (start == -1) return classification
val end = classification.indexOf("]", start + 10)
if (end == -1) return classification
return classification.substring(start + 10, end).trim()
}
private fun parseServerResponse(response: String): String {
return try {
val completion = json.decodeFromString<OpenAICompletion>(response)
completion.choices.firstOrNull()?.message?.content
?: "I received information but couldn't parse it properly."
} catch (e: Exception) {
Log.w(TAG, "Failed to parse server response as JSON, returning raw")
response
}
}
fun reset() {
localConversation?.close()
localConversation = null
}
@kotlinx.serialization.Serializable
data class OpenAICompletion(
val choices: List<CompletionChoice>
) {
@kotlinx.serialization.Serializable
data class CompletionChoice(
val message: Message
) {
@kotlinx.serialization.Serializable
data class Message(
val content: String
)
}
}
}
sealed class DelegationEvent {
data class Token(val text: String) : DelegationEvent()
data class Status(val message: String) : DelegationEvent()
data class Complete(val response: String) : DelegationEvent()
data class Error(val message: String) : DelegationEvent()
}
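For orientation, here is a minimal caller sketch (not part of this commit) showing how the event stream might be consumed; the scope, engine, client, and server URL are placeholder assumptions, and the client is assumed to be the app's preconfigured Ktor client.

import io.ktor.client.HttpClient
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.flow.collect
import kotlinx.coroutines.launch

// Hypothetical caller; demoDelegation, scope, engine, client, and the URL are
// placeholders, not part of this commit.
fun demoDelegation(scope: CoroutineScope, engine: LlmEngine, client: HttpClient) {
    val agent = DelegationAgent(engine, client, delegateServerUrl = "http://10.0.2.2:8080")
    scope.launch {
        agent.processWithDelegation("Summarize the plot of Hamlet").collect { event ->
            when (event) {
                is DelegationEvent.Status -> println("status: ${event.message}")
                is DelegationEvent.Token -> print(event.text)
                is DelegationEvent.Complete -> println()
                is DelegationEvent.Error -> println("error: ${event.message}")
            }
        }
    }
}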
@@ -0,0 +1,299 @@
package com.sleepy.agent.inference
import android.util.Log
import io.ktor.client.HttpClient
import io.ktor.client.call.body
import io.ktor.client.plugins.timeout
import io.ktor.client.request.get
import io.ktor.client.request.post
import io.ktor.client.request.setBody
import io.ktor.http.ContentType
import io.ktor.http.contentType
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.channelFlow
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.addJsonObject
import kotlinx.serialization.json.buildJsonObject
import kotlinx.serialization.json.put
import kotlinx.serialization.json.putJsonArray
/**
* Remote agent that delegates all LLM calls to a server.
* This bypasses the local model entirely - useful for:
* 1. Using powerful server-side models when local model is insufficient
* 2. Testing the app UI without loading a local model
* 3. Fallback when local model fails
*/
class RemoteAgent(
private val httpClient: HttpClient,
private val baseUrl: String
) {
companion object {
private const val TAG = "RemoteAgent"
private val json = Json { ignoreUnknownKeys = true }
// Supported API formats
enum class ApiFormat {
OPENAI_COMPATIBLE, // /v1/chat/completions
OLLAMA, // /api/generate or /api/chat
CUSTOM // Custom endpoint
}
}
private val messageHistory = mutableListOf<RemoteMessage>()
private var apiFormat = ApiFormat.OPENAI_COMPATIBLE
data class RemoteMessage(
val role: String, // "system", "user", "assistant"
val content: String
)
/**
* Send a message to the remote server and get streaming response.
*/
suspend fun sendMessage(
message: String,
systemPrompt: String? = null,
stream: Boolean = true
): Flow<RemoteAgentEvent> = channelFlow {
try {
// Add user message to history
messageHistory.add(RemoteMessage("user", message))
// Build request based on detected API format
val requestBody = when (apiFormat) {
ApiFormat.OPENAI_COMPATIBLE -> buildOpenAIRequest(systemPrompt, stream)
ApiFormat.OLLAMA -> buildOllamaRequest(systemPrompt, stream)
ApiFormat.CUSTOM -> buildCustomRequest(systemPrompt)
}
Log.d(TAG, "Sending request to $baseUrl (format: $apiFormat)")
val endpoint = when (apiFormat) {
ApiFormat.OPENAI_COMPATIBLE -> "$baseUrl/v1/chat/completions"
ApiFormat.OLLAMA -> "$baseUrl/api/chat"
ApiFormat.CUSTOM -> baseUrl
}
val response: String = httpClient.post(endpoint) {
contentType(ContentType.Application.Json)
setBody(requestBody)
timeout {
requestTimeoutMillis = 120_000 // 2 minutes for generation
connectTimeoutMillis = 30_000
}
}.body()
if (stream) {
// Handle streaming response (SSE format)
handleStreamingResponse(response)
} else {
// Handle non-streaming response
handleNonStreamingResponse(response)
}
} catch (e: Exception) {
Log.e(TAG, "Error calling remote server", e)
send(RemoteAgentEvent.Error("Server error: ${e.message}"))
}
}
/**
* Quick check if server is available.
*/
suspend fun checkServer(): Boolean {
return try {
// Try to detect API format by probing endpoints
val openaiResponse = httpClient.get("$baseUrl/v1/models") {
timeout { requestTimeoutMillis = 5000 }
}
if (openaiResponse.status.value == 200) {
apiFormat = ApiFormat.OPENAI_COMPATIBLE
Log.d(TAG, "Detected OpenAI-compatible API")
return true
}
val ollamaResponse = httpClient.get("$baseUrl/api/tags") {
timeout { requestTimeoutMillis = 5000 }
}
if (ollamaResponse.status.value == 200) {
apiFormat = ApiFormat.OLLAMA
Log.d(TAG, "Detected Ollama API")
return true
}
// Assume custom if base URL responds
apiFormat = ApiFormat.CUSTOM
true
} catch (e: Exception) {
Log.e(TAG, "Server check failed", e)
false
}
}
/**
* Clear conversation history.
*/
fun clearHistory() {
messageHistory.clear()
}
private fun buildOpenAIRequest(systemPrompt: String?, stream: Boolean): JsonObject {
return buildJsonObject {
put("model", "local-model") // Server usually ignores this for single-model setups
putJsonArray("messages") {
// System message
systemPrompt?.let {
addJsonObject {
put("role", "system")
put("content", it)
}
}
// Conversation history
messageHistory.forEach { msg ->
addJsonObject {
put("role", msg.role)
put("content", msg.content)
}
}
}
put("stream", stream)
put("temperature", 0.7)
put("max_tokens", 4096)
}
}
private fun buildOllamaRequest(systemPrompt: String?, stream: Boolean): JsonObject {
return buildJsonObject {
put("model", "local-model")
putJsonArray("messages") {
systemPrompt?.let {
addJsonObject {
put("role", "system")
put("content", it)
}
}
messageHistory.forEach { msg ->
addJsonObject {
put("role", msg.role)
put("content", msg.content)
}
}
}
put("stream", stream)
}
}
private fun buildCustomRequest(systemPrompt: String?): JsonObject {
return buildJsonObject {
put("prompt", buildPromptWithHistory(systemPrompt))
}
}
private fun buildPromptWithHistory(systemPrompt: String?): String {
return buildString {
systemPrompt?.let { appendLine(it).appendLine() }
messageHistory.forEach { msg ->
when (msg.role) {
"user" -> appendLine("User: ${msg.content}")
"assistant" -> appendLine("Assistant: ${msg.content}")
}
}
appendLine("Assistant:")
}
}
private suspend fun kotlinx.coroutines.channels.SendChannel<RemoteAgentEvent>.handleStreamingResponse(response: String) {
// Parse SSE format (Server-Sent Events)
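// Typical OpenAI-style SSE payload: one JSON chunk per "data:" line, e.g.
//   data: {"choices":[{"delta":{"content":"Hel"}}]}
//   data: {"choices":[{"delta":{"content":"lo"}}]}
//   data: [DONE]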
val lines = response.lines()
val responseBuilder = StringBuilder()
for (line in lines) {
when {
line.startsWith("data: ") -> {
val data = line.substring(6)
if (data == "[DONE]") {
// Stream complete
val fullResponse = responseBuilder.toString()
messageHistory.add(RemoteMessage("assistant", fullResponse))
send(RemoteAgentEvent.Complete(fullResponse))
return
}
try {
val chunk = json.decodeFromString<OpenAIChunk>(data)
val content = chunk.choices.firstOrNull()?.delta?.content ?: ""
if (content.isNotEmpty()) {
responseBuilder.append(content)
send(RemoteAgentEvent.Token(content))
}
} catch (e: Exception) {
Log.w(TAG, "Failed to parse chunk: $data")
}
}
}
}
// If we get here without [DONE], return what we have
val fullResponse = responseBuilder.toString()
if (fullResponse.isNotEmpty()) {
messageHistory.add(RemoteMessage("assistant", fullResponse))
send(RemoteAgentEvent.Complete(fullResponse))
}
}
private suspend fun kotlinx.coroutines.channels.SendChannel<RemoteAgentEvent>.handleNonStreamingResponse(response: String) {
try {
val completion = json.decodeFromString<OpenAICompletion>(response)
val content = completion.choices.firstOrNull()?.message?.content ?: ""
messageHistory.add(RemoteMessage("assistant", content))
// Emit as single token for consistency
send(RemoteAgentEvent.Token(content))
send(RemoteAgentEvent.Complete(content))
} catch (e: Exception) {
Log.e(TAG, "Failed to parse response", e)
send(RemoteAgentEvent.Error("Failed to parse server response"))
}
}
// Data classes for OpenAI-compatible API
@Serializable
data class OpenAIChunk(
val choices: List<Choice>
) {
@Serializable
data class Choice(
val delta: Delta
) {
@Serializable
data class Delta(
val content: String? = null
)
}
}
@Serializable
data class OpenAICompletion(
val choices: List<CompletionChoice>
) {
@Serializable
data class CompletionChoice(
val message: Message
) {
@Serializable
data class Message(
val content: String
)
}
}
}
sealed class RemoteAgentEvent {
data class Token(val text: String) : RemoteAgentEvent()
data class Complete(val response: String) : RemoteAgentEvent()
data class Error(val message: String) : RemoteAgentEvent()
}
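Similarly, a rough usage sketch for RemoteAgent (not part of this commit). It assumes `client` is the app's shared Ktor client, already configured with JSON content negotiation and the HttpTimeout plugin; the URL is a placeholder.

import io.ktor.client.HttpClient
import kotlinx.coroutines.flow.collect

// Hypothetical smoke test; remoteSmokeTest and the URL are placeholders.
suspend fun remoteSmokeTest(client: HttpClient) {
    val agent = RemoteAgent(client, baseUrl = "http://10.0.2.2:8080")
    if (!agent.checkServer()) {
        println("delegate server unreachable")
        return
    }
    agent.sendMessage("Hello!", systemPrompt = "You are a helpful AI assistant.", stream = false)
        .collect { event ->
            when (event) {
                is RemoteAgentEvent.Token -> print(event.text)
                is RemoteAgentEvent.Complete -> println()
                is RemoteAgentEvent.Error -> println("error: ${event.message}")
            }
        }
}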
@@ -23,6 +23,9 @@ class UserSettings(
val MODEL_SOURCE = stringPreferencesKey("model_source")
val SELECTED_SERVER_MODEL = stringPreferencesKey("selected_server_model")
// Server agent mode: "local_only", "full_remote", "delegation"
val SERVER_AGENT_MODE = stringPreferencesKey("server_agent_mode")
// TTS settings
val TTS_ENABLED = booleanPreferencesKey("tts_enabled")
val TTS_AUTO_MODE = booleanPreferencesKey("tts_auto_mode")
@@ -55,6 +58,11 @@ class UserSettings(
prefs[ENABLE_SERVER_DELEGATION] ?: false
}
// Server agent mode: "local_only" (default), "full_remote", "delegation"
val serverAgentMode: Flow<String> = dataStore.data.map { prefs ->
prefs[SERVER_AGENT_MODE] ?: "local_only"
}
val modelSource: Flow<ModelSource> = dataStore.data.map { prefs ->
prefs[MODEL_SOURCE]?.let { ModelSource.valueOf(it) } ?: ModelSource.FILE_PATH
}
@@ -106,6 +114,12 @@ class UserSettings(
}
}
suspend fun setServerAgentMode(mode: String) {
dataStore.edit { prefs ->
prefs[SERVER_AGENT_MODE] = mode
}
}
suspend fun setModelSource(source: ModelSource) {
dataStore.edit { prefs ->
prefs[MODEL_SOURCE] = source.name
@@ -12,7 +12,11 @@ import com.sleepy.agent.data.ConversationStorage
import com.sleepy.agent.download.ModelDownloadManager
import com.sleepy.agent.inference.Agent
import com.sleepy.agent.inference.AgentEvent
import com.sleepy.agent.inference.DelegationAgent
import com.sleepy.agent.inference.DelegationEvent
import com.sleepy.agent.inference.LlmEngine
import com.sleepy.agent.inference.RemoteAgent
import com.sleepy.agent.inference.RemoteAgentEvent
import com.sleepy.agent.settings.UserSettings
import com.sleepy.agent.tools.WebSearchTool
import kotlinx.coroutines.flow.MutableStateFlow
@@ -50,7 +54,8 @@ class MainViewModel(
private val agent: Agent,
private val llmEngine: LlmEngine,
private val userSettings: UserSettings,
-private val webSearchTool: WebSearchTool
private val webSearchTool: WebSearchTool,
private val httpClient: io.ktor.client.HttpClient
) : ViewModel() {
private val conversationStorage = ConversationStorage(context)
@@ -75,6 +80,10 @@ class MainViewModel(
// Track if user started with voice or text for TTS auto mode
private var firstInputWasVoice: Boolean? = null
// Remote agents for server modes
private var remoteAgent: RemoteAgent? = null
private var delegationAgent: DelegationAgent? = null
companion object {
private const val TAG = "MainViewModel"
private const val KEY_MESSAGES = "messages"
@@ -113,6 +122,17 @@ class MainViewModel(
Log.d(TAG, "Updated web search URL to: $url") Log.d(TAG, "Updated web search URL to: $url")
} }
} }
// Initialize remote agents when delegate server URL changes
viewModelScope.launch {
userSettings.delegateServerUrl.collect { url ->
if (url.isNotEmpty()) {
remoteAgent = RemoteAgent(httpClient, url)
delegationAgent = DelegationAgent(llmEngine, httpClient, url)
Log.d(TAG, "Initialized remote agents with server: $url")
}
}
}
}
private fun restoreState() {
@@ -250,12 +270,11 @@ class MainViewModel(
_uiState.value = UIState.PROCESSING
-val useServer = userSettings.enableServerDelegation.first()
-if (useServer) {
-processAudioWithServer(audioData)
-} else {
-processAudioWithLocalModel(audioData)
-}
val mode = userSettings.serverAgentMode.first()
when (mode) {
"full_remote", "delegation" -> processAudioWithServer(audioData)
else -> processAudioWithLocalModel(audioData)
}
}
}
@@ -350,14 +369,68 @@ class MainViewModel(
_messages.value = _messages.value + userMessage
saveState()
-val aiMessage = ConversationMessage(
-text = "Server mode doesn't support native audio understanding yet. Please use local model for voice input.",
-isUser = false
-)
-_messages.value = _messages.value + aiMessage
-saveState()
-_uiState.value = UIState.IDLE
-}
_uiState.value = UIState.PROCESSING
// For server mode with audio, we need to either:
// 1. Use local model to transcribe, then send text to server
// 2. Send audio to server if it supports it
// For now, transcribe locally first
if (!llmEngine.isLoaded()) {
val modelPath = userSettings.modelPath.first()
if (modelPath.isNotEmpty()) {
_responseText.value = "Loading model for transcription..."
val result = llmEngine.loadModel(modelPath)
result.onFailure { e ->
_uiState.value = UIState.ERROR
_responseText.value = "Failed to load model: ${e.message}"
// onFailure is inline, so this plain non-local return exits the function
return
}
} else {
_uiState.value = UIState.ERROR
_responseText.value = "No model loaded for transcription. Please load a model first."
return
}
}
try {
// First, transcribe the audio locally
val transcription = llmEngine.generate(
conversation = ensureConversation(),
prompt = "Transcribe this audio:",
audioData = audioData,
images = null
)
Log.d(TAG, "Transcribed: $transcription")
// Update the user message with transcription
val updatedMessages = _messages.value.toMutableList()
updatedMessages[updatedMessages.size - 1] = userMessage.copy(
text = "🎤 \"$transcription\""
)
_messages.value = updatedMessages
saveState()
// Now process the transcribed text with the server
val mode = userSettings.serverAgentMode.first()
when (mode) {
"full_remote" -> processTextWithRemoteAgent(transcription)
"delegation" -> processTextWithDelegation(transcription)
else -> processTextWithLocalModel(transcription)
}
} catch (e: Exception) {
Log.e(TAG, "Error processing audio with server", e)
_uiState.value = UIState.ERROR
_responseText.value = "Error: ${e.message}"
}
}
private fun ensureConversation() = conversation?.takeIf { it.isAlive }
?: llmEngine.createConversation().also { conversation = it }
private var conversation: com.sleepy.agent.inference.Conversation? = null
fun sendTextMessage(text: String) {
viewModelScope.launch {
@@ -372,13 +445,13 @@ class MainViewModel(
Log.d(TAG, "First input was text - TTS auto-disabled") Log.d(TAG, "First input was text - TTS auto-disabled")
} }
val useServer = userSettings.enableServerDelegation.first() val mode = userSettings.serverAgentMode.first()
Log.d(TAG, "useServer: $useServer") Log.d(TAG, "Server agent mode: $mode")
if (useServer) { when (mode) {
processTextWithServer(text) "full_remote" -> processTextWithRemoteAgent(text)
} else { "delegation" -> processTextWithDelegation(text)
processTextWithLocalModel(text) else -> processTextWithLocalModel(text)
} }
} }
} }
@@ -493,7 +566,10 @@ class MainViewModel(
}
}
-private suspend fun processTextWithServer(text: String) {
/**
 * Full remote mode - bypass local model entirely, use server as the agent.
 */
private suspend fun processTextWithRemoteAgent(text: String) {
val userMessage = ConversationMessage(
text = text,
isUser = true
@@ -503,14 +579,117 @@ class MainViewModel(
_uiState.value = UIState.PROCESSING
val remote = remoteAgent
if (remote == null) {
_uiState.value = UIState.ERROR
_responseText.value = "No server configured. Please set a delegate server URL in Settings."
return
}
try {
val responseBuilder = StringBuilder()
remote.sendMessage(
message = text,
systemPrompt = "You are a helpful AI assistant."
).collect { event ->
when (event) {
is RemoteAgentEvent.Token -> {
responseBuilder.append(event.text)
_responseText.value = responseBuilder.toString()
_uiState.value = UIState.SPEAKING
}
is RemoteAgentEvent.Complete -> {
val aiMessage = ConversationMessage(
-text = "Server mode not yet implemented. Please use local model.",
text = event.response,
isUser = false
)
_messages.value = _messages.value + aiMessage
saveState()
speakResponse(event.response)
_uiState.value = UIState.IDLE
}
is RemoteAgentEvent.Error -> {
_responseText.value = "Error: ${event.message}"
_uiState.value = UIState.ERROR
}
}
}
} catch (e: Exception) {
Log.e(TAG, "Error in remote agent", e)
_uiState.value = UIState.ERROR
_responseText.value = "Server error: ${e.message}"
}
}
/**
* Delegation mode - local model decides when to ask big model for help.
*/
private suspend fun processTextWithDelegation(text: String) {
val userMessage = ConversationMessage(
text = text,
isUser = true
)
_messages.value = _messages.value + userMessage
saveState()
_uiState.value = UIState.PROCESSING
val delegation = delegationAgent
if (delegation == null) {
_uiState.value = UIState.ERROR
_responseText.value = "Delegation not available. Please set a delegate server URL in Settings."
return
}
try {
val responseBuilder = StringBuilder()
delegation.processWithDelegation(
userInput = text,
conversationHistory = _messages.value.map { (if (it.isUser) "User" else "Assistant") to it.text }
).collect { event ->
when (event) {
is DelegationEvent.Token -> {
responseBuilder.append(event.text)
_responseText.value = responseBuilder.toString()
_uiState.value = UIState.SPEAKING
}
is DelegationEvent.Status -> {
_responseText.value = event.message
}
is DelegationEvent.Complete -> {
val aiMessage = ConversationMessage(
text = event.response,
isUser = false
)
_messages.value = _messages.value + aiMessage
saveState()
speakResponse(event.response)
_uiState.value = UIState.IDLE
}
is DelegationEvent.Error -> {
_responseText.value = "Error: ${event.message}"
_uiState.value = UIState.ERROR
}
}
}
} catch (e: Exception) {
Log.e(TAG, "Error in delegation agent", e)
_uiState.value = UIState.ERROR
_responseText.value = "Error: ${e.message}"
}
}
/**
* Legacy server delegation - now redirects to appropriate mode.
*/
private suspend fun processTextWithServer(text: String) {
// Use full remote mode by default for legacy "use server" setting
processTextWithRemoteAgent(text)
}
fun setResponse(text: String) {
_responseText.value = text
@@ -564,6 +743,20 @@ class MainViewModel(
firstInputWasVoice = false // Image is not voice input
_uiState.value = UIState.PROCESSING
// Check server mode
val mode = userSettings.serverAgentMode.first()
// For server modes, we need a local model to process the image first
// Then send the description/results to the server
when (mode) {
"full_remote" -> processImageWithRemoteAgent(bitmap, text)
"delegation" -> processImageWithDelegation(bitmap, text)
else -> processImageWithLocalModel(bitmap, text)
}
}
}
private suspend fun processImageWithLocalModel(bitmap: android.graphics.Bitmap, text: String) {
try {
if (!llmEngine.isLoaded()) {
val modelPath = userSettings.modelPath.first()
@@ -573,13 +766,187 @@ class MainViewModel(
result.onFailure { e ->
_uiState.value = UIState.ERROR
_responseText.value = "Failed to load model: ${e.message}"
-return@launch
return
}
agent.prewarmCache()
} else {
_uiState.value = UIState.ERROR
_responseText.value = "No model loaded. Please go to Settings and load a model."
-return@launch
return
}
}
val responseBuilder = StringBuilder()
Log.d(TAG, "Processing image with local model...")
agent.processInput(
input = text,
images = listOf(bitmap)
).collect { event ->
when (event) {
is AgentEvent.Token -> {
responseBuilder.append(event.text)
_responseText.value = responseBuilder.toString()
_uiState.value = UIState.SPEAKING
}
is AgentEvent.ExecutingTool -> {
_uiState.value = UIState.EXECUTING_TOOL
_responseText.value = "🔧 Using ${event.toolName}..."
}
is AgentEvent.ToolResult -> {
// Tool completed
}
is AgentEvent.Complete -> {
val aiMessage = ConversationMessage(
text = event.response,
isUser = false
)
_messages.value = _messages.value + aiMessage
saveState()
speakResponse(event.response)
_uiState.value = UIState.IDLE
}
is AgentEvent.Error -> {
_responseText.value = "Error: ${event.message}"
_uiState.value = UIState.ERROR
}
else -> {}
}
}
} catch (e: Exception) {
Log.e(TAG, "Error processing image", e)
_uiState.value = UIState.ERROR
_responseText.value = "Error processing image: ${e.message}"
val errorMessage = ConversationMessage(
text = "❌ Failed to process image: ${e.message}",
isUser = false
)
_messages.value = _messages.value + errorMessage
saveState()
}
}
private suspend fun processImageWithRemoteAgent(bitmap: android.graphics.Bitmap, text: String) {
try {
// First, get image description from local model
if (!llmEngine.isLoaded()) {
val modelPath = userSettings.modelPath.first()
if (modelPath.isNotEmpty()) {
_responseText.value = "Analyzing image..."
val result = llmEngine.loadModel(modelPath)
result.onFailure { e ->
_uiState.value = UIState.ERROR
_responseText.value = "Failed to load model: ${e.message}"
return
}
} else {
_uiState.value = UIState.ERROR
_responseText.value = "No model loaded for image analysis. Please load a model first."
return
}
}
// Get image description from local model
val description = llmEngine.generate(
conversation = ensureConversation(),
prompt = "Describe this image in detail:",
audioData = null,
images = listOf(bitmap)
)
Log.d(TAG, "Image description: $description")
// Now send description + user text to remote agent
val fullPrompt = if (text.isNotBlank()) {
"User question about image: $text\n\nImage description: $description"
} else {
"Describe this image: $description"
}
processTextWithRemoteAgent(fullPrompt)
} catch (e: Exception) {
Log.e(TAG, "Error processing image with remote agent", e)
_uiState.value = UIState.ERROR
_responseText.value = "Error: ${e.message}"
}
}
private suspend fun processImageWithDelegation(bitmap: android.graphics.Bitmap, text: String) {
// For delegation mode, process image locally first
// The delegation agent will decide if server help is needed based on the description
try {
if (!llmEngine.isLoaded()) {
val modelPath = userSettings.modelPath.first()
if (modelPath.isNotEmpty()) {
_responseText.value = "Analyzing image..."
val result = llmEngine.loadModel(modelPath)
result.onFailure { e ->
_uiState.value = UIState.ERROR
_responseText.value = "Failed to load model: ${e.message}"
return
}
} else {
_uiState.value = UIState.ERROR
_responseText.value = "No model loaded for image analysis. Please load a model first."
return
}
}
// Get initial analysis from local model
val description = llmEngine.generate(
conversation = ensureConversation(),
prompt = if (text.isNotBlank()) "Analyze this image and answer: $text" else "Describe this image:",
audioData = null,
images = listOf(bitmap)
)
// Add the local model's response to conversation
val localResponse = ConversationMessage(
text = description,
isUser = false
)
_messages.value = _messages.value + localResponse
saveState()
// Now use delegation to decide if we need more help
// The delegation agent will see the image was processed and decide
val followUp = if (text.isNotBlank()) "Is this answer complete and accurate?" else "Can you provide more details?"
processTextWithDelegation(followUp)
} catch (e: Exception) {
Log.e(TAG, "Error processing image with delegation", e)
_uiState.value = UIState.ERROR
_responseText.value = "Error: ${e.message}"
}
}
// Legacy onImageSelected body - now extracted to separate functions
private suspend fun processImageWithLocalModelLegacy(bitmap: android.graphics.Bitmap, text: String) {
try {
if (!llmEngine.isLoaded()) {
val modelPath = userSettings.modelPath.first()
if (modelPath.isNotEmpty()) {
_responseText.value = "Loading model..."
val result = llmEngine.loadModel(modelPath)
result.onFailure { e ->
_uiState.value = UIState.ERROR
_responseText.value = "Failed to load model: ${e.message}"
return
}
agent.prewarmCache()
} else {
_uiState.value = UIState.ERROR
_responseText.value = "No model loaded. Please go to Settings and load a model."
return
}
}
@@ -587,9 +954,8 @@ class MainViewModel(
Log.d(TAG, "Processing image with model...") Log.d(TAG, "Processing image with model...")
// Send empty text with image - model will process image naturally
agent.processInput( agent.processInput(
input = text, // Use the text the user typed (may be empty) input = text,
images = listOf(bitmap) images = listOf(bitmap)
).collect { event -> ).collect { event ->
when (event) { when (event) {
@@ -136,8 +136,10 @@ fun SettingsScreen(
ServerSection(
searchServerUrl = uiState.searchServerUrl,
delegateServerUrl = uiState.delegateServerUrl,
serverAgentMode = uiState.serverAgentMode,
onSearchServerChange = { viewModel.setSearchServerUrl(it) },
-onDelegateServerChange = { viewModel.setDelegateServerUrl(it) }
onDelegateServerChange = { viewModel.setDelegateServerUrl(it) },
onServerAgentModeChange = { viewModel.setServerAgentMode(it) }
)
HorizontalDivider()
@@ -391,8 +393,10 @@ private fun ModelCard(
private fun ServerSection(
searchServerUrl: String,
delegateServerUrl: String,
serverAgentMode: String,
onSearchServerChange: (String) -> Unit,
-onDelegateServerChange: (String) -> Unit
onDelegateServerChange: (String) -> Unit,
onServerAgentModeChange: (String) -> Unit
) {
Column(verticalArrangement = Arrangement.spacedBy(12.dp)) {
Text(
@@ -420,6 +424,50 @@ private fun ServerSection(
singleLine = true
)
// Server Agent Mode selector (only shown if delegate server is configured)
if (delegateServerUrl.isNotEmpty()) {
Text(
text = "Agent Mode",
style = MaterialTheme.typography.bodyMedium
)
// Mode selection buttons
Row(
modifier = Modifier.fillMaxWidth(),
horizontalArrangement = Arrangement.spacedBy(8.dp)
) {
ModeButton(
text = "Local Only",
selected = serverAgentMode == "local_only",
onClick = { onServerAgentModeChange("local_only") },
modifier = Modifier.weight(1f)
)
ModeButton(
text = "Full Remote",
selected = serverAgentMode == "full_remote",
onClick = { onServerAgentModeChange("full_remote") },
modifier = Modifier.weight(1f)
)
ModeButton(
text = "Smart",
selected = serverAgentMode == "delegation",
onClick = { onServerAgentModeChange("delegation") },
modifier = Modifier.weight(1f)
)
}
Text(
text = when (serverAgentMode) {
"local_only" -> "Uses only the local model on your device."
"full_remote" -> "Bypasses local model entirely. All queries go to the server."
"delegation" -> "Local model decides when to ask the server for help."
else -> ""
},
style = MaterialTheme.typography.bodySmall,
color = MaterialTheme.colorScheme.onSurfaceVariant
)
}
Text(
text = "Leave empty to disable server features. URLs are saved automatically.",
style = MaterialTheme.typography.bodySmall,
@@ -428,6 +476,31 @@ private fun ServerSection(
}
}
@Composable
private fun ModeButton(
text: String,
selected: Boolean,
onClick: () -> Unit,
modifier: Modifier = Modifier
) {
TextButton(
onClick = onClick,
modifier = modifier,
colors = ButtonDefaults.textButtonColors(
containerColor = if (selected)
MaterialTheme.colorScheme.primaryContainer
else
MaterialTheme.colorScheme.surfaceVariant,
contentColor = if (selected)
MaterialTheme.colorScheme.onPrimaryContainer
else
MaterialTheme.colorScheme.onSurfaceVariant
)
) {
Text(text, style = MaterialTheme.typography.labelMedium)
}
}
@Composable
private fun TtsSection(
enabled: Boolean,
@@ -35,6 +35,7 @@ data class SettingsUiState(
val serverEnabled: Boolean = false,
val searchServerUrl: String = "",
val delegateServerUrl: String = "",
val serverAgentMode: String = "local_only", // "local_only", "full_remote", "delegation"
val searchServerHealthy: Boolean? = null,
val delegateServerHealthy: Boolean? = null,
val serverModels: List<String> = emptyList(),
@@ -197,6 +198,7 @@ class SettingsViewModel(
val ttsEnabled = userSettings.ttsEnabled.first()
val ttsAutoMode = userSettings.ttsAutoMode.first()
val floatingButtonEnabled = userSettings.floatingButtonEnabled.first()
val serverAgentMode = userSettings.serverAgentMode.first()
val finalModelPath = if (modelPath.isEmpty() && ModelDownloadManager.isModelDownloaded(context)) {
ModelDownloadManager.getModelFile(context).absolutePath
@@ -211,6 +213,7 @@ class SettingsViewModel(
serverEnabled = serverEnabled,
searchServerUrl = searchServerUrl,
delegateServerUrl = delegateServerUrl,
serverAgentMode = serverAgentMode,
selectedModel = selectedModel,
isLoading = false,
modelLoaded = llmEngine.isLoaded(),
@@ -551,6 +554,14 @@ class SettingsViewModel(
}
}
// Server agent mode
fun setServerAgentMode(mode: String) {
_uiState.value = _uiState.value.copy(serverAgentMode = mode)
viewModelScope.launch {
userSettings.setServerAgentMode(mode)
}
}
// Floating button (experimental)
fun setFloatingButtonEnabled(enabled: Boolean) {
_uiState.value = _uiState.value.copy(floatingButtonEnabled = enabled)
File diff suppressed because one or more lines are too long