From 8ab2e661eeda17def2196a1f87d7ade4c0dda735 Mon Sep 17 00:00:00 2001
From: Kaloyan Nikolov
Date: Mon, 6 Apr 2026 18:52:17 +0200
Subject: [PATCH] Add server agent modes: full_remote and delegation

New Features:
- RemoteAgent: Full server-as-agent mode (bypasses the local model)
- DelegationAgent: Local model decides when to ask the server for help
- Server mode selector in Settings: Local Only / Full Remote / Smart
- Updated MainViewModel to support all three modes for text, audio, and images
- SettingsUiState and SettingsViewModel updated with serverAgentMode

Modes:
- local_only: Use only the local Gemma 4 model (default)
- full_remote: All queries go to the delegate server (OpenAI-compatible API)
- delegation: Local model classifies queries, delegates complex ones to the server
---
 .../java/com/sleepy/agent/di/AppModule.kt     |   3 +-
 .../sleepy/agent/inference/DelegationAgent.kt | 349 ++
 .../com/sleepy/agent/inference/RemoteAgent.kt | 302 ++
 .../com/sleepy/agent/settings/UserSettings.kt |  14 +
 .../sleepy/agent/ui/screens/MainViewModel.kt  | 532 ++-
 .../sleepy/agent/ui/screens/SettingsScreen.kt |  77 +-
 .../agent/ui/screens/SettingsViewModel.kt     |  11 +
 7 files changed, 1204 insertions(+), 84 deletions(-)
 create mode 100644 app/src/main/java/com/sleepy/agent/inference/DelegationAgent.kt
 create mode 100644 app/src/main/java/com/sleepy/agent/inference/RemoteAgent.kt

diff --git a/app/src/main/java/com/sleepy/agent/di/AppModule.kt b/app/src/main/java/com/sleepy/agent/di/AppModule.kt
index 11d07c7..4ee9025 100644
--- a/app/src/main/java/com/sleepy/agent/di/AppModule.kt
+++ b/app/src/main/java/com/sleepy/agent/di/AppModule.kt
@@ -95,7 +95,8 @@ class AppModule(private val context: Context) {
             agent = agent,
             llmEngine = llmEngine,
             userSettings = userSettings,
-            webSearchTool = webSearchTool
+            webSearchTool = webSearchTool,
+            httpClient = ktorClient
         )
     }

diff --git a/app/src/main/java/com/sleepy/agent/inference/DelegationAgent.kt b/app/src/main/java/com/sleepy/agent/inference/DelegationAgent.kt
new file mode 100644
index 0000000..b715a62
--- /dev/null
+++ b/app/src/main/java/com/sleepy/agent/inference/DelegationAgent.kt
@@ -0,0 +1,349 @@
+package com.sleepy.agent.inference
+
+import android.util.Log
+import io.ktor.client.HttpClient
+import io.ktor.client.call.body
+import io.ktor.client.plugins.timeout
+import io.ktor.client.request.post
+import io.ktor.client.request.setBody
+import io.ktor.http.ContentType
+import io.ktor.http.contentType
+import kotlinx.coroutines.flow.Flow
+import kotlinx.coroutines.flow.channelFlow
+import kotlinx.coroutines.flow.collect
+import kotlinx.coroutines.flow.flow
+import kotlinx.coroutines.flow.onEach
+import kotlinx.serialization.encodeToString
+import kotlinx.serialization.json.Json
+import kotlinx.serialization.json.JsonObject
+import kotlinx.serialization.json.addJsonObject
+import kotlinx.serialization.json.buildJsonObject
+import kotlinx.serialization.json.put
+import kotlinx.serialization.json.putJsonArray
+
+/**
+ * Delegation Agent - the local model decides when to ask the big model for help.
+ *
+ * Flow:
+ * 1. User input → Local model (small, fast)
+ * 2. Local model classifies whether it needs help:
+ *    - Simple question (factual, from training data) → Answer directly
+ *    - Complex question (reasoning, current events) → Delegate to big model
+ *    - Uncertain → Ask big model for help
+ * 3. If delegation is needed:
+ *    - Local model formulates a clear request
+ *    - The request is sent to the big model (server)
+ *    - The big model responds
+ *    - Local model incorporates that response into the final answer
+ * 4. Final answer to user
+ */
+class DelegationAgent(
+    private val localEngine: LlmEngine,
+    private val httpClient: HttpClient,
+    private val delegateServerUrl: String
+) {
+    companion object {
+        private const val TAG = "DelegationAgent"
+        private val json = Json { ignoreUnknownKeys = true }
+
+        // Classification thresholds
+        private const val CONFIDENCE_THRESHOLD = 0.7f
+    }
+
+    private var localConversation: Conversation? = null
+
+    private val delegationPrompt = """
+        You are a helpful assistant. Before answering, assess your confidence:
+
+        Can you answer this confidently based on your training? Reply with EXACTLY one of:
+
+        [DIRECT] - You know this well and can answer directly
+        [DELEGATE: question for big model] - You need help; provide a clear question for a smarter model
+        [CLARIFY] - You need more information from the user
+
+        Your assessment:
+    """.trimIndent()
+
+    private val synthesisPrompt = """
+        You received help from a more knowledgeable model. Synthesize this into a helpful,
+        natural response for the user. Don't mention that you asked for help - just provide
+        the answer conversationally.
+
+        User's original question: {user_question}
+
+        Helpful information received: {delegated_response}
+
+        Your response:
+    """.trimIndent()
+
+    /**
+     * Process user input, delegating to the big model when needed.
+     */
+    suspend fun processWithDelegation(
+        userInput: String,
+        conversationHistory: List<Pair<String, String>> = emptyList(), // (role, content) pairs
+        onStatusUpdate: ((String) -> Unit)? = null
+    ): Flow<DelegationEvent> = channelFlow {
+        try {
+            // Step 1: Local model assesses confidence
+            onStatusUpdate?.invoke("Thinking...")
+            send(DelegationEvent.Status("Analyzing question..."))
+
+            val classification = classifyQuestion(userInput, conversationHistory)
+
+            when {
+                classification.startsWith("[DIRECT]") -> {
+                    // Step 2a: Answer directly with the local model
+                    Log.d(TAG, "Answering directly")
+                    onStatusUpdate?.invoke("Answering...")
+                    send(DelegationEvent.Status("Answering directly..."))
+
+                    answerDirectly(userInput, conversationHistory).collect { event ->
+                        when (event) {
+                            is AgentEvent.Token -> send(DelegationEvent.Token(event.text))
+                            is AgentEvent.Complete -> send(DelegationEvent.Complete(event.response))
+                            is AgentEvent.Error -> send(DelegationEvent.Error(event.message))
+                            else -> {}
+                        }
+                    }
+                }
+
+                classification.startsWith("[DELEGATE:") -> {
+                    // Step 2b: Delegate to the big model
+                    val extractedQuestion = extractDelegateQuestion(classification)
+                    Log.d(TAG, "Delegating to big model: $extractedQuestion")
+                    onStatusUpdate?.invoke("Consulting expert model...")
+                    send(DelegationEvent.Status("Consulting expert model..."))
+
+                    val delegatedResponse = queryBigModel(extractedQuestion, conversationHistory)
+
+                    // Step 3: Synthesize with the local model
+                    onStatusUpdate?.invoke("Synthesizing answer...")
+                    send(DelegationEvent.Status("Synthesizing answer..."))
+
+                    synthesizeResponse(userInput, delegatedResponse, conversationHistory).collect { event ->
+                        when (event) {
+                            is AgentEvent.Token -> send(DelegationEvent.Token(event.text))
+                            is AgentEvent.Complete -> send(DelegationEvent.Complete(event.response))
+                            is AgentEvent.Error -> send(DelegationEvent.Error(event.message))
+                            else -> {}
+                        }
+                    }
+                }
+
+                classification.startsWith("[CLARIFY]") -> {
+                    // Step 2c: Ask the user for clarification
+                    Log.d(TAG, "Asking for clarification")
+                    val clarificationRequest = classification.removePrefix("[CLARIFY]").trim()
+                        .ifEmpty { "I need more information to help you. Could you provide more details about what you're looking for?" }
+
+                    send(DelegationEvent.Token(clarificationRequest))
+                    send(DelegationEvent.Complete(clarificationRequest))
+                }
+
+                else -> {
+                    // Fallback: try a direct answer
+                    Log.w(TAG, "Unknown classification: $classification, falling back to direct")
+                    answerDirectly(userInput, conversationHistory).collect { event ->
+                        when (event) {
+                            is AgentEvent.Token -> send(DelegationEvent.Token(event.text))
+                            is AgentEvent.Complete -> send(DelegationEvent.Complete(event.response))
+                            is AgentEvent.Error -> send(DelegationEvent.Error(event.message))
+                            else -> {}
+                        }
+                    }
+                }
+            }
+
+        } catch (e: Exception) {
+            Log.e(TAG, "Error in delegation flow", e)
+            send(DelegationEvent.Error("Error: ${e.message}"))
+        }
+    }
+
+    /**
+     * Quick classification without full generation.
+     */
+    private suspend fun classifyQuestion(
+        userInput: String,
+        history: List<Pair<String, String>>
+    ): String {
+        // Ensure a conversation exists
+        if (localConversation?.isAlive != true) {
+            localConversation = localEngine.createConversation(delegationPrompt)
+        }
+
+        val prompt = buildString {
+            history.takeLast(3).forEach { (role, content) ->
+                appendLine("$role: $content")
+            }
+            appendLine("User: $userInput")
+            appendLine()
+            append("Assessment: ")
+        }
+
+        return try {
+            localEngine.generate(
+                conversation = localConversation!!,
+                prompt = prompt,
+                audioData = null,
+                images = null
+            ).trim()
+        } catch (e: Exception) {
+            Log.e(TAG, "Classification failed", e)
+            "[DIRECT]" // Fall back to a direct answer
+        }
+    }
+
+    /**
+     * Answer directly using the local model.
+     */
+    private fun answerDirectly(
+        userInput: String,
+        history: List<Pair<String, String>>
+    ): Flow<AgentEvent> = flow {
+        if (localConversation?.isAlive != true) {
+            localConversation = localEngine.createConversation()
+        }
+
+        val prompt = buildString {
+            history.takeLast(5).forEach { (role, content) ->
+                appendLine("$role: $content")
+            }
+            appendLine("User: $userInput")
+            appendLine()
+            append("Assistant: ")
+        }
+
+        val response = localEngine.generate(
+            conversation = localConversation!!,
+            prompt = prompt,
+            audioData = null,
+            images = null
+        )
+
+        emit(AgentEvent.Token(response))
+        emit(AgentEvent.Complete(response))
+    }
+
+    /**
+     * Query the big model on the server.
+     */
+    private suspend fun queryBigModel(
+        question: String,
+        history: List<Pair<String, String>>
+    ): String {
+        return try {
+            val requestBody = buildJsonObject {
+                putJsonArray("messages") {
+                    history.forEach { (role, content) ->
+                        addJsonObject {
+                            put("role", if (role == "User") "user" else "assistant")
+                            put("content", content)
+                        }
+                    }
+                    addJsonObject {
+                        put("role", "user")
+                        put("content", question)
+                    }
+                }
+                put("stream", false)
+                put("temperature", 0.7)
+            }
+
+            val response: String = httpClient.post("$delegateServerUrl/v1/chat/completions") {
+                contentType(ContentType.Application.Json)
+                setBody(requestBody)
+                timeout {
+                    requestTimeoutMillis = 120_000
+                    connectTimeoutMillis = 30_000
+                }
+            }.body()
+
+            parseServerResponse(response)
+
+        } catch (e: Exception) {
+            Log.e(TAG, "Failed to query big model", e)
+            "I apologize, but I couldn't reach the expert model at this time. Answer the user's question from your own knowledge as well as you can."
+        }
+    }
+
+    /**
+     * Synthesize the delegated response into a natural answer.
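+     *
+     * The {user_question} and {delegated_response} placeholders in [synthesisPrompt]
+     * are replaced verbatim, so the local model receives one plain-text prompt.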
+     */
+    private fun synthesizeResponse(
+        userQuestion: String,
+        delegatedResponse: String,
+        history: List<Pair<String, String>>
+    ): Flow<AgentEvent> = flow {
+        if (localConversation?.isAlive != true) {
+            localConversation = localEngine.createConversation()
+        }
+
+        val prompt = synthesisPrompt
+            .replace("{user_question}", userQuestion)
+            .replace("{delegated_response}", delegatedResponse)
+
+        val response = localEngine.generate(
+            conversation = localConversation!!,
+            prompt = prompt,
+            audioData = null,
+            images = null
+        )
+
+        emit(AgentEvent.Token(response))
+        emit(AgentEvent.Complete(response))
+    }
+
+    private fun extractDelegateQuestion(classification: String): String {
+        // Extract the question from [DELEGATE: question here]
+        val start = classification.indexOf("[DELEGATE:")
+        if (start == -1) return classification
+
+        val end = classification.indexOf("]", start + 10)
+        if (end == -1) return classification
+
+        return classification.substring(start + 10, end).trim()
+    }
+
+    private fun parseServerResponse(response: String): String {
+        return try {
+            val completion = json.decodeFromString<OpenAICompletion>(response)
+            completion.choices.firstOrNull()?.message?.content
+                ?: "I received information but couldn't parse it properly."
+        } catch (e: Exception) {
+            Log.w(TAG, "Failed to parse server response as JSON, returning raw")
+            response
+        }
+    }
+
+    fun reset() {
+        localConversation?.close()
+        localConversation = null
+    }
+
+    @kotlinx.serialization.Serializable
+    data class OpenAICompletion(
+        val choices: List<CompletionChoice>
+    ) {
+        @kotlinx.serialization.Serializable
+        data class CompletionChoice(
+            val message: Message
+        ) {
+            @kotlinx.serialization.Serializable
+            data class Message(
+                val content: String
+            )
+        }
+    }
+}
+
+sealed class DelegationEvent {
+    data class Token(val text: String) : DelegationEvent()
+    data class Status(val message: String) : DelegationEvent()
+    data class Complete(val response: String) : DelegationEvent()
+    data class Error(val message: String) : DelegationEvent()
+}
diff --git a/app/src/main/java/com/sleepy/agent/inference/RemoteAgent.kt b/app/src/main/java/com/sleepy/agent/inference/RemoteAgent.kt
new file mode 100644
index 0000000..be56e77
--- /dev/null
+++ b/app/src/main/java/com/sleepy/agent/inference/RemoteAgent.kt
@@ -0,0 +1,302 @@
+package com.sleepy.agent.inference
+
+import android.util.Log
+import io.ktor.client.HttpClient
+import io.ktor.client.call.body
+import io.ktor.client.plugins.timeout
+import io.ktor.client.request.get
+import io.ktor.client.request.post
+import io.ktor.client.request.setBody
+import io.ktor.http.ContentType
+import io.ktor.http.contentType
+import kotlinx.coroutines.flow.Flow
+import kotlinx.coroutines.flow.channelFlow
+import kotlinx.serialization.Serializable
+import kotlinx.serialization.json.Json
+import kotlinx.serialization.json.JsonObject
+import kotlinx.serialization.json.addJsonObject
+import kotlinx.serialization.json.buildJsonObject
+import kotlinx.serialization.json.put
+import kotlinx.serialization.json.putJsonArray
+
+/**
+ * Remote agent that delegates all LLM calls to a server.
+ * This bypasses the local model entirely - useful for:
+ * 1. Using powerful server-side models when the local model is insufficient
+ * 2. Testing the app UI without loading a local model
+ * 3. Fallback when the local model fails
+ */
+class RemoteAgent(
+    private val httpClient: HttpClient,
+    private val baseUrl: String
+) {
+    companion object {
+        private const val TAG = "RemoteAgent"
+        private val json = Json { ignoreUnknownKeys = true }
+    }
+
+    // Supported API formats
+    enum class ApiFormat {
+        OPENAI_COMPATIBLE, // /v1/chat/completions
+        OLLAMA,            // /api/generate or /api/chat
+        CUSTOM             // Custom endpoint
+    }
+
+    private val messageHistory = mutableListOf<RemoteMessage>()
+    private var apiFormat = ApiFormat.OPENAI_COMPATIBLE
+
+    data class RemoteMessage(
+        val role: String, // "system", "user", "assistant"
+        val content: String
+    )
+
+    /**
+     * Send a message to the remote server and get a streaming response.
+     */
+    suspend fun sendMessage(
+        message: String,
+        systemPrompt: String? = null,
+        stream: Boolean = true
+    ): Flow<RemoteAgentEvent> = channelFlow {
+        try {
+            // Add the user message to history
+            messageHistory.add(RemoteMessage("user", message))
+
+            // Build the request based on the detected API format
+            val requestBody = when (apiFormat) {
+                ApiFormat.OPENAI_COMPATIBLE -> buildOpenAIRequest(systemPrompt, stream)
+                ApiFormat.OLLAMA -> buildOllamaRequest(systemPrompt, stream)
+                ApiFormat.CUSTOM -> buildCustomRequest(systemPrompt)
+            }
+
+            Log.d(TAG, "Sending request to $baseUrl (format: $apiFormat)")
+
+            val endpoint = when (apiFormat) {
+                ApiFormat.OPENAI_COMPATIBLE -> "$baseUrl/v1/chat/completions"
+                ApiFormat.OLLAMA -> "$baseUrl/api/chat"
+                ApiFormat.CUSTOM -> baseUrl
+            }
+
+            val response: String = httpClient.post(endpoint) {
+                contentType(ContentType.Application.Json)
+                setBody(requestBody)
+                timeout {
+                    requestTimeoutMillis = 120_000 // 2 minutes for generation
+                    connectTimeoutMillis = 30_000
+                }
+            }.body()
+
+            if (stream) {
+                // Handle a streaming response (SSE format)
+                handleStreamingResponse(response)
+            } else {
+                // Handle a non-streaming response
+                handleNonStreamingResponse(response)
+            }
+
+        } catch (e: Exception) {
+            Log.e(TAG, "Error calling remote server", e)
+            send(RemoteAgentEvent.Error("Server error: ${e.message}"))
+        }
+    }
+
+    /**
+     * Quick check whether the server is available.
+     */
+    suspend fun checkServer(): Boolean {
+        return try {
+            // Try to detect the API format by probing well-known endpoints
+            // (/v1/models and /api/tags are GET endpoints)
+            val openaiResponse = httpClient.get("$baseUrl/v1/models") {
+                timeout { requestTimeoutMillis = 5000 }
+            }
+            if (openaiResponse.status.value == 200) {
+                apiFormat = ApiFormat.OPENAI_COMPATIBLE
+                Log.d(TAG, "Detected OpenAI-compatible API")
+                return true
+            }
+
+            val ollamaResponse = httpClient.get("$baseUrl/api/tags") {
+                timeout { requestTimeoutMillis = 5000 }
+            }
+            if (ollamaResponse.status.value == 200) {
+                apiFormat = ApiFormat.OLLAMA
+                Log.d(TAG, "Detected Ollama API")
+                return true
+            }
+
+            // Assume a custom endpoint if the base URL responds at all
+            apiFormat = ApiFormat.CUSTOM
+            true
+        } catch (e: Exception) {
+            Log.e(TAG, "Server check failed", e)
+            false
+        }
+    }
+
+    /**
+     * Clear conversation history.
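+     *
+     * Note: history lives only on the client; every request replays the full
+     * message list, so clearing it here fully resets the conversation.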
+     */
+    fun clearHistory() {
+        messageHistory.clear()
+    }
+
+    private fun buildOpenAIRequest(systemPrompt: String?, stream: Boolean): JsonObject {
+        return buildJsonObject {
+            put("model", "local-model") // Servers usually ignore this for single-model setups
+            putJsonArray("messages") {
+                // System message
+                systemPrompt?.let {
+                    addJsonObject {
+                        put("role", "system")
+                        put("content", it)
+                    }
+                }
+                // Conversation history
+                messageHistory.forEach { msg ->
+                    addJsonObject {
+                        put("role", msg.role)
+                        put("content", msg.content)
+                    }
+                }
+            }
+            put("stream", stream)
+            put("temperature", 0.7)
+            put("max_tokens", 4096)
+        }
+    }
+
+    private fun buildOllamaRequest(systemPrompt: String?, stream: Boolean): JsonObject {
+        return buildJsonObject {
+            put("model", "local-model")
+            putJsonArray("messages") {
+                systemPrompt?.let {
+                    addJsonObject {
+                        put("role", "system")
+                        put("content", it)
+                    }
+                }
+                messageHistory.forEach { msg ->
+                    addJsonObject {
+                        put("role", msg.role)
+                        put("content", msg.content)
+                    }
+                }
+            }
+            put("stream", stream)
+        }
+    }
+
+    private fun buildCustomRequest(systemPrompt: String?): JsonObject {
+        return buildJsonObject {
+            put("prompt", buildPromptWithHistory(systemPrompt))
+        }
+    }
+
+    private fun buildPromptWithHistory(systemPrompt: String?): String {
+        return buildString {
+            systemPrompt?.let { appendLine(it).appendLine() }
+            messageHistory.forEach { msg ->
+                when (msg.role) {
+                    "user" -> appendLine("User: ${msg.content}")
+                    "assistant" -> appendLine("Assistant: ${msg.content}")
+                }
+            }
+            appendLine("Assistant:")
+        }
+    }
+
+    private suspend fun kotlinx.coroutines.channels.SendChannel<RemoteAgentEvent>.handleStreamingResponse(response: String) {
+        // Parse the SSE (Server-Sent Events) format
+        val lines = response.lines()
+        val responseBuilder = StringBuilder()
+
+        for (line in lines) {
+            when {
+                line.startsWith("data: ") -> {
+                    val data = line.substring(6)
+                    if (data == "[DONE]") {
+                        // Stream complete
+                        val fullResponse = responseBuilder.toString()
+                        messageHistory.add(RemoteMessage("assistant", fullResponse))
+                        send(RemoteAgentEvent.Complete(fullResponse))
+                        return
+                    }
+
+                    try {
+                        val chunk = json.decodeFromString<OpenAIChunk>(data)
+                        val content = chunk.choices.firstOrNull()?.delta?.content ?: ""
+                        if (content.isNotEmpty()) {
+                            responseBuilder.append(content)
+                            send(RemoteAgentEvent.Token(content))
+                        }
+                    } catch (e: Exception) {
+                        Log.w(TAG, "Failed to parse chunk: $data")
+                    }
+                }
+            }
+        }
+
+        // If we get here without [DONE], return what we have
+        val fullResponse = responseBuilder.toString()
+        if (fullResponse.isNotEmpty()) {
+            messageHistory.add(RemoteMessage("assistant", fullResponse))
+            send(RemoteAgentEvent.Complete(fullResponse))
+        }
+    }
+
+    private suspend fun kotlinx.coroutines.channels.SendChannel<RemoteAgentEvent>.handleNonStreamingResponse(response: String) {
+        try {
+            val completion = json.decodeFromString<OpenAICompletion>(response)
+            val content = completion.choices.firstOrNull()?.message?.content ?: ""
+
+            messageHistory.add(RemoteMessage("assistant", content))
+
+            // Emit as a single token for consistency
+            send(RemoteAgentEvent.Token(content))
+            send(RemoteAgentEvent.Complete(content))
+        } catch (e: Exception) {
+            Log.e(TAG, "Failed to parse response", e)
+            send(RemoteAgentEvent.Error("Failed to parse server response"))
+        }
+    }
+
+    // Data classes for the OpenAI-compatible API
+    @Serializable
+    data class OpenAIChunk(
+        val choices: List<Choice>
+    ) {
+        @Serializable
+        data class Choice(
+            val delta: Delta
+        ) {
+            @Serializable
+            data class Delta(
+                val content: String? = null
+            )
+        }
+    }
+
+    @Serializable
+    data class OpenAICompletion(
+        val choices: List<CompletionChoice>
+    ) {
+        @Serializable
+        data class CompletionChoice(
+            val message: Message
+        ) {
+            @Serializable
+            data class Message(
+                val content: String
+            )
+        }
+    }
+}
+
+sealed class RemoteAgentEvent {
+    data class Token(val text: String) : RemoteAgentEvent()
+    data class Complete(val response: String) : RemoteAgentEvent()
+    data class Error(val message: String) : RemoteAgentEvent()
+}
diff --git a/app/src/main/java/com/sleepy/agent/settings/UserSettings.kt b/app/src/main/java/com/sleepy/agent/settings/UserSettings.kt
index 3431920..ff46e7c 100644
--- a/app/src/main/java/com/sleepy/agent/settings/UserSettings.kt
+++ b/app/src/main/java/com/sleepy/agent/settings/UserSettings.kt
@@ -23,6 +23,9 @@ class UserSettings(
         val MODEL_SOURCE = stringPreferencesKey("model_source")
         val SELECTED_SERVER_MODEL = stringPreferencesKey("selected_server_model")
 
+        // Server agent mode: "local_only", "full_remote", "delegation"
+        val SERVER_AGENT_MODE = stringPreferencesKey("server_agent_mode")
+
         // TTS settings
         val TTS_ENABLED = booleanPreferencesKey("tts_enabled")
         val TTS_AUTO_MODE = booleanPreferencesKey("tts_auto_mode")
@@ -54,6 +57,11 @@ class UserSettings(
     val enableServerDelegation: Flow<Boolean> = dataStore.data.map { prefs ->
         prefs[ENABLE_SERVER_DELEGATION] ?: false
     }
+
+    // Server agent mode: "local_only" (default), "full_remote", "delegation"
+    val serverAgentMode: Flow<String> = dataStore.data.map { prefs ->
+        prefs[SERVER_AGENT_MODE] ?: "local_only"
+    }
 
     val modelSource: Flow<ModelSource> = dataStore.data.map { prefs ->
         prefs[MODEL_SOURCE]?.let { ModelSource.valueOf(it) } ?: ModelSource.FILE_PATH
@@ -105,6 +113,12 @@ class UserSettings(
             prefs[ENABLE_SERVER_DELEGATION] = enabled
         }
     }
+
+    suspend fun setServerAgentMode(mode: String) {
+        dataStore.edit { prefs ->
+            prefs[SERVER_AGENT_MODE] = mode
+        }
+    }
 
     suspend fun setModelSource(source: ModelSource) {
         dataStore.edit { prefs ->
diff --git a/app/src/main/java/com/sleepy/agent/ui/screens/MainViewModel.kt b/app/src/main/java/com/sleepy/agent/ui/screens/MainViewModel.kt
index 07ef8b8..9d58cfa 100644
--- a/app/src/main/java/com/sleepy/agent/ui/screens/MainViewModel.kt
+++ b/app/src/main/java/com/sleepy/agent/ui/screens/MainViewModel.kt
@@ -12,7 +12,11 @@ import com.sleepy.agent.data.ConversationStorage
 import com.sleepy.agent.download.ModelDownloadManager
 import com.sleepy.agent.inference.Agent
 import com.sleepy.agent.inference.AgentEvent
+import com.sleepy.agent.inference.DelegationAgent
+import com.sleepy.agent.inference.DelegationEvent
 import com.sleepy.agent.inference.LlmEngine
+import com.sleepy.agent.inference.RemoteAgent
+import com.sleepy.agent.inference.RemoteAgentEvent
 import com.sleepy.agent.settings.UserSettings
 import com.sleepy.agent.tools.WebSearchTool
 import kotlinx.coroutines.flow.MutableStateFlow
@@ -50,7 +54,8 @@ class MainViewModel(
     private val agent: Agent,
     private val llmEngine: LlmEngine,
     private val userSettings: UserSettings,
-    private val webSearchTool: WebSearchTool
+    private val webSearchTool: WebSearchTool,
+    private val httpClient: io.ktor.client.HttpClient
 ) : ViewModel() {
 
     private val conversationStorage = ConversationStorage(context)
@@ -75,6 +80,10 @@ class MainViewModel(
     // Track if user started with voice or text for TTS auto mode
     private var firstInputWasVoice: Boolean? = null
 
+    // Remote agents for server modes
+    private var remoteAgent: RemoteAgent? = null
+    private var delegationAgent: DelegationAgent? = null
+
     companion object {
         private const val TAG = "MainViewModel"
         private const val KEY_MESSAGES = "messages"
@@ -113,6 +122,17 @@ class MainViewModel(
                 Log.d(TAG, "Updated web search URL to: $url")
             }
         }
+
+        // Initialize remote agents when the delegate server URL changes
+        viewModelScope.launch {
+            userSettings.delegateServerUrl.collect { url ->
+                if (url.isNotEmpty()) {
+                    remoteAgent = RemoteAgent(httpClient, url)
+                    delegationAgent = DelegationAgent(llmEngine, httpClient, url)
+                    Log.d(TAG, "Initialized remote agents with server: $url")
+                }
+            }
+        }
     }
 
     private fun restoreState() {
@@ -250,12 +270,11 @@ class MainViewModel(
 
             _uiState.value = UIState.PROCESSING
 
-            val useServer = userSettings.enableServerDelegation.first()
+            val mode = userSettings.serverAgentMode.first()
 
-            if (useServer) {
-                processAudioWithServer(audioData)
-            } else {
-                processAudioWithLocalModel(audioData)
+            when (mode) {
+                "full_remote", "delegation" -> processAudioWithServer(audioData)
+                else -> processAudioWithLocalModel(audioData)
             }
         }
     }
@@ -350,14 +369,68 @@ class MainViewModel(
         _messages.value = _messages.value + userMessage
         saveState()
 
-        val aiMessage = ConversationMessage(
-            text = "Server mode doesn't support native audio understanding yet. Please use local model for voice input.",
-            isUser = false
-        )
-        _messages.value = _messages.value + aiMessage
-        saveState()
-        _uiState.value = UIState.IDLE
+        _uiState.value = UIState.PROCESSING
+
+        // For server mode with audio, we need to either:
+        // 1. Use the local model to transcribe, then send the text to the server
+        // 2. Send the audio to the server if it supports it
+        // For now, transcribe locally first
+
+        if (!llmEngine.isLoaded()) {
+            val modelPath = userSettings.modelPath.first()
+            if (modelPath.isNotEmpty()) {
+                _responseText.value = "Loading model for transcription..."
+                val result = llmEngine.loadModel(modelPath)
+                result.onFailure { e ->
+                    _uiState.value = UIState.ERROR
+                    _responseText.value = "Failed to load model: ${e.message}"
+                    return
+                }
+            } else {
+                _uiState.value = UIState.ERROR
+                _responseText.value = "No model loaded for transcription. Please load a model first."
+                return
+            }
+        }
+
+        try {
+            // First, transcribe the audio locally
+            val transcription = llmEngine.generate(
+                conversation = ensureConversation(),
+                prompt = "Transcribe this audio:",
+                audioData = audioData,
+                images = null
+            )
+
+            Log.d(TAG, "Transcribed: $transcription")
+
+            // Update the user message with the transcription
+            val updatedMessages = _messages.value.toMutableList()
+            updatedMessages[updatedMessages.size - 1] = userMessage.copy(
+                text = "🎤 \"$transcription\""
+            )
+            _messages.value = updatedMessages
+            saveState()
+
+            // Now process the transcribed text with the server
+            val mode = userSettings.serverAgentMode.first()
+            when (mode) {
+                "full_remote" -> processTextWithRemoteAgent(transcription)
+                "delegation" -> processTextWithDelegation(transcription)
+                else -> processTextWithLocalModel(transcription)
+            }
+
+        } catch (e: Exception) {
+            Log.e(TAG, "Error processing audio with server", e)
+            _uiState.value = UIState.ERROR
+            _responseText.value = "Error: ${e.message}"
+        }
     }
+
+    private fun ensureConversation() = conversation?.takeIf { it.isAlive }
+        ?: llmEngine.createConversation().also { conversation = it }
+
+    private var conversation: com.sleepy.agent.inference.Conversation? = null
 
     fun sendTextMessage(text: String) {
         viewModelScope.launch {
@@ -372,13 +445,13 @@ class MainViewModel(
                 Log.d(TAG, "First input was text - TTS auto-disabled")
             }
 
-            val useServer = userSettings.enableServerDelegation.first()
-            Log.d(TAG, "useServer: $useServer")
+            val mode = userSettings.serverAgentMode.first()
+            Log.d(TAG, "Server agent mode: $mode")
 
-            if (useServer) {
-                processTextWithServer(text)
-            } else {
-                processTextWithLocalModel(text)
+            when (mode) {
+                "full_remote" -> processTextWithRemoteAgent(text)
+                "delegation" -> processTextWithDelegation(text)
+                else -> processTextWithLocalModel(text)
             }
         }
     }
@@ -493,7 +566,10 @@ class MainViewModel(
         }
     }
 
-    private suspend fun processTextWithServer(text: String) {
+    /**
+     * Full remote mode - bypass the local model entirely and use the server as the agent.
+     */
+    private suspend fun processTextWithRemoteAgent(text: String) {
         val userMessage = ConversationMessage(
             text = text,
             isUser = true
@@ -503,13 +579,118 @@ class MainViewModel(
         )
         _messages.value = _messages.value + userMessage
         saveState()
 
         _uiState.value = UIState.PROCESSING
 
-        val aiMessage = ConversationMessage(
-            text = "Server mode not yet implemented. Please use local model.",
-            isUser = false
-        )
-        _messages.value = _messages.value + aiMessage
-        saveState()
-        _uiState.value = UIState.IDLE
+        val remote = remoteAgent
+        if (remote == null) {
+            _uiState.value = UIState.ERROR
+            _responseText.value = "No server configured. Please set a delegate server URL in Settings."
+            return
+        }
+
+        try {
+            val responseBuilder = StringBuilder()
+
+            remote.sendMessage(
+                message = text,
+                systemPrompt = "You are a helpful AI assistant."
+            ).collect { event ->
+                when (event) {
+                    is RemoteAgentEvent.Token -> {
+                        responseBuilder.append(event.text)
+                        _responseText.value = responseBuilder.toString()
+                        _uiState.value = UIState.SPEAKING
+                    }
+                    is RemoteAgentEvent.Complete -> {
+                        val aiMessage = ConversationMessage(
+                            text = event.response,
+                            isUser = false
+                        )
+                        _messages.value = _messages.value + aiMessage
+                        saveState()
+
+                        speakResponse(event.response)
+                        _uiState.value = UIState.IDLE
+                    }
+                    is RemoteAgentEvent.Error -> {
+                        _responseText.value = "Error: ${event.message}"
+                        _uiState.value = UIState.ERROR
+                    }
+                }
+            }
+        } catch (e: Exception) {
+            Log.e(TAG, "Error in remote agent", e)
+            _uiState.value = UIState.ERROR
+            _responseText.value = "Server error: ${e.message}"
+        }
+    }
+
+    /**
+     * Delegation mode - the local model decides when to ask the big model for help.
+     */
+    private suspend fun processTextWithDelegation(text: String) {
+        val userMessage = ConversationMessage(
+            text = text,
+            isUser = true
+        )
+        _messages.value = _messages.value + userMessage
+        saveState()
+
+        _uiState.value = UIState.PROCESSING
+
+        val delegation = delegationAgent
+        if (delegation == null) {
+            _uiState.value = UIState.ERROR
+            _responseText.value = "Delegation not available. Please set a delegate server URL in Settings."
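+            // Fail fast instead of silently falling back to the local model,
+            // so a misconfigured server URL is visible to the user.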
+            return
+        }
+
+        try {
+            val responseBuilder = StringBuilder()
+
+            delegation.processWithDelegation(
+                userInput = text,
+                conversationHistory = _messages.value.map { (if (it.isUser) "User" else "Assistant") to it.text }
+            ).collect { event ->
+                when (event) {
+                    is DelegationEvent.Token -> {
+                        responseBuilder.append(event.text)
+                        _responseText.value = responseBuilder.toString()
+                        _uiState.value = UIState.SPEAKING
+                    }
+                    is DelegationEvent.Status -> {
+                        _responseText.value = event.message
+                    }
+                    is DelegationEvent.Complete -> {
+                        val aiMessage = ConversationMessage(
+                            text = event.response,
+                            isUser = false
+                        )
+                        _messages.value = _messages.value + aiMessage
+                        saveState()
+
+                        speakResponse(event.response)
+                        _uiState.value = UIState.IDLE
+                    }
+                    is DelegationEvent.Error -> {
+                        _responseText.value = "Error: ${event.message}"
+                        _uiState.value = UIState.ERROR
+                    }
+                }
+            }
+        } catch (e: Exception) {
+            Log.e(TAG, "Error in delegation agent", e)
+            _uiState.value = UIState.ERROR
+            _responseText.value = "Error: ${e.message}"
+        }
+    }
+
+    /**
+     * Legacy server delegation - now redirects to the appropriate mode.
+     */
+    private suspend fun processTextWithServer(text: String) {
+        // Use full remote mode by default for the legacy "use server" setting
+        processTextWithRemoteAgent(text)
+    }
 
     fun setResponse(text: String) {
@@ -542,15 +723,15 @@ class MainViewModel(
             setError("Failed to load image")
             return
         }
-        
+
         // Validate bitmap
         if (bitmap.width == 0 || bitmap.height == 0) {
             setError("Invalid image dimensions")
             return
         }
-        
+
        Log.d(TAG, "Image selected: ${bitmap.width}x${bitmap.height}, text: '$text'")
-        
+
         viewModelScope.launch {
             // Add image message to chat (with text if provided)
             val displayText = if (text.isNotBlank()) "🖼️ $text" else "🖼️ [Image]"
@@ -560,64 +741,253 @@ class MainViewModel(
             )
             _messages.value = _messages.value + userMessage
             saveState()
-            
+
             firstInputWasVoice = false // Image is not voice input
             _uiState.value = UIState.PROCESSING
-            
-            try {
-                if (!llmEngine.isLoaded()) {
-                    val modelPath = userSettings.modelPath.first()
-                    if (modelPath.isNotEmpty()) {
-                        _responseText.value = "Loading model..."
-                        val result = llmEngine.loadModel(modelPath)
-                        result.onFailure { e ->
-                            _uiState.value = UIState.ERROR
-                            _responseText.value = "Failed to load model: ${e.message}"
-                            return@launch
-                        }
-                        agent.prewarmCache()
-                    } else {
+
+            // Check server mode
+            val mode = userSettings.serverAgentMode.first()
+
+            // For server modes, we need a local model to process the image first,
+            // then send the description/results to the server
+            when (mode) {
+                "full_remote" -> processImageWithRemoteAgent(bitmap, text)
+                "delegation" -> processImageWithDelegation(bitmap, text)
+                else -> processImageWithLocalModel(bitmap, text)
+            }
+        }
+    }
+
+    private suspend fun processImageWithLocalModel(bitmap: android.graphics.Bitmap, text: String) {
+        try {
+            if (!llmEngine.isLoaded()) {
+                val modelPath = userSettings.modelPath.first()
+                if (modelPath.isNotEmpty()) {
+                    _responseText.value = "Loading model..."
+                    val result = llmEngine.loadModel(modelPath)
+                    result.onFailure { e ->
+                        _uiState.value = UIState.ERROR
+                        _responseText.value = "Failed to load model: ${e.message}"
+                        return
+                    }
+                    agent.prewarmCache()
+                } else {
+                    _uiState.value = UIState.ERROR
+                    _responseText.value = "No model loaded. Please go to Settings and load a model."
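+                    // Without a model file there is nothing to run locally; bail out.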
+                    return
+                }
+            }
+
+            val responseBuilder = StringBuilder()
+
+            Log.d(TAG, "Processing image with local model...")
+
+            agent.processInput(
+                input = text,
+                images = listOf(bitmap)
+            ).collect { event ->
+                when (event) {
+                    is AgentEvent.Token -> {
+                        responseBuilder.append(event.text)
+                        _responseText.value = responseBuilder.toString()
+                        _uiState.value = UIState.SPEAKING
+                    }
+
+                    is AgentEvent.ExecutingTool -> {
+                        _uiState.value = UIState.EXECUTING_TOOL
+                        _responseText.value = "🔧 Using ${event.toolName}..."
+                    }
+
+                    is AgentEvent.ToolResult -> {
+                        // Tool completed
+                    }
+
+                    is AgentEvent.Complete -> {
+                        val aiMessage = ConversationMessage(
+                            text = event.response,
+                            isUser = false
+                        )
+                        _messages.value = _messages.value + aiMessage
+                        saveState()
+
+                        speakResponse(event.response)
+                        _uiState.value = UIState.IDLE
+                    }
+
+                    is AgentEvent.Error -> {
+                        _responseText.value = "Error: ${event.message}"
+                        _uiState.value = UIState.ERROR
+                    }
+
+                    else -> {}
+                }
+            }
+        } catch (e: Exception) {
+            Log.e(TAG, "Error processing image", e)
+            _uiState.value = UIState.ERROR
+            _responseText.value = "Error processing image: ${e.message}"
+
+            val errorMessage = ConversationMessage(
+                text = "❌ Failed to process image: ${e.message}",
+                isUser = false
+            )
+            _messages.value = _messages.value + errorMessage
+            saveState()
+        }
+    }
+
+    private suspend fun processImageWithRemoteAgent(bitmap: android.graphics.Bitmap, text: String) {
+        try {
+            // First, get an image description from the local model
+            if (!llmEngine.isLoaded()) {
+                val modelPath = userSettings.modelPath.first()
+                if (modelPath.isNotEmpty()) {
+                    _responseText.value = "Analyzing image..."
+                    val result = llmEngine.loadModel(modelPath)
+                    result.onFailure { e ->
+                        _uiState.value = UIState.ERROR
+                        _responseText.value = "Failed to load model: ${e.message}"
+                        return
+                    }
+                } else {
+                    _uiState.value = UIState.ERROR
+                    _responseText.value = "No model loaded for image analysis. Please load a model first."
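+                    // Even in full-remote mode, the on-device model produces the image description.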
+                    return
+                }
+            }
+
+            // Get an image description from the local model
+            val description = llmEngine.generate(
+                conversation = ensureConversation(),
+                prompt = "Describe this image in detail:",
+                audioData = null,
+                images = listOf(bitmap)
+            )
+
+            Log.d(TAG, "Image description: $description")
+
+            // Now send the description + user text to the remote agent
+            val fullPrompt = if (text.isNotBlank()) {
+                "User question about image: $text\n\nImage description: $description"
+            } else {
+                "Describe this image: $description"
+            }
+
+            processTextWithRemoteAgent(fullPrompt)
+
+        } catch (e: Exception) {
+            Log.e(TAG, "Error processing image with remote agent", e)
+            _uiState.value = UIState.ERROR
+            _responseText.value = "Error: ${e.message}"
+        }
+    }
+
+    private suspend fun processImageWithDelegation(bitmap: android.graphics.Bitmap, text: String) {
+        // For delegation mode, process the image locally first.
+        // The delegation agent will decide if server help is needed based on the description.
+        try {
+            if (!llmEngine.isLoaded()) {
+                val modelPath = userSettings.modelPath.first()
+                if (modelPath.isNotEmpty()) {
+                    _responseText.value = "Analyzing image..."
+                    val result = llmEngine.loadModel(modelPath)
+                    result.onFailure { e ->
+                        _uiState.value = UIState.ERROR
+                        _responseText.value = "Failed to load model: ${e.message}"
+                        return
+                    }
+                } else {
+                    _uiState.value = UIState.ERROR
+                    _responseText.value = "No model loaded for image analysis. Please load a model first."
+                    return
+                }
+            }
+
+            // Get an initial analysis from the local model
+            val description = llmEngine.generate(
+                conversation = ensureConversation(),
+                prompt = if (text.isNotBlank()) "Analyze this image and answer: $text" else "Describe this image:",
+                audioData = null,
+                images = listOf(bitmap)
+            )
+
+            // Add the local model's response to the conversation
+            val localResponse = ConversationMessage(
+                text = description,
+                isUser = false
+            )
+            _messages.value = _messages.value + localResponse
+            saveState()
+
+            // Now use delegation to decide whether we need more help.
+            // The delegation agent will see the image was processed and decide.
+            val followUp = if (text.isNotBlank()) "Is this answer complete and accurate?" else "Can you provide more details?"
+            processTextWithDelegation(followUp)
+
+        } catch (e: Exception) {
+            Log.e(TAG, "Error processing image with delegation", e)
+            _uiState.value = UIState.ERROR
+            _responseText.value = "Error: ${e.message}"
+        }
+    }
+
+    // Legacy onImageSelected body - now extracted to separate functions
+    private suspend fun processImageWithLocalModelLegacy(bitmap: android.graphics.Bitmap, text: String) {
+        try {
+            if (!llmEngine.isLoaded()) {
+                val modelPath = userSettings.modelPath.first()
+                if (modelPath.isNotEmpty()) {
+                    _responseText.value = "Loading model..."
+                    val result = llmEngine.loadModel(modelPath)
+                    result.onFailure { e ->
+                        _uiState.value = UIState.ERROR
+                        _responseText.value = "Failed to load model: ${e.message}"
+                        return
+                    }
+                    agent.prewarmCache()
+                } else {
+                    _uiState.value = UIState.ERROR
+                    _responseText.value = "No model loaded. Please go to Settings and load a model."
+                    return
+                }
+            }
+
+            val responseBuilder = StringBuilder()
+
+            Log.d(TAG, "Processing image with model...")
+
+            agent.processInput(
+                input = text,
+                images = listOf(bitmap)
+            ).collect { event ->
+                when (event) {
+                    is AgentEvent.Token -> {
+                        responseBuilder.append(event.text)
+                        _responseText.value = responseBuilder.toString()
+                        _uiState.value = UIState.SPEAKING
+                    }
+
+                    is AgentEvent.ExecutingTool -> {
+                        _uiState.value = UIState.EXECUTING_TOOL
+                        _responseText.value = "🔧 Using ${event.toolName}..."
+                    }
+
+                    is AgentEvent.ToolResult -> {
+                        // Tool completed
+                    }
+
+                    is AgentEvent.Complete -> {
+                        val aiMessage = ConversationMessage(
+                            text = event.response,
+                            isUser = false
+                        )
+                        _messages.value = _messages.value + aiMessage
+                        saveState()
+
+                        speakResponse(event.response)
+                        _uiState.value = UIState.IDLE
                     }
 
                     is AgentEvent.Error -> {
diff --git a/app/src/main/java/com/sleepy/agent/ui/screens/SettingsScreen.kt b/app/src/main/java/com/sleepy/agent/ui/screens/SettingsScreen.kt
index 6298a40..1f05625 100644
--- a/app/src/main/java/com/sleepy/agent/ui/screens/SettingsScreen.kt
+++ b/app/src/main/java/com/sleepy/agent/ui/screens/SettingsScreen.kt
@@ -136,8 +136,10 @@ fun SettingsScreen(
             ServerSection(
                 searchServerUrl = uiState.searchServerUrl,
                 delegateServerUrl = uiState.delegateServerUrl,
+                serverAgentMode = uiState.serverAgentMode,
                 onSearchServerChange = { viewModel.setSearchServerUrl(it) },
-                onDelegateServerChange = { viewModel.setDelegateServerUrl(it) }
+                onDelegateServerChange = { viewModel.setDelegateServerUrl(it) },
+                onServerAgentModeChange = { viewModel.setServerAgentMode(it) }
             )
 
             HorizontalDivider()
@@ -391,8 +393,10 @@ private fun ModelCard(
 private fun ServerSection(
     searchServerUrl: String,
     delegateServerUrl: String,
+    serverAgentMode: String,
     onSearchServerChange: (String) -> Unit,
-    onDelegateServerChange: (String) -> Unit
+    onDelegateServerChange: (String) -> Unit,
+    onServerAgentModeChange: (String) -> Unit
 ) {
     Column(verticalArrangement = Arrangement.spacedBy(12.dp)) {
         Text(
@@ -420,6 +424,50 @@ private fun ServerSection(
             singleLine = true
         )
 
+        // Server Agent Mode selector (only shown if a delegate server is configured)
+        if (delegateServerUrl.isNotEmpty()) {
+            Text(
+                text = "Agent Mode",
+                style = MaterialTheme.typography.bodyMedium
+            )
+
+            // Mode selection buttons
+            Row(
+                modifier = Modifier.fillMaxWidth(),
+                horizontalArrangement = Arrangement.spacedBy(8.dp)
+            ) {
+                ModeButton(
+                    text = "Local Only",
+                    selected = serverAgentMode == "local_only",
+                    onClick = { onServerAgentModeChange("local_only") },
+                    modifier = Modifier.weight(1f)
+                )
+                ModeButton(
+                    text = "Full Remote",
+                    selected = serverAgentMode == "full_remote",
+                    onClick = { onServerAgentModeChange("full_remote") },
+                    modifier = Modifier.weight(1f)
+                )
+                ModeButton(
+                    text = "Smart",
+                    selected = serverAgentMode == "delegation",
+                    onClick = { onServerAgentModeChange("delegation") },
+                    modifier = Modifier.weight(1f)
+                )
+            }
+
+            Text(
+                text = when (serverAgentMode) {
+                    "local_only" -> "Uses only the local model on your device."
+                    "full_remote" -> "Bypasses the local model entirely. All queries go to the server."
+                    "delegation" -> "Local model decides when to ask the server for help."
+                    else -> ""
+                },
+                style = MaterialTheme.typography.bodySmall,
+                color = MaterialTheme.colorScheme.onSurfaceVariant
+            )
+        }
+
         Text(
             text = "Leave empty to disable server features. URLs are saved automatically.",
             style = MaterialTheme.typography.bodySmall,
@@ -428,6 +476,31 @@ private fun ServerSection(
     }
 }
 
+@Composable
+private fun ModeButton(
+    text: String,
+    selected: Boolean,
+    onClick: () -> Unit,
+    modifier: Modifier = Modifier
+) {
+    TextButton(
+        onClick = onClick,
+        modifier = modifier,
+        colors = ButtonDefaults.textButtonColors(
+            containerColor = if (selected)
+                MaterialTheme.colorScheme.primaryContainer
+            else
+                MaterialTheme.colorScheme.surfaceVariant,
+            contentColor = if (selected)
+                MaterialTheme.colorScheme.onPrimaryContainer
+            else
+                MaterialTheme.colorScheme.onSurfaceVariant
+        )
+    ) {
+        Text(text, style = MaterialTheme.typography.labelMedium)
+    }
+}
+
 @Composable
 private fun TtsSection(
     enabled: Boolean,
diff --git a/app/src/main/java/com/sleepy/agent/ui/screens/SettingsViewModel.kt b/app/src/main/java/com/sleepy/agent/ui/screens/SettingsViewModel.kt
index d89da1c..c24e6f3 100644
--- a/app/src/main/java/com/sleepy/agent/ui/screens/SettingsViewModel.kt
+++ b/app/src/main/java/com/sleepy/agent/ui/screens/SettingsViewModel.kt
@@ -35,6 +35,7 @@ data class SettingsUiState(
     val serverEnabled: Boolean = false,
     val searchServerUrl: String = "",
     val delegateServerUrl: String = "",
+    val serverAgentMode: String = "local_only", // "local_only", "full_remote", "delegation"
     val searchServerHealthy: Boolean? = null,
     val delegateServerHealthy: Boolean? = null,
     val serverModels: List<String> = emptyList(),
@@ -197,6 +198,7 @@ class SettingsViewModel(
             val ttsEnabled = userSettings.ttsEnabled.first()
             val ttsAutoMode = userSettings.ttsAutoMode.first()
             val floatingButtonEnabled = userSettings.floatingButtonEnabled.first()
+            val serverAgentMode = userSettings.serverAgentMode.first()
 
             val finalModelPath = if (modelPath.isEmpty() && ModelDownloadManager.isModelDownloaded(context)) {
                 ModelDownloadManager.getModelFile(context).absolutePath
@@ -211,6 +213,7 @@ class SettingsViewModel(
                 serverEnabled = serverEnabled,
                 searchServerUrl = searchServerUrl,
                 delegateServerUrl = delegateServerUrl,
+                serverAgentMode = serverAgentMode,
                 selectedModel = selectedModel,
                 isLoading = false,
                 modelLoaded = llmEngine.isLoaded(),
@@ -551,6 +554,14 @@ class SettingsViewModel(
         }
     }
 
+    // Server agent mode
+    fun setServerAgentMode(mode: String) {
+        _uiState.value = _uiState.value.copy(serverAgentMode = mode)
+        viewModelScope.launch {
+            userSettings.setServerAgentMode(mode)
+        }
+    }
+
     // Floating button (experimental)
     fun setFloatingButtonEnabled(enabled: Boolean) {
         _uiState.value = _uiState.value.copy(floatingButtonEnabled = enabled)