Add server agent modes: full_remote and delegation

New Features:
- RemoteAgent: Full server-as-agent mode (bypasses local model)
- DelegationAgent: Local model decides when to ask server for help
- Server mode selector in Settings: Local Only / Full Remote / Smart
- Updated MainViewModel to support all three modes for text, audio, and images
- SettingsUiState and SettingsViewModel updated with serverAgentMode

Modes:
- local_only: Use only local Gemma 4 model (default)
- full_remote: All queries go to delegate server (OpenAI-compatible API)
- delegation: Local model classifies queries, delegates complex ones to server
2026-04-06 18:52:17 +02:00
parent 47df14c952
commit 8ab2e661ee
8 changed files with 5211 additions and 84 deletions
@@ -95,7 +95,8 @@ class AppModule(private val context: Context) {
agent = agent,
llmEngine = llmEngine,
userSettings = userSettings,
-webSearchTool = webSearchTool
webSearchTool = webSearchTool,
httpClient = ktorClient
)
}
@@ -0,0 +1,346 @@
package com.sleepy.agent.inference
import android.util.Log
import io.ktor.client.HttpClient
import io.ktor.client.call.body
import io.ktor.client.plugins.timeout
import io.ktor.client.request.post
import io.ktor.client.request.setBody
import io.ktor.http.ContentType
import io.ktor.http.contentType
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.channelFlow
import kotlinx.coroutines.flow.collect
import kotlinx.coroutines.flow.flow
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.addJsonObject
import kotlinx.serialization.json.buildJsonObject
import kotlinx.serialization.json.put
import kotlinx.serialization.json.putJsonArray
/**
* Delegation Agent - Local model decides when to ask the big model for help.
*
* Flow:
* 1. User input → Local model (small, fast)
* 2. Local model classifies if it needs help:
* - Simple question (factual, from training data) → Answer directly
* - Complex question (reasoning, current events) → Delegate to big model
* - Uncertain → Ask big model for help
* 3. If delegation needed:
* - Local model formulates a clear request
* - Request sent to big model (server)
* - Big model responds
* - Local model incorporates that into final answer
* 4. Final answer to user
*/
class DelegationAgent(
private val localEngine: LlmEngine,
private val httpClient: HttpClient,
private val delegateServerUrl: String
) {
companion object {
private const val TAG = "DelegationAgent"
private val json = Json { ignoreUnknownKeys = true }
// Classification thresholds
private const val CONFIDENCE_THRESHOLD = 0.7f
}
private var localConversation: Conversation? = null
private val delegationPrompt = """
You are a helpful assistant. Before answering, assess your confidence:
Can you answer this confidently based on your training? Reply with EXACTLY one of:
[DIRECT] - You know this well and can answer directly
[DELEGATE: question for big model] - You need help, provide a clear question for a smarter model
[CLARIFY] - You need more information from the user
Your assessment:
""".trimIndent()
private val synthesisPrompt = """
You received help from a more knowledgeable model. Synthesize this into a helpful,
natural response for the user. Don't mention that you asked for help - just provide
the answer conversationally.
User's original question: {user_question}
Helpful information received: {delegated_response}
Your response:
""".trimIndent()
/**
* Process user input with delegation to big model when needed.
*/
suspend fun processWithDelegation(
userInput: String,
conversationHistory: List<Pair<String, String>> = emptyList(), // (role, content) pairs
onStatusUpdate: ((String) -> Unit)? = null
): Flow<DelegationEvent> = channelFlow {
try {
// Step 1: Local model assesses confidence
onStatusUpdate?.invoke("Thinking...")
send(DelegationEvent.Status("Analyzing question..."))
val classification = classifyQuestion(userInput, conversationHistory)
when {
classification.startsWith("[DIRECT]") -> {
// Step 2a: Answer directly with local model
Log.d(TAG, "Answering directly")
onStatusUpdate?.invoke("Answering...")
send(DelegationEvent.Status("Answering directly..."))
answerDirectly(userInput, conversationHistory).collect { event ->
when (event) {
is AgentEvent.Token -> send(DelegationEvent.Token(event.text))
is AgentEvent.Complete -> send(DelegationEvent.Complete(event.response))
is AgentEvent.Error -> send(DelegationEvent.Error(event.message))
else -> {}
}
}
}
classification.startsWith("[DELEGATE:") -> {
// Step 2b: Delegate to big model
val extractedQuestion = extractDelegateQuestion(classification)
Log.d(TAG, "Delegating to big model: $extractedQuestion")
onStatusUpdate?.invoke("Consulting expert model...")
send(DelegationEvent.Status("Consulting expert model..."))
val delegatedResponse = queryBigModel(extractedQuestion, conversationHistory)
// Step 3: Synthesize with local model
onStatusUpdate?.invoke("Synthesizing answer...")
send(DelegationEvent.Status("Synthesizing answer..."))
synthesizeResponse(userInput, delegatedResponse, conversationHistory).collect { event ->
when (event) {
is AgentEvent.Token -> send(DelegationEvent.Token(event.text))
is AgentEvent.Complete -> send(DelegationEvent.Complete(event.response))
is AgentEvent.Error -> send(DelegationEvent.Error(event.message))
else -> {}
}
}
}
classification.startsWith("[CLARIFY]") -> {
// Step 2c: Ask user for clarification
Log.d(TAG, "Asking for clarification")
val clarificationRequest = classification.removePrefix("[CLARIFY]").trim()
.ifEmpty { "I need more information to help you. Could you provide more details about what you're looking for?" }
send(DelegationEvent.Token(clarificationRequest))
send(DelegationEvent.Complete(clarificationRequest))
}
else -> {
// Fallback: try direct answer
Log.w(TAG, "Unknown classification: $classification, falling back to direct")
answerDirectly(userInput, conversationHistory).collect { event ->
when (event) {
is AgentEvent.Token -> send(DelegationEvent.Token(event.text))
is AgentEvent.Complete -> send(DelegationEvent.Complete(event.response))
is AgentEvent.Error -> send(DelegationEvent.Error(event.message))
else -> {}
}
}
}
}
} catch (e: Exception) {
Log.e(TAG, "Error in delegation flow", e)
send(DelegationEvent.Error("Error: ${e.message}"))
}
}
/**
* Quick classification without full generation.
*/
private suspend fun classifyQuestion(
userInput: String,
history: List<Pair<String, String>>
): String {
// Ensure conversation exists
if (localConversation?.isAlive != true) {
localConversation = localEngine.createConversation(delegationPrompt)
}
val prompt = buildString {
history.takeLast(3).forEach { (role, content) ->
appendLine("$role: $content")
}
appendLine("User: $userInput")
appendLine()
append("Assessment: ")
}
return try {
localEngine.generate(
conversation = localConversation!!,
prompt = prompt,
audioData = null,
images = null
).trim()
} catch (e: Exception) {
Log.e(TAG, "Classification failed", e)
"[DIRECT]" // Fallback to direct answer
}
}
/**
* Answer directly using local model.
*/
private fun answerDirectly(
userInput: String,
history: List<Pair<String, String>>
): Flow<AgentEvent> = flow {
if (localConversation?.isAlive != true) {
localConversation = localEngine.createConversation()
}
val prompt = buildString {
history.takeLast(5).forEach { (role, content) ->
appendLine("$role: $content")
}
appendLine("User: $userInput")
appendLine()
append("Assistant: ")
}
val response = localEngine.generate(
conversation = localConversation!!,
prompt = prompt,
audioData = null,
images = null
)
emit(AgentEvent.Token(response))
emit(AgentEvent.Complete(response))
}
/**
* Query the big model on the server.
*/
private suspend fun queryBigModel(
question: String,
history: List<Pair<String, String>>
): String {
return try {
val requestBody = buildJsonObject {
putJsonArray("messages") {
history.forEach { (role, content) ->
addJsonObject {
put("role", if (role == "User") "user" else "assistant")
put("content", content)
}
}
addJsonObject {
put("role", "user")
put("content", question)
}
}
put("stream", false)
put("temperature", 0.7)
}
val response: String = httpClient.post("$delegateServerUrl/v1/chat/completions") {
contentType(ContentType.Application.Json)
setBody(requestBody)
timeout {
requestTimeoutMillis = 120_000
connectTimeoutMillis = 30_000
}
}.body()
parseServerResponse(response)
} catch (e: Exception) {
Log.e(TAG, "Failed to query big model", e)
"I apologize, but I couldn't reach the expert model at this time. Let me try to help with what I know: [local model will attempt answer]"
}
}
/**
* Synthesize the delegated response into a natural answer.
*/
private fun synthesizeResponse(
userQuestion: String,
delegatedResponse: String,
history: List<Pair<String, String>>
): Flow<AgentEvent> = flow {
if (localConversation?.isAlive != true) {
localConversation = localEngine.createConversation()
}
val prompt = synthesisPrompt
.replace("{user_question}", userQuestion)
.replace("{delegated_response}", delegatedResponse)
val response = localEngine.generate(
conversation = localConversation!!,
prompt = prompt,
audioData = null,
images = null
)
emit(AgentEvent.Token(response))
emit(AgentEvent.Complete(response))
}
private fun extractDelegateQuestion(classification: String): String {
// Extract question from [DELEGATE: question here]
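// e.g. "[DELEGATE: What is the latest Ktor release?]" -> "What is the latest Ktor release?" (illustrative)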
val start = classification.indexOf("[DELEGATE:")
if (start == -1) return classification
val end = classification.indexOf("]", start + 10)
if (end == -1) return classification
return classification.substring(start + 10, end).trim()
}
private fun parseServerResponse(response: String): String {
return try {
val completion = json.decodeFromString<OpenAICompletion>(response)
completion.choices.firstOrNull()?.message?.content
?: "I received information but couldn't parse it properly."
} catch (e: Exception) {
Log.w(TAG, "Failed to parse server response as JSON, returning raw")
response
}
}
fun reset() {
localConversation?.close()
localConversation = null
}
@kotlinx.serialization.Serializable
data class OpenAICompletion(
val choices: List<CompletionChoice>
) {
@kotlinx.serialization.Serializable
data class CompletionChoice(
val message: Message
) {
@kotlinx.serialization.Serializable
data class Message(
val content: String
)
}
}
}
sealed class DelegationEvent {
data class Token(val text: String) : DelegationEvent()
data class Status(val message: String) : DelegationEvent()
data class Complete(val response: String) : DelegationEvent()
data class Error(val message: String) : DelegationEvent()
}
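For orientation, here is a minimal caller sketch (not part of this commit) showing how the event stream might be consumed; the scope, engine, client, and server URL are placeholder assumptions, and the client is assumed to be the app's preconfigured Ktor client.

import io.ktor.client.HttpClient
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.flow.collect
import kotlinx.coroutines.launch

// Hypothetical caller; demoDelegation, scope, engine, client, and the URL are
// placeholders, not part of this commit.
fun demoDelegation(scope: CoroutineScope, engine: LlmEngine, client: HttpClient) {
    val agent = DelegationAgent(engine, client, delegateServerUrl = "http://10.0.2.2:8080")
    scope.launch {
        agent.processWithDelegation("Summarize the plot of Hamlet").collect { event ->
            when (event) {
                is DelegationEvent.Status -> println("status: ${event.message}")
                is DelegationEvent.Token -> print(event.text)
                is DelegationEvent.Complete -> println()
                is DelegationEvent.Error -> println("error: ${event.message}")
            }
        }
    }
}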
@@ -0,0 +1,299 @@
package com.sleepy.agent.inference
import android.util.Log
import io.ktor.client.HttpClient
import io.ktor.client.call.body
import io.ktor.client.plugins.timeout
import io.ktor.client.request.get
import io.ktor.client.request.post
import io.ktor.client.request.setBody
import io.ktor.http.ContentType
import io.ktor.http.contentType
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.channelFlow
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.addJsonObject
import kotlinx.serialization.json.buildJsonObject
import kotlinx.serialization.json.put
import kotlinx.serialization.json.putJsonArray
/**
* Remote agent that delegates all LLM calls to a server.
* This bypasses the local model entirely - useful for:
* 1. Using powerful server-side models when local model is insufficient
* 2. Testing the app UI without loading a local model
* 3. Fallback when local model fails
*/
class RemoteAgent(
private val httpClient: HttpClient,
private val baseUrl: String
) {
companion object {
private const val TAG = "RemoteAgent"
private val json = Json { ignoreUnknownKeys = true }
// Supported API formats
enum class ApiFormat {
OPENAI_COMPATIBLE, // /v1/chat/completions
OLLAMA, // /api/generate or /api/chat
CUSTOM // Custom endpoint
}
}
private val messageHistory = mutableListOf<RemoteMessage>()
private var apiFormat = ApiFormat.OPENAI_COMPATIBLE
data class RemoteMessage(
val role: String, // "system", "user", "assistant"
val content: String
)
/**
* Send a message to the remote server and get streaming response.
*/
suspend fun sendMessage(
message: String,
systemPrompt: String? = null,
stream: Boolean = true
): Flow<RemoteAgentEvent> = channelFlow {
try {
// Add user message to history
messageHistory.add(RemoteMessage("user", message))
// Build request based on detected API format
val requestBody = when (apiFormat) {
ApiFormat.OPENAI_COMPATIBLE -> buildOpenAIRequest(systemPrompt, stream)
ApiFormat.OLLAMA -> buildOllamaRequest(systemPrompt, stream)
ApiFormat.CUSTOM -> buildCustomRequest(systemPrompt)
}
Log.d(TAG, "Sending request to $baseUrl (format: $apiFormat)")
val endpoint = when (apiFormat) {
ApiFormat.OPENAI_COMPATIBLE -> "$baseUrl/v1/chat/completions"
ApiFormat.OLLAMA -> "$baseUrl/api/chat"
ApiFormat.CUSTOM -> baseUrl
}
val response: String = httpClient.post(endpoint) {
contentType(ContentType.Application.Json)
setBody(requestBody)
timeout {
requestTimeoutMillis = 120_000 // 2 minutes for generation
connectTimeoutMillis = 30_000
}
}.body()
if (stream) {
// Handle streaming response (SSE format)
handleStreamingResponse(response)
} else {
// Handle non-streaming response
handleNonStreamingResponse(response)
}
} catch (e: Exception) {
Log.e(TAG, "Error calling remote server", e)
send(RemoteAgentEvent.Error("Server error: ${e.message}"))
}
}
/**
* Quick check if server is available.
*/
suspend fun checkServer(): Boolean {
return try {
// Try to detect API format by probing endpoints
val openaiResponse = httpClient.get("$baseUrl/v1/models") {
timeout { requestTimeoutMillis = 5000 }
}
if (openaiResponse.status.value == 200) {
apiFormat = ApiFormat.OPENAI_COMPATIBLE
Log.d(TAG, "Detected OpenAI-compatible API")
return true
}
val ollamaResponse = httpClient.get("$baseUrl/api/tags") {
timeout { requestTimeoutMillis = 5000 }
}
if (ollamaResponse.status.value == 200) {
apiFormat = ApiFormat.OLLAMA
Log.d(TAG, "Detected Ollama API")
return true
}
// Assume custom if base URL responds
apiFormat = ApiFormat.CUSTOM
true
} catch (e: Exception) {
Log.e(TAG, "Server check failed", e)
false
}
}
/**
* Clear conversation history.
*/
fun clearHistory() {
messageHistory.clear()
}
private fun buildOpenAIRequest(systemPrompt: String?, stream: Boolean): JsonObject {
return buildJsonObject {
put("model", "local-model") // Server usually ignores this for single-model setups
putJsonArray("messages") {
// System message
systemPrompt?.let {
addJsonObject {
put("role", "system")
put("content", it)
}
}
// Conversation history
messageHistory.forEach { msg ->
addJsonObject {
put("role", msg.role)
put("content", msg.content)
}
}
}
put("stream", stream)
put("temperature", 0.7)
put("max_tokens", 4096)
}
}
private fun buildOllamaRequest(systemPrompt: String?, stream: Boolean): JsonObject {
return buildJsonObject {
put("model", "local-model")
putJsonArray("messages") {
systemPrompt?.let {
addJsonObject {
put("role", "system")
put("content", it)
}
}
messageHistory.forEach { msg ->
addJsonObject {
put("role", msg.role)
put("content", msg.content)
}
}
}
put("stream", stream)
}
}
private fun buildCustomRequest(systemPrompt: String?): JsonObject {
return buildJsonObject {
put("prompt", buildPromptWithHistory(systemPrompt))
}
}
private fun buildPromptWithHistory(systemPrompt: String?): String {
return buildString {
systemPrompt?.let { appendLine(it).appendLine() }
messageHistory.forEach { msg ->
when (msg.role) {
"user" -> appendLine("User: ${msg.content}")
"assistant" -> appendLine("Assistant: ${msg.content}")
}
}
appendLine("Assistant:")
}
}
private suspend fun kotlinx.coroutines.channels.SendChannel<RemoteAgentEvent>.handleStreamingResponse(response: String) {
// Parse SSE format (Server-Sent Events)
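// Typical OpenAI-style SSE payload: one JSON chunk per "data:" line, e.g.
//   data: {"choices":[{"delta":{"content":"Hel"}}]}
//   data: {"choices":[{"delta":{"content":"lo"}}]}
//   data: [DONE]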
val lines = response.lines()
val responseBuilder = StringBuilder()
for (line in lines) {
when {
line.startsWith("data: ") -> {
val data = line.substring(6)
if (data == "[DONE]") {
// Stream complete
val fullResponse = responseBuilder.toString()
messageHistory.add(RemoteMessage("assistant", fullResponse))
send(RemoteAgentEvent.Complete(fullResponse))
return
}
try {
val chunk = json.decodeFromString<OpenAIChunk>(data)
val content = chunk.choices.firstOrNull()?.delta?.content ?: ""
if (content.isNotEmpty()) {
responseBuilder.append(content)
send(RemoteAgentEvent.Token(content))
}
} catch (e: Exception) {
Log.w(TAG, "Failed to parse chunk: $data")
}
}
}
}
// If we get here without [DONE], return what we have
val fullResponse = responseBuilder.toString()
if (fullResponse.isNotEmpty()) {
messageHistory.add(RemoteMessage("assistant", fullResponse))
send(RemoteAgentEvent.Complete(fullResponse))
}
}
private suspend fun kotlinx.coroutines.channels.SendChannel<RemoteAgentEvent>.handleNonStreamingResponse(response: String) {
try {
val completion = json.decodeFromString<OpenAICompletion>(response)
val content = completion.choices.firstOrNull()?.message?.content ?: ""
messageHistory.add(RemoteMessage("assistant", content))
// Emit as single token for consistency
send(RemoteAgentEvent.Token(content))
send(RemoteAgentEvent.Complete(content))
} catch (e: Exception) {
Log.e(TAG, "Failed to parse response", e)
send(RemoteAgentEvent.Error("Failed to parse server response"))
}
}
// Data classes for OpenAI-compatible API
@Serializable
data class OpenAIChunk(
val choices: List<Choice>
) {
@Serializable
data class Choice(
val delta: Delta
) {
@Serializable
data class Delta(
val content: String? = null
)
}
}
@Serializable
data class OpenAICompletion(
val choices: List<CompletionChoice>
) {
@Serializable
data class CompletionChoice(
val message: Message
) {
@Serializable
data class Message(
val content: String
)
}
}
}
sealed class RemoteAgentEvent {
data class Token(val text: String) : RemoteAgentEvent()
data class Complete(val response: String) : RemoteAgentEvent()
data class Error(val message: String) : RemoteAgentEvent()
}
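Similarly, a rough usage sketch for RemoteAgent (not part of this commit). It assumes `client` is the app's shared Ktor client, already configured with JSON content negotiation and the HttpTimeout plugin; the URL is a placeholder.

import io.ktor.client.HttpClient
import kotlinx.coroutines.flow.collect

// Hypothetical smoke test; remoteSmokeTest and the URL are placeholders.
suspend fun remoteSmokeTest(client: HttpClient) {
    val agent = RemoteAgent(client, baseUrl = "http://10.0.2.2:8080")
    if (!agent.checkServer()) {
        println("delegate server unreachable")
        return
    }
    agent.sendMessage("Hello!", systemPrompt = "You are a helpful AI assistant.", stream = false)
        .collect { event ->
            when (event) {
                is RemoteAgentEvent.Token -> print(event.text)
                is RemoteAgentEvent.Complete -> println()
                is RemoteAgentEvent.Error -> println("error: ${event.message}")
            }
        }
}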
@@ -23,6 +23,9 @@ class UserSettings(
val MODEL_SOURCE = stringPreferencesKey("model_source")
val SELECTED_SERVER_MODEL = stringPreferencesKey("selected_server_model")
// Server agent mode: "local_only", "full_remote", "delegation"
val SERVER_AGENT_MODE = stringPreferencesKey("server_agent_mode")
// TTS settings
val TTS_ENABLED = booleanPreferencesKey("tts_enabled")
val TTS_AUTO_MODE = booleanPreferencesKey("tts_auto_mode")
@@ -55,6 +58,11 @@ class UserSettings(
prefs[ENABLE_SERVER_DELEGATION] ?: false
}
// Server agent mode: "local_only" (default), "full_remote", "delegation"
val serverAgentMode: Flow<String> = dataStore.data.map { prefs ->
prefs[SERVER_AGENT_MODE] ?: "local_only"
}
val modelSource: Flow<ModelSource> = dataStore.data.map { prefs ->
prefs[MODEL_SOURCE]?.let { ModelSource.valueOf(it) } ?: ModelSource.FILE_PATH
}
@@ -106,6 +114,12 @@ class UserSettings(
}
}
suspend fun setServerAgentMode(mode: String) {
dataStore.edit { prefs ->
prefs[SERVER_AGENT_MODE] = mode
}
}
suspend fun setModelSource(source: ModelSource) {
dataStore.edit { prefs ->
prefs[MODEL_SOURCE] = source.name
@@ -12,7 +12,11 @@ import com.sleepy.agent.data.ConversationStorage
import com.sleepy.agent.download.ModelDownloadManager
import com.sleepy.agent.inference.Agent
import com.sleepy.agent.inference.AgentEvent
import com.sleepy.agent.inference.DelegationAgent
import com.sleepy.agent.inference.DelegationEvent
import com.sleepy.agent.inference.LlmEngine
import com.sleepy.agent.inference.RemoteAgent
import com.sleepy.agent.inference.RemoteAgentEvent
import com.sleepy.agent.settings.UserSettings
import com.sleepy.agent.tools.WebSearchTool
import kotlinx.coroutines.flow.MutableStateFlow
@@ -50,7 +54,8 @@ class MainViewModel(
private val agent: Agent,
private val llmEngine: LlmEngine,
private val userSettings: UserSettings,
-private val webSearchTool: WebSearchTool
private val webSearchTool: WebSearchTool,
private val httpClient: io.ktor.client.HttpClient
) : ViewModel() {
private val conversationStorage = ConversationStorage(context)
@@ -75,6 +80,10 @@ class MainViewModel(
// Track if user started with voice or text for TTS auto mode
private var firstInputWasVoice: Boolean? = null
// Remote agents for server modes
private var remoteAgent: RemoteAgent? = null
private var delegationAgent: DelegationAgent? = null
companion object {
private const val TAG = "MainViewModel"
private const val KEY_MESSAGES = "messages"
@@ -113,6 +122,17 @@ class MainViewModel(
Log.d(TAG, "Updated web search URL to: $url") Log.d(TAG, "Updated web search URL to: $url")
} }
} }
// Initialize remote agents when delegate server URL changes
viewModelScope.launch {
userSettings.delegateServerUrl.collect { url ->
if (url.isNotEmpty()) {
remoteAgent = RemoteAgent(httpClient, url)
delegationAgent = DelegationAgent(llmEngine, httpClient, url)
Log.d(TAG, "Initialized remote agents with server: $url")
}
}
}
}
private fun restoreState() {
@@ -250,12 +270,11 @@ class MainViewModel(
_uiState.value = UIState.PROCESSING
-val useServer = userSettings.enableServerDelegation.first()
-if (useServer) {
-processAudioWithServer(audioData)
-} else {
-processAudioWithLocalModel(audioData)
-}
val mode = userSettings.serverAgentMode.first()
when (mode) {
"full_remote", "delegation" -> processAudioWithServer(audioData)
else -> processAudioWithLocalModel(audioData)
}
}
}
@@ -350,14 +369,68 @@ class MainViewModel(
_messages.value = _messages.value + userMessage
saveState()
-val aiMessage = ConversationMessage(
-text = "Server mode doesn't support native audio understanding yet. Please use local model for voice input.",
-isUser = false
-)
-_messages.value = _messages.value + aiMessage
-saveState()
-_uiState.value = UIState.IDLE
-}
_uiState.value = UIState.PROCESSING
// For server mode with audio, we need to either:
// 1. Use local model to transcribe, then send text to server
// 2. Send audio to server if it supports it
// For now, transcribe locally first
if (!llmEngine.isLoaded()) {
val modelPath = userSettings.modelPath.first()
if (modelPath.isNotEmpty()) {
_responseText.value = "Loading model for transcription..."
val result = llmEngine.loadModel(modelPath)
result.onFailure { e ->
_uiState.value = UIState.ERROR
_responseText.value = "Failed to load model: ${e.message}"
// onFailure is inline, so this plain non-local return exits the function
return
}
} else {
_uiState.value = UIState.ERROR
_responseText.value = "No model loaded for transcription. Please load a model first."
return
}
}
try {
// First, transcribe the audio locally
val transcription = llmEngine.generate(
conversation = ensureConversation(),
prompt = "Transcribe this audio:",
audioData = audioData,
images = null
)
Log.d(TAG, "Transcribed: $transcription")
// Update the user message with transcription
val updatedMessages = _messages.value.toMutableList()
updatedMessages[updatedMessages.size - 1] = userMessage.copy(
text = "🎤 \"$transcription\""
)
_messages.value = updatedMessages
saveState()
// Now process the transcribed text with the server
val mode = userSettings.serverAgentMode.first()
when (mode) {
"full_remote" -> processTextWithRemoteAgent(transcription)
"delegation" -> processTextWithDelegation(transcription)
else -> processTextWithLocalModel(transcription)
}
} catch (e: Exception) {
Log.e(TAG, "Error processing audio with server", e)
_uiState.value = UIState.ERROR
_responseText.value = "Error: ${e.message}"
}
}
private fun ensureConversation() = conversation?.takeIf { it.isAlive }
?: llmEngine.createConversation().also { conversation = it }
private var conversation: com.sleepy.agent.inference.Conversation? = null
fun sendTextMessage(text: String) {
viewModelScope.launch {
@@ -372,13 +445,13 @@ class MainViewModel(
Log.d(TAG, "First input was text - TTS auto-disabled") Log.d(TAG, "First input was text - TTS auto-disabled")
} }
val useServer = userSettings.enableServerDelegation.first() val mode = userSettings.serverAgentMode.first()
Log.d(TAG, "useServer: $useServer") Log.d(TAG, "Server agent mode: $mode")
if (useServer) { when (mode) {
processTextWithServer(text) "full_remote" -> processTextWithRemoteAgent(text)
} else { "delegation" -> processTextWithDelegation(text)
processTextWithLocalModel(text) else -> processTextWithLocalModel(text)
} }
} }
} }
@@ -493,7 +566,10 @@ class MainViewModel(
}
}
-private suspend fun processTextWithServer(text: String) {
/**
 * Full remote mode - bypass local model entirely, use server as the agent.
 */
private suspend fun processTextWithRemoteAgent(text: String) {
val userMessage = ConversationMessage(
text = text,
isUser = true
@@ -503,14 +579,117 @@ class MainViewModel(
_uiState.value = UIState.PROCESSING
val remote = remoteAgent
if (remote == null) {
_uiState.value = UIState.ERROR
_responseText.value = "No server configured. Please set a delegate server URL in Settings."
return
}
try {
val responseBuilder = StringBuilder()
remote.sendMessage(
message = text,
systemPrompt = "You are a helpful AI assistant."
).collect { event ->
when (event) {
is RemoteAgentEvent.Token -> {
responseBuilder.append(event.text)
_responseText.value = responseBuilder.toString()
_uiState.value = UIState.SPEAKING
}
is RemoteAgentEvent.Complete -> {
val aiMessage = ConversationMessage(
-text = "Server mode not yet implemented. Please use local model.",
text = event.response,
isUser = false
)
_messages.value = _messages.value + aiMessage
saveState()
speakResponse(event.response)
_uiState.value = UIState.IDLE
}
is RemoteAgentEvent.Error -> {
_responseText.value = "Error: ${event.message}"
_uiState.value = UIState.ERROR
}
}
}
} catch (e: Exception) {
Log.e(TAG, "Error in remote agent", e)
_uiState.value = UIState.ERROR
_responseText.value = "Server error: ${e.message}"
}
}
/**
* Delegation mode - local model decides when to ask big model for help.
*/
private suspend fun processTextWithDelegation(text: String) {
val userMessage = ConversationMessage(
text = text,
isUser = true
)
_messages.value = _messages.value + userMessage
saveState()
_uiState.value = UIState.PROCESSING
val delegation = delegationAgent
if (delegation == null) {
_uiState.value = UIState.ERROR
_responseText.value = "Delegation not available. Please set a delegate server URL in Settings."
return
}
try {
val responseBuilder = StringBuilder()
delegation.processWithDelegation(
userInput = text,
conversationHistory = _messages.value.map { (if (it.isUser) "User" else "Assistant") to it.text }
).collect { event ->
when (event) {
is DelegationEvent.Token -> {
responseBuilder.append(event.text)
_responseText.value = responseBuilder.toString()
_uiState.value = UIState.SPEAKING
}
is DelegationEvent.Status -> {
_responseText.value = event.message
}
is DelegationEvent.Complete -> {
val aiMessage = ConversationMessage(
text = event.response,
isUser = false
)
_messages.value = _messages.value + aiMessage
saveState()
speakResponse(event.response)
_uiState.value = UIState.IDLE
}
is DelegationEvent.Error -> {
_responseText.value = "Error: ${event.message}"
_uiState.value = UIState.ERROR
}
}
}
} catch (e: Exception) {
Log.e(TAG, "Error in delegation agent", e)
_uiState.value = UIState.ERROR
_responseText.value = "Error: ${e.message}"
}
}
/**
* Legacy server delegation - now redirects to appropriate mode.
*/
private suspend fun processTextWithServer(text: String) {
// Use full remote mode by default for legacy "use server" setting
processTextWithRemoteAgent(text)
}
fun setResponse(text: String) {
_responseText.value = text
@@ -564,6 +743,20 @@ class MainViewModel(
firstInputWasVoice = false // Image is not voice input
_uiState.value = UIState.PROCESSING
// Check server mode
val mode = userSettings.serverAgentMode.first()
// For server modes, we need a local model to process the image first
// Then send the description/results to the server
when (mode) {
"full_remote" -> processImageWithRemoteAgent(bitmap, text)
"delegation" -> processImageWithDelegation(bitmap, text)
else -> processImageWithLocalModel(bitmap, text)
}
}
}
private suspend fun processImageWithLocalModel(bitmap: android.graphics.Bitmap, text: String) {
try {
if (!llmEngine.isLoaded()) {
val modelPath = userSettings.modelPath.first()
@@ -573,13 +766,187 @@ class MainViewModel(
result.onFailure { e ->
_uiState.value = UIState.ERROR
_responseText.value = "Failed to load model: ${e.message}"
-return@launch
return
}
agent.prewarmCache()
} else {
_uiState.value = UIState.ERROR
_responseText.value = "No model loaded. Please go to Settings and load a model."
-return@launch
return
}
}
val responseBuilder = StringBuilder()
Log.d(TAG, "Processing image with local model...")
agent.processInput(
input = text,
images = listOf(bitmap)
).collect { event ->
when (event) {
is AgentEvent.Token -> {
responseBuilder.append(event.text)
_responseText.value = responseBuilder.toString()
_uiState.value = UIState.SPEAKING
}
is AgentEvent.ExecutingTool -> {
_uiState.value = UIState.EXECUTING_TOOL
_responseText.value = "🔧 Using ${event.toolName}..."
}
is AgentEvent.ToolResult -> {
// Tool completed
}
is AgentEvent.Complete -> {
val aiMessage = ConversationMessage(
text = event.response,
isUser = false
)
_messages.value = _messages.value + aiMessage
saveState()
speakResponse(event.response)
_uiState.value = UIState.IDLE
}
is AgentEvent.Error -> {
_responseText.value = "Error: ${event.message}"
_uiState.value = UIState.ERROR
}
else -> {}
}
}
} catch (e: Exception) {
Log.e(TAG, "Error processing image", e)
_uiState.value = UIState.ERROR
_responseText.value = "Error processing image: ${e.message}"
val errorMessage = ConversationMessage(
text = "❌ Failed to process image: ${e.message}",
isUser = false
)
_messages.value = _messages.value + errorMessage
saveState()
}
}
private suspend fun processImageWithRemoteAgent(bitmap: android.graphics.Bitmap, text: String) {
try {
// First, get image description from local model
if (!llmEngine.isLoaded()) {
val modelPath = userSettings.modelPath.first()
if (modelPath.isNotEmpty()) {
_responseText.value = "Analyzing image..."
val result = llmEngine.loadModel(modelPath)
result.onFailure { e ->
_uiState.value = UIState.ERROR
_responseText.value = "Failed to load model: ${e.message}"
return
}
} else {
_uiState.value = UIState.ERROR
_responseText.value = "No model loaded for image analysis. Please load a model first."
return
}
}
// Get image description from local model
val description = llmEngine.generate(
conversation = ensureConversation(),
prompt = "Describe this image in detail:",
audioData = null,
images = listOf(bitmap)
)
Log.d(TAG, "Image description: $description")
// Now send description + user text to remote agent
val fullPrompt = if (text.isNotBlank()) {
"User question about image: $text\n\nImage description: $description"
} else {
"Describe this image: $description"
}
processTextWithRemoteAgent(fullPrompt)
} catch (e: Exception) {
Log.e(TAG, "Error processing image with remote agent", e)
_uiState.value = UIState.ERROR
_responseText.value = "Error: ${e.message}"
}
}
private suspend fun processImageWithDelegation(bitmap: android.graphics.Bitmap, text: String) {
// For delegation mode, process image locally first
// The delegation agent will decide if server help is needed based on the description
try {
if (!llmEngine.isLoaded()) {
val modelPath = userSettings.modelPath.first()
if (modelPath.isNotEmpty()) {
_responseText.value = "Analyzing image..."
val result = llmEngine.loadModel(modelPath)
result.onFailure { e ->
_uiState.value = UIState.ERROR
_responseText.value = "Failed to load model: ${e.message}"
return
}
} else {
_uiState.value = UIState.ERROR
_responseText.value = "No model loaded for image analysis. Please load a model first."
return
}
}
// Get initial analysis from local model
val description = llmEngine.generate(
conversation = ensureConversation(),
prompt = if (text.isNotBlank()) "Analyze this image and answer: $text" else "Describe this image:",
audioData = null,
images = listOf(bitmap)
)
// Add the local model's response to conversation
val localResponse = ConversationMessage(
text = description,
isUser = false
)
_messages.value = _messages.value + localResponse
saveState()
// Now use delegation to decide if we need more help
// The delegation agent will see the image was processed and decide
val followUp = if (text.isNotBlank()) "Is this answer complete and accurate?" else "Can you provide more details?"
processTextWithDelegation(followUp)
} catch (e: Exception) {
Log.e(TAG, "Error processing image with delegation", e)
_uiState.value = UIState.ERROR
_responseText.value = "Error: ${e.message}"
}
}
// Legacy onImageSelected body - now extracted to separate functions
private suspend fun processImageWithLocalModelLegacy(bitmap: android.graphics.Bitmap, text: String) {
try {
if (!llmEngine.isLoaded()) {
val modelPath = userSettings.modelPath.first()
if (modelPath.isNotEmpty()) {
_responseText.value = "Loading model..."
val result = llmEngine.loadModel(modelPath)
result.onFailure { e ->
_uiState.value = UIState.ERROR
_responseText.value = "Failed to load model: ${e.message}"
return
}
agent.prewarmCache()
} else {
_uiState.value = UIState.ERROR
_responseText.value = "No model loaded. Please go to Settings and load a model."
return
}
}
@@ -587,9 +954,8 @@ class MainViewModel(
Log.d(TAG, "Processing image with model...") Log.d(TAG, "Processing image with model...")
// Send empty text with image - model will process image naturally
agent.processInput( agent.processInput(
input = text, // Use the text the user typed (may be empty) input = text,
images = listOf(bitmap) images = listOf(bitmap)
).collect { event -> ).collect { event ->
when (event) { when (event) {
@@ -136,8 +136,10 @@ fun SettingsScreen(
ServerSection(
searchServerUrl = uiState.searchServerUrl,
delegateServerUrl = uiState.delegateServerUrl,
serverAgentMode = uiState.serverAgentMode,
onSearchServerChange = { viewModel.setSearchServerUrl(it) },
-onDelegateServerChange = { viewModel.setDelegateServerUrl(it) }
onDelegateServerChange = { viewModel.setDelegateServerUrl(it) },
onServerAgentModeChange = { viewModel.setServerAgentMode(it) }
)
HorizontalDivider()
@@ -391,8 +393,10 @@ private fun ModelCard(
private fun ServerSection(
searchServerUrl: String,
delegateServerUrl: String,
serverAgentMode: String,
onSearchServerChange: (String) -> Unit,
-onDelegateServerChange: (String) -> Unit
onDelegateServerChange: (String) -> Unit,
onServerAgentModeChange: (String) -> Unit
) {
Column(verticalArrangement = Arrangement.spacedBy(12.dp)) {
Text(
@@ -420,6 +424,50 @@ private fun ServerSection(
singleLine = true
)
// Server Agent Mode selector (only shown if delegate server is configured)
if (delegateServerUrl.isNotEmpty()) {
Text(
text = "Agent Mode",
style = MaterialTheme.typography.bodyMedium
)
// Mode selection buttons
Row(
modifier = Modifier.fillMaxWidth(),
horizontalArrangement = Arrangement.spacedBy(8.dp)
) {
ModeButton(
text = "Local Only",
selected = serverAgentMode == "local_only",
onClick = { onServerAgentModeChange("local_only") },
modifier = Modifier.weight(1f)
)
ModeButton(
text = "Full Remote",
selected = serverAgentMode == "full_remote",
onClick = { onServerAgentModeChange("full_remote") },
modifier = Modifier.weight(1f)
)
ModeButton(
text = "Smart",
selected = serverAgentMode == "delegation",
onClick = { onServerAgentModeChange("delegation") },
modifier = Modifier.weight(1f)
)
}
Text(
text = when (serverAgentMode) {
"local_only" -> "Uses only the local model on your device."
"full_remote" -> "Bypasses local model entirely. All queries go to the server."
"delegation" -> "Local model decides when to ask the server for help."
else -> ""
},
style = MaterialTheme.typography.bodySmall,
color = MaterialTheme.colorScheme.onSurfaceVariant
)
}
Text(
text = "Leave empty to disable server features. URLs are saved automatically.",
style = MaterialTheme.typography.bodySmall,
@@ -428,6 +476,31 @@ private fun ServerSection(
}
}
@Composable
private fun ModeButton(
text: String,
selected: Boolean,
onClick: () -> Unit,
modifier: Modifier = Modifier
) {
TextButton(
onClick = onClick,
modifier = modifier,
colors = ButtonDefaults.textButtonColors(
containerColor = if (selected)
MaterialTheme.colorScheme.primaryContainer
else
MaterialTheme.colorScheme.surfaceVariant,
contentColor = if (selected)
MaterialTheme.colorScheme.onPrimaryContainer
else
MaterialTheme.colorScheme.onSurfaceVariant
)
) {
Text(text, style = MaterialTheme.typography.labelMedium)
}
}
@Composable
private fun TtsSection(
enabled: Boolean,
@@ -35,6 +35,7 @@ data class SettingsUiState(
val serverEnabled: Boolean = false,
val searchServerUrl: String = "",
val delegateServerUrl: String = "",
val serverAgentMode: String = "local_only", // "local_only", "full_remote", "delegation"
val searchServerHealthy: Boolean? = null,
val delegateServerHealthy: Boolean? = null,
val serverModels: List<String> = emptyList(),
@@ -197,6 +198,7 @@ class SettingsViewModel(
val ttsEnabled = userSettings.ttsEnabled.first()
val ttsAutoMode = userSettings.ttsAutoMode.first()
val floatingButtonEnabled = userSettings.floatingButtonEnabled.first()
val serverAgentMode = userSettings.serverAgentMode.first()
val finalModelPath = if (modelPath.isEmpty() && ModelDownloadManager.isModelDownloaded(context)) {
ModelDownloadManager.getModelFile(context).absolutePath
@@ -211,6 +213,7 @@ class SettingsViewModel(
serverEnabled = serverEnabled,
searchServerUrl = searchServerUrl,
delegateServerUrl = delegateServerUrl,
serverAgentMode = serverAgentMode,
selectedModel = selectedModel,
isLoading = false,
modelLoaded = llmEngine.isLoaded(),
@@ -551,6 +554,14 @@ class SettingsViewModel(
}
}
// Server agent mode
fun setServerAgentMode(mode: String) {
_uiState.value = _uiState.value.copy(serverAgentMode = mode)
viewModelScope.launch {
userSettings.setServerAgentMode(mode)
}
}
// Floating button (experimental)
fun setFloatingButtonEnabled(enabled: Boolean) {
_uiState.value = _uiState.value.copy(floatingButtonEnabled = enabled)
File diff suppressed because one or more lines are too long