Add server agent modes: full_remote and delegation
New Features:
- RemoteAgent: Full server-as-agent mode (bypasses local model)
- DelegationAgent: Local model decides when to ask server for help
- Server mode selector in Settings: Local Only / Full Remote / Smart
- Updated MainViewModel to support all three modes for text, audio, and images
- SettingsUiState and SettingsViewModel updated with serverAgentMode

Modes:
- local_only: Use only local Gemma 4 model (default)
- full_remote: All queries go to delegate server (OpenAI-compatible API)
- delegation: Local model classifies queries, delegates complex ones to server
This commit is contained in:
@@ -95,7 +95,8 @@ class AppModule(private val context: Context) {
|
|||||||
agent = agent,
|
agent = agent,
|
||||||
llmEngine = llmEngine,
|
llmEngine = llmEngine,
|
||||||
userSettings = userSettings,
|
userSettings = userSettings,
|
||||||
webSearchTool = webSearchTool
|
webSearchTool = webSearchTool,
|
||||||
|
httpClient = ktorClient
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,346 @@
|
|||||||
|
package com.sleepy.agent.inference
|
||||||
|
|
||||||
|
import android.util.Log
|
||||||
|
import io.ktor.client.HttpClient
|
||||||
|
import io.ktor.client.call.body
|
||||||
|
import io.ktor.client.plugins.timeout
|
||||||
|
import io.ktor.client.request.post
|
||||||
|
import io.ktor.client.request.setBody
|
||||||
|
import io.ktor.http.ContentType
|
||||||
|
import io.ktor.http.contentType
|
||||||
|
import kotlinx.coroutines.flow.Flow
|
||||||
|
import kotlinx.coroutines.flow.channelFlow
|
||||||
|
import kotlinx.coroutines.flow.collect
|
||||||
|
import kotlinx.coroutines.flow.flow
|
||||||
|
import kotlinx.coroutines.flow.onEach
|
||||||
|
import kotlinx.serialization.encodeToString
|
||||||
|
import kotlinx.serialization.json.Json
|
||||||
|
import kotlinx.serialization.json.JsonObject
|
||||||
|
import kotlinx.serialization.json.buildJsonObject
|
||||||
|
import kotlinx.serialization.json.put
|
||||||
|
import kotlinx.serialization.json.putJsonArray
|
||||||
|
import kotlinx.serialization.json.putJsonObject
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delegation Agent - Local model decides when to ask the big model for help.
|
||||||
|
*
|
||||||
|
* Flow:
|
||||||
|
* 1. User input → Local model (small, fast)
|
||||||
|
* 2. Local model classifies if it needs help:
|
||||||
|
* - Simple question (factual, from training data) → Answer directly
|
||||||
|
* - Complex question (reasoning, current events) → Delegate to big model
|
||||||
|
* - Uncertain → Ask big model for help
|
||||||
|
* 3. If delegation needed:
|
||||||
|
* - Local model formulates a clear request
|
||||||
|
* - Request sent to big model (server)
|
||||||
|
* - Big model responds
|
||||||
|
* - Local model incorporates that into final answer
|
||||||
|
* 4. Final answer to user
|
||||||
|
*/
|
||||||
|
class DelegationAgent(
|
||||||
|
private val localEngine: LlmEngine,
|
||||||
|
private val httpClient: HttpClient,
|
||||||
|
private val delegateServerUrl: String
|
||||||
|
) {
|
||||||
|
companion object {
|
||||||
|
private const val TAG = "DelegationAgent"
|
||||||
|
private val json = Json { ignoreUnknownKeys = true }
|
||||||
|
|
||||||
|
// Classification thresholds
|
||||||
|
private const val CONFIDENCE_THRESHOLD = 0.7f
|
||||||
|
}
|
||||||
|
|
||||||
|
private var localConversation: Conversation? = null
|
||||||
|
|
||||||
|
private val delegationPrompt = """
|
||||||
|
You are a helpful assistant. Before answering, assess your confidence:
|
||||||
|
|
||||||
|
Can you answer this confidently based on your training? Reply with EXACTLY one of:
|
||||||
|
|
||||||
|
[DIRECT] - You know this well and can answer directly
|
||||||
|
[DELEGATE: question for big model] - You need help, provide a clear question for a smarter model
|
||||||
|
[CLARIFY] - You need more information from the user
|
||||||
|
|
||||||
|
Your assessment:
|
||||||
|
""".trimIndent()
|
||||||
|
|
||||||
|
private val synthesisPrompt = """
|
||||||
|
You received help from a more knowledgeable model. Synthesize this into a helpful,
|
||||||
|
natural response for the user. Don't mention that you asked for help - just provide
|
||||||
|
the answer conversationally.
|
||||||
|
|
||||||
|
User's original question: {user_question}
|
||||||
|
|
||||||
|
Helpful information received: {delegated_response}
|
||||||
|
|
||||||
|
Your response:
|
||||||
|
""".trimIndent()
|
||||||
|
|
||||||
|
/**
 * Runs one user turn through the delegation pipeline and streams the outcome.
 *
 * The local model first tags the input via [classifyQuestion]. A `[DIRECT]` tag (or any
 * unrecognised output) is answered by the local model, `[DELEGATE: ...]` is sent to the
 * delegate server and the reply is re-phrased locally, and `[CLARIFY]` becomes a follow-up
 * question for the user.
 *
 * @param userInput the message the user just sent.
 * @param conversationHistory earlier turns as (role, content) pairs.
 * @param onStatusUpdate optional callback that receives short progress strings.
 * @return a flow of [DelegationEvent]s. Failures are reported as [DelegationEvent.Error]
 *   instead of being thrown; cancellation is propagated untouched.
 */
suspend fun processWithDelegation(
    userInput: String,
    conversationHistory: List<Pair<String, String>> = emptyList(), // (role, content) pairs
    onStatusUpdate: ((String) -> Unit)? = null
): Flow<DelegationEvent> = channelFlow {
    try {
        // Step 1: Local model assesses confidence
        onStatusUpdate?.invoke("Thinking...")
        send(DelegationEvent.Status("Analyzing question..."))

        val classification = classifyQuestion(userInput, conversationHistory)

        when {
            classification.startsWith("[DIRECT]") -> {
                // Step 2a: Answer directly with local model
                Log.d(TAG, "Answering directly")
                onStatusUpdate?.invoke("Answering...")
                send(DelegationEvent.Status("Answering directly..."))

                forwardAgentEvents(answerDirectly(userInput, conversationHistory))
            }

            classification.startsWith("[DELEGATE:") -> {
                // Step 2b: Delegate to big model. If the model produced an empty payload
                // (e.g. "[DELEGATE:]"), fall back to the user's own wording.
                val extractedQuestion = extractDelegateQuestion(classification).ifBlank { userInput }
                Log.d(TAG, "Delegating to big model: $extractedQuestion")
                onStatusUpdate?.invoke("Consulting expert model...")
                send(DelegationEvent.Status("Consulting expert model..."))

                val delegatedResponse = queryBigModel(extractedQuestion, conversationHistory)

                // Step 3: Synthesize with local model
                onStatusUpdate?.invoke("Synthesizing answer...")
                send(DelegationEvent.Status("Synthesizing answer..."))

                forwardAgentEvents(synthesizeResponse(userInput, delegatedResponse, conversationHistory))
            }

            classification.startsWith("[CLARIFY]") -> {
                // Step 2c: Ask user for clarification
                Log.d(TAG, "Asking for clarification")
                val clarificationRequest = classification.removePrefix("[CLARIFY]").trim()
                    .ifEmpty { "I need more information to help you. Could you provide more details about what you're looking for?" }

                send(DelegationEvent.Token(clarificationRequest))
                send(DelegationEvent.Complete(clarificationRequest))
            }

            else -> {
                // Fallback: try direct answer
                Log.w(TAG, "Unknown classification: $classification, falling back to direct")
                forwardAgentEvents(answerDirectly(userInput, conversationHistory))
            }
        }
    } catch (e: kotlinx.coroutines.CancellationException) {
        // The collector went away; sending on the closed channel would only fail again.
        throw e
    } catch (e: Exception) {
        Log.e(TAG, "Error in delegation flow", e)
        send(DelegationEvent.Error("Error: ${e.message}"))
    }
}

/** Collects [events] and re-emits tokens, completions and errors as [DelegationEvent]s; other events are dropped. */
private suspend fun kotlinx.coroutines.channels.SendChannel<DelegationEvent>.forwardAgentEvents(
    events: Flow<AgentEvent>
) {
    events.collect { event ->
        when (event) {
            is AgentEvent.Token -> send(DelegationEvent.Token(event.text))
            is AgentEvent.Complete -> send(DelegationEvent.Complete(event.response))
            is AgentEvent.Error -> send(DelegationEvent.Error(event.message))
            else -> {}
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Asks the local model to tag the latest user input as `[DIRECT]`, `[DELEGATE: ...]` or
 * `[CLARIFY]`.
 *
 * The classifier runs in its own short-lived conversation seeded with [delegationPrompt].
 * Previously it created the shared [localConversation] with that system prompt, so the
 * conversation later used by [answerDirectly] and [synthesizeResponse] was still governed by
 * the "reply with EXACTLY one of ..." instructions. Any cached conversation is closed first
 * via [reset] so only one conversation of this agent is open at a time; the following answer
 * step then creates a fresh conversation without the classifier prompt.
 * NOTE(review): assumes creating a conversation per turn is cheap for the engine - confirm.
 *
 * @param userInput the message to classify.
 * @param history earlier turns as (role, content) pairs; only the last three are included.
 * @return the trimmed model output, or `"[DIRECT]"` when generation fails.
 */
private suspend fun classifyQuestion(
    userInput: String,
    history: List<Pair<String, String>>
): String {
    val prompt = buildString {
        history.takeLast(3).forEach { (role, content) ->
            appendLine("$role: $content")
        }
        appendLine("User: $userInput")
        appendLine()
        append("Assessment: ")
    }

    // Drop whatever conversation is cached so the classifier prompt cannot leak into it.
    reset()
    val classifier = localEngine.createConversation(delegationPrompt)

    return try {
        localEngine.generate(
            conversation = classifier,
            prompt = prompt,
            audioData = null,
            images = null
        ).trim()
    } catch (e: kotlinx.coroutines.CancellationException) {
        throw e
    } catch (e: Exception) {
        Log.e(TAG, "Classification failed", e)
        "[DIRECT]" // Fallback to direct answer
    } finally {
        classifier.close()
    }
}
|
||||||
|
|
||||||
|
/**
 * Produces an answer from the local model alone.
 *
 * The prompt is the last five history turns followed by the new user input. The whole reply
 * is emitted as a single [AgentEvent.Token] and then as [AgentEvent.Complete].
 */
private fun answerDirectly(
    userInput: String,
    history: List<Pair<String, String>>
): Flow<AgentEvent> = flow {
    // Reuse the cached conversation while it is alive; otherwise create and cache a new one.
    val conversation = localConversation?.takeIf { it.isAlive }
        ?: localEngine.createConversation().also { localConversation = it }

    val transcript = StringBuilder()
    for ((role, content) in history.takeLast(5)) {
        transcript.append(role).append(": ").append(content).append('\n')
    }
    transcript.append("User: ").append(userInput).append('\n')
    transcript.append('\n')
    transcript.append("Assistant: ")

    val reply = localEngine.generate(
        conversation = conversation,
        prompt = transcript.toString(),
        audioData = null,
        images = null
    )

    emit(AgentEvent.Token(reply))
    emit(AgentEvent.Complete(reply))
}
|
||||||
|
|
||||||
|
/**
 * Sends [question] (preceded by [history]) to the delegate server's
 * `/v1/chat/completions` endpoint and returns the text of the reply.
 *
 * @param question the question formulated for the big model.
 * @param history earlier turns as (role, content) pairs; a role equal to "user" in any
 *   letter case is sent as `user`, everything else as `assistant`.
 * @return the parsed reply, or a fixed apology string when the request fails.
 *   Cancellation is rethrown rather than converted into the apology.
 */
private suspend fun queryBigModel(
    question: String,
    history: List<Pair<String, String>>
): String {
    return try {
        val requestBody = buildJsonObject {
            // Same placeholder model name RemoteAgent sends.
            put("model", "local-model")
            putJsonArray("messages") {
                // add(buildJsonObject { ... }) only needs names this file already imports.
                history.forEach { (role, content) ->
                    add(buildJsonObject {
                        put("role", if (role.equals("user", ignoreCase = true)) "user" else "assistant")
                        put("content", content)
                    })
                }
                add(buildJsonObject {
                    put("role", "user")
                    put("content", question)
                })
            }
            put("stream", false)
            put("temperature", 0.7)
        }

        val response: String = httpClient.post("$delegateServerUrl/v1/chat/completions") {
            contentType(ContentType.Application.Json)
            setBody(requestBody)
            timeout {
                requestTimeoutMillis = 120_000
                connectTimeoutMillis = 30_000
            }
        }.body()

        parseServerResponse(response)
    } catch (e: kotlinx.coroutines.CancellationException) {
        throw e
    } catch (e: Exception) {
        Log.e(TAG, "Failed to query big model", e)
        "I apologize, but I couldn't reach the expert model at this time. Let me try to help with what I know: [local model will attempt answer]"
    }
}
|
||||||
|
|
||||||
|
/**
 * Has the local model re-phrase the delegate server's reply as a conversational answer.
 *
 * Both placeholders of [synthesisPrompt] are filled in one pass, so placeholder-looking text
 * inside [userQuestion] or [delegatedResponse] is inserted verbatim and never expanded again.
 *
 * @param userQuestion the user's original message.
 * @param delegatedResponse the text returned by the delegate server.
 * @param history currently unused; kept so the signature matches the other answer steps.
 * @return a flow emitting the reply as one [AgentEvent.Token] followed by [AgentEvent.Complete].
 */
private fun synthesizeResponse(
    userQuestion: String,
    delegatedResponse: String,
    history: List<Pair<String, String>>
): Flow<AgentEvent> = flow {
    if (localConversation?.isAlive != true) {
        localConversation = localEngine.createConversation()
    }
    val conversation = checkNotNull(localConversation) { "local conversation was not created" }

    val substitutions = mapOf(
        "{user_question}" to userQuestion,
        "{delegated_response}" to delegatedResponse
    )
    // Single-pass replacement: the lambda result is appended literally.
    val prompt = Regex("""\{user_question\}|\{delegated_response\}""")
        .replace(synthesisPrompt) { match -> substitutions.getValue(match.value) }

    val response = localEngine.generate(
        conversation = conversation,
        prompt = prompt,
        audioData = null,
        images = null
    )

    emit(AgentEvent.Token(response))
    emit(AgentEvent.Complete(response))
}
|
||||||
|
|
||||||
|
/**
 * Extracts the question from a `[DELEGATE: question]` tag.
 *
 * Brackets inside the question are matched, so `[DELEGATE: what is arr[0]?]` yields
 * `what is arr[0]?` instead of being cut at the first `]`. When the closing bracket is
 * missing, everything after the marker is returned. Input without the marker is returned
 * unchanged.
 *
 * @param classification raw classifier output.
 * @return the trimmed question text.
 */
private fun extractDelegateQuestion(classification: String): String {
    val marker = "[DELEGATE:"
    val markerIndex = classification.indexOf(marker)
    if (markerIndex == -1) return classification

    val bodyStart = markerIndex + marker.length
    var depth = 0 // nesting level of '[' opened inside the question itself
    for (i in bodyStart until classification.length) {
        when (classification[i]) {
            '[' -> depth++
            ']' -> {
                if (depth == 0) return classification.substring(bodyStart, i).trim()
                depth--
            }
        }
    }

    // No closing bracket: still strip the marker so it is not sent to the server.
    return classification.substring(bodyStart).trim()
}
|
||||||
|
|
||||||
|
/**
 * Pulls the first choice's message text out of an OpenAI-style completion body.
 *
 * Returns the raw [response] when it cannot be decoded, and a fixed notice when the decoded
 * completion has no choices.
 */
private fun parseServerResponse(response: String): String {
    val completion = try {
        json.decodeFromString<OpenAICompletion>(response)
    } catch (e: Exception) {
        Log.w(TAG, "Failed to parse server response as JSON, returning raw")
        return response
    }

    val firstChoice = completion.choices.firstOrNull()
    return firstChoice?.message?.content
        ?: "I received information but couldn't parse it properly."
}
|
||||||
|
|
||||||
|
fun reset() {
|
||||||
|
localConversation?.close()
|
||||||
|
localConversation = null
|
||||||
|
}
|
||||||
|
|
||||||
|
@kotlinx.serialization.Serializable
|
||||||
|
data class OpenAICompletion(
|
||||||
|
val choices: List<CompletionChoice>
|
||||||
|
) {
|
||||||
|
@kotlinx.serialization.Serializable
|
||||||
|
data class CompletionChoice(
|
||||||
|
val message: Message
|
||||||
|
) {
|
||||||
|
@kotlinx.serialization.Serializable
|
||||||
|
data class Message(
|
||||||
|
val content: String
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sealed class DelegationEvent {
|
||||||
|
data class Token(val text: String) : DelegationEvent()
|
||||||
|
data class Status(val message: String) : DelegationEvent()
|
||||||
|
data class Complete(val response: String) : DelegationEvent()
|
||||||
|
data class Error(val message: String) : DelegationEvent()
|
||||||
|
}
|
||||||
@@ -0,0 +1,299 @@
|
|||||||
|
package com.sleepy.agent.inference
|
||||||
|
|
||||||
|
import android.util.Log
import io.ktor.client.HttpClient
import io.ktor.client.call.body
import io.ktor.client.plugins.timeout
import io.ktor.client.request.get
import io.ktor.client.request.post
import io.ktor.client.request.setBody
import io.ktor.http.ContentType
import io.ktor.http.contentType
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.channelFlow
import kotlinx.coroutines.flow.flow
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.Json
import kotlinx.serialization.json.JsonObject
import kotlinx.serialization.json.addJsonObject
import kotlinx.serialization.json.buildJsonObject
import kotlinx.serialization.json.put
import kotlinx.serialization.json.putJsonArray
import kotlinx.serialization.json.putJsonObject
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remote agent that delegates all LLM calls to a server.
|
||||||
|
* This bypasses the local model entirely - useful for:
|
||||||
|
* 1. Using powerful server-side models when local model is insufficient
|
||||||
|
* 2. Testing the app UI without loading a local model
|
||||||
|
* 3. Fallback when local model fails
|
||||||
|
*/
|
||||||
|
class RemoteAgent(
|
||||||
|
private val httpClient: HttpClient,
|
||||||
|
private val baseUrl: String
|
||||||
|
) {
|
||||||
|
companion object {
|
||||||
|
private const val TAG = "RemoteAgent"
|
||||||
|
private val json = Json { ignoreUnknownKeys = true }
|
||||||
|
|
||||||
|
// Supported API formats
|
||||||
|
enum class ApiFormat {
|
||||||
|
OPENAI_COMPATIBLE, // /v1/chat/completions
|
||||||
|
OLLAMA, // /api/generate or /api/chat
|
||||||
|
CUSTOM // Custom endpoint
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private val messageHistory = mutableListOf<RemoteMessage>()
|
||||||
|
private var apiFormat = ApiFormat.OPENAI_COMPATIBLE
|
||||||
|
|
||||||
|
data class RemoteMessage(
|
||||||
|
val role: String, // "system", "user", "assistant"
|
||||||
|
val content: String
|
||||||
|
)
|
||||||
|
|
||||||
|
/**
 * Sends [message] to the remote server and reports the reply as [RemoteAgentEvent]s.
 *
 * The user turn is appended to the in-memory history before the request body is built, so
 * it is part of the payload. If no assistant reply is recorded afterwards (request failure,
 * unparsable body, cancellation), the turn is removed again so that a retry does not leave
 * two consecutive user messages in the history.
 *
 * The response body is read in full before it is parsed, so with `stream = true` tokens are
 * emitted only once the server has finished responding.
 *
 * @param message the user's text.
 * @param systemPrompt optional system prompt placed ahead of the history.
 * @param stream whether to request and parse an SSE-style streamed body.
 * @return a flow of events. Failures are reported as [RemoteAgentEvent.Error]; cancellation
 *   is propagated.
 */
suspend fun sendMessage(
    message: String,
    systemPrompt: String? = null,
    stream: Boolean = true
): Flow<RemoteAgentEvent> = channelFlow {
    val userTurn = RemoteMessage("user", message)
    try {
        // Add user message to history
        messageHistory.add(userTurn)

        // Build request based on detected API format
        val requestBody = when (apiFormat) {
            ApiFormat.OPENAI_COMPATIBLE -> buildOpenAIRequest(systemPrompt, stream)
            ApiFormat.OLLAMA -> buildOllamaRequest(systemPrompt, stream)
            ApiFormat.CUSTOM -> buildCustomRequest(systemPrompt)
        }

        Log.d(TAG, "Sending request to $baseUrl (format: $apiFormat)")

        val endpoint = when (apiFormat) {
            ApiFormat.OPENAI_COMPATIBLE -> "$baseUrl/v1/chat/completions"
            ApiFormat.OLLAMA -> "$baseUrl/api/chat"
            ApiFormat.CUSTOM -> baseUrl
        }

        val response: String = httpClient.post(endpoint) {
            contentType(ContentType.Application.Json)
            setBody(requestBody)
            timeout {
                requestTimeoutMillis = 120_000 // 2 minutes for generation
                connectTimeoutMillis = 30_000
            }
        }.body()

        if (stream) {
            // Handle streaming response (SSE format)
            handleStreamingResponse(response)
        } else {
            // Handle non-streaming response
            handleNonStreamingResponse(response)
        }
    } catch (e: kotlinx.coroutines.CancellationException) {
        // The collector is gone; do not try to send an error to it.
        throw e
    } catch (e: Exception) {
        Log.e(TAG, "Error calling remote server", e)
        send(RemoteAgentEvent.Error("Server error: ${e.message}"))
    } finally {
        // Still the last entry means no assistant reply was appended: undo the user turn.
        if (messageHistory.lastOrNull() === userTurn) {
            messageHistory.removeAt(messageHistory.lastIndex)
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Probes the server and records which API format it appears to speak.
 *
 * `GET /v1/models` answering 200 selects the OpenAI-compatible format and
 * `GET /api/tags` answering 200 selects Ollama. Both are listing endpoints, so they are
 * queried with GET rather than POST. The probes are independent: a failure of the first
 * does not prevent the second. If at least one probe got an HTTP answer but neither
 * returned 200, the server is treated as a custom endpoint.
 *
 * @return `true` when the server answered at least one probe, `false` when both failed.
 */
suspend fun checkServer(): Boolean {
    val openAiStatus = probeStatus("$baseUrl/v1/models")
    if (openAiStatus == 200) {
        apiFormat = ApiFormat.OPENAI_COMPATIBLE
        Log.d(TAG, "Detected OpenAI-compatible API")
        return true
    }

    val ollamaStatus = probeStatus("$baseUrl/api/tags")
    if (ollamaStatus == 200) {
        apiFormat = ApiFormat.OLLAMA
        Log.d(TAG, "Detected Ollama API")
        return true
    }

    if (openAiStatus == null && ollamaStatus == null) {
        // Nothing answered at all: the server is unreachable.
        return false
    }

    // Something answered, just not with a known API: assume a custom endpoint.
    apiFormat = ApiFormat.CUSTOM
    return true
}

/** Issues a GET to [url] with a 5 s timeout and returns the HTTP status, or `null` if the request failed. */
private suspend fun probeStatus(url: String): Int? {
    return try {
        httpClient.get(url) {
            timeout { requestTimeoutMillis = 5000 }
        }.status.value
    } catch (e: kotlinx.coroutines.CancellationException) {
        throw e
    } catch (e: Exception) {
        Log.e(TAG, "Server check failed", e)
        null
    }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear conversation history.
|
||||||
|
*/
|
||||||
|
fun clearHistory() {
|
||||||
|
messageHistory.clear()
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the JSON body for an OpenAI-style chat completion request.
 *
 * The `messages` array holds the optional system prompt followed by every entry of the
 * message history in order.
 */
private fun buildOpenAIRequest(systemPrompt: String?, stream: Boolean): JsonObject {
    return buildJsonObject {
        put("model", "local-model") // Server usually ignores this for single-model setups
        putJsonArray("messages") {
            if (systemPrompt != null) {
                add(buildJsonObject {
                    put("role", "system")
                    put("content", systemPrompt)
                })
            }
            for (turn in messageHistory) {
                add(buildJsonObject {
                    put("role", turn.role)
                    put("content", turn.content)
                })
            }
        }
        put("stream", stream)
        put("temperature", 0.7)
        put("max_tokens", 4096)
    }
}
|
||||||
|
|
||||||
|
/**
 * Builds the JSON body for an Ollama chat request: model name, the optional system prompt
 * plus the message history as `messages`, and the `stream` flag.
 */
private fun buildOllamaRequest(systemPrompt: String?, stream: Boolean): JsonObject {
    return buildJsonObject {
        put("model", "local-model")
        putJsonArray("messages") {
            if (systemPrompt != null) {
                add(buildJsonObject {
                    put("role", "system")
                    put("content", systemPrompt)
                })
            }
            for (turn in messageHistory) {
                add(buildJsonObject {
                    put("role", turn.role)
                    put("content", turn.content)
                })
            }
        }
        put("stream", stream)
    }
}
|
||||||
|
|
||||||
|
/** Builds the body for a custom endpoint: a single `prompt` field holding the flattened conversation. */
private fun buildCustomRequest(systemPrompt: String?): JsonObject {
    val flattenedPrompt = buildPromptWithHistory(systemPrompt)
    return buildJsonObject {
        put("prompt", flattenedPrompt)
    }
}
|
||||||
|
|
||||||
|
/**
 * Flattens the conversation into one text prompt.
 *
 * Layout: the optional system prompt followed by a blank line, one `User:` / `Assistant:`
 * line per history entry (entries with any other role are skipped), and a final
 * `Assistant:` line.
 */
private fun buildPromptWithHistory(systemPrompt: String?): String {
    val prompt = StringBuilder()

    if (systemPrompt != null) {
        prompt.append(systemPrompt).append('\n').append('\n')
    }

    for (turn in messageHistory) {
        val speaker = when (turn.role) {
            "user" -> "User"
            "assistant" -> "Assistant"
            else -> null
        }
        if (speaker != null) {
            prompt.append(speaker).append(": ").append(turn.content).append('\n')
        }
    }

    prompt.append("Assistant:").append('\n')
    return prompt.toString()
}
|
||||||
|
|
||||||
|
/**
 * Parses a fully received SSE-style body made of `data: {chunk}` lines and emits its content.
 *
 * Each chunk is decoded as an [OpenAIChunk]; non-empty delta content is emitted as a
 * [RemoteAgentEvent.Token] and accumulated. Parsing stops at `data: [DONE]`. The accumulated
 * text is then stored in the history as the assistant reply and emitted as
 * [RemoteAgentEvent.Complete].
 *
 * If the body contains neither content nor a `[DONE]` marker, a [RemoteAgentEvent.Error] is
 * emitted so the collector always receives a terminal event.
 *
 * @param response the complete response body.
 */
private suspend fun kotlinx.coroutines.channels.SendChannel<RemoteAgentEvent>.handleStreamingResponse(response: String) {
    val responseBuilder = StringBuilder()
    var sawDoneMarker = false

    for (line in response.lines()) {
        // The space after "data:" is optional in SSE.
        if (!line.startsWith("data:")) continue
        val data = line.removePrefix("data:").trim()

        if (data == "[DONE]") {
            sawDoneMarker = true
            break
        }

        // Only decoding is guarded; a failing send (e.g. cancellation) must propagate.
        val content = try {
            json.decodeFromString<OpenAIChunk>(data).choices.firstOrNull()?.delta?.content
        } catch (e: Exception) {
            Log.w(TAG, "Failed to parse chunk: $data")
            null
        }

        if (!content.isNullOrEmpty()) {
            responseBuilder.append(content)
            send(RemoteAgentEvent.Token(content))
        }
    }

    val fullResponse = responseBuilder.toString()
    if (fullResponse.isEmpty() && !sawDoneMarker) {
        send(RemoteAgentEvent.Error("Server returned no parsable streaming content"))
        return
    }

    messageHistory.add(RemoteMessage("assistant", fullResponse))
    send(RemoteAgentEvent.Complete(fullResponse))
}
|
||||||
|
|
||||||
|
/**
 * Parses a non-streamed body as an [OpenAICompletion] and emits its first choice.
 *
 * The content (empty when there are no choices) is stored in the history as the assistant
 * reply, then emitted as one [RemoteAgentEvent.Token] followed by
 * [RemoteAgentEvent.Complete]. A body that cannot be decoded produces a
 * [RemoteAgentEvent.Error] instead.
 *
 * @param response the complete response body.
 */
private suspend fun kotlinx.coroutines.channels.SendChannel<RemoteAgentEvent>.handleNonStreamingResponse(response: String) {
    // Only decoding is guarded, so a failing send (e.g. cancellation) is not mistaken
    // for a parse error and answered with another send.
    val completion = try {
        json.decodeFromString<OpenAICompletion>(response)
    } catch (e: Exception) {
        Log.e(TAG, "Failed to parse response", e)
        send(RemoteAgentEvent.Error("Failed to parse server response"))
        return
    }

    val content = completion.choices.firstOrNull()?.message?.content ?: ""

    messageHistory.add(RemoteMessage("assistant", content))

    // Emit as single token for consistency
    send(RemoteAgentEvent.Token(content))
    send(RemoteAgentEvent.Complete(content))
}
|
||||||
|
|
||||||
|
// Data classes for OpenAI-compatible API
|
||||||
|
@Serializable
|
||||||
|
data class OpenAIChunk(
|
||||||
|
val choices: List<Choice>
|
||||||
|
) {
|
||||||
|
@Serializable
|
||||||
|
data class Choice(
|
||||||
|
val delta: Delta
|
||||||
|
) {
|
||||||
|
@Serializable
|
||||||
|
data class Delta(
|
||||||
|
val content: String? = null
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Serializable
|
||||||
|
data class OpenAICompletion(
|
||||||
|
val choices: List<CompletionChoice>
|
||||||
|
) {
|
||||||
|
@Serializable
|
||||||
|
data class CompletionChoice(
|
||||||
|
val message: Message
|
||||||
|
) {
|
||||||
|
@Serializable
|
||||||
|
data class Message(
|
||||||
|
val content: String
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sealed class RemoteAgentEvent {
|
||||||
|
data class Token(val text: String) : RemoteAgentEvent()
|
||||||
|
data class Complete(val response: String) : RemoteAgentEvent()
|
||||||
|
data class Error(val message: String) : RemoteAgentEvent()
|
||||||
|
}
|
||||||
@@ -23,6 +23,9 @@ class UserSettings(
|
|||||||
val MODEL_SOURCE = stringPreferencesKey("model_source")
|
val MODEL_SOURCE = stringPreferencesKey("model_source")
|
||||||
val SELECTED_SERVER_MODEL = stringPreferencesKey("selected_server_model")
|
val SELECTED_SERVER_MODEL = stringPreferencesKey("selected_server_model")
|
||||||
|
|
||||||
|
// Server agent mode: "local_only", "full_remote", "delegation"
|
||||||
|
val SERVER_AGENT_MODE = stringPreferencesKey("server_agent_mode")
|
||||||
|
|
||||||
// TTS settings
|
// TTS settings
|
||||||
val TTS_ENABLED = booleanPreferencesKey("tts_enabled")
|
val TTS_ENABLED = booleanPreferencesKey("tts_enabled")
|
||||||
val TTS_AUTO_MODE = booleanPreferencesKey("tts_auto_mode")
|
val TTS_AUTO_MODE = booleanPreferencesKey("tts_auto_mode")
|
||||||
@@ -55,6 +58,11 @@ class UserSettings(
|
|||||||
prefs[ENABLE_SERVER_DELEGATION] ?: false
|
prefs[ENABLE_SERVER_DELEGATION] ?: false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Server agent mode: "local_only" (default), "full_remote", "delegation"
|
||||||
|
val serverAgentMode: Flow<String> = dataStore.data.map { prefs ->
|
||||||
|
prefs[SERVER_AGENT_MODE] ?: "local_only"
|
||||||
|
}
|
||||||
|
|
||||||
val modelSource: Flow<ModelSource> = dataStore.data.map { prefs ->
|
val modelSource: Flow<ModelSource> = dataStore.data.map { prefs ->
|
||||||
prefs[MODEL_SOURCE]?.let { ModelSource.valueOf(it) } ?: ModelSource.FILE_PATH
|
prefs[MODEL_SOURCE]?.let { ModelSource.valueOf(it) } ?: ModelSource.FILE_PATH
|
||||||
}
|
}
|
||||||
@@ -106,6 +114,12 @@ class UserSettings(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
suspend fun setServerAgentMode(mode: String) {
|
||||||
|
dataStore.edit { prefs ->
|
||||||
|
prefs[SERVER_AGENT_MODE] = mode
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
suspend fun setModelSource(source: ModelSource) {
|
suspend fun setModelSource(source: ModelSource) {
|
||||||
dataStore.edit { prefs ->
|
dataStore.edit { prefs ->
|
||||||
prefs[MODEL_SOURCE] = source.name
|
prefs[MODEL_SOURCE] = source.name
|
||||||
|
|||||||
@@ -12,7 +12,11 @@ import com.sleepy.agent.data.ConversationStorage
|
|||||||
import com.sleepy.agent.download.ModelDownloadManager
|
import com.sleepy.agent.download.ModelDownloadManager
|
||||||
import com.sleepy.agent.inference.Agent
|
import com.sleepy.agent.inference.Agent
|
||||||
import com.sleepy.agent.inference.AgentEvent
|
import com.sleepy.agent.inference.AgentEvent
|
||||||
|
import com.sleepy.agent.inference.DelegationAgent
|
||||||
|
import com.sleepy.agent.inference.DelegationEvent
|
||||||
import com.sleepy.agent.inference.LlmEngine
|
import com.sleepy.agent.inference.LlmEngine
|
||||||
|
import com.sleepy.agent.inference.RemoteAgent
|
||||||
|
import com.sleepy.agent.inference.RemoteAgentEvent
|
||||||
import com.sleepy.agent.settings.UserSettings
|
import com.sleepy.agent.settings.UserSettings
|
||||||
import com.sleepy.agent.tools.WebSearchTool
|
import com.sleepy.agent.tools.WebSearchTool
|
||||||
import kotlinx.coroutines.flow.MutableStateFlow
|
import kotlinx.coroutines.flow.MutableStateFlow
|
||||||
@@ -50,7 +54,8 @@ class MainViewModel(
|
|||||||
private val agent: Agent,
|
private val agent: Agent,
|
||||||
private val llmEngine: LlmEngine,
|
private val llmEngine: LlmEngine,
|
||||||
private val userSettings: UserSettings,
|
private val userSettings: UserSettings,
|
||||||
private val webSearchTool: WebSearchTool
|
private val webSearchTool: WebSearchTool,
|
||||||
|
private val httpClient: io.ktor.client.HttpClient
|
||||||
) : ViewModel() {
|
) : ViewModel() {
|
||||||
|
|
||||||
private val conversationStorage = ConversationStorage(context)
|
private val conversationStorage = ConversationStorage(context)
|
||||||
@@ -75,6 +80,10 @@ class MainViewModel(
|
|||||||
// Track if user started with voice or text for TTS auto mode
|
// Track if user started with voice or text for TTS auto mode
|
||||||
private var firstInputWasVoice: Boolean? = null
|
private var firstInputWasVoice: Boolean? = null
|
||||||
|
|
||||||
|
// Remote agents for server modes
|
||||||
|
private var remoteAgent: RemoteAgent? = null
|
||||||
|
private var delegationAgent: DelegationAgent? = null
|
||||||
|
|
||||||
companion object {
|
companion object {
|
||||||
private const val TAG = "MainViewModel"
|
private const val TAG = "MainViewModel"
|
||||||
private const val KEY_MESSAGES = "messages"
|
private const val KEY_MESSAGES = "messages"
|
||||||
@@ -113,6 +122,17 @@ class MainViewModel(
|
|||||||
Log.d(TAG, "Updated web search URL to: $url")
|
Log.d(TAG, "Updated web search URL to: $url")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize remote agents when delegate server URL changes
|
||||||
|
viewModelScope.launch {
|
||||||
|
userSettings.delegateServerUrl.collect { url ->
|
||||||
|
if (url.isNotEmpty()) {
|
||||||
|
remoteAgent = RemoteAgent(httpClient, url)
|
||||||
|
delegationAgent = DelegationAgent(llmEngine, httpClient, url)
|
||||||
|
Log.d(TAG, "Initialized remote agents with server: $url")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun restoreState() {
|
private fun restoreState() {
|
||||||
@@ -250,12 +270,11 @@ class MainViewModel(
|
|||||||
|
|
||||||
_uiState.value = UIState.PROCESSING
|
_uiState.value = UIState.PROCESSING
|
||||||
|
|
||||||
val useServer = userSettings.enableServerDelegation.first()
|
val mode = userSettings.serverAgentMode.first()
|
||||||
|
|
||||||
if (useServer) {
|
when (mode) {
|
||||||
processAudioWithServer(audioData)
|
"full_remote", "delegation" -> processAudioWithServer(audioData)
|
||||||
} else {
|
else -> processAudioWithLocalModel(audioData)
|
||||||
processAudioWithLocalModel(audioData)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -350,14 +369,68 @@ class MainViewModel(
|
|||||||
_messages.value = _messages.value + userMessage
|
_messages.value = _messages.value + userMessage
|
||||||
saveState()
|
saveState()
|
||||||
|
|
||||||
val aiMessage = ConversationMessage(
|
_uiState.value = UIState.PROCESSING
|
||||||
text = "Server mode doesn't support native audio understanding yet. Please use local model for voice input.",
|
|
||||||
isUser = false
|
// For server mode with audio, we need to either:
|
||||||
)
|
// 1. Use local model to transcribe, then send text to server
|
||||||
_messages.value = _messages.value + aiMessage
|
// 2. Send audio to server if it supports it
|
||||||
saveState()
|
// For now, transcribe locally first
|
||||||
_uiState.value = UIState.IDLE
|
|
||||||
|
if (!llmEngine.isLoaded()) {
|
||||||
|
val modelPath = userSettings.modelPath.first()
|
||||||
|
if (modelPath.isNotEmpty()) {
|
||||||
|
_responseText.value = "Loading model for transcription..."
|
||||||
|
val result = llmEngine.loadModel(modelPath)
|
||||||
|
result.onFailure { e ->
|
||||||
|
_uiState.value = UIState.ERROR
|
||||||
|
_responseText.value = "Failed to load model: ${e.message}"
|
||||||
|
return@processAudioWithServer
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
_uiState.value = UIState.ERROR
|
||||||
|
_responseText.value = "No model loaded for transcription. Please load a model first."
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// First, transcribe the audio locally
|
||||||
|
val transcription = llmEngine.generate(
|
||||||
|
conversation = ensureConversation(),
|
||||||
|
prompt = "Transcribe this audio:",
|
||||||
|
audioData = audioData,
|
||||||
|
images = null
|
||||||
|
)
|
||||||
|
|
||||||
|
Log.d(TAG, "Transcribed: $transcription")
|
||||||
|
|
||||||
|
// Update the user message with transcription
|
||||||
|
val updatedMessages = _messages.value.toMutableList()
|
||||||
|
updatedMessages[updatedMessages.size - 1] = userMessage.copy(
|
||||||
|
text = "🎤 \"$transcription\""
|
||||||
|
)
|
||||||
|
_messages.value = updatedMessages
|
||||||
|
saveState()
|
||||||
|
|
||||||
|
// Now process the transcribed text with the server
|
||||||
|
val mode = userSettings.serverAgentMode.first()
|
||||||
|
when (mode) {
|
||||||
|
"full_remote" -> processTextWithRemoteAgent(transcription)
|
||||||
|
"delegation" -> processTextWithDelegation(transcription)
|
||||||
|
else -> processTextWithLocalModel(transcription)
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (e: Exception) {
|
||||||
|
Log.e(TAG, "Error processing audio with server", e)
|
||||||
|
_uiState.value = UIState.ERROR
|
||||||
|
_responseText.value = "Error: ${e.message}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Conversation handle reused by the direct llmEngine.generate(...) calls; null until first needed.
private var conversation: com.sleepy.agent.inference.Conversation? = null

/**
 * Returns the cached conversation while it is still alive; otherwise creates a new one with
 * llmEngine.createConversation(), stores it in [conversation] and returns it.
 */
private fun ensureConversation() = run {
    val cached = conversation
    if (cached != null && cached.isAlive) {
        cached
    } else {
        llmEngine.createConversation().also { created -> conversation = created }
    }
}
|
||||||
|
|
||||||
fun sendTextMessage(text: String) {
|
fun sendTextMessage(text: String) {
|
||||||
viewModelScope.launch {
|
viewModelScope.launch {
|
||||||
@@ -372,13 +445,13 @@ class MainViewModel(
|
|||||||
Log.d(TAG, "First input was text - TTS auto-disabled")
|
Log.d(TAG, "First input was text - TTS auto-disabled")
|
||||||
}
|
}
|
||||||
|
|
||||||
val useServer = userSettings.enableServerDelegation.first()
|
val mode = userSettings.serverAgentMode.first()
|
||||||
Log.d(TAG, "useServer: $useServer")
|
Log.d(TAG, "Server agent mode: $mode")
|
||||||
|
|
||||||
if (useServer) {
|
when (mode) {
|
||||||
processTextWithServer(text)
|
"full_remote" -> processTextWithRemoteAgent(text)
|
||||||
} else {
|
"delegation" -> processTextWithDelegation(text)
|
||||||
processTextWithLocalModel(text)
|
else -> processTextWithLocalModel(text)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -493,7 +566,10 @@ class MainViewModel(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private suspend fun processTextWithServer(text: String) {
|
/**
|
||||||
|
* Full remote mode - bypass local model entirely, use server as the agent.
|
||||||
|
*/
|
||||||
|
private suspend fun processTextWithRemoteAgent(text: String) {
|
||||||
val userMessage = ConversationMessage(
|
val userMessage = ConversationMessage(
|
||||||
text = text,
|
text = text,
|
||||||
isUser = true
|
isUser = true
|
||||||
@@ -503,14 +579,117 @@ class MainViewModel(
|
|||||||
|
|
||||||
_uiState.value = UIState.PROCESSING
|
_uiState.value = UIState.PROCESSING
|
||||||
|
|
||||||
|
val remote = remoteAgent
|
||||||
|
if (remote == null) {
|
||||||
|
_uiState.value = UIState.ERROR
|
||||||
|
_responseText.value = "No server configured. Please set a delegate server URL in Settings."
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
val responseBuilder = StringBuilder()
|
||||||
|
|
||||||
|
remote.sendMessage(
|
||||||
|
message = text,
|
||||||
|
systemPrompt = "You are a helpful AI assistant."
|
||||||
|
).collect { event ->
|
||||||
|
when (event) {
|
||||||
|
is RemoteAgentEvent.Token -> {
|
||||||
|
responseBuilder.append(event.text)
|
||||||
|
_responseText.value = responseBuilder.toString()
|
||||||
|
_uiState.value = UIState.SPEAKING
|
||||||
|
}
|
||||||
|
is RemoteAgentEvent.Complete -> {
|
||||||
val aiMessage = ConversationMessage(
|
val aiMessage = ConversationMessage(
|
||||||
text = "Server mode not yet implemented. Please use local model.",
|
text = event.response,
|
||||||
isUser = false
|
isUser = false
|
||||||
)
|
)
|
||||||
_messages.value = _messages.value + aiMessage
|
_messages.value = _messages.value + aiMessage
|
||||||
saveState()
|
saveState()
|
||||||
|
|
||||||
|
speakResponse(event.response)
|
||||||
_uiState.value = UIState.IDLE
|
_uiState.value = UIState.IDLE
|
||||||
}
|
}
|
||||||
|
is RemoteAgentEvent.Error -> {
|
||||||
|
_responseText.value = "Error: ${event.message}"
|
||||||
|
_uiState.value = UIState.ERROR
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (e: Exception) {
|
||||||
|
Log.e(TAG, "Error in remote agent", e)
|
||||||
|
_uiState.value = UIState.ERROR
|
||||||
|
_responseText.value = "Server error: ${e.message}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Delegation mode - local model decides when to ask big model for help.
 *
 * Appends [text] to the conversation as a user message, then collects the event stream of the
 * delegation agent:
 * - Token: appended to the running response text, UI switches to SPEAKING.
 * - Status: replaces the response text with the status message.
 * - Complete: the final response is stored as an AI message, persisted and spoken; UI goes IDLE.
 * - Error: the error message is shown and the UI switches to ERROR.
 *
 * If no delegation agent exists, the UI is put into the ERROR state and the function returns
 * after the user message has already been recorded.
 *
 * @param text the user's input to process.
 */
private suspend fun processTextWithDelegation(text: String) {
    val userMessage = ConversationMessage(
        text = text,
        isUser = true
    )
    _messages.value = _messages.value + userMessage
    saveState()

    _uiState.value = UIState.PROCESSING

    // Snapshot the nullable property so the null check holds for the rest of the function.
    val delegation = delegationAgent
    if (delegation == null) {
        _uiState.value = UIState.ERROR
        _responseText.value = "Delegation not available. Please set a delegate server URL in Settings."
        return
    }

    try {
        val responseBuilder = StringBuilder()

        delegation.processWithDelegation(
            userInput = text,
            // NOTE(review): the history already contains the message appended above, so the
            // input is passed both as userInput and as the last history entry - confirm that
            // DelegationAgent expects this.
            conversationHistory = _messages.value.map { it.isUser to it.text }
        ).collect { event ->
            when (event) {
                is DelegationEvent.Token -> {
                    responseBuilder.append(event.text)
                    _responseText.value = responseBuilder.toString()
                    _uiState.value = UIState.SPEAKING
                }
                is DelegationEvent.Status -> {
                    _responseText.value = event.message
                }
                is DelegationEvent.Complete -> {
                    val aiMessage = ConversationMessage(
                        text = event.response,
                        isUser = false
                    )
                    _messages.value = _messages.value + aiMessage
                    saveState()

                    speakResponse(event.response)
                    _uiState.value = UIState.IDLE
                }
                is DelegationEvent.Error -> {
                    _responseText.value = "Error: ${event.message}"
                    _uiState.value = UIState.ERROR
                }
            }
        }
    } catch (e: kotlin.coroutines.cancellation.CancellationException) {
        // Cancellation is not a failure: rethrow so the coroutine is cancelled cooperatively
        // instead of being reported to the user as an error.
        throw e
    } catch (e: Exception) {
        Log.e(TAG, "Error in delegation agent", e)
        _uiState.value = UIState.ERROR
        _responseText.value = "Error: ${e.message}"
    }
}
|
||||||
|
|
||||||
|
/**
 * Legacy entry point from the former boolean "use server" setting.
 *
 * Forwards [text] unchanged to [processTextWithRemoteAgent], i.e. the legacy setting maps to
 * full remote mode.
 */
private suspend fun processTextWithServer(text: String) = processTextWithRemoteAgent(text)
|
||||||
|
|
||||||
fun setResponse(text: String) {
|
fun setResponse(text: String) {
|
||||||
_responseText.value = text
|
_responseText.value = text
|
||||||
@@ -564,6 +743,20 @@ class MainViewModel(
|
|||||||
firstInputWasVoice = false // Image is not voice input
|
firstInputWasVoice = false // Image is not voice input
|
||||||
_uiState.value = UIState.PROCESSING
|
_uiState.value = UIState.PROCESSING
|
||||||
|
|
||||||
|
// Check server mode
|
||||||
|
val mode = userSettings.serverAgentMode.first()
|
||||||
|
|
||||||
|
// For server modes, we need a local model to process the image first
|
||||||
|
// Then send the description/results to the server
|
||||||
|
when (mode) {
|
||||||
|
"full_remote" -> processImageWithRemoteAgent(bitmap, text)
|
||||||
|
"delegation" -> processImageWithDelegation(bitmap, text)
|
||||||
|
else -> processImageWithLocalModel(bitmap, text)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private suspend fun processImageWithLocalModel(bitmap: android.graphics.Bitmap, text: String) {
|
||||||
try {
|
try {
|
||||||
if (!llmEngine.isLoaded()) {
|
if (!llmEngine.isLoaded()) {
|
||||||
val modelPath = userSettings.modelPath.first()
|
val modelPath = userSettings.modelPath.first()
|
||||||
@@ -573,13 +766,187 @@ class MainViewModel(
|
|||||||
result.onFailure { e ->
|
result.onFailure { e ->
|
||||||
_uiState.value = UIState.ERROR
|
_uiState.value = UIState.ERROR
|
||||||
_responseText.value = "Failed to load model: ${e.message}"
|
_responseText.value = "Failed to load model: ${e.message}"
|
||||||
return@launch
|
return@processImageWithLocalModel
|
||||||
}
|
}
|
||||||
agent.prewarmCache()
|
agent.prewarmCache()
|
||||||
} else {
|
} else {
|
||||||
_uiState.value = UIState.ERROR
|
_uiState.value = UIState.ERROR
|
||||||
_responseText.value = "No model loaded. Please go to Settings and load a model."
|
_responseText.value = "No model loaded. Please go to Settings and load a model."
|
||||||
return@launch
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
val responseBuilder = StringBuilder()
|
||||||
|
|
||||||
|
Log.d(TAG, "Processing image with local model...")
|
||||||
|
|
||||||
|
agent.processInput(
|
||||||
|
input = text,
|
||||||
|
images = listOf(bitmap)
|
||||||
|
).collect { event ->
|
||||||
|
when (event) {
|
||||||
|
is AgentEvent.Token -> {
|
||||||
|
responseBuilder.append(event.text)
|
||||||
|
_responseText.value = responseBuilder.toString()
|
||||||
|
_uiState.value = UIState.SPEAKING
|
||||||
|
}
|
||||||
|
|
||||||
|
is AgentEvent.ExecutingTool -> {
|
||||||
|
_uiState.value = UIState.EXECUTING_TOOL
|
||||||
|
_responseText.value = "🔧 Using ${event.toolName}..."
|
||||||
|
}
|
||||||
|
|
||||||
|
is AgentEvent.ToolResult -> {
|
||||||
|
// Tool completed
|
||||||
|
}
|
||||||
|
|
||||||
|
is AgentEvent.Complete -> {
|
||||||
|
val aiMessage = ConversationMessage(
|
||||||
|
text = event.response,
|
||||||
|
isUser = false
|
||||||
|
)
|
||||||
|
_messages.value = _messages.value + aiMessage
|
||||||
|
saveState()
|
||||||
|
|
||||||
|
speakResponse(event.response)
|
||||||
|
_uiState.value = UIState.IDLE
|
||||||
|
}
|
||||||
|
|
||||||
|
is AgentEvent.Error -> {
|
||||||
|
_responseText.value = "Error: ${event.message}"
|
||||||
|
_uiState.value = UIState.ERROR
|
||||||
|
}
|
||||||
|
|
||||||
|
else -> {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (e: Exception) {
|
||||||
|
Log.e(TAG, "Error processing image", e)
|
||||||
|
_uiState.value = UIState.ERROR
|
||||||
|
_responseText.value = "Error processing image: ${e.message}"
|
||||||
|
|
||||||
|
val errorMessage = ConversationMessage(
|
||||||
|
text = "❌ Failed to process image: ${e.message}",
|
||||||
|
isUser = false
|
||||||
|
)
|
||||||
|
_messages.value = _messages.value + errorMessage
|
||||||
|
saveState()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Full remote mode for image input.
 *
 * The image itself is not sent to the server: the local model first produces a textual
 * description of [bitmap], and that description (combined with [text] when it is not blank)
 * is forwarded to [processTextWithRemoteAgent].
 *
 * @param bitmap the image selected by the user.
 * @param text the text the user typed alongside the image; may be blank.
 */
private suspend fun processImageWithRemoteAgent(bitmap: android.graphics.Bitmap, text: String) {
    // Fail fast: without a remote agent the request cannot succeed, so do not load the local
    // model or run a local image description first.
    if (remoteAgent == null) {
        _uiState.value = UIState.ERROR
        _responseText.value = "No server configured. Please set a delegate server URL in Settings."
        return
    }

    try {
        // The local model is required to turn the image into text.
        if (!llmEngine.isLoaded()) {
            val modelPath = userSettings.modelPath.first()
            if (modelPath.isNotEmpty()) {
                _responseText.value = "Analyzing image..."
                val result = llmEngine.loadModel(modelPath)
                result.onFailure { e ->
                    _uiState.value = UIState.ERROR
                    _responseText.value = "Failed to load model: ${e.message}"
                    return@processImageWithRemoteAgent
                }
            } else {
                _uiState.value = UIState.ERROR
                _responseText.value = "No model loaded for image analysis. Please load a model first."
                return
            }
        }

        // Get image description from local model
        val description = llmEngine.generate(
            conversation = ensureConversation(),
            prompt = "Describe this image in detail:",
            audioData = null,
            images = listOf(bitmap)
        )

        Log.d(TAG, "Image description: $description")

        // Now send description + user text to remote agent
        val fullPrompt = if (text.isNotBlank()) {
            "User question about image: $text\n\nImage description: $description"
        } else {
            "Describe this image: $description"
        }

        processTextWithRemoteAgent(fullPrompt)
    } catch (e: kotlin.coroutines.cancellation.CancellationException) {
        // Cancellation is not a failure: rethrow so the coroutine is cancelled cooperatively
        // instead of being reported to the user as an error.
        throw e
    } catch (e: Exception) {
        Log.e(TAG, "Error processing image with remote agent", e)
        _uiState.value = UIState.ERROR
        _responseText.value = "Error: ${e.message}"
    }
}
|
||||||
|
|
||||||
|
/**
 * Delegation mode for image input.
 *
 * The image is analysed by the local model first and that answer is appended to the
 * conversation as an AI message. A fixed follow-up question is then passed to
 * [processTextWithDelegation], which lets the delegation agent decide whether server help
 * is needed.
 *
 * @param bitmap the image selected by the user.
 * @param text the text the user typed alongside the image; may be blank.
 */
private suspend fun processImageWithDelegation(bitmap: android.graphics.Bitmap, text: String) {
    try {
        // The local model is required for the initial image analysis.
        if (!llmEngine.isLoaded()) {
            val modelPath = userSettings.modelPath.first()
            if (modelPath.isNotEmpty()) {
                _responseText.value = "Analyzing image..."
                val result = llmEngine.loadModel(modelPath)
                result.onFailure { e ->
                    _uiState.value = UIState.ERROR
                    _responseText.value = "Failed to load model: ${e.message}"
                    return@processImageWithDelegation
                }
            } else {
                _uiState.value = UIState.ERROR
                _responseText.value = "No model loaded for image analysis. Please load a model first."
                return
            }
        }

        // Get initial analysis from local model
        val description = llmEngine.generate(
            conversation = ensureConversation(),
            prompt = if (text.isNotBlank()) "Analyze this image and answer: $text" else "Describe this image:",
            audioData = null,
            images = listOf(bitmap)
        )

        // Add the local model's response to conversation
        val localResponse = ConversationMessage(
            text = description,
            isUser = false
        )
        _messages.value = _messages.value + localResponse
        saveState()

        // Now use delegation to decide if we need more help.
        // NOTE(review): processTextWithDelegation records its input as a user message, so this
        // canned follow-up appears in the conversation as if the user typed it - confirm that
        // is intended.
        val followUp = if (text.isNotBlank()) "Is this answer complete and accurate?" else "Can you provide more details?"
        processTextWithDelegation(followUp)
    } catch (e: kotlin.coroutines.cancellation.CancellationException) {
        // Cancellation is not a failure: rethrow so the coroutine is cancelled cooperatively
        // instead of being reported to the user as an error.
        throw e
    } catch (e: Exception) {
        Log.e(TAG, "Error processing image with delegation", e)
        _uiState.value = UIState.ERROR
        _responseText.value = "Error: ${e.message}"
    }
}
|
||||||
|
|
||||||
|
// Legacy onImageSelected body - now extracted to separate functions
|
||||||
|
private suspend fun processImageWithLocalModelLegacy(bitmap: android.graphics.Bitmap, text: String) {
|
||||||
|
try {
|
||||||
|
if (!llmEngine.isLoaded()) {
|
||||||
|
val modelPath = userSettings.modelPath.first()
|
||||||
|
if (modelPath.isNotEmpty()) {
|
||||||
|
_responseText.value = "Loading model..."
|
||||||
|
val result = llmEngine.loadModel(modelPath)
|
||||||
|
result.onFailure { e ->
|
||||||
|
_uiState.value = UIState.ERROR
|
||||||
|
_responseText.value = "Failed to load model: ${e.message}"
|
||||||
|
return@processImageWithLocalModelLegacy
|
||||||
|
}
|
||||||
|
agent.prewarmCache()
|
||||||
|
} else {
|
||||||
|
_uiState.value = UIState.ERROR
|
||||||
|
_responseText.value = "No model loaded. Please go to Settings and load a model."
|
||||||
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -587,9 +954,8 @@ class MainViewModel(
|
|||||||
|
|
||||||
Log.d(TAG, "Processing image with model...")
|
Log.d(TAG, "Processing image with model...")
|
||||||
|
|
||||||
// Send empty text with image - model will process image naturally
|
|
||||||
agent.processInput(
|
agent.processInput(
|
||||||
input = text, // Use the text the user typed (may be empty)
|
input = text,
|
||||||
images = listOf(bitmap)
|
images = listOf(bitmap)
|
||||||
).collect { event ->
|
).collect { event ->
|
||||||
when (event) {
|
when (event) {
|
||||||
|
|||||||
@@ -136,8 +136,10 @@ fun SettingsScreen(
|
|||||||
ServerSection(
|
ServerSection(
|
||||||
searchServerUrl = uiState.searchServerUrl,
|
searchServerUrl = uiState.searchServerUrl,
|
||||||
delegateServerUrl = uiState.delegateServerUrl,
|
delegateServerUrl = uiState.delegateServerUrl,
|
||||||
|
serverAgentMode = uiState.serverAgentMode,
|
||||||
onSearchServerChange = { viewModel.setSearchServerUrl(it) },
|
onSearchServerChange = { viewModel.setSearchServerUrl(it) },
|
||||||
onDelegateServerChange = { viewModel.setDelegateServerUrl(it) }
|
onDelegateServerChange = { viewModel.setDelegateServerUrl(it) },
|
||||||
|
onServerAgentModeChange = { viewModel.setServerAgentMode(it) }
|
||||||
)
|
)
|
||||||
|
|
||||||
HorizontalDivider()
|
HorizontalDivider()
|
||||||
@@ -391,8 +393,10 @@ private fun ModelCard(
|
|||||||
private fun ServerSection(
|
private fun ServerSection(
|
||||||
searchServerUrl: String,
|
searchServerUrl: String,
|
||||||
delegateServerUrl: String,
|
delegateServerUrl: String,
|
||||||
|
serverAgentMode: String,
|
||||||
onSearchServerChange: (String) -> Unit,
|
onSearchServerChange: (String) -> Unit,
|
||||||
onDelegateServerChange: (String) -> Unit
|
onDelegateServerChange: (String) -> Unit,
|
||||||
|
onServerAgentModeChange: (String) -> Unit
|
||||||
) {
|
) {
|
||||||
Column(verticalArrangement = Arrangement.spacedBy(12.dp)) {
|
Column(verticalArrangement = Arrangement.spacedBy(12.dp)) {
|
||||||
Text(
|
Text(
|
||||||
@@ -420,6 +424,50 @@ private fun ServerSection(
|
|||||||
singleLine = true
|
singleLine = true
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Server Agent Mode selector (only shown if delegate server is configured)
|
||||||
|
if (delegateServerUrl.isNotEmpty()) {
|
||||||
|
Text(
|
||||||
|
text = "Agent Mode",
|
||||||
|
style = MaterialTheme.typography.bodyMedium
|
||||||
|
)
|
||||||
|
|
||||||
|
// Mode selection buttons
|
||||||
|
Row(
|
||||||
|
modifier = Modifier.fillMaxWidth(),
|
||||||
|
horizontalArrangement = Arrangement.spacedBy(8.dp)
|
||||||
|
) {
|
||||||
|
ModeButton(
|
||||||
|
text = "Local Only",
|
||||||
|
selected = serverAgentMode == "local_only",
|
||||||
|
onClick = { onServerAgentModeChange("local_only") },
|
||||||
|
modifier = Modifier.weight(1f)
|
||||||
|
)
|
||||||
|
ModeButton(
|
||||||
|
text = "Full Remote",
|
||||||
|
selected = serverAgentMode == "full_remote",
|
||||||
|
onClick = { onServerAgentModeChange("full_remote") },
|
||||||
|
modifier = Modifier.weight(1f)
|
||||||
|
)
|
||||||
|
ModeButton(
|
||||||
|
text = "Smart",
|
||||||
|
selected = serverAgentMode == "delegation",
|
||||||
|
onClick = { onServerAgentModeChange("delegation") },
|
||||||
|
modifier = Modifier.weight(1f)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
Text(
|
||||||
|
text = when (serverAgentMode) {
|
||||||
|
"local_only" -> "Uses only the local model on your device."
|
||||||
|
"full_remote" -> "Bypasses local model entirely. All queries go to the server."
|
||||||
|
"delegation" -> "Local model decides when to ask the server for help."
|
||||||
|
else -> ""
|
||||||
|
},
|
||||||
|
style = MaterialTheme.typography.bodySmall,
|
||||||
|
color = MaterialTheme.colorScheme.onSurfaceVariant
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
Text(
|
Text(
|
||||||
text = "Leave empty to disable server features. URLs are saved automatically.",
|
text = "Leave empty to disable server features. URLs are saved automatically.",
|
||||||
style = MaterialTheme.typography.bodySmall,
|
style = MaterialTheme.typography.bodySmall,
|
||||||
@@ -428,6 +476,31 @@ private fun ServerSection(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * One option of the agent mode selector, rendered as a text button.
 *
 * A selected button uses the primaryContainer / onPrimaryContainer colors of the current
 * color scheme; an unselected one uses surfaceVariant / onSurfaceVariant.
 *
 * @param text label shown on the button.
 * @param selected whether this option is the active mode.
 * @param onClick invoked when the button is pressed.
 * @param modifier modifier applied to the button.
 */
@Composable
private fun ModeButton(
    text: String,
    selected: Boolean,
    onClick: () -> Unit,
    modifier: Modifier = Modifier
) {
    val palette = MaterialTheme.colorScheme
    val background = if (selected) palette.primaryContainer else palette.surfaceVariant
    val foreground = if (selected) palette.onPrimaryContainer else palette.onSurfaceVariant

    TextButton(
        onClick = onClick,
        modifier = modifier,
        colors = ButtonDefaults.textButtonColors(
            containerColor = background,
            contentColor = foreground
        )
    ) {
        Text(text = text, style = MaterialTheme.typography.labelMedium)
    }
}
|
||||||
|
|
||||||
@Composable
|
@Composable
|
||||||
private fun TtsSection(
|
private fun TtsSection(
|
||||||
enabled: Boolean,
|
enabled: Boolean,
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ data class SettingsUiState(
|
|||||||
val serverEnabled: Boolean = false,
|
val serverEnabled: Boolean = false,
|
||||||
val searchServerUrl: String = "",
|
val searchServerUrl: String = "",
|
||||||
val delegateServerUrl: String = "",
|
val delegateServerUrl: String = "",
|
||||||
|
val serverAgentMode: String = "local_only", // "local_only", "full_remote", "delegation"
|
||||||
val searchServerHealthy: Boolean? = null,
|
val searchServerHealthy: Boolean? = null,
|
||||||
val delegateServerHealthy: Boolean? = null,
|
val delegateServerHealthy: Boolean? = null,
|
||||||
val serverModels: List<String> = emptyList(),
|
val serverModels: List<String> = emptyList(),
|
||||||
@@ -197,6 +198,7 @@ class SettingsViewModel(
|
|||||||
val ttsEnabled = userSettings.ttsEnabled.first()
|
val ttsEnabled = userSettings.ttsEnabled.first()
|
||||||
val ttsAutoMode = userSettings.ttsAutoMode.first()
|
val ttsAutoMode = userSettings.ttsAutoMode.first()
|
||||||
val floatingButtonEnabled = userSettings.floatingButtonEnabled.first()
|
val floatingButtonEnabled = userSettings.floatingButtonEnabled.first()
|
||||||
|
val serverAgentMode = userSettings.serverAgentMode.first()
|
||||||
|
|
||||||
val finalModelPath = if (modelPath.isEmpty() && ModelDownloadManager.isModelDownloaded(context)) {
|
val finalModelPath = if (modelPath.isEmpty() && ModelDownloadManager.isModelDownloaded(context)) {
|
||||||
ModelDownloadManager.getModelFile(context).absolutePath
|
ModelDownloadManager.getModelFile(context).absolutePath
|
||||||
@@ -211,6 +213,7 @@ class SettingsViewModel(
|
|||||||
serverEnabled = serverEnabled,
|
serverEnabled = serverEnabled,
|
||||||
searchServerUrl = searchServerUrl,
|
searchServerUrl = searchServerUrl,
|
||||||
delegateServerUrl = delegateServerUrl,
|
delegateServerUrl = delegateServerUrl,
|
||||||
|
serverAgentMode = serverAgentMode,
|
||||||
selectedModel = selectedModel,
|
selectedModel = selectedModel,
|
||||||
isLoading = false,
|
isLoading = false,
|
||||||
modelLoaded = llmEngine.isLoaded(),
|
modelLoaded = llmEngine.isLoaded(),
|
||||||
@@ -551,6 +554,14 @@ class SettingsViewModel(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Selects the server agent mode.
 *
 * The UI state is updated immediately; the value is then written to user settings in a
 * coroutine launched on the view model scope.
 *
 * @param mode the mode identifier; the settings screen passes "local_only", "full_remote"
 *   or "delegation".
 */
fun setServerAgentMode(mode: String) {
    val updatedState = _uiState.value.copy(serverAgentMode = mode)
    _uiState.value = updatedState

    viewModelScope.launch { userSettings.setServerAgentMode(mode) }
}
|
||||||
|
|
||||||
// Floating button (experimental)
|
// Floating button (experimental)
|
||||||
fun setFloatingButtonEnabled(enabled: Boolean) {
|
fun setFloatingButtonEnabled(enabled: Boolean) {
|
||||||
_uiState.value = _uiState.value.copy(floatingButtonEnabled = enabled)
|
_uiState.value = _uiState.value.copy(floatingButtonEnabled = enabled)
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user