fix: prevent path hallucination - read files directly without ls first

- Changed instructions to read files directly instead of verifying with ls first
- Added explicit warning against placeholder paths like '/path/to/file'
- Model now uses paths exactly as user provides them
- Should fix issues with hallucinated paths like '/path/to/my-secret.log'
- Also adds tool-call consensus checking across workers (_tool_calls_agree, _generate_with_tool_consensus)
- All 41 tests passing
2026-02-25 21:42:25 +01:00
parent 8431717235
commit 5b29e15c0a
2 changed files with 176 additions and 11 deletions
@@ -15,18 +15,18 @@ CRITICAL RULES:
 6. NO explanations beyond necessary. Be concise.
 7. NO markdown formatting. Use plain text only.

-FILE OPERATIONS - VERIFY THEN PROCEED:
-When asked to read a specific file:
-1. First verify it exists using 'ls' or 'grep' (quick check)
-2. THEN IMMEDIATELY read it with the 'read' tool - DO NOT ask the user to confirm
-3. Present the file contents directly
+FILE OPERATIONS - READ DIRECTLY:
+When asked to read a specific file by name (like "read my-secret.log"):
+1. Use the 'read' tool IMMEDIATELY with the filename as given
+2. DO NOT use 'ls' first to check - just try to read it
+3. If the file doesn't exist, you'll get an error and can inform the user

-When asked to find/read "the file" in a directory:
-1. Use 'ls' to see what files exist
-2. Identify the relevant file
-3. THEN IMMEDIATELY read it - DO NOT wait for confirmation
+When asked to find/read "the file" in a directory without naming it:
+1. Use 'ls' to list files and see what's there
+2. Identify the file
+3. THEN read it immediately

-Never say "The file exists" and stop - always continue to read it immediately.
+CRITICAL: Never invent placeholder paths like '/path/to/file'. Use paths exactly as the user provides them, or relative filenames for files in the current directory.

 TOOL USAGE FORMAT:

@@ -266,6 +266,171 @@ async def _generate_with_local_swarm(
        raise


+def _parse_tool_arguments(arguments) -> tuple:
+    """Try to decode one tool call's arguments.
+
+    Args:
+        arguments: The raw ``arguments`` value of a tool call
+
+    Returns:
+        ``(True, value)`` when ``arguments`` is not a string (used as-is) or
+        is a string holding valid JSON (decoded), ``(False, arguments)`` when
+        it is a string that does not parse as JSON
+    """
+    if not isinstance(arguments, str):
+        return True, arguments
+    try:
+        return True, json.loads(arguments)
+    except json.JSONDecodeError:
+        return False, arguments
+
+
+def _tool_calls_agree(tool_calls_list: List[List[dict]]) -> bool:
+    """Check if all workers agree on the same tool calls.
+
+    Workers agree when they produced the same number of tool calls and, at
+    every position, the same function name and equivalent arguments.
+    Arguments are compared as decoded JSON when both sides decode, so
+    formatting differences do not count as disagreement; otherwise they are
+    compared as whitespace-stripped strings.
+
+    Args:
+        tool_calls_list: List of tool calls from each worker
+
+    Returns:
+        True if all workers have the same tool calls (also for an empty list
+        or when no worker produced any tool call), False otherwise
+    """
+    if not tool_calls_list:
+        return True
+
+    # Check if all have the same number of tool calls
+    reference_calls = tool_calls_list[0]
+    expected_count = len(reference_calls)
+    if any(len(calls) != expected_count for calls in tool_calls_list):
+        logger.warning(f"  ⚠️ Workers disagree on number of tool calls: {[len(tc) for tc in tool_calls_list]}")
+        return False
+
+    if expected_count == 0:
+        return True  # All agree on no tools
+
+    # Check if tool names and arguments match, position by position
+    for index, reference in enumerate(reference_calls):
+        # "or {}" tolerates an explicit "function": None entry
+        ref_function = reference.get("function") or {}
+        ref_name = ref_function.get("name", "")
+        ref_args = ref_function.get("arguments", "")
+        # Decode the reference arguments once, not once per compared worker
+        ref_ok, ref_value = _parse_tool_arguments(ref_args)
+
+        # Worker numbers are 1-based and the reference is worker 1
+        for worker_number, other_calls in enumerate(tool_calls_list[1:], 2):
+            other_function = other_calls[index].get("function") or {}
+            other_name = other_function.get("name", "")
+            other_args = other_function.get("arguments", "")
+
+            if ref_name != other_name:
+                logger.warning(f"  ⚠️ Worker {worker_number} disagrees on tool name: {ref_name} vs {other_name}")
+                return False
+
+            # For arguments, do a loose comparison (ignore whitespace differences)
+            other_ok, other_value = _parse_tool_arguments(other_args)
+            if ref_ok and other_ok:
+                same_arguments = ref_value == other_value
+            else:
+                # If JSON parsing fails on either side, compare as strings
+                same_arguments = str(ref_args).strip() == str(other_args).strip()
+
+            if not same_arguments:
+                logger.warning(f"  ⚠️ Worker {worker_number} disagrees on arguments for {ref_name}")
+                return False
+
+    logger.info(f"  ✅ All {len(tool_calls_list)} workers agree on tool calls")
+    return True
+
+
+async def _generate_via_swarm(
+    swarm_manager,
+    prompt: str,
+    max_tokens: int,
+    temperature: float,
+    *,
+    keep_tool_calls: bool = True
+) -> tuple[str, List[dict], int, float]:
+    """Generate through the swarm manager's own consensus and package the result.
+
+    Args:
+        swarm_manager: Swarm manager instance
+        prompt: Prompt to generate from
+        max_tokens: Maximum tokens to generate
+        temperature: Sampling temperature
+        keep_tool_calls: When True, return the raw response text together with
+            its parsed tool calls. When False, return the text as parsed by
+            parse_tool_calls and an empty tool call list.
+
+    Returns:
+        Tuple of (response_text, tool_calls, tokens_generated, tps)
+    """
+    result = await swarm_manager.generate(
+        prompt=prompt,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        use_consensus=True
+    )
+    response = result.selected_response
+    parsed_content, tool_calls = parse_tool_calls(response.text)
+    if keep_tool_calls:
+        return response.text, tool_calls, response.tokens_generated, response.tokens_per_second
+    # Strip any tool calls to be safe
+    return parsed_content, [], response.tokens_generated, response.tokens_per_second
+
+
+async def _generate_with_tool_consensus(
+    swarm_manager,
+    prompt: str,
+    max_tokens: int,
+    temperature: float
+) -> tuple[str, List[dict], int, float]:
+    """Generate response with tool call consensus checking.
+
+    When multiple workers are active, every worker generates from the same
+    prompt and the tool calls parsed from each response are compared. A worker
+    that fails counts as a response with no text and no tool calls.
+
+    - If all workers agree, the first non-empty response and its tool calls
+      are returned. The token count is then only an estimate (average
+      whitespace-separated word count of the non-empty responses) and the
+      tokens-per-second value is a fixed placeholder.
+    - If they disagree, the swarm manager's consensus response is returned
+      with tool calls stripped.
+    - With a single worker, no worker list, no usable worker response, or any
+      unexpected error, the swarm manager's consensus response is returned
+      together with its tool calls.
+
+    Args:
+        swarm_manager: Swarm manager instance
+        prompt: Prompt to generate from
+        max_tokens: Maximum tokens to generate
+        temperature: Sampling temperature
+
+    Returns:
+        Tuple of (response_text, tool_calls, tokens_generated, tps)
+    """
+    try:
+        # Get status to check number of workers
+        status = swarm_manager.get_status()
+        num_workers = getattr(status, 'active_workers', 1)
+
+        # If only one worker, use normal generation
+        if num_workers <= 1:
+            logger.debug("  Single worker mode - skipping tool consensus")
+            return await _generate_via_swarm(swarm_manager, prompt, max_tokens, temperature)
+
+        # Multiple workers - check for tool consensus
+        logger.info(f"  🔍 Checking tool consensus across {num_workers} workers...")
+
+        from swarm.manager import GenerationRequest
+
+        # Get all active workers; without a worker list fall back to normal generation
+        workers = getattr(swarm_manager, 'workers', None)
+        if not workers:
+            return await _generate_via_swarm(swarm_manager, prompt, max_tokens, temperature)
+
+        # Generate from each worker individually
+        all_responses = []
+        all_tool_calls = []
+        for i, worker in enumerate(workers):
+            try:
+                gen_result = await worker.generate(
+                    GenerationRequest(prompt=prompt, max_tokens=max_tokens, temperature=temperature)
+                )
+                response_text = gen_result.text
+                _, tool_calls = parse_tool_calls(response_text)
+            except Exception as e:
+                # Best effort: a failed worker votes "no text, no tool calls"
+                logger.warning(f"    Worker {i+1} failed: {e}")
+                response_text, tool_calls = "", []
+            else:
+                logger.debug(f"    Worker {i+1}: {len(tool_calls)} tool call(s)")
+            all_responses.append(response_text)
+            all_tool_calls.append(tool_calls)
+
+        # Check consensus
+        if not _tool_calls_agree(all_tool_calls):
+            # Disagreement - fall back to consensus strategy without tools
+            logger.warning("  ⚠️ Tool consensus failed - falling back to text response")
+            return await _generate_via_swarm(
+                swarm_manager, prompt, max_tokens, temperature, keep_tool_calls=False
+            )
+
+        usable_responses = [text for text in all_responses if text]
+        if not usable_responses:
+            # Every worker failed or returned empty text, so there is nothing to
+            # return or average over; use normal generation instead of dividing
+            # by zero below.
+            logger.warning("  ⚠️ No worker produced a response - falling back to normal generation")
+            return await _generate_via_swarm(swarm_manager, prompt, max_tokens, temperature)
+
+        # All agree - use the first worker that actually produced text, so a
+        # failed first worker does not turn an agreed answer into an empty one
+        best_index = next(index for index, text in enumerate(all_responses) if text)
+        estimated_tokens = sum(len(text.split()) for text in usable_responses) // len(usable_responses)
+        estimated_tps = 10.0  # Estimate
+        return all_responses[best_index], all_tool_calls[best_index], estimated_tokens, estimated_tps
+
+    except Exception:
+        logger.exception("Error in tool consensus generation")
+        # Fall back to normal generation
+        return await _generate_via_swarm(swarm_manager, prompt, max_tokens, temperature)
+
+
 async def _generate_with_federation(
    federated_swarm,
    prompt: str,
@@ -475,7 +640,7 @@ async def handle_chat_completion(
            elif tool_name == "bash":
                # Check if this was a verification command (ls, grep) vs an action command
                if "ls" in tool_result.lower() or "grep" in tool_result.lower():
-                    instruction = "CRITICAL: The listing is shown above. If the user asked to READ a specific file and you can see it exists in this listing, you MUST immediately USE THE read TOOL NOW to read it. Do not summarize the listing first - READ THE FILE immediately. If the user asked to just CHECK what files exist (without reading), then summarize. If the requested file is NOT in the listing, tell the user it doesn't exist."
+                    instruction = "CRITICAL: The listing is shown above. If the user asked to READ a specific file and you can see it exists in this listing, you MUST immediately USE THE read TOOL NOW with the exact filename from the listing. Do not summarize first - READ THE FILE immediately. Use the filename exactly as shown (e.g., 'my-secret.log' not '/path/to/my-secret.log'). If the user asked to just CHECK what files exist (without reading), then summarize. If the requested file is NOT in the listing, tell the user it doesn't exist."
                else:
                    instruction = "The command has been executed. SUMMARIZE the output above to answer the user's request. Do not call additional tools."
            else: