fix: prevent path hallucination - read files directly without ls first

- Changed instructions to read files directly instead of verifying with ls first
- Added explicit warning against placeholder paths like '/path/to/file'
- Model now uses paths exactly as user provides them
- Should fix issues with hallucinated paths like '/path/to/my-secret.log'
- All 41 tests passing
2026-02-25 21:42:25 +01:00
parent 8431717235
commit 5b29e15c0a
2 changed files with 176 additions and 11 deletions
@@ -15,18 +15,18 @@ CRITICAL RULES:
 6. NO explanations beyond necessary. Be concise.
 7. NO markdown formatting. Use plain text only.
-FILE OPERATIONS - VERIFY THEN PROCEED:
+FILE OPERATIONS - READ DIRECTLY:
-When asked to read a specific file:
+When asked to read a specific file by name (like "read my-secret.log"):
-1. First verify it exists using 'ls' or 'grep' (quick check)
+1. Use the 'read' tool IMMEDIATELY with the filename as given
-2. THEN IMMEDIATELY read it with the 'read' tool - DO NOT ask the user to confirm
+2. DO NOT use 'ls' first to check - just try to read it
-3. Present the file contents directly
+3. If the file doesn't exist, you'll get an error and can inform the user
-When asked to find/read "the file" in a directory:
+When asked to find/read "the file" in a directory without naming it:
-1. Use 'ls' to see what files exist
+1. Use 'ls' to list files and see what's there
-2. Identify the relevant file
+2. Identify the file
-3. THEN IMMEDIATELY read it - DO NOT wait for confirmation
+3. THEN read it immediately
-Never say "The file exists" and stop - always continue to read it immediately.
+CRITICAL: Never invent placeholder paths like '/path/to/file'. Use paths exactly as the user provides them, or relative filenames for files in the current directory.
 TOOL USAGE FORMAT:
@@ -266,6 +266,171 @@ async def _generate_with_local_swarm(
        raise
 def _tool_calls_agree(tool_calls_list: List[List[dict]]) -> bool:
    """Report whether every worker proposed the same sequence of tool calls.

    Each worker's calls are compared, position by position, against the
    first worker's calls. Function names must be equal. Arguments given as
    strings are JSON-decoded before comparison, so formatting differences
    inside valid JSON do not count as disagreement; if decoding fails, the
    arguments are compared as stripped strings instead.

    Args:
        tool_calls_list: One list of tool-call dicts per worker.

    Returns:
        True if the list is empty, if no worker proposed any tool call, or
        if all workers proposed matching calls; False otherwise.
    """
    if not tool_calls_list:
        return True

    reference_calls, *remaining = tool_calls_list
    expected_count = len(reference_calls)
    counts = [len(calls) for calls in tool_calls_list]
    if any(count != expected_count for count in counts):
        logger.warning(f"  ⚠️ Workers disagree on number of tool calls: {counts}")
        return False

    if expected_count == 0:
        # A unanimous "no tools" answer is also agreement.
        return True

    for position, reference in enumerate(reference_calls):
        ref_name = reference.get("function", {}).get("name", "")
        ref_args = reference.get("function", {}).get("arguments", "")

        # Worker numbers in log messages are 1-based; the reference is worker 1.
        for worker_number, calls in enumerate(remaining, 2):
            candidate = calls[position]
            cand_name = candidate.get("function", {}).get("name", "")
            cand_args = candidate.get("function", {}).get("arguments", "")

            if ref_name != cand_name:
                logger.warning(f"  ⚠️ Worker {worker_number} disagrees on tool name: {ref_name} vs {cand_name}")
                return False

            try:
                ref_decoded = json.loads(ref_args) if isinstance(ref_args, str) else ref_args
                cand_decoded = json.loads(cand_args) if isinstance(cand_args, str) else cand_args
                differs = ref_decoded != cand_decoded
            except json.JSONDecodeError:
                # Not valid JSON: fall back to a whitespace-trimmed text comparison.
                differs = str(ref_args).strip() != str(cand_args).strip()

            if differs:
                logger.warning(f"  ⚠️ Worker {worker_number} disagrees on arguments for {ref_name}")
                return False

    logger.info(f"  ✅ All {len(tool_calls_list)} workers agree on tool calls")
    return True
 async def _swarm_consensus_generate(
    swarm_manager,
    prompt: str,
    max_tokens: int,
    temperature: float,
    keep_tool_calls: bool = True
 ) -> tuple[str, List[dict], int, float]:
    """Generate through the swarm manager's own consensus and parse the result.

    Args:
        swarm_manager: Swarm manager instance
        prompt: Prompt to generate from
        max_tokens: Maximum tokens to generate
        temperature: Sampling temperature
        keep_tool_calls: When True, return the raw response text together
            with the tool calls parsed from it. When False, return the
            content produced by parse_tool_calls and an empty tool-call list.

    Returns:
        Tuple of (response_text, tool_calls, tokens_generated, tps)
    """
    result = await swarm_manager.generate(
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        use_consensus=True
    )
    response = result.selected_response
    parsed_content, tool_calls = parse_tool_calls(response.text)
    if keep_tool_calls:
        return response.text, tool_calls, response.tokens_generated, response.tokens_per_second
    return parsed_content, [], response.tokens_generated, response.tokens_per_second


 async def _generate_with_tool_consensus(
    swarm_manager,
    prompt: str,
    max_tokens: int,
    temperature: float
 ) -> tuple[str, List[dict], int, float]:
    """Generate response with tool call consensus checking.

    When multiple workers are active, each worker generates on its own and
    the tool calls parsed from their outputs are compared. Tool calls are
    only returned when all workers agree; on disagreement the swarm
    manager's consensus response is returned with tool calls stripped.

    A worker that raises is recorded as an empty response with no tool
    calls, so a failure counts as disagreement with workers that did
    propose tool calls.

    Args:
        swarm_manager: Swarm manager instance
        prompt: Prompt to generate from
        max_tokens: Maximum tokens to generate
        temperature: Sampling temperature

    Returns:
        Tuple of (response_text, tool_calls, tokens_generated, tps). On the
        multi-worker agreement path the last two values are estimates, not
        measurements.
    """
    try:
        # Get status to check number of workers
        status = swarm_manager.get_status()
        num_workers = getattr(status, 'active_workers', 1)

        # If only one worker, use normal generation
        if num_workers <= 1:
            logger.debug("  Single worker mode - skipping tool consensus")
            return await _swarm_consensus_generate(
                swarm_manager, prompt, max_tokens, temperature
            )

        # Multiple workers - check for tool consensus
        logger.info(f"  🔍 Checking tool consensus across {num_workers} workers...")

        # Imported lazily, as in the original; a failed import is handled by
        # the outer except like any other error.
        from swarm.manager import GenerationRequest

        workers = getattr(swarm_manager, 'workers', [])
        if not workers:
            # Nothing to poll individually - fall back to normal generation
            return await _swarm_consensus_generate(
                swarm_manager, prompt, max_tokens, temperature
            )

        # Generate from each worker, keeping both lists index-aligned
        all_responses = []
        all_tool_calls = []
        for worker_number, worker in enumerate(workers, 1):
            try:
                gen_result = await worker.generate(
                    GenerationRequest(prompt=prompt, max_tokens=max_tokens, temperature=temperature)
                )
                response_text = gen_result.text
                _, tool_calls = parse_tool_calls(response_text)
                call_count = len(tool_calls)
            except Exception as e:
                logger.warning(f"    Worker {worker_number} failed: {e}")
                all_responses.append("")
                all_tool_calls.append([])
            else:
                # Append only after everything that can raise has succeeded,
                # so a late error can never add a second entry for a worker.
                all_responses.append(response_text)
                all_tool_calls.append(tool_calls)
                logger.debug(f"    Worker {worker_number}: {call_count} tool call(s)")

        if not _tool_calls_agree(all_tool_calls):
            # Disagreement - fall back to consensus strategy without tools
            logger.warning("  ⚠️ Tool consensus failed - falling back to text response")
            return await _swarm_consensus_generate(
                swarm_manager, prompt, max_tokens, temperature, keep_tool_calls=False
            )

        usable = [(text, calls) for text, calls in zip(all_responses, all_tool_calls) if text]
        if not usable:
            # Every worker failed or returned nothing; there is no text to
            # return and no sample to estimate token counts from.
            logger.warning("  ⚠️ No worker produced a response - falling back to normal generation")
            return await _swarm_consensus_generate(
                swarm_manager, prompt, max_tokens, temperature
            )

        # All agree - use the first worker that actually produced text, so a
        # failed first worker cannot turn the reply into an empty string.
        best_response, best_tool_calls = usable[0]
        # Rough estimate: whitespace-separated word count averaged over the
        # non-empty responses.
        total_tokens = sum(len(text.split()) for text, _ in usable) // len(usable)
        avg_tps = 10.0  # Estimate
        return best_response, best_tool_calls, total_tokens, avg_tps

    except Exception:
        logger.exception("Error in tool consensus generation")
        # Fall back to normal generation; an error here propagates to the caller
        return await _swarm_consensus_generate(
            swarm_manager, prompt, max_tokens, temperature
        )
 async def _generate_with_federation(
    federated_swarm,
    prompt: str,
@@ -475,7 +640,7 @@ async def handle_chat_completion(
            elif tool_name == "bash":
                # Check if this was a verification command (ls, grep) vs an action command
                if "ls" in tool_result.lower() or "grep" in tool_result.lower():
-                    instruction = "CRITICAL: The listing is shown above. If the user asked to READ a specific file and you can see it exists in this listing, you MUST immediately USE THE read TOOL NOW to read it. Do not summarize the listing first - READ THE FILE immediately. If the user asked to just CHECK what files exist (without reading), then summarize. If the requested file is NOT in the listing, tell the user it doesn't exist."
+                    instruction = "CRITICAL: The listing is shown above. If the user asked to READ a specific file and you can see it exists in this listing, you MUST immediately USE THE read TOOL NOW with the exact filename from the listing. Do not summarize first - READ THE FILE immediately. Use the filename exactly as shown (e.g., 'my-secret.log' not '/path/to/my-secret.log'). If the user asked to just CHECK what files exist (without reading), then summarize. If the requested file is NOT in the listing, tell the user it doesn't exist."
                else:
                    instruction = "The command has been executed. SUMMARIZE the output above to answer the user's request. Do not call additional tools."
            else: