ternary_tests/autoresearch.sh

#!/bin/bash
# autoresearch.sh — Autonomous experiment loop
#
# Usage: ./autoresearch.sh [model] [time_budget_seconds]
#
# This script runs the autoresearch loop:
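#   1. Agent proposes a change to train.py (placeholder: the agent call is
#      not wired up yet, so each run trains with the existing train.py)
#   2. Git commit the change (pending the agent hook as well)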
#   3. Run training for fixed time budget
#   4. Extract val_ppl from results.tsv
#   5. If improved → keep; if worse → git reset
#   6. Repeat

# Abort on the first unhandled command failure.
set -e

# Positional arguments, each with a fallback:
#   $1 - model identifier forwarded to train.py via --model
#   $2 - value forwarded to train.py via --time-budget (printed as seconds)
MODEL=${1:-HuggingFaceTB/SmolLM-135M}
TIME_BUDGET=${2:-300}
RESULTS_FILE=results.tsv

# Bootstrap a repository in the current directory when it has no .git
# directory, and record everything present as the first commit.
if [[ ! -d .git ]]; then
    git init
    git add -A
    git commit -m "Initial commit"
fi

# Startup banner (one argument per output line, then a blank line).
printf '%s\n' \
    "=== Autoresearch Loop ===" \
    "Model: $MODEL" \
    "Time budget: ${TIME_BUDGET}s" \
    "Results: $RESULTS_FILE" \
    ""

# Print the PPL baseline recorded in $RESULTS_FILE.
#
# Globals:  RESULTS_FILE (read) - tab-separated results table
# Outputs:  column 7 of the most recent row whose column 3 (status) equals
#           "success", with all whitespace removed. Prints the sentinel
#           999999 when the file is missing, contains no successful row
#           (empty file, header only, only failed runs), or the stored
#           value is not a plain non-negative number.
# Returns:  0
get_best_ppl() {
    local ppl=""
    local number_re='^([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$'

    if [ -f "$RESULTS_FILE" ]; then
        # One pass over the table: remember column 7 of every successful
        # row, so the value that survives to END is the most recent one.
        ppl=$(awk -F'\t' '
            { status = $3; gsub(/[ \t\r]/, "", status) }
            status == "success" { value = $7 }
            END { print value }
        ' "$RESULTS_FILE" | tr -d '[:space:]')
    fi

    # An empty or non-numeric value would silently break the numeric
    # comparison done by the caller, so fall back to the sentinel.
    if [[ "$ppl" =~ $number_re ]]; then
        echo "$ppl"
    else
        echo "999999"
    fi
}

# Print the status of the newest row in $RESULTS_FILE: tab-separated
# column 3 of the last line with all whitespace removed. Prints "none"
# when the results file does not exist.
get_last_status() {
    if [ -f "$RESULTS_FILE" ]; then
        tail -n 1 "$RESULTS_FILE" \
            | cut -f 3 \
            | tr -d '[:space:]'
    else
        echo "none"
    fi
}

# Stage everything and snapshot it before the loop starts. The commit is
# best-effort: --allow-empty lets it succeed with nothing staged, its
# stderr is discarded, and a failure is deliberately ignored.
git add -A
if ! git commit -m "Initial setup" --allow-empty 2>/dev/null; then
    :
fi

# Loop state: run counter and the PPL baseline to beat.
RUN_NUM=0
BEST_PPL=$(get_best_ppl)

# Succeeds when $1 is a plain non-negative decimal number (optional
# fraction and exponent); fails for empty or any other text.
_is_number() {
    local number_re='^([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$'
    [[ "$1" =~ $number_re ]]
}

# Succeeds when candidate PPL $1 is strictly lower than the baseline $2.
# A candidate that is not a number never counts as an improvement; a
# baseline that is not a number is treated as "no baseline yet", so any
# valid candidate beats it.
_ppl_improved() {
    local candidate="$1" baseline="$2"
    _is_number "$candidate" || return 1
    _is_number "$baseline" || return 0
    # Values are passed with -v and coerced with "+ 0" so the comparison
    # is always numeric, never a string comparison.
    awk -v c="$candidate" -v b="$baseline" 'BEGIN { exit !((c + 0) < (b + 0)) }'
}

# Roll the working tree back to commit $1 and restore $RESULTS_FILE.
# Both steps are best-effort: errors are silenced and ignored so that a
# failed rollback does not stop the loop under `set -e`.
_revert_run() {
    git reset --hard "$1" 2>/dev/null || true
    git checkout -- "$RESULTS_FILE" 2>/dev/null || true
}

# Experiment loop: train, read the outcome from $RESULTS_FILE, then keep
# (commit) or revert. Runs until interrupted.
while true; do
    RUN_NUM=$((RUN_NUM + 1))
    echo ""
    echo "========================================"
    echo "RUN #$RUN_NUM"
    echo "Current best PPL: $BEST_PPL"
    echo "========================================"

    # Remember the commit to return to if this run is rejected
    PREV_COMMIT=$(git rev-parse HEAD)

    # Prompt the agent to make a change
    # In production, this would call the LLM agent
    # For now, we just run with current config
    echo "Running training..."

    # Run training
    START_TIME=$(date +%s)

    # Output is mirrored to a per-run log. `|| true` keeps the loop alive
    # under `set -e`; the outcome is judged from $RESULTS_FILE below, not
    # from this pipeline's exit status.
    python3 train.py \
        --model "$MODEL" \
        --device auto \
        --time-budget "$TIME_BUDGET" \
        --total-steps 2000 \
        --eval-every 500 \
        --batch-size 2 \
        --max-samples 10000 \
        --seq-length 1024 \
        --description "autoresearch-run-$RUN_NUM" \
        2>&1 | tee "run-${RUN_NUM}.log" || true

    END_TIME=$(date +%s)
    ELAPSED=$((END_TIME - START_TIME))

    # Check results
    STATUS=$(get_last_status)
    NEW_PPL=$(get_best_ppl)

    echo ""
    echo "Run #$RUN_NUM completed in ${ELAPSED}s"
    echo "Status: $STATUS"
    echo "Best PPL: $NEW_PPL"

    if [ "$STATUS" = "success" ]; then
        # Compare with previous best
        if _ppl_improved "$NEW_PPL" "$BEST_PPL"; then
            echo "IMPROVED! Keeping changes."
            BEST_PPL=$NEW_PPL
            git add -- "$RESULTS_FILE"
            git commit -m "Run #$RUN_NUM: improved PPL to $BEST_PPL"
        else
            echo "No improvement. Reverting."
            _revert_run "$PREV_COMMIT"
        fi
    else
        echo "FAILED. Reverting."
        _revert_run "$PREV_COMMIT"
    fi

    echo ""
    echo "Continuing... (Ctrl+C to stop)"
done