#!/bin/bash
# autoresearch.sh — Autonomous experiment loop
#
# Usage: ./autoresearch.sh [model] [time_budget_seconds]
#
# This script runs the autoresearch loop:
# 1. Agent proposes a change to train.py
# 2. Commit the change with git
# 3. Run training for a fixed time budget
# 4. Extract val_ppl from results.tsv
# 5. If improved → keep; if worse → git reset
# 6. Repeat

# Abort on the first unhandled command failure.
set -e

# Positional arguments and their defaults:
#   $1 - model identifier, forwarded to train.py as --model
#   $2 - time budget in seconds, forwarded to train.py as --time-budget
MODEL="${1:-HuggingFaceTB/SmolLM-135M}"
TIME_BUDGET="${2:-300}"
RESULTS_FILE="results.tsv"

# Reject a non-numeric time budget up front rather than discovering the
# mistake only after a training run has been launched with it.
if [[ ! "$TIME_BUDGET" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
  echo "Error: time_budget_seconds must be a non-negative number, got '$TIME_BUDGET'" >&2
  echo "Usage: $0 [model] [time_budget_seconds]" >&2
  exit 2
fi

# Ensure git is initialized. -e (not -d) is used because .git is a regular
# file, not a directory, inside linked worktrees and submodules.
if [[ ! -e ".git" ]]; then
  git init
  git add -A
  git commit -m "Initial commit"
fi

echo "=== Autoresearch Loop ==="
echo "Model: $MODEL"
echo "Time budget: ${TIME_BUDGET}s"
echo "Results: $RESULTS_FILE"
echo ""

#######################################
# Print the perplexity recorded by the most recent successful run.
# Globals:   RESULTS_FILE (read) - tab-separated results log
#            (falls back to "results.tsv" when unset).
# Arguments: none
# Outputs:   column 7 of the last row whose column 3 is "success" and whose
#            column 7 is a plain non-negative number; "999999" when the file
#            is missing or contains no such row (header only, empty file,
#            only failed runs).
#######################################
get_best_ppl() {
  local results="${RESULTS_FILE:-results.tsv}"

  if [ ! -f "$results" ]; then
    echo "999999"
    return
  fi

  # Scan every row instead of trusting the last line: the last line may be a
  # header, a failed run, or blank, none of which carries a usable value.
  awk -F'\t' '
    {
      status = $3
      ppl = $7
      # Strip stray blanks and carriage returns before comparing.
      gsub(/[ \t\r]/, "", status)
      gsub(/[ \t\r]/, "", ppl)
      if (status == "success" &&
          ppl ~ /^([0-9]+[.]?[0-9]*|[.][0-9]+)([eE][-+]?[0-9]+)?$/) {
        best = ppl
      }
    }
    END { print (best == "" ? "999999" : best) }
  ' "$results"
}

#######################################
# Print the status field of the most recent results row.
# Globals:   RESULTS_FILE (read) - tab-separated results log
#            (falls back to "results.tsv" when unset).
# Arguments: none
# Outputs:   column 3 of the last non-blank row with whitespace removed;
#            "none" when the file is missing, empty, or that field is empty.
#######################################
get_last_status() {
  local results="${RESULTS_FILE:-results.tsv}"

  if [ ! -f "$results" ]; then
    echo "none"
    return
  fi

  # Skip blank rows so a trailing empty line does not hide the real status.
  awk -F'\t' '
    /[^ \t\r]/ { status = $3 }
    END {
      gsub(/[ \t\r]/, "", status)
      print (status == "" ? "none" : status)
    }
  ' "$results"
}

# Stage everything and make sure HEAD exists before the loop records
# PREV_COMMIT. A commit is only created when there is no HEAD yet or something
# is actually staged, so repeated invocations do not pile up empty commits.
git add -A
if ! git rev-parse --verify --quiet HEAD >/dev/null ||
  ! git diff --cached --quiet; then
  git commit -m "Initial setup" --allow-empty ||
    echo "WARNING: could not create the setup commit" >&2
fi

# Succeeds when $1 is a plain number strictly lower than $2.
# A non-numeric $1 is never an improvement; a non-numeric $2 means there is no
# usable baseline yet, so any numeric $1 counts as an improvement.
is_lower_ppl() {
  awk -v new="$1" -v best="$2" 'BEGIN {
    num = "^([0-9]+[.]?[0-9]*|[.][0-9]+)([eE][-+]?[0-9]+)?$"
    if (new !~ num) exit 1
    if (best !~ num) exit 0
    # "+ 0" forces a numeric comparison instead of a string comparison.
    exit !((new + 0) < (best + 0))
  }'
}

# Best-effort rollback of the working tree (and the results file) to commit $1.
# Failures are reported but do not stop the loop.
revert_to() {
  git reset --hard "$1" 2>/dev/null ||
    echo "WARNING: git reset --hard $1 failed" >&2
  # Only has an effect when the results file is tracked; otherwise it fails
  # quietly, which is tolerated.
  git checkout -- "$RESULTS_FILE" 2>/dev/null || true
}

BEST_PPL=$(get_best_ppl)
RUN_NUM=0

while true; do
  RUN_NUM=$((RUN_NUM + 1))
  echo ""
  echo "========================================"
  echo "RUN #$RUN_NUM"
  echo "Current best PPL: $BEST_PPL"
  echo "========================================"

  # Save current state
  PREV_COMMIT=$(git rev-parse HEAD)

  # Prompt the agent to make a change
  # In production, this would call the LLM agent
  # For now, we just run with current config
  echo "Running training..."

  # Run training
  START_TIME=$(date +%s)

  # The subshell confines pipefail to this pipeline, so a train.py failure is
  # visible even though tee succeeds. The loop keeps going either way.
  TRAIN_RC=0
  (
    set -o pipefail
    python3 train.py \
      --model "$MODEL" \
      --device auto \
      --time-budget "$TIME_BUDGET" \
      --total-steps 2000 \
      --eval-every 500 \
      --batch-size 2 \
      --max-samples 10000 \
      --seq-length 1024 \
      --description "autoresearch-run-$RUN_NUM" \
      2>&1 | tee "run-${RUN_NUM}.log"
  ) || TRAIN_RC=$?

  END_TIME=$(date +%s)
  ELAPSED=$((END_TIME - START_TIME))

  if [ "$TRAIN_RC" -ne 0 ]; then
    echo "WARNING: training pipeline exited with status $TRAIN_RC" >&2
  fi

  # Check results
  STATUS=$(get_last_status)
  NEW_PPL=$(get_best_ppl)

  echo ""
  echo "Run #$RUN_NUM completed in ${ELAPSED}s"
  echo "Status: $STATUS"
  echo "Best PPL: $NEW_PPL"

  if [ "$STATUS" = "success" ]; then
    # Compare with previous best
    if is_lower_ppl "$NEW_PPL" "$BEST_PPL"; then
      echo "IMPROVED! Keeping changes."
      BEST_PPL=$NEW_PPL
      git add "$RESULTS_FILE"
      git commit -m "Run #$RUN_NUM: improved PPL to $BEST_PPL"
    else
      echo "No improvement. Reverting."
      revert_to "$PREV_COMMIT"
    fi
  else
    echo "FAILED. Reverting."
    revert_to "$PREV_COMMIT"
  fi

  echo ""
  echo "Continuing... (Ctrl+C to stop)"
done