---
# Benchmarks moonfish against Stockfish for pull requests that target master
# and reports the outcome on the pull request.
name: Stockfish Benchmark

on:
  pull_request:
    branches:
      - master

# Scopes granted to the workflow token: read the repository contents and
# write to pull requests (used below for reactions and the result comment).
permissions:
  contents: read
  pull-requests: write

env:
  # Git LFS setting: leave LFS-tracked files as pointer files on checkout.
  GIT_LFS_SKIP_SMUDGE: "1"
  # Absolute path of the opening book inside the workspace.
  # NOTE(review): presumably read by moonfish at runtime - confirm.
  MOONFISH_OPENING_BOOK: ${{ github.workspace }}/opening_book/cerebellum.bin

jobs:
| 16 | + react-start: |
| 17 | + runs-on: ubuntu-latest |
| 18 | + if: github.event_name == 'pull_request' |
| 19 | + steps: |
| 20 | + - name: Add eyes reaction to PR |
| 21 | + env: |
| 22 | + GH_TOKEN: ${{ github.token }} |
| 23 | + run: | |
| 24 | + gh api repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/reactions \ |
| 25 | + -f content='eyes' --silent || true |
| 26 | +
|
| 27 | + benchmark: |
| 28 | + runs-on: ubuntu-latest |
| 29 | + needs: react-start |
| 30 | + strategy: |
| 31 | + fail-fast: false |
| 32 | + matrix: |
| 33 | + chunk: [0, 1, 2, 3, 4] # 5 parallel jobs, 20 rounds each = 100 total per skill level |
| 34 | + skill_level: [3, 4, 5] # Test against multiple skill levels |
| 35 | + env: |
| 36 | + UV_SYSTEM_PYTHON: 1 |
| 37 | + |
| 38 | + steps: |
| 39 | + - uses: actions/checkout@v4 |
| 40 | + with: |
| 41 | + lfs: false |
| 42 | + fetch-depth: 0 |
| 43 | + |
| 44 | + - name: Ensure opening book |
| 45 | + run: | |
| 46 | + set -euo pipefail |
| 47 | +
|
| 48 | + if [ -f opening_book/cerebellum.bin ]; then |
| 49 | + if head -1 opening_book/cerebellum.bin | grep -q "git-lfs"; then |
| 50 | + echo "LFS pointer detected; downloading opening book..." |
| 51 | + rm -f opening_book/cerebellum.bin |
| 52 | + else |
| 53 | + echo "Opening book already present." |
| 54 | + exit 0 |
| 55 | + fi |
| 56 | + fi |
| 57 | +
|
| 58 | + echo "Downloading full opening book from release..." |
| 59 | + curl -L -o opening_book/cerebellum.bin "https://github.com/luccabb/moonfish/releases/download/v1.0.0/cerebellum.bin" |
| 60 | +
|
| 61 | + - name: Verify opening book |
| 62 | + run: | |
| 63 | + ls -lh opening_book/cerebellum.bin |
| 64 | + python - <<'PY' |
| 65 | + import os, sys |
| 66 | + path = "opening_book/cerebellum.bin" |
| 67 | + size = os.path.getsize(path) |
| 68 | + print(f"opening book size: {size} bytes") |
| 69 | + if size < 10_000_000: |
| 70 | + print("opening book too small; likely an LFS pointer", file=sys.stderr) |
| 71 | + sys.exit(1) |
| 72 | + PY |
| 73 | +
|
| 74 | + - name: Install uv |
| 75 | + uses: astral-sh/setup-uv@v5 |
| 76 | + with: |
| 77 | + enable-cache: true |
| 78 | + cache-dependency-glob: "requirements.txt" |
| 79 | + |
| 80 | + - name: Set up Python |
| 81 | + uses: actions/setup-python@v5 |
| 82 | + with: |
| 83 | + python-version: '3.10' |
| 84 | + |
| 85 | + - name: Install dependencies |
| 86 | + run: make install |
| 87 | + |
| 88 | + - name: Validate opening book with python-chess |
| 89 | + run: | |
| 90 | + python - <<'PY' |
| 91 | + import chess |
| 92 | + import chess.polyglot |
| 93 | + book_path = "opening_book/cerebellum.bin" |
| 94 | + with chess.polyglot.MemoryMappedReader(book_path) as reader: |
| 95 | + entry = reader.find(chess.Board()) |
| 96 | + print(f"book entry: {entry.move.uci()}") |
| 97 | + PY |
| 98 | +
|
| 99 | + - name: Install Stockfish |
| 100 | + run: | |
| 101 | + sudo apt-get update |
| 102 | + sudo apt-get install -y stockfish |
| 103 | +
|
| 104 | + - name: Install cutechess-cli dependencies |
| 105 | + run: | |
| 106 | + sudo apt-get install -y cmake qt5-qmake qtbase5-dev qtbase5-dev-tools libqt5svg5-dev |
| 107 | +
|
| 108 | + - name: Cache cutechess-cli |
| 109 | + id: cache-cutechess |
| 110 | + uses: actions/cache@v4 |
| 111 | + with: |
| 112 | + path: /usr/local/bin/cutechess-cli |
| 113 | + key: cutechess-cli-1.4.0 |
| 114 | + |
| 115 | + - name: Build cutechess-cli |
| 116 | + if: steps.cache-cutechess.outputs.cache-hit != 'true' |
| 117 | + run: | |
| 118 | + git clone --depth 1 --branch v1.4.0 https://github.com/cutechess/cutechess.git /tmp/cutechess |
| 119 | + cd /tmp/cutechess |
| 120 | + mkdir build && cd build |
| 121 | + cmake .. |
| 122 | + make -j$(nproc) |
| 123 | + sudo cp cutechess-cli /usr/local/bin/ |
| 124 | +
|
| 125 | + - name: Build moonfish binary |
| 126 | + run: make build-lichess |
| 127 | + |
| 128 | + - name: Run Stockfish benchmark |
| 129 | + run: | |
| 130 | + CHUNK=${{ matrix.chunk }} |
| 131 | + SKILL=${{ matrix.skill_level }} |
| 132 | + ROUNDS_PER_CHUNK=20 |
| 133 | + SEED=$((CHUNK * 1000 + SKILL * 100 + 42)) # Different seed per chunk/skill for opening variety |
| 134 | +
|
| 135 | + echo "Running moonfish vs Stockfish benchmark (chunk $CHUNK, skill $SKILL)..." |
| 136 | + echo "Stockfish skill level: $SKILL" |
| 137 | + echo "Moonfish: 60s per move, Stockfish: 60+5 time control" |
| 138 | + echo "Rounds: $ROUNDS_PER_CHUNK, Concurrency: $(nproc), Seed: $SEED" |
| 139 | + echo "" |
| 140 | +
|
| 141 | + cutechess-cli \ |
| 142 | + -engine name=moonfish cmd=./dist/moonfish dir=. proto=uci tc=inf st=60 timemargin=10000 \ |
| 143 | + -engine name=stockfish cmd=stockfish proto=uci option.Skill\ Level=$SKILL option.Threads=1 tc=60+5 timemargin=10000 \ |
| 144 | + -rounds $ROUNDS_PER_CHUNK \ |
| 145 | + -repeat \ |
| 146 | + -concurrency 20 \ |
| 147 | + -pgnout benchmark-skill$SKILL-chunk$CHUNK.pgn \ |
| 148 | + -srand $SEED \ |
| 149 | + -recover \ |
| 150 | + 2>&1 | tee benchmark-skill$SKILL-chunk$CHUNK.log |
| 151 | +
|
| 152 | + echo "" |
| 153 | + echo "=== Benchmark Results (Skill $SKILL, Chunk $CHUNK) ===" |
| 154 | + tail -20 benchmark-skill$SKILL-chunk$CHUNK.log |
| 155 | +
|
| 156 | + - name: Parse results |
| 157 | + run: | |
| 158 | + CHUNK=${{ matrix.chunk }} |
| 159 | + SKILL=${{ matrix.skill_level }} |
| 160 | + PGN="benchmark-skill$SKILL-chunk$CHUNK.pgn" |
| 161 | +
|
| 162 | + # Extract score line from log |
| 163 | + SCORE=$(grep "Score of moonfish vs stockfish:" benchmark-skill$SKILL-chunk$CHUNK.log | tail -1) |
| 164 | + WINS=$(echo "$SCORE" | sed -E 's/.*: ([0-9]+) - ([0-9]+) - ([0-9]+).*/\1/') |
| 165 | + LOSSES=$(echo "$SCORE" | sed -E 's/.*: ([0-9]+) - ([0-9]+) - ([0-9]+).*/\2/') |
| 166 | + DRAWS=$(echo "$SCORE" | sed -E 's/.*: ([0-9]+) - ([0-9]+) - ([0-9]+).*/\3/') |
| 167 | +
|
| 168 | + # Parse PGN for detailed stats |
| 169 | + # Moonfish as White: wins/losses/draws |
| 170 | + WHITE_WINS=$(grep -B5 'Result "1-0"' "$PGN" | grep -c 'White "moonfish"' || echo 0) |
| 171 | + WHITE_LOSSES=$(grep -B5 'Result "0-1"' "$PGN" | grep -c 'White "moonfish"' || echo 0) |
| 172 | + WHITE_DRAWS=$(grep -B5 'Result "1/2-1/2"' "$PGN" | grep -c 'White "moonfish"' || echo 0) |
| 173 | +
|
| 174 | + # Moonfish as Black: wins/losses/draws |
| 175 | + BLACK_WINS=$(grep -B5 'Result "0-1"' "$PGN" | grep -c 'Black "moonfish"' || echo 0) |
| 176 | + BLACK_LOSSES=$(grep -B5 'Result "1-0"' "$PGN" | grep -c 'Black "moonfish"' || echo 0) |
| 177 | + BLACK_DRAWS=$(grep -B5 'Result "1/2-1/2"' "$PGN" | grep -c 'Black "moonfish"' || echo 0) |
| 178 | +
|
| 179 | + # Save detailed results |
| 180 | + cat > results-skill$SKILL-chunk$CHUNK.txt << EOF |
| 181 | + SKILL=$SKILL |
| 182 | + WINS=$WINS |
| 183 | + LOSSES=$LOSSES |
| 184 | + DRAWS=$DRAWS |
| 185 | + WHITE_WINS=$WHITE_WINS |
| 186 | + WHITE_LOSSES=$WHITE_LOSSES |
| 187 | + WHITE_DRAWS=$WHITE_DRAWS |
| 188 | + BLACK_WINS=$BLACK_WINS |
| 189 | + BLACK_LOSSES=$BLACK_LOSSES |
| 190 | + BLACK_DRAWS=$BLACK_DRAWS |
| 191 | + EOF |
| 192 | +
|
| 193 | + echo "Skill $SKILL, Chunk $CHUNK: W=$WINS L=$LOSSES D=$DRAWS (White: $WHITE_WINS-$WHITE_LOSSES-$WHITE_DRAWS, Black: $BLACK_WINS-$BLACK_LOSSES-$BLACK_DRAWS)" |
| 194 | +
|
| 195 | + - name: Upload chunk results |
| 196 | + uses: actions/upload-artifact@v4 |
| 197 | + if: always() |
| 198 | + with: |
| 199 | + name: benchmark-skill${{ matrix.skill_level }}-chunk${{ matrix.chunk }} |
| 200 | + path: | |
| 201 | + benchmark-skill${{ matrix.skill_level }}-chunk${{ matrix.chunk }}.pgn |
| 202 | + benchmark-skill${{ matrix.skill_level }}-chunk${{ matrix.chunk }}.log |
| 203 | + results-skill${{ matrix.skill_level }}-chunk${{ matrix.chunk }}.txt |
| 204 | +
|
| 205 | + aggregate: |
| 206 | + runs-on: ubuntu-latest |
| 207 | + needs: benchmark |
| 208 | + if: ${{ !cancelled() && contains(join(needs.benchmark.result, ','), 'success') }} |
| 209 | + steps: |
| 210 | + - uses: actions/checkout@v4 |
| 211 | + |
| 212 | + - name: Download all chunk results |
| 213 | + uses: actions/download-artifact@v4 |
| 214 | + with: |
| 215 | + pattern: benchmark-skill*-chunk* |
| 216 | + merge-multiple: true |
| 217 | + |
| 218 | + - name: Merge all PGN files |
| 219 | + run: | |
| 220 | + for SKILL in 3 4 5; do |
| 221 | + cat benchmark-skill$SKILL-chunk*.pgn > benchmark-skill$SKILL-all.pgn 2>/dev/null || echo "No PGN files for skill $SKILL" |
| 222 | + done |
| 223 | +
|
| 224 | + - name: Aggregate results |
| 225 | + run: | |
| 226 | + echo "Aggregating results from all chunks..." |
| 227 | +
|
| 228 | + # Build comment body |
| 229 | + { |
| 230 | + echo "## 🔬 Stockfish Benchmark Results" |
| 231 | + echo "" |
| 232 | +
|
| 233 | + for SKILL in 3 4 5; do |
| 234 | + # Initialize counters |
| 235 | + TOTAL_WINS=0 TOTAL_LOSSES=0 TOTAL_DRAWS=0 |
| 236 | + TOTAL_WHITE_WINS=0 TOTAL_WHITE_LOSSES=0 TOTAL_WHITE_DRAWS=0 |
| 237 | + TOTAL_BLACK_WINS=0 TOTAL_BLACK_LOSSES=0 TOTAL_BLACK_DRAWS=0 |
| 238 | +
|
| 239 | + for f in results-skill$SKILL-chunk*.txt; do |
| 240 | + if [ -f "$f" ]; then |
| 241 | + eval "$(grep -E '^[A-Z_]+=' "$f" | sed 's/^[[:space:]]*//')" |
| 242 | + TOTAL_WINS=$((TOTAL_WINS + WINS)) |
| 243 | + TOTAL_LOSSES=$((TOTAL_LOSSES + LOSSES)) |
| 244 | + TOTAL_DRAWS=$((TOTAL_DRAWS + DRAWS)) |
| 245 | + TOTAL_WHITE_WINS=$((TOTAL_WHITE_WINS + WHITE_WINS)) |
| 246 | + TOTAL_WHITE_LOSSES=$((TOTAL_WHITE_LOSSES + WHITE_LOSSES)) |
| 247 | + TOTAL_WHITE_DRAWS=$((TOTAL_WHITE_DRAWS + WHITE_DRAWS)) |
| 248 | + TOTAL_BLACK_WINS=$((TOTAL_BLACK_WINS + BLACK_WINS)) |
| 249 | + TOTAL_BLACK_LOSSES=$((TOTAL_BLACK_LOSSES + BLACK_LOSSES)) |
| 250 | + TOTAL_BLACK_DRAWS=$((TOTAL_BLACK_DRAWS + BLACK_DRAWS)) |
| 251 | + fi |
| 252 | + done |
| 253 | +
|
| 254 | + TOTAL=$((TOTAL_WINS + TOTAL_LOSSES + TOTAL_DRAWS)) |
| 255 | + WHITE_TOTAL=$((TOTAL_WHITE_WINS + TOTAL_WHITE_LOSSES + TOTAL_WHITE_DRAWS)) |
| 256 | + BLACK_TOTAL=$((TOTAL_BLACK_WINS + TOTAL_BLACK_LOSSES + TOTAL_BLACK_DRAWS)) |
| 257 | +
|
| 258 | + echo "### vs Stockfish Skill Level $SKILL" |
| 259 | + echo "" |
| 260 | + echo "| Metric | Wins | Losses | Draws | Total | Win % |" |
| 261 | + echo "|--------|------|--------|-------|-------|-------|" |
| 262 | +
|
| 263 | + if [ "$TOTAL" -gt 0 ]; then |
| 264 | + WIN_RATE=$(echo "scale=1; $TOTAL_WINS * 100 / $TOTAL" | bc) |
| 265 | + echo "| **Overall** | $TOTAL_WINS | $TOTAL_LOSSES | $TOTAL_DRAWS | $TOTAL | ${WIN_RATE}% |" |
| 266 | + fi |
| 267 | + if [ "$WHITE_TOTAL" -gt 0 ]; then |
| 268 | + WHITE_WIN_RATE=$(echo "scale=1; $TOTAL_WHITE_WINS * 100 / $WHITE_TOTAL" | bc) |
| 269 | + echo "| As White | $TOTAL_WHITE_WINS | $TOTAL_WHITE_LOSSES | $TOTAL_WHITE_DRAWS | $WHITE_TOTAL | ${WHITE_WIN_RATE}% |" |
| 270 | + fi |
| 271 | + if [ "$BLACK_TOTAL" -gt 0 ]; then |
| 272 | + BLACK_WIN_RATE=$(echo "scale=1; $TOTAL_BLACK_WINS * 100 / $BLACK_TOTAL" | bc) |
| 273 | + echo "| As Black | $TOTAL_BLACK_WINS | $TOTAL_BLACK_LOSSES | $TOTAL_BLACK_DRAWS | $BLACK_TOTAL | ${BLACK_WIN_RATE}% |" |
| 274 | + fi |
| 275 | +
|
| 276 | + # Parse game endings (excluding checkmates, which are covered by win/loss stats) |
| 277 | + PGN="benchmark-skill$SKILL-all.pgn" |
| 278 | + if [ -f "$PGN" ]; then |
| 279 | + ENDINGS=$(grep -oE ', [^}]+\}' "$PGN" | sed 's/, //; s/}//' | grep -v 'mates' | sort | uniq -c | sort -rn) |
| 280 | + if [ -n "$ENDINGS" ]; then |
| 281 | + echo "" |
| 282 | + echo "**Non-checkmate endings:**" |
| 283 | + echo "$ENDINGS" | while read count ending; do |
| 284 | + echo "- $ending: $count" |
| 285 | + done |
| 286 | + fi |
| 287 | + fi |
| 288 | + echo "" |
| 289 | + done |
| 290 | +
|
| 291 | + echo "<details><summary>Configuration</summary>" |
| 292 | + echo "" |
| 293 | + echo "- 5 chunks × 20 rounds × 3 skill levels = 300 total games" |
| 294 | + echo "- Each opening played with colors reversed (-repeat) for fairness" |
| 295 | + echo "- Moonfish: 60s per move" |
| 296 | + echo "- Stockfish: 60+5 time control" |
| 297 | + echo "" |
| 298 | + echo "</details>" |
| 299 | + } > pr-comment.md |
| 300 | +
|
| 301 | + # Also write to step summary |
| 302 | + cat pr-comment.md >> $GITHUB_STEP_SUMMARY |
| 303 | +
|
| 304 | + - name: Comment on PR |
| 305 | + if: github.event_name == 'pull_request' |
| 306 | + env: |
| 307 | + GH_TOKEN: ${{ github.token }} |
| 308 | + run: | |
| 309 | + gh pr comment ${{ github.event.pull_request.number }} --body-file pr-comment.md |
| 310 | +
|
| 311 | + - name: Update PR reaction (eyes -> thumbs up) |
| 312 | + if: github.event_name == 'pull_request' |
| 313 | + env: |
| 314 | + GH_TOKEN: ${{ github.token }} |
| 315 | + run: | |
| 316 | + # Remove eyes reaction |
| 317 | + REACTIONS=$(gh api repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/reactions --jq '.[] | select(.content == "eyes") | .id' || true) |
| 318 | + for ID in $REACTIONS; do |
| 319 | + gh api -X DELETE repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/reactions/$ID --silent || true |
| 320 | + done |
| 321 | + # Add thumbs up |
| 322 | + gh api repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/reactions \ |
| 323 | + -f content='+1' --silent || true |
| 324 | +
|
| 325 | + - name: Upload aggregated results |
| 326 | + uses: actions/upload-artifact@v4 |
| 327 | + with: |
| 328 | + name: benchmark-aggregated |
| 329 | + path: | |
| 330 | + benchmark-skill*-all.pgn |
| 331 | + results-*.txt |
0 commit comments