Skip to content

Commit bb2ac9f

Browse files
luccabb and claude committed
[2/9] Fix transposition table with proper bounds
Implements correct transposition table behavior with bound types:

**Transposition Table Changes:**
- Add `Bound` enum: EXACT, LOWER_BOUND, UPPER_BOUND
- Use Zobrist hash as cache key (fast integer vs slow FEN string)
- Store bound type and depth with each cache entry
- Only use cached scores when depth is sufficient
- Properly handle bound types in lookups:
  - EXACT: use score directly
  - LOWER_BOUND: use if score >= beta (fail high)
  - UPPER_BOUND: use if score <= alpha (fail low)

**Null Move Pruning Fix:**
- Added missing `null_move` parameter check (was always trying null move)

**Parallel Engine Updates:**
- Update lazy_smp, l1p, l2p to use new Zobrist hash cache key
- Add context managers for Pool/Manager (proper resource cleanup)
- Fix score negation in l1p (opponent perspective -> our perspective)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 74dd444 commit bb2ac9f

4 files changed

Lines changed: 179 additions & 147 deletions

File tree

moonfish/engines/alpha_beta.py

Lines changed: 64 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,9 @@
11
from copy import copy
2+
from enum import IntEnum
23
from multiprocessing.managers import DictProxy
3-
from typing import Dict, Optional, Tuple
4+
from typing import Dict, Optional, Tuple, Union
45

6+
import chess.polyglot
57
import chess.syzygy
68
from chess import Board, Move
79

@@ -10,9 +12,20 @@
1012
from moonfish.move_ordering import organize_moves, organize_moves_quiescence
1113
from moonfish.psqt import board_evaluation, count_pieces
1214

13-
CACHE_KEY = Dict[
14-
Tuple[str, int, bool, float, float], Tuple[float | int, Optional[Move]]
15-
]
15+
16+
class Bound(IntEnum):
17+
"""Transposition table bound types."""
18+
19+
EXACT = 0 # Score is exact (PV node, score was within alpha-beta window)
20+
LOWER_BOUND = 1 # Score is at least this value (failed high / beta cutoff)
21+
UPPER_BOUND = 2 # Score is at most this value (failed low)
22+
23+
24+
# Depth value for terminal positions (checkmate/stalemate) - always usable
25+
DEPTH_MAX = 10000
26+
27+
# Cache: zobrist_hash -> (score, best_move, bound_type, depth)
28+
CACHE_TYPE = Dict[int, Tuple[Union[float, int], Optional[Move], Bound, int]]
1629

1730

1831
class AlphaBeta:
@@ -124,7 +137,7 @@ def negamax(
124137
board: Board,
125138
depth: int,
126139
null_move: bool,
127-
cache: DictProxy | CACHE_KEY,
140+
cache: DictProxy | CACHE_TYPE,
128141
alpha: float = float("-inf"),
129142
beta: float = float("inf"),
130143
) -> Tuple[float | int, Optional[Move]]:
@@ -157,17 +170,36 @@ def negamax(
157170
Returns:
158171
- best_score, best_move: returns best move that it found and its value.
159172
"""
160-
cache_key = (board.fen(), depth, null_move, alpha, beta)
161-
# check if board was already evaluated
173+
original_alpha = alpha
174+
cache_key = chess.polyglot.zobrist_hash(board)
175+
176+
# Check transposition table
162177
if cache_key in cache:
163-
return cache[cache_key]
178+
cached_score, cached_move, cached_bound, cached_depth = cache[cache_key]
179+
180+
# Only use score if cached search was at least as deep as we need
181+
# Use cached result if:
182+
# - EXACT: score is exact
183+
# - LOWER_BOUND and score >= beta: true score is at least cached, causes cutoff
184+
# - UPPER_BOUND and score <= alpha: true score is at most cached, no improvement
185+
if cached_depth >= depth and (
186+
cached_bound == Bound.EXACT
187+
or (cached_bound == Bound.LOWER_BOUND and cached_score >= beta)
188+
or (cached_bound == Bound.UPPER_BOUND and cached_score <= alpha)
189+
):
190+
return cached_score, cached_move
164191

165192
if board.is_checkmate():
166-
cache[cache_key] = (-self.config.checkmate_score, None)
193+
cache[cache_key] = (
194+
-self.config.checkmate_score,
195+
None,
196+
Bound.EXACT,
197+
DEPTH_MAX,
198+
)
167199
return (-self.config.checkmate_score, None)
168200

169201
if board.is_stalemate():
170-
cache[cache_key] = (0, None)
202+
cache[cache_key] = (0, None, Bound.EXACT, DEPTH_MAX)
171203
return (0, None)
172204

173205
# recursion base case
@@ -179,12 +211,13 @@ def negamax(
179211
alpha=alpha,
180212
beta=beta,
181213
)
182-
cache[cache_key] = (board_score, None)
214+
cache[cache_key] = (board_score, None, Bound.EXACT, depth)
183215
return board_score, None
184216

185-
# null move prunning
217+
# null move pruning
186218
if (
187219
self.config.null_move
220+
and null_move
188221
and depth >= (self.config.null_move_r + 1)
189222
and not board.is_check()
190223
):
@@ -201,12 +234,11 @@ def negamax(
201234
)[0]
202235
board.pop()
203236
if board_score >= beta:
204-
cache[cache_key] = (beta, None)
237+
# Null move confirmed beta cutoff - this is a lower bound
238+
cache[cache_key] = (beta, None, Bound.LOWER_BOUND, depth)
205239
return beta, None
206240

207241
best_move = None
208-
209-
# initializing best_score
210242
best_score = float("-inf")
211243
moves = organize_moves(board)
212244

@@ -230,36 +262,38 @@ def negamax(
230262
# take move back
231263
board.pop()
232264

233-
# beta-cutoff
234-
if board_score >= beta:
235-
cache[cache_key] = (board_score, move)
236-
return board_score, move
237-
238265
# update best move
239266
if board_score > best_score:
240267
best_score = board_score
241268
best_move = move
242269

243-
# setting alpha variable to do pruning
244-
alpha = max(alpha, board_score)
270+
# beta-cutoff: opponent won't allow this position
271+
if best_score >= beta:
272+
# LOWER_BOUND: true score is at least best_score
273+
cache[cache_key] = (best_score, best_move, Bound.LOWER_BOUND, depth)
274+
return best_score, best_move
245275

246-
# alpha beta pruning when we already found a solution that is at least as
247-
# good as the current one those branches won't be able to influence the
248-
# final decision so we don't need to waste time analyzing them
249-
if alpha >= beta:
250-
break
276+
# update alpha
277+
alpha = max(alpha, best_score)
251278

252279
# if no best move, make a random one
253280
if not best_move:
254281
best_move = self.random_move(board)
255282

256-
# save result before returning
257-
cache[cache_key] = (best_score, best_move)
283+
# Determine bound type based on whether we improved alpha
284+
if best_score <= original_alpha:
285+
# Failed low: we didn't find anything better than what we already had
286+
bound = Bound.UPPER_BOUND
287+
else:
288+
# Score is exact: we found a score within the window
289+
bound = Bound.EXACT
290+
291+
cache[cache_key] = (best_score, best_move, bound, depth)
258292
return best_score, best_move
259293

260294
def search_move(self, board: Board) -> Move:
261295
# create shared cache
262-
cache: CACHE_KEY = {}
296+
cache: CACHE_TYPE = {}
263297

264298
best_move = self.negamax(
265299
board, copy(self.config.negamax_depth), self.config.null_move, cache

moonfish/engines/l1p_alpha_beta.py

Lines changed: 34 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -15,35 +15,38 @@ class Layer1ParallelAlphaBeta(AlphaBeta):
1515
def search_move(self, board: Board) -> Move:
1616
# start multiprocessing
1717
nprocs = cpu_count()
18-
pool = Pool(processes=nprocs)
19-
manager = Manager()
20-
shared_cache = manager.dict()
21-
22-
# creating list of moves at layer 1
23-
moves = list(board.legal_moves)
24-
arguments = []
25-
for move in moves:
26-
board.push(move)
27-
arguments.append(
28-
(
29-
copy(board),
30-
copy(self.config.negamax_depth) - 1,
31-
self.config.null_move,
32-
shared_cache,
18+
19+
with Pool(processes=nprocs) as pool, Manager() as manager:
20+
shared_cache = manager.dict()
21+
22+
# creating list of moves at layer 1
23+
moves = list(board.legal_moves)
24+
arguments = []
25+
for move in moves:
26+
board.push(move)
27+
arguments.append(
28+
(
29+
copy(board),
30+
copy(self.config.negamax_depth) - 1,
31+
self.config.null_move,
32+
shared_cache,
33+
)
3334
)
34-
)
35-
board.pop()
36-
37-
# executing all the moves at layer 1 in parallel
38-
# starmap blocks until all process are done
39-
processes = pool.starmap(self.negamax, arguments)
40-
results = []
41-
42-
# inserting move information in the results
43-
for i in range(len(processes)):
44-
results.append((*processes[i], moves[i]))
45-
46-
# sorting results and getting best move
47-
results.sort(key=lambda a: a[0])
48-
best_move = results[0][2]
49-
return best_move
35+
board.pop()
36+
37+
# executing all the moves at layer 1 in parallel
38+
# starmap blocks until all processes are done
39+
processes = pool.starmap(self.negamax, arguments)
40+
results = []
41+
42+
# inserting move information in the results
43+
# negamax returns (score, best_move) - we negate score since
44+
# it's from opponent's perspective
45+
for i in range(len(processes)):
46+
score = -processes[i][0] # Negate: opponent's -> our perspective
47+
results.append((score, processes[i][1], moves[i]))
48+
49+
# sorting results by score (descending) and getting best move
50+
results.sort(key=lambda a: a[0], reverse=True)
51+
best_move = results[0][2]
52+
return best_move

moonfish/engines/l2p_alpha_beta.py

Lines changed: 59 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -80,65 +80,65 @@ def search_move(self, board: Board) -> Move:
8080
START_LAYER = 2
8181
# start multiprocessing
8282
nprocs = cpu_count()
83-
pool = Pool(processes=nprocs)
84-
manager = Manager()
85-
shared_cache = manager.dict()
8683

87-
# pointer that help us in finding the best next move
88-
board_to_move_that_generates_it = manager.dict()
84+
with Pool(processes=nprocs) as pool, Manager() as manager:
85+
shared_cache = manager.dict()
86+
87+
# pointer that help us in finding the best next move
88+
board_to_move_that_generates_it = manager.dict()
89+
90+
# starting board list
91+
board_list = [(board, board, 0)]
92+
93+
# generating all possible boards for up to 2 moves ahead
94+
for _ in range(START_LAYER):
95+
arguments = [
96+
(board, board_to_move_that_generates_it, layer)
97+
for board, _, layer in board_list
98+
]
99+
processes = pool.starmap(self.generate_board_and_moves, arguments)
100+
board_list = [board for board in sum(processes, [])]
101+
102+
negamax_arguments = [
103+
(
104+
board,
105+
copy(self.config.negamax_depth) - START_LAYER,
106+
self.config.null_move,
107+
shared_cache,
108+
)
109+
for board, _, _ in board_list
110+
]
89111

90-
# starting board list
91-
board_list = [(board, board, 0)]
112+
parallel_layer_result = pool.starmap(self.negamax, negamax_arguments)
92113

93-
# generating all possible boards for up to 2 moves ahead
94-
for _ in range(START_LAYER):
95-
arguments = [
96-
(board, board_to_move_that_generates_it, layer)
97-
for board, _, layer in board_list
98-
]
99-
processes = pool.starmap(self.generate_board_and_moves, arguments)
100-
board_list = [board for board in sum(processes, [])]
101-
102-
negamax_arguments = [
103-
(
104-
board,
105-
copy(self.config.negamax_depth) - START_LAYER,
106-
self.config.null_move,
107-
shared_cache,
108-
)
109-
for board, _, _ in board_list
110-
]
111-
112-
parallel_layer_result = pool.starmap(self.negamax, negamax_arguments)
113-
114-
# grouping output based on the board that generates it
115-
groups = defaultdict(list)
116-
117-
# adding information about the board and layer
118-
# that generates the results and separating them
119-
# into groups based on the root board
120-
for i in range(len(parallel_layer_result)):
121-
groups[board_list[i][1].fen()].append(
122-
(*parallel_layer_result[i], board_list[i][0], board_list[i][2])
123-
)
124-
125-
best_boards = []
126-
127-
for group in groups.values():
128-
# layer and checkmate corrections
129-
# they are needed to adjust for
130-
# boards from different layers
131-
group = list(map(LAYER_SIGNAL_CORRECTION, group))
132-
group = list(map(self.checkmate_correction, group))
133-
# get best move from group
134-
group.sort(key=lambda a: a[0])
135-
best_boards.append(group[0])
136-
137-
# get best board
138-
best_boards.sort(key=lambda a: a[0], reverse=True)
139-
best_board = best_boards[0][2].fen()
140-
141-
# get move that results in best board
142-
best_move = board_to_move_that_generates_it[best_board]
143-
144-
return best_move
114+
# grouping output based on the board that generates it
115+
groups = defaultdict(list)
116+
117+
# adding information about the board and layer
118+
# that generates the results and separating them
119+
# into groups based on the root board
120+
for i in range(len(parallel_layer_result)):
121+
groups[board_list[i][1].fen()].append(
122+
(*parallel_layer_result[i], board_list[i][0], board_list[i][2])
123+
)
124+
125+
best_boards = []
126+
127+
for group in groups.values():
128+
# layer and checkmate corrections
129+
# they are needed to adjust for
130+
# boards from different layers
131+
group = list(map(LAYER_SIGNAL_CORRECTION, group))
132+
group = list(map(self.checkmate_correction, group))
133+
# get best move from group
134+
group.sort(key=lambda a: a[0])
135+
best_boards.append(group[0])
136+
137+
# get best board
138+
best_boards.sort(key=lambda a: a[0], reverse=True)
139+
best_board = best_boards[0][2].fen()
140+
141+
# get move that results in best board
142+
best_move = board_to_move_that_generates_it[best_board]
143+
144+
return best_move

0 commit comments

Comments
 (0)