-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_best_move_venezuelan.py
204 lines (165 loc) · 8.61 KB
/
get_best_move_venezuelan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# from domino_game_analyzer import GameState, DominoTile, PlayerPosition, PlayerPosition_SOUTH, PlayerPosition_NORTH
import math
from domino_data_types import GameState, DominoTile, move, PlayerPosition, PlayerPosition_SOUTH, PlayerPosition_NORTH
from domino_utils import list_possible_moves
def min_max_alpha_beta(state: GameState, depth: int, alpha: float, beta: float, cache: dict[GameState, tuple[int, int]] = {}, best_path_flag: bool = True) -> tuple[move, float, list[tuple[PlayerPosition, move]]]:
"""
Implement the min-max algorithm with alpha-beta pruning for the domino game, including the optimal path.
:param state: The current GameState
:param depth: The depth to search in the game tree
:param alpha: The best value that the maximizer currently can guarantee at that level or above
:param beta: The best value that the minimizer currently can guarantee at that level or above
:param cache: The cache dictionary to use for memoization
:param best_path_flag: Flag to indicate if best_path is needed or not
:return: A tuple of (best_move, best_score, optimal_path)
"""
if depth == 0 or state.is_game_over():
_, total_score = count_game_stats(state, print_stats=False, cache=cache)
return None, total_score, []
current_player = state.current_player
# is_maximizing = current_player in (PlayerPosition.NORTH, PlayerPosition.SOUTH)
is_maximizing = current_player in (PlayerPosition_NORTH, PlayerPosition_SOUTH)
best_move = None
best_path = []
possible_moves = list_possible_moves(state)
if is_maximizing:
best_score = -math.inf
for move in possible_moves:
tile_and_loc_info, _, _ = move
# tile, is_left = tile_info if tile_info is not None else (None, None)
# if tile is None: # Pass move
if tile_and_loc_info is None: # Pass move
new_state = state.pass_turn()
else:
# assert is_left is not None
tile, is_left = tile_and_loc_info
new_state = state.play_hand(tile, is_left)
_, score, path = min_max_alpha_beta(new_state, depth - 1, alpha, beta, cache)
if score > best_score:
best_score = score
# best_move = (tile, is_left)
# best_path = [(current_player, (tile, is_left))] + path
best_move = tile_and_loc_info
if best_path_flag:
best_path = [(current_player, tile_and_loc_info)] + path
alpha = max(alpha, best_score)
if beta <= alpha:
break # Beta cut-off
else:
best_score = math.inf
for move in possible_moves:
tile_and_loc_info, _, _ = move
# tile, is_left = tile_and_loc_info if tile_and_loc_info is not None else (None, None)
# if tile is None: # Pass move
if tile_and_loc_info is None: # Pass move
new_state = state.pass_turn()
else:
# assert is_left is not None
tile, is_left = tile_and_loc_info
new_state = state.play_hand(tile, is_left)
_, score, path = min_max_alpha_beta(new_state, depth - 1, alpha, beta, cache)
if score < best_score:
best_score = score
# best_move = (tile, is_left)
best_move = tile_and_loc_info
# best_path = [(current_player, (tile, is_left))] + path
if best_path_flag:
best_path = [(current_player, tile_and_loc_info)] + path
beta = min(beta, best_score)
if beta <= alpha:
break # Alpha cut-off
return best_move, best_score, best_path
def get_best_move_alpha_beta(state: GameState, depth: int, cache: dict[GameState, tuple[int, int]] = {}, best_path_flag: bool = True) -> tuple[move, float, list[tuple[PlayerPosition, move]]]:
"""
Get the best move for the current player using the min-max algorithm with alpha-beta pruning, including the optimal path.
:param state: The current GameState
:param depth: The depth to search in the game tree
:param cache: The cache dictionary to use for memoization
:param best_path_flag: Flag to indicate if best_path is needed or not
:return: A tuple of (best_move, best_score, optimal_path)
"""
return min_max_alpha_beta(state, depth, -math.inf, math.inf, cache, best_path_flag)
# cache_hit: int = 0
# cache_miss: int = 0
def count_game_stats(initial_state: GameState, print_stats: bool = True, cache: dict[GameState, tuple[int, int]] = {}) -> tuple[int, float]:
# global cache_hit, cache_miss
# stack: list[tuple[GameState, list[tuple[DominoTile, bool]]]] = [(initial_state, [])] # Stack contains (state, path) pairs
stack: list[tuple[GameState, list[GameState]]] = [(initial_state, [])] # Stack contains (state, path) pairs
winning_stats = {-1: 0, 0: 0, 1: 0}
while stack:
state, path = stack.pop()
if state in cache:
# cache_hit += 1
total_games, total_score = cache[state]
# Update all states in the path with this result
for path_state in reversed(path):
if path_state in cache:
cache[path_state] = (
cache[path_state][0] + total_games,
cache[path_state][1] + total_score
)
else:
cache[path_state] = (total_games, total_score)
continue
# cache_miss += 1
if state.is_game_over():
winner, pair_0_pips, pair_1_pips = determine_winning_pair(state)
winning_stats[winner] += 1
score = 0 if winner == -1 else (pair_1_pips if winner == 0 else -pair_0_pips)
total_games, total_score = 1, score
# Cache the result for this terminal state
cache[state] = (total_games, total_score)
# Update all states in the path with this result
for path_state in reversed(path):
if path_state in cache:
cache[path_state] = (
cache[path_state][0] + total_games,
cache[path_state][1] + total_score
)
else:
cache[path_state] = (total_games, total_score)
else:
current_hand = state.get_current_hand()
moves = []
# Generate possible moves
if state.right_end is None and state.left_end is None:
moves = [(tile, True) for tile in current_hand]
else:
for tile in current_hand:
if tile.can_connect(state.left_end):
moves.append((tile, True))
if tile.can_connect(state.right_end) and state.left_end != state.right_end:
moves.append((tile, False))
# If no moves are possible, pass the turn
if not moves:
new_state = state.pass_turn()
stack.append((new_state, path + [state]))
else:
for tile, left in moves:
new_state = state.play_hand(tile, left)
stack.append((new_state, path + [state]))
# Calculate final statistics
total_games, total_score = cache[initial_state]
exp_score = total_score / total_games if total_games > 0 else 0
if print_stats:
print(f"Number of possible game outcomes: {total_games}")
print('Winning stats:', winning_stats)
print(f'Expected score: {exp_score:.4f}')
# print(f'Cache hits: {cache_hit}')
# print(f'Cache misses: {cache_miss}')
print(f'Total cached states: {len(cache)}')
return total_games, exp_score
def determine_winning_pair(state: GameState) -> tuple[int, int, int]:
pair_0_pips = sum(tile.get_pip_sum() for hand in state.player_hands[::2] for tile in hand)
pair_1_pips = sum(tile.get_pip_sum() for hand in state.player_hands[1::2] for tile in hand)
# Check if a player has run out of tiles
for i, hand in enumerate(state.player_hands):
if len(hand) == 0:
# print(f'player {i} domino')
return i % 2, pair_0_pips, pair_1_pips
# If we're here, the game must be blocked
if pair_1_pips == pair_0_pips:
result = -1
else:
result = 1 if pair_1_pips < pair_0_pips else 0
return result, pair_0_pips, pair_1_pips