
Commit 1432d93

added get_best_move_venezuelan
1 parent a8d4b3d commit 1432d93

File tree

1 file changed: +204 -0 lines changed


get_best_move_venezuelan.py (+204)
@@ -0,0 +1,204 @@
# from domino_game_analyzer import GameState, DominoTile, PlayerPosition, PlayerPosition_SOUTH, PlayerPosition_NORTH
import math
from domino_data_types import GameState, DominoTile, move, PlayerPosition, PlayerPosition_SOUTH, PlayerPosition_NORTH
from domino_utils import list_possible_moves

def min_max_alpha_beta(state: GameState, depth: int, alpha: float, beta: float, cache: dict[GameState, tuple[int, int]] = {}, best_path_flag: bool = True) -> tuple[move, float, list[tuple[PlayerPosition, move]]]:
    """
    Implement the min-max algorithm with alpha-beta pruning for the domino game, including the optimal path.

    :param state: The current GameState
    :param depth: The depth to search in the game tree
    :param alpha: The best value that the maximizer currently can guarantee at that level or above
    :param beta: The best value that the minimizer currently can guarantee at that level or above
    :param cache: The cache dictionary to use for memoization
    :param best_path_flag: Flag to indicate if best_path is needed or not
    :return: A tuple of (best_move, best_score, optimal_path)
    """
    if depth == 0 or state.is_game_over():
        _, total_score = count_game_stats(state, print_stats=False, cache=cache)
        return None, total_score, []

    current_player = state.current_player
    # is_maximizing = current_player in (PlayerPosition.NORTH, PlayerPosition.SOUTH)
    is_maximizing = current_player in (PlayerPosition_NORTH, PlayerPosition_SOUTH)

    best_move = None
    best_path = []

    possible_moves = list_possible_moves(state)

    if is_maximizing:
        best_score = -math.inf
        for move in possible_moves:
            tile_and_loc_info, _, _ = move
            # tile, is_left = tile_info if tile_info is not None else (None, None)

            # if tile is None:  # Pass move
            if tile_and_loc_info is None:  # Pass move
                new_state = state.pass_turn()
            else:
                # assert is_left is not None
                tile, is_left = tile_and_loc_info
                new_state = state.play_hand(tile, is_left)

            _, score, path = min_max_alpha_beta(new_state, depth - 1, alpha, beta, cache)

            if score > best_score:
                best_score = score
                # best_move = (tile, is_left)
                # best_path = [(current_player, (tile, is_left))] + path
                best_move = tile_and_loc_info
                if best_path_flag:
                    best_path = [(current_player, tile_and_loc_info)] + path

            alpha = max(alpha, best_score)
            if beta <= alpha:
                break  # Beta cut-off
    else:
        best_score = math.inf
        for move in possible_moves:
            tile_and_loc_info, _, _ = move
            # tile, is_left = tile_and_loc_info if tile_and_loc_info is not None else (None, None)

            # if tile is None:  # Pass move
            if tile_and_loc_info is None:  # Pass move
                new_state = state.pass_turn()
            else:
                # assert is_left is not None
                tile, is_left = tile_and_loc_info
                new_state = state.play_hand(tile, is_left)

            _, score, path = min_max_alpha_beta(new_state, depth - 1, alpha, beta, cache)

            if score < best_score:
                best_score = score
                # best_move = (tile, is_left)
                best_move = tile_and_loc_info
                # best_path = [(current_player, (tile, is_left))] + path
                if best_path_flag:
                    best_path = [(current_player, tile_and_loc_info)] + path

            beta = min(beta, best_score)
            if beta <= alpha:
                break  # Alpha cut-off

    return best_move, best_score, best_path

def get_best_move_alpha_beta(state: GameState, depth: int, cache: dict[GameState, tuple[int, int]] = {}, best_path_flag: bool = True) -> tuple[move, float, list[tuple[PlayerPosition, move]]]:
    """
    Get the best move for the current player using the min-max algorithm with alpha-beta pruning, including the optimal path.

    :param state: The current GameState
    :param depth: The depth to search in the game tree
    :param cache: The cache dictionary to use for memoization
    :param best_path_flag: Flag to indicate if best_path is needed or not
    :return: A tuple of (best_move, best_score, optimal_path)
    """
    return min_max_alpha_beta(state, depth, -math.inf, math.inf, cache, best_path_flag)

# cache_hit: int = 0
# cache_miss: int = 0

def count_game_stats(initial_state: GameState, print_stats: bool = True, cache: dict[GameState, tuple[int, int]] = {}) -> tuple[int, float]:
    # global cache_hit, cache_miss

    # stack: list[tuple[GameState, list[tuple[DominoTile, bool]]]] = [(initial_state, [])]  # Stack contains (state, path) pairs
    stack: list[tuple[GameState, list[GameState]]] = [(initial_state, [])]  # Stack contains (state, path) pairs
    winning_stats = {-1: 0, 0: 0, 1: 0}

    while stack:
        state, path = stack.pop()

        if state in cache:
            # cache_hit += 1
            total_games, total_score = cache[state]

            # Update all states in the path with this result
            for path_state in reversed(path):
                if path_state in cache:
                    cache[path_state] = (
                        cache[path_state][0] + total_games,
                        cache[path_state][1] + total_score
                    )
                else:
                    cache[path_state] = (total_games, total_score)

            continue

        # cache_miss += 1

        if state.is_game_over():
            winner, pair_0_pips, pair_1_pips = determine_winning_pair(state)
            winning_stats[winner] += 1
            score = 0 if winner == -1 else (pair_1_pips if winner == 0 else -pair_0_pips)
            total_games, total_score = 1, score

            # Cache the result for this terminal state
            cache[state] = (total_games, total_score)

            # Update all states in the path with this result
            for path_state in reversed(path):
                if path_state in cache:
                    cache[path_state] = (
                        cache[path_state][0] + total_games,
                        cache[path_state][1] + total_score
                    )
                else:
                    cache[path_state] = (total_games, total_score)
        else:
            current_hand = state.get_current_hand()
            moves = []

            # Generate possible moves
            if state.right_end is None and state.left_end is None:
                moves = [(tile, True) for tile in current_hand]
            else:
                for tile in current_hand:
                    if tile.can_connect(state.left_end):
                        moves.append((tile, True))
                    if tile.can_connect(state.right_end) and state.left_end != state.right_end:
                        moves.append((tile, False))

            # If no moves are possible, pass the turn
            if not moves:
                new_state = state.pass_turn()
                stack.append((new_state, path + [state]))
            else:
                for tile, left in moves:
                    new_state = state.play_hand(tile, left)
                    stack.append((new_state, path + [state]))

    # Calculate final statistics
    total_games, total_score = cache[initial_state]
    exp_score = total_score / total_games if total_games > 0 else 0

    if print_stats:
        print(f"Number of possible game outcomes: {total_games}")
        print('Winning stats:', winning_stats)
        print(f'Expected score: {exp_score:.4f}')
        # print(f'Cache hits: {cache_hit}')
        # print(f'Cache misses: {cache_miss}')
        print(f'Total cached states: {len(cache)}')

    return total_games, exp_score

def determine_winning_pair(state: GameState) -> tuple[int, int, int]:

    pair_0_pips = sum(tile.get_pip_sum() for hand in state.player_hands[::2] for tile in hand)
    pair_1_pips = sum(tile.get_pip_sum() for hand in state.player_hands[1::2] for tile in hand)

    # Check if a player has run out of tiles
    for i, hand in enumerate(state.player_hands):
        if len(hand) == 0:
            # print(f'player {i} domino')
            return i % 2, pair_0_pips, pair_1_pips

    # If we're here, the game must be blocked
    if pair_1_pips == pair_0_pips:
        result = -1
    else:
        result = 1 if pair_1_pips < pair_0_pips else 0
    return result, pair_0_pips, pair_1_pips
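
For context, a minimal usage sketch of the new helper follows. It is not part of the committed file: how a GameState is constructed is defined in domino_data_types (not shown in this commit), so that step is left as a placeholder, and the helper name print_best_line and the depth of 8 are purely illustrative.

# Usage sketch (illustrative only; not part of this commit).
from get_best_move_venezuelan import get_best_move_alpha_beta

def print_best_line(state, depth: int = 8) -> None:  # depth value is arbitrary
    cache: dict = {}  # pass one dict across calls so repeated positions are memoized
    best_move, best_score, best_path = get_best_move_alpha_beta(state, depth, cache=cache)
    if best_move is None:
        print(f'Best option is to pass (expected score {best_score:+.2f})')
    else:
        tile, play_left = best_move
        end = 'left' if play_left else 'right'
        print(f'Play {tile} on the {end} end (expected score {best_score:+.2f})')
    for player, mv in best_path:  # the optimal line found by the search
        print(player, mv)

Passing an explicit cache dictionary, as above, sidesteps the shared mutable default argument in the module while still reusing results between calls.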

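As a concrete illustration of the scoring convention in count_game_stats and determine_winning_pair: pair 0 is players 0 and 2, pair 1 is players 1 and 3. If pair 0 wins while pair 1 still holds 23 pips, the outcome scores +23; if pair 1 wins while pair 0 holds 17 pips, it scores -17; a blocked game with equal pip counts scores 0 (and in a blocked game the pair holding fewer pips wins). count_game_stats averages these outcome scores over all reachable terminal positions, and that expected score is what min_max_alpha_beta returns at its depth cutoff.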
0 commit comments

Comments
 (0)