1
1
from DominoPlayer import HumanPlayer , available_moves , stats
2
2
from collections import defaultdict
3
3
from DominoGameState import DominoGameState
4
- from domino_data_types import DominoTile , PlayerPosition , GameState , PlayerPosition_SOUTH , PlayerPosition_names , move
4
+ from domino_data_types import DominoTile , PlayerPosition , GameState , PlayerPosition_SOUTH , PlayerPosition_names , PlayerTiles , move
5
5
from get_best_move2 import get_best_move_alpha_beta
6
6
from domino_utils import history_to_domino_tiles_history , list_possible_moves , list_possible_moves_from_hand
7
7
from domino_game_tracker import domino_game_state_our_perspective , generate_sample_from_game_state
@@ -55,9 +55,9 @@ def next_move(self, game_state: DominoGameState, player_hand: list[tuple[int,int
55
55
self .print_verbose_info (_player_hand , _unplayed_tiles , _knowledge_tracker , _player_tiles_count , _starting_player )
56
56
57
57
# num_samples = 1000 if len(game_state.history) > 8 else 100 if len(game_state.history) > 4 else 25 if len(game_state.history) > 0 else 1000
58
- num_samples = 24
58
+ # num_samples = 24
59
59
60
- best_move = self .get_best_move (set (_player_hand ), _remaining_tiles , _knowledge_tracker , _player_tiles_count , _board_ends , num_samples , verbose = verbose )
60
+ best_move = self .get_best_move (set (_player_hand ), _remaining_tiles , _knowledge_tracker , _player_tiles_count , _board_ends , verbose = verbose )
61
61
62
62
if best_move is None :
63
63
return None
@@ -67,8 +67,120 @@ def next_move(self, game_state: DominoGameState, player_hand: list[tuple[int,int
67
67
return (tile .top , tile .bottom ), side
68
68
69
69
def update_unlikely_tiles(self, game_state: DominoGameState, player_from_south_pov: int, actual_move: tuple[tuple[int, int], str], tiles_not_in_players_hand: list[tuple[int, int]]) -> None:
    """Work-in-progress hook for inferring tiles a player is unlikely to hold.

    At present the method only derives the tiles that could have been played
    instead of ``actual_move``; the sampling and scoring steps are still TODO.
    Nothing is stored and the method always returns ``None``.

    Args:
        game_state: State whose ``ends`` and unplayed tiles are inspected.
        player_from_south_pov: Currently unused.
        actual_move: Currently unused.
        tiles_not_in_players_hand: Raw tiles excluded from the candidates.
    """
    raw_unplayed = self.get_unplayed_tiles(game_state, [])
    unplayed = DominoTile.loi_to_domino_tiles(raw_unplayed)
    excluded = DominoTile.loi_to_domino_tiles(tiles_not_in_players_hand)

    # Every tile that could have been played (except on the first move of the
    # game). A candidate can be neither an excluded tile nor an already
    # played one.
    candidates = self.generate_possible_tiles(game_state.ends, unplayed, excluded)

    # TODO: filter out tiles of suits on which the player passed.

    for _candidate in candidates:
        # TODO: sample a hand for every player (including south) with
        #   - south holding none of tiles_not_in_players_hand,
        #   - south holding the actual move,
        #   - south holding the candidate being compared against,
        # e.g. via generate_sample_from_game_state_from_another_perspective(...).
        pass

    # TODO: compute statistics over the samples; when a candidate has a
    # significantly better expected score than the actual move, add it to the
    # player's unlikely_tiles set.
92
+
93
def probability_from_another_perspective(unplayed_tiles: list[DominoTile], not_with_tiles: dict[PlayerPosition, list[DominoTile]], player_tiles: PlayerTiles) -> dict[PlayerPosition, dict[DominoTile, float]]:
    """
    Estimate, per player, how likely each unplayed tile is to be in that hand.

    The estimate is a heuristic: a player's tile count is spread evenly over
    the tiles that player may still hold, the per-tile totals are scaled down
    wherever they exceed 1 across players, and every value is capped at 1.

    Args:
        unplayed_tiles (list[DominoTile]): Tiles that are not yet played.
        not_with_tiles (dict[PlayerPosition, list[DominoTile]]): Tiles known not to be with each player.
        player_tiles (PlayerTiles): Number of tiles each player has, indexed by player.

    Returns:
        dict[PlayerPosition, dict[DominoTile, float]]: One mapping per player.
        When no player has any candidate tile the mappings are empty; otherwise
        each mapping holds an entry for every unplayed tile (0.0 for tiles the
        player cannot hold).
    """
    # defaultdict comes from the module-level ``collections`` import.
    probabilities = {player: defaultdict(float) for player in PlayerPosition}

    # Candidate tiles per player: everything unplayed minus the known exclusions.
    candidates = {
        player: set(unplayed_tiles) - set(not_with_tiles.get(player, []))
        for player in PlayerPosition
    }

    # Nothing can be assigned to anyone: hand back the empty mappings.
    if not any(candidates.values()):
        return probabilities

    # Spread each player's tile count evenly over that player's candidates.
    for player, options in candidates.items():
        tiles_held = player_tiles[player]
        if len(options) == 0 or tiles_held == 0:
            continue
        share = tiles_held / len(options)
        for tile in options:
            probabilities[player][tile] += share

    # Scale a tile's column down when its total across players exceeds 1.
    # Summing through the defaultdicts also creates a 0.0 entry for every
    # player that cannot hold the tile.
    for tile in unplayed_tiles:
        column_total = sum(probabilities[player][tile] for player in PlayerPosition)
        if column_total > 1.0:
            for player in PlayerPosition:
                probabilities[player][tile] /= column_total

    # Cap individual values at 1.
    for distribution in probabilities.values():
        for tile, value in distribution.items():
            if value > 1.0:
                distribution[tile] = 1.0

    return probabilities
147
+
148
def generate_sample_from_game_state_from_another_perspective(unplayed_tiles: list[DominoTile], known_with_tiles: dict[PlayerPosition, list[DominoTile]], not_with_tiles: dict[PlayerPosition, list[DominoTile]], player_tiles: PlayerTiles) -> dict[PlayerPosition, list[DominoTile]]:
    """
    Build a (currently partial) sampled hand for every player.

    Each player's hand is seeded with the tiles known to be with that player.
    Distributing the remaining unplayed tiles is still TODO, so the returned
    hands contain only the known tiles.

    Args:
        unplayed_tiles (list[DominoTile]): Tiles that are not yet played.
        known_with_tiles (dict[PlayerPosition, list[DominoTile]]): Tiles known to be with each player.
        not_with_tiles (dict[PlayerPosition, list[DominoTile]]): Tiles known not to be with each player.
        player_tiles (PlayerTiles): Number of tiles each player has, indexed by player.

    Returns:
        dict[PlayerPosition, list[DominoTile]]: A hand per player. The lists are
        fresh copies, so mutating them does not alter ``known_with_tiles``.

    Raises:
        AssertionError: If a player is known to hold more tiles than
            ``player_tiles`` allows.
    """
    # Key by PlayerPosition throughout (not range(4)) and copy each list so
    # the caller's known_with_tiles is never aliased by the sample.
    sample: dict[PlayerPosition, list[DominoTile]] = {
        player: list(known_with_tiles.get(player, [])) for player in PlayerPosition
    }

    # Every seeded hand must fit within that player's tile count.
    assert all(len(sample[player]) <= player_tiles[player] for player in PlayerPosition), 'Sample cannot have more tiles than the player has'

    # Tiles whose owner is already known are not up for distribution.
    known_tiles_set = {tile for tiles in known_with_tiles.values() for tile in tiles}
    local_unplayed_tiles = [tile for tile in unplayed_tiles if tile not in known_tiles_set]

    tile_probabilities = probability_from_another_perspective(local_unplayed_tiles, not_with_tiles, player_tiles)

    # TODO: Use tile_probabilities to generate the sample

    return sample
167
+
168
def generate_possible_tiles(self, board_ends: tuple[int, int], unplayed_tiles: set[DominoTile], tiles_not_possible: set[DominoTile]) -> set[DominoTile]:
    """
    Return the unplayed tiles that could be attached to the current board.

    A tile qualifies when either of its halves (``top`` / ``bottom``) equals
    one of the two board ends and it is not listed in ``tiles_not_possible``.

    Args:
        board_ends: The two open ends of the board; ``(-1, -1)`` denotes the
            first move of the game, for which no candidates are produced.
        unplayed_tiles: Tiles that have not been played yet (any iterable).
        tiles_not_possible: Tiles to exclude from the result. Any iterable of
            tiles is accepted, not only a set.

    Returns:
        set[DominoTile]: The matching tiles, possibly empty.
    """
    if board_ends == (-1, -1):  # First move of the game: nothing to match against
        return set()

    left_end, right_end = board_ends
    ends = (left_end, right_end)
    matching = {
        tile for tile in unplayed_tiles
        if tile.top in ends or tile.bottom in ends
    }

    # set.difference() takes any iterable, whereas the ``-`` operator raises
    # TypeError when the exclusions arrive as a list.
    return matching.difference(tiles_not_possible)
183
+
72
184
def print_verbose_info (self , player_hand : list [DominoTile ], unplayed_tiles : list [DominoTile ], knowledge_tracker : CommonKnowledgeTracker , player_tiles_count : dict [PlayerPosition , int ], starting_player : PlayerPosition ) -> None :
73
185
print ("\n --- Verbose Information ---" )
74
186
# print(f"Starting player: {starting_player.name}")
@@ -110,9 +222,8 @@ def sample_and_search(self, final_south_hand: set[DominoTile], final_remaining_t
110
222
consecutive_passes = 0
111
223
)
112
224
113
- depth = 24
225
+ depth = 99 # Set it high enough, that it is never reached in practice, so the score is an integer
114
226
115
- # possible_moves = list_possible_moves(sample_state, include_stats=False)
116
227
if possible_moves is None :
117
228
possible_moves = list_possible_moves (sample_state )
118
229
move_scores : list [tuple [move , float ]] = []
@@ -125,15 +236,13 @@ def sample_and_search(self, final_south_hand: set[DominoTile], final_remaining_t
125
236
tile , is_left = move [0 ]
126
237
new_state = sample_state .play_hand (tile , is_left )
127
238
128
- # _, best_score, _ = get_best_move_alpha_beta(new_state, depth, sample_cache, best_path_flag=False)
129
239
_ , best_score , _ = get_best_move_alpha_beta (new_state , depth , sample_cache , best_path_flag = False )
130
240
move_scores .append ((move [0 ], best_score ))
131
- # return move[0], best_score
132
241
return move_scores
133
242
134
243
def get_best_move (self , final_south_hand : set [DominoTile ], remaining_tiles : set [DominoTile ],
135
244
knowledge_tracker : CommonKnowledgeTracker , player_tiles_count : dict [PlayerPosition , int ],
136
- board_ends : tuple [int | None ,int | None ], num_samples : int = 1000 , verbose : bool = False ) -> tuple [DominoTile , bool ] | None :
245
+ board_ends : tuple [int | None ,int | None ], verbose : bool = False ) -> tuple [DominoTile , bool ] | None :
137
246
138
247
inferred_knowledge : dict [PlayerPosition , set [DominoTile ]] = {
139
248
# player: set() for player in PlayerPosition
@@ -158,13 +267,13 @@ def get_best_move(self, final_south_hand: set[DominoTile], remaining_tiles: set[
158
267
total_samples = 0
159
268
batch_size = 16
160
269
confidence_level = 0.95
161
- min_samples = 30 * batch_size
162
- max_samples = 100 * batch_size
270
+ min_samples = 3 * batch_size
271
+ max_samples = 75 * batch_size
163
272
possible_moves = list_possible_moves_from_hand (final_south_hand , board_ends )
164
273
165
274
# Add timer and time limit
166
275
start_time = time .time ()
167
- time_limit = 120 # 30 seconds time limit
276
+ time_limit = 60 # 30 seconds time limit
168
277
169
278
with ProcessPoolExecutor () as executor :
170
279
0 commit comments