# analytic_agent_player_parallel_ci.py
from DominoPlayer import HumanPlayer, available_moves, stats
from collections import defaultdict
from DominoGameState import DominoGameState
from domino_data_types import DominoTile, PlayerPosition, GameState, PlayerPosition_SOUTH, PlayerPosition_names, move
from get_best_move2 import get_best_move_alpha_beta
# from get_best_move_venezuelan import get_best_move_alpha_beta
from domino_utils import history_to_domino_tiles_history, list_possible_moves, list_possible_moves_from_hand
from domino_game_tracker import domino_game_state_our_perspective, generate_sample_from_game_state
from domino_common_knowledge import CommonKnowledgeTracker
from statistics import mean, median, stdev, mode
import copy
import time
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor, as_completed
from scipy import stats as scipy_stats


class AnalyticAgentPlayer(HumanPlayer):
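    """Analytic domino player: samples the opponents' hidden hands, scores each
    candidate move with alpha-beta search over the samples in parallel, and
    prunes candidates whose confidence intervals fall below the leader's."""
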
    def __init__(self, position: int = 0) -> None:
        super().__init__()
        self.move_history: list[tuple[int, tuple[tuple[int, int], str] | None]] = []
        self.tile_count_history: dict[int, list[int]] = defaultdict(list)
        # self.round_scores: list[int] = []
        self.first_game = True
        self.position = position

    def next_move(self, game_state: DominoGameState, player_hand: list[tuple[int, int]], verbose: bool = True) -> tuple[tuple[int, int], str] | None:
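        """Return the chosen move as ((top, bottom), side) with side 'l' or 'r',
        or None to pass."""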
        # if self.first_game:
        if game_state.first_round:
            # In these variants the opening move is forced: (6,6) must be played.
            if game_state.variant in {'international', 'venezuelan'} and len(game_state.history) == 0:
                self.first_game = False
                print('Playing the mandatory (6,6) opening on the first game.')
                return (6, 6), 'l'
            else:
                self.first_game = False
        unplayed_tiles = self.get_unplayed_tiles(game_state, player_hand)
        _unplayed_tiles = DominoTile.loi_to_domino_tiles(unplayed_tiles)
        _player_hand = DominoTile.loi_to_domino_tiles(player_hand)
        _moves = history_to_domino_tiles_history(game_state.history)
        _remaining_tiles = set(_unplayed_tiles)
        # _initial_player_tiles = {p: 7 for p in PlayerPosition}
        _initial_player_tiles = {p: 7 for p in range(4)}
        # _starting_player = PlayerPosition((game_state.history[0][0] - self.position)%4) if len(game_state.history)>0 else PlayerPosition.SOUTH
        _starting_player = ((game_state.history[0][0] - self.position) % 4) if len(game_state.history) > 0 else PlayerPosition_SOUTH
        current_player, _final_remaining_tiles, _board_ends, _player_tiles_count, _knowledge_tracker = domino_game_state_our_perspective(
            _remaining_tiles, _moves, _initial_player_tiles, current_player=_starting_player)
        if verbose:
            self.print_verbose_info(_player_hand, _unplayed_tiles, _knowledge_tracker, _player_tiles_count, _starting_player)
        best_move = self.get_best_move(set(_player_hand), _remaining_tiles, _knowledge_tracker, _player_tiles_count, _board_ends, verbose=verbose)
        if best_move is None:
            return None
        else:
            tile, is_left = best_move
            side = 'l' if is_left else 'r'
            return (tile.top, tile.bottom), side

    def print_verbose_info(self, player_hand: list[DominoTile], unplayed_tiles: list[DominoTile], knowledge_tracker: CommonKnowledgeTracker, player_tiles_count: dict[PlayerPosition, int], starting_player: PlayerPosition) -> None:
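        """Print the player's hand, the remaining tiles, and the common
        knowledge gathered so far."""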
        print("\n--- Verbose Information ---")
        # print(f"Starting player: {starting_player.name}")
        print(f"Starting player: {PlayerPosition_names[starting_player]}")
        print(f"Player's hand: {player_hand}")
        print(f"Remaining tiles: {unplayed_tiles}")
        print("\nCommon knowledge of missing suits:")
        # for player in PlayerPosition:
        for player in range(4):
            # print(f" {player.name}: {knowledge_tracker.common_knowledge_missing_suits[player]}")
            print(f" {PlayerPosition_names[player]}: {knowledge_tracker.common_knowledge_missing_suits[player]}")
        print("\nRemaining tiles for each player:")
        for player, count in player_tiles_count.items():
            # print(f" {player.name}: {count}")
            print(f" {PlayerPosition_names[player]}: {count}")
        print("----------------------------\n")

    def sample_and_search(self, final_south_hand: set[DominoTile], final_remaining_tiles_without_south_tiles: set[DominoTile], player_tiles_count: dict[PlayerPosition, int], inferred_knowledge_for_current_player: dict[PlayerPosition, set[DominoTile]], board_ends: tuple[int | None, int | None], possible_moves: list[tuple[tuple[DominoTile, bool] | None, int | None, float | None]] | None = None) -> list[tuple[move, float]]:
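        """Draw one sample of the hidden hands consistent with the known
        constraints, then score every candidate move on that sample with
        alpha-beta search."""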
        sample = generate_sample_from_game_state(
            PlayerPosition_SOUTH,
            final_south_hand,
            final_remaining_tiles_without_south_tiles,
            player_tiles_count,
            inferred_knowledge_for_current_player
        )
        sample_hands = (
            frozenset(final_south_hand),
            frozenset(sample['E']),
            frozenset(sample['N']),
            frozenset(sample['W'])
        )
        sample_state = GameState(
            player_hands=sample_hands,
            current_player=PlayerPosition_SOUTH,
            left_end=board_ends[0],
            right_end=board_ends[1],
            consecutive_passes=0
        )
        # Search depth bound: 2 * (28 tile plays + 4 passes) covers any remaining game.
        depth = 2 * (28 + 4)
        if possible_moves is None:
            possible_moves = list_possible_moves(sample_state)
        move_scores: list[tuple[move, float]] = []
        sample_cache: dict[GameState, tuple[int, int]] = {}
        for move in possible_moves:
            if move[0] is None:
                new_state = sample_state.pass_turn()
            else:
                tile, is_left = move[0]
                new_state = sample_state.play_hand(tile, is_left)
            _, best_score, _ = get_best_move_alpha_beta(new_state, depth, sample_cache, best_path_flag=False)
            assert best_score % 1 == 0.0, f"Score is not an integer: {best_score}, state: {new_state}"
            move_scores.append((move[0], best_score))
        return move_scores

    def get_best_move(self, final_south_hand: set[DominoTile], remaining_tiles: set[DominoTile],
                      knowledge_tracker: CommonKnowledgeTracker, player_tiles_count: dict[PlayerPosition, int],
                      board_ends: tuple[int | None, int | None], verbose: bool = False) -> tuple[DominoTile, bool] | None:
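        """Estimate the expected score of every legal move by sampling hidden
        hands in parallel batches, and keep sampling until one move dominates,
        the sample budget is spent, or the time limit is hit."""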
        # Tiles each player is known not to hold, inferred from their missing suits.
        inferred_knowledge: dict[PlayerPosition, set[DominoTile]] = {
            # player: set() for player in PlayerPosition
            player: set() for player in range(4)
        }
        for tile in remaining_tiles:
            # for player in PlayerPosition:
            for player in range(4):
                if tile.top in knowledge_tracker.common_knowledge_missing_suits[player] or tile.bottom in knowledge_tracker.common_knowledge_missing_suits[player]:
                    inferred_knowledge[player].add(tile)
        final_remaining_tiles_without_south_tiles = remaining_tiles - final_south_hand
        # inferred_knowledge_for_current_player = copy.deepcopy(inferred_knowledge)
        inferred_knowledge_for_current_player = inferred_knowledge
        for player, tiles in inferred_knowledge_for_current_player.items():
            inferred_knowledge_for_current_player[player] = tiles - final_south_hand
        move_scores = defaultdict(list)
        # Use ProcessPoolExecutor to parallelize sampling across CPU cores.
        total_samples = 0
        batch_size = 24
        confidence_level = 0.95
        min_samples = 10 * batch_size
        max_samples = 75 * batch_size
        possible_moves = list_possible_moves_from_hand(final_south_hand, board_ends)
        # Timer and time limit for the whole analysis.
        start_time = time.time()
        time_limit = 90  # 90-second time limit
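        # Adaptive sampling loop: submit a batch of samples, update each move's
        # mean and confidence interval, drop moves whose CI upper bound falls
        # below the best move's CI lower bound, and stop early on a clear
        # winner or timeout.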
        with ProcessPoolExecutor() as executor:
            while total_samples < max_samples:
                futures = [
                    executor.submit(
                        self.sample_and_search,
                        final_south_hand,
                        final_remaining_tiles_without_south_tiles,
                        player_tiles_count,
                        inferred_knowledge_for_current_player,
                        board_ends,
                        possible_moves
                    )
                    for _ in range(min(batch_size, max_samples - total_samples))
                ]
                for future in tqdm(as_completed(futures), total=len(futures), desc=f"Analyzing moves (total: {total_samples})", leave=False):
                    sample_scores = future.result()
                    for move, score in sample_scores:
                        assert score % 1 == 0.0, f"Score is not an integer: {score}"
                        move_scores[move].append(score)
                total_samples += len(futures)
                # Calculate a confidence interval for each move's mean score.
                move_stats = {}
                for move, scores in move_scores.items():
                    n = len(scores)
                    mean_score = mean(scores)
                    std_dev = stdev(scores) if n > 1 else 0
                    ci = scipy_stats.t.interval(confidence=confidence_level, df=n - 1, loc=mean_score, scale=std_dev / n**0.5)
                    move_stats[move] = {"mean": mean_score, "ci_lower": ci[0], "ci_upper": ci[1]}
                # Check whether the time limit has been exceeded.
                if time.time() - start_time > time_limit:
                    print(f"Time limit of {time_limit} seconds exceeded. Terminating early.")
                    break
                # Print statistics after each batch.
                if verbose:
                    self.print_move_statistics(move_scores, total_samples)
                if not move_scores or len(move_scores) == 1:
                    # With at most one candidate (a single move or a pass), min_samples is enough.
                    max_samples = min_samples
                    continue
                # Identify the current best move and prune the candidate list.
                sorted_moves = sorted(move_stats.items(), key=lambda x: x[1]["mean"], reverse=True)
                best_move = sorted_moves[0][0]
                best_move_stats = move_stats[best_move]
                # Once every move has at least min_samples, keep only the moves whose
                # confidence intervals still overlap the best move's.
                if all(len(scores) >= min_samples for scores in move_scores.values()):
                    possible_moves = [(move, None, None) for move, stats in sorted_moves if stats["ci_upper"] >= best_move_stats["ci_lower"]]
                    # If only one move survives, min_samples is enough.
                    if len(possible_moves) == 1:
                        max_samples = min_samples
                        continue
        if not move_scores:
            if verbose:
                print("No legal moves available. Player must pass.")
            return None
        if verbose:
            self.print_move_statistics(move_scores, total_samples)
        # Calculate the time taken to find the best move.
        time_taken = time.time() - start_time
        print(f"\nTime taken to find the best move: {time_taken:.2f} seconds")
        best_move = max(move_scores, key=lambda x: mean(move_scores[x]))
        return best_move

    def print_move_statistics(self, move_scores: dict[move, list[float]], num_samples: int) -> None:
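        """Print per-move summary statistics and the best move found so far."""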
        print(f"\nMove Statistics (based on {num_samples} samples):")
        # Calculate statistics for each move
        move_statistics = {}
        for move, scores in move_scores.items():
            if len(scores) > 1:
                n = len(scores)
                mean_score = mean(scores)
                std_dev = stdev(scores)
                confidence_interval = scipy_stats.t.interval(confidence=0.95, df=n - 1, loc=mean_score, scale=std_dev / n**0.5)
                move_statistics[move] = {
                    "count": n,
                    "mean": mean_score,
                    "std_dev": std_dev,
                    "median": median(scores),
                    "mode": mode(scores),
                    "min": min(scores),
                    "max": max(scores),
                    "ci_lower": confidence_interval[0],
                    "ci_upper": confidence_interval[1]
                }
            else:
                move_statistics[move] = {
                    "count": len(scores),
                    "mean": scores[0],
                    "std_dev": 0,
                    "median": scores[0],
                    "mode": scores[0],
                    "min": scores[0],
                    "max": scores[0],
                    "ci_lower": scores[0],
                    "ci_upper": scores[0]
                }
        # Sort moves by their mean score, in descending order
        sorted_moves = sorted(move_statistics.items(), key=lambda x: x[1]["mean"], reverse=True)
        # Print statistics for each move
        for move, stats in sorted_moves:
            if move is None:
                move_str = "Pass"
            else:
                tile, is_left = move
                direction = "left" if is_left else "right"
                move_str = f"Play {tile} on the {direction}"
            print(f"\nMove: {move_str}")
            print(f" Count: {stats['count']}")
            print(f" Mean Score: {stats['mean']:.4f}")
            print(f" Standard Deviation: {stats['std_dev']:.4f}")
            print(f" Median Score: {stats['median']:.4f}")
            print(f" Mode Score: {stats['mode']:.4f}")
            print(f" Min Score: {stats['min']:.4f}")
            print(f" Max Score: {stats['max']:.4f}")
            print(f" 95% Confidence Interval (mean): ({stats['ci_lower']:.4f}, {stats['ci_upper']:.4f})")
        # Identify the best move based on the highest mean score
        best_move = max(move_statistics, key=lambda x: move_statistics[x]["mean"])
        best_stats = move_statistics[best_move]
        print("\nBest Move Overall:")
        if best_move is None:
            print("Best move: Pass")
        else:
            tile, is_left = best_move
            direction = "left" if is_left else "right"
            print(f"Best move: Play {tile} on the {direction}")
        print(f"Mean Expected Score: {best_stats['mean']:.4f}")

    def end_round(self, scores: list[int], team: int) -> None:
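        """Hand end-of-round bookkeeping to the base class."""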
        super().end_round(scores, team)
        # self.record_round_score(scores)

    def record_move(self, game_state: DominoGameState, move: tuple[tuple[int, int], str] | None) -> None:
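        """Record who moved and what was played, plus each player's tile count."""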
        self.move_history.append((game_state.next_player, move))
        for i, count in enumerate(game_state.player_tile_counts):
            self.tile_count_history[i].append(count)

    def get_move_history(self) -> list[tuple[int, tuple[tuple[int, int], str] | None]]:
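        """Return the recorded (player, move) history."""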
        return self.move_history

    def get_tile_count_history(self) -> dict[int, list[int]]:
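        """Return each player's tile-count history as a plain dict."""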
        return dict(self.tile_count_history)