Skip to content

Commit e89bfff

Browse files
committed
inference for 4 players 1.0
1 parent 1b6587b commit e89bfff

9 files changed

+1017
-11
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
# pycache
55
__pycache__/*
6+
tests/__pycache__/*
67

78
# log files
89
*.log

analytic_agent_w_inf.py

+120-11
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from DominoPlayer import HumanPlayer, available_moves, stats
22
from collections import defaultdict
33
from DominoGameState import DominoGameState
4-
from domino_data_types import DominoTile, PlayerPosition, GameState, PlayerPosition_SOUTH, PlayerPosition_names, move
4+
from domino_data_types import DominoTile, PlayerPosition, GameState, PlayerPosition_SOUTH, PlayerPosition_names, PlayerTiles, move
55
from get_best_move2 import get_best_move_alpha_beta
66
from domino_utils import history_to_domino_tiles_history, list_possible_moves, list_possible_moves_from_hand
77
from domino_game_tracker import domino_game_state_our_perspective, generate_sample_from_game_state
@@ -55,9 +55,9 @@ def next_move(self, game_state: DominoGameState, player_hand: list[tuple[int,int
5555
self.print_verbose_info(_player_hand, _unplayed_tiles, _knowledge_tracker, _player_tiles_count, _starting_player)
5656

5757
# num_samples = 1000 if len(game_state.history) > 8 else 100 if len(game_state.history) > 4 else 25 if len(game_state.history) > 0 else 1000
58-
num_samples = 24
58+
# num_samples = 24
5959

60-
best_move = self.get_best_move(set(_player_hand), _remaining_tiles, _knowledge_tracker, _player_tiles_count, _board_ends, num_samples, verbose=verbose)
60+
best_move = self.get_best_move(set(_player_hand), _remaining_tiles, _knowledge_tracker, _player_tiles_count, _board_ends, verbose=verbose)
6161

6262
if best_move is None:
6363
return None
@@ -67,8 +67,120 @@ def next_move(self, game_state: DominoGameState, player_hand: list[tuple[int,int
6767
return (tile.top, tile.bottom), side
6868

6969
def update_unlikely_tiles(self, game_state: DominoGameState, player_from_south_pov: int, actual_move: tuple[tuple[int,int],str], tiles_not_in_players_hand: list[tuple[int,int]]) -> None:
    """Work-in-progress hook for inferring tiles a player is unlikely to hold.

    At the moment this only computes the tiles that could have been played
    on the current board ends; the sampling and scoring steps described in
    the TODO comments below are not implemented, so nothing is recorded.

    Args:
        game_state: Current game state; ``game_state.ends`` is used as the
            board ends.
        player_from_south_pov: Not used yet (presumably the seat of the
            player who moved, relative to south - confirm when implementing).
        actual_move: Not used yet (the move that was actually played).
        tiles_not_in_players_hand: Integer pairs for tiles known not to be
            in the player's hand.
    """
    unplayed_tiles = self.get_unplayed_tiles(game_state, [])

    # generate_possible_tiles is annotated to take sets and performs a set
    # difference, so hand it real sets regardless of what container
    # loi_to_domino_tiles returns.
    _unplayed_tiles = set(DominoTile.loi_to_domino_tiles(unplayed_tiles))
    _tiles_not_in_players_hand = set(DominoTile.loi_to_domino_tiles(tiles_not_in_players_hand))

    # Generate all possible tiles that could have been played (except for the first move of the game).
    # The tile can't be in tiles_not_in_players_hand or among the played tiles.
    possible_tiles = self.generate_possible_tiles(game_state.ends, _unplayed_tiles, _tiles_not_in_players_hand)

    # TODO: Filter out the tiles that are not in the player's hand (i.e. suits where the player passed)

    # For each possible tile that theoretically could have been played
    for tile in possible_tiles:
        # TODO: Sample a hand for every player (including south)
        #   Constraint: south cannot have tiles from tiles_not_in_players_hand
        #   Constraint: south has to have the actual move
        #   Constraint: south has to have the tile we are comparing against
        # sample = generate_sample_from_game_state_from_another_perspective(...)
        pass

    # TODO: Calculate statistics for the samples.
    # If a tile has a significantly better expected score than the actual move,
    # add it to the unlikely_tiles set for the player.
92+
93+
def probability_from_another_perspective(unplayed_tiles: list[DominoTile], not_with_tiles: dict[PlayerPosition, list[DominoTile]], player_tiles: PlayerTiles) -> dict[PlayerPosition, dict[DominoTile, float]]:
    """Estimate how likely each unplayed tile is to sit in each player's hand.

    Every player gets a uniform weight ``hand size / number of candidate
    tiles`` on each tile that is not ruled out for them. Tiles whose summed
    weight across players exceeds 1 are rescaled to sum to 1, and every
    value is finally capped at 1.

    Args:
        unplayed_tiles (list[DominoTile]): Tiles that are not yet played.
        not_with_tiles (dict[PlayerPosition, list[DominoTile]]): Tiles known
            not to be with each player; a missing player means no exclusions.
        player_tiles (PlayerTiles): Number of tiles each player holds, read
            as ``player_tiles[player]``.

    Returns:
        dict[PlayerPosition, dict[DominoTile, float]]: Per-player mapping of
        tile to probability. The inner mappings are ``defaultdict(float)``.

    NOTE(review): this function takes no ``self`` although it appears among
    methods in this diff - confirm how it is meant to be called.
    NOTE(review): ``player_tiles`` is indexed with PlayerPosition members;
    confirm that the PlayerTiles type supports that indexing for all seats.
    """
    seats = list(PlayerPosition)
    probabilities = {seat: defaultdict(float) for seat in seats}

    # Candidate tiles per seat: everything unplayed minus the known exclusions.
    all_unplayed = set(unplayed_tiles)
    candidates = {
        seat: all_unplayed - set(not_with_tiles.get(seat, []))
        for seat in seats
    }

    # Nothing can be assigned to anyone: hand back the empty tables.
    if not any(candidates.values()):
        return probabilities

    # Spread each seat's hand size evenly over its candidate tiles.
    for seat in seats:
        hand_size = player_tiles[seat]
        pool = candidates[seat]
        if not pool or hand_size == 0:
            continue
        share = hand_size / len(pool)
        for tile in pool:
            probabilities[seat][tile] += share

    # Rescale any tile whose weights add up to more than 1 across seats.
    for tile in unplayed_tiles:
        column_total = 0
        for seat in seats:
            # Indexing the defaultdict also creates a 0.0 entry for seats
            # that had none, so every seat has this tile from here on.
            column_total += probabilities[seat][tile]
        if column_total > 1.0:
            for seat in seats:
                probabilities[seat][tile] /= column_total

    # Final safety cap at 1.
    for seat in seats:
        per_seat = probabilities[seat]
        for tile, value in per_seat.items():
            if value > 1.0:
                per_seat[tile] = 1.0

    return probabilities
147+
148+
def generate_sample_from_game_state_from_another_perspective(unplayed_tiles: list[DominoTile], known_with_tiles: dict[PlayerPosition, list[DominoTile]], not_with_tiles: dict[PlayerPosition, list[DominoTile]], player_tiles: PlayerTiles) -> dict[PlayerPosition, list[DominoTile]]:
    """Start building a sampled deal of hands, seeded with the known tiles.

    Work in progress: the returned sample currently contains only the tiles
    from ``known_with_tiles``; the probabilistic fill-in is still a TODO.

    Args:
        unplayed_tiles: Tiles that are not yet played.
        known_with_tiles: Tiles known to be held by each player; a missing
            player means nothing is known.
        not_with_tiles: Tiles known not to be held by each player.
        player_tiles: Number of tiles each player holds, read as
            ``player_tiles[player]``.

    Returns:
        Mapping of every PlayerPosition member to a new list of tiles.

    Raises:
        AssertionError: If a player is known to hold more tiles than
            ``player_tiles`` says they have.
    """
    # Key consistently by PlayerPosition members and copy each list, so that
    # filling in the sample later can never mutate the caller's data.
    sample: dict[PlayerPosition, list[DominoTile]] = {
        player: list(known_with_tiles.get(player, [])) for player in PlayerPosition
    }

    # Every seeded hand must fit within that player's tile count.
    assert all(len(sample[player]) <= player_tiles[player] for player in PlayerPosition), 'Sample cannot have more tiles than the player has'

    # Tiles whose owner is already known are not up for assignment.
    known_tiles_set = {tile for tiles in known_with_tiles.values() for tile in tiles}
    local_unplayed_tiles = [tile for tile in unplayed_tiles if tile not in known_tiles_set]

    tile_probabilities = probability_from_another_perspective(local_unplayed_tiles, not_with_tiles, player_tiles)

    # TODO: Use tile_probabilities to generate the sample

    return sample
167+
168+
def generate_possible_tiles(self, board_ends: tuple[int,int], unplayed_tiles: set[DominoTile], tiles_not_possible: set[DominoTile]) -> set[DominoTile]:
    """Return the unplayed tiles that fit an open board end and are not excluded.

    Args:
        board_ends: The two open ends of the board; ``(-1, -1)`` marks the
            first move of the game, for which no candidates are produced.
        unplayed_tiles: Tiles not yet played; each needs ``top`` and
            ``bottom`` attributes. Any iterable is accepted.
        tiles_not_possible: Tiles to exclude from the result (played tiles
            and tiles known not to be in the player's hand). Any iterable
            is accepted.

    Returns:
        A new set with every tile from ``unplayed_tiles`` that has a half
        equal to one of the board ends and is not in ``tiles_not_possible``.
    """
    if board_ends == (-1, -1):
        # First move of the game: there are no ends to match against.
        return set()

    left_end, right_end = board_ends
    ends = (left_end, right_end)
    candidates = {
        tile for tile in unplayed_tiles
        if tile.top in ends or tile.bottom in ends
    }

    # difference() takes any iterable, whereas the `-` operator raises
    # TypeError when the exclusions arrive as a list.
    return candidates.difference(tiles_not_possible)
183+
72184
def print_verbose_info(self, player_hand: list[DominoTile], unplayed_tiles: list[DominoTile], knowledge_tracker: CommonKnowledgeTracker, player_tiles_count: dict[PlayerPosition, int], starting_player: PlayerPosition) -> None:
73185
print("\n--- Verbose Information ---")
74186
# print(f"Starting player: {starting_player.name}")
@@ -110,9 +222,8 @@ def sample_and_search(self, final_south_hand: set[DominoTile], final_remaining_t
110222
consecutive_passes=0
111223
)
112224

113-
depth = 24
225+
depth = 99 # Set it high enough, that it is never reached in practice, so the score is an integer
114226

115-
# possible_moves = list_possible_moves(sample_state, include_stats=False)
116227
if possible_moves is None:
117228
possible_moves = list_possible_moves(sample_state)
118229
move_scores: list[tuple[move, float]] = []
@@ -125,15 +236,13 @@ def sample_and_search(self, final_south_hand: set[DominoTile], final_remaining_t
125236
tile, is_left = move[0]
126237
new_state = sample_state.play_hand(tile, is_left)
127238

128-
# _, best_score, _ = get_best_move_alpha_beta(new_state, depth, sample_cache, best_path_flag=False)
129239
_, best_score, _ = get_best_move_alpha_beta(new_state, depth, sample_cache, best_path_flag=False)
130240
move_scores.append((move[0], best_score))
131-
# return move[0], best_score
132241
return move_scores
133242

134243
def get_best_move(self, final_south_hand: set[DominoTile], remaining_tiles: set[DominoTile],
135244
knowledge_tracker: CommonKnowledgeTracker, player_tiles_count: dict[PlayerPosition, int],
136-
board_ends: tuple[int|None,int|None], num_samples: int = 1000, verbose: bool = False) -> tuple[DominoTile, bool] | None:
245+
board_ends: tuple[int|None,int|None], verbose: bool = False) -> tuple[DominoTile, bool] | None:
137246

138247
inferred_knowledge: dict[PlayerPosition, set[DominoTile]] = {
139248
# player: set() for player in PlayerPosition
@@ -158,13 +267,13 @@ def get_best_move(self, final_south_hand: set[DominoTile], remaining_tiles: set[
158267
total_samples = 0
159268
batch_size = 16
160269
confidence_level = 0.95
161-
min_samples = 30 * batch_size
162-
max_samples = 100 * batch_size
270+
min_samples = 3 * batch_size
271+
max_samples = 75 * batch_size
163272
possible_moves = list_possible_moves_from_hand(final_south_hand, board_ends)
164273

165274
# Add timer and time limit
166275
start_time = time.time()
167-
time_limit = 120 # 30 seconds time limit
276+
time_limit = 60 # 60 seconds time limit
168277

169278
with ProcessPoolExecutor() as executor:
170279

domino_data_types.py

+5
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,13 @@ def next_player(pos: PlayerPosition)-> PlayerPosition:
1414

1515
PlayerPosition_names = ['SOUTH', 'EAST', 'NORTH', 'WEST']
1616

17+
PLAYERS = ['S', 'E', 'N', 'W']
18+
PLAYERS_INDEX = {'S': 0, 'E': 1, 'N': 2, 'W': 3}
19+
1720
PlayerTiles = namedtuple('PlayerTiles', ['N', 'E', 'W'])
1821

22+
PlayerTiles4 = namedtuple('PlayerTiles4', ['S', 'N', 'E', 'W'])
23+
1924
# @dataclass(frozen=True)
2025
@dataclass
2126
class DominoTile:

0 commit comments

Comments
 (0)