Kaggle - ConnectX(3) - 4枚そろえるボードゲーム

Connect Xコンペに関する3回目の記事です。

Connect X

今回は、スコア1029.1を叩き出している「Cell Swarm」というノートブックのエージェントを参考にさせて頂きました。

Cell Swarm

Cell Swarmノートブックのエージェント

エージェントの実装だけ抽出してみます。

Swarmは「群れ」という意味で、Cell Swarmだと「セルの群れ」とか「セルの集まり」とかいう意味でしょうか。

処理はコメントをご参照ください。

[ソース]

def my_agent(obs, conf):
    """Choose a column for the next ConnectX move ("Cell Swarm" heuristic).

    The board is modelled as a "swarm": a list of columns, each a list of
    cell dicts. For every column that is not full, the lowest empty cell is
    scored (``evaluate_cell``) and the candidates are compared position by
    position on their ``"points"`` lists (``choose_best_cell``). Columns are
    visited from the centre outwards, so the centre-most candidate is kept
    when nothing distinguishes two cells.

    Args:
        obs: Observation with ``mark`` (this agent's mark; the opponent is
            taken to be 2 if this is 1, otherwise 1) and ``board`` (flat
            sequence indexed as ``conf.columns * row + column``, where 0
            means empty and the highest row index is the bottom row).
        conf: Configuration with ``rows``, ``columns`` and ``inarow``.

    Returns:
        The column index (``x``) of the best cell found.

    Raises:
        TypeError: If every column is full, because no best cell was found.

    Note:
        ``explore_cell_above`` indexes ``points[0..5]`` and pads with six
        zeros, i.e. it relies on ``calculate_points`` producing six values
        per cell, which is the case when ``conf.inarow == 4``.
    """

    def evaluate_cell(cell):
        """Score ``cell`` from its own patterns and from the cells above it."""
        cell = get_patterns(cell)
        cell = calculate_points(cell)
        for i in range(1, conf.rows):
            cell = explore_cell_above(cell, i)
        return cell

    def get_patterns(cell):
        """Store the swarm's and the opponent's pattern for each axis of ``cell``.

        Each pattern is the run of neighbouring marks on one side (reversed so
        it reads towards the cell), the hypothetical mark placed in the cell,
        and the run of neighbouring marks on the other side.
        """
        # Step (dx, dy) towards the first-named end of each axis; the
        # opposite end is reached by negating the step.
        forward_steps = {
            "NE_SW": (1, -1),
            "E_W": (1, 0),
            "SE_NW": (1, 1),
            "S_N": (0, 1),
        }
        for axis, (dx, dy) in forward_steps.items():
            ahead = get_pattern(cell["x"], cell["y"], dx, dy, conf.inarow)
            behind = get_pattern(cell["x"], cell["y"], -dx, -dy, conf.inarow)[::-1]
            cell["swarm_patterns"][axis] = behind + [{"mark": swarm_mark}] + ahead
            cell["opp_patterns"][axis] = behind + [{"mark": opp_mark}] + ahead
        return cell

    def get_pattern(x, y, dx, dy, cells_remained):
        """Collect the marks met when walking from (x, y) in direction (dx, dy).

        The starting cell itself is not included. The walk stops at the
        swarm's border or once the budget of cells to explore is used up.
        """
        pattern = []
        while True:
            x += dx
            y += dy
            # stop as soon as the walk leaves the swarm's borders
            if not (0 <= y < conf.rows and 0 <= x < conf.columns):
                break
            pattern.append({"mark": swarm[x][y]["mark"]})
            # amount of cells still to explore in this direction
            cells_remained -= 1
            if cells_remained <= 1:
                break
        return pattern

    def calculate_points(cell):
        """Append the swarm's and the opponent's pattern counts to ``cell["points"]``.

        One pair of values is appended for every required amount of marks,
        from ``conf.inarow`` down to 2.
        """
        axes = ("E_W", "NE_SW", "SE_NW", "S_N")
        for i in range(conf.inarow - 1):
            # inarow = amount of marks in pattern to consider that pattern as correct
            inarow = conf.inarow - i
            swarm_points = 0
            opp_points = 0
            for axis in axes:
                swarm_points = evaluate_pattern(
                    swarm_points, cell["swarm_patterns"][axis], swarm_mark, inarow)
            for axis in axes:
                opp_points = evaluate_pattern(
                    opp_points, cell["opp_patterns"][axis], opp_mark, inarow)
            # For the first pair (i == 0, a full line of marks) the swarm's
            # value always comes first; for later pairs the larger value
            # comes first.
            if i > 0 and not swarm_points > opp_points:
                cell["points"].append(opp_points)
                cell["points"].append(swarm_points)
            else:
                cell["points"].append(swarm_points)
                cell["points"].append(opp_points)
        return cell

    def evaluate_pattern(points, pattern, mark, inarow):
        """Return ``points + 1`` if ``pattern`` contains a qualifying window.

        A window of ``conf.inarow`` consecutive cells qualifies when it holds
        at least ``inarow`` cells of ``mark`` and all its other cells are
        empty. At most one point is added per pattern.
        """
        for i in range(len(pattern) - (conf.inarow - 1)):
            marks = 0
            zeros = 0
            # count marks and empty cells in the window starting at i
            for j in range(conf.inarow):
                if pattern[i + j]["mark"] == mark:
                    marks += 1
                elif pattern[i + j]["mark"] == 0:
                    zeros += 1
            if marks >= inarow and (marks + zeros) == conf.inarow:
                return points + 1
        return points

    def explore_cell_above(cell, i):
        """Merge the points of the cell ``i`` rows above into ``cell["points"]``.

        The values are negated for odd ``i`` and kept as they are for even
        ``i``. When there is no cell ``i`` rows above, six zeros are appended
        instead.
        """
        if (cell["y"] - i) >= 0:
            cell_above = swarm[cell["x"]][cell["y"] - i]
            cell_above = get_patterns(cell_above)
            cell_above = calculate_points(cell_above)
            # points will be positive or negative
            n = -1 if i & 1 else 1
            # if it is first cell above
            if i == 1:
                # add first 4 points of cell_above["points"] to cell["points"]
                cell["points"][2:2] = [n * cell_above["points"][1], n * cell_above["points"][0]]
                # if it is not potential "seven" pattern in cell and cell_above has more points
                if abs(cell["points"][4]) < 2 and abs(cell["points"][4]) < cell_above["points"][2]:
                    cell["points"][4:4] = [n * cell_above["points"][2]]
                    # if it is not potential "seven" pattern in cell and cell_above has more points
                    if abs(cell["points"][5]) < 2 and abs(cell["points"][5]) < cell_above["points"][3]:
                        cell["points"][5:5] = [n * cell_above["points"][3]]
                    else:
                        cell["points"][7:7] = [n * cell_above["points"][3]]
                else:
                    cell["points"][6:6] = [n * cell_above["points"][2], n * cell_above["points"][3]]
                cell["points"].append(n * cell_above["points"][4])
                cell["points"].append(n * cell_above["points"][5])
            else:
                cell["points"].extend([z * n for z in cell_above["points"]])
        else:
            cell["points"].extend([0, 0, 0, 0, 0, 0])
        return cell

    def choose_best_cell(best_cell, current_cell):
        """Compare two cells and return the better one.

        The ``"points"`` lists are compared position by position and the
        first difference decides. While the values are equal and positive,
        the smaller ``"distance_to_center"`` decides. If nothing decides,
        ``best_cell`` is kept.
        """
        if best_cell is None:
            return current_cell
        for i, best_points in enumerate(best_cell["points"]):
            current_points = current_cell["points"][i]
            if best_points < current_points:
                return current_cell
            if best_points > current_points:
                return best_cell
            # points are equal here: compare distance to the swarm's center
            if best_points > 0:
                if best_cell["distance_to_center"] > current_cell["distance_to_center"]:
                    return current_cell
                if best_cell["distance_to_center"] < current_cell["distance_to_center"]:
                    return best_cell
        return best_cell

    ###############################################################################
    # define swarm's and opponent's marks
    swarm_mark = obs.mark
    opp_mark = 2 if swarm_mark == 1 else 1
    # define swarm's center
    swarm_center_horizontal = conf.columns // 2
    swarm_center_vertical = conf.rows // 2

    # define swarm as two dimensional array of cells, indexed swarm[column][row]
    swarm = []
    for column in range(conf.columns):
        swarm.append([])
        for row in range(conf.rows):
            cell = {
                "x": column,
                "y": row,
                "mark": obs.board[conf.columns * row + column],
                "swarm_patterns": {},
                "opp_patterns": {},
                "distance_to_center": abs(row - swarm_center_vertical) + abs(column - swarm_center_horizontal),
                "points": []
            }
            swarm[column].append(cell)

    best_cell = None
    # start searching for best_cell from swarm center
    x = swarm_center_horizontal
    # shift to right or left from swarm center
    shift = 0

    # searching for best_cell
    while x >= 0 and x < conf.columns:
        # find first empty cell starting from bottom of the column
        y = conf.rows - 1
        while y >= 0 and swarm[x][y]["mark"] != 0:
            y -= 1
        # if column is not full
        if y >= 0:
            # current cell evaluates its own qualities
            current_cell = evaluate_cell(swarm[x][y])
            # current cell compares itself against best cell
            best_cell = choose_best_cell(best_cell, current_cell)

        # shift x to right or left from swarm center: the sequence of shifts
        # is -1, +1, -2, +2, ... until x leaves the board
        if shift >= 0:
            shift += 1
        shift *= -1
        x = swarm_center_horizontal + shift

    # return index of the best cell column
    return best_cell["x"]

エージェントの評価

ランダム選択の相手との結果と、NegaMax法の相手との結果(平均報酬)を表示します。

[ソース]

def mean_reward(rewards):
    """Return the mean of the first entry of each item in ``rewards``.

    Args:
        rewards: Sized iterable of indexable items; only ``r[0]`` of each
            item is used (presumably the first listed agent's reward for
            one episode -- confirm against the caller).

    Returns:
        The average as a float.

    Raises:
        ZeroDivisionError: If ``rewards`` is empty.
    """
    return sum(r[0] for r in rewards) / float(len(rewards))

# Play 10 episodes against each opponent and print the mean reward per matchup.
matchups = (
    ("My Agent vs Random Agent:", "random"),
    ("My Agent vs Negamax Agent:", "negamax"),
)
for label, opponent in matchups:
    episode_rewards = evaluate("connectx", [my_agent, opponent], num_episodes=10)
    print(label, mean_reward(episode_rewards))

[結果]

ランダム相手には完勝しており、NegaMax法の相手との結果も勝ち越しています。

なかなか強いエージェントみたいです。