# 合法手の取得 for direction in directions: # 駒の移動元 x = position_src%3 + self.dxy[direction][0] y = int(position_src/3) + self.dxy[direction][1] p = x + y * 3
# 移動可能時は合法手として追加 if0 <= x and x <= 2and0<= y and y <= 3and self.pieces[p] == 0: actions.append(self.position_to_action(p, direction)) return actions
# 駒の移動 if position_src < 8: # 駒の移動元 x = position_dst%3 - self.dxy[position_src][0] y = int(position_dst/3) - self.dxy[position_src][1] position_src = x + y * 3
# パッケージのインポート from tensorflow.keras.layers import Activation, Add, BatchNormalization, Conv2D, Dense, GlobalAveragePooling2D, Input from tensorflow.keras.models import Model from tensorflow.keras.regularizers import l2 from tensorflow.keras import backend as K import os
# Build a residual block factory for the dual network.
def residual_block():
    """Return a function that applies one residual block.

    The block is conv -> BN -> ReLU -> conv -> BN, added to the
    input (skip connection), followed by a final ReLU.
    Relies on module-level `conv` and `DN_FILTERS`.
    """
    def apply(tensor):
        shortcut = tensor
        out = conv(DN_FILTERS)(tensor)
        out = BatchNormalization()(out)
        out = Activation('relu')(out)
        out = conv(DN_FILTERS)(out)
        out = BatchNormalization()(out)
        # skip connection, then the closing activation
        out = Add()([out, shortcut])
        out = Activation('relu')(out)
        return out
    return apply
# デュアルネットワークの作成 defdual_network(): # モデル作成済みの場合は無処理 if os.path.exists('./model/best.h5'): return
# 入力層 input = Input(shape=DN_INPUT_SHAPE)
# 畳み込み層 x = conv(DN_FILTERS)(input) x = BatchNormalization()(x) x = Activation('relu')(x)
# 残差ブロック x 16 for i inrange(DN_RESIDUAL_NUM): x = residual_block()(x)
# プーリング層 x = GlobalAveragePooling2D()(x)
# ポリシー出力 p = Dense(DN_OUTPUT_SIZE, kernel_regularizer=l2(0.0005), activation='softmax', name='pi')(x)
# バリュー出力 v = Dense(1, kernel_regularizer=l2(0.0005))(x) v = Activation('tanh', name='v')(v)
# モデルの作成 model = Model(inputs=input, outputs=[p,v])
# パッケージのインポート from game import State from dual_network import DN_INPUT_SHAPE from math import sqrt from tensorflow.keras.models import load_model from pathlib import Path import numpy as np
# 推論 defpredict(model, state): # 推論のための入力テ゛ータのシェイフ゜の変換 a, b, c = DN_INPUT_SHAPE x = np.array(state.pieces_array()) x = x.reshape(c, a, b).transpose(1, 2, 0).reshape(1, a, b, c)
# パッケージのインポート from game import State from pv_mcts import pv_mcts_scores from dual_network import DN_OUTPUT_SIZE from datetime import datetime from tensorflow.keras.models import load_model from tensorflow.keras import backend as K from pathlib import Path import numpy as np import pickle import os
# パッケージのインポート from dual_network import DN_INPUT_SHAPE from tensorflow.keras.callbacks import LearningRateScheduler, LambdaCallback from tensorflow.keras.models import load_model from tensorflow.keras import backend as K from pathlib import Path import numpy as np import pickle
# Training parameters
RN_EPOCHS = 100  # number of training epochs
# Load the training data saved by the self-play stage.
def load_data():
    """Unpickle and return the newest ./data/*.history file.

    History files are named by timestamp, so the lexicographically
    last path is the most recent one.
    Raises IndexError if ./data contains no *.history file.
    """
    history_path = sorted(Path('./data').glob('*.history'))[-1]
    with history_path.open(mode='rb') as f:
        return pickle.load(f)
# パッケージのインポート from game import State from pv_mcts import pv_mcts_action from tensorflow.keras.models import load_model from tensorflow.keras import backend as K from pathlib import Path from shutil import copy import numpy as np
# パッケージのインポート from dual_network import dual_network from self_play import self_play from train_network import train_network from evaluate_network import evaluate_network
# デュアルネットワークの作成 dual_network()
for i inrange(10): print('Train',i,'====================') # セルフプレイ部 self_play()
# パッケージのインポート from game import State from pv_mcts import pv_mcts_action from tensorflow.keras.models import load_model from pathlib import Path from threading import Thread import tkinter as tk from PIL import Image, ImageTk
# ベストプレイヤーのモデルの読み込み model = load_model('./model/best.h5')
# 持ち駒の種類の取得 captures = [] for i inrange(3): if self.state.pieces[12+i] >= 2: captures.append(1+i) if self.state.pieces[12+i] >= 1: captures.append(1+i)
# Convert a (source, destination) square pair into a direction index.
def position_to_direction(self, position_src, position_dst):
    """Return the index i such that self.dxy[i] equals the move's
    (dx, dy) offset on the 3-wide board; 0 if no direction matches."""
    delta_x = position_dst % 3 - position_src % 3
    delta_y = int(position_dst / 3) - int(position_src / 3)
    for direction in range(8):
        if (self.dxy[direction][0], self.dxy[direction][1]) == (delta_x, delta_y):
            return direction
    # no matching direction — fall back to 0 (matches original behaviour)
    return 0
駒の描画を行います。
human_play.py
1 2 3 4 5 6
# Draw a board piece on the canvas.
def draw_piece(self, index, first_player, piece_type):
    """Draw piece image *piece_type* on board square *index*.

    first_player selects the image orientation row (0 for the first
    player, 1 for the second). Squares are 80px, with a 40px top margin.
    """
    x = (index % 3) * 80
    y = int(index / 3) * 80 + 40
    row = 0 if first_player else 1  # orientation row in the image table
    self.c.create_image(x, y, image=self.images[piece_type][row], anchor=tk.NW)
持ち駒の描画を行います。
human_play.py
1 2 3 4 5 6 7 8 9
# Draw the captured pieces held by one player.
def draw_capture(self, first_player, pieces):
    """Draw one player's pieces in hand along their edge of the board.

    pieces[12+i] holds the count (0..2) of captured piece kind 1+i.
    The layout (image row, start x, x step, y) is mirrored for the
    second player so their hand grows in the opposite direction.
    """
    index, x, dx, y = (2, 0, 40, 360) if first_player else (3, 200, -40, 0)
    captures = []
    for i in range(3):
        # one list entry per held copy (a kind can be held at most twice)
        if pieces[12+i] >= 2:
            captures.append(1+i)
        if pieces[12+i] >= 1:
            captures.append(1+i)
    for slot, kind in enumerate(captures):
        self.c.create_image(x+dx*slot, y, image=self.images[kind][index], anchor=tk.NW)
カーソルの描画を行います。
human_play.py
1 2 3 4 5 6
# Draw the selection cursor.
def draw_cursor(self, x, y, size):
    """Draw a red square cursor of side *size* with its top-left at (x, y),
    inset by one pixel so it sits inside the cell border."""
    left, top = x + 1, y + 1
    right, bottom = x + size - 1, y + size - 1
    for x0, y0, x1, y1 in ((left, top, right, top),        # top edge
                           (left, bottom, right, bottom),  # bottom edge
                           (left, top, left, bottom),      # left edge
                           (right, top, right, bottom)):   # right edge
        self.c.create_line(x0, y0, x1, y1, width = 4.0, fill = '#FF0000')
# 描画の更新 defon_draw(self): # マス目 self.c.delete('all') self.c.create_rectangle(0, 0, 240, 400, width = 0.0, fill = '#EDAA56') for i inrange(1,3): self.c.create_line(i*80+1, 40, i*80, 360, width = 2.0, fill = '#000000') for i inrange(5): self.c.create_line(0, 40+i*80, 240, 40+i*80, width = 2.0, fill = '#000000')
# 駒 for p inrange(12): p0, p1 = (p, 11-p) if self.state.is_first_player() else (11-p, p) if self.state.pieces[p0] != 0: self.draw_piece(p, self.state.is_first_player(), self.state.pieces[p0]) if self.state.enemy_pieces[p1] != 0: self.draw_piece(p, not self.state.is_first_player(), self.state.enemy_pieces[p1])
# Whether placing a stone on square (x, y) is a legal move (6x6 reversi).
def is_legal_action_xy(self, x, y, flip=False):
    """Return True if placing on (x, y) captures at least one opponent
    stone in some direction. When flip is True, also place the stone and
    flip every captured opponent stone (mutates pieces/enemy_pieces)."""

    # Does the line from (x, y) in direction (dx, dy) capture stones?
    def is_legal_action_xy_dxy(x, y, dx, dy):
        # First neighbour must be an opponent stone.
        x, y = x + dx, y + dy
        if y < 0 or 5 < y or x < 0 or 5 < x or \
                self.enemy_pieces[x+y*6] != 1:
            return False

        # Scan onward along the direction.
        for j in range(6):
            # Off the board or an empty square ends the run: no capture.
            if y < 0 or 5 < y or x < 0 or 5 < x or \
                    (self.enemy_pieces[x+y*6] == 0 and self.pieces[x+y*6] == 0):
                return False

            # Reached one of our own stones: the run is capturable.
            if self.pieces[x+y*6] == 1:
                # Flip the captured stones walking back toward the start;
                # the walk stops at the stone just placed by the caller.
                if flip:
                    for i in range(6):
                        x, y = x - dx, y - dy
                        if self.pieces[x+y*6] == 1:
                            return True
                        self.pieces[x+y*6] = 1
                        self.enemy_pieces[x+y*6] = 0
                return True

            # Still an opponent stone: keep scanning.
            x, y = x + dx, y + dy
        return False

    # Target square must be empty.
    if self.enemy_pieces[x+y*6] == 1 or self.pieces[x+y*6] == 1:
        return False

    # Place the stone first when actually applying the move.
    if flip:
        self.pieces[x+y*6] = 1

    # Legal if any of the eight directions captures something.
    flag = False
    for dx, dy in self.dxy:
        if is_legal_action_xy_dxy(x, y, dx, dy):
            flag = True
    return flag
# パッケージのインポート from tensorflow.keras.layers import Activation, Add, BatchNormalization, Conv2D, Dense, GlobalAveragePooling2D, Input from tensorflow.keras.models import Model from tensorflow.keras.regularizers import l2 from tensorflow.keras import backend as K import os
# Build a residual block factory for the dual network.
def residual_block():
    """Return a function that applies one residual block
    (conv -> BN -> ReLU -> conv -> BN, plus skip connection, then ReLU).
    Relies on module-level `conv` and `DN_FILTERS`."""
    def apply(tensor):
        shortcut = tensor
        out = conv(DN_FILTERS)(tensor)
        out = BatchNormalization()(out)
        out = Activation('relu')(out)
        out = conv(DN_FILTERS)(out)
        out = BatchNormalization()(out)
        out = Add()([out, shortcut])
        out = Activation('relu')(out)
        return out
    return apply
# デュアルネットワークの作成 defdual_network(): # モデル作成済みの場合は無処理 if os.path.exists('./model/best.h5'): return
# 入力層 input = Input(shape=DN_INPUT_SHAPE)
# 畳み込み層 x = conv(DN_FILTERS)(input) x = BatchNormalization()(x) x = Activation('relu')(x)
# 残差ブロック x 16 for i inrange(DN_RESIDUAL_NUM): x = residual_block()(x)
# プーリング層 x = GlobalAveragePooling2D()(x)
# ポリシー出力 p = Dense(DN_OUTPUT_SIZE, kernel_regularizer=l2(0.0005), activation='softmax', name='pi')(x)
# バリュー出力 v = Dense(1, kernel_regularizer=l2(0.0005))(x) v = Activation('tanh', name='v')(v)
# モデルの作成 model = Model(inputs=input, outputs=[p,v])
# パッケージのインポート from game import State from dual_network import DN_INPUT_SHAPE from math import sqrt from tensorflow.keras.models import load_model from pathlib import Path import numpy as np
# 推論 defpredict(model, state): # 推論のための入力テ゛ータのシェイフ゜の変換 a, b, c = DN_INPUT_SHAPE x = np.array([state.pieces, state.enemy_pieces]) x = x.reshape(c, a, b).transpose(1, 2, 0).reshape(1, a, b, c)
# パッケージのインポート from game import State from pv_mcts import pv_mcts_scores from dual_network import DN_OUTPUT_SIZE from datetime import datetime from tensorflow.keras.models import load_model from tensorflow.keras import backend as K from pathlib import Path import numpy as np import pickle import os
# パッケージのインポート from dual_network import DN_INPUT_SHAPE from tensorflow.keras.callbacks import LearningRateScheduler, LambdaCallback from tensorflow.keras.models import load_model from tensorflow.keras import backend as K from pathlib import Path import numpy as np import pickle
# Training parameters
RN_EPOCHS = 100  # number of training epochs
# Load the training data saved by the self-play stage.
def load_data():
    """Unpickle and return the newest ./data/*.history file.
    Raises IndexError if ./data contains no *.history file."""
    history_path = sorted(Path('./data').glob('*.history'))[-1]
    with history_path.open(mode='rb') as f:
        return pickle.load(f)
# パッケージのインポート from game import State from pv_mcts import pv_mcts_action from tensorflow.keras.models import load_model from tensorflow.keras import backend as K from pathlib import Path from shutil import copy import numpy as np
# パッケージのインポート from dual_network import dual_network from self_play import self_play from train_network import train_network from evaluate_network import evaluate_network
# デュアルネットワークの作成 dual_network()
for i inrange(10): print('Train',i,'====================') # セルフプレイ部 self_play()
# パッケージのインポート from game import State from pv_mcts import pv_mcts_action from tensorflow.keras.models import load_model from pathlib import Path from threading import Thread import tkinter as tk
# ベストプレイヤーのモデルの読み込み model = load_model('./model/best.h5')
# Count the stones on a board.
def piece_count(self, pieces):
    """Return how many entries of *pieces* equal 1 (occupied squares).

    Counts strictly the value 1, matching the original loop; other
    values (e.g. 0 or 2) are ignored.
    """
    return sum(1 for v in pieces if v == 1)
負けかどうかを判定します。
game.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
# Whether the current player has lost (7x6 connect four).
def is_lose(self):
    """Return True if the opponent has four connected stones
    (horizontally, vertically, or on either diagonal)."""

    # Four consecutive opponent stones starting at (x, y), step (dx, dy)?
    def is_comp(x, y, dx, dy):
        for k in range(4):
            # off the 7x6 board, or not an opponent stone, breaks the run
            if y < 0 or 5 < y or x < 0 or 6 < x or \
                    self.enemy_pieces[x+y*7] == 0:
                return False
            x, y = x + dx, y + dy
        return True

    # Try every square as the start of a run in each of the 4 line types.
    for j in range(6):
        for i in range(7):
            if is_comp(i, j, 1, 0) or is_comp(i, j, 0, 1) or \
                    is_comp(i, j, 1, -1) or is_comp(i, j, 1, 1):
                return True
    return False
# パッケージのインポート from tensorflow.keras.layers import Activation, Add, BatchNormalization, Conv2D, Dense, GlobalAveragePooling2D, Input from tensorflow.keras.models import Model from tensorflow.keras.regularizers import l2 from tensorflow.keras import backend as K import os
# Build a residual block factory for the dual network.
def residual_block():
    """Return a function that applies one residual block
    (conv -> BN -> ReLU -> conv -> BN, plus skip connection, then ReLU).
    Relies on module-level `conv` and `DN_FILTERS`."""
    def apply(tensor):
        shortcut = tensor
        out = conv(DN_FILTERS)(tensor)
        out = BatchNormalization()(out)
        out = Activation('relu')(out)
        out = conv(DN_FILTERS)(out)
        out = BatchNormalization()(out)
        out = Add()([out, shortcut])
        out = Activation('relu')(out)
        return out
    return apply
# デュアルネットワークの作成 defdual_network(): # モデル作成済みの場合は無処理 if os.path.exists('./model/best.h5'): return
# 入力層 input = Input(shape=DN_INPUT_SHAPE)
# 畳み込み層 x = conv(DN_FILTERS)(input) x = BatchNormalization()(x) x = Activation('relu')(x)
# 残差ブロック x 16 for i inrange(DN_RESIDUAL_NUM): x = residual_block()(x)
# プーリング層 x = GlobalAveragePooling2D()(x)
# ポリシー出力 p = Dense(DN_OUTPUT_SIZE, kernel_regularizer=l2(0.0005), activation='softmax', name='pi')(x)
# バリュー出力 v = Dense(1, kernel_regularizer=l2(0.0005))(x) v = Activation('tanh', name='v')(v)
# モデルの作成 model = Model(inputs=input, outputs=[p,v])
# パッケージのインポート from game import State from dual_network import DN_INPUT_SHAPE from math import sqrt from tensorflow.keras.models import load_model from pathlib import Path import numpy as np
# 推論 defpredict(model, state): # 推論のための入力テ゛ータのシェイフ゜の変換 a, b, c = DN_INPUT_SHAPE x = np.array([state.pieces, state.enemy_pieces]) x = x.reshape(c, a, b).transpose(1, 2, 0).reshape(1, a, b, c)
# パッケージのインポート from game import State from pv_mcts import pv_mcts_scores from dual_network import DN_OUTPUT_SIZE from datetime import datetime from tensorflow.keras.models import load_model from tensorflow.keras import backend as K from pathlib import Path import numpy as np import pickle import os
# パッケージのインポート from dual_network import DN_INPUT_SHAPE from tensorflow.keras.callbacks import LearningRateScheduler, LambdaCallback from tensorflow.keras.models import load_model from tensorflow.keras import backend as K from pathlib import Path import numpy as np import pickle
# Training parameters
RN_EPOCHS = 100  # number of training epochs
# Load the training data saved by the self-play stage.
def load_data():
    """Unpickle and return the newest ./data/*.history file.
    Raises IndexError if ./data contains no *.history file."""
    history_path = sorted(Path('./data').glob('*.history'))[-1]
    with history_path.open(mode='rb') as f:
        return pickle.load(f)
# パッケージのインポート from game import State from pv_mcts import pv_mcts_action from tensorflow.keras.models import load_model from tensorflow.keras import backend as K from pathlib import Path from shutil import copy import numpy as np
# パッケージのインポート from dual_network import dual_network from self_play import self_play from train_network import train_network from evaluate_network import evaluate_network
# デュアルネットワークの作成 dual_network()
for i inrange(10): print('Train',i,'====================') # セルフプレイ部 self_play()
# パッケージのインポート from game import State from pv_mcts import pv_mcts_action from tensorflow.keras.models import load_model from pathlib import Path from threading import Thread import tkinter as tk
# ベストプレイヤーのモデルの読み込み model = load_model('./model/best.h5')
# Draw one stone on the connect-four canvas.
def draw_piece(self, index, first_player):
    """Draw the stone on cell *index* (7 columns, 40px cells, 5px inset);
    red for the first player, yellow for the second."""
    x = (index % 7) * 40 + 5
    y = int(index / 7) * 40 + 5
    color = '#FF0000' if first_player else '#FFFF00'
    self.c.create_oval(x, y, x+30, y+30, width = 1.0, fill = color)
描画の更新を行います。すべてのマス目と石を描画します。
human_play.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Redraw the whole board: background, 42 empty cells, then the stones.
def on_draw(self):
    """Clear the canvas and repaint the 7x6 grid and every placed stone.

    pieces/enemy_pieces are relative to the player to move, so the color
    passed to draw_piece depends on whose turn it is.
    """
    self.c.delete('all')
    self.c.create_rectangle(0, 0, 280, 240, width = 0.0, fill = '#00A0FF')
    # empty cells
    for i in range(42):
        x = (i % 7) * 40 + 5
        y = int(i / 7) * 40 + 5
        self.c.create_oval(x, y, x+30, y+30, width = 1.0, fill = '#FFFFFF')
    # stones of both players
    for i in range(42):
        if self.state.pieces[i] == 1:
            self.draw_piece(i, self.state.is_first_player())
        if self.state.enemy_pieces[i] == 1:
            self.draw_piece(i, not self.state.is_first_player())
ゲームUIを実行します。
human_play.py
1 2 3 4
# ゲームUIの実行 f = GameUI(model=model) f.pack() f.mainloop()
# パッケージのインポート from game import State from pv_mcts import pv_mcts_action from game import State, random_action, alpha_beta_action, mcts_action from tensorflow.keras.models import load_model from pathlib import Path from threading import Thread import tkinter as tk
学習したベストプレイヤーのモデルを読み込みます。
human_play.py
1 2
# ベストプレイヤーのモデルの読み込み model = load_model('./model/best.h5')
# パッケージのインポート from dual_network import dual_network from self_play import self_play from train_network import train_network from evaluate_network import evaluate_network from evaluate_best_player import evaluate_best_player
# パッケージのインポート from game import State, random_action, alpha_beta_action, mcts_action from pv_mcts import pv_mcts_action from tensorflow.keras.models import load_model from tensorflow.keras import backend as K from pathlib import Path import numpy as np
# パッケージのインポート from game import State from pv_mcts import pv_mcts_action from tensorflow.keras.models import load_model from tensorflow.keras import backend as K from pathlib import Path from shutil import copy import numpy as np
# パッケージのインポート from dual_network import DN_INPUT_SHAPE from tensorflow.keras.callbacks import LearningRateScheduler, LambdaCallback from tensorflow.keras.models import load_model from tensorflow.keras import backend as K from pathlib import Path import numpy as np import pickle
パラメータを定義します。RN_EPOCHSは学習回数を表します。
train_network.py
1 2
# Training parameters
RN_EPOCHS = 100  # number of training epochs
セルフプレイ部で保存した学習データ(data/*.history)を読み込む関数を定義します。
train_network.py
1 2 3 4 5
# Load the training data saved by the self-play stage.
def load_data():
    """Unpickle and return the newest ./data/*.history file.
    Raises IndexError if ./data contains no *.history file."""
    history_path = sorted(Path('./data').glob('*.history'))[-1]
    with history_path.open(mode='rb') as f:
        return pickle.load(f)
# パッケージのインポート from game import State from pv_mcts import pv_mcts_scores from dual_network import DN_OUTPUT_SIZE from datetime import datetime from tensorflow.keras.models import load_model from tensorflow.keras import backend as K from pathlib import Path import numpy as np import pickle import os
# パッケージのインポート from game import State from dual_network import DN_INPUT_SHAPE from math import sqrt from tensorflow.keras.models import load_model from pathlib import Path import numpy as np
# 推論 defpredict(model, state): # 推論のための入力テ゛ータのシェイフ゜の変換 a, b, c = DN_INPUT_SHAPE x = np.array([state.pieces, state.enemy_pieces]) x = x.reshape(c, a, b).transpose(1, 2, 0).reshape(1, a, b, c)