AnyTrading - イーサリアム投資を強化学習で実行　学習アルゴリズムACKTR（４番目） - 全勝モデルの再検証

1月 29, 2021

1月17日の記事にて学習アルゴリズムACKTRでイーサリアムの学習済みモデルを１０種類作成しました。

そのうちの４番目の学習済みモデルに対して、検証したところ全勝だったので少し条件を変えて再検証してみます。

ソース

前回までの検証では、データ参照位置を５０日分ずらしながら検証していましたが、今回は２０日ずつ移動してみます。

データ参照位置を変えることで、試行回数を増やすことと、別パターンでのデータで投資結果がどのように変わるのかを確認することが目的です。

ソースは下記の通りです。

[ソース]

import os, gym
import datetime
import gym_anytrading
import matplotlib.pyplot as plt
from gym_anytrading.envs import TradingEnv, ForexEnv, StocksEnv, Actions, Positions
from gym_anytrading.datasets import FOREX_EURUSD_1H_ASK, STOCKS_GOOGL
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2
from stable_baselines import ACKTR
from stable_baselines.bench import Monitor
from stable_baselines.common import set_global_seeds

import numpy as np
import matplotlib.pyplot as plt

# 勝敗をカウントする
def count(lst):
    cnt_win = 0
    cnt_lose = 0
    cnt_draw = 0
    for x in lst:
        if x == 0:
            cnt_draw += 1
        elif x > 0:
            cnt_win += 1
        else:
            cnt_lose += 1

    return cnt_win, cnt_lose, cnt_draw

def simulation(i, prm):
    global means
    # ログフォルダの生成
    log_dir = './logs/'
    os.makedirs(log_dir, exist_ok=True)
    # 環境の生成
    env = gym.make('forex-v0', frame_bound=(prm['start_idx'],
                                            prm['end_idx']),
                                            window_size = prm['window_size'])
    env = Monitor(env, log_dir, allow_early_resets=True)
    # シードの指定
    env.seed(0)
    set_global_seeds(0)
    # ベクトル化環境の生成
    env = DummyVecEnv([lambda: env])
    # モデルの読み込み
    # model = PPO2.load('model{}'.format(i))
    model = ACKTR.load('model{}'.format(i))
    # モデルのテスト
    env = gym.make('forex-v0', frame_bound=(prm['start_idx'] + prm['move_idx'],
                                            prm['end_idx']   + prm['move_idx']),
                                            window_size = prm['window_size'])
    env.seed(0)
    state = env.reset()
    while True:
        # 行動の取得
        action, _ = model.predict(state)    # 0 or 1
        # 1ステップ実行
        state, reward, done, info = env.step(action)
        # エピソード完了
        if done:
            print('info:', info, info['total_reward'])  # info: {'total_reward': 8610370000.0, 'total_profit': 1.7844206334206751, 'position': 1} 8610370000.0
            means.append(info['total_reward'])
            break
    # グラフのプロット
    plt.cla()
    env.render_all()

cnt_win = 0
cnt_lose = 0
cnt_draw = 0
for move_idx in range(0, 1001, 20):
    labels = []
    means = []
    prm = {'window_size':    10,      #window_size 参照すべき直前のデータ数
           'start_idx'  :    10,      #start_idx 学習データの開始位置
           'end_idx'    :  1010,      #end_idx 学習データの終了位置
           'move_idx'   :   move_idx} #学習データからの移動分。移動したものを検証データとする。
    for i in range(30):
        labels.append('{}'.format(i))
        simulation(4, prm)

    x = np.arange(len(labels))
    width = 0.35

    fig, ax = plt.subplots()

    rect = ax.bar(x, means, width)
    ax.set_xticks(x)
    ax.set_xticklabels(labels)

    cnt = count(means)
    plt.title('[Average]{:,.0f}  [Win]{} [Lose]{} [Draw]{}'.format(np.average(means), cnt[0], cnt[1], cnt[2]))

    plt.savefig('trading{:03d}.png'.format(move_idx))

    if cnt[0] == cnt[1]:
        cnt_draw += 1
    elif cnt[0] > cnt[1]:
        cnt_win += 1
    else:
        cnt_lose += 1

print('{}勝 {}敗 {}分'.format(cnt_win, cnt_lose, cnt_draw))