Fetch the repository succeeded.
This action will force synchronization from lizhigong/DQN-9pointgame, overwriting any changes you have made since you forked the repository; this cannot be undone!
The synchronization runs in the background and the page will refresh when it finishes. Please be patient.
import tensorflow as tf
import numpy as np
import random
import os
import Map
#定义DQN
class DQN():
def __init__(self):
self.n_input = Map.mapsize * Map.mapsize
self.n_output = 1
self.current_q_step = 0
self.avg_loss = 0
self.train_times = 0
self.x = tf.placeholder("float", [None, Map.mapsize, Map.mapsize], name = 'x')
self.y = tf.placeholder("float", [None, self.n_output], name = 'y')
self.create_Q_network()
self.create_training_method()
self.saver = tf.train.Saver()
self.sess = tf.Session()
#self.sess = tf.InteractiveSession()
self.sess.run(tf.initialize_all_variables())
def create_Q_network(self):
wd0 = tf.Variable(tf.random_normal([self.n_input, 512], stddev = 0.1), dtype=tf.float32, name = 'wd0')
wd1 = tf.Variable(tf.random_normal([512, 256], stddev = 0.1), dtype=tf.float32, name = 'wd1')
wd2 = tf.Variable(tf.random_normal([256, 256], stddev = 0.1), dtype=tf.float32, name = 'wd2')
wd3 = tf.Variable(tf.random_normal([256, self.n_output], stddev = 0.1), dtype=tf.float32, name = 'wd3')
bd0 = tf.Variable(tf.random_normal([512], stddev = 0.1), dtype=tf.float32, name = 'bd0')
bd1 = tf.Variable(tf.random_normal([256], stddev = 0.1), dtype=tf.float32, name = 'bd1')
bd2 = tf.Variable(tf.random_normal([256], stddev = 0.1), dtype=tf.float32, name = 'bd2')
bd3 = tf.Variable(tf.random_normal([self.n_output], stddev = 0.1), dtype=tf.float32, name = 'bd3')
weights = {
'wd0' : wd0,
'wd1' : wd1,
'wd2' : wd2,
'wd3' : wd3
}
biases = {
'bd0' : bd0,
'bd1' : bd1,
'bd2' : bd2,
'bd3' : bd3
}
self.Q_value = self.conv_basic(self.x, weights, biases)
self.Q_Weihgts = [weights, biases]
def conv_basic(self, _input, _w, _b):
#input
_out = tf.reshape(_input, shape = [-1, self.n_input])
#fully connected layer0
_out = tf.nn.relu(tf.add(tf.matmul(_out, _w['wd0']), _b['bd0']))
#fully connected layer1
_out = tf.nn.relu(tf.add(tf.matmul(_out, _w['wd1']), _b['bd1']))
#fully connected layer2
_out = tf.nn.relu(tf.add(tf.matmul(_out, _w['wd2']), _b['bd2']))
#fully connected layer3
_out = tf.add(tf.matmul(_out, _w['wd3']), _b['bd3'])
return _out
def copyWeightsToTarget(self):
for i in range(len(self.Q_Weihgts)):
for key,value in self.Q_Weihgts[i].items():
self.sess.run(tf.assign(self.TargetQ_Weights[i][key], self.Q_Weihgts[i][key]))
def create_training_method(self):
#self.cost = tf.reduce_mean(self.LosFunction(logits=self.Q_value, labels=self.y))
self.cost = tf.reduce_sum(tf.squared_difference(self.Q_value,self.y))
self.optm = tf.train.AdamOptimizer(learning_rate = 0.001, name='Adam').minimize(self.cost)
'''
def LosFunction(self, logits, labels):
los = tf.square(logits - labels)
return los
'''
def restore(self):
if os.path.exists('Saver/annsaver.ckpt-0.index'):
self.saver.restore(self.sess, os.path.abspath('Saver/annsaver.ckpt-0'))
#黑棋代表电脑 如果该白旗走的话 用黑白反转棋盘
def computerPlay(self, IsTurnWhite):
board = []
if IsTurnWhite:
board = np.array(Map.blackBoard)
else:
board = np.array(Map.whiteBoard)
boards = []
positions = []
for i in range(Map.mapsize):
for j in range(Map.mapsize):
if board[j][i] == Map.backcode:
predx = np.copy(board)
predx[j][i] = 2
boards.append(predx)
positions.append([i, j])
if len(positions) == 0:
return 0,0,0
nextStep = None
#if Map.AutoPlay == 0:
nextStep = self.sess.run(self.Q_value, feed_dict = {self.x : boards})
#else:
#nextStep = self.sess.run(self.TargetQ_value, feed_dict = {self.x : boards})
#print(nextStep)
maxx = 0
maxy = 0
maxValue = -1000 #实际最大价值 用于后续学习
# maxi = 0
for i in range(len(positions)):
value = nextStep[i]
if value > maxValue:
maxValue = value
maxx = positions[i][0]
maxy = positions[i][1]
# print(boards)
# print(nextStep)
if Map.AutoPlay > 0:
step = random.randint(0, len(positions) - 1)
maxx = positions[step][0]
maxy = positions[step][1]
return maxx, maxy, maxValue
def TrainOnce(self, winner):
board1 = np.array(Map.mapRecords1)
board2 = np.array(Map.mapRecords2)
step1 = np.array(Map.stepRecords1)
step2 = np.array(Map.stepRecords2)
scoreR1 = np.array(Map.scoreRecords1)
scoreR2 = np.array(Map.scoreRecords2)
board1 = np.reshape(board1, [-1, Map.mapsize, Map.mapsize])
board2 = np.reshape(board2, [-1, Map.mapsize, Map.mapsize])
step1 = np.reshape(step1, [-1, Map.mapsize, Map.mapsize])
step2 = np.reshape(step2, [-1, Map.mapsize, Map.mapsize])
score1 = []
score2 = []
board1 = (board1 * (1 - step1)) + step1 * 2
board2 = (board2 * (1 - step2)) + step2 * 2
#每步的价值 = 奖励(胜1负-1其他0) + (-0.95) * 对方棋盘能达到的最大价值(max taget Q)
for i in range(len(board1)):
if i == len(scoreR2):#白方多一步 白方赢
score1.append([1.0]) #获得1分奖励
if winner == 2:
print('error step count!')
else:
score1.append([scoreR2[i] * -0.8])
#score1.append([0])
if winner == 2:
#惩罚败方的最后一步
score1[len(score1) - 1][0] = -0.8
for i in range(len(board2)):
if i == len(scoreR1) - 1:#黑白方步数一样 黑方赢
score2.append([1.0])
if winner == 1:
print('error step count!')
else:
score2.append([scoreR1[i + 1] * -0.8])
#score2.append([0])
if winner == 1:
score2[len(score2) - 1][0] = -0.8
borders = np.concatenate([board1, board2],axis=0)
scores = np.concatenate([score1, score2],axis=0)
_, totalLoss = self.sess.run([self.optm, self.cost], feed_dict = {self.x : borders,
self.y:scores })
self.avg_loss += totalLoss
self.train_times += 1
if Map.AutoPlay % 100 == 0:
print('train avg loss ' + str(self.avg_loss / self.train_times) + ' has times ' + str(Map.AutoPlay))
self.avg_loss = 0
self.train_times = 0
if Map.AutoPlay == 0:
self.saver.save(self.sess, os.path.abspath('Saver/annsaver.ckpt'), global_step = 0)
else:
self.saver.save(self.sess, os.path.abspath('Saver/annsaver.ckpt'), global_step = (Map.AutoPlay - 1) // 100)
def PlayWidthHuman(self):
self.restore()
Map.PlayWithComputer = self.computerPlay
Map.TrainNet = self.TrainOnce
Map.ShowWind()
if __name__ == '__main__':
    # Script entry point: build the agent and launch the interactive game.
    agent = DQN()
    agent.PlayWidthHuman()
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。