长挺俊/DQN-9pointgame (forked from lizhigong/DQN-9pointgame)
AnnNet.py 7.45 KB
import tensorflow as tf
import numpy as np
import random
import os
import Map
# Define the DQN agent
class DQN():
    def __init__(self):
        self.n_input = Map.mapsize * Map.mapsize
        self.n_output = 1
        self.current_q_step = 0
        self.avg_loss = 0
        self.train_times = 0
        self.x = tf.placeholder("float", [None, Map.mapsize, Map.mapsize], name='x')
        self.y = tf.placeholder("float", [None, self.n_output], name='y')
        self.create_Q_network()
        self.create_training_method()
        self.saver = tf.train.Saver()
        self.sess = tf.Session()
        #self.sess = tf.InteractiveSession()
        self.sess.run(tf.global_variables_initializer())
    def create_Q_network(self):
        wd0 = tf.Variable(tf.random_normal([self.n_input, 512], stddev=0.1), dtype=tf.float32, name='wd0')
        wd1 = tf.Variable(tf.random_normal([512, 256], stddev=0.1), dtype=tf.float32, name='wd1')
        wd2 = tf.Variable(tf.random_normal([256, 256], stddev=0.1), dtype=tf.float32, name='wd2')
        wd3 = tf.Variable(tf.random_normal([256, self.n_output], stddev=0.1), dtype=tf.float32, name='wd3')
        bd0 = tf.Variable(tf.random_normal([512], stddev=0.1), dtype=tf.float32, name='bd0')
        bd1 = tf.Variable(tf.random_normal([256], stddev=0.1), dtype=tf.float32, name='bd1')
        bd2 = tf.Variable(tf.random_normal([256], stddev=0.1), dtype=tf.float32, name='bd2')
        bd3 = tf.Variable(tf.random_normal([self.n_output], stddev=0.1), dtype=tf.float32, name='bd3')
        weights = {
            'wd0': wd0,
            'wd1': wd1,
            'wd2': wd2,
            'wd3': wd3
        }
        biases = {
            'bd0': bd0,
            'bd1': bd1,
            'bd2': bd2,
            'bd3': bd3
        }
        self.Q_value = self.conv_basic(self.x, weights, biases)
        self.Q_Weihgts = [weights, biases]
    def conv_basic(self, _input, _w, _b):
        # input: flatten the board into a single feature vector
        _out = tf.reshape(_input, shape=[-1, self.n_input])
        # fully connected layer 0
        _out = tf.nn.relu(tf.add(tf.matmul(_out, _w['wd0']), _b['bd0']))
        # fully connected layer 1
        _out = tf.nn.relu(tf.add(tf.matmul(_out, _w['wd1']), _b['bd1']))
        # fully connected layer 2
        _out = tf.nn.relu(tf.add(tf.matmul(_out, _w['wd2']), _b['bd2']))
        # fully connected layer 3 (linear output)
        _out = tf.add(tf.matmul(_out, _w['wd3']), _b['bd3'])
        return _out
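    # The Q-network built above is a plain four-layer MLP: the mapsize*mapsize board is
    # flattened, passed through ReLU layers of 512, 256 and 256 units, and reduced to a
    # single linear output, i.e. one scalar Q-value per candidate board fed in.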
    def copyWeightsToTarget(self):
        for i in range(len(self.Q_Weihgts)):
            for key, value in self.Q_Weihgts[i].items():
                self.sess.run(tf.assign(self.TargetQ_Weights[i][key], self.Q_Weihgts[i][key]))
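    # Note: self.TargetQ_Weights (and the TargetQ_value referenced in commented-out code in
    # computerPlay below) are never created in this file. copyWeightsToTarget therefore
    # assumes a target network mirroring create_Q_network has been built elsewhere before
    # it is called.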
    def create_training_method(self):
        #self.cost = tf.reduce_mean(self.LosFunction(logits=self.Q_value, labels=self.y))
        self.cost = tf.reduce_sum(tf.squared_difference(self.Q_value, self.y))
        self.optm = tf.train.AdamOptimizer(learning_rate=0.001, name='Adam').minimize(self.cost)
    '''
    def LosFunction(self, logits, labels):
        los = tf.square(logits - labels)
        return los
    '''
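    # The cost above is a summed squared error between the network's Q-values and the
    # bootstrapped targets assembled in TrainOnce below, minimized with Adam (lr = 0.001).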
    def restore(self):
        if os.path.exists('Saver/annsaver.ckpt-0.index'):
            self.saver.restore(self.sess, os.path.abspath('Saver/annsaver.ckpt-0'))
    # Black represents the computer; if it is White's turn to move, use the color-inverted board.
    def computerPlay(self, IsTurnWhite):
        board = []
        if IsTurnWhite:
            board = np.array(Map.blackBoard)
        else:
            board = np.array(Map.whiteBoard)
        boards = []
        positions = []
        for i in range(Map.mapsize):
            for j in range(Map.mapsize):
                if board[j][i] == Map.backcode:
                    predx = np.copy(board)
                    predx[j][i] = 2
                    boards.append(predx)
                    positions.append([i, j])
        if len(positions) == 0:
            return 0, 0, 0
        nextStep = None
        #if Map.AutoPlay == 0:
        nextStep = self.sess.run(self.Q_value, feed_dict={self.x: boards})
        #else:
        #nextStep = self.sess.run(self.TargetQ_value, feed_dict = {self.x : boards})
        #print(nextStep)
        maxx = 0
        maxy = 0
        maxValue = -1000  # actual maximum value, kept for later learning
        # maxi = 0
        for i in range(len(positions)):
            value = nextStep[i]
            if value > maxValue:
                maxValue = value
                maxx = positions[i][0]
                maxy = positions[i][1]
        # print(boards)
        # print(nextStep)
        if Map.AutoPlay > 0:
            step = random.randint(0, len(positions) - 1)
            maxx = positions[step][0]
            maxy = positions[step][1]
        return maxx, maxy, maxValue
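    # computerPlay above enumerates every empty cell (Map.backcode), marks the candidate move
    # with 2, scores all resulting boards with the Q-network in one batch, and returns the
    # argmax move. While Map.AutoPlay > 0 (self-play) a uniformly random legal move is
    # returned instead, but maxValue still comes from the greedy evaluation and is kept as a
    # learning target.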
    def TrainOnce(self, winner):
        board1 = np.array(Map.mapRecords1)
        board2 = np.array(Map.mapRecords2)
        step1 = np.array(Map.stepRecords1)
        step2 = np.array(Map.stepRecords2)
        scoreR1 = np.array(Map.scoreRecords1)
        scoreR2 = np.array(Map.scoreRecords2)
        board1 = np.reshape(board1, [-1, Map.mapsize, Map.mapsize])
        board2 = np.reshape(board2, [-1, Map.mapsize, Map.mapsize])
        step1 = np.reshape(step1, [-1, Map.mapsize, Map.mapsize])
        step2 = np.reshape(step2, [-1, Map.mapsize, Map.mapsize])
        score1 = []
        score2 = []
        board1 = (board1 * (1 - step1)) + step1 * 2
        board2 = (board2 * (1 - step2)) + step2 * 2
        # Value of each step = reward (win 1, lose -1, otherwise 0) + (-0.95) * the maximum
        # value the opponent can reach on the resulting board (max target Q).
        # Note: the code below actually uses -0.8 for both the factor and the loss penalty.
        for i in range(len(board1)):
            if i == len(scoreR2):  # White made one more move, so White wins
                score1.append([1.0])  # reward of 1
                if winner == 2:
                    print('error step count!')
            else:
                score1.append([scoreR2[i] * -0.8])
                #score1.append([0])
        if winner == 2:
            # penalize the loser's last move
            score1[len(score1) - 1][0] = -0.8
        for i in range(len(board2)):
            if i == len(scoreR1) - 1:  # Black and White made the same number of moves, so Black wins
                score2.append([1.0])
                if winner == 1:
                    print('error step count!')
            else:
                score2.append([scoreR1[i + 1] * -0.8])
                #score2.append([0])
        if winner == 1:
            score2[len(score2) - 1][0] = -0.8
        borders = np.concatenate([board1, board2], axis=0)
        scores = np.concatenate([score1, score2], axis=0)
        _, totalLoss = self.sess.run([self.optm, self.cost], feed_dict={self.x: borders,
                                                                        self.y: scores})
        self.avg_loss += totalLoss
        self.train_times += 1
        if Map.AutoPlay % 100 == 0:
            print('train avg loss ' + str(self.avg_loss / self.train_times) + ' has times ' + str(Map.AutoPlay))
            self.avg_loss = 0
            self.train_times = 0
        if Map.AutoPlay == 0:
            self.saver.save(self.sess, os.path.abspath('Saver/annsaver.ckpt'), global_step=0)
        else:
            self.saver.save(self.sess, os.path.abspath('Saver/annsaver.ckpt'), global_step=(Map.AutoPlay - 1) // 100)
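    # TrainOnce above replays one finished game: Map.mapRecords*/stepRecords* give the board
    # before each move and the move itself (re-encoded as 2), scoreRecords* presumably hold
    # the opponent's maxValue returned by computerPlay, and a single optimizer step is taken
    # over the positions of both players.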
    def PlayWidthHuman(self):
        self.restore()
        Map.PlayWithComputer = self.computerPlay
        Map.TrainNet = self.TrainOnce
        Map.ShowWind()

if __name__ == '__main__':
    dqn = DQN()
    dqn.PlayWidthHuman()