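# [Hosting-page notice captured with the file, not part of the module] Code pull complete; the page will refresh automatically.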
import numpy as np
import mindspore.numpy as mnp
from mindspore import Tensor
class Agent:
    """Maze agent that picks actions from a Q-network and records transitions.

    The agent wraps an environment (``maze``) and a replay buffer.  On each
    move it queries the network for Q-values, chooses one of ``num_act``
    actions, advances the environment and pushes the resulting transition
    into the buffer.
    """

    def __init__(self, maze, memory_buffer, use_softmax=True):
        """Initialise the agent.

        Args:
            maze: Environment object.  It must expose ``state()``,
                ``state_update(action)`` and a ``maze`` attribute with a
                ``size`` (used to derive the minimum tolerated reward).
            memory_buffer: Replay buffer exposing ``push(transition)``.
            use_softmax: If true, sample actions from a softmax over the
                Q-values (``epsilon`` is the temperature); otherwise use an
                epsilon-greedy policy.
        """
        self.env = maze
        self.buffer = memory_buffer
        self.num_act = 4
        self.use_softmax = use_softmax
        self.total_reward = 0
        # Once the accumulated reward drops below this bound the episode is
        # forcibly terminated (see make_a_move).
        self.min_reward = -self.env.maze.size
        self.isgameon = True

    def make_a_move(self, net, epsilon, device='cpu'):
        """Choose an action, apply it to the environment and store the transition.

        Args:
            net: Callable Q-network; see ``select_action``.
            epsilon: Exploration parameter forwarded to ``select_action``.
            device: Forwarded to ``select_action`` (kept for interface
                compatibility).
        """
        action = self.select_action(net, epsilon, device)
        current_state = self.env.state()
        next_state, reward, self.isgameon = self.env.state_update(action)
        self.total_reward += reward

        # Abort the episode when the agent has accumulated too much penalty.
        if self.total_reward < self.min_reward:
            self.isgameon = False
        # Reset the running total so the next episode starts from zero.
        if not self.isgameon:
            self.total_reward = 0

        # NOTE(review): ``Transition`` is not defined in the visible part of
        # this file; it is presumably declared elsewhere in the module.
        transition = Transition(current_state, action, next_state, reward, self.isgameon)
        self.buffer.push(transition)

    def select_action(self, net, epsilon, device='cpu'):
        """Return the index of the action to take in the current state.

        Args:
            net: Callable taking a ``(1, N)`` float32 ``Tensor`` and returning
                a tensor of Q-values that supports ``asnumpy()``.
            epsilon: Softmax temperature when ``use_softmax`` is true,
                otherwise the probability of taking a random action.
            device: Unused.  MindSpore tensors have no per-tensor device
                move, so the argument is accepted only so existing callers
                keep working.

        Returns:
            int: Action index in ``range(self.num_act)``.
        """
        # Building the tensor from a float32 NumPy array yields a float32
        # tensor without needing the ``mindspore`` module name in scope.
        flat_state = np.asarray(self.env.state(), dtype=np.float32).reshape(1, -1)
        qvalues = net(Tensor(flat_state)).asnumpy()
        return self._pick_action(qvalues, epsilon)

    def _pick_action(self, qvalues, epsilon):
        """Select an action index from raw Q-values using the configured policy."""
        qvalues = np.asarray(qvalues, dtype=np.float64).reshape(-1)

        if self.use_softmax:
            # A non-positive temperature is the greedy limit of the softmax
            # policy; handling it here avoids a division by zero.
            if epsilon <= 0:
                return int(np.argmax(qvalues))
            logits = qvalues / epsilon
            # Subtract the maximum before exponentiating so that large
            # Q-values or small temperatures cannot overflow.
            logits = logits - logits.max()
            probs = np.exp(logits)
            probs /= probs.sum()
            return int(np.random.choice(self.num_act, p=probs))

        # Epsilon-greedy: explore with probability ``epsilon``.
        if np.random.random() < epsilon:
            return int(np.random.randint(self.num_act))
        return int(np.argmax(qvalues))
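# [Hosting-page notice captured with the file, not part of the module] This section may contain content unsuitable for display, so the page does not show it. You can review and modify it using the relevant editing features.
# If you confirm the content does not involve inappropriate language / pure advertising or traffic diversion / violence / vulgar or pornographic material / infringement / piracy / false information / worthless content, or content that violates national laws and regulations, you may click Submit to appeal, and we will handle it as soon as possible.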