# [Web-page artifact, not part of the module] "Code pull complete; the page will refresh automatically."
# +-----------------------------------------------+
# | RL-ROBOT. Reinforcement Learning for Robotics |
# | Angel Martinez-Tenor |
# | MAPIR. University of Malaga. 2016 |
# +-----------------------------------------------+
""" SARSA algorithm """
import time
import numpy as np
import agent
import exp
import lp
ID = "S"
CAPTION = "SARSA"
def setup():
    """Set up the algorithm.

    Nothing needs to be prepared for SARSA here, so this is an
    intentional no-op.
    """
    return None
def execute():
    """Run one SARSA learning step.

    Reads the current state, action, learning rate and the Q / V / policy
    tables from ``lp``; executes the action through ``agent``; waits
    ``lp.step_time`` seconds; observes the resulting state and reward;
    selects the next action; and applies the SARSA update to ``Q[s, a]``.
    ``V[s]`` and ``policy[s]`` are then refreshed from the updated Q row.

    All results (including the TD error) are written back to ``lp``.
    ``lp.s`` and ``lp.a`` are written back unchanged: this function does
    not advance them to the new state/action.
    """
    # Snapshot the learning-process variables before acting.
    state = lp.s
    action = lp.a
    learning_rate = lp.alpha
    q_table = lp.q
    v_table = lp.v
    greedy_policy = lp.policy

    # Act, let the step time elapse, then sense the outcome.
    agent.execute_action(action)
    time.sleep(lp.step_time)
    next_state = agent.observe_state()
    reward = agent.obtain_reward(state, action, next_state)

    # On-policy: the next action is chosen before the update and is the
    # one used in the TD target.
    next_action = agent.select_action(next_state)

    # TD error (SARSA): r + gamma * Q(s', a') - Q(s, a)
    td_target = reward + exp.GAMMA * q_table[next_state, next_action]
    td_error = td_target - q_table[state, action]

    # Update rule: Q(s, a) <- Q(s, a) + alpha * delta
    q_table[state, action] += learning_rate * td_error

    # Refresh V and the greedy policy for the state that was just updated.
    q_row = q_table[state]
    v_table[state] = np.max(q_row)
    greedy_policy[state] = np.argmax(q_row)

    # Publish the step's results back to the learning process.
    lp.s = state
    lp.a = action
    lp.sp = next_state
    lp.ap = next_action
    lp.r = reward
    lp.alpha = learning_rate
    lp.q = q_table
    lp.v = v_table
    lp.policy = greedy_policy
    lp.delta = td_error
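# [Web-page artifact, not part of the module] "This section may contain content unsuitable for display, so the page does not show it. You can review and modify it using the relevant editing functions."
# [Web-page artifact, not part of the module] "If you confirm the content involves no inappropriate language, pure advertising, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, or content violating national laws and regulations, you can click submit to appeal and we will handle it as soon as possible."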