# NOTE(review): removed a Gitee page artifact ("code pull complete; page will auto-refresh")
# that was accidentally captured into the source and broke Python parsing.
import sys
import gym
import envs.airsim
from baselines import deepq
def callback(lcl, glb):
    """Early-stopping hook passed to ``deepq.learn``.

    Args:
        lcl: dict of the trainer's local variables; this hook reads
            ``lcl['t']`` (timestep counter) and ``lcl['episode_rewards']``.
        glb: dict of the trainer's global variables (unused here).

    Returns:
        True to stop training once more than 100 timesteps have elapsed
        and the mean reward over the last 100 completed episodes reaches
        the (effectively unreachable) threshold of 199999.
    """
    # Guard clause: don't touch episode stats during the first 100 steps.
    if lcl['t'] <= 100:
        return False
    # Mean over the 100 most recently *completed* episodes (the slice
    # [-101:-1] deliberately excludes the in-progress last entry).
    recent_mean = sum(lcl['episode_rewards'][-101:-1]) / 100
    return recent_mean >= 199999
def main():
    """Train a DQN agent on the AirSim car environment and save the policy.

    Builds the custom ``AirSimCarEnv-v0`` gym environment, runs
    ``deepq.learn`` with a single 64-unit layer-normalized MLP Q-network,
    and pickles the resulting policy to ``car.pkl``.
    """
    env = gym.make("AirSimCarEnv-v0")

    # Single hidden layer of 64 units with layer normalization
    # (layer norm is required for param_noise exploration below).
    q_network = deepq.models.mlp([64], layer_norm=True)

    print("\n======= Training session starts for DQN Car =======")
    policy = deepq.learn(
        env,
        q_func=q_network,
        lr=1e-3,
        max_timesteps=100000,
        buffer_size=50000,
        exploration_fraction=1.0,  # 0.1,
        exploration_final_eps=0.02,
        print_freq=10,
        param_noise=True,          # parameter-space noise instead of eps-greedy alone
        checkpoint_freq=2,         # NOTE(review): checkpoints every 2 steps — unusually frequent; confirm intended
        learning_starts=5,
        callback=callback,         # early-stop hook defined above
    )

    save_path = "car.pkl"
    print("\nSaving model to", save_path)
    policy.save(save_path)
# Standard script entry guard: run training only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    main()
# NOTE(review): removed trailing Gitee moderation-notice boilerplate
# (hosting-site page text, not part of the source) that broke Python parsing.