from demo import Demonstration, load_all_demos
from utils import ACTION_TO_ID
from trainer import Trainer
from tqdm import tqdm
import copy
import os
import argparse
import numpy as np
import torch
if __name__ == '__main__':
    # args
    parser = argparse.ArgumentParser()
    parser.add_argument('-m', '--base_model', required=True)
    parser.add_argument('-d', '--demo_dir', required=True, help='path to dir with demos')
    parser.add_argument('-i', '--iterations', default=333, type=int, help='how many training steps')
    parser.add_argument('-s', '--seed', default=1234, type=int)
    parser.add_argument('-t', '--task_type', default='stack', help='stack/row/unstack/vertical_square')
    parser.add_argument('-o', '--out_dir', default=None, help='where to write finetuned model, WILL NOT SAVE IF BLANK')
    parser.add_argument('-l', '--learning_rate', default=1e-5, type=float, help='what learning rate to use')
    parser.add_argument('--trial_reward', default=False, action='store_true', help='use trial reward?')
    parser.add_argument('--future_reward_discount', dest='future_reward_discount', type=float, action='store', default=0.65)
    args = parser.parse_args()
    # workspace limits in robot coordinates, in meters (rows: x, y, z; cols: min, max)
    workspace_limits = np.asarray([[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.5]])
    # seed np.random
    np.random.seed(args.seed)
    # first load the demo(s)
    demos = load_all_demos(args.demo_dir, check_z_height=False,
        task_type=args.task_type)
    num_demos = len(demos)
    # now load the trainer
    model_path = os.path.abspath(args.base_model)
    place_common_sense = (args.task_type != 'unstack')
    if args.task_type == 'stack':
        place_dilation = 0.00
    else:
        place_dilation = 0.05
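    # NOTE (assumption, inferred from the flag names rather than verified Trainer internals):
    # place_common_sense appears to mask out placements that make no sense for the task and
    # is turned off for unstacking; place_dilation appears to widen the accepted placement
    # region, so the non-stack tasks (row, unstack, vertical_square) get a small 0.05 dilation.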
    trainer = Trainer(method='reinforcement', push_rewards=False,
        future_reward_discount=args.future_reward_discount, is_testing=False,
        snapshot_file=args.base_model,
        force_cpu=False, goal_condition_len=0, place=True, pretrained=True,
        flops=False, network='densenet', common_sense=True,
        place_common_sense=place_common_sense, show_heightmap=False,
        place_dilation=place_dilation, common_sense_backprop=True,
        trial_reward='spot', num_dilation=0, lr=args.learning_rate)
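    # The keyword arguments above mirror the Trainer constructor in trainer.py. A few notes,
    # based on the argument names rather than the Trainer source (assumptions):
    # - push_rewards=False: pushes are never rewarded, since the demos contain only grasp/place.
    # - common_sense / common_sense_backprop: mask out clearly infeasible pixels and
    #   backpropagate through that mask.
    # - trial_reward='spot': enable the SPOT-style trial reward bookkeeping used below.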
    print('train_offline.py assumes all demos are optimal with perfect action choices and exactly 6 steps long. Update the code if this is not the case.')
    # next compute the rewards for the trial (all steps successful)
    prog_rewards = np.array([1.0, 2.0, 2.0, 3.0, 3.0, 4.0])
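    # One entry per demo action: a 6-step demo is three grasp/place pairs at progress levels
    # 1, 2, 3. Consistent with the multiplier logic in the training loop below, a grasp is
    # rewarded with the current progress (1, 2, 3) and a place with the progress it achieves
    # (2, 3, 4), giving [1, 2, 2, 3, 3, 4].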
    print('Demo progress rewards: ' + str(prog_rewards))
    # compute trial_rewards
    trainer.clearance_log = [[6]]
    trainer.reward_value_log = prog_rewards[:, None]
    trainer.trial_reward_value_log_update()
    print('Demo trial rewards: ' + str(trainer.trial_reward_value_log))
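    # clearance_log marks the trial as ending after action 6, and reward_value_log holds the
    # per-step progress rewards as a column vector. trial_reward_value_log_update() then fills
    # trainer.trial_reward_value_log; presumably (not verified against trainer.py) it spreads
    # the end-of-trial outcome back through the episode using future_reward_discount.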
    if args.trial_reward:
        print('Fine tuning ' + args.task_type + ' task with Trial Reward')
    else:
        print('Fine tuning ' + args.task_type + ' task with Progress Reward')
    # store losses, checkpoint model every 25 iterations
    losses = []
    models = {}  # dict {iter: model_weights}
    for i in tqdm(range(args.iterations)):
        # sample a demo, a progress level in [1, 3], and a grasp or place action
        demo_num = np.random.randint(0, num_demos)
        progress = np.random.randint(1, 4)
        action_str = ['grasp', 'place'][np.random.randint(0, 2)]
        # get heightmaps for the sampled step
        d = demos[demo_num]
        color_heightmap, valid_depth_heightmap = d.get_heightmaps(action_str,
            d.action_dict[progress][action_str + '_image_ind'], use_hist=True)
        # get action info
        action_vec = d.action_dict[progress][ACTION_TO_ID[action_str]]
        # convert rotation angle to index
        best_rot_ind = np.around((np.rad2deg(action_vec[-2]) % 360) * 16 / 360).astype(int)
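        # The policy is evaluated over 16 discrete rotations (22.5 degrees apart), so the demo's
        # rotation angle is snapped to the nearest bin: e.g. an angle of pi/2 rad (90 degrees)
        # maps to bin 90 * 16 / 360 = 4.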
        # convert robot coordinates to pixel
        workspace_pixel_offset = workspace_limits[:2, 0] * -1 * 1000
        # need to index with (y, x) downstream, so swap order in best_pix_ind
        best_action_xy = ((workspace_pixel_offset + 1000 * action_vec[:2]) / 2).astype(int)
        best_pix_ind = [best_rot_ind, best_action_xy[1], best_action_xy[0]]
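        # Worked example: the offset is [724, 224] mm (the negated x/y minimums), and the
        # division by 2 implies a 2 mm-per-pixel heightmap (the 0.448 m workspace spans
        # 224 pixels). An action at (x, y) = (-0.5, 0.0) m therefore lands at pixel
        # column (724 - 500) / 2 = 112 and row (224 + 0) / 2 = 112, stored as
        # [rotation, row, column] in best_pix_ind.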
        # get next set of heightmaps for reward computation
        if action_str == 'grasp':
            next_action_str = 'place'
            next_progress = progress
        else:
            next_action_str = 'grasp'
            next_progress = progress + 1
        # if we finished trial, set next action str to 'end'
        if next_progress > 3:
            next_action_str = 'end'
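        # A grasp is followed by a place at the same progress level, while a place advances
        # progress; progress 4 only occurs after the final place, i.e. after all 6 demo actions,
        # which is why the next action becomes 'end' there.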
        # get next set of heightmaps then compute reward
        next_color_heightmap, next_depth_heightmap = d.get_heightmaps(next_action_str,
            d.action_dict[next_progress][action_str + '_image_ind'], use_hist=True)
        # compute reward
        grasp_success = (action_str == 'grasp')
        place_success = (action_str == 'place')
        # multiplier is progress for grasp, progress + 1 (next_progress) for place
        multiplier = progress if grasp_success else next_progress
        if not args.trial_reward:
            reward, old_reward = trainer.get_label_value(action_str, push_success=False,
                grasp_success=grasp_success, change_detected=True, prev_push_predictions=None,
                prev_grasp_predictions=None, next_color_heightmap=next_color_heightmap,
                next_depth_heightmap=next_depth_heightmap, color_success=None,
                goal_condition=None, place_success=place_success, prev_place_predictions=None,
                reward_multiplier=multiplier)
        else:
            # index into trial_rewards to get reward
            # e.g. if progress is 1, grasp reward is trial_rewards[0]
            reward = trainer.trial_reward_value_log[(progress - 1) * 2 + ACTION_TO_ID[action_str] - 1]
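            # The index arithmetic assumes ACTION_TO_ID maps grasp -> 1 and place -> 2, so each
            # progress level owns two consecutive entries of trial_reward_value_log. For example,
            # a place at progress 2 reads entry (2 - 1) * 2 + 2 - 1 = 3, the fourth trial reward.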
        # training step
        loss = trainer.backprop(color_heightmap, valid_depth_heightmap, action_str,
            best_pix_ind, reward, return_loss=True, silent=True)
        losses.append(loss.detach().cpu().data.numpy())
        # checkpoint: deepcopy, since state_dict() returns references to the live parameters
        # and later training steps would otherwise overwrite every stored snapshot
        if (i + 1) % 25 == 0:
            models[i] = copy.deepcopy(trainer.model.state_dict())
    # get model with lowest + most stable loss
    min_loss = np.max(losses)
    best_model_ind = None
    for i in range(24, len(losses), 25):
        # avg loss across 5 consecutive steps
        avg_loss = np.mean(losses[i-2:min(i+3, len(losses))])
        if avg_loss < min_loss:
            min_loss = avg_loss
            best_model_ind = i
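    # Checkpoints were stored at 0-indexed iterations 24, 49, 74, ..., so the same indices are
    # revisited here; averaging losses[i-2:i+3] smooths the per-step loss over a 5-step window
    # around each checkpoint before picking the checkpoint with the lowest smoothed loss.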
    # create filenames and save best model
    if 'row' in args.base_model:
        base_name = 'row'
    elif 'unstack' in args.base_model:
        base_name = 'unstack'
    elif 'stack' in args.base_model:
        base_name = 'stack'
    else:
        base_name = 'vertical_square'
    model_name = '_'.join(['base', base_name, 'finetune', args.task_type])
    if args.out_dir is not None:
        print("Finetuned", base_name, "model on", args.task_type)
        print("Saving model at", best_model_ind + 1, "iterations with loss of", str(min_loss) + "...")
        if not os.path.exists(args.out_dir):
            os.makedirs(args.out_dir)
        torch.save(models[best_model_ind], os.path.join(args.out_dir, model_name + '.pth'))
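# Example invocation (hypothetical paths, shown for illustration only):
#   python train_offline.py -m snapshots/base_stack.pth -d demos/stack -t stack \
#       -o finetuned_models --trial_reward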