# kd_gan.py
# Based on
# https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9046859
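"""Knowledge-distillation training for YOLO compression with a GAN objective.

Overview (a summary inferred from the code below): a teacher Darknet, optionally
sparsified with a lottery-ticket (LTH) pruning mask, provides intermediate feature
maps that discriminators learn to distinguish from the student's feature maps.
While epoch < config['second_stage'] the student is trained as the GAN generator
to imitate the teacher's features; afterwards it is fine-tuned on the standard
YOLO detection loss. Checkpointing, prebias warm-up, multi-scale training and
hyperparameter evolution mirror the ultralytics/yolov3 training loop.
"""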
import torch.distributed as dist

import test  # import test.py to get mAP after each epoch
from models import *
from utils.datasets import *
from utils.utils import *
from utils.my_utils import (create_kd_argparser, create_config, create_scheduler, create_optimizer,
                            initialize_model, create_dataloaders, load_kd_checkpoints)
from utils.pruning import create_mask_LTH, apply_mask_LTH

mixed_precision = True
try:  # Mixed precision training https://github.com/NVIDIA/apex
    from apex import amp
except ImportError:
    mixed_precision = False  # not installed

ft = torch.cuda.FloatTensor  # shorthand for creating CUDA float tensors

def train():
    data = config['data']
    img_size, img_size_test = config['img_size'] if len(config['img_size']) == 2 else config['img_size'] * 2  # train, test sizes
    epochs = config['epochs']  # 500200 batches at bs 64, 117263 images = 273 epochs
    batch_size = config['batch_size']
    accumulate = config['accumulate']  # effective bs = batch_size * accumulate = 16 * 4 = 64

    # Initialize
    init_seeds(config['seed'])
    if config['multi_scale']:
        img_sz_min = round(img_size / 32 / 1.5)
        img_sz_max = round(img_size / 32 * 1.5)
        img_size = img_sz_max * 32  # initiate with maximum multi_scale size
        print('Using multi-scale %g - %g' % (img_sz_min * 32, img_size))
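        # A worked example of the range (assuming the common img_size = 416): the
        # grid count is 416 / 32 = 13, so training sizes run from
        # round(13 / 1.5) * 32 = 288 up to round(13 * 1.5) * 32 = 640, i.e. roughly
        # 67% - 150% of nominal, always in multiples of the 32-pixel network stride.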

    # Configure run
    data_dict = parse_data_cfg(data)
    nc = int(data_dict['classes'])  # number of classes

    # Initialize Teacher
    if config['teacher_darknet'] == 'default':
        teacher = Darknet(cfg=config['teacher_cfg'], arc=config['teacher_arc']).to(device)
    elif config['teacher_darknet'] == 'soft':
        teacher = SoftDarknet(cfg=config['teacher_cfg'], arc=config['teacher_arc']).to(device)
    # Initialize Student
    if config['student_darknet'] == 'default':
        if 'nano' in config['student_cfg']:
            print('Using a YOLO Nano arc')
            student = YOLO_Nano(config['student_cfg']).to(device)
        else:
            student = Darknet(cfg=config['student_cfg']).to(device)
    elif config['student_darknet'] == 'soft':
        student = SoftDarknet(cfg=config['student_cfg'], arc=config['student_arc']).to(device)
    # Create Discriminators over the selected teacher feature maps
    D_models = None
    if len(config['teacher_indexes']):
        D_models = Discriminator(config['teacher_indexes'], teacher, config['D_kernel_size']).to(device)

    G_optim = create_optimizer(student, config)  # the student acts as the GAN generator
    D_optim = create_optimizer(D_models, config, is_D=True)
    GAN_criterion = torch.nn.BCELoss()

    mask = None
    if ('mask' in config and config['mask']) or ('mask_path' in config and config['mask_path']):
        print('Creating mask')
        mask = create_mask_LTH(teacher).to(device)

    start_epoch, best_fitness, teacher, student, mask, D_models, G_optim, D_optim = load_kd_checkpoints(
        config,
        teacher, student,
        mask, D_models,
        G_optim, D_optim, device
    )

    if mask is not None:
        print('Applying mask in teacher')
        apply_mask_LTH(teacher, mask)
        del mask
        torch.cuda.empty_cache()

    if config['xavier_norm']:
        initialize_model(student, torch.nn.init.xavier_normal_)
    elif config['xavier_uniform']:
        initialize_model(student, torch.nn.init.xavier_uniform_)

    G_scheduler = create_scheduler(config, G_optim, start_epoch)
    D_scheduler = create_scheduler(config, D_optim, start_epoch)

    # Mixed precision training https://github.com/NVIDIA/apex
    if mixed_precision:
        student, G_optim = amp.initialize(student, G_optim, opt_level='O1', verbosity=0)

    # Initialize distributed training
    if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available():
        dist.init_process_group(backend='nccl',  # distributed backend
                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
                                world_size=1,  # number of nodes for distributed training
                                rank=0)  # distributed training node rank
        teacher = torch.nn.parallel.DistributedDataParallel(teacher, find_unused_parameters=True)
        teacher.yolo_layers = teacher.module.yolo_layers  # move yolo layer indices to top level
        student = torch.nn.parallel.DistributedDataParallel(student, find_unused_parameters=True)
        student.yolo_layers = student.module.yolo_layers  # move yolo layer indices to top level

    trainloader, validloader = create_dataloaders(config)

    # Start training
    nb = len(trainloader)
    prebias = start_epoch == 0
    student.nc = nc  # attach number of classes to student
    teacher.nc = nc
    student.arc = config['student_arc']  # attach yolo architecture
    teacher.arc = config['teacher_arc']
    student.hyp = config['hyp']  # attach hyperparameters to student
    teacher.hyp = config['hyp']
    student.class_weights = labels_to_class_weights(trainloader.dataset.labels, nc).to(device)  # attach class weights
    teacher.class_weights = student.class_weights
    maps = np.zeros(nc)  # mAP per class
    # torch.autograd.set_detect_anomaly(True)
    results = (0, 0, 0, 0, 0, 0, 0)  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
    t0 = time.time()
    torch_utils.model_info(student, report='summary')  # 'full' or 'summary'
    print('Starting training for %g epochs...' % epochs)
    teacher.train()
    max_wo_best = 0

    ###############
    # Start epoch #
    ###############
    for epoch in range(start_epoch, epochs):
        student.train()
        student.gr = 1 - (1 + math.cos(min(epoch * 2, epochs) * math.pi / epochs)) / 2  # GIoU <-> 1.0 loss ratio
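        # The cosine ramp above takes student.gr from 0 at epoch 0 to 1 at
        # epochs / 2 (min(epoch * 2, epochs) saturates there), after which the
        # objectness targets stay fully GIoU-weighted.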

        # Prebias
        if prebias:
            ne = max(round(30 / nb), 3)  # number of prebias epochs
            ps = np.interp(epoch, [0, ne], [0.1, config['hyp']['lr0'] * 2]), \
                 np.interp(epoch, [0, ne], [0.9, config['hyp']['momentum']])  # prebias settings (lr=0.1, momentum=0.9)
            if epoch == ne:
                print_model_biases(student)
                prebias = False

            # Bias optimizer settings
            G_optim.param_groups[2]['lr'] = ps[0]
            if G_optim.param_groups[2].get('momentum') is not None:  # for SGD but not Adam
                G_optim.param_groups[2]['momentum'] = ps[1]

        # Update image weights (optional)
        if trainloader.dataset.image_weights:
            w = student.class_weights.cpu().numpy() * (1 - maps) ** 2  # class weights
            image_weights = labels_to_image_weights(trainloader.dataset.labels, nc=nc, class_weights=w)
            trainloader.dataset.indices = random.choices(range(trainloader.dataset.n), weights=image_weights, k=trainloader.dataset.n)  # rand weighted idx

        mloss = torch.zeros(9).to(device)  # mean losses
        print(('\n' + '%10s' * 13) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'G_loss', 'D_loss', 'D_x', 'D_g_z1', 'D_g_z2', 'total', 'targets', 'img_size'))
        pbar = tqdm(enumerate(trainloader), total=nb)  # progress bar
        ####################
        # Start mini-batch #
        ####################
        for i, (imgs, targets, paths, _) in pbar:
            real_data_label = torch.ones(imgs.shape[0], device=device)
            fake_data_label = torch.zeros(imgs.shape[0], device=device)
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            # Plot images with bounding boxes
            if ni < 1:
                f = config['sub_working_dir'] + 'train_batch%g.png' % i  # filename
                plot_images(imgs=imgs, targets=targets, paths=paths, fname=f)
                if tb_writer:
                    tb_writer.add_image(f, cv2.imread(f)[:, :, ::-1], dataformats='HWC')

            # Multi-Scale training
            if config['multi_scale']:
                if ni / accumulate % 1 == 0:  # adjust img_size (67% - 150%) once per accumulated batch
                    img_size = random.randrange(img_sz_min, img_sz_max + 1) * 32
                sf = img_size / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / 32.) * 32 for x in imgs.shape[2:]]  # new shape (stretched to 32-multiple)
                    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Run student
            if len(config['student_indexes']) and epoch < config['second_stage']:
                pred_std, fts_std = student(imgs, config['student_indexes'])
                if 'nano' in config['student_cfg']:  # YOLO Nano outputs in the reversed order
                    fts_std.reverse()
            else:
                pred_std = student(imgs)
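
            # What follows is the standard two-step GAN update (cf. the header
            # comments below): D is trained to score teacher features as real (1)
            # and student features as fake (0); the student, acting as generator,
            # is then trained so that D scores its features as real. D_x, D_g_z1
            # and D_g_z2 log D's mean score on real data and on fake data before
            # and after the discriminator update, averaged over the three
            # discriminator outputs (hence the division by 3).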
            ###################################################
            # Update D: maximize log(D(x)) + log(1 - D(G(z))) #
            ###################################################
            D_loss_real, D_loss_fake, D_x, D_g_z1 = ft([.0]), ft([.0]), ft([.0]), ft([.0])
            if epoch < config['second_stage']:
                # Run teacher
                with torch.no_grad():
                    _, fts_tch = teacher(imgs, config['teacher_indexes'])

                # Discriminate the real data
                real_data_discrimination = D_models(fts_tch)
                for output in real_data_discrimination:
                    D_x += output.mean().item() / 3.
                # Discriminate the fake data
                fake_data_discrimination = D_models([x.detach() for x in fts_std])
                for output in fake_data_discrimination:
                    D_g_z1 += output.mean().item() / 3.

                # Compute loss
                for x in real_data_discrimination:
                    D_loss_real += GAN_criterion(x, real_data_label)
                for x in fake_data_discrimination:
                    D_loss_fake += GAN_criterion(x, fake_data_label)
                # Scale loss by nominal batch_size of 64
                D_loss_real *= batch_size / 64
                D_loss_fake *= batch_size / 64
                # Compute gradient
                D_loss_real.backward()
                D_loss_fake.backward()

                # Optimize accumulated gradient
                if ni % accumulate == 0:
                    D_optim.step()
                    D_optim.zero_grad()

            ###################################
            # Update G: maximize log(D(G(z))) #
            ###################################
            G_loss, D_g_z2 = ft([.0]), ft([.0])
            if epoch < config['second_stage']:
                # Since we already updated D, perform another forward pass of the fake batch through D
                fake_data_discrimination = D_models(fts_std)
                for output in fake_data_discrimination:
                    D_g_z2 += output.mean().item() / 3.

                # Compute loss
                for x in fake_data_discrimination:
                    G_loss += GAN_criterion(x, real_data_label)  # fake labels are real for generator cost
                obj_detec_loss, loss_items = ft([.0]), ft([.0, .0, .0, .0])
                # Scale loss by nominal batch_size of 64
                G_loss *= batch_size / 64
                # Compute gradient
                G_loss.backward()
            else:
                # Compute loss
                obj_detec_loss, loss_items = compute_loss(pred_std, targets, student)
                # Scale loss by nominal batch_size of 64
                obj_detec_loss *= batch_size / 64
                # Compute gradient
                obj_detec_loss.backward()

            # Optimize accumulated gradient
            if ni % accumulate == 0:
                G_optim.step()
                G_optim.zero_grad()

            D_loss = D_loss_real + D_loss_fake
            total_loss = obj_detec_loss + D_loss + G_loss
            all_losses = torch.cat([loss_items[:3], G_loss, D_loss, D_x, D_g_z1, D_g_z2, total_loss]).detach()
            if not torch.isfinite(total_loss):
                print('WARNING: non-finite loss, ending training ', all_losses)
                return results

            # Print batch results
            mloss = (mloss * i + all_losses) / (i + 1)  # update mean losses
            mem = '%.3gG' % (torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
            s = ('%10s' * 2 + '%10.3g' * 11) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, len(targets), img_size)
            pbar.set_description(s)

        ##################
        # End mini-batch #
        ##################

        # Update scheduler
        G_scheduler.step()
        D_scheduler.step()

        final_epoch = epoch + 1 == epochs
        if not config['notest'] or final_epoch:  # Calculate mAP
            is_coco = any([x in data for x in ['coco.data', 'coco2014.data', 'coco2017.data']]) and student.nc == 80
            thres = .1
            while True:
                try:
                    results, maps = test.test(
                        cfg=config['cfg'], data=data, batch_size=1,
                        img_size=img_size_test, model=student,
                        conf_thres=thres if epoch < config['second_stage'] else 0.001,
                        iou_thres=0.6, save_json=final_epoch and is_coco, single_cls=config['single_cls'],
                        dataloader=None, folder=config['sub_working_dir']
                    )
                    break
                except Exception:
                    thres += .1
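                    # The student may produce degenerate detections before the
                    # second stage, so evaluation can fail at a low confidence
                    # threshold (e.g. an excessive number of candidate boxes);
                    # retry with a progressively higher threshold.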

        # Write epoch results
        with open(config['results_file'], 'a') as f:
            f.write(s + '%10.3g' * 7 % results + '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
        if len(config['name']) and config['bucket']:
            os.system('gsutil cp results.txt gs://%s/results/results%s.txt' % (config['bucket'], config['name']))

        # Write Tensorboard results
        if tb_writer:
            x = list(mloss) + list(results)
            titles = [
                'GIoU', 'Objectness', 'Classification', 'Generator Loss', 'Discriminator Loss',
                'D_x', 'D_g_z1', 'D_g_z2', 'Train Loss',
                'Precision', 'Recall', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
            ]
            for xi, title in zip(x, titles):
                tb_writer.add_scalar(title, xi, epoch)

        # Update best mAP
        fi = fitness(np.array(results).reshape(1, -1))  # fitness_i = weighted combination of [P, R, mAP, F1]
        if fi > best_fitness:
            best_fitness = fi
            max_wo_best = 0
        else:
            max_wo_best += 1
            if config['early_stop'] and max_wo_best == config['early_stop']:
                print('Ending training due to early stop')

        # Save training results
        save = (not config['nosave']) or (final_epoch and not config['evolve'])
        if save:
            with open(config['results_file'], 'r') as f:
                # Create checkpoint
                chkpt = {
                    'epoch': epoch,
                    'best_fitness': best_fitness,
                    'training_results': f.read(),
                    'model': student.module.state_dict() if type(student) is nn.parallel.DistributedDataParallel
                             else student.state_dict(),
                    'D': D_models.state_dict(),
                    'G_optim': None if final_epoch else G_optim.state_dict(),
                    'D_optim': None if final_epoch else D_optim.state_dict()
                }

            # Save last checkpoint
            torch.save(chkpt, config['last'])
            # Save best checkpoint
            if best_fitness == fi:
                torch.save(chkpt, config['best_gan'] if epoch < config['second_stage'] else config['best'])
            # Delete checkpoint
            del chkpt
            torch.cuda.empty_cache()

        if config['early_stop'] and max_wo_best == config['early_stop']:
            break

    #############
    # End epoch #
    #############
    n = config['name']
    if len(n):
        n = '_' + n if not n.isnumeric() else n
        fresults, flast, fbest = 'results%s.txt' % n, 'last%s.pt' % n, 'best%s.pt' % n
        os.rename(config['results_file'], config['sub_working_dir'] + fresults)
        if os.path.exists(config['last']):
            os.rename(config['last'], config['sub_working_dir'] + flast)
        if os.path.exists(config['best']):
            os.rename(config['best'], config['sub_working_dir'] + fbest)
        # Update results, last and best paths
        config['results_file'] = config['sub_working_dir'] + fresults
        config['last'] = config['sub_working_dir'] + flast
        config['best'] = config['sub_working_dir'] + fbest

        if config['bucket']:  # save to cloud
            os.system('gsutil cp %s gs://%s/results' % (fresults, config['bucket']))
            os.system('gsutil cp %s gs://%s/weights' % (config['sub_working_dir'] + flast, config['bucket']))
            # os.system('gsutil cp %s gs://%s/weights' % (config['sub_working_dir'] + fbest, config['bucket']))

    if not config['evolve']:
        plot_results(folder=config['sub_working_dir'])

    print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
    if torch.cuda.device_count() > 1:
        dist.destroy_process_group()
    torch.cuda.empty_cache()
    return results

if __name__ == '__main__':
    args = create_kd_argparser()
    config = create_config(args)
    print("sub working dir: %s" % config['sub_working_dir'])

    # Save the run configuration
    import json
    with open(config['sub_working_dir'] + 'config.json', 'w') as f:
        json.dump(config, f)

    config['last'] = config['sub_working_dir'] + 'last.pt'
    config['best_gan'] = config['sub_working_dir'] + 'best_gan.pt'
    config['best'] = config['sub_working_dir'] + 'best.pt'
    config['results_file'] = config['sub_working_dir'] + 'results.txt'
    print(config)

    device = torch_utils.select_device(config['device'], apex=mixed_precision, batch_size=config['batch_size'])
    if device.type == 'cpu':
        mixed_precision = False

    tb_writer = None
    if not config['evolve']:  # Train normally
        try:
            # Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/
            from torch.utils.tensorboard import SummaryWriter
            tb_writer = SummaryWriter(log_dir=config['sub_working_dir'] + 'runs/')
        except ImportError:
            pass

        train()  # train normally
    else:  # Evolve hyperparameters (optional)
        config['notest'], config['nosave'] = True, True  # only test/save final epoch
        if config['bucket']:
            os.system('gsutil cp gs://%s/evolve.txt .' % config['bucket'])  # download evolve.txt if exists

        for _ in range(1):  # generations to evolve
            if os.path.exists('evolve.txt'):  # if evolve.txt exists: select best hyps and mutate
                # Select parent(s)
                parent = 'single'  # parent selection method: 'single' or 'weighted'
                x = np.loadtxt('evolve.txt', ndmin=2)
                n = min(5, len(x))  # number of previous results to consider
                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
                w = fitness(x) - fitness(x).min()  # weights
                if parent == 'single' or len(x) == 1:
                    # x = x[random.randint(0, n - 1)]  # random selection
                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
                elif parent == 'weighted':
                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

                # Mutate
                method, mp, s = 3, 0.9, 0.2  # method, mutation probability, sigma
                npr = np.random
                npr.seed(int(time.time()))
                g = np.array([1, 1, 1, 1, 1, 1, 1, 0, .1, 1, 0, 1, 1, 1, 1, 1, 1, 1])  # gains
                ng = len(g)
                if method == 1:
                    v = (npr.randn(ng) * npr.random() * g * s + 1) ** 2.0
                elif method == 2:
                    v = (npr.randn(ng) * npr.random(ng) * g * s + 1) ** 2.0
                elif method == 3:
                    v = np.ones(ng)
                    while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                        # v = (g * (npr.random(ng) < mp) * npr.randn(ng) * s + 1) ** 2.0
                        v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
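                    # Each mutation factor v[i] is therefore clipped to [0.3, 3.0]:
                    # with probability mp a gain-scaled Gaussian perturbation is
                    # applied to hyperparameter i, otherwise v[i] stays 1, and a
                    # gain g[i] of 0 freezes that hyperparameter entirely.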
                for i, k in enumerate(config['hyp'].keys()):  # plt.hist(v.ravel(), 300)
                    config['hyp'][k] = x[i + 7] * v[i]  # mutate

            # Clip to limits
            keys = ['lr0', 'iou_t', 'momentum', 'weight_decay', 'hsv_s', 'hsv_v', 'translate', 'scale', 'fl_gamma']
            limits = [(1e-5, 1e-2), (0.00, 0.70), (0.60, 0.98), (0, 0.001), (0, .9), (0, .9), (0, .9), (0, .9), (0, 3)]
            for k, v in zip(keys, limits):
                config['hyp'][k] = np.clip(config['hyp'][k], v[0], v[1])

            # Train mutation
            results = train()

            # Write mutation results
            print_mutation(config['hyp'], results, config['bucket'])

            # Plot results
            # plot_evolution_results(config['hyp'])