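# Code pull complete; the page will refresh automatically. (Hosting-site banner captured with the file; not part of the program.)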
'''
Python 3.6
Pytorch >= 0.4
Written by Hongyu Wang in Beihang university
'''
import torch
import math
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import numpy
import torch.utils.data as data
from data_iterator import dataIterator
from Attention_RNN import AttnDecoderRNN
from Densenet_torchvision import densenet121
from PIL import Image
from numpy import *
# Turn off cuDNN benchmark mode (PyTorch's per-input-shape convolution
# algorithm auto-tuning).
torch.backends.cudnn.benchmark = False
def cmp_result(label, rec):
    """Compute the edit (Levenshtein) distance between two sequences.

    Args:
        label: Reference sequence (anything supporting ``len`` and indexing).
        rec: Recognised sequence compared against ``label``.

    Returns:
        A 5-tuple ``(dist, label_len, hit_score, ins_score, del_score)``:

        * ``dist`` -- edit distance between ``label`` and ``rec``.
        * ``label_len`` -- ``len(label)``.
        * ``hit_score``, ``ins_score``, ``del_score`` -- the three candidate
          costs (match/substitution, insertion, deletion) evaluated for the
          last cell of the dynamic-programming table.  They are not
          per-operation counts.  When either sequence is empty no cell is
          evaluated and all three are ``0``.
    """
    n_label = len(label)
    n_rec = len(rec)
    dist_mat = numpy.zeros((n_label + 1, n_rec + 1), dtype='int32')
    # Border cells: turning an empty prefix into a prefix of length k costs k.
    dist_mat[0, :] = range(n_rec + 1)
    dist_mat[:, 0] = range(n_label + 1)
    # Defaults keep the return statement valid when the loops never run
    # (empty label or empty rec); previously that raised UnboundLocalError.
    hit_score = ins_score = del_score = 0
    for i in range(1, n_label + 1):
        for j in range(1, n_rec + 1):
            hit_score = dist_mat[i - 1, j - 1] + (label[i - 1] != rec[j - 1])
            ins_score = dist_mat[i, j - 1] + 1
            del_score = dist_mat[i - 1, j] + 1
            dist_mat[i, j] = min(hit_score, ins_score, del_score)
    dist = dist_mat[n_label, n_rec]
    return dist, n_label, hit_score, ins_score, del_score
def load_dict(dictFile):
    """Load a lexicon from a whitespace-separated text file.

    Each non-blank line is split on whitespace; the first field is used as
    the key and the second field, converted with ``int``, as the value.

    Args:
        dictFile: Path of the dictionary file.

    Returns:
        dict mapping the first field of each line to its integer second field.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
        ValueError: If the second field of a line is not an integer.
    """
    lexicon = {}
    # The context manager closes the file even if parsing fails part-way.
    with open(dictFile) as fp:
        for line in fp:
            fields = line.strip().split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no entry.
                continue
            lexicon[fields[0]] = int(fields[1])
    print('total words/phones', len(lexicon))
    return lexicon
# ---- Evaluation configuration -------------------------------------------
# Inputs handed to dataIterator (first and second positional arguments).
valid_datasets = ['./offline-test.pkl', './test_caption.txt']
# Lexicon file(s); only the first entry is loaded below.
dictionaries = ['./dictionary.txt']

batch_Imagesize = 16
valid_batch_Imagesize = 16
batch_size_t = 1        # batch size used by the test DataLoader
maxlen = 48             # maximum number of decoding steps per sample
maxImagesize = 100000
hidden_size = 256       # decoder hidden-state width
gpu = [0]               # device ids passed to DataParallel

# Symbol -> index lexicon, plus the inverse index -> symbol table.
worddicts = load_dict(dictionaries[0])
worddicts_r = [None] * len(worddicts)
for kk, vv in worddicts.items():
    worddicts_r[vv] = kk

# Load the evaluation images and their label sequences.
test, test_label = dataIterator(
    valid_datasets[0],
    valid_datasets[1],
    worddicts,
    batch_size=1,
    batch_Imagesize=batch_Imagesize,
    maxlen=maxlen,
    maxImagesize=maxImagesize,
)
class custom_dset(data.Dataset):
    """Dataset that pairs image arrays with their label sequences.

    ``train[index]`` is converted to a tensor and viewed as ``(1, H, W)``,
    where ``H`` and ``W`` are taken from dimensions 2 and 3 of the stored
    array.  ``train_label[index]`` is converted to a flat ``LongTensor``.
    """

    def __init__(self, train, train_label):
        self.train = train
        self.train_label = train_label

    def __len__(self):
        return len(self.train)

    def __getitem__(self, index):
        image = torch.from_numpy(numpy.array(self.train[index]))
        target = torch.from_numpy(numpy.array(self.train_label[index]))
        target = target.type(torch.LongTensor)
        # Dimensions 2 and 3 of the stored array are height and width.
        dims = image.size()
        height, width = dims[2], dims[3]
        return image.view(1, height, width), target.view(-1)
# Wrap the loaded evaluation images/labels so a DataLoader can index them.
off_image_test = custom_dset(train=test, train_label=test_label)
# Debug aid (disabled): print(off_image_train[10])
def imresize(im, sz):
    """Resize an image array with PIL and return the result as a new array.

    Args:
        im: Array accepted by ``PIL.Image.fromarray``.
        sz: Target size forwarded unchanged to ``Image.resize``.

    Returns:
        The resized image converted back with ``array``.
    """
    resized = Image.fromarray(im).resize(sz)
    return array(resized)
def collate_fn(batch):
    """Collate ``(image, label)`` samples into two padded batch tensors.

    Samples are ordered by label length, longest first.  Every image gets an
    extra mask channel appended, is zero-padded on the right/bottom to the
    largest height and width in the batch, and the result is divided by
    255 (so the mask is 1.0 over real pixels and 0.0 over padding).  Every
    label is zero-padded on the right to ``len(longest label) + 1``.

    Args:
        batch: Non-empty list of ``(image, label)`` pairs, where ``image`` is
            a ``(C, H, W)`` tensor and ``label`` a 1-D tensor.  The list is
            not modified.

    Returns:
        ``(img_padding_mask, label_padding)``: a float tensor of shape
        ``(B, C + 1, max_H, max_W)`` and a long tensor of shape
        ``(B, max_len)``.
    """
    # sorted() instead of list.sort(): do not reorder the caller's list.
    ordered = sorted(batch, key=lambda sample: len(sample[1]), reverse=True)
    images, labels = zip(*ordered)

    max_h = max(image.size()[1] for image in images)
    max_w = max(image.size()[2] for image in images)
    # One extra slot guarantees every label row ends with at least one 0.
    max_len = len(labels[0]) + 1

    pad = torch.nn.functional.pad

    padded_images = []
    for image in images:
        image = image.float()
        height, width = image.size()[1], image.size()[2]
        # Mask channel: 255 over the real image area (becomes 1.0 after the
        # final division), 0 over the padding added below.
        mask = torch.full((1, height, width), 255.0, dtype=torch.float32)
        image_and_mask = torch.cat((image, mask), dim=0)
        image_and_mask = pad(image_and_mask,
                             (0, max_w - width, 0, max_h - height))
        padded_images.append(image_and_mask.unsqueeze(0))
    # Single concatenation instead of growing the batch tensor per sample.
    img_padding_mask = torch.cat(padded_images, dim=0) / 255.0

    padded_labels = []
    for label in labels:
        row = label.long().unsqueeze(0)
        padded_labels.append(pad(row, (0, max_len - row.size()[1], 0, 0)))
    label_padding = torch.cat(padded_labels, dim=0)

    return img_padding_mask, label_padding
# Batches of batch_size_t samples, padded and stacked by collate_fn.
# shuffle=True means samples are visited in a random order on every run.
test_loader = torch.utils.data.DataLoader(
dataset = off_image_test,
batch_size = batch_size_t,
shuffle = True,
collate_fn = collate_fn
)
# Build the encoder/decoder, wrap them in DataParallel on the configured
# device ids, move them to the GPU and only then restore the weights.
encoder = densenet121()
# NOTE(review): 112 is presumably the decoder's output vocabulary size --
# confirm against AttnDecoderRNN and the dictionary file.
attn_decoder1 = AttnDecoderRNN(hidden_size,112,dropout_p=0.5)
encoder = torch.nn.DataParallel(encoder, device_ids=gpu)
attn_decoder1 = torch.nn.DataParallel(attn_decoder1, device_ids=gpu)
encoder = encoder.cuda()
attn_decoder1 = attn_decoder1.cuda()
# NOTE(review): the state dicts are loaded into the DataParallel wrappers,
# so the checkpoints were presumably saved from wrapped models -- verify.
encoder.load_state_dict(torch.load('model/encoder_lr0.00001_BN_te1_d05_SGD_bs8_mask_conv_bn_b.pkl'))
attn_decoder1.load_state_dict(torch.load('model/attn_decoder_lr0.00001_BN_te1_d05_SGD_bs8_mask_conv_bn_b.pkl'))
# Running totals updated by the evaluation loop.
total_dist = 0        # summed edit distance over all evaluated samples
total_label = 0       # summed label length over all evaluated samples
total_line = 0        # number of evaluated samples
total_line_rec = 0    # number of samples with edit distance 0
# The counters below are only referenced by commented-out statistics code
# in the visible part of this file.
hit_all =0
ins_all =0
dls_all =0
wer_1 = 0
wer_2 = 0
wer_3 = 0
wer_4 = 0
wer_5 = 0
wer_6 = 0
wer_up=0
# Switch both networks to evaluation mode.
encoder.eval()
attn_decoder1.eval()
# Evaluation loop: greedily decode every test batch and accumulate the edit
# distance between the decoded sequence and the ground-truth label.
for step_t, (x_t, y_t) in enumerate(test_loader):
    # Dimension 1 of x_t holds the image channel(s) followed by the mask
    # channel; dimensions 2 and 3 are the padded height and width.
    x_real_high = x_t.size()[2]
    x_real_width = x_t.size()[3]
    # Stop at the first batch that is smaller than batch_size_t.
    if x_t.size()[0] < batch_size_t:
        break

    # Per-sample extent of the un-padded image, expressed in 16-pixel cells.
    # NOTE(review): 16 is presumably the encoder's down-sampling factor --
    # confirm against the DenseNet definition.
    h_mask_t = []
    w_mask_t = []
    for sample in x_t:
        mask_t = sample[1]
        # Count the ones with tensor ops.  The previous code counted '1'
        # characters in str(tensor), which under-counts once the printed
        # representation is elided for long rows/columns.
        w_t = int((mask_t[0] == 1).long().sum())      # ones in mask row 0
        h_t = int((mask_t[:, 1] == 1).long().sum())   # ones in mask column 1
        h_comp_t = int(h_t / 16) + 1
        w_comp_t = int(w_t / 16) + 1
        h_mask_t.append(h_comp_t)
        w_mask_t.append(w_comp_t)

    x_t = x_t.cuda()
    y_t = y_t.cuda()

    prediction_sub = []
    label_sub = []
    label_real = []
    prediction_real = []

    # Inference only: disable autograd so no graph is recorded.
    with torch.no_grad():
        output_highfeature_t = encoder(x_t)
        x_mean_t = torch.mean(output_highfeature_t)
        x_mean_t = float(x_mean_t)
        output_area_t1 = output_highfeature_t.size()
        output_area_t = output_area_t1[3]
        dense_input = output_area_t1[2]

        # NOTE(review): 111 is presumably the start-of-sequence token index
        # -- confirm against the dictionary.
        decoder_input_t = torch.LongTensor([111] * batch_size_t)
        decoder_input_t = decoder_input_t.cuda()
        # Initial hidden state: random noise scaled by the mean encoder
        # activation and squashed with tanh.
        decoder_hidden_t = torch.randn(batch_size_t, 1, hidden_size).cuda()
        decoder_hidden_t = decoder_hidden_t * x_mean_t
        decoder_hidden_t = torch.tanh(decoder_hidden_t)

        prediction = torch.zeros(batch_size_t, maxlen)
        #label = torch.zeros(batch_size_t,maxlen)

        decoder_attention_t = torch.zeros(batch_size_t, 1, dense_input, output_area_t).cuda()
        attention_sum_t = torch.zeros(batch_size_t, 1, dense_input, output_area_t).cuda()

        # Pad the labels on the right with zeros up to maxlen columns.
        m = torch.nn.ZeroPad2d((0, maxlen - y_t.size()[1], 0, 0))
        y_t = m(y_t)

        # Greedy decoding: feed the arg-max token back in at each step.
        for i in range(maxlen):
            decoder_output, decoder_hidden_t, decoder_attention_t, attention_sum_t = attn_decoder1(
                decoder_input_t,
                decoder_hidden_t,
                output_highfeature_t,
                output_area_t,
                attention_sum_t,
                decoder_attention_t, dense_input, batch_size_t, h_mask_t, w_mask_t, gpu)
            topv, topi = torch.max(decoder_output, 2)
            # Every sample in the batch emitted token 0: stop decoding.
            if torch.sum(topi) == 0:
                break
            decoder_input_t = topi
            decoder_input_t = decoder_input_t.view(batch_size_t)
            #print(topi.size()) 16,1
            # prediction
            prediction[:, i] = decoder_input_t

    # Score each sample of the batch.
    for i in range(batch_size_t):
        # Collect predicted tokens up to (not including) the first 0.
        for j in range(maxlen):
            if int(prediction[i][j]) == 0:
                break
            else:
                prediction_sub.append(int(prediction[i][j]))
                prediction_real.append(worddicts_r[int(prediction[i][j])])
        # Append the terminating 0 unless all maxlen slots are already used.
        if len(prediction_sub) < maxlen:
            prediction_sub.append(0)

        # Collect label tokens up to the first 0, then append the 0.
        for k in range(y_t.size()[1]):
            if int(y_t[i][k]) == 0:
                break
            else:
                label_sub.append(int(y_t[i][k]))
                label_real.append(worddicts_r[int(y_t[i][k])])
        label_sub.append(0)

        dist, llen, hit, ins, dls = cmp_result(label_sub, prediction_sub)
        wer_step = float(dist) / llen

        total_dist += dist
        total_label += llen
        total_line += 1
        if dist == 0:
            total_line_rec = total_line_rec + 1

        print('step is %d' % (step_t))
        print('prediction is ')
        #print(''.join(prediction_real))
        print(prediction_real)
        print('the truth is')
        #print(''.join(label_real))
        print(label_real)
        print('the wer is %.5f' % (wer_step))

        # Reset the per-sample buffers for the next sample.
        label_sub = []
        prediction_sub = []
        label_real = []
        prediction_real = []

    # Disabled per-batch statistics (WER histogram and hit/ins/del totals):
    # dist, llen, hit, ins, dls = cmp_result(label, prediction)
    # wer_step = float(dist) / llen
    # print('the wer is %.5f' % (wer_step))
    # if wer_step <= 0.1:
    #     wer_1 += 1
    # elif 0.1 < wer_step <= 0.2:
    #     wer_2 += 1
    # elif 0.2 < wer_step <= 0.3:
    #     wer_3 += 1
    # elif 0.3 < wer_step <= 0.4:
    #     wer_4 += 1
    # elif 0.4 < wer_step <= 0.5:
    #     wer_5 += 1
    # elif 0.5 < wer_step <= 0.6:
    #     wer_6 += 1
    # else:
    #     wer_up += 1
    # hit_all += hit
    # ins_all += ins
    # dls_all += dls
    # total_dist += dist
    # total_label += llen
    # total_line += 1
    # if dist == 0:
    #     total_line_rec += 1
# Final metrics over all evaluated samples.
# wer: summed edit distance divided by summed label length.
# sacc: fraction of samples whose edit distance is 0.
# NOTE: both divisions raise ZeroDivisionError if no sample was evaluated
# (the totals start at 0).
wer = float(total_dist) / total_label
sacc = float(total_line_rec) / total_line
print('wer is %.5f' % (wer))
print('sacc is %.5f ' % (sacc))
# Disabled reporting of the hit/ins/del totals and the WER histogram:
# print('hit is %d' % (hit_all))
# print('ins is %d' % (ins_all))
# print('dls is %d' % (dls_all))
# print('wer loss is %.5f,%.5f,%.5f,%.5f,%.5f,%.5f,%.5f' % (wer_1, wer_2, wer_3, wer_4, wer_5, wer_6, wer_up))
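# (Hosting-site notice captured with the file; not part of the program.)
# This section may contain content that is not suitable for display, so the page does not show it. You can review and modify it yourself using the editing functions.
# If you confirm that the content involves no inappropriate language, pure advertising, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, or anything that violates applicable national laws and regulations, you can click submit to appeal, and we will handle it as soon as possible.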