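# [Web-page residue, not part of the program] Code pull complete; the page will refresh automatically.
# [Web-page residue] The sync operation will force-sync from 李志红/ASR_Syllable; it overwrites every change made since the repository was forked and cannot be undone!
# [Web-page residue] After confirmation the sync runs in the background and the page refreshes when it finishes; please wait patiently.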
#-*- coding:UTF-8 -*-
#author:zhangwei
class ModelLanguage():
    """Convert a sequence of pinyin syllables into text using lookup tables.

    Four tab-separated text files are loaded by ``load_model``:

    * a symbol dictionary mapping a syllable to a string of candidate
      characters (``dict_pinyin``);
    * ``language_model1.txt`` keyed by single characters (``model1``);
    * ``language_model2.txt`` keyed by two-character strings (``model2``);
    * ``dic_pinyin.txt`` keyed by ``"<syllable> <syllable>"`` pairs
      (``pinyin``).

    The values of ``model1`` and ``model2`` are kept as strings and parsed
    with ``float`` while scoring (presumably occurrence counts -- confirm
    against the tool that generates the model files).
    """

    def __init__(self, modelpath):
        """Remember the model directory, ensuring it ends with ``'/'``.

        An empty ``modelpath`` is kept as-is (files are then resolved against
        the current working directory) instead of raising ``IndexError``.
        """
        self.modelpath = modelpath
        self.slash = '/'
        if self.modelpath and not self.modelpath.endswith(self.slash):
            self.modelpath = self.modelpath + self.slash

    def load_model(self):
        """Load every table from disk and return ``(dict_pinyin, model1, model2)``.

        The syllable-pair table is stored on ``self.pinyin`` but is not part
        of the returned tuple.
        """
        # NOTE(review): 'dict.txt' is opened relative to the current working
        # directory, unlike the three files below which live under
        # ``self.modelpath``. Kept unchanged because callers may rely on that
        # layout -- confirm whether it is intentional.
        self.dict_pinyin = self.get_symbol_dict('dict.txt')
        self.model1 = self.get_language_model(self.modelpath + 'language_model1.txt')
        self.model2 = self.get_language_model(self.modelpath + 'language_model2.txt')
        self.pinyin = self.get_pinyin(self.modelpath + 'dic_pinyin.txt')
        model = (self.dict_pinyin, self.model1, self.model2)
        return model

    @staticmethod
    def _iter_fields(filename):
        """Yield the tab-separated fields of every non-empty line of a file.

        The file is decoded as UTF-8 explicitly so the result does not depend
        on the locale's default encoding.
        """
        with open(filename, 'r', encoding='utf-8') as fr:
            for raw_line in fr:
                line = raw_line.rstrip('\n')
                if line:
                    yield line.split('\t')

    def get_symbol_dict(self, dict_filename):
        """Return ``{syllable: candidate_characters}`` read from a file.

        Each useful line is ``<syllable>\\t<characters>``. Lines without a
        tab are skipped; later duplicates overwrite earlier ones.
        """
        dic_symbol = {}
        for fields in self._iter_fields(dict_filename):
            if len(fields) < 2:
                continue
            dic_symbol[fields[0]] = fields[1]
        return dic_symbol

    def get_language_model(self, modelname):
        """Return ``{key: value_string}`` read from a tab-separated file.

        Lines without a tab are skipped. Values stay strings; ``decode``
        converts them with ``float`` when needed.
        """
        dic_model = {}
        for fields in self._iter_fields(modelname):
            if len(fields) < 2:
                continue
            dic_model[fields[0]] = fields[1]
        return dic_model

    def get_pinyin(self, filename):
        """Return the syllable-pair table read from a tab-separated file.

        A line ``<key>\\t<n>`` is kept only when ``int(n) > 1`` and the key
        has not been stored yet (the first qualifying occurrence wins). Lines
        without a tab are skipped.

        Raises:
            ValueError: if the second field of a candidate line is not an
                integer literal.
        """
        dic = {}
        for fields in self._iter_fields(filename):
            if len(fields) < 2:
                continue
            key = fields[0]
            if key not in dic and int(fields[1]) > 1:
                dic[key] = fields[1]
        return dic

    def decode(self, list_syllabel, yuzhi=0.0001):
        """Expand syllables into scored candidate strings, best first.

        The first syllable seeds one candidate per character with score 1.0.
        Every following syllable (index ``i``) extends each candidate by each
        of its characters; the score is multiplied by
        ``model2[last two chars] / model1[first of those two chars]``.
        Extensions whose bigram is missing from ``model2``, whose leading
        character has no non-zero entry in ``model1``, or whose score falls
        below ``yuzhi ** i`` are dropped. Processing stops at the first
        syllable missing from ``dict_pinyin``; candidates built so far are
        still returned.

        Args:
            list_syllabel: sequence of syllable strings.
            yuzhi: pruning threshold base.

        Returns:
            A list of ``[text, score]`` lists sorted by descending score.
            Candidates with equal scores keep their generation order.
        """
        list_words = []
        for i, syllable in enumerate(list_syllabel):
            if syllable not in self.dict_pinyin:
                break
            ls = self.dict_pinyin[syllable]

            if i == 0:
                for ch in ls:
                    list_words.append([ch, 1.0])
                continue

            threshold = pow(yuzhi, i)
            extended = []
            for text, score in list_words:
                for ch in ls:
                    new_text = text + ch
                    bigram = new_text[-2:]
                    if bigram not in self.model2:
                        continue
                    # Guard against an inconsistent model: a bigram whose
                    # leading character is absent from (or zero in) model1
                    # cannot be scored, so it is treated like an unseen bigram
                    # rather than raising KeyError / ZeroDivisionError.
                    denominator = float(self.model1.get(bigram[0], 0))
                    if denominator == 0:
                        continue
                    new_score = score * float(self.model2[bigram]) / denominator
                    if new_score >= threshold:
                        extended.append([new_text, new_score])
            list_words = extended

        # Stable sort: the first-generated candidate among equal top scores
        # stays first.
        list_words.sort(key=lambda word: word[1], reverse=True)
        return list_words

    def _best_text(self, syllables):
        """Return the text of the top candidate for ``syllables`` or ``''``."""
        candidates = self.decode(syllables, 0.0000)
        if candidates:
            return candidates[0][0]
        return ''

    def speech_to_text(self, list_syllabel):
        """Convert a syllable sequence to text.

        Adjacent syllables are grouped while ``"<a> <b>"`` is a key of
        ``self.pinyin``; each group is decoded on its own and the best
        candidate of every group is concatenated. Groups that yield no
        candidate contribute nothing.

        Returns:
            The decoded string, ``''`` for an empty input.
        """
        length = len(list_syllabel)
        if length == 0:
            return ''

        pieces = []
        group = [list_syllabel[0]]
        for i in range(length - 1):
            following = list_syllabel[i + 1]
            pair_key = list_syllabel[i] + ' ' + following
            if pair_key in self.pinyin:
                group.append(following)
            else:
                pieces.append(self._best_text(group))
                group = [following]
        pieces.append(self._best_text(group))
        return ''.join(pieces)
if __name__ == '__main__':
    # Demo run: load the tables from the author's local model directory and
    # print the text decoded for a fixed two-syllable input.
    language_dir = '/home/zhangwei/PycharmProjects/ASR_Thchs30/model_language/'
    decoder = ModelLanguage(modelpath=language_dir)
    decoder.load_model()
    demo_syllables = ['wu2', 'xi1']
    print(decoder.speech_to_text(demo_syllables))
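# [Web-page residue, not part of the program] This section may contain content unsuitable for display and is not shown on the page. You can review and modify it yourself with the relevant editing features.
# [Web-page residue] If you confirm the content involves no inappropriate language, pure advertising, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, or anything violating national laws and regulations, you may click submit to appeal and it will be handled as soon as possible.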