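# NOTE: the next three lines are Gitee web-page residue captured with the file, not part of the module.
# Code pull complete; the page will refresh automatically.
# The sync operation will force-sync from the Gitee mirror repository EmotiVoice; this overwrites any changes made since the repository was forked and cannot be undone!
# Once confirmed, the sync runs in the background and the page refreshes when it finishes; please be patient.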
# Copyright 2023, YOUDAO
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from pypinyin import pinyin, lazy_pinyin, Style
import jieba
import string
from cn2an.an2cn import An2Cn
from pypinyin_dict.phrase_pinyin_data import cc_cedict
# Runs once at import time. Presumably registers the CC-CEDICT phrase-pinyin
# data with pypinyin so later pinyin() calls use it -- confirm against the
# pypinyin-dict documentation.
cc_cedict.load()
# Syllables that consist of a nasal only; split_py prefixes them with 'e'.
re_special_pinyin = re.compile(r'^(n|ng|m)$')


def split_py(py):
    """Split a tone-numbered pinyin syllable into (initial, final + tone).

    The last character of ``py`` is taken as the tone mark and the rest as
    the syllable.  A trailing ``r`` on the syllable is moved onto the final.

    Args:
        py: syllable followed by one tone character, e.g. ``'zhong1'``.

    Returns:
        ``(sm, ym)`` where ``sm`` is the initial (possibly ``''``) and ``ym``
        is the rewritten final followed by the optional ``r`` and the tone.

    Raises:
        IndexError: if ``py`` has fewer than two characters, or nothing is
            left after removing the tone and a trailing ``r``.
        AttributeError: if the syllable reaches the generic branch and
            contains none of ``a e i o u v``.
    """
    tone = py[-1]
    py = py[:-1]
    sm = ""
    ym = ""
    suf_r = ""
    if re_special_pinyin.match(py):
        py = 'e' + py
    if py[-1] == 'r':
        suf_r = 'r'
        py = py[:-1]

    if py in ('zi', 'ci', 'si', 'ri'):
        sm = py[:1]
        ym = "ii"
    elif py in ('zhi', 'chi', 'shi'):
        sm = py[:2]
        ym = "iii"
    elif py in ('ya', 'yan', 'yang', 'yao', 'ye', 'yong', 'you'):
        # Leading 'y' stands for 'i'.
        ym = 'i' + py[1:]
    elif py in ('yi', 'yin', 'ying'):
        # Leading 'y' is dropped; the final already starts with 'i'.
        ym = py[1:]
    elif py in ('yu', 'yv', 'yuan', 'yvan', 'yue', 'yve', 'yun', 'yvn'):
        # 'yu'/'yv' stands for 'v'.  The original compared against 'yue '
        # (trailing space), so 'yue' fell through and kept 'y' as an initial.
        ym = 'v' + py[2:]
    elif py == 'wu':
        ym = "u"
    elif py[0] == 'w':
        # Leading 'w' stands for 'u'.
        ym = "u" + py[1:]
    elif len(py) >= 2 and py[0] in 'jqx' and py[1] == 'u':
        # After j/q/x the written 'u' is rewritten as 'v'.
        sm = py[0]
        ym = 'v' + py[2:]
    else:
        # Generic case: the initial is everything before the first vowel.
        seg_pos = re.search('a|e|i|o|u|v', py)
        sm = py[:seg_pos.start()]
        ym = py[seg_pos.start():]
        # Expand the abbreviated spellings of these finals.
        if ym == 'ui':
            ym = 'uei'
        elif ym == 'iu':
            ym = 'iou'
        elif ym == 'un':
            ym = 'uen'
        elif ym == 'ue':
            ym = 've'
    ym += suf_r + tone
    return sm, ym
# Character class listing the full-width / CJK punctuation marks recognised
# by has_chinese_punctuation.
chinese_punctuation_pattern = r'[\u3002\uff0c\uff1f\uff01\uff1b\uff1a\u201c\u201d\u2018\u2019\u300a\u300b\u3008\u3009\u3010\u3011\u300e\u300f\u2014\u2026\u3001\uff08\uff09]'


def has_chinese_punctuation(text):
    """Return True when ``text`` contains a character matched by
    ``chinese_punctuation_pattern`` anywhere, otherwise False."""
    return bool(re.search(chinese_punctuation_pattern, text))
def has_english_punctuation(text):
    """Return True when ``text`` contains at least one ASCII punctuation mark.

    Each character is checked against ``string.punctuation``.  The original
    substring test (``text in string.punctuation``) reported the empty string
    as punctuation and missed runs such as ``"..."`` or ``"?!"``.

    Args:
        text: string to inspect.

    Returns:
        bool: True if any character of ``text`` is in ``string.punctuation``.
    """
    return any(ch in string.punctuation for ch in text)
# with thanks to KimigaiiWuyi in https://github.com/netease-youdao/EmotiVoice/pull/17.
# Updated on November 20, 2023: EmotiVoice now incorporates cn2an (https://github.com/Ailln/cn2an) for number processing.
# A run that starts with a digit and continues with digits or dots.  The
# capturing group makes ``re_digits.split`` keep the matched runs in its
# result.  Written as a raw string so ``\d`` and ``\.`` are not invalid
# string escapes.
re_digits = re.compile(r'(\d[\d\.]*)')


def number_to_chinese(number):
    """Convert ``number`` to Chinese numerals with cn2an.

    Args:
        number: value passed unchanged to ``An2Cn.an2cn``; callers in this
            file pass the digit run matched by ``re_digits``.

    Returns:
        The value returned by ``An2Cn().an2cn(number)``.
    """
    # A new converter is created on every call, as in the original code.
    converter = An2Cn()
    return converter.an2cn(number)
def tn_chinese(text):
    """Rewrite every digit run in ``text`` through ``number_to_chinese``.

    ``text`` is split on ``re_digits``; because that pattern has a capturing
    group, the digit runs stay in the split result.  Pieces that match the
    pattern are converted, all other pieces are copied through unchanged, and
    the pieces are concatenated in their original order.
    """
    return ''.join(
        number_to_chinese(piece) if re_digits.match(piece) else piece
        for piece in re_digits.split(text)
    )
def g2p_cn(text):
    """Convert ``text`` into a space-separated phoneme/pause token string.

    The text is segmented with ``jieba.cut``.  Each segment is number-
    normalised with ``tn_chinese`` and converted to TONE3 pinyin.  A segment
    whose pinyin items contain Chinese or English punctuation becomes a
    single ``sp3`` token that replaces the pause emitted before it.  Any
    other segment contributes its syllables, each split by ``split_py`` and
    joined with ``sp0``, followed by ``sp1``.  The result is wrapped in
    ``<sos/eos>`` markers.

    Args:
        text: input sentence.

    Returns:
        str: tokens joined by single spaces, starting and ending with
        ``<sos/eos>``.
    """
    res_text = ["<sos/eos>"]
    for seg in jieba.cut(text):
        if seg == " ":
            continue
        seg_tn = tn_chinese(seg)
        py = [_py[0] for _py in pinyin(seg_tn, style=Style.TONE3, neutral_tone_with_five=True)]
        if any(has_chinese_punctuation(_py) or has_english_punctuation(_py) for _py in py):
            # Punctuation replaces the pause token emitted just before it.
            # Never pop the leading "<sos/eos>": the original did so when
            # the text started with punctuation, losing the start marker.
            if len(res_text) > 1:
                res_text.pop()
            res_text.append("sp3")
        else:
            syllables = [" ".join(split_py(_py)) for _py in py]
            res_text.append(" sp0 ".join(syllables))
            res_text.append("sp1")
    # A trailing "sp1" (if any) is intentionally kept before the end marker.
    res_text.append("<sos/eos>")
    return " ".join(res_text)
if __name__ == "__main__":
    # Command-line entry point: print the phoneme string for every line of
    # the text file given as the first argument.
    import sys
    from os.path import isfile

    if len(sys.argv) < 2:
        # The argument must be a path to a text file, not literal text.
        print("Usage: python %s <text_file>" % sys.argv[0])
        sys.exit()
    text_file = sys.argv[1]
    if not isfile(text_file):
        # Previously a missing file was ignored silently; report it instead.
        sys.exit("%s: not a file: %s" % (sys.argv[0], text_file))
    # Explicit UTF-8 so Chinese input does not depend on the locale encoding;
    # the context manager closes the file even if g2p_cn raises.
    with open(text_file, 'r', encoding='utf-8') as fp:
        for line in fp:
            phoneme = g2p_cn(line.rstrip())
            print(phoneme)
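# NOTE: the next two lines are Gitee web-page residue captured with the file, not part of the module.
# This location may contain content unsuitable for display, so the page does not show it. You can review and modify it using the relevant editing features.
# If you confirm the content involves no inappropriate language / pure advertising or traffic diversion / violence / vulgar or pornographic material / infringement / piracy / false information / worthless content, or content violating national laws and regulations, you may click submit to appeal and we will handle it as soon as possible.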