1 Star 0 Fork 1

江嵩镗/Speaker_Recognition

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
Speaker_Recognition.py 16.83 KB
一键复制 编辑 原始数据 按行查看 历史
DEDSEC_Roger 提交于 2023-05-20 11:01 . fix bugs of self.user_input
import os
import sys
import typing
import numpy as np
import PySide2
from PySide2.QtCore import *
from PySide2.QtGui import *
from PySide2.QtWidgets import *
from Audio import Audio
from Model import Model
from Profile import Profile
from tabWidget import Ui_Form
class MyWidget(QWidget):
    """Top-level widget of the speaker-recognition application.

    The widget owns the generated ``Ui_Form`` and three helper objects:
    ``Audio`` and ``Model`` (each moved to its own ``QThread``) and
    ``Profile``. It drives them through the class-level signals below and
    reacts to their result signals.

    ``self.state`` is one of ``"normal"``, ``"recognize"``, ``"enroll"`` or
    ``"delete"``. Result signals of the helpers are only connected to this
    widget while an operation is in progress (see ``operate``).
    """

    # Requests sent to the helper objects.
    record_signal = Signal(int)
    save_signal = Signal(str)
    # ``type(np.array)`` is the type of the ``np.array`` factory function,
    # not the array type, so the array-carrying signals declare
    # ``np.ndarray`` instead.
    infer_signal = Signal(np.ndarray)
    recognize_signal = Signal(np.ndarray)
    enroll_signal = Signal(np.ndarray, str)
    delete_signal = Signal(str)

    def __init__(self,
                 parent: typing.Optional[PySide2.QtWidgets.QWidget] = None,
                 f: typing.Optional[PySide2.QtCore.Qt.WindowFlags] = None
                 ) -> None:
        """Build the UI, create the helper objects and enter the idle state.

        Args:
            parent: Optional parent widget.
            f: Window flags; empty ``Qt.WindowFlags()`` when omitted.
        """
        if f is None:
            f = Qt.WindowFlags()
        super().__init__(parent, f)
        self.setup_ui()
        # stable indicates signal that always connect
        self.setup_stable_signal()
        # unstable indicates signal that will be disconnected
        self.setup_unstable_signal()
        # ``operate`` tracks the connection state itself, so the initial
        # "disconnect" issued by ``normal`` is a harmless no-op.
        self.normal()

    def setup_ui(self):
        """Instantiate the generated form and set static texts and dialogs."""
        self.ui = Ui_Form()
        self.ui.setupUi(self)
        self.record_text = "录音图标"
        self.delete_text = self.ui.delete_button.text()
        self.auto_text = self.ui.auto_button.text()
        self.cancel_text = self.ui.enroll_cancel_button.text()
        self.ui.recognize_cancel_button.setText(self.cancel_text)
        self.keyword = "你好,BIP"
        # recognize
        self.ui.username_output.setPlaceholderText("在此显示用户名")
        self.ui.username_output.setFocusPolicy(Qt.NoFocus)
        self.ui.recognize_result.setPlaceholderText("在此显示操作提示")
        # enroll
        # A combo box only has a line edit while it is editable, so make it
        # editable before ``lineEdit()`` is used.
        self.ui.user_input.setEditable(True)
        self.ui.user_input.lineEdit().setPlaceholderText("在此输入用户名,可下拉用户列表")
        self.ui.enroll_result.setPlaceholderText("在此显示操作提示和结果")
        # delete
        self.delete_query = QMessageBox(self)
        self.delete_query.setIcon(QMessageBox.Question)
        self.delete_query.setInformativeText("是否删除该用户?")
        self.delete_query.setStandardButtons(QMessageBox.Ok
                                             | QMessageBox.Cancel)
        self.delete_query.setDefaultButton(QMessageBox.Ok)

    def setup_stable_signal(self):
        """Set configuration values and make the permanent connections.

        "Stable" connections are the ones that stay connected for the whole
        lifetime of the widget: requests to the helpers, the prompt timer and
        the button handlers.
        """
        self.vad_mode = 3
        self.audio_dir = "Audio"
        # Recording lengths in seconds (they are shown as seconds in the
        # prompts). An enrollment recording is split into ``enr_rec_ratio``
        # pieces in ``recorded``.
        self.enroll_duration = 6
        self.enr_rec_ratio = 3
        self.recognize_duration = self.enroll_duration // self.enr_rec_ratio
        modelname = "ECAPA_TDNN_GLOB_c512-ASTP-emb192-ArcMargin-LM"
        # The embedding size is encoded in the model name as "emb<N>".
        for config in modelname.split("-"):
            if config.startswith("emb"):
                self.embeding_size = int(config[3:])
        self.model_path = os.path.join("Model", f"{modelname}.onnx")
        self.profile_dir = os.path.join("Profile", f"{modelname}")
        # ---- rate <= 0.001 -----
        # thres = 0.17244 @ fnr = 0.0009573449633017764, fpr = 0.1182944335158701
        # thres = 0.45377 @ fpr = 0.0009569886756339852, fnr = 0.13397510903095416
        # ---- rate <= 0.0001 -----
        # thres = 0.06691 @ fnr = 5.318583129454313e-05, fpr = 0.3009197724493593
        # thres = 0.49913 @ fpr = 5.316603753524607e-05, fnr = 0.21981704074034678
        # ---- rate <= 0 -----
        # thres = 0.05753 @ fnr = 0.0, fpr = 0.32356850443936414
        # thres = 0.53752 @ fpr = 0.0, fnr = 0.3117753430486119
        # ---- vox1_O_cleaned.kaldi.score -----
        # EER = 1.972
        # thres = 0.31796
        # minDCF (p_target:0.01 c_miss:1 c_fa:1) = 0.194
        self.recognize_threshold = 0.45377
        self.save_threshold = 0.53752
        self._setup_workers()
        self._setup_prompts()
        self._setup_controls()

    def _setup_workers(self):
        """Create Audio, Model and Profile and connect the request signals."""
        # audio
        self.audio_thread = QThread()
        self.audio = Audio(None, self.vad_mode)
        self.audio.moveToThread(self.audio_thread)
        self.audio_thread.start()
        self.audio_thread.finished.connect(self.audio_thread.deleteLater)
        self.record_signal.connect(self.audio.record)
        self.save_signal.connect(self.audio.save)
        # model
        self.model_thread = QThread()
        self.model = Model(None, self.model_path, self.audio.sample_rate)
        self.model.moveToThread(self.model_thread)
        self.model_thread.start()
        self.model_thread.finished.connect(self.model_thread.deleteLater)
        self.infer_signal.connect(self.model.infer)
        # profile (not moved to another thread)
        self.profile = Profile(self, self.profile_dir, self.embeding_size)
        self.recognize_signal.connect(self.profile.recognize)
        self.enroll_signal.connect(self.profile.enroll)
        self.delete_signal.connect(self.profile.delete)

    def _setup_prompts(self):
        """Start the prompt timer and prepare the rotating hint texts."""
        self.prompt_timer = QTimer(self)
        self.prompt_interval = 8 * 1000  # milliseconds
        self.prompt_timer.timeout.connect(self.prompt_timeout)
        self.prompt_timer.start(self.prompt_interval)
        # recognize
        self.recognize_prompt_texts = [
            f"点击“{self.record_text}”,即可开始识别,识别需要录制{self.recognize_duration}秒音频。",
            f"勾选“{self.auto_text}”,即可持续识别,反选恢复手动模式。",
            f"要退出识别,或退出自动模式,点击“{self.cancel_text}”即可。",
            "要进行注册或删除,需要先退出识别,或退出自动模式。",
        ]
        self.recognize_prompt_count = 0
        self.ui.recognize_prompt.setText(
            self.recognize_prompt_texts[self.recognize_prompt_count])
        # enroll
        self.enroll_prompt_texts = [
            f"需要先输入用户名,才能点击“{self.record_text}”,开始注册,注册需要录制{self.enroll_duration}秒音频。",
            "点击左下角菜单,选择附件,选择Keyboard,即可使用虚拟键盘,输入用户名后,关闭虚拟键盘,再回到本界面。",
            f"识别和注册建议说一样的话,比如“{self.keyword}”,已注册的用户可以继续注册其他常用语。",
            f"需要先输入用户名,才能点击“{self.delete_text}”,只能删除已注册的用户。",
            f"要退出注册,点击“{self.cancel_text}”即可。",
            "要进行识别,需要先退出注册。",
        ]
        self.enroll_prompt_count = 0
        self.ui.enroll_prompt.setText(
            self.enroll_prompt_texts[self.enroll_prompt_count])

    def _setup_controls(self):
        """Connect the buttons and fill the user combo box."""
        # recognize
        self.ui.recognize_button.clicked.connect(self.recognize)
        self.ui.auto_button.clicked.connect(self.auto)
        self.ui.recognize_cancel_button.clicked.connect(self.cancel)
        # enroll
        self.ui.enroll_button.clicked.connect(self.enroll)
        self.ui.delete_button.clicked.connect(self.delete)
        self.ui.enroll_cancel_button.clicked.connect(self.cancel)
        self.users_set = set(self.profile.user_embeddings)
        # Sorted so the drop-down order does not depend on set iteration
        # order. The combo box was already made editable in ``setup_ui``.
        self.ui.user_input.addItems(sorted(self.users_set))
        self.ui.user_input.setMaxVisibleItems(8)
        self.ui.user_input.setCurrentIndex(-1)
        self.ui.user_input.setInsertPolicy(QComboBox.NoInsert)

    def setup_unstable_signal(self):
        """Collect the result signals that are connected only while busy.

        ``self.signals[i]`` is paired with ``self.slots[i]``; ``operate``
        connects or disconnects all pairs at once.
        """
        pairs = [
            # audio
            (self.audio.after_vad_signal, self.after_vad),
            (self.audio.recorded_signal, self.recorded),
            # model
            (self.model.inferred_signal, self.inferred),
            # profile
            (self.profile.recognized_signal, self.recognized),
            (self.profile.enrolled_signal, self.enrolled),
            (self.profile.deleted_signal, self.deleted),
        ]
        self.signals = [signal for signal, _ in pairs]
        self.slots = [slot for _, slot in pairs]
        # Tracks whether the pairs above are currently connected.
        self._connected = False

    def operate(self, operation: str):
        """Connect or disconnect every "unstable" signal/slot pair.

        The call is idempotent: asking for the state that is already in
        effect does nothing, so pairs are never connected twice and never
        disconnected while unconnected.

        Args:
            operation: Either ``"connect"`` or ``"disconnect"``.

        Raises:
            ValueError: If ``operation`` is any other string.
        """
        if operation not in ("connect", "disconnect"):
            raise ValueError(
                f"operation must be 'connect' or 'disconnect', "
                f"got {operation!r}")
        want_connected = operation == "connect"
        if want_connected == self._connected:
            return
        for signal, slot in zip(self.signals, self.slots):
            getattr(signal, operation)(slot)
        self._connected = want_connected

    @Slot()
    def prompt_timeout(self):
        """Advance both rotating hint labels to their next text."""
        # recognize
        self.recognize_prompt_count = (
            (self.recognize_prompt_count + 1)
            % len(self.recognize_prompt_texts))
        self.ui.recognize_prompt.setText(
            self.recognize_prompt_texts[self.recognize_prompt_count])
        # enroll
        self.enroll_prompt_count = (
            (self.enroll_prompt_count + 1) % len(self.enroll_prompt_texts))
        self.ui.enroll_prompt.setText(
            self.enroll_prompt_texts[self.enroll_prompt_count])

    @Slot()
    def normal(self):
        """Return to the idle state and re-enable the UI.

        If the auto button is checked, a new recognition is started right
        away.
        """
        # ui
        for idx in range(self.ui.tabWidget.count()):
            self.ui.tabWidget.setTabEnabled(idx, True)
        # recognize
        self.ui.recognize_button.setEnabled(True)
        self.ui.recognize_cancel_button.setEnabled(False)
        # enroll
        self.ui.enroll_button.setEnabled(True)
        self.ui.delete_button.setEnabled(True)
        self.ui.enroll_cancel_button.setEnabled(False)
        self.ui.user_input.lineEdit().setFocus()
        # logic
        self.state = "normal"
        self.operate("disconnect")
        if self.ui.auto_button.isChecked():
            self.recognize()

    @Slot()
    def recognize(self):
        """Enter the "recognize" state and start recording."""
        # ui
        self.ui.tabWidget.setTabEnabled(0, False)
        self.ui.recognize_button.setEnabled(False)
        self.ui.recognize_cancel_button.setEnabled(True)
        # logic
        self.operate("connect")
        self.state = "recognize"
        self.record()

    @Slot()
    def auto(self):
        """React to the auto button being toggled.

        Checking it while idle starts recognizing; unchecking it while
        recognizing cancels.
        """
        checked = self.ui.auto_button.isChecked()
        if checked and "normal" == self.state:
            self.recognize()
        elif not checked and "recognize" == self.state:
            self.cancel()

    @staticmethod
    def _is_valid_username(username: str) -> bool:
        """Return True if every character is alphabetic, ``_`` or 0-9."""
        return all(
            char.isalpha() or "_" == char or "0" <= char <= "9"
            for char in username)

    @Slot()
    def enroll(self):
        """Validate the entered user name and start an enrollment recording.

        An empty or invalid name shows a hint and returns to the idle state.
        """
        self.username = self.ui.user_input.currentText()
        if not self.username:
            self.ui.enroll_result.setText("请先输入用户名。")
            self.normal()
            return
        if not self._is_valid_username(self.username):
            self.ui.enroll_result.setText("用户名只能由文字、字母、下划线_或数字组成。")
            self.normal()
            return
        # ui
        self.ui.tabWidget.setTabEnabled(1, False)
        self.ui.enroll_button.setEnabled(False)
        self.ui.delete_button.setEnabled(False)
        self.ui.enroll_cancel_button.setEnabled(True)
        # logic
        self.operate("connect")
        self.state = "enroll"
        self.record()

    @Slot()
    def delete(self):
        """Ask for confirmation and request deletion of the entered user.

        An empty or unknown name shows a hint and returns to the idle state.
        """
        self.username = self.ui.user_input.currentText()
        if not self.username:
            self.ui.enroll_result.setText("请先输入用户名。")
            self.normal()
            return
        if self.username not in self.profile.user_embeddings:
            self.ui.enroll_result.setText(f"{self.username},未注册。")
            self.normal()
            return
        # Connect first so the ``deleted`` result reaches this widget.
        self.operate("connect")
        self.state = "delete"
        self.delete_query.setText(f"{self.username},已注册。")
        if QMessageBox.Ok == self.delete_query.exec_():
            self.delete_signal.emit(self.username)
        else:
            self.normal()

    @Slot()
    def cancel(self):
        """Abort the current operation and leave auto mode."""
        # recognize
        self.ui.auto_button.setChecked(False)
        self.ui.recognize_result.clear()
        # enroll
        self.ui.enroll_result.clear()
        self.normal()
        # NOTE(review): presumably Audio checks this flag while recording;
        # confirm in the Audio class.
        self.audio.running = False

    def record(self):
        """Show the recording hint for the current state and start recording.

        Does nothing unless the state is "recognize" or "enroll".
        """
        if "recognize" == self.state:
            duration = self.recognize_duration
            string = f"正在录音,需要录制{duration}秒,识别和注册建议说一样的话。"
            self.ui.recognize_result.setText(string)
        elif "enroll" == self.state:
            duration = self.enroll_duration
            string = f"正在录音,需要录制{duration}秒,"
            if self.username in self.profile.user_embeddings:
                string += "可以说其他常用语。"
            else:
                string += f"可以重复说“{self.keyword}”。"
            self.ui.enroll_result.setText(string)
        else:
            # No recording length is defined for other states.
            return
        self.audio.running = True
        self.record_signal.emit(duration)

    @Slot(float)
    def after_vad(self, need_duration: float):
        """Show how much more audio is needed.

        Args:
            need_duration: Remaining amount to record, displayed in seconds
                with one decimal.
        """
        if self.state not in ("recognize", "enroll"):
            return
        message = (f"还需要录制{need_duration:.1f}秒,"
                   f"您有{self.audio.record_duration}秒时间。")
        if "recognize" == self.state:
            self.ui.recognize_result.setText(message)
            self.ui.username_output.clear()
        else:
            self.ui.enroll_result.setText(message)

    @Slot(np.ndarray)
    def recorded(self, voiced_frame: np.ndarray):
        """Forward the recorded audio to the model.

        When recognizing, the recording is sent as a single row. When
        enrolling, it is cut into ``enr_rec_ratio`` equal-length pieces
        (any remainder is dropped) and the recording is saved as
        ``<username>_enroll.wav``. In any other state the recording is
        ignored.

        Args:
            voiced_frame: The recorded audio as an array.
        """
        if "recognize" == self.state:
            pieces = [voiced_frame]
            self.ui.recognize_result.setText("录制完成,正在推理。")
        elif "enroll" == self.state:
            length = len(voiced_frame) // self.enr_rec_ratio
            pieces = [
                voiced_frame[i * length:(i + 1) * length]
                for i in range(self.enr_rec_ratio)
            ]
            self.save_signal.emit(
                os.path.join(self.audio_dir, f"{self.username}_enroll.wav"))
            self.ui.enroll_result.setText("录制完成,正在推理。")
        else:
            # Nothing to infer; stacking an empty list would raise.
            return
        self.infer_signal.emit(np.stack(pieces, axis=0))

    @Slot(np.ndarray)
    def inferred(self, embeddings: np.ndarray):
        """Pass the model output on to recognition or enrollment.

        Args:
            embeddings: Model output with one row per piece sent to the
                model; the single row is squeezed out when recognizing.
        """
        if "recognize" == self.state:
            self.ui.recognize_result.append("推理完成,正在识别。")
            self.recognize_signal.emit(np.squeeze(embeddings, axis=0))
        elif "enroll" == self.state:
            self.ui.enroll_result.append("推理完成,正在注册。")
            self.enroll_signal.emit(embeddings, self.username)

    @Slot(list)
    def recognized(self, user_score_sorted: list):
        """Display the recognition result and save the recording.

        The first entry of ``user_score_sorted`` is used as the best match.
        If its score reaches ``recognize_threshold`` the user name is shown
        and the audio is saved as ``<name>_certain.wav`` or
        ``<name>_uncertain.wav`` depending on ``save_threshold``; otherwise
        (or when the list is empty) the result is shown as unregistered and
        saved as ``unknown.wav``.

        Args:
            user_score_sorted: ``(username, score)`` entries; may be empty
                when no user is enrolled.
        """
        shown_name = "未注册"
        filename = "unknown.wav"
        if user_score_sorted:
            max_username = user_score_sorted[0][0]
            max_score = user_score_sorted[0][1]
            if max_score >= self.recognize_threshold:
                shown_name = f"{max_username}"
                if max_score >= self.save_threshold:
                    filename = f"{max_username}_certain.wav"
                else:
                    filename = f"{max_username}_uncertain.wav"
        self.ui.username_output.setText(shown_name)
        self.save_signal.emit(os.path.join(self.audio_dir, filename))
        self.normal()

    @Slot(int)
    def enrolled(self, enrolled_count: int):
        """Report a finished enrollment and add new users to the combo box.

        Args:
            enrolled_count: Number shown to the user as the count of
                enrolled embeddings.
        """
        self.ui.enroll_result.append(
            f"{self.username},已注册{enrolled_count}个嵌入码。")
        if self.username not in self.users_set:
            self.ui.user_input.addItem(self.username)
            self.users_set.add(self.username)
        self.normal()

    @Slot()
    def deleted(self):
        """Report a finished deletion and remove the user from the list."""
        self.ui.enroll_result.append(f"{self.username},已删除。")
        self.ui.user_input.removeItem(
            self.ui.user_input.findText(self.username))
        # ``discard`` keeps this safe if the name is already gone.
        self.users_set.discard(self.username)
        self.normal()

    def closeEvent(self, event: PySide2.QtGui.QCloseEvent) -> None:
        """Cancel any running operation and stop the worker threads.

        Args:
            event: The close event, forwarded to the base class at the end.
        """
        print(f"{self} closed")
        if "normal" != self.state:
            self.cancel()
        self.audio_thread.quit()
        self.audio_thread.wait()
        self.audio.p.terminate()
        self.model_thread.quit()
        self.model_thread.wait()
        super().closeEvent(event)
if __name__ == "__main__":
    # Script entry point: show the main widget maximized and hand control
    # to the Qt event loop; its return value becomes the exit status.
    application = QApplication(sys.argv)
    main_widget = MyWidget(None, Qt.WindowFlags())
    main_widget.showMaximized()
    exit_status = application.exec_()
    sys.exit(exit_status)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/jiangsong-boring/Speaker_Recognition.git
git@gitee.com:jiangsong-boring/Speaker_Recognition.git
jiangsong-boring
Speaker_Recognition
Speaker_Recognition
main

搜索帮助