PythonAI算法
使用Python与PyQt5构建本地Qwen3-ASR语音识别助手
本文介绍了一款基于Python和PyQt5开发的本地化AI助手工具。该应用集成了Qwen3-ASR语音识别模型与Ollama本地大语言模型,实现了音频文件转文字及智能对话功能。系统采用多线程设计避免UI阻塞,支持多语言识别、流式输出及设备自适应(CPU/GPU)。通过图形界面管理模型加载、音频选择及提示词设置,确保数据完全本地化处理,保护隐私。适用于会议记录、内容分析及多语言辅助等场景。

本文介绍了一款基于Python和PyQt5开发的本地化AI助手工具。该应用集成了Qwen3-ASR语音识别模型与Ollama本地大语言模型,实现了音频文件转文字及智能对话功能。系统采用多线程设计避免UI阻塞,支持多语言识别、流式输出及设备自适应(CPU/GPU)。通过图形界面管理模型加载、音频选择及提示词设置,确保数据完全本地化处理,保护隐私。适用于会议记录、内容分析及多语言辅助等场景。


基于PyQt5的桌面应用程序,结合了Qwen3-ASR语音识别和Ollama本地大语言模型的智能助手工具。
这个应用是一个本地化AI助手工具,主要功能包括:将音频文件识别转写为文字,以及调用本地 Ollama 大语言模型对识别结果进行智能对话与分析。
应用采用工作线程模型,将模型加载、语音识别和大模型调用放入后台线程执行,避免长时间操作阻塞UI。
# 自动选择设备(CPU/GPU)
device_map = "cuda:0" if torch.cuda.is_available() else "cpu"
# 自动选择精度
dtype = "float32" if not torch.cuda.is_available() else "float16"
核心依赖:
硬件要求:
""" 测试GUI应用是否可以正常启动(本地模式) """
import sys
import os
# 设置Python路径
PYTHON_PATH = r"D:\python310\python.exe"
def check_dependencies():
    """Check that every package the GUI app needs can be imported.

    Probes PyQt5, requests, soundfile, torch and the local qwen_asr
    package one by one, printing a ✓/✗ line for each.

    Returns:
        bool: True when all dependencies import cleanly, False on the
        first missing one.

    NOTE(review): this function was reconstructed from extraction-garbled
    source; the message strings past the first few lines were lost and are
    plausible reconstructions — confirm against the original script.
    """
    print("检查依赖...")
    # Check PyQt5
    try:
        import PyQt5
        print("✓ PyQt5 已安装")
    except ImportError:
        print("✗ PyQt5 未安装")
        return False
    # Check requests
    try:
        import requests
        print("✓ requests 已安装")
    except ImportError:
        print("✗ requests 未安装")
        return False
    # Check soundfile
    try:
        import soundfile
        print("✓ soundfile 已安装")
    except ImportError:
        print("✗ soundfile 未安装")
        return False
    # Check torch (also report version / CUDA availability)
    try:
        import torch
        print("✓ torch 已安装")
        print(f" - 版本:{torch.__version__}")
        print(f" - CUDA: {torch.cuda.is_available()}")
        if torch.cuda.is_available():
            print(f" - GPU: {torch.cuda.get_device_name(0)}")
    except ImportError:
        print("✗ torch 未安装")
        return False
    # Check the local Qwen3-ASR package
    try:
        from qwen_asr import Qwen3ASRModel
        print("✓ qwen_asr 已安装")
    except ImportError:
        print("✗ qwen_asr 未安装")
        return False
    return True
def check_services():
    """Probe the local Ollama HTTP service and list its models.

    Sends GET /api/tags to the default Ollama endpoint; on success prints
    up to five model names (plus a count of the rest). All failures are
    reported but never raised — this is a best-effort diagnostic.

    NOTE(review): reconstructed from garbled source; the URL, timeout and
    message strings were lost — the values below match script 2's
    OLLAMA_API_URL and should be confirmed.
    """
    print("\n检查 Ollama 服务...")
    import requests
    try:
        response = requests.get("http://localhost:11434/api/tags", timeout=5)
        if response.status_code == 200:
            print("✓ Ollama 服务运行中")
            models = response.json().get("models", [])
            if models:
                print(f"  可用模型 ({len(models)} 个):")
                # Only show the first few to keep the report short.
                for m in models[:5]:
                    print(f"  - {m.get('name', '')}")
                if len(models) > 5:
                    print(f"  ... 以及另外 {len(models) - 5} 个模型")
            else:
                print("  (暂无可用模型)")
        else:
            print(f"✗ Ollama 服务响应异常: HTTP {response.status_code}")
    except Exception as e:
        print(f"✗ 无法连接 Ollama 服务: {e}")
    print()
def check_model_files():
    """Check that the Qwen3-ASR model files are present in the current dir.

    Looks for weight files (*.safetensors) and a config.json under ".",
    printing a ✓/✗ line for each. Purely diagnostic; returns None.

    NOTE(review): reconstructed from garbled source; the glob patterns and
    message strings were lost — patterns below are the conventional
    HuggingFace layout and should be confirmed.
    """
    print("\n检查模型文件...")
    from pathlib import Path
    model_path = Path(".")
    if model_path.exists():
        print("✓ 模型目录存在")
        files = list(model_path.glob("*.safetensors"))
        if files:
            print(f"✓ 找到 {len(files)} 个权重文件")
        else:
            print("✗ 未找到权重文件 (*.safetensors)")
        config_files = list(model_path.glob("config.json"))
        if config_files:
            print("✓ 找到配置文件 config.json")
        else:
            print("✗ 未找到配置文件 config.json")
    print()
def main():
    """Run all startup checks, then optionally launch the GUI app.

    Flow: dependency check (exits with status 1 on failure) -> model-file
    check -> Ollama service check -> interactive y/n prompt -> on "y",
    replace this process with the GUI script via os.execv.

    NOTE(review): reconstructed from garbled source; banner texts, the
    prompt string and the target script filename were lost — confirm the
    execv argument against the real project layout.
    """
    print("=" * 60)
    print("GUI 应用启动检查(本地模式)")
    print("=" * 60)
    print()
    if not check_dependencies():
        print()
        print("依赖检查未通过,请先安装缺失的依赖。")
        print()
        sys.exit(1)
    check_model_files()
    check_services()
    print("\n" + "=" * 60)
    print("检查完成")
    print("=" * 60)
    print()
    print("即将启动 GUI 应用。")
    print()
    response = input("是否启动GUI应用? (y/n): ").strip().lower()
    if response == "y":
        print()
        print("正在启动...")
        try:
            # Replace the current process with the GUI script so the
            # checker does not linger in the background.
            os.execv(PYTHON_PATH, [PYTHON_PATH, "asr_ollama_gui.py"])
        except Exception as e:
            print(f"启动失败: {e}")
            import traceback
            traceback.print_exc()
            sys.exit(1)
    else:
        print("已取消启动")
# Script entry point: run the startup checks only when executed directly.
if __name__ == "__main__":
    main()
""" Qwen3-ASR + Ollama GUI 应用程序 功能:音频识别 + Ollama模型调用(直接使用本地模型) """
import sys
import os
import json
import requests
import soundfile as sf
import io
import torch
from typing import List, Optional
from pathlib import Path
from PyQt5.QtWidgets import (
QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
QPushButton, QLabel, QTextEdit, QComboBox, QFileDialog, QGroupBox,
QProgressBar, QSplitter, QMessageBox, QCheckBox, QFormLayout,
QScrollArea, QFrame, QSpinBox
)
from PyQt5.QtCore import Qt, QThread, pyqtSignal, QTimer
from PyQt5.QtGui import QFont, QTextCursor, QColor
# 导入Qwen3-ASR模型
from qwen_asr import Qwen3ASRModel
# ===================== Configuration =====================
MODEL_PATH = "." # Use the current directory (the model files already live here)
OLLAMA_API_URL = "http://localhost:11434"  # Base URL of the local Ollama HTTP API
# ===================== 工作线程 =====================
class ASRWorker(QThread):
    """Speech-recognition worker thread - uses the local model directly.

    Reads an audio file, decodes it with soundfile, and runs the already
    loaded Qwen3-ASR model off the UI thread. Results and errors are
    reported back through Qt signals.

    NOTE(review): reconstructed from extraction-garbled source; the
    progress/error message strings were lost and are reconstructions.
    """
    finished = pyqtSignal(bool, str, str)  # success, text, language
    progress = pyqtSignal(str)  # progress message
    error = pyqtSignal(str)  # error message

    def __init__(self, audio_path: str, model, language: Optional[str] = None):
        super().__init__()
        self.audio_path = audio_path  # path of the audio file to transcribe
        self.model = model            # pre-loaded Qwen3ASRModel instance
        self.language = language      # forced language code, or None = auto

    def run(self):
        try:
            self.progress.emit("正在读取音频文件...")
            with open(self.audio_path, "rb") as f:
                audio_bytes = f.read()
            # Decode in memory; sf.read returns (samples, sample_rate).
            audio, sr = sf.read(io.BytesIO(audio_bytes))
            self.progress.emit("音频读取完成")
            if self.model is None:
                self.error.emit("模型未加载")
                return
            self.progress.emit("正在识别...")
            results = self.model.transcribe(
                audio=(audio, sr),
                language=self.language
            )
            if results and len(results) > 0:
                result = results[0]
                text = result.text
                language = result.language
                self.finished.emit(True, text, language)
            else:
                self.error.emit("识别结果为空")
        except Exception as e:
            import traceback
            error_details = traceback.format_exc()
            print(error_details)
            self.error.emit(f"识别失败: {e}")
class OllamaWorker(QThread):
    """Worker thread that calls the Ollama /api/generate endpoint.

    Supports both streaming (token-by-token via the `streaming` signal)
    and non-streaming (single `finished` emission) modes.

    NOTE(review): reconstructed from garbled source; JSON field names
    follow the documented Ollama API, but the timeout value was lost.
    """
    finished = pyqtSignal(str)   # full response text (empty in stream mode)
    error = pyqtSignal(str)      # error message
    streaming = pyqtSignal(str)  # incremental response chunk

    def __init__(self, model, prompt, api_url, enable_streaming):
        super().__init__()
        self.model = model                      # Ollama model name
        self.prompt = prompt                    # full prompt to send
        self.api_url = api_url                  # Ollama base URL
        self.enable_streaming = enable_streaming

    def run(self):
        try:
            if self.enable_streaming:
                response = requests.post(
                    f"{self.api_url}/api/generate",
                    json={
                        "model": self.model,
                        "prompt": self.prompt,
                        "stream": True
                    },
                    stream=True,
                    timeout=300
                )
                # Each line is an independent JSON object with a
                # "response" chunk; forward chunks as they arrive.
                for line in response.iter_lines():
                    if line:
                        try:
                            data = json.loads(line)
                            if "response" in data:
                                self.streaming.emit(data["response"])
                        except json.JSONDecodeError:
                            pass
                self.finished.emit("")
            else:
                response = requests.post(
                    f"{self.api_url}/api/generate",
                    json={
                        "model": self.model,
                        "prompt": self.prompt,
                        "stream": False
                    },
                    timeout=300
                )
                if response.status_code == 200:
                    result = response.json()
                    self.finished.emit(result.get("response", ""))
                else:
                    self.error.emit(f"请求失败: HTTP {response.status_code}")
        except Exception as e:
            self.error.emit(f"调用 Ollama 失败: {e}")
class FetchModelsWorker(QThread):
    """Worker thread that fetches the installed model list from Ollama.

    GETs /api/tags and emits the list of model names, or an error string.

    NOTE(review): reconstructed from garbled source; endpoint and field
    names follow the documented Ollama API, timeout value was lost.
    """
    finished = pyqtSignal(list)  # list[str] of model names
    error = pyqtSignal(str)      # error message

    def __init__(self, api_url):
        super().__init__()
        self.api_url = api_url  # Ollama base URL

    def run(self):
        try:
            response = requests.get(f"{self.api_url}/api/tags", timeout=5)
            if response.status_code == 200:
                result = response.json()
                models = [m["name"] for m in result.get("models", [])]
                self.finished.emit(models)
            else:
                self.error.emit(f"HTTP {response.status_code}")
        except Exception as e:
            self.error.emit(str(e))
class LoadModelWorker(QThread):
    """Worker thread that loads the Qwen3-ASR model off the UI thread.

    Maps a dtype name to the torch dtype, calls
    Qwen3ASRModel.from_pretrained, and emits the loaded model object.

    NOTE(review): reconstructed from garbled source; progress strings and
    the max_inference_batch_size / max_new_tokens values were lost — the
    numbers below are guesses to be confirmed.
    """
    finished = pyqtSignal(object)  # the loaded model
    error = pyqtSignal(str)        # error message
    progress = pyqtSignal(str)     # progress message

    def __init__(self, model_path, device_map, dtype):
        super().__init__()
        self.model_path = model_path  # directory holding the model files
        self.device_map = device_map  # "cuda:0" or "cpu"
        self.dtype = dtype            # dtype name, e.g. "float16"

    def run(self):
        try:
            self.progress.emit("正在加载模型...")
            self.progress.emit(f"设备: {self.device_map}")
            # Map the textual dtype name to a torch dtype; default to
            # float32 for safety on CPU.
            dtype_map = {
                "float16": torch.float16,
                "bfloat16": torch.bfloat16,
                "float32": torch.float32
            }
            dtype = dtype_map.get(self.dtype, torch.float32)
            model = Qwen3ASRModel.from_pretrained(
                self.model_path,
                dtype=dtype,
                device_map=self.device_map,
                max_inference_batch_size=1,
                max_new_tokens=512,
            )
            self.progress.emit("模型加载完成")
            self.finished.emit(model)
        except Exception as e:
            import traceback
            error_details = traceback.format_exc()
            print(error_details)
            self.error.emit(f"模型加载失败: {e}")
class ASROllamaApp(QMainWindow):
    """Main window: Qwen3-ASR transcription + Ollama chat, all local.

    Left panel: model loading, audio selection, ASR options, Ollama model
    picker and prompt; right panel: ASR result, Ollama output, status bar.
    All long-running work is delegated to the QThread workers above.

    NOTE(review): this entire class was reconstructed from extraction-
    garbled source. Method names and control flow follow the signal
    connections visible in the original; every UI string, stylesheet and
    numeric literal was lost and is a plausible reconstruction — confirm
    against the original script before shipping.
    """

    def __init__(self):
        super().__init__()
        # Runtime state: current audio file, loaded ASR model, and the
        # four worker threads (None while idle).
        self.current_audio_path = None
        self.asr_model = None
        self.asr_worker = None
        self.ollama_worker = None
        self.fetch_models_worker = None
        self.load_model_worker = None
        self.ollama_api_url = OLLAMA_API_URL
        self.init_ui()
        # Kick off model loading immediately so the app is usable sooner.
        self.load_asr_model()

    def init_ui(self):
        """Build the two-panel main window layout."""
        self.setWindowTitle("Qwen3-ASR + Ollama 本地助手")
        self.setGeometry(100, 100, 1200, 800)
        main_widget = QWidget()
        self.setCentralWidget(main_widget)
        main_layout = QHBoxLayout(main_widget)
        splitter = QSplitter(Qt.Horizontal)
        main_layout.addWidget(splitter)
        splitter.addWidget(self.create_left_panel())
        splitter.addWidget(self.create_right_panel())
        # Give the result panel more room than the controls panel.
        splitter.setStretchFactor(0, 1)
        splitter.setStretchFactor(1, 2)
        self.setStyleSheet("QGroupBox { font-weight: bold; }")

    def create_left_panel(self) -> QWidget:
        """Build the control panel: model, audio, ASR, Ollama, prompt."""
        panel = QWidget()
        layout = QVBoxLayout(panel)

        # --- ASR model group ---
        model_group = QGroupBox("ASR 模型")
        model_layout = QVBoxLayout()
        self.model_status_label = QLabel("模型未加载")
        self.model_status_label.setStyleSheet("color: gray;")
        model_layout.addWidget(self.model_status_label)
        self.load_model_btn = QPushButton("加载模型")
        self.load_model_btn.clicked.connect(self.load_asr_model)
        model_layout.addWidget(self.load_model_btn)
        model_group.setLayout(model_layout)
        layout.addWidget(model_group)

        # --- Audio file group ---
        audio_group = QGroupBox("音频文件")
        audio_layout = QVBoxLayout()
        self.audio_path_label = QLabel("未选择音频文件")
        self.audio_path_label.setStyleSheet("color: gray;")
        self.audio_path_label.setWordWrap(True)
        audio_layout.addWidget(self.audio_path_label)
        select_audio_btn = QPushButton("选择音频文件")
        select_audio_btn.clicked.connect(self.select_audio_file)
        audio_layout.addWidget(select_audio_btn)
        audio_group.setLayout(audio_layout)
        layout.addWidget(audio_group)

        # --- ASR options group ---
        asr_group = QGroupBox("识别设置")
        asr_layout = QFormLayout()
        self.language_combo = QComboBox()
        # First entry = auto-detect (data None); the rest force a language.
        self.language_combo.addItem("自动检测", None)
        self.language_combo.addItem("中文", "zh")
        self.language_combo.addItem("英语", "en")
        self.language_combo.addItem("日语", "ja")
        self.language_combo.addItem("韩语", "ko")
        self.language_combo.addItem("法语", "fr")
        self.language_combo.addItem("德语", "de")
        self.language_combo.addItem("西班牙语", "es")
        self.language_combo.addItem("俄语", "ru")
        asr_layout.addRow("语言:", self.language_combo)
        self.asr_progress = QProgressBar()
        self.asr_progress.setVisible(False)
        asr_layout.addRow("进度:", self.asr_progress)
        asr_group.setLayout(asr_layout)
        layout.addWidget(asr_group)

        recognize_btn = QPushButton("开始识别")
        recognize_btn.setStyleSheet("font-weight: bold;")
        recognize_btn.clicked.connect(self.start_asr)
        recognize_btn.setEnabled(False)  # enabled once the model loads
        self.recognize_btn = recognize_btn
        layout.addWidget(recognize_btn)

        # --- Ollama group ---
        ollama_group = QGroupBox("Ollama")
        ollama_layout = QVBoxLayout()
        self.refresh_models_btn = QPushButton("刷新模型列表")
        self.refresh_models_btn.clicked.connect(self.refresh_models)
        ollama_layout.addWidget(self.refresh_models_btn)
        ollama_layout.addWidget(QLabel("模型:"))
        self.model_combo = QComboBox()
        self.model_combo.setEnabled(False)
        ollama_layout.addWidget(self.model_combo)
        ollama_group.setLayout(ollama_layout)
        layout.addWidget(ollama_group)

        # --- Prompt group ---
        prompt_group = QGroupBox("提示词")
        prompt_layout = QVBoxLayout()
        prompt_layout.addWidget(QLabel("系统提示词:"))
        self.system_prompt = QTextEdit()
        self.system_prompt.setPlaceholderText("输入提示词...")
        self.system_prompt.setMaximumHeight(100)
        self.system_prompt.setText("请总结以下内容:")
        prompt_layout.addWidget(self.system_prompt)
        self.use_recognized_text = QCheckBox("将识别结果附加到提示词")
        self.use_recognized_text.setChecked(True)
        prompt_layout.addWidget(self.use_recognized_text)
        prompt_group.setLayout(prompt_layout)
        layout.addWidget(prompt_group)

        generate_btn = QPushButton("生成回复")
        generate_btn.setStyleSheet("font-weight: bold;")
        generate_btn.clicked.connect(self.call_ollama)
        layout.addWidget(generate_btn)
        self.streaming_checkbox = QCheckBox("流式输出")
        self.streaming_checkbox.setChecked(True)
        layout.addWidget(self.streaming_checkbox)
        layout.addStretch()
        return panel

    def create_right_panel(self) -> QWidget:
        """Build the results panel: ASR text, Ollama text, status label."""
        panel = QWidget()
        layout = QVBoxLayout(panel)
        scroll = QScrollArea()
        scroll.setWidgetResizable(True)
        scroll_content = QWidget()
        scroll_layout = QVBoxLayout(scroll_content)

        asr_result_group = QGroupBox("识别结果")
        asr_result_layout = QVBoxLayout()
        self.asr_result_text = QTextEdit()
        self.asr_result_text.setPlaceholderText("识别结果将显示在这里...")
        self.asr_result_text.setReadOnly(True)
        asr_result_layout.addWidget(self.asr_result_text)
        self.detected_language_label = QLabel("")
        self.detected_language_label.setStyleSheet("color: gray;")
        asr_result_layout.addWidget(self.detected_language_label)
        asr_result_group.setLayout(asr_result_layout)
        scroll_layout.addWidget(asr_result_group)

        ollama_result_group = QGroupBox("Ollama 输出")
        ollama_result_layout = QVBoxLayout()
        self.ollama_result_text = QTextEdit()
        self.ollama_result_text.setPlaceholderText("模型回复将显示在这里...")
        self.ollama_result_text.setReadOnly(True)
        self.ollama_result_text.setStyleSheet("background-color: #f7f7f7;")
        ollama_result_layout.addWidget(self.ollama_result_text)
        ollama_result_group.setLayout(ollama_result_layout)
        scroll_layout.addWidget(ollama_result_group)

        self.status_label = QLabel("就绪")
        self.status_label.setStyleSheet("color: gray;")
        scroll_layout.addWidget(self.status_label)
        scroll_layout.addStretch()
        scroll.setWidget(scroll_content)
        layout.addWidget(scroll)
        return panel

    def load_asr_model(self):
        """Start the ASR model loading worker; picks device/dtype itself."""
        self.load_model_btn.setEnabled(False)
        self.recognize_btn.setEnabled(False)
        self.model_status_label.setText("正在加载模型...")
        self.model_status_label.setStyleSheet("color: orange;")
        # Auto-select device and precision (matches the article's snippet:
        # GPU -> float16, CPU -> float32).
        device_map = "cuda:0" if torch.cuda.is_available() else "cpu"
        dtype = "float16" if torch.cuda.is_available() else "float32"
        self.load_model_worker = LoadModelWorker(MODEL_PATH, device_map, dtype)
        self.load_model_worker.progress.connect(self.on_model_progress)
        self.load_model_worker.finished.connect(self.on_model_loaded)
        self.load_model_worker.error.connect(self.on_model_error)
        self.load_model_worker.start()

    def on_model_progress(self, message):
        """Mirror loader progress into both the model label and status bar."""
        self.model_status_label.setText(message)
        self.status_label.setText(message)

    def on_model_loaded(self, model):
        """Store the loaded model, re-enable the UI, fetch Ollama models."""
        self.asr_model = model
        self.load_model_btn.setEnabled(True)
        self.recognize_btn.setEnabled(True)
        self.model_status_label.setText("模型已加载")
        self.model_status_label.setStyleSheet("color: green;")
        self.update_status("模型加载完成")
        self.refresh_models()

    def on_model_error(self, error_msg):
        """Report a model-loading failure and restore the UI."""
        self.load_model_btn.setEnabled(True)
        self.recognize_btn.setEnabled(False)
        self.model_status_label.setText("模型加载失败")
        self.model_status_label.setStyleSheet("color: red;")
        self.status_label.setStyleSheet("color: red;")
        self.update_status("模型加载失败")
        QMessageBox.critical(self, "错误", f"模型加载失败:\n{error_msg}")

    def select_audio_file(self):
        """Open a file dialog and remember the chosen audio path."""
        file_path, _ = QFileDialog.getOpenFileName(
            self, "选择音频文件", "",
            "音频文件 (*.wav *.mp3 *.flac *.m4a *.ogg);;所有文件 (*)"
        )
        if file_path:
            self.current_audio_path = file_path
            self.audio_path_label.setText(file_path)
            self.audio_path_label.setStyleSheet("color: black;")
            self.update_status("已选择音频文件")

    def start_asr(self):
        """Validate preconditions and launch the ASR worker thread."""
        if self.asr_model is None:
            QMessageBox.warning(self, "提示", "请先加载模型")
            return
        if not self.current_audio_path:
            QMessageBox.warning(self, "提示", "请先选择音频文件")
            return
        self.set_ui_enabled(False)
        self.asr_progress.setVisible(True)
        self.asr_progress.setRange(0, 0)  # indeterminate busy indicator
        # Dispose of any previous worker before starting a new one.
        if self.asr_worker is not None:
            if hasattr(self.asr_worker, "deleteLater"):
                self.asr_worker.deleteLater()
            self.asr_worker = None
        try:
            language = self.language_combo.currentData()
            self.asr_worker = ASRWorker(self.current_audio_path, self.asr_model, language)
            if not isinstance(self.asr_worker, ASRWorker):
                raise TypeError("ASRWorker 创建失败")
            self.asr_worker.progress.connect(self.on_asr_progress)
            self.asr_worker.finished.connect(self.on_asr_finished)
            self.asr_worker.error.connect(self.on_asr_error)
            self.asr_worker.start()
        except Exception as e:
            self.asr_worker = None
            self.asr_progress.setVisible(False)
            self.set_ui_enabled(True)
            QMessageBox.critical(self, "错误", f"启动识别失败: {e}")
            self.update_status("识别启动失败")

    def on_asr_progress(self, message):
        """Forward ASR worker progress messages to the status label."""
        self.update_status(message)

    def on_asr_finished(self, success, text, language):
        """Display the transcription (or clear on failure)."""
        self.asr_progress.setVisible(False)
        self.set_ui_enabled(True)
        if success:
            self.asr_result_text.setText(text)
            self.detected_language_label.setText(f"检测语言: {language}")
            self.update_status("识别完成")
        else:
            self.asr_result_text.clear()
            self.detected_language_label.clear()

    def on_asr_error(self, error_msg):
        """Report an ASR failure and restore the UI."""
        self.asr_progress.setVisible(False)
        self.set_ui_enabled(True)
        QMessageBox.critical(self, "错误", error_msg)
        self.update_status("识别失败")

    def refresh_models(self):
        """Fetch the installed Ollama model list into the combo box."""
        self.model_combo.clear()
        self.model_combo.addItem("加载中...", None)
        self.model_combo.setEnabled(False)
        if self.fetch_models_worker is not None:
            if hasattr(self.fetch_models_worker, "deleteLater"):
                self.fetch_models_worker.deleteLater()
            self.fetch_models_worker = None
        try:
            self.fetch_models_worker = FetchModelsWorker(self.ollama_api_url)
            if not isinstance(self.fetch_models_worker, FetchModelsWorker):
                raise TypeError("FetchModelsWorker 创建失败")
            self.fetch_models_worker.finished.connect(self.on_models_fetched)
            self.fetch_models_worker.error.connect(self.on_models_error)
            self.fetch_models_worker.start()
            self.update_status("正在获取模型列表...")
        except Exception as e:
            self.fetch_models_worker = None
            self.model_combo.clear()
            self.model_combo.addItem("获取失败", None)
            QMessageBox.critical(self, "错误", f"获取模型列表失败: {e}")
            self.update_status("获取模型列表失败")

    def on_models_fetched(self, models):
        """Populate the model combo with the fetched model names."""
        self.model_combo.clear()
        if not models:
            self.model_combo.addItem("无可用模型", None)
            self.update_status("未找到 Ollama 模型")
        else:
            for model in models:
                self.model_combo.addItem(model, model)
            self.model_combo.setEnabled(True)
            self.update_status(f"已加载 {len(models)} 个模型")

    def on_models_error(self, error_msg):
        """Report a model-list failure and disable the combo."""
        self.model_combo.clear()
        self.model_combo.addItem("获取失败", None)
        self.model_combo.setEnabled(False)
        self.update_status("无法连接 Ollama")
        QMessageBox.warning(self, "提示", f"无法获取模型列表:\n{error_msg}")

    def call_ollama(self):
        """Assemble the prompt and launch the Ollama worker thread."""
        model_name = self.model_combo.currentData()
        if not model_name or not isinstance(model_name, str):
            QMessageBox.warning(self, "提示", "请先选择 Ollama 模型")
            return
        recognized_text = self.asr_result_text.toPlainText()
        system_prompt = self.system_prompt.toPlainText()
        if not recognized_text and not system_prompt:
            QMessageBox.warning(self, "提示", "提示词和识别结果均为空")
            return
        # Optionally append the ASR transcript to the system prompt.
        if self.use_recognized_text.isChecked() and recognized_text:
            full_prompt = f"{system_prompt}\n\n{recognized_text}"
        else:
            full_prompt = system_prompt
        self.ollama_result_text.clear()
        self.set_ui_enabled(False)
        self.update_status("正在调用 Ollama...")
        if self.ollama_worker is not None:
            if hasattr(self.ollama_worker, "deleteLater"):
                self.ollama_worker.deleteLater()
            self.ollama_worker = None
        try:
            self.ollama_worker = OllamaWorker(model_name, full_prompt, self.ollama_api_url, self.streaming_checkbox.isChecked())
            if not isinstance(self.ollama_worker, OllamaWorker):
                raise TypeError("OllamaWorker 创建失败")
            self.ollama_worker.finished.connect(self.on_ollama_finished)
            self.ollama_worker.error.connect(self.on_ollama_error)
            self.ollama_worker.streaming.connect(self.on_ollama_streaming)
            self.ollama_worker.start()
        except Exception as e:
            self.ollama_worker = None
            self.set_ui_enabled(True)
            QMessageBox.critical(self, "错误", f"调用失败: {e}")
            self.update_status("调用失败")

    def on_ollama_finished(self, response):
        """Show the complete (non-streamed) response and restore the UI."""
        self.set_ui_enabled(True)
        self.update_status("生成完成")
        if response:
            self.ollama_result_text.append(response)

    def on_ollama_error(self, error_msg):
        """Report an Ollama failure and restore the UI."""
        self.set_ui_enabled(True)
        QMessageBox.critical(self, "错误", error_msg)
        self.update_status("生成失败")

    def on_ollama_streaming(self, text):
        """Append a streamed chunk at the end of the output view."""
        self.ollama_result_text.moveCursor(QTextCursor.End)
        self.ollama_result_text.insertPlainText(text)
        self.ollama_result_text.ensureCursorVisible()

    def set_ui_enabled(self, enabled):
        """Enable/disable all buttons and combos while a worker runs."""
        for child in self.findChildren(QPushButton):
            child.setEnabled(enabled)
        self.language_combo.setEnabled(enabled)
        # Only re-enable the model combo when it actually has entries.
        self.model_combo.setEnabled(enabled and self.model_combo.count() > 0)

    def update_status(self, message):
        """Set the status-bar label text."""
        self.status_label.setText(message)

    def closeEvent(self, event):
        """Stop all running workers before the window closes.

        NOTE(review): terminate() is abrupt (matches the original's
        visible calls) — consider cooperative cancellation instead.
        """
        if self.asr_worker is not None and self.asr_worker.isRunning():
            self.asr_worker.terminate()
            self.asr_worker.wait()
        if self.ollama_worker is not None and self.ollama_worker.isRunning():
            self.ollama_worker.terminate()
            self.ollama_worker.wait()
        if self.fetch_models_worker is not None and self.fetch_models_worker.isRunning():
            self.fetch_models_worker.terminate()
            self.fetch_models_worker.wait()
        if self.load_model_worker is not None and self.load_model_worker.isRunning():
            self.load_model_worker.terminate()
            self.load_model_worker.wait()
        event.accept()
def main():
    """Create the Qt application, show the main window, and enter the loop.

    NOTE(review): the application/organization name strings were lost in
    extraction and are reconstructions.
    """
    app = QApplication(sys.argv)
    app.setApplicationName("Qwen3-ASR + Ollama 助手")
    app.setOrganizationName("Local AI")
    window = ASROllamaApp()
    window.show()
    # exec_() blocks until the window closes; propagate its exit code.
    sys.exit(app.exec_())
# Script entry point: launch the GUI only when executed directly.
if __name__ == "__main__":
    main()

微信公众号「极客日志」,在微信中扫描左侧二维码关注。展示文案:极客日志 zeeklog
使用加密算法(如AES、TripleDES、Rabbit或RC4)加密和解密文本明文。 在线工具,加密/解密文本在线工具,online
生成新的随机RSA私钥和公钥pem证书。 在线工具,RSA密钥对生成器在线工具,online
基于 Mermaid.js 实时预览流程图、时序图等图表,支持源码编辑与即时渲染。 在线工具,Mermaid 预览与可视化编辑在线工具,online
解析常见 curl 参数并生成 fetch、axios、PHP curl 或 Python requests 示例代码。 在线工具,curl 转代码在线工具,online
将字符串编码和解码为其 Base64 格式表示形式即可。 在线工具,Base64 字符串编码/解码在线工具,online
将字符串、文件或图像转换为其 Base64 表示形式。 在线工具,Base64 文件转换器在线工具,online