""" Qwen3-ASR + Ollama GUI 应用程序 功能:音频识别 + Ollama模型调用(直接使用本地模型) """
import sys
import os
import json
import requests
import soundfile as sf
import io
import torch
from typing import List, Optional
from pathlib import Path
from PyQt5.QtWidgets import (
QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout,
QPushButton, QLabel, QTextEdit, QComboBox, QFileDialog, QGroupBox,
QProgressBar, QSplitter, QMessageBox, QCheckBox, QFormLayout,
QScrollArea, QFrame, QSpinBox
)
from PyQt5.QtCore import Qt, QThread, pyqtSignal, QTimer
from PyQt5.QtGui import QFont, QTextCursor, QColor
from qwen_asr import Qwen3ASRModel
# Location of the Qwen3-ASR model handed to Qwen3ASRModel.from_pretrained().
# NOTE(review): "." is a relative path, so it presumably resolves against the
# current working directory of the process — confirm this is intended.
MODEL_PATH = "."
# Base URL of the Ollama HTTP API; the workers append "/api/generate" and
# "/api/tags" to it.
OLLAMA_API_URL = "http://localhost:11434"
class ASRWorker(QThread):
    """Worker thread that transcribes one audio file with an already-loaded model.

    Signals:
        finished(bool, str, str): success flag, recognized text, detected language.
        progress(str): status message describing the current step.
        error(str): failure description; emitted instead of ``finished``.
    """

    finished = pyqtSignal(bool, str, str)
    progress = pyqtSignal(str)
    error = pyqtSignal(str)

    def __init__(self, audio_path: str, model, language: Optional[str] = None):
        """Store the inputs of the transcription job.

        Args:
            audio_path: Path of the audio file to transcribe.
            model: Object exposing ``transcribe(audio=..., language=...)``.
                May be None, in which case ``run`` reports an error.
            language: Language name forwarded to the model, or None
                (presented to the user as automatic detection).
        """
        super().__init__()
        self.audio_path = audio_path
        self.model = model
        self.language = language

    def run(self):
        """Decode the audio file, run the model and emit the outcome as a signal."""
        try:
            # Fail fast: without a model there is no point in decoding the audio.
            if self.model is None:
                self.error.emit("模型未初始化")
                return

            self.progress.emit("正在读取音频文件...")
            # The file is read by Python and handed to soundfile as an
            # in-memory stream (kept from the original implementation;
            # presumably to sidestep path-encoding problems in the decoder
            # — TODO confirm).
            with open(self.audio_path, 'rb') as f:
                audio_bytes = f.read()
            audio, sr = sf.read(io.BytesIO(audio_bytes))

            self.progress.emit("正在初始化模型...")
            self.progress.emit(f"正在识别语音(语言:{self.language or '自动检测'})...")
            results = self.model.transcribe(
                audio=(audio, sr),
                language=self.language
            )
            if not results:
                self.error.emit("未返回识别结果")
                return

            result = results[0]
            # The signal arguments are declared as str and PyQt rejects None
            # for them, so missing fields are sent as empty strings instead
            # of turning a successful transcription into an error.
            text = "" if result.text is None else str(result.text)
            language = "" if result.language is None else str(result.language)
            self.finished.emit(True, text, language)
        except Exception as e:
            import traceback
            error_details = traceback.format_exc()
            print(f"ASR错误详情:\n{error_details}")
            self.error.emit(f"处理失败:{str(e)}")
class OllamaWorker(QThread):
    """Worker thread that sends one prompt to Ollama's ``/api/generate`` endpoint.

    Signals:
        finished(str): the full reply in non-streaming mode; an empty string
            in streaming mode (the text was already delivered via ``streaming``).
        error(str): failure description; emitted instead of ``finished``.
        streaming(str): one text fragment of the reply (streaming mode only).
    """

    finished = pyqtSignal(str)
    error = pyqtSignal(str)
    streaming = pyqtSignal(str)

    def __init__(self, model: str, prompt: str, api_url: str, enable_streaming: bool = False):
        """Store the request parameters.

        Args:
            model: Name of the Ollama model to use.
            prompt: Prompt text sent to the model.
            api_url: Base URL of the Ollama API (without ``/api/...``).
            enable_streaming: When True, emit the reply piece by piece.
        """
        super().__init__()
        self.model = model
        self.prompt = prompt
        self.api_url = api_url
        self.enable_streaming = enable_streaming

    def run(self):
        """Perform the request and report the result through the signals."""
        try:
            if self.enable_streaming:
                self._run_streaming()
            else:
                self._run_blocking()
        except Exception as e:
            self.error.emit(f"调用失败:{str(e)}")

    def _payload(self, stream: bool) -> dict:
        """Build the JSON body of the generate request."""
        return {
            "model": self.model,
            "prompt": self.prompt,
            "stream": stream
        }

    def _run_streaming(self):
        """Stream the reply line by line, emitting each fragment as it arrives."""
        # The context manager releases the connection even when we stop early.
        with requests.post(
            f"{self.api_url}/api/generate",
            json=self._payload(True),
            stream=True,
            timeout=300
        ) as response:
            # Without this check an HTTP error body (which has no 'response'
            # key) would be skipped and reported as an empty, successful reply.
            if response.status_code != 200:
                self.error.emit(f"Ollama错误:HTTP {response.status_code}")
                return
            for line in response.iter_lines():
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    # Best effort: ignore lines that are not valid JSON.
                    continue
                if 'error' in data:
                    # Ollama reports failures that occur mid-stream as an error object.
                    self.error.emit(f"Ollama错误:{data['error']}")
                    return
                if 'response' in data:
                    self.streaming.emit(data['response'])
        self.finished.emit("")

    def _run_blocking(self):
        """Request the complete reply in a single response."""
        response = requests.post(
            f"{self.api_url}/api/generate",
            json=self._payload(False),
            timeout=300
        )
        if response.status_code == 200:
            result = response.json()
            self.finished.emit(result.get('response', ''))
        else:
            self.error.emit(f"Ollama错误:HTTP {response.status_code}")
class FetchModelsWorker(QThread):
    """Worker thread that asks Ollama's ``/api/tags`` endpoint for the model names.

    Signals:
        finished(list): names of the models reported by the server.
        error(str): failure description; emitted instead of ``finished``.
    """

    finished = pyqtSignal(list)
    error = pyqtSignal(str)

    def __init__(self, api_url: str):
        """Remember the base URL of the Ollama API (without ``/api/...``)."""
        super().__init__()
        self.api_url = api_url

    def run(self):
        """Query the server and emit either the model names or an error."""
        try:
            reply = requests.get(f"{self.api_url}/api/tags", timeout=10)
            if reply.status_code != 200:
                self.error.emit(f"获取模型列表失败:HTTP {reply.status_code}")
                return
            payload = reply.json()
            names = []
            for entry in payload.get('models', []):
                names.append(entry['name'])
            self.finished.emit(names)
        except Exception as exc:
            # Any failure (network, malformed JSON, missing key) is reported the same way.
            self.error.emit(f"连接失败:{str(exc)}")
class LoadModelWorker(QThread):
    """Worker thread that loads the Qwen3-ASR model off the GUI thread.

    Signals:
        finished(object): the loaded model.
        error(str): failure description; emitted instead of ``finished``.
        progress(str): status message describing the current step.
    """

    finished = pyqtSignal(object)
    error = pyqtSignal(str)
    progress = pyqtSignal(str)

    def __init__(self, model_path: str, device_map: str, dtype: str):
        """Store the load parameters.

        Args:
            model_path: Location passed to ``Qwen3ASRModel.from_pretrained``.
            device_map: Device specification forwarded to the loader.
            dtype: One of 'float16', 'bfloat16', 'float32'; any other value
                falls back to float32.
        """
        super().__init__()
        self.model_path = model_path
        self.device_map = device_map
        self.dtype = dtype

    def run(self):
        """Load the model and emit it, or emit an error message on failure."""
        try:
            for note in ("正在加载Qwen3-ASR模型...", "这可能需要几分钟时间,请耐心等待..."):
                self.progress.emit(note)

            # Translate the textual dtype name into the torch dtype object.
            known_dtypes = {
                'float16': torch.float16,
                'bfloat16': torch.bfloat16,
                'float32': torch.float32
            }
            torch_dtype = known_dtypes.get(self.dtype, torch.float32)

            load_options = dict(
                dtype=torch_dtype,
                device_map=self.device_map,
                max_inference_batch_size=32,
                max_new_tokens=256,
            )
            loaded = Qwen3ASRModel.from_pretrained(self.model_path, **load_options)

            self.progress.emit("模型加载完成!")
            self.finished.emit(loaded)
        except Exception as exc:
            import traceback
            # Full traceback goes to the console; the GUI only gets the short message.
            print(f"模型加载错误详情:\n{traceback.format_exc()}")
            self.error.emit(f"模型加载失败:{str(exc)}")
class ASROllamaApp(QMainWindow):
    """Main window combining local Qwen3-ASR transcription with Ollama generation.

    The left panel holds the controls (model status, audio file, language,
    Ollama model, prompt); the right panel shows the transcription, the
    Ollama reply and a status line. Long-running work is delegated to QThread
    workers whose signals are connected to the ``on_*`` slots of this class.
    """

    # Appearance of the status line: orange in the normal/loading state,
    # red after a model-load failure.
    _STATUS_STYLE_NORMAL = """
        QLabel { background-color: #ff9800; color: white; padding: 5px; border-radius: 3px; }
    """
    _STATUS_STYLE_ERROR = """
        QLabel { background-color: #f44336; color: white; padding: 5px; border-radius: 3px; }
    """

    def __init__(self):
        """Build the UI and immediately start loading the ASR model."""
        super().__init__()
        self.current_audio_path = None
        self.asr_model = None
        self.asr_worker = None
        self.ollama_worker = None
        self.fetch_models_worker = None
        self.load_model_worker = None
        self.ollama_api_url = OLLAMA_API_URL
        # True from the start of a model load until its result signal arrives.
        self._model_loading = False
        self.init_ui()
        self.load_asr_model()

    # ------------------------------------------------------------------
    # Worker bookkeeping helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _is_running(worker) -> bool:
        """Return True when *worker* exists and its thread is still executing."""
        return worker is not None and worker.isRunning()

    @staticmethod
    def _dispose_worker(worker) -> None:
        """Release a worker whose result has already been delivered.

        Callers must make sure the worker is finished or about to finish.
        The result signals are emitted from inside ``run()``, so the thread
        may need a moment to exit; waiting for it first avoids destroying a
        QThread that is still running.
        """
        if worker is not None:
            worker.wait()
            worker.deleteLater()

    def _has_ollama_models(self) -> bool:
        """Return True when the model combo box lists real models.

        Placeholder entries ("正在加载...", "未找到模型", ...) carry None as item data.
        """
        return self.model_combo.count() > 0 and self.model_combo.itemData(0) is not None

    def _launch_fetch_models(self) -> None:
        """Create, connect and start a fresh FetchModelsWorker.

        The caller must have checked that no fetch is currently running.
        """
        previous, self.fetch_models_worker = self.fetch_models_worker, None
        self._dispose_worker(previous)
        worker = FetchModelsWorker(self.ollama_api_url)
        worker.finished.connect(self.on_models_fetched)
        worker.error.connect(self.on_models_error)
        self.fetch_models_worker = worker
        worker.start()

    # ------------------------------------------------------------------
    # UI construction
    # ------------------------------------------------------------------
    def init_ui(self):
        """Create the window layout: control panel on the left, results on the right."""
        self.setWindowTitle("Qwen3-ASR + Ollama 智能助手(本地模式)")
        self.setGeometry(100, 100, 1200, 800)
        main_widget = QWidget()
        self.setCentralWidget(main_widget)
        main_layout = QHBoxLayout(main_widget)
        splitter = QSplitter(Qt.Horizontal)
        main_layout.addWidget(splitter)
        left_panel = self.create_left_panel()
        splitter.addWidget(left_panel)
        right_panel = self.create_right_panel()
        splitter.addWidget(right_panel)
        # The result panel gets twice the stretch of the control panel.
        splitter.setStretchFactor(0, 1)
        splitter.setStretchFactor(1, 2)
        self.setStyleSheet("""
            QMainWindow { background-color: #f0f0f0; }
            QGroupBox { font-weight: bold; border: 2px solid #cccccc; border-radius: 5px; margin-top: 10px; padding-top: 10px; }
            QGroupBox::title { subcontrol-origin: margin; left: 10px; padding: 0 3px; }
            QPushButton { background-color: #4CAF50; color: white; border: none; padding: 8px 16px; border-radius: 4px; font-weight: bold; }
            QPushButton:hover { background-color: #45a049; }
            QPushButton:pressed { background-color: #3d8b40; }
            QPushButton:disabled { background-color: #cccccc; }
            QTextEdit { border: 2px solid #cccccc; border-radius: 5px; padding: 5px; background-color: white; }
            QComboBox { border: 2px solid #cccccc; border-radius: 4px; padding: 5px; background-color: white; }
            QLabel { font-size: 12px; color: #333333; }
            QProgressBar { border: 2px solid #cccccc; border-radius: 5px; text-align: center; }
            QProgressBar::chunk { background-color: #4CAF50; }
        """)

    def create_left_panel(self) -> QWidget:
        """Create the control panel shown on the left side of the window."""
        panel = QWidget()
        layout = QVBoxLayout(panel)

        # --- ASR model status -------------------------------------------------
        model_group = QGroupBox("[ASR] ASR模型状态")
        model_layout = QVBoxLayout()
        self.model_status_label = QLabel("模型未加载")
        self.model_status_label.setStyleSheet("color: #ff9800; font-weight: bold; font-size: 14px;")
        model_layout.addWidget(self.model_status_label)
        self.load_model_btn = QPushButton("[刷新] 重新加载模型")
        self.load_model_btn.clicked.connect(self.load_asr_model)
        model_layout.addWidget(self.load_model_btn)
        model_group.setLayout(model_layout)
        layout.addWidget(model_group)

        # --- Audio file selection --------------------------------------------
        audio_group = QGroupBox("[音频] 音频文件")
        audio_layout = QVBoxLayout()
        self.audio_path_label = QLabel("未选择文件")
        self.audio_path_label.setStyleSheet("color: #666666; font-style: italic;")
        self.audio_path_label.setWordWrap(True)
        audio_layout.addWidget(self.audio_path_label)
        select_audio_btn = QPushButton("选择音频文件")
        select_audio_btn.clicked.connect(self.select_audio_file)
        audio_layout.addWidget(select_audio_btn)
        audio_group.setLayout(audio_layout)
        layout.addWidget(audio_group)

        # --- Recognition settings --------------------------------------------
        asr_group = QGroupBox("[识别] 语音识别设置")
        asr_layout = QFormLayout()
        self.language_combo = QComboBox()
        # (label shown to the user, language value passed to the model)
        for label, value in (
            ("自动检测", None),
            ("中文", "Chinese"),
            ("英语", "English"),
            ("粤语", "Cantonese"),
            ("日语", "Japanese"),
            ("韩语", "Korean"),
            ("法语", "French"),
            ("德语", "German"),
            ("西班牙语", "Spanish"),
        ):
            self.language_combo.addItem(label, value)
        asr_layout.addRow("语言:", self.language_combo)
        self.asr_progress = QProgressBar()
        self.asr_progress.setVisible(False)
        asr_layout.addRow("进度:", self.asr_progress)
        asr_group.setLayout(asr_layout)
        layout.addWidget(asr_group)

        recognize_btn = QPushButton("[识别] 开始识别")
        recognize_btn.setStyleSheet("""
            QPushButton { background-color: #2196F3; color: white; font-size: 14px; padding: 12px; }
            QPushButton:hover { background-color: #1976D2; }
            QPushButton:disabled { background-color: #cccccc; }
        """)
        recognize_btn.clicked.connect(self.start_asr)
        # Stays disabled until the ASR model has been loaded.
        recognize_btn.setEnabled(False)
        self.recognize_btn = recognize_btn
        layout.addWidget(recognize_btn)

        # --- Ollama model selection ------------------------------------------
        ollama_group = QGroupBox("[ASR] Ollama模型设置")
        ollama_layout = QVBoxLayout()
        self.refresh_models_btn = QPushButton("[刷新] 刷新模型列表")
        self.refresh_models_btn.clicked.connect(self.refresh_models)
        ollama_layout.addWidget(self.refresh_models_btn)
        ollama_layout.addWidget(QLabel("选择模型:"))
        self.model_combo = QComboBox()
        self.model_combo.setEnabled(False)
        ollama_layout.addWidget(self.model_combo)
        ollama_group.setLayout(ollama_layout)
        layout.addWidget(ollama_group)

        # --- Prompt settings ---------------------------------------------------
        prompt_group = QGroupBox("[提示] 提示词设置")
        prompt_layout = QVBoxLayout()
        prompt_layout.addWidget(QLabel("系统提示词:"))
        self.system_prompt = QTextEdit()
        self.system_prompt.setPlaceholderText("输入系统提示词...")
        self.system_prompt.setMaximumHeight(100)
        self.system_prompt.setText("你是一个专业的文本处理助手。请根据用户提供的文本内容,提供准确、有用的回复。")
        prompt_layout.addWidget(self.system_prompt)
        self.use_recognized_text = QCheckBox("将识别文本自动添加到提示词")
        self.use_recognized_text.setChecked(True)
        prompt_layout.addWidget(self.use_recognized_text)
        prompt_group.setLayout(prompt_layout)
        layout.addWidget(prompt_group)

        generate_btn = QPushButton("[生成] 调用Ollama生成回复")
        generate_btn.setStyleSheet("""
            QPushButton { background-color: #FF9800; color: white; font-size: 14px; padding: 12px; }
            QPushButton:hover { background-color: #F57C00; }
        """)
        generate_btn.clicked.connect(self.call_ollama)
        layout.addWidget(generate_btn)

        self.streaming_checkbox = QCheckBox("启用流式输出(实时显示)")
        self.streaming_checkbox.setChecked(True)
        layout.addWidget(self.streaming_checkbox)

        layout.addStretch()
        return panel

    def create_right_panel(self) -> QWidget:
        """Create the result panel shown on the right side of the window."""
        panel = QWidget()
        layout = QVBoxLayout(panel)
        scroll = QScrollArea()
        scroll.setWidgetResizable(True)
        scroll_content = QWidget()
        scroll_layout = QVBoxLayout(scroll_content)

        # --- Transcription result ---------------------------------------------
        asr_result_group = QGroupBox("[结果] 语音识别结果")
        asr_result_layout = QVBoxLayout()
        self.asr_result_text = QTextEdit()
        self.asr_result_text.setPlaceholderText("识别结果将显示在这里...")
        self.asr_result_text.setReadOnly(True)
        asr_result_layout.addWidget(self.asr_result_text)
        self.detected_language_label = QLabel()
        self.detected_language_label.setStyleSheet("color: #666666; font-size: 11px;")
        asr_result_layout.addWidget(self.detected_language_label)
        asr_result_group.setLayout(asr_result_layout)
        scroll_layout.addWidget(asr_result_group)

        # --- Ollama reply -------------------------------------------------------
        ollama_result_group = QGroupBox("[回复] Ollama生成结果")
        ollama_result_layout = QVBoxLayout()
        self.ollama_result_text = QTextEdit()
        self.ollama_result_text.setPlaceholderText("Ollama的回复将显示在这里...")
        self.ollama_result_text.setReadOnly(True)
        self.ollama_result_text.setStyleSheet("""
            QTextEdit { background-color: #fffde7; }
        """)
        ollama_result_layout.addWidget(self.ollama_result_text)
        ollama_result_group.setLayout(ollama_result_layout)
        scroll_layout.addWidget(ollama_result_group)

        # --- Status line --------------------------------------------------------
        self.status_label = QLabel("正在加载模型...")
        self.status_label.setStyleSheet(self._STATUS_STYLE_NORMAL)
        scroll_layout.addWidget(self.status_label)

        scroll_layout.addStretch()
        scroll.setWidget(scroll_content)
        layout.addWidget(scroll)
        return panel

    # ------------------------------------------------------------------
    # ASR model loading
    # ------------------------------------------------------------------
    def load_asr_model(self):
        """Start loading the Qwen3-ASR model in a background thread.

        Does nothing when a load is already in progress. The previously
        loaded model (if any) is dropped first, so the recognize button stays
        disabled until a load succeeds.
        """
        if self._model_loading:
            # Replacing a LoadModelWorker that is still running would destroy a live thread.
            return
        self._model_loading = True
        self.asr_model = None
        self.load_model_btn.setEnabled(False)
        self.recognize_btn.setEnabled(False)
        self.model_status_label.setText("正在加载模型...")
        self.model_status_label.setStyleSheet("color: #ff9800; font-weight: bold;")
        # Clear the red error style a previous failed load may have left behind.
        self.status_label.setStyleSheet(self._STATUS_STYLE_NORMAL)

        # Half precision is only used on the GPU; the CPU path stays at float32.
        use_cuda = torch.cuda.is_available()
        device_map = "cuda:0" if use_cuda else "cpu"
        dtype = "float16" if use_cuda else "float32"

        previous, self.load_model_worker = self.load_model_worker, None
        self._dispose_worker(previous)
        worker = LoadModelWorker(MODEL_PATH, device_map, dtype)
        worker.progress.connect(self.on_model_progress)
        worker.finished.connect(self.on_model_loaded)
        worker.error.connect(self.on_model_error)
        self.load_model_worker = worker
        worker.start()

    def on_model_progress(self, message: str):
        """Show a model-loading progress message in both status displays."""
        self.model_status_label.setText(message)
        self.status_label.setText(message)

    def on_model_loaded(self, model):
        """Store the loaded model and enable recognition."""
        self._model_loading = False
        self.asr_model = model
        self.load_model_btn.setEnabled(True)
        self.recognize_btn.setEnabled(True)
        self.model_status_label.setText("[OK] 模型已加载")
        self.model_status_label.setStyleSheet("color: #4CAF50; font-weight: bold;")
        self.update_status("模型加载完成!")
        print("Qwen3-ASR模型加载成功")

    def on_model_error(self, error_msg: str):
        """Report a failed model load and allow the user to retry."""
        self._model_loading = False
        self.load_model_btn.setEnabled(True)
        self.recognize_btn.setEnabled(False)
        self.model_status_label.setText("[X] 模型加载失败")
        self.model_status_label.setStyleSheet("color: #f44336; font-weight: bold;")
        self.status_label.setStyleSheet(self._STATUS_STYLE_ERROR)
        self.update_status("模型加载失败")
        QMessageBox.critical(self, "模型加载失败", f"无法加载Qwen3-ASR模型:\n\n{error_msg}\n\n请确保:\n1. 模型文件存在于正确位置\n2. 有足够的内存\n3. 已安装正确的依赖")

    # ------------------------------------------------------------------
    # Audio selection and recognition
    # ------------------------------------------------------------------
    def select_audio_file(self):
        """Let the user pick an audio file and remember its path."""
        file_path, _ = QFileDialog.getOpenFileName(
            self, "选择音频文件", "", "音频文件 (*.wav *.mp3 *.ogg *.flac *.m4a *.aac);;所有文件 (*.*)"
        )
        if file_path:
            self.current_audio_path = file_path
            self.audio_path_label.setText(f"[OK] {os.path.basename(file_path)}")
            self.audio_path_label.setStyleSheet("color: #4CAF50; font-weight: bold;")
            self.update_status(f"已选择:{os.path.basename(file_path)}")

    def start_asr(self):
        """Start transcribing the selected audio file in a background thread."""
        if self.asr_model is None:
            QMessageBox.warning(self, "警告", "模型未加载,请先加载模型!")
            return
        if not self.current_audio_path:
            QMessageBox.warning(self, "警告", "请先选择音频文件!")
            return
        if self._is_running(self.asr_worker):
            # A transcription is still in flight; its thread object must not be destroyed.
            return

        self.set_ui_enabled(False)
        self.asr_progress.setVisible(True)
        # Range (0, 0) puts the bar into indeterminate ("busy") mode.
        self.asr_progress.setRange(0, 0)

        previous, self.asr_worker = self.asr_worker, None
        self._dispose_worker(previous)
        try:
            language = self.language_combo.currentData()
            worker = ASRWorker(self.current_audio_path, self.asr_model, language)
            worker.progress.connect(self.on_asr_progress)
            worker.finished.connect(self.on_asr_finished)
            worker.error.connect(self.on_asr_error)
            self.asr_worker = worker
            worker.start()
        except Exception as e:
            self.asr_worker = None
            self.asr_progress.setVisible(False)
            self.set_ui_enabled(True)
            QMessageBox.critical(self, "错误", f"创建ASR工作线程失败:{str(e)}")
            self.update_status("识别失败")

    def on_asr_progress(self, message: str):
        """Show a recognition progress message in the status line."""
        self.update_status(message)

    def on_asr_finished(self, success: bool, text: str, language: str):
        """Display the transcription result, or clear the result area on failure."""
        self.asr_progress.setVisible(False)
        self.set_ui_enabled(True)
        if success:
            self.asr_result_text.setText(text)
            self.detected_language_label.setText(f"检测语言:{language}")
            self.update_status("识别完成!")
        else:
            self.asr_result_text.clear()
            self.detected_language_label.clear()

    def on_asr_error(self, error_msg: str):
        """Report a recognition failure and unlock the UI."""
        self.asr_progress.setVisible(False)
        self.set_ui_enabled(True)
        QMessageBox.critical(self, "识别失败", error_msg)
        self.update_status("识别失败")

    # ------------------------------------------------------------------
    # Ollama model list
    # ------------------------------------------------------------------
    def check_ollama_connection(self):
        """Check that Ollama is reachable by fetching its model list."""
        self.update_status("正在检查Ollama连接...")
        if self._is_running(self.fetch_models_worker):
            # A fetch is already under way; its result answers the question.
            return
        try:
            self._launch_fetch_models()
        except Exception as e:
            self.fetch_models_worker = None
            QMessageBox.critical(self, "错误", f"创建检查连接线程失败:{str(e)}")
            self.update_status("检查失败")

    def refresh_models(self):
        """Reload the list of Ollama models into the combo box."""
        if self._is_running(self.fetch_models_worker):
            # The refresh button stays enabled during a fetch, so repeated
            # clicks land here; the running fetch will deliver the list.
            self.update_status("正在刷新模型列表...")
            return
        self.model_combo.clear()
        self.model_combo.addItem("正在加载...", None)
        self.model_combo.setEnabled(False)
        try:
            self._launch_fetch_models()
            self.update_status("正在刷新模型列表...")
        except Exception as e:
            self.fetch_models_worker = None
            self.model_combo.clear()
            self.model_combo.addItem("加载失败", None)
            QMessageBox.critical(self, "错误", f"创建模型列表线程失败:{str(e)}")
            self.update_status("加载失败")

    def on_models_fetched(self, models: List[str]):
        """Fill the combo box with the fetched model names."""
        self.model_combo.clear()
        if not models:
            self.model_combo.addItem("未找到模型", None)
            self.update_status("未找到Ollama模型")
        else:
            for model in models:
                self.model_combo.addItem(model, model)
            self.model_combo.setEnabled(True)
            self.update_status(f"已加载 {len(models)} 个模型")

    def on_models_error(self, error_msg: str):
        """Report that the model list could not be fetched."""
        self.model_combo.clear()
        self.model_combo.addItem("连接失败", None)
        self.model_combo.setEnabled(False)
        self.update_status("Ollama连接失败")
        QMessageBox.warning(self, "连接失败", f"无法连接到Ollama: {error_msg}\n请确保Ollama服务正在运行。")

    # ------------------------------------------------------------------
    # Ollama generation
    # ------------------------------------------------------------------
    def call_ollama(self):
        """Send the prompt (optionally prefixed with the transcription) to Ollama."""
        model_name = self.model_combo.currentData()
        if model_name is None or not isinstance(model_name, str):
            QMessageBox.warning(self, "警告", "请先选择Ollama模型!")
            return

        recognized_text = self.asr_result_text.toPlainText()
        system_prompt = self.system_prompt.toPlainText()
        if self.use_recognized_text.isChecked() and recognized_text:
            full_prompt = f"识别的文本内容:\n{recognized_text}\n\n{system_prompt}"
        else:
            full_prompt = system_prompt
        # Validate what is actually sent: recognized text does not count when
        # the checkbox excluding it from the prompt is unchecked.
        if not full_prompt.strip():
            QMessageBox.warning(self, "警告", "请先进行语音识别或输入提示词!")
            return
        if self._is_running(self.ollama_worker):
            # A generation is still in flight; its thread object must not be destroyed.
            return

        self.ollama_result_text.clear()
        self.set_ui_enabled(False)
        self.update_status("正在调用Ollama...")

        previous, self.ollama_worker = self.ollama_worker, None
        self._dispose_worker(previous)
        try:
            worker = OllamaWorker(model_name, full_prompt, self.ollama_api_url, self.streaming_checkbox.isChecked())
            worker.finished.connect(self.on_ollama_finished)
            worker.error.connect(self.on_ollama_error)
            worker.streaming.connect(self.on_ollama_streaming)
            self.ollama_worker = worker
            worker.start()
        except Exception as e:
            self.ollama_worker = None
            self.set_ui_enabled(True)
            QMessageBox.critical(self, "错误", f"创建Ollama工作线程失败:{str(e)}")
            self.update_status("调用失败")

    def on_ollama_finished(self, response: str):
        """Unlock the UI and show the reply (empty in streaming mode)."""
        self.set_ui_enabled(True)
        self.update_status("生成完成!")
        if response:
            self.ollama_result_text.append(response)

    def on_ollama_error(self, error_msg: str):
        """Report a failed Ollama call and unlock the UI."""
        self.set_ui_enabled(True)
        QMessageBox.critical(self, "调用失败", error_msg)
        self.update_status("调用失败")

    def on_ollama_streaming(self, text: str):
        """Append one streamed fragment to the end of the reply and keep it visible."""
        self.ollama_result_text.moveCursor(QTextCursor.End)
        self.ollama_result_text.insertPlainText(text)
        self.ollama_result_text.ensureCursorVisible()

    # ------------------------------------------------------------------
    # Misc
    # ------------------------------------------------------------------
    def set_ui_enabled(self, enabled: bool):
        """Lock or unlock the interactive controls.

        Args:
            enabled: False disables every push button plus the two combo
                boxes. True re-enables them, except for controls whose state
                depends on the ASR model or on the Ollama model list.
        """
        for button in self.findChildren(QPushButton):
            button.setEnabled(enabled)
        if enabled:
            # The blanket loop above must not unlock model-dependent buttons:
            # reloading during a load, or recognizing without a model.
            self.load_model_btn.setEnabled(not self._model_loading)
            self.recognize_btn.setEnabled(self.asr_model is not None and not self._model_loading)
        self.language_combo.setEnabled(enabled)
        self.model_combo.setEnabled(enabled and self._has_ollama_models())

    def update_status(self, message: str):
        """Show *message* in the status line."""
        self.status_label.setText(message)

    def closeEvent(self, event):
        """Stop every worker that is still running before the window closes."""
        workers = (
            self.asr_worker,
            self.ollama_worker,
            self.fetch_models_worker,
            self.load_model_worker,
        )
        for worker in workers:
            if self._is_running(worker):
                # The workers have no cooperative stop mechanism, so they are
                # terminated forcefully; acceptable only because the
                # application is shutting down.
                worker.terminate()
                worker.wait()
        event.accept()
def main():
    """Create the Qt application, show the main window and run the event loop.

    The process exits with the return code of the event loop.
    """
    qt_app = QApplication(sys.argv)
    qt_app.setApplicationName("Qwen3-ASR + Ollama")
    qt_app.setOrganizationName("ASR-Ollama")

    main_window = ASROllamaApp()
    main_window.show()

    exit_code = qt_app.exec_()
    sys.exit(exit_code)
# Start the GUI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()