Phi-3-mini-128k-instruct Chainlit 插件开发:思维链可视化与 Token 统计
为基于 vLLM 部署的 Phi-3-mini-128k-instruct 模型开发两个实用的 Chainlit 插件:思维链可视化面板和Token 用量统计面板。通过这两个插件,不仅能'看见'模型的推理路径,还能精确掌握每次对话的成本,让 AI 对话从'黑盒'走向'透明'。
介绍如何为基于 vLLM 部署的 Phi-3-mini-128k-instruct 模型开发 Chainlit 插件。主要实现两个功能:思维链可视化面板,用于展示模型的推理步骤;Token 用量统计面板,用于实时监控对话成本与资源消耗。内容包括环境配置、插件结构搭建、核心逻辑解析(正则匹配思维链、tiktoken 计数)、UI 组件集成及配置管理。通过插件化设计,提升 AI 对话的可解释性与透明度,便于成本控制与性能优化。
为基于 vLLM 部署的 Phi-3-mini-128k-instruct 模型开发两个实用的 Chainlit 插件:思维链可视化面板和Token 用量统计面板。通过这两个插件,不仅能'看见'模型的推理路径,还能精确掌握每次对话的成本,让 AI 对话从'黑盒'走向'透明'。
在开始插件开发之前,确保基础环境已经就绪。已成功部署 Phi-3-mini-128k-instruct 模型,并且能够通过 Chainlit 前端正常调用。
打开终端,检查模型服务日志,确认部署成功。
cat /workspace/llm.log
如果看到类似下面的输出,说明模型服务正在正常运行:
INFO 04-10 14:30:15 llm_engine.py:73] Initializing an LLM engine with config: model="/workspace/models/Phi-3-mini-128k-instruct", tokenizer="/workspace/models/Phi-3-mini-128k-instruct", ...
INFO 04-10 14:30:20 llm_engine.py:210] # GPU blocks: 496, # CPU blocks: 512
INFO 04-10 14:30:22 model_runner.py:96] Loading model weights took 4.85 GB
INFO 04-10 14:30:25 llm_engine.py:287] LLM engine is ready.
打开 Chainlit 前端界面,进行简单的提问测试,确保基础调用流程畅通无阻。
输入一个问题,比如'请用中文解释什么是机器学习',应该能正常收到模型的回复。这个基础功能是我们后续插件开发的基石。
在动手写代码之前,先搞清楚两个关键问题:思维链可视化和Token 统计到底要做什么,以及 Chainlit 插件是怎么工作的。
大语言模型在生成回答时,内部其实有一个复杂的推理过程。虽然我们无法直接看到神经元如何激活,但可以通过一些技术手段,让模型的'思考步骤'展现出来。
实现思路:
每次与 AI 对话都会消耗计算资源,而 Token 是衡量这种消耗的基本单位。了解 Token 用量,对于成本控制、性能优化都至关重要。
需要统计的数据:
Chainlit 提供了灵活的插件系统,允许在不修改核心代码的情况下扩展界面功能。插件主要通过以下方式工作:
这个插件会在模型生成回答时,自动分析并展示其推理过程。
在 Chainlit 项目目录中创建插件文件夹和文件。
mkdir -p plugins/thought_chain
touch plugins/thought_chain/__init__.py
touch plugins/thought_chain/visualizer.py
touch plugins/thought_chain/utils.py
编辑 plugins/thought_chain/utils.py,添加思维链解析的核心逻辑。
import re
from typing import List, Dict, Any
import json
class ThoughtChainParser:
    """Parse chain-of-thought reasoning steps out of raw model output.

    Tries a set of regex patterns (step markers, reasoning keywords,
    bullet points, numbered lists) and falls back to plain sentence
    splitting when no structured pattern matches.
    """

    def __init__(self):
        # Common chain-of-thought markers.
        # BUG FIX: the original alternations contained stray spaces
        # ('思考 | 推理', '因此 | 所以'), so '思考' only matched when followed
        # by a literal space; the spaces are removed so keywords match as
        # intended.
        self.patterns = {
            'step_by_step': r'(?:步骤|Step)\s*\d+[::]\s*(.*?)(?=(?:步骤|Step)\s*\d+[::]|$)',
            'reasoning': r'(?:思考|推理|Reasoning)[::]\s*(.*?)(?=(?:因此|所以|Thus|Therefore)|$)',
            'bullet_points': r'[•\-*]\s*(.*?)(?=\n[•\-*]|\n\n|$)',
            'numbered_list': r'\d+\.\s*(.*?)(?=\n\d+\.|\n\n|$)'
        }

    def parse(self, text: str) -> List[Dict[str, Any]]:
        """Extract reasoning steps from *text*.

        Args:
            text: Text generated by the model.

        Returns:
            A list of step dicts with keys ``step`` (1-based index),
            ``content``, ``type`` (name of the matching pattern) and
            ``confidence``. Empty list for empty/unparseable input.
        """
        steps: List[Dict[str, Any]] = []
        # Try each structured pattern; any match longer than 10 characters
        # becomes a step. Several patterns may contribute steps.
        for pattern_name, pattern in self.patterns.items():
            for match in re.findall(pattern, text, re.DOTALL):
                step_text = match.strip()
                if step_text and len(step_text) > 10:
                    steps.append({
                        'step': len(steps) + 1,
                        'content': step_text,
                        'type': pattern_name,
                        'confidence': 0.8
                    })
        # Fallback: no structured steps found — split into sentences and
        # keep those longer than 15 characters (lower confidence).
        if not steps:
            sentences = re.split(r'[。!?.!?]\s*', text)
            for i, sentence in enumerate(sentences):
                sentence = sentence.strip()
                if sentence and len(sentence) > 15:
                    steps.append({
                        'step': i + 1,
                        'content': sentence,
                        'type': 'sentence',
                        'confidence': 0.5
                    })
        return steps

    def extract_final_answer(self, text: str, steps: List[Dict]) -> str:
        """Return the text that follows the last reasoning step.

        Args:
            text: Full model output.
            steps: Steps previously returned by :meth:`parse`.

        Returns:
            The trailing answer text with common conclusion prefixes
            stripped; falls back to *text* when nothing can be isolated.
        """
        if not steps:
            return text
        last_step_content = steps[-1]['content']
        last_step_pos = text.rfind(last_step_content)
        if last_step_pos != -1:
            answer_start = last_step_pos + len(last_step_content)
            final_answer = text[answer_start:].strip()
            # Strip leading conclusion markers such as "因此"/"Therefore".
            prefixes = ['因此', '所以', '综上所述', '答案是', 'Answer:', 'Thus', 'Therefore']
            for prefix in prefixes:
                if final_answer.startswith(prefix):
                    final_answer = final_answer[len(prefix):].strip()
            return final_answer if final_answer else "(答案已包含在推理过程中)"
        return text
编辑 plugins/thought_chain/visualizer.py,实现 Chainlit 的可视化组件。
import chainlit as cl
from typing import List, Dict, Any
import json
from .utils import ThoughtChainParser
class ThoughtChainVisualizer:
    """Render parsed chain-of-thought steps as Chainlit side elements."""

    def __init__(self):
        self.parser = ThoughtChainParser()

    async def visualize(self, message: cl.Message):
        """Analyse *message* and render the appropriate side panel.

        Args:
            message: Chainlit message whose ``content`` is inspected.
        """
        text = message.content
        steps = self.parser.parse(text)
        final_answer = self.parser.extract_final_answer(text, steps)
        if steps:
            await self._create_thought_chain_panel(steps, final_answer)
        else:
            # No structured reasoning found — show a plain structure report.
            await self._create_simple_analysis(text)

    async def _create_thought_chain_panel(self, steps: List[Dict], final_answer: str):
        """Send a title, one element per step, the final answer, and a summary."""
        elements = []
        elements.append(
            cl.Text(
                name="thought_chain_title",
                content="## 🤔 模型思考过程",
                display="side"
            )
        )
        for step in steps:
            step_content = f"**步骤 {step['step']}** ({step['type']})\n\n{step['content']}"
            elements.append(
                cl.Text(
                    name=f"step_{step['step']}",
                    content=step_content,
                    display="side"
                )
            )
        if final_answer:
            elements.append(
                cl.Text(
                    name="final_answer",
                    content=f"## ✅ 最终答案\n\n{final_answer}",
                    display="side"
                )
            )
        analysis = self._create_analysis_summary(steps)
        elements.append(
            cl.Text(
                name="analysis_summary",
                content=f"## 📊 分析摘要\n\n{analysis}",
                display="side"
            )
        )
        for element in elements:
            await element.send()

    async def _create_simple_analysis(self, text: str):
        """Send a single side element with a structural analysis of *text*."""
        analysis = self._analyze_text_structure(text)
        await cl.Text(
            name="simple_analysis",
            content=f"## 📝 回答结构分析\n\n{analysis}",
            display="side"
        ).send()

    def _create_analysis_summary(self, steps: List[Dict]) -> str:
        """Build a markdown summary: step count, per-type counts, confidence."""
        total_steps = len(steps)
        step_types: Dict[str, int] = {}
        for step in steps:
            step_type = step['type']
            step_types[step_type] = step_types.get(step_type, 0) + 1
        summary = f"- **总推理步骤**: {total_steps} 步\n"
        for step_type, count in step_types.items():
            # Map internal pattern names to human-readable labels.
            type_name = {
                'step_by_step': '分步推理',
                'reasoning': '逻辑推理',
                'bullet_points': '要点列举',
                'numbered_list': '编号列表',
                'sentence': '句子分析'
            }.get(step_type, step_type)
            summary += f"- **{type_name}**: {count} 步\n"
        avg_confidence = sum(step['confidence'] for step in steps) / total_steps
        summary += f"- **推理清晰度**: {avg_confidence:.1%}\n"
        return summary

    def _analyze_text_structure(self, text: str) -> str:
        """Summarize sentence count/length and a rough text type of *text*."""
        import re  # BUG FIX: 're' was never imported at the top of visualizer.py

        sentences = [s.strip() for s in re.split(r'[。!?.!?]\s*', text) if s.strip()]
        words = len(''.join(text.split()))  # non-whitespace character count
        analysis = f"- **总句子数**: {len(sentences)}\n"
        analysis += f"- **总字数**: {words} 字\n"
        # BUG FIX: guard against empty input (the original divided by zero
        # when the text contained no sentences).
        if sentences:
            analysis += f"- **平均句长**: {words/len(sentences):.1f} 字/句\n"
        if any(keyword in text.lower() for keyword in ['首先', '其次', '然后', '最后']):
            analysis += "- **文本类型**: 顺序说明\n"
        elif any(keyword in text.lower() for keyword in ['因为', '所以', '因此', '由于']):
            analysis += "- **文本类型**: 因果论证\n"
        elif text.count('\n') > 3 or '•' in text or '-' in text:
            analysis += "- **文本类型**: 列表说明\n"
        else:
            analysis += "- **文本类型**: 一般叙述\n"
        return analysis
编辑主应用文件 app.py。
import chainlit as cl
from chainlit import run_sync
from plugins.thought_chain.visualizer import ThoughtChainVisualizer
import asyncio
thought_visualizer = ThoughtChainVisualizer()
@cl.on_message
async def main(message: cl.Message):
    """Handle an incoming user message.

    Echoes the question, streams a simulated model reply token by token,
    then runs the thought-chain visualizer on the finished message.
    NOTE(review): the response is hard-coded here; replace with the real
    vLLM call in production.
    """
    # Echo the user's question back into the chat.
    await cl.Message(
        content=f"用户提问:{message.content}",
        author="User"
    ).send()
    # Simulated model output standing in for the vLLM backend.
    simulated_response = "让我思考一下这个问题。首先,我需要理解什么是机器学习。机器学习是人工智能的一个分支..."
    msg = cl.Message(content="")
    await msg.send()
    # Stream the reply word by word to mimic token streaming.
    for chunk in simulated_response.split():
        await msg.stream_token(chunk + " ")
        await asyncio.sleep(0.05)
    msg.content = simulated_response
    await msg.update()
    # Render the reasoning steps in the side panel.
    await thought_visualizer.visualize(msg)
@cl.on_chat_start
async def start_chat():
    """Send the welcome message and set up the thought-chain sidebar."""
    welcome_msg = cl.Message(
        content="欢迎使用 Phi-3-mini 智能助手!我已启用思维链可视化功能。",
        author="Assistant"
    )
    await welcome_msg.send()
    # NOTE(review): cl.Sidebar is not part of the documented Chainlit API —
    # confirm it exists in the installed Chainlit version.
    await cl.Sidebar(
        name="thought_chain_sidebar",
        content="## 思维链可视化面板\n\n模型的思考过程将在这里显示。",
        size="large"
    ).send()
if __name__ == "__main__":
cl.run()
mkdir -p plugins/token_stats
touch plugins/token_stats/__init__.py
touch plugins/token_stats/counter.py
touch plugins/token_stats/panel.py
编辑 plugins/token_stats/counter.py。
import tiktoken
from typing import Dict, Any, Optional
import time
from dataclasses import dataclass
from datetime import datetime
@dataclass
class TokenStats:
    """Token usage metrics for a single request/response round trip."""

    input_tokens: int = 0               # tokens in the prompt
    output_tokens: int = 0              # tokens generated by the model
    total_tokens: int = 0               # input + output
    start_time: Optional[float] = None  # epoch seconds when generation began
    end_time: Optional[float] = None    # epoch seconds when generation finished
    cost_estimate: float = 0.0          # estimated cost in USD

    @property
    def duration(self) -> float:
        """Elapsed wall-clock seconds, or 0.0 when either timestamp is missing.

        BUG FIX: compare against None explicitly — the original truthiness
        test treated a legitimate start_time/end_time of 0.0 as "missing".
        """
        if self.start_time is not None and self.end_time is not None:
            return self.end_time - self.start_time
        return 0.0

    @property
    def tokens_per_second(self) -> float:
        """Output-token throughput; 0.0 when duration or output count is zero."""
        if self.duration > 0 and self.output_tokens > 0:
            return self.output_tokens / self.duration
        return 0.0
class TokenCounter:
def __init__(self, model_name: str = "gpt-3.5-turbo"):
try:
self.encoder = tiktoken.encoding_for_model(model_name)
except:
self.encoder = tiktoken.get_encoding("cl100k_base")
self.pricing = {
"input": 0.0015 / 1000,
"output": 0.0020 / 1000
}
def count_tokens(self, text: str) -> int:
if not text:
return 0
try:
tokens = self.encoder.encode(text)
return len(tokens)
except:
chinese_chars = sum(1 for c in text if '\u4e00' <= c <= '\u9fff')
english_chars = len(text) - chinese_chars
return int(chinese_chars / 2 + english_chars / 4)
def calculate_cost(self, input_tokens: int, output_tokens: int) -> float:
input_cost = input_tokens * self.pricing["input"]
output_cost = output_tokens * self.pricing["output"]
return input_cost + output_cost
def create_stats(self, input_text: str, output_text: str, start_time: Optional[float] = None, end_time: Optional[float] = None) -> TokenStats:
input_tokens = self.count_tokens(input_text)
output_tokens = self.count_tokens(output_text)
total_tokens = input_tokens + output_tokens
stats = TokenStats(
input_tokens=input_tokens,
output_tokens=output_tokens,
total_tokens=total_tokens,
start_time=start_time,
end_time=end_time or time.time(),
cost_estimate=self.calculate_cost(input_tokens, output_tokens)
)
return stats
def format_stats(self, stats: TokenStats) -> Dict[str, Any]:
return {
"输入 Token 数": f"{stats.input_tokens:,}",
"输出 Token 数": f"{stats.output_tokens:,}",
"总 Token 数": f"{stats.total_tokens:,}",
"处理时间": f"{stats.duration:.2f}秒",
"生成速度": f"{stats.tokens_per_second:.1f} Token/秒",
"预估成本": f"${stats.cost_estimate:.6f}",
"时间戳": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
}
编辑 plugins/token_stats/panel.py。
import chainlit as cl
from typing import Dict, Any, List
import json
from datetime import datetime
from .counter import TokenCounter, TokenStats
class TokenStatsPanel:
    """Sidebar panel showing per-message and cumulative token statistics."""

    def __init__(self):
        # Counter used to tokenize inputs/outputs and estimate cost.
        self.counter = TokenCounter()
        # One record per completed exchange in this chat session.
        self.session_stats: List[Dict[str, Any]] = []

    async def update_panel(self, input_text: str, output_text: str, start_time: float, end_time: float):
        """Record one exchange and re-render both sidebar panels.

        Args:
            input_text: The user's prompt.
            output_text: The model's reply.
            start_time: Epoch seconds when generation began.
            end_time: Epoch seconds when generation finished.
        """
        stats = self.counter.create_stats(
            input_text=input_text,
            output_text=output_text,
            start_time=start_time,
            end_time=end_time
        )
        formatted_stats = self.counter.format_stats(stats)
        # Keep a truncated preview of the texts for the history panel.
        self.session_stats.append({
            "timestamp": datetime.now().isoformat(),
            "stats": formatted_stats,
            "input_preview": input_text[:100] + ("..." if len(input_text) > 100 else ""),
            "output_preview": output_text[:100] + ("..." if len(output_text) > 100 else "")
        })
        await self._render_panel(formatted_stats)
        await self._update_history_panel()

    async def _render_panel(self, stats: Dict[str, Any]):
        """Render the current-exchange stats plus a context-usage bar.

        NOTE(review): cl.Sidebar is not a documented Chainlit API — verify
        against the installed Chainlit version.
        """
        stats_content = "## 📊 本次对话统计\n\n"
        for key, value in stats.items():
            if key != "时间戳":
                stats_content += f"**{key}**: {value}\n\n"
        # Parse the formatted (comma-grouped) total back into an int.
        total_tokens = int(stats["总 Token 数"].replace(",", ""))
        max_tokens = 128000  # Phi-3-mini-128k context window
        if total_tokens > 0:
            percentage = min(100, (total_tokens / max_tokens) * 100)
            # Draw a 20-character text progress bar.
            bar_length = 20
            filled = int(bar_length * percentage / 100)
            bar = "█" * filled + "░" * (bar_length - filled)
            stats_content += f"**上下文使用率**: {percentage:.1f}%\n"
            stats_content += f"`[{bar}]`\n\n"
        stats_content += f"*统计时间:{stats.get('时间戳', 'N/A')}*"
        await cl.Sidebar(
            name="token_stats",
            content=stats_content,
            size="medium"
        ).send()

    async def _update_history_panel(self):
        """Render the last 5 exchanges plus session-wide totals.

        Skipped until the second exchange, since a single record adds no
        history beyond the current panel.
        """
        if len(self.session_stats) <= 1:
            return
        history_content = "## 📈 历史统计\n\n"
        recent_stats = self.session_stats[-5:]
        for i, record in enumerate(recent_stats):
            history_content += f"### 对话 {len(self.session_stats) - len(recent_stats) + i + 1}\n"
            # Slice [11:19] extracts HH:MM:SS from the ISO timestamp.
            history_content += f"**时间**: {record['timestamp'][11:19]}\n"
            history_content += f"**输入**: {record['input_preview']}\n"
            history_content += f"**输出**: {record['output_preview']}\n"
            history_content += f"**总 Token**: {record['stats']['总 Token 数']}\n"
            history_content += "---\n\n"
        # Totals are re-parsed from the formatted strings stored above.
        total_input = sum(int(s['stats']['输入 Token 数'].replace(",", "")) for s in self.session_stats)
        total_output = sum(int(s['stats']['输出 Token 数'].replace(",", "")) for s in self.session_stats)
        total_cost = sum(float(s['stats']['预估成本'].replace("$", "")) for s in self.session_stats)
        history_content += "### 累计统计\n"
        history_content += f"**总对话数**: {len(self.session_stats)}\n"
        history_content += f"**累计输入 Token**: {total_input:,}\n"
        history_content += f"**累计输出 Token**: {total_output:,}\n"
        history_content += f"**累计预估成本**: ${total_cost:.6f}\n"
        await cl.Sidebar(
            name="token_history",
            content=history_content,
            size="large"
        ).send()

    async def show_initial_panel(self):
        """Render the empty panel shown before any exchange happens."""
        initial_content = "## 📊 Token 用量统计面板\n本面板将实时显示 Token 消耗与成本统计。"
        await cl.Sidebar(
            name="token_stats",
            content=initial_content,
            size="medium"
        ).send()
更新 app.py。
import chainlit as cl
from chainlit import run_sync
from plugins.thought_chain.visualizer import ThoughtChainVisualizer
from plugins.token_stats.panel import TokenStatsPanel
import asyncio
import time
thought_visualizer = ThoughtChainVisualizer()
token_panel = TokenStatsPanel()
@cl.on_message
async def main(message: cl.Message):
    """Handle a user message: echo it, stream a simulated reply, then
    update the token-stats panel and the thought-chain visualization."""
    start_time = time.time()  # wall-clock start for throughput stats
    user_msg = cl.Message(
        content=f"用户提问:{message.content}",
        author="User"
    )
    await user_msg.send()
    # Placeholder reply; swap in the real vLLM call for production use.
    simulated_response = "这是一个模拟的 Phi-3 模型回复。"
    msg = cl.Message(content="")
    await msg.send()
    # Stream word by word to mimic token streaming.
    for chunk in simulated_response.split():
        await msg.stream_token(chunk + " ")
        await asyncio.sleep(0.05)
    msg.content = simulated_response
    await msg.update()
    end_time = time.time()
    # Record token usage and cost for this exchange.
    await token_panel.update_panel(
        input_text=message.content,
        output_text=simulated_response,
        start_time=start_time,
        end_time=end_time
    )
    # Render the reasoning steps in the side panel.
    await thought_visualizer.visualize(msg)
@cl.on_chat_start
async def start_chat():
    """Greet the user and initialize the empty token-statistics panel."""
    welcome_msg = cl.Message(
        content="欢迎使用 Phi-3-mini 智能助手!已启用思维链可视化与 Token 统计。",
        author="Assistant"
    )
    await welcome_msg.send()
    await token_panel.show_initial_panel()
if __name__ == "__main__":
cl.run()
创建 config.py。
# config.py
import os
from dataclasses import dataclass
from typing import Optional
@dataclass
class PluginConfig:
    """Aggregated runtime settings for the thought-chain and token-stats plugins."""

    # Thought-chain visualizer settings.
    thought_chain_enabled: bool = True
    thought_chain_min_confidence: float = 0.3
    thought_chain_max_steps: int = 10
    # Token statistics settings.
    token_stats_enabled: bool = True
    token_cost_per_input: float = 0.0015 / 1000
    token_cost_per_output: float = 0.0020 / 1000
    show_cost_estimation: bool = True
    # UI behaviour.
    sidebar_position: str = "right"
    auto_collapse: bool = False

    @staticmethod
    def _flag(name: str, fallback: str) -> bool:
        """Read an environment variable as a case-insensitive "true"/"false" flag."""
        return os.getenv(name, fallback).lower() == "true"

    @classmethod
    def from_env(cls):
        """Build a config from environment variables, falling back to defaults."""
        return cls(
            thought_chain_enabled=cls._flag("THOUGHT_CHAIN_ENABLED", "true"),
            thought_chain_min_confidence=float(os.getenv("THOUGHT_CHAIN_MIN_CONFIDENCE", "0.3")),
            thought_chain_max_steps=int(os.getenv("THOUGHT_CHAIN_MAX_STEPS", "10")),
            token_stats_enabled=cls._flag("TOKEN_STATS_ENABLED", "true"),
            token_cost_per_input=float(os.getenv("TOKEN_COST_INPUT", "0.0000015")),
            token_cost_per_output=float(os.getenv("TOKEN_COST_OUTPUT", "0.0000020")),
            show_cost_estimation=cls._flag("SHOW_COST_ESTIMATION", "true"),
            sidebar_position=os.getenv("SIDEBAR_POSITION", "right"),
            auto_collapse=cls._flag("AUTO_COLLAPSE", "false")
        )
config = PluginConfig.from_env()
创建 plugins/settings/panel.py。
# plugins/settings/panel.py
import chainlit as cl
from typing import Dict, Any
import json
class SettingsPanel:
    """Sidebar panel exposing runtime plugin settings."""

    def __init__(self, config):
        # PluginConfig instance shared with the rest of the app; mutated
        # in place by handle_settings_change.
        self.config = config

    async def show_settings(self):
        """Render the settings widgets in a sidebar.

        NOTE(review): cl.Checkbox/Slider/NumberInput/Divider/Button and
        cl.Sidebar are not part of the documented Chainlit element API —
        verify these exist in the installed Chainlit version.
        """
        settings_elements = [
            cl.Checkbox(name="thought_chain_enabled", label="启用思维链可视化", initial=self.config.thought_chain_enabled),
            cl.Slider(name="thought_chain_confidence", label="置信度阈值", initial=self.config.thought_chain_min_confidence, min=0, max=1, step=0.1),
            cl.NumberInput(name="thought_chain_max_steps", label="最大显示步骤", initial=self.config.thought_chain_max_steps, min=1, max=20),
            cl.Divider(),
            cl.Checkbox(name="token_stats_enabled", label="启用 Token 统计", initial=self.config.token_stats_enabled),
            cl.Button(name="save_settings", label="保存设置", variant="primary")
        ]
        settings_content = "## ⚙️ 插件设置\n支持动态调整可视化与统计参数。"
        await cl.Sidebar(
            name="settings_panel",
            content=settings_content,
            elements=settings_elements,
            size="medium"
        ).send()

    async def handle_settings_change(self, settings: Dict[str, Any]):
        """Apply changed settings to the in-memory config and confirm.

        Args:
            settings: Mapping of widget name to its new value. Only the
                keys present are applied; unknown keys are ignored.
        """
        if "thought_chain_enabled" in settings:
            self.config.thought_chain_enabled = settings["thought_chain_enabled"]
        if "thought_chain_confidence" in settings:
            self.config.thought_chain_min_confidence = settings["thought_chain_confidence"]
        await cl.Message(
            content="✅ 设置已更新!",
            author="System"
        ).send()
phi3-chainlit-enhanced/
├── app.py
├── config.py
├── requirements.txt
├── plugins/
│ ├── thought_chain/
│ │ ├── visualizer.py
│ │ └── utils.py
│ ├── token_stats/
│ │ ├── counter.py
│ │ └── panel.py
│ └── settings/
│ └── panel.py
└── README.md
pip install -r requirements.txt
chainlit run app.py --port 7860
在实际部署中,需将模拟函数替换为真实的 vLLM API 调用。
import requests
import json
async def call_phi3_model_real(prompt: str, timeout: float = 60.0) -> str:
    """Call the vLLM OpenAI-compatible completions endpoint, streaming.

    Args:
        prompt: Prompt text sent to the model.
        timeout: Seconds to wait for the HTTP connection/response. New
            backward-compatible parameter — the original request had no
            timeout and could hang forever on a dead server.

    Returns:
        The concatenated streamed completion text, or an error message
        string when the request fails.

    NOTE(review): requests.post is blocking and will stall the asyncio
    event loop inside this async function — consider httpx.AsyncClient
    or aiohttp for a truly non-blocking call.
    """
    url = "http://localhost:8000/v1/completions"
    headers = {"Content-Type": "application/json"}
    data = {
        "model": "Phi-3-mini-128k-instruct",
        "prompt": prompt,
        "max_tokens": 1024,
        "temperature": 0.7,
        "stream": True
    }
    try:
        # BUG FIX: pass timeout so a dead server cannot hang the call.
        response = requests.post(url, headers=headers, json=data, stream=True, timeout=timeout)
        full_response = ""
        # vLLM streams Server-Sent Events: lines of the form "data: {...}".
        for line in response.iter_lines():
            if not line:
                continue
            line = line.decode('utf-8')
            if line.startswith('data: '):
                json_str = line[6:]  # strip the SSE "data: " prefix
                if json_str != '[DONE]':
                    try:
                        chunk = json.loads(json_str)
                        full_response += chunk['choices'][0]['text']
                    except json.JSONDecodeError:
                        # Skip malformed/partial SSE payloads.
                        continue
        return full_response
    except Exception as e:
        # Best-effort: surface the failure as a chat-visible message.
        return f"调用模型时出错:{str(e)}"
本文介绍了为 Phi-3-mini-128k-instruct 模型开发 Chainlit 插件的方法,实现了思维链可视化和 Token 用量统计功能。通过插件化设计,提升了 AI 对话的可解释性与透明度,便于成本控制与性能优化。

微信公众号「极客日志」,在微信中扫描左侧二维码关注。展示文案:极客日志 zeeklog
使用加密算法(如AES、TripleDES、Rabbit或RC4)加密和解密文本明文。 在线工具,加密/解密文本在线工具,online
生成新的随机RSA私钥和公钥pem证书。 在线工具,RSA密钥对生成器在线工具,online
基于 Mermaid.js 实时预览流程图、时序图等图表,支持源码编辑与即时渲染。 在线工具,Mermaid 预览与可视化编辑在线工具,online
解析常见 curl 参数并生成 fetch、axios、PHP curl 或 Python requests 示例代码。 在线工具,curl 转代码在线工具,online
将字符串编码和解码为其 Base64 格式表示形式即可。 在线工具,Base64 字符串编码/解码在线工具,online
将字符串、文件或图像转换为其 Base64 表示形式。 在线工具,Base64 文件转换器在线工具,online