WebResearcher 迭代式深度研究智能体架构与使用指南

WebResearcher 迭代式深度研究智能体架构与使用指南 | 极客日志

| 工具 | 描述 | 使用场景 |
| --- | --- | --- |
| `search` | 通过 Serper API 的 Google 搜索 | 通用网页信息 |
| `google_scholar` | 学术论文搜索 | 科研文献查询 |
| `visit` | 网页内容提取 | 深度内容分析 |
| `python` | 沙盒代码执行 | 数据分析、计算 |
| `parse_file` | 多格式文件解析器 | 文档处理 |

pip install webresearcher

# 启动 WebUI 服务
cd webui
python3 app.py
# 访问 http://localhost:8000

# 设置 API 密钥
export LLM_API_KEY="your_key"
export SERPER_API_KEY="your_key"
# 运行研究查询
webresearcher "刘翔破纪录时候是多少岁？"

import asyncio
from webresearcher import WebResearcherAgent

# LLM settings. Per the guide, "api_key" and "base_url" are optional and fall
# back to the LLM_API_KEY / LLM_BASE_URL environment variables when omitted.
llm_config = dict(
    model="gpt-4o",
    api_key="your-api-key",
    base_url="https://api.openai.com/v1",
    generate_cfg=dict(temperature=0.6),
)

# Create the agent (the guide notes that api_key and base_url can also be
# passed directly as arguments).
agent = WebResearcherAgent(
    function_list=["search", "google_scholar", "python"],
    llm_config=llm_config,
)


async def main():
    """Run a single research query and print the agent's prediction."""
    answer = (await agent.run("您的研究问题"))['prediction']
    print(answer)


# Start the research.
asyncio.run(main())

import asyncio
from webresearcher.react_agent import ReactAgent

llm_config = {
    "model": "gpt-4o",
    # Optional; per the guide, defaults to the LLM_API_KEY environment variable.
    "api_key": "your-api-key",
    # Optional; per the guide, defaults to the LLM_BASE_URL environment variable.
    "base_url": "https://api.openai.com/v1",
    "generate_cfg": {"temperature": 0.6},
}

agent = ReactAgent(
    function_list=["search", "google_scholar", "visit", "python"],
    llm_config=llm_config,
)


async def main():
    """Ask the ReAct agent one question and print its prediction."""
    question = "2024 年巴黎的人口是多少？请给出平方根。"
    outcome = await agent.run(question)
    # The guide documents the returned mapping as containing
    # question / prediction / termination / trajectory, and states that
    # "prediction" is always a non-empty string.
    print(outcome["prediction"])


asyncio.run(main())

webresearcher "复杂问题" --use-tts --num-agents 3

import asyncio

from webresearcher import TestTimeScalingAgent

# NOTE: `llm_config` and `function_list` are not defined in this snippet;
# define them first (for example, the config dict and the list of tool names
# used in the Python API example).
agent = TestTimeScalingAgent(llm_config, function_list)


async def main():
    """Run one query with three parallel agents and return the result."""
    return await agent.run("复杂问题", num_parallel_agents=3)


# `await` is only valid inside a coroutine when run as a regular script, so
# the call is driven through asyncio.run().
result = asyncio.run(main())

from webresearcher import BaseTool, WebResearcherAgent, TOOL_MAP

class MyCustomTool(BaseTool):
    """Example custom tool; replace the metadata and `call` body with your own."""

    name = "my_tool"
    description = "工具功能描述"
    # Argument schema in JSON-Schema style. `query` is an illustrative field:
    # replace it with your tool's real parameters.
    # NOTE(review): assumes BaseTool accepts a JSON-Schema-style mapping here;
    # confirm against the library.
    parameters = {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "输入内容"},
        },
        "required": ["query"],
    }

    def call(self, params, **kwargs):
        """Run the tool on `params` and return its result."""
        # Your tool logic goes here.
        return "结果"

# Register the tool under its name, then enable it on an agent.
TOOL_MAP['my_tool'] = MyCustomTool()
agent = WebResearcherAgent(function_list=["my_tool", "search"])

import asyncio

from webresearcher import WebResearcherAgent

questions = ["问题 1", "问题 2", "问题 3"]
agent = WebResearcherAgent()


async def main():
    """Answer each question in turn with the shared agent and print the results."""
    for question in questions:
        result = await agent.run(question)
        print(f"Q: {question}\nA: {result['prediction']}\n")


# `await` is only valid inside a coroutine when run as a regular script, so
# the loop is driven through asyncio.run().
asyncio.run(main())

# 配置沙箱端点
export SANDBOX_FUSION_ENDPOINTS="http://your-sandbox-endpoint.com"

from webresearcher import PythonInterpreter

# 如果配置了沙箱则使用沙箱，否则降级到本地执行
interpreter = PythonInterpreter()
result = interpreter.call({'code': 'print("Hello, World!")'})

# 运行前设置日志级别
export WEBRESEARCHER_LOG_LEVEL=DEBUG
# 选项：DEBUG, INFO, WARNING, ERROR, CRITICAL
webresearcher "你的问题"

import asyncio

from webresearcher import set_log_level, add_file_logger, WebResearcherAgent

# Console log level: show only warnings and errors.
set_log_level("WARNING")
# Also log to a file at DEBUG level; the guide states the file is rotated
# automatically.
add_file_logger("research.log", level="DEBUG")

agent = WebResearcherAgent()


async def main():
    """Run the research query with the logging configured above."""
    return await agent.run("你的问题")


# `await` is only valid inside a coroutine when run as a regular script, so
# the call is driven through asyncio.run().
result = asyncio.run(main())

# ===== 必需 =====
# LLM API 密钥 (OpenAI/DeepSeek 等)
LLM_API_KEY=...
# Serper API 密钥（Google 搜索）
SERPER_API_KEY=...

# ===== 可选 =====
# 自定义 LLM 端点，或 DeepSeek base url
LLM_BASE_URL=https://...
# 默认模型名称
LLM_MODEL_NAME=gpt-4o
# Jina AI API 密钥（网页抓取）
JINA_API_KEY=...
# 代码执行沙盒端点
SANDBOX_FUSION_ENDPOINTS=...
# 每次研究的最大迭代次数
MAX_LLM_CALL_PER_RUN=50
# 文件存储目录
FILE_DIR=./files

# Sampling options forwarded to the model.
_generate_cfg = {
    # Sampling temperature (0.0-2.0).
    "temperature": 0.6,
    # Nucleus sampling.
    "top_p": 0.95,
    # Presence penalty (the guide describes it as a repetition penalty).
    "presence_penalty": 1.1,
    # enabled | disabled | auto; leave unset if the model does not support
    # thinking.
    "model_thinking_type": "enabled",
}

llm_config = {
    # Alternatives mentioned by the guide: o3-mini, gpt-4-turbo, etc.
    "model": "deepseek-v3.1",
    # Optional; per the guide, defaults to the LLM_API_KEY environment variable.
    "api_key": "your-api-key",
    # Optional; per the guide, defaults to the LLM_BASE_URL environment variable.
    "base_url": "https://api.openai.com/v1",
    "generate_cfg": _generate_cfg,
    # Context window limit.
    "max_input_tokens": 32000,
    # LLM API timeout, in seconds.
    "llm_timeout": 300.0,
    # Overall agent timeout, in seconds.
    "agent_timeout": 600.0,
}

import asyncio
from webresearcher import WebWeaverAgent

async def main():
    """Run WebWeaverAgent on one question and print outline, report, and memory-bank size."""
    # Low temperature: the guide recommends it for factual research.
    sampling = {"temperature": 0.1, "top_p": 0.95, "max_tokens": 10000}

    # Initialise the agent with the LLM configuration.
    agent = WebWeaverAgent(
        llm_config={
            "model": "gpt-4o",
            "generate_cfg": sampling,
            "llm_timeout": 300.0,
        }
    )

    # Run the research.
    result = await agent.run("气候变化的主要原因是什么？")

    # Print each part of the result under its label, in order.
    for label, key in (
        ("最终大纲:", 'final_outline'),
        ("最终报告:", 'final_report'),
        ("记忆库大小:", 'memory_bank_size'),
    ):
        print(label, result[key])


if __name__ == "__main__":
    asyncio.run(main())

# 使用 WebWeaver 模式
webresearcher "气候变化的原因是什么？" --use-webweaver
# 保存结果到文件
webresearcher "研究问题" --use-webweaver --output report.json
# 详细日志
webresearcher "问题" --use-webweaver --verbose

| 特性 | WebResearcher | WebWeaver |
| --- | --- | --- |
| 架构 | 单智能体 | 双智能体 |
| 范式 | IterResearch | 动态大纲 |
| 记忆 | 无状态工作空间 | Memory Bank |
| 输出 | 直接答案 | 大纲 + 报告 |
| 引用 | 隐式 | 显式带 ID |
| 结构 | 迭代综合 | 层次化 |
| 适用场景 | 快速问答 | 综合报告 |

# 安装开发依赖
pip install -e ".[dev]"
# 运行测试
pytest
# 运行覆盖率测试
pytest --cov=webresearcher

WebResearcher 迭代式深度研究智能体架构与使用指南

简介

传统 Agent 的问题

WebResearcher 的解决方案

🏗️ 架构

核心组件

可用工具

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

🚀 快速开始

安装

WebUI 使用（推荐）

CLI 基础使用

Python API

Multi-Turn ReAct：ReactAgent

📚 高级用法

测试时扩展 (TTS)

自定义工具

批量处理

Python 解释器配置

日志管理

🎯 功能特性

核心特性

工具特性

生产特性

📊 性能表现

🔧 配置

环境变量

LLM 配置

🎭 WebWeaver Agent

架构组件

1. Memory Bank（记忆库）

2. Planner Agent（规划智能体）

3. Writer Agent（写作智能体）

核心特性

动态大纲

引用支撑的报告

WebWeaver 使用方法

基础使用

命令行使用

WebResearcher vs WebWeaver 对比

何时使用 WebWeaver

📝 示例

🧪 测试

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具