简介
本次演示围绕 Bright Data 与 Haystack 的集成实操展开,完整展示了从获取 Bright Data API 密钥、创建 SERP API 与 Web Unlocker API Zone,到在 Haystack 中将其封装为 Tool 并接入 Agent 的全流程。
同时,通过实际示例演示了 AI Agent 在推理过程中按需调用搜索与网页抓取能力,实时获取 Google 相关股票新闻并完成自动化总结的效果。
实测结果表明,借助 Bright Data,无需自行维护代理和反爬逻辑,即可稳定获取搜索结果与网页正文数据,适合生产级 AI Agent 场景。
参考代码如下:
import os
import json
import asyncio
import traceback
from typing import Union, List, Any, Optional
from brightdata import BrightDataClient
from haystack.tools import Tool
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.agents import Agent
from haystack.dataclasses import ChatMessage
# ========== Configuration (variable names/structure kept; secrets are not hardcoded) ==========
# Before running, set these in your system environment:
#   BRIGHTDATA_API_TOKEN=...
#   OPENAI_API_KEY=...
#
# Windows PowerShell example:
#   setx BRIGHTDATA_API_TOKEN "xxx"
#   setx OPENAI_API_KEY "yyy"
#
# Then open a new terminal before running the script.
#
# setdefault() only fills in the placeholder when the variable is missing, so
# credentials exported as described above are no longer overwritten by "XX".
os.environ.setdefault("BRIGHTDATA_API_TOKEN", "XX")
os.environ.setdefault("OPENAI_API_KEY", "sk-XX")

SERP_ZONE_NAME = "serp_api2"
WEB_UNLOCKER_ZONE_NAME = "web_unlocker1"  # <-- replace with the real Web Unlocker zone name from your console
# NOTE(review): the right-hand side of this assignment is missing from the
# listing (lost in extraction). The flag is read where the `tools` list is
# built, deciding whether `web_unlocker_tool` is included, so it is presumably
# a boolean -- restore the intended value from the original script.
ENABLE_WEB_UNLOCKER =
def _run_async(coro):
    """Run the coroutine *coro* to completion from synchronous code.

    The listing lost its keywords and the function name in extraction; the
    name is taken from the call sites (``_run_async(_inner())``).

    Args:
        coro: A coroutine object (not yet awaited).

    Returns:
        Whatever *coro* returns.

    Raises:
        Any exception raised by *coro* is propagated unchanged.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop is running in this thread, so asyncio.run() may
        # create and own one. Only the probe sits inside the ``try`` so a
        # RuntimeError raised by the coroutine itself is never mistaken for
        # "no running loop" and retried on an already-consumed coroutine.
        return asyncio.run(coro)

    # A loop is already running in this thread (e.g. a notebook or an async
    # framework). asyncio refuses to drive a second loop on the same thread,
    # so run the coroutine on a fresh loop in a worker thread and wait for it.
    # NOTE: this blocks the calling thread (and therefore the running loop)
    # until the coroutine finishes, which is inherent to a sync wrapper.
    import concurrent.futures

    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()
# NOTE(review): the `def` keyword, function name, parameter list, return
# annotation, both dict keys and the `limit=` value were lost in extraction.
# Call sites name this helper `_as_error_payload(e)`. The surviving tokens show
# it building a two-entry dict from the exception `e` and from
# `traceback.format_exc(...)`. Restore the missing tokens from the original
# script; do not guess the key names, since tool output depends on them.
() -> :
{: (e), : traceback.format_exc(limit=)}
# NOTE(review): keywords, builtins, the function name/signature and every
# string literal were lost in extraction. Call sites name this helper
# `_extract_payload(...)` and pass it a single result object.
# What the surviving tokens show:
#   - an early check on `obj`, after which `obj` itself is used as-is;
#   - a loop over seven candidate names `k` (all seven strings are missing)
#     doing a three-argument lookup on `obj` -- presumably
#     `getattr(obj, k, <default>)` -- and using the first acceptable `v`;
#   - a conversion `d = <callable>(obj)` guarded by an `Exception` handler;
#   - a final fallback call on `obj`.
# Restore the exact names and literals from the original script.
() -> :
obj :
(obj, ):
obj
k [, , , , , , ]:
v = (obj, k, )
v :
v
:
d = (obj)
d:
d
Exception:
(obj)
# NOTE(review): keywords, builtins, the function name/signature and all
# literals (dict keys, the `ensure_ascii=` value, the argument appended for
# one branch) were lost in extraction. Both `Tool(...)` definitions reference
# this helper as `_results_to_json` inside `outputs_to_string`.
# What the surviving tokens show:
#   - every path ends in `json.dumps(..., ensure_ascii=<lost>)`;
#   - `results` is special-cased twice before a list-like branch;
#   - in the list-like branch each element `r` is appended to `out` either
#     as a fixed value, unchanged, or after `_extract_payload(r)`;
#   - anything else is passed through `_extract_payload(results)`.
# Restore the exact conditions and literals from the original script.
() -> :
results :
json.dumps({: }, ensure_ascii=)
(results, ) results:
json.dumps(results, ensure_ascii=)
(results, ):
out = []
r results:
r :
out.append()
(r, ) r:
out.append(r)
:
out.append(_extract_payload(r))
json.dumps(out, ensure_ascii=)
json.dumps(_extract_payload(results), ensure_ascii=)
def google_search_sync(query, **kwargs):
    """Run a Google search through ``BrightDataClient`` from synchronous code.

    The listing lost its keywords, builtins and the ``def`` line in
    extraction; the name and parameters are taken from the call site
    (``google_search_sync(query, **kwargs)``) and from the names used in the
    body.

    Args:
        query: A single query, or a list of queries. A list fans out into one
            ``c.search.google`` call per element, awaited together with
            ``asyncio.gather``.
        **kwargs: Forwarded unchanged to every ``c.search.google`` call.

    Returns:
        The result of ``c.search.google`` for a single query, the list of
        results from ``asyncio.gather`` for a list of queries, or the value of
        ``_as_error_payload(e)`` if any exception is raised along the way.
    """

    async def _inner():
        try:
            async with BrightDataClient(serp_zone=SERP_ZONE_NAME) as c:
                if isinstance(query, list):
                    tasks = [c.search.google(query=q, **kwargs) for q in query]
                    return await asyncio.gather(*tasks)
                return await c.search.google(query=query, **kwargs)
        except Exception as e:
            # Tool boundary: report the failure as data instead of raising.
            return _as_error_payload(e)

    return _run_async(_inner())
# NOTE(review): keywords, builtins and both `def` lines were lost in
# extraction. The call site names this function
# `scrape_url_sync(url, country=country)`; the body uses `url` and `country`,
# but any default value for `country` in the signature is not recoverable here.
# What the surviving tokens show: an inner coroutine `_inner` opens
# `BrightDataClient(web_unlocker_zone=WEB_UNLOCKER_ZONE_NAME)` as `c`, calls
# `c.scrape_url(u, country=country)` once per element and combines them with
# `asyncio.gather` in one branch, or calls `c.scrape_url(url, country=country)`
# once in the other; an `Exception` handler returns `_as_error_payload(e)`;
# the outer function hands `_inner()` to `_run_async`.
():
():
:
BrightDataClient(web_unlocker_zone=WEB_UNLOCKER_ZONE_NAME) c:
(url, ):
tasks = [c.scrape_url(u, country=country) u url]
asyncio.gather(*tasks)
:
c.scrape_url(url, country=country)
Exception e:
_as_error_payload(e)
_run_async(_inner())
# Parameter schema passed as `parameters=` to the `serp_api_tool` definition.
# NOTE(review): every key and value string in this dict literal was lost in
# extraction; only the nesting survives (a top-level mapping containing a
# nested mapping with two entries, plus a list-valued entry). Presumably a
# JSON-Schema object description -- restore the literals from the original.
serp_parameters = {
: ,
: {
: {: [, ], : {: }},
: {: },
},
: [],
}
# NOTE(review): the `def` line and several keywords/literals were lost in
# extraction. The Tool definition references this function as
# `serp_api_tool_entry`. The surviving tokens show it falling back to an empty
# dict for `kwargs`, applying two `kwargs.setdefault(...)` defaults (both keys
# and values are missing), then delegating to
# `google_search_sync(query, **kwargs)`.
():
kwargs = kwargs {}
kwargs.setdefault(, )
kwargs.setdefault(, )
google_search_sync(query, **kwargs)
# Haystack Tool wrapping `serp_api_tool_entry`, described by `serp_parameters`;
# its output is turned into a string via `_results_to_json`.
# NOTE(review): the `name=` and `description=` strings and the key inside
# `outputs_to_string` were lost in extraction -- restore them from the
# original script.
serp_api_tool = Tool(
name=,
description=,
parameters=serp_parameters,
function=serp_api_tool_entry,
outputs_to_string={: _results_to_json},
)
# Parameter schema passed as `parameters=` to the `web_unlocker_tool` definition.
# NOTE(review): every key and value string in this dict literal was lost in
# extraction; only the nesting survives. Presumably a JSON-Schema object
# description -- restore the literals from the original script.
unlocker_parameters = {
: ,
: {
: {: [, ], : {: }},
: {: },
},
: [],
}
# NOTE(review): the `def` line (name, parameters, any defaults) was lost in
# extraction. The Tool definition references this function as
# `web_unlocker_tool_entry`; the surviving body delegates to
# `scrape_url_sync(url, country=country)`.
():
scrape_url_sync(url, country=country)
# Haystack Tool wrapping `web_unlocker_tool_entry`, described by
# `unlocker_parameters`; its output is turned into a string via
# `_results_to_json`.
# NOTE(review): the `name=` and `description=` strings and the key inside
# `outputs_to_string` were lost in extraction -- restore them from the
# original script.
web_unlocker_tool = Tool(
name=,
description=,
parameters=unlocker_parameters,
function=web_unlocker_tool_entry,
outputs_to_string={: _results_to_json},
)
# Build the chat generator and the Agent, then warm the agent up.
# NOTE(review): the `model=` and `api_base_url=` strings, and the conditional
# keywords around ENABLE_WEB_UNLOCKER, were lost in extraction. The surviving
# tokens show `tools` always containing `serp_api_tool`, with
# `web_unlocker_tool` added depending on ENABLE_WEB_UNLOCKER.
chat_generator = OpenAIChatGenerator(model=, api_base_url=)
tools = [serp_api_tool] + ([web_unlocker_tool] ENABLE_WEB_UNLOCKER [])
agent = Agent(chat_generator=chat_generator, tools=tools)
agent.warm_up()
# Send one user prompt to the agent and walk the returned messages.
# NOTE(review): the prompt text, the key used to index `response`, the role
# strings compared against, the output calls and all loop/branch keywords were
# lost in extraction. The surviving tokens show two role-specific branches:
# one emitting `content.result` and one emitting `content.text` for each item
# of `msg._content`.
# NOTE(review): `_role` and `_content` are underscore-prefixed attributes of
# the message object; check whether ChatMessage offers public accessors
# before relying on them.
prompt =
response = agent.run(messages=[ChatMessage.from_user(prompt)])
msg response[]:
role = msg._role.value
role == :
content msg._content:
()
(content.result)
role == :
content msg._content:
(content, ):
()
(content.text)

