Python 使用 Web Unlocker API 抓取亚马逊数据

Python 使用 Web Unlocker API 抓取亚马逊数据 | 极客日志

import requests
from bs4 import BeautifulSoup
import pandas as pd
import warnings

# Suppress warnings whose message starts with "Unverified HTTPS request";
# the request further down is sent with verify=False.
warnings.filterwarnings("ignore", message="Unverified HTTPS request")

# Bright Data credentials -- replace these with your real values.
customer_id = "your_customer_id"
zone_name = "web_unlocker_zone"
zone_password = "your_zone_password"

# Proxy endpoint plus the "user:password" pair derived from the credentials.
# NOTE(review): the ":port" suffix looks like a placeholder -- confirm and
# substitute the real port before running.
proxy_url = "brd.superproxy.io:port"
proxy_auth = "brd-customer-{}-zone-{}:{}".format(
    customer_id, zone_name, zone_password
)

# Both the "http" and "https" schemes go through the same http:// proxy URL.
proxies = {
    scheme: f"http://{proxy_auth}@{proxy_url}" for scheme in ("http", "https")
}

# Amazon search URL to fetch.
target_url = (
    "https://www.amazon.com/s"
    "?k=gaming&language=zh&_encoding=UTF8"
)

# Browser-like request headers sent along with the request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,*/*;q=0.8"
    ),
    "Accept-Encoding": "gzip, deflate, br",
    "Referer": "https://www.amazon.com/",
}

def _text_or_na(card, selector):
    """Return the stripped text of the first *selector* match in *card*, or "N/A"."""
    element = card.select_one(selector)
    return element.text.strip() if element else "N/A"


# Fetch the search page through the proxy, save the raw HTML, extract one
# record per product card and write the records to a CSV file.
try:
    print("正在通过 Bright Data 代理发送请求...")
    response = requests.get(
        target_url,
        proxies=proxies,
        headers=headers,
        # SECURITY NOTE: certificate verification is disabled here, so the
        # TLS peer is not authenticated. Prefer pointing `verify` at a
        # trusted CA bundle if one is available for the proxy.
        verify=False,
        # Without a timeout requests waits indefinitely for a stalled proxy.
        timeout=60,
    )
    print(f"请求状态码：{response.status_code}")
    # Stop on 4xx/5xx responses so an error page is not saved and reported
    # as a successful fetch; the HTTPError is reported by the handler below.
    response.raise_for_status()

    # Keep a copy of the raw HTML.
    with open("amazon_gaming_search.html", "w", encoding="utf-8") as file:
        file.write(response.text)
    print("成功获取亚马逊搜索数据，已保存到 amazon_gaming_search.html")

    # Parse the search results.
    soup = BeautifulSoup(response.text, "html.parser")
    search_results = []

    # Result items that carry a non-empty data-asin attribute.
    product_cards = soup.select(".s-result-item[data-asin]:not([data-asin=''])")
    print(f"找到 {len(product_cards)} 个产品")

    for card in product_cards:
        asin = card.get("data-asin")
        try:
            title = _text_or_na(card, "h2 a span")
            search_results.append({
                "asin": asin,
                "title": title,
                "price": _text_or_na(card, ".a-price .a-offscreen"),
                "rating": _text_or_na(card, ".a-icon-star-small"),
                "reviews": _text_or_na(card, "span.a-size-base.s-underline-text"),
                "url": f"https://www.amazon.com/dp/{asin}"
            })
            print(f"已解析：{title[:30]}...")
        except Exception as e:
            # Best effort: one malformed card must not abort the whole run.
            print(f"解析产品 {asin} 时出错：{str(e)}")

    # Write the collected records to CSV (UTF-8 with BOM).
    if search_results:
        df = pd.DataFrame(search_results)
        df.to_csv("amazon_gaming_search_results.csv", index=False, encoding="utf-8-sig")
        print(f"已成功抓取 {len(search_results)} 个搜索结果，保存到 amazon_gaming_search_results.csv")
        print("\n搜索结果前 5 条数据:")
        print(df.head().to_string())
    else:
        print("未找到搜索结果")
except Exception as e:
    # Top-level boundary of the script: report the failure instead of
    # letting a traceback escape.
    print(f"请求失败：{str(e)}")

# Export title, price and image URL of every product card on the current
# page to amazon_products.csv, then close the browser.
# NOTE(review): `driver`, `By`, `csv` and `time` are not defined in the
# visible part of this file -- confirm they are created/imported above.
try:
    # Collect the product cards from the main result slot.
    product_elements = driver.find_elements(By.CSS_SELECTOR, ".s-main-slot .s-result-item")

    # utf-8-sig can encode every character; the previous 'gbk' encoding
    # raised UnicodeEncodeError on characters outside GBK, which the broad
    # handler below swallowed, silently dropping the row.
    with open('amazon_products.csv', 'w', newline='', encoding='utf-8-sig') as csvfile:
        fieldnames = ['Title', 'Price', 'Image URL']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for index, product in enumerate(product_elements):
            try:
                title = product.find_element(By.CSS_SELECTOR, ".a-text-normal").text
                price = product.find_element(By.CSS_SELECTOR, ".a-price-whole").text
                image_url = product.find_element(By.CSS_SELECTOR, "img.s-image").get_attribute("src")
                print(f"Product {index + 1}:")
                print(f"Title: {title}")
                print(f"Price: {price} USD")
                print(f"Image URL: {image_url}")
                # Append the record to the CSV file.
                writer.writerow({'Title': title, 'Price': price, 'Image URL': image_url})
            except Exception:
                # Best effort: cards lacking one of the three fields are skipped.
                print(f"Skipping product {index + 1} due to missing information.")
            # NOTE(review): nothing is loaded between iterations, so this
            # delay looks unnecessary -- confirm before removing.
            time.sleep(2)
finally:
    # Always close the browser, even if the export above fails.
    driver.quit()

Python 使用 Web Unlocker API 抓取亚马逊数据

一、Web Unlocker API 简介

二、开始使用 Web Unlocker API

1、首先进入控制台页面

2、创建通道

3、查看详细信息

4、配置网页解锁器

5、以 Python 脚本获取亚马逊平台数据为示例

（1）定位具体数据

（2）编写 Python 代码

6、结果示例

三、Web Scraper

1、快速使用 Web Scraper

2、通过 Python 获取亚马逊网页数据

3、定位具体数据

4、运行并保存到 CSV 文件

四、SERP API

五、总结

更多推荐文章

相关免费在线工具

Python 使用 Web Unlocker API 抓取亚马逊数据

一、Web Unlocker API 简介

二、开始使用 Web Unlocker API

1、首先进入控制台页面

2、创建通道

3、查看详细信息

4、配置网页解锁器

5、以 Python 脚本获取亚马逊平台数据为示例

（1）定位具体数据

（2）编写 Python 代码

6、结果示例

三、Web Scraper

1、快速使用 Web Scraper

2、通过 Python 获取亚马逊网页数据

3、定位具体数据

4、运行并保存到 CSV 文件

四、SERP API

五、总结

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具