Web Unlocker API 实战：AI 训练数据集构建与网页数据抓取方案 | 极客日志

Web Unlocker API 实战：AI 训练数据集构建与网页数据抓取方案 | 极客日志

# Build one dataset record per category. For each category name, look up the
# <div> whose class matches it and extract its tag, title, coordinates and
# image URL. Missing elements or attributes fall back to an empty string.
for category in categories:
    category_section = soup.find('div', {'class': category})
    if not category_section:
        # No matching section for this category; nothing to record.
        continue

    # Look up each child element once rather than once for the existence
    # check and a second time for the value.
    heading = category_section.find('h2')
    image = category_section.find('img')

    tag = category_section.get('data-tag', '')
    title = heading.text if heading else ''
    coords = category_section.get('data-coords', '')
    # Use .get() so an <img> without a src attribute yields '' instead of
    # raising KeyError and aborting the whole loop.
    img_url = image.get('src', '') if image else ''

    # Append the extracted fields to the dataset.
    dataset.append({
        'Tag': tag,
        'Title': title,
        'Coords': coords,
        'Image URL': img_url,
    })