Python 实用小项目：爬虫、聊天机器人与数据分析示例 | 极客日志

PythonAI算法

Python 实用小项目：爬虫、聊天机器人与数据分析示例

综述（由 AI 生成）：本文提供了七个 Python 实用小项目的完整代码与说明，包括知乎图片抓取、聊天机器人对话模拟、唐诗作者 AI 分析、彩票随机生成、自动写检讨书、屏幕截图录制以及 GIF 动图制作。内容涵盖 Selenium 爬虫、NLP 分类算法、PIL 图像处理及 xlrd 办公自动化等技术点。所有代码均经过整理，修复了常见语法错误与环境依赖问题，适合作为 Python 初学者的实战练习材料。

指针猎手 发布于 2025/2/6 · 更新于 2026/6/9 · 25 浏览

Python 实用小项目：爬虫、聊天机器人与数据分析示例

本文整理了几个适合新手练习的 Python 小项目，涵盖网络爬虫、自然语言处理、自动化办公等方向。这些项目代码经过调试，旨在帮助初学者理解 Python 在实际场景中的应用。

环境准备

在运行以下代码前，请确保已安装 Python 3.x 环境，并安装必要的第三方库：

pip install selenium requests jieba nltk pillow xlrd

部分功能可能需要额外的依赖（如 ChromeDriver），请根据具体项目需求配置。

1. 抓取知乎图片

本项目演示如何使用 Selenium 模拟浏览器行为，滚动页面并提取图片链接进行下载。

注意： 网站反爬策略可能随时变化，请遵守相关法律法规及网站服务条款。

from selenium import webdriver
import time
import urllib.request
import re

def download_zhihu_images():
    """Scroll a Zhihu question page and download the images it references.

    Opens the question page in Chrome, scrolls to the bottom up to ten
    times (clicking the "load more" button after each scroll), extracts
    every ``img src="..."`` value from the rendered page source and saves
    each absolute http(s) link into the current working directory.

    Returns:
        None. Progress and per-image failures are printed to stdout.

    NOTE(review): the file names and status messages below were
    reconstructed; the original string literals were lost from the
    published listing.
    """
    # Imported locally so this block stays self-contained.
    import html
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    try:
        driver.maximize_window()
        # Replace with the actual target URL.
        url = "https://www.zhihu.com/question/29134042"
        driver.get(url)

        # A bounded `for` replaces `while i < 10`, whose counter was never
        # incremented and therefore only ended when an exception fired.
        for page in range(10):
            # Scroll to the bottom so lazily loaded answers are rendered.
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            try:
                # find_element_by_css_selector was removed in Selenium 4.3;
                # find_element(By.CSS_SELECTOR, ...) is the supported form.
                button = driver.find_element(By.CSS_SELECTOR, 'button.QuestionMainAction')
                button.click()
            except WebDriverException:
                # Button missing or not clickable: nothing more to load.
                break
            print(f"page{page}")
            time.sleep(1)

        result_raw = driver.page_source
    finally:
        # Always release the browser, even if navigation or scrolling failed.
        driver.quit()

    # Extract img src attributes with a regular expression.
    content_list = re.findall(r'img src="(.+?)"', str(result_raw))

    n = 0
    for link in content_list:
        # Attribute values in serialized HTML carry entities such as &amp;.
        link = html.unescape(link)
        if link.startswith(("http://", "https://")):
            # The running index keeps names unique within the same second.
            local = "%d_%d.jpg" % (int(time.time()), n)
            try:
                urllib.request.urlretrieve(link, local)
                print(f"已下载：{local}")
            except (OSError, ValueError) as e:
                # One broken link must not abort the remaining downloads.
                print(f"下载失败：{link}（{e}）")
        n += 1


if __name__ == "__main__":
    download_zhihu_images()

相关免费在线工具

加密/解密文本
使用加密算法（如AES、TripleDES、Rabbit或RC4）加密和解密文本明文。在线工具，加密/解密文本在线工具，online
RSA密钥对生成器
生成新的随机RSA私钥和公钥pem证书。在线工具，RSA密钥对生成器在线工具，online
Mermaid 预览与可视化编辑
基于 Mermaid.js 实时预览流程图、时序图等图表，支持源码编辑与即时渲染。在线工具，Mermaid 预览与可视化编辑在线工具，online
随机西班牙地址生成器
随机生成西班牙地址（支持马德里、加泰罗尼亚、安达卢西亚、瓦伦西亚筛选），支持数量快捷选择、显示全部与下载。在线工具，随机西班牙地址生成器在线工具，online
Gemini 图片去水印
基于开源反向 Alpha 混合算法去除 Gemini/Nano Banana 图片水印，支持批量处理与下载。在线工具，Gemini 图片去水印在线工具，online
curl 转代码
解析常见 curl 参数并生成 fetch、axios、PHP curl 或 Python requests 示例代码。在线工具，curl 转代码在线工具，online

from time import sleep
import requests

# Two-bot conversation: the user's topic is sent to the Tuling API, its reply
# is forwarded to the Qingyunke API, and that reply is fed back to Tuling,
# until a request fails or the user presses Ctrl+C.
print("请输入话题：")
s = input()

try:
    while True:
        # Call the Tuling robot API. The timeout keeps a stalled server
        # from hanging the script forever.
        resp = requests.post(
            "http://www.tuling123.com/openapi/api",
            data={"key": "YOUR_API_KEY", "info": s},
            timeout=10,
        )
        resp = resp.json()
        sleep(1)
        print('小鱼：', resp.get('text', '无回复'))
        s = resp.get('text', '')

        # Call the Qingyunke API.
        resp = requests.get(
            "http://api.qingyunke.com/api.php",
            {'key': 'free', 'appid': 0, 'msg': s},
            timeout=10,
        )
        resp.encoding = 'utf8'
        resp = resp.json()
        sleep(1)
        print('菲菲：', resp.get('content', '无回复'))
        s = resp.get('content', '')
except KeyboardInterrupt:
    # Ctrl+C is the intended way to end the otherwise endless dialogue.
    print()
except (requests.RequestException, ValueError) as e:
    # Network failure, timeout, or a response body that is not valid JSON.
    print(f"请求失败：{e}")

import jieba
from nltk.classify import NaiveBayesClassifier

def analyze_poetry_author():
    """Guess whether a line of verse reads more like Li Bai or Du Fu.

    Reads ``libai.txt`` and ``dufu.txt`` from the working directory,
    segments both with jieba, trains an NLTK naive Bayes classifier on the
    resulting tokens (labels ``'lb'`` and ``'df'``), then classifies each
    token of a sentence typed by the user and prints the share of tokens
    assigned to each poet.

    Returns:
        None. Results, or an error message, are printed to stdout.
    """

    def word_feats(words):
        # Maps every element of `words` to True. Callers below pass a single
        # token string, so the feature keys are that token's characters.
        return dict.fromkeys(words, True)

    # Load the training corpora; give up if either file is missing.
    corpora = []
    try:
        for path in (r"libai.txt", r"dufu.txt"):
            with open(path, "r", encoding='utf-8') as handle:
                corpora.append(handle.read())
    except FileNotFoundError:
        print("未找到训练数据文件 libai.txt 或 dufu.txt")
        return
    libai_text, dufu_text = corpora

    # Segment, then join/split on whitespace so blank tokens are dropped.
    libai_tokens = " ".join(jieba.cut(libai_text)).split()
    dufu_tokens = " ".join(jieba.cut(dufu_text)).split()

    # Build the labelled training set: Li Bai samples first, then Du Fu.
    train_set = [(word_feats(token), 'lb') for token in libai_tokens]
    train_set += [(word_feats(token), 'df') for token in dufu_tokens]

    # Train the classifier.
    classifier = NaiveBayesClassifier.train(train_set)

    # Classify every token of the user's sentence.
    sentence = input("请输入一句你喜欢的诗：")
    words = list(jieba.cut(sentence))
    labels = [classifier.classify(word_feats(token)) for token in words]

    total = len(words)
    if total <= 0:
        print("无法分析，请检查输入内容")
        return

    # Report the fraction of tokens attributed to each poet.
    lb_share = labels.count('lb') / total
    df_share = labels.count('df') / total
    print(f'李白的可能性：{lb_share * 100:.2f}%')
    print(f'杜甫的可能性：{df_share * 100:.2f}%')


if __name__ == "__main__":
    analyze_poetry_author()

import random

def generate_lottery_numbers():
    """Draw seven distinct numbers from 1 to 35 and print them in colour.

    The seven numbers are sorted ascending. The first six are printed in
    bold red and the last one in bold blue, followed by a colour reset on
    its own line.

    Returns:
        None. The numbers are written to stdout only.

    NOTE(review): because all seven numbers are sorted together, the blue
    number is always the largest of the draw. Confirm whether it should be
    drawn independently instead.
    """
    # random.sample picks without replacement, replacing the manual
    # shuffle-and-copy loop.
    selected = sorted(random.sample(range(1, 36), 7))

    # Coloured output via ANSI escape codes. The original sequences
    # ("0;31;;1") contained an empty SGR parameter, which terminals read
    # as 0 (reset) and which therefore cancelled the colour just set.
    print('\033[0;31;1m', end="")
    print(*selected[:6], end="")
    print('\033[0;34;1m', end=" ")
    print(selected[-1])
    print('\033[0m')  # reset colours


if __name__ == "__main__":
    generate_lottery_numbers()

import random
import xlrd

def auto_excuse_letter():
    """Assemble a self-criticism letter from random phrases in a spreadsheet.

    Reads sheet ``Sheet1`` of ``test.xlsx``, asks for the incident and the
    required length, then concatenates the first cell of randomly chosen
    data rows (row 0 is treated as a header) until the text reaches the
    requested length or 1000 picks have been made.

    Returns:
        None. The letter, or an error message, is printed to stdout.

    NOTE(review): xlrd 2.0 and later only read the legacy ``.xls`` format,
    so opening ``test.xlsx`` fails there and lands in the error branch
    below. Confirm the installed xlrd version or the file format.
    """
    try:
        workbook = xlrd.open_workbook(r'test.xlsx')
        sheet = workbook.sheet_by_name('Sheet1')
    except Exception as e:
        print(f"打开 Excel 失败：{e}")
        return

    # random.randint(1, nrows - 1) raises ValueError on an empty range, so
    # refuse sheets that contain nothing but the header row.
    if sheet.nrows < 2:
        print("Excel 中没有可用的素材行")
        return

    event = input("请输入具体事件：")
    try:
        target_len = int(input("老师要求的字数："))
    except ValueError:
        print("字数必须是整数")
        return

    generated_text = []
    current_len = 0
    attempts = 0
    # Cap on picks so empty cells cannot cause an endless loop.
    max_attempts = 1000

    while current_len < target_len and attempts < max_attempts:
        row_idx = random.randint(1, sheet.nrows - 1)  # skip the header row
        rows = sheet.row_values(row_idx)
        if rows:
            # Always take the first cell. The original fell back to the
            # repr of the whole row list when that cell was not a string.
            segment = str(rows[0])
            generated_text.append(segment)
            # Track the length incrementally instead of re-joining every time.
            current_len += len(segment)
        attempts += 1

    full_text = "".join(generated_text)
    print("\n" + " "*8 + "检讨书")
    print("老师：")
    print(f"我不应该{event}，{full_text}")
    print("再次请老师原谅！")


if __name__ == "__main__":
    auto_excuse_letter()

from time import sleep
from PIL import ImageGrab

def screen_capture_loop():
    """Save one screenshot per second for a user-specified number of minutes.

    Prompts for a duration in minutes and writes ``1.jpg``, ``2.jpg``, ...
    into the working directory, one frame per second.

    Returns:
        None. Progress is printed to stdout.
    """
    # Imported here because the original only imported `time` inside the
    # __main__ guard, so the function raised NameError when imported.
    import time

    try:
        minutes = int(input("请输入想抓屏几分钟："))
    except ValueError:
        print("请输入整数分钟数")
        return
    duration_seconds = minutes * 60

    print("开始抓屏...")
    start_time = time.monotonic()

    for count in range(1, duration_seconds + 1):
        im = ImageGrab.grab()
        # Some platforms return RGBA grabs, which JPEG cannot store.
        if im.mode != "RGB":
            im = im.convert("RGB")
        filename = r"%d.jpg" % (count)
        im.save(filename, 'JPEG')
        print(f"已保存：{filename}")
        # Sleep only until the next one-second tick, so grab and save
        # overhead does not stretch the run beyond the requested duration.
        remaining = start_time + count - time.monotonic()
        if remaining > 0:
            sleep(remaining)


if __name__ == "__main__":
    screen_capture_loop()

from PIL import Image

def create_gif(frame_paths=("1.jpg", "2.jpg", "3.jpg"), output_path="output.gif",
               duration=100, loop=0):
    """Combine still images into an animated GIF.

    Args:
        frame_paths: Image files to use as frames, in playback order.
            Defaults to the original hard-coded ``1.jpg``..``3.jpg``.
        output_path: Destination GIF file. Defaults to ``output.gif``.
        duration: Value passed to Pillow's ``duration`` save option.
        loop: Value passed to Pillow's ``loop`` save option.

    Returns:
        None. Success or failure is reported on stdout.
    """
    # Local import keeps the block self-contained.
    from contextlib import ExitStack

    frame_paths = list(frame_paths)
    if not frame_paths:
        print("错误：未提供任何图片")
        return

    try:
        # ExitStack closes every opened image file, including on failure.
        # The original never closed them.
        with ExitStack() as stack:
            frames = [stack.enter_context(Image.open(path)) for path in frame_paths]
            # The first image is the base frame; the rest are appended.
            first, *rest = frames
            # Save as GIF.
            first.save(output_path, save_all=True, append_images=rest,
                       loop=loop, duration=duration)
        print(f"GIF 生成成功：{output_path}")
    except FileNotFoundError:
        print("错误：找不到指定的图片文件，请检查路径")
    except Exception as e:
        print(f"生成失败：{e}")


if __name__ == "__main__":
    create_gif()

Python 实用小项目：爬虫、聊天机器人与数据分析示例

Python 实用小项目：爬虫、聊天机器人与数据分析示例

环境准备

1. 抓取知乎图片

更多推荐文章

相关免费在线工具

2. 聊天机器人对话模拟

3. AI 分析唐诗作者

4. 彩票随机生成

5. 自动生成检讨书

6. 屏幕截图录制

7. 制作 GIF 动图

结语

更多推荐文章

相关免费在线工具

Python 实用小项目：爬虫、聊天机器人与数据分析示例

Python 实用小项目：爬虫、聊天机器人与数据分析示例

环境准备

1. 抓取知乎图片

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

2. 聊天机器人对话模拟

3. AI 分析唐诗作者

4. 彩票随机生成

5. 自动生成检讨书

6. 屏幕截图录制

7. 制作 GIF 动图

结语

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具