🔧 核心技术栈
- 数据采集 (Crawler): httpx (异步 HTTP 请求), BeautifulSoup4 (HTML 解析)
- 并发控制 (Concurrency): asyncio (协程调度)
- 数据可视化 (GUI): Flet (基于 Flutter 的 Python UI 框架)
- 部署 (Deploy): Android APK / iOS IPA
第一部分:硬核爬虫设计
1. 逆向 API 分析与封装
通过抓包(F12 Network),分析各大平台的搜索接口。为了统一调用,定义一个 CrawlerService 类。
2. asyncio.gather 实现真·并发
用户输入一个关键词,同时向 3-5 个平台发起请求。
3. 反爬与 Cookie 管理
设计 DataHelper 类来专门管理 Cookie 和 Headers。
- 支持从 config.json 动态读取 Cookie(比如 VIP 账号)。
- 随机 User-Agent 生成。
- Referer 防盗链处理。
📱 第二部分:Flet 可视化
有了强大的爬虫后端,需要'皮肤'来展示数据。Flet 允许用 Python 写出类似 Flutter 的原生界面。
1. 列表渲染 (ListView)
爬虫返回的 JSON 数据,直接映射为 Flet 的 UI 组件列表。
2. 移动端音频流处理
对于爬取到的 .mp3 或 .m4a 链接,不直接使用 Pygame(兼容性差),而是直接调用 Flet 的 ft.Audio,底层调用的是 Android 的 ExoPlayer,支持流式播放。
📦 第三部分:从 Python 脚本到 Android APK
这是爬虫工程师想学的技能:如何让你的脚本脱离电脑运行?
- 环境:安装 flet 库。
- 原理:Flet 会自动拉取 Flutter 引擎,将你的 Python 爬虫代码编译成字节码,并连同一个嵌入式 Python 运行时一起打包进 APK 中,因此应用可以脱离电脑独立运行。
命令:在项目根目录运行:
flet build apk
代码示例(注意:以下代码在转载过程中严重损坏——关键字、字符串字面量和大部分标识符均已丢失,仅残留结构轮廓,无法直接运行,请参考原始出处获取完整代码)
# Suppress noisy warnings from optional audio backends BEFORE importing them,
# so the filters are active when pygame emits its import-time banner/warnings.
import warnings

warnings.filterwarnings("ignore", category=UserWarning, module="pygame")
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Standard library
import asyncio
import base64
import json
import os
import random
import re
import time
import urllib.parse
import uuid

# Third-party
import flet as ft
import httpx
import pygame
from bs4 import BeautifulSoup
from mutagen.mp3 import MP3
:
():
.helper = helper
():
url =
params = {: keyword, : , : , : , : }
httpx.AsyncClient(verify=) client:
:
headers = .helper.get_headers()
resp = client.post(url, headers=headers, data=params)
data = resp.json()
songs = data[][]
results = []
s songs:
pic_url = s.get(, {}).get(, )
pic_url s.get():
pic_url = s[][].get(, )
results.append({
: s[],
: s[][][],
: s[],
: s[],
: pic_url,
: ,
:
})
results
:
[]
():
media_id:
media_id = songmid
guid = (random.randint(, ))
file_types = [{: , : , : media_id}, {: , : , : media_id}]
url =
data = {
: {: , : , : {: guid, : , : }},
: {
: , : , : {
: guid, : [songmid] * , : [] * , : .helper.qq_uin, : , : , : [ ft file_types]
}
}
}
httpx.AsyncClient(verify=) client:
:
headers = .helper.get_headers()
resp = client.get(url, params={: json.dumps(data)}, headers=headers)
js = resp.json()
midurlinfos = js.get(, {}).get(, {}).get(, [])
sip = js.get(, {}).get(, {}).get(, [])
info midurlinfos:
info.get():
base = sip[] sip
:
():
search_url =
httpx.AsyncClient(verify=) client:
:
headers = .helper.get_headers()
resp = client.get(search_url, headers=headers)
text = resp.text
text.startswith():
text = text[:-]
text.endswith():
text = text[text.find() + :-]
data = json.loads(text)
songs = data[][][]
results = []
s songs:
songmid = s[]
media_mid = s.get(, s.get(, songmid))
albummid = s[]
pic = albummid
results.append({
: s[],
: s[][][],
: songmid,
: media_mid,
: pic,
: ,
:
})
results
:
[]
():
search_url =
httpx.AsyncClient(verify=) client:
:
headers = .helper.get_headers()
resp = client.get(search_url, headers=headers)
data = resp.json()
songs = data[][]
tasks = []
s songs:
tasks.append(client.get(
,
headers=headers))
detail_resps = asyncio.gather(*tasks, return_exceptions=)
results = []
r detail_resps:
(r, httpx.Response):
:
d = r.json()[]
d[]:
results.append({
: d[],
: d[],
: d[],
: d[],
: d[],
: d[],
:
})
:
results
:
[]
():
tasks = []
platform [, ]:
tasks.append(.search_netease(keyword))
platform [, ]:
tasks.append(.search_qq(keyword))
platform [, ]:
tasks.append(.search_kugou(keyword))
results = asyncio.gather(*tasks)
merged = []
results:
max_len = ((r) r results)
i (max_len):
r results:
i < (r):
merged.append(r[i])
merged
():
url =
httpx.AsyncClient(verify=, follow_redirects=) client:
:
headers = {
: ,
: ,
: ,
:
}
resp = client.get(url, headers=headers, timeout=)
soup = BeautifulSoup(resp.text, )
results = []
iusc_links = soup.select()
link iusc_links:
:
m_str = link.get()
m_str:
m_data = json.loads(m_str)
img_url = m_data.get() m_data.get()
full_url = m_data.get()
img_url:
results.append({: full_url, : img_url})
:
results:
imgs = soup.select()
img imgs:
src = img.get() img.get()
src src.startswith():
results.append({: src, : src})
random.shuffle(results)
results[:]
Exception e:
()
[]
():
results = []
():
:
bili_url =
headers = .helper.get_headers()
headers:
headers[] =
resp = client.get(bili_url, headers=headers)
data = resp.json()
local_res = []
data.get() == data.get() data[].get():
user data[][][:]:
local_res.append({
: ,
: user[],
: ,
: user[].replace(, ),
:
})
local_res
:
[]
():
:
encoded_q = urllib.parse.quote(keyword)
weibo_url =
headers = {
: ,
:
}
resp = client.get(weibo_url, headers=headers)
data = resp.json()
local_res = []
cards = data.get(, {}).get(, [])
count =
card cards:
count >= :
card:
item card[]:
item.get() == item:
u = item[]
local_res.append({
: ,
: u.get(),
: ,
: u.get(, ),
:
})
count +=
local_res
:
[]
httpx.AsyncClient(verify=, timeout=) client:
tasks = []
platform [, ]:
tasks.append(fetch_bili(client))
platform [, ]:
tasks.append(fetch_weibo(client))
tasks:
task_results = asyncio.gather(*tasks, return_exceptions=)
tr task_results:
(tr, ):
results.extend(tr)
platform [, ]:
results.append({
: ,
: ,
: ,
: ,
:
})
platform [, ]:
results.append({
: ,
: ,
: ,
: ,
:
})
results
# Script entry point: hand control to Flet's event loop.
# NOTE(review): `main` must be the app's page-builder coroutine/function defined
# above — its definition was lost in the corrupted section of this file; confirm
# before running.
if __name__ == "__main__":
    ft.run(main)


