二级 Python 考试综合应用题真题及参考代码解析 | 极客日志

Python算法

二级 Python 考试综合应用题真题及参考代码解析

二级 Python 综合应用题实战解析，覆盖 19 套真题，涉及文件读写、字符串处理、jieba 分词、词频统计及数据清洗等核心考点。提供修正后的完整可运行代码，重点讲解字典操作、列表排序及正则匹配技巧，帮助考生掌握数据处理逻辑与编程规范。

DevOpsTeam发布于 2026/3/30更新于 2026/4/250 浏览

第 1 套题

第 1 小问：传感器数据筛选

任务要求：读取 sensor.txt，筛选包含 "earpa001" 的行并写入新文件。

# Copy the records of sensor.txt whose second comma-separated field contains
# "earpa001" into earpa001.txt, keeping the first four fields of each record.
src = open("sensor.txt", "r", encoding="utf-8")
dst = open("earpa001.txt", "w")
for record in src:
    fields = record.strip("\n").split(",")
    if "earpa001" in fields[1]:
        dst.write('{},{},{},{}\n'.format(*fields[:4]))
src.close()
dst.close()

第 2 小问：设备型号统计与排序

任务要求：统计设备型号组合出现次数，按频次降序输出。

# Count how often each "<third field>-<fourth field>" pair occurs in
# earpa001.txt and write the pairs to earpa001_count.txt, most frequent first.
d = {}
with open("earpa001.txt", "r") as fi:
    for line in fi:
        ls = line.strip("\n").split(",")
        if len(ls) < 4:
            # Blank or truncated line: nothing to count.
            continue
        m = ls[2] + "-" + ls[3]
        d[m] = d.get(m, 0) + 1
ls = list(d.items())
# list.sort is stable, so pairs with equal counts keep first-seen order.
ls.sort(key=lambda x: x[1], reverse=True)
with open("earpa001_count.txt", "w") as fo:
    # NOTE(review): the published loop was garbled ("range((ls))" and an empty
    # format call); the "pair,count" line layout is a reconstruction - confirm
    # against the expected output file.
    for pair, times in ls:
        fo.write("{},{}\n".format(pair, times))

相关免费在线工具

加密/解密文本
使用加密算法（如AES、TripleDES、Rabbit或RC4）加密和解密文本明文。在线工具，加密/解密文本在线工具，online
Gemini 图片去水印
基于开源反向 Alpha 混合算法去除 Gemini/Nano Banana 图片水印，支持批量处理与下载。在线工具，Gemini 图片去水印在线工具，online
curl 转代码
解析常见 curl 参数并生成 fetch、axios、PHP curl 或 Python requests 示例代码。在线工具，curl 转代码在线工具，online
Base64 字符串编码/解码
将字符串编码和解码为其 Base64 格式表示形式即可。在线工具，Base64 字符串编码/解码在线工具，online
Base64 文件转换器
将字符串、文件或图像转换为其 Base64 表示形式。在线工具，Base64 文件转换器在线工具，online
Markdown转HTML
将 Markdown（GFM）转为 HTML 片段，浏览器内 marked 解析；与 HTML转Markdown 互为补充。在线工具，Markdown转HTML在线工具，online

# Copy the non-empty lines that follow a "【原文】" marker, up to the next
# "【注释】" marker, from 论语.txt into 论语 - 原文.txt.
source = open("论语.txt", "r")
target = open("论语 - 原文.txt", "w")
copying = False
for raw in source:
    if "【原文】" in raw:
        # The marker line itself is never copied.
        copying = True
        continue
    if "【注释】" in raw:
        copying = False
    text = raw.strip(" \n")
    if copying and text:
        target.write(text + "\n")
source.close()
target.close()

# Remove the parenthesised markers "(0)" through "(29)" from every line of
# 论语 - 原文.txt and store the result in 论语 - 提纯原文.txt.
markers = ['({})'.format(n) for n in range(0, 30)]
source = open("论语 - 原文.txt", "r")
target = open("论语 - 提纯原文.txt", "w")
for text in source:
    for marker in markers:
        text = text.replace(marker, '')
    target.write(text)
source.close()
target.close()

# Print fields 1-3 of every CSV line that contains the text typed by the user.
table = open("PY301-SunSign.csv")
name = input("请输入星座中文名称")
for record in table.read().split("\n"):
    if name not in record:
        continue
    cols = record.split(",")
    print("{}的生日位于{}-{}之间".format(cols[1], cols[2], cols[3]))
table.close()

def _month_day(code):
    """Split a date code into (month, day): 1+2 characters when the code is
    three characters long, otherwise 2+2 characters."""
    if len(code) == 3:
        return code[0], code[1:3]
    return code[0:2], code[2:4]


# For each whitespace-separated number typed by the user, print the row whose
# first field equals that number, with both date codes split into month/day.
f = open("py301-sunsign.csv", "r")
x = input("请输入星座序号（例如，5）：")
ls = [line.strip('\n').split(',') for line in f]
for wanted in x.split():
    for row in ls:
        if row[0] != wanted:
            continue
        m1, d1 = _month_day(row[2])
        m2, d2 = _month_day(row[3])
        print("{}({})的生日是{}月{}日至{}月{}日之间".format(row[1], row[4], m1, d1, m2, d2))
f.close()

# Look up rows by the numbers typed by the user and print their date ranges.
# A number is accepted only when 0 < number < (count of lines in the file);
# anything else, including non-numeric text, prints the error message.
with open("py301-sunsign.csv", 'r') as f:
    ls = [line.strip(' \n').split(',') for line in f]
x = input("请输入星座序号（例如，5）：")
num = x.strip(' \n').split()
for i in num:
    try:
        valid = 0 < int(i) < len(ls)
    except ValueError:
        # Non-numeric input is reported as invalid instead of crashing.
        valid = False
    if valid:
        for row in ls:
            if row[0] == i:
                # Date codes are 3 characters (M + DD) or 4 characters (MM + DD).
                m1 = row[2][0] if len(row[2]) == 3 else row[2][0:2]
                d1 = row[2][1:3] if len(row[2]) == 3 else row[2][2:4]
                # The end date is split by its OWN length; the original tested
                # len(row[2]) here, which mis-splits mixed-length date pairs.
                m2 = row[3][0] if len(row[3]) == 3 else row[3][0:2]
                d2 = row[3][1:3] if len(row[3]) == 3 else row[3][2:4]
                print("{}({})的生日是{}月{}日至{}月{}日之间".format(row[1], row[4], m1, d1, m2, d2))
    else:
        print("输入星座序号有误！")

# Print the most frequent character of 命运.txt (and its count), ignoring the
# punctuation characters listed in the membership test below.
f = open('命运.txt', 'r')
freq = {}
for ch in f.read():
    if ch in "，。？！《》【】''''":
        continue
    freq[ch] = freq.get(ch, 0) + 1
ranked = sorted(freq.items(), key=lambda pair: pair[1], reverse=True)
print("{}:{}".format(ranked[0][0], ranked[0][1]))
f.close()

# Print, on one line with no separators, the ten most frequent characters of
# 命运.txt, ignoring the characters listed in `skipped`.
f = open('命运.txt', 'r')
skipped = "，：。？！《》【】''''\"\n"
freq = {}
for ch in f.read():
    if ch in skipped:
        continue
    freq[ch] = freq.get(ch, 0) + 1
ranked = sorted(freq.items(), key=lambda pair: pair[1], reverse=True)
for pos in range(10):
    print(ranked[pos][0], end="")
f.close()

# Write every character of 命运.txt except newlines, with its count, to
# 命运 - 频次排序.txt as comma-separated "char:count" items, most frequent first.
f = open('命运.txt', 'r')
fi = open('命运 - 频次排序.txt', 'w')
freq = {}
for ch in f.read():
    if ch != "\n":
        freq[ch] = freq.get(ch, 0) + 1
ranked = sorted(freq.items(), key=lambda pair: pair[1], reverse=True)
fi.write(",".join("{}:{}".format(ch, times) for ch, times in ranked))
f.close()
fi.close()

import jieba
# Segment every line of data.txt with jieba and store the tokens in out.txt,
# joined by newlines.
src = open('data.txt', 'r')
lines = src.readlines()
src.close()
out = open('out.txt', 'w')
for line in lines:
    # Only spaces are stripped, so the line's trailing newline is still part
    # of the text handed to jieba.
    tokens = jieba.lcut(line.strip(' '))
    out.write('\n'.join(tokens))
out.close()

import jieba
# Count how many lines of out.txt are exactly each token and report the count
# for "曹操".
with open('out.txt', 'r') as f:
    words = f.readlines()
D = {}
for w in words:
    # Remove only the line terminator; slicing off the last character would
    # damage the final token when the file does not end with a newline.
    token = w.rstrip('\n')
    D[token] = D.get(token, 0) + 1
# .get avoids a KeyError when the word never occurs.
print("曹操出现次数为:{} ".format(D.get("曹操", 0)))

import jieba
# Collect, in first-seen order and without duplicates, the jieba tokens of
# data.txt that are at least three characters long; write one per line to
# out1.txt.
fi = open('data.txt', 'r')
f = open('out1.txt', 'w')
words = []
for line in fi.readlines():
    for word in jieba.lcut(line.strip('\n')):
        if len(word) < 3 or word in words:
            continue
        words.append(word)
f.writelines(word + '\n' for word in words)
fi.close()
f.close()

import jieba
# Count the jieba tokens of data.txt that are at least three characters long
# and write "word:count" lines to out2.txt, most frequent first.
fi = open('data.txt', 'r')
fo = open('out2.txt', 'w')
d = {}
for line in fi.readlines():
    for word in jieba.lcut(line.strip('\n')):
        if len(word) >= 3:
            d[word] = d.get(word, 0) + 1
ranked = sorted(d.items(), key=lambda pair: pair[1], reverse=True)
for word, times in ranked:
    fo.write('{}:{}\n'.format(word, times))
fi.close()
fo.close()

# Rank the records of score301.txt by the sum of every field after the first
# two and write the ten highest-ranked records, unchanged, to cand301.txt.
fi = open("score301.txt")
L = []
for row in fi:
    st = row.strip("\n").split()
    # NOTE: eval() runs on text read from the file; only use trusted input.
    grade = sum(eval(x) for x in st[2:])
    L.append(st + [grade])
L.sort(key=lambda rec: rec[-1], reverse=True)
fo = open("cand301.txt", "w")
for rec in L[0:10]:
    # The appended total is dropped again before writing.
    fo.write(" ".join(rec[:-1]) + "\n")
fi.close()
fo.close()

# Input file: cand301.txt; output file: best301.txt.
# Keep the first two fields of every record whose lowest remaining field is
# at least 60.
fi = open("cand301.txt", "r")
fo = open("best301.txt", "w")
for row in fi:
    s = row.strip("\n").split()
    # NOTE: eval() runs on text read from the file; only use trusted input.
    scores = [eval(x) for x in s[2:]]
    if min(scores) >= 60:
        fo.write(" ".join(s[:2]) + "\n")
fi.close()
fo.close()

# For every line of data.txt containing "alt=", take the text that starts five
# characters after the beginning of "alt=" and runs to the next double quote,
# and write it as one line of univ.txt.
fo = open('data.txt', 'r')
lines = fo.read().split('\n')
fo.close()
S = []
for line in lines:
    if "alt=" not in line:
        continue
    begin = line.find('alt=') + 5
    # The search for the closing quote excludes the line's last character.
    end = line.find('"', begin, -1)
    S.append(line[begin:end])
f = open("univ.txt", "w")
for school in S:
    f.write(school + '\n')
f.close()

# Print the names in univ.txt that contain "大学" or "学院" (but not "大学生")
# and count them: n for names containing "大学", k for names containing only
# "学院".
n = 0
k = 0
with open("univ.txt", "r") as f:
    lines = f.read().split('\n')
for school in lines:
    if "大学生" in school:
        # Excluded from printing, so it must be excluded from the counts too;
        # the original still counted these lines, making the totals disagree
        # with the printed list.
        continue
    if "大学" in school:
        print(school)
        n += 1
    elif "学院" in school:
        print(school)
        k += 1
print("包含大学的名称数量是{}".format(n))
print("包含学院的名称数量是{}".format(k))

import jieba
def _print_top10(label, lines):
    """Print `label` followed by the most frequent words found in `lines`.

    Each line is segmented with jieba.lcut and only words of length >= 2 are
    counted. At most ten "word:count" items are printed, joined by commas.
    """
    d = {}
    for text in lines:
        for w in jieba.lcut(text):
            if len(w) >= 2:
                d[w] = d.get(w, 0) + 1
    lt = list(d.items())
    lt.sort(key=lambda x: x[1], reverse=True)
    # Joining puts a comma between EVERY pair of items (the original printed
    # the first nine with no separator) and copes with fewer than ten words.
    print(label + ','.join('{}:{}'.format(w, c) for w, c in lt[:10]))


with open('data2018.txt', 'r') as f2018:
    line2018 = f2018.read().split('\n')
with open('data2019.txt', 'r') as f2019:
    line2019 = f2019.read().split('\n')
_print_top10('2018:', line2018)
_print_top10('2019:', line2019)

import jieba
def _top10_words(lines):
    """Return up to ten most frequent words (length >= 2) found in `lines`,
    most frequent first, using jieba.lcut for segmentation."""
    d = {}
    for text in lines:
        for w in jieba.lcut(text):
            if len(w) >= 2:
                d[w] = d.get(w, 0) + 1
    lt = list(d.items())
    lt.sort(key=lambda x: x[1], reverse=True)
    return [w for w, _ in lt[:10]]


with open('data2018.txt', 'r') as f2018:
    line2018 = f2018.read().split('\n')
with open('data2019.txt', 'r') as f2019:
    line2019 = f2019.read().split('\n')
ls2018 = _top10_words(line2018)
ls2019 = _top10_words(line2019)
# Words present in both top lists, in 2018 order.
lslike = [w for w in ls2018 if w in ls2019]
for w in lslike:
    ls2018.remove(w)
    ls2019.remove(w)
# ','.join gives the same output as the original item-by-item printing but
# does not raise IndexError when a list is empty (e.g. no shared words).
print('共有词语:' + ','.join(lslike))
print('2019 特有:' + ','.join(ls2019))
print('2018 特有:' + ','.join(ls2018))

import jieba
# Copy data.txt to clean.txt with the punctuation characters (and newlines)
# listed in `x` removed.
with open('data.txt', 'r') as f:
    data = f.read()
# In the original literal the quote characters ended the single-quoted string
# early, so it only worked through implicit concatenation and never filtered
# the apostrophe. Quotes are now escaped; the full-width quotes are included
# on the assumption that they were the intended characters.
# NOTE(review): confirm the intended quote characters.
x = '，。？、‘’“”\'"；：、）\n（！'
s = ''.join(ch for ch in data if ch not in x)
with open('clean.txt', 'w') as f:
    f.write(s)

import jieba
# Print the most frequent jieba tokens of clean.txt that are at least three
# characters long, as comma-separated "word:count" items (at most ten).
with open('clean.txt', 'r') as f:
    data = f.read()
l = jieba.lcut(data)
d = {}
for i in l:
    if len(i) >= 3:
        d[i] = d.get(i, 0) + 1
lt = list(d.items())
lt.sort(key=lambda x: x[1], reverse=True)
# Slicing + join prints the same text as before when ten words exist and no
# longer raises IndexError when there are fewer.
print(','.join('{}:{}'.format(w, c) for w, c in lt[:10]))

import jieba
# Statistics for 红楼梦.txt: number of punctuation marks from `point`, number
# of distinct words of length >= 2, and the two most frequent such words.
fp = open("红楼梦.txt", encoding='utf-8')
ss = fp.read()
fp.close()
point = "，。：；？"
cnt = sum(1 for ch in ss if ch in point)
print(cnt)
dc = [w for w in jieba.lcut(ss) if len(w) >= 2]
print(len(set(dc)))
d = {}
for w in dc:
    d[w] = d.get(w, 0) + 1
lt = sorted(d.items(), key=lambda pair: pair[1], reverse=True)
for word, times in lt[0:2]:
    print("{},{}".format(word, times))

import jieba
# Statistics for 八十天环游地球.txt: number of non-empty lines, number of
# characters and jieba tokens left after removing the characters in `dels`,
# and a copy of every line containing "章 " in a separate file.
with open("八十天环游地球.txt", "r") as fs:  # the original never closed this file
    lss = fs.readlines()
dels = ' "？！：，。'
lens = 0
new_list = []
for lr in lss:
    if lr != "\n":
        lens += 1
    new_list.append("".join(ch for ch in lr if ch not in dels))
print("共{}个非空行".format(lens))
alens = 0
word_list = []
# Count over the CLEANED lines; the original iterated the raw lines here, so
# the "remaining characters" figure ignored the removal step entirely.
for lr in new_list:
    alens += len(lr)
    word_list.extend(jieba.lcut(lr))
wlens = len(word_list)
print("剩余字符数{}, 词语数{}".format(alens, wlens))
with open("八十天环游地球 - 章节.txt", "w") as fo:
    # Chapter detection uses the raw lines because the space in "章 " is one
    # of the characters removed from the cleaned lines.
    for lr in lss:
        if "章 " in lr:
            fo.write(lr)

# Question 1: report how many lines (students) the source file has.
f1 = open('data301.txt', 'rt')
stu_lst = f1.readlines()
print("素材文件中学生的人数是{}".format(len(stu_lst)))
f1.close()
# Question 2: average score over all students. Each line is split at ':' into
# a name and a comma-separated tail whose second item is taken as the score.
stu_dic = {}
for stu_str in stu_lst:
    stu = stu_str.replace('\n', '').split(':')
    stu_dic[stu[0]] = stu[1].split(',')[1]
avg = sum(int(value) for value in stu_dic.values()) / len(stu_dic)
print("所有学生的平均分是{:.1f}".format(avg))
# Question 3: write "name,score" lines to the result file.
f2 = open('result301.txt', 'wt')
for student, points in stu_dic.items():
    f2.write(student + "," + points + "\n")
f2.close()

# Print the number of "<a" occurrences and of lines containing ".JPG" in
# data301.txt, then write the "http://" ... ".JPG" part of each such line to
# images.txt.
fi = open("data301.txt", "r")
ss = fi.read()
print(ss.count("<a"))
fi.seek(0)
jpg_rows = [row for row in ss.split("\n") if ".JPG" in row]
fi.close()
print(len(jpg_rows))
fp = open("images.txt", "w")
for row in jpg_rows:
    begin = row.find("http://")
    stop = row.find(".JPG")
    # +4 keeps the ".JPG" extension itself in the written link.
    fp.write(row[begin:stop + 4] + "\n")
fp.close()

stop_word = ['我们','同时','之后','更好','这些','进行']
# Question 1: read the file and report its character count.
f1 = open('data301.txt', 'rt')
txt = f1.read()
print("素材文件字符个数是{}。".format(len(txt)))
f1.close()
# Question 2: word frequencies for words longer than one character that are
# not stop words; report how many distinct words there are and the top one.
import jieba
txt_wordslist = jieba.lcut(txt)
word_count = {}
for word in txt_wordslist:
    if len(word) <= 1 or word in stop_word:
        continue
    word_count[word] = word_count.get(word, 0) + 1
print("长度大于 1 且不相同的词的个数是{}。".format(len(word_count)))
wordlist = sorted(word_count.items(), key=lambda pair: pair[1], reverse=True)
for candidate, _ in wordlist:
    if candidate not in stop_word:
        topword = candidate
        break
print("词频最大的词是：{}".format(topword))
# Question 3: write every '。'-delimited sentence containing the top word.
sentence_list = txt.split('。')
fo = open("out301.txt", "w")
fo.writelines(sentence + "\n" for sentence in sentence_list if topword in sentence)
fo.close()

# Pair each '"name":' line of data301.txt with the first '"value":' line that
# follows it, then report the number of pairs, the name with the largest
# value, and how many values are >= 10000 or exactly 0.
fi = open("data301.txt", "r")
dc = {}
name = ''
count = 0
awaiting_value = True
for line in fi:
    if '"name":' in line:
        name = line.split(':')[1].strip(' ,"\n')
        awaiting_value = True
    elif '"value":' in line and awaiting_value:
        dc[name] = int(line.split(':')[1].strip(' \n'))
        # Further "value" lines are ignored until the next "name" line.
        awaiting_value = False
        count += 1
fi.close()
print("一共有{}个国家".format(count))
lt = sorted(dc.items(), key=lambda pair: pair[1], reverse=True)
print("确诊人数最多的国家是{}，人数是{}".format(lt[0][0], lt[0][1]))
lw = sum(1 for _, cases in lt if cases >= 10000)
lz = sum(1 for _, cases in lt if cases == 0)
print("确诊人数超过 1W 的国家有{}个".format(lw))
print("确诊人数为 0 的国家有{}个".format(lz))

# Question 1: print the first five lines of the CSV file.
with open("data301.csv", "r") as f:
    lines = f.read().split("\n")
# Slicing avoids an IndexError when the file has fewer than five lines.
for line in lines[:5]:
    print(line)


# Question 2: totals and maxima of columns 1-3.
def total(lst):
    """Return the sum of the numbers in `lst` (0 for an empty list)."""
    sum_val = 0
    for i in lst:
        sum_val += i
    return sum_val


# Parse the data rows once, skipping the first line and any empty line, so
# every question below works on the same set of rows.
rows = [line.split(',') for line in lines[1:] if line != ""]
# NOTE: eval() runs on text read from the file; only use trusted input.
yslst = [eval(row[1]) for row in rows]
zzlst = [eval(row[2]) for row in rows]
zylst = [eval(row[3]) for row in rows]
print('统计，疑似，重症，治愈')
print('总数，{}, {}, {}'.format(total(yslst), total(zzlst), total(zylst)))
print('最大值，{}, {}, {}'.format(max(yslst), max(zzlst), max(zylst)))
# Question 3: first-column values of the rows whose column 1 exceeds the
# average, highest first.
d = {}
for row, value in zip(rows, yslst):
    d[row[0]] = value
l = list(d.items())
l.sort(key=lambda x: x[1], reverse=True)
# Divide by the number of parsed rows; the original used len(lines) - 2,
# which is only right when the file ends with exactly one empty line.
avg = total(yslst) / len(yslst)
ls = [key for key, value in l if value > avg]
print(','.join(ls))

import jieba
# Report how often each of five punctuation marks occurs in data301.txt, then
# for word lengths 2-5 the number of jieba tokens of that length and the most
# frequent one.
fs = open("data301.txt", "r")
lss = fs.read()
fs.close()
for c in "，。？！：":
    print('"{}"的个数为{}个'.format(c, lss.count(c)))
ds = {}
for w in jieba.lcut(lss):
    # Group the tokens by their length.
    ds.setdefault(len(w), []).append(w)
for i in range(2, 6):
    group = ds[i]
    print('{}字词有{}个'.format(i, len(group)))
    d = {}
    for token in group:
        d[token] = d.get(token, 0) + 1
    lt = sorted(d.items(), key=lambda pair: pair[1], reverse=True)
    print(lt[0][0])

import jieba
# Analyse "speaker:speech" lines of data301.txt: count non-empty lines, list
# the speakers, and for each speaker report the number of distinct words
# longer than one character and the most frequent one.
with open("data301.txt", "r") as fs:
    lss = fs.readlines()
lens = 0
for lr in lss:
    if lr != "\n":
        lens += 1
print("共{}个非空行。".format(lens))
ts = {}
for lr in lss:
    text = lr.strip()
    if text == "":
        continue
    # Split at the FIRST colon only, so colons inside the speech are kept;
    # the original split on every colon and dropped the text after the second.
    speaker, sep, speech = text.partition(":")
    if not sep:
        # No colon on this line: there is no speaker to attribute it to.
        continue
    ts.setdefault(speaker, []).append(speech)
names = ts.keys()
print("共{}个人发言：{}".format(len(ts), ",".join(names)))
for r in ts.keys():
    words = []
    for i in ts[r]:
        words += jieba.lcut(i)
    d = {}
    for i in words:
        if len(i) > 1:
            d[i] = d.get(i, 0) + 1
    lt = list(d.items())
    lt.sort(key=lambda x: x[1], reverse=True)
    # A speaker may have no word longer than one character; report an empty
    # top word instead of raising IndexError.
    top = lt[0][0] if lt else ""
    print("{}说了{}个词，最多的词是：{}".format(r, len(lt), top))
二级 Python 考试综合应用题真题及参考代码解析

第 1 套题

第 1 小问：传感器数据筛选

第 2 小问：设备型号统计与排序

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

第 2 套题

第 1 小问：《论语》原文提取

第 2 小问：去除数字括号

第 3 套题

第 1 小问：星座日期查询

第 2 小问：序号匹配与日期格式化

第 3 小问：增强版序号校验

第 4 套题

第 1 小问：字符频率统计

第 2 小问：高频词前缀输出

第 3 小问：频次排序写入文件

第 5 套题

第 1 小问：文本清洗与分词

第 2 小问：特定词频统计

第 6 套题

第 1 小问：去重长词提取

第 2 小问：词频排序输出

第 7 套题

第 1 小问：成绩计算与排名

第 2 小问：单科及格筛选

第 8 套题

第 1 小问：HTML 属性提取

第 2 小问：大学学院名称统计

第 9 套题

第 1 小问：两年词频对比

第 2 小问：特有词分析

第 10 套题

第 1 小问：特殊字符清洗

第 2 小问：长词频统计

第 11 套题

例题：红楼梦文本分析

第 12 套题

例题：八十天环游地球

第 13 套题

例题：学生成绩处理

第 14 套题

例题：图片链接提取

第 15 套题

例题：停用词过滤与句子定位

第 16 套题

例题：疫情数据统计

第 17 套题

例题：CSV 数据分析

第 18 套题

例题：标点与字词统计

第 19 套题

例题：发言记录分析

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具