批量解析 EML 邮件文件,AI 辅助快速生成年终总结
在年底撰写工作总结时,邮件记录是最真实的工作轨迹凭证。手动整理数十上百封邮件不仅耗时,还容易遗漏关键信息。本文将分享一个 Python 脚本,可批量解析 EML 格式邮件,自动汇总发件人、收件人、主题、正文等核心信息,生成结构化的文本报告,直接投喂 AI 即可快速生成高质量年终总结。
教程核心价值
- 解放双手:批量处理任意数量 EML 文件,无需手动打开每封邮件
- 结构化汇总:自动提取邮件关键信息,按统一格式整理
- AI 友好:生成的纯文本报告可直接作为 AI 提示词,快速生成年终总结
- 编码兼容:完美解决中文邮件、中文路径乱码问题
环境准备
1. 安装 Python
确保本地安装 Python 3.7 及以上版本(推荐 3.9+),可从 Python 官网下载安装。
2. 安装依赖库
打开命令提示符(CMD)或终端,执行以下命令安装所需依赖:
pip install html2text pywin32
- html2text:将邮件中的 HTML 格式正文转为纯文本
- pywin32:解决 Windows 系统中文路径访问问题(仅 Windows 需要安装,脚本只在 Windows 上导入它;其他系统执行 pip install html2text 即可)
完整代码实现
将以下代码保存为 eml_summary.py 文件:
import email
import quopri
import base64
import html2text
from email.header import decode_header
import os
import glob
from datetime import datetime
import sys
# Work around non-ASCII (e.g. Chinese) path handling on Windows.
# pywin32 is only installable on Windows, so both imports must stay inside
# the platform guard; on other systems the names are simply not defined.
if sys.platform == 'win32':
    import win32api
    import win32con
def decode_email_header(header_value):
    """Decode a mail header (RFC 2047 encoded-words) into a plain ``str``.

    Args:
        header_value: Raw header as returned by ``Message.get`` (a ``str`` or
            an ``email.header.Header``). Falsy values yield ``""``.

    Returns:
        The decoded header text. Fragments are concatenated in order.

    Decoding strategy for each byte fragment:
        1. Use the charset declared in the encoded-word, replacing
           undecodable bytes instead of failing.
        2. If no charset is declared, or the declared one is unknown to
           Python, try UTF-8, then GBK (common in Chinese mail).
        3. Fall back to Latin-1, which accepts every byte value.
    """
    # Local import keeps this block self-contained; the file only imports
    # ``decode_header`` from ``email.header`` at module level.
    from email.errors import HeaderParseError

    if not header_value:
        return ""

    try:
        decoded_parts = decode_header(header_value)
    except HeaderParseError:
        # A malformed encoded-word should not abort parsing of the whole
        # message; surface the raw header text instead.
        return str(header_value)

    header_parts = []
    for part, encoding in decoded_parts:
        if not isinstance(part, bytes):
            # Pure-ASCII headers come back from decode_header as str already.
            header_parts.append(part)
            continue

        if encoding:
            try:
                header_parts.append(part.decode(encoding, errors="replace"))
                continue
            except LookupError:
                # Charset label Python does not know: guess below.
                pass

        for candidate in ("utf-8", "gbk"):
            try:
                header_parts.append(part.decode(candidate))
                break
            except UnicodeDecodeError:
                continue
        else:
            # Latin-1 maps every byte to a code point, so this cannot fail.
            header_parts.append(part.decode("latin-1"))

    return "".join(header_parts)
def decode_base64_content(content, charset="utf-8"):
    """Base64-decode ``content`` and return it as text.

    Args:
        content: Base64 data as ``bytes`` or ASCII ``str``.
        charset: Charset of the decoded bytes. May be ``None`` or an unknown
            label, in which case the charset is guessed.

    Returns:
        The decoded text. If ``content`` is not valid Base64 it is treated as
        already-decoded data and only the charset decoding is applied, so the
        function never raises for malformed input.
    """
    try:
        decoded_bytes = base64.b64decode(content)
    except ValueError:
        # binascii.Error is a ValueError subclass (bad padding etc.).
        # Bind decoded_bytes here so the charset step below always has data
        # to work on.
        if isinstance(content, bytes):
            decoded_bytes = content
        else:
            decoded_bytes = str(content).encode("utf-8", errors="replace")

    if charset:
        try:
            return decoded_bytes.decode(charset, errors="replace")
        except LookupError:
            # Unknown charset label: fall through to guessing.
            pass

    for candidate in ("utf-8", "gbk"):
        try:
            return decoded_bytes.decode(candidate)
        except UnicodeDecodeError:
            continue
    # Latin-1 accepts every byte value, so this is a guaranteed last resort.
    return decoded_bytes.decode("latin-1")
def decode_email_body(part):
    """Return the text of one message part as ``str``.

    Args:
        part: An ``email.message.Message`` (the whole message or one MIME
            part).

    Returns:
        The decoded body text, or ``""`` when the part has no payload of its
        own (``get_payload(decode=True)`` returns ``None`` for multipart
        containers).

    ``get_payload(decode=True)`` already reverses the
    Content-Transfer-Encoding (base64 / quoted-printable), so only the
    charset decoding is done here; decoding the transfer encoding a second
    time would corrupt the text.
    """
    payload = part.get_payload(decode=True)
    if payload is None:
        return ""

    charset = part.get_content_charset()
    if charset:
        try:
            return payload.decode(charset, errors="replace")
        except LookupError:
            # Declared charset is not a codec Python knows: guess instead.
            pass

    for candidate in ("utf-8", "gbk"):
        try:
            return payload.decode(candidate)
        except UnicodeDecodeError:
            continue
    # Latin-1 accepts every byte value, so this cannot fail.
    return payload.decode("latin-1")
def extract_email_content(eml_file_path):
    """Parse one ``.eml`` file into a flat dict of display fields.

    Args:
        eml_file_path: Path of the ``.eml`` file to read.

    Returns:
        A dict with the keys ``文件名``, ``主题``, ``发件人``, ``收件人``,
        ``抄送``, ``发送时间``, ``正文`` and ``状态``. ``状态`` is ``"成功"``
        on success. On any failure the function does not raise: it returns
        the same keys with ``状态`` set to ``"失败"`` and the error text
        stored under ``主题``.

    NOTE(review): the key names, status values and html2text options were
    reconstructed; confirm them against the intended report format.
    """
    # Take the display name from the caller's path. After GetShortPathName
    # the basename would be an 8.3 alias such as "REPORT~1.EML".
    file_name = os.path.basename(eml_file_path)

    try:
        read_path = eml_file_path
        if sys.platform == "win32":
            try:
                read_path = win32api.GetShortPathName(eml_file_path)
            except Exception:
                # Short names can be disabled on a volume; the original
                # path is still usable with open().
                read_path = eml_file_path

        with open(read_path, "rb") as f:
            msg = email.message_from_bytes(f.read())

        email_info = {
            "文件名": file_name,
            "主题": decode_email_header(msg.get("Subject", "")),
            "发件人": decode_email_header(msg.get("From", "")),
            "收件人": decode_email_header(msg.get("To", "")),
            "抄送": decode_email_header(msg.get("Cc", "")),
            "发送时间": decode_email_header(msg.get("Date", "")),
            "正文": "",
            "状态": "成功",
        }

        body_text = ""
        body_html = ""
        # walk() yields the message itself for non-multipart mail, so one
        # loop covers both cases.
        for part in msg.walk():
            content_disposition = str(part.get("Content-Disposition"))
            if "attachment" in content_disposition.lower():
                continue
            content_type = part.get_content_type()
            # Keep the first body of each kind so a quoted or forwarded
            # message further down does not overwrite the real body.
            if content_type == "text/plain" and not body_text:
                body_text = decode_email_body(part)
            elif content_type == "text/html" and not body_html:
                body_html = decode_email_body(part)

        if body_text:
            email_info["正文"] = body_text
        elif body_html:
            # Only HTML available: flatten it to plain text.
            h2t = html2text.HTML2Text()
            h2t.ignore_links = True
            h2t.ignore_images = True
            h2t.unicode_snob = True
            h2t.body_width = 0  # 0 disables hard line wrapping
            email_info["正文"] = h2t.handle(body_html)
    except Exception as e:
        # Batch boundary: one unreadable file must not stop the run.
        error_msg = f"解析失败: {e}"
        email_info = {
            "文件名": file_name,
            "主题": error_msg,
            "发件人": "",
            "收件人": "",
            "抄送": "",
            "发送时间": "",
            "正文": "",
            "状态": "失败",
        }
        print(f"  {file_name} {error_msg}")

    return email_info
def save_summary_to_txt(email_info_list, output_file_path):
    """Write all parsed mails into one UTF-8 plain-text report.

    Args:
        email_info_list: List of dicts, one per mail. Each non-empty field is
            written as ``key: value``; the ``正文`` (body) field is written
            on its own lines below the key.
        output_file_path: Destination file. Missing parent directories are
            created.

    Returns:
        None.

    NOTE(review): the banner and label wording were reconstructed; adjust
    them to the intended report layout if it differs.
    """
    output_dir = os.path.dirname(output_file_path)
    if output_dir:
        # exist_ok avoids the check-then-create race of exists()+makedirs().
        os.makedirs(output_dir, exist_ok=True)

    total = len(email_info_list)
    success = sum(1 for info in email_info_list if info.get("状态") == "成功")

    # errors="replace" keeps the write from failing on characters that
    # cannot be encoded (e.g. lone surrogates from broken headers).
    with open(output_file_path, "w", encoding="utf-8", errors="replace") as f:
        f.write("=" * 80 + "\n")
        f.write("EML 邮件批量解析汇总报告\n")
        f.write(f"生成时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write(f"邮件总数: {total}\n")
        f.write(f"解析成功: {success}\n")
        f.write(f"解析失败: {total - success}\n")
        f.write("=" * 80 + "\n\n")

        for idx, email_info in enumerate(email_info_list, 1):
            f.write(f"【邮件 {idx}】\n")
            f.write("-" * 80 + "\n")
            for key, value in email_info.items():
                if not value:
                    # Skip empty fields to keep the report compact.
                    continue
                if key == "正文":
                    f.write(f"{key}:\n{value}\n")
                else:
                    f.write(f"{key}: {value}\n")
            f.write("\n" + "=" * 80 + "\n\n")
def main():
    """Parse every ``.eml`` file in the current directory into one report.

    Scans the working directory (not recursively), parses each mail with
    ``extract_email_content``, writes the report with
    ``save_summary_to_txt`` and prints progress plus a success/failure
    summary. Returns ``None``; when no ``.eml`` file is present nothing is
    written.

    NOTE(review): the output file name and console messages were
    reconstructed; confirm the wording.
    """
    output_txt = "邮件汇总报告.txt"
    current_dir = os.getcwd()

    # Sorted so the report order is stable; os.listdir order is arbitrary.
    eml_files = sorted(
        os.path.join(current_dir, name)
        for name in os.listdir(current_dir)
        if name.lower().endswith(".eml")
    )

    if not eml_files:
        print("当前目录下未找到任何 .eml 文件")
        return

    print(f"共找到 {len(eml_files)} 个 EML 文件,开始解析...")
    print("=" * 60)

    email_info_list = []
    for eml_file in eml_files:
        print(f"正在解析: {os.path.basename(eml_file)}")
        email_content = extract_email_content(eml_file)
        email_info_list.append(email_content)
        status = "成功" if email_content.get("状态") == "成功" else "失败"
        print(f"  解析结果: {status}")
        print()

    output_path = os.path.join(current_dir, output_txt)
    save_summary_to_txt(email_info_list, output_path)

    print("=" * 60)
    print("全部邮件解析完成")
    print(f"汇总报告已保存到: {output_path}")
    print("可将报告内容直接提供给 AI 生成年终总结")
    print()

    failed_files = [
        info.get("文件名", "")
        for info in email_info_list
        if info.get("状态") != "成功"
    ]
    success_count = len(email_info_list) - len(failed_files)
    fail_count = len(failed_files)
    print(f"解析成功: {success_count} 封")
    print(f"解析失败: {fail_count} 封")

    if failed_files:
        print("解析失败的文件:")
        # Show only the first few names so a large failed batch does not
        # flood the console.
        for file in failed_files[:5]:
            print(f"  - {file}")
        if len(failed_files) > 5:
            print(f"  ... 以及另外 {len(failed_files) - 5} 个文件")
if __name__ == "__main__":
    if sys.platform == "win32":
        # Switch the Windows console to the UTF-8 code page so the Chinese
        # progress messages print correctly.
        # NOTE(review): the exact command was reconstructed; confirm.
        os.system("chcp 65001 >nul")
    main()

