项目简介
- 功能:上传 `.docx` 文件 → 返回 Markdown 文本 + 提取图片(通过 API 提供图片访问 URL),前端支持预览与打包下载 ZIP。
- 适用场景:将 Word 文档内容迁移到博客、技术文档、知识库时快速生成 Markdown。
环境与依赖
- Python 3.8+(示例中也可用 Python 3.13)
- 依赖见 `requirements.txt`:
python-docx==0.8.11
Flask==2.3.0
Flask-CORS==4.0.0
Werkzeug==2.3.0
python -m venv venv
source venv/bin/activate
pip install -r requirements.txt
项目结构(简要)
app.py — Flask 后端,负责接收上传、解析 DOCX、提取图片并返回 Markdown。
templates/index.html — 前端页面,基于 Vue 3,提供上传、展示与下载功能。
uploads/ — 存放上传的文件与提取的图片(运行时生成)。
完整代码
""" DOCX 转 Markdown 的 Flask API 后端 """
from flask import Flask, request, jsonify, send_file, render_template
from flask_cors import CORS
from docx import Document
import os
import io
import json
from pathlib import Path
from werkzeug.utils import secure_filename
# NOTE(review): this listing lost keywords, several string literals, numbers and
# all indentation during extraction; some lines below are incomplete as shown.
# Flask application object; the templates directory is also used as the static folder.
app = Flask(__name__, static_folder='templates')
# Enable CORS on the application with flask_cors defaults.
CORS(app)
# Directory that receives uploaded files.
UPLOAD_FOLDER = 'uploads'
# NOTE(review): 'doc' is whitelisted, but python-docx's Document() reads only
# .docx packages — presumably legacy .doc uploads fail during conversion; confirm.
ALLOWED_EXTENSIONS = {'docx', 'doc'}
# Upload size limit; the numeric factors are missing from this listing.
MAX_FILE_SIZE = * *
# Presumably `if not os.path.exists(UPLOAD_FOLDER):` — creates the upload
# directory as an import-time side effect. TODO confirm against the original.
os.path.exists(UPLOAD_FOLDER):
os.makedirs(UPLOAD_FOLDER)
# The config keys are missing; presumably 'UPLOAD_FOLDER' and Flask's
# 'MAX_CONTENT_LENGTH' — TODO confirm.
app.config[] = UPLOAD_FOLDER
app.config[] = MAX_FILE_SIZE
def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is whatever follows the last dot, compared case-insensitively.
    Names without a dot are rejected.

    Args:
        filename: Client-supplied file name.

    Returns:
        bool: True when the extension is whitelisted, False otherwise.
    """
    # NOTE(review): the `def` line, the keywords and the literals were missing
    # from the listing; they are restored here from the call site
    # (`allowed_file(file.filename)`) and the surviving tokens.
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
# NOTE(review): keywords, identifiers, literals and indentation were lost in
# extraction. Presumably `def extract_images_from_run(run, image_dir, image_counter):`
# judging by the call sites further down — confirm against the original.
# Saves every image embedded in one run into image_dir and returns the list of
# saved images together with the updated running counter.
():
images = []
# Search the run's XML for drawing elements, then for blip elements inside each
# (the tag names passed to findall() are missing from this listing).
drawing run.element.findall():
blip drawing.findall():
# Relationship id of the embedded picture (attribute name missing).
embed_id = blip.get()
embed_id:
:
# Resolve the relationship id to the package part that holds the image bytes.
image_part = run.part.rels[embed_id].target_part
image_data = image_part.blob
content_type = image_part.content_type
# Maps the part's content type to a file extension; the six entries and the
# fallback passed to .get() are missing from this listing.
ext_map = {
: ,
: ,
: ,
: ,
: ,
:
}
ext = ext_map.get(content_type, )
# The counter is advanced before the file name is built from it
# (increment value and file-name pattern are missing).
image_counter +=
image_filename =
image_path = os.path.join(image_dir, image_filename)
# Presumably `with open(image_path, 'wb') as f:` — writes the raw image bytes.
(image_path, ) f:
f.write(image_data)
# Record file name, on-disk path and sequence number (dict keys missing).
images.append({
: image_filename,
: image_path,
: image_counter
})
# NOTE(review): the handler body is not visible; presumably the exception is
# swallowed so that one bad image does not abort the conversion — confirm.
Exception e:
images, image_counter
# NOTE(review): keywords, identifiers, literals and indentation were lost in
# extraction. Presumably `def convert_docx_to_markdown(docx_path, image_dir=None):`
# judging by the body and the caller — confirm against the original.
# Converts one DOCX file into Markdown text, saving embedded images into
# image_dir, and returns the Markdown string plus the image counter.
():
# Fall back to a directory under UPLOAD_FOLDER when no image_dir is supplied
# (sub-directory name missing), and make sure the directory exists.
image_dir :
image_dir = os.path.join(UPLOAD_FOLDER, )
os.path.exists(image_dir):
os.makedirs(image_dir)
doc = Document(docx_path)
markdown_content = []
image_counter =
para doc.paragraphs:
# Emit image references for every picture found in the paragraph's runs.
run para.runs:
images, image_counter = extract_images_from_run(run, image_dir, image_counter)
img images:
# Relative path used inside the Markdown image link (prefix and key missing).
rel_path = os.path.join(, img[])
markdown_content.append()
markdown_content.append()
text = para.text.strip()
text:
markdown_content.append()
# Style name of the paragraph; the fallback for a missing style is not visible.
style = para.style.name para.style
# Six tests against the style name follow; presumably one per heading
# level — the compared names and the emitted Markdown are missing. TODO confirm.
style:
markdown_content.append()
style:
markdown_content.append()
style:
markdown_content.append()
style:
markdown_content.append()
style:
markdown_content.append()
style:
markdown_content.append()
:
# Ordinary paragraph: prefer the run-level formatted text, fall back to the
# plain stripped text when process_runs() yields nothing.
formatted_text = process_runs(para.runs)
formatted_text:
markdown_content.append(formatted_text)
:
markdown_content.append(text)
# NOTE(review): tables are iterated after the paragraph loop, so they appear to
# be appended after all paragraph text rather than at their position in the
# document — confirm whether that is intended.
table doc.tables:
markdown_content.append()
markdown_table, image_counter = convert_table_to_markdown(table, image_dir, image_counter)
markdown_content.extend(markdown_table)
markdown_content.append()
# Join the collected pieces (separator string missing) and return them with the counter.
result = .join(markdown_content)
result, image_counter
def process_runs(runs):
    """Join the text of *runs*, wrapping formatted runs in inline markup.

    Runs with empty text are skipped. The joined result is stripped of
    leading and trailing whitespace.

    Args:
        runs: Iterable of python-docx run objects; only ``text``, ``bold``,
            ``italic`` and ``underline`` are read.

    Returns:
        str: The concatenated, formatted text ('' when there is nothing to emit).
    """
    # NOTE(review): the keywords and the three wrapper templates were missing
    # from the listing. Control flow is restored from the surviving tokens; the
    # wrappers (** for bold, * for italic, <u> for underline) are the
    # conventional choices and should be confirmed against the original.
    result = []
    for run in runs:
        text = run.text
        if text:
            if run.bold:
                text = f'**{text}**'
            if run.italic:
                text = f'*{text}*'
            if run.underline:
                text = f'<u>{text}</u>'
            result.append(text)
    return ''.join(result).strip()
# NOTE(review): keywords, identifiers, literals and indentation were lost in
# extraction. Presumably `def convert_table_to_markdown(table, image_dir, image_counter):`
# judging by the call site — confirm against the original.
# Turns one table into a list of Markdown lines and returns it together with
# the updated image counter.
():
markdown_lines = []
# Presumably `for i, row in enumerate(table.rows):` — the index is used below
# to place the separator row.
i, row (table.rows):
cells = row.cells
row_content = []
cell cells:
cell_parts = []
para cell.paragraphs:
# Images inside the cell are extracted exactly as for body paragraphs.
run para.runs:
images, image_counter = extract_images_from_run(run, image_dir, image_counter)
img images:
rel_path = os.path.join(, img[])
cell_parts.append()
para_text = para.text.strip()
para_text:
cell_parts.append(para_text)
# Collapse the cell's pieces into one string (joiner missing).
cell_text = .join(cell_parts).strip()
row_content.append(cell_text)
# One Markdown row per table row (prefix, joiner and suffix literals missing).
markdown_lines.append( + .join(row_content) + )
# A separator row with one entry per cell is added after one specific row
# (the compared index is missing; presumably the first row) — TODO confirm.
i == :
separator = + .join([ _ row_content]) +
markdown_lines.append(separator)
markdown_lines, image_counter
# NOTE(review): the route decorator, function name, file path and mimetype are
# missing from this listing. Presumably this view serves the front-end page
# via send_file() — confirm against the original.
():
send_file(, mimetype=)
# NOTE(review): the route decorator, function name and JSON payload are missing
# from this listing. The view returns a single-key JSON object; presumably a
# health/status probe — confirm against the original.
():
jsonify({: })
# NOTE(review): the route decorator, function name, form-field name, JSON keys,
# messages and status codes are missing from this listing.
# Upload endpoint: validates the uploaded file, saves it, converts it to
# Markdown, lists the extracted images, rewrites image paths in the Markdown
# and returns everything as JSON.
():
:
# Reject requests that carry no file part, an empty file name, or an
# extension that allowed_file() refuses.
request.files:
jsonify({: , : }),
file = request.files[]
file.filename == :
jsonify({: , : }),
allowed_file(file.filename):
jsonify({: , : }),
# NOTE(review): secure_filename() keeps ASCII characters only, so a purely
# non-ASCII name (e.g. a Chinese title) can collapse to little more than the
# extension; identical names from different uploads also share one path —
# presumably worth a unique fallback name; confirm.
filename = secure_filename(file.filename)
filepath = os.path.join(app.config[], filename)
file.save(filepath)
# Per-document image directory derived from the file stem plus a suffix
# (suffix literal missing).
image_dir = os.path.join(app.config[], Path(filename).stem + )
markdown_content, image_count = convert_docx_to_markdown(filepath, image_dir)
# Build the image list from whatever files are in image_dir.
# NOTE(review): files left by an earlier conversion into the same directory
# would be listed too — confirm whether the directory is cleaned elsewhere.
images = []
os.path.exists(image_dir):
img_file os.listdir(image_dir):
os.path.isfile(os.path.join(image_dir, img_file)):
images.append({
: img_file,
:
})
# Replace each image's path as written by the converter with a value taken
# from the image entry (both the old-path pattern and the key are missing).
img images:
old_path =
markdown_content = markdown_content.replace(old_path, img[])
# Best-effort removal of the uploaded file; the failure handler is not visible.
:
os.remove(filepath)
:
jsonify({
: ,
: markdown_content,
: images,
: image_count
})
# Any exception is reported to the client as JSON containing str(e)
# (presumably with an error status code — the code itself is missing).
Exception e:
jsonify({: , : (e)}),
# NOTE(review): the route decorator, function name, parameter list, config key,
# JSON keys and status codes are missing from this listing. The body reads a
# `filepath` value, presumably a path parameter of the route — confirm.
# Serves a file from the upload area after a containment and existence check.
():
:
full_path = os.path.join(app.config[], filepath)
# NOTE(review): this is a plain string-prefix comparison of absolute paths.
# Without a trailing separator it also accepts sibling directories whose names
# merely start with the same characters; os.path.commonpath() is a stricter
# containment test. The missing leading keyword is presumably `if not`.
os.path.abspath(full_path).startswith(os.path.abspath(app.config[])):
jsonify({: , : }),
os.path.exists(full_path):
jsonify({: , : }),
send_file(full_path)
# Any exception is reported to the client as JSON containing str(e).
Exception e:
jsonify({: , : (e)}),
# Script entry point: starts Flask's built-in server when the module is run directly.
# NOTE(review): the debug/host/port values are missing from this listing; if
# debug is enabled together with a public host, the interactive debugger would
# be reachable from the network — confirm the intended values.
__name__ == :
app.run(debug=, host=, port=)