跳到主要内容前端使用 pdf.js 提取 PDF 文件中的图片 | 极客日志JavaScript大前端
前端使用 pdf.js 提取 PDF 文件中的图片
本文介绍一种基于前端技术从 PDF 文件中提取图片的方法:使用 pdf.js 库解析 PDF 文档,遍历每一页的操作符列表(operator list)识别图像资源,将其绘制到 Canvas 后导出为 PNG 格式。该方案无需后端支持,直接在浏览器中完成文件处理与图片下载,适用于轻量级文档处理场景。
月光旅人 · 0 浏览
功能概述
本工具允许用户上传 PDF 文件,并在前端通过 pdf.js 库自动提取其中包含的所有图片。提取后的图片以网格形式展示,支持预览和下载。
核心代码
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>PDF 图片提取工具</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); min-height: 100vh; padding: 20px; }
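.container { max-width: 1200px; margin: 0 auto; }
.header { text-align: center; color: white; margin-bottom: 30px; }
.header h1 { font-size: 36px; margin-bottom: 10px; text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.2); }
.header p { font-size: 16px; opacity: 0.9; }
.upload-card { background: white; border-radius: 16px; padding: 40px; box-shadow: 0 10px 30px rgba(0, 0, 0, 0.2); margin-bottom: 30px; }
.upload-area { border: 3px dashed #667eea; border-radius: 12px; padding: 60px 20px; text-align: center; cursor: pointer; transition: all 0.3s ease; background: #f8f9ff; }
.upload-area:hover { border-color: #764ba2; background: #f0f2ff; transform: translateY(-2px); }
.upload-area.dragover { border-color: #764ba2; background: #e8ebff; transform: scale(1.02); }
.upload-icon { font-size: 48px; margin-bottom: 20px; }
.upload-text { font-size: 18px; color: #333; margin-bottom: 10px; font-weight: 600; }
.upload-hint { font-size: 14px; color: #666; }
#fileInput { display: none; }
.btn { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border: none; padding: 12px 30px; border-radius: 8px; font-size: 16px; font-weight: 600; cursor: pointer; transition: all 0.3s ease; box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4); }
.btn:hover { transform: translateY(-2px); box-shadow: 0 6px 20px rgba(102, 126, 234, 0.6); }
.btn:active { transform: translateY(0); }
.btn:disabled { opacity: 0.6; cursor: not-allowed; transform: none; }
.progress-container { display: none; margin-top: 20px; }
.progress-bar { width: 100%; height: 8px; background: #e0e0e0; border-radius: 4px; overflow: hidden; }
.progress-fill { height: 100%; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 4px; transition: width 0.3s ease; width: 0%; }
.progress-text { text-align: center; margin-top: 10px; color: #666; font-size: 14px; }
.images-container { display: none; background: white; border-radius: 16px; padding: 40px; box-shadow: 0 10px 30px rgba(0, 0, 0, 0.2); }
.images-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 30px; padding-bottom: 20px; border-bottom: 2px solid #f0f0f0; }
.images-title { font-size: 24px; font-weight: 700; color: #333; }
.images-count { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 8px 20px; border-radius: 20px; font-size: 14px; font-weight: 600; }
.images-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 20px; }
.image-card { border: 2px solid #e0e0e0; border-radius: 12px; overflow: hidden; transition: all 0.3s ease; background: white; }
.image-card:hover { transform: translateY(-4px); box-shadow: 0 8px 20px rgba(0, 0, 0, 0.1); border-color: #667eea; }
.image-wrapper { width: 100%; height: 200px; display: flex; align-items: center; justify-content: center; background: #f8f9fa; overflow: hidden; }
.image-wrapper img { max-width: 100%; max-height: 100%; object-fit: contain; }
.image-info { padding: 15px; background: white; }
.image-name { font-size: 14px; color: #333; margin-bottom: 8px; font-weight: 600; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
.image-meta { display: flex; justify-content: space-between; align-items: center; font-size: 12px; color: #999; margin-bottom: 12px; }
.image-actions { display: flex; gap: 8px; }
.btn-small { flex: 1; padding: 8px 16px; font-size: 13px; border-radius: 6px; border: none; cursor: pointer; transition: all 0.2s ease; font-weight: 600; }
.btn-download { background: #667eea; color: white; }
.btn-download:hover { background: #5568d3; transform: translateY(-1px); }
.btn-preview { background: #f0f0f0; color: #333; }
.btn-preview:hover { background: #e0e0e0; }
.empty-state { text-align: center; padding: 60px 20px; color: #999; }
.empty-icon { font-size: 64px; margin-bottom: 20px; opacity: 0.5; }
.modal { display: none; position: fixed; top: 0; left: 0; width: 100%; height: 100%; background: rgba(0, 0, 0, 0.9); z-index: 1000; align-items: center; justify-content: center; }
.modal.active { display: flex; }
.modal-content { max-width: 90%; max-height: 90%; position: relative; }
.modal-image { max-width: 100%; max-height: 90vh; object-fit: contain; }
.modal-close { position: absolute; top: -40px; right: 0; background: white; color: #333; border: none; width: 36px; height: 36px; border-radius: 50%; cursor: pointer; font-size: 20px; font-weight: bold; transition: all 0.2s ease; }
.modal-close:hover { background: #f0f0f0; transform: rotate(90deg); }
@keyframes fadeIn { from { opacity: 0; transform: translateY(20px); } to { opacity: 1; transform: translateY(0); } }
.image-card { animation: fadeIn 0.3s ease; }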
📄 PDF 图片提取工具
上传 PDF 文件,自动提取其中的所有图片
📁
点击或拖拽 PDF 文件到此处
支持单个 PDF 文件上传
处理中...
提取的图片
0 张图片
×
微信扫一扫,关注极客日志
微信公众号「极客日志」,在微信中扫描左侧二维码关注。展示文案:极客日志 zeeklog
相关免费在线工具
- Keycode 信息
查询任意按键对应的 JavaScript 键码(keyCode)、code、位置(location)和修饰键信息。 在线工具,Keycode 信息在线工具,online
- Escape 与 Native 编解码
JavaScript 字符串转义/反转义;Java 风格 \uXXXX(Native2Ascii)编码与解码。 在线工具,Escape 与 Native 编解码在线工具,online
- JavaScript / HTML 格式化
使用 Prettier 在浏览器内格式化 JavaScript 或 HTML 片段。 在线工具,JavaScript / HTML 格式化在线工具,online
- JavaScript 压缩与混淆
Terser 压缩、变量名混淆,或 javascript-obfuscator 高强度混淆(体积会增大)。 在线工具,JavaScript 压缩与混淆在线工具,online
- Base64 字符串编码/解码
将字符串编码和解码为其 Base64 格式表示形式即可。 在线工具,Base64 字符串编码/解码在线工具,online
- Base64 文件转换器
将字符串、文件或图像转换为其 Base64 表示形式。 在线工具,Base64 文件转换器在线工具,online
.container
max-width
1200px
margin
0
.header
text-align
color
margin-bottom
30px
.header
h1
font-size
36px
margin-bottom
10px
text-shadow
2px
2px
4px
rgba
0
0
0
0.2
.header
p
font-size
16px
opacity
0.9
.upload-card
background
border-radius
16px
padding
40px
box-shadow
0
10px
30px
rgba
0
0
0
0.2
margin-bottom
30px
.upload-area
border
3px
#667eea
border-radius
12px
padding
60px
20px
text-align
cursor
transition
0.3s
background
#f8f9ff
.upload-area
:hover
border-color
#764ba2
background
#f0f2ff
transform
translateY
2px
.upload-area
.dragover
border-color
#764ba2
background
#e8ebff
transform
scale
1.02
.upload-icon
font-size
48px
margin-bottom
20px
.upload-text
font-size
18px
color
#333
margin-bottom
10px
font-weight
600
.upload-hint
font-size
14px
color
#666
#fileInput
display
.btn
background
linear-gradient
135deg
#667eea
0%
#764ba2
100%
color
border
padding
12px
30px
border-radius
8px
font-size
16px
font-weight
600
cursor
transition
0.3s
box-shadow
0
4px
15px
rgba
102
126
234
0.4
.btn
:hover
transform
translateY
2px
box-shadow
0
6px
20px
rgba
102
126
234
0.6
.btn
:active
transform
translateY
0
.btn
:disabled
opacity
0.6
cursor
transform
.progress-container
display
margin-top
20px
.progress-bar
width
100%
height
8px
background
#e0e0e0
border-radius
4px
overflow
.progress-fill
height
100%
background
linear-gradient
90deg
#667eea
0%
#764ba2
100%
border-radius
4px
transition
0.3s
width
0%
.progress-text
text-align
margin-top
10px
color
#666
font-size
14px
.images-container
display
background
border-radius
16px
padding
40px
box-shadow
0
10px
30px
rgba
0
0
0
0.2
.images-header
display
justify-content
align-items
margin-bottom
30px
padding-bottom
20px
border-bottom
2px
#f0f0f0
.images-title
font-size
24px
font-weight
700
color
#333
.images-count
background
linear-gradient
135deg
#667eea
0%
#764ba2
100%
color
padding
8px
20px
border-radius
20px
font-size
14px
font-weight
600
.images-grid
display
grid-template-columns
repeat
minmax
250px
1
gap
20px
.image-card
border
2px
#e0e0e0
border-radius
12px
overflow
transition
0.3s
background
.image-card
:hover
transform
translateY
4px
box-shadow
0
8px
20px
rgba
0
0
0
0.1
border-color
#667eea
.image-wrapper
width
100%
height
200px
display
align-items
justify-content
background
#f8f9fa
overflow
.image-wrapper
img
max-width
100%
max-height
100%
object-fit
.image-info
padding
15px
background
.image-name
font-size
14px
color
#333
margin-bottom
8px
font-weight
600
overflow
text-overflow
white-space
.image-meta
display
justify-content
align-items
font-size
12px
color
#999
margin-bottom
12px
.image-actions
display
gap
8px
.btn-small
flex
1
padding
8px
16px
font-size
13px
border-radius
6px
border
cursor
transition
0.2s
font-weight
600
.btn-download
background
#667eea
color
.btn-download
:hover
background
#5568d3
transform
translateY
1px
.btn-preview
background
#f0f0f0
color
#333
.btn-preview
:hover
background
#e0e0e0
.empty-state
text-align
padding
60px
20px
color
#999
.empty-icon
font-size
64px
margin-bottom
20px
opacity
0.5
.modal
display
position
top
0
left
0
width
100%
height
100%
background
rgba
0
0
0
0.9
z-index
1000
align-items
justify-content
.modal
.active
display
.modal-content
max-width
90%
max-height
90%
position
.modal-image
max-width
100%
max-height
90vh
object-fit
.modal-close
position
top
40px
right
0
background
color
#333
border
width
36px
height
36px
border-radius
50%
cursor
font-size
20px
font-weight
transition
0.2s
.modal-close
:hover
background
#f0f0f0
transform
rotate
90deg
@keyframes
from
opacity
0
transform
translateY
20px
to
opacity
1
transform
translateY
0
.image-card
animation
0.3s
</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>📄 PDF 图片提取工具</h1>
<p>上传 PDF 文件,自动提取其中的所有图片</p>
</div>
<div class="upload-card">
<div id="uploadArea" class="upload-area">
<div class="upload-icon">📁</div>
<div class="upload-text">点击或拖拽 PDF 文件到此处</div>
<div class="upload-hint">支持单个 PDF 文件上传</div>
</div>
<input type="file" accept=".pdf,application/pdf" id="fileInput">
<div class="progress-container" id="progressContainer">
<div class="progress-bar">
<div class="progress-fill" id="progressFill">
</div>
</div>
<div class="progress-text" id="progressText">处理中...</div>
</div>
</div>
<div class="images-container" id="imagesContainer">
<div class="images-header">
<div class="images-title">提取的图片</div>
<div class="images-count" id="imagesCount">0 张图片</div>
</div>
<div class="images-grid" id="imagesGrid">
</div>
</div>
</div>
<div class="modal" id="modal">
<div class="modal-content">
<button class="modal-close" onclick="closeModal()">×</button>
<img class="modal-image" id="modalImage" alt="预览">
</div>
</div>
<script>
// Point pdf.js at the worker script of the same 3.11.174 build that the page loads in <head>.
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';

// Resolve every element the script manipulates once, up front. The id list and
// the destructuring pattern are kept in the same order.
const [
  uploadArea,
  fileInput,
  progressContainer,
  progressFill,
  progressText,
  imagesContainer,
  imagesGrid,
  imagesCount,
] = [
  'uploadArea',
  'fileInput',
  'progressContainer',
  'progressFill',
  'progressText',
  'imagesContainer',
  'imagesGrid',
  'imagesCount',
].map((elementId) => document.getElementById(elementId));

// Images extracted from the most recently processed PDF; reassigned on every upload.
let extractedImages = [];
/**
 * Reports whether a user-supplied file should be treated as a PDF.
 *
 * Accepts either the PDF MIME type or a ".pdf" file-name extension: browsers
 * derive `File.type` from the platform's type registry and report an empty
 * string when the type is unknown, so a strict MIME comparison silently
 * rejects genuine PDFs on such systems.
 *
 * @param {File|undefined|null} file - Candidate file from the picker or a drop.
 * @returns {boolean} True when the file looks like a PDF.
 */
function isPdfFile(file) {
  if (!file) {
    return false;
  }
  return file.type === 'application/pdf' || /\.pdf$/i.test(file.name);
}

/**
 * Shared entry point for both upload paths (file picker and drag & drop).
 * Starts processing for PDFs and logs a warning for anything else instead of
 * dropping it without a trace.
 *
 * @param {File|undefined|null} file - First file of the selection/drop, if any.
 */
function acceptUpload(file) {
  if (isPdfFile(file)) {
    handleFile(file);
    return;
  }
  if (file) {
    console.warn('已忽略非 PDF 文件:', file.name);
  }
}

// Clicking anywhere in the drop zone opens the hidden file picker.
uploadArea.addEventListener('click', () => {
  fileInput.click();
});

fileInput.addEventListener('change', (e) => {
  const file = e.target.files[0];
  // Clear the input so that picking the same file again still fires "change".
  // The File object obtained above stays usable after the reset.
  e.target.value = '';
  acceptUpload(file);
});

uploadArea.addEventListener('dragover', (e) => {
  // Cancelling dragover is what marks the element as a valid drop target.
  e.preventDefault();
  uploadArea.classList.add('dragover');
});

uploadArea.addEventListener('dragleave', () => {
  uploadArea.classList.remove('dragover');
});

uploadArea.addEventListener('drop', (e) => {
  // Stop the browser from navigating to / opening the dropped file.
  e.preventDefault();
  uploadArea.classList.remove('dragover');
  acceptUpload(e.dataTransfer.files[0]);
});
/**
 * Processes one uploaded PDF: resets the UI, extracts its images and renders
 * the result grid.
 *
 * Failures are reported in the progress text (and logged) rather than thrown,
 * so the returned promise always fulfils.
 *
 * @param {File} file - The PDF selected or dropped by the user.
 * @returns {Promise<void>}
 */
async function handleFile(file) {
  // Blob URLs keep their blobs alive until revoked; release the ones from the
  // previous run before dropping the only references to them.
  for (const { url } of extractedImages) {
    URL.revokeObjectURL(url);
  }
  extractedImages = [];
  imagesGrid.innerHTML = '';
  imagesContainer.style.display = 'none';

  // Undo whatever a previous run left behind (a full bar, or the red error
  // colour set by the catch block below) before showing the progress UI again.
  progressFill.style.width = '0%';
  progressText.style.color = '';
  progressText.textContent = '处理中...';
  progressContainer.style.display = 'block';

  try {
    const arrayBuffer = await file.arrayBuffer();
    await extractImagesFromPDF(arrayBuffer, file.name);
    progressContainer.style.display = 'none';
    displayImages();
  } catch (error) {
    console.error('处理 PDF 失败:', error);
    // The progress container stays visible so the message can be read.
    progressText.textContent = '处理失败:' + error.message;
    progressText.style.color = '#e74c3c';
  }
}
/**
 * Resolves the image payload referenced by a paint-image operator.
 *
 * Inline-image operators carry the decoded image object itself as their first
 * argument, while XObject operators carry a string id. Ids starting with "g_"
 * are document-wide objects that pdf.js keeps in `page.commonObjs`; all other
 * ids live in `page.objs`. Asking the wrong store (or passing an object as an
 * id) registers a callback that never fires, which would stall extraction.
 *
 * @param {object} page - pdf.js page proxy exposing `objs` and `commonObjs`.
 * @param {string|object} imageRef - First argument of the paint operator.
 * @returns {Promise<object|null>} The image payload, or a falsy value when the
 *     store reports none.
 */
function resolvePdfImage(page, imageRef) {
  if (typeof imageRef !== 'string') {
    return Promise.resolve(imageRef);
  }
  const store = imageRef.startsWith('g_') ? page.commonObjs : page.objs;
  // The callback form of get() waits until the worker has delivered the object.
  return new Promise((resolve) => {
    store.get(imageRef, resolve);
  });
}

/**
 * Expands a raw pdf.js pixel buffer to RGBA so it can back an ImageData.
 *
 * The layout is inferred from the buffer length and covers the three layouts
 * pdf.js documents as ImageKind: 32-bpp RGBA, 24-bpp RGB and 1-bpp grayscale
 * with byte-aligned rows. For 1 bpp a set bit is rendered white, matching
 * pdf.js's own canvas renderer (NOTE(review): confirm the polarity against the
 * pdf.js version in use).
 *
 * @param {{data: ArrayLike<number>, width: number, height: number}} img
 * @returns {ArrayLike<number>|null} RGBA bytes, or null for an unknown layout.
 */
function expandToRgba({ data, width, height }) {
  const pixelCount = width * height;
  if (data.length === pixelCount * 4) {
    return data;
  }

  const rgba = new Uint8ClampedArray(pixelCount * 4);
  if (data.length === pixelCount * 3) {
    for (let src = 0, dest = 0; src < data.length; src += 3, dest += 4) {
      rgba[dest] = data[src];
      rgba[dest + 1] = data[src + 1];
      rgba[dest + 2] = data[src + 2];
      rgba[dest + 3] = 255;
    }
    return rgba;
  }

  const rowBytes = (width + 7) >> 3;
  if (data.length === rowBytes * height) {
    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const isSet = data[y * rowBytes + (x >> 3)] & (0x80 >> (x & 7));
        const shade = isSet ? 255 : 0;
        const dest = (y * width + x) * 4;
        rgba[dest] = shade;
        rgba[dest + 1] = shade;
        rgba[dest + 2] = shade;
        rgba[dest + 3] = 255;
      }
    }
    return rgba;
  }

  return null;
}

/**
 * Creates a detached canvas of the given size.
 *
 * @param {number} width
 * @param {number} height
 * @returns {HTMLCanvasElement}
 */
function createImageCanvas(width, height) {
  const canvas = document.createElement('canvas');
  canvas.width = width;
  canvas.height = height;
  return canvas;
}

/**
 * Loads an image from a URL.
 *
 * @param {string} src
 * @returns {Promise<HTMLImageElement|null>} The loaded image, or null (after
 *     logging) when loading fails.
 */
function loadImageFromSrc(src) {
  return new Promise((resolve) => {
    const image = new Image();
    image.onload = () => resolve(image);
    image.onerror = () => {
      console.error('加载图片失败');
      resolve(null);
    };
    image.src = src;
  });
}

/**
 * Draws a pdf.js image payload onto a fresh canvas.
 *
 * Payload shapes are tried in this order: an ImageBitmap (wrapped in
 * `img.bitmap` or passed directly), a raw pixel buffer (`img.data`), a DOM
 * image/canvas element, and finally an object exposing `src`.
 *
 * @param {object} img - Image payload resolved from a paint operator.
 * @returns {Promise<HTMLCanvasElement|null>} The canvas, or null when the
 *     payload has no supported shape or could not be decoded.
 */
async function renderPdfImageToCanvas(img) {
  const hasImageBitmap = typeof ImageBitmap !== 'undefined';
  let drawable = null;
  if (hasImageBitmap && img.bitmap instanceof ImageBitmap) {
    drawable = img.bitmap;
  } else if (hasImageBitmap && img instanceof ImageBitmap) {
    drawable = img;
  }

  if (!drawable && img.data && img.width && img.height) {
    const rgba = expandToRgba(img);
    if (!rgba) {
      return null;
    }
    const canvas = createImageCanvas(img.width, img.height);
    const ctx = canvas.getContext('2d');
    const imageData = ctx.createImageData(img.width, img.height);
    imageData.data.set(rgba);
    ctx.putImageData(imageData, 0, 0);
    return canvas;
  }

  if (!drawable) {
    const isDomImage =
      (typeof HTMLImageElement !== 'undefined' && img instanceof HTMLImageElement) ||
      (typeof HTMLCanvasElement !== 'undefined' && img instanceof HTMLCanvasElement);
    if (isDomImage) {
      drawable = img;
    } else if (img.src) {
      drawable = await loadImageFromSrc(img.src);
    }
  }

  if (!drawable) {
    return null;
  }
  const canvas = createImageCanvas(img.width, img.height);
  canvas.getContext('2d').drawImage(drawable, 0, 0);
  return canvas;
}

/**
 * Extracts every painted image from a PDF and records each one through
 * `finishImageProcessing`.
 *
 * Pages are processed sequentially; for each page the operator list is scanned
 * for `paintImageXObject` / `paintInlineImageXObject` operators. A failure on
 * a single image is logged and skipped so the remaining images are still
 * extracted. The running image index advances for every image operator,
 * including ones that could not be exported. The pdf.js document is destroyed
 * when extraction ends, whether it succeeded or not.
 *
 * @param {ArrayBuffer} arrayBuffer - Raw bytes of the PDF file.
 * @param {string} fileName - Original file name, forwarded for output naming.
 * @returns {Promise<void>}
 * @throws Rejects when the document, a page or an operator list cannot be loaded.
 */
async function extractImagesFromPDF(arrayBuffer, fileName) {
  const pdfDocument = await pdfjsLib.getDocument({
    data: arrayBuffer,
    useSystemFonts: true,
    disableFontFace: false,
    verbosity: 0,
    isEvalSupported: false,
    maxImageSize: 1024 * 1024 * 10,
  }).promise;

  try {
    const { paintImageXObject, paintInlineImageXObject } = pdfjsLib.OPS;
    const totalPages = pdfDocument.numPages;
    let imageIndex = 0;

    for (let pageNum = 1; pageNum <= totalPages; pageNum++) {
      updateProgress(pageNum, totalPages);
      const page = await pdfDocument.getPage(pageNum);
      const operatorList = await page.getOperatorList();

      for (let i = 0; i < operatorList.fnArray.length; i++) {
        const fn = operatorList.fnArray[i];
        if (fn !== paintImageXObject && fn !== paintInlineImageXObject) {
          continue;
        }

        let img = null;
        try {
          img = await resolvePdfImage(page, operatorList.argsArray[i][0]);
          if (img) {
            const canvas = await renderPdfImageToCanvas(img);
            if (canvas) {
              await finishImageProcessing(canvas, img, fileName, pageNum, imageIndex);
            } else {
              console.warn('无法处理的图片对象:', { hasData: !!img.data, hasBitmap: !!img.bitmap, width: img.width, height: img.height, keys: Object.keys(img) });
            }
          }
        } catch (error) {
          console.error('处理图片失败:', error, img);
        }
        imageIndex++;
      }

      // Every image of this page has been copied to its own canvas/blob by now,
      // so the page's cached objects can be released.
      page.cleanup();
    }
  } finally {
    // Tear down the document (and its worker-side state) on success and failure alike.
    await pdfDocument.destroy();
  }
}
/**
 * Encodes a canvas as PNG and appends the result to `extractedImages`.
 *
 * The output name is `<pdf base name>_page<pageNum>_img<imageIndex>.png`. Only
 * a trailing ".pdf" extension (any letter case) is stripped from the file
 * name, so a ".pdf" appearing earlier in the name is preserved.
 *
 * @param {HTMLCanvasElement} canvas - Canvas holding the decoded image.
 * @param {object} img - Source image payload (not used here; kept for callers).
 * @param {string} fileName - Name of the PDF the image came from.
 * @param {number} pageNum - 1-based page number of the image.
 * @param {number} imageIndex - Running index of the image within the document.
 * @returns {Promise<void>} Fulfils once encoding has finished, whether or not
 *     a blob could be produced.
 */
function finishImageProcessing(canvas, img, fileName, pageNum, imageIndex) {
  return new Promise((resolve) => {
    canvas.toBlob((blob) => {
      if (blob) {
        const baseName = fileName.replace(/\.pdf$/i, '');
        extractedImages.push({
          url: URL.createObjectURL(blob),
          name: `${baseName}_page${pageNum}_img${imageIndex}.png`,
          size: blob.size,
          width: canvas.width,
          height: canvas.height,
          blob,
        });
      } else {
        // toBlob hands back null when the canvas cannot be encoded; make the
        // skipped image visible in the console instead of dropping it silently.
        console.warn('图片导出失败,已跳过:', { pageNum, imageIndex, width: canvas.width, height: canvas.height });
      }
      resolve();
    }, 'image/png');
  });
}
/**
 * Reflects page-level progress in the progress bar and its caption.
 *
 * @param {number} current - Page currently being processed.
 * @param {number} total - Total number of pages.
 */
function updateProgress(current, total) {
  const statusMessage = `正在处理第 ${current}/${total} 页...`;
  const completedPercent = (current / total) * 100;
  progressText.textContent = statusMessage;
  progressFill.style.width = `${completedPercent}%`;
}
/**
 * Creates an element with an optional class name and text content.
 *
 * @param {string} tag - Tag name to create.
 * @param {string} [className] - Value for `className`; skipped when empty.
 * @param {string} [text] - Value for `textContent`; skipped when undefined.
 * @returns {HTMLElement}
 */
function createElementWith(tag, className, text) {
  const element = document.createElement(tag);
  if (className) {
    element.className = className;
  }
  if (text !== undefined) {
    element.textContent = text;
  }
  return element;
}

/**
 * Builds the result card for one extracted image.
 *
 * The card is assembled from DOM nodes rather than an HTML string because the
 * image name is derived from the uploaded file's name: assigning it through
 * `textContent` and properties keeps markup characters in a file name from
 * being parsed as HTML, and click listeners replace inline `onclick` strings.
 *
 * @param {{url: string, name: string, size: number, width: number, height: number}} image
 * @param {number} index - Position of the image in `extractedImages`.
 * @returns {HTMLElement} The populated `.image-card` element.
 */
function buildImageCard(image, index) {
  const card = createElementWith('div', 'image-card');
  // Stagger the fade-in so cards appear one after another.
  card.style.animationDelay = `${index * 0.05}s`;

  const thumbnail = document.createElement('img');
  thumbnail.src = image.url;
  thumbnail.alt = image.name;
  const wrapper = createElementWith('div', 'image-wrapper');
  wrapper.appendChild(thumbnail);

  const nameLabel = createElementWith('div', 'image-name', image.name);
  nameLabel.title = image.name;

  const meta = createElementWith('div', 'image-meta');
  meta.appendChild(createElementWith('span', '', `${image.width} × ${image.height}`));
  meta.appendChild(createElementWith('span', '', formatBytes(image.size)));

  const previewButton = createElementWith('button', 'btn-small btn-preview', '预览');
  previewButton.addEventListener('click', () => previewImage(image.url));
  const downloadButton = createElementWith('button', 'btn-small btn-download', '下载');
  downloadButton.addEventListener('click', () => downloadImage(index));
  const actions = createElementWith('div', 'image-actions');
  actions.appendChild(previewButton);
  actions.appendChild(downloadButton);

  const info = createElementWith('div', 'image-info');
  info.appendChild(nameLabel);
  info.appendChild(meta);
  info.appendChild(actions);

  card.appendChild(wrapper);
  card.appendChild(info);
  return card;
}

/**
 * Shows the results panel for the current `extractedImages`: an empty-state
 * message when nothing was found, otherwise one card per image appended to
 * the grid.
 */
function displayImages() {
  imagesContainer.style.display = 'block';
  imagesCount.textContent = `${extractedImages.length} 张图片`;

  if (extractedImages.length === 0) {
    // Static markup only; no user-derived data is interpolated here.
    imagesGrid.innerHTML = `
<div class="empty-state">
<div class="empty-icon">🖼️</div>
<div>未在 PDF 中找到图片</div>
</div>`;
    return;
  }

  extractedImages.forEach((image, index) => {
    imagesGrid.appendChild(buildImageCard(image, index));
  });
}
/**
 * Formats a byte count as a human-readable string using 1024-based units,
 * rounded to at most two decimals (e.g. 1536 -> "1.5 KB").
 *
 * Units run from B to TB; values beyond the last unit are expressed in TB.
 * Non-finite, zero or negative input yields "0 B".
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} The formatted size.
 */
function formatBytes(bytes) {
  if (!Number.isFinite(bytes) || bytes <= 0) {
    return '0 B';
  }
  const units = ['B', 'KB', 'MB', 'GB', 'TB'];
  let value = bytes;
  let unitIndex = 0;
  // Repeated division (exact for powers of two) avoids the rounding pitfalls
  // of a log-ratio and keeps the index inside the unit table.
  while (value >= 1024 && unitIndex < units.length - 1) {
    value /= 1024;
    unitIndex++;
  }
  return `${Math.round(value * 100) / 100} ${units[unitIndex]}`;
}
/**
 * Opens the preview modal showing the image at the given URL.
 *
 * @param {string} url - Source assigned to the modal's image element.
 */
function previewImage(url) {
  const [modalImage, modal] = ['modalImage', 'modal'].map((id) => document.getElementById(id));
  modalImage.src = url;
  modal.classList.add('active');
}
/** Hides the preview modal by removing its "active" class. */
function closeModal() {
  const modal = document.getElementById('modal');
  modal.classList.remove('active');
}
/**
 * Triggers a browser download of one extracted image by clicking a temporary
 * anchor that points at the image's URL.
 *
 * @param {number} index - Position of the image in `extractedImages`.
 */
function downloadImage(index) {
  const { url, name } = extractedImages[index];
  const anchor = Object.assign(document.createElement('a'), { href: url, download: name });
  anchor.click();
}
// Close the preview only when the click's target is the element with id
// "modal" itself; clicks whose target is a descendant are ignored.
document.getElementById('modal').addEventListener('click', ({ target }) => {
  if (target.id !== 'modal') {
    return;
  }
  closeModal();
});
</script>
</body>
</html>