1. 读取 .doc 格式的 Word 文档
引入依赖
如果使用 Maven 管理项目,在 pom.xml 中添加 Apache POI 的依赖:
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>5.2.3</version>
</dependency>
代码示例
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import java.io.FileInputStream;
import java.io.IOException;
public class ReadDocFile {
public static void main(String[] args) {
try (FileInputStream fis = new FileInputStream("example.doc")) {
HWPFDocument document = new HWPFDocument(fis);
WordExtractor extractor = new WordExtractor(document);
String extractor.getText();
System.out.println(content);
} (IOException e) {
e.printStackTrace();
System.out.println( + e.getMessage());
}
}
}


