使用iText读取PDF文件

xiaoxiao2021-02-28  14

之前学习了如何使用iText生成PDF文件,之后发现iText还提供了读取PDF的API,便尝试写了个Demo,仅供参考。

package com.java.inputPDF;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;

import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.FilteredTextRenderListener;
import com.itextpdf.text.pdf.parser.LocationTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
import com.itextpdf.text.pdf.parser.RegionTextRenderFilter;
import com.itextpdf.text.pdf.parser.RenderFilter;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;

/**
 * Created: 2017-10-13 14:28:35
 * Modified:
 * Author: ttan
 * Description: reads the text content of a PDF file with iText and writes it
 * to a plain-text file.
 */
public class ReadPdfByiText {

    /**
     * Demo entry point: extracts the text of a hard-coded PDF into a
     * hard-coded text file.
     *
     * @param args unused
     * @throws IOException if the output file cannot be opened
     */
    public static void main(String[] args) throws IOException {
        String outputPath = "E:\\ReadPdf\\PdfContent_1.txt";
        // NOTE: this PrintWriter encodes with the platform default charset.
        PrintWriter writer = new PrintWriter(new FileOutputStream(outputPath));
        String fileName = "E:\\ReadPdf\\111.pdf";

        // Read the full text of every page.
        readPdf(writer, fileName);

        // Alternative: read only a rectangular region of every page.
        // Each method closes the writer it is given, so use a fresh writer
        // rather than calling both with the same one.
        // readPdf_filter(writer, fileName);
    }

    /**
     * Extracts the text of every page of the PDF and writes it to
     * {@code writer} in a single call once all pages have been read.
     *
     * <p>Any exception is caught and its stack trace printed; in that case
     * nothing is written. The writer is always closed before returning.
     *
     * @param writer   destination for the extracted text; closed by this method
     * @param fileName path of the PDF file to read
     */
    public static void readPdf(PrintWriter writer, String fileName) {
        PdfReader reader = null;
        try {
            reader = new PdfReader(fileName);
            int pageNum = reader.getNumberOfPages();
            // StringBuilder avoids re-copying the accumulated text on every page.
            StringBuilder pageContent = new StringBuilder();
            for (int i = 1; i <= pageNum; i++) { // iText page numbers start at 1
                pageContent.append(PdfTextExtractor.getTextFromPage(reader, i));
            }
            writer.write(pageContent.toString());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the PDF first, but make sure the writer is closed even
            // if closing the reader fails.
            try {
                if (reader != null) {
                    reader.close();
                }
            } finally {
                writer.close();
            }
        }
    }

    /**
     * Extracts only the text that falls inside a fixed rectangular region of
     * each page and writes the concatenated result to {@code writer}.
     *
     * <p>Any exception is caught and its stack trace printed; in that case
     * nothing is written. The writer is always closed before returning.
     *
     * @param writer   destination for the extracted text; closed by this method
     * @param fileName path of the PDF file to read
     */
    public static void readPdf_filter(PrintWriter writer, String fileName) {
        PdfReader reader = null;
        try {
            // Region to extract, given as (llx, lly, urx, ury) in page coordinates.
            Rectangle rect = new Rectangle(90, 0, 450, 40);
            RenderFilter filter = new RegionTextRenderFilter(rect);
            reader = new PdfReader(fileName);
            int pageNum = reader.getNumberOfPages();
            StringBuilder pageContent = new StringBuilder();
            for (int i = 1; i <= pageNum; i++) {
                // A new strategy per page, as in the original demo, so text
                // collected for one page is not carried into the next.
                TextExtractionStrategy strategy =
                        new FilteredTextRenderListener(new LocationTextExtractionStrategy(), filter);
                pageContent.append(PdfTextExtractor.getTextFromPage(reader, i, strategy));
            }
            // Optional post-processing left disabled by the original demo:
            // split the text on spaces and print what follows the last ":" of
            // each token.
            writer.write(pageContent.toString());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (reader != null) {
                    reader.close();
                }
            } finally {
                writer.close();
            }
        }
    }
}

转载请注明原文地址: https://www.6miu.com/read-1400207.html

最新回复(0)