将DOCX文档转化为PDF是项目中常见的需求之一,目前主流的方法可以分为两大类:一类是利用各种Office应用进行转换,譬如Microsoft Office、WPS以及LibreOffice;另一类是利用各种语言提供的Office文档读取接口(譬如Apache POI)读取文档,然后使用专门的PDF生成库(譬如iText)构建PDF。总的来说,利用Office应用转换可以保证较好的样式,不过相对而言效率会比较低。其中Microsoft Office涉及版权,不可轻易使用(笔者所在公司就被抓包了);WPS目前使用比较广泛,不过存在超链接截断问题,即超过256个字符的超链接会被截断;LibreOffice的样式排版相对比较随意。而利用POI接口进行读取与生成的方式性能较好,适用于对格式要求不是很高的情况。另外还有一些封装好的在线工具或者命令行工具,譬如docx2pdf与OfficeToPDF。
以下是Word转PDF的实现:.docx使用Apache POI(配合XDocReport与iText)转换,.doc使用docx4j转换。
Converter:
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

/**
 * Base class for converters that read a document from an input stream and
 * write the converted result to an output stream.
 *
 * <p>Subclasses implement {@link #convert()} and are expected to call
 * {@link #loading()}, {@link #processing()} and {@link #finished()} at the
 * corresponding stages so that timing messages are produced and, when
 * requested, the streams are closed.
 */
public abstract class Converter {

    private static final String LOADING_FORMAT = "\nLoading stream\n\n";
    private static final String PROCESSING_FORMAT = "Load completed in %1$dms, now converting...\n\n";
    private static final String SAVING_FORMAT = "Conversion took %1$dms.\n\nTotal: %2$dms\n";

    /** Timestamp (ms) taken when {@link #loading()} was called. */
    private long startTime;

    /** Timestamp (ms) at which the current stage started. */
    private long startOfProcessTime;

    protected InputStream inStream;
    protected OutputStream outStream;
    protected boolean showOutputMessages = false;
    protected boolean closeStreamsWhenComplete = true;

    /**
     * @param inStream                 source document stream
     * @param outStream                destination stream for the converted document
     * @param showMessages             whether progress messages are forwarded to
     *                                 {@link #actuallySendToOutput(String)}
     * @param closeStreamsWhenComplete whether {@link #finished()} closes both streams
     */
    public Converter(InputStream inStream, OutputStream outStream, boolean showMessages,
            boolean closeStreamsWhenComplete) {
        this.inStream = inStream;
        this.outStream = outStream;
        this.showOutputMessages = showMessages;
        this.closeStreamsWhenComplete = closeStreamsWhenComplete;
    }

    /**
     * Performs the conversion from {@code inStream} to {@code outStream}.
     *
     * @throws Exception if the conversion fails
     */
    public abstract void convert() throws Exception;

    /** Records the overall start time; it is also the start of the first stage. */
    private void startTime() {
        startTime = System.currentTimeMillis();
        startOfProcessTime = startTime;
    }

    /** Announces the loading stage and starts the timers. */
    protected void loading() {
        // LOADING_FORMAT contains no format specifiers, so it is emitted verbatim.
        sendToOutputOrNot(LOADING_FORMAT);
        startTime();
    }

    /** Reports how long loading took and starts timing the conversion stage. */
    protected void processing() {
        long currentTime = System.currentTimeMillis();
        long prevProcessTook = currentTime - startOfProcessTime;
        sendToOutputOrNot(String.format(PROCESSING_FORMAT, prevProcessTook));
        startOfProcessTime = System.currentTimeMillis();
    }

    /**
     * Reports the conversion and total durations and, if
     * {@code closeStreamsWhenComplete} is set, closes both streams.
     *
     * <p>Each stream is closed independently, so a failure while closing the
     * input stream no longer prevents the output stream from being closed.
     */
    protected void finished() {
        long currentTime = System.currentTimeMillis();
        long timeTaken = currentTime - startTime;
        long prevProcessTook = currentTime - startOfProcessTime;
        startOfProcessTime = System.currentTimeMillis();

        if (closeStreamsWhenComplete) {
            closeQuietly(inStream);
            closeQuietly(outStream);
        }

        sendToOutputOrNot(String.format(SAVING_FORMAT, prevProcessTook, timeTaken));
    }

    /** Closes the given stream if it is non-null, ignoring any {@link IOException}. */
    private static void closeQuietly(java.io.Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Best-effort cleanup: finished() has no way to report a close failure.
        }
    }

    /** Forwards the message to the output hook only when messages are enabled. */
    private void sendToOutputOrNot(String toBePrinted) {
        if (showOutputMessages) {
            actuallySendToOutput(toBePrinted);
        }
    }

    /**
     * Output hook for progress messages. This implementation does nothing;
     * subclasses may override it to route messages to a console or logger.
     *
     * @param toBePrinted the already formatted message
     */
    protected void actuallySendToOutput(String toBePrinted) {
    }
}
DocToPDFConverter:
import java.io.InputStream; import java.io.OutputStream; import java.io.PrintStream; import java.net.URL; import org.apache.commons.io.IOUtils; import org.docx4j.Docx4J; import org.docx4j.convert.in.Doc; import org.docx4j.convert.out.FOSettings; import org.docx4j.fonts.IdentityPlusMapper; import org.docx4j.fonts.Mapper; import org.docx4j.fonts.PhysicalFont; import org.docx4j.fonts.PhysicalFonts; import org.docx4j.jaxb.Context; import org.docx4j.openpackaging.packages.WordprocessingMLPackage; import org.docx4j.wml.RFonts; import org.springframework.core.io.ClassPathResource; import org.springframework.core.io.Resource; public class DocToPDFConverter extends Converter { public DocToPDFConverter(InputStream inStream, OutputStream outStream, boolean showMessages, boolean closeStreamsWhenComplete) { super(inStream, outStream, showMessages, closeStreamsWhenComplete); } @Override public void convert() throws Exception { loading(); InputStream iStream = inStream; try { WordprocessingMLPackage wordMLPackage = getMLPackage(iStream); Mapper fontMapper = new IdentityPlusMapper(); String fontFamily = "SimSun"; Resource fileRource = new ClassPathResource("simsun.ttf"); String path = fileRource.getFile().getAbsolutePath(); URL fontUrl = new URL("file:"+path); PhysicalFonts.addPhysicalFont(fontUrl); PhysicalFont simsunFont = PhysicalFonts.get(fontFamily); fontMapper.put(fontFamily, simsunFont); RFonts rfonts = Context.getWmlObjectFactory().createRFonts(); // 设置文件默认字体 rfonts.setAsciiTheme(null); rfonts.setAscii(fontFamily); wordMLPackage.getMainDocumentPart().getPropertyResolver().getDocumentDefaultRPr().setRFonts(rfonts); wordMLPackage.setFontMapper(fontMapper); FOSettings foSettings = Docx4J.createFOSettings(); foSettings.setWmlPackage(wordMLPackage); Docx4J.toFO(foSettings, outStream, Docx4J.FLAG_EXPORT_PREFER_XSL); } catch (Exception ex) { ex.printStackTrace(); } finally { IOUtils.closeQuietly(outStream); } /* * InputStream iStream = inStream; * * * * String regex = null; 
//Windows: // String * regex=".*(calibri|camb|cour|arial|symb|times|Times|zapf).*"; regex= * ".*(calibri|camb|cour|arial|times|comic|georgia|impact|LSANS|pala|tahoma|trebuc|verdana|symbol|webdings|wingding).*"; * // Mac // String // * regex=".*(Courier New|Arial|Times New Roman|Comic Sans|Georgia|Impact|Lucida Console|Lucida Sans Unicode|Palatino Linotype|Tahoma|Trebuchet|Verdana|Symbol|Webdings|Wingdings|MS Sans Serif|MS Serif).*" * ; PhysicalFonts.setRegex(regex); WordprocessingMLPackage * wordMLPackage = getMLPackage(iStream); // WordprocessingMLPackage * wordMLPackage = WordprocessingMLPackage.load(iStream) FieldUpdater * updater = new FieldUpdater(wordMLPackage); updater.update(true); // * process processing(); // Add font * * Mapper fontMapper = new IdentityPlusMapper(); * * PhysicalFont font = PhysicalFonts.get("Arial UTF-8 MS"); if (font != * null) { fontMapper.put("Times New Roman", font); * fontMapper.put("Arial", font); fontMapper.put("Calibri", font); } * fontMapper.put("Calibri", PhysicalFonts.get("Calibri")); * fontMapper.put("Algerian", font); fontMapper.put("华文行楷", * PhysicalFonts.get("STXingkai")); fontMapper.put("华文仿宋", * PhysicalFonts.get("STFangsong")); fontMapper.put("隶书", * PhysicalFonts.get("LiSu")); fontMapper.put("Libian SC Regular", * PhysicalFonts.get("SimSun")); * wordMLPackage.setFontMapper(fontMapper); FOSettings foSettings = * Docx4J.createFOSettings(); foSettings.setFoDumpFile(new * java.io.File("E:/xi.fo")); foSettings.setWmlPackage(wordMLPackage); * // Docx4J.toPDF(wordMLPackage, outStream); Docx4J.toFO(foSettings, * outStream, Docx4J.FLAG_EXPORT_PREFER_XSL); */ finished(); } protected WordprocessingMLPackage getMLPackage(InputStream iStream) throws Exception { //PrintStream originalStdout = System.out; System.setOut(new PrintStream(new OutputStream() { public void write(int b) { // DO NOTHING } })); WordprocessingMLPackage mlPackage = Doc.convert(iStream); //System.setOut(originalStdout); //System.out.println(outStream); return 
mlPackage; } }
DocxToPDFConverter:
import java.awt.Color; import java.io.InputStream; import java.io.OutputStream; import org.apache.poi.xwpf.converter.pdf.PdfConverter; import org.apache.poi.xwpf.converter.pdf.PdfOptions; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.springframework.core.io.ClassPathResource; import org.springframework.core.io.Resource; import com.lowagie.text.Font; import com.lowagie.text.pdf.BaseFont; import fr.opensagres.xdocreport.itext.extension.font.ITextFontRegistry; public class DocxToPDFConverter extends Converter { public DocxToPDFConverter(InputStream inStream, OutputStream outStream, boolean showMessages, boolean closeStreamsWhenComplete) { super(inStream, outStream, showMessages, closeStreamsWhenComplete); } @Override public void convert() throws Exception { loading(); PdfOptions options = PdfOptions.create(); XWPFDocument document = new XWPFDocument(inStream); //支持中文字体 options.fontProvider(new ITextFontRegistry() { public Font getFont(String familyName, String encoding, float size, int style, Color color) { try { Resource fileRource = new ClassPathResource("simsun.ttf"); String path = fileRource.getFile().getAbsolutePath(); BaseFont bfChinese = BaseFont.createFont(path, BaseFont.IDENTITY_H, BaseFont.EMBEDDED); Font fontChinese = new Font(bfChinese, size, style, color); if (familyName != null) fontChinese.setFamily(familyName); return fontChinese; } catch (Throwable e) { e.printStackTrace(); return ITextFontRegistry.getRegistry().getFont(familyName, encoding, size, style, color); } } }); processing(); PdfConverter.getInstance().convert(document, outStream, options); finished(); } }
调用方(主流程)的实现代码:
// Excerpt of the calling code: variables such as path, request, attachmentEntity,
// inputStream and fileInputStream are declared outside this excerpt.
Converter converter;
// Builds the target path <webapp real path>/flyingsauser/preview.pdf (backslashes
// normalised to "/"), opens an output stream on it, fetches the source document via
// OSSClientUtil.getFileObject(url), then picks the converter from the URL suffix
// (".docx" -> DocxToPDFConverter, ".doc" -> DocToPDFConverter). Both converters are
// created with showMessages = true and closeStreamsWhenComplete = true; after
// convert() the generated file is reopened as fileInputStream.
// NOTE(review): the FileOutputStream is opened before the exists() check, so that
// check likely never creates the file — confirm.
// NOTE(review): the excerpt ends inside the ".doc" branch; its closing brace and any
// further handling are not shown.
path = request.getSession().getServletContext().getRealPath("").replaceAll("\\\\", "/") + "/flyingsauser/preview.pdf"; File file = new File(path); OutputStream outputStream = new FileOutputStream(file); String url = attachmentEntity.getUrl(); inputStream = OSSClientUtil.getFileObject(url); if(!file.exists()){ file.createNewFile(); } if(url.endsWith(".docx")) { converter = new DocxToPDFConverter(inputStream, outputStream, true, true); converter.convert(); fileInputStream = new FileInputStream(file); } else if(url.endsWith(".doc")){ converter = new DocToPDFConverter(inputStream, outputStream, true, true); converter.convert(); fileInputStream = new FileInputStream(file);
以上就是Word转PDF的实现。其中加入了对中文的支持,因此需要在classpath下放置simsun.ttf字体文件。
具体实现参考了下面这个GitHub项目的代码:
https://github.com/yeokm1/docs-to-pdf-converter
