<dependency> <groupId>net.sourceforge.htmlcleaner</groupId> <artifactId>htmlcleaner</artifactId> <version>2.15</version> </dependency> <dependency> <groupId>org.xhtmlrenderer</groupId> <artifactId>flying-saucer-pdf-itext5</artifactId> <version>9.0.7</version> </dependency> <dependency> <groupId>com.itextpdf</groupId> <artifactId>itextpdf</artifactId> <version>5.4.3</version> </dependency>
public class HtmlUrlToPdfUtil {
private static final Logger logger = LoggerFactory.getLogger(HtmlUrlToPdfUtil.class); /** * 下載url html to pdf @throws */ public static void urlToPdf(HttpServletResponse response, String url, String fileName) { ByteArrayOutputStream out = null; HtmlCleaner cleaner = null; try { if (null == url || "".equals(url) || null == fileName || "".equals(fileName)) { String data = new String("参数不正确,无法进行导出您需要的文件!"); response.setCharacterEncoding("gbk"); response.setHeader("contentType", "text/html; charset=gbk"); response.getOutputStream().write(data.getBytes("gbk")); response.flushBuffer(); return; } out = new ByteArrayOutputStream(); cleaner = new HtmlCleaner(); CleanerProperties props = cleaner.getProperties(); // 根据 String 表示形式创建 URL 对象。 URL htmlUrl = new URL(url); // 返回一个 URLConnection 对象,它表示到 URL 所引用的远程对象的连接。 HttpURLConnection conn = (HttpURLConnection) htmlUrl.openConnection(); ByteArrayOutputStream outHtml = new ByteArrayOutputStream(); InputStream inn = conn.getInputStream(); byte[] buffer = new byte[1024]; int len = 0; while ((len = inn.read(buffer)) != -1) { outHtml.write(buffer, 0, len); } byte[] data = outHtml.toByteArray(); logger.info("获取时utf-8:" + new String(data, "utf-8")); TagNode node = cleaner.clean(new String(data, "utf-8")); new PrettyXmlSerializer(props).writeToStream(node, out, "utf-8"); ITextRenderer renderer = new ITextRenderer(); renderer.setDocumentFromString(new String(out.toByteArray(), "utf-8")); ITextFontResolver fontResolver = renderer.getFontResolver(); // 中文字体 fontResolver.addFont("/com/tontisa/erp/font/simsun.ttc", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED); renderer.layout(); // 设置响应文档类型为pdf response.setContentType("application/pdf;charset=utf-8"); response.addHeader("Content-Disposition", "attachment; filename=" + fileName.replaceAll(" ", "")); renderer.createPDF(response.getOutputStream()); // 设置响应数据大小 // Finishing up renderer.finishPDF(); response.flushBuffer(); out.flush(); out.close(); outHtml.flush(); outHtml.close(); } catch (Exception e) { logger.error("urlToPdf异常", e); } } /** * 下載 String html content to pdf @throws */ public static void htmlToPdf(HttpServletResponse response, String html, String fileName) { ByteArrayOutputStream out = null; HtmlCleaner cleaner = null; try { if (null == html || "".equals(html) || null == fileName || "".equals(fileName)) { String data = new String("参数不正确,无法进行导出您需要的文件!"); response.setCharacterEncoding("gbk"); response.setHeader("contentType", "text/html; charset=gbk"); response.getOutputStream().write(data.getBytes("gbk")); response.flushBuffer(); return; } out = new ByteArrayOutputStream(); cleaner = new HtmlCleaner(); CleanerProperties props = cleaner.getProperties(); // 根据 String 表示形式创建 URL 对象。 logger.info("获取时utf-8:" + html); TagNode node = cleaner.clean(html); new PrettyXmlSerializer(props).writeToStream(node, out, "utf-8"); ITextRenderer renderer = new ITextRenderer(); renderer.setDocumentFromString(new String(out.toByteArray(), "utf-8")); ITextFontResolver fontResolver = renderer.getFontResolver(); // 中文字体 fontResolver.addFont("/com/tontisa/erp/font/simsun.ttc", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED); renderer.layout(); // 设置响应文档类型为pdf response.setContentType("application/pdf;charset=utf-8"); response.addHeader("Content-Disposition", "attachment; filename=" + fileName.replaceAll(" ", "")); renderer.createPDF(response.getOutputStream()); // 设置响应数据大小 // Finishing up renderer.finishPDF(); response.flushBuffer(); out.flush(); out.close(); } catch (Exception e) { logger.error("htmlToPdf异常", e); } } @SuppressWarnings("resource") public static void main(String[] args) throws Exception { try { HtmlCleaner cleaner = new HtmlCleaner(); CleanerProperties props = cleaner.getProperties(); System.out.println("----------"); InputStream inputStream = new FileInputStream("D:\\111.html"); String encoding = System.getProperty("file.encoding"); System.out.println(encoding); ByteArrayOutputStream outHtml = new ByteArrayOutputStream(); byte[] buffer = new byte[1024]; int len = 0; while ((len = inputStream.read(buffer)) != -1) { outHtml.write(buffer, 0, len); } byte[] data = outHtml.toByteArray(); logger.info("转换前utf-8:" + new String(data, "utf-8")); TagNode node = cleaner.clean(new String(data, "utf-8")); ByteArrayOutputStream out = new ByteArrayOutputStream(); new PrettyXmlSerializer(props).writeToStream(node, out, "utf-8"); ITextRenderer renderer = new ITextRenderer(); renderer.setDocumentFromString(new String(out.toByteArray(), "utf-8")); ITextFontResolver fontResolver = renderer.getFontResolver(); fontResolver.addFont("/com/tontisa/erp/font/simsun.ttc", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED); renderer.layout(); OutputStream outputStream = new FileOutputStream("D:/HTMLTOPDF" + (new Date()).getTime() + ".pdf"); renderer.createPDF(outputStream); renderer.finishPDF(); out.flush(); out.close(); } catch (Exception e) { logger.error("htmltopfd异常", e); } } }