使用 POI 将 Word 文档转换为 HTML(仅支持 .doc 格式):
package com.vito.demo.test
;
import java.io.ByteArrayOutputStream
;
import java.io.File
;
import java.io.FileInputStream
;
import java.io.FileNotFoundException
;
import java.io.FileOutputStream
;
import java.io.InputStream
;
import java.util.List
;
import javax.xml.parsers.DocumentBuilderFactory
;
import javax.xml.transform.OutputKeys
;
import javax.xml.transform.Transformer
;
import javax.xml.transform.TransformerFactory
;
import javax.xml.transform.dom.DOMSource
;
import javax.xml.transform.stream.StreamResult
;
import org.apache.commons.io.FileUtils
;
import org.apache.poi.hwpf.HWPFDocument
;
import org.apache.poi.hwpf.converter.PicturesManager
;
import org.apache.poi.hwpf.converter.WordToHtmlConverter
;
import org.apache.poi.hwpf.usermodel.Picture
;
import org.apache.poi.hwpf.usermodel.PictureType
;
import org.w3c.dom.Document
;
public class PoiWordToHtml {
public static void main(String[] args)
throws Throwable {
final String path =
"G:\\doc\\";
final String file =
"客户需求文档.doc";
InputStream input =
new FileInputStream(path + file)
;
HWPFDocument wordDocument =
new HWPFDocument(input)
;
WordToHtmlConverter wordToHtmlConverter =
new WordToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder()
.newDocument())
;
wordToHtmlConverter.setPicturesManager(
new PicturesManager() {
public String
savePicture(
byte[] content
, PictureType pictureType
,
String suggestedName
, float widthInches
, float heightInches) {
return suggestedName
;
}
})
;
wordToHtmlConverter.processDocument(wordDocument)
;
List pics = wordDocument.getPicturesTable().getAllPictures()
;
if (pics !=
null) {
for (
int i =
0; i < pics.size()
; i++) {
Picture pic = (Picture) pics.get(i)
;
try {
pic.writeImageContent(
new FileOutputStream(path
+ pic.suggestFullFileName()))
;
}
catch (FileNotFoundException e) {
e.printStackTrace()
;
}
}
}
Document htmlDocument = wordToHtmlConverter.getDocument()
;
ByteArrayOutputStream outStream =
new ByteArrayOutputStream()
;
DOMSource domSource =
new DOMSource(htmlDocument)
;
StreamResult streamResult =
new StreamResult(outStream)
;
TransformerFactory tf = TransformerFactory.newInstance()
;
Transformer serializer = tf.newTransformer()
;
serializer.setOutputProperty(OutputKeys.ENCODING
, "utf-8")
;
serializer.setOutputProperty(OutputKeys.INDENT
, "yes")
;
serializer.setOutputProperty(OutputKeys.METHOD
, "html")
;
serializer.transform(domSource
, streamResult)
;
outStream.close()
;
String content =
new String(outStream.toByteArray())
;
FileUtils.write(
new File(path
, "1.html")
, content
, "utf-8")
;
}
}
使用 POI 将 HTML 转换为 Word 文档(.doc):
/**
 * Builds an HTML document from {@code f:\1.html} (body) and {@code f:\1.css}
 * (style sheet) and stores it in {@code f:\1.doc} through
 * {@code inputStreamToWord}.
 *
 * @throws Exception if either input file cannot be read or the output file
 *                   cannot be written
 */
public void htmlToWord2()
        throws Exception {
    final String body;
    final String css;
    // Both source files are closed here, including when reading one fails.
    try (InputStream bodyIs = new FileInputStream("f:\\1.html");
            InputStream cssIs = new FileInputStream("f:\\1.css")) {
        body = this.getContent(bodyIs);
        css = this.getContent(cssIs);
    }

    // Assemble a standard HTML document.
    String content =
            "<html><head><style>" + css +
            "</style></head><body>" + body +
            "</body></html>";

    // NOTE(review): the inputs are decoded as UTF-8 but the result is encoded
    // as GBK and the markup declares no charset - presumably this targets
    // Word on a GBK-locale system; confirm.
    //
    // The output file is opened only after the inputs were read, so a read
    // failure does not truncate an existing f:\1.doc. The helper closes both
    // streams on success; try-with-resources covers the failure path, and a
    // second close on these stream types has no effect.
    try (InputStream is = new ByteArrayInputStream(content.getBytes("GBK"));
            OutputStream os = new FileOutputStream("f:\\1.doc")) {
        this.inputStreamToWord(is, os);
    }
}
/**
 * Stores the content of {@code is} as an entry named {@code "WordDocument"}
 * in a new {@code POIFSFileSystem} and writes that file system to {@code os}.
 * Both streams are closed before this method returns, whether or not the
 * write succeeds. Exceptions are not handled here; they propagate to the
 * caller.
 *
 * @param is stream whose content becomes the {@code "WordDocument"} entry
 * @param os stream that receives the serialized file system
 * @throws IOException if reading, writing, or closing a stream fails
 */
private void inputStreamToWord(InputStream is
        , OutputStream os)
        throws IOException {
    // Resources are closed in reverse order (os first, then is), matching
    // the original close order, and now also when an exception is thrown.
    try (InputStream in = is; OutputStream out = os) {
        POIFSFileSystem fs = new POIFSFileSystem();
        // Corresponds to org.apache.poi.hdf.extractor.WordDocument
        fs.createDocument(in, "WordDocument");
        fs.writeFilesystem(out);
    }
}
/**
 * Reads the given input streams one after another, decodes their bytes as
 * UTF-8 and returns the concatenated text.
 * <p>
 * The text is copied character by character, so line terminators are kept.
 * A line-based copy would drop them and fuse text on adjacent lines
 * ({@code "a\nb"} would become {@code "ab"}).
 * <p>
 * The streams are read to their end but not closed; closing them is left to
 * the caller. Exceptions are not handled here; they propagate to the caller.
 *
 * @param ises the streams to read, in order
 * @return the decoded content of all streams, or {@code null} when
 *         {@code ises} itself is {@code null}
 * @throws IOException if reading from one of the streams fails
 */
private String
getContent(InputStream... ises)
        throws IOException {
    if (ises == null) {
        return null;
    }
    StringBuilder result = new StringBuilder();
    char[] buffer = new char[4096];
    for (InputStream is : ises) {
        InputStreamReader reader = new InputStreamReader(is, "UTF-8");
        int count;
        while ((count = reader.read(buffer)) != -1) {
            result.append(buffer, 0, count);
        }
    }
    return result.toString();
}