Java 读取 doc 文件并转换为 HTML
2018-07-21 本文已影响0人
东方舵手
// Load the uploaded Word (.doc) file into POI's HWPF document model.
HWPFDocument hfd = new HWPFDocument(wordFile.getInputStream());
// ============ convert to HTML ============
// The converter builds its HTML output into a freshly created, empty DOM document.
Document blankHtmlDom = DocumentBuilderFactory.newInstance()
        .newDocumentBuilder()
        .newDocument();
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(blankHtmlDom);
// Picture callback: stores nothing itself and simply hands back the suggested
// file name as the value the converter receives for each picture.
PicturesManager suggestedNameOnly = new PicturesManager() {
    @Override
    public String savePicture(byte[] content, PictureType pictureType, String suggestedName,
            float widthInches, float heightInches) {
        return suggestedName;
    }
};
wordToHtmlConverter.setPicturesManager(suggestedNameOnly);
wordToHtmlConverter.processDocument(hfd);
// Write every picture embedded in the Word document into the output directory,
// using the file name POI suggests for it.
List<?> pics = hfd.getPicturesTable().getAllPictures();
if (pics != null) {
    for (Object item : pics) {
        Picture pic = (Picture) item;
        // Resolve the target against the directory (same form as the HTML file)
        // so a missing trailing separator in path cannot produce a wrong location.
        File pictureFile = new File(path, pic.suggestFullFileName());
        // try-with-resources closes the stream after each picture; previously
        // every FileOutputStream opened here was left open.
        try (FileOutputStream pictureOut = new FileOutputStream(pictureFile)) {
            pic.writeImageContent(pictureOut);
        } catch (FileNotFoundException e) {
            // Best effort: a picture whose file cannot be created is reported
            // and skipped so the remaining pictures are still exported.
            e.printStackTrace();
        }
    }
}
// Serialize the DOM produced by the converter into UTF-8 encoded, indented HTML.
Document htmlDocument = wordToHtmlConverter.getDocument();
ByteArrayOutputStream outStream = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(outStream);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
outStream.close();
// Decode with the same charset the serializer wrote. Decoding with the platform
// default charset corrupts non-ASCII text wherever that default is not UTF-8.
String content = outStream.toString("utf-8");
// NOTE(review): the document is read from wordFile but the name is taken from
// file1 - confirm both refer to the same upload.
name = getNameHtml(file1.getOriginalFilename());
FileUtils.writeStringToFile(new File(path, name), content, "utf-8");
// ==================== convert to HTML: end ====================
// Read the generated HTML file back from disk into a single String.
File f = new File(path, name);
// Input stream decoded as UTF-8, the encoding the file was written with.
InputStreamReader isr1 = new InputStreamReader(new FileInputStream(f), "utf-8");
BufferedReader br = new BufferedReader(isr1);
// Holds the line currently being read.
String s;
// The whole HTML content; line terminators are dropped, as readLine() strips them.
String AllContent;
try {
    // Accumulate in a StringBuilder: repeated String concatenation re-copies
    // the entire text on every line.
    StringBuilder htmlText = new StringBuilder();
    // Read line by line.
    while ((s = br.readLine()) != null) {
        htmlText.append(s);
    }
    AllContent = htmlText.toString();
} finally {
    // Closing the BufferedReader also closes the wrapped reader and file stream;
    // closing an already closed reader again has no effect.
    br.close();
}