Java 基础

Java 读取 doc 文件并转换为 HTML

2018-07-21  本文已影响0人  东方舵手
HWPFDocument hfd = new HWPFDocument(wordFile.getInputStream());

            // ==================== Convert to HTML: begin ====================
            // The converter renders into a fresh, empty DOM document.
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            // Reference each embedded picture by its suggested name; the picture
            // bytes themselves are written to disk in the loop further down.
            wordToHtmlConverter.setPicturesManager(new PicturesManager() {
                @Override
                public String savePicture(byte[] content, PictureType pictureType, String suggestedName,
                        float widthInches, float heightInches) {
                    return suggestedName;
                }
            });
            wordToHtmlConverter.processDocument(hfd);

            // Dump every embedded picture into the output directory.
            List<Picture> pics = hfd.getPicturesTable().getAllPictures();
            if (pics != null) {
                for (Picture pic : pics) {
                    // Resolve against `path` the same way the HTML file is resolved below,
                    // so pictures land next to the HTML even when `path` has no trailing
                    // separator (plain concatenation would produce a sibling file instead).
                    File picFile = new File(path, pic.suggestFullFileName());
                    // try-with-resources: the stream was previously never closed,
                    // leaking one file handle per picture.
                    try (FileOutputStream picOut = new FileOutputStream(picFile)) {
                        pic.writeImageContent(picOut);
                    } catch (FileNotFoundException e) {
                        // Best effort, as before: a picture that cannot be created
                        // is reported and skipped; the conversion continues.
                        e.printStackTrace();
                    }
                }
            }

            // Serialize the generated DOM as indented UTF-8 HTML into memory.
            Document htmlDocument = wordToHtmlConverter.getDocument();
            ByteArrayOutputStream outStream = new ByteArrayOutputStream();
            DOMSource domSource = new DOMSource(htmlDocument);
            StreamResult streamResult = new StreamResult(outStream);
            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer serializer = tf.newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(domSource, streamResult);
            outStream.close();
            // Decode with the same charset the serializer wrote. The platform default
            // charset would corrupt non-ASCII text on systems that are not UTF-8.
            String content = outStream.toString("utf-8");
            // NOTE(review): the document is read from `wordFile` but the output name is
            // derived from `file1` — confirm both refer to the same upload.
            name = getNameHtml(file1.getOriginalFilename());
            FileUtils.writeStringToFile(new File(path, name), content, "utf-8");

            // ==================== Convert to HTML: end ====================
            

            // Read the generated HTML file back into a single String.
            File f = new File(path, name);
            // StringBuilder avoids re-copying the whole accumulated text on every line.
            StringBuilder htmlBuffer = new StringBuilder();
            // try-with-resources closes the reader (and the underlying file stream),
            // which was previously left open.
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(new FileInputStream(f), "utf-8"))) {
                String s;
                // Read line by line. Line terminators are intentionally NOT re-inserted,
                // so the result is the file's lines joined back-to-back.
                while ((s = br.readLine()) != null) {
                    htmlBuffer.append(s);
                }
            }
            String AllContent = htmlBuffer.toString();
上一篇 | 下一篇

猜你喜欢

热点阅读