Java POI 读取 .doc 和 .docx 文件时获取图片与段落的对应关系

2020-11-17  本文已影响0人  东本三月

.doc文件

// .doc format (HWPF)
            // Build one WordParagraph per document paragraph, in order, so that the
            // list index of each entry equals its paragraph index.
            List<WordParagraph> wordParagraphs = new ArrayList<>();
            HWPFDocument doc = new HWPFDocument(file_word.getInputStream());
            Range range = doc.getRange();
            int numP = range.numParagraphs();

            for (int i = 0; i < numP; ++i) {
                // Take the text of each paragraph.
                Paragraph p = range.getParagraph(i);
                WordParagraph wordParagraph = new WordParagraph(p.text(), i);
                wordParagraph.init();
                wordParagraphs.add(wordParagraph);
            }

            // Use the document's own pictures table instead of constructing one by
            // hand with a placeholder main-stream buffer.
            PicturesTable pTable = doc.getPicturesTable();
            int numChar = range.numCharacterRuns();

            // Index of the paragraph the current character run belongs to. It is
            // advanced by one for every '\r' seen so far in the run texts, and the
            // '\r' characters of a run are counted before that run's picture is
            // assigned.
            int paragraphOrder = 0;
            for (int j = 0; j < numChar; ++j) {
                CharacterRun cRun = range.getCharacterRun(j);

                // Count the breaks in the run's actual text rather than in its
                // toString() output, whose format is meant for debugging.
                String runText = cRun.text();
                for (int k = 0; k < runText.length(); ++k) {
                    if (runText.charAt(k) == '\r') {
                        ++paragraphOrder;
                    }
                }

                if (pTable.hasPicture(cRun)) {
                    Picture picture = pTable.extractPicture(cRun, true);
                    // Pictures whose computed index falls past the last paragraph
                    // are skipped rather than causing an out-of-bounds access.
                    if (paragraphOrder < wordParagraphs.size()) {
                        wordParagraphs.get(paragraphOrder).addPictures(picture);
                    }
                }
            }

.docx文件

// .docx format (XWPF)
            XWPFDocument document = new XWPFDocument(file_word.getInputStream());
            // Walk the paragraphs in the order the document returns them; the
            // running counter is the paragraph index handed to WordParagraph.
            int paragraphIndex = 0;
            for (XWPFParagraph paragraph : document.getParagraphs()) {
                // Paragraph text plus its position in the document.
                WordParagraph wordParagraph =
                        new WordParagraph(paragraph.getParagraphText(), paragraphIndex);
                // Attach the pictures found in this paragraph's runs before init().
                wordParagraph.setPictures(readImageInfoInParagraph(paragraph));
                wordParagraph.init();
                wordParagraphs.add(wordParagraph);
                paragraphIndex++;
            }
    /**
     * Collects the picture data of every picture embedded in one paragraph.
     *
     * <p>The paragraph's runs are visited in order, and for each run its embedded
     * pictures are visited in order, so the result follows document order.
     *
     * @param paragraph the paragraph whose runs are scanned
     * @return a new list with one entry per embedded picture; empty when the
     *         paragraph has no runs or no embedded pictures
     */
    public static List<XWPFPictureData> readImageInfoInParagraph(XWPFParagraph paragraph) {
        List<XWPFPictureData> pictureData = new ArrayList<>();
        for (XWPFRun run : paragraph.getRuns()) {
            for (XWPFPicture embedded : run.getEmbeddedPictures()) {
                pictureData.add(embedded.getPictureData());
            }
        }
        return pictureData;
    }
上一篇 下一篇

猜你喜欢

热点阅读