private PicturesSource(HWPFDocument doc) { picturesTable = doc.getPicturesTable(); all = picturesTable.getAllPictures(); // Build the Offset-Picture lookup map lookup = new HashMap<Integer, Picture>(); for (Picture p : all) { lookup.put(p.getStartOffset(), p); } // Work out which Pictures aren't referenced by // a \u0001 in the main text // These are \u0008 escher floating ones, ones // found outside the normal text, and who // knows what else... nonU1based = new ArrayList<Picture>(); nonU1based.addAll(all); Range r = doc.getRange(); for (int i = 0; i < r.numCharacterRuns(); i++) { CharacterRun cr = r.getCharacterRun(i); if (picturesTable.hasPicture(cr)) { Picture p = getFor(cr); int at = nonU1based.indexOf(p); nonU1based.set(at, null); } } }
/**
 * Prints every picture from the document's pictures table to standard
 * output, one per line. For an {@code HWPFOldDocument} only a
 * "not supported" notice is printed.
 */
private void dumpPictures() {
    final boolean oldFormat = _doc instanceof HWPFOldDocument;
    if (oldFormat) {
        System.out.println("Word 95 not supported so far");
    } else {
        final HWPFDocument document = (HWPFDocument) _doc;
        final List<Picture> pictures = document.getPicturesTable().getAllPictures();
        for (Picture current : pictures) {
            System.out.println(current);
        }
    }
}
/**
 * Writes the string form of each picture in the document's pictures table
 * to standard output. An {@code HWPFOldDocument} is not handled; a notice
 * is printed instead.
 */
private void dumpPictures() {
    final boolean oldFormat = _doc instanceof HWPFOldDocument;
    if (oldFormat) {
        System.out.println("Word 95 not supported so far");
    } else {
        final HWPFDocument document = (HWPFDocument) _doc;
        final List<Picture> pictures = document.getPicturesTable().getAllPictures();
        for (Picture current : pictures) {
            // Explicit toString() kept on purpose: it matches the original call.
            System.out.println(current.toString());
        }
    }
}
//you can use the org.apache.poi.hwpf.extractor.WordExtractor to get the text String fileName = "example.doc"; HWPFDocument wordDoc = new HWPFDocument(new FileInputStream(fileName)); WordExtractor extractor = new WordExtractor(wordDoc); String[] text = extractor.getParagraphText(); int lineCounter = text.length; String articleStr = ""; // This string object use to store text from the word document. for(int index = 0;index < lineCounter;++ index){ String paragraphStr = text[index].replaceAll("\r\n","").replaceAll("\n","").trim(); int paragraphLength = paragraphStr.length(); if(paragraphLength != 0){ articleStr.concat(paragraphStr); } } //you can use the org.apache.poi.hwpf.usermodel.Picture to get the image List<Picture> picturesList = wordDoc.getPicturesTable().getAllPictures(); for(int i = 0;i < picturesList.size();++i){ BufferedImage image = null; Picture pic = picturesList.get(i); image = ImageIO.read(new ByteArrayInputStream(pic.getContent())); if(image != null){ System.out.println("Image["+i+"]"+" ImageWidth:"+image.getWidth()+" ImageHeight:"+image.getHeight()+" Suggest Image Format:"+pic.suggestFileExtension()); } }
// Open the Word document; fs is the POIFSFileSystem object.
final HWPFDocument doc = new HWPFDocument(fs);
final Range range = doc.getRange();

// Ask the document for its pictures table and list everything in it.
final PicturesTable pt = doc.getPicturesTable();
final List<Picture> listPic = pt.getAllPictures();

// Only the first picture is read here; iterate listPic up to its size to
// get the remaining images.
final Picture pic = listPic.get(0);
final byte[] picArr = pic.getContent();
// Fetch the document's pictures table and build a PicturesSource over the same document.
// headerFooter starts out as null; presumably it is assigned further down (not visible here).
PicturesTable pictureTable = document.getPicturesTable(); PicturesSource pictures = new PicturesSource(document); HeaderStories headerFooter = null;
private PicturesSource(HWPFDocument doc) { picturesTable = doc.getPicturesTable(); all = picturesTable.getAllPictures(); // Build the Offset-Picture lookup map lookup = new HashMap<Integer, Picture>(); for (Picture p : all) { lookup.put(p.getStartOffset(), p); } // Work out which Pictures aren't referenced by // a \u0001 in the main text // These are \u0008 escher floating ones, ones // found outside the normal text, and who // knows what else... nonU1based = new ArrayList<Picture>(); nonU1based.addAll(all); Range r = doc.getRange(); for (int i = 0; i < r.numCharacterRuns(); i++) { CharacterRun cr = r.getCharacterRun(i); if (picturesTable.hasPicture(cr)) { Picture p = getFor(cr); int at = nonU1based.indexOf(p); nonU1based.set(at, null); } } }
public PictureRunMapper(HWPFDocument doc) { picturesTable = doc.getPicturesTable(); all = picturesTable.getAllPictures(); // Build the Offset-Picture lookup map lookup = new HashMap<>(); for (Picture p : all) { lookup.put(p.getStartOffset(), p); } // Work out which Pictures aren't referenced by // a \u0001 in the main text // These are \u0008 escher floating ones, ones // found outside the normal text, and who // knows what else... nonU1based = new ArrayList<>(); nonU1based.addAll(all); Range r = doc.getRange(); for (int i = 0; i < r.numCharacterRuns(); i++) { CharacterRun cr = r.getCharacterRun(i); if (picturesTable.hasPicture(cr)) { Picture p = getFor(cr); int at = nonU1based.indexOf(p); nonU1based.set(at, null); } } }
private PicturesSource(HWPFDocument doc) { picturesTable = doc.getPicturesTable(); all = picturesTable.getAllPictures(); // Build the Offset-Picture lookup map lookup = new HashMap<Integer, Picture>(); for (Picture p : all) { lookup.put(p.getStartOffset(), p); } // Work out which Pictures aren't referenced by // a \u0001 in the main text // These are \u0008 escher floating ones, ones // found outside the normal text, and who // knows what else... nonU1based = new ArrayList<Picture>(); nonU1based.addAll(all); Range r = doc.getRange(); for (int i = 0; i < r.numCharacterRuns(); i++) { CharacterRun cr = r.getCharacterRun(i); if (picturesTable.hasPicture(cr)) { Picture p = getFor(cr); int at = nonU1based.indexOf(p); nonU1based.set(at, null); } } }
&& ( (HWPFDocument) wordDocument ).getPicturesTable() .hasPicture( characterRun ) ) Picture picture = newFormat.getPicturesTable().extractPicture( characterRun, true );
&& ( (HWPFDocument) wordDocument ).getPicturesTable() .hasPicture( characterRun ) ) Picture picture = newFormat.getPicturesTable().extractPicture( characterRun, true );
// Fetch the document's pictures table and build a PicturesSource over the same document.
PicturesTable pictureTable = document.getPicturesTable(); PicturesSource pictures = new PicturesSource(document);
// Fetch the document's pictures table and build a PicturesSource over the same document.
// headerFooter starts out as null; presumably it is assigned further down (not visible here).
PicturesTable pictureTable = document.getPicturesTable(); PicturesSource pictures = new PicturesSource(document); HeaderStories headerFooter = null;