/**
 * Loads a Word document, prints selected summary-information fields to
 * standard output and returns the embedded thumbnail as WMF data.
 *
 * <p>Printed, in order: the author, the application name and title joined
 * by {@code ":"}, the thumbnail's clipboard format and its clipboard format
 * tag.
 *
 * @param docFile
 *            the Word file to load
 * @return the value of {@code Thumbnail.getThumbnailAsWMF()} for the
 *         document's thumbnail
 * @throws Exception
 *             if loading the document or reading the thumbnail fails
 */
static byte[] process(File docFile) throws Exception {
    final HWPFDocumentCore loadedDocument = AbstractWordUtils.loadDoc(docFile);
    final SummaryInformation info = loadedDocument.getSummaryInformation();

    System.out.println(info.getAuthor());

    final String applicationAndTitle = info.getApplicationName() + ":" + info.getTitle();
    System.out.println(applicationAndTitle);

    final Thumbnail preview = new Thumbnail(info.getThumbnail());
    System.out.println(preview.getClipboardFormat());
    System.out.println(preview.getClipboardFormatTag());

    return preview.getThumbnailAsWMF();
}
/**
 * Builds a Range directly on top of a document. Typically this yields a
 * Range spanning the entire document, or one complete part of it (for
 * example the main text, a header or a comment).
 *
 * <p>The section, paragraph and character-run lists as well as the text
 * buffer are taken from the document's own tables; the new range has no
 * parent (its parent reference wraps {@code null}).
 *
 * @param start
 *            Starting character offset of the range.
 * @param end
 *            Ending character offset of the range.
 * @param doc
 *            The HWPFDocument the range is based on.
 */
public Range(int start, int end, HWPFDocumentCore doc) {
    // Boundaries and owning document.
    _start = start;
    _end = end;
    _doc = doc;

    // Structural tables and text are shared with the document.
    _sections = doc.getSectionTable().getSections();
    _paragraphs = doc.getParagraphTable().getParagraphs();
    _characters = doc.getCharacterTable().getTextRuns();
    _text = doc.getText();

    // A range built from a document has no enclosing range.
    _parent = new WeakReference<>(null);

    sanityCheckStartEnd();
}
/**
 * Splits the document text into paragraph strings and stores them in
 * {@code paragraphs}, keyed by the index of each paragraph's terminating
 * character.
 *
 * <p>A paragraph ends at a carriage return (13), bell (7) or form feed (12)
 * character, and the terminator is kept as the last character of the stored
 * string. Text after the final terminator is not stored.
 */
private void buildParagraphs() {
    paragraphs = new LinkedHashMap<>();

    final String documentText = _doc.getDocumentText();
    int partStart = 0; // index of the first character of the current paragraph

    for (int pos = 0; pos < documentText.length(); pos++) {
        switch (documentText.charAt(pos)) {
            case 13:
            case 7:
            case 12:
                // Terminator found: store everything up to and including it.
                paragraphs.put(Integer.valueOf(pos), documentText.substring(partStart, pos + 1));
                partStart = pos + 1;
                break;
            default:
                break;
        }
    }
}
/**
 * Returns the list this paragraph belongs to.
 *
 * @return a new {@code HWPFList} built from the document's style sheet and
 *         list tables and this paragraph's ilfo value
 * @throws IllegalStateException
 *             if the ilfo value is {@code 0x000} or {@code 0xF801}, which
 *             this method treats as "paragraph not in a list"
 */
public HWPFList getList() {
    final int ilfo = getIlfo();
    final boolean inList = ilfo != 0x000 && ilfo != 0xF801;
    if (!inList) {
        throw new IllegalStateException("Paragraph not in list");
    }

    return new HWPFList(getDocument().getStyleSheet(), getDocument().getListTables(), ilfo);
}
/**
 * Prints every CHPX (character run) of the document to standard output.
 *
 * <p>For each run the CHPX itself is printed, optionally followed by its
 * character properties and its SPRM operations, and finally the run's text
 * with every character whose code is below 30 written as {@code \0x}
 * followed by its hexadecimal value.
 *
 * @param withProperties
 *            whether to print the character properties resolved against the
 *            style sheet with {@code StyleSheet.NIL_STYLE}
 * @param withSprms
 *            whether to print each SPRM of the run's grpprl, one per line,
 *            prefixed with a tab
 */
public void dumpChpx(boolean withProperties, boolean withSprms) {
    for (CHPX run : _doc.getCharacterTable().getTextRuns()) {
        System.out.println(run);

        if (withProperties) {
            System.out.println(
                    run.getCharacterProperties(_doc.getStyleSheet(), (short) StyleSheet.NIL_STYLE));
        }

        if (withSprms) {
            for (SprmIterator sprms = new SprmIterator(run.getGrpprl(), 0); sprms.hasNext();) {
                SprmOperation operation = sprms.next();
                System.out.println("\t" + operation);
            }
        }

        // Range limited to this run; toString is overridden so the range
        // identifies itself as a CHPX range wherever it gets printed.
        final Range runRange = new Range(run.getStart(), run.getEnd(), _doc.getOverallRange()) {
            @Override
            public String toString() {
                return "CHPX range (" + super.toString() + ")";
            }
        };
        final String runText = runRange.text();

        final StringBuilder escaped = new StringBuilder();
        for (int i = 0; i < runText.length(); i++) {
            final char current = runText.charAt(i);
            if (current < 30) {
                escaped.append("\\0x").append(Integer.toHexString(current));
            } else {
                escaped.append(current);
            }
        }
        System.out.println(escaped);
    }
}
/** * Inserts text into the front of this range. * * @param text * The text to insert * @return The character run that text was inserted into. */ public CharacterRun insertBefore( String text ) { initAll(); _text.insert( _start, text ); _doc.getCharacterTable().adjustForInsert( _charStart, text.length() ); _doc.getParagraphTable().adjustForInsert( _parStart, text.length() ); _doc.getSectionTable().adjustForInsert( _sectionStart, text.length() ); if ( _doc instanceof HWPFDocument ) { ( (BookmarksImpl) ( (HWPFDocument) _doc ).getBookmarks() ) .afterInsert( _start, text.length() ); } adjustForInsert( text.length() ); // update the FIB.CCPText + friends fields adjustFIB( text.length() ); sanityCheck(); return getCharacterRun( 0 ); }
/**
 * Inserts text at the start of this range and gives that text the supplied
 * character properties.
 *
 * <p>The properties are compressed relative to the character style of the
 * paragraph at the start of this range, registered in the character table
 * at the range start, and then the text itself is inserted.
 *
 * @param text
 *            The text to insert.
 * @param props
 *            The CharacterProperties to give the text.
 * @return A new CharacterRun that has the given text and properties and is
 *         now a part of the document.
 * @deprecated POI 3.8 beta 4. User code should not work with {@link CharacterProperties}
 */
@Deprecated
private CharacterRun insertBefore(String text, CharacterProperties props) {
    initAll();

    // Style of the paragraph the text is inserted into serves as the baseline.
    final short paragraphStyle = _paragraphs.get(_parStart).getIstd();
    final CharacterProperties styleDefaults = _doc.getStyleSheet().getCharacterStyle(paragraphStyle);

    final SprmBuffer sprms = new SprmBuffer(
            CharacterSprmCompressor.compressCharacterProperty(props, styleDefaults), 0);
    _doc.getCharacterTable().insert(_charStart, _start, sprms);

    return insertBefore(text);
}
StyleSheet ss = _doc.getStyleSheet(); ParagraphProperties baseStyle = ss.getParagraphStyle(styleIndex); CharacterProperties baseChp = ss.getCharacterStyle(styleIndex); SprmBuffer buf = new SprmBuffer(withIndex, 2); _doc.getParagraphTable().insert(_parStart, _start, buf); insertBefore(text, baseChp); return getParagraph(0);
/**
 * Processes a whole Word document.
 *
 * <p>The summary information, when present, is handled first; any exception
 * raised while doing so is logged as a warning and otherwise ignored. The
 * document body is then processed as a single section when the document
 * range has exactly one section, or as a document part otherwise.
 * {@code afterProcess()} is invoked once the body has been processed.
 *
 * @param wordDocument
 *            the document to process
 */
public void processDocument(HWPFDocumentCore wordDocument) {
    try {
        final SummaryInformation info = wordDocument.getSummaryInformation();
        if (info != null) {
            processDocumentInformation(info);
        }
    } catch (Exception failure) {
        // Best effort only: missing or broken metadata must not stop conversion.
        logger.log(POILogger.WARN,
                "Unable to process document summary information: ", failure, failure);
    }

    final Range wholeDocument = wordDocument.getRange();
    if (wholeDocument.numSections() == 1) {
        processSingleSection(wordDocument, wholeDocument.getSection(0));
    } else {
        processDocumentPart(wordDocument, wholeDocument);
    }
    afterProcess();
}
byte[] mainStream = _doc.getMainStream(); for ( PAPX papx : _doc.getParagraphTable().getParagraphs() ) { System.out.println( papx ); Paragraph paragraph = Paragraph.newParagraph( _doc.getOverallRange(), papx ); System.out.println( paragraph.getProps() );
_props.setAccessible( true ); for ( PAPX papx : _doc.getParagraphTable().getParagraphs() ) _doc.getOverallRange(), papx ); System.out.println( _props.get( paragraph ) );
/**
 * Prints every paragraph of the document's overall range to standard
 * output, one line per paragraph in the form {@code index:<TAB>paragraph}.
 *
 * @param withText
 *            whether to additionally print each paragraph's text on the
 *            following line
 */
public void dumpParagraphsDom(boolean withText) {
    final Range overall = _doc.getOverallRange();

    int index = 0;
    while (index < overall.numParagraphs()) {
        final Paragraph current = overall.getParagraph(index);
        System.out.println(index + ":\t" + current);
        if (withText) {
            System.out.println(current.text());
        }
        index++;
    }
}
/**
 * Prints the document's file information block (FIB) to standard output.
 */
public void dumpFIB() {
    System.out.println(_doc.getFileInformationBlock());
}
public String getFontName() { if (_doc.getFontTable() == null) // old word format return null; return _doc.getFontTable().getMainFont(_props.getFtcAscii()); }
/**
 * Inserts a list entry at the beginning of this range.
 *
 * <p>The supplied properties are modified in place: their ilfo is set to
 * the override index that the list tables report for {@code listID}, and
 * their ilvl is set to {@code level} narrowed to a byte.
 *
 * @param props
 *            The properties of the list entry. All list entries are
 *            paragraphs.
 * @param listID
 *            The id of the list that contains the properties.
 * @param level
 *            The indentation level of the list.
 * @param styleIndex
 *            The base style's index in the stylesheet.
 * @return The empty ListEntry that is now part of the document.
 * @throws NoSuchElementException
 *             if the list tables hold no level for the given list id and
 *             level
 * @deprecated User code shall not work with {@link ParagraphProperties}
 */
@Deprecated
public ListEntry insertBefore(ParagraphProperties props, int listID, int level, int styleIndex) {
    final ListTables listTables = _doc.getListTables();

    final boolean levelExists = listTables.getLevel(listID, level) != null;
    if (!levelExists) {
        throw new NoSuchElementException("The specified list and level do not exist");
    }

    props.setIlfo(listTables.getOverrideIndexFromListID(listID));
    props.setIlvl((byte) level);

    return (ListEntry) insertBefore(props, styleIndex);
}
/**
 * Prints every entry of {@code paragraphs} to standard output as
 * {@code [...; end): text}, where {@code end} is the entry's key plus one.
 *
 * <p>Optionally, each PAPX of the document's paragraph table whose
 * {@code [start, end)} interval contains the paragraph's key offset is
 * printed too, followed by its SPRMs (iterated from offset 2 of the
 * grpprl). If no PAPX matches, a notice is printed instead.
 *
 * @param dumpAssotiatedPapx
 *            whether to print the PAPX entries associated with each
 *            paragraph
 */
public void dumpParagraphs(boolean dumpAssotiatedPapx) {
    for (Map.Entry<Integer, String> paragraphEntry : paragraphs.entrySet()) {
        final int lastCharOffset = paragraphEntry.getKey();
        System.out.println("[...; " + (lastCharOffset + 1) + "): " + paragraphEntry.getValue());

        if (!dumpAssotiatedPapx) {
            continue;
        }

        boolean matched = false;
        for (PAPX candidate : _doc.getParagraphTable().getParagraphs()) {
            // Skip PAPX entries that do not cover the paragraph's last character.
            if (candidate.getStart() > lastCharOffset || lastCharOffset >= candidate.getEnd()) {
                continue;
            }

            matched = true;
            System.out.println("* " + candidate);
            dumpSprms(new SprmIterator(candidate.getGrpprl(), 2), "** ");
        }

        if (!matched) {
            System.out.println("* " + "NO PAPX ASSOTIATED WITH PARAGRAPH!");
        }
    }
}
/** * Inserts text into the front of this range. * * @param text * The text to insert * @return The character run that text was inserted into. */ public CharacterRun insertBefore( String text ) { initAll(); _text.insert( _start, text ); _doc.getCharacterTable().adjustForInsert( _charStart, text.length() ); _doc.getParagraphTable().adjustForInsert( _parStart, text.length() ); _doc.getSectionTable().adjustForInsert( _sectionStart, text.length() ); if ( _doc instanceof HWPFDocument ) { ( (BookmarksImpl) ( (HWPFDocument) _doc ).getBookmarks() ) .afterInsert( _start, text.length() ); } adjustForInsert( text.length() ); // update the FIB.CCPText + friends fields adjustFIB( text.length() ); assert sanityCheck(); return getCharacterRun( 0 ); }
public void dumpChpx( boolean withProperties, boolean withSprms ) for ( CHPX chpx : _doc.getCharacterTable().getTextRuns() ) _doc.getStyleSheet(), (short) StyleSheet.NIL_STYLE ) ); _doc.getOverallRange() )
/** * Inserts text into the front of this range and it gives that text the * CharacterProperties specified in props. * * @param text * The text to insert. * @param props * The CharacterProperties to give the text. * @return A new CharacterRun that has the given text and properties and is * n ow a part of the document. * @deprecated User code should not work with {@link CharacterProperties} */ @Deprecated public CharacterRun insertBefore(String text, CharacterProperties props) // throws UnsupportedEncodingException { initAll(); PAPX papx = _paragraphs.get(_parStart); short istd = papx.getIstd(); StyleSheet ss = _doc.getStyleSheet(); CharacterProperties baseStyle = ss.getCharacterStyle(istd); byte[] grpprl = CharacterSprmCompressor.compressCharacterProperty(props, baseStyle); SprmBuffer buf = new SprmBuffer(grpprl, 0); _doc.getCharacterTable().insert(_charStart, _start, buf); return insertBefore(text); }
StyleSheet ss = _doc.getStyleSheet(); ParagraphProperties baseStyle = ss.getParagraphStyle(styleIndex); CharacterProperties baseChp = ss.getCharacterStyle(styleIndex); SprmBuffer buf = new SprmBuffer(withIndex, 2); _doc.getParagraphTable().insert(_parStart, _start, buf); insertBefore(text, baseChp); return getParagraph(0);