/**
 * Builds a short description of this piece from its start position, its end
 * position and its piece descriptor.
 *
 * @return the description, in the form {@code TextPiece from S to E (descriptor)}
 */
public String toString() {
    StringBuilder description = new StringBuilder("TextPiece from ");
    description.append(getStart());
    description.append(" to ");
    description.append(getEnd());
    description.append(" (");
    description.append(getPieceDescriptor());
    description.append(")");
    return description.toString();
}
}
/**
 * Creates the {@link TextPiece} for one entry of this table by forwarding all
 * arguments, unchanged, to the {@code TextPiece} constructor.
 *
 * @param nodeStartChars start position handed to the new piece
 * @param nodeEndChars   end position handed to the new piece
 * @param buf            raw bytes handed to the new piece
 * @param pd             piece descriptor handed to the new piece
 * @return the newly constructed piece
 */
protected TextPiece newTextPiece(int nodeStartChars, int nodeEndChars, byte[] buf, PieceDescriptor pd) {
    final TextPiece piece = new TextPiece(nodeStartChars, nodeEndChars, buf, pd);
    return piece;
}
/**
 * Tells whether a byte position falls inside one of the text pieces.
 * <p>
 * Pieces are visited in the order of {@code _textPiecesFCOrder}. The first
 * piece whose end (file position plus byte length) is not before
 * {@code bytePos} decides the answer.
 *
 * @param bytePos byte position to look up
 * @return {@code true} if the deciding piece starts at or before
 *         {@code bytePos}; {@code false} if it starts after it, or if every
 *         piece ends before {@code bytePos}
 */
public boolean isIndexInTable(int bytePos) {
    for (TextPiece piece : _textPiecesFCOrder) {
        final int firstByte = piece.getPieceDescriptor().getFilePosition();
        final int endByte = firstByte + piece.bytesLength();

        if (bytePos <= endByte) {
            // This piece is the first one not lying entirely before bytePos.
            return firstByte <= bytePos;
        }
    }
    return false;
}
/**
 * Converts a character position into a byte position.
 * <p>
 * Pieces of {@code _textPieces} are walked in order. For each piece the
 * result is set to the piece's file position plus the byte size of the
 * characters covered so far in that piece (two bytes per character for
 * Unicode pieces, one otherwise). The walk stops at the first piece whose end
 * is not before {@code charPos}.
 *
 * @param charPos character position to convert
 * @return the computed byte position; {@code 0} when there are no pieces, and
 *         the byte end of the last piece when {@code charPos} lies past it
 */
public int getByteIndex(int charPos) {
    int byteIndex = 0;
    for (TextPiece piece : _textPieces) {
        final int pieceEnd = piece.getEnd();
        // Either the whole piece (charPos at/after its end) or only its leading part.
        final int coveredChars = (charPos < pieceEnd ? charPos : pieceEnd) - piece.getStart();
        final int bytesPerChar = piece.isUnicode() ? 2 : 1;

        byteIndex = piece.getPieceDescriptor().getFilePosition() + coveredChars * bytesPerChar;

        if (charPos <= pieceEnd) {
            break;
        }
    }
    return byteIndex;
}
public byte[] writeTo(ByteArrayOutputStream docStream) throws IOException { PlexOfCps textPlex = new PlexOfCps(PieceDescriptor.getSizeInBytes()); // int fcMin = docStream.getOffset(); for (TextPiece next : _textPieces) { PieceDescriptor pd = next.getPieceDescriptor(); int offset = docStream.size(); int mod = (offset % POIFSConstants.SMALLER_BIG_BLOCK_SIZE); if (mod != 0) { mod = POIFSConstants.SMALLER_BIG_BLOCK_SIZE - mod; byte[] buf = IOUtils.safelyAllocate(mod, MAX_RECORD_LENGTH); docStream.write(buf); } // set the text piece position to the current docStream offset. pd.setFilePosition(docStream.size()); // write the text to the docstream and save the piece descriptor to // the // plex which will be written later to the tableStream. docStream.write(next.getRawBytes()); // The TextPiece is already in characters, which // makes our life much easier int nodeStart = next.getStart(); int nodeEnd = next.getEnd(); textPlex.addProperty(new GenericPropertyNode(nodeStart, nodeEnd, pd.toByteArray())); } return textPlex.toByteArray(); }
/** * Adjust all the text piece after inserting some text into one of them * * @param listIndex The TextPiece that had characters inserted into * @param length The number of characters inserted */ public int adjustForInsert(int listIndex, int length) { int size = _textPieces.size(); TextPiece tp = _textPieces.get(listIndex); // Update with the new end tp.setEnd(tp.getEnd() + length); // Now change all subsequent ones for (int x = listIndex + 1; x < size; x++) { tp = _textPieces.get(x); tp.setStart(tp.getStart() + length); tp.setEnd(tp.getEnd() + length); } // All done return length; }
/**
 * Maps a byte range onto the character ranges of the text pieces it overlaps.
 * <p>
 * Pieces are visited in the order of {@code _textPiecesFCOrder}. For each
 * piece the requested byte range is clipped to the piece's bytes and the
 * clipped range is converted to character positions (two bytes per character
 * for Unicode pieces, one otherwise).
 *
 * @param startBytePosInclusive first byte of the requested range
 * @param endBytePosExclusive   byte just past the end of the requested range
 * @return one {@code {startCp, endCp}} pair per overlapping piece, in visiting
 *         order; an empty array when nothing overlaps
 */
public int[][] getCharIndexRanges( int startBytePosInclusive, int endBytePosExclusive )
{
    List<int[]> result = new LinkedList<int[]>();
    for ( TextPiece textPiece : _textPiecesFCOrder )
    {
        final int tpStart = textPiece.getPieceDescriptor().getFilePosition();
        final int tpEnd = textPiece.getPieceDescriptor().getFilePosition()
                + textPiece.bytesLength();

        // Piece ends before the requested range begins.
        if ( startBytePosInclusive > tpEnd )
            continue;

        // The end bound is exclusive, so a piece that starts exactly at it
        // contributes no bytes. Using "<" here would emit a spurious
        // zero-length range for that piece.
        if ( endBytePosExclusive <= tpStart )
            break;

        final int rangeStartBytes = Math.max( tpStart, startBytePosInclusive );
        final int rangeEndBytes = Math.min( tpEnd, endBytePosExclusive );
        final int rangeLengthBytes = rangeEndBytes - rangeStartBytes;

        if ( rangeStartBytes > rangeEndBytes )
            continue;

        final int encodingMultiplier = textPiece.isUnicode() ? 2 : 1;

        final int rangeStartCp = textPiece.getStart()
                + ( rangeStartBytes - tpStart ) / encodingMultiplier;
        final int rangeEndCp = rangeStartCp + rangeLengthBytes / encodingMultiplier;

        result.add( new int[] { rangeStartCp, rangeEndCp } );
    }

    return result.toArray( new int[result.size()][] );
}
public StringBuilder getText() { final long start = System.currentTimeMillis(); // rebuild document paragraphs structure StringBuilder docText = new StringBuilder(); for (TextPiece textPiece : _textPieces) { String toAppend = textPiece.getStringBuilder().toString(); int toAppendLength = toAppend.length(); if (toAppendLength != textPiece.getEnd() - textPiece.getStart()) { logger.log( POILogger.WARN, "Text piece has boundaries [", Integer.valueOf(textPiece.getStart()), "; ", Integer.valueOf(textPiece.getEnd()), ") but length ", Integer.valueOf(textPiece.getEnd() - textPiece.getStart())); } docText.replace(textPiece.getStart(), textPiece.getStart() + toAppendLength, toAppend); } logger.log(POILogger.DEBUG, "Document text were rebuilded in ", Long.valueOf(System.currentTimeMillis() - start), " ms (", Integer.valueOf(docText.length()), " chars)"); return docText; }
/**
 * Computes a character count for a byte position by accumulating over the
 * pieces of {@code _textPieces}. This method is marked deprecated.
 * <p>
 * The byte position is first passed through {@code lookIndexForward}. A piece
 * that does not contain the resulting position (bounds inclusive on both
 * sides) contributes all of its bytes; a piece that contains it contributes
 * only the bytes before the position. Byte counts are halved for Unicode
 * pieces. Accumulation stops at the first containing piece for which the
 * running count has reached {@code startCP}.
 *
 * @param startBytePos byte position to convert
 * @param startCP      minimum character count required before stopping
 * @return the accumulated character count
 */
@Deprecated
public int getCharIndex(int startBytePos, int startCP) {
    final int bytePos = lookIndexForward(startBytePos);

    int charCount = 0;
    for (TextPiece piece : _textPieces) {
        final int pieceStart = piece.getPieceDescriptor().getFilePosition();
        final int pieceBytes = piece.bytesLength();
        final int pieceEnd = pieceStart + pieceBytes;

        final boolean containsPos = bytePos >= pieceStart && bytePos <= pieceEnd;
        // Inside the piece (including both edges) only the leading bytes count.
        final int bytesToAdd = containsPos ? bytePos - pieceStart : pieceBytes;

        charCount += piece.isUnicode() ? bytesToAdd / 2 : bytesToAdd;

        if (containsPos && charCount >= startCP) {
            break;
        }
    }
    return charCount;
}
/**
 * Translates a byte range into character ranges, one per overlapping text
 * piece, visiting pieces in the order of {@code _textPiecesFCOrder}.
 *
 * @param startBytePosInclusive first byte of the requested range
 * @param endBytePosExclusive   byte just past the end of the requested range
 * @return {@code {startCp, endCp}} pairs in visiting order; empty when no
 *         piece overlaps
 */
@Override
public int[][] getCharIndexRanges(int startBytePosInclusive, int endBytePosExclusive) {
    List<int[]> ranges = new LinkedList<>();

    for (TextPiece piece : _textPiecesFCOrder) {
        final int pieceFirstByte = piece.getPieceDescriptor().getFilePosition();
        final int pieceEndByte = piece.getPieceDescriptor().getFilePosition() + piece.bytesLength();

        if (startBytePosInclusive > pieceEndByte) {
            // Piece lies entirely before the request.
            continue;
        }
        if (endBytePosExclusive <= pieceFirstByte) {
            // This piece starts at or after the exclusive end: stop here.
            break;
        }

        // Clip the request to this piece.
        final int fromByte = Math.max(pieceFirstByte, startBytePosInclusive);
        final int toByte = Math.min(pieceEndByte, endBytePosExclusive);
        if (fromByte > toByte) {
            continue;
        }

        final int bytesPerChar = getEncodingMultiplier(piece);
        final int fromCp = piece.getStart() + (fromByte - pieceFirstByte) / bytesPerChar;
        final int toCp = fromCp + (toByte - fromByte) / bytesPerChar;

        ranges.add(new int[] {fromCp, toCp});
    }

    return ranges.toArray(new int[0][]);
}
// Statement fragments that copy text out of currentPiece into finalTextBuf.
// Each substring call takes offsets relative to the piece, obtained by
// subtracting currentTextStart (the piece's start) from runStart / runEnd /
// currentTextEnd.
// NOTE(review): "String str" is declared several times and no enclosing
// control flow is visible, so these statements presumably come from separate
// branches of a loop that is not shown here -- confirm against the full method.
int currentTextStart = currentPiece.getStart(); int currentTextEnd = currentPiece.getEnd(); currentTextStart = currentPiece.getStart (); currentTextEnd = currentPiece.getEnd (); String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart); finalTextBuf.append(str); String str = currentPiece.substring(runStart - currentTextStart, currentTextEnd - currentTextStart); finalTextBuf.append(str); currentTextStart = currentPiece.getStart (); runStart = currentTextStart; currentTextEnd = currentPiece.getEnd (); String str = currentPiece.substring(0, runEnd - currentTextStart); finalTextBuf.append(str); String str = currentPiece.substring(runStart - currentTextStart, runEnd - currentTextStart); if (textIt.hasNext()) currentTextStart = currentPiece.getStart(); currentTextEnd = currentPiece.getEnd();
/**
 * Orders two text pieces by the {@code fc} value of their piece descriptors.
 *
 * @param textPiece  first piece
 * @param textPiece1 second piece
 * @return a negative value, zero or a positive value when the first piece's
 *         {@code fc} is less than, equal to or greater than the second's
 */
public int compare(TextPiece textPiece, TextPiece textPiece1) {
    final int leftFc = textPiece.getPieceDescriptor().fc;
    final int rightFc = textPiece1.getPieceDescriptor().fc;
    return Integer.compare(leftFc, rightFc);
}
}
/**
 * Returns the content of {@link #getStringBuilder()} copied into a new
 * {@link StringBuffer}. This method is marked deprecated.
 *
 * @return a new buffer; changes made to it do not affect the builder it was
 *         copied from
 */
@Deprecated
public StringBuffer getStringBuffer() {
    StringBuffer copy = new StringBuffer(getStringBuilder());
    return copy;
}
// Prints the document format, then for every text piece of the document's
// text table the first 100 characters of its text and whether its piece
// descriptor is Unicode.
// NOTE(review): substring(0, 100) throws StringIndexOutOfBoundsException when
// a piece holds fewer than 100 characters -- consider clamping the end index
// to the text length.
// NOTE(review): the body of the for loop is not closed within this fragment.
System.out.println("format: "+doc.getDocumentSummaryInformation().getFormat()); for( TextPiece tp : doc.getTextTable().getTextPieces() ) { System.out.println("TP: "+tp.getStringBuffer().substring(0, 100)); System.out.println("TP: "+tp.getPieceDescriptor().isUnicode());
/**
 * Compares this piece with another object.
 * <p>
 * Two pieces are equal when {@code limitsAreEqual} accepts the other object,
 * their texts are equal, they agree on the Unicode flag and their piece
 * descriptors are equal. The checks run in that order and stop at the first
 * failure.
 *
 * @param o object to compare with
 * @return {@code true} if all checks pass
 */
public boolean equals(Object o) {
    if (!limitsAreEqual(o)) {
        return false;
    }

    TextPiece other = (TextPiece) o;

    String ownText = getStringBuilder().toString();
    String otherText = other.getStringBuilder().toString();
    if (!ownText.equals(otherText)) {
        return false;
    }
    if (other._usesUnicode != _usesUnicode) {
        return false;
    }
    return _pd.equals(other._pd);
}
/**
 * Gives the character position at which this piece starts; this is the same
 * value as {@code getStart()}.
 *
 * @return the starting character position
 */
public int getCP() {
    final int startPosition = getStart();
    return startPosition;
}
// Statement fragments that collect piece text into "text". The later part
// copies the bytes between FcMin and FcMac of the main stream into textData,
// wraps them in a single TextPiece covering characters 0..textData.length,
// adds it to tpt and appends its text.
// NOTE(review): "tp" is used before its declaration, so the first statement
// presumably belongs to a different branch that is not shown -- confirm.
// NOTE(review): textData.length is a byte count but is passed as the end
// character position; the TextPiece constructor rejects a span that differs
// from the decoded text length, so this looks like it only works when pd
// describes one byte per character -- confirm.
text.append( tp.getStringBuilder() ); byte[] textData = new byte[_fib.getFibBase().getFcMac()-_fib.getFibBase().getFcMin()]; System.arraycopy(_mainStream, _fib.getFibBase().getFcMin(), textData, 0, textData.length); TextPiece tp = new TextPiece( 0, textData.length, textData, pd ); tpt.add(tp); text.append(tp.getStringBuilder());
/**
 * Compares this table with another object.
 * <p>
 * Another {@code TextPieceTable} is equal when it holds the same number of
 * text pieces and each of its pieces equals the piece at the same index here.
 *
 * @param o object to compare with
 * @return {@code true} if {@code o} is an equal table, {@code false} otherwise
 */
public boolean equals(Object o) {
    if (!(o instanceof TextPieceTable)) {
        return false;
    }

    TextPieceTable other = (TextPieceTable) o;
    final int count = other._textPieces.size();
    if (count != _textPieces.size()) {
        return false;
    }

    for (int index = 0; index < count; index++) {
        TextPiece theirs = other._textPieces.get(index);
        TextPiece ours = _textPieces.get(index);
        if (!theirs.equals(ours)) {
            return false;
        }
    }
    return true;
}
/**
 * Creates a text piece and validates that its boundaries match its text.
 *
 * @param start Beginning offset in main document stream, in characters.
 * @param end   Ending offset in main document stream, in characters.
 * @param text  The raw bytes of our text
 * @param pd    The piece descriptor for this text; it supplies the Unicode
 *              flag and is passed to {@code buildInitSB} together with
 *              {@code text}
 * @throws IllegalStateException if {@code end} is before {@code start}, or if
 *         {@code end - start} differs from the number of characters in the
 *         text
 */
public TextPiece(int start, int end, byte[] text, PieceDescriptor pd) {
    super(start, end, buildInitSB(text, pd));
    _usesUnicode = pd.isUnicode();
    _pd = pd;

    // Validate. The negative-size check has to come first: a negative span
    // can never equal a text length, so checking the length first would make
    // this branch unreachable and report the less specific message.
    if (end < start) {
        throw new IllegalStateException("Told we're of negative size! start=" + start + " end=" + end);
    }

    int textLength = ((CharSequence) _buf).length();
    if (end - start != textLength) {
        throw new IllegalStateException("Told we're for characters " + start + " -> " + end
                + ", but actually covers " + textLength + " characters!");
    }
}