/**
 * Copies this attribute's term text into {@code target}.
 *
 * @param target destination attribute; it is cast to {@link CharTermAttribute}
 * @throws ClassCastException if {@code target} is not a {@link CharTermAttribute}
 */
@Override
public void copyTo(AttributeImpl target) {
  // Hand over the first termLength chars of termBuffer in one call.
  ((CharTermAttribute) target).copyBuffer(termBuffer, 0, termLength);
}
/**
 * Copies the current token text into the given attribute.
 *
 * <p>The text is the slice of {@code zzBuffer} that begins at {@code zzStartRead} and
 * ends just before {@code zzMarkedPos}.
 *
 * @param t attribute that receives the token characters
 */
public final void getText(CharTermAttribute t) {
  final int tokenLength = zzMarkedPos - zzStartRead;
  t.copyBuffer(zzBuffer, zzStartRead, tokenLength);
}
/**
 * Copies the current token text into the given {@link CharTermAttribute}.
 *
 * <p>The text is the slice of {@code zzBuffer} that begins at {@code zzStartRead} and
 * ends just before {@code zzMarkedPos}.
 *
 * @param t attribute that receives the token characters
 */
final void getText(CharTermAttribute t) {
  final int tokenLength = zzMarkedPos - zzStartRead;
  t.copyBuffer(zzBuffer, zzStartRead, tokenLength);
}
/**
 * Copies the current token text into the given attribute.
 *
 * <p>The text is the slice of {@code zzBuffer} that begins at {@code zzStartRead} and
 * ends just before {@code zzMarkedPos}.
 *
 * @param t attribute that receives the token characters
 */
public final void getText(CharTermAttribute t) {
  final int tokenLength = zzMarkedPos - zzStartRead;
  t.copyBuffer(zzBuffer, zzStartRead, tokenLength);
}
/**
 * Copies the current token text into the given attribute.
 *
 * <p>The text is the slice of {@code zzBuffer} that begins at {@code zzStartRead} and
 * ends just before {@code zzMarkedPos}.
 *
 * @param t attribute that receives the token characters
 */
public final void getText(CharTermAttribute t) {
  final int tokenLength = zzMarkedPos - zzStartRead;
  t.copyBuffer(zzBuffer, zzStartRead, tokenLength);
}
termAtt.copyBuffer(termBuffer, start, (end - start)); } else { termAtt.setEmpty();
/** * Increments the {@link TokenStream} with a {@link CharTermAttribute} without elisioned start */ @Override public final boolean incrementToken() throws IOException { if (input.incrementToken()) { char[] termBuffer = termAtt.buffer(); int termLength = termAtt.length(); int index = -1; for (int i = 0; i < termLength; i++) { char ch = termBuffer[i]; if (ch == '\'' || ch == '\u2019') { index = i; break; } } // An apostrophe has been found. If the prefix is an article strip it off. if (index >= 0 && articles.contains(termBuffer, 0, index)) { termAtt.copyBuffer(termBuffer, index + 1, termLength - (index + 1)); } return true; } else { return false; } } }
termAtt.copyBuffer(resultTokenBuffer, start, endPosition - start); offsetAtt.setOffset(correctOffset(start), correctOffset(endPosition)); skipped++;
termAtt.copyBuffer(backup, 0, termBufferLength);
/**
 * Advances the wrapped stream and, for tokens not flagged as keywords, runs the stemmer
 * over the term.
 *
 * <p>When {@code stemmer.stem(...)} returns {@code true}, the term is replaced with the
 * stemmer's result buffer; otherwise the term is left as it was.
 *
 * @return {@code true} if the input produced a token, {@code false} once it is exhausted
 * @throws IOException if the wrapped stream fails
 */
@Override
public final boolean incrementToken() throws IOException {
  if (input.incrementToken()) {
    if (!keywordAttr.isKeyword()) {
      final boolean stemmed = stemmer.stem(termAtt.buffer(), 0, termAtt.length());
      if (stemmed) {
        termAtt.copyBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
      }
    }
    return true;
  }
  return false;
}
} // closes the enclosing scope opened before this excerpt
@Override protected boolean incrementWord() { int start = wordBreaker.current(); if (start == BreakIterator.DONE) { return false; // BreakIterator exhausted } // find the next set of boundaries, skipping over non-tokens int end = wordBreaker.next(); while (end != BreakIterator.DONE && !Character.isLetterOrDigit(Character.codePointAt(buffer, sentenceStart + start, sentenceEnd))) { start = end; end = wordBreaker.next(); } if (end == BreakIterator.DONE) { return false; // BreakIterator exhausted } clearAttributes(); termAtt.copyBuffer(buffer, sentenceStart + start, end - start); offsetAtt.setOffset(correctOffset(offset + sentenceStart + start), correctOffset(offset + sentenceStart + end)); return true; } }
@Override public boolean incrementToken() throws IOException { if (state != null) { assert preserveOriginal : "state should only be captured if preserveOriginal is true"; restoreState(state); posIncAttr.setPositionIncrement(0); state = null; return true; } if (input.incrementToken()) { final char[] buffer = termAtt.buffer(); final int length = termAtt.length(); // If no characters actually require rewriting then we // just return token as-is: for(int i = 0 ; i < length ; ++i) { final char c = buffer[i]; if (c >= '\u0080') { foldToASCII(buffer, length); termAtt.copyBuffer(output, 0, outputPos); break; } } return true; } else { return false; } }
charTermAttr.copyBuffer(spare.chars(), start, end - start); currentGroup[currentMatcher]++; return true; charTermAttr.setLength(end); } else { charTermAttr.copyBuffer(spare.chars(), start, end - start);
/**
 * Returns the next input token after stemming it, unless it is flagged as a keyword.
 *
 * <p>The stemmer is given the term buffer and length. Afterwards, if the stemmer still
 * works on that same array, only the term length is adjusted; otherwise the stemmer's
 * buffer is copied into the term.
 *
 * @return {@code true} if the input produced a token, {@code false} once it is exhausted
 * @throws IOException if the wrapped stream fails
 */
@Override
public final boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
    return false;
  }
  if (keywordAttr.isKeyword()) {
    return true;
  }

  final char[] before = termAtt.buffer();
  stemmer.setCurrent(before, termAtt.length());
  stemmer.stem();

  final char[] after = stemmer.getCurrentBuffer();
  final int stemmedLength = stemmer.getCurrentBufferLength();
  if (after == before) {
    // Stemmed in place: the chars are already in the term buffer.
    termAtt.setLength(stemmedLength);
  } else {
    termAtt.copyBuffer(after, 0, stemmedLength);
  }
  return true;
}
} // closes the enclosing scope opened before this excerpt
termAtt.copyBuffer(curTermBuffer, 0, charLength); curGramSize++; return true; termAtt.copyBuffer(curTermBuffer, 0, curTermLength); curTermBuffer = null; return true;
/** * Generates a word/number part, updating the appropriate attributes * * @param isSingleWord {@code true} if the generation is occurring from a single word, {@code false} otherwise */ private void generatePart(boolean isSingleWord) { clearAttributes(); termAttribute.copyBuffer(savedBuffer, iterator.current, iterator.end - iterator.current); int startOffset = savedStartOffset + iterator.current; int endOffset = savedStartOffset + iterator.end; if (hasIllegalOffsets) { // historically this filter did this regardless for 'isSingleWord', // but we must do a sanity check: if (isSingleWord && startOffset <= savedEndOffset) { offsetAttribute.setOffset(startOffset, savedEndOffset); } else { offsetAttribute.setOffset(savedStartOffset, savedEndOffset); } } else { offsetAttribute.setOffset(startOffset, endOffset); } posIncAttribute.setPositionIncrement(position(false)); typeAttribute.setType(savedType); }
@Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { if (fstReader == null) { // No overrides return true; } if (!keywordAtt.isKeyword()) { // don't muck with already-keyworded terms final BytesRef stem = stemmerOverrideMap.get(termAtt.buffer(), termAtt.length(), scratchArc, fstReader); if (stem != null) { spare = ArrayUtil.grow(termAtt.buffer(), stem.length); final int length = UnicodeUtil.UTF8toUTF16(stem, spare); if (spare != termAtt.buffer()) { termAtt.copyBuffer(spare, 0, length); } else { termAtt.setLength(length); } keywordAtt.setKeyword(true); } } return true; } else { return false; } }
termAttribute.copyBuffer(savedTermBuffer, startPart, endPart - startPart); } else { termAttribute.copyBuffer(termPart, 0, termPart.length);
final int start = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos); final int end = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize); termAtt.copyBuffer(curTermBuffer, start, end - start); posIncrAtt.setPositionIncrement(curPosIncr); curPosIncr = 0; termAtt.copyBuffer(curTermBuffer, 0, curTermLength); curTermBuffer = null; return true;
newTarget.termAtt.copyBuffer(fillerToken, 0, fillerToken.length); newTarget.isFiller = true; --numFillerTokensToInsert; newTarget.termAtt.copyBuffer(fillerToken, 0, fillerToken.length); newTarget.isFiller = true; --numFillerTokensToInsert;