org.apache.lucene.analysis.tokenattributes.CharTermAttribute.copyBuffer java code examples

@Override
public void copyTo(AttributeImpl target) {
 // The target must be a CharTermAttribute; the cast fails for anything else.
 // Only the first termLength characters of termBuffer are handed over.
 ((CharTermAttribute) target).copyBuffer(termBuffer, 0, termLength);
}

/**
 * Copies the text of the current token into the supplied attribute.
 *
 * <p>The text is the span of {@code zzBuffer} that begins at {@code zzStartRead}
 * and ends just before {@code zzMarkedPos}.
 *
 * @param t attribute that receives the token characters
 */
public final void getText(CharTermAttribute t) {
 final int tokenLength = zzMarkedPos - zzStartRead;
 t.copyBuffer(zzBuffer, zzStartRead, tokenLength);
}

/**
 * Copies the text of the current token into the supplied attribute.
 *
 * <p>The copied range of {@code zzBuffer} starts at {@code zzStartRead} and is
 * {@code zzMarkedPos - zzStartRead} characters long.
 *
 * @param t attribute that receives the token characters
 */
final void getText(CharTermAttribute t) {
 final int tokenStart = zzStartRead;
 final int tokenLength = zzMarkedPos - tokenStart;
 t.copyBuffer(zzBuffer, tokenStart, tokenLength);
}

/**
 * Writes the current token's characters into the given attribute.
 *
 * <p>The characters are taken from {@code zzBuffer}, from index
 * {@code zzStartRead} (inclusive) to {@code zzMarkedPos} (exclusive).
 *
 * @param t attribute to fill with the token text
 */
public final void getText(CharTermAttribute t) {
 final int charCount = zzMarkedPos - zzStartRead;
 t.copyBuffer(zzBuffer, zzStartRead, charCount);
}

/**
 * Transfers the current token text to the given attribute.
 *
 * <p>Exactly {@code zzMarkedPos - zzStartRead} characters are copied out of
 * {@code zzBuffer}, beginning at {@code zzStartRead}.
 *
 * @param t attribute to fill with the token text
 */
public final void getText(CharTermAttribute t) {
 final int length = zzMarkedPos - zzStartRead;
 t.copyBuffer(zzBuffer, zzStartRead, length);
}

 termAtt.copyBuffer(termBuffer, start, (end - start));
} else {
 termAtt.setEmpty();

 /**
  * Advances the wrapped {@link TokenStream} by one token and, when the text before
  * the token's first apostrophe is a known article, removes that article together
  * with the apostrophe from the {@link CharTermAttribute}.
  *
  * @return {@code true} if a token was read, {@code false} if the input had none left
  * @throws IOException propagated from the wrapped stream
  */
 @Override
 public final boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
   return false;
  }

  final char[] chars = termAtt.buffer();
  final int len = termAtt.length();

  // Locate the first apostrophe, either ASCII (') or typographic (U+2019); -1 if absent.
  int apostrophe = -1;
  for (int pos = 0; pos < len && apostrophe < 0; pos++) {
   final char c = chars[pos];
   if (c == '\'' || c == '\u2019') {
    apostrophe = pos;
   }
  }

  // Keep only what follows the apostrophe when the prefix is one of the articles.
  if (apostrophe >= 0 && articles.contains(chars, 0, apostrophe)) {
   final int suffixStart = apostrophe + 1;
   termAtt.copyBuffer(chars, suffixStart, len - suffixStart);
  }

  return true;
 }
}

termAtt.copyBuffer(resultTokenBuffer, start, endPosition - start);
offsetAtt.setOffset(correctOffset(start), correctOffset(endPosition));
skipped++;

termAtt.copyBuffer(backup, 0, termBufferLength);

 /**
  * Advances the wrapped stream and replaces the term with the stemmer's result
  * buffer when the token is not a keyword and {@code stemmer.stem(...)} returns
  * {@code true}.
  *
  * @return {@code true} if a token was read, {@code false} if the input had none left
  * @throws IOException propagated from the wrapped stream
  */
 @Override
 public final boolean incrementToken() throws IOException {
  if (input.incrementToken()) {
   // Tokens flagged as keywords are passed through without calling the stemmer.
   if (!keywordAttr.isKeyword()) {
    final boolean stemmed = stemmer.stem(termAtt.buffer(), 0, termAtt.length());
    if (stemmed) {
     termAtt.copyBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
    }
   }
   return true;
  }
  return false;
 }
}

 /**
  * Emits the next word of the current sentence, skipping segments whose first
  * code point is neither a letter nor a digit.
  *
  * @return {@code true} if a word was written to the term and offset attributes,
  *         {@code false} once the break iterator reports {@code BreakIterator.DONE}
  */
 @Override
 protected boolean incrementWord() {
  int wordStart = wordBreaker.current();
  if (wordStart == BreakIterator.DONE) {
   return false; // nothing left to iterate
  }

  // Walk boundary to boundary until a segment starts with a letter or digit.
  int wordEnd = wordBreaker.next();
  while (wordEnd != BreakIterator.DONE) {
   final int firstCodePoint = Character.codePointAt(buffer, sentenceStart + wordStart, sentenceEnd);
   if (Character.isLetterOrDigit(firstCodePoint)) {
    break;
   }
   wordStart = wordEnd;
   wordEnd = wordBreaker.next();
  }

  if (wordEnd == BreakIterator.DONE) {
   return false; // ran out of boundaries before finding a word
  }

  clearAttributes();
  // Break positions are relative to the sentence; shift them into buffer coordinates.
  final int termStart = sentenceStart + wordStart;
  termAtt.copyBuffer(buffer, termStart, wordEnd - wordStart);
  offsetAtt.setOffset(correctOffset(offset + termStart), correctOffset(offset + sentenceStart + wordEnd));
  return true;
 }
}

/**
 * Returns the next token, folded through {@code foldToASCII} when it contains at
 * least one character at or above U+0080.
 *
 * <p>If a captured {@code state} is pending, it is restored and returned first
 * with a position increment of 0, and no new token is read from the input.
 *
 * @return {@code true} if a token is available, {@code false} if the input had none left
 * @throws IOException propagated from the wrapped stream
 */
@Override
public boolean incrementToken() throws IOException {
 if (state != null) {
  assert preserveOriginal : "state should only be captured if preserveOriginal is true";
  restoreState(state);
  posIncAttr.setPositionIncrement(0);
  state = null;
  return true;
 }

 if (!input.incrementToken()) {
  return false;
 }

 final char[] chars = termAtt.buffer();
 final int len = termAtt.length();

 // Skip the leading run of characters below U+0080; they never need rewriting.
 int pos = 0;
 while (pos < len && chars[pos] < '\u0080') {
  pos++;
 }

 // Stopped early: at least one character needs folding, so rewrite the whole term.
 if (pos < len) {
  foldToASCII(chars, len);
  termAtt.copyBuffer(output, 0, outputPos);
 }
 return true;
}

charTermAttr.copyBuffer(spare.chars(), start, end - start);
currentGroup[currentMatcher]++;
return true;
 charTermAttr.setLength(end);
} else {
 charTermAttr.copyBuffer(spare.chars(), start, end - start);

 /**
  * Returns the next input token after running it through the stemmer.
  * Tokens flagged as keywords are returned unchanged.
  *
  * @return {@code true} if a token was read, {@code false} if the input had none left
  * @throws IOException propagated from the wrapped stream
  */
 @Override
 public final boolean incrementToken() throws IOException {
  if (!input.incrementToken()) {
   return false;
  }
  if (keywordAttr.isKeyword()) {
   return true;
  }

  final char[] original = termAtt.buffer();
  stemmer.setCurrent(original, termAtt.length());
  stemmer.stem();

  final char[] stemmed = stemmer.getCurrentBuffer();
  final int stemmedLength = stemmer.getCurrentBufferLength();
  if (stemmed == original) {
   // The stemmer handed back the same array, so only the length needs updating.
   termAtt.setLength(stemmedLength);
  } else {
   termAtt.copyBuffer(stemmed, 0, stemmedLength);
  }
  return true;
 }
}

termAtt.copyBuffer(curTermBuffer, 0, charLength);
curGramSize++;
return true;
termAtt.copyBuffer(curTermBuffer, 0, curTermLength);
curTermBuffer = null;
return true;

/**
 * Emits one word/number part: clears the attributes, then sets the term text,
 * offsets, position increment and type for the part the iterator currently
 * points at.
 *
 * @param isSingleWord {@code true} if the generation is occurring from a single word, {@code false} otherwise
 */
private void generatePart(boolean isSingleWord) {
 clearAttributes();

 final int partStart = iterator.current;
 final int partEnd = iterator.end;
 termAttribute.copyBuffer(savedBuffer, partStart, partEnd - partStart);

 final int startOffset = savedStartOffset + partStart;
 final int endOffset = savedStartOffset + partEnd;

 final int offsetFrom;
 final int offsetTo;
 if (!hasIllegalOffsets) {
  offsetFrom = startOffset;
  offsetTo = endOffset;
 } else {
  // With illegal offsets the end always stays at the saved end offset. The
  // computed start is used only for a single word whose start does not pass
  // that end; otherwise the saved start offset is used.
  offsetTo = savedEndOffset;
  offsetFrom = (isSingleWord && startOffset <= savedEndOffset) ? startOffset : savedStartOffset;
 }
 offsetAttribute.setOffset(offsetFrom, offsetTo);

 posIncAttribute.setPositionIncrement(position(false));
 typeAttribute.setType(savedType);
}

/**
 * Advances the wrapped stream and, for tokens not already marked as keywords,
 * replaces the term with the entry found in {@code stemmerOverrideMap} (if any)
 * and marks the token as a keyword.
 *
 * @return {@code true} if a token was read, {@code false} if the input had none left
 * @throws IOException propagated from the wrapped stream
 */
@Override
public boolean incrementToken() throws IOException {
 if (!input.incrementToken()) {
  return false;
 }
 // With no reader there are no overrides; already-keyworded terms are left alone.
 if (fstReader == null || keywordAtt.isKeyword()) {
  return true;
 }

 final BytesRef override = stemmerOverrideMap.get(termAtt.buffer(), termAtt.length(), scratchArc, fstReader);
 if (override == null) {
  return true;
 }

 spare = ArrayUtil.grow(termAtt.buffer(), override.length);
 final int utf16Length = UnicodeUtil.UTF8toUTF16(override, spare);
 if (spare == termAtt.buffer()) {
  // grow() returned the attribute's own array, so the text is already in place.
  termAtt.setLength(utf16Length);
 } else {
  termAtt.copyBuffer(spare, 0, utf16Length);
 }
 keywordAtt.setKeyword(true);
 return true;
}

 termAttribute.copyBuffer(savedTermBuffer, startPart, endPart - startPart);
} else {
 termAttribute.copyBuffer(termPart, 0, termPart.length);

final int start = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
final int end = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
termAtt.copyBuffer(curTermBuffer, start, end - start);
posIncrAtt.setPositionIncrement(curPosIncr);
curPosIncr = 0;
termAtt.copyBuffer(curTermBuffer, 0, curTermLength);
curTermBuffer = null;
return true;

newTarget.termAtt.copyBuffer(fillerToken, 0, fillerToken.length);
newTarget.isFiller = true;
--numFillerTokensToInsert;
  newTarget.termAtt.copyBuffer(fillerToken, 0, fillerToken.length);
  newTarget.isFiller = true;
  --numFillerTokensToInsert;

Javadoc

Copies the contents of buffer, starting at offset for length characters, into the termBuffer array.

Popular methods of CharTermAttribute

length
append
Appends the contents of the other CharTermAttribute to this character sequence. The characters of th
buffer
Returns the internal termBuffer character array which you can then directly alter. If the array is t
setEmpty
Sets the length of the termBuffer to zero. Use this method before appending contents using the Appen
setLength
Set number of valid characters (length of the term) in the termBuffer array. Use this to truncate th
resizeBuffer
Grows the termBuffer to at least size newSize, preserving the existing content.
charAt
subSequence
toString

Popular in Java

Parsing JSON documents to java classes using gson
onCreateOptionsMenu (Activity)
getSystemService (Context)
putExtra (Intent)
Charset (java.nio.charset)
A charset is a named mapping between Unicode characters and byte sequences. Every Charset can decode
MessageDigest (java.security)
Uses a one-way hash function to turn an arbitrary number of bytes into a fixed-length byte sequence.
Time (java.sql)
Java representation of an SQL TIME value. Provides utilities to format and parse the time's represen
Enumeration (java.util)
A legacy iteration interface. New code should use Iterator instead. Iterator replaces the enumeration
Font (java.awt)
The Font class represents fonts, which are used to render text in a visible way. A font provides the
Kernel (java.awt.image)
Top 12 Jupyter Notebook extensions

How to use the copyBuffer method in org.apache.lucene.analysis.tokenattributes.CharTermAttribute

Best Java code snippets using org.apache.lucene.analysis.tokenattributes.CharTermAttribute.copyBuffer (Showing top 20 results out of 387)

How to use
copyBuffer
method
in
org.apache.lucene.analysis.tokenattributes.CharTermAttribute