/** Construct the compound token based on a slice of the current {@link CompoundWordTokenFilterBase#termAtt}. */ public CompoundToken(int offset, int length) { this.txt = CompoundWordTokenFilterBase.this.termAtt.subSequence(offset, offset + length); // offsets of the original word this.startOffset = CompoundWordTokenFilterBase.this.offsetAtt.startOffset(); this.endOffset = CompoundWordTokenFilterBase.this.offsetAtt.endOffset(); }
/**
 * Builds a compound token covering {@code length} chars of the current
 * {@link CompoundWordTokenFilterBase#termAtt}, starting at {@code offset}.
 *
 * @param offset slice start within the term buffer
 * @param length slice length
 */
public CompoundToken(int offset, int length) {
  // Subtokens inherit the offsets of the original (whole) word.
  this.startOffset = CompoundWordTokenFilterBase.this.offsetAtt.startOffset();
  this.endOffset = CompoundWordTokenFilterBase.this.offsetAtt.endOffset();
  this.txt = CompoundWordTokenFilterBase.this.termAtt.subSequence(offset, offset + length);
}
private void addSubtokensToStack(char separatorChar, String newTokenType) { char[] termBuffer = termAttribute.buffer(); int termLength = termAttribute.length(); int offset = 0; // We iterate over the array, trying to find the separatorChar ('.' or ',') for (int index = 0; index <= termLength; index++) { // Note that we actually iterate past the last character in the array. At this point index == termLength. // We must check for this condition first to stop ArrayIndexOutOfBoundsException. // Being at the end of the array is a subtoken border just like the separator character ('.'), except we don't want to // add a duplicate token if no separator was already found. Hence we also check for offset > 0. if ((index < termLength && termBuffer[index] == separatorChar) || (index == termLength && offset > 0)) { int subtokenLength = index - offset; // Check that this is not an "empty" subtoken if (subtokenLength > 0) { if (subtokenStack.isEmpty()) { nextType = newTokenType; current = captureState(); } subtokenStack.add(termAttribute.subSequence(offset, subtokenLength + offset)); } offset = index + 1; } } }
/** Construct the compound token based on a slice of the current {@link CompoundWordTokenFilterBase#termAtt}. */ public CompoundToken(int offset, int length) { this.txt = CompoundWordTokenFilterBase.this.termAtt.subSequence(offset, offset + length); // offsets of the original word int startOff = CompoundWordTokenFilterBase.this.offsetAtt.startOffset(); int endOff = CompoundWordTokenFilterBase.this.offsetAtt.endOffset(); if (endOff - startOff != CompoundWordTokenFilterBase.this.termAtt.length()) { // if length by start + end offsets doesn't match the term text then assume // this is a synonym and don't adjust the offsets. this.startOffset = startOff; this.endOffset = endOff; } else { final int newStart = startOff + offset; this.startOffset = newStart; this.endOffset = newStart + length; } }
/** Construct the compound token based on a slice of the current {@link Lucene43CompoundWordTokenFilterBase#termAtt}. */ public CompoundToken(int offset, int length) { this.txt = Lucene43CompoundWordTokenFilterBase.this.termAtt.subSequence(offset, offset + length); // offsets of the original word int startOff = Lucene43CompoundWordTokenFilterBase.this.offsetAtt.startOffset(); int endOff = Lucene43CompoundWordTokenFilterBase.this.offsetAtt.endOffset(); if (endOff - startOff != Lucene43CompoundWordTokenFilterBase.this.termAtt.length()) { // if length by start + end offsets doesn't match the term text then assume // this is a synonym and don't adjust the offsets. this.startOffset = startOff; this.endOffset = endOff; } else { final int newStart = startOff + offset; this.startOffset = newStart; this.endOffset = newStart + length; } }
/** Construct the compound token based on a slice of the current {@link CompoundWordTokenFilterBase#termAtt}. * * @param offset the initial offset * @param length the token length * */ public CompoundToken(int offset, int length) { this.txt = CompoundWordTokenFilterBase.this.termAtt.subSequence(offset, offset + length); // offsets of the original word int startOff = CompoundWordTokenFilterBase.this.offsetAtt.startOffset(); int endOff = CompoundWordTokenFilterBase.this.offsetAtt.endOffset(); if (endOff - startOff != CompoundWordTokenFilterBase.this.termAtt.length()) { // if length by start + end offsets doesn't match the term text then assume // this is a synonym and don't adjust the offsets. this.startOffset = startOff; this.endOffset = endOff; } else { final int newStart = startOff + offset; this.startOffset = newStart; this.endOffset = newStart + length; } }
/** * Default Constructor. * * @param queryString Query String to analyse * @param fieldName Name of field to query * @param analyzer Lucene Analyzer that creates tokens/terms * @param subQueryCreator subQuery to use to perform the query */ QueryCreator(String queryString, final String fieldName, final Analyzer analyzer, final SubQuery subQueryCreator) { this.fieldName = notNull("You must provide a field name", fieldName); notNull("You must provide a Analyzer", analyzer); this.subQueryCreator = notNull("subQueryCreator", subQueryCreator); queryString = (queryString == null ? "" : queryString); try { final TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(queryString)); CharTermAttribute termAttribute = tokenStream.getAttribute(CharTermAttribute.class); while (tokenStream.incrementToken()) { int termLength = termAttribute.length(); tokens.add(termAttribute.subSequence(0, termLength).toString()); } } catch (final IOException e) { // wont happen } }
/** Construct the compound token based on a slice of the current {@link CompoundWordTokenFilterBase#termAtt}. * * @param offset the initial offset * @param length the token length * */ public CompoundToken(int offset, int length) { this.txt = CompoundWordTokenFilterBase.this.termAtt.subSequence(offset, offset + length); // offsets of the original word int startOff = CompoundWordTokenFilterBase.this.offsetAtt.startOffset(); int endOff = CompoundWordTokenFilterBase.this.offsetAtt.endOffset(); if (endOff - startOff != CompoundWordTokenFilterBase.this.termAtt.length()) { // if length by start + end offsets doesn't match the term text then assume // this is a synonym and don't adjust the offsets. this.startOffset = startOff; this.endOffset = endOff; } else { final int newStart = startOff + offset; this.startOffset = newStart; this.endOffset = newStart + length; } }
termAtt.copyBuffer(curTermBuffer, start, end - start); termAtt.append(suffix); if ((curGramSize == curTermLength - curPos) && !seenSuffixes.add(termAtt.subSequence(0, termAtt.length()))) { curTermBuffer = null; continue; if (!seenInfixes.add(termAtt.subSequence(0, termAtt.length()))) { curGramSize = 0; continue;