// NOTE(review): truncated fragment — looks like a field-inversion loop that consumes a
// TokenStream, accumulates position/offset state, and validates that offsets never go
// backwards; the IllegalArgumentException message and several closing braces are cut off
// mid-expression. TODO: restore the missing tail from the upstream source before use.
try (TokenStream stream = tokenStream = field.tokenStream(docState.analyzer, tokenStream)) { stream.reset(); invertState.setAttributeSource(stream); termsHashPerField.start(field, first); while (stream.incrementToken()) { int posIncr = invertState.posIncrAttribute.getPositionIncrement(); invertState.position += posIncr; if (invertState.position < invertState.lastPosition) { int startOffset = invertState.offset + invertState.offsetAttribute.startOffset(); int endOffset = invertState.offset + invertState.offsetAttribute.endOffset(); if (startOffset < invertState.lastStartOffset || endOffset < startOffset) { throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards " stream.end(); invertState.position += invertState.posIncrAttribute.getPositionIncrement(); invertState.offset += invertState.offsetAttribute.endOffset(); invertState.position += docState.analyzer.getPositionIncrementGap(fieldInfo.name); invertState.offset += docState.analyzer.getOffsetGap(fieldInfo.name);
// NOTE(review): truncated fragment of a token filter's incrementToken() body — it appears
// to drop empty-text tokens while accumulating their position increments in skipCounter,
// then fold the accumulated skips into the next emitted token's increment. The `return true`
// before `skipCounter += ...` makes the accumulation unreachable as written — presumably a
// collapse/paste artifact; closing braces are also missing. TODO: restore from the original file.
String[] parts; skipCounter = 0; while (input.incrementToken()) { final String text = new String(termAtt.buffer(), 0, termAtt.length()); if (text.isEmpty()) { return true; skipCounter += posIncrAttribute.getPositionIncrement(); } else { if (skipCounter != 0) { posIncrAttribute.setPositionIncrement(posIncrAttribute.getPositionIncrement() + skipCounter);
/** {@inheritDoc} */
@Override
public void end() throws IOException {
    super.end();
    // Fold the position increments of tokens skipped at the tail of the
    // stream into the final increment, so consumers see the true gap.
    final int finalIncrement = posIncrAttribute.getPositionIncrement() + skipCounter;
    posIncrAttribute.setPositionIncrement(finalIncrement);
}
@Override public final void end() throws IOException { super.end(); // set final offset int finalOffset = correctOffset(this.endPosition); offsetAtt.setOffset(finalOffset, finalOffset); posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions); } }
@Override public final boolean incrementToken() throws IOException { skippedPositions = 0; while (input.incrementToken()) { if (accept()) { if (skippedPositions != 0) { posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions); } return true; } skippedPositions += posIncrAtt.getPositionIncrement(); } // reached EOS -- return false return false; }
// NOTE(review): truncated fragment — appears to be a fingerprint-style filter that collects
// unique terms (CharArraySet), stops accumulating past maxOutputTokenSize, then on end of
// input emits a single synthetic "fingerprint" token with increment/length 1. Braces for the
// inner branches are missing and the flow between the collection loop and the final emit is
// cut off. TODO: restore the missing structure from the original file.
uniqueTerms = new CharArraySet(8, false); int outputTokenSize = 0; while (input.incrementToken()) { if (outputTokenSize > maxOutputTokenSize) { continue; final char term[] = termAttribute.buffer(); final int length = termAttribute.length(); input.end(); inputEnded = true; offsetAtt.setOffset(0, offsetAtt.endOffset()); posLenAtt.setPositionLength(1); posIncrAtt.setPositionIncrement(1); typeAtt.setType("fingerprint"); termAttribute.setEmpty(); return false;
/**
 * Advances the suffix stream one token and copies all of its attribute
 * values (term, increment, flags, offsets, type, payload) into the
 * supplied reusable token.
 *
 * @param token reusable token to fill
 * @return the filled token, or {@code null} when the suffix stream is exhausted
 * @throws IOException if reading the suffix stream fails
 */
private Token getNextSuffixInputToken(Token token) throws IOException {
    if (suffix.incrementToken() == false) {
        return null;
    }
    token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
    token.setPositionIncrement(posIncrAtt.getPositionIncrement());
    token.setFlags(flagsAtt.getFlags());
    token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
    token.setType(typeAtt.type());
    token.setPayload(payloadAtt.getPayload());
    return token;
}
// NOTE(review): truncated fragment — analyzes a term string and groups the normalized
// BytesRef terms by position (a positive increment starts a new position bucket appended
// to tlist; increment 0 stacks synonyms into the current bucket). The loop body's closing
// braces and the final flush of currentPos into tlist are cut off. TODO: restore the tail.
try (TokenStream source = analyzer.tokenStream(field, termStr)) { source.reset(); List<BytesRef> currentPos = new ArrayList<>(); CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class); PositionIncrementAttribute posAtt = source.addAttribute(PositionIncrementAttribute.class); boolean hasMoreTokens = source.incrementToken(); while (hasMoreTokens) { if (currentPos.isEmpty() == false && posAtt.getPositionIncrement() > 0) { tlist.add(currentPos); currentPos = new ArrayList<>(); final BytesRef term = analyzer.normalize(field, termAtt.toString()); currentPos.add(term); hasMoreTokens = source.incrementToken();
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException { Collection<Token> result = new ArrayList<Token>(); assert analyzer != null; TokenStream ts = analyzer.tokenStream("", q); try { ts.reset(); // TODO: support custom attributes CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class); FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class); PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); while (ts.incrementToken()){ Token token = new Token(); token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); token.setType(typeAtt.type()); token.setFlags(flagsAtt.getFlags()); token.setPayload(payloadAtt.getPayload()); token.setPositionIncrement(posIncAtt.getPositionIncrement()); result.add(token); } ts.end(); return result; } finally { IOUtils.closeWhileHandlingException(ts); } }
/**
 * Builds a boolean query from the cached token stream: tokens sharing a
 * position (increment 0, e.g. synonyms) are grouped into one clause via
 * {@code add(...)}, and each new position starts a new group.
 *
 * @param field    field name for the generated terms
 * @param stream   token stream to consume (already cached/filled)
 * @param operator occur to join the per-position groups with
 * @return the assembled boolean query
 * @throws IOException if the stream fails
 */
protected Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClause.Occur operator)
        throws IOException {
    final BooleanQuery.Builder builder = newBooleanQuery();
    final List<Term> samePosition = new ArrayList<>();
    final TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    final PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        // A non-zero increment begins a new position: flush the synonyms gathered so far.
        if (posIncrAtt.getPositionIncrement() != 0) {
            add(builder, samePosition, operator);
            samePosition.clear();
        }
        samePosition.add(new Term(field, termAtt.getBytesRef()));
    }
    // Flush the final position group.
    add(builder, samePosition, operator);
    return builder.build();
}
// NOTE(review): truncated fragment — walks a TokenStream and appears to build automaton
// states from token positions/lengths (builder.createState()), rejecting streams whose
// first token has an increment below 1. The branch braces and the code between `pos`
// bookkeeping and state creation are cut off; `pos` itself is declared outside this
// fragment. TODO: restore the missing body from the original file.
final TermToBytesRefAttribute termBytesAtt = in.addAttribute(TermToBytesRefAttribute.class); final PositionIncrementAttribute posIncAtt = in.addAttribute(PositionIncrementAttribute.class); final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class); in.reset(); int state = -1; while (in.incrementToken()) { int currentIncr = posIncAtt.getPositionIncrement(); if (pos == -1 && currentIncr < 1) { throw new IllegalStateException("Malformed TokenStream, start token can't have increment less than 1"); int endPos = pos + posLengthAtt.getPositionLength(); while (state < endPos) { state = builder.createState();
/** * Copy the inner's stream attributes values to the main stream's ones. This filter * uses an inner stream, therefore it needs to be cleared so that other filters * have clean attributes data. Because of that, the attributes datatypeURI and * node have to saved in order to be restored after. */ private void copyInnerStreamAttributes() { // backup datatype and node path final IntsRef nodePath = IntsRef.deepCopyOf(nodeAtt.node()); final char[] dt = dtypeAtt.datatypeURI(); // clear attributes input.clearAttributes(); // copy inner attributes final int len = tokenTermAtt.length(); termAtt.copyBuffer(tokenTermAtt.buffer(), 0, len); offsetAtt.setOffset(tokenOffsetAtt.startOffset(), tokenOffsetAtt.endOffset()); posIncrAtt.setPositionIncrement(tokenPosIncrAtt.getPositionIncrement()); typeAtt.setType(tokenTypeAtt.type()); // TupleTokenizer handles the setting of tuple/cell values and the datatype URI // restore datatype and node nodeAtt.copyNode(nodePath); dtypeAtt.setDatatypeURI(dt); }
// NOTE(review): truncated fragment — analyzes `text` and appears to concatenate the
// resulting tokens into `reuse` (a char builder), rejecting zero-length tokens and any
// token whose position increment differs from 1. The branch braces are missing and the
// variable `end` used by the arraycopy is defined outside this fragment; the separator
// written by the first setLength(+1) is also cut off. TODO: restore from the original file.
try (TokenStream ts = analyzer.tokenStream("", text)) { CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); ts.reset(); reuse.clear(); while (ts.incrementToken()) { int length = termAtt.length(); if (length == 0) { throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token"); if (posIncAtt.getPositionIncrement() != 1) { throw new IllegalArgumentException("term: " + text + " analyzed to a token (" + termAtt + ") with position increment != 1 (got: " + posIncAtt.getPositionIncrement() + ")"); reuse.setLength(reuse.length() + 1); System.arraycopy(termAtt.buffer(), 0, reuse.chars(), end, length); reuse.setLength(reuse.length() + length);
private ArrayList<Data> analyze(Analyzer analyzer1) throws IOException { ArrayList<Data> results = new ArrayList<>(50); TokenStream ts = analyzer1.tokenStream("foo", text); ts.reset(); while (ts.incrementToken()) { Data data = new Data(); OffsetAttribute offsetAttribute = ts.getAttribute(OffsetAttribute.class); data.startOffset = offsetAttribute.startOffset(); data.endOffset = offsetAttribute.endOffset(); data.positionLength = ts.getAttribute(PositionLengthAttribute.class).getPositionLength(); data.positionIncGap = ts.getAttribute(PositionIncrementAttribute.class).getPositionIncrement(); data.tokenType = ts.getAttribute(HebrewTokenTypeAttribute.class).getType().toString(); data.term = ts.getAttribute(CharTermAttribute.class).toString(); if (ts.getAttribute(KeywordAttribute.class) != null) data.isKeyword = ts.getAttribute(KeywordAttribute.class).isKeyword(); // System.out.println(data.term + " " + data.tokenType); results.add(data); } ts.close(); return results; } }
/**
 * Tokenizes {@code fieldValue} with the given analyzer and produces one
 * {@link Span} per token, carrying both the analyzed term and the original
 * text slice it was derived from, plus the token's absolute position.
 *
 * @param attributeName  attribute name stored on every produced span
 * @param fieldValue     raw field text to tokenize
 * @param luceneAnalyzer analyzer used for tokenization
 * @return spans in token order
 * @throws DataflowException if tokenization fails
 */
public static List<Span> generatePayload(String attributeName, String fieldValue, Analyzer luceneAnalyzer) {
    List<Span> payload = new ArrayList<>();
    // try-with-resources: the original only closed the stream on the success path,
    // leaking it whenever reset()/incrementToken() threw.
    try (TokenStream tokenStream = luceneAnalyzer.tokenStream(null, new StringReader(fieldValue))) {
        OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class);
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute positionIncrementAttribute =
                tokenStream.addAttribute(PositionIncrementAttribute.class);
        int tokenPositionCounter = -1; // becomes 0 on the first token's increment
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            tokenPositionCounter += positionIncrementAttribute.getPositionIncrement();
            int charStart = offsetAttribute.startOffset();
            int charEnd = offsetAttribute.endOffset();
            String analyzedTermStr = charTermAttribute.toString();
            String originalTermStr = fieldValue.substring(charStart, charEnd);
            payload.add(new Span(attributeName, charStart, charEnd, analyzedTermStr,
                    originalTermStr, tokenPositionCounter));
        }
        tokenStream.end(); // TokenStream contract: end() must be called before close()
    } catch (IOException e) {
        throw new DataflowException(e);
    }
    return payload;
}
// NOTE(review): truncated fragment — legacy (pre-Lucene-4 `reusableTokenStream`) code that
// tokenizes a stored field value and accumulates start/end offsets into offsetVector,
// applying the analyzer's position-increment gap between field instances. The line contains
// a stray `// reset the TokenStream...` comment that, in this collapsed form, comments out
// the rest of the statement, and `.addAttribute(PositionIncrementAttribute.class)` has lost
// its receiver/assignment. TODO: restore the original multi-line form before use.
tokReader = new StringReader(field.stringValue()); tokens = analyzer.reusableTokenStream(field.name(), tokReader); if (position > 0) position += analyzer.getPositionIncrementGap(field.name()); tokens.reset(); // reset the TokenStream to the first token offsetAttribute = (OffsetAttribute) tokens.addAttribute(OffsetAttribute.class); .addAttribute(PositionIncrementAttribute.class); position += (posIncrAttribute.getPositionIncrement() - 1); offsetVector.add(lastOffset + offsetAttribute.startOffset()); offsetVector.add(lastOffset + offsetAttribute.endOffset());
@Override
public void copyTo(AttributeImpl target) {
    if (target instanceof PackedTokenAttributeImpl) {
        // Fast path: sibling packed implementation — copy every field directly.
        final PackedTokenAttributeImpl dest = (PackedTokenAttributeImpl) target;
        dest.copyBuffer(buffer(), 0, length());
        dest.positionIncrement = positionIncrement;
        dest.positionLength = positionLength;
        dest.startOffset = startOffset;
        dest.endOffset = endOffset;
        dest.type = type;
        dest.termFrequency = termFrequency;
        return;
    }
    // Slow path: push each value through the individual attribute interfaces.
    super.copyTo(target);
    ((OffsetAttribute) target).setOffset(startOffset, endOffset);
    ((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
    ((PositionLengthAttribute) target).setPositionLength(positionLength);
    ((TypeAttribute) target).setType(type);
    ((TermFrequencyAttribute) target).setTermFrequency(termFrequency);
}
// NOTE(review): truncated fragment — shingle-style gram assembly: appends the separator and
// next token's term to gramBuilder, sets increment 0/1 depending on whether output occurs at
// this position, stretches the offset to the last token in the gram, and computes position
// length from the gram size. Braces for the `gramSize.getValue() > 1` branch and the if/else
// tail are cut off. TODO: restore the missing structure from the original file.
gramBuilder.append(tokenSeparator); gramBuilder.append(nextToken.termAtt.buffer(), 0, nextToken.termAtt.length()); ++builtGramSize; posIncrAtt.setPositionIncrement(isOutputHere ? 0 : 1); termAtt.setEmpty().append(gramBuilder); if (gramSize.getValue() > 1) { typeAtt.setType(tokenType); noShingleOutput = false; offsetAtt.setOffset(offsetAtt.startOffset(), nextToken.offsetAtt.endOffset()); if (outputUnigrams) { posLenAtt.setPositionLength(builtGramSize); } else { posLenAtt.setPositionLength(Math.max(1, (builtGramSize - minShingleSize) + 1));
/**
 * Loads a pending token's values into this stream's attributes.
 * A token flagged {@code nonpos} stacks on the previous position
 * (increment 0); all others advance by one.
 */
private void setAttribs(PendingToken tok) {
    clearAttributes();
    final int increment = tok.nonpos ? 0 : 1;
    this.posIncrAtt.setPositionIncrement(increment);
    this.termAtt.setEmpty().append(tok.str);
    this.offsetAtt.setOffset(tok.start, tok.end);
}
// NOTE(review): truncated fragment — word-delimiter-style token emission: it buffers a term,
// accumulates position increments, and emits either a saved sub-part (copyBuffer from
// savedTermBuffer with start/end part bounds) or a concatenated part, with offset clamping
// (endOffset >= lastStartOffset) and position-length bookkeeping. Multiple unreachable
// `return` statements and missing braces indicate several branches were collapsed together.
// TODO: restore the original control flow before use.
if (input.incrementToken() == false) { return false; return true; int termLength = termAttribute.length(); char[] termBuffer = termAttribute.buffer(); accumPosInc += posIncAttribute.getPositionIncrement(); posIncAttribute.setPositionIncrement(accumPosInc); accumPosInc = 0; return true; endOffset = Math.max(endOffset, lastStartOffset); offsetAttribute.setOffset(startOffset, endOffset); lastStartOffset = startOffset; termAttribute.copyBuffer(savedTermBuffer, startPart, endPart - startPart); } else { termAttribute.copyBuffer(termPart, 0, termPart.length); posIncAttribute.setPositionIncrement(accumPosInc + startPos - wordPos); accumPosInc = 0; posLenAttribute.setPositionLength(endPos - startPos); wordPos = startPos; return true;