/**
 * Turns every token of the stream into a {@link Term} on {@code field} and passes the
 * collected terms to {@code newSynonymQuery}.
 *
 * <p>The stream is reset by this method before it is consumed.
 *
 * @param field name of the field each term is created for
 * @param stream token stream to read the term bytes from
 * @return whatever {@code newSynonymQuery} builds from the collected terms
 * @throws IOException propagated from the token stream
 */
protected Query analyzeBoolean(String field, TokenStream stream) throws IOException {
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  final List<Term> collected = new ArrayList<>();

  stream.reset();
  for (boolean hasToken = stream.incrementToken(); hasToken; hasToken = stream.incrementToken()) {
    collected.add(new Term(field, bytesAtt.getBytesRef()));
  }

  final Term[] synonyms = collected.toArray(new Term[0]);
  return newSynonymQuery(synonyms);
}
/**
 * Builds a term query from the first token of the stream.
 *
 * <p>The stream is reset by this method; only its first token is read.
 *
 * @param field name of the field the term is created for
 * @param stream token stream expected to yield at least one token
 * @return the result of {@code newTermQuery} for the first token's term
 * @throws IOException propagated from the token stream
 * @throws AssertionError if the stream yields no token at all
 */
protected Query analyzeTerm(String field, TokenStream stream) throws IOException {
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);

  stream.reset();
  final boolean hasToken = stream.incrementToken();
  if (hasToken) {
    final Term onlyTerm = new Term(field, bytesAtt.getBytesRef());
    return newTermQuery(onlyTerm);
  }
  throw new AssertionError();
}
/**
 * Builds a span query from the tokens of the given stream.
 *
 * <ul>
 *   <li>no {@link TermToBytesRefAttribute} on the stream, or no tokens: {@code null}</li>
 *   <li>exactly one token: that token's {@code SpanTermQuery}</li>
 *   <li>several tokens: an ordered {@code SpanNearQuery} with slop 0 over all of them</li>
 * </ul>
 *
 * <p>This method does not reset the stream itself.
 *
 * @param in token stream to consume
 * @param field name of the field each term is created for
 * @return the span query described above, or {@code null}
 * @throws IOException propagated from the token stream
 */
protected SpanQuery createSpanQuery(TokenStream in, String field) throws IOException {
  final TermToBytesRefAttribute bytesAtt = in.getAttribute(TermToBytesRefAttribute.class);
  if (bytesAtt == null) {
    return null;
  }

  final List<SpanTermQuery> clauses = new ArrayList<>();
  while (in.incrementToken()) {
    final Term term = new Term(field, bytesAtt.getBytesRef());
    clauses.add(new SpanTermQuery(term));
  }

  switch (clauses.size()) {
    case 0:
      return null;
    case 1:
      return clauses.get(0);
    default:
      return new SpanNearQuery(clauses.toArray(new SpanTermQuery[0]), 0, true);
  }
}
/**
 * Builds a boolean query in which tokens sharing a position are grouped together.
 *
 * <p>Terms are accumulated until a token with a non-zero position increment arrives; at that
 * point the accumulated group is handed to {@code add} with the given operator and a new
 * group is started. The final group is flushed after the stream is exhausted.
 *
 * @param field name of the field each term is created for
 * @param stream token stream to consume; it is reset by this method
 * @param operator occur value passed along with every group to {@code add}
 * @return the query built by the boolean query builder
 * @throws IOException propagated from the token stream
 */
protected Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClause.Occur operator)
    throws IOException {
  final BooleanQuery.Builder builder = newBooleanQuery();
  final List<Term> samePosition = new ArrayList<>();
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  final PositionIncrementAttribute incrAtt =
      stream.getAttribute(PositionIncrementAttribute.class);

  stream.reset();
  for (boolean hasToken = stream.incrementToken(); hasToken; hasToken = stream.incrementToken()) {
    final boolean startsNewPosition = incrAtt.getPositionIncrement() != 0;
    if (startsNewPosition) {
      // Flush the group gathered so far (possibly empty on the very first token).
      add(builder, samePosition, operator);
      samePosition.clear();
    }
    samePosition.add(new Term(field, bytesAtt.getBytesRef()));
  }

  // Flush whatever is left after the last token.
  add(builder, samePosition, operator);
  return builder.build();
}
+ this + " and input \"" + text + "\""); final BytesRef term = BytesRef.deepCopyOf(termAtt.getBytesRef()); if (ts.incrementToken()) { throw new IllegalStateException("The normalization token stream is "
multiTerms.add(new Term(field, termAtt.getBytesRef()));
/**
 * Builds a phrase query with the given slop from the tokens of the stream.
 *
 * <p>Positions start at 0 for the first token when every step is 1. When
 * {@code enablePositionIncrements} is set, each token advances the position by its own
 * position increment; otherwise every token advances it by exactly one.
 *
 * @param field name of the field each term is created for
 * @param stream token stream to consume; it is reset by this method
 * @param slop slop set on the phrase query builder
 * @return the built phrase query
 * @throws IOException propagated from the token stream
 */
protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
  final PhraseQuery.Builder phrase = new PhraseQuery.Builder();
  phrase.setSlop(slop);

  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  final PositionIncrementAttribute incrAtt =
      stream.getAttribute(PositionIncrementAttribute.class);

  int pos = -1;
  stream.reset();
  while (stream.incrementToken()) {
    pos += enablePositionIncrements ? incrAtt.getPositionIncrement() : 1;
    final Term term = new Term(field, bytesAtt.getBytesRef());
    phrase.add(term, pos);
  }
  return phrase.build();
}
BytesRef term = termBytesAtt.getBytesRef(); int id = getTermID(currentIncr, prevIncr, term); builder.addTransition(pos, endPos, id);
int termID = bytesHash.add(termAtt.getBytesRef());
} catch (MaxBytesLengthExceededException e) { byte[] prefix = new byte[30]; BytesRef bigTerm = invertState.termAttribute.getBytesRef(); System.arraycopy(bigTerm.bytes, bigTerm.offset, prefix, 0, 30); String msg = "Document contains at least one immense term in field=\"" + fieldInfo.name + "\" (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + Arrays.toString(prefix) + "...', original message: " + e.getMessage();
final BytesRef termUTF8 = changeToken(termBytesAtt.getBytesRef()); int[] termUnicode = null; final Position endPosData = positions.get(endPos);
multiTerms.add(new Term(field, termAtt.getBytesRef()));
/**
 * Builds a phrase query with the given slop from the tokens of the stream.
 *
 * <p>When {@code enablePosIncrements} is true, each token advances the running position by
 * its own position increment; otherwise every token advances it by exactly one. The running
 * position starts at -1 before the first token.
 *
 * @param field name of the field each term is created for
 * @param stream token stream to consume; it is reset by this method
 * @param slop slop set on the phrase query builder
 * @param enablePosIncrements whether token position increments are honoured
 * @return the built phrase query
 * @throws IOException propagated from the token stream
 */
@Override
public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements)
    throws IOException {
  final PhraseQuery.Builder phrase = new PhraseQuery.Builder();
  phrase.setSlop(slop);

  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  final PositionIncrementAttribute incrAtt =
      stream.getAttribute(PositionIncrementAttribute.class);

  int pos = -1;
  stream.reset();
  while (stream.incrementToken()) {
    final int step = enablePosIncrements ? incrAtt.getPositionIncrement() : 1;
    pos += step;
    phrase.add(new Term(field, bytesAtt.getBytesRef()), pos);
  }
  return phrase.build();
}
int cursor = 0; while (input.incrementToken()) { BytesRef bytesRef = termBytesAtt.getBytesRef(); long tokenHash = MurmurHash3.hash128(bytesRef.bytes, bytesRef.offset, bytesRef.length, 0, seed).h1; byte tokenByte = (byte) (tokenHash & 0xFF);
/** Returns true only when looking up the current term bytes ({@code bytesAtt.getBytesRef()}) in {@code termsHash} via {@code find} yields a non-negative value. */
@Override protected boolean accept() throws IOException { return termsHash.find(bytesAtt.getBytesRef()) >= 0; } }
/**
 * Turns every token of the stream into a {@link Term} on {@code field} and passes the
 * collected terms to {@code newSynonymQuery}.
 *
 * <p>The stream is reset by this method before it is consumed.
 *
 * @param field name of the field each term is created for
 * @param stream token stream to read the term bytes from
 * @return whatever {@code newSynonymQuery} builds from the collected terms
 * @throws IOException propagated from the token stream
 */
protected Query analyzeBoolean(String field, TokenStream stream) throws IOException {
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  final List<Term> collected = new ArrayList<>();

  stream.reset();
  for (boolean hasToken = stream.incrementToken(); hasToken; hasToken = stream.incrementToken()) {
    collected.add(new Term(field, bytesAtt.getBytesRef()));
  }

  final Term[] synonyms = collected.toArray(new Term[0]);
  return newSynonymQuery(synonyms);
}
/**
 * Builds a term query from the first token of the stream.
 *
 * <p>The stream is reset by this method; only its first token is read.
 *
 * @param field name of the field the term is created for
 * @param stream token stream expected to yield at least one token
 * @return the result of {@code newTermQuery} for the first token's term
 * @throws IOException propagated from the token stream
 * @throws AssertionError if the stream yields no token at all
 */
private Query analyzeTerm(String field, TokenStream stream) throws IOException {
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);

  stream.reset();
  final boolean hasToken = stream.incrementToken();
  if (hasToken) {
    final Term onlyTerm = new Term(field, bytesAtt.getBytesRef());
    return newTermQuery(onlyTerm);
  }
  throw new AssertionError();
}
/**
 * Builds a term query from the first token of the stream.
 *
 * <p>The stream is reset by this method; only its first token is read.
 *
 * @param field name of the field the term is created for
 * @param stream token stream expected to yield at least one token
 * @return the result of {@code newTermQuery} for the first token's term
 * @throws IOException propagated from the token stream
 * @throws AssertionError if the stream yields no token at all
 */
protected Query analyzeTerm(String field, TokenStream stream) throws IOException {
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);

  stream.reset();
  final boolean hasToken = stream.incrementToken();
  if (hasToken) {
    final Term onlyTerm = new Term(field, bytesAtt.getBytesRef());
    return newTermQuery(onlyTerm);
  }
  throw new AssertionError();
}
/**
 * Builds a boolean query with one {@code SHOULD} clause per token of the stream.
 *
 * <p>Each clause is the result of {@code newTermQuery} for the token's term on
 * {@code field}. Coord is disabled on the builder.
 *
 * @param field name of the field each term is created for
 * @param stream token stream to consume; it is reset by this method
 * @return the built boolean query
 * @throws IOException propagated from the token stream
 */
private Query analyzeBoolean(String field, TokenStream stream) throws IOException {
  final BooleanQuery.Builder disjunction = new BooleanQuery.Builder();
  disjunction.setDisableCoord(true);

  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);

  stream.reset();
  for (boolean hasToken = stream.incrementToken(); hasToken; hasToken = stream.incrementToken()) {
    final Term term = new Term(field, bytesAtt.getBytesRef());
    disjunction.add(newTermQuery(term), BooleanClause.Occur.SHOULD);
  }
  return disjunction.build();
}
/**
 * Builds a query from the first token of the stream, choosing a fuzzy query when the token
 * is flagged for it.
 *
 * <p>If the stream has a {@code FuzzyCandidateAttribute} and that attribute reports the token
 * as a candidate for fuzzing, a {@code FuzzyQuery} is created with the attribute's edit
 * distance and constant prefix length. Otherwise the term goes to {@code newTermQuery}.
 *
 * @param field name of the field the term is created for
 * @param stream token stream expected to yield at least one token; it is reset by this method
 * @return a fuzzy query or the result of {@code newTermQuery}, as described above
 * @throws IOException propagated from the token stream
 * @throws AssertionError if the stream yields no token at all
 */
@Override
protected Query analyzeTerm(String field, TokenStream stream) throws IOException {
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  final FuzzyCandidateAttribute fuzzyAtt = stream.getAttribute(FuzzyCandidateAttribute.class);

  stream.reset();
  final boolean hasToken = stream.incrementToken();
  if (!hasToken) {
    throw new AssertionError();
  }

  final Term onlyTerm = new Term(field, bytesAtt.getBytesRef());
  final boolean useFuzzy =
      stream.hasAttribute(FuzzyCandidateAttribute.class) && fuzzyAtt.isCandidateForFuzzing();
  if (!useFuzzy) {
    return newTermQuery(onlyTerm);
  }
  return new FuzzyQuery(onlyTerm, fuzzyAtt.getEditDistance(), fuzzyAtt.getConstantPrefixLength());
}