// Iteration over a pre-collected list of terms: hands back each term's bytes in
// order and signals exhaustion with null (the usual BytesRefIterator contract).
// NOTE(review): `i` and `terms` are captured from the enclosing (not visible) scope;
// the trailing `};` closes the surrounding anonymous class.
@Override public BytesRef next() {
  if (i >= terms.size()) return null; // exhausted
  return terms.get(i++).bytes();      // advance cursor after the read
} };
/**
 * Constructs a query for terms starting with {@code prefix}.
 *
 * @param prefix the prefix term; must not be null
 * @throws NullPointerException if {@code prefix} is null
 */
public PrefixQuery(Term prefix) {
  // It's OK to pass unlimited maxDeterminizedStates: the automaton is born small and determinized.
  //
  // Fix: the original null check sat AFTER the super(...) call, making it dead code —
  // prefix.bytes() inside the super arguments would already have thrown a bare NPE.
  // Java evaluates arguments left-to-right, so validating in the first argument
  // guarantees the descriptive message is thrown before prefix is dereferenced.
  super(requirePrefix(prefix), toAutomaton(prefix.bytes()), Integer.MAX_VALUE, true);
}

/** Rejects a null prefix with a descriptive message before the super-constructor args dereference it. */
private static Term requirePrefix(Term prefix) {
  if (prefix == null) {
    throw new NullPointerException("prefix must not be null");
  }
  return prefix;
}
/** Returns true if {@code termIn} lies within {@code k} edits of the query term. */
private boolean matches(BytesRef termIn, int k) {
  if (k == 0) {
    // No edits allowed: only an exact byte-for-byte match can qualify.
    return termIn.equals(term.bytes());
  }
  // Run the pre-built Levenshtein automaton for exactly k edits over the candidate bytes.
  return automata[k].runAutomaton.run(termIn.bytes, termIn.offset, termIn.length);
}
/** Adds a term, delegating to the (field, bytes) overload. */
public void add(Term term) {
  final String field = term.field();
  final BytesRef bytes = term.bytes();
  add(field, bytes);
}
/** * Increments search count for {@code term} by {@code value}. * @param term term for which to increment search count * @param value value to increment by */ public void incrementSearchCount(final Term term, final int value) { if (term == null) { throw new IllegalArgumentException("Cannot increment search count for null"); } boolean gotLock = lock.readLock().tryLock(); if (!gotLock) { // do not wait for rebuild return; } try { if (lookups.get(term.field()).get(term.text()) == null) { return; // unknown term } PopularityMap map = searchCountMaps.get(term.field()); if (map != null) { map.increment(term.bytes(), value); } } finally { lock.readLock().unlock(); } }
/**
 * Returns {@link TermStatistics} for a term.
 *
 * This can be overridden, for example, to return a term's statistics
 * across a distributed collection.
 *
 * @lucene.experimental
 */
public TermStatistics termStatistics(Term term, TermContext context) throws IOException {
  final long docFreq = context.docFreq();
  final long totalTermFreq = context.totalTermFreq();
  return new TermStatistics(term.bytes(), docFreq, totalTermFreq);
}
/**
 * Returns a {@link PostingsEnum} for the specified term, or null if either
 * the field or the term does not exist.
 *
 * <p><b>NOTE:</b> The returned {@link PostingsEnum} may contain deleted docs.
 *
 * @see TermsEnum#postings(PostingsEnum)
 */
public final PostingsEnum postings(Term term, int flags) throws IOException {
  assert term.field() != null;
  assert term.bytes() != null;
  final Terms fieldTerms = terms(term.field());
  if (fieldTerms == null) {
    return null; // field was never indexed
  }
  final TermsEnum iterator = fieldTerms.iterator();
  if (!iterator.seekExact(term.bytes())) {
    return null; // term absent from this field
  }
  return iterator.postings(null, flags);
}
@Override protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException { if (maxEdits == 0 || prefixLength >= term.text().length()) { // can only match if it's exact return new SingleTermsEnum(terms.iterator(), term.bytes()); } return new FuzzyTermsEnum(terms, atts, getTerm(), maxEdits, prefixLength, transpositions); }
/**
 * Builds a scorer over this segment: one TermScorer per synonym term that is
 * present in the segment, combined via a disjunction when more than one matched.
 * Returns null when none of the terms occur in this segment.
 */
@Override public Scorer scorer(LeafReaderContext context) throws IOException {
  Similarity.SimScorer simScorer = similarity.simScorer(simWeight, context);
  // we use termscorers + disjunction as an impl detail
  List<Scorer> subScorers = new ArrayList<>();
  for (int i = 0; i < terms.length; i++) {
    // A null TermState means the i-th term does not occur in this segment — skip it.
    TermState state = termContexts[i].get(context.ord);
    if (state != null) {
      TermsEnum termsEnum = context.reader().terms(terms[i].field()).iterator();
      // Seek using the pre-computed state: avoids re-walking the term dictionary.
      termsEnum.seekExact(terms[i].bytes(), state);
      PostingsEnum postings = termsEnum.postings(null, PostingsEnum.FREQS);
      subScorers.add(new TermScorer(this, postings, simScorer));
    }
  }
  if (subScorers.isEmpty()) {
    return null;
  } else if (subScorers.size() == 1) {
    // we must optimize this case (term not in segment), disjunctionscorer requires >= 2 subs
    return subScorers.get(0);
  } else {
    return new SynonymScorer(simScorer, this, subScorers);
  }
}
/**
 * Returns the document frequency of {@code term}, or 0 when the field or
 * the term is absent from this reader.
 */
@Override
public final int docFreq(Term term) throws IOException {
  final Terms fieldTerms = terms(term.field());
  if (fieldTerms == null) {
    return 0;
  }
  final TermsEnum iterator = fieldTerms.iterator();
  return iterator.seekExact(term.bytes()) ? iterator.docFreq() : 0;
}
/** Returns the total number of occurrences of the term
 * <code>t</code> across all documents (the total term frequency —
 * NOT the document count; the code below returns
 * {@code termsEnum.totalTermFreq()}). This method returns 0 if the
 * term or field does not exist. This method does not take into
 * account deleted documents that have not yet been merged away. */
@Override public final long totalTermFreq(Term term) throws IOException {
  final Terms terms = terms(term.field());
  if (terms == null) {
    return 0; // field was never indexed
  }
  final TermsEnum termsEnum = terms.iterator();
  if (termsEnum.seekExact(term.bytes())) {
    return termsEnum.totalTermFreq();
  } else {
    return 0; // term absent from this field
  }
}
// NOTE(review): fragment — this opens a block whose body continues past the
// visible excerpt; do not treat it as a complete statement list.
// Seeks the buffered delete term; on a hit, (re)opens a postings iterator
// (flags=0 presumably requests docs-only postings — confirm against the
// PostingsEnum flag constants) and fetches the doc-id limit recorded for
// this delete so only documents added before it are affected.
if (termsEnum != null && termsEnum.seekExact(deleteTerm.bytes())) {
  postingsEnum = termsEnum.postings(postingsEnum, 0);
  int delDocLimit = segDeletes.get(deleteTerm);
/**
 * Builds the weight for a TermQuery: collects (or fabricates) collection- and
 * term-level statistics and asks the similarity to compute the scoring weight.
 * When scores are not needed, real statistics are skipped as an optimization.
 */
public TermWeight(IndexSearcher searcher, boolean needsScores, float boost, TermContext termStates) throws IOException {
  super(TermQuery.this);
  // Term states are mandatory for scoring: seeking without them cannot score correctly.
  if (needsScores && termStates == null) {
    throw new IllegalStateException("termStates are required when scores are needed");
  }
  this.needsScores = needsScores;
  this.termStates = termStates;
  this.similarity = searcher.getSimilarity(needsScores);
  final CollectionStatistics collectionStats;
  final TermStatistics termStats;
  if (needsScores) {
    collectionStats = searcher.collectionStatistics(term.field());
    termStats = searcher.termStatistics(term, termStates);
  } else {
    // we do not need the actual stats, use fake stats with docFreq=maxDoc and ttf=-1
    final int maxDoc = searcher.getIndexReader().maxDoc();
    collectionStats = new CollectionStatistics(term.field(), maxDoc, -1, -1, -1);
    termStats = new TermStatistics(term.bytes(), maxDoc, -1);
  }
  this.stats = similarity.computeWeight(boost, collectionStats, termStats);
}
// NOTE(review): fragment — part of a larger loop/setup not visible here
// (te, state, postingsFreqs, i, query come from the enclosing scope).
te.seekExact(t.bytes(), state); // position at the term using its pre-computed state
// 24 is a hard-coded postings-flags bitmask — presumably equivalent to
// PostingsEnum.POSITIONS (freqs+positions); confirm against the PostingsEnum
// constants and prefer the named constant over the magic number.
PostingsEnum postingsEnum = te.postings(null, 24);
postingsFreqs[i] = new CustomPhraseQuery.PostingsAndFreq(postingsEnum, query.positions[i], t);
/**
 * Returns a {@link TermsEnum} positioned at this weight's Term, or null if
 * the term does not exist in the given context.
 *
 * Two paths: when term states were built up front (query usage) the cached
 * per-leaf state is used to seek directly; otherwise (filter usage) the term
 * dictionary is searched from scratch.
 */
private TermsEnum getTermsEnum(LeafReaderContext context) throws IOException {
  if (termStates != null) {
    // TermQuery either used as a Query or the term states have been provided at construction time
    assert termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
    final TermState state = termStates.get(context.ord);
    if (state == null) { // term is not present in that reader
      assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term;
      return null;
    }
    // A non-null state implies the field/term exist in this leaf, so terms(...) is non-null here.
    final TermsEnum termsEnum = context.reader().terms(term.field()).iterator();
    termsEnum.seekExact(term.bytes(), state); // cheap seek: reuses the cached state
    return termsEnum;
  } else {
    // TermQuery used as a filter, so the term states have not been built up front
    Terms terms = context.reader().terms(term.field());
    if (terms == null) {
      return null; // field not indexed in this leaf
    }
    final TermsEnum termsEnum = terms.iterator();
    if (termsEnum.seekExact(term.bytes())) {
      return termsEnum;
    } else {
      return null; // term absent from this leaf
    }
  }
}
// NOTE(review): fragment — the leading `return null;` belongs to an enclosing
// conditional not visible in this excerpt; the rest sets up one phrase element.
return null;
te.seekExact(t.bytes(), state); // position at the term using its pre-computed state
// Request offsets only when the caller needs them (ALL) — positions suffice otherwise.
PostingsEnum postingsEnum = te.postings(null, exposeOffsets ? PostingsEnum.ALL : PostingsEnum.POSITIONS);
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t);
/**
 * Creates a {@link TermContext} from a top-level {@link IndexReaderContext} and the
 * given {@link Term}. This method will look up the given term in all of the context's
 * leaf readers and register each reader containing the term in the returned
 * {@link TermContext}, keyed by the leaf reader's ordinal.
 * <p>
 * Note: the given context must be a top-level context.
 */
public static TermContext build(IndexReaderContext context, Term term) throws IOException {
  assert context != null && context.isTopLevel;
  final String field = term.field();
  final BytesRef bytes = term.bytes();
  final TermContext perReaderTermState = new TermContext(context);
  //if (DEBUG) System.out.println("prts.build term=" + term);
  for (final LeafReaderContext ctx : context.leaves()) {
    //if (DEBUG) System.out.println("  r=" + leaves[i].reader);
    final Terms terms = ctx.reader().terms(field);
    if (terms != null) { // field exists in this leaf
      final TermsEnum termsEnum = terms.iterator();
      if (termsEnum.seekExact(bytes)) {
        // Capture the enumerator's state plus this leaf's frequencies so later
        // consumers can seek directly without re-searching the term dictionary.
        final TermState termState = termsEnum.termState();
        //if (DEBUG) System.out.println("    found");
        perReaderTermState.register(termState, ctx.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
      }
    }
  }
  return perReaderTermState;
}
/**
 * Returns the spans for this term in the given leaf, or null when the term
 * (or its field) is absent. Requires the field to have been indexed with
 * positions. (The second closing brace ends the enclosing — not visible — class.)
 */
@Override public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException {
  assert termContext.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
  final TermState state = termContext.get(context.ord);
  if (state == null) { // term is not present in that reader
    assert context.reader().docFreq(term) == 0 : "no termstate found but term exists in reader term=" + term;
    return null;
  }
  final Terms terms = context.reader().terms(term.field());
  if (terms == null)
    return null; // field not indexed in this leaf
  // Spans need per-occurrence positions; fail loudly if the field lacks them.
  if (terms.hasPositions() == false)
    throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")");
  final TermsEnum termsEnum = terms.iterator();
  termsEnum.seekExact(term.bytes(), state); // cheap seek: reuses the cached state
  final PostingsEnum postings = termsEnum.postings(null, requiredPostings.getRequiredPostings());
  // Heuristic cost used by span scoring to decide lazy vs. eager position iteration.
  float positionsCost = termPositionsCost(termsEnum) * PHRASE_TO_SPAN_TERM_POSITIONS_COST;
  return new TermSpans(getSimScorer(context), postings, term, positionsCost);
} }