/**
 * Wraps {@code source} in a sorted iterator whose ordering is given by the comparator
 * returned from {@link BytesRef#getUTF8SortedAsUnicodeComparator()}.
 *
 * @param source the iterator to wrap; handed unchanged to the two-argument constructor
 * @throws IOException if the two-argument constructor this delegates to throws it
 */
public SortedInputIterator(InputIterator source) throws IOException {
  this(
      source,
      BytesRef.getUTF8SortedAsUnicodeComparator());
}
/**
 * Orders this bucket against {@code other} by their term bytes, using the comparator from
 * {@link BytesRef#getUTF8SortedAsUnicodeComparator()}.
 *
 * @param other the bucket to compare with; it is cast to {@code Bucket} unconditionally
 * @return the value produced by the comparator for the two term byte sequences
 */
@Override
int compareTerm(Terms.Bucket other) {
  final Bucket that = (Bucket) other;
  return BytesRef.getUTF8SortedAsUnicodeComparator().compare(termBytes, that.termBytes);
}
/**
 * Orders this bucket against {@code other} by their term bytes, using the comparator from
 * {@link BytesRef#getUTF8SortedAsUnicodeComparator()}.
 *
 * @param other the bucket to compare with; it is cast to
 *              {@code ComputedSignificantStringTermsBucket} unconditionally
 * @return the value produced by the comparator for the two term byte sequences
 */
@Override
int compareTerm(final SignificantTerms.Bucket other) {
  final ComputedSignificantStringTermsBucket that = (ComputedSignificantStringTermsBucket) other;
  return BytesRef.getUTF8SortedAsUnicodeComparator().compare(termBytes, that.termBytes);
}
/**
 * Orders this bucket against {@code other} by their term bytes, using the comparator from
 * {@link BytesRef#getUTF8SortedAsUnicodeComparator()}.
 *
 * @param other the bucket to compare with; it is cast to {@code Bucket} unconditionally
 * @return the value produced by the comparator for the two term byte sequences
 */
@Override
int compareTerm(SignificantTerms.Bucket other) {
  final Bucket that = (Bucket) other;
  return BytesRef.getUTF8SortedAsUnicodeComparator().compare(termBytes, that.termBytes);
}
/**
 * Compares the term bytes of this bucket with those of {@code other} via the comparator from
 * {@link BytesRef#getUTF8SortedAsUnicodeComparator()}.
 *
 * @param other the other bucket; cast to {@code ComputedSignificantStringTermsBucket}
 *              without a prior type check
 * @return the comparator's result for this bucket's term bytes versus the other's
 */
@Override
int compareTerm(final SignificantTerms.Bucket other) {
  final ComputedSignificantStringTermsBucket rhs = (ComputedSignificantStringTermsBucket) other;
  return BytesRef.getUTF8SortedAsUnicodeComparator().compare(termBytes, rhs.termBytes);
}
/**
 * Sorts the term hash with the comparator from
 * {@link BytesRef#getUTF8SortedAsUnicodeComparator()}, records the resulting id array in
 * {@code sortedTermIDs}, and returns that same array.
 *
 * @return the sorted term ids, also stored in {@code sortedTermIDs}
 */
public int[] sortPostings() {
  final int[] ordered = bytesHash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  sortedTermIDs = ordered;
  return ordered;
}
/**
 * Creates a builder with default options by delegating to the three-argument constructor:
 * {@code DEFAULT_BUCKETS} buckets (10 according to the original note, with exact matches
 * promoted to first position), an {@link InMemorySorter} using the comparator obtained from
 * {@link BytesRef#getUTF8SortedAsUnicodeComparator()}, and {@link Integer#MAX_VALUE} as the
 * third argument.
 */
public FSTCompletionBuilder() {
  this(
      DEFAULT_BUCKETS,
      new InMemorySorter(BytesRef.getUTF8SortedAsUnicodeComparator()),
      Integer.MAX_VALUE);
}
/**
 * Stores the query inputs and sorts {@code terms} with the comparator from
 * {@link BytesRef#getUTF8SortedAsUnicodeComparator()}, keeping the resulting id array in
 * {@code ords}.
 *
 * @param field stored as-is
 * @param multipleValuesPerDocument stored as-is
 * @param terms the term hash; stored and sorted here
 * @param scores stored as-is
 * @param originalQuery stored both as {@code originalQuery} and {@code unwrittenOriginalQuery}
 */
TermsIncludingScoreQuery(
    String field,
    boolean multipleValuesPerDocument,
    BytesRefHash terms,
    float[] scores,
    Query originalQuery) {
  this.field = field;
  this.multipleValuesPerDocument = multipleValuesPerDocument;

  // Both query references start out pointing at the same instance.
  this.originalQuery = originalQuery;
  this.unwrittenOriginalQuery = originalQuery;

  this.scores = scores;
  this.terms = terms;
  this.ords = terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
}
/**
 * Default-options constructor. Delegates to the three-argument constructor with
 * {@code DEFAULT_BUCKETS} buckets (the original note gives this as 10, with exact matches
 * promoted to first position), an {@link InMemorySorter} built on the comparator from
 * {@link BytesRef#getUTF8SortedAsUnicodeComparator()}, and {@link Integer#MAX_VALUE} as the
 * final argument.
 */
public FSTCompletionBuilder() {
  this(
      DEFAULT_BUCKETS,
      new InMemorySorter(
          BytesRef.getUTF8SortedAsUnicodeComparator()),
      Integer.MAX_VALUE);
}
public DocValuesTermsQuery(String field, Collection<BytesRef> terms) { this.field = Objects.requireNonNull(field); Objects.requireNonNull(terms, "Collection of terms must not be null"); this.terms = terms.toArray(new BytesRef[terms.size()]); ArrayUtil.timSort(this.terms, BytesRef.getUTF8SortedAsUnicodeComparator()); }
/**
 * Stores the inputs and sorts {@code terms} with the comparator from
 * {@link BytesRef#getUTF8SortedAsUnicodeComparator()}, keeping the resulting id array in
 * {@code ords}.
 *
 * @param field the field that should contain the given terms; passed to the superclass
 *              constructor
 * @param fromQuery stored as-is on this instance
 * @param terms the terms that matching documents should have; stored and sorted here
 */
TermsQuery(String field, Query fromQuery, BytesRefHash terms) {
  super(field);
  this.terms = terms;
  this.fromQuery = fromQuery;
  ords = terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
}
/**
 * Sorts {@code bytesHash} using the comparator from
 * {@link BytesRef#getUTF8SortedAsUnicodeComparator()} and remembers the outcome in
 * {@code sortedTermIDs}.
 *
 * @return the array of sorted term ids that was just stored in {@code sortedTermIDs}
 */
public int[] sortPostings() {
  final int[] sortedIds = bytesHash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  sortedTermIDs = sortedIds;
  return sortedIds;
}
/**
 * Lazily sorts the hashed terms using the comparator from
 * {@link BytesRef#getUTF8SortedAsUnicodeComparator()}. The sort runs only while
 * {@code sortedTerms} is still {@code null}; later calls are no-ops.
 *
 * <p>Per the original design note: sorting is delayed because it is often not needed at all,
 * and hashing + sort + binary search is still faster and smaller than a TreeMap-based
 * alternative.
 */
public void sortTerms() {
  if (sortedTerms != null) {
    return; // already sorted
  }
  sortedTerms = terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
}
/**
 * Opens a term enumeration via {@code reader.terms(new Term(field, ""))} and picks the
 * comparator for iteration.
 *
 * <p>{@code comp} is set to the comparator from
 * {@link BytesRef#getUTF8SortedAsUnicodeComparator()} only when the enumeration's current term
 * exists and belongs to {@code field}; otherwise it is set to {@code null}.
 *
 * @throws IOException if opening or reading the term enumeration fails
 */
TermIterator() throws IOException {
  termsEnum = reader.terms(new Term(field, ""));
  Term term = termsEnum.term();
  // Compare field names by value: an identity check (!=) is only correct when both strings
  // happen to be interned, and silently reports "different field" otherwise.
  final boolean sameField = term != null && term.field().equals(field);
  if (sameField) {
    comp = BytesRef.getUTF8SortedAsUnicodeComparator();
  } else {
    comp = null;
  }
}
/**
 * Binary-searches the sorted terms for {@code text} using the comparator from
 * {@link BytesRef#getUTF8SortedAsUnicodeComparator()} and stores the search result in
 * {@code termUpto}.
 *
 * @param text the term to look up
 * @return {@code true} when the search result stored in {@code termUpto} is non-negative
 */
@Override
public boolean seekExact(BytesRef text) {
  final int lastIndex = info.terms.size() - 1;
  termUpto = binarySearch(
      text,
      br,
      0,
      lastIndex,
      info.terms,
      info.sortedTerms,
      BytesRef.getUTF8SortedAsUnicodeComparator());
  return termUpto >= 0;
}
/**
 * Opens a term enumeration via {@code reader.terms(new Term(field, ""))}, computes the
 * minimum document count, and picks the comparator for iteration.
 *
 * <p>{@code minNumDocs} is {@code thresh} multiplied by {@code reader.numDocs()}, truncated to
 * an {@code int}. {@code comp} is set to the comparator from
 * {@link BytesRef#getUTF8SortedAsUnicodeComparator()} only when the enumeration's current term
 * exists and belongs to {@code field}; otherwise it is set to {@code null}.
 *
 * @throws IOException if opening or reading the term enumeration fails
 */
HighFrequencyIterator() throws IOException {
  termsEnum = reader.terms(new Term(field, ""));
  minNumDocs = (int) (thresh * (float) reader.numDocs());
  Term term = termsEnum.term();
  // Compare field names by value: an identity check (!=) is only correct when both strings
  // happen to be interned, and silently reports "different field" otherwise.
  if (term == null || !term.field().equals(field)) {
    comp = null;
  } else {
    comp = BytesRef.getUTF8SortedAsUnicodeComparator();
  }
}
SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms, int[] ords) { super(tenum); this.terms = terms; this.ords = ords; comparator = BytesRef.getUTF8SortedAsUnicodeComparator(); lastElement = terms.size() - 1; lastTerm = terms.get(ords[lastElement], new BytesRef()); seekTerm = terms.get(ords[upto], spare); }
@Override public SeekStatus seekCeil(BytesRef text) { termUpto = binarySearch(text, br, 0, info.terms.size()-1, info.terms, info.sortedTerms, BytesRef.getUTF8SortedAsUnicodeComparator()); if (termUpto < 0) { // not found; choose successor termUpto = -termUpto-1; if (termUpto >= info.terms.size()) { return SeekStatus.END; } else { info.terms.get(info.sortedTerms[termUpto], br); return SeekStatus.NOT_FOUND; } } else { return SeekStatus.FOUND; } }
SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefTermsSet termsSet) { super(tenum); this.terms = termsSet.getBytesRefHash(); this.ords = this.terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator()); lastElement = terms.size() - 1; lastTerm = terms.get(ords[lastElement], new BytesRef()); seekTerm = terms.get(ords[upto], spare); }
/**
 * Builds a {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}.
 *
 * <p>The hashed inputs are sorted with the comparator from
 * {@link BytesRef#getUTF8SortedAsUnicodeComparator()} and added in that order to an FST
 * builder, each paired with the value held in {@code outputValues} under the same id.
 *
 * @return a {@link StemmerOverrideMap} wrapping the finished FST and the {@code ignoreCase}
 *         setting
 * @throws IOException if an {@link IOException} occurs
 */
public StemmerOverrideMap build() throws IOException {
  final ByteSequenceOutputs fstOutputs = ByteSequenceOutputs.getSingleton();
  final org.apache.lucene.util.fst.Builder<BytesRef> fstBuilder =
      new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE4, fstOutputs);

  final int[] sortedIds = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  final IntsRefBuilder inputScratch = new IntsRefBuilder();
  final int entryCount = hash.size();
  final BytesRef keyScratch = new BytesRef();

  // Only the first entryCount slots of the sorted id array are consumed.
  for (int pos = 0; pos < entryCount; pos++) {
    final int id = sortedIds[pos];
    inputScratch.copyUTF8Bytes(hash.get(id, keyScratch));
    fstBuilder.add(inputScratch.get(), new BytesRef(outputValues.get(id)));
  }

  return new StemmerOverrideMap(fstBuilder.finish(), ignoreCase);
}