public void addDocumentNormalizations(LucandraTermInfo[] allDocs, String field, ReaderCache cache) { byte[] norms = cache.fieldNorms.get(field); OpenBitSet docHits = cache.docHits; for (LucandraTermInfo docInfo : allDocs) { int idx = docInfo.docId; if (idx > numDocs) throw new IllegalStateException("numDocs reached"); Byte norm = docInfo.norm; if (norm == null) norm = defaultNorm; // Check for cached reads if (norms != null && norms.length > idx && norms[idx] == norm) continue; docHits.fastSet(idx); if (norms == null) norms = new byte[numDocs]; norms[idx] = norm; } cache.fieldNorms.put(field, norms); }
/**
 * Builds a DocIdSet of documents that have NO value for {@code entryKey.field}:
 * sets a bit for every doc matching any term of the field, then flips the whole
 * range so the surviving bits are the unset docs.
 */
@Override protected Object createValue(IndexReader reader, Entry entryKey) throws IOException {
    Entry entry = entryKey;
    String field = entry.field;
    // Every document has the field -> the "missing" set is empty.
    if (reader.maxDoc() == reader.docFreq(new Term(field))) {
        return DocIdSet.EMPTY_DOCIDSET;
    }
    OpenBitSet res = new OpenBitSet(reader.maxDoc());
    TermDocs termDocs = reader.termDocs();
    // Positions the enum at the first term of the field (Term with empty text).
    TermEnum termEnum = reader.terms (new Term (field));
    try {
        do {
            Term term = termEnum.term();
            // NOTE: reference (!=) comparison is the classic Lucene FieldCache
            // idiom — field names are interned, so identity equals equality here.
            if (term==null || term.field() != field) break;
            termDocs.seek (termEnum);
            while (termDocs.next()) {
                res.fastSet(termDocs.doc());
            }
        } while (termEnum.next());
    } finally {
        // Close both enumerators even if iteration throws.
        termDocs.close();
        termEnum.close();
    }
    // Invert: bits now mark docs WITHOUT a value for the field.
    res.flip(0, reader.maxDoc());
    return res;
} }
/** Marks the segment-relative hit in the global bit set. */
@Override
public void collect(int doc) throws IOException {
    final int globalDoc = docBase + doc;
    bits.fastSet(globalDoc);
}
/**
 * Records a hit: sets its global id in {@code docIds}, bumps the hit counter,
 * and widens the observed [minDocId, maxDocId] range.
 */
@Override
public void collect(final int doc) throws IOException {
    final int globalId = docBase + doc;
    docIds.fastSet(globalId);
    hitCount++;
    if (globalId < minDocId) {
        minDocId = globalId;
    }
    if (globalId > maxDocId) {
        maxDocId = globalId;
    }
}
/**
 * Collects a document by setting its global id in the bit set, and also stores
 * the score the scorer reports for it.
 *
 * @param doc segment-relative document id to collect
 * @throws IOException if the scorer fails
 */
public void collect(int doc) throws IOException {
    final int globalDoc = docBase + doc;
    bits.fastSet(globalDoc);
    scores[globalDoc] = scorer.score();
}
public void collect(int doc) throws IOException { doc += base; // optimistically collect the first docs in an array // in case the total number will be small enough to represent // as a small set like SortedIntDocSet instead... // Storing in this array will be quicker to convert // than scanning through a potentially huge bit vector. // FUTURE: when search methods all start returning docs in order, maybe // we could have a ListDocSet() and use the collected array directly. if (pos < scratch.length) { scratch[pos]=doc; } else { // this conditional could be removed if BitSet was preallocated, but that // would take up more memory, and add more GC time... if (bits==null) bits = new OpenBitSet(maxDoc); bits.fastSet(doc); } pos++; }
/**
 * Converts a filter into a DocSet by copying its BitSet into an OpenBitSet.
 * Not cache-aware: no caches are consulted or populated.
 */
public DocSet convertFilter(Filter lfilter) throws IOException {
    final BitSet matches = lfilter.bits(this.reader);
    final OpenBitSet copy = new OpenBitSet(matches.size());
    // Walk only the set bits rather than every index.
    int bit = matches.nextSetBit(0);
    while (bit >= 0) {
        copy.fastSet(bit);
        bit = matches.nextSetBit(bit + 1);
    }
    return new BitDocSet(copy);
}
public DocSet getDocSet() { if (pos<=scratch.length) { // assumes docs were collected in sorted order! return new SortedIntDocSet(scratch, pos); } else { // set the bits for ids that were collected in the array for (int i=0; i<scratch.length; i++) bits.fastSet(scratch[i]); return new BitDocSet(bits,pos); } }
public DocSet getDocSet() { if (pos<=scratch.length) { // assumes docs were collected in sorted order! return new SortedIntDocSet(scratch, pos); } else { // set the bits for ids that were collected in the array for (int i=0; i<scratch.length; i++) bits.fastSet(scratch[i]); return new BitDocSet(bits,pos); } }
/** * Adds a document to the internal document buffer. * * @param doc The lucene identifier of the document to add */ protected void addDoc(int doc) { // If we have less than hashMaxSize documents, just // keep adding them to docbuf. We will turn them into // a HashDocSet later. if (docbufSize < hashMaxSize) { docbuf[docbufSize] = doc; if (doc > docbufMaxDoc) { docbufMaxDoc = doc; } } else { // We have exceeded hashMaxSize. Allocate a bit set // if we don't have one yet, then add to that. if (docbufBitSet == null) { long startTime = System.currentTimeMillis(); docbufBitSet = new OpenBitSet(docbufMaxDoc + 1); for (int i = 0; i < docbufSize; i++) { docbufBitSet.fastSet(docbuf[i]); } timeConvertToBitSet = System.currentTimeMillis() - startTime; } docbufBitSet.set(doc); } docbufSize++; }
public void collect(int doc) throws IOException { collector.collect(doc); doc += base; // optimistically collect the first docs in an array // in case the total number will be small enough to represent // as a small set like SortedIntDocSet instead... // Storing in this array will be quicker to convert // than scanning through a potentially huge bit vector. // FUTURE: when search methods all start returning docs in order, maybe // we could have a ListDocSet() and use the collected array directly. if (pos < scratch.length) { scratch[pos]=doc; } else { // this conditional could be removed if BitSet was preallocated, but that // would take up more memory, and add more GC time... if (bits==null) bits = new OpenBitSet(maxDoc); bits.fastSet(doc); } pos++; }
/**
 * Materializes the doc ids as an OpenBitSet sized to the largest id.
 * NOTE(review): sizing by the LAST element assumes {@code docs} is sorted
 * ascending — consistent with a sorted doc set, but confirm at the call site.
 */
@Override
public OpenBitSet getBits() {
    final int highest = size() > 0 ? docs[size() - 1] : 0;
    final OpenBitSet result = new OpenBitSet(highest + 1);
    for (final int id : docs) {
        result.fastSet(id);
    }
    return result;
}
private void calculateScores() throws IOException { // initialize buffers OpenBitSet docPointers = new OpenBitSet(reader.maxDoc()); TermPositions tp = null; List<Term> uniqueTerms = new LinkedList<Term>(new LinkedHashSet<Term>(terms)); uniqueTermSize = uniqueTerms.size(); this.roughThresholdFreq = (int) (uniqueTermSize * ROUGH_CUTOFF); for (Iterator<Term> iter = uniqueTerms.iterator(); iter.hasNext();) { try { tp = reader.termPositions(iter.next()); while (tp.next()) { int f = scoredDocs.adjustOrPutValue(tp.doc(), 1, 1); if (f > roughThresholdFreq) { docPointers.fastSet(tp.doc()); } } } finally { if (tp != null) { tp.close(); } } } if (docPointers.cardinality() > 0) { docPointerIterator = (OpenBitSetIterator) docPointers.iterator(); } }
private void calculateScores() throws IOException { // initialize buffers OpenBitSet docPointers = new OpenBitSet(reader.maxDoc()); TermPositions tp = null; List<Term> uniqueTerms = new LinkedList<Term>(new LinkedHashSet<Term>(terms)); uniqueTermSize = uniqueTerms.size(); roughThresholdFreq = (int) (uniqueTermSize * (threshold*0.01f)); for (Iterator<Term> iter = uniqueTerms.iterator(); iter.hasNext();) { try { tp = reader.termPositions(iter.next()); while (tp.next()) { int f = scoredDocs.adjustOrPutValue(tp.doc(), 1, 1); if (f > roughThresholdFreq) { docPointers.fastSet(tp.doc()); } } } finally { if (tp != null) { tp.close(); } } } if (docPointers.cardinality() > 0) { docPointerIterator = (OpenBitSetIterator) docPointers.iterator(); } }
@Override public OpenBitSet bitSet(FacetDataCache<?> dataCache) { @SuppressWarnings("unchecked") int[] index = facetValueConverter.convert((FacetDataCache<String>) dataCache, vals); OpenBitSet bitset = new OpenBitSet(dataCache.valArray.size()); for (int i : index) { bitset.fastSet(i); } if (takeCompliment) { // flip the bits for (int i = 0; i < index.length; ++i) { bitset.fastFlip(i); } } return bitset; }
terms.fastSet(sfc.termNum); last = count;
/**
 * Builds the random-access doc-id set for an OR facet selection: converts the
 * requested values into value-id indexes, marks them in a bit set, and
 * optionally complements the set over the full value range.
 */
@SuppressWarnings("unchecked")
FacetOrRandomAccessDocIdSet(FacetHandler<FacetDataCache<?>> facetHandler, BoboSegmentReader reader,
    String[] vals, FacetValueConverter valConverter, boolean takeCompliment) {
  _dataCache = facetHandler.getFacetData(reader);
  _orderArray = _dataCache.orderArray;
  _index = valConverter.convert((FacetDataCache<String>) _dataCache, vals);

  _bitset = new OpenBitSet(_dataCache.valArray.size());
  for (final int valId : _index) {
    _bitset.fastSet(valId);
  }

  if (takeCompliment) {
    // Invert every bit so the set matches the values NOT selected.
    for (int i = 0; i < _dataCache.valArray.size(); ++i) {
      _bitset.fastFlip(i);
    }
  }
}
bitset.fastSet(i);
if (!_nestedArray.addData(docID, t)) logOverflow(fieldName); minID = docID; bitset.fastSet(docID); int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t; while (docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) { df++; if (!_nestedArray.addData(docID, valId)) logOverflow(fieldName); bitset.fastSet(docID);