/** {@inheritDoc} */
@Override
public long estimateSize() {
    // The estimate is simply the size reported by the storage's index collection.
    final long indexCount = storage().indexes().size();
    return indexCount;
}
@Override public void seal() { sealed = true; sortedIntList = new int[rawIntSet.size()]; rawIntSet.toArray(sortedIntList); Arrays.sort(sortedIntList); if (sortedIntList.length == 0) { min = null; max = null; return; } // Update min/max based on raw docs. min = sortedIntList[0]; max = sortedIntList[sortedIntList.length - 1]; // Merge the raw and aggregated docs, so stats for dictionary creation are collected correctly. int numAggregated = aggregatedIntSet.size(); if (numAggregated > 0) { rawIntSet.addAll(aggregatedIntSet); sortedIntList = new int[rawIntSet.size()]; rawIntSet.toArray(sortedIntList); Arrays.sort(sortedIntList); } } }
/**
 * Serializes the given set into a byte array.
 *
 * <p>Layout: one int holding the element count, followed by one int per element in the set's
 * iteration order, all written through a {@link ByteBuffer} with its default byte order.
 *
 * @param intSet the set to serialize
 * @return a newly allocated array of {@code Integer.BYTES * (1 + intSet.size())} bytes
 */
@Override
public byte[] serialize(IntSet intSet) {
  final int count = intSet.size();
  final ByteBuffer out = ByteBuffer.wrap(new byte[Integer.BYTES + count * Integer.BYTES]);

  // Header: number of elements that follow.
  out.putInt(count);

  for (IntIterator it = intSet.iterator(); it.hasNext(); ) {
    out.putInt(it.nextInt());
  }

  // The buffer wraps the array allocated above, so array() hands back that same array.
  return out.array();
}
/**
 * Counts the distinct int values held in {@code data} at positions {@code 0 .. size() - 1}.
 *
 * @return the number of distinct values
 */
@Override
public int countUnique() {
  final int rowCount = size();
  final IntSet distinct = new IntOpenHashSet(rowCount);
  int row = 0;
  while (row < rowCount) {
    distinct.add(data.getInt(row));
    row++;
  }
  return distinct.size();
}
/**
 * Collects every dictionary id whose value satisfies the range predicate, as decided by
 * {@code dictionary.inRange(...)}.
 *
 * <p>A boundary equal to {@code "*"} is replaced with the string form of the dictionary's
 * min (lower) or max (upper) value. An empty dictionary, or no matching id, marks the
 * evaluator as always-false; every id matching marks it as always-true.
 *
 * @param rangePredicate supplies the boundaries and their inclusiveness
 * @param dictionary dictionary whose ids are scanned
 */
RealtimeDictionaryBasedRangePredicateEvaluator(RangePredicate rangePredicate, MutableDictionary dictionary) {
  _matchingDictIdSet = new IntOpenHashSet();

  final int numDictIds = dictionary.length();
  if (numDictIds == 0) {
    // Nothing to scan.
    _numMatchingDictIds = 0;
    _alwaysFalse = true;
    return;
  }

  String lower = rangePredicate.getLowerBoundary();
  String upper = rangePredicate.getUpperBoundary();
  final boolean lowerInclusive = rangePredicate.includeLowerBoundary();
  final boolean upperInclusive = rangePredicate.includeUpperBoundary();

  // "*" is replaced by the dictionary's own extreme value.
  if (lower.equals("*")) {
    lower = dictionary.getMinVal().toString();
  }
  if (upper.equals("*")) {
    upper = dictionary.getMaxVal().toString();
  }

  int dictId = 0;
  while (dictId < numDictIds) {
    if (dictionary.inRange(lower, upper, dictId, lowerInclusive, upperInclusive)) {
      _matchingDictIdSet.add(dictId);
    }
    dictId++;
  }

  _numMatchingDictIds = _matchingDictIdSet.size();
  // numDictIds > 0 here, so the two conditions below cannot both hold.
  if (_numMatchingDictIds == numDictIds) {
    _alwaysTrue = true;
  } else if (_numMatchingDictIds == 0) {
    _alwaysFalse = true;
  }
}
/**
 * Builds a new column holding each distinct int value of {@code data} once.
 *
 * <p>The result is named {@code name() + " Unique values"}; the order of its values is whatever
 * the hash set's {@code toIntArray()} yields.
 *
 * @return a column created via {@code emptyCopy} whose data is the distinct values
 */
@Override
public TimeColumn unique() {
  final IntSet distinct = new IntOpenHashSet(data);
  final TimeColumn result = emptyCopy(distinct.size());

  final int[] distinctValues = distinct.toIntArray();
  result.data = IntArrayList.wrap(distinctValues);

  final String uniqueName = name() + " Unique values";
  result.setName(uniqueName);
  return result;
}
/** {@inheritDoc} */
@Override
public int nonZeroElements() {
  // Sum the sizes of the per-row entries stored in the indexes map.
  int total = 0;
  for (IntIterator rows = indexesMap().keySet().iterator(); rows.hasNext(); ) {
    final int row = rows.nextInt();
    total += indexesMap().get(row).size();
  }
  return total;
}
/**
 * Counts the distinct values returned by {@code getInt(i)} for {@code i} in
 * {@code 0 .. size() - 1}, ignoring values for which {@code isMissingValue} is true.
 *
 * @return the number of distinct non-missing values
 */
@Override
public int countUnique() {
  final IntSet seen = new IntOpenHashSet();
  for (int row = 0; row < size(); row++) {
    final int value = getInt(row);
    if (isMissingValue(value)) {
      continue;
    }
    seen.add(value);
  }
  return seen.size();
}
int numMatchingDictIds = matchingDictIds.size(); int numChildren = starTreeNode.getNumChildren();
DictionaryBasedInPredicateEvaluator(InPredicate inPredicate, Dictionary dictionary) { String[] values = inPredicate.getValues(); _matchingDictIdSet = new IntOpenHashSet(HashUtil.getMinHashSetSize(values.length)); for (String value : values) { int dictId = dictionary.indexOf(value); if (dictId >= 0) { _matchingDictIdSet.add(dictId); } } _numMatchingDictIds = _matchingDictIdSet.size(); if (_numMatchingDictIds == 0) { _alwaysFalse = true; } else if (dictionary.length() == _numMatchingDictIds) { _alwaysTrue = true; } }
DictionaryBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, Dictionary dictionary) { String[] values = notInPredicate.getValues(); _nonMatchingDictIdSet = new IntOpenHashSet(HashUtil.getMinHashSetSize(values.length)); for (String value : values) { int dictId = dictionary.indexOf(value); if (dictId >= 0) { _nonMatchingDictIdSet.add(dictId); } } _numNonMatchingDictIds = _nonMatchingDictIdSet.size(); if (_numNonMatchingDictIds == 0) { _alwaysTrue = true; } else if (dictionary.length() == _numNonMatchingDictIds) { _alwaysFalse = true; } _dictionary = dictionary; }
/**
 * Builds a new column holding each distinct int value of {@code data} once.
 *
 * <p>The result is named {@code name() + " Unique values"}; the order of its values is whatever
 * the hash set's {@code toIntArray()} yields.
 *
 * @return a column created via {@code emptyCopy} whose data is the distinct values
 */
@Override
public DateColumn unique() {
  final IntSet distinct = new IntOpenHashSet(data.size());
  int row = 0;
  while (row < size()) {
    distinct.add(data.getInt(row));
    row++;
  }

  final DateColumn result = emptyCopy(distinct.size());
  final String uniqueName = name() + " Unique values";
  result.setName(uniqueName);
  result.data = IntArrayList.wrap(distinct.toIntArray());
  return result;
}
/**
 * Returns the number of elements in {@code set}.
 *
 * <p>When the set is an {@code IntOpenHashBigSet}, its {@code size64()} is used instead of
 * {@code size()}.
 *
 * @return the element count as a long
 */
@Override
public long size() {
  return (set instanceof IntOpenHashBigSet)
      ? ((IntOpenHashBigSet) set).size64()
      : set.size();
}
/**
 * Counts the distinct values in {@code data} by copying them into a hash set.
 *
 * @return the number of distinct values
 */
@Override
public int countUnique() {
  return new IntOpenHashSet(data).size();
}
/**
 * Computes the elements contained in both sets.
 *
 * <p>This works in place: the set with fewer elements ({@code set1} on a tie) is reduced via
 * {@code retainAll} to the common elements and returned; the other set is left untouched.
 *
 * @param set1 first set
 * @param set2 second set
 * @return whichever input set was reduced to the intersection
 */
private static IntSet intersection(IntSet set1, IntSet set2) {
  final boolean secondIsSmaller = set1.size() > set2.size();
  final IntSet smaller = secondIsSmaller ? set2 : set1;
  final IntSet larger = secondIsSmaller ? set1 : set2;

  smaller.retainAll(larger);
  return smaller;
}
/**
 * Returns a function that scores other indexes against {@code idx1}.
 *
 * <p>The {@code v1} values of {@code data.getUidxPreferences(idx1)} are gathered into a set
 * once. For each {@code idx2}, the returned function counts how many {@code v1} values of
 * {@code data.getUidxPreferences(idx2)} are in that set and passes the count, the set's size
 * and {@code data.numItems(idx2)} to {@code sim}.
 *
 * @param idx1 index the returned function compares against
 * @return function mapping {@code idx2} to the value computed by {@code sim}
 */
@Override
public IntToDoubleFunction similarity(int idx1) {
  final IntSet reference = new IntOpenHashSet();
  data.getUidxPreferences(idx1)
      .map(IdxPref::v1)
      .forEach(reference::add);

  return idx2 -> {
    final long shared = data.getUidxPreferences(idx2)
        .map(IdxPref::v1)
        .filter(reference::contains)
        .count();
    return sim((int) shared, reference.size(), data.numItems(idx2));
  };
}
/**
 * Writes {@code set} to the given output: first its size as an int, then each element as an
 * int in the set's iteration order.
 *
 * @param out destination to write to
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
  out.writeInt(set.size());
  for (IntIterator it = set.iterator(); it.hasNext(); ) {
    out.writeInt(it.nextInt());
  }
}
/**
 * Counts the distinct int values held in {@code data} at positions {@code 0 .. size() - 1}.
 *
 * @return the number of distinct values
 */
@Override
public int countUnique() {
  final int rowCount = size();
  final IntSet distinct = new IntOpenHashSet(rowCount);
  int row = 0;
  while (row < rowCount) {
    distinct.add(data.getInt(row));
    row++;
  }
  return distinct.size();
}
/**
 * Builds a new column holding each distinct int value of {@code data} once.
 *
 * <p>The result is named {@code name() + " Unique values"}; the order of its values is whatever
 * the hash set's {@code toIntArray()} yields.
 *
 * @return a column created via {@code emptyCopy} whose data is the distinct values
 */
@Override
public DateColumn unique() {
  final IntSet distinct = new IntOpenHashSet(data.size());
  int row = 0;
  while (row < size()) {
    distinct.add(data.getInt(row));
    row++;
  }

  final DateColumn result = emptyCopy(distinct.size());
  final String uniqueName = name() + " Unique values";
  result.setName(uniqueName);
  result.data = IntArrayList.wrap(distinct.toIntArray());
  return result;
}