/**
 * Evaluates the wrapped field against the combined aggregators and reports how many
 * entries the resulting sketch retains.
 *
 * @param combinedAggregators the aggregator values handed to the wrapped field
 * @return the retained-entry count of the sketch produced by the wrapped field
 */
@Override
public Integer compute(final Map<String, Object> combinedAggregators) {
  final Object fieldResult = getField().compute(combinedAggregators);
  // The wrapped field is expected to yield a tuple sketch; the cast fails otherwise.
  final ArrayOfDoublesSketch tupleSketch = (ArrayOfDoublesSketch) fieldResult;
  return tupleSketch.getRetainedEntries();
}
/**
 * Estimates the cardinality of the set (number of unique values presented to the sketch).
 * Outside estimation mode the retained-entry count is returned unchanged; in estimation
 * mode that count is divided by theta.
 *
 * @return best estimate of the number of unique values
 */
public double getEstimate() {
  return isEstimationMode()
      ? getRetainedEntries() / getTheta()
      : getRetainedEntries();
}
/**
 * Gets the approximate upper error bound given the specified number of Standard Deviations.
 * When the sketch is not in estimation mode the retained-entry count is returned as the
 * bound; otherwise the computation is delegated to {@code BinomialBoundsN}.
 *
 * @param numStdDev
 * <a href="{@docRoot}/resources/dictionary.html#numStdDev">See Number of Standard Deviations</a>
 * @return the upper bound.
 */
public double getUpperBound(final int numStdDev) {
  if (isEstimationMode()) {
    return BinomialBoundsN.getUpperBound(
        getRetainedEntries(), getTheta(), numStdDev, isEmpty_);
  }
  return getRetainedEntries();
}
/**
 * Gets the approximate lower error bound given the specified number of Standard Deviations.
 * When the sketch is not in estimation mode the retained-entry count is returned as the
 * bound; otherwise the computation is delegated to {@code BinomialBoundsN}.
 *
 * @param numStdDev
 * <a href="{@docRoot}/resources/dictionary.html#numStdDev">See Number of Standard Deviations</a>
 * @return the lower bound.
 */
public double getLowerBound(final int numStdDev) {
  if (isEstimationMode()) {
    return BinomialBoundsN.getLowerBound(
        getRetainedEntries(), getTheta(), numStdDev, isEmpty_);
  }
  return getRetainedEntries();
}
/**
 * Builds a multi-line summary of this sketch: estimate, bounds at two standard deviations,
 * theta (as double and as long), mode flags and retained-entry count. Extra configuration
 * lines are added when this instance is an {@code ArrayOfDoublesUpdatableSketch}. The seed
 * hash is printed last, as an unsigned value in hex and in decimal.
 *
 * @return the summary text, each entry terminated by {@code LS}
 */
@Override
public String toString() {
  final int seedHash = Short.toUnsignedInt(getSeedHash());
  final StringBuilder text = new StringBuilder();
  text.append("### ").append(getClass().getSimpleName()).append(" SUMMARY: ").append(LS)
      .append(" Estimate : ").append(getEstimate()).append(LS)
      .append(" Upper Bound, 95% conf : ").append(getUpperBound(2)).append(LS)
      .append(" Lower Bound, 95% conf : ").append(getLowerBound(2)).append(LS)
      .append(" Theta (double) : ").append(getTheta()).append(LS)
      .append(" Theta (long) : ").append(getThetaLong()).append(LS)
      .append(" EstMode? : ").append(isEstimationMode()).append(LS)
      .append(" Empty? : ").append(isEmpty()).append(LS)
      .append(" Retained Entries : ").append(getRetainedEntries()).append(LS);
  // Only updatable sketches expose the configuration values below.
  if (this instanceof ArrayOfDoublesUpdatableSketch) {
    final ArrayOfDoublesUpdatableSketch self = (ArrayOfDoublesUpdatableSketch) this;
    text.append(" Nominal Entries (k) : ").append(self.getNominalEntries()).append(LS)
        .append(" Current Capacity : ").append(self.getCurrentCapacity()).append(LS)
        .append(" Resize Factor : ").append(self.getResizeFactor().getValue()).append(LS)
        .append(" Sampling Probability (p): ").append(self.getSamplingProbability()).append(LS);
  }
  text.append(" Seed Hash : ").append(Integer.toHexString(seedHash))
      .append(" | ").append(seedHash).append(LS)
      .append("### END SKETCH SUMMARY").append(LS);
  return text.toString();
}
/**
 * Copies every key of the given sketch into a freshly allocated hash table.
 * The table length is the larger of {@code 1 << MIN_LG_NOM_LONGS} and the value returned by
 * {@code ceilingPowerOf2} for the retained-entry count divided by {@code REBUILD_THRESHOLD}
 * (rounded up).
 *
 * @param sketch the sketch whose keys are inserted
 * @return the populated hash table
 */
private static long[] convertToHashTable(final ArrayOfDoublesSketch sketch) {
  final int sizeForEntries =
      ceilingPowerOf2((int) Math.ceil(sketch.getRetainedEntries() / REBUILD_THRESHOLD));
  final int minimumSize = 1 << MIN_LG_NOM_LONGS;
  final int tableSize = Math.max(sizeForEntries, minimumSize);
  final long[] table = new long[tableSize];
  // NOTE(review): treats tableSize as a power of two so trailing zeros equal log2 — relies on ceilingPowerOf2.
  final int lgTableSize = Integer.numberOfTrailingZeros(tableSize);
  for (final ArrayOfDoublesSketchIterator entries = sketch.iterator(); entries.next(); ) {
    HashOperations.hashInsertOnly(table, lgTableSize, entries.getKey());
  }
  return table;
}
/**
 * Takes every entry of the given sketch as the result set: {@code count_} becomes the
 * retained-entry count, {@code keys_} receives the keys in iteration order and
 * {@code values_} receives each entry's values packed {@code numValues_} per entry.
 *
 * @param sketch the sketch whose entries are copied
 */
private void getNoMatchSetFromSketch(final ArrayOfDoublesSketch sketch) {
  count_ = sketch.getRetainedEntries();
  keys_ = new long[count_];
  values_ = new double[count_ * numValues_];
  int slot = 0;
  for (final ArrayOfDoublesSketchIterator entries = sketch.iterator(); entries.next(); slot++) {
    keys_[slot] = entries.getKey();
    // Entry number 'slot' occupies values_[slot * numValues_ .. slot * numValues_ + numValues_ - 1].
    System.arraycopy(entries.getValues(), 0, values_, slot * numValues_, numValues_);
  }
}
final long thetaB = b == null ? Long.MAX_VALUE : b.getThetaLong(); theta_ = Math.min(thetaA, thetaB); if (a == null || a.getRetainedEntries() == 0) { return; } if (b == null || b.getRetainedEntries() == 0) { getNoMatchSetFromSketch(a); } else { hashTable = convertToHashTable(b); final int lgHashTableSize = Integer.numberOfTrailingZeros(hashTable.length); final int noMatchSize = a.getRetainedEntries(); keys_ = new long[noMatchSize]; values_ = new double[noMatchSize * numValues_];
/**
 * Serializes a sketch built with sampling probability 0.5, heapifies the bytes and checks
 * that the copy is in estimation mode, estimates the input size within 1%, retains about
 * half of the inputs and reports the same theta as the source.
 */
@Test
public void serializeDeserializeSampling() {
  final int nominalEntries = 16384;
  final int uniques = nominalEntries;
  final ArrayOfDoublesUpdatableSketch source = new ArrayOfDoublesUpdatableSketchBuilder()
      .setNominalEntries(nominalEntries)
      .setSamplingProbability(0.5f)
      .build();
  for (int key = 0; key < uniques; key++) {
    source.update(key, new double[] {1.0});
  }

  final byte[] serialized = source.toByteArray();
  final ArrayOfDoublesSketch restored = ArrayOfDoublesSketch.heapify(Memory.wrap(serialized));

  Assert.assertTrue(restored.isEstimationMode());
  Assert.assertEquals(restored.getEstimate() / uniques, 1.0, 0.01);
  Assert.assertEquals(restored.getRetainedEntries() / (double) uniques, 0.5, 0.01);
  Assert.assertEquals(source.getTheta(), restored.getTheta());
}
theta_ = min(theta_, sketchIn.getThetaLong()); isEmpty_ |= sketchIn.isEmpty(); if (isEmpty_ || sketchIn.getRetainedEntries() == 0) { sketch_ = null; return; sketch_ = createSketch(sketchIn.getRetainedEntries(), numValues_, seed_); final ArrayOfDoublesSketchIterator it = sketchIn.iterator(); while (it.next()) { final int matchSize = min(sketch_.getRetainedEntries(), sketchIn.getRetainedEntries()); final long[] matchKeys = new long[matchSize]; final double[][] matchValues = new double[matchSize][];
/**
 * Compacts a three-entry sketch, serializes it, heapifies the bytes and checks that the
 * copy is non-empty, in exact mode, reports 3 for estimate and both bounds, has theta at
 * its maximum and carries the value 1.0 for every entry.
 */
@Test
public void serializeDeserializeSmallExact() {
  final ArrayOfDoublesUpdatableSketch updatable = new ArrayOfDoublesUpdatableSketchBuilder().build();
  for (final String key : new String[] {"a", "b", "c"}) {
    updatable.update(key, new double[] {1.0});
  }
  final ArrayOfDoublesCompactSketch compact = updatable.compact();
  final byte[] serialized = compact.toByteArray();
  final ArrayOfDoublesSketch restored = ArrayOfDoublesSketches.heapifySketch(Memory.wrap(serialized));

  Assert.assertFalse(restored.isEmpty());
  Assert.assertFalse(restored.isEstimationMode());
  Assert.assertEquals(restored.getEstimate(), 3.0);
  Assert.assertEquals(restored.getLowerBound(1), 3.0);
  Assert.assertEquals(restored.getUpperBound(1), 3.0);
  Assert.assertEquals(restored.getRetainedEntries(), 3);
  Assert.assertEquals(restored.getThetaLong(), Long.MAX_VALUE);
  Assert.assertEquals(restored.getTheta(), 1.0);

  final double[][] values = restored.getValues();
  Assert.assertEquals(values.length, 3);
  for (int row = 0; row < values.length; row++) {
    Assert.assertEquals(values[row][0], 1.0);
  }
}
/**
 * Memory-backed variant: builds and compacts a three-entry sketch over wrapped byte arrays,
 * serializes it, wraps the bytes and checks that the wrapped sketch is non-empty, in exact
 * mode, reports 3 for estimate and both bounds, has theta at its maximum and carries the
 * value 1.0 for every entry.
 */
@Test
public void serializeDeserializeSmallExact() {
  final ArrayOfDoublesUpdatableSketch updatable =
      new ArrayOfDoublesUpdatableSketchBuilder().build(WritableMemory.wrap(new byte[1000000]));
  for (final String key : new String[] {"a", "b", "c"}) {
    updatable.update(key, new double[] {1.0});
  }
  final ArrayOfDoublesCompactSketch compact =
      updatable.compact(WritableMemory.wrap(new byte[1000000]));
  final byte[] serialized = compact.toByteArray();
  final ArrayOfDoublesSketch wrapped =
      ArrayOfDoublesSketches.wrapSketch(WritableMemory.wrap(serialized));

  Assert.assertFalse(wrapped.isEmpty());
  Assert.assertFalse(wrapped.isEstimationMode());
  Assert.assertEquals(wrapped.getEstimate(), 3.0);
  Assert.assertEquals(wrapped.getLowerBound(1), 3.0);
  Assert.assertEquals(wrapped.getUpperBound(1), 3.0);
  Assert.assertEquals(wrapped.getRetainedEntries(), 3);
  Assert.assertEquals(wrapped.getThetaLong(), Long.MAX_VALUE);
  Assert.assertEquals(wrapped.getTheta(), 1.0);

  final double[][] values = wrapped.getValues();
  Assert.assertEquals(values.length, 3);
  for (int row = 0; row < values.length; row++) {
    Assert.assertEquals(values[row][0], 1.0);
  }
}
/**
 * Memory-backed variant: serializes a sketch built with sampling probability 0.5 over a
 * wrapped byte array, wraps the serialized bytes and checks that the wrapped sketch is in
 * estimation mode, estimates the input size within 1%, retains about half of the inputs
 * and reports the same theta as the source.
 */
@Test
public void serializeDeserializeSampling() {
  final int nominalEntries = 16384;
  final int uniques = nominalEntries;
  final ArrayOfDoublesUpdatableSketch source = new ArrayOfDoublesUpdatableSketchBuilder()
      .setNominalEntries(nominalEntries)
      .setSamplingProbability(0.5f)
      .build(WritableMemory.wrap(new byte[1000000]));
  for (int key = 0; key < uniques; key++) {
    source.update(key, new double[] {1.0});
  }

  final byte[] serialized = source.toByteArray();
  final ArrayOfDoublesSketch wrapped = ArrayOfDoublesSketch.wrap(WritableMemory.wrap(serialized));

  Assert.assertTrue(wrapped.isEstimationMode());
  Assert.assertEquals(wrapped.getEstimate() / uniques, 1.0, 0.01);
  Assert.assertEquals(wrapped.getRetainedEntries() / (double) uniques, 0.5, 0.01);
  Assert.assertEquals(source.getTheta(), wrapped.getTheta());
}
/**
 * Estimates the cardinality of the set (number of unique values presented to the sketch).
 * In estimation mode the retained-entry count is scaled by dividing it by theta; otherwise
 * the retained-entry count itself is the result.
 *
 * @return best estimate of the number of unique values
 */
public double getEstimate() {
  if (isEstimationMode()) {
    return getRetainedEntries() / getTheta();
  } else {
    return getRetainedEntries();
  }
}
/**
 * Gets the approximate lower error bound given the specified number of Standard Deviations.
 * Outside estimation mode the bound is simply the retained-entry count; in estimation mode
 * it is obtained from {@code BinomialBoundsN.getLowerBound}.
 *
 * @param numStdDev
 * <a href="{@docRoot}/resources/dictionary.html#numStdDev">See Number of Standard Deviations</a>
 * @return the lower bound.
 */
public double getLowerBound(final int numStdDev) {
  final boolean estimating = isEstimationMode();
  return estimating
      ? BinomialBoundsN.getLowerBound(getRetainedEntries(), getTheta(), numStdDev, isEmpty_)
      : getRetainedEntries();
}
/**
 * Gets the approximate upper error bound given the specified number of Standard Deviations.
 * Outside estimation mode the bound is simply the retained-entry count; in estimation mode
 * it is obtained from {@code BinomialBoundsN.getUpperBound}.
 *
 * @param numStdDev
 * <a href="{@docRoot}/resources/dictionary.html#numStdDev">See Number of Standard Deviations</a>
 * @return the upper bound.
 */
public double getUpperBound(final int numStdDev) {
  final boolean estimating = isEstimationMode();
  return estimating
      ? BinomialBoundsN.getUpperBound(getRetainedEntries(), getTheta(), numStdDev, isEmpty_)
      : getRetainedEntries();
}
/**
 * Returns the number of retained entries of the sketch that the wrapped field computes
 * from the given aggregator values.
 *
 * @param combinedAggregators the aggregator values passed through to the wrapped field
 * @return the retained-entry count of the computed sketch
 */
@Override
public Integer compute(final Map<String, Object> combinedAggregators) {
  final Object computed = getField().compute(combinedAggregators);
  // A ClassCastException here means the wrapped field did not produce a tuple sketch.
  return ((ArrayOfDoublesSketch) computed).getRetainedEntries();
}
/**
 * Builds a multi-line summary of this sketch: estimate, bounds at two standard deviations,
 * theta (as double and as long), mode flags and retained-entry count. When this instance is
 * an {@code ArrayOfDoublesUpdatableSketch}, its configuration and update seed (hex and
 * decimal) are added. The seed hash is printed last as an unsigned hex value.
 *
 * @return the summary text, each entry terminated by {@code LS}
 */
@Override
public String toString() {
  final StringBuilder text = new StringBuilder();
  text.append("### ").append(getClass().getSimpleName()).append(" SUMMARY: ").append(LS)
      .append(" Estimate : ").append(getEstimate()).append(LS)
      .append(" Upper Bound, 95% conf : ").append(getUpperBound(2)).append(LS)
      .append(" Lower Bound, 95% conf : ").append(getLowerBound(2)).append(LS)
      .append(" Theta (double) : ").append(getTheta()).append(LS)
      .append(" Theta (long) : ").append(getThetaLong()).append(LS)
      .append(" EstMode? : ").append(isEstimationMode()).append(LS)
      .append(" Empty? : ").append(isEmpty()).append(LS)
      .append(" Retained Entries : ").append(getRetainedEntries()).append(LS);
  // Only updatable sketches expose the configuration values and the seed below.
  if (this instanceof ArrayOfDoublesUpdatableSketch) {
    final ArrayOfDoublesUpdatableSketch self = (ArrayOfDoublesUpdatableSketch) this;
    text.append(" Nominal Entries (k) : ").append(self.getNominalEntries()).append(LS)
        .append(" Current Capacity : ").append(self.getCurrentCapacity()).append(LS)
        .append(" Resize Factor : ").append(self.getResizeFactor().getValue()).append(LS)
        .append(" Sampling Probability (p): ").append(self.getSamplingProbability()).append(LS)
        .append(" Update Seed : ").append(Long.toHexString(self.getSeed()))
        .append(" | ").append(self.getSeed()).append(LS);
  }
  final String seedHashHex = Integer.toHexString(Short.toUnsignedInt(getSeedHash()));
  text.append(" Seed Hash : ").append(seedHashHex).append(LS)
      .append("### END SKETCH SUMMARY").append(LS);
  return text.toString();
}
/**
 * Builds a hash table holding all keys of the given sketch.
 * The table length is the maximum of {@code 1 << MIN_LG_NOM_LONGS} and the result of
 * {@code ceilingPowerOf2} applied to the retained-entry count divided by
 * {@code REBUILD_THRESHOLD}, rounded up.
 *
 * @param sketch the sketch supplying the keys
 * @return the hash table containing the sketch's keys
 */
private static long[] convertToHashTable(final ArrayOfDoublesSketch sketch) {
  final int entriesBasedSize =
      ceilingPowerOf2((int) Math.ceil(sketch.getRetainedEntries() / REBUILD_THRESHOLD));
  final int length = Math.max(entriesBasedSize, 1 << MIN_LG_NOM_LONGS);
  final long[] keysTable = new long[length];
  final ArrayOfDoublesSketchIterator cursor = sketch.iterator();
  // NOTE(review): assumes length is a power of two, so trailing zeros give its log2 — depends on ceilingPowerOf2.
  final int lgLength = Integer.numberOfTrailingZeros(length);
  boolean hasEntry = cursor.next();
  while (hasEntry) {
    HashOperations.hashInsertOnly(keysTable, lgLength, cursor.getKey());
    hasEntry = cursor.next();
  }
  return keysTable;
}
/**
 * Adopts all entries of the given sketch as the result: sets {@code count_} to the
 * retained-entry count, allocates {@code keys_} and {@code values_} accordingly, then copies
 * each key and its {@code numValues_} values in iteration order.
 *
 * @param sketch the sketch to copy entries from
 */
private void getNoMatchSetFromSketch(final ArrayOfDoublesSketch sketch) {
  count_ = sketch.getRetainedEntries();
  keys_ = new long[count_];
  values_ = new double[count_ * numValues_];
  final ArrayOfDoublesSketchIterator cursor = sketch.iterator();
  int index = 0;
  boolean hasEntry = cursor.next();
  while (hasEntry) {
    keys_[index] = cursor.getKey();
    // Values are stored flat: entry 'index' starts at offset index * numValues_.
    System.arraycopy(cursor.getValues(), 0, values_, index * numValues_, numValues_);
    index++;
    hasEntry = cursor.next();
  }
}