/**
 * Writes the file {@code array_of_doubles_sketch_data.tsv} in the default file system.
 * The file has 20 tab-separated rows, each consisting of a fixed timestamp string,
 * a product name with a random suffix in the range 1..10, and the Base64 encoding of a
 * compact sketch that was updated with two consecutive integer keys.
 *
 * @throws Exception if the file cannot be created or written
 */
private static void generateSketches() throws Exception
{
  final Path outputFile = FileSystems.getDefault().getPath("array_of_doubles_sketch_data.tsv");
  try (BufferedWriter writer = Files.newBufferedWriter(outputFile, StandardCharsets.UTF_8)) {
    final Random random = ThreadLocalRandom.current();
    int nextKey = 0;
    for (int row = 0; row < 20; row++) {
      final ArrayOfDoublesUpdatableSketch rowSketch =
          new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(1024).build();
      // two fresh keys per row, each carrying a single value of 1
      rowSketch.update(nextKey++, new double[] {1});
      rowSketch.update(nextKey++, new double[] {1});

      final String product = "product_" + (random.nextInt(10) + 1);
      final String encodedSketch = StringUtils.encodeBase64String(rowSketch.compact().toByteArray());

      writer.write("2015010101");
      writer.write('\t');
      writer.write(product);
      writer.write('\t');
      writer.write(encodedSketch);
      writer.newLine();
    }
  }
}
/**
 * Converts the given ArrayOfDoublesUpdatableSketch to this compact form.
 * Delegates to the three-argument constructor, passing the sketch's own theta
 * (as returned by {@code sketch.getThetaLong()}).
 * @param sketch the given ArrayOfDoublesUpdatableSketch
 * @param dstMem the given destination Memory.
 */
DirectArrayOfDoublesCompactSketch(final ArrayOfDoublesUpdatableSketch sketch, final WritableMemory dstMem) {
  this(sketch, sketch.getThetaLong(), dstMem);
}
/**
 * Heapify the given Memory as an ArrayOfDoublesUpdatableSketch.
 * Equivalent to calling {@code heapify(mem, DEFAULT_UPDATE_SEED)}.
 * @param mem the given Memory
 * @return an ArrayOfDoublesUpdatableSketch
 */
public static ArrayOfDoublesUpdatableSketch heapify(final Memory mem) {
  return heapify(mem, DEFAULT_UPDATE_SEED);
}
@Test public void exactMode() { ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); Assert.assertTrue(sketch.isEmpty()); Assert.assertEquals(sketch.getEstimate(), 0.0); for (int i = 1; i <= 4096; i++) sketch.update(i, new double[] {1.0}); Assert.assertFalse(sketch.isEmpty()); Assert.assertFalse(sketch.isEstimationMode()); Assert.assertEquals(sketch.getEstimate(), 4096.0); Assert.assertEquals(sketch.getUpperBound(1), 4096.0); Assert.assertEquals(sketch.getLowerBound(1), 4096.0); Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); Assert.assertEquals(sketch.getTheta(), 1.0); double[][] values = sketch.getValues(); Assert.assertEquals(values.length, 4096); int count = 0; sketch.reset(); Assert.assertTrue(sketch.isEmpty()); Assert.assertFalse(sketch.isEstimationMode()); Assert.assertEquals(sketch.getEstimate(), 0.0); Assert.assertEquals(sketch.getUpperBound(1), 0.0); Assert.assertEquals(sketch.getLowerBound(1), 0.0); Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); Assert.assertEquals(sketch.getTheta(), 1.0); ArrayOfDoublesSketchIterator it = sketch.iterator(); while (it.next()) { Assert.fail("empty sketch expected");
/**
 * A sketch built with a sampling probability and never updated must report itself as
 * empty and not in estimation mode, with zero estimate and bounds, while its theta
 * (in both long and double form) reflects the sampling probability.
 */
@Test
public void isEmptyWithSampling() {
  final float p = 0.1f;
  final ArrayOfDoublesUpdatableSketch emptySketch =
      new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(p).build();

  Assert.assertTrue(emptySketch.isEmpty());
  Assert.assertFalse(emptySketch.isEstimationMode());

  Assert.assertEquals(emptySketch.getEstimate(), 0.0);
  Assert.assertEquals(emptySketch.getUpperBound(1), 0.0);
  Assert.assertEquals(emptySketch.getLowerBound(1), 0.0);

  // theta expressed as a fraction of Long.MAX_VALUE must match the sampling probability
  final double thetaFraction = emptySketch.getThetaLong() / (double) Long.MAX_VALUE;
  final double expectedTheta = (double) p;
  Assert.assertEquals(thetaFraction, expectedTheta);
  Assert.assertEquals(emptySketch.getTheta(), expectedTheta);
}
/**
 * Builds a two-values-per-key sketch on the heap, serializes it, wraps the bytes as a
 * second updatable sketch and continues updating there. Four distinct keys are expected
 * in exact mode, each with both of its values doubled from the two rounds of updates.
 */
@Test
public void heapToDirectExactTwoDoubles() {
  final double[] valuesArr = {1.0, 2.0};

  final ArrayOfDoublesUpdatableSketch heapSketch =
      new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build();
  for (final String key : new String[] {"a", "b", "c", "d", "a"}) {
    heapSketch.update(key, valuesArr);
  }
  noopUpdates(heapSketch, valuesArr);

  final byte[] serialized = heapSketch.toByteArray();
  final ArrayOfDoublesUpdatableSketch wrappedSketch =
      ArrayOfDoublesUpdatableSketch.wrap(WritableMemory.wrap(serialized));
  for (final String key : new String[] {"b", "c", "d"}) {
    wrappedSketch.update(key, valuesArr);
  }

  Assert.assertFalse(wrappedSketch.isEmpty());
  Assert.assertFalse(wrappedSketch.isEstimationMode());
  Assert.assertEquals(wrappedSketch.getEstimate(), 4.0);
  Assert.assertEquals(wrappedSketch.getUpperBound(1), 4.0);
  Assert.assertEquals(wrappedSketch.getLowerBound(1), 4.0);
  Assert.assertEquals(wrappedSketch.getThetaLong(), Long.MAX_VALUE);
  Assert.assertEquals(wrappedSketch.getTheta(), 1.0);

  final double[][] retained = wrappedSketch.getValues();
  Assert.assertEquals(retained.length, 4);
  for (int row = 0; row < retained.length; row++) {
    final double[] entry = retained[row];
    Assert.assertEquals(entry.length, 2);
    Assert.assertEquals(entry[0], 2.0);
    Assert.assertEquals(entry[1], 4.0);
  }
}
/**
 * Reads the current row's keys and values from the selectors and feeds every key,
 * paired with the row's values, into the sketch.
 *
 * This method uses synchronization because it can be used during indexing,
 * and Druid can call aggregate() and get() concurrently
 * https://github.com/apache/incubator-druid/pull/3956
 */
@Override
public void aggregate()
{
  final IndexedInts rowKeys = keySelector.getRow();
  for (int v = 0; v < valueSelectors.length; v++) {
    values[v] = valueSelectors[v].getDouble();
  }
  // only the sketch mutation is guarded; selector reads above happen outside the lock
  synchronized (this) {
    final int numKeys = rowKeys.size();
    for (int k = 0; k < numKeys; k++) {
      final String keyName = keySelector.lookupName(rowKeys.get(k));
      sketch.update(keyName, values);
    }
  }
}
/**
 * Pushes 8192 keys into an updatable sketch and checks that estimate and theta
 * survive three round trips: wrapping the serialized updatable form, heapifying the
 * serialized updatable form, and heapifying the serialized compact form.
 */
@Test
public void serializeDeserializeEstimation() {
  final ArrayOfDoublesUpdatableSketch original = new ArrayOfDoublesUpdatableSketchBuilder().build();
  for (int key = 0; key < 8192; key++) {
    original.update(key, new double[] {1.0});
  }

  final WritableMemory serializedUpdatable = WritableMemory.wrap(original.toByteArray());

  // wrapped updatable sketch
  final ArrayOfDoublesUpdatableSketch wrapped =
      ArrayOfDoublesSketches.wrapUpdatableSketch(serializedUpdatable);
  Assert.assertFalse(wrapped.isEmpty());
  Assert.assertTrue(wrapped.isEstimationMode());
  Assert.assertEquals(wrapped.getEstimate(), original.getEstimate());
  Assert.assertEquals(wrapped.getThetaLong(), original.getThetaLong());

  // heapified updatable sketch
  final ArrayOfDoublesUpdatableSketch heapified =
      ArrayOfDoublesSketches.heapifyUpdatableSketch(serializedUpdatable);
  Assert.assertFalse(heapified.isEmpty());
  Assert.assertTrue(heapified.isEstimationMode());
  Assert.assertEquals(heapified.getEstimate(), original.getEstimate());
  Assert.assertEquals(heapified.getThetaLong(), original.getThetaLong());

  // compact form, heapified through the generic entry point
  final ArrayOfDoublesCompactSketch compact = original.compact();
  final ArrayOfDoublesSketch restoredCompact =
      ArrayOfDoublesSketches.heapifySketch(Memory.wrap(compact.toByteArray()));
  Assert.assertFalse(restoredCompact.isEmpty());
  Assert.assertTrue(restoredCompact.isEstimationMode());
  Assert.assertEquals(restoredCompact.getEstimate(), compact.getEstimate());
  Assert.assertEquals(restoredCompact.getThetaLong(), compact.getThetaLong());
}
super(sketch.getNumValues()); isEmpty_ = sketch.isEmpty(); theta_ = Math.min(sketch.getThetaLong(), theta); seedHash_ = Util.computeSeedHash(sketch.getSeed()); final int count = sketch.getRetainedEntries(); if (count > 0) { keys_ = new long[count]; values_ = new double[count * numValues_]; final ArrayOfDoublesSketchIterator it = sketch.iterator(); int i = 0; while (it.next()) {
/**
 * Returns the compact form of the sketch held by this aggregator.
 *
 * This method uses synchronization because it can be used during indexing,
 * and Druid can call aggregate() and get() concurrently
 * https://github.com/apache/incubator-druid/pull/3956
 * The returned sketch is a separate instance of ArrayOfDoublesCompactSketch
 * representing the current state of the aggregation, and is not affected by subsequent
 * aggregate() calls
 */
@Override
public synchronized Object get()
{
  return sketch.compact();
}
@Test public void serializeDeserializeExact() { ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); sketch1.update(1, new double[] {1.0}); ArrayOfDoublesUpdatableSketch sketch2 = ArrayOfDoublesUpdatableSketch.heapify(WritableMemory.wrap(sketch1.toByteArray())); Assert.assertEquals(sketch2.getEstimate(), 1.0); double[][] values = sketch2.getValues(); Assert.assertEquals(values.length, 1); Assert.assertEquals(values[0][0], 1.0); // the same key, so still one unique sketch2.update(1, new double[] {1.0}); Assert.assertEquals(sketch2.getEstimate(), 1.0); sketch2.update(2, new double[] {1.0}); Assert.assertEquals(sketch2.getEstimate(), 2.0); }
/**
 * Builds a sketch with a non-default seed, serializes it, wraps the bytes with the same
 * seed and keeps updating; overlapping keys must not be double counted.
 */
@Test
public void heapToDirectWithSeed() {
  final long seed = 1;
  final double[] values = {1.0};

  final ArrayOfDoublesUpdatableSketch heapSketch =
      new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build();
  for (final String key : new String[] {"a", "b", "c"}) {
    heapSketch.update(key, values);
  }

  final WritableMemory image = WritableMemory.wrap(heapSketch.toByteArray());
  final ArrayOfDoublesUpdatableSketch wrappedSketch = ArrayOfDoublesUpdatableSketch.wrap(image, seed);
  for (final String key : new String[] {"b", "c", "d"}) {
    wrappedSketch.update(key, values);
  }

  // a, b, c, d
  Assert.assertEquals(wrappedSketch.getEstimate(), 4.0);
}
/**
 * Fills a sketch configured with sampling probability 0.5 with as many unique keys as
 * its nominal size, round-trips it through serialization and checks that the restored
 * sketch is in estimation mode, estimates the true count within 1%, retains about half
 * of the keys, and has the same theta as the source sketch.
 */
@Test
public void serializeDeserializeSampling() {
  final int sketchSize = 16384;
  final int numberOfUniques = sketchSize;

  final ArrayOfDoublesUpdatableSketch sampled = new ArrayOfDoublesUpdatableSketchBuilder()
      .setNominalEntries(sketchSize)
      .setSamplingProbability(0.5f)
      .build();
  for (int key = 0; key < numberOfUniques; key++) {
    sampled.update(key, new double[] {1.0});
  }

  final Memory serialized = Memory.wrap(sampled.toByteArray());
  final ArrayOfDoublesSketch restored = ArrayOfDoublesSketch.heapify(serialized);

  Assert.assertTrue(restored.isEstimationMode());
  final double estimateRatio = restored.getEstimate() / numberOfUniques;
  Assert.assertEquals(estimateRatio, 1.0, 0.01);
  final double retainedRatio = restored.getRetainedEntries() / (double) numberOfUniques;
  Assert.assertEquals(retainedRatio, 0.5, 0.01);
  Assert.assertEquals(sampled.getTheta(), restored.getTheta());
}
/**
 * Updates the same key three times and checks after each update that a single entry is
 * retained and that its value is the running sum of the values supplied so far.
 */
@Test
public void doubleSum() {
  final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build();

  final double[] increments = {1.0, 0.7, 0.8};
  final double[] expectedSums = {1.0, 1.7, 2.5};

  for (int step = 0; step < increments.length; step++) {
    sketch.update(1, new double[] {increments[step]});
    Assert.assertEquals(sketch.getRetainedEntries(), 1);
    Assert.assertEquals(sketch.getValues()[0][0], expectedSums[step]);
  }
}
/**
 * Wraps the serialized image of an updatable sketch through the read-only
 * {@code Memory} entry point and verifies that reading still works while each mutating
 * operation is rejected with a {@link SketchesReadOnlyException}.
 *
 * <p>{@code update()} and {@code trim()} are verified independently, so the test fails
 * if either one of them silently succeeds.
 */
@Test
public void wrapAndTryUpdatingSketch() {
  final ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build();
  sketch1.update(1, new double[] {1});

  // downcast so that the mutating methods can be invoked on the wrapped sketch
  final ArrayOfDoublesUpdatableSketch sketch2 = (ArrayOfDoublesUpdatableSketch)
      ArrayOfDoublesSketches.wrapSketch(Memory.wrap(sketch1.toByteArray()));

  // read-only operations are expected to work
  Assert.assertEquals(sketch2.getEstimate(), 1.0);
  sketch2.toByteArray();

  boolean updateRejected = false;
  try {
    sketch2.update(2, new double[] {1});
  } catch (SketchesReadOnlyException e) {
    updateRejected = true;
  }
  Assert.assertTrue(updateRejected, "update() must throw SketchesReadOnlyException");

  boolean trimRejected = false;
  try {
    sketch2.trim();
  } catch (SketchesReadOnlyException e) {
    trimRejected = true;
  }
  Assert.assertTrue(trimRejected, "trim() must throw SketchesReadOnlyException");
}
/**
 * Builds a multi-line, human-readable summary of this sketch: estimate, bounds, theta,
 * mode flags, number of retained entries and seed hash. When this instance is an
 * {@code ArrayOfDoublesUpdatableSketch}, its nominal entries, current capacity, resize
 * factor and sampling probability are included as well.
 *
 * @return the summary text
 */
@Override
public String toString() {
  final int seedHash = Short.toUnsignedInt(getSeedHash());
  final StringBuilder summary = new StringBuilder();

  summary.append("### ");
  summary.append(this.getClass().getSimpleName());
  summary.append(" SUMMARY: ").append(LS);

  summary.append(" Estimate : ");
  summary.append(getEstimate()).append(LS);
  summary.append(" Upper Bound, 95% conf : ");
  summary.append(getUpperBound(2)).append(LS);
  summary.append(" Lower Bound, 95% conf : ");
  summary.append(getLowerBound(2)).append(LS);
  summary.append(" Theta (double) : ");
  summary.append(getTheta()).append(LS);
  summary.append(" Theta (long) : ");
  summary.append(getThetaLong()).append(LS);
  summary.append(" EstMode? : ");
  summary.append(isEstimationMode()).append(LS);
  summary.append(" Empty? : ");
  summary.append(isEmpty()).append(LS);
  summary.append(" Retained Entries : ");
  summary.append(getRetainedEntries()).append(LS);

  // the following fields exist only on updatable sketches
  if (this instanceof ArrayOfDoublesUpdatableSketch) {
    final ArrayOfDoublesUpdatableSketch self = (ArrayOfDoublesUpdatableSketch) this;
    summary.append(" Nominal Entries (k) : ");
    summary.append(self.getNominalEntries()).append(LS);
    summary.append(" Current Capacity : ");
    summary.append(self.getCurrentCapacity()).append(LS);
    summary.append(" Resize Factor : ");
    summary.append(self.getResizeFactor().getValue()).append(LS);
    summary.append(" Sampling Probability (p): ");
    summary.append(self.getSamplingProbability()).append(LS);
  }

  // seed hash is shown both in hexadecimal and in decimal
  summary.append(" Seed Hash : ");
  summary.append(Integer.toHexString(seedHash));
  summary.append(" | ");
  summary.append(seedHash).append(LS);

  summary.append("### END SKETCH SUMMARY").append(LS);
  return summary.toString();
}
@Test public void heapifyAndUpdateSketch() { ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); sketch1.update(1, new double[] {1}); // downcasting is not recommended, for testing only ArrayOfDoublesUpdatableSketch sketch2 = (ArrayOfDoublesUpdatableSketch) ArrayOfDoublesSketches.heapifySketch(Memory.wrap(sketch1.toByteArray())); sketch2.update(2, new double[] {1}); Assert.assertEquals(sketch2.getEstimate(), 2.0); }
/**
 * Exercises one update per supported key type (long, double, byte[], int[], long[] and
 * String) and expects six distinct entries as a result.
 */
@Test
public void updatesOfAllKeyTypes() {
  final ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build();

  final long longKey = 1L;
  final double doubleKey = 2.0;
  final byte[] bytesKey = new byte[] {3};
  final int[] intsKey = new int[] {4};
  final long[] longsKey = new long[] {5L};
  final String stringKey = "a";

  sketch.update(longKey, new double[] {1.0});
  sketch.update(doubleKey, new double[] {1.0});
  sketch.update(bytesKey, new double[] {1.0});
  sketch.update(intsKey, new double[] {1.0});
  sketch.update(longsKey, new double[] {1.0});
  sketch.update(stringKey, new double[] {1.0});

  Assert.assertEquals(sketch.getEstimate(), 6.0);
}
/**
 * Wrap the given WritableMemory as an ArrayOfDoublesUpdatableSketch.
 * Equivalent to calling {@code wrap(mem, DEFAULT_UPDATE_SEED)}.
 * @param mem the given WritableMemory
 * @return an ArrayOfDoublesUpdatableSketch
 */
public static ArrayOfDoublesUpdatableSketch wrap(final WritableMemory mem) {
  return wrap(mem, DEFAULT_UPDATE_SEED);
}
Assert.assertEquals(sketch.getEstimate(), 0.0); for (int i = 1; i <= 8192; i++) sketch.update(i, new double[] {1.0}); Assert.assertTrue(sketch.isEstimationMode()); Assert.assertEquals(sketch.getEstimate(), 8192, 8192 * 0.01); Assert.assertTrue(sketch.getEstimate() >= sketch.getLowerBound(1)); Assert.assertTrue(sketch.getEstimate() < sketch.getUpperBound(1)); Assert.assertTrue(sketch.getRetainedEntries() > 4096); sketch.trim(); Assert.assertEquals(sketch.getRetainedEntries(), 4096); double[][] values = sketch.getValues(); int count = 0; for (double[] array: values) { sketch.reset(); Assert.assertTrue(sketch.isEmpty()); Assert.assertFalse(sketch.isEstimationMode()); Assert.assertEquals(sketch.getEstimate(), 0.0); Assert.assertEquals(sketch.getUpperBound(1), 0.0); Assert.assertEquals(sketch.getLowerBound(1), 0.0); Assert.assertEquals(sketch.getThetaLong(), Long.MAX_VALUE); Assert.assertEquals(sketch.getTheta(), 1.0); ArrayOfDoublesSketchIterator it = sketch.iterator(); while (it.next()) { Assert.fail("empty sketch expected");