/**
 * Expects the builder to throw {@link SketchesArgumentException} when asked
 * to use a sampling probability of 2.
 */
@Test(expectedExceptions = SketchesArgumentException.class)
public void invalidSamplingProbability() {
  final ArrayOfDoublesUpdatableSketchBuilder builder = new ArrayOfDoublesUpdatableSketchBuilder();
  builder.setSamplingProbability(2f);
}
/**
 * Checks the state of a sketch built with a sampling probability of 0.1 before
 * any update: it must report empty, not be in estimation mode, have a zero
 * estimate and zero bounds, and expose theta equal to the sampling probability.
 */
@Test
public void isEmptyWithSampling() {
  final float p = 0.1f;
  final ArrayOfDoublesUpdatableSketch emptySketch =
      new ArrayOfDoublesUpdatableSketchBuilder()
          .setSamplingProbability(p)
          .build();

  Assert.assertTrue(emptySketch.isEmpty());
  Assert.assertFalse(emptySketch.isEstimationMode());

  Assert.assertEquals(emptySketch.getEstimate(), 0.0);
  Assert.assertEquals(emptySketch.getUpperBound(1), 0.0);
  Assert.assertEquals(emptySketch.getLowerBound(1), 0.0);

  // The float probability is widened to double for both theta comparisons.
  final double expectedTheta = p;
  final double thetaFromLong = emptySketch.getThetaLong() / (double) Long.MAX_VALUE;
  Assert.assertEquals(thetaFromLong, expectedTheta);
  Assert.assertEquals(emptySketch.getTheta(), expectedTheta);
}
/**
 * Updates a sketch built with a sampling probability of 0.001 with the single
 * key "a" and checks the resulting state: the sketch is expected to be
 * non-empty and in estimation mode, with a zero estimate, a positive upper
 * bound, a lower bound of (approximately) zero, and theta equal to the
 * sampling probability.
 */
@Test
public void sampling() {
  final float p = 0.001f;
  final ArrayOfDoublesUpdatableSketch sampled =
      new ArrayOfDoublesUpdatableSketchBuilder()
          .setSamplingProbability(p)
          .build();

  final double[] values = {1.0};
  sampled.update("a", values);

  Assert.assertFalse(sampled.isEmpty());
  Assert.assertTrue(sampled.isEstimationMode());

  Assert.assertEquals(sampled.getEstimate(), 0.0);
  Assert.assertTrue(sampled.getUpperBound(1) > 0.0);
  Assert.assertEquals(sampled.getLowerBound(1), 0.0, 0.0000001);

  // The float probability is widened to double for both theta comparisons.
  final double expectedTheta = p;
  final double thetaFromLong = sampled.getThetaLong() / (double) Long.MAX_VALUE;
  Assert.assertEquals(thetaFromLong, expectedTheta);
  Assert.assertEquals(sampled.getTheta(), expectedTheta);
}
/**
 * Same checks as the empty-with-sampling case, but for a sketch built on top of
 * a {@code WritableMemory} wrapping a 1,000,000-byte array. In addition to the
 * empty/zero-estimate/theta assertions, the sketch is cast to
 * {@code DirectArrayOfDoublesQuickSelectSketch} and must report sampling mode.
 */
@Test
public void isEmptyWithSampling() {
  final float p = 0.1f;
  final ArrayOfDoublesUpdatableSketchBuilder builder =
      new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(p);
  final WritableMemory backing = WritableMemory.wrap(new byte[1000000]);
  final ArrayOfDoublesUpdatableSketch emptySketch = builder.build(backing);

  Assert.assertTrue(emptySketch.isEmpty());

  final DirectArrayOfDoublesQuickSelectSketch direct =
      (DirectArrayOfDoublesQuickSelectSketch) emptySketch;
  Assert.assertTrue(direct.isInSamplingMode());
  Assert.assertFalse(emptySketch.isEstimationMode());

  Assert.assertEquals(emptySketch.getEstimate(), 0.0);
  Assert.assertEquals(emptySketch.getUpperBound(1), 0.0);
  Assert.assertEquals(emptySketch.getLowerBound(1), 0.0);

  // The float probability is widened to double for both theta comparisons.
  final double expectedTheta = p;
  final double thetaFromLong = emptySketch.getThetaLong() / (double) Long.MAX_VALUE;
  Assert.assertEquals(thetaFromLong, expectedTheta);
  Assert.assertEquals(emptySketch.getTheta(), expectedTheta);
}
@Test // very low probability of being sampled // once the an input value is chosen so that it is rejected, the test will continue to work // unless the hash function and the seed are the same public void sampling() { float samplingProbability = 0.001f; ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder(). setSamplingProbability(samplingProbability). build(WritableMemory.wrap(new byte[1000000])); sketch.update("a", new double[] {1.0}); Assert.assertFalse(sketch.isEmpty()); Assert.assertTrue(sketch.isEstimationMode()); Assert.assertEquals(sketch.getEstimate(), 0.0); Assert.assertTrue(sketch.getUpperBound(1) > 0.0); Assert.assertEquals(sketch.getLowerBound(1), 0.0, 0.0000001); Assert.assertEquals( sketch.getThetaLong() / (double) Long.MAX_VALUE, (double) samplingProbability); Assert.assertEquals(sketch.getTheta(), (double) samplingProbability); }
@Test public void notEmptyNoEntries() { ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(0.01f).build(); sketch1.update("a", new double[] {1}); // this happens to get rejected because of sampling with low probability ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); intersection.update(sketch1, null); ArrayOfDoublesCompactSketch result = intersection.getResult(); Assert.assertTrue(result.isEmpty()); Assert.assertEquals(result.getRetainedEntries(), 0); Assert.assertEquals(result.getEstimate(), 0.0); Assert.assertEquals(result.getLowerBound(1), 0.0); Assert.assertEquals(result.getUpperBound(1), 0.0); Assert.assertEquals(result.getValues().length, 0); }
/**
 * Fills a sketch (16384 nominal entries, sampling probability 0.5) with 16384
 * distinct integer keys, serializes it to a byte array and heapifies the bytes
 * into a second sketch. The second sketch must be in estimation mode, estimate
 * the number of uniques within 1%, retain about half of them, and report the
 * same theta as the source sketch.
 */
@Test
public void serializeDeserializeSampling() {
  final int nominalEntries = 16384;
  final int uniques = nominalEntries;

  final ArrayOfDoublesUpdatableSketch source =
      new ArrayOfDoublesUpdatableSketchBuilder()
          .setNominalEntries(nominalEntries)
          .setSamplingProbability(0.5f)
          .build();
  for (int key = 0; key < uniques; key++) {
    source.update(key, new double[] {1.0});
  }

  final byte[] serialized = source.toByteArray();
  final ArrayOfDoublesSketch restored = ArrayOfDoublesSketch.heapify(Memory.wrap(serialized));

  Assert.assertTrue(restored.isEstimationMode());
  Assert.assertEquals(restored.getEstimate() / uniques, 1.0, 0.01);
  Assert.assertEquals(restored.getRetainedEntries() / (double) uniques, 0.5, 0.01);
  Assert.assertEquals(source.getTheta(), restored.getTheta());
}
/**
 * Memory-backed variant of the sampling round trip: the source sketch (16384
 * nominal entries, sampling probability 0.5) is built on a
 * {@code WritableMemory} wrapping a 1,000,000-byte array and updated with 16384
 * distinct integer keys. Its serialized bytes are then wrapped (not heapified)
 * as a second sketch, which must be in estimation mode, estimate the number of
 * uniques within 1%, retain about half of them, and report the same theta.
 */
@Test
public void serializeDeserializeSampling() {
  final int nominalEntries = 16384;
  final int uniques = nominalEntries;

  final ArrayOfDoublesUpdatableSketchBuilder builder =
      new ArrayOfDoublesUpdatableSketchBuilder()
          .setNominalEntries(nominalEntries)
          .setSamplingProbability(0.5f);
  final ArrayOfDoublesUpdatableSketch source =
      builder.build(WritableMemory.wrap(new byte[1000000]));
  for (int key = 0; key < uniques; key++) {
    source.update(key, new double[] {1.0});
  }

  final byte[] serialized = source.toByteArray();
  final ArrayOfDoublesSketch wrapped =
      ArrayOfDoublesSketch.wrap(WritableMemory.wrap(serialized));

  Assert.assertTrue(wrapped.isEstimationMode());
  Assert.assertEquals(wrapped.getEstimate() / uniques, 1.0, 0.01);
  Assert.assertEquals(wrapped.getRetainedEntries() / (double) uniques, 0.5, 0.01);
  Assert.assertEquals(source.getTheta(), wrapped.getTheta());
}
@Test public void heapMixedMode() { int key = 0; ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); for (int i = 0; i < 1000; i++) { sketch1.update(key++, new double[] {1.0}); } key -= 500; // overlap half of the entries ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(0.2f).build(); for (int i = 0; i < 20000; i++) { sketch2.update(key++, new double[] {1.0}); } ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().buildUnion(); union.update(sketch1); union.update(sketch2); ArrayOfDoublesCompactSketch result = union.getResult(); Assert.assertFalse(result.isEmpty()); Assert.assertTrue(result.isEstimationMode()); Assert.assertEquals(result.getEstimate(), 20500.0, 20500 * 0.01); }