/**
 * Writes 20 tab-separated rows to {@code array_of_doubles_sketch_data.tsv} in the default
 * file system's working location. Each row holds a fixed timestamp string, a product name
 * with a random suffix between 1 and 10, and the Base64 form of a compact sketch that was
 * updated with two keys.
 *
 * @throws Exception if creating or writing the file fails
 */
private static void generateSketches() throws Exception {
  final Path outputFile = FileSystems.getDefault().getPath("array_of_doubles_sketch_data.tsv");
  try (BufferedWriter writer = Files.newBufferedWriter(outputFile, StandardCharsets.UTF_8)) {
    final Random random = ThreadLocalRandom.current();
    int nextKey = 0;
    for (int row = 0; row < 20; row++) {
      final ArrayOfDoublesUpdatableSketch rowSketch =
          new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(1024).build();
      // The key counter keeps increasing across rows, so every row gets two fresh keys.
      rowSketch.update(nextKey++, new double[] {1});
      rowSketch.update(nextKey++, new double[] {1});

      writer.write("2015010101");
      writer.write('\t');
      final int productNumber = random.nextInt(10) + 1;
      writer.write("product_" + productNumber);
      writer.write('\t');
      final byte[] serialized = rowSketch.compact().toByteArray();
      writer.write(StringUtils.encodeBase64String(serialized));
      writer.newLine();
    }
  }
}
/**
 * Feeds {@code update(null, null)} to a freshly built intersection and checks that the
 * result is empty: no retained entries, zero estimate and bounds, and no values.
 */
@Test
public void nullInput() {
  final ArrayOfDoublesSetOperationBuilder opBuilder = new ArrayOfDoublesSetOperationBuilder();
  final ArrayOfDoublesIntersection nullFed = opBuilder.buildIntersection();
  nullFed.update(null, null);

  final ArrayOfDoublesCompactSketch outcome = nullFed.getResult();
  Assert.assertTrue(outcome.isEmpty());
  Assert.assertEquals(outcome.getRetainedEntries(), 0);
  Assert.assertEquals(outcome.getEstimate(), 0.0);
  Assert.assertEquals(outcome.getLowerBound(1), 0.0);
  Assert.assertEquals(outcome.getUpperBound(1), 0.0);
  Assert.assertEquals(outcome.getValues().length, 0);
}
/**
 * Updates a memory-backed updatable sketch with keys 1, 2, 3 twice over, compacts it into
 * a second memory region, and checks that the compact sketch is in exact mode with three
 * entries whose single value is 2.0.
 */
@Test
public void exactModeFromQuickSelectSketch() {
  final WritableMemory updatableRegion = WritableMemory.wrap(new byte[1000000]);
  final ArrayOfDoublesUpdatableSketch updatable =
      new ArrayOfDoublesUpdatableSketchBuilder().build(updatableRegion);
  // Two passes over keys 1..3, in the order 1, 2, 3, 1, 2, 3.
  for (int pass = 0; pass < 2; pass++) {
    for (int key = 1; key <= 3; key++) {
      updatable.update(key, new double[] {1.0});
    }
  }

  final WritableMemory compactRegion = WritableMemory.wrap(new byte[1000000]);
  final ArrayOfDoublesCompactSketch compacted = updatable.compact(compactRegion);
  Assert.assertFalse(compacted.isEmpty());
  Assert.assertFalse(compacted.isEstimationMode());
  Assert.assertEquals(compacted.getEstimate(), 3.0);
  Assert.assertEquals(compacted.getLowerBound(1), 3.0);
  Assert.assertEquals(compacted.getUpperBound(1), 3.0);
  Assert.assertEquals(compacted.getRetainedEntries(), 3);
  Assert.assertEquals(compacted.getThetaLong(), Long.MAX_VALUE);
  Assert.assertEquals(compacted.getTheta(), 1.0);
  Assert.assertEquals(compacted.getSeedHash(), Util.computeSeedHash(DEFAULT_UPDATE_SEED));

  final double[][] perKeyValues = compacted.getValues();
  Assert.assertEquals(perKeyValues.length, 3);
  for (int idx = 0; idx < perKeyValues.length; idx++) {
    Assert.assertEquals(perKeyValues[idx][0], 2.0);
  }
}
/**
 * Compacts an updatable sketch that never received an update and checks that the compact
 * sketch is empty, not in estimation mode, has theta at its maximum, exposes a non-null
 * zero-length values array, and that its iterator yields nothing.
 */
@Test
public void emptyFromQuickSelectSketch() {
  final ArrayOfDoublesUpdatableSketch untouched = new ArrayOfDoublesUpdatableSketchBuilder().build();
  final ArrayOfDoublesCompactSketch compacted = untouched.compact();

  Assert.assertTrue(compacted.isEmpty());
  Assert.assertFalse(compacted.isEstimationMode());
  Assert.assertEquals(compacted.getEstimate(), 0.0);
  Assert.assertEquals(compacted.getLowerBound(1), 0.0);
  Assert.assertEquals(compacted.getUpperBound(1), 0.0);
  Assert.assertEquals(compacted.getRetainedEntries(), 0);
  Assert.assertEquals(compacted.getThetaLong(), Long.MAX_VALUE);
  Assert.assertEquals(compacted.getTheta(), 1.0);
  Assert.assertNotNull(compacted.getValues());
  Assert.assertEquals(compacted.getValues().length, 0);

  final ArrayOfDoublesSketchIterator entries = compacted.iterator();
  // A single successful advance is already a failure; Assert.fail throws immediately.
  if (entries.next()) {
    Assert.fail("empty sketch expected");
  }
}
/**
 * Heapifies a union from the {@code ArrayOfDoublesUnion_v0.9.1.bin} test resource
 * (presumably serialized by library version 0.9.1, going by the file name), checks its
 * estimate, then resets the union and verifies that its result is fully empty.
 *
 * @throws Exception if the resource file cannot be read
 */
@Test
public void heapDeserializeV0_9_1() throws Exception {
  byte[] bytes = TestUtil.readBytesFromFile(
      getClass().getClassLoader().getResource("ArrayOfDoublesUnion_v0.9.1.bin").getFile());
  ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.heapify(Memory.wrap(bytes));
  ArrayOfDoublesCompactSketch result = union2.getResult();
  Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01);

  union2.reset();
  result = union2.getResult();
  Assert.assertTrue(result.isEmpty());
  Assert.assertFalse(result.isEstimationMode());
  Assert.assertEquals(result.getEstimate(), 0.0);
  Assert.assertEquals(result.getUpperBound(1), 0.0);
  Assert.assertEquals(result.getLowerBound(1), 0.0);
  Assert.assertEquals(result.getTheta(), 1.0);
  // The result is empty at this point, so iterating over its values would execute no
  // assertions at all. State the expectation directly: there are no values.
  double[][] values = result.getValues();
  Assert.assertEquals(values.length, 0);
}
@Test public void heapEstimationModeFullOverlapTwoValuesAndDownsizing() { int key = 0; ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); for (int i = 0; i < 8192; i++) { sketch1.update(key++, new double[] {1.0, 2.0}); } key = 0; // full overlap ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); for (int i = 0; i < 8192; i++) { sketch2.update(key++, new double[] {1.0, 2.0}); } ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().setNumberOfValues(2).setNominalEntries(1024).buildUnion(); union.update(sketch1); union.update(sketch2); ArrayOfDoublesCompactSketch result = union.getResult(); Assert.assertFalse(result.isEmpty()); Assert.assertTrue(result.isEstimationMode()); Assert.assertEquals(result.getEstimate(), 8192.0, 8192 * 0.01); Assert.assertEquals(result.getRetainedEntries(), 1024); // union was downsampled ArrayOfDoublesSketchIterator it = result.iterator(); double[] expected = {2, 4}; while (it.next()) { Assert.assertEquals(it.getValues(), expected, Arrays.toString(it.getValues()) + " != " + Arrays.toString(expected)); } }
/**
 * Fills a memory-backed updatable sketch with 8192 keys, compacts it into another memory
 * region, serializes the compact sketch and wraps the bytes again. The wrapped sketch must
 * be non-empty, in estimation mode, and agree with the source on estimate and theta.
 */
@Test
public void serializeDeserializeEstimation() {
  final WritableMemory updatableRegion = WritableMemory.wrap(new byte[1000000]);
  final ArrayOfDoublesUpdatableSketch updatable =
      new ArrayOfDoublesUpdatableSketchBuilder().build(updatableRegion);
  for (int key = 0; key < 8192; key++) {
    updatable.update(key, new double[] {1.0});
  }

  final WritableMemory compactRegion = WritableMemory.wrap(new byte[1000000]);
  final ArrayOfDoublesCompactSketch source = updatable.compact(compactRegion);
  final WritableMemory roundTripBytes = WritableMemory.wrap(source.toByteArray());
  final ArrayOfDoublesSketch restored = ArrayOfDoublesSketches.wrapSketch(roundTripBytes);

  Assert.assertFalse(restored.isEmpty());
  Assert.assertTrue(restored.isEstimationMode());
  Assert.assertEquals(restored.getEstimate(), source.getEstimate());
  Assert.assertEquals(restored.getThetaLong(), source.getThetaLong());
}
/**
 * Starts a union on the heap, serializes it, wraps the bytes as a second union and keeps
 * updating through the wrapper. Keys 1, 2 and 3 each receive three updates of 1.0 in
 * total across both sketches, so every resulting value is expected to be 3.0.
 */
@Test
public void heapToDirect() {
  // First sketch: key 1 three times, then key 2 once.
  final ArrayOfDoublesUpdatableSketch first = new ArrayOfDoublesUpdatableSketchBuilder().build();
  for (int n = 0; n < 3; n++) {
    first.update(1, new double[] {1.0});
  }
  first.update(2, new double[] {1.0});

  // Second sketch: key 2 twice, then key 3 three times.
  final ArrayOfDoublesUpdatableSketch second = new ArrayOfDoublesUpdatableSketchBuilder().build();
  for (int n = 0; n < 2; n++) {
    second.update(2, new double[] {1.0});
  }
  for (int n = 0; n < 3; n++) {
    second.update(3, new double[] {1.0});
  }

  final ArrayOfDoublesUnion onHeap = new ArrayOfDoublesSetOperationBuilder().buildUnion();
  onHeap.update(first);

  final WritableMemory unionBytes = WritableMemory.wrap(onHeap.toByteArray());
  final ArrayOfDoublesUnion wrapped = ArrayOfDoublesUnion.wrap(unionBytes);
  wrapped.update(second);

  final ArrayOfDoublesCompactSketch outcome = wrapped.getResult(WritableMemory.wrap(new byte[1000000]));
  Assert.assertFalse(outcome.isEmpty());
  Assert.assertEquals(outcome.getEstimate(), 3.0);

  final double[][] perKeyValues = outcome.getValues();
  Assert.assertEquals(perKeyValues.length, 3);
  for (int idx = 0; idx < 3; idx++) {
    Assert.assertEquals(perKeyValues[idx][0], 3.0);
  }
}
@Test public void heapMixedMode() { int key = 0; ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); for (int i = 0; i < 1000; i++) { sketch1.update(key++, new double[] {1.0}); } key -= 500; // overlap half of the entries ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(0.2f).build(); for (int i = 0; i < 20000; i++) { sketch2.update(key++, new double[] {1.0}); } ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().buildUnion(); union.update(sketch1); union.update(sketch2); ArrayOfDoublesCompactSketch result = union.getResult(); Assert.assertFalse(result.isEmpty()); Assert.assertTrue(result.isEstimationMode()); Assert.assertEquals(result.getEstimate(), 20500.0, 20500 * 0.01); }
/**
 * Wraps the bytes of the {@code ArrayOfDoublesUnion_v0.9.1.bin} test resource as a union
 * over read-only memory, checks the estimate, and verifies that {@code reset()} is
 * rejected with a {@link SketchesReadOnlyException}.
 *
 * @throws Exception if the resource file cannot be read
 */
@Test
public void wrapAndTryUpdatingUnionV0_9_1() throws Exception {
  final byte[] serialized = TestUtil.readBytesFromFile(
      getClass().getClassLoader().getResource("ArrayOfDoublesUnion_v0.9.1.bin").getFile());
  final ArrayOfDoublesUnion wrapped = ArrayOfDoublesUnion.wrap(Memory.wrap(serialized));

  final ArrayOfDoublesCompactSketch outcome = wrapped.getResult();
  Assert.assertEquals(outcome.getEstimate(), 12288.0, 12288 * 0.01);

  boolean rejected;
  try {
    wrapped.reset();
    rejected = false;
  } catch (SketchesReadOnlyException e) {
    rejected = true;
  }
  Assert.assertTrue(rejected);
}
@Test public void heapEstimationMode() { int key = 0; ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); for (int i = 0; i < 8192; i++) { sketch1.update(key++, new double[] {1.0}); } key -= 4096; // overlap half of the entries ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); for (int i = 0; i < 8192; i++) { sketch2.update(key++, new double[] {1.0}); } ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().buildUnion(); union.update(sketch1); union.update(sketch2); ArrayOfDoublesCompactSketch result = union.getResult(); Assert.assertFalse(result.isEmpty()); Assert.assertTrue(result.isEstimationMode()); Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); union.reset(); result = union.getResult(); Assert.assertTrue(result.isEmpty()); Assert.assertFalse(result.isEstimationMode()); Assert.assertEquals(result.getEstimate(), 0.0); Assert.assertEquals(result.getUpperBound(1), 0.0); Assert.assertEquals(result.getLowerBound(1), 0.0); Assert.assertEquals(result.getTheta(), 1.0); }
/**
 * Round-trips an 8192-key updatable sketch three ways and compares each copy with its
 * source on emptiness, estimation mode, estimate and theta: wrapping the serialized
 * updatable sketch, heapifying the same memory, and heapifying the serialized compact form.
 */
@Test
public void serializeDeserializeEstimation() {
  final ArrayOfDoublesUpdatableSketch original = new ArrayOfDoublesUpdatableSketchBuilder().build();
  for (int key = 0; key < 8192; key++) {
    original.update(key, new double[] {1.0});
  }

  final WritableMemory serialized = WritableMemory.wrap(original.toByteArray());

  // Updatable sketch wrapped around the serialized bytes.
  final ArrayOfDoublesUpdatableSketch wrapped = ArrayOfDoublesSketches.wrapUpdatableSketch(serialized);
  Assert.assertFalse(wrapped.isEmpty());
  Assert.assertTrue(wrapped.isEstimationMode());
  Assert.assertEquals(wrapped.getEstimate(), original.getEstimate());
  Assert.assertEquals(wrapped.getThetaLong(), original.getThetaLong());

  // Updatable sketch heapified from the same memory.
  final ArrayOfDoublesUpdatableSketch heapified = ArrayOfDoublesSketches.heapifyUpdatableSketch(serialized);
  Assert.assertFalse(heapified.isEmpty());
  Assert.assertTrue(heapified.isEstimationMode());
  Assert.assertEquals(heapified.getEstimate(), original.getEstimate());
  Assert.assertEquals(heapified.getThetaLong(), original.getThetaLong());

  // Compact form, serialized and heapified.
  final ArrayOfDoublesCompactSketch compacted = original.compact();
  final Memory compactBytes = Memory.wrap(compacted.toByteArray());
  final ArrayOfDoublesSketch restored = ArrayOfDoublesSketches.heapifySketch(compactBytes);
  Assert.assertFalse(restored.isEmpty());
  Assert.assertTrue(restored.isEstimationMode());
  Assert.assertEquals(restored.getEstimate(), compacted.getEstimate());
  Assert.assertEquals(restored.getThetaLong(), compacted.getThetaLong());
}
/**
 * Starts a union in caller-supplied memory, serializes it, heapifies the bytes as a second
 * union and keeps updating that one. Keys 1, 2 and 3 each receive three updates of 1.0 in
 * total across both sketches, so every resulting value is expected to be 3.0.
 */
@Test
public void directToHeap() {
  // First sketch: key 1 three times, then key 2 once.
  final ArrayOfDoublesUpdatableSketch first = new ArrayOfDoublesUpdatableSketchBuilder().build();
  for (int n = 0; n < 3; n++) {
    first.update(1, new double[] {1.0});
  }
  first.update(2, new double[] {1.0});

  // Second sketch: key 2 twice, then key 3 three times.
  final ArrayOfDoublesUpdatableSketch second = new ArrayOfDoublesUpdatableSketchBuilder().build();
  for (int n = 0; n < 2; n++) {
    second.update(2, new double[] {1.0});
  }
  for (int n = 0; n < 3; n++) {
    second.update(3, new double[] {1.0});
  }

  final WritableMemory unionRegion = WritableMemory.wrap(new byte[1000000]);
  final ArrayOfDoublesUnion inMemory = new ArrayOfDoublesSetOperationBuilder().buildUnion(unionRegion);
  inMemory.update(first);

  final Memory unionBytes = Memory.wrap(inMemory.toByteArray());
  final ArrayOfDoublesUnion onHeap = ArrayOfDoublesUnion.heapify(unionBytes);
  onHeap.update(second);

  final ArrayOfDoublesCompactSketch outcome = onHeap.getResult();
  Assert.assertFalse(outcome.isEmpty());
  Assert.assertEquals(outcome.getEstimate(), 3.0);

  final double[][] perKeyValues = outcome.getValues();
  Assert.assertEquals(perKeyValues.length, 3);
  for (int idx = 0; idx < 3; idx++) {
    Assert.assertEquals(perKeyValues[idx][0], 3.0);
  }
}
@Test public void heapSerializeDeserializeWithSeed() { long seed = 1; int key = 0; ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build(); for (int i = 0; i < 8192; i++) { sketch1.update(key++, new double[] {1.0}); } key -= 4096; // overlap half of the entries ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed).build(); for (int i = 0; i < 8192; i++) { sketch2.update(key++, new double[] {1.0}); } ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().setSeed(seed).buildUnion(); union1.update(sketch1); union1.update(sketch2); ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.heapify(Memory.wrap(union1.toByteArray()), seed); ArrayOfDoublesCompactSketch result = union2.getResult(); Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); }
/**
 * Passes a sketch that never received an update to an intersection (second argument
 * {@code null}) and checks that the result is empty: no retained entries, zero estimate
 * and bounds, and no values.
 */
@Test
public void empty() {
  final ArrayOfDoublesUpdatableSketch untouched = new ArrayOfDoublesUpdatableSketchBuilder().build();
  final ArrayOfDoublesSetOperationBuilder opBuilder = new ArrayOfDoublesSetOperationBuilder();
  final ArrayOfDoublesIntersection intersected = opBuilder.buildIntersection();
  intersected.update(untouched, null);

  final ArrayOfDoublesCompactSketch outcome = intersected.getResult();
  Assert.assertTrue(outcome.isEmpty());
  Assert.assertEquals(outcome.getRetainedEntries(), 0);
  Assert.assertEquals(outcome.getEstimate(), 0.0);
  Assert.assertEquals(outcome.getLowerBound(1), 0.0);
  Assert.assertEquals(outcome.getUpperBound(1), 0.0);
  Assert.assertEquals(outcome.getValues().length, 0);
}
/**
 * Compacts a memory-backed updatable sketch that never received an update into a second
 * memory region and checks that the compact sketch is empty, not in estimation mode, has
 * theta at its maximum, exposes a non-null zero-length values array, and that its iterator
 * yields nothing.
 */
@Test
public void emptyFromQuickSelectSketch() {
  final WritableMemory updatableRegion = WritableMemory.wrap(new byte[1000000]);
  final ArrayOfDoublesUpdatableSketch untouched =
      new ArrayOfDoublesUpdatableSketchBuilder().build(updatableRegion);
  final WritableMemory compactRegion = WritableMemory.wrap(new byte[1000000]);
  final ArrayOfDoublesCompactSketch compacted = untouched.compact(compactRegion);

  Assert.assertTrue(compacted.isEmpty());
  Assert.assertFalse(compacted.isEstimationMode());
  Assert.assertEquals(compacted.getEstimate(), 0.0);
  Assert.assertEquals(compacted.getLowerBound(1), 0.0);
  Assert.assertEquals(compacted.getUpperBound(1), 0.0);
  Assert.assertEquals(compacted.getRetainedEntries(), 0);
  Assert.assertEquals(compacted.getThetaLong(), Long.MAX_VALUE);
  Assert.assertEquals(compacted.getTheta(), 1.0);
  Assert.assertNotNull(compacted.getValues());
  Assert.assertEquals(compacted.getValues().length, 0);

  final ArrayOfDoublesSketchIterator entries = compacted.iterator();
  // A single successful advance is already a failure; Assert.fail throws immediately.
  if (entries.next()) {
    Assert.fail("empty sketch expected");
  }
}
Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); Assert.assertTrue(result.isEmpty()); Assert.assertFalse(result.isEstimationMode()); Assert.assertEquals(result.getEstimate(), 0.0); Assert.assertEquals(result.getUpperBound(1), 0.0); Assert.assertEquals(result.getLowerBound(1), 0.0); Assert.assertEquals(result.getTheta(), 1.0); double[][] values = result.getValues(); for (int i = 0; i < values.length; i++) { Assert.assertEquals(values[i][0], 2.0);
/**
 * Serializes a compact sketch built without an explicit seed and heapifies it with seed
 * 123. The test passes only if a {@link SketchesArgumentException} is thrown.
 */
@Test(expectedExceptions = SketchesArgumentException.class)
public void deserializeWithWrongSeed() {
  final ArrayOfDoublesUpdatableSketch updatable = new ArrayOfDoublesUpdatableSketchBuilder().build();
  for (int key = 0; key < 8192; key++) {
    updatable.update(key, new double[] {1.0});
  }

  final ArrayOfDoublesCompactSketch compacted = updatable.compact();
  final Memory compactBytes = Memory.wrap(compacted.toByteArray());
  ArrayOfDoublesSketches.heapifySketch(compactBytes, 123);
}
}
@Test public void directSerializeDeserializeWithSeed() { long seed = 1; int key = 0; ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed) .build(WritableMemory.wrap(new byte[1000000])); for (int i = 0; i < 8192; i++) { sketch1.update(key++, new double[] {1.0}); } key -= 4096; // overlap half of the entries ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(seed) .build(WritableMemory.wrap(new byte[1000000])); for (int i = 0; i < 8192; i++) { sketch2.update(key++, new double[] {1.0}); } ArrayOfDoublesUnion union1 = new ArrayOfDoublesSetOperationBuilder().setSeed(seed) .buildUnion(WritableMemory.wrap(new byte[1000000])); union1.update(sketch1); union1.update(sketch2); ArrayOfDoublesUnion union2 = ArrayOfDoublesUnion.wrap(WritableMemory.wrap(union1.toByteArray()), seed); ArrayOfDoublesCompactSketch result = union2.getResult(WritableMemory.wrap(new byte[1000000])); Assert.assertEquals(result.getEstimate(), 12288.0, 12288 * 0.01); }
@Test public void notEmptyNoEntries() { ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(0.01f).build(); sketch1.update("a", new double[] {1}); // this happens to get rejected because of sampling with low probability ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().buildIntersection(); intersection.update(sketch1, null); ArrayOfDoublesCompactSketch result = intersection.getResult(); Assert.assertTrue(result.isEmpty()); Assert.assertEquals(result.getRetainedEntries(), 0); Assert.assertEquals(result.getEstimate(), 0.0); Assert.assertEquals(result.getLowerBound(1), 0.0); Assert.assertEquals(result.getUpperBound(1), 0.0); Assert.assertEquals(result.getValues().length, 0); }