@Test public void makeAggregateCombiner() { AggregatorFactory aggregatorFactory = new ArrayOfDoublesSketchAggregatorFactory("", "", null, null, null); AggregatorFactory combiningFactory = aggregatorFactory.getCombiningFactory(); AggregateCombiner<ArrayOfDoublesSketch> combiner = combiningFactory.makeAggregateCombiner(); ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); sketch1.update("a", new double[] {1}); ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); sketch2.update("b", new double[] {1}); sketch2.update("c", new double[] {1}); TestObjectColumnSelector<ArrayOfDoublesSketch> selector = new TestObjectColumnSelector<ArrayOfDoublesSketch>(new ArrayOfDoublesSketch[] {sketch1, sketch2}); combiner.reset(selector); Assert.assertEquals(1, combiner.getObject().getEstimate(), 0); selector.increment(); combiner.fold(selector); Assert.assertEquals(3, combiner.getObject().getEstimate(), 0); }
public ArrayOfDoublesSketchBuildAggregator( final DimensionSelector keySelector, final List<BaseDoubleColumnValueSelector> valueSelectors, final int nominalEntries ) { this.keySelector = keySelector; this.valueSelectors = valueSelectors.toArray(new BaseDoubleColumnValueSelector[0]); values = new double[valueSelectors.size()]; sketch = new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(nominalEntries) .setNumberOfValues(valueSelectors.size()).build(); }
public ArrayOfDoublesSketchNoOpAggregator(final int numberOfValues) { emptySketch = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(numberOfValues).build().compact(); }
private static void generateSketches() throws Exception { Path path = FileSystems.getDefault().getPath("array_of_doubles_sketch_data.tsv"); try (BufferedWriter out = Files.newBufferedWriter(path, StandardCharsets.UTF_8)) { Random rand = ThreadLocalRandom.current(); int key = 0; for (int i = 0; i < 20; i++) { ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(1024) .build(); sketch.update(key++, new double[] {1}); sketch.update(key++, new double[] {1}); out.write("2015010101"); out.write('\t'); out.write("product_" + (rand.nextInt(10) + 1)); out.write('\t'); out.write(StringUtils.encodeBase64String(sketch.compact().toByteArray())); out.newLine(); } } }
@Test(expectedExceptions = SketchesArgumentException.class) public void incompatibleSeeds() { ArrayOfDoublesSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); ArrayOfDoublesSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(2).build(); ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().setSeed(3).buildAnotB(); aNotB.update(sketchA, sketchB); }
@Test public void isEmptyWithSampling() { float samplingProbability = 0.1f; ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(samplingProbability).build(); Assert.assertTrue(sketch.isEmpty()); Assert.assertFalse(sketch.isEstimationMode()); Assert.assertEquals(sketch.getEstimate(), 0.0); Assert.assertEquals(sketch.getUpperBound(1), 0.0); Assert.assertEquals(sketch.getLowerBound(1), 0.0); Assert.assertEquals(sketch.getThetaLong() / (double) Long.MAX_VALUE, (double) samplingProbability); Assert.assertEquals(sketch.getTheta(), (double) samplingProbability); }
@Test public void serializeDeserializeSampling() { int sketchSize = 16384; int numberOfUniques = sketchSize; ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(sketchSize).setSamplingProbability(0.5f).build(); for (int i = 0; i < numberOfUniques; i++) sketch1.update(i, new double[] {1.0}); ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketch.heapify(Memory.wrap(sketch1.toByteArray())); Assert.assertTrue(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate() / numberOfUniques, 1.0, 0.01); Assert.assertEquals(sketch2.getRetainedEntries() / (double) numberOfUniques, 0.5, 0.01); Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); }
@Test public void serializeDeserializeEstimationNoResize() throws Exception { ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder(). setResizeFactor(ResizeFactor.X1).build(); for (int j = 0; j < 10; j++) { for (int i = 0; i < 8192; i++) sketch1.update(i, new double[] {1.0}); } byte[] byteArray = sketch1.toByteArray(); //for visual testing //TestUtil.writeBytesToFile(byteArray, "ArrayOfDoublesQuickSelectSketch4K.data"); ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketch.heapify(Memory.wrap(byteArray)); Assert.assertTrue(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate(), 8192, 8192 * 0.99); Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); double[][] values = sketch2.getValues(); Assert.assertTrue(values.length >= 4096); for (double[] array: values) Assert.assertEquals(array[0], 10.0); }
@Test(expectedExceptions = SketchesArgumentException.class) public void invalidSamplingProbability() { new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(2f); }
public ArrayOfDoublesSketchNoOpBufferAggregator(final int numberOfValues) { emptySketch = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(numberOfValues).build().compact(); }
@Test(expectedExceptions = SketchesArgumentException.class) public void incompatibleSeedB() { ArrayOfDoublesSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); aNotB.update(null, sketch); }
@Test public void sampling() { float samplingProbability = 0.001f; ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(samplingProbability).build(); sketch.update("a", new double[] {1.0}); Assert.assertFalse(sketch.isEmpty()); Assert.assertTrue(sketch.isEstimationMode()); Assert.assertEquals(sketch.getEstimate(), 0.0); Assert.assertTrue(sketch.getUpperBound(1) > 0.0); Assert.assertEquals(sketch.getLowerBound(1), 0.0, 0.0000001); Assert.assertEquals(sketch.getThetaLong() / (double) Long.MAX_VALUE, (double) samplingProbability); Assert.assertEquals(sketch.getTheta(), (double) samplingProbability); }
@Test public void serializeDeserializeSampling() { int sketchSize = 16384; int numberOfUniques = sketchSize; ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder(). setNominalEntries(sketchSize).setSamplingProbability(0.5f). build(WritableMemory.wrap(new byte[1000000])); for (int i = 0; i < numberOfUniques; i++) { sketch1.update(i, new double[] {1.0}); } ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketch.wrap(WritableMemory.wrap(sketch1.toByteArray())); Assert.assertTrue(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate() / numberOfUniques, 1.0, 0.01); Assert.assertEquals(sketch2.getRetainedEntries() / (double) numberOfUniques, 0.5, 0.01); Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); }
@Test(expectedExceptions = SketchesArgumentException.class) public void notEnoughMemory() { new ArrayOfDoublesUpdatableSketchBuilder(). setNominalEntries(32).build(WritableMemory.wrap(new byte[1055])); } }
@Test public void serializeDeserializeEstimationNoResize() throws Exception { ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setResizeFactor(ResizeFactor.X1). build(WritableMemory.wrap(new byte[1000000])); for (int j = 0; j < 10; j++) { for (int i = 0; i < 8192; i++) { sketch1.update(i, new double[] {1.0}); } } byte[] byteArray = sketch1.toByteArray(); //for visual testing //TestUtil.writeBytesToFile(byteArray, "ArrayOfDoublesQuickSelectSketch4K.data"); ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketch.wrap(WritableMemory.wrap(byteArray)); Assert.assertTrue(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate(), 8192, 8192 * 0.99); Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); double[][] values = sketch2.getValues(); Assert.assertTrue(values.length >= 4096); for (double[] array: values) { Assert.assertEquals(array[0], 10.0); } }
@Test(expectedExceptions = SketchesArgumentException.class) public void fromQuickSelectSketchNotEnoughMemory() { ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(WritableMemory.wrap(new byte[1000000])); us.update(1, new double[] {1.0}); us.compact(WritableMemory.wrap(new byte[39])); }
@Test(expectedExceptions = SketchesArgumentException.class) public void checkInsertExceptions() { ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); sketch1.update("a", new double[] {1.0}); }
@Test(expectedExceptions = SketchesArgumentException.class) public void incompatibleSeedA() { ArrayOfDoublesSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); aNotB.update(sketch, null); }
@Override public void init(final ByteBuffer buf, final int position) { final WritableMemory mem = WritableMemory.wrap(buf, ByteOrder.LITTLE_ENDIAN); final WritableMemory region = mem.writableRegion(position, maxIntermediateSize); new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(nominalEntries) .setNumberOfValues(valueSelectors.length) .setNumberOfValues(valueSelectors.length).build(region); }
@Test public void isEmptyWithSampling() { float samplingProbability = 0.1f; ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder(). setSamplingProbability(samplingProbability). build(WritableMemory.wrap(new byte[1000000])); Assert.assertTrue(sketch.isEmpty()); Assert.assertTrue(((DirectArrayOfDoublesQuickSelectSketch)sketch).isInSamplingMode()); Assert.assertFalse(sketch.isEstimationMode()); Assert.assertEquals(sketch.getEstimate(), 0.0); Assert.assertEquals(sketch.getUpperBound(1), 0.0); Assert.assertEquals(sketch.getLowerBound(1), 0.0); Assert.assertEquals( sketch.getThetaLong() / (double) Long.MAX_VALUE, (double) samplingProbability); Assert.assertEquals(sketch.getTheta(), (double) samplingProbability); }