public ArrayOfDoublesSketchBuildAggregator( final DimensionSelector keySelector, final List<BaseDoubleColumnValueSelector> valueSelectors, final int nominalEntries ) { this.keySelector = keySelector; this.valueSelectors = valueSelectors.toArray(new BaseDoubleColumnValueSelector[0]); values = new double[valueSelectors.size()]; sketch = new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(nominalEntries) .setNumberOfValues(valueSelectors.size()).build(); }
public ArrayOfDoublesSketchNoOpAggregator(final int numberOfValues) { emptySketch = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(numberOfValues).build().compact(); }
public ArrayOfDoublesSketchNoOpBufferAggregator(final int numberOfValues) { emptySketch = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(numberOfValues).build().compact(); }
@Override public void init(final ByteBuffer buf, final int position) { final WritableMemory mem = WritableMemory.wrap(buf, ByteOrder.LITTLE_ENDIAN); final WritableMemory region = mem.writableRegion(position, maxIntermediateSize); new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(nominalEntries) .setNumberOfValues(valueSelectors.length) .setNumberOfValues(valueSelectors.length).build(region); }
private static void generateSketches() throws Exception { Path path = FileSystems.getDefault().getPath("array_of_doubles_sketch_data.tsv"); try (BufferedWriter out = Files.newBufferedWriter(path, StandardCharsets.UTF_8)) { Random rand = ThreadLocalRandom.current(); int key = 0; for (int i = 0; i < 20; i++) { ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(1024) .build(); sketch.update(key++, new double[] {1}); sketch.update(key++, new double[] {1}); out.write("2015010101"); out.write('\t'); out.write("product_" + (rand.nextInt(10) + 1)); out.write('\t'); out.write(StringUtils.encodeBase64String(sketch.compact().toByteArray())); out.newLine(); } } }
@Test public void makeAggregateCombiner() { AggregatorFactory aggregatorFactory = new ArrayOfDoublesSketchAggregatorFactory("", "", null, null, null); AggregatorFactory combiningFactory = aggregatorFactory.getCombiningFactory(); AggregateCombiner<ArrayOfDoublesSketch> combiner = combiningFactory.makeAggregateCombiner(); ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); sketch1.update("a", new double[] {1}); ArrayOfDoublesUpdatableSketch sketch2 = new ArrayOfDoublesUpdatableSketchBuilder().build(); sketch2.update("b", new double[] {1}); sketch2.update("c", new double[] {1}); TestObjectColumnSelector<ArrayOfDoublesSketch> selector = new TestObjectColumnSelector<ArrayOfDoublesSketch>(new ArrayOfDoublesSketch[] {sketch1, sketch2}); combiner.reset(selector); Assert.assertEquals(1, combiner.getObject().getEstimate(), 0); selector.increment(); combiner.fold(selector); Assert.assertEquals(3, combiner.getObject().getEstimate(), 0); }
@Test(expectedExceptions = SketchesArgumentException.class) public void checkInsertExceptions() { ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); sketch1.update("a", new double[] {1.0}); }
@Test(expectedExceptions = SketchesArgumentException.class) public void notEnoughMemory() { new ArrayOfDoublesUpdatableSketchBuilder(). setNominalEntries(32).build(WritableMemory.wrap(new byte[1055])); } }
@Test public void isEmptyWithSampling() { float samplingProbability = 0.1f; ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSamplingProbability(samplingProbability).build(); Assert.assertTrue(sketch.isEmpty()); Assert.assertFalse(sketch.isEstimationMode()); Assert.assertEquals(sketch.getEstimate(), 0.0); Assert.assertEquals(sketch.getUpperBound(1), 0.0); Assert.assertEquals(sketch.getLowerBound(1), 0.0); Assert.assertEquals(sketch.getThetaLong() / (double) Long.MAX_VALUE, (double) samplingProbability); Assert.assertEquals(sketch.getTheta(), (double) samplingProbability); }
@Test(expectedExceptions = SketchesArgumentException.class) public void incompatibleSeeds() { ArrayOfDoublesSketch sketchA = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); ArrayOfDoublesSketch sketchB = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(2).build(); ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().setSeed(3).buildAnotB(); aNotB.update(sketchA, sketchB); }
@Test(expectedExceptions = SketchesArgumentException.class) public void incompatibleSeedB() { ArrayOfDoublesSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); aNotB.update(null, sketch); }
@Test(expectedExceptions = SketchesArgumentException.class) public void incompatibleInputSketchMoreValues() { ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setNumberOfValues(2).build(); ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().buildUnion(); union.update(sketch); }
@Test(expectedExceptions = SketchesArgumentException.class) public void incompatibleSeedA() { ArrayOfDoublesSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); ArrayOfDoublesAnotB aNotB = new ArrayOfDoublesSetOperationBuilder().buildAnotB(); aNotB.update(sketch, null); }
@Test(expectedExceptions = SketchesArgumentException.class) public void fromQuickSelectSketchNotEnoughMemory() { ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(WritableMemory.wrap(new byte[1000000])); us.update(1, new double[] {1.0}); us.compact(WritableMemory.wrap(new byte[39])); }
@Test public void heapifyAndUpdateSketch() { ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().build(); sketch1.update(1, new double[] {1}); // downcasting is not recommended, for testing only ArrayOfDoublesUpdatableSketch sketch2 = (ArrayOfDoublesUpdatableSketch) ArrayOfDoublesSketches.heapifySketch(Memory.wrap(sketch1.toByteArray())); sketch2.update(2, new double[] {1}); Assert.assertEquals(sketch2.getEstimate(), 2.0); }
@Test public void serializeDeserializeSampling() { int sketchSize = 16384; int numberOfUniques = sketchSize; ArrayOfDoublesUpdatableSketch sketch1 = new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(sketchSize).setSamplingProbability(0.5f).build(); for (int i = 0; i < numberOfUniques; i++) sketch1.update(i, new double[] {1.0}); ArrayOfDoublesSketch sketch2 = ArrayOfDoublesSketch.heapify(Memory.wrap(sketch1.toByteArray())); Assert.assertTrue(sketch2.isEstimationMode()); Assert.assertEquals(sketch2.getEstimate() / numberOfUniques, 1.0, 0.01); Assert.assertEquals(sketch2.getRetainedEntries() / (double) numberOfUniques, 0.5, 0.01); Assert.assertEquals(sketch1.getTheta(), sketch2.getTheta()); }
@Test(expectedExceptions = SketchesArgumentException.class) public void incompatibleSeeds() { ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); ArrayOfDoublesIntersection intersection = new ArrayOfDoublesSetOperationBuilder().setSeed(2).buildIntersection(); intersection.update(sketch, combiner); } }
@Test(expectedExceptions = SketchesArgumentException.class) public void incompatibleSeeds() { ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().setSeed(1).build(); ArrayOfDoublesUnion union = new ArrayOfDoublesSetOperationBuilder().setSeed(2).buildUnion(); union.update(sketch); }
@Test public void updatesOfAllKeyTypes() { ArrayOfDoublesUpdatableSketch sketch = new ArrayOfDoublesUpdatableSketchBuilder().build(); sketch.update(1L, new double[] {1.0}); sketch.update(2.0, new double[] {1.0}); sketch.update(new byte[] {3}, new double[] {1.0}); sketch.update(new int[] {4}, new double[] {1.0}); sketch.update(new long[] {5L}, new double[] {1.0}); sketch.update("a", new double[] {1.0}); Assert.assertEquals(sketch.getEstimate(), 6.0); }
@Test(expectedExceptions = SketchesArgumentException.class) public void deserializeWithWrongSeed() { ArrayOfDoublesUpdatableSketch us = new ArrayOfDoublesUpdatableSketchBuilder().build(WritableMemory.wrap(new byte[1000000])); for (int i = 0; i < 8192; i++) { us.update(i, new double[] {1.0}); } ArrayOfDoublesCompactSketch sketch1 = us.compact(WritableMemory.wrap(new byte[1000000])); ArrayOfDoublesSketches.wrapSketch(WritableMemory.wrap(sketch1.toByteArray()), 123); }