public ArrayOfDoublesSketchBuildAggregator( final DimensionSelector keySelector, final List<BaseDoubleColumnValueSelector> valueSelectors, final int nominalEntries ) { this.keySelector = keySelector; this.valueSelectors = valueSelectors.toArray(new BaseDoubleColumnValueSelector[0]); values = new double[valueSelectors.size()]; sketch = new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(nominalEntries) .setNumberOfValues(valueSelectors.size()).build(); }
/**
 * Builds a new updatable sketch into the slice of {@code buf} that starts at
 * {@code position} and spans {@code maxIntermediateSize} bytes.
 *
 * <p>The buffer is wrapped as little-endian writable memory. The sketch object returned
 * by the builder is discarded; presumably the build call is made for its effect on the
 * backing region.
 *
 * @param buf      buffer that holds this aggregator's intermediate state
 * @param position offset within {@code buf} at which the region begins
 */
@Override
public void init(final ByteBuffer buf, final int position)
{
  final WritableMemory mem = WritableMemory.wrap(buf, ByteOrder.LITTLE_ENDIAN);
  final WritableMemory region = mem.writableRegion(position, maxIntermediateSize);
  // setNumberOfValues was previously chained twice with the same argument; once is enough.
  new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(nominalEntries)
      .setNumberOfValues(valueSelectors.length)
      .build(region);
}
/**
 * Writes 20 tab-separated rows to {@code array_of_doubles_sketch_data.tsv}, resolved
 * against the default file system.
 *
 * <p>Each row holds the literal timestamp {@code 2015010101}, a product name
 * {@code product_N} with a random N from 1 to 10, and the Base64 encoding of a compacted
 * sketch. Every sketch is updated with two consecutive keys from a counter that keeps
 * running across rows, each key carrying the single value 1.
 *
 * @throws Exception if writing the file fails
 */
private static void generateSketches() throws Exception
{
  final Path outputFile = FileSystems.getDefault().getPath("array_of_doubles_sketch_data.tsv");
  try (BufferedWriter writer = Files.newBufferedWriter(outputFile, StandardCharsets.UTF_8)) {
    final Random random = ThreadLocalRandom.current();
    int nextKey = 0;
    for (int row = 0; row < 20; row++) {
      final ArrayOfDoublesUpdatableSketch sketch =
          new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(1024).build();

      // Two fresh keys per sketch; the counter is shared by all rows.
      for (int n = 0; n < 2; n++) {
        sketch.update(nextKey++, new double[] {1});
      }

      writer.write("2015010101\t");
      writer.write("product_" + (random.nextInt(10) + 1) + '\t');
      writer.write(StringUtils.encodeBase64String(sketch.compact().toByteArray()));
      writer.newLine();
    }
  }
}
/**
 * Expects a {@link SketchesArgumentException} when a sketch with 32 nominal entries is
 * built into writable memory wrapping a 1055-byte array.
 */
@Test(expectedExceptions = SketchesArgumentException.class)
public void notEnoughMemory() {
  // NOTE(review): 1055 bytes is presumably just under the required size - confirm.
  final byte[] undersized = new byte[1055];
  new ArrayOfDoublesUpdatableSketchBuilder()
      .setNominalEntries(32)
      .build(WritableMemory.wrap(undersized));
}
}
/**
 * Round-trips a sketch built with sampling probability 0.5 through
 * {@code toByteArray()} and {@code ArrayOfDoublesSketch.heapify}, then checks the
 * restored sketch: it is in estimation mode, its estimate is within 1% of the number of
 * keys fed in, it retains about half of them, and its theta equals the original's.
 */
@Test
public void serializeDeserializeSampling() {
  final int nominalEntries = 16384;
  final int uniqueKeys = nominalEntries;

  final ArrayOfDoublesUpdatableSketch original =
      new ArrayOfDoublesUpdatableSketchBuilder()
          .setNominalEntries(nominalEntries)
          .setSamplingProbability(0.5f)
          .build();
  int key = 0;
  while (key < uniqueKeys) {
    original.update(key, new double[] {1.0});
    key++;
  }

  final ArrayOfDoublesSketch restored =
      ArrayOfDoublesSketch.heapify(Memory.wrap(original.toByteArray()));

  Assert.assertTrue(restored.isEstimationMode());
  final double estimateRatio = restored.getEstimate() / uniqueKeys;
  Assert.assertEquals(estimateRatio, 1.0, 0.01);
  final double retainedRatio = restored.getRetainedEntries() / (double) uniqueKeys;
  Assert.assertEquals(retainedRatio, 0.5, 0.01);
  Assert.assertEquals(original.getTheta(), restored.getTheta());
}
/**
 * Memory-backed variant of the sampling round-trip: a sketch with sampling probability
 * 0.5 is built into writable memory over a 1000000-byte array, serialized with
 * {@code toByteArray()}, and re-read via {@code ArrayOfDoublesSketch.wrap}. The wrapped
 * sketch must be in estimation mode, estimate within 1% of the number of keys fed in,
 * retain about half of them, and report the same theta as the original.
 */
@Test
public void serializeDeserializeSampling() {
  final int nominalEntries = 16384;
  final int uniqueKeys = nominalEntries;

  final ArrayOfDoublesUpdatableSketch original =
      new ArrayOfDoublesUpdatableSketchBuilder()
          .setNominalEntries(nominalEntries)
          .setSamplingProbability(0.5f)
          .build(WritableMemory.wrap(new byte[1000000]));
  int key = 0;
  while (key < uniqueKeys) {
    original.update(key, new double[] {1.0});
    key++;
  }

  final ArrayOfDoublesSketch wrapped =
      ArrayOfDoublesSketch.wrap(WritableMemory.wrap(original.toByteArray()));

  Assert.assertTrue(wrapped.isEstimationMode());
  final double estimateRatio = wrapped.getEstimate() / uniqueKeys;
  Assert.assertEquals(estimateRatio, 1.0, 0.01);
  final double retainedRatio = wrapped.getRetainedEntries() / (double) uniqueKeys;
  Assert.assertEquals(retainedRatio, 0.5, 0.01);
  Assert.assertEquals(original.getTheta(), wrapped.getTheta());
}
public ArrayOfDoublesSketchBuildAggregator( final DimensionSelector keySelector, final List<BaseDoubleColumnValueSelector> valueSelectors, final int nominalEntries ) { this.keySelector = keySelector; this.valueSelectors = valueSelectors.toArray(new BaseDoubleColumnValueSelector[0]); values = new double[valueSelectors.size()]; sketch = new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(nominalEntries) .setNumberOfValues(valueSelectors.size()).build(); }
/**
 * Builds a new updatable sketch into the slice of {@code buf} that starts at
 * {@code position} and spans {@code maxIntermediateSize} bytes.
 *
 * <p>The sketch object returned by the builder is discarded; presumably the build call
 * is made for its effect on the backing region.
 *
 * @param buf      buffer that holds this aggregator's intermediate state
 * @param position offset within {@code buf} at which the region begins
 */
@Override
public void init(final ByteBuffer buf, final int position)
{
  final WritableMemory mem = WritableMemory.wrap(buf);
  final WritableMemory region = mem.writableRegion(position, maxIntermediateSize);
  // setNumberOfValues was previously chained twice with the same argument; once is enough.
  new ArrayOfDoublesUpdatableSketchBuilder().setNominalEntries(nominalEntries)
      .setNumberOfValues(valueSelectors.length)
      .build(region);
}