/**
 * Builds a TDigest using whichever implementation is currently recommended.
 * At present this simply delegates to {@code createMergingDigest}, as MergingDigest is generally
 * the best known implementation.
 *
 * @param compression The compression parameter. A value of 100 is common for normal use, while
 *                    1000 is extremely large. The number of centroids retained will be a
 *                    smallish (usually less than 10) multiple of this number.
 * @return the newly created TDigest
 */
@SuppressWarnings({"unused", "WeakerAccess", "SameParameterValue"})
public static TDigest createDigest(double compression) {
  // Single point of indirection: change the delegate here if the recommended type changes.
  final TDigest recommended = createMergingDigest(compression);
  return recommended;
}
/**
 * Extracts the TDigest stored for the given group key.
 * When the holder has no digest for that key, a newly created merging digest built with
 * {@code DEFAULT_TDIGEST_COMPRESSION} is returned instead; it is not written back to the holder.
 *
 * @param groupByResultHolder Result holder to read from
 * @param groupKey Group key whose digest is requested
 * @return the stored TDigest, or a new one if none is stored
 */
@Nonnull
@Override
public TDigest extractGroupByResult(@Nonnull GroupByResultHolder groupByResultHolder, int groupKey) {
  TDigest stored = groupByResultHolder.getResult(groupKey);
  if (stored != null) {
    return stored;
  }
  return TDigest.createMergingDigest(DEFAULT_TDIGEST_COMPRESSION);
}
/**
 * Extracts the TDigest held by the aggregation result holder.
 * When the holder has no digest, a newly created merging digest built with
 * {@code DEFAULT_TDIGEST_COMPRESSION} is returned instead; it is not written back to the holder.
 *
 * @param aggregationResultHolder Result holder to read from
 * @return the stored TDigest, or a new one if none is stored
 */
@Nonnull
@Override
public TDigest extractAggregationResult(@Nonnull AggregationResultHolder aggregationResultHolder) {
  TDigest stored = aggregationResultHolder.getResult();
  if (stored != null) {
    return stored;
  }
  return TDigest.createMergingDigest(DEFAULT_TDIGEST_COMPRESSION);
}
/**
 * Fetches the TDigest from the result holder, lazily creating and storing one when the holder
 * does not yet contain a digest.
 *
 * @param aggregationResultHolder Result holder
 * @return TDigest held by the result holder (pre-existing or newly stored)
 */
protected static TDigest getTDigest(@Nonnull AggregationResultHolder aggregationResultHolder) {
  TDigest existing = aggregationResultHolder.getResult();
  if (existing != null) {
    return existing;
  }

  // Nothing stored yet: create a digest with the default compression and register it in the
  // holder so that later calls get the same instance.
  TDigest created = TDigest.createMergingDigest(DEFAULT_TDIGEST_COMPRESSION);
  aggregationResultHolder.setValue(created);
  return created;
}
/**
 * Produces the initial aggregated TDigest for a raw value and tracks the largest byte size seen.
 * <ul>
 *   <li>A {@code byte[]} is passed to {@code deserializeAggregatedValue}; {@code _maxByteSize} is
 *       raised to the array length if that is larger.</li>
 *   <li>Any other value is cast to {@link Number} and added, as a double, to a newly created
 *       merging digest; {@code _maxByteSize} is raised to the digest's {@code byteSize()} if that
 *       is larger.</li>
 * </ul>
 *
 * @param rawValue either a {@code byte[]} or a {@link Number}
 * @return the TDigest representing the raw value
 */
@Override
public TDigest getInitialAggregatedValue(Object rawValue) {
  if (!(rawValue instanceof byte[])) {
    // Plain numeric value: wrap it in a single-value digest.
    TDigest fresh = TDigest.createMergingDigest(PercentileTDigestAggregationFunction.DEFAULT_TDIGEST_COMPRESSION);
    fresh.add(((Number) rawValue).doubleValue());
    _maxByteSize = Math.max(_maxByteSize, fresh.byteSize());
    return fresh;
  }

  // Byte array: deserialize first, then record the size.
  byte[] serialized = (byte[]) rawValue;
  TDigest restored = deserializeAggregatedValue(serialized);
  _maxByteSize = Math.max(_maxByteSize, serialized.length);
  return restored;
}
/**
 * Fetches the TDigest for the given group key, lazily creating and storing one when the holder
 * has no digest for that key.
 *
 * @param groupByResultHolder Result holder
 * @param groupKey Group key for which to return the TDigest
 * @return TDigest for the group key (pre-existing or newly stored)
 */
protected static TDigest getTDigest(@Nonnull GroupByResultHolder groupByResultHolder, int groupKey) {
  TDigest existing = groupByResultHolder.getResult(groupKey);
  if (existing != null) {
    return existing;
  }

  // Nothing stored for this key yet: create a digest with the default compression and register
  // it under the key so that later calls get the same instance.
  TDigest created = TDigest.createMergingDigest(DEFAULT_TDIGEST_COMPRESSION);
  groupByResultHolder.setValueForKey(groupKey, created);
  return created;
}
}
/**
 * Generates a random raw value: a merging TDigest created with {@code COMPRESSION}, fed two
 * random long values, and serialized with {@code ObjectSerDeUtils.TDIGEST_SER_DE}.
 *
 * @param random source of the random values
 * @return the result of serializing the digest
 */
@Override
Object getRandomRawValue(Random random) {
  TDigest sample = TDigest.createMergingDigest(COMPRESSION);
  // Two data points per generated digest.
  for (int i = 0; i < 2; i++) {
    sample.add(random.nextLong());
  }
  return ObjectSerDeUtils.TDIGEST_SER_DE.serialize(sample);
}
/**
 * Round-trips randomly populated TDigests through {@code ObjectSerDeUtils} and verifies that
 * every whole-percent quantile (0.00 to 1.00) of the deserialized digest matches the original
 * within a tolerance of 1e-5.
 */
@Test
public void testTDigest() {
  for (int iteration = 0; iteration < NUM_ITERATIONS; iteration++) {
    TDigest original = TDigest.createMergingDigest(PercentileTDigestAggregationFunction.DEFAULT_TDIGEST_COMPRESSION);

    // Populate with between 1 and 100 random values.
    int numValues = RANDOM.nextInt(100) + 1;
    for (int added = 0; added < numValues; added++) {
      original.add(RANDOM.nextDouble());
    }

    byte[] serialized = ObjectSerDeUtils.serialize(original);
    TDigest roundTripped = ObjectSerDeUtils.deserialize(serialized, ObjectSerDeUtils.ObjectType.TDigest);

    for (int percent = 0; percent <= 100; percent++) {
      assertEquals(roundTripped.quantile(percent / 100.0), original.quantile(percent / 100.0), 1e-5);
    }
  }
}
}
GenericData.Record record = new GenericData.Record(avroSchema); TDigest tDigest = TDigest.createMergingDigest(PercentileTDigestAggregationFunction.DEFAULT_TDIGEST_COMPRESSION); tDigest.add(_random.nextDouble());
valueMap.put(DOUBLE_COLUMN, value); TDigest tDigest = TDigest.createMergingDigest(PercentileTDigestAggregationFunction.DEFAULT_TDIGEST_COMPRESSION); tDigest.add(value); ByteBuffer byteBuffer = ByteBuffer.allocate(tDigest.byteSize());
TDigest tDigest = TDigest.createMergingDigest(PercentileTDigestAggregationFunction.DEFAULT_TDIGEST_COMPRESSION); for (int j = 0; j < numMultiValues; j++) { double value = RANDOM.nextDouble() * VALUE_RANGE;
/**
 * Creates a histogram whose {@code tdigest} field is initialised with a new merging TDigest
 * built with {@code DEFAULT_COMPRESSION}.
 */
public TDigestNumericHistogram() {
  // The no-arg superclass constructor is invoked implicitly.
  this.tdigest = TDigest.createMergingDigest(DEFAULT_COMPRESSION);
}