public void merge(PercentileCounter counter) {
  assert this.compression == counter.compression;
  registers.add(counter.registers);
}

public double getResultEstimate() {
  return registers.quantile(quantileRatio);
}
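For context, a minimal usage sketch for the merge/estimate pair above. The (compression, quantileRatio) constructor and the add(double) ingest method are assumptions here, not confirmed by this snippet:

// Sketch only: the constructor and add(double) are assumed APIs of PercentileCounter.
PercentileCounter c1 = new PercentileCounter(100, 0.5);  // track the median
PercentileCounter c2 = new PercentileCounter(100, 0.5);  // same compression, so merge() is legal
for (int i = 0; i < 1000; i++) {
  c1.add(i);
  c2.add(1000 + i);
}
c1.merge(c2);                             // fold c2's registers into c1
double median = c1.getResultEstimate();   // roughly 1000 for this data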
@Override
public TDigest getInitialAggregatedValue(Object rawValue) {
  TDigest initialValue;
  if (rawValue instanceof byte[]) {
    // Raw value is a pre-serialized TDigest: deserialize it directly
    byte[] bytes = (byte[]) rawValue;
    initialValue = deserializeAggregatedValue(bytes);
    _maxByteSize = Math.max(_maxByteSize, bytes.length);
  } else {
    // Raw value is a single number: seed a fresh digest with it
    initialValue = TDigest.createMergingDigest(PercentileTDigestAggregationFunction.DEFAULT_TDIGEST_COMPRESSION);
    initialValue.add(((Number) rawValue).doubleValue());
    _maxByteSize = Math.max(_maxByteSize, initialValue.byteSize());
  }
  return initialValue;
}
/**
 * Calculates percentile from {@link TDigest}.
 * <p>Handles the case where there is only one value in the TDigest.
 */
public static double calculatePercentile(@Nonnull TDigest tDigest, int percentile) {
  if (tDigest.size() == 1) {
    // Special case: with a single value, quantile() cannot interpolate, so return the lone centroid's mean
    return tDigest.centroids().iterator().next().mean();
  } else {
    return tDigest.quantile(percentile / 100.0);
  }
}
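A short usage sketch for the helper above, using only t-digest factory and add calls that appear elsewhere in this section:

// Many-value case: quantile() interpolates normally.
TDigest tDigest = TDigest.createMergingDigest(100);
for (int i = 1; i <= 10000; i++) {
  tDigest.add(i);
}
double p95 = calculatePercentile(tDigest, 95);  // close to 9500

// Single-value case: the helper returns the lone centroid's mean directly.
TDigest single = TDigest.createMergingDigest(100);
single.add(42.0);
double p50 = calculatePercentile(single, 50);   // exactly 42.0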
GenericData.Record record = new GenericData.Record(avroSchema);
TDigest tDigest = TDigest.createMergingDigest(PercentileTDigestAggregationFunction.DEFAULT_TDIGEST_COMPRESSION);

// Serialize the digest with a single value for the fixed-size expectation
tDigest.add(_random.nextDouble());
ByteBuffer buffer = ByteBuffer.allocate(tDigest.byteSize());
tDigest.asBytes(buffer);
_fixedExpected.add(buffer.array());

// Add a second value and re-serialize for the variable-size expectation
tDigest.add(_random.nextDouble());
buffer = ByteBuffer.allocate(tDigest.byteSize());
tDigest.asBytes(buffer);
_varExpected.add(buffer.array());
@Test
public void testBasic() {
  int times = 1;
  int compression = 100;
  for (int t = 0; t < times; t++) {
    TDigest tDigest = TDigest.createAvlTreeDigest(compression);
    Random random = new Random();
    int dataSize = 10000;
    List<Double> dataset = Lists.newArrayListWithCapacity(dataSize);
    for (int i = 0; i < dataSize; i++) {
      double d = random.nextDouble();
      tDigest.add(d);
      dataset.add(d);
    }
    Collections.sort(dataset);
    // Compare the digest's median estimate against the exact median
    double actualResult = tDigest.quantile(0.5);
    double expectedResult = MathUtil.findMedianInSortedList(dataset);
    assertEquals(expectedResult, actualResult, 0.01);
  }
}
@Test
public void testTDigest() {
  for (int i = 0; i < NUM_ITERATIONS; i++) {
    TDigest expected = TDigest.createMergingDigest(PercentileTDigestAggregationFunction.DEFAULT_TDIGEST_COMPRESSION);
    int size = RANDOM.nextInt(100) + 1;
    for (int j = 0; j < size; j++) {
      expected.add(RANDOM.nextDouble());
    }
    // Serialize and deserialize, then verify quantiles survive the round trip
    byte[] bytes = ObjectSerDeUtils.serialize(expected);
    TDigest actual = ObjectSerDeUtils.deserialize(bytes, ObjectSerDeUtils.ObjectType.TDigest);
    for (int j = 0; j <= 100; j++) {
      assertEquals(actual.quantile(j / 100.0), expected.quantile(j / 100.0), 1e-5);
    }
  }
}
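The same round-trip invariant can be checked with the library's own byte format instead of ObjectSerDeUtils; a minimal sketch using asBytes and MergingDigest.fromBytes:

TDigest expected = TDigest.createMergingDigest(100);
for (int j = 0; j < 50; j++) {
  expected.add(RANDOM.nextDouble());
}
// asBytes advances the buffer position, so flip before reading it back
ByteBuffer buf = ByteBuffer.allocate(expected.byteSize());
expected.asBytes(buf);
buf.flip();
TDigest actual = MergingDigest.fromBytes(buf);
assertEquals(actual.quantile(0.5), expected.quantile(0.5), 1e-5);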
@Override
public void processHistogram(MetricName name, Histogram histogram, FlushProcessorContext context)
    throws Exception {
  if (histogram instanceof WavefrontHistogram && useWavefrontHistograms) {
    WavefrontHistogram wavefrontHistogram = (WavefrontHistogram) histogram;
    wavefront.report.Histogram.Builder builder = wavefront.report.Histogram.newBuilder();
    builder.setBins(Lists.newLinkedList());
    builder.setCounts(Lists.newLinkedList());
    long minMillis = Long.MAX_VALUE;
    if (wavefrontHistogram.count() == 0) return;
    // Flatten each minute bin to its median and count
    for (WavefrontHistogram.MinuteBin minuteBin : wavefrontHistogram.bins(true)) {
      builder.getBins().add(minuteBin.getDist().quantile(.5));
      builder.getCounts().add(Math.toIntExact(minuteBin.getDist().size()));
      minMillis = Long.min(minMillis, minuteBin.getMinMillis());
    }
    builder.setType(HistogramType.TDIGEST);
    builder.setDuration(Math.toIntExact(currentMillis.get() - minMillis));
    context.report(builder.build());
  } else {
    // Fall back to exploded summary/sampling sub-metrics
    context.reportSubMetric(histogram.count(), "count");
    for (Map.Entry<String, Double> entry
        : MetricsToTimeseries.explodeSummarizable(histogram, reportEmptyHistogramStats).entrySet()) {
      context.reportSubMetric(entry.getValue(), entry.getKey());
    }
    for (Map.Entry<String, Double> entry
        : MetricsToTimeseries.explodeSampling(histogram, reportEmptyHistogramStats).entrySet()) {
      context.reportSubMetric(entry.getValue(), entry.getKey());
    }
    histogram.clear();
  }
  sentCounter.inc();
}
@Override
Object getRandomRawValue(Random random) {
  TDigest tDigest = TDigest.createMergingDigest(COMPRESSION);
  tDigest.add(random.nextLong());
  tDigest.add(random.nextLong());
  return ObjectSerDeUtils.TDIGEST_SER_DE.serialize(tDigest);
}
Iterator<Centroid> ix1 = d1.centroids().iterator();
Iterator<Centroid> ix2 = d2.centroids().iterator();
double diff = 0;
double x1 = d1.getMin();
double x2 = d2.getMin();
// Walk both digests in value order, tracking the largest CDF gap
while (x1 <= d1.getMax() && x2 <= d2.getMax()) {
  if (x1 < x2) {
    diff = maxDiff(d1, d2, diff, x1);
    x1 = nextValue(d1, ix1, x1);
  } else if (x2 < x1) {
    diff = maxDiff(d1, d2, diff, x2);
    x2 = nextValue(d2, ix2, x2);
  } else {
    double q1 = d1.cdf(x1);
    double q2 = d2.cdf(x2);
    if (q1 < q2) {
      diff = maxDiff(d1, d2, diff, x1);
      x1 = nextValue(d1, ix1, x1);
    } else if (q2 < q1) {
      diff = maxDiff(d1, d2, diff, x2);
      x2 = nextValue(d2, ix2, x2);
    } else {
      x1 = nextValue(d1, ix1, x1);
      x2 = nextValue(d2, ix2, x2);
    }
  }
}
// Drain whichever digest still has values left
while (x1 <= d1.getMax()) {
  diff = maxDiff(d1, d2, diff, x1);
  x1 = nextValue(d1, ix1, x1);
}
while (x2 <= d2.getMax()) {
  diff = maxDiff(d1, d2, diff, x2);
  x2 = nextValue(d2, ix2, x2);
}
// Scale the max gap into the Kolmogorov-Smirnov statistic
long n1 = d1.size();
long n2 = d2.size();
return diff * Math.sqrt((double) n1 * n2 / (n1 + n2));
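The loop above relies on two helpers that are not shown. A plausible minimal sketch of their behavior, assuming maxDiff tracks the largest CDF gap seen so far and nextValue advances past x to the next centroid mean (both are reconstructions, not verbatim library code):

// Assumed helper: the KS statistic is the max |cdf1(x) - cdf2(x)| over probe points.
private static double maxDiff(TDigest d1, TDigest d2, double diff, double x) {
  return Math.max(diff, Math.abs(d1.cdf(x) - d2.cdf(x)));
}

// Assumed helper: step to the next centroid mean strictly greater than x;
// returning a value past getMax() terminates the caller's loops.
private static double nextValue(TDigest d, Iterator<Centroid> ix, double x) {
  while (ix.hasNext()) {
    double mean = ix.next().mean();
    if (mean > x) {
      return mean;
    }
  }
  return Math.nextUp(d.getMax());
}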
@Override
public void add(TDigest other) {
  List<Centroid> tmp = new ArrayList<>();
  for (Centroid centroid : other.centroids()) {
    tmp.add(centroid);
  }
  // Shuffle before re-inserting so insertion order does not bias the tree
  Collections.shuffle(tmp, gen);
  for (Centroid centroid : tmp) {
    add(centroid.mean(), centroid.count(), centroid);
  }
}
private void internalProcessWavefrontHistogram(WavefrontHistogram hist, Context context) throws Exception {
  final JsonGenerator json = context.json;
  json.writeStartObject();
  json.writeArrayFieldStart("bins");
  for (WavefrontHistogram.MinuteBin bin : hist.bins(clear)) {
    final Collection<Centroid> centroids = bin.getDist().centroids();
    json.writeStartObject();
    // Count
    json.writeNumberField("count", bin.getDist().size());
    // Start
    json.writeNumberField("startMillis", bin.getMinMillis());
    // Duration
    json.writeNumberField("durationMillis", 60 * 1000);
    // Means
    json.writeArrayFieldStart("means");
    for (Centroid c : centroids) {
      json.writeNumber(c.mean());
    }
    json.writeEndArray();
    // Counts
    json.writeArrayFieldStart("counts");
    for (Centroid c : centroids) {
      json.writeNumber(c.count());
    }
    json.writeEndArray();
    json.writeEndObject();
  }
  json.writeEndArray();
  json.writeEndObject();
}
@Override
public void write(Kryo kryo, Output output, PercentileCounter counter) {
  int length = counter.getRegisters().byteSize();
  ByteBuffer buffer = ByteBuffer.allocate(length);
  counter.getRegisters().asSmallBytes(buffer);
  output.writeDouble(counter.getCompression());
  output.writeDouble(counter.getQuantileRatio());
  output.writeInt(buffer.position());
  output.write(buffer.array(), 0, buffer.position());
}
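The matching read side is not shown here; a sketch of what it plausibly looks like, given that asSmallBytes on the write side pairs with AVLTreeDigest.fromBytes. The PercentileCounter constructor and register access used below are assumptions:

@Override
public PercentileCounter read(Kryo kryo, Input input, Class<PercentileCounter> type) {
  double compression = input.readDouble();
  double quantileRatio = input.readDouble();
  int length = input.readInt();
  byte[] bytes = input.readBytes(length);
  // Assumed constructor; the deserialized digest is folded into the fresh registers.
  PercentileCounter counter = new PercentileCounter(compression, quantileRatio);
  counter.getRegisters().add(AVLTreeDigest.fromBytes(ByteBuffer.wrap(bytes)));
  return counter;
}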
private void testPercentileSize(int sumNums, Integer sqrtNum, Integer compression) throws Exception {
  compression = compression == null ? DEFAULT_COMPRESSION : compression;
  PercentileAggregator aggregator = createPercentileAggreator(sumNums, sqrtNum, compression);
  double actual = getActualSize(aggregator);
  double estimate = getEstimateSize((int) aggregator.getState().getRegisters().size(), 1, compression);
  // The estimated size should be within 30% of the measured size
  assertTrue(Math.abs(actual - estimate) / actual < 0.3);
  aggregator.reset();
}
/**
 * Creates a TDigest of whichever type is the currently recommended type. MergingDigest is generally the best
 * known implementation right now.
 *
 * @param compression The compression parameter. 100 is a common value for normal uses. 1000 is extremely large.
 *                    The number of centroids retained will be a smallish (usually less than 10) multiple of this number.
 * @return the TDigest
 */
@SuppressWarnings({"unused", "WeakerAccess", "SameParameterValue"})
public static TDigest createDigest(double compression) {
  return createMergingDigest(compression);
}
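Typical use of the factory, with the trade-off from the javadoc in mind (higher compression keeps more centroids, spending memory for accuracy):

TDigest digest = TDigest.createDigest(100);  // common default compression
for (double v : new double[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) {
  digest.add(v);
}
double q90 = digest.quantile(0.9);  // near 9 for this tiny sample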
other.compress();
size += other.centroidCount();
if (other instanceof MergingDigest) {
  // Fast path: copy the backing mean/weight arrays directly
  MergingDigest md = (MergingDigest) other;
  System.arraycopy(md.mean, 0, m, offset, md.lastUsedCell);
  System.arraycopy(md.weight, 0, w, offset, md.lastUsedCell);
  offset += md.lastUsedCell;
  if (data != null) {
    for (Centroid centroid : other.centroids()) {
      data.add(centroid.data());
    }
  }
} else {
  // Generic path: copy centroid by centroid
  for (Centroid centroid : other.centroids()) {
    m[offset] = centroid.mean();
    w[offset] = centroid.count();
    offset++;
  }
}