public void merge(PercentileCounter counter) {
  assert this.compression == counter.compression;
  registers.add(counter.registers);
}

public double getResultEstimate() {
  return registers.quantile(quantileRatio);
}
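For context, a minimal usage sketch for the merge/estimate pair above. The (compression, quantileRatio) constructor and the add(double) ingest method are assumptions here, not confirmed by this snippet:

// Sketch only: the constructor and add(double) are assumed APIs of PercentileCounter.
PercentileCounter c1 = new PercentileCounter(100, 0.5);  // track the median
PercentileCounter c2 = new PercentileCounter(100, 0.5);  // same compression, so merge() is legal
for (int i = 0; i < 1000; i++) {
  c1.add(i);
  c2.add(1000 + i);
}
c1.merge(c2);                             // fold c2's registers into c1
double median = c1.getResultEstimate();   // roughly 1000 for this data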
@Override
public TDigest getInitialAggregatedValue(Object rawValue) {
  TDigest initialValue;
  if (rawValue instanceof byte[]) {
    // Raw value is a pre-serialized TDigest: deserialize it directly
    byte[] bytes = (byte[]) rawValue;
    initialValue = deserializeAggregatedValue(bytes);
    _maxByteSize = Math.max(_maxByteSize, bytes.length);
  } else {
    // Raw value is a single number: seed a fresh digest with it
    initialValue = TDigest.createMergingDigest(PercentileTDigestAggregationFunction.DEFAULT_TDIGEST_COMPRESSION);
    initialValue.add(((Number) rawValue).doubleValue());
    _maxByteSize = Math.max(_maxByteSize, initialValue.byteSize());
  }
  return initialValue;
}
/**
 * Calculates percentile from {@link TDigest}.
 * <p>Handles the case where there is only one value in the TDigest.
 */
public static double calculatePercentile(@Nonnull TDigest tDigest, int percentile) {
  if (tDigest.size() == 1) {
    // Special case: with a single value, quantile() cannot interpolate, so return the lone centroid's mean
    return tDigest.centroids().iterator().next().mean();
  } else {
    return tDigest.quantile(percentile / 100.0);
  }
}
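A short usage sketch for the helper above, using only t-digest factory and add calls that appear elsewhere in this section:

// Many-value case: quantile() interpolates normally.
TDigest tDigest = TDigest.createMergingDigest(100);
for (int i = 1; i <= 10000; i++) {
  tDigest.add(i);
}
double p95 = calculatePercentile(tDigest, 95);  // close to 9500

// Single-value case: the helper returns the lone centroid's mean directly.
TDigest single = TDigest.createMergingDigest(100);
single.add(42.0);
double p50 = calculatePercentile(single, 50);   // exactly 42.0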
GenericData.Record record = new GenericData.Record(avroSchema);
TDigest tDigest = TDigest.createMergingDigest(PercentileTDigestAggregationFunction.DEFAULT_TDIGEST_COMPRESSION);

// Serialize the digest with a single value for the fixed-size expectation
tDigest.add(_random.nextDouble());
ByteBuffer buffer = ByteBuffer.allocate(tDigest.byteSize());
tDigest.asBytes(buffer);
_fixedExpected.add(buffer.array());

// Add a second value and re-serialize for the variable-size expectation
tDigest.add(_random.nextDouble());
buffer = ByteBuffer.allocate(tDigest.byteSize());
tDigest.asBytes(buffer);
_varExpected.add(buffer.array());
@Test
public void testBasic() {
  int times = 1;
  int compression = 100;
  for (int t = 0; t < times; t++) {
    TDigest tDigest = TDigest.createAvlTreeDigest(compression);
    Random random = new Random();
    int dataSize = 10000;
    List<Double> dataset = Lists.newArrayListWithCapacity(dataSize);
    for (int i = 0; i < dataSize; i++) {
      double d = random.nextDouble();
      tDigest.add(d);
      dataset.add(d);
    }
    Collections.sort(dataset);
    // Compare the digest's median estimate against the exact median
    double actualResult = tDigest.quantile(0.5);
    double expectedResult = MathUtil.findMedianInSortedList(dataset);
    assertEquals(expectedResult, actualResult, 0.01);
  }
}
@Test
public void testTDigest() {
  for (int i = 0; i < NUM_ITERATIONS; i++) {
    TDigest expected = TDigest.createMergingDigest(PercentileTDigestAggregationFunction.DEFAULT_TDIGEST_COMPRESSION);
    int size = RANDOM.nextInt(100) + 1;
    for (int j = 0; j < size; j++) {
      expected.add(RANDOM.nextDouble());
    }
    // Serialize and deserialize, then verify quantiles survive the round trip
    byte[] bytes = ObjectSerDeUtils.serialize(expected);
    TDigest actual = ObjectSerDeUtils.deserialize(bytes, ObjectSerDeUtils.ObjectType.TDigest);
    for (int j = 0; j <= 100; j++) {
      assertEquals(actual.quantile(j / 100.0), expected.quantile(j / 100.0), 1e-5);
    }
  }
}
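The same round-trip invariant can be checked with the library's own byte format instead of ObjectSerDeUtils; a minimal sketch using asBytes and MergingDigest.fromBytes:

TDigest expected = TDigest.createMergingDigest(100);
for (int j = 0; j < 50; j++) {
  expected.add(RANDOM.nextDouble());
}
// asBytes advances the buffer position, so flip before reading it back
ByteBuffer buf = ByteBuffer.allocate(expected.byteSize());
expected.asBytes(buf);
buf.flip();
TDigest actual = MergingDigest.fromBytes(buf);
assertEquals(actual.quantile(0.5), expected.quantile(0.5), 1e-5);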
@Override
public void processHistogram(MetricName name, Histogram histogram, FlushProcessorContext context)
    throws Exception {
  if (histogram instanceof WavefrontHistogram && useWavefrontHistograms) {
    WavefrontHistogram wavefrontHistogram = (WavefrontHistogram) histogram;
    wavefront.report.Histogram.Builder builder = wavefront.report.Histogram.newBuilder();
    builder.setBins(Lists.newLinkedList());
    builder.setCounts(Lists.newLinkedList());
    long minMillis = Long.MAX_VALUE;
    if (wavefrontHistogram.count() == 0) return;
    // Flatten each minute bin to its median and count
    for (WavefrontHistogram.MinuteBin minuteBin : wavefrontHistogram.bins(true)) {
      builder.getBins().add(minuteBin.getDist().quantile(.5));
      builder.getCounts().add(Math.toIntExact(minuteBin.getDist().size()));
      minMillis = Long.min(minMillis, minuteBin.getMinMillis());
    }
    builder.setType(HistogramType.TDIGEST);
    builder.setDuration(Math.toIntExact(currentMillis.get() - minMillis));
    context.report(builder.build());
  } else {
    // Fall back to exploded summary/sampling sub-metrics
    context.reportSubMetric(histogram.count(), "count");
    for (Map.Entry<String, Double> entry
        : MetricsToTimeseries.explodeSummarizable(histogram, reportEmptyHistogramStats).entrySet()) {
      context.reportSubMetric(entry.getValue(), entry.getKey());
    }
    for (Map.Entry<String, Double> entry
        : MetricsToTimeseries.explodeSampling(histogram, reportEmptyHistogramStats).entrySet()) {
      context.reportSubMetric(entry.getValue(), entry.getKey());
    }
    histogram.clear();
  }
  sentCounter.inc();
}
@Override
Object getRandomRawValue(Random random) {
  TDigest tDigest = TDigest.createMergingDigest(COMPRESSION);
  tDigest.add(random.nextLong());
  tDigest.add(random.nextLong());
  return ObjectSerDeUtils.TDIGEST_SER_DE.serialize(tDigest);
}
Iterator<Centroid> ix1 = d1.centroids().iterator();
Iterator<Centroid> ix2 = d2.centroids().iterator();
double diff = 0;
double x1 = d1.getMin();
double x2 = d2.getMin();
// Walk both digests in value order, tracking the largest CDF gap
while (x1 <= d1.getMax() && x2 <= d2.getMax()) {
  if (x1 < x2) {
    diff = maxDiff(d1, d2, diff, x1);
    x1 = nextValue(d1, ix1, x1);
  } else if (x2 < x1) {
    diff = maxDiff(d1, d2, diff, x2);
    x2 = nextValue(d2, ix2, x2);
  } else {
    double q1 = d1.cdf(x1);
    double q2 = d2.cdf(x2);
    if (q1 < q2) {
      diff = maxDiff(d1, d2, diff, x1);
      x1 = nextValue(d1, ix1, x1);
    } else if (q2 < q1) {
      diff = maxDiff(d1, d2, diff, x2);
      x2 = nextValue(d2, ix2, x2);
    } else {
      x1 = nextValue(d1, ix1, x1);
      x2 = nextValue(d2, ix2, x2);
    }
  }
}
// Drain whichever digest still has values left
while (x1 <= d1.getMax()) {
  diff = maxDiff(d1, d2, diff, x1);
  x1 = nextValue(d1, ix1, x1);
}
while (x2 <= d2.getMax()) {
  diff = maxDiff(d1, d2, diff, x2);
  x2 = nextValue(d2, ix2, x2);
}
// Scale the max gap into the Kolmogorov-Smirnov statistic
long n1 = d1.size();
long n2 = d2.size();
return diff * Math.sqrt((double) n1 * n2 / (n1 + n2));
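The loop above relies on two helpers that are not shown. A plausible minimal sketch of their behavior, assuming maxDiff tracks the largest CDF gap seen so far and nextValue advances past x to the next centroid mean (both are reconstructions, not verbatim library code):

// Assumed helper: the KS statistic is the max |cdf1(x) - cdf2(x)| over probe points.
private static double maxDiff(TDigest d1, TDigest d2, double diff, double x) {
  return Math.max(diff, Math.abs(d1.cdf(x) - d2.cdf(x)));
}

// Assumed helper: step to the next centroid mean strictly greater than x;
// returning a value past getMax() terminates the caller's loops.
private static double nextValue(TDigest d, Iterator<Centroid> ix, double x) {
  while (ix.hasNext()) {
    double mean = ix.next().mean();
    if (mean > x) {
      return mean;
    }
  }
  return Math.nextUp(d.getMax());
}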
@Override
public void add(TDigest other) {
  List<Centroid> tmp = new ArrayList<>();
  for (Centroid centroid : other.centroids()) {
    tmp.add(centroid);
  }
  // Shuffle before re-inserting so insertion order does not bias the tree
  Collections.shuffle(tmp, gen);
  for (Centroid centroid : tmp) {
    add(centroid.mean(), centroid.count(), centroid);
  }
}
private void internalProcessWavefrontHistogram(WavefrontHistogram hist, Context context) throws Exception {
  final JsonGenerator json = context.json;
  json.writeStartObject();
  json.writeArrayFieldStart("bins");
  for (WavefrontHistogram.MinuteBin bin : hist.bins(clear)) {
    final Collection<Centroid> centroids = bin.getDist().centroids();
    json.writeStartObject();
    // Count
    json.writeNumberField("count", bin.getDist().size());
    // Start
    json.writeNumberField("startMillis", bin.getMinMillis());
    // Duration
    json.writeNumberField("durationMillis", 60 * 1000);
    // Means
    json.writeArrayFieldStart("means");
    for (Centroid c : centroids) {
      json.writeNumber(c.mean());
    }
    json.writeEndArray();
    // Counts
    json.writeArrayFieldStart("counts");
    for (Centroid c : centroids) {
      json.writeNumber(c.count());
    }
    json.writeEndArray();
    json.writeEndObject();
  }
  json.writeEndArray();
  json.writeEndObject();
}
@Override
public void write(Kryo kryo, Output output, PercentileCounter counter) {
  int length = counter.getRegisters().byteSize();
  ByteBuffer buffer = ByteBuffer.allocate(length);
  counter.getRegisters().asSmallBytes(buffer);
  output.writeDouble(counter.getCompression());
  output.writeDouble(counter.getQuantileRatio());
  output.writeInt(buffer.position());
  output.write(buffer.array(), 0, buffer.position());
}
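The matching read side is not shown here; a sketch of what it plausibly looks like, given that asSmallBytes on the write side pairs with AVLTreeDigest.fromBytes. The PercentileCounter constructor and register access used below are assumptions:

@Override
public PercentileCounter read(Kryo kryo, Input input, Class<PercentileCounter> type) {
  double compression = input.readDouble();
  double quantileRatio = input.readDouble();
  int length = input.readInt();
  byte[] bytes = input.readBytes(length);
  // Assumed constructor; the deserialized digest is folded into the fresh registers.
  PercentileCounter counter = new PercentileCounter(compression, quantileRatio);
  counter.getRegisters().add(AVLTreeDigest.fromBytes(ByteBuffer.wrap(bytes)));
  return counter;
}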
private void testPercentileSize(int sumNums, Integer sqrtNum, Integer compression) throws Exception {
  compression = compression == null ? DEFAULT_COMPRESSION : compression;
  PercentileAggregator aggregator = createPercentileAggreator(sumNums, sqrtNum, compression);
  double actual = getActualSize(aggregator);
  double estimate = getEstimateSize((int) aggregator.getState().getRegisters().size(), 1, compression);
  // The estimated size should be within 30% of the measured size
  assertTrue(Math.abs(actual - estimate) / actual < 0.3);
  aggregator.reset();
}
/**
 * Creates a TDigest of whichever type is the currently recommended type. MergingDigest is generally the best
 * known implementation right now.
 *
 * @param compression The compression parameter. 100 is a common value for normal uses. 1000 is extremely large.
 *                    The number of centroids retained will be a smallish (usually less than 10) multiple of this number.
 * @return the TDigest
 */
@SuppressWarnings({"unused", "WeakerAccess", "SameParameterValue"})
public static TDigest createDigest(double compression) {
  return createMergingDigest(compression);
}
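Typical use of the factory, with the trade-off from the javadoc in mind (higher compression keeps more centroids, spending memory for accuracy):

TDigest digest = TDigest.createDigest(100);  // common default compression
for (double v : new double[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) {
  digest.add(v);
}
double q90 = digest.quantile(0.9);  // near 9 for this tiny sample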
other.compress();
size += other.centroidCount();
if (other instanceof MergingDigest) {
  // Fast path: copy the backing mean/weight arrays directly
  MergingDigest md = (MergingDigest) other;
  System.arraycopy(md.mean, 0, m, offset, md.lastUsedCell);
  System.arraycopy(md.weight, 0, w, offset, md.lastUsedCell);
  offset += md.lastUsedCell;
  if (data != null) {
    for (Centroid centroid : other.centroids()) {
      data.add(centroid.data());
    }
  }
} else {
  // Generic path: copy centroid by centroid
  for (Centroid centroid : other.centroids()) {
    m[offset] = centroid.mean();
    w[offset] = centroid.count();
    offset++;
  }
}