public void merge(PercentileCounter counter) {
  assert this.compression == counter.compression;
  registers.add(counter.registers);
}
public void add(double v) {
  registers.add(v);
}
@Nonnull
@Override
public TDigest merge(@Nonnull TDigest intermediateResult1, @Nonnull TDigest intermediateResult2) {
  intermediateResult1.add(intermediateResult2);
  return intermediateResult1;
}
@Override
public TDigest applyAggregatedValue(TDigest value, TDigest aggregatedValue) {
  value.add(aggregatedValue);
  _maxByteSize = Math.max(_maxByteSize, value.byteSize());
  return value;
}
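The two merge hooks above both rely on TDigest's in-place merge. A minimal, self-contained sketch of that call, assuming the com.tdunning.math.stats library these snippets appear to target (the class name below is illustrative, not from the snippets):

import com.tdunning.math.stats.TDigest;

public class MergeSketch {
  public static void main(String[] args) {
    TDigest left = TDigest.createMergingDigest(100);   // compression = 100
    TDigest right = TDigest.createMergingDigest(100);
    for (int i = 0; i < 1000; i++) {
      left.add(i / 1000.0);        // values in [0, 1)
      right.add(1 + i / 1000.0);   // values in [1, 2)
    }
    // add(TDigest) folds the other digest's centroids into this one, as in merge(...) above.
    left.add(right);
    // The merged digest covers [0, 2), so the median should be close to 1.0.
    System.out.println(left.quantile(0.5));
  }
}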
@org.openjdk.jmh.annotations.Benchmark
public void add(ThreadState state) {
  if (state.index >= data.length) {
    state.index = 0;
  }
  td.add(data[state.index++]);
}
@Benchmark
public void timeAdd(MergeBench.ThreadState state) {
  if (state.index >= data.length) {
    state.index = 0;
  }
  tdigest.add(data[state.index++]);
}
@Override
public void aggregateGroupBySV(int length, @Nonnull int[] groupKeyArray,
    @Nonnull GroupByResultHolder groupByResultHolder, @Nonnull BlockValSet... blockValSets) {
  double[][] valuesArray = blockValSets[0].getDoubleValuesMV();
  for (int i = 0; i < length; i++) {
    TDigest tDigest = getTDigest(groupByResultHolder, groupKeyArray[i]);
    for (double value : valuesArray[i]) {
      tDigest.add(value);
    }
  }
}
@Override
public void aggregate(int length, @Nonnull AggregationResultHolder aggregationResultHolder,
    @Nonnull BlockValSet... blockValSets) {
  double[][] valuesArray = blockValSets[0].getDoubleValuesMV();
  TDigest tDigest = getTDigest(aggregationResultHolder);
  for (int i = 0; i < length; i++) {
    for (double value : valuesArray[i]) {
      tDigest.add(value);
    }
  }
}
@Override
public void aggregateGroupByMV(int length, @Nonnull int[][] groupKeysArray,
    @Nonnull GroupByResultHolder groupByResultHolder, @Nonnull BlockValSet... blockValSets) {
  double[][] valuesArray = blockValSets[0].getDoubleValuesMV();
  for (int i = 0; i < length; i++) {
    double[] values = valuesArray[i];
    for (int groupKey : groupKeysArray[i]) {
      TDigest tDigest = getTDigest(groupByResultHolder, groupKey);
      for (double value : values) {
        tDigest.add(value);
      }
    }
  }
}
@Override
public TDigest getInitialAggregatedValue(Object rawValue) {
  TDigest initialValue;
  if (rawValue instanceof byte[]) {
    byte[] bytes = (byte[]) rawValue;
    initialValue = deserializeAggregatedValue(bytes);
    _maxByteSize = Math.max(_maxByteSize, bytes.length);
  } else {
    initialValue = TDigest.createMergingDigest(PercentileTDigestAggregationFunction.DEFAULT_TDIGEST_COMPRESSION);
    initialValue.add(((Number) rawValue).doubleValue());
    _maxByteSize = Math.max(_maxByteSize, initialValue.byteSize());
  }
  return initialValue;
}
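getInitialAggregatedValue branches on whether the raw value is already a serialized digest. A hedged sketch of the byte-level round trip behind that branch, using the t-digest library's own asBytes/fromBytes (deserializeAggregatedValue in the snippet is assumed to wrap something equivalent; the class name below is illustrative):

import java.nio.ByteBuffer;
import com.tdunning.math.stats.MergingDigest;
import com.tdunning.math.stats.TDigest;

public class SerDeSketch {
  public static void main(String[] args) {
    TDigest original = TDigest.createMergingDigest(100);
    for (int i = 0; i < 10000; i++) {
      original.add(Math.random());
    }
    // byteSize() reports the buffer size needed to encode the digest's current state.
    ByteBuffer buffer = ByteBuffer.allocate(original.byteSize());
    original.asBytes(buffer);
    TDigest restored = MergingDigest.fromBytes(ByteBuffer.wrap(buffer.array()));
    // Quantile estimates survive the round trip.
    System.out.println(original.quantile(0.5) + " ~ " + restored.quantile(0.5));
  }
}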
@Setup
public void setup() {
  data = new double[10000000];
  for (int i = 0; i < data.length; i++) {
    data[i] = gen.nextDouble();
  }
  if (method.equals("tree")) {
    td = new AVLTreeDigest(compression);
  } else {
    td = new MergingDigest(compression);
  }
  // First values are very cheap to add; we are more interested in the steady state,
  // when the summary is full. Summaries are expected to contain about 5 * compression
  // centroids, hence the factor of 5.
  for (int i = 0; i < 5 * compression; ++i) {
    td.add(gen.nextDouble());
  }
}
@Override
public void aggregate(int length, @Nonnull AggregationResultHolder aggregationResultHolder,
    @Nonnull BlockValSet... blockValSets) {
  TDigest tDigest = getTDigest(aggregationResultHolder);
  FieldSpec.DataType valueType = blockValSets[0].getValueType();
  switch (valueType) {
    case INT:
    case LONG:
    case FLOAT:
    case DOUBLE:
      double[] valueArray = blockValSets[0].getDoubleValuesSV();
      for (int i = 0; i < length; i++) {
        tDigest.add(valueArray[i]);
      }
      break;
    case BYTES:
      // Serialized TDigest
      byte[][] bytesValues = blockValSets[0].getBytesValuesSV();
      for (int i = 0; i < length; i++) {
        tDigest.add(ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(ByteBuffer.wrap(bytesValues[i])));
      }
      break;
    default:
      throw new IllegalStateException("Illegal data type for PERCENTILE_TDIGEST aggregation function: " + valueType);
  }
}
@Override
public void aggregateGroupBySV(int length, @Nonnull int[] groupKeyArray,
    @Nonnull GroupByResultHolder groupByResultHolder, @Nonnull BlockValSet... blockValSets) {
  FieldSpec.DataType valueType = blockValSets[0].getValueType();
  switch (valueType) {
    case INT:
    case LONG:
    case FLOAT:
    case DOUBLE:
      double[] valueArray = blockValSets[0].getDoubleValuesSV();
      for (int i = 0; i < length; i++) {
        TDigest tDigest = getTDigest(groupByResultHolder, groupKeyArray[i]);
        tDigest.add(valueArray[i]);
      }
      break;
    case BYTES:
      // Serialized TDigest
      byte[][] bytesValues = blockValSets[0].getBytesValuesSV();
      for (int i = 0; i < length; i++) {
        TDigest tDigest = getTDigest(groupByResultHolder, groupKeyArray[i]);
        tDigest.add(ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(ByteBuffer.wrap(bytesValues[i])));
      }
      break;
    default:
      throw new IllegalStateException("Illegal data type for PERCENTILE_TDIGEST aggregation function: " + valueType);
  }
}
@Setup
public void setUp() {
  random = ThreadLocalRandom.current();
  tdigest = tdigestFactory.create(compression);
  distribution = distributionFactory.create(random);
  // First values are cheap to add, so pre-fill the t-digest to get more realistic results.
  for (int i = 0; i < 10000; ++i) {
    tdigest.add(distribution.nextDouble());
  }
  for (int i = 0; i < data.length; ++i) {
    data[i] = distribution.nextDouble();
  }
}
@Test
public void testTDigest() {
  for (int i = 0; i < NUM_ITERATIONS; i++) {
    TDigest expected = TDigest.createMergingDigest(PercentileTDigestAggregationFunction.DEFAULT_TDIGEST_COMPRESSION);
    int size = RANDOM.nextInt(100) + 1;
    for (int j = 0; j < size; j++) {
      expected.add(RANDOM.nextDouble());
    }
    byte[] bytes = ObjectSerDeUtils.serialize(expected);
    TDigest actual = ObjectSerDeUtils.deserialize(bytes, ObjectSerDeUtils.ObjectType.TDigest);
    for (int j = 0; j <= 100; j++) {
      assertEquals(actual.quantile(j / 100.0), expected.quantile(j / 100.0), 1e-5);
    }
  }
}
@Test
public void testBasic() {
  int times = 1;
  int compression = 100;
  for (int t = 0; t < times; t++) {
    TDigest tDigest = TDigest.createAvlTreeDigest(compression);
    Random random = new Random();
    int dataSize = 10000;
    List<Double> dataset = Lists.newArrayListWithCapacity(dataSize);
    for (int i = 0; i < dataSize; i++) {
      double d = random.nextDouble();
      tDigest.add(d);
      dataset.add(d);
    }
    Collections.sort(dataset);
    double actualResult = tDigest.quantile(0.5);
    double expectedResult = MathUtil.findMedianInSortedList(dataset);
    assertEquals(expectedResult, actualResult, 0.01);
  }
}
@Test
public void testTDigest() {
  double compression = 100;
  double quantile = 0.5;
  PercentileCounter counter = new PercentileCounter(compression, quantile);
  TDigest tDigest = TDigest.createAvlTreeDigest(compression);
  Random random = new Random();
  int dataSize = 10000;
  List<Double> dataset = Lists.newArrayListWithCapacity(dataSize);
  for (int i = 0; i < dataSize; i++) {
    double d = random.nextDouble();
    counter.add(d);
    tDigest.add(d);
    dataset.add(d);
  }
  double actualResult = counter.getResultEstimate();
  Collections.sort(dataset);
  double expectedResult = tDigest.quantile(quantile);
  assertEquals(expectedResult, actualResult, 0);
}
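For reference, the smallest end-to-end use of the API the tests above exercise: fill a digest with uniform values and read quantiles back. This is a sketch, not from any of the snippets above; the class name and seed are arbitrary.

import java.util.Random;
import com.tdunning.math.stats.TDigest;

public class QuantileSketch {
  public static void main(String[] args) {
    TDigest digest = TDigest.createAvlTreeDigest(100);  // compression = 100
    Random random = new Random(42);
    for (int i = 0; i < 10000; i++) {
      digest.add(random.nextDouble());  // uniform on [0, 1)
    }
    // For uniform data on [0, 1), the q-quantile is approximately q.
    System.out.printf("p50=%.3f p90=%.3f p99=%.3f%n",
        digest.quantile(0.5), digest.quantile(0.9), digest.quantile(0.99));
  }
}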