/**
 * Creates a {@link MergingDigest}. This is generally the best known implementation right now.
 *
 * @param compression The compression parameter. 100 is a common value for normal uses. 1000 is extremely large.
 *                    The number of centroids retained will be a smallish (usually less than 10) multiple of this number.
 * @return the MergingDigest
 */
@SuppressWarnings("WeakerAccess")
public static TDigest createMergingDigest(double compression) {
    return new MergingDigest(compression);
}
/**
 * Adds a weighted sample without recording any per-point data.
 *
 * @param x the sample value
 * @param w the weight to attach to the sample
 */
@Override
public void add(double x, int w) {
    // A typed local selects the List<Double> overload without an inline cast;
    // null means "do not record individual data points".
    final List<Double> noData = null;
    add(x, w, noData);
}
@Override
public TDigest deserialize(ByteBuffer byteBuffer) {
    // Delegate to MergingDigest's binary decoder.
    return MergingDigest.fromBytes(byteBuffer);
}
}; // closes the enclosing anonymous class (its declaration is not visible in this chunk)
@Setup public void setup() { data = new double[10000000]; for (int i = 0; i < data.length; i++) { data[i] = gen.nextDouble(); } td = new MergingDigest(compression, (factor + 1) * compression, compression); td.setScaleFunction(ScaleFunction.valueOf(scaleFunction)); // First values are very cheap to add, we are more interested in the steady state, // when the summary is full. Summaries are expected to contain about 0.6*compression // centroids, hence the 5 * compression * (factor+1) for (int i = 0; i < 5 * compression * (factor + 1); ++i) { td.add(gen.nextDouble()); } }
double compression = buf.getDouble(); int n = buf.getInt(); MergingDigest r = new MergingDigest(compression); r.setMinMax(min, max); r.lastUsedCell = n; for (int i = 0; i < n; i++) { int n = buf.getShort(); int bufferSize = buf.getShort(); MergingDigest r = new MergingDigest(compression, bufferSize, n); r.setMinMax(min, max); r.lastUsedCell = buf.getShort(); for (int i = 0; i < r.lastUsedCell; i++) {
throw new IllegalArgumentException("q should be in [0,1], got " + q); mergeNewValues(); return weightedAverage(mean[i], z2, mean[i + 1], z1); return weightedAverage(mean[n - 1], z1, max, z2);
@Override
public int centroidCount() {
    // Fold any buffered, not-yet-merged points into the centroid array first,
    // so the reported count reflects all data seen so far.
    mergeNewValues();
    return lastUsedCell;
}
@Override
public int byteSize() {
    // Compress first so lastUsedCell reflects the final centroid count.
    compress();
    // 32 bytes of header plus two 8-byte doubles (mean, weight) per centroid.
    // NOTE(review): the original comment listed "format code, compression(float),
    // buffer-size(int), temp-size(int), #centroids-1(int)", which sums to less than 32;
    // verify the exact header layout against asBytes (min/max doubles are likely included).
    return lastUsedCell * 16 + 32;
}
private void mergeNewValues(boolean force, double compression) { if (totalWeight == 0 && unmergedWeight == 0) { // seriously nothing to do return; } if (force || unmergedWeight > 0) { // note that we run the merge in reverse every other merge to avoid left-to-right bias in merging merge(tempMean, tempWeight, tempUsed, tempData, order, unmergedWeight, useAlternatingSort & mergeCount % 2 == 1, compression); mergeCount++; tempUsed = 0; unmergedWeight = 0; if (data != null) { tempData = new ArrayList<>(); } } }
/**
 * Exposed for testing. Delegates to the static weight checker over the live
 * centroid arrays; returns its result (presumably a count of anomalies —
 * TODO confirm against the static checkWeights overload).
 */
int checkWeights() {
    return checkWeights(weight, totalWeight, lastUsedCell);
}
/**
 * Returns a name encoding the active strategy flags, e.g.
 * "MergingDigest-K_2-weight-alternating-twoLevel", so output is self-describing.
 */
@Override
public String toString() {
    StringBuilder label = new StringBuilder("MergingDigest");
    label.append('-').append(getScaleFunction());
    label.append('-').append(useWeightLimit ? "weight" : "kSize");
    label.append('-').append(useAlternatingSort ? "alternating" : "stable");
    label.append('-').append(useTwoLevelCompression ? "twoLevel" : "oneLevel");
    return label.toString();
}
}
/**
 * Merges any pending inputs and compresses the data down to the public setting.
 * Note that this typically loses a bit of precision and thus isn't a thing to
 * be doing all the time. It is best done only when we want to show results to
 * the outside world.
 */
@Override
public void compress() {
    // force a merge pass at the externally requested (public) compression
    mergeNewValues(true, publicCompression);
}
@Override
public int smallByteSize() {
    // Compress first so lastUsedCell reflects the final centroid count.
    compress();
    // format code(int), compression(float), buffer-size(short), temp-size(short), #centroids-1(short),
    // then two floats per centroid
    // NOTE(review): the fields listed above sum to 14 bytes, not the 30 used below;
    // verify against asSmallBytes — min/max doubles (16 bytes) are likely also written.
    return lastUsedCell * 8 + 30;
}
private void add(double[] m, double[] w, int count, List<List<Double>> data) { if (m.length != w.length) { throw new IllegalArgumentException("Arrays not same length"); } if (m.length < count + lastUsedCell) { // make room to add existing centroids double[] m1 = new double[count + lastUsedCell]; System.arraycopy(m, 0, m1, 0, count); m = m1; double[] w1 = new double[count + lastUsedCell]; System.arraycopy(w, 0, w1, 0, count); w = w1; } double total = 0; for (int i = 0; i < count; i++) { total += w[i]; } merge(m, w, count, data, null, total, false, compression); }
@Override
void add(double x, int w, Centroid base) {
    // forward the base centroid's recorded data list along with the sample
    add(x, w, base.data());
}
/**
 * Reconstructs a digest from its serialized byte form.
 *
 * @param bytes serialized digest bytes
 * @return the deserialized digest
 */
@Override
public TDigest deserialize(byte[] bytes) {
    ByteBuffer buffer = ByteBuffer.wrap(bytes);
    return MergingDigest.fromBytes(buffer);
}
@Override
TDigest create(double compression) {
    // use an input buffer sized at 10x the compression
    return new MergingDigest(compression, (int) (10 * compression));
}
private void mergeNewValues() {
    // non-forced merge at the internal working compression
    mergeNewValues(false, compression);
}
@Override public Collection<Centroid> centroids() { compress(); return new AbstractCollection<Centroid>() { @Override
/**
 * JMH benchmark body: feeds the digest one pre-generated value per invocation,
 * cycling back to the start of the array when it runs out.
 */
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void add(ThreadState state) {
    int cursor = state.index;
    if (cursor >= data.length) {
        cursor = 0; // wrap around and reuse the samples
    }
    td.add(data[cursor]);
    state.index = cursor + 1;
}