/**
 * Creates an empty digest that records a sketch of a distribution. The backing
 * summary starts out as a new {@link AVLGroupTree} constructed with {@code false}.
 *
 * @param compression controls how accuracy is traded for size. A value of N gives
 *                    quantile errors that are almost always below 3/N, with
 *                    considerably smaller errors expected at extreme quantiles.
 *                    In exchange, expect roughly 5 N centroids to be tracked for
 *                    that accuracy.
 */
@SuppressWarnings("WeakerAccess")
public AVLTreeDigest(double compression) {
    this.summary = new AVLGroupTree(false);
    this.compression = compression;
}
/**
 * Switches this digest to a summary tree constructed with {@code true} and then
 * delegates to the superclass implementation.
 *
 * <p>The switch is only permitted while the current summary is empty, because the
 * existing tree is discarded and replaced.
 *
 * @return whatever {@code super.recordAllData()} returns
 * @throws IllegalStateException if the summary already holds at least one entry
 */
@Override
public TDigest recordAllData() {
    final boolean summaryIsEmpty = summary.size() == 0;
    if (!summaryIsEmpty) {
        throw new IllegalStateException("Can only ask to record added data on an empty summary");
    }
    // Replace the (empty) tree with one built with the flag set to true.
    this.summary = new AVLGroupTree(true);
    return super.recordAllData();
}
/**
 * Builds an empty histogram-like structure that records a sketch of a distribution.
 * The summary is initialised to a new {@link AVLGroupTree} constructed with
 * {@code false}.
 *
 * @param compression the accuracy/size trade-off. With a value of N, quantile errors
 *                    are almost always less than 3/N, and considerably smaller
 *                    errors are expected for extreme quantiles. Conversely, about
 *                    5 N centroids should be expected for this accuracy.
 */
public AVLTreeDigest(double compression) {
    this.summary = new AVLGroupTree(false);
    this.compression = compression;
}
/**
 * Replaces the summary with a tree constructed with {@code true}, then hands off to
 * the superclass implementation.
 *
 * <p>Only allowed while the summary is still empty; otherwise the call is rejected
 * before anything is modified.
 *
 * @return the result of {@code super.recordAllData()}
 * @throws IllegalStateException if the summary is not empty
 */
@Override
public TDigest recordAllData() {
    if (summary.size() == 0) {
        // Nothing has been added yet, so swapping the tree loses no entries.
        summary = new AVLGroupTree(true);
        return super.recordAllData();
    }
    throw new IllegalStateException("Can only ask to record added data on an empty summary");
}
/**
 * Rebuilds the summary by re-inserting every existing centroid in a shuffled order.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Return immediately when the summary holds at most one entry.</li>
 *   <li>Keep a reference to the current tree and install a fresh
 *       {@link AVLGroupTree} (built with the {@code recordAllData} flag) as the
 *       summary.</li>
 *   <li>Collect the node ids of the old tree by walking from {@code first()} through
 *       successive {@code next()} calls.</li>
 *   <li>Shuffle those ids using {@code gen.nextInt}.</li>
 *   <li>Feed each old node's mean, count and data back through {@code add}.</li>
 * </ol>
 */
@Override
public void compress() {
    if (summary.size() <= 1) {
        return;
    }

    // From here on, add() targets the new tree while we read from the old one.
    final AVLGroupTree previous = summary;
    this.summary = new AVLGroupTree(recordAllData);

    final int[] order = new int[previous.size()];

    // Walk the old tree front to back, remembering each node id.
    int cursor = previous.first();
    order[0] = cursor;
    for (int slot = 1; slot < order.length; slot++) {
        cursor = previous.next(cursor);
        assert cursor != IntAVLTree.NIL;
        order[slot] = cursor;
    }
    // The last collected node must have no successor.
    assert previous.next(cursor) == IntAVLTree.NIL;

    // Shuffle in place: swap each position, from the end down, with a randomly
    // chosen position at or below it.
    for (int last = order.length - 1; last > 0; last--) {
        final int pick = gen.nextInt(last + 1);
        final int held = order[last];
        order[last] = order[pick];
        order[pick] = held;
    }

    // Re-insert the old centroids into the new summary in the shuffled order.
    for (int slot = 0; slot < order.length; slot++) {
        final int node = order[slot];
        add(previous.mean(node), previous.count(node), previous.data(node));
    }
}