/**
 * Rebuilds the centroid storage of this digest by re-inserting every existing centroid,
 * in random order, into a fresh {@code ArrayDigest} created with the same page size and
 * compression, and then adopting that digest's pages.
 * <p>
 * Only {@code data} and {@code centroidCount} are copied back from the rebuilt digest; no
 * other field of this instance is assigned here.
 */
@Override
public void compress() {
    final ArrayDigest rebuilt = new ArrayDigest(pageSize, compression);
    if (recordAllData) {
        // Keep the rebuilt digest in the same recording mode as this one.
        rebuilt.recordAllData();
    }

    // Snapshot all cursors first so they can be shuffled before re-insertion.
    final List<Index> cursors = new ArrayList<Index>();
    for (Iterator<Index> it = this.iterator(0, 0); it.hasNext(); ) {
        cursors.add(it.next());
    }
    Collections.shuffle(cursors, gen);

    for (Index cursor : cursors) {
        rebuilt.add(mean(cursor), count(cursor));
    }

    data = rebuilt.data;
    centroidCount = rebuilt.centroidCount;
}
/**
 * Adds a raw point by delegating to the three-argument {@code addRaw}.
 * <p>
 * When {@code recordAllData} is set, a single-element list holding {@code x} is passed as
 * the recorded samples; otherwise {@code null} is passed. The list is only allocated (and
 * {@code x} only boxed) when it is actually going to be used, so the non-recording path
 * does no throw-away allocation.
 *
 * @param x the value being added
 * @param w the weight passed through with {@code x}
 */
void addRaw(double x, int w) {
    List<Double> samples = null;
    if (recordAllData) {
        samples = new ArrayList<Double>();
        samples.add(x);
    }
    addRaw(x, w, samples);
}
/**
 * Reports the number of bytes used by the tighter (small) serialized representation of
 * this histogram.
 * <p>
 * The value is obtained by writing the small representation into a scratch buffer of
 * {@code byteSize()} bytes and reading back the buffer position afterwards, so every call
 * performs a full serialization pass.
 *
 * @return the scratch buffer's position after {@code asSmallBytes} has run
 */
@Override
public int smallByteSize() {
    // byteSize() is used as the capacity of the scratch buffer.
    final ByteBuffer scratch = ByteBuffer.allocate(byteSize());
    asSmallBytes(scratch);
    return scratch.position();
}
@Override public void add(double x, int w) { checkValue(x); Index start = floor(x); if (start == null) { start = ceiling(x); addRaw(x, w); } else { Iterable<Index> neighbors = inclusiveTail(start); double minDistance = Double.MAX_VALUE; int lastNeighbor = 0; int i = 0; for (Index neighbor : neighbors) { double z = Math.abs(mean(neighbor) - x); if (z <= minDistance) { minDistance = z; long sum = headSum(start); i = 0; double n = 0; double z = Math.abs(mean(neighbor) - x); double q = (sum + count(neighbor) / 2.0) / totalWeight; double k = 4 * totalWeight * q * (1 - q) / compression; if (z == minDistance && count(neighbor) + w <= k) { n++; if (gen.nextDouble() < 1 / n) {
if (centroidCount() == 0) { return Double.NaN; } else if (centroidCount() == 1) { return data.get(0).centroids[0]; final double index = q * (size() - 1); if (firstPage == 0) { it = iterator(0, 0); } else { final int previousPageIndex = firstPage - 1; previousMean = previousPage.centroids[lastSubPage]; previousIndex = total - (previousPage.counts[lastSubPage] + 1.0) / 2; it = iterator(firstPage, 0); return quantile(previousIndex, index, nextIndex, previousMean, next.mean()); } else if (!it.hasNext()) { final double nextIndex2 = size() - 1; final double nextMean2 = (next.mean() * (nextIndex2 - previousIndex) - previousMean * (nextIndex2 - nextIndex)) / (nextIndex - previousIndex); return quantile(nextIndex, index, nextIndex2, next.mean(), nextMean2);
pageSize = buf.getInt(); ArrayDigest r = new ArrayDigest(pageSize, compression); int n = buf.getInt(); double[] means = new double[n]; r.add(means[i], buf.getInt()); pageSize = buf.getInt(); ArrayDigest r = new ArrayDigest(pageSize, compression); int n = buf.getInt(); double[] means = new double[n]; int z = decode(buf); r.add(means[i], z);
/**
 * Returns a cursor pointing to the first element &lt;= x. Exposed only for testing.
 * <p>
 * The cursor is chosen from the sequence produced by {@code allBefore(x)}: starting with
 * the first element, the method keeps advancing while the element under inspection has a
 * mean exactly equal to {@code x} and more elements remain.
 *
 * @param x The value used to find the cursor.
 * @return The cursor, or {@code null} if {@code allBefore(x)} yields nothing.
 */
public Index floor(double x) {
    final Iterator<Index> candidates = allBefore(x);
    if (candidates.hasNext()) {
        Index result = candidates.next();
        Index probe = result;
        // result trails probe by one step: it only moves once probe is confirmed to equal x.
        // NOTE(review): the element fetched last is never promoted to result, even when its
        // mean also equals x and the iterator is exhausted -- confirm this is intended.
        while (candidates.hasNext() && mean(probe) == x) {
            result = probe;
            probe = candidates.next();
        }
        return result;
    }
    return null;
}
/**
 * Factory for an ArrayDigest that uses the default page size of 32.
 *
 * @param compression The compression parameter. 100 is a common value for normal uses;
 *                    1000 is extremely large. The number of centroids retained will be a
 *                    smallish (usually less than 10) multiple of this number.
 * @return the newly created ArrayDigest
 */
public static ArrayDigest createArrayDigest(double compression) {
    final int defaultPageSize = 32;
    return createArrayDigest(defaultPageSize, compression);
}
/**
 * Returns the first cursor produced by {@code allAfter(x)}.
 *
 * @param x The value used to find the cursor.
 * @return The first cursor from {@code allAfter(x)}, or {@code null} when that iterator
 *         is empty.
 */
public Index ceiling(double x) {
    final Iterator<Index> following = allAfter(x);
    if (!following.hasNext()) {
        return null;
    }
    return following.next();
}
/**
 * Factory for an ArrayDigest with an explicitly chosen page size.
 *
 * @param pageSize    The internal page size to use. This should be about
 *                    sqrt(10*compression).
 * @param compression The compression parameter. 100 is a common value for normal uses;
 *                    1000 is extremely large. The number of centroids retained will be a
 *                    smallish (usually less than 10) multiple of this number.
 * @return the newly created ArrayDigest
 */
public static ArrayDigest createArrayDigest(int pageSize, double compression) {
    final ArrayDigest digest = new ArrayDigest(pageSize, compression);
    return digest;
}
/**
 * Adds a point together with the data carried by an existing centroid, by forwarding to
 * the three-argument {@code addRaw}.
 *
 * @param x    the value being added
 * @param w    the weight passed through with {@code x}
 * @param base the centroid whose {@code data()} is forwarded; must not be {@code null}
 */
@Override
void add(double x, int w, Centroid base) {
    final List<Double> history = base.data();
    addRaw(x, w, history);
}