final double[] counters = {0, 0, 0}; final long[] answers = {200, 100, 200}; DoublesPmfCdfImpl.bilinearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters); for (int j = 0; j < counters.length; j++) { assertEquals(counters[j], answers[j], 0.00001); final double[] counters = {0, 0, 0}; final long[] answers = {0, 0, 500}; DoublesPmfCdfImpl.bilinearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters); for (int j = 0; j < counters.length; j++) { assertEquals(counters[j], answers[j], 0.00001); final double[] counters = {0, 0, 0}; final long[] answers = {500, 0, 0}; DoublesPmfCdfImpl.bilinearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters); for (int j = 0; j < counters.length; j++) { assertEquals(counters[j], answers[j], 0.00001);
final double[] counters = {0, 0, 0}; final long[] answers = {200, 100, 200}; DoublesPmfCdfImpl.linearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters); for (int j = 0; j < counters.length; j++) { assertEquals(counters[j], answers[j], 0.00001); final double[] counters = {0, 0, 0}; final long[] answers = {0, 0, 500}; DoublesPmfCdfImpl.linearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters); for (int j = 0; j < counters.length; j++) { assertEquals(counters[j], answers[j], 0.00001); final double[] counters = {0, 0, 0}; final long[] answers = {500, 0, 0}; DoublesPmfCdfImpl.linearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters); for (int j = 0; j < counters.length; j++) { assertEquals(counters[j], answers[j], 0.00001);
/**
 * Computes an approximation to the Probability Mass Function (PMF) of the input stream
 * for the supplied set of splitPoints (values).
 *
 * <p>The resulting approximations have a probabilistic guarantee that can be obtained from
 * the getNormalizedRankError(true) function.</p>
 *
 * <p>If the sketch is empty this returns null.</p>
 *
 * @param splitPoints an array of <i>m</i> unique, monotonically increasing double values
 * that divide the real number line into <i>m+1</i> consecutive disjoint intervals.
 * An "interval" is inclusive of its left splitPoint (or the minimum value) and exclusive of
 * its right splitPoint, except that the last interval also includes the maximum value.
 * It is not necessary to include either the min or max values in these splitPoints.
 *
 * @return an array of <i>m+1</i> doubles, each an approximation to the fraction of the input
 * stream values (the mass) that fall into the corresponding interval, using the same interval
 * definition as above; or null if the sketch is empty.
 */
public double[] getPMF(final double[] splitPoints) {
  // "false" selects the PMF (non-cumulative) form of the shared implementation.
  return isEmpty() ? null : DoublesPmfCdfImpl.getPMFOrCDF(this, splitPoints, false);
}
if (numSplitPoints < 50) { // empirically determined crossover DoublesPmfCdfImpl.bilinearTimeIncrementHistogramCounters( sketchAccessor, weight, splitPoints, counters); } else { sketchAccessor.sort(); DoublesPmfCdfImpl.linearTimeIncrementHistogramCounters( sketchAccessor, weight, splitPoints, counters); DoublesPmfCdfImpl.linearTimeIncrementHistogramCounters( sketchAccessor, weight, splitPoints, counters);
static double[] getPMFOrCDF(final DoublesSketch sketch, final double[] splitPoints, final boolean isCDF) { final double[] buckets = internalBuildHistogram(sketch, splitPoints); final long n = sketch.getN(); if (isCDF) { double subtotal = 0; for (int j = 0; j < buckets.length; j++) { subtotal += buckets[j]; buckets[j] = subtotal / n; //normalize by n } } else { // PMF for (int j = 0; j < buckets.length; j++) { buckets[j] /= n; //normalize by n } } return buckets; }
if (numSplitPoints < 50) { // empirically determined crossover DoublesPmfCdfImpl.bilinearTimeIncrementHistogramCounters( sketchAccessor, weight, splitPoints, counters); } else { sketchAccessor.sort(); DoublesPmfCdfImpl.linearTimeIncrementHistogramCounters( sketchAccessor, weight, splitPoints, counters); DoublesPmfCdfImpl.linearTimeIncrementHistogramCounters( sketchAccessor, weight, splitPoints, counters);
static double[] getPMFOrCDF(final DoublesSketch sketch, final double[] splitPoints, final boolean isCDF) { final double[] buckets = internalBuildHistogram(sketch, splitPoints); final long n = sketch.getN(); if (isCDF) { double subtotal = 0; for (int j = 0; j < buckets.length; j++) { subtotal += buckets[j]; buckets[j] = subtotal / n; //normalize by n } } else { // PMF for (int j = 0; j < buckets.length; j++) { buckets[j] /= n; //normalize by n } } return buckets; }
/**
 * Computes an approximation to the Cumulative Distribution Function (CDF), the cumulative
 * analog of the PMF, of the input stream for the supplied set of splitPoints (values).
 *
 * <p>The resulting approximations have a probabilistic guarantee that can be obtained from
 * the getNormalizedRankError(false) function.</p>
 *
 * <p>If the sketch is empty this returns null.</p>
 *
 * @param splitPoints an array of <i>m</i> unique, monotonically increasing double values
 * that divide the real number line into <i>m+1</i> consecutive disjoint intervals.
 * An "interval" is inclusive of its left splitPoint (or the minimum value) and exclusive of
 * its right splitPoint, except that the last interval also includes the maximum value.
 * It is not necessary to include either the min or max values in these splitPoints.
 *
 * @return an array of <i>m+1</i> double values forming a consecutive approximation to the CDF
 * of the input stream for the given splitPoints. The value at position j equals the sum of
 * the values at positions 0 through j of the array returned by getPMF; or null if the sketch
 * is empty.
 */
public double[] getCDF(final double[] splitPoints) {
  // "true" selects the cumulative form of the shared implementation.
  return isEmpty() ? null : DoublesPmfCdfImpl.getPMFOrCDF(this, splitPoints, true);
}
/**
 * Approximates the Probability Mass Function (PMF) of the input stream over the intervals
 * defined by the given splitPoints (values).
 *
 * <p>The resulting approximations have a probabilistic guarantee that can be obtained from
 * the getNormalizedRankError(true) function.</p>
 *
 * <p>If the sketch is empty this returns null.</p>
 *
 * @param splitPoints an array of <i>m</i> unique, monotonically increasing double values
 * that divide the real number line into <i>m+1</i> consecutive disjoint intervals.
 * Each interval includes its left splitPoint (or the minimum value) and excludes its right
 * splitPoint, with the exception that the last interval will include the maximum value.
 * It is not necessary to include either the min or max values in these splitPoints.
 *
 * @return an array of <i>m+1</i> doubles, each of which approximates the fraction of the
 * input stream values (the mass) falling into one of those intervals; or null if the sketch
 * is empty.
 */
public double[] getPMF(final double[] splitPoints) {
  if (!isEmpty()) {
    // Delegate to the shared implementation in PMF (non-cumulative) mode.
    return DoublesPmfCdfImpl.getPMFOrCDF(this, splitPoints, false);
  }
  return null;
}
/**
 * Approximates the Cumulative Distribution Function (CDF) of the input stream, i.e. the
 * cumulative analog of the PMF, over the intervals defined by the given splitPoints (values).
 *
 * <p>The resulting approximations have a probabilistic guarantee that can be obtained from
 * the getNormalizedRankError(false) function.</p>
 *
 * <p>If the sketch is empty this returns null.</p>
 *
 * @param splitPoints an array of <i>m</i> unique, monotonically increasing double values
 * that divide the real number line into <i>m+1</i> consecutive disjoint intervals.
 * Each interval includes its left splitPoint (or the minimum value) and excludes its right
 * splitPoint, with the exception that the last interval will include the maximum value.
 * It is not necessary to include either the min or max values in these splitPoints.
 *
 * @return an array of <i>m+1</i> double values that are a consecutive approximation to the
 * CDF of the input stream given the splitPoints. Position j of the returned array holds the
 * sum of positions 0 through j of the array returned by getPMF; or null if the sketch is
 * empty.
 */
public double[] getCDF(final double[] splitPoints) {
  if (!isEmpty()) {
    // Delegate to the shared implementation in cumulative mode.
    return DoublesPmfCdfImpl.getPMFOrCDF(this, splitPoints, true);
  }
  return null;
}