else { if (aux == null) { aux = new DoublesAuxiliary(this); quantiles[i] = aux.getQuantile(fRank);
/** * Constructs the Auxiliary structure from the DoublesSketch * @param qs a DoublesSketch */ DoublesAuxiliary(final DoublesSketch qs ) { final int k = qs.getK(); final long n = qs.getN(); final long bitPattern = qs.getBitPattern(); final int numSamples = qs.getRetainedItems(); final DoublesSketchAccessor sketchAccessor = DoublesSketchAccessor.wrap(qs); final double[] itemsArr = new double[numSamples]; final long[] cumWtsArr = new long[numSamples + 1]; // the extra slot is very important // Populate from DoublesSketch: // copy over the "levels" and then the base buffer, all with appropriate weights populateFromDoublesSketch(k, n, bitPattern, sketchAccessor, itemsArr, cumWtsArr); // Sort the first "numSamples" slots of the two arrays in tandem, // taking advantage of the already sorted blocks of length k blockyTandemMergeSort(itemsArr, cumWtsArr, numSamples, k); final long total = QuantilesHelper.convertToPrecedingCummulative(cumWtsArr); assert total == n; auxN_ = n; auxSamplesArr_ = itemsArr; auxCumWtsArr_ = cumWtsArr; }
/**
 * Returns the estimated quantile for a fractional rank.
 *
 * <p>The rank is first passed to checkFractionalRankBounds(), then mapped to a position
 * in the stream of length auxN_, and that position is resolved against the samples.
 *
 * @param fRank the fractional rank where: 0 ≤ fRank ≤ 1.0.
 * @return the estimated quantile
 */
double getQuantile(final double fRank) {
  checkFractionalRankBounds(fRank);
  return approximatelyAnswerPositionalQuery(QuantilesHelper.posOfPhi(fRank, auxN_));
}
blockyTandemMergeSortRecursion(keyDst, valDst, keySrc, valSrc, grpStart1, grpLen1, blkSize, arrLim); blockyTandemMergeSortRecursion(keyDst, valDst, keySrc, valSrc, grpStart2, grpLen2, blkSize, arrLim); tandemMerge(keySrc, valSrc, arrStart1, arrLen1, arrStart2, arrLen2,
final DoublesAuxiliary p = new DoublesAuxiliary(sketch1); final DoublesAuxiliary q = new DoublesAuxiliary(sketch2);
/** * * @param numTries number of tries * @param maxArrLen maximum length of array size */ private static void testBlockyTandemMergeSort(int numTries, int maxArrLen) { int arrLen = 0; double[] arr = null; for (arrLen = 0; arrLen <= maxArrLen; arrLen++) { for (int blkSize = 1; blkSize <= (arrLen + 100); blkSize++) { for (int tryno = 1; tryno <= numTries; tryno++) { arr = makeMergeTestInput(arrLen, blkSize); long [] brr = makeTheTandemArray(arr); assertMergeTestPrecondition(arr, brr, arrLen, blkSize); DoublesAuxiliary.blockyTandemMergeSort(arr, brr, arrLen, blkSize); /* verify sorted order */ for (int i = 0; i < (arrLen-1); i++) { assert arr[i] <= arr[i+1]; } assertMergeTestPostcondition(arr, brr, arrLen); } } } //System.out.printf ("Passed: testBlockyTandemMergeSort%n"); }
/** * blockyTandemMergeSort() is an implementation of top-down merge sort specialized * for the case where the input contains successive equal-length blocks * that have already been sorted, so that only the top part of the * merge tree remains to be executed. Also, two arrays are sorted in tandem, * as discussed below. * @param keyArr array of keys * @param valArr array of values * @param arrLen length of keyArr and valArr * @param blkSize size of internal sorted blocks */ //used by DoublesAuxiliary and UtilTest static void blockyTandemMergeSort(final double[] keyArr, final long[] valArr, final int arrLen, final int blkSize) { assert blkSize >= 1; if (arrLen <= blkSize) { return; } int numblks = arrLen / blkSize; if ((numblks * blkSize) < arrLen) { numblks += 1; } assert ((numblks * blkSize) >= arrLen); // duplicate the input is preparation for the "ping-pong" copy reduction strategy. final double[] keyTmp = Arrays.copyOf(keyArr, arrLen); final long[] valTmp = Arrays.copyOf(valArr, arrLen); blockyTandemMergeSortRecursion(keyTmp, valTmp, keyArr, valArr, 0, numblks, blkSize, arrLen); }
@Test public void checkConstructAuxiliary() { for (int k = 2; k <= 32; k *= 2) { HeapUpdateDoublesSketch qs = HeapUpdateDoublesSketch.newInstance(k); for (int numItemsSoFar = 0; numItemsSoFar < 1000; numItemsSoFar++) { DoublesAuxiliary aux = new DoublesAuxiliary(qs); int numSamples = qs.getRetainedItems(); double[] auxItems = aux.auxSamplesArr_; long[] auxAccum = aux.auxCumWtsArr_; assertTrue(qs.getN() == aux.auxN_); assertTrue(numItemsSoFar == aux.auxN_); assertTrue(auxItems.length == numSamples); assertTrue(auxAccum.length == (numSamples + 1)); double mqSumOfSamples = sumOfSamplesInSketch(qs); double auSumOfSamples = sumOfDoublesInSubArray(auxItems, 0, numSamples); // the following test might be able to detect errors in handling the samples // e.g. accidentally dropping or duplicating a sample assertTrue(Math.floor(0.5 + mqSumOfSamples) == Math.floor(0.5 + auSumOfSamples)); // the following test might be able to detect errors in handling the sample weights assertTrue(auxAccum[numSamples] == numItemsSoFar); for (int i = 0; i < (numSamples-1); i++) { assertTrue(auxItems[i] <= auxItems[i+1]); // assert sorted order assertTrue(auxAccum[i] < auxAccum[i+1]); // assert cumulative property } // This is a better test when the items are inserted in reverse order // as follows, but the negation seems kind of awkward. qs.update (-1.0 * (numItemsSoFar + 1) ); } // end of loop over test stream } // end of loop over values of k }
blockyTandemMergeSortRecursion(keyDst, valDst, keySrc, valSrc, grpStart1, grpLen1, blkSize, arrLim); blockyTandemMergeSortRecursion(keyDst, valDst, keySrc, valSrc, grpStart2, grpLen2, blkSize, arrLim); tandemMerge(keySrc, valSrc, arrStart1, arrLen1, arrStart2, arrLen2,
/** * blockyTandemMergeSort() is an implementation of top-down merge sort specialized * for the case where the input contains successive equal-length blocks * that have already been sorted, so that only the top part of the * merge tree remains to be executed. Also, two arrays are sorted in tandem, * as discussed below. * @param keyArr array of keys * @param valArr array of values * @param arrLen length of keyArr and valArr * @param blkSize size of internal sorted blocks */ //used by DoublesAuxiliary and UtilTest static void blockyTandemMergeSort(final double[] keyArr, final long[] valArr, final int arrLen, final int blkSize) { assert blkSize >= 1; if (arrLen <= blkSize) { return; } int numblks = arrLen / blkSize; if (numblks * blkSize < arrLen) { numblks += 1; } assert (numblks * blkSize >= arrLen); // duplicate the input is preparation for the "ping-pong" copy reduction strategy. final double[] keyTmp = Arrays.copyOf(keyArr, arrLen); final long[] valTmp = Arrays.copyOf(valArr, arrLen); blockyTandemMergeSortRecursion(keyTmp, valTmp, keyArr, valArr, 0, numblks, blkSize, arrLen); }
/**
 * This returns an approximation to the value of the data item
 * that would be preceded by the given fraction of a hypothetical sorted
 * version of the input stream so far.
 *
 * <p>We note that this method has a fairly large overhead (microseconds instead of nanoseconds)
 * so it should not be called multiple times to get different quantiles from the same
 * sketch. Instead use getQuantiles(), which pays the overhead only once.
 *
 * <p>If the sketch is empty this returns Double.NaN.
 *
 * @param fraction the specified fractional position in the hypothetical sorted stream.
 * These are also called normalized ranks or fractional ranks.
 * If fraction = 0.0, the true minimum value of the stream is returned.
 * If fraction = 1.0, the true maximum value of the stream is returned.
 *
 * @return the approximation to the value at the above fraction
 * @throws SketchesArgumentException if the sketch is not empty and fraction is NaN or
 * lies outside the interval [0.0, 1.0]
 */
public double getQuantile(final double fraction) {
  if (isEmpty()) { return Double.NaN; }

  // Written as a negated in-range test on purpose: every comparison with NaN is false,
  // so "(fraction < 0.0) || (fraction > 1.0)" would let a NaN fraction slip through.
  final boolean inRange = (fraction >= 0.0) && (fraction <= 1.0);
  if (!inRange) {
    throw new SketchesArgumentException("Fraction cannot be less than zero or greater than 1.0");
  }

  // The two extremes are answered directly, without building the auxiliary structure.
  if (fraction == 0.0) { return getMinValue(); }
  if (fraction == 1.0) { return getMaxValue(); }

  final DoublesAuxiliary aux = new DoublesAuxiliary(this);
  return aux.getQuantile(fraction);
}
/** * Constructs the Auxiliary structure from the DoublesSketch * @param qs a DoublesSketch */ DoublesAuxiliary(final DoublesSketch qs ) { final int k = qs.getK(); final long n = qs.getN(); final long bitPattern = qs.getBitPattern(); final int numSamples = qs.getRetainedItems(); final DoublesSketchAccessor sketchAccessor = DoublesSketchAccessor.wrap(qs); final double[] itemsArr = new double[numSamples]; final long[] cumWtsArr = new long[numSamples + 1]; // the extra slot is very important // Populate from DoublesSketch: // copy over the "levels" and then the base buffer, all with appropriate weights populateFromDoublesSketch(k, n, bitPattern, sketchAccessor, itemsArr, cumWtsArr); // Sort the first "numSamples" slots of the two arrays in tandem, // taking advantage of the already sorted blocks of length k blockyTandemMergeSort(itemsArr, cumWtsArr, numSamples, k); final long total = QuantilesHelper.convertToPrecedingCummulative(cumWtsArr); assert total == n; auxN_ = n; auxSamplesArr_ = itemsArr; auxCumWtsArr_ = cumWtsArr; }
final DoublesAuxiliary p = new DoublesAuxiliary(sketch1); final DoublesAuxiliary q = new DoublesAuxiliary(sketch2);
/**
 * Returns the estimated value at the fractional position phi.
 *
 * <p>When the recorded stream length is not positive, Double.NaN is returned.
 * The bounds on phi are checked with assertions only.
 *
 * @param phi the fractional position where: 0 ≤ φ ≤ 1.0.
 * @return the estimated value given phi
 */
double getQuantile(final double phi) {
  assert 0.0 <= phi;
  assert phi <= 1.0;
  final long streamLen = auxN_;
  return (streamLen > 0)
      ? approximatelyAnswerPositionalQuery(QuantilesHelper.posOfPhi(phi, streamLen))
      : Double.NaN;
}
/**
 * This returns an approximation to the value of the data item
 * that would be preceded by the given fraction of a hypothetical sorted
 * version of the input stream so far.
 *
 * <p>We note that this method has a fairly large overhead (microseconds instead of nanoseconds)
 * so it should not be called multiple times to get different quantiles from the same
 * sketch. Instead use getQuantiles(), which pays the overhead only once.
 *
 * <p>If the sketch is empty this returns Double.NaN.
 *
 * @param fraction the specified fractional position in the hypothetical sorted stream.
 * These are also called normalized ranks or fractional ranks.
 * If fraction = 0.0, the true minimum value of the stream is returned.
 * If fraction = 1.0, the true maximum value of the stream is returned.
 *
 * @return the approximation to the value at the above fraction
 * @throws SketchesArgumentException if the sketch is not empty and fraction is NaN or
 * lies outside the interval [0.0, 1.0]
 */
public double getQuantile(final double fraction) {
  if (isEmpty()) { return Double.NaN; }

  // A NaN fraction fails both "< 0.0" and "> 1.0", so test for membership in [0, 1]
  // and reject everything else; this way NaN is treated as the invalid argument it is.
  if (!((fraction >= 0.0) && (fraction <= 1.0))) {
    throw new SketchesArgumentException("Fraction cannot be less than zero or greater than 1.0");
  }

  // Exact answers for the end points; no auxiliary structure is needed for them.
  if (fraction == 0.0) { return getMinValue(); }
  if (fraction == 1.0) { return getMaxValue(); }

  final DoublesAuxiliary aux = new DoublesAuxiliary(this);
  return aux.getQuantile(fraction);
}
else { if (aux == null) { aux = new DoublesAuxiliary(this); answers[i] = aux.getQuantile(fraction);