/**
 * {@inheritDoc}
 */
@Override
public Percentile copy() {
    // Duplication is delegated to the copy constructor.
    final Percentile duplicate = new Percentile(this);
    return duplicate;
}
/**
 * Estimates the {@code p}th percentile of the entries of the
 * {@code values} array.
 * <ul>
 * <li>Returns {@code Double.NaN} if {@code values} has length
 * {@code 0}</li>
 * <li>Returns (for any value of {@code p}) {@code values[0]}
 * if {@code values} has length {@code 1}</li>
 * <li>Throws {@code MathIllegalArgumentException} if {@code values}
 * is null or p is not a valid quantile value (p must be greater than 0
 * and less than or equal to 100)</li>
 * </ul>
 * <p>
 * See {@link org.apache.commons.math3.stat.descriptive.rank.Percentile} for
 * a description of the percentile estimation algorithm used.</p>
 *
 * @param values input array of values
 * @param p the percentile value to compute
 * @return the percentile value or Double.NaN if the array is empty
 * @throws MathIllegalArgumentException if {@code values} is null
 * or p is invalid
 */
public static double percentile(final double[] values, final double p)
        throws MathIllegalArgumentException {
    // The shared PERCENTILE instance performs the actual estimation.
    final double estimate = PERCENTILE.evaluate(values, p);
    return estimate;
}
/** * Copy constructor, creates a new {@code Percentile} identical * to the {@code original} * * @param original the {@code Percentile} instance to copy * @throws NullArgumentException if original is null */ public Percentile(final Percentile original) throws NullArgumentException { MathUtils.checkNotNull(original); estimationType = original.getEstimationType(); nanStrategy = original.getNaNStrategy(); kthSelector = original.getKthSelector(); setData(original.getDataRef()); if (original.cachedPivots != null) { System.arraycopy(original.cachedPivots, 0, cachedPivots, 0, original.cachedPivots.length); } setQuantile(original.quantile); }
/**
 * Evaluates the statistic over the data held by this instance.
 * <p>
 * The data used is the array stored by earlier calls to
 * {@link #setData(double[])}.
 * </p>
 * @param p the percentile value to compute
 * @return the value of the statistic applied to the stored data
 * @throws MathIllegalArgumentException if p is not a valid quantile value
 * (p must be greater than 0 and less than or equal to 100)
 */
public double evaluate(final double p) throws MathIllegalArgumentException {
    final double[] stored = getDataRef();
    return evaluate(stored, p);
}
/**
 * Evaluates a freshly constructed {@code Percentile} over the given values.
 *
 * @param values the samples, converted to a primitive array before evaluation
 * @param percentileValue the percentile to evaluate
 * @return the evaluated percentile, boxed
 */
private Double _calculateNthPercentile(Collection<Double> values, Double percentileValue) {
    final Percentile estimator = new Percentile();
    final double[] samples = Doubles.toArray(values);
    final double result = estimator.evaluate(samples, percentileValue);
    return result;
}
/**
 * Computes the requested percentile over the first {@code size} entries of
 * {@code values}.
 * <p>
 * The entries are copied into a private array which is sorted and handed to
 * a new {@code Percentile}; the backing {@code values} array is not modified
 * by this method.
 * </p>
 *
 * @param percentile the percentile to evaluate
 * @return the percentile computed by {@code Percentile.evaluate}
 * @throws IllegalStateException if {@code size} is 0
 */
@Override
public double doublePercentile(int percentile) {
    if (this.size == 0) {
        throw new IllegalStateException(
                "Cannot compute percentile " + percentile + " of an empty data set (size == 0)");
    }

    // Work on a snapshot so sorting never reorders the backing array.
    double[] data = new double[this.size];
    System.arraycopy(this.values, 0, data, 0, data.length);
    Arrays.sort(data);

    Percentile statistic = new Percentile();
    statistic.setData(data);
    return statistic.evaluate(percentile);
}
}
/**
 * Classifies each record as an outlier or not, based on where its norm falls
 * relative to the {@code targetPercentile} cutoff of the batch.
 * <p>
 * Every record is wrapped in a {@code DatumWithNorm}; the norms are gathered
 * into an array and the cutoff is the {@code targetPercentile * 100}
 * percentile of that array, evaluated with {@code NaNStrategy.MAXIMAL}.
 * A record is flagged when its norm is at least the cutoff or is infinite.
 * One {@code OutlierClassificationResult} per record is appended to
 * {@code results}.
 * </p>
 *
 * @param records the batch of records to classify
 */
@Override
public void consume(List<Datum> records) {
    final int count = records.size();
    List<DatumWithNorm> toClassify = new ArrayList<>(count);
    double[] scores = new double[count];
    for (int i = 0; i < count; i++) {
        DatumWithNorm dwn = new DatumWithNorm(records.get(i));
        toClassify.add(dwn);
        scores[i] = dwn.getNorm();
    }

    Percentile pCalc = new Percentile().withNaNStrategy(NaNStrategy.MAXIMAL);
    pCalc.setData(scores);
    double cutoff = pCalc.evaluate(scores, targetPercentile * 100);

    // The median and max are only needed for diagnostics, so skip the extra
    // percentile evaluations entirely when debug logging is disabled.
    if (log.isDebugEnabled()) {
        log.debug("{} Percentile Cutoff: {}", targetPercentile, cutoff);
        log.debug("Median: {}", pCalc.evaluate(50));
        log.debug("Max: {}", pCalc.evaluate(100));
    }

    for (DatumWithNorm dwn : toClassify) {
        boolean outlier = dwn.getNorm() >= cutoff || dwn.getNorm().isInfinite();
        results.add(new OutlierClassificationResult(dwn.getDatum(), outlier));
    }
}
/**
 * Evaluates the {@code nth * 100} percentile over the first {@code n}
 * entries of {@code values}, using the {@code R_7} estimation type and the
 * {@code FIXED} NaN strategy.
 *
 * @return the evaluated percentile
 */
@Override
public double getValue() {
    final org.apache.commons.math3.stat.descriptive.rank.Percentile estimator =
            new org.apache.commons.math3.stat.descriptive.rank.Percentile(nth * 100)
                    .withEstimationType(
                            org.apache.commons.math3.stat.descriptive.rank.Percentile.EstimationType.R_7)
                    .withNaNStrategy(NaNStrategy.FIXED);
    return estimator.evaluate(values, 0, n);
}
_percentile.setData(historyMetricValues.doubleArray()); double upperPercentileMetricValue = _percentile.evaluate(_anomalyUpperPercentile); if (upperPercentileMetricValue <= SIGNIFICANT_METRIC_VALUE_THRESHOLD) { return null; double lowerThreshold = _percentile.evaluate(_anomalyLowerPercentile) * _anomalyLowerMargin; double currentMetricValue = current.metricValues().valuesFor(metricId).latest();
/**
 * Computes the configured percentile of {@code ds} with the {@code R_3}
 * estimation type, truncates it to an {@code int}, stores it in
 * {@code actualResult} and returns it.
 * <p>
 * Note that the percentile implementation of the supplied statistics object
 * is replaced as a side effect.
 * </p>
 *
 * @param ds the statistics object to query
 * @return the percentile cast to {@code int}
 */
@Override
protected int calculateNumericResult(DescriptiveStatistics ds) {
    final Percentile r3Estimator = new Percentile().withEstimationType(Percentile.EstimationType.R_3);
    ds.setPercentileImpl(r3Estimator);

    final int truncated = (int) ds.getPercentile((double) percentile);
    actualResult = truncated;
    return truncated;
}
if (values == getDataRef()) { work = getDataRef(); } else { switch (nanStrategy) { case MAXIMAL:// Replace NaNs with +INFs work = replaceAndSlice(values, begin, length, Double.NaN, Double.POSITIVE_INFINITY); break; case MINIMAL:// Replace NaNs with -INFs work = replaceAndSlice(values, begin, length, Double.NaN, Double.NEGATIVE_INFINITY); break; case REMOVED:// Drop NaNs from data work = removeAndSlice(values, begin, length, Double.NaN); break; case FAILED:// just throw exception as NaN is un-acceptable work = copyOf(values, begin, length); MathArrays.checkNotNaN(work); break; default: //FIXED work = copyOf(values,begin,length); break;
/**
 * Copies the slice [begin, begin+length) of an array and substitutes every
 * occurrence of a given value in that copy with a replacement value.
 * The comparison treats NaN as equal to NaN.
 *
 * @param values the input array
 * @param begin start index of the array to include
 * @param length number of elements to include from begin
 * @param original the value to be replaced
 * @param replacement the value to be used for replacement
 * @return the copy of sliced array with replaced values
 */
private static double[] replaceAndSlice(final double[] values,
                                        final int begin, final int length,
                                        final double original,
                                        final double replacement) {
    final double[] slice = copyOf(values, begin, length);
    for (int idx = 0; idx < length; idx++) {
        // Only matching entries are overwritten; all others stay as copied.
        if (Precision.equalsIncludingNaN(original, slice[idx])) {
            slice[idx] = replacement;
        }
    }
    return slice;
}
/**
 * Supplies the pivots array to use for the given input: the cached one when
 * the input is the stored data array itself, otherwise a new array of
 * {@code PIVOTS_HEAP_LENGTH} entries, all set to {@code -1}.
 *
 * @param values array containing the input numbers
 * @return cached pivots or a newly created one
 */
private int[] getPivots(final double[] values) {
    // Identity comparison: only the stored array itself reuses the cache.
    if (values == getDataRef()) {
        return cachedPivots;
    }

    final int[] freshPivots = new int[PIVOTS_HEAP_LENGTH];
    Arrays.fill(freshPivots, -1);
    return freshPivots;
}
/**
 * Converts the collection to a primitive array and evaluates the requested
 * percentile on it with a new {@code Percentile} instance.
 *
 * @param values the samples to evaluate
 * @param percentileValue the percentile to evaluate
 * @return the evaluated percentile, boxed
 */
private Double _calculateNthPercentile(Collection<Double> values, Double percentileValue) {
    final Percentile percentile = new Percentile();
    final double[] raw = Doubles.toArray(values);
    return percentile.evaluate(raw, percentileValue);
}
/**
 * Chooses the values on which to split a numeric feature when building
 * candidate splits.
 * <ul>
 * <li>With at most one input value, the input array itself is returned.</li>
 * <li>With at most MAX_NUMERIC_SPLITS + 1 values, the averages of successive
 * values are returned.</li>
 * <li>Otherwise, MAX_NUMERIC_SPLITS evenly spaced percentiles of the data are
 * returned.</li>
 * </ul>
 *
 * @param values the numeric feature's values
 * @return an array of split points
 */
private static double[] chooseNumericSplitPoints(double[] values) {
    final int count = values.length;
    if (count <= 1) {
        return values;
    }

    if (count <= MAX_NUMERIC_SPLITS + 1) {
        // Few enough values: split halfway between each adjacent pair.
        double[] midpoints = new double[count - 1];
        for (int j = 0; j < midpoints.length; j++) {
            midpoints[j] = (values[j + 1] + values[j]) / 2.0;
        }
        return midpoints;
    }

    Percentile distribution = new Percentile();
    distribution.setData(values);

    double[] cuts = new double[MAX_NUMERIC_SPLITS];
    for (int k = 0; k < cuts.length; k++) {
        double p = 100.0 * ((k + 1.0) / (MAX_NUMERIC_SPLITS + 1.0));
        cuts[k] = distribution.evaluate(p);
    }
    return cuts;
}
_percentile.setData(historyMetricValues.doubleArray()); double upperPercentileMetricValue = _percentile.evaluate(_anomalyUpperPercentile); if (upperPercentileMetricValue <= SIGNIFICANT_METRIC_VALUE_THRESHOLD) { return null; double lowerThreshold = _percentile.evaluate(_anomalyLowerPercentile) * _anomalyLowerMargin; double currentMetricValue = current.metricValues().valuesFor(metricId).latest();
/**
 * Applies the statistic to the stored data and returns the result.
 * <p>
 * The stored array is whatever was installed by previous calls to
 * {@link #setData(double[])}.
 * </p>
 * @param p the percentile value to compute
 * @return the value of the statistic applied to the stored data
 * @throws MathIllegalArgumentException if p is not a valid quantile value
 * (p must be greater than 0 and less than or equal to 100)
 */
public double evaluate(final double p) throws MathIllegalArgumentException {
    final double result = this.evaluate(this.getDataRef(), p);
    return result;
}
/** * Computes the bandwidth */ private void computeBandwidth(){ double n = (double)x.length; stats = new DescriptiveStatistics(x); stats.setPercentileImpl(new Percentile().withEstimationType(Percentile.EstimationType.R_7));//Use the same percentile method as R. double observedSd = stats.getStandardDeviation(); double observedQ1 = stats.getPercentile(25); double observedQ3 = stats.getPercentile(75); double observedIqr = observedQ3-observedQ1; if(bandwidthType==BandwidthType.BW_NRD){ //Scott's plugin bandwidth (bw.nrd in R) h = 1.06*Math.min(observedSd, observedIqr/1.34)*Math.pow(n, -1.0/5.0); }else{ //Silverman's rule of thumb (bw.nrd0 is the default in R and the default here.) h = 0.9*Math.min(observedSd, observedIqr/1.34)*Math.pow(n, -1.0/5.0); } //apply adjustment factor h *= adjust; }