/**
 * Computes the one-sample
 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
 * t statistic</a> for comparing the mean of the dataset summarized by
 * {@code sampleStats} against the constant {@code mu}.
 * <p>
 * The mean, variance and sample count are read from {@code sampleStats} and
 * handed to the four-argument {@code t} overload, which produces the result.
 * </p>
 * <p>
 * <strong>Precondition</strong>: {@code sampleStats.getN() >= 2}.
 * </p>
 *
 * @param mu comparison constant
 * @param sampleStats summary statistics (mean, variance, count) of the sample
 * @return t statistic
 * @throws NullArgumentException if {@code sampleStats} is {@code null}
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 */
public double t(final double mu, final StatisticalSummary sampleStats)
        throws NullArgumentException, NumberIsTooSmallException {
    // Validate first so no statistic is read from an unusable summary.
    checkSampleData(sampleStats);

    final double sampleMean = sampleStats.getMean();
    final double sampleVariance = sampleStats.getVariance();
    final long sampleSize = sampleStats.getN();
    return t(sampleMean, mu, sampleVariance, sampleSize);
}
double var = current.getVariance(); double m2 = var * (n - 1d); double mean = current.getMean(); while (iterator.hasNext()) { current = iterator.next(); final double curN = current.getN(); n += curN; final double meanDiff = current.getMean() - mean; mean = sum / n; final double curM2 = current.getVariance() * (curN - 1d);
return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(), sampleStats.getN());
/**
 * Builds the empirical distribution from the values stored at
 * {@code valuesFileURL}, using {@code binCount} bins, and then records the
 * sample mean in {@code mu} and the sample standard deviation in {@code sigma}.
 * <p>
 * {@code valuesFileURL} must exist and be readable by this process at runtime.
 * </p>
 * <p>
 * Call this method before using {@code getNext()} with
 * {@code mode = DIGEST_MODE}.
 * </p>
 *
 * @param binCount the number of bins used in computing the empirical
 *        distribution
 * @throws NullArgumentException if the {@code valuesFileURL} has not been set
 * @throws IOException if an error occurs reading the input file
 * @throws ZeroException if URL contains no data
 */
public void computeDistribution(int binCount)
        throws NullArgumentException, IOException, ZeroException {
    // The field is assigned before loading, so it already refers to the new
    // distribution even if load(...) fails.
    this.empiricalDistribution =
            new EmpiricalDistribution(binCount, this.randomData.getRandomGenerator());
    this.empiricalDistribution.load(this.valuesFileURL);

    this.mu = this.empiricalDistribution.getSampleStats().getMean();
    this.sigma = this.empiricalDistribution.getSampleStats().getStandardDeviation();
}
/**
 * Returns the mean as reported by the wrapped {@code delegate}.
 *
 * @return the value of {@code delegate.getMean()}
 */
@Override
public double getMean() {
    final double delegatedMean = delegate.getMean();
    return delegatedMean;
}
/**
 * Appends a "max" and a "mean" measurement for one metric, when statistics
 * for that metric are available.
 * <p>
 * The statistics are looked up through {@code getStats(loadedMetrics, metricName)}.
 * If that lookup yields nothing, {@code measurements} is left untouched.
 * Otherwise two entries are added, in this order:
 * {@code cluster-<metricName>_max} and {@code cluster-<metricName>_mean}.
 * </p>
 *
 * @param loadedMetrics metric samples keyed by name, passed on to {@code getStats}
 * @param measurements list that receives the new measurements
 * @param metricName name of the metric to summarize
 * @param unit unit attached to both measurements
 */
private void addMeanMaxMeasurements(Map<String, double[]> loadedMetrics, List<Measurement> measurements,
        String metricName, String unit) {
    Optional<StatisticalSummary> statistics = getStats(loadedMetrics, metricName);
    if (!statistics.isPresent()) {
        return;
    }

    StatisticalSummary summary = statistics.get();
    String namePrefix = "cluster-" + metricName;
    measurements.add(Measurement.measurement(namePrefix + "_max", unit, summary.getMax()));
    measurements.add(Measurement.measurement(namePrefix + "_mean", unit, summary.getMean()));
}
/**
 * Merges two summaries into a single summary describing both samples together.
 * <p>
 * Empty inputs (those whose {@code getN()} is 0) are handled up front:
 * </p>
 * <ul>
 *   <li>both empty: the shared {@code emptySummary} is returned;</li>
 *   <li>exactly one empty: the other summary is returned as-is.</li>
 * </ul>
 * <p>
 * Otherwise the count and sum are added, the mean is the count-weighted
 * average of both means, and max/min are taken across both inputs.
 * </p>
 * <p>
 * NOTE(review): the merged variance is computed as
 * {@code (n1 * (var1 + d1^2) + n2 * (var2 + d2^2)) / n}, i.e. it weights by
 * {@code n} rather than {@code n - 1}. That is exact only if the inputs carry
 * population variances; confirm which convention the callers supply.
 * </p>
 *
 * @param s1 first summary; must not be {@code null}
 * @param s2 second summary; must not be {@code null}
 * @return the combined summary
 */
public static StatisticalSummary combine(StatisticalSummary s1, StatisticalSummary s2) {
    final long n1 = s1.getN();
    final long n2 = s2.getN();

    // The both-empty case must be tested first: checking a single side first
    // would always match and make this branch unreachable.
    if (n1 == 0 && n2 == 0) {
        return emptySummary;
    }
    if (n1 == 0) {
        return s2;
    }
    if (n2 == 0) {
        return s1;
    }

    final long n = n1 + n2;
    final double mean1 = s1.getMean();
    final double mean2 = s2.getMean();
    final double mean = (n1 * mean1 + n2 * mean2) / n;

    // Squared distance of each input mean from the combined mean.
    final double s1Diff = (mean - mean1) * (mean - mean1);
    final double s2Diff = (mean - mean2) * (mean - mean2);
    final double var = (n1 * (s1.getVariance() + s1Diff) + n2 * (s2.getVariance() + s2Diff)) / n;

    final double sum = s1.getSum() + s2.getSum();
    final double max = Math.max(s1.getMax(), s2.getMax());
    final double min = Math.min(s1.getMin(), s2.getMin());
    return new StatisticalSummaryValues(mean, var, n, max, min, sum);
}
/**
 * Computes the one-sample
 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
 * t statistic</a> for comparing the mean of the dataset summarized by
 * {@code sampleStats} against the constant {@code mu}.
 * <p>
 * The mean, variance and sample count are read from {@code sampleStats} and
 * handed to the four-argument {@code t} overload, which produces the result.
 * </p>
 * <p>
 * <strong>Precondition</strong>: {@code sampleStats.getN() >= 2}.
 * </p>
 *
 * @param mu comparison constant
 * @param sampleStats summary statistics (mean, variance, count) of the sample
 * @return t statistic
 * @throws NullArgumentException if {@code sampleStats} is {@code null}
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 */
public double t(final double mu, final StatisticalSummary sampleStats)
        throws NullArgumentException, NumberIsTooSmallException {
    // Validate first so no statistic is read from an unusable summary.
    checkSampleData(sampleStats);

    final double sampleMean = sampleStats.getMean();
    final double sampleVariance = sampleStats.getVariance();
    final long sampleSize = sampleStats.getN();
    return t(sampleMean, mu, sampleVariance, sampleSize);
}
/**
 * Creates an instance from an existing summary by forwarding its mean,
 * variance, count, maximum, minimum and sum (in that order) to the
 * six-argument constructor of this class.
 *
 * @param summary source of the six statistics; it is dereferenced immediately,
 *        so passing {@code null} results in a {@code NullPointerException}
 */
public Values(StatisticalSummary summary) { this(summary.getMean(), summary.getVariance(), summary.getN(), summary.getMax(), summary.getMin(), summary.getSum()); }
/**
 * Computes the one-sample
 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
 * t statistic</a> for comparing the mean of the dataset summarized by
 * {@code sampleStats} against the constant {@code mu}.
 * <p>
 * The mean, variance and sample count are read from {@code sampleStats} and
 * handed to the four-argument {@code t} overload, which produces the result.
 * </p>
 * <p>
 * <strong>Precondition</strong>: {@code sampleStats.getN() >= 2}.
 * </p>
 *
 * @param mu comparison constant
 * @param sampleStats summary statistics (mean, variance, count) of the sample
 * @return t statistic
 * @throws NullArgumentException if {@code sampleStats} is {@code null}
 * @throws NumberIsTooSmallException if the number of samples is &lt; 2
 */
public double t(final double mu, final StatisticalSummary sampleStats)
        throws NullArgumentException, NumberIsTooSmallException {
    // Validate first so no statistic is read from an unusable summary.
    checkSampleData(sampleStats);

    final double sampleMean = sampleStats.getMean();
    final double sampleVariance = sampleStats.getVariance();
    final long sampleSize = sampleStats.getN();
    return t(sampleMean, mu, sampleVariance, sampleSize);
}
/**
 * Returns the summary of a sample in which every value has been multiplied
 * by {@code scale}.
 * <p>
 * Mean, sum, max and min are multiplied by {@code scale}; the variance is
 * multiplied by {@code scale * scale}; the count is unchanged.
 * </p>
 * <p>
 * Multiplying by a negative factor reverses ordering, so for {@code scale < 0}
 * the scaled minimum becomes the new maximum and vice versa. Without that
 * swap the result would report a maximum smaller than its minimum.
 * </p>
 *
 * @param s summary to scale; must not be {@code null}
 * @param scale multiplicative factor applied to every underlying value
 * @return a new summary holding the scaled statistics
 */
public static StatisticalSummary scale(StatisticalSummary s, double scale) {
    final double square = scale * scale;

    final double scaledMean = s.getMean() * scale;
    final double scaledVariance = s.getVariance() * square;
    final long n = s.getN();
    final double scaledMax = s.getMax() * scale;
    final double scaledMin = s.getMin() * scale;
    final double scaledSum = s.getSum() * scale;

    // A negative factor flips the order of the extremes.
    final boolean flipped = scale < 0;
    return new StatisticalSummaryValues(
            scaledMean,
            scaledVariance,
            n,
            flipped ? scaledMin : scaledMax,
            flipped ? scaledMax : scaledMin,
            scaledSum
    );
}
/**
 * Expands every {@link StatisticalSummary} cell of {@code df} into six
 * numeric entries ("count", "mean", "std", "var", "max", "min") in a new
 * data frame.
 * <p>
 * Cells that are not a {@code StatisticalSummary} are ignored. A column is
 * added to the result the first time a summary is found in it. While the
 * result still reports itself as empty, one row per (source row, statistic)
 * pair is appended, using the row label produced by the {@code name} helper.
 * Each statistic is stored as a boxed {@code Double}.
 * </p>
 *
 * @param df the frame whose summary cells are expanded
 * @param <V> the value type of the frame; the stored {@code Double} values
 *        are cast to it unchecked
 * @return a new frame holding the expanded statistics
 */
@SuppressWarnings("unchecked")
public static <V> DataFrame<V> describe(final DataFrame<V> df) {
    final DataFrame<V> desc = new DataFrame<>();
    for (final Object col : df.columns()) {
        for (final Object row : df.index()) {
            final V value = df.get(row, col);
            if (!(value instanceof StatisticalSummary)) {
                continue;
            }

            if (!desc.columns().contains(col)) {
                desc.add(col);
                if (desc.isEmpty()) {
                    // First summary column: create the statistic rows for every source row.
                    for (final Object r : df.index()) {
                        for (final Object stat : Arrays.asList("count", "mean", "std", "var", "max", "min")) {
                            final Object name = name(df, r, stat);
                            desc.append(name, Collections.<V>emptyList());
                        }
                    }
                }
            }

            final StatisticalSummary summary = StatisticalSummary.class.cast(value);
            // Double.valueOf replaces the deprecated Double(double) constructor.
            desc.set(name(df, row, "count"), col, (V) Double.valueOf((double) summary.getN()));
            desc.set(name(df, row, "mean"), col, (V) Double.valueOf(summary.getMean()));
            desc.set(name(df, row, "std"), col, (V) Double.valueOf(summary.getStandardDeviation()));
            desc.set(name(df, row, "var"), col, (V) Double.valueOf(summary.getVariance()));
            desc.set(name(df, row, "max"), col, (V) Double.valueOf(summary.getMax()));
            desc.set(name(df, row, "min"), col, (V) Double.valueOf(summary.getMin()));
        }
    }
    return desc;
}
/**
 * Builds the empirical distribution from the values stored at
 * {@code valuesFileURL}, using {@code binCount} bins, and then records the
 * sample mean in {@code mu} and the sample standard deviation in {@code sigma}.
 * <p>
 * {@code valuesFileURL} must exist and be readable by this process at runtime.
 * </p>
 * <p>
 * Call this method before using {@code getNext()} with
 * {@code mode = DIGEST_MODE}.
 * </p>
 *
 * @param binCount the number of bins used in computing the empirical
 *        distribution
 * @throws NullArgumentException if the {@code valuesFileURL} has not been set
 * @throws IOException if an error occurs reading the input file
 * @throws ZeroException if URL contains no data
 */
public void computeDistribution(int binCount)
        throws NullArgumentException, IOException, ZeroException {
    // The field is assigned before loading, so it already refers to the new
    // distribution even if load(...) fails.
    this.empiricalDistribution =
            new EmpiricalDistribution(binCount, this.randomData.getRandomGenerator());
    this.empiricalDistribution.load(this.valuesFileURL);

    this.mu = this.empiricalDistribution.getSampleStats().getMean();
    this.sigma = this.empiricalDistribution.getSampleStats().getStandardDeviation();
}
return describeSize(max); } else { double mean = size.getMean(); double sem = size.getStandardDeviation() / Math.sqrt(size.getN()); String meanDescription;
/**
 * Expands every {@link StatisticalSummary} cell of {@code df} into six
 * numeric entries ("count", "mean", "std", "var", "max", "min") in a new
 * data frame.
 * <p>
 * Cells that are not a {@code StatisticalSummary} are ignored. A column is
 * added to the result the first time a summary is found in it. While the
 * result still reports itself as empty, one row per (source row, statistic)
 * pair is appended, using the row label produced by the {@code name} helper.
 * Each statistic is stored as a boxed {@code Double}.
 * </p>
 *
 * @param df the frame whose summary cells are expanded
 * @param <V> the value type of the frame; the stored {@code Double} values
 *        are cast to it unchecked
 * @return a new frame holding the expanded statistics
 */
@SuppressWarnings("unchecked")
public static <V> DataFrame<V> describe(final DataFrame<V> df) {
    final DataFrame<V> desc = new DataFrame<>();
    for (final Object col : df.columns()) {
        for (final Object row : df.index()) {
            final V value = df.get(row, col);
            if (!(value instanceof StatisticalSummary)) {
                continue;
            }

            if (!desc.columns().contains(col)) {
                desc.add(col);
                if (desc.isEmpty()) {
                    // First summary column: create the statistic rows for every source row.
                    for (final Object r : df.index()) {
                        for (final Object stat : Arrays.asList("count", "mean", "std", "var", "max", "min")) {
                            final Object name = name(df, r, stat);
                            desc.append(name, Collections.<V>emptyList());
                        }
                    }
                }
            }

            final StatisticalSummary summary = StatisticalSummary.class.cast(value);
            // Double.valueOf replaces the deprecated Double(double) constructor.
            desc.set(name(df, row, "count"), col, (V) Double.valueOf((double) summary.getN()));
            desc.set(name(df, row, "mean"), col, (V) Double.valueOf(summary.getMean()));
            desc.set(name(df, row, "std"), col, (V) Double.valueOf(summary.getStandardDeviation()));
            desc.set(name(df, row, "var"), col, (V) Double.valueOf(summary.getVariance()));
            desc.set(name(df, row, "max"), col, (V) Double.valueOf(summary.getMax()));
            desc.set(name(df, row, "min"), col, (V) Double.valueOf(summary.getMin()));
        }
    }
    return desc;
}
/**
 * Renders a human-readable description of a set of durations.
 * <p>
 * When the minimum and maximum are equal, only that single value is
 * described. Otherwise the text lists the minimum, the mean, the standard
 * deviation and the maximum, in that order.
 * </p>
 * <p>
 * The mean is shown together with its standard error (standard deviation
 * divided by the square root of the count) whenever that error is non-zero.
 * In that case both are converted to the unit chosen by
 * {@code displayUnitFor} for the rounded mean, then formatted by
 * {@code toThreeSigFig}.
 * </p>
 *
 * @param duration summary of the measured durations
 * @param units unit in which the summarized values are expressed
 * @return the formatted description
 */
public static String describeDuration(StatisticalSummary duration, TimeUnit units) {
    final double min = duration.getMin();
    final double max = duration.getMax();
    if (min == max) {
        // Every sample is the same: a single figure says it all.
        return describeDuration(max, units);
    }

    final double mean = duration.getMean();
    final double sem = duration.getStandardDeviation() / Math.sqrt(duration.getN());

    final String meanDescription;
    if (sem != 0) {
        final TimeUnit targetUnits = displayUnitFor(Math.round(mean), units);
        final double scaledMean = convert(mean, units, targetUnits);
        final double scaledSem = convert(sem, units, targetUnits);
        meanDescription = "(" + toThreeSigFig(scaledMean, 2000, scaledSem) + ") "
                + SHORT_TIMEUNIT_NAMES.get(targetUnits);
    } else {
        meanDescription = describeDuration(mean, units);
    }

    final double sd = duration.getStandardDeviation();
    final String minText = describeDuration(min, units);
    final String sdText = describeDuration(sd, units);
    final String maxText = describeDuration(max, units);
    return " min. " + minText
            + ", mean " + meanDescription
            + ", SD " + sdText
            + ", max. " + maxText;
}