/**
 * Asserts that two integer column summaries agree with each other.
 *
 * <p>Counters, null flags, min, max and sum must match exactly. Mean, variance and standard
 * deviation are compared as doubles with a small absolute tolerance.
 *
 * @param result1 the first summary to compare
 * @param result2 the second summary to compare
 */
@Override
protected void compareResults(
        NumericColumnSummary<Integer> result1, NumericColumnSummary<Integer> result2) {
    final double tightTolerance = 1e-12d;
    final double varianceTolerance = 1e-9d;

    // Counters
    Assert.assertEquals(result1.getTotalCount(), result2.getTotalCount());
    Assert.assertEquals(result1.getNullCount(), result2.getNullCount());
    Assert.assertEquals(result1.getMissingCount(), result2.getMissingCount());
    Assert.assertEquals(result1.getNonMissingCount(), result2.getNonMissingCount());
    Assert.assertEquals(result1.getInfinityCount(), result2.getInfinityCount());
    Assert.assertEquals(result1.getNanCount(), result2.getNanCount());

    // Null / non-null flags
    Assert.assertEquals(result1.containsNull(), result2.containsNull());
    Assert.assertEquals(result1.containsNonNull(), result2.containsNonNull());

    // Integral aggregates, compared exactly
    final int firstMin = result1.getMin().intValue();
    final int secondMin = result2.getMin().intValue();
    Assert.assertEquals(firstMin, secondMin);

    final int firstMax = result1.getMax().intValue();
    final int secondMax = result2.getMax().intValue();
    Assert.assertEquals(firstMax, secondMax);

    final int firstSum = result1.getSum().intValue();
    final int secondSum = result2.getSum().intValue();
    Assert.assertEquals(firstSum, secondSum);

    // Floating-point statistics, compared within a tolerance
    final double firstMean = result1.getMean().doubleValue();
    final double secondMean = result2.getMean().doubleValue();
    Assert.assertEquals(firstMean, secondMean, tightTolerance);

    final double firstVariance = result1.getVariance().doubleValue();
    final double secondVariance = result2.getVariance().doubleValue();
    Assert.assertEquals(firstVariance, secondVariance, varianceTolerance);

    final double firstStdDev = result1.getStandardDeviation().doubleValue();
    final double secondStdDev = result2.getStandardDeviation().doubleValue();
    Assert.assertEquals(firstStdDev, secondStdDev, tightTolerance);
}
}.summarize(intValues);
/**
 * Checks the counters of a summary built from a mix of ordinary values, nulls, NaNs and
 * infinities.
 *
 * <p>The input holds 11 entries: 2 nulls, 2 NaNs, 3 infinities and 4 ordinary numbers.
 */
@Test
public void testCounts() throws Exception {
    final Double[] mixedValues = {
        Double.NaN,
        1.0,
        null,
        123.0,
        -44.00001,
        Double.POSITIVE_INFINITY,
        55.0,
        Double.NEGATIVE_INFINITY,
        Double.NEGATIVE_INFINITY,
        null,
        Double.NaN
    };

    NumericColumnSummary<Double> summary = summarize(mixedValues);

    Assert.assertEquals(11, summary.getTotalCount());

    // null vs. non-null
    Assert.assertEquals(2, summary.getNullCount());
    Assert.assertEquals(9, summary.getNonNullCount());

    // missing (expected 7 = 2 nulls + 2 NaNs + 3 infinities) vs. non-missing
    Assert.assertEquals(7, summary.getMissingCount());
    Assert.assertEquals(4, summary.getNonMissingCount());

    // special floating-point values
    Assert.assertEquals(2, summary.getNanCount());
    Assert.assertEquals(3, summary.getInfinityCount());
}
/**
 * Renders every counter and statistic of this summary as a single
 * {@code NumericColumnSummary{name=value, ...}} string.
 *
 * @return the textual form of this summary
 */
@Override
public String toString() {
    final StringBuilder text = new StringBuilder("NumericColumnSummary{");
    text.append("totalCount=").append(getTotalCount());
    text.append(", nullCount=").append(nullCount);
    text.append(", nonNullCount=").append(getNonNullCount());
    text.append(", missingCount=").append(getMissingCount());
    text.append(", nonMissingCount=").append(nonMissingCount);
    text.append(", nanCount=").append(nanCount);
    text.append(", infinityCount=").append(infinityCount);
    text.append(", min=").append(min);
    text.append(", max=").append(max);
    text.append(", sum=").append(sum);
    text.append(", mean=").append(mean);
    text.append(", variance=").append(variance);
    text.append(", standardDeviation=").append(standardDeviation);
    text.append('}');
    return text.toString();
}
}
/**
 * Asserts that two float column summaries agree with each other.
 *
 * <p>Min and max must be identical (zero delta); mean, variance and standard deviation are
 * compared with a small absolute tolerance.
 *
 * @param result1 the first summary to compare
 * @param result2 the second summary to compare
 */
@Override
protected void compareResults(
        NumericColumnSummary<Float> result1, NumericColumnSummary<Float> result2) {
    final float exact = 0.0f;
    final double meanTolerance = 1e-10d;
    final double varianceTolerance = 1e-9d;

    // Extremes: no deviation allowed
    Assert.assertEquals(result1.getMin(), result2.getMin(), exact);
    Assert.assertEquals(result1.getMax(), result2.getMax(), exact);

    // Derived statistics: allow for rounding differences
    Assert.assertEquals(result1.getMean(), result2.getMean(), meanTolerance);
    Assert.assertEquals(result1.getVariance(), result2.getVariance(), varianceTolerance);
    Assert.assertEquals(
            result1.getStandardDeviation(), result2.getStandardDeviation(), meanTolerance);
}
// Per-column expectations for summaries computed earlier in the enclosing method
// (the summaries' declarations are outside this excerpt).

// Column 0: non-missing count, min/max read as short, and the mean.
Assert.assertEquals(8, col0Summary.getNonMissingCount());
Assert.assertEquals(1, col0Summary.getMin().shortValue());
Assert.assertEquals(10, col0Summary.getMax().shortValue());
Assert.assertEquals(5.375, col0Summary.getMean().doubleValue(), 0.0);

// Column 1: same expected min/max/mean as column 0, read as int.
Assert.assertEquals(1, col1Summary.getMin().intValue());
Assert.assertEquals(10, col1Summary.getMax().intValue());
Assert.assertEquals(5.375, col1Summary.getMean().doubleValue(), 0.0);

// Column 2: min/max read as long.
Assert.assertEquals(-100L, col2Summary.getMin().longValue());
Assert.assertEquals(10000L, col2Summary.getMax().longValue());

// Column 3: fractional values; every statistic is compared within a tolerance.
Assert.assertEquals(8, col3Summary.getTotalCount());
Assert.assertEquals(0.001000, col3Summary.getMin().doubleValue(), 0.0000001);
Assert.assertEquals(0.89999999, col3Summary.getMax().doubleValue(), 0.0000001);
Assert.assertEquals(0.2376249988883501, col3Summary.getMean().doubleValue(), 0.000000000001);
Assert.assertEquals(0.0768965488108089, col3Summary.getVariance().doubleValue(), 0.00000001);
Assert.assertEquals(0.27730226975415995, col3Summary.getStandardDeviation().doubleValue(), 0.000000000001);

// Column 4: 6 non-missing and 2 missing entries expected; min must match exactly (delta 0.0).
Assert.assertEquals(6, col4Summary.getNonMissingCount());
Assert.assertEquals(2, col4Summary.getMissingCount());
Assert.assertEquals(0.0000000000023, col4Summary.getMin().doubleValue(), 0.0);
Assert.assertEquals(79.5, col4Summary.getMax().doubleValue(), 0.000000000001);

// Column 7: only the extremes are checked.
Assert.assertEquals(100.0, col7Summary.getMax().doubleValue(), 0.00001);
Assert.assertEquals(50.0, col7Summary.getMin().doubleValue(), 0.00001);
/**
 * Summarizes the x values of the first and fourth data sets of Anscombe's Quartet.
 *
 * <p>The data has no special meaning here; it was chosen because its mean and variance are
 * known in advance.
 *
 * <p>https://en.wikipedia.org/wiki/Anscombe%27s_quartet
 */
@Test
public void testAnscomesQuartetXValues() throws Exception {
    final double expectedMean = 9.0;
    final double expectedVariance = 11.0;
    final double tolerance = 1e-10d;

    final Double[] firstSetX = {
        10.0, 8.0, 13.0, 9.0, 11.0, 14.0, 6.0, 4.0, 12.0, 7.0, 5.0
    };
    final Double[] fourthSetX = {
        8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 19.0, 8.0, 8.0, 8.0
    };

    NumericColumnSummary<Double> first = summarize(firstSetX);
    NumericColumnSummary<Double> fourth = summarize(fourthSetX);

    // The mean is expected to match exactly
    Assert.assertEquals(expectedMean, first.getMean().doubleValue(), 0.0);
    Assert.assertEquals(expectedMean, fourth.getMean().doubleValue(), 0.0);

    Assert.assertEquals(expectedVariance, first.getVariance().doubleValue(), tolerance);
    Assert.assertEquals(expectedVariance, fourth.getVariance().doubleValue(), tolerance);

    // Standard deviation is the square root of the expected variance
    final double expectedStdDev = Math.sqrt(expectedVariance);
    Assert.assertEquals(expectedStdDev, first.getStandardDeviation().doubleValue(), tolerance);
    Assert.assertEquals(expectedStdDev, fourth.getStandardDeviation().doubleValue(), tolerance);
}
/** * Use some values from Anscombe's Quartet for testing. * * <p>There was no particular reason to use these except they have known means and variance. * * <p>https://en.wikipedia.org/wiki/Anscombe%27s_quartet */ @Test public void testAnscomesQuartetYValues() throws Exception { final Float[] q1y = { 8.04f, 6.95f, 7.58f, 8.81f, 8.33f, 9.96f, 7.24f, 4.26f, 10.84f, 4.82f, 5.68f }; final Float[] q2y = { 9.14f, 8.14f, 8.74f, 8.77f, 9.26f, 8.1f, 6.13f, 3.1f, 9.13f, 7.26f, 4.74f }; final Float[] q3y = { 7.46f, 6.77f, 12.74f, 7.11f, 7.81f, 8.84f, 6.08f, 5.39f, 8.15f, 6.42f, 5.73f }; final Float[] q4y = { 6.58f, 5.76f, 7.71f, 8.84f, 8.47f, 7.04f, 5.25f, 12.5f, 5.56f, 7.91f, 6.89f }; NumericColumnSummary<Float> q1 = summarize(q1y); NumericColumnSummary<Float> q2 = summarize(q2y); NumericColumnSummary<Float> q3 = summarize(q3y); NumericColumnSummary<Float> q4 = summarize(q4y); // the y values are have less precisely matching means and variances Assert.assertEquals(7.5, q1.getMean().doubleValue(), 0.001); Assert.assertEquals(7.5, q2.getMean().doubleValue(), 0.001); Assert.assertEquals(7.5, q3.getMean().doubleValue(), 0.001); Assert.assertEquals(7.5, q4.getMean().doubleValue(), 0.001); Assert.assertEquals(4.12, q1.getVariance().doubleValue(), 0.01); Assert.assertEquals(4.12, q2.getVariance().doubleValue(), 0.01); Assert.assertEquals(4.12, q3.getVariance().doubleValue(), 0.01); Assert.assertEquals(4.12, q4.getVariance().doubleValue(), 0.01); }
/**
 * Checks the minimum, read as a {@code short}, for several inputs: a plain range, a range that
 * contains {@link Short#MIN_VALUE}, unsorted values, unsorted values with nulls, and no input.
 */
@Test
public void testMin() throws Exception {
    // Plain range
    final short plainMin = summarize(-1000, 0, 1, 50, 999, 1001).getMin().shortValue();
    Assert.assertEquals(-1000, plainMin);

    // Smallest representable short
    final short extremeMin = summarize((int) Short.MIN_VALUE, -1000, 0).getMin().shortValue();
    Assert.assertEquals(Short.MIN_VALUE, extremeMin);

    // Unsorted input
    final short unsortedMin =
            summarize(1, 8, 7, 6, 9, 10, 2, 3, 5, 0, 11, -2, 3).getMin().shortValue();
    Assert.assertEquals(-2, unsortedMin);

    // Unsorted input with nulls: the expected minimum is unchanged
    final short withNullsMin =
            summarize(1, 8, 7, 6, 9, null, 10, 2, 3, 5, null, 0, 11, -2, 3)
                    .getMin()
                    .shortValue();
    Assert.assertEquals(-2, withNullsMin);

    // No input: the minimum is expected to be null
    Assert.assertNull(summarize().getMin());
}
/**
 * Checks the mean for small inputs, including one that contains a null and one that is empty.
 */
@Test
public void testMean() throws Exception {
    final Double twoValues = summarize(0, 100).getMean();
    Assert.assertEquals(50.0, twoValues, 0.0);

    // One third of 100: compared within a tolerance
    final Double threeValues = summarize(0, 0, 100).getMean();
    Assert.assertEquals(33.333333, threeValues, 0.00001);

    final Double fourValues = summarize(0, 0, 100, 100).getMean();
    Assert.assertEquals(50.0, fourValues, 0.0);

    // With a null: the expected mean is the same as for (0, 100)
    final Double withNull = summarize(0, 100, null).getMean();
    Assert.assertEquals(50.0, withNull, 0.0);

    // No input: the mean is expected to be null
    Assert.assertNull(summarize().getMean());
}
/**
 * Checks the maximum, read as an {@code int}, for several inputs: a plain range, a range that
 * contains {@link Integer#MIN_VALUE}, unsorted values, unsorted values with nulls, and no input.
 */
@Test
public void testMax() throws Exception {
    // Plain range
    final int plainMax = summarize(-1000, 0, 1, 50, 999, 1001).getMax().intValue();
    Assert.assertEquals(1001, plainMax);

    // Only non-positive values, including the smallest int
    final int nonPositiveMax = summarize(Integer.MIN_VALUE, -1000, 0).getMax().intValue();
    Assert.assertEquals(0, nonPositiveMax);

    // Unsorted input
    final int unsortedMax =
            summarize(1, 8, 7, 6, 9, 10, 2, 3, 5, 0, 11, -2, 3).getMax().intValue();
    Assert.assertEquals(11, unsortedMax);

    // Unsorted input with nulls: the expected maximum is unchanged
    final int withNullsMax =
            summarize(1, 8, 7, 6, 9, null, 10, 2, 3, 5, null, 0, 11, -2, 3)
                    .getMax()
                    .intValue();
    Assert.assertEquals(11, withNullsMax);

    // No input: the maximum is expected to be null
    Assert.assertNull(summarize().getMax());
}
@Override public NumericColumnSummary<T> result() { Double variance = null; if (nonMissingCount > 1) { variance = m2.value() / (nonMissingCount - 1); } return new NumericColumnSummary<T>( nonMissingCount, nullCount, nanCount, infinityCount, // if nonMissingCount was zero some fields should be undefined nonMissingCount == 0 ? null : min.result(), nonMissingCount == 0 ? null : max.result(), nonMissingCount == 0 ? null : sum.result(), nonMissingCount == 0 ? null : mean.value(), variance, variance == null ? null : Math.sqrt(variance) // standard deviation ); }
/**
 * Asserts that two double column summaries agree with each other.
 *
 * <p>Min and max must be identical (zero delta); mean, variance and standard deviation are
 * compared with a small absolute tolerance.
 *
 * @param result1 the first summary to compare
 * @param result2 the second summary to compare
 */
@Override
protected void compareResults(
        NumericColumnSummary<Double> result1, NumericColumnSummary<Double> result2) {
    final double exact = 0.0;
    final double tightTolerance = 1e-12d;
    final double varianceTolerance = 1e-9d;

    // Extremes: no deviation allowed
    Assert.assertEquals(result1.getMin(), result2.getMin(), exact);
    Assert.assertEquals(result1.getMax(), result2.getMax(), exact);

    // Derived statistics: allow for rounding differences
    Assert.assertEquals(result1.getMean(), result2.getMean(), tightTolerance);
    Assert.assertEquals(result1.getVariance(), result2.getVariance(), varianceTolerance);
    Assert.assertEquals(
            result1.getStandardDeviation(), result2.getStandardDeviation(), tightTolerance);
}
/**
 * Produces a one-line description of this summary in the form
 * {@code NumericColumnSummary{name=value, ...}}, listing all counters followed by the
 * statistics.
 *
 * @return the textual form of this summary
 */
@Override
public String toString() {
    return new StringBuilder("NumericColumnSummary{")
            // counters
            .append("totalCount=").append(getTotalCount())
            .append(", nullCount=").append(nullCount)
            .append(", nonNullCount=").append(getNonNullCount())
            .append(", missingCount=").append(getMissingCount())
            .append(", nonMissingCount=").append(nonMissingCount)
            .append(", nanCount=").append(nanCount)
            .append(", infinityCount=").append(infinityCount)
            // statistics
            .append(", min=").append(min)
            .append(", max=").append(max)
            .append(", sum=").append(sum)
            .append(", mean=").append(mean)
            .append(", variance=").append(variance)
            .append(", standardDeviation=").append(standardDeviation)
            .append('}')
            .toString();
}
}
/**
 * Summarizes the x values of the first and fourth data sets of Anscombe's Quartet, given as
 * floats.
 *
 * <p>The data has no special meaning here; it was chosen because its mean and variance are
 * known in advance.
 *
 * <p>https://en.wikipedia.org/wiki/Anscombe%27s_quartet
 */
@Test
public void testAnscomesQuartetXValues() throws Exception {
    final double expectedMean = 9.0;
    final double expectedVariance = 11.0;
    final double tolerance = 1e-10d;

    final Float[] firstSetX = {
        10.0f, 8.0f, 13.0f, 9.0f, 11.0f, 14.0f, 6.0f, 4.0f, 12.0f, 7.0f, 5.0f
    };
    final Float[] fourthSetX = {
        8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 19.0f, 8.0f, 8.0f, 8.0f
    };

    NumericColumnSummary<Float> first = summarize(firstSetX);
    NumericColumnSummary<Float> fourth = summarize(fourthSetX);

    // The mean is expected to match exactly
    Assert.assertEquals(expectedMean, first.getMean().doubleValue(), 0.0);
    Assert.assertEquals(expectedMean, fourth.getMean().doubleValue(), 0.0);

    Assert.assertEquals(expectedVariance, first.getVariance().doubleValue(), tolerance);
    Assert.assertEquals(expectedVariance, fourth.getVariance().doubleValue(), tolerance);

    // Standard deviation is the square root of the expected variance
    final double expectedStdDev = Math.sqrt(expectedVariance);
    Assert.assertEquals(expectedStdDev, first.getStandardDeviation().doubleValue(), tolerance);
    Assert.assertEquals(expectedStdDev, fourth.getStandardDeviation().doubleValue(), tolerance);
}
/** * Use some values from Anscombe's Quartet for testing. * * <p>There was no particular reason to use these except they have known means and variance. * * <p>https://en.wikipedia.org/wiki/Anscombe%27s_quartet */ @Test public void testAnscomesQuartetYValues() throws Exception { final Double[] q1y = { 8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68 }; final Double[] q2y = { 9.14, 8.14, 8.74, 8.77, 9.26, 8.1, 6.13, 3.1, 9.13, 7.26, 4.74 }; final Double[] q3y = { 7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73 }; final Double[] q4y = { 6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.5, 5.56, 7.91, 6.89 }; NumericColumnSummary<Double> q1 = summarize(q1y); NumericColumnSummary<Double> q2 = summarize(q2y); NumericColumnSummary<Double> q3 = summarize(q3y); NumericColumnSummary<Double> q4 = summarize(q4y); // the y values are have less precisely matching means and variances Assert.assertEquals(7.5, q1.getMean().doubleValue(), 0.001); Assert.assertEquals(7.5, q2.getMean().doubleValue(), 0.001); Assert.assertEquals(7.5, q3.getMean().doubleValue(), 0.001); Assert.assertEquals(7.5, q4.getMean().doubleValue(), 0.001); Assert.assertEquals(4.12, q1.getVariance().doubleValue(), 0.01); Assert.assertEquals(4.12, q2.getVariance().doubleValue(), 0.01); Assert.assertEquals(4.12, q3.getVariance().doubleValue(), 0.01); Assert.assertEquals(4.12, q4.getVariance().doubleValue(), 0.01); }
/**
 * Checks the minimum, read as an {@code int}, for several inputs: a plain range, a range that
 * contains {@link Integer#MIN_VALUE}, unsorted values, unsorted values with nulls, and no input.
 */
@Test
public void testMin() throws Exception {
    // Plain range
    final int plainMin = summarize(-1000, 0, 1, 50, 999, 1001).getMin().intValue();
    Assert.assertEquals(-1000, plainMin);

    // Smallest representable int
    final int extremeMin = summarize(Integer.MIN_VALUE, -1000, 0).getMin().intValue();
    Assert.assertEquals(Integer.MIN_VALUE, extremeMin);

    // Unsorted input
    final int unsortedMin =
            summarize(1, 8, 7, 6, 9, 10, 2, 3, 5, 0, 11, -2, 3).getMin().intValue();
    Assert.assertEquals(-2, unsortedMin);

    // Unsorted input with nulls: the expected minimum is unchanged
    final int withNullsMin =
            summarize(1, 8, 7, 6, 9, null, 10, 2, 3, 5, null, 0, 11, -2, 3)
                    .getMin()
                    .intValue();
    Assert.assertEquals(-2, withNullsMin);

    // No input: the minimum is expected to be null
    Assert.assertNull(summarize().getMin());
}
/**
 * Verifies the mean reported for a handful of tiny inputs, for an input containing a null, and
 * for an empty input.
 */
@Test
public void testMean() throws Exception {
    final double exact = 0.0;
    final double loose = 0.00001;

    Double mean = summarize(0, 100).getMean();
    Assert.assertEquals(50.0, mean, exact);

    // One third of 100 has no exact decimal form, so use the loose tolerance
    mean = summarize(0, 0, 100).getMean();
    Assert.assertEquals(33.333333, mean, loose);

    mean = summarize(0, 0, 100, 100).getMean();
    Assert.assertEquals(50.0, mean, exact);

    // Same expectation as for (0, 100) even though a null is present
    mean = summarize(0, 100, null).getMean();
    Assert.assertEquals(50.0, mean, exact);

    // No input: the mean is expected to be null
    Assert.assertNull(summarize().getMean());
}
/**
 * Checks the maximum, read as a {@code float}, for several inputs: a plain range, unsorted
 * values, unsorted values with nulls, all-negative values with nulls, and no input.
 */
@Test
public void testMax() throws Exception {
    final float exact = 0.0f;

    // Plain range
    final float plainMax =
            summarize(-1000.0f, 0.0f, 1.0f, 50.0f, 999.0f, 1001.0f).getMax().floatValue();
    Assert.assertEquals(1001.0f, plainMax, exact);

    // Unsorted input
    final float unsortedMax =
            summarize(
                            1.0f, 8.0f, 7.0f, 6.0f, 9.0f, 10.0f, 2.0f, 3.0f, 5.0f, 0.0f, 11.0f,
                            -2.0f, 3.0f)
                    .getMax()
                    .floatValue();
    Assert.assertEquals(11.0f, unsortedMax, exact);

    // Unsorted input with nulls: the expected maximum is unchanged
    final float withNullsMax =
            summarize(
                            1.0f, 8.0f, 7.0f, 6.0f, 9.0f, null, 10.0f, 2.0f, 3.0f, 5.0f, null,
                            0.0f, 11.0f, -2.0f, 3.0f)
                    .getMax()
                    .floatValue();
    Assert.assertEquals(11.0f, withNullsMax, exact);

    // Only negative values (plus nulls): the expected maximum is the one closest to zero
    final float negativeMax =
            summarize(-8.0f, -7.0f, -6.0f, -9.0f, null, -10.0f, null, -2.0f)
                    .getMax()
                    .floatValue();
    Assert.assertEquals(-2.0f, negativeMax, exact);

    // No input: the maximum is expected to be null
    Assert.assertNull(summarize().getMax());
}
@Override public NumericColumnSummary<T> result() { Double variance = null; if (nonMissingCount > 1) { variance = m2.value() / (nonMissingCount - 1); } return new NumericColumnSummary<T>( nonMissingCount, nullCount, nanCount, infinityCount, // if nonMissingCount was zero some fields should be undefined nonMissingCount == 0 ? null : min.result(), nonMissingCount == 0 ? null : max.result(), nonMissingCount == 0 ? null : sum.result(), nonMissingCount == 0 ? null : mean.value(), variance, variance == null ? null : Math.sqrt(variance) // standard deviation ); }
/**
 * Checks two integer column summaries against each other, field by field.
 *
 * <p>All counters, both null flags, and the integral min, max and sum have to be equal. The
 * mean, variance and standard deviation only have to agree within a small absolute tolerance.
 *
 * @param result1 the first summary to compare
 * @param result2 the second summary to compare
 */
@Override
protected void compareResults(
        NumericColumnSummary<Integer> result1, NumericColumnSummary<Integer> result2) {
    final double meanAndStdDevDelta = 1e-12d;
    final double varianceDelta = 1e-9d;

    // --- counters ---
    Assert.assertEquals(result1.getTotalCount(), result2.getTotalCount());
    Assert.assertEquals(result1.getNullCount(), result2.getNullCount());
    Assert.assertEquals(result1.getMissingCount(), result2.getMissingCount());
    Assert.assertEquals(result1.getNonMissingCount(), result2.getNonMissingCount());
    Assert.assertEquals(result1.getInfinityCount(), result2.getInfinityCount());
    Assert.assertEquals(result1.getNanCount(), result2.getNanCount());

    // --- flags ---
    Assert.assertEquals(result1.containsNull(), result2.containsNull());
    Assert.assertEquals(result1.containsNonNull(), result2.containsNonNull());

    // --- exact integral statistics ---
    Assert.assertEquals(
            result1.getMin().intValue(),
            result2.getMin().intValue());
    Assert.assertEquals(
            result1.getMax().intValue(),
            result2.getMax().intValue());
    Assert.assertEquals(
            result1.getSum().intValue(),
            result2.getSum().intValue());

    // --- approximate floating-point statistics ---
    Assert.assertEquals(
            result1.getMean().doubleValue(),
            result2.getMean().doubleValue(),
            meanAndStdDevDelta);
    Assert.assertEquals(
            result1.getVariance().doubleValue(),
            result2.getVariance().doubleValue(),
            varianceDelta);
    Assert.assertEquals(
            result1.getStandardDeviation().doubleValue(),
            result2.getStandardDeviation().doubleValue(),
            meanAndStdDevDelta);
}
}.summarize(values);