@Override public Float result() { // overflow will go to infinity return (float) sum.value(); } }
/**
 * Reports the accumulated sum as a {@link Double}.
 *
 * @return the current value of {@code sum}
 */
@Override
public Double result() {
    final double total = sum.value();
    return total;
}
}
/**
 * Builds the summary from the counters collected so far.
 *
 * <p>The minimum, maximum and mean string length are reported as {@code null}
 * when no non-null value has been counted.
 *
 * @return a new {@code StringColumnSummary} reflecting the current state
 */
@Override
public StringColumnSummary result() {
    Integer shortest = null;
    Integer longest = null;
    Double averageLength = null;

    // Length statistics are only defined once at least one non-null value was seen.
    if (nonNullCount != 0L) {
        shortest = minStringLength;
        longest = maxStringLength;
        averageLength = meanLength.value();
    }

    return new StringColumnSummary(
            nonNullCount,
            nullCount,
            emptyCount,
            shortest,
            longest,
            averageLength);
}
}
/**
 * Merges the state of another string summary aggregator into this one.
 *
 * <p>Null counts and the min/max lengths are always merged. The non-null count,
 * empty count and mean length are taken over from {@code other} when this
 * aggregator has no non-null values yet; otherwise they are merged only when
 * {@code other} has at least one non-null value.
 *
 * @param otherSameType the aggregator to merge; cast to {@code StringSummaryAggregator}
 */
@Override
public void combine(Aggregator<String, StringColumnSummary> otherSameType) {
    final StringSummaryAggregator that = (StringSummaryAggregator) otherSameType;

    nullCount += that.nullCount;
    minStringLength = Math.min(minStringLength, that.minStringLength);
    maxStringLength = Math.max(maxStringLength, that.maxStringLength);

    if (nonNullCount == 0) {
        // Nothing counted on this side: adopt the other side's state as-is.
        nonNullCount = that.nonNullCount;
        emptyCount = that.emptyCount;
        meanLength = that.meanLength;
        return;
    }

    if (that.nonNullCount == 0) {
        // The other side contributes no non-null values; nothing more to merge.
        return;
    }

    final long total = nonNullCount + that.nonNullCount;
    emptyCount += that.emptyCount;

    // Shift this mean towards the other mean, weighted by the other side's share of the total.
    final double meanGap = that.meanLength.value() - meanLength.value();
    meanLength = meanLength.add(meanGap * that.nonNullCount / total);
    nonNullCount = total;
}
/**
 * Adds one string to the running summary.
 *
 * <p>A {@code null} value only increments the null counter. Any other value
 * increments the non-null counter (and the empty counter for empty strings) and
 * updates the minimum, maximum and running mean of the string length.
 *
 * @param value the string to account for, may be {@code null}
 */
@Override
public void aggregate(String value) {
    if (value == null) {
        nullCount++;
        return;
    }

    nonNullCount++;
    if (value.isEmpty()) {
        emptyCount++;
    }

    final int len = value.length();
    minStringLength = Math.min(minStringLength, len);
    maxStringLength = Math.max(maxStringLength, len);

    // Incremental mean update: move the mean by the new value's offset divided by the new count.
    final double offsetFromMean = len - meanLength.value();
    meanLength = meanLength.add(offsetFromMean / nonNullCount);
}
/**
 * Adds another compensated (Kahan) sum to this one and returns the result as a
 * new instance, carrying an updated correction term to reduce numeric error.
 *
 * @param other the sum to add
 * @return a new {@code CompensatedSum} holding the combined value and correction term
 */
public CompensatedSum add(CompensatedSum other) {
    // Fold both correction terms into the incoming value before adding it.
    final double carried = delta + other.delta();
    final double increment = other.value() + carried;

    final double total = value + increment;
    // Difference between what was meant to be added and what was actually added.
    final double residual = increment - (total - value);

    return new CompensatedSum(total, residual);
}
@Override public NumericColumnSummary<T> result() { Double variance = null; if (nonMissingCount > 1) { variance = m2.value() / (nonMissingCount - 1); } return new NumericColumnSummary<T>( nonMissingCount, nullCount, nanCount, infinityCount, // if nonMissingCount was zero some fields should be undefined nonMissingCount == 0 ? null : min.result(), nonMissingCount == 0 ? null : max.result(), nonMissingCount == 0 ? null : sum.result(), nonMissingCount == 0 ? null : mean.value(), variance, variance == null ? null : Math.sqrt(variance) // standard deviation ); }
/**
 * Adds one value to the current aggregation.
 *
 * <p>{@code null}, NaN and infinite values are only counted in their respective
 * counters. Every other value is forwarded to the min, max and sum aggregators
 * and folded into the running {@code mean} and {@code m2} accumulators.
 *
 * @param value the value to account for, may be {@code null}
 */
@Override
public void aggregate(T value) {
    if (value == null) {
        nullCount++;
        return;
    }
    if (isNan(value)) {
        nanCount++;
        return;
    }
    if (isInfinite(value)) {
        infinityCount++;
        return;
    }

    nonMissingCount++;
    min.aggregate(value);
    max.aggregate(value);
    sum.aggregate(value);

    final double current = value.doubleValue();
    // Offset from the mean BEFORE this value is folded in.
    final double offsetBefore = current - mean.value();
    mean = mean.add(offsetBefore / nonMissingCount);
    // Offset from the mean AFTER the update; the product feeds the m2 accumulator.
    final double offsetAfter = current - mean.value();
    m2 = m2.add(offsetBefore * offsetAfter);
}
/**
 * Merges the state of another numeric summary aggregator into this one.
 *
 * <p>The null, NaN and infinity counters are always added. If this aggregator
 * has no non-missing values, the remaining state is taken over from
 * {@code other}; otherwise it is merged only when {@code other} has at least
 * one non-missing value.
 *
 * @param otherSameType the aggregator to merge; cast to {@code NumericSummaryAggregator<T>}
 */
@Override
public void combine(Aggregator<T, NumericColumnSummary<T>> otherSameType) {
    final NumericSummaryAggregator<T> that = (NumericSummaryAggregator<T>) otherSameType;

    nullCount += that.nullCount;
    nanCount += that.nanCount;
    infinityCount += that.infinityCount;

    if (nonMissingCount == 0) {
        // Nothing aggregated on this side: adopt the other side's state as-is.
        nonMissingCount = that.nonMissingCount;
        min = that.min;
        max = that.max;
        sum = that.sum;
        mean = that.mean;
        m2 = that.m2;
        return;
    }

    if (that.nonMissingCount == 0) {
        // The other side contributes no non-missing values; nothing more to merge.
        return;
    }

    final long total = nonMissingCount + that.nonMissingCount;
    min.combine(that.min);
    max.combine(that.max);
    sum.combine(that.sum);

    final double meanGap = that.mean.value() - mean.value();
    // Shift this mean towards the other mean, weighted by the other side's share of the total.
    mean = mean.add(meanGap * that.nonMissingCount / total);
    // Add both m2 accumulators plus a term for the gap between the two means.
    // nonMissingCount must still hold this side's pre-merge count here.
    m2 = m2.add(that.m2)
            .add(meanGap * meanGap * nonMissingCount * that.nonMissingCount / total);
    nonMissingCount = total;
}
double naiveResult1 = smallSum.value(); double naiveResult2 = largeSum.value(); naiveResult1 += smallSum.value(); naiveResult2 += smallSum.value(); naiveResult1 += largeSum.value(); naiveResult2 += smallSum.value(); Assert.assertEquals(1000.011, compensatedResult1.value(), 0.0); Assert.assertEquals(1000.011, compensatedResult2.value(), 0.0); Assert.assertEquals(1000.0109999999997, naiveResult2, 0.0); Assert.assertEquals(compensatedResult1.value(), compensatedResult2.value(), 0.0); Assert.assertEquals(naiveResult1, naiveResult2, 0.0001); Assert.assertNotEquals(naiveResult1, naiveResult2, 0.0);
/**
 * Verifies the value and the correction term of a {@code CompensatedSum} that
 * starts at 0.001 and has 0.001 added to it ten times.
 */
@Test
public void testDelta() throws Exception {
    CompensatedSum compensatedResult1 = new CompensatedSum(0.001, 0.0);
    for (int i = 0; i < 10; i++) {
        compensatedResult1 = compensatedResult1.add(0.001);
    }
    Assert.assertEquals(0.011, compensatedResult1.value(), 0.0);
    // Plain double literal instead of the deprecated Double(String) constructor; same value, no boxing.
    Assert.assertEquals(8.673617379884035E-19, compensatedResult1.delta(), 0.0);
}
@Override public Float result() { // overflow will go to infinity return (float) sum.value(); } }
/**
 * Reports the accumulated sum as a {@link Double}.
 *
 * @return the current value of {@code sum}
 */
@Override
public Double result() {
    final double total = sum.value();
    return total;
}
}
/**
 * Reports the accumulated sum as a {@link Double}.
 *
 * @return the current value of {@code sum}
 */
@Override
public Double result() {
    final double total = sum.value();
    return total;
}
}
@Override public Float result() { // overflow will go to infinity return (float) sum.value(); } }
/**
 * Builds the summary from the counters collected so far.
 *
 * <p>The minimum, maximum and mean string length are reported as {@code null}
 * when no non-null value has been counted.
 *
 * @return a new {@code StringColumnSummary} reflecting the current state
 */
@Override
public StringColumnSummary result() {
    Integer shortest = null;
    Integer longest = null;
    Double averageLength = null;

    // Length statistics are only defined once at least one non-null value was seen.
    if (nonNullCount != 0L) {
        shortest = minStringLength;
        longest = maxStringLength;
        averageLength = meanLength.value();
    }

    return new StringColumnSummary(
            nonNullCount,
            nullCount,
            emptyCount,
            shortest,
            longest,
            averageLength);
}
}
/**
 * Builds the summary from the counters collected so far.
 *
 * <p>The minimum, maximum and mean string length are reported as {@code null}
 * when no non-null value has been counted.
 *
 * @return a new {@code StringColumnSummary} reflecting the current state
 */
@Override
public StringColumnSummary result() {
    Integer shortest = null;
    Integer longest = null;
    Double averageLength = null;

    // Length statistics are only defined once at least one non-null value was seen.
    if (nonNullCount != 0L) {
        shortest = minStringLength;
        longest = maxStringLength;
        averageLength = meanLength.value();
    }

    return new StringColumnSummary(
            nonNullCount,
            nullCount,
            emptyCount,
            shortest,
            longest,
            averageLength);
}
}
/**
 * Adds one string to the running summary.
 *
 * <p>A {@code null} value only increments the null counter. Any other value
 * increments the non-null counter (and the empty counter for empty strings) and
 * updates the minimum, maximum and running mean of the string length.
 *
 * @param value the string to account for, may be {@code null}
 */
@Override
public void aggregate(String value) {
    if (value == null) {
        nullCount++;
        return;
    }

    nonNullCount++;
    if (value.isEmpty()) {
        emptyCount++;
    }

    final int len = value.length();
    minStringLength = Math.min(minStringLength, len);
    maxStringLength = Math.max(maxStringLength, len);

    // Incremental mean update: move the mean by the new value's offset divided by the new count.
    final double offsetFromMean = len - meanLength.value();
    meanLength = meanLength.add(offsetFromMean / nonNullCount);
}
/**
 * Adds another compensated (Kahan) sum to this one and returns the result as a
 * new instance, carrying an updated correction term to reduce numeric error.
 *
 * @param other the sum to add
 * @return a new {@code CompensatedSum} holding the combined value and correction term
 */
public CompensatedSum add(CompensatedSum other) {
    // Fold both correction terms into the incoming value before adding it.
    final double carried = delta + other.delta();
    final double increment = other.value() + carried;

    final double total = value + increment;
    // Difference between what was meant to be added and what was actually added.
    final double residual = increment - (total - value);

    return new CompensatedSum(total, residual);
}
/**
 * Adds another compensated (Kahan) sum to this one and returns the result as a
 * new instance, carrying an updated correction term to reduce numeric error.
 *
 * @param other the sum to add
 * @return a new {@code CompensatedSum} holding the combined value and correction term
 */
public CompensatedSum add(CompensatedSum other) {
    // Fold both correction terms into the incoming value before adding it.
    final double carried = delta + other.delta();
    final double increment = other.value() + carried;

    final double total = value + increment;
    // Difference between what was meant to be added and what was actually added.
    final double residual = increment - (total - value);

    return new CompensatedSum(total, residual);
}