/**
 * Reads the high value of the statistics for {@code columnName} out of the
 * given plan node estimate.
 *
 * @param planNodeStatsEstimate the estimate to read the symbol statistics from
 * @param statsContext context handed to the symbol statistics lookup
 * @return the high value, or an empty optional when that value is infinite
 */
@Override
public OptionalDouble getValueFromPlanNodeEstimate(PlanNodeStatsEstimate planNodeStatsEstimate, StatsContext statsContext)
{
    double upperBound = getSymbolStatistics(planNodeStatsEstimate, columnName, statsContext).getHighValue();
    // Only an infinite bound is mapped to "absent"; every other value is wrapped as-is.
    return isInfinite(upperBound) ? OptionalDouble.empty() : OptionalDouble.of(upperBound);
}
/**
 * Asserts that the high value of the statistics under test matches
 * {@code expected}.
 *
 * @param expected the expected high value
 * @return this assertion object, so further checks can be chained
 */
public SymbolStatsAssertion highValue(double expected)
{
    double actual = statistics.getHighValue();
    assertEstimateEquals(actual, expected, "highValue mismatch");
    return this;
}
/**
 * Creates a range from the low value, high value and distinct values count
 * of the given symbol statistics.
 *
 * @param estimate the symbol statistics to copy the three values from
 * @return a new range built from those values
 */
public static StatisticRange from(SymbolStatsEstimate estimate)
{
    double low = estimate.getLowValue();
    double high = estimate.getHighValue();
    double distinctValues = estimate.getDistinctValuesCount();
    return new StatisticRange(low, high, distinctValues);
}
/**
 * Derives an upper limit on the number of distinct values from the low/high
 * bounds of the symbol statistics.
 *
 * @param symbolStats statistics providing the range and its bounds
 * @param type the type of the symbol
 * @return {@code 1} for a zero-length range; {@code NaN} when the type is not
 *         discrete or the bound difference is NaN; otherwise
 *         {@code floor(difference + 1)}, with the difference first scaled by
 *         {@code 10^scale} for decimal types
 */
private double maxDistinctValuesByLowHigh(SymbolStatsEstimate symbolStats, Type type)
{
    // This check runs before the discreteness check, so it applies to every type.
    if (symbolStats.statisticRange().length() == 0.0) {
        return 1;
    }
    if (!isDiscrete(type)) {
        return NaN;
    }

    double span = symbolStats.getHighValue() - symbolStats.getLowValue();
    if (isNaN(span)) {
        return NaN;
    }

    // For decimals the span is expressed in units of the last fractional digit.
    double scaledSpan = (type instanceof DecimalType)
            ? span * pow(10, ((DecimalType) type).getScale())
            : span;
    return floor(scaledSpan + 1);
}
/**
 * Asserts that the statistics under test describe an empty range: both
 * bounds are NaN, the distinct values count and average row size are zero,
 * and the nulls fraction is one.
 *
 * @return this assertion object, so further checks can be chained
 */
public SymbolStatsAssertion emptyRange()
{
    double low = statistics.getLowValue();
    double high = statistics.getHighValue();
    boolean bothBoundsNaN = isNaN(low) && isNaN(high);
    assertTrue(bothBoundsNaN, "expected empty range (NaN, NaN) but got (" + low + ", " + high + ") instead");

    assertEquals(statistics.getDistinctValuesCount(), 0., "expected no distinctValuesCount");
    assertEquals(statistics.getAverageRowSize(), 0., "expected 0 average row size");
    assertEquals(statistics.getNullsFraction(), 1., "expected all nulls");
    return this;
}
/**
 * Computes statistics for a unary plus/minus expression from the statistics
 * of its operand.
 *
 * @param node the unary arithmetic expression
 * @param context unused
 * @return the operand statistics unchanged for {@code PLUS}; for
 *         {@code MINUS}, a copy whose low value is the negated operand high
 *         value and whose high value is the negated operand low value
 * @throws IllegalStateException if the sign is neither {@code PLUS} nor {@code MINUS}
 */
@Override
protected SymbolStatsEstimate visitArithmeticUnary(ArithmeticUnaryExpression node, Void context)
{
    SymbolStatsEstimate operandStats = process(node.getValue());
    switch (node.getSign()) {
        case PLUS:
            return operandStats;
        case MINUS: {
            // Negation flips the range, so the two bounds trade places.
            double negatedLow = -operandStats.getHighValue();
            double negatedHigh = -operandStats.getLowValue();
            return SymbolStatsEstimate.buildFrom(operandStats)
                    .setLowValue(negatedLow)
                    .setHighValue(negatedHigh)
                    .build();
        }
        default:
            throw new IllegalStateException("Unexpected sign: " + node.getSign());
    }
}
/**
 * Creates a builder pre-populated with every value of an existing estimate.
 *
 * @param other the estimate whose low value, high value, nulls fraction,
 *        average row size and distinct values count are copied
 * @return a builder initialised with those five values
 */
public static Builder buildFrom(SymbolStatsEstimate other)
{
    double low = other.getLowValue();
    double high = other.getHighValue();
    double nullsFraction = other.getNullsFraction();
    double averageRowSize = other.getAverageRowSize();
    double distinctValues = other.getDistinctValuesCount();

    return builder()
            .setLowValue(low)
            .setHighValue(high)
            .setNullsFraction(nullsFraction)
            .setAverageRowSize(averageRowSize)
            .setDistinctValuesCount(distinctValues);
}
/**
 * Combines two estimates with {@code addStatsAndMaxDistinctValues} and checks
 * the low/high values of the resulting statistics for {@code SYMBOL}.
 *
 * @param first the first estimate to combine
 * @param second the second estimate to combine
 * @param expectedLow the expected low value of the result
 * @param expectedHigh the expected high value of the result
 */
private static void assertAddRange(PlanNodeStatsEstimate first, PlanNodeStatsEstimate second, double expectedLow, double expectedHigh)
{
    SymbolStatsEstimate combined = addStatsAndMaxDistinctValues(first, second).getSymbolStatistics(SYMBOL);

    double actualLow = combined.getLowValue();
    assertEquals(actualLow, expectedLow);

    double actualHigh = combined.getHighValue();
    assertEquals(actualHigh, expectedHigh);
}
/**
 * Caps {@code stats} by {@code cap} with {@code capStats} and checks the
 * low/high values of the resulting statistics for {@code SYMBOL}.
 *
 * @param stats the estimate to be capped
 * @param cap the estimate supplying the caps
 * @param expectedLow the expected low value of the result
 * @param expectedHigh the expected high value of the result
 */
private static void assertCapRange(PlanNodeStatsEstimate stats, PlanNodeStatsEstimate cap, double expectedLow, double expectedHigh)
{
    SymbolStatsEstimate capped = capStats(stats, cap).getSymbolStatistics(SYMBOL);

    double actualLow = capped.getLowValue();
    assertEquals(actualLow, expectedLow);

    double actualHigh = capped.getHighValue();
    assertEquals(actualHigh, expectedHigh);
}
double highValue = sourceStats.getHighValue();
/**
 * Builds a check that compares a symbol's low value, high value, distinct
 * values count and nulls fraction against those of {@code estimate}.
 * The average row size is not compared.
 *
 * @param estimate the estimate supplying the expected values; it is read
 *        each time the returned consumer is invoked
 * @return a consumer applying the four checks to a {@code SymbolStatsAssertion}
 */
private Consumer<SymbolStatsAssertion> equalTo(SymbolStatsEstimate estimate)
{
    return assertion -> assertion
            .lowValue(estimate.getLowValue())
            .highValue(estimate.getHighValue())
            .distinctValuesCount(estimate.getDistinctValuesCount())
            .nullsFraction(estimate.getNullsFraction());
}
private SymbolStatsEstimate estimateCoalesce(SymbolStatsEstimate left, SymbolStatsEstimate right) { // Question to reviewer: do you have a method to check if fraction is empty or saturated? if (left.getNullsFraction() == 0) { return left; } else if (left.getNullsFraction() == 1.0) { return right; } else { return SymbolStatsEstimate.builder() .setLowValue(min(left.getLowValue(), right.getLowValue())) .setHighValue(max(left.getHighValue(), right.getHighValue())) .setDistinctValuesCount(left.getDistinctValuesCount() + min(right.getDistinctValuesCount(), input.getOutputRowCount() * left.getNullsFraction())) .setNullsFraction(left.getNullsFraction() * right.getNullsFraction()) // TODO check if dataSize estimation method is correct .setAverageRowSize(max(left.getAverageRowSize(), right.getAverageRowSize())) .build(); } } }
double leftHigh = left.getHighValue(); double rightLow = right.getLowValue(); double rightHigh = right.getHighValue(); if (isNaN(leftLow) || isNaN(leftHigh) || isNaN(rightLow) || isNaN(rightHigh)) { result.setLowValue(NaN)
/**
 * Builds a superset estimate and a subset estimate from the given ranges,
 * subtracts the subset with {@code subtractSubsetStats}, and checks the
 * low/high values of the resulting statistics for {@code SYMBOL}.
 *
 * @param supersetLow low bound of the superset range
 * @param supersetHigh high bound of the superset range
 * @param subsetLow low bound of the subset range
 * @param subsetHigh high bound of the subset range
 * @param expectedLow the expected low value of the result
 * @param expectedHigh the expected high value of the result
 */
private static void assertSubtractRange(double supersetLow, double supersetHigh, double subsetLow, double subsetHigh, double expectedLow, double expectedHigh)
{
    StatisticRange supersetRange = new StatisticRange(supersetLow, supersetHigh, 10);
    PlanNodeStatsEstimate superset = statistics(30, NaN, NaN, supersetRange);

    StatisticRange subsetRange = new StatisticRange(subsetLow, subsetHigh, 5);
    PlanNodeStatsEstimate subset = statistics(20, NaN, NaN, subsetRange);

    SymbolStatsEstimate remainder = subtractSubsetStats(superset, subset).getSymbolStatistics(SYMBOL);

    double actualLow = remainder.getLowValue();
    assertEquals(actualLow, expectedLow);

    double actualHigh = remainder.getHighValue();
    assertEquals(actualHigh, expectedHigh);
}
/**
 * Compares all five values of two symbol statistics (nulls fraction, low
 * value, high value, distinct values count, average row size), naming the
 * symbol in each failure message.
 *
 * @param symbol the symbol the statistics belong to; only its name is used
 * @param actual the statistics under test
 * @param expected the statistics to compare against
 */
private void assertSymbolStatsEqual(Symbol symbol, SymbolStatsEstimate actual, SymbolStatsEstimate expected)
{
    String symbolName = symbol.getName();

    assertEstimateEquals(actual.getNullsFraction(), expected.getNullsFraction(), "nullsFraction mismatch for %s", symbolName);
    assertEstimateEquals(actual.getLowValue(), expected.getLowValue(), "lowValue mismatch for %s", symbolName);
    assertEstimateEquals(actual.getHighValue(), expected.getHighValue(), "highValue mismatch for %s", symbolName);
    assertEstimateEquals(actual.getDistinctValuesCount(), expected.getDistinctValuesCount(), "distinct values count mismatch for %s", symbolName);
    assertEstimateEquals(actual.getAverageRowSize(), expected.getAverageRowSize(), "average row size mismatch for %s", symbolName);
}
}
public static PlanNodeStatsEstimate capStats(PlanNodeStatsEstimate stats, PlanNodeStatsEstimate cap) { if (stats.isOutputRowCountUnknown() || cap.isOutputRowCountUnknown()) { return PlanNodeStatsEstimate.unknown(); } PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.builder(); double cappedRowCount = min(stats.getOutputRowCount(), cap.getOutputRowCount()); result.setOutputRowCount(cappedRowCount); stats.getSymbolsWithKnownStatistics().forEach(symbol -> { SymbolStatsEstimate symbolStats = stats.getSymbolStatistics(symbol); SymbolStatsEstimate capSymbolStats = cap.getSymbolStatistics(symbol); SymbolStatsEstimate.Builder newSymbolStats = SymbolStatsEstimate.builder(); // for simplicity keep the average row size the same as in the input // in most cases the average row size doesn't change after applying filters newSymbolStats.setAverageRowSize(symbolStats.getAverageRowSize()); newSymbolStats.setDistinctValuesCount(min(symbolStats.getDistinctValuesCount(), capSymbolStats.getDistinctValuesCount())); newSymbolStats.setLowValue(max(symbolStats.getLowValue(), capSymbolStats.getLowValue())); newSymbolStats.setHighValue(min(symbolStats.getHighValue(), capSymbolStats.getHighValue())); double numberOfNulls = stats.getOutputRowCount() * symbolStats.getNullsFraction(); double capNumberOfNulls = cap.getOutputRowCount() * capSymbolStats.getNullsFraction(); double cappedNumberOfNulls = min(numberOfNulls, capNumberOfNulls); double cappedNullsFraction = cappedRowCount == 0 ? 1 : cappedNumberOfNulls / cappedRowCount; newSymbolStats.setNullsFraction(cappedNullsFraction); result.addSymbolStatistics(symbol, newSymbolStats.build()); }); return result.build(); }
/**
 * Asserts that the statistics under test match {@code expected} on nulls
 * fraction, low value, high value, distinct values count and average row
 * size, checked in that order.
 *
 * @param expected the statistics supplying the expected values
 * @return the assertion object returned by the last check in the chain
 */
public SymbolStatsAssertion isEqualTo(SymbolStatsEstimate expected)
{
    double expectedNullsFraction = expected.getNullsFraction();
    double expectedLow = expected.getLowValue();
    double expectedHigh = expected.getHighValue();
    double expectedDistinctValues = expected.getDistinctValuesCount();
    double expectedAverageRowSize = expected.getAverageRowSize();

    return nullsFraction(expectedNullsFraction)
            .lowValue(expectedLow)
            .highValue(expectedHigh)
            .distinctValuesCount(expectedDistinctValues)
            .averageRowSize(expectedAverageRowSize);
}
}
.setHighValue(leftSymbolStats.getHighValue()) .setDistinctValuesCount(leftSymbolStats.getDistinctValuesCount()) .setNullsFraction(newNullsFraction)
SymbolStatsEstimate.buildFrom(columnStats) .setLowValue(leftColumnStats.getLowValue()) .setHighValue(leftColumnStats.getHighValue()) .setNullsFraction(newLeftNullsFraction) .setDistinctValuesCount(leftNDV - matchingRightNDV)
.symbolStats(u, stats -> stats .lowValue(uStats.getLowValue()) .highValue(uStats.getHighValue()) .nullsFraction(0) .distinctValuesCount(uStats.getDistinctValuesCount() - xStats.getDistinctValuesCount())) .symbolStats(x, stats -> stats .lowValue(xStats.getLowValue()) .highValue(xStats.getHighValue()) .nullsFraction(0) .distinctValuesCount(xStats.getDistinctValuesCount() * 0.5)) .nullsFraction(0) .lowValue(xStats.getLowValue()) .highValue(xStats.getHighValue()) .distinctValuesCountUnknown()) .symbolStatsUnknown(unknown)