/**
 * Creates a builder pre-populated with the statistics of an existing estimate.
 *
 * @param other the estimate whose low value, high value, nulls fraction,
 *              average row size and distinct values count are copied
 * @return a builder carrying the copied values, ready for further overrides
 */
public static Builder buildFrom(SymbolStatsEstimate other)
{
    Builder seeded = builder()
            .setLowValue(other.getLowValue())
            .setHighValue(other.getHighValue())
            .setNullsFraction(other.getNullsFraction())
            .setAverageRowSize(other.getAverageRowSize())
            .setDistinctValuesCount(other.getDistinctValuesCount());
    return seeded;
}
/**
 * Materializes the values currently held by this builder.
 *
 * @return a new {@code SymbolStatsEstimate} constructed from the builder's
 *         low value, high value, nulls fraction, average row size and
 *         distinct values count
 */
public SymbolStatsEstimate build()
{
    SymbolStatsEstimate estimate = new SymbolStatsEstimate(
            lowValue,
            highValue,
            nullsFraction,
            averageRowSize,
            distinctValuesCount);
    return estimate;
}
}
double leftNDV = leftColumnStats.getDistinctValuesCount(); double matchingRightNDV = rightColumnStats.getDistinctValuesCount() * unmatchedJoinComplementNdvsCoefficient; double nonMatchingLeftValuesFraction = leftColumnStats.getValuesFraction() * (leftNDV - matchingRightNDV) / leftNDV; double scaleFactor = nonMatchingLeftValuesFraction + leftColumnStats.getNullsFraction(); double newLeftNullsFraction = leftColumnStats.getNullsFraction() / scaleFactor; result = result.mapSymbolColumnStatistics(drivingClause.getLeft(), columnStats -> SymbolStatsEstimate.buildFrom(columnStats) .setLowValue(leftColumnStats.getLowValue()) .setHighValue(leftColumnStats.getHighValue()) .setNullsFraction(newLeftNullsFraction) .setDistinctValuesCount(leftNDV - matchingRightNDV) SymbolStatsEstimate.buildFrom(columnStats) .setLowValue(NaN) .setHighValue(NaN) .setDistinctValuesCount(0.0) .build()); result = result.mapOutputRowCount(rowCount -> rowCount * leftColumnStats.getNullsFraction());
/**
 * Builds a range from the bounds and distinct values count of a symbol estimate.
 *
 * @param estimate the symbol statistics to read from
 * @return a {@code StatisticRange} spanning the estimate's low and high values
 *         with the estimate's distinct values count
 */
public static StatisticRange from(SymbolStatsEstimate estimate)
{
    double low = estimate.getLowValue();
    double high = estimate.getHighValue();
    double distinctValues = estimate.getDistinctValuesCount();
    return new StatisticRange(low, high, distinctValues);
}
/**
 * Derives an upper bound on the number of distinct values from the low/high bounds.
 *
 * @param symbolStats statistics providing the value range
 * @param type the type of the symbol; decides whether the range can be counted
 * @return {@code 1} when the statistic range has zero length; {@code NaN} when the
 *         type is not discrete or the span between high and low is {@code NaN};
 *         otherwise {@code floor(span + 1)}, with the span first scaled by
 *         {@code 10^scale} for decimal types
 */
private double maxDistinctValuesByLowHigh(SymbolStatsEstimate symbolStats, Type type)
{
    // A zero-length range is reported as exactly one value, regardless of type.
    boolean zeroLengthRange = symbolStats.statisticRange().length() == 0.0;
    if (zeroLengthRange) {
        return 1;
    }

    if (!isDiscrete(type)) {
        return NaN;
    }

    double high = symbolStats.getHighValue();
    double low = symbolStats.getLowValue();
    double span = high - low;
    if (isNaN(span)) {
        return NaN;
    }

    // For decimals the span is multiplied by 10^scale before counting.
    double scaledSpan = (type instanceof DecimalType)
            ? span * pow(10, ((DecimalType) type).getScale())
            : span;
    return floor(scaledSpan + 1);
}
double newNullsFraction = (innerJoinSymbolStats.getNullsFraction() * innerJoinRowCount + joinComplementSymbolStats.getNullsFraction() * joinComplementRowCount) / outputRowCount; outputStats.addSymbolStatistics(symbol, SymbolStatsEstimate.buildFrom(innerJoinSymbolStats) .setLowValue(leftSymbolStats.getLowValue()) .setHighValue(leftSymbolStats.getHighValue()) .setDistinctValuesCount(leftSymbolStats.getDistinctValuesCount()) .setNullsFraction(newNullsFraction) .build()); double newNullsFraction = (innerJoinSymbolStats.getNullsFraction() * innerJoinRowCount + joinComplementRowCount) / outputRowCount; outputStats.addSymbolStatistics(symbol, innerJoinSymbolStats.mapNullsFraction(nullsFraction -> newNullsFraction));
.lowValue(uStats.getLowValue()) .highValue(uStats.getHighValue()) .nullsFraction(0) .distinctValuesCount(uStats.getDistinctValuesCount() - xStats.getDistinctValuesCount())) .symbolStats(x, stats -> stats.isEqualTo(xStats)) .symbolStats(z, stats -> stats.isEqualTo(zStats)) .outputRowsCount(inputStatistics.getOutputRowCount() * uStats.getValuesFraction() * (1 - xStats.getDistinctValuesCount() / uStats.getDistinctValuesCount())); .lowValue(xStats.getLowValue()) .highValue(xStats.getHighValue()) .nullsFraction(0) .distinctValuesCount(xStats.getDistinctValuesCount() * 0.5)) .symbolStats(u, stats -> stats.isEqualTo(uStats)) .symbolStats(z, stats -> stats.isEqualTo(zStats)) .outputRowsCount(inputStatistics.getOutputRowCount() * xStats.getValuesFraction() * 0.5); .symbolStats(x, stats -> stats .nullsFraction(0) .lowValue(xStats.getLowValue()) .highValue(xStats.getHighValue()) .distinctValuesCountUnknown()) .symbolStatsUnknown(unknown)
if (symbolStats.isUnknown()) { return SymbolStatsEstimate.unknown(); double distinctValuesCount = symbolStats.getDistinctValuesCount(); double nullsFraction = symbolStats.getNullsFraction(); return SymbolStatsEstimate.zero(); return SymbolStatsEstimate.buildFrom(symbolStats) .setDistinctValuesCount(distinctValuesCount) .setNullsFraction(nullsFraction)
/**
 * Reduces a distinct values count that exceeds the number of non-null rows.
 *
 * @param symbolStats the statistics to adjust
 * @param rowCount the row count the statistics apply to
 * @return {@code symbolStats} unchanged when the distinct values count, row count or
 *         nulls fraction is {@code NaN}, or when the distinct values count already
 *         fits within {@code rowCount * (1 - nullsFraction)}; otherwise a copy whose
 *         distinct values count is the midpoint between {@code min(ndv, rowCount)}
 *         and the non-null row count, and whose nulls fraction is halved
 */
private SymbolStatsEstimate capNDV(SymbolStatsEstimate symbolStats, double rowCount)
{
    double distinctValues = symbolStats.getDistinctValuesCount();
    double nullsFraction = symbolStats.getNullsFraction();

    boolean anyUnknown = isNaN(distinctValues) || isNaN(rowCount) || isNaN(nullsFraction);
    double nonNullRows = rowCount * (1 - nullsFraction);
    if (anyUnknown || distinctValues <= nonNullRows) {
        return symbolStats;
    }

    // Both the distinct values count and the nulls fraction are adjusted together.
    double cappedDistinctValues = (min(distinctValues, rowCount) + nonNullRows) / 2;
    double halvedNullsFraction = nullsFraction / 2;
    return symbolStats
            .mapDistinctValuesCount(ignored -> cappedDistinctValues)
            .mapNullsFraction(ignored -> halvedNullsFraction);
}
/**
 * Estimates plan node statistics after filtering with {@code left = right}.
 *
 * <p>The output row count is the input row count multiplied by the product of both
 * sides' non-null fractions and by {@code 1 / max(leftNdv, rightNdv, 1)}. Each side
 * that is backed by a symbol receives the same statistics: nulls fraction 0, the
 * intersection of both ranges, and the smaller of the two distinct values counts.
 *
 * @param inputStatistics statistics of the input the comparison is applied to
 * @param leftExpressionStatistics statistics of the left expression
 * @param leftExpressionSymbol the symbol of the left expression, if it has one
 * @param rightExpressionStatistics statistics of the right expression
 * @param rightExpressionSymbol the symbol of the right expression, if it has one
 * @return the filtered estimate, or {@code PlanNodeStatsEstimate.unknown()} when the
 *         distinct values count of either side is {@code NaN}
 */
private static PlanNodeStatsEstimate estimateExpressionEqualToExpression(
        PlanNodeStatsEstimate inputStatistics,
        SymbolStatsEstimate leftExpressionStatistics,
        Optional<Symbol> leftExpressionSymbol,
        SymbolStatsEstimate rightExpressionStatistics,
        Optional<Symbol> rightExpressionSymbol)
{
    if (isNaN(leftExpressionStatistics.getDistinctValuesCount()) || isNaN(rightExpressionStatistics.getDistinctValuesCount())) {
        return PlanNodeStatsEstimate.unknown();
    }

    StatisticRange leftRange = StatisticRange.from(leftExpressionStatistics);
    StatisticRange rightRange = StatisticRange.from(rightExpressionStatistics);
    StatisticRange sharedRange = leftRange.intersect(rightRange);

    // Rows where either side is null are excluded from the output.
    double nonNullSelectivity = (1 - leftExpressionStatistics.getNullsFraction()) * (1 - rightExpressionStatistics.getNullsFraction());

    double leftDistinct = leftRange.getDistinctValuesCount();
    double rightDistinct = rightRange.getDistinctValuesCount();
    double equalitySelectivity = 1.0 / max(leftDistinct, rightDistinct, 1);
    double survivingDistinct = min(leftDistinct, rightDistinct);

    PlanNodeStatsEstimate.Builder filtered = PlanNodeStatsEstimate.buildFrom(inputStatistics)
            .setOutputRowCount(inputStatistics.getOutputRowCount() * nonNullSelectivity * equalitySelectivity);

    double combinedAverageRowSize = averageExcludingNaNs(
            leftExpressionStatistics.getAverageRowSize(),
            rightExpressionStatistics.getAverageRowSize());

    // The distinct values count is set after the range so that it takes precedence.
    SymbolStatsEstimate matchedStats = SymbolStatsEstimate.builder()
            .setAverageRowSize(combinedAverageRowSize)
            .setNullsFraction(0)
            .setStatisticsRange(sharedRange)
            .setDistinctValuesCount(survivingDistinct)
            .build();

    leftExpressionSymbol.ifPresent(left -> filtered.addSymbolStatistics(left, matchedStats));
    rightExpressionSymbol.ifPresent(right -> filtered.addSymbolStatistics(right, matchedStats));

    return filtered.build();
}
/**
 * Asserts the low and high values of {@code SYMBOL} after combining two estimates
 * with {@code addStatsAndMaxDistinctValues}.
 *
 * @param first the first estimate to combine
 * @param second the second estimate to combine
 * @param expectedLow the low value the combined statistics must report
 * @param expectedHigh the high value the combined statistics must report
 */
private static void assertAddRange(PlanNodeStatsEstimate first, PlanNodeStatsEstimate second, double expectedLow, double expectedHigh)
{
    SymbolStatsEstimate combined = addStatsAndMaxDistinctValues(first, second).getSymbolStatistics(SYMBOL);

    double actualLow = combined.getLowValue();
    assertEquals(actualLow, expectedLow);

    double actualHigh = combined.getHighValue();
    assertEquals(actualHigh, expectedHigh);
}
private static SymbolStatsEstimate addColumnStats(SymbolStatsEstimate leftStats, double leftRows, SymbolStatsEstimate rightStats, double rightRows, double newRowCount, RangeAdditionStrategy strategy) { checkArgument(newRowCount > 0, "newRowCount must be greater than zero"); StatisticRange leftRange = StatisticRange.from(leftStats); StatisticRange rightRange = StatisticRange.from(rightStats); StatisticRange sum = strategy.add(leftRange, rightRange); double nullsCountRight = rightStats.getNullsFraction() * rightRows; double nullsCountLeft = leftStats.getNullsFraction() * leftRows; double totalSizeLeft = (leftRows - nullsCountLeft) * leftStats.getAverageRowSize(); double totalSizeRight = (rightRows - nullsCountRight) * rightStats.getAverageRowSize(); double newNullsFraction = (nullsCountLeft + nullsCountRight) / newRowCount; double newNonNullsRowCount = newRowCount * (1.0 - newNullsFraction); // FIXME, weights to average. left and right should be equal in most cases anyway double newAverageRowSize = newNonNullsRowCount == 0 ? 0 : ((totalSizeLeft + totalSizeRight) / newNonNullsRowCount); return SymbolStatsEstimate.builder() .setStatisticsRange(sum) .setAverageRowSize(newAverageRowSize) .setNullsFraction(newNullsFraction) .build(); } }
if (valueStats.isUnknown()) { return PlanNodeStatsEstimate.unknown(); double notNullValuesBeforeIn = input.getOutputRowCount() * (1 - valueStats.getNullsFraction()); Symbol valueSymbol = Symbol.from(node.getValue()); SymbolStatsEstimate newSymbolStats = inEstimate.getSymbolStatistics(valueSymbol) .mapDistinctValuesCount(newDistinctValuesCount -> min(newDistinctValuesCount, valueStats.getDistinctValuesCount())); result.addSymbolStatistics(valueSymbol, newSymbolStats);
/**
 * Extracts the distinct values count of this metric's column from a plan node estimate.
 *
 * @param planNodeStatsEstimate the estimate to read from
 * @param statsContext context passed through to the symbol statistics lookup
 * @return the distinct values count of {@code columnName}, converted by {@code asOptional}
 */
@Override
public OptionalDouble getValueFromPlanNodeEstimate(PlanNodeStatsEstimate planNodeStatsEstimate, StatsContext statsContext)
{
    double distinctValues = getSymbolStatistics(planNodeStatsEstimate, columnName, statsContext)
            .getDistinctValuesCount();
    return asOptional(distinctValues);
}
/**
 * Estimates the statistics of a GROUP BY over {@code sourceStats}.
 *
 * <p>Each grouping symbol keeps its source statistics, except that a non-zero nulls
 * fraction becomes {@code 1 / (distinctValuesCount + 1)}. The output row count is the
 * product over all grouping symbols of their distinct values count (plus one when the
 * symbol has a non-zero nulls fraction), capped at the source row count. Aggregation
 * symbols get whatever {@code estimateAggregationStats} produces for them.
 *
 * @param sourceStats statistics of the aggregation input
 * @param groupBySymbols the grouping symbols
 * @param aggregations the aggregations keyed by their output symbol
 * @return the estimated statistics of the aggregation output
 */
public static PlanNodeStatsEstimate groupBy(PlanNodeStatsEstimate sourceStats, Collection<Symbol> groupBySymbols, Map<Symbol, Aggregation> aggregations)
{
    PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.builder();

    double groupCount = 1;
    for (Symbol groupBySymbol : groupBySymbols) {
        SymbolStatsEstimate keyStats = sourceStats.getSymbolStatistics(groupBySymbol);

        // After grouping, nulls (if any) occupy a single group next to the distinct values.
        result.addSymbolStatistics(groupBySymbol, keyStats.mapNullsFraction(
                nullsFraction -> nullsFraction == 0.0 ? 0.0 : 1.0 / (keyStats.getDistinctValuesCount() + 1)));

        int nullGroup;
        if (keyStats.getNullsFraction() == 0.0) {
            nullGroup = 0;
        }
        else {
            nullGroup = 1;
        }
        groupCount *= keyStats.getDistinctValuesCount() + nullGroup;
    }

    // The number of groups is capped at the number of input rows.
    result.setOutputRowCount(min(groupCount, sourceStats.getOutputRowCount()));

    aggregations.forEach((outputSymbol, aggregation) ->
            result.addSymbolStatistics(outputSymbol, estimateAggregationStats(aggregation, sourceStats)));

    return result.build();
}
/**
 * Builds the statistics used for an expression that is always null.
 *
 * @return an estimate with distinct values count 0 and nulls fraction 1
 */
private static SymbolStatsEstimate nullStatsEstimate()
{
    SymbolStatsEstimate allNulls = SymbolStatsEstimate.builder()
            .setDistinctValuesCount(0)
            .setNullsFraction(1)
            .build();
    return allNulls;
}
}
for (Symbol groupBySymbol : node.getPartitionBy()) { SymbolStatsEstimate symbolStatistics = sourceStats.getSymbolStatistics(groupBySymbol); int nullRow = (symbolStatistics.getNullsFraction() == 0.0) ? 0 : 1; partitionCount *= symbolStatistics.getDistinctValuesCount() + nullRow; .addSymbolStatistics(node.getRowNumberSymbol(), SymbolStatsEstimate.builder()
/**
 * Derives statistics for a unary plus or minus expression.
 *
 * @param node the unary arithmetic expression
 * @param context unused visitor context
 * @return the operand's statistics unchanged for {@code PLUS}; for {@code MINUS}, a copy
 *         whose low value is the negated operand high value and whose high value is the
 *         negated operand low value
 * @throws IllegalStateException if the sign is neither {@code PLUS} nor {@code MINUS}
 */
@Override
protected SymbolStatsEstimate visitArithmeticUnary(ArithmeticUnaryExpression node, Void context)
{
    SymbolStatsEstimate operandStats = process(node.getValue());

    switch (node.getSign()) {
        case PLUS:
            return operandStats;
        case MINUS:
            break;
        default:
            throw new IllegalStateException("Unexpected sign: " + node.getSign());
    }

    // Negation mirrors the range: the old upper bound becomes the new lower bound.
    double negatedLow = -operandStats.getHighValue();
    double negatedHigh = -operandStats.getLowValue();
    return SymbolStatsEstimate.buildFrom(operandStats)
            .setLowValue(negatedLow)
            .setHighValue(negatedHigh)
            .build();
}
@Override protected SymbolStatsEstimate visitFunctionCall(FunctionCall node, Void context) { Map<NodeRef<Expression>, Type> expressionTypes = getExpressionTypes(session, node, types); ExpressionInterpreter interpreter = ExpressionInterpreter.expressionOptimizer(node, metadata, session, expressionTypes); Object value = interpreter.optimize(NoOpSymbolResolver.INSTANCE); if (value == null || value instanceof NullLiteral) { return nullStatsEstimate(); } if (value instanceof Expression && !(value instanceof Literal)) { // value is not a constant return SymbolStatsEstimate.unknown(); } // value is a constant return SymbolStatsEstimate.builder() .setNullsFraction(0) .setDistinctValuesCount(1) .build(); }
/**
 * Extracts the nulls fraction of this metric's column from a plan node estimate.
 *
 * @param planNodeStatsEstimate the estimate to read from
 * @param statsContext context passed through to the symbol statistics lookup
 * @return the nulls fraction of {@code columnName}, converted by {@code asOptional}
 */
@Override
public OptionalDouble getValueFromPlanNodeEstimate(PlanNodeStatsEstimate planNodeStatsEstimate, StatsContext statsContext)
{
    double nullsFraction = getSymbolStatistics(planNodeStatsEstimate, columnName, statsContext)
            .getNullsFraction();
    return asOptional(nullsFraction);
}