/**
 * Returns the given symbol statistics with the nulls fraction mapped to zero.
 *
 * @param symbolStats statistics whose nulls fraction should be replaced
 * @return the result of {@code mapNullsFraction} with a mapping that ignores the
 *         previous fraction and always yields {@code 0.0}
 */
private SymbolStatsEstimate zeroNullsFraction(SymbolStatsEstimate symbolStats) {
    final double noNulls = 0.0;
    return symbolStats.mapNullsFraction(ignoredFraction -> noNulls);
}
/**
 * Caps the distinct-values count of a symbol against the number of non-null rows.
 *
 * <p>The statistics are returned unchanged when the distinct-values count, the row count
 * or the nulls fraction is NaN, or when the distinct-values count already fits within
 * {@code rowCount * (1 - nullsFraction)}. Otherwise the distinct-values count is replaced
 * by the average of {@code min(ndv, rowCount)} and the non-null row count, and the nulls
 * fraction is halved.
 *
 * @param symbolStats statistics of the symbol to adjust
 * @param rowCount row count the distinct-values count is checked against
 * @return either {@code symbolStats} itself or the adjusted statistics
 */
private SymbolStatsEstimate capNDV(SymbolStatsEstimate symbolStats, double rowCount) {
    double distinctValues = symbolStats.getDistinctValuesCount();
    double nullsFraction = symbolStats.getNullsFraction();

    boolean anyUnknown = isNaN(distinctValues) || isNaN(rowCount) || isNaN(nullsFraction);
    double nonNullRows = rowCount * (1 - nullsFraction);
    if (anyUnknown || distinctValues <= nonNullRows) {
        // Nothing to cap: inputs are unknown or the estimate is already consistent.
        return symbolStats;
    }

    // Meet halfway between the (row-count-bounded) distinct count and the non-null rows.
    double cappedDistinctValues = (min(distinctValues, rowCount) + nonNullRows) / 2;
    double halvedNullsFraction = nullsFraction / 2;
    return symbolStats
            .mapDistinctValuesCount(ignored -> cappedDistinctValues)
            .mapNullsFraction(ignored -> halvedNullsFraction);
}
/**
 * Estimates the statistics of a group-by over {@code sourceStats}.
 *
 * <p>For every grouping symbol the source statistics are carried over with an adjusted
 * nulls fraction: {@code 0.0} when the source nulls fraction is exactly zero, otherwise
 * {@code 1 / (distinctValuesCount + 1)}. The output row count is the product, over all
 * grouping symbols, of the distinct-values count plus one extra row when the nulls
 * fraction is non-zero; it is limited by the source output row count through {@code min}.
 * Statistics for each aggregation output symbol come from {@code estimateAggregationStats}.
 *
 * @param sourceStats statistics of the input being grouped
 * @param groupBySymbols grouping symbols
 * @param aggregations aggregations keyed by their output symbol
 * @return the estimated statistics of the grouped output
 */
public static PlanNodeStatsEstimate groupBy(PlanNodeStatsEstimate sourceStats, Collection<Symbol> groupBySymbols, Map<Symbol, Aggregation> aggregations) {
    PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.builder();

    // First pass: per-key statistics with the nulls fraction rewritten for the grouped output.
    for (Symbol groupingKey : groupBySymbols) {
        SymbolStatsEstimate keyStats = sourceStats.getSymbolStatistics(groupingKey);
        SymbolStatsEstimate groupedKeyStats = keyStats.mapNullsFraction(
                fraction -> fraction == 0.0 ? 0.0 : 1.0 / (keyStats.getDistinctValuesCount() + 1));
        result.addSymbolStatistics(groupingKey, groupedKeyStats);
    }

    // Second pass: multiply the number of possible values (plus a null group, if any) per key.
    double groupCount = 1;
    for (Symbol groupingKey : groupBySymbols) {
        SymbolStatsEstimate keyStats = sourceStats.getSymbolStatistics(groupingKey);
        boolean hasNullGroup = keyStats.getNullsFraction() != 0.0;
        groupCount *= keyStats.getDistinctValuesCount() + (hasNullGroup ? 1 : 0);
    }
    result.setOutputRowCount(min(groupCount, sourceStats.getOutputRowCount()));

    for (Map.Entry<Symbol, Aggregation> entry : aggregations.entrySet()) {
        Symbol outputSymbol = entry.getKey();
        Aggregation aggregation = entry.getValue();
        result.addSymbolStatistics(outputSymbol, estimateAggregationStats(aggregation, sourceStats));
    }

    return result.build();
}
/**
 * Estimates the statistics of {@code left <> right} as the complement of the equality
 * estimate, computed over the rows where neither side is null.
 *
 * <p>The input row count is first scaled by
 * {@code (1 - leftNullsFraction) * (1 - rightNullsFraction)} and both expression statistics
 * get a zero nulls fraction. The equality estimate on that null-filtered input gives a
 * filter factor; the result keeps {@code 1 - factor} of the null-filtered rows. A
 * non-finite factor (for example from a zero row count) is treated as {@code 0.0}.
 *
 * @param inputStatistics statistics of the rows being filtered
 * @param leftExpressionStatistics statistics of the left expression
 * @param leftExpressionSymbol symbol of the left expression, if it is a plain symbol
 * @param rightExpressionStatistics statistics of the right expression
 * @param rightExpressionSymbol symbol of the right expression, if it is a plain symbol
 * @return the estimate, or {@code PlanNodeStatsEstimate.unknown()} when the equality
 *         estimate has an unknown output row count
 */
private static PlanNodeStatsEstimate estimateExpressionNotEqualToExpression(
        PlanNodeStatsEstimate inputStatistics,
        SymbolStatsEstimate leftExpressionStatistics,
        Optional<Symbol> leftExpressionSymbol,
        SymbolStatsEstimate rightExpressionStatistics,
        Optional<Symbol> rightExpressionSymbol) {
    // Only rows where both sides are non-null take part in the comparison.
    double nonNullShare = (1 - leftExpressionStatistics.getNullsFraction()) * (1 - rightExpressionStatistics.getNullsFraction());
    PlanNodeStatsEstimate nonNullInput = inputStatistics.mapOutputRowCount(rows -> rows * nonNullShare);
    SymbolStatsEstimate leftWithoutNulls = leftExpressionStatistics.mapNullsFraction(ignored -> 0.0);
    SymbolStatsEstimate rightWithoutNulls = rightExpressionStatistics.mapNullsFraction(ignored -> 0.0);

    PlanNodeStatsEstimate equalStats = estimateExpressionEqualToExpression(
            nonNullInput,
            leftWithoutNulls,
            leftExpressionSymbol,
            rightWithoutNulls,
            rightExpressionSymbol);
    if (equalStats.isOutputRowCountUnknown()) {
        return PlanNodeStatsEstimate.unknown();
    }

    PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.buildFrom(nonNullInput);
    double nonNullRows = nonNullInput.getOutputRowCount();
    double rawEqualShare = equalStats.getOutputRowCount() / nonNullRows;
    // A NaN or infinite ratio is treated as "equality filters nothing".
    double equalShare = isFinite(rawEqualShare) ? rawEqualShare : 0.0;
    result.setOutputRowCount(nonNullRows * (1 - equalShare));

    leftExpressionSymbol.ifPresent(left -> result.addSymbolStatistics(left, leftWithoutNulls));
    rightExpressionSymbol.ifPresent(right -> result.addSymbolStatistics(right, rightWithoutNulls));
    return result.build();
}
/**
 * Estimates the statistics of an {@code IS NOT NULL} predicate.
 *
 * <p>Only a predicate over a plain {@code SymbolReference} is estimated: the output row
 * count becomes the input row count times {@code 1 - nullsFraction} of that symbol, and
 * the symbol's nulls fraction is set to zero. Any other operand yields
 * {@code PlanNodeStatsEstimate.unknown()}.
 *
 * @param node the predicate being estimated
 * @param context unused
 * @return the filtered estimate, or an unknown estimate for non-symbol operands
 */
@Override
protected PlanNodeStatsEstimate visitIsNotNullPredicate(IsNotNullPredicate node, Void context) {
    if (!(node.getValue() instanceof SymbolReference)) {
        return PlanNodeStatsEstimate.unknown();
    }

    Symbol testedSymbol = Symbol.from(node.getValue());
    SymbolStatsEstimate testedStats = input.getSymbolStatistics(testedSymbol);

    PlanNodeStatsEstimate.Builder filtered = PlanNodeStatsEstimate.buildFrom(input);
    double nonNullShare = 1 - testedStats.getNullsFraction();
    filtered.setOutputRowCount(input.getOutputRowCount() * nonNullShare);
    // Every surviving row has a non-null value for the tested symbol.
    filtered.addSymbolStatistics(testedSymbol, testedStats.mapNullsFraction(ignored -> 0.0));
    return filtered.build();
}
// Re-derive the nulls fraction of `symbol` for the combined output and register it on outputStats.
// Numerator: the null rows already estimated for the inner join
// (innerJoin nulls fraction * innerJoinRowCount) plus all joinComplementRowCount rows, i.e. each
// complement row is counted as null for this symbol. The sum is normalised by outputRowCount.
// NOTE(review): presumably the complement rows are the outer-side rows without a match, which is
// why they are all treated as null here — confirm against the enclosing method.
// NOTE(review): outputRowCount is not checked for zero in this fragment; if the operands are
// doubles a zero divisor yields NaN/Infinity rather than an exception — confirm whether that is handled upstream.
SymbolStatsEstimate innerJoinSymbolStats = innerJoinStats.getSymbolStatistics(symbol); double newNullsFraction = (innerJoinSymbolStats.getNullsFraction() * innerJoinRowCount + joinComplementRowCount) / outputRowCount; outputStats.addSymbolStatistics(symbol, innerJoinSymbolStats.mapNullsFraction(nullsFraction -> newNullsFraction));