/**
 * Returns a copy of this estimate whose output row count is the result of applying
 * {@code mappingFunction} to the current output row count. All other statistics carried
 * over by {@code buildFrom(this)} are left as they are.
 *
 * @param mappingFunction transformation applied to the current output row count
 * @return a newly built estimate with the mapped row count
 */
public PlanNodeStatsEstimate mapOutputRowCount(Function<Double, Double> mappingFunction)
{
    double mappedRowCount = mappingFunction.apply(outputRowCount);
    return buildFrom(this)
            .setOutputRowCount(mappedRowCount)
            .build();
}
/**
 * Returns a copy of this estimate in which the statistics of {@code symbol} are replaced
 * by the result of applying {@code mappingFunction} to the symbol's current statistics.
 *
 * @param symbol the symbol whose statistics are rewritten
 * @param mappingFunction transformation applied to the symbol's current statistics
 * @return a newly built estimate carrying the mapped symbol statistics
 */
public PlanNodeStatsEstimate mapSymbolColumnStatistics(Symbol symbol, Function<SymbolStatsEstimate, SymbolStatsEstimate> mappingFunction)
{
    SymbolStatsEstimate mappedStatistics = mappingFunction.apply(getSymbolStatistics(symbol));
    return buildFrom(this)
            .addSymbolStatistics(symbol, mappedStatistics)
            .build();
}
/**
 * Estimates statistics for an {@code EnforceSingleRowNode}: the source's statistics are
 * copied and the output row count is fixed at 1.
 *
 * @return the adjusted estimate; this implementation always returns a present value
 */
@Override
protected Optional<PlanNodeStatsEstimate> doCalculate(EnforceSingleRowNode node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types)
{
    PlanNodeStatsEstimate childEstimate = sourceStats.getStats(node.getSource());
    PlanNodeStatsEstimate singleRowEstimate = PlanNodeStatsEstimate.buildFrom(childEstimate)
            .setOutputRowCount(1)
            .build();
    return Optional.of(singleRowEstimate);
}
} // closes the enclosing class, whose header lies outside this excerpt
@Override protected Optional<PlanNodeStatsEstimate> doCalculate(LimitNode node, StatsProvider statsProvider, Lookup lookup, Session session, TypeProvider types) { PlanNodeStatsEstimate sourceStats = statsProvider.getStats(node.getSource()); if (sourceStats.getOutputRowCount() <= node.getCount()) { return Optional.of(sourceStats); } // LIMIT actually limits (or when there was no row count estimated for source) return Optional.of(PlanNodeStatsEstimate.buildFrom(sourceStats) .setOutputRowCount(node.getCount()) .build()); } }
/**
 * Normalizes {@code stats}, optionally restricting symbol statistics to {@code outputSymbols}.
 *
 * <ul>
 * <li>An estimate with an unknown output row count is replaced by {@code PlanNodeStatsEstimate.unknown()}.</li>
 * <li>Statistics for symbols outside {@code outputSymbols} (when present) are removed.</li>
 * <li>When the output row count is 0, every retained symbol gets {@code SymbolStatsEstimate.zero()};
 * otherwise each retained symbol is passed through {@code normalizeSymbolStats}.</li>
 * <li>Symbols whose normalized statistics are unknown are removed; statistics that changed are replaced.</li>
 * </ul>
 *
 * @param stats the estimate to normalize
 * @param outputSymbols symbols to retain; when empty, every symbol is retained
 * @param types type information forwarded to {@code normalizeSymbolStats}
 * @return the normalized estimate
 */
private PlanNodeStatsEstimate normalize(PlanNodeStatsEstimate stats, Optional<Collection<Symbol>> outputSymbols, TypeProvider types)
{
    if (stats.isOutputRowCountUnknown()) {
        return PlanNodeStatsEstimate.unknown();
    }

    Predicate<Symbol> isRetained;
    if (outputSymbols.isPresent()) {
        ImmutableSet<Symbol> retainedSymbols = ImmutableSet.copyOf(outputSymbols.get());
        isRetained = retainedSymbols::contains;
    }
    else {
        isRetained = symbol -> true;
    }

    boolean producesNoRows = stats.getOutputRowCount() == 0;
    PlanNodeStatsEstimate.Builder builder = PlanNodeStatsEstimate.buildFrom(stats);

    for (Symbol symbol : stats.getSymbolsWithKnownStatistics()) {
        if (isRetained.test(symbol)) {
            SymbolStatsEstimate currentStats = stats.getSymbolStatistics(symbol);
            SymbolStatsEstimate adjustedStats;
            if (producesNoRows) {
                adjustedStats = SymbolStatsEstimate.zero();
            }
            else {
                adjustedStats = normalizeSymbolStats(symbol, currentStats, stats, types);
            }

            if (adjustedStats.isUnknown()) {
                builder.removeSymbolStatistics(symbol);
            }
            else if (!Objects.equals(adjustedStats, currentStats)) {
                // only overwrite when normalization actually changed something
                builder.addSymbolStatistics(symbol, adjustedStats);
            }
        }
        else {
            builder.removeSymbolStatistics(symbol);
        }
    }

    return builder.build();
}
return Optional.of(PlanNodeStatsEstimate.buildFrom(sourceStats) .setOutputRowCount(outputRowsCount) .addSymbolStatistics(node.getRowNumberSymbol(), SymbolStatsEstimate.builder()
/**
 * Estimates statistics for an {@code AssignUniqueId} node: the source statistics are
 * copied and statistics for the generated id column are added.
 *
 * <p>The id column is described with a distinct-values count equal to the source's output
 * row count, a nulls fraction of 0 and an average row size of {@code BIGINT.getFixedSize()}.
 *
 * @return the resulting estimate; this implementation always returns a present value
 */
@Override
public Optional<PlanNodeStatsEstimate> calculate(AssignUniqueId assignUniqueId, StatsProvider statsProvider, Lookup lookup, Session session, TypeProvider types)
{
    PlanNodeStatsEstimate childEstimate = statsProvider.getStats(assignUniqueId.getSource());

    SymbolStatsEstimate idColumnStats = SymbolStatsEstimate.builder()
            .setDistinctValuesCount(childEstimate.getOutputRowCount())
            .setNullsFraction(0.0)
            .setAverageRowSize(BIGINT.getFixedSize())
            .build();

    PlanNodeStatsEstimate withIdColumn = PlanNodeStatsEstimate.buildFrom(childEstimate)
            .addSymbolStatistics(assignUniqueId.getIdColumn(), idColumnStats)
            .build();
    return Optional.of(withIdColumn);
}
} // closes the enclosing class, whose header lies outside this excerpt
/**
 * Estimates the result of filtering with {@code expression <> literal}.
 *
 * <p>The filter factor is one minus the share of the expression's range that overlaps the
 * literal's range; the output row count is additionally scaled by the expression's
 * non-null fraction. When the expression is backed by a symbol, that symbol's statistics
 * are updated to a nulls fraction of 0 and a distinct-values count reduced by one
 * (clamped via {@code max(..., 0)}).
 *
 * @param inputStatistics statistics of the input being filtered
 * @param expressionStatistics statistics of the compared expression
 * @param expressionSymbol the symbol backing the expression, if any
 * @param literalValue the literal as a double; when empty, an unbounded range is used instead
 * @return the estimate after applying the inequality
 */
private static PlanNodeStatsEstimate estimateExpressionNotEqualToLiteral(
        PlanNodeStatsEstimate inputStatistics,
        SymbolStatsEstimate expressionStatistics,
        Optional<Symbol> expressionSymbol,
        OptionalDouble literalValue)
{
    StatisticRange expressionRange = StatisticRange.from(expressionStatistics);

    // A known literal is a single-point range; otherwise fall back to the whole number
    // line, still with a single distinct value.
    StatisticRange filterRange = literalValue.isPresent()
            ? new StatisticRange(literalValue.getAsDouble(), literalValue.getAsDouble(), 1)
            : new StatisticRange(NEGATIVE_INFINITY, POSITIVE_INFINITY, 1);

    StatisticRange intersectRange = expressionRange.intersect(filterRange);
    double filterFactor = 1 - expressionRange.overlapPercentWith(intersectRange);
    double nonNullFraction = 1 - expressionStatistics.getNullsFraction();

    PlanNodeStatsEstimate.Builder estimate = PlanNodeStatsEstimate.buildFrom(inputStatistics);
    estimate.setOutputRowCount(filterFactor * nonNullFraction * inputStatistics.getOutputRowCount());

    expressionSymbol.ifPresent(symbol -> estimate.addSymbolStatistics(
            symbol,
            buildFrom(expressionStatistics)
                    .setNullsFraction(0.0)
                    .setDistinctValuesCount(max(expressionStatistics.getDistinctValuesCount() - 1, 0))
                    .build()));

    return estimate.build();
}
PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.buildFrom(input); result.setOutputRowCount(min(inEstimate.getOutputRowCount(), notNullValuesBeforeIn));
/**
 * Estimates {@code value IS NOT NULL}.
 *
 * <p>Only a predicate over a plain {@code SymbolReference} is estimated: the output row
 * count is the input row count scaled by the symbol's non-null fraction, and the symbol's
 * nulls fraction becomes 0. Any other operand yields {@code PlanNodeStatsEstimate.unknown()}.
 */
@Override
protected PlanNodeStatsEstimate visitIsNotNullPredicate(IsNotNullPredicate node, Void context)
{
    if (!(node.getValue() instanceof SymbolReference)) {
        return PlanNodeStatsEstimate.unknown();
    }

    Symbol symbol = Symbol.from(node.getValue());
    SymbolStatsEstimate symbolStats = input.getSymbolStatistics(symbol);
    double nonNullFraction = 1 - symbolStats.getNullsFraction();

    return PlanNodeStatsEstimate.buildFrom(input)
            .setOutputRowCount(input.getOutputRowCount() * nonNullFraction)
            .addSymbolStatistics(symbol, symbolStats.mapNullsFraction(ignored -> 0.0))
            .build();
}
/**
 * Estimates the result of filtering with {@code left <> right}.
 *
 * <p>Rows where either side is null are removed first. The equality estimate is then
 * computed on the null-filtered input via {@code estimateExpressionEqualToExpression},
 * and its complement is taken. If the equality estimate has an unknown row count, the
 * result is {@code PlanNodeStatsEstimate.unknown()}.
 *
 * @param inputStatistics statistics of the input being filtered
 * @param leftExpressionStatistics statistics of the left operand
 * @param leftExpressionSymbol the symbol backing the left operand, if any
 * @param rightExpressionStatistics statistics of the right operand
 * @param rightExpressionSymbol the symbol backing the right operand, if any
 * @return the estimate after applying the inequality
 */
private static PlanNodeStatsEstimate estimateExpressionNotEqualToExpression(
        PlanNodeStatsEstimate inputStatistics,
        SymbolStatsEstimate leftExpressionStatistics,
        Optional<Symbol> leftExpressionSymbol,
        SymbolStatsEstimate rightExpressionStatistics,
        Optional<Symbol> rightExpressionSymbol)
{
    double leftNonNullFraction = 1 - leftExpressionStatistics.getNullsFraction();
    double rightNonNullFraction = 1 - rightExpressionStatistics.getNullsFraction();
    double nullsFilterFactor = leftNonNullFraction * rightNonNullFraction;

    PlanNodeStatsEstimate inputNullsFiltered = inputStatistics.mapOutputRowCount(rowCount -> rowCount * nullsFilterFactor);
    SymbolStatsEstimate leftNullsFiltered = leftExpressionStatistics.mapNullsFraction(ignored -> 0.0);
    SymbolStatsEstimate rightNullsFiltered = rightExpressionStatistics.mapNullsFraction(ignored -> 0.0);

    PlanNodeStatsEstimate equalityStats = estimateExpressionEqualToExpression(
            inputNullsFiltered,
            leftNullsFiltered,
            leftExpressionSymbol,
            rightNullsFiltered,
            rightExpressionSymbol);
    if (equalityStats.isOutputRowCountUnknown()) {
        return PlanNodeStatsEstimate.unknown();
    }

    // A non-finite ratio (e.g. from a zero row count) is treated as "equality keeps nothing".
    double rawEqualityFactor = equalityStats.getOutputRowCount() / inputNullsFiltered.getOutputRowCount();
    double equalityFilterFactor = isFinite(rawEqualityFactor) ? rawEqualityFactor : 0.0;

    PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.buildFrom(inputNullsFiltered);
    result.setOutputRowCount(inputNullsFiltered.getOutputRowCount() * (1 - equalityFilterFactor));
    if (leftExpressionSymbol.isPresent()) {
        result.addSymbolStatistics(leftExpressionSymbol.get(), leftNullsFiltered);
    }
    if (rightExpressionSymbol.isPresent()) {
        result.addSymbolStatistics(rightExpressionSymbol.get(), rightNullsFiltered);
    }
    return result.build();
}
/**
 * Estimates the result of filtering with {@code left = right}.
 *
 * <p>Returns {@code PlanNodeStatsEstimate.unknown()} when either side's distinct-values
 * count is NaN. Otherwise the output row count is the input row count scaled by both
 * sides' non-null fractions and by {@code 1 / max(leftNdv, rightNdv, 1)}. Each operand
 * backed by a symbol receives the same statistics: the intersection of both ranges, a
 * nulls fraction of 0 and the smaller of the two distinct-values counts.
 *
 * @param inputStatistics statistics of the input being filtered
 * @param leftExpressionStatistics statistics of the left operand
 * @param leftExpressionSymbol the symbol backing the left operand, if any
 * @param rightExpressionStatistics statistics of the right operand
 * @param rightExpressionSymbol the symbol backing the right operand, if any
 * @return the estimate after applying the equality
 */
private static PlanNodeStatsEstimate estimateExpressionEqualToExpression(
        PlanNodeStatsEstimate inputStatistics,
        SymbolStatsEstimate leftExpressionStatistics,
        Optional<Symbol> leftExpressionSymbol,
        SymbolStatsEstimate rightExpressionStatistics,
        Optional<Symbol> rightExpressionSymbol)
{
    boolean leftNdvUnknown = isNaN(leftExpressionStatistics.getDistinctValuesCount());
    if (leftNdvUnknown || isNaN(rightExpressionStatistics.getDistinctValuesCount())) {
        return PlanNodeStatsEstimate.unknown();
    }

    StatisticRange leftExpressionRange = StatisticRange.from(leftExpressionStatistics);
    StatisticRange rightExpressionRange = StatisticRange.from(rightExpressionStatistics);
    StatisticRange sharedRange = leftExpressionRange.intersect(rightExpressionRange);

    double nullsFilterFactor = (1 - leftExpressionStatistics.getNullsFraction()) * (1 - rightExpressionStatistics.getNullsFraction());
    double leftNdv = leftExpressionRange.getDistinctValuesCount();
    double rightNdv = rightExpressionRange.getDistinctValuesCount();
    double filterFactor = 1.0 / max(leftNdv, rightNdv, 1);
    double retainedNdv = min(leftNdv, rightNdv);

    PlanNodeStatsEstimate.Builder estimate = PlanNodeStatsEstimate.buildFrom(inputStatistics);
    estimate.setOutputRowCount(inputStatistics.getOutputRowCount() * nullsFilterFactor * filterFactor);

    // Both operands end up described by the same statistics after the equality.
    SymbolStatsEstimate equalityStats = SymbolStatsEstimate.builder()
            .setAverageRowSize(averageExcludingNaNs(leftExpressionStatistics.getAverageRowSize(), rightExpressionStatistics.getAverageRowSize()))
            .setNullsFraction(0)
            .setStatisticsRange(sharedRange)
            .setDistinctValuesCount(retainedNdv)
            .build();

    if (leftExpressionSymbol.isPresent()) {
        estimate.addSymbolStatistics(leftExpressionSymbol.get(), equalityStats);
    }
    if (rightExpressionSymbol.isPresent()) {
        estimate.addSymbolStatistics(rightExpressionSymbol.get(), equalityStats);
    }
    return estimate.build();
}
/**
 * Estimates {@code value IS NULL}.
 *
 * <p>Only a predicate over a plain {@code SymbolReference} is estimated: the output row
 * count is the input row count scaled by the symbol's nulls fraction, and the symbol's
 * statistics are replaced by an all-null description (nulls fraction 1, NaN low and high
 * values, distinct-values count 0). Any other operand yields
 * {@code PlanNodeStatsEstimate.unknown()}.
 */
@Override
protected PlanNodeStatsEstimate visitIsNullPredicate(IsNullPredicate node, Void context)
{
    if (!(node.getValue() instanceof SymbolReference)) {
        return PlanNodeStatsEstimate.unknown();
    }

    Symbol symbol = Symbol.from(node.getValue());
    SymbolStatsEstimate symbolStats = input.getSymbolStatistics(symbol);

    SymbolStatsEstimate allNullStats = SymbolStatsEstimate.builder()
            .setNullsFraction(1.0)
            .setLowValue(NaN)
            .setHighValue(NaN)
            .setDistinctValuesCount(0.0)
            .build();

    return PlanNodeStatsEstimate.buildFrom(input)
            .setOutputRowCount(input.getOutputRowCount() * symbolStats.getNullsFraction())
            .addSymbolStatistics(symbol, allNullStats)
            .build();
}
PlanNodeStatsEstimate.Builder outputStats = PlanNodeStatsEstimate.buildFrom(innerJoinStats); outputStats.setOutputRowCount(outputRowCount);
private PlanNodeStatsEstimate filterByAuxiliaryClause(PlanNodeStatsEstimate stats, EquiJoinClause clause, TypeProvider types) { // we just clear null fraction and adjust ranges here // selectivity is mostly handled by driving clause. We just scale heuristically by UNKNOWN_FILTER_COEFFICIENT here. SymbolStatsEstimate leftStats = stats.getSymbolStatistics(clause.getLeft()); SymbolStatsEstimate rightStats = stats.getSymbolStatistics(clause.getRight()); StatisticRange leftRange = StatisticRange.from(leftStats); StatisticRange rightRange = StatisticRange.from(rightStats); StatisticRange intersect = leftRange.intersect(rightRange); double leftFilterValue = firstNonNaN(leftRange.overlapPercentWith(intersect), 1); double rightFilterValue = firstNonNaN(rightRange.overlapPercentWith(intersect), 1); double leftNdvInRange = leftFilterValue * leftRange.getDistinctValuesCount(); double rightNdvInRange = rightFilterValue * rightRange.getDistinctValuesCount(); double retainedNdv = MoreMath.min(leftNdvInRange, rightNdvInRange); SymbolStatsEstimate newLeftStats = buildFrom(leftStats) .setNullsFraction(0) .setStatisticsRange(intersect) .setDistinctValuesCount(retainedNdv) .build(); SymbolStatsEstimate newRightStats = buildFrom(rightStats) .setNullsFraction(0) .setStatisticsRange(intersect) .setDistinctValuesCount(retainedNdv) .build(); PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.buildFrom(stats) .setOutputRowCount(stats.getOutputRowCount() * UNKNOWN_FILTER_COEFFICIENT) .addSymbolStatistics(clause.getLeft(), newLeftStats) .addSymbolStatistics(clause.getRight(), newRightStats); return normalizer.normalize(result.build(), types); }