/**
 * Applies the low value, high value and distinct values count of {@code range}
 * to this builder by delegating to the corresponding setters, in that order.
 *
 * @param range source of the bounds and the distinct values count
 * @return the builder returned by the last setter call (presumably this builder, for chaining)
 */
public Builder setStatisticsRange(StatisticRange range)
{
    Builder withLowBound = setLowValue(range.getLow());
    Builder withBothBounds = withLowBound.setHighValue(range.getHigh());
    return withBothBounds.setDistinctValuesCount(range.getDistinctValuesCount());
}
/**
 * Estimates the output statistics of an equality comparison between two expressions.
 * <p>
 * The output row count is the input row count multiplied by
 * {@code (1 - leftNulls) * (1 - rightNulls)} and by {@code 1 / max(leftNdv, rightNdv, 1)}.
 * Each side that has a symbol receives the same symbol statistics: zero nulls fraction,
 * the intersection of both ranges, and {@code min(leftNdv, rightNdv)} distinct values.
 *
 * @param inputStatistics statistics of the input the comparison is applied to
 * @param leftExpressionStatistics statistics of the left expression
 * @param leftExpressionSymbol symbol of the left expression, if it has one
 * @param rightExpressionStatistics statistics of the right expression
 * @param rightExpressionSymbol symbol of the right expression, if it has one
 * @return the estimate, or {@link PlanNodeStatsEstimate#unknown()} when either
 *         distinct values count is NaN
 */
private static PlanNodeStatsEstimate estimateExpressionEqualToExpression(
        PlanNodeStatsEstimate inputStatistics,
        SymbolStatsEstimate leftExpressionStatistics,
        Optional<Symbol> leftExpressionSymbol,
        SymbolStatsEstimate rightExpressionStatistics,
        Optional<Symbol> rightExpressionSymbol)
{
    // Both distinct value counts are required; a NaN on either side ends the estimation.
    boolean bothNdvsKnown = !isNaN(leftExpressionStatistics.getDistinctValuesCount())
            && !isNaN(rightExpressionStatistics.getDistinctValuesCount());
    if (!bothNdvsKnown) {
        return PlanNodeStatsEstimate.unknown();
    }

    StatisticRange leftRange = StatisticRange.from(leftExpressionStatistics);
    StatisticRange rightRange = StatisticRange.from(rightExpressionStatistics);
    StatisticRange commonRange = leftRange.intersect(rightRange);

    double nonNullShare = (1 - leftExpressionStatistics.getNullsFraction())
            * (1 - rightExpressionStatistics.getNullsFraction());
    double leftDistinct = leftRange.getDistinctValuesCount();
    double rightDistinct = rightRange.getDistinctValuesCount();
    double selectivity = 1.0 / max(leftDistinct, rightDistinct, 1);
    double survivingDistinct = min(leftDistinct, rightDistinct);

    double estimatedRowCount = inputStatistics.getOutputRowCount() * nonNullShare * selectivity;
    PlanNodeStatsEstimate.Builder estimate = PlanNodeStatsEstimate.buildFrom(inputStatistics)
            .setOutputRowCount(estimatedRowCount);

    double sharedRowSize = averageExcludingNaNs(
            leftExpressionStatistics.getAverageRowSize(),
            rightExpressionStatistics.getAverageRowSize());
    // The distinct values count is set after the range so that the retained NDV is the value applied last.
    SymbolStatsEstimate equalityStats = SymbolStatsEstimate.builder()
            .setAverageRowSize(sharedRowSize)
            .setNullsFraction(0)
            .setStatisticsRange(commonRange)
            .setDistinctValuesCount(survivingDistinct)
            .build();

    // Both sides of the equality share the same statistics once the comparison holds.
    leftExpressionSymbol.ifPresent(left -> estimate.addSymbolStatistics(left, equalityStats));
    rightExpressionSymbol.ifPresent(right -> estimate.addSymbolStatistics(right, equalityStats));

    return estimate.build();
}
private PlanNodeStatsEstimate filterByAuxiliaryClause(PlanNodeStatsEstimate stats, EquiJoinClause clause, TypeProvider types) { // we just clear null fraction and adjust ranges here // selectivity is mostly handled by driving clause. We just scale heuristically by UNKNOWN_FILTER_COEFFICIENT here. SymbolStatsEstimate leftStats = stats.getSymbolStatistics(clause.getLeft()); SymbolStatsEstimate rightStats = stats.getSymbolStatistics(clause.getRight()); StatisticRange leftRange = StatisticRange.from(leftStats); StatisticRange rightRange = StatisticRange.from(rightStats); StatisticRange intersect = leftRange.intersect(rightRange); double leftFilterValue = firstNonNaN(leftRange.overlapPercentWith(intersect), 1); double rightFilterValue = firstNonNaN(rightRange.overlapPercentWith(intersect), 1); double leftNdvInRange = leftFilterValue * leftRange.getDistinctValuesCount(); double rightNdvInRange = rightFilterValue * rightRange.getDistinctValuesCount(); double retainedNdv = MoreMath.min(leftNdvInRange, rightNdvInRange); SymbolStatsEstimate newLeftStats = buildFrom(leftStats) .setNullsFraction(0) .setStatisticsRange(intersect) .setDistinctValuesCount(retainedNdv) .build(); SymbolStatsEstimate newRightStats = buildFrom(rightStats) .setNullsFraction(0) .setStatisticsRange(intersect) .setDistinctValuesCount(retainedNdv) .build(); PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.buildFrom(stats) .setOutputRowCount(stats.getOutputRowCount() * UNKNOWN_FILTER_COEFFICIENT) .addSymbolStatistics(clause.getLeft(), newLeftStats) .addSymbolStatistics(clause.getRight(), newRightStats); return normalizer.normalize(result.build(), types); }
@Test public void testOverlapPercentWith() { StatisticRange zeroToTen = range(0, 10, 10); StatisticRange empty = StatisticRange.empty(); // Equal ranges assertOverlap(zeroToTen, range(0, 10, 5), 1); assertOverlap(zeroToTen, range(0, 10, 20), 1); assertOverlap(zeroToTen, range(0, 10, 20), 1); // Some overlap assertOverlap(zeroToTen, range(5, 3000, 3), 0.5); // Single value overlap assertOverlap(zeroToTen, range(3, 3, 1), 1 / zeroToTen.getDistinctValuesCount()); assertOverlap(zeroToTen, range(10, 100, 357), 1 / zeroToTen.getDistinctValuesCount()); // No overlap assertOverlap(zeroToTen, range(20, 30, 10), 0); // Empty ranges assertOverlap(zeroToTen, empty, 0); assertOverlap(empty, zeroToTen, 0); // no test for empty, empty) since any return value is correct assertOverlap(unboundedRange(10), empty, 0); // Unbounded (infinite), NDV-based assertOverlap(unboundedRange(10), unboundedRange(20), 1); assertOverlap(unboundedRange(20), unboundedRange(10), 0.5); assertOverlap(unboundedRange(0.1), unboundedRange(1), 1); assertOverlap(unboundedRange(0.0), unboundedRange(1), 0); assertOverlap(unboundedRange(0.0), unboundedRange(0), 0); }