/**
 * Combines this range with {@code other} into one range covering both, and estimates
 * the distinct-value count of the combined range.
 *
 * <p>The portion shared by both ranges is counted once, using the larger of the two
 * sides' overlapping distinct-value estimates. The portion of each range that lies
 * outside the overlap is then added on top.
 *
 * @param other the range to merge with this one
 * @return a new range whose bounds come from {@code minExcludeNaN} of the lows and
 *         {@code maxExcludeNaN} of the highs, carrying the combined distinct-value estimate
 */
public StatisticRange addAndCollapseDistinctValues(StatisticRange other) {
    double thisSharedFraction = this.overlapPercentWith(other);
    double otherSharedFraction = other.overlapPercentWith(this);

    // Distinct values each side places inside the shared region; only the larger one is kept.
    double thisSharedNdv = thisSharedFraction * distinctValues;
    double otherSharedNdv = otherSharedFraction * other.distinctValues;
    double sharedNdv = max(thisSharedNdv, otherSharedNdv);

    // Distinct values each side places outside the shared region.
    double thisExclusiveNdv = (1 - thisSharedFraction) * distinctValues;
    double otherExclusiveNdv = (1 - otherSharedFraction) * other.distinctValues;

    // Summed left to right: shared, then this side's remainder, then the other side's.
    double combinedNdv = sharedNdv + thisExclusiveNdv + otherExclusiveNdv;

    double combinedLow = minExcludeNaN(low, other.low);
    double combinedHigh = maxExcludeNaN(high, other.high);
    return new StatisticRange(combinedLow, combinedHigh, combinedNdv);
}
/**
 * Estimates how many distinct values fall in the region shared by this range and
 * {@code other}.
 *
 * <p>Each side's distinct-value count is scaled by the fraction of that side that
 * overlaps the other. The larger of the two scaled values is taken, then capped by
 * the smaller of the two input distinct-value counts. Combining is done through
 * {@code minExcludeNaN}/{@code maxExcludeNaN}; NOTE(review): these presumably skip
 * NaN operands, as their names suggest — confirm against their definitions.
 *
 * @param other the range to compare against
 * @return the estimated number of distinct values in the overlap
 */
private double overlappingDistinctValues(StatisticRange other) {
    double leftFraction = overlapPercentWith(other);
    double rightFraction = other.overlapPercentWith(this);

    double leftNdvInOverlap = leftFraction * distinctValues;
    double rightNdvInOverlap = rightFraction * other.distinctValues;

    // Upper bound: the overlap cannot hold more distinct values than the smaller input.
    double ndvCap = minExcludeNaN(this.distinctValues, other.distinctValues);
    double largerOverlapNdv = maxExcludeNaN(leftNdvInOverlap, rightNdvInOverlap);
    return minExcludeNaN(ndvCap, largerOverlapNdv);
}
/**
 * Checks that {@code a.overlapPercentWith(b)} matches {@code expected}, delegating the
 * comparison to {@code assertEstimateEquals} with the label {@code "overlapPercentWith"}.
 *
 * @param a the range whose overlap is measured
 * @param b the range it is measured against
 * @param expected the expected overlap value
 */
private static void assertOverlap(StatisticRange a, StatisticRange b, double expected) {
    double actualOverlap = a.overlapPercentWith(b);
    assertEstimateEquals(actualOverlap, expected, "overlapPercentWith");
}
} // closes the enclosing scope opened before this excerpt
/**
 * Estimates plan-node statistics after restricting an expression to {@code filterRange}.
 *
 * <p>The output row count is scaled by the fraction of the expression's range that
 * overlaps the filter range, multiplied by the expression's non-null fraction. When
 * {@code expressionSymbol} is present, that symbol's statistics are replaced with a new
 * estimate that keeps the original average row size, uses the intersected range, and
 * has a nulls fraction of zero.
 *
 * @param inputStatistics statistics of the input plan node
 * @param expressionStatistics statistics of the filtered expression
 * @param expressionSymbol the symbol whose statistics should be rewritten, if any
 * @param filterRange the range the expression is restricted to
 * @return the adjusted estimate
 */
private static PlanNodeStatsEstimate estimateFilterRange(
        PlanNodeStatsEstimate inputStatistics,
        SymbolStatsEstimate expressionStatistics,
        Optional<Symbol> expressionSymbol,
        StatisticRange filterRange) {
    StatisticRange sourceRange = StatisticRange.from(expressionStatistics);
    StatisticRange retainedRange = sourceRange.intersect(filterRange);
    double retainedFraction = sourceRange.overlapPercentWith(retainedRange);

    PlanNodeStatsEstimate scaledEstimate = inputStatistics.mapOutputRowCount(
            rowCount -> retainedFraction * (1 - expressionStatistics.getNullsFraction()) * rowCount);

    // Without a symbol there are no per-column statistics to rewrite.
    if (!expressionSymbol.isPresent()) {
        return scaledEstimate;
    }

    SymbolStatsEstimate narrowedStats = SymbolStatsEstimate.builder()
            .setAverageRowSize(expressionStatistics.getAverageRowSize())
            .setStatisticsRange(retainedRange)
            .setNullsFraction(0.0)
            .build();
    return scaledEstimate.mapSymbolColumnStatistics(expressionSymbol.get(), ignored -> narrowedStats);
}
/**
 * Estimates plan-node statistics for an "expression is not equal to literal" filter.
 *
 * <p>The literal is modelled as a single-point range with one distinct value. When the
 * literal value is absent, an unbounded range with one distinct value is used instead.
 * The kept fraction is one minus the overlap of the expression's range with that literal
 * range, and the output row count is additionally scaled by the expression's non-null
 * fraction. When {@code expressionSymbol} is present, its statistics are replaced by a
 * copy of {@code expressionStatistics} with a zero nulls fraction and a distinct-value
 * count reduced by one (floored at zero).
 *
 * @param inputStatistics statistics of the input plan node
 * @param expressionStatistics statistics of the compared expression
 * @param expressionSymbol the symbol whose statistics should be rewritten, if any
 * @param literalValue the literal's numeric value, if known
 * @return the adjusted estimate
 */
private static PlanNodeStatsEstimate estimateExpressionNotEqualToLiteral(
        PlanNodeStatsEstimate inputStatistics,
        SymbolStatsEstimate expressionStatistics,
        Optional<Symbol> expressionSymbol,
        OptionalDouble literalValue) {
    StatisticRange sourceRange = StatisticRange.from(expressionStatistics);

    StatisticRange literalRange = literalValue.isPresent()
            ? new StatisticRange(literalValue.getAsDouble(), literalValue.getAsDouble(), 1)
            : new StatisticRange(NEGATIVE_INFINITY, POSITIVE_INFINITY, 1);

    StatisticRange excludedRange = sourceRange.intersect(literalRange);
    double keptFraction = 1 - sourceRange.overlapPercentWith(excludedRange);

    PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.buildFrom(inputStatistics);
    result.setOutputRowCount(
            keptFraction * (1 - expressionStatistics.getNullsFraction()) * inputStatistics.getOutputRowCount());

    // Without a symbol only the row count changes.
    if (!expressionSymbol.isPresent()) {
        return result.build();
    }

    SymbolStatsEstimate adjustedStats = buildFrom(expressionStatistics)
            .setNullsFraction(0.0)
            .setDistinctValuesCount(max(expressionStatistics.getDistinctValuesCount() - 1, 0))
            .build();
    return result.addSymbolStatistics(expressionSymbol.get(), adjustedStats).build();
}
private PlanNodeStatsEstimate filterByAuxiliaryClause(PlanNodeStatsEstimate stats, EquiJoinClause clause, TypeProvider types) { // we just clear null fraction and adjust ranges here // selectivity is mostly handled by driving clause. We just scale heuristically by UNKNOWN_FILTER_COEFFICIENT here. SymbolStatsEstimate leftStats = stats.getSymbolStatistics(clause.getLeft()); SymbolStatsEstimate rightStats = stats.getSymbolStatistics(clause.getRight()); StatisticRange leftRange = StatisticRange.from(leftStats); StatisticRange rightRange = StatisticRange.from(rightStats); StatisticRange intersect = leftRange.intersect(rightRange); double leftFilterValue = firstNonNaN(leftRange.overlapPercentWith(intersect), 1); double rightFilterValue = firstNonNaN(rightRange.overlapPercentWith(intersect), 1); double leftNdvInRange = leftFilterValue * leftRange.getDistinctValuesCount(); double rightNdvInRange = rightFilterValue * rightRange.getDistinctValuesCount(); double retainedNdv = MoreMath.min(leftNdvInRange, rightNdvInRange); SymbolStatsEstimate newLeftStats = buildFrom(leftStats) .setNullsFraction(0) .setStatisticsRange(intersect) .setDistinctValuesCount(retainedNdv) .build(); SymbolStatsEstimate newRightStats = buildFrom(rightStats) .setNullsFraction(0) .setStatisticsRange(intersect) .setDistinctValuesCount(retainedNdv) .build(); PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.buildFrom(stats) .setOutputRowCount(stats.getOutputRowCount() * UNKNOWN_FILTER_COEFFICIENT) .addSymbolStatistics(clause.getLeft(), newLeftStats) .addSymbolStatistics(clause.getRight(), newRightStats); return normalizer.normalize(result.build(), types); }