return normalizer.normalize(leftStats.mapOutputRowCount(rowCount -> 0.0), types);
/**
 * Checks {@code calculateJoinComplementStats} when it is given an empty {@code Optional}
 * and an empty clause list: the result must equal the normalized left stats with the
 * output row count mapped to {@code 0.0}.
 */
@Test
public void testLeftJoinComplementStatsWithNoClauses()
{
    // Expected value: left stats with the row count forced to zero, then normalized.
    PlanNodeStatsEstimate leftStatsWithZeroRows = LEFT_STATS.mapOutputRowCount(ignored -> 0.0);
    PlanNodeStatsEstimate expected = NORMALIZER.normalize(leftStatsWithZeroRows, TYPES);

    PlanNodeStatsEstimate actual = JOIN_STATS_RULE.calculateJoinComplementStats(
            Optional.empty(),
            ImmutableList.of(),
            LEFT_STATS,
            RIGHT_STATS,
            TYPES);

    assertEquals(actual, expected);
}
/**
 * Computes the stats estimate for a filter node.
 *
 * <p>The source stats are filtered by the node's predicate via {@code filterStatsCalculator}.
 * If the default filter factor is enabled for the session and the filtered row count is
 * unknown, the estimate falls back to the source stats with the output row count set to the
 * source row count times {@code UNKNOWN_FILTER_COEFFICIENT}.
 *
 * @param node the filter node being estimated
 * @param statsProvider supplies the stats of the node's source
 * @param lookup not used by this method
 * @param session session passed to the filter calculator and the feature-flag check
 * @param types type information passed to the filter calculator
 * @return the estimate, always present
 */
@Override
public Optional<PlanNodeStatsEstimate> doCalculate(FilterNode node, StatsProvider statsProvider, Lookup lookup, Session session, TypeProvider types)
{
    PlanNodeStatsEstimate sourceStats = statsProvider.getStats(node.getSource());
    PlanNodeStatsEstimate filtered = filterStatsCalculator.filterStats(sourceStats, node.getPredicate(), session, types);

    boolean fallBackToDefaultFactor = isDefaultFilterFactorEnabled(session) && filtered.isOutputRowCountUnknown();
    if (!fallBackToDefaultFactor) {
        return Optional.of(filtered);
    }

    // The predicate could not be estimated: scale the source row count by the default coefficient.
    PlanNodeStatsEstimate defaulted = sourceStats.mapOutputRowCount(
            ignored -> sourceStats.getOutputRowCount() * UNKNOWN_FILTER_COEFFICIENT);
    return Optional.of(defaulted);
}
}
private PlanNodeStatsEstimate computeInnerJoinStats(JoinNode node, PlanNodeStatsEstimate crossJoinStats, Session session, TypeProvider types) { List<EquiJoinClause> equiJoinCriteria = node.getCriteria(); if (equiJoinCriteria.isEmpty()) { if (!node.getFilter().isPresent()) { return crossJoinStats; } // TODO: this might explode stats return filterStatsCalculator.filterStats(crossJoinStats, node.getFilter().get(), session, types); } PlanNodeStatsEstimate equiJoinEstimate = filterByEquiJoinClauses(crossJoinStats, node.getCriteria(), session, types); if (equiJoinEstimate.isOutputRowCountUnknown()) { return PlanNodeStatsEstimate.unknown(); } if (!node.getFilter().isPresent()) { return equiJoinEstimate; } PlanNodeStatsEstimate filteredEquiJoinEstimate = filterStatsCalculator.filterStats(equiJoinEstimate, node.getFilter().get(), session, types); if (filteredEquiJoinEstimate.isOutputRowCountUnknown()) { return normalizer.normalize(equiJoinEstimate.mapOutputRowCount(rowCount -> rowCount * UNKNOWN_FILTER_COEFFICIENT), types); } return filteredEquiJoinEstimate; }
private Optional<PlanNodeStatsEstimate> calculate(FilterNode filterNode, SemiJoinNode semiJoinNode, StatsProvider statsProvider, Session session, TypeProvider types) { PlanNodeStatsEstimate sourceStats = statsProvider.getStats(semiJoinNode.getSource()); PlanNodeStatsEstimate filteringSourceStats = statsProvider.getStats(semiJoinNode.getFilteringSource()); Symbol filteringSourceJoinSymbol = semiJoinNode.getFilteringSourceJoinSymbol(); Symbol sourceJoinSymbol = semiJoinNode.getSourceJoinSymbol(); Optional<SemiJoinOutputFilter> semiJoinOutputFilter = extractSemiJoinOutputFilter(filterNode.getPredicate(), semiJoinNode.getSemiJoinOutput()); if (!semiJoinOutputFilter.isPresent()) { return Optional.empty(); } PlanNodeStatsEstimate semiJoinStats; if (semiJoinOutputFilter.get().isNegated()) { semiJoinStats = computeAntiJoin(sourceStats, filteringSourceStats, sourceJoinSymbol, filteringSourceJoinSymbol); } else { semiJoinStats = computeSemiJoin(sourceStats, filteringSourceStats, sourceJoinSymbol, filteringSourceJoinSymbol); } if (semiJoinStats.isOutputRowCountUnknown()) { return Optional.of(PlanNodeStatsEstimate.unknown()); } // apply remaining predicate PlanNodeStatsEstimate filteredStats = filterStatsCalculator.filterStats(semiJoinStats, semiJoinOutputFilter.get().getRemainingPredicate(), session, types); if (filteredStats.isOutputRowCountUnknown()) { return Optional.of(semiJoinStats.mapOutputRowCount(rowCount -> rowCount * UNKNOWN_FILTER_COEFFICIENT)); } return Optional.of(filteredStats); }
/**
 * Estimates the effect of restricting an expression to {@code filterRange}.
 *
 * <p>The output row count is multiplied by the overlap of the expression's range with the
 * intersected range and by the expression's non-null fraction. When the expression is backed
 * by a symbol, that symbol's statistics are replaced with the intersected range, the
 * expression's average row size and a nulls fraction of {@code 0.0}.
 *
 * @param inputStatistics stats before the filter
 * @param expressionStatistics stats of the filtered expression
 * @param expressionSymbol symbol whose column statistics are updated, if present
 * @param filterRange range the expression is restricted to
 * @return the filtered estimate
 */
private static PlanNodeStatsEstimate estimateFilterRange(
        PlanNodeStatsEstimate inputStatistics,
        SymbolStatsEstimate expressionStatistics,
        Optional<Symbol> expressionSymbol,
        StatisticRange filterRange)
{
    StatisticRange expressionRange = StatisticRange.from(expressionStatistics);
    StatisticRange intersectRange = expressionRange.intersect(filterRange);
    double filterFactor = expressionRange.overlapPercentWith(intersectRange);

    // Same left-to-right multiplication order as filterFactor * (1 - nullsFraction) * rowCount.
    double nonNullFilterFactor = filterFactor * (1 - expressionStatistics.getNullsFraction());
    PlanNodeStatsEstimate scaledEstimate = inputStatistics.mapOutputRowCount(rowCount -> nonNullFilterFactor * rowCount);

    if (!expressionSymbol.isPresent()) {
        return scaledEstimate;
    }

    SymbolStatsEstimate narrowedSymbolStats = SymbolStatsEstimate.builder()
            .setAverageRowSize(expressionStatistics.getAverageRowSize())
            .setStatisticsRange(intersectRange)
            .setNullsFraction(0.0)
            .build();
    return scaledEstimate.mapSymbolColumnStatistics(expressionSymbol.get(), ignored -> narrowedSymbolStats);
}
/**
 * Estimates stats for a "left != right" comparison between two expressions.
 *
 * <p>The input row count is first scaled by the product of both sides' non-null fractions,
 * and both sides' nulls fractions are set to {@code 0.0}. The equality estimate is then
 * computed on that null-filtered input; if its row count is unknown, the result is
 * {@code PlanNodeStatsEstimate.unknown()}. Otherwise the output row count is the null-filtered
 * row count times {@code (1 - equalityFilterFactor)}, where a non-finite factor is treated
 * as {@code 0.0}.
 *
 * @param inputStatistics stats before the comparison
 * @param leftExpressionStatistics stats of the left expression
 * @param leftExpressionSymbol symbol to receive the null-filtered left stats, if present
 * @param rightExpressionStatistics stats of the right expression
 * @param rightExpressionSymbol symbol to receive the null-filtered right stats, if present
 * @return the inequality estimate
 */
private static PlanNodeStatsEstimate estimateExpressionNotEqualToExpression(
        PlanNodeStatsEstimate inputStatistics,
        SymbolStatsEstimate leftExpressionStatistics,
        Optional<Symbol> leftExpressionSymbol,
        SymbolStatsEstimate rightExpressionStatistics,
        Optional<Symbol> rightExpressionSymbol)
{
    double nullsFilterFactor = (1 - leftExpressionStatistics.getNullsFraction()) * (1 - rightExpressionStatistics.getNullsFraction());
    PlanNodeStatsEstimate inputNullsFiltered = inputStatistics.mapOutputRowCount(size -> size * nullsFilterFactor);
    SymbolStatsEstimate leftNullsFiltered = leftExpressionStatistics.mapNullsFraction(ignored -> 0.0);
    SymbolStatsEstimate rightNullsFiltered = rightExpressionStatistics.mapNullsFraction(ignored -> 0.0);

    PlanNodeStatsEstimate equalityStats = estimateExpressionEqualToExpression(
            inputNullsFiltered,
            leftNullsFiltered,
            leftExpressionSymbol,
            rightNullsFiltered,
            rightExpressionSymbol);
    if (equalityStats.isOutputRowCountUnknown()) {
        return PlanNodeStatsEstimate.unknown();
    }

    double nonNullRowCount = inputNullsFiltered.getOutputRowCount();
    double rawEqualityFactor = equalityStats.getOutputRowCount() / nonNullRowCount;
    // A non-finite ratio (e.g. from a zero row count) is treated as "nothing is equal".
    double equalityFilterFactor = isFinite(rawEqualityFactor) ? rawEqualityFactor : 0.0;

    PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.buildFrom(inputNullsFiltered);
    result.setOutputRowCount(nonNullRowCount * (1 - equalityFilterFactor));
    if (leftExpressionSymbol.isPresent()) {
        result.addSymbolStatistics(leftExpressionSymbol.get(), leftNullsFiltered);
    }
    if (rightExpressionSymbol.isPresent()) {
        result.addSymbolStatistics(rightExpressionSymbol.get(), rightNullsFiltered);
    }
    return result.build();
}
/**
 * Checks {@code calculateJoinComplementStats} with two equi-join clauses (join column and
 * "other" column) and an empty {@code Optional} as the first argument.
 *
 * <p>The expected estimate is built from the left-side constants and then has its output row
 * count divided by {@code UNKNOWN_FILTER_COEFFICIENT}.
 */
@Test
public void testLeftJoinComplementStatsWithMultipleClauses()
{
    PlanNodeStatsEstimate expected = planNodeStats(
            LEFT_ROWS_COUNT * (LEFT_JOIN_COLUMN_NULLS + LEFT_JOIN_COLUMN_NON_NULLS / 4),
            symbolStatistics(
                    LEFT_JOIN_COLUMN,
                    0.0,
                    20.0,
                    LEFT_JOIN_COLUMN_NULLS / (LEFT_JOIN_COLUMN_NULLS + LEFT_JOIN_COLUMN_NON_NULLS / 4),
                    5),
            LEFT_OTHER_COLUMN_STATS)
            .mapOutputRowCount(rowCount -> rowCount / UNKNOWN_FILTER_COEFFICIENT);

    EquiJoinClause joinColumnClause = new EquiJoinClause(new Symbol(LEFT_JOIN_COLUMN), new Symbol(RIGHT_JOIN_COLUMN));
    EquiJoinClause otherColumnClause = new EquiJoinClause(new Symbol(LEFT_OTHER_COLUMN), new Symbol(RIGHT_OTHER_COLUMN));
    ImmutableList<EquiJoinClause> clauses = ImmutableList.of(joinColumnClause, otherColumnClause);

    PlanNodeStatsEstimate actual = JOIN_STATS_RULE.calculateJoinComplementStats(
            Optional.empty(),
            clauses,
            LEFT_STATS,
            RIGHT_STATS,
            TYPES);

    assertEquals(actual, expected);
}
.setDistinctValuesCount(leftNDV - matchingRightNDV) .build()); result = result.mapOutputRowCount(rowCount -> rowCount * scaleFactor); .setDistinctValuesCount(0.0) .build()); result = result.mapOutputRowCount(rowCount -> rowCount * leftColumnStats.getNullsFraction()); result = result.mapOutputRowCount(rowCount -> min(leftStats.getOutputRowCount(), rowCount / Math.pow(UNKNOWN_FILTER_COEFFICIENT, numberOfRemainingClauses)));
private PlanNodeStatsEstimate estimateLogicalAnd(Expression left, Expression right) { // first try to estimate in the fair way PlanNodeStatsEstimate leftEstimate = process(left); if (!leftEstimate.isOutputRowCountUnknown()) { PlanNodeStatsEstimate logicalAndEstimate = new FilterExpressionStatsCalculatingVisitor(leftEstimate, session, types).process(right); if (!logicalAndEstimate.isOutputRowCountUnknown()) { return logicalAndEstimate; } } // If some of the filters cannot be estimated, take the smallest estimate. // Apply 0.9 filter factor as "unknown filter" factor. PlanNodeStatsEstimate rightEstimate = process(right); PlanNodeStatsEstimate smallestKnownEstimate; if (leftEstimate.isOutputRowCountUnknown()) { smallestKnownEstimate = rightEstimate; } else if (rightEstimate.isOutputRowCountUnknown()) { smallestKnownEstimate = leftEstimate; } else { smallestKnownEstimate = leftEstimate.getOutputRowCount() <= rightEstimate.getOutputRowCount() ? leftEstimate : rightEstimate; } if (smallestKnownEstimate.isOutputRowCountUnknown()) { return PlanNodeStatsEstimate.unknown(); } return smallestKnownEstimate.mapOutputRowCount(rowCount -> rowCount * UNKNOWN_FILTER_COEFFICIENT); }