/**
 * Normalizes {@code stats} without supplying a set of output symbols.
 *
 * <p>Delegates to the three-argument overload, passing {@link Optional#empty()} in place of the
 * output symbols.
 *
 * @param stats the estimate to normalize
 * @param types the type provider forwarded unchanged to the delegate
 * @return whatever the delegate overload returns for {@code stats}
 */
public PlanNodeStatsEstimate normalize(PlanNodeStatsEstimate stats, TypeProvider types) { return normalize(stats, Optional.empty(), types); }
/**
 * Normalizes {@code stats} with an explicit collection of output symbols.
 *
 * <p>Wraps {@code outputSymbols} with {@link Optional#of(Object)} and delegates to the
 * three-argument overload taking an {@code Optional}. Because {@code Optional.of} rejects
 * {@code null}, a {@code null} collection fails with a {@link NullPointerException}.
 *
 * @param stats the estimate to normalize
 * @param outputSymbols the symbols forwarded to the delegate; must not be {@code null}
 * @param types the type provider forwarded unchanged to the delegate
 * @return whatever the delegate overload returns for {@code stats}
 */
public PlanNodeStatsEstimate normalize(PlanNodeStatsEstimate stats, Collection<Symbol> outputSymbols, TypeProvider types) { return normalize(stats, Optional.of(outputSymbols), types); }
/**
 * Processes {@code node} through the superclass implementation and normalizes the result.
 *
 * @param node the node to process
 * @param context unused visitor context; may be {@code null}
 * @return the superclass estimate after it has been passed through {@code normalizer.normalize}
 */
@Override
public PlanNodeStatsEstimate process(Node node, @Nullable Void context)
{
    PlanNodeStatsEstimate rawEstimate = super.process(node, context);
    return normalizer.normalize(rawEstimate, types);
}
/**
 * Computes the estimate for {@code node} via {@code doCalculate} and normalizes it against the
 * node's output symbols.
 *
 * <p>The method is {@code final}: the actual computation is supplied through {@code doCalculate},
 * and every estimate it produces goes through {@code normalizer.normalize} before being returned.
 *
 * @param node the node to compute the estimate for
 * @param sourceStats forwarded unchanged to {@code doCalculate}
 * @param lookup forwarded unchanged to {@code doCalculate}
 * @param session forwarded unchanged to {@code doCalculate}
 * @param types forwarded to {@code doCalculate} and to the normalizer
 * @return the normalized estimate, or an empty {@code Optional} when {@code doCalculate} yields none
 */
@Override
public final Optional<PlanNodeStatsEstimate> calculate(T node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types)
{
    Optional<PlanNodeStatsEstimate> rawEstimate = doCalculate(node, sourceStats, lookup, session, types);
    if (!rawEstimate.isPresent()) {
        return Optional.empty();
    }

    PlanNodeStatsEstimate normalized = normalizer.normalize(rawEstimate.get(), node.getOutputSymbols(), types);
    // ofNullable mirrors Optional.map, which yields an empty Optional for a null mapping result
    return Optional.ofNullable(normalized);
}
return normalizer.normalize(leftStats.mapOutputRowCount(rowCount -> 0.0), types); .filter(estimate -> !estimate.isOutputRowCountUnknown()) .max(comparingDouble(PlanNodeStatsEstimate::getOutputRowCount)) .map(estimate -> normalizer.normalize(estimate, types)) .orElse(PlanNodeStatsEstimate.unknown());
/**
 * With no filter and an empty clause list, the join complement stats are expected to equal
 * {@code LEFT_STATS} with its output row count mapped to zero, after normalization.
 */
@Test
public void testLeftJoinComplementStatsWithNoClauses()
{
    PlanNodeStatsEstimate zeroRowLeftStats = LEFT_STATS.mapOutputRowCount(ignored -> 0.0);

    PlanNodeStatsEstimate actual = JOIN_STATS_RULE.calculateJoinComplementStats(
            Optional.empty(),
            ImmutableList.of(),
            LEFT_STATS,
            RIGHT_STATS,
            TYPES);

    assertEquals(actual, NORMALIZER.normalize(zeroRowLeftStats, TYPES));
}
/**
 * Normalizes {@code estimate}, using its own symbols with known statistics as the output
 * symbols, and wraps the result in a {@code PlanNodeStatsAssertion}.
 *
 * @param estimate the estimate to normalize
 * @param types the type provider passed to the normalizer
 * @return an assertion object over the normalized estimate
 */
private PlanNodeStatsAssertion assertNormalized(PlanNodeStatsEstimate estimate, TypeProvider types)
{
    return PlanNodeStatsAssertion.assertThat(
            normalizer.normalize(estimate, estimate.getSymbolsWithKnownStatistics(), types));
}
private PlanNodeStatsEstimate computeInnerJoinStats(JoinNode node, PlanNodeStatsEstimate crossJoinStats, Session session, TypeProvider types) { List<EquiJoinClause> equiJoinCriteria = node.getCriteria(); if (equiJoinCriteria.isEmpty()) { if (!node.getFilter().isPresent()) { return crossJoinStats; } // TODO: this might explode stats return filterStatsCalculator.filterStats(crossJoinStats, node.getFilter().get(), session, types); } PlanNodeStatsEstimate equiJoinEstimate = filterByEquiJoinClauses(crossJoinStats, node.getCriteria(), session, types); if (equiJoinEstimate.isOutputRowCountUnknown()) { return PlanNodeStatsEstimate.unknown(); } if (!node.getFilter().isPresent()) { return equiJoinEstimate; } PlanNodeStatsEstimate filteredEquiJoinEstimate = filterStatsCalculator.filterStats(equiJoinEstimate, node.getFilter().get(), session, types); if (filteredEquiJoinEstimate.isOutputRowCountUnknown()) { return normalizer.normalize(equiJoinEstimate.mapOutputRowCount(rowCount -> rowCount * UNKNOWN_FILTER_COEFFICIENT), types); } return filteredEquiJoinEstimate; }
/**
 * Builds the statistics of the cross join of both join sides.
 *
 * <p>The output row count is the product of the two sides' row counts. Symbol statistics are
 * copied from {@code leftStats} for the left child's output symbols and from
 * {@code rightStats} for the right child's output symbols. The result is normalized.
 *
 * @param node the join node whose children define the output symbols
 * @param leftStats statistics of the left side
 * @param rightStats statistics of the right side
 * @param types the type provider passed to the normalizer
 * @return the normalized cross join estimate
 */
private PlanNodeStatsEstimate crossJoinStats(JoinNode node, PlanNodeStatsEstimate leftStats, PlanNodeStatsEstimate rightStats, TypeProvider types)
{
    double crossProductRowCount = leftStats.getOutputRowCount() * rightStats.getOutputRowCount();
    PlanNodeStatsEstimate.Builder combined = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(crossProductRowCount);

    node.getLeft().getOutputSymbols().forEach(leftSymbol ->
            combined.addSymbolStatistics(leftSymbol, leftStats.getSymbolStatistics(leftSymbol)));
    node.getRight().getOutputSymbols().forEach(rightSymbol ->
            combined.addSymbolStatistics(rightSymbol, rightStats.getSymbolStatistics(rightSymbol)));

    PlanNodeStatsEstimate crossJoin = combined.build();
    return normalizer.normalize(crossJoin, types);
}
private static void checkConsistent(StatsNormalizer normalizer, String source, PlanNodeStatsEstimate stats, Collection<Symbol> outputSymbols, TypeProvider types) PlanNodeStatsEstimate normalized = normalizer.normalize(stats, outputSymbols, types); if (Objects.equals(stats, normalized)) { return;
/**
 * Calls {@code calculateJoinComplementStats} with the right-side stats passed first and a clause
 * built from ({@code RIGHT_JOIN_COLUMN}, {@code LEFT_JOIN_COLUMN}).
 *
 * <p>The expected estimate has {@code RIGHT_ROWS_COUNT * RIGHT_JOIN_COLUMN_NULLS} rows, the
 * symbol statistics given below for the join column, and the other right column's stats
 * unchanged, all passed through the normalizer.
 */
@Test
public void testRightJoinComplementStats()
{
    PlanNodeStatsEstimate rawExpected = planNodeStats(
            RIGHT_ROWS_COUNT * RIGHT_JOIN_COLUMN_NULLS,
            symbolStatistics(RIGHT_JOIN_COLUMN, NaN, NaN, 1.0, 0),
            RIGHT_OTHER_COLUMN_STATS);
    PlanNodeStatsEstimate expected = NORMALIZER.normalize(rawExpected, TYPES);

    EquiJoinClause clause = new EquiJoinClause(new Symbol(RIGHT_JOIN_COLUMN), new Symbol(LEFT_JOIN_COLUMN));
    PlanNodeStatsEstimate actual = JOIN_STATS_RULE.calculateJoinComplementStats(
            Optional.empty(),
            ImmutableList.of(clause),
            RIGHT_STATS,
            LEFT_STATS,
            TYPES);

    assertEquals(actual, expected);
}
/**
 * Normalizes an estimate carrying statistics for symbols {@code a}, {@code b} and {@code c}
 * while listing only {@code b} and {@code c} as output symbols.
 *
 * <p>Asserts that {@code b} is the only symbol with known statistics afterwards and that its
 * distinct values count is still 30.
 */
@Test
public void testDropNonOutputSymbols()
{
    Symbol a = new Symbol("a");
    Symbol b = new Symbol("b");
    Symbol c = new Symbol("c");

    SymbolStatsEstimate statsOfA = SymbolStatsEstimate.builder().setDistinctValuesCount(20).build();
    SymbolStatsEstimate statsOfB = SymbolStatsEstimate.builder().setDistinctValuesCount(30).build();
    PlanNodeStatsEstimate estimate = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(40)
            .addSymbolStatistics(a, statsOfA)
            .addSymbolStatistics(b, statsOfB)
            .addSymbolStatistics(c, SymbolStatsEstimate.unknown())
            .build();

    // only b and c are declared as outputs; a is left out on purpose
    TypeProvider outputTypes = TypeProvider.copyOf(ImmutableMap.of(b, BIGINT, c, BIGINT));
    PlanNodeStatsEstimate normalized = normalizer.normalize(estimate, ImmutableList.of(b, c), outputTypes);

    PlanNodeStatsAssertion.assertThat(normalized)
            .symbolsWithKnownStats(b)
            .symbolStats(b, symbolAssert -> symbolAssert.distinctValuesCount(30));
}
private PlanNodeStatsEstimate filterByAuxiliaryClause(PlanNodeStatsEstimate stats, EquiJoinClause clause, TypeProvider types) { // we just clear null fraction and adjust ranges here // selectivity is mostly handled by driving clause. We just scale heuristically by UNKNOWN_FILTER_COEFFICIENT here. SymbolStatsEstimate leftStats = stats.getSymbolStatistics(clause.getLeft()); SymbolStatsEstimate rightStats = stats.getSymbolStatistics(clause.getRight()); StatisticRange leftRange = StatisticRange.from(leftStats); StatisticRange rightRange = StatisticRange.from(rightStats); StatisticRange intersect = leftRange.intersect(rightRange); double leftFilterValue = firstNonNaN(leftRange.overlapPercentWith(intersect), 1); double rightFilterValue = firstNonNaN(rightRange.overlapPercentWith(intersect), 1); double leftNdvInRange = leftFilterValue * leftRange.getDistinctValuesCount(); double rightNdvInRange = rightFilterValue * rightRange.getDistinctValuesCount(); double retainedNdv = MoreMath.min(leftNdvInRange, rightNdvInRange); SymbolStatsEstimate newLeftStats = buildFrom(leftStats) .setNullsFraction(0) .setStatisticsRange(intersect) .setDistinctValuesCount(retainedNdv) .build(); SymbolStatsEstimate newRightStats = buildFrom(rightStats) .setNullsFraction(0) .setStatisticsRange(intersect) .setDistinctValuesCount(retainedNdv) .build(); PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.buildFrom(stats) .setOutputRowCount(stats.getOutputRowCount() * UNKNOWN_FILTER_COEFFICIENT) .addSymbolStatistics(clause.getLeft(), newLeftStats) .addSymbolStatistics(clause.getRight(), newRightStats); return normalizer.normalize(result.build(), types); }