/**
 * Estimates stats for a FULL join.
 *
 * <p>The result is built by taking the LEFT join estimate and adding to it the complement
 * stats computed for the right side (criteria flipped, right stats passed as the primary side).
 *
 * @param node the join node being estimated
 * @param leftStats stats of the left source
 * @param rightStats stats of the right source
 * @param crossJoinStats stats of the cross join of both sources
 * @param session the current session
 * @param types type provider for the symbols involved
 * @return the combined estimate returned by {@code addJoinComplementStats}
 */
private PlanNodeStatsEstimate computeFullJoinStats(
        JoinNode node,
        PlanNodeStatsEstimate leftStats,
        PlanNodeStatsEstimate rightStats,
        PlanNodeStatsEstimate crossJoinStats,
        Session session,
        TypeProvider types)
{
    // The right-side complement is computed first, then the left join estimate it is added to.
    PlanNodeStatsEstimate rightComplement = calculateJoinComplementStats(node.getFilter(), flippedCriteria(node), rightStats, leftStats, types);
    PlanNodeStatsEstimate leftJoinEstimate = computeLeftJoinStats(node, leftStats, rightStats, crossJoinStats, session, types);
    return addJoinComplementStats(rightStats, leftJoinEstimate, rightComplement);
}
/**
 * Computes the stats estimate for a join node by dispatching on its join type.
 *
 * <p>Source stats for both sides are fetched from {@code sourceStats}, a cross-join estimate
 * is derived from them, and the type-specific helper produces the final estimate.
 *
 * @return the estimate for the join, always present
 * @throws IllegalStateException if the join type is none of INNER, LEFT, RIGHT or FULL
 */
@Override
protected Optional<PlanNodeStatsEstimate> doCalculate(JoinNode node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types)
{
    PlanNodeStatsEstimate left = sourceStats.getStats(node.getLeft());
    PlanNodeStatsEstimate right = sourceStats.getStats(node.getRight());
    PlanNodeStatsEstimate cross = crossJoinStats(node, left, right, types);

    PlanNodeStatsEstimate joinEstimate;
    switch (node.getType()) {
        case INNER:
            joinEstimate = computeInnerJoinStats(node, cross, session, types);
            break;
        case LEFT:
            joinEstimate = computeLeftJoinStats(node, left, right, cross, session, types);
            break;
        case RIGHT:
            joinEstimate = computeRightJoinStats(node, left, right, cross, session, types);
            break;
        case FULL:
            joinEstimate = computeFullJoinStats(node, left, right, cross, session, types);
            break;
        default:
            throw new IllegalStateException("Unknown join type: " + node.getType());
    }
    return Optional.of(joinEstimate);
}
/**
 * Estimates stats for a RIGHT join.
 *
 * <p>Combines the inner-join estimate with the complement stats computed for the right side
 * (criteria flipped, right stats passed as the primary side).
 *
 * @param node the join node being estimated
 * @param leftStats stats of the left source
 * @param rightStats stats of the right source
 * @param crossJoinStats stats of the cross join of both sources
 * @param session the current session
 * @param types type provider for the symbols involved
 * @return the combined estimate returned by {@code addJoinComplementStats}
 */
private PlanNodeStatsEstimate computeRightJoinStats(
        JoinNode node,
        PlanNodeStatsEstimate leftStats,
        PlanNodeStatsEstimate rightStats,
        PlanNodeStatsEstimate crossJoinStats,
        Session session,
        TypeProvider types)
{
    // Arguments are evaluated left to right: the inner-join estimate first, then the complement.
    return addJoinComplementStats(
            rightStats,
            computeInnerJoinStats(node, crossJoinStats, session, types),
            calculateJoinComplementStats(node.getFilter(), flippedCriteria(node), rightStats, leftStats, types));
}
/**
 * Estimates stats for a LEFT join.
 *
 * <p>Combines the inner-join estimate with the complement stats computed for the left side
 * using the node's own criteria.
 *
 * @param node the join node being estimated
 * @param leftStats stats of the left source
 * @param rightStats stats of the right source
 * @param crossJoinStats stats of the cross join of both sources
 * @param session the current session
 * @param types type provider for the symbols involved
 * @return the combined estimate returned by {@code addJoinComplementStats}
 */
private PlanNodeStatsEstimate computeLeftJoinStats(
        JoinNode node,
        PlanNodeStatsEstimate leftStats,
        PlanNodeStatsEstimate rightStats,
        PlanNodeStatsEstimate crossJoinStats,
        Session session,
        TypeProvider types)
{
    PlanNodeStatsEstimate innerEstimate = computeInnerJoinStats(node, crossJoinStats, session, types);
    PlanNodeStatsEstimate leftComplement = calculateJoinComplementStats(
            node.getFilter(),
            node.getCriteria(),
            leftStats,
            rightStats,
            types);
    return addJoinComplementStats(leftStats, innerEstimate, leftComplement);
}
.map(drivingClause -> calculateJoinComplementStats(leftStats, rightStats, drivingClause, criteria.size() - 1 + numberOfFilterClauses)) .filter(estimate -> !estimate.isOutputRowCountUnknown()) .max(comparingDouble(PlanNodeStatsEstimate::getOutputRowCount))
/**
 * Verifies {@code addJoinComplementStats} when LEFT_STATS is used as both the source and
 * the inner-join stats and a small complement estimate is added on top.
 */
@Test
public void testAddJoinComplementStats()
{
    double complementNdv = 5;
    PlanNodeStatsEstimate complement = planNodeStats(
            RIGHT_ROWS_COUNT,
            symbolStatistics(LEFT_JOIN_COLUMN, 0.0, 5.0, 0.2, complementNdv));

    // Expected null fractions are row-count-weighted across the two inputs.
    double joinColumnNullsFraction = (LEFT_ROWS_COUNT * LEFT_JOIN_COLUMN_NULLS + RIGHT_ROWS_COUNT * 0.2) / TOTAL_ROWS_COUNT;
    double otherColumnNullsFraction = (0.42 * LEFT_ROWS_COUNT + RIGHT_ROWS_COUNT) / TOTAL_ROWS_COUNT;
    PlanNodeStatsEstimate expected = planNodeStats(
            TOTAL_ROWS_COUNT,
            symbolStatistics(LEFT_JOIN_COLUMN, 0.0, 20.0, joinColumnNullsFraction, LEFT_JOIN_COLUMN_NDV),
            symbolStatistics(LEFT_OTHER_COLUMN, 42, 42, otherColumnNullsFraction, 1));

    assertThat(JOIN_STATS_RULE.addJoinComplementStats(LEFT_STATS, LEFT_STATS, complement))
            .equalTo(expected);
}
private PlanNodeStatsEstimate filterByEquiJoinClauses( PlanNodeStatsEstimate stats, Collection<EquiJoinClause> clauses, Session session, TypeProvider types) { checkArgument(!clauses.isEmpty(), "clauses is empty"); PlanNodeStatsEstimate result = PlanNodeStatsEstimate.unknown(); // Join equality clauses are usually correlated. Therefore we shouldn't treat each join equality // clause separately because stats estimates would be way off. Instead we choose so called // "driving clause" which mostly reduces join output rows cardinality and apply UNKNOWN_FILTER_COEFFICIENT // for other (auxiliary) clauses. Queue<EquiJoinClause> remainingClauses = new LinkedList<>(clauses); EquiJoinClause drivingClause = remainingClauses.poll(); for (int i = 0; i < clauses.size(); i++) { PlanNodeStatsEstimate estimate = filterByEquiJoinClauses(stats, drivingClause, remainingClauses, session, types); if (result.isOutputRowCountUnknown() || (!estimate.isOutputRowCountUnknown() && estimate.getOutputRowCount() < result.getOutputRowCount())) { result = estimate; } remainingClauses.add(drivingClause); drivingClause = remainingClauses.poll(); } return result; }
@Provides @Singleton public static StatsCalculator createNewStatsCalculator(Metadata metadata) { StatsNormalizer normalizer = new StatsNormalizer(); ScalarStatsCalculator scalarStatsCalculator = new ScalarStatsCalculator(metadata); FilterStatsCalculator filterStatsCalculator = new FilterStatsCalculator(metadata, scalarStatsCalculator, normalizer); ImmutableList.Builder<ComposableStatsCalculator.Rule<?>> rules = ImmutableList.builder(); rules.add(new OutputStatsRule()); rules.add(new TableScanStatsRule(metadata, normalizer)); rules.add(new SimpleFilterProjectSemiJoinStatsRule(normalizer, filterStatsCalculator)); // this must be before FilterStatsRule rules.add(new FilterStatsRule(normalizer, filterStatsCalculator)); rules.add(new ValuesStatsRule(metadata)); rules.add(new LimitStatsRule(normalizer)); rules.add(new EnforceSingleRowStatsRule(normalizer)); rules.add(new ProjectStatsRule(scalarStatsCalculator, normalizer)); rules.add(new ExchangeStatsRule(normalizer)); rules.add(new JoinStatsRule(filterStatsCalculator, normalizer)); rules.add(new SpatialJoinStatsRule(filterStatsCalculator, normalizer)); rules.add(new AggregationStatsRule(normalizer)); rules.add(new UnionStatsRule(normalizer)); rules.add(new AssignUniqueIdStatsRule()); rules.add(new SemiJoinStatsRule()); rules.add(new RowNumberStatsRule(normalizer)); return new ComposableStatsCalculator(rules.build()); } }
/**
 * Filters {@code stats} using one driving clause and a collection of auxiliary clauses.
 *
 * <p>The driving clause is turned into an equality comparison between its two symbols and
 * applied through the filter stats calculator; each remaining clause is then applied, in
 * iteration order, via {@code filterByAuxiliaryClause}.
 *
 * @param stats the stats to filter
 * @param drivingClause the clause evaluated as a full equality predicate
 * @param remainingClauses the clauses applied as auxiliary filters
 * @param session the current session
 * @param types type provider for the symbols involved
 * @return the stats after the driving predicate and all auxiliary clauses have been applied
 */
private PlanNodeStatsEstimate filterByEquiJoinClauses(
        PlanNodeStatsEstimate stats,
        EquiJoinClause drivingClause,
        Collection<EquiJoinClause> remainingClauses,
        Session session,
        TypeProvider types)
{
    PlanNodeStatsEstimate estimate = filterStatsCalculator.filterStats(
            stats,
            new ComparisonExpression(
                    EQUAL,
                    drivingClause.getLeft().toSymbolReference(),
                    drivingClause.getRight().toSymbolReference()),
            session,
            types);

    for (EquiJoinClause auxiliaryClause : remainingClauses) {
        estimate = filterByAuxiliaryClause(estimate, auxiliaryClause, types);
    }
    return estimate;
}
/**
 * Verifies that {@code calculateJoinComplementStats} with no filter and no equi-join clauses
 * yields LEFT_STATS with the output row count set to zero (after normalization).
 */
@Test
public void testLeftJoinComplementStatsWithNoClauses()
{
    PlanNodeStatsEstimate zeroRowLeftStats = LEFT_STATS.mapOutputRowCount(rowCount -> 0.0);
    PlanNodeStatsEstimate expected = NORMALIZER.normalize(zeroRowLeftStats, TYPES);

    PlanNodeStatsEstimate actual = JOIN_STATS_RULE.calculateJoinComplementStats(Optional.empty(), ImmutableList.of(), LEFT_STATS, RIGHT_STATS, TYPES);

    assertEquals(actual, expected);
}
private PlanNodeStatsEstimate computeInnerJoinStats(JoinNode node, PlanNodeStatsEstimate crossJoinStats, Session session, TypeProvider types) { List<EquiJoinClause> equiJoinCriteria = node.getCriteria(); if (equiJoinCriteria.isEmpty()) { if (!node.getFilter().isPresent()) { return crossJoinStats; } // TODO: this might explode stats return filterStatsCalculator.filterStats(crossJoinStats, node.getFilter().get(), session, types); } PlanNodeStatsEstimate equiJoinEstimate = filterByEquiJoinClauses(crossJoinStats, node.getCriteria(), session, types); if (equiJoinEstimate.isOutputRowCountUnknown()) { return PlanNodeStatsEstimate.unknown(); } if (!node.getFilter().isPresent()) { return equiJoinEstimate; } PlanNodeStatsEstimate filteredEquiJoinEstimate = filterStatsCalculator.filterStats(equiJoinEstimate, node.getFilter().get(), session, types); if (filteredEquiJoinEstimate.isOutputRowCountUnknown()) { return normalizer.normalize(equiJoinEstimate.mapOutputRowCount(rowCount -> rowCount * UNKNOWN_FILTER_COEFFICIENT), types); } return filteredEquiJoinEstimate; }
/**
 * Verifies {@code calculateJoinComplementStats} for the left side with a single equi-join
 * clause between the left and right join columns and no filter.
 */
@Test
public void testJoinComplementStats()
{
    // Fraction of left rows expected in the complement: the nulls plus a quarter of the non-nulls.
    double complementFraction = LEFT_JOIN_COLUMN_NULLS + LEFT_JOIN_COLUMN_NON_NULLS / 4;
    PlanNodeStatsEstimate expected = planNodeStats(
            LEFT_ROWS_COUNT * complementFraction,
            symbolStatistics(LEFT_JOIN_COLUMN, 0.0, 20.0, LEFT_JOIN_COLUMN_NULLS / complementFraction, 5),
            LEFT_OTHER_COLUMN_STATS);

    EquiJoinClause joinColumnClause = new EquiJoinClause(new Symbol(LEFT_JOIN_COLUMN), new Symbol(RIGHT_JOIN_COLUMN));
    PlanNodeStatsEstimate actual = JOIN_STATS_RULE.calculateJoinComplementStats(
            Optional.empty(),
            ImmutableList.of(joinColumnClause),
            LEFT_STATS,
            RIGHT_STATS,
            TYPES);

    assertEquals(actual, expected);
}
/**
 * Verifies {@code calculateJoinComplementStats} for the right side: the clause is given
 * right-column-first and RIGHT_STATS is passed as the primary side.
 */
@Test
public void testRightJoinComplementStats()
{
    // Expected join column stats: NaN range, null fraction 1.0, zero distinct values.
    PlanNodeStatsEstimate unnormalizedExpected = planNodeStats(
            RIGHT_ROWS_COUNT * RIGHT_JOIN_COLUMN_NULLS,
            symbolStatistics(RIGHT_JOIN_COLUMN, NaN, NaN, 1.0, 0),
            RIGHT_OTHER_COLUMN_STATS);
    PlanNodeStatsEstimate expected = NORMALIZER.normalize(unnormalizedExpected, TYPES);

    EquiJoinClause flippedClause = new EquiJoinClause(new Symbol(RIGHT_JOIN_COLUMN), new Symbol(LEFT_JOIN_COLUMN));
    PlanNodeStatsEstimate actual = JOIN_STATS_RULE.calculateJoinComplementStats(
            Optional.empty(),
            ImmutableList.of(flippedClause),
            RIGHT_STATS,
            LEFT_STATS,
            TYPES);

    assertEquals(actual, expected);
}
/**
 * Verifies {@code calculateJoinComplementStats} for the left side with two equi-join clauses.
 * The expectation is the single-clause estimate with its row count divided by
 * {@code UNKNOWN_FILTER_COEFFICIENT}.
 */
@Test
public void testLeftJoinComplementStatsWithMultipleClauses()
{
    double complementFraction = LEFT_JOIN_COLUMN_NULLS + LEFT_JOIN_COLUMN_NON_NULLS / 4;
    PlanNodeStatsEstimate singleClauseExpected = planNodeStats(
            LEFT_ROWS_COUNT * complementFraction,
            symbolStatistics(LEFT_JOIN_COLUMN, 0.0, 20.0, LEFT_JOIN_COLUMN_NULLS / complementFraction, 5),
            LEFT_OTHER_COLUMN_STATS);
    PlanNodeStatsEstimate expected = singleClauseExpected.mapOutputRowCount(rowCount -> rowCount / UNKNOWN_FILTER_COEFFICIENT);

    EquiJoinClause joinColumnClause = new EquiJoinClause(new Symbol(LEFT_JOIN_COLUMN), new Symbol(RIGHT_JOIN_COLUMN));
    EquiJoinClause otherColumnClause = new EquiJoinClause(new Symbol(LEFT_OTHER_COLUMN), new Symbol(RIGHT_OTHER_COLUMN));
    PlanNodeStatsEstimate actual = JOIN_STATS_RULE.calculateJoinComplementStats(
            Optional.empty(),
            ImmutableList.of(joinColumnClause, otherColumnClause),
            LEFT_STATS,
            RIGHT_STATS,
            TYPES);

    assertEquals(actual, expected);
}