private PlanNodeStatsEstimate filterByEquiJoinClauses( PlanNodeStatsEstimate stats, Collection<EquiJoinClause> clauses, Session session, TypeProvider types) { checkArgument(!clauses.isEmpty(), "clauses is empty"); PlanNodeStatsEstimate result = PlanNodeStatsEstimate.unknown(); // Join equality clauses are usually correlated. Therefore we shouldn't treat each join equality // clause separately because stats estimates would be way off. Instead we choose so called // "driving clause" which mostly reduces join output rows cardinality and apply UNKNOWN_FILTER_COEFFICIENT // for other (auxiliary) clauses. Queue<EquiJoinClause> remainingClauses = new LinkedList<>(clauses); EquiJoinClause drivingClause = remainingClauses.poll(); for (int i = 0; i < clauses.size(); i++) { PlanNodeStatsEstimate estimate = filterByEquiJoinClauses(stats, drivingClause, remainingClauses, session, types); if (result.isOutputRowCountUnknown() || (!estimate.isOutputRowCountUnknown() && estimate.getOutputRowCount() < result.getOutputRowCount())) { result = estimate; } remainingClauses.add(drivingClause); drivingClause = remainingClauses.poll(); } return result; }
private PlanNodeStatsEstimate computeInnerJoinStats(JoinNode node, PlanNodeStatsEstimate crossJoinStats, Session session, TypeProvider types) { List<EquiJoinClause> equiJoinCriteria = node.getCriteria(); if (equiJoinCriteria.isEmpty()) { if (!node.getFilter().isPresent()) { return crossJoinStats; } // TODO: this might explode stats return filterStatsCalculator.filterStats(crossJoinStats, node.getFilter().get(), session, types); } PlanNodeStatsEstimate equiJoinEstimate = filterByEquiJoinClauses(crossJoinStats, node.getCriteria(), session, types); if (equiJoinEstimate.isOutputRowCountUnknown()) { return PlanNodeStatsEstimate.unknown(); } if (!node.getFilter().isPresent()) { return equiJoinEstimate; } PlanNodeStatsEstimate filteredEquiJoinEstimate = filterStatsCalculator.filterStats(equiJoinEstimate, node.getFilter().get(), session, types); if (filteredEquiJoinEstimate.isOutputRowCountUnknown()) { return normalizer.normalize(equiJoinEstimate.mapOutputRowCount(rowCount -> rowCount * UNKNOWN_FILTER_COEFFICIENT), types); } return filteredEquiJoinEstimate; }