private PlanNodeStatsEstimate mapToOutputSymbols(PlanNodeStatsEstimate estimate, List<Symbol> inputs, List<Symbol> outputs) { checkArgument(inputs.size() == outputs.size(), "Input symbols count does not match output symbols count"); PlanNodeStatsEstimate.Builder mapped = PlanNodeStatsEstimate.builder() .setOutputRowCount(estimate.getOutputRowCount()); for (int i = 0; i < inputs.size(); i++) { mapped.addSymbolStatistics(outputs.get(i), estimate.getSymbolStatistics(inputs.get(i))); } return mapped.build(); } }
private PlanNodeStatsEstimate mapToOutputSymbols(PlanNodeStatsEstimate estimate, ListMultimap<Symbol, Symbol> mapping, int index) { PlanNodeStatsEstimate.Builder mapped = PlanNodeStatsEstimate.builder() .setOutputRowCount(estimate.getOutputRowCount()); mapping.keySet().stream() .forEach(symbol -> mapped.addSymbolStatistics(symbol, estimate.getSymbolStatistics(mapping.get(symbol).get(index)))); return mapped.build(); } }
private static PlanNodeStatsEstimate createZeroStats(PlanNodeStatsEstimate stats) { PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.builder(); result.setOutputRowCount(0); stats.getSymbolsWithKnownStatistics().forEach(symbol -> result.addSymbolStatistics(symbol, SymbolStatsEstimate.zero())); return result.build(); }
@Override protected Optional<PlanNodeStatsEstimate> doCalculate(TableScanNode node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types) { // TODO Construct predicate like AddExchanges's LayoutConstraintEvaluator Constraint<ColumnHandle> constraint = new Constraint<>(node.getCurrentConstraint()); TableStatistics tableStatistics = metadata.getTableStatistics(session, node.getTable(), constraint); Map<Symbol, SymbolStatsEstimate> outputSymbolStats = new HashMap<>(); for (Map.Entry<Symbol, ColumnHandle> entry : node.getAssignments().entrySet()) { Symbol symbol = entry.getKey(); Optional<ColumnStatistics> columnStatistics = Optional.ofNullable(tableStatistics.getColumnStatistics().get(entry.getValue())); outputSymbolStats.put(symbol, columnStatistics.map(statistics -> toSymbolStatistics(tableStatistics, statistics)).orElse(SymbolStatsEstimate.unknown())); } return Optional.of(PlanNodeStatsEstimate.builder() .setOutputRowCount(tableStatistics.getRowCount().getValue()) .addSymbolStatistics(outputSymbolStats) .build()); }
@Override public Optional<PlanNodeStatsEstimate> calculate(ValuesNode node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types) { PlanNodeStatsEstimate.Builder statsBuilder = PlanNodeStatsEstimate.builder(); statsBuilder.setOutputRowCount(node.getRows().size()); for (int symbolId = 0; symbolId < node.getOutputSymbols().size(); ++symbolId) { Symbol symbol = node.getOutputSymbols().get(symbolId); List<Object> symbolValues = getSymbolValues(node, symbolId, session, types.get(symbol)); statsBuilder.addSymbolStatistics(symbol, buildSymbolStatistics(symbolValues, session, types.get(symbol))); } return Optional.of(statsBuilder.build()); }
@Override protected Optional<PlanNodeStatsEstimate> doCalculate(ProjectNode node, StatsProvider statsProvider, Lookup lookup, Session session, TypeProvider types) { PlanNodeStatsEstimate sourceStats = statsProvider.getStats(node.getSource()); PlanNodeStatsEstimate.Builder calculatedStats = PlanNodeStatsEstimate.builder() .setOutputRowCount(sourceStats.getOutputRowCount()); for (Map.Entry<Symbol, Expression> entry : node.getAssignments().entrySet()) { calculatedStats.addSymbolStatistics(entry.getKey(), scalarStatsCalculator.calculate(entry.getValue(), sourceStats, session, types)); } return Optional.of(calculatedStats.build()); } }
@Override protected PlanNodeStatsEstimate visitBooleanLiteral(BooleanLiteral node, Void context) { if (node.getValue()) { return input; } PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.builder(); result.setOutputRowCount(0.0); input.getSymbolsWithKnownStatistics().forEach(symbol -> result.addSymbolStatistics(symbol, SymbolStatsEstimate.zero())); return result.build(); }
private PlanNodeStatsEstimate crossJoinStats(SpatialJoinNode node, PlanNodeStatsEstimate leftStats, PlanNodeStatsEstimate rightStats) { PlanNodeStatsEstimate.Builder builder = PlanNodeStatsEstimate.builder() .setOutputRowCount(leftStats.getOutputRowCount() * rightStats.getOutputRowCount()); node.getLeft().getOutputSymbols().forEach(symbol -> builder.addSymbolStatistics(symbol, leftStats.getSymbolStatistics(symbol))); node.getRight().getOutputSymbols().forEach(symbol -> builder.addSymbolStatistics(symbol, rightStats.getSymbolStatistics(symbol))); return builder.build(); }
private static PlanNodeStatsEstimate statsEstimate(Collection<Symbol> symbols, double outputSizeInBytes) { checkArgument(symbols.size() > 0, "No symbols"); checkArgument(ImmutableSet.copyOf(symbols).size() == symbols.size(), "Duplicate symbols"); double rowCount = outputSizeInBytes / symbols.size() / AVERAGE_ROW_SIZE; PlanNodeStatsEstimate.Builder builder = PlanNodeStatsEstimate.builder() .setOutputRowCount(rowCount); for (Symbol symbol : symbols) { builder.addSymbolStatistics( symbol, SymbolStatsEstimate.builder() .setNullsFraction(0) .setAverageRowSize(AVERAGE_ROW_SIZE) .build()); } return builder.build(); }
private PlanNodeStatsEstimate crossJoinStats(JoinNode node, PlanNodeStatsEstimate leftStats, PlanNodeStatsEstimate rightStats, TypeProvider types) { PlanNodeStatsEstimate.Builder builder = PlanNodeStatsEstimate.builder() .setOutputRowCount(leftStats.getOutputRowCount() * rightStats.getOutputRowCount()); node.getLeft().getOutputSymbols().forEach(symbol -> builder.addSymbolStatistics(symbol, leftStats.getSymbolStatistics(symbol))); node.getRight().getOutputSymbols().forEach(symbol -> builder.addSymbolStatistics(symbol, rightStats.getSymbolStatistics(symbol))); return normalizer.normalize(builder.build(), types); }
private static PlanNodeStatsEstimate planNodeStats(double rowCount, SymbolStatistics... symbolStatistics) { PlanNodeStatsEstimate.Builder builder = PlanNodeStatsEstimate.builder() .setOutputRowCount(rowCount); for (SymbolStatistics symbolStatistic : symbolStatistics) { builder.addSymbolStatistics(symbolStatistic.symbol, symbolStatistic.estimate); } return builder.build(); }
@Test public void testStatsForEmptyValues() { tester().assertStatsFor(pb -> pb .values(ImmutableList.of(pb.symbol("a", BIGINT)), ImmutableList.of())) .check(outputStats -> outputStats.equalTo( PlanNodeStatsEstimate.builder() .setOutputRowCount(0) .addSymbolStatistics(new Symbol("a"), SymbolStatsEstimate.zero()) .build())); } }
@Test public void testNoCapping() { Symbol a = new Symbol("a"); PlanNodeStatsEstimate estimate = PlanNodeStatsEstimate.builder() .setOutputRowCount(30) .addSymbolStatistics(a, SymbolStatsEstimate.builder().setDistinctValuesCount(20).build()) .build(); assertNormalized(estimate) .symbolStats(a, symbolAssert -> symbolAssert.distinctValuesCount(20)); }
private static PlanNodeStatsEstimate statistics(double rowCount, double nullsFraction, double averageRowSize, StatisticRange range) { return PlanNodeStatsEstimate.builder() .setOutputRowCount(rowCount) .addSymbolStatistics(SYMBOL, SymbolStatsEstimate.builder() .setNullsFraction(nullsFraction) .setAverageRowSize(averageRowSize) .setStatisticsRange(range) .build()) .build(); }
@Test public void testDropNonOutputSymbols() { Symbol a = new Symbol("a"); Symbol b = new Symbol("b"); Symbol c = new Symbol("c"); PlanNodeStatsEstimate estimate = PlanNodeStatsEstimate.builder() .setOutputRowCount(40) .addSymbolStatistics(a, SymbolStatsEstimate.builder().setDistinctValuesCount(20).build()) .addSymbolStatistics(b, SymbolStatsEstimate.builder().setDistinctValuesCount(30).build()) .addSymbolStatistics(c, SymbolStatsEstimate.unknown()) .build(); PlanNodeStatsAssertion.assertThat(normalizer.normalize(estimate, ImmutableList.of(b, c), TypeProvider.copyOf(ImmutableMap.of(b, BIGINT, c, BIGINT)))) .symbolsWithKnownStats(b) .symbolStats(b, symbolAssert -> symbolAssert.distinctValuesCount(30)); }
private void testCapDistinctValuesByToDomainRangeLength(Type type, double ndv, Object low, Object high, double expectedNormalizedNdv) { Symbol symbol = new Symbol("x"); SymbolStatsEstimate symbolStats = SymbolStatsEstimate.builder() .setNullsFraction(0) .setDistinctValuesCount(ndv) .setLowValue(asStatsValue(low, type)) .setHighValue(asStatsValue(high, type)) .build(); PlanNodeStatsEstimate estimate = PlanNodeStatsEstimate.builder() .setOutputRowCount(10000000000L) .addSymbolStatistics(symbol, symbolStats).build(); assertNormalized(estimate, TypeProvider.copyOf(ImmutableMap.of(symbol, type))) .symbolStats(symbol, symbolAssert -> symbolAssert.distinctValuesCount(expectedNormalizedNdv)); }
@Test public void testSymbolReference() { SymbolStatsEstimate xStats = SymbolStatsEstimate.builder() .setLowValue(-1) .setHighValue(10) .setDistinctValuesCount(4) .setNullsFraction(0.1) .setAverageRowSize(2.0) .build(); PlanNodeStatsEstimate inputStatistics = PlanNodeStatsEstimate.builder() .addSymbolStatistics(new Symbol("x"), xStats) .build(); assertCalculate(expression("x"), inputStatistics).isEqualTo(xStats); assertCalculate(expression("y"), inputStatistics).isEqualTo(SymbolStatsEstimate.unknown()); }
private PlanNodeStatsEstimate xyStats(double lowX, double highX, double lowY, double highY) { return PlanNodeStatsEstimate.builder() .addSymbolStatistics(new Symbol("x"), SymbolStatsEstimate.builder() .setLowValue(lowX) .setHighValue(highX) .build()) .addSymbolStatistics(new Symbol("y"), SymbolStatsEstimate.builder() .setLowValue(lowY) .setHighValue(highY) .build()) .build(); }
@Test public void testAggregationStatsCappedToInputRows() { tester().assertStatsFor(pb -> pb .aggregation(ab -> ab .addAggregation(pb.symbol("count_on_x", BIGINT), expression("count(x)"), ImmutableList.of(BIGINT)) .singleGroupingSet(pb.symbol("y", BIGINT), pb.symbol("z", BIGINT)) .source(pb.values(pb.symbol("x", BIGINT), pb.symbol("y", BIGINT), pb.symbol("z", BIGINT))))) .withSourceStats(PlanNodeStatsEstimate.builder() .setOutputRowCount(100) .addSymbolStatistics(new Symbol("y"), SymbolStatsEstimate.builder().setDistinctValuesCount(50).build()) .addSymbolStatistics(new Symbol("z"), SymbolStatsEstimate.builder().setDistinctValuesCount(50).build()) .build()) .check(check -> check.outputRowsCount(100)); } }
@Test public void tesCapDistinctValuesByOutputRowCount() { Symbol a = new Symbol("a"); Symbol b = new Symbol("b"); Symbol c = new Symbol("c"); PlanNodeStatsEstimate estimate = PlanNodeStatsEstimate.builder() .addSymbolStatistics(a, SymbolStatsEstimate.builder().setNullsFraction(0).setDistinctValuesCount(20).build()) .addSymbolStatistics(b, SymbolStatsEstimate.builder().setNullsFraction(0.4).setDistinctValuesCount(20).build()) .addSymbolStatistics(c, SymbolStatsEstimate.unknown()) .setOutputRowCount(10) .build(); assertNormalized(estimate) .symbolStats(a, symbolAssert -> symbolAssert.distinctValuesCount(10)) .symbolStats(b, symbolAssert -> symbolAssert.distinctValuesCount(8)) .symbolStats(c, SymbolStatsAssertion::distinctValuesCountUnknown); }