/**
 * Re-keys symbol statistics positionally from {@code inputs} to {@code outputs}.
 *
 * <p>The result carries the same output row count as {@code estimate}; for every
 * position {@code i}, the statistics {@code estimate} reports for {@code inputs.get(i)}
 * are registered under {@code outputs.get(i)}.
 *
 * @param estimate statistics keyed by the input symbols
 * @param inputs symbols to read statistics for
 * @param outputs symbols to register the statistics under; must be the same size as {@code inputs}
 * @return a newly built estimate keyed by the output symbols
 */
private PlanNodeStatsEstimate mapToOutputSymbols(PlanNodeStatsEstimate estimate, List<Symbol> inputs, List<Symbol> outputs)
{
    checkArgument(inputs.size() == outputs.size(), "Input symbols count does not match output symbols count");

    PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.builder();
    result.setOutputRowCount(estimate.getOutputRowCount());

    int symbolCount = inputs.size();
    for (int position = 0; position < symbolCount; position++) {
        result.addSymbolStatistics(
                outputs.get(position),
                estimate.getSymbolStatistics(inputs.get(position)));
    }
    return result.build();
}
}
/**
 * Re-keys symbol statistics using the {@code index}-th value of each mapping entry.
 *
 * <p>The result carries the same output row count as {@code estimate}. For every key
 * of {@code mapping}, the statistics {@code estimate} reports for the value at position
 * {@code index} in that key's value list are registered under the key.
 *
 * @param estimate statistics to read from
 * @param mapping key symbol to list of source symbols
 * @param index position within each key's value list to take the source symbol from
 * @return a newly built estimate keyed by the mapping's keys
 */
private PlanNodeStatsEstimate mapToOutputSymbols(PlanNodeStatsEstimate estimate, ListMultimap<Symbol, Symbol> mapping, int index)
{
    PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.builder();
    result.setOutputRowCount(estimate.getOutputRowCount());

    for (Symbol symbol : mapping.keySet()) {
        result.addSymbolStatistics(symbol, estimate.getSymbolStatistics(mapping.get(symbol).get(index)));
    }
    return result.build();
}
}
/**
 * Produces statistics for a {@code ValuesNode}.
 *
 * <p>The output row count is the number of rows held by the node. For each output
 * symbol, the values at that symbol's position are collected with
 * {@code getSymbolValues} and turned into symbol statistics by
 * {@code buildSymbolStatistics}.
 *
 * @return an always-present estimate
 */
@Override
public Optional<PlanNodeStatsEstimate> calculate(ValuesNode node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types)
{
    PlanNodeStatsEstimate.Builder estimate = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(node.getRows().size());

    List<Symbol> outputSymbols = node.getOutputSymbols();
    for (int position = 0; position < outputSymbols.size(); position++) {
        Symbol symbol = outputSymbols.get(position);
        List<Object> values = getSymbolValues(node, position, session, types.get(symbol));
        estimate.addSymbolStatistics(symbol, buildSymbolStatistics(values, session, types.get(symbol)));
    }

    return Optional.of(estimate.build());
}
/**
 * Estimates the effect of an {@code IS NULL} predicate on {@code input}.
 *
 * <p>Only a predicate whose value is a {@code SymbolReference} is handled; anything
 * else yields {@code PlanNodeStatsEstimate.unknown()}. For a symbol reference, the
 * output row count is the input row count multiplied by the symbol's nulls fraction,
 * and the symbol's statistics are replaced by an all-null estimate (nulls fraction 1.0,
 * NaN low/high values, zero distinct values).
 */
@Override
protected PlanNodeStatsEstimate visitIsNullPredicate(IsNullPredicate node, Void context)
{
    if (!(node.getValue() instanceof SymbolReference)) {
        return PlanNodeStatsEstimate.unknown();
    }

    Symbol symbol = Symbol.from(node.getValue());
    SymbolStatsEstimate symbolStats = input.getSymbolStatistics(symbol);

    return PlanNodeStatsEstimate.buildFrom(input)
            .setOutputRowCount(input.getOutputRowCount() * symbolStats.getNullsFraction())
            .addSymbolStatistics(symbol, SymbolStatsEstimate.builder()
                    .setNullsFraction(1.0)
                    .setLowValue(NaN)
                    .setHighValue(NaN)
                    .setDistinctValuesCount(0.0)
                    .build())
            .build();
}
/**
 * Normalizes an estimate that has statistics for a, b and c while requesting only
 * b and c as output symbols, then asserts {@code symbolsWithKnownStats(b)} and that
 * b keeps a distinct values count of 30.
 */
@Test
public void testDropNonOutputSymbols()
{
    Symbol a = new Symbol("a");
    Symbol b = new Symbol("b");
    Symbol c = new Symbol("c");

    // a and b have a known distinct values count; c is registered with unknown statistics.
    PlanNodeStatsEstimate estimate = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(40)
            .addSymbolStatistics(a, SymbolStatsEstimate.builder().setDistinctValuesCount(20).build())
            .addSymbolStatistics(b, SymbolStatsEstimate.builder().setDistinctValuesCount(30).build())
            .addSymbolStatistics(c, SymbolStatsEstimate.unknown())
            .build();

    TypeProvider outputTypes = TypeProvider.copyOf(ImmutableMap.of(b, BIGINT, c, BIGINT));
    PlanNodeStatsEstimate normalized = normalizer.normalize(estimate, ImmutableList.of(b, c), outputTypes);

    PlanNodeStatsAssertion.assertThat(normalized)
            .symbolsWithKnownStats(b)
            .symbolStats(b, bStats -> bStats.distinctValuesCount(30));
}
/**
 * Checks the row count estimated for an aggregation grouped by y and z over a
 * source with 100 rows and 50 distinct values in each grouping symbol; the expected
 * output row count is 100.
 */
@Test
public void testAggregationStatsCappedToInputRows()
{
    // Source statistics: 100 rows, 50 distinct values for y and for z.
    PlanNodeStatsEstimate sourceStats = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(100)
            .addSymbolStatistics(new Symbol("y"), SymbolStatsEstimate.builder().setDistinctValuesCount(50).build())
            .addSymbolStatistics(new Symbol("z"), SymbolStatsEstimate.builder().setDistinctValuesCount(50).build())
            .build();

    tester()
            .assertStatsFor(planBuilder -> planBuilder.aggregation(aggregationBuilder -> aggregationBuilder
                    .addAggregation(planBuilder.symbol("count_on_x", BIGINT), expression("count(x)"), ImmutableList.of(BIGINT))
                    .singleGroupingSet(planBuilder.symbol("y", BIGINT), planBuilder.symbol("z", BIGINT))
                    .source(planBuilder.values(planBuilder.symbol("x", BIGINT), planBuilder.symbol("y", BIGINT), planBuilder.symbol("z", BIGINT)))))
            .withSourceStats(sourceStats)
            .check(outputStats -> outputStats.outputRowsCount(100));
}
}
/**
 * Normalizes an estimate for a single symbol "x" with the given distinct values count
 * and low/high bounds, and asserts the distinct values count after normalization.
 *
 * @param type type of the symbol; also used to convert {@code low} and {@code high} via {@code asStatsValue}
 * @param ndv distinct values count before normalization
 * @param low lower bound of the symbol's range
 * @param high upper bound of the symbol's range
 * @param expectedNormalizedNdv distinct values count expected after normalization
 */
private void testCapDistinctValuesByToDomainRangeLength(Type type, double ndv, Object low, Object high, double expectedNormalizedNdv)
{
    Symbol x = new Symbol("x");

    SymbolStatsEstimate xStats = SymbolStatsEstimate.builder()
            .setNullsFraction(0)
            .setDistinctValuesCount(ndv)
            .setLowValue(asStatsValue(low, type))
            .setHighValue(asStatsValue(high, type))
            .build();

    PlanNodeStatsEstimate.Builder estimateBuilder = PlanNodeStatsEstimate.builder();
    estimateBuilder.setOutputRowCount(10000000000L);
    estimateBuilder.addSymbolStatistics(x, xStats);
    PlanNodeStatsEstimate estimate = estimateBuilder.build();

    TypeProvider types = TypeProvider.copyOf(ImmutableMap.of(x, type));
    assertNormalized(estimate, types)
            .symbolStats(x, normalizedStats -> normalizedStats.distinctValuesCount(expectedNormalizedNdv));
}
/**
 * Checks that a values node with one BIGINT symbol "a" and no rows yields an
 * estimate equal to: output row count 0, with {@code SymbolStatsEstimate.zero()} for "a".
 */
@Test
public void testStatsForEmptyValues()
{
    PlanNodeStatsEstimate expected = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(0)
            .addSymbolStatistics(new Symbol("a"), SymbolStatsEstimate.zero())
            .build();

    tester()
            .assertStatsFor(planBuilder -> planBuilder.values(
                    ImmutableList.of(planBuilder.symbol("a", BIGINT)),
                    ImmutableList.of()))
            .check(outputStats -> outputStats.equalTo(expected));
}
}
/**
 * Estimates the effect of an {@code IS NOT NULL} predicate on {@code input}.
 *
 * <p>Only a predicate whose value is a {@code SymbolReference} is handled; anything
 * else yields {@code PlanNodeStatsEstimate.unknown()}. For a symbol reference, the
 * output row count is the input row count multiplied by (1 - nulls fraction) of the
 * symbol, and the symbol's nulls fraction is mapped to 0.0.
 */
@Override
protected PlanNodeStatsEstimate visitIsNotNullPredicate(IsNotNullPredicate node, Void context)
{
    if (!(node.getValue() instanceof SymbolReference)) {
        return PlanNodeStatsEstimate.unknown();
    }

    Symbol symbol = Symbol.from(node.getValue());
    SymbolStatsEstimate symbolStats = input.getSymbolStatistics(symbol);

    return PlanNodeStatsEstimate.buildFrom(input)
            .setOutputRowCount(input.getOutputRowCount() * (1 - symbolStats.getNullsFraction()))
            .addSymbolStatistics(symbol, symbolStats.mapNullsFraction(ignored -> 0.0))
            .build();
}
/**
 * Returns an estimate built from this one with the output row count replaced by
 * the result of applying {@code mappingFunction} to the current output row count.
 *
 * @param mappingFunction transformation applied to the current output row count
 * @return the newly built estimate
 */
public PlanNodeStatsEstimate mapOutputRowCount(Function<Double, Double> mappingFunction)
{
    PlanNodeStatsEstimate.Builder copy = buildFrom(this);
    copy.setOutputRowCount(mappingFunction.apply(outputRowCount));
    return copy.build();
}
@Override protected Optional<PlanNodeStatsEstimate> doCalculate(LimitNode node, StatsProvider statsProvider, Lookup lookup, Session session, TypeProvider types) { PlanNodeStatsEstimate sourceStats = statsProvider.getStats(node.getSource()); if (sourceStats.getOutputRowCount() <= node.getCount()) { return Optional.of(sourceStats); } // LIMIT actually limits (or when there was no row count estimated for source) return Optional.of(PlanNodeStatsEstimate.buildFrom(sourceStats) .setOutputRowCount(node.getCount()) .build()); } }
/**
 * Builds an estimate with an output row count of 0 in which every symbol that has
 * known statistics in {@code stats} is assigned {@code SymbolStatsEstimate.zero()}.
 *
 * @param stats estimate whose symbols with known statistics are carried over
 * @return the newly built zero estimate
 */
private static PlanNodeStatsEstimate createZeroStats(PlanNodeStatsEstimate stats)
{
    PlanNodeStatsEstimate.Builder zeroStats = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(0);
    stats.getSymbolsWithKnownStatistics().forEach(knownSymbol -> {
        zeroStats.addSymbolStatistics(knownSymbol, SymbolStatsEstimate.zero());
    });
    return zeroStats.build();
}
/**
 * Estimates the effect of a boolean literal predicate on {@code input}.
 *
 * <p>A true literal returns {@code input} unchanged. A false literal returns an
 * estimate with an output row count of 0.0 in which every symbol with known
 * statistics in {@code input} is assigned {@code SymbolStatsEstimate.zero()}.
 */
@Override
protected PlanNodeStatsEstimate visitBooleanLiteral(BooleanLiteral node, Void context)
{
    if (!node.getValue()) {
        PlanNodeStatsEstimate.Builder zeroStats = PlanNodeStatsEstimate.builder()
                .setOutputRowCount(0.0);
        input.getSymbolsWithKnownStatistics().forEach(knownSymbol -> {
            zeroStats.addSymbolStatistics(knownSymbol, SymbolStatsEstimate.zero());
        });
        return zeroStats.build();
    }
    return input;
}
/**
 * Builds an estimate with the given row count and a single entry for {@code SYMBOL}
 * whose statistics carry the given nulls fraction, average row size and range.
 *
 * @param rowCount output row count of the estimate
 * @param nullsFraction nulls fraction for {@code SYMBOL}
 * @param averageRowSize average row size for {@code SYMBOL}
 * @param range statistics range for {@code SYMBOL}
 * @return the newly built estimate
 */
private static PlanNodeStatsEstimate statistics(double rowCount, double nullsFraction, double averageRowSize, StatisticRange range)
{
    SymbolStatsEstimate symbolEstimate = SymbolStatsEstimate.builder()
            .setNullsFraction(nullsFraction)
            .setAverageRowSize(averageRowSize)
            .setStatisticsRange(range)
            .build();

    PlanNodeStatsEstimate.Builder estimate = PlanNodeStatsEstimate.builder();
    estimate.setOutputRowCount(rowCount);
    estimate.addSymbolStatistics(SYMBOL, symbolEstimate);
    return estimate.build();
}
/**
 * Builds an estimate with the given row count and one symbol-statistics entry per
 * supplied {@code SymbolStatistics}, added in argument order.
 *
 * @param rowCount output row count of the estimate
 * @param symbolStatistics symbol/estimate pairs to register
 * @return the newly built estimate
 */
private static PlanNodeStatsEstimate planNodeStats(double rowCount, SymbolStatistics... symbolStatistics)
{
    PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.builder();
    result.setOutputRowCount(rowCount);

    for (int i = 0; i < symbolStatistics.length; i++) {
        SymbolStatistics entry = symbolStatistics[i];
        result.addSymbolStatistics(entry.symbol, entry.estimate);
    }
    return result.build();
}
/**
 * Builds the estimate of the cross product of the two sides of a spatial join.
 *
 * <p>The output row count is the product of both sides' row counts. Statistics for
 * the output symbols of the left child are taken from {@code leftStats}, and those
 * for the output symbols of the right child from {@code rightStats}.
 *
 * @param node join whose children supply the output symbols
 * @param leftStats statistics of the left side
 * @param rightStats statistics of the right side
 * @return the newly built estimate
 */
private PlanNodeStatsEstimate crossJoinStats(SpatialJoinNode node, PlanNodeStatsEstimate leftStats, PlanNodeStatsEstimate rightStats)
{
    double crossProductRowCount = leftStats.getOutputRowCount() * rightStats.getOutputRowCount();

    PlanNodeStatsEstimate.Builder crossJoin = PlanNodeStatsEstimate.builder();
    crossJoin.setOutputRowCount(crossProductRowCount);

    node.getLeft().getOutputSymbols().forEach(leftSymbol ->
            crossJoin.addSymbolStatistics(leftSymbol, leftStats.getSymbolStatistics(leftSymbol)));
    node.getRight().getOutputSymbols().forEach(rightSymbol ->
            crossJoin.addSymbolStatistics(rightSymbol, rightStats.getSymbolStatistics(rightSymbol)));

    return crossJoin.build();
}
/**
 * Normalizes an estimate with 30 rows and a distinct values count of 20 for symbol
 * "a", and asserts that the distinct values count is still 20 afterwards.
 */
@Test
public void testNoCapping()
{
    Symbol a = new Symbol("a");
    SymbolStatsEstimate aStats = SymbolStatsEstimate.builder()
            .setDistinctValuesCount(20)
            .build();

    PlanNodeStatsEstimate.Builder estimateBuilder = PlanNodeStatsEstimate.builder();
    estimateBuilder.setOutputRowCount(30);
    estimateBuilder.addSymbolStatistics(a, aStats);
    PlanNodeStatsEstimate estimate = estimateBuilder.build();

    assertNormalized(estimate)
            .symbolStats(a, normalizedStats -> normalizedStats.distinctValuesCount(20));
}
/**
 * Builds the normalized estimate of the cross product of the two sides of a join.
 *
 * <p>The output row count is the product of both sides' row counts. Statistics for
 * the output symbols of the left child are taken from {@code leftStats}, and those
 * for the output symbols of the right child from {@code rightStats}. The assembled
 * estimate is then passed through {@code normalizer.normalize} with {@code types}.
 *
 * @param node join whose children supply the output symbols
 * @param leftStats statistics of the left side
 * @param rightStats statistics of the right side
 * @param types type information handed to the normalizer
 * @return the normalized estimate
 */
private PlanNodeStatsEstimate crossJoinStats(JoinNode node, PlanNodeStatsEstimate leftStats, PlanNodeStatsEstimate rightStats, TypeProvider types)
{
    double crossProductRowCount = leftStats.getOutputRowCount() * rightStats.getOutputRowCount();

    PlanNodeStatsEstimate.Builder crossJoin = PlanNodeStatsEstimate.builder();
    crossJoin.setOutputRowCount(crossProductRowCount);

    node.getLeft().getOutputSymbols().forEach(leftSymbol ->
            crossJoin.addSymbolStatistics(leftSymbol, leftStats.getSymbolStatistics(leftSymbol)));
    node.getRight().getOutputSymbols().forEach(rightSymbol ->
            crossJoin.addSymbolStatistics(rightSymbol, rightStats.getSymbolStatistics(rightSymbol)));

    PlanNodeStatsEstimate unnormalized = crossJoin.build();
    return normalizer.normalize(unnormalized, types);
}
/**
 * Produces statistics for an {@code EnforceSingleRowNode}: a copy of the source's
 * statistics with the output row count set to 1.
 *
 * @return an always-present estimate
 */
@Override
protected Optional<PlanNodeStatsEstimate> doCalculate(EnforceSingleRowNode node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types)
{
    PlanNodeStatsEstimate sourceEstimate = sourceStats.getStats(node.getSource());

    PlanNodeStatsEstimate.Builder singleRow = PlanNodeStatsEstimate.buildFrom(sourceEstimate);
    singleRow.setOutputRowCount(1);
    return Optional.of(singleRow.build());
}
}
/**
 * Produces statistics for a {@code ProjectNode}.
 *
 * <p>The output row count equals the source's row count. For each assignment, the
 * statistics of the assigned symbol are computed by {@code scalarStatsCalculator}
 * from the assignment's expression and the source statistics.
 *
 * @return an always-present estimate
 */
@Override
protected Optional<PlanNodeStatsEstimate> doCalculate(ProjectNode node, StatsProvider statsProvider, Lookup lookup, Session session, TypeProvider types)
{
    PlanNodeStatsEstimate sourceStats = statsProvider.getStats(node.getSource());

    PlanNodeStatsEstimate.Builder projected = PlanNodeStatsEstimate.builder();
    projected.setOutputRowCount(sourceStats.getOutputRowCount());

    for (Map.Entry<Symbol, Expression> assignment : node.getAssignments().entrySet()) {
        Symbol output = assignment.getKey();
        Expression expression = assignment.getValue();
        projected.addSymbolStatistics(output, scalarStatsCalculator.calculate(expression, sourceStats, session, types));
    }

    return Optional.of(projected.build());
}
}