/**
 * Produces a copy of this estimate in which the nulls fraction has been replaced by
 * the value {@code mappingFunction} returns for the current nulls fraction.
 *
 * @param mappingFunction transformation applied to the current nulls fraction
 * @return a new estimate built from this one with the mapped nulls fraction
 */
public SymbolStatsEstimate mapNullsFraction(Function<Double, Double> mappingFunction)
{
    SymbolStatsEstimate.Builder copy = buildFrom(this);
    return copy
            .setNullsFraction(mappingFunction.apply(nullsFraction))
            .build();
}
/**
 * Builds a plan node estimate for the given symbols from a total output size.
 * <p>
 * The row count is {@code outputSizeInBytes / symbols.size() / AVERAGE_ROW_SIZE}, and every
 * symbol receives a nulls fraction of 0 and an average row size of {@code AVERAGE_ROW_SIZE}.
 *
 * @param symbols non-empty collection of distinct symbols
 * @param outputSizeInBytes total output size used to derive the row count
 * @return the assembled estimate
 */
private static PlanNodeStatsEstimate statsEstimate(Collection<Symbol> symbols, double outputSizeInBytes)
{
    int symbolCount = symbols.size();
    checkArgument(symbolCount > 0, "No symbols");
    checkArgument(ImmutableSet.copyOf(symbols).size() == symbolCount, "Duplicate symbols");

    // Same division order as before: first by the number of symbols, then by the row size.
    double rowCount = outputSizeInBytes / symbolCount / AVERAGE_ROW_SIZE;

    PlanNodeStatsEstimate.Builder builder = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(rowCount);
    symbols.forEach(symbol -> {
        SymbolStatsEstimate perSymbol = SymbolStatsEstimate.builder()
                .setNullsFraction(0)
                .setAverageRowSize(AVERAGE_ROW_SIZE)
                .build();
        builder.addSymbolStatistics(symbol, perSymbol);
    });
    return builder.build();
}
/**
 * Estimates statistics for a literal: nulls fraction 0 and exactly one distinct value.
 * When the literal's value has a stats representation, that value is used as both the
 * low and the high bound; otherwise the bounds are left unset on the builder.
 */
@Override
protected SymbolStatsEstimate visitLiteral(Literal node, Void context)
{
    Object literalValue = evaluate(metadata, session.toConnectorSession(), node);
    Type literalType = ExpressionAnalyzer
            .createConstantAnalyzer(metadata, session, ImmutableList.of(), WarningCollector.NOOP)
            .analyze(node, Scope.create());
    OptionalDouble statsValue = toStatsRepresentation(metadata, session, literalType, literalValue);

    SymbolStatsEstimate.Builder estimate = SymbolStatsEstimate.builder()
            .setNullsFraction(0)
            .setDistinctValuesCount(1);

    // A single constant spans a degenerate range: low == high == the value itself.
    statsValue.ifPresent(bound -> {
        estimate.setLowValue(bound);
        estimate.setHighValue(bound);
    });
    return estimate.build();
}
private static SymbolStatsEstimate addColumnStats(SymbolStatsEstimate leftStats, double leftRows, SymbolStatsEstimate rightStats, double rightRows, double newRowCount, RangeAdditionStrategy strategy) { checkArgument(newRowCount > 0, "newRowCount must be greater than zero"); StatisticRange leftRange = StatisticRange.from(leftStats); StatisticRange rightRange = StatisticRange.from(rightStats); StatisticRange sum = strategy.add(leftRange, rightRange); double nullsCountRight = rightStats.getNullsFraction() * rightRows; double nullsCountLeft = leftStats.getNullsFraction() * leftRows; double totalSizeLeft = (leftRows - nullsCountLeft) * leftStats.getAverageRowSize(); double totalSizeRight = (rightRows - nullsCountRight) * rightStats.getAverageRowSize(); double newNullsFraction = (nullsCountLeft + nullsCountRight) / newRowCount; double newNonNullsRowCount = newRowCount * (1.0 - newNullsFraction); // FIXME, weights to average. left and right should be equal in most cases anyway double newAverageRowSize = newNonNullsRowCount == 0 ? 0 : ((totalSizeLeft + totalSizeRight) / newNonNullsRowCount); return SymbolStatsEstimate.builder() .setStatisticsRange(sum) .setAverageRowSize(newAverageRowSize) .setNullsFraction(newNullsFraction) .build(); } }
/**
 * Estimates the effect of restricting an expression to {@code filterRange}.
 * <p>
 * The output row count is scaled by the share of the expression's range that overlaps the
 * intersection with {@code filterRange}, and by the expression's non-null fraction. When the
 * expression is backed by a symbol, that symbol's statistics are replaced with the
 * intersected range, the expression's average row size and a nulls fraction of 0.
 *
 * @param inputStatistics statistics before filtering
 * @param expressionStatistics statistics of the filtered expression
 * @param expressionSymbol symbol backing the expression, if any
 * @param filterRange range the expression is restricted to
 * @return statistics after filtering
 */
private static PlanNodeStatsEstimate estimateFilterRange(
        PlanNodeStatsEstimate inputStatistics,
        SymbolStatsEstimate expressionStatistics,
        Optional<Symbol> expressionSymbol,
        StatisticRange filterRange)
{
    StatisticRange expressionRange = StatisticRange.from(expressionStatistics);
    StatisticRange intersectRange = expressionRange.intersect(filterRange);
    double filterFactor = expressionRange.overlapPercentWith(intersectRange);

    PlanNodeStatsEstimate filtered = inputStatistics.mapOutputRowCount(
            rowCount -> filterFactor * (1 - expressionStatistics.getNullsFraction()) * rowCount);

    if (!expressionSymbol.isPresent()) {
        // No symbol to narrow: only the row count changes.
        return filtered;
    }

    SymbolStatsEstimate narrowedStats = SymbolStatsEstimate.builder()
            .setAverageRowSize(expressionStatistics.getAverageRowSize())
            .setStatisticsRange(intersectRange)
            .setNullsFraction(0.0)
            .build();
    return filtered.mapSymbolColumnStatistics(expressionSymbol.get(), previous -> narrowedStats);
}
@Override protected SymbolStatsEstimate visitFunctionCall(FunctionCall node, Void context) { Map<NodeRef<Expression>, Type> expressionTypes = getExpressionTypes(session, node, types); ExpressionInterpreter interpreter = ExpressionInterpreter.expressionOptimizer(node, metadata, session, expressionTypes); Object value = interpreter.optimize(NoOpSymbolResolver.INSTANCE); if (value == null || value instanceof NullLiteral) { return nullStatsEstimate(); } if (value instanceof Expression && !(value instanceof Literal)) { // value is not a constant return SymbolStatsEstimate.unknown(); } // value is a constant return SymbolStatsEstimate.builder() .setNullsFraction(0) .setDistinctValuesCount(1) .build(); }
/**
 * Casts symbol "a" with range [1.6, 3.3] and 10 distinct values to bigint and expects
 * range [2.0, 3.0], 2 distinct values, the nulls fraction kept at 0.3 and an unknown
 * data size.
 */
@Test
public void testCastDoubleToShortRange()
{
    SymbolStatsEstimate sourceColumn = SymbolStatsEstimate.builder()
            .setNullsFraction(0.3)
            .setLowValue(1.6)
            .setHighValue(3.3)
            .setDistinctValuesCount(10)
            .setAverageRowSize(2.0)
            .build();
    PlanNodeStatsEstimate inputStatistics = PlanNodeStatsEstimate.builder()
            .addSymbolStatistics(new Symbol("a"), sourceColumn)
            .build();

    Cast castToBigint = new Cast(new SymbolReference("a"), "bigint");
    assertCalculate(castToBigint, inputStatistics)
            .lowValue(2.0)
            .highValue(3.0)
            .distinctValuesCount(2)
            .nullsFraction(0.3)
            .dataSizeUnknown();
}
/**
 * Casts symbol "a" with range [2.0, 10.0] and 4 distinct values to double and expects the
 * range, distinct values count and nulls fraction to be unchanged, with an unknown data size.
 */
@Test
public void testCastBigintToDouble()
{
    SymbolStatsEstimate sourceColumn = SymbolStatsEstimate.builder()
            .setNullsFraction(0.3)
            .setLowValue(2.0)
            .setHighValue(10.0)
            .setDistinctValuesCount(4)
            .setAverageRowSize(2.0)
            .build();
    PlanNodeStatsEstimate inputStatistics = PlanNodeStatsEstimate.builder()
            .addSymbolStatistics(new Symbol("a"), sourceColumn)
            .build();

    Cast castToDouble = new Cast(new SymbolReference("a"), "double");
    assertCalculate(castToDouble, inputStatistics)
            .lowValue(2.0)
            .highValue(10.0)
            .distinctValuesCount(4)
            .nullsFraction(0.3)
            .dataSizeUnknown();
}
/**
 * Casts symbol "a" with range [1.6, 17.3] and 10 distinct values to bigint and expects
 * range [2.0, 17.0], 10 distinct values, the nulls fraction kept at 0.3 and an unknown
 * data size.
 */
@Test
public void testCastDoubleToBigint()
{
    SymbolStatsEstimate sourceColumn = SymbolStatsEstimate.builder()
            .setNullsFraction(0.3)
            .setLowValue(1.6)
            .setHighValue(17.3)
            .setDistinctValuesCount(10)
            .setAverageRowSize(2.0)
            .build();
    PlanNodeStatsEstimate inputStatistics = PlanNodeStatsEstimate.builder()
            .addSymbolStatistics(new Symbol("a"), sourceColumn)
            .build();

    Cast castToBigint = new Cast(new SymbolReference("a"), "bigint");
    assertCalculate(castToBigint, inputStatistics)
            .lowValue(2.0)
            .highValue(17.0)
            .distinctValuesCount(10)
            .nullsFraction(0.3)
            .dataSizeUnknown();
}
/**
 * Casts symbol "a" with range [1.6, 3.3] and no distinct values count set to bigint and
 * expects range [2.0, 3.0], an unknown distinct values count, the nulls fraction kept at 0.3
 * and an unknown data size.
 */
@Test
public void testCastDoubleToShortRangeUnknownDistinctValuesCount()
{
    SymbolStatsEstimate sourceColumn = SymbolStatsEstimate.builder()
            .setNullsFraction(0.3)
            .setLowValue(1.6)
            .setHighValue(3.3)
            .setAverageRowSize(2.0)
            .build();
    PlanNodeStatsEstimate inputStatistics = PlanNodeStatsEstimate.builder()
            .addSymbolStatistics(new Symbol("a"), sourceColumn)
            .build();

    Cast castToBigint = new Cast(new SymbolReference("a"), "bigint");
    assertCalculate(castToBigint, inputStatistics)
            .lowValue(2.0)
            .highValue(3.0)
            .distinctValuesCountUnknown()
            .nullsFraction(0.3)
            .dataSizeUnknown();
}
/**
 * Normalizes an estimate with 10 output rows and checks the resulting distinct values
 * counts: symbol "a" (20 distinct, no nulls) is expected to end up with 10, symbol "b"
 * (20 distinct, nulls fraction 0.4) with 8, and symbol "c" (unknown statistics) stays unknown.
 */
// NOTE(review): the method name looks like a typo of "testCap..."; left unchanged here so
// the method's name stays identical.
@Test
public void tesCapDistinctValuesByOutputRowCount()
{
    Symbol a = new Symbol("a");
    Symbol b = new Symbol("b");
    Symbol c = new Symbol("c");

    SymbolStatsEstimate statsWithoutNulls = SymbolStatsEstimate.builder()
            .setNullsFraction(0)
            .setDistinctValuesCount(20)
            .build();
    SymbolStatsEstimate statsWithNulls = SymbolStatsEstimate.builder()
            .setNullsFraction(0.4)
            .setDistinctValuesCount(20)
            .build();

    PlanNodeStatsEstimate estimate = PlanNodeStatsEstimate.builder()
            .addSymbolStatistics(a, statsWithoutNulls)
            .addSymbolStatistics(b, statsWithNulls)
            .addSymbolStatistics(c, SymbolStatsEstimate.unknown())
            .setOutputRowCount(10)
            .build();

    assertNormalized(estimate)
            .symbolStats(a, symbolAssert -> symbolAssert.distinctValuesCount(10))
            .symbolStats(b, symbolAssert -> symbolAssert.distinctValuesCount(8))
            .symbolStats(c, SymbolStatsAssertion::distinctValuesCountUnknown);
}
/**
 * Estimates statistics for an {@code IS NULL} predicate.
 * <p>
 * When the tested value is a symbol reference, the output row count is the input row count
 * multiplied by the symbol's nulls fraction, and the symbol's statistics are replaced with
 * an all-null estimate (nulls fraction 1, NaN bounds, 0 distinct values). For any other kind
 * of tested value the result is unknown.
 */
@Override
protected PlanNodeStatsEstimate visitIsNullPredicate(IsNullPredicate node, Void context)
{
    if (!(node.getValue() instanceof SymbolReference)) {
        return PlanNodeStatsEstimate.unknown();
    }

    Symbol symbol = Symbol.from(node.getValue());
    SymbolStatsEstimate symbolStats = input.getSymbolStatistics(symbol);

    PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.buildFrom(input);
    result.setOutputRowCount(input.getOutputRowCount() * symbolStats.getNullsFraction());

    // Only null rows survive the predicate, so the symbol carries no values afterwards.
    SymbolStatsEstimate onlyNulls = SymbolStatsEstimate.builder()
            .setNullsFraction(1.0)
            .setLowValue(NaN)
            .setHighValue(NaN)
            .setDistinctValuesCount(0.0)
            .build();
    result.addSymbolStatistics(symbol, onlyNulls);
    return result.build();
}
/**
 * Converts table-level and column-level statistics into a symbol estimate.
 * <p>
 * The average row size is the column's data size divided by the number of non-null rows
 * (table row count times {@code 1 - nullsFraction}), or 0 when there are no non-null rows.
 * Low and high values are set only when the column statistics carry a range.
 *
 * @param tableStatistics source of the table row count
 * @param columnStatistics source of nulls fraction, distinct values count, data size and range
 * @return the resulting symbol estimate
 */
private SymbolStatsEstimate toSymbolStatistics(TableStatistics tableStatistics, ColumnStatistics columnStatistics)
{
    double nullsFraction = columnStatistics.getNullsFraction().getValue();
    double nonNullRows = tableStatistics.getRowCount().getValue() * (1.0 - nullsFraction);

    double averageRowSize = 0;
    if (nonNullRows != 0) {
        averageRowSize = columnStatistics.getDataSize().getValue() / nonNullRows;
    }

    SymbolStatsEstimate.Builder estimate = SymbolStatsEstimate.builder();
    estimate.setNullsFraction(nullsFraction);
    estimate.setDistinctValuesCount(columnStatistics.getDistinctValuesCount().getValue());
    estimate.setAverageRowSize(averageRowSize);
    columnStatistics.getRange().ifPresent(bounds -> {
        estimate.setLowValue(bounds.getMin());
        estimate.setHighValue(bounds.getMax());
    });
    return estimate.build();
}
}
/**
 * Checks that calculating statistics for a reference to a known symbol ("x") returns that
 * symbol's statistics unchanged, and that a reference to a symbol absent from the input
 * ("y") yields unknown statistics.
 */
@Test
public void testSymbolReference()
{
    SymbolStatsEstimate knownSymbolStats = SymbolStatsEstimate.builder()
            .setLowValue(-1)
            .setHighValue(10)
            .setDistinctValuesCount(4)
            .setNullsFraction(0.1)
            .setAverageRowSize(2.0)
            .build();

    PlanNodeStatsEstimate.Builder inputBuilder = PlanNodeStatsEstimate.builder();
    PlanNodeStatsEstimate inputStatistics = inputBuilder
            .addSymbolStatistics(new Symbol("x"), knownSymbolStats)
            .build();

    assertCalculate(expression("x"), inputStatistics).isEqualTo(knownSymbolStats);
    assertCalculate(expression("y"), inputStatistics).isEqualTo(SymbolStatsEstimate.unknown());
}
/**
 * Helper: normalizes an estimate for a single symbol "x" of the given type, with the given
 * distinct values count and low/high bounds and a very large output row count, and asserts
 * the normalized distinct values count.
 *
 * @param type type registered for the symbol
 * @param ndv distinct values count before normalization
 * @param low low bound, converted with {@code asStatsValue}
 * @param high high bound, converted with {@code asStatsValue}
 * @param expectedNormalizedNdv distinct values count expected after normalization
 */
private void testCapDistinctValuesByToDomainRangeLength(Type type, double ndv, Object low, Object high, double expectedNormalizedNdv)
{
    Symbol symbol = new Symbol("x");

    SymbolStatsEstimate columnStats = SymbolStatsEstimate.builder()
            .setNullsFraction(0)
            .setDistinctValuesCount(ndv)
            .setLowValue(asStatsValue(low, type))
            .setHighValue(asStatsValue(high, type))
            .build();

    PlanNodeStatsEstimate estimate = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(10000000000L)
            .addSymbolStatistics(symbol, columnStats)
            .build();

    TypeProvider symbolTypes = TypeProvider.copyOf(ImmutableMap.of(symbol, type));
    assertNormalized(estimate, symbolTypes)
            .symbolStats(symbol, symbolAssert -> symbolAssert.distinctValuesCount(expectedNormalizedNdv));
}
/**
 * Computes statistics for an {@code AssignUniqueId} node: the source statistics plus an
 * entry for the id column whose distinct values count equals the source output row count,
 * with a nulls fraction of 0 and an average row size of {@code BIGINT.getFixedSize()}.
 *
 * @return the resulting estimate, always present
 */
@Override
public Optional<PlanNodeStatsEstimate> calculate(AssignUniqueId assignUniqueId, StatsProvider statsProvider, Lookup lookup, Session session, TypeProvider types)
{
    PlanNodeStatsEstimate sourceStats = statsProvider.getStats(assignUniqueId.getSource());
    PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.buildFrom(sourceStats);

    // One distinct id per source row, never null.
    SymbolStatsEstimate idColumnStats = SymbolStatsEstimate.builder()
            .setDistinctValuesCount(sourceStats.getOutputRowCount())
            .setNullsFraction(0.0)
            .setAverageRowSize(BIGINT.getFixedSize())
            .build();

    return Optional.of(result
            .addSymbolStatistics(assignUniqueId.getIdColumn(), idColumnStats)
            .build());
}
}
/**
 * Helper: builds a plan node estimate with the given row count and a single entry for
 * {@code SYMBOL} carrying the given nulls fraction, average row size and value range.
 */
private static PlanNodeStatsEstimate statistics(double rowCount, double nullsFraction, double averageRowSize, StatisticRange range)
{
    SymbolStatsEstimate symbolEstimate = SymbolStatsEstimate.builder()
            .setNullsFraction(nullsFraction)
            .setAverageRowSize(averageRowSize)
            .setStatisticsRange(range)
            .build();

    return PlanNodeStatsEstimate.builder()
            .setOutputRowCount(rowCount)
            .addSymbolStatistics(SYMBOL, symbolEstimate)
            .build();
}
/**
 * Creates a builder pre-populated with the low value, high value, nulls fraction, average
 * row size and distinct values count of {@code other}.
 *
 * @param other estimate whose values seed the builder
 * @return a builder carrying the same values as {@code other}
 */
public static Builder buildFrom(SymbolStatsEstimate other)
{
    Builder seeded = builder();
    return seeded
            .setLowValue(other.getLowValue())
            .setHighValue(other.getHighValue())
            .setNullsFraction(other.getNullsFraction())
            .setAverageRowSize(other.getAverageRowSize())
            .setDistinctValuesCount(other.getDistinctValuesCount());
}
/**
 * Helper: pairs a symbol named {@code symbolName} with an estimate built from the given
 * low value, high value, nulls fraction and distinct values count.
 */
private static SymbolStatistics symbolStatistics(String symbolName, double low, double high, double nullsFraction, double ndv)
{
    Symbol symbol = new Symbol(symbolName);
    SymbolStatsEstimate estimate = SymbolStatsEstimate.builder()
            .setLowValue(low)
            .setHighValue(high)
            .setNullsFraction(nullsFraction)
            .setDistinctValuesCount(ndv)
            .build();
    return new SymbolStatistics(symbol, estimate);
}
/**
 * Returns the estimate used for a value that is always null: 0 distinct values and a
 * nulls fraction of 1.
 */
private static SymbolStatsEstimate nullStatsEstimate()
{
    SymbolStatsEstimate.Builder allNulls = SymbolStatsEstimate.builder();
    return allNulls
            .setDistinctValuesCount(0)
            .setNullsFraction(1)
            .build();
}
}