new Symbol("a"), SymbolStatsEstimate.builder() .setNullsFraction(0) .setLowValue(6) .setHighValue(55) .setDistinctValuesCount(2) .build()) .addSymbolStatistics( new Symbol("b"), SymbolStatsEstimate.builder() .setNullsFraction(0.33333333333333333) .setLowValue(13.5) .setHighValue(13.5) .setDistinctValuesCount(1) .build()) .build())); new Symbol("v"), SymbolStatsEstimate.builder() .setNullsFraction(0.25) .setDistinctValuesCount(3) .build()) .build()));
.setAverageRowSize(Math.max(left.getAverageRowSize(), right.getAverageRowSize())) .setNullsFraction(left.getNullsFraction() + right.getNullsFraction() - left.getNullsFraction() * right.getNullsFraction()) .setDistinctValuesCount(min(left.getDistinctValuesCount() * right.getDistinctValuesCount(), input.getOutputRowCount())); double rightHigh = right.getHighValue(); if (isNaN(leftLow) || isNaN(leftHigh) || isNaN(rightLow) || isNaN(rightHigh)) { result.setLowValue(NaN) .setHighValue(NaN); result.setLowValue(Double.NEGATIVE_INFINITY) .setHighValue(Double.POSITIVE_INFINITY); result.setLowValue(max(-maxDivisor, leftLow)) .setHighValue(0); result.setLowValue(0) .setHighValue(min(maxDivisor, leftHigh)); result.setLowValue(max(-maxDivisor, leftLow)) .setHighValue(min(maxDivisor, leftHigh)); double highValue = max(v1, v2, v3, v4); result.setLowValue(lowValue) .setHighValue(highValue); return result.build();
/**
 * Estimates the statistics of an equality comparison between two expressions.
 *
 * <p>If the distinct values count of either side is NaN, the estimate is
 * {@link PlanNodeStatsEstimate#unknown()}. Otherwise the output row count is the
 * input row count multiplied by the non-null factor
 * {@code (1 - leftNulls) * (1 - rightNulls)} and by the equality factor
 * {@code 1 / max(leftNdv, rightNdv, 1)}.
 *
 * <p>Every side that is backed by a symbol gets the same symbol statistics: the
 * intersection of both value ranges, a nulls fraction of zero, a distinct values
 * count of {@code min(leftNdv, rightNdv)} and the average of both row sizes as
 * computed by {@code averageExcludingNaNs}.
 *
 * @param inputStatistics statistics of the rows the comparison is applied to
 * @param leftExpressionStatistics statistics of the left-hand expression
 * @param leftExpressionSymbol symbol of the left-hand expression, if it has one
 * @param rightExpressionStatistics statistics of the right-hand expression
 * @param rightExpressionSymbol symbol of the right-hand expression, if it has one
 * @return the estimated statistics, or an unknown estimate when a distinct values count is NaN
 */
private static PlanNodeStatsEstimate estimateExpressionEqualToExpression(
        PlanNodeStatsEstimate inputStatistics,
        SymbolStatsEstimate leftExpressionStatistics,
        Optional<Symbol> leftExpressionSymbol,
        SymbolStatsEstimate rightExpressionStatistics,
        Optional<Symbol> rightExpressionSymbol)
{
    boolean leftNdvUnknown = isNaN(leftExpressionStatistics.getDistinctValuesCount());
    if (leftNdvUnknown || isNaN(rightExpressionStatistics.getDistinctValuesCount())) {
        return PlanNodeStatsEstimate.unknown();
    }

    StatisticRange leftRange = StatisticRange.from(leftExpressionStatistics);
    StatisticRange rightRange = StatisticRange.from(rightExpressionStatistics);

    // Distinct value counts are read from the ranges, not from the raw statistics.
    double leftNdv = leftRange.getDistinctValuesCount();
    double rightNdv = rightRange.getDistinctValuesCount();

    // The symbol statistics built below use a nulls fraction of zero; the row count
    // is scaled by the fraction of rows where both sides are non-null.
    double leftNonNullFraction = 1 - leftExpressionStatistics.getNullsFraction();
    double rightNonNullFraction = 1 - rightExpressionStatistics.getNullsFraction();
    double nullsFilterFactor = leftNonNullFraction * rightNonNullFraction;

    // The extra argument 1 keeps the divisor at 1 or above.
    double filterFactor = 1.0 / max(leftNdv, rightNdv, 1);
    double estimatedRowCount = inputStatistics.getOutputRowCount() * nullsFilterFactor * filterFactor;

    PlanNodeStatsEstimate.Builder estimate = PlanNodeStatsEstimate.buildFrom(inputStatistics);
    estimate.setOutputRowCount(estimatedRowCount);

    double averageRowSize = averageExcludingNaNs(
            leftExpressionStatistics.getAverageRowSize(),
            rightExpressionStatistics.getAverageRowSize());

    // The distinct values count is set after the range, as an explicit final value.
    SymbolStatsEstimate equalityStats = SymbolStatsEstimate.builder()
            .setAverageRowSize(averageRowSize)
            .setNullsFraction(0)
            .setStatisticsRange(leftRange.intersect(rightRange))
            .setDistinctValuesCount(min(leftNdv, rightNdv))
            .build();

    leftExpressionSymbol.ifPresent(leftSymbol -> estimate.addSymbolStatistics(leftSymbol, equalityStats));
    rightExpressionSymbol.ifPresent(rightSymbol -> estimate.addSymbolStatistics(rightSymbol, equalityStats));

    return estimate.build();
}
.setOutputRowCount(10) .addSymbolStatistics(new Symbol("i11"), SymbolStatsEstimate.builder() .setLowValue(1) .setHighValue(10) .setDistinctValuesCount(5) .setNullsFraction(0.3) .build()) .addSymbolStatistics(new Symbol("i12"), SymbolStatsEstimate.builder() .setLowValue(0) .setHighValue(3) .setDistinctValuesCount(4) .setNullsFraction(0) .build()) .addSymbolStatistics(new Symbol("i13"), SymbolStatsEstimate.builder() .setLowValue(10) .setHighValue(15) .setDistinctValuesCount(4) .setNullsFraction(0.1) .build()) .addSymbolStatistics(new Symbol("i14"), SymbolStatsEstimate.builder() .setLowValue(10) .setHighValue(15) .setDistinctValuesCount(4) .setNullsFraction(0.1) .build()) .addSymbolStatistics(new Symbol("i15"), SymbolStatsEstimate.builder() .setLowValue(10) .setHighValue(15) .setDistinctValuesCount(4)
.setAverageRowSize(8.0) .setDistinctValuesCount(300) .setLowValue(0) .setHighValue(20) .setNullsFraction(0.1) .build(); wStats = SymbolStatsEstimate.builder() .setAverageRowSize(8.0) .setDistinctValuesCount(30) .setLowValue(0) .setHighValue(20) .setNullsFraction(0.1) .build(); xStats = SymbolStatsEstimate.builder() .setAverageRowSize(4.0) .setDistinctValuesCount(40.0) .setLowValue(-10.0) .setHighValue(10.0) .setNullsFraction(0.25) .build(); yStats = SymbolStatsEstimate.builder() .setAverageRowSize(4.0) .setDistinctValuesCount(20.0) .setLowValue(0.0) .setHighValue(5.0) .setNullsFraction(0.5) .build(); zStats = SymbolStatsEstimate.builder() .setAverageRowSize(4.0)
.setOutputRowCount(10) .addSymbolStatistics(new Symbol("i1"), SymbolStatsEstimate.builder() .setLowValue(1) .setHighValue(10) .setDistinctValuesCount(5) .setNullsFraction(0) .build()) .addSymbolStatistics(new Symbol("i2"), SymbolStatsEstimate.builder() .setLowValue(0) .setHighValue(3) .setDistinctValuesCount(4) .setNullsFraction(0) .build()) .addSymbolStatistics(new Symbol("i3"), SymbolStatsEstimate.builder() .setLowValue(10) .setHighValue(15) .setDistinctValuesCount(4) .setNullsFraction(0.1) .build()) .build()) .check(check -> check .setOutputRowCount(10) .addSymbolStatistics(new Symbol("i1"), SymbolStatsEstimate.builder() .setLowValue(1) .setHighValue(10) .setDistinctValuesCount(5) .setNullsFraction(0) .build())
.setOutputRowCount(10) .addSymbolStatistics(new Symbol("i11"), SymbolStatsEstimate.builder() .setLowValue(1) .setHighValue(10) .setDistinctValuesCount(5) .setNullsFraction(0.3) .build()) .addSymbolStatistics(new Symbol("i12"), SymbolStatsEstimate.builder() .setLowValue(0) .setHighValue(3) .setDistinctValuesCount(4) .setNullsFraction(0) .build()) .addSymbolStatistics(new Symbol("i13"), SymbolStatsEstimate.builder() .setLowValue(10) .setHighValue(15) .setDistinctValuesCount(4) .setNullsFraction(0.1) .build()) .addSymbolStatistics(new Symbol("i14"), SymbolStatsEstimate.builder() .setLowValue(10) .setHighValue(15) .setDistinctValuesCount(4) .setNullsFraction(0.1) .build()) .build()) .withSourceStats(1, PlanNodeStatsEstimate.builder() .setOutputRowCount(20) .addSymbolStatistics(new Symbol("i21"), SymbolStatsEstimate.builder()
.setAverageRowSize(8.0) .setDistinctValuesCount(300) .setLowValue(0) .setHighValue(20) .setNullsFraction(0.1) .build(); wStats = SymbolStatsEstimate.builder() .setAverageRowSize(8.0) .setDistinctValuesCount(30) .setLowValue(0) .setHighValue(20) .setNullsFraction(0.1) .build(); xStats = SymbolStatsEstimate.builder() .setAverageRowSize(4.0) .setDistinctValuesCount(40.0) .setLowValue(-10.0) .setHighValue(10.0) .setNullsFraction(0.25) .build(); yStats = SymbolStatsEstimate.builder() .setAverageRowSize(4.0) .setDistinctValuesCount(20.0) .setLowValue(0.0) .setHighValue(5.0) .setNullsFraction(0.5) .build(); zStats = SymbolStatsEstimate.builder() .setAverageRowSize(4.0)
.setAverageRowSize(4.0) .setDistinctValuesCount(40.0) .setLowValue(-10.0) .setHighValue(10.0) .setNullsFraction(0.25) .build(); yStats = SymbolStatsEstimate.builder() .setAverageRowSize(4.0) .setDistinctValuesCount(20.0) .setLowValue(0.0) .setHighValue(5.0) .setNullsFraction(0.5) .build(); zStats = SymbolStatsEstimate.builder() .setAverageRowSize(4.0) .setDistinctValuesCount(5.0) .setLowValue(-100.0) .setHighValue(100.0) .setNullsFraction(0.1) .build(); leftOpenStats = SymbolStatsEstimate.builder() .setAverageRowSize(4.0) .setDistinctValuesCount(50.0) .setLowValue(NEGATIVE_INFINITY) .setHighValue(15.0) .setNullsFraction(0.1) .build(); rightOpenStats = SymbolStatsEstimate.builder() .setAverageRowSize(4.0)
.setOutputRowCount(10) .addSymbolStatistics(new Symbol("i1"), SymbolStatsEstimate.builder() .setLowValue(1) .setHighValue(10) .setDistinctValuesCount(5) .setNullsFraction(0) .build()) .addSymbolStatistics(new Symbol("i2"), SymbolStatsEstimate.builder() .setLowValue(0) .setHighValue(3) .setDistinctValuesCount(4) .setNullsFraction(0) .build()) .addSymbolStatistics(new Symbol("i3"), SymbolStatsEstimate.builder() .setLowValue(10) .setHighValue(15) .setDistinctValuesCount(4) .setNullsFraction(0.1) .build()) .build()) .check(check -> check.outputRowsCountUnknown()); .setOutputRowCount(10) .addSymbolStatistics(new Symbol("i1"), SymbolStatsEstimate.builder() .setLowValue(1) .setHighValue(10) .setDistinctValuesCount(5) .setNullsFraction(0) .build())
newSymbolStats.setAverageRowSize(supersetSymbolStats.getAverageRowSize()); double subsetNullsCount = subsetSymbolStats.getNullsFraction() * subsetRowCount; double newNullsCount = max(supersetNullsCount - subsetNullsCount, 0); newSymbolStats.setNullsFraction(min(newNullsCount, outputRowCount) / outputRowCount); newSymbolStats.setDistinctValuesCount(newDistinctValuesCount); newSymbolStats.setLowValue(supersetSymbolStats.getLowValue()); newSymbolStats.setHighValue(supersetSymbolStats.getHighValue()); result.addSymbolStatistics(symbol, newSymbolStats.build()); });
.setLowValue(10) .setHighValue(15) .setDistinctValuesCount(4) .setNullsFraction(0.1) .build()) .check(outputRowCountAndZStatsAreCalculated); .setLowValue(10) .setHighValue(15) .setDistinctValuesCount(4) .build()) .check(outputRowCountAndZStatsAreCalculated); .setLowValue(10) .setHighValue(15) .setNullsFraction(0.1) .build()) .check(outputRowsCountAndZStatsAreNotFullyCalculated); .setLowValue(10) .setHighValue(15) .build()) .check(outputRowsCountAndZStatsAreNotFullyCalculated);
.setLowValue(1) .setDistinctValuesCount(rowsPerPartition) .setNullsFraction(0.0) .setAverageRowSize(BIGINT.getFixedSize()) .build()) .build());
.setOutputRowCount(100) .addSymbolStatistics(new Symbol("x"), SymbolStatsEstimate.builder() .setLowValue(1) .setHighValue(10) .setDistinctValuesCount(5) .setNullsFraction(0.3) .build()) .addSymbolStatistics(new Symbol("y"), SymbolStatsEstimate.builder() .setLowValue(0) .setHighValue(3) .setDistinctValuesCount(3) .setNullsFraction(0) .build()) .addSymbolStatistics(new Symbol("z"), zStats) .build())
.setLowValue(1) .setHighValue(10) .setDistinctValuesCount(5) .setNullsFraction(0.3) .build();
result = result.mapSymbolColumnStatistics(drivingClause.getLeft(), columnStats -> SymbolStatsEstimate.buildFrom(columnStats) .setLowValue(leftColumnStats.getLowValue()) .setHighValue(leftColumnStats.getHighValue()) .setNullsFraction(newLeftNullsFraction) .setDistinctValuesCount(leftNDV - matchingRightNDV) .build()); result = result.mapOutputRowCount(rowCount -> rowCount * scaleFactor); .setLowValue(NaN) .setHighValue(NaN) .setNullsFraction(1.0) .setDistinctValuesCount(0.0) .build()); result = result.mapOutputRowCount(rowCount -> rowCount * leftColumnStats.getNullsFraction());
.setDistinctValuesCount(distinctValuesCount) .setNullsFraction(nullsFraction) .build();
.setLowValue(-1) .setHighValue(10) .setDistinctValuesCount(4) .setNullsFraction(0.1) .setAverageRowSize(2.0) .build()) .addSymbolStatistics(new Symbol("y"), SymbolStatsEstimate.builder() .setLowValue(-2) .setHighValue(5) .setDistinctValuesCount(3) .setNullsFraction(0.2) .setAverageRowSize(2.0) .build()) .setOutputRowCount(10) .build();
new Symbol("a"), SymbolStatsEstimate.builder() .setNullsFraction(0.3) .setLowValue(1) .setHighValue(30) .setDistinctValuesCount(20) .build()) .addSymbolStatistics( new Symbol("b"), SymbolStatsEstimate.builder() .setNullsFraction(0.6) .setLowValue(13.5) .setHighValue(POSITIVE_INFINITY) .setDistinctValuesCount(40) .build()) .build();
outputStats.addSymbolStatistics(symbol, SymbolStatsEstimate.buildFrom(innerJoinSymbolStats) .setLowValue(leftSymbolStats.getLowValue()) .setHighValue(leftSymbolStats.getHighValue()) .setDistinctValuesCount(leftSymbolStats.getDistinctValuesCount()) .setNullsFraction(newNullsFraction) .build());