/**
 * Asserts that the symbols for which the {@code actual} estimate reports known
 * statistics are exactly the given symbols (compared as a set).
 *
 * @param symbols the complete set of symbols expected to have known statistics
 * @return this assertion, to allow chaining
 */
public PlanNodeStatsAssertion symbolsWithKnownStats(Symbol... symbols)
{
    ImmutableSet<Symbol> expectedSymbols = ImmutableSet.copyOf(symbols);
    assertEquals(actual.getSymbolsWithKnownStatistics(), expectedSymbols, "symbols with known stats");
    return this;
}
/**
 * Builds an estimate with an output row count of zero that carries zero
 * statistics for every symbol the given estimate has known statistics for.
 *
 * @param stats estimate whose known-statistics symbols are carried over
 * @return a new estimate with zero rows and zero per-symbol statistics
 */
private static PlanNodeStatsEstimate createZeroStats(PlanNodeStatsEstimate stats)
{
    PlanNodeStatsEstimate.Builder zeroed = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(0);
    stats.getSymbolsWithKnownStatistics()
            .forEach(symbol -> zeroed.addSymbolStatistics(symbol, SymbolStatsEstimate.zero()));
    return zeroed.build();
}
@Test public void verifyTestInputConsistent() { // if tests' input is not normalized, other tests don't make sense checkConsistent( new StatsNormalizer(), "standardInputStatistics", standardInputStatistics, standardInputStatistics.getSymbolsWithKnownStatistics(), types); }
/**
 * Estimates the effect of a boolean literal predicate on {@code input}.
 *
 * <p>A literal whose value is true leaves the input estimate untouched. Otherwise
 * the result has an output row count of zero and zero statistics for every
 * symbol the input has known statistics for.
 *
 * @param node the boolean literal being visited
 * @param context unused
 * @return {@code input} itself for a true literal, otherwise a zeroed estimate
 */
@Override
protected PlanNodeStatsEstimate visitBooleanLiteral(BooleanLiteral node, Void context)
{
    if (!node.getValue()) {
        PlanNodeStatsEstimate.Builder emptied = PlanNodeStatsEstimate.builder()
                .setOutputRowCount(0.0);
        input.getSymbolsWithKnownStatistics()
                .forEach(symbol -> emptied.addSymbolStatistics(symbol, SymbolStatsEstimate.zero()));
        return emptied.build();
    }
    return input;
}
/**
 * Convenience overload that maps every symbol with known statistics in the
 * estimate to {@code BIGINT} and delegates to
 * {@code assertNormalized(estimate, types)}.
 *
 * @param estimate the estimate to normalize
 * @return an assertion over the normalized estimate
 */
private PlanNodeStatsAssertion assertNormalized(PlanNodeStatsEstimate estimate)
{
    return assertNormalized(
            estimate,
            TypeProvider.copyOf(
                    estimate.getSymbolsWithKnownStatistics().stream()
                            .collect(toImmutableMap(identity(), symbol -> BIGINT))));
}
/**
 * Runs {@code normalizer} over the estimate, passing the estimate's own symbols
 * with known statistics as the symbol collection, and wraps the result in an
 * assertion.
 *
 * @param estimate the estimate to normalize
 * @param types the types handed to the normalizer
 * @return an assertion over the normalized estimate
 */
private PlanNodeStatsAssertion assertNormalized(PlanNodeStatsEstimate estimate, TypeProvider types)
{
    return PlanNodeStatsAssertion.assertThat(
            normalizer.normalize(estimate, estimate.getSymbolsWithKnownStatistics(), types));
}
/**
 * Adds two estimates together.
 *
 * <p>If either side has an unknown output row count the result is unknown.
 * Otherwise the output row count is the sum of both row counts. Every distinct
 * symbol with known statistics on either side gets statistics combined through
 * {@code addColumnStats}, or zero statistics when the summed row count is not
 * greater than zero.
 *
 * @param left the first estimate
 * @param right the second estimate
 * @param strategy range addition strategy forwarded to {@code addColumnStats}
 * @return the combined estimate, or an unknown estimate
 */
private static PlanNodeStatsEstimate addStats(PlanNodeStatsEstimate left, PlanNodeStatsEstimate right, RangeAdditionStrategy strategy)
{
    if (left.isOutputRowCountUnknown() || right.isOutputRowCountUnknown()) {
        return PlanNodeStatsEstimate.unknown();
    }

    double leftRowCount = left.getOutputRowCount();
    double rightRowCount = right.getOutputRowCount();
    double totalRowCount = leftRowCount + rightRowCount;

    PlanNodeStatsEstimate.Builder combined = PlanNodeStatsEstimate.builder();
    concat(left.getSymbolsWithKnownStatistics().stream(), right.getSymbolsWithKnownStatistics().stream())
            .distinct()
            .forEach(symbol -> combined.addSymbolStatistics(
                    symbol,
                    // with no rows in total there is nothing to weight the column stats by
                    totalRowCount > 0
                            ? addColumnStats(
                                    left.getSymbolStatistics(symbol),
                                    leftRowCount,
                                    right.getSymbolStatistics(symbol),
                                    rightRowCount,
                                    totalRowCount,
                                    strategy)
                            : SymbolStatsEstimate.zero()));

    combined.setOutputRowCount(totalRowCount);
    return combined.build();
}
/**
 * Asserts that the {@code actual} estimate matches the expected one: first the
 * output row counts, then the statistics of every symbol that has known
 * statistics in either estimate.
 *
 * @param expected the estimate to compare against
 * @return this assertion, to allow chaining
 */
public PlanNodeStatsAssertion equalTo(PlanNodeStatsEstimate expected)
{
    assertEstimateEquals(actual.getOutputRowCount(), expected.getOutputRowCount(), "outputRowCount mismatch");

    union(expected.getSymbolsWithKnownStatistics(), actual.getSymbolsWithKnownStatistics())
            .forEach(symbol -> assertSymbolStatsEqual(
                    symbol,
                    actual.getSymbolStatistics(symbol),
                    expected.getSymbolStatistics(symbol)));

    return this;
}
/**
 * Normalizes the per-symbol statistics of an estimate.
 *
 * <p>An estimate with an unknown output row count normalizes to the unknown
 * estimate. Otherwise, starting from a copy of {@code stats}, each symbol with
 * known statistics is handled as follows:
 * <ul>
 * <li>symbols not contained in {@code outputSymbols} (when present) are removed;</li>
 * <li>when the output row count is zero the symbol's statistics become zero
 * statistics, otherwise they are passed through {@code normalizeSymbolStats};</li>
 * <li>symbols whose normalized statistics are unknown are removed;</li>
 * <li>normalized statistics that differ from the originals replace them.</li>
 * </ul>
 *
 * @param stats the estimate to normalize
 * @param outputSymbols when present, the only symbols whose statistics are kept
 * @param types forwarded to {@code normalizeSymbolStats}
 * @return the normalized estimate
 */
private PlanNodeStatsEstimate normalize(PlanNodeStatsEstimate stats, Optional<Collection<Symbol>> outputSymbols, TypeProvider types)
{
    if (stats.isOutputRowCountUnknown()) {
        return PlanNodeStatsEstimate.unknown();
    }

    PlanNodeStatsEstimate.Builder normalized = PlanNodeStatsEstimate.buildFrom(stats);

    // Without an explicit output symbol list every symbol is retained.
    Predicate<Symbol> retained;
    if (outputSymbols.isPresent()) {
        ImmutableSet<Symbol> allowed = ImmutableSet.copyOf(outputSymbols.get());
        retained = allowed::contains;
    }
    else {
        retained = symbol -> true;
    }

    for (Symbol symbol : stats.getSymbolsWithKnownStatistics()) {
        if (!retained.test(symbol)) {
            normalized.removeSymbolStatistics(symbol);
            continue;
        }

        SymbolStatsEstimate originalStats = stats.getSymbolStatistics(symbol);
        SymbolStatsEstimate adjustedStats;
        if (stats.getOutputRowCount() == 0) {
            adjustedStats = SymbolStatsEstimate.zero();
        }
        else {
            adjustedStats = normalizeSymbolStats(symbol, originalStats, stats, types);
        }

        if (adjustedStats.isUnknown()) {
            normalized.removeSymbolStatistics(symbol);
        }
        else if (!Objects.equals(adjustedStats, originalStats)) {
            // only overwrite when normalization actually changed something
            normalized.addSymbolStatistics(symbol, adjustedStats);
        }
    }

    return normalized.build();
}
for (Symbol symbol : stats.getSymbolsWithKnownStatistics()) { if (!Objects.equals(stats.getSymbolStatistics(symbol), normalized.getSymbolStatistics(symbol))) { problems.add(format(
outputStats.setOutputRowCount(outputRowCount); for (Symbol symbol : joinComplementStats.getSymbolsWithKnownStatistics()) { SymbolStatsEstimate leftSymbolStats = sourceStats.getSymbolStatistics(symbol); SymbolStatsEstimate innerJoinSymbolStats = innerJoinStats.getSymbolStatistics(symbol); for (Symbol symbol : difference(innerJoinStats.getSymbolsWithKnownStatistics(), joinComplementStats.getSymbolsWithKnownStatistics())) { SymbolStatsEstimate innerJoinSymbolStats = innerJoinStats.getSymbolStatistics(symbol); double newNullsFraction = (innerJoinSymbolStats.getNullsFraction() * innerJoinRowCount + joinComplementRowCount) / outputRowCount;
result.setOutputRowCount(outputRowCount); superset.getSymbolsWithKnownStatistics().forEach(symbol -> { SymbolStatsEstimate supersetSymbolStats = superset.getSymbolStatistics(symbol); SymbolStatsEstimate subsetSymbolStats = subset.getSymbolStatistics(symbol);
public static PlanNodeStatsEstimate capStats(PlanNodeStatsEstimate stats, PlanNodeStatsEstimate cap) { if (stats.isOutputRowCountUnknown() || cap.isOutputRowCountUnknown()) { return PlanNodeStatsEstimate.unknown(); } PlanNodeStatsEstimate.Builder result = PlanNodeStatsEstimate.builder(); double cappedRowCount = min(stats.getOutputRowCount(), cap.getOutputRowCount()); result.setOutputRowCount(cappedRowCount); stats.getSymbolsWithKnownStatistics().forEach(symbol -> { SymbolStatsEstimate symbolStats = stats.getSymbolStatistics(symbol); SymbolStatsEstimate capSymbolStats = cap.getSymbolStatistics(symbol); SymbolStatsEstimate.Builder newSymbolStats = SymbolStatsEstimate.builder(); // for simplicity keep the average row size the same as in the input // in most cases the average row size doesn't change after applying filters newSymbolStats.setAverageRowSize(symbolStats.getAverageRowSize()); newSymbolStats.setDistinctValuesCount(min(symbolStats.getDistinctValuesCount(), capSymbolStats.getDistinctValuesCount())); newSymbolStats.setLowValue(max(symbolStats.getLowValue(), capSymbolStats.getLowValue())); newSymbolStats.setHighValue(min(symbolStats.getHighValue(), capSymbolStats.getHighValue())); double numberOfNulls = stats.getOutputRowCount() * symbolStats.getNullsFraction(); double capNumberOfNulls = cap.getOutputRowCount() * capSymbolStats.getNullsFraction(); double cappedNumberOfNulls = min(numberOfNulls, capNumberOfNulls); double cappedNullsFraction = cappedRowCount == 0 ? 1 : cappedNumberOfNulls / cappedRowCount; newSymbolStats.setNullsFraction(cappedNullsFraction); result.addSymbolStatistics(symbol, newSymbolStats.build()); }); return result.build(); }