/**
 * Exercises the estimator with constant boolean predicates.
 * <p>
 * {@code true} is asserted equal to {@code standardInputStatistics}. {@code false} is asserted
 * to produce zero output rows, with each listed symbol checked through
 * {@code SymbolStatsAssertion.empty()}.
 */
@Test
public void testBooleanLiteralStats()
{
    assertExpression("true")
            .equalTo(standardInputStatistics);

    assertExpression("false")
            .outputRowsCount(0.0)
            .symbolStats("x", stats -> stats.empty())
            .symbolStats("y", stats -> stats.empty())
            .symbolStats("z", stats -> stats.empty())
            .symbolStats("leftOpen", stats -> stats.empty())
            .symbolStats("rightOpen", stats -> stats.empty())
            .symbolStats("emptyRange", stats -> stats.empty())
            .symbolStats("unknownRange", stats -> stats.empty());
}
.check(check -> { check.outputRowsCount(144) .symbolStats("a", assertion -> assertion.isEqualTo(expectedANotInC)) .symbolStats("b", assertion -> assertion.isEqualTo(bStats)) .symbolStatsUnknown("c") .symbolStatsUnknown("sjo");
/**
 * Convenience overload that accepts the symbol by name.
 * <p>
 * Wraps {@code symbolName} in a new {@link Symbol} and delegates to the {@code Symbol}-based
 * overload.
 *
 * @param symbolName name used to construct the {@link Symbol}
 * @param symbolStatsAssertionConsumer consumer handed on to the {@code Symbol}-based overload
 * @return whatever the {@code Symbol}-based overload returns
 */
public PlanNodeStatsAssertion symbolStats(String symbolName, Consumer<SymbolStatsAssertion> symbolStatsAssertionConsumer)
{
    Symbol namedSymbol = new Symbol(symbolName);
    return symbolStats(namedSymbol, symbolStatsAssertionConsumer);
}
.check(check -> { check.outputRowsCount(180) .symbolStats("a", assertion -> assertion.isEqualTo(expectedAInC)) .symbolStatsUnknown("b") .symbolStatsUnknown("c")
/**
 * Checks the estimates for {@code IS NOT NULL} predicates.
 * <p>
 * For {@code x} the test expects 750 rows, 40 distinct values, a [-10, 10] range and a nulls
 * fraction of 0. For {@code emptyRange} it expects zero rows and stats that pass
 * {@code SymbolStatsAssertion.empty()}.
 */
@Test
public void testIsNotNullFilter()
{
    assertExpression("x IS NOT NULL")
            .outputRowsCount(750.0)
            .symbolStats("x", xAssert -> {
                xAssert.distinctValuesCount(40.0)
                        .lowValue(-10.0)
                        .highValue(10.0)
                        .nullsFraction(0.0);
            });

    assertExpression("emptyRange IS NOT NULL")
            .outputRowsCount(0.0)
            .symbolStats("emptyRange", emptyAssert -> emptyAssert.empty());
}
.check(check -> { check.outputRowsCount(180) .symbolStats("a", assertion -> assertion.isEqualTo(expectedAInC)) .symbolStats("b", assertion -> assertion.isEqualTo(bStats)) .symbolStatsUnknown("c") .symbolStatsUnknown("sjo");
/**
 * Asserts on the statistics of {@code symbol} by chaining the low-value, high-value,
 * nulls-fraction and distinct-values-count "unknown" checks, in that order.
 *
 * @param symbol symbol whose statistics are checked
 * @return the result of the delegated {@code symbolStats} call
 */
public PlanNodeStatsAssertion symbolStatsUnknown(Symbol symbol)
{
    return symbolStats(symbol, unknownAssert -> {
        unknownAssert
                .lowValueUnknown()
                .highValueUnknown()
                .nullsFractionUnknown()
                .distinctValuesCountUnknown();
    });
}
.check(check -> check .outputRowsCount(10) .symbolStats("a", assertion -> assertion.isEqualTo(stats)) .symbolStats("b", assertion -> assertion.isEqualTo(stats)) .symbolStatsUnknown("c") .symbolStatsUnknown("sjo"));
/**
 * Checks the estimates for {@code IS NULL} predicates.
 * <p>
 * For {@code x} the test expects 250 rows, zero distinct values, an empty range and a nulls
 * fraction of 1. For {@code emptyRange} it expects 1000 rows and stats that pass
 * {@code SymbolStatsAssertion.empty()}.
 */
@Test
public void testIsNullFilter()
{
    Symbol x = new Symbol("x");
    assertExpression("x IS NULL")
            .outputRowsCount(250.0)
            .symbolStats(x, xAssert -> {
                xAssert.distinctValuesCount(0)
                        .emptyRange()
                        .nullsFraction(1.0);
            });

    Symbol emptyRange = new Symbol("emptyRange");
    assertExpression("emptyRange IS NULL")
            .outputRowsCount(1000.0)
            .symbolStats(emptyRange, emptyAssert -> emptyAssert.empty());
}
/**
 * Verifies the estimate for a symbol compared with itself ({@code x = x}).
 * <p>
 * The expected output row count is 750, and the statistics of {@code x} are asserted field by
 * field: average row size 4, 40 distinct values, and a [-10, 10] value range.
 */
@Test
public void testSymbolEqualsSameSymbolFilter()
{
    assertExpression("x = x")
            .outputRowsCount(750)
            // Assert on the supplied SymbolStatsAssertion; a consumer that merely builds a
            // SymbolStatsEstimate and drops it would verify nothing about "x".
            .symbolStats("x", symbolStats -> symbolStats
                    .averageRowSize(4.0)
                    .distinctValuesCount(40.0)
                    .lowValue(-10.0)
                    .highValue(10.0));
}
/**
 * Normalizes an estimate of 10 output rows in which {@code a} and {@code b} each declare 20
 * distinct values (nulls fractions 0 and 0.4 respectively) and {@code c} has unknown stats.
 * <p>
 * Expects distinct value counts of 10 for {@code a} and 8 for {@code b}, and an unknown count
 * for {@code c}.
 * <p>
 * NOTE(review): the method name looks like a typo for {@code testCap...}; it is left unchanged.
 */
@Test
public void tesCapDistinctValuesByOutputRowCount()
{
    Symbol a = new Symbol("a");
    Symbol b = new Symbol("b");
    Symbol c = new Symbol("c");

    SymbolStatsEstimate withoutNulls = SymbolStatsEstimate.builder()
            .setNullsFraction(0)
            .setDistinctValuesCount(20)
            .build();
    SymbolStatsEstimate withNulls = SymbolStatsEstimate.builder()
            .setNullsFraction(0.4)
            .setDistinctValuesCount(20)
            .build();

    PlanNodeStatsEstimate estimate = PlanNodeStatsEstimate.builder()
            .addSymbolStatistics(a, withoutNulls)
            .addSymbolStatistics(b, withNulls)
            .addSymbolStatistics(c, SymbolStatsEstimate.unknown())
            .setOutputRowCount(10)
            .build();

    assertNormalized(estimate)
            .symbolStats(a, aAssert -> aAssert.distinctValuesCount(10))
            .symbolStats(b, bAssert -> bAssert.distinctValuesCount(8))
            .symbolStats(c, cAssert -> cAssert.distinctValuesCountUnknown());
}
/**
 * Checks {@code NOT_EQUAL} comparisons whose right-hand side is a cast to BIGINT.
 * <p>
 * Case 1 compares symbol {@code u} with {@code CAST(w)} and expects 807.3 rows.
 * Case 2 compares symbol {@code u} with {@code CAST(10)} and expects 897.0 rows.
 * In both cases the expected per-symbol stats are derived from the base stats via
 * {@code zeroNullsFraction}, {@code updateNDV} and {@code capNDV} exactly as written below.
 */
@Test
public void symbolToCastExpressionNotEqual()
{
    // u <> CAST(w AS BIGINT)
    double symbolCastRows = 807.3;
    ComparisonExpression uVersusCastOfW = new ComparisonExpression(
            NOT_EQUAL,
            new SymbolReference("u"),
            new Cast(new SymbolReference("w"), BIGINT));
    assertCalculate(uVersusCastOfW)
            .outputRowsCount(symbolCastRows)
            .symbolStats("u", equalTo(capNDV(zeroNullsFraction(uStats), symbolCastRows)))
            .symbolStats("w", equalTo(capNDV(wStats, symbolCastRows)))
            .symbolStats("z", equalTo(capNDV(zStats, symbolCastRows)));

    // u <> CAST(10 AS BIGINT)
    double literalCastRows = 897.0;
    ComparisonExpression uVersusCastOfLiteral = new ComparisonExpression(
            NOT_EQUAL,
            new SymbolReference("u"),
            new Cast(new LongLiteral("10"), BIGINT));
    assertCalculate(uVersusCastOfLiteral)
            .outputRowsCount(literalCastRows)
            .symbolStats("u", equalTo(capNDV(updateNDV(zeroNullsFraction(uStats), -1), literalCastRows)))
            .symbolStats("z", equalTo(capNDV(zStats, literalCastRows)));
}
/**
 * Normalizes an estimate of 30 output rows where symbol {@code a} declares 20 distinct values,
 * and expects the distinct values count of {@code a} to still be 20 afterwards.
 */
@Test
public void testNoCapping()
{
    Symbol a = new Symbol("a");
    SymbolStatsEstimate twentyDistinct = SymbolStatsEstimate.builder()
            .setDistinctValuesCount(20)
            .build();

    PlanNodeStatsEstimate estimate = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(30)
            .addSymbolStatistics(a, twentyDistinct)
            .build();

    assertNormalized(estimate)
            .symbolStats(a, aAssert -> aAssert.distinctValuesCount(20));
}
assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("u"), new SymbolReference("w"))) .outputRowsCount(rowCount) .symbolStats("u", equalTo(capNDV(zeroNullsFraction(uStats), rowCount))) .symbolStats("w", equalTo(capNDV(zeroNullsFraction(wStats), rowCount))) .symbolStats("z", equalTo(capNDV(zStats, rowCount))); assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("x"), new SymbolReference("y"))) .outputRowsCount(rowCount) .symbolStats("x", equalTo(capNDV(zeroNullsFraction(xStats), rowCount))) .symbolStats("y", equalTo(capNDV(zeroNullsFraction(yStats), rowCount))) .symbolStats("z", equalTo(capNDV(zStats, rowCount))); assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("x"), new SymbolReference("w"))) .outputRowsCount(rowCount) .symbolStats("x", equalTo(capNDV(zeroNullsFraction(xStats), rowCount))) .symbolStats("w", equalTo(capNDV(zeroNullsFraction(wStats), rowCount))) .symbolStats("z", equalTo(capNDV(zStats, rowCount))); assertCalculate(new ComparisonExpression(NOT_EQUAL, new SymbolReference("x"), new SymbolReference("u"))) .outputRowsCount(rowCount) .symbolStats("x", equalTo(capNDV(zeroNullsFraction(xStats), rowCount))) .symbolStats("u", equalTo(capNDV(zeroNullsFraction(uStats), rowCount))) .symbolStats("z", equalTo(capNDV(zStats, rowCount)));
@Test public void testNotStats() { assertExpression("NOT(x < 0e0)") .outputRowsCount(625) // FIXME - nulls shouldn't be restored .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .lowValue(-10.0) .highValue(10.0) .distinctValuesCount(20.0) .nullsFraction(0.4)) // FIXME - nulls shouldn't be restored .symbolStats(new Symbol("y"), symbolAssert -> symbolAssert.isEqualTo(yStats)); assertExpression("NOT(x IS NULL)") .outputRowsCount(750) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .lowValue(-10.0) .highValue(10.0) .distinctValuesCount(40.0) .nullsFraction(0)) .symbolStats(new Symbol("y"), symbolAssert -> symbolAssert.isEqualTo(yStats)); assertExpression("NOT(json_array_contains(JSON '[]', x))") .outputRowsCountUnknown(); }
assertExpression("x < 3e0") .outputRowsCount(lessThan3Rows) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .lowValue(-10) assertExpression(format(xEquals, minusThree)) .outputRowsCount(18.75) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .lowValue(-3) assertExpression(format(xLessThan, minusThree)) .outputRowsCount(262.5) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .lowValue(-10)
.symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .lowValue(-7.5) .symbolStats(new Symbol("x"), SymbolStatsAssertion::emptyRange) .symbolStats(new Symbol("y"), SymbolStatsAssertion::emptyRange); .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.lowValue(-10) .highValue(0) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.lowValue(-10) .highValue(0)
/**
 * Normalizes an estimate that carries stats for {@code a}, {@code b} and {@code c} while
 * passing only {@code b} and {@code c} (both typed BIGINT) to the normalizer.
 * <p>
 * The result is checked with {@code symbolsWithKnownStats(b)}, and {@code b} is expected to
 * keep its distinct values count of 30.
 */
@Test
public void testDropNonOutputSymbols()
{
    Symbol a = new Symbol("a");
    Symbol b = new Symbol("b");
    Symbol c = new Symbol("c");

    SymbolStatsEstimate statsOfA = SymbolStatsEstimate.builder().setDistinctValuesCount(20).build();
    SymbolStatsEstimate statsOfB = SymbolStatsEstimate.builder().setDistinctValuesCount(30).build();
    PlanNodeStatsEstimate estimate = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(40)
            .addSymbolStatistics(a, statsOfA)
            .addSymbolStatistics(b, statsOfB)
            .addSymbolStatistics(c, SymbolStatsEstimate.unknown())
            .build();

    // "a" is deliberately absent from both the output list and the type map
    ImmutableList<Symbol> outputSymbols = ImmutableList.of(b, c);
    TypeProvider outputTypes = TypeProvider.copyOf(ImmutableMap.of(b, BIGINT, c, BIGINT));

    PlanNodeStatsAssertion.assertThat(normalizer.normalize(estimate, outputSymbols, outputTypes))
            .symbolsWithKnownStats(b)
            .symbolStats(b, bAssert -> bAssert.distinctValuesCount(30));
}
/**
 * Shared driver: builds a single-symbol estimate with 10000000000 output rows and asserts the
 * distinct values count reported for that symbol after normalization.
 *
 * @param type type registered for the symbol and used when converting {@code low}/{@code high}
 * @param ndv distinct values count set on the input stats
 * @param low lower bound, converted through {@code asStatsValue}
 * @param high upper bound, converted through {@code asStatsValue}
 * @param expectedNormalizedNdv distinct values count expected after normalization
 */
private void testCapDistinctValuesByToDomainRangeLength(Type type, double ndv, Object low, Object high, double expectedNormalizedNdv)
{
    Symbol x = new Symbol("x");

    SymbolStatsEstimate inputStats = SymbolStatsEstimate.builder()
            .setNullsFraction(0)
            .setDistinctValuesCount(ndv)
            .setLowValue(asStatsValue(low, type))
            .setHighValue(asStatsValue(high, type))
            .build();

    PlanNodeStatsEstimate estimate = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(10000000000L)
            .addSymbolStatistics(x, inputStats)
            .build();

    TypeProvider types = TypeProvider.copyOf(ImmutableMap.of(x, type));
    assertNormalized(estimate, types)
            .symbolStats(x, ndvAssert -> ndvAssert.distinctValuesCount(expectedNormalizedNdv));
}
.symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .lowValue(-10.0) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .lowValue(-7.5) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .lowValue(1) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .lowValue(1)