/**
 * Checks the estimates asserted for {@code IS NOT NULL} predicates.
 *
 * <p>For {@code x} the test expects 750 output rows and a null fraction of zero,
 * together with the listed distinct-value count and value range. For
 * {@code emptyRange} it expects zero output rows and empty symbol statistics.
 */
@Test
public void testIsNotNullFilter()
{
    assertExpression("x IS NOT NULL")
            .outputRowsCount(750.0)
            .symbolStats("x", xAssert -> {
                xAssert.distinctValuesCount(40.0)
                        .lowValue(-10.0)
                        .highValue(10.0)
                        .nullsFraction(0.0);
            });

    assertExpression("emptyRange IS NOT NULL")
            .outputRowsCount(0.0)
            .symbolStats("emptyRange", emptyRangeAssert -> emptyRangeAssert.empty());
}
.outputRowsCount(281.25) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .outputRowsCount(0) .symbolStats(new Symbol("x"), SymbolStatsAssertion::emptyRange) .symbolStats(new Symbol("y"), SymbolStatsAssertion::emptyRange); .outputRowsCount(337.5) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.lowValue(-10) .outputRowsCount(337.5) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.lowValue(-10) .outputRowsCount(0);
@Test public void testNotStats() { assertExpression("NOT(x < 0e0)") .outputRowsCount(625) // FIXME - nulls shouldn't be restored .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .lowValue(-10.0) .highValue(10.0) .distinctValuesCount(20.0) .nullsFraction(0.4)) // FIXME - nulls shouldn't be restored .symbolStats(new Symbol("y"), symbolAssert -> symbolAssert.isEqualTo(yStats)); assertExpression("NOT(x IS NULL)") .outputRowsCount(750) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .lowValue(-10.0) .highValue(10.0) .distinctValuesCount(40.0) .nullsFraction(0)) .symbolStats(new Symbol("y"), symbolAssert -> symbolAssert.isEqualTo(yStats)); assertExpression("NOT(json_array_contains(JSON '[]', x))") .outputRowsCountUnknown(); }
/**
 * Checks the estimates asserted for {@code IS NULL} predicates.
 *
 * <p>For {@code x} the test expects 250 output rows, zero distinct values, an empty
 * range and a null fraction of one. For {@code emptyRange} it expects 1000 output
 * rows and empty symbol statistics.
 */
@Test
public void testIsNullFilter()
{
    assertExpression("x IS NULL")
            .outputRowsCount(250.0)
            .symbolStats(new Symbol("x"), onlyNulls -> {
                onlyNulls.distinctValuesCount(0)
                        .emptyRange()
                        .nullsFraction(1.0);
            });

    assertExpression("emptyRange IS NULL")
            .outputRowsCount(1000.0)
            .symbolStats(new Symbol("emptyRange"), emptyRangeAssert -> emptyRangeAssert.empty());
}
.symbolStats(w, stats -> stats.isEqualTo(wStats)) .symbolStats(z, stats -> stats.isEqualTo(zStats)) .outputRowsCount(inputStatistics.getOutputRowCount() * xStats.getValuesFraction() * (wStats.getDistinctValuesCount() / xStats.getDistinctValuesCount())); .symbolStats(u, stats -> stats.isEqualTo(uStats)) .symbolStats(z, stats -> stats.isEqualTo(zStats)) .outputRowsCount(inputStatistics.getOutputRowCount() * xStats.getValuesFraction()); .outputRowsCount(0); .outputRowsCount(1000) .symbolStats(fractionalNdv, stats -> stats .nullsFraction(0)
/**
 * Checks the estimates asserted for a symbol compared with itself ({@code x = x}).
 *
 * <p>The test expects 750 output rows and pins the average row size, distinct-value
 * count and value range of {@code x}.
 */
@Test
public void testSymbolEqualsSameSymbolFilter()
{
    assertExpression("x = x")
            .outputRowsCount(750)
            // The previous lambda built a SymbolStatsEstimate and discarded it, so nothing
            // was verified for "x". The same four values now go through the assertion
            // object so that they are actually checked.
            // NOTE(review): the nulls fraction is left unpinned because the original
            // builder did not name one - confirm whether 0 should be asserted as well.
            .symbolStats("x", symbolStats ->
                    symbolStats.averageRowSize(4.0)
                            .distinctValuesCount(40.0)
                            .lowValue(-10.0)
                            .highValue(10.0));
}
.symbolStats(x, stats -> stats.isEqualTo(xStats)) .symbolStats(z, stats -> stats.isEqualTo(zStats)) .outputRowsCount(inputStatistics.getOutputRowCount() * uStats.getValuesFraction() * (1 - xStats.getDistinctValuesCount() / uStats.getDistinctValuesCount())); .symbolStats(u, stats -> stats.isEqualTo(uStats)) .symbolStats(z, stats -> stats.isEqualTo(zStats)) .outputRowsCount(inputStatistics.getOutputRowCount() * xStats.getValuesFraction() * 0.5); .outputRowsCount(0); .outputRowsCount(500) .symbolStats(fractionalNdv, stats -> stats .nullsFraction(0)
.outputRowsCount(lessThan3Rows) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .outputRowsCount(lessThan3Rows); .outputRowsCount(18.75) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .outputRowsCount(262.5) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0)
.outputRowsCount(15) .symbolStats("z", symbolStatsAssertion -> symbolStatsAssertion .lowValue(10)
/**
 * Checks the estimates asserted for boolean literal predicates.
 *
 * <p>{@code true} is expected to yield statistics equal to
 * {@code standardInputStatistics}. {@code false} is expected to yield zero output
 * rows and empty statistics for every symbol listed below.
 */
@Test
public void testBooleanLiteralStats()
{
    assertExpression("true")
            .equalTo(standardInputStatistics);

    assertExpression("false")
            .outputRowsCount(0.0)
            .symbolStats("x", stats -> stats.empty())
            .symbolStats("y", stats -> stats.empty())
            .symbolStats("z", stats -> stats.empty())
            .symbolStats("leftOpen", stats -> stats.empty())
            .symbolStats("rightOpen", stats -> stats.empty())
            .symbolStats("emptyRange", stats -> stats.empty())
            .symbolStats("unknownRange", stats -> stats.empty());
}
.outputRowsCount(375) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .outputRowsCount(37.5) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .outputRowsCount(37.5) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0) .outputRowsCount(37.5) .symbolStats(new Symbol("x"), symbolAssert -> symbolAssert.averageRowSize(4.0)
/**
 * Checks that the output row count asserted for an aggregation grouped by
 * {@code y} and {@code z} is 100 when the source reports 100 rows and each
 * grouping symbol has 50 distinct values.
 */
@Test
public void testAggregationStatsCappedToInputRows()
{
    // Source statistics: 100 rows, 50 distinct values for each grouping key.
    SymbolStatsEstimate yKeyStats = SymbolStatsEstimate.builder().setDistinctValuesCount(50).build();
    SymbolStatsEstimate zKeyStats = SymbolStatsEstimate.builder().setDistinctValuesCount(50).build();
    PlanNodeStatsEstimate sourceStats = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(100)
            .addSymbolStatistics(new Symbol("y"), yKeyStats)
            .addSymbolStatistics(new Symbol("z"), zKeyStats)
            .build();

    tester().assertStatsFor(plan -> plan
            .aggregation(aggregation -> aggregation
                    .addAggregation(plan.symbol("count_on_x", BIGINT), expression("count(x)"), ImmutableList.of(BIGINT))
                    .singleGroupingSet(plan.symbol("y", BIGINT), plan.symbol("z", BIGINT))
                    .source(plan.values(plan.symbol("x", BIGINT), plan.symbol("y", BIGINT), plan.symbol("z", BIGINT)))))
            .withSourceStats(sourceStats)
            .check(result -> result.outputRowsCount(100));
}
} // closes the enclosing scope opened before this chunk
/**
 * Checks the estimates asserted for {@code NOT_EQUAL} comparisons between the
 * symbol {@code u} and a cast expression.
 *
 * <p>Two cases are covered: the right-hand side is a cast of symbol {@code w}
 * (807.3 rows expected), and the right-hand side is a cast of the literal
 * {@code 10} (897.0 rows expected).
 */
@Test
public void symbolToCastExpressionNotEqual()
{
    // Case 1: u <> CAST(w AS BIGINT)
    double symbolCastRowCount = 807.3;
    ComparisonExpression uNotEqualCastOfW = new ComparisonExpression(
            NOT_EQUAL,
            new SymbolReference("u"),
            new Cast(new SymbolReference("w"), BIGINT));
    assertCalculate(uNotEqualCastOfW)
            .outputRowsCount(symbolCastRowCount)
            .symbolStats("u", equalTo(capNDV(zeroNullsFraction(uStats), symbolCastRowCount)))
            .symbolStats("w", equalTo(capNDV(wStats, symbolCastRowCount)))
            .symbolStats("z", equalTo(capNDV(zStats, symbolCastRowCount)));

    // Case 2: u <> CAST(10 AS BIGINT)
    double literalCastRowCount = 897.0;
    ComparisonExpression uNotEqualCastOfLiteral = new ComparisonExpression(
            NOT_EQUAL,
            new SymbolReference("u"),
            new Cast(new LongLiteral("10"), BIGINT));
    assertCalculate(uNotEqualCastOfLiteral)
            .outputRowsCount(literalCastRowCount)
            .symbolStats("u", equalTo(capNDV(updateNDV(zeroNullsFraction(uStats), -1), literalCastRowCount)))
            .symbolStats("z", equalTo(capNDV(zStats, literalCastRowCount)));
}
.build()) .check(check -> { check.outputRowsCount(144) .symbolStats("a", assertion -> assertion.isEqualTo(expectedANotInC)) .symbolStats("b", assertion -> assertion.isEqualTo(bStats))
.build()) .check(check -> { check.outputRowsCount(180) .symbolStats("a", assertion -> assertion.isEqualTo(expectedAInC)) .symbolStatsUnknown("b")
.build()) .check(check -> { check.outputRowsCount(180) .symbolStats("a", assertion -> assertion.isEqualTo(expectedAInC)) .symbolStats("b", assertion -> assertion.isEqualTo(bStats))
.build()) .check(check -> { check.outputRowsCount(720) .symbolStats("a", assertion -> assertion.isEqualTo(expectedANotInCWithExtraFilter)) .symbolStats("b", assertion -> assertion.isEqualTo(bStats))
.build()) .check(check -> check .outputRowsCount(10) .symbolStats("a", assertion -> assertion.isEqualTo(stats)) .symbolStats("b", assertion -> assertion.isEqualTo(stats))
.outputRowsCount(rowCount) .symbolStats("u", equalTo(capNDV(zeroNullsFraction(uStats), rowCount))) .symbolStats("w", equalTo(capNDV(zeroNullsFraction(wStats), rowCount))) .outputRowsCount(rowCount) .symbolStats("x", equalTo(capNDV(zeroNullsFraction(xStats), rowCount))) .symbolStats("y", equalTo(capNDV(zeroNullsFraction(yStats), rowCount))) .outputRowsCount(rowCount) .symbolStats("x", equalTo(capNDV(zeroNullsFraction(xStats), rowCount))) .symbolStats("w", equalTo(capNDV(zeroNullsFraction(wStats), rowCount))) .outputRowsCount(rowCount) .symbolStats("x", equalTo(capNDV(zeroNullsFraction(xStats), rowCount))) .symbolStats("u", equalTo(capNDV(zeroNullsFraction(uStats), rowCount)))
.outputRowsCount(rowCount) .symbolStats("u", equalTo(capNDV(zeroNullsFraction(uStats), rowCount))) .symbolStats("w", equalTo(capNDV(zeroNullsFraction(wStats), rowCount))) .outputRowsCount(rowCount) .symbolStats("x", symbolAssert -> { symbolAssert.averageRowSize(4) .outputRowsCount(rowCount) .symbolStats("x", symbolAssert -> { symbolAssert.averageRowSize(6) .outputRowsCount(rowCount) .symbolStats("x", symbolAssert -> { symbolAssert.averageRowSize(6)