/**
 * Static factory that wraps a symbol statistics estimate in a fluent assertion object.
 *
 * @param actual the estimate to wrap
 * @return a newly constructed {@code SymbolStatsAssertion} for {@code actual}
 */
public static SymbolStatsAssertion assertThat(SymbolStatsEstimate actual)
{
    SymbolStatsAssertion assertion = new SymbolStatsAssertion(actual);
    return assertion;
}
/**
 * Runs caller-supplied assertions against the statistics of one symbol.
 *
 * The statistics for {@code symbol} are looked up on {@code actual}, wrapped via
 * {@link SymbolStatsAssertion#assertThat}, and handed to the consumer.
 *
 * @param symbol the symbol whose statistics are to be checked
 * @param columnAssertionConsumer callback that receives the per-symbol assertion object
 * @return this assertion, so further checks can be chained
 */
public PlanNodeStatsAssertion symbolStats(Symbol symbol, Consumer<SymbolStatsAssertion> columnAssertionConsumer)
{
    // The argument is fully evaluated before accept() is invoked, matching the original order.
    columnAssertionConsumer.accept(
            SymbolStatsAssertion.assertThat(actual.getSymbolStatistics(symbol)));
    return this;
}
/**
 * Checks the wrapped estimate against {@code expected} field by field.
 *
 * The comparison is expressed as a chain of the individual assertions, in this order:
 * nulls fraction, low value, high value, distinct values count, average row size.
 * Each expected value is read from the corresponding getter on {@code expected}.
 *
 * @param expected the estimate supplying the expected values
 * @return the result of the last chained assertion call
 */
public SymbolStatsAssertion isEqualTo(SymbolStatsEstimate expected)
{
    return nullsFraction(expected.getNullsFraction())
            .lowValue(expected.getLowValue())
            .highValue(expected.getHighValue())
            .distinctValuesCount(expected.getDistinctValuesCount())
            .averageRowSize(expected.getAverageRowSize());
}
// NOTE(review): the brace below closes a scope opened before this excerpt
// (presumably the enclosing class) — confirm against the full file.
}
/**
 * Asserts the shape of an "empty" symbol estimate: an empty value range,
 * a distinct values count of 0 and a nulls fraction of 1.
 *
 * Unlike the other assertion methods this one returns nothing, so it must be
 * the last call in a chain.
 */
public void empty()
{
    SymbolStatsAssertion withEmptyRange = emptyRange();
    withEmptyRange
            .distinctValuesCount(0)
            .nullsFraction(1);
}
/**
 * Asserts that both ends of the value range are unknown.
 *
 * Shorthand for {@code lowValueUnknown()} followed by {@code highValueUnknown()}.
 *
 * @return the result of the {@code highValueUnknown()} call, for further chaining
 */
public SymbolStatsAssertion unknownRange()
{
    SymbolStatsAssertion lowChecked = lowValueUnknown();
    return lowChecked.highValueUnknown();
}
/**
 * Asserts that nothing is known about the given symbol's statistics.
 *
 * Delegates to {@code symbolStats} with a callback that checks, in order, that the
 * low value, high value, nulls fraction and distinct values count are all unknown.
 *
 * @param symbol the symbol whose statistics are expected to be unknown
 * @return whatever {@code symbolStats} returns, for further chaining
 */
public PlanNodeStatsAssertion symbolStatsUnknown(Symbol symbol)
{
    return symbolStats(
            symbol,
            stats -> stats
                    .lowValueUnknown()
                    .highValueUnknown()
                    .nullsFractionUnknown()
                    .distinctValuesCountUnknown());
}
.outputRowsCount(30) .symbolStats("o1", assertion -> assertion .lowValue(1) .highValue(20) .distinctValuesCountUnknown() .nullsFraction(0.3666666)) .symbolStats("o2", assertion -> assertion .lowValue(0) .highValue(7) .distinctValuesCount(4) .nullsFractionUnknown()) .symbolStats("o3", assertion -> assertion .lowValueUnknown() .highValueUnknown() .distinctValuesCount(6) .nullsFraction(0.1666667)) .symbolStats("o4", assertion -> assertion .lowValue(10) .highValue(15) .distinctValuesCount(4) .nullsFraction(0.1)));
.distinctValuesCount(1.0) .lowValue(7) .highValue(7) .nullsFraction(0.0); .distinctValuesCount(1.0) .lowValue(8) .highValue(8) .nullsFraction(0.0); .distinctValuesCount(1.0) .lowValue(9) .highValue(9) .nullsFraction(0.0); .distinctValuesCount(1.0) .lowValue(Long.MAX_VALUE) .highValue(Long.MAX_VALUE) .nullsFraction(0.0); .distinctValuesCount(1.0) .lowValue(7.5) .highValue(7.5) .nullsFraction(0.0); .distinctValuesCount(1.0) .lowValue(75.5) .highValue(75.5) .nullsFraction(0.0);
.lowValue(xStats.getLowValue()) .highValue(xStats.getHighValue()) .nullsFraction(0) .distinctValuesCount(wStats.getDistinctValuesCount())) .symbolStats(w, stats -> stats.isEqualTo(wStats)) .symbolStats(z, stats -> stats.isEqualTo(zStats)) .outputRowsCount(inputStatistics.getOutputRowCount() * xStats.getValuesFraction() * (wStats.getDistinctValuesCount() / xStats.getDistinctValuesCount())); .lowValue(xStats.getLowValue()) .highValue(xStats.getHighValue()) .nullsFraction(0) .distinctValuesCount(xStats.getDistinctValuesCount())) .symbolStats(u, stats -> stats.isEqualTo(uStats)) .symbolStats(z, stats -> stats.isEqualTo(zStats)) .outputRowsCount(inputStatistics.getOutputRowCount() * xStats.getValuesFraction()); .nullsFraction(0) .distinctValuesCountUnknown() .unknownRange()) .symbolStats(u, stats -> stats.isEqualTo(uStats)) .symbolStats(z, stats -> stats.isEqualTo(zStats)) .outputRowsCountUnknown(); .nullsFraction(0) .lowValue(xStats.getLowValue()) .highValue(xStats.getHighValue()) .distinctValuesCountUnknown()) .symbolStatsUnknown(unknown) .symbolStats(z, stats -> stats.isEqualTo(zStats))
.outputRowsCount(15) .symbolStats("z", symbolStatsAssertion -> symbolStatsAssertion .lowValue(10) .highValue(15) .distinctValuesCount(4) .nullsFraction(0.2)) .symbolStats("y", symbolStatsAssertion -> symbolStatsAssertion .lowValue(0) .highValue(3) .distinctValuesCount(3) .nullsFraction(0)); .outputRowsCountUnknown() .symbolStats("z", symbolStatsAssertion -> symbolStatsAssertion .unknownRange() .distinctValuesCountUnknown() .nullsFractionUnknown()) .symbolStats("y", symbolStatsAssertion -> symbolStatsAssertion .unknownRange() .nullsFractionUnknown() .distinctValuesCountUnknown());
/**
 * Exercises statistics calculation for a {@code length(...)} function call in two
 * situations: a NULL argument cast to VARCHAR(10), and a symbol reference whose
 * input statistics are unknown.
 */
@Test
public void testFunctionCall()
{
    // length(CAST(NULL AS VARCHAR(10))): expected to be all-null with no distinct values
    FunctionCall lengthOfNull = new FunctionCall(
            QualifiedName.of("length"),
            ImmutableList.of(new Cast(new NullLiteral(), "VARCHAR(10)")));
    assertCalculate(lengthOfNull)
            .distinctValuesCount(0.0)
            .lowValueUnknown()
            .highValueUnknown()
            .nullsFraction(1.0);

    // length(x) with unknown input statistics: every output statistic is expected to be unknown
    FunctionCall lengthOfSymbol = new FunctionCall(
            QualifiedName.of("length"),
            ImmutableList.of(new SymbolReference("x")));
    assertCalculate(
            lengthOfSymbol,
            PlanNodeStatsEstimate.unknown(),
            TypeProvider.viewOf(ImmutableMap.of(new Symbol("x"), createVarcharType(2))))
            .distinctValuesCountUnknown()
            .lowValueUnknown()
            .highValueUnknown()
            .nullsFractionUnknown();
}
/**
 * Casts symbol {@code a}, whose range is [1.6, 3.3] with no distinct values count set,
 * to {@code bigint} and checks the resulting statistics: range [2.0, 3.0],
 * unknown distinct values count, nulls fraction 0.3 and unknown data size.
 */
@Test
public void testCastDoubleToShortRangeUnknownDistinctValuesCount()
{
    // Input column statistics; the distinct values count is deliberately left unset.
    SymbolStatsEstimate sourceColumnStats = SymbolStatsEstimate.builder()
            .setNullsFraction(0.3)
            .setLowValue(1.6)
            .setHighValue(3.3)
            .setAverageRowSize(2.0)
            .build();
    PlanNodeStatsEstimate inputStatistics = PlanNodeStatsEstimate.builder()
            .addSymbolStatistics(new Symbol("a"), sourceColumnStats)
            .build();

    Cast castToBigint = new Cast(new SymbolReference("a"), "bigint");

    assertCalculate(castToBigint, inputStatistics)
            .lowValue(2.0)
            .highValue(3.0)
            .distinctValuesCountUnknown()
            .nullsFraction(0.3)
            .dataSizeUnknown();
}
/**
 * Checks the statistics computed for a VARBINARY constant: one distinct value,
 * no nulls, and an unknown value range.
 */
@Test
public void testVarbinaryConstant()
{
    // Build the literal expression through LiteralEncoder, using the test metadata's block encoding serde.
    LiteralEncoder encoder = new LiteralEncoder(createTestMetadataManager().getBlockEncodingSerde());
    Expression varbinaryLiteral = encoder.toExpression(Slices.utf8Slice("ala ma kota"), VARBINARY);

    assertCalculate(varbinaryLiteral)
            .distinctValuesCount(1.0)
            .lowValueUnknown()
            .highValueUnknown()
            .nullsFraction(0.0);
}
/**
 * Checks the low/high bounds computed for {@code x / y} over a grid of input ranges.
 *
 * NOTE(review): the arguments of {@code xyStats} appear to be
 * (lowX, highX, lowY, highY) — the expected bounds below are consistent with that
 * reading, but the helper is not visible here; confirm against its definition.
 *
 * Under that reading, five x ranges (strictly negative, negative up to zero, spanning
 * zero, zero up to positive, strictly positive) are each combined with three y ranges
 * (strictly negative, spanning zero, strictly positive). Whenever the y range spans
 * zero, the expected result range is (-infinity, +infinity).
 */
@Test
public void testDivideArithmeticBinaryExpression()
{
    // x in [-11, -3]
    assertCalculate(expression("x / y"), xyStats(-11, -3, -5, -4)).lowValue(0.6).highValue(2.75);
    assertCalculate(expression("x / y"), xyStats(-11, -3, -5, 4)).lowValue(NEGATIVE_INFINITY).highValue(POSITIVE_INFINITY);
    assertCalculate(expression("x / y"), xyStats(-11, -3, 4, 5)).lowValue(-2.75).highValue(-0.6);

    // x in [-11, 0]
    assertCalculate(expression("x / y"), xyStats(-11, 0, -5, -4)).lowValue(0).highValue(2.75);
    assertCalculate(expression("x / y"), xyStats(-11, 0, -5, 4)).lowValue(NEGATIVE_INFINITY).highValue(POSITIVE_INFINITY);
    assertCalculate(expression("x / y"), xyStats(-11, 0, 4, 5)).lowValue(-2.75).highValue(0);

    // x in [-11, 3]
    assertCalculate(expression("x / y"), xyStats(-11, 3, -5, -4)).lowValue(-0.75).highValue(2.75);
    assertCalculate(expression("x / y"), xyStats(-11, 3, -5, 4)).lowValue(NEGATIVE_INFINITY).highValue(POSITIVE_INFINITY);
    assertCalculate(expression("x / y"), xyStats(-11, 3, 4, 5)).lowValue(-2.75).highValue(0.75);

    // x in [0, 3]
    assertCalculate(expression("x / y"), xyStats(0, 3, -5, -4)).lowValue(-0.75).highValue(0);
    assertCalculate(expression("x / y"), xyStats(0, 3, -5, 4)).lowValue(NEGATIVE_INFINITY).highValue(POSITIVE_INFINITY);
    assertCalculate(expression("x / y"), xyStats(0, 3, 4, 5)).lowValue(0).highValue(0.75);

    // x in [3, 11]
    assertCalculate(expression("x / y"), xyStats(3, 11, -5, -4)).lowValue(-2.75).highValue(-0.6);
    assertCalculate(expression("x / y"), xyStats(3, 11, -5, 4)).lowValue(NEGATIVE_INFINITY).highValue(POSITIVE_INFINITY);
    assertCalculate(expression("x / y"), xyStats(3, 11, 4, 5)).lowValue(0.6).highValue(2.75);
}
.isEqualTo(allNullStats); assertCalculate(expression("x - all_null"), relationStats) .isEqualTo(allNullStats); assertCalculate(expression("all_null - x"), relationStats) .isEqualTo(allNullStats); assertCalculate(expression("all_null * x"), relationStats) .isEqualTo(allNullStats); assertCalculate(expression("x % all_null"), relationStats) .isEqualTo(allNullStats); assertCalculate(expression("all_null % x"), relationStats) .isEqualTo(allNullStats); assertCalculate(expression("x / all_null"), relationStats) .isEqualTo(allNullStats); assertCalculate(expression("all_null / x"), relationStats) .isEqualTo(allNullStats);
/**
 * With 30 output rows and a distinct values count of 20 for symbol {@code a},
 * normalization is expected to leave the distinct values count at 20.
 */
@Test
public void testNoCapping()
{
    Symbol a = new Symbol("a");
    SymbolStatsEstimate statsForA = SymbolStatsEstimate.builder()
            .setDistinctValuesCount(20)
            .build();
    PlanNodeStatsEstimate estimate = PlanNodeStatsEstimate.builder()
            .setOutputRowCount(30)
            .addSymbolStatistics(a, statsForA)
            .build();

    assertNormalized(estimate)
            .symbolStats(a, stats -> stats.distinctValuesCount(20));
}
/**
 * Asserts that the high value is unknown, which this class expresses as a
 * high value of {@code POSITIVE_INFINITY}.
 *
 * @return the result of the underlying {@code highValue} assertion, for chaining
 */
public SymbolStatsAssertion highValueUnknown()
{
    SymbolStatsAssertion unboundedAbove = highValue(POSITIVE_INFINITY);
    return unboundedAbove;
}
.outputRowsCount(30) .symbolStats("o1", assertion -> assertion .lowValue(1) .highValue(20) .dataSizeUnknown() .nullsFraction(0.3666666)) .symbolStats("o2", assertion -> assertion .lowValue(0) .highValue(7) .distinctValuesCount(6.4) .nullsFractionUnknown()) .symbolStats("o3", assertion -> assertion .lowValueUnknown() .highValueUnknown() .distinctValuesCount(8.5) .nullsFraction(0.1666667)) .symbolStats("o4", assertion -> assertion .lowValue(10) .highValue(15) .distinctValuesCount(4.0) .nullsFraction(0.1)) .symbolStats("o5", assertion -> assertion .lowValue(NEGATIVE_INFINITY) .highValue(POSITIVE_INFINITY) .distinctValuesCountUnknown() .nullsFraction(0.7)));
.outputRowsCount(475.0) // all rows minus nulls multiplied by ((distinct values - 1) / distinct values) .symbolStats("y", symbolAssert -> { symbolAssert.averageRowSize(4.0) .distinctValuesCount(19.0) .lowValue(0.0) .highValue(5.0) .nullsFraction(0.0); }); .outputRowsCount(731.25) // all rows minus nulls multiplied by ((distinct values - 1) / distinct values) .symbolStats("x", symbolAssert -> { symbolAssert.averageRowSize(4.0) .distinctValuesCount(39.0) .lowValue(-10.0) .highValue(10.0) .nullsFraction(0.0); }); .outputRowsCount(500.0) // all rows minus nulls .symbolStats("y", symbolAssert -> { symbolAssert.averageRowSize(4.0) .distinctValuesCount(19.0) .lowValue(0.0) .highValue(5.0) .nullsFraction(0.0); }); .outputRowsCount(882.0) // all rows minus nulls multiplied by ((distinct values - 1) / distinct values) .symbolStats("leftOpen", symbolAssert -> {
.lowValue(uStats.getLowValue()) .highValue(uStats.getHighValue()) .nullsFraction(0) .distinctValuesCount(uStats.getDistinctValuesCount() - xStats.getDistinctValuesCount())) .symbolStats(x, stats -> stats.isEqualTo(xStats)) .symbolStats(z, stats -> stats.isEqualTo(zStats)) .outputRowsCount(inputStatistics.getOutputRowCount() * uStats.getValuesFraction() * (1 - xStats.getDistinctValuesCount() / uStats.getDistinctValuesCount())); .lowValue(xStats.getLowValue()) .highValue(xStats.getHighValue()) .nullsFraction(0) .distinctValuesCount(xStats.getDistinctValuesCount() * 0.5)) .symbolStats(u, stats -> stats.isEqualTo(uStats)) .symbolStats(z, stats -> stats.isEqualTo(zStats)) .outputRowsCount(inputStatistics.getOutputRowCount() * xStats.getValuesFraction() * 0.5); .nullsFraction(0) .distinctValuesCountUnknown() .unknownRange()) .symbolStats(u, stats -> stats.isEqualTo(uStats)) .symbolStats(z, stats -> stats.isEqualTo(zStats)) .outputRowsCountUnknown(); .nullsFraction(0) .lowValue(xStats.getLowValue()) .highValue(xStats.getHighValue()) .distinctValuesCountUnknown()) .symbolStatsUnknown(unknown) .symbolStats(z, stats -> stats.isEqualTo(zStats))