public static HiveColumnStatistics createBooleanColumnStatistics(OptionalLong trueCount, OptionalLong falseCount, OptionalLong nullsCount) { return builder() .setBooleanStatistics(new BooleanStatistics(trueCount, falseCount)) .setNullsCount(nullsCount) .build(); }
private static Optional<BooleanStatistics> mergeBooleanStatistics(Optional<BooleanStatistics> first, Optional<BooleanStatistics> second) { // normally, either both or none is present if (first.isPresent() && second.isPresent()) { return Optional.of(new BooleanStatistics( reduce(first.get().getTrueCount(), second.get().getTrueCount(), ADD, false), reduce(first.get().getFalseCount(), second.get().getFalseCount(), ADD, false))); } return Optional.empty(); }
private static OptionalLong getDistinctValuesCount(HiveColumnStatistics statistics) { if (statistics.getBooleanStatistics().isPresent() && statistics.getBooleanStatistics().get().getFalseCount().isPresent() && statistics.getBooleanStatistics().get().getTrueCount().isPresent()) { long falseCount = statistics.getBooleanStatistics().get().getFalseCount().getAsLong(); long trueCount = statistics.getBooleanStatistics().get().getTrueCount().getAsLong(); return OptionalLong.of((falseCount > 0 ? 1 : 0) + (trueCount > 0 ? 1 : 0)); } if (statistics.getDistinctValuesCount().isPresent()) { return statistics.getDistinctValuesCount(); } return OptionalLong.empty(); }
private static ColumnStatisticsObj createBooleanStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) { BooleanColumnStatsData data = new BooleanColumnStatsData(); statistics.getNullsCount().ifPresent(data::setNumNulls); statistics.getBooleanStatistics().ifPresent(booleanStatistics -> { booleanStatistics.getFalseCount().ifPresent(data::setNumFalses); booleanStatistics.getTrueCount().ifPresent(data::setNumTrues); }); return new ColumnStatisticsObj(columnName, columnType.toString(), booleanStats(data)); }
long numberOfTrue = BIGINT.getLong(computedStatistics.get(NUMBER_OF_TRUE_VALUES), 0); long numberOfNonNullValues = BIGINT.getLong(computedStatistics.get(NUMBER_OF_NON_NULL_VALUES), 0); result.setBooleanStatistics(new BooleanStatistics(OptionalLong.of(numberOfTrue), OptionalLong.of(numberOfNonNullValues - numberOfTrue)));
OptionalLong falseCount = booleanStatistics.getFalseCount(); OptionalLong trueCount = booleanStatistics.getTrueCount(); falseCount.ifPresent(count -> checkStatistics(count >= 0, table, partition, column, "falseCount must be greater than or equal to zero: %s", count));
@Test public void testBooleanStatsToColumnStatistics() { BooleanColumnStatsData booleanColumnStatsData = new BooleanColumnStatsData(); booleanColumnStatsData.setNumTrues(100); booleanColumnStatsData.setNumFalses(10); booleanColumnStatsData.setNumNulls(0); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(booleanColumnStatsData)); HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.empty()); assertEquals(actual.getIntegerStatistics(), Optional.empty()); assertEquals(actual.getDoubleStatistics(), Optional.empty()); assertEquals(actual.getDecimalStatistics(), Optional.empty()); assertEquals(actual.getDateStatistics(), Optional.empty()); assertEquals(actual.getBooleanStatistics(), Optional.of(new BooleanStatistics(OptionalLong.of(100), OptionalLong.of(10)))); assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getNullsCount(), OptionalLong.of(0)); assertEquals(actual.getDistinctValuesCount(), OptionalLong.empty()); }
@Test public void testMergeBooleanColumnStatistics() { assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())).build(), HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())).build(), HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())).build()); assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.of(1), OptionalLong.of(2))).build(), HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())).build(), HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())).build()); assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.of(1), OptionalLong.of(2))).build(), HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.of(2), OptionalLong.of(3))).build(), HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.of(3), OptionalLong.of(5))).build()); }
@Test public void testEmptyBooleanStatsToColumnStatistics() { BooleanColumnStatsData emptyBooleanColumnStatsData = new BooleanColumnStatsData(); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(emptyBooleanColumnStatsData)); HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.empty()); assertEquals(actual.getIntegerStatistics(), Optional.empty()); assertEquals(actual.getDoubleStatistics(), Optional.empty()); assertEquals(actual.getDecimalStatistics(), Optional.empty()); assertEquals(actual.getDateStatistics(), Optional.empty()); assertEquals(actual.getBooleanStatistics(), Optional.of(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty()))); assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getNullsCount(), OptionalLong.empty()); assertEquals(actual.getDistinctValuesCount(), OptionalLong.empty()); }
@Test public void testBooleanStatsToColumnStatistics() { BooleanColumnStatsData booleanColumnStatsData = new BooleanColumnStatsData(); booleanColumnStatsData.setNumTrues(100); booleanColumnStatsData.setNumFalses(10); booleanColumnStatsData.setNumNulls(0); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(booleanColumnStatsData)); HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.empty()); assertEquals(actual.getIntegerStatistics(), Optional.empty()); assertEquals(actual.getDoubleStatistics(), Optional.empty()); assertEquals(actual.getDecimalStatistics(), Optional.empty()); assertEquals(actual.getDateStatistics(), Optional.empty()); assertEquals(actual.getBooleanStatistics(), Optional.of(new BooleanStatistics(OptionalLong.of(100), OptionalLong.of(10)))); assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getNullsCount(), OptionalLong.of(0)); assertEquals(actual.getDistinctValuesCount(), OptionalLong.empty()); }
@Test public void testEmptyBooleanStatsToColumnStatistics() { BooleanColumnStatsData emptyBooleanColumnStatsData = new BooleanColumnStatsData(); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", BOOLEAN_TYPE_NAME, booleanStats(emptyBooleanColumnStatsData)); HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.empty()); assertEquals(actual.getIntegerStatistics(), Optional.empty()); assertEquals(actual.getDoubleStatistics(), Optional.empty()); assertEquals(actual.getDecimalStatistics(), Optional.empty()); assertEquals(actual.getDateStatistics(), Optional.empty()); assertEquals(actual.getBooleanStatistics(), Optional.of(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty()))); assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getNullsCount(), OptionalLong.empty()); assertEquals(actual.getDistinctValuesCount(), OptionalLong.empty()); }
@Test public void testMergeBooleanColumnStatistics() { assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())).build(), HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())).build(), HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())).build()); assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.of(1), OptionalLong.of(2))).build(), HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())).build(), HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.empty(), OptionalLong.empty())).build()); assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.of(1), OptionalLong.of(2))).build(), HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.of(2), OptionalLong.of(3))).build(), HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(OptionalLong.of(3), OptionalLong.of(5))).build()); }