private static Optional<DoubleStatistics> mergeDoubleStatistics(Optional<DoubleStatistics> first, Optional<DoubleStatistics> second) { // normally, either both or none is present if (first.isPresent() && second.isPresent()) { return Optional.of(new DoubleStatistics( reduce(first.get().getMin(), second.get().getMin(), MIN, true), reduce(first.get().getMax(), second.get().getMax(), MAX, true))); } return Optional.empty(); }
/**
 * Builds column statistics for a double-valued column.
 *
 * @param min lower bound of the column values, if known
 * @param max upper bound of the column values, if known
 * @param nullsCount number of nulls, if known
 * @param distinctValuesCount number of distinct values, if known
 * @return statistics carrying the given double range, nulls count and distinct values count
 */
public static HiveColumnStatistics createDoubleColumnStatistics(OptionalDouble min, OptionalDouble max, OptionalLong nullsCount, OptionalLong distinctValuesCount) {
    return builder()
            .setDoubleStatistics(new DoubleStatistics(min, max))
            .setNullsCount(nullsCount)
            .setDistinctValuesCount(distinctValuesCount)
            .build();
}
/**
 * Stores the min/max bounds of a column on {@code result}, picking the statistics
 * flavor that matches the column {@code type}.
 *
 * @param session session forwarded to the value extraction helpers
 * @param timeZone time zone forwarded to the timestamp extraction helper
 * @param type column type that selects which statistics are populated
 * @param min block holding the minimum value
 * @param max block holding the maximum value
 * @param result builder that receives the statistics
 * @throws IllegalArgumentException if {@code type} is not one of the handled types
 */
private static void setMinMax(ConnectorSession session, DateTimeZone timeZone, Type type, Block min, Block max, HiveColumnStatistics.Builder result) {
    if (type.equals(BIGINT) || type.equals(INTEGER) || type.equals(SMALLINT) || type.equals(TINYINT)) {
        result.setIntegerStatistics(new IntegerStatistics(
                getIntegerValue(session, type, min),
                getIntegerValue(session, type, max)));
        return;
    }
    if (type.equals(DOUBLE) || type.equals(REAL)) {
        result.setDoubleStatistics(new DoubleStatistics(
                getDoubleValue(session, type, min),
                getDoubleValue(session, type, max)));
        return;
    }
    if (type.equals(DATE)) {
        result.setDateStatistics(new DateStatistics(
                getDateValue(session, type, min),
                getDateValue(session, type, max)));
        return;
    }
    if (type.equals(TIMESTAMP)) {
        // timestamp bounds are recorded as integer statistics
        result.setIntegerStatistics(new IntegerStatistics(
                getTimestampValue(timeZone, min),
                getTimestampValue(timeZone, max)));
        return;
    }
    if (type instanceof DecimalType) {
        result.setDecimalStatistics(new DecimalStatistics(
                getDecimalValue(session, type, min),
                getDecimalValue(session, type, max)));
        return;
    }
    throw new IllegalArgumentException("Unexpected type: " + type);
}
/**
 * Creates column statistics that carry only a double min/max range.
 *
 * @param min lower bound, possibly empty
 * @param max upper bound, possibly empty
 * @return statistics with the double range set and nothing else configured by this method
 */
private static HiveColumnStatistics doubleRange(OptionalDouble min, OptionalDouble max) {
    DoubleStatistics range = new DoubleStatistics(min, max);
    return HiveColumnStatistics.builder().setDoubleStatistics(range).build();
}
/**
 * Converting double column stats on which no value was set must yield double statistics
 * with empty min and max, and leave every other statistic empty.
 */
@Test
public void testEmptyDoubleStatsToColumnStatistics() {
    // no setter is invoked on the stats payload before conversion
    DoubleColumnStatsData unsetStatsData = new DoubleColumnStatsData();
    ColumnStatisticsObj metastoreColumn = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(unsetStatsData));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(metastoreColumn, OptionalLong.empty());

    DoubleStatistics expectedRange = new DoubleStatistics(OptionalDouble.empty(), OptionalDouble.empty());

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.of(expectedRange));
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
/**
 * Converting populated double column stats must carry over the low/high values and the
 * nulls count, and leave the statistics of other kinds empty.
 */
@Test
public void testDoubleStatsToColumnStatistics() {
    DoubleColumnStatsData populatedStatsData = new DoubleColumnStatsData();
    populatedStatsData.setLowValue(0);
    populatedStatsData.setHighValue(100);
    populatedStatsData.setNumNulls(1);
    populatedStatsData.setNumDVs(20);

    ColumnStatisticsObj metastoreColumn = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(populatedStatsData));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(metastoreColumn, OptionalLong.of(1000));

    DoubleStatistics expectedRange = new DoubleStatistics(OptionalDouble.of(0), OptionalDouble.of(100));

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.of(expectedRange));
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(1));
    // NOTE(review): 20 distinct values were supplied but 19 are expected; presumably the
    // conversion discounts the null - confirm against fromMetastoreApiColumnStatistics
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.of(19));
}
/**
 * Exercises merging of double min/max statistics for three scenarios: both sides empty,
 * one side empty, and both sides populated.
 */
@Test
public void testMergeDoubleColumnStatistics() {
    // both sides have empty bounds: the merged bounds are expected to be empty as well
    HiveColumnStatistics emptyFirst = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.empty(), OptionalDouble.empty()))
            .build();
    HiveColumnStatistics emptySecond = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.empty(), OptionalDouble.empty()))
            .build();
    HiveColumnStatistics emptyExpected = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.empty(), OptionalDouble.empty()))
            .build();
    assertMergeHiveColumnStatistics(emptyFirst, emptySecond, emptyExpected);

    // only one side has bounds: those bounds are expected to be kept
    HiveColumnStatistics boundedFirst = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.of(1), OptionalDouble.of(2)))
            .build();
    HiveColumnStatistics unboundedSecond = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.empty(), OptionalDouble.empty()))
            .build();
    HiveColumnStatistics boundedExpected = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.of(1), OptionalDouble.of(2)))
            .build();
    assertMergeHiveColumnStatistics(boundedFirst, unboundedSecond, boundedExpected);

    // both sides have bounds: expect the smaller min and the larger max
    HiveColumnStatistics narrowFirst = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.of(1), OptionalDouble.of(2)))
            .build();
    HiveColumnStatistics wideSecond = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.of(0), OptionalDouble.of(3)))
            .build();
    HiveColumnStatistics wideExpected = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.of(0), OptionalDouble.of(3)))
            .build();
    assertMergeHiveColumnStatistics(narrowFirst, wideSecond, wideExpected);
}
/**
 * Creates column statistics that carry only a double min/max range.
 *
 * @param min lower bound, possibly empty
 * @param max upper bound, possibly empty
 * @return statistics with the double range set and nothing else configured by this method
 */
private static HiveColumnStatistics doubleRange(OptionalDouble min, OptionalDouble max) {
    DoubleStatistics range = new DoubleStatistics(min, max);
    return HiveColumnStatistics.builder().setDoubleStatistics(range).build();
}
/**
 * Converting double column stats on which no value was set must yield double statistics
 * with empty min and max, and leave every other statistic empty.
 */
@Test
public void testEmptyDoubleStatsToColumnStatistics() {
    // no setter is invoked on the stats payload before conversion
    DoubleColumnStatsData unsetStatsData = new DoubleColumnStatsData();
    ColumnStatisticsObj metastoreColumn = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(unsetStatsData));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(metastoreColumn, OptionalLong.empty());

    DoubleStatistics expectedRange = new DoubleStatistics(OptionalDouble.empty(), OptionalDouble.empty());

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.of(expectedRange));
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}
/**
 * Converting populated double column stats must carry over the low/high values and the
 * nulls count, and leave the statistics of other kinds empty.
 */
@Test
public void testDoubleStatsToColumnStatistics() {
    DoubleColumnStatsData populatedStatsData = new DoubleColumnStatsData();
    populatedStatsData.setLowValue(0);
    populatedStatsData.setHighValue(100);
    populatedStatsData.setNumNulls(1);
    populatedStatsData.setNumDVs(20);

    ColumnStatisticsObj metastoreColumn = new ColumnStatisticsObj("my_col", DOUBLE_TYPE_NAME, doubleStats(populatedStatsData));
    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(metastoreColumn, OptionalLong.of(1000));

    DoubleStatistics expectedRange = new DoubleStatistics(OptionalDouble.of(0), OptionalDouble.of(100));

    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.of(expectedRange));
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.of(1));
    // NOTE(review): 20 distinct values were supplied but 19 are expected; presumably the
    // conversion discounts the null - confirm against fromMetastoreApiColumnStatistics
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.of(19));
}
/**
 * Exercises merging of double min/max statistics for three scenarios: both sides empty,
 * one side empty, and both sides populated.
 */
@Test
public void testMergeDoubleColumnStatistics() {
    // both sides have empty bounds: the merged bounds are expected to be empty as well
    HiveColumnStatistics emptyFirst = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.empty(), OptionalDouble.empty()))
            .build();
    HiveColumnStatistics emptySecond = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.empty(), OptionalDouble.empty()))
            .build();
    HiveColumnStatistics emptyExpected = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.empty(), OptionalDouble.empty()))
            .build();
    assertMergeHiveColumnStatistics(emptyFirst, emptySecond, emptyExpected);

    // only one side has bounds: those bounds are expected to be kept
    HiveColumnStatistics boundedFirst = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.of(1), OptionalDouble.of(2)))
            .build();
    HiveColumnStatistics unboundedSecond = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.empty(), OptionalDouble.empty()))
            .build();
    HiveColumnStatistics boundedExpected = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.of(1), OptionalDouble.of(2)))
            .build();
    assertMergeHiveColumnStatistics(boundedFirst, unboundedSecond, boundedExpected);

    // both sides have bounds: expect the smaller min and the larger max
    HiveColumnStatistics narrowFirst = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.of(1), OptionalDouble.of(2)))
            .build();
    HiveColumnStatistics wideSecond = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.of(0), OptionalDouble.of(3)))
            .build();
    HiveColumnStatistics wideExpected = HiveColumnStatistics.builder()
            .setDoubleStatistics(new DoubleStatistics(OptionalDouble.of(0), OptionalDouble.of(3)))
            .build();
    assertMergeHiveColumnStatistics(narrowFirst, wideSecond, wideExpected);
}