private static Optional<DateStatistics> mergeDateStatistics(Optional<DateStatistics> first, Optional<DateStatistics> second) { // normally, either both or none is present if (first.isPresent() && second.isPresent()) { return Optional.of(new DateStatistics( reduce(first.get().getMin(), second.get().getMin(), MIN, true), reduce(first.get().getMax(), second.get().getMax(), MAX, true))); } return Optional.empty(); }
public static HiveColumnStatistics createDateColumnStatistics(Optional<LocalDate> min, Optional<LocalDate> max, OptionalLong nullsCount, OptionalLong distinctValuesCount) { return builder() .setDateStatistics(new DateStatistics(min, max)) .setNullsCount(nullsCount) .setDistinctValuesCount(distinctValuesCount) .build(); }
private static HiveColumnStatistics dateRange(Optional<String> min, Optional<String> max) { return HiveColumnStatistics.builder() .setDateStatistics(new DateStatistics(min.map(TestMetastoreHiveStatisticsProvider::parseDate), max.map(TestMetastoreHiveStatisticsProvider::parseDate))) .build(); }
private static void setMinMax(ConnectorSession session, DateTimeZone timeZone, Type type, Block min, Block max, HiveColumnStatistics.Builder result) { if (type.equals(BIGINT) || type.equals(INTEGER) || type.equals(SMALLINT) || type.equals(TINYINT)) { result.setIntegerStatistics(new IntegerStatistics(getIntegerValue(session, type, min), getIntegerValue(session, type, max))); } else if (type.equals(DOUBLE) || type.equals(REAL)) { result.setDoubleStatistics(new DoubleStatistics(getDoubleValue(session, type, min), getDoubleValue(session, type, max))); } else if (type.equals(DATE)) { result.setDateStatistics(new DateStatistics(getDateValue(session, type, min), getDateValue(session, type, max))); } else if (type.equals(TIMESTAMP)) { result.setIntegerStatistics(new IntegerStatistics(getTimestampValue(timeZone, min), getTimestampValue(timeZone, max))); } else if (type instanceof DecimalType) { result.setDecimalStatistics(new DecimalStatistics(getDecimalValue(session, type, min), getDecimalValue(session, type, max))); } else { throw new IllegalArgumentException("Unexpected type: " + type); } }
@Test public void testMergeDateColumnStatistics() { assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.empty(), Optional.empty())).build(), HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.empty(), Optional.empty())).build(), HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.empty(), Optional.empty())).build()); assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.of(LocalDate.ofEpochDay(1)), Optional.of(LocalDate.ofEpochDay(2)))).build(), HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.empty(), Optional.empty())).build(), HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.of(LocalDate.ofEpochDay(1)), Optional.of(LocalDate.ofEpochDay(2)))).build()); assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.of(LocalDate.ofEpochDay(1)), Optional.of(LocalDate.ofEpochDay(2)))).build(), HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.of(LocalDate.ofEpochDay(0)), Optional.of(LocalDate.ofEpochDay(3)))).build(), HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.of(LocalDate.ofEpochDay(0)), Optional.of(LocalDate.ofEpochDay(3)))).build()); }
@Test public void testEmptyDateStatsToColumnStatistics() { DateColumnStatsData emptyDateColumnStatsData = new DateColumnStatsData(); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DATE_TYPE_NAME, dateStats(emptyDateColumnStatsData)); HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.empty()); assertEquals(actual.getIntegerStatistics(), Optional.empty()); assertEquals(actual.getDoubleStatistics(), Optional.empty()); assertEquals(actual.getDecimalStatistics(), Optional.empty()); assertEquals(actual.getDateStatistics(), Optional.of(new DateStatistics(Optional.empty(), Optional.empty()))); assertEquals(actual.getBooleanStatistics(), Optional.empty()); assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getNullsCount(), OptionalLong.empty()); assertEquals(actual.getDistinctValuesCount(), OptionalLong.empty()); }
@Test public void testDateStatsToColumnStatistics() { DateColumnStatsData dateColumnStatsData = new DateColumnStatsData(); dateColumnStatsData.setLowValue(new Date(1000)); dateColumnStatsData.setHighValue(new Date(2000)); dateColumnStatsData.setNumNulls(1); dateColumnStatsData.setNumDVs(20); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DATE_TYPE_NAME, dateStats(dateColumnStatsData)); HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.of(1000)); assertEquals(actual.getIntegerStatistics(), Optional.empty()); assertEquals(actual.getDoubleStatistics(), Optional.empty()); assertEquals(actual.getDecimalStatistics(), Optional.empty()); assertEquals(actual.getDateStatistics(), Optional.of(new DateStatistics(Optional.of(LocalDate.ofEpochDay(1000)), Optional.of(LocalDate.ofEpochDay(2000))))); assertEquals(actual.getBooleanStatistics(), Optional.empty()); assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getNullsCount(), OptionalLong.of(1)); assertEquals(actual.getDistinctValuesCount(), OptionalLong.of(19)); }
private static HiveColumnStatistics dateRange(Optional<String> min, Optional<String> max) { return HiveColumnStatistics.builder() .setDateStatistics(new DateStatistics(min.map(TestMetastoreHiveStatisticsProvider::parseDate), max.map(TestMetastoreHiveStatisticsProvider::parseDate))) .build(); }
@Test public void testMergeDateColumnStatistics() { assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.empty(), Optional.empty())).build(), HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.empty(), Optional.empty())).build(), HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.empty(), Optional.empty())).build()); assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.of(LocalDate.ofEpochDay(1)), Optional.of(LocalDate.ofEpochDay(2)))).build(), HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.empty(), Optional.empty())).build(), HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.of(LocalDate.ofEpochDay(1)), Optional.of(LocalDate.ofEpochDay(2)))).build()); assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.of(LocalDate.ofEpochDay(1)), Optional.of(LocalDate.ofEpochDay(2)))).build(), HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.of(LocalDate.ofEpochDay(0)), Optional.of(LocalDate.ofEpochDay(3)))).build(), HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(Optional.of(LocalDate.ofEpochDay(0)), Optional.of(LocalDate.ofEpochDay(3)))).build()); }
@Test public void testEmptyDateStatsToColumnStatistics() { DateColumnStatsData emptyDateColumnStatsData = new DateColumnStatsData(); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DATE_TYPE_NAME, dateStats(emptyDateColumnStatsData)); HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.empty()); assertEquals(actual.getIntegerStatistics(), Optional.empty()); assertEquals(actual.getDoubleStatistics(), Optional.empty()); assertEquals(actual.getDecimalStatistics(), Optional.empty()); assertEquals(actual.getDateStatistics(), Optional.of(new DateStatistics(Optional.empty(), Optional.empty()))); assertEquals(actual.getBooleanStatistics(), Optional.empty()); assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getNullsCount(), OptionalLong.empty()); assertEquals(actual.getDistinctValuesCount(), OptionalLong.empty()); }
@Test public void testDateStatsToColumnStatistics() { DateColumnStatsData dateColumnStatsData = new DateColumnStatsData(); dateColumnStatsData.setLowValue(new Date(1000)); dateColumnStatsData.setHighValue(new Date(2000)); dateColumnStatsData.setNumNulls(1); dateColumnStatsData.setNumDVs(20); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DATE_TYPE_NAME, dateStats(dateColumnStatsData)); HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.of(1000)); assertEquals(actual.getIntegerStatistics(), Optional.empty()); assertEquals(actual.getDoubleStatistics(), Optional.empty()); assertEquals(actual.getDecimalStatistics(), Optional.empty()); assertEquals(actual.getDateStatistics(), Optional.of(new DateStatistics(Optional.of(LocalDate.ofEpochDay(1000)), Optional.of(LocalDate.ofEpochDay(2000))))); assertEquals(actual.getBooleanStatistics(), Optional.empty()); assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getNullsCount(), OptionalLong.of(1)); assertEquals(actual.getDistinctValuesCount(), OptionalLong.of(19)); }