result.setMaxValueSizeInBytes(getIntegerValue(session, BIGINT, computedStatistics.get(MAX_VALUE_SIZE_IN_BYTES))); result.setTotalSizeInBytes(getIntegerValue(session, BIGINT, computedStatistics.get(TOTAL_SIZE_IN_BYTES))); result.setNullsCount(rowCount - BIGINT.getLong(computedStatistics.get(NUMBER_OF_NON_NULL_VALUES), 0)); long numberOfDistinctValues = BIGINT.getLong(computedStatistics.get(NUMBER_OF_DISTINCT_VALUES), 0); if (numberOfDistinctValues > numberOfNonNullValues) { result.setDistinctValuesCount(numberOfNonNullValues); result.setDistinctValuesCount(numberOfDistinctValues); long numberOfTrue = BIGINT.getLong(computedStatistics.get(NUMBER_OF_TRUE_VALUES), 0); long numberOfNonNullValues = BIGINT.getLong(computedStatistics.get(NUMBER_OF_NON_NULL_VALUES), 0); result.setBooleanStatistics(new BooleanStatistics(OptionalLong.of(numberOfTrue), OptionalLong.of(numberOfNonNullValues - numberOfTrue))); return result.build();
/**
 * Records the minimum and maximum of a column on {@code result}, choosing the
 * statistics holder that matches {@code type}.
 *
 * <ul>
 * <li>BIGINT, INTEGER, SMALLINT, TINYINT: stored as {@code IntegerStatistics}</li>
 * <li>DOUBLE, REAL: stored as {@code DoubleStatistics}</li>
 * <li>DATE: stored as {@code DateStatistics}</li>
 * <li>TIMESTAMP: converted with {@code getTimestampValue} using {@code timeZone} and
 * stored as {@code IntegerStatistics}</li>
 * <li>any {@code DecimalType}: stored as {@code DecimalStatistics}</li>
 * </ul>
 *
 * @param session session handed to the value extraction helpers
 * @param timeZone zone used only for the TIMESTAMP conversion
 * @param type type of the column the blocks belong to
 * @param min block holding the minimum value
 * @param max block holding the maximum value
 * @param result builder that receives the range
 * @throws IllegalArgumentException if {@code type} is none of the types listed above
 */
private static void setMinMax(ConnectorSession session, DateTimeZone timeZone, Type type, Block min, Block max, HiveColumnStatistics.Builder result)
{
    boolean integerLike = type.equals(BIGINT) || type.equals(INTEGER) || type.equals(SMALLINT) || type.equals(TINYINT);
    if (integerLike) {
        result.setIntegerStatistics(new IntegerStatistics(
                getIntegerValue(session, type, min),
                getIntegerValue(session, type, max)));
        return;
    }

    boolean floatingPoint = type.equals(DOUBLE) || type.equals(REAL);
    if (floatingPoint) {
        result.setDoubleStatistics(new DoubleStatistics(
                getDoubleValue(session, type, min),
                getDoubleValue(session, type, max)));
        return;
    }

    if (type.equals(DATE)) {
        result.setDateStatistics(new DateStatistics(
                getDateValue(session, type, min),
                getDateValue(session, type, max)));
        return;
    }

    if (type.equals(TIMESTAMP)) {
        // Timestamps share the integer statistics holder; only this branch needs the time zone.
        result.setIntegerStatistics(new IntegerStatistics(
                getTimestampValue(timeZone, min),
                getTimestampValue(timeZone, max)));
        return;
    }

    if (type instanceof DecimalType) {
        result.setDecimalStatistics(new DecimalStatistics(
                getDecimalValue(session, type, min),
                getDecimalValue(session, type, max)));
        return;
    }

    throw new IllegalArgumentException("Unexpected type: " + type);
}
PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setMaxValueSizeInBytes(-1).build())) .build(), invalidColumnStatistics("maxValueSizeInBytes must be greater than or equal to zero: -1")); PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setTotalSizeInBytes(-1).build())) .build(), invalidColumnStatistics("totalSizeInBytes must be greater than or equal to zero: -1")); PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setNullsCount(-1).build())) .build(), invalidColumnStatistics("nullsCount must be greater than or equal to zero: -1")); PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setNullsCount(1).build())) .build(), invalidColumnStatistics("nullsCount must be less than or equal to rowCount. nullsCount: 1. rowCount: 0.")); PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setDistinctValuesCount(-1).build())) .build(), invalidColumnStatistics("distinctValuesCount must be greater than or equal to zero: -1"));
/**
 * Exercises merging of the counters that exist for every column type:
 * the distinct values count and the nulls count.
 *
 * The third argument of each call is presumably the expected merge result
 * (the assertion helper is not visible here).
 */
@Test
public void testMergeGenericColumnStatistics()
{
    OptionalLong unknown = OptionalLong.empty();
    OptionalLong one = OptionalLong.of(1);
    OptionalLong two = OptionalLong.of(2);
    OptionalLong three = OptionalLong.of(3);

    // Distinct values count: unknown + unknown -> unknown
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setDistinctValuesCount(unknown).build(),
            HiveColumnStatistics.builder().setDistinctValuesCount(unknown).build(),
            HiveColumnStatistics.builder().setDistinctValuesCount(unknown).build());
    // Distinct values count: 1 + unknown -> unknown
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setDistinctValuesCount(one).build(),
            HiveColumnStatistics.builder().setDistinctValuesCount(unknown).build(),
            HiveColumnStatistics.builder().setDistinctValuesCount(unknown).build());
    // Distinct values count: 1 + 2 -> 2
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setDistinctValuesCount(one).build(),
            HiveColumnStatistics.builder().setDistinctValuesCount(two).build(),
            HiveColumnStatistics.builder().setDistinctValuesCount(two).build());

    // Nulls count: unknown + unknown -> unknown
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setNullsCount(unknown).build(),
            HiveColumnStatistics.builder().setNullsCount(unknown).build(),
            HiveColumnStatistics.builder().setNullsCount(unknown).build());
    // Nulls count: 1 + unknown -> unknown
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setNullsCount(one).build(),
            HiveColumnStatistics.builder().setNullsCount(unknown).build(),
            HiveColumnStatistics.builder().setNullsCount(unknown).build());
    // Nulls count: 1 + 2 -> 3
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setNullsCount(one).build(),
            HiveColumnStatistics.builder().setNullsCount(two).build(),
            HiveColumnStatistics.builder().setNullsCount(three).build());
}
/**
 * Exercises merging of the size-related statistics: the maximum value size
 * and the total size, both in bytes.
 *
 * The third argument of each call is presumably the expected merge result
 * (the assertion helper is not visible here).
 */
@Test
public void testMergeStringColumnStatistics()
{
    OptionalLong unknown = OptionalLong.empty();
    OptionalLong one = OptionalLong.of(1);
    OptionalLong two = OptionalLong.of(2);
    OptionalLong three = OptionalLong.of(3);
    OptionalLong five = OptionalLong.of(5);

    // Max value size: unknown + unknown -> unknown
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setMaxValueSizeInBytes(unknown).build(),
            HiveColumnStatistics.builder().setMaxValueSizeInBytes(unknown).build(),
            HiveColumnStatistics.builder().setMaxValueSizeInBytes(unknown).build());
    // Max value size: 1 + unknown -> 1
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setMaxValueSizeInBytes(one).build(),
            HiveColumnStatistics.builder().setMaxValueSizeInBytes(unknown).build(),
            HiveColumnStatistics.builder().setMaxValueSizeInBytes(one).build());
    // Max value size: 2 + 3 -> 3
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setMaxValueSizeInBytes(two).build(),
            HiveColumnStatistics.builder().setMaxValueSizeInBytes(three).build(),
            HiveColumnStatistics.builder().setMaxValueSizeInBytes(three).build());

    // Total size: unknown + unknown -> unknown
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setTotalSizeInBytes(unknown).build(),
            HiveColumnStatistics.builder().setTotalSizeInBytes(unknown).build(),
            HiveColumnStatistics.builder().setTotalSizeInBytes(unknown).build());
    // Total size: 1 + unknown -> 1
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setTotalSizeInBytes(one).build(),
            HiveColumnStatistics.builder().setTotalSizeInBytes(unknown).build(),
            HiveColumnStatistics.builder().setTotalSizeInBytes(one).build());
    // Total size: 2 + 3 -> 5
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setTotalSizeInBytes(two).build(),
            HiveColumnStatistics.builder().setTotalSizeInBytes(three).build(),
            HiveColumnStatistics.builder().setTotalSizeInBytes(five).build());
}
/**
 * Exercises merging of decimal min/max ranges.
 *
 * The third argument of each call is presumably the expected merge result
 * (the assertion helper is not visible here).
 */
@Test
public void testMergeDecimalColumnStatistics()
{
    Optional<BigDecimal> unknown = Optional.empty();
    Optional<BigDecimal> zero = Optional.of(BigDecimal.valueOf(0));
    Optional<BigDecimal> one = Optional.of(BigDecimal.valueOf(1));
    Optional<BigDecimal> two = Optional.of(BigDecimal.valueOf(2));
    Optional<BigDecimal> three = Optional.of(BigDecimal.valueOf(3));

    // unknown range + unknown range -> unknown range
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(unknown, unknown)).build(),
            HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(unknown, unknown)).build(),
            HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(unknown, unknown)).build());
    // [1, 2] + unknown range -> [1, 2]
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(one, two)).build(),
            HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(unknown, unknown)).build(),
            HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(one, two)).build());
    // [1, 2] + [0, 3] -> [0, 3]
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(one, two)).build(),
            HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(zero, three)).build(),
            HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(zero, three)).build());
}
/**
 * Exercises merging of integer min/max ranges.
 *
 * The third argument of each call is presumably the expected merge result
 * (the assertion helper is not visible here).
 */
@Test
public void testMergeIntegerColumnStatistics()
{
    OptionalLong unknown = OptionalLong.empty();
    OptionalLong zero = OptionalLong.of(0);
    OptionalLong one = OptionalLong.of(1);
    OptionalLong two = OptionalLong.of(2);
    OptionalLong three = OptionalLong.of(3);

    // unknown range + unknown range -> unknown range
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setIntegerStatistics(new IntegerStatistics(unknown, unknown)).build(),
            HiveColumnStatistics.builder().setIntegerStatistics(new IntegerStatistics(unknown, unknown)).build(),
            HiveColumnStatistics.builder().setIntegerStatistics(new IntegerStatistics(unknown, unknown)).build());
    // [1, 2] + unknown range -> [1, 2]
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setIntegerStatistics(new IntegerStatistics(one, two)).build(),
            HiveColumnStatistics.builder().setIntegerStatistics(new IntegerStatistics(unknown, unknown)).build(),
            HiveColumnStatistics.builder().setIntegerStatistics(new IntegerStatistics(one, two)).build());
    // [1, 2] + [0, 3] -> [0, 3]
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setIntegerStatistics(new IntegerStatistics(one, two)).build(),
            HiveColumnStatistics.builder().setIntegerStatistics(new IntegerStatistics(zero, three)).build(),
            HiveColumnStatistics.builder().setIntegerStatistics(new IntegerStatistics(zero, three)).build());
}
/**
 * Exercises merging of date min/max ranges; dates are expressed as days since the epoch.
 *
 * The third argument of each call is presumably the expected merge result
 * (the assertion helper is not visible here).
 */
@Test
public void testMergeDateColumnStatistics()
{
    Optional<LocalDate> unknown = Optional.empty();
    Optional<LocalDate> day0 = Optional.of(LocalDate.ofEpochDay(0));
    Optional<LocalDate> day1 = Optional.of(LocalDate.ofEpochDay(1));
    Optional<LocalDate> day2 = Optional.of(LocalDate.ofEpochDay(2));
    Optional<LocalDate> day3 = Optional.of(LocalDate.ofEpochDay(3));

    // unknown range + unknown range -> unknown range
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(unknown, unknown)).build(),
            HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(unknown, unknown)).build(),
            HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(unknown, unknown)).build());
    // [day 1, day 2] + unknown range -> [day 1, day 2]
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(day1, day2)).build(),
            HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(unknown, unknown)).build(),
            HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(day1, day2)).build());
    // [day 1, day 2] + [day 0, day 3] -> [day 0, day 3]
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(day1, day2)).build(),
            HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(day0, day3)).build(),
            HiveColumnStatistics.builder().setDateStatistics(new DateStatistics(day0, day3)).build());
}
/**
 * Exercises merging of boolean statistics, each built from a pair of optional counts.
 *
 * The third argument of each call is presumably the expected merge result
 * (the assertion helper is not visible here).
 */
@Test
public void testMergeBooleanColumnStatistics()
{
    OptionalLong unknown = OptionalLong.empty();
    OptionalLong one = OptionalLong.of(1);
    OptionalLong two = OptionalLong.of(2);
    OptionalLong three = OptionalLong.of(3);
    OptionalLong five = OptionalLong.of(5);

    // unknown counts + unknown counts -> unknown counts
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(unknown, unknown)).build(),
            HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(unknown, unknown)).build(),
            HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(unknown, unknown)).build());
    // (1, 2) + unknown counts -> unknown counts
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(one, two)).build(),
            HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(unknown, unknown)).build(),
            HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(unknown, unknown)).build());
    // (1, 2) + (2, 3) -> (3, 5)
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(one, two)).build(),
            HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(two, three)).build(),
            HiveColumnStatistics.builder().setBooleanStatistics(new BooleanStatistics(three, five)).build());
}
/**
 * Exercises merging of double min/max ranges.
 *
 * The third argument of each call is presumably the expected merge result
 * (the assertion helper is not visible here).
 */
@Test
public void testMergeDoubleColumnStatistics()
{
    OptionalDouble unknown = OptionalDouble.empty();
    OptionalDouble zero = OptionalDouble.of(0);
    OptionalDouble one = OptionalDouble.of(1);
    OptionalDouble two = OptionalDouble.of(2);
    OptionalDouble three = OptionalDouble.of(3);

    // unknown range + unknown range -> unknown range
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setDoubleStatistics(new DoubleStatistics(unknown, unknown)).build(),
            HiveColumnStatistics.builder().setDoubleStatistics(new DoubleStatistics(unknown, unknown)).build(),
            HiveColumnStatistics.builder().setDoubleStatistics(new DoubleStatistics(unknown, unknown)).build());
    // [1, 2] + unknown range -> [1, 2]
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setDoubleStatistics(new DoubleStatistics(one, two)).build(),
            HiveColumnStatistics.builder().setDoubleStatistics(new DoubleStatistics(unknown, unknown)).build(),
            HiveColumnStatistics.builder().setDoubleStatistics(new DoubleStatistics(one, two)).build());
    // [1, 2] + [0, 3] -> [0, 3]
    assertMergeHiveColumnStatistics(
            HiveColumnStatistics.builder().setDoubleStatistics(new DoubleStatistics(one, two)).build(),
            HiveColumnStatistics.builder().setDoubleStatistics(new DoubleStatistics(zero, three)).build(),
            HiveColumnStatistics.builder().setDoubleStatistics(new DoubleStatistics(zero, three)).build());
}
/**
 * Builds partition statistics with empty basic statistics and a single entry
 * for {@code COLUMN} that carries only the given nulls count.
 *
 * @param nullsCount nulls count to record for {@code COLUMN}
 * @return the assembled partition statistics
 */
private static PartitionStatistics nullsCount(long nullsCount)
{
    HiveBasicStatistics basicStatistics = HiveBasicStatistics.createEmptyStatistics();
    HiveColumnStatistics.Builder columnStatistics = HiveColumnStatistics.builder();
    columnStatistics.setNullsCount(nullsCount);
    return new PartitionStatistics(basicStatistics, ImmutableMap.of(COLUMN, columnStatistics.build()));
}
/**
 * Builds partition statistics with empty basic statistics and a single entry
 * for {@code COLUMN} that carries only the given total size in bytes.
 *
 * @param dataSize total size in bytes to record for {@code COLUMN}
 * @return the assembled partition statistics
 */
private static PartitionStatistics dataSize(long dataSize)
{
    HiveBasicStatistics basicStatistics = HiveBasicStatistics.createEmptyStatistics();
    HiveColumnStatistics.Builder columnStatistics = HiveColumnStatistics.builder();
    columnStatistics.setTotalSizeInBytes(dataSize);
    return new PartitionStatistics(basicStatistics, ImmutableMap.of(COLUMN, columnStatistics.build()));
}
/**
 * Builds partition statistics whose basic statistics are created as
 * {@code new HiveBasicStatistics(0, rowsCount, 0, 0)} and whose single
 * {@code COLUMN} entry carries only the given total size in bytes.
 *
 * @param rowsCount value passed as the second basic-statistics argument
 * @param dataSize total size in bytes to record for {@code COLUMN}
 * @return the assembled partition statistics
 */
private static PartitionStatistics rowsCountAndDataSize(long rowsCount, long dataSize)
{
    HiveBasicStatistics basicStatistics = new HiveBasicStatistics(0, rowsCount, 0, 0);
    HiveColumnStatistics.Builder columnStatistics = HiveColumnStatistics.builder();
    columnStatistics.setTotalSizeInBytes(dataSize);
    return new PartitionStatistics(basicStatistics, ImmutableMap.of(COLUMN, columnStatistics.build()));
}
/**
 * Creates column statistics that contain only a decimal min/max range.
 *
 * @param min lower bound, or empty when unknown
 * @param max upper bound, or empty when unknown
 * @return column statistics holding the decimal range
 */
private static HiveColumnStatistics decimalRange(Optional<BigDecimal> min, Optional<BigDecimal> max)
{
    HiveColumnStatistics.Builder statistics = HiveColumnStatistics.builder();
    statistics.setDecimalStatistics(new DecimalStatistics(min, max));
    return statistics.build();
}
// NOTE(review): the brace below closes an enclosing scope whose opening is not visible in this chunk.
}
/**
 * Creates column statistics that contain only a double min/max range.
 *
 * @param min lower bound, or empty when unknown
 * @param max upper bound, or empty when unknown
 * @return column statistics holding the double range
 */
private static HiveColumnStatistics doubleRange(OptionalDouble min, OptionalDouble max)
{
    HiveColumnStatistics.Builder statistics = HiveColumnStatistics.builder();
    statistics.setDoubleStatistics(new DoubleStatistics(min, max));
    return statistics.build();
}
/**
 * Creates column statistics that contain only an integer min/max range.
 *
 * @param min lower bound, or empty when unknown
 * @param max upper bound, or empty when unknown
 * @return column statistics holding the integer range
 */
private static HiveColumnStatistics integerRange(OptionalLong min, OptionalLong max)
{
    HiveColumnStatistics.Builder statistics = HiveColumnStatistics.builder();
    statistics.setIntegerStatistics(new IntegerStatistics(min, max));
    return statistics.build();
}
/**
 * Creates column statistics that contain only a distinct values count.
 *
 * @param count distinct values count to record
 * @return column statistics holding that count
 */
private static HiveColumnStatistics distinctValuesCount(long count)
{
    HiveColumnStatistics.Builder statistics = HiveColumnStatistics.builder();
    statistics.setDistinctValuesCount(count);
    return statistics.build();
}
/**
 * Builds partition statistics whose basic statistics are created as
 * {@code new HiveBasicStatistics(0, rowsCount, 0, 0)} and whose single
 * {@code COLUMN} entry carries only the given nulls count.
 *
 * @param rowsCount value passed as the second basic-statistics argument
 * @param nullsCount nulls count to record for {@code COLUMN}
 * @return the assembled partition statistics
 */
private static PartitionStatistics rowsCountAndNullsCount(long rowsCount, long nullsCount)
{
    HiveBasicStatistics basicStatistics = new HiveBasicStatistics(0, rowsCount, 0, 0);
    HiveColumnStatistics.Builder columnStatistics = HiveColumnStatistics.builder();
    columnStatistics.setNullsCount(nullsCount);
    return new PartitionStatistics(basicStatistics, ImmutableMap.of(COLUMN, columnStatistics.build()));
}
/**
 * Creates column statistics that contain only a date min/max range. Each bound
 * that is present is converted with {@code TestMetastoreHiveStatisticsProvider.parseDate}.
 *
 * @param min textual lower bound, or empty when unknown
 * @param max textual upper bound, or empty when unknown
 * @return column statistics holding the date range
 */
private static HiveColumnStatistics dateRange(Optional<String> min, Optional<String> max)
{
    HiveColumnStatistics.Builder statistics = HiveColumnStatistics.builder();
    statistics.setDateStatistics(new DateStatistics(
            min.map(text -> TestMetastoreHiveStatisticsProvider.parseDate(text)),
            max.map(text -> TestMetastoreHiveStatisticsProvider.parseDate(text))));
    return statistics.build();
}
PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setMaxValueSizeInBytes(-1).build())) .build(), invalidColumnStatistics("maxValueSizeInBytes must be greater than or equal to zero: -1")); PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setTotalSizeInBytes(-1).build())) .build(), invalidColumnStatistics("totalSizeInBytes must be greater than or equal to zero: -1")); PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setNullsCount(-1).build())) .build(), invalidColumnStatistics("nullsCount must be greater than or equal to zero: -1")); PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setNullsCount(1).build())) .build(), invalidColumnStatistics("nullsCount must be less than or equal to rowCount. nullsCount: 1. rowCount: 0.")); PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setDistinctValuesCount(-1).build())) .build(), invalidColumnStatistics("distinctValuesCount must be greater than or equal to zero: -1"));