public static HiveColumnStatistics createDecimalColumnStatistics(Optional<BigDecimal> min, Optional<BigDecimal> max, OptionalLong nullsCount, OptionalLong distinctValuesCount) { return builder() .setDecimalStatistics(new DecimalStatistics(min, max)) .setNullsCount(nullsCount) .setDistinctValuesCount(distinctValuesCount) .build(); }
private static Optional<DecimalStatistics> mergeDecimalStatistics(Optional<DecimalStatistics> first, Optional<DecimalStatistics> second) { // normally, either both or none is present if (first.isPresent() && second.isPresent()) { return Optional.of(new DecimalStatistics( reduce(first.get().getMin(), second.get().getMin(), MIN, true), reduce(first.get().getMax(), second.get().getMax(), MAX, true))); } return Optional.empty(); }
private static Optional<DoubleRange> createDecimalRange(DecimalStatistics statistics) { if (statistics.getMin().isPresent() && statistics.getMax().isPresent()) { return Optional.of(new DoubleRange(statistics.getMin().get().doubleValue(), statistics.getMax().get().doubleValue())); } return Optional.empty(); }
private static ColumnStatisticsObj createDecimalStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) { DecimalColumnStatsData data = new DecimalColumnStatsData(); statistics.getDecimalStatistics().ifPresent(decimalStatistics -> { decimalStatistics.getMin().ifPresent(value -> data.setLowValue(toMetastoreDecimal(value))); decimalStatistics.getMax().ifPresent(value -> data.setHighValue(toMetastoreDecimal(value))); }); statistics.getNullsCount().ifPresent(data::setNumNulls); toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(data::setNumDVs); return new ColumnStatisticsObj(columnName, columnType.toString(), decimalStats(data)); }
@Test public void testMergeDecimalColumnStatistics() { assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.empty(), Optional.empty())).build(), HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.empty(), Optional.empty())).build(), HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.empty(), Optional.empty())).build()); assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.of(BigDecimal.valueOf(1)), Optional.of(BigDecimal.valueOf(2)))).build(), HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.empty(), Optional.empty())).build(), HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.of(BigDecimal.valueOf(1)), Optional.of(BigDecimal.valueOf(2)))).build()); assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.of(BigDecimal.valueOf(1)), Optional.of(BigDecimal.valueOf(2)))).build(), HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.of(BigDecimal.valueOf(0)), Optional.of(BigDecimal.valueOf(3)))).build(), HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.of(BigDecimal.valueOf(0)), Optional.of(BigDecimal.valueOf(3)))).build()); }
private static void setMinMax(ConnectorSession session, DateTimeZone timeZone, Type type, Block min, Block max, HiveColumnStatistics.Builder result) { if (type.equals(BIGINT) || type.equals(INTEGER) || type.equals(SMALLINT) || type.equals(TINYINT)) { result.setIntegerStatistics(new IntegerStatistics(getIntegerValue(session, type, min), getIntegerValue(session, type, max))); } else if (type.equals(DOUBLE) || type.equals(REAL)) { result.setDoubleStatistics(new DoubleStatistics(getDoubleValue(session, type, min), getDoubleValue(session, type, max))); } else if (type.equals(DATE)) { result.setDateStatistics(new DateStatistics(getDateValue(session, type, min), getDateValue(session, type, max))); } else if (type.equals(TIMESTAMP)) { result.setIntegerStatistics(new IntegerStatistics(getTimestampValue(timeZone, min), getTimestampValue(timeZone, max))); } else if (type instanceof DecimalType) { result.setDecimalStatistics(new DecimalStatistics(getDecimalValue(session, type, min), getDecimalValue(session, type, max))); } else { throw new IllegalArgumentException("Unexpected type: " + type); } }
private static HiveColumnStatistics decimalRange(Optional<BigDecimal> min, Optional<BigDecimal> max) { return HiveColumnStatistics.builder() .setDecimalStatistics(new DecimalStatistics(min, max)) .build(); } }
@Test public void testDecimalStatsToColumnStatistics() { DecimalColumnStatsData decimalColumnStatsData = new DecimalColumnStatsData(); BigDecimal low = new BigDecimal("0"); decimalColumnStatsData.setLowValue(new Decimal(ByteBuffer.wrap(low.unscaledValue().toByteArray()), (short) low.scale())); BigDecimal high = new BigDecimal("100"); decimalColumnStatsData.setHighValue(new Decimal(ByteBuffer.wrap(high.unscaledValue().toByteArray()), (short) high.scale())); decimalColumnStatsData.setNumNulls(1); decimalColumnStatsData.setNumDVs(20); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DECIMAL_TYPE_NAME, decimalStats(decimalColumnStatsData)); HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.of(1000)); assertEquals(actual.getIntegerStatistics(), Optional.empty()); assertEquals(actual.getDoubleStatistics(), Optional.empty()); assertEquals(actual.getDecimalStatistics(), Optional.of(new DecimalStatistics(Optional.of(low), Optional.of(high)))); assertEquals(actual.getDateStatistics(), Optional.empty()); assertEquals(actual.getBooleanStatistics(), Optional.empty()); assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getNullsCount(), OptionalLong.of(1)); assertEquals(actual.getDistinctValuesCount(), OptionalLong.of(19)); }
@Test public void testEmptyDecimalStatsToColumnStatistics() { DecimalColumnStatsData emptyDecimalColumnStatsData = new DecimalColumnStatsData(); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DECIMAL_TYPE_NAME, decimalStats(emptyDecimalColumnStatsData)); HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.empty()); assertEquals(actual.getIntegerStatistics(), Optional.empty()); assertEquals(actual.getDoubleStatistics(), Optional.empty()); assertEquals(actual.getDecimalStatistics(), Optional.of(new DecimalStatistics(Optional.empty(), Optional.empty()))); assertEquals(actual.getDateStatistics(), Optional.empty()); assertEquals(actual.getBooleanStatistics(), Optional.empty()); assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getNullsCount(), OptionalLong.empty()); assertEquals(actual.getDistinctValuesCount(), OptionalLong.empty()); }
@Test public void testMergeDecimalColumnStatistics() { assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.empty(), Optional.empty())).build(), HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.empty(), Optional.empty())).build(), HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.empty(), Optional.empty())).build()); assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.of(BigDecimal.valueOf(1)), Optional.of(BigDecimal.valueOf(2)))).build(), HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.empty(), Optional.empty())).build(), HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.of(BigDecimal.valueOf(1)), Optional.of(BigDecimal.valueOf(2)))).build()); assertMergeHiveColumnStatistics( HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.of(BigDecimal.valueOf(1)), Optional.of(BigDecimal.valueOf(2)))).build(), HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.of(BigDecimal.valueOf(0)), Optional.of(BigDecimal.valueOf(3)))).build(), HiveColumnStatistics.builder().setDecimalStatistics(new DecimalStatistics(Optional.of(BigDecimal.valueOf(0)), Optional.of(BigDecimal.valueOf(3)))).build()); }
private static HiveColumnStatistics decimalRange(Optional<BigDecimal> min, Optional<BigDecimal> max) { return HiveColumnStatistics.builder() .setDecimalStatistics(new DecimalStatistics(min, max)) .build(); } }
@Test public void testDecimalStatsToColumnStatistics() { DecimalColumnStatsData decimalColumnStatsData = new DecimalColumnStatsData(); BigDecimal low = new BigDecimal("0"); decimalColumnStatsData.setLowValue(new Decimal(ByteBuffer.wrap(low.unscaledValue().toByteArray()), (short) low.scale())); BigDecimal high = new BigDecimal("100"); decimalColumnStatsData.setHighValue(new Decimal(ByteBuffer.wrap(high.unscaledValue().toByteArray()), (short) high.scale())); decimalColumnStatsData.setNumNulls(1); decimalColumnStatsData.setNumDVs(20); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DECIMAL_TYPE_NAME, decimalStats(decimalColumnStatsData)); HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.of(1000)); assertEquals(actual.getIntegerStatistics(), Optional.empty()); assertEquals(actual.getDoubleStatistics(), Optional.empty()); assertEquals(actual.getDecimalStatistics(), Optional.of(new DecimalStatistics(Optional.of(low), Optional.of(high)))); assertEquals(actual.getDateStatistics(), Optional.empty()); assertEquals(actual.getBooleanStatistics(), Optional.empty()); assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getNullsCount(), OptionalLong.of(1)); assertEquals(actual.getDistinctValuesCount(), OptionalLong.of(19)); }
@Test public void testEmptyDecimalStatsToColumnStatistics() { DecimalColumnStatsData emptyDecimalColumnStatsData = new DecimalColumnStatsData(); ColumnStatisticsObj columnStatisticsObj = new ColumnStatisticsObj("my_col", DECIMAL_TYPE_NAME, decimalStats(emptyDecimalColumnStatsData)); HiveColumnStatistics actual = fromMetastoreApiColumnStatistics(columnStatisticsObj, OptionalLong.empty()); assertEquals(actual.getIntegerStatistics(), Optional.empty()); assertEquals(actual.getDoubleStatistics(), Optional.empty()); assertEquals(actual.getDecimalStatistics(), Optional.of(new DecimalStatistics(Optional.empty(), Optional.empty()))); assertEquals(actual.getDateStatistics(), Optional.empty()); assertEquals(actual.getBooleanStatistics(), Optional.empty()); assertEquals(actual.getMaxValueSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getTotalSizeInBytes(), OptionalLong.empty()); assertEquals(actual.getNullsCount(), OptionalLong.empty()); assertEquals(actual.getDistinctValuesCount(), OptionalLong.empty()); }