/**
 * Builds column statistics that carry only the size-related values and the nulls count.
 *
 * @param maxValueSizeInBytes value passed to the builder's {@code setMaxValueSizeInBytes}
 * @param totalSizeInBytes value passed to the builder's {@code setTotalSizeInBytes}
 * @param nullsCount value passed to the builder's {@code setNullsCount}
 * @return the statistics produced by the builder
 */
public static HiveColumnStatistics createBinaryColumnStatistics(OptionalLong maxValueSizeInBytes, OptionalLong totalSizeInBytes, OptionalLong nullsCount)
{
    HiveColumnStatistics binaryStatistics = builder()
            .setMaxValueSizeInBytes(maxValueSizeInBytes)
            .setTotalSizeInBytes(totalSizeInBytes)
            .setNullsCount(nullsCount)
            .build();
    return binaryStatistics;
}
/**
 * Merges two per-column statistics maps and checks the result against a map
 * that holds only the columns present on both sides (column2 and column3).
 */
@Test
public void testMergeHiveColumnStatisticsMap()
{
    Map<String, HiveColumnStatistics> left = ImmutableMap.<String, HiveColumnStatistics>builder()
            .put("column1", createIntegerColumnStatistics(OptionalLong.of(1), OptionalLong.of(2), OptionalLong.of(3), OptionalLong.of(4)))
            .put("column2", createDoubleColumnStatistics(OptionalDouble.of(2), OptionalDouble.of(3), OptionalLong.of(4), OptionalLong.of(5)))
            .put("column3", createBinaryColumnStatistics(OptionalLong.of(5), OptionalLong.of(5), OptionalLong.of(10)))
            .put("column4", createBooleanColumnStatistics(OptionalLong.of(1), OptionalLong.of(2), OptionalLong.of(3)))
            .build();

    Map<String, HiveColumnStatistics> right = ImmutableMap.<String, HiveColumnStatistics>builder()
            .put("column5", createIntegerColumnStatistics(OptionalLong.of(1), OptionalLong.of(2), OptionalLong.of(3), OptionalLong.of(4)))
            .put("column2", createDoubleColumnStatistics(OptionalDouble.of(1), OptionalDouble.of(4), OptionalLong.of(4), OptionalLong.of(6)))
            .put("column3", createBinaryColumnStatistics(OptionalLong.of(6), OptionalLong.of(5), OptionalLong.of(10)))
            .put("column6", createBooleanColumnStatistics(OptionalLong.of(1), OptionalLong.of(2), OptionalLong.of(3)))
            .build();

    // Only the shared keys survive the merge.
    Map<String, HiveColumnStatistics> mergedExpectation = ImmutableMap.<String, HiveColumnStatistics>builder()
            .put("column2", createDoubleColumnStatistics(OptionalDouble.of(1), OptionalDouble.of(4), OptionalLong.of(8), OptionalLong.of(6)))
            .put("column3", createBinaryColumnStatistics(OptionalLong.of(6), OptionalLong.of(10), OptionalLong.of(20)))
            .build();

    assertThat(merge(left, right)).isEqualTo(mergedExpectation);

    // Two empty inputs merge to an empty map.
    assertThat(merge(ImmutableMap.of(), ImmutableMap.of())).isEqualTo(ImmutableMap.of());
}
/**
 * Copy constructor: seeds every builder field from the matching getter of {@code other}.
 *
 * @param other statistics whose values are copied into this builder
 */
private Builder(HiveColumnStatistics other)
{
    // Counters and sizes
    this.nullsCount = other.getNullsCount();
    this.distinctValuesCount = other.getDistinctValuesCount();
    this.maxValueSizeInBytes = other.getMaxValueSizeInBytes();
    this.totalSizeInBytes = other.getTotalSizeInBytes();

    // Per-type statistics
    this.booleanStatistics = other.getBooleanStatistics();
    this.dateStatistics = other.getDateStatistics();
    this.decimalStatistics = other.getDecimalStatistics();
    this.doubleStatistics = other.getDoubleStatistics();
    this.integerStatistics = other.getIntegerStatistics();
}
/**
 * Derives a value range for a column from the statistics that match its type.
 *
 * @param type column type; BIGINT, INTEGER, SMALLINT, TINYINT, DOUBLE, REAL, DATE and decimal types are handled
 * @param statistics column statistics to read the typed statistics from
 * @return the range built by the type-specific helper, or empty when the matching statistics
 *         are absent or the helper yields no range
 * @throws IllegalArgumentException if {@code type} is none of the handled types
 */
private static Optional<DoubleRange> createRange(Type type, HiveColumnStatistics statistics)
{
    boolean integralType = type.equals(BIGINT) || type.equals(INTEGER) || type.equals(SMALLINT) || type.equals(TINYINT);
    if (integralType) {
        return statistics.getIntegerStatistics()
                .flatMap(integerRange -> createIntegerRange(type, integerRange));
    }

    boolean floatingPointType = type.equals(DOUBLE) || type.equals(REAL);
    if (floatingPointType) {
        return statistics.getDoubleStatistics()
                .flatMap(MetastoreHiveStatisticsProvider::createDoubleRange);
    }
    else if (type.equals(DATE)) {
        return statistics.getDateStatistics()
                .flatMap(MetastoreHiveStatisticsProvider::createDateRange);
    }
    else if (type instanceof DecimalType) {
        return statistics.getDecimalStatistics()
                .flatMap(MetastoreHiveStatisticsProvider::createDecimalRange);
    }
    else {
        throw new IllegalArgumentException("Unexpected type: " + type);
    }
}
/**
 * Resolves the distinct values count of a single column statistics object.
 *
 * When boolean statistics are present with both the true and the false count known,
 * the result is the number of those two counts that are greater than zero (0, 1 or 2).
 * Otherwise the stored distinct values count is returned as is, which may be empty.
 *
 * @param statistics column statistics to inspect
 * @return the derived or stored distinct values count, or empty when neither is available
 */
private static OptionalLong getDistinctValuesCount(HiveColumnStatistics statistics)
{
    return statistics.getBooleanStatistics()
            // A boolean-derived count needs both sides to be known.
            .filter(counts -> counts.getFalseCount().isPresent() && counts.getTrueCount().isPresent())
            .map(counts -> {
                long falses = counts.getFalseCount().getAsLong();
                long trues = counts.getTrueCount().getAsLong();
                return OptionalLong.of((falses > 0 ? 1 : 0) + (trues > 0 ? 1 : 0));
            })
            .orElseGet(statistics::getDistinctValuesCount);
}
PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setMaxValueSizeInBytes(-1).build())) .build(), invalidColumnStatistics("maxValueSizeInBytes must be greater than or equal to zero: -1")); PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setTotalSizeInBytes(-1).build())) .build(), invalidColumnStatistics("totalSizeInBytes must be greater than or equal to zero: -1")); PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setNullsCount(-1).build())) .build(), invalidColumnStatistics("nullsCount must be greater than or equal to zero: -1")); PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setNullsCount(1).build())) .build(), invalidColumnStatistics("nullsCount must be less than or equal to rowCount. nullsCount: 1. rowCount: 0.")); PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setDistinctValuesCount(-1).build())) .build(), invalidColumnStatistics("distinctValuesCount must be greater than or equal to zero: -1"));
OptionalLong nullsCount = longStatsData.isSetNumNulls() ? fromMetastoreNullsCount(longStatsData.getNumNulls()) : OptionalLong.empty(); OptionalLong distinctValuesCount = longStatsData.isSetNumDVs() ? OptionalLong.of(longStatsData.getNumDVs()) : OptionalLong.empty(); return createIntegerColumnStatistics(min, max, nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); OptionalLong nullsCount = doubleStatsData.isSetNumNulls() ? fromMetastoreNullsCount(doubleStatsData.getNumNulls()) : OptionalLong.empty(); OptionalLong distinctValuesCount = doubleStatsData.isSetNumDVs() ? OptionalLong.of(doubleStatsData.getNumDVs()) : OptionalLong.empty(); return createDoubleColumnStatistics(min, max, nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); OptionalLong nullsCount = decimalStatsData.isSetNumNulls() ? fromMetastoreNullsCount(decimalStatsData.getNumNulls()) : OptionalLong.empty(); OptionalLong distinctValuesCount = decimalStatsData.isSetNumDVs() ? OptionalLong.of(decimalStatsData.getNumDVs()) : OptionalLong.empty(); return createDecimalColumnStatistics(min, max, nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); OptionalLong nullsCount = dateStatsData.isSetNumNulls() ? fromMetastoreNullsCount(dateStatsData.getNumNulls()) : OptionalLong.empty(); OptionalLong distinctValuesCount = dateStatsData.isSetNumDVs() ? OptionalLong.of(dateStatsData.getNumDVs()) : OptionalLong.empty(); return createDateColumnStatistics(min, max, nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); return createBooleanColumnStatistics( booleanStatsData.isSetNumTrues() ? OptionalLong.of(booleanStatsData.getNumTrues()) : OptionalLong.empty(), booleanStatsData.isSetNumFalses() ? OptionalLong.of(booleanStatsData.getNumFalses()) : OptionalLong.empty(), OptionalLong nullsCount = stringStatsData.isSetNumNulls() ? 
fromMetastoreNullsCount(stringStatsData.getNumNulls()) : OptionalLong.empty(); OptionalLong distinctValuesCount = stringStatsData.isSetNumDVs() ? OptionalLong.of(stringStatsData.getNumDVs()) : OptionalLong.empty(); return createStringColumnStatistics( maxColumnLength, getTotalSizeInBytes(averageColumnLength, rowCount, nullsCount), OptionalDouble averageColumnLength = binaryStatsData.isSetAvgColLen() ? OptionalDouble.of(binaryStatsData.getAvgColLen()) : OptionalDouble.empty(); OptionalLong nullsCount = binaryStatsData.isSetNumNulls() ? fromMetastoreNullsCount(binaryStatsData.getNumNulls()) : OptionalLong.empty(); return createBinaryColumnStatistics(
/**
 * Converts column statistics into a metastore {@link ColumnStatisticsObj} carrying date stats.
 *
 * Only values that are present are written: min/max become the low/high value,
 * the nulls count becomes numNulls, and the distinct values count is passed through
 * {@code toMetastoreDistinctValuesCount} together with the nulls count before being set as numDVs.
 *
 * @param columnName name recorded on the resulting object
 * @param columnType type whose {@code toString()} is recorded on the resulting object
 * @param statistics source statistics
 * @return the populated metastore statistics object
 */
private static ColumnStatisticsObj createDateStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics)
{
    DateColumnStatsData dateData = new DateColumnStatsData();

    statistics.getDateStatistics().ifPresent(range -> {
        range.getMin().ifPresent(lowest -> dateData.setLowValue(toMetastoreDate(lowest)));
        range.getMax().ifPresent(highest -> dateData.setHighValue(toMetastoreDate(highest)));
    });

    statistics.getNullsCount().ifPresent(nulls -> dateData.setNumNulls(nulls));

    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount())
            .ifPresent(distinctValues -> dateData.setNumDVs(distinctValues));

    return new ColumnStatisticsObj(columnName, columnType.toString(), dateStats(dateData));
}
/**
 * Converts column statistics into a metastore {@link ColumnStatisticsObj} carrying decimal stats.
 *
 * Only values that are present are written: min/max become the low/high value,
 * the nulls count becomes numNulls, and the distinct values count is passed through
 * {@code toMetastoreDistinctValuesCount} together with the nulls count before being set as numDVs.
 *
 * @param columnName name recorded on the resulting object
 * @param columnType type whose {@code toString()} is recorded on the resulting object
 * @param statistics source statistics
 * @return the populated metastore statistics object
 */
private static ColumnStatisticsObj createDecimalStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics)
{
    DecimalColumnStatsData decimalData = new DecimalColumnStatsData();

    statistics.getDecimalStatistics().ifPresent(range -> {
        range.getMin().ifPresent(lowest -> decimalData.setLowValue(toMetastoreDecimal(lowest)));
        range.getMax().ifPresent(highest -> decimalData.setHighValue(toMetastoreDecimal(highest)));
    });

    statistics.getNullsCount().ifPresent(nulls -> decimalData.setNumNulls(nulls));

    toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount())
            .ifPresent(distinctValues -> decimalData.setNumDVs(distinctValues));

    return new ColumnStatisticsObj(columnName, columnType.toString(), decimalStats(decimalData));
}
/**
 * Converts column statistics into a metastore {@link ColumnStatisticsObj} carrying boolean stats.
 *
 * The nulls count, false count and true count are each written only when present.
 *
 * @param columnName name recorded on the resulting object
 * @param columnType type whose {@code toString()} is recorded on the resulting object
 * @param statistics source statistics
 * @return the populated metastore statistics object
 */
private static ColumnStatisticsObj createBooleanStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics)
{
    BooleanColumnStatsData booleanData = new BooleanColumnStatsData();

    statistics.getNullsCount().ifPresent(nulls -> booleanData.setNumNulls(nulls));

    statistics.getBooleanStatistics().ifPresent(counts -> {
        counts.getFalseCount().ifPresent(falses -> booleanData.setNumFalses(falses));
        counts.getTrueCount().ifPresent(trues -> booleanData.setNumTrues(trues));
    });

    return new ColumnStatisticsObj(columnName, columnType.toString(), booleanStats(booleanData));
}
/**
 * Exercises calculateDistinctValuesCount with empty input, plain distinct-value counts,
 * and boolean statistics whose true/false counts are known, partially known or zero.
 */
@Test
public void testCalculateDistinctValuesCount()
{
    HiveColumnStatistics noStatistics = HiveColumnStatistics.empty();
    OptionalLong unset = OptionalLong.empty();
    Estimate unknown = Estimate.unknown();

    // No statistics at all
    assertEquals(calculateDistinctValuesCount(ImmutableList.of()), unknown);
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(noStatistics)), unknown);
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(noStatistics, noStatistics)), unknown);

    // Explicit distinct values counts
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(distinctValuesCount(1))), Estimate.of(1));
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(distinctValuesCount(1), distinctValuesCount(2))), Estimate.of(2));
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(distinctValuesCount(1), noStatistics)), Estimate.of(1));

    // Boolean statistics: a count is derived only when both true and false counts are set
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(createBooleanColumnStatistics(unset, unset, unset))), unknown);
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(createBooleanColumnStatistics(OptionalLong.of(1), OptionalLong.of(0), unset))), Estimate.of(1));
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(createBooleanColumnStatistics(OptionalLong.of(10), unset, unset))), unknown);
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(createBooleanColumnStatistics(OptionalLong.of(10), OptionalLong.of(10), unset))), Estimate.of(2));
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(createBooleanColumnStatistics(unset, OptionalLong.of(10), unset))), unknown);
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(createBooleanColumnStatistics(OptionalLong.of(0), OptionalLong.of(10), unset))), Estimate.of(1));
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(createBooleanColumnStatistics(OptionalLong.of(0), OptionalLong.of(0), unset))), Estimate.of(0));

    // Two boolean statistics combined
    assertEquals(
            calculateDistinctValuesCount(ImmutableList.of(
                    createBooleanColumnStatistics(OptionalLong.of(0), OptionalLong.of(10), unset),
                    createBooleanColumnStatistics(OptionalLong.of(1), OptionalLong.of(10), unset))),
            Estimate.of(2));
}
/**
 * Feeds a single statistics entry keyed by UNPARTITIONED_ID into the provider and checks
 * the resulting table statistics: row count 1000, range [-100, 100], nulls fraction 0.5
 * and 300 distinct values for the one BIGINT column.
 */
@Test
public void testGetTableStatisticsUnpartitioned()
{
    HiveBasicStatistics basicStatistics = new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty());
    PartitionStatistics partitionStatistics = PartitionStatistics.builder()
            .setBasicStatistics(basicStatistics)
            .setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();

    // The provider always answers with the same unpartitioned entry.
    MetastoreHiveStatisticsProvider provider = new MetastoreHiveStatisticsProvider(
            (requestedTable, requestedPartitions) -> ImmutableMap.of(UNPARTITIONED_ID, partitionStatistics));

    HiveSessionProperties sessionProperties = new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig());
    TestingConnectorSession session = new TestingConnectorSession(sessionProperties.getSessionProperties());

    HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty());

    ColumnStatistics expectedColumnStatistics = ColumnStatistics.builder()
            .setRange(new DoubleRange(-100, 100))
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(300))
            .build();
    TableStatistics expectedTableStatistics = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setColumnStatistics(columnHandle, expectedColumnStatistics)
            .build();

    TableStatistics actualTableStatistics = provider.getTableStatistics(
            session,
            TABLE,
            ImmutableMap.of(COLUMN, columnHandle),
            ImmutableMap.of(COLUMN, BIGINT),
            ImmutableList.of(new HivePartition(TABLE)));

    assertEquals(actualTableStatistics, expectedTableStatistics);
}
PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setMaxValueSizeInBytes(-1).build())) .build(), invalidColumnStatistics("maxValueSizeInBytes must be greater than or equal to zero: -1")); PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setTotalSizeInBytes(-1).build())) .build(), invalidColumnStatistics("totalSizeInBytes must be greater than or equal to zero: -1")); PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setNullsCount(-1).build())) .build(), invalidColumnStatistics("nullsCount must be greater than or equal to zero: -1")); PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setNullsCount(1).build())) .build(), invalidColumnStatistics("nullsCount must be less than or equal to rowCount. nullsCount: 1. rowCount: 0.")); PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(0, 0, 0, 0)) .setColumnStatistics(ImmutableMap.of(COLUMN, HiveColumnStatistics.builder().setDistinctValuesCount(-1).build())) .build(), invalidColumnStatistics("distinctValuesCount must be greater than or equal to zero: -1"));
/**
 * Exercises calculateDistinctValuesCount with empty input, plain distinct-value counts,
 * and boolean statistics whose true/false counts are known, partially known or zero.
 */
@Test
public void testCalculateDistinctValuesCount()
{
    HiveColumnStatistics noStatistics = HiveColumnStatistics.empty();
    OptionalLong unset = OptionalLong.empty();
    Estimate unknown = Estimate.unknown();

    // No statistics at all
    assertEquals(calculateDistinctValuesCount(ImmutableList.of()), unknown);
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(noStatistics)), unknown);
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(noStatistics, noStatistics)), unknown);

    // Explicit distinct values counts
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(distinctValuesCount(1))), Estimate.of(1));
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(distinctValuesCount(1), distinctValuesCount(2))), Estimate.of(2));
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(distinctValuesCount(1), noStatistics)), Estimate.of(1));

    // Boolean statistics: a count is derived only when both true and false counts are set
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(createBooleanColumnStatistics(unset, unset, unset))), unknown);
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(createBooleanColumnStatistics(OptionalLong.of(1), OptionalLong.of(0), unset))), Estimate.of(1));
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(createBooleanColumnStatistics(OptionalLong.of(10), unset, unset))), unknown);
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(createBooleanColumnStatistics(OptionalLong.of(10), OptionalLong.of(10), unset))), Estimate.of(2));
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(createBooleanColumnStatistics(unset, OptionalLong.of(10), unset))), unknown);
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(createBooleanColumnStatistics(OptionalLong.of(0), OptionalLong.of(10), unset))), Estimate.of(1));
    assertEquals(calculateDistinctValuesCount(ImmutableList.of(createBooleanColumnStatistics(OptionalLong.of(0), OptionalLong.of(0), unset))), Estimate.of(0));

    // Two boolean statistics combined
    assertEquals(
            calculateDistinctValuesCount(ImmutableList.of(
                    createBooleanColumnStatistics(OptionalLong.of(0), OptionalLong.of(10), unset),
                    createBooleanColumnStatistics(OptionalLong.of(1), OptionalLong.of(10), unset))),
            Estimate.of(2));
}
// Fixture: partition statistics holding basic statistics (only the second argument, 1000, is set)
// and integer column statistics for COLUMN built from the values -100, 100, 500 and 300.
// NOTE(review): the 1000 is presumably the row count and the four integers min/max/nulls/distinct — confirm against the constructors.
// The provider below ignores its lambda arguments and always returns this one entry keyed by partitionName.
PartitionStatistics statistics = PartitionStatistics.builder() .setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty())) .setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300)))) .build(); MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((table, hivePartitions) -> ImmutableMap.of(partitionName, statistics));
/**
 * Combines two column statistics objects into a new one.
 *
 * The integer, double, decimal, date and boolean statistics are each merged by their
 * dedicated merge helper. The remaining values are combined with {@code reduce}:
 * max value size with MAX, total size with ADD, nulls count with ADD and
 * distinct values count with MAX.
 * NOTE(review): the trailing boolean passed to {@code reduce} differs between the size
 * values (true) and the counts (false); its meaning is defined by {@code reduce} — confirm there.
 *
 * @param first one side of the merge
 * @param second the other side of the merge
 * @return a new statistics object built from the merged values
 */
public static HiveColumnStatistics merge(HiveColumnStatistics first, HiveColumnStatistics second) { return new HiveColumnStatistics( mergeIntegerStatistics(first.getIntegerStatistics(), second.getIntegerStatistics()), mergeDoubleStatistics(first.getDoubleStatistics(), second.getDoubleStatistics()), mergeDecimalStatistics(first.getDecimalStatistics(), second.getDecimalStatistics()), mergeDateStatistics(first.getDateStatistics(), second.getDateStatistics()), mergeBooleanStatistics(first.getBooleanStatistics(), second.getBooleanStatistics()), reduce(first.getMaxValueSizeInBytes(), second.getMaxValueSizeInBytes(), MAX, true), reduce(first.getTotalSizeInBytes(), second.getTotalSizeInBytes(), ADD, true), reduce(first.getNullsCount(), second.getNullsCount(), ADD, false), reduce(first.getDistinctValuesCount(), second.getDistinctValuesCount(), MAX, false)); }
/**
 * Builds column statistics that carry boolean true/false counts and the nulls count.
 *
 * @param trueCount first argument of the {@link BooleanStatistics} constructor
 * @param falseCount second argument of the {@link BooleanStatistics} constructor
 * @param nullsCount value passed to the builder's {@code setNullsCount}
 * @return the statistics produced by the builder
 */
public static HiveColumnStatistics createBooleanColumnStatistics(OptionalLong trueCount, OptionalLong falseCount, OptionalLong nullsCount)
{
    HiveColumnStatistics booleanColumnStatistics = builder()
            .setBooleanStatistics(new BooleanStatistics(trueCount, falseCount))
            .setNullsCount(nullsCount)
            .build();
    return booleanColumnStatistics;
}
/**
 * Merges two per-column statistics maps and checks the result against a map
 * that holds only the columns present on both sides (column2 and column3).
 */
@Test
public void testMergeHiveColumnStatisticsMap()
{
    Map<String, HiveColumnStatistics> left = ImmutableMap.<String, HiveColumnStatistics>builder()
            .put("column1", createIntegerColumnStatistics(OptionalLong.of(1), OptionalLong.of(2), OptionalLong.of(3), OptionalLong.of(4)))
            .put("column2", createDoubleColumnStatistics(OptionalDouble.of(2), OptionalDouble.of(3), OptionalLong.of(4), OptionalLong.of(5)))
            .put("column3", createBinaryColumnStatistics(OptionalLong.of(5), OptionalLong.of(5), OptionalLong.of(10)))
            .put("column4", createBooleanColumnStatistics(OptionalLong.of(1), OptionalLong.of(2), OptionalLong.of(3)))
            .build();

    Map<String, HiveColumnStatistics> right = ImmutableMap.<String, HiveColumnStatistics>builder()
            .put("column5", createIntegerColumnStatistics(OptionalLong.of(1), OptionalLong.of(2), OptionalLong.of(3), OptionalLong.of(4)))
            .put("column2", createDoubleColumnStatistics(OptionalDouble.of(1), OptionalDouble.of(4), OptionalLong.of(4), OptionalLong.of(6)))
            .put("column3", createBinaryColumnStatistics(OptionalLong.of(6), OptionalLong.of(5), OptionalLong.of(10)))
            .put("column6", createBooleanColumnStatistics(OptionalLong.of(1), OptionalLong.of(2), OptionalLong.of(3)))
            .build();

    // Only the shared keys survive the merge.
    Map<String, HiveColumnStatistics> mergedExpectation = ImmutableMap.<String, HiveColumnStatistics>builder()
            .put("column2", createDoubleColumnStatistics(OptionalDouble.of(1), OptionalDouble.of(4), OptionalLong.of(8), OptionalLong.of(6)))
            .put("column3", createBinaryColumnStatistics(OptionalLong.of(6), OptionalLong.of(10), OptionalLong.of(20)))
            .build();

    assertThat(merge(left, right)).isEqualTo(mergedExpectation);

    // Two empty inputs merge to an empty map.
    assertThat(merge(ImmutableMap.of(), ImmutableMap.of())).isEqualTo(ImmutableMap.of());
}
/**
 * Feeds a single statistics entry keyed by UNPARTITIONED_ID into the provider and checks
 * the resulting table statistics: row count 1000, range [-100, 100], nulls fraction 0.5
 * and 300 distinct values for the one BIGINT column.
 */
@Test
public void testGetTableStatisticsUnpartitioned()
{
    HiveBasicStatistics basicStatistics = new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty());
    PartitionStatistics partitionStatistics = PartitionStatistics.builder()
            .setBasicStatistics(basicStatistics)
            .setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();

    // The provider always answers with the same unpartitioned entry.
    MetastoreHiveStatisticsProvider provider = new MetastoreHiveStatisticsProvider(
            (requestedTable, requestedPartitions) -> ImmutableMap.of(UNPARTITIONED_ID, partitionStatistics));

    HiveSessionProperties sessionProperties = new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig());
    TestingConnectorSession session = new TestingConnectorSession(sessionProperties.getSessionProperties());

    HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty());

    ColumnStatistics expectedColumnStatistics = ColumnStatistics.builder()
            .setRange(new DoubleRange(-100, 100))
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(300))
            .build();
    TableStatistics expectedTableStatistics = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setColumnStatistics(columnHandle, expectedColumnStatistics)
            .build();

    TableStatistics actualTableStatistics = provider.getTableStatistics(
            session,
            TABLE,
            ImmutableMap.of(COLUMN, columnHandle),
            ImmutableMap.of(COLUMN, BIGINT),
            ImmutableList.of(new HivePartition(TABLE)));

    assertEquals(actualTableStatistics, expectedTableStatistics);
}
/**
 * Converts a string column whose metastore stats data has no field set and checks
 * that every value of the resulting column statistics is empty.
 */
@Test
public void testEmptyStringColumnStatsData()
{
    ColumnStatisticsObj unsetStringColumn = new ColumnStatisticsObj("my_col", STRING_TYPE_NAME, stringStats(new StringColumnStatsData()));

    HiveColumnStatistics converted = fromMetastoreApiColumnStatistics(unsetStringColumn, OptionalLong.empty());

    // Per-type statistics
    assertEquals(converted.getIntegerStatistics(), Optional.empty());
    assertEquals(converted.getDoubleStatistics(), Optional.empty());
    assertEquals(converted.getDecimalStatistics(), Optional.empty());
    assertEquals(converted.getDateStatistics(), Optional.empty());
    assertEquals(converted.getBooleanStatistics(), Optional.empty());

    // Sizes and counters
    assertEquals(converted.getMaxValueSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getTotalSizeInBytes(), OptionalLong.empty());
    assertEquals(converted.getNullsCount(), OptionalLong.empty());
    assertEquals(converted.getDistinctValuesCount(), OptionalLong.empty());
}