/**
 * Combines the per-partition statistics of a single column into one {@link ColumnStatistics}.
 *
 * <p>Each partition's column-statistics map is queried for {@code column}; partitions for which the
 * lookup yields {@code null} are ignored. If no partition yields an entry, the result is
 * {@link ColumnStatistics#empty()}. Otherwise the distinct-values count and the range are derived
 * from the collected per-partition column statistics, while the nulls fraction and the data size
 * are derived from the complete {@code partitionStatistics} collection.
 *
 * @param column name used to look up the column in each partition's column statistics
 * @param type column type, forwarded to {@code calculateRange}
 * @param rowsCount row count, forwarded to {@code calculateDataSize}
 * @param partitionStatistics statistics of the partitions to combine
 * @return the combined statistics, or {@link ColumnStatistics#empty()} when no partition has
 *         statistics for {@code column}
 */
@VisibleForTesting
static ColumnStatistics createDataColumnStatistics(String column, Type type, double rowsCount, Collection<PartitionStatistics> partitionStatistics) {
    // Look the column up in every partition and keep only the partitions that actually have it.
    List<HiveColumnStatistics> perPartitionColumnStats = partitionStatistics.stream()
            .map(partition -> partition.getColumnStatistics().get(column))
            .filter(Objects::nonNull)
            .collect(toImmutableList());

    // Nothing to aggregate: no partition reported statistics for this column.
    if (perPartitionColumnStats.isEmpty()) {
        return ColumnStatistics.empty();
    }

    return ColumnStatistics.builder()
            .setDistinctValuesCount(calculateDistinctValuesCount(perPartitionColumnStats))
            .setNullsFraction(calculateNullsFraction(column, partitionStatistics))
            .setDataSize(calculateDataSize(column, partitionStatistics, rowsCount))
            .setRange(calculateRange(type, perPartitionColumnStats))
            .build();
}
/**
 * Checks that {@code createDataColumnStatistics} yields {@link ColumnStatistics#empty()} for three
 * inputs: an empty partition list, a list of two {@code PartitionStatistics.empty()} instances,
 * and a single partition whose only column-statistics entry is keyed by {@code "column2"}.
 */
@Test
public void testCreateDataColumnStatistics() {
    // Scenario 1: there are no partitions at all.
    ImmutableList<PartitionStatistics> noPartitions = ImmutableList.of();
    assertEquals(
            createDataColumnStatistics(COLUMN, BIGINT, 1000, noPartitions),
            ColumnStatistics.empty());

    // Scenario 2: two partitions built with PartitionStatistics.empty()
    // (presumably carrying no column statistics -- confirm against PartitionStatistics).
    ImmutableList<PartitionStatistics> emptyPartitions = ImmutableList.of(
            PartitionStatistics.empty(),
            PartitionStatistics.empty());
    assertEquals(
            createDataColumnStatistics(COLUMN, BIGINT, 1000, emptyPartitions),
            ColumnStatistics.empty());

    // Scenario 3: one partition whose statistics are keyed by "column2" only
    // (assumes COLUMN is a different name -- confirm against the constant's definition).
    PartitionStatistics otherColumnOnly = new PartitionStatistics(
            HiveBasicStatistics.createZeroStatistics(),
            ImmutableMap.of("column2", HiveColumnStatistics.empty()));
    assertEquals(
            createDataColumnStatistics(COLUMN, BIGINT, 1000, ImmutableList.of(otherColumnOnly)),
            ColumnStatistics.empty());
}
/**
 * Exercises {@code createDataColumnStatistics} with inputs that provide no statistics entry for
 * {@code COLUMN} and asserts that each call returns a value equal to
 * {@link ColumnStatistics#empty()}.
 */
// NOTE(review): a method with this exact name, signature and body is declared immediately before
// this one. If both belong to the same class, the duplicate declaration will not compile --
// confirm and keep only one of them.
@Test
public void testCreateDataColumnStatistics() {
    ColumnStatistics emptyStatistics = ColumnStatistics.empty();
    double rowsCount = 1000;

    // Empty partition list.
    ColumnStatistics fromNoPartitions = createDataColumnStatistics(COLUMN, BIGINT, rowsCount, ImmutableList.of());
    assertEquals(fromNoPartitions, emptyStatistics);

    // Two partitions created via PartitionStatistics.empty().
    ColumnStatistics fromEmptyPartitions = createDataColumnStatistics(
            COLUMN,
            BIGINT,
            rowsCount,
            ImmutableList.of(PartitionStatistics.empty(), PartitionStatistics.empty()));
    assertEquals(fromEmptyPartitions, emptyStatistics);

    // A single partition with zero basic statistics and column statistics registered under
    // "column2" only (assumed to differ from COLUMN -- verify against the constant).
    ColumnStatistics fromOtherColumn = createDataColumnStatistics(
            COLUMN,
            BIGINT,
            rowsCount,
            ImmutableList.of(new PartitionStatistics(
                    HiveBasicStatistics.createZeroStatistics(),
                    ImmutableMap.of("column2", HiveColumnStatistics.empty()))));
    assertEquals(fromOtherColumn, emptyStatistics);
}