/**
 * Builds a {@link TableStatistics} object from the supplied statistics data.
 *
 * <p>The row count is always set. Column statistics are attached only when the
 * row count is positive; in that case one entry is produced for every handle in
 * {@code columnHandles}, looked up in the statistics data by column name.
 *
 * @param columnHandles column handles keyed by column name; each value is cast to {@code TpcdsColumnHandle}
 * @param statisticsData source of the row count and the per-column statistics
 * @return the assembled table statistics
 */
private TableStatistics toTableStatistics(Map<String, ColumnHandle> columnHandles, TableStatisticsData statisticsData)
{
    long rowCount = statisticsData.getRowCount();
    TableStatistics.Builder builder = TableStatistics.builder()
            .setRowCount(Estimate.of(rowCount));

    // Without rows only the row count is reported; per-column data is not consulted.
    if (rowCount <= 0) {
        return builder.build();
    }

    Map<String, ColumnStatisticsData> statisticsByColumn = statisticsData.getColumns();
    columnHandles.forEach((columnName, handle) -> {
        TpcdsColumnHandle tpcdsHandle = (TpcdsColumnHandle) handle;
        builder.setColumnStatistics(
                handle,
                toColumnStatistics(statisticsByColumn.get(columnName), tpcdsHandle.getType(), rowCount));
    });
    return builder.build();
}
return result.build();
/**
 * Produces table statistics with a row count of zero.
 *
 * <p>Every column in {@code columns} receives a nulls fraction of 0 and a
 * distinct values count of 0. A data size of 0 is added as well when
 * {@code hasDataSize} returns true for the column's type.
 *
 * @param columns column handles keyed by column name
 * @param columnTypes column types keyed by column name; must contain an entry for every key of {@code columns}
 * @return table statistics with a zero row count and zeroed column statistics
 */
private TableStatistics createZeroStatistics(Map<String, ColumnHandle> columns, Map<String, Type> columnTypes)
{
    TableStatistics.Builder tableBuilder = TableStatistics.builder()
            .setRowCount(Estimate.of(0));

    for (Map.Entry<String, ColumnHandle> column : columns.entrySet()) {
        String columnName = column.getKey();
        Type columnType = columnTypes.get(columnName);
        // A column without a known type is treated as a verification failure.
        verify(columnType != null, "columnType is missing for column: %s", columnName);

        ColumnStatistics.Builder columnBuilder = ColumnStatistics.builder()
                .setNullsFraction(Estimate.of(0))
                .setDistinctValuesCount(Estimate.of(0));
        if (hasDataSize(columnType)) {
            columnBuilder.setDataSize(Estimate.of(0));
        }
        tableBuilder.setColumnStatistics(column.getValue(), columnBuilder.build());
    }
    return tableBuilder.build();
}
.setDistinctValuesCount(Estimate.of(300)) .build()) .build(); assertEquals( statisticsProvider.getTableStatistics(
/**
 * Checks the table statistics returned for an unpartitioned table: a row count
 * of 1000 and, for the single integer column, a range of [-100, 100], a nulls
 * fraction of 0.5 and a distinct values count of 300.
 */
@Test
public void testGetTableStatisticsUnpartitioned()
{
    // Fixture: partition statistics returned under the unpartitioned id.
    HiveBasicStatistics basicStatistics = new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty());
    PartitionStatistics statistics = PartitionStatistics.builder()
            .setBasicStatistics(basicStatistics)
            .setColumnStatistics(ImmutableMap.of(
                    COLUMN,
                    createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (table, hivePartitions) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));

    HiveSessionProperties sessionProperties = new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig());
    TestingConnectorSession session = new TestingConnectorSession(sessionProperties.getSessionProperties());
    HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty());

    // Expected result.
    ColumnStatistics expectedColumnStatistics = ColumnStatistics.builder()
            .setRange(new DoubleRange(-100, 100))
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(300))
            .build();
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setColumnStatistics(columnHandle, expectedColumnStatistics)
            .build();

    TableStatistics actual = statisticsProvider.getTableStatistics(
            session,
            TABLE,
            ImmutableMap.of(COLUMN, columnHandle),
            ImmutableMap.of(COLUMN, BIGINT),
            ImmutableList.of(new HivePartition(TABLE)));

    assertEquals(actual, expected);
}
/**
 * Translates the given statistics data into {@link TableStatistics}.
 *
 * <p>The result always carries the row count. When the row count is greater
 * than zero, column statistics are added for each entry of {@code columnHandles},
 * using the statistics registered under the same column name.
 *
 * @param columnHandles column handles keyed by column name; values are cast to {@code TpcdsColumnHandle}
 * @param statisticsData provides the row count and the per-column statistics
 * @return the resulting table statistics
 */
private TableStatistics toTableStatistics(Map<String, ColumnHandle> columnHandles, TableStatisticsData statisticsData)
{
    long rows = statisticsData.getRowCount();
    TableStatistics.Builder result = TableStatistics.builder();
    result.setRowCount(Estimate.of(rows));

    // Column statistics are only populated for a positive row count.
    if (rows <= 0) {
        return result.build();
    }

    Map<String, ColumnStatisticsData> perColumn = statisticsData.getColumns();
    for (Map.Entry<String, ColumnHandle> handleEntry : columnHandles.entrySet()) {
        ColumnHandle handle = handleEntry.getValue();
        TpcdsColumnHandle tpcdsHandle = (TpcdsColumnHandle) handle;
        ColumnStatisticsData columnData = perColumn.get(handleEntry.getKey());
        result.setColumnStatistics(handle, toColumnStatistics(columnData, tpcdsHandle.getType(), rows));
    }
    return result.build();
}
/**
 * Converts the given statistics data into {@link TableStatistics}.
 *
 * <p>The row count is copied over, and for every column present in the
 * statistics data the matching column handle is resolved through
 * {@code getColumnHandle} and its converted statistics are attached.
 *
 * @param tableStatisticsData source of the row count and the per-column statistics
 * @param tpchTableHandle table handle passed to {@code getColumnHandle}
 * @param columnHandles column handles keyed by column name, passed to {@code getColumnHandle}
 * @return the assembled table statistics
 */
private TableStatistics toTableStatistics(TableStatisticsData tableStatisticsData, TpchTableHandle tpchTableHandle, Map<String, ColumnHandle> columnHandles)
{
    TableStatistics.Builder result = TableStatistics.builder();
    result.setRowCount(Estimate.of(tableStatisticsData.getRowCount()));

    // Iteration is driven by the columns present in the statistics data, not by columnHandles.
    tableStatisticsData.getColumns().forEach((name, columnStatistics) -> {
        TpchColumnHandle handle = (TpchColumnHandle) getColumnHandle(tpchTableHandle, columnHandles, name);
        result.setColumnStatistics(handle, toColumnStatistics(columnStatistics, handle.getType()));
    });

    return result.build();
}
.setDistinctValuesCount(Estimate.of(300)) .build()) .build(); assertEquals( statisticsProvider.getTableStatistics(
/**
 * Verifies statistics for an unpartitioned table. The provider is fed basic
 * statistics with a row count of 1000 and integer column statistics, and is
 * expected to report a [-100, 100] range, a 0.5 nulls fraction and 300 distinct
 * values for that column.
 */
@Test
public void testGetTableStatisticsUnpartitioned()
{
    // Arrange: the provider always answers with these statistics under UNPARTITIONED_ID.
    HiveBasicStatistics basic = new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.empty(), OptionalLong.empty());
    PartitionStatistics statistics = PartitionStatistics.builder()
            .setBasicStatistics(basic)
            .setColumnStatistics(ImmutableMap.of(
                    COLUMN,
                    createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300))))
            .build();
    MetastoreHiveStatisticsProvider provider = new MetastoreHiveStatisticsProvider(
            (table, hivePartitions) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));

    HiveSessionProperties hiveSessionProperties = new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig());
    TestingConnectorSession session = new TestingConnectorSession(hiveSessionProperties.getSessionProperties());
    HiveColumnHandle handle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty());

    ColumnStatistics expectedColumn = ColumnStatistics.builder()
            .setRange(new DoubleRange(-100, 100))
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(300))
            .build();
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setColumnStatistics(handle, expectedColumn)
            .build();

    // Act
    TableStatistics actual = provider.getTableStatistics(
            session,
            TABLE,
            ImmutableMap.of(COLUMN, handle),
            ImmutableMap.of(COLUMN, BIGINT),
            ImmutableList.of(new HivePartition(TABLE)));

    // Assert
    assertEquals(actual, expected);
}
/**
 * Assembles {@link TableStatistics} from the supplied statistics data.
 *
 * <p>Sets the row count, then walks the columns contained in the statistics
 * data. Each column name is resolved to a handle via {@code getColumnHandle},
 * and the converted column statistics are registered under that handle.
 *
 * @param tableStatisticsData provides the row count and the per-column statistics
 * @param tpchTableHandle table handle forwarded to {@code getColumnHandle}
 * @param columnHandles column handles keyed by column name, forwarded to {@code getColumnHandle}
 * @return the resulting table statistics
 */
private TableStatistics toTableStatistics(TableStatisticsData tableStatisticsData, TpchTableHandle tpchTableHandle, Map<String, ColumnHandle> columnHandles)
{
    TableStatistics.Builder statisticsBuilder = TableStatistics.builder();
    statisticsBuilder.setRowCount(Estimate.of(tableStatisticsData.getRowCount()));

    tableStatisticsData.getColumns().forEach((column, data) -> {
        // The resolved handle is cast to the TPC-H handle type so that its type can be read.
        TpchColumnHandle tpchHandle = (TpchColumnHandle) getColumnHandle(tpchTableHandle, columnHandles, column);
        statisticsBuilder.setColumnStatistics(tpchHandle, toColumnStatistics(data, tpchHandle.getType()));
    });

    return statisticsBuilder.build();
}