/**
 * Returns statistics for the given table, restricted by the supplied constraint.
 * <p>
 * This default implementation ignores all of its arguments and reports
 * {@link TableStatistics#empty()}.
 *
 * @param session the connector session (unused by this default)
 * @param tableHandle the table whose statistics are requested (unused by this default)
 * @param constraint the filtering constraint (unused by this default)
 * @return the result of {@code TableStatistics.empty()}
 */
default TableStatistics getTableStatistics(ConnectorSession session, ConnectorTableHandle tableHandle, Constraint<ColumnHandle> constraint)
{
    return TableStatistics.empty();
}
/**
 * Creates a {@link ColumnStatistics} from the values currently held by this builder.
 *
 * @return a new instance built from the nulls fraction, distinct values count,
 *         data size and range fields
 */
public ColumnStatistics build()
{
    ColumnStatistics built = new ColumnStatistics(
            nullsFraction,
            distinctValuesCount,
            dataSize,
            range);
    return built;
}
}
/**
 * Describes the statistics that must be collected during a write.
 * <p>
 * This default implementation ignores its arguments and reports
 * {@link TableStatisticsMetadata#empty()}.
 *
 * @param session the connector session (unused by this default)
 * @param tableMetadata metadata of the table being written (unused by this default)
 * @return the result of {@code TableStatisticsMetadata.empty()}
 */
default TableStatisticsMetadata getStatisticsCollectionMetadata(ConnectorSession session, ConnectorTableMetadata tableMetadata)
{
    return TableStatisticsMetadata.empty();
}
/**
 * Builds table statistics with a row count of zero and, for every supplied column,
 * a nulls fraction of zero and a distinct values count of zero.
 * A zero data size is set only for columns whose type satisfies {@code hasDataSize}.
 *
 * @param columns column handles keyed by column name
 * @param columnTypes column types keyed by column name; must contain an entry for
 *        every key of {@code columns}, otherwise the {@code verify} check fails
 * @return the assembled zero-valued statistics
 */
private TableStatistics createZeroStatistics(Map<String, ColumnHandle> columns, Map<String, Type> columnTypes)
{
    TableStatistics.Builder tableBuilder = TableStatistics.builder();
    tableBuilder.setRowCount(Estimate.of(0));

    for (Map.Entry<String, ColumnHandle> entry : columns.entrySet()) {
        String columnName = entry.getKey();
        ColumnHandle columnHandle = entry.getValue();

        Type columnType = columnTypes.get(columnName);
        verify(columnType != null, "columnType is missing for column: %s", columnName);

        ColumnStatistics.Builder columnBuilder = ColumnStatistics.builder();
        columnBuilder.setNullsFraction(Estimate.of(0));
        columnBuilder.setDistinctValuesCount(Estimate.of(0));
        // Data size is only populated for types that hasDataSize accepts.
        if (hasDataSize(columnType)) {
            columnBuilder.setDataSize(Estimate.of(0));
        }

        tableBuilder.setColumnStatistics(columnHandle, columnBuilder.build());
    }

    return tableBuilder.build();
}
/**
 * Assembles column statistics for a partition column by delegating each
 * component (distinct values count, nulls fraction, range, data size) to the
 * corresponding {@code calculate...} helper.
 *
 * @param column the partition column
 * @param type the column's type
 * @param partitions the partitions under consideration
 * @param statistics partition statistics keyed by string (passed through to the helpers)
 * @param averageRowsPerPartition passed through to the helpers
 * @param rowCount passed through to the nulls-fraction helper
 * @return the assembled column statistics
 */
private static ColumnStatistics createPartitionColumnStatistics(
        HiveColumnHandle column,
        Type type,
        List<HivePartition> partitions,
        Map<String, PartitionStatistics> statistics,
        double averageRowsPerPartition,
        double rowCount)
{
    ColumnStatistics.Builder result = ColumnStatistics.builder();

    // The helpers are invoked in the same order as the setters are applied.
    result.setDistinctValuesCount(Estimate.of(
            calculateDistinctPartitionKeys(column, partitions, statistics, averageRowsPerPartition)));
    result.setNullsFraction(Estimate.of(
            calculateNullsFractionForPartitioningKey(column, partitions, statistics, averageRowsPerPartition, rowCount)));
    result.setRange(calculateRangeForPartitioningKey(column, type, partitions));
    result.setDataSize(
            calculateDataSizeForPartitioningKey(column, type, partitions, statistics, averageRowsPerPartition));

    return result.build();
}
/**
 * Converts raw column statistics data into {@link ColumnStatistics}.
 * The nulls fraction is always set to {@code Estimate.zero()}; distinct values
 * count and data size fall back to {@code Estimate.unknown()} when absent.
 *
 * @param stats the source statistics data
 * @param columnType the column type, forwarded to {@code toRange}
 * @return the converted column statistics
 */
private ColumnStatistics toColumnStatistics(ColumnStatisticsData stats, Type columnType)
{
    ColumnStatistics.Builder result = ColumnStatistics.builder();

    result.setNullsFraction(Estimate.zero());

    Estimate distinctValues = stats.getDistinctValuesCount()
            .map(Estimate::of)
            .orElse(Estimate.unknown());
    result.setDistinctValuesCount(distinctValues);

    Estimate dataSize = stats.getDataSize()
            .map(Estimate::of)
            .orElse(Estimate.unknown());
    result.setDataSize(dataSize);

    result.setRange(toRange(stats.getMin(), stats.getMax(), columnType));

    return result.build();
}
/**
 * Derives a {@link SymbolStatsEstimate} from table-level and column-level statistics.
 * <p>
 * The average row size is the column data size divided by the number of non-null
 * rows; it is set to zero when the non-null row count is exactly zero. Low and
 * high values are set only when the column statistics carry a range.
 *
 * @param tableStatistics supplies the row count
 * @param columnStatistics supplies nulls fraction, distinct count, data size and range
 * @return the derived symbol statistics
 */
private SymbolStatsEstimate toSymbolStatistics(TableStatistics tableStatistics, ColumnStatistics columnStatistics)
{
    double nullsFraction = columnStatistics.getNullsFraction().getValue();
    double totalRows = tableStatistics.getRowCount().getValue();
    double nonNullRowsCount = totalRows * (1.0 - nullsFraction);

    double averageRowSize;
    if (nonNullRowsCount == 0) {
        // Avoid dividing by zero; the data size is not consulted in this case.
        averageRowSize = 0;
    }
    else {
        averageRowSize = columnStatistics.getDataSize().getValue() / nonNullRowsCount;
    }

    SymbolStatsEstimate.Builder estimate = SymbolStatsEstimate.builder();
    estimate.setNullsFraction(nullsFraction);
    estimate.setDistinctValuesCount(columnStatistics.getDistinctValuesCount().getValue());
    estimate.setAverageRowSize(averageRowSize);
    columnStatistics.getRange().ifPresent(bounds -> {
        estimate.setLowValue(bounds.getMin());
        estimate.setHighValue(bounds.getMax());
    });
    return estimate.build();
}
}
/**
 * Converts table statistics data into {@link TableStatistics}: the row count is
 * copied over, and each column entry is resolved to a {@link TpchColumnHandle}
 * via {@code getColumnHandle} and converted with {@code toColumnStatistics}.
 *
 * @param tableStatisticsData the source statistics data
 * @param tpchTableHandle the table handle, forwarded to {@code getColumnHandle}
 * @param columnHandles column handles keyed by string, forwarded to {@code getColumnHandle}
 * @return the converted table statistics
 */
private TableStatistics toTableStatistics(TableStatisticsData tableStatisticsData, TpchTableHandle tpchTableHandle, Map<String, ColumnHandle> columnHandles)
{
    TableStatistics.Builder result = TableStatistics.builder()
            .setRowCount(Estimate.of(tableStatisticsData.getRowCount()));

    tableStatisticsData.getColumns().forEach((name, columnData) -> {
        ColumnHandle resolved = getColumnHandle(tpchTableHandle, columnHandles, name);
        TpchColumnHandle tpchColumn = (TpchColumnHandle) resolved;
        ColumnStatistics converted = toColumnStatistics(columnData, tpchColumn.getType());
        result.setColumnStatistics(tpchColumn, converted);
    });

    return result.build();
}
/**
 * Constructs a {@link DoubleRange} from the given bounds and asserts that its
 * accessors report exactly those bounds.
 *
 * @param min the lower bound passed to the constructor
 * @param max the upper bound passed to the constructor
 */
private static void assertRange(double min, double max)
{
    DoubleRange constructed = new DoubleRange(min, max);

    assertEquals(constructed.getMin(), min);
    assertEquals(constructed.getMax(), max);
}
}
/**
 * Encodes the metadata as {@code <statistic type name>:<column name>}.
 *
 * @param value the metadata to encode
 * @return the statistic type's enum name, a colon, then the column name
 */
@VisibleForTesting
static String serialize(ColumnStatisticMetadata value)
{
    String typeName = value.getStatisticType().name();
    String columnName = value.getColumnName();
    return typeName + ":" + columnName;
}
}
/**
 * Converts an optional value into an {@link Estimate}.
 *
 * @param value the optional value
 * @return {@code Estimate.of(v)} when a value {@code v} is present,
 *         otherwise {@code Estimate.unknown()}
 */
private static Estimate toEstimate(Optional<Double> value)
{
    if (value.isPresent()) {
        return Estimate.of(value.get());
    }
    return Estimate.unknown();
}
}
/**
 * Decodes a string of the form {@code <statistic type name>:<column name>}.
 * The split happens at the first colon, so everything after it (including any
 * further colons) is treated as the column name.
 *
 * @param value the encoded string
 * @return the decoded metadata
 * @throws IllegalArgumentException if the text before the first colon is not a
 *         {@link ColumnStatisticType} constant name ({@code valueOf}); a missing
 *         colon is rejected by {@code checkArgument}
 */
@VisibleForTesting
static ColumnStatisticMetadata deserialize(String value)
{
    int colon = value.indexOf(':');
    checkArgument(colon >= 0, "separator not found: %s", value);

    String typeName = value.substring(0, colon);
    String columnName = value.substring(colon + 1);
    ColumnStatisticType type = ColumnStatisticType.valueOf(typeName);

    return new ColumnStatisticMetadata(columnName, type);
}
}
/**
 * Creates a {@link ComputedStatistics} from the values currently held by this builder.
 *
 * @return a new instance built from the grouping columns, grouping values,
 *         table statistics and column statistics fields
 */
public ComputedStatistics build()
{
    ComputedStatistics built = new ComputedStatistics(
            groupingColumns,
            groupingValues,
            tableStatistics,
            columnStatistics);
    return built;
}
}
/**
 * Creates a {@link TableStatistics} from the values currently held by this builder.
 *
 * @return a new instance built from the row count and column statistics map fields
 */
public TableStatistics build()
{
    TableStatistics built = new TableStatistics(rowCount, columnStatisticsMap);
    return built;
}
}
/**
 * Creates a new {@link Builder} for the given grouping.
 *
 * @param groupingColumns forwarded to the {@code Builder} constructor
 * @param groupingValues forwarded to the {@code Builder} constructor
 * @return a new builder
 */
public static Builder builder(List<String> groupingColumns, List<Block> groupingValues)
{
    Builder created = new Builder(groupingColumns, groupingValues);
    return created;
}
/**
 * Creates a new {@link Builder} using its no-argument constructor.
 *
 * @return a new builder
 */
public static Builder builder()
{
    Builder created = new Builder();
    return created;
}
/**
 * Creates a new {@link Builder} using its no-argument constructor.
 *
 * @return a new builder
 */
public static Builder builder()
{
    Builder created = new Builder();
    return created;
}
/**
 * Converts raw column statistics data into {@link ColumnStatistics}.
 * The nulls fraction is the nulls count divided by {@code rowCount}; data size
 * falls back to {@code Estimate.unknown()} when absent.
 *
 * @param columnStatisticsData the source statistics data
 * @param type the column type, forwarded to {@code toRange}
 * @param rowCount the table row count used as the divisor for the nulls fraction
 * @return the converted column statistics
 */
private ColumnStatistics toColumnStatistics(ColumnStatisticsData columnStatisticsData, Type type, long rowCount)
{
    ColumnStatistics.Builder result = ColumnStatistics.builder();

    long nullCount = columnStatisticsData.getNullsCount();
    // NOTE(review): a rowCount of 0 makes this quotient NaN or infinite;
    // presumably callers only pass a positive rowCount — confirm.
    double nullsFraction = (double) nullCount / rowCount;

    return result
            .setNullsFraction(Estimate.of(nullsFraction))
            .setRange(toRange(columnStatisticsData.getMin(), columnStatisticsData.getMax(), type))
            .setDistinctValuesCount(Estimate.of(columnStatisticsData.getDistinctValuesCount()))
            .setDataSize(columnStatisticsData.getDataSize().map(Estimate::of).orElse(Estimate.unknown()))
            .build();
}
/**
 * Builds {@link ColumnStatistics} with a nulls fraction of {@code Estimate.zero()}
 * and the supplied optional components; distinct values count and data size are
 * converted through {@code toEstimate}.
 *
 * @param distinctValuesCount optional distinct values count
 * @param range optional value range, passed to the builder as-is
 * @param dataSize optional data size
 * @return the assembled column statistics
 */
private static ColumnStatistics createColumnStatistics(Optional<Double> distinctValuesCount, Optional<DoubleRange> range, Optional<Double> dataSize)
{
    ColumnStatistics.Builder result = ColumnStatistics.builder();
    result.setNullsFraction(Estimate.zero());
    result.setDistinctValuesCount(toEstimate(distinctValuesCount));
    result.setRange(range);
    result.setDataSize(toEstimate(dataSize));
    return result.build();
}
/**
 * Returns statistics for the given JDBC table.
 * <p>
 * This implementation ignores all of its arguments and reports
 * {@link TableStatistics#empty()}.
 *
 * @param session the connector session (unused)
 * @param handle the JDBC table handle (unused)
 * @param tupleDomain the predicate domain (unused)
 * @return the result of {@code TableStatistics.empty()}
 */
@Override
public TableStatistics getTableStatistics(ConnectorSession session, JdbcTableHandle handle, TupleDomain<ColumnHandle> tupleDomain)
{
    return TableStatistics.empty();
}