Refine search
/**
 * Get statistics for table for given filtering constraint.
 * <p>
 * This default implementation does not inspect any of its arguments and
 * always returns {@code TableStatistics.empty()}.
 *
 * @param session session on whose behalf statistics are requested (unused by this default)
 * @param tableHandle handle of the table to describe (unused by this default)
 * @param constraint filtering constraint on the table's columns (unused by this default)
 * @return {@code TableStatistics.empty()}
 */
default TableStatistics getTableStatistics(ConnectorSession session, ConnectorTableHandle tableHandle, Constraint<ColumnHandle> constraint) { return TableStatistics.empty(); }
/**
 * Converts recorded statistics data into a {@code TableStatistics} instance.
 * <p>
 * The row count is always set. Per-column statistics are only attached when
 * the row count is positive; each one is produced by {@code toColumnStatistics}
 * from the data entry stored under the column's name.
 *
 * @param columnHandles column handles keyed by column name; values must be {@code TpcdsColumnHandle}s
 * @param statisticsData source of the row count and the per-column data
 * @return the assembled table statistics
 */
private TableStatistics toTableStatistics(Map<String, ColumnHandle> columnHandles, TableStatisticsData statisticsData)
{
    long rowCount = statisticsData.getRowCount();
    TableStatistics.Builder builder = TableStatistics.builder();
    builder.setRowCount(Estimate.of(rowCount));

    // Nothing to report per column when the table has no rows
    if (rowCount <= 0) {
        return builder.build();
    }

    Map<String, ColumnStatisticsData> dataByColumnName = statisticsData.getColumns();
    for (Map.Entry<String, ColumnHandle> column : columnHandles.entrySet()) {
        ColumnHandle handle = column.getValue();
        TpcdsColumnHandle tpcdsHandle = (TpcdsColumnHandle) handle;
        ColumnStatisticsData columnData = dataByColumnName.get(column.getKey());
        builder.setColumnStatistics(handle, toColumnStatistics(columnData, tpcdsHandle.getType(), rowCount));
    }
    return builder.build();
}
/**
 * For each of the listed schemas, checks that every base table reports an
 * unknown row count and no column statistics.
 */
@Test
public void testNoTableStatsForNotSupportedSchema()
{
    Stream.of("sf0.001", "sf0.1", "sf10").forEach(schemaName -> {
        for (Table table : Table.getBaseTables()) {
            ConnectorTableHandle tableHandle = metadata.getTableHandle(session, new SchemaTableName(schemaName, table.getName()));
            TableStatistics statistics = metadata.getTableStatistics(session, tableHandle, alwaysTrue());

            assertTrue(statistics.getRowCount().isUnknown());
            assertTrue(statistics.getColumnStatistics().isEmpty());
        }
    });
}
@Override protected Optional<PlanNodeStatsEstimate> doCalculate(TableScanNode node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types) { // TODO Construct predicate like AddExchanges's LayoutConstraintEvaluator Constraint<ColumnHandle> constraint = new Constraint<>(node.getCurrentConstraint()); TableStatistics tableStatistics = metadata.getTableStatistics(session, node.getTable(), constraint); Map<Symbol, SymbolStatsEstimate> outputSymbolStats = new HashMap<>(); for (Map.Entry<Symbol, ColumnHandle> entry : node.getAssignments().entrySet()) { Symbol symbol = entry.getKey(); Optional<ColumnStatistics> columnStatistics = Optional.ofNullable(tableStatistics.getColumnStatistics().get(entry.getValue())); outputSymbolStats.put(symbol, columnStatistics.map(statistics -> toSymbolStatistics(tableStatistics, statistics)).orElse(SymbolStatsEstimate.unknown())); } return Optional.of(PlanNodeStatsEstimate.builder() .setOutputRowCount(tableStatistics.getRowCount().getValue()) .addSymbolStatistics(outputSymbolStats) .build()); }
return TableStatistics.empty(); return TableStatistics.empty(); double rowCount = averageRowsPerPartition * queriedPartitionsCount; TableStatistics.Builder result = TableStatistics.builder(); result.setRowCount(Estimate.of(rowCount)); for (Map.Entry<String, ColumnHandle> column : columns.entrySet()) { String columnName = column.getKey();
/**
 * Asserts that the row count reported for {@code table} in {@code schema}
 * under {@code constraint} matches {@code expectedRowCount}, allowing an
 * absolute deviation of {@code expectedRowCount * TOLERANCE}.
 */
private void testTableStats(String schema, TpchTable<?> table, Constraint<ColumnHandle> constraint, double expectedRowCount)
{
    SchemaTableName tableName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, tableName);
    TableStatistics statistics = tpchMetadata.getTableStatistics(session, handle, constraint);

    double actualRowCount = statistics.getRowCount().getValue();
    double allowedDelta = expectedRowCount * TOLERANCE;

    assertEquals(statistics.getRowCount(), Estimate.of(actualRowCount));
    assertEquals(actualRowCount, expectedRowCount, allowedDelta);
}
estimateAssertion.assertClose(tableStatistics.getRowCount(), Estimate.of(6), "Row count does not match"); assertTrue(tableStatistics.getColumnStatistics().containsKey(column)); assertNotNull(tableStatistics.getColumnStatistics().get(column)); tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_CALL_CENTER_SK.getName())), ColumnStatistics.builder() .setNullsFraction(Estimate.of(0)) .setDistinctValuesCount(Estimate.of(6)) .setRange(new DoubleRange(1, 6)) .build()); tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_CALL_CENTER_ID.getName())), ColumnStatistics.builder() .setNullsFraction(Estimate.of(0)) tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_ZIP.getName())), ColumnStatistics.builder() .setNullsFraction(Estimate.of(0)) tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_GMT_OFFSET.getName())), ColumnStatistics.builder() .setNullsFraction(Estimate.of(0)) tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_REC_START_DATE.getName())), ColumnStatistics.builder() .setNullsFraction(Estimate.of(0)) tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_CLOSED_DATE_SK.getName())),
@Test public void testNullFraction() { SchemaTableName schemaTableName = new SchemaTableName("sf1", Table.WEB_SITE.getName()); ConnectorTableHandle tableHandle = metadata.getTableHandle(session, schemaTableName); TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, alwaysTrue()); Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle); // some null values assertColumnStatistics( tableStatistics.getColumnStatistics().get(columnHandles.get(WebSiteColumn.WEB_REC_END_DATE.getName())), ColumnStatistics.builder() .setNullsFraction(Estimate.of(0.5)) .setDistinctValuesCount(Estimate.of(3)) .setRange(new DoubleRange(10819L, 11549L)) .build()); }
/**
 * Asserts that the row count reported for {@code table} in {@code schema},
 * with an always-true constraint, is unknown.
 */
private void testNoTableStats(String schema, TpchTable<?> table)
{
    SchemaTableName tableName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, tableName);
    assertTrue(tpchMetadata.getTableStatistics(session, handle, alwaysTrue()).getRowCount().isUnknown());
}
/**
 * Derives a symbol-level estimate from one column's statistics.
 * <p>
 * The average row size is the column's data size divided by the number of
 * non-null rows (table row count times {@code 1 - nullsFraction}), or 0 when
 * that number is exactly zero. Low and high values are only set when the
 * column statistics carry a range.
 *
 * @param tableStatistics statistics of the owning table; supplies the row count
 * @param columnStatistics statistics of the column being converted
 * @return the resulting symbol estimate
 */
private SymbolStatsEstimate toSymbolStatistics(TableStatistics tableStatistics, ColumnStatistics columnStatistics)
{
    double nullsFraction = columnStatistics.getNullsFraction().getValue();
    double nonNullRows = tableStatistics.getRowCount().getValue() * (1.0 - nullsFraction);

    double averageRowSize;
    if (nonNullRows == 0) {
        // Avoid dividing by zero when every row is null or the table is empty
        averageRowSize = 0;
    }
    else {
        averageRowSize = columnStatistics.getDataSize().getValue() / nonNullRows;
    }

    SymbolStatsEstimate.Builder estimate = SymbolStatsEstimate.builder();
    estimate.setNullsFraction(nullsFraction);
    estimate.setDistinctValuesCount(columnStatistics.getDistinctValuesCount().getValue());
    estimate.setAverageRowSize(averageRowSize);
    columnStatistics.getRange().ifPresent(valueRange -> {
        estimate.setLowValue(valueRange.getMin());
        estimate.setHighValue(valueRange.getMax());
    });
    return estimate.build();
}
}
private List<Expression> buildStatisticsRows(TableMetadata tableMetadata, Map<String, ColumnHandle> columnHandles, TableStatistics tableStatistics) { ImmutableList.Builder<Expression> rowsBuilder = ImmutableList.builder(); for (ColumnMetadata columnMetadata : tableMetadata.getColumns()) { if (columnMetadata.isHidden()) { continue; } String columnName = columnMetadata.getName(); Type columnType = columnMetadata.getType(); ColumnHandle columnHandle = columnHandles.get(columnName); ColumnStatistics columnStatistics = tableStatistics.getColumnStatistics().get(columnHandle); if (columnStatistics != null) { rowsBuilder.add(createColumnStatsRow(columnName, columnType, columnStatistics)); } else { rowsBuilder.add(createEmptyColumnStatsRow(columnName)); } } // Stats for whole table rowsBuilder.add(createTableStatsRow(tableStatistics)); return rowsBuilder.build(); }
/**
 * Creates the table-level statistics row: seven values, of which only the
 * fifth (the row count estimate) is populated; the others are typed null
 * placeholders.
 *
 * @param tableStatistics statistics supplying the row count
 * @return the table-level row
 */
private static Row createTableStatsRow(TableStatistics tableStatistics)
{
    ImmutableList<Expression> values = ImmutableList.<Expression>builder()
            .add(NULL_VARCHAR)
            .add(NULL_DOUBLE)
            .add(NULL_DOUBLE)
            .add(NULL_DOUBLE)
            .add(createEstimateRepresentation(tableStatistics.getRowCount()))
            .add(NULL_VARCHAR)
            .add(NULL_VARCHAR)
            .build();
    return new Row(values);
}
/**
 * Creates a new {@code TableStatistics} from the current {@code rowCount}
 * and {@code columnStatisticsMap} values.
 *
 * @return the newly constructed statistics object
 */
public TableStatistics build() { return new TableStatistics(rowCount, columnStatisticsMap); } }
/**
 * Asserts that the row count reported for {@code table} in {@code schema}
 * under {@code constraint} matches {@code expectedRowCount}, allowing an
 * absolute deviation of {@code expectedRowCount * TOLERANCE}.
 */
private void testTableStats(String schema, TpchTable<?> table, Constraint<ColumnHandle> constraint, double expectedRowCount)
{
    SchemaTableName tableName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, tableName);
    TableStatistics statistics = tpchMetadata.getTableStatistics(session, handle, constraint);

    double actualRowCount = statistics.getRowCount().getValue();
    double allowedDelta = expectedRowCount * TOLERANCE;

    assertEquals(statistics.getRowCount(), Estimate.of(actualRowCount));
    assertEquals(actualRowCount, expectedRowCount, allowedDelta);
}
estimateAssertion.assertClose(tableStatistics.getRowCount(), Estimate.of(6), "Row count does not match"); assertTrue(tableStatistics.getColumnStatistics().containsKey(column)); assertNotNull(tableStatistics.getColumnStatistics().get(column)); tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_CALL_CENTER_SK.getName())), ColumnStatistics.builder() .setNullsFraction(Estimate.of(0)) .setDistinctValuesCount(Estimate.of(6)) .setRange(new DoubleRange(1, 6)) .build()); tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_CALL_CENTER_ID.getName())), ColumnStatistics.builder() .setNullsFraction(Estimate.of(0)) tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_ZIP.getName())), ColumnStatistics.builder() .setNullsFraction(Estimate.of(0)) tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_GMT_OFFSET.getName())), ColumnStatistics.builder() .setNullsFraction(Estimate.of(0)) tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_REC_START_DATE.getName())), ColumnStatistics.builder() .setNullsFraction(Estimate.of(0)) tableStatistics.getColumnStatistics().get(columnHandles.get(CallCenterColumn.CC_CLOSED_DATE_SK.getName())),
@Test public void testNullFraction() { SchemaTableName schemaTableName = new SchemaTableName("sf1", Table.WEB_SITE.getName()); ConnectorTableHandle tableHandle = metadata.getTableHandle(session, schemaTableName); TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, alwaysTrue()); Map<String, ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle); // some null values assertColumnStatistics( tableStatistics.getColumnStatistics().get(columnHandles.get(WebSiteColumn.WEB_REC_END_DATE.getName())), ColumnStatistics.builder() .setNullsFraction(Estimate.of(0.5)) .setDistinctValuesCount(Estimate.of(3)) .setRange(new DoubleRange(10819L, 11549L)) .build()); }
/**
 * Asserts that the row count reported for {@code table} in {@code schema},
 * with an always-true constraint, is unknown.
 */
private void testNoTableStats(String schema, TpchTable<?> table)
{
    SchemaTableName tableName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, tableName);
    assertTrue(tpchMetadata.getTableStatistics(session, handle, alwaysTrue()).getRowCount().isUnknown());
}
/**
 * Compares the statistics reported for one column against expected values.
 * <p>
 * Distinct values count, data size, nulls fraction and range are each
 * compared with an {@code EstimateAssertion} built from {@code TOLERANCE}.
 *
 * @param schema schema name of the table
 * @param table table owning the column
 * @param column column whose statistics are checked; looked up by its simplified name
 * @param constraint constraint passed when fetching the table statistics
 * @param expected statistics the column is expected to report
 */
private void testColumnStats(String schema, TpchTable<?> table, TpchColumn<?> column, Constraint<ColumnHandle> constraint, ColumnStatistics expected)
{
    SchemaTableName tableName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, tableName);
    TableStatistics statistics = tpchMetadata.getTableStatistics(session, handle, constraint);

    String simplifiedName = column.getSimplifiedColumnName();
    ColumnHandle columnHandle = tpchMetadata.getColumnHandles(session, handle).get(simplifiedName);
    ColumnStatistics actual = statistics.getColumnStatistics().get(columnHandle);

    EstimateAssertion closeTo = new EstimateAssertion(TOLERANCE);
    closeTo.assertClose(actual.getDistinctValuesCount(), expected.getDistinctValuesCount(), "distinctValuesCount");
    closeTo.assertClose(actual.getDataSize(), expected.getDataSize(), "dataSize");
    closeTo.assertClose(actual.getNullsFraction(), expected.getNullsFraction(), "nullsFraction");
    closeTo.assertClose(actual.getRange(), expected.getRange(), "range");
}
/**
 * Creates a new {@code TableStatistics} from the current {@code rowCount}
 * and {@code columnStatisticsMap} values.
 *
 * @return the newly constructed statistics object
 */
public TableStatistics build() { return new TableStatistics(rowCount, columnStatisticsMap); } }
/**
 * For each of the listed schemas, checks that every base table reports a
 * known row count and a non-null statistics entry for each of its column
 * handles.
 */
@Test
public void testTableStatsExistenceSupportedSchema()
{
    Stream.of("sf0.01", "tiny", "sf1", "sf1.000").forEach(schemaName -> {
        for (Table table : Table.getBaseTables()) {
            ConnectorTableHandle tableHandle = metadata.getTableHandle(session, new SchemaTableName(schemaName, table.getName()));
            TableStatistics statistics = metadata.getTableStatistics(session, tableHandle, alwaysTrue());

            assertFalse(statistics.getRowCount().isUnknown());
            metadata.getColumnHandles(session, tableHandle).values().forEach(column -> {
                assertTrue(statistics.getColumnStatistics().containsKey(column));
                assertNotNull(statistics.getColumnStatistics().get(column));
            });
        }
    });
}