/**
 * Builds the seven-value row that carries table-level statistics.
 *
 * <p>Only the fifth value is populated: it is the representation of the
 * table's row count produced by {@code createEstimateRepresentation}.
 * Every other position holds a typed NULL placeholder.
 *
 * @param tableStatistics statistics whose row count is rendered into the row
 * @return a new {@code Row} with the values in fixed positional order
 */
private static Row createTableStatsRow(TableStatistics tableStatistics) {
    return new Row(ImmutableList.<Expression>of(
            NULL_VARCHAR,
            NULL_DOUBLE,
            NULL_DOUBLE,
            NULL_DOUBLE,
            createEstimateRepresentation(tableStatistics.getRowCount()),
            NULL_VARCHAR,
            NULL_VARCHAR));
}
@Override protected Optional<PlanNodeStatsEstimate> doCalculate(TableScanNode node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types) { // TODO Construct predicate like AddExchanges's LayoutConstraintEvaluator Constraint<ColumnHandle> constraint = new Constraint<>(node.getCurrentConstraint()); TableStatistics tableStatistics = metadata.getTableStatistics(session, node.getTable(), constraint); Map<Symbol, SymbolStatsEstimate> outputSymbolStats = new HashMap<>(); for (Map.Entry<Symbol, ColumnHandle> entry : node.getAssignments().entrySet()) { Symbol symbol = entry.getKey(); Optional<ColumnStatistics> columnStatistics = Optional.ofNullable(tableStatistics.getColumnStatistics().get(entry.getValue())); outputSymbolStats.put(symbol, columnStatistics.map(statistics -> toSymbolStatistics(tableStatistics, statistics)).orElse(SymbolStatsEstimate.unknown())); } return Optional.of(PlanNodeStatsEstimate.builder() .setOutputRowCount(tableStatistics.getRowCount().getValue()) .addSymbolStatistics(outputSymbolStats) .build()); }
/**
 * Checks the row count that the TPCH metadata reports for a table under the
 * given constraint.
 *
 * <p>The reported row count must equal an {@code Estimate} built from its
 * own value, and that value must be within
 * {@code expectedRowCount * TOLERANCE} of {@code expectedRowCount}.
 *
 * @param schema schema name used to resolve the table handle
 * @param table TPCH table whose statistics are checked
 * @param constraint constraint passed to the statistics lookup
 * @param expectedRowCount row count the statistics are expected to report
 */
private void testTableStats(String schema, TpchTable<?> table, Constraint<ColumnHandle> constraint, double expectedRowCount) {
    SchemaTableName tableName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, tableName);
    TableStatistics statistics = tpchMetadata.getTableStatistics(session, handle, constraint);

    double reportedRowCount = statistics.getRowCount().getValue();
    assertEquals(statistics.getRowCount(), Estimate.of(reportedRowCount));

    double allowedDelta = expectedRowCount * TOLERANCE;
    assertEquals(reportedRowCount, expectedRowCount, allowedDelta);
}
/**
 * For each of the schemas "sf0.001", "sf0.1" and "sf10", verifies that every
 * base table reports an unknown row count and no column statistics when
 * queried with an always-true constraint.
 */
@Test
public void testNoTableStatsForNotSupportedSchema() {
    String[] schemaNames = {"sf0.001", "sf0.1", "sf10"};
    for (String schemaName : schemaNames) {
        Table.getBaseTables().forEach(table -> {
            ConnectorTableHandle tableHandle = metadata.getTableHandle(session, new SchemaTableName(schemaName, table.getName()));
            TableStatistics statistics = metadata.getTableStatistics(session, tableHandle, alwaysTrue());

            assertTrue(statistics.getRowCount().isUnknown());
            assertTrue(statistics.getColumnStatistics().isEmpty());
        });
    }
}
/**
 * For each of the schemas "sf0.01", "tiny", "sf1" and "sf1.000", verifies
 * that every base table reports a known row count and a non-null column
 * statistics entry for each of its column handles.
 */
@Test
public void testTableStatsExistenceSupportedSchema() {
    String[] schemaNames = {"sf0.01", "tiny", "sf1", "sf1.000"};
    for (String schemaName : schemaNames) {
        Table.getBaseTables().forEach(table -> {
            ConnectorTableHandle tableHandle = metadata.getTableHandle(session, new SchemaTableName(schemaName, table.getName()));
            TableStatistics statistics = metadata.getTableStatistics(session, tableHandle, alwaysTrue());

            assertFalse(statistics.getRowCount().isUnknown());

            // Every column handle of the table must have a statistics entry.
            for (ColumnHandle columnHandle : metadata.getColumnHandles(session, tableHandle).values()) {
                assertTrue(statistics.getColumnStatistics().containsKey(columnHandle));
                assertNotNull(statistics.getColumnStatistics().get(columnHandle));
            }
        });
    }
}
/**
 * Asserts that the TPCH metadata reports an unknown row count for the given
 * table when queried with an always-true constraint.
 *
 * @param schema schema name used to resolve the table handle
 * @param table TPCH table whose statistics are checked
 */
private void testNoTableStats(String schema, TpchTable<?> table) {
    SchemaTableName tableName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, tableName);
    TableStatistics statistics = tpchMetadata.getTableStatistics(session, handle, alwaysTrue());

    assertTrue(statistics.getRowCount().isUnknown());
}
/**
 * Converts one column's statistics into a symbol stats estimate.
 *
 * <p>The average row size is the column's data size divided by the number of
 * non-null rows (table row count times {@code 1 - nullsFraction}), or zero
 * when that number is zero. Low and high values are set only when the column
 * statistics carry a range.
 *
 * @param tableStatistics statistics of the table that owns the column; supplies the row count
 * @param columnStatistics statistics of the column being converted
 * @return the estimate built from the two inputs
 */
private SymbolStatsEstimate toSymbolStatistics(TableStatistics tableStatistics, ColumnStatistics columnStatistics) {
    double nullsFraction = columnStatistics.getNullsFraction().getValue();
    double nonNullRows = tableStatistics.getRowCount().getValue() * (1.0 - nullsFraction);

    double averageRowSize;
    if (nonNullRows == 0) {
        // Avoid dividing by zero when there are no non-null rows.
        averageRowSize = 0;
    } else {
        averageRowSize = columnStatistics.getDataSize().getValue() / nonNullRows;
    }

    SymbolStatsEstimate.Builder estimate = SymbolStatsEstimate.builder();
    estimate.setNullsFraction(nullsFraction);
    estimate.setDistinctValuesCount(columnStatistics.getDistinctValuesCount().getValue());
    estimate.setAverageRowSize(averageRowSize);
    columnStatistics.getRange().ifPresent(range -> {
        estimate.setLowValue(range.getMin());
        estimate.setHighValue(range.getMax());
    });
    return estimate.build();
}
}
// Fetch the statistics for tableHandle under an always-true constraint and
// require that the reported row count is known.
TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, Constraint.alwaysTrue()); assertFalse(tableStatistics.getRowCount().isUnknown(), "row count is unknown");
// Fetch the statistics for tableHandle under an always-true constraint and
// check the reported row count against an estimate of 6.
// NOTE(review): the closeness tolerance is defined by estimateAssertion, which is not visible here.
TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, alwaysTrue()); estimateAssertion.assertClose(tableStatistics.getRowCount(), Estimate.of(6), "Row count does not match");
/**
 * Checks the row count that the TPCH metadata reports for a table under the
 * given constraint.
 *
 * <p>The reported row count must equal an {@code Estimate} built from its
 * own value, and that value must be within
 * {@code expectedRowCount * TOLERANCE} of {@code expectedRowCount}.
 *
 * @param schema schema name used to resolve the table handle
 * @param table TPCH table whose statistics are checked
 * @param constraint constraint passed to the statistics lookup
 * @param expectedRowCount row count the statistics are expected to report
 */
private void testTableStats(String schema, TpchTable<?> table, Constraint<ColumnHandle> constraint, double expectedRowCount) {
    SchemaTableName tableName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, tableName);
    TableStatistics statistics = tpchMetadata.getTableStatistics(session, handle, constraint);

    double reportedRowCount = statistics.getRowCount().getValue();
    assertEquals(statistics.getRowCount(), Estimate.of(reportedRowCount));

    double allowedDelta = expectedRowCount * TOLERANCE;
    assertEquals(reportedRowCount, expectedRowCount, allowedDelta);
}
/**
 * For each of the schemas "sf0.001", "sf0.1" and "sf10", verifies that every
 * base table reports an unknown row count and no column statistics when
 * queried with an always-true constraint.
 */
@Test
public void testNoTableStatsForNotSupportedSchema() {
    String[] schemaNames = {"sf0.001", "sf0.1", "sf10"};
    for (String schemaName : schemaNames) {
        Table.getBaseTables().forEach(table -> {
            ConnectorTableHandle tableHandle = metadata.getTableHandle(session, new SchemaTableName(schemaName, table.getName()));
            TableStatistics statistics = metadata.getTableStatistics(session, tableHandle, alwaysTrue());

            assertTrue(statistics.getRowCount().isUnknown());
            assertTrue(statistics.getColumnStatistics().isEmpty());
        });
    }
}
/**
 * For each of the schemas "sf0.01", "tiny", "sf1" and "sf1.000", verifies
 * that every base table reports a known row count and a non-null column
 * statistics entry for each of its column handles.
 */
@Test
public void testTableStatsExistenceSupportedSchema() {
    String[] schemaNames = {"sf0.01", "tiny", "sf1", "sf1.000"};
    for (String schemaName : schemaNames) {
        Table.getBaseTables().forEach(table -> {
            ConnectorTableHandle tableHandle = metadata.getTableHandle(session, new SchemaTableName(schemaName, table.getName()));
            TableStatistics statistics = metadata.getTableStatistics(session, tableHandle, alwaysTrue());

            assertFalse(statistics.getRowCount().isUnknown());

            // Every column handle of the table must have a statistics entry.
            for (ColumnHandle columnHandle : metadata.getColumnHandles(session, tableHandle).values()) {
                assertTrue(statistics.getColumnStatistics().containsKey(columnHandle));
                assertNotNull(statistics.getColumnStatistics().get(columnHandle));
            }
        });
    }
}
/**
 * Asserts that the TPCH metadata reports an unknown row count for the given
 * table when queried with an always-true constraint.
 *
 * @param schema schema name used to resolve the table handle
 * @param table TPCH table whose statistics are checked
 */
private void testNoTableStats(String schema, TpchTable<?> table) {
    SchemaTableName tableName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, tableName);
    TableStatistics statistics = tpchMetadata.getTableStatistics(session, handle, alwaysTrue());

    assertTrue(statistics.getRowCount().isUnknown());
}
// Fetch the statistics for tableHandle under an always-true constraint and
// require that the reported row count is known.
TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, Constraint.alwaysTrue()); assertFalse(tableStatistics.getRowCount().isUnknown(), "row count is unknown");
// Fetch the statistics for tableHandle under an always-true constraint and
// check the reported row count against an estimate of 6.
// NOTE(review): the closeness tolerance is defined by estimateAssertion, which is not visible here.
TableStatistics tableStatistics = metadata.getTableStatistics(session, tableHandle, alwaysTrue()); estimateAssertion.assertClose(tableStatistics.getRowCount(), Estimate.of(6), "Row count does not match");