private Row createColumnStatsRow(String columnName, Type type, ColumnStatistics columnStatistics) { ImmutableList.Builder<Expression> rowValues = ImmutableList.builder(); rowValues.add(new StringLiteral(columnName)); rowValues.add(createEstimateRepresentation(columnStatistics.getDataSize())); rowValues.add(createEstimateRepresentation(columnStatistics.getDistinctValuesCount())); rowValues.add(createEstimateRepresentation(columnStatistics.getNullsFraction())); rowValues.add(NULL_DOUBLE); rowValues.add(toStringLiteral(type, columnStatistics.getRange().map(DoubleRange::getMin))); rowValues.add(toStringLiteral(type, columnStatistics.getRange().map(DoubleRange::getMax))); return new Row(rowValues.build()); }
private void assertColumnStatistics(ColumnStatistics actual, ColumnStatistics expected) { estimateAssertion.assertClose(actual.getNullsFraction(), expected.getNullsFraction(), "Nulls fraction"); estimateAssertion.assertClose(actual.getDataSize(), expected.getDataSize(), "Data size"); estimateAssertion.assertClose(actual.getDistinctValuesCount(), expected.getDistinctValuesCount(), "Distinct values count"); assertEquals(actual.getRange(), expected.getRange()); } }
private void testColumnStats(String schema, TpchTable<?> table, TpchColumn<?> column, Constraint<ColumnHandle> constraint, ColumnStatistics expected) { TpchTableHandle tableHandle = tpchMetadata.getTableHandle(session, new SchemaTableName(schema, table.getTableName())); TableStatistics tableStatistics = tpchMetadata.getTableStatistics(session, tableHandle, constraint); ColumnHandle columnHandle = tpchMetadata.getColumnHandles(session, tableHandle).get(column.getSimplifiedColumnName()); ColumnStatistics actual = tableStatistics.getColumnStatistics().get(columnHandle); EstimateAssertion estimateAssertion = new EstimateAssertion(TOLERANCE); estimateAssertion.assertClose(actual.getDistinctValuesCount(), expected.getDistinctValuesCount(), "distinctValuesCount"); estimateAssertion.assertClose(actual.getDataSize(), expected.getDataSize(), "dataSize"); estimateAssertion.assertClose(actual.getNullsFraction(), expected.getNullsFraction(), "nullsFraction"); estimateAssertion.assertClose(actual.getRange(), expected.getRange(), "range"); }
private SymbolStatsEstimate toSymbolStatistics(TableStatistics tableStatistics, ColumnStatistics columnStatistics) { double nullsFraction = columnStatistics.getNullsFraction().getValue(); double nonNullRowsCount = tableStatistics.getRowCount().getValue() * (1.0 - nullsFraction); double averageRowSize = nonNullRowsCount == 0 ? 0 : columnStatistics.getDataSize().getValue() / nonNullRowsCount; SymbolStatsEstimate.Builder result = SymbolStatsEstimate.builder(); result.setNullsFraction(nullsFraction); result.setDistinctValuesCount(columnStatistics.getDistinctValuesCount().getValue()); result.setAverageRowSize(averageRowSize); columnStatistics.getRange().ifPresent(range -> { result.setLowValue(range.getMin()); result.setHighValue(range.getMax()); }); return result.build(); } }
columnStatistics.getNullsFraction().isUnknown(), "unknown nulls fraction for " + columnName);
private void assertColumnStatistics(ColumnStatistics actual, ColumnStatistics expected) { estimateAssertion.assertClose(actual.getNullsFraction(), expected.getNullsFraction(), "Nulls fraction"); estimateAssertion.assertClose(actual.getDataSize(), expected.getDataSize(), "Data size"); estimateAssertion.assertClose(actual.getDistinctValuesCount(), expected.getDistinctValuesCount(), "Distinct values count"); assertEquals(actual.getRange(), expected.getRange()); } }
private void testColumnStats(String schema, TpchTable<?> table, TpchColumn<?> column, Constraint<ColumnHandle> constraint, ColumnStatistics expected) { TpchTableHandle tableHandle = tpchMetadata.getTableHandle(session, new SchemaTableName(schema, table.getTableName())); TableStatistics tableStatistics = tpchMetadata.getTableStatistics(session, tableHandle, constraint); ColumnHandle columnHandle = tpchMetadata.getColumnHandles(session, tableHandle).get(column.getSimplifiedColumnName()); ColumnStatistics actual = tableStatistics.getColumnStatistics().get(columnHandle); EstimateAssertion estimateAssertion = new EstimateAssertion(TOLERANCE); estimateAssertion.assertClose(actual.getDistinctValuesCount(), expected.getDistinctValuesCount(), "distinctValuesCount"); estimateAssertion.assertClose(actual.getDataSize(), expected.getDataSize(), "dataSize"); estimateAssertion.assertClose(actual.getNullsFraction(), expected.getNullsFraction(), "nullsFraction"); estimateAssertion.assertClose(actual.getRange(), expected.getRange(), "range"); }
columnStatistics.getNullsFraction().isUnknown(), "unknown nulls fraction for " + columnName);