/**
 * Estimates statistics for a TPCH table.
 *
 * <p>When {@code predicatePushdownEnabled} is set, the constraint is first translated into
 * per-column value restrictions that are handed to the statistics estimator; otherwise the
 * estimator is called with no restrictions.
 *
 * @param session the connector session, used to resolve the table's column handles
 * @param tableHandle the table to describe; must be a {@link TpchTableHandle}
 * @param constraint the constraint used to derive column value restrictions
 * @return the estimated statistics, or {@link TableStatistics#empty()} when the estimator
 *         produces no data
 */
@Override
public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTableHandle tableHandle, Constraint<ColumnHandle> constraint)
{
    TpchTableHandle tpchTableHandle = (TpchTableHandle) tableHandle;
    String tableName = tpchTableHandle.getTableName();
    TpchTable<?> tpchTable = TpchTable.getTable(tableName);

    Map<TpchColumn<?>, List<Object>> columnValuesRestrictions = ImmutableMap.of();
    if (predicatePushdownEnabled) {
        columnValuesRestrictions = getColumnValuesRestrictions(tpchTable, constraint);
    }

    Optional<TableStatisticsData> optionalTableStatisticsData = statisticsEstimator.estimateStats(tpchTable, columnValuesRestrictions, tpchTableHandle.getScaleFactor());

    Map<String, ColumnHandle> columnHandles = getColumnHandles(session, tpchTableHandle);
    // Use the value supplied by map() rather than reaching back into the Optional with get().
    return optionalTableStatisticsData
            .map(tableStatisticsData -> toTableStatistics(tableStatisticsData, tpchTableHandle, columnHandles))
            .orElse(TableStatistics.empty());
}
/**
 * Returns the Presto type of the column at the given field position.
 *
 * @param field index of the field whose type is requested
 * @return the Presto type mapped from the TPCH column at that position
 */
@Override
public Type getType(int field)
{
    return getPrestoType(
            getTpchColumn(field));
}
/**
 * Resolves the handle used for statistics collection.
 *
 * <p>The {@code analyzeProperties} argument is not consulted; the result is the same handle
 * that {@code getTableHandle(session, tableName)} returns.
 *
 * @param session the connector session
 * @param tableName the schema-qualified table name
 * @param analyzeProperties analyze properties (ignored by this implementation)
 * @return the handle for the named table
 */
@Override
public ConnectorTableHandle getTableHandleForStatisticsCollection(ConnectorSession session, SchemaTableName tableName, Map<String, Object> analyzeProperties)
{
    ConnectorTableHandle resolved = getTableHandle(session, tableName);
    return resolved;
}
/**
 * Builds the metadata for the table referenced by the given handle.
 *
 * <p>The schema name is derived from the handle's scale factor and the column names follow
 * this instance's {@code columnNaming}.
 *
 * @param session the connector session (not used by this method)
 * @param tableHandle the table to describe; must be a {@link TpchTableHandle}
 * @return metadata for the table
 */
@Override
public ConnectorTableMetadata getTableMetadata(ConnectorSession session, ConnectorTableHandle tableHandle)
{
    TpchTableHandle handle = (TpchTableHandle) tableHandle;

    TpchTable<?> table = TpchTable.getTable(handle.getTableName());
    String schema = scaleFactorSchemaName(handle.getScaleFactor());

    return getTableMetadata(schema, table, columnNaming);
}
Map<String, ColumnHandle> columns = getColumnHandles(session, tableHandle); if (tableHandle.getTableName().equals(TpchTable.ORDERS.getTableName())) { if (partitioningEnabled) { new TpchPartitioningHandle( TpchTable.ORDERS.getTableName(), calculateTotalRows(OrderGenerator.SCALE_BASE, tableHandle.getScaleFactor())), ImmutableList.of(orderKeyColumn))); partitioningColumns = Optional.of(ImmutableSet.of(orderKeyColumn)); predicate = toTupleDomain(ImmutableMap.of( toColumnHandle(OrderColumn.ORDER_STATUS), filterValues(ORDER_STATUS_NULLABLE_VALUES, OrderColumn.ORDER_STATUS, constraint))); unenforcedConstraint = filterOutColumnFromPredicate(constraint.getSummary(), toColumnHandle(OrderColumn.ORDER_STATUS)); predicate = toTupleDomain(ImmutableMap.of( toColumnHandle(PartColumn.CONTAINER), filterValues(PART_CONTAINER_NULLABLE_VALUES, PartColumn.CONTAINER, constraint), toColumnHandle(PartColumn.TYPE), filterValues(PART_TYPE_NULLABLE_VALUES, PartColumn.TYPE, constraint))); unenforcedConstraint = filterOutColumnFromPredicate(constraint.getSummary(), toColumnHandle(PartColumn.CONTAINER)); unenforcedConstraint = filterOutColumnFromPredicate(unenforcedConstraint, toColumnHandle(PartColumn.TYPE)); new TpchPartitioningHandle( TpchTable.ORDERS.getTableName(), calculateTotalRows(OrderGenerator.SCALE_BASE, tableHandle.getScaleFactor())), ImmutableList.of(orderKeyColumn))); partitioningColumns = Optional.of(ImmutableSet.of(orderKeyColumn));
/**
 * Builds an indexed table for every (table, key column set) pair listed in the index spec.
 *
 * <p>For each indexed table the full record set is read (part 0 of 1, no predicate) and
 * indexed by the requested key columns. The results are stored in {@code indexedTables},
 * keyed by the set of scaled key columns.
 *
 * @param connectorId connector id used to create the backing {@link TpchMetadata}
 * @param tpchIndexSpec describes which tables and column sets to index
 * @throws NullPointerException if either argument is null
 */
public TpchIndexedData(String connectorId, TpchIndexSpec tpchIndexSpec)
{
    requireNonNull(connectorId, "connectorId is null");
    requireNonNull(tpchIndexSpec, "tpchIndexSpec is null");

    TpchMetadata metadata = new TpchMetadata(connectorId);
    TpchRecordSetProvider recordSets = new TpchRecordSetProvider();

    ImmutableMap.Builder<Set<TpchScaledColumn>, IndexedTable> indexes = ImmutableMap.builder();

    Set<TpchScaledTable> scaledTables = tpchIndexSpec.listIndexedTables();
    for (TpchScaledTable scaledTable : scaledTables) {
        SchemaTableName qualifiedName = new SchemaTableName("sf" + scaledTable.getScaleFactor(), scaledTable.getTableName());
        TpchTableHandle handle = metadata.getTableHandle(null, qualifiedName);
        // Copy into a LinkedHashMap so keys and values iterate in the same order below.
        Map<String, ColumnHandle> handlesByName = new LinkedHashMap<>(metadata.getColumnHandles(null, handle));

        for (Set<String> indexColumns : tpchIndexSpec.getColumnIndexes(scaledTable)) {
            // Finalize the key order
            List<String> orderedKeyNames = ImmutableList.copyOf(indexColumns);
            Set<TpchScaledColumn> scaledKeys = orderedKeyNames.stream()
                    .map(columnName -> new TpchScaledColumn(scaledTable, columnName))
                    .collect(toImmutableSet());

            TpchTable<?> sourceTable = TpchTable.getTable(scaledTable.getTableName());
            RecordSet allRows = recordSets.getRecordSet(
                    sourceTable,
                    ImmutableList.copyOf(handlesByName.values()),
                    scaledTable.getScaleFactor(),
                    0,
                    1,
                    TupleDomain.all());

            indexes.put(
                    scaledKeys,
                    indexTable(allRows, ImmutableList.copyOf(handlesByName.keySet()), orderedKeyNames));
        }
    }

    indexedTables = indexes.build();
}
/**
 * Creates a tuple domain that pins two columns to single fixed values.
 *
 * @param tpchMetadata metadata used to translate TPCH columns into column handles
 * @param column1 first column to fix
 * @param value1 value for {@code column1}
 * @param column2 second column to fix
 * @param value2 value for {@code column2}
 * @return a tuple domain built from the two fixed values
 */
private static TupleDomain<ColumnHandle> fixedValueTupleDomain(TpchMetadata tpchMetadata, TpchColumn<?> column1, Object value1, TpchColumn<?> column2, Object value2)
{
    ColumnHandle firstHandle = tpchMetadata.toColumnHandle(column1);
    NullableValue firstValue = new NullableValue(getPrestoType(column1), value1);

    ColumnHandle secondHandle = tpchMetadata.toColumnHandle(column2);
    NullableValue secondValue = new NullableValue(getPrestoType(column2), value2);

    return TupleDomain.fromFixedValues(ImmutableMap.of(firstHandle, firstValue, secondHandle, secondValue));
}
/**
 * Checks that the statistics reported for a single column are within {@code TOLERANCE}
 * of the expected ones.
 *
 * <p>Compares distinct values count, data size, nulls fraction and range.
 *
 * @param schema schema name used to look up the table
 * @param table TPCH table under test
 * @param column column whose statistics are checked
 * @param constraint constraint passed to {@code getTableStatistics}
 * @param expected the expected column statistics
 */
private void testColumnStats(String schema, TpchTable<?> table, TpchColumn<?> column, Constraint<ColumnHandle> constraint, ColumnStatistics expected)
{
    SchemaTableName qualifiedName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, qualifiedName);
    TableStatistics stats = tpchMetadata.getTableStatistics(session, handle, constraint);

    ColumnHandle target = tpchMetadata.getColumnHandles(session, handle)
            .get(column.getSimplifiedColumnName());
    ColumnStatistics actual = stats.getColumnStatistics().get(target);

    EstimateAssertion closeEnough = new EstimateAssertion(TOLERANCE);
    closeEnough.assertClose(actual.getDistinctValuesCount(), expected.getDistinctValuesCount(), "distinctValuesCount");
    closeEnough.assertClose(actual.getDataSize(), expected.getDataSize(), "dataSize");
    closeEnough.assertClose(actual.getNullsFraction(), expected.getNullsFraction(), "nullsFraction");
    closeEnough.assertClose(actual.getRange(), expected.getRange(), "range");
}
/**
 * Asserts that the row count of the given table is reported as unknown when statistics
 * are requested with an always-true constraint.
 *
 * @param schema schema name used to look up the table
 * @param table TPCH table under test
 */
private void testNoTableStats(String schema, TpchTable<?> table)
{
    SchemaTableName qualifiedName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, qualifiedName);

    TableStatistics stats = tpchMetadata.getTableStatistics(session, handle, alwaysTrue());

    assertTrue(stats.getRowCount().isUnknown());
}
@Test public void testPartTypeAndPartContainerPredicatePushdown() TpchTableHandle tableHandle = tpchMetadata.getTableHandle(session, new SchemaTableName("sf1", PART.getTableName())); assertTupleDomainEquals(tableLayout.getUnenforcedConstraint(), TupleDomain.all(), session); assertTupleDomainEquals( filterOutColumnFromPredicate(tableLayout.getTableLayout().getPredicate(), tpchMetadata.toColumnHandle(PartColumn.CONTAINER)), domain, session); assertTupleDomainEquals(tableLayout.getUnenforcedConstraint(), TupleDomain.all(), session); assertTupleDomainEquals( filterOutColumnFromPredicate(tableLayout.getTableLayout().getPredicate(), tpchMetadata.toColumnHandle(PartColumn.TYPE)), domain, session);
/**
 * Inserts all rows of a TPCH table from the tiny schema.
 *
 * <p>The table handle is resolved against {@code TINY_SCHEMA_NAME}; its metadata and a
 * record set generated at the handle's scale factor are passed to the
 * {@code insertRows(metadata, handle, recordSet)} overload.
 *
 * <p>NOTE(review): {@code handle} is not declared in this method; presumably it is a field
 * of the enclosing class. Confirm against the full file.
 *
 * @param tpchMetadata metadata used to resolve the table handle and table metadata
 * @param tpchTable the TPCH table whose rows are inserted
 */
private void insertRows(TpchMetadata tpchMetadata, TpchTable<?> tpchTable)
{
    TpchTableHandle tableHandle = tpchMetadata.getTableHandle(null, new SchemaTableName(TINY_SCHEMA_NAME, tpchTable.getTableName()));
    insertRows(tpchMetadata.getTableMetadata(null, tableHandle), handle, createTpchRecordSet(tpchTable, tableHandle.getScaleFactor()));
}
/**
 * Lists the columns of every TPCH table matching the given prefix.
 *
 * <p>A table matches when the prefix has no table name or when the names are equal. Schemas
 * are taken from {@code getSchemaNames(session, prefix.getSchema())}.
 *
 * @param session the connector session
 * @param prefix schema/table prefix to match
 * @return column metadata keyed by schema-qualified table name
 */
@Override
public Map<SchemaTableName, List<ColumnMetadata>> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix)
{
    ImmutableMap.Builder<SchemaTableName, List<ColumnMetadata>> columnsByTable = ImmutableMap.builder();

    for (String schemaName : getSchemaNames(session, prefix.getSchema())) {
        for (TpchTable<?> candidate : TpchTable.getTables()) {
            boolean matchesPrefix = prefix.getTableName() == null
                    || candidate.getTableName().equals(prefix.getTableName());
            if (!matchesPrefix) {
                continue;
            }

            ConnectorTableMetadata metadata = getTableMetadata(schemaName, candidate, columnNaming);
            SchemaTableName qualifiedName = new SchemaTableName(schemaName, candidate.getTableName());
            columnsByTable.put(qualifiedName, metadata.getColumns());
        }
    }

    return columnsByTable.build();
}
/**
 * Translates a constraint into per-column lists of allowed values.
 *
 * <ul>
 * <li>An "all" summary yields no restrictions (empty map).</li>
 * <li>A "none" summary maps every column of the table to an empty list of values.</li>
 * <li>Otherwise only the {@code ORDER_STATUS} domain is considered: the values of
 *     {@code ORDER_STATUS_VALUES} included by that domain are passed through
 *     {@code avoidTrivialOrderStatusRestriction}. With no such domain the result is empty.</li>
 * </ul>
 *
 * @param tpchTable the table whose columns are restricted
 * @param constraint the constraint whose summary is inspected
 * @return allowed values per restricted column
 */
private Map<TpchColumn<?>, List<Object>> getColumnValuesRestrictions(TpchTable<?> tpchTable, Constraint<ColumnHandle> constraint)
{
    TupleDomain<ColumnHandle> summary = constraint.getSummary();

    if (summary.isAll()) {
        return emptyMap();
    }

    if (summary.isNone()) {
        Set<TpchColumn<?>> allColumns = ImmutableSet.copyOf(tpchTable.getColumns());
        return asMap(allColumns, key -> emptyList());
    }

    Domain orderStatusDomain = summary.getDomains().get().get(toColumnHandle(ORDER_STATUS));
    if (orderStatusDomain == null) {
        return emptyMap();
    }

    List<Object> permittedStatuses = ORDER_STATUS_VALUES.stream()
            .filter(orderStatusDomain::includesNullableValue)
            .collect(toList());

    Map<TpchColumn<?>, List<Object>> restriction = avoidTrivialOrderStatusRestriction(permittedStatuses);
    // A null result falls back to "no restrictions", as it would through Optional.map/orElse.
    if (restriction == null) {
        return emptyMap();
    }
    return restriction;
}
@Override public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTableLayoutHandle handle) { TpchTableLayoutHandle layout = (TpchTableLayoutHandle) handle; // tables in this connector have a single layout return getTableLayouts(session, layout.getTable(), Constraint.alwaysTrue(), Optional.empty()) .get(0) .getTableLayout(); }
/**
 * Looks up the metadata of a single column by name.
 *
 * @param session the connector session
 * @param tableHandle the table that owns the column
 * @param columnHandle the column to describe; must be a {@link TpchColumnHandle}
 * @return the first column of the table whose name equals the handle's column name
 * @throws IllegalArgumentException if the table has no column with that name
 */
@Override
public ColumnMetadata getColumnMetadata(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle columnHandle)
{
    ConnectorTableMetadata tableMetadata = getTableMetadata(session, tableHandle);
    String columnName = ((TpchColumnHandle) columnHandle).getColumnName();

    return tableMetadata.getColumns().stream()
            .filter(candidate -> candidate.getName().equals(columnName))
            .findFirst()
            .orElseThrow(() -> new IllegalArgumentException(String.format("Table %s does not have column %s", tableMetadata.getTable(), columnName)));
}
/**
 * Keeps only the candidate values of a column that satisfy the constraint.
 *
 * <p>A value is kept when it passes the predicate derived from the constraint summary for
 * the column and, if the constraint carries an explicit predicate, when that predicate
 * accepts a single-entry map from the column's handle to the value.
 *
 * @param nullableValues candidate values for the column
 * @param column the column the values belong to
 * @param constraint the constraint to evaluate
 * @return the values accepted by the constraint
 */
private Set<NullableValue> filterValues(Set<NullableValue> nullableValues, TpchColumn<?> column, Constraint<ColumnHandle> constraint)
{
    return nullableValues.stream()
            .filter(convertToPredicate(constraint.getSummary(), toColumnHandle(column)))
            .filter(candidate -> constraint.predicate()
                    .map(rowFilter -> rowFilter.test(ImmutableMap.of(toColumnHandle(column), candidate)))
                    .orElse(true))
            .collect(toSet());
}
/**
 * Creates a new {@link TpchMetadata} configured with this connector's catalog name, column
 * naming, predicate pushdown flag and partitioning flag.
 *
 * @param transaction the transaction handle (not used by this method)
 * @return a freshly created metadata instance
 */
@Override
public ConnectorMetadata getMetadata(ConnectorTransactionHandle transaction)
{
    ConnectorMetadata metadata = new TpchMetadata(
            catalogName,
            columnNaming,
            predicatePushdownEnabled,
            partitioningEnabled);
    return metadata;
}
Map<String, ColumnHandle> columns = getColumnHandles(session, tableHandle); if (tableHandle.getTableName().equals(TpchTable.ORDERS.getTableName())) { if (partitioningEnabled) { new TpchPartitioningHandle( TpchTable.ORDERS.getTableName(), calculateTotalRows(OrderGenerator.SCALE_BASE, tableHandle.getScaleFactor())), ImmutableList.of(orderKeyColumn))); partitioningColumns = Optional.of(ImmutableSet.of(orderKeyColumn)); predicate = toTupleDomain(ImmutableMap.of( toColumnHandle(OrderColumn.ORDER_STATUS), filterValues(ORDER_STATUS_NULLABLE_VALUES, OrderColumn.ORDER_STATUS, constraint))); unenforcedConstraint = filterOutColumnFromPredicate(constraint.getSummary(), toColumnHandle(OrderColumn.ORDER_STATUS)); predicate = toTupleDomain(ImmutableMap.of( toColumnHandle(PartColumn.CONTAINER), filterValues(PART_CONTAINER_NULLABLE_VALUES, PartColumn.CONTAINER, constraint), toColumnHandle(PartColumn.TYPE), filterValues(PART_TYPE_NULLABLE_VALUES, PartColumn.TYPE, constraint))); unenforcedConstraint = filterOutColumnFromPredicate(constraint.getSummary(), toColumnHandle(PartColumn.CONTAINER)); unenforcedConstraint = filterOutColumnFromPredicate(unenforcedConstraint, toColumnHandle(PartColumn.TYPE)); new TpchPartitioningHandle( TpchTable.ORDERS.getTableName(), calculateTotalRows(OrderGenerator.SCALE_BASE, tableHandle.getScaleFactor())), ImmutableList.of(orderKeyColumn))); partitioningColumns = Optional.of(ImmutableSet.of(orderKeyColumn));
/**
 * Creates a tuple domain that pins two columns to single fixed values.
 *
 * @param tpchMetadata metadata used to translate TPCH columns into column handles
 * @param column1 first column to fix
 * @param value1 value for {@code column1}
 * @param column2 second column to fix
 * @param value2 value for {@code column2}
 * @return a tuple domain built from the two fixed values
 */
private static TupleDomain<ColumnHandle> fixedValueTupleDomain(TpchMetadata tpchMetadata, TpchColumn<?> column1, Object value1, TpchColumn<?> column2, Object value2)
{
    ColumnHandle firstHandle = tpchMetadata.toColumnHandle(column1);
    NullableValue firstValue = new NullableValue(getPrestoType(column1), value1);

    ColumnHandle secondHandle = tpchMetadata.toColumnHandle(column2);
    NullableValue secondValue = new NullableValue(getPrestoType(column2), value2);

    return TupleDomain.fromFixedValues(ImmutableMap.of(firstHandle, firstValue, secondHandle, secondValue));
}
/**
 * Checks that the statistics reported for a single column are within {@code TOLERANCE}
 * of the expected ones.
 *
 * <p>Compares distinct values count, data size, nulls fraction and range.
 *
 * @param schema schema name used to look up the table
 * @param table TPCH table under test
 * @param column column whose statistics are checked
 * @param constraint constraint passed to {@code getTableStatistics}
 * @param expected the expected column statistics
 */
private void testColumnStats(String schema, TpchTable<?> table, TpchColumn<?> column, Constraint<ColumnHandle> constraint, ColumnStatistics expected)
{
    SchemaTableName qualifiedName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle handle = tpchMetadata.getTableHandle(session, qualifiedName);
    TableStatistics stats = tpchMetadata.getTableStatistics(session, handle, constraint);

    ColumnHandle target = tpchMetadata.getColumnHandles(session, handle)
            .get(column.getSimplifiedColumnName());
    ColumnStatistics actual = stats.getColumnStatistics().get(target);

    EstimateAssertion closeEnough = new EstimateAssertion(TOLERANCE);
    closeEnough.assertClose(actual.getDistinctValuesCount(), expected.getDistinctValuesCount(), "distinctValuesCount");
    closeEnough.assertClose(actual.getDataSize(), expected.getDataSize(), "dataSize");
    closeEnough.assertClose(actual.getNullsFraction(), expected.getNullsFraction(), "nullsFraction");
    closeEnough.assertClose(actual.getRange(), expected.getRange(), "range");
}