/**
 * Builds the statistics-data filename for {@code table}: the table name, with a
 * "." + partition-description suffix appended when a partition column/value pair
 * yields a description.
 */
private String tableStatisticsDataFilename(TpchTable<?> table, Optional<TpchColumn<?>> partitionColumn, Optional<String> partitionValue)
{
    String suffix = getPartitionDescription(partitionColumn, partitionValue)
            .map(description -> "." + description)
            .orElse("");
    return table.getTableName() + suffix;
}
/**
 * Creates TPCH metadata for the given connector.
 * Caches the set of known TPCH table names and builds the statistics estimator eagerly.
 */
public TpchMetadata(String connectorId, ColumnNaming columnNaming, boolean predicatePushdownEnabled, boolean partitioningEnabled)
{
    ImmutableSet.Builder<String> names = ImmutableSet.builder();
    TpchTable.getTables().forEach(table -> names.add(table.getTableName()));
    this.tableNames = names.build();
    this.connectorId = connectorId;
    this.columnNaming = columnNaming;
    this.predicatePushdownEnabled = predicatePushdownEnabled;
    this.partitioningEnabled = partitioningEnabled;
    // Estimator is built last, after all configuration fields are assigned.
    this.statisticsEstimator = createStatisticsEstimator();
}
/**
 * Returns the Redis key prefix for a TPCH table: "&lt;schema&gt;:&lt;table&gt;"
 * with the table name lower-cased (locale-independent).
 */
private static String redisTableName(TpchTable<?> table)
{
    String tableName = table.getTableName().toLowerCase(ENGLISH);
    return TPCH_SCHEMA + ":" + tableName;
}
/**
 * Returns the Kafka topic name for a TPCH table: "&lt;schema&gt;.&lt;table&gt;"
 * with the table name lower-cased (locale-independent).
 */
private static String kafkaTopicName(TpchTable<?> table)
{
    String tableName = table.getTableName().toLowerCase(ENGLISH);
    return TPCH_SCHEMA + "." + tableName;
}
/**
 * Lists every TPCH table in each schema matched by {@code filterSchema}
 * (all schemas when the filter is empty).
 */
@Override
public List<SchemaTableName> listTables(ConnectorSession session, Optional<String> filterSchema)
{
    ImmutableList.Builder<SchemaTableName> tables = ImmutableList.builder();
    for (String schema : getSchemaNames(session, filterSchema)) {
        TpchTable.getTables().forEach(table -> tables.add(new SchemaTableName(schema, table.getTableName())));
    }
    return tables.build();
}
/**
 * Lists every TPCH table in each schema matched by {@code schemaNameOrNull}
 * as thrift schema/table name pairs.
 */
@Override
public final List<PrestoThriftSchemaTableName> listTables(PrestoThriftNullableSchemaName schemaNameOrNull)
{
    List<PrestoThriftSchemaTableName> result = new ArrayList<>();
    for (String schema : getSchemaNames(schemaNameOrNull.getSchemaName())) {
        for (TpchTable<?> table : TpchTable.getTables()) {
            result.add(new PrestoThriftSchemaTableName(schema, table.getTableName()));
        }
    }
    return result;
}
/**
 * Returns the column metadata of every TPCH table matching {@code prefix}.
 * A null table name in the prefix matches all tables in the matched schemas.
 */
@Override
public Map<SchemaTableName, List<ColumnMetadata>> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix)
{
    ImmutableMap.Builder<SchemaTableName, List<ColumnMetadata>> result = ImmutableMap.builder();
    for (String schema : getSchemaNames(session, Optional.ofNullable(prefix.getSchemaName()))) {
        for (TpchTable<?> table : TpchTable.getTables()) {
            String tableName = table.getTableName();
            // Skip tables that don't match an explicit table-name filter.
            if (prefix.getTableName() != null && !tableName.equals(prefix.getTableName())) {
                continue;
            }
            ConnectorTableMetadata metadata = getTableMetadata(schema, table, columnNaming);
            result.put(new SchemaTableName(schema, tableName), metadata.getColumns());
        }
    }
    return result.build();
}
/**
 * Builds connector table metadata for a TPCH table: one column per TPCH column
 * (named via {@code columnNaming}) plus a hidden BIGINT row-number column.
 */
private static ConnectorTableMetadata getTableMetadata(String schemaName, TpchTable<?> tpchTable, ColumnNaming columnNaming)
{
    ImmutableList.Builder<ColumnMetadata> columns = ImmutableList.builder();
    for (TpchColumn<? extends TpchEntity> column : tpchTable.getColumns()) {
        columns.add(new ColumnMetadata(columnNaming.getName(column), getPrestoType(column)));
    }
    // Synthetic hidden column exposing the row number.
    columns.add(new ColumnMetadata(ROW_NUMBER_COLUMN_NAME, BIGINT, null, true));
    return new ConnectorTableMetadata(new SchemaTableName(schemaName, tpchTable.getTableName()), columns.build());
}
/**
 * Copies the given TPCH tables from {@code sourceCatalog.sourceSchema} into the
 * query runner's target catalog, logging total elapsed time in seconds.
 */
public static void copyTpchTables(
        QueryRunner queryRunner,
        String sourceCatalog,
        String sourceSchema,
        Session session,
        Iterable<TpchTable<?>> tables)
{
    log.info("Loading data from %s.%s...", sourceCatalog, sourceSchema);
    long start = System.nanoTime();
    for (TpchTable<?> table : tables) {
        String tableName = table.getTableName().toLowerCase(ENGLISH);
        copyTable(queryRunner, sourceCatalog, sourceSchema, tableName, session);
    }
    log.info("Loading from %s.%s complete in %s", sourceCatalog, sourceSchema, nanosSince(start).toString(SECONDS));
}
/**
 * Imports one TPCH table into embedded Redis via {@link RedisTestUtils},
 * logging start and elapsed time.
 */
private static void loadTpchTable(EmbeddedRedis embeddedRedis, TestingPrestoClient prestoClient, TpchTable<?> table, String dataFormat)
{
    long start = System.nanoTime();
    log.info("Running import for %s", table.getTableName());
    QualifiedObjectName source = new QualifiedObjectName("tpch", TINY_SCHEMA_NAME, table.getTableName().toLowerCase(ENGLISH));
    RedisTestUtils.loadTpchTable(embeddedRedis, prestoClient, redisTableName(table), source, dataFormat);
    log.info("Imported %s in %s", table.getTableName(), nanosSince(start).convertToMostSuccinctTimeUnit());
}
/**
 * Imports one TPCH table into an embedded Kafka topic via {@link TestUtils},
 * logging start and elapsed time.
 */
private static void loadTpchTopic(EmbeddedKafka embeddedKafka, TestingPrestoClient prestoClient, TpchTable<?> table)
{
    long start = System.nanoTime();
    log.info("Running import for %s", table.getTableName());
    TestUtils.loadTpchTopic(embeddedKafka, prestoClient, kafkaTopicName(table), new QualifiedObjectName("tpch", TINY_SCHEMA_NAME, table.getTableName().toLowerCase(ENGLISH)));
    // Bug fix: the format string has two %s placeholders but the call passed a stray
    // leading 0, logging "Imported 0 in <tableName>" and dropping the duration.
    // Matches the correct two-argument form used by the sibling load methods.
    log.info("Imported %s in %s", table.getTableName(), nanosSince(start).convertToMostSuccinctTimeUnit());
}
/**
 * Imports one TPCH table into the embedded Elasticsearch node by selecting all
 * rows from the tiny TPCH schema, logging start and elapsed time.
 */
private static void loadTpchTopic(EmbeddedElasticsearchNode embeddedElasticsearchNode, TestingPrestoClient prestoClient, TpchTable<?> table)
{
    long start = System.nanoTime();
    String lowerCaseName = table.getTableName().toLowerCase(ENGLISH);
    LOG.info("Running import for %s", table.getTableName());
    ElasticsearchLoader loader = new ElasticsearchLoader(
            embeddedElasticsearchNode.getClient(),
            lowerCaseName,
            prestoClient.getServer(),
            prestoClient.getDefaultSession());
    loader.execute(format("SELECT * from %s", new QualifiedObjectName(TPCH_SCHEMA, TINY_SCHEMA_NAME, lowerCaseName)));
    LOG.info("Imported %s in %s", table.getTableName(), nanosSince(start).convertToMostSuccinctTimeUnit());
}
/**
 * Loads a Redis table description for each TPCH table and keys it by its
 * schema-qualified name. The loader returns a map entry, which is added to the
 * builder via the entry-accepting {@code put} overload.
 *
 * @throws Exception if a table description cannot be loaded
 */
private static Map<SchemaTableName, RedisTableDescription> createTpchTableDescriptions(Metadata metadata, Iterable<TpchTable<?>> tables, String dataFormat)
        throws Exception
{
    JsonCodec<RedisTableDescription> codec = new CodecSupplier<>(RedisTableDescription.class, metadata).get();
    ImmutableMap.Builder<SchemaTableName, RedisTableDescription> descriptions = ImmutableMap.builder();
    for (TpchTable<?> table : tables) {
        SchemaTableName schemaTableName = new SchemaTableName(TPCH_SCHEMA, table.getTableName());
        descriptions.put(loadTpchTableDescription(codec, schemaTableName, dataFormat));
    }
    return descriptions.build();
}
/**
 * Verifies that the estimated output row count of a full scan of every TPCH
 * table matches the actual row count exactly (no error tolerance).
 */
@Test
public void testTableScanStats()
{
    for (TpchTable<?> table : TpchTable.getTables()) {
        statisticsAssertion.check(
                "SELECT * FROM " + table.getTableName(),
                checks -> checks.estimate(OUTPUT_ROW_COUNT, noError()));
    }
}
/**
 * Resolves the tiny-schema handle for {@code tpchTable} and inserts its TPCH
 * record set through the metadata-based overload.
 *
 * NOTE(review): {@code handle} is not declared in this method — presumably an
 * enclosing-class field; confirm. A null session is passed to the metadata calls.
 */
private void insertRows(TpchMetadata tpchMetadata, TpchTable tpchTable)
{
    SchemaTableName schemaTableName = new SchemaTableName(TINY_SCHEMA_NAME, tpchTable.getTableName());
    TpchTableHandle tableHandle = tpchMetadata.getTableHandle(null, schemaTableName);
    insertRows(
            tpchMetadata.getTableMetadata(null, tableHandle),
            handle,
            createTpchRecordSet(tpchTable, tableHandle.getScaleFactor()));
}
/**
 * Loads a Kafka topic description for each TPCH table, keyed by its
 * schema-qualified name. The loader returns a map entry, which is added to the
 * builder via the entry-accepting {@code put} overload.
 *
 * @throws Exception if a topic description cannot be loaded
 */
private static Map<SchemaTableName, KafkaTopicDescription> createTpchTopicDescriptions(Metadata metadata, Iterable<TpchTable<?>> tables)
        throws Exception
{
    JsonCodec<KafkaTopicDescription> codec = new CodecSupplier<>(KafkaTopicDescription.class, metadata).get();
    ImmutableMap.Builder<SchemaTableName, KafkaTopicDescription> descriptions = ImmutableMap.builder();
    for (TpchTable<?> table : tables) {
        SchemaTableName schemaTableName = new SchemaTableName(TPCH_SCHEMA, table.getTableName());
        // Topic name is the "schema.table" rendering of the schema-qualified name.
        descriptions.put(loadTpchTopicDescription(codec, schemaTableName.toString(), schemaTableName));
    }
    return descriptions.build();
}
/**
 * Copies the given TPCH tables from {@code sourceCatalog.sourceSchema} into the
 * query runner's target catalog using bucketed copies, logging total elapsed
 * time in seconds.
 */
public static void copyTpchTablesBucketed(
        QueryRunner queryRunner,
        String sourceCatalog,
        String sourceSchema,
        Session session,
        Iterable<TpchTable<?>> tables)
{
    log.info("Loading data from %s.%s...", sourceCatalog, sourceSchema);
    long start = System.nanoTime();
    for (TpchTable<?> table : tables) {
        QualifiedObjectName source = new QualifiedObjectName(sourceCatalog, sourceSchema, table.getTableName().toLowerCase(ENGLISH));
        copyTableBucketed(queryRunner, source, session);
    }
    log.info("Loading from %s.%s complete in %s", sourceCatalog, sourceSchema, nanosSince(start).toString(SECONDS));
}
/**
 * Asserts that the table's estimated row count under {@code constraint} is a
 * known estimate and matches {@code expectedRowCount} within the relative
 * TOLERANCE.
 */
private void testTableStats(String schema, TpchTable<?> table, Constraint<ColumnHandle> constraint, double expectedRowCount)
{
    TpchTableHandle tableHandle = tpchMetadata.getTableHandle(session, new SchemaTableName(schema, table.getTableName()));
    TableStatistics statistics = tpchMetadata.getTableStatistics(session, tableHandle, constraint);
    double actualRowCount = statistics.getRowCount().getValue();
    // The row count must be a concrete (known) estimate, not an unknown.
    assertEquals(statistics.getRowCount(), Estimate.of(actualRowCount));
    assertEquals(actualRowCount, expectedRowCount, expectedRowCount * TOLERANCE);
}
/**
 * Asserts that no row-count statistics are available for the table when queried
 * with an always-true constraint.
 */
private void testNoTableStats(String schema, TpchTable<?> table)
{
    SchemaTableName schemaTableName = new SchemaTableName(schema, table.getTableName());
    TpchTableHandle tableHandle = tpchMetadata.getTableHandle(session, schemaTableName);
    TableStatistics statistics = tpchMetadata.getTableStatistics(session, tableHandle, alwaysTrue());
    assertTrue(statistics.getRowCount().isUnknown());
}
/**
 * Asserts that the column's statistics under {@code constraint} match
 * {@code expected} within TOLERANCE: distinct-values count, data size,
 * nulls fraction, and range are each compared.
 */
private void testColumnStats(String schema, TpchTable<?> table, TpchColumn<?> column, Constraint<ColumnHandle> constraint, ColumnStatistics expected)
{
    TpchTableHandle tableHandle = tpchMetadata.getTableHandle(session, new SchemaTableName(schema, table.getTableName()));
    TableStatistics statistics = tpchMetadata.getTableStatistics(session, tableHandle, constraint);
    // Look up the column's statistics via its simplified-name handle.
    ColumnHandle columnHandle = tpchMetadata.getColumnHandles(session, tableHandle).get(column.getSimplifiedColumnName());
    ColumnStatistics actual = statistics.getColumnStatistics().get(columnHandle);
    EstimateAssertion assertion = new EstimateAssertion(TOLERANCE);
    assertion.assertClose(actual.getDistinctValuesCount(), expected.getDistinctValuesCount(), "distinctValuesCount");
    assertion.assertClose(actual.getDataSize(), expected.getDataSize(), "dataSize");
    assertion.assertClose(actual.getNullsFraction(), expected.getNullsFraction(), "nullsFraction");
    assertion.assertClose(actual.getRange(), expected.getRange(), "range");
}