private static ConnectorPageSource createPageSource(HiveTransactionHandle transaction, HiveClientConfig config, File outputFile)
{
    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, config.getHiveStorageFormat().getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, config.getHiveStorageFormat().getSerDe());
    splitProperties.setProperty("columns", Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getName).collect(toList())));
    splitProperties.setProperty("columns.types", Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getHiveType).map(hiveType -> hiveType.getHiveTypeName().toString()).collect(toList())));
    HiveSplit split = new HiveSplit(
            SCHEMA_NAME,
            TABLE_NAME,
            "",
            "file:///" + outputFile.getAbsolutePath(),
            0,
            outputFile.length(),
            outputFile.length(),
            splitProperties,
            ImmutableList.of(),
            ImmutableList.of(),
            OptionalInt.empty(),
            false,
            TupleDomain.all(),
            ImmutableMap.of(),
            Optional.empty(),
            false);
    HivePageSourceProvider provider = new HivePageSourceProvider(config, createTestHdfsEnvironment(config), getDefaultHiveRecordCursorProvider(config), getDefaultHiveDataStreamFactories(config), TYPE_MANAGER);
    return provider.createPageSource(transaction, getSession(config), split, ImmutableList.copyOf(getColumnHandles()));
}
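// Sketch, not part of the original test class: one way to drain the ConnectorPageSource returned
// by createPageSource above, using only the stable SPI calls (isFinished, getNextPage,
// getPositionCount). The helper name countRows is an assumption for illustration.
private static long countRows(ConnectorPageSource pageSource)
{
    long rowCount = 0;
    while (!pageSource.isFinished()) {
        Page page = pageSource.getNextPage();
        if (page == null) {
            continue;
        }
        rowCount += page.getPositionCount();
    }
    return rowCount;
}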
checkPageSource(pageSource.get(), testColumns, getTypes(columnHandles), rowCount);
public static Set<HiveFileWriterFactory> getDefaultHiveFileWriterFactories(HiveClientConfig hiveClientConfig)
{
    HdfsEnvironment testHdfsEnvironment = createTestHdfsEnvironment(hiveClientConfig);
    return ImmutableSet.<HiveFileWriterFactory>builder()
            .add(new RcFileFileWriterFactory(testHdfsEnvironment, TYPE_MANAGER, new NodeVersion("test_version"), hiveClientConfig, new FileFormatDataSourceStats()))
            .add(getDefaultOrcFileWriterFactory(hiveClientConfig))
            .build();
}
@Test
public void testDecimalColumns()
{
    TypeManager typeManager = new TypeRegistry();
    IonSqlQueryBuilder queryBuilder = new IonSqlQueryBuilder(typeManager);
    List<HiveColumnHandle> columns = ImmutableList.of(
            new HiveColumnHandle("quantity", HiveType.valueOf("decimal(20,0)"), parseTypeSignature(DECIMAL), 0, REGULAR, Optional.empty()),
            new HiveColumnHandle("extendedprice", HiveType.valueOf("decimal(20,2)"), parseTypeSignature(DECIMAL), 1, REGULAR, Optional.empty()),
            new HiveColumnHandle("discount", HiveType.valueOf("decimal(10,2)"), parseTypeSignature(DECIMAL), 2, REGULAR, Optional.empty()));
    DecimalType decimalType = DecimalType.createDecimalType(10, 2);
    TupleDomain<HiveColumnHandle> tupleDomain = withColumnDomains(
            ImmutableMap.of(
                    columns.get(0), Domain.create(ofRanges(Range.lessThan(DecimalType.createDecimalType(20, 0), longDecimal("50"))), false),
                    columns.get(1), Domain.create(ofRanges(Range.equal(HiveType.valueOf("decimal(20,2)").getType(typeManager), longDecimal("0.05"))), false),
                    columns.get(2), Domain.create(ofRanges(Range.range(decimalType, shortDecimal("0.0"), true, shortDecimal("0.02"), true)), false)));
    assertEquals("SELECT s._1, s._2, s._3 FROM S3Object s WHERE ((case s._1 when '' then null else CAST(s._1 AS DECIMAL(20,0)) end < 50)) AND " +
                    "(case s._2 when '' then null else CAST(s._2 AS DECIMAL(20,2)) end = 0.05) AND ((case s._3 when '' then null else CAST(s._3 AS DECIMAL(10,2)) " +
                    "end >= 0.00 AND case s._3 when '' then null else CAST(s._3 AS DECIMAL(10,2)) end <= 0.02))",
            queryBuilder.buildSql(columns, tupleDomain));
}
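// Sketch (assumption, not in the original test class): with no constraint the builder should emit
// only the projection over S3Object. The expected string mirrors the SELECT clause asserted in
// testDecimalColumns above; treat it as an illustration rather than a verified expectation.
@Test
public void testNoConstraintSketch()
{
    TypeManager typeManager = new TypeRegistry();
    IonSqlQueryBuilder queryBuilder = new IonSqlQueryBuilder(typeManager);
    List<HiveColumnHandle> columns = ImmutableList.of(
            new HiveColumnHandle("quantity", HiveType.valueOf("decimal(20,0)"), parseTypeSignature(DECIMAL), 0, REGULAR, Optional.empty()),
            new HiveColumnHandle("extendedprice", HiveType.valueOf("decimal(20,2)"), parseTypeSignature(DECIMAL), 1, REGULAR, Optional.empty()),
            new HiveColumnHandle("discount", HiveType.valueOf("decimal(10,2)"), parseTypeSignature(DECIMAL), 2, REGULAR, Optional.empty()));
    assertEquals("SELECT s._1, s._2, s._3 FROM S3Object s", queryBuilder.buildSql(columns, TupleDomain.all()));
}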
        config.getRecursiveDirWalkerEnabled());
pageSinkProvider = new HivePageSinkProvider(
        getDefaultHiveFileWriterFactories(config),
        hdfsEnvironment,
        PAGE_SORTER,
        new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig()),
        new HiveWriterStats(),
        getDefaultOrcFileWriterFactory(config));
pageSourceProvider = new HivePageSourceProvider(config, hdfsEnvironment, getDefaultHiveRecordCursorProvider(config), getDefaultHiveDataStreamFactories(config), TYPE_MANAGER);
private static ConnectorPageSource createPageSource(HiveTransactionHandle transaction, HiveClientConfig config, File outputFile)
{
    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, config.getHiveStorageFormat().getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, config.getHiveStorageFormat().getSerDe());
    splitProperties.setProperty("columns", Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getName).collect(toList())));
    splitProperties.setProperty("columns.types", Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getHiveType).map(HiveType::getHiveTypeName).collect(toList())));
    HiveSplit split = new HiveSplit(
            CLIENT_ID,
            SCHEMA_NAME,
            TABLE_NAME,
            "",
            "file:///" + outputFile.getAbsolutePath(),
            0,
            outputFile.length(),
            splitProperties,
            ImmutableList.of(),
            ImmutableList.of(),
            false,
            TupleDomain.all());
    HivePageSourceProvider provider = new HivePageSourceProvider(config, createHdfsEnvironment(config), getDefaultHiveRecordCursorProvider(config), getDefaultHiveDataStreamFactories(config), TYPE_MANAGER);
    return provider.createPageSource(transaction, getSession(config), split, ImmutableList.copyOf(getColumnHandles()));
}
        ImmutableMap.of());
JsonCodec<PartitionUpdate> partitionUpdateCodec = JsonCodec.jsonCodec(PartitionUpdate.class);
HdfsEnvironment hdfsEnvironment = createTestHdfsEnvironment(config);
HivePageSinkProvider provider = new HivePageSinkProvider(
        getDefaultHiveFileWriterFactories(config),
        hdfsEnvironment,
        PAGE_SORTER,
        new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig()),
        stats,
        getDefaultOrcFileWriterFactory(config));
return provider.createPageSink(transaction, getSession(config), handle);
public static Set<HiveRecordCursorProvider> getDefaultHiveRecordCursorProvider(HiveClientConfig hiveClientConfig)
{
    HdfsEnvironment testHdfsEnvironment = createTestHdfsEnvironment(hiveClientConfig);
    return ImmutableSet.<HiveRecordCursorProvider>builder()
            .add(new GenericHiveRecordCursorProvider(testHdfsEnvironment))
            .build();
}
@Test
public void testReuse()
{
    BytesWritable value = new BytesWritable();

    byte[] first = "hello world".getBytes(UTF_8);
    value.set(first, 0, first.length);

    byte[] second = "bye".getBytes(UTF_8);
    value.set(second, 0, second.length);

    Type type = new TypeToken<Map<BytesWritable, Long>>() {}.getType();
    ObjectInspector inspector = getInspector(type);

    // The writable is reused: after the second set(), decoding must reflect "bye",
    // not the longer "hello world" buffer that was written first.
    Block actual = getBlockObject(mapType(createUnboundedVarcharType(), BIGINT), ImmutableMap.of(value, 0L), inspector);
    Block expected = mapBlockOf(createUnboundedVarcharType(), BIGINT, "bye", 0L);
    assertBlockEquals(actual, expected);
}
private static RowType toRowType(List<ColumnMetadata> columns)
{
    return rowType(columns.stream()
            .map(col -> new NamedTypeSignature(Optional.of(new RowFieldName(format("f_%s", col.getName()), false)), col.getType().getTypeSignature()))
            .collect(toList()));
}
        false);
pageSinkProvider = new HivePageSinkProvider(
        getDefaultHiveFileWriterFactories(hiveClientConfig),
        hdfsEnvironment,
        PAGE_SORTER,
        new HiveSessionProperties(hiveClientConfig, new OrcFileWriterConfig(), new ParquetFileWriterConfig()),
        new HiveWriterStats(),
        getDefaultOrcFileWriterFactory(hiveClientConfig));
pageSourceProvider = new HivePageSourceProvider(hiveClientConfig, hdfsEnvironment, getDefaultHiveRecordCursorProvider(hiveClientConfig), getDefaultHiveDataStreamFactories(hiveClientConfig), TYPE_MANAGER);
        executor);
pageSinkProvider = new HivePageSinkProvider(hdfsEnvironment, metastoreClient, new GroupByHashPageIndexerFactory(), typeManager, new HiveClientConfig(), locationService, partitionUpdateCodec);
pageSourceProvider = new HivePageSourceProvider(hiveClientConfig, hdfsEnvironment, getDefaultHiveRecordCursorProvider(hiveClientConfig), getDefaultHiveDataStreamFactories(hiveClientConfig), TYPE_MANAGER);
@Test
public void testIsS3FileSystem()
{
    HdfsEnvironment hdfsEnvironment = createTestHdfsEnvironment(new HiveClientConfig());
    assertTrue(isS3FileSystem(CONTEXT, hdfsEnvironment, new Path("s3://test-bucket/test-folder")));
    assertFalse(isS3FileSystem(CONTEXT, hdfsEnvironment, new Path("/test-dir/test-folder")));
}
@Test
public void testMapBlock()
{
    MapHolder holder = new MapHolder();
    holder.map = new TreeMap<>();
    holder.map.put("twelve", new InnerStruct(13, 14L));
    holder.map.put("fifteen", new InnerStruct(16, 17L));

    RowType rowType = RowType.anonymous(ImmutableList.of(INTEGER, BIGINT));
    RowType rowOfMapOfVarcharRowType = RowType.anonymous(ImmutableList.of(mapType(VARCHAR, rowType)));

    Block actual = toBinaryBlock(rowOfMapOfVarcharRowType, holder, getInspector(MapHolder.class));

    Block mapBlock = mapBlockOf(
            VARCHAR,
            rowType,
            new Object[] {utf8Slice("fifteen"), utf8Slice("twelve")},
            new Object[] {rowBlockOf(rowType.getTypeParameters(), 16, 17L), rowBlockOf(rowType.getTypeParameters(), 13, 14L)});
    Block expected = rowBlockOf(ImmutableList.of(mapType(VARCHAR, rowType)), mapBlock);

    assertBlockEquals(actual, expected);
}
private MaterializedResult readTable(
        Transaction transaction,
        ConnectorTableHandle tableHandle,
        List<ColumnHandle> columnHandles,
        ConnectorSession session,
        TupleDomain<ColumnHandle> tupleDomain,
        OptionalInt expectedSplitCount,
        Optional<HiveStorageFormat> expectedStorageFormat)
        throws Exception
{
    List<ConnectorTableLayoutResult> tableLayoutResults = transaction.getMetadata().getTableLayouts(session, tableHandle, new Constraint<>(tupleDomain), Optional.empty());
    ConnectorTableLayoutHandle layoutHandle = getOnlyElement(tableLayoutResults).getTableLayout().getHandle();

    List<ConnectorSplit> splits = getAllSplits(splitManager.getSplits(transaction.getTransactionHandle(), session, layoutHandle, UNGROUPED_SCHEDULING));
    if (expectedSplitCount.isPresent()) {
        assertEquals(splits.size(), expectedSplitCount.getAsInt());
    }

    ImmutableList.Builder<MaterializedRow> allRows = ImmutableList.builder();
    for (ConnectorSplit split : splits) {
        try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, columnHandles)) {
            expectedStorageFormat.ifPresent(format -> assertPageSourceType(pageSource, format));
            MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
            allRows.addAll(result.getMaterializedRows());
        }
    }
    return new MaterializedResult(allRows.build(), getTypes(columnHandles));
}
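// Sketch (assumption): one way a caller might use readTable above. newTransaction(), newSession(),
// getTableHandle(...), and the tablePartitionFormat table name are helpers and fields the
// surrounding test class appears to provide; they are assumptions here, not verified API.
private void readWholeTableSketch()
        throws Exception
{
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorTableHandle tableHandle = getTableHandle(transaction.getMetadata(), tablePartitionFormat);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(transaction.getMetadata().getColumnHandles(session, tableHandle).values());
        MaterializedResult result = readTable(
                transaction,
                tableHandle,
                columnHandles,
                session,
                TupleDomain.all(),
                OptionalInt.empty(),
                Optional.empty());
        assertEquals(result.getTypes(), getTypes(columnHandles));
    }
}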