/**
 * Builds the default set of file writer factories for the given configuration.
 *
 * <p>The set holds an {@link RcFileFileWriterFactory} and the factory returned by
 * {@link #getDefaultOrcFileWriterFactory(HiveClientConfig)}.
 *
 * @param hiveClientConfig configuration used to create the HDFS environment and both factories
 * @return an immutable set containing the two writer factories
 */
public static Set<HiveFileWriterFactory> getDefaultHiveFileWriterFactories(HiveClientConfig hiveClientConfig)
{
    HdfsEnvironment hdfsEnvironment = createTestHdfsEnvironment(hiveClientConfig);

    HiveFileWriterFactory rcFileWriterFactory = new RcFileFileWriterFactory(
            hdfsEnvironment,
            TYPE_MANAGER,
            new NodeVersion("test_version"),
            hiveClientConfig,
            new FileFormatDataSourceStats());
    HiveFileWriterFactory orcFileWriterFactory = getDefaultOrcFileWriterFactory(hiveClientConfig);

    return ImmutableSet.of(rcFileWriterFactory, orcFileWriterFactory);
}
// Wrap the JVM's platform MBeanServer in a RebindSafeMBeanServer and bind that wrapper as the MBeanServer instance.
// Then bind instances obtained from `context`: a NodeVersion built from the current node's version,
// the NodeManager itself, and the TypeManager.
MBeanServer platformMBeanServer = ManagementFactory.getPlatformMBeanServer(); binder.bind(MBeanServer.class).toInstance(new RebindSafeMBeanServer(platformMBeanServer)); binder.bind(NodeVersion.class).toInstance(new NodeVersion(context.getNodeManager().getCurrentNode().getVersion())); binder.bind(NodeManager.class).toInstance(context.getNodeManager()); binder.bind(TypeManager.class).toInstance(context.getTypeManager());
/**
 * Creates an {@link OrcFileWriterFactory} for the given configuration.
 *
 * <p>The factory is built with an HDFS environment from {@code createTestHdfsEnvironment},
 * the shared {@code TYPE_MANAGER}, a {@code "test_version"} node version, fresh data source
 * statistics and a default {@link OrcFileWriterConfig}.
 *
 * @param hiveClientConfig configuration passed to the HDFS environment and to the factory
 * @return a newly constructed ORC file writer factory
 */
public static OrcFileWriterFactory getDefaultOrcFileWriterFactory(HiveClientConfig hiveClientConfig)
{
    HdfsEnvironment hdfsEnvironment = createTestHdfsEnvironment(hiveClientConfig);
    NodeVersion nodeVersion = new NodeVersion("test_version");
    FileFormatDataSourceStats dataSourceStats = new FileFormatDataSourceStats();
    OrcFileWriterConfig orcWriterConfig = new OrcFileWriterConfig();

    return new OrcFileWriterFactory(hdfsEnvironment, TYPE_MANAGER, nodeVersion, hiveClientConfig, dataSourceStats, orcWriterConfig);
}
@Test(dataProvider = "rowCount") public void testRcBinaryOptimizedWriter(int rowCount) throws Exception { List<TestColumn> testColumns = TEST_COLUMNS.stream() // RCBinary interprets empty VARCHAR as nulls .filter(testColumn -> !testColumn.getName().equals("t_empty_varchar")) // t_map_null_key_* must be disabled because Presto can not produce maps with null keys so the writer will throw .filter(TestHiveFileFormats::withoutNullMapKeyTests) .collect(toList()); TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties(new HiveClientConfig().setRcfileOptimizedWriterEnabled(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); assertThatFileFormat(RCBINARY) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testOrcOptimizedWriter(int rowCount) throws Exception { TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties( new HiveClientConfig() .setOrcOptimizedWriterEnabled(true) .setOrcWriterValidationPercentage(100.0), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); // A Presto page can not contain a map with null keys, so a page based writer can not write null keys List<TestColumn> testColumns = TEST_COLUMNS.stream() .filter(testColumn -> !testColumn.getName().equals("t_map_null_key") && !testColumn.getName().equals("t_map_null_key_complex_value") && !testColumn.getName().equals("t_map_null_key_complex_key_value")) .collect(toList()); assertThatFileFormat(ORC) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS, new OrcWriterOptions())) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testDwrfOptimizedWriter(int rowCount) throws Exception { TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties( new HiveClientConfig() .setOrcOptimizedWriterEnabled(true) .setOrcWriterValidationPercentage(100.0), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); // DWRF does not support modern Hive types // A Presto page can not contain a map with null keys, so a page based writer can not write null keys List<TestColumn> testColumns = TEST_COLUMNS.stream() .filter(testColumn -> !hasType(testColumn.getObjectInspector(), PrimitiveCategory.DATE, PrimitiveCategory.VARCHAR, PrimitiveCategory.CHAR, PrimitiveCategory.DECIMAL)) .filter(testColumn -> !testColumn.getName().equals("t_map_null_key") && !testColumn.getName().equals("t_map_null_key_complex_value") && !testColumn.getName().equals("t_map_null_key_complex_key_value")) .collect(toList()); assertThatFileFormat(DWRF) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS, new OrcWriterOptions())) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new DwrfPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testRcTextOptimizedWriter(int rowCount) throws Exception { List<TestColumn> testColumns = TEST_COLUMNS.stream() // t_map_null_key_* must be disabled because Presto can not produce maps with null keys so the writer will throw .filter(TestHiveFileFormats::withoutNullMapKeyTests) .collect(toImmutableList()); TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties(new HiveClientConfig().setRcfileOptimizedWriterEnabled(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); assertThatFileFormat(RCTEXT) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
partitionUpdateCodec, new HiveTypeTranslator(), new NodeVersion("test_version")); transactionManager = new HiveTransactionManager(); splitManager = new HiveSplitManager(
/**
 * Builds the default set of file writer factories for the given configuration.
 *
 * <p>The set holds an {@link RcFileFileWriterFactory} and the factory returned by
 * {@link #getDefaultOrcFileWriterFactory(HiveClientConfig)}.
 *
 * @param hiveClientConfig configuration used to create the HDFS environment and both factories
 * @return an immutable set containing the two writer factories
 */
public static Set<HiveFileWriterFactory> getDefaultHiveFileWriterFactories(HiveClientConfig hiveClientConfig)
{
    HdfsEnvironment hdfsEnvironment = createTestHdfsEnvironment(hiveClientConfig);

    HiveFileWriterFactory rcFileWriterFactory = new RcFileFileWriterFactory(
            hdfsEnvironment,
            TYPE_MANAGER,
            new NodeVersion("test_version"),
            hiveClientConfig,
            new FileFormatDataSourceStats());
    HiveFileWriterFactory orcFileWriterFactory = getDefaultOrcFileWriterFactory(hiveClientConfig);

    return ImmutableSet.of(rcFileWriterFactory, orcFileWriterFactory);
}
/**
 * Creates an {@link OrcFileWriterFactory} for the given configuration.
 *
 * <p>The factory is built with an HDFS environment from {@code createTestHdfsEnvironment},
 * the shared {@code TYPE_MANAGER}, a {@code "test_version"} node version, fresh data source
 * statistics and a default {@link OrcFileWriterConfig}.
 *
 * @param hiveClientConfig configuration passed to the HDFS environment and to the factory
 * @return a newly constructed ORC file writer factory
 */
public static OrcFileWriterFactory getDefaultOrcFileWriterFactory(HiveClientConfig hiveClientConfig)
{
    HdfsEnvironment hdfsEnvironment = createTestHdfsEnvironment(hiveClientConfig);
    NodeVersion nodeVersion = new NodeVersion("test_version");
    FileFormatDataSourceStats dataSourceStats = new FileFormatDataSourceStats();
    OrcFileWriterConfig orcWriterConfig = new OrcFileWriterConfig();

    return new OrcFileWriterFactory(hdfsEnvironment, TYPE_MANAGER, nodeVersion, hiveClientConfig, dataSourceStats, orcWriterConfig);
}
@Test(dataProvider = "rowCount") public void testRcBinaryOptimizedWriter(int rowCount) throws Exception { List<TestColumn> testColumns = TEST_COLUMNS.stream() // RCBinary interprets empty VARCHAR as nulls .filter(testColumn -> !testColumn.getName().equals("t_empty_varchar")) // t_map_null_key_* must be disabled because Presto can not produce maps with null keys so the writer will throw .filter(TestHiveFileFormats::withoutNullMapKeyTests) .collect(toList()); TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties(new HiveClientConfig().setRcfileOptimizedWriterEnabled(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); assertThatFileFormat(RCBINARY) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testRcTextOptimizedWriter(int rowCount) throws Exception { List<TestColumn> testColumns = TEST_COLUMNS.stream() // t_map_null_key_* must be disabled because Presto can not produce maps with null keys so the writer will throw .filter(TestHiveFileFormats::withoutNullMapKeyTests) .collect(toImmutableList()); TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties(new HiveClientConfig().setRcfileOptimizedWriterEnabled(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); assertThatFileFormat(RCTEXT) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testOrcOptimizedWriter(int rowCount) throws Exception { TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties( new HiveClientConfig() .setOrcOptimizedWriterEnabled(true) .setOrcWriterValidationPercentage(100.0), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); // A Presto page can not contain a map with null keys, so a page based writer can not write null keys List<TestColumn> testColumns = TEST_COLUMNS.stream() .filter(testColumn -> !testColumn.getName().equals("t_map_null_key") && !testColumn.getName().equals("t_map_null_key_complex_value") && !testColumn.getName().equals("t_map_null_key_complex_key_value")) .collect(toList()); assertThatFileFormat(ORC) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS, new OrcWriterOptions())) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testDwrfOptimizedWriter(int rowCount) throws Exception { TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties( new HiveClientConfig() .setOrcOptimizedWriterEnabled(true) .setOrcWriterValidationPercentage(100.0), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); // DWRF does not support modern Hive types // A Presto page can not contain a map with null keys, so a page based writer can not write null keys List<TestColumn> testColumns = TEST_COLUMNS.stream() .filter(testColumn -> !hasType(testColumn.getObjectInspector(), PrimitiveCategory.DATE, PrimitiveCategory.VARCHAR, PrimitiveCategory.CHAR, PrimitiveCategory.DECIMAL)) .filter(testColumn -> !testColumn.getName().equals("t_map_null_key") && !testColumn.getName().equals("t_map_null_key_complex_value") && !testColumn.getName().equals("t_map_null_key_complex_key_value")) .collect(toList()); assertThatFileFormat(DWRF) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS, new OrcWriterOptions())) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new DwrfPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
partitionUpdateCodec, new HiveTypeTranslator(), new NodeVersion("test_version")); transactionManager = new HiveTransactionManager(); splitManager = new HiveSplitManager(