@Test(dataProvider = "rowCount") public void testParquetPageSourceSchemaEvolution(int rowCount) throws Exception { List<TestColumn> writeColumns = getTestColumnsSupportedByParquet(); // test index-based access List<TestColumn> readColumns = writeColumns.stream() .map(column -> new TestColumn( column.getName() + "_new", column.getObjectInspector(), column.getWriteValue(), column.getExpectedValue(), column.isPartitionKey())) .collect(toList()); assertThatFileFormat(PARQUET) .withWriteColumns(writeColumns) .withReadColumns(readColumns) .withSession(parquetPageSourceSession) .withRowsCount(rowCount) .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); // test name-based access readColumns = Lists.reverse(writeColumns); assertThatFileFormat(PARQUET) .withWriteColumns(writeColumns) .withReadColumns(readColumns) .withSession(parquetPageSourceSessionUseName) .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testRcBinaryOptimizedWriter(int rowCount) throws Exception { List<TestColumn> testColumns = TEST_COLUMNS.stream() // RCBinary interprets empty VARCHAR as nulls .filter(testColumn -> !testColumn.getName().equals("t_empty_varchar")) // t_map_null_key_* must be disabled because Presto can not produce maps with null keys so the writer will throw .filter(TestHiveFileFormats::withoutNullMapKeyTests) .collect(toList()); TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties(new HiveClientConfig().setRcfileOptimizedWriterEnabled(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); assertThatFileFormat(RCBINARY) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testDwrfOptimizedWriter(int rowCount) throws Exception { TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties( new HiveClientConfig() .setOrcOptimizedWriterEnabled(true) .setOrcWriterValidationPercentage(100.0), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); // DWRF does not support modern Hive types // A Presto page can not contain a map with null keys, so a page based writer can not write null keys List<TestColumn> testColumns = TEST_COLUMNS.stream() .filter(testColumn -> !hasType(testColumn.getObjectInspector(), PrimitiveCategory.DATE, PrimitiveCategory.VARCHAR, PrimitiveCategory.CHAR, PrimitiveCategory.DECIMAL)) .filter(testColumn -> !testColumn.getName().equals("t_map_null_key") && !testColumn.getName().equals("t_map_null_key_complex_value") && !testColumn.getName().equals("t_map_null_key_complex_key_value")) .collect(toList()); assertThatFileFormat(DWRF) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS, new OrcWriterOptions())) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new DwrfPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
assertThatFileFormat(PARQUET)
        .withWriteColumns(ImmutableList.of(writeColumn))
        .withReadColumns(ImmutableList.of(readColumn))
        .withSession(parquetPageSourceSession)
        .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS));
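// Hypothetical inputs for the assertion above: a column written as INT and read
// back as BIGINT. The inspectors are statics from Hive's
// PrimitiveObjectInspectorFactory; the column name and values are made up for
// illustration and are not part of the original test data.
TestColumn writeColumn = new TestColumn("test_int_to_bigint", javaIntObjectInspector, 42, 42, false);
TestColumn readColumn = new TestColumn("test_int_to_bigint", javaLongObjectInspector, 42L, 42L, false);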
@Test(dataProvider = "rowCount") public void testParquetPageSourceSchemaEvolution(int rowCount) throws Exception { List<TestColumn> writeColumns = getTestColumnsSupportedByParquet(); // test index-based access List<TestColumn> readColumns = writeColumns.stream() .map(column -> new TestColumn( column.getName() + "_new", column.getObjectInspector(), column.getWriteValue(), column.getExpectedValue(), column.isPartitionKey())) .collect(toList()); assertThatFileFormat(PARQUET) .withWriteColumns(writeColumns) .withReadColumns(readColumns) .withSession(parquetPageSourceSession) .withRowsCount(rowCount) .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); // test name-based access readColumns = Lists.reverse(writeColumns); assertThatFileFormat(PARQUET) .withWriteColumns(writeColumns) .withReadColumns(readColumns) .withSession(parquetPageSourceSessionUseName) .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
assertThatFileFormat(PARQUET)
        .withSession(parquetPageSourceSession)
        .isFailingForPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage);
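// A sketch of how the failure assertion above might be wrapped as a reusable
// helper; the method name, the columns parameter, and the ErrorCodeSupplier
// parameter type are assumptions inferred from the surrounding tests, not the
// original API.
private void assertParquetReadFails(List<TestColumn> columns, ErrorCodeSupplier expectedErrorCode, String expectedMessage)
        throws Exception
{
    assertThatFileFormat(PARQUET)
            .withColumns(columns)
            .withSession(parquetPageSourceSession)
            .isFailingForPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS), expectedErrorCode, expectedMessage);
}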
@Test(dataProvider = "rowCount") public void testDwrfOptimizedWriter(int rowCount) throws Exception { TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties( new HiveClientConfig() .setOrcOptimizedWriterEnabled(true) .setOrcWriterValidationPercentage(100.0), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); // DWRF does not support modern Hive types // A Presto page can not contain a map with null keys, so a page based writer can not write null keys List<TestColumn> testColumns = TEST_COLUMNS.stream() .filter(testColumn -> !hasType(testColumn.getObjectInspector(), PrimitiveCategory.DATE, PrimitiveCategory.VARCHAR, PrimitiveCategory.CHAR, PrimitiveCategory.DECIMAL)) .filter(testColumn -> !testColumn.getName().equals("t_map_null_key") && !testColumn.getName().equals("t_map_null_key_complex_value") && !testColumn.getName().equals("t_map_null_key_complex_key_value")) .collect(toList()); assertThatFileFormat(DWRF) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS, new OrcWriterOptions())) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new DwrfPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testOrcOptimizedWriter(int rowCount) throws Exception { TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties( new HiveClientConfig() .setOrcOptimizedWriterEnabled(true) .setOrcWriterValidationPercentage(100.0), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); // A Presto page can not contain a map with null keys, so a page based writer can not write null keys List<TestColumn> testColumns = TEST_COLUMNS.stream() .filter(testColumn -> !testColumn.getName().equals("t_map_null_key") && !testColumn.getName().equals("t_map_null_key_complex_value") && !testColumn.getName().equals("t_map_null_key_complex_key_value")) .collect(toList()); assertThatFileFormat(ORC) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new OrcFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS, new OrcWriterOptions())) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testRcTextOptimizedWriter(int rowCount) throws Exception { List<TestColumn> testColumns = TEST_COLUMNS.stream() // t_map_null_key_* must be disabled because Presto can not produce maps with null keys so the writer will throw .filter(TestHiveFileFormats::withoutNullMapKeyTests) .collect(toImmutableList()); TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties(new HiveClientConfig().setRcfileOptimizedWriterEnabled(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); assertThatFileFormat(RCTEXT) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testRcBinaryOptimizedWriter(int rowCount) throws Exception { List<TestColumn> testColumns = TEST_COLUMNS.stream() // RCBinary interprets empty VARCHAR as nulls .filter(testColumn -> !testColumn.getName().equals("t_empty_varchar")) // t_map_null_key_* must be disabled because Presto can not produce maps with null keys so the writer will throw .filter(TestHiveFileFormats::withoutNullMapKeyTests) .collect(toList()); TestingConnectorSession session = new TestingConnectorSession( new HiveSessionProperties(new HiveClientConfig().setRcfileOptimizedWriterEnabled(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); assertThatFileFormat(RCBINARY) .withColumns(testColumns) .withRowsCount(rowCount) .withSession(session) .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testParquetPageSource(int rowCount) throws Exception { List<TestColumn> testColumns = getTestColumnsSupportedByParquet(); assertThatFileFormat(PARQUET) .withColumns(testColumns) .withSession(parquetPageSourceSession) .withRowsCount(rowCount) .isReadableByPageSource(new ParquetPageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); }
@Test(dataProvider = "rowCount") public void testOrcUseColumnNames(int rowCount) throws Exception { TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); assertThatFileFormat(ORC) .withWriteColumns(TEST_COLUMNS) .withRowsCount(rowCount) .withReadColumns(Lists.reverse(TEST_COLUMNS)) .withSession(session) .isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, true, HDFS_ENVIRONMENT, STATS)); }