// Builds a test session exposing all Hive session properties derived from the given config.
private static TestingConnectorSession getSession(HiveClientConfig config)
{
    HiveSessionProperties sessionProperties = new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig());
    return new TestingConnectorSession(sessionProperties.getSessionProperties());
}
// NOTE(review): the ORC_MAX_READ_BLOCK_SIZE property below is constructed via
// dataSizeSessionProperty but its default value is hiveClientConfig.isOrcLazyReadSmallRanges()
// (a boolean), while every sibling property passes the matching DataSize getter —
// presumably this should be hiveClientConfig.getOrcMaxReadBlockSize(); verify and fix.
hiveClientConfig.isOrcBloomFiltersEnabled(), false), dataSizeSessionProperty( ORC_MAX_MERGE_DISTANCE, "ORC: Maximum size of gap between two reads to merge into a single read", hiveClientConfig.getOrcMaxMergeDistance(), false), dataSizeSessionProperty( ORC_MAX_BUFFER_SIZE, "ORC: Maximum size of a single read", hiveClientConfig.getOrcMaxBufferSize(), false), dataSizeSessionProperty( ORC_STREAM_BUFFER_SIZE, "ORC: Size of buffer for streaming reads", hiveClientConfig.getOrcStreamBufferSize(), false), dataSizeSessionProperty( ORC_TINY_STRIPE_THRESHOLD, "ORC: Threshold below which an ORC stripe or file will read in its entirety", hiveClientConfig.getOrcTinyStripeThreshold(), false), dataSizeSessionProperty( ORC_MAX_READ_BLOCK_SIZE, "ORC: Soft max size of Presto blocks produced by ORC reader", hiveClientConfig.isOrcLazyReadSmallRanges(), false), dataSizeSessionProperty(
this.maxOpenSortFiles = maxOpenSortFiles; this.immutablePartitions = immutablePartitions; this.insertExistingPartitionsBehavior = HiveSessionProperties.getInsertExistingPartitionsBehavior(session); if (immutablePartitions) { checkArgument(insertExistingPartitionsBehavior != InsertExistingPartitionsBehavior.APPEND, "insertExistingPartitionsBehavior cannot be APPEND"); this.sessionProperties = hiveSessionProperties.getSessionProperties().stream() .collect(toImmutableMap(PropertyMetadata::getName, entry -> session.getProperty(entry.getName(), entry.getJavaType()).toString()));
hiveStorageTimeZone, typeManager, getOrcMaxMergeDistance(session), getOrcMaxBufferSize(session), getOrcStreamBufferSize(session), getOrcTinyStripeThreshold(session), getOrcMaxReadBlockSize(session), getOrcLazyReadSmallRanges(session), false, stats));
if (!HiveSessionProperties.isOrcOptimizedWriterEnabled(session)) { return Optional.empty(); if (HiveSessionProperties.isOrcOptimizedWriterValidate(session)) { validationInputFactory = Optional.of(() -> { try { new OrcDataSourceId(path.toString()), fileSystem.getFileStatus(path).getLen(), getOrcMaxMergeDistance(session), getOrcMaxBufferSize(session), getOrcStreamBufferSize(session), false, fileSystem.open(path), compression, orcWriterOptions .withStripeMinSize(getOrcOptimizedWriterMinStripeSize(session)) .withStripeMaxSize(getOrcOptimizedWriterMaxStripeSize(session)) .withStripeMaxRowCount(getOrcOptimizedWriterMaxStripeRows(session)) .withDictionaryMaxMemory(getOrcOptimizedWriterMaxDictionaryMemory(session)) .withMaxStringStatisticsLimit(getOrcStringStatisticsLimit(session)), fileInputColumnIndexes, ImmutableMap.<String, String>builder() hiveStorageTimeZone, validationInputFactory, getOrcOptimizedWriterValidateMode(session), stats));
ImmutableSet.of(), procedures, hiveSessionProperties.getSessionProperties(), HiveSchemaProperties.SCHEMA_PROPERTIES, hiveTableProperties.getTableProperties(),
hiveStorageTimeZone, typeManager, getOrcMaxMergeDistance(session), getOrcMaxBufferSize(session), getOrcStreamBufferSize(session)));
// Checks that a partitioned, bucketed table created from the orders TPCH data
// has the expected table properties, partition count, and contents, and that
// inserting into an existing bucket fails with the expected error message.
private void verifyPartitionedBucketedTable(HiveStorageFormat storageFormat, String tableName)
{
    TableMetadata metadata = getTableMetadata(catalog, TPCH_SCHEMA, tableName);

    // Table-level properties declared at creation time.
    assertEquals(metadata.getMetadata().getProperties().get(STORAGE_FORMAT_PROPERTY), storageFormat);
    assertEquals(metadata.getMetadata().getProperties().get(PARTITIONED_BY_PROPERTY), ImmutableList.of("orderstatus"));
    assertEquals(metadata.getMetadata().getProperties().get(BUCKETED_BY_PROPERTY), ImmutableList.of("custkey", "custkey2"));
    assertEquals(metadata.getMetadata().getProperties().get(BUCKET_COUNT_PROPERTY), 11);

    // One partition per distinct orderstatus value.
    List<?> partitionValues = getPartitions(tableName);
    assertEquals(partitionValues.size(), 3);

    assertQuery("SELECT * from " + tableName, "SELECT custkey, custkey, comment, orderstatus FROM orders");

    // Spot-check bucket pruning for a range of custkey values.
    for (int custkey = 1; custkey <= 30; custkey++) {
        assertQuery(
                format("SELECT * from " + tableName + " where custkey = %d and custkey2 = %d", custkey, custkey),
                format("SELECT custkey, custkey, comment, orderstatus FROM orders where custkey = %d", custkey));
    }

    // Inserting into an existing bucketed partition must be rejected.
    assertThatThrownBy(() -> assertUpdate("INSERT INTO " + tableName + " VALUES (1, 1, 'comment', 'O')", 1))
            .hasMessage(getExpectedErrorMessageForInsertExistingBucketedTable(
                    getInsertExistingPartitionsBehavior(getConnectorSession(getSession())),
                    "orderstatus=O"));
}
new TestingNodeManager("fake-environment"), new HiveEventClient(), new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig()), stats, getDefaultOrcFileWriterFactory(config));
partition.getColumnCoercions(), Optional.empty(), isForceLocalScheduling(session), s3SelectPushdownEnabled); lastResult = addSplitsToSource(targetSplits, splitFactory); partition.getColumnCoercions(), bucketConversionRequiresWorkerParticipation ? bucketConversion : Optional.empty(), isForceLocalScheduling(session), s3SelectPushdownEnabled);
hiveStorageTimeZone, typeManager, getOrcMaxMergeDistance(session), getOrcMaxBufferSize(session), getOrcStreamBufferSize(session), getOrcTinyStripeThreshold(session), getOrcMaxReadBlockSize(session), getOrcLazyReadSmallRanges(session), isOrcBloomFiltersEnabled(session), stats));
new ClassLoaderSafeConnectorPageSinkProvider(pageSinkProvider, classLoader), ImmutableSet.of(), hiveSessionProperties.getSessionProperties(), hiveTableProperties.getTableProperties(), accessControl,
hiveStorageTimeZone, typeManager, getOrcMaxMergeDistance(session), getOrcMaxBufferSize(session), getOrcStreamBufferSize(session)));
private void testInsertPartitionedBucketedTableFewRows(Session session, HiveStorageFormat storageFormat) { String tableName = "test_insert_partitioned_bucketed_table_few_rows"; assertUpdate(session, "" + "CREATE TABLE " + tableName + " (" + " bucket_key varchar," + " col varchar," + " partition_key varchar)" + "WITH (" + "format = '" + storageFormat + "', " + "partitioned_by = ARRAY[ 'partition_key' ], " + "bucketed_by = ARRAY[ 'bucket_key' ], " + "bucket_count = 11)"); assertUpdate( // make sure that we will get one file per bucket regardless of writer count configured getParallelWriteSession(), "INSERT INTO " + tableName + " " + "VALUES " + " (VARCHAR 'a', VARCHAR 'b', VARCHAR 'c'), " + " ('aa', 'bb', 'cc'), " + " ('aaa', 'bbb', 'ccc')", 3); verifyPartitionedBucketedTableAsFewRows(storageFormat, tableName); assertThatThrownBy(() -> assertUpdate(session, "INSERT INTO test_insert_partitioned_bucketed_table_few_rows VALUES ('a0', 'b0', 'c')", 1)) .hasMessage(getExpectedErrorMessageForInsertExistingBucketedTable( getInsertExistingPartitionsBehavior(getConnectorSession(session)), "partition_key=c")); assertUpdate(session, "DROP TABLE test_insert_partitioned_bucketed_table_few_rows"); assertFalse(getQueryRunner().tableExists(session, tableName)); }
new TestingNodeManager("fake-environment"), new HiveEventClient(), new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig()), new HiveWriterStats(), getDefaultOrcFileWriterFactory(config));
boolean forceLocalScheduling = HiveSessionProperties.isForceLocalScheduling(session);
// Creates a fresh test session backed by the Hive session properties for this config.
protected ConnectorSession newSession()
{
    HiveSessionProperties sessionProperties = new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig());
    return new TestingConnectorSession(sessionProperties.getSessionProperties());
}
new ClassLoaderSafeConnectorHandleResolver(handleResolver, classLoader), ImmutableSet.of(), hiveSessionProperties.getSessionProperties(), hiveTableProperties.getTableProperties(), accessControl);
getInsertExistingPartitionsBehavior(getConnectorSession(session)), "partition_key=c"));
new TestingNodeManager("fake-environment"), new HiveEventClient(), new HiveSessionProperties(hiveClientConfig, new OrcFileWriterConfig(), new ParquetFileWriterConfig()), new HiveWriterStats(), getDefaultOrcFileWriterFactory(hiveClientConfig));