/**
 * Converts a {@link HiveStorageFormat} into an equivalent {@link StorageFormat}
 * by copying its SerDe, input format, and output format class names.
 */
public static StorageFormat fromHiveStorageFormat(HiveStorageFormat hiveStorageFormat)
{
    String serde = hiveStorageFormat.getSerDe();
    String inputFormat = hiveStorageFormat.getInputFormat();
    String outputFormat = hiveStorageFormat.getOutputFormat();
    return new StorageFormat(serde, inputFormat, outputFormat);
}
/**
 * Resolves the table's storage format to a known {@link HiveStorageFormat} by
 * matching both the output format class and the SerDe class.
 *
 * @throws PrestoException with {@code HIVE_UNSUPPORTED_FORMAT} when no known
 *         format matches the table's output format / SerDe pair
 */
private static HiveStorageFormat extractHiveStorageFormat(Table table)
{
    StorageFormat storage = table.getStorage().getStorageFormat();
    String outputFormat = storage.getOutputFormat();
    String serializationLib = storage.getSerDe();

    for (HiveStorageFormat candidate : HiveStorageFormat.values()) {
        if (!candidate.getOutputFormat().equals(outputFormat)) {
            continue;
        }
        if (candidate.getSerDe().equals(serializationLib)) {
            return candidate;
        }
    }
    throw new PrestoException(HIVE_UNSUPPORTED_FORMAT, format("Output format %s with SerDe %s is not supported", outputFormat, serializationLib));
}
/**
 * Builds the schema {@link Properties} describing a table for the SerDe layer:
 * the serialization library, the input format class, comma-separated column
 * names, and colon-separated Hive type names.
 */
private static Properties createSchema(HiveStorageFormat format, List<String> columnNames, List<Type> columnTypes)
{
    TypeTranslator translator = new HiveTypeTranslator();

    // Render each engine type as its Hive type name, colon-separated per Hive convention.
    String typeNames = columnTypes.stream()
            .map(columnType -> toHiveType(translator, columnType).getHiveTypeName().toString())
            .collect(joining(":"));

    Properties schema = new Properties();
    schema.setProperty(SERIALIZATION_LIB, format.getSerDe());
    schema.setProperty(FILE_INPUT_FORMAT, format.getInputFormat());
    schema.setProperty(META_TABLE_COLUMNS, String.join(",", columnNames));
    schema.setProperty(META_TABLE_COLUMN_TYPES, typeNames);
    return schema;
}
/**
 * Returns {@code true} when the table uses the Avro SerDe and has an
 * {@code avro.schema.url} parameter set.
 *
 * @throws PrestoException with {@code HIVE_INVALID_METADATA} when the table has
 *         no storage descriptor or the descriptor lacks SerDe info
 */
public static boolean isAvroTableWithSchemaSet(org.apache.hadoop.hive.metastore.api.Table table)
{
    if (table.getParameters() == null) {
        return false;
    }

    StorageDescriptor storageDescriptor = table.getSd();
    if (storageDescriptor == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table does not contain a storage descriptor: " + table);
    }

    SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }

    // Same conjunction as before, unrolled into guard clauses for readability.
    String serializationLib = serdeInfo.getSerializationLib();
    if (serializationLib == null) {
        return false;
    }
    if (table.getParameters().get(AVRO_SCHEMA_URL_KEY) == null) {
        return false;
    }
    return serializationLib.equals(AVRO.getSerDe());
}
// Builds a ConnectorPageSource over a single local file by synthesizing a HiveSplit.
// The split properties mirror what the metastore schema would supply: the input
// format class, the SerDe class, and "columns"/"columns.types" as comma-separated
// column names and Hive type names from the test column handles.
// NOTE(review): the HiveSplit arguments are positional — start offset 0, with both
// the length and the file size set to outputFile.length(); verify ordering against
// the HiveSplit constructor before changing any of them.
private static ConnectorPageSource createPageSource(HiveTransactionHandle transaction, HiveClientConfig config, File outputFile) { Properties splitProperties = new Properties(); splitProperties.setProperty(FILE_INPUT_FORMAT, config.getHiveStorageFormat().getInputFormat()); splitProperties.setProperty(SERIALIZATION_LIB, config.getHiveStorageFormat().getSerDe()); splitProperties.setProperty("columns", Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getName).collect(toList()))); splitProperties.setProperty("columns.types", Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getHiveType).map(hiveType -> hiveType.getHiveTypeName().toString()).collect(toList()))); HiveSplit split = new HiveSplit( SCHEMA_NAME, TABLE_NAME, "", "file:///" + outputFile.getAbsolutePath(), 0, outputFile.length(), outputFile.length(), splitProperties, ImmutableList.of(), ImmutableList.of(), OptionalInt.empty(), false, TupleDomain.all(), ImmutableMap.of(), Optional.empty(), false); HivePageSourceProvider provider = new HivePageSourceProvider(config, createTestHdfsEnvironment(config), getDefaultHiveRecordCursorProvider(config), getDefaultHiveDataStreamFactories(config), TYPE_MANAGER); return provider.createPageSource(transaction, getSession(config), split, ImmutableList.copyOf(getColumnHandles())); }
// Populate split properties the way a metastore schema would: the SerDe class,
// then "columns" and "columns.types" as comma-separated names and Hive type
// strings of the non-partition test columns (partition keys are filtered out).
splitProperties.setProperty(SERIALIZATION_LIB, storageFormat.getSerDe()); splitProperties.setProperty("columns", Joiner.on(',').join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getName))); splitProperties.setProperty("columns.types", Joiner.on(',').join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getType)));
// Populate split properties the way a metastore schema would: the SerDe class,
// then "columns" and "columns.types" as comma-separated names and Hive type
// strings of the non-partition test columns (partition keys are filtered out).
splitProperties.setProperty(SERIALIZATION_LIB, storageFormat.getSerDe()); splitProperties.setProperty("columns", Joiner.on(',').join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getName))); splitProperties.setProperty("columns.types", Joiner.on(',').join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getType)));
// Reflectively instantiate the SerDe class named by the storage format.
// The SerDe interface itself is deprecated (hence the suppression), but it is
// still the type the surrounding Hive APIs accept.
@SuppressWarnings("deprecation") SerDe serDe = newInstance(storageFormat.getSerDe(), SerDe.class);
// Copy the SerDe, input format, and output format class names from the Hive
// storage format; bucketing comes from the caller and SerDe parameters start empty.
.setStorageFormat(StorageFormat.create(hiveStorageFormat.getSerDe(), hiveStorageFormat.getInputFormat(), hiveStorageFormat.getOutputFormat())) .setBucketProperty(bucketProperty) .setSerdeParameters(ImmutableMap.of());
// Builds the schema Properties handed to the SerDe layer: serialization library,
// input format class, comma-separated column names, and colon-separated Hive
// type names.
// NOTE(review): the sibling createSchema implementations map the type-name
// stream through HiveTypeName::toString before joining(":"); confirm that
// getHiveTypeName() yields a CharSequence in this code path, otherwise the
// joining collector will not compile — possibly a missing .map(...toString) here.
private static Properties createSchema(HiveStorageFormat format, List<String> columnNames, List<Type> columnTypes) { Properties schema = new Properties(); schema.setProperty(SERIALIZATION_LIB, format.getSerDe()); schema.setProperty(FILE_INPUT_FORMAT, format.getInputFormat()); schema.setProperty(META_TABLE_COLUMNS, columnNames.stream() .collect(joining(","))); schema.setProperty(META_TABLE_COLUMN_TYPES, columnTypes.stream() .map(HiveType::toHiveType) .map(HiveType::getHiveTypeName) .collect(joining(":"))); return schema; } }
/**
 * Builds the schema {@link Properties} describing a table for the SerDe layer:
 * the serialization library, the input format class, comma-separated column
 * names, and colon-separated Hive type names.
 */
private static Properties createSchema(HiveStorageFormat format, List<String> columnNames, List<Type> columnTypes)
{
    TypeTranslator translator = new HiveTypeTranslator();

    // Render each engine type as its Hive type name, colon-separated per Hive convention.
    String typeNames = columnTypes.stream()
            .map(columnType -> toHiveType(translator, columnType).getHiveTypeName().toString())
            .collect(joining(":"));

    Properties schema = new Properties();
    schema.setProperty(SERIALIZATION_LIB, format.getSerDe());
    schema.setProperty(FILE_INPUT_FORMAT, format.getInputFormat());
    schema.setProperty(META_TABLE_COLUMNS, String.join(",", columnNames));
    schema.setProperty(META_TABLE_COLUMN_TYPES, typeNames);
    return schema;
}
/**
 * Resolves a metastore table's storage format to a known {@link HiveStorageFormat}
 * by matching both the output format class and the SerDe class.
 *
 * @throws PrestoException with {@code HIVE_INVALID_METADATA} when the storage
 *         descriptor or its SerDe info is absent, or {@code HIVE_UNSUPPORTED_FORMAT}
 *         when no known format matches
 */
private static HiveStorageFormat extractHiveStorageFormat(Table table)
{
    StorageDescriptor descriptor = table.getSd();
    if (descriptor == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table is missing storage descriptor");
    }
    SerDeInfo serdeInfo = descriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }

    String outputFormat = descriptor.getOutputFormat();
    String serializationLib = serdeInfo.getSerializationLib();
    for (HiveStorageFormat candidate : HiveStorageFormat.values()) {
        if (!candidate.getOutputFormat().equals(outputFormat)) {
            continue;
        }
        if (candidate.getSerDe().equals(serializationLib)) {
            return candidate;
        }
    }
    throw new PrestoException(HIVE_UNSUPPORTED_FORMAT, format("Output format %s with SerDe %s is not supported", outputFormat, serializationLib));
}
// Builds a ConnectorPageSource over a single local file by synthesizing a HiveSplit
// from the configured storage format and the test column handles. The split
// properties carry the input format class, the SerDe class, and comma-separated
// "columns"/"columns.types" entries.
// NOTE(review): the HiveSplit arguments are positional (start 0, length
// outputFile.length()); verify ordering against the HiveSplit constructor before
// changing any of them.
private static ConnectorPageSource createPageSource(HiveTransactionHandle transaction, HiveClientConfig config, File outputFile) { Properties splitProperties = new Properties(); splitProperties.setProperty(FILE_INPUT_FORMAT, config.getHiveStorageFormat().getInputFormat()); splitProperties.setProperty(SERIALIZATION_LIB, config.getHiveStorageFormat().getSerDe()); splitProperties.setProperty("columns", Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getName).collect(toList()))); splitProperties.setProperty("columns.types", Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getHiveType).map(HiveType::getHiveTypeName).collect(toList()))); HiveSplit split = new HiveSplit(CLIENT_ID, SCHEMA_NAME, TABLE_NAME, "", "file:///" + outputFile.getAbsolutePath(), 0, outputFile.length(), splitProperties, ImmutableList.of(), ImmutableList.of(), false, TupleDomain.all()); HivePageSourceProvider provider = new HivePageSourceProvider(config, createHdfsEnvironment(config), getDefaultHiveRecordCursorProvider(config), getDefaultHiveDataStreamFactories(config), TYPE_MANAGER); return provider.createPageSource(transaction, getSession(config), split, ImmutableList.copyOf(getColumnHandles())); }
/**
 * Creates a record writer for the given target file, columns, compression codec,
 * and storage format, backed by a {@code HiveRecordWriter} configured for the
 * fixed test schema/table names.
 */
public RecordFormatWriter(File targetFile, List<String> columnNames, List<Type> columnTypes, HiveCompressionCodec compressionCodec, HiveStorageFormat format)
{
    JobConf jobConf = new JobConf(conf);
    configureCompression(jobConf, compressionCodec);

    // Pair each column name with its engine type and derived Hive type.
    List<DataColumn> dataColumns = new ArrayList<>(columnNames.size());
    for (int index = 0; index < columnNames.size(); index++) {
        Type columnType = columnTypes.get(index);
        dataColumns.add(new DataColumn(columnNames.get(index), columnType, HiveType.toHiveType(columnType)));
    }

    recordWriter = new HiveRecordWriter(
            "test_schema",
            "test_table",
            null,
            compressionCodec != HiveCompressionCodec.NONE,
            true,
            dataColumns,
            format.getOutputFormat(),
            format.getSerDe(),
            createSchema(format, columnNames, columnTypes),
            targetFile.getName(),
            targetFile.getParent(),
            targetFile.toString(),
            TYPE_MANAGER,
            jobConf);
}
/**
 * Builds a metastore {@link StorageDescriptor} for the given table: SerDe info
 * named after the table with the format's serialization library, plus the
 * location, columns, and input/output format classes. Both parameter maps are
 * left empty.
 */
private static StorageDescriptor makeStorageDescriptor(String tableName, HiveStorageFormat format, Path targetPath, List<FieldSchema> columns)
{
    SerDeInfo serdeInfo = new SerDeInfo();
    serdeInfo.setName(tableName);
    serdeInfo.setSerializationLib(format.getSerDe());
    serdeInfo.setParameters(ImmutableMap.of());

    StorageDescriptor storageDescriptor = new StorageDescriptor();
    storageDescriptor.setLocation(targetPath.toString());
    storageDescriptor.setCols(columns);
    storageDescriptor.setSerdeInfo(serdeInfo);
    storageDescriptor.setInputFormat(format.getInputFormat());
    storageDescriptor.setOutputFormat(format.getOutputFormat());
    storageDescriptor.setParameters(ImmutableMap.of());
    return storageDescriptor;
}
}
// Builds a ConnectorPageSource over a single local file by synthesizing a HiveSplit.
// The split properties mirror what the metastore schema would supply: the input
// format class, the SerDe class, and "columns"/"columns.types" as comma-separated
// column names and Hive type names from the test column handles.
// NOTE(review): the HiveSplit arguments are positional — start offset 0, with both
// the length and the file size set to outputFile.length(); verify ordering against
// the HiveSplit constructor before changing any of them.
private static ConnectorPageSource createPageSource(HiveTransactionHandle transaction, HiveClientConfig config, File outputFile) { Properties splitProperties = new Properties(); splitProperties.setProperty(FILE_INPUT_FORMAT, config.getHiveStorageFormat().getInputFormat()); splitProperties.setProperty(SERIALIZATION_LIB, config.getHiveStorageFormat().getSerDe()); splitProperties.setProperty("columns", Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getName).collect(toList()))); splitProperties.setProperty("columns.types", Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getHiveType).map(hiveType -> hiveType.getHiveTypeName().toString()).collect(toList()))); HiveSplit split = new HiveSplit( SCHEMA_NAME, TABLE_NAME, "", "file:///" + outputFile.getAbsolutePath(), 0, outputFile.length(), outputFile.length(), splitProperties, ImmutableList.of(), ImmutableList.of(), OptionalInt.empty(), false, TupleDomain.all(), ImmutableMap.of(), Optional.empty(), false); HivePageSourceProvider provider = new HivePageSourceProvider(config, createTestHdfsEnvironment(config), getDefaultHiveRecordCursorProvider(config), getDefaultHiveDataStreamFactories(config), TYPE_MANAGER); return provider.createPageSource(transaction, getSession(config), split, ImmutableList.copyOf(getColumnHandles())); }
// Populate split properties the way a metastore schema would: the SerDe class,
// then "columns" and "columns.types" as comma-separated names and Hive type
// strings of the non-partition test columns (partition keys are filtered out).
splitProperties.setProperty(SERIALIZATION_LIB, storageFormat.getSerDe()); splitProperties.setProperty("columns", Joiner.on(',').join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getName))); splitProperties.setProperty("columns.types", Joiner.on(',').join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getType)));
// Populate split properties the way a metastore schema would: the SerDe class,
// then "columns" and "columns.types" as comma-separated names and Hive type
// strings of the non-partition test columns (partition keys are filtered out).
splitProperties.setProperty(SERIALIZATION_LIB, storageFormat.getSerDe()); splitProperties.setProperty("columns", Joiner.on(',').join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getName))); splitProperties.setProperty("columns.types", Joiner.on(',').join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getType)));
// Copy the SerDe, input format, and output format class names from the Hive
// storage format; bucketing comes from the caller and SerDe parameters start empty.
.setStorageFormat(StorageFormat.create(hiveStorageFormat.getSerDe(), hiveStorageFormat.getInputFormat(), hiveStorageFormat.getOutputFormat())) .setBucketProperty(bucketProperty) .setSerdeParameters(ImmutableMap.of());
// Record the storage format's SerDe class on the SerDe info; no additional
// SerDe parameters are supplied.
serdeInfo.setSerializationLib(hiveStorageFormat.getSerDe()); serdeInfo.setParameters(ImmutableMap.of());