public static StorageFormat fromHiveStorageFormat(HiveStorageFormat hiveStorageFormat)
{
    return new StorageFormat(hiveStorageFormat.getSerDe(), hiveStorageFormat.getInputFormat(), hiveStorageFormat.getOutputFormat());
}
public static HiveStorageFormat getHiveStorageFormat(ConnectorSession session)
{
    return HiveStorageFormat.valueOf(session.getProperty(HIVE_STORAGE_FORMAT, String.class).toUpperCase(ENGLISH));
}
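// A hypothetical call site (the session wiring is an assumption, not from this
// code): because the lookup upper-cases with Locale.ENGLISH, a lower-case
// property value such as "orc" resolves to the matching enum constant.
HiveStorageFormat format = getHiveStorageFormat(session); // e.g. HiveStorageFormat.ORC for "orc"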
private static HiveStorageFormat extractHiveStorageFormat(Table table)
{
    StorageFormat storageFormat = table.getStorage().getStorageFormat();
    String outputFormat = storageFormat.getOutputFormat();
    String serde = storageFormat.getSerDe();
    for (HiveStorageFormat format : HiveStorageFormat.values()) {
        if (format.getOutputFormat().equals(outputFormat) && format.getSerDe().equals(serde)) {
            return format;
        }
    }
    throw new PrestoException(HIVE_UNSUPPORTED_FORMAT, format("Output format %s with SerDe %s is not supported", outputFormat, serde));
}
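// Illustration of the (outputFormat, serde) pairing matched above; these are
// the standard Hive class names for ORC, cited here as an assumption:
//   outputFormat = "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"
//   serde        = "org.apache.hadoop.hive.ql.io.orc.OrcSerde"
// A table whose storage reports that pair resolves to HiveStorageFormat.ORC;
// any unmatched pair falls through to the HIVE_UNSUPPORTED_FORMAT exception.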
private static Properties createSchema(HiveStorageFormat format, List<String> columnNames, List<Type> columnTypes)
{
    Properties schema = new Properties();
    TypeTranslator typeTranslator = new HiveTypeTranslator();
    schema.setProperty(SERIALIZATION_LIB, format.getSerDe());
    schema.setProperty(FILE_INPUT_FORMAT, format.getInputFormat());
    schema.setProperty(META_TABLE_COLUMNS, columnNames.stream()
            .collect(joining(",")));
    schema.setProperty(META_TABLE_COLUMN_TYPES, columnTypes.stream()
            .map(type -> toHiveType(typeTranslator, type))
            .map(HiveType::getHiveTypeName)
            .map(HiveTypeName::toString)
            .collect(joining(":")));
    return schema;
}
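// A minimal usage sketch; the column names and types are made up for
// illustration. Builds the serde Properties for a two-column ORC schema.
Properties schema = createSchema(
        HiveStorageFormat.ORC,
        ImmutableList.of("id", "name"),
        ImmutableList.of(BIGINT, VARCHAR));
// Note the column names are comma-separated ("id,name") while the types are
// colon-separated ("bigint:varchar"), matching the two joins above.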
        throws Exception
{
    HiveOutputFormat<?, ?> outputFormat = newInstance(storageFormat.getOutputFormat(), HiveOutputFormat.class);

    @SuppressWarnings("deprecation")
    SerDe serDe = newInstance(storageFormat.getSerDe(), SerDe.class);
@Test
public void testTableCreation()
        throws Exception
{
    for (HiveStorageFormat storageFormat : HiveStorageFormat.values()) {
        createTable(temporaryCreateTable, storageFormat);
        dropTable(temporaryCreateTable);
    }
}
throw new TableNotFoundException(new SchemaTableName(handle.getSchemaName(), handle.getTableName()));

if (!table.get().getStorage().getStorageFormat().getInputFormat().equals(tableStorageFormat.getInputFormat()) && isRespectTableFormat(session)) {
    throw new PrestoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Table format changed during insert");
}

if (!partition.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && isRespectTableFormat(session)) {
    throw new PrestoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Partition format changed during insert");
}
public static boolean isAvroTableWithSchemaSet(org.apache.hadoop.hive.metastore.api.Table table)
{
    if (table.getParameters() == null) {
        return false;
    }
    StorageDescriptor storageDescriptor = table.getSd();
    if (storageDescriptor == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table does not contain a storage descriptor: " + table);
    }
    SerDeInfo serdeInfo = storageDescriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }
    return serdeInfo.getSerializationLib() != null
            && table.getParameters().get(AVRO_SCHEMA_URL_KEY) != null
            && serdeInfo.getSerializationLib().equals(AVRO.getSerDe());
}
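// A hypothetical positive case built against the thrift metastore API; the
// schema URL value is made up, and AVRO_SCHEMA_URL_KEY is assumed to be
// "avro.schema.url".
SerDeInfo serdeInfo = new SerDeInfo();
serdeInfo.setSerializationLib(AVRO.getSerDe());
StorageDescriptor storageDescriptor = new StorageDescriptor();
storageDescriptor.setSerdeInfo(serdeInfo);
org.apache.hadoop.hive.metastore.api.Table table = new org.apache.hadoop.hive.metastore.api.Table();
table.setSd(storageDescriptor);
table.setParameters(ImmutableMap.of("avro.schema.url", "file:///tmp/event.avsc"));
// isAvroTableWithSchemaSet(table) now returns true; removing the parameter or
// swapping the serialization lib makes it return false.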
private FileFormatAssertion assertThatFileFormat(HiveStorageFormat hiveStorageFormat)
{
    return new FileFormatAssertion(hiveStorageFormat.name())
            .withStorageFormat(hiveStorageFormat);
}
public RecordFormatWriter(File targetFile, List<String> columnNames, List<Type> columnTypes, HiveCompressionCodec compressionCodec, HiveStorageFormat format, ConnectorSession session)
{
    JobConf config = new JobConf(conf);
    configureCompression(config, compressionCodec);
    recordWriter = new RecordFileWriter(
            new Path(targetFile.toURI()),
            columnNames,
            fromHiveStorageFormat(format),
            createSchema(format, columnNames, columnTypes),
            format.getEstimatedWriterSystemMemoryUsage(),
            config,
            TYPE_MANAGER,
            session);
}
private static ConnectorPageSource createPageSource(HiveTransactionHandle transaction, HiveClientConfig config, File outputFile)
{
    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, config.getHiveStorageFormat().getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, config.getHiveStorageFormat().getSerDe());
    splitProperties.setProperty("columns", Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getName).collect(toList())));
    splitProperties.setProperty("columns.types", Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getHiveType).map(hiveType -> hiveType.getHiveTypeName().toString()).collect(toList())));
    HiveSplit split = new HiveSplit(
            SCHEMA_NAME,
            TABLE_NAME,
            "",
            "file:///" + outputFile.getAbsolutePath(),
            0,
            outputFile.length(),
            outputFile.length(),
            splitProperties,
            ImmutableList.of(),
            ImmutableList.of(),
            OptionalInt.empty(),
            false,
            TupleDomain.all(),
            ImmutableMap.of(),
            Optional.empty(),
            false);
    HivePageSourceProvider provider = new HivePageSourceProvider(config, createTestHdfsEnvironment(config), getDefaultHiveRecordCursorProvider(config), getDefaultHiveDataStreamFactories(config), TYPE_MANAGER);
    return provider.createPageSource(transaction, getSession(config), split, ImmutableList.copyOf(getColumnHandles()));
}
outputFormat = tableStorageFormat.getOutputFormat();
serDe = tableStorageFormat.getSerDe();
target = locationService.targetPath(locationHandle, partitionName);
write = locationService.writePath(locationHandle, partitionName).orElse(target);
public TableMetadata(Table table, Map<String, HiveColumnStatistics> columnStatistics)
{
    owner = table.getOwner();
    tableType = table.getTableType();
    dataColumns = table.getDataColumns();
    partitionColumns = table.getPartitionColumns();
    parameters = table.getParameters();

    // Reverse lookup of the enum constant; the Optional is empty when the
    // table uses a storage format Presto does not recognize
    StorageFormat tableFormat = table.getStorage().getStorageFormat();
    storageFormat = Arrays.stream(HiveStorageFormat.values())
            .filter(format -> tableFormat.equals(StorageFormat.fromHiveStorageFormat(format)))
            .findFirst();
    bucketProperty = table.getStorage().getBucketProperty();
    serdeParameters = table.getStorage().getSerdeParameters();

    if (tableType.equals(TableType.EXTERNAL_TABLE.name())) {
        externalLocation = Optional.of(table.getStorage().getLocation());
    }
    else {
        externalLocation = Optional.empty();
    }

    viewOriginalText = table.getViewOriginalText();
    viewExpandedText = table.getViewExpandedText();
    this.columnStatistics = ImmutableMap.copyOf(requireNonNull(columnStatistics, "columnStatistics is null"));
}
assertEquals(table.getStorage().getStorageFormat().getInputFormat(), storageFormat.getInputFormat());
private static String makeFileName(File tempDir, HiveClientConfig config)
{
    return tempDir.getAbsolutePath() + "/" + config.getHiveStorageFormat().name() + "." + config.getHiveCompressionCodec().name();
}
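// Hypothetical result for a config using ORC with GZIP compression (the
// concrete config values are assumptions):
// makeFileName(tempDir, config) -> "<tempDir>/ORC.GZIP"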
outputStorageFormat,
schema,
partitionStorageFormat.getEstimatedWriterSystemMemoryUsage(),
conf,
typeManager,
.setStorageFormat(StorageFormat.create(hiveStorageFormat.getSerDe(), hiveStorageFormat.getInputFormat(), hiveStorageFormat.getOutputFormat()))
.setBucketProperty(bucketProperty)
.setSerdeParameters(ImmutableMap.of());
splitProperties.setProperty(FILE_INPUT_FORMAT, storageFormat.getInputFormat());
splitProperties.setProperty(SERIALIZATION_LIB, storageFormat.getSerDe());
splitProperties.setProperty("columns", Joiner.on(',').join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getName)));
splitProperties.setProperty("columns.types", Joiner.on(',').join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getType)));
public RecordFormatWriter(File targetFile, List<String> columnNames, List<Type> columnTypes, HiveCompressionCodec compressionCodec, HiveStorageFormat format)
{
    JobConf config = new JobConf(conf);
    configureCompression(config, compressionCodec);

    List<DataColumn> dataColumns = new ArrayList<>(columnNames.size());
    for (int i = 0; i < columnNames.size(); i++) {
        dataColumns.add(new DataColumn(columnNames.get(i), columnTypes.get(i), HiveType.toHiveType(columnTypes.get(i))));
    }

    recordWriter = new HiveRecordWriter(
            "test_schema",
            "test_table",
            null,
            compressionCodec != HiveCompressionCodec.NONE,
            true,
            dataColumns,
            format.getOutputFormat(),
            format.getSerDe(),
            createSchema(format, columnNames, columnTypes),
            targetFile.getName(),
            targetFile.getParent(),
            targetFile.toString(),
            TYPE_MANAGER,
            config);
}
private static HiveStorageFormat extractHiveStorageFormat(Table table)
{
    StorageDescriptor descriptor = table.getSd();
    if (descriptor == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table is missing storage descriptor");
    }
    SerDeInfo serdeInfo = descriptor.getSerdeInfo();
    if (serdeInfo == null) {
        throw new PrestoException(HIVE_INVALID_METADATA, "Table storage descriptor is missing SerDe info");
    }
    String outputFormat = descriptor.getOutputFormat();
    String serializationLib = serdeInfo.getSerializationLib();
    for (HiveStorageFormat format : HiveStorageFormat.values()) {
        if (format.getOutputFormat().equals(outputFormat) && format.getSerDe().equals(serializationLib)) {
            return format;
        }
    }
    throw new PrestoException(HIVE_UNSUPPORTED_FORMAT, format("Output format %s with SerDe %s is not supported", outputFormat, serializationLib));
}