typeManager.getType(handle.getTypeSignature()), columnComment.get(handle.getName()).orElse(null), columnExtraInfo(handle.isPartitionKey()), handle.isHidden());
if (column.isPartitionKey()) { partitionColumns.add(inputIndex);
/**
 * Validates that every non-partition column is representable in this storage format.
 * Only the AVRO format imposes type restrictions; all other formats accept any column.
 */
public void validateColumns(List<HiveColumnHandle> handles)
{
    if (this != AVRO) {
        return;
    }
    // Partition keys are stored in the path, not the Avro file, so they are exempt.
    handles.stream()
            .filter(handle -> !handle.isPartitionKey())
            .forEach(handle -> validateAvroType(handle.getHiveType().getTypeInfo(), handle.getName()));
}
Type columnType = columnTypes.get(columnName); ColumnStatistics columnStatistics; if (columnHandle.isPartitionKey()) { columnStatistics = createPartitionColumnStatistics(columnHandle, columnType, partitions, statistics, averageRowsPerPartition, rowCount);
/**
 * Round-trips the canonical map representation of a column handle through Jackson
 * and verifies each deserialized field.
 */
@Test
public void testColumnHandleDeserialize()
        throws Exception
{
    String serialized = objectMapper.writeValueAsString(COLUMN_HANDLE_AS_MAP);
    HiveColumnHandle deserialized = objectMapper.readValue(serialized, HiveColumnHandle.class);

    assertEquals(deserialized.getName(), "column");
    assertEquals(deserialized.getTypeSignature(), DOUBLE.getTypeSignature());
    assertEquals(deserialized.getHiveType(), HiveType.HIVE_FLOAT);
    assertEquals(deserialized.getHiveColumnIndex(), -1);
    assertEquals(deserialized.isPartitionKey(), true);
}
for (HiveColumnHandle column : inputColumns) { HiveType hiveType = column.getHiveType(); if (column.isPartitionKey()) { partitionColumnNames.add(column.getName()); partitionColumnTypes.add(typeManager.getType(column.getTypeSignature()));
HiveType type = columnHandle.getHiveType(); if (!partitionColumnNames.contains(name)) { verify(!columnHandle.isPartitionKey(), "Column handles are not consistent with partitioned by property"); columns.add(new Column(name, type, columnHandle.getComment())); verify(columnHandle.isPartitionKey(), "Column handles are not consistent with partitioned by property");
// Serializes the handle to JSON, reads it back, and checks all fields survive the round trip.
private void testRoundTrip(HiveColumnHandle expected)
{
    HiveColumnHandle actual = codec.fromJson(codec.toJson(expected));

    assertEquals(actual.getName(), expected.getName());
    assertEquals(actual.getHiveType(), expected.getHiveType());
    assertEquals(actual.getHiveColumnIndex(), expected.getHiveColumnIndex());
    assertEquals(actual.isPartitionKey(), expected.isPartitionKey());
}
}
/**
 * Builds the requested Parquet projection: only non-partition columns that resolve
 * to a field in the file schema are read. Partition values come from the split path,
 * and columns missing from the file are simply not requested.
 */
@Override
@SuppressWarnings("deprecation")
public ReadContext init(
        Configuration configuration,
        Map<String, String> keyValueMetaData,
        MessageType messageType)
{
    List<parquet.schema.Type> requestedFields = columns.stream()
            .filter(column -> !column.isPartitionKey())
            .map(column -> getParquetType(column, messageType, useParquetColumnNames))
            .filter(field -> field != null)
            .collect(toList());
    return new ReadContext(new MessageType(messageType.getName(), requestedFields));
}
/**
 * Creates the read support and precomputes one converter per readable column.
 * Partition keys and columns absent from the file schema get no converter —
 * their converter-list positions are simply skipped.
 */
public PrestoReadSupport(boolean useParquetColumnNames, List<HiveColumnHandle> columns, MessageType messageType)
{
    this.columns = columns;
    this.useParquetColumnNames = useParquetColumnNames;

    ImmutableList.Builder<Converter> converterBuilder = ImmutableList.builder();
    for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) {
        HiveColumnHandle column = columns.get(columnIndex);
        // Partition values are supplied from the path, never read from the file.
        if (column.isPartitionKey()) {
            continue;
        }
        parquet.schema.Type parquetType = getParquetType(column, messageType, useParquetColumnNames);
        // Column not present in this file; the reader will produce nulls for it.
        if (parquetType == null) {
            continue;
        }
        if (parquetType.isPrimitive()) {
            converterBuilder.add(new ParquetPrimitiveColumnConverter(columnIndex));
        }
        else {
            converterBuilder.add(new ParquetColumnConverter(
                    createGroupConverter(types[columnIndex], parquetType.getName(), parquetType, columnIndex),
                    columnIndex));
        }
    }
    this.converters = converterBuilder.build();
}
.filter(columnHandle -> !columnHandle.isPartitionKey()) .filter(columnHandle -> columnHandle.getHiveType().equals(HiveType.HIVE_DATE)) .collect(toList());
/**
 * Builds a Parquet predicate over the data columns only. Partition keys are
 * excluded because they are evaluated during partition pruning, not against
 * the file contents.
 */
public static ParquetPredicate buildParquetPredicate(
        List<HiveColumnHandle> columns,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        MessageType fileSchema,
        TypeManager typeManager)
{
    ImmutableList.Builder<ColumnReference<HiveColumnHandle>> references = ImmutableList.builder();
    for (HiveColumnHandle column : columns) {
        if (column.isPartitionKey()) {
            continue;
        }
        int fieldIndex = lookupParquetColumn(column, fileSchema);
        Type columnType = typeManager.getType(column.getTypeSignature());
        references.add(new ColumnReference<>(column, fieldIndex, columnType));
    }
    return new TupleDomainParquetPredicate<>(effectivePredicate, references.build());
}
/**
 * Returns a function that maps a column handle to its {@link ColumnMetadata},
 * attaching any comment recorded in the Hive table descriptor.
 * Fails fast if the descriptor lists the same column name twice.
 */
private static Function<HiveColumnHandle, ColumnMetadata> columnMetadataGetter(Table table, TypeManager typeManager)
{
    // Collect every declared column name (partition keys first, then data columns)
    // so duplicates across the two lists are also caught.
    ImmutableList.Builder<String> names = ImmutableList.builder();
    table.getPartitionKeys().stream().map(FieldSchema::getName).forEach(names::add);
    table.getSd().getCols().stream().map(FieldSchema::getName).forEach(names::add);
    List<String> allColumnNames = names.build();
    if (allColumnNames.size() > Sets.newHashSet(allColumnNames).size()) {
        throw new PrestoException(HIVE_INVALID_METADATA,
                format("Hive metadata for table %s is invalid: Table descriptor contains duplicate columns", table.getTableName()));
    }

    // Index meaningful comments by column name, skipping the placeholder text
    // the Hive deserializer injects.
    ImmutableMap.Builder<String, String> commentBuilder = ImmutableMap.builder();
    for (FieldSchema field : concat(table.getSd().getCols(), table.getPartitionKeys())) {
        String comment = field.getComment();
        if ((comment != null) && !comment.equals("from deserializer")) {
            commentBuilder.put(field.getName(), comment);
        }
    }
    Map<String, String> columnComment = commentBuilder.build();

    return handle -> new ColumnMetadata(
            handle.getName(),
            typeManager.getType(handle.getTypeSignature()),
            annotateColumnComment(columnComment.get(handle.getName()), handle.isPartitionKey()),
            false);
}
ImmutableList.Builder<DataColumn> dataColumns = ImmutableList.builder(); for (HiveColumnHandle column : inputColumns) { if (column.isPartitionKey()) { partitionColumnNames.add(column.getName()); partitionColumnTypes.add(typeManager.getType(column.getTypeSignature())); for (int inputIndex = 0; inputIndex < inputColumnsWithoutSample.size(); inputIndex++) { HiveColumnHandle column = inputColumnsWithoutSample.get(inputIndex); if (column.isPartitionKey()) { partitionColumns.add(inputIndex);
/**
 * Round-trips the canonical map representation of a column handle through Jackson
 * and verifies the deserialized fields.
 */
// NOTE(review): unlike the sibling deserialization tests, this variant does not
// assert getTypeSignature(); consider adding that check for consistency — confirm
// the type-signature symbol is importable in this file first.
@Test
public void testColumnHandleDeserialize()
        throws Exception
{
    String serialized = objectMapper.writeValueAsString(COLUMN_HANDLE_AS_MAP);
    HiveColumnHandle deserialized = objectMapper.readValue(serialized, HiveColumnHandle.class);

    assertEquals(deserialized.getName(), "column");
    assertEquals(deserialized.getHiveType(), HiveType.HIVE_FLOAT);
    assertEquals(deserialized.getHiveColumnIndex(), -1);
    assertEquals(deserialized.isPartitionKey(), true);
}
// Verifies that the map form of a column handle deserializes with every field intact.
@Test
public void testColumnHandleDeserialize()
        throws Exception
{
    String json = objectMapper.writeValueAsString(COLUMN_HANDLE_AS_MAP);

    HiveColumnHandle handle = objectMapper.readValue(json, HiveColumnHandle.class);

    assertEquals(handle.getName(), "column");
    assertEquals(handle.getTypeSignature(), DOUBLE.getTypeSignature());
    assertEquals(handle.getHiveType(), HiveType.HIVE_FLOAT);
    assertEquals(handle.getHiveColumnIndex(), -1);
    assertEquals(handle.isPartitionKey(), true);
}
private static List<HiveColumnHandle> getPhysicalHiveColumnHandles(List<HiveColumnHandle> columns, boolean useOrcColumnNames, OrcReader reader, Path path) { if (!useOrcColumnNames) { return columns; } verifyFileHasColumnNames(reader.getColumnNames(), path); Map<String, Integer> physicalNameOrdinalMap = buildPhysicalNameOrdinalMap(reader); int nextMissingColumnIndex = physicalNameOrdinalMap.size(); ImmutableList.Builder<HiveColumnHandle> physicalColumns = ImmutableList.builder(); for (HiveColumnHandle column : columns) { Integer physicalOrdinal = physicalNameOrdinalMap.get(column.getName()); if (physicalOrdinal == null) { // if the column is missing from the file, assign it a column number larger // than the number of columns in the file so the reader will fill it with nulls physicalOrdinal = nextMissingColumnIndex; nextMissingColumnIndex++; } physicalColumns.add(new HiveColumnHandle(column.getClientId(), column.getName(), column.getHiveType(), column.getTypeSignature(), physicalOrdinal, column.isPartitionKey())); } return physicalColumns.build(); }
.filter(column -> !column.isPartitionKey()) .map(column -> getParquetType(column, fileSchema, useParquetColumnNames)) .filter(Objects::nonNull)
// Encodes the given handle as JSON, decodes it, and asserts field-by-field equality.
private void testRoundTrip(HiveColumnHandle expected)
{
    String encoded = codec.toJson(expected);
    HiveColumnHandle decoded = codec.fromJson(encoded);

    assertEquals(decoded.getName(), expected.getName());
    assertEquals(decoded.getHiveType(), expected.getHiveType());
    assertEquals(decoded.getHiveColumnIndex(), expected.getHiveColumnIndex());
    assertEquals(decoded.isPartitionKey(), expected.isPartitionKey());
}
}
// JSON round-trip of a fully populated handle: every field must survive unchanged.
@Test
public void testRoundTrip()
{
    HiveColumnHandle expected = new HiveColumnHandle(
            "client",
            "name",
            HiveType.HIVE_FLOAT,
            parseTypeSignature(StandardTypes.DOUBLE),
            88,
            true);

    HiveColumnHandle actual = codec.fromJson(codec.toJson(expected));

    assertEquals(actual.getClientId(), expected.getClientId());
    assertEquals(actual.getName(), expected.getName());
    assertEquals(actual.getHiveType(), expected.getHiveType());
    assertEquals(actual.getHiveColumnIndex(), expected.getHiveColumnIndex());
    assertEquals(actual.isPartitionKey(), expected.isPartitionKey());
}
}