/**
 * Maps every column name reported by the ORC reader to its position in the
 * reader's column-name sequence.
 *
 * @param reader reader whose {@code getColumnNames()} supplies the physical column names
 * @return immutable map from physical column name to zero-based ordinal
 */
private static Map<String, Integer> buildPhysicalNameOrdinalMap(OrcReader reader) {
    ImmutableMap.Builder<String, Integer> ordinalsByName = ImmutableMap.builder();
    int position = 0;
    for (String name : reader.getColumnNames()) {
        // Ordinals follow iteration order, starting at zero.
        ordinalsByName.put(name, position++);
    }
    return ordinalsByName.build();
}
}
private static List<HiveColumnHandle> getPhysicalHiveColumnHandles(List<HiveColumnHandle> columns, boolean useOrcColumnNames, OrcReader reader, Path path) { if (!useOrcColumnNames) { return columns; } verifyFileHasColumnNames(reader.getColumnNames(), path); Map<String, Integer> physicalNameOrdinalMap = buildPhysicalNameOrdinalMap(reader); int nextMissingColumnIndex = physicalNameOrdinalMap.size(); ImmutableList.Builder<HiveColumnHandle> physicalColumns = ImmutableList.builder(); for (HiveColumnHandle column : columns) { Integer physicalOrdinal = physicalNameOrdinalMap.get(column.getName()); if (physicalOrdinal == null) { // if the column is missing from the file, assign it a column number larger // than the number of columns in the file so the reader will fill it with nulls physicalOrdinal = nextMissingColumnIndex; nextMissingColumnIndex++; } physicalColumns.add(new HiveColumnHandle(column.getName(), column.getHiveType(), column.getTypeSignature(), physicalOrdinal, column.getColumnType(), column.getComment())); } return physicalColumns.build(); }
validateWrite(validation -> validation.getColumnNames().equals(getColumnNames()), "Unexpected column names"); validateWrite(validation -> validation.getRowGroupMaxRowCount() == footer.getRowsInRowGroup(), "Unexpected rows in group"); if (writeValidation.isPresent()) {
/**
 * Builds a lookup from physical ORC column name to the index at which that name
 * appears in {@code reader.getColumnNames()}.
 *
 * @param reader source of the physical column names
 * @return immutable name-to-index map; indexes start at zero and follow iteration order
 */
private static Map<String, Integer> buildPhysicalNameOrdinalMap(OrcReader reader) {
    ImmutableMap.Builder<String, Integer> nameToIndex = ImmutableMap.builder();
    int index = 0;
    for (String columnName : reader.getColumnNames()) {
        nameToIndex.put(columnName, index++);
    }
    return nameToIndex.build();
}
}
private List<ColumnInfo> getColumnInfo(OrcReader reader) { Optional<OrcFileMetadata> metadata = getOrcFileMetadata(reader); if (metadata.isPresent()) { return getColumnInfoFromOrcUserMetadata(metadata.get()); } // support for legacy files without metadata return getColumnInfoFromOrcColumnTypes(reader.getColumnNames(), reader.getFooter().getTypes()); }
/**
 * Computes statistics for one column by dispatching to the indexing helper that
 * matches the column's type.
 *
 * <p>The record reader is created before the type is inspected, so it is opened
 * even for types that end up unsupported.
 * NOTE(review): the record reader is not closed in this method; presumably its
 * lifecycle is tied to the caller's data source. Confirm before adding a close here.
 *
 * @param orcReader reader for the ORC file
 * @param columnId column identifier, translated to a file column index via {@code columnIndex}
 * @param type column type used to select the indexing helper
 * @return the computed stats, or {@code null} when {@code type} is none of BOOLEAN,
 *         BIGINT, DATE, TIMESTAMP, DOUBLE or a {@code VarcharType}
 * @throws IOException propagated from the reader or the indexing helpers
 */
private static ColumnStats doComputeColumnStats(OrcReader orcReader, long columnId, Type type) throws IOException {
    int ordinal = columnIndex(orcReader.getColumnNames(), columnId);
    OrcRecordReader recordReader = orcReader.createRecordReader(
            ImmutableMap.of(ordinal, type),
            OrcPredicate.TRUE,
            UTC,
            newSimpleAggregatedMemoryContext(),
            INITIAL_BATCH_SIZE);

    ColumnStats stats = null;
    if (type.equals(BooleanType.BOOLEAN)) {
        stats = indexBoolean(type, recordReader, ordinal, columnId);
    } else if (type.equals(BigintType.BIGINT) || type.equals(DateType.DATE) || type.equals(TimestampType.TIMESTAMP)) {
        // These three types share the long indexing helper.
        stats = indexLong(type, recordReader, ordinal, columnId);
    } else if (type.equals(DoubleType.DOUBLE)) {
        stats = indexDouble(type, recordReader, ordinal, columnId);
    } else if (type instanceof VarcharType) {
        stats = indexString(type, recordReader, ordinal, columnId);
    }
    return stats;
}
private static List<HiveColumnHandle> getPhysicalHiveColumnHandles(List<HiveColumnHandle> columns, boolean useOrcColumnNames, OrcReader reader, Path path) { if (!useOrcColumnNames) { return columns; } verifyFileHasColumnNames(reader.getColumnNames(), path); Map<String, Integer> physicalNameOrdinalMap = buildPhysicalNameOrdinalMap(reader); int nextMissingColumnIndex = physicalNameOrdinalMap.size(); ImmutableList.Builder<HiveColumnHandle> physicalColumns = ImmutableList.builder(); for (HiveColumnHandle column : columns) { Integer physicalOrdinal = physicalNameOrdinalMap.get(column.getName()); if (physicalOrdinal == null) { // if the column is missing from the file, assign it a column number larger // than the number of columns in the file so the reader will fill it with nulls physicalOrdinal = nextMissingColumnIndex; nextMissingColumnIndex++; } physicalColumns.add(new HiveColumnHandle(column.getClientId(), column.getName(), column.getHiveType(), column.getTypeSignature(), physicalOrdinal, column.isPartitionKey())); } return physicalColumns.build(); }
// Open an OrcReader over dataSource, taking the max merge distance, max read size and
// tiny-stripe threshold from readerAttributes and using HUGE_MAX_READ_BLOCK_SIZE as the
// final constructor argument.
// indexMap is derived by columnIdIndex from the file's column names (presumably
// column id -> position in the file; confirm against columnIdIndex).
// The two builders start empty here; presumably they are populated by the code that
// follows this fragment.
OrcReader reader = new OrcReader(dataSource, ORC, readerAttributes.getMaxMergeDistance(), readerAttributes.getMaxReadSize(), readerAttributes.getTinyStripeThreshold(), HUGE_MAX_READ_BLOCK_SIZE); Map<Long, Integer> indexMap = columnIdIndex(reader.getColumnNames()); ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder(); ImmutableList.Builder<Integer> columnIndexes = ImmutableList.builder();
validateWrite(validation -> validation.getColumnNames().equals(getColumnNames()), "Unexpected column names"); validateWrite(validation -> validation.getRowGroupMaxRowCount() == footer.getRowsInRowGroup(), "Unexpected rows in group"); if (writeValidation.isPresent()) {