/**
 * Builds a map from the ordinal of each top-level field of the root struct to that
 * field's file-level column statistics.
 *
 * <p>{@code fileStats} is addressed by the field's type index (the value returned by
 * {@code getFieldTypeIndex}), not by the field ordinal. A field is left out of the
 * result when {@code fileStats} has no slot for its type index or when that slot is
 * {@code null}.
 *
 * @param rootStructType the root type; must be of kind {@code STRUCT}
 * @param fileStats file-level statistics, addressed by type index
 * @return an immutable map keyed by field ordinal
 */
private static Map<Integer, ColumnStatistics> getStatisticsByColumnOrdinal(OrcType rootStructType, List<ColumnStatistics> fileStats) {
    requireNonNull(rootStructType, "rootStructType is null");
    checkArgument(rootStructType.getOrcTypeKind() == OrcTypeKind.STRUCT);
    requireNonNull(fileStats, "fileStats is null");

    ImmutableMap.Builder<Integer, ColumnStatistics> statistics = ImmutableMap.builder();
    for (int ordinal = 0; ordinal < rootStructType.getFieldCount(); ordinal++) {
        int fieldTypeIndex = rootStructType.getFieldTypeIndex(ordinal);
        // Bounds-check the index that is actually used for the lookup. Comparing the
        // list size against the ordinal is not sufficient: the type index can be larger
        // than the ordinal, which would make the get() below throw.
        if (fieldTypeIndex >= fileStats.size()) {
            continue;
        }
        ColumnStatistics element = fileStats.get(fieldTypeIndex);
        if (element != null) {
            statistics.put(ordinal, element);
        }
    }
    return statistics.build();
}
/**
 * Adds {@code typeId} to {@code result} and then does the same for every type reachable
 * from it through field type indexes.
 *
 * @param types type table, addressed by type id
 * @param result set receiving every visited type id
 * @param typeId id of the type at which the traversal starts
 */
private static void includeOrcColumnsRecursive(List<OrcType> types, Set<Integer> result, int typeId) {
    // The current id is recorded before descending, so a parent is added ahead of its children.
    result.add(typeId);

    OrcType current = types.get(typeId);
    for (int field = 0, fieldCount = current.getFieldCount(); field < fieldCount; field++) {
        includeOrcColumnsRecursive(types, result, current.getFieldTypeIndex(field));
    }
}
/**
 * Creates one stream reader per included top-level column.
 *
 * <p>The returned array has one slot per field of the root type ({@code types.get(0)}).
 * Slots whose column id is not a key of {@code includedColumns} are left {@code null}.
 *
 * @param orcDataSource data source handed to the stream descriptors
 * @param types type table; element 0 is used as the row type
 * @param hiveStorageTimeZone time zone passed through to each reader
 * @param includedColumns columns to read, keyed by top-level column id
 * @param systemMemoryContext memory context passed through to each reader
 * @return array of readers indexed by top-level column id
 */
private static StreamReader[] createStreamReaders(
        OrcDataSource orcDataSource,
        List<OrcType> types,
        DateTimeZone hiveStorageTimeZone,
        Map<Integer, Type> includedColumns,
        AggregatedMemoryContext systemMemoryContext) {
    // Descriptors for the direct children of the root type, in field order.
    StreamDescriptor rootDescriptor = createStreamDescriptor("", "", 0, types, orcDataSource);
    List<StreamDescriptor> topLevelStreams = rootDescriptor.getNestedStreams();

    int fieldCount = types.get(0).getFieldCount();
    StreamReader[] readers = new StreamReader[fieldCount];
    for (int column = 0; column < fieldCount; column++) {
        if (!includedColumns.containsKey(column)) {
            continue;
        }
        readers[column] = StreamReaders.createStreamReader(topLevelStreams.get(column), hiveStorageTimeZone, systemMemoryContext);
    }
    return readers;
}
private static Map<Integer, ColumnStatistics> getRowGroupStatistics(OrcType rootStructType, Map<StreamId, List<RowGroupIndex>> columnIndexes, int rowGroup) { requireNonNull(rootStructType, "rootStructType is null"); checkArgument(rootStructType.getOrcTypeKind() == OrcTypeKind.STRUCT); requireNonNull(columnIndexes, "columnIndexes is null"); checkArgument(rowGroup >= 0, "rowGroup is negative"); Map<Integer, List<ColumnStatistics>> groupedColumnStatistics = new HashMap<>(); for (Entry<StreamId, List<RowGroupIndex>> entry : columnIndexes.entrySet()) { groupedColumnStatistics.computeIfAbsent(entry.getKey().getColumn(), key -> new ArrayList<>()) .add(entry.getValue().get(rowGroup).getColumnStatistics()); } ImmutableMap.Builder<Integer, ColumnStatistics> statistics = ImmutableMap.builder(); for (int ordinal = 0; ordinal < rootStructType.getFieldCount(); ordinal++) { List<ColumnStatistics> columnStatistics = groupedColumnStatistics.get(rootStructType.getFieldTypeIndex(ordinal)); if (columnStatistics != null) { if (columnStatistics.size() == 1) { statistics.put(ordinal, getOnlyElement(columnStatistics)); } else { // Merge statistics from different streams // This can happen if map is represented as struct (DWRF only) statistics.put(ordinal, mergeColumnStatistics(columnStatistics)); } } } return statistics.build(); }
if (entry.getKey() < root.getFieldCount()) { presentColumns.add(entry.getKey()); presentColumnsAndTypes.put(entry.getKey(), entry.getValue());
/**
 * Builds the stream descriptor for the type {@code typeId}, including descriptors for
 * its nested types.
 *
 * <p>The descriptor's stream name is {@code parentStreamName}, extended with
 * {@code "." + fieldName} when {@code fieldName} is not empty. Struct types nest one
 * descriptor per field, list types nest a single {@code "item"} descriptor, and map types
 * nest a {@code "key"} and a {@code "value"} descriptor; other kinds have no nested streams.
 *
 * @param parentStreamName stream name of the enclosing descriptor
 * @param fieldName name of this field within its parent, or empty
 * @param typeId index of this type in {@code types}
 * @param types type table, addressed by type id
 * @param dataSource data source attached to every descriptor
 * @return the descriptor for {@code typeId}
 */
private static StreamDescriptor createStreamDescriptor(String parentStreamName, String fieldName, int typeId, List<OrcType> types, OrcDataSource dataSource) {
    OrcType orcType = types.get(typeId);
    OrcTypeKind kind = orcType.getOrcTypeKind();
    String streamName = fieldName.isEmpty() ? parentStreamName : parentStreamName + "." + fieldName;

    ImmutableList.Builder<StreamDescriptor> children = ImmutableList.builder();
    if (kind == OrcTypeKind.MAP) {
        children.add(createStreamDescriptor(streamName, "key", orcType.getFieldTypeIndex(0), types, dataSource));
        children.add(createStreamDescriptor(streamName, "value", orcType.getFieldTypeIndex(1), types, dataSource));
    }
    else if (kind == OrcTypeKind.LIST) {
        children.add(createStreamDescriptor(streamName, "item", orcType.getFieldTypeIndex(0), types, dataSource));
    }
    else if (kind == OrcTypeKind.STRUCT) {
        int fieldCount = orcType.getFieldCount();
        for (int field = 0; field < fieldCount; field++) {
            children.add(createStreamDescriptor(streamName, orcType.getFieldName(field), orcType.getFieldTypeIndex(field), types, dataSource));
        }
    }
    return new StreamDescriptor(streamName, typeId, fieldName, kind, dataSource, children.build());
}
checkArgument(rootType.getFieldCount() == types.size()); ImmutableList.Builder<ColumnWriter> columnWriters = ImmutableList.builder(); ImmutableSet.Builder<SliceDictionaryColumnWriter> sliceColumnWriters = ImmutableSet.builder();
for (int fieldId = 0; fieldId < orcType.getFieldCount(); fieldId++) { int fieldColumnIndex = orcType.getFieldTypeIndex(fieldId); Type fieldType = type.getTypeParameters().get(fieldId);
/**
 * Builds a map from the ordinal of each top-level field of the root struct to that
 * field's file-level column statistics.
 *
 * <p>{@code fileStats} is addressed by the field's type index (the value returned by
 * {@code getFieldTypeIndex}), not by the field ordinal. A field is left out of the
 * result when {@code fileStats} has no slot for its type index or when that slot is
 * {@code null}.
 *
 * @param rootStructType the root type; must be of kind {@code STRUCT}
 * @param fileStats file-level statistics, addressed by type index
 * @return an immutable map keyed by field ordinal
 */
private static Map<Integer, ColumnStatistics> getStatisticsByColumnOrdinal(OrcType rootStructType, List<ColumnStatistics> fileStats) {
    requireNonNull(rootStructType, "rootStructType is null");
    checkArgument(rootStructType.getOrcTypeKind() == OrcTypeKind.STRUCT);
    requireNonNull(fileStats, "fileStats is null");

    ImmutableMap.Builder<Integer, ColumnStatistics> statistics = ImmutableMap.builder();
    for (int ordinal = 0; ordinal < rootStructType.getFieldCount(); ordinal++) {
        int fieldTypeIndex = rootStructType.getFieldTypeIndex(ordinal);
        // A statistics list shorter than the type table (presumably a file written
        // without statistics - confirm against the writer) must not fail the lookup;
        // such fields simply have no statistics.
        if (fieldTypeIndex >= fileStats.size()) {
            continue;
        }
        ColumnStatistics element = fileStats.get(fieldTypeIndex);
        if (element != null) {
            statistics.put(ordinal, element);
        }
    }
    return statistics.build();
}
/**
 * Adds {@code typeId} to {@code result} and then does the same for every type reachable
 * from it through field type indexes.
 *
 * @param types type table, addressed by type id
 * @param result set receiving every visited type id
 * @param typeId id of the type at which the traversal starts
 */
private static void includeOrcColumnsRecursive(List<OrcType> types, Set<Integer> result, int typeId) {
    // The current id is recorded before descending, so a parent is added ahead of its children.
    result.add(typeId);

    OrcType current = types.get(typeId);
    for (int field = 0, fieldCount = current.getFieldCount(); field < fieldCount; field++) {
        includeOrcColumnsRecursive(types, result, current.getFieldTypeIndex(field));
    }
}
/**
 * Adds {@code typeId} to {@code result} and then does the same for every type reachable
 * from it through field type indexes.
 *
 * @param types type table, addressed by type id
 * @param result set receiving every visited type id
 * @param typeId id of the type at which the traversal starts
 */
private static void includeOrcColumnsRecursive(List<OrcType> types, Set<Integer> result, int typeId) {
    // The current id is recorded before descending, so a parent is added ahead of its children.
    result.add(typeId);

    OrcType current = types.get(typeId);
    for (int field = 0, fieldCount = current.getFieldCount(); field < fieldCount; field++) {
        includeOrcColumnsRecursive(types, result, current.getFieldTypeIndex(field));
    }
}
/**
 * Builds a map from the ordinal of each top-level field of the root struct to that
 * field's file-level column statistics.
 *
 * <p>{@code fileStats} is addressed by the field's type index (the value returned by
 * {@code getFieldTypeIndex}), not by the field ordinal. A field is left out of the
 * result when {@code fileStats} has no slot for its type index or when that slot is
 * {@code null}.
 *
 * @param rootStructType the root type; must be of kind {@code STRUCT}
 * @param fileStats file-level statistics, addressed by type index
 * @return an immutable map keyed by field ordinal
 */
private static Map<Integer, ColumnStatistics> getStatisticsByColumnOrdinal(OrcType rootStructType, List<ColumnStatistics> fileStats) {
    requireNonNull(rootStructType, "rootStructType is null");
    checkArgument(rootStructType.getOrcTypeKind() == OrcTypeKind.STRUCT);
    requireNonNull(fileStats, "fileStats is null");

    ImmutableMap.Builder<Integer, ColumnStatistics> statistics = ImmutableMap.builder();
    for (int ordinal = 0; ordinal < rootStructType.getFieldCount(); ordinal++) {
        int fieldTypeIndex = rootStructType.getFieldTypeIndex(ordinal);
        // Bounds-check the index that is actually used for the lookup. Comparing the
        // list size against the ordinal is not sufficient: the type index can be larger
        // than the ordinal, which would make the get() below throw.
        if (fieldTypeIndex >= fileStats.size()) {
            continue;
        }
        ColumnStatistics element = fileStats.get(fieldTypeIndex);
        if (element != null) {
            statistics.put(ordinal, element);
        }
    }
    return statistics.build();
}
/**
 * Creates one stream reader per included top-level column.
 *
 * <p>The returned array has one slot per field of the root type ({@code types.get(0)}).
 * Slots whose column id is not a key of {@code includedColumns} are left {@code null}.
 *
 * @param orcDataSource data source handed to the stream descriptors
 * @param types type table; element 0 is used as the row type
 * @param hiveStorageTimeZone time zone passed through to each reader
 * @param includedColumns columns to read, keyed by top-level column id
 * @return array of readers indexed by top-level column id
 */
private static StreamReader[] createStreamReaders(OrcDataSource orcDataSource, List<OrcType> types, DateTimeZone hiveStorageTimeZone, Map<Integer, Type> includedColumns) {
    // Descriptors for the direct children of the root type, in field order.
    StreamDescriptor rootDescriptor = createStreamDescriptor("", "", 0, types, orcDataSource);
    List<StreamDescriptor> topLevelStreams = rootDescriptor.getNestedStreams();

    int fieldCount = types.get(0).getFieldCount();
    StreamReader[] readers = new StreamReader[fieldCount];
    for (int column = 0; column < fieldCount; column++) {
        if (!includedColumns.containsKey(column)) {
            continue;
        }
        readers[column] = StreamReaders.createStreamReader(topLevelStreams.get(column), hiveStorageTimeZone);
    }
    return readers;
}
/**
 * Collects the statistics of one row group for every top-level field of the root struct.
 *
 * <p>{@code columnIndexes} is looked up by the field's type index; fields with no
 * index entry are omitted from the result.
 *
 * @param rootStructType the root type; must be of kind {@code STRUCT}
 * @param columnIndexes row group indexes keyed by type index
 * @param rowGroup zero-based row group position; must not be negative
 * @return an immutable map keyed by field ordinal
 */
private static Map<Integer, ColumnStatistics> getRowGroupStatistics(OrcType rootStructType, Map<Integer, List<RowGroupIndex>> columnIndexes, int rowGroup) {
    requireNonNull(rootStructType, "rootStructType is null");
    checkArgument(rootStructType.getOrcTypeKind() == OrcTypeKind.STRUCT);
    requireNonNull(columnIndexes, "columnIndexes is null");
    checkArgument(rowGroup >= 0, "rowGroup is negative");

    ImmutableMap.Builder<Integer, ColumnStatistics> statistics = ImmutableMap.builder();
    int fieldCount = rootStructType.getFieldCount();
    for (int ordinal = 0; ordinal < fieldCount; ordinal++) {
        List<RowGroupIndex> indexes = columnIndexes.get(rootStructType.getFieldTypeIndex(ordinal));
        if (indexes == null) {
            continue;
        }
        statistics.put(ordinal, indexes.get(rowGroup).getColumnStatistics());
    }
    return statistics.build();
}
/**
 * Creates one stream reader per included top-level column.
 *
 * <p>The returned array has one slot per field of the root type ({@code types.get(0)}).
 * Slots whose column id is not a key of {@code includedColumns} are left {@code null}.
 *
 * @param orcDataSource data source handed to the stream descriptors
 * @param types type table; element 0 is used as the row type
 * @param hiveStorageTimeZone time zone passed through to each reader
 * @param includedColumns columns to read, keyed by top-level column id
 * @param systemMemoryContext memory context passed through to each reader
 * @return array of readers indexed by top-level column id
 */
private static StreamReader[] createStreamReaders(
        OrcDataSource orcDataSource,
        List<OrcType> types,
        DateTimeZone hiveStorageTimeZone,
        Map<Integer, Type> includedColumns,
        AggregatedMemoryContext systemMemoryContext) {
    // Descriptors for the direct children of the root type, in field order.
    StreamDescriptor rootDescriptor = createStreamDescriptor("", "", 0, types, orcDataSource);
    List<StreamDescriptor> topLevelStreams = rootDescriptor.getNestedStreams();

    int fieldCount = types.get(0).getFieldCount();
    StreamReader[] readers = new StreamReader[fieldCount];
    for (int column = 0; column < fieldCount; column++) {
        if (!includedColumns.containsKey(column)) {
            continue;
        }
        readers[column] = StreamReaders.createStreamReader(topLevelStreams.get(column), hiveStorageTimeZone, systemMemoryContext);
    }
    return readers;
}
private static Map<Integer, ColumnStatistics> getRowGroupStatistics(OrcType rootStructType, Map<StreamId, List<RowGroupIndex>> columnIndexes, int rowGroup) { requireNonNull(rootStructType, "rootStructType is null"); checkArgument(rootStructType.getOrcTypeKind() == OrcTypeKind.STRUCT); requireNonNull(columnIndexes, "columnIndexes is null"); checkArgument(rowGroup >= 0, "rowGroup is negative"); Map<Integer, List<ColumnStatistics>> groupedColumnStatistics = new HashMap<>(); for (Entry<StreamId, List<RowGroupIndex>> entry : columnIndexes.entrySet()) { groupedColumnStatistics.computeIfAbsent(entry.getKey().getColumn(), key -> new ArrayList<>()) .add(entry.getValue().get(rowGroup).getColumnStatistics()); } ImmutableMap.Builder<Integer, ColumnStatistics> statistics = ImmutableMap.builder(); for (int ordinal = 0; ordinal < rootStructType.getFieldCount(); ordinal++) { List<ColumnStatistics> columnStatistics = groupedColumnStatistics.get(rootStructType.getFieldTypeIndex(ordinal)); if (columnStatistics != null) { if (columnStatistics.size() == 1) { statistics.put(ordinal, getOnlyElement(columnStatistics)); } else { // Merge statistics from different streams // This can happen if map is represented as struct (DWRF only) statistics.put(ordinal, mergeColumnStatistics(columnStatistics)); } } } return statistics.build(); }
/**
 * Builds the stream descriptor for the type {@code typeId}, including descriptors for
 * its nested types.
 *
 * <p>The descriptor's stream name is {@code parentStreamName}, extended with
 * {@code "." + fieldName} when {@code fieldName} is not empty. Struct types nest one
 * descriptor per field, list types nest a single {@code "item"} descriptor, and map types
 * nest a {@code "key"} and a {@code "value"} descriptor; other kinds have no nested streams.
 *
 * @param parentStreamName stream name of the enclosing descriptor
 * @param fieldName name of this field within its parent, or empty
 * @param typeId index of this type in {@code types}
 * @param types type table, addressed by type id
 * @param dataSource data source attached to every descriptor
 * @return the descriptor for {@code typeId}
 */
private static StreamDescriptor createStreamDescriptor(String parentStreamName, String fieldName, int typeId, List<OrcType> types, OrcDataSource dataSource) {
    OrcType orcType = types.get(typeId);
    OrcTypeKind kind = orcType.getOrcTypeKind();
    String streamName = fieldName.isEmpty() ? parentStreamName : parentStreamName + "." + fieldName;

    ImmutableList.Builder<StreamDescriptor> children = ImmutableList.builder();
    if (kind == OrcTypeKind.MAP) {
        children.add(createStreamDescriptor(streamName, "key", orcType.getFieldTypeIndex(0), types, dataSource));
        children.add(createStreamDescriptor(streamName, "value", orcType.getFieldTypeIndex(1), types, dataSource));
    }
    else if (kind == OrcTypeKind.LIST) {
        children.add(createStreamDescriptor(streamName, "item", orcType.getFieldTypeIndex(0), types, dataSource));
    }
    else if (kind == OrcTypeKind.STRUCT) {
        int fieldCount = orcType.getFieldCount();
        for (int field = 0; field < fieldCount; field++) {
            children.add(createStreamDescriptor(streamName, orcType.getFieldName(field), orcType.getFieldTypeIndex(field), types, dataSource));
        }
    }
    return new StreamDescriptor(streamName, typeId, fieldName, kind, dataSource, children.build());
}
/**
 * Builds the stream descriptor for the type {@code typeId}, including descriptors for
 * its nested types.
 *
 * <p>The descriptor's stream name is {@code parentStreamName}, extended with
 * {@code "." + fieldName} when {@code fieldName} is not empty. Struct types nest one
 * descriptor per field, list types nest a single {@code "item"} descriptor, and map types
 * nest a {@code "key"} and a {@code "value"} descriptor; other kinds have no nested streams.
 *
 * @param parentStreamName stream name of the enclosing descriptor
 * @param fieldName name of this field within its parent, or empty
 * @param typeId index of this type in {@code types}
 * @param types type table, addressed by type id
 * @param dataSource data source attached to every descriptor
 * @return the descriptor for {@code typeId}
 */
private static StreamDescriptor createStreamDescriptor(String parentStreamName, String fieldName, int typeId, List<OrcType> types, OrcDataSource dataSource) {
    OrcType orcType = types.get(typeId);
    OrcTypeKind kind = orcType.getOrcTypeKind();
    String streamName = fieldName.isEmpty() ? parentStreamName : parentStreamName + "." + fieldName;

    ImmutableList.Builder<StreamDescriptor> children = ImmutableList.builder();
    if (kind == OrcTypeKind.MAP) {
        children.add(createStreamDescriptor(streamName, "key", orcType.getFieldTypeIndex(0), types, dataSource));
        children.add(createStreamDescriptor(streamName, "value", orcType.getFieldTypeIndex(1), types, dataSource));
    }
    else if (kind == OrcTypeKind.LIST) {
        children.add(createStreamDescriptor(streamName, "item", orcType.getFieldTypeIndex(0), types, dataSource));
    }
    else if (kind == OrcTypeKind.STRUCT) {
        int fieldCount = orcType.getFieldCount();
        for (int field = 0; field < fieldCount; field++) {
            children.add(createStreamDescriptor(streamName, orcType.getFieldName(field), orcType.getFieldTypeIndex(field), types, dataSource));
        }
    }
    return new StreamDescriptor(streamName, typeId, fieldName, kind, dataSource, children.build());
}
for (int fieldId = 0; fieldId < orcType.getFieldCount(); fieldId++) { int fieldColumnIndex = orcType.getFieldTypeIndex(fieldId); Type fieldType = type.getTypeParameters().get(fieldId);
List<String> fieldNames = type.getFieldNames(); ImmutableList.Builder<TypeSignatureParameter> fieldTypes = ImmutableList.builder(); for (int i = 0; i < type.getFieldCount(); i++) { fieldTypes.add(TypeSignatureParameter.of(new NamedTypeSignature( Optional.of(new RowFieldName(fieldNames.get(i), false)),