/**
 * Tells whether the given stream is one of the index stream kinds.
 *
 * @param stream stream whose kind is inspected
 * @return {@code true} when the kind is {@code ROW_INDEX}, {@code DICTIONARY_COUNT},
 *         {@code BLOOM_FILTER} or {@code BLOOM_FILTER_UTF8}; {@code false} otherwise
 */
static boolean isIndexStream(Stream stream) {
    if (stream.getStreamKind() == ROW_INDEX) {
        return true;
    }
    if (stream.getStreamKind() == DICTIONARY_COUNT) {
        return true;
    }
    if (stream.getStreamKind() == BLOOM_FILTER) {
        return true;
    }
    return stream.getStreamKind() == BLOOM_FILTER_UTF8;
}
/**
 * Tells whether the given stream carries dictionary content.
 *
 * @param stream stream whose kind is inspected
 * @param columnEncoding encoding kind of the column the stream belongs to
 * @return {@code true} for a {@code DICTIONARY_DATA} stream, or for a {@code LENGTH}
 *         stream whose column encoding is {@code DICTIONARY} or {@code DICTIONARY_V2}
 */
private static boolean isDictionary(Stream stream, ColumnEncodingKind columnEncoding) {
    if (stream.getStreamKind() == DICTIONARY_DATA) {
        return true;
    }
    if (stream.getStreamKind() != LENGTH) {
        return false;
    }
    // A LENGTH stream only counts when the column is dictionary encoded.
    return columnEncoding == DICTIONARY || columnEncoding == DICTIONARY_V2;
}
private Map<StreamId, List<HiveBloomFilter>> readBloomFilterIndexes(Map<StreamId, Stream> streams, Map<StreamId, OrcInputStream> streamsData) throws IOException { ImmutableMap.Builder<StreamId, List<HiveBloomFilter>> bloomFilters = ImmutableMap.builder(); for (Entry<StreamId, Stream> entry : streams.entrySet()) { Stream stream = entry.getValue(); if (stream.getStreamKind() == BLOOM_FILTER) { OrcInputStream inputStream = streamsData.get(entry.getKey()); bloomFilters.put(entry.getKey(), metadataReader.readBloomFilterIndexes(inputStream)); } // TODO: add support for BLOOM_FILTER_UTF8 } return bloomFilters.build(); }
/**
 * Creates a stream id from the identifying attributes of the given stream.
 *
 * @param stream stream whose column, sequence and stream kind are copied into this id
 */
public StreamId(Stream stream) {
    this.streamKind = stream.getStreamKind();
    this.sequence = stream.getSequence();
    this.column = stream.getColumn();
}
/**
 * Reads the row group indexes of every {@code ROW_INDEX} stream and, when a non-empty
 * bloom filter list is found under the same key, rebuilds each row group index with the
 * bloom filter at the same position attached to its column statistics.
 *
 * @param streams streams to scan, keyed by stream id
 * @param streamsData input streams, looked up with the same stream ids
 * @param bloomFilterIndexes bloom filters per stream id; entries may be absent or empty
 * @return immutable map from the id of each {@code ROW_INDEX} stream to its row group indexes
 * @throws IOException declared by this method; presumably raised while the metadata reader decodes the index
 */
private Map<StreamId, List<RowGroupIndex>> readColumnIndexes(Map<StreamId, Stream> streams, Map<StreamId, OrcInputStream> streamsData, Map<StreamId, List<HiveBloomFilter>> bloomFilterIndexes)
        throws IOException {
    ImmutableMap.Builder<StreamId, List<RowGroupIndex>> result = ImmutableMap.builder();
    for (Entry<StreamId, Stream> streamEntry : streams.entrySet()) {
        if (streamEntry.getValue().getStreamKind() != ROW_INDEX) {
            continue;
        }
        StreamId streamId = streamEntry.getKey();
        OrcInputStream indexData = streamsData.get(streamId);
        // NOTE(review): the lookup key is the ROW_INDEX stream's id, while readBloomFilterIndexes
        // stores its results under BLOOM_FILTER stream ids. If StreamId equality takes the stream
        // kind into account this lookup can never match — confirm against StreamId.equals.
        List<HiveBloomFilter> columnBloomFilters = bloomFilterIndexes.get(streamId);
        List<RowGroupIndex> rowGroups = metadataReader.readRowIndexes(hiveWriterVersion, indexData);

        if (columnBloomFilters == null || columnBloomFilters.isEmpty()) {
            // No bloom filters for this stream: keep the row group indexes as read.
            result.put(streamId, rowGroups);
            continue;
        }

        // Assumes there is at least one bloom filter per row group; get(position) throws
        // otherwise — TODO confirm the two lists are always the same length.
        ImmutableList.Builder<RowGroupIndex> withBloomFilters = ImmutableList.builder();
        int position = 0;
        for (RowGroupIndex rowGroup : rowGroups) {
            ColumnStatistics statistics = rowGroup.getColumnStatistics()
                    .withBloomFilter(columnBloomFilters.get(position));
            withBloomFilters.add(new RowGroupIndex(rowGroup.getPositions(), statistics));
            position++;
        }
        result.put(streamId, withBloomFilters.build());
    }
    return result.build();
}
// Record this stream's kind under its column in streamKindsBuilder.
streamKindsBuilder.put(stream.getColumn(), stream.getStreamKind());
/**
 * Converts a stream description into its {@code OrcProto.Stream} message.
 *
 * @param stream stream whose column, kind and length are copied
 * @return message built from the stream's column, its kind mapped through
 *         {@code toStreamKind}, and its length
 */
private static OrcProto.Stream toStream(Stream stream) {
    OrcProto.Stream.Builder message = OrcProto.Stream.newBuilder();
    message.setColumn(stream.getColumn());
    message.setKind(toStreamKind(stream.getStreamKind()));
    message.setLength(stream.getLength());
    return message.build();
}
// Register the stream under a StreamId built from it; for IN_DICTIONARY streams, additionally look up
// the encoding of the stream's column (the rest of this branch lies outside the visible snippet).
streams.put(new StreamId(stream), stream); if (stream.getStreamKind() == StreamKind.IN_DICTIONARY) { ColumnEncoding columnEncoding = columnEncodings.get(stream.getColumn());
/**
 * Converts a stream description into its {@code DwrfProto.Stream} message.
 *
 * @param stream stream whose column, kind, length and use-vints flag are copied
 * @return message built from the stream's column, its kind mapped through
 *         {@code toStreamKind}, its length and its {@code isUseVInts()} flag
 */
private static DwrfProto.Stream toStream(Stream stream) {
    DwrfProto.Stream.Builder message = DwrfProto.Stream.newBuilder();
    message.setColumn(stream.getColumn());
    message.setKind(toStreamKind(stream.getStreamKind()));
    message.setLength(stream.getLength());
    message.setUseVInts(stream.isUseVInts());
    return message.build();
}
// Write the stream data into sliceOutput, then verify the stream reported by streamDataOutput:
// kind DATA, column 33, and a length equal to sliceOutput.size().
streamDataOutput.writeData(sliceOutput); Stream stream = streamDataOutput.getStream(); assertEquals(stream.getStreamKind(), StreamKind.DATA); assertEquals(stream.getColumn(), 33); assertEquals(stream.getLength(), sliceOutput.size());
// After writing to sliceOutput, the stream reported by streamDataOutput is expected to have
// kind DATA, column 33, and a length matching sliceOutput.size().
streamDataOutput.writeData(sliceOutput); Stream stream = streamDataOutput.getStream(); assertEquals(stream.getStreamKind(), StreamKind.DATA); assertEquals(stream.getColumn(), 33); assertEquals(stream.getLength(), sliceOutput.size());
/**
 * Checks whether a stream's kind is one of the index kinds.
 *
 * @param stream stream to classify
 * @return {@code true} for {@code ROW_INDEX}, {@code DICTIONARY_COUNT},
 *         {@code BLOOM_FILTER} and {@code BLOOM_FILTER_UTF8} streams; {@code false} otherwise
 */
static boolean isIndexStream(Stream stream) {
    boolean rowIndexOrDictionaryCount = stream.getStreamKind() == ROW_INDEX
            || stream.getStreamKind() == DICTIONARY_COUNT;
    if (rowIndexOrDictionaryCount) {
        return true;
    }
    // Both bloom filter kinds are treated as index streams as well.
    return stream.getStreamKind() == BLOOM_FILTER
            || stream.getStreamKind() == BLOOM_FILTER_UTF8;
}
/**
 * Checks whether a stream holds dictionary content for its column.
 *
 * @param stream stream to classify
 * @param columnEncoding encoding kind of the stream's column
 * @return {@code true} if the stream kind is {@code DICTIONARY_DATA}, or if it is
 *         {@code LENGTH} and the encoding is {@code DICTIONARY} or {@code DICTIONARY_V2}
 */
private static boolean isDictionary(Stream stream, ColumnEncodingKind columnEncoding) {
    if (stream.getStreamKind() == DICTIONARY_DATA) {
        return true;
    }
    else if (stream.getStreamKind() == LENGTH) {
        // LENGTH streams are dictionary streams only under a dictionary encoding.
        return columnEncoding == DICTIONARY || columnEncoding == DICTIONARY_V2;
    }
    else {
        return false;
    }
}
/**
 * Tells whether the given stream is a {@code ROW_INDEX} or {@code DICTIONARY_COUNT} stream.
 *
 * @param stream stream whose kind is inspected
 * @return {@code true} for those two kinds, {@code false} for any other kind
 */
private static boolean isIndexStream(Stream stream) {
    if (stream.getStreamKind() == ROW_INDEX) {
        return true;
    }
    return stream.getStreamKind() == DICTIONARY_COUNT;
}
/**
 * Decides whether the given stream is part of a column's dictionary.
 *
 * @param stream stream whose kind is inspected
 * @param columnEncoding encoding kind of the column the stream belongs to
 * @return {@code true} for {@code DICTIONARY_DATA} streams and for {@code LENGTH} streams
 *         of columns encoded as {@code DICTIONARY} or {@code DICTIONARY_V2}
 */
private static boolean isDictionary(Stream stream, ColumnEncodingKind columnEncoding) {
    if (stream.getStreamKind() == DICTIONARY_DATA) {
        return true;
    }
    if (stream.getStreamKind() != LENGTH) {
        return false;
    }
    return !(columnEncoding != DICTIONARY && columnEncoding != DICTIONARY_V2);
}
/**
 * Creates a stream id from the given stream.
 *
 * @param stream stream whose column and stream kind are copied into this id
 */
public StreamId(Stream stream) {
    this.streamKind = stream.getStreamKind();
    this.column = stream.getColumn();
}
/**
 * Reads the row group indexes of every {@code ROW_INDEX} stream, keyed by column.
 *
 * @param streams streams to scan, keyed by stream id
 * @param streamsData input streams, looked up with the same stream ids
 * @return immutable map from the column of each {@code ROW_INDEX} stream to the row group
 *         indexes returned by the metadata reader for that stream's data
 * @throws IOException declared by this method; presumably raised while the metadata reader decodes the index
 */
private Map<Integer, List<RowGroupIndex>> readColumnIndexes(Map<StreamId, Stream> streams, Map<StreamId, OrcInputStream> streamsData)
        throws IOException {
    ImmutableMap.Builder<Integer, List<RowGroupIndex>> result = ImmutableMap.builder();
    for (Entry<StreamId, Stream> streamEntry : streams.entrySet()) {
        Stream candidate = streamEntry.getValue();
        if (candidate.getStreamKind() != ROW_INDEX) {
            continue;
        }
        OrcInputStream indexData = streamsData.get(streamEntry.getKey());
        result.put(candidate.getColumn(), metadataReader.readRowIndexes(indexData));
    }
    return result.build();
}
/**
 * Builds the id of the given stream.
 *
 * @param stream stream providing the column, sequence and stream kind stored in this id
 */
public StreamId(Stream stream) {
    this.streamKind = stream.getStreamKind();
    this.sequence = stream.getSequence();
    this.column = stream.getColumn();
}
/**
 * Builds the {@code OrcProto.Stream} message for the given stream.
 *
 * @param stream stream providing the column, kind and length
 * @return message carrying the stream's column, the result of {@code toStreamKind} for
 *         its kind, and its length
 */
private static OrcProto.Stream toStream(Stream stream) {
    OrcProto.Stream.Builder protoStream = OrcProto.Stream.newBuilder();
    protoStream.setColumn(stream.getColumn());
    protoStream.setKind(toStreamKind(stream.getStreamKind()));
    protoStream.setLength(stream.getLength());
    return protoStream.build();
}
/**
 * Builds the {@code DwrfProto.Stream} message for the given stream.
 *
 * @param stream stream providing the column, kind, length and use-vints flag
 * @return message carrying the stream's column, the result of {@code toStreamKind} for
 *         its kind, its length and its {@code isUseVInts()} value
 */
private static DwrfProto.Stream toStream(Stream stream) {
    DwrfProto.Stream.Builder protoStream = DwrfProto.Stream.newBuilder();
    protoStream.setColumn(stream.getColumn());
    protoStream.setKind(toStreamKind(stream.getStreamKind()));
    protoStream.setLength(stream.getLength());
    protoStream.setUseVInts(stream.isUseVInts());
    return protoStream.build();
}