/**
 * Converts a {@link Stream} descriptor into its {@code OrcProto.Stream} form.
 * The column and length are copied as-is; the stream kind is mapped through
 * {@code toStreamKind}.
 *
 * @param stream the stream descriptor to convert
 * @return the built {@code OrcProto.Stream} message
 */
private static OrcProto.Stream toStream(Stream stream)
{
    OrcProto.Stream.Builder protoStream = OrcProto.Stream.newBuilder();
    protoStream.setColumn(stream.getColumn());
    protoStream.setKind(toStreamKind(stream.getStreamKind()));
    protoStream.setLength(stream.getLength());
    return protoStream.build();
}
/**
 * Exposes the contents of {@code buffer} as a DATA stream for the given column.
 * The stream length is {@code buffer.getOutputDataSize()} narrowed with
 * {@code toIntExact}, so an oversized buffer fails with an {@link ArithmeticException}.
 *
 * @param column the column the DATA stream is attributed to
 * @return an output that writes via {@code buffer::writeDataTo}, paired with its descriptor
 */
@Override
public StreamDataOutput getStreamDataOutput(int column)
{
    // NOTE(review): the last constructor argument is fixed to true — presumably the
    // useVInts flag; confirm against the Stream constructor.
    Stream dataStream = new Stream(column, DATA, toIntExact(buffer.getOutputDataSize()), true);
    return new StreamDataOutput(buffer::writeDataTo, dataStream);
}
/**
 * Converts a {@link Stream} descriptor into its {@code DwrfProto.Stream} form.
 * The column, length and useVInts flag are copied as-is; the stream kind is
 * mapped through {@code toStreamKind}.
 *
 * @param stream the stream descriptor to convert
 * @return the built {@code DwrfProto.Stream} message
 */
private static DwrfProto.Stream toStream(Stream stream)
{
    DwrfProto.Stream.Builder protoStream = DwrfProto.Stream.newBuilder();
    protoStream.setColumn(stream.getColumn());
    protoStream.setKind(toStreamKind(stream.getStreamKind()));
    protoStream.setLength(stream.getLength());
    protoStream.setUseVInts(stream.isUseVInts());
    return protoStream.build();
}
/**
 * Creates the identifier of the given stream from its column, sequence and
 * stream kind.
 *
 * @param stream the stream whose identifying properties are copied
 */
public StreamId(Stream stream)
{
    this.column = stream.getColumn();
    this.sequence = stream.getSequence();
    this.streamKind = stream.getStreamKind();
}
private Map<StreamId, ValueInputStream<?>> createValueStreams(Map<StreamId, Stream> streams, Map<StreamId, OrcInputStream> streamsData, List<ColumnEncoding> columnEncodings) { ImmutableMap.Builder<StreamId, ValueInputStream<?>> valueStreams = ImmutableMap.builder(); for (Entry<StreamId, Stream> entry : streams.entrySet()) { StreamId streamId = entry.getKey(); Stream stream = entry.getValue(); ColumnEncodingKind columnEncoding = columnEncodings.get(stream.getColumn()) .getColumnEncoding(stream.getSequence()) .getColumnEncodingKind(); // skip index and empty streams if (isIndexStream(stream) || stream.getLength() == 0) { continue; } OrcInputStream inputStream = streamsData.get(streamId); OrcTypeKind columnType = types.get(stream.getColumn()).getOrcTypeKind(); valueStreams.put(streamId, ValueStreams.createValueStreams(streamId, inputStream, columnType, columnEncoding, stream.isUseVInts())); } return valueStreams.build(); }
// register this stream's kind under the column the stream belongs to
streamKindsBuilder.put(stream.getColumn(), stream.getStreamKind());
private Map<StreamId, ValueStream<?>> createValueStreams(Map<StreamId, Stream> streams, Map<StreamId, OrcInputStream> streamsData, List<ColumnEncoding> columnEncodings) { ImmutableMap.Builder<StreamId, ValueStream<?>> valueStreams = ImmutableMap.builder(); for (Entry<StreamId, Stream> entry : streams.entrySet()) { StreamId streamId = entry.getKey(); Stream stream = entry.getValue(); ColumnEncodingKind columnEncoding = columnEncodings.get(stream.getColumn()).getColumnEncodingKind(); // skip index and empty streams if (isIndexStream(stream) || stream.getLength() == 0) { continue; } OrcInputStream inputStream = streamsData.get(streamId); OrcTypeKind columnType = types.get(stream.getColumn()).getOrcTypeKind(); valueStreams.put(streamId, ValueStreams.createValueStreams(streamId, inputStream, columnType, columnEncoding, stream.isUseVInts())); } return valueStreams.build(); }
public Optional<StreamDataOutput> getStreamDataOutput(int column) { checkArgument(closed); if (booleanOutputStream == null) { return Optional.empty(); } StreamDataOutput streamDataOutput = booleanOutputStream.getStreamDataOutput(column); // rewrite the DATA stream created by the boolean output stream to a PRESENT stream Stream stream = new Stream(column, PRESENT, toIntExact(streamDataOutput.size()), streamDataOutput.getStream().isUseVInts()); return Optional.of(new StreamDataOutput( sliceOutput -> { streamDataOutput.writeData(sliceOutput); return stream.getLength(); }, stream)); }
/**
 * Tells whether the stream's kind is one of ROW_INDEX, DICTIONARY_COUNT,
 * BLOOM_FILTER or BLOOM_FILTER_UTF8.
 *
 * @param stream the stream descriptor to classify
 * @return true if the stream kind is one of the four kinds listed above
 */
static boolean isIndexStream(Stream stream)
{
    if (stream.getStreamKind() == ROW_INDEX) {
        return true;
    }
    if (stream.getStreamKind() == DICTIONARY_COUNT) {
        return true;
    }
    if (stream.getStreamKind() == BLOOM_FILTER) {
        return true;
    }
    return stream.getStreamKind() == BLOOM_FILTER_UTF8;
}
/**
 * Returns the length recorded in this output's {@code stream} descriptor.
 *
 * @return the value of {@code stream.getLength()}
 */
@Override
public long size()
{
    long streamLength = stream.getLength();
    return streamLength;
}
public InputStreamSources createDictionaryStreamSources(Map<StreamId, Stream> streams, Map<StreamId, ValueInputStream<?>> valueStreams, List<ColumnEncoding> columnEncodings) { ImmutableMap.Builder<StreamId, InputStreamSource<?>> dictionaryStreamBuilder = ImmutableMap.builder(); for (Entry<StreamId, Stream> entry : streams.entrySet()) { StreamId streamId = entry.getKey(); Stream stream = entry.getValue(); int column = stream.getColumn(); // only process dictionary streams ColumnEncodingKind columnEncoding = columnEncodings.get(column) .getColumnEncoding(stream.getSequence()) .getColumnEncodingKind(); if (!isDictionary(stream, columnEncoding)) { continue; } // skip streams without data ValueInputStream<?> valueStream = valueStreams.get(streamId); if (valueStream == null) { continue; } OrcTypeKind columnType = types.get(stream.getColumn()).getOrcTypeKind(); StreamCheckpoint streamCheckpoint = getDictionaryStreamCheckpoint(streamId, columnType, columnEncoding); InputStreamSource<?> streamSource = createCheckpointStreamSource(valueStream, streamCheckpoint); dictionaryStreamBuilder.put(streamId, streamSource); } return new InputStreamSources(dictionaryStreamBuilder.build()); }
public StreamSources createDictionaryStreamSources(Map<StreamId, Stream> streams, Map<StreamId, ValueStream<?>> valueStreams, List<ColumnEncoding> columnEncodings) { ImmutableMap.Builder<StreamId, StreamSource<?>> dictionaryStreamBuilder = ImmutableMap.builder(); for (Entry<StreamId, Stream> entry : streams.entrySet()) { StreamId streamId = entry.getKey(); Stream stream = entry.getValue(); int column = stream.getColumn(); // only process dictionary streams ColumnEncodingKind columnEncoding = columnEncodings.get(column).getColumnEncodingKind(); if (!isDictionary(stream, columnEncoding)) { continue; } // skip streams without data ValueStream<?> valueStream = valueStreams.get(streamId); if (valueStream == null) { continue; } OrcTypeKind columnType = types.get(stream.getColumn()).getOrcTypeKind(); StreamCheckpoint streamCheckpoint = getDictionaryStreamCheckpoint(streamId, columnType, columnEncoding); StreamSource<?> streamSource = createCheckpointStreamSource(valueStream, streamCheckpoint); dictionaryStreamBuilder.put(streamId, streamSource); } return new StreamSources(dictionaryStreamBuilder.build()); }
boolean hasRowGroupDictionary = false; for (Stream stream : stripeFooter.getStreams()) { if (includedOrcColumns.contains(stream.getColumn())) { streams.put(new StreamId(stream), stream); if (stream.getStreamKind() == StreamKind.IN_DICTIONARY) { ColumnEncoding columnEncoding = columnEncodings.get(stream.getColumn());
private Map<StreamId, ValueInputStream<?>> createValueStreams(Map<StreamId, Stream> streams, Map<StreamId, OrcInputStream> streamsData, List<ColumnEncoding> columnEncodings) { ImmutableMap.Builder<StreamId, ValueInputStream<?>> valueStreams = ImmutableMap.builder(); for (Entry<StreamId, Stream> entry : streams.entrySet()) { StreamId streamId = entry.getKey(); Stream stream = entry.getValue(); ColumnEncodingKind columnEncoding = columnEncodings.get(stream.getColumn()) .getColumnEncoding(stream.getSequence()) .getColumnEncodingKind(); // skip index and empty streams if (isIndexStream(stream) || stream.getLength() == 0) { continue; } OrcInputStream inputStream = streamsData.get(streamId); OrcTypeKind columnType = types.get(stream.getColumn()).getOrcTypeKind(); valueStreams.put(streamId, ValueStreams.createValueStreams(streamId, inputStream, columnType, columnEncoding, stream.isUseVInts())); } return valueStreams.build(); }
/**
 * Creates the identifier of the given stream from its column, sequence and
 * stream kind.
 *
 * @param stream the stream whose identifying properties are copied
 */
public StreamId(Stream stream)
{
    this.column = stream.getColumn();
    this.sequence = stream.getSequence();
    this.streamKind = stream.getStreamKind();
}
public Optional<StreamDataOutput> getStreamDataOutput(int column) { checkArgument(closed); if (booleanOutputStream == null) { return Optional.empty(); } StreamDataOutput streamDataOutput = booleanOutputStream.getStreamDataOutput(column); // rewrite the DATA stream created by the boolean output stream to a PRESENT stream Stream stream = new Stream(column, PRESENT, toIntExact(streamDataOutput.size()), streamDataOutput.getStream().isUseVInts()); return Optional.of(new StreamDataOutput( sliceOutput -> { streamDataOutput.writeData(sliceOutput); return stream.getLength(); }, stream)); }
/**
 * Tells whether a stream carries dictionary content: either its kind is
 * DICTIONARY_DATA, or its kind is LENGTH and the column encoding is
 * DICTIONARY or DICTIONARY_V2.
 *
 * @param stream the stream descriptor to classify
 * @param columnEncoding the encoding kind of the stream's column
 * @return true if the stream matches one of the two cases above
 */
private static boolean isDictionary(Stream stream, ColumnEncodingKind columnEncoding)
{
    if (stream.getStreamKind() == DICTIONARY_DATA) {
        return true;
    }
    if (stream.getStreamKind() != LENGTH) {
        return false;
    }
    // a LENGTH stream counts only when the column is dictionary encoded
    return columnEncoding == DICTIONARY || columnEncoding == DICTIONARY_V2;
}
private static Map<StreamId, DiskRange> getDiskRanges(List<Stream> streams) { ImmutableMap.Builder<StreamId, DiskRange> streamDiskRanges = ImmutableMap.builder(); long stripeOffset = 0; for (Stream stream : streams) { int streamLength = toIntExact(stream.getLength()); // ignore zero byte streams if (streamLength > 0) { streamDiskRanges.put(new StreamId(stream), new DiskRange(stripeOffset, streamLength)); } stripeOffset += streamLength; } return streamDiskRanges.build(); }
public InputStreamSources createDictionaryStreamSources(Map<StreamId, Stream> streams, Map<StreamId, ValueInputStream<?>> valueStreams, List<ColumnEncoding> columnEncodings) { ImmutableMap.Builder<StreamId, InputStreamSource<?>> dictionaryStreamBuilder = ImmutableMap.builder(); for (Entry<StreamId, Stream> entry : streams.entrySet()) { StreamId streamId = entry.getKey(); Stream stream = entry.getValue(); int column = stream.getColumn(); // only process dictionary streams ColumnEncodingKind columnEncoding = columnEncodings.get(column) .getColumnEncoding(stream.getSequence()) .getColumnEncodingKind(); if (!isDictionary(stream, columnEncoding)) { continue; } // skip streams without data ValueInputStream<?> valueStream = valueStreams.get(streamId); if (valueStream == null) { continue; } OrcTypeKind columnType = types.get(stream.getColumn()).getOrcTypeKind(); StreamCheckpoint streamCheckpoint = getDictionaryStreamCheckpoint(streamId, columnType, columnEncoding); InputStreamSource<?> streamSource = createCheckpointStreamSource(valueStream, streamCheckpoint); dictionaryStreamBuilder.put(streamId, streamSource); } return new InputStreamSources(dictionaryStreamBuilder.build()); }
// write the stream data into sliceOutput, then check the descriptor: it must be a
// DATA stream for column 33 whose length equals sliceOutput.size()
streamDataOutput.writeData(sliceOutput); Stream stream = streamDataOutput.getStream(); assertEquals(stream.getStreamKind(), StreamKind.DATA); assertEquals(stream.getColumn(), 33); assertEquals(stream.getLength(), sliceOutput.size());