/**
 * Chooses the delegate reader for the stripe from the encoding kind recorded for this
 * stream descriptor's stream id and sequence, then forwards the call to that delegate.
 *
 * @param dictionaryStreamSources handed unchanged to the selected reader
 * @param encoding column encodings, looked up by this descriptor's stream id
 * @throws IllegalArgumentException if the encoding kind is neither {@code DIRECT},
 *         {@code DIRECT_V2}, {@code DWRF_DIRECT} nor {@code DWRF_MAP_FLAT}
 * @throws IOException if raised by the selected reader
 */
@Override
public void startStripe(InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
        throws IOException
{
    ColumnEncoding streamEncoding = encoding.get(streamDescriptor.getStreamId());
    ColumnEncodingKind encodingKind = streamEncoding
            .getColumnEncoding(streamDescriptor.getSequence())
            .getColumnEncodingKind();

    boolean direct = encodingKind == DIRECT || encodingKind == DIRECT_V2 || encodingKind == DWRF_DIRECT;

    // reject anything that is neither direct nor flat-map before touching currentReader
    if (!direct && encodingKind != DWRF_MAP_FLAT) {
        throw new IllegalArgumentException("Unsupported encoding " + encodingKind);
    }

    if (direct) {
        currentReader = directReader;
    }
    else {
        currentReader = flatReader;
    }

    currentReader.startStripe(dictionaryStreamSources, encoding);
}
/**
 * Selects either the dictionary or the direct reader for the stripe, based on the encoding
 * kind recorded for this stream descriptor's stream id and sequence, and delegates to it.
 *
 * @param dictionaryStreamSources handed unchanged to the selected reader
 * @param encoding column encodings, looked up by this descriptor's stream id
 * @throws IllegalArgumentException if the encoding kind is not one of {@code DIRECT},
 *         {@code DIRECT_V2}, {@code DWRF_DIRECT}, {@code DICTIONARY} or {@code DICTIONARY_V2}
 * @throws IOException if raised by the selected reader
 */
@Override
public void startStripe(InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
        throws IOException
{
    ColumnEncoding streamEncoding = encoding.get(streamDescriptor.getStreamId());
    ColumnEncodingKind kind = streamEncoding
            .getColumnEncoding(streamDescriptor.getSequence())
            .getColumnEncodingKind();

    // the two groups of kinds are disjoint, so the order of these checks does not matter
    if (kind == DICTIONARY || kind == DICTIONARY_V2) {
        currentReader = dictionaryReader;
    }
    else if (kind == DIRECT || kind == DIRECT_V2 || kind == DWRF_DIRECT) {
        currentReader = directReader;
    }
    else {
        throw new IllegalArgumentException("Unsupported encoding " + kind);
    }

    currentReader.startStripe(dictionaryStreamSources, encoding);
}
/**
 * Resolves the encoding kind for this stream descriptor's stream id and sequence, points
 * {@code currentReader} at the matching delegate, and starts the stripe on it.
 *
 * @param dictionaryStreamSources handed unchanged to the selected reader
 * @param encoding column encodings, looked up by this descriptor's stream id
 * @throws IllegalArgumentException if the encoding kind is not one of {@code DIRECT},
 *         {@code DIRECT_V2}, {@code DWRF_DIRECT} or {@code DICTIONARY}
 * @throws IOException if raised by the selected reader
 */
@Override
public void startStripe(InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
        throws IOException
{
    ColumnEncoding streamEncoding = encoding.get(streamDescriptor.getStreamId());
    ColumnEncodingKind encodingKind = streamEncoding
            .getColumnEncoding(streamDescriptor.getSequence())
            .getColumnEncodingKind();

    boolean direct = encodingKind == DIRECT || encodingKind == DIRECT_V2 || encodingKind == DWRF_DIRECT;

    // fail first so currentReader is left untouched for an unsupported kind
    if (!direct && encodingKind != DICTIONARY) {
        throw new IllegalArgumentException("Unsupported encoding " + encodingKind);
    }

    if (direct) {
        currentReader = directReader;
    }
    else {
        currentReader = dictionaryReader;
    }

    currentReader.startStripe(dictionaryStreamSources, encoding);
}
private Map<StreamId, ValueInputStream<?>> createValueStreams(Map<StreamId, Stream> streams, Map<StreamId, OrcInputStream> streamsData, List<ColumnEncoding> columnEncodings) { ImmutableMap.Builder<StreamId, ValueInputStream<?>> valueStreams = ImmutableMap.builder(); for (Entry<StreamId, Stream> entry : streams.entrySet()) { StreamId streamId = entry.getKey(); Stream stream = entry.getValue(); ColumnEncodingKind columnEncoding = columnEncodings.get(stream.getColumn()) .getColumnEncoding(stream.getSequence()) .getColumnEncodingKind(); // skip index and empty streams if (isIndexStream(stream) || stream.getLength() == 0) { continue; } OrcInputStream inputStream = streamsData.get(streamId); OrcTypeKind columnType = types.get(stream.getColumn()).getOrcTypeKind(); valueStreams.put(streamId, ValueStreams.createValueStreams(streamId, inputStream, columnType, columnEncoding, stream.isUseVInts())); } return valueStreams.build(); }
public InputStreamSources createDictionaryStreamSources(Map<StreamId, Stream> streams, Map<StreamId, ValueInputStream<?>> valueStreams, List<ColumnEncoding> columnEncodings) { ImmutableMap.Builder<StreamId, InputStreamSource<?>> dictionaryStreamBuilder = ImmutableMap.builder(); for (Entry<StreamId, Stream> entry : streams.entrySet()) { StreamId streamId = entry.getKey(); Stream stream = entry.getValue(); int column = stream.getColumn(); // only process dictionary streams ColumnEncodingKind columnEncoding = columnEncodings.get(column) .getColumnEncoding(stream.getSequence()) .getColumnEncodingKind(); if (!isDictionary(stream, columnEncoding)) { continue; } // skip streams without data ValueInputStream<?> valueStream = valueStreams.get(streamId); if (valueStream == null) { continue; } OrcTypeKind columnType = types.get(stream.getColumn()).getOrcTypeKind(); StreamCheckpoint streamCheckpoint = getDictionaryStreamCheckpoint(streamId, columnType, columnEncoding); InputStreamSource<?> streamSource = createCheckpointStreamSource(valueStream, streamCheckpoint); dictionaryStreamBuilder.put(streamId, streamSource); } return new InputStreamSources(dictionaryStreamBuilder.build()); }
/**
 * Converts a {@link ColumnEncoding} into its ORC protobuf form, copying the encoding kind
 * and the dictionary size.
 *
 * @param columnEncodings the encoding to convert; must not carry additional sequence encodings
 * @return the protobuf column encoding
 * @throws IllegalArgumentException (via {@code checkArgument}) if additional sequence
 *         encodings are present
 */
private static OrcProto.ColumnEncoding toColumnEncoding(ColumnEncoding columnEncodings)
{
    // Template form instead of string concatenation: the message, and therefore
    // columnEncodings.toString(), is only produced when the check actually fails.
    // NOTE(review): assumes checkArgument is Guava's Preconditions.checkArgument - confirm.
    checkArgument(
            !columnEncodings.getAdditionalSequenceEncodings().isPresent(),
            "Writing columns with non-zero sequence IDs is not supported in ORC: %s",
            columnEncodings);

    return OrcProto.ColumnEncoding.newBuilder()
            .setKind(toColumnEncoding(columnEncodings.getColumnEncodingKind()))
            .setDictionarySize(columnEncodings.getDictionarySize())
            .build();
}
/**
 * Converts a {@link ColumnEncoding} into its DWRF protobuf form, copying the encoding kind
 * and the dictionary size.
 *
 * @param columnEncodings the encoding to convert; must not carry additional sequence encodings
 * @return the protobuf column encoding
 * @throws IllegalArgumentException (via {@code checkArgument}) if additional sequence
 *         encodings are present
 */
private static DwrfProto.ColumnEncoding toColumnEncoding(ColumnEncoding columnEncodings)
{
    // Template form instead of string concatenation: the message, and therefore
    // columnEncodings.toString(), is only produced when the check actually fails.
    // NOTE(review): assumes checkArgument is Guava's Preconditions.checkArgument - confirm.
    checkArgument(
            !columnEncodings.getAdditionalSequenceEncodings().isPresent(),
            "DWRF writer doesn't support writing columns with non-zero sequence IDs: %s",
            columnEncodings);

    return DwrfProto.ColumnEncoding.newBuilder()
            .setKind(toColumnEncoding(columnEncodings.getColumnEncodingKind()))
            .setDictionarySize(columnEncodings.getDictionarySize())
            .build();
}
// Per-column inputs for the current row group: the position list stored under rowGroupId for this entry, plus the column's encoding kind, ORC type kind, and the set of stream kinds registered for the column.
List<Integer> positionsList = entry.getValue().get(rowGroupId).getPositions(); ColumnEncodingKind columnEncoding = columnEncodings.get(column).getColumnEncodingKind(); OrcTypeKind columnType = columnTypes.get(column).getOrcTypeKind(); Set<StreamKind> availableStreams = streamKinds.get(column);
ColumnEncoding columnEncoding = columnEncodings.get(stream.getColumn()); if (columnEncoding.getColumnEncodingKind() == DICTIONARY) { hasRowGroupDictionary = true; && additionalSequenceEncodings.get().stream() .map(DwrfSequenceEncoding::getValueEncoding) .anyMatch(encoding -> encoding.getColumnEncodingKind() == DICTIONARY)) { hasRowGroupDictionary = true;
/**
 * Selects either the dictionary or the direct reader for the stripe, based on the encoding
 * kind recorded for this stream descriptor's stream id, and delegates to it.
 *
 * @param dictionaryStreamSources handed unchanged to the selected reader
 * @param encoding column encodings, looked up by this descriptor's stream id
 * @throws IllegalArgumentException if the encoding kind is not one of {@code DIRECT},
 *         {@code DIRECT_V2}, {@code DWRF_DIRECT}, {@code DICTIONARY} or {@code DICTIONARY_V2}
 * @throws IOException if raised by the selected reader
 */
@Override
public void startStripe(StreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
        throws IOException
{
    ColumnEncoding streamEncoding = encoding.get(streamDescriptor.getStreamId());
    ColumnEncodingKind kind = streamEncoding.getColumnEncodingKind();

    // the two groups of kinds are disjoint, so the order of these checks does not matter
    if (kind == DICTIONARY || kind == DICTIONARY_V2) {
        currentReader = dictionaryReader;
    }
    else if (kind == DIRECT || kind == DIRECT_V2 || kind == DWRF_DIRECT) {
        currentReader = directReader;
    }
    else {
        throw new IllegalArgumentException("Unsupported encoding " + kind);
    }

    currentReader.startStripe(dictionaryStreamSources, encoding);
}
/**
 * Resolves the encoding kind for this stream descriptor's stream id, points
 * {@code currentReader} at the matching delegate, and starts the stripe on it.
 *
 * @param dictionaryStreamSources handed unchanged to the selected reader
 * @param encoding column encodings, looked up by this descriptor's stream id
 * @throws IllegalArgumentException if the encoding kind is not one of {@code DIRECT},
 *         {@code DIRECT_V2}, {@code DWRF_DIRECT} or {@code DICTIONARY}
 * @throws IOException if raised by the selected reader
 */
@Override
public void startStripe(StreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
        throws IOException
{
    ColumnEncoding streamEncoding = encoding.get(streamDescriptor.getStreamId());
    ColumnEncodingKind encodingKind = streamEncoding.getColumnEncodingKind();

    boolean direct = encodingKind == DIRECT || encodingKind == DIRECT_V2 || encodingKind == DWRF_DIRECT;

    // fail first so currentReader is left untouched for an unsupported kind
    if (!direct && encodingKind != DICTIONARY) {
        throw new IllegalArgumentException("Unsupported encoding " + encodingKind);
    }

    if (direct) {
        currentReader = directReader;
    }
    else {
        currentReader = dictionaryReader;
    }

    currentReader.startStripe(dictionaryStreamSources, encoding);
}
private Map<StreamId, ValueStream<?>> createValueStreams(Map<StreamId, Stream> streams, Map<StreamId, OrcInputStream> streamsData, List<ColumnEncoding> columnEncodings) { ImmutableMap.Builder<StreamId, ValueStream<?>> valueStreams = ImmutableMap.builder(); for (Entry<StreamId, Stream> entry : streams.entrySet()) { StreamId streamId = entry.getKey(); Stream stream = entry.getValue(); ColumnEncodingKind columnEncoding = columnEncodings.get(stream.getColumn()).getColumnEncodingKind(); // skip index and empty streams if (isIndexStream(stream) || stream.getLength() == 0) { continue; } OrcInputStream inputStream = streamsData.get(streamId); OrcTypeKind columnType = types.get(stream.getColumn()).getOrcTypeKind(); valueStreams.put(streamId, ValueStreams.createValueStreams(streamId, inputStream, columnType, columnEncoding, stream.isUseVInts())); } return valueStreams.build(); }
public StreamSources createDictionaryStreamSources(Map<StreamId, Stream> streams, Map<StreamId, ValueStream<?>> valueStreams, List<ColumnEncoding> columnEncodings) { ImmutableMap.Builder<StreamId, StreamSource<?>> dictionaryStreamBuilder = ImmutableMap.builder(); for (Entry<StreamId, Stream> entry : streams.entrySet()) { StreamId streamId = entry.getKey(); Stream stream = entry.getValue(); int column = stream.getColumn(); // only process dictionary streams ColumnEncodingKind columnEncoding = columnEncodings.get(column).getColumnEncodingKind(); if (!isDictionary(stream, columnEncoding)) { continue; } // skip streams without data ValueStream<?> valueStream = valueStreams.get(streamId); if (valueStream == null) { continue; } OrcTypeKind columnType = types.get(stream.getColumn()).getOrcTypeKind(); StreamCheckpoint streamCheckpoint = getDictionaryStreamCheckpoint(streamId, columnType, columnEncoding); StreamSource<?> streamSource = createCheckpointStreamSource(valueStream, streamCheckpoint); dictionaryStreamBuilder.put(streamId, streamSource); } return new StreamSources(dictionaryStreamBuilder.build()); }
/**
 * Chooses the delegate reader for the stripe from the encoding kind recorded for this
 * stream descriptor's stream id and sequence, then forwards the call to that delegate.
 *
 * @param dictionaryStreamSources handed unchanged to the selected reader
 * @param encoding column encodings, looked up by this descriptor's stream id
 * @throws IllegalArgumentException if the encoding kind is neither {@code DIRECT},
 *         {@code DIRECT_V2}, {@code DWRF_DIRECT} nor {@code DWRF_MAP_FLAT}
 * @throws IOException if raised by the selected reader
 */
@Override
public void startStripe(InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
        throws IOException
{
    ColumnEncoding streamEncoding = encoding.get(streamDescriptor.getStreamId());
    ColumnEncodingKind encodingKind = streamEncoding
            .getColumnEncoding(streamDescriptor.getSequence())
            .getColumnEncodingKind();

    boolean direct = encodingKind == DIRECT || encodingKind == DIRECT_V2 || encodingKind == DWRF_DIRECT;

    // reject anything that is neither direct nor flat-map before touching currentReader
    if (!direct && encodingKind != DWRF_MAP_FLAT) {
        throw new IllegalArgumentException("Unsupported encoding " + encodingKind);
    }

    if (direct) {
        currentReader = directReader;
    }
    else {
        currentReader = flatReader;
    }

    currentReader.startStripe(dictionaryStreamSources, encoding);
}
/**
 * Resolves the encoding kind for this stream descriptor's stream id and sequence, points
 * {@code currentReader} at the matching delegate, and starts the stripe on it.
 *
 * @param dictionaryStreamSources handed unchanged to the selected reader
 * @param encoding column encodings, looked up by this descriptor's stream id
 * @throws IllegalArgumentException if the encoding kind is not one of {@code DIRECT},
 *         {@code DIRECT_V2}, {@code DWRF_DIRECT} or {@code DICTIONARY}
 * @throws IOException if raised by the selected reader
 */
@Override
public void startStripe(InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
        throws IOException
{
    ColumnEncoding streamEncoding = encoding.get(streamDescriptor.getStreamId());
    ColumnEncodingKind encodingKind = streamEncoding
            .getColumnEncoding(streamDescriptor.getSequence())
            .getColumnEncodingKind();

    boolean direct = encodingKind == DIRECT || encodingKind == DIRECT_V2 || encodingKind == DWRF_DIRECT;

    // fail first so currentReader is left untouched for an unsupported kind
    if (!direct && encodingKind != DICTIONARY) {
        throw new IllegalArgumentException("Unsupported encoding " + encodingKind);
    }

    if (direct) {
        currentReader = directReader;
    }
    else {
        currentReader = dictionaryReader;
    }

    currentReader.startStripe(dictionaryStreamSources, encoding);
}
private Map<StreamId, ValueInputStream<?>> createValueStreams(Map<StreamId, Stream> streams, Map<StreamId, OrcInputStream> streamsData, List<ColumnEncoding> columnEncodings) { ImmutableMap.Builder<StreamId, ValueInputStream<?>> valueStreams = ImmutableMap.builder(); for (Entry<StreamId, Stream> entry : streams.entrySet()) { StreamId streamId = entry.getKey(); Stream stream = entry.getValue(); ColumnEncodingKind columnEncoding = columnEncodings.get(stream.getColumn()) .getColumnEncoding(stream.getSequence()) .getColumnEncodingKind(); // skip index and empty streams if (isIndexStream(stream) || stream.getLength() == 0) { continue; } OrcInputStream inputStream = streamsData.get(streamId); OrcTypeKind columnType = types.get(stream.getColumn()).getOrcTypeKind(); valueStreams.put(streamId, ValueStreams.createValueStreams(streamId, inputStream, columnType, columnEncoding, stream.isUseVInts())); } return valueStreams.build(); }
/**
 * Selects either the dictionary or the direct reader for the stripe, based on the encoding
 * kind recorded for this stream descriptor's stream id and sequence, and delegates to it.
 *
 * @param dictionaryStreamSources handed unchanged to the selected reader
 * @param encoding column encodings, looked up by this descriptor's stream id
 * @throws IllegalArgumentException if the encoding kind is not one of {@code DIRECT},
 *         {@code DIRECT_V2}, {@code DWRF_DIRECT}, {@code DICTIONARY} or {@code DICTIONARY_V2}
 * @throws IOException if raised by the selected reader
 */
@Override
public void startStripe(InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
        throws IOException
{
    ColumnEncoding streamEncoding = encoding.get(streamDescriptor.getStreamId());
    ColumnEncodingKind kind = streamEncoding
            .getColumnEncoding(streamDescriptor.getSequence())
            .getColumnEncodingKind();

    // the two groups of kinds are disjoint, so the order of these checks does not matter
    if (kind == DICTIONARY || kind == DICTIONARY_V2) {
        currentReader = dictionaryReader;
    }
    else if (kind == DIRECT || kind == DIRECT_V2 || kind == DWRF_DIRECT) {
        currentReader = directReader;
    }
    else {
        throw new IllegalArgumentException("Unsupported encoding " + kind);
    }

    currentReader.startStripe(dictionaryStreamSources, encoding);
}
public InputStreamSources createDictionaryStreamSources(Map<StreamId, Stream> streams, Map<StreamId, ValueInputStream<?>> valueStreams, List<ColumnEncoding> columnEncodings) { ImmutableMap.Builder<StreamId, InputStreamSource<?>> dictionaryStreamBuilder = ImmutableMap.builder(); for (Entry<StreamId, Stream> entry : streams.entrySet()) { StreamId streamId = entry.getKey(); Stream stream = entry.getValue(); int column = stream.getColumn(); // only process dictionary streams ColumnEncodingKind columnEncoding = columnEncodings.get(column) .getColumnEncoding(stream.getSequence()) .getColumnEncodingKind(); if (!isDictionary(stream, columnEncoding)) { continue; } // skip streams without data ValueInputStream<?> valueStream = valueStreams.get(streamId); if (valueStream == null) { continue; } OrcTypeKind columnType = types.get(stream.getColumn()).getOrcTypeKind(); StreamCheckpoint streamCheckpoint = getDictionaryStreamCheckpoint(streamId, columnType, columnEncoding); InputStreamSource<?> streamSource = createCheckpointStreamSource(valueStream, streamCheckpoint); dictionaryStreamBuilder.put(streamId, streamSource); } return new InputStreamSources(dictionaryStreamBuilder.build()); }
/**
 * Converts a {@link ColumnEncoding} into its ORC protobuf form, copying the encoding kind
 * and the dictionary size.
 *
 * @param columnEncodings the encoding to convert; must not carry additional sequence encodings
 * @return the protobuf column encoding
 * @throws IllegalArgumentException (via {@code checkArgument}) if additional sequence
 *         encodings are present
 */
private static OrcProto.ColumnEncoding toColumnEncoding(ColumnEncoding columnEncodings)
{
    // Template form instead of string concatenation: the message, and therefore
    // columnEncodings.toString(), is only produced when the check actually fails.
    // NOTE(review): assumes checkArgument is Guava's Preconditions.checkArgument - confirm.
    checkArgument(
            !columnEncodings.getAdditionalSequenceEncodings().isPresent(),
            "Writing columns with non-zero sequence IDs is not supported in ORC: %s",
            columnEncodings);

    return OrcProto.ColumnEncoding.newBuilder()
            .setKind(toColumnEncoding(columnEncodings.getColumnEncodingKind()))
            .setDictionarySize(columnEncodings.getDictionarySize())
            .build();
}
/**
 * Converts a {@link ColumnEncoding} into its DWRF protobuf form, copying the encoding kind
 * and the dictionary size.
 *
 * @param columnEncodings the encoding to convert; must not carry additional sequence encodings
 * @return the protobuf column encoding
 * @throws IllegalArgumentException (via {@code checkArgument}) if additional sequence
 *         encodings are present
 */
private static DwrfProto.ColumnEncoding toColumnEncoding(ColumnEncoding columnEncodings)
{
    // Template form instead of string concatenation: the message, and therefore
    // columnEncodings.toString(), is only produced when the check actually fails.
    // NOTE(review): assumes checkArgument is Guava's Preconditions.checkArgument - confirm.
    checkArgument(
            !columnEncodings.getAdditionalSequenceEncodings().isPresent(),
            "DWRF writer doesn't support writing columns with non-zero sequence IDs: %s",
            columnEncodings);

    return DwrfProto.ColumnEncoding.newBuilder()
            .setKind(toColumnEncoding(columnEncodings.getColumnEncodingKind()))
            .setDictionarySize(columnEncodings.getDictionarySize())
            .build();
}