/**
 * Writer for MAP columns: emits a LENGTH stream holding the entry count of each
 * map plus an optional PRESENT stream, and delegates key/value data to the
 * supplied child writers.
 *
 * @param column zero-based column ordinal; must be non-negative
 * @param compression compression kind applied to all output streams
 * @param bufferSize output stream buffer size in bytes
 * @param orcEncoding target file format (DWRF or standard ORC)
 * @param keyWriter writer for the map key child column
 * @param valueWriter writer for the map value child column
 */
public MapColumnWriter(int column, CompressionKind compression, int bufferSize, OrcEncoding orcEncoding, ColumnWriter keyWriter, ColumnWriter valueWriter)
{
    checkArgument(column >= 0, "column is negative");
    requireNonNull(compression, "compression is null");

    this.column = column;
    this.compressed = compression != NONE;
    // DWRF only understands the legacy DIRECT encoding; standard ORC uses DIRECT_V2
    this.columnEncoding = new ColumnEncoding(orcEncoding == DWRF ? DIRECT : DIRECT_V2, 0);
    this.keyWriter = requireNonNull(keyWriter, "keyWriter is null");
    this.valueWriter = requireNonNull(valueWriter, "valueWriter is null");
    this.lengthStream = createLengthOutputStream(compression, bufferSize, orcEncoding);
    this.presentStream = new PresentOutputStream(compression, bufferSize);
}
/**
 * Converts the internal {@link ColumnEncoding} to its ORC protobuf representation.
 * Columns carrying additional sequence encodings (DWRF flat maps) cannot be
 * expressed in standard ORC and are rejected.
 */
private static OrcProto.ColumnEncoding toColumnEncoding(ColumnEncoding columnEncodings)
{
    boolean hasSequenceEncodings = columnEncodings.getAdditionalSequenceEncodings().isPresent();
    checkArgument(!hasSequenceEncodings, "Writing columns with non-zero sequence IDs is not supported in ORC: " + columnEncodings);

    OrcProto.ColumnEncoding.Builder builder = OrcProto.ColumnEncoding.newBuilder();
    builder.setKind(toColumnEncoding(columnEncodings.getColumnEncodingKind()));
    builder.setDictionarySize(columnEncodings.getDictionarySize());
    return builder.build();
}
/**
 * Selects the reader implementation (direct vs. flat map) that matches this
 * stripe's column encoding, then forwards the stripe start to it.
 */
@Override
public void startStripe(InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
        throws IOException
{
    ColumnEncodingKind kind = encoding.get(streamDescriptor.getStreamId())
            .getColumnEncoding(streamDescriptor.getSequence())
            .getColumnEncodingKind();

    switch (kind) {
        case DIRECT:
        case DIRECT_V2:
        case DWRF_DIRECT:
            currentReader = directReader;
            break;
        case DWRF_MAP_FLAT:
            currentReader = flatReader;
            break;
        default:
            throw new IllegalArgumentException("Unsupported encoding " + kind);
    }

    currentReader.startStripe(dictionaryStreamSources, encoding);
}
/**
 * Begins a new stripe: records the stripe dictionary metadata (loaded lazily on
 * first read) and resets all per-row-group read state.
 */
@Override
public void startStripe(InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
{
    // Stripe-level dictionary; the actual values are materialized on demand
    dictionaryDataStreamSource = dictionaryStreamSources.getInputStreamSource(streamDescriptor, DICTIONARY_DATA, LongInputStream.class);
    dictionarySize = encoding.get(streamDescriptor.getStreamId())
            .getColumnEncoding(streamDescriptor.getSequence())
            .getDictionarySize();
    dictionaryOpen = false;

    // Row-group stream sources are unknown until the row group is opened
    presentStreamSource = missingStreamSource(BooleanInputStream.class);
    dataStreamSource = missingStreamSource(LongInputStream.class);
    inDictionaryStreamSource = missingStreamSource(BooleanInputStream.class);

    // Clear any readers and positions carried over from the previous stripe
    presentStream = null;
    dataStream = null;
    inDictionaryStream = null;
    readOffset = 0;
    nextBatchSize = 0;
    rowGroupOpen = false;
}
// NOTE(review): fragment of a larger method — the enclosing loop/scope is not visible here.
// Index positions recorded for this stream within the given row group.
List<Integer> positionsList = entry.getValue().get(rowGroupId).getPositions();
// Encoding kind, logical ORC type, and available stream kinds for this column,
// presumably used below to interpret the position list — TODO confirm against caller.
ColumnEncodingKind columnEncoding = columnEncodings.get(column).getColumnEncodingKind();
OrcTypeKind columnType = columnTypes.get(column).getOrcTypeKind();
Set<StreamKind> availableStreams = streamKinds.get(column);
// NOTE(review): fragment — the closing braces of these blocks lie outside this view.
ColumnEncoding columnEncoding = columnEncodings.get(stream.getColumn());
// A top-level DICTIONARY encoding marks this column as having a row-group dictionary.
if (columnEncoding.getColumnEncodingKind() == DICTIONARY) {
    hasRowGroupDictionary = true;
    // Flat-map (DWRF) columns carry per-sequence value encodings; a DICTIONARY
    // value encoding in any sequence also implies a row-group dictionary.
    Optional<List<DwrfSequenceEncoding>> additionalSequenceEncodings = columnEncoding.getAdditionalSequenceEncodings();
    if (additionalSequenceEncodings.isPresent() && additionalSequenceEncodings.get().stream()
            .map(DwrfSequenceEncoding::getValueEncoding)
            .anyMatch(encoding -> encoding.getColumnEncodingKind() == DICTIONARY)) {
        hasRowGroupDictionary = true;
/**
 * Begins a new stripe (legacy StreamSources API): captures the stripe
 * dictionary metadata for lazy loading and resets per-row-group read state.
 */
@Override
public void startStripe(StreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
        throws IOException
{
    // Stripe-level dictionary; values are materialized on first use
    dictionaryDataStreamSource = dictionaryStreamSources.getStreamSource(streamDescriptor, DICTIONARY_DATA, LongStream.class);
    dictionarySize = encoding.get(streamDescriptor.getStreamId()).getDictionarySize();
    dictionaryOpen = false;

    // Row-group sources are unknown until the row group is opened
    presentStreamSource = missingStreamSource(BooleanStream.class);
    dataStreamSource = missingStreamSource(LongStream.class);
    inDictionaryStreamSource = missingStreamSource(BooleanStream.class);

    // Reset all per-row-group read state
    presentStream = null;
    dataStream = null;
    inDictionaryStream = null;
    readOffset = 0;
    nextBatchSize = 0;
    rowGroupOpen = false;
}
@Override public void startStripe(InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encodings) throws IOException { presentStreamSource = missingStreamSource(BooleanInputStream.class); inMapStreamSources.clear(); valueStreamDescriptors.clear(); valueStreamReaders.clear(); ColumnEncoding encoding = encodings.get(baseValueStreamDescriptor.getStreamId()); // encoding.getAdditionalSequenceEncodings() may not be present when every map is empty or null List<DwrfSequenceEncoding> additionalSequenceEncodings = encoding.getAdditionalSequenceEncodings().orElse(Collections.emptyList()); // The ColumnEncoding with sequence ID 0 doesn't have any data associated with it for (int sequence = 1; sequence <= additionalSequenceEncodings.size(); sequence++) { inMapStreamSources.add(missingStreamSource(BooleanInputStream.class)); StreamDescriptor valueStreamDescriptor = copyStreamDescriptorWithSequence(baseValueStreamDescriptor, sequence); valueStreamDescriptors.add(valueStreamDescriptor); StreamReader valueStreamReader = StreamReaders.createStreamReader(valueStreamDescriptor, hiveStorageTimeZone, systemMemoryContext); valueStreamReader.startStripe(dictionaryStreamSources, encodings); valueStreamReaders.add(valueStreamReader); } keyBlockTemplate = getKeyBlockTemplate(additionalSequenceEncodings); readOffset = 0; nextBatchSize = 0; presentStream = null; rowGroupOpen = false; }
/**
 * Begins a new stripe for a slice dictionary column: records the stripe
 * dictionary data/length sources (loaded lazily) and resets all row-group and
 * row-group-dictionary read state.
 */
@Override
public void startStripe(InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
{
    // Stripe-level dictionary sources; the dictionary is opened on first read
    stripeDictionaryDataStreamSource = dictionaryStreamSources.getInputStreamSource(streamDescriptor, DICTIONARY_DATA, ByteArrayInputStream.class);
    stripeDictionaryLengthStreamSource = dictionaryStreamSources.getInputStreamSource(streamDescriptor, LENGTH, LongInputStream.class);
    stripeDictionarySize = encoding.get(streamDescriptor.getStreamId())
            .getColumnEncoding(streamDescriptor.getSequence())
            .getDictionarySize();
    stripeDictionaryOpen = false;

    // Row-group sources (including the optional row-group dictionary) are
    // unknown until the row group is opened
    presentStreamSource = missingStreamSource(BooleanInputStream.class);
    dataStreamSource = missingStreamSource(LongInputStream.class);
    inDictionaryStreamSource = missingStreamSource(BooleanInputStream.class);
    rowGroupDictionaryLengthStreamSource = missingStreamSource(RowGroupDictionaryLengthInputStream.class);
    rowGroupDictionaryDataStreamSource = missingStreamSource(ByteArrayInputStream.class);

    // Reset per-row-group read state
    presentStream = null;
    dataStream = null;
    inDictionaryStream = null;
    readOffset = 0;
    nextBatchSize = 0;
    rowGroupOpen = false;
}
/**
 * Selects the direct or dictionary reader (legacy StreamSources API) based on
 * this stripe's column encoding, then forwards the stripe start to it.
 */
@Override
public void startStripe(StreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
        throws IOException
{
    ColumnEncodingKind columnEncodingKind = encoding.get(streamDescriptor.getStreamId()).getColumnEncodingKind();

    switch (columnEncodingKind) {
        case DIRECT:
        case DIRECT_V2:
        case DWRF_DIRECT:
            currentReader = directReader;
            break;
        case DICTIONARY:
        case DICTIONARY_V2:
            currentReader = dictionaryReader;
            break;
        default:
            throw new IllegalArgumentException("Unsupported encoding " + columnEncodingKind);
    }

    currentReader.startStripe(dictionaryStreamSources, encoding);
}
// NOTE(review): fragment — the closing braces of these blocks lie outside this view.
ColumnEncoding columnEncoding = columnEncodings.get(stream.getColumn());
// A top-level DICTIONARY encoding marks this column as having a row-group dictionary.
if (columnEncoding.getColumnEncodingKind() == DICTIONARY) {
    hasRowGroupDictionary = true;
    // Flat-map (DWRF) columns carry per-sequence value encodings; a DICTIONARY
    // value encoding in any sequence also implies a row-group dictionary.
    Optional<List<DwrfSequenceEncoding>> additionalSequenceEncodings = columnEncoding.getAdditionalSequenceEncodings();
    if (additionalSequenceEncodings.isPresent() && additionalSequenceEncodings.get().stream()
            .map(DwrfSequenceEncoding::getValueEncoding)
            .anyMatch(encoding -> encoding.getColumnEncodingKind() == DICTIONARY)) {
        hasRowGroupDictionary = true;
/**
 * Begins a new stripe for a slice dictionary column (legacy StreamSources
 * API): records the stripe dictionary sources for lazy loading and resets all
 * row-group and row-group-dictionary read state.
 */
@Override
public void startStripe(StreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
        throws IOException
{
    // Stripe-level dictionary sources; opened on first read
    dictionaryDataStreamSource = dictionaryStreamSources.getStreamSource(streamDescriptor, DICTIONARY_DATA, ByteArrayStream.class);
    dictionaryLengthStreamSource = dictionaryStreamSources.getStreamSource(streamDescriptor, LENGTH, LongStream.class);
    dictionarySize = encoding.get(streamDescriptor.getStreamId()).getDictionarySize();
    dictionaryOpen = false;

    // Row-group sources (including the optional row-group dictionary) are
    // unknown until the row group is opened
    presentStreamSource = missingStreamSource(BooleanStream.class);
    dataStreamSource = missingStreamSource(LongStream.class);
    inDictionaryStreamSource = missingStreamSource(BooleanStream.class);
    rowGroupDictionaryLengthStreamSource = missingStreamSource(RowGroupDictionaryLengthStream.class);
    rowGroupDictionaryDataStreamSource = missingStreamSource(ByteArrayStream.class);

    // Reset per-row-group read state
    presentStream = null;
    dataStream = null;
    inDictionaryStream = null;
    readOffset = 0;
    nextBatchSize = 0;
    rowGroupOpen = false;
}
// Begins a new stripe for a flat-map column: clears readers built for the
// previous stripe, then creates one in-map source, stream descriptor, and
// value reader per additional sequence encoding (in sequence order — the
// three lists stay index-aligned).
@Override
public void startStripe(InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encodings)
        throws IOException
{
    presentStreamSource = missingStreamSource(BooleanInputStream.class);
    // Discard per-stripe state from the previous stripe
    inMapStreamSources.clear();
    valueStreamDescriptors.clear();
    valueStreamReaders.clear();
    ColumnEncoding encoding = encodings.get(baseValueStreamDescriptor.getStreamId());
    // encoding.getAdditionalSequenceEncodings() may not be present when every map is empty or null
    List<DwrfSequenceEncoding> additionalSequenceEncodings = encoding.getAdditionalSequenceEncodings().orElse(Collections.emptyList());
    // The ColumnEncoding with sequence ID 0 doesn't have any data associated with it
    for (int sequence = 1; sequence <= additionalSequenceEncodings.size(); sequence++) {
        inMapStreamSources.add(missingStreamSource(BooleanInputStream.class));
        StreamDescriptor valueStreamDescriptor = copyStreamDescriptorWithSequence(baseValueStreamDescriptor, sequence);
        valueStreamDescriptors.add(valueStreamDescriptor);
        // Each per-sequence value reader is started immediately so it binds to
        // this stripe's dictionary sources and encodings
        StreamReader valueStreamReader = StreamReaders.createStreamReader(valueStreamDescriptor, hiveStorageTimeZone, systemMemoryContext);
        valueStreamReader.startStripe(dictionaryStreamSources, encodings);
        valueStreamReaders.add(valueStreamReader);
    }
    keyBlockTemplate = getKeyBlockTemplate(additionalSequenceEncodings);
    // Reset per-row-group read state
    readOffset = 0;
    nextBatchSize = 0;
    presentStream = null;
    rowGroupOpen = false;
}
/**
 * Converts the internal {@link ColumnEncoding} to its DWRF protobuf
 * representation. Additional sequence encodings (flat maps) are not supported
 * by this writer and are rejected.
 */
private static DwrfProto.ColumnEncoding toColumnEncoding(ColumnEncoding columnEncodings)
{
    boolean hasSequenceEncodings = columnEncodings.getAdditionalSequenceEncodings().isPresent();
    checkArgument(!hasSequenceEncodings, "DWRF writer doesn't support writing columns with non-zero sequence IDs: " + columnEncodings);

    DwrfProto.ColumnEncoding.Builder builder = DwrfProto.ColumnEncoding.newBuilder();
    builder.setKind(toColumnEncoding(columnEncodings.getColumnEncodingKind()));
    builder.setDictionarySize(columnEncodings.getDictionarySize());
    return builder.build();
}
/**
 * Writer for LIST columns: emits a LENGTH stream holding each list's element
 * count plus an optional PRESENT stream, delegating element data to the child
 * writer.
 *
 * @param column zero-based column ordinal; must be non-negative
 * @param compression compression kind applied to all output streams
 * @param bufferSize output stream buffer size in bytes
 * @param orcEncoding target file format (DWRF or standard ORC)
 * @param elementWriter writer for the list element child column
 */
public ListColumnWriter(int column, CompressionKind compression, int bufferSize, OrcEncoding orcEncoding, ColumnWriter elementWriter)
{
    checkArgument(column >= 0, "column is negative");
    requireNonNull(compression, "compression is null");

    this.column = column;
    this.compressed = compression != NONE;
    // DWRF only understands the legacy DIRECT encoding; standard ORC uses DIRECT_V2
    this.columnEncoding = new ColumnEncoding(orcEncoding == DWRF ? DIRECT : DIRECT_V2, 0);
    this.elementWriter = requireNonNull(elementWriter, "elementWriter is null");
    this.lengthStream = createLengthOutputStream(compression, bufferSize, orcEncoding);
    this.presentStream = new PresentOutputStream(compression, bufferSize);
}
/**
 * Selects the direct or dictionary reader based on this stripe's column
 * encoding (resolved per stream ID and sequence), then forwards the stripe
 * start to the chosen reader.
 */
@Override
public void startStripe(InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
        throws IOException
{
    ColumnEncodingKind columnEncodingKind = encoding.get(streamDescriptor.getStreamId())
            .getColumnEncoding(streamDescriptor.getSequence())
            .getColumnEncodingKind();

    switch (columnEncodingKind) {
        case DIRECT:
        case DIRECT_V2:
        case DWRF_DIRECT:
            currentReader = directReader;
            break;
        case DICTIONARY:
        case DICTIONARY_V2:
            currentReader = dictionaryReader;
            break;
        default:
            throw new IllegalArgumentException("Unsupported encoding " + columnEncodingKind);
    }

    currentReader.startStripe(dictionaryStreamSources, encoding);
}
/**
 * Begins a new stripe: records the stripe dictionary metadata (the dictionary
 * itself is materialized lazily) and resets all per-row-group read state.
 */
@Override
public void startStripe(InputStreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
{
    // Stripe-level dictionary source and size for this stream/sequence
    dictionaryDataStreamSource = dictionaryStreamSources.getInputStreamSource(streamDescriptor, DICTIONARY_DATA, LongInputStream.class);
    dictionarySize = encoding.get(streamDescriptor.getStreamId())
            .getColumnEncoding(streamDescriptor.getSequence())
            .getDictionarySize();
    dictionaryOpen = false;

    // Row-group stream sources are unknown until the row group is opened
    presentStreamSource = missingStreamSource(BooleanInputStream.class);
    dataStreamSource = missingStreamSource(LongInputStream.class);
    inDictionaryStreamSource = missingStreamSource(BooleanInputStream.class);

    // Reset per-row-group read state
    presentStream = null;
    dataStream = null;
    inDictionaryStream = null;
    readOffset = 0;
    nextBatchSize = 0;
    rowGroupOpen = false;
}
/**
 * Selects the direct or dictionary reader (legacy StreamSources API; v1
 * DICTIONARY only) based on the stripe's column encoding, then forwards the
 * stripe start to it.
 */
@Override
public void startStripe(StreamSources dictionaryStreamSources, List<ColumnEncoding> encoding)
        throws IOException
{
    ColumnEncodingKind kind = encoding.get(streamDescriptor.getStreamId()).getColumnEncodingKind();

    switch (kind) {
        case DIRECT:
        case DIRECT_V2:
        case DWRF_DIRECT:
            currentReader = directReader;
            break;
        case DICTIONARY:
            currentReader = dictionaryReader;
            break;
        default:
            throw new IllegalArgumentException("Unsupported encoding " + kind);
    }

    currentReader.startStripe(dictionaryStreamSources, encoding);
}
/**
 * Maps the internal {@link ColumnEncoding} onto the ORC protobuf message.
 * Standard ORC has no notion of per-sequence encodings, so flat-map columns
 * are rejected up front.
 */
private static OrcProto.ColumnEncoding toColumnEncoding(ColumnEncoding columnEncodings)
{
    checkArgument(
            !columnEncodings.getAdditionalSequenceEncodings().isPresent(),
            "Writing columns with non-zero sequence IDs is not supported in ORC: " + columnEncodings);

    OrcProto.ColumnEncoding.Kind kind = toColumnEncoding(columnEncodings.getColumnEncodingKind());
    int dictionarySize = columnEncodings.getDictionarySize();
    return OrcProto.ColumnEncoding.newBuilder()
            .setKind(kind)
            .setDictionarySize(dictionarySize)
            .build();
}
/**
 * Writer for integer-valued columns: emits a DATA stream of signed longs plus
 * an optional PRESENT stream, and accumulates per-column statistics via the
 * supplied builder.
 *
 * @param column zero-based column ordinal; must be non-negative
 * @param type Presto type of the column values
 * @param compression compression kind applied to all output streams
 * @param bufferSize output stream buffer size in bytes
 * @param orcEncoding target file format (DWRF or standard ORC)
 * @param statisticsBuilderSupplier creates a fresh statistics builder per row group
 */
public LongColumnWriter(int column, Type type, CompressionKind compression, int bufferSize, OrcEncoding orcEncoding, Supplier<LongValueStatisticsBuilder> statisticsBuilderSupplier)
{
    checkArgument(column >= 0, "column is negative");
    requireNonNull(compression, "compression is null");

    this.column = column;
    this.type = requireNonNull(type, "type is null");
    this.compressed = compression != NONE;

    // DWRF pairs the legacy DIRECT encoding with its own long stream format;
    // standard ORC pairs DIRECT_V2 with the RLEv2 long stream
    if (orcEncoding == DWRF) {
        this.columnEncoding = new ColumnEncoding(DIRECT, 0);
        this.dataStream = new LongOutputStreamDwrf(compression, bufferSize, true, DATA);
    }
    else {
        this.columnEncoding = new ColumnEncoding(DIRECT_V2, 0);
        this.dataStream = new LongOutputStreamV2(compression, bufferSize, true, DATA);
    }

    this.presentStream = new PresentOutputStream(compression, bufferSize);
    this.statisticsBuilderSupplier = requireNonNull(statisticsBuilderSupplier, "statisticsBuilderSupplier is null");
    this.statisticsBuilder = statisticsBuilderSupplier.get();
}