/**
 * Builds the long-valued stream reader that corresponds to the given column encoding.
 *
 * @param inputStream source of the encoded bytes
 * @param encoding column encoding that decides which reader implementation is returned
 * @param type ORC type kind; only passed on to the DWRF reader
 * @param signed forwarded to the chosen reader
 * @param usesVInt only passed on to the DWRF reader
 * @return a V2 reader for {@code DIRECT_V2}/{@code DICTIONARY_V2}, a V1 reader for
 *         {@code DIRECT}/{@code DICTIONARY}, or a DWRF reader for {@code DWRF_DIRECT}
 * @throws IllegalArgumentException if the encoding is none of the above
 */
private static ValueInputStream<?> createLongStream(
        OrcInputStream inputStream,
        ColumnEncodingKind encoding,
        OrcTypeKind type,
        boolean signed,
        boolean usesVInt)
{
    // version 2 run-length encodings
    if (encoding == DIRECT_V2 || encoding == DICTIONARY_V2) {
        return new LongInputStreamV2(inputStream, signed, false);
    }

    // version 1 run-length encodings
    if (encoding == DIRECT || encoding == DICTIONARY) {
        return new LongInputStreamV1(inputStream, signed);
    }

    if (encoding == DWRF_DIRECT) {
        return new LongInputStreamDwrf(inputStream, type, signed, usesVInt);
    }

    throw new IllegalArgumentException("Unsupported encoding for long stream: " + encoding);
}
}
/**
 * Pulls the next value from the given stream and returns it boxed.
 *
 * @param valueStream stream to read from
 * @return the value produced by {@code valueStream.next()}
 * @throws IOException if the underlying read fails
 */
@Override
protected Long readValue(LongInputStreamV2 valueStream)
        throws IOException
{
    long nextValue = valueStream.next();
    return nextValue;
}
}
private void readValues() throws IOException { lastReadInputCheckpoint = input.getCheckpoint(); // read the first 2 bits and determine the encoding type int firstByte = input.read(); if (firstByte < 0) { throw new OrcCorruptionException(input.getOrcDataSourceId(), "Read past end of RLE integer"); } int enc = (firstByte >>> 6) & 0x03; if (EncodingType.SHORT_REPEAT.ordinal() == enc) { readShortRepeatValues(firstByte); } else if (EncodingType.DIRECT.ordinal() == enc) { readDirectValues(firstByte); } else if (EncodingType.PATCHED_BASE.ordinal() == enc) { readPatchedBaseValues(firstByte); } else { readDeltaValues(firstByte); } }
/**
 * Returns the next buffered literal, decoding a new run first when every
 * buffered literal has already been handed out.
 *
 * @return the literal at the current position, which is then advanced by one
 * @throws IOException if decoding the next run fails
 */
@Override
public long next()
        throws IOException
{
    boolean bufferDrained = used == numLiterals;
    if (bufferDrained) {
        // reset the buffer bookkeeping before decoding the next run
        used = 0;
        numLiterals = 0;
        readValues();
    }
    return literals[used++];
}
@Override public void seekToCheckpoint(LongStreamCheckpoint checkpoint) throws IOException { LongStreamV2Checkpoint v2Checkpoint = (LongStreamV2Checkpoint) checkpoint; // if the checkpoint is within the current buffer, just adjust the pointer if (lastReadInputCheckpoint == v2Checkpoint.getInputStreamCheckpoint() && v2Checkpoint.getOffset() <= numLiterals) { used = v2Checkpoint.getOffset(); } else { // otherwise, discard the buffer and start over input.seekToCheckpoint(v2Checkpoint.getInputStreamCheckpoint()); numLiterals = 0; used = 0; skip(v2Checkpoint.getOffset()); } }
/**
 * Advances past the given number of values, decoding further runs whenever
 * the buffered literals are used up.
 *
 * @param items number of values to skip; nothing happens when it is not positive
 * @throws IOException if decoding a run fails
 */
@Override
public void skip(long items)
        throws IOException
{
    long remaining = items;
    while (remaining > 0) {
        if (used == numLiterals) {
            // buffer is exhausted: reset it and decode the next run
            used = 0;
            numLiterals = 0;
            readValues();
        }
        // consume as much of the current buffer as is still needed
        long step = Math.min(remaining, numLiterals - used);
        used += step;
        remaining -= step;
    }
}
}
@Override public void seekToCheckpoint(LongStreamCheckpoint checkpoint) throws IOException { LongStreamV2Checkpoint v2Checkpoint = (LongStreamV2Checkpoint) checkpoint; // if the checkpoint is within the current buffer, just adjust the pointer if (lastReadInputCheckpoint == v2Checkpoint.getInputStreamCheckpoint() && v2Checkpoint.getOffset() <= numLiterals) { used = v2Checkpoint.getOffset(); } else { // otherwise, discard the buffer and start over input.seekToCheckpoint(v2Checkpoint.getInputStreamCheckpoint()); numLiterals = 0; used = 0; skip(v2Checkpoint.getOffset()); } }
private void readValues() throws IOException { lastReadInputCheckpoint = input.getCheckpoint(); // read the first 2 bits and determine the encoding type int firstByte = input.read(); if (firstByte < 0) { throw new OrcCorruptionException(input.getOrcDataSourceId(), "Read past end of RLE integer"); } int enc = (firstByte >>> 6) & 0x03; if (EncodingType.SHORT_REPEAT.ordinal() == enc) { readShortRepeatValues(firstByte); } else if (EncodingType.DIRECT.ordinal() == enc) { readDirectValues(firstByte); } else if (EncodingType.PATCHED_BASE.ordinal() == enc) { readPatchedBaseValues(firstByte); } else { readDeltaValues(firstByte); } }
/**
 * Wraps the given slice in an {@code OrcInputStream} configured with a SNAPPY
 * decompressor and returns a {@code LongInputStreamV2} reading from it.
 *
 * @param slice encoded bytes to read
 * @return a new V2 long stream over the slice
 * @throws OrcCorruptionException declared for the stream construction
 */
@Override
protected LongInputStreamV2 createValueStream(Slice slice)
        throws OrcCorruptionException
{
    Optional<OrcDecompressor> decompressor = createOrcDecompressor(
            ORC_DATA_SOURCE_ID,
            SNAPPY,
            COMPRESSION_BLOCK_SIZE);

    OrcInputStream orcStream = new OrcInputStream(
            ORC_DATA_SOURCE_ID,
            slice.getInput(),
            decompressor,
            newSimpleAggregatedMemoryContext(),
            slice.getRetainedSize());

    return new LongInputStreamV2(orcStream, true, false);
}
/**
 * Returns the next buffered literal, decoding a new run first when every
 * buffered literal has already been handed out.
 *
 * @return the literal at the current position, which is then advanced by one
 * @throws IOException if decoding the next run fails
 */
@Override
public long next()
        throws IOException
{
    boolean bufferDrained = used == numLiterals;
    if (bufferDrained) {
        // reset the buffer bookkeeping before decoding the next run
        used = 0;
        numLiterals = 0;
        readValues();
    }
    return literals[used++];
}
/**
 * Pulls the next value from the given stream and returns it boxed.
 *
 * @param valueStream stream to read from
 * @return the value produced by {@code valueStream.next()}
 * @throws IOException if the underlying read fails
 */
@Override
protected Long readValue(LongInputStreamV2 valueStream)
        throws IOException
{
    long nextValue = valueStream.next();
    return nextValue;
}
}
/**
 * Builds the long-valued stream reader that corresponds to the given column encoding.
 *
 * @param inputStream source of the encoded bytes
 * @param encoding column encoding that decides which reader implementation is returned
 * @param type ORC type kind; only passed on to the DWRF reader
 * @param signed forwarded to the chosen reader
 * @param usesVInt only passed on to the DWRF reader
 * @return a V2 reader for {@code DIRECT_V2}/{@code DICTIONARY_V2}, a V1 reader for
 *         {@code DIRECT}/{@code DICTIONARY}, or a DWRF reader for {@code DWRF_DIRECT}
 * @throws IllegalArgumentException if the encoding is none of the above
 */
private static ValueInputStream<?> createLongStream(
        OrcInputStream inputStream,
        ColumnEncodingKind encoding,
        OrcTypeKind type,
        boolean signed,
        boolean usesVInt)
{
    // version 2 run-length encodings
    if (encoding == DIRECT_V2 || encoding == DICTIONARY_V2) {
        return new LongInputStreamV2(inputStream, signed, false);
    }

    // version 1 run-length encodings
    if (encoding == DIRECT || encoding == DICTIONARY) {
        return new LongInputStreamV1(inputStream, signed);
    }

    if (encoding == DWRF_DIRECT) {
        return new LongInputStreamDwrf(inputStream, type, signed, usesVInt);
    }

    throw new IllegalArgumentException("Unsupported encoding for long stream: " + encoding);
}
}
/**
 * Advances past the given number of values, decoding further runs whenever
 * the buffered literals are used up.
 *
 * @param items number of values to skip; nothing happens when it is not positive
 * @throws IOException if decoding a run fails
 */
@Override
public void skip(long items)
        throws IOException
{
    long remaining = items;
    while (remaining > 0) {
        if (used == numLiterals) {
            // buffer is exhausted: reset it and decode the next run
            used = 0;
            numLiterals = 0;
            readValues();
        }
        // consume as much of the current buffer as is still needed
        long step = Math.min(remaining, numLiterals - used);
        used += step;
        remaining -= step;
    }
}
}
/**
 * Wraps the given slice in an {@code OrcInputStream} configured with a SNAPPY
 * decompressor and returns a {@code LongInputStreamV2} reading from it.
 *
 * @param slice encoded bytes to read
 * @return a new V2 long stream over the slice
 * @throws OrcCorruptionException declared for the stream construction
 */
@Override
protected LongInputStreamV2 createValueStream(Slice slice)
        throws OrcCorruptionException
{
    Optional<OrcDecompressor> decompressor = createOrcDecompressor(
            ORC_DATA_SOURCE_ID,
            SNAPPY,
            COMPRESSION_BLOCK_SIZE);

    OrcInputStream orcStream = new OrcInputStream(
            ORC_DATA_SOURCE_ID,
            slice.getInput(),
            decompressor,
            newSimpleAggregatedMemoryContext(),
            slice.getRetainedSize());

    return new LongInputStreamV2(orcStream, true, false);
}