/**
 * Loads the next row group once every row loaded so far has been returned.
 *
 * <p>Does nothing while {@code rowsReturned} differs from {@code totalCountLoadedSoFar}.
 * Otherwise it reads the next row group, rebuilds {@code columnReaders} with one reader per
 * requested column (slots flagged in {@code missingColumns} are left {@code null}) and adds
 * the row group's row count to {@code totalCountLoadedSoFar}.
 *
 * @throws IOException if {@code reader.readNextRowGroup()} returns {@code null}, i.e. no
 *     further row group is available
 */
private void checkEndOfRowGroup() throws IOException {
  if (rowsReturned == totalCountLoadedSoFar) {
    PageReadStore rowGroup = reader.readNextRowGroup();
    if (rowGroup == null) {
      throw new IOException("expecting more rows but reached last block. Read "
          + rowsReturned + " out of " + totalRowCount);
    }

    List<ColumnDescriptor> descriptors = requestedSchema.getColumns();
    int columnCount = descriptors.size();
    columnReaders = new VectorizedColumnReader[columnCount];
    for (int idx = 0; idx < columnCount; idx++) {
      if (!missingColumns[idx]) {
        ColumnDescriptor col = descriptors.get(idx);
        columnReaders[idx] = new VectorizedColumnReader(col, rowGroup.getPageReader(col));
      }
    }

    totalCountLoadedSoFar += rowGroup.getRowCount();
  }
}
// Unmatched closing brace carried over from the original line (its opening brace is outside this block).
}
readPage(); leftInPage = (int) (endOfPageValueCount - valuesRead); decodeDictionaryIds(rowId, num, column, dictionaryIds); decodeDictionaryIds(0, rowId, column, column.getDictionaryIds()); readBooleanBatch(rowId, num, column); break; case INT32: readIntBatch(rowId, num, column); break; case INT64: readLongBatch(rowId, num, column); break; case INT96: readBinaryBatch(rowId, num, column); break; case FLOAT: readFloatBatch(rowId, num, column); break; case DOUBLE: readDoubleBatch(rowId, num, column); break; case BINARY: readBinaryBatch(rowId, num, column); break; case FIXED_LEN_BYTE_ARRAY: readFixedLenByteArrayBatch(
/**
 * Moves on to the next batch of rows.
 *
 * @return {@code false} if there are no more rows ({@code rowsReturned} has reached
 *     {@code totalRowCount}); {@code true} once a new batch has been read
 * @throws IOException if advancing to the next row group fails (see
 *     {@code checkEndOfRowGroup()})
 */
public boolean nextBatch() throws IOException {
  // Clear the state left by the previous batch before checking whether rows remain.
  for (int v = 0; v < columnVectors.length; v++) {
    columnVectors[v].reset();
  }
  columnarBatch.setNumRows(0);

  boolean exhausted = rowsReturned >= totalRowCount;
  if (exhausted) {
    return false;
  }

  checkEndOfRowGroup();

  // Bounded by both the configured capacity and the rows still unread from what is loaded.
  int batchSize = (int) Math.min((long) capacity, totalCountLoadedSoFar - rowsReturned);
  for (int col = 0; col < columnReaders.length; col++) {
    if (columnReaders[col] != null) {
      columnReaders[col].readBatch(batchSize, columnVectors[col]);
    }
  }

  rowsReturned += batchSize;
  columnarBatch.setNumRows(batchSize);
  numBatched = batchSize;
  batchIdx = 0;
  return true;
}
/**
 * Prepares the repetition-level, definition-level and data readers for a v2 data page.
 *
 * <p>The definition levels and the page data are each converted to a byte array; the data
 * reader is initialised at offset 0 of its array.
 *
 * @param page the v2 data page to read from
 * @throws IOException if initialising the data reader fails; the original exception is
 *     attached as the cause
 */
private void readPageV2(DataPageV2 page) throws IOException {
  pageValueCount = page.getValueCount();
  repetitionLevelColumn = createRLEIterator(
      descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels(), descriptor);

  int levelBitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  defColumn = new VectorizedRleValuesReader(levelBitWidth);
  definitionLevelColumn = new ValuesReaderIntIterator(defColumn);
  defColumn.initFromBuffer(pageValueCount, page.getDefinitionLevels().toByteArray());

  try {
    initDataReader(page.getDataEncoding(), page.getData().toByteArray(), 0);
  } catch (IOException cause) {
    throw new IOException("could not read page " + page + " in col " + descriptor, cause);
  }
}
// Unmatched closing brace carried over from the original line (its opening brace is outside this block).
}
private void readPageV2(DataPageV2 page) throws IOException { this.pageValueCount = page.getValueCount(); this.repetitionLevelColumn = createRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels(), descriptor); int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); // do not read the length from the stream. v2 pages handle dividing the page bytes. this.defColumn = new VectorizedRleValuesReader(bitWidth, false); this.definitionLevelColumn = new ValuesReaderIntIterator(this.defColumn); this.defColumn.initFromPage( this.pageValueCount, page.getDefinitionLevels().toInputStream()); try { initDataReader(page.getDataEncoding(), page.getData().toInputStream()); } catch (IOException e) { throw new IOException("could not read page " + page + " in col " + descriptor, e); } } }
readPage(); leftInPage = (int) (endOfPageValueCount - valuesRead); decodeDictionaryIds(rowId, num, column, dictionaryIds); decodeDictionaryIds(0, rowId, column, column.getDictionaryIds()); readBooleanBatch(rowId, num, column); break; case INT32: readIntBatch(rowId, num, column); break; case INT64: readLongBatch(rowId, num, column); break; case INT96: readBinaryBatch(rowId, num, column); break; case FLOAT: readFloatBatch(rowId, num, column); break; case DOUBLE: readDoubleBatch(rowId, num, column); break; case BINARY: readBinaryBatch(rowId, num, column); break; case FIXED_LEN_BYTE_ARRAY: readFixedLenByteArrayBatch(
private void readPageV1(DataPageV1 page) throws IOException { this.pageValueCount = page.getValueCount(); ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL); ValuesReader dlReader; // Initialize the decoders. if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) { throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding()); } int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); this.defColumn = new VectorizedRleValuesReader(bitWidth); dlReader = this.defColumn; this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader); this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader); try { BytesInput bytes = page.getBytes(); ByteBufferInputStream in = bytes.toInputStream(); rlReader.initFromPage(pageValueCount, in); dlReader.initFromPage(pageValueCount, in); initDataReader(page.getValueEncoding(), in); } catch (IOException e) { throw new IOException("could not read page " + page + " in col " + descriptor, e); } }
/**
 * Moves on to the next batch of rows.
 *
 * @return {@code false} if there are no more rows ({@code rowsReturned} has reached
 *     {@code totalRowCount}); {@code true} once a new batch has been read
 * @throws IOException if advancing to the next row group fails (see
 *     {@code checkEndOfRowGroup()})
 */
public boolean nextBatch() throws IOException {
  // Clear the state left by the previous batch before checking whether rows remain.
  for (int v = 0; v < columnVectors.length; v++) {
    columnVectors[v].reset();
  }
  columnarBatch.setNumRows(0);

  boolean exhausted = rowsReturned >= totalRowCount;
  if (exhausted) {
    return false;
  }

  checkEndOfRowGroup();

  // Bounded by both the configured capacity and the rows still unread from what is loaded.
  int batchSize = (int) Math.min((long) capacity, totalCountLoadedSoFar - rowsReturned);
  for (int col = 0; col < columnReaders.length; col++) {
    if (columnReaders[col] != null) {
      columnReaders[col].readBatch(batchSize, columnVectors[col]);
    }
  }

  rowsReturned += batchSize;
  columnarBatch.setNumRows(batchSize);
  numBatched = batchSize;
  batchIdx = 0;
  return true;
}
/**
 * Loads the next row group once every row loaded so far has been returned.
 *
 * <p>Does nothing while {@code rowsReturned} differs from {@code totalCountLoadedSoFar}.
 * Otherwise it reads the next row group, rebuilds {@code columnReaders} with one reader per
 * requested column (slots flagged in {@code missingColumns} are left {@code null}) and adds
 * the row group's row count to {@code totalCountLoadedSoFar}. Each reader receives the
 * column descriptor, the original type of the schema field at the same index, the column's
 * page reader and {@code convertTz}.
 *
 * @throws IOException if {@code reader.readNextRowGroup()} returns {@code null}, i.e. no
 *     further row group is available
 */
private void checkEndOfRowGroup() throws IOException {
  if (rowsReturned == totalCountLoadedSoFar) {
    PageReadStore rowGroup = reader.readNextRowGroup();
    if (rowGroup == null) {
      throw new IOException("expecting more rows but reached last block. Read "
          + rowsReturned + " out of " + totalRowCount);
    }

    List<ColumnDescriptor> descriptors = requestedSchema.getColumns();
    List<Type> fieldTypes = requestedSchema.asGroupType().getFields();
    int columnCount = descriptors.size();
    columnReaders = new VectorizedColumnReader[columnCount];
    for (int idx = 0; idx < columnCount; idx++) {
      if (!missingColumns[idx]) {
        ColumnDescriptor col = descriptors.get(idx);
        columnReaders[idx] = new VectorizedColumnReader(
            col,
            fieldTypes.get(idx).getOriginalType(),
            rowGroup.getPageReader(col),
            convertTz);
      }
    }

    totalCountLoadedSoFar += rowGroup.getRowCount();
  }
}
// Unmatched closing brace carried over from the original line (its opening brace is outside this block).
}
readPage(); leftInPage = (int) (endOfPageValueCount - valuesRead); decodeDictionaryIds(rowId, num, column, dictionaryIds); decodeDictionaryIds(0, rowId, column, column.getDictionaryIds()); readBooleanBatch(rowId, num, column); break; case INT32: readIntBatch(rowId, num, column); break; case INT64: readLongBatch(rowId, num, column); break; case INT96: readBinaryBatch(rowId, num, column); break; case FLOAT: readFloatBatch(rowId, num, column); break; case DOUBLE: readDoubleBatch(rowId, num, column); break; case BINARY: readBinaryBatch(rowId, num, column); break; case FIXED_LEN_BYTE_ARRAY: readFixedLenByteArrayBatch(rowId, num, column, descriptor.getTypeLength());
private void readPageV2(DataPageV2 page) throws IOException { this.pageValueCount = page.getValueCount(); this.repetitionLevelColumn = createRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels(), descriptor); int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); // do not read the length from the stream. v2 pages handle dividing the page bytes. this.defColumn = new VectorizedRleValuesReader(bitWidth, false); this.definitionLevelColumn = new ValuesReaderIntIterator(this.defColumn); this.defColumn.initFromPage( this.pageValueCount, page.getDefinitionLevels().toInputStream()); try { initDataReader(page.getDataEncoding(), page.getData().toInputStream()); } catch (IOException e) { throw new IOException("could not read page " + page + " in col " + descriptor, e); } } }
/**
 * Moves on to the next batch of rows.
 *
 * @return {@code false} if there are no more rows ({@code rowsReturned} has reached
 *     {@code totalRowCount}); {@code true} once a new batch has been read
 * @throws IOException if advancing to the next row group fails (see
 *     {@code checkEndOfRowGroup()})
 */
public boolean nextBatch() throws IOException {
  // Clear the previous batch before checking whether rows remain.
  columnarBatch.reset();

  boolean exhausted = rowsReturned >= totalRowCount;
  if (exhausted) {
    return false;
  }

  checkEndOfRowGroup();

  // Bounded by both the batch capacity and the rows still unread from what is loaded.
  int batchSize =
      (int) Math.min((long) columnarBatch.capacity(), totalCountLoadedSoFar - rowsReturned);
  for (int col = 0; col < columnReaders.length; col++) {
    if (columnReaders[col] != null) {
      columnReaders[col].readBatch(batchSize, columnarBatch.column(col));
    }
  }

  rowsReturned += batchSize;
  columnarBatch.setNumRows(batchSize);
  numBatched = batchSize;
  batchIdx = 0;
  return true;
}
/**
 * Loads the next row group once every row loaded so far has been returned.
 *
 * <p>Does nothing while {@code rowsReturned} differs from {@code totalCountLoadedSoFar}.
 * Otherwise it reads the next row group, rebuilds {@code columnReaders} with one reader per
 * requested column (slots flagged in {@code missingColumns} are left {@code null}) and adds
 * the row group's row count to {@code totalCountLoadedSoFar}. Each reader receives the
 * column descriptor, the original type of the schema field at the same index, the column's
 * page reader and {@code convertTz}.
 *
 * @throws IOException if {@code reader.readNextRowGroup()} returns {@code null}, i.e. no
 *     further row group is available
 */
private void checkEndOfRowGroup() throws IOException {
  if (rowsReturned == totalCountLoadedSoFar) {
    PageReadStore rowGroup = reader.readNextRowGroup();
    if (rowGroup == null) {
      throw new IOException("expecting more rows but reached last block. Read "
          + rowsReturned + " out of " + totalRowCount);
    }

    List<ColumnDescriptor> descriptors = requestedSchema.getColumns();
    List<Type> fieldTypes = requestedSchema.asGroupType().getFields();
    int columnCount = descriptors.size();
    columnReaders = new VectorizedColumnReader[columnCount];
    for (int idx = 0; idx < columnCount; idx++) {
      if (!missingColumns[idx]) {
        ColumnDescriptor col = descriptors.get(idx);
        columnReaders[idx] = new VectorizedColumnReader(
            col,
            fieldTypes.get(idx).getOriginalType(),
            rowGroup.getPageReader(col),
            convertTz);
      }
    }

    totalCountLoadedSoFar += rowGroup.getRowCount();
  }
}
// Unmatched closing brace carried over from the original line (its opening brace is outside this block).
}
readPage(); leftInPage = (int) (endOfPageValueCount - valuesRead); decodeDictionaryIds(rowId, num, column, dictionaryIds); decodeDictionaryIds(0, rowId, column, column.getDictionaryIds()); readBooleanBatch(rowId, num, column); break; case INT32: readIntBatch(rowId, num, column); break; case INT64: readLongBatch(rowId, num, column); break; case INT96: readBinaryBatch(rowId, num, column); break; case FLOAT: readFloatBatch(rowId, num, column); break; case DOUBLE: readDoubleBatch(rowId, num, column); break; case BINARY: readBinaryBatch(rowId, num, column); break; case FIXED_LEN_BYTE_ARRAY: readFixedLenByteArrayBatch(rowId, num, column, descriptor.getTypeLength());
private void readPageV1(DataPageV1 page) throws IOException { this.pageValueCount = page.getValueCount(); ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL); ValuesReader dlReader; // Initialize the decoders. if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) { throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding()); } int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); this.defColumn = new VectorizedRleValuesReader(bitWidth); dlReader = this.defColumn; this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader); this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader); try { BytesInput bytes = page.getBytes(); ByteBufferInputStream in = bytes.toInputStream(); rlReader.initFromPage(pageValueCount, in); dlReader.initFromPage(pageValueCount, in); initDataReader(page.getValueEncoding(), in); } catch (IOException e) { throw new IOException("could not read page " + page + " in col " + descriptor, e); } }
/**
 * Moves on to the next batch of rows.
 *
 * @return {@code false} if there are no more rows ({@code rowsReturned} has reached
 *     {@code totalRowCount}); {@code true} once a new batch has been read
 * @throws IOException if advancing to the next row group fails (see
 *     {@code checkEndOfRowGroup()})
 */
public boolean nextBatch() throws IOException {
  // Clear the previous batch before checking whether rows remain.
  columnarBatch.reset();

  boolean exhausted = rowsReturned >= totalRowCount;
  if (exhausted) {
    return false;
  }

  checkEndOfRowGroup();

  // Bounded by both the batch capacity and the rows still unread from what is loaded.
  int batchSize =
      (int) Math.min((long) columnarBatch.capacity(), totalCountLoadedSoFar - rowsReturned);
  for (int col = 0; col < columnReaders.length; col++) {
    if (columnReaders[col] != null) {
      columnReaders[col].readBatch(batchSize, columnarBatch.column(col));
    }
  }

  rowsReturned += batchSize;
  columnarBatch.setNumRows(batchSize);
  numBatched = batchSize;
  batchIdx = 0;
  return true;
}
/**
 * Loads the next row group once every row loaded so far has been returned.
 *
 * <p>Does nothing while {@code rowsReturned} differs from {@code totalCountLoadedSoFar}.
 * Otherwise it reads the next row group, rebuilds {@code columnReaders} with one reader per
 * requested column (slots flagged in {@code missingColumns} are left {@code null}) and adds
 * the row group's row count to {@code totalCountLoadedSoFar}.
 *
 * @throws IOException if {@code reader.readNextRowGroup()} returns {@code null}, i.e. no
 *     further row group is available
 */
private void checkEndOfRowGroup() throws IOException {
  if (rowsReturned == totalCountLoadedSoFar) {
    PageReadStore rowGroup = reader.readNextRowGroup();
    if (rowGroup == null) {
      throw new IOException("expecting more rows but reached last block. Read "
          + rowsReturned + " out of " + totalRowCount);
    }

    List<ColumnDescriptor> descriptors = requestedSchema.getColumns();
    int columnCount = descriptors.size();
    columnReaders = new VectorizedColumnReader[columnCount];
    for (int idx = 0; idx < columnCount; idx++) {
      if (!missingColumns[idx]) {
        ColumnDescriptor col = descriptors.get(idx);
        columnReaders[idx] = new VectorizedColumnReader(col, rowGroup.getPageReader(col));
      }
    }

    totalCountLoadedSoFar += rowGroup.getRowCount();
  }
}
// Unmatched closing brace carried over from the original line (its opening brace is outside this block).
}
private void readPageV1(DataPageV1 page) throws IOException { this.pageValueCount = page.getValueCount(); ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL); ValuesReader dlReader; // Initialize the decoders. if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) { throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding()); } int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); this.defColumn = new VectorizedRleValuesReader(bitWidth); dlReader = this.defColumn; this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader); this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader); try { byte[] bytes = page.getBytes().toByteArray(); rlReader.initFromPage(pageValueCount, bytes, 0); int next = rlReader.getNextOffset(); dlReader.initFromPage(pageValueCount, bytes, next); next = dlReader.getNextOffset(); initDataReader(page.getValueEncoding(), bytes, next); } catch (IOException e) { throw new IOException("could not read page " + page + " in col " + descriptor, e); } }
/**
 * Prepares the repetition-level, definition-level and data readers for a v2 data page.
 *
 * <p>The definition levels and the page data are each converted to a byte array; the data
 * reader is initialised at offset 0 of its array.
 *
 * @param page the v2 data page to read from
 * @throws IOException if initialising the data reader fails; the original exception is
 *     attached as the cause
 */
private void readPageV2(DataPageV2 page) throws IOException {
  pageValueCount = page.getValueCount();
  repetitionLevelColumn = createRLEIterator(
      descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels(), descriptor);

  int levelBitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  defColumn = new VectorizedRleValuesReader(levelBitWidth);
  definitionLevelColumn = new ValuesReaderIntIterator(defColumn);
  defColumn.initFromBuffer(pageValueCount, page.getDefinitionLevels().toByteArray());

  try {
    initDataReader(page.getDataEncoding(), page.getData().toByteArray(), 0);
  } catch (IOException cause) {
    throw new IOException("could not read page " + page + " in col " + descriptor, cause);
  }
}
// Unmatched closing brace carried over from the original line (its opening brace is outside this block).
}
private void readPageV1(DataPageV1 page) throws IOException { this.pageValueCount = page.getValueCount(); ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL); ValuesReader dlReader; // Initialize the decoders. if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) { throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding()); } int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); this.defColumn = new VectorizedRleValuesReader(bitWidth); dlReader = this.defColumn; this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader); this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader); try { byte[] bytes = page.getBytes().toByteArray(); rlReader.initFromPage(pageValueCount, bytes, 0); int next = rlReader.getNextOffset(); dlReader.initFromPage(pageValueCount, bytes, next); next = dlReader.getNextOffset(); initDataReader(page.getValueEncoding(), bytes, next); } catch (IOException e) { throw new IOException("could not read page " + page + " in col " + descriptor, e); } }