/**
 * Packs whatever is still pending in {@code input} and exposes everything packed so far.
 *
 * <p>If {@code inputSize} is greater than zero, the slots of {@code input} from
 * {@code inputSize} onward are set to zero and {@code pack()} is invoked once more. The
 * result is the concatenation of {@code slabs} with the first {@code packedByteLength}
 * bytes of {@code packed}.
 *
 * @return the bytes representing the packed values
 * @throws IOException if there is an exception while creating the BytesInput
 */
public BytesInput toBytes() throws IOException {
  final int byteLength = packedPosition + BytesUtils.paddedByteCountFromBits(inputSize * bitWidth);
  LOG.debug("writing {} bytes", (totalFullSlabSize + byteLength));

  if (inputSize > 0) {
    // Clear every slot past the last written value before the final pack() call.
    int slot = inputSize;
    while (slot < input.length) {
      input[slot] = 0;
      slot++;
    }
    pack();
  }

  final BytesInput fullSlabs = concat(slabs);
  final BytesInput currentSlab = BytesInput.from(packed, 0, byteLength);
  return concat(fullSlabs, currentSlab);
}
/**
 * Packs whatever is still pending in {@code input} and exposes everything packed so far.
 *
 * <p>If {@code inputSize} is greater than zero, the slots of {@code input} from
 * {@code inputSize} onward are set to zero and {@code pack()} is invoked once more. The
 * result is the concatenation of {@code slabs} with the first {@code packedByteLength}
 * bytes of {@code packed}.
 *
 * @return the bytes representing the packed values
 * @throws IOException if there is an exception while creating the BytesInput
 */
public BytesInput toBytes() throws IOException {
  final int byteLength = packedPosition + BytesUtils.paddedByteCountFromBits(inputSize * bitWidth);
  LOG.debug("writing {} bytes", (totalFullSlabSize + byteLength));

  if (inputSize > 0) {
    // Clear every slot past the last written value before the final pack() call.
    int slot = inputSize;
    while (slot < input.length) {
      input[slot] = 0;
      slot++;
    }
    pack();
  }

  final BytesInput fullSlabs = concat(slabs);
  final BytesInput currentSlab = BytesInput.from(packed, 0, byteLength);
  return concat(fullSlabs, currentSlab);
}
/**
 * Serializes this configuration as two unsigned var-ints.
 *
 * @return {@code blockSizeInValues} followed by {@code miniBlockNumInABlock}, each produced
 *     by {@code BytesInput.fromUnsignedVarInt}
 */
public BytesInput toBytesInput() {
  final BytesInput blockSize = BytesInput.fromUnsignedVarInt(blockSizeInValues);
  final BytesInput miniBlockCount = BytesInput.fromUnsignedVarInt(miniBlockNumInABlock);
  return BytesInput.concat(blockSize, miniBlockCount);
}
}
/**
 * Assembles the output of the two underlying writers into a single {@code BytesInput}.
 *
 * @return the bytes of {@code prefixLengthWriter} followed by the bytes of
 *     {@code suffixWriter}
 */
@Override
public BytesInput getBytes() {
  final BytesInput prefixLengths = prefixLengthWriter.getBytes();
  final BytesInput suffixes = suffixWriter.getBytes();
  return BytesInput.concat(prefixLengths, suffixes);
}
/**
 * Serializes this configuration as two unsigned var-ints.
 *
 * @return {@code blockSizeInValues} followed by {@code miniBlockNumInABlock}, each produced
 *     by {@code BytesInput.fromUnsignedVarInt}
 */
public BytesInput toBytesInput() {
  final BytesInput blockSize = BytesInput.fromUnsignedVarInt(blockSizeInValues);
  final BytesInput miniBlockCount = BytesInput.fromUnsignedVarInt(miniBlockNumInABlock);
  return BytesInput.concat(blockSize, miniBlockCount);
}
}
/**
 * Assembles the output of the two underlying writers into a single {@code BytesInput}.
 *
 * @return the bytes of {@code prefixLengthWriter} followed by the bytes of
 *     {@code suffixWriter}
 */
@Override
public BytesInput getBytes() {
  final BytesInput prefixLengths = prefixLengthWriter.getBytes();
  final BytesInput suffixes = suffixWriter.getBytes();
  return BytesInput.concat(prefixLengths, suffixes);
}
/**
 * Flushes {@code out} and returns the output of {@code lengthWriter} followed by the
 * contents of {@code arrayOut}.
 *
 * @return the concatenation of the length bytes and the buffered bytes
 * @throws ParquetEncodingException if flushing {@code out} raises an {@code IOException}
 */
@Override
public BytesInput getBytes() {
  try {
    out.flush();
  } catch (IOException flushFailure) {
    throw new ParquetEncodingException("could not write page", flushFailure);
  }

  LOG.debug("writing a buffer of size {}", arrayOut.size());

  final BytesInput lengths = lengthWriter.getBytes();
  final BytesInput payload = BytesInput.from(arrayOut);
  return BytesInput.concat(lengths, payload);
}
/**
 * Flushes {@code out} and returns the output of {@code lengthWriter} followed by the
 * contents of {@code arrayOut}.
 *
 * @return the concatenation of the length bytes and the buffered bytes
 * @throws ParquetEncodingException if flushing {@code out} raises an {@code IOException}
 */
@Override
public BytesInput getBytes() {
  try {
    out.flush();
  } catch (IOException flushFailure) {
    throw new ParquetEncodingException("could not write page", flushFailure);
  }

  LOG.debug("writing a buffer of size {}", arrayOut.size());

  final BytesInput lengths = lengthWriter.getBytes();
  final BytesInput payload = BytesInput.from(arrayOut);
  return BytesInput.concat(lengths, payload);
}
@Override public BytesInput getBytes() { try { // prepend the length of the column BytesInput rle = encoder.toBytes(); return BytesInput.concat(BytesInput.fromInt(Ints.checkedCast(rle.size())), rle); } catch (IOException e) { throw new ParquetEncodingException(e); } }
@Override public BytesInput getBytes() { try { // prepend the length of the column BytesInput rle = encoder.toBytes(); return BytesInput.concat(BytesInput.fromInt(Ints.checkedCast(rle.size())), rle); } catch (IOException e) { throw new ParquetEncodingException(e); } }
/** * getBytes will trigger flushing block buffer, DO NOT write after getBytes() is called without calling reset() * * @return a BytesInput that contains the encoded page data */ @Override public BytesInput getBytes() { // The Page Header should include: blockSizeInValues, numberOfMiniBlocks, totalValueCount if (deltaValuesToFlush != 0) { flushBlockBuffer(); } return BytesInput.concat( config.toBytesInput(), BytesInput.fromUnsignedVarInt(totalValueCount), BytesInput.fromZigZagVarLong(firstValue), BytesInput.from(baos)); }
/** * getBytes will trigger flushing block buffer, DO NOT write after getBytes() is called without calling reset() * * @return a BytesInput that contains the encoded page data */ @Override public BytesInput getBytes() { // The Page Header should include: blockSizeInValues, numberOfMiniBlocks, totalValueCount if (deltaValuesToFlush != 0) { flushBlockBuffer(); } return BytesInput.concat( config.toBytesInput(), BytesInput.fromUnsignedVarInt(totalValueCount), BytesInput.fromZigZagVarInt(firstValue), BytesInput.from(baos)); }
/** * getBytes will trigger flushing block buffer, DO NOT write after getBytes() is called without calling reset() * * @return a BytesInput that contains the encoded page data */ @Override public BytesInput getBytes() { // The Page Header should include: blockSizeInValues, numberOfMiniBlocks, totalValueCount if (deltaValuesToFlush != 0) { flushBlockBuffer(); } return BytesInput.concat( config.toBytesInput(), BytesInput.fromUnsignedVarInt(totalValueCount), BytesInput.fromZigZagVarInt(firstValue), BytesInput.from(baos)); }
/** * getBytes will trigger flushing block buffer, DO NOT write after getBytes() is called without calling reset() * * @return a BytesInput that contains the encoded page data */ @Override public BytesInput getBytes() { // The Page Header should include: blockSizeInValues, numberOfMiniBlocks, totalValueCount if (deltaValuesToFlush != 0) { flushBlockBuffer(); } return BytesInput.concat( config.toBytesInput(), BytesInput.fromUnsignedVarInt(totalValueCount), BytesInput.fromZigZagVarLong(firstValue), BytesInput.from(baos)); }
/**
 * Hands one page to {@code pageWriter}.
 *
 * <p>The page body is the concatenation of the bytes of the three writers, in the order
 * repetition levels, definition levels, values. Note that {@code valueCount} is passed to
 * {@code pageWriter.writePage} before {@code rowCount}, the reverse of this method's own
 * parameter order. The encoding reported by each writer is forwarded alongside.
 *
 * @param rowCount the row count forwarded to the page writer
 * @param valueCount the value count forwarded to the page writer
 * @param statistics the statistics forwarded to the page writer
 * @param repetitionLevels writer supplying the repetition-level bytes and their encoding
 * @param definitionLevels writer supplying the definition-level bytes and their encoding
 * @param values writer supplying the value bytes and their encoding
 * @throws IOException propagated from the calls made in the body
 */
// NOTE(review): the final closing brace on the line below has no matching opener in this
// method; presumably it ends the enclosing type, whose header is outside this excerpt.
@Override void writePage(int rowCount, int valueCount, Statistics<?> statistics, ValuesWriter repetitionLevels, ValuesWriter definitionLevels, ValuesWriter values) throws IOException { pageWriter.writePage( concat(repetitionLevels.getBytes(), definitionLevels.getBytes(), values.getBytes()), valueCount, rowCount, statistics, repetitionLevels.getEncoding(), definitionLevels.getEncoding(), values.getEncoding()); } }
buf.collect(BytesInput.concat(BytesInput.from(tempOutputStream), compressedBytes)); rlEncodings.add(rlEncoding); dlEncodings.add(dlEncoding);
@Override public BytesInput getBytes() { int maxDicId = getDictionarySize() - 1; LOG.debug("max dic id {}", maxDicId); int bitWidth = BytesUtils.getWidthFromMaxInt(maxDicId); int initialSlabSize = CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_INITIAL_SLAB_SIZE, maxDictionaryByteSize, 10); RunLengthBitPackingHybridEncoder encoder = new RunLengthBitPackingHybridEncoder(bitWidth, initialSlabSize, maxDictionaryByteSize, this.allocator); encoders.add(encoder); IntIterator iterator = encodedValues.iterator(); try { while (iterator.hasNext()) { encoder.writeInt(iterator.next()); } // encodes the bit width byte[] bytesHeader = new byte[] { (byte) bitWidth }; BytesInput rleEncodedBytes = encoder.toBytes(); LOG.debug("rle encoded bytes {}", rleEncodedBytes.size()); BytesInput bytes = concat(BytesInput.from(bytesHeader), rleEncodedBytes); // remember size of dictionary when we last wrote a page lastUsedDictionarySize = getDictionarySize(); lastUsedDictionaryByteSize = dictionaryByteSize; return bytes; } catch (IOException e) { throw new ParquetEncodingException("could not encode the values", e); } }
@Override public BytesInput getBytes() { int maxDicId = getDictionarySize() - 1; LOG.debug("max dic id {}", maxDicId); int bitWidth = BytesUtils.getWidthFromMaxInt(maxDicId); int initialSlabSize = CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_INITIAL_SLAB_SIZE, maxDictionaryByteSize, 10); RunLengthBitPackingHybridEncoder encoder = new RunLengthBitPackingHybridEncoder(bitWidth, initialSlabSize, maxDictionaryByteSize, this.allocator); encoders.add(encoder); IntIterator iterator = encodedValues.iterator(); try { while (iterator.hasNext()) { encoder.writeInt(iterator.next()); } // encodes the bit width byte[] bytesHeader = new byte[] { (byte) bitWidth }; BytesInput rleEncodedBytes = encoder.toBytes(); LOG.debug("rle encoded bytes {}", rleEncodedBytes.size()); BytesInput bytes = concat(BytesInput.from(bytesHeader), rleEncodedBytes); // remember size of dictionary when we last wrote a page lastUsedDictionarySize = getDictionarySize(); lastUsedDictionaryByteSize = dictionaryByteSize; return bytes; } catch (IOException e) { throw new ParquetEncodingException("could not encode the values", e); } }
/**
 * Writes the currently buffered page through {@code pageWriter} and clears the page state.
 *
 * <p>The page body is the concatenation of the bytes of the repetition-level, definition-level
 * and data columns, in that order. After the write, the three columns are reset,
 * {@code valueCount} is set back to zero and the statistics are reset.
 *
 * @throws ParquetEncodingException if {@code pageWriter.writePage} raises an
 *     {@code IOException}; the message names {@code path}
 */
private void writePage() {
  if (DEBUG) {
    LOG.debug("write page");
  }

  try {
    pageWriter.writePage(
        concat(
            repetitionLevelColumn.getBytes(),
            definitionLevelColumn.getBytes(),
            dataColumn.getBytes()),
        valueCount,
        statistics,
        repetitionLevelColumn.getEncoding(),
        definitionLevelColumn.getEncoding(),
        dataColumn.getEncoding());
  } catch (IOException e) {
    throw new ParquetEncodingException("could not write page for " + path, e);
  }

  // Clear the per-page state, in the same order as before.
  repetitionLevelColumn.reset();
  definitionLevelColumn.reset();
  dataColumn.reset();
  valueCount = 0;
  resetStatistics();
}
BytesInput.concat( BytesInput.from(tempOutputStream), repetitionLevels,