/**
 * Creates an uncompressed dictionary page: the size of {@code bytes} is passed on as
 * the uncompressed size.
 *
 * @param bytes the content of the page
 * @param dictionarySize the value count in the dictionary
 * @param encoding the encoding used
 */
public DictionaryPage(BytesInput bytes, int dictionarySize, Encoding encoding) {
  // Checked narrowing, consistent with the four-argument constructor: a size that
  // does not fit in an int must be rejected rather than silently wrapped.
  // TODO: fix sizes long or int
  this(bytes, Ints.checkedCast(bytes.size()), dictionarySize, encoding);
}
/**
 * Wraps the given inputs and records the sum of their sizes.
 *
 * @param inputs the inputs to wrap
 */
private SequenceBytesIn(BytesInput[] inputs) {
  long combinedSize = 0;
  for (int i = 0; i < inputs.length; i++) {
    combinedSize += inputs[i].size();
  }
  this.inputs = inputs;
  this.size = combinedSize;
}
/**
 * Wraps the given inputs and records the sum of their sizes.
 *
 * @param inputs the inputs to wrap
 */
private SequenceBytesIn(List<BytesInput> inputs) {
  long combinedSize = 0;
  for (BytesInput part : inputs) {
    combinedSize += part.size();
  }
  this.inputs = inputs;
  this.size = combinedSize;
}
/**
 * Wraps the given inputs and records the sum of their sizes.
 *
 * @param inputs the inputs to wrap
 */
private SequenceBytesIn(List<BytesInput> inputs) {
  long combinedSize = 0;
  for (BytesInput part : inputs) {
    combinedSize += part.size();
  }
  this.inputs = inputs;
  this.size = combinedSize;
}
/**
 * Writes the whole content of this input into an in-memory buffer sized from
 * {@link #size()} and returns that buffer.
 *
 * @return a new byte array materializing the contents of this input
 * @throws IOException if the content is larger than {@code Integer.MAX_VALUE} bytes,
 *         or if writing the content fails
 */
public byte[] toByteArray() throws IOException {
  long length = size();
  // A Java array cannot hold more than Integer.MAX_VALUE bytes; fail with a clear
  // message instead of letting the narrowing cast wrap to a bogus capacity.
  if (length > Integer.MAX_VALUE) {
    throw new IOException(
        "cannot materialize " + length + " bytes as a byte array: larger than " + Integer.MAX_VALUE);
  }
  BAOS baos = new BAOS((int) length);
  this.writeAllTo(baos);
  if (DEBUG) {
    LOG.debug("converted " + size() + " to byteArray of " + baos.size() + " bytes");
  }
  return baos.getBuf();
}
/**
 * Writes the whole content of this input into an in-memory buffer sized from
 * {@link #size()} and returns that buffer.
 *
 * @return a new byte array materializing the contents of this input
 * @throws IOException if the content is larger than {@code Integer.MAX_VALUE} bytes,
 *         or if writing the content fails
 */
public byte[] toByteArray() throws IOException {
  long length = size();
  // A Java array cannot hold more than Integer.MAX_VALUE bytes; fail with a clear
  // message instead of letting the narrowing cast wrap to a bogus capacity.
  if (length > Integer.MAX_VALUE) {
    throw new IOException(
        "cannot materialize " + length + " bytes as a byte array: larger than " + Integer.MAX_VALUE);
  }
  BAOS baos = new BAOS((int) length);
  this.writeAllTo(baos);
  if (DEBUG) {
    LOG.debug("converted " + size() + " to byteArray of " + baos.size() + " bytes");
  }
  return baos.getBuf();
}
/**
 * Writes the whole content of this input into an in-memory buffer sized from
 * {@link #size()} and returns that buffer.
 *
 * @return a new byte array materializing the contents of this input
 * @throws IOException if the content is larger than {@code Integer.MAX_VALUE} bytes,
 *         or if writing the content fails
 */
public byte[] toByteArray() throws IOException {
  long length = size();
  // A Java array cannot hold more than Integer.MAX_VALUE bytes; fail with a clear
  // message instead of letting the narrowing cast wrap to a bogus capacity.
  if (length > Integer.MAX_VALUE) {
    throw new IOException(
        "cannot materialize " + length + " bytes as a byte array: larger than " + Integer.MAX_VALUE);
  }
  BAOS baos = new BAOS((int) length);
  this.writeAllTo(baos);
  if (DEBUG) {
    LOG.debug("converted " + size() + " to byteArray of " + baos.size() + " bytes");
  }
  return baos.getBuf();
}
/**
 * Writes every wrapped input to {@code out}, one after the other. When DEBUG is on,
 * each write is logged and nested sequences are bracketed with "{" and "}".
 *
 * @param out the stream receiving the bytes
 * @throws IOException if one of the wrapped inputs fails to write
 */
@SuppressWarnings("unused")
@Override
public void writeAllTo(OutputStream out) throws IOException {
  for (BytesInput part : inputs) {
    if (DEBUG) {
      LOG.debug("write " + part.size() + " bytes to out");
      if (part instanceof SequenceBytesIn) {
        LOG.debug("{");
      }
    }
    part.writeAllTo(out);
    if (DEBUG && part instanceof SequenceBytesIn) {
      LOG.debug("}");
    }
  }
}
/**
 * Writes every wrapped input to {@code out}, one after the other. When DEBUG is on,
 * each write is logged and nested sequences are bracketed with "{" and "}".
 *
 * @param out the stream receiving the bytes
 * @throws IOException if one of the wrapped inputs fails to write
 */
@Override
public void writeAllTo(OutputStream out) throws IOException {
  for (BytesInput part : inputs) {
    if (DEBUG) {
      LOG.debug("write " + part.size() + " bytes to out");
      if (part instanceof SequenceBytesIn) {
        LOG.debug("{");
      }
    }
    part.writeAllTo(out);
    if (DEBUG && part instanceof SequenceBytesIn) {
      LOG.debug("}");
    }
  }
}
/**
 * Writes every wrapped input to {@code out}, one after the other. When DEBUG is on,
 * each write is logged and nested sequences are bracketed with "{" and "}".
 *
 * @param out the stream receiving the bytes
 * @throws IOException if one of the wrapped inputs fails to write
 */
@SuppressWarnings("unused")
@Override
public void writeAllTo(OutputStream out) throws IOException {
  for (BytesInput part : inputs) {
    if (DEBUG) {
      LOG.debug("write " + part.size() + " bytes to out");
      if (part instanceof SequenceBytesIn) {
        LOG.debug("{");
      }
    }
    part.writeAllTo(out);
    if (DEBUG && part instanceof SequenceBytesIn) {
      LOG.debug("}");
    }
  }
}
/**
 * Builds a one-line summary of this page: level and data sizes, data encoding,
 * value and row counts, the compression flag and the uncompressed size.
 *
 * @return the summary string
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder("Page V2 [");
  text.append("dl size=").append(definitionLevels.size()).append(", ");
  text.append("rl size=").append(repetitionLevels.size()).append(", ");
  text.append("data size=").append(data.size()).append(", ");
  text.append("data enc=").append(dataEncoding).append(", ");
  text.append("valueCount=").append(getValueCount()).append(", ");
  text.append("rowCount=").append(getRowCount()).append(", ");
  text.append("is compressed=").append(isCompressed).append(", ");
  text.append("uncompressedSize=").append(getUncompressedSize()).append("]");
  return text.toString();
}
}
/**
 * Stores a copy of the given page content in memory and updates the running
 * size and value-count totals.
 *
 * @param bytesInput the content of the page
 * @param valueCount the number of values in the page; must not be 0
 * @param encoding the encoding of the page
 * @throws ParquetEncodingException if {@code valueCount} is 0 or the page is larger
 *         than {@code Integer.MAX_VALUE} bytes
 * @throws IOException if copying the page content fails
 */
@Override
public void writePage(BytesInput bytesInput, int valueCount, Encoding encoding) throws IOException {
  if (valueCount == 0) {
    throw new ParquetEncodingException("illegal page of 0 values");
  }
  long pageSize = bytesInput.size();
  // The page records its size as an int: reject oversized pages before touching any
  // state instead of storing a silently wrapped size.
  if (pageSize > Integer.MAX_VALUE) {
    throw new ParquetEncodingException(
        "Cannot write page larger than Integer.MAX_VALUE bytes: " + pageSize);
  }
  memSize += pageSize;
  pages.add(new Page(BytesInput.copy(bytesInput), valueCount, (int) pageSize, encoding));
  totalValueCount += valueCount;
  if (DEBUG) {
    LOG.debug("page written for " + pageSize + " bytes and " + valueCount + " records");
  }
}
/**
 * Builds a one-line summary of this page: byte size, value count and
 * uncompressed size.
 *
 * @return the summary string
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder("Page [bytes.size=");
  text.append(bytes.size());
  text.append(", valueCount=").append(getValueCount());
  text.append(", uncompressedSize=").append(getUncompressedSize());
  text.append("]");
  return text.toString();
}
@Override public BytesInput getBytes() { serializeCurrentValue(); BytesInput buf = bitWriter.finish(); if (Log.DEBUG) LOG.debug("writing a buffer of size " + buf.size() + " + 4 bytes"); // We serialize the length so that on deserialization we can // deserialize as we go, instead of having to load everything // into memory return concat(BytesInput.fromInt((int)buf.size()), buf); }
/**
 * Creates a dictionary page.
 *
 * @param bytes the (possibly compressed) content of the page; must not be null
 * @param uncompressedSize the size uncompressed
 * @param dictionarySize the value count in the dictionary
 * @param encoding the encoding used; must not be null
 */
public DictionaryPage(BytesInput bytes, int uncompressedSize, int dictionarySize, Encoding encoding) {
  // The null check has to happen inside the super(...) argument: bytes.size() is
  // evaluated there, so a check placed after the call could never be reached.
  super(Ints.checkedCast(checkNotNull(bytes, "bytes").size()), uncompressedSize);
  this.bytes = bytes;
  this.dictionarySize = dictionarySize;
  this.encoding = checkNotNull(encoding, "encoding");
}
@Override public BytesInput getBytes() { if (!fellBackAlready && firstPage) { // we use the first page to decide if we're going to use this encoding BytesInput bytes = initialWriter.getBytes(); if (!initialWriter.isCompressionSatisfying(rawDataByteSize, bytes.size())) { fallBack(); } else { return bytes; } } return currentWriter.getBytes(); }
@Override public BytesInput getBytes() { try { // prepend the length of the column BytesInput rle = encoder.toBytes(); return BytesInput.concat(BytesInput.fromInt(Ints.checkedCast(rle.size())), rle); } catch (IOException e) { throw new ParquetEncodingException(e); } }
/**
 * Compresses the given dictionary page and keeps a copy of the compressed bytes,
 * together with the uncompressed size, as this writer's dictionary page.
 *
 * @param dictionaryPage the dictionary page to store
 * @throws ParquetEncodingException if a dictionary page was already written, or the
 *         uncompressed page is larger than {@code Integer.MAX_VALUE} bytes
 * @throws IOException if compressing or copying the page fails
 */
@Override
public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
  if (this.dictionaryPage != null) {
    throw new ParquetEncodingException("Only one dictionary page is allowed");
  }
  BytesInput dictionaryBytes = dictionaryPage.getBytes();
  long uncompressedLength = dictionaryBytes.size();
  // The uncompressed size is stored as an int: reject a size that would wrap rather
  // than record a wrong value.
  if (uncompressedLength > Integer.MAX_VALUE) {
    throw new ParquetEncodingException(
        "Cannot write dictionary page larger than Integer.MAX_VALUE bytes: " + uncompressedLength);
  }
  int uncompressedSize = (int) uncompressedLength;
  BytesInput compressedBytes = compressor.compress(dictionaryBytes);
  this.dictionaryPage = new DictionaryPage(
      BytesInput.copy(compressedBytes),
      uncompressedSize,
      dictionaryPage.getDictionarySize(),
      dictionaryPage.getEncoding());
}
/**
 * Compresses the given dictionary page and keeps a copy of the compressed bytes,
 * together with the uncompressed size, as this writer's dictionary page.
 *
 * @param dictionaryPage the dictionary page to store
 * @throws ParquetEncodingException if a dictionary page was already written, or the
 *         uncompressed page is larger than {@code Integer.MAX_VALUE} bytes
 * @throws IOException if compressing or copying the page fails
 */
@Override
public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
  if (this.dictionaryPage != null) {
    throw new ParquetEncodingException("Only one dictionary page is allowed");
  }
  BytesInput dictionaryBytes = dictionaryPage.getBytes();
  long uncompressedLength = dictionaryBytes.size();
  // The uncompressed size is stored as an int: reject a size that would wrap rather
  // than record a wrong value.
  if (uncompressedLength > Integer.MAX_VALUE) {
    throw new ParquetEncodingException(
        "Cannot write dictionary page larger than Integer.MAX_VALUE bytes: " + uncompressedLength);
  }
  int uncompressedSize = (int) uncompressedLength;
  BytesInput compressedBytes = compressor.compress(dictionaryBytes);
  this.dictionaryPage = new DictionaryPage(
      BytesInput.copy(compressedBytes),
      uncompressedSize,
      dictionaryPage.getDictionarySize(),
      dictionaryPage.getEncoding());
}
/**
 * Sets up the repetition and definition level iterators for a V2 page, then
 * initializes the data reader from the page's data bytes.
 *
 * @param page the V2 data page to read
 * @throws ParquetDecodingException if an IOException occurs while reading the page data
 */
private void readPageV2(DataPageV2 page) {
  final int maxRepetitionLevel = path.getMaxRepetitionLevel();
  this.repetitionLevelColumn = newRLEIterator(maxRepetitionLevel, page.getRepetitionLevels());
  final int maxDefinitionLevel = path.getMaxDefinitionLevel();
  this.definitionLevelColumn = newRLEIterator(maxDefinitionLevel, page.getDefinitionLevels());
  try {
    if (DEBUG) {
      LOG.debug("page data size " + page.getData().size() + " bytes and " + pageValueCount + " records");
    }
    initDataReader(
        page.getDataEncoding(),
        page.getData().toByteArray(),
        0,
        page.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + path, e);
  }
}