/**
 * Describes this page on a single line: the size of {@code bytes}, the
 * dictionary size (labelled {@code entryCount}), the uncompressed size and
 * the encoding.
 */
@Override
public String toString() {
  final StringBuilder text = new StringBuilder("Page [bytes.size=");
  text.append(bytes.size());
  text.append(", entryCount=").append(dictionarySize);
  text.append(", uncompressedSize=").append(getUncompressedSize());
  text.append(", encoding=").append(encoding);
  text.append("]");
  return text.toString();
}
/**
 * Describes this page on a single line: the size of {@code bytes}, the
 * dictionary size (labelled {@code entryCount}), the uncompressed size and
 * the encoding.
 */
@Override
public String toString() {
  final StringBuilder text = new StringBuilder("Page [bytes.size=");
  text.append(bytes.size());
  text.append(", entryCount=").append(dictionarySize);
  text.append(", uncompressedSize=").append(getUncompressedSize());
  text.append(", encoding=").append(encoding);
  text.append("]");
  return text.toString();
}
/**
 * Creates a new dictionary page from a copy of this page's bytes
 * (via {@code BytesInput.copy}) and the same uncompressed size, dictionary
 * size and encoding.
 *
 * @return the newly constructed page
 * @throws IOException if copying the bytes fails
 */
public DictionaryPage copy() throws IOException {
  // Copy the bytes first, exactly as the constructor arguments were evaluated before.
  final BytesInput duplicatedBytes = BytesInput.copy(bytes);
  return new DictionaryPage(
      duplicatedBytes,
      getUncompressedSize(),
      dictionarySize,
      encoding);
}
/**
 * Creates a new dictionary page from a copy of this page's bytes
 * (via {@code BytesInput.copy}) and the same uncompressed size, dictionary
 * size and encoding.
 *
 * @return the newly constructed page
 * @throws IOException if copying the bytes fails
 */
public DictionaryPage copy() throws IOException {
  // Copy the bytes first, exactly as the constructor arguments were evaluated before.
  final BytesInput duplicatedBytes = BytesInput.copy(bytes);
  return new DictionaryPage(
      duplicatedBytes,
      getUncompressedSize(),
      dictionarySize,
      encoding);
}
/**
 * Hands the buffered column chunk (optional dictionary page, page bytes,
 * statistics, index builders and encodings) to the given file writer in a
 * single {@code writeColumnChunk} call, emits a debug summary, and then
 * clears the encoding collections and resets the page counter.
 *
 * @param writer the parquet file writer receiving the column chunk
 * @throws IOException if the writer fails
 */
public void writeToFileWriter(ParquetFileWriter writer) throws IOException {
  writer.writeColumnChunk(
      path,
      totalValueCount,
      compressor.getCodecName(),
      dictionaryPage,
      buf,
      uncompressedLength,
      compressedLength,
      totalStatistics,
      columnIndexBuilder,
      offsetIndexBuilder,
      rlEncodings,
      dlEncodings,
      dataEncodings);
  if (LOG.isDebugEnabled()) {
    LOG.debug(
        String.format(
            "written %,dB for %s: %,d values, %,dB raw, %,dB comp, %d pages, encodings: %s",
            buf.size(), path, totalValueCount, uncompressedLength, compressedLength, pageCount,
            new HashSet<Encoding>(dataEncodings))
        + (dictionaryPage != null
            ? String.format(
                ", dic { %,d entries, %,dB raw, %,dB comp}",
                dictionaryPage.getDictionarySize(),
                dictionaryPage.getUncompressedSize(),
                // "comp" is the byte size of the page as held here, not the entry count.
                dictionaryPage.getBytes().size())
            : ""));
  }
  rlEncodings.clear();
  dlEncodings.clear();
  dataEncodings.clear();
  pageCount = 0;
}
public void writeToFileWriter(ParquetFileWriter writer) throws IOException { writer.startColumn(path, totalValueCount, compressor.getCodecName()); if (dictionaryPage != null) { writer.writeDictionaryPage(dictionaryPage); // tracking the dictionary encoding is handled in writeDictionaryPage } writer.writeDataPages(buf, uncompressedLength, compressedLength, totalStatistics, rlEncodings, dlEncodings, dataEncodings); writer.endColumn(); if (LOG.isDebugEnabled()) { LOG.debug( String.format( "written %,dB for %s: %,d values, %,dB raw, %,dB comp, %d pages, encodings: %s", buf.size(), path, totalValueCount, uncompressedLength, compressedLength, pageCount, new HashSet<Encoding>(dataEncodings)) + (dictionaryPage != null ? String.format( ", dic { %,d entries, %,dB raw, %,dB comp}", dictionaryPage.getDictionarySize(), dictionaryPage.getUncompressedSize(), dictionaryPage.getDictionarySize()) : "")); } rlEncodings.clear(); dlEncodings.clear(); dataEncodings.clear(); pageCount = 0; }
/** * Writes a number of pages within corresponding column chunk * @param writer the parquet file writer * @throws IOException if the file can not be created */ public void writeToFileWriter(ParquetFileWriter writer) throws IOException { writer.startColumn(path, totalValueCount, compressor.getCodecName()); if (dictionaryPage != null) { writer.writeDictionaryPage(dictionaryPage); // tracking the dictionary encoding is handled in writeDictionaryPage } writer.writeDataPages(BytesInput.from(buf), uncompressedLength, compressedLength, totalStatistics, rlEncodings, dlEncodings, dataEncodings); writer.endColumn(); logger.debug( String.format( "written %,dB for %s: %,d values, %,dB raw, %,dB comp, %d pages, encodings: %s", buf.size(), path, totalValueCount, uncompressedLength, compressedLength, pageCount, Sets.newHashSet(dataEncodings)) + (dictionaryPage != null ? String.format( ", dic { %,d entries, %,dB raw, %,dB comp}", dictionaryPage.getDictionarySize(), dictionaryPage.getUncompressedSize(), dictionaryPage.getDictionarySize()) : "")); rlEncodings.clear(); dlEncodings.clear(); dataEncodings.clear(); pageCount = 0; }
/**
 * Returns the dictionary page with its bytes decompressed, or {@code null}
 * when no compressed dictionary page is held.
 *
 * @return a new page built from the decompressed bytes plus the compressed
 *         page's dictionary size and encoding, or {@code null}
 * @throws ParquetDecodingException if decompression raises an {@link IOException}
 */
@Override
public DictionaryPage readDictionaryPage() {
  if (compressedDictionaryPage != null) {
    try {
      return new DictionaryPage(
          decompressor.decompress(
              compressedDictionaryPage.getBytes(),
              compressedDictionaryPage.getUncompressedSize()),
          compressedDictionaryPage.getDictionarySize(),
          compressedDictionaryPage.getEncoding());
    } catch (IOException cause) {
      throw new ParquetDecodingException("Could not decompress dictionary page", cause);
    }
  }
  // Nothing to decompress.
  return null;
}
} // closes an enclosing scope whose opening is outside this view
/**
 * Returns the dictionary page with its bytes decompressed, or {@code null}
 * when no compressed dictionary page is held.
 *
 * @return a new page built from the decompressed bytes plus the compressed
 *         page's dictionary size and encoding, or {@code null}
 * @throws ParquetDecodingException if decompression raises an {@link IOException}
 */
@Override
public DictionaryPage readDictionaryPage() {
  if (compressedDictionaryPage != null) {
    try {
      return new DictionaryPage(
          decompressor.decompress(
              compressedDictionaryPage.getBytes(),
              compressedDictionaryPage.getUncompressedSize()),
          compressedDictionaryPage.getDictionarySize(),
          compressedDictionaryPage.getEncoding());
    } catch (IOException cause) {
      throw new ParquetDecodingException("Could not decompress dictionary page", cause);
    }
  }
  // Nothing to decompress.
  return null;
}
} // closes an enclosing scope whose opening is outside this view
/** * writes a dictionary page page * @param dictionaryPage the dictionary page * @throws IOException if there is an error while writing */ public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException { state = state.write(); LOG.debug("{}: write dictionary page: {} values", out.getPos(), dictionaryPage.getDictionarySize()); currentChunkDictionaryPageOffset = out.getPos(); int uncompressedSize = dictionaryPage.getUncompressedSize(); int compressedPageSize = (int)dictionaryPage.getBytes().size(); // TODO: fix casts metadataConverter.writeDictionaryPageHeader( uncompressedSize, compressedPageSize, dictionaryPage.getDictionarySize(), dictionaryPage.getEncoding(), out); long headerSize = out.getPos() - currentChunkDictionaryPageOffset; this.uncompressedLength += uncompressedSize + headerSize; this.compressedLength += compressedPageSize + headerSize; LOG.debug("{}: write dictionary page content {}", out.getPos(), compressedPageSize); dictionaryPage.getBytes().writeAllTo(out); encodingStatsBuilder.addDictEncoding(dictionaryPage.getEncoding()); currentEncodings.add(dictionaryPage.getEncoding()); }
/** * writes a dictionary page page * @param dictionaryPage the dictionary page * @throws IOException if there is an error while writing */ public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException { state = state.write(); LOG.debug("{}: write dictionary page: {} values", out.getPos(), dictionaryPage.getDictionarySize()); currentChunkDictionaryPageOffset = out.getPos(); int uncompressedSize = dictionaryPage.getUncompressedSize(); int compressedPageSize = (int)dictionaryPage.getBytes().size(); // TODO: fix casts metadataConverter.writeDictionaryPageHeader( uncompressedSize, compressedPageSize, dictionaryPage.getDictionarySize(), dictionaryPage.getEncoding(), out); long headerSize = out.getPos() - currentChunkDictionaryPageOffset; this.uncompressedLength += uncompressedSize + headerSize; this.compressedLength += compressedPageSize + headerSize; LOG.debug("{}: write dictionary page content {}", out.getPos(), compressedPageSize); dictionaryPage.getBytes().writeAllTo(out); encodingStatsBuilder.addDictEncoding(dictionaryPage.getEncoding()); currentEncodings.add(dictionaryPage.getEncoding()); }
/** * Reads and decompresses a dictionary page for the given column chunk. * * Returns null if the given column chunk has no dictionary page. * * @param meta a column's ColumnChunkMetaData to read the dictionary from * @return an uncompressed DictionaryPage or null * @throws IOException if there is an error while reading the dictionary */ DictionaryPage readDictionary(ColumnChunkMetaData meta) throws IOException { if (!meta.getEncodings().contains(Encoding.PLAIN_DICTIONARY) && !meta.getEncodings().contains(Encoding.RLE_DICTIONARY)) { return null; } // TODO: this should use getDictionaryPageOffset() but it isn't reliable. if (f.getPos() != meta.getStartingPos()) { f.seek(meta.getStartingPos()); } PageHeader pageHeader = Util.readPageHeader(f); if (!pageHeader.isSetDictionary_page_header()) { return null; // TODO: should this complain? } DictionaryPage compressedPage = readCompressedDictionary(pageHeader, f); BytesInputDecompressor decompressor = options.getCodecFactory().getDecompressor(meta.getCodec()); return new DictionaryPage( decompressor.decompress(compressedPage.getBytes(), compressedPage.getUncompressedSize()), compressedPage.getDictionarySize(), compressedPage.getEncoding()); }
/** * Reads and decompresses a dictionary page for the given column chunk. * * Returns null if the given column chunk has no dictionary page. * * @param meta a column's ColumnChunkMetaData to read the dictionary from * @return an uncompressed DictionaryPage or null * @throws IOException if there is an error while reading the dictionary */ DictionaryPage readDictionary(ColumnChunkMetaData meta) throws IOException { if (!meta.getEncodings().contains(Encoding.PLAIN_DICTIONARY) && !meta.getEncodings().contains(Encoding.RLE_DICTIONARY)) { return null; } // TODO: this should use getDictionaryPageOffset() but it isn't reliable. if (f.getPos() != meta.getStartingPos()) { f.seek(meta.getStartingPos()); } PageHeader pageHeader = Util.readPageHeader(f); if (!pageHeader.isSetDictionary_page_header()) { return null; // TODO: should this complain? } DictionaryPage compressedPage = readCompressedDictionary(pageHeader, f); BytesInputDecompressor decompressor = options.getCodecFactory().getDecompressor(meta.getCodec()); return new DictionaryPage( decompressor.decompress(compressedPage.getBytes(), compressedPage.getUncompressedSize()), compressedPage.getDictionarySize(), compressedPage.getEncoding()); }