public void writeToFileWriter(ParquetFileWriter writer) throws IOException { writer.startColumn(path, totalValueCount, compressor.getCodecName()); if (dictionaryPage != null) { writer.writeDictionaryPage(dictionaryPage); // tracking the dictionary encoding is handled in writeDictionaryPage } writer.writeDataPages(buf, uncompressedLength, compressedLength, totalStatistics, rlEncodings, dlEncodings, dataEncodings); writer.endColumn(); if (LOG.isDebugEnabled()) { LOG.debug( String.format( "written %,dB for %s: %,d values, %,dB raw, %,dB comp, %d pages, encodings: %s", buf.size(), path, totalValueCount, uncompressedLength, compressedLength, pageCount, new HashSet<Encoding>(dataEncodings)) + (dictionaryPage != null ? String.format( ", dic { %,d entries, %,dB raw, %,dB comp}", dictionaryPage.getDictionarySize(), dictionaryPage.getUncompressedSize(), dictionaryPage.getDictionarySize()) : "")); } rlEncodings.clear(); dlEncodings.clear(); dataEncodings.clear(); pageCount = 0; }
/** * Writes a number of pages within corresponding column chunk * @param writer the parquet file writer * @throws IOException if the file can not be created */ public void writeToFileWriter(ParquetFileWriter writer) throws IOException { writer.startColumn(path, totalValueCount, compressor.getCodecName()); if (dictionaryPage != null) { writer.writeDictionaryPage(dictionaryPage); // tracking the dictionary encoding is handled in writeDictionaryPage } writer.writeDataPages(BytesInput.from(buf), uncompressedLength, compressedLength, totalStatistics, rlEncodings, dlEncodings, dataEncodings); writer.endColumn(); logger.debug( String.format( "written %,dB for %s: %,d values, %,dB raw, %,dB comp, %d pages, encodings: %s", buf.size(), path, totalValueCount, uncompressedLength, compressedLength, pageCount, Sets.newHashSet(dataEncodings)) + (dictionaryPage != null ? String.format( ", dic { %,d entries, %,dB raw, %,dB comp}", dictionaryPage.getDictionarySize(), dictionaryPage.getUncompressedSize(), dictionaryPage.getDictionarySize()) : "")); rlEncodings.clear(); dlEncodings.clear(); dataEncodings.clear(); pageCount = 0; }
Set<Encoding> dlEncodings, List<Encoding> dataEncodings) throws IOException { startColumn(descriptor, valueCount, compressionCodecName);
ColumnDescriptor c1 = schema.getColumnDescription(path1); w.startColumn(c1, props.recordsPerRowGroup, codec); final int valsPerPage = (int) Math.ceil(props.recordsPerRowGroup / (float) fieldInfo.numberOfPages); final int PAGE_SIZE = 1024 * 1024; // 1 MB