Refine search
/**
 * Creates a column reader and eagerly loads the column's dictionary page, if any.
 *
 * <p>When {@code pageReader} yields a dictionary page, a dictionary-backed data reader is
 * obtained from {@code ParquetDataColumnReaderFactory} and the current page is flagged as
 * dictionary encoded; otherwise the dictionary is {@code null} and the flag is {@code false}.
 *
 * @param descriptor column descriptor; also supplies the maximum definition level
 * @param pageReader source of the dictionary page
 * @param skipTimestampConversion stored on the reader and forwarded to the factory
 * @param parquetType Parquet type of the column; must be convertible via {@code asPrimitiveType()}
 *     when a dictionary page exists
 * @param hiveType Hive type information stored on the reader and forwarded to the factory
 * @throws IOException if the dictionary cannot be decoded
 */
public BaseVectorizedColumnReader(
    ColumnDescriptor descriptor,
    PageReader pageReader,
    boolean skipTimestampConversion,
    Type parquetType,
    TypeInfo hiveType) throws IOException {
  this.descriptor = descriptor;
  this.type = parquetType;
  this.pageReader = pageReader;
  this.maxDefLevel = descriptor.getMaxDefinitionLevel();
  this.skipTimestampConversion = skipTimestampConversion;
  this.hiveType = hiveType;

  DictionaryPage dictionaryPage = pageReader.readDictionaryPage();
  if (dictionaryPage == null) {
    // No dictionary for this column chunk.
    this.dictionary = null;
    this.isCurrentPageDictionaryEncoded = false;
  } else {
    try {
      this.dictionary = ParquetDataColumnReaderFactory.getDataColumnReaderByTypeOnDictionary(
          parquetType.asPrimitiveType(),
          hiveType,
          dictionaryPage.getEncoding().initDictionary(descriptor, dictionaryPage),
          skipTimestampConversion);
      this.isCurrentPageDictionaryEncoded = true;
    } catch (IOException e) {
      // Re-throw with the column identity so the failing column is visible to the caller.
      throw new IOException("could not decode the dictionary for " + descriptor, e);
    }
  }
}
/**
 * Compresses the given dictionary page and keeps the result for this column chunk.
 *
 * <p>The stored page holds a copy of the compressed bytes together with the original
 * (uncompressed) byte count, entry count and encoding of {@code dictionaryPage}.
 *
 * @param dictionaryPage the uncompressed dictionary page to store
 * @throws IOException declared by the signature; may originate from compressing or copying the bytes
 * @throws ParquetEncodingException if a dictionary page has already been stored
 */
@Override
public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
  if (this.dictionaryPage != null) {
    throw new ParquetEncodingException("Only one dictionary page is allowed");
  }
  BytesInput rawBytes = dictionaryPage.getBytes();
  int rawSize = (int) rawBytes.size();
  // Copy the compressor's output before storing it.
  // NOTE(review): presumably the compressor reuses its output buffer - confirm.
  BytesInput packedBytes = BytesInput.copy(compressor.compress(rawBytes));
  this.dictionaryPage = new DictionaryPage(
      packedBytes,
      rawSize,
      dictionaryPage.getDictionarySize(),
      dictionaryPage.getEncoding());
}
/**
 * Builds a new dictionary page whose bytes are materialized into a byte array.
 *
 * @param dict the page to copy; may be {@code null}
 * @return a page backed by a byte array with the same entry count and encoding,
 *     or {@code null} when {@code dict} is {@code null}
 * @throws ParquetDecodingException if the page bytes cannot be read
 */
private static DictionaryPage reusableCopy(DictionaryPage dict) {
  if (dict != null) {
    try {
      byte[] materialized = dict.getBytes().toByteArray();
      return new DictionaryPage(
          BytesInput.from(materialized),
          dict.getDictionarySize(),
          dict.getEncoding());
    } catch (IOException e) {
      throw new ParquetDecodingException("Cannot read dictionary", e);
    }
  }
  return null;
}
/**
 * Decompresses and returns the dictionary page held by this reader.
 *
 * @return a page built from the decompressed bytes, carrying the entry count and encoding
 *     of the compressed page, or {@code null} when there is no compressed dictionary page
 * @throws ParquetDecodingException if decompression fails
 */
@Override
public DictionaryPage readDictionaryPage() {
  if (compressedDictionaryPage != null) {
    try {
      return new DictionaryPage(
          decompressor.decompress(
              compressedDictionaryPage.getBytes(),
              compressedDictionaryPage.getUncompressedSize()),
          compressedDictionaryPage.getDictionarySize(),
          compressedDictionaryPage.getEncoding());
    } catch (IOException e) {
      throw new ParquetDecodingException("Could not decompress dictionary page", e);
    }
  }
  return null;
}
} // NOTE(review): closes an enclosing type whose header lies outside this excerpt
/** * writes a dictionary page page * @param dictionaryPage the dictionary page * @throws IOException if there is an error while writing */ public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException { state = state.write(); LOG.debug("{}: write dictionary page: {} values", out.getPos(), dictionaryPage.getDictionarySize()); currentChunkDictionaryPageOffset = out.getPos(); int uncompressedSize = dictionaryPage.getUncompressedSize(); int compressedPageSize = (int)dictionaryPage.getBytes().size(); // TODO: fix casts metadataConverter.writeDictionaryPageHeader( uncompressedSize, compressedPageSize, dictionaryPage.getDictionarySize(), dictionaryPage.getEncoding(), out); long headerSize = out.getPos() - currentChunkDictionaryPageOffset; this.uncompressedLength += uncompressedSize + headerSize; this.compressedLength += compressedPageSize + headerSize; LOG.debug("{}: write dictionary page content {}", out.getPos(), compressedPageSize); dictionaryPage.getBytes().writeAllTo(out); encodingStatsBuilder.addDictEncoding(dictionaryPage.getEncoding()); currentEncodings.add(dictionaryPage.getEncoding()); }
/**
 * Decodes every entry of a dictionary page into an in-memory {@code double} array.
 *
 * @param dictionaryPage a dictionary page of encoded double values
 * @throws IOException if there is an exception while decoding the dictionary page
 */
public PlainDoubleDictionary(DictionaryPage dictionaryPage) throws IOException {
  super(dictionaryPage);
  ByteBufferInputStream in = dictionaryPage.getBytes().toInputStream();
  final int entryCount = dictionaryPage.getDictionarySize();
  doubleDictionaryContent = new double[entryCount];

  DoublePlainValuesReader plainReader = new DoublePlainValuesReader();
  plainReader.initFromPage(entryCount, in);

  // Entries are read sequentially, so the array index is the dictionary id.
  int slot = 0;
  while (slot < doubleDictionaryContent.length) {
    doubleDictionaryContent[slot] = plainReader.readDouble();
    slot++;
  }
}
/**
 * Reads, decompresses and decodes the dictionary page located at the given page header's offset.
 *
 * <p>The compressed page is read with a loop because a single {@code read(byte[])} call may
 * return fewer bytes than requested without the stream being exhausted; treating such a
 * short read as a failure would reject valid pages.
 *
 * @param in stream positioned via {@code seek} to the page's offset
 * @param column descriptor of the column the dictionary belongs to
 * @param pageHeader page header plus its offset in the stream
 * @param decompressor decompressor used on the page bytes
 * @return the dictionary initialized from the decoded page
 * @throws IOException if the stream ends before the whole page was read, or on any other
 *     read/decompression error
 */
public static Dictionary readDictionary(FSDataInputStream in, ColumnDescriptor column,
    PageHeaderWithOffset pageHeader, BytesDecompressor decompressor) throws IOException {
  in.seek(pageHeader.getOffset());
  final byte[] data = new byte[pageHeader.getPageHeader().getCompressed_page_size()];

  // Keep reading until the buffer is full or the stream signals end-of-file.
  int read = 0;
  while (read < data.length) {
    int chunk = in.read(data, read, data.length - read);
    if (chunk < 0) {
      break;
    }
    read += chunk;
  }
  if (read != data.length) {
    throw new IOException(format("Failed to read dictionary page, read %d bytes, expected %d", read, data.length));
  }

  final DictionaryPage dictionaryPage = new DictionaryPage(
      decompressor.decompress(BytesInput.from(data), pageHeader.getPageHeader().getUncompressed_page_size()),
      pageHeader.getPageHeader().getDictionary_page_header().getNum_values(),
      CONVERTER.getEncoding(pageHeader.getPageHeader().getDictionary_page_header().getEncoding()));
  return dictionaryPage.getEncoding().initDictionary(column, dictionaryPage);
}
/**
 * Uncompresses the page described by {@code pageHeader} and wraps it as a dictionary page.
 *
 * @param pageHeader header of the dictionary page; supplies the uncompressed size,
 *     the number of dictionary values and the encoding
 * @return a dictionary page over the first {@code uncompressed_page_size} bytes of the
 *     uncompressed buffer
 * @throws IOException declared by the signature; may originate from uncompressing the page
 */
private DictionaryPage readDictionaryPageHelper(PageHeader pageHeader) throws IOException {
  ByteBuffer uncompressed = uncompressPage(pageHeader, false);
  BytesInput pageBytes = BytesInput.from(uncompressed, 0, pageHeader.uncompressed_page_size);
  int entryCount = pageHeader.getDictionary_page_header().getNum_values();
  return new DictionaryPage(
      pageBytes,
      entryCount,
      parquetMetadataConverter.getEncoding(pageHeader.dictionary_page_header.encoding));
}
/**
 * Reads the dictionary page described by {@code pageHeader} and installs the resulting
 * dictionary on this reader.
 *
 * <p>The buffer returned by {@code readPage} is added to {@code allocatedDictionaryBuffers}.
 * NOTE(review): presumably so it can be released later - confirm against the owner of that list.
 *
 * @param pageHeader header of the dictionary page (sizes, value count, encoding)
 * @param parentStatus column reader whose {@code columnDescriptor} is used to initialize the dictionary
 * @throws IOException declared by the signature; may originate from reading the page or
 *     initializing the dictionary
 */
private void readDictionaryPage(final PageHeader pageHeader, final ColumnReader<?> parentStatus)
    throws IOException {
  final int compressedSize = pageHeader.getCompressed_page_size();
  final int uncompressedSize = pageHeader.getUncompressed_page_size();

  final DrillBuf pageBuffer = readPage(pageHeader, compressedSize, uncompressedSize);
  allocatedDictionaryBuffers.add(pageBuffer);

  final DictionaryPage decodedPage = new DictionaryPage(
      asBytesInput(pageBuffer, 0, uncompressedSize),
      pageHeader.uncompressed_page_size,
      pageHeader.dictionary_page_header.num_values,
      valueOf(pageHeader.dictionary_page_header.encoding.name()));

  this.dictionary = decodedPage.getEncoding().initDictionary(parentStatus.columnDescriptor, decodedPage);
}
/** * Writes a number of pages within corresponding column chunk * @param writer the parquet file writer * @throws IOException if the file can not be created */ public void writeToFileWriter(ParquetFileWriter writer) throws IOException { writer.startColumn(path, totalValueCount, compressor.getCodecName()); if (dictionaryPage != null) { writer.writeDictionaryPage(dictionaryPage); // tracking the dictionary encoding is handled in writeDictionaryPage } writer.writeDataPages(BytesInput.from(buf), uncompressedLength, compressedLength, totalStatistics, rlEncodings, dlEncodings, dataEncodings); writer.endColumn(); logger.debug( String.format( "written %,dB for %s: %,d values, %,dB raw, %,dB comp, %d pages, encodings: %s", buf.size(), path, totalValueCount, uncompressedLength, compressedLength, pageCount, Sets.newHashSet(dataEncodings)) + (dictionaryPage != null ? String.format( ", dic { %,d entries, %,dB raw, %,dB comp}", dictionaryPage.getDictionarySize(), dictionaryPage.getUncompressedSize(), dictionaryPage.getDictionarySize()) : "")); rlEncodings.clear(); dlEncodings.clear(); dataEncodings.clear(); pageCount = 0; }
if (dictionaryPage == null) { dictionaryPage = new DictionaryPage( decompressor.decompress(BytesInput.from(in, pageHeader.compressed_page_size), pageHeader.getUncompressed_page_size()), pageHeader.uncompressed_page_size, parquetMetadataConverter.getEncoding(pageHeader.dictionary_page_header.encoding) buffer.flip(); return new DataPageV1( decompressor.decompress(BytesInput.from(buffer), pageHeader.getUncompressed_page_size()), pageHeader.data_page_header.num_values, pageHeader.uncompressed_page_size, BytesInput decompressedPageData = decompressor.decompress( BytesInput.from(buffer), pageHeader.uncompressed_page_size); ByteBuffer byteBuffer = decompressedPageData.toByteBuffer();
// Obtain the dictionary page's bytes as a ByteBuffer, then allocate one Binary slot per
// dictionary entry (array length comes from getDictionarySize()).
final ByteBuffer dictionaryBytes = dictionaryPage.getBytes().toByteBuffer(); binaryDictionaryContent = new Binary[dictionaryPage.getDictionarySize()];
/**
 * Creates a new dictionary page over a copy of this page's bytes.
 *
 * @return a page with copied bytes and the same uncompressed size, entry count and encoding
 * @throws IOException declared by the signature; may originate from copying the bytes
 */
public DictionaryPage copy() throws IOException {
  BytesInput copiedBytes = BytesInput.copy(bytes);
  return new DictionaryPage(copiedBytes, getUncompressedSize(), dictionarySize, encoding);
}
/**
 * Describes this page by its byte size, entry count, uncompressed size and encoding.
 *
 * @return a string of the form
 *     {@code Page [bytes.size=…, entryCount=…, uncompressedSize=…, encoding=…]}
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder("Page [bytes.size=");
  text.append(bytes.size());
  text.append(", entryCount=").append(dictionarySize);
  text.append(", uncompressedSize=").append(getUncompressedSize());
  text.append(", encoding=").append(encoding);
  text.append("]");
  return text.toString();
}
/**
 * Hands this column chunk (dictionary page, buffered data pages, statistics and indexes)
 * to the given file writer, logs a summary when debug logging is enabled, and resets the
 * per-chunk encoding sets and page count.
 *
 * @param writer the parquet file writer
 * @throws IOException if writing the column chunk fails
 */
public void writeToFileWriter(ParquetFileWriter writer) throws IOException {
  writer.writeColumnChunk(
      path,
      totalValueCount,
      compressor.getCodecName(),
      dictionaryPage,
      buf,
      uncompressedLength,
      compressedLength,
      totalStatistics,
      columnIndexBuilder,
      offsetIndexBuilder,
      rlEncodings,
      dlEncodings,
      dataEncodings);
  if (LOG.isDebugEnabled()) {
    LOG.debug(
        String.format(
            "written %,dB for %s: %,d values, %,dB raw, %,dB comp, %d pages, encodings: %s",
            buf.size(), path, totalValueCount, uncompressedLength, compressedLength, pageCount,
            new HashSet<Encoding>(dataEncodings))
            + (dictionaryPage != null
                ? String.format(
                    ", dic { %,d entries, %,dB raw, %,dB comp}",
                    dictionaryPage.getDictionarySize(),
                    dictionaryPage.getUncompressedSize(),
                    // "comp" is the byte size of the page's stored bytes, not the entry count.
                    dictionaryPage.getBytes().size())
                : ""));
  }
  rlEncodings.clear();
  dlEncodings.clear();
  dataEncodings.clear();
  pageCount = 0;
}
/**
 * Builds the dictionary page from the bytes produced by {@code dictPageWriter}, then closes
 * that writer.
 *
 * @param dictPageWriter writer holding the encoded dictionary entries; closed before returning
 * @return a page with the writer's bytes, {@code lastUsedDictionarySize} entries and
 *     {@code encodingForDictionaryPage} as encoding
 */
protected DictionaryPage dictPage(ValuesWriter dictPageWriter) {
  // The page is constructed before the writer is closed; keep this order.
  final DictionaryPage builtPage = new DictionaryPage(
      dictPageWriter.getBytes(),
      lastUsedDictionarySize,
      encodingForDictionaryPage);
  dictPageWriter.close();
  return builtPage;
}
/**
 * Compresses the given dictionary page and keeps the result for this column chunk.
 *
 * <p>The stored page holds a copy of the compressed bytes together with the original
 * (uncompressed) byte count, entry count and encoding of {@code dictionaryPage}.
 *
 * @param dictionaryPage the uncompressed dictionary page to store
 * @throws IOException declared by the signature; may originate from compressing or copying the bytes
 * @throws ParquetEncodingException if a dictionary page has already been stored
 */
@Override
public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
  if (this.dictionaryPage != null) {
    throw new ParquetEncodingException("Only one dictionary page is allowed");
  }
  BytesInput rawBytes = dictionaryPage.getBytes();
  int rawSize = (int) rawBytes.size();
  // Copy the compressor's output before storing it.
  // NOTE(review): presumably the compressor reuses its output buffer - confirm.
  BytesInput packedBytes = BytesInput.copy(compressor.compress(rawBytes));
  this.dictionaryPage = new DictionaryPage(
      packedBytes,
      rawSize,
      dictionaryPage.getDictionarySize(),
      dictionaryPage.getEncoding());
}
/**
 * Builds a new dictionary page whose bytes are materialized into a byte array.
 *
 * @param dict the page to copy; may be {@code null}
 * @return a page backed by a byte array with the same entry count and encoding,
 *     or {@code null} when {@code dict} is {@code null}
 * @throws ParquetDecodingException if the page bytes cannot be read
 */
private static DictionaryPage reusableCopy(DictionaryPage dict) {
  if (dict != null) {
    try {
      byte[] materialized = dict.getBytes().toByteArray();
      return new DictionaryPage(
          BytesInput.from(materialized),
          dict.getDictionarySize(),
          dict.getEncoding());
    } catch (IOException e) {
      throw new ParquetDecodingException("Cannot read dictionary", e);
    }
  }
  return null;
}
/**
 * Decompresses and returns the dictionary page held by this reader.
 *
 * @return a page built from the decompressed bytes, carrying the entry count and encoding
 *     of the compressed page, or {@code null} when there is no compressed dictionary page
 * @throws ParquetDecodingException if decompression fails
 */
@Override
public DictionaryPage readDictionaryPage() {
  if (compressedDictionaryPage != null) {
    try {
      return new DictionaryPage(
          decompressor.decompress(
              compressedDictionaryPage.getBytes(),
              compressedDictionaryPage.getUncompressedSize()),
          compressedDictionaryPage.getDictionarySize(),
          compressedDictionaryPage.getEncoding());
    } catch (IOException e) {
      throw new ParquetDecodingException("Could not decompress dictionary page", e);
    }
  }
  return null;
}
} // NOTE(review): closes an enclosing type whose header lies outside this excerpt
/** * writes a dictionary page page * @param dictionaryPage the dictionary page * @throws IOException if there is an error while writing */ public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException { state = state.write(); LOG.debug("{}: write dictionary page: {} values", out.getPos(), dictionaryPage.getDictionarySize()); currentChunkDictionaryPageOffset = out.getPos(); int uncompressedSize = dictionaryPage.getUncompressedSize(); int compressedPageSize = (int)dictionaryPage.getBytes().size(); // TODO: fix casts metadataConverter.writeDictionaryPageHeader( uncompressedSize, compressedPageSize, dictionaryPage.getDictionarySize(), dictionaryPage.getEncoding(), out); long headerSize = out.getPos() - currentChunkDictionaryPageOffset; this.uncompressedLength += uncompressedSize + headerSize; this.compressedLength += compressedPageSize + headerSize; LOG.debug("{}: write dictionary page content {}", out.getPos(), compressedPageSize); dictionaryPage.getBytes().writeAllTo(out); encodingStatsBuilder.addDictEncoding(dictionaryPage.getEncoding()); currentEncodings.add(dictionaryPage.getEncoding()); }