/**
 * Translates an {@code Encoding} constant into the {@code ParquetEncoding}
 * constant that carries the same name.
 *
 * @param encoding the encoding to translate
 * @return the same-named {@code ParquetEncoding} constant
 * @throws ParquetDecodingException if {@code encoding} is not one of the
 *         constants handled below
 */
public static ParquetEncoding getParquetEncoding(Encoding encoding) {
    final ParquetEncoding mapped;
    switch (encoding) {
        case PLAIN:
            mapped = ParquetEncoding.PLAIN;
            break;
        case RLE:
            mapped = ParquetEncoding.RLE;
            break;
        case BIT_PACKED:
            mapped = ParquetEncoding.BIT_PACKED;
            break;
        case PLAIN_DICTIONARY:
            mapped = ParquetEncoding.PLAIN_DICTIONARY;
            break;
        case DELTA_BINARY_PACKED:
            mapped = ParquetEncoding.DELTA_BINARY_PACKED;
            break;
        case DELTA_LENGTH_BYTE_ARRAY:
            mapped = ParquetEncoding.DELTA_LENGTH_BYTE_ARRAY;
            break;
        case DELTA_BYTE_ARRAY:
            mapped = ParquetEncoding.DELTA_BYTE_ARRAY;
            break;
        case RLE_DICTIONARY:
            mapped = ParquetEncoding.RLE_DICTIONARY;
            break;
        default:
            throw new ParquetDecodingException("Unsupported Parquet encoding: " + encoding);
    }
    return mapped;
}
/**
 * Reads the next int from {@code decoder}.
 *
 * @return the int produced by {@code decoder.readInt()}
 * @throws ParquetDecodingException wrapping any {@code IOException} raised by the decoder
 */
private int readInt() {
    final int value;
    try {
        value = decoder.readInt();
    } catch (IOException e) {
        throw new ParquetDecodingException(e);
    }
    return value;
}
} // closes the enclosing declaration, whose header is outside this excerpt
/**
 * Reads the next level by asking {@code delegate} for an int.
 *
 * @return the int produced by {@code delegate.readInt()}
 * @throws ParquetDecodingException wrapping any {@code IOException} raised by the delegate
 */
@Override
public int readLevel() {
    final int level;
    try {
        level = delegate.readInt();
    } catch (IOException e) {
        throw new ParquetDecodingException(e);
    }
    return level;
}
} // closes the enclosing declaration, whose header is outside this excerpt
/**
 * Creates the values reader for a data page and initialises it from the page bytes.
 *
 * <p>A dictionary-based reader is requested when {@code dataEncoding.usesDictionary()}
 * is true; otherwise a regular values reader is requested.
 *
 * @param dataEncoding encoding of the page's values
 * @param bytes        buffer handed to {@code initFromPage}
 * @param offset       offset within {@code bytes} handed to {@code initFromPage}
 * @param valueCount   value count handed to {@code initFromPage}
 * @return the initialised reader
 * @throws ParquetDecodingException if the encoding uses a dictionary but none is set,
 *         or if initialising the reader raises an {@code IOException}
 */
private ValuesReader initDataReader(ParquetEncoding dataEncoding, byte[] bytes, int offset, int valueCount) {
    final ValuesReader reader;
    if (!dataEncoding.usesDictionary()) {
        reader = dataEncoding.getValuesReader(columnDescriptor, VALUES);
    } else if (dictionary != null) {
        reader = dataEncoding.getDictionaryBasedValuesReader(columnDescriptor, VALUES, dictionary);
    } else {
        throw new ParquetDecodingException("Dictionary is missing for Page");
    }

    try {
        reader.initFromPage(valueCount, bytes, offset);
    } catch (IOException e) {
        throw new ParquetDecodingException("Error reading parquet page in column " + columnDescriptor, e);
    }
    return reader;
}
} // closes the enclosing declaration, whose header is outside this excerpt
/**
 * Returns the largest level value a reader of the given kind has to represent
 * for the given column.
 *
 * <ul>
 *   <li>{@code REPETITION_LEVEL}: the descriptor's maximum repetition level</li>
 *   <li>{@code DEFINITION_LEVEL}: the descriptor's maximum definition level</li>
 *   <li>{@code VALUES}: {@code 1}, but only when the column type is {@code BOOLEAN}</li>
 * </ul>
 *
 * @param descriptor the column being read
 * @param valuesType which kind of values the reader will decode
 * @return the maximum level as described above
 * @throws ParquetDecodingException if {@code valuesType} is {@code VALUES} for a
 *         non-{@code BOOLEAN} column, or is not one of the handled constants
 */
static int getMaxLevel(ColumnDescriptor descriptor, ValuesType valuesType) {
    switch (valuesType) {
        case REPETITION_LEVEL:
            return descriptor.getMaxRepetitionLevel();
        case DEFINITION_LEVEL:
            return descriptor.getMaxDefinitionLevel();
        case VALUES:
            if (descriptor.getType() == BOOLEAN) {
                return 1;
            }
            // Previously this case fell through into `default` without a marker and
            // reported only the values type, hiding that the column type was the cause.
            throw new ParquetDecodingException(
                    "Unsupported values type: " + valuesType + " for column type " + descriptor.getType());
        default:
            throw new ParquetDecodingException("Unsupported values type: " + valuesType);
    }
}
/**
 * Installs the page reader for this column, decodes its dictionary page when
 * one is present, and records the total value count.
 *
 * @param pageReader source of pages; must not be null
 * @throws NullPointerException     if {@code pageReader} is null
 * @throws ParquetDecodingException if decoding the dictionary page raises an {@code IOException}
 * @throws IllegalArgumentException presumably, via {@code checkArgument}, when the reader
 *         reports a total value count that is not positive (confirm against the
 *         {@code checkArgument} in scope)
 */
public void setPageReader(PageReader pageReader) {
    this.pageReader = requireNonNull(pageReader, "pageReader");

    DictionaryPage dictPage = pageReader.readDictionaryPage();
    if (dictPage == null) {
        // No dictionary page: clear any dictionary left from an earlier reader.
        dictionary = null;
    } else {
        try {
            dictionary = dictPage.getEncoding().initDictionary(columnDescriptor, dictPage);
        } catch (IOException e) {
            throw new ParquetDecodingException("could not decode the dictionary for " + columnDescriptor, e);
        }
    }

    // Validation intentionally happens after the fields above are assigned,
    // matching the established order of side effects.
    checkArgument(pageReader.getTotalValueCount() > 0, "page is empty");
    totalValueCount = pageReader.getTotalValueCount();
}
/**
 * Builds the dictionary implementation that matches the column's primitive type.
 *
 * @param descriptor     column whose type selects the dictionary implementation
 * @param dictionaryPage page passed to the dictionary constructor
 * @return a dictionary constructed from {@code dictionaryPage}
 * @throws IOException              declared by this method; presumably raised by the
 *                                  dictionary constructors
 * @throws ParquetDecodingException if the column type is not handled below
 */
@Override
public Dictionary initDictionary(ColumnDescriptor descriptor, DictionaryPage dictionaryPage) throws IOException {
    final Dictionary result;
    switch (descriptor.getType()) {
        case BINARY:
            result = new BinaryDictionary(dictionaryPage);
            break;
        case FIXED_LEN_BYTE_ARRAY:
            // Fixed-width binary: the width comes from the column descriptor.
            result = new BinaryDictionary(dictionaryPage, descriptor.getTypeLength());
            break;
        case INT96:
            result = new BinaryDictionary(dictionaryPage, INT96_TYPE_LENGTH);
            break;
        case INT64:
            result = new LongDictionary(dictionaryPage);
            break;
        case DOUBLE:
            result = new DoubleDictionary(dictionaryPage);
            break;
        case INT32:
            result = new IntegerDictionary(dictionaryPage);
            break;
        case FLOAT:
            result = new FloatDictionary(dictionaryPage);
            break;
        default:
            throw new ParquetDecodingException("Dictionary encoding does not support: " + descriptor.getType());
    }
    return result;
}
}, // closes the enclosing declaration, whose header is outside this excerpt
/**
 * Creates the plain values reader that matches the column's primitive type.
 *
 * @param descriptor column whose type selects the reader implementation
 * @param valuesType not consulted by this implementation
 * @return a new reader for the column's type
 * @throws ParquetDecodingException if the column type is not handled below
 */
@Override
public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType) {
    switch (descriptor.getType()) {
        // Numeric and boolean types
        case BOOLEAN:
            return new BooleanPlainValuesReader();
        case INT32:
            return new IntegerPlainValuesReader();
        case INT64:
            return new LongPlainValuesReader();
        case FLOAT:
            return new FloatPlainValuesReader();
        case DOUBLE:
            return new DoublePlainValuesReader();

        // Byte-array types
        case BINARY:
            return new BinaryPlainValuesReader();
        case INT96:
            return new FixedLenByteArrayPlainValuesReader(INT96_TYPE_LENGTH);
        case FIXED_LEN_BYTE_ARRAY:
            return new FixedLenByteArrayPlainValuesReader(descriptor.getTypeLength());

        default:
            throw new ParquetDecodingException("Plain values reader does not support: " + descriptor.getType());
    }
}
/**
 * Prepares the repetition-level, definition-level and data readers for a V1 data page.
 *
 * <p>All three readers are initialised from the same byte array: repetition levels
 * start at offset 0, and each following section starts at the offset the previous
 * reader reports through {@code getNextOffset()}.
 *
 * @param page the V1 data page to read
 * @return the initialised reader for the page's values
 * @throws ParquetDecodingException if an {@code IOException} is raised while initialising
 *         the level readers (failures from {@code initDataReader} propagate as thrown)
 */
private ValuesReader readPageV1(DataPageV1 page) {
    ValuesReader repetitionLevels = page.getRepetitionLevelEncoding().getValuesReader(columnDescriptor, REPETITION_LEVEL);
    ValuesReader definitionLevels = page.getDefinitionLevelEncoding().getValuesReader(columnDescriptor, DEFINITION_LEVEL);
    repetitionReader = new LevelValuesReader(repetitionLevels);
    definitionReader = new LevelValuesReader(definitionLevels);

    try {
        byte[] pageBytes = page.getSlice().getBytes();

        repetitionLevels.initFromPage(page.getValueCount(), pageBytes, 0);
        final int definitionStart = repetitionLevels.getNextOffset();

        definitionLevels.initFromPage(page.getValueCount(), pageBytes, definitionStart);
        final int dataStart = definitionLevels.getNextOffset();

        return initDataReader(page.getValueEncoding(), pageBytes, dataStart, page.getValueCount());
    } catch (IOException e) {
        throw new ParquetDecodingException("Error reading parquet page " + page + " in column " + columnDescriptor, e);
    }
}
/**
 * Guards a read: fails once {@code valuesRead} has reached {@code totalValueCount}.
 *
 * @throws ParquetDecodingException if {@code valuesRead >= totalValueCount}
 */
private void checkRead() {
    final boolean exhausted = valuesRead >= totalValueCount;
    if (exhausted) {
        throw new ParquetDecodingException("no more value to read, total value count is " + totalValueCount);
    }
}
/**
 * Skips one value: reads an int at the current {@code offset} with
 * {@code BytesUtils.readIntLittleEndian} and advances {@code offset} by 4 plus that int.
 *
 * @throws ParquetDecodingException wrapping any {@code IOException} or
 *         {@code RuntimeException} raised while reading the int
 */
@Override
public void skip() {
    try {
        int valueLength = BytesUtils.readIntLittleEndian(in, offset);
        // 4 accounts for the int that was just read, followed by valueLength more bytes.
        int advance = 4 + valueLength;
        offset += advance;
    } catch (IOException e) {
        throw new ParquetDecodingException("could not skip bytes at offset " + offset, e);
    } catch (RuntimeException e) {
        // Runtime failures are reported with the same message as I/O failures.
        throw new ParquetDecodingException("could not skip bytes at offset " + offset, e);
    }
}
/**
 * {@inheritDoc}
 *
 * <p>This implementation returns the next value produced by {@code bitPackingReader}.
 *
 * @throws ParquetDecodingException wrapping any {@code IOException} raised by the reader
 * @see parquet.column.values.ValuesReader#readInteger()
 */
@Override
public int readInteger() {
    final int value;
    try {
        value = bitPackingReader.read();
    } catch (IOException e) {
        throw new ParquetDecodingException(e);
    }
    return value;
}
/**
 * Skips one int by asking {@code in} to skip 4 bytes.
 *
 * <p>NOTE(review): any value returned by {@code skipBytes} is not inspected here;
 * confirm that a short skip cannot occur for this stream.
 *
 * @throws ParquetDecodingException wrapping any {@code IOException} raised by the stream
 */
@Override
public void skip() {
    final int intSizeInBytes = 4;
    try {
        in.skipBytes(intSizeInBytes);
    } catch (IOException e) {
        throw new ParquetDecodingException("could not skip int", e);
    }
}
/**
 * Returns the next level, obtained from {@code delegate.readInt()}.
 *
 * @return the int read from the delegate
 * @throws ParquetDecodingException wrapping any {@code IOException} raised by the delegate
 */
@Override
public int readLevel() {
    final int nextLevel;
    try {
        nextLevel = delegate.readInt();
    } catch (IOException e) {
        throw new ParquetDecodingException(e);
    }
    return nextLevel;
}
} // closes the enclosing declaration, whose header is outside this excerpt
/**
 * Returns the next int, obtained from {@code delegate.readInt()}.
 *
 * @return the int read from the delegate
 * @throws ParquetDecodingException wrapping any {@code IOException} raised by the delegate
 */
@Override
int nextInt() {
    final int next;
    try {
        next = delegate.readInt();
    } catch (IOException e) {
        throw new ParquetDecodingException(e);
    }
    return next;
}
} // closes the enclosing declaration, whose header is outside this excerpt
/**
 * Reads the next double from {@code in}.
 *
 * @return the double produced by {@code in.readDouble()}
 * @throws ParquetDecodingException wrapping any {@code IOException} raised by the stream
 */
@Override
public double readDouble() {
    final double value;
    try {
        value = in.readDouble();
    } catch (IOException e) {
        throw new ParquetDecodingException("could not read double", e);
    }
    return value;
}
} // closes the enclosing declaration, whose header is outside this excerpt
/**
 * Reads the next int from {@code in}.
 *
 * @return the int produced by {@code in.readInt()}
 * @throws ParquetDecodingException wrapping any {@code IOException} raised by the stream
 */
@Override
public int readInteger() {
    final int value;
    try {
        value = in.readInt();
    } catch (IOException e) {
        throw new ParquetDecodingException("could not read int", e);
    }
    return value;
}
} // closes the enclosing declaration, whose header is outside this excerpt
/**
 * Skips one long by asking {@code in} to skip 8 bytes.
 *
 * <p>NOTE(review): any value returned by {@code skipBytes} is not inspected here;
 * confirm that a short skip cannot occur for this stream.
 *
 * @throws ParquetDecodingException wrapping any {@code IOException} raised by the stream
 */
@Override
public void skip() {
    final int longSizeInBytes = 8;
    try {
        in.skipBytes(longSizeInBytes);
    } catch (IOException e) {
        throw new ParquetDecodingException("could not skip long", e);
    }
}
/**
 * Creates a {@code DeltaByteArrayReader}, which this encoding offers only for
 * {@code BINARY} columns.
 *
 * @param descriptor column whose type must be {@code BINARY}
 * @param valuesType not consulted by this implementation
 * @return a new {@code DeltaByteArrayReader}
 * @throws ParquetDecodingException if the column type is not {@code BINARY}
 */
@Override
public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType) {
    if (descriptor.getType() == BINARY) {
        return new DeltaByteArrayReader();
    }
    throw new ParquetDecodingException("Encoding DELTA_BYTE_ARRAY is only supported for type BINARY");
}
}, // closes the enclosing declaration, whose header is outside this excerpt
/**
 * Prepares the repetition-level, definition-level and data readers for a V2 data page.
 *
 * <p>The level readers are built with {@code buildLevelRLEReader} from the page's
 * separate repetition and definition level sections; the data reader is initialised
 * from the page's data bytes starting at offset 0.
 *
 * @param page the V2 data page to read
 * @return the initialised reader for the page's values
 * @throws ParquetDecodingException if an {@code IOException} is raised while
 *         materialising the page data
 */
private ValuesReader readPageV2(DataPageV2 page) {
    final int maxRepetitionLevel = columnDescriptor.getMaxRepetitionLevel();
    repetitionReader = buildLevelRLEReader(maxRepetitionLevel, page.getRepetitionLevels());

    final int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel();
    definitionReader = buildLevelRLEReader(maxDefinitionLevel, page.getDefinitionLevels());

    try {
        // Locals are evaluated in the same order as the original argument list.
        ParquetEncoding dataEncoding = page.getDataEncoding();
        byte[] dataBytes = page.getData().toByteArray();
        return initDataReader(dataEncoding, dataBytes, 0, page.getValueCount());
    } catch (IOException e) {
        throw new ParquetDecodingException("could not read page " + page + " in col " + columnDescriptor, e);
    }
}