/**
 * Prepares {@code dataColumn} to decode the values of a newly loaded data page.
 *
 * <p>Stores the page's value count, recomputes {@code endOfPageValueCount}, obtains a reader
 * from {@code ParquetDataColumnReaderFactory} (wrapping a dictionary-based values reader when
 * the encoding uses a dictionary, a plain values reader otherwise) and initialises that reader
 * from the page stream.
 *
 * @param dataEncoding encoding of the page's values
 * @param in stream passed on to the reader's {@code initFromPage}
 * @param valueCount number of values in the page
 * @throws IOException if the encoding uses a dictionary but {@code dictionary} is null, or if
 *     the reader fails to initialise from the page
 */
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount)
    throws IOException {
  this.pageValueCount = valueCount;
  this.endOfPageValueCount = valuesRead + pageValueCount;

  final boolean dictionaryEncoded = dataEncoding.usesDictionary();
  if (dictionaryEncoded) {
    // The reader is cleared before the dictionary check, so a failed check leaves it null.
    this.dataColumn = null;
    if (dictionary == null) {
      throw new IOException(
          "could not read page in col " + descriptor
              + " as the dictionary was missing for encoding " + dataEncoding);
    }
  }

  this.dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(
      type.asPrimitiveType(),
      hiveType,
      dictionaryEncoded
          ? dataEncoding.getDictionaryBasedValuesReader(descriptor, VALUES, dictionary.getDictionary())
          : dataEncoding.getValuesReader(descriptor, VALUES),
      skipTimestampConversion);
  this.isCurrentPageDictionaryEncoded = dictionaryEncoded;

  try {
    dataColumn.initFromPage(pageValueCount, in);
  } catch (IOException cause) {
    // Re-thrown with the column descriptor added for context; the cause is preserved.
    throw new IOException("could not read page in col " + descriptor, cause);
  }
}
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in) throws IOException { this.endOfPageValueCount = valuesRead + pageValueCount; if (dataEncoding.usesDictionary()) { this.dataColumn = null; if (dictionary == null) { throw new IOException( "could not read page in col " + descriptor + " as the dictionary was missing for encoding " + dataEncoding); } @SuppressWarnings("deprecation") Encoding plainDict = Encoding.PLAIN_DICTIONARY; // var to allow warning suppression if (dataEncoding != plainDict && dataEncoding != Encoding.RLE_DICTIONARY) { throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding); } this.dataColumn = new VectorizedRleValuesReader(); this.isCurrentPageDictionaryEncoded = true; } else { if (dataEncoding != Encoding.PLAIN) { throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding); } this.dataColumn = new VectorizedPlainValuesReader(); this.isCurrentPageDictionaryEncoded = false; } try { dataColumn.initFromPage(pageValueCount, in); } catch (IOException e) { throw new IOException("could not read page in col " + descriptor, e); } }
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in) throws IOException { this.endOfPageValueCount = valuesRead + pageValueCount; if (dataEncoding.usesDictionary()) { this.dataColumn = null; if (dictionary == null) { throw new IOException( "could not read page in col " + descriptor + " as the dictionary was missing for encoding " + dataEncoding); } @SuppressWarnings("deprecation") Encoding plainDict = Encoding.PLAIN_DICTIONARY; // var to allow warning suppression if (dataEncoding != plainDict && dataEncoding != Encoding.RLE_DICTIONARY) { throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding); } this.dataColumn = new VectorizedRleValuesReader(); this.isCurrentPageDictionaryEncoded = true; } else { if (dataEncoding != Encoding.PLAIN) { throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding); } this.dataColumn = new VectorizedPlainValuesReader(); this.isCurrentPageDictionaryEncoded = false; } try { dataColumn.initFromPage(pageValueCount, in); } catch (IOException e) { throw new IOException("could not read page in col " + descriptor, e); } }
private void initDataReader(Encoding dataEncoding, byte[] bytes, int offset) throws IOException { this.endOfPageValueCount = valuesRead + pageValueCount; if (dataEncoding.usesDictionary()) { this.dataColumn = null; if (dictionary == null) { throw new IOException( "could not read page in col " + descriptor + " as the dictionary was missing for encoding " + dataEncoding); } @SuppressWarnings("deprecation") Encoding plainDict = Encoding.PLAIN_DICTIONARY; // var to allow warning suppression if (dataEncoding != plainDict && dataEncoding != Encoding.RLE_DICTIONARY) { throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding); } this.dataColumn = new VectorizedRleValuesReader(); this.isCurrentPageDictionaryEncoded = true; } else { if (dataEncoding != Encoding.PLAIN) { throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding); } this.dataColumn = new VectorizedPlainValuesReader(); this.isCurrentPageDictionaryEncoded = false; } try { dataColumn.initFromPage(pageValueCount, bytes, offset); } catch (IOException e) { throw new IOException("could not read page in col " + descriptor, e); } }
/**
 * Reports whether any of the given encodings uses a dictionary.
 *
 * @param encodings encodings to inspect
 * @return {@code true} if at least one element's {@code usesDictionary()} returns true,
 *     {@code false} otherwise (including for an empty collection)
 */
private boolean isDictionaryEncoded(Collection<Encoding> encodings) {
  // anyMatch short-circuits on the first hit, like the early return of an explicit loop.
  return encodings.stream().anyMatch(Encoding::usesDictionary);
}
/**
 * Returns the encoding reported by {@code currentWriter}.
 *
 * <p>As a side effect, while {@code fellBackAlready} is false and
 * {@code initialUsedAndHadDictionary} has not yet been set to true, the latter is updated to
 * whether the returned encoding uses a dictionary.
 *
 * @return the current writer's encoding
 */
@Override
public Encoding getEncoding() {
  final Encoding active = currentWriter.getEncoding();
  // Once either flag is true there is nothing left to record.
  if (fellBackAlready || initialUsedAndHadDictionary) {
    return active;
  }
  initialUsedAndHadDictionary = active.usesDictionary();
  return active;
}
/**
 * Delegates to {@code currentWriter} for the encoding and returns it.
 *
 * <p>Before returning, if no fallback has happened ({@code fellBackAlready} is false) and
 * {@code initialUsedAndHadDictionary} is still false, that field is assigned the result of
 * the encoding's {@code usesDictionary()}.
 *
 * @return the current writer's encoding
 */
@Override
public Encoding getEncoding() {
  final Encoding writerEncoding = currentWriter.getEncoding();
  final boolean alreadyDecided = fellBackAlready || initialUsedAndHadDictionary;
  if (!alreadyDecided) {
    initialUsedAndHadDictionary = writerEncoding.usesDictionary();
  }
  return writerEncoding;
}
private void initDataReader(Encoding dataEncoding, byte[] bytes, int offset) throws IOException { this.endOfPageValueCount = valuesRead + pageValueCount; if (dataEncoding.usesDictionary()) { this.dataColumn = null; if (dictionary == null) { throw new IOException( "could not read page in col " + descriptor + " as the dictionary was missing for encoding " + dataEncoding); } @SuppressWarnings("deprecation") Encoding plainDict = Encoding.PLAIN_DICTIONARY; // var to allow warning suppression if (dataEncoding != plainDict && dataEncoding != Encoding.RLE_DICTIONARY) { throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding); } this.dataColumn = new VectorizedRleValuesReader(); this.isCurrentPageDictionaryEncoded = true; } else { if (dataEncoding != Encoding.PLAIN) { throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding); } this.dataColumn = new VectorizedPlainValuesReader(); this.isCurrentPageDictionaryEncoded = false; } try { dataColumn.initFromPage(pageValueCount, bytes, offset); } catch (IOException e) { throw new IOException("could not read page in col " + descriptor, e); } }
/**
 * Selects and initialises the vectorized values reader for the current data page, reading from
 * a byte array.
 *
 * <p>Any encoding whose {@code usesDictionary()} is true gets a
 * {@code VectorizedRleValuesReader}; otherwise the encoding must be {@code PLAIN} and a
 * {@code VectorizedPlainValuesReader} is used. {@code isCurrentPageDictionaryEncoded} is set
 * to match.
 *
 * @param dataEncoding encoding of the page's values
 * @param bytes page buffer passed on to the reader's {@code initFromPage}
 * @param offset offset passed on to the reader's {@code initFromPage} together with
 *     {@code bytes}
 * @throws IOException if a dictionary encoding is used but {@code dictionary} is null, or if
 *     the reader fails to initialise from the page
 * @throws UnsupportedOperationException if a non-dictionary encoding other than {@code PLAIN}
 *     is given
 */
private void initDataReader(Encoding dataEncoding, byte[] bytes, int offset) throws IOException {
  this.endOfPageValueCount = valuesRead + pageValueCount;

  if (!dataEncoding.usesDictionary()) {
    if (dataEncoding != Encoding.PLAIN) {
      throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding);
    }
    this.dataColumn = new VectorizedPlainValuesReader();
    this.isCurrentPageDictionaryEncoded = false;
  } else {
    // The reader is cleared before the dictionary check, so a failed check leaves it null.
    this.dataColumn = null;
    if (dictionary == null) {
      throw new IOException(
          "could not read page in col " + descriptor
              + " as the dictionary was missing for encoding " + dataEncoding);
    }
    this.dataColumn = new VectorizedRleValuesReader();
    this.isCurrentPageDictionaryEncoded = true;
  }

  try {
    dataColumn.initFromPage(pageValueCount, bytes, offset);
  } catch (IOException cause) {
    // Re-thrown with the column descriptor added for context; the cause is preserved.
    throw new IOException("could not read page in col " + descriptor, cause);
  }
}
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) { ValuesReader previousReader = this.dataColumn; this.currentEncoding = dataEncoding; this.pageValueCount = valueCount; this.endOfPageValueCount = readValues + pageValueCount; if (dataEncoding.usesDictionary()) { if (dictionary == null) { throw new ParquetDecodingException( "could not read page in col " + path + " as the dictionary was missing for encoding " + dataEncoding); } this.dataColumn = dataEncoding.getDictionaryBasedValuesReader(path, VALUES, dictionary); } else { this.dataColumn = dataEncoding.getValuesReader(path, VALUES); } if (dataEncoding.usesDictionary() && converter.hasDictionarySupport()) { bindToDictionary(dictionary); } else { bind(path.getType()); } try { dataColumn.initFromPage(pageValueCount, in); } catch (IOException e) { throw new ParquetDecodingException("could not read page in col " + path, e); } if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding) && previousReader != null && previousReader instanceof RequiresPreviousReader) { // previous reader can only be set if reading sequentially ((RequiresPreviousReader) dataColumn).setPreviousReader(previousReader); } }
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) { ValuesReader previousReader = this.dataColumn; this.currentEncoding = dataEncoding; this.pageValueCount = valueCount; this.endOfPageValueCount = readValues + pageValueCount; if (dataEncoding.usesDictionary()) { if (dictionary == null) { throw new ParquetDecodingException( "could not read page in col " + path + " as the dictionary was missing for encoding " + dataEncoding); } this.dataColumn = dataEncoding.getDictionaryBasedValuesReader(path, VALUES, dictionary); } else { this.dataColumn = dataEncoding.getValuesReader(path, VALUES); } if (dataEncoding.usesDictionary() && converter.hasDictionarySupport()) { bindToDictionary(dictionary); } else { bind(path.getType()); } try { dataColumn.initFromPage(pageValueCount, in); } catch (IOException e) { throw new ParquetDecodingException("could not read page in col " + path, e); } if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding) && previousReader != null && previousReader instanceof RequiresPreviousReader) { // previous reader can only be set if reading sequentially ((RequiresPreviousReader) dataColumn).setPreviousReader(previousReader); } }
if (dataEncoding.usesDictionary()) { if (dict == null) { throw new ParquetDecodingException(
definitionLevels.initFromPage(currentPageCount, pageDataBuffer, (int) readPosInBytes); readPosInBytes = definitionLevels.getNextOffset(); if (!valueEncoding.usesDictionary()) { valueReader = valueEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.VALUES); valueReader.initFromPage(currentPageCount, pageDataBuffer, (int) readPosInBytes); valueReader.initFromPage(currentPageCount, pageDataBuffer, (int) readPosInBytes); if (valueEncoding.usesDictionary()) {
definitionLevels.initFromPage(currentPageCount, in); readPosInBytes = in.position(); if (!valueEncoding.usesDictionary()) { valueReader = valueEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.VALUES); valueReader.initFromPage(currentPageCount, in); valueReader.initFromPage(currentPageCount, in); if (valueEncoding.usesDictionary()) {