break; case FIXED_LEN_BYTE_ARRAY: readFixedLenByteArrayBatch(rowId, num, column, descriptor.getTypeLength()); break; default:
private ValuesWriter getFixedLenByteArrayValuesWriter(ColumnDescriptor path) { // dictionary encoding was not enabled in PARQUET 1.0 return new FixedLenByteArrayPlainValuesWriter(path.getTypeLength(), parquetProperties.getInitialSlabSize(), parquetProperties.getPageSizeThreshold(), parquetProperties.getAllocator()); }
private ValuesWriter getFixedLenByteArrayValuesWriter(ColumnDescriptor path) { // dictionary encoding was not enabled in PARQUET 1.0 return new FixedLenByteArrayPlainValuesWriter(path.getTypeLength(), parquetProperties.getInitialSlabSize(), parquetProperties.getPageSizeThreshold(), parquetProperties.getAllocator()); }
/**
 * Resolves the Drill-side type information for {@code column} and stores the results in {@code se},
 * {@code type}, {@code field} and {@code length}.
 *
 * @param schemaElements schema elements keyed by the value returned from
 *                       {@code ParquetReaderUtility.getFullColumnPath}
 * @param options option manager forwarded to the Parquet-to-Drill type conversion
 */
public void resolveDrillType(Map<String, SchemaElement> schemaElements, OptionManager options) {
  // Look up this column's schema element by its full column path.
  se = schemaElements.get(ParquetReaderUtility.getFullColumnPath(column));
  // Derive the major type from the primitive type, its type length, the data mode and the schema element.
  type = ParquetToDrillTypeConverter.toMajorType(column.getType(), column.getTypeLength(), getDataMode(column), se, options);
  // The field is named after the name segment of the last segment of the column path.
  field = MaterializedField.create(toFieldName(column.getPath()).getLastSegment().getNameSegment().getPath(), type);
  // NOTE(review): getDataTypeLength() presumably reads the values assigned above, so keep it last - confirm.
  length = getDataTypeLength();
}
/**
 * Builds the dictionary for a dictionary page, choosing the implementation from the column's primitive type.
 *
 * @param descriptor column whose primitive type (and, for FIXED_LEN_BYTE_ARRAY, type length) selects the dictionary
 * @param dictionaryPage page handed to the dictionary constructor
 * @return the dictionary matching the column's primitive type
 * @throws IOException declared by the signature; presumably raised by the dictionary constructors - confirm
 * @throws ParquetDecodingException if the primitive type has no case below
 */
@Override
public Dictionary initDictionary(ColumnDescriptor descriptor, DictionaryPage dictionaryPage) throws IOException {
  final Dictionary dictionary;
  switch (descriptor.getType()) {
    case BINARY:
      dictionary = new PlainBinaryDictionary(dictionaryPage);
      break;
    case FIXED_LEN_BYTE_ARRAY:
      dictionary = new PlainBinaryDictionary(dictionaryPage, descriptor.getTypeLength());
      break;
    case INT96:
      // INT96 values are read as binaries with a fixed length of 12.
      dictionary = new PlainBinaryDictionary(dictionaryPage, 12);
      break;
    case INT64:
      dictionary = new PlainLongDictionary(dictionaryPage);
      break;
    case DOUBLE:
      dictionary = new PlainDoubleDictionary(dictionaryPage);
      break;
    case INT32:
      dictionary = new PlainIntegerDictionary(dictionaryPage);
      break;
    case FLOAT:
      dictionary = new PlainFloatDictionary(dictionaryPage);
      break;
    default:
      throw new ParquetDecodingException("Dictionary encoding not supported for type: " + descriptor.getType());
  }
  return dictionary;
}
},
/**
 * Creates the dictionary implementation that corresponds to the column's primitive type.
 *
 * @param descriptor column descriptor; its type picks the dictionary class and, for FIXED_LEN_BYTE_ARRAY,
 *                   its type length is passed along
 * @param dictionaryPage dictionary page passed to the chosen dictionary constructor
 * @return a new dictionary for the page
 * @throws IOException declared by the signature; presumably raised by the dictionary constructors - confirm
 * @throws ParquetDecodingException for any primitive type not listed in the switch
 */
@Override
public Dictionary initDictionary(ColumnDescriptor descriptor, DictionaryPage dictionaryPage) throws IOException {
  switch (descriptor.getType()) {
    // Numeric types.
    case INT32:
      return new PlainIntegerDictionary(dictionaryPage);
    case INT64:
      return new PlainLongDictionary(dictionaryPage);
    case FLOAT:
      return new PlainFloatDictionary(dictionaryPage);
    case DOUBLE:
      return new PlainDoubleDictionary(dictionaryPage);

    // Binary-backed types: variable length, declared length, and the fixed length 12 used for INT96.
    case BINARY:
      return new PlainBinaryDictionary(dictionaryPage);
    case FIXED_LEN_BYTE_ARRAY:
      return new PlainBinaryDictionary(dictionaryPage, descriptor.getTypeLength());
    case INT96:
      return new PlainBinaryDictionary(dictionaryPage, 12);

    default:
      throw new ParquetDecodingException("Dictionary encoding not supported for type: " + descriptor.getType());
  }
}
},
/**
 * Builds the dictionary for a dictionary page, choosing the implementation from the column's primitive type.
 *
 * @param descriptor column whose primitive type (and, for FIXED_LEN_BYTE_ARRAY, type length) selects the dictionary
 * @param dictionaryPage page handed to the dictionary constructor
 * @return the dictionary matching the column's primitive type
 * @throws IOException declared by the signature; presumably raised by the dictionary constructors - confirm
 * @throws ParquetDecodingException if the primitive type has no case below
 */
@Override
public Dictionary initDictionary(ColumnDescriptor descriptor, DictionaryPage dictionaryPage) throws IOException {
  final Dictionary dictionary;
  switch (descriptor.getType()) {
    case BINARY:
      dictionary = new BinaryDictionary(dictionaryPage);
      break;
    case FIXED_LEN_BYTE_ARRAY:
      dictionary = new BinaryDictionary(dictionaryPage, descriptor.getTypeLength());
      break;
    case INT96:
      // INT96 is read as a binary of INT96_TYPE_LENGTH bytes.
      dictionary = new BinaryDictionary(dictionaryPage, INT96_TYPE_LENGTH);
      break;
    case INT64:
      dictionary = new LongDictionary(dictionaryPage);
      break;
    case DOUBLE:
      dictionary = new DoubleDictionary(dictionaryPage);
      break;
    case INT32:
      dictionary = new IntegerDictionary(dictionaryPage);
      break;
    case FLOAT:
      dictionary = new FloatDictionary(dictionaryPage);
      break;
    default:
      throw new ParquetDecodingException("Dictionary encoding does not support: " + descriptor.getType());
  }
  return dictionary;
}
},
/**
 * Selects the plain-encoding values reader for the column's primitive type.
 *
 * @param descriptor column descriptor; its type picks the reader and, for FIXED_LEN_BYTE_ARRAY, its type length
 *                   is passed to the reader
 * @param valuesType not consulted by this method
 * @return a new reader for the column's primitive type
 * @throws ParquetDecodingException if the primitive type has no case below
 */
@Override
public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType) {
  final ValuesReader reader;
  switch (descriptor.getType()) {
    case BOOLEAN:
      reader = new BooleanPlainValuesReader();
      break;
    case BINARY:
      reader = new BinaryPlainValuesReader();
      break;
    case FLOAT:
      reader = new FloatPlainValuesReader();
      break;
    case DOUBLE:
      reader = new DoublePlainValuesReader();
      break;
    case INT32:
      reader = new IntegerPlainValuesReader();
      break;
    case INT64:
      reader = new LongPlainValuesReader();
      break;
    case INT96:
      // INT96 is read as a fixed-length byte array of INT96_TYPE_LENGTH bytes.
      reader = new FixedLenByteArrayPlainValuesReader(INT96_TYPE_LENGTH);
      break;
    case FIXED_LEN_BYTE_ARRAY:
      reader = new FixedLenByteArrayPlainValuesReader(descriptor.getTypeLength());
      break;
    default:
      throw new ParquetDecodingException("Plain values reader does not support: " + descriptor.getType());
  }
  return reader;
}
/**
 * Returns the plain values reader that matches the column's primitive type.
 *
 * @param descriptor column descriptor supplying the primitive type and, for FIXED_LEN_BYTE_ARRAY, the type length
 * @param valuesType not consulted by this method
 * @return a freshly constructed reader
 * @throws ParquetDecodingException for any primitive type not listed in the switch
 */
@Override
public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType) {
  switch (descriptor.getType()) {
    // Fixed-width primitives.
    case BOOLEAN:
      return new BooleanPlainValuesReader();
    case INT32:
      return new IntegerPlainValuesReader();
    case INT64:
      return new LongPlainValuesReader();
    case FLOAT:
      return new FloatPlainValuesReader();
    case DOUBLE:
      return new DoublePlainValuesReader();

    // Byte-array types: variable length, declared length, and the fixed length 12 used for INT96.
    case BINARY:
      return new BinaryPlainValuesReader();
    case FIXED_LEN_BYTE_ARRAY:
      return new FixedLenByteArrayPlainValuesReader(descriptor.getTypeLength());
    case INT96:
      return new FixedLenByteArrayPlainValuesReader(12);

    default:
      throw new ParquetDecodingException("no plain reader for type " + descriptor.getType());
  }
}
/**
 * Creates the dictionary implementation that corresponds to the column's primitive type.
 *
 * @param descriptor column descriptor; its type picks the dictionary class and, for FIXED_LEN_BYTE_ARRAY,
 *                   its type length is passed along
 * @param dictionaryPage dictionary page passed to the chosen dictionary constructor
 * @return a new dictionary for the page
 * @throws IOException declared by the signature; presumably raised by the dictionary constructors - confirm
 * @throws ParquetDecodingException for any primitive type not listed in the switch
 */
@Override
public Dictionary initDictionary(ColumnDescriptor descriptor, DictionaryPage dictionaryPage) throws IOException {
  switch (descriptor.getType()) {
    // Numeric types.
    case INT32:
      return new IntegerDictionary(dictionaryPage);
    case INT64:
      return new LongDictionary(dictionaryPage);
    case FLOAT:
      return new FloatDictionary(dictionaryPage);
    case DOUBLE:
      return new DoubleDictionary(dictionaryPage);

    // Binary-backed types: variable length, declared length, and INT96_TYPE_LENGTH for INT96.
    case BINARY:
      return new BinaryDictionary(dictionaryPage);
    case FIXED_LEN_BYTE_ARRAY:
      return new BinaryDictionary(dictionaryPage, descriptor.getTypeLength());
    case INT96:
      return new BinaryDictionary(dictionaryPage, INT96_TYPE_LENGTH);

    default:
      throw new ParquetDecodingException("Dictionary encoding does not support: " + descriptor.getType());
  }
}
},
/**
 * Selects the plain-encoding values reader for the column's primitive type.
 *
 * @param descriptor column descriptor; its type picks the reader and, for FIXED_LEN_BYTE_ARRAY, its type length
 *                   is passed to the reader
 * @param valuesType not consulted by this method
 * @return a new reader for the column's primitive type
 * @throws ParquetDecodingException if the primitive type has no case below
 */
@Override
public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType) {
  final ValuesReader reader;
  switch (descriptor.getType()) {
    case BOOLEAN:
      reader = new BooleanPlainValuesReader();
      break;
    case BINARY:
      reader = new BinaryPlainValuesReader();
      break;
    case FLOAT:
      reader = new FloatPlainValuesReader();
      break;
    case DOUBLE:
      reader = new DoublePlainValuesReader();
      break;
    case INT32:
      reader = new IntegerPlainValuesReader();
      break;
    case INT64:
      reader = new LongPlainValuesReader();
      break;
    case INT96:
      // INT96 is read as a fixed-length byte array of 12 bytes.
      reader = new FixedLenByteArrayPlainValuesReader(12);
      break;
    case FIXED_LEN_BYTE_ARRAY:
      reader = new FixedLenByteArrayPlainValuesReader(descriptor.getTypeLength());
      break;
    default:
      throw new ParquetDecodingException("no plain reader for type " + descriptor.getType());
  }
  return reader;
}
/**
 * Returns the plain values reader that matches the column's primitive type.
 *
 * @param descriptor column descriptor supplying the primitive type and, for FIXED_LEN_BYTE_ARRAY, the type length
 * @param valuesType not consulted by this method
 * @return a freshly constructed reader
 * @throws ParquetDecodingException for any primitive type not listed in the switch
 */
@Override
public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType) {
  switch (descriptor.getType()) {
    // Fixed-width primitives.
    case BOOLEAN:
      return new BooleanPlainValuesReader();
    case INT32:
      return new IntegerPlainValuesReader();
    case INT64:
      return new LongPlainValuesReader();
    case FLOAT:
      return new FloatPlainValuesReader();
    case DOUBLE:
      return new DoublePlainValuesReader();

    // Byte-array types: variable length, declared length, and INT96_TYPE_LENGTH for INT96.
    case BINARY:
      return new BinaryPlainValuesReader();
    case FIXED_LEN_BYTE_ARRAY:
      return new FixedLenByteArrayPlainValuesReader(descriptor.getTypeLength());
    case INT96:
      return new FixedLenByteArrayPlainValuesReader(INT96_TYPE_LENGTH);

    default:
      throw new ParquetDecodingException("Plain values reader does not support: " + descriptor.getType());
  }
}
protected ColumnReader(DeprecatedParquetVectorizedReader parentReader, int allocateSize, ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v, SchemaElement schemaElement) throws ExecutionSetupException { this.parentReader = parentReader; this.columnDescriptor = descriptor; this.columnChunkMetaData = columnChunkMetaData; this.isFixedLength = fixedLength; this.schemaElement = schemaElement; this.valueVec = v; this.pageReader = (parentReader.getSingleStream() != null)? new DeprecatedSingleStreamPageReader(this, parentReader.getSingleStream(), parentReader.getHadoopPath(), columnChunkMetaData) : new PageReader(this, parentReader.getFileSystem(), parentReader.getHadoopPath(), columnChunkMetaData); if (columnDescriptor.getType() != PrimitiveType.PrimitiveTypeName.BINARY) { if (columnDescriptor.getType() == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) { dataTypeLengthInBits = columnDescriptor.getTypeLength() * 8; } else if (columnDescriptor.getType() == PrimitiveTypeName.INT96 && (valueVec instanceof TimeStampMilliVector || valueVec instanceof TimeStampMilliVector)) { // if int 96 column is being read as a Timestamp, this truncates the time format used by Impala // dataTypeLengthInBits is only ever used when computing offsets into the destination vector, so it // needs to be set to the bit width of the resulting Arrow type, usually this matches the input length dataTypeLengthInBits = 64; } else { dataTypeLengthInBits = DeprecatedParquetVectorizedReader.getTypeLengthInBits(columnDescriptor.getType()); } } }
for (int i = 0; i < num; i++) { if (defColumn.readInteger() == maxDefLevel) { ((IntegerColumnVector) column).vector[rowId + i] = (int) binaryToUnscaledLong(data.readBinary(descriptor.getTypeLength())); } else { column.noNulls = false; for (int i = 0; i < num; i++) { if (defColumn.readInteger() == maxDefLevel) { ((LongColumnVector) column).vector[rowId + i] = binaryToUnscaledLong(data.readBinary(descriptor.getTypeLength())); } else { column.noNulls = false; for (int i = 0; i < num; i++) { if (defColumn.readInteger() == maxDefLevel) { ((BytesColumnVector) column).setVal(rowId + i, data.readBinary(descriptor.getTypeLength()).getBytes()); } else { column.noNulls = false;
/**
 * Creates the dictionary values writer matching the column's primitive type.
 *
 * @param path column descriptor; its type picks the writer and, for FIXED_LEN_BYTE_ARRAY, its type length is
 *             passed to the writer
 * @param properties source of the dictionary page size threshold and the allocator
 * @param dictPageEncoding encoding passed to the writer for dictionary pages
 * @param dataPageEncoding encoding passed to the writer for data pages
 * @return a new dictionary values writer
 * @throws IllegalArgumentException for BOOLEAN (no dictionary encoding) and for any type not listed below
 */
static DictionaryValuesWriter dictionaryWriter(ColumnDescriptor path, ParquetProperties properties,
    Encoding dictPageEncoding, Encoding dataPageEncoding) {
  final DictionaryValuesWriter writer;
  switch (path.getType()) {
    case BOOLEAN:
      throw new IllegalArgumentException("no dictionary encoding for BOOLEAN");
    case BINARY:
      writer = new DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding,
          properties.getAllocator());
      break;
    case INT32:
      writer = new DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding,
          properties.getAllocator());
      break;
    case INT64:
      writer = new DictionaryValuesWriter.PlainLongDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding,
          properties.getAllocator());
      break;
    case INT96:
      // INT96 is written as a fixed-length array of 12 bytes.
      writer = new DictionaryValuesWriter.PlainFixedLenArrayDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), 12, dataPageEncoding, dictPageEncoding,
          properties.getAllocator());
      break;
    case DOUBLE:
      writer = new DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding,
          properties.getAllocator());
      break;
    case FLOAT:
      writer = new DictionaryValuesWriter.PlainFloatDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding,
          properties.getAllocator());
      break;
    case FIXED_LEN_BYTE_ARRAY:
      writer = new DictionaryValuesWriter.PlainFixedLenArrayDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), path.getTypeLength(), dataPageEncoding, dictPageEncoding,
          properties.getAllocator());
      break;
    default:
      throw new IllegalArgumentException("Unknown type " + path.getType());
  }
  return writer;
}
/**
 * Picks the dictionary values writer for a column based on its primitive type.
 *
 * @param path column descriptor supplying the primitive type and, for FIXED_LEN_BYTE_ARRAY, the type length
 * @param properties provides the dictionary page size threshold and the allocator handed to every writer
 * @param dictPageEncoding dictionary page encoding forwarded to the writer
 * @param dataPageEncoding data page encoding forwarded to the writer
 * @return a freshly constructed dictionary values writer
 * @throws IllegalArgumentException if the type is BOOLEAN or is not handled by the switch
 */
static DictionaryValuesWriter dictionaryWriter(ColumnDescriptor path, ParquetProperties properties,
    Encoding dictPageEncoding, Encoding dataPageEncoding) {
  switch (path.getType()) {
    // Numeric types.
    case INT32:
      return new DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding,
          properties.getAllocator());
    case INT64:
      return new DictionaryValuesWriter.PlainLongDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding,
          properties.getAllocator());
    case FLOAT:
      return new DictionaryValuesWriter.PlainFloatDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding,
          properties.getAllocator());
    case DOUBLE:
      return new DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding,
          properties.getAllocator());

    // Byte-array types: variable length, declared length, and the fixed length 12 used for INT96.
    case BINARY:
      return new DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding,
          properties.getAllocator());
    case FIXED_LEN_BYTE_ARRAY:
      return new DictionaryValuesWriter.PlainFixedLenArrayDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), path.getTypeLength(), dataPageEncoding, dictPageEncoding,
          properties.getAllocator());
    case INT96:
      return new DictionaryValuesWriter.PlainFixedLenArrayDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), 12, dataPageEncoding, dictPageEncoding,
          properties.getAllocator());

    // Types without dictionary support.
    case BOOLEAN:
      throw new IllegalArgumentException("no dictionary encoding for BOOLEAN");
    default:
      throw new IllegalArgumentException("Unknown type " + path.getType());
  }
}
dataTypeLengthInBits = columnDescriptor.getTypeLength() * 8; } else { dataTypeLengthInBits = ParquetColumnMetadata.getTypeLengthInBits(columnDescriptor.getType());
break; case FIXED_LEN_BYTE_ARRAY: readFixedLenByteArrayBatch(rowId, num, column, descriptor.getTypeLength()); break; default: