/**
 * Looks up the chunk metadata for the given column in the current row-group block.
 *
 * @param columnDescriptor the column whose chunk metadata is requested
 * @return the matching metadata from {@code currentBlockMetadata}
 * @throws ParquetCorruptionException if the block contains no metadata for the column
 */
private ColumnChunkMetaData getColumnChunkMetaData(ColumnDescriptor columnDescriptor) throws IOException {
  // hoisted out of the loop: the wanted path is loop-invariant
  ColumnPath wantedPath = ColumnPath.get(columnDescriptor.getPath());
  for (ColumnChunkMetaData metadata : currentBlockMetadata.getColumns()) {
    if (metadata.getPath().equals(wantedPath)) {
      return metadata;
    }
  }
  throw new ParquetCorruptionException("Metadata is missing for column: %s", columnDescriptor);
}
.map(value -> value.toLowerCase(Locale.ENGLISH)) .toArray(String[]::new); ColumnPath columnPath = ColumnPath.get(path); PrimitiveTypeName primitiveTypeName = messageType.getType(columnPath.toArray()).asPrimitiveType().getPrimitiveTypeName(); ColumnChunkMetaData column = ColumnChunkMetaData.get(
/**
 * Builds a {@link ColumnPath} from the schema path carried by the Thrift column metadata.
 *
 * @param metaData the Thrift column metadata whose {@code path_in_schema} is converted
 * @return the column path built from the schema path elements
 */
private ColumnPath getPath(parquet.format.ColumnMetaData metaData) {
  String[] schemaPath = metaData.path_in_schema.toArray(new String[0]);
  return ColumnPath.get(schemaPath);
}
/**
 * Parses a dot-delimited path string (e.g. {@code "a.b.c"}) into a ColumnPath.
 *
 * @param path the dot-separated column path; must not be null
 * @return the parsed column path
 */
public static ColumnPath fromDotString(String path) {
  checkNotNull(path, "path");
  String[] elements = path.split("\\.");
  return get(elements);
}
/**
 * Parses a dot-delimited path string (e.g. {@code "a.b.c"}) into a ColumnPath.
 *
 * @param path the dot-separated column path; must not be null
 * @return the parsed column path
 */
public static ColumnPath fromDotString(String path) {
  checkNotNull(path, "path");
  String[] elements = path.split("\\.");
  return get(elements);
}
/**
 * Creates a reader for a Parquet file; the file is opened for read in this constructor.
 *
 * @param configuration the Hadoop configuration used to resolve the file system and codecs
 * @param filePath the path of the Parquet file to read
 * @param blocks the row-group blocks to read
 * @param columns the columns to read, identified by their path
 * @throws IOException if the file can not be opened
 */
public ParquetFileReader(Configuration configuration, Path filePath, List<BlockMetaData> blocks, List<ColumnDescriptor> columns) throws IOException {
  this.filePath = filePath;
  FileSystem fs = filePath.getFileSystem(configuration);
  this.f = fs.open(filePath);
  this.blocks = blocks;
  // index the requested columns by their path for fast lookup while reading chunks
  for (ColumnDescriptor col : columns) {
    paths.put(ColumnPath.get(col.getPath()), col);
  }
  this.codecFactory = new CodecFactory(configuration);
}
/**
 * Indexes every column of the schema by its path, and records the original (logical)
 * type of each column that declares one.
 *
 * @param schema the Parquet message schema the filter predicate is validated against
 */
private SchemaCompatibilityValidator(MessageType schema) {
  for (ColumnDescriptor cd : schema.getColumns()) {
    String[] rawPath = cd.getPath();
    ColumnPath columnPath = ColumnPath.get(rawPath);
    columnsAccordingToSchema.put(columnPath, cd);
    OriginalType originalType = schema.getType(rawPath).getOriginalType();
    if (originalType != null) {
      originalTypes.put(columnPath, originalType);
    }
  }
}
/**
 * Looks up the chunk metadata for the given column in the current row-group block.
 *
 * @param columnDescriptor the column whose chunk metadata is requested
 * @return the matching metadata from {@code currentBlockMetadata}
 * @throws ParquetCorruptionException if the block contains no metadata for the column
 */
private ColumnChunkMetaData getColumnChunkMetaData(ColumnDescriptor columnDescriptor) throws IOException {
  // hoisted out of the loop: the wanted path is loop-invariant
  ColumnPath wantedPath = ColumnPath.get(columnDescriptor.getPath());
  for (ColumnChunkMetaData metadata : currentBlockMetadata.getColumns()) {
    if (metadata.getPath().equals(wantedPath)) {
      return metadata;
    }
  }
  throw new ParquetCorruptionException("Malformed Parquet file. Could not find column metadata %s", columnDescriptor);
}
@Override public Converter getConverter(int fieldIndex) { // get the real converter from the delegate Converter delegateConverter = checkNotNull(delegate.getConverter(fieldIndex), "delegate converter"); // determine the indexFieldPath for the converter proxy we're about to make, which is // this converter's path + the requested fieldIndex List<Integer> newIndexFieldPath = new ArrayList<Integer>(indexFieldPath.size() + 1); newIndexFieldPath.addAll(indexFieldPath); newIndexFieldPath.add(fieldIndex); if (delegateConverter.isPrimitive()) { PrimitiveColumnIO columnIO = getColumnIO(newIndexFieldPath); ColumnPath columnPath = ColumnPath.get(columnIO.getColumnDescriptor().getPath()); ValueInspector[] valueInspectors = getValueInspectors(columnPath); return new FilteringPrimitiveConverter(delegateConverter.asPrimitiveConverter(), valueInspectors); } else { return new FilteringGroupConverter(delegateConverter.asGroupConverter(), newIndexFieldPath, valueInspectorsByColumn, columnIOsByIndexFieldPath); } }
ColumnDescriptor columnDescriptor = requestedSchema.getColumns().get(i); if (isColumnPredicate(columnDescriptor, effectivePredicate) && columnChunkMetaData.getPath().equals(ColumnPath.get(columnDescriptor.getPath())) && isOnlyDictionaryEncodingPages(columnChunkMetaData.getEncodings())) { try {
/** * start a column inside a block * @param descriptor the column descriptor * @param valueCount the value count in this column * @param statistics the statistics in this column * @param compressionCodecName * @throws IOException */ public void startColumn(ColumnDescriptor descriptor, long valueCount, CompressionCodecName compressionCodecName) throws IOException { state = state.startColumn(); if (DEBUG) LOG.debug(out.getPos() + ": start column: " + descriptor + " count=" + valueCount); currentEncodings = new HashSet<parquet.column.Encoding>(); currentChunkPath = ColumnPath.get(descriptor.getPath()); currentChunkType = descriptor.getType(); currentChunkCodec = compressionCodecName; currentChunkValueCount = valueCount; currentChunkFirstDataPage = out.getPos(); compressedLength = 0; uncompressedLength = 0; // need to know what type of stats to initialize to // better way to do this? currentStatistics = Statistics.getStatsBasedOnType(currentChunkType); }
/** * start a column inside a block * @param descriptor the column descriptor * @param valueCount the value count in this column * @param statistics the statistics in this column * @param compressionCodecName * @throws IOException */ public void startColumn(ColumnDescriptor descriptor, long valueCount, CompressionCodecName compressionCodecName) throws IOException { state = state.startColumn(); if (DEBUG) LOG.debug(out.getPos() + ": start column: " + descriptor + " count=" + valueCount); currentEncodings = new HashSet<parquet.column.Encoding>(); currentChunkPath = ColumnPath.get(descriptor.getPath()); currentChunkType = descriptor.getType(); currentChunkCodec = compressionCodecName; currentChunkValueCount = valueCount; currentChunkFirstDataPage = out.getPos(); compressedLength = 0; uncompressedLength = 0; // need to know what type of stats to initialize to // better way to do this? currentStatistics = Statistics.getStatsBasedOnType(currentChunkType); }
ColumnMetaData metaData = columnChunk.meta_data; String[] path = metaData.path_in_schema.toArray(new String[metaData.path_in_schema.size()]); ColumnPath columnPath = ColumnPath.get(path); ColumnChunkMetaData column = ColumnChunkMetaData.get( columnPath,