/**
 * Finds the column chunk metadata in the current block whose path matches
 * the path of the given column.
 *
 * @param columnDescriptor column whose chunk metadata is wanted
 * @return the first entry of {@code currentBlockMetadata.getColumns()} whose
 *         path equals the column's path
 * @throws ParquetCorruptionException if no chunk in the current block has that path
 * @throws IOException declared by the signature
 */
private ColumnChunkMetaData getColumnChunkMetaData(ColumnDescriptor columnDescriptor) throws IOException {
    // The wanted path is the same for every chunk, so build it once instead
    // of once per loop iteration.
    ColumnPath wantedPath = ColumnPath.get(columnDescriptor.getPath());
    for (ColumnChunkMetaData metadata : currentBlockMetadata.getColumns()) {
        if (metadata.getPath().equals(wantedPath)) {
            return metadata;
        }
    }
    throw new ParquetCorruptionException("Metadata is missing for column: %s", columnDescriptor);
}
.map(value -> value.toLowerCase(Locale.ENGLISH)) .toArray(String[]::new); ColumnPath columnPath = ColumnPath.get(path); PrimitiveTypeName primitiveTypeName = messageType.getType(columnPath.toArray()).asPrimitiveType().getPrimitiveTypeName(); ColumnChunkMetaData column = ColumnChunkMetaData.get( columnPath,
/**
 * Builds a {@link ColumnPath} from the {@code path_in_schema} entries of the
 * given column metadata.
 *
 * @param metaData column metadata supplying the path segments
 * @return the result of {@code ColumnPath.get} for those segments
 */
private ColumnPath getPath(parquet.format.ColumnMetaData metaData) {
    // Exactly-sized destination array for the path segments.
    String[] destination = new String[metaData.path_in_schema.size()];
    String[] segments = metaData.path_in_schema.toArray(destination);
    return ColumnPath.get(segments);
}
/**
 * Collects the statistics of the column chunks in a block, keyed by column
 * descriptor.
 *
 * <p>A chunk contributes an entry only when it has non-null statistics and
 * its path is present in {@code descriptorsByPath}.
 *
 * @param blockMetadata block whose column chunks are inspected
 * @param descriptorsByPath descriptors keyed by the column path as a list of segments
 * @return an immutable map from descriptor to that column's statistics
 */
private static Map<ColumnDescriptor, Statistics<?>> getStatistics(BlockMetaData blockMetadata, Map<List<String>, RichColumnDescriptor> descriptorsByPath) {
    ImmutableMap.Builder<ColumnDescriptor, Statistics<?>> result = ImmutableMap.builder();
    for (ColumnChunkMetaData chunk : blockMetadata.getColumns()) {
        Statistics<?> chunkStatistics = chunk.getStatistics();
        if (chunkStatistics == null) {
            // Nothing to record for this chunk.
            continue;
        }
        List<String> pathKey = Arrays.asList(chunk.getPath().toArray());
        RichColumnDescriptor descriptor = descriptorsByPath.get(pathKey);
        if (descriptor == null) {
            // The caller supplied no descriptor for this path.
            continue;
        }
        result.put(descriptor, chunkStatistics);
    }
    return result.build();
}
/**
 * Two columns are equal when they have the same runtime class, equal column
 * types and equal column paths.
 */
@Override
public boolean equals(Object o) {
    if (this == o) {
        return true;
    }
    if (o == null || getClass() != o.getClass()) {
        return false;
    }
    Column other = (Column) o;
    // Type is compared first, then path.
    return columnType.equals(other.columnType)
            && columnPath.equals(other.columnPath);
}
/**
 * Returns a new {@code ColumnPath} whose segments are the interned versions
 * of the segments of {@code value}.
 */
@Override
protected ColumnPath toCanonical(ColumnPath value) {
    String[] interned = new String[value.p.length];
    int index = 0;
    for (String segment : value.p) {
        interned[index++] = segment.intern();
    }
    return new ColumnPath(interned);
}
};
/**
 * Creates a {@link BinaryColumn} for the given dot-separated column path.
 *
 * @param columnPath column path, parsed with {@code ColumnPath.fromDotString}
 * @return a new {@code BinaryColumn} for the parsed path
 */
public static BinaryColumn binaryColumn(String columnPath) {
    ColumnPath parsedPath = ColumnPath.fromDotString(columnPath);
    return new BinaryColumn(parsedPath);
}
/**
 * Combines the hash of the column path with the hash of the column type,
 * using 31 as the multiplier for the path hash.
 */
@Override
public int hashCode() {
    return 31 * columnPath.hashCode() + columnType.hashCode();
}
}
/**
 * Parses a dot-separated string into a {@code ColumnPath}.
 *
 * @param path dot-separated path; checked with {@code checkNotNull}
 * @return the result of {@code get} for the segments between the dots
 */
public static ColumnPath fromDotString(String path) {
    checkNotNull(path, "path");
    // The regex "\\." matches a literal dot.
    String[] segments = path.split("\\.");
    return get(segments);
}
/**
 * Reads the dictionary page of a column chunk in the block that is a
 * candidate for dictionary-based filtering.
 *
 * <p>A chunk is a candidate when its path has a descriptor in
 * {@code descriptorsByPath}, {@code isOnlyDictionaryEncodingPages} accepts
 * its encodings, and {@code isColumnPredicate} accepts the descriptor for
 * the given tuple domain. The loop stops after the first candidate has been
 * stored, so the returned map holds at most one entry.
 *
 * @param blockMetadata block whose column chunks are inspected
 * @param dataSource source the chunk bytes are read from
 * @param descriptorsByPath descriptors keyed by the column path as a list of segments
 * @param parquetTupleDomain predicate domain passed to {@code isColumnPredicate}
 * @return an immutable map from descriptor to its dictionary descriptor
 */
private static Map<ColumnDescriptor, DictionaryDescriptor> getDictionaries(BlockMetaData blockMetadata, ParquetDataSource dataSource, Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<ColumnDescriptor> parquetTupleDomain) {
    ImmutableMap.Builder<ColumnDescriptor, DictionaryDescriptor> result = ImmutableMap.builder();
    for (ColumnChunkMetaData chunk : blockMetadata.getColumns()) {
        List<String> pathKey = Arrays.asList(chunk.getPath().toArray());
        RichColumnDescriptor descriptor = descriptorsByPath.get(pathKey);
        if (descriptor == null) {
            continue;
        }
        if (!isOnlyDictionaryEncodingPages(chunk.getEncodings()) || !isColumnPredicate(descriptor, parquetTupleDomain)) {
            continue;
        }

        // Load the whole chunk into memory and extract its dictionary page.
        byte[] buffer = new byte[toIntExact(chunk.getTotalSize())];
        dataSource.readFully(chunk.getStartingPos(), buffer);
        Optional<DictionaryPage> dictionaryPage = readDictionaryPage(buffer, chunk.getCodec());
        result.put(descriptor, new DictionaryDescriptor(descriptor, dictionaryPage));

        // Only the first qualifying chunk is read.
        break;
    }
    return result.build();
}
/**
 * Compares codec, path, type and encodings with another
 * {@code ColumnChunkProperties}; any other kind of object is never equal.
 */
@Override
public boolean equals(Object obj) {
    if (!(obj instanceof ColumnChunkProperties)) {
        return false;
    }
    ColumnChunkProperties that = (ColumnChunkProperties) obj;
    if (that.codec != codec) {
        return false;
    }
    if (!that.path.equals(path)) {
        return false;
    }
    if (that.type != type) {
        return false;
    }
    // Encodings are compared by the class's own two-argument equals helper.
    return equals(that.encodings, encodings);
}
/**
 * Produces the canonical form of a path: a new {@code ColumnPath} built from
 * a copy of the segment array in which each segment has been interned.
 */
@Override
protected ColumnPath toCanonical(ColumnPath value) {
    // Work on a copy so the array held by value is left untouched.
    String[] segments = value.p.clone();
    for (int position = 0; position < segments.length; position++) {
        segments[position] = segments[position].intern();
    }
    return new ColumnPath(segments);
}
};
/**
 * Creates a {@link LongColumn} for the given dot-separated column path.
 *
 * @param columnPath column path, parsed with {@code ColumnPath.fromDotString}
 * @return a new {@code LongColumn} for the parsed path
 */
public static LongColumn longColumn(String columnPath) {
    ColumnPath parsedPath = ColumnPath.fromDotString(columnPath);
    return new LongColumn(parsedPath);
}
/**
 * XORs together the hashes of codec, path, type and encodings.
 *
 * <p>The encodings contribute through the collection's own
 * {@code hashCode()} rather than through the hash of a {@code toArray()}
 * snapshot. An array hash depends on iteration order, so two collections
 * holding the same elements in a different iteration order could hash
 * differently.
 *
 * <p>NOTE(review): assumes {@code encodings} is a {@code java.util.Set} or
 * {@code java.util.List}; its declaration is not visible here, please confirm.
 * For a {@code List} the value is the same as the array-based hash; for a
 * {@code Set} it is independent of iteration order.
 */
@Override
public int hashCode() {
    return codec.hashCode()
            ^ path.hashCode()
            ^ type.hashCode()
            ^ encodings.hashCode();
}
/**
 * Converts a path written with dots between its segments into a
 * {@code ColumnPath}.
 *
 * @param path dot-separated path; passed through {@code checkNotNull} first
 * @return {@code get(...)} applied to the pieces of {@code path} split on '.'
 */
public static ColumnPath fromDotString(String path) {
    checkNotNull(path, "path");
    final String[] parts = path.split("\\.");
    return get(parts);
}
/**
 * Looks up the column chunk metadata for a column in the current block.
 *
 * @param columnDescriptor column whose chunk metadata is wanted
 * @return the first entry of {@code currentBlockMetadata.getColumns()} whose
 *         path equals the column's path
 * @throws ParquetCorruptionException if the current block has no chunk with that path
 * @throws IOException declared by the signature
 */
private ColumnChunkMetaData getColumnChunkMetaData(ColumnDescriptor columnDescriptor) throws IOException {
    // Resolve the column's path once up front; it does not vary per chunk.
    ColumnPath wantedPath = ColumnPath.get(columnDescriptor.getPath());
    for (ColumnChunkMetaData metadata : currentBlockMetadata.getColumns()) {
        if (metadata.getPath().equals(wantedPath)) {
            return metadata;
        }
    }
    throw new ParquetCorruptionException("Malformed Parquet file. Could not find column metadata %s", columnDescriptor);
}
ColumnMetaData metaData = columnChunk.meta_data; String[] path = metaData.path_in_schema.toArray(new String[metaData.path_in_schema.size()]); ColumnPath columnPath = ColumnPath.get(path); ColumnChunkMetaData column = ColumnChunkMetaData.get( columnPath, messageType.getType(columnPath.toArray()).asPrimitiveType().getPrimitiveTypeName(), CompressionCodecName.fromParquet(metaData.codec), readEncodings(metaData.encodings), readStats(metaData.statistics, messageType.getType(columnPath.toArray()).asPrimitiveType().getPrimitiveTypeName()), metaData.data_page_offset, metaData.dictionary_page_offset,
/**
 * Arranges column chunk metadata into a tree keyed by path segment and
 * prints it with {@code showColumnChunkDetails}.
 *
 * <p>Every segment of a chunk's path except the last becomes a nested
 * {@code LinkedHashMap}; the last segment maps to the chunk metadata itself.
 *
 * @param out writer handed to {@code showColumnChunkDetails}
 * @param ccmeta column chunks to display
 */
public static void showDetails(PrettyPrintWriter out, List<ColumnChunkMetaData> ccmeta) {
    Map<String,Object> root = new LinkedHashMap<String,Object>();
    for (ColumnChunkMetaData chunk : ccmeta) {
        String[] segments = chunk.getPath().toArray();
        int leafIndex = segments.length - 1;

        // Walk down, or create, the intermediate nodes for every segment before the leaf.
        Map<String,Object> node = root;
        for (int depth = 0; depth < leafIndex; ++depth) {
            String key = segments[depth];
            // Values stored in this tree are never null, so a null lookup means "absent".
            Object child = node.get(key);
            if (child == null) {
                child = new LinkedHashMap<String,Object>();
                node.put(key, child);
            }
            node = (Map<String,Object>) child;
        }
        node.put(segments[leafIndex], chunk);
    }
    showColumnChunkDetails(out, root, 0);
}
/**
 * Returns the canonical {@code ColumnPath} for the given segments.
 *
 * @param path path segments
 * @return whatever {@code paths.canonicalize} returns for a new
 *         {@code ColumnPath} built from the segments
 */
public static ColumnPath get(String... path) {
    ColumnPath candidate = new ColumnPath(path);
    return paths.canonicalize(candidate);
}
/**
 * Creates a {@link BooleanColumn} for the given dot-separated column path.
 *
 * @param columnPath column path, parsed with {@code ColumnPath.fromDotString}
 * @return a new {@code BooleanColumn} for the parsed path
 */
public static BooleanColumn booleanColumn(String columnPath) {
    ColumnPath parsedPath = ColumnPath.fromDotString(columnPath);
    return new BooleanColumn(parsedPath);
}