inputStream.seek(metadataIndex); FileMetaData fileMetaData = readFileMetaData(inputStream); List<SchemaElement> schema = fileMetaData.getSchema(); validateParquet(!schema.isEmpty(), "Empty Parquet schema in file: %s", file); List<RowGroup> rowGroups = fileMetaData.getRow_groups(); if (rowGroups != null) { for (RowGroup rowGroup : rowGroups) { List<KeyValue> keyValueList = fileMetaData.getKey_value_metadata(); if (keyValueList != null) { for (KeyValue keyValue : keyValueList) { return new ParquetMetadata(new parquet.hadoop.metadata.FileMetaData(messageType, keyValueMetaData, fileMetaData.getCreated_by()), blocks);
/**
 * Returns the current value of the requested thrift field.
 *
 * @param field the thrift field identifier to read
 * @return the field's value; primitive fields are returned boxed
 * @throws IllegalStateException if {@code field} is not a recognized constant
 */
public Object getFieldValue(_Fields field) {
  switch (field) {
  case VERSION:
    // Integer.valueOf/autoboxing reuses the JDK's small-value cache;
    // the deprecated `new Integer(...)` constructor always allocates.
    return Integer.valueOf(getVersion());
  case SCHEMA:
    return getSchema();
  case NUM_ROWS:
    return Long.valueOf(getNum_rows());
  case ROW_GROUPS:
    return getRow_groups();
  case KEY_VALUE_METADATA:
    return getKey_value_metadata();
  case CREATED_BY:
    return getCreated_by();
  }
  // Unreachable for a valid enum constant; guards against future additions.
  throw new IllegalStateException();
}
/**
 * Drops every row group whose midpoint falls outside the given range filter.
 * The midpoint is the first column's starting offset plus half the row
 * group's total compressed size. Note: mutates and returns {@code metaData}.
 */
static FileMetaData filterFileMetaData(FileMetaData metaData, RangeMetadataFilter filter) {
  List<RowGroup> kept = new ArrayList<RowGroup>();
  for (RowGroup group : metaData.getRow_groups()) {
    long compressedSize = 0;
    for (ColumnChunk chunk : group.getColumns()) {
      compressedSize += chunk.getMeta_data().getTotal_compressed_size();
    }
    long firstColumnOffset = getOffset(group.getColumns().get(0));
    // Keep the row group only when its byte-range midpoint is inside the filter.
    if (filter.contains(firstColumnOffset + compressedSize / 2)) {
      kept.add(group);
    }
  }
  metaData.setRow_groups(kept);
  return metaData;
}
/**
 * Converts parquet-mr {@code ParquetMetadata} into the thrift
 * {@code FileMetaData} wire representation.
 *
 * @param currentVersion the format version number to record in the footer
 * @param parquetMetadata the in-memory metadata to convert
 * @return the populated thrift FileMetaData, including schema, row groups,
 *     key/value metadata and the created-by string
 */
public FileMetaData toParquetMetadata(int currentVersion, ParquetMetadata parquetMetadata) {
  List<BlockMetaData> blocks = parquetMetadata.getBlocks();
  List<RowGroup> rowGroups = new ArrayList<RowGroup>();
  // BUG FIX: this was `int numRows`. A file can contain more than
  // Integer.MAX_VALUE rows and the thrift num_rows field is an i64,
  // so accumulating into an int silently overflows.
  long numRows = 0;
  for (BlockMetaData block : blocks) {
    numRows += block.getRowCount();
    addRowGroup(parquetMetadata, rowGroups, block);
  }
  FileMetaData fileMetaData = new FileMetaData(
      currentVersion,
      toParquetSchema(parquetMetadata.getFileMetaData().getSchema()),
      numRows,
      rowGroups);
  // Copy the application-level key/value pairs into the footer.
  Set<Entry<String, String>> keyValues =
      parquetMetadata.getFileMetaData().getKeyValueMetaData().entrySet();
  for (Entry<String, String> keyValue : keyValues) {
    addKeyValue(fileMetaData, keyValue.getKey(), keyValue.getValue());
  }
  fileMetaData.setCreated_by(parquetMetadata.getFileMetaData().getCreatedBy());
  return fileMetaData;
}
/**
 * Returns a deep copy of this object via the thrift-generated copy
 * constructor, which duplicates nested collections and structs.
 */
public FileMetaData deepCopy() {
  return new FileMetaData(this);
}
/**
 * Appends one key/value pair to the accumulated metadata.
 * NOTE(review): {@code md} is presumably the FileMetaData being built by
 * this consumer — confirm against the enclosing class (not visible here).
 */
@Override
public void addKeyValueMetaData(KeyValue kv) {
  md.addToKey_value_metadata(kv);
}
// Closes the enclosing class, whose declaration is outside this view.
}
/**
 * Equality check against an arbitrary object; delegates to the typed
 * {@code equals(FileMetaData)} overload when the runtime type matches.
 */
@Override
public boolean equals(Object that) {
  // `instanceof` already yields false for null, so no separate null check.
  if (!(that instanceof FileMetaData)) {
    return false;
  }
  return this.equals((FileMetaData) that);
}
/**
 * Appends one row group to the accumulated metadata.
 * NOTE(review): {@code md} is presumably the FileMetaData being built by
 * this consumer — confirm against the enclosing class (not visible here).
 */
@Override
public void addRowGroup(RowGroup rowGroup) {
  md.addToRow_groups(rowGroup);
}
/**
 * Returns the current value of the requested thrift field.
 *
 * @param field the thrift field identifier to read
 * @return the field's value; primitive fields are returned boxed
 * @throws IllegalStateException if {@code field} is not a recognized constant
 */
public Object getFieldValue(_Fields field) {
  switch (field) {
  case VERSION:
    // Integer.valueOf/autoboxing reuses the JDK's small-value cache;
    // the deprecated `new Integer(...)` constructor always allocates.
    return Integer.valueOf(getVersion());
  case SCHEMA:
    return getSchema();
  case NUM_ROWS:
    return Long.valueOf(getNum_rows());
  case ROW_GROUPS:
    return getRow_groups();
  case KEY_VALUE_METADATA:
    return getKey_value_metadata();
  case CREATED_BY:
    return getCreated_by();
  }
  // Unreachable for a valid enum constant; guards against future additions.
  throw new IllegalStateException();
}
/**
 * Returns a deep copy of this object via the thrift-generated copy
 * constructor, which duplicates nested collections and structs.
 */
public FileMetaData deepCopy() {
  return new FileMetaData(this);
}
/**
 * Appends one key/value pair to the accumulated metadata.
 * NOTE(review): {@code md} is presumably the FileMetaData being built by
 * this consumer — confirm against the enclosing class (not visible here).
 */
@Override
public void addKeyValueMetaData(KeyValue kv) {
  md.addToKey_value_metadata(kv);
}
// Closes the enclosing class, whose declaration is outside this view.
}
/**
 * Equality check against an arbitrary object; delegates to the typed
 * {@code equals(FileMetaData)} overload when the runtime type matches.
 */
@Override
public boolean equals(Object that) {
  // `instanceof` already yields false for null, so no separate null check.
  if (!(that instanceof FileMetaData)) {
    return false;
  }
  return this.equals((FileMetaData) that);
}
/**
 * Appends one row group to the accumulated metadata.
 * NOTE(review): {@code md} is presumably the FileMetaData being built by
 * this consumer — confirm against the enclosing class (not visible here).
 */
@Override
public void addRowGroup(RowGroup rowGroup) {
  md.addToRow_groups(rowGroup);
}
inputStream.seek(metadataIndex); FileMetaData fileMetaData = readFileMetaData(inputStream); List<SchemaElement> schema = fileMetaData.getSchema(); validateParquet(!schema.isEmpty(), "Empty Parquet schema in file: %s", file); List<RowGroup> rowGroups = fileMetaData.getRow_groups(); if (rowGroups != null) { for (RowGroup rowGroup : rowGroups) { List<KeyValue> keyValueList = fileMetaData.getKey_value_metadata(); if (keyValueList != null) { for (KeyValue keyValue : keyValueList) { return new ParquetMetadata(new parquet.hadoop.metadata.FileMetaData(messageType, keyValueMetaData, fileMetaData.getCreated_by()), blocks);
/**
 * Reads complete file metadata (row groups included) from the stream by
 * delegating to {@code read} with a fresh FileMetaData target.
 *
 * @param from the stream positioned at the serialized FileMetaData
 * @return the deserialized metadata
 * @throws IOException if the stream cannot be read or parsed
 */
public static FileMetaData readFileMetaData(InputStream from) throws IOException {
  return read(from, new FileMetaData());
}
/**
/**
 * Adds a single key/value pair to the footer's key_value_metadata list.
 */
private void addKeyValue(FileMetaData fileMetaData, String key, String value) {
  KeyValue entry = new KeyValue(key);
  // Direct field write, matching the original (no setter indirection).
  entry.value = value;
  fileMetaData.addToKey_value_metadata(entry);
}
public ParquetMetadata fromParquetMetadata(FileMetaData parquetMetadata) throws IOException { MessageType messageType = fromParquetSchema(parquetMetadata.getSchema()); List<BlockMetaData> blocks = new ArrayList<BlockMetaData>(); List<RowGroup> row_groups = parquetMetadata.getRow_groups(); if (row_groups != null) { for (RowGroup rowGroup : row_groups) { List<KeyValue> key_value_metadata = parquetMetadata.getKey_value_metadata(); if (key_value_metadata != null) { for (KeyValue keyValue : key_value_metadata) { new parquet.hadoop.metadata.FileMetaData(messageType, keyValueMetaData, parquetMetadata.getCreated_by()), blocks);
/**
 * Reads complete file metadata (row groups included) from the stream by
 * delegating to {@code read} with a fresh FileMetaData target.
 *
 * @param from the stream positioned at the serialized FileMetaData
 * @return the deserialized metadata
 * @throws IOException if the stream cannot be read or parsed
 */
public static FileMetaData readFileMetaData(InputStream from) throws IOException {
  return read(from, new FileMetaData());
}
/**
/**
 * Reads the metadata from the stream, optionally skipping the row groups.
 *
 * @param from the stream to read the metadata from
 * @param skipRowGroups whether row groups should be skipped
 * @return the resulting metadata
 * @throws IOException if the stream cannot be read or parsed
 */
public static FileMetaData readFileMetaData(InputStream from, boolean skipRowGroups) throws IOException {
  FileMetaData result = new FileMetaData();
  if (!skipRowGroups) {
    // Full read: populate every field, row groups included.
    read(from, result);
  } else {
    // Event-driven read that lets the consumer skip the row-group entries.
    readFileMetaData(from, new DefaultFileMetaDataConsumer(result), skipRowGroups);
  }
  return result;
}
/**
 * Reads the metadata from the stream, optionally skipping the row groups.
 *
 * @param from the stream to read the metadata from
 * @param skipRowGroups whether row groups should be skipped
 * @return the resulting metadata
 * @throws IOException if the stream cannot be read or parsed
 */
public static FileMetaData readFileMetaData(InputStream from, boolean skipRowGroups) throws IOException {
  FileMetaData result = new FileMetaData();
  if (!skipRowGroups) {
    // Full read: populate every field, row groups included.
    read(from, result);
  } else {
    // Event-driven read that lets the consumer skip the row-group entries.
    readFileMetaData(from, new DefaultFileMetaDataConsumer(result), skipRowGroups);
  }
  return result;
}