inputStream.seek(metadataIndex);
FileMetaData fileMetaData = readFileMetaData(inputStream);
List<SchemaElement> schema = fileMetaData.getSchema();
validateParquet(!schema.isEmpty(), "Empty Parquet schema in file: %s", file);
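For context, the metadataIndex consumed above is normally derived from the Parquet file tail, where the last 8 bytes hold a 4-byte little-endian footer length followed by the "PAR1" magic. A minimal sketch of that computation, assuming a seekable stream and a fileSize already known to the caller (the helper name and stream type are illustrative, not part of the code above):

// Sketch only: derive the footer metadata offset from the file tail.
// Assumes the stream supports seek() and a read-fully style read into a buffer.
static long findMetadataIndex(FSDataInputStream inputStream, long fileSize) throws IOException {
  int tailLength = Integer.BYTES + MAGIC.length;  // 4-byte footer length + "PAR1"
  inputStream.seek(fileSize - tailLength);
  byte[] tail = new byte[tailLength];
  inputStream.readFully(tail);
  // the footer length is stored little-endian just before the trailing magic
  int footerLength = (tail[0] & 0xFF)
      | (tail[1] & 0xFF) << 8
      | (tail[2] & 0xFF) << 16
      | (tail[3] & 0xFF) << 24;
  return fileSize - tailLength - footerLength;  // start of the serialized FileMetaData
}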
protected PageHeader readPageHeader() throws IOException {
  return Util.readPageHeader(this);
}
/**
 * Reads the file metadata from the stream.
 * @param from the stream to read the metadata from
 * @param skipRowGroups whether row-group metadata should be skipped
 * @return the resulting metadata
 * @throws IOException if the metadata cannot be read
 */
public static FileMetaData readFileMetaData(InputStream from, boolean skipRowGroups) throws IOException {
  FileMetaData md = new FileMetaData();
  if (skipRowGroups) {
    readFileMetaData(from, new DefaultFileMetaDataConsumer(md), skipRowGroups);
  } else {
    read(from, md);
  }
  return md;
}
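A hedged usage sketch: when only the schema is needed, row-group metadata can be skipped. This assumes the method lives on the same Util class referenced elsewhere in this section and that footerStream is already positioned at the start of the serialized footer:

// Sketch: read only schema-level metadata, skipping row groups.
FileMetaData md = Util.readFileMetaData(footerStream, true);
List<SchemaElement> schema = md.getSchema();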
private static TProtocol protocol(OutputStream to) {
  return protocol(new TIOStreamTransport(to));
}
public static PageHeader readPageHeader(InputStream from) throws IOException {
  return read(from, new PageHeader());
}
public static void writePageHeader(PageHeader pageHeader, OutputStream to) throws IOException {
  write(pageHeader, to);
}
private static void serializeFooter(ParquetMetadata footer, FSDataOutputStream out) throws IOException {
  long footerIndex = out.getPos();
  parquet.format.FileMetaData parquetMetadata = new ParquetMetadataConverter().toParquetMetadata(CURRENT_VERSION, footer);
  writeFileMetaData(parquetMetadata, out);
  if (DEBUG) LOG.debug(out.getPos() + ": footer length = " + (out.getPos() - footerIndex));
  BytesUtils.writeIntLittleEndian(out, (int) (out.getPos() - footerIndex));
  out.write(MAGIC);
}
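The tail that serializeFooter produces is, in order, the Thrift-serialized footer, its length, and the magic bytes; a comment-only sketch of the layout for reference:

// File tail as written by serializeFooter (footerIndex = position before the footer):
//   [ serialized parquet.format.FileMetaData ]  <- writeFileMetaData(parquetMetadata, out)
//   [ 4-byte little-endian footer length     ]  <- out.getPos() - footerIndex at that point
//   [ MAGIC                                  ]  <- last bytes of the file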
public void writeDataPageV2Header(
    int uncompressedSize, int compressedSize,
    int valueCount, int nullCount, int rowCount,
    parquet.column.statistics.Statistics statistics,
    parquet.column.Encoding dataEncoding,
    int rlByteLength, int dlByteLength,
    OutputStream to) throws IOException {
  writePageHeader(
      newDataPageV2Header(
          uncompressedSize, compressedSize,
          valueCount, nullCount, rowCount,
          statistics,
          dataEncoding,
          rlByteLength, dlByteLength),
      to);
}
private static void write(TBase<?, ?> tbase, OutputStream to) throws IOException {
  try {
    tbase.write(protocol(to));
  } catch (TException e) {
    throw new IOException("can not write " + tbase, e);
  }
}
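A minimal round-trip sketch using the read/write helpers above, assuming they are exposed on the same Util class; the page sizes are illustrative placeholders:

// Sketch: serialize a PageHeader with the Thrift helper and read it back.
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
PageHeader header = new PageHeader(PageType.DATA_PAGE, 1024, 512);  // type, uncompressed size, compressed size
Util.writePageHeader(header, buffer);
PageHeader roundTripped = Util.readPageHeader(new ByteArrayInputStream(buffer.toByteArray()));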
public static FileMetaData readFileMetaData(InputStream from) throws IOException {
  return read(from, new FileMetaData());
}
/**
 * A metadata filter covering the half-open offset range [ startOffset, endOffset ).
 * @author Julien Le Dem
 */
static final class RangeMetadataFilter extends MetadataFilter {
  final long startOffset;
  final long endOffset;

  RangeMetadataFilter(long startOffset, long endOffset) {
    super();
    this.startOffset = startOffset;
    this.endOffset = endOffset;
  }

  @Override
  <T, E extends Throwable> T accept(MetadataFilterVisitor<T, E> visitor) throws E {
    return visitor.visit(this);
  }

  boolean contains(long offset) {
    return offset >= this.startOffset && offset < this.endOffset;
  }

  @Override
  public String toString() {
    return "range(s:" + startOffset + ", e:" + endOffset + ")";
  }
}
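A hedged usage sketch: building a filter for one file split so that only row groups whose start offset lies in [ startOffset, endOffset ) are kept. The split boundaries and the direct constructor call are illustrative; the constructor is package-private above, so a real caller would sit in the same package or go through a factory:

// Sketch: keep only row groups starting inside this split's byte range.
long splitStart = 0L;                  // illustrative split boundaries
long splitEnd = 128L * 1024 * 1024;
RangeMetadataFilter filter = new RangeMetadataFilter(splitStart, splitEnd);
boolean keep = filter.contains(rowGroupStartOffset);  // rowGroupStartOffset: hypothetical row-group start offset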
@Override
public FileMetaData visit(NoFilter filter) throws IOException {
  return readFileMetaData(from);
}
private static Optional<DictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName) {
  try {
    ByteArrayInputStream inputStream = new ByteArrayInputStream(data);
    PageHeader pageHeader = Util.readPageHeader(inputStream);
    if (pageHeader.type != PageType.DICTIONARY_PAGE) {
      return Optional.empty();
    }
    Slice compressedData = wrappedBuffer(data, data.length - inputStream.available(), pageHeader.getCompressed_page_size());
    DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header();
    ParquetEncoding encoding = getParquetEncoding(Encoding.valueOf(dicHeader.getEncoding().name()));
    int dictionarySize = dicHeader.getNum_values();
    return Optional.of(new DictionaryPage(decompress(codecName, compressedData, pageHeader.getUncompressed_page_size()), dictionarySize, encoding));
  } catch (IOException ignored) {
    return Optional.empty();
  }
}
public static void writeFileMetaData(parquet.format.FileMetaData fileMetadata, OutputStream to) throws IOException {
  write(fileMetadata, to);
}
@Deprecated
public void writeDataPageHeader(
    int uncompressedSize, int compressedSize, int valueCount,
    parquet.column.Encoding rlEncoding,
    parquet.column.Encoding dlEncoding,
    parquet.column.Encoding valuesEncoding,
    OutputStream to) throws IOException {
  writePageHeader(
      newDataPageHeader(uncompressedSize, compressedSize, valueCount,
          new parquet.column.statistics.BooleanStatistics(),
          rlEncoding, dlEncoding, valuesEncoding),
      to);
}