long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset(); if (firstDataPage >= start && firstDataPage < start + length) { footerBlocks.add(block);
/** * @return the offset of the first byte in the chunk */ public long getStartingPos() { long dictionaryPageOffset = getDictionaryPageOffset(); long firstDataPageOffset = getFirstDataPageOffset(); if (dictionaryPageOffset > 0 && dictionaryPageOffset < firstDataPageOffset) { // if there's a dictionary and it's before the first data page, start from there return dictionaryPageOffset; } return firstDataPageOffset; }
/**
 * Builds a text form made of the literal prefix "ColumnMetaData", the string
 * form of {@code properties} and the first data page offset, wrapped in braces.
 *
 * @return the assembled description
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder("ColumnMetaData{");
  text.append(properties.toString());
  text.append(", ");
  text.append(getFirstDataPageOffset());
  text.append("}");
  return text.toString();
}
}
long end = -1L; for (ColumnChunkMetaData column : block.getColumns()) { long off = column.getFirstDataPageOffset(); long len = column.getTotalSize(); begin = Math.min(begin, off);
long end = -1L; for (ColumnChunkMetaData column : block.getColumns()) { long offset = column.getFirstDataPageOffset(); long size = column.getTotalSize(); begin = Math.min(begin, offset);
long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset(); if (firstDataPage >= start && firstDataPage < start + length) { splitGroup.add(block);
/**
 * Writes a one-line summary of a column chunk to {@code out}.
 *
 * The line carries, in order: the optional dotted column path, the type, the
 * codec, the dictionary page offset (DO), the first data page offset (FPO),
 * the total size / total uncompressed size / their ratio (SZ), the value
 * count (VC) and, when the joined encoding list is non-empty, the encodings (ENC).
 *
 * @param out  writer receiving the formatted line
 * @param meta column chunk metadata to describe
 * @param name when {@code true}, the line is prefixed with the dotted column path
 */
private static void showDetails(PrettyPrintWriter out, ColumnChunkMetaData meta, boolean name) {
  final long dictionaryOffset = meta.getDictionaryPageOffset();
  final long firstPageOffset = meta.getFirstDataPageOffset();
  final long totalBytes = meta.getTotalSize();
  final long uncompressedBytes = meta.getTotalUncompressedSize();
  final long valueCount = meta.getValueCount();
  // Floating-point division: uncompressed size relative to total size.
  final double sizeRatio = uncompressedBytes / (double) totalBytes;
  final String encodingList = Joiner.on(',').skipNulls().join(meta.getEncodings());

  if (name) {
    out.format("%s: ", Joiner.on('.').skipNulls().join(meta.getPath()));
  }

  out.format(" %s", meta.getType());
  out.format(" %s", meta.getCodec());
  out.format(" DO:%d", dictionaryOffset);
  out.format(" FPO:%d", firstPageOffset);
  out.format(" SZ:%d/%d/%.2f", totalBytes, uncompressedBytes, sizeRatio);
  out.format(" VC:%d", valueCount);
  if (!encodingList.isEmpty()) {
    out.format(" ENC:%s", encodingList);
  }
  out.println();
}
long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset(); if (firstDataPage >= start && firstDataPage < start + length) { blocks.add(block);
private void addRowGroup(ParquetMetadata parquetMetadata, List<RowGroup> rowGroups, BlockMetaData block) { //rowGroup.total_byte_size = ; List<ColumnChunkMetaData> columns = block.getColumns(); List<ColumnChunk> parquetColumns = new ArrayList<ColumnChunk>(); for (ColumnChunkMetaData columnMetaData : columns) { ColumnChunk columnChunk = new ColumnChunk(columnMetaData.getFirstDataPageOffset()); // verify this is the right offset columnChunk.file_path = block.getPath(); // they are in the same file for now columnChunk.meta_data = new parquet.format.ColumnMetaData( getType(columnMetaData.getType()), toFormatEncodings(columnMetaData.getEncodings()), Arrays.asList(columnMetaData.getPath().toArray()), columnMetaData.getCodec().getParquetCompressionCodec(), columnMetaData.getValueCount(), columnMetaData.getTotalUncompressedSize(), columnMetaData.getTotalSize(), columnMetaData.getFirstDataPageOffset()); columnChunk.meta_data.dictionary_page_offset = columnMetaData.getDictionaryPageOffset(); if (!columnMetaData.getStatistics().isEmpty()) { columnChunk.meta_data.setStatistics(toParquetStatistics(columnMetaData.getStatistics())); } // columnChunk.meta_data.index_page_offset = ; // columnChunk.meta_data.key_value_metadata = ; // nothing yet parquetColumns.add(columnChunk); } RowGroup rowGroup = new RowGroup(parquetColumns, block.getTotalByteSize(), block.getRowCount()); rowGroups.add(rowGroup); }
final long splitLength = ((FileSplit) oldSplit).getLength(); for (final BlockMetaData block : blocks) { final long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset(); if (firstDataPage >= splitStart && firstDataPage < splitStart + splitLength) { splitGroup.add(block);
filePath + " offset " + descriptor.metadata.getFirstDataPageOffset() + " but got " + valuesCountReadSoFar + " values instead over " + pagesInChunk.size() + " pages ending at file offset " + (descriptor.fileOffset + pos()));