@Override
public void readFields(DataInput in) throws IOException {
    // Reconstruct the wrapped split from the serialized bytes.
    realSplit = new ParquetInputSplit();
    realSplit.readFields(in);
}
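For context, the matching half of the Writable round trip is a write() override that delegates to the wrapped split. A minimal sketch, assuming the same wrapper class and realSplit field as the fragment above:

@Override
public void write(DataOutput out) throws IOException {
    // ParquetInputSplit serializes its own fields; the wrapper only delegates.
    realSplit.write(out);
}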
@Override
public void initialize(GuaguaFileSplit split) throws IOException {
    ReadSupport<Tuple> readSupport = getReadSupportInstance(this.conf);
    this.parquetRecordReader = new ParquetRecordReader<Tuple>(readSupport, getFilter(this.conf));
    ParquetInputSplit parquetInputSplit = new ParquetInputSplit(
            new Path(split.getPath()),
            split.getOffset(),
            split.getOffset() + split.getLength(),
            split.getLength(),
            null,   // hosts unknown
            null);  // no row-group offsets: the reader resolves row groups itself
    try {
        this.parquetRecordReader.initialize(parquetInputSplit, buildContext());
    } catch (InterruptedException e) {
        throw new GuaguaRuntimeException(e);
    }
}
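Once initialize() returns, the wrapped reader is driven through the standard mapreduce RecordReader API. A minimal sketch, assuming the same parquetRecordReader field as above; process(...) is a hypothetical per-record handler, not part of the snippet:

try {
    while (this.parquetRecordReader.nextKeyValue()) {
        Tuple record = this.parquetRecordReader.getCurrentValue();
        process(record); // hypothetical per-record handler
    }
    this.parquetRecordReader.close();
} catch (InterruptedException e) {
    throw new GuaguaRuntimeException(e);
}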
// Arguments: file path, start, end (start + length), length, hosts (null = unknown), row-group offsets.
ParquetInputSplit split = new ParquetInputSplit(path, start, start + length, length, null, offsets);
/**
 * Builds a {@code ParquetInputSplit} from a mapreduce {@link FileSplit}.
 *
 * @param split a mapreduce FileSplit
 * @return a ParquetInputSplit
 * @throws IOException if the split's block locations cannot be read
 */
static ParquetInputSplit from(FileSplit split) throws IOException {
    return new ParquetInputSplit(split.getPath(),
            split.getStart(), split.getStart() + split.getLength(), split.getLength(),
            split.getLocations(), null);
}
/**
 * Builds a {@code ParquetInputSplit} from a mapred
 * {@link org.apache.hadoop.mapred.FileSplit}.
 *
 * @param split a mapred FileSplit
 * @return a ParquetInputSplit
 * @throws IOException if the split's block locations cannot be read
 */
static ParquetInputSplit from(org.apache.hadoop.mapred.FileSplit split) throws IOException {
    return new ParquetInputSplit(split.getPath(),
            split.getStart(), split.getStart() + split.getLength(), split.getLength(),
            split.getLocations(), null);
}
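A hedged usage sketch for the two from(...) overloads above: a new-API record reader typically adapts the generic split it is handed before initializing a Parquet reader. The initialize() signature below is the standard org.apache.hadoop.mapreduce.RecordReader one and is not part of the snippets:

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Adapt the framework-provided split; the old-API overload works the same
    // way for an org.apache.hadoop.mapred.FileSplit.
    ParquetInputSplit parquetSplit = from((FileSplit) inputSplit);
    // ... hand parquetSplit to the underlying Parquet record reader
}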
public ParquetInputSplit getParquetInputSplit(FileStatus fileStatus, String requestedSchema,
        Map<String, String> readSupportMetadata) throws IOException {
    MessageType requested = MessageTypeParser.parseMessageType(requestedSchema);

    // Sum only the bytes of the column chunks that the requested schema actually reads.
    long length = 0;
    for (BlockMetaData block : this.getRowGroups()) {
        List<ColumnChunkMetaData> columns = block.getColumns();
        for (ColumnChunkMetaData column : columns) {
            if (requested.containsPath(column.getPath().toArray())) {
                length += column.getTotalSize();
            }
        }
    }

    // The split ends where its last row group ends.
    BlockMetaData lastRowGroup = this.getRowGroups().get(this.getRowGroupCount() - 1);
    long end = lastRowGroup.getStartingPos() + lastRowGroup.getTotalByteSize();

    // Record each row group's starting position so the reader can seek directly.
    long[] rowGroupOffsets = new long[this.getRowGroupCount()];
    for (int i = 0; i < rowGroupOffsets.length; i++) {
        rowGroupOffsets[i] = this.getRowGroups().get(i).getStartingPos();
    }

    return new ParquetInputSplit(
            fileStatus.getPath(),
            hdfsBlock.getOffset(),
            end,
            length,
            hdfsBlock.getHosts(),
            rowGroupOffsets);
}
}
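Note that hdfsBlock is not a parameter: it is a field of the enclosing class, which pairs one HDFS block with the row groups that start inside it. A rough sketch of the shape that enclosing class would need, with names assumed from the fragment above rather than taken from any particular release:

static class SplitInfo {                         // class name is an assumption
    private final BlockLocation hdfsBlock;       // HDFS block backing this split
    private final List<BlockMetaData> rowGroups = new ArrayList<BlockMetaData>();

    SplitInfo(BlockLocation hdfsBlock) {
        this.hdfsBlock = hdfsBlock;
    }

    void addRowGroup(BlockMetaData rowGroup) {
        this.rowGroups.add(rowGroup);
    }

    List<BlockMetaData> getRowGroups() {
        return rowGroups;
    }

    int getRowGroupCount() {
        return rowGroups.size();
    }
}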
skipTimestampConversion = !Strings.nullToEmpty(fileMetaData.getCreatedBy()).startsWith("parquet-mr");
split = new ParquetInputSplit(finalPath, splitStart, splitLength,