/**
 * Builds a record reader that delegates its work to a freshly created
 * {@link InternalParquetRecordReader}.
 *
 * @param readSupport Object which helps reads files of the given type, e.g. Thrift, Avro.
 * @param filter for filtering individual records
 */
public ParquetRecordReader(ReadSupport<T> readSupport, Filter filter) {
  this.internalReader = new InternalParquetRecordReader<T>(readSupport, filter);
}
/**
 * Closes the currently open reader, if there is one.
 *
 * @throws IOException if closing the underlying reader fails
 */
@Override
public void close() throws IOException {
  if (reader == null) {
    // Nothing has been opened (or it was already released): nothing to do.
    return;
  }
  reader.close();
}
/**
 * {@inheritDoc}
 *
 * <p>Returns whatever the internal record reader reports as its current value.
 */
@Override
public T getCurrentValue() throws IOException, InterruptedException {
  final T value = internalReader.getCurrentValue();
  return value;
}
/**
 * Moves on to the next footer: closes and clears the current reader (if any)
 * and, when the footer iterator has another entry, creates and initializes a
 * new {@link InternalParquetRecordReader} for it. When no footer is left,
 * {@code reader} stays {@code null}.
 *
 * @throws IOException if closing the previous reader or initializing the new one fails
 */
private void initReader() throws IOException {
  // Release the previous reader before opening the next one.
  if (reader != null) {
    reader.close();
    reader = null;
  }

  if (!footersIterator.hasNext()) {
    return;
  }

  Footer nextFooter = footersIterator.next();
  List<BlockMetaData> allBlocks = nextFooter.getParquetMetadata().getBlocks();
  MessageType schema = nextFooter.getParquetMetadata().getFileMetaData().getSchema();

  // Let the filter decide which blocks are handed to the new reader.
  List<BlockMetaData> keptBlocks = RowGroupFilter.filterRowGroups(filter, allBlocks, schema);

  reader = new InternalParquetRecordReader<T>(readSupport, filter);
  reader.initialize(
      schema,
      nextFooter.getParquetMetadata().getFileMetaData().getKeyValueMetaData(),
      nextFooter.getFile(),
      keptBlocks,
      conf);
}
/**
 * Reads the next record, advancing to the next reader (via {@code initReader()})
 * whenever the current one has no more records.
 *
 * <p>Implemented as a loop rather than recursion so that a long run of readers
 * that yield no records cannot grow the call stack.
 *
 * @return the next record or null if finished
 * @throws IOException if reading fails, or if the thread is interrupted while
 *         reading (the interrupt status is restored before the exception is thrown)
 */
public T read() throws IOException {
  try {
    while (true) {
      if (reader != null && reader.nextKeyValue()) {
        return reader.getCurrentValue();
      }
      // Current reader is missing or exhausted: try to open the next one.
      initReader();
      if (reader == null) {
        // initReader() found nothing more to open.
        return null;
      }
    }
  } catch (InterruptedException e) {
    // Preserve the interrupt for callers further up the stack; the signature
    // only allows IOException, so the original exception is kept as the cause.
    Thread.currentThread().interrupt();
    throw new IOException(e);
  }
}
/**
 * {@inheritDoc}
 *
 * <p>Reports the progress value supplied by the internal record reader.
 */
@Override
public float getProgress() throws IOException, InterruptedException {
  final float progress = internalReader.getProgress();
  return progress;
}
/**
 * {@inheritDoc}
 *
 * <p>Advances the internal record reader and returns its result.
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  final boolean advanced = internalReader.nextKeyValue();
  return advanced;
}
/**
 * Checks whether {@code path}, starting at position {@code index}, names a
 * primitive field reachable from {@code group}.
 *
 * @param group the group in which to look up {@code path[index]}
 * @param path  the field names to follow, one per nesting level
 * @param index position in {@code path} to start from
 * @return {@code true} only if every remaining path element exists and the
 *         last one is a primitive field; {@code false} if the path is already
 *         exhausted, a field is missing, a primitive is hit before the path
 *         ends, or the path ends on a group
 */
private boolean contains(GroupType group, String[] path, int index) {
  GroupType current = group;
  // Walk down one nesting level per path element.
  for (int i = index; i != path.length; i++) {
    String fieldName = path[i];
    if (!current.containsField(fieldName)) {
      return false;
    }
    Type field = current.getType(fieldName);
    if (field.isPrimitive()) {
      // A primitive only matches when it is the final path element.
      return i + 1 == path.length;
    }
    current = field.asGroupType();
  }
  // Path exhausted without landing on a primitive field.
  return false;
}
public boolean nextKeyValue() throws IOException, InterruptedException { boolean recordFound = false; while (!recordFound) { // no more records left if (current >= total) { return false; } try { checkRead(); currentValue = recordReader.read(); current ++; if (recordReader.shouldSkipCurrentRecord()) { // this record is being filtered via the filter2 package if (DEBUG) LOG.debug("skipping record"); continue; } if (currentValue == null) { // only happens with FilteredRecordReader at end of block current = totalCountLoadedSoFar; if (DEBUG) LOG.debug("filtered record reader reached end of block"); continue; } recordFound = true; if (DEBUG) LOG.debug("read value: " + currentValue); } catch (RuntimeException e) { throw new ParquetDecodingException(format("Can not read value at %d in block %d in file %s", current, currentBlock, file), e); } } return true; }
public void initialize(MessageType fileSchema, Map<String, String> fileMetadata, Path file, List<BlockMetaData> blocks, Configuration configuration) throws IOException { // initialize a ReadContext for this file ReadSupport.ReadContext readContext = readSupport.init(new InitContext( configuration, toSetMultiMap(fileMetadata), fileSchema)); this.requestedSchema = readContext.getRequestedSchema(); this.fileSchema = fileSchema; this.file = file; this.columnCount = requestedSchema.getPaths().size(); this.recordConverter = readSupport.prepareForRead( configuration, fileMetadata, fileSchema, readContext); this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true); List<ColumnDescriptor> columns = requestedSchema.getColumns(); reader = new ParquetFileReader(configuration, file, blocks, columns); for (BlockMetaData block : blocks) { total += block.getRowCount(); } LOG.info("RecordReader initialized will read a total of " + total + " records."); }
// Initialize the internal reader with the file schema, file metadata, path,
// the filteredBlocks list and the configuration.
internalReader.initialize( fileSchema, fileMetaData, path, filteredBlocks, configuration);
/**
 * {@inheritDoc}
 *
 * <p>Closes the internal record reader.
 */
@Override
public void close() throws IOException {
  this.internalReader.close();
}