/**
 * Advances to the next row group once every row loaded so far has been returned.
 *
 * <p>Does nothing while {@code rowsReturned} differs from {@code totalCountLoadedSoFar}.
 * Otherwise it reads the next row group, rebuilds {@code columnReaders} with one
 * {@link VectorizedColumnReader} per requested column that is not flagged in
 * {@code missingColumns}, and adds the row group's row count to
 * {@code totalCountLoadedSoFar}.
 *
 * @throws IOException if the underlying reader has no further row group to return
 */
private void checkEndOfRowGroup() throws IOException {
  // Rows already loaded have not all been handed out yet; nothing to load.
  if (rowsReturned != totalCountLoadedSoFar) {
    return;
  }

  PageReadStore rowGroup = reader.readNextRowGroup();
  if (rowGroup == null) {
    throw new IOException("expecting more rows but reached last block. Read "
        + rowsReturned + " out of " + totalRowCount);
  }

  List<ColumnDescriptor> descriptors = requestedSchema.getColumns();
  List<Type> fieldTypes = requestedSchema.asGroupType().getFields();
  int columnCount = descriptors.size();

  // Slots for missing columns are intentionally left null in the fresh array.
  columnReaders = new VectorizedColumnReader[columnCount];
  for (int col = 0; col < columnCount; col++) {
    if (!missingColumns[col]) {
      ColumnDescriptor descriptor = descriptors.get(col);
      columnReaders[col] = new VectorizedColumnReader(
          descriptor,
          fieldTypes.get(col).getOriginalType(),
          rowGroup.getPageReader(descriptor),
          convertTz);
    }
  }

  totalCountLoadedSoFar += rowGroup.getRowCount();
}
} // NOTE(review): unmatched brace kept from the original line; presumably closes the enclosing class
/**
 * Advances to the next row group once every row loaded so far has been returned.
 *
 * <p>Does nothing while {@code rowsReturned} differs from {@code totalCountLoadedSoFar}.
 * Otherwise it reads the next row group, rebuilds {@code columnReaders} with one
 * {@link VectorizedColumnReader} per requested column that is not flagged in
 * {@code missingColumns}, and adds the row group's row count to
 * {@code totalCountLoadedSoFar}.
 *
 * @throws IOException if the underlying reader has no further row group to return
 */
private void checkEndOfRowGroup() throws IOException {
  // Rows already loaded have not all been handed out yet; nothing to load.
  if (rowsReturned != totalCountLoadedSoFar) {
    return;
  }

  PageReadStore rowGroup = reader.readNextRowGroup();
  if (rowGroup == null) {
    throw new IOException("expecting more rows but reached last block. Read "
        + rowsReturned + " out of " + totalRowCount);
  }

  List<ColumnDescriptor> descriptors = requestedSchema.getColumns();
  List<Type> fieldTypes = requestedSchema.asGroupType().getFields();
  int columnCount = descriptors.size();

  // Slots for missing columns are intentionally left null in the fresh array.
  columnReaders = new VectorizedColumnReader[columnCount];
  for (int col = 0; col < columnCount; col++) {
    if (!missingColumns[col]) {
      ColumnDescriptor descriptor = descriptors.get(col);
      columnReaders[col] = new VectorizedColumnReader(
          descriptor,
          fieldTypes.get(col).getOriginalType(),
          rowGroup.getPageReader(descriptor),
          convertTz);
    }
  }

  totalCountLoadedSoFar += rowGroup.getRowCount();
}
} // NOTE(review): unmatched brace kept from the original line; presumably closes the enclosing class
/**
 * Builds the value reader for the top-level Parquet message.
 *
 * <p>A message is handled exactly like a struct: the call is forwarded to
 * {@code struct} with the message viewed as a group type.
 *
 * @param expected struct type forwarded unchanged to {@code struct}
 * @param message Parquet message schema; passed on via {@code asGroupType()}
 * @param fieldReaders readers forwarded unchanged to {@code struct}
 *     (presumably one per top-level field; confirm against the visitor that calls this)
 * @return whatever {@code struct} returns for these arguments
 */
@Override
public ParquetValueReader<?> message(StructType expected, MessageType message, List<ParquetValueReader<?>> fieldReaders) {
  return struct(expected, message.asGroupType(), fieldReaders);
}
/**
 * Builds the value writer for the top-level Parquet message.
 *
 * <p>A message is handled exactly like a struct: the call is forwarded to
 * {@code struct} with the message viewed as a group type.
 *
 * @param message Parquet message schema; passed on via {@code asGroupType()}
 * @param fieldWriters writers forwarded unchanged to {@code struct}
 *     (presumably one per top-level field; confirm against the visitor that calls this)
 * @return whatever {@code struct} returns for these arguments
 */
@Override
public ParquetValueWriter<?> message(MessageType message, List<ParquetValueWriter<?>> fieldWriters) {
  return struct(message.asGroupType(), fieldWriters);
}
/**
 * Builds the value reader for the top-level Parquet message.
 *
 * <p>A message is handled exactly like a struct: the call is forwarded to
 * {@code struct} with the message viewed as a group type.
 *
 * @param expected struct type forwarded unchanged to {@code struct}
 * @param message Parquet message schema; passed on via {@code asGroupType()}
 * @param fieldReaders readers forwarded unchanged to {@code struct}
 *     (presumably one per top-level field; confirm against the visitor that calls this)
 * @return whatever {@code struct} returns for these arguments
 */
@Override
public ParquetValueReader<?> message(Types.StructType expected, MessageType message, List<ParquetValueReader<?>> fieldReaders) {
  return struct(expected, message.asGroupType(), fieldReaders);
}
/**
 * Builds the value reader for the top-level Parquet message.
 *
 * <p>A message is handled exactly like a struct: the call is forwarded to
 * {@code struct} with the message viewed as a group type.
 *
 * @param expected struct type forwarded unchanged to {@code struct}
 * @param message Parquet message schema; passed on via {@code asGroupType()}
 * @param fieldReaders readers forwarded unchanged to {@code struct}
 *     (presumably one per top-level field; confirm against the visitor that calls this)
 * @return whatever {@code struct} returns for these arguments
 */
@Override
public ParquetValueReader<?> message(Types.StructType expected, MessageType message, List<ParquetValueReader<?>> fieldReaders) {
  return struct(expected, message.asGroupType(), fieldReaders);
}
/**
 * Builds the value reader for the top-level Parquet message.
 *
 * <p>A message is handled exactly like a struct: the call is forwarded to
 * {@code struct} with the message viewed as a group type.
 *
 * @param expected struct type forwarded unchanged to {@code struct}
 * @param message Parquet message schema; passed on via {@code asGroupType()}
 * @param fieldReaders readers forwarded unchanged to {@code struct}
 *     (presumably one per top-level field; confirm against the visitor that calls this)
 * @return whatever {@code struct} returns for these arguments
 */
@Override
public ParquetValueReader<?> message(Types.StructType expected, MessageType message, List<ParquetValueReader<?>> fieldReaders) {
  return struct(expected, message.asGroupType(), fieldReaders);
}
/**
 * Builds the value writer for the top-level Parquet message.
 *
 * <p>A message is handled exactly like a struct: the call is forwarded to
 * {@code struct} with the message viewed as a group type.
 *
 * @param message Parquet message schema; passed on via {@code asGroupType()}
 * @param fieldWriters writers forwarded unchanged to {@code struct}
 *     (presumably one per top-level field; confirm against the visitor that calls this)
 * @return whatever {@code struct} returns for these arguments
 */
@Override
public ParquetValueWriter<?> message(MessageType message, List<ParquetValueWriter<?>> fieldWriters) {
  return struct(message.asGroupType(), fieldWriters);
}
/**
 * Determines the Parquet requested schema used for column pruning.
 *
 * <p>Called on the executor side before
 * {@link #prepareForRead(Configuration, Map, MessageType, ReadContext)} and before the
 * actual Parquet record readers are instantiated.
 *
 * @param context initialization context; only its file schema is read here
 * @return a read context holding the file schema clipped by {@code fieldNames},
 *     paired with a new, empty metadata map
 */
@Override
public ReadContext init(InitContext context) {
  MessageType fileSchema = context.getFileSchema();
  // Clip the file's schema using the configured fieldNames.
  MessageType prunedSchema = clipParquetSchema(fileSchema.asGroupType(), fieldNames);
  return new ReadContext(prunedSchema, new HashMap<String, String>());
}
/**
 * Converts a Parquet {@link MessageType} into Flink field-name to {@link InternalType} pairs.
 *
 * <p>Only the top-level fields of the schema are visited. Each field with repetition
 * {@code OPTIONAL} or {@code REQUIRED} is converted with {@code convertType} and stored
 * under its own name.
 *
 * @param parquetSchema the Parquet schema whose top-level fields are converted
 * @return a new map from field name to converted type
 * @throws UnsupportedOperationException if a top-level field has any other repetition
 */
public Map<String, InternalType> convertToInternalType(MessageType parquetSchema) {
  Map<String, InternalType> typesByName = new HashMap<>();
  for (Type field : parquetSchema.asGroupType().getFields()) {
    String fieldName = field.getName();
    switch (field.getRepetition()) {
      case REQUIRED:
      case OPTIONAL:
        typesByName.put(fieldName, convertType(field));
        break;
      default:
        // Any other repetition has no mapping here.
        throw new UnsupportedOperationException(field + " is not supported");
    }
  }
  return typesByName;
}
/**
 * Visits the root message type of a schema.
 *
 * <p>Creates a {@link SchemaElement} named after the message, copies the message's
 * field id onto it when one is set, and hands the element to {@code visitChildren}
 * together with {@code result} and the message viewed as a group type.
 *
 * @param messageType the root message type being visited
 */
@Override
public void visit(MessageType messageType) {
  SchemaElement rootElement = new SchemaElement(messageType.getName());
  // getId() may be null; only set the field id when an id is present.
  if (null != messageType.getId()) {
    rootElement.setField_id(messageType.getId().intValue());
  }
  visitChildren(result, messageType.asGroupType(), rootElement);
}
/**
 * Visits the root message type of a schema.
 *
 * <p>Creates a {@link SchemaElement} named after the message, copies the message's
 * field id onto it when one is set, and hands the element to {@code visitChildren}
 * together with {@code result} and the message viewed as a group type.
 *
 * @param messageType the root message type being visited
 */
@Override
public void visit(MessageType messageType) {
  SchemaElement rootElement = new SchemaElement(messageType.getName());
  // getId() may be null; only set the field id when an id is present.
  if (null != messageType.getId()) {
    rootElement.setField_id(messageType.getId().intValue());
  }
  visitChildren(result, messageType.asGroupType(), rootElement);
}