public ParquetReader(MessageColumnIO messageColumnIO, List<BlockMetaData> blocks, ParquetDataSource dataSource, AggregatedMemoryContext systemMemoryContext)
{
    // Reader over the row groups ("blocks") of a single Parquet file.
    // Fix: blocks and messageColumnIO were the only constructor arguments not
    // null-checked; fail fast with a message, matching the other fields' style.
    this.blocks = requireNonNull(blocks, "blocks is null");
    this.dataSource = requireNonNull(dataSource, "dataSource is null");
    this.systemMemoryContext = requireNonNull(systemMemoryContext, "systemMemoryContext is null");
    // Child context scoped to the current row group, reset as the reader advances
    this.currentRowGroupMemoryContext = systemMemoryContext.newAggregatedMemoryContext();
    // One reader slot per leaf (primitive) column of the requested schema
    columns = requireNonNull(messageColumnIO, "messageColumnIO is null").getLeaves();
    columnReaders = new PrimitiveColumnReader[columns.size()];
}
/**
 * Reads records without any record-level filtering.
 * Convenience overload that delegates to the three-argument variant with
 * {@code FilterCompat.NOOP}.
 */
public <T> RecordReader<T> getRecordReader(PageReadStore columns, RecordMaterializer<T> recordMaterializer)
{
    return getRecordReader(columns, recordMaterializer, FilterCompat.NOOP);
}
@Test
public void testPushParser() {
    // Write a single record (r1) through the column IO into an in-memory page store.
    MemPageStore memPageStore = new MemPageStore();
    MemColumnWriteStore writeStore = new MemColumnWriteStore(memPageStore, 800);
    MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema);
    new GroupWriter(columnIO.getRecordWriter(writeStore), schema).write(r1);
    writeStore.flush();

    // Queue up the event sequence the converter must observe while reading r1 back.
    final Deque<String> expectations = new ArrayDeque<String>();
    for (String expectedEvent : expectedEventsForR1) {
        expectations.add(expectedEvent);
    }

    // Reading the record drives the validating converter; it asserts each event in order.
    RecordReader<Void> recordReader = columnIO.getRecordReader(memPageStore, new ExpectationValidatingConverter(expectations, schema));
    recordReader.read();
}
@Override
public void visit(MessageType messageType) {
    // Projection variant: the column IO tree is rooted at requestedSchema (the
    // projection), while messageType (the file schema) guides child resolution.
    columnIO = new MessageColumnIO(requestedSchema, validating);
    visitChildren(columnIO, messageType, requestedSchema);
    // Compute repetition/definition levels for the whole tree, then record the
    // leaf (primitive) columns collected during the visit.
    columnIO.setLevels();
    columnIO.setLeaves(leaves);
}
private void initStore() { // we don't want this number to be too small // ideally we divide the block equally across the columns // it is unlikely all columns are going to be the same size. int initialBlockBufferSize = max(MINIMUM_BUFFER_SIZE, blockSize / schema.getColumns().size() / 5); pageStore = new ColumnChunkPageWriteStore(compressor, schema, initialBlockBufferSize); // we don't want this number to be too small either // ideally, slightly bigger than the page size, but not bigger than the block buffer int initialPageBufferSize = max(MINIMUM_BUFFER_SIZE, min(pageSize + pageSize / 10, initialBlockBufferSize)); store = new ColumnWriteStoreImpl(pageStore, pageSize, initialPageBufferSize, dictionaryPageSize, enableDictionary, writerVersion); MessageColumnIO columnIO = new ColumnIOFactory(validating).getColumnIO(schema); writeSupport.prepareForWrite(columnIO.getRecordWriter(store)); }
public RecordConsumer getRecordWriter(ColumnWriteStore columns) {
    // Core consumer writes records into the column store; optionally wrap it
    // with debug logging and then schema validation.
    RecordConsumer consumer = new MessageColumnIORecordConsumer(columns);
    if (DEBUG) {
        consumer = new RecordConsumerLoggingWrapper(consumer);
    }
    if (validating) {
        return new ValidatingRecordConsumer(consumer, getType());
    }
    return consumer;
}
void setLevels() {
    // Seed the recursion at the root: zero repetition and definition levels,
    // an empty field path, and this message node acting as both its own
    // repetition and definition ancestor.
    setLevels(
        0,
        0,
        new String[0],
        new int[0],
        Arrays.<ColumnIO>asList(this),
        Arrays.<ColumnIO>asList(this));
}
/**
 * @deprecated use {@link #getRecordReader(PageReadStore, RecordMaterializer, Filter)}
 */
@Deprecated
public <T> RecordReader<T> getRecordReader(PageReadStore columns, RecordMaterializer<T> recordMaterializer, UnboundRecordFilter filter) {
    // Adapt the legacy UnboundRecordFilter to the FilterCompat API and delegate.
    return getRecordReader(columns, recordMaterializer, FilterCompat.get(filter));
}
@Override
public void visit(MessageType messageType) {
    // Full-schema variant: the column IO tree mirrors the file schema itself
    // (no projection applied).
    columnIO = new MessageColumnIO(messageType, validating);
    visitChildren(columnIO, messageType);
    // Compute repetition/definition levels, then record the leaf columns
    // collected during the visit.
    columnIO.setLevels();
    columnIO.setLeaves(leaves);
}
private void initStore() {
    // The page store buffers written pages per column chunk; the column write
    // store layered on top is configured by the writer properties.
    pageStore = new ColumnChunkPageWriteStore(compressor, schema, pageSize);
    columnStore = parquetProperties.newColumnWriteStore(schema, pageStore, pageSize);

    // Hand the record-writing consumer to the write support before any records arrive.
    MessageColumnIO io = new ColumnIOFactory(validating).getColumnIO(schema);
    writeSupport.prepareForWrite(io.getRecordWriter(columnStore));
}
public RecordConsumer getRecordWriter(ColumnWriteStore columns) {
    // Build the consumer chain inside-out: core writer, optional debug
    // logging wrapper, optional schema-validating wrapper.
    RecordConsumer writer = new MessageColumnIORecordConsumer(columns);
    if (DEBUG) {
        writer = new RecordConsumerLoggingWrapper(writer);
    }
    return validating ? new ValidatingRecordConsumer(writer, getType()) : writer;
}
void setLevels() {
    // Kick off level computation from the root: zero levels, no field path,
    // and this root as the sole ancestor on both the repetition and the
    // definition list.
    setLevels(0, 0,
            new String[0], new int[0],
            Arrays.<ColumnIO>asList(this),
            Arrays.<ColumnIO>asList(this));
}
public static List<PrimitiveColumnIO> getColumns(MessageType fileSchema, MessageType requestedSchema) {
    // Build the column IO tree for the requested projection against the file
    // schema, then return its leaf (primitive) columns.
    ColumnIOFactory factory = new ColumnIOFactory();
    MessageColumnIO columnIO = factory.getColumnIO(requestedSchema, fileSchema, true);
    return columnIO.getLeaves();
}
private RecordReader<Object> createRecordReader(PageReadStore store) {
    // Fix: the original guarded materializer with a plain `assert`, which is a
    // no-op unless the JVM runs with -ea; use an explicit state check so the
    // failure is deterministic in production too.
    if (materializer == null) {
        throw new IllegalStateException("materializer must be initialized before creating a record reader");
    }
    this.currentRecordReader = columnIo.getRecordReader(store, materializer);
    // Remember how many rows this page store holds, to track remaining rows.
    this.rowRest = store.getRowCount();
    return currentRecordReader;
}
private static void write(MemPageStore memPageStore) {
    // 50MB in-memory column write store.
    MemColumnWriteStore writeStore = new MemColumnWriteStore(memPageStore, 50 * 1024 * 1024);
    MessageColumnIO columnIO = newColumnFactory(schema);
    GroupWriter writer = new GroupWriter(columnIO.getRecordWriter(writeStore), schema);

    // Two individual sample records, then large batches (5x10k, 100k, 1M).
    writer.write(r1);
    writer.write(r2);
    for (int i = 0; i < 5; i++) {
        write(writer, 10000);
    }
    write(writer, 100000);
    write(writer, 1000000);
    writeStore.flush();

    // Report total and per-column peak memory usage.
    System.out.println();
    System.out.println(writeStore.memSize() + " bytes used total");
    System.out.println("max col size: " + writeStore.maxColMemSize() + " bytes");
}
@Override
public RecordReader<T> visit(NoOpFilter noOpFilter) {
    // No record-level filtering requested: return the plain reader over a
    // fresh column read store rooted at this message's converter.
    // NOTE(review): the trailing braces below close an enclosing anonymous
    // class and call that begin outside this view.
    return new RecordReaderImplementation<T>(
        MessageColumnIO.this,
        recordMaterializer,
        validating,
        new ColumnReadStoreImpl(columns, recordMaterializer.getRootConverter(), getType()));
} });
public MessageColumnIORecordConsumer(ColumnWriteStore columns) { int maxDepth = 0; this.columnWriter = new ColumnWriter[MessageColumnIO.this.getLeaves().size()]; for (PrimitiveColumnIO primitiveColumnIO : MessageColumnIO.this.getLeaves()) { maxDepth = Math.max(maxDepth, primitiveColumnIO.getFieldPath().length); columnWriter[primitiveColumnIO.getId()] = columns.getColumnWriter(primitiveColumnIO.getColumnDescriptor()); } currentIndex = new int[maxDepth]; r = new int[maxDepth]; }
private RecordReaderImplementation<Group> getRecordReader(MessageColumnIO columnIO, MessageType schema, PageReadStore pageReadStore) {
    // Materialize rows as Groups; the cast assumes the unfiltered read path
    // returns the concrete RecordReaderImplementation.
    RecordConverter<Group> converter = new GroupRecordConverter(schema);
    RecordReader<Group> reader = columnIO.getRecordReader(pageReadStore, converter);
    return (RecordReaderImplementation<Group>) reader;
}
// Write two sample records (r1, r2) through the column IO into the write store.
GroupWriter groupWriter = new GroupWriter(columnIO.getRecordWriter(columns), schema);
groupWriter.write(r1);
groupWriter.write(r2);
@Override
public RecordReader<T> visit(UnboundRecordFilterCompat unboundRecordFilterCompat) {
    // Legacy unbound-record-filter path: build the read store once, then wrap
    // the standard reader with record-level filtering over all rows.
    ColumnReadStoreImpl readStore = new ColumnReadStoreImpl(columns, recordMaterializer.getRootConverter(), getType());
    return new FilteredRecordReader<T>(
        MessageColumnIO.this,
        recordMaterializer,
        validating,
        readStore,
        unboundRecordFilterCompat.getUnboundRecordFilter(),
        columns.getRowCount());
}