public ColumnWriteStore newColumnWriteStore(MessageType schema, PageWriteStore pageStore) {
  switch (writerVersion) {
    case PARQUET_1_0:
      return new ColumnWriteStoreV1(pageStore, this);
    case PARQUET_2_0:
      return new ColumnWriteStoreV2(schema, pageStore, this);
    default:
      throw new IllegalArgumentException("unknown version " + writerVersion);
  }
}
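A minimal sketch of driving this factory end to end, assuming a PageWriteStore named pageStore is already available; the schema string and the value written are made up for illustration (MessageTypeParser, ColumnIOFactory, and RecordConsumer come from parquet-mr's org.apache.parquet.schema and org.apache.parquet.io packages):

MessageType schema = MessageTypeParser.parseMessageType("message root { required int32 id; }");
ParquetProperties props = ParquetProperties.builder()
    .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_1_0)
    .build();
ColumnWriteStore store = props.newColumnWriteStore(schema, pageStore); // hits the PARQUET_1_0 branch above
RecordConsumer consumer = new ColumnIOFactory().getColumnIO(schema).getRecordWriter(store);
consumer.startMessage();
consumer.startField("id", 0);
consumer.addInteger(42);
consumer.endField("id", 0);
consumer.endMessage(); // recent parquet-mr versions end the record in the store here
store.flush();         // push buffered values out to the PageWriteStore as pages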
public ColumnWriter getColumnWriter(ColumnDescriptor path) {
  ColumnWriterV1 column = columns.get(path);
  if (column == null) {
    column = newMemColumn(path);
    columns.put(path, column);
  }
  return column;
}
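The get-then-put above predates java.util.Map#computeIfAbsent; on a modern JDK the same lazy initialization can be written as this sketch (assuming the same columns map and newMemColumn helper, and no concurrent access):

public ColumnWriter getColumnWriter(ColumnDescriptor path) {
  // Create the per-column writer on first access, then reuse it
  return columns.computeIfAbsent(path, this::newMemColumn);
}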
// Merges one column across several small row groups into a single chunk.
// The original snippet is truncated mid-method; the value-copy loop and the
// closing of the try block are reconstructed below and marked as assumptions.
ColumnDescriptor path = schema.getColumns().get(columnIndex);
ColumnChunkPageWriteStore store = new ColumnChunkPageWriteStore(
    compressor, schema, allocator, ParquetProperties.DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH);
ColumnWriteStoreV1 columnWriteStoreV1 =
    new ColumnWriteStoreV1(schema, store, ParquetProperties.builder().build());
for (BlocksCombiner.SmallBlock smallBlock : smallBlocks.getBlocks()) {
  ParquetFileReader parquetFileReader = smallBlock.getReader();
  try {
    Optional<PageReader> columnChunkPageReader =
        parquetFileReader.readColumnInBlock(smallBlock.getBlockIndex(), path);
    ColumnWriter columnWriter = columnWriteStoreV1.getColumnWriter(path);
    if (columnChunkPageReader.isPresent()) {
      // columnReadStore is presumably a field of the enclosing class (a ColumnReadStoreImpl)
      ColumnReader columnReader = columnReadStore.newMemColumnReader(path, columnChunkPageReader.get());
      // Reconstructed (the original is cut off here): copy values one by one,
      // ending records the way the original's `if (def == 0)` test did.
      for (long i = 0, n = columnReader.getTotalValueCount(); i < n; i++) {
        int def = columnReader.getCurrentDefinitionLevel();
        copyValue(columnReader, columnWriter); // hypothetical helper; a real one switches on the column's primitive type
        columnReader.consume();
        if (def == 0) {
          columnWriteStoreV1.endRecord();
        }
      }
    }
  } finally {
    // per-reader cleanup elided in the original snippet
  }
}
this.startBlock(smallBlocks.getRowCount());
columnWriteStoreV1.flush();
store.flushToFileWriter(path, this);
public ColumnWriteStore newColumnWriteStore(MessageType schema, PageWriteStore pageStore) {
  switch (writerVersion) {
    case PARQUET_1_0:
      return new ColumnWriteStoreV1(schema, pageStore, this);
    case PARQUET_2_0:
      return new ColumnWriteStoreV2(schema, pageStore, this);
    default:
      throw new IllegalArgumentException("unknown version " + writerVersion);
  }
}
private void newSchema() throws IOException {
  // Reset it to half of current number and bound it within the limits
  recordCountForNextMemCheck = min(
      max(MINIMUM_RECORD_COUNT_FOR_CHECK, recordCountForNextMemCheck / 2),
      MAXIMUM_RECORD_COUNT_FOR_CHECK);

  String json = new Schema(batchSchema).toJson();
  extraMetaData.put(DREMIO_ARROW_SCHEMA_2_1, json);
  List<Type> types = Lists.newArrayList();
  for (Field field : batchSchema) {
    if (field.getName().equalsIgnoreCase(WriterPrel.PARTITION_COMPARATOR_FIELD)) {
      continue;
    }
    Type childType = getType(field);
    if (childType != null) {
      types.add(childType);
    }
  }
  Preconditions.checkState(types.size() > 0, "No types for parquet schema");
  schema = new MessageType("root", types);

  int dictionarySize = (int) context.getOptions().getOption(ExecConstants.PARQUET_DICT_PAGE_SIZE_VALIDATOR);
  final ParquetProperties parquetProperties = new ParquetProperties(
      dictionarySize, writerVersion, enableDictionary,
      new ParquetDirectByteBufferAllocator(columnEncoderAllocator), pageSize, true,
      enableDictionaryForBinary);
  pageStore = ColumnChunkPageWriteStoreExposer.newColumnChunkPageWriteStore(
      codecFactory.getCompressor(codec), schema, parquetProperties);
  store = new ColumnWriteStoreV1(pageStore, pageSize, parquetProperties);
  MessageColumnIO columnIO = new ColumnIOFactory(false).getColumnIO(this.schema);
  consumer = columnIO.getRecordWriter(store);
  setUp(schema, consumer);
}
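For reference, a hand-built sketch of the kind of "root" MessageType the loop above assembles from Arrow fields; the two field names here are made up for illustration:

MessageType example = new MessageType("root",
    new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.INT64, "id"),
    new PrimitiveType(Type.Repetition.OPTIONAL, PrimitiveType.PrimitiveTypeName.BINARY, "name"));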
ParquetProperties parquetProperties = ParquetProperties.builder() // earlier builder calls truncated in the original snippet
    .withValuesWriterFactory(new DefaultV1ValuesWriterFactory())
    .build();
store = new ColumnWriteStoreV1(pageStore, parquetProperties);
MessageColumnIO columnIO = new ColumnIOFactory(false).getColumnIO(this.schema);
consumer = columnIO.getRecordWriter(store);
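For comparison, a sketch of the symmetric V2 setup, assuming the same schema and pageStore; DefaultV2ValuesWriterFactory is the V2 sibling of the factory used above:

ParquetProperties v2Properties = ParquetProperties.builder()
    .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0)
    .withValuesWriterFactory(new DefaultV2ValuesWriterFactory())
    .build();
ColumnWriteStore v2Store = new ColumnWriteStoreV2(schema, pageStore, v2Properties);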