/**
 * Builds a write store holding one {@link ColumnWriterV2} for every column
 * returned by {@code schema.getColumns()}.
 *
 * @param schema         message type whose columns each receive a writer
 * @param pageWriteStore supplies the {@link PageWriter} passed to each column writer
 * @param props          writer settings; read here for the page size threshold and
 *                       for the row count at which the first page size check happens
 */
public ColumnWriteStoreV2(
    MessageType schema,
    PageWriteStore pageWriteStore,
    ParquetProperties props) {
  this.props = props;
  this.thresholdTolerance = (long)(props.getPageSizeThreshold() * THRESHOLD_TOLERANCE_RATIO);

  // Collect the writers in a sorted map keyed by column descriptor, then
  // publish it through a read-only view so the column set cannot change later.
  Map<ColumnDescriptor, ColumnWriterV2> writersByColumn =
      new TreeMap<ColumnDescriptor, ColumnWriterV2>();
  for (ColumnDescriptor descriptor : schema.getColumns()) {
    writersByColumn.put(
        descriptor,
        new ColumnWriterV2(descriptor, pageWriteStore.getPageWriter(descriptor), props));
  }
  this.columns = unmodifiableMap(writersByColumn);
  this.writers = this.columns.values();

  // The first page size check is scheduled at the configured minimum row count.
  this.rowCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck();
}
/**
 * Creates a store that starts with no column writers and creates them on demand:
 * the installed {@code ColumnWriterProvider} builds a writer the first time a
 * column descriptor is requested and returns the stored writer afterwards.
 *
 * <p>This constructor is marked {@code @Deprecated}.
 *
 * @param pageWriteStore supplies the page writer for each column when its writer is created
 * @param props          writer settings; read here for the page size threshold, the minimum
 *                       row count for a page size check and the page row count limit
 */
@Deprecated
ColumnWriteStoreBase(
    final PageWriteStore pageWriteStore,
    final ParquetProperties props) {
  this.props = props;
  this.thresholdTolerance = (long) (props.getPageSizeThreshold() * THRESHOLD_TOLERANCE_RATIO);
  this.columns = new TreeMap<>();

  // The first size check happens at whichever is smaller: the configured
  // minimum row count for a check, or the page row count limit.
  this.rowCountForNextSizeCheck =
      min(props.getMinRowCountForPageSizeCheck(), props.getPageRowCountLimit());

  this.columnWriterProvider = new ColumnWriterProvider() {
    @Override
    public ColumnWriter getColumnWriter(ColumnDescriptor path) {
      ColumnWriterBase existing = columns.get(path);
      if (existing != null) {
        return existing;
      }
      // First request for this column: create its writer and remember it.
      ColumnWriterBase created =
          createColumnWriter(path, pageWriteStore.getPageWriter(path), props);
      columns.put(path, created);
      return created;
    }
  };
}
/**
 * Creates a store with one column writer per column of {@code schema}, all built
 * up front through {@code createColumnWriter}. The installed
 * {@code ColumnWriterProvider} only looks writers up in that fixed map.
 *
 * @param schema         message type whose columns each receive a writer
 * @param pageWriteStore supplies the {@link PageWriter} passed to each column writer
 * @param props          writer settings; read here for the page size threshold, the minimum
 *                       row count for a page size check and the page row count limit
 */
ColumnWriteStoreBase(
    MessageType schema,
    PageWriteStore pageWriteStore,
    ParquetProperties props) {
  this.props = props;
  this.thresholdTolerance = (long) (props.getPageSizeThreshold() * THRESHOLD_TOLERANCE_RATIO);

  // Build the writers in a sorted map keyed by column descriptor, then expose
  // it only through a read-only view.
  Map<ColumnDescriptor, ColumnWriterBase> writersByColumn = new TreeMap<>();
  for (ColumnDescriptor descriptor : schema.getColumns()) {
    writersByColumn.put(
        descriptor,
        createColumnWriter(descriptor, pageWriteStore.getPageWriter(descriptor), props));
  }
  this.columns = unmodifiableMap(writersByColumn);

  // The first size check happens at whichever is smaller: the configured
  // minimum row count for a check, or the page row count limit.
  this.rowCountForNextSizeCheck =
      min(props.getMinRowCountForPageSizeCheck(), props.getPageRowCountLimit());

  this.columnWriterProvider = new ColumnWriterProvider() {
    @Override
    public ColumnWriter getColumnWriter(ColumnDescriptor path) {
      // Plain lookup: yields null for a descriptor that has no entry in the map.
      return columns.get(path);
    }
  };
}
minRecordToWait = props.getMinRowCountForPageSizeCheck(); max(minRecordToWait / 2, props.getMinRowCountForPageSizeCheck()), props.getMaxRowCountForPageSizeCheck()); } else { rowCountForNextSizeCheck = rowCount + props.getMinRowCountForPageSizeCheck();
/**
 * Creates a writer for a single column.
 *
 * @param path       descriptor of the column being written
 * @param pageWriter page writer stored for this column
 * @param props      writer settings; supplies the initial size-check count and the
 *                   repetition-level, definition-level and data value writers
 */
public ColumnWriterV1(ColumnDescriptor path, PageWriter pageWriter, ParquetProperties props) {
  this.props = props;
  this.pageWriter = pageWriter;
  this.path = path;

  // Hold off the first memory usage check until the configured minimum count
  // is reached, so there is enough data to make an initial prediction.
  this.valueCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck();

  resetStatistics();

  // One writer each for repetition levels, definition levels and the data values.
  this.repetitionLevelColumn = props.newRepetitionLevelWriter(path);
  this.definitionLevelColumn = props.newDefinitionLevelWriter(path);
  this.dataColumn = props.newValuesWriter(path);
}
minRecordToWait = props.getMinRowCountForPageSizeCheck(); max(minRecordToWait / 2, props.getMinRowCountForPageSizeCheck()), props.getMaxRowCountForPageSizeCheck()); } else { rowCountForNextSizeCheck = rowCount + props.getMinRowCountForPageSizeCheck();
valueCountForNextSizeCheck = valueCount / 2; } else { valueCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck(); valueCountForNextSizeCheck += props.getMinRowCountForPageSizeCheck();
// Report the writer settings in effect at INFO level: the maximum row group padding size, whether page size checking is "estimated" or "constant" (per props.estimateNextSizeCheck()), and the min/max row counts for page size checks.
LOG.info("Maximum row group padding size is {} bytes", maxPaddingSize); LOG.info("Page size checking is: {}", (props.estimateNextSizeCheck() ? "estimated" : "constant")); LOG.info("Min row count for page size check is: {}", props.getMinRowCountForPageSizeCheck()); LOG.info("Max row count for page size check is: {}", props.getMaxRowCountForPageSizeCheck());
// Report the writer settings in effect at INFO level: the maximum row group padding size, whether page size checking is "estimated" or "constant" (per props.estimateNextSizeCheck()), the min/max row counts for page size checks, and the truncate length for column indexes.
LOG.info("Maximum row group padding size is {} bytes", maxPaddingSize); LOG.info("Page size checking is: {}", (props.estimateNextSizeCheck() ? "estimated" : "constant")); LOG.info("Min row count for page size check is: {}", props.getMinRowCountForPageSizeCheck()); LOG.info("Max row count for page size check is: {}", props.getMaxRowCountForPageSizeCheck()); LOG.info("Truncate length for column indexes is: {}", props.getColumnIndexTruncateLength());