@Override
protected void consumeOneRecord(GenericRecord record) {
  upsertHandle.write(record);
}
protected HoodieMergeHandle getUpdateHandle(String commitTime, String fileId, Iterator<HoodieRecord<T>> recordItr) {
  return new HoodieMergeHandle<>(config, commitTime, this, recordItr, fileId);
}
public void doAppend() {
  while (recordItr.hasNext()) {
    HoodieRecord record = recordItr.next();
    init(record);
    flushToDiskIfRequired(record);
    writeToBuffer(record);
  }
  // Flush whatever is still buffered as the final log block
  doAppend(header);
  estimatedNumberOfBytesWritten += averageRecordSize * numberOfRecords;
}
public Iterator<List<WriteStatus>> handleInsert(String commitTime, String partitionPath, String fileId,
    Iterator<HoodieRecord<T>> recordItr) {
  HoodieCreateHandle createHandle = new HoodieCreateHandle(config, commitTime, this, partitionPath, fileId, recordItr);
  createHandle.write();
  return Collections.singletonList(Collections.singletonList(createHandle.close())).iterator();
}
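// A minimal consumption sketch, not part of the original source: handleInsert returns a
// nested Iterator<List<WriteStatus>>, so a caller can flatten it with a helper like this
// (the name drainWriteStatuses is hypothetical; only java.util types are used).
private static List<WriteStatus> drainWriteStatuses(Iterator<List<WriteStatus>> statuses) {
  List<WriteStatus> all = new ArrayList<>();
  statuses.forEachRemaining(all::addAll);
  return all;
}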
@Override
public void write(HoodieRecord record, Optional<IndexedRecord> insertValue) {
  Optional recordMetadata = record.getData().getMetadata();
  try {
    init(record);
    flushToDiskIfRequired(record);
    writeToBuffer(record);
  } catch (Throwable t) {
    // Not throwing exception from here, since we don't want to fail the entire job
    // for a single record
    writeStatus.markFailure(record, t, recordMetadata);
    logger.error("Error writing record " + record, t);
  }
}
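// A follow-up sketch, assuming the WriteStatus.hasErrors() and getFileId() accessors:
// because write(...) above swallows per-record failures into WriteStatus via markFailure,
// a caller that wants fail-fast semantics has to check the status after closing the handle.
private static void failOnWriteErrors(WriteStatus status) {
  if (status.hasErrors()) {
    throw new RuntimeException("Write produced failed records for file " + status.getFileId());
  }
}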
private static PairFlatMapFunction<String, String, String> getFilesToDeleteFunc(HoodieTable table,
    HoodieWriteConfig config) {
  return (PairFlatMapFunction<String, String, String>) partitionPathToClean -> {
    HoodieCleanHelper cleaner = new HoodieCleanHelper(table, config);
    return cleaner.getDeletePaths(partitionPathToClean).stream()
        .map(deleteFile -> new Tuple2<>(partitionPathToClean, deleteFile.toString())).iterator();
  };
}
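// A minimal usage sketch, assuming Spark's Java API plus hypothetical jsc and
// partitionsToClean inputs: the pair function above is applied over an RDD of partition
// paths, producing (partitionPath, fileToDelete) tuples for the cleaner to act on.
private static List<Tuple2<String, String>> findFilesToDelete(JavaSparkContext jsc,
    List<String> partitionsToClean, HoodieTable table, HoodieWriteConfig config) {
  return jsc.parallelize(partitionsToClean, partitionsToClean.size())
      .flatMapToPair(getFilesToDeleteFunc(table, config))
      .collect();
}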
private boolean writeUpdateRecord(HoodieRecord<T> hoodieRecord, Optional<IndexedRecord> indexedRecord) {
  if (indexedRecord.isPresent()) {
    updatedRecordsWritten++;
  }
  return writeRecord(hoodieRecord, indexedRecord);
}
/**
 * Checks if the number of records has reached the set threshold and, if so, flushes the records to disk.
 */
private void flushToDiskIfRequired(HoodieRecord record) {
  // Append once enough records are buffered to reach the target block size
  if (numberOfRecords >= (int) (maxBlockSize / averageRecordSize)) {
    // Recompute averageRecordSize before writing a new block, updating the existing
    // value with the average of the new and old estimates
    logger.info("AvgRecordSize => " + averageRecordSize);
    averageRecordSize = (averageRecordSize + SizeEstimator.estimate(record)) / 2;
    doAppend(header);
    estimatedNumberOfBytesWritten += averageRecordSize * numberOfRecords;
    numberOfRecords = 0;
  }
}
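// A worked sketch of the flush threshold above; the numbers are illustrative, not from
// the source. With a 256MB target block size and a 1KB average record estimate, the
// buffer flushes after 268435456 / 1024 == 262144 records.
private static long recordsPerBlock(long maxBlockSize, long averageRecordSize) {
  return maxBlockSize / averageRecordSize;
}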
private void writeToBuffer(HoodieRecord<T> record) {
  // Update the new location of the record, so we know where to find it next
  record.setNewLocation(new HoodieRecordLocation(commitTime, fileId));
  Optional<IndexedRecord> indexedRecord = getIndexedRecord(record);
  if (indexedRecord.isPresent()) {
    recordList.add(indexedRecord.get());
  } else {
    // An empty payload signals a delete; remember the key so a delete block can be appended
    keysToDelete.add(record.getRecordKey());
  }
  numberOfRecords++;
}
public HoodieIOHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable) {
  this.commitTime = commitTime;
  this.config = config;
  this.fs = hoodieTable.getMetaClient().getFs();
  this.hoodieTable = hoodieTable;
  this.hoodieTimeline = hoodieTable.getCompletedCommitTimeline();
  this.schema = createHoodieWriteSchema(config);
  this.timer = new HoodieTimer().startTimer();
}
private HoodieWriteConfig getConfig() {
  return getConfigBuilder()
      .withCompactionConfig(HoodieCompactionConfig.newBuilder().withMaxNumDeltaCommitsBeforeCompaction(1).build())
      .build();
}
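// A minimal usage sketch, assuming the com.uber.hoodie-era HoodieWriteClient constructor
// (JavaSparkContext, HoodieWriteConfig): with maxNumDeltaCommitsBeforeCompaction set to 1
// above, every delta commit makes the table eligible for compaction on the next pass.
private HoodieWriteClient makeWriteClient(JavaSparkContext jsc) {
  return new HoodieWriteClient(jsc, getConfig());
}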
protected HoodieMergeHandle getUpdateHandle(String commitTime, String fileId,
    Map<String, HoodieRecord<T>> keyToNewRecords, Optional<HoodieDataFile> dataFileToBeMerged) {
  return new HoodieMergeHandle<>(config, commitTime, this, keyToNewRecords, fileId, dataFileToBeMerged);
}