protected HoodieMergeHandle getUpdateHandle(String commitTime, String fileId,
    Iterator<HoodieRecord<T>> recordItr) {
  return new HoodieMergeHandle<>(config, commitTime, this, recordItr, fileId);
}
@Override
protected void consumeOneRecord(GenericRecord record) {
  upsertHandle.write(record);
}
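// For context: this override is the consuming end of the bounded in-memory queue used by
// handleUpdateInternal below. A minimal sketch of the enclosing consumer class, assuming
// BoundedInMemoryQueueConsumer exposes finish() and getResult() hooks (those names are
// assumptions here):
private static class UpdateHandler extends BoundedInMemoryQueueConsumer<GenericRecord, Void> {

  private final HoodieMergeHandle upsertHandle;

  private UpdateHandler(HoodieMergeHandle upsertHandle) {
    this.upsertHandle = upsertHandle;
  }

  @Override
  protected void consumeOneRecord(GenericRecord record) {
    upsertHandle.write(record);
  }

  @Override
  protected void finish() {
    // nothing to flush; the caller closes the merge handle
  }

  @Override
  protected Void getResult() {
    return null;
  }
}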
private boolean writeUpdateRecord(HoodieRecord<T> hoodieRecord, Optional<IndexedRecord> indexedRecord) {
  if (indexedRecord.isPresent()) {
    updatedRecordsWritten++;
  }
  return writeRecord(hoodieRecord, indexedRecord);
}
protected Iterator<List<WriteStatus>> handleUpdateInternal(HoodieMergeHandle upsertHandle,
    String commitTime, String fileId) throws IOException {
  if (upsertHandle.getOldFilePath() == null) {
    throw new HoodieUpsertException(
        "Error in finding the old file path at commit " + commitTime + " for fileId: " + fileId);
  } else {
    AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getSchema());
    BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null;
    try (ParquetReader<IndexedRecord> reader =
        AvroParquetReader.<IndexedRecord>builder(upsertHandle.getOldFilePath())
            .withConf(getHadoopConf()).build()) {
      // Stream every record of the old file through the merge handle: each record is either
      // merged with an incoming update or copied over unchanged
      wrapper = new SparkBoundedInMemoryExecutor(config, new ParquetReaderIterator(reader),
          new UpdateHandler(upsertHandle), x -> x);
      wrapper.execute();
    } catch (Exception e) {
      throw new HoodieException(e);
    } finally {
      upsertHandle.close();
      if (null != wrapper) {
        wrapper.shutdownNow();
      }
    }
  }

  if (upsertHandle.getWriteStatus().getPartitionPath() == null) {
    logger.info("Upsert Handle has partition path as null " + upsertHandle.getOldFilePath() + ", "
        + upsertHandle.getWriteStatus());
  }
  return Collections.singletonList(Collections.singletonList(upsertHandle.getWriteStatus()))
      .iterator();
}
public void write(GenericRecord oldRecord) {
  String key = oldRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
  boolean copyOldRecord = true;
  if (keyToNewRecords.containsKey(key)) {
    // This old record has an incoming update; try writing the merged value instead
    HoodieRecord<T> hoodieRecord = keyToNewRecords.get(key);
    Optional<IndexedRecord> combinedAvroRecord = hoodieRecord.getData()
        .combineAndGetUpdateValue(oldRecord, schema);
    if (writeUpdateRecord(hoodieRecord, combinedAvroRecord)) {
      // Merged value written successfully; the old record need not be copied over
      copyOldRecord = false;
    }
    writtenRecordKeys.add(key);
  }
  if (copyOldRecord) {
    String errMsg = "Failed to merge old record into new file for key " + key + " from old file "
        + getOldFilePath() + " to new file " + getStorageWriterPath();
    try {
      storageWriter.writeAvro(key, oldRecord);
    } catch (ClassCastException e) {
      logger.error("Schema mismatch when rewriting old record " + oldRecord + " from file "
          + getOldFilePath() + " to file " + getStorageWriterPath() + " with schema "
          + schema.toString(true));
      throw new HoodieUpsertException(errMsg, e);
    } catch (IOException e) {
      logger.error("Failed to merge old record into new file for key " + key + " from old file "
          + getOldFilePath() + " to new file " + getStorageWriterPath(), e);
      throw new HoodieUpsertException(errMsg, e);
    }
    recordsWritten++;
  }
}
try {
  // ... (preceding init() lines resolve the latest file version and compute relativePath)
  newFilePath = new Path(config.getBasePath(), relativePath);
  if (config.shouldUseTempFolderForCopyOnWriteForMerge()) {
    this.tempPath = makeTempPath(partitionPath, TaskContext.getPartitionId(), fileId,
        TaskContext.get().stageId(), TaskContext.get().taskAttemptId());
  }
  logger.info(String.format("Merging new data into oldPath %s, as newPath %s",
      oldFilePath.toString(), getStorageWriterPath().toString()));
  // Create the writer for the new file version
  storageWriter = HoodieStorageWriterFactory
      .getStorageWriter(commitTime, getStorageWriterPath(), hoodieTable, config, schema);
} catch (IOException io) {
  logger.error("Error in update task at commit " + commitTime, io);
  writeStatus.setGlobalError(io);
  throw new HoodieUpsertException("Failed to initialize HoodieUpdateHandle for FileId: " + fileId
      + " on commit " + commitTime, io);
}
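// Both init() and write() route output through getStorageWriterPath(). A minimal sketch of
// that helper, assuming it simply prefers the temp path whenever one was created:
private Path getStorageWriterPath() {
  // Write via the temp folder when configured, else straight to the new file path
  return (this.tempPath == null) ? this.newFilePath : this.tempPath;
}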
public HoodieMergeHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
    Iterator<HoodieRecord<T>> recordItr, String fileId) {
  super(config, commitTime, hoodieTable);
  this.fileSystemView = hoodieTable.getROFileSystemView();
  String partitionPath = init(fileId, recordItr);
  init(fileId, partitionPath, fileSystemView.getLatestDataFiles(partitionPath)
      .filter(dataFile -> dataFile.getFileId().equals(fileId)).findFirst());
}
@Override
public WriteStatus close() {
  try {
    // Write out any pending records (this can happen when inserts are turned into updates)
    for (String key : keyToNewRecords.keySet()) {
      if (!writtenRecordKeys.contains(key)) {
        HoodieRecord<T> hoodieRecord = keyToNewRecords.get(key);
        writeRecord(hoodieRecord, hoodieRecord.getData().getInsertValue(schema));
        insertRecordsWritten++;
      }
    }
    keyToNewRecords.clear();
    writtenRecordKeys.clear();

    if (storageWriter != null) {
      storageWriter.close();
    }

    writeStatus.getStat().setTotalWriteBytes(FSUtils.getFileSize(fs, getStorageWriterPath()));
    writeStatus.getStat().setNumWrites(recordsWritten);
    writeStatus.getStat().setNumDeletes(recordsDeleted);
    writeStatus.getStat().setNumUpdateWrites(updatedRecordsWritten);
    writeStatus.getStat().setNumInserts(insertRecordsWritten);
    writeStatus.getStat().setTotalWriteErrors(writeStatus.getFailedRecords().size());
    RuntimeStats runtimeStats = new RuntimeStats();
    runtimeStats.setTotalUpsertTime(timer.endTimer());
    writeStatus.getStat().setRuntimeStats(runtimeStats);
    return writeStatus;
  } catch (IOException e) {
    throw new HoodieUpsertException("Failed to close UpdateHandle", e);
  }
}
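// End to end, a merge handle is driven roughly as follows. This is a hypothetical driver for
// illustration only; the reader wiring mirrors handleUpdateInternal above, minus the bounded
// in-memory executor, and hadoopConf is an assumed Configuration instance.
HoodieMergeHandle<T> handle = new HoodieMergeHandle<>(config, commitTime, hoodieTable, recordItr, fileId);
AvroReadSupport.setAvroReadSchema(hadoopConf, handle.getSchema());
try (ParquetReader<IndexedRecord> reader =
    AvroParquetReader.<IndexedRecord>builder(handle.getOldFilePath()).withConf(hadoopConf).build()) {
  IndexedRecord oldRecord;
  while ((oldRecord = reader.read()) != null) {
    handle.write((GenericRecord) oldRecord); // merge with a pending update, or copy over as-is
  }
}
WriteStatus result = handle.close(); // flushes leftover inserts and finalizes write stats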
public HoodieMergeHandle(HoodieWriteConfig config, String commitTime, HoodieTable<T> hoodieTable,
    Map<String, HoodieRecord<T>> keyToNewRecords, String fileId,
    Optional<HoodieDataFile> dataFileToBeMerged) {
  super(config, commitTime, hoodieTable);
  this.fileSystemView = hoodieTable.getROFileSystemView();
  this.keyToNewRecords = keyToNewRecords;
  init(fileId, keyToNewRecords.get(keyToNewRecords.keySet().stream().findFirst().get())
      .getPartitionPath(), dataFileToBeMerged);
}
protected HoodieMergeHandle getUpdateHandle(String commitTime, String fileId,
    Map<String, HoodieRecord<T>> keyToNewRecords, Optional<HoodieDataFile> dataFileToBeMerged) {
  return new HoodieMergeHandle<>(config, commitTime, this, keyToNewRecords, fileId, dataFileToBeMerged);
}