public long fetchTotalUpdateRecordsWritten() {
  long totalUpdateRecordsWritten = 0;
  for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
    for (HoodieWriteStat stat : stats) {
      totalUpdateRecordsWritten += stat.getNumUpdateWrites();
    }
  }
  return totalUpdateRecordsWritten;
}
public long fetchTotalInsertRecordsWritten() {
  long totalInsertRecordsWritten = 0;
  for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
    for (HoodieWriteStat stat : stats) {
      if (stat.getPrevCommit() != null && stat.getPrevCommit().equals("null")) {
        totalInsertRecordsWritten += stat.getNumWrites();
      }
    }
  }
  return totalInsertRecordsWritten;
}
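// --- Usage sketch (not part of the source above): exercises the two aggregation methods, assuming
// they live on the same HoodieCommitMetadata class whose addWriteStat(...) appears later in this
// section. The partition path "2016/03/15" and commit time "20160315000000" are made-up values;
// the only convention relied on is the one visible above: an insert stat carries the literal
// prevCommit string "null", while an update stat carries a real commit time.
HoodieCommitMetadata exampleMetadata = new HoodieCommitMetadata();

HoodieWriteStat insertStat = new HoodieWriteStat();
insertStat.setPrevCommit("null");           // newly created file, so its writes count as inserts
insertStat.setNumWrites(100);
insertStat.setNumUpdateWrites(0);
exampleMetadata.addWriteStat("2016/03/15", insertStat);

HoodieWriteStat updateStat = new HoodieWriteStat();
updateStat.setPrevCommit("20160315000000"); // file existed before, so only its update writes count
updateStat.setNumUpdateWrites(40);
exampleMetadata.addWriteStat("2016/03/15", updateStat);

long totalInserts = exampleMetadata.fetchTotalInsertRecordsWritten(); // 100
long totalUpdates = exampleMetadata.fetchTotalUpdateRecordsWritten(); // 40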
/**
 * Finalize the written data onto storage. Perform any final cleanups.
 *
 * @param jsc Spark Context
 * @param stats List of HoodieWriteStats
 * @throws HoodieIOException if some paths can't be finalized on storage
 */
public void finalizeWrite(JavaSparkContext jsc, List<HoodieWriteStat> stats) throws HoodieIOException {
  if (config.isConsistencyCheckEnabled()) {
    List<String> pathsToCheck = stats.stream()
        .map(stat -> stat.getTempPath() != null ? stat.getTempPath() : stat.getPath())
        .collect(Collectors.toList());
    List<String> failingPaths = new ConsistencyCheck(config.getBasePath(), pathsToCheck, jsc,
        config.getFinalizeWriteParallelism())
        .check(MAX_CONSISTENCY_CHECKS, INITIAL_CONSISTENCY_CHECK_INTERVAL_MS);
    if (failingPaths.size() > 0) {
      throw new HoodieIOException("Could not verify consistency of paths : " + failingPaths);
    }
  }
}
}
public HashMap<String, String> getFileIdAndRelativePaths() {
  HashMap<String, String> filePaths = new HashMap<>();
  // list all partition paths
  for (Map.Entry<String, List<HoodieWriteStat>> entry : getPartitionToWriteStats().entrySet()) {
    for (HoodieWriteStat stat : entry.getValue()) {
      filePaths.put(stat.getFileId(), stat.getPath());
    }
  }
  return filePaths;
}
/**
 * Performs actions to durably persist the current changes and returns a WriteStatus object.
 */
@Override
public WriteStatus close() {
  logger.info("Closing the file " + writeStatus.getFileId() + " as we are done with all the records "
      + recordsWritten);
  try {
    storageWriter.close();
    HoodieWriteStat stat = new HoodieWriteStat();
    stat.setPartitionPath(writeStatus.getPartitionPath());
    stat.setNumWrites(recordsWritten);
    stat.setNumDeletes(recordsDeleted);
    stat.setNumInserts(insertRecordsWritten);
    stat.setPrevCommit(HoodieWriteStat.NULL_COMMIT);
    stat.setFileId(writeStatus.getFileId());
    stat.setPaths(new Path(config.getBasePath()), path, tempPath);
    stat.setTotalWriteBytes(FSUtils.getFileSize(fs, getStorageWriterPath()));
    stat.setTotalWriteErrors(writeStatus.getFailedRecords().size());
    RuntimeStats runtimeStats = new RuntimeStats();
    runtimeStats.setTotalCreateTime(timer.endTimer());
    stat.setRuntimeStats(runtimeStats);
    writeStatus.setStat(stat);
    return writeStatus;
  } catch (IOException e) {
    throw new HoodieInsertException("Failed to close the Insert Handle for path " + path, e);
  }
}
List<HoodieWriteStat> stats = entry.getValue();
for (HoodieWriteStat stat : stats) {
  rows.add(new Comparable[]{path, stat.getFileId(), stat.getPrevCommit(), stat.getNumUpdateWrites(),
      stat.getNumWrites(), stat.getTotalWriteBytes(), stat.getTotalWriteErrors()});
String partitionPath = stat.getPartitionPath();
HoodieRollingStat hoodieRollingStat = new HoodieRollingStat(stat.getFileId(),
    stat.getNumWrites() - (stat.getNumUpdateWrites() - stat.getNumDeletes()), stat.getNumUpdateWrites(),
    stat.getNumDeletes(), stat.getTotalWriteBytes());
rollingStatMetadata.addRollingStat(partitionPath, hoodieRollingStat);
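// Worked example of the insert count passed as the second constructor argument above (all values
// illustrative, not from the source): with 100 total writes, of which 40 are updates and 10 are
// deletes, numWrites - (numUpdateWrites - numDeletes) evaluates to 100 - (40 - 10) = 70 inserts.
long exampleNumWrites = 100;
long exampleNumUpdateWrites = 40;
long exampleNumDeletes = 10;
long exampleInserts = exampleNumWrites - (exampleNumUpdateWrites - exampleNumDeletes); // 70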
@Override
public WriteStatus close() {
  try {
    // flush any remaining records to disk
    doAppend(header);
    if (writer != null) {
      writer.close();
    }
    writeStatus.getStat().setFileId(this.fileId);
    writeStatus.getStat().setNumWrites(recordsWritten);
    writeStatus.getStat().setNumUpdateWrites(updatedRecordsWritten);
    writeStatus.getStat().setNumInserts(insertRecordsWritten);
    writeStatus.getStat().setNumDeletes(recordsDeleted);
    writeStatus.getStat().setTotalWriteBytes(estimatedNumberOfBytesWritten);
    writeStatus.getStat().setTotalWriteErrors(writeStatus.getFailedRecords().size());
    RuntimeStats runtimeStats = new RuntimeStats();
    runtimeStats.setTotalUpsertTime(timer.endTimer());
    writeStatus.getStat().setRuntimeStats(runtimeStats);
    return writeStatus;
  } catch (IOException e) {
    throw new HoodieUpsertException("Failed to close UpdateHandle", e);
  }
}
public static List<HoodieWriteStat> generateFakeHoodieWriteStat(int limit) {
  List<HoodieWriteStat> writeStatList = new ArrayList<>();
  for (int i = 0; i < limit; i++) {
    HoodieWriteStat writeStat = new HoodieWriteStat();
    writeStat.setFileId(UUID.randomUUID().toString());
    writeStat.setNumDeletes(0);
    writeStat.setNumUpdateWrites(100);
    writeStat.setNumWrites(100);
    writeStat.setPath("/some/fake/path" + i);
    writeStat.setPartitionPath("/some/fake/partition/path" + i);
    writeStat.setTotalLogFilesCompacted(100L);
    RuntimeStats runtimeStats = new RuntimeStats();
    runtimeStats.setTotalScanTime(100);
    runtimeStats.setTotalCreateTime(100);
    runtimeStats.setTotalUpsertTime(100);
    writeStat.setRuntimeStats(runtimeStats);
    writeStatList.add(writeStat);
  }
  return writeStatList;
}
}
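// Hypothetical test-style usage of the helper above (not from the source): feeds the fake stats
// into a HoodieCommitMetadata via addWriteStat(...), which appears elsewhere in this section, and
// checks one aggregate. Each fake stat reports 100 update writes, so three stats sum to 300.
HoodieCommitMetadata fakeMetadata = new HoodieCommitMetadata();
for (HoodieWriteStat fakeStat : generateFakeHoodieWriteStat(3)) {
  fakeMetadata.addWriteStat(fakeStat.getPartitionPath(), fakeStat);
}
long fakeUpdateTotal = fakeMetadata.fetchTotalUpdateRecordsWritten(); // 3 * 100 = 300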
writeStatus.setStat(new HoodieWriteStat());
try {
  writeStatus.getStat().setPrevCommit(FSUtils.getCommitTime(latestValidFilePath));
  writeStatus.getStat().setPartitionPath(partitionPath);
  writeStatus.getStat().setFileId(fileId);
  writeStatus.getStat().setPaths(new Path(config.getBasePath()), newFilePath, tempPath);
.filter(status -> status.getStat().getPrevCommit() != HoodieWriteStat.NULL_COMMIT).count() > 0);
.map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 100);
.map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(), 0);
.map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 100);
.filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count(), 0);
.map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 100);
.map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(), 100);
.map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 0);
.filter(status -> status.getStat().getPrevCommit() == HoodieWriteStat.NULL_COMMIT).count(), 0);
.map(status -> status.getStat().getNumWrites()).reduce((a, b) -> a + b).get(), 200);
.map(status -> status.getStat().getNumUpdateWrites()).reduce((a, b) -> a + b).get(), 100);
.map(status -> status.getStat().getNumInserts()).reduce((a, b) -> a + b).get(), 100);
WorkloadStat partitionStat = profile.getWorkloadStat(path.toString());
partitionStat.getUpdateLocationToCount().entrySet().stream().forEach(entry -> {
  HoodieWriteStat writeStat = new HoodieWriteStat();
  writeStat.setFileId(entry.getKey());
  writeStat.setPrevCommit(entry.getValue().getKey());
  writeStat.setNumUpdateWrites(entry.getValue().getValue());
  metadata.addWriteStat(path.toString(), writeStat);
});
/**
 * {@link #updateSinkStat(Optional)} will compute {@link SinkStat} and persist changes into {@link IMetadataManager}.
 * As part of the {@link SinkStat} computation, it will compute the average record size for the current run.
 *
 * @param writesStatuses
 */
private void updateSinkStat(final Optional<JavaRDD<WriteStatus>> writesStatuses) {
  if (writesStatuses.isPresent()) {
    final LongAccumulator avgRecordSizeCounter = writesStatuses.get().rdd().sparkContext().longAccumulator();
    writesStatuses.get().foreach(
        writeStatus -> {
          final long writeBytes = writeStatus.getStat().getTotalWriteBytes();
          final long numInserts = writeStatus.getStat().getNumWrites()
              - writeStatus.getStat().getNumUpdateWrites();
          if (writeBytes > 0 && numInserts > 0) {
            avgRecordSizeCounter.add(writeBytes / numInserts);
          }
        }
    );
    final long avgRecordSize = (long) avgRecordSizeCounter.avg();
    if (avgRecordSize > 0) {
      log.info("Updating Sink Stat manager : avgRecordSize : {}", avgRecordSize);
      this.sinkStatMgr.getCurrentStat().put(SinkStat.AVG_RECORD_SIZE, Long.toString(avgRecordSize));
    }
  }
  this.sinkStatMgr.persist();
}
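// Worked example of the per-WriteStatus contribution accumulated above (values invented, not from
// the source): with totalWriteBytes = 1_048_576, numWrites = 1_100 and numUpdateWrites = 100, the
// insert count is 1_100 - 100 = 1_000, so writeBytes / numInserts contributes 1_048 bytes per
// record; the accumulator's avg() then averages these contributions across all WriteStatus objects.
long exampleWriteBytes = 1_048_576L;
long exampleInsertCount = 1_100L - 100L;                              // numWrites - numUpdateWrites
long examplePerRecordBytes = exampleWriteBytes / exampleInsertCount;  // 1048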
fileName, stageId, taskAttemptId));
HoodieWriteStat writeStat = new HoodieWriteStat();
writeStat.setPaths(basePath, finalizeFilePath, tempFilePath);
assertEquals(finalizeFilePath, new Path(basePath, writeStat.getPath()));
assertEquals(tempFilePath, new Path(basePath, writeStat.getTempPath()));

writeStat = new HoodieWriteStat();
writeStat.setPaths(basePath, finalizeFilePath, null);
assertEquals(finalizeFilePath, new Path(basePath, writeStat.getPath()));
assertNull(writeStat.getTempPath());
private static List<HoodieWriteStat> createTestData(Path partPath, boolean isParquetSchemaSimple, String commitTime)
    throws IOException, URISyntaxException, InterruptedException {
  List<HoodieWriteStat> writeStats = Lists.newArrayList();
  for (int i = 0; i < 5; i++) { // Create 5 files
    String fileId = UUID.randomUUID().toString();
    Path filePath = new Path(partPath.toString() + "/"
        + FSUtils.makeDataFileName(commitTime, DEFAULT_TASK_PARTITIONID, fileId));
    generateParquetData(filePath, isParquetSchemaSimple);
    HoodieWriteStat writeStat = new HoodieWriteStat();
    writeStat.setFileId(fileId);
    writeStat.setPath(filePath.toString());
    writeStats.add(writeStat);
  }
  return writeStats;
}
.filter(wStat -> {
  return wStat != null && wStat.getPrevCommit() != HoodieWriteStat.NULL_COMMIT
      && wStat.getPrevCommit() != null && !deletedFiles.contains(wStat.getFileId());
}).forEach(wStat -> {
  HoodieLogFormat.Writer writer = null;
  String baseCommitTime = wStat.getPrevCommit();
  if (hoodieIndex.isGlobal()) {
    baseCommitTime = fileIdToBaseCommitTimeForLogMap.get(wStat.getFileId());
      .withFileId(wStat.getFileId()).overBaseCommit(baseCommitTime)
      .withFs(this.metaClient.getFs())
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
private static HoodieCommitMetadata createLogFiles(Map<String, List<HoodieWriteStat>> partitionWriteStats,
    boolean isLogSchemaSimple) throws InterruptedException, IOException, URISyntaxException {
  HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
  for (Entry<String, List<HoodieWriteStat>> wEntry : partitionWriteStats.entrySet()) {
    String partitionPath = wEntry.getKey();
    for (HoodieWriteStat wStat : wEntry.getValue()) {
      Path path = new Path(wStat.getPath());
      HoodieDataFile dataFile = new HoodieDataFile(fileSystem.getFileStatus(path));
      HoodieLogFile logFile = generateLogData(path, isLogSchemaSimple);
      HoodieDeltaWriteStat writeStat = new HoodieDeltaWriteStat();
      writeStat.setFileId(dataFile.getFileId());
      writeStat.setPath(logFile.getPath().toString());
      commitMetadata.addWriteStat(partitionPath, writeStat);
    }
  }
  return commitMetadata;
}
private WriteStatus getSampleWriteStatus(final int numInserts, final int numUpdateWrites) {
  final WriteStatus writeStatus = new WriteStatus();
  HoodieWriteStat hoodieWriteStat = new HoodieWriteStat();
  hoodieWriteStat.setNumInserts(numInserts);
  hoodieWriteStat.setNumUpdateWrites(numUpdateWrites);
  writeStatus.setStat(hoodieWriteStat);
  return writeStatus;
}
logger.info("New InsertHandle for partition :" + partitionPath); writeStatus.getStat().setPrevCommit(baseInstantTime); writeStatus.setFileId(fileId); writeStatus.setPartitionPath(partitionPath); writeStatus.getStat().setPartitionPath(partitionPath); writeStatus.getStat().setFileId(fileId); averageRecordSize = SizeEstimator.estimate(record); try { writeStatus.getStat().setPath(path.toString()); doInit = false;
public long fetchTotalFilesUpdated() {
  long totalFilesUpdated = 0;
  for (List<HoodieWriteStat> stats : partitionToWriteStats.values()) {
    for (HoodieWriteStat stat : stats) {
      if (stat.getPrevCommit() != null && !stat.getPrevCommit().equals("null")) {
        totalFilesUpdated++;
      }
    }
  }
  return totalFilesUpdated;
}
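// Usage sketch for the file-level counter above (values invented, same prevCommit convention as
// the record counts earlier in this section): a stat whose prevCommit is the literal string "null"
// is a newly created file and is skipped, while a stat with a real commit time counts as one
// updated file.
HoodieCommitMetadata fileCountMetadata = new HoodieCommitMetadata();

HoodieWriteStat createdFileStat = new HoodieWriteStat();
createdFileStat.setPrevCommit("null");
fileCountMetadata.addWriteStat("2016/03/15", createdFileStat);

HoodieWriteStat updatedFileStat = new HoodieWriteStat();
updatedFileStat.setPrevCommit("20160315000000");
fileCountMetadata.addWriteStat("2016/03/15", updatedFileStat);

long filesUpdated = fileCountMetadata.fetchTotalFilesUpdated(); // 1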