@Override
public String toString() {
  return "HoodieArchivedLogFile {" + super.getPath() + '}';
}
}
private void createNewFile() throws IOException {
  this.output = fs.create(this.logFile.getPath(), false, bufferSize, replication,
      WriterBuilder.DEFAULT_SIZE_THRESHOLD, null);
}
/**
 * Rename log files. This is done for un-scheduling a pending compaction operation. NOTE: Can only be used safely
 * when no writer (ingestion/compaction) is running.
 *
 * @param metaClient Hoodie Table Meta-Client
 * @param oldLogFile Old Log File
 * @param newLogFile New Log File
 */
protected static void renameLogFile(HoodieTableMetaClient metaClient, HoodieLogFile oldLogFile,
    HoodieLogFile newLogFile) throws IOException {
  FileStatus[] statuses = metaClient.getFs().listStatus(oldLogFile.getPath());
  Preconditions.checkArgument(statuses.length == 1, "Only one status must be present");
  Preconditions.checkArgument(statuses[0].isFile(), "Source File must exist");
  Preconditions.checkArgument(oldLogFile.getPath().getParent().equals(newLogFile.getPath().getParent()),
      "Log file must only be moved within the parent directory");
  metaClient.getFs().rename(oldLogFile.getPath(), newLogFile.getPath());
}
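/*
 * A minimal usage sketch of the rename above. Assumptions (not from the snippet): an existing `metaClient`
 * for the table, a `partitionDir` Path, and a caller inside the same class/subclass; the file-id, base
 * commit time and log versions are made up for illustration, and no writer is running concurrently.
 */
HoodieLogFile oldLogFile = new HoodieLogFile(new Path(partitionDir,
    FSUtils.makeLogFileName("fileid-001", HoodieLogFile.DELTA_EXTENSION, "100", 2)));
HoodieLogFile newLogFile = new HoodieLogFile(new Path(partitionDir,
    FSUtils.makeLogFileName("fileid-001", HoodieLogFile.DELTA_EXTENSION, "100", 1)));
// Both files share the same parent directory, so the same-parent precondition in renameLogFile holds.
renameLogFile(metaClient, oldLogFile, newLogFile);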
public CompactionOperation(java.util.Optional<HoodieDataFile> dataFile, String partitionPath,
    List<HoodieLogFile> logFiles, Map<String, Double> metrics) {
  if (dataFile.isPresent()) {
    this.baseInstantTime = dataFile.get().getCommitTime();
    this.dataFilePath = Optional.of(dataFile.get().getPath());
    this.fileId = dataFile.get().getFileId();
    this.dataFileCommitTime = Optional.of(dataFile.get().getCommitTime());
  } else {
    assert logFiles.size() > 0;
    this.dataFilePath = Optional.absent();
    this.baseInstantTime = FSUtils.getBaseCommitTimeFromLogPath(logFiles.get(0).getPath());
    this.fileId = FSUtils.getFileIdFromLogPath(logFiles.get(0).getPath());
    this.dataFileCommitTime = Optional.absent();
  }
  this.partitionPath = partitionPath;
  this.deltaFilePaths = logFiles.stream().map(s -> s.getPath().toString())
      .collect(Collectors.toList());
  this.metrics = metrics;
}
private void doAppend(Map<HoodieLogBlock.HeaderMetadataType, String> header) {
  try {
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, commitTime);
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
    if (recordList.size() > 0) {
      writer = writer.appendBlock(new HoodieAvroDataBlock(recordList, header));
      recordList.clear();
    }
    if (keysToDelete.size() > 0) {
      writer = writer.appendBlock(
          new HoodieDeleteBlock(keysToDelete.stream().toArray(HoodieKey[]::new), header));
      keysToDelete.clear();
    }
  } catch (Exception e) {
    throw new HoodieAppendException(
        "Failed while appending records to " + currentLogFile.getPath(), e);
  }
}
HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize,
    boolean readBlockLazily, boolean reverseReader) throws IOException {
  this.inputStream = new FSDataInputStream(
      new BufferedFSInputStream((FSInputStream) fs.open(logFile.getPath(), bufferSize).getWrappedStream(),
          bufferSize));
  this.logFile = logFile;
  this.readerSchema = readerSchema;
  this.readBlockLazily = readBlockLazily;
  this.reverseReader = reverseReader;
  if (this.reverseReader) {
    this.reverseLogFilePosition = this.lastReverseLogFilePosition =
        fs.getFileStatus(logFile.getPath()).getLen();
  }
  addShutDownHook();
}
private void doAppend(Map<HoodieLogBlock.HeaderMetadataType, String> header) {
  try {
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, commitTime);
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
    if (recordList.size() > 0) {
      writer = writer.appendBlock(new HoodieAvroDataBlock(recordList, header));
      recordList.clear();
    }
    if (keysToDelete.size() > 0) {
      writer = writer.appendBlock(
          new HoodieDeleteBlock(keysToDelete.stream().toArray(String[]::new), header));
      keysToDelete.clear();
    }
  } catch (Exception e) {
    throw new HoodieAppendException(
        "Failed while appending records to " + currentLogFile.getPath(), e);
  }
}
Preconditions.checkArgument(lf.getLogVersion() - maxVersion > 0, "Expect new log version to be sane");
HoodieLogFile newLogFile = new HoodieLogFile(new Path(lf.getPath().getParent(),
    FSUtils.makeLogFileName(lf.getFileId(), "." + FSUtils.getFileExtensionFromLog(lf.getPath()),
        compactionInstant, lf.getLogVersion() - maxVersion)));
return Pair.of(lf, newLogFile);
/**
 * Generate compaction operation from file-slice.
 *
 * @param partitionPath Partition path
 * @param fileSlice File Slice
 * @param metricsCaptureFunction Metrics Capture function
 * @return Compaction Operation
 */
public static HoodieCompactionOperation buildFromFileSlice(String partitionPath, FileSlice fileSlice,
    Optional<Function<Pair<String, FileSlice>, Map<String, Double>>> metricsCaptureFunction) {
  HoodieCompactionOperation.Builder builder = HoodieCompactionOperation.newBuilder();
  builder.setPartitionPath(partitionPath);
  builder.setFileId(fileSlice.getFileId());
  builder.setBaseInstantTime(fileSlice.getBaseInstantTime());
  builder.setDeltaFilePaths(fileSlice.getLogFiles().map(lf -> lf.getPath().toString())
      .collect(Collectors.toList()));
  if (fileSlice.getDataFile().isPresent()) {
    builder.setDataFilePath(fileSlice.getDataFile().get().getPath());
  }
  if (metricsCaptureFunction.isPresent()) {
    builder.setMetrics(metricsCaptureFunction.get().apply(Pair.of(partitionPath, fileSlice)));
  }
  return builder.build();
}
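/*
 * A minimal usage sketch of the builder above. Assumptions (not from the snippet): a `fileSlice` obtained
 * from the table's file-system view, an illustrative partition path, and java.util.Optional for the
 * metrics-capture function; no metrics are captured here.
 */
HoodieCompactionOperation op = buildFromFileSlice("2017/11/15", fileSlice, Optional.empty());
// The resulting operation carries the base instant, the data-file path (if present) and all delta file paths.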
private List<HoodieCompactionOperation> createCompactionOperations(HoodieWriteConfig config,
    Map<Long, List<Long>> sizesMap, Map<Long, String> keyToPartitionMap) {
  List<HoodieCompactionOperation> operations = Lists.newArrayListWithCapacity(sizesMap.size());
  sizesMap.forEach((k, v) -> {
    HoodieDataFile df = TestHoodieDataFile.newDataFile(k);
    String partitionPath = keyToPartitionMap.get(k);
    List<HoodieLogFile> logFiles = v.stream().map(TestHoodieLogFile::newLogFile).collect(Collectors.toList());
    operations.add(new HoodieCompactionOperation(df.getCommitTime(),
        logFiles.stream().map(s -> s.getPath().toString()).collect(Collectors.toList()), df.getPath(),
        df.getFileId(), partitionPath,
        config.getCompactionStrategy().captureMetrics(config, Optional.of(df), partitionPath, logFiles)));
  });
  return operations;
}
}
}).collect(Collectors.toList());
Map<String, String> renameFilesFromUndo = undoFiles.stream()
    .collect(Collectors.toMap(p -> p.getRight().getPath().toString(), x -> x.getLeft().getPath().toString()));
Map<String, String> expRenameFiles = renameFiles.stream()
    .collect(Collectors.toMap(p -> p.getLeft().getPath().toString(), x -> x.getRight().getPath().toString()));
if (expNumRepairs > 0) {
  Assert.assertFalse("Rename Files must be non-empty", renameFiles.isEmpty());
private static HoodieCommitMetadata createLogFiles(Map<String, List<HoodieWriteStat>> partitionWriteStats,
    boolean isLogSchemaSimple) throws InterruptedException, IOException, URISyntaxException {
  HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata();
  for (Entry<String, List<HoodieWriteStat>> wEntry : partitionWriteStats.entrySet()) {
    String partitionPath = wEntry.getKey();
    for (HoodieWriteStat wStat : wEntry.getValue()) {
      Path path = new Path(wStat.getPath());
      HoodieDataFile dataFile = new HoodieDataFile(fileSystem.getFileStatus(path));
      HoodieLogFile logFile = generateLogData(path, isLogSchemaSimple);
      HoodieDeltaWriteStat writeStat = new HoodieDeltaWriteStat();
      writeStat.setFileId(dataFile.getFileId());
      writeStat.setPath(logFile.getPath().toString());
      commitMetadata.addWriteStat(partitionPath, writeStat);
    }
  }
  return commitMetadata;
}
/**
 * Validates if the generated compaction operation matches the input file slice and partition path.
 *
 * @param slice File Slice
 * @param op HoodieCompactionOperation
 * @param expPartitionPath Partition path
 */
private void testFileSliceCompactionOpEquality(FileSlice slice, HoodieCompactionOperation op,
    String expPartitionPath) {
  Assert.assertEquals("Partition path is correct", expPartitionPath, op.getPartitionPath());
  Assert.assertEquals("Same base-instant", slice.getBaseInstantTime(), op.getBaseInstantTime());
  Assert.assertEquals("Same file-id", slice.getFileId(), op.getFileId());
  if (slice.getDataFile().isPresent()) {
    Assert.assertEquals("Same data-file", slice.getDataFile().get().getPath(), op.getDataFilePath());
  }
  List<String> paths = slice.getLogFiles().map(l -> l.getPath().toString()).collect(Collectors.toList());
  IntStream.range(0, paths.size()).boxed().forEach(idx -> {
    Assert.assertEquals("Log File Index " + idx, paths.get(idx), op.getDeltaFilePaths().get(idx));
  });
  Assert.assertEquals("Metrics set", metrics, op.getMetrics());
}
}
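/*
 * A hypothetical test usage of the validation helper above, assuming `op` was built from `fileSlice`
 * (for example via buildFromFileSlice) and that "2017/11/15" is the slice's partition path.
 */
testFileSliceCompactionOpEquality(fileSlice, op, "2017/11/15");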
"100").map(s -> s.getPath().toString()).collect(Collectors.toList());
@Test
public void testBasicAppend() throws IOException, InterruptedException, URISyntaxException {
  Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
      .overBaseCommit("100").withFs(fs).build();
  List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
  Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
  writer = writer.appendBlock(dataBlock);
  long size = writer.getCurrentSize();
  assertTrue("We just wrote a block - size should be > 0", size > 0);
  assertEquals("Write should be auto-flushed. The size reported by FileStatus and the writer should match",
      size, fs.getFileStatus(writer.getLogFile().getPath()).getLen());
  writer.close();
}
@Test
public void testAvroLogRecordReaderWithInvalidRollback()
    throws IOException, URISyntaxException, InterruptedException {
  Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
  // Set a small threshold so that every block is a new version
  Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
      .overBaseCommit("100").withFs(fs).build();

  // Write 1
  List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
  HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records1, header);
  writer = writer.appendBlock(dataBlock);

  // Write invalid rollback for a failed write (possible for in-flight commits)
  header.put(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME, "101");
  header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
      String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
  HoodieCommandBlock commandBlock = new HoodieCommandBlock(header);
  writer = writer.appendBlock(commandBlock);

  List<String> allLogFiles = FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1",
      HoodieLogFile.DELTA_EXTENSION, "100").map(s -> s.getPath().toString()).collect(Collectors.toList());

  HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema,
      "100", 10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
  assertEquals("We still would read 100 records", 100, scanner.getTotalLogRecords());
  final List<String> readKeys = new ArrayList<>(100);
  scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
  assertEquals("Stream collect should return all 100 records", 100, readKeys.size());
}
@Test
public void testLeaseRecovery() throws IOException, URISyntaxException, InterruptedException {
  Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
      .overBaseCommit("100").withFs(fs).build();
  List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
  Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
  writer = writer.appendBlock(dataBlock);
  long size1 = writer.getCurrentSize();

  // Do not close this writer - this simulates a data node appending to a log dying without closing the file
  // writer.close();

  writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100")
      .withFs(fs).build();
  records = SchemaTestUtil.generateTestRecords(0, 100);
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  dataBlock = new HoodieAvroDataBlock(records, header);
  writer = writer.appendBlock(dataBlock);
  long size2 = writer.getCurrentSize();
  assertTrue("We just wrote a new block - size2 should be > size1", size2 > size1);
  assertEquals("Write should be auto-flushed. The size reported by FileStatus and the writer should match",
      size2, fs.getFileStatus(writer.getLogFile().getPath()).getLen());
  writer.close();
}