/** * Read schema from a data file from the last compaction commit done. */ @SuppressWarnings("OptionalUsedAsFieldOrParameterType") private MessageType readSchemaFromLastCompaction(Optional<HoodieInstant> lastCompactionCommitOpt) throws IOException { HoodieInstant lastCompactionCommit = lastCompactionCommitOpt.orElseThrow( () -> new HoodieHiveSyncException( "Could not read schema from last compaction, no compaction commits found on path " + syncConfig.basePath)); // Read from the compacted file wrote HoodieCommitMetadata compactionMetadata = HoodieCommitMetadata.fromBytes( activeTimeline.getInstantDetails(lastCompactionCommit).get(), HoodieCommitMetadata.class); String filePath = compactionMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values() .stream().findAny().orElseThrow(() -> new IllegalArgumentException( "Could not find any data file written for compaction " + lastCompactionCommit + ", could not get schema for dataset " + metaClient.getBasePath())); return readSchemaFromDataFile(new Path(filePath)); }
/**
 * Returns the partitions to consider for syncing.
 *
 * <p>When {@code lastCommitTimeSynced} is absent, every partition path under
 * {@code syncConfig.basePath} is listed. Otherwise only the distinct partitions named in the
 * commit metadata of instants after the given commit time are returned.
 *
 * @param lastCommitTimeSynced commit time of the last sync, if known
 * @return partition paths, without duplicates in the incremental case
 * @throws HoodieIOException if listing partitions or reading commit metadata fails
 */
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
List<String> getPartitionsWrittenToSince(Optional<String> lastCommitTimeSynced) {
  if (!lastCommitTimeSynced.isPresent()) {
    LOG.info("Last commit time synced is not known, listing all partitions");
    try {
      return FSUtils.getAllPartitionPaths(fs, syncConfig.basePath, syncConfig.assumeDatePartitioning);
    } catch (IOException e) {
      throw new HoodieIOException("Failed to list all partitions in " + syncConfig.basePath, e);
    }
  }

  // Unwrap once so log and error messages show the commit time itself rather than "Optional[...]".
  final String lastSyncedTime = lastCommitTimeSynced.get();
  LOG.info("Last commit time synced is " + lastSyncedTime + ", Getting commits since then");

  HoodieTimeline timelineToSync = activeTimeline.findInstantsAfter(lastSyncedTime, Integer.MAX_VALUE);
  return timelineToSync.getInstants()
      .map(instant -> {
        try {
          return HoodieCommitMetadata.fromBytes(
              activeTimeline.getInstantDetails(instant).get(), HoodieCommitMetadata.class);
        } catch (IOException e) {
          // Streams cannot propagate checked exceptions, so rethrow unchecked with the cause kept.
          throw new HoodieIOException("Failed to get partitions written since " + lastSyncedTime, e);
        }
      })
      .flatMap(metadata -> metadata.getPartitionToWriteStats().keySet().stream())
      .distinct()
      .collect(Collectors.toList());
}
/**
 * Builds a file-id to full-path mapping from the commit metadata of the given instants.
 *
 * <p>Instants are processed in list order, so when two instants report the same key the entry
 * from the later instant in the list replaces the earlier one.
 *
 * @param basePath base path handed to {@code getFileIdAndFullPaths} for each commit
 * @param commitTimeline timeline used to look up the details of each instant
 * @param commitsToReturn instants whose metadata should be merged
 * @return the merged mapping; empty when {@code commitsToReturn} is empty
 * @throws IOException if commit metadata cannot be read
 */
public static HashMap<String, String> getLatestFileIDsToFullPath(String basePath,
    HoodieTimeline commitTimeline, List<HoodieInstant> commitsToReturn) throws IOException {
  final HashMap<String, String> latestPaths = new HashMap<>();
  for (HoodieInstant instant : commitsToReturn) {
    final byte[] rawDetails = commitTimeline.getInstantDetails(instant).get();
    final HoodieCommitMetadata commitMetadata =
        HoodieCommitMetadata.fromBytes(rawDetails, HoodieCommitMetadata.class);
    latestPaths.putAll(commitMetadata.getFileIdAndFullPaths(basePath));
  }
  return latestPaths;
}
metaClient.getCommitTimeline().getInstantDetails( new HoodieInstant(true, instant.getAction(), instant.getTimestamp())) .get(), HoodieCommitMetadata.class);
/**
 * Sums, over the given commit times, the records written minus the update records written,
 * as reported by each commit's metadata.
 *
 * @param target meta client whose reloaded, completed commit timeline is consulted
 * @param commitsToCatchup commit times to inspect
 * @return the accumulated difference; {@code 0} when {@code commitsToCatchup} is empty
 * @throws IOException if commit metadata cannot be read
 */
public static long countNewRecords(HoodieTableMetaClient target, List<String> commitsToCatchup)
    throws IOException {
  final HoodieTimeline completedCommits =
      target.getActiveTimeline().reload().getCommitTimeline().filterCompletedInstants();

  long newRecords = 0;
  for (String commitTime : commitsToCatchup) {
    final HoodieInstant instant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
    final byte[] rawDetails = completedCommits.getInstantDetails(instant).get();
    final HoodieCommitMetadata metadata =
        HoodieCommitMetadata.fromBytes(rawDetails, HoodieCommitMetadata.class);
    newRecords += metadata.fetchTotalRecordsWritten() - metadata.fetchTotalUpdateRecordsWritten();
  }
  return newRecords;
}
}
/**
 * CLI command that renders one row per completed clean instant.
 *
 * <p>Instants are visited in reverse timeline order. Each row holds the clean timestamp, the
 * earliest commit to retain, the total files deleted and the time taken in milliseconds, all
 * read from the clean metadata. Sorting, limiting and header-only output are delegated to
 * {@code HoodiePrintHelper.print}.
 *
 * @param limit value of the {@code limit} option, passed through to the print helper
 * @param sortByField value of the {@code sortBy} option, passed through to the print helper
 * @param descending value of the {@code desc} option, passed through to the print helper
 * @param headerOnly value of the {@code headeronly} option, passed through to the print helper
 * @return the rendered table
 * @throws IOException if clean metadata cannot be read
 */
@CliCommand(value = "cleans show", help = "Show the cleans")
public String showCleans(
    @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit,
    @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField,
    @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending,
    @CliOption(key = {"headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false")
        final boolean headerOnly) throws IOException {
  final HoodieTimeline completedCleans =
      HoodieCLI.tableMetadata.getActiveTimeline().getCleanerTimeline().filterCompletedInstants();

  // Walk the clean instants in reverse timeline order.
  final List<HoodieInstant> cleanInstants = completedCleans.getInstants().collect(Collectors.toList());
  Collections.reverse(cleanInstants);

  final List<Comparable[]> tableRows = new ArrayList<>();
  for (HoodieInstant cleanInstant : cleanInstants) {
    final HoodieCleanMetadata metadata =
        AvroUtils.deserializeHoodieCleanMetadata(completedCleans.getInstantDetails(cleanInstant).get());
    tableRows.add(new Comparable[]{
        cleanInstant.getTimestamp(),
        metadata.getEarliestCommitToRetain(),
        metadata.getTotalFilesDeleted(),
        metadata.getTimeTakenInMillis()});
  }

  final TableHeader tableHeader = new TableHeader()
      .addTableHeaderField("CleanTime")
      .addTableHeaderField("EarliestCommandRetained")
      .addTableHeaderField("Total Files Deleted")
      .addTableHeaderField("Total Time Taken");
  return HoodiePrintHelper.print(tableHeader, new HashMap<>(), sortByField, descending, limit, headerOnly,
      tableRows);
}
metaClient.getCommitTimeline().getInstantDetails( new HoodieInstant(true, instant.getAction(), instant.getTimestamp())) .get(), HoodieCommitMetadata.class);
if (lastCommit.isPresent()) { HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes( commitTimelineOpt.get().getInstantDetails(lastCommit.get()).get(), HoodieCommitMetadata.class); if (commitMetadata.getMetadata(CHECKPOINT_KEY) != null) { resumeCheckpointStr = Optional.of(commitMetadata.getMetadata(CHECKPOINT_KEY));
case HoodieTimeline.CLEAN_ACTION: { archivedMetaWrapper.setHoodieCleanMetadata(AvroUtils .deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieCleanMetadata.class)); archivedMetaWrapper.setActionType(ActionType.clean.name()); .fromBytes(commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieCommitMetadata.class); archivedMetaWrapper.setHoodieCommitMetadata(commitMetadataConverter(commitMetadata)); archivedMetaWrapper.setActionType(ActionType.commit.name()); .deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieRollbackMetadata.class)); archivedMetaWrapper.setActionType(ActionType.rollback.name()); .deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieSavepointMetadata.class)); archivedMetaWrapper.setActionType(ActionType.savepoint.name()); .fromBytes(commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieCommitMetadata.class); archivedMetaWrapper.setHoodieCommitMetadata(commitMetadataConverter(commitMetadata)); archivedMetaWrapper.setActionType(ActionType.commit.name());
case HoodieTimeline.CLEAN_ACTION: { archivedMetaWrapper.setHoodieCleanMetadata(AvroUtils .deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieCleanMetadata.class)); archivedMetaWrapper.setActionType(ActionType.clean.name()); .fromBytes(commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieCommitMetadata.class); archivedMetaWrapper.setHoodieCommitMetadata(commitMetadataConverter(commitMetadata)); archivedMetaWrapper.setActionType(ActionType.commit.name()); .deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieRollbackMetadata.class)); archivedMetaWrapper.setActionType(ActionType.rollback.name()); .deserializeAvroMetadata(commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieSavepointMetadata.class)); archivedMetaWrapper.setActionType(ActionType.savepoint.name()); .fromBytes(commitTimeline.getInstantDetails(hoodieInstant).get(), HoodieCommitMetadata.class); archivedMetaWrapper.setHoodieCommitMetadata(commitMetadataConverter(commitMetadata)); archivedMetaWrapper.setActionType(ActionType.commit.name());
() -> new InvalidDatasetException(syncConfig.basePath)); HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes( activeTimeline.getInstantDetails(lastCommit).get(), HoodieCommitMetadata.class); String filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values() .stream().findAny().orElseThrow(() -> new IllegalArgumentException( activeTimeline.getInstantDetails(lastDeltaInstant).get(), HoodieCommitMetadata.class); Pair<String, HoodieFileFormat> filePathWithFormat = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values()
if (lastCommit.isPresent()) { HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes( commitTimelineOpt.get().getInstantDetails(lastCommit.get()).get(), HoodieCommitMetadata.class); if (commitMetadata.getMetadata(CHECKPOINT_KEY) != null) { resumeCheckpointStr = Optional.of(commitMetadata.getMetadata(CHECKPOINT_KEY));
timeline.getInstantDetails(cleanInstant).get()); List<Comparable[]> rows = new ArrayList<>(); for (Map.Entry<String, HoodieCleanPartitionMetadata> entry : cleanMetadata.getPartitionMetadata().entrySet()) {
for (int i = 0; i < commits.size(); i++) { HoodieInstant commit = commits.get(i); HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(commit).get(), HoodieCommitMetadata.class); rows.add(new Comparable[]{commit.getTimestamp(),
@Test public void checkCommitTimeline() throws IOException { HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline(); HoodieTimeline activeCommitTimeline = activeTimeline.getCommitTimeline(); assertTrue("Should be empty commit timeline", activeCommitTimeline.empty()); HoodieInstant instant = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1"); activeTimeline.createInflight(instant); activeTimeline.saveAsComplete(instant, Optional.of("test-detail".getBytes())); // Commit timeline should not auto-reload every time getActiveCommitTimeline(), it should be cached activeTimeline = metaClient.getActiveTimeline(); activeCommitTimeline = activeTimeline.getCommitTimeline(); assertTrue("Should be empty commit timeline", activeCommitTimeline.empty()); HoodieInstant completedInstant = HoodieTimeline.getCompletedInstant(instant); activeTimeline = activeTimeline.reload(); activeCommitTimeline = activeTimeline.getCommitTimeline(); assertFalse("Should be the 1 commit we made", activeCommitTimeline.empty()); assertEquals("Commit should be 1", completedInstant, activeCommitTimeline.getInstants().findFirst().get()); assertArrayEquals("Commit value should be \"test-detail\"", "test-detail".getBytes(), activeCommitTimeline.getInstantDetails(completedInstant).get()); }
for (HoodieInstant entry : timeline.getInstants().collect(Collectors.toList())) { HoodieCommitMetadata commitMetadata = HoodieCommitMetadata .fromBytes(timeline.getInstantDetails(entry).get(), HoodieCommitMetadata.class);
HoodieTimeline commitTimeline = metaClient.getCommitTimeline().filterCompletedInstants(); HoodieCommitMetadata commitMetadata = HoodieCommitMetadata .fromBytes(commitTimeline.getInstantDetails(commitInstant).get(), HoodieCommitMetadata.class); String basePath = table.getMetaClient().getBasePath(); Collection<String> commitPathNames = commitMetadata.getFileIdAndFullPaths(basePath).values();