/** * Get the list of savepoints in this table */ public List<String> getSavepoints() { return getCompletedSavepointTimeline().getInstants().map(HoodieInstant::getTimestamp) .collect(Collectors.toList()); }
/** * Get the list of savepoints in this table */ public List<String> getSavepoints() { return getCompletedSavepointTimeline().getInstants().map(HoodieInstant::getTimestamp) .collect(Collectors.toList()); }
/** * Get a list of instant times that have occurred, from the given instant timestamp. */ public static List<String> listCommitsSince(FileSystem fs, String basePath, String instantTimestamp) { HoodieTimeline timeline = allCompletedCommitsCompactions(fs, basePath); return timeline.findInstantsAfter(instantTimestamp, Integer.MAX_VALUE).getInstants() .map(HoodieInstant::getTimestamp).collect(Collectors.toList()); }
/** * Get a list of instant times that have occurred, from the given instant timestamp. */ public static List<String> listCommitsSince(FileSystem fs, String basePath, String instantTimestamp) { HoodieTimeline timeline = allCompletedCommitsCompactions(fs, basePath); return timeline.findInstantsAfter(instantTimestamp, Integer.MAX_VALUE).getInstants() .map(HoodieInstant::getTimestamp).collect(Collectors.toList()); }
private String getLastCommitTimePulled(FileSystem fs, String sourceTableLocation) throws IOException { HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs.getConf(), sourceTableLocation); List<String> commitsToSync = metadata.getActiveTimeline().getCommitsTimeline() .filterCompletedInstants() .findInstantsAfter(config.fromCommitTime, config.maxCommits) .getInstants().map(HoodieInstant::getTimestamp) .collect(Collectors.toList()); if (commitsToSync.isEmpty()) { log.warn("Nothing to sync. All commits in " + config.sourceTable + " are " + metadata.getActiveTimeline().getCommitsTimeline().filterCompletedInstants() .getInstants().collect(Collectors.toList()) + " and from commit time is " + config.fromCommitTime); return null; } log.info("Syncing commits " + commitsToSync); return commitsToSync.get(commitsToSync.size() - 1); }
@CliCommand(value = "savepoints show", help = "Show the savepoints") public String showSavepoints() throws IOException { HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); HoodieTimeline timeline = activeTimeline.getSavePointTimeline().filterCompletedInstants(); List<HoodieInstant> commits = timeline.getInstants().collect(Collectors.toList()); String[][] rows = new String[commits.size()][]; Collections.reverse(commits); for (int i = 0; i < commits.size(); i++) { HoodieInstant commit = commits.get(i); rows[i] = new String[] {commit.getTimestamp()}; } return HoodiePrintHelper.print(new String[] {"SavepointTime"}, rows); }
private String getLastCommitTimePulled(FileSystem fs, String sourceTableLocation) throws IOException { HoodieTableMetaClient metadata = new HoodieTableMetaClient(fs.getConf(), sourceTableLocation); List<String> commitsToSync = metadata.getActiveTimeline().getCommitsTimeline() .filterCompletedInstants() .findInstantsAfter(config.fromCommitTime, config.maxCommits) .getInstants().map(HoodieInstant::getTimestamp) .collect(Collectors.toList()); if (commitsToSync.isEmpty()) { log.warn("Nothing to sync. All commits in " + config.sourceTable + " are " + metadata.getActiveTimeline().getCommitsTimeline().filterCompletedInstants() .getInstants().collect(Collectors.toList()) + " and from commit time is " + config.fromCommitTime); return null; } log.info("Syncing commits " + commitsToSync); return commitsToSync.get(commitsToSync.size() - 1); }
/** * Create a file system view, as of the given timeline */ public HoodieTableFileSystemView(HoodieTableMetaClient metaClient, HoodieTimeline visibleActiveTimeline) { this.metaClient = metaClient; this.visibleActiveTimeline = visibleActiveTimeline; this.fileGroupMap = new HashMap<>(); this.partitionToFileGroupsMap = new HashMap<>(); // Build fileId to Pending Compaction Instants List<HoodieInstant> pendingCompactionInstants = metaClient.getActiveTimeline().filterPendingCompactionTimeline().getInstants().collect(Collectors.toList()); this.fileIdToPendingCompaction = ImmutableMap.copyOf( CompactionUtils.getAllPendingCompactionOperations(metaClient).entrySet().stream() .map(entry -> Pair.of(entry.getKey(), Pair.of(entry.getValue().getKey(), CompactionOperation.convertFromAvroRecordInstance(entry.getValue().getValue())))) .collect(Collectors.toMap(Pair::getKey, Pair::getValue))); }
@SuppressWarnings("OptionalUsedAsFieldOrParameterType") List<String> getPartitionsWrittenToSince(Optional<String> lastCommitTimeSynced) { if (!lastCommitTimeSynced.isPresent()) { LOG.info("Last commit time synced is not known, listing all partitions"); try { return FSUtils.getAllPartitionPaths(fs, syncConfig.basePath, syncConfig.assumeDatePartitioning); } catch (IOException e) { throw new HoodieIOException("Failed to list all partitions in " + syncConfig.basePath, e); } } else { LOG.info("Last commit time synced is " + lastCommitTimeSynced.get() + ", Getting commits since then"); HoodieTimeline timelineToSync = activeTimeline.findInstantsAfter(lastCommitTimeSynced.get(), Integer.MAX_VALUE); return timelineToSync.getInstants().map(s -> { try { return HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(s).get(), HoodieCommitMetadata.class); } catch (IOException e) { throw new HoodieIOException( "Failed to get partitions written since " + lastCommitTimeSynced, e); } }).flatMap(s -> s.getPartitionToWriteStats().keySet().stream()).distinct() .collect(Collectors.toList()); } }
/** * Get all pending compaction plans along with their instants * * @param metaClient Hoodie Meta Client */ public static List<Pair<HoodieInstant, HoodieCompactionPlan>> getAllPendingCompactionPlans( HoodieTableMetaClient metaClient) { List<HoodieInstant> pendingCompactionInstants = metaClient.getActiveTimeline().filterPendingCompactionTimeline().getInstants().collect(Collectors.toList()); return pendingCompactionInstants.stream().map(instant -> { try { return Pair.of(instant, getCompactionPlan(metaClient, instant.getTimestamp())); } catch (IOException e) { throw new HoodieException(e); } }).collect(Collectors.toList()); }
/** * Cleanup all inflight commits */ private void rollbackInflightCommits() { HoodieTable<T> table = HoodieTable.getHoodieTable( new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true), config, jsc); HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterInflightsExcludingCompaction(); List<String> commits = inflightTimeline.getInstants().map(HoodieInstant::getTimestamp) .collect(Collectors.toList()); Collections.reverse(commits); for (String commit : commits) { rollback(commit); } }
@CliCommand(value = "commits compare", help = "Compare commits with another Hoodie dataset") public String compareCommits(@CliOption(key = {"path"}, help = "Path of the dataset to compare to") final String path) throws Exception { HoodieTableMetaClient target = new HoodieTableMetaClient(HoodieCLI.conf, path); HoodieTimeline targetTimeline = target.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); HoodieTableMetaClient source = HoodieCLI.tableMetadata; HoodieTimeline sourceTimeline = source.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); String targetLatestCommit = targetTimeline.getInstants().iterator().hasNext() ? "0" : targetTimeline.lastInstant().get().getTimestamp(); String sourceLatestCommit = sourceTimeline.getInstants().iterator().hasNext() ? "0" : sourceTimeline.lastInstant().get().getTimestamp(); if (sourceLatestCommit != null && HoodieTimeline.compareTimestamps(targetLatestCommit, sourceLatestCommit, HoodieTimeline.GREATER)) { // source is behind the target List<String> commitsToCatchup = targetTimeline.findInstantsAfter(sourceLatestCommit, Integer.MAX_VALUE) .getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList()); return "Source " + source.getTableConfig().getTableName() + " is behind by " + commitsToCatchup.size() + " commits. Commits to catch up - " + commitsToCatchup; } else { List<String> commitsToCatchup = sourceTimeline.findInstantsAfter(targetLatestCommit, Integer.MAX_VALUE) .getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList()); return "Source " + source.getTableConfig().getTableName() + " is ahead by " + commitsToCatchup.size() + " commits. Commits to catch up - " + commitsToCatchup; } }
/** * Cleanup all inflight commits */ private void rollbackInflightCommits() { HoodieTable<T> table = HoodieTable.getHoodieTable( new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true), config, jsc); HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterInflightsExcludingCompaction(); List<String> commits = inflightTimeline.getInstants().map(HoodieInstant::getTimestamp) .collect(Collectors.toList()); Collections.reverse(commits); for (String commit : commits) { rollback(commit); } }
String actionType = metaClient.getCommitActionType(); HoodieActiveTimeline activeTimeline = this.getActiveTimeline(); List<String> inflights = this.getInflightCommitTimeline().getInstants() .map(HoodieInstant::getTimestamp).collect(Collectors.toList());
/** * Obtain all new data written into the Hoodie dataset since the given timestamp. */ public static Dataset<Row> readSince(String basePath, SQLContext sqlContext, HoodieTimeline commitTimeline, String lastCommitTime) { List<HoodieInstant> commitsToReturn = commitTimeline.findInstantsAfter(lastCommitTime, Integer.MAX_VALUE) .getInstants().collect(Collectors.toList()); try { // Go over the commit metadata, and obtain the new files that need to be read. HashMap<String, String> fileIdToFullPath = getLatestFileIDsToFullPath(basePath, commitTimeline, commitsToReturn); return sqlContext.read().parquet(fileIdToFullPath.values().toArray(new String[fileIdToFullPath.size()])) .filter(String.format("%s >'%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, lastCommitTime)); } catch (IOException e) { throw new HoodieException("Error pulling data incrementally from commitTimestamp :" + lastCommitTime, e); } }
@CliCommand(value = "cleans show", help = "Show the cleans") public String showCleans( @CliOption(key = {"limit"}, help = "Limit commits", unspecifiedDefaultValue = "-1") final Integer limit, @CliOption(key = {"sortBy"}, help = "Sorting Field", unspecifiedDefaultValue = "") final String sortByField, @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = { "headeronly"}, help = "Print Header Only", unspecifiedDefaultValue = "false") final boolean headerOnly) throws IOException { HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); HoodieTimeline timeline = activeTimeline.getCleanerTimeline().filterCompletedInstants(); List<HoodieInstant> cleans = timeline.getInstants().collect(Collectors.toList()); List<Comparable[]> rows = new ArrayList<>(); Collections.reverse(cleans); for (int i = 0; i < cleans.size(); i++) { HoodieInstant clean = cleans.get(i); HoodieCleanMetadata cleanMetadata = AvroUtils .deserializeHoodieCleanMetadata(timeline.getInstantDetails(clean).get()); rows.add(new Comparable[]{clean.getTimestamp(), cleanMetadata.getEarliestCommitToRetain(), cleanMetadata.getTotalFilesDeleted(), cleanMetadata.getTimeTakenInMillis()}); } TableHeader header = new TableHeader() .addTableHeaderField("CleanTime") .addTableHeaderField("EarliestCommandRetained") .addTableHeaderField("Total Files Deleted") .addTableHeaderField("Total Time Taken"); return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending, limit, headerOnly, rows); }
private void moveCompactionFromRequestedToInflight(String compactionInstantTime, HoodieWriteClient client, HoodieWriteConfig cfg) throws IOException { HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath()); HoodieInstant compactionInstant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime); HoodieCompactionPlan workload = AvroUtils.deserializeCompactionPlan( metaClient.getActiveTimeline().getInstantAuxiliaryDetails(compactionInstant).get()); metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(compactionInstant); HoodieInstant instant = metaClient.getActiveTimeline().reload().filterPendingCompactionTimeline().getInstants() .filter(in -> in.getTimestamp().equals(compactionInstantTime)).findAny().get(); assertTrue("Instant must be marked inflight", instant.isInflight()); }
HoodieTestUtils.assertStreamEquals("Check the instants stream", Stream.of(instant1Complete, instant2Complete, instant3Complete, instant4Complete, instant5), timeline.getCommitTimeline().getInstants()); HoodieTestUtils.assertStreamEquals("Check the instants stream", Stream.of(instant1Complete, instant2Complete, instant3Complete, instant4Complete), timeline.getCommitTimeline().filterCompletedInstants().getInstants()); HoodieTestUtils.assertStreamEquals("Check the instants stream", Stream.of(instant5), timeline.getCommitTimeline().filterInflightsExcludingCompaction().getInstants());
@Test public void checkCommitTimeline() throws IOException { HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline(); HoodieTimeline activeCommitTimeline = activeTimeline.getCommitTimeline(); assertTrue("Should be empty commit timeline", activeCommitTimeline.empty()); HoodieInstant instant = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1"); activeTimeline.createInflight(instant); activeTimeline.saveAsComplete(instant, Optional.of("test-detail".getBytes())); // Commit timeline should not auto-reload every time getActiveCommitTimeline(), it should be cached activeTimeline = metaClient.getActiveTimeline(); activeCommitTimeline = activeTimeline.getCommitTimeline(); assertTrue("Should be empty commit timeline", activeCommitTimeline.empty()); HoodieInstant completedInstant = HoodieTimeline.getCompletedInstant(instant); activeTimeline = activeTimeline.reload(); activeCommitTimeline = activeTimeline.getCommitTimeline(); assertFalse("Should be the 1 commit we made", activeCommitTimeline.empty()); assertEquals("Commit should be 1", completedInstant, activeCommitTimeline.getInstants().findFirst().get()); assertArrayEquals("Commit value should be \"test-detail\"", "test-detail".getBytes(), activeCommitTimeline.getInstantDetails(completedInstant).get()); }
@Test public void testTimelineOperations() throws Exception { timeline = new MockHoodieTimeline(Stream.of("01", "03", "05", "07", "09", "11", "13", "15", "17", "19"), Stream.of("21", "23")); HoodieTestUtils.assertStreamEquals("", Stream.of("05", "07", "09", "11"), timeline.getCommitTimeline().filterCompletedInstants().findInstantsInRange("04", "11").getInstants() .map(HoodieInstant::getTimestamp)); HoodieTestUtils.assertStreamEquals("", Stream.of("09", "11"), timeline.getCommitTimeline().filterCompletedInstants().findInstantsAfter("07", 2).getInstants() .map(HoodieInstant::getTimestamp)); assertFalse(timeline.empty()); assertFalse(timeline.getCommitTimeline().filterInflightsExcludingCompaction().empty()); assertEquals("", 12, timeline.countInstants()); HoodieTimeline activeCommitTimeline = timeline.getCommitTimeline().filterCompletedInstants(); assertEquals("", 10, activeCommitTimeline.countInstants()); assertEquals("", "01", activeCommitTimeline.firstInstant().get().getTimestamp()); assertEquals("", "11", activeCommitTimeline.nthInstant(5).get().getTimestamp()); assertEquals("", "19", activeCommitTimeline.lastInstant().get().getTimestamp()); assertEquals("", "09", activeCommitTimeline.nthFromLastInstant(5).get().getTimestamp()); assertTrue("", activeCommitTimeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, "09"))); assertFalse("", activeCommitTimeline.isBeforeTimelineStarts("02")); assertTrue("", activeCommitTimeline.isBeforeTimelineStarts("00")); } }