/** * Get the commit timeline visible for this table */ public HoodieTimeline getCommitsTimeline() { switch (this.getTableType()) { case COPY_ON_WRITE: return getActiveTimeline().getCommitTimeline(); case MERGE_ON_READ: // We need to include the parquet files written out in delta commits // Include commit action to be able to start doing a MOR over a COW dataset - no // migration required return getActiveTimeline().getCommitsTimeline(); default: throw new HoodieException("Unsupported table type :" + this.getTableType()); } }
/**
 * Returns the commit + pending-compaction timeline that is visible for this table.
 *
 * <p>A RT filesystem view is built on this timeline so that a file-slice created after a pending
 * compaction-requested instant-time still counts as valid. When reading, a RT file-system view
 * then has to merge the file-slices before and after the pending compaction instant so that all
 * delta-commits are read.
 *
 * @return the timeline selected for this table's type
 * @throws HoodieException if the table type is neither {@code COPY_ON_WRITE} nor {@code MERGE_ON_READ}
 */
public HoodieTimeline getCommitsAndCompactionTimeline() {
  final HoodieTimeline visibleTimeline;
  switch (this.getTableType()) {
    case COPY_ON_WRITE:
      visibleTimeline = getActiveTimeline().getCommitTimeline();
      break;
    case MERGE_ON_READ:
      visibleTimeline = getActiveTimeline().getCommitsAndCompactionTimeline();
      break;
    default:
      throw new HoodieException("Unsupported table type :" + this.getTableType());
  }
  return visibleTimeline;
}
/**
 * Loads the rolling stats stored with the last completed delta commit on the active timeline.
 *
 * <p>The stats are read from the commit's extra metadata under
 * {@link HoodieRollingStatMetadata#ROLLING_STAT_METADATA_KEY}.
 *
 * @return the deserialized rolling stats, or {@code null} when there is no completed delta commit
 *         or the last one carries no rolling-stat entry
 * @throws HoodieException if reading or deserializing the metadata fails with an
 *         {@link IOException}; the original exception is attached as the cause
 */
@Override
protected HoodieRollingStatMetadata getRollingStats() {
  try {
    Optional<HoodieInstant> lastInstant =
        this.getActiveTimeline().getDeltaCommitTimeline().filterCompletedInstants().lastInstant();
    if (!lastInstant.isPresent()) {
      return null;
    }

    HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
        this.getActiveTimeline().getInstantDetails(lastInstant.get()).get(),
        HoodieCommitMetadata.class);

    String lastRollingStat =
        commitMetadata.getExtraMetadata().get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY);
    if (lastRollingStat == null) {
      return null;
    }

    // NOTE(review): getBytes() uses the platform default charset — confirm it matches the
    // charset that fromBytes() decodes with.
    return HoodieCommitMetadata.fromBytes(lastRollingStat.getBytes(), HoodieRollingStatMetadata.class);
  } catch (IOException e) {
    // Keep the cause: a bare exception would hide which read/deserialize step failed.
    throw new HoodieException("Failed to read rolling stats from the last completed delta commit", e);
  }
}
HoodieTable hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg, jsc); Optional<HoodieInstant> deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant(); assertTrue(deltaCommit.isPresent()); assertEquals("Delta commit should be 001", "001", deltaCommit.get().getTimestamp()); Optional<HoodieInstant> commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); assertFalse(commit.isPresent()); deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant(); assertTrue(deltaCommit.isPresent()); assertEquals("Latest Delta commit should be 002", "002", deltaCommit.get().getTimestamp()); commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); assertFalse(commit.isPresent()); hoodieTable.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants(), allFiles); dataFilesToRead = roView.getLatestDataFiles(); List<HoodieDataFile> newDataFilesList = dataFilesToRead.collect(Collectors.toList());
timeline = new HoodieActiveTimeline(metaClient); timeline.saveAsComplete(instant1, Optional.empty()); timeline.saveAsComplete(instant2, Optional.empty()); timeline.saveAsComplete(instant3, Optional.empty()); timeline.saveAsComplete(instant4, Optional.empty()); timeline.createInflight(instant5); timeline = timeline.reload(); assertEquals("Total instants should be 5", 5, timeline.countInstants()); HoodieTestUtils.assertStreamEquals("Check the instants stream", Stream.of(instant1Complete, instant2Complete, instant3Complete, instant4Complete, instant5), timeline.getInstants()); HoodieTestUtils.assertStreamEquals("Check the instants stream", Stream.of(instant1Complete, instant2Complete, instant3Complete, instant4Complete, instant5), timeline.getCommitTimeline().getInstants()); HoodieTestUtils.assertStreamEquals("Check the instants stream", Stream.of(instant1Complete, instant2Complete, instant3Complete, instant4Complete), timeline.getCommitTimeline().filterCompletedInstants().getInstants()); HoodieTestUtils.assertStreamEquals("Check the instants stream", Stream.of(instant5), timeline.getCommitTimeline().filterInflightsExcludingCompaction().getInstants());
timeline = metaClient.getActiveTimeline().getCommitTimeline(); } else if (excludeCompaction) { timeline = metaClient.getActiveTimeline().getCommitsTimeline(); } else { timeline = metaClient.getActiveTimeline().getCommitsAndCompactionTimeline();
HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants(); HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime); return "Commit " + commitTime + " not found in Commits " + timeline; HoodieCommitMetadata meta = HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(commitInstant).get(), HoodieCommitMetadata.class); List<Comparable[]> rows = new ArrayList<>();
@Test public void checkCommitTimeline() throws IOException { HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline(); HoodieTimeline activeCommitTimeline = activeTimeline.getCommitTimeline(); assertTrue("Should be empty commit timeline", activeCommitTimeline.empty()); HoodieInstant instant = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1"); activeTimeline.createInflight(instant); activeTimeline.saveAsComplete(instant, Optional.of("test-detail".getBytes())); // Commit timeline should not auto-reload every time getActiveCommitTimeline(), it should be cached activeTimeline = metaClient.getActiveTimeline(); activeCommitTimeline = activeTimeline.getCommitTimeline(); assertTrue("Should be empty commit timeline", activeCommitTimeline.empty()); HoodieInstant completedInstant = HoodieTimeline.getCompletedInstant(instant); activeTimeline = activeTimeline.reload(); activeCommitTimeline = activeTimeline.getCommitTimeline(); assertFalse("Should be the 1 commit we made", activeCommitTimeline.empty()); assertEquals("Commit should be 1", completedInstant, activeCommitTimeline.getInstants().findFirst().get()); assertArrayEquals("Commit value should be \"test-detail\"", "test-detail".getBytes(), activeCommitTimeline.getInstantDetails(completedInstant).get()); }
/**
 * Finds the timestamp of the last completed instant on the commits timeline of the dataset at
 * {@code targetDataPath}.
 *
 * @param fs file system used to probe the target path and to build the meta client
 * @param targetDataPath base path of the target dataset; must not be {@code null}
 * @return the timestamp of the last completed instant, or {@code "0"} when the path or its
 *         {@code .hoodie} folder does not exist, or when no completed instant is present
 * @throws IllegalArgumentException if {@code targetDataPath} is {@code null}
 * @throws IOException declared by this method; the visible code does not show which call raises it
 */
private String scanForCommitTime(FileSystem fs, String targetDataPath) throws IOException {
  if (targetDataPath == null) {
    throw new IllegalArgumentException(
        "Please specify either --fromCommitTime or --targetDataPath");
  }

  final boolean datasetPresent =
      fs.exists(new Path(targetDataPath)) && fs.exists(new Path(targetDataPath + "/.hoodie"));
  if (!datasetPresent) {
    return "0";
  }

  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), targetDataPath);
  Optional<HoodieInstant> latestCompleted = metaClient.getActiveTimeline()
      .getCommitsTimeline()
      .filterCompletedInstants()
      .lastInstant();
  return latestCompleted.isPresent() ? latestCompleted.get().getTimestamp() : "0";
}
HoodieTimeline timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline(); Dataset<Row> readRecords = HoodieClientTestUtils.readCommit(dfsBasePath, sqlContext, timeline, readCommitTime); assertEquals("Should contain 100 records", readRecords.count(), records.size()); fs = FSUtils.getFs(tablePath, HoodieTestUtils.getDefaultHadoopConf()); metaClient = new HoodieTableMetaClient(fs.getConf(), tablePath); timeline = new HoodieActiveTimeline(metaClient).getCommitTimeline(); Dataset<Row> localReadRecords = HoodieClientTestUtils.readCommit(tablePath, sqlContext, timeline, writeCommitTime); assertEquals("Should contain 100 records", localReadRecords.count(), localRecords.size());
@Test public void checkSerDe() throws IOException, ClassNotFoundException { // check if this object is serialized and de-serialized, we are able to read from the file system HoodieTableMetaClient deseralizedMetaClient = HoodieTestUtils .serializeDeserialize(metaClient, HoodieTableMetaClient.class); assertNotNull(deseralizedMetaClient); HoodieActiveTimeline commitTimeline = deseralizedMetaClient.getActiveTimeline(); HoodieInstant instant = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, "1"); commitTimeline.createInflight(instant); commitTimeline.saveAsComplete(instant, Optional.of("test-detail".getBytes())); commitTimeline = commitTimeline.reload(); HoodieInstant completedInstant = HoodieTimeline.getCompletedInstant(instant); assertEquals("Commit should be 1 and completed", completedInstant, commitTimeline.getInstants().findFirst().get()); assertArrayEquals("Commit value should be \"test-detail\"", "test-detail".getBytes(), commitTimeline.getInstantDetails(completedInstant).get()); }
boolean headerOnly) throws IOException { HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); HoodieTimeline timeline = activeTimeline.getCommitsAndCompactionTimeline(); HoodieTimeline commitTimeline = activeTimeline.getCommitTimeline().filterCompletedInstants(); Set<String> committed = commitTimeline.getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toSet()); activeTimeline.getInstantAuxiliaryDetails( HoodieTimeline.getCompactionRequestedInstant(instant.getTimestamp())).get()); } catch (HoodieIOException ioe) { workload = AvroUtils.deserializeCompactionPlan(activeTimeline.getInstantAuxiliaryDetails( HoodieTimeline.getCompactionRequestedInstant(instant.getTimestamp())).get());
HoodieTestDataGenerator.createCommitFile(basePath, "103", dfs.getConf()); HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); assertEquals("Loaded 4 commits and the count should match", 4, timeline.countInstants()); boolean result = archiveLog.archiveIfRequired(jsc); assertTrue(result); timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants(); assertEquals("Should not archive commits when maxCommitsToKeep is 5", 4, timeline.countInstants());
/**
 * Reads the metadata map stored under {@code HOODIE_METADATA_KEY} in the last completed instant
 * of the commit timeline.
 *
 * @param hoodieConf configuration supplying the Hadoop conf and the dataset base path
 * @return the deserialized map, or an empty map when there is no completed instant or the
 *         instant has no (or an empty) entry for the key
 * @throws JobRuntimeException if an {@link IOException} occurs while reading the metadata
 */
private static Map<String, String> readMetadataInfo(
    @NonNull final HoodieConfiguration hoodieConf) {
  try {
    final FileSystem fs = FSUtils.getFs(hoodieConf.getConf());
    HoodieUtil.initHoodieDataset(fs, hoodieConf);

    final HoodieTableMetaClient metaClient = new HoodieTableMetaClient(
        new HadoopConfiguration(hoodieConf.getConf()).getHadoopConf(),
        hoodieConf.getBasePath(),
        true);
    final HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
    final java.util.Optional<HoodieInstant> latestCompleted =
        activeTimeline.getCommitTimeline().filterCompletedInstants().lastInstant();
    if (!latestCompleted.isPresent()) {
      return new HashMap<>();
    }

    final HoodieInstant checkpointInstant = latestCompleted.get();
    log.info("using hoodie instant for reading checkpoint info :{}", checkpointInstant.getTimestamp());
    final HoodieCommitMetadata commitMetadata =
        HoodieCommitMetadata.fromBytes(activeTimeline.getInstantDetails(checkpointInstant).get());
    final String serializedInfo = commitMetadata.getMetadata(HOODIE_METADATA_KEY);
    if (Strings.isNullOrEmpty(serializedInfo)) {
      return new HashMap<>();
    }
    return MapUtil.deserializeMap(serializedInfo);
  } catch (IOException e) {
    log.error("failed to read metadata info", e);
    throw new JobRuntimeException("failed to read metadata information", e);
  }
}
}
/**
 * Collects the latest file slices of every default test partition, using a file-system view
 * built over the reloaded commits-and-compaction timeline of the table.
 *
 * @param table table whose meta client supplies the timeline
 * @param cfg write config (not read by this method)
 * @return the latest file slices across {@code HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS}
 */
private List<FileSlice> getCurrentLatestFileSlices(HoodieTable table, HoodieWriteConfig cfg) throws IOException {
  HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(
      table.getMetaClient(),
      table.getMetaClient().getActiveTimeline().reload().getCommitsAndCompactionTimeline());

  return Arrays.stream(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS)
      .flatMap(partitionPath -> fsView.getLatestFileSlices(partitionPath))
      .collect(Collectors.toList());
}
/** * Get the compacted commit timeline visible for this table */ public HoodieTimeline getCommitTimeline() { switch (this.getTableType()) { case COPY_ON_WRITE: case MERGE_ON_READ: // We need to include the parquet files written out in delta commits in tagging return getActiveTimeline().getCommitTimeline(); default: throw new HoodieException("Unsupported table type :" + this.getTableType()); } }
/**
 * Transitions the compaction at {@code compactionInstantTime} from requested to inflight and
 * asserts that the reloaded pending-compaction timeline reports it as inflight.
 *
 * @param compactionInstantTime instant time of the requested compaction
 * @param client write client (not read by this method)
 * @param cfg write config supplying the base path for the meta client
 */
private void moveCompactionFromRequestedToInflight(String compactionInstantTime, HoodieWriteClient client,
    HoodieWriteConfig cfg) throws IOException {
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
  HoodieInstant requestedInstant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);

  // The plan itself is not used below; the call is kept so the auxiliary details of the
  // requested instant are still read and deserialized before the transition.
  AvroUtils.deserializeCompactionPlan(
      metaClient.getActiveTimeline().getInstantAuxiliaryDetails(requestedInstant).get());

  metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(requestedInstant);

  HoodieInstant pendingInstant = metaClient.getActiveTimeline()
      .reload()
      .filterPendingCompactionTimeline()
      .getInstants()
      .filter(candidate -> candidate.getTimestamp().equals(compactionInstantTime))
      .findAny()
      .get();
  assertTrue("Instant must be marked inflight", pendingInstant.isInflight());
}
/**
 * Sums, over the given commits, the records written minus the update records written.
 *
 * @param target meta client of the dataset whose (reloaded) completed commit timeline is read
 * @param commitsToCatchup commit times to inspect; each is looked up as a completed commit instant
 * @return the accumulated difference across all listed commits
 * @throws IOException declared by this method — presumably raised while deserializing commit
 *         metadata; confirm against {@code HoodieCommitMetadata.fromBytes}
 */
public static long countNewRecords(HoodieTableMetaClient target, List<String> commitsToCatchup)
    throws IOException {
  final HoodieTimeline completedCommits =
      target.getActiveTimeline().reload().getCommitTimeline().filterCompletedInstants();

  long newRecords = 0;
  for (String commitTime : commitsToCatchup) {
    HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime);
    HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
        completedCommits.getInstantDetails(commitInstant).get(), HoodieCommitMetadata.class);
    newRecords += commitMetadata.fetchTotalRecordsWritten() - commitMetadata.fetchTotalUpdateRecordsWritten();
  }
  return newRecords;
}
}
HoodieCommitMetadata metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table .getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class); HoodieRollingStatMetadata rollingStatMetadata = HoodieCommitMetadata.fromBytes(metadata.getExtraMetadata() .get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(), HoodieRollingStatMetadata.class); metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table .getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class); rollingStatMetadata = HoodieCommitMetadata.fromBytes(metadata.getExtraMetadata() .get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(), HoodieRollingStatMetadata.class); metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table .getActiveTimeline().getCommitsTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class); HoodieRollingStatMetadata rollingStatMetadata1 = HoodieCommitMetadata.fromBytes(metadata.getExtraMetadata() .get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(), HoodieRollingStatMetadata.class); metadata = HoodieCommitMetadata.fromBytes(table.getActiveTimeline().getInstantDetails(table .getActiveTimeline().getDeltaCommitTimeline().lastInstant().get()).get(), HoodieCommitMetadata.class); rollingStatMetadata = HoodieCommitMetadata.fromBytes(metadata.getExtraMetadata() .get(HoodieRollingStatMetadata.ROLLING_STAT_METADATA_KEY).getBytes(), HoodieRollingStatMetadata.class);
/**
 * Returns the part of the active timeline restricted to the following actions: commit,
 * delta commit, clean, compaction, savepoint and rollback.
 *
 * @return the timeline produced by {@code getTimelineOfActions} for that action set
 */
public HoodieTimeline getAllCommitsTimeline() {
  return getTimelineOfActions(
      Sets.newHashSet(
          COMMIT_ACTION,
          DELTA_COMMIT_ACTION,
          CLEAN_ACTION,
          COMPACTION_ACTION,
          SAVEPOINT_ACTION,
          ROLLBACK_ACTION));
}