protected HoodieTable(HoodieWriteConfig config, JavaSparkContext jsc) {
  this.config = config;
  this.metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true);
  this.index = HoodieIndex.createIndex(config, jsc);
}
/**
 * Return all pending compactions with instant time for clients to decide what to compact next.
 *
 * @return list of (instant time, compaction plan) pairs for all pending compactions
 */
public List<Pair<String, HoodieCompactionPlan>> getPendingCompactions() {
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(),
      hoodieTable.getMetaClient().getBasePath(), true);
  return CompactionUtils.getAllPendingCompactionPlans(metaClient).stream()
      .map(instantWorkloadPair ->
          Pair.of(instantWorkloadPair.getKey().getTimestamp(), instantWorkloadPair.getValue()))
      .collect(Collectors.toList());
}
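// A minimal caller-side sketch for getPendingCompactions() above. The `client`
// variable (an object exposing that method) and the selection logic are
// illustrative assumptions, not part of the source.
List<Pair<String, HoodieCompactionPlan>> pending = client.getPendingCompactions();
pending.stream()
    .min((a, b) -> a.getKey().compareTo(b.getKey())) // instant times sort lexicographically
    .ifPresent(next -> System.out.println("Next compaction instant: " + next.getKey()));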
/**
 * Commit changes performed at the given commitTime marker.
 */
public boolean commit(String commitTime, JavaRDD<WriteStatus> writeStatuses,
    Optional<Map<String, String>> extraMetadata) {
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true);
  return commit(commitTime, writeStatuses, extraMetadata, metaClient.getCommitActionType());
}
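// A minimal usage sketch for the commit(...) overload above, assuming auto-commit is
// disabled so the application commits explicitly. `client` (a HoodieWriteClient) and
// `records` are assumptions standing in for the surrounding application code.
String commitTime = client.startCommit();
JavaRDD<WriteStatus> writeStatuses = client.upsert(records, commitTime);
if (!client.commit(commitTime, writeStatuses, Optional.empty())) {
  throw new IllegalStateException("Commit " + commitTime + " failed");
}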
@CliCommand(value = "commits refresh", help = "Refresh the commits") public String refreshCommits() throws IOException { HoodieTableMetaClient metadata = new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath()); HoodieCLI.setTableMetadata(metadata); return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed."; }
@CliCommand(value = "savepoints refresh", help = "Refresh the savepoints") public String refreshMetaClient() throws IOException { HoodieTableMetaClient metadata = new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath()); HoodieCLI.setTableMetadata(metadata); return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed."; }
@CliCommand(value = "cleans refresh", help = "Refresh the commits") public String refreshCleans() throws IOException { HoodieTableMetaClient metadata = new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath()); HoodieCLI.setTableMetadata(metadata); return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed."; }
private HoodieTable getTableAndInitCtx() {
  // Create a Hoodie table which encapsulates the commits and files visible
  HoodieTable table = HoodieTable.getHoodieTable(
      new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true), config, jsc);
  if (table.getMetaClient().getCommitActionType().equals(HoodieTimeline.COMMIT_ACTION)) {
    writeContext = metrics.getCommitCtx();
  } else {
    writeContext = metrics.getDeltaCommitCtx();
  }
  return table;
}
/**
 * Ensure the compaction plan is valid.
 *
 * @param compactionInstant Compaction Instant
 * @throws Exception
 */
private void ensureValidCompactionPlan(String compactionInstant) throws Exception {
  metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
  // Ensure compaction-plan is good to begin with
  List<ValidationOpResult> validationResults = client.validateCompactionPlan(metaClient, compactionInstant, 1);
  Assert.assertFalse("Some validations failed",
      validationResults.stream().anyMatch(v -> !v.isSuccess()));
}
@CliCommand(value = "commits sync", help = "Compare commits with another Hoodie dataset") public String syncCommits(@CliOption(key = {"path"}, help = "Path of the dataset to compare to") final String path) throws Exception { HoodieCLI.syncTableMetadata = new HoodieTableMetaClient(HoodieCLI.conf, path); HoodieCLI.state = HoodieCLI.CLIState.SYNC; return "Load sync state between " + HoodieCLI.tableMetadata.getTableConfig().getTableName() + " and " + HoodieCLI.syncTableMetadata.getTableConfig().getTableName(); }
@CliCommand(value = "connect", help = "Connect to a hoodie dataset") public String connect( @CliOption(key = {"path"}, mandatory = true, help = "Base Path of the dataset") final String path) throws IOException { boolean initialized = HoodieCLI.initConf(); HoodieCLI.initFS(initialized); HoodieCLI.setTableMetadata(new HoodieTableMetaClient(HoodieCLI.conf, path)); HoodieCLI.state = HoodieCLI.CLIState.DATASET; return "Metadata for table " + HoodieCLI.tableMetadata.getTableConfig().getTableName() + " loaded"; }
private JavaRDD<WriteStatus> updateIndexAndCommitIfNeeded(JavaRDD<WriteStatus> writeStatusRDD, HoodieTable<T> table, String commitTime) { // Update the index back JavaRDD<WriteStatus> statuses = index.updateLocation(writeStatusRDD, jsc, table); // Trigger the insert and collect statuses statuses = statuses.persist(config.getWriteStatusStorageLevel()); commitOnAutoCommit(commitTime, statuses, new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true) .getCommitActionType()); return statuses; }
private void scheduleCompaction(String compactionInstantTime, HoodieWriteClient client, HoodieWriteConfig cfg)
    throws IOException {
  client.scheduleCompactionAtInstant(compactionInstantTime, Optional.empty());
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
  HoodieInstant instant = metaClient.getActiveTimeline().filterPendingCompactionTimeline().lastInstant().get();
  assertEquals("Last compaction instant must be the one set", instant.getTimestamp(), compactionInstantTime);
}
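// Alternative scheduling sketch: rather than pinning the instant time with
// scheduleCompactionAtInstant(...), the write client can pick one. The
// scheduleCompaction(Optional) variant returning the chosen instant is an
// assumption here; verify it against the HoodieWriteClient version in use.
Optional<String> compactionInstant = client.scheduleCompaction(Optional.empty());
compactionInstant.ifPresent(instant -> System.out.println("Scheduled compaction at " + instant));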
private void moveCompactionFromRequestedToInflight(String compactionInstantTime, HoodieWriteClient client,
    HoodieWriteConfig cfg) throws IOException {
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
  HoodieInstant compactionInstant = HoodieTimeline.getCompactionRequestedInstant(compactionInstantTime);
  HoodieCompactionPlan workload = AvroUtils.deserializeCompactionPlan(
      metaClient.getActiveTimeline().getInstantAuxiliaryDetails(compactionInstant).get());
  metaClient.getActiveTimeline().transitionCompactionRequestedToInflight(compactionInstant);
  HoodieInstant instant = metaClient.getActiveTimeline().reload().filterPendingCompactionTimeline().getInstants()
      .filter(in -> in.getTimestamp().equals(compactionInstantTime)).findAny().get();
  assertTrue("Instant must be marked inflight", instant.isInflight());
}
@Test
public void testArchiveEmptyDataset() throws IOException {
  HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath)
      .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2)
      .forTable("test-trip-table").build();
  HoodieCommitArchiveLog archiveLog = new HoodieCommitArchiveLog(cfg,
      new HoodieTableMetaClient(dfs.getConf(), cfg.getBasePath(), true));
  boolean result = archiveLog.archiveIfRequired(jsc);
  assertTrue(result);
}
@Test(expected = HoodieNotSupportedException.class)
public void testCompactionOnCopyOnWriteFail() throws Exception {
  HoodieTestUtils.initTableType(hadoopConf, basePath, HoodieTableType.COPY_ON_WRITE);
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
  HoodieTable table = HoodieTable.getHoodieTable(metaClient, getConfig(), jsc);
  String compactionInstantTime = HoodieActiveTimeline.createNewCommitTime();
  table.compact(jsc, compactionInstantTime, table.scheduleCompaction(jsc, compactionInstantTime));
}
@Test(expected = IllegalStateException.class)
public void testGetAllPendingCompactionOperationsWithDupFileId() throws IOException {
  // Case where there are duplicate fileIds in compaction requests
  HoodieCompactionPlan plan1 = createCompactionPlan(metaClient, "000", "001", 10, true, true);
  HoodieCompactionPlan plan2 = createCompactionPlan(metaClient, "002", "003", 0, false, false);
  scheduleCompaction(metaClient, "001", plan1);
  scheduleCompaction(metaClient, "003", plan2);
  // Schedule the same plan again so that there will be duplicates
  scheduleCompaction(metaClient, "005", plan1);
  metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
  // Expected to throw IllegalStateException because of the duplicated fileId
  Map<String, Pair<String, HoodieCompactionOperation>> res =
      CompactionUtils.getAllPendingCompactionOperations(metaClient);
}
private void refreshFsView(FileStatus[] statuses) {
  metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
  if (statuses != null) {
    fsView = new HoodieTableFileSystemView(metaClient,
        metaClient.getActiveTimeline().getCommitsAndCompactionTimeline().filterCompletedAndCompactionInstants(),
        statuses);
  } else {
    fsView = new HoodieTableFileSystemView(metaClient,
        metaClient.getActiveTimeline().getCommitsAndCompactionTimeline().filterCompletedAndCompactionInstants());
  }
  roView = (TableFileSystemView.ReadOptimizedView) fsView;
  rtView = (TableFileSystemView.RealtimeView) fsView;
}
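// Usage sketch for the views built above: list the latest data files in a partition
// through the read-optimized view. getLatestDataFiles(partitionPath) returning a
// Stream<HoodieDataFile> is assumed from this Hoodie version, and the partition
// path literal is illustrative only.
List<HoodieDataFile> latest = roView.getLatestDataFiles("2016/03/15").collect(Collectors.toList());
latest.forEach(df -> System.out.println(df.getFileId() + " -> " + df.getPath()));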