public String getBasePath() { return metaClient.getBasePath(); }
/**
 * Read schema from a data file written by the last completed compaction commit.
 */
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private MessageType readSchemaFromLastCompaction(Optional<HoodieInstant> lastCompactionCommitOpt)
    throws IOException {
  HoodieInstant lastCompactionCommit = lastCompactionCommitOpt.orElseThrow(
      () -> new HoodieHiveSyncException(
          "Could not read schema from last compaction, no compaction commits found on path "
              + syncConfig.basePath));

  // Read the schema from one of the files written by the compaction
  HoodieCommitMetadata compactionMetadata = HoodieCommitMetadata.fromBytes(
      activeTimeline.getInstantDetails(lastCompactionCommit).get(), HoodieCommitMetadata.class);
  String filePath = compactionMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values()
      .stream().findAny().orElseThrow(() -> new IllegalArgumentException(
          "Could not find any data file written for compaction " + lastCompactionCommit
              + ", could not get schema for dataset " + metaClient.getBasePath()));
  return readSchemaFromDataFile(new Path(filePath));
}
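// Hypothetical caller sketch for the method above (not from the source): on a
// MERGE_ON_READ table the last compaction is assumed to surface as the last
// completed instant on the commit timeline, so lastInstant() should hand back
// exactly the Optional this method expects.
Optional<HoodieInstant> lastCompaction =
    activeTimeline.getCommitTimeline().filterCompletedInstants().lastInstant();
MessageType tableSchema = readSchemaFromLastCompaction(lastCompaction);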
/**
 * Describes table properties.
 */
@CliCommand(value = "desc", help = "Describe Hoodie Table properties")
public String descTable() {
  TableHeader header = new TableHeader()
      .addTableHeaderField("Property")
      .addTableHeaderField("Value");
  List<Comparable[]> rows = new ArrayList<>();
  rows.add(new Comparable[]{"basePath", HoodieCLI.tableMetadata.getBasePath()});
  rows.add(new Comparable[]{"metaPath", HoodieCLI.tableMetadata.getMetaPath()});
  rows.add(new Comparable[]{"fileSystem", HoodieCLI.tableMetadata.getFs().getScheme()});
  HoodieCLI.tableMetadata.getTableConfig().getProps().entrySet().forEach(e ->
      rows.add(new Comparable[]{e.getKey(), e.getValue()}));
  return HoodiePrintHelper.print(header, new HashMap<>(), "", false, -1, false, rows);
}
/**
 * Given a partition path, obtain all file groups within it. All methods that work at the
 * partition level go through this.
 */
@Override
public Stream<HoodieFileGroup> getAllFileGroups(String partitionPathStr) {
  // Return any previously fetched groups
  if (partitionToFileGroupsMap.containsKey(partitionPathStr)) {
    return partitionToFileGroupsMap.get(partitionPathStr).stream();
  }
  try {
    // Create the partition path if it does not exist already
    Path partitionPath = FSUtils.getPartitionPath(metaClient.getBasePath(), partitionPathStr);
    FSUtils.createPathIfNotExists(metaClient.getFs(), partitionPath);
    FileStatus[] statuses = metaClient.getFs().listStatus(partitionPath);
    List<HoodieFileGroup> fileGroups = addFilesToView(statuses);
    return fileGroups.stream();
  } catch (IOException e) {
    throw new HoodieIOException("Failed to list data files in partition " + partitionPathStr, e);
  }
}
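// A minimal usage sketch (assumed names): list the file groups of one
// partition and report how many file slices each holds. "fsView" is an
// assumed view instance and "2017/01/01" an assumed partition path.
fsView.getAllFileGroups("2017/01/01").forEach(fileGroup ->
    System.out.println(fileGroup.getId() + " -> "
        + fileGroup.getAllFileSlices().count() + " file slice(s)"));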
/**
 * Return all pending compactions with instant time for clients to decide what to compact next.
 *
 * @return list of (instant time, compaction plan) pairs for all pending compactions
 */
public List<Pair<String, HoodieCompactionPlan>> getPendingCompactions() {
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(),
      hoodieTable.getMetaClient().getBasePath(), true);
  return CompactionUtils.getAllPendingCompactionPlans(metaClient).stream()
      .map(instantWorkloadPair -> Pair.of(instantWorkloadPair.getKey().getTimestamp(),
          instantWorkloadPair.getValue()))
      .collect(Collectors.toList());
}
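// Hypothetical caller sketch: walk the pending compactions and print each
// plan's instant time. "client" is an assumed instance of the class above;
// getOperations() on the Avro-generated plan may be null when a plan carries
// no operations, hence the guard.
for (Pair<String, HoodieCompactionPlan> pending : client.getPendingCompactions()) {
  List<HoodieCompactionOperation> ops = pending.getValue().getOperations();
  System.out.println("Pending compaction " + pending.getKey() + " with "
      + (ops == null ? 0 : ops.size()) + " operation(s)");
}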
private Writer createLogWriter(Optional<FileSlice> fileSlice, String baseCommitTime)
    throws IOException, InterruptedException {
  // Continue from the highest existing log version in the slice; fall back to the
  // base version when the slice is absent or has no log files yet
  int logVersion = fileSlice
      .map(slice -> slice.getLogFiles().map(logFile -> logFile.getLogVersion())
          .max(Comparator.naturalOrder()).orElse(HoodieLogFile.LOGFILE_BASE_VERSION))
      .orElse(HoodieLogFile.LOGFILE_BASE_VERSION);
  return HoodieLogFormat.newWriterBuilder()
      .onParentPath(new Path(hoodieTable.getMetaClient().getBasePath(), partitionPath))
      .withFileId(fileId)
      .overBaseCommit(baseCommitTime)
      .withLogVersion(logVersion)
      .withSizeThreshold(config.getLogFileMaxSize())
      .withFs(fs)
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
      .build();
}
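// Standalone sketch of the same builder with illustrative values (parent path,
// file id and base commit time are made up, not from the source): write a
// fresh delta log for a file id at the base log version.
Writer writer = HoodieLogFormat.newWriterBuilder()
    .onParentPath(new Path("/tmp/hoodie/2017/01/01"))
    .withFileId("fileid-0001")
    .overBaseCommit("20180924064636")
    .withLogVersion(HoodieLogFile.LOGFILE_BASE_VERSION)
    .withFs(fs)
    .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
    .build();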
public JavaPairRDD<HoodieKey, Optional<String>> fetchRecordLocation(JavaRDD<HoodieKey> hoodieKeys,
    JavaSparkContext jsc, HoodieTable<T> hoodieTable) {
  JavaPairRDD<String, String> partitionRecordKeyPairRDD = hoodieKeys
      .mapToPair(key -> new Tuple2<>(key.getPartitionPath(), key.getRecordKey()));

  // Look up the index for all the partition/record-key pairs
  JavaPairRDD<String, String> rowKeyFilenamePairRDD =
      lookupIndex(partitionRecordKeyPairRDD, jsc, hoodieTable);

  JavaPairRDD<String, HoodieKey> rowKeyHoodieKeyPairRDD = hoodieKeys
      .mapToPair(key -> new Tuple2<>(key.getRecordKey(), key));

  return rowKeyHoodieKeyPairRDD.leftOuterJoin(rowKeyFilenamePairRDD).mapToPair(keyPathTuple -> {
    Optional<String> recordLocationPath;
    if (keyPathTuple._2._2.isPresent()) {
      String fileName = keyPathTuple._2._2.get();
      String partitionPath = keyPathTuple._2._1.getPartitionPath();
      recordLocationPath = Optional
          .of(new Path(new Path(hoodieTable.getMetaClient().getBasePath(), partitionPath), fileName)
              .toUri().getPath());
    } else {
      recordLocationPath = Optional.absent();
    }
    return new Tuple2<>(keyPathTuple._2._1, recordLocationPath);
  });
}
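// Hypothetical usage sketch: fetch locations for two keys and keep only those
// already present in the table. "index", "jsc" and "hoodieTable" are assumed
// to be in scope; note the Optional here is the Guava-style one (hence
// Optional.absent() above), not java.util.Optional.
JavaRDD<HoodieKey> keys = jsc.parallelize(Arrays.asList(
    new HoodieKey("uuid-000001", "2017/01/01"),
    new HoodieKey("uuid-000002", "2017/01/02")));
List<Tuple2<HoodieKey, Optional<String>>> located =
    index.fetchRecordLocation(keys, jsc, hoodieTable)
        .filter(tuple -> tuple._2.isPresent())
        .collect();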
@CliCommand(value = "commits refresh", help = "Refresh the commits") public String refreshCommits() throws IOException { HoodieTableMetaClient metadata = new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath()); HoodieCLI.setTableMetadata(metadata); return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed."; }
@CliCommand(value = "savepoints refresh", help = "Refresh the savepoints") public String refreshMetaClient() throws IOException { HoodieTableMetaClient metadata = new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath()); HoodieCLI.setTableMetadata(metadata); return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed."; }
@CliCommand(value = "cleans refresh", help = "Refresh the commits") public String refreshCleans() throws IOException { HoodieTableMetaClient metadata = new HoodieTableMetaClient(HoodieCLI.conf, HoodieCLI.tableMetadata.getBasePath()); HoodieCLI.setTableMetadata(metadata); return "Metadata for table " + metadata.getTableConfig().getTableName() + " refreshed."; }
/**
 * Load all involved files as <Partition, filename> pairs from all partitions in the table;
 * the passed-in partition list is ignored, since every partition is scanned.
 */
@Override
@VisibleForTesting
List<Tuple2<String, BloomIndexFileInfo>> loadInvolvedFiles(List<String> partitions,
    final JavaSparkContext jsc, final HoodieTable hoodieTable) {
  HoodieTableMetaClient metaClient = hoodieTable.getMetaClient();
  try {
    List<String> allPartitionPaths = FSUtils.getAllPartitionPaths(metaClient.getFs(),
        metaClient.getBasePath(), config.shouldAssumeDatePartitioning());
    return super.loadInvolvedFiles(allPartitionPaths, jsc, hoodieTable);
  } catch (IOException e) {
    throw new HoodieIOException("Failed to load all partitions", e);
  }
}
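// Sketch of the underlying listing call, with the same signature as used
// above: enumerate every partition path under the base path. Passing true for
// the last argument assumes a date-partitioned (yyyy/mm/dd) layout rather
// than discovering partitions from metadata.
List<String> allPartitions = FSUtils.getAllPartitionPaths(metaClient.getFs(),
    metaClient.getBasePath(), true);
allPartitions.forEach(System.out::println);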
@Before
public void init() throws IOException {
  metaClient = HoodieTestUtils.init(tmpFolder.getRoot().getAbsolutePath());
  basePath = metaClient.getBasePath();
}
@Before
public void init() throws IOException {
  metaClient = HoodieTestUtils.initTableType(getDefaultHadoopConf(),
      tmpFolder.getRoot().getAbsolutePath(), HoodieTableType.MERGE_ON_READ);
  basePath = metaClient.getBasePath();
}
@CliCommand(value = "savepoint create", help = "Savepoint a commit") public String savepoint(@CliOption(key = {"commit"}, help = "Commit to savepoint") final String commitTime, @CliOption(key = {"user"}, help = "User who is creating the savepoint") final String user, @CliOption(key = {"comments"}, help = "Comments for creating the savepoint") final String comments) throws Exception { HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); HoodieTimeline timeline = activeTimeline.getCommitTimeline().filterCompletedInstants(); HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime); if (!timeline.containsInstant(commitInstant)) { return "Commit " + commitTime + " not found in Commits " + timeline; } HoodieWriteClient client = createHoodieClient(null, HoodieCLI.tableMetadata.getBasePath()); if (client.savepoint(commitTime, user, comments)) { // Refresh the current refreshMetaClient(); return String.format("The commit \"%s\" has been savepointed.", commitTime); } return String.format("Failed: Could not savepoint commit \"%s\".", commitTime); }
@CliCommand(value = "repair deduplicate", help = "De-duplicate a partition path contains duplicates & produce " + "repaired files to replace with") public String deduplicate(@CliOption(key = { "duplicatedPartitionPath"}, help = "Partition Path containing the duplicates", mandatory = true) final String duplicatedPartitionPath, @CliOption(key = { "repairedOutputPath"}, help = "Location to place the repaired files", mandatory = true) final String repairedOutputPath, @CliOption(key = { "sparkProperties"}, help = "Spark Properites File Path", mandatory = true) final String sparkPropertiesPath) throws Exception { SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); sparkLauncher.addAppArgs(SparkMain.SparkCommand.DEDUPLICATE.toString(), duplicatedPartitionPath, repairedOutputPath, HoodieCLI.tableMetadata.getBasePath()); Process process = sparkLauncher.launch(); InputStreamConsumer.captureOutput(process); int exitCode = process.waitFor(); if (exitCode != 0) { return "Deduplicated files placed in: " + repairedOutputPath; } return "Deduplication failed "; }
@Before
public void init() throws IOException {
  metaClient = HoodieTestUtils.init(tmpFolder.getRoot().getAbsolutePath());
  basePath = metaClient.getBasePath();
  fsView = new HoodieTableFileSystemView(metaClient,
      metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants());
  roView = (TableFileSystemView.ReadOptimizedView) fsView;
  rtView = (TableFileSystemView.RealtimeView) fsView;
}
@CliCommand(value = "commit rollback", help = "Rollback a commit") public String rollbackCommit(@CliOption(key = {"commit"}, help = "Commit to rollback") final String commitTime, @CliOption(key = {"sparkProperties"}, help = "Spark Properites File Path") final String sparkPropertiesPath) throws Exception { HoodieActiveTimeline activeTimeline = HoodieCLI.tableMetadata.getActiveTimeline(); HoodieTimeline timeline = activeTimeline.getCommitsTimeline().filterCompletedInstants(); HoodieInstant commitInstant = new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, commitTime); if (!timeline.containsInstant(commitInstant)) { return "Commit " + commitTime + " not found in Commits " + timeline; } SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); sparkLauncher .addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(), commitTime, HoodieCLI.tableMetadata.getBasePath()); Process process = sparkLauncher.launch(); InputStreamConsumer.captureOutput(process); int exitCode = process.waitFor(); // Refresh the current refreshCommits(); if (exitCode != 0) { return "Commit " + commitTime + " failed to roll back"; } return "Commit " + commitTime + " rolled back"; }
@Test
public void checkMetadata() {
  assertEquals("Table name should be raw_trips", HoodieTestUtils.RAW_TRIPS_TEST_NAME,
      metaClient.getTableConfig().getTableName());
  assertEquals("Basepath should be the one assigned", basePath, metaClient.getBasePath());
  assertEquals("Metapath should be ${basepath}/.hoodie", basePath + "/.hoodie",
      metaClient.getMetaPath());
}