/**
 * Get the view of the file system for this table.
 *
 * @return a {@link HoodieTableFileSystemView} built over the completed commits timeline
 */
public TableFileSystemView getFileSystemView() {
  return new HoodieTableFileSystemView(metaClient, getCompletedCommitsTimeline());
}
/**
 * Get the read-optimized view of the file system for this table.
 *
 * @return a {@link HoodieTableFileSystemView} (as its read-optimized facade) built over the
 *         completed commits timeline
 */
public TableFileSystemView.ReadOptimizedView getROFileSystemView() {
  return new HoodieTableFileSystemView(metaClient, getCompletedCommitsTimeline());
}
/**
 * Get the completed (commit + compaction) view of the file system for this table.
 *
 * @return a {@link HoodieTableFileSystemView} built over the meta client's commits timeline
 */
public TableFileSystemView getCompletedFileSystemView() {
  return new HoodieTableFileSystemView(metaClient, metaClient.getCommitsTimeline());
}
/**
 * Get the view of the file system for this table.
 *
 * @return a {@link HoodieTableFileSystemView} built over the completed commit timeline
 */
public TableFileSystemView getFileSystemView() {
  return new HoodieTableFileSystemView(metaClient, getCompletedCommitTimeline());
}
/**
 * Get the read-optimized view of the file system for this table.
 *
 * @return a {@link HoodieTableFileSystemView} (as its read-optimized facade) built over the
 *         completed commit timeline
 */
public TableFileSystemView.ReadOptimizedView getROFileSystemView() {
  return new HoodieTableFileSystemView(metaClient, getCompletedCommitTimeline());
}
/**
 * Get the completed (commit + compaction) view of the file system for this table.
 *
 * @return a {@link HoodieTableFileSystemView} built over the meta client's commits timeline
 */
public TableFileSystemView getCompletedFileSystemView() {
  return new HoodieTableFileSystemView(metaClient, metaClient.getCommitsTimeline());
}
/**
 * Get the real-time view of the file system for this table.
 *
 * @return a {@link HoodieTableFileSystemView} (as its real-time facade) over the commits +
 *         compaction timeline, filtered to completed and pending-compaction instants
 */
public TableFileSystemView.RealtimeView getRTFileSystemView() {
  return new HoodieTableFileSystemView(metaClient,
      metaClient.getCommitsAndCompactionTimeline().filterCompletedAndCompactionInstants());
}
/**
 * Get the real-time view of the file system for this table.
 *
 * @return a {@link HoodieTableFileSystemView} (as its real-time facade) over the commits +
 *         compaction timeline, filtered to completed and pending-compaction instants
 */
public TableFileSystemView.RealtimeView getRTFileSystemView() {
  return new HoodieTableFileSystemView(metaClient,
      metaClient.getCommitsAndCompactionTimeline().filterCompletedAndCompactionInstants());
}
/**
 * Renames delta files to make file-slices consistent with the timeline as dictated by Hoodie
 * metadata. Use when compaction unschedule fails partially.
 *
 * This operation MUST be executed with compactions and writer turned OFF.
 *
 * @param compactionInstant Compaction Instant to be repaired
 * @param parallelism       Spark parallelism used for plan validation
 * @param dryRun            Dry Run Mode
 * @return results of the rename operations performed (or planned, in dry-run mode)
 */
public List<RenameOpResult> repairCompaction(String compactionInstant, int parallelism, boolean dryRun)
    throws Exception {
  HoodieTableMetaClient client = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
  List<ValidationOpResult> failedValidations = validateCompactionPlan(client, compactionInstant, parallelism)
      .stream().filter(result -> !result.isSuccess()).collect(Collectors.toList());
  if (failedValidations.isEmpty()) {
    // Every operation in the plan validated cleanly; there is nothing to repair.
    return new ArrayList<>();
  }
  final HoodieTableFileSystemView fileSystemView =
      new HoodieTableFileSystemView(client, client.getCommitsAndCompactionTimeline());
  List<Pair<HoodieLogFile, HoodieLogFile>> renames = new ArrayList<>();
  for (ValidationOpResult failure : failedValidations) {
    renames.addAll(getRenamingActionsToAlignWithCompactionOperation(
        client, compactionInstant, failure.getOperation(), Optional.of(fileSystemView)));
  }
  return runRenamingOps(client, renames, parallelism, dryRun);
}
Optional<HoodieTableFileSystemView> fsViewOpt, boolean skipValidation) throws IOException { HoodieTableFileSystemView fsView = fsViewOpt.isPresent() ? fsViewOpt.get() : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline()); HoodieCompactionPlan plan = getCompactionPlan(metaClient, compactionInstant); if (plan.getOperations() != null) {
/**
 * Validate all compaction operations in a compaction plan. Verifies the file-slices are
 * consistent with corresponding compaction operations.
 *
 * @param metaClient        Hoodie Table Meta Client
 * @param compactionInstant Compaction Instant
 * @param parallelism       Spark parallelism for validating operations
 * @return one validation result per operation in the plan (empty list when the plan has none)
 */
public List<ValidationOpResult> validateCompactionPlan(HoodieTableMetaClient metaClient,
    String compactionInstant, int parallelism) throws IOException {
  HoodieCompactionPlan plan = getCompactionPlan(metaClient, compactionInstant);
  HoodieTableFileSystemView fsView =
      new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline());
  if (plan.getOperations() == null) {
    return new ArrayList<>();
  }
  List<CompactionOperation> operations = plan.getOperations().stream()
      .map(CompactionOperation::convertFromAvroRecordInstance).collect(Collectors.toList());
  return jsc.parallelize(operations, parallelism).map(operation -> {
    try {
      return validateCompactionOperation(metaClient, compactionInstant, operation, Optional.of(fsView));
    } catch (IOException e) {
      // Spark lambdas cannot throw checked exceptions; preserve the cause in an unchecked wrapper.
      throw new HoodieIOException(e.getMessage(), e);
    }
  }).collect();
}
Optional<HoodieTableFileSystemView> fsViewOpt) { HoodieTableFileSystemView fileSystemView = fsViewOpt.isPresent() ? fsViewOpt.get() : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline()); HoodieInstant lastInstant = metaClient.getCommitsAndCompactionTimeline().lastInstant().get(); FileSlice merged =
List<Pair<HoodieLogFile, HoodieLogFile>> result = new ArrayList<>(); HoodieTableFileSystemView fileSystemView = fsViewOpt.isPresent() ? fsViewOpt.get() : new HoodieTableFileSystemView(metaClient, metaClient.getCommitsAndCompactionTimeline()); if (!skipValidation) { validateCompactionOperation(metaClient, compactionInstant, operation, Optional.of(fileSystemView));
/**
 * Returns the latest file-slices across all default test partitions, as seen by a fresh
 * file-system view over the table's reloaded (commits + compaction) timeline.
 *
 * @param table hoodie table whose meta client supplies the timeline
 * @param cfg   write config (not used here; kept for signature compatibility with callers)
 * @return latest file slices gathered from every default partition path
 */
private List<FileSlice> getCurrentLatestFileSlices(HoodieTable table, HoodieWriteConfig cfg) throws IOException {
  HoodieTableFileSystemView view = new HoodieTableFileSystemView(table.getMetaClient(),
      table.getMetaClient().getActiveTimeline().reload().getCommitsAndCompactionTimeline());
  // Arrays.stream streams the array directly instead of wrapping it in a List first.
  return Arrays.stream(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS)
      .flatMap(view::getLatestFileSlices)
      .collect(Collectors.toList());
}
/**
 * Reads the paths under a hoodie dataset out as a DataFrame. For each glob-matched path, only
 * the latest data files reported by the read-optimized view are loaded.
 *
 * @param jsc        spark context (unused directly; kept for signature compatibility)
 * @param basePath   base path of the hoodie dataset
 * @param sqlContext SQL context used to load the parquet files
 * @param fs         file system used to resolve the path globs
 * @param paths      glob patterns to read under the dataset
 * @return a DataFrame over the latest parquet files matching the given paths
 */
public static Dataset<Row> read(JavaSparkContext jsc, String basePath, SQLContext sqlContext,
    FileSystem fs, String... paths) {
  List<String> latestFilePaths = new ArrayList<>();
  try {
    HoodieTableMetaClient metaClient = new HoodieTableMetaClient(fs.getConf(), basePath, true);
    for (String path : paths) {
      TableFileSystemView.ReadOptimizedView roView = new HoodieTableFileSystemView(
          metaClient, metaClient.getCommitsTimeline().filterCompletedInstants(),
          fs.globStatus(new Path(path)));
      roView.getLatestDataFiles().forEach(dataFile -> latestFilePaths.add(dataFile.getPath()));
    }
    return sqlContext.read().parquet(latestFilePaths.toArray(new String[latestFilePaths.size()]));
  } catch (Exception e) {
    throw new HoodieException("Error reading hoodie dataset as a dataframe", e);
  }
}
return new HoodieTableFileSystemView(metaClient, filteredTimeline, statuses);
/**
 * Lists every data file under the config's base path and returns the latest data files as
 * reported by a read-optimized view over the table's completed commits timeline.
 *
 * @param table hoodie table under test
 * @param cfg   write config supplying the base path to list
 * @return latest data files visible in the view
 */
private List<HoodieDataFile> getCurrentLatestDataFiles(HoodieTable table, HoodieWriteConfig cfg) throws IOException {
  FileStatus[] allFiles =
      HoodieTestUtils.listAllDataFilesInPath(table.getMetaClient().getFs(), cfg.getBasePath());
  HoodieTableFileSystemView view =
      new HoodieTableFileSystemView(table.getMetaClient(), table.getCompletedCommitsTimeline(), allFiles);
  return view.getLatestDataFiles().collect(Collectors.toList());
}
/**
 * Re-reads the table metadata and rebuilds the file-system view (plus its read-optimized and
 * real-time facades) over completed commit + compaction instants.
 *
 * @param statuses pre-listed file statuses to seed the view with, or null to let the view
 *                 discover files itself
 */
private void refreshFsView(FileStatus[] statuses) {
  metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
  fsView = (statuses == null)
      ? new HoodieTableFileSystemView(metaClient,
          metaClient.getActiveTimeline().getCommitsAndCompactionTimeline().filterCompletedAndCompactionInstants())
      : new HoodieTableFileSystemView(metaClient,
          metaClient.getActiveTimeline().getCommitsAndCompactionTimeline().filterCompletedAndCompactionInstants(),
          statuses);
  // The same view object backs both facades; the casts expose its two interfaces.
  roView = (TableFileSystemView.ReadOptimizedView) fsView;
  rtView = (TableFileSystemView.RealtimeView) fsView;
}
/**
 * Initializes the test fixture: creates a fresh Hoodie table under a temp folder and builds a
 * file-system view (plus its RO/RT facades) over the completed commit timeline.
 */
@Before
public void init() throws IOException {
  // Fixed: a stray second semicolon left a dangling empty statement here.
  metaClient = HoodieTestUtils.init(tmpFolder.getRoot().getAbsolutePath());
  basePath = metaClient.getBasePath();
  fsView = new HoodieTableFileSystemView(metaClient,
      metaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants());
  roView = (TableFileSystemView.ReadOptimizedView) fsView;
  rtView = (TableFileSystemView.RealtimeView) fsView;
}
HoodieTable hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg, jsc); FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath()); HoodieTableFileSystemView roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);