// Excerpt: statements are elided and the braces do not balance in this view.
// First branch: maps the instants returned by timeline.findInstantsAfter(lastIncrementalTs, maxCommits)
// to their timestamps and asks roView for the latest data files within those commits.
// The visible else branch collects roView.getLatestDataFiles() instead; the size of
// filteredFiles is then logged.
List<String> commitsToReturn = timeline.findInstantsAfter(lastIncrementalTs, maxCommits) .getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList()); List<HoodieDataFile> filteredFiles = roView.getLatestDataFilesInRange(commitsToReturn) .collect(Collectors.toList()); for (HoodieDataFile filteredFile : filteredFiles) { } else { List<HoodieDataFile> filteredFiles = roView.getLatestDataFiles() .collect(Collectors.toList()); LOG.info("Total paths to process after hoodie filter " + filteredFiles.size());
// Obtains the stream returned by fsView.getLatestDataFilesBeforeOrOn(partition, latestCommitTimestamp)
// and iterates over it; the forEach body lies outside this excerpt.
Stream<HoodieDataFile> dataFiles = fsView.getLatestDataFilesBeforeOrOn(partition, latestCommitTimestamp); dataFiles.forEach(
// Excerpt: the if-blocks are not closed and statements are elided in this view.
// Calls each data-file accessor of roView in turn: getAllDataFiles(partitionPath), getLatestDataFiles(),
// getLatestDataFiles(partitionPath), getLatestDataFilesBeforeOrOn(partitionPath, deltaInstantTime5)
// and getLatestDataFilesInRange(allInstantTimes). When skipCreatingDataFile is set, each result
// is asserted to contain no data file.
// Further down, getLatestDataFiles() is asserted to return exactly one data file.
List<HoodieDataFile> dataFiles = roView.getAllDataFiles(partitionPath).collect(Collectors.toList()); if (skipCreatingDataFile) { assertTrue("No data file expected", dataFiles.isEmpty()); dataFiles = roView.getLatestDataFiles().collect(Collectors.toList()); if (skipCreatingDataFile) { assertEquals("Expect no data file to be returned", 0, dataFiles.size()); dataFiles = roView.getLatestDataFiles(partitionPath).collect(Collectors.toList()); if (skipCreatingDataFile) { assertEquals("Expect no data file to be returned", 0, dataFiles.size()); dataFiles = roView.getLatestDataFilesBeforeOrOn(partitionPath, deltaInstantTime5).collect(Collectors.toList()); if (skipCreatingDataFile) { assertEquals("Expect no data file to be returned", 0, dataFiles.size()); dataFiles = roView.getLatestDataFilesInRange(allInstantTimes).collect(Collectors.toList()); if (skipCreatingDataFile) { assertEquals("Expect no data file to be returned", 0, dataFiles.size()); roView.getAllDataFiles(partitionPath); dataFiles = roView.getLatestDataFiles().collect(Collectors.toList()); assertEquals("Expect only one data-file to be sent", 1, dataFiles.size()); dataFiles.stream().forEach(df -> { compactionRequestedTime); }); dataFiles = roView.getLatestDataFiles(partitionPath).collect(Collectors.toList());
// Builds a HoodieTable from metadata, config and jsc, takes its read-optimized file-system view,
// and collects the data files that getLatestDataFilesBeforeOrOn returns for testPartitionPath
// at commitTime3. numTotalInsertsInCommit3 starts at zero; its use is outside this excerpt.
HoodieTable table = HoodieTable.getHoodieTable(metadata, config, jsc); TableFileSystemView.ReadOptimizedView fileSystemView = table.getROFileSystemView(); List<HoodieDataFile> files = fileSystemView.getLatestDataFilesBeforeOrOn(testPartitionPath, commitTime3) .collect(Collectors.toList()); int numTotalInsertsInCommit3 = 0;
// Excerpt from a test body; statements between the visible ones are elided in this view.

// Read-optimized view built over the completed instants of the commit timeline.
TableFileSystemView.ReadOptimizedView roView = new HoodieTableFileSystemView(metaClient,
    metaClient.getCommitTimeline().filterCompletedInstants(), allFiles);
Stream<HoodieDataFile> dataFilesToRead = roView.getLatestDataFiles();
assertTrue(!dataFilesToRead.findAny().isPresent());

// findAny() consumes the stream, so a fresh one is requested before the next check.
dataFilesToRead = roView.getLatestDataFiles();
assertTrue("RealtimeTableView should list the parquet files we wrote in the delta commit",
    dataFilesToRead.findAny().isPresent());

// Read the records behind the latest data files and check the record count.
// assertEquals takes (expected, actual): the literal goes first so that a failure
// reports 200 as the expected value rather than as the observed one.
List<String> dataFiles = roView.getLatestDataFiles().map(hf -> hf.getPath()).collect(Collectors.toList());
List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
assertEquals(200, recordsRead.size());

dataFiles = roView.getLatestDataFiles().map(hf -> hf.getPath()).collect(Collectors.toList());
recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
assertEquals(200, recordsRead.size());

dataFiles = roView.getLatestDataFiles().map(hf -> hf.getPath()).collect(Collectors.toList());
recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
assertEquals(200, recordsRead.size());

// Rebuild the table and the view, this time over the table's completed-commits timeline.
hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles);
dataFiles = roView.getLatestDataFiles().map(hf -> hf.getPath()).collect(Collectors.toList());
recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);

// Rebuild once more over the unfiltered commits timeline and collect the latest data files.
hoodieTable = HoodieTable.getHoodieTable(metaClient, cfg, jsc);
roView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsTimeline(), allFiles);
List<HoodieDataFile> dataFiles2 = roView.getLatestDataFiles().collect(Collectors.toList());
// Obtains the stream returned by fsView.getLatestDataFilesBeforeOrOn(partition, latestCommitTimestamp)
// and iterates over it; the forEach body lies outside this excerpt.
Stream<HoodieDataFile> dataFiles = fsView.getLatestDataFilesBeforeOrOn(partition, latestCommitTimestamp); dataFiles.forEach(
// Excerpt: most of the flatMap lambda openers are elided, so the braces do not balance in this view.
// Each check filters getAllDataFiles(s) by commit time and asserts the count:
//   view1: 3 files for commit "003" and 3 for commit "002";
//   view2: 3 files for commit "004";
//   view3 (a fresh read-optimized view from table): 3 files for "002", and none for "003" or "004",
//   which the assertion messages describe as rolled back.
return view1.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("003")); }).collect(Collectors.toList()); assertEquals("The data files for commit 003 should be present", 3, dataFiles.size()); return view1.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("002")); }).collect(Collectors.toList()); assertEquals("The data files for commit 002 should be present", 3, dataFiles.size()); return view2.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("004")); }).collect(Collectors.toList()); assertEquals("The data files for commit 004 should be present", 3, dataFiles.size()); final TableFileSystemView.ReadOptimizedView view3 = table.getROFileSystemView(); dataFiles = partitionPaths.stream().flatMap(s -> { return view3.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("002")); }).collect(Collectors.toList()); assertEquals("The data files for commit 002 be available", 3, dataFiles.size()); return view3.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("003")); }).collect(Collectors.toList()); assertEquals("The data files for commit 003 should be rolled back", 0, dataFiles.size()); return view3.getAllDataFiles(s).filter(f -> f.getCommitTime().equals("004")); }).collect(Collectors.toList()); assertEquals("The data files for commit 004 should be rolled back", 0, dataFiles.size());
// Collects the latest data files from roView and asserts that there are exactly three.
// filenames starts as an empty set; how it is filled is outside this excerpt.
List<HoodieDataFile> statuses1 = roView.getLatestDataFiles().collect(Collectors.toList()); assertEquals(3, statuses1.size()); Set<String> filenames = Sets.newHashSet();
// Pair the partition path with the file names of the data files that
// getLatestDataFilesBeforeOrOn reports for it at commitTime.
List<String> latestFileNames = view.getLatestDataFilesBeforeOrOn(partitionPath, commitTime)
    .map(dataFile -> dataFile.getFileName())
    .collect(Collectors.toList());
return new Tuple2<>(partitionPath, latestFileNames);
// Excerpt from a test body; statements between the visible ones are elided in this view.
// Builds a read-optimized view over the completed instants of the commit timeline and first
// asserts that getLatestDataFiles() yields nothing. The two later checks assert that it yields
// at least one data file. findAny() consumes the stream, so it is re-fetched before each check.
TableFileSystemView.ReadOptimizedView roView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles); Stream<HoodieDataFile> dataFilesToRead = roView.getLatestDataFiles(); assertTrue(!dataFilesToRead.findAny().isPresent()); dataFilesToRead = roView.getLatestDataFiles(); assertTrue("RealtimeTableView should list the parquet files we wrote in the delta commit", dataFilesToRead.findAny().isPresent()); dataFilesToRead = roView.getLatestDataFiles(); assertTrue(dataFilesToRead.findAny().isPresent());
// Builds a HoodieTable from metaClient, config and jsc, collects the data files its read-optimized
// view returns from getLatestDataFilesBeforeOrOn(testPartitionPath, commitTime3), and asserts
// that exactly two files come back.
HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc); List<HoodieDataFile> files = table.getROFileSystemView() .getLatestDataFilesBeforeOrOn(testPartitionPath, commitTime3) .collect(Collectors.toList()); assertEquals("Total of 2 valid data files", 2, files.size());
.getLatestDataFilesOn(operation.getPartitionPath(), operation.getBaseInstantTime()) .filter(df -> df.getFileId().equals(operation.getFileId())).findFirst();
// Excerpt: several statements are cut mid-way (an assertTrue call and a collect call are incomplete).
// Builds a read-optimized view over the completed instants of the commits timeline and records
// file id -> file size for the latest data files in parquetFileIdToSize.
// Later the view is rebuilt from the reloaded active timeline and the latest data files are
// collected again. The visible filter keeps entries whose previously recorded size is smaller than
// the entry's value, and the surrounding expression requires at least one such entry.
// Finally the paths of the latest data files are passed to getRecordsUsingInputFormat.
TableFileSystemView.ReadOptimizedView roView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitsTimeline().filterCompletedInstants(), allFiles); Stream<HoodieDataFile> dataFilesToRead = roView.getLatestDataFiles(); Map<String, Long> parquetFileIdToSize = dataFilesToRead.collect( Collectors.toMap(HoodieDataFile::getFileId, HoodieDataFile::getFileSize)); dataFilesToRead = roView.getLatestDataFiles(); List<HoodieDataFile> dataFilesList = dataFilesToRead.collect(Collectors.toList()); assertTrue("RealtimeTableView should list the parquet files we wrote in the delta commit", roView = new HoodieTableFileSystemView(metaClient, hoodieTable.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants(), allFiles); dataFilesToRead = roView.getLatestDataFiles(); List<HoodieDataFile> newDataFilesList = dataFilesToRead.collect(Collectors.toList()); Map<String, Long> parquetFileIdToNewSize = newDataFilesList.stream().collect( .filter(entry -> parquetFileIdToSize.get(entry.getKey()) < entry.getValue()).count() > 0); List<String> dataFiles = roView.getLatestDataFiles().map(hf -> hf.getPath()).collect(Collectors.toList()); List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
// Gather the file names of the data files returned by getLatestDataFilesBeforeOrOn for this
// partition path and commit time, then return them keyed by the partition path.
List<String> latestFileNames = view.getLatestDataFilesBeforeOrOn(partitionPath, commitTime)
    .map(dataFile -> dataFile.getFileName())
    .collect(Collectors.toList());
return new Tuple2<>(partitionPath, latestFileNames);
assertEquals(1, fileSliceMap.get(fileId4).longValue());

// Partition "2016/05/01" is expected to yield three data files from
// getLatestDataFilesBeforeOrOn at both commitTime4 and commitTime3.
List<HoodieDataFile> dataFileList = roView.getLatestDataFilesBeforeOrOn("2016/05/01", commitTime4)
    .collect(Collectors.toList());
assertEquals(3, dataFileList.size());
List<HoodieDataFile> dataFiles = roView.getLatestDataFilesBeforeOrOn("2016/05/01", commitTime3)
    .collect(Collectors.toList());
// Expected value first, matching the assertions above, so a failure message labels
// the expected and actual values correctly.
assertEquals(3, dataFiles.size());
// Asserts that roView.getLatestDataFiles() returns no data file, then collects the latest
// file slices of partitionPath from rtView; their use is outside this excerpt.
List<HoodieDataFile> dataFiles = roView.getLatestDataFiles().collect(Collectors.toList()); assertTrue("No data file expected", dataFiles.isEmpty()); List<FileSlice> fileSliceList = rtView.getLatestFileSlices(partitionPath).collect(Collectors.toList());
// Excerpt from a test body; statements between the visible ones are elided in this view.
// Builds a read-optimized view over the completed instants of the commit timeline and first
// asserts that getLatestDataFiles() yields nothing. The two later checks assert that it yields
// at least one data file. findAny() consumes the stream, so it is re-fetched before each check.
// Finally the paths of the latest data files are passed to getRecordsUsingInputFormat.
TableFileSystemView.ReadOptimizedView roView = new HoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles); Stream<HoodieDataFile> dataFilesToRead = roView.getLatestDataFiles(); assertTrue(!dataFilesToRead.findAny().isPresent()); dataFilesToRead = roView.getLatestDataFiles(); assertTrue("RealtimeTableView should list the parquet files we wrote in the delta commit", dataFilesToRead.findAny().isPresent()); dataFilesToRead = roView.getLatestDataFiles(); assertTrue(dataFilesToRead.findAny().isPresent()); List<String> dataFiles = roView.getLatestDataFiles().map(hf -> hf.getPath()).collect(Collectors.toList()); List<GenericRecord> recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(dataFiles, basePath);
// Collects the data files that roView.getLatestDataFilesInRange returns for the commit list
// [commitTime2, commitTime3] and asserts that there are exactly three.
List<HoodieDataFile> dataFiles = roView.getLatestDataFilesInRange(Lists.newArrayList(commitTime2, commitTime3)) .collect(Collectors.toList()); assertEquals(3, dataFiles.size());
.getLatestDataFilesOn(operation.getPartitionPath(), operation.getBaseInstantTime()) .filter(df -> df.getFileId().equals(operation.getFileId())).findFirst();
// Only when latestCommitTime is present: takes the data files that the table's read-optimized view
// returns from getLatestDataFilesBeforeOrOn(partitionPath, <timestamp of latestCommitTime>),
// pairs each with partitionPath, and stores the list in filteredFiles.
// The closing brace of this if-block is outside the excerpt.
if (latestCommitTime.isPresent()) { filteredFiles = hoodieTable.getROFileSystemView() .getLatestDataFilesBeforeOrOn(partitionPath, latestCommitTime.get().getTimestamp()) .map(f -> new Tuple2<>(partitionPath, f)).collect(toList());