public static HoodieTableMetaClient init(String basePath) throws IOException {
  return initTableType(getDefaultHadoopConf(), basePath, HoodieTableType.COPY_ON_WRITE);
}
public static void createCleanFiles(String basePath, String commitTime) throws IOException {
  createCleanFiles(basePath, commitTime, HoodieTestUtils.getDefaultHadoopConf());
}
HoodieTestUtils.createCommitFiles(basePath, commitTime1);
HoodieTestUtils.createInflightCommitFiles(basePath, commitTime2, commitTime3);

String file11 = HoodieTestUtils.createDataFile(basePath, "2016/05/01", commitTime1, "id11");
String file12 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime1, "id12");
String file13 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime1, "id13");
String file21 = HoodieTestUtils.createDataFile(basePath, "2016/05/01", commitTime2, "id21");
String file22 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime2, "id22");
String file23 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime2, "id23");
String file31 = HoodieTestUtils.createDataFile(basePath, "2016/05/01", commitTime3, "id31");
String file32 = HoodieTestUtils.createDataFile(basePath, "2016/05/02", commitTime3, "id32");
String file33 = HoodieTestUtils.createDataFile(basePath, "2016/05/06", commitTime3, "id33");

assertTrue(HoodieTestUtils.doesCommitExist(basePath, commitTime1));
assertTrue(HoodieTestUtils.doesInflightExist(basePath, commitTime2));
assertTrue(HoodieTestUtils.doesInflightExist(basePath, commitTime3));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime3, file31)
    && HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime3, file32)
    && HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime3, file33));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime2, file21)
    && HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime2, file22)
    && HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime2, file23));
assertTrue(HoodieTestUtils.doesDataFileExist(basePath, "2016/05/01", commitTime1, file11)
    && HoodieTestUtils.doesDataFileExist(basePath, "2016/05/02", commitTime1, file12)
    && HoodieTestUtils.doesDataFileExist(basePath, "2016/05/06", commitTime1, file13));
public static final void createInflightCleanFiles(String basePath, String... commitTimes) throws IOException {
  createInflightCleanFiles(basePath, HoodieTestUtils.getDefaultHadoopConf(), commitTimes);
}
HoodieTableMetaClient metaClient = HoodieTestUtils.initTableType(jsc.hadoopConfiguration(), basePath,
    HoodieTableType.MERGE_ON_READ);

// Commit "000": one parquet data file plus two log files in the same file group, then a compaction commit
String file1P0 = HoodieTestUtils.createNewDataFile(basePath, DEFAULT_FIRST_PARTITION_PATH, "000");
String file2P0L0 = HoodieTestUtils
    .createNewLogFile(fs, basePath, DEFAULT_FIRST_PARTITION_PATH, "000", file1P0, Optional.empty());
String file2P0L1 = HoodieTestUtils
    .createNewLogFile(fs, basePath, DEFAULT_FIRST_PARTITION_PATH, "000", file1P0, Optional.of(2));
HoodieTestUtils.createCompactionCommitFiles(fs, basePath, "000");

// Commit "001": an updated data file plus three log files for the same file group, then another compaction commit
HoodieTestUtils.createDataFile(basePath, DEFAULT_FIRST_PARTITION_PATH, "001", file1P0);
file2P0L0 = HoodieTestUtils
    .createNewLogFile(fs, basePath, DEFAULT_FIRST_PARTITION_PATH, "001", file1P0, Optional.empty());
file2P0L0 = HoodieTestUtils
    .createNewLogFile(fs, basePath, DEFAULT_FIRST_PARTITION_PATH, "001", file1P0, Optional.of(2));
file2P0L0 = HoodieTestUtils
    .createNewLogFile(fs, basePath, DEFAULT_FIRST_PARTITION_PATH, "001", file1P0, Optional.of(3));
HoodieTestUtils.createCompactionCommitFiles(fs, basePath, "001");

assertEquals("Must clean three files, one parquet and 2 log files", 3,
    getCleanStat(hoodieCleanStats, DEFAULT_FIRST_PARTITION_PATH).getSuccessDeleteFiles().size());
assertFalse(HoodieTestUtils.doesDataFileExist(basePath, DEFAULT_FIRST_PARTITION_PATH, "000", file1P0));
assertFalse(
    HoodieTestUtils.doesLogFileExist(basePath, DEFAULT_FIRST_PARTITION_PATH, "000", file2P0L0, Optional.empty()));
assertFalse(
    HoodieTestUtils.doesLogFileExist(basePath, DEFAULT_FIRST_PARTITION_PATH, "000", file2P0L0, Optional.of(2)));
public static void createCommitFile(String basePath, String commitTime) throws IOException {
  createCommitFile(basePath, commitTime, HoodieTestUtils.getDefaultHadoopConf());
}
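// Illustrative only: a minimal sketch of how the default-configuration overloads above
// (createCommitFile, createCleanFiles, createInflightCleanFiles) could be combined to seed a timeline
// for a test. The commit times and the test-method name are made up for illustration; the assertions
// reuse helpers that appear elsewhere in these tests.
@Test
public void sketchSeedTimelineWithDefaultConf() throws IOException {
  HoodieTestUtils.init(basePath);                             // COPY_ON_WRITE table under basePath
  HoodieTestUtils.createCommitFile(basePath, "100");          // completed commit
  HoodieTestUtils.createCleanFiles(basePath, "101");          // completed clean
  HoodieTestUtils.createInflightCleanFiles(basePath, "102");  // clean left inflight
  assertTrue(HoodieTestUtils.doesCommitExist(basePath, "100"));
}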
HoodieTableMetaClient metaClient = HoodieTestUtils.initTableType(jsc.hadoopConfiguration(), basePath,
    HoodieTableType.MERGE_ON_READ);
String[] instants = new String[]{"000", "001", "003", "005", "007", "009", "011", "013"};
HoodieTestUtils.createCommitFiles(basePath, instants);
int maxNumFileIdsForCompaction = 4;
for (int i = 0; i < maxNumFileIds; i++) {
  final String fileId = HoodieTestUtils.createDataFile(basePath, DEFAULT_FIRST_PARTITION_PATH, instants[0],
      fileIds[i]);
  HoodieTestUtils.createNewLogFile(fs, basePath, DEFAULT_FIRST_PARTITION_PATH, instants[0], fileId,
      Optional.empty());
  HoodieTestUtils.createNewLogFile(fs, basePath, DEFAULT_FIRST_PARTITION_PATH, instants[0], fileId,
      Optional.of(2));
  fileIdToLatestInstantBeforeCompaction.put(fileId, instants[0]);
  HoodieTestUtils.createNewLogFile(fs, basePath, DEFAULT_FIRST_PARTITION_PATH, compactionInstants[j], fileId,
      Optional.empty());
  HoodieTestUtils.createNewLogFile(fs, basePath, DEFAULT_FIRST_PARTITION_PATH, compactionInstants[j], fileId,
      Optional.of(2));
} else {
  HoodieTestUtils.createDataFile(basePath, DEFAULT_FIRST_PARTITION_PATH, instants[j], fileId);
  HoodieTestUtils.createNewLogFile(fs, basePath, DEFAULT_FIRST_PARTITION_PATH, instants[j], fileId,
      Optional.empty());
  HoodieTestUtils.createNewLogFile(fs, basePath, DEFAULT_FIRST_PARTITION_PATH, instants[j], fileId,
      Optional.of(2));
  fileIdToLatestInstantBeforeCompaction.put(fileId, instants[j]);
List<FileSlice> fileSliceList = compactionInstantsToFileSlices.get(instant);
HoodieTestUtils.createCommitFiles(basePath, "000"); String file1P0C0 = HoodieTestUtils.createNewDataFile(basePath, DEFAULT_FIRST_PARTITION_PATH, "000"); String file1P1C0 = HoodieTestUtils.createNewDataFile(basePath, DEFAULT_SECOND_PARTITION_PATH, "000"); assertEquals("Must not clean any files", 0, getCleanStat(hoodieCleanStatsOne, DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles().size()); assertTrue(HoodieTestUtils.doesDataFileExist(basePath, DEFAULT_FIRST_PARTITION_PATH, "000", file1P0C0)); assertTrue(HoodieTestUtils.doesDataFileExist(basePath, DEFAULT_SECOND_PARTITION_PATH, "000", file1P1C0)); HoodieTestUtils.createCommitFiles(basePath, "001"); table = HoodieTable.getHoodieTable(new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true), config, jsc); String file2P0C1 = HoodieTestUtils.createNewDataFile(basePath, DEFAULT_FIRST_PARTITION_PATH, "001"); // insert String file2P1C1 = HoodieTestUtils.createNewDataFile(basePath, DEFAULT_SECOND_PARTITION_PATH, "001"); // insert HoodieTestUtils.createDataFile(basePath, DEFAULT_FIRST_PARTITION_PATH, "001", file1P0C0); // update HoodieTestUtils.createDataFile(basePath, DEFAULT_SECOND_PARTITION_PATH, "001", file1P1C0); // update assertEquals("Must not clean any files", 0, getCleanStat(hoodieCleanStatsTwo, DEFAULT_SECOND_PARTITION_PATH).getSuccessDeleteFiles().size()); assertTrue(HoodieTestUtils.doesDataFileExist(basePath, DEFAULT_FIRST_PARTITION_PATH, "001", file2P0C1)); assertTrue(HoodieTestUtils.doesDataFileExist(basePath, DEFAULT_SECOND_PARTITION_PATH, "001", file2P1C1)); assertTrue(HoodieTestUtils.doesDataFileExist(basePath, DEFAULT_FIRST_PARTITION_PATH, "000", file1P0C0)); assertTrue(HoodieTestUtils.doesDataFileExist(basePath, DEFAULT_SECOND_PARTITION_PATH, "000", file1P1C0)); HoodieTestUtils.createCommitFiles(basePath, "002"); table = HoodieTable.getHoodieTable(new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true), config, jsc);
@Before
public void setUp() throws Exception {
  this.metaClient = HoodieTestUtils.init(tmpFolder.getRoot().getAbsolutePath());
}
    HoodieCompactionConfig.newBuilder().archiveCommitsWith(2, 4).build())
    .forTable("test-trip-table").build();
HoodieTestUtils.init(hadoopConf, basePath);

HoodieTestUtils.createCleanFiles(basePath, "100", dfs.getConf());
HoodieTestUtils.createInflightCleanFiles(basePath, dfs.getConf(), "101");
HoodieTestUtils.createCleanFiles(basePath, "101", dfs.getConf());
HoodieTestUtils.createCleanFiles(basePath, "102", dfs.getConf());
HoodieTestUtils.createCleanFiles(basePath, "103", dfs.getConf());
HoodieTestUtils.createCleanFiles(basePath, "104", dfs.getConf());
HoodieTestUtils.createCleanFiles(basePath, "105", dfs.getConf());
HoodieTestUtils.createInflightCleanFiles(basePath, dfs.getConf(), "106", "107");
@Before
public void init() throws Exception {
  // Create a temp folder as the base path
  TemporaryFolder folder = new TemporaryFolder();
  folder.create();
  this.basePath = folder.getRoot().getAbsolutePath();
  HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath);
  // Initialize a local spark env
  jsc = new JavaSparkContext(HoodieClientTestUtils.getSparkConfForTest("TestUpdateMapFunction"));
}
String fileId = UUID.randomUUID().toString();
if (createDataFile) {
  HoodieTestUtils.createDataFile(metaClient.getBasePath(), DEFAULT_PARTITION_PATHS[0], instantId, fileId);
}
HoodieTestUtils.createNewLogFile(metaClient.getFs(), metaClient.getBasePath(), DEFAULT_PARTITION_PATHS[0],
    instantId, fileId, Optional.of(1));
HoodieTestUtils.createNewLogFile(metaClient.getFs(), metaClient.getBasePath(), DEFAULT_PARTITION_PATHS[0],
    instantId, fileId, Optional.of(2));
FileSlice slice = new FileSlice(instantId, fileId);
String logFilePath1 = HoodieTestUtils
    .getLogFilePath(metaClient.getBasePath(), DEFAULT_PARTITION_PATHS[0], instantId, fileId, Optional.of(1));
String logFilePath2 = HoodieTestUtils
    .getLogFilePath(metaClient.getBasePath(), DEFAULT_PARTITION_PATHS[0], instantId, fileId, Optional.of(2));
slice.addLogFile(new HoodieLogFile(new Path(logFilePath1)));
CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], slice, Optional.empty());
if (deltaCommitsAfterCompactionRequests) {
  HoodieTestUtils.createNewLogFile(metaClient.getFs(), metaClient.getBasePath(), DEFAULT_PARTITION_PATHS[0],
      compactionInstantId, fileId, Optional.of(1));
  HoodieTestUtils.createNewLogFile(metaClient.getFs(), metaClient.getBasePath(), DEFAULT_PARTITION_PATHS[0],
      compactionInstantId, fileId, Optional.of(2));
HoodieParquetConfig config = new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP,
    ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE, 120 * 1024 * 1024,
    HoodieTestUtils.getDefaultHadoopConf(), Double.valueOf(HoodieStorageConfig.DEFAULT_STREAM_COMPRESSION_RATIO));
HoodieParquetWriter writer = new HoodieParquetWriter(

HoodieTestUtils.createMetadataFolder(basePath);
HoodieTestUtils.createCommitFiles(basePath, commitTime);
public static HoodieTableMetaClient init(Configuration hadoopConf, String basePath) throws IOException {
  return initTableType(hadoopConf, basePath, HoodieTableType.COPY_ON_WRITE);
}
HoodieTestUtils.initTableType(jsc.hadoopConfiguration(), basePath, HoodieTableType.COPY_ON_WRITE);
HoodieTestUtils.initTableType(jsc.hadoopConfiguration(), basePath, HoodieTableType.MERGE_ON_READ);

FileStatus[] allFiles = HoodieTestUtils.listAllDataFilesInPath(metaClient.getFs(), cfg.getBasePath());
HoodieTableFileSystemView roView = new HoodieTableFileSystemView(metaClient,
    hoodieTable.getCompletedCommitsTimeline(), allFiles);
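// Illustrative only: once the read-optimized view above is built, a test would typically query it to
// verify what is visible after commits or cleans. The accessor used below (getLatestDataFiles()) is my
// assumption about the view API here, and `expectedLatestFileCount` is a hypothetical expected value.
List<HoodieDataFile> latestDataFiles = roView.getLatestDataFiles().collect(Collectors.toList());
assertEquals("Each file group should expose exactly one latest data file", expectedLatestFileCount,
    latestDataFiles.size());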
/**
 * Utility to simulate a commit touching files in a partition.
 *
 * @param files List of file-ids to be touched
 * @param partitionPath Partition
 * @param commitTime Commit timestamp
 * @throws IOException in case of error
 */
void updateAllFilesInPartition(List<String> files, String partitionPath, String commitTime) throws IOException {
  for (String fileId : files) {
    HoodieTestUtils.createDataFile(basePath, partitionPath, commitTime, fileId);
  }
}
public static String writeParquetFile(String basePath, String partitionPath, List<HoodieRecord> records,
    Schema schema, BloomFilter filter, boolean createCommitTime) throws IOException, InterruptedException {
  // Sleep so the time-based commit time generated below differs from any earlier one
  Thread.sleep(1000);
  String commitTime = HoodieTestUtils.makeNewCommitTime();
  String fileId = UUID.randomUUID().toString();
  String filename = FSUtils.makeDataFileName(commitTime, 1, fileId);
  HoodieTestUtils.createCommitFiles(basePath, commitTime);
  return HoodieClientTestUtils
      .writeParquetFile(basePath, partitionPath, filename, records, schema, filter, createCommitTime);
}
}
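// Illustrative only: how the wrapper above might be called from a test, assuming `records`, `schema`
// and the BloomFilter `filter` have already been prepared by the test (not shown here). The partition
// path mirrors the values used elsewhere in these tests; the returned String is presumably the
// generated data file name.
String writtenFile = writeParquetFile(basePath, "2016/05/01", records, schema, filter, true);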
HoodieTestUtils.createCommitFiles(basePath, "000"); List<String> filesP0C0 = createFilesInPartition(DEFAULT_FIRST_PARTITION_PATH, "000", 100); List<String> filesP1C0 = createFilesInPartition(DEFAULT_SECOND_PARTITION_PATH, "000", 10); List<String> filesP2C0 = createFilesInPartition(DEFAULT_THIRD_PARTITION_PATH, "000", 10); HoodieTestUtils.createCommitFiles(basePath, "001"); updateAllFilesInPartition(filesP0C0, DEFAULT_FIRST_PARTITION_PATH, "001"); updateAllFilesInPartition(filesP1C0, DEFAULT_SECOND_PARTITION_PATH, "001"); updateAllFilesInPartition(filesP2C0, DEFAULT_THIRD_PARTITION_PATH, "001"); HoodieTestUtils.createCommitFiles(basePath, "002"); updateAllFilesInPartition(filesP0C0, DEFAULT_FIRST_PARTITION_PATH, "002"); updateAllFilesInPartition(filesP1C0, DEFAULT_SECOND_PARTITION_PATH, "002"); updateAllFilesInPartition(filesP2C0, DEFAULT_THIRD_PARTITION_PATH, "002"); HoodieTestUtils.createCommitFiles(basePath, "003"); updateAllFilesInPartition(filesP0C0, DEFAULT_FIRST_PARTITION_PATH, "003"); updateAllFilesInPartition(filesP1C0, DEFAULT_SECOND_PARTITION_PATH, "003");
HoodieTestUtils.assertStreamEquals("Check the instants stream",
    Stream.of(instant1Complete, instant2Complete, instant3Complete, instant4Complete, instant5),
    timeline.getInstants());
HoodieTestUtils.assertStreamEquals("Check the instants stream",
    Stream.of(instant1Complete, instant2Complete, instant3Complete, instant4Complete, instant5),
    timeline.getCommitTimeline().getInstants());
HoodieTestUtils.assertStreamEquals("Check the instants stream",
    Stream.of(instant1Complete, instant2Complete, instant3Complete, instant4Complete),
    timeline.getCommitTimeline().filterCompletedInstants().getInstants());
HoodieTestUtils.assertStreamEquals("Check the instants stream",
    Stream.of(instant5),
    timeline.getCommitTimeline().filterInflightsExcludingCompaction().getInstants());
private static void setPropsForInputFormat(HoodieRealtimeInputFormat inputFormat, JobConf jobConf, Schema schema,
    String basePath) {
  List<Schema.Field> fields = schema.getFields();
  String names = fields.stream().map(f -> f.name()).collect(Collectors.joining(","));
  String positions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
  Configuration conf = HoodieTestUtils.getDefaultHadoopConf();

  jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
  jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
  jobConf.set("partition_columns", "datestr");
  conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, positions);
  conf.set("partition_columns", "datestr");
  inputFormat.setConf(conf);
  jobConf.addResource(conf);
}
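// Illustrative only: a sketch of how this private helper would be driven from within the same test
// class, assuming `schema` (the Avro schema of the written records) and `basePath` are set up by the
// test beforehand.
JobConf jobConf = new JobConf();
HoodieRealtimeInputFormat inputFormat = new HoodieRealtimeInputFormat();
setPropsForInputFormat(inputFormat, jobConf, schema, basePath);
// Both jobConf and the input format's Configuration now carry the projected column names/ids and the
// partition column ("datestr"), so the test can go on to create and read splits.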