/**
 * Builds the absolute path of a data file for the given commit and file id.
 *
 * <p>The file name is produced by {@link FSUtils#makeDataFileName} using the
 * shared {@code DEFAULT_TASK_PARTITIONID}.
 *
 * @param basePath      table base path (no trailing slash expected)
 * @param partitionPath relative partition path under the base path
 * @param commitTime    commit timestamp embedded in the file name
 * @param fileID        file group id embedded in the file name
 * @return the absolute data file path as a string
 * @throws IOException declared for caller compatibility; this method itself does no I/O
 */
// NOTE(review): dropped the redundant `final` modifier — static methods cannot be overridden.
public static String getDataFilePath(String basePath, String partitionPath, String commitTime, String fileID)
    throws IOException {
  return basePath + "/" + partitionPath + "/"
      + FSUtils.makeDataFileName(commitTime, DEFAULT_TASK_PARTITIONID, fileID);
}
public Path makeNewPath(String partitionPath, int taskPartitionId, String fileName) { Path path = new Path(config.getBasePath(), partitionPath); try { fs.mkdirs(path); // create a new partition as needed. } catch (IOException e) { throw new HoodieIOException("Failed to make dir " + path, e); } return new Path(path.toString(), FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName)); }
public Path makeNewPath(String partitionPath, int taskPartitionId, String fileName) { Path path = FSUtils.getPartitionPath(config.getBasePath(), partitionPath); try { fs.mkdirs(path); // create a new partition as needed. } catch (IOException e) { throw new HoodieIOException("Failed to make dir " + path, e); } return new Path(path.toString(), FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName)); }
/**
 * Test helper: creates an empty data file (and its partition directory) with a
 * name derived from the given commit time and file id.
 *
 * @param basePath      table base path
 * @param partitionPath relative partition path
 * @param commitTime    commit timestamp embedded in the file name
 * @param fileID        file group id embedded in the file name
 * @return the file id that was passed in, for caller convenience
 * @throws IOException if the empty file cannot be created
 */
public static final String createDataFile(String basePath, String partitionPath, String commitTime, String fileID)
    throws IOException {
  File partitionDir = new File(basePath + "/" + partitionPath + "/");
  // Ensure the partition directory exists before touching the file.
  partitionDir.mkdirs();
  String dataFileName = FSUtils.makeDataFileName(commitTime, DEFAULT_TASK_PARTITIONID, fileID);
  new File(partitionDir, dataFileName).createNewFile();
  return fileID;
}
/**
 * Test helper: creates a data file of the given length (content is unspecified,
 * only the size matters) under the given partition.
 *
 * @param basePath      table base path
 * @param partitionPath relative partition path
 * @param commitTime    commit timestamp embedded in the file name
 * @param fileId        file group id embedded in the file name
 * @param length        desired on-disk length of the fake file in bytes
 * @throws Exception if directory or file creation fails
 */
public static void fakeDataFile(String basePath, String partitionPath, String commitTime, String fileId, long length)
    throws Exception {
  String parentPath = String.format("%s/%s", basePath, partitionPath);
  new File(parentPath).mkdirs();
  String path = String.format("%s/%s", parentPath, FSUtils.makeDataFileName(commitTime, 0, fileId));
  new File(path).createNewFile();
  // Fix: the original leaked the RandomAccessFile handle; close it deterministically.
  try (RandomAccessFile raf = new RandomAccessFile(path, "rw")) {
    raf.setLength(length);
  }
}
@Test
public void testMakeDataFileName() {
  // Verifies the data file naming convention: <fileId>_<taskPartitionId>_<commitTime>.parquet
  String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
  int taskPartitionId = 2;
  String fileName = UUID.randomUUID().toString();
  // Fix: assertEquals instead of assertTrue(..equals(..)) so a failure reports both values.
  assertEquals(fileName + "_" + taskPartitionId + "_" + commitTime + ".parquet",
      FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName));
}
@Test
public void testGetCommitTime() {
  // Round-trip: the commit time embedded by makeDataFileName must be recoverable.
  String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
  int taskPartitionId = 2;
  String fileName = UUID.randomUUID().toString();
  String fullFileName = FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName);
  // Fix: assertEquals instead of assertTrue(..equals(..)) so a failure reports both values.
  assertEquals(commitTime, FSUtils.getCommitTime(fullFileName));
}
@Test
public void testGetFileNameWithoutMeta() {
  // Round-trip: the file id embedded by makeDataFileName must be recoverable.
  String commitTime = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
  int taskPartitionId = 2;
  String fileName = UUID.randomUUID().toString();
  String fullFileName = FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName);
  // Fix: assertEquals instead of assertTrue(..equals(..)) so a failure reports both values.
  assertEquals(fileName, FSUtils.getFileId(fullFileName));
}
private static List<HoodieWriteStat> createTestData(Path partPath, boolean isParquetSchemaSimple, String commitTime) throws IOException, URISyntaxException, InterruptedException { List<HoodieWriteStat> writeStats = Lists.newArrayList(); for (int i = 0; i < 5; i++) { // Create 5 files String fileId = UUID.randomUUID().toString(); Path filePath = new Path(partPath.toString() + "/" + FSUtils.makeDataFileName(commitTime, DEFAULT_TASK_PARTITIONID, fileId)); generateParquetData(filePath, isParquetSchemaSimple); HoodieWriteStat writeStat = new HoodieWriteStat(); writeStat.setFileId(fileId); writeStat.setPath(filePath.toString()); writeStats.add(writeStat); } return writeStats; }
/**
 * Test helper: generates a fresh commit time and file id, records the commit
 * files, and delegates the actual parquet write to {@code HoodieClientTestUtils}.
 *
 * @param basePath         table base path
 * @param partitionPath    relative partition path
 * @param records          records to write
 * @param schema           Avro schema of the records
 * @param filter           bloom filter to embed in the file
 * @param createCommitTime whether the delegate should also create the commit marker
 * @return whatever the delegate returns (the written file's identifier/path)
 */
public static String writeParquetFile(String basePath, String partitionPath, List<HoodieRecord> records,
    Schema schema, BloomFilter filter, boolean createCommitTime) throws IOException, InterruptedException {
  // Pause briefly so the generated commit time differs from any produced just before.
  Thread.sleep(1000);
  final String commitTime = HoodieTestUtils.makeNewCommitTime();
  final String fileId = UUID.randomUUID().toString();
  final String filename = FSUtils.makeDataFileName(commitTime, 1, fileId);
  HoodieTestUtils.createCommitFiles(basePath, commitTime);
  return HoodieClientTestUtils.writeParquetFile(basePath, partitionPath, filename, records, schema, filter,
      createCommitTime);
}
}
// NOTE(review): incomplete fragment of a file-system-view test — left byte-identical.
// Seeds three file groups (fileId1..3) with data files across four commit times, then
// asserts that the view reports exactly the expected file names per file group.
// Assumes fullPartitionPath already ends with '/' (file names are appended directly) — TODO confirm.
String fileId3 = UUID.randomUUID().toString(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)).createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId1)).createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)).createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)).createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)).createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)).createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)).createNewFile(); }); if (fileId.equals(fileId1)) { assertEquals(filenames, Sets.newHashSet(FSUtils.makeDataFileName(commitTime1, 1, fileId1), FSUtils.makeDataFileName(commitTime4, 1, fileId1))); } else if (fileId.equals(fileId2)) { assertEquals(filenames, Sets.newHashSet(FSUtils.makeDataFileName(commitTime1, 1, fileId2), FSUtils.makeDataFileName(commitTime2, 1, fileId2), FSUtils.makeDataFileName(commitTime3, 1, fileId2))); } else { assertEquals(filenames, Sets.newHashSet(FSUtils.makeDataFileName(commitTime3, 1, fileId3), FSUtils.makeDataFileName(commitTime4, 1, fileId3)));
// NOTE(review): incomplete fragment of a file-system-view test — left byte-identical.
// Seeds the same three file groups as the sibling test, collects the returned file names,
// and asserts the collection contains the expected entries for fileId1 and fileId2.
// Assumes fullPartitionPath already ends with '/' — TODO confirm.
String fileId3 = UUID.randomUUID().toString(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)).createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId1)).createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)).createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)).createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)).createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)).createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)).createNewFile(); filenames.add(status.getFileName()); assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime1, 1, fileId1))); assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime2, 1, fileId2)));
@Test
public void testMakeNewPath() throws Exception {
  // Verifies that HoodieCreateHandle.makeNewPath composes base path, partition path,
  // and the data file name produced by FSUtils.makeDataFileName.
  String fileName = UUID.randomUUID().toString();
  String partitionPath = "2016/05/04";
  int unitNumber = (int) (Math.random() * 10);
  HoodieRecord record = mock(HoodieRecord.class);
  when(record.getPartitionPath()).thenReturn(partitionPath);
  String commitTime = HoodieTestUtils.makeNewCommitTime();
  HoodieWriteConfig config = makeHoodieClientConfig();
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath);
  HoodieTable table = HoodieTable.getHoodieTable(metaClient, config, jsc);
  HoodieCreateHandle io = new HoodieCreateHandle(config, commitTime, table, partitionPath,
      UUID.randomUUID().toString());
  Path newPath = io.makeNewPath(record.getPartitionPath(), unitNumber, fileName);
  // Fix: assertEquals instead of assertTrue(..equals(..)) so a failure reports both paths.
  assertEquals(this.basePath + "/" + partitionPath + "/"
      + FSUtils.makeDataFileName(commitTime, unitNumber, fileName), newPath.toString());
}
// NOTE(review): incomplete fragment of a merge-on-read view test — left byte-identical.
// Seeds three file groups with a mix of data files and delta log files
// (makeLogFileName with DELTA_EXTENSION), then asserts the view returns the expected
// latest data files for fileId2 and fileId3.
// Assumes fullPartitionPath already ends with '/' — TODO confirm.
String fileId3 = UUID.randomUUID().toString(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId1)).createNewFile(); new File(fullPartitionPath + FSUtils.makeLogFileName(fileId1, HoodieLogFile.DELTA_EXTENSION, commitTime1, 0)) .createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId1)).createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime1, 1, fileId2)).createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime2, 1, fileId2)).createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId2)).createNewFile(); new File(fullPartitionPath + FSUtils.makeLogFileName(fileId2, HoodieLogFile.DELTA_EXTENSION, commitTime4, 0)) .createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime3, 1, fileId3)).createNewFile(); new File(fullPartitionPath + FSUtils.makeDataFileName(commitTime4, 1, fileId3)).createNewFile(); filenames.add(status.getFileName()); assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId2))); assertTrue(filenames.contains(FSUtils.makeDataFileName(commitTime3, 1, fileId3)));
// NOTE(review): incomplete fragment of merge-handle path construction — left byte-identical.
// Builds the relative path of the new data file and resolves it against the base path.
// NOTE(review): if partitionPath is empty this yields a leading '/' in the relative
// path ("/filename") — consider guarding on partitionPath.isEmpty(); verify against callers.
config.getBasePath() + "/" + partitionPath + "/" + latestValidFilePath); String relativePath = new Path(partitionPath + "/" + FSUtils .makeDataFileName(commitTime, TaskContext.getPartitionId(), fileId)).toString(); newFilePath = new Path(config.getBasePath(), relativePath); if (config.shouldUseTempFolderForCopyOnWriteForMerge()) {
// NOTE(review): incomplete fragment of merge-handle path construction — left byte-identical.
// Same as the unguarded variant, but skips the partition prefix when partitionPath is
// empty, avoiding a spurious leading '/' for non-partitioned tables.
config.getBasePath() + "/" + partitionPath + "/" + latestValidFilePath); String relativePath = new Path((partitionPath.isEmpty() ? "" : partitionPath + "/") + FSUtils .makeDataFileName(commitTime, TaskContext.getPartitionId(), fileId)).toString(); newFilePath = new Path(config.getBasePath(), relativePath); if (config.shouldUseTempFolderForCopyOnWriteForMerge()) {
// NOTE(review): incomplete fragment (statement cut off mid-call) — left byte-identical.
// Computes the final data file location inside the partition and the corresponding
// temporary file location under the table's temp folder (makeTempDataFileName),
// presumably for write-then-rename finalization — verify against the enclosing method.
Path tempPath = new Path(basePath, HoodieTableMetaClient.TEMPFOLDER_NAME); Path finalizeFilePath = new Path(partitionPath, FSUtils.makeDataFileName(commitTime, taskPartitionId, fileName)); Path tempFilePath = new Path(tempPath, FSUtils .makeTempDataFileName(partitionPathString, commitTime, taskPartitionId,
// NOTE(review): incomplete fragment (no enclosing method visible) — left byte-identical.
// Creates two successive data-file versions of the same file group (commitTime1 then
// commitTime2), refreshing the file-system view after each so the test can observe
// the latest-file-slice selection change.
String fileName1 = FSUtils.makeDataFileName(commitTime1, 1, fileId); new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile(); refreshFsView(null); String fileName2 = FSUtils.makeDataFileName(commitTime2, 1, fileId); new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile(); refreshFsView(null);
@Test public void testInsertWithPartialFailures() throws Exception { HoodieWriteConfig config = makeHoodieClientConfig(); String commitTime = HoodieTestUtils.makeNewCommitTime(); FileSystem fs = FSUtils.getFs(basePath, jsc.hadoopConfiguration()); HoodieTableMetaClient metadata = new HoodieTableMetaClient(jsc.hadoopConfiguration(), basePath); HoodieCopyOnWriteTable table = new HoodieCopyOnWriteTable(config, jsc); // Write a few records, and get atleast one file // 10 records for partition 1, 1 record for partition 2. List<HoodieRecord> records = newHoodieRecords(10, "2016-01-31T03:16:41.415Z"); records.addAll(newHoodieRecords(1, "2016-02-01T03:16:41.415Z")); // Simulate crash after first file List<WriteStatus> statuses = HoodieClientTestUtils .collectStatuses(table.handleInsert(commitTime, records.iterator())); WriteStatus status = statuses.get(0); Path partialFile = new Path(String.format("%s/%s/%s", basePath, status.getPartitionPath(), FSUtils.makeDataFileName(commitTime, 0, status.getFileId()))); assertTrue(fs.exists(partialFile)); // When we retry records = newHoodieRecords(10, "2016-01-31T03:16:41.415Z"); records.addAll(newHoodieRecords(1, "2016-02-01T03:16:41.415Z")); statuses = HoodieClientTestUtils.collectStatuses(table.handleInsert(commitTime, records.iterator())); status = statuses.get(0); Path retriedFIle = new Path(String.format("%s/%s/%s", basePath, status.getPartitionPath(), FSUtils.makeDataFileName(commitTime, 0, status.getFileId()))); assertTrue(fs.exists(retriedFIle)); assertFalse(fs.exists(partialFile)); }
// NOTE(review): incomplete fragment (condition body cut off) — left byte-identical.
// Optionally attaches a fake data file (TestHoodieDataFile) to the file slice, named for
// the given instant and file id under the first default partition path.
if (createDataFile) { slice.setDataFile(new TestHoodieDataFile(metaClient.getBasePath() + "/" + DEFAULT_PARTITION_PATHS[0] + "/" + FSUtils.makeDataFileName(instantId, 1, fileId)));