/**
 * Builds a mocked {@code MoveTask} for use in tests.
 *
 * <p>The mock's {@code getWork()} is stubbed to return a real {@code MoveWork} whose
 * load-file work is a {@code LoadFileDesc} going from {@code source} to {@code destination}.
 *
 * @param source path used as the source of the load-file descriptor
 * @param destination path used as the target of the load-file descriptor
 * @return the mocked task, stubbed to hand back the prepared {@code MoveWork}
 */
private Task<MoveWork> createMoveTask(Path source, Path destination) {
  // Prepare the real work object first; only the task wrapping it is mocked.
  final LoadFileDesc fileDesc = new LoadFileDesc(source, destination, true, null, null, false);
  final MoveWork work = new MoveWork();
  work.setLoadFileWork(fileDesc);

  final Task<MoveWork> mockedTask = mock(MoveTask.class);
  when(mockedTask.getWork()).thenReturn(work);
  return mockedTask;
}
/**
 * Checks that {@code GenMapRedUtils.shouldMergeMovePaths} returns {@code true} when the
 * conditional input path, the conditional output path and the MoveWork target are all
 * {@code s3a://} paths and the MoveWork's source is the conditional output path.
 */
@Test
public void testMovePathsThatCanBeMerged() {
  final Path conditionalInput = new Path("s3a://bucket/scratch/-ext-10000");
  final Path conditionalOutput = new Path("s3a://bucket/scratch/-ext-10002");
  final Path moveTarget = new Path("s3a://bucket/scratch/-ext-10003");

  // The linked MoveWork moves the conditional output to the final target.
  final LoadFileDesc linkedFileDesc =
      new LoadFileDesc(conditionalOutput, moveTarget, false, "", "", false);
  final MoveWork linkedWork = mock(MoveWork.class);
  when(linkedWork.getLoadFileWork()).thenReturn(linkedFileDesc);

  final boolean mergeAllowed =
      GenMapRedUtils.shouldMergeMovePaths(hiveConf, conditionalInput, conditionalOutput, linkedWork);

  assertTrue("Merging BlobStore paths should be allowed.", mergeAllowed);
}
when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( condInputPath, condOutputPath, false, "", "", false)); assertFalse("Merging paths is not allowed when both conditional output path is not equals to MoveWork input path.", when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( condOutputPath, new Path("unused"), false, "", "", false)); assertFalse("Merging paths is not allowed when conditional input path is not a BlobStore path.", when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( condOutputPath, new Path("hdfs://hdfs-path"), false, "", "", false)); assertFalse("Merging paths is not allowed when MoveWork output path is not a BlobStore path.",
/**
 * Collapses a ConditionalTask input path and its linked MoveWork into a single MoveWork.
 *
 * <p>The result is a copy of {@code linkedMoveWork} whose load descriptor (file or table,
 * whichever the linked work carries) is itself a copy with the source path replaced by
 * {@code condInputPath}. This is an optimization for BlobStore systems that avoids two
 * renames or copies where one is enough.
 *
 * @param condInputPath A path that the ConditionalTask uses as input for its sub-tasks.
 * @param linkedMoveWork A MoveWork that the ConditionalTask uses to link to its sub-tasks.
 * @return A new MoveWork that has the Conditional input path as source and the
 *         linkedMoveWork target as target.
 * @throws IllegalArgumentException if {@code linkedMoveWork} has neither a load-file work
 *         nor a load-table work.
 */
@VisibleForTesting
protected static MoveWork mergeMovePaths(Path condInputPath, MoveWork linkedMoveWork) {
  final MoveWork mergedWork = new MoveWork(linkedMoveWork);

  // A load-file descriptor takes precedence over a load-table descriptor.
  final LoadFileDesc linkedFileDesc = linkedMoveWork.getLoadFileWork();
  if (linkedFileDesc != null) {
    final LoadFileDesc mergedFileDesc = new LoadFileDesc(linkedFileDesc);
    mergedFileDesc.setSourcePath(condInputPath);

    mergedWork.setLoadFileWork(mergedFileDesc);
    mergedWork.setLoadTableWork(null);
    return mergedWork;
  }

  final LoadTableDesc linkedTableDesc = linkedMoveWork.getLoadTableWork();
  if (linkedTableDesc == null) {
    throw new IllegalArgumentException(
        "Merging a path with a MoveWork with multi-files work is not allowed.");
  }

  final LoadTableDesc mergedTableDesc = new LoadTableDesc(linkedTableDesc);
  mergedTableDesc.setSourcePath(condInputPath);

  mergedWork.setLoadFileWork(null);
  mergedWork.setLoadTableWork(mergedTableDesc);
  return mergedWork;
}
/**
 * Collapses a ConditionalTask input path and its linked MoveWork into a single MoveWork,
 * reporting the source-path substitution to the given LineageState.
 *
 * <p>The result is a copy of {@code linkedMoveWork} whose load descriptor (file or table,
 * whichever the linked work carries) is itself a copy with the source path replaced by
 * {@code condInputPath}. After the replacement, {@code lineageState.updateDirToOpMap} is
 * called with {@code condInputPath} and the source path of the linked work's own descriptor.
 * This is an optimization for BlobStore systems that avoids two renames or copies where one
 * is enough.
 *
 * @param condInputPath A path that the ConditionalTask uses as input for its sub-tasks.
 * @param linkedMoveWork A MoveWork that the ConditionalTask uses to link to its sub-tasks.
 * @param lineageState A LineageState used to track what changes.
 * @return A new MoveWork that has the Conditional input path as source and the
 *         linkedMoveWork target as target.
 * @throws IllegalArgumentException if {@code linkedMoveWork} has neither a load-file work
 *         nor a load-table work.
 */
@VisibleForTesting
protected static MoveWork mergeMovePaths(Path condInputPath, MoveWork linkedMoveWork,
    LineageState lineageState) {
  final MoveWork mergedWork = new MoveWork(linkedMoveWork);

  // A load-file descriptor takes precedence over a load-table descriptor.
  final LoadFileDesc linkedFileDesc = linkedMoveWork.getLoadFileWork();
  if (linkedFileDesc != null) {
    final LoadFileDesc mergedFileDesc = new LoadFileDesc(linkedFileDesc);
    mergedFileDesc.setSourcePath(condInputPath);
    // The lineage update uses the source path of the linked descriptor, not of the copy.
    lineageState.updateDirToOpMap(condInputPath, linkedFileDesc.getSourcePath());

    mergedWork.setLoadFileWork(mergedFileDesc);
    mergedWork.setLoadTableWork(null);
    return mergedWork;
  }

  final LoadTableDesc linkedTableDesc = linkedMoveWork.getLoadTableWork();
  if (linkedTableDesc == null) {
    throw new IllegalArgumentException(
        "Merging a path with a MoveWork with multi-files work is not allowed.");
  }

  final LoadTableDesc mergedTableDesc = new LoadTableDesc(linkedTableDesc);
  mergedTableDesc.setSourcePath(condInputPath);
  // The lineage update uses the source path of the linked descriptor, not of the copy.
  lineageState.updateDirToOpMap(condInputPath, linkedTableDesc.getSourcePath());

  mergedWork.setLoadFileWork(null);
  mergedWork.setLoadTableWork(mergedTableDesc);
  return mergedWork;
}
@Test public void testMergePathValidMoveWorkReturnsNewMoveWork() { final Path condInputPath = new Path("s3a://bucket/scratch/-ext-10000"); final Path condOutputPath = new Path("s3a://bucket/scratch/-ext-10002"); final Path targetMoveWorkPath = new Path("s3a://bucket/scratch/-ext-10003"); final MoveWork mockWork = mock(MoveWork.class); final LineageState lineageState = new LineageState(); MoveWork newWork; // test using loadFileWork when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( condOutputPath, targetMoveWorkPath, false, "", "", false)); newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState); assertNotNull(newWork); assertNotEquals(newWork, mockWork); assertEquals(condInputPath, newWork.getLoadFileWork().getSourcePath()); assertEquals(targetMoveWorkPath, newWork.getLoadFileWork().getTargetDir()); // test using loadTableWork TableDesc tableDesc = new TableDesc(); reset(mockWork); when(mockWork.getLoadTableWork()).thenReturn(new LoadTableDesc( condOutputPath, tableDesc, null)); newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState); assertNotNull(newWork); assertNotEquals(newWork, mockWork); assertEquals(condInputPath, newWork.getLoadTableWork().getSourcePath()); assertTrue(newWork.getLoadTableWork().getTable().equals(tableDesc)); }
loadFileWork.add(new LoadFileDesc(tblDesc, viewDesc, queryTmpdir, dest_path, isDfsDir, cols, colTypes));
} else { loadFileWork.add(new LoadFileDesc(tblDesc, viewDesc, queryTmpdir, destinationPath, isDfsDir, cols, colTypes,
new LoadFileDesc(fsInputDesc.getFinalDirName(), finalName, true, null, null), false); MapWork cplan; Serializable work;
new LoadFileDesc(inputDirName, finalName, true, null, null, false), false);
new LoadFileDesc(fsConf.getDirName(), finalName, true, null, null), false);
new LoadFileDesc(fsInputDesc.getDirName(), finalName, true, null, null), false); MapredWork cplan = createMergeTask(ctx.getConf(), tsMerge, fsInputDesc);
new LoadFileDesc(fsInputDesc.getFinalDirName(), finalName, true, null, null), false); MapWork cplan; Serializable work;
loadFileWork.add(new LoadFileDesc(queryTmpdir, destStr, isDfsDir, cols, colTypes));
loadFileWork.add(new LoadFileDesc(tblDesc, queryTmpdir, dest_path, isDfsDir, cols, colTypes));