// Read the target directory from the load-file descriptor and create the list of target
// directories with an initial capacity of one slot per entry in toMove.
Path targetDir = lfd.getTargetDir(); List<Path> targetDirs = new ArrayList<Path>(toMove.size());
// Fetch the load-file descriptor from the move work, read its target directory, and create the
// list of target directories with an initial capacity of one slot per entry in toMove.
LoadFileDesc lfd = mvWork.getLoadFileWork(); Path targetDir = lfd.getTargetDir(); List<Path> targetDirs = new ArrayList<Path>(toMove.size());
Path targetPath = lfd.getTargetDir(); Path sourcePath = lfd.getSourcePath(); if (targetPath.equals(sourcePath)) {
/**
 * Asserts that the given task is a MoveTask whose load-file work moves data from the expected
 * source path to the expected target directory.
 *
 * @param task   The task to inspect; it is cast to MoveTask.
 * @param source The source path the load-file work is expected to report.
 * @param target The target directory the load-file work is expected to report.
 */
private void verifyMoveTask(Task<? extends Serializable> task, Path source, Path target) {
  final MoveTask mover = (MoveTask) task;

  assertEquals(source, mover.getWork().getLoadFileWork().getSourcePath());
  assertEquals(target, mover.getWork().getLoadFileWork().getTargetDir());
}
}
/**
 * Decides whether a ConditionalTask's input/output paths and its linked MoveWork can be collapsed
 * into a single MoveWork. This is an optimization for BlobStore systems that avoids performing two
 * renames/copies when one is enough.
 *
 * <p>The answer is {@code false} when there is no linked MoveWork, when BlobStorage optimizations
 * are disabled, or when the linked MoveWork does not carry exactly one of load-file work and
 * load-table work.
 *
 * @param conf A HiveConf object to check if BlobStorage optimizations are enabled.
 * @param condInputPath A path that the ConditionalTask uses as input for its sub-tasks.
 * @param condOutputPath A path that the ConditionalTask uses as output for its sub-tasks.
 * @param linkedMoveWork A MoveWork that the ConditionalTask uses to link to its sub-tasks.
 * @return True if the conditional output path equals the linked MoveWork's source path and both
 *         the conditional input path and the linked MoveWork's target path are BlobStorage paths.
 */
@VisibleForTesting
protected static boolean shouldMergeMovePaths(HiveConf conf, Path condInputPath, Path condOutputPath,
    MoveWork linkedMoveWork) {
  if (linkedMoveWork == null || !BlobStorageUtils.areOptimizationsEnabled(conf)) {
    return false;
  }

  final boolean hasFileWork = linkedMoveWork.getLoadFileWork() != null;
  final boolean hasTableWork = linkedMoveWork.getLoadTableWork() != null;
  if (hasFileWork == hasTableWork) {
    // Either both kinds of work are present or neither is: nothing unambiguous to merge.
    return false;
  }

  final Path moveSource;
  final Path moveTarget;
  if (hasFileWork) {
    moveSource = linkedMoveWork.getLoadFileWork().getSourcePath();
    moveTarget = linkedMoveWork.getLoadFileWork().getTargetDir();
  } else {
    moveSource = linkedMoveWork.getLoadTableWork().getSourcePath();
    moveTarget = getTableLocationPath(conf, linkedMoveWork.getLoadTableWork().getTable());
  }

  // The linked move must pick up exactly where the conditional sub-tasks leave their output.
  if (!condOutputPath.equals(moveSource)) {
    return false;
  }
  return BlobStorageUtils.isBlobStoragePath(conf, condInputPath)
      && BlobStorageUtils.isBlobStoragePath(conf, moveTarget);
}
@Test public void testMergePathValidMoveWorkReturnsNewMoveWork() { final Path condInputPath = new Path("s3a://bucket/scratch/-ext-10000"); final Path condOutputPath = new Path("s3a://bucket/scratch/-ext-10002"); final Path targetMoveWorkPath = new Path("s3a://bucket/scratch/-ext-10003"); final MoveWork mockWork = mock(MoveWork.class); final LineageState lineageState = new LineageState(); MoveWork newWork; // test using loadFileWork when(mockWork.getLoadFileWork()).thenReturn(new LoadFileDesc( condOutputPath, targetMoveWorkPath, false, "", "", false)); newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState); assertNotNull(newWork); assertNotEquals(newWork, mockWork); assertEquals(condInputPath, newWork.getLoadFileWork().getSourcePath()); assertEquals(targetMoveWorkPath, newWork.getLoadFileWork().getTargetDir()); // test using loadTableWork TableDesc tableDesc = new TableDesc(); reset(mockWork); when(mockWork.getLoadTableWork()).thenReturn(new LoadTableDesc( condOutputPath, tableDesc, null)); newWork = GenMapRedUtils.mergeMovePaths(condInputPath, mockWork, lineageState); assertNotNull(newWork); assertNotEquals(newWork, mockWork); assertEquals(condInputPath, newWork.getLoadTableWork().getSourcePath()); assertTrue(newWork.getLoadTableWork().getTable().equals(tableDesc)); }
// Take the target directory and source path from the load-file descriptor and hand them to
// moveFile together with the descriptor's isDfsDir flag.
Path targetPath = lfd.getTargetDir(); Path sourcePath = lfd.getSourcePath(); moveFile(sourcePath, targetPath, lfd.getIsDfsDir());
/**
 * Decides whether a ConditionalTask's input/output paths and its linked MoveWork can be collapsed
 * into a single MoveWork. This is an optimization for BlobStore systems that avoids performing two
 * renames/copies when one is enough.
 *
 * <p>The answer is {@code false} when there is no linked MoveWork, when BlobStorage optimizations
 * are disabled, or when the linked MoveWork does not carry exactly one of load-file work and
 * load-table work.
 *
 * @param conf A HiveConf object to check if BlobStorage optimizations are enabled.
 * @param condInputPath A path that the ConditionalTask uses as input for its sub-tasks.
 * @param condOutputPath A path that the ConditionalTask uses as output for its sub-tasks.
 * @param linkedMoveWork A MoveWork that the ConditionalTask uses to link to its sub-tasks.
 * @return True if the conditional output path equals the linked MoveWork's source path and both
 *         the conditional input path and the linked MoveWork's target path are BlobStorage paths.
 */
@VisibleForTesting
protected static boolean shouldMergeMovePaths(HiveConf conf, Path condInputPath, Path condOutputPath,
    MoveWork linkedMoveWork) {
  if (linkedMoveWork == null || !BlobStorageUtils.areOptimizationsEnabled(conf)) {
    return false;
  }

  final boolean hasFileWork = linkedMoveWork.getLoadFileWork() != null;
  final boolean hasTableWork = linkedMoveWork.getLoadTableWork() != null;
  if (hasFileWork == hasTableWork) {
    // Either both kinds of work are present or neither is: nothing unambiguous to merge.
    return false;
  }

  final Path moveSource;
  final Path moveTarget;
  if (hasFileWork) {
    moveSource = linkedMoveWork.getLoadFileWork().getSourcePath();
    moveTarget = linkedMoveWork.getLoadFileWork().getTargetDir();
  } else {
    moveSource = linkedMoveWork.getLoadTableWork().getSourcePath();
    moveTarget = getTableLocationPath(conf, linkedMoveWork.getLoadTableWork().getTable());
  }

  // The linked move must pick up exactly where the conditional sub-tasks leave their output.
  if (!condOutputPath.equals(moveSource)) {
    return false;
  }
  return BlobStorageUtils.isBlobStoragePath(conf, condInputPath)
      && BlobStorageUtils.isBlobStoragePath(conf, moveTarget);
}
// Fetch the load-file descriptor from the move work, read its target directory, and create the
// list of target directories with an initial capacity of one slot per entry in toMove.
LoadFileDesc lfd = mvWork.getLoadFileWork(); Path targetDir = lfd.getTargetDir(); List<Path> targetDirs = new ArrayList<Path>(toMove.size());
// Fetch the load-file descriptor from the move work, read its target directory (a String in this
// variant), create the list of target directories with an initial capacity of one slot per entry
// in toMove, and read the column count reported by dpCtx.getNumDPCols().
// NOTE(review): numDPCols is presumably the number of dynamic-partition columns — confirm
// against the type of dpCtx.
LoadFileDesc lfd = mvWork.getLoadFileWork(); String targetDir = lfd.getTargetDir(); List<String> targetDirs = new ArrayList<String>(toMove.size()); int numDPCols = dpCtx.getNumDPCols();
// Wrap the load-file descriptor's target and source directory values in Path objects and hand
// them to moveFile together with the descriptor's isDfsDir flag.
Path targetPath = new Path(lfd.getTargetDir()); Path sourcePath = new Path(lfd.getSourceDir()); moveFile(sourcePath, targetPath, lfd.getIsDfsDir());
// Take the target directory and source path from the load-file descriptor and hand them to
// moveFile together with the descriptor's isDfsDir flag.
Path targetPath = lfd.getTargetDir(); Path sourcePath = lfd.getSourcePath(); moveFile(sourcePath, targetPath, lfd.getIsDfsDir());